Skip to content

Commit

Permalink
Use repodata_from_packages, annotate failed artifacts, fix download url
Browse files Browse the repository at this point in the history
  • Loading branch information
jaimergp committed Mar 12, 2024
1 parent 962b542 commit fff3c0f
Show file tree
Hide file tree
Showing 2 changed files with 47 additions and 6 deletions.
10 changes: 5 additions & 5 deletions .github/workflows/bootstrap.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: Bootstrap database from libcfgraph
name: Update database

on: [push, workflow_dispatch]

Expand All @@ -15,7 +15,7 @@ concurrency:
cancel-in-progress: true

jobs:
build-and-push:
update:
runs-on: ubuntu-latest
defaults:
run:
Expand All @@ -28,7 +28,7 @@ jobs:
- name: Fetch latest release
run: |
curl -L -o path_to_artifacts.tar.zst \
https://github.com/$GITHUB_REPOSITORY/releases/latest/path_to_artifacts.tar.zst
https://github.com/jaimergp/conda-forge-paths/releases/latest/download/path_to_artifacts.tar.zst
tar xf path_to_artifacts.tar.zst
- uses: actions/setup-python@v5
Expand All @@ -39,14 +39,14 @@ jobs:
run: |
set -x
ll -h *.db
python $THIS_REPO/conda_forge_paths/path_to_artifacts.db.py update-from-repodata
python conda_forge_paths/path_to_artifacts.db.py update-from-repodata
ll -h *.db
- name: Update FTS index
run: |
set -x
ll -h *.db
python $THIS_REPO/conda_forge_paths/path_to_artifacts.db.py fts
python conda_forge_paths/path_to_artifacts.db.py fts
ll -h *.db
- name: Compress DB file
Expand Down
43 changes: 42 additions & 1 deletion conda_forge_paths/path_to_artifacts_db.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import bz2
import json
import os
import sqlite3
Expand All @@ -7,9 +8,10 @@
from datetime import datetime, UTC
from itertools import batched, chain, product
from pathlib import Path
from urllib.request import urlretrieve

from conda_forge_metadata.artifact_info import get_artifact_info_as_json
from conda_forge_metadata.repodata import SUBDIRS, fetch_repodata, all_labels
from conda_forge_metadata.repodata import SUBDIRS, all_labels

try:
from tqdm.auto import tqdm
Expand Down Expand Up @@ -173,6 +175,34 @@ def most_recent_artifact(db):
return row


def fetch_repodata(
    subdirs=SUBDIRS,
    force_download=False,
    cache_dir=".repodata_cache",
    label="main",
):
    """Download and cache the conda-forge repodata JSON for each subdir.

    For the ``main`` label, ``repodata_from_packages.json`` is fetched
    instead of ``repodata.json``; any other label uses
    ``label/<label>/<subdir>/repodata.json``. Files are downloaded as
    ``.bz2`` and stored decompressed as ``<cache_dir>/<subdir>.<label>.json``.

    Args:
        subdirs: Iterable of subdir names; each must be a member of SUBDIRS.
        force_download: Re-download even if the cached JSON already exists.
        cache_dir: Directory where the decompressed JSON files are stored.
        label: Channel label to fetch.

    Returns:
        List of ``Path`` objects pointing to the cached JSON files, in the
        same order as ``subdirs``.

    Raises:
        ValueError: If any requested subdir is not in SUBDIRS.
    """
    # Local import: keeps this block self-contained without touching the
    # module-level import section.
    import shutil

    # Materialize once so one-shot iterables survive validation + iteration.
    subdirs = list(subdirs)
    unknown = [subdir for subdir in subdirs if subdir not in SUBDIRS]
    if unknown:
        # Explicit raise rather than assert: asserts vanish under `python -O`.
        raise ValueError(f"Unknown subdirs: {', '.join(map(str, unknown))}")

    prefix = "https://conda.anaconda.org/conda-forge"
    paths = []
    for subdir in subdirs:
        if label == "main":
            # We don't need patches, and this way we can get 'removed' items with timestamps
            repodata = f"{prefix}/{subdir}/repodata_from_packages.json"
        else:
            repodata = f"{prefix}/label/{label}/{subdir}/repodata.json"
        local_fn = Path(cache_dir, f"{subdir}.{label}.json")
        paths.append(local_fn)
        if local_fn.exists() and not force_download:
            continue

        local_fn_bz2 = Path(str(local_fn) + ".bz2")
        local_fn_tmp = Path(str(local_fn) + ".tmp")
        local_fn.parent.mkdir(parents=True, exist_ok=True)
        try:
            urlretrieve(f"{repodata}.bz2", local_fn_bz2)
            # Stream-decompress into a temporary file so a failure midway
            # never leaves a truncated JSON that later runs would mistake
            # for a valid cache entry.
            with bz2.open(local_fn_bz2, "rb") as compressed, open(
                local_fn_tmp, "wb"
            ) as f:
                shutil.copyfileobj(compressed, f)
            # Only publish the cache entry once it is complete.
            local_fn_tmp.replace(local_fn)
        finally:
            # Clean up intermediates on both success and failure.
            local_fn_bz2.unlink(missing_ok=True)
            local_fn_tmp.unlink(missing_ok=True)
    return paths


def new_artifacts(ts):
futures = []
with ThreadPoolExecutor(max_workers=10) as executor:
Expand Down Expand Up @@ -335,6 +365,8 @@ def update_from_repodata(db):
", ".join(f"'{name}'" for name in failed_artifacts)
)
)
with open("failed_artifacts.txt", "a") as f:
f.write("\n".join(failed_artifacts) + "\n")
db.commit()


Expand Down Expand Up @@ -382,6 +414,15 @@ def update_from_repodata(db):
db = connect()
update_from_repodata(db)
db.close()
failed = Path("failed_artifacts.txt")
if failed.is_file():
print(
"!! Couldn't fetch these artifacts, please retry:",
failed.read_text(),
sep="\n",
file=sys.stderr,
)
sys.exit(1)
sys.exit()

print(
Expand Down

0 comments on commit fff3c0f

Please sign in to comment.