diff --git a/alexi/extract.py b/alexi/extract.py index 6d761bd..8e049bc 100644 --- a/alexi/extract.py +++ b/alexi/extract.py @@ -435,6 +435,7 @@ def analyse(self, iob: Iterable[T_obj], pdf_path: Path): def output_json(self): """Sauvegarder les metadonnées""" + self.metadata["urls"] = sorted(self.resolver.urls) with open(self.outdir / "index.json", "wt") as outfh: LOGGER.info("Génération de %s", self.outdir / "index.json") json.dump(self.metadata, outfh, indent=2, ensure_ascii=False) diff --git a/alexi/link.py b/alexi/link.py index 18ccfa4..c822ae0 100644 --- a/alexi/link.py +++ b/alexi/link.py @@ -55,6 +55,7 @@ def __init__(self, metadata: Optional[dict] = None): self.metadata = {"docs": {}} if metadata is None else metadata self.numeros = {} self.titles = {} + self.urls = set() for docpath, info in self.metadata["docs"].items(): self.numeros[info["numero"]] = docpath self.titles[normalize_title(info["titre"])] = docpath @@ -202,6 +203,7 @@ def resolve_external(self, text: str) -> Optional[str]: url = "https://www.legisquebec.gouv.qc.ca/fr/document/lc/Q-2" else: return None + self.urls.add(url) for m in SEC_RE.finditer(text): sectype = m.group("sec") num = m.group("num")