Skip to content

Commit

Permalink
rm more person entities < min number when "search person" disabled
Browse files Browse the repository at this point in the history
A person entity should be removed when the "search person" option is disabled,
even if it has a Wikipedia page.
  • Loading branch information
xxyzz committed Sep 6, 2024
1 parent 1de8a11 commit 2ea201c
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 5 deletions.
2 changes: 1 addition & 1 deletion data/deps.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"lxml": "5.3.0",
"rapidfuzz": "3.9.6",
"rapidfuzz": "3.9.7",
"spacy": "3.7.6",
"spacy_cpu_model": "3.7.0",
"en_spacy_cpu_model": "3.7.1",
Expand Down
15 changes: 11 additions & 4 deletions x_ray.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ def add_entity(

self.entity_occurrences[entity_id].append((start, entity_len))

def merge_entities(self, minimal_count: int) -> None:
def merge_entities(self, prefs: Prefs) -> None:
for entity_name, entity_data in self.entities.copy().items():
if entity_name in self.custom_x_ray:
continue
Expand All @@ -161,8 +161,15 @@ def merge_entities(self, minimal_count: int) -> None:
del self.entity_occurrences[entity_data.id]
del self.entities[entity_name]
continue
entity_cache = self.mediawiki.get_cache(entity_name)
if entity_cache is None and entity_data.count < minimal_count:
has_cache = self.mediawiki.get_cache(entity_name) is not None
is_person = entity_data.label in PERSON_LABELS
if entity_data.count < prefs["minimal_x_ray_count"] and (
(prefs["search_people"] and not has_cache)
or (
not prefs["search_people"]
and (is_person or (not is_person and not has_cache))
)
):
del self.entity_occurrences[entity_data.id]
del self.entities[entity_name]

Expand All @@ -178,7 +185,7 @@ def finish(
self.mediawiki.query(self.entities, prefs["search_people"])
if self.wikidata is not None:
query_wikidata(self.entities, self.mediawiki, self.wikidata)
self.merge_entities(prefs["minimal_x_ray_count"])
self.merge_entities(prefs)

insert_x_entities(
self.conn,
Expand Down

0 comments on commit 2ea201c

Please sign in to comment.