From 3b1088e44e9e5b531cdfc201384a3debf9c36a25 Mon Sep 17 00:00:00 2001 From: YishaiGlasner Date: Sun, 8 Dec 2024 11:19:19 +0200 Subject: [PATCH] feat(elasticSearch): add languageFamilyName and isPrimary to search API result. --- sefaria/model/dependencies.py | 2 +- sefaria/search.py | 14 +++++++++----- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/sefaria/model/dependencies.py b/sefaria/model/dependencies.py index eb842a16bb..2a186610f2 100644 --- a/sefaria/model/dependencies.py +++ b/sefaria/model/dependencies.py @@ -57,7 +57,7 @@ def process_version_title_change_in_search(ver, **kwargs): text_index = library.get_index(ver.title) delete_version(text_index, kwargs.get("old"), ver.language) for ref in text_index.all_segment_refs(): - TextIndexer.index_ref(search_index_name, ref, kwargs.get("new"), ver.language, False) + TextIndexer.index_ref(search_index_name, ref, kwargs.get("new"), ver.language, ver.languageFamilyName, ver.isPrimary) # Version Title Change diff --git a/sefaria/search.py b/sefaria/search.py index 020963f82a..f2ecd3d9fe 100644 --- a/sefaria/search.py +++ b/sefaria/search.py @@ -543,7 +543,7 @@ def index_version(cls, version, tries=0, action=None): print("Could not find dictionary node in {}".format(version.title)) @classmethod - def index_ref(cls, index_name, oref, version_title, lang): + def index_ref(cls, index_name, oref, version_title, lang, language_family_name, is_primary): # slower than `cls.index_version` but useful when you don't want the overhead of loading all versions into cache cls.index_name = index_name cls.curr_index = oref.index @@ -560,7 +560,7 @@ def index_ref(cls, index_name, oref, version_title, lang): content = TextChunk(oref, lang, vtitle=version_title).ja().flatten_to_string() categories = cls.curr_index.categories tref = oref.normal() - doc = cls.make_text_index_document(tref, oref.he_normal(), version_title, lang, version_priority, content, categories, hebrew_version_title) + doc = cls.make_text_index_document(tref, oref.he_normal(), version_title, lang, version_priority, content, categories, hebrew_version_title, language_family_name, is_primary) id = make_text_doc_id(tref, version_title, lang) es_client.index(index_name, doc, id=id) @@ -569,11 +569,13 @@ def _cache_action(cls, segment_str, tref, heTref, version): # Index this document as a whole vtitle = version.versionTitle vlang = version.language + language_family_name = version.languageFamilyName + is_primary = version.isPrimary hebrew_version_title = getattr(version, 'versionTitleInHebrew', None) try: version_priority, categories = cls.version_priority_map[(version.title, vtitle, vlang)] #TODO include sgement_str in this func - doc = cls.make_text_index_document(tref, heTref, vtitle, vlang, version_priority, segment_str, categories, hebrew_version_title) + doc = cls.make_text_index_document(tref, heTref, vtitle, vlang, version_priority, segment_str, categories, hebrew_version_title, language_family_name, is_primary) # print doc except Exception as e: logger.error("Error making index document {} / {} / {} : {}".format(tref, vtitle, vlang, str(e))) @@ -615,7 +617,7 @@ def modify_text_in_doc(cls, content): return content @classmethod - def make_text_index_document(cls, tref, heTref, version, lang, version_priority, content, categories, hebrew_version_title): + def make_text_index_document(cls, tref, heTref, version, lang, version_priority, content, categories, hebrew_version_title, language_family_name, is_primary): """ Create a document for indexing from the text specified by ref/version/lang """ @@ -655,6 +657,8 @@ def make_text_index_document(cls, tref, heTref, version, lang, version_priority, "exact": content, "naive_lemmatizer": content, 'hebrew_version_title': hebrew_version_title, + "languageFamilyName": language_family_name, + "isPrimary": is_primary, } @@ -736,7 +740,7 @@ def index_from_queue(): queue = db.index_queue.find() for item in queue: try: - TextIndexer.index_ref(index_name, Ref(item["ref"]), item["version"], item["lang"]) + TextIndexer.index_ref(index_name, Ref(item["ref"]), item["version"], item["lang"], item['languageFamilyName'], item['isPrimary']) db.index_queue.remove(item) except Exception as e: logging.error("Error indexing from queue ({} / {} / {}) : {}".format(item["ref"], item["version"], item["lang"], e))