Skip to content

Commit

Permalink
Merge pull request #2155 from Sefaria/add-attrs-to-search-api
Browse files: browse the repository at this point in the history
feat(elasticSearch): add languageFamilyName and isPrimary to search API result.
  • Loading branch information
akiva10b authored Dec 11, 2024
2 parents 4130934 + 3b1088e commit 49d6bbf
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 6 deletions.
2 changes: 1 addition & 1 deletion sefaria/model/dependencies.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ def process_version_title_change_in_search(ver, **kwargs):
text_index = library.get_index(ver.title)
delete_version(text_index, kwargs.get("old"), ver.language)
for ref in text_index.all_segment_refs():
TextIndexer.index_ref(search_index_name, ref, kwargs.get("new"), ver.language, False)
TextIndexer.index_ref(search_index_name, ref, kwargs.get("new"), ver.language, ver.languageFamilyName, ver.isPrimary)


# Version Title Change
Expand Down
14 changes: 9 additions & 5 deletions sefaria/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -541,7 +541,7 @@ def index_version(cls, version, tries=0, action=None):
print("Could not find dictionary node in {}".format(version.title))

@classmethod
def index_ref(cls, index_name, oref, version_title, lang):
def index_ref(cls, index_name, oref, version_title, lang, language_family_name, is_primary):
# slower than `cls.index_version` but useful when you don't want the overhead of loading all versions into cache
cls.index_name = index_name
cls.curr_index = oref.index
Expand All @@ -558,7 +558,7 @@ def index_ref(cls, index_name, oref, version_title, lang):
content = TextChunk(oref, lang, vtitle=version_title).ja().flatten_to_string()
categories = cls.curr_index.categories
tref = oref.normal()
doc = cls.make_text_index_document(tref, oref.he_normal(), version_title, lang, version_priority, content, categories, hebrew_version_title)
doc = cls.make_text_index_document(tref, oref.he_normal(), version_title, lang, version_priority, content, categories, hebrew_version_title, language_family_name, is_primary)
id = make_text_doc_id(tref, version_title, lang)
es_client.index(index_name, doc, id=id)

Expand All @@ -567,11 +567,13 @@ def _cache_action(cls, segment_str, tref, heTref, version):
# Index this document as a whole
vtitle = version.versionTitle
vlang = version.language
language_family_name = version.languageFamilyName
is_primary = version.isPrimary
hebrew_version_title = getattr(version, 'versionTitleInHebrew', None)
try:
version_priority, categories = cls.version_priority_map[(version.title, vtitle, vlang)]
#TODO include segment_str in this func
doc = cls.make_text_index_document(tref, heTref, vtitle, vlang, version_priority, segment_str, categories, hebrew_version_title)
doc = cls.make_text_index_document(tref, heTref, vtitle, vlang, version_priority, segment_str, categories, hebrew_version_title, language_family_name, is_primary)
# print doc
except Exception as e:
logger.error("Error making index document {} / {} / {} : {}".format(tref, vtitle, vlang, str(e)))
Expand Down Expand Up @@ -613,7 +615,7 @@ def modify_text_in_doc(cls, content):
return content

@classmethod
def make_text_index_document(cls, tref, heTref, version, lang, version_priority, content, categories, hebrew_version_title):
def make_text_index_document(cls, tref, heTref, version, lang, version_priority, content, categories, hebrew_version_title, language_family_name, is_primary):
"""
Create a document for indexing from the text specified by ref/version/lang
"""
Expand Down Expand Up @@ -653,6 +655,8 @@ def make_text_index_document(cls, tref, heTref, version, lang, version_priority,
"exact": content,
"naive_lemmatizer": content,
'hebrew_version_title': hebrew_version_title,
"languageFamilyName": language_family_name,
"isPrimary": is_primary,
}


Expand Down Expand Up @@ -734,7 +738,7 @@ def index_from_queue():
queue = db.index_queue.find()
for item in queue:
try:
TextIndexer.index_ref(index_name, Ref(item["ref"]), item["version"], item["lang"])
TextIndexer.index_ref(index_name, Ref(item["ref"]), item["version"], item["lang"], item['languageFamilyName'], item['isPrimary'])
db.index_queue.remove(item)
except Exception as e:
logging.error("Error indexing from queue ({} / {} / {}) : {}".format(item["ref"], item["version"], item["lang"], e))
Expand Down

0 comments on commit 49d6bbf

Please sign in to comment.