Skip to content

Commit

Permalink
chore: merge
Browse files Browse the repository at this point in the history
  • Loading branch information
nsantacruz committed Sep 26, 2023
2 parents 841978e + 4ec507c commit 0597261
Show file tree
Hide file tree
Showing 13 changed files with 124 additions and 210 deletions.
10 changes: 9 additions & 1 deletion helm-chart/sefaria-project/templates/configmap/nginx.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ data:
set -e
export ELASTIC_AUTH_HEADER=$(echo -n $ELASTIC_USER:$ELASTIC_PASSWORD | base64)
envsubst '${ENV_NAME},${VARNISH_HOST},${SEARCH_HOST},${RELEASE_TAG},${ELASTIC_AUTH_HEADER}{{- if .Values.linker.enabled }},${LINKER_HOST}{{- end }}{{- if .Values.instrumentation.enabled }},${NGINX_VERSION}{{- end }}' < /conf/nginx.template.conf > /nginx.conf
envsubst '${ENV_NAME},${VARNISH_HOST},${SEARCH_HOST},${RELEASE_TAG},${STRAPI_LOCATION},${ELASTIC_AUTH_HEADER}{{- if .Values.linker.enabled }},${LINKER_HOST}{{- end }}{{- if .Values.instrumentation.enabled }},${NGINX_VERSION}{{- end }}' < /conf/nginx.template.conf > /nginx.conf
nginx -c /nginx.conf -g 'daemon off;'
Expand Down Expand Up @@ -163,6 +163,14 @@ data:
proxy_pass http://varnish_upstream;
}
location /static/mobile/message-en.json {
return 301 ${STRAPI_LOCATION}/api/mobile-message;
}
location /static/mobile/message-he.json {
return 301 ${STRAPI_LOCATION}/api/mobile-message-he;
}
location /static/ {
access_log off;
alias /app/static/;
Expand Down
6 changes: 0 additions & 6 deletions helm-chart/sefaria-project/templates/rollout/nginx.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -74,9 +74,6 @@ spec:
name: nginx-conf
subPath: nginx.template.conf
readOnly: true
- mountPath: /usr/src/entrypoint.sh
name: nginx-conf
subPath: entrypoint.sh
{{- if .Values.instrumentation.enabled }}
- mountPath: /etc/nginx/opentracing.json
name: nginx-conf
Expand All @@ -102,8 +99,6 @@ spec:
value: "linker-{{ .Values.deployEnv }}-{{ .Release.Revision }}"
{{- end }}
envFrom:
- secretRef:
name: {{ template "sefaria.secrets.elasticUser" . }}
- configMapRef:
name: local-settings-nginx-{{ .Values.deployEnv }}
optional: true
Expand All @@ -114,7 +109,6 @@ spec:
- name: nginx-conf
configMap:
name: nginx-conf-{{ .Values.deployEnv }}
defaultMode: 0755
- name: robots-txt
configMap:
name: robots-txt-{{ .Values.deployEnv }}
11 changes: 2 additions & 9 deletions sefaria/client/wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,16 +54,9 @@ def format_link_object_for_client(link, with_text, ref, pos=None):
com["sourceVersion"] = {"title": link.versions[linkPos]["title"], "language": link.versions[linkPos].get("language", None)}
com["displayedText"] = link.displayedText[linkPos] # we only want source displayedText

compDate = getattr(linkRef.index, "compDate", None)
compDate = getattr(linkRef.index, "compDate", None) # default comp date to in the future
if compDate:
try:
com["compDate"] = int(compDate)
except ValueError:
com["compDate"] = 3000 # default comp date to in the future
try:
com["errorMargin"] = int(getattr(linkRef.index, "errorMargin", 0))
except ValueError:
com["errorMargin"] = 0
com["compDate"] = compDate

# Pad out the sections list, so that comparison between comment numbers are apples-to-apples
lsections = linkRef.sections[:] + [0] * (linkRef.index_node.depth - len(linkRef.sections))
Expand Down
28 changes: 10 additions & 18 deletions sefaria/model/garden.py
Original file line number Diff line number Diff line change
Expand Up @@ -441,24 +441,16 @@ def _derive_metadata(self):
# Time
# This is similar to logic on Index.composition_time_period() refactor
if getattr(self, "start", None) is None or getattr(self, "end", None) is None:
if getattr(i, "compDate", None):
errorMargin = int(getattr(i, "errorMargin", 0))
self.startIsApprox = self.endIsApprox = errorMargin > 0

try:
year = int(getattr(i, "compDate"))
self.start = year - errorMargin
self.end = year + errorMargin
except ValueError as e:
years = getattr(i, "compDate").split("-")
if years[0] == "" and len(years) == 3: #Fix for first value being negative
years[0] = -int(years[1])
years[1] = int(years[2])
self.start = int(years[0]) - errorMargin
self.end = int(years[1]) + errorMargin

elif author and author.mostAccurateTimePeriod():
tp = author.mostAccurateTimePeriod()
years = getattr(i, 'compDate', [])
if years and len(years) > 0:
self.startIsApprox = self.endIsApprox = getattr(i, "hasErrorMargin", False)
if len(years) > 1:
self.start = years[0]
self.end = years[1]
else:
self.start = self.end = years[0]
elif author and author.most_accurate_time_period():
tp = author.most_accurate_time_period()
self.start = tp.start
self.end = tp.end
self.startIsApprox = tp.startIsApprox
Expand Down
5 changes: 5 additions & 0 deletions sefaria/model/tests/text_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,11 @@ def test_invalid_index_save_no_category():
assert "You must create category Mishnah/Commentary/Bartenura/Gargamel before adding texts to it." in str(e_info.value)
assert model.IndexSet({"title": title}).count() == 0

def test_best_time_period():
    """
    best_time_period() should use the index's compDate when it is set, and give a different
    period (expected here: the author's) once compDate is None.
    """
    index = model.library.get_index("Rashi on Genesis")
    original_comp_date = getattr(index, "compDate", None)
    try:
        assert index.best_time_period().period_string('en') == ' (c.1075 - c.1105 CE)'
        index.compDate = None
        # now that compDate is None, period_string should return Rashi's birth to death years
        assert index.best_time_period().period_string('en') == ' (1040 - 1105 CE)'
    finally:
        # Put compDate back even if an assert fails. NOTE(review): presumably the library hands
        # back a shared Index instance -- confirm; if so, leaving None here leaks into later tests.
        index.compDate = original_comp_date

def test_invalid_index_save_no_hebrew_collective_title():
title = 'Bartenura (The Next Generation)'
Expand Down
145 changes: 39 additions & 106 deletions sefaria/model/text.py
Original file line number Diff line number Diff line change
Expand Up @@ -200,10 +200,10 @@ class Index(abst.AbstractMongoRecord, AbstractIndex):
"enShortDesc",
"heShortDesc",
"pubDate",
"hasErrorMargin", # (bool) whether or not compDate is exact. used to be 'errorMargin' which was an integer amount that compDate was off by
"compDate",
"compPlace",
"pubPlace",
"errorMargin",
"era",
"dependence", # (str) Values: "Commentary" or "Targum" - to denote commentaries and other potential not standalone texts
"base_text_titles", # (list) the base book(s) this one is dependant on
Expand Down Expand Up @@ -307,30 +307,20 @@ def expand_metadata_on_contents(self, contents):
contents["base_text_titles"] = [{"en": btitle, "he": hebrew_term(btitle)} for btitle in self.base_text_titles]

contents["heCategories"] = list(map(hebrew_term, self.categories))
contents = self.time_period_and_place_contents(contents)
return contents


composition_time_period = self.composition_time_period()
if composition_time_period:
contents["compDateString"] = {
"en": composition_time_period.period_string("en"),
"he": composition_time_period.period_string("he"),
}


composition_place = self.composition_place()
if composition_place:
contents["compPlaceString"] = {
"en": composition_place.primary_name("en"),
"he": composition_place.primary_name("he"),
}

pub_place = self.publication_place()
if pub_place:
contents["pubPlaceString"] = {
"en": pub_place.primary_name("en"),
"he": pub_place.primary_name("he"),
}

def time_period_and_place_contents(self, contents):
    """
    Add display strings for composition/publication dates and places to `contents`.

    A key is only written when the corresponding getter returns something truthy.
    :param contents: dict that is updated in place
    :return: the same `contents` dict
    """
    period_getters = (
        ("compDateString", self.composition_time_period),
        ("pubDateString", self.publication_time_period),
    )
    for key, get_period in period_getters:
        period = get_period()
        if not period:
            continue
        contents[key] = {"en": period.period_string('en'), 'he': period.period_string('he')}

    place_getters = (
        ("compPlaceString", self.composition_place),
        ("pubPlaceString", self.publication_place),
    )
    for key, get_place in place_getters:
        location = get_place()
        if not location:
            continue
        contents[key] = {"en": location.primary_name('en'), 'he': location.primary_name('he')}

    return contents

def _saveable_attrs(self):
Expand Down Expand Up @@ -444,93 +434,47 @@ def publication_place(self):

# This is similar to logic on GardenStop
def composition_time_period(self):
return self._get_time_period("compDate", "errorMargin")
return self._get_time_period("compDate", margin_field="hasErrorMargin")

def publication_time_period(self):
    """
    :return: the result of `_get_time_period` for the `pubDate` field (no margin field is passed)
    """
    pub_period = self._get_time_period("pubDate")
    return pub_period

def best_time_period(self):
"""
:return: TimePeriod: First tries to return `compDate`. Deals with ranges and negative values for compDate
If no compDate, looks at author info
:return: TimePeriod: First tries to return `compDate`.
If no compDate or compDate is an empty list, _get_time_period returns None and it then looks at author info
"""
start, end, startIsApprox, endIsApprox = None, None, None, None

if getattr(self, "compDate", None):
errorMargin = int(getattr(self, "errorMargin", 0))
self.startIsApprox = self.endIsApprox = errorMargin > 0

try:
year = int(getattr(self, "compDate"))
start = year - errorMargin
end = year + errorMargin
except ValueError as e:
years = getattr(self, "compDate").split("-")
if years[0] == "" and len(years) == 3: #Fix for first value being negative
years[0] = -int(years[1])
years[1] = int(years[2])
try:
start = int(years[0]) - errorMargin
end = int(years[1]) + errorMargin
except UnicodeEncodeError as e:
pass

compDatePeriod = self._get_time_period('compDate', margin_field="hasErrorMargin")
if compDatePeriod:
return compDatePeriod
else:
author = self.author_objects()[0] if len(self.author_objects()) > 0 else None
tp = author and author.most_accurate_time_period()
if tp is not None:
tpvars = vars(tp)
start = tp.start if "start" in tpvars else None
end = tp.end if "end" in tpvars else None
startIsApprox = tp.startIsApprox if "startIsApprox" in tpvars else None
endIsApprox = tp.endIsApprox if "endIsApprox" in tpvars else None

if not start is None:
from sefaria.model.timeperiod import TimePeriod
if not startIsApprox is None:
return TimePeriod({
"start": start,
"end": end,
"startIsApprox": startIsApprox,
"endIsApprox": endIsApprox
})
else:
return TimePeriod({
"start": start,
"end": end
})
return tp

def _get_time_period(self, date_field, margin_field=None):
def _get_time_period(self, date_field, margin_field=""):
"""
Assumes that value of `date_field` ('pubDate' or 'compDate') is a list of integers.
"""
from . import timeperiod
if not getattr(self, date_field, None):
years = getattr(self, date_field, [])
if years is None or len(years) == 0:
return None

try:
error_margin = int(getattr(self, margin_field, 0)) if margin_field else 0
error_margin = getattr(self, margin_field, False) if margin_field else False
except ValueError:
error_margin = 0
startIsApprox = endIsApprox = error_margin > 0

try:
year = int(getattr(self, date_field))
start = year - error_margin
end = year + error_margin
except ValueError as e:
try:
years = getattr(self, date_field).split("-")
if years[0] == "" and len(years) == 3: #Fix for first value being negative
years[0] = -int(years[1])
years[1] = int(years[2])
start = int(years[0]) - error_margin
end = int(years[1]) + error_margin
except ValueError as e:
return None
error_margin = False
startIsApprox = endIsApprox = error_margin
if len(years) > 1:
start, end = years
else:
start = end = years[0]
return timeperiod.TimePeriod({
"start": start,
"startIsApprox": startIsApprox,
"end": end,
"endIsApprox": endIsApprox
})
"start": start,
"startIsApprox": startIsApprox,
"end": end,
"endIsApprox": endIsApprox
})

# Index changes behavior of load_from_dict, so this circumvents that changed behavior to call load_from_dict on the abstract superclass
def update_from_dict(self, d):
Expand Down Expand Up @@ -692,12 +636,6 @@ def _normalize(self):
for attr in deprecated_attrs:
if getattr(self, attr, None):
delattr(self, attr)
try:
error_margin_value = getattr(self, "errorMargin", 0)
int(error_margin_value)
except ValueError:
logger.warning("Index record '{}' has invalid 'errorMargin': {} field, removing".format(self.title, error_margin_value))
delattr(self, "errorMargin")

def _update_alt_structs_on_title_change(self):
old_title = self.pkeys_orig_values["title"]
Expand Down Expand Up @@ -758,11 +696,6 @@ def _validate(self):
if getattr(self, "collective_title", None) and not hebrew_term(getattr(self, "collective_title", None)):
raise InputError("You must add a hebrew translation Term for any new Collective Title: {}.".format(self.collective_title))

try:
int(getattr(self, "errorMargin", 0))
except (ValueError):
raise InputError("composition date error margin must be an integer")

#complex style records- all records should now conform to this
if self.nodes:
# Make sure that all primary titles match
Expand Down
28 changes: 16 additions & 12 deletions sefaria/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -611,6 +611,17 @@ def remove_footnotes(cls, content):
content = AbstractTextRecord.strip_itags(content)
return content

@classmethod
def modify_text_in_doc(cls, content):
    """
    Clean up segment text before it is placed in a search document.

    Applies, in order: `AbstractTextRecord.strip_imgs`, `cls.remove_footnotes`,
    `strip_cantillation` (with strip_vowels=False) followed by a strip of the ends, replacement
    of HTML tags and of parenthesized spans with a space, and a final collapse of repeated spaces.
    Because the end-strip happens before the replacements, the result may still start or end
    with a single space.
    :param content: str, the text to clean
    :return: str, the cleaned text (may be empty)
    """
    content = AbstractTextRecord.strip_imgs(content)
    content = cls.remove_footnotes(content)
    content = strip_cantillation(content, strip_vowels=False).strip()
    content = re.sub(r'<[^>]+>', ' ', content)  # replace HTML tags with space so that words don't get smushed together
    content = re.sub(r'\([^)]+\)', ' ', content)  # drop parenthesized text, parentheses included
    # Collapse every run of spaces into one in a single pass. A loop that tests for one space and
    # replaces one space with one space makes no progress and never ends on text containing a space.
    content = re.sub(r' {2,}', ' ', content)
    return content

@classmethod
def make_text_index_document(cls, tref, heTref, version, lang, version_priority, content, categories, hebrew_version_title):
"""
Expand All @@ -619,15 +630,8 @@ def make_text_index_document(cls, tref, heTref, version, lang, version_priority,
# Don't bother indexing if there's no content
if not content:
return False
content = AbstractTextRecord.strip_imgs(content)
content = cls.remove_footnotes(content)
content_wo_cant = strip_cantillation(content, strip_vowels=False).strip()
content_wo_cant = re.sub(r'<[^>]+>', ' ', content_wo_cant) # replace HTML tags with space so that words dont get smushed together
content_wo_cant = re.sub(r'\([^)]+\)', ' ', content_wo_cant) # remove all parens
while " " in content_wo_cant: # make sure there are not many spaces in a row
content_wo_cant = content_wo_cant.replace(" ", " ")

if len(content_wo_cant) == 0:
content = cls.modify_text_in_doc(content)
if len(content) == 0:
return False

oref = Ref(tref)
Expand Down Expand Up @@ -655,9 +659,9 @@ def make_text_index_document(cls, tref, heTref, version, lang, version_priority,
"path": "/".join(indexed_categories + [cls.curr_index.title]),
"pagesheetrank": pagesheetrank,
"comp_date": comp_start_date,
#"hebmorph_semi_exact": content_wo_cant,
"exact": content_wo_cant,
"naive_lemmatizer": content_wo_cant,
#"hebmorph_semi_exact": content,
"exact": content,
"naive_lemmatizer": content,
'hebrew_version_title': hebrew_version_title,
}

Expand Down
9 changes: 3 additions & 6 deletions sefaria/tests/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,7 @@ def test_make_text_index_document():

ref_data = RefData().load({"ref": tref})
pagesheetrank = ref_data.pagesheetrank if ref_data is not None else RefData.DEFAULT_PAGESHEETRANK
content_wo_cant = strip_cantillation(content, strip_vowels=False).strip()
content_wo_cant = re.sub(r'<[^>]+>', '', content_wo_cant)
content_wo_cant = re.sub(r'\([^)]+\)', '', content_wo_cant)

content = TI.modify_text_in_doc(content)
assert doc == {
"ref": tref,
"heRef": he_ref,
Expand All @@ -40,8 +37,8 @@ def test_make_text_index_document():
"path": "/".join(categories + [index.title]),
"pagesheetrank": pagesheetrank,
"comp_date": comp_date,
"exact": content_wo_cant,
"naive_lemmatizer": content_wo_cant,
"exact": content,
"naive_lemmatizer": content,
'hebrew_version_title': heVtitle,

}
Expand Down
Loading

0 comments on commit 0597261

Please sign in to comment.