chore: merge

Sefaria · Sep 26, 2023 · 0597261 · 0597261
2 parents 841978e + 4ec507c
commit 0597261
Show file tree

Hide file tree

Showing 13 changed files with 124 additions and 210 deletions.
diff --git a/helm-chart/sefaria-project/templates/configmap/nginx.yaml b/helm-chart/sefaria-project/templates/configmap/nginx.yaml
@@ -34,7 +34,7 @@ data:
     set -e
 
     export ELASTIC_AUTH_HEADER=$(echo -n $ELASTIC_USER:$ELASTIC_PASSWORD | base64)
-    envsubst '${ENV_NAME},${VARNISH_HOST},${SEARCH_HOST},${RELEASE_TAG},${ELASTIC_AUTH_HEADER}{{- if .Values.linker.enabled }},${LINKER_HOST}{{- end }}{{- if .Values.instrumentation.enabled }},${NGINX_VERSION}{{- end }}' < /conf/nginx.template.conf > /nginx.conf 
+    envsubst '${ENV_NAME},${VARNISH_HOST},${SEARCH_HOST},${RELEASE_TAG},${STRAPI_LOCATION},${ELASTIC_AUTH_HEADER}{{- if .Values.linker.enabled }},${LINKER_HOST}{{- end }}{{- if .Values.instrumentation.enabled }},${NGINX_VERSION}{{- end }}' < /conf/nginx.template.conf > /nginx.conf 
 
     nginx -c /nginx.conf -g 'daemon off;'
 
@@ -163,6 +163,14 @@ data:
             proxy_pass http://varnish_upstream;
         }
 
+        location /static/mobile/message-en.json {
+          return 301 ${STRAPI_LOCATION}/api/mobile-message;
+        }
+
+        location /static/mobile/message-he.json {
+          return 301 ${STRAPI_LOCATION}/api/mobile-message-he;
+        }
+
         location /static/ {
           access_log off;
           alias /app/static/;

diff --git a/helm-chart/sefaria-project/templates/rollout/nginx.yaml b/helm-chart/sefaria-project/templates/rollout/nginx.yaml
@@ -74,9 +74,6 @@ spec:
             name: nginx-conf
             subPath: nginx.template.conf
             readOnly: true
-          - mountPath: /usr/src/entrypoint.sh
-            name: nginx-conf
-            subPath: entrypoint.sh
           {{- if .Values.instrumentation.enabled }}
           - mountPath: /etc/nginx/opentracing.json
             name: nginx-conf
@@ -102,8 +99,6 @@ spec:
             value: "linker-{{ .Values.deployEnv }}-{{ .Release.Revision }}"
           {{- end }}
         envFrom:
-          - secretRef:
-              name: {{ template "sefaria.secrets.elasticUser" . }}  
           - configMapRef:
               name: local-settings-nginx-{{ .Values.deployEnv }}
               optional: true
@@ -114,7 +109,6 @@ spec:
         - name:  nginx-conf
           configMap:
             name: nginx-conf-{{ .Values.deployEnv }}
-            defaultMode: 0755
         - name: robots-txt
           configMap:
             name: robots-txt-{{ .Values.deployEnv }}
diff --git a/sefaria/client/wrapper.py b/sefaria/client/wrapper.py
@@ -54,16 +54,9 @@ def format_link_object_for_client(link, with_text, ref, pos=None):
         com["sourceVersion"] = {"title": link.versions[linkPos]["title"], "language": link.versions[linkPos].get("language", None)}
         com["displayedText"] = link.displayedText[linkPos]  # we only want source displayedText
 
-    compDate = getattr(linkRef.index, "compDate", None)
+    compDate = getattr(linkRef.index, "compDate", None)  # None when the index has no compDate
     if compDate:
-        try:
-            com["compDate"] = int(compDate)
-        except ValueError:
-            com["compDate"] = 3000  # default comp date to in the future
-        try:
-            com["errorMargin"] = int(getattr(linkRef.index, "errorMargin", 0))
-        except ValueError:
-            com["errorMargin"] = 0
+        com["compDate"] = compDate
 
     # Pad out the sections list, so that comparison between comment numbers are apples-to-apples
     lsections = linkRef.sections[:] + [0] * (linkRef.index_node.depth - len(linkRef.sections))

diff --git a/sefaria/model/garden.py b/sefaria/model/garden.py
@@ -441,24 +441,16 @@ def _derive_metadata(self):
         # Time
         # This is similar to logic on Index.composition_time_period() refactor
         if getattr(self, "start", None) is None or getattr(self, "end", None) is None:
-            if getattr(i, "compDate", None):
-                errorMargin = int(getattr(i, "errorMargin", 0))
-                self.startIsApprox = self.endIsApprox = errorMargin > 0
-
-                try:
-                    year = int(getattr(i, "compDate"))
-                    self.start = year - errorMargin
-                    self.end = year + errorMargin
-                except ValueError as e:
-                    years = getattr(i, "compDate").split("-")
-                    if years[0] == "" and len(years) == 3:  #Fix for first value being negative
-                        years[0] = -int(years[1])
-                        years[1] = int(years[2])
-                    self.start = int(years[0]) - errorMargin
-                    self.end = int(years[1]) + errorMargin
-
-            elif author and author.mostAccurateTimePeriod():
-                tp = author.mostAccurateTimePeriod()
+            years = getattr(i, 'compDate', [])
+            if years and len(years) > 0:
+                self.startIsApprox = self.endIsApprox = getattr(i, "hasErrorMargin", False)
+                if len(years) > 1:
+                    self.start = years[0]
+                    self.end = years[1]
+                else:
+                    self.start = self.end = years[0]
+            elif author and author.most_accurate_time_period():
+                tp = author.most_accurate_time_period()
                 self.start = tp.start
                 self.end = tp.end
                 self.startIsApprox = tp.startIsApprox

diff --git a/sefaria/model/tests/text_test.py b/sefaria/model/tests/text_test.py
@@ -167,6 +167,11 @@ def test_invalid_index_save_no_category():
     assert "You must create category Mishnah/Commentary/Bartenura/Gargamel before adding texts to it." in str(e_info.value)
     assert model.IndexSet({"title": title}).count() == 0
 
+def test_best_time_period():
+    i = model.library.get_index("Rashi on Genesis")
+    assert i.best_time_period().period_string('en') == ' (c.1075  - c.1105 CE)'
+    i.compDate = None
+    assert i.best_time_period().period_string('en') == ' (1040  - 1105 CE)'  # now that compDate is None, period_string should return Rashi's birth to death years
 
 def test_invalid_index_save_no_hebrew_collective_title():
     title = 'Bartenura (The Next Generation)'

diff --git a/sefaria/model/text.py b/sefaria/model/text.py
@@ -200,10 +200,10 @@ class Index(abst.AbstractMongoRecord, AbstractIndex):
         "enShortDesc",
         "heShortDesc",
         "pubDate",
+        "hasErrorMargin",     # (bool) True when compDate is approximate rather than exact.  Replaces 'errorMargin', which was an integer number of years that compDate could be off by
         "compDate",
         "compPlace",
         "pubPlace",
-        "errorMargin",
         "era",
         "dependence",           # (str) Values: "Commentary" or "Targum" - to denote commentaries and other potential not standalone texts
         "base_text_titles",     # (list) the base book(s) this one is dependant on
@@ -307,30 +307,20 @@ def expand_metadata_on_contents(self, contents):
             contents["base_text_titles"] = [{"en": btitle, "he": hebrew_term(btitle)} for btitle in self.base_text_titles]
 
         contents["heCategories"] = list(map(hebrew_term, self.categories))
+        contents = self.time_period_and_place_contents(contents)
+        return contents
 
-
-        composition_time_period = self.composition_time_period()
-        if composition_time_period:
-            contents["compDateString"] = {
-                "en": composition_time_period.period_string("en"),
-                "he": composition_time_period.period_string("he"),
-            }
-
-
-        composition_place = self.composition_place()
-        if composition_place:
-            contents["compPlaceString"] = {
-                "en": composition_place.primary_name("en"),
-                "he": composition_place.primary_name("he"),
-            }
-
-        pub_place = self.publication_place()
-        if pub_place:
-            contents["pubPlaceString"] = {
-                "en": pub_place.primary_name("en"),
-                "he": pub_place.primary_name("he"),
-            }
-
+    def time_period_and_place_contents(self, contents):
+        """ Used to expand contents for date and time info """
+        for k, f in [("compDateString", self.composition_time_period), ("pubDateString", self.publication_time_period)]:
+            time_period = f()
+            if time_period:
+                contents[k] = {"en": time_period.period_string('en'), 'he': time_period.period_string('he')}
+
+        for k, f in [("compPlaceString", self.composition_place), ("pubPlaceString", self.publication_place)]:
+            place = f()
+            if place:
+                contents[k] = {"en": place.primary_name('en'), 'he': place.primary_name('he')}
         return contents
 
     def _saveable_attrs(self):
@@ -444,93 +434,47 @@ def publication_place(self):
 
     # This is similar to logic on GardenStop
     def composition_time_period(self):
-        return self._get_time_period("compDate", "errorMargin")
+        return self._get_time_period("compDate", margin_field="hasErrorMargin")
 
     def publication_time_period(self):
         return self._get_time_period("pubDate")
 
     def best_time_period(self):
         """
-        :return: TimePeriod: First tries to return `compDate`. Deals with ranges and negative values for compDate
-        If no compDate, looks at author info
+        :return: TimePeriod: First tries to return `compDate`.
+        If no compDate or compDate is an empty list, _get_time_period returns None and it then looks at author info
         """
-        start, end, startIsApprox, endIsApprox = None, None, None, None
-
-        if getattr(self, "compDate", None):
-            errorMargin = int(getattr(self, "errorMargin", 0))
-            self.startIsApprox = self.endIsApprox = errorMargin > 0
-
-            try:
-                year = int(getattr(self, "compDate"))
-                start = year - errorMargin
-                end = year + errorMargin
-            except ValueError as e:
-                years = getattr(self, "compDate").split("-")
-                if years[0] == "" and len(years) == 3:  #Fix for first value being negative
-                    years[0] = -int(years[1])
-                    years[1] = int(years[2])
-                try:
-                    start = int(years[0]) - errorMargin
-                    end = int(years[1]) + errorMargin
-                except UnicodeEncodeError as e:
-                    pass
-
+        compDatePeriod = self._get_time_period('compDate', margin_field="hasErrorMargin")
+        if compDatePeriod:
+            return compDatePeriod
         else:
             author = self.author_objects()[0] if len(self.author_objects()) > 0 else None
             tp = author and author.most_accurate_time_period()
-            if tp is not None:
-                tpvars = vars(tp)
-                start = tp.start if "start" in tpvars else None
-                end = tp.end if "end" in tpvars else None
-                startIsApprox = tp.startIsApprox if "startIsApprox" in tpvars else None
-                endIsApprox = tp.endIsApprox if "endIsApprox" in tpvars else None
-
-        if not start is None:
-            from sefaria.model.timeperiod import TimePeriod
-            if not startIsApprox is None:
-                return TimePeriod({
-                    "start": start,
-                    "end": end,
-                    "startIsApprox": startIsApprox,
-                    "endIsApprox": endIsApprox
-                })
-            else:
-                return TimePeriod({
-                    "start": start,
-                    "end": end
-                })
+            return tp
 
-    def _get_time_period(self, date_field, margin_field=None):
+    def _get_time_period(self, date_field, margin_field=""):
+        """
+        Assumes that value of `date_field` ('pubDate' or 'compDate') is a list of integers.
+        """
         from . import timeperiod
-        if not getattr(self, date_field, None):
+        years = getattr(self, date_field, [])
+        if years is None or len(years) == 0:
             return None
-
         try:
-            error_margin = int(getattr(self, margin_field, 0)) if margin_field else 0
+            error_margin = getattr(self, margin_field, False) if margin_field else False
         except ValueError:
-            error_margin = 0
-        startIsApprox = endIsApprox = error_margin > 0
-
-        try:
-            year = int(getattr(self, date_field))
-            start = year - error_margin
-            end = year + error_margin
-        except ValueError as e:
-            try:
-                years = getattr(self, date_field).split("-")
-                if years[0] == "" and len(years) == 3:  #Fix for first value being negative
-                    years[0] = -int(years[1])
-                    years[1] = int(years[2])
-                start = int(years[0]) - error_margin
-                end = int(years[1]) + error_margin
-            except ValueError as e:
-                return None
+            error_margin = False
+        startIsApprox = endIsApprox = error_margin
+        if len(years) > 1:
+            start, end = years
+        else:
+            start = end = years[0]
         return timeperiod.TimePeriod({
-            "start": start,
-            "startIsApprox": startIsApprox,
-            "end": end,
-            "endIsApprox": endIsApprox
-        })
+        "start": start,
+        "startIsApprox": startIsApprox,
+        "end": end,
+        "endIsApprox": endIsApprox
+    })
 
     # Index changes behavior of load_from_dict, so this circumvents that changed behavior to call load_from_dict on the abstract superclass
     def update_from_dict(self, d):
@@ -692,12 +636,6 @@ def _normalize(self):
         for attr in deprecated_attrs:
             if getattr(self, attr, None):
                 delattr(self, attr)
-        try:
-            error_margin_value = getattr(self, "errorMargin", 0)
-            int(error_margin_value)
-        except ValueError:
-            logger.warning("Index record '{}' has invalid 'errorMargin': {} field, removing".format(self.title, error_margin_value))
-            delattr(self, "errorMargin")
 
     def _update_alt_structs_on_title_change(self):
         old_title = self.pkeys_orig_values["title"]
@@ -758,11 +696,6 @@ def _validate(self):
         if getattr(self, "collective_title", None) and not hebrew_term(getattr(self, "collective_title", None)):
             raise InputError("You must add a hebrew translation Term for any new Collective Title: {}.".format(self.collective_title))
 
-        try:
-            int(getattr(self, "errorMargin", 0))
-        except (ValueError):
-            raise InputError("composition date error margin must be an integer")
-
         #complex style records- all records should now conform to this
         if self.nodes:
             # Make sure that all primary titles match

diff --git a/sefaria/search.py b/sefaria/search.py
@@ -611,6 +611,17 @@ def remove_footnotes(cls, content):
             content = AbstractTextRecord.strip_itags(content)
             return content
 
+    @classmethod
+    def modify_text_in_doc(cls, content):
+        content = AbstractTextRecord.strip_imgs(content)
+        content = cls.remove_footnotes(content)
+        content = strip_cantillation(content, strip_vowels=False).strip()
+        content = re.sub(r'<[^>]+>', ' ', content)     # replace HTML tags with a space so that adjacent words don't get run together
+        content = re.sub(r'\([^)]+\)', ' ', content)   # replace parenthesized text (parens included) with a space
+        while "  " in content:                                 # collapse runs of spaces into a single space
+            content = content.replace("  ", " ")
+        return content
+
     @classmethod
     def make_text_index_document(cls, tref, heTref, version, lang, version_priority, content, categories, hebrew_version_title):
         """
@@ -619,15 +630,8 @@ def make_text_index_document(cls, tref, heTref, version, lang, version_priority,
         # Don't bother indexing if there's no content
         if not content:
             return False
-        content = AbstractTextRecord.strip_imgs(content)
-        content = cls.remove_footnotes(content)
-        content_wo_cant = strip_cantillation(content, strip_vowels=False).strip()
-        content_wo_cant = re.sub(r'<[^>]+>', ' ', content_wo_cant)     # replace HTML tags with space so that words dont get smushed together
-        content_wo_cant = re.sub(r'\([^)]+\)', ' ', content_wo_cant)   # remove all parens
-        while "  " in content_wo_cant:                                 # make sure there are not many spaces in a row
-            content_wo_cant = content_wo_cant.replace("  ", " ")
-
-        if len(content_wo_cant) == 0:
+        content = cls.modify_text_in_doc(content)
+        if len(content) == 0:
             return False
 
         oref = Ref(tref)
@@ -655,9 +659,9 @@ def make_text_index_document(cls, tref, heTref, version, lang, version_priority,
             "path": "/".join(indexed_categories + [cls.curr_index.title]),
             "pagesheetrank": pagesheetrank,
             "comp_date": comp_start_date,
-            #"hebmorph_semi_exact": content_wo_cant,
-            "exact": content_wo_cant,
-            "naive_lemmatizer": content_wo_cant,
+            #"hebmorph_semi_exact": content,
+            "exact": content,
+            "naive_lemmatizer": content,
             'hebrew_version_title': hebrew_version_title,
         }
 

diff --git a/sefaria/tests/search.py b/sefaria/tests/search.py
@@ -24,10 +24,7 @@ def test_make_text_index_document():
 
     ref_data = RefData().load({"ref": tref})
     pagesheetrank = ref_data.pagesheetrank if ref_data is not None else RefData.DEFAULT_PAGESHEETRANK
-    content_wo_cant = strip_cantillation(content, strip_vowels=False).strip()
-    content_wo_cant = re.sub(r'<[^>]+>', '', content_wo_cant)
-    content_wo_cant = re.sub(r'\([^)]+\)', '', content_wo_cant)
-
+    content = TI.modify_text_in_doc(content)
     assert doc == {
         "ref": tref,
         "heRef": he_ref,
@@ -40,8 +37,8 @@ def test_make_text_index_document():
         "path": "/".join(categories + [index.title]),
         "pagesheetrank": pagesheetrank,
         "comp_date": comp_date,
-        "exact": content_wo_cant,
-        "naive_lemmatizer": content_wo_cant,
+        "exact": content,
+        "naive_lemmatizer": content,
         'hebrew_version_title': heVtitle,
 
     }