diff --git a/sefaria/client/wrapper.py b/sefaria/client/wrapper.py index 0525ebbb61..26e70bb73f 100644 --- a/sefaria/client/wrapper.py +++ b/sefaria/client/wrapper.py @@ -54,16 +54,9 @@ def format_link_object_for_client(link, with_text, ref, pos=None): com["sourceVersion"] = {"title": link.versions[linkPos]["title"], "language": link.versions[linkPos].get("language", None)} com["displayedText"] = link.displayedText[linkPos] # we only want source displayedText - compDate = getattr(linkRef.index, "compDate", None) + compDate = getattr(linkRef.index, "compDate", None) # default comp date to in the future if compDate: - try: - com["compDate"] = 3000 - except ValueError: - com["compDate"] = 3000 # default comp date to in the future - try: - com["errorMargin"] = int(getattr(linkRef.index, "errorMargin", 0)) - except ValueError: - com["errorMargin"] = 0 + com["compDate"] = compDate # Pad out the sections list, so that comparison between comment numbers are apples-to-apples lsections = linkRef.sections[:] + [0] * (linkRef.index_node.depth - len(linkRef.sections)) diff --git a/sefaria/model/garden.py b/sefaria/model/garden.py index 38d6f9c75b..b127aa58c1 100644 --- a/sefaria/model/garden.py +++ b/sefaria/model/garden.py @@ -441,24 +441,16 @@ def _derive_metadata(self): # Time # This is similar to logic on Index.composition_time_period() refactor if getattr(self, "start", None) is None or getattr(self, "end", None) is None: - if getattr(i, "compDate", None): - errorMargin = int(getattr(i, "errorMargin", 0)) - self.startIsApprox = self.endIsApprox = errorMargin > 0 - self.start = self.end = 3000 - # try: - # year = int(getattr(i, "compDate")) - # self.start = year - errorMargin - # self.end = year + errorMargin - # except ValueError as e: - # years = getattr(i, "compDate").split("-") - # if years[0] == "" and len(years) == 3: #Fix for first value being negative - # years[0] = -int(years[1]) - # years[1] = int(years[2]) - # self.start = int(years[0]) - errorMargin - # self.end = int(years[1]) + errorMargin - - elif author and author.mostAccurateTimePeriod(): - tp = author.mostAccurateTimePeriod() + years = getattr(i, 'compDate', []) + if years and len(years) > 0: + self.startIsApprox = self.endIsApprox = getattr(i, "hasErrorMargin", False) + if len(years) > 1: + self.start = years[0] + self.end = years[1] + else: + self.start = self.end = years[0] + elif author and author.most_accurate_time_period(): + tp = author.most_accurate_time_period() self.start = tp.start self.end = tp.end self.startIsApprox = tp.startIsApprox diff --git a/sefaria/model/tests/text_test.py b/sefaria/model/tests/text_test.py index 4d372fd55b..00192188e2 100644 --- a/sefaria/model/tests/text_test.py +++ b/sefaria/model/tests/text_test.py @@ -167,6 +167,11 @@ def test_invalid_index_save_no_category(): assert "You must create category Mishnah/Commentary/Bartenura/Gargamel before adding texts to it." in str(e_info.value) assert model.IndexSet({"title": title}).count() == 0 +def test_best_time_period(): + i = model.library.get_index("Rashi on Genesis") + assert i.best_time_period().period_string('en') == ' (c.1075 - c.1105 CE)' + i.compDate = None + assert i.best_time_period().period_string('en') == ' (1040 - 1105 CE)' # now that compDate is None, period_string should return Rashi's birth to death years def test_invalid_index_save_no_hebrew_collective_title(): title = 'Bartenura (The Next Generation)' diff --git a/sefaria/model/text.py b/sefaria/model/text.py index 9d87a1dca7..6346fd5374 100644 --- a/sefaria/model/text.py +++ b/sefaria/model/text.py @@ -200,10 +200,10 @@ class Index(abst.AbstractMongoRecord, AbstractIndex): "enShortDesc", "heShortDesc", "pubDate", + "hasErrorMargin", # (bool) whether or not compDate is exact. used to be 'errorMargin' which was an integer amount that compDate was off by "compDate", "compPlace", "pubPlace", - "errorMargin", "era", "dependence", # (str) Values: "Commentary" or "Targum" - to denote commentaries and other potential not standalone texts "base_text_titles", # (list) the base book(s) this one is dependant on @@ -307,30 +307,20 @@ def expand_metadata_on_contents(self, contents): contents["base_text_titles"] = [{"en": btitle, "he": hebrew_term(btitle)} for btitle in self.base_text_titles] contents["heCategories"] = list(map(hebrew_term, self.categories)) + contents = self.time_period_and_place_contents(contents) + return contents - - composition_time_period = self.composition_time_period() - if composition_time_period: - contents["compDateString"] = { - "en": composition_time_period.period_string("en"), - "he": composition_time_period.period_string("he"), - } - - - composition_place = self.composition_place() - if composition_place: - contents["compPlaceString"] = { - "en": composition_place.primary_name("en"), - "he": composition_place.primary_name("he"), - } - - pub_place = self.publication_place() - if pub_place: - contents["pubPlaceString"] = { - "en": pub_place.primary_name("en"), - "he": pub_place.primary_name("he"), - } - + def time_period_and_place_contents(self, contents): + """ Used to expand contents for date and time info """ + for k, f in [("compDateString", self.composition_time_period), ("pubDateString", self.publication_time_period)]: + time_period = f() + if time_period: + contents[k] = {"en": time_period.period_string('en'), 'he': time_period.period_string('he')} + + for k, f in [("compPlaceString", self.composition_place), ("pubPlaceString", self.publication_place)]: + place = f() + if place: + contents[k] = {"en": place.primary_name('en'), 'he': place.primary_name('he')} return contents def _saveable_attrs(self): @@ -444,93 +434,47 @@ def publication_place(self): # This is similar to logic on GardenStop def composition_time_period(self): - return self._get_time_period("compDate", "errorMargin") + return self._get_time_period("compDate", margin_field="hasErrorMargin") def publication_time_period(self): return self._get_time_period("pubDate") def best_time_period(self): """ - :return: TimePeriod: First tries to return `compDate`. Deals with ranges and negative values for compDate - If no compDate, looks at author info - """ - start, end, startIsApprox, endIsApprox = None, None, None, None - - if getattr(self, "compDate", None): - errorMargin = int(getattr(self, "errorMargin", 0)) - self.startIsApprox = self.endIsApprox = errorMargin > 0 - start = end = 3000 - # try: - # year = int(getattr(self, "compDate")) - # start = year - errorMargin - # end = year + errorMargin - # except ValueError as e: - # years = getattr(self, "compDate").split("-") - # if years[0] == "" and len(years) == 3: #Fix for first value being negative - # years[0] = -int(years[1]) - # years[1] = int(years[2]) - # try: - # start = int(years[0]) - errorMargin - # end = int(years[1]) + errorMargin - # except UnicodeEncodeError as e: - # pass - + :return: TimePeriod: First tries to return `compDate`. + If no compDate or compDate is an empty list, _get_time_period returns None and it then looks at author info + """ + compDatePeriod = self._get_time_period('compDate', margin_field="hasErrorMargin") + if compDatePeriod: + return compDatePeriod else: author = self.author_objects()[0] if len(self.author_objects()) > 0 else None tp = author and author.most_accurate_time_period() - if tp is not None: - tpvars = vars(tp) - start = tp.start if "start" in tpvars else None - end = tp.end if "end" in tpvars else None - startIsApprox = tp.startIsApprox if "startIsApprox" in tpvars else None - endIsApprox = tp.endIsApprox if "endIsApprox" in tpvars else None - - if not start is None: - from sefaria.model.timeperiod import TimePeriod - if not startIsApprox is None: - return TimePeriod({ - "start": start, - "end": end, - "startIsApprox": startIsApprox, - "endIsApprox": endIsApprox - }) - else: - return TimePeriod({ - "start": start, - "end": end - }) + return tp - def _get_time_period(self, date_field, margin_field=None): + def _get_time_period(self, date_field, margin_field=""): + """ + Assumes that value of `date_field` ('pubDate' or 'compDate') is a list of integers. + """ from . import timeperiod - if not getattr(self, date_field, None): + years = getattr(self, date_field, []) + if years is None or len(years) == 0: return None - try: - error_margin = int(getattr(self, margin_field, 0)) if margin_field else 0 + error_margin = getattr(self, margin_field, False) if margin_field else False except ValueError: - error_margin = 0 - startIsApprox = endIsApprox = error_margin > 0 - start = end = 3000 - # try: - # year = int(getattr(self, date_field)) - # start = year - error_margin - # end = year + error_margin - # except ValueError as e: - # try: - # years = getattr(self, date_field).split("-") - # if years[0] == "" and len(years) == 3: #Fix for first value being negative - # years[0] = -int(years[1]) - # years[1] = int(years[2]) - # start = int(years[0]) - error_margin - # end = int(years[1]) + error_margin - # except ValueError as e: - # return None + error_margin = False + startIsApprox = endIsApprox = error_margin + if len(years) > 1: + start, end = years + else: + start = end = years[0] return timeperiod.TimePeriod({ - "start": start, - "startIsApprox": startIsApprox, - "end": end, - "endIsApprox": endIsApprox - }) + "start": start, + "startIsApprox": startIsApprox, + "end": end, + "endIsApprox": endIsApprox + }) # Index changes behavior of load_from_dict, so this circumvents that changed behavior to call load_from_dict on the abstract superclass def update_from_dict(self, d): @@ -692,12 +636,6 @@ def _normalize(self): for attr in deprecated_attrs: if getattr(self, attr, None): delattr(self, attr) - try: - error_margin_value = getattr(self, "errorMargin", 0) - int(error_margin_value) - except ValueError: - logger.warning("Index record '{}' has invalid 'errorMargin': {} field, removing".format(self.title, error_margin_value)) - delattr(self, "errorMargin") def _update_alt_structs_on_title_change(self): old_title = self.pkeys_orig_values["title"] @@ -758,11 +696,6 @@ def _validate(self): if getattr(self, "collective_title", None) and not hebrew_term(getattr(self, "collective_title", None)): raise InputError("You must add a hebrew translation Term for any new Collective Title: {}.".format(self.collective_title)) - try: - int(getattr(self, "errorMargin", 0)) - except (ValueError): - raise InputError("composition date error margin must be an integer") - #complex style records- all records should now conform to this if self.nodes: # Make sure that all primary titles match diff --git a/sefaria/search.py b/sefaria/search.py index 3bf18fec0f..d42d39e614 100644 --- a/sefaria/search.py +++ b/sefaria/search.py @@ -613,6 +613,17 @@ def remove_footnotes(cls, content): content = AbstractTextRecord.strip_itags(content) return content + @classmethod + def modify_text_in_doc(cls, content): + content = AbstractTextRecord.strip_imgs(content) + content = cls.remove_footnotes(content) + content = strip_cantillation(content, strip_vowels=False).strip() + content = re.sub(r'<[^>]+>', ' ', content) # replace HTML tags with space so that words dont get smushed together + content = re.sub(r'\([^)]+\)', ' ', content) # remove all parens + while " " in content: # make sure there are not many spaces in a row + content = content.replace(" ", " ") + return content + @classmethod def make_text_index_document(cls, tref, heTref, version, lang, version_priority, content, categories, hebrew_version_title): """ @@ -621,15 +632,8 @@ def make_text_index_document(cls, tref, heTref, version, lang, version_priority, # Don't bother indexing if there's no content if not content: return False - content = AbstractTextRecord.strip_imgs(content) - content = cls.remove_footnotes(content) - content_wo_cant = strip_cantillation(content, strip_vowels=False).strip() - content_wo_cant = re.sub(r'<[^>]+>', ' ', content_wo_cant) # replace HTML tags with space so that words dont get smushed together - content_wo_cant = re.sub(r'\([^)]+\)', ' ', content_wo_cant) # remove all parens - while " " in content_wo_cant: # make sure there are not many spaces in a row - content_wo_cant = content_wo_cant.replace(" ", " ") - - if len(content_wo_cant) == 0: + content = cls.modify_text_in_doc(content) + if len(content) == 0: return False oref = Ref(tref) @@ -657,9 +661,9 @@ def make_text_index_document(cls, tref, heTref, version, lang, version_priority, "path": "/".join(indexed_categories + [cls.curr_index.title]), "pagesheetrank": pagesheetrank, "comp_date": comp_start_date, - #"hebmorph_semi_exact": content_wo_cant, - "exact": content_wo_cant, - "naive_lemmatizer": content_wo_cant, + #"hebmorph_semi_exact": content, + "exact": content, + "naive_lemmatizer": content, 'hebrew_version_title': hebrew_version_title, } diff --git a/sefaria/tests/search.py b/sefaria/tests/search.py index a04a368599..a47f146637 100644 --- a/sefaria/tests/search.py +++ b/sefaria/tests/search.py @@ -24,10 +24,7 @@ def test_make_text_index_document(): ref_data = RefData().load({"ref": tref}) pagesheetrank = ref_data.pagesheetrank if ref_data is not None else RefData.DEFAULT_PAGESHEETRANK - content_wo_cant = strip_cantillation(content, strip_vowels=False).strip() - content_wo_cant = re.sub(r'<[^>]+>', '', content_wo_cant) - content_wo_cant = re.sub(r'\([^)]+\)', '', content_wo_cant) - + content = TI.modify_text_in_doc(content) assert doc == { "ref": tref, "heRef": he_ref, @@ -40,8 +37,8 @@ def test_make_text_index_document(): "path": "/".join(categories + [index.title]), "pagesheetrank": pagesheetrank, "comp_date": comp_date, - "exact": content_wo_cant, - "naive_lemmatizer": content_wo_cant, + "exact": content, + "naive_lemmatizer": content, 'hebrew_version_title': heVtitle, } diff --git a/static/js/AboutBox.jsx b/static/js/AboutBox.jsx index f78cb31066..2d7435a170 100644 --- a/static/js/AboutBox.jsx +++ b/static/js/AboutBox.jsx @@ -114,7 +114,6 @@ class AboutBox extends Component { authorsElems[lang] = authorArray.map((author, iauthor) => {iauthor > 0 ? ", " : ""}{author[lang]} ); } } - // use compPlaceString and compDateString if available. then use compPlace o/w use pubPlace o/w nothing let placeTextEn, placeTextHe; if (d.compPlaceString) { placeTextEn = d.compPlaceString.en; @@ -131,22 +130,11 @@ class AboutBox extends Component { if (d.compDateString) { dateTextEn = d.compDateString.en; dateTextHe = d.compDateString.he - } else if (d.compDate) { - if (d.errorMargin !== 0) { - //I don't think there are any texts which are mixed BCE/CE - const lowerDate = Math.abs(d.compDate - d.errorMargin); - const upperDate = Math.abs(d.compDate - d.errorMargin); - dateTextEn = `(c.${lowerDate} - c.${upperDate} ${d.compDate < 0 ? "BCE" : "CE"})`; - dateTextHe = `(${lowerDate} - ${upperDate} ${d.compDate < 0 ? 'לפנה"ס בקירוב' : 'לספירה בקירוב'})`; - } else { - dateTextEn = `(${Math.abs(d.compDate)} ${d.compDate < 0 ? "BCE" : "CE"})`; - dateTextHe = `(${Math.abs(d.compDate)} ${d.compDate < 0 ? 'לפנה"ס בקירוב' : 'לספירה בקירוב'})`; - } - } else if (d.pubDate) { - dateTextEn = `(${Math.abs(d.pubDate)} ${d.pubDate < 0 ? "BCE" : "CE"})`; - dateTextHe = `(${Math.abs(d.pubDate)} ${d.pubDate < 0 ? 'לפנה"ס בקירוב' : 'לספירה בקירוב'})`; + } else if (d.pubDateString) { + dateTextEn = d.pubDateString.en; + dateTextHe = d.pubDateString.he; } - const bookPageUrl = "/" + Sefaria.normRef(d.title); + const bookPageUrl = "/" + Sefaria.normRef(d.title); //comment for the sake of commit detailSection = (