diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 4645d13..7645ec9 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -100,7 +100,7 @@ jobs: - name: Publish benchmark results uses: benchmark-action/github-action-benchmark@v1 - if: github.event_name == 'pull_request' && github.repository == 'ixc/python-edtf' + if: github.event_name == 'pull_request' && github.repository == 'rism-digital/python-edtf' with: tool: 'pytest' auto-push: true @@ -112,7 +112,7 @@ jobs: summary-always: true - name: Comment on benchmark results without publishing - if: github.event_name != 'pull_request' || github.repository != 'ixc/python-edtf' + if: github.event_name != 'pull_request' || github.repository != 'rism-digital/python-edtf' uses: benchmark-action/github-action-benchmark@v1 with: tool: 'pytest' diff --git a/.gitignore b/.gitignore index 36df893..d27f79d 100644 --- a/.gitignore +++ b/.gitignore @@ -64,3 +64,5 @@ docs/_build/ # PyBuilder target/ +.idea +.DS_Store diff --git a/edtf/convert.py b/edtf/convert.py index ee03f36..db86155 100644 --- a/edtf/convert.py +++ b/edtf/convert.py @@ -70,8 +70,7 @@ def trim_struct_time(st: struct_time, strip_time: bool = False) -> struct_time: """ if strip_time: return struct_time(list(st[:3]) + TIME_EMPTY_TIME + TIME_EMPTY_EXTRAS) - else: - return struct_time(list(st[:6]) + TIME_EMPTY_EXTRAS) + return struct_time(list(st[:6]) + TIME_EMPTY_EXTRAS) def struct_time_to_jd(st: struct_time) -> float: @@ -116,7 +115,7 @@ def jd_to_struct_time(jd: float) -> struct_time: return struct_time([year, month, day, hour, minute, second] + TIME_EMPTY_EXTRAS) -def _roll_negative_time_fields(year, month, day, hour, minute, second): +def _roll_negative_time_fields(year, month, day, hour, minute, second) -> tuple: """ Fix date/time fields which have nonsense negative values for any field except for year by rolling the overall date/time value backwards, treating @@ -152,4 +151,5 @@ def _roll_negative_time_fields(year, month, day, hour, minute, second): year += int(month / 12.0) # Adjust by whole year in months year -= 1 # Subtract 1 for negative minutes month %= 12 # Convert negative month to positive remainder - return (year, month, day, hour, minute, second) + + return year, month, day, hour, minute, second diff --git a/edtf/natlang/en.py b/edtf/natlang/en.py index f28e685..82fefc8 100644 --- a/edtf/natlang/en.py +++ b/edtf/natlang/en.py @@ -2,6 +2,7 @@ import re from datetime import datetime +from typing import Optional from dateutil.parser import ParserError, parse @@ -13,19 +14,45 @@ DEFAULT_DATE_1 = datetime(1234, 1, 1, 0, 0) DEFAULT_DATE_2 = datetime(5678, 10, 10, 0, 0) -SHORT_YEAR_RE = r"(-?)([\dX])([\dX])([\dX])([\dX])" -LONG_YEAR_RE = r"Y(-?)([1-9]\d\d\d\d+)" -CENTURY_RE = r"(\d{1,2})(c\.?|(st|nd|rd|th) century)\s?(ad|ce|bc|bce)?" -CE_RE = r"(\d{1,4}) (ad|ce|bc|bce)" +LONG_YEAR_RE = re.compile(r"y(-?)([1-9]\d\d\d\d+)") +CENTURY_RE = re.compile(r"(\d{1,2})(c\.?|(st|nd|rd|th) century)\s?(ad|ce|bc|bce)?") +CENTURY_RANGE = re.compile(r"\b(\d\d)(th|st|nd|rd|)-(\d\d)(th|st|nd|rd) [cC]") +CE_RE = re.compile(r"(\d{1,4}) (ad|ce|bc|bce)") +ONE_DIGIT_PARTIAL_FIRST = re.compile(r"\d\D\b") +TWO_DIGIT_PARTIAL_FIRST = re.compile(r"\d\d\b") +PARTIAL_CHECK = re.compile(r"\b\d\d\d\d$") +SLASH_YEAR = re.compile(r"(\d\d\d\d)/(\d\d\d\d)") +BEFORE_CHECK = re.compile(r"\b(?:before|earlier|avant)\b") +AFTER_CHECK = re.compile(r"\b(after|since|later|aprés|apres)\b") +APPROX_CHECK = re.compile( + r"\b(?:ca?\.? ?\d{4}|circa|approx|approximately|around|about|~\d{3,4})|^~" +) +UNCERTAIN_CHECK = re.compile(r"\b(?:uncertain|possibly|maybe|guess|\d{3,4}\?)") +UNCERTAIN_REPL = re.compile(r"(\d{4})\?") +MIGHT_BE_CENTURY = re.compile(r"(\d{2}00)s") +MIGHT_BE_DECADE = re.compile(r"(\d{3}0)s") + +APPROX_CENTURY_RE = re.compile( + r"\b(ca?\.?) ?(\d{1,2})(c\.?|(st|nd|rd|th) century)\s?(ad|ce|bc|bce)?" +) +UNCERTAIN_CENTURY_RE = re.compile( + r"(\d{1,2})(c\.?|(st|nd|rd|th) century)\s?(ad|ce|bc|bce)?\?" +) + +APPROX_CE_RE = re.compile(r"\b(ca?\.?) ?(\d{1,4}) (ad|ce|bc|bce)") +UNCERTAIN_CE_RE = re.compile(r"(\d{1,4}) (ad|ce|bc|bce)\?") + +MENTIONS_YEAR = re.compile(r"\byear\b.+(in|during)\b") +MENTIONS_MONTH = re.compile(r"\bmonth\b.+(in|during)\b") +MENTIONS_DAY = re.compile(r"\bday\b.+(in|during)\b") # Set of RE rules that will cause us to abort text processing, since we know # the results will be wrong. -REJECT_RULES = ( - r".*dynasty.*", # Don't parse '23rd Dynasty' to 'uuuu-uu-23' -) +REJECT_RULES = re.compile(r".*dynasty.*") # Don't parse '23rd Dynasty' to 'uuuu-uu-23' -def text_to_edtf(text): +# @functools.lru_cache +def text_to_edtf(text: str) -> Optional[str]: """ Generate EDTF string equivalent of a given natural language date string. """ @@ -35,7 +62,7 @@ def text_to_edtf(text): t = text.lower() # try parsing the whole thing - result = text_to_edtf_date(t) + result: Optional[str] = text_to_edtf_date(t) if not result: # split by list delims and move fwd with the first thing that returns a non-empty string. @@ -43,7 +70,8 @@ def text_to_edtf(text): for split in [",", ";", "or"]: for list_item in t.split(split): # try parsing as an interval - split by '-' - toks = list_item.split("-") + toks: list[str] = list_item.split("-") + if len(toks) == 2: d1 = toks[0].strip() d2 = toks[1].strip() @@ -51,19 +79,20 @@ def text_to_edtf(text): # match looks from the beginning of the string, search # looks anywhere. - if re.match(r"\d\D\b", d2): # 1-digit year partial e.g. 1868-9 + if re.match( + ONE_DIGIT_PARTIAL_FIRST, d2 + ): # 1-digit year partial e.g. 1868-9 if re.search( - r"\b\d\d\d\d$", d1 + PARTIAL_CHECK, d1 ): # TODO: evaluate it and see if it's a year d2 = d1[-4:-1] + d2 - elif re.match(r"\d\d\b", d2): # 2-digit year partial e.g. 1809-10 - if re.search(r"\b\d\d\d\d$", d1): + elif re.match( + TWO_DIGIT_PARTIAL_FIRST, d2 + ): # 2-digit year partial e.g. 1809-10 + if re.search(PARTIAL_CHECK, d1): d2 = d1[-4:-2] + d2 else: - century_range_match = re.search( - r"\b(\d\d)(th|st|nd|rd|)-(\d\d)(th|st|nd|rd) [cC]", - f"{d1}-{d2}", - ) + century_range_match = re.search(CENTURY_RANGE, f"{d1}-{d2}") if century_range_match: g = century_range_match.groups() d1 = f"{g[0]}C" @@ -73,7 +102,7 @@ def text_to_edtf(text): r2 = text_to_edtf_date(d2) if r1 and r2: - result = r1 + "/" + r2 + result = f"{r1}/{r2}" return result # is it an either/or year "1838/1862" - that has a different @@ -82,7 +111,7 @@ def text_to_edtf(text): # This whole section could be more friendly. else: - int_match = re.search(r"(\d\d\d\d)\/(\d\d\d\d)", list_item) + int_match = re.search(SLASH_YEAR, list_item) if int_match: return f"[{int_match.group(1)}, {int_match.group(2)}]" @@ -92,21 +121,19 @@ def text_to_edtf(text): if result: break - is_before = re.findall(r"\bbefore\b", t) - is_before = is_before or re.findall(r"\bearlier\b", t) - - is_after = re.findall(r"\bafter\b", t) - is_after = is_after or re.findall(r"\bsince\b", t) - is_after = is_after or re.findall(r"\blater\b", t) + is_before = re.findall(BEFORE_CHECK, t) + is_after = re.findall(AFTER_CHECK, t) if is_before: - result = f"/{result}" # unknown is replaced with null for intervals + result = f"/{result}" elif is_after: - result = f"{result}/" # unknown is replaced with null for intervals + result = f"{result}/" + return result -def text_to_edtf_date(text): +# @functools.lru_cache +def text_to_edtf_date(text: str) -> Optional[str]: """ Return EDTF string equivalent of a given natural language date string. @@ -115,37 +142,28 @@ def text_to_edtf_date(text): differ are undefined. """ if not text: - return + return None t = text.lower() - result = "" + result: str = "" - for reject_re in REJECT_RULES: - if re.match(reject_re, t): - return + if re.match(REJECT_RULES, t): + return None # matches on '1800s'. Needs to happen before is_decade. - could_be_century = re.findall(r"(\d{2}00)s", t) + could_be_century: list = re.findall(MIGHT_BE_CENTURY, t) # matches on '1800s' and '1910s'. Removes the 's'. # Needs to happen before is_uncertain because e.g. "1860s?" - t, is_decade = re.subn(r"(\d{3}0)s", r"\1", t) + t, is_decade = re.subn(MIGHT_BE_DECADE, r"\1", t) # detect approximation signifiers # a few 'circa' abbreviations just before the year - is_approximate = re.findall(r"\b(ca?\.?) ?\d{4}", t) + is_approximate = re.findall(APPROX_CHECK, t) # the word 'circa' anywhere - is_approximate = is_approximate or re.findall(r"\bcirca\b", t) - # the word 'approx'/'around'/'about' anywhere - is_approximate = is_approximate or re.findall(r"\b(approx|around|about)", t) - # a ~ before a year-ish number - is_approximate = is_approximate or re.findall(r"\b~\d{4}", t) - # a ~ at the beginning - is_approximate = is_approximate or re.findall(r"^~", t) # detect uncertainty signifiers - t, is_uncertain = re.subn(r"(\d{4})\?", r"\1", t) - # the words uncertain/maybe/guess anywhere - is_uncertain = is_uncertain or re.findall(r"\b(uncertain|possibly|maybe|guess)", t) + t, is_uncertain = re.subn(UNCERTAIN_REPL, r"\1", t) + is_uncertain = is_uncertain or re.findall(UNCERTAIN_CHECK, t) # detect century forms is_century = re.findall(CENTURY_RE, t) @@ -153,32 +171,29 @@ def text_to_edtf_date(text): # detect CE/BCE year form is_ce = re.findall(CE_RE, t) if is_century: - result = "%02dXX" % (int(is_century[0][0]) - 1,) - is_approximate = is_approximate or re.findall(r"\b(ca?\.?) ?" + CENTURY_RE, t) - is_uncertain = is_uncertain or re.findall(CENTURY_RE + r"\?", t) + result = f"{int(is_century[0][0]) - 1:02d}XX" + is_approximate = is_approximate or re.findall(APPROX_CENTURY_RE, t) + is_uncertain = is_uncertain or re.findall(UNCERTAIN_CENTURY_RE, t) try: - is_bc = is_century[0][-1] in ("bc", "bce") - if is_bc: + if is_century[0][-1] in ("bc", "bce"): result = f"-{result}" except IndexError: pass elif is_ce: - result = "%04d" % (int(is_ce[0][0])) - is_approximate = is_approximate or re.findall(r"\b(ca?\.?) ?" + CE_RE, t) - is_uncertain = is_uncertain or re.findall(CE_RE + r"\?", t) + result = f"{int(is_ce[0][0]):04d}" + is_approximate = is_approximate or re.findall(APPROX_CE_RE, t) + is_uncertain = is_uncertain or re.findall(UNCERTAIN_CE_RE, t) try: - is_bc = is_ce[0][-1] in ("bc", "bce") - if is_bc: + if is_ce[0][-1] in ("bc", "bce"): result = f"-{result}" except IndexError: pass else: # try dateutil.parse - try: # parse twice, using different defaults to see what was # parsed and what was guessed. @@ -205,34 +220,34 @@ def text_to_edtf_date(text): if dt1.date() == DEFAULT_DATE_1.date() and dt2.date() == DEFAULT_DATE_2.date(): # couldn't parse anything - defaults are untouched. - return + return None date1 = dt1.isoformat()[:10] date2 = dt2.isoformat()[:10] # guess precision of 'unspecified' characters to use - mentions_year = re.findall(r"\byear\b.+(in|during)\b", t) - mentions_month = re.findall(r"\bmonth\b.+(in|during)\b", t) - mentions_day = re.findall(r"\bday\b.+(in|during)\b", t) + mentions_year = re.findall(MENTIONS_YEAR, t) + mentions_month = re.findall(MENTIONS_MONTH, t) + mentions_day = re.findall(MENTIONS_DAY, t) - for i in range(len(date1)): + for i, char in enumerate(date1): # if the given year could be a century (e.g. '1800s') then use # approximate/uncertain markers to decide whether we treat it as # a century or a decade. if i == 2 and could_be_century and not (is_approximate or is_uncertain): result += "X" - elif i == 3 and is_decade > 0: + elif i == 3 and is_decade: if mentions_year: - result += "X" # previously year precision - now just X + result += "X" # year precision else: - result += "X" # previously decade precision - now just X - elif date1[i] == date2[i]: + result += "X" # decade precision + elif char == date2[i]: # since both attempts at parsing produced the same result # it must be parsed value, not a default - result += date1[i] + result += char else: # different values were produced, meaning that it's likely - # a default. Use 'X' + # a default. Use 'unspecified' result += "X" # strip off unknown chars from end of string - except the first 4 diff --git a/edtf/natlang/tests.py b/edtf/natlang/tests.py index d2c43a5..e0acaad 100644 --- a/edtf/natlang/tests.py +++ b/edtf/natlang/tests.py @@ -182,9 +182,9 @@ def test_natlang(input_text, expected_output): Verify that the conversion from text to EDTF format matches the expected output. """ result = text_to_edtf(input_text) - assert ( - result == expected_output - ), f"Failed for input: {input_text} - expected {expected_output}, got {result}" + assert result == expected_output, ( + f"Failed for input: {input_text} - expected {expected_output}, got {result}" + ) @pytest.mark.benchmark diff --git a/edtf/parser/grammar.py b/edtf/parser/grammar.py index beabf52..2fdb4bf 100644 --- a/edtf/parser/grammar.py +++ b/edtf/parser/grammar.py @@ -49,15 +49,15 @@ Unspecified, ) -oneThru12 = oneOf(["%.2d" % i for i in range(1, 13)]) -oneThru13 = oneOf(["%.2d" % i for i in range(1, 14)]) -oneThru23 = oneOf(["%.2d" % i for i in range(1, 24)]) -zeroThru23 = oneOf(["%.2d" % i for i in range(0, 24)]) -oneThru29 = oneOf(["%.2d" % i for i in range(1, 30)]) -oneThru30 = oneOf(["%.2d" % i for i in range(1, 31)]) -oneThru31 = oneOf(["%.2d" % i for i in range(1, 32)]) -oneThru59 = oneOf(["%.2d" % i for i in range(1, 60)]) -zeroThru59 = oneOf(["%.2d" % i for i in range(0, 60)]) +oneThru12 = oneOf([f"{i:02}" for i in range(1, 13)]) +oneThru13 = oneOf([f"{i:02}" for i in range(1, 14)]) +oneThru23 = oneOf([f"{i:02}" for i in range(1, 24)]) +zeroThru23 = oneOf([f"{i:02}" for i in range(0, 24)]) +oneThru29 = oneOf([f"{i:02}" for i in range(1, 30)]) +oneThru30 = oneOf([f"{i:02}" for i in range(1, 31)]) +oneThru31 = oneOf([f"{i:02}" for i in range(1, 32)]) +oneThru59 = oneOf([f"{i:02}" for i in range(1, 60)]) +zeroThru59 = oneOf([f"{i:02}" for i in range(0, 60)]) digit = Word(nums, exact=1) positiveDigit = Word(nums, exact=1, excludeChars="0") diff --git a/edtf/parser/parser_classes.py b/edtf/parser/parser_classes.py index ed03355..9439a80 100644 --- a/edtf/parser/parser_classes.py +++ b/edtf/parser/parser_classes.py @@ -4,6 +4,7 @@ from datetime import date, datetime from operator import add, sub from time import struct_time +from typing import Optional from dateutil.relativedelta import relativedelta @@ -27,7 +28,7 @@ PRECISION_DAY = "day" -def days_in_month(year, month): +def days_in_month(year: int, month: int) -> int: """ Return the number of days in the given year and month, where month is 1=January to 12=December, and respecting leap years as identified by @@ -125,7 +126,7 @@ def __init__(self, *args, **kwargs): def __str__(self): raise NotImplementedError - def _strict_date(self, lean): + def _strict_date(self, lean: str = EARLIEST): raise NotImplementedError def lower_strict(self): @@ -134,36 +135,37 @@ def lower_strict(self): def upper_strict(self): return self._strict_date(lean=LATEST) - def _get_fuzzy_padding(self, lean): + def _get_fuzzy_padding(self, lean: str): """ Subclasses should override this to pad based on how precise they are. """ return relativedelta(0) - def get_is_approximate(self): + def get_is_approximate(self) -> bool: return getattr(self, "_is_approximate", False) - def set_is_approximate(self, val): + def set_is_approximate(self, val: bool) -> None: self._is_approximate = val - is_approximate = property(get_is_approximate, set_is_approximate) + is_approximate = property(get_is_approximate, set_is_approximate) # noqa - def get_is_uncertain(self): + def get_is_uncertain(self) -> bool: return getattr(self, "_is_uncertain", False) - def set_is_uncertain(self, val): + def set_is_uncertain(self, val: bool) -> None: self._is_uncertain = val - is_uncertain = property(get_is_uncertain, set_is_uncertain) + is_uncertain = property(get_is_uncertain, set_is_uncertain) # noqa - def get_is_uncertain_and_approximate(self): + def get_is_uncertain_and_approximate(self) -> bool: return getattr(self, "_uncertain_and_approximate", False) - def set_is_uncertain_and_approximate(self, val): + def set_is_uncertain_and_approximate(self, val: bool) -> None: self._uncertain_and_approximate = val is_uncertain_and_approximate = property( - get_is_uncertain_and_approximate, set_is_uncertain_and_approximate + get_is_uncertain_and_approximate, # noqa + set_is_uncertain_and_approximate, # noqa ) def lower_fuzzy(self): @@ -241,57 +243,68 @@ def __le__(self, other): class Date(EDTFObject): - def set_year(self, y): + def __init__( # noqa + self, + year: Optional[str] = None, + month: Optional[str] = None, + day: Optional[str] = None, + significant_digits=None, + **kwargs, + ): + for param in ("date", "lower", "upper"): + if param in kwargs: + self.__init__(**kwargs[param]) + return + + self._year = year # Year is required, but sometimes passed in as a 'date' dict. + self._month = month + self._day = day + self.significant_digits = ( + int(significant_digits) if significant_digits else None + ) + + def set_year(self, y: str): if y is None: raise AttributeError("Year must not be None") self._year = y - def get_year(self): + def get_year(self) -> str: return self._year - year = property(get_year, set_year) + year = property(get_year, set_year) # noqa - def set_month(self, m): + def set_month(self, m: Optional[str]): self._month = m if m is None: - self.day = None + self._day = None - def get_month(self): + def get_month(self) -> Optional[str]: return self._month - month = property(get_month, set_month) + month = property(get_month, set_month) # noqa - def __init__( - self, year=None, month=None, day=None, significant_digits=None, **kwargs - ): - for param in ("date", "lower", "upper"): - if param in kwargs: - self.__init__(**kwargs[param]) - return + def set_day(self, d: Optional[str]): + self._day = d + if d is None: + self._day = None - self.year = year # Year is required, but sometimes passed in as a 'date' dict. - self.month = month - self.day = day - self.significant_digits = ( - int(significant_digits) if significant_digits else None - ) + def get_day(self) -> Optional[str]: + return self._day + + day = property(get_day, set_day) # noqa def __str__(self): - r = self.year - if self.month: - r += f"-{self.month}" - if self.day: - r += f"-{self.day}" + r = self._year + if self._month: + r += f"-{self._month}" + if self._day: + r += f"-{self._day}" if self.significant_digits: r += f"S{self.significant_digits}" return r def isoformat(self, default=date.max): - return "%s-%02d-%02d" % ( - self.year, - int(self.month or default.month), - int(self.day or default.day), - ) + return f"{self._year}-{int(self._month or default.month):02d}-{int(self._day or default.day):02d}" def lower_fuzzy(self): if not hasattr(self, "significant_digits") or not self.significant_digits: @@ -299,10 +312,10 @@ def lower_fuzzy(self): sub, self.lower_strict(), self._get_fuzzy_padding(EARLIEST) ) else: - total_digits = len(self.year) + total_digits = len(self._year) insignificant_digits = total_digits - self.significant_digits lower_year = ( - int(self.year) + int(self._year) // (10**insignificant_digits) * (10**insignificant_digits) ) @@ -314,9 +327,9 @@ def upper_fuzzy(self): add, self.upper_strict(), self._get_fuzzy_padding(LATEST) ) else: - total_digits = len(self.year) + total_digits = len(self._year) insignificant_digits = total_digits - self.significant_digits - upper_year = (int(self.year) // (10**insignificant_digits) + 1) * ( + upper_year = (int(self._year) // (10**insignificant_digits) + 1) * ( 10**insignificant_digits ) - 1 return struct_time( @@ -326,23 +339,23 @@ def upper_fuzzy(self): def _precise_year(self, lean): # Replace any ambiguous characters in the year string with 0s or 9s if lean == EARLIEST: - return int(re.sub(r"X", r"0", self.year)) + return int(re.sub(r"X", r"0", self._year)) else: - return int(re.sub(r"X", r"9", self.year)) + return int(re.sub(r"X", r"9", self._year)) def _precise_month(self, lean): - if self.month and self.month != "XX": + if self._month and self._month != "XX": try: - return int(self.month) + return int(self._month) except ValueError as err: raise ValueError( - f"Couldn't convert {self.month} to int (in {self})" + f"Couldn't convert {self._month} to int (in {self})" ) from err else: return 1 if lean == EARLIEST else 12 def _precise_day(self, lean): - if not self.day or self.day == "XX": + if not self._day or self._day == "XX": if lean == EARLIEST: return 1 else: @@ -350,9 +363,9 @@ def _precise_day(self, lean): self._precise_year(LATEST), self._precise_month(LATEST) ) else: - return int(self.day) + return int(self._day) - def _strict_date(self, lean): + def _strict_date(self, lean: str = EARLIEST): """ Return a `time.struct_time` representation of the date. """ @@ -368,9 +381,9 @@ def _strict_date(self, lean): @property def precision(self): - if self.day: + if self._day: return PRECISION_DAY - if self.month: + if self._month: return PRECISION_MONTH return PRECISION_YEAR @@ -379,7 +392,7 @@ def estimated(self): class DateAndTime(EDTFObject): - def __init__(self, date, time): + def __init__(self, date, time): # noqa: super raises not implemented self.date = date self.time = time @@ -389,7 +402,7 @@ def __str__(self): def isoformat(self): return self.date.isoformat() + "T" + self.time - def _strict_date(self, lean): + def _strict_date(self, lean: str = EARLIEST): return self.date._strict_date(lean) def __eq__(self, other): @@ -408,14 +421,14 @@ def __ne__(self, other): class Interval(EDTFObject): - def __init__(self, lower, upper): + def __init__(self, lower, upper): # noqa: super() raises not implemented self.lower = lower self.upper = upper def __str__(self): return f"{self.lower}/{self.upper}" - def _strict_date(self, lean): + def _strict_date(self, lean: str = EARLIEST): if lean == EARLIEST: r = self.lower._strict_date(lean) else: @@ -438,7 +451,7 @@ def parse_action(cls, toks): args = toks.asList() return cls(*args) - def __init__(self, *args): + def __init__(self, *args): # noqa: super() raises not implemented if len(args) != 1: raise AssertionError("UA must have exactly one argument") ua = args[0] @@ -467,7 +480,7 @@ def _get_multiplier(self): class UncertainOrApproximate(EDTFObject): - def __init__(self, date, ua): + def __init__(self, date, ua): # noqa: super() raises not implemented self.date = date self.ua = ua self.is_uncertain = ua.is_uncertain if ua else False @@ -482,7 +495,7 @@ def __str__(self): else: return str(self.date) - def _strict_date(self, lean): + def _strict_date(self, lean: str = EARLIEST): return self.date._strict_date(lean) def _get_fuzzy_padding(self, lean): @@ -511,7 +524,7 @@ def _get_fuzzy_padding(self, lean): class UnspecifiedIntervalSection(EDTFObject): - def __init__(self, sectionOpen=False, other_section_element=None): + def __init__(self, sectionOpen=False, other_section_element=None): # noqa: super() raises not implemented if sectionOpen: self.is_open = True self.is_unknown = False @@ -526,14 +539,17 @@ def __str__(self): else: return ".." - def _strict_date(self, lean): + def _strict_date(self, lean: str = EARLIEST): + if lean not in (EARLIEST, LATEST): + raise ValueError("lean must be one of EARLIEST or LATEST") + if lean == EARLIEST: if self.is_unknown: upper = self.other._strict_date(LATEST) return apply_delta(sub, upper, appsettings.DELTA_IF_UNKNOWN) else: return -math.inf - else: + elif lean == LATEST: if self.is_unknown: lower = self.other._strict_date(EARLIEST) return apply_delta(add, lower, appsettings.DELTA_IF_UNKNOWN) @@ -696,7 +712,7 @@ def precision(self): class Level1Interval(Interval): - def __init__(self, lower=None, upper=None): + def __init__(self, lower: Optional[dict] = None, upper: Optional[dict] = None): # noqa if lower: if lower["date"] == "..": self.lower = UnspecifiedIntervalSection( @@ -719,8 +735,10 @@ def __init__(self, lower=None, upper=None): self.upper = UnspecifiedIntervalSection( False, UncertainOrApproximate(**lower) ) - self.is_approximate = self.lower.is_approximate or self.upper.is_approximate - self.is_uncertain = self.lower.is_uncertain or self.upper.is_uncertain + self.is_approximate: bool = ( + self.lower.is_approximate or self.upper.is_approximate + ) + self.is_uncertain: bool = self.lower.is_uncertain or self.upper.is_uncertain self.is_uncertain_and_approximate = ( self.lower.is_uncertain_and_approximate or self.upper.is_uncertain_and_approximate @@ -734,7 +752,7 @@ def _get_fuzzy_padding(self, lean): class LongYear(EDTFObject): - def __init__(self, year, significant_digits=None): + def __init__(self, year: str, significant_digits: Optional[str] = None): # noqa self.year = year self.significant_digits = ( int(significant_digits) if significant_digits else None @@ -749,7 +767,7 @@ def __str__(self): def _precise_year(self): return int(self.year) - def _strict_date(self, lean): + def _strict_date(self, lean: str = EARLIEST): py = self._precise_year() if lean == EARLIEST: return struct_time([py, 1, 1] + TIME_EMPTY_TIME + TIME_EMPTY_EXTRAS) @@ -797,7 +815,7 @@ def upper_fuzzy(self): class Season(Date): - def __init__(self, year, season, **kwargs): + def __init__(self, year, season, **kwargs): # noqa self.year = year self.season = season # use season to look up month # day isn't part of the 'season' spec, but it helps the inherited @@ -811,20 +829,15 @@ def _precise_month(self, lean): rng = appsettings.SEASON_L2_MONTHS_RANGE[int(self.season)] if lean == EARLIEST: return rng[0] - else: - return rng[1] + + return rng[1] # (* ************************** Level 2 *************************** *) class PartialUncertainOrApproximate(Date): - def set_year(self, y): # Year can be None. - self._year = y - - year = property(Date.get_year, set_year) - - def __init__( + def __init__( # noqa self, year=None, month=None, @@ -909,12 +922,17 @@ def __str__(self): return result - def _precise_year(self, lean): + def set_year(self, y): # Year can be None. + self._year = y + + year = property(Date.get_year, set_year) # noqa + + def _precise_year(self, lean: str): if self.season: return self.season._precise_year(lean) return super()._precise_year(lean) - def _precise_month(self, lean): + def _precise_month(self, lean: str): if self.season: return self.season._precise_month(lean) return super()._precise_month(lean) @@ -992,7 +1010,7 @@ class PartialUnspecified(Unspecified): class Consecutives(Interval): # Treating Consecutive ranges as intervals where one bound is optional - def __init__(self, lower=None, upper=None): + def __init__(self, lower=None, upper=None): # noqa if lower and not isinstance(lower, EDTFObject): self.lower = Date.parse(lower) else: @@ -1018,18 +1036,19 @@ def __str__(self): class OneOfASet(EDTFObject): + def __init__(self, *args): # noqa + self.objects = args + @classmethod def parse_action(cls, toks): args = [t for t in toks.asList() if isinstance(t, EDTFObject)] return cls(*args) - def __init__(self, *args): - self.objects = args - def __str__(self): - return "[{}]".format(", ".join([str(o) for o in self.objects])) + repr: str = ", ".join([str(o) for o in self.objects]) + return f"[{repr}]" - def _strict_date(self, lean): + def _strict_date(self, lean: str = EARLIEST): strict_dates = [x._strict_date(lean) for x in self.objects] # Accounting for possible 'inf' and '-inf' values if lean == LATEST: @@ -1051,34 +1070,35 @@ def _strict_date(self, lean): class MultipleDates(EDTFObject): + def __init__(self, *args): # noqa + self.objects = args + @classmethod def parse_action(cls, toks): args = [t for t in toks.asList() if isinstance(t, EDTFObject)] return cls(*args) - def __init__(self, *args): - self.objects = args - def __str__(self): - return "{{{}}}".format(", ".join([str(o) for o in self.objects])) + repr: str = ", ".join([str(o) for o in self.objects]) + return f"{{{repr}}}" - def _strict_date(self, lean): + def _strict_date(self, lean: str = EARLIEST): if lean == LATEST: return max([x._strict_date(lean) for x in self.objects]) - else: - return min([x._strict_date(lean) for x in self.objects]) + return min([x._strict_date(lean) for x in self.objects]) class Level2Interval(Level1Interval): - def __init__(self, lower, upper): + def __init__(self, lower, upper): # noqa # Check whether incoming lower/upper values are single-item lists, and # if so take just the first item. This works around what I *think* is a - # bug in the grammer that provides us with single-item lists of + # bug in the grammar that provides us with single-item lists of # `PartialUncertainOrApproximate` items for lower/upper values. if isinstance(lower, (tuple, list)) and len(lower) == 1: self.lower = lower[0] else: self.lower = lower + if isinstance(lower, (tuple, list)) and len(upper) == 1: self.upper = upper[0] else: @@ -1096,7 +1116,7 @@ class Level2Season(Season): class ExponentialYear(LongYear): - def __init__(self, base, exponent, significant_digits=None): + def __init__(self, base, exponent, significant_digits=None): # noqa self.base = base self.exponent = exponent self.significant_digits = ( @@ -1106,13 +1126,13 @@ def __init__(self, base, exponent, significant_digits=None): def _precise_year(self): return int(self.base) * 10 ** int(self.exponent) - def get_year(self): + def get_year(self) -> str: if self.significant_digits: return f"{self.base}E{self.exponent}S{self.significant_digits}" else: return f"{self.base}E{self.exponent}" - year = property(get_year) + year = property(get_year) # noqa def estimated(self): return self._precise_year() diff --git a/edtf/parser/tests.py b/edtf/parser/tests.py index c2dd711..f37c806 100644 --- a/edtf/parser/tests.py +++ b/edtf/parser/tests.py @@ -312,51 +312,51 @@ def test_edtf_examples(test_input, expected_tuple): # Unpack expected results based on their count if len(expected_tuple) == 1: - assert ( - result_date == expected_tuple[0] - ), f"Expected {expected_tuple[0]}, got {result_date}" + assert result_date == expected_tuple[0], ( + f"Expected {expected_tuple[0]}, got {result_date}" + ) elif len(expected_tuple) == 2: lower_strict = iso_to_struct_time(expected_tuple[0]) upper_strict = iso_to_struct_time(expected_tuple[1]) - assert ( - result.lower_strict() == lower_strict - ), f"Lower strict date does not match. Expected {lower_strict}, got {result.lower_strict()}" - assert ( - result.upper_strict() == upper_strict - ), f"Upper strict date does not match. Expected {upper_strict}, got {result.upper_strict()}" + assert result.lower_strict() == lower_strict, ( + f"Lower strict date does not match. Expected {lower_strict}, got {result.lower_strict()}" + ) + assert result.upper_strict() == upper_strict, ( + f"Upper strict date does not match. Expected {upper_strict}, got {result.upper_strict()}" + ) elif len(expected_tuple) == 3: strict_date = iso_to_struct_time(expected_tuple[0]) lower_fuzzy = iso_to_struct_time(expected_tuple[1]) upper_fuzzy = iso_to_struct_time(expected_tuple[2]) - assert ( - result.lower_strict() == strict_date - ), f"Lower strict date does not match. Expected {strict_date}, got {result.lower_strict()}" - assert ( - result.upper_strict() == strict_date - ), f"Upper strict date does not match. Expected {strict_date}, got {result.upper_strict()}" - assert ( - result.lower_fuzzy() == lower_fuzzy - ), f"Lower fuzzy date does not match. Expected {lower_fuzzy}, got {result.lower_fuzzy()}" - assert ( - result.upper_fuzzy() == upper_fuzzy - ), f"Upper fuzzy date does not match. Expected {upper_fuzzy}, got {result.upper_fuzzy()}" + assert result.lower_strict() == strict_date, ( + f"Lower strict date does not match. Expected {strict_date}, got {result.lower_strict()}" + ) + assert result.upper_strict() == strict_date, ( + f"Upper strict date does not match. Expected {strict_date}, got {result.upper_strict()}" + ) + assert result.lower_fuzzy() == lower_fuzzy, ( + f"Lower fuzzy date does not match. Expected {lower_fuzzy}, got {result.lower_fuzzy()}" + ) + assert result.upper_fuzzy() == upper_fuzzy, ( + f"Upper fuzzy date does not match. Expected {upper_fuzzy}, got {result.upper_fuzzy()}" + ) elif len(expected_tuple) == 4: lower_strict = iso_to_struct_time(expected_tuple[0]) upper_strict = iso_to_struct_time(expected_tuple[1]) lower_fuzzy = iso_to_struct_time(expected_tuple[2]) upper_fuzzy = iso_to_struct_time(expected_tuple[3]) - assert ( - result.lower_strict() == lower_strict - ), f"Lower strict date does not match. Expected {lower_strict}, got {result.lower_strict()}" - assert ( - result.upper_strict() == upper_strict - ), f"Upper strict date does not match. Expected {upper_strict}, got {result.upper_strict()}" - assert ( - result.lower_fuzzy() == lower_fuzzy - ), f"Lower fuzzy date does not match. Expected {lower_fuzzy}, got {result.lower_fuzzy()}" - assert ( - result.upper_fuzzy() == upper_fuzzy - ), f"Upper fuzzy date does not match. Expected {upper_fuzzy}, got {result.upper_fuzzy()}" + assert result.lower_strict() == lower_strict, ( + f"Lower strict date does not match. Expected {lower_strict}, got {result.lower_strict()}" + ) + assert result.upper_strict() == upper_strict, ( + f"Upper strict date does not match. Expected {upper_strict}, got {result.upper_strict()}" + ) + assert result.lower_fuzzy() == lower_fuzzy, ( + f"Lower fuzzy date does not match. Expected {lower_fuzzy}, got {result.lower_fuzzy()}" + ) + assert result.upper_fuzzy() == upper_fuzzy, ( + f"Upper fuzzy date does not match. Expected {upper_fuzzy}, got {result.upper_fuzzy()}" + ) @pytest.mark.parametrize("bad_input", BAD_EXAMPLES) diff --git a/pyproject.toml b/pyproject.toml index b48c3f7..8826b99 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,8 @@ [project] name = "edtf" version = "5.0.0" +license = { file = "LICENSE" } +keywords = ['edtf'] dependencies = [ "python-dateutil", "pyparsing", @@ -16,7 +18,8 @@ authors = [ { name = "Mark Finger" }, { name = "Sabine Müller" }, { name = "Cole Crawford" }, - { name = "Klaus Rettinghaus" } + { name = "Klaus Rettinghaus" }, + { name = "Andrew Hankinson", email = "andrew.hankinson@rism.digital" }, ] maintainers = [ { name = "The Interaction Consortium", email = "studio@interaction.net.au" }