From 86b94517f1df7d705564d5a93e0d0c431ccd9120 Mon Sep 17 00:00:00 2001
From: Andrew Hankinson <andrew.hankinson@rism.digital>
Date: Tue, 13 Apr 2021 18:52:28 +0200
Subject: [PATCH 01/14] Enable packrat parsing for pyparsing

Delivers significant performance improvements by caching previously computed results.
---
 edtf/parser/grammar.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/edtf/parser/grammar.py b/edtf/parser/grammar.py
index c028c6e..d612c5f 100644
--- a/edtf/parser/grammar.py
+++ b/edtf/parser/grammar.py
@@ -1,5 +1,9 @@
 from pyparsing import Literal as L, ParseException, Optional, OneOrMore, \
-    ZeroOrMore, oneOf, Regex, Combine, Word, NotAny, nums
+    ZeroOrMore, oneOf, Regex, Combine, Word, NotAny, nums, ParserElement
+
+# From the pyparsing performance improvement tips:
+# https://github.com/pyparsing/pyparsing/wiki/Performance-Tips
+ParserElement.enablePackrat()
 
 # (* ************************** Level 0 *************************** *)
 from edtf.parser.parser_classes import Date, DateAndTime, Interval, Unspecified, \

From 7fdf8dd8b649a5085d8f2aed3b66a8734f2ce915 Mon Sep 17 00:00:00 2001
From: jacobcolyvan <jacob_colyvan@live.com>
Date: Mon, 26 Jul 2021 12:29:25 +1000
Subject: [PATCH 02/14] #37 update for Django 3.x compat

---
 edtf/fields.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/edtf/fields.py b/edtf/fields.py
index 83d10a7..52b9171 100644
--- a/edtf/fields.py
+++ b/edtf/fields.py
@@ -53,7 +53,7 @@ def deconstruct(self):
         del kwargs["max_length"]
         return name, path, args, kwargs
 
-    def from_db_value(self, value, expression, connection, context):
+    def from_db_value(self, value, expression, connection, context=None):
         # Converting values to Python objects
         if not value:
             return None

From 6e4a627df5447b76db492b1603f95bbd55524346 Mon Sep 17 00:00:00 2001
From: Andrew Hankinson <andrew.hankinson@gmail.com>
Date: Fri, 26 Apr 2024 15:43:38 +0200
Subject: [PATCH 03/14] Add Poetry packaging and extra natural-language keywords

---
 edtf/natlang/en.py |  3 ++-
 poetry.lock        | 45 +++++++++++++++++++++++++++++++++++++++++++++
 pyproject.toml     | 18 ++++++++++++++++++
 3 files changed, 65 insertions(+), 1 deletion(-)
 create mode 100644 poetry.lock
 create mode 100644 pyproject.toml

diff --git a/edtf/natlang/en.py b/edtf/natlang/en.py
index ec7842b..5263e07 100644
--- a/edtf/natlang/en.py
+++ b/edtf/natlang/en.py
@@ -89,6 +89,7 @@ def text_to_edtf(text):
 
     is_before = re.findall(r'\bbefore\b', t)
     is_before = is_before or re.findall(r'\bearlier\b', t)
+    is_before = is_before or re.findall(r'\baprés\b', t)
 
     is_after = re.findall(r'\bafter\b', t)
     is_after = is_after or re.findall(r'\bsince\b', t)
@@ -133,7 +134,7 @@ def text_to_edtf_date(text):
     is_approximate = is_approximate or re.findall(r'\bcirca\b', t)
     # the word 'approx'/'around'/'about' anywhere
     is_approximate = is_approximate or \
-                     re.findall(r'\b(approx|around|about)', t)
+                     re.findall(r'\b(approx|approximately|around|about)', t)
     # a ~ before a year-ish number
     is_approximate = is_approximate or re.findall(r'\b~\d{4}', t)
     # a ~ at the beginning
diff --git a/poetry.lock b/poetry.lock
new file mode 100644
index 0000000..745843e
--- /dev/null
+++ b/poetry.lock
@@ -0,0 +1,45 @@
+# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand.
+
+[[package]]
+name = "pyparsing"
+version = "3.1.2"
+description = "pyparsing module - Classes and methods to define and execute parsing grammars"
+optional = false
+python-versions = ">=3.6.8"
+files = [
+    {file = "pyparsing-3.1.2-py3-none-any.whl", hash = "sha256:f9db75911801ed778fe61bb643079ff86601aca99fcae6345aa67292038fb742"},
+    {file = "pyparsing-3.1.2.tar.gz", hash = "sha256:a1bac0ce561155ecc3ed78ca94d3c9378656ad4c94c1270de543f621420f94ad"},
+]
+
+[package.extras]
+diagrams = ["jinja2", "railroad-diagrams"]
+
+[[package]]
+name = "python-dateutil"
+version = "2.9.0.post0"
+description = "Extensions to the standard Python datetime module"
+optional = false
+python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7"
+files = [
+    {file = "python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3"},
+    {file = "python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427"},
+]
+
+[package.dependencies]
+six = ">=1.5"
+
+[[package]]
+name = "six"
+version = "1.16.0"
+description = "Python 2 and 3 compatibility utilities"
+optional = false
+python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*"
+files = [
+    {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"},
+    {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"},
+]
+
+[metadata]
+lock-version = "2.0"
+python-versions = "^3.11"
+content-hash = "822c6f7ddf2552d097c1bfc8399a2492c845c74cb4576a423adf3ad62850ffc3"
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..f203360
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,18 @@
+[tool.poetry]
+name = "python-edtf"
+version = "0.1.0"
+description = ""
+authors = ["Andrew Hankinson <andrew.hankinson@gmail.com>"]
+readme = "README.md"
+packages = [{include = "python_edtf"}]
+
+[tool.poetry.dependencies]
+python = "^3.11"
+python-dateutil = "^2.9.0.post0"
+pyparsing = "^3.1.2"
+six = "^1.16.0"
+
+
+[build-system]
+requires = ["poetry-core"]
+build-backend = "poetry.core.masonry.api"

From 80fdd60cbb590d7139341293185628d6aa8cac5b Mon Sep 17 00:00:00 2001
From: Andrew Hankinson <andrew.hankinson@gmail.com>
Date: Fri, 26 Apr 2024 15:49:58 +0200
Subject: [PATCH 04/14] Update dependency management

---
 pyproject.toml | 2 +-
 setup.py       | 1 -
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index f203360..f1d7c5f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,5 +1,5 @@
 [tool.poetry]
-name = "python-edtf"
+name = "edtf"
 version = "0.1.0"
 description = ""
 authors = ["Andrew Hankinson <andrew.hankinson@gmail.com>"]
diff --git a/setup.py b/setup.py
index f0f1849..f2cc7d5 100644
--- a/setup.py
+++ b/setup.py
@@ -1,7 +1,6 @@
 from __future__ import print_function
 
 import setuptools
-import sys
 
 def readme():
     with open('README.md') as f:

From c12d759732d393ac66faa462b8d61b057c675d17 Mon Sep 17 00:00:00 2001
From: Andrew Hankinson <andrew.hankinson@gmail.com>
Date: Fri, 26 Apr 2024 15:55:52 +0200
Subject: [PATCH 05/14] Relax Python requirement to ^3.9 and fix package metadata

---
 poetry.lock    | 4 ++--
 pyproject.toml | 6 +++---
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/poetry.lock b/poetry.lock
index 745843e..c4b40b6 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -41,5 +41,5 @@ files = [
 
 [metadata]
 lock-version = "2.0"
-python-versions = "^3.11"
-content-hash = "822c6f7ddf2552d097c1bfc8399a2492c845c74cb4576a423adf3ad62850ffc3"
+python-versions = "^3.9"
+content-hash = "e6be32f86f1a6af0695f6846b57ed289e015b5634c7f574c45800095a84e2200"
diff --git a/pyproject.toml b/pyproject.toml
index f1d7c5f..9af9ee4 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,13 +1,13 @@
 [tool.poetry]
 name = "edtf"
-version = "0.1.0"
+version = "4.0.1+enh"
 description = ""
 authors = ["Andrew Hankinson <andrew.hankinson@gmail.com>"]
 readme = "README.md"
-packages = [{include = "python_edtf"}]
+packages = [{include = "edtf"}]
 
 [tool.poetry.dependencies]
-python = "^3.11"
+python = "^3.9"
 python-dateutil = "^2.9.0.post0"
 pyparsing = "^3.1.2"
 six = "^1.16.0"

From 6e508d016e9bbcc49b90d3c88ca3512d69a0d193 Mon Sep 17 00:00:00 2001
From: Andrew Hankinson <andrew.hankinson@gmail.com>
Date: Tue, 23 Jul 2024 17:03:14 +0200
Subject: [PATCH 06/14] Optimized regexes

---
 edtf/natlang/en.py | 126 ++++++++++++++++++++++-----------------------
 1 file changed, 63 insertions(+), 63 deletions(-)

diff --git a/edtf/natlang/en.py b/edtf/natlang/en.py
index 5263e07..4f68f21 100644
--- a/edtf/natlang/en.py
+++ b/edtf/natlang/en.py
@@ -1,9 +1,10 @@
 """Utilities to derive an EDTF string from an (English) natural language string."""
 from datetime import datetime
+from typing import Optional
+
 from dateutil.parser import parse
 import re
 from edtf import appsettings
-from six.moves import xrange
 
 
 # two dates where every digit of an ISO date representation is different,
@@ -12,24 +13,43 @@
 DEFAULT_DATE_1 = datetime(1234, 1, 1, 0, 0)
 DEFAULT_DATE_2 = datetime(5678, 10, 10, 0, 0)
 
-SHORT_YEAR_RE = r'(-?)([\du])([\dxu])([\dxu])([\dxu])'
-LONG_YEAR_RE = r'y(-?)([1-9]\d\d\d\d+)'
-CENTURY_RE = r'(\d{1,2})(c\.?|(st|nd|rd|th) century)\s?(ad|ce|bc|bce)?'
-CE_RE = r'(\d{1,4}) (ad|ce|bc|bce)'
+SHORT_YEAR_RE = re.compile(r'(-?)([\du])([\dxu])([\dxu])([\dxu])')
+LONG_YEAR_RE = re.compile(r'y(-?)([1-9]\d\d\d\d+)')
+CENTURY_RE = re.compile(r'(\d{1,2})(c\.?|(st|nd|rd|th) century)\s?(ad|ce|bc|bce)?')
+CENTURY_RANGE = re.compile(r'\b(\d\d)(th|st|nd|rd|)-(\d\d)(th|st|nd|rd) [cC]')
+CE_RE = re.compile(r'(\d{1,4}) (ad|ce|bc|bce)')
+ONE_DIGIT_PARTIAL_FIRST = re.compile(r'\d\D\b')
+TWO_DIGIT_PARTIAL_FIRST = re.compile(r'\d\d\b')
+PARTIAL_CHECK = re.compile(r'\b\d\d\d\d$')
+SLASH_YEAR = re.compile(r"(\d\d\d\d)/(\d\d\d\d)")
+BEFORE_CHECK = re.compile(r"\b(?:before|earlier|avant)\b")
+AFTER_CHECK = re.compile(r"\b(after|since|later|aprés|apres)\b")
+APPROX_CHECK = re.compile(r'\b(?:ca?\.? ?\d{4}|circa|approx|approximately|around|about|~\d{3,4})|(?:^~)')
+UNCERTAIN_CHECK = re.compile(r"\b(?:uncertain|possibly|maybe|guess|\d{3,4}\?)")
+UNCERTAIN_REPL = re.compile(r'(\d{4})\?')
+MIGHT_BE_CENTURY = re.compile(r'(\d{2}00)s')
+MIGHT_BE_DECADE = re.compile(r'(\d{3}0)s')
+
+APPROX_CENTURY_RE = re.compile(r'\b(ca?\.?) ?(\d{1,2})(c\.?|(st|nd|rd|th) century)\s?(ad|ce|bc|bce)?')
+UNCERTAIN_CENTURY_RE = re.compile(r'(\d{1,2})(c\.?|(st|nd|rd|th) century)\s?(ad|ce|bc|bce)?\?')
+
+APPROX_CE_RE = re.compile(r'\b(ca?\.?) ?(\d{1,4}) (ad|ce|bc|bce)')
+UNCERTAIN_CE_RE = re.compile(r'(\d{1,4}) (ad|ce|bc|bce)\?')
+
 
 # Set of RE rules that will cause us to abort text processing, since we know
 # the results will be wrong.
 REJECT_RULES = (
-    r'.*dynasty.*',  # Don't parse '23rd Dynasty' to 'uuuu-uu-23'
+    re.compile(r'.*dynasty.*'),  # Don't parse '23rd Dynasty' to 'uuuu-uu-23'
 )
 
 
-def text_to_edtf(text):
+def text_to_edtf(text: str) -> Optional[str]:
     """
     Generate EDTF string equivalent of a given natural language date string.
     """
     if not text:
-        return
+        return None
 
     t = text.lower()
 
@@ -51,18 +71,18 @@ def text_to_edtf(text):
                     # match looks from the beginning of the string, search
                     # looks anywhere.
 
-                    if re.match(r'\d\D\b', d2):  # 1-digit year partial e.g. 1868-9
-                        if re.search(r'\b\d\d\d\d$', d1):  # TODO: evaluate it and see if it's a year
+                    if re.match(ONE_DIGIT_PARTIAL_FIRST, d2):  # 1-digit year partial e.g. 1868-9
+                        if re.search(PARTIAL_CHECK, d1):  # TODO: evaluate it and see if it's a year
                             d2 = d1[-4:-1] + d2
-                    elif re.match(r'\d\d\b', d2):  # 2-digit year partial e.g. 1809-10
-                        if re.search(r'\b\d\d\d\d$', d1):
+                    elif re.match(TWO_DIGIT_PARTIAL_FIRST, d2):  # 2-digit year partial e.g. 1809-10
+                        if re.search(PARTIAL_CHECK, d1):
                             d2 = d1[-4:-2] + d2
                     else:
-                        century_range_match = re.search(r'\b(\d\d)(th|st|nd|rd|)-(\d\d)(th|st|nd|rd) [cC]', "%s-%s" % (d1,d2))
+                        century_range_match = re.search(CENTURY_RANGE, f"{d1}-{d2}")
                         if century_range_match:
                             g = century_range_match.groups()
-                            d1 = "%sC" % g[0]
-                            d2 = "%sC" % g[2]
+                            d1 = f"{g[0]}C"
+                            d2 = f"{g[2]}C"
 
                     r1 = text_to_edtf_date(d1)
                     r2 = text_to_edtf_date(d2)
@@ -77,9 +97,9 @@ def text_to_edtf(text):
                 # This whole section could be more friendly.
 
                 else:
-                    int_match = re.search(r"(\d\d\d\d)\/(\d\d\d\d)", list_item)
+                    int_match = re.search(SLASH_YEAR, list_item)
                     if int_match:
-                        return "[%s, %s]" % (int_match.group(1), int_match.group(2))
+                        return f"[{int_match.group(1)}, {int_match.group(2)}]"
 
                 result = text_to_edtf_date(list_item)
                 if result:
@@ -87,23 +107,18 @@ def text_to_edtf(text):
             if result:
                 break
 
-    is_before = re.findall(r'\bbefore\b', t)
-    is_before = is_before or re.findall(r'\bearlier\b', t)
-    is_before = is_before or re.findall(r'\baprés\b', t)
-
-    is_after = re.findall(r'\bafter\b', t)
-    is_after = is_after or re.findall(r'\bsince\b', t)
-    is_after = is_after or re.findall(r'\blater\b', t)
+    is_before = re.findall(BEFORE_CHECK, t)
+    is_after = re.findall(AFTER_CHECK, t)
 
     if is_before:
-        result = u"unknown/%s" % result
+        result = f"unknown/{result}"
     elif is_after:
-        result = u"%s/unknown" % result
+        result = f"{result}/unknown"
 
     return result
 
 
-def text_to_edtf_date(text):
+def text_to_edtf_date(text) -> Optional[str]:
     """
     Return EDTF string equivalent of a given natural language date string.
 
@@ -112,39 +127,29 @@ def text_to_edtf_date(text):
     differ are undefined.
     """
     if not text:
-        return
+        return None
 
     t = text.lower()
     result = ''
 
     for reject_re in REJECT_RULES:
         if re.match(reject_re, t):
-            return
+            return None
 
     # matches on '1800s'. Needs to happen before is_decade.
-    could_be_century = re.findall(r'(\d{2}00)s', t)
+    could_be_century: list = re.findall(MIGHT_BE_CENTURY, t)
     # matches on '1800s' and '1910s'. Removes the 's'.
     # Needs to happen before is_uncertain because e.g. "1860s?"
-    t, is_decade = re.subn(r'(\d{3}0)s', r'\1', t)
+    t, is_decade = re.subn(MIGHT_BE_DECADE, r'\1', t)
 
     # detect approximation signifiers
     # a few 'circa' abbreviations just before the year
-    is_approximate = re.findall(r'\b(ca?\.?) ?\d{4}', t)
+    is_approximate = re.findall(APPROX_CHECK, t)
     # the word 'circa' anywhere
-    is_approximate = is_approximate or re.findall(r'\bcirca\b', t)
-    # the word 'approx'/'around'/'about' anywhere
-    is_approximate = is_approximate or \
-                     re.findall(r'\b(approx|approximately|around|about)', t)
-    # a ~ before a year-ish number
-    is_approximate = is_approximate or re.findall(r'\b~\d{4}', t)
-    # a ~ at the beginning
-    is_approximate = is_approximate or re.findall(r'^~', t)
 
     # detect uncertainty signifiers
-    t, is_uncertain = re.subn(r'(\d{4})\?', r'\1', t)
-    # the words uncertain/maybe/guess anywhere
-    is_uncertain = is_uncertain or re.findall(
-        r'\b(uncertain|possibly|maybe|guess)', t)
+    t, is_uncertain = re.subn(UNCERTAIN_REPL, r'\1', t)
+    is_uncertain = is_uncertain or re.findall(UNCERTAIN_CHECK, t)
 
     # detect century forms
     is_century = re.findall(CENTURY_RE, t)
@@ -153,27 +158,23 @@ def text_to_edtf_date(text):
     is_ce = re.findall(CE_RE, t)
     if is_century:
         result = "%02dxx" % (int(is_century[0][0]) - 1,)
-        is_approximate = is_approximate or \
-                         re.findall(r'\b(ca?\.?) ?' + CENTURY_RE, t)
-        is_uncertain = is_uncertain or re.findall(CENTURY_RE + r'\?', t)
+        is_approximate = is_approximate or re.findall(APPROX_CENTURY_RE, t)
+        is_uncertain = is_uncertain or re.findall(UNCERTAIN_CENTURY_RE, t)
 
         try:
-            is_bc = is_century[0][-1] in ("bc", "bce")
-            if is_bc:
-                result = "-%s" % result
+            if is_century[0][-1] in ("bc", "bce"):
+                result = f"-{result}"
         except IndexError:
             pass
 
     elif is_ce:
         result = "%04d" % (int(is_ce[0][0]))
-        is_approximate = is_approximate or \
-                         re.findall(r'\b(ca?\.?) ?' + CE_RE, t)
-        is_uncertain = is_uncertain or re.findall(CE_RE + r'\?', t)
+        is_approximate = is_approximate or re.findall(APPROX_CE_RE, t)
+        is_uncertain = is_uncertain or re.findall(UNCERTAIN_CE_RE, t)
 
         try:
-            is_bc = is_ce[0][-1] in ("bc", "bce")
-            if is_bc:
-                result = "-%s" % result
+            if is_ce[0][-1] in ("bc", "bce"):
+                result = f"-{result}"
         except IndexError:
             pass
 
@@ -200,12 +201,12 @@ def text_to_edtf_date(text):
             )
 
         except ValueError:
-            return
+            return None
 
         if dt1.date() == DEFAULT_DATE_1.date() and \
                 dt2.date() == DEFAULT_DATE_2.date():
             # couldn't parse anything - defaults are untouched.
-            return
+            return None
 
         date1 = dt1.isoformat()[:10]
         date2 = dt2.isoformat()[:10]
@@ -215,14 +216,13 @@ def text_to_edtf_date(text):
         mentions_month = re.findall(r'\bmonth\b.+(in|during)\b', t)
         mentions_day = re.findall(r'\bday\b.+(in|during)\b', t)
 
-        for i in xrange(len(date1)):
+        for i in range(len(date1)):
             # if the given year could be a century (e.g. '1800s') then use
             # approximate/uncertain markers to decide whether we treat it as
             # a century or a decade.
-            if i == 2 and could_be_century and \
-                not (is_approximate or is_uncertain):
+            if i == 2 and could_be_century and not (is_approximate or is_uncertain):
                 result += 'x'
-            elif i == 3 and is_decade > 0:
+            elif i == 3 and is_decade:
                 if mentions_year:
                     result += 'u'  # year precision
                 else:
@@ -238,7 +238,7 @@ def text_to_edtf_date(text):
 
         # strip off unknown chars from end of string - except the first 4
 
-        for i in reversed(xrange(len(result))):
+        for i in reversed(range(len(result))):
             if result[i] not in ('u', 'x', '-'):
                 smallest_length = 4
 

From f2252f03c23b1f7a6a153ccf750e97a94ce71dd2 Mon Sep 17 00:00:00 2001
From: Andrew Hankinson <andrew.hankinson@gmail.com>
Date: Tue, 23 Jul 2024 17:18:26 +0200
Subject: [PATCH 07/14] Code cleanup: type hints, f-strings and precompiled regexes

---
 edtf/convert.py               |   8 +-
 edtf/jdutil.py                |  32 +++----
 edtf/natlang/en.py            |  11 ++-
 edtf/natlang/tests.py         |   4 +-
 edtf/parser/grammar.py        |  14 +--
 edtf/parser/parser_classes.py | 159 +++++++++++++++++-----------------
 edtf/parser/tests.py          |  66 +++++++-------
 7 files changed, 152 insertions(+), 142 deletions(-)

diff --git a/edtf/convert.py b/edtf/convert.py
index c1bfd3a..de1f2a2 100644
--- a/edtf/convert.py
+++ b/edtf/convert.py
@@ -59,8 +59,7 @@ def trim_struct_time(st, strip_time=False):
     """
     if strip_time:
         return struct_time(list(st[:3]) + TIME_EMPTY_TIME + TIME_EMPTY_EXTRAS)
-    else:
-        return struct_time(list(st[:6]) + TIME_EMPTY_EXTRAS)
+    return struct_time(list(st[:6]) + TIME_EMPTY_EXTRAS)
 
 
 def struct_time_to_jd(st):
@@ -106,7 +105,7 @@ def jd_to_struct_time(jd):
     )
 
 
-def _roll_negative_time_fields(year, month, day, hour, minute, second):
+def _roll_negative_time_fields(year, month, day, hour, minute, second) -> tuple:
     """
     Fix date/time fields which have nonsense negative values for any field
     except for year by rolling the overall date/time value backwards, treating
@@ -142,4 +141,5 @@ def _roll_negative_time_fields(year, month, day, hour, minute, second):
         year += int(month / 12.0)  # Adjust by whole year in months
         year -= 1  # Subtract 1 for negative minutes
         month %= 12  # Convert negative month to positive remainder
-    return (year, month, day, hour, minute, second)
+
+    return year, month, day, hour, minute, second
diff --git a/edtf/jdutil.py b/edtf/jdutil.py
index 9fabdd1..4a12b58 100644
--- a/edtf/jdutil.py
+++ b/edtf/jdutil.py
@@ -17,7 +17,8 @@
 #       10-14-1582 never occurred. Python datetime objects will produce incorrect
 #       time deltas if one date is from before 10-15-1582.
 
-def mjd_to_jd(mjd):
+
+def mjd_to_jd(mjd: float) -> float:
     """
     Convert Modified Julian Day to Julian Day.
 
@@ -30,13 +31,11 @@ def mjd_to_jd(mjd):
     -------
     jd : float
         Julian Day
-
-
     """
     return mjd + 2400000.5
 
 
-def jd_to_mjd(jd):
+def jd_to_mjd(jd: float) -> float:
     """
     Convert Julian Day to Modified Julian Day
 
@@ -54,7 +53,7 @@ def jd_to_mjd(jd):
     return jd - 2400000.5
 
 
-def date_to_jd(year,month,day):
+def date_to_jd(year: int, month: int, day: float) -> float:
     """
     Convert a date to Julian Day.
 
@@ -117,7 +116,7 @@ def date_to_jd(year,month,day):
     return jd
 
 
-def jd_to_date(jd):
+def jd_to_date(jd: float) -> (int, int, float):
     """
     Convert Julian Day to date.
 
@@ -184,7 +183,10 @@ def jd_to_date(jd):
     return year, month, day
 
 
-def hmsm_to_days(hour=0,min=0,sec=0,micro=0):
+def hmsm_to_days(hour: int = 0,
+                 min: int = 0,
+                 sec: int = 0,
+                 micro: int = 0) -> float:
     """
     Convert hours, minutes, seconds, and microseconds to fractional days.
 
@@ -222,7 +224,7 @@ def hmsm_to_days(hour=0,min=0,sec=0,micro=0):
     return days / 24.
 
 
-def days_to_hmsm(days):
+def days_to_hmsm(days: float) -> (int, int, int, int):
     """
     Convert fractional days to hours, minutes, seconds, and microseconds.
     Precision beyond microseconds is rounded to the nearest microsecond.
@@ -271,7 +273,7 @@ def days_to_hmsm(days):
     return int(hour), int(min), int(sec), int(micro)
 
 
-def datetime_to_jd(date):
+def datetime_to_jd(date: dt.datetime) -> float:
     """
     Convert a `datetime.datetime` object to Julian Day.
 
@@ -298,7 +300,7 @@ def datetime_to_jd(date):
     return date_to_jd(date.year,date.month,days)
 
 
-def jd_to_datetime(jd):
+def jd_to_datetime(jd: float) -> dt.datetime:
     """
     Convert a Julian Day to an `jdutil.datetime` object.
 
@@ -328,7 +330,7 @@ def jd_to_datetime(jd):
     return datetime(year,month,day,hour,min,sec,micro)
 
 
-def timedelta_to_days(td):
+def timedelta_to_days(td: dt.timedelta) -> float:
     """
     Convert a `datetime.timedelta` object to a total number of days.
 
@@ -372,7 +374,7 @@ class datetime(dt.datetime):
     datetime.datetime : Parent class.
 
     """
-    def __add__(self,other):
+    def __add__(self, other):
         if not isinstance(other,dt.timedelta):
             s = "jdutil.datetime supports '+' only with datetime.timedelta"
             raise TypeError(s)
@@ -383,7 +385,7 @@ def __add__(self,other):
 
         return jd_to_datetime(combined)
 
-    def __radd__(self,other):
+    def __radd__(self, other):
         if not isinstance(other,dt.timedelta):
             s = "jdutil.datetime supports '+' only with datetime.timedelta"
             raise TypeError(s)
@@ -394,7 +396,7 @@ def __radd__(self,other):
 
         return jd_to_datetime(combined)
 
-    def __sub__(self,other):
+    def __sub__(self, other):
         if isinstance(other,dt.timedelta):
             days = timedelta_to_days(other)
 
@@ -412,7 +414,7 @@ def __sub__(self,other):
             s += "datetime.timedelta, jdutil.datetime and datetime.datetime"
             raise TypeError(s)
 
-    def __rsub__(self,other):
+    def __rsub__(self, other):
         if not isinstance(other, (datetime,dt.datetime)):
             s = "jdutil.datetime supports '-' with: "
             s += "jdutil.datetime and datetime.datetime"
diff --git a/edtf/natlang/en.py b/edtf/natlang/en.py
index 4f68f21..8cb72c4 100644
--- a/edtf/natlang/en.py
+++ b/edtf/natlang/en.py
@@ -36,6 +36,11 @@
 APPROX_CE_RE = re.compile(r'\b(ca?\.?) ?(\d{1,4}) (ad|ce|bc|bce)')
 UNCERTAIN_CE_RE = re.compile(r'(\d{1,4}) (ad|ce|bc|bce)\?')
 
+MENTIONS_YEAR = re.compile(r'\byear\b.+(in|during)\b')
+MENTIONS_MONTH = re.compile(r'\bmonth\b.+(in|during)\b')
+MENTIONS_DAY = re.compile(r'\bday\b.+(in|during)\b')
+
+
 
 # Set of RE rules that will cause us to abort text processing, since we know
 # the results will be wrong.
@@ -212,9 +217,9 @@ def text_to_edtf_date(text) -> Optional[str]:
         date2 = dt2.isoformat()[:10]
 
         # guess precision of 'unspecified' characters to use
-        mentions_year = re.findall(r'\byear\b.+(in|during)\b', t)
-        mentions_month = re.findall(r'\bmonth\b.+(in|during)\b', t)
-        mentions_day = re.findall(r'\bday\b.+(in|during)\b', t)
+        mentions_year = re.findall(MENTIONS_YEAR, t)
+        mentions_month = re.findall(MENTIONS_MONTH, t)
+        mentions_day = re.findall(MENTIONS_DAY, t)
 
         for i in range(len(date1)):
             # if the given year could be a century (e.g. '1800s') then use
diff --git a/edtf/natlang/tests.py b/edtf/natlang/tests.py
index ea137d2..d18ec76 100644
--- a/edtf/natlang/tests.py
+++ b/edtf/natlang/tests.py
@@ -207,8 +207,8 @@ def test_natlang(self):
         """
         for i, o in EXAMPLES:
             e = text_to_edtf(i)
-            print("%s => %s" % (i, e))
-            self.assertEqual(e, o)
+            print(f"{i} => {e}")
+            self.assertEqual(e, o, msg=f"Testing {i}")
 
 
 if __name__ == '__main__':
diff --git a/edtf/parser/grammar.py b/edtf/parser/grammar.py
index d612c5f..14cb3a4 100644
--- a/edtf/parser/grammar.py
+++ b/edtf/parser/grammar.py
@@ -282,14 +282,16 @@ def f(toks):
 edtfParser = level0Expression("level0") ^ level1Expression("level1") ^ level2Expression("level2")
 
 
-def parse_edtf(str, parseAll=True, fail_silently=False):
+def parse_edtf(inp: str, parse_all: bool = True, fail_silently: bool = False):
+    if not inp:
+        raise ParseException("You must supply some input text")
+
     try:
-        if not str:
-            raise ParseException("You must supply some input text")
-        p = edtfParser.parseString(str.strip(), parseAll)
-        if p:
-            return p[0]
+        p = edtfParser.parseString(inp.strip(), parse_all)
     except ParseException as e:
         if fail_silently:
             return None
         raise EDTFParseException(e)
+
+    if p:
+        return p[0]
diff --git a/edtf/parser/parser_classes.py b/edtf/parser/parser_classes.py
index b670296..ae7adb4 100644
--- a/edtf/parser/parser_classes.py
+++ b/edtf/parser/parser_classes.py
@@ -3,6 +3,7 @@
 from time import struct_time
 from datetime import date, datetime
 from operator import add, sub
+from typing import Optional
 
 from dateutil.relativedelta import relativedelta
 
@@ -22,7 +23,7 @@
 PRECISION_DAY = "day"
 
 
-def days_in_month(year, month):
+def days_in_month(year: int, month: int) -> dict:
     """
     Return the number of days in the given year and month, where month is
     1=January to 12=December, and respecting leap years as identified by
@@ -85,11 +86,15 @@ def apply_delta(op, time_struct, delta):
 
 class EDTFObject(object):
     """
-    Object to attact to a parser to become instantiated when the parser
+    Object to attach to a parser to become instantiated when the parser
     completes.
     """
     parser = None
 
+    def __init__(self, *args, **kwargs):
+        errmsg: str = f"{type(self).__name__}.__init__(*{args}, **{kwargs})"
+        raise NotImplementedError(f"{errmsg} is not implemented.")
+
     @classmethod
     def set_parser(cls, p):
         cls.parser = p
@@ -99,7 +104,7 @@ def set_parser(cls, p):
     def parse_action(cls, toks):
         kwargs = toks.asDict()
         try:
-            return cls(**kwargs) # replace the token list with the class
+            return cls(**kwargs)  # replace the token list with the class
         except Exception as e:
             print("trying to %s.__init__(**%s)" % (cls.__name__, kwargs))
             raise e
@@ -109,19 +114,12 @@ def parse(cls, s):
         return cls.parser.parseString(s)[0]
 
     def __repr__(self):
-        return "%s: '%s'" % (type(self).__name__, str(self))
-
-    def __init__(self, *args, **kwargs):
-        str = "%s.__init__(*%s, **%s)" % (
-            type(self).__name__,
-            args, kwargs,
-        )
-        raise NotImplementedError("%s is not implemented." % str)
+        return f"{type(self).__name__}: '{str(self)}'"
 
     def __str__(self):
         raise NotImplementedError
 
-    def _strict_date(self, lean):
+    def _strict_date(self, lean: str):
         raise NotImplementedError
 
     def lower_strict(self):
@@ -130,7 +128,7 @@ def lower_strict(self):
     def upper_strict(self):
         return self._strict_date(lean=LATEST)
 
-    def _get_fuzzy_padding(self, lean):
+    def _get_fuzzy_padding(self, lean: str):
         """
         Subclasses should override this to pad based on how precise they are.
         """
@@ -216,41 +214,40 @@ def __le__(self, other):
 # (* ************************** Level 0 *************************** *)
 
 class Date(EDTFObject):
+    def __init__(self, year=None, month=None, day=None, **kwargs):
+        for param in ('date', 'lower', 'upper'):
+            if param in kwargs:
+                self.__init__(**kwargs[param])
+                return
+
+        self.year = year # Year is required, but sometimes passed in as a 'date' dict.
+        self.month = month
+        self.day = day
 
-    def set_year(self, y):
+    def set_year(self, y: int):
         if y is None:
             raise AttributeError("Year must not be None")
         self._year = y
 
-    def get_year(self):
+    def get_year(self) -> int:
         return self._year
     year = property(get_year, set_year)
 
-    def set_month(self, m):
+    def set_month(self, m: Optional[int]):
         self._month = m
-        if m == None:
+        if m is None:
             self.day = None
 
-    def get_month(self):
+    def get_month(self) -> Optional[int]:
         return self._month
     month = property(get_month, set_month)
 
-    def __init__(self, year=None, month=None, day=None, **kwargs):
-        for param in ('date', 'lower', 'upper'):
-            if param in kwargs:
-                self.__init__(**kwargs[param])
-                return
-
-        self.year = year # Year is required, but sometimes passed in as a 'date' dict.
-        self.month = month
-        self.day = day
-
     def __str__(self):
         r = self.year
         if self.month:
-            r += "-%s" % self.month
+            r += f"-{self.month}"
             if self.day:
-                r += "-%s" % self.day
+                r += f"-{self.day}"
         return r
 
     def isoformat(self, default=date.max):
@@ -260,14 +257,14 @@ def isoformat(self, default=date.max):
             int(self.day or default.day),
         )
 
-    def _precise_year(self, lean):
+    def _precise_year(self, lean: str):
         # Replace any ambiguous characters in the year string with 0s or 9s
         if lean == EARLIEST:
             return int(re.sub(r'[xu]', r'0', self.year))
         else:
             return int(re.sub(r'[xu]', r'9', self.year))
 
-    def _precise_month(self, lean):
+    def _precise_month(self, lean: str):
         if self.month and self.month != "uu":
             try:
                 return int(self.month)
@@ -276,7 +273,7 @@ def _precise_month(self, lean):
         else:
             return 1 if lean == EARLIEST else 12
 
-    def _precise_day(self, lean):
+    def _precise_day(self, lean: str):
         if not self.day or self.day == 'uu':
             if lean == EARLIEST:
                 return 1
@@ -343,7 +340,7 @@ def __init__(self, lower, upper):
         self.upper = upper
 
     def __str__(self):
-        return "%s/%s" % (self.lower, self.upper)
+        return f"{self.lower}/{self.upper}"
 
     def _strict_date(self, lean):
         if lean == EARLIEST:
@@ -416,8 +413,8 @@ def __str__(self):
     def _strict_date(self, lean):
         if self.date == "open":
             return dt_to_struct_time(date.today())
-        if self.date =="unknown":
-            return None # depends on the other date
+        if self.date == "unknown":
+            return None  # depends on the other date
         return self.date._strict_date(lean)
 
     def _get_fuzzy_padding(self, lean):
@@ -454,12 +451,12 @@ def __init__(self, year):
         self.year = year
 
     def __str__(self):
-        return "y%s" % self.year
+        return f"y{self.year}"
 
     def _precise_year(self):
         return int(self.year)
 
-    def _strict_date(self, lean):
+    def _strict_date(self, lean: str):
         py = self._precise_year()
         if lean == EARLIEST:
             return struct_time(
@@ -478,30 +475,26 @@ def __init__(self, year, season, **kwargs):
         self.day = None
 
     def __str__(self):
-        return "%s-%s" % (self.year, self.season)
+        return f"{self.year}-{self.season}"
 
     def _precise_month(self, lean):
         rng = appsettings.SEASON_MONTHS_RANGE[int(self.season)]
         if lean == EARLIEST:
             return rng[0]
-        else:
-            return rng[1]
+
+        return rng[1]
 
 
 # (* ************************** Level 2 *************************** *)
 
 
 class PartialUncertainOrApproximate(Date):
-
-    def set_year(self, y): # Year can be None.
-        self._year = y
-    year = property(Date.get_year, set_year)
-
     def __init__(
         self, year=None, month=None, day=None,
-        year_ua=False, month_ua = False, day_ua = False,
-        year_month_ua = False, month_day_ua = False,
-        ssn=None, season_ua=False, all_ua=False
+        year_ua: Optional[UA] = None, month_ua: Optional[UA] = None,
+        day_ua: Optional[UA] = None, year_month_ua: Optional[UA] = None,
+        month_day_ua: Optional[UA] = None, ssn=None,
+        season_ua: Optional[UA] = None, all_ua: Optional[UA] = None
     ):
         self.year = year
         self.month = month
@@ -520,56 +513,60 @@ def __init__(
         self.all_ua = all_ua
 
     def __str__(self):
-
         if self.season_ua:
-            return "%s%s" % (self.season, self.season_ua)
+            return f"{self.season}{self.season_ua}"
 
         if self.year_ua:
-            y = "%s%s" % (self.year, self.year_ua)
+            y = f"{self.year}{self.year_ua}"
         else:
             y = str(self.year)
 
         if self.month_ua:
-            m = "(%s)%s" % (self.month, self.month_ua)
+            m = f"({self.month}){self.month_ua}"
         else:
             m = str(self.month)
 
         if self.day:
             if self.day_ua:
-                d = "(%s)%s" % (self.day, self.day_ua)
+                d = f"({self.day}){self.day_ua}"
             else:
                 d = str(self.day)
         else:
             d = None
 
         if self.year_month_ua: # year/month approximate. No brackets needed.
-            ym = "%s-%s%s" % (y, m, self.year_month_ua)
+            ym = f"{y}-{m}{self.year_month_ua}"
             if d:
-                result = "%s-%s" % (ym, d)
+                result = f"{ym}-{d}"
             else:
                 result = ym
+
         elif self.month_day_ua:
-            if self.year_ua: # we don't need the brackets round month and day
-                result = "%s-%s-%s%s" % (y, m, d, self.month_day_ua)
+            if self.year_ua:  # we don't need the brackets round month and day
+                result = f"{y}-{m}-{d}{self.month_day_ua}"
             else:
-                result = "%s-(%s-%s)%s" % (y, m, d, self.month_day_ua)
+                result = f"{y}-({m}-{d}){self.month_day_ua}"
         else:
             if d:
-                result = "%s-%s-%s" % (y, m, d)
+                result = f"{y}-{m}-{d}"
             else:
-                result = "%s-%s" % (y, m)
+                result = f"{y}-{m}"
 
         if self.all_ua:
-            result = "(%s)%s" % (result, self.all_ua)
+            result = f"({result}){self.all_ua}"
 
         return result
 
-    def _precise_year(self, lean):
+    def set_year(self, y): # Year can be None.
+        self._year = y
+    year = property(Date.get_year, set_year)
+
+    def _precise_year(self, lean: str):
         if self.season:
             return self.season._precise_year(lean)
         return super(PartialUncertainOrApproximate, self)._precise_year(lean)
 
-    def _precise_month(self, lean):
+    def _precise_month(self, lean: str):
         if self.season:
             return self.season._precise_month(lean)
         return super(PartialUncertainOrApproximate, self)._precise_month(lean)
@@ -638,7 +635,7 @@ def __init__(self, lower=None, upper=None):
             self.upper = upper
 
     def __str__(self):
-        return "%s..%s" % (self.lower or '', self.upper or '')
+        return f"{self.lower or ''}..{self.upper or ''}"
 
 
 class EarlierConsecutives(Consecutives):
@@ -650,41 +647,40 @@ class LaterConsecutives(Consecutives):
 
 
 class OneOfASet(EDTFObject):
+    def __init__(self, *args):
+        self.objects = args
+
     @classmethod
     def parse_action(cls, toks):
         args = [t for t in toks.asList() if isinstance(t, EDTFObject)]
         return cls(*args)
 
-    def __init__(self, *args):
-        self.objects = args
-
     def __str__(self):
-        return "[%s]" % (", ".join([str(o) for o in self.objects]))
+        return f"[{', '.join([str(o) for o in self.objects])}]"
 
-    def _strict_date(self, lean):
+    def _strict_date(self, lean: str):
         if lean == LATEST:
             return max([x._strict_date(lean) for x in self.objects])
-        else:
-            return min([x._strict_date(lean) for x in self.objects])
+
+        return min([x._strict_date(lean) for x in self.objects])
 
 
 class MultipleDates(EDTFObject):
+    def __init__(self, *args):
+        self.objects = args
+
     @classmethod
     def parse_action(cls, toks):
         args = [t for t in toks.asList() if isinstance(t, EDTFObject)]
         return cls(*args)
 
-    def __init__(self, *args):
-        self.objects = args
-
     def __str__(self):
-        return "{%s}" % (", ".join([str(o) for o in self.objects]))
+        return f"{{{', '.join([str(o) for o in self.objects])}}}"
 
     def _strict_date(self, lean):
         if lean == LATEST:
             return max([x._strict_date(lean) for x in self.objects])
-        else:
-            return min([x._strict_date(lean) for x in self.objects])
+        return min([x._strict_date(lean) for x in self.objects])
 
 
 class MaskedPrecision(Date):
@@ -695,12 +691,13 @@ class Level2Interval(Level1Interval):
     def __init__(self, lower, upper):
         # Check whether incoming lower/upper values are single-item lists, and
         # if so take just the first item. This works around what I *think* is a
-        # bug in the grammer that provides us with single-item lists of
+        # bug in the grammar that provides us with single-item lists of
         # `PartialUncertainOrApproximate` items for lower/upper values.
         if isinstance(lower, (tuple, list)) and len(lower) == 1:
             self.lower = lower[0]
         else:
             self.lower = lower
+
         if isinstance(lower, (tuple, list)) and len(upper) == 1:
             self.upper = upper[0]
         else:
@@ -718,7 +715,7 @@ def _precise_year(self):
 
     def get_year(self):
         if self.precision:
-            return '%se%sp%s' % (self.base, self.exponent, self.precision)
+            return f'{self.base}e{self.exponent}p{self.precision}'
         else:
-            return '%se%s' % (self.base, self.exponent)
+            return f'{self.base}e{self.exponent}'
     year = property(get_year)
diff --git a/edtf/parser/tests.py b/edtf/parser/tests.py
index f9dde42..77c2ad3 100644
--- a/edtf/parser/tests.py
+++ b/edtf/parser/tests.py
@@ -3,10 +3,11 @@
 from datetime import date
 from time import struct_time
 
+from pyparsing import ParseException
+
 from edtf.parser.grammar import parse_edtf as parse
 from edtf.parser.parser_classes import EDTFObject, TIME_EMPTY_TIME, \
     TIME_EMPTY_EXTRAS
-from edtf.parser.edtf_exceptions import EDTFParseException
 
 # Example object types and attributes.
 # the first item in each tuple is the input EDTF string, and expected parse result.
@@ -192,17 +193,30 @@
     None,
     '',
     'not a edtf string',
-    'y17e7-12-26', # not implemented
-    '2016-13-08', # wrong day order
-    '2016-02-39', # out of range
+    'y17e7-12-26',  # not implemented
+    '2016-13-08',  # wrong day order
+    '2016-02-39',  # out of range
     '-0000-01-01',  # negative zero year
 )
 
 
 class TestParsing(unittest.TestCase):
+    def iso_to_struct_time(self, iso_date):
+        """ Convert YYYY-mm-dd date strings to time structs """
+        if iso_date[0] == '-':
+            is_negative = True
+            iso_date = iso_date[1:]
+        else:
+            is_negative = False
+        y, mo, d = [int(i) for i in iso_date.split('-')]
+        if is_negative:
+            y *= -1
+        return struct_time(
+            [y, mo, d] + TIME_EMPTY_TIME + TIME_EMPTY_EXTRAS)
+
     def test_non_parsing(self):
         for i in BAD_EXAMPLES:
-            self.assertRaises(EDTFParseException, parse, i)
+            self.assertRaises(ParseException, parse, i)
 
     def test_date_values(self):
         """
@@ -217,13 +231,15 @@ def test_date_values(self):
             else:
                 o = i
 
-            sys.stdout.write("parsing '%s'" % i)
+            sys.stdout.write(f"parsing '{i}'")
             f = parse(i)
-            sys.stdout.write(" => %s()\n" % type(f).__name__)
+            sys.stdout.write(f" => {type(f).__name__}()\n")
             self.assertIsInstance(f, EDTFObject)
-            self.assertEqual(str(f), o)
+            self.assertEqual(str(f), o, msg=f"Testing {i}")
 
-            if len(e) == 5:
+            if len(e) == 1:
+                continue
+            elif len(e) == 5:
                 expected_lower_strict = e[1]
                 expected_upper_strict = e[2]
                 expected_lower_fuzzy = e[3]
@@ -243,33 +259,21 @@ def test_date_values(self):
                 expected_upper_strict = e[1]
                 expected_lower_fuzzy = e[1]
                 expected_upper_fuzzy = e[1]
-            if len(e) == 1:
+            else:
+                print(f"Unexpected value {e}; skipping.")
                 continue
 
-            def iso_to_struct_time(iso_date):
-                """ Convert YYYY-mm-dd date strings to time structs """
-                if iso_date[0] == '-':
-                    is_negative = True
-                    iso_date = iso_date[1:]
-                else:
-                    is_negative = False
-                y, mo, d = [int(i) for i in iso_date.split('-')]
-                if is_negative:
-                    y *= -1
-                return struct_time(
-                    [y, mo, d] + TIME_EMPTY_TIME + TIME_EMPTY_EXTRAS)
-
             # Convert string date representations into `struct_time`s
-            expected_lower_strict = iso_to_struct_time(expected_lower_strict)
-            expected_upper_strict = iso_to_struct_time(expected_upper_strict)
-            expected_lower_fuzzy = iso_to_struct_time(expected_lower_fuzzy)
-            expected_upper_fuzzy = iso_to_struct_time(expected_upper_fuzzy)
+            exp_lower_str = self.iso_to_struct_time(expected_lower_strict)
+            exp_upper_str = self.iso_to_struct_time(expected_upper_strict)
+            exp_lower_fuzz = self.iso_to_struct_time(expected_lower_fuzzy)
+            exp_upper_fuzz = self.iso_to_struct_time(expected_upper_fuzzy)
 
             try:
-                self.assertEqual(f.lower_strict(), expected_lower_strict)
-                self.assertEqual(f.upper_strict(), expected_upper_strict)
-                self.assertEqual(f.lower_fuzzy(), expected_lower_fuzzy)
-                self.assertEqual(f.upper_fuzzy(), expected_upper_fuzzy)
+                self.assertEqual(f.lower_strict(), exp_lower_str)
+                self.assertEqual(f.upper_strict(), exp_upper_str)
+                self.assertEqual(f.lower_fuzzy(), exp_lower_fuzz)
+                self.assertEqual(f.upper_fuzzy(), exp_upper_fuzz)
             except Exception as x:
                 # Write to stdout for manual debugging, I guess
                 sys.stdout.write(str(x))

From 06ab934befb7a665301587134794ddbc50b60964 Mon Sep 17 00:00:00 2001
From: Andrew Hankinson <andrew.hankinson@gmail.com>
Date: Wed, 24 Jul 2024 11:18:51 +0200
Subject: [PATCH 08/14] Further optimizations

---
 edtf/natlang/en.py | 17 +++++++----------
 1 file changed, 7 insertions(+), 10 deletions(-)

diff --git a/edtf/natlang/en.py b/edtf/natlang/en.py
index 8cb72c4..d7d7b8d 100644
--- a/edtf/natlang/en.py
+++ b/edtf/natlang/en.py
@@ -1,4 +1,5 @@
 """Utilities to derive an EDTF string from an (English) natural language string."""
+import functools
 from datetime import datetime
 from typing import Optional
 
@@ -40,15 +41,12 @@
 MENTIONS_MONTH = re.compile(r'\bmonth\b.+(in|during)\b')
 MENTIONS_DAY = re.compile(r'\bday\b.+(in|during)\b')
 
-
-
 # Set of RE rules that will cause us to abort text processing, since we know
 # the results will be wrong.
-REJECT_RULES = (
-    re.compile(r'.*dynasty.*'),  # Don't parse '23rd Dynasty' to 'uuuu-uu-23'
-)
+REJECT_RULES = re.compile(r'.*dynasty.*')  # Don't parse '23rd Dynasty' to 'uuuu-uu-23'
 
 
+@functools.lru_cache()
 def text_to_edtf(text: str) -> Optional[str]:
     """
     Generate EDTF string equivalent of a given natural language date string.
@@ -123,7 +121,8 @@ def text_to_edtf(text: str) -> Optional[str]:
     return result
 
 
-def text_to_edtf_date(text) -> Optional[str]:
+@functools.lru_cache()
+def text_to_edtf_date(text: str) -> Optional[str]:
     """
     Return EDTF string equivalent of a given natural language date string.
 
@@ -137,9 +136,8 @@ def text_to_edtf_date(text) -> Optional[str]:
     t = text.lower()
     result = ''
 
-    for reject_re in REJECT_RULES:
-        if re.match(reject_re, t):
-            return None
+    if re.match(REJECT_RULES, t):
+        return None
 
     # matches on '1800s'. Needs to happen before is_decade.
     could_be_century: list = re.findall(MIGHT_BE_CENTURY, t)
@@ -185,7 +183,6 @@ def text_to_edtf_date(text) -> Optional[str]:
 
     else:
         # try dateutil.parse
-
         try:
             # parse twice, using different defaults to see what was
             # parsed and what was guessed.

From c9cb56fe7dfcfe3f55ee981106bce7e73e7b7554 Mon Sep 17 00:00:00 2001
From: Andrew Hankinson <andrew.hankinson@gmail.com>
Date: Mon, 12 Aug 2024 14:27:41 +0200
Subject: [PATCH 09/14] Update gitignore

---
 .gitignore | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/.gitignore b/.gitignore
index ba74660..4d58675 100644
--- a/.gitignore
+++ b/.gitignore
@@ -55,3 +55,5 @@ docs/_build/
 
 # PyBuilder
 target/
+.idea
+.DS_Store

From 9e51373eea989f4ea306408138b31ce53bdef1ab Mon Sep 17 00:00:00 2001
From: Andrew Hankinson <andrew.hankinson@gmail.com>
Date: Tue, 13 Aug 2024 15:01:47 +0200
Subject: [PATCH 10/14] Black formatting, updates

---
 edtf/natlang/en.py | 101 +++++++++++++++++++++++++--------------------
 1 file changed, 57 insertions(+), 44 deletions(-)

diff --git a/edtf/natlang/en.py b/edtf/natlang/en.py
index d7d7b8d..191199e 100644
--- a/edtf/natlang/en.py
+++ b/edtf/natlang/en.py
@@ -14,36 +14,42 @@
 DEFAULT_DATE_1 = datetime(1234, 1, 1, 0, 0)
 DEFAULT_DATE_2 = datetime(5678, 10, 10, 0, 0)
 
-SHORT_YEAR_RE = re.compile(r'(-?)([\du])([\dxu])([\dxu])([\dxu])')
-LONG_YEAR_RE = re.compile(r'y(-?)([1-9]\d\d\d\d+)')
-CENTURY_RE = re.compile(r'(\d{1,2})(c\.?|(st|nd|rd|th) century)\s?(ad|ce|bc|bce)?')
-CENTURY_RANGE = re.compile(r'\b(\d\d)(th|st|nd|rd|)-(\d\d)(th|st|nd|rd) [cC]')
-CE_RE = re.compile(r'(\d{1,4}) (ad|ce|bc|bce)')
-ONE_DIGIT_PARTIAL_FIRST = re.compile(r'\d\D\b')
-TWO_DIGIT_PARTIAL_FIRST = re.compile(r'\d\d\b')
-PARTIAL_CHECK = re.compile(r'\b\d\d\d\d$')
+SHORT_YEAR_RE = re.compile(r"(-?)([\du])([\dxu])([\dxu])([\dxu])")
+LONG_YEAR_RE = re.compile(r"y(-?)([1-9]\d\d\d\d+)")
+CENTURY_RE = re.compile(r"(\d{1,2})(c\.?|(st|nd|rd|th) century)\s?(ad|ce|bc|bce)?")
+CENTURY_RANGE = re.compile(r"\b(\d\d)(th|st|nd|rd|)-(\d\d)(th|st|nd|rd) [cC]")
+CE_RE = re.compile(r"(\d{1,4}) (ad|ce|bc|bce)")
+ONE_DIGIT_PARTIAL_FIRST = re.compile(r"\d\D\b")
+TWO_DIGIT_PARTIAL_FIRST = re.compile(r"\d\d\b")
+PARTIAL_CHECK = re.compile(r"\b\d\d\d\d$")
 SLASH_YEAR = re.compile(r"(\d\d\d\d)/(\d\d\d\d)")
 BEFORE_CHECK = re.compile(r"\b(?:before|earlier|avant)\b")
 AFTER_CHECK = re.compile(r"\b(after|since|later|aprés|apres)\b")
-APPROX_CHECK = re.compile(r'\b(?:ca?\.? ?\d{4}|circa|approx|approximately|around|about|~\d{3,4})|(?:^~)')
+APPROX_CHECK = re.compile(
+    r"\b(?:ca?\.? ?\d{4}|circa|approx|approximately|around|about|~\d{3,4})|(?:^~)"
+)
 UNCERTAIN_CHECK = re.compile(r"\b(?:uncertain|possibly|maybe|guess|\d{3,4}\?)")
-UNCERTAIN_REPL = re.compile(r'(\d{4})\?')
-MIGHT_BE_CENTURY = re.compile(r'(\d{2}00)s')
-MIGHT_BE_DECADE = re.compile(r'(\d{3}0)s')
+UNCERTAIN_REPL = re.compile(r"(\d{4})\?")
+MIGHT_BE_CENTURY = re.compile(r"(\d{2}00)s")
+MIGHT_BE_DECADE = re.compile(r"(\d{3}0)s")
 
-APPROX_CENTURY_RE = re.compile(r'\b(ca?\.?) ?(\d{1,2})(c\.?|(st|nd|rd|th) century)\s?(ad|ce|bc|bce)?')
-UNCERTAIN_CENTURY_RE = re.compile(r'(\d{1,2})(c\.?|(st|nd|rd|th) century)\s?(ad|ce|bc|bce)?\?')
+APPROX_CENTURY_RE = re.compile(
+    r"\b(ca?\.?) ?(\d{1,2})(c\.?|(st|nd|rd|th) century)\s?(ad|ce|bc|bce)?"
+)
+UNCERTAIN_CENTURY_RE = re.compile(
+    r"(\d{1,2})(c\.?|(st|nd|rd|th) century)\s?(ad|ce|bc|bce)?\?"
+)
 
-APPROX_CE_RE = re.compile(r'\b(ca?\.?) ?(\d{1,4}) (ad|ce|bc|bce)')
-UNCERTAIN_CE_RE = re.compile(r'(\d{1,4}) (ad|ce|bc|bce)\?')
+APPROX_CE_RE = re.compile(r"\b(ca?\.?) ?(\d{1,4}) (ad|ce|bc|bce)")
+UNCERTAIN_CE_RE = re.compile(r"(\d{1,4}) (ad|ce|bc|bce)\?")
 
-MENTIONS_YEAR = re.compile(r'\byear\b.+(in|during)\b')
-MENTIONS_MONTH = re.compile(r'\bmonth\b.+(in|during)\b')
-MENTIONS_DAY = re.compile(r'\bday\b.+(in|during)\b')
+MENTIONS_YEAR = re.compile(r"\byear\b.+(in|during)\b")
+MENTIONS_MONTH = re.compile(r"\bmonth\b.+(in|during)\b")
+MENTIONS_DAY = re.compile(r"\bday\b.+(in|during)\b")
 
 # Set of RE rules that will cause us to abort text processing, since we know
 # the results will be wrong.
-REJECT_RULES = re.compile(r'.*dynasty.*')  # Don't parse '23rd Dynasty' to 'uuuu-uu-23'
+REJECT_RULES = re.compile(r".*dynasty.*")  # Don't parse '23rd Dynasty' to 'uuuu-uu-23'
 
 
 @functools.lru_cache()
@@ -57,16 +63,16 @@ def text_to_edtf(text: str) -> Optional[str]:
     t = text.lower()
 
     # try parsing the whole thing
-    result = text_to_edtf_date(t)
+    result: Optional[str] = text_to_edtf_date(t)
 
     if not result:
         # split by list delims and move fwd with the first thing that returns a non-empty string.
         # TODO: assemble multiple dates into a {} or [] structure.
         for split in [",", ";", "or"]:
             for list_item in t.split(split):
-
                 # try parsing as an interval - split by '-'
-                toks = list_item.split("-")
+                toks: list[str] = list_item.split("-")
+
                 if len(toks) == 2:
                     d1 = toks[0].strip()
                     d2 = toks[1].strip()
@@ -74,10 +80,16 @@ def text_to_edtf(text: str) -> Optional[str]:
                     # match looks from the beginning of the string, search
                     # looks anywhere.
 
-                    if re.match(ONE_DIGIT_PARTIAL_FIRST, d2):  # 1-digit year partial e.g. 1868-9
-                        if re.search(PARTIAL_CHECK, d1):  # TODO: evaluate it and see if it's a year
+                    if re.match(
+                        ONE_DIGIT_PARTIAL_FIRST, d2
+                    ):  # 1-digit year partial e.g. 1868-9
+                        if re.search(
+                            PARTIAL_CHECK, d1
+                        ):  # TODO: evaluate it and see if it's a year
                             d2 = d1[-4:-1] + d2
-                    elif re.match(TWO_DIGIT_PARTIAL_FIRST, d2):  # 2-digit year partial e.g. 1809-10
+                    elif re.match(
+                        TWO_DIGIT_PARTIAL_FIRST, d2
+                    ):  # 2-digit year partial e.g. 1809-10
                         if re.search(PARTIAL_CHECK, d1):
                             d2 = d1[-4:-2] + d2
                     else:
@@ -134,7 +146,7 @@ def text_to_edtf_date(text: str) -> Optional[str]:
         return None
 
     t = text.lower()
-    result = ''
+    result: str = ""
 
     if re.match(REJECT_RULES, t):
         return None
@@ -143,7 +155,7 @@ def text_to_edtf_date(text: str) -> Optional[str]:
     could_be_century: list = re.findall(MIGHT_BE_CENTURY, t)
     # matches on '1800s' and '1910s'. Removes the 's'.
     # Needs to happen before is_uncertain because e.g. "1860s?"
-    t, is_decade = re.subn(MIGHT_BE_DECADE, r'\1', t)
+    t, is_decade = re.subn(MIGHT_BE_DECADE, r"\1", t)
 
     # detect approximation signifiers
     # a few 'circa' abbreviations just before the year
@@ -151,7 +163,7 @@ def text_to_edtf_date(text: str) -> Optional[str]:
     # the word 'circa' anywhere
 
     # detect uncertainty signifiers
-    t, is_uncertain = re.subn(UNCERTAIN_REPL, r'\1', t)
+    t, is_uncertain = re.subn(UNCERTAIN_REPL, r"\1", t)
     is_uncertain = is_uncertain or re.findall(UNCERTAIN_CHECK, t)
 
     # detect century forms
@@ -191,7 +203,7 @@ def text_to_edtf_date(text: str) -> Optional[str]:
                 dayfirst=appsettings.DAY_FIRST,
                 yearfirst=False,
                 fuzzy=True,  # force a match, even if it's default date
-                default=DEFAULT_DATE_1
+                default=DEFAULT_DATE_1,
             )
 
             dt2 = parse(
@@ -199,14 +211,13 @@ def text_to_edtf_date(text: str) -> Optional[str]:
                 dayfirst=appsettings.DAY_FIRST,
                 yearfirst=False,
                 fuzzy=True,  # force a match, even if it's default date
-                default=DEFAULT_DATE_2
+                default=DEFAULT_DATE_2,
             )
 
         except ValueError:
             return None
 
-        if dt1.date() == DEFAULT_DATE_1.date() and \
-                dt2.date() == DEFAULT_DATE_2.date():
+        if dt1.date() == DEFAULT_DATE_1.date() and dt2.date() == DEFAULT_DATE_2.date():
             # couldn't parse anything - defaults are untouched.
             return None
 
@@ -223,12 +234,12 @@ def text_to_edtf_date(text: str) -> Optional[str]:
             # approximate/uncertain markers to decide whether we treat it as
             # a century or a decade.
             if i == 2 and could_be_century and not (is_approximate or is_uncertain):
-                result += 'x'
+                result += "x"
             elif i == 3 and is_decade:
                 if mentions_year:
-                    result += 'u'  # year precision
+                    result += "X"  # year precision
                 else:
-                    result += 'x'  # decade precision
+                    result += "x"  # decade precision
             elif date1[i] == date2[i]:
                 # since both attempts at parsing produced the same result
                 # it must be parsed value, not a default
@@ -236,12 +247,12 @@ def text_to_edtf_date(text: str) -> Optional[str]:
             else:
                 # different values were produced, meaning that it's likely
                 # a default. Use 'unspecified'
-                result += "u"
+                result += "X"
 
         # strip off unknown chars from end of string - except the first 4
 
         for i in reversed(range(len(result))):
-            if result[i] not in ('u', 'x', '-'):
+            if result[i] not in ("X", "-"):
                 smallest_length = 4
 
                 if mentions_month:
@@ -265,14 +276,16 @@ def text_to_edtf_date(text: str) -> Optional[str]:
 
             # end dateutil post-parsing
 
-    if is_uncertain:
-        result += "?"
-
-    if is_approximate:
-        result += "~"
+    if is_uncertain and is_approximate:
+        result += "%"
+    else:
+        if is_uncertain:
+            result += "?"
+        if is_approximate:
+            result += "~"
 
     # weed out bad parses
-    if result.startswith("uu-uu"):
+    if result.startswith("XX-XX"):
         return None
 
     return result

From 1aa53cfb2d4e0a2a3c284ec20db60f841b88a7f9 Mon Sep 17 00:00:00 2001
From: Andrew Hankinson <andrew.hankinson@gmail.com>
Date: Tue, 13 Aug 2024 15:03:16 +0200
Subject: [PATCH 11/14] Update imports

---
 edtf/natlang/en.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/edtf/natlang/en.py b/edtf/natlang/en.py
index 191199e..ba192e8 100644
--- a/edtf/natlang/en.py
+++ b/edtf/natlang/en.py
@@ -1,12 +1,12 @@
 """Utilities to derive an EDTF string from an (English) natural language string."""
 import functools
+import re
 from datetime import datetime
 from typing import Optional
 
 from dateutil.parser import parse
-import re
-from edtf import appsettings
 
+from edtf import appsettings
 
 # two dates where every digit of an ISO date representation is different,
 # and one is in the past and one is in the future.

From 8c4f9685bc31224bcd0efcf811485f2e3f34e292 Mon Sep 17 00:00:00 2001
From: Andrew Hankinson <andrew.hankinson@gmail.com>
Date: Tue, 13 Aug 2024 16:48:01 +0200
Subject: [PATCH 12/14] Merge fixes

---
 edtf/natlang/en.py            | 18 ++++++++++--------
 edtf/parser/parser_classes.py |  1 +
 2 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/edtf/natlang/en.py b/edtf/natlang/en.py
index ba192e8..49b04f3 100644
--- a/edtf/natlang/en.py
+++ b/edtf/natlang/en.py
@@ -4,7 +4,7 @@
 from datetime import datetime
 from typing import Optional
 
-from dateutil.parser import parse
+from dateutil.parser import ParserError, parse
 
 from edtf import appsettings
 
@@ -126,9 +126,9 @@ def text_to_edtf(text: str) -> Optional[str]:
     is_after = re.findall(AFTER_CHECK, t)
 
     if is_before:
-        result = f"unknown/{result}"
+        result = f"/{result}"
     elif is_after:
-        result = f"{result}/unknown"
+        result = f"{result}/"
 
     return result
 
@@ -172,7 +172,7 @@ def text_to_edtf_date(text: str) -> Optional[str]:
     # detect CE/BCE year form
     is_ce = re.findall(CE_RE, t)
     if is_century:
-        result = "%02dxx" % (int(is_century[0][0]) - 1,)
+        result = "%02dXX" % (int(is_century[0][0]) - 1,)
         is_approximate = is_approximate or re.findall(APPROX_CENTURY_RE, t)
         is_uncertain = is_uncertain or re.findall(UNCERTAIN_CENTURY_RE, t)
 
@@ -214,8 +214,10 @@ def text_to_edtf_date(text: str) -> Optional[str]:
                 default=DEFAULT_DATE_2,
             )
 
-        except ValueError:
-            return None
+        except ParserError:
+            return
+        except Exception:
+            return
 
         if dt1.date() == DEFAULT_DATE_1.date() and dt2.date() == DEFAULT_DATE_2.date():
             # couldn't parse anything - defaults are untouched.
@@ -234,12 +236,12 @@ def text_to_edtf_date(text: str) -> Optional[str]:
             # approximate/uncertain markers to decide whether we treat it as
             # a century or a decade.
             if i == 2 and could_be_century and not (is_approximate or is_uncertain):
-                result += "x"
+                result += "X"
             elif i == 3 and is_decade:
                 if mentions_year:
                     result += "X"  # year precision
                 else:
-                    result += "x"  # decade precision
+                    result += "X"  # decade precision
             elif date1[i] == date2[i]:
                 # since both attempts at parsing produced the same result
                 # it must be parsed value, not a default
diff --git a/edtf/parser/parser_classes.py b/edtf/parser/parser_classes.py
index d103660..eada1f9 100644
--- a/edtf/parser/parser_classes.py
+++ b/edtf/parser/parser_classes.py
@@ -4,6 +4,7 @@
 from datetime import date, datetime
 from operator import add, sub
 from time import struct_time
+from typing import Optional
 
 from dateutil.relativedelta import relativedelta
 

From 6f08bce95cb583f2825353cbe8ae6a1de1c47df7 Mon Sep 17 00:00:00 2001
From: Andrew Hankinson <andrew.hankinson@gmail.com>
Date: Tue, 13 Aug 2024 16:55:59 +0200
Subject: [PATCH 13/14] ruff formatting

---
 edtf/natlang/en.py            | 5 +++--
 edtf/parser/parser_classes.py | 9 ++++-----
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/edtf/natlang/en.py b/edtf/natlang/en.py
index 49b04f3..97230db 100644
--- a/edtf/natlang/en.py
+++ b/edtf/natlang/en.py
@@ -1,4 +1,5 @@
 """Utilities to derive an EDTF string from an (English) natural language string."""
+
 import functools
 import re
 from datetime import datetime
@@ -52,7 +53,7 @@
 REJECT_RULES = re.compile(r".*dynasty.*")  # Don't parse '23rd Dynasty' to 'uuuu-uu-23'
 
 
-@functools.lru_cache()
+@functools.lru_cache
 def text_to_edtf(text: str) -> Optional[str]:
     """
     Generate EDTF string equivalent of a given natural language date string.
@@ -133,7 +134,7 @@ def text_to_edtf(text: str) -> Optional[str]:
     return result
 
 
-@functools.lru_cache()
+@functools.lru_cache
 def text_to_edtf_date(text: str) -> Optional[str]:
     """
     Return EDTF string equivalent of a given natural language date string.
diff --git a/edtf/parser/parser_classes.py b/edtf/parser/parser_classes.py
index eada1f9..ad690fb 100644
--- a/edtf/parser/parser_classes.py
+++ b/edtf/parser/parser_classes.py
@@ -98,10 +98,6 @@ class EDTFObject:
 
     parser = None
 
-    def __init__(self, *args, **kwargs):
-        errmsg: str = f"{type(self).__name__}.__init__(*{args}, **{kwargs})"
-        raise NotImplementedError(f"{errmsg} is not implemented.")
-
     @classmethod
     def set_parser(cls, p):
         cls.parser = p
@@ -288,6 +284,7 @@ def set_year(self, y: int):
 
     def get_year(self) -> int:
         return self._year
+
     year = property(get_year, set_year)
 
     def set_month(self, m: Optional[int]):
@@ -297,6 +294,7 @@ def set_month(self, m: Optional[int]):
 
     def get_month(self) -> Optional[int]:
         return self._month
+
     month = property(get_month, set_month)
 
     def __str__(self):
@@ -932,8 +930,9 @@ def __str__(self):
 
         return result
 
-    def set_year(self, y): # Year can be None.
+    def set_year(self, y):  # Year can be None.
         self._year = y
+
     year = property(Date.get_year, set_year)
 
     def _precise_year(self, lean: str):

From 51255e3e0d82ed374b91cd10a96bf1afede056d2 Mon Sep 17 00:00:00 2001
From: Andrew Hankinson <andrew.hankinson@gmail.com>
Date: Tue, 13 Aug 2024 17:03:31 +0200
Subject: [PATCH 14/14] Run benchmarks

---
 .github/workflows/ci.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 4645d13..7645ec9 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -100,7 +100,7 @@ jobs:
 
             - name: Publish benchmark results
               uses: benchmark-action/github-action-benchmark@v1
-              if: github.event_name == 'pull_request' && github.repository == 'ixc/python-edtf'
+              if: github.event_name == 'pull_request' && github.repository == 'rism-digital/python-edtf'
               with:
                 tool: 'pytest'
                 auto-push: true
@@ -112,7 +112,7 @@ jobs:
                 summary-always: true
 
             - name: Comment on benchmark results without publishing
-              if: github.event_name != 'pull_request' || github.repository != 'ixc/python-edtf'
+              if: github.event_name != 'pull_request' || github.repository != 'rism-digital/python-edtf'
               uses: benchmark-action/github-action-benchmark@v1
               with:
                 tool: 'pytest'