Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Properly set qualification properties #60

Merged
merged 10 commits into from
Jun 11, 2024
3 changes: 2 additions & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ jobs:

- name: Publish benchmark results
uses: benchmark-action/github-action-benchmark@v1
if: github.event_name != 'pull_request'
if: github.event_name == 'pull_request' && github.repository == 'ixc/python-edtf'
with:
tool: 'pytest'
auto-push: true
Expand All @@ -112,6 +112,7 @@ jobs:
summary-always: true

- name: Comment on benchmark results without publishing
if: github.event_name != 'pull_request' || github.repository != 'ixc/python-edtf'
uses: benchmark-action/github-action-benchmark@v1
with:
tool: 'pytest'
Expand Down
45 changes: 45 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -342,6 +342,51 @@ One can interpret uncertain or approximate dates as 'plus or minus a [level of p

If a date is both uncertain __and__ approximate, the padding is applied twice, i.e. it gets 100% * 2 padding, or 'plus or minus two [levels of precision]'.

### Qualification properties
EDTF objects support properties that provide an overview of how the object is qualified:
- `.is_uncertain (?)`
- `.is_approximate (~)`
- `.is_uncertain_and_approximate (%)`
These properties represent whether any part of the date object is uncertain, approximate, or uncertain and approximate. For ranges, the properties are true if any part of the range (lower or upper section) is qualified as such. A date is not necessarily uncertain and approximate just because it is separately both uncertain and approximate - it must have the "%" qualifier to be considered uncertain and approximate.
```python
>>> parse_edtf("2006-06-11")
Date: '2006-06-11'
>>> parse_edtf("2006-06-11").is_uncertain
False
>>> parse_edtf("2006-06-11").is_approximate
False

>>> parse_edtf("1984?")
UncertainOrApproximate: '1984?'
>>> parse_edtf("1984?").is_approximate
False
>>> parse_edtf("1984?").is_uncertain
True
>>> parse_edtf("1984?").is_uncertain_and_approximate
False

>>> parse_edtf("1984%").is_uncertain
False
>>> parse_edtf("1984%").is_uncertain_and_approximate
True

>>> parse_edtf("1984~/2004-06")
Level1Interval: '1984~/2004-06'
>>> parse_edtf("1984~/2004-06").is_approximate
True
>>> parse_edtf("1984~/2004-06").is_uncertain
False

>>> parse_edtf("2004?-~06-~04")
PartialUncertainOrApproximate: '2004?-~06-~04'
>>> parse_edtf("2004?-~06-~04").is_approximate
True
>>> parse_edtf("2004?-~06-~04").is_uncertain
True
>>> parse_edtf("2004?-~06-~04").is_uncertain_and_approximate
False
```

### Seasons

Seasons are interpreted as Northern Hemisphere by default. To change this, override the month mapping in `appsettings.py`.
Expand Down
2 changes: 2 additions & 0 deletions edtf/appsettings.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,3 +98,5 @@
MULTIPLIER_IF_APPROXIMATE = EDTF.get("MULTIPLIER_IF_APPROXIMATE", 1.0)
MULTIPLIER_IF_BOTH = EDTF.get("MULTIPLIER_IF_BOTH", 2.0)
DELTA_IF_UNKNOWN = EDTF.get("DELTA_IF_UNKNOWN", relativedelta(years=10))

DEBUG_PYPARSING = False
37 changes: 17 additions & 20 deletions edtf/fields.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,12 @@
from django.db import models
from django.db.models import signals
from django.db.models.query_utils import DeferredAttribute
from pyparsing import ParseException

from edtf import EDTFObject, parse_edtf
from edtf.convert import struct_time_to_date, struct_time_to_jd
from edtf.natlang import text_to_edtf
from edtf.parser.edtf_exceptions import EDTFParseException

DATE_ATTRS = (
"lower_strict",
Expand Down Expand Up @@ -46,21 +48,12 @@ def __init__(
**kwargs,
):
kwargs["max_length"] = 2000
(
self.natural_text_field,
self.direct_input_field,
self.lower_strict_field,
self.upper_strict_field,
self.lower_fuzzy_field,
self.upper_fuzzy_field,
) = (
natural_text_field,
direct_input_field,
lower_strict_field,
upper_strict_field,
lower_fuzzy_field,
upper_fuzzy_field,
)
self.natural_text_field = natural_text_field
self.direct_input_field = direct_input_field
self.lower_strict_field = lower_strict_field
self.upper_strict_field = upper_strict_field
self.lower_fuzzy_field = lower_fuzzy_field
self.upper_fuzzy_field = upper_fuzzy_field
super().__init__(verbose_name, name, **kwargs)

description = (
Expand All @@ -72,6 +65,8 @@ def deconstruct(self):
name, path, args, kwargs = super().deconstruct()
if self.natural_text_field:
kwargs["natural_text_field"] = self.natural_text_field
if self.direct_input_field:
kwargs["direct_input_field"] = self.direct_input_field

for attr in DATE_ATTRS:
field = f"{attr}_field"
Expand Down Expand Up @@ -132,10 +127,12 @@ def update_values(self, instance, *args, **kwargs):
if direct_input and (
existing_value is None or str(existing_value) != direct_input
):
edtf = parse_edtf(
direct_input, fail_silently=True
) # ParseException if invalid; should this be raised?
# TODO pyparsing.ParseExceptions are very noisy and dumps the whole grammar (see https://github.com/ixc/python-edtf/issues/46)
try:
edtf = parse_edtf(
direct_input, fail_silently=True
) # ParseException if invalid; should this be raised?
except ParseException as err:
raise EDTFParseException(direct_input, err) from None

# set the natural_text (display) field to the direct_input if it is not provided
if natural_text == "":
Expand All @@ -148,7 +145,7 @@ def update_values(self, instance, *args, **kwargs):
):
edtf = parse_edtf(
edtf_string, fail_silently=True
) # potetial ParseException if invalid; should this be raised?
) # potential ParseException if invalid; should this be raised?
else:
edtf = existing_value
else:
Expand Down
26 changes: 25 additions & 1 deletion edtf/parser/edtf_exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,28 @@


class EDTFParseException(ParseException):
    """Raised when an input cannot be parsed as an EDTF string.

    Attributes:
        input_string -- the input string that could not be parsed
            (``None`` is stored as ``""``)
        err -- the original ParseException that caused this one; a generic
            one is created when none is supplied
    """

    def __init__(self, input_string, err=None):
        """Wrap ``err`` (or a generic parse error) for ``input_string``."""
        if input_string is None:
            input_string = ""
        self.input_string = input_string
        if err is None:
            err = ParseException(input_string, 0, "Invalid input or format.")
        self.err = err
        # ParseException takes (pstr, loc, msg): the parsed string comes
        # first and the message last, exactly as in the constructor call
        # above. Passing them the other way round leaves ``pstr``/``msg``
        # (and everything derived from them) holding the wrong text.
        super().__init__(self.input_string, self._error_loc(), str(err))

    def _error_loc(self):
        """Return the wrapped error's location, or 0 when missing/None."""
        return getattr(self.err, "loc", 0) or 0

    def __str__(self):
        """Return a short, user-facing description of the failure."""
        if not self.input_string:
            return "You must supply some input text"
        loc = self._error_loc()
        # Show up to 10 characters either side of the failure position.
        near_text = self.input_string[max(loc - 10, 0) : loc + 10]
        return f"Error at position {loc}: Invalid input or format near '{near_text}'. Please provide a valid EDTF string."
15 changes: 10 additions & 5 deletions edtf/parser/grammar.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
# https://github.com/pyparsing/pyparsing/wiki/Performance-Tips

import pyparsing
from edtf.appsettings import DEBUG_PYPARSING

pyparsing.ParserElement.enablePackrat()

Expand Down Expand Up @@ -342,14 +343,18 @@ def f(toks):
)


def parse_edtf(input_string, parseAll=True, fail_silently=False, debug=None):
    """Parse ``input_string`` with the EDTF grammar and return the result.

    Args:
        input_string: the text to parse; surrounding whitespace is stripped
            before parsing.
        parseAll: passed through to ``edtfParser.parseString``.
        fail_silently: when true, return ``None`` instead of raising for
            empty or unparseable input.
        debug: when true, re-raise the original pyparsing exception instead
            of wrapping it. ``None`` means "use ``DEBUG_PYPARSING``".

    Returns:
        The first element of the parse result, or ``None`` when the result
        is empty or when ``fail_silently`` suppressed an error.

    Raises:
        EDTFParseException: for empty or invalid input, unless
            ``fail_silently`` is set (or ``debug`` is set, in which case the
            underlying ParseException propagates).
    """
    if not input_string:
        # Empty/None input is a parse failure like any other, so it must
        # honour fail_silently rather than always raising.
        if fail_silently:
            return None
        raise EDTFParseException(input_string)

    if debug is None:
        debug = DEBUG_PYPARSING

    try:
        p = edtfParser.parseString(input_string.strip(), parseAll)
    except ParseException as err:
        if fail_silently:
            return None
        if debug:
            # Keep pyparsing's full (verbose) exception for debugging.
            raise
        raise EDTFParseException(input_string, err) from None
    return p[0] if p else None
45 changes: 44 additions & 1 deletion edtf/parser/parser_classes.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ def apply_delta(op, time_struct, delta):

class EDTFObject:
"""
Object to attact to a parser to become instantiated when the parser
Object to attach to a parser to become instantiated when the parser
completes.
"""

Expand Down Expand Up @@ -470,6 +470,11 @@ class UncertainOrApproximate(EDTFObject):
def __init__(self, date, ua):
self.date = date
self.ua = ua
self.is_uncertain = ua.is_uncertain if ua else False
self.is_approximate = ua.is_approximate if ua else False
self.is_uncertain_and_approximate = (
ua.is_uncertain_and_approximate if ua else False
)

def __str__(self):
if self.ua:
Expand Down Expand Up @@ -558,6 +563,11 @@ def __init__(
**kwargs,
)
self.ua = ua
self.is_uncertain = ua.is_uncertain if ua else False
self.is_approximate = ua.is_approximate if ua else False
self.is_uncertain_and_approximate = (
ua.is_uncertain_and_approximate if ua else False
)
self.negative = self.year.startswith("-")

def __str__(self):
Expand Down Expand Up @@ -709,6 +719,12 @@ def __init__(self, lower=None, upper=None):
self.upper = UnspecifiedIntervalSection(
False, UncertainOrApproximate(**lower)
)
self.is_approximate = self.lower.is_approximate or self.upper.is_approximate
self.is_uncertain = self.lower.is_uncertain or self.upper.is_uncertain
self.is_uncertain_and_approximate = (
self.lower.is_uncertain_and_approximate
or self.upper.is_uncertain_and_approximate
)

def _get_fuzzy_padding(self, lean):
if lean == EARLIEST:
Expand Down Expand Up @@ -840,6 +856,27 @@ def __init__(

self.all_ua = all_ua

uas = [
year_ua,
month_ua,
day_ua,
year_month_ua,
month_day_ua,
season_ua,
all_ua,
]
self.is_uncertain = any(
item.is_uncertain for item in uas if hasattr(item, "is_uncertain")
)
self.is_approximate = any(
item.is_approximate for item in uas if hasattr(item, "is_approximate")
)
self.is_uncertain_and_approximate = any(
item.is_uncertain_and_approximate
for item in uas
if hasattr(item, "is_uncertain_and_approximate")
)

def __str__(self):
if self.season_ua:
return f"{self.season}{self.season_ua}"
Expand Down Expand Up @@ -1046,6 +1083,12 @@ def __init__(self, lower, upper):
self.upper = upper[0]
else:
self.upper = upper
self.is_approximate = self.lower.is_approximate or self.upper.is_approximate
self.is_uncertain = self.lower.is_uncertain or self.upper.is_uncertain
self.is_uncertain_and_approximate = (
self.lower.is_uncertain_and_approximate
or self.upper.is_uncertain_and_approximate
)


class Level2Season(Season):
Expand Down
41 changes: 41 additions & 0 deletions edtf/parser/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -240,6 +240,25 @@
"2001-29",
)

APPROXIMATE_UNCERTAIN_EXAMPLES = (
# first part of tuple is the input EDTF string, second part is a tuple of booleans:
# uncertain ?, approximate ~, both uncertain and approximate %
("2004", (False, False, False)),
("2006-06-11", (False, False, False)),
("-0999", (False, False, False)),
("1984?", (True, False, False)),
("2004-06-11?", (True, False, False)),
("1984~", (False, True, False)),
("1984%", (False, False, True)),
("1984~/2004-06", (False, True, False)),
("2004-%06", (False, False, True)),
("2004?-~06-~04", (True, True, False)),
("2004?-06-04", (True, False, False)),
("2011-~06-~04", (False, True, False)),
("2004-06-~01/2004-06-~20", (False, True, False)),
("156X~", (False, True, False)),
)

BAD_EXAMPLES = (
# parentheses are not used for group qualification in the 2018 spec
None,
Expand Down Expand Up @@ -347,6 +366,14 @@ def test_non_parsing(bad_input):
parse(bad_input)


@pytest.mark.parametrize("bad_input", [None, ""])
def test_empty_input(bad_input):
    """Empty or missing input must raise EDTFParseException with a clear message."""
    with pytest.raises(EDTFParseException) as raised:
        parse(bad_input)
    # Inspect the message only after the context manager has captured the error.
    message = str(raised.value)
    assert "You must supply some input text" in message


def test_comparisons():
"""Test comparisons between parsed EDTF objects and standard dates."""
d1 = parse("1979-08~")
Expand All @@ -371,3 +398,17 @@ def test_comparisons():
def test_benchmark_parser(benchmark, test_input):
"""Benchmark parsing of selected EDTF strings."""
benchmark(parse, test_input)


@pytest.mark.parametrize("test_input,expected_tuple", APPROXIMATE_UNCERTAIN_EXAMPLES)
def test_approximate_uncertain(test_input, expected_tuple):
    """Check the qualification properties of a parsed EDTF string.

    ``expected_tuple`` holds three booleans, in order: whether the date is
    uncertain (?), approximate (~), and both uncertain and approximate (%).
    """
    parsed = parse(test_input)
    assert isinstance(parsed, EDTFObject), "Result should be an instance of EDTFObject"

    want_uncertain = expected_tuple[0]
    want_approximate = expected_tuple[1]
    want_both = expected_tuple[2]

    assert parsed.is_uncertain == want_uncertain
    assert parsed.is_approximate == want_approximate
    assert parsed.is_uncertain_and_approximate == want_both
Loading
Loading