Skip to content

Commit

Permalink
Merge branch 'main' into fix-eyecite-defendants
Browse files (browse the repository at this point in the history)
  • Loading branch information
flooie authored Jan 22, 2025
2 parents: b4f6ff2 + f2a0c4c; commit 6184921
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 7 deletions.
22 changes: 15 additions & 7 deletions eyecite/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@
POST_SHORT_CITATION_REGEX,
YEAR_REGEX,
)
from eyecite.utils import strip_punct

BACKWARD_SEEK = 28 # Median case name length in the CL db is 28 (2016-02-26)

Expand All @@ -41,17 +40,26 @@ def get_court_by_paren(paren_string: str) -> Optional[str]:
Does not work on SCOTUS, since that court lacks parentheticals, and
needs to be handled after disambiguation has been completed.
"""
court_str = strip_punct(paren_string)

# Remove whitespace and punctuation because citation strings sometimes lack
# internal spaces, e.g. "Pa.Super." or "SC" (South Carolina)
court_str = re.sub(r"[^\w]", "", paren_string).lower()

court_code = None
if court_str:
# Map the string to a court, if possible.
for court in courts:
# Use startswith because citations are often missing final period,
# e.g. "2d Cir"
if court["citation_string"].startswith(court_str):
s = re.sub(r"[^\w]", "", court["citation_string"]).lower()

# Check for an exact match first
if s == court_str:
return str(court["id"])

# If no exact match, try to record a startswith match for possible
# eventual return
if s.startswith(court_str):
court_code = court["id"]
break

return court_code

return court_code

Expand Down
19 changes: 19 additions & 0 deletions tests/test_FindTest.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,19 @@ def test_find_citations(self):
'defendant': 'test',
'court': 'ca4',
'pin_cite': '347-348'})]),
# Test with court string without space
('bob lissner v. test 1 U.S. 12, 347-348 (Pa.Super. 1982)',
[case_citation(page='12', year=1982,
metadata={'plaintiff': 'lissner',
'defendant': 'test',
'court': 'pasuperct',
'pin_cite': '347-348'})]),
# Test with court string exact match
('Commonwealth v. Muniz, 164 A.3d 1189 (Pa. 2017)',
[case_citation(page='1189', reporter='A.3d', volume='164', year=2017,
metadata={'plaintiff': 'Commonwealth',
'defendant': 'Muniz',
'court': 'pa'})]),
# Parallel cite with parenthetical
('bob lissner v. test 1 U.S. 12, 347-348, 1 S. Ct. 2, 358 (4th Cir. 1982) (overruling foo)',
[case_citation(page='12', year=1982,
Expand Down Expand Up @@ -553,6 +566,12 @@ def test_find_citations(self):
# Long pin cite -- make sure no catastrophic backtracking in regex
('1 U.S. 1, 2277, 2278, 2279, 2280, 2281, 2282, 2283, 2284, 2286, 2287, 2288, 2289, 2290, 2291',
[case_citation(metadata={'pin_cite': '2277, 2278, 2279, 2280, 2281, 2282, 2283, 2284, 2286, 2287, 2288, 2289, 2290, 2291'})]),
('Commonwealth v. Muniz, 164 A.3d 1189 (Pa. 2017)', [
case_citation(volume='164', reporter='A.3d', year=2017,
page='1189',
metadata={'plaintiff': 'Commonwealth', 'defendant': 'Muniz',
'court': 'pa'})]),
('Foo v. Bar, 1 F.Supp. 1 (SC 1967)', [case_citation(volume='1', reporter='F.Supp.', year=1967, page='1', metadata={'plaintiff': 'Foo', 'defendant': 'Bar', 'court': 'sc'})]),
)
# fmt: on
self.run_test_pairs(test_pairs, "Citation extraction")
Expand Down

0 comments on commit 6184921

Please sign in to comment.