Skip to content

Commit

Permalink
Merge pull request #1919 from lexcaliber/parFixes
Browse files Browse the repository at this point in the history
fix(parentheticals): Fix three bugs in parenthetical functionality
  • Loading branch information
mlissner authored Feb 18, 2022
2 parents f5fe8cc + 40d0603 commit c2c76f3
Show file tree
Hide file tree
Showing 6 changed files with 51 additions and 7 deletions.
4 changes: 2 additions & 2 deletions cl/citations/description_score.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,11 @@
from cl.search.models import OpinionCluster

_GERUND = re.compile(r"(?:\S+ing)", re.IGNORECASE)
_GERUND_THAT = re.compile(rf"{_GERUND} that", re.IGNORECASE)
_GERUND_THAT = re.compile(rf"{_GERUND.pattern} that", re.IGNORECASE)
_HOLDING = re.compile(
r"(?:holding|deciding|ruling|recognizing|concluding)", re.IGNORECASE
)
_HOLDING_THAT = re.compile(rf"{_HOLDING} that", re.IGNORECASE)
_HOLDING_THAT = re.compile(rf"{_HOLDING.pattern} that", re.IGNORECASE)

# Observation of thousands of parentheticals seems to indicate that the
# most useful ones are in the neighborhood of 20 words long.
Expand Down
9 changes: 8 additions & 1 deletion cl/citations/filter_parentheticals.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import re

_MODIFIABLE = r"(omissions?|quotations?|quotes?|headings?|(quotations? )?marks?|ellips.s|cites?|citations?|emphas.s|italics?|footnotes?|alterations?|punctuation|modifications?|brackets?|bracketed material|formatting)"
_MODIFIABLE = r"(omissions?|quotations?|quotes?|headings?|(quotations? )?marks|ellips.s|cites?|citations?|emphas.s|italics?|footnotes?|alterations?|punctuation|modifications?|brackets?|bracketed material|formatting)"
_MODIFABLE_TYPE = r"(internal|former|latter|first|second|third|fourth|fifth|last|some|further|certain|numbered|other|transcript)"
_FULL_MODIFIABLE = f"(({_MODIFABLE_TYPE} )?{_MODIFIABLE})"
_QUOTE_MODIFICATION = r"(added|provided|removed|adopted|(in )?(the )original|omitted|included|deleted|eliminated|altered|modified|supplied|ours|mine|changed|(in|by) \S+|by \S+ court)"
Expand All @@ -20,16 +20,19 @@
r".n banc", # en banc or in banc
# Scalia, J., dissenting; Roberts, C.J., concurring in the judgment, concurring in part, and dissenting in part
f"{_JUDGE_NAME}( {_FULL_OPINION_DESCRIPTOR})?([ ,]+(and )?{_FULL_OPINION_DESCRIPTOR})*",
f"{_JUDGE_NAME}.{{1,75}}",
# concurring in result
f"({_DOCUMENT_TYPES} )?{_FULL_OPINION_DESCRIPTOR}",
# opinion of Breyer, J.; opinion of Scalia and Alito, J.J.
f"{_DOCUMENT_TYPES} of {_JUDGE_NAME}",
# plurality opinion, supplemental order
f"{_OPINION_TYPES}( {_DOCUMENT_TYPES})?( {_OPINION_TYPE_MODIFICATION})?",
rf"({_DOCUMENT_TYPES} )?opinion.*",
r"dictum|dicta",
r"on rehearing|denying cert(iorari)?",
r"simplified|cleaned up|as amended",
r"same|similar|contra",
r"standard of review",
r"(and )?cases cited therein",
# No. 12-345
r"No. \d+.?\d+",
Expand All @@ -45,6 +48,8 @@
f"{_FULL_MODIFIABLE} and {_FULL_MODIFIABLE} {_QUOTE_MODIFICATION}",
f"{_FULL_MODIFIABLE} {_QUOTE_MODIFICATION}[;,] ?{_FULL_MODIFIABLE} {_QUOTE_MODIFICATION}",
f"({_MODIFABLE_TYPE} )?{_MODIFIABLE}, {_MODIFIABLE}, and {_MODIFIABLE} {_QUOTE_MODIFICATION}",
# Match any short parenthetical that looks like a modification (e.g. "citations and internal marks omitted, emphasis added")
rf"(?=.*{_MODIFIABLE}.*).{{1,75}}",
# citing Gonzales v. Raich, 123 U.S. 456 (2019). A tad over-inclusive but very helpful
f"{_REFERENTIAL} .*",
# 2nd Cir. 2019, Third Circuit 1993
Expand All @@ -55,6 +60,8 @@
r".{1,10} (Circuit|Cir.)",
# hereinafter, "Griffin II"
r"here(in)?after(,)? .+",
# Imbalanced parentheses (for when eyecite cuts off the parenthetical too soon) e.g. "holding Section 4(a"
r"^.{1,35}\([^\)]{1,35}$",
# Single-word parentheticals, e.g., 'TILA'
r"\S*",
]
Expand Down
13 changes: 12 additions & 1 deletion cl/citations/fixtures/opinions_matching_citations.json
Original file line number Diff line number Diff line change
Expand Up @@ -262,6 +262,17 @@
"model": "search.citation",
"pk": 6
},
{
"fields": {
"volume": 2,
"reporter": "S.Ct.",
"page": "2",
"type": 1,
"cluster": 4
},
"model": "search.citation",
"pk": 20
},
{
"fields": {
"volume": 2,
Expand Down Expand Up @@ -401,7 +412,7 @@
"date_modified": "2015-08-15T14:10:56.801Z",
"extracted_by_ocr": false,
"author": 2,
"plain_text": "my plain text secret word for queries",
"plain_text": "my plain text secret word for queries. Foo v. Bar, 1 U.S. 1, 4, 2 S.Ct. 2, 5 (2000) (holding something happened)",
"html": "",
"download_url": null,
"cluster": 8,
Expand Down
11 changes: 9 additions & 2 deletions cl/citations/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,12 +163,19 @@ def find_citations_and_parentheticals_for_opinion_by_pks(

parentheticals = []
for _opinion, _citations in citation_resolutions.items():
# Currently, eyecite has a bug where parallel citations are
# detected individually. To avoid creating duplicate parentheticals
# as a result, we keep track of the texts we've already seen.
parenthetical_texts = set()
for _cit in _citations:
# If the citation has a descriptive parenthetical, clean
# it up and store it as a Parenthetical
if (
par_text := _cit.metadata.parenthetical
) and is_parenthetical_descriptive(par_text):
(par_text := _cit.metadata.parenthetical)
and par_text not in parenthetical_texts
and is_parenthetical_descriptive(par_text)
):
parenthetical_texts.add(par_text)
clean = clean_parenthetical_text(par_text)
parentheticals.append(
Parenthetical(
Expand Down
17 changes: 17 additions & 0 deletions cl/citations/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -627,6 +627,18 @@ def test_opinionscited_creation(self) -> None:
num_parentheticals,
)

def test_no_duplicate_parentheticals_from_parallel_cites(self) -> None:
    """Check that only one Parenthetical is stored for the opinion pair.

    Per the test name, this guards against parallel citations yielding
    duplicate parentheticals. After extraction is run for opinion 11,
    exactly one Parenthetical must exist with opinion 11 as the
    describing opinion and opinion 7 as the described opinion.
    """
    remove_citations_from_imported_fixtures()
    describing = Opinion.objects.get(pk=11)
    described = Opinion.objects.get(pk=7)
    find_citations_and_parentheticals_for_opinion_by_pks.delay([11])
    matching = Parenthetical.objects.filter(
        describing_opinion=describing, described_opinion=described
    )
    self.assertEqual(matching.count(), 1)


class CitationFeedTest(IndexedSolrTestCase):
def _tree_has_content(self, content, expected_count):
Expand Down Expand Up @@ -807,6 +819,7 @@ def test_is_not_descriptive(self):
"internal citations and quotations omitted",
"citations and internal ellipses omitted",
"quotation marks omitted; ellipses ours",
"headings and internal quotations omitted, emphasis and citations altered",
"plurality opinion",
"opinion of Breyer, J.",
"opinion of Mister Justice Black",
Expand All @@ -826,9 +839,12 @@ def test_is_not_descriptive(self):
"Sotomayor, J., statement respecting denial of certiorari",
"Roberts, C.J., concurring in part and dissenting in part",
"Friendly, J., concurring in the judgment, concurring in part, and dissenting in part",
"Scalia, J., specially concurring in the judgment on this issue",
"en banc",
"per curiam",
"same",
"standard of review",
"opinion of O'Connor, J., respecting the granting of an injunction",
"no",
"n. 3",
"No. 12-345",
Expand All @@ -853,6 +869,7 @@ def test_is_descriptive(self):
"accountant who gave lay opinion testimony might have qualified as expert",
"where plaintif's complaint alleges facts which, if proven, would entitle plaintiff to relief under the Eighth Amendment, dismissal of complaint was inappropriate",
"ruling that there is nothing either legal or illegal, only thinking makes it so",
"testing that the mere presence of the word quotation doesn't get a parenthetical filtered out if it's long enough",
"First Amendment",
"mislabeled product",
'"Look on my Works, ye Mighty, and despair"',
Expand Down
4 changes: 3 additions & 1 deletion cl/opinion_page/templates/view_opinion_summaries.html
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,9 @@ <h2>

<div id="all-summaries">
<h3>{{ summaries_count|intcomma }} judge-written summar{{ summaries_count|pluralize:"y,ies" }} of this opinion from other cases.</h3>
<p>We looked through our complete collection of opinions and identified the following parenthetical summaries that describe this case:</p>
{% if summaries_count > 0 %}
<p>We looked through our complete collection of opinions and identified the following parenthetical summaries that describe this case:</p>
{% endif %}
<hr>
<ul>
{% for summary in summaries %}
Expand Down

0 comments on commit c2c76f3

Please sign in to comment.