Skip to content

Commit

Permalink
Merge branch 'main' into fix_idaho
Browse files Browse the repository at this point in the history
  • Loading branch information
flooie authored Dec 31, 2024
2 parents f25ff6e + 675c7d7 commit 68b33be
Show file tree
Hide file tree
Showing 11 changed files with 670 additions and 548 deletions.
58 changes: 24 additions & 34 deletions juriscraper/opinions/united_states/federal_appellate/cadc.py
Original file line number Diff line number Diff line change
@@ -1,41 +1,31 @@
import time
from datetime import date
"""
CourtID: cadc
Court Short Name: U.S. Court of Appeals for the District of Columbia Circuit
Author: mlissner
History:
2014-07-31, mlissner: committed first version
2024-12-31, grossir: Implemented new site
"""

from lxml import html
from juriscraper.OpinionSiteLinear import OpinionSiteLinear

from juriscraper.OpinionSite import OpinionSite


class Site(OpinionSite):
class Site(OpinionSiteLinear):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.url = "https://www.cadc.uscourts.gov/internet/opinions.nsf/uscadcopinions.xml"
self.court_id = self.__module__

def _get_case_names(self):
return [e for e in self.html.xpath("//item/description/text()")]

def _get_download_urls(self):
return [
html.tostring(e, method="text").decode()
for e in self.html.xpath("//item/link")
]

def _get_case_dates(self):
dates = []
for date_string in self.html.xpath("//item/pubdate/text()"):
date_only = " ".join(date_string.split(" ")[1:4])
dates.append(
date.fromtimestamp(
time.mktime(time.strptime(date_only, "%d %b %Y"))
)
# https://media.cadc.uscourts.gov/opinions/
self.url = "https://media.cadc.uscourts.gov/opinions/bydate/recent"
self.status = "Published"

def _process_html(self):
link_xpath = "a[contains(@href, '.pdf')]"
for row in self.html.xpath(f"//div[div[div[div[{link_xpath}]]]]"):
self.cases.append(
{
"url": row.xpath(f".//{link_xpath}/@href")[0],
"docket": row.xpath(f".//{link_xpath}/text()")[0],
"name": row.xpath("div[2]/div/div/text()")[0],
"date": row.xpath(".//span/text()")[-1],
}
)
return dates

def _get_docket_numbers(self):
return [
e.split("|")[0] for e in self.html.xpath("//item/title/text()")
]

def _get_precedential_statuses(self):
return ["Published" for _ in range(0, len(self.case_names))]
36 changes: 6 additions & 30 deletions juriscraper/opinions/united_states/federal_appellate/cadc_pi.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,39 +4,15 @@
Author: flooie
History:
2021-12-18: Created by flooie
2023-01-12: Fixed requests.exceptions.InvalidURL error by grossir
2023-01-12: Fixed requests.exceptions.InvalidURL error, by grossir
2024-12-31: Implemented new site, by grossir
"""

from urllib.parse import urljoin
from juriscraper.opinions.united_states.federal_appellate import cadc

from juriscraper.OpinionSiteLinear import OpinionSiteLinear


class Site(OpinionSiteLinear):
class Site(cadc.Site):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.url = "https://www.cadc.uscourts.gov/internet/orders.nsf"
self.base = "https://www.cadc.uscourts.gov"
self.court_id = self.__module__

def _process_html(self) -> None:
"""Iterate over the public interest cases.
:return: None
"""
for row in self.html.xpath(".//div[@class='row-entry']"):
url = row.xpath(".//a/@href")[0]
docket = row.xpath(".//a/span/text()")[0]
name = row.xpath(".//div[@class='column-two']/div[1]/text()")[
0
].strip()
date = row.xpath(".//date/text()")[0]
self.cases.append(
{
"date": date,
"url": urljoin("https:", url),
"docket": docket,
"name": name,
"status": "Published",
}
)
# https://media.cadc.uscourts.gov/orders/
self.url = "https://media.cadc.uscourts.gov/orders/bydate/recent"
22 changes: 3 additions & 19 deletions juriscraper/opinions/united_states/federal_appellate/cadc_u.py
Original file line number Diff line number Diff line change
@@ -1,25 +1,9 @@
import re

from juriscraper.opinions.united_states.federal_appellate import cadc


class Site(cadc.Site):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.url = "https://www.cadc.uscourts.gov/internet/judgments.nsf/uscadcjudgments.xml"
self.court_id = self.__module__

def _get_case_names(self):
return [
e.split(", ", 1)[1]
for e in self.html.xpath("//item/description/text()")
]

def _get_docket_numbers(self):
return [
re.search(r"\d{2}-\d+", e).group(0)
for e in self.html.xpath("//item/title/text()")
]

def _get_precedential_statuses(self):
return ["Unpublished" for _ in range(0, len(self.case_names))]
# https://media.cadc.uscourts.gov/judgments/
self.url = "https://media.cadc.uscourts.gov/judgments/bydate/recent"
self.status = "Unpublished"
134 changes: 42 additions & 92 deletions tests/examples/opinions/united_states/cadc_example.compare.json
Original file line number Diff line number Diff line change
@@ -1,152 +1,102 @@
[
{
"case_dates": "2012-02-24",
"case_names": "United States v. Russel Washington",
"download_urls": "http://www.cadc.uscourts.gov/internet/opinions.nsf/0/416EE5E6C04FE4D8852579AE005350C9/$file/11-3020-1360191.pdf",
"case_dates": "2024-12-27",
"case_names": "United States v. Darrell Neely",
"download_urls": "/opinions/docs/2024/12/23-3166-2091593.pdf",
"precedential_statuses": "Published",
"blocked_statuses": false,
"date_filed_is_approximate": false,
"docket_numbers": "11-3020",
"docket_numbers": "23-3166",
"case_name_shorts": ""
},
{
"case_dates": "2012-02-24",
"case_names": "ATK Launch Systems, Inc. v. EPA",
"download_urls": "http://www.cadc.uscourts.gov/internet/opinions.nsf/0/183A9B86A0A9E393852579AE005350AF/$file/10-1004-1360185.pdf",
"case_dates": "2024-12-20",
"case_names": "United States v. James Little",
"download_urls": "/opinions/docs/2024/12/24-3011-2090711.pdf",
"precedential_statuses": "Published",
"blocked_statuses": false,
"date_filed_is_approximate": false,
"docket_numbers": "10-1004",
"docket_numbers": "24-3011",
"case_name_shorts": ""
},
{
"case_dates": "2012-02-23",
"case_names": "Thomas Gust v. United States",
"download_urls": "http://www.cadc.uscourts.gov/internet/opinions.nsf/0/9B806BC824324286852579AD00695D51/$file/11-5203-1360058.pdf",
"case_dates": "2024-12-20",
"case_names": "Timothy Jenkins v. Howard University",
"download_urls": "/opinions/docs/2024/12/23-7093-2090701.pdf",
"precedential_statuses": "Published",
"blocked_statuses": false,
"date_filed_is_approximate": false,
"docket_numbers": "11-5203",
"docket_numbers": "23-7093",
"case_name_shorts": ""
},
{
"case_dates": "2012-02-21",
"case_names": "Talal Al-Zahrani v. Esteban Rodriguez",
"download_urls": "http://www.cadc.uscourts.gov/internet/opinions.nsf/0/6F1C608D9D1D95B7852579AB0053A8EF/$file/10-5393-1359343.pdf",
"case_dates": "2024-12-20",
"case_names": "Stingray Pipeline Company, L.L.C. v. FERC",
"download_urls": "/opinions/docs/2024/12/23-1288-2090695.pdf",
"precedential_statuses": "Published",
"blocked_statuses": false,
"date_filed_is_approximate": false,
"docket_numbers": "10-5393",
"case_name_shorts": ""
},
{
"case_dates": "2012-02-21",
"case_names": "Paul Keohane v. United States",
"download_urls": "http://www.cadc.uscourts.gov/internet/opinions.nsf/0/423FB5CC30E2A1C9852579AB0053A920/$file/11-5127-1359354.pdf",
"precedential_statuses": "Published",
"blocked_statuses": false,
"date_filed_is_approximate": false,
"docket_numbers": "11-5127",
"case_name_shorts": ""
},
{
"case_dates": "2012-02-21",
"case_names": "Gulf Power Company v. FCC",
"download_urls": "http://www.cadc.uscourts.gov/internet/opinions.nsf/0/F49C232270AC0438852579AB0053A90F/$file/11-1215-1359348.pdf",
"precedential_statuses": "Published",
"blocked_statuses": false,
"date_filed_is_approximate": false,
"docket_numbers": "11-1215",
"case_name_shorts": ""
},
{
"case_dates": "2012-02-17",
"case_names": "Shahintaj Bakhtiar v. Islamic Republic of Iran",
"download_urls": "http://www.cadc.uscourts.gov/internet/opinions.nsf/0/7F41214DFEB7B444852579A70053DE13/$file/10-7030-1359077.pdf",
"precedential_statuses": "Published",
"blocked_statuses": false,
"date_filed_is_approximate": false,
"docket_numbers": "10-7030",
"case_name_shorts": ""
},
{
"case_dates": "2012-02-17",
"case_names": "Blue Ridge Env. Def. League v. NRC",
"download_urls": "http://www.cadc.uscourts.gov/internet/opinions.nsf/0/91C359F6C3D905CE852579A70053DDDD/$file/09-1112-1359064.pdf",
"precedential_statuses": "Published",
"blocked_statuses": false,
"date_filed_is_approximate": false,
"docket_numbers": "09-1112",
"case_name_shorts": "NRC"
"docket_numbers": "23-1288",
"case_name_shorts": "FERC"
},
{
"case_dates": "2012-02-17",
"case_names": "Allied Mechanical Services, In v. NLRB",
"download_urls": "http://www.cadc.uscourts.gov/internet/opinions.nsf/0/88AAFC3D5C2285ED852579A70053DE01/$file/10-1328-1359071.pdf",
"case_dates": "2024-12-20",
"case_names": "Environmental Defense Fund v. EPA",
"download_urls": "/opinions/docs/2024/12/23-1166-2090691.pdf",
"precedential_statuses": "Published",
"blocked_statuses": false,
"date_filed_is_approximate": false,
"docket_numbers": "10-1328",
"docket_numbers": "23-1166",
"case_name_shorts": ""
},
{
"case_dates": "2012-02-10",
"case_names": "United States v. Allen Murdock",
"download_urls": "http://www.cadc.uscourts.gov/internet/opinions.nsf/0/8017C676DBEC4DE1852579A000553F31/$file/11-3068-1357614.pdf",
"case_dates": "2024-12-20",
"case_names": "Aenergy, S.A. v. Republic of Angola",
"download_urls": "/opinions/docs/2024/12/23-7160-2090706.pdf",
"precedential_statuses": "Published",
"blocked_statuses": false,
"date_filed_is_approximate": false,
"docket_numbers": "11-3068",
"docket_numbers": "23-7160",
"case_name_shorts": ""
},
{
"case_dates": "2012-02-07",
"case_names": "Department of Treasury v. FLRA",
"download_urls": "http://www.cadc.uscourts.gov/internet/opinions.nsf/0/A0B4F3579755FF508525799D0054B02F/$file/11-1102.pdf",
"case_dates": "2024-12-17",
"case_names": "Vanda Pharmaceuticals, Inc. v. FDA",
"download_urls": "/opinions/docs/2024/12/23-5200-2089874.pdf",
"precedential_statuses": "Published",
"blocked_statuses": false,
"date_filed_is_approximate": false,
"docket_numbers": "11-1102",
"case_name_shorts": "FLRA"
"docket_numbers": "23-5200",
"case_name_shorts": "FDA"
},
{
"case_dates": "2012-02-07",
"case_names": "Brian Hall v. Kathleen Sebelius",
"download_urls": "http://www.cadc.uscourts.gov/internet/opinions.nsf/0/890596479218E0818525799D00548389/$file/11-5076-1356903.pdf",
"case_dates": "2024-12-17",
"case_names": "VTCU Corp. v. NLRB",
"download_urls": "/opinions/docs/2024/12/23-1281-2089868.pdf",
"precedential_statuses": "Published",
"blocked_statuses": false,
"date_filed_is_approximate": false,
"docket_numbers": "11-5076",
"docket_numbers": "23-1281",
"case_name_shorts": ""
},
{
"case_dates": "2012-02-07",
"case_names": "Braintree Electric Light Dept. v. FERC",
"download_urls": "http://www.cadc.uscourts.gov/internet/opinions.nsf/0/DB24885C8D655AFB8525799D00548367/$file/09-1231-1356885.pdf",
"precedential_statuses": "Published",
"blocked_statuses": false,
"date_filed_is_approximate": false,
"docket_numbers": "09-1231",
"case_name_shorts": "FERC"
},
{
"case_dates": "2012-02-07",
"case_names": "Abdul-Rahman Suleiman v. Barack Obama",
"download_urls": "http://www.cadc.uscourts.gov/internet/opinions.nsf/0/86507E518F36FEC08525799D0054D4AD/$file/10-5292.pdf",
"case_dates": "2024-12-13",
"case_names": "Acumen Capital Partners, LLC v. NLRB",
"download_urls": "/opinions/docs/2024/12/23-1237-2089411.pdf",
"precedential_statuses": "Published",
"blocked_statuses": false,
"date_filed_is_approximate": false,
"docket_numbers": "10-5292",
"docket_numbers": "23-1237",
"case_name_shorts": ""
},
{
"case_dates": "2012-02-07",
"case_names": "Abdul-Rahman Suleiman v. Barack Obama",
"download_urls": "http://www.cadc.uscourts.gov/internet/opinions.nsf/0/40F144AB87FDD8838525799D0054839C/$file/10-5292-1356892.pdf",
"case_dates": "2024-12-09",
"case_names": "United States v. Bryan Burwell",
"download_urls": "/opinions/docs/2024/12/16-3009-2088533.pdf",
"precedential_statuses": "Published",
"blocked_statuses": false,
"date_filed_is_approximate": false,
"docket_numbers": "10-5292",
"docket_numbers": "16-3009",
"case_name_shorts": ""
}
]
Loading

0 comments on commit 68b33be

Please sign in to comment.