Skip to content

Commit

Permalink
Merge pull request #1137 from ttys0dev/fix-ia-attachment-descriptions
Browse files: browse the repository at this point in the history
Fix IA attachment description keys
  • Loading branch information
mlissner authored Aug 28, 2024
2 parents e57f9b8 + e325eeb commit 357f4db
Show file tree
Hide file tree
Showing 13 changed files with 360 additions and 899 deletions.
20 changes: 8 additions & 12 deletions juriscraper/pacer/internet_archive.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,14 +175,9 @@ def docket_entries(self):

de_nodes = self.tree.xpath("//document_list/document")
docket_entries = []
prev_date_filed = None
for de_node in de_nodes:
de = {
"document_number": de_node.xpath("./@doc_num")[0],
"description": self._xpath_text_0(de_node, "./long_desc"),
"short_description": self._xpath_text_0(
de_node, "./short_desc"
),
"pacer_seq_no": self._xpath_text_0(
de_node, "./pacer_de_seq_num"
)
Expand All @@ -191,6 +186,12 @@ def docket_entries(self):
attachment_number = de_node.xpath("./@attachment_num")[0]
if attachment_number != "0":
de["attachment_number"] = attachment_number
de["description"] = self._xpath_text_0(de_node, "./short_desc")
else:
de["description"] = self._xpath_text_0(de_node, "./long_desc")
de["short_description"] = self._xpath_text_0(
de_node, "./short_desc"
)

date_filed_str = self._xpath_text_0(de_node, "./date_filed")
if date_filed_str:
Expand All @@ -200,15 +201,9 @@ def docket_entries(self):
except ValueError:
# Fails for dates like 0000-00-00
de["date_filed"] = None
else:
prev_date_filed = de["date_filed"]
else:
# No date found.
if de.get("attachment_number"):
# If it's an attachment, it probably lacks a date. Get it
# from the previously stored item.
de["date_filed"] = prev_date_filed
else:
if not de.get("attachment_number"):
# If not an attachment, it's probably an old docket entry,
# which sometimes lack dates. Press on.
continue
Expand All @@ -226,6 +221,7 @@ def docket_entries(self):
except IndexError:
continue
if last_de.get("document_number") == de["document_number"]:
del de["document_number"]
attachments = last_de.get("attachments", [])
attachments.append(de)
last_de["attachments"] = attachments
Expand Down
35 changes: 10 additions & 25 deletions tests/examples/pacer/dockets_internet_archive/almd_49523.json
Original file line number Diff line number Diff line change
Expand Up @@ -1149,12 +1149,9 @@
"attachments": [
{
"attachment_number": "1",
"date_filed": "2014-07-09",
"description": "",
"document_number": "625",
"description": "Advertisement Certification Report and Notice of Forfeiture",
"pacer_doc_id": "01702313993",
"pacer_seq_no": null,
"short_description": "Advertisement Certification Report and Notice of Forfeiture"
"pacer_seq_no": null
}
],
"date_filed": "2014-07-09",
Expand All @@ -1168,12 +1165,9 @@
"attachments": [
{
"attachment_number": "1",
"date_filed": "2014-07-09",
"description": "",
"document_number": "626",
"description": "Exhibit A",
"pacer_doc_id": "01702314758",
"pacer_seq_no": null,
"short_description": "Exhibit A"
"pacer_seq_no": null
}
],
"date_filed": "2014-07-09",
Expand All @@ -1195,12 +1189,9 @@
"attachments": [
{
"attachment_number": "1",
"date_filed": "2014-07-09",
"description": "",
"document_number": "629",
"description": "Exhibit A",
"pacer_doc_id": "01702315696",
"pacer_seq_no": null,
"short_description": "Exhibit A"
"pacer_seq_no": null
}
],
"date_filed": "2014-07-09",
Expand Down Expand Up @@ -1238,21 +1229,15 @@
"attachments": [
{
"attachment_number": "1",
"date_filed": "2014-07-28",
"description": "",
"document_number": "635",
"description": "Stipulation of Final Settlement and Release of All Claims as to Third Party Pet",
"pacer_doc_id": "01702325954",
"pacer_seq_no": null,
"short_description": "Stipulation of Final Settlement and Release of All Claims as to Third Party Pet"
"pacer_seq_no": null
},
{
"attachment_number": "2",
"date_filed": "2014-07-28",
"description": "",
"document_number": "635",
"description": "Text of Proposed Order",
"pacer_doc_id": "01702325955",
"pacer_seq_no": null,
"short_description": "Text of Proposed Order"
"pacer_seq_no": null
}
],
"date_filed": "2014-07-28",
Expand Down
56 changes: 16 additions & 40 deletions tests/examples/pacer/dockets_internet_archive/azd_1061043.json
Original file line number Diff line number Diff line change
Expand Up @@ -13,75 +13,51 @@
"attachments": [
{
"attachment_number": "1",
"date_filed": "2017-10-25",
"description": "",
"document_number": "1",
"description": "Civil Cover Sheet",
"pacer_doc_id": "025018027677",
"pacer_seq_no": null,
"short_description": "Civil Cover Sheet"
"pacer_seq_no": null
},
{
"attachment_number": "2",
"date_filed": "2017-10-25",
"description": "",
"document_number": "1",
"description": "Exhibit",
"pacer_doc_id": "025018027678",
"pacer_seq_no": null,
"short_description": "Exhibit"
"pacer_seq_no": null
},
{
"attachment_number": "3",
"date_filed": "2017-10-25",
"description": "",
"document_number": "1",
"description": "Exhibit",
"pacer_doc_id": "025018027679",
"pacer_seq_no": null,
"short_description": "Exhibit"
"pacer_seq_no": null
},
{
"attachment_number": "4",
"date_filed": "2017-10-25",
"description": "",
"document_number": "1",
"description": "Exhibit",
"pacer_doc_id": "025018027680",
"pacer_seq_no": null,
"short_description": "Exhibit"
"pacer_seq_no": null
},
{
"attachment_number": "5",
"date_filed": "2017-10-25",
"description": "",
"document_number": "1",
"description": "Exhibit",
"pacer_doc_id": "025018027681",
"pacer_seq_no": null,
"short_description": "Exhibit"
"pacer_seq_no": null
},
{
"attachment_number": "6",
"date_filed": "2017-10-25",
"description": "",
"document_number": "1",
"description": "Exhibit",
"pacer_doc_id": "025018027682",
"pacer_seq_no": null,
"short_description": "Exhibit"
"pacer_seq_no": null
},
{
"attachment_number": "7",
"date_filed": "2017-10-25",
"description": "",
"document_number": "1",
"description": "Exhibit",
"pacer_doc_id": "025018027683",
"pacer_seq_no": null,
"short_description": "Exhibit"
"pacer_seq_no": null
},
{
"attachment_number": "8",
"date_filed": "2017-10-25",
"description": "",
"document_number": "1",
"description": "Exhibit",
"pacer_doc_id": "025018027684",
"pacer_seq_no": null,
"short_description": "Exhibit"
"pacer_seq_no": null
}
],
"date_filed": "2017-10-25",
Expand Down
Loading

0 comments on commit 357f4db

Please sign in to comment.