From 94e2cd05e99d6644c591376e3c89d81404dfc5a6 Mon Sep 17 00:00:00 2001 From: ttys0dev <126845556+ttys0dev@users.noreply.github.com> Date: Tue, 5 Sep 2023 22:34:36 -0600 Subject: [PATCH] Parse attachments from docket when available --- juriscraper/pacer/docket_report.py | 143 ++++++ juriscraper/pacer/reports.py | 11 +- juriscraper/pacer/utils.py | 406 +++++++++++++++++- .../pacer/dockets/district/cand_7.json | 42 +- .../pacer/dockets/district/dcd_3.json | 92 +++- tests/local/test_PacerUtilTest.py | 32 +- 6 files changed, 711 insertions(+), 15 deletions(-) diff --git a/juriscraper/pacer/docket_report.py b/juriscraper/pacer/docket_report.py index 6b8f2d348..49bd2517e 100644 --- a/juriscraper/pacer/docket_report.py +++ b/juriscraper/pacer/docket_report.py @@ -992,6 +992,142 @@ def _get_docket_entry_rows(self) -> List[HtmlElement]: ) return docket_entry_all_rows + def _get_attachment_number(self, row): + """Return the attachment number for an item. + + In district courts, this can be easily extracted. In bankruptcy courts, + you must extract it, then subtract 1 from the value since these are + tallied and include the main document. + """ + number = int(row.xpath(".//td/text()")[0].strip()) + if self.is_bankruptcy: + return number - 1 + return number + + def _get_description_from_tr(self, row): + """Get the description from the row""" + if not self.is_bankruptcy: + index = 2 + # Some NEFs attachment pages for some courts have an extra column + # (see nyed_123019137279), use index 3 to get the description + columns_in_row = row.xpath(f"./td") + if len(columns_in_row) == 5: + index = 3 + else: + index = 3 + + description_text_nodes = row.xpath(f"./td[{index}]//text()") + if not description_text_nodes: + # No text in the cell. 
+ return "" + description = description_text_nodes[0].strip() + return force_unicode(description) + + @staticmethod + def _get_page_count_from_tr(tr): + """Take a row from the attachment table and return the page count as an + int, or None if no page count can be parsed from the row. + """ + pg_cnt_str_nodes = tr.xpath('./td[contains(., "page")]/text()') + if not pg_cnt_str_nodes: + # It's a restricted document without page count information. + return None + + for pg_cnt_str_node in pg_cnt_str_nodes: + try: + pg_cnt_str = pg_cnt_str_node.strip() + return int(pg_cnt_str.split()[0]) + except ValueError: + # Happens when the description field contains the + # word "page" and gets caught by the xpath. Just + # press on. + continue + + @staticmethod + def _get_file_size_str_from_tr(tr): + """Take a row from the attachment table and return the file size as a + string such as "248.8 KB", or an empty string if the last cell has none. + """ + cells = tr.xpath("./td") + last_cell_contents = cells[-1].text_content() + units = ["kb", "mb"] + if any(unit in last_cell_contents.lower() for unit in units): + return last_cell_contents.strip() + return "" + + def _get_pacer_doc_id(self, row): + """Take in a row from the attachment table and return the pacer_doc_id + for the item in that row. Return None if the ID cannot be found. + """ + try: + input = row.xpath(".//input")[0] + except IndexError: + # Item exists, but cannot download document. Perhaps it's sealed + # or otherwise unavailable in PACER. This is carried over from the + # docket report and may not be needed here, but it's a good + # precaution. + return None + else: + value = input.xpath("./@value")[0] + pacer_doc_suffix = value.split("-")[0] + return self.doc_id_prefix + "0" + pacer_doc_suffix + + @staticmethod + def _get_pacer_seq_no_from_tr(row): + """Take a row of the attachment table, and return the sequence number + parsed from the name attribute of the row's input element, or None. + """ + try: + input = row.xpath(".//input")[0] + except IndexError: + # No input in the row. Maybe it's sealed.
+ pass + else: + try: + name = input.xpath("./@name")[0] + except IndexError: + # The input has no name attribute. + pass + else: + return name.split("_")[2] + + return None + + def _get_attachments(self, cells): + rows = cells.xpath("./table//tr") + + result = [] + for row in rows: + result.append( + { + "attachment_number": self._get_attachment_number(row), + "description": self._get_description_from_tr(row), + "page_count": self._get_page_count_from_tr(row), + "file_size_str": self._get_file_size_str_from_tr(row), + "pacer_doc_id": self._get_pacer_doc_id(row), + # It may not be needed to reparse the seq_no + # for each row, but we may as well. So far, it + # has always been the same as the main document. + "pacer_seq_no": self._get_pacer_seq_no_from_tr(row), + } + ) + return result + + @staticmethod + def _merge_de_with_attachment(de, attachment): + if de["pacer_doc_id"] != attachment["pacer_doc_id"]: + raise ValueError( + f"docket entry doc_id {de['pacer_doc_id']} does not match " + f"attachment 0 doc_id {attachment['pacer_doc_id']}" + ) + if de["pacer_seq_no"] != attachment["pacer_seq_no"]: + raise ValueError( + f"docket entry seq_no {de['pacer_seq_no']} does not match " + f"attachment 0 seq_no {attachment['pacer_seq_no']}" + ) + de["file_size_str"] = attachment["file_size_str"] + de["page_count"] = attachment["page_count"] + @property def docket_entries(self): if self._docket_entries is not None: @@ -1037,6 +1173,13 @@ def docket_entries(self): date_filed_str = force_unicode(cells[0].text_content()) if not date_filed_str.strip(): + if view_multiple_documents and len(cells) >= 3: + last_de = docket_entries[-1] + attachments = self._get_attachments(cells[2]) + if attachments[0]["attachment_number"] == 0: + de_attachment = attachments.pop(0) + self._merge_de_with_attachment(last_de, de_attachment) + last_de["attachments"] = attachments # Some older dockets have missing dates. Press on.
continue de["date_filed"] = convert_date_string(date_filed_str) diff --git a/juriscraper/pacer/reports.py b/juriscraper/pacer/reports.py index 7db853042..fdba70e28 100644 --- a/juriscraper/pacer/reports.py +++ b/juriscraper/pacer/reports.py @@ -16,7 +16,12 @@ strip_bad_html_tags_insecure, ) from ..lib.log_tools import make_default_logger -from .utils import is_pdf, make_doc1_url, make_docs1_url +from .utils import ( + get_doc_id_prefix_from_court_id, + is_pdf, + make_doc1_url, + make_docs1_url, +) logger = make_default_logger() @@ -54,6 +59,10 @@ def __init__(self, court_id, pacer_session=None): self.response = None self.is_valid = None + @property + def doc_id_prefix(self): + return get_doc_id_prefix_from_court_id(self.court_id) + @property def url(self): if self.court_id == "psc": diff --git a/juriscraper/pacer/utils.py b/juriscraper/pacer/utils.py index 11cfc65aa..be275d544 100644 --- a/juriscraper/pacer/utils.py +++ b/juriscraper/pacer/utils.py @@ -11,6 +11,394 @@ from ..lib.exceptions import ParsingException +def get_court_id_from_doc_id_prefix(prefix): + prefix_to_cid_map = { + "016": "almb", + "017": "almd", + "018": "alnb", + "019": "alnd", + "020": "alsb", + "021": "alsd", + "022": "akb", + "023": "akd", + "024": "arb", + "025": "azd", + "026": "areb", + "027": "ared", + "028": "arwb", + "029": "arwd", + "031": "cacd", + "032": "caeb", + "033": "caed", + "034": "canb", + "035": "cand", + "036": "casb", + "037": "casd", + "038": "cob", + "039": "cod", + "040": "ctb", + "041": "ctd", + "042": "deb", + "043": "ded", + "044": "dcb", + "045": "dcd", + "046": "flmb", + "047": "flmd", + "048": "flnb", + "049": "flnd", + "050": "flsb", + "051": "flsd", + "052": "gamb", + "053": "gamd", + "054": "ganb", + "055": "gand", + "056": "gasb", + "057": "gasd", + "058": "gub", + "059": "gud", + "060": "hib", + "061": "hid", + "062": "idb", + "063": "idd", + "064": "ilcb", + "065": "ilcd", + "066": "ilnb", + "067": "ilnd", + "068": "ilsb", + "069": "ilsd", + "070": "innb", 
+ "071": "innd", + "072": "insb", + "073": "insd", + "074": "ianb", + "075": "iand", + "076": "iasb", + "077": "iasd", + "078": "ksb", + "079": "ksd", + "080": "kyeb", + "081": "kyed", + "082": "kywb", + "083": "kywd", + "084": "laeb", + "085": "laed", + "086": "lamb", + "087": "lamd", + "088": "lawb", + "089": "lawd", + "090": "meb", + "091": "med", + "092": "mdb", + "093": "mdd", + "094": "mab", + "095": "mad", + "096": "mieb", + "097": "mied", + "098": "miwb", + "099": "miwd", + "100": "mnb", + "101": "mnd", + "102": "msnb", + "103": "msnd", + "104": "mssb", + "105": "mssd", + "106": "moeb", + "107": "moed", + "108": "mowb", + "109": "mowd", + "110": "mtb", + "111": "mtd", + "112": "nebraskab", + "113": "ned", + "114": "nvb", + "115": "nvd", + "116": "nhb", + "117": "nhd", + "118": "njb", + "119": "njd", + "120": "nmb", + "121": "nmd", + "122": "nyeb", + "123": "nyed", + "124": "nynb", + "125": "nynd", + "126": "nysb", + "127": "nysd", + "128": "nywb", + "129": "nywd", + "130": "nceb", + "131": "nced", + "132": "ncmb", + "133": "ncmd", + "134": "ncwb", + "135": "ncwd", + "136": "ndb", + "137": "ndd", + "138": "nmib", + "139": "nmid", + "140": "ohnb", + "141": "ohnd", + "142": "ohsb", + "143": "ohsd", + "144": "okeb", + "145": "oked", + "146": "oknb", + "147": "oknd", + "148": "okwb", + "149": "okwd", + "150": "orb", + "151": "ord", + "152": "paeb", + "153": "paed", + "154": "pamb", + "155": "pamd", + "156": "pawb", + "157": "pawd", + "158": "prb", + "159": "prd", + "160": "rib", + "161": "rid", + "162": "scb", + "163": "scd", + "164": "sdb", + "165": "sdd", + "166": "tneb", + "167": "tned", + "168": "tnmb", + "169": "tnmd", + "170": "tnwb", + "171": "tnwd", + "174": "txeb", + "175": "txed", + "176": "txnb", + "177": "txnd", + "178": "txsb", + "179": "txsd", + "180": "txwb", + "181": "txwd", + "182": "utb", + "183": "utd", + "184": "vtb", + "185": "vtd", + "188": "vaeb", + "189": "vaed", + "190": "vawb", + "191": "vawd", + "192": "vib", + "193": "vid", + "194": 
"waeb", + "195": "waed", + "196": "wawb", + "197": "wawd", + "198": "wvnb", + "199": "wvnd", + "200": "wvsb", + "201": "wvsd", + "202": "wieb", + "203": "wied", + "204": "wiwb", + "205": "wiwd", + "206": "wyb", + "207": "wyd", + "973": "cacb", + } + return prefix_to_cid_map[prefix] + + +def get_doc_id_prefix_from_court_id(court_id): + cid_to_prefix_map = { + "akb": "022", + "akd": "023", + "almb": "016", + "almd": "017", + "alnb": "018", + "alnd": "019", + "alsb": "020", + "alsd": "021", + "arb": "024", + "areb": "026", + "ared": "027", + "arwb": "028", + "arwd": "029", + "azd": "025", + "cacb": "973", + "cacd": "031", + "caeb": "032", + "caed": "033", + "canb": "034", + "cand": "035", + "casb": "036", + "casd": "037", + "cob": "038", + "cod": "039", + "ctb": "040", + "ctd": "041", + "dcb": "044", + "dcd": "045", + "deb": "042", + "ded": "043", + "flmb": "046", + "flmd": "047", + "flnb": "048", + "flnd": "049", + "flsb": "050", + "flsd": "051", + "gamb": "052", + "gamd": "053", + "ganb": "054", + "gand": "055", + "gasb": "056", + "gasd": "057", + "gub": "058", + "gud": "059", + "hib": "060", + "hid": "061", + "ianb": "074", + "iand": "075", + "iasb": "076", + "iasd": "077", + "idb": "062", + "idd": "063", + "ilcb": "064", + "ilcd": "065", + "ilnb": "066", + "ilnd": "067", + "ilsb": "068", + "ilsd": "069", + "innb": "070", + "innd": "071", + "insb": "072", + "insd": "073", + "ksb": "078", + "ksd": "079", + "kyeb": "080", + "kyed": "081", + "kywb": "082", + "kywd": "083", + "laeb": "084", + "laed": "085", + "lamb": "086", + "lamd": "087", + "lawb": "088", + "lawd": "089", + "mab": "094", + "mad": "095", + "mdb": "092", + "mdd": "093", + "meb": "090", + "med": "091", + "mieb": "096", + "mied": "097", + "miwb": "098", + "miwd": "099", + "mnb": "100", + "mnd": "101", + "moeb": "106", + "moed": "107", + "mowb": "108", + "mowd": "109", + "msnb": "102", + "msnd": "103", + "mssb": "104", + "mssd": "105", + "mtb": "110", + "mtd": "111", + "nceb": "130", + "nced": "131", + 
"ncmb": "132", + "ncmd": "133", + "ncwb": "134", + "ncwd": "135", + "ndb": "136", + "ndd": "137", + "nebraskab": "112", + "ned": "113", + "nhb": "116", + "nhd": "117", + "njb": "118", + "njd": "119", + "nmb": "120", + "nmd": "121", + "nmib": "138", + "nmid": "139", + "nvb": "114", + "nvd": "115", + "nyeb": "122", + "nyed": "123", + "nynb": "124", + "nynd": "125", + "nysb": "126", + "nysd": "127", + "nywb": "128", + "nywd": "129", + "ohnb": "140", + "ohnd": "141", + "ohsb": "142", + "ohsd": "143", + "okeb": "144", + "oked": "145", + "oknb": "146", + "oknd": "147", + "okwb": "148", + "okwd": "149", + "orb": "150", + "ord": "151", + "paeb": "152", + "paed": "153", + "pamb": "154", + "pamd": "155", + "pawb": "156", + "pawd": "157", + "prb": "158", + "prd": "159", + "rib": "160", + "rid": "161", + "scb": "162", + "scd": "163", + "sdb": "164", + "sdd": "165", + "tneb": "166", + "tned": "167", + "tnmb": "168", + "tnmd": "169", + "tnwb": "170", + "tnwd": "171", + "txeb": "174", + "txed": "175", + "txnb": "176", + "txnd": "177", + "txsb": "178", + "txsd": "179", + "txwb": "180", + "txwd": "181", + "utb": "182", + "utd": "183", + "vaeb": "188", + "vaed": "189", + "vawb": "190", + "vawd": "191", + "vib": "192", + "vid": "193", + "vtb": "184", + "vtd": "185", + "waeb": "194", + "waed": "195", + "wawb": "196", + "wawd": "197", + "wieb": "202", + "wied": "203", + "wiwb": "204", + "wiwd": "205", + "wvnb": "198", + "wvnd": "199", + "wvsb": "200", + "wvsd": "201", + "wyb": "206", + "wyd": "207", + } + return cid_to_prefix_map[court_id] + + def get_pacer_court_info(): r = requests.get("https://court-version-scraper.fly.dev/courts.json") return r.json() @@ -186,11 +574,19 @@ def make_doc1_url(court_id, pacer_doc_id, skip_attachment_page): if skip_attachment_page and pacer_doc_id[3] == "0": # If the fourth digit is a 0, replace it with a 1 pacer_doc_id = f"{pacer_doc_id[:3]}1{pacer_doc_id[4:]}" + doc_id_cid = get_court_id_from_doc_id_prefix(pacer_doc_id[:3]) + if court_id is None: + 
court_id = doc_id_cid + elif court_id != doc_id_cid: + raise ValueError( + f"pacer_doc_id {pacer_doc_id} prefix has court_id {doc_id_cid}, " + f"expected {court_id}" + ) return f"https://ecf.{court_id}.uscourts.gov/doc1/{pacer_doc_id}" def make_docs1_url( - court_id: str, pacer_doc_id: str, skip_attachment_page + court_id: Optional[str], pacer_doc_id: str, skip_attachment_page ) -> str: """Make a docs1 URL for NDAs free look downloads. @@ -200,6 +596,14 @@ def make_docs1_url( if skip_attachment_page and pacer_doc_id[3] == "0": # If the fourth digit is a 0, replace it with a 1 pacer_doc_id = f"{pacer_doc_id[:3]}1{pacer_doc_id[4:]}" + doc_id_cid = get_court_id_from_doc_id_prefix(pacer_doc_id[:3]) + if court_id is None: + court_id = doc_id_cid + elif court_id != doc_id_cid: + raise ValueError( + f"pacer_doc_id {pacer_doc_id} prefix has court_id {doc_id_cid}, " + f"expected {court_id}" + ) return f"https://ecf.{court_id}.uscourts.gov/docs1/{pacer_doc_id}" diff --git a/tests/examples/pacer/dockets/district/cand_7.json b/tests/examples/pacer/dockets/district/cand_7.json index 5ca6af69c..cb7bf8345 100644 --- a/tests/examples/pacer/dockets/district/cand_7.json +++ b/tests/examples/pacer/dockets/district/cand_7.json @@ -10,12 +10,24 @@ "demand": "$5,000,000,000", "docket_entries": [ { + "attachments": [ + { + "attachment_number": 1, + "description": "Civil Cover Sheet", + "file_size_str": "248.8 KB", + "pacer_doc_id": "035023513539", + "pacer_seq_no": "10", + "page_count": 2 + } + ], "date_entered": "2023-09-05", "date_filed": "2023-09-05", "description": "COMPLAINT against Microsoft Corporation, OpenAI GP, OpenAI Incorporated, OpenAI LP, OpenAI Startup Fund GP I, LLC, OpenAI Startup Fund I, LP, OpenAI Startup Fund Mangement, LLC ( Filing fee $ 402, receipt number ACANDC-18615816.). Filed byJ. H., A. T.. 
(Attachments: # 1 Civil Cover Sheet)(Ram, Michael) (Filed on 9/5/2023) (Entered: 09/05/2023)", "document_number": "1", + "file_size_str": "1.0 MB", "pacer_doc_id": "035023513538", - "pacer_seq_no": "10" + "pacer_seq_no": "10", + "page_count": 121 }, { "date_entered": "2023-09-06", @@ -34,20 +46,44 @@ "pacer_seq_no": "17" }, { + "attachments": [ + { + "attachment_number": 1, + "description": "Exhibit Certificate of Good Standing", + "file_size_str": "4.6 MB", + "pacer_doc_id": "035023515599", + "pacer_seq_no": "19", + "page_count": 1 + } + ], "date_entered": "2023-09-06", "date_filed": "2023-09-06", "description": "MOTION for leave to appear in Pro Hac Vice ( Filing fee $ 317, receipt number ACANDC-18617738.) filed by J. H., A. T.. (Attachments: # 1 Exhibit Certificate of Good Standing)(Yanchunis, John) (Filed on 9/6/2023) (Entered: 09/06/2023)", "document_number": "4", + "file_size_str": "88.4 KB", "pacer_doc_id": "035023515598", - "pacer_seq_no": "19" + "pacer_seq_no": "19", + "page_count": 2 }, { + "attachments": [ + { + "attachment_number": 1, + "description": "Exhibit Certificate of Good Standing", + "file_size_str": "644.5 KB", + "pacer_doc_id": "035023515656", + "pacer_seq_no": "23", + "page_count": 1 + } + ], "date_entered": "2023-09-06", "date_filed": "2023-09-06", "description": "MOTION for leave to appear in Pro Hac Vice ( Filing fee $ 317, receipt number ACANDC-18617813.) filed by J. H., A. T.. 
(Attachments: # 1 Exhibit Certificate of Good Standing)(McGee, Ryan) (Filed on 9/6/2023) (Entered: 09/06/2023)", "document_number": "5", + "file_size_str": "88.4 KB", "pacer_doc_id": "035023515655", - "pacer_seq_no": "23" + "pacer_seq_no": "23", + "page_count": 2 }, { "date_entered": "2023-09-06", diff --git a/tests/examples/pacer/dockets/district/dcd_3.json b/tests/examples/pacer/dockets/district/dcd_3.json index 7b41e4789..adf610e61 100644 --- a/tests/examples/pacer/dockets/district/dcd_3.json +++ b/tests/examples/pacer/dockets/district/dcd_3.json @@ -10,12 +10,32 @@ "demand": "", "docket_entries": [ { + "attachments": [ + { + "attachment_number": 1, + "description": "Civil Cover Sheet", + "file_size_str": "37.6 KB", + "pacer_doc_id": "04508117527", + "pacer_seq_no": "15", + "page_count": 2 + }, + { + "attachment_number": 2, + "description": "Summons", + "file_size_str": "73.8 KB", + "pacer_doc_id": "04508117528", + "pacer_seq_no": "15", + "page_count": 2 + } + ], "date_entered": "2020-10-20", "date_filed": "2020-10-20", "description": "COMPLAINT against GOOGLE LLC filed by UNITED STATES OF AMERICA. 
(Attachments: # 1 Civil Cover Sheet, # 2 Summons)(ztnr) (Entered: 10/20/2020)", "document_number": "1", + "file_size_str": "1.0 MB", "pacer_doc_id": "04508117526", - "pacer_seq_no": "15" + "pacer_seq_no": "15", + "page_count": 64 }, { "date_entered": "2021-08-19", @@ -58,12 +78,80 @@ "pacer_seq_no": "2039" }, { + "attachments": [ + { + "attachment_number": 1, + "description": "Memorandum in Support A", + "file_size_str": "621.7 KB", + "pacer_doc_id": "04509920630", + "pacer_seq_no": "2042", + "page_count": 21 + }, + { + "attachment_number": 2, + "description": "Exhibit B", + "file_size_str": "80.6 KB", + "pacer_doc_id": "04509920631", + "pacer_seq_no": "2042", + "page_count": 1 + }, + { + "attachment_number": 3, + "description": "Exhibit C", + "file_size_str": "80.0 KB", + "pacer_doc_id": "04509920632", + "pacer_seq_no": "2042", + "page_count": 1 + }, + { + "attachment_number": 4, + "description": "Exhibit D", + "file_size_str": "80.0 KB", + "pacer_doc_id": "04509920633", + "pacer_seq_no": "2042", + "page_count": 1 + }, + { + "attachment_number": 5, + "description": "Exhibit E", + "file_size_str": "79.7 KB", + "pacer_doc_id": "04509920634", + "pacer_seq_no": "2042", + "page_count": 1 + }, + { + "attachment_number": 6, + "description": "Exhibit F", + "file_size_str": "80.0 KB", + "pacer_doc_id": "04509920635", + "pacer_seq_no": "2042", + "page_count": 1 + }, + { + "attachment_number": 7, + "description": "Exhibit G", + "file_size_str": "52.9 KB", + "pacer_doc_id": "04509920636", + "pacer_seq_no": "2042", + "page_count": 1 + }, + { + "attachment_number": 8, + "description": "Certificate of Service", + "file_size_str": "312.2 KB", + "pacer_doc_id": "04509920637", + "pacer_seq_no": "2042", + "page_count": 1 + } + ], "date_entered": "2023-05-09", "date_filed": "2023-05-09", "description": "REDACTED DOCUMENT- Plaintiff States' Motion for Leave to File a Supplemental Response to Certain Questions of the Court at Oral Argument to 584 Sealed Document, by STATE OF COLORADO. 
(Attachments: # 1 Memorandum in Support A, # 2 Exhibit B, # 3 Exhibit C, # 4 Exhibit D, # 5 Exhibit E, # 6 Exhibit F, # 7 Exhibit G, # 8 Certificate of Service)(Sallet, Jonathan) (Entered: 05/09/2023)", "document_number": "590", + "file_size_str": "310.6 KB", "pacer_doc_id": "04509920629", - "pacer_seq_no": "2042" + "pacer_seq_no": "2042", + "page_count": 16 } ], "docket_number": "1:20-cv-03010", diff --git a/tests/local/test_PacerUtilTest.py b/tests/local/test_PacerUtilTest.py index 614000216..c073f64fa 100644 --- a/tests/local/test_PacerUtilTest.py +++ b/tests/local/test_PacerUtilTest.py @@ -133,20 +133,36 @@ def test_make_doc1_url(self): """Can we make good doc1 urls?""" qa_pairs = ( ( - ("cand", "01712427473", False), - "https://ecf.cand.uscourts.gov/doc1/01712427473", + ("almd", "01712427473", False), + "https://ecf.almd.uscourts.gov/doc1/01712427473", ), ( - ("cand", "01702427473", False), - "https://ecf.cand.uscourts.gov/doc1/01702427473", + ("almd", "01702427473", False), + "https://ecf.almd.uscourts.gov/doc1/01702427473", ), ( - ("cand", "01712427473", True), - "https://ecf.cand.uscourts.gov/doc1/01712427473", + ("almd", "01712427473", True), + "https://ecf.almd.uscourts.gov/doc1/01712427473", ), ( - ("cand", "01702427473", True), - "https://ecf.cand.uscourts.gov/doc1/01712427473", + ("almd", "01702427473", True), + "https://ecf.almd.uscourts.gov/doc1/01712427473", + ), + ( + (None, "01712427473", False), + "https://ecf.almd.uscourts.gov/doc1/01712427473", + ), + ( + (None, "01702427473", False), + "https://ecf.almd.uscourts.gov/doc1/01702427473", + ), + ( + (None, "01712427473", True), + "https://ecf.almd.uscourts.gov/doc1/01712427473", + ), + ( + (None, "01702427473", True), + "https://ecf.almd.uscourts.gov/doc1/01712427473", ), ) for q, a in qa_pairs: