From 94e2cd05e99d6644c591376e3c89d81404dfc5a6 Mon Sep 17 00:00:00 2001
From: ttys0dev <126845556+ttys0dev@users.noreply.github.com>
Date: Tue, 5 Sep 2023 22:34:36 -0600
Subject: [PATCH] Parse attachments from docket when available

---
 juriscraper/pacer/docket_report.py            | 143 ++++++
 juriscraper/pacer/reports.py                  |  11 +-
 juriscraper/pacer/utils.py                    | 406 +++++++++++++++++-
 .../pacer/dockets/district/cand_7.json        |  42 +-
 .../pacer/dockets/district/dcd_3.json         |  92 +++-
 tests/local/test_PacerUtilTest.py             |  32 +-
 6 files changed, 711 insertions(+), 15 deletions(-)

diff --git a/juriscraper/pacer/docket_report.py b/juriscraper/pacer/docket_report.py
index 6b8f2d348..49bd2517e 100644
--- a/juriscraper/pacer/docket_report.py
+++ b/juriscraper/pacer/docket_report.py
@@ -992,6 +992,142 @@ def _get_docket_entry_rows(self) -> List[HtmlElement]:
         )
         return docket_entry_all_rows
 
+    def _get_attachment_number(self, row):
+        """Return the attachment number parsed from the row's first td text.
+
+        District court rows carry the attachment number directly. Bankruptcy
+        court rows are tallied with the main document included, so 1 is
+        subtracted from the parsed value for them.
+        """
+        first_cell_text = row.xpath(".//td/text()")[0]
+        number = int(first_cell_text.strip())
+        offset = 1 if self.is_bankruptcy else 0
+        return number - offset
+
+    def _get_description_from_tr(self, row):
+        """Return the first text node of the row's description cell."""
+        if not self.is_bankruptcy:
+            index = 2
+            # Some NEF attachment pages for some courts have an extra column
+            # (see nyed_123019137279); use index 3 to get the description.
+            columns_in_row = row.xpath("./td")
+            if len(columns_in_row) == 5:
+                index = 3
+        else:
+            index = 3
+
+        description_text_nodes = row.xpath(f"./td[{index}]//text()")
+        if not description_text_nodes:
+            # No text in the cell.
+            return ""
+        description = description_text_nodes[0].strip()
+        return force_unicode(description)
+
+    @staticmethod
+    def _get_page_count_from_tr(tr):
+        """Return the page count (int) found in an attachment table row, or
+        None when no cell yields a parsable count.
+        """
+        pg_cnt_str_nodes = tr.xpath('./td[contains(., "page")]/text()')
+        if not pg_cnt_str_nodes:
+            # It's a restricted document without page count information.
+            return None
+
+        for pg_cnt_str_node in pg_cnt_str_nodes:
+            try:
+                return int(pg_cnt_str_node.split()[0])
+            except (ValueError, IndexError):
+                # ValueError: the description contains the word "page" and
+                # was matched by the xpath. IndexError: the text node was
+                # blank. Either way, press on.
+                continue
+        return None
+
+    @staticmethod
+    def _get_file_size_str_from_tr(tr):
+        """Take a row from the attachment table and return the text of its
+        last cell if it names a size in KB or MB, else an empty string.
+        """
+        size_text = tr.xpath("./td")[-1].text_content()
+        lowered = size_text.lower()
+        for unit in ("kb", "mb"):
+            if unit in lowered:
+                return size_text.strip()
+        return ""
+
+    def _get_pacer_doc_id(self, row):
+        """Take in a row from the attachment table and return the pacer_doc_id
+        for the item in that row. Return None if the ID cannot be found.
+        """
+        try:
+            input_el = row.xpath(".//input")[0]
+            value = input_el.xpath("./@value")[0]
+        except IndexError:
+            # No input in the row, or the input carries no value. The item
+            # exists but cannot be downloaded; perhaps it's sealed or
+            # otherwise unavailable in PACER.
+            return None
+        # Only the part of the value before the first "-" belongs to the ID;
+        # it is appended to the court's doc ID prefix and a literal "0".
+        pacer_doc_suffix = value.split("-")[0]
+        return self.doc_id_prefix + "0" + pacer_doc_suffix
+
+    @staticmethod
+    def _get_pacer_seq_no_from_tr(row):
+        """Take a row of the attachment table and return the sequence number
+        encoded in the name attribute of the row's first input element.
+
+        The name is split on "_" and the third field is returned. None is
+        returned when the row has no input, the input has no name, or the
+        name has fewer than three fields.
+        """
+        inputs = row.xpath(".//input")
+        if not inputs:
+            # No input in the row. Maybe it's sealed.
+            return None
+        names = inputs[0].xpath("./@name")
+        if not names:
+            # The input carries no name attribute.
+            return None
+        fields = names[0].split("_")
+        # NOTE(review): assumes the seq_no is always the third field.
+        return fields[2] if len(fields) > 2 else None
+
+    def _get_attachments(self, cells):
+        """Build one attachment dict per row of the table nested in a cell.
+
+        Despite its plural name, cells is the single element whose
+        "./table//tr" rows are parsed.
+        """
+        return [
+            {
+                "attachment_number": self._get_attachment_number(row),
+                "description": self._get_description_from_tr(row),
+                "page_count": self._get_page_count_from_tr(row),
+                "file_size_str": self._get_file_size_str_from_tr(row),
+                "pacer_doc_id": self._get_pacer_doc_id(row),
+                # Re-parsing seq_no per row may be unnecessary: so far it
+                # has always matched the main document's. Do it anyway.
+                "pacer_seq_no": self._get_pacer_seq_no_from_tr(row),
+            }
+            for row in cells.xpath("./table//tr")
+        ]
+
+    @staticmethod
+    def _merge_de_with_attachment(de, attachment):
+        """Copy file_size_str and page_count from attachment onto de in place.
+        Raise ValueError if their pacer_doc_id or pacer_seq_no differ.
+        """
+        for label in ("doc_id", "seq_no"):
+            key = f"pacer_{label}"
+            if de[key] != attachment[key]:
+                raise ValueError(
+                    f"docket entry {label} {de[key]} does not match "
+                    f"attachment 0 {label} {attachment[key]}"
+                )
+        de["file_size_str"] = attachment["file_size_str"]
+        de["page_count"] = attachment["page_count"]
+
     @property
     def docket_entries(self):
         if self._docket_entries is not None:
@@ -1037,6 +1173,13 @@ def docket_entries(self):
 
             date_filed_str = force_unicode(cells[0].text_content())
             if not date_filed_str.strip():
+                if view_multiple_documents and len(cells) >= 3:
+                    last_de = docket_entries[-1]
+                    attachments = self._get_attachments(cells[2])
+                    if attachments[0]["attachment_number"] == 0:
+                        de_attachment = attachments.pop(0)
+                        self._merge_de_with_attachment(last_de, de_attachment)
+                    last_de["attachments"] = attachments
                 # Some older dockets have missing dates. Press on.
                 continue
             de["date_filed"] = convert_date_string(date_filed_str)
diff --git a/juriscraper/pacer/reports.py b/juriscraper/pacer/reports.py
index 7db853042..fdba70e28 100644
--- a/juriscraper/pacer/reports.py
+++ b/juriscraper/pacer/reports.py
@@ -16,7 +16,12 @@
     strip_bad_html_tags_insecure,
 )
 from ..lib.log_tools import make_default_logger
-from .utils import is_pdf, make_doc1_url, make_docs1_url
+from .utils import (
+    get_doc_id_prefix_from_court_id,
+    is_pdf,
+    make_doc1_url,
+    make_docs1_url,
+)
 
 logger = make_default_logger()
 
@@ -54,6 +59,10 @@ def __init__(self, court_id, pacer_session=None):
         self.response = None
         self.is_valid = None
 
+    @property
+    def doc_id_prefix(self):
+        return get_doc_id_prefix_from_court_id(self.court_id)
+
     @property
     def url(self):
         if self.court_id == "psc":
diff --git a/juriscraper/pacer/utils.py b/juriscraper/pacer/utils.py
index 11cfc65aa..be275d544 100644
--- a/juriscraper/pacer/utils.py
+++ b/juriscraper/pacer/utils.py
@@ -11,6 +11,394 @@
 from ..lib.exceptions import ParsingException
 
 
+def get_court_id_from_doc_id_prefix(prefix):
+    prefix_to_cid_map = {
+        "016": "almb",
+        "017": "almd",
+        "018": "alnb",
+        "019": "alnd",
+        "020": "alsb",
+        "021": "alsd",
+        "022": "akb",
+        "023": "akd",
+        "024": "arb",
+        "025": "azd",
+        "026": "areb",
+        "027": "ared",
+        "028": "arwb",
+        "029": "arwd",
+        "031": "cacd",
+        "032": "caeb",
+        "033": "caed",
+        "034": "canb",
+        "035": "cand",
+        "036": "casb",
+        "037": "casd",
+        "038": "cob",
+        "039": "cod",
+        "040": "ctb",
+        "041": "ctd",
+        "042": "deb",
+        "043": "ded",
+        "044": "dcb",
+        "045": "dcd",
+        "046": "flmb",
+        "047": "flmd",
+        "048": "flnb",
+        "049": "flnd",
+        "050": "flsb",
+        "051": "flsd",
+        "052": "gamb",
+        "053": "gamd",
+        "054": "ganb",
+        "055": "gand",
+        "056": "gasb",
+        "057": "gasd",
+        "058": "gub",
+        "059": "gud",
+        "060": "hib",
+        "061": "hid",
+        "062": "idb",
+        "063": "idd",
+        "064": "ilcb",
+        "065": "ilcd",
+        "066": "ilnb",
+        "067": "ilnd",
+        "068": "ilsb",
+        "069": "ilsd",
+        "070": "innb",
+        "071": "innd",
+        "072": "insb",
+        "073": "insd",
+        "074": "ianb",
+        "075": "iand",
+        "076": "iasb",
+        "077": "iasd",
+        "078": "ksb",
+        "079": "ksd",
+        "080": "kyeb",
+        "081": "kyed",
+        "082": "kywb",
+        "083": "kywd",
+        "084": "laeb",
+        "085": "laed",
+        "086": "lamb",
+        "087": "lamd",
+        "088": "lawb",
+        "089": "lawd",
+        "090": "meb",
+        "091": "med",
+        "092": "mdb",
+        "093": "mdd",
+        "094": "mab",
+        "095": "mad",
+        "096": "mieb",
+        "097": "mied",
+        "098": "miwb",
+        "099": "miwd",
+        "100": "mnb",
+        "101": "mnd",
+        "102": "msnb",
+        "103": "msnd",
+        "104": "mssb",
+        "105": "mssd",
+        "106": "moeb",
+        "107": "moed",
+        "108": "mowb",
+        "109": "mowd",
+        "110": "mtb",
+        "111": "mtd",
+        "112": "nebraskab",
+        "113": "ned",
+        "114": "nvb",
+        "115": "nvd",
+        "116": "nhb",
+        "117": "nhd",
+        "118": "njb",
+        "119": "njd",
+        "120": "nmb",
+        "121": "nmd",
+        "122": "nyeb",
+        "123": "nyed",
+        "124": "nynb",
+        "125": "nynd",
+        "126": "nysb",
+        "127": "nysd",
+        "128": "nywb",
+        "129": "nywd",
+        "130": "nceb",
+        "131": "nced",
+        "132": "ncmb",
+        "133": "ncmd",
+        "134": "ncwb",
+        "135": "ncwd",
+        "136": "ndb",
+        "137": "ndd",
+        "138": "nmib",
+        "139": "nmid",
+        "140": "ohnb",
+        "141": "ohnd",
+        "142": "ohsb",
+        "143": "ohsd",
+        "144": "okeb",
+        "145": "oked",
+        "146": "oknb",
+        "147": "oknd",
+        "148": "okwb",
+        "149": "okwd",
+        "150": "orb",
+        "151": "ord",
+        "152": "paeb",
+        "153": "paed",
+        "154": "pamb",
+        "155": "pamd",
+        "156": "pawb",
+        "157": "pawd",
+        "158": "prb",
+        "159": "prd",
+        "160": "rib",
+        "161": "rid",
+        "162": "scb",
+        "163": "scd",
+        "164": "sdb",
+        "165": "sdd",
+        "166": "tneb",
+        "167": "tned",
+        "168": "tnmb",
+        "169": "tnmd",
+        "170": "tnwb",
+        "171": "tnwd",
+        "174": "txeb",
+        "175": "txed",
+        "176": "txnb",
+        "177": "txnd",
+        "178": "txsb",
+        "179": "txsd",
+        "180": "txwb",
+        "181": "txwd",
+        "182": "utb",
+        "183": "utd",
+        "184": "vtb",
+        "185": "vtd",
+        "188": "vaeb",
+        "189": "vaed",
+        "190": "vawb",
+        "191": "vawd",
+        "192": "vib",
+        "193": "vid",
+        "194": "waeb",
+        "195": "waed",
+        "196": "wawb",
+        "197": "wawd",
+        "198": "wvnb",
+        "199": "wvnd",
+        "200": "wvsb",
+        "201": "wvsd",
+        "202": "wieb",
+        "203": "wied",
+        "204": "wiwb",
+        "205": "wiwd",
+        "206": "wyb",
+        "207": "wyd",
+        "973": "cacb",
+    }
+    return prefix_to_cid_map[prefix]
+
+
+def get_doc_id_prefix_from_court_id(court_id):
+    cid_to_prefix_map = {
+        "akb": "022",
+        "akd": "023",
+        "almb": "016",
+        "almd": "017",
+        "alnb": "018",
+        "alnd": "019",
+        "alsb": "020",
+        "alsd": "021",
+        "arb": "024",
+        "areb": "026",
+        "ared": "027",
+        "arwb": "028",
+        "arwd": "029",
+        "azd": "025",
+        "cacb": "973",
+        "cacd": "031",
+        "caeb": "032",
+        "caed": "033",
+        "canb": "034",
+        "cand": "035",
+        "casb": "036",
+        "casd": "037",
+        "cob": "038",
+        "cod": "039",
+        "ctb": "040",
+        "ctd": "041",
+        "dcb": "044",
+        "dcd": "045",
+        "deb": "042",
+        "ded": "043",
+        "flmb": "046",
+        "flmd": "047",
+        "flnb": "048",
+        "flnd": "049",
+        "flsb": "050",
+        "flsd": "051",
+        "gamb": "052",
+        "gamd": "053",
+        "ganb": "054",
+        "gand": "055",
+        "gasb": "056",
+        "gasd": "057",
+        "gub": "058",
+        "gud": "059",
+        "hib": "060",
+        "hid": "061",
+        "ianb": "074",
+        "iand": "075",
+        "iasb": "076",
+        "iasd": "077",
+        "idb": "062",
+        "idd": "063",
+        "ilcb": "064",
+        "ilcd": "065",
+        "ilnb": "066",
+        "ilnd": "067",
+        "ilsb": "068",
+        "ilsd": "069",
+        "innb": "070",
+        "innd": "071",
+        "insb": "072",
+        "insd": "073",
+        "ksb": "078",
+        "ksd": "079",
+        "kyeb": "080",
+        "kyed": "081",
+        "kywb": "082",
+        "kywd": "083",
+        "laeb": "084",
+        "laed": "085",
+        "lamb": "086",
+        "lamd": "087",
+        "lawb": "088",
+        "lawd": "089",
+        "mab": "094",
+        "mad": "095",
+        "mdb": "092",
+        "mdd": "093",
+        "meb": "090",
+        "med": "091",
+        "mieb": "096",
+        "mied": "097",
+        "miwb": "098",
+        "miwd": "099",
+        "mnb": "100",
+        "mnd": "101",
+        "moeb": "106",
+        "moed": "107",
+        "mowb": "108",
+        "mowd": "109",
+        "msnb": "102",
+        "msnd": "103",
+        "mssb": "104",
+        "mssd": "105",
+        "mtb": "110",
+        "mtd": "111",
+        "nceb": "130",
+        "nced": "131",
+        "ncmb": "132",
+        "ncmd": "133",
+        "ncwb": "134",
+        "ncwd": "135",
+        "ndb": "136",
+        "ndd": "137",
+        "nebraskab": "112",
+        "ned": "113",
+        "nhb": "116",
+        "nhd": "117",
+        "njb": "118",
+        "njd": "119",
+        "nmb": "120",
+        "nmd": "121",
+        "nmib": "138",
+        "nmid": "139",
+        "nvb": "114",
+        "nvd": "115",
+        "nyeb": "122",
+        "nyed": "123",
+        "nynb": "124",
+        "nynd": "125",
+        "nysb": "126",
+        "nysd": "127",
+        "nywb": "128",
+        "nywd": "129",
+        "ohnb": "140",
+        "ohnd": "141",
+        "ohsb": "142",
+        "ohsd": "143",
+        "okeb": "144",
+        "oked": "145",
+        "oknb": "146",
+        "oknd": "147",
+        "okwb": "148",
+        "okwd": "149",
+        "orb": "150",
+        "ord": "151",
+        "paeb": "152",
+        "paed": "153",
+        "pamb": "154",
+        "pamd": "155",
+        "pawb": "156",
+        "pawd": "157",
+        "prb": "158",
+        "prd": "159",
+        "rib": "160",
+        "rid": "161",
+        "scb": "162",
+        "scd": "163",
+        "sdb": "164",
+        "sdd": "165",
+        "tneb": "166",
+        "tned": "167",
+        "tnmb": "168",
+        "tnmd": "169",
+        "tnwb": "170",
+        "tnwd": "171",
+        "txeb": "174",
+        "txed": "175",
+        "txnb": "176",
+        "txnd": "177",
+        "txsb": "178",
+        "txsd": "179",
+        "txwb": "180",
+        "txwd": "181",
+        "utb": "182",
+        "utd": "183",
+        "vaeb": "188",
+        "vaed": "189",
+        "vawb": "190",
+        "vawd": "191",
+        "vib": "192",
+        "vid": "193",
+        "vtb": "184",
+        "vtd": "185",
+        "waeb": "194",
+        "waed": "195",
+        "wawb": "196",
+        "wawd": "197",
+        "wieb": "202",
+        "wied": "203",
+        "wiwb": "204",
+        "wiwd": "205",
+        "wvnb": "198",
+        "wvnd": "199",
+        "wvsb": "200",
+        "wvsd": "201",
+        "wyb": "206",
+        "wyd": "207",
+    }
+    return cid_to_prefix_map[court_id]
+
+
 def get_pacer_court_info():
     r = requests.get("https://court-version-scraper.fly.dev/courts.json")
     return r.json()
@@ -186,11 +574,19 @@ def make_doc1_url(court_id, pacer_doc_id, skip_attachment_page):
     if skip_attachment_page and pacer_doc_id[3] == "0":
         # If the fourth digit is a 0, replace it with a 1
         pacer_doc_id = f"{pacer_doc_id[:3]}1{pacer_doc_id[4:]}"
+    doc_id_cid = get_court_id_from_doc_id_prefix(pacer_doc_id[:3])
+    if court_id is None:
+        court_id = doc_id_cid
+    elif court_id != doc_id_cid:
+        raise ValueError(
+            f"pacer_doc_id {pacer_doc_id} prefix has court_id {doc_id_cid}, "
+            f"expected {court_id}"
+        )
     return f"https://ecf.{court_id}.uscourts.gov/doc1/{pacer_doc_id}"
 
 
 def make_docs1_url(
-    court_id: str, pacer_doc_id: str, skip_attachment_page
+    court_id: Optional[str], pacer_doc_id: str, skip_attachment_page
 ) -> str:
     """Make a docs1 URL for NDAs free look downloads.
 
@@ -200,6 +596,14 @@ def make_docs1_url(
     if skip_attachment_page and pacer_doc_id[3] == "0":
         # If the fourth digit is a 0, replace it with a 1
         pacer_doc_id = f"{pacer_doc_id[:3]}1{pacer_doc_id[4:]}"
+    doc_id_cid = get_court_id_from_doc_id_prefix(pacer_doc_id[:3])
+    if court_id is None:
+        court_id = doc_id_cid
+    elif court_id != doc_id_cid:
+        raise ValueError(
+            f"pacer_doc_id {pacer_doc_id} prefix has court_id {doc_id_cid}, "
+            f"expected {court_id}"
+        )
     return f"https://ecf.{court_id}.uscourts.gov/docs1/{pacer_doc_id}"
 
 
diff --git a/tests/examples/pacer/dockets/district/cand_7.json b/tests/examples/pacer/dockets/district/cand_7.json
index 5ca6af69c..cb7bf8345 100644
--- a/tests/examples/pacer/dockets/district/cand_7.json
+++ b/tests/examples/pacer/dockets/district/cand_7.json
@@ -10,12 +10,24 @@
   "demand": "$5,000,000,000",
   "docket_entries": [
     {
+      "attachments": [
+        {
+          "attachment_number": 1,
+          "description": "Civil Cover Sheet",
+          "file_size_str": "248.8 KB",
+          "pacer_doc_id": "035023513539",
+          "pacer_seq_no": "10",
+          "page_count": 2
+        }
+      ],
       "date_entered": "2023-09-05",
       "date_filed": "2023-09-05",
       "description": "COMPLAINT against Microsoft Corporation, OpenAI GP, OpenAI Incorporated, OpenAI LP, OpenAI Startup Fund GP I, LLC, OpenAI Startup Fund I, LP, OpenAI Startup Fund Mangement, LLC ( Filing fee $ 402, receipt number ACANDC-18615816.). Filed byJ. H., A. T.. (Attachments: # 1 Civil Cover Sheet)(Ram, Michael) (Filed on 9/5/2023) (Entered: 09/05/2023)",
       "document_number": "1",
+      "file_size_str": "1.0 MB",
       "pacer_doc_id": "035023513538",
-      "pacer_seq_no": "10"
+      "pacer_seq_no": "10",
+      "page_count": 121
     },
     {
       "date_entered": "2023-09-06",
@@ -34,20 +46,44 @@
       "pacer_seq_no": "17"
     },
     {
+      "attachments": [
+        {
+          "attachment_number": 1,
+          "description": "Exhibit Certificate of Good Standing",
+          "file_size_str": "4.6 MB",
+          "pacer_doc_id": "035023515599",
+          "pacer_seq_no": "19",
+          "page_count": 1
+        }
+      ],
       "date_entered": "2023-09-06",
       "date_filed": "2023-09-06",
       "description": "MOTION for leave to appear in Pro Hac Vice ( Filing fee $ 317, receipt number ACANDC-18617738.) filed by J. H., A. T.. (Attachments: # 1 Exhibit Certificate of Good Standing)(Yanchunis, John) (Filed on 9/6/2023) (Entered: 09/06/2023)",
       "document_number": "4",
+      "file_size_str": "88.4 KB",
       "pacer_doc_id": "035023515598",
-      "pacer_seq_no": "19"
+      "pacer_seq_no": "19",
+      "page_count": 2
     },
     {
+      "attachments": [
+        {
+          "attachment_number": 1,
+          "description": "Exhibit Certificate of Good Standing",
+          "file_size_str": "644.5 KB",
+          "pacer_doc_id": "035023515656",
+          "pacer_seq_no": "23",
+          "page_count": 1
+        }
+      ],
       "date_entered": "2023-09-06",
       "date_filed": "2023-09-06",
       "description": "MOTION for leave to appear in Pro Hac Vice ( Filing fee $ 317, receipt number ACANDC-18617813.) filed by J. H., A. T.. (Attachments: # 1 Exhibit Certificate of Good Standing)(McGee, Ryan) (Filed on 9/6/2023) (Entered: 09/06/2023)",
       "document_number": "5",
+      "file_size_str": "88.4 KB",
       "pacer_doc_id": "035023515655",
-      "pacer_seq_no": "23"
+      "pacer_seq_no": "23",
+      "page_count": 2
     },
     {
       "date_entered": "2023-09-06",
diff --git a/tests/examples/pacer/dockets/district/dcd_3.json b/tests/examples/pacer/dockets/district/dcd_3.json
index 7b41e4789..adf610e61 100644
--- a/tests/examples/pacer/dockets/district/dcd_3.json
+++ b/tests/examples/pacer/dockets/district/dcd_3.json
@@ -10,12 +10,32 @@
   "demand": "",
   "docket_entries": [
     {
+      "attachments": [
+        {
+          "attachment_number": 1,
+          "description": "Civil Cover Sheet",
+          "file_size_str": "37.6 KB",
+          "pacer_doc_id": "04508117527",
+          "pacer_seq_no": "15",
+          "page_count": 2
+        },
+        {
+          "attachment_number": 2,
+          "description": "Summons",
+          "file_size_str": "73.8 KB",
+          "pacer_doc_id": "04508117528",
+          "pacer_seq_no": "15",
+          "page_count": 2
+        }
+      ],
       "date_entered": "2020-10-20",
       "date_filed": "2020-10-20",
       "description": "COMPLAINT against GOOGLE LLC filed by UNITED STATES OF AMERICA. (Attachments: # 1 Civil Cover Sheet, # 2 Summons)(ztnr) (Entered: 10/20/2020)",
       "document_number": "1",
+      "file_size_str": "1.0 MB",
       "pacer_doc_id": "04508117526",
-      "pacer_seq_no": "15"
+      "pacer_seq_no": "15",
+      "page_count": 64
     },
     {
       "date_entered": "2021-08-19",
@@ -58,12 +78,80 @@
       "pacer_seq_no": "2039"
     },
     {
+      "attachments": [
+        {
+          "attachment_number": 1,
+          "description": "Memorandum in Support A",
+          "file_size_str": "621.7 KB",
+          "pacer_doc_id": "04509920630",
+          "pacer_seq_no": "2042",
+          "page_count": 21
+        },
+        {
+          "attachment_number": 2,
+          "description": "Exhibit B",
+          "file_size_str": "80.6 KB",
+          "pacer_doc_id": "04509920631",
+          "pacer_seq_no": "2042",
+          "page_count": 1
+        },
+        {
+          "attachment_number": 3,
+          "description": "Exhibit C",
+          "file_size_str": "80.0 KB",
+          "pacer_doc_id": "04509920632",
+          "pacer_seq_no": "2042",
+          "page_count": 1
+        },
+        {
+          "attachment_number": 4,
+          "description": "Exhibit D",
+          "file_size_str": "80.0 KB",
+          "pacer_doc_id": "04509920633",
+          "pacer_seq_no": "2042",
+          "page_count": 1
+        },
+        {
+          "attachment_number": 5,
+          "description": "Exhibit E",
+          "file_size_str": "79.7 KB",
+          "pacer_doc_id": "04509920634",
+          "pacer_seq_no": "2042",
+          "page_count": 1
+        },
+        {
+          "attachment_number": 6,
+          "description": "Exhibit F",
+          "file_size_str": "80.0 KB",
+          "pacer_doc_id": "04509920635",
+          "pacer_seq_no": "2042",
+          "page_count": 1
+        },
+        {
+          "attachment_number": 7,
+          "description": "Exhibit G",
+          "file_size_str": "52.9 KB",
+          "pacer_doc_id": "04509920636",
+          "pacer_seq_no": "2042",
+          "page_count": 1
+        },
+        {
+          "attachment_number": 8,
+          "description": "Certificate of Service",
+          "file_size_str": "312.2 KB",
+          "pacer_doc_id": "04509920637",
+          "pacer_seq_no": "2042",
+          "page_count": 1
+        }
+      ],
       "date_entered": "2023-05-09",
       "date_filed": "2023-05-09",
       "description": "REDACTED DOCUMENT- Plaintiff States' Motion for Leave to File a Supplemental Response to Certain Questions of the Court at Oral Argument to 584 Sealed Document, by STATE OF COLORADO. (Attachments: # 1 Memorandum in Support A, # 2 Exhibit B, # 3 Exhibit C, # 4 Exhibit D, # 5 Exhibit E, # 6 Exhibit F, # 7 Exhibit G, # 8 Certificate of Service)(Sallet, Jonathan) (Entered: 05/09/2023)",
       "document_number": "590",
+      "file_size_str": "310.6 KB",
       "pacer_doc_id": "04509920629",
-      "pacer_seq_no": "2042"
+      "pacer_seq_no": "2042",
+      "page_count": 16
     }
   ],
   "docket_number": "1:20-cv-03010",
diff --git a/tests/local/test_PacerUtilTest.py b/tests/local/test_PacerUtilTest.py
index 614000216..c073f64fa 100644
--- a/tests/local/test_PacerUtilTest.py
+++ b/tests/local/test_PacerUtilTest.py
@@ -133,20 +133,36 @@ def test_make_doc1_url(self):
         """Can we make good doc1 urls?"""
         qa_pairs = (
             (
-                ("cand", "01712427473", False),
-                "https://ecf.cand.uscourts.gov/doc1/01712427473",
+                ("almd", "01712427473", False),
+                "https://ecf.almd.uscourts.gov/doc1/01712427473",
             ),
             (
-                ("cand", "01702427473", False),
-                "https://ecf.cand.uscourts.gov/doc1/01702427473",
+                ("almd", "01702427473", False),
+                "https://ecf.almd.uscourts.gov/doc1/01702427473",
             ),
             (
-                ("cand", "01712427473", True),
-                "https://ecf.cand.uscourts.gov/doc1/01712427473",
+                ("almd", "01712427473", True),
+                "https://ecf.almd.uscourts.gov/doc1/01712427473",
             ),
             (
-                ("cand", "01702427473", True),
-                "https://ecf.cand.uscourts.gov/doc1/01712427473",
+                ("almd", "01702427473", True),
+                "https://ecf.almd.uscourts.gov/doc1/01712427473",
+            ),
+            (
+                (None, "01712427473", False),
+                "https://ecf.almd.uscourts.gov/doc1/01712427473",
+            ),
+            (
+                (None, "01702427473", False),
+                "https://ecf.almd.uscourts.gov/doc1/01702427473",
+            ),
+            (
+                (None, "01712427473", True),
+                "https://ecf.almd.uscourts.gov/doc1/01712427473",
+            ),
+            (
+                (None, "01702427473", True),
+                "https://ecf.almd.uscourts.gov/doc1/01712427473",
             ),
         )
         for q, a in qa_pairs: