diff --git a/.circleci/config.yml b/.circleci/config.yml old mode 100644 new mode 100755 diff --git a/.codeclimate.yml b/.codeclimate.yml old mode 100644 new mode 100755 diff --git a/.github/dependabot.yml b/.github/dependabot.yml old mode 100644 new mode 100755 index 192df5b..2caf7fc --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -16,4 +16,4 @@ updates: patterns: - "freezegun" - "pylint" - - "pytest" + - "pytest*" diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md old mode 100644 new mode 100755 diff --git a/.gitignore b/.gitignore old mode 100644 new mode 100755 diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml old mode 100644 new mode 100755 index e056019..10b1cfe --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -10,7 +10,6 @@ repos: - id: check-added-large-files # prevents giant files from being committed. - id: check-case-conflict # checks for files that would conflict in case-insensitive filesystems. - id: check-json # checks json files for parseable syntax. - - id: check-shebang-scripts-are-executable # ensures that (non-binary) files with a shebang are executable. - id: check-merge-conflict # checks for files that contain merge conflict strings. - id: check-symlinks # checks for symlinks which do not point to anything. - id: check-yaml # checks yaml files for parseable syntax. diff --git a/LICENSE b/LICENSE old mode 100644 new mode 100755 diff --git a/README.md b/README.md old mode 100644 new mode 100755 index bd7c2e0..94b760c --- a/README.md +++ b/README.md @@ -75,7 +75,7 @@ Make sure to run `pip install -r requirements.txt` and `playwright install` befo 1. Navigate to the [*spiders*](search_gov_crawler/search_gov_spiders/spiders) directory 2. Enter one of two following commands: - * This command will output the yielded URLs in the destination (relative to the [*spiders*](search_gov_crawler/search_gov_spiders/spiders) directory) and file format specified in the “FEEDS” variable of the [*settings.py*](search_gov_crawler/search_gov_spiders/settings.py) file: + * This command will output the yielded URLs in the destination (relative to the [*spiders*](search_gov_crawler/search_gov_spiders/spiders) directory) and file format specified in [*pipelines.py*](search_gov_crawler/search_gov_spiders/pipelines.py): $ scrapy runspider diff --git a/__init__.py b/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/appspec.yml b/appspec.yml new file mode 100755 index 0000000..87244e0 --- /dev/null +++ b/appspec.yml @@ -0,0 +1,25 @@ +version: 0.0 +os: linux +permissions: + - object: .
+ mode: 777 + acls: + - "d:u::rwx" + - "d:g::rwx" + - "d:o::rwx" + owner: search + type: + - directory +hooks: + AfterInstall: + - location: cicd-scripts/app_install.sh + timeout: 600 + runas: search + ApplicationStart: + - location: cicd-scripts/app_start.sh + timeout: 300 + runas: search + ApplicationStop: + - location: cicd-scripts/app_stop.sh + timeout: 300 + runas: search diff --git a/cicd-scripts/app_install.sh b/cicd-scripts/app_install.sh new file mode 100755 index 0000000..5179308 --- /dev/null +++ b/cicd-scripts/app_install.sh @@ -0,0 +1,137 @@ +#!/bin/bash + +# CD into the current script directory (which != $pwd) +cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && cd ../ + +chmod +x ./cicd-scripts/helpers/ensure_executable.sh +source ./cicd-scripts/helpers/ensure_executable.sh + +### VARIABLES ### +SPIDER_PYTHON_VERSION=3.12 +_CURRENT_BUILD_DIR=${PWD} +VENV_DIR=./venv + +### FUNCTIONS ### + +# Stop spider services +stop_services() { + echo "Running app_stop.sh..." + ensure_executable "./cicd-scripts/app_stop.sh" +} + +# Install missing system dependencies +install_system_dependencies() { + echo "Installing system dependencies..." + sudo apt-get update -y + sudo apt-get install -y \ + lzma liblzma-dev libbz2-dev python-setuptools \ + acl build-essential checkinstall libreadline-dev \ + libncursesw5-dev libssl-dev libsqlite3-dev tk-dev \ + libgdbm-dev libc6-dev zlib1g-dev libffi-dev openssl +} + +# Install Python +install_python() { + echo "Installing Python ${SPIDER_PYTHON_VERSION}..." + cd /usr/src + wget -q https://www.python.org/ftp/python/${SPIDER_PYTHON_VERSION}.0/Python-${SPIDER_PYTHON_VERSION}.0.tgz + tar xzf Python-${SPIDER_PYTHON_VERSION}.0.tgz + sudo chown -R $(whoami) ./Python-${SPIDER_PYTHON_VERSION}.0 + cd Python-${SPIDER_PYTHON_VERSION}.0 + ./configure --enable-optimizations + make + make install + make altinstall + cd "$_CURRENT_BUILD_DIR" + echo "Python ${SPIDER_PYTHON_VERSION} installed successfully." +} + +# Check and install Python if needed +check_python() { + if ! command -v python${SPIDER_PYTHON_VERSION} &>/dev/null; then + install_python + else + echo "Python ${SPIDER_PYTHON_VERSION} already installed: $(python${SPIDER_PYTHON_VERSION} --version)" + fi +} + +# Set environment paths +update_pythonpath() { + ensure_executable "./cicd-scripts/helpers/update_pythonpath.sh" +} + +# Setup virtual environment +setup_virtualenv() { + echo "Setting up virtual environment..." + python${SPIDER_PYTHON_VERSION} -m venv "$VENV_DIR" + source "$VENV_DIR/bin/activate" + python -m pip install --upgrade pip +} + +# Install dependencies +install_dependencies() { + echo "Installing dependencies..." + python -m pip install --upgrade -r ./search_gov_crawler/requirements.txt + echo "Installing Playwright..." + python -m pip install --upgrade pytest-playwright playwright + playwright install --with-deps + deactivate +} + +# Configure permissions +configure_permissions() { + echo "Configuring file permissions..." + chmod -R 777 . + chown -R "$(whoami)" . + sudo setfacl -Rdm g:dgsearch:rwx . +} + +# Manage cron jobs +manage_cron_jobs() { + echo "Managing cron jobs..." + crontab -l | grep -v 'app_start.sh' > temp_cron || true + echo "@reboot $(pwd)/cicd-scripts/app_start.sh" >> temp_cron + crontab temp_cron + rm temp_cron + echo "Cron jobs updated." +} + +# Start monitoring agents +start_agents() { + echo "Starting AWS CloudWatch agent..." + ensure_executable "./cicd-scripts/helpers/check_cloudwatch.sh" + + echo "Starting AWS CodeDeploy agent..." 
+ ensure_executable "./cicd-scripts/helpers/check_codedeploy.sh" +} + +### SCRIPT EXECUTION ### + +# Stop running services +stop_services + +# Install system dependencies +install_system_dependencies + +# Check and install Python if missing +check_python + +# Set environment paths +update_pythonpath + +# Configure permissions +configure_permissions + +# Setup and activate virtual environment +setup_virtualenv + +# Install dependencies +install_dependencies + +# Start AWS agents +start_agents + +# Manage cron jobs +manage_cron_jobs + +echo "App installation completed successfully." diff --git a/cicd-scripts/app_start.sh b/cicd-scripts/app_start.sh new file mode 100755 index 0000000..76b0081 --- /dev/null +++ b/cicd-scripts/app_start.sh @@ -0,0 +1,20 @@ +#!/bin/bash + +# CD into the current script directory (which != $pwd) +cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && cd ../ + +chmod +x ./cicd-scripts/helpers/ensure_executable.sh +source ./cicd-scripts/helpers/ensure_executable.sh + +# TODO: Make it part of the local env variable that is set by Ansible +SPIDER_RUN_WITH_UI=false + +# Determine which script to run based on the SPIDER_RUN_WITH_UI flag +if $SPIDER_RUN_WITH_UI; then + SCRIPT="./cicd-scripts/helpers/run_with_ui.sh" +else + SCRIPT="./cicd-scripts/helpers/run_without_ui.sh" +fi + +# Ensure the script exists, is executable, and run it +ensure_executable "$SCRIPT" diff --git a/cicd-scripts/app_stop.sh b/cicd-scripts/app_stop.sh new file mode 100755 index 0000000..9d536a8 --- /dev/null +++ b/cicd-scripts/app_stop.sh @@ -0,0 +1,118 @@ +#!/bin/bash + +# CD into the current script directory (which != $pwd) +cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && cd ../ + +chmod +x ./cicd-scripts/helpers/ensure_executable.sh +source ./cicd-scripts/helpers/ensure_executable.sh + +### FUNCTIONS ### + +# Remove virtual environment if it exists +remove_venv() { + if [ -d ./venv ]; then + echo "Removing virtual environment..." + rm -rf ./venv/ + fi +} + +# Purge pip cache +purge_pip_cache() { + echo "Purging pip cache..." + rm -rf ~/.cache/pip /root/.cache/pip +} + +# Stop scrapy scheduler if running +stop_scrapy_scheduler() { + echo "Stopping scrapy_scheduler.py (if running)..." + ensure_executable "./cicd-scripts/helpers/kill_scheduler.sh" +} + +# Stop scrapyd and scrapydweb tasks +stop_scrapy_tasks() { + echo "Stopping all scrapyd and scrapydweb tasks..." + + # Kill scrapydweb tasks + if pkill -f "scrapydweb" 2>/dev/null; then + echo "scrapydweb tasks stopped." + else + echo "No scrapydweb tasks running." + fi + + # Kill scrapyd tasks + if pkill -f "scrapyd" 2>/dev/null; then + echo "scrapyd tasks stopped." + else + echo "No scrapyd tasks running." + fi +} + +# Display remaining scrapy processes +display_remaining_scrapy_processes() { + echo -e "\nRemaining scrapy processes (if any):" + ps -ef | grep scrapy | grep -v grep || echo "No scrapy processes running." +} + +# Force kill any remaining scrapy background jobs +kill_remaining_scrapy_jobs() { + echo "Force killing remaining scrapy background jobs..." + if ps aux | grep -ie [s]crapy | awk '{print $2}' | xargs kill -9; then + echo "Remaining scrapy jobs killed." + else + echo "No remaining scrapy jobs to kill." + fi +} + +# Remove nohup jobs (python scripts) +remove_nohup_jobs() { + echo "Removing nohup jobs (python)..." 
+ ps -ef | grep nohup | grep -v grep | awk '{print $2}' | xargs kill -9 +} + +# Remove cron job entries referencing the given string +remove_cron_entry() { + if [ -z "$1" ]; then + echo "Error: No cron entry provided." + return + fi + + local CRON_ENTRY="$1" + local CRON_USER=$(whoami) + + echo "Removing cron job entries referencing: $CRON_ENTRY" + + # Remove cron job for the current user (including the full path if needed) + sudo crontab -l -u "$CRON_USER" 2>/dev/null | grep -v -F "$CRON_ENTRY" | sudo crontab -u "$CRON_USER" - + + echo "Cron job entries for '$CRON_ENTRY' removed." +} + +### SCRIPT EXECUTION ### + +# Remove virtual environment +remove_venv + +# Purge pip cache +purge_pip_cache + +# Stop scrapy scheduler if running +stop_scrapy_scheduler + +# Stop scrapyd and scrapydweb tasks +stop_scrapy_tasks + +# Display remaining scrapy processes (if any) +display_remaining_scrapy_processes + +# Force kill any remaining scrapy background jobs +kill_remaining_scrapy_jobs + +# Remove nohup jobs (python) +remove_nohup_jobs + +# Remove specific cron jobs +remove_cron_entry "check_cloudwatch.sh" +remove_cron_entry "check_codedeploy.sh" +remove_cron_entry "app_start.sh" + +echo "App stop completed successfully." diff --git a/cicd-scripts/helpers/check_cloudwatch.sh b/cicd-scripts/helpers/check_cloudwatch.sh new file mode 100755 index 0000000..487d122 --- /dev/null +++ b/cicd-scripts/helpers/check_cloudwatch.sh @@ -0,0 +1,32 @@ +#!/bin/bash + +# Function to check if CloudWatch agent is running +check_cloudwatch() { + if ! pgrep -f amazon-cloudwatch-agent > /dev/null; then + echo "AWS CloudWatch agent is not running. Starting it now..." + sudo service amazon-cloudwatch-agent start + if [ $? -eq 0 ]; then + echo "AWS CloudWatch agent started successfully." + else + echo "Failed to start AWS CloudWatch agent." + fi + else + echo "AWS CloudWatch agent is running." + fi +} + +# Ensure the script is added to crontab for execution on reboot +setup_cron() { + chmod +x ./cicd-scripts/helpers/check_cloudwatch.sh + CRON_ENTRY="@reboot $(pwd)/cicd-scripts/helpers/check_cloudwatch.sh" + + # Update crontab, ensuring no duplicates + (crontab -l 2>/dev/null | grep -v -F "check_cloudwatch.sh"; echo "$CRON_ENTRY") | crontab - + echo "Crontab entry added to ensure the script runs on reboot." +} + +# Execute the function +check_cloudwatch + +# Add to crontab +setup_cron diff --git a/cicd-scripts/helpers/check_codedeploy.sh b/cicd-scripts/helpers/check_codedeploy.sh new file mode 100755 index 0000000..6e6cf15 --- /dev/null +++ b/cicd-scripts/helpers/check_codedeploy.sh @@ -0,0 +1,32 @@ +#!/bin/bash + +# Function to check if CodeDeploy agent is running +check_codedeploy() { + if ! pgrep -f codedeploy-agent > /dev/null; then + echo "AWS CodeDeploy agent is not running. Starting it now..." + sudo service codedeploy-agent start + if [ $? -eq 0 ]; then + echo "AWS CodeDeploy agent started successfully." + else + echo "Failed to start AWS CodeDeploy agent." + fi + else + echo "AWS CodeDeploy agent is running." + fi +} + +# Ensure the script is added to crontab for execution on reboot +setup_cron() { + chmod +x ./cicd-scripts/helpers/check_codedeploy.sh + CRON_ENTRY="@reboot $(pwd)/cicd-scripts/helpers/check_codedeploy.sh" + + # Update crontab, ensuring no duplicates + (crontab -l 2>/dev/null | grep -v -F "check_codedeploy.sh"; echo "$CRON_ENTRY") | crontab - + echo "Crontab entry added to ensure the script runs on reboot." 
+} + +# Execute the function +check_codedeploy + +# Add to crontab +setup_cron diff --git a/cicd-scripts/helpers/ensure_executable.sh b/cicd-scripts/helpers/ensure_executable.sh new file mode 100755 index 0000000..88e6439 --- /dev/null +++ b/cicd-scripts/helpers/ensure_executable.sh @@ -0,0 +1,15 @@ +#!/bin/bash + +# Function to ensure a file exists, is executable, and then runs it +ensure_executable() { + local script="$1" + + if [ -f "$script" ]; then + chmod +x "$script" + echo "$script is now executable." + source "$script" + else + echo "Error: $script not found!" + # exit 1 + fi +} diff --git a/cicd-scripts/helpers/kill_scheduler.sh b/cicd-scripts/helpers/kill_scheduler.sh new file mode 100755 index 0000000..4c559da --- /dev/null +++ b/cicd-scripts/helpers/kill_scheduler.sh @@ -0,0 +1,24 @@ +#!/bin/bash + +# Find the process ID of the running scrapy_scheduler.py script +echo "Searching for scrapy_scheduler.py process..." +PROCESS_ID=$(pgrep -f "scrapy_scheduler.py") + +# Check if the process ID was found +if [ -n "$PROCESS_ID" ]; then + # Kill the process + echo "Killing process with PID: $PROCESS_ID" + kill "$PROCESS_ID" 2>/dev/null + + # Pause to allow the process to terminate + sleep 3 + + # Verify if the process was killed + if ! kill -0 "$PROCESS_ID" 2>/dev/null; then + echo "Process scrapy_scheduler.py (PID: $PROCESS_ID) has been terminated." + else + echo "Failed to terminate the process or process no longer exists." + fi +else + echo "No running process found for scrapy_scheduler.py." +fi diff --git a/cicd-scripts/helpers/run_with_ui.sh b/cicd-scripts/helpers/run_with_ui.sh new file mode 100755 index 0000000..de181e2 --- /dev/null +++ b/cicd-scripts/helpers/run_with_ui.sh @@ -0,0 +1,76 @@ +#!/bin/bash + +SCRAPYD_URL="http://127.0.0.1:6800/" +SCRAPYDWEB_URL="http://127.0.0.1:5000/" +SPIDER_URLS_API=https://staging.search.usa.gov/urls + +# Function to check if a URL is up and running +function check_url() { + local URL=$1 + local MAX_ATTEMPTS="${2:-3}" + local DELAY=5 + local attempt=1 + + while [ $attempt -le $MAX_ATTEMPTS ]; do + if curl --output /dev/null --silent --head --fail "$URL"; then + echo "Service at $URL is up on attempt $attempt." + return 0 + else + echo "Attempt $attempt: Service at $URL is not available, retrying in $DELAY seconds..." + fi + attempt=$((attempt+1)) + sleep $DELAY + done + + echo "Service at $URL is still not available after $MAX_ATTEMPTS attempts." + return 1 +} + +# Function to check if required command exists +function check_command() { + if ! command -v "$1" &> /dev/null; then + echo "Error: $1 is not installed or not in your PATH." + exit 1 + fi +} + +check_command "scrapyd" +check_command "scrapydweb" +check_command "curl" + +echo "Killing any existing scrapyd and scrapydweb services" +sudo pkill -f "scrapydweb" 2>/dev/null +sudo pkill -f "scrapyd" 2>/dev/null + +echo "Running searchgov-spider application..." + +# Start scrapyd +echo "Starting scrapyd service..." +sudo bash -c 'nohup scrapyd > /var/log/scrapyd.log 2>&1 &' +PID1=$! +echo "Started scrapyd with PID $PID1" + +# Check if scrapyd is running +if check_url "$SCRAPYD_URL"; then + echo "The scrapyd service is running at $SCRAPYD_URL" + sudo bash -c 'cd ./search_gov_crawler && nohup scrapydweb > /var/log/scrapydweb.log 2>&1 &' + PID2=$! + echo "Started scrapydweb with PID $PID2" + + if check_url "$SCRAPYDWEB_URL"; then + echo "The scrapydweb service is running at $SCRAPYDWEB_URL" + else + echo "Error: scrapydweb failed at $SCRAPYDWEB_URL."
+ return + fi +else + echo "Error: scrapyd failed at $SCRAPYD_URL." + return +fi + +# Display the last few lines of logs +echo -e "\n-- Last 10 lines of scrapyd.log:\n" +tail -n 10 /var/log/scrapyd.log + +echo -e "\n-- Last 10 lines of scrapydweb.log:\n" +tail -n 10 /var/log/scrapydweb.log diff --git a/cicd-scripts/helpers/run_without_ui.sh b/cicd-scripts/helpers/run_without_ui.sh new file mode 100755 index 0000000..247488b --- /dev/null +++ b/cicd-scripts/helpers/run_without_ui.sh @@ -0,0 +1,8 @@ +#!/bin/bash + +# Run the script in the background using the virtual environment +chmod +x ./search_gov_crawler/scrapy_scheduler.py + +sudo nohup bash -c "source ./venv/bin/activate && ./venv/bin/python ./search_gov_crawler/scrapy_scheduler.py" > /var/log/scrapy_scheduler.log 2>&1 & + +echo "Running no UI version of searchgov-spider..." diff --git a/cicd-scripts/helpers/update_pythonpath.sh b/cicd-scripts/helpers/update_pythonpath.sh new file mode 100755 index 0000000..e742b55 --- /dev/null +++ b/cicd-scripts/helpers/update_pythonpath.sh @@ -0,0 +1,34 @@ +#!/bin/bash + +# Define the current directory +CURRENT_DIR=$(pwd) + +# Define the .bashrc file location +BASHRC_FILE="$HOME/.bashrc" + +# Check if .bashrc contains an export PYTHONPATH line +if grep -q "^export PYTHONPATH=" "$BASHRC_FILE"; then + # Extract the existing PYTHONPATH line + PYTHONPATH_LINE=$(grep "^export PYTHONPATH=" "$BASHRC_FILE") + + # Check if the current directory is already included + if echo "$PYTHONPATH_LINE" | grep -q "$CURRENT_DIR"; then + echo "PYTHONPATH already includes the current directory: $CURRENT_DIR" + else + # Ensure the updated line includes the starting and ending quotes + CURRENT_PATHS=$(echo "$PYTHONPATH_LINE" | sed -e 's/^export PYTHONPATH=//' -e 's/^"//' -e 's/"$//') + UPDATED_LINE="export PYTHONPATH=\"${CURRENT_PATHS}:${CURRENT_DIR}\"" + sed -i "s|^export PYTHONPATH=.*|$UPDATED_LINE|" "$BASHRC_FILE" + echo "Updated PYTHONPATH to include the current directory: $CURRENT_DIR" + fi +else + # Add a new export PYTHONPATH line to .bashrc + echo "export PYTHONPATH=\"\$PYTHONPATH:${CURRENT_DIR}\"" >> "$BASHRC_FILE" + echo "Added new PYTHONPATH to .bashrc including the current directory: $CURRENT_DIR" +fi + +# Apply changes for the current session +export PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}${CURRENT_DIR}" + +echo "PYTHONPATH changes applied:" +echo $PYTHONPATH diff --git a/pyproject.toml b/pyproject.toml old mode 100644 new mode 100755 diff --git a/search_gov_crawler/__init__.py b/search_gov_crawler/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/search_gov_crawler/benchmark.py b/search_gov_crawler/benchmark.py old mode 100644 new mode 100755 diff --git a/search_gov_crawler/output/.gitignore b/search_gov_crawler/output/.gitignore old mode 100644 new mode 100755 diff --git a/search_gov_crawler/requirements.txt b/search_gov_crawler/requirements.txt old mode 100644 new mode 100755 index 645c8f8..19790be --- a/search_gov_crawler/requirements.txt +++ b/search_gov_crawler/requirements.txt @@ -2,6 +2,7 @@ freezegun==1.5.1 pylint==3.3.2 pytest==8.3.4 pytest-console-scripts==1.4.1 +pytest-mock==3.14.0 python-json-logger==3.2.0 scrapy==2.11.2 diff --git a/search_gov_crawler/scrapy.cfg b/search_gov_crawler/scrapy.cfg old mode 100644 new mode 100755 diff --git a/search_gov_crawler/scrapy_scheduler.py b/search_gov_crawler/scrapy_scheduler.py old mode 100644 new mode 100755 diff --git a/search_gov_crawler/scrapyd-logs/.gitignore b/search_gov_crawler/scrapyd-logs/.gitignore old mode 100644 new
mode 100755 diff --git a/search_gov_crawler/scrapyd.conf b/search_gov_crawler/scrapyd.conf old mode 100644 new mode 100755 diff --git a/search_gov_crawler/scrapydweb_settings_v10.py b/search_gov_crawler/scrapydweb_settings_v10.py old mode 100644 new mode 100755 diff --git a/search_gov_crawler/search_gov_logparser/__init__.py b/search_gov_crawler/search_gov_logparser/__init__.py old mode 100644 new mode 100755 diff --git a/search_gov_crawler/search_gov_scrapyd/__init__.py b/search_gov_crawler/search_gov_scrapyd/__init__.py old mode 100644 new mode 100755 diff --git a/search_gov_crawler/search_gov_scrapydweb/__init__.py b/search_gov_crawler/search_gov_scrapydweb/__init__.py old mode 100644 new mode 100755 diff --git a/search_gov_crawler/search_gov_spiders/__init__.py b/search_gov_crawler/search_gov_spiders/__init__.py old mode 100644 new mode 100755 diff --git a/search_gov_crawler/search_gov_spiders/actions/reports/email/bases/report/base.jinja b/search_gov_crawler/search_gov_spiders/actions/reports/email/bases/report/base.jinja old mode 100644 new mode 100755 index 656c94b..47f6bff --- a/search_gov_crawler/search_gov_spiders/actions/reports/email/bases/report/base.jinja +++ b/search_gov_crawler/search_gov_spiders/actions/reports/email/bases/report/base.jinja @@ -23,5 +23,3 @@ - - diff --git a/search_gov_crawler/search_gov_spiders/actions/reports/email/bases/report/email.css b/search_gov_crawler/search_gov_spiders/actions/reports/email/bases/report/email.css old mode 100644 new mode 100755 index 2f13050..7d63694 --- a/search_gov_crawler/search_gov_spiders/actions/reports/email/bases/report/email.css +++ b/search_gov_crawler/search_gov_spiders/actions/reports/email/bases/report/email.css @@ -83,5 +83,3 @@ table{max-width:100%;background-color:transparent;border-collapse:collapse;borde .icon,.icon-big {display:inline-block;} .icon {width:34px;height:34px;} .icon-big {width:140px;height:140px;} - - diff --git a/search_gov_crawler/search_gov_spiders/actions/reports/email/bases/report/medium.jinja b/search_gov_crawler/search_gov_spiders/actions/reports/email/bases/report/medium.jinja old mode 100644 new mode 100755 diff --git a/search_gov_crawler/search_gov_spiders/actions/reports/email/bases/report/report.css b/search_gov_crawler/search_gov_spiders/actions/reports/email/bases/report/report.css old mode 100644 new mode 100755 index bd1e39d..124bd07 --- a/search_gov_crawler/search_gov_spiders/actions/reports/email/bases/report/report.css +++ b/search_gov_crawler/search_gov_spiders/actions/reports/email/bases/report/report.css @@ -24,4 +24,4 @@ table.report-container td {padding: 40px 20px;} .report-section h2 {margin: 0 0 20px 0;padding: 0 0 12px 0;line-height: 20px;border-bottom: 1px solid #f4f4f4;} .report-section h3 {margin: 25px 0 5px 0;line-height: 24px;} .report-section h4 {margin: 0 0 2px 0;} -.report-footer {text-align: center;padding: 20px 10px 5px 10px;color: #cdcdcd;font-size: 14px;} \ No newline at end of file +.report-footer {text-align: center;padding: 20px 10px 5px 10px;color: #cdcdcd;font-size: 14px;} diff --git a/search_gov_crawler/search_gov_spiders/actions/results.css b/search_gov_crawler/search_gov_spiders/actions/results.css old mode 100644 new mode 100755 diff --git a/search_gov_crawler/search_gov_spiders/actions/results.jinja b/search_gov_crawler/search_gov_spiders/actions/results.jinja old mode 100644 new mode 100755 diff --git a/search_gov_crawler/search_gov_spiders/extensions/__init__.py b/search_gov_crawler/search_gov_spiders/extensions/__init__.py old mode 100644 new 
mode 100755 diff --git a/search_gov_crawler/search_gov_spiders/extensions/json_logging.py b/search_gov_crawler/search_gov_spiders/extensions/json_logging.py old mode 100644 new mode 100755 diff --git a/search_gov_crawler/search_gov_spiders/helpers/__init__.py b/search_gov_crawler/search_gov_spiders/helpers/__init__.py old mode 100644 new mode 100755 diff --git a/search_gov_crawler/search_gov_spiders/helpers/domain_spider.py b/search_gov_crawler/search_gov_spiders/helpers/domain_spider.py old mode 100644 new mode 100755 diff --git a/search_gov_crawler/search_gov_spiders/items.py b/search_gov_crawler/search_gov_spiders/items.py old mode 100644 new mode 100755 diff --git a/search_gov_crawler/search_gov_spiders/middlewares.py b/search_gov_crawler/search_gov_spiders/middlewares.py old mode 100644 new mode 100755 diff --git a/search_gov_crawler/search_gov_spiders/monitors.py b/search_gov_crawler/search_gov_spiders/monitors.py old mode 100644 new mode 100755 index 260dd94..7dafd2a --- a/search_gov_crawler/search_gov_spiders/monitors.py +++ b/search_gov_crawler/search_gov_spiders/monitors.py @@ -14,4 +14,4 @@ class PeriodicMonitorSuite(MonitorSuite): monitors_failed_actions = [ CreateCustomFileReport, SendSmtpEmail - ] \ No newline at end of file + ] diff --git a/search_gov_crawler/search_gov_spiders/pipelines.py b/search_gov_crawler/search_gov_spiders/pipelines.py old mode 100644 new mode 100755 index b626072..6cbc5e8 --- a/search_gov_crawler/search_gov_spiders/pipelines.py +++ b/search_gov_crawler/search_gov_spiders/pipelines.py @@ -2,46 +2,99 @@ Don't forget to add your pipeline to the ITEM_PIPELINES setting See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html """ - import os from pathlib import Path +import requests from scrapy.exceptions import DropItem + class SearchGovSpidersPipeline: """ - Class for pipeline that takes items and adds them - to output file with a max size of 3.9MB + Pipeline that writes items to files (rotated at ~3.9MB) or sends batched POST requests + to SPIDER_URLS_API if the environment variable is set. """ - def __init__(self, *_args, **_kwargs): - self.current_file_size = 0 + MAX_FILE_SIZE_BYTES = int(3.9 * 1024 * 1024) # 3.9MB in bytes + APP_PID = os.getpid() + + def __init__(self): + self.api_url = os.environ.get("SPIDER_URLS_API") + self.urls_batch = [] self.file_number = 1 - self.parent_file_path = Path(__file__).parent.parent.resolve() - self.base_path_name = str(self.parent_file_path / f"output/all-links-p{os.getpid()}.csv") - self.short_file = open(self.base_path_name, "a", encoding="utf-8") - self.max_file_size = 39000000 #3.9MB max - self.paginate = True + self.file_path = None + self.current_file = None + + if not self.api_url: + output_dir = Path(__file__).parent.parent / "output" + output_dir.mkdir(parents=True, exist_ok=True) + base_filename = f"all-links-p{self.APP_PID}" + self.file_path = output_dir / f"{base_filename}.csv" + self.current_file = open(self.file_path, "a", encoding="utf-8") + + def process_item(self, item, spider): + """Handle each item by writing to file or batching URLs for an API POST.""" + url = item.get("url", "") + if not url: + raise DropItem("Missing URL in item") + + if self.api_url: + self._process_api_item(url, spider) + else: + self._process_file_item(url) - def process_item(self, item, _spider): - """Checks that the file is not at max size. 
- Adds it to the file if less, or creates a new file if too large.""" - line = item["url"] - self.current_file_size += 1 - file_stats = os.stat(self.base_path_name) - self.current_file_size += file_stats.st_size - next_file_size = self.current_file_size + len(line) - if self.paginate and next_file_size > self.max_file_size: - self.short_file.close() - new_name = str(self.parent_file_path / f"output/all-links-p{os.getpid()}-{self.file_number}.csv") - os.rename(self.base_path_name, new_name) - self.file_number = self.file_number + 1 - self.short_file = open(self.base_path_name, "w", encoding="utf-8") - self.current_file_size = 0 - self.short_file.write(line) - self.short_file.write("\n") - self.current_file_size = self.current_file_size + len(line) return item + def _process_api_item(self, url, spider): + """Batch URLs for API and send POST if size limit is reached.""" + self.urls_batch.append(url) + if self._batch_size() >= self.MAX_FILE_SIZE_BYTES: + self._send_post_request(spider) + + def _process_file_item(self, url): + """Write URL to file and rotate the file if size exceeds the limit.""" + self.current_file.write(f"{url}\n") + if self._file_size() >= self.MAX_FILE_SIZE_BYTES: + self._rotate_file() + + def _batch_size(self): + """Calculate total size of the batched URLs.""" + return sum(len(url.encode("utf-8")) for url in self.urls_batch) + + def _file_size(self): + """Get the current file size.""" + self.current_file.flush() # Ensure the OS writes buffered data to disk + return self.file_path.stat().st_size + + def _rotate_file(self): + """Close the current file, rename it, and open a new one.""" + self.current_file.close() + rotated_file = self.file_path.with_name(f"{self.file_path.stem}-{self.file_number}.csv") + os.rename(self.file_path, rotated_file) + self.current_file = open(self.file_path, "a", encoding="utf-8") + self.file_number += 1 + + def _send_post_request(self, spider): + """Send a POST request with the batched URLs.""" + if not self.urls_batch: + return + + try: + response = requests.post(self.api_url, json={"urls": self.urls_batch}) + response.raise_for_status() + spider.logger.info(f"Successfully posted {len(self.urls_batch)} URLs to {self.api_url}") + except requests.RequestException as e: + spider.logger.error(f"Failed to send URLs to {self.api_url}: {e}") + raise DropItem(f"POST request failed: {e}") + finally: + self.urls_batch.clear() + + def close_spider(self, spider): + """Finalize operations: close files or send remaining batched URLs.""" + if self.api_url: + self._send_post_request(spider) + elif self.current_file: + self.current_file.close() + class DeDeuplicatorPipeline: """Class for pipeline that removes duplicate items""" diff --git a/search_gov_crawler/search_gov_spiders/settings.py b/search_gov_crawler/search_gov_spiders/settings.py old mode 100644 new mode 100755 diff --git a/search_gov_crawler/search_gov_spiders/spiders/__init__.py b/search_gov_crawler/search_gov_spiders/spiders/__init__.py old mode 100644 new mode 100755 diff --git a/search_gov_crawler/search_gov_spiders/spiders/domain_spider.py b/search_gov_crawler/search_gov_spiders/spiders/domain_spider.py old mode 100644 new mode 100755 diff --git a/search_gov_crawler/search_gov_spiders/spiders/domain_spider_js.py b/search_gov_crawler/search_gov_spiders/spiders/domain_spider_js.py old mode 100644 new mode 100755 diff --git a/search_gov_crawler/search_gov_spiders/utility_files/README.md b/search_gov_crawler/search_gov_spiders/utility_files/README.md old mode 100644 new mode 100755 diff --git 
a/search_gov_crawler/search_gov_spiders/utility_files/crawl-sites.json b/search_gov_crawler/search_gov_spiders/utility_files/crawl-sites.json old mode 100644 new mode 100755 diff --git a/search_gov_crawler/search_gov_spiders/utility_files/import_plist.py b/search_gov_crawler/search_gov_spiders/utility_files/import_plist.py old mode 100644 new mode 100755 diff --git a/search_gov_crawler/search_gov_spiders/utility_files/init_schedule.py b/search_gov_crawler/search_gov_spiders/utility_files/init_schedule.py old mode 100644 new mode 100755 diff --git a/search_gov_crawler/search_gov_spiders/utility_files/scrutiny-2023-06-20.plist b/search_gov_crawler/search_gov_spiders/utility_files/scrutiny-2023-06-20.plist old mode 100644 new mode 100755 diff --git a/search_gov_crawler/setup.py b/search_gov_crawler/setup.py old mode 100644 new mode 100755 diff --git a/setup.cfg b/setup.cfg old mode 100644 new mode 100755 diff --git a/tests/__init__.py b/tests/__init__.py old mode 100644 new mode 100755 diff --git a/tests/integration_tests/test_scrapyd.py b/tests/integration_tests/test_scrapyd.py old mode 100644 new mode 100755 diff --git a/tests/search_gov_spiders/conftest.py b/tests/search_gov_spiders/conftest.py old mode 100644 new mode 100755 diff --git a/tests/search_gov_spiders/crawl-sites-test.json b/tests/search_gov_spiders/crawl-sites-test.json old mode 100644 new mode 100755 diff --git a/tests/search_gov_spiders/scrapy_httpcache/domain_spider.db.bak b/tests/search_gov_spiders/scrapy_httpcache/domain_spider.db.bak old mode 100644 new mode 100755 diff --git a/tests/search_gov_spiders/scrapy_httpcache/domain_spider.db.dat b/tests/search_gov_spiders/scrapy_httpcache/domain_spider.db.dat old mode 100644 new mode 100755 diff --git a/tests/search_gov_spiders/scrapy_httpcache/domain_spider.db.dir b/tests/search_gov_spiders/scrapy_httpcache/domain_spider.db.dir old mode 100644 new mode 100755 diff --git a/tests/search_gov_spiders/scrapy_httpcache/domain_spider_js.db.bak b/tests/search_gov_spiders/scrapy_httpcache/domain_spider_js.db.bak old mode 100644 new mode 100755 diff --git a/tests/search_gov_spiders/scrapy_httpcache/domain_spider_js.db.dat b/tests/search_gov_spiders/scrapy_httpcache/domain_spider_js.db.dat old mode 100644 new mode 100755 diff --git a/tests/search_gov_spiders/scrapy_httpcache/domain_spider_js.db.dir b/tests/search_gov_spiders/scrapy_httpcache/domain_spider_js.db.dir old mode 100644 new mode 100755 diff --git a/tests/search_gov_spiders/test_deduplicator_pipeline.py b/tests/search_gov_spiders/test_deduplicator_pipeline.py new file mode 100644 index 0000000..55ee3e2 --- /dev/null +++ b/tests/search_gov_spiders/test_deduplicator_pipeline.py @@ -0,0 +1,146 @@ +import os +import pytest +from contextlib import suppress +from unittest.mock import MagicMock, patch +from scrapy.exceptions import DropItem +from search_gov_crawler.search_gov_spiders.pipelines import ( + SearchGovSpidersPipeline, + DeDeuplicatorPipeline, +) +from search_gov_crawler.search_gov_spiders.items import SearchGovSpidersItem +# --------------------------- +# Fixtures +# --------------------------- + +@pytest.fixture +def sample_item(): + """Fixture for a valid sample item.""" + return {"url": "http://example.com"} + +@pytest.fixture +def invalid_item(): + """Fixture for an invalid item with no URL.""" + return {} + +@pytest.fixture +def sample_spider(): + """Fixture for a mock spider with a logger.""" + class SpiderMock: + logger = MagicMock() + return SpiderMock() + +@pytest.fixture +def pipeline_no_api(): + """Fixture for 
SearchGovSpidersPipeline with no SPIDER_URLS_API.""" + with patch.dict(os.environ, {}, clear=True): + return SearchGovSpidersPipeline() + +@pytest.fixture +def pipeline_with_api(): + """Fixture for SearchGovSpidersPipeline with SPIDER_URLS_API set.""" + with patch.dict(os.environ, {"SPIDER_URLS_API": "http://mockapi.com"}): + return SearchGovSpidersPipeline() + +@pytest.fixture +def deduplicator_pipeline(): + """Fixture for DeDeuplicatorPipeline with clean state.""" + return DeDeuplicatorPipeline() + +# --------------------------- +# Tests for SearchGovSpidersPipeline +# --------------------------- + +def test_missing_url_in_item(pipeline_no_api, sample_spider, invalid_item): + """ + Verify DropItem exception is raised when an item has no URL. + """ + with pytest.raises(DropItem, match="Missing URL in item"): + pipeline_no_api.process_item(invalid_item, sample_spider) + +# --------------------------- +# Tests for DeDeuplicatorPipeline +# --------------------------- + +@pytest.mark.parametrize( + "item", + [ + {"url": "http://example.com/1"}, + {"url": "http://example.com/2"}, + ], +) +def test_deduplicator_pipeline_unique_items(deduplicator_pipeline, item): + """ + Verify that unique items are processed successfully. + """ + result = deduplicator_pipeline.process_item(item, None) + assert result == item + + +def test_deduplicator_pipeline_duplicate_item(deduplicator_pipeline, sample_item): + """ + Verify that duplicate items raise DropItem. + """ + deduplicator_pipeline.process_item(sample_item, None) # First time should pass + + with pytest.raises(DropItem, match="Item already seen!"): + deduplicator_pipeline.process_item(sample_item, None) # Duplicate raises DropItem + + +def test_deduplicator_pipeline_multiple_items(deduplicator_pipeline): + """ + Verify that multiple unique items are processed without errors. + """ + item1 = {"url": "http://example.com/1"} + item2 = {"url": "http://example.com/2"} + + result1 = deduplicator_pipeline.process_item(item1, None) + result2 = deduplicator_pipeline.process_item(item2, None) + + assert result1 == item1 + assert result2 == item2 + + +def test_deduplicator_pipeline_clean_state(): + """ + Verify that a new instance of DeDeuplicatorPipeline starts with a clean state. 
+ """ + pipeline1 = DeDeuplicatorPipeline() + pipeline2 = DeDeuplicatorPipeline() + + item = {"url": "http://example.com/1"} + + # First pipeline processes the item + result = pipeline1.process_item(item, None) + assert result == item + + # Second pipeline should also process the same item as it has a clean state + result = pipeline2.process_item(item, None) + assert result == item + +@pytest.mark.parametrize( + ("items", "urls_seen_length"), + [ + ( + [ + SearchGovSpidersItem(url="https://www.example.com/1"), + SearchGovSpidersItem(url="https://www.example.com/2"), + ], + 2, + ), + ( + [ + SearchGovSpidersItem(url="https://www.example.com/1"), + SearchGovSpidersItem(url="https://www.example.com/1"), + ], + 1, + ), + ], +) +def test_deduplicator_pipeline(items, urls_seen_length): + pl = DeDeuplicatorPipeline() + + with suppress(DropItem): + for item in items: + pl.process_item(item, None) + + assert len(pl.urls_seen) == urls_seen_length diff --git a/tests/search_gov_spiders/test_extensions.py b/tests/search_gov_spiders/test_extensions.py old mode 100644 new mode 100755 diff --git a/tests/search_gov_spiders/test_full_crawl.py b/tests/search_gov_spiders/test_full_crawl.py old mode 100644 new mode 100755 index 8ce41a0..dac5305 --- a/tests/search_gov_spiders/test_full_crawl.py +++ b/tests/search_gov_spiders/test_full_crawl.py @@ -111,13 +111,12 @@ def test_full_crawl(mock_scrapy_settings, monkeypatch, spider, use_dedup, crawl_ temp_dir.joinpath("output").mkdir(exist_ok=True) def mock_init(pipeline_cls, *_args, temp_dir=temp_dir, **_kwargs): - pipeline_cls.current_file_size = 0 + pipeline_cls.api_url = None pipeline_cls.file_number = 1 pipeline_cls.parent_file_path = temp_dir - pipeline_cls.base_path_name = str(pipeline_cls.parent_file_path / "output/all-links.csv") - pipeline_cls.short_file = open(pipeline_cls.base_path_name, "w", encoding="utf-8") - pipeline_cls.max_file_size = max_file_size - pipeline_cls.paginate = True + pipeline_cls.base_file_name = temp_dir / "output" / "all-links-p1234.csv" + pipeline_cls.file_path = pipeline_cls.base_file_name + pipeline_cls.current_file = open(pipeline_cls.file_path, "w", encoding="utf-8") monkeypatch.setattr( "search_gov_crawler.search_gov_spiders.pipelines.SearchGovSpidersPipeline.__init__", mock_init @@ -132,7 +131,7 @@ def mock_init(pipeline_cls, *_args, temp_dir=temp_dir, **_kwargs): with open(output_file.name, encoding="UTF") as f: links = json.load(f) - split_files = list(temp_dir.glob("all-links*.csv")) + split_files = list(temp_dir.glob("all-links-p*.csv")) # verify total links match expected assert len(links) == expected_results diff --git a/tests/search_gov_spiders/test_helpers.py b/tests/search_gov_spiders/test_helpers.py old mode 100644 new mode 100755 diff --git a/tests/search_gov_spiders/test_middlewares.py b/tests/search_gov_spiders/test_middlewares.py old mode 100644 new mode 100755 diff --git a/tests/search_gov_spiders/test_pipelines.py b/tests/search_gov_spiders/test_pipelines.py deleted file mode 100644 index 0b85135..0000000 --- a/tests/search_gov_spiders/test_pipelines.py +++ /dev/null @@ -1,36 +0,0 @@ -from contextlib import suppress - -import pytest -from scrapy.exceptions import DropItem - -from search_gov_crawler.search_gov_spiders.items import SearchGovSpidersItem -from search_gov_crawler.search_gov_spiders.pipelines import DeDeuplicatorPipeline - - -@pytest.mark.parametrize( - ("items", "urls_seen_length"), - [ - ( - [ - SearchGovSpidersItem(url="https://www.example.com/1"), - 
SearchGovSpidersItem(url="https://www.example.com/2"), - ], - 2, - ), - ( - [ - SearchGovSpidersItem(url="https://www.example.com/1"), - SearchGovSpidersItem(url="https://www.example.com/1"), - ], - 1, - ), - ], -) -def test_deduplicator_pipeline(items, urls_seen_length): - pl = DeDeuplicatorPipeline() - - with suppress(DropItem): - for item in items: - pl.process_item(item, None) - - assert len(pl.urls_seen) == urls_seen_length diff --git a/tests/search_gov_spiders/test_scrapy_scheduler.py b/tests/search_gov_spiders/test_scrapy_scheduler.py old mode 100644 new mode 100755 diff --git a/tests/search_gov_spiders/test_spider.py b/tests/search_gov_spiders/test_spider.py old mode 100644 new mode 100755 diff --git a/tests/search_gov_spiders/test_urls_files_size.py b/tests/search_gov_spiders/test_urls_files_size.py new file mode 100755 index 0000000..319b547 --- /dev/null +++ b/tests/search_gov_spiders/test_urls_files_size.py @@ -0,0 +1,113 @@ +import os + +import pytest +from scrapy import Spider +from scrapy.utils.test import get_crawler + +from search_gov_crawler.search_gov_spiders.items import SearchGovSpidersItem +from search_gov_crawler.search_gov_spiders.pipelines import SearchGovSpidersPipeline + + +@pytest.fixture(name="sample_spider") +def fixture_sample_spider(): + crawler = get_crawler(Spider) + return crawler._create_spider( + name="urls_test", allowed_domains="example.com", allowed_domain_paths="https://www.example.com" + ) + + +@pytest.fixture(name="sample_item") +def fixture_sample_item() -> SearchGovSpidersItem: + """Fixture for a sample item with a URL.""" + item = SearchGovSpidersItem() + item["url"] = "http://example.com" + return item + + +@pytest.fixture(name="mock_open") +def fixture_mock_open(mocker): + return mocker.patch("builtins.open", mocker.mock_open()) + + +@pytest.fixture(name="pipeline_no_api") +def fixture_pipeline_no_api(mock_open, mocker) -> SearchGovSpidersPipeline: + mocker.patch.dict(os.environ, {}) + mocker.patch("search_gov_crawler.search_gov_spiders.pipelines.SearchGovSpidersPipeline.APP_PID", 1234) + return SearchGovSpidersPipeline() + + +@pytest.fixture(name="pipeline_with_api") +def fixture_pipeline_with_api(mocker) -> SearchGovSpidersPipeline: + """Fixture for pipeline with an API URL set.""" + mocker.patch.dict(os.environ, {"SPIDER_URLS_API": "http://mockapi.com"}) + mocker.patch("search_gov_crawler.search_gov_spiders.pipelines.SearchGovSpidersPipeline.APP_PID", 1234) + + return SearchGovSpidersPipeline() + + +def test_write_to_file(pipeline_no_api, mock_open, sample_item, sample_spider, mocker): + """Test that URLs are written to files when SPIDER_URLS_API is not set.""" + mocker.patch.object(SearchGovSpidersPipeline, "_file_size", return_value=100) + pipeline_no_api.process_item(sample_item, sample_spider) + + # Ensure file is opened and written to + mock_open.assert_called_once_with(pipeline_no_api.file_path, "a", encoding="utf-8") + mock_open().write.assert_any_call(sample_item["url"] + "\n") + + +def test_post_to_api(pipeline_with_api, sample_item, sample_spider, mocker): + """Test that URLs are batched and sent via POST when SPIDER_URLS_API is set.""" + mock_post = mocker.patch("requests.post") + + pipeline_with_api.process_item(sample_item, sample_spider) + + # Check that the batch contains the URL + assert sample_item["url"] in pipeline_with_api.urls_batch + + # Simulate max size to force post + mocker.patch.object( + SearchGovSpidersPipeline, + "_batch_size", + return_value=SearchGovSpidersPipeline.MAX_FILE_SIZE_BYTES, + ) + 
pipeline_with_api.process_item(sample_item, sample_spider) + + # Ensure POST request was made + mock_post.assert_called_once_with("http://mockapi.com", json={"urls": pipeline_with_api.urls_batch}) + + +def test_rotate_file(pipeline_no_api, mock_open, sample_item, mocker): + """Test that file rotation occurs when max size is exceeded.""" + mock_rename = mocker.patch("os.rename") + mocker.patch.object( + SearchGovSpidersPipeline, + "_file_size", + return_value=SearchGovSpidersPipeline.MAX_FILE_SIZE_BYTES, + ) + pipeline_no_api.process_item(sample_item, None) + + # Check if the file was rotated + mock_open.assert_called_with(pipeline_no_api.file_path, "a", encoding="utf-8") + mock_open().close.assert_called() + mock_rename.assert_called_once() + + +def test_post_urls_on_spider_close(pipeline_with_api, sample_spider, mocker): + """Test that remaining URLs are posted when spider closes and SPIDER_URLS_API is set.""" + mock_post = mocker.patch("requests.post") + + pipeline_with_api.urls_batch = ["http://example.com"] + + pipeline_with_api.close_spider(sample_spider) + + # Ensure POST request was made on spider close, cannot verify json once urls_batch is cleared + mock_post.assert_called_once_with("http://mockapi.com", json=mocker.ANY) + + +def test_close_file_on_spider_close(pipeline_no_api, mock_open): + """Test that the file is closed when the spider closes and no SPIDER_URLS_API is set.""" + + pipeline_no_api.close_spider(None) + + # Ensure the file is closed + mock_open().close.assert_called_once() diff --git a/tests/search_gov_spiders/test_utiliity_files.py b/tests/search_gov_spiders/test_utiliity_files.py old mode 100644 new mode 100755
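The rewritten SearchGovSpidersPipeline above has two output modes: with SPIDER_URLS_API unset it appends URLs to all-links-p<pid>.csv under the output directory and rotates the file near 3.9MB; with SPIDER_URLS_API set it batches URLs in memory and POSTs them as {"urls": [...]}. The following is a minimal sketch of driving the pipeline directly, outside a crawl, assuming the repository root is on PYTHONPATH; the stub spider and the localhost endpoint are illustrative stand-ins, not part of the change set.

import logging
import os

from scrapy.exceptions import DropItem

from search_gov_crawler.search_gov_spiders.pipelines import SearchGovSpidersPipeline


class StubSpider:
    # The pipeline only touches spider.logger, so a bare logger is enough here.
    logger = logging.getLogger("stub_spider")


spider = StubSpider()

# File mode: SPIDER_URLS_API unset, URLs are appended to output/all-links-p<pid>.csv.
os.environ.pop("SPIDER_URLS_API", None)
file_pipeline = SearchGovSpidersPipeline()
file_pipeline.process_item({"url": "https://www.example.com/a"}, spider)
file_pipeline.close_spider(spider)

# API mode: SPIDER_URLS_API set, URLs are batched and POSTed as {"urls": [...]}.
os.environ["SPIDER_URLS_API"] = "http://127.0.0.1:8080/urls"  # placeholder endpoint
api_pipeline = SearchGovSpidersPipeline()
api_pipeline.process_item({"url": "https://www.example.com/b"}, spider)
try:
    api_pipeline.close_spider(spider)  # flushes the remaining batch
except DropItem as err:
    # Raised by the pipeline when the POST fails (e.g. nothing listening locally).
    print(f"POST failed: {err}")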
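On the receiving side, _send_post_request defines the contract: a JSON body of the form {"urls": [...]} sent to the SPIDER_URLS_API endpoint. A throwaway local receiver for exercising that contract could look like the sketch below; it is an assumption for local experiments only and says nothing about how the real staging /urls endpoint is implemented.

import json
from http.server import BaseHTTPRequestHandler, HTTPServer


class UrlBatchHandler(BaseHTTPRequestHandler):
    """Accepts the pipeline's {"urls": [...]} batches and acknowledges them."""

    def do_POST(self):
        length = int(self.headers.get("Content-Length", 0))
        payload = json.loads(self.rfile.read(length) or b"{}")
        print(f"received {len(payload.get('urls', []))} urls")
        self.send_response(200)  # a 2xx response satisfies raise_for_status() in the pipeline
        self.end_headers()


if __name__ == "__main__":
    # Pair with SPIDER_URLS_API=http://127.0.0.1:8080/urls on the crawler side.
    HTTPServer(("127.0.0.1", 8080), UrlBatchHandler).serve_forever()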