diff --git a/cdc_vaccines/.pylintrc b/cdc_vaccines/.pylintrc new file mode 100644 index 000000000..f30837c7e --- /dev/null +++ b/cdc_vaccines/.pylintrc @@ -0,0 +1,22 @@ + +[MESSAGES CONTROL] + +disable=logging-format-interpolation, + too-many-locals, + too-many-arguments, + # Allow pytest functions to be part of a class. + no-self-use, + # Allow pytest classes to have one test. + too-few-public-methods + +[BASIC] + +# Allow arbitrarily short-named variables. +variable-rgx=[a-z_][a-z0-9_]* +argument-rgx=[a-z_][a-z0-9_]* +attr-rgx=[a-z_][a-z0-9_]* + +[DESIGN] + +# Don't complain about pytest "unused" arguments. +ignored-argument-names=(_.*|run_as_module) \ No newline at end of file diff --git a/cdc_vaccines/Makefile b/cdc_vaccines/Makefile new file mode 100644 index 000000000..5d1101c53 --- /dev/null +++ b/cdc_vaccines/Makefile @@ -0,0 +1,29 @@ +.PHONY: venv install lint test clean run + +dir = $(shell find ./delphi_* -name __init__.py | grep -o 'delphi_[_[:alnum:]]*') + +venv: + python3.8 -m venv env + +install: venv + . env/bin/activate; \ + pip install wheel ; \ + pip install -e ../_delphi_utils_python ;\ + pip install -e . + +lint: + . env/bin/activate; pylint $(dir) + . env/bin/activate; pydocstyle $(dir) + +test: + . env/bin/activate ;\ + (cd tests && ../env/bin/pytest --cov=$(dir) --cov-report=term-missing) + +clean: + rm -rf env + rm -f params.json + +run: + env/bin/python -m $(dir) + env/bin/python -m delphi_utils.validator --dry_run + env/bin/python -m delphi_utils.archive \ No newline at end of file diff --git a/cdc_vaccines/README.md b/cdc_vaccines/README.md new file mode 100644 index 000000000..792f9070c --- /dev/null +++ b/cdc_vaccines/README.md @@ -0,0 +1,69 @@ +# CDC Vaccinations + +This indicator provides the official vaccination counts in the US. We export the county-level +daily vaccination rates data as-is, and publish the result as a COVIDcast signal. +We also aggregate the data to the MSA, HRR, State, HHS Region, and Nation levels. 
+For detailed information see the file DETAILS.md contained in this directory. + +Note that individuals could be vaccinated outside of the US. Additionally, +there is no county-level data for counties in Texas and Hawaii. Each state has some vaccination counts assigned to "unknown county". Some vaccination counts are assigned to "unknown state, unknown county". + + +## Running the Indicator + +The indicator is run by directly executing the Python module contained in this +directory. The safest way to do this is to create a virtual environment, +install the common DELPHI tools, and then install the module and its +dependencies. To do this, run the following command from this directory: + +``` +make install +``` + +This command will install the package in editable mode, so you can make changes that +will automatically propagate to the installed package. + +All of the user-changeable parameters are stored in `params.json`. To execute +the module and produce the output datasets (by default, in `receiving`), run +the following: + +``` +env/bin/python -m delphi_cdc_vaccines +``` + +If you want to enter the virtual environment in your shell, +you can run `source env/bin/activate`. Run `deactivate` to leave the virtual environment. + +Once you are finished, you can remove the virtual environment and +params file with the following: + +``` +make clean +``` + +## Testing the code + +To run static tests of the code style, run the following command: + +``` +make lint +``` + +Unit tests are also included in the module. To execute these, run the following +command from this directory: + +``` +make test +``` + +To run individual tests, run the following: + +``` +(cd tests && ../env/bin/pytest test_run.py --cov=delphi_cdc_vaccines --cov-report=term-missing) +``` + +The output will show the number of unit tests that passed and failed, along +with the percentage of code covered by the tests. 
+ +None of the linting or unit tests should fail, and the code lines that are not covered by unit tests should be few and +should not include critical sub-routines. diff --git a/cdc_vaccines/REVIEW.md b/cdc_vaccines/REVIEW.md new file mode 100644 index 000000000..03f87b17a --- /dev/null +++ b/cdc_vaccines/REVIEW.md @@ -0,0 +1,38 @@ +## Code Review (Python) + +A code review of this module should include a careful look at the code and the +output. To assist in the process, but certainly not in place of it, please +check the following items. + +**Documentation** + +- [ ] the README.md file template is filled out and currently accurate; it is +possible to load and test the code using only the instructions given +- [ ] minimal docstrings (one line describing what the function does) are +included for all functions; full docstrings describing the inputs and expected +outputs should be given for non-trivial functions + +**Structure** + +- [ ] code should pass lint checks (`make lint`) +- [ ] any required metadata files are checked into the repository and placed +within the directory `static` +- [ ] any intermediate files that are created and stored by the module should +be placed in the directory `cache` +- [ ] final expected output files to be uploaded to the API are placed in the +`receiving` directory; output files should not be committed to the repository +- [ ] all options and API keys are passed through the file `params.json` +- [ ] template parameter file (`params.json.template`) is checked into the +code; no personal (e.g., usernames) or private (e.g., API keys) information is +included in this template file + +**Testing** + +- [ ] module can be installed in a new virtual environment (`make install`) +- [ ] reasonably high level of unit test coverage covering all of the main logic +of the code (e.g., missing coverage for raised errors that do not currently seem +possible to reach are okay; missing coverage for options that will be needed are +not) +- [ ] all 
unit tests run without errors (`make test`) +- [ ] indicator directory has been added to GitHub CI +(`covidcast-indicators/.github/workflows/python-ci.yml`) diff --git a/cdc_vaccines/cache/.gitignore b/cdc_vaccines/cache/.gitignore new file mode 100644 index 000000000..e69de29bb diff --git a/cdc_vaccines/delphi_cdc_vaccines/__init__.py b/cdc_vaccines/delphi_cdc_vaccines/__init__.py new file mode 100644 index 000000000..6813595b4 --- /dev/null +++ b/cdc_vaccines/delphi_cdc_vaccines/__init__.py @@ -0,0 +1,13 @@ +# -*- coding: utf-8 -*- +"""Module to pull and clean indicators from the CDC source. + +This file defines the functions that are made public by the module. As the +module is intended to be executed through the main method, these are primarily +for testing. +""" + +from __future__ import absolute_import +from . import pull +from . import run + +__version__ = "0.1.0" diff --git a/cdc_vaccines/delphi_cdc_vaccines/__main__.py b/cdc_vaccines/delphi_cdc_vaccines/__main__.py new file mode 100644 index 000000000..0aa3f6ac1 --- /dev/null +++ b/cdc_vaccines/delphi_cdc_vaccines/__main__.py @@ -0,0 +1,12 @@ +# -*- coding: utf-8 -*- +"""Call the function run_module when executed. + +This file indicates that calling the module (`python -m delphi_cdc_vaccines`) will +call the function `run_module` found within the run.py file. There should be +no need to change this template. 
+""" + +from delphi_utils import read_params +from .run import run_module # pragma: no cover + +run_module(read_params()) # pragma: no cover diff --git a/cdc_vaccines/delphi_cdc_vaccines/constants.py b/cdc_vaccines/delphi_cdc_vaccines/constants.py new file mode 100644 index 000000000..b8ff83d06 --- /dev/null +++ b/cdc_vaccines/delphi_cdc_vaccines/constants.py @@ -0,0 +1,33 @@ +"""Registry for variations.""" + +from itertools import product +from delphi_utils import Smoother + + +CUMULATIVE = 'cumulative' +INCIDENCE = 'incidence' +FREQUENCY = [CUMULATIVE, INCIDENCE] +STATUS = ["tot", "part"] +AGE = ["", "_12P", "_18P", "_65P"] + +SIGNALS = [f"{frequency}_counts_{status}_vaccine{age}" for + frequency, status, age in product(FREQUENCY, STATUS, AGE)] +DIFFERENCE_MAPPING = { + f"{INCIDENCE}_counts_{status}_vaccine{age}": f"{CUMULATIVE}_counts_{status}_vaccine{age}" + for status, age in product(STATUS, AGE) +} +SIGNALS = list(DIFFERENCE_MAPPING.keys()) + list(DIFFERENCE_MAPPING.values()) + + +GEOS = [ + "nation", + "state", + "hrr", + "hhs", + "msa" +] + +SMOOTHERS = [ + (Smoother("identity", impute_method=None), ""), + (Smoother("moving_average", window_length=7), "_7dav"), +] diff --git a/cdc_vaccines/delphi_cdc_vaccines/pull.py b/cdc_vaccines/delphi_cdc_vaccines/pull.py new file mode 100644 index 000000000..59475c678 --- /dev/null +++ b/cdc_vaccines/delphi_cdc_vaccines/pull.py @@ -0,0 +1,179 @@ +# -*- coding: utf-8 -*- +"""Functions for pulling data from the CDC data website for vaccines.""" +import hashlib +from logging import Logger +from delphi_utils.geomap import GeoMapper +import numpy as np +import pandas as pd +from .constants import SIGNALS, DIFFERENCE_MAPPING + + + +def pull_cdcvacc_data(base_url: str, export_start_date: str, + export_end_date: str, logger: Logger) -> pd.DataFrame: + """Pull the latest data from the CDC on vaccines and conform it into a dataset. 
+ + The output dataset has: + - Each row corresponds to (County, Date), denoted (FIPS, timestamp) + - Each row additionally has columns that correspond to the counts or + cumulative counts of vaccination status (fully vaccinated, + partially vaccinated) of various age groups (all, 12+, 18+, 65+) + from December 13th 2020 until the latest date + + Note that the raw dataset gives the `cumulative` metrics, from which + we compute `incidence` by taking first differences. Hence, `incidence` + may be negative. This is wholly dependent on the quality of the raw + dataset. + + We filter the data such that we only keep rows with valid FIPS, or "FIPS" + codes defined under the exceptions of the README. The current exceptions + include: + # - XX000: statewise unallocated (raw FIPS value "UNK") + Parameters + ---------- + base_url: str + Base URL for pulling the CDC Vaccination Data + export_start_date: str + The start date for the csv file (can be empty) + export_end_date: str + The end date for the csv file (can be empty) + logger: Logger + Returns + ------- + pd.DataFrame + Dataframe as described above. + """ + # Columns to drop from the data frame. + drop_columns = [ + "date", + "recip_state", + "series_complete_pop_pct", + "mmwr_week", + "recip_county", + "state_id" + ] + + # Read data and cut off by designated start date + df = pd.read_csv(base_url) + df['Date'] = pd.to_datetime(df['Date']) + try: + export_start_date = pd.to_datetime(0) if (pd.to_datetime(export_start_date) + is pd.NaT) else pd.to_datetime(export_start_date) + export_end_date = pd.Timestamp.max if (pd.to_datetime(export_end_date) + is pd.NaT) else pd.to_datetime(export_end_date) + except (KeyError, ValueError) as e: + raise ValueError( + "Tried to convert export_start/end_date param " + "to datetime but failed. Please " + "check this input." 
+ ) from e + try: + df = df.query('@export_start_date <= Date') + df = df.query('Date <= @export_end_date') + except KeyError as e: + raise ValueError( + "Used export_start/end_date param " + "to filter dataframe but failed. Please " + "check this input." + ) from e + if df['Date'].shape[0] == 0: + raise ValueError( + "Output df has no rows. Please check " + "if export_start_date is later than " + "export_end_date. Else check if base_url" + " still functional." + ) + + logger.info("data retrieved from source", + num_rows=df.shape[0], + num_cols=df.shape[1], + min_date=min(df['Date']), + max_date=max(df['Date']), + checksum=hashlib.sha256(pd.util.hash_pandas_object(df).values).hexdigest()) + df.columns = [i.lower() for i in df.columns] + + df.loc[:,'recip_state'] = df['recip_state'].str.lower().copy() + + drop_columns = list(set(drop_columns + [x for x in df.columns if + ("pct" in x) or ("svi" in x)] + list(df.columns[22:]))) + df = GeoMapper().add_geocode(df, "state_id", "state_code", + from_col="recip_state", new_col="state_id", dropna=False) + df['state_id'] = df['state_id'].fillna('0').astype(int) + # Change FIPS from UNK to XX000 for statewise unallocated vaccinations + unassigned_index = (df["fips"] == "UNK") + df.loc[unassigned_index, "fips"] = df["state_id"].loc[unassigned_index].values * 1000 + + # Conform FIPS + df["fips"] = df["fips"].apply(lambda x: f"{int(x):05d}") + df["timestamp"] = pd.to_datetime(df["date"]) + # Drop unnecessary columns (state is pre-encoded in fips) + try: + df.drop(drop_columns, axis=1, inplace=True) + except KeyError as e: + raise ValueError( + "Tried to drop non-existent columns. The dataset " + "schema may have changed. Please investigate and " + "amend drop_columns." 
+ ) from e + + # Rename the remaining columns; pandas raises ValueError on a length mismatch + try: + df.columns = ["fips", + "cumulative_counts_tot_vaccine", + "cumulative_counts_tot_vaccine_12P", + "cumulative_counts_tot_vaccine_18P", + "cumulative_counts_tot_vaccine_65P", + "cumulative_counts_part_vaccine", + "cumulative_counts_part_vaccine_12P", + "cumulative_counts_part_vaccine_18P", + "cumulative_counts_part_vaccine_65P", + "timestamp"] + except (KeyError, ValueError) as e: + raise ValueError( + "Tried to name wrong number of columns. The dataset " + "schema may have changed. Please investigate and " + "amend drop_columns." + ) from e + + min_time = min(df["timestamp"]) + df_dummy = df.loc[(df["timestamp"] == min_time)].copy() + df_dummy.loc[:, "timestamp"] = min_time - pd.Timedelta(days=1) + df_dummy.loc[:, ["cumulative_counts_tot_vaccine", + "cumulative_counts_tot_vaccine_12P", + "cumulative_counts_tot_vaccine_18P", + "cumulative_counts_tot_vaccine_65P", + "cumulative_counts_part_vaccine", + "cumulative_counts_part_vaccine_12P", + "cumulative_counts_part_vaccine_18P", + "cumulative_counts_part_vaccine_65P", + ]] = 0 + + df = pd.concat([df_dummy, df]) + df = df.set_index(["fips", "timestamp"]).sort_index() + for to, from_d in DIFFERENCE_MAPPING.items(): + df[to] = df.groupby(level=0)[from_d].diff() + idx = pd.IndexSlice + df.loc[idx[:, min_time - pd.Timedelta(days=1)], ["cumulative_counts_tot_vaccine", + "cumulative_counts_tot_vaccine_12P", + "cumulative_counts_tot_vaccine_18P", + "cumulative_counts_tot_vaccine_65P", + "cumulative_counts_part_vaccine", + "cumulative_counts_part_vaccine_12P", + "cumulative_counts_part_vaccine_18P", + "cumulative_counts_part_vaccine_65P", + ]] = np.nan + df.reset_index(inplace=True) + + # Final sanity checks + unique_days = df["timestamp"].unique() + n_days = (max(unique_days) - min(unique_days)) / np.timedelta64(1, "D") + 1 + if n_days != len(unique_days): + raise ValueError( + f"Not every day between {min(unique_days)} and " + 
f"{max(unique_days)} is represented." 
+ ) + return df.loc[ + df["timestamp"] >= min(df["timestamp"]), + # Reorder + ["fips", "timestamp"] + SIGNALS, + ].reset_index(drop=True) diff --git a/cdc_vaccines/delphi_cdc_vaccines/run.py b/cdc_vaccines/delphi_cdc_vaccines/run.py new file mode 100644 index 000000000..4475de6d8 --- /dev/null +++ b/cdc_vaccines/delphi_cdc_vaccines/run.py @@ -0,0 +1,121 @@ +# -*- coding: utf-8 -*- +"""Functions to call when running the function. + +This module should contain a function called `run_module`, that is executed +when the module is run with `python -m delphi_cdc_vaccines`. +`run_module`'s lone argument should be a nested dictionary of +parameters loaded from the params.json file. +We expect the `params` to have the following structure: + - "common": + - "export_dir": str, directory to which the results are exported + - "log_filename": (optional) str, path to log file + - "indicator": (optional) + - "wip_signal": (optional) Any[str, bool], list of signals that are works in progress, or + True if all signals in the registry are works in progress, or False if only + unpublished signals are. See `delphi_utils.add_prefix()` + - Any other indicator-specific settings +""" +from datetime import timedelta, datetime +from itertools import product +import time as tm +import os + +from pandas import DataFrame + +from delphi_utils.export import create_export_csv +from delphi_utils.geomap import GeoMapper +from delphi_utils import get_structured_logger +from delphi_utils.nancodes import Nans +from .constants import GEOS, SIGNALS, SMOOTHERS +from .pull import pull_cdcvacc_data + + +def add_nancodes(df: DataFrame) -> DataFrame: + """ + Provide default nancodes for a non-survey indicator. 
+ + Arguments + -------- + df: DataFrame + """ + df["missing_val"] = Nans.NOT_MISSING + df["missing_se"] = Nans.NOT_APPLICABLE + df["missing_sample_size"] = Nans.NOT_APPLICABLE + + # Mark any values found null to the catch-all category + remaining_nans_mask = df["val"].isnull() & df["missing_val"].eq(Nans.NOT_MISSING) + df.loc[remaining_nans_mask, "missing_val"] = Nans.OTHER + + return df + +def run_module(params): + """ + Run the indicator. + + Arguments + -------- + params: Dict[str, Any] + Nested dictionary of parameters; reads `common` (export_dir, log_filename, log_exceptions) and `indicator` (base_url, export_start_date, export_end_date). + """ + start_time = tm.time() + logger = get_structured_logger( + __name__, filename=params["common"].get("log_filename"), + log_exceptions=params["common"].get("log_exceptions", True)) + base_url = params["indicator"]["base_url"] + export_start_date = params["indicator"]["export_start_date"] + export_end_date = params["indicator"]["export_end_date"] + ## build the base version of the signal at the most detailed geo level you can get. + all_data = pull_cdcvacc_data(base_url, export_start_date, export_end_date, logger) + run_stats = [] + ## aggregate & smooth + + + if not os.path.exists(params["common"]["export_dir"]): + os.makedirs(params["common"]["export_dir"]) + + for (sensor, smoother, geo) in product(SIGNALS, SMOOTHERS, GEOS): + + logger.info("Running on ", + sensor=sensor, + smoother=smoother, + geo=geo) + geo_map = geo + if geo=='state': + geo_map='state_code' + + df = GeoMapper().replace_geocode( + all_data[['timestamp','fips', sensor]], + from_col='fips', + from_code="fips", + new_col="geo_id", + new_code=geo_map, + date_col="timestamp") + df["val"] = df[["geo_id", sensor]].groupby("geo_id")[sensor].transform( + smoother[0].smooth + ) + df["se"] = None + df["sample_size"] = None + df = add_nancodes(df) + sensor_name = sensor + smoother[1] + if not (("cumulative" in sensor_name) and ("7dav" in sensor_name)): + # don't export first 6 days for smoothed signals since they'll be nan. 
+ start_date = min(df.timestamp) + timedelta(6) if smoother[1] else min(df.timestamp) + exported_csv_dates = create_export_csv( + df, + params["common"]["export_dir"], + geo, + sensor_name, + start_date=start_date) + if len(exported_csv_dates) > 0: + run_stats.append((max(exported_csv_dates), len(exported_csv_dates))) + ## log this indicator run + elapsed_time_in_seconds = round(tm.time() - start_time, 2) + min_max_date = run_stats and min(s[0] for s in run_stats) + csv_export_count = sum(s[-1] for s in run_stats) + max_lag_in_days = min_max_date and (datetime.now() - min_max_date).days + formatted_min_max_date = min_max_date and min_max_date.strftime("%Y-%m-%d") + logger.info("Completed indicator run", + elapsed_time_in_seconds = elapsed_time_in_seconds, + csv_export_count = csv_export_count, + max_lag_in_days = max_lag_in_days, + oldest_final_export_date = formatted_min_max_date) diff --git a/cdc_vaccines/params.json.template b/cdc_vaccines/params.json.template new file mode 100644 index 000000000..208f57f91 --- /dev/null +++ b/cdc_vaccines/params.json.template @@ -0,0 +1,27 @@ +{ + "common": { + "export_dir": "./receiving", + "log_filename": "cdc_vaccines.log" + }, + "indicator": { + "base_url": "https://data.cdc.gov/api/views/8xkx-amqh/rows.csv", + "export_start_date": "2020-12-13", + "export_end_date": "2020-12-15" + }, + "validation": { + "common": { + "data_source": "cdc", + "span_length": 14, + "min_expected_lag": {"all": "1"}, + "max_expected_lag": {"all": "7"}, + "dry_run": true, + "suppressed_errors": [] + }, + "static": { + "minimum_sample_size": 0, + "missing_se_allowed": true, + "missing_sample_size_allowed": true + }, + "dynamic": {} + } +} diff --git a/cdc_vaccines/setup.py b/cdc_vaccines/setup.py new file mode 100644 index 000000000..8802dfd45 --- /dev/null +++ b/cdc_vaccines/setup.py @@ -0,0 +1,29 @@ +from setuptools import setup +from setuptools import find_packages + +required = [ + "numpy", + "pandas", + "pydocstyle", + "pytest", 
+ 
"pytest-cov", + "pylint==2.8.3", + "delphi-utils", + "covidcast" +] + +setup( + name="delphi_cdc_vaccines", + version="0.0.1", + description="The number of people who are vaccinated per county.", + author="Ananya Joshi", + author_email="aajoshi@andrew.cmu.edu", + url="https://github.com/cmu-delphi/covidcast-indicators", + install_requires=required, + classifiers=[ + "Development Status :: 0 - Attempt", + "Intended Audience :: Developers", + "Programming Language :: Python :: 3.8", + ], + packages=find_packages(), +) diff --git a/cdc_vaccines/static/.gitignore b/cdc_vaccines/static/.gitignore new file mode 100644 index 000000000..e69de29bb diff --git a/cdc_vaccines/tests/test_data/bad_extra_cols.csv b/cdc_vaccines/tests/test_data/bad_extra_cols.csv new file mode 100644 index 000000000..6642296a1 --- /dev/null +++ b/cdc_vaccines/tests/test_data/bad_extra_cols.csv @@ -0,0 +1,8 @@ +Date,FIPS,MMWR_week,Recip_County,Recip_State,Series_Complete_Pop_Pct,Series_Complete_Yes,Series_Complete_12Plus,Series_Complete_12PlusPop_Pct,Series_Complete_18Plus,Series_Complete_18PlusPop_Pct,Series_Complete_65Plus,Series_Complete_65PlusPop_Pct,Completeness_pct,Administered_Dose1_Recip,Administered_Dose1_Pop_Pct,Administered_Dose1_Recip_12Plus,Administered_Dose1_Recip_12PlusPop_Pct,Administered_Dose1_Recip_18Plus,Administered_Dose1_Recip_18PlusPop_Pct,Administered_Dose1_Recip_65Plus,Administered_Dose1_Recip_65PlusPop_Pct,SVI_CTGY,Series_Complete_Pop_Pct_SVI,Series_Complete_12PlusPop_Pct_SVI,Series_Complete_18PlusPop_Pct_SVI,Series_Complete_65PlusPop_Pct_SVI,Extra_Administered_Dose1_Recip_12PlusPop_Pct,Extra_Administered_Dose1_Recip_18Plus,Extra_Administered_Dose1_Recip_18PlusPop_Pct,Extra_Administered_Dose1_Recip_65Plus,Extra_Administered_Dose1_Recip_65PlusPop_Pct,Extra_SVI_CTGY +8/26/21,UNK,34,Unknown County,UNK,0,789625,789591,0,733809,0,55620,0,0,1119266,0,1119203,0,1035082,0,75596,0,,,,,,0,1035082,0,75596,0, +8/26/21,32013,34,Humboldt 
County,NV,32.9,5537,5535,40.2,5368,43.6,1696,69.9,94.9,6293,37.4,6290,45.6,6014,48.9,1877,77.3,Mod-High,Low-Mod VC/Mod-High SVI,Mod-High VC/Mod-High SVI,Mod-High VC/Mod-High SVI,Mod-High VC/Mod-High SVI,45.6,6014,48.9,1877,77.3,Mod-High +8/26/21,47131,34,Obion County,TN,28.4,8529,8529,33.1,8412,35.7,4114,66,97.8,10758,35.8,10755,41.7,10520,44.6,4625,74.2,High,Low VC/High SVI,Low-Mod VC/High SVI,Low-Mod VC/High SVI,Mod-High VC/High SVI,41.7,10520,44.6,4625,74.2,High +8/26/21,48305,34,Lynn County,TX,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,High,,,,,0,0,0,0,0,High +8/26/21,UNK,34,Unknown County,VA,0,2358403,2352494,0,2206696,0,603704,0,51.3,2705300,0,2696267,0,2516857,0,660454,0,,,,,,0,2516857,0,660454,0, +8/26/21,51678,34,Lexington city,VA,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,Low-Mod,,,,,0,0,0,0,0,Low-Mod +8/26/21,50025,34,Windham County,VT,56.8,23963,23963,64,22620,65.1,7718,76.5,73.7,27598,65.4,27588,73.7,25830,74.3,8588,85.1,Low-Mod,High VC/Low-Mod SVI,High VC/Low-Mod SVI,High VC/Low-Mod SVI,High VC/Low-Mod SVI,73.7,25830,74.3,8588,85.1,Low-Mod \ No newline at end of file diff --git a/cdc_vaccines/tests/test_data/bad_missing_cols.csv b/cdc_vaccines/tests/test_data/bad_missing_cols.csv new file mode 100644 index 000000000..4efd5c1bd --- /dev/null +++ b/cdc_vaccines/tests/test_data/bad_missing_cols.csv @@ -0,0 +1,8 @@ +Date,FIPS,MMWR_week,Recip_County,Recip_State,Series_Complete_Pop_Pct,Series_Complete_65PlusPop_Pct,Administered_Dose1_Recip_12Plus,Administered_Dose1_Recip_12PlusPop_Pct,Administered_Dose1_Recip_18Plus,Administered_Dose1_Recip_18PlusPop_Pct,Administered_Dose1_Recip_65Plus +8/26/21,UNK,34,Unknown County,UNK,0,0,1119203,0,1035082,0,75596 +8/26/21,32013,34,Humboldt County,NV,32.9,69.9,6290,45.6,6014,48.9,1877 +8/26/21,47131,34,Obion County,TN,28.4,66,10755,41.7,10520,44.6,4625 +8/26/21,48305,34,Lynn County,TX,0,0,0,0,0,0,0 +8/26/21,UNK,34,Unknown County,VA,0,0,2696267,0,2516857,0,660454 +8/26/21,51678,34,Lexington city,VA,0,0,0,0,0,0,0 +8/26/21,50025,34,Windham 
County,VT,56.8,76.5,27588,73.7,25830,74.3,8588 \ No newline at end of file diff --git a/cdc_vaccines/tests/test_data/bad_missing_days.csv b/cdc_vaccines/tests/test_data/bad_missing_days.csv new file mode 100644 index 000000000..75a15510c --- /dev/null +++ b/cdc_vaccines/tests/test_data/bad_missing_days.csv @@ -0,0 +1,31 @@ +Date,FIPS,MMWR_week,Recip_County,Recip_State,Series_Complete_Pop_Pct,Series_Complete_Yes,Series_Complete_12Plus,Series_Complete_12PlusPop_Pct,Series_Complete_18Plus,Series_Complete_18PlusPop_Pct,Series_Complete_65Plus,Series_Complete_65PlusPop_Pct,Completeness_pct,Administered_Dose1_Recip,Administered_Dose1_Pop_Pct,Administered_Dose1_Recip_12Plus,Administered_Dose1_Recip_12PlusPop_Pct,Administered_Dose1_Recip_18Plus,Administered_Dose1_Recip_18PlusPop_Pct,Administered_Dose1_Recip_65Plus,Administered_Dose1_Recip_65PlusPop_Pct,SVI_CTGY,Series_Complete_Pop_Pct_SVI,Series_Complete_12PlusPop_Pct_SVI,Series_Complete_18PlusPop_Pct_SVI,Series_Complete_65PlusPop_Pct_SVI +8/25/21,26007,34,Alpena County,MI,44.8,12730,12730,51,12344,53.5,5137,75.8,93.8,13426,47.3,13425,53.8,13293,57.6,5593,82.5,Low-Mod,Mod-High VC/Low-Mod SVI,High VC/Low-Mod SVI,High VC/Low-Mod SVI,High VC/Low-Mod SVI +8/25/21,30071,34,Phillips County,MT,33.8,1337,1337,40.1,1295,43,618,67.5,95.6,1513,38.3,1511,45.4,1450,48.1,665,72.6,Low-Mod,Low-Mod VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI +8/25/21,41029,34,Jackson County,OR,46,101653,101502,53.2,97081,55.2,37628,75.5,97.7,115107,52.1,114881,60.2,109038,62,40450,81.1,Mod-High,Mod-High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI +8/25/21,UNK,34,Unknown County,UNK,0,787349,787315,0,731890,0,55518,0,0,1116357,0,1116294,0,1032597,0,75307,0,,,,, +8/25/21,49041,34,Sevier County,UT,30.7,6630,6630,37.5,6350,41.3,2602,72.9,97.2,8092,37.4,8089,45.8,7606,49.5,2928,82.1,Low-Mod,Low-Mod VC/Low-Mod SVI,Low-Mod VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI,High VC/Low-Mod SVI 
+8/25/21,26013,34,Baraga County,MI,48.8,4002,4001,54.4,3927,57.7,1440,77.1,93.8,4225,51.5,4224,57.5,4156,61,1481,79.3,Mod-High,Mod-High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI +8/24/21,30071,34,Phillips County,MT,33.8,1337,1337,40.1,1295,43,618,67.5,95.6,1513,38.3,1511,45.4,1450,48.1,665,72.6,Low-Mod,Low-Mod VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI +8/24/21,26007,34,Alpena County,MI,44.8,12719,12719,50.9,12338,53.4,5137,75.8,93.8,13422,47.3,13421,53.8,13289,57.6,5593,82.5,Low-Mod,Mod-High VC/Low-Mod SVI,High VC/Low-Mod SVI,High VC/Low-Mod SVI,High VC/Low-Mod SVI +8/24/21,26013,34,Baraga County,MI,48.8,4002,4001,54.4,3927,57.7,1440,77.1,93.8,4225,51.5,4224,57.5,4156,61,1481,79.3,Mod-High,Mod-High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI +8/24/21,41029,34,Jackson County,OR,46,101651,101500,53.2,97079,55.2,37627,75.5,97.7,115101,52.1,114875,60.2,109032,62,40448,81.1,Mod-High,Mod-High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI +8/24/21,49041,34,Sevier County,UT,30.6,6607,6607,37.4,6328,41.2,2597,72.8,97.2,8050,37.2,8047,45.6,7567,49.2,2921,81.9,Low-Mod,Low-Mod VC/Low-Mod SVI,Low-Mod VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI,High VC/Low-Mod SVI +8/24/21,UNK,34,Unknown County,UNK,0,785133,785100,0,730087,0,55397,0,0,1113646,0,1113584,0,1030330,0,75003,0,,,,, +8/23/21,41029,34,Jackson County,OR,45.9,101501,101350,53.1,96953,55.1,37621,75.5,97.7,114861,52,114635,60,108835,61.9,40432,81.1,Mod-High,Mod-High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI +8/23/21,49041,34,Sevier County,UT,30.3,6557,6557,37.1,6295,41,2586,72.5,97.2,7920,36.6,7918,44.8,7478,48.7,2913,81.7,Low-Mod,Low-Mod VC/Low-Mod SVI,Low-Mod VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI,High VC/Low-Mod SVI +8/23/21,26007,34,Alpena 
County,MI,44.8,12714,12714,50.9,12334,53.4,5137,75.8,93.8,13422,47.3,13421,53.8,13289,57.6,5593,82.5,Low-Mod,Mod-High VC/Low-Mod SVI,High VC/Low-Mod SVI,High VC/Low-Mod SVI,High VC/Low-Mod SVI +8/23/21,30071,34,Phillips County,MT,33.8,1336,1336,40.1,1294,42.9,617,67.4,95.6,1513,38.3,1511,45.4,1450,48.1,665,72.6,Low-Mod,Low-Mod VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI +8/23/21,26013,34,Baraga County,MI,48.7,4001,4000,54.4,3926,57.6,1440,77.1,93.8,4225,51.5,4224,57.5,4156,61,1481,79.3,Mod-High,Mod-High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI +8/23/21,UNK,34,Unknown County,UNK,0,783092,783059,0,728591,0,55262,0,0,1111277,0,1111215,0,1028562,0,74699,0,,,,, +8/21/21,49041,33,Sevier County,UT,30.3,6557,6557,37.1,6295,41,2586,72.5,97.2,7919,36.6,7917,44.8,7477,48.7,2913,81.7,Low-Mod,Low-Mod VC/Low-Mod SVI,Low-Mod VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI,High VC/Low-Mod SVI +8/21/21,26013,33,Baraga County,MI,48.7,3997,3996,54.4,3922,57.6,1438,77,93.8,4224,51.5,4223,57.5,4155,61,1480,79.3,Mod-High,Mod-High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI +8/21/21,30071,33,Phillips County,MT,33.8,1335,1335,40.1,1293,42.9,617,67.4,95.6,1513,38.3,1511,45.4,1450,48.1,665,72.6,Low-Mod,Low-Mod VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI +8/21/21,41029,33,Jackson County,OR,45.7,101069,100918,52.9,96575,54.9,37554,75.3,97.7,114032,51.6,113806,59.6,108142,61.5,40324,80.9,Mod-High,Mod-High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI +8/21/21,26007,33,Alpena County,MI,44.7,12697,12697,50.9,12320,53.4,5133,75.7,93.8,13418,47.2,13417,53.7,13285,57.5,5592,82.5,Low-Mod,Mod-High VC/Low-Mod SVI,High VC/Low-Mod SVI,High VC/Low-Mod SVI,High VC/Low-Mod SVI +8/21/21,UNK,33,Unknown County,UNK,0,777853,777820,0,724853,0,55045,0,0,1103847,0,1103785,0,1023256,0,74303,0,,,,, +8/19/21,26013,33,Baraga 
County,MI,48.6,3991,3990,54.3,3916,57.5,1437,77,93.8,4220,51.4,4219,57.4,4151,60.9,1480,79.3,Mod-High,Mod-High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI +8/19/21,41029,33,Jackson County,OR,45.6,100654,100503,52.6,96215,54.7,37479,75.2,97.7,113175,51.2,112949,59.2,107380,61.1,40201,80.6,Mod-High,Mod-High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI +8/19/21,UNK,33,Unknown County,UNK,0,772959,772926,0,720903,0,54789,0,0,1096323,0,1096261,0,1017125,0,73976,0,,,,, +8/19/21,30071,33,Phillips County,MT,33.5,1326,1326,39.8,1284,42.6,615,67.1,95.6,1501,38,1499,45,1438,47.7,664,72.5,Low-Mod,Low-Mod VC/Low-Mod SVI,Low-Mod VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI +8/19/21,49041,33,Sevier County,UT,30.2,6526,6526,37,6267,40.8,2576,72.2,97.1,7827,36.2,7825,44.3,7395,48.1,2904,81.4,Low-Mod,Low-Mod VC/Low-Mod SVI,Low-Mod VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI,High VC/Low-Mod SVI +8/19/21,26007,33,Alpena County,MI,44.6,12662,12662,50.7,12296,53.3,5130,75.7,93.8,13406,47.2,13405,53.7,13276,57.5,5591,82.5,Low-Mod,Mod-High VC/Low-Mod SVI,High VC/Low-Mod SVI,High VC/Low-Mod SVI,High VC/Low-Mod SVI \ No newline at end of file diff --git a/cdc_vaccines/tests/test_data/small.csv b/cdc_vaccines/tests/test_data/small.csv new file mode 100644 index 000000000..d4243a035 --- /dev/null +++ b/cdc_vaccines/tests/test_data/small.csv @@ -0,0 +1,109 @@ 
+Date,FIPS,MMWR_week,Recip_County,Recip_State,Series_Complete_Pop_Pct,Series_Complete_Yes,Series_Complete_12Plus,Series_Complete_12PlusPop_Pct,Series_Complete_18Plus,Series_Complete_18PlusPop_Pct,Series_Complete_65Plus,Series_Complete_65PlusPop_Pct,Completeness_pct,Administered_Dose1_Recip,Administered_Dose1_Pop_Pct,Administered_Dose1_Recip_12Plus,Administered_Dose1_Recip_12PlusPop_Pct,Administered_Dose1_Recip_18Plus,Administered_Dose1_Recip_18PlusPop_Pct,Administered_Dose1_Recip_65Plus,Administered_Dose1_Recip_65PlusPop_Pct,SVI_CTGY,Series_Complete_Pop_Pct_SVI,Series_Complete_12PlusPop_Pct_SVI,Series_Complete_18PlusPop_Pct_SVI,Series_Complete_65PlusPop_Pct_SVI +8/19/21,72121,33,Sabana Grande Municipio,PR,66.1,14360,14360,74.2,13402,75.2,4236,99.9,96,16305,75.1,16305,84.2,15052,84.5,4587,99.9,Mod-High,High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI +8/19/21,72129,33,San Lorenzo Municipio,PR,56.9,20489,20481,63.8,18994,64.3,4771,78,96,24048,66.8,24031,74.9,22111,74.9,5805,94.9,Low-Mod,High VC/Low-Mod SVI,High VC/Low-Mod SVI,High VC/Low-Mod SVI,High VC/Low-Mod SVI +8/19/21,55031,33,Douglas County,WI,35.2,15169,15169,40.3,14557,41.9,5178,62.5,97,18005,41.7,18005,47.9,17155,49.4,5778,69.7,Low-Mod,Low-Mod VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI +8/19/21,47067,33,Hancock County,TN,24.7,1634,1634,28.6,1613,30.8,676,47,97.8,1818,27.5,1818,31.8,1789,34.2,726,50.5,High,Low VC/High SVI,Low VC/High SVI,Low-Mod VC/High SVI,Low-Mod VC/High SVI +8/19/21,UNK,33,Unknown County,UNK,0,772959,772926,0,720903,0,54789,0,0,1096323,0,1096261,0,1017125,0,73976,0,,,,, +8/19/21,48153,33,Floyd County,TX,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,High,,,, +8/19/21,49031,33,Piute County,UT,29.7,439,439,33.7,435,38.7,244,58.7,97.1,527,35.6,527,40.5,518,46.1,275,66.1,Low-Mod,Low VC/Low-Mod SVI,Low-Mod VC/Low-Mod SVI,Low-Mod VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI +8/19/21,1043,33,Cullman 
County,AL,27.7,23211,23210,32.5,22877,35.2,9906,63,91.9,29475,35.2,29470,41.3,28641,44.1,11151,70.9,Mod-High,Low VC/Mod-High SVI,Low-Mod VC/Mod-High SVI,Low-Mod VC/Mod-High SVI,Mod-High VC/Mod-High SVI +8/19/21,UNK,33,Unknown County,TN,0,59660,59642,0,58874,0,15125,0,97.8,74682,0,74626,0,72349,0,17334,0,,,,, +8/19/21,18109,33,Morgan County,IN,44.5,31398,31397,51.9,30057,55,10392,85.2,98.6,33772,47.9,33771,55.8,32057,58.6,10663,87.4,Low,Mod-High VC/Low SVI,High VC/Low SVI,High VC/Low SVI,High VC/Low SVI +8/19/21,56009,33,Converse County,WY,25.1,3466,3466,29.9,3397,32.5,1436,60.4,96.4,3971,28.7,3970,34.2,3864,37,1604,67.4,Low,Low VC/Low SVI,Low VC/Low SVI,Low-Mod VC/Low SVI,Mod-High VC/Low SVI +8/19/21,18099,33,Marshall County,IN,36.7,16967,16967,43.6,16139,46.5,6135,72.6,98.6,18507,40,18505,47.6,17320,49.9,6355,75.2,Mod-High,Low-Mod VC/Mod-High SVI,Mod-High VC/Mod-High SVI,Mod-High VC/Mod-High SVI,High VC/Mod-High SVI +8/18/21,48153,33,Floyd County,TX,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,High,,,, +8/18/21,49031,33,Piute County,UT,29.7,439,439,33.7,435,38.7,244,58.7,97.1,527,35.6,527,40.5,518,46.1,275,66.1,Low-Mod,Low VC/Low-Mod SVI,Low-Mod VC/Low-Mod SVI,Low-Mod VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI +8/18/21,55031,33,Douglas County,WI,35.1,15136,15136,40.2,14529,41.9,5171,62.4,97,17954,41.6,17954,47.7,17116,49.3,5773,69.7,Low-Mod,Low-Mod VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI +8/18/21,72129,33,San Lorenzo Municipio,PR,56.9,20465,20457,63.7,18973,64.2,4768,77.9,96,23962,66.6,23945,74.6,22040,74.6,5801,94.8,Low-Mod,High VC/Low-Mod SVI,High VC/Low-Mod SVI,High VC/Low-Mod SVI,High VC/Low-Mod SVI +8/18/21,56009,33,Converse County,WY,25.1,3465,3465,29.8,3396,32.5,1436,60.4,96.4,3963,28.7,3962,34.1,3856,36.9,1604,67.4,Low,Low VC/Low SVI,Low VC/Low SVI,Low-Mod VC/Low SVI,Mod-High VC/Low SVI +8/18/21,1043,33,Cullman County,AL,27.6,23088,23087,32.4,22765,35,9883,62.8,91.9,29328,35,29323,41.1,28502,43.9,11133,70.8,Mod-High,Low 
VC/Mod-High SVI,Low-Mod VC/Mod-High SVI,Low-Mod VC/Mod-High SVI,Mod-High VC/Mod-High SVI +8/18/21,18109,33,Morgan County,IN,44.5,31348,31347,51.8,30021,54.9,10388,85.1,98.6,33713,47.8,33712,55.7,32002,58.5,10656,87.3,Low,Mod-High VC/Low SVI,High VC/Low SVI,High VC/Low SVI,High VC/Low SVI +8/18/21,72121,33,Sabana Grande Municipio,PR,65.9,14319,14319,73.9,13369,75,4233,99.9,96,16232,74.8,16232,83.8,14988,84.1,4574,99.9,Mod-High,High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI +8/18/21,18099,33,Marshall County,IN,36.6,16939,16939,43.5,16112,46.4,6128,72.5,98.6,18474,39.9,18472,47.5,17293,49.8,6355,75.2,Mod-High,Low-Mod VC/Mod-High SVI,Mod-High VC/Mod-High SVI,Mod-High VC/Mod-High SVI,High VC/Mod-High SVI +8/18/21,UNK,33,Unknown County,UNK,0,770751,770718,0,719193,0,54698,0,0,1092878,0,1092816,0,1014393,0,73827,0,,,,, +8/18/21,UNK,33,Unknown County,TN,0,59506,59488,0,58728,0,15089,0,97.8,74350,0,74295,0,72040,0,17267,0,,,,, +8/18/21,47067,33,Hancock County,TN,24.6,1631,1631,28.5,1610,30.7,675,46.9,97.8,1810,27.3,1810,31.6,1783,34,724,50.3,High,Low VC/High SVI,Low VC/High SVI,Low-Mod VC/High SVI,Low-Mod VC/High SVI +8/17/21,UNK,33,Unknown County,UNK,0,768634,768601,0,717447,0,54645,0,0,1089178,0,1089116,0,1011387,0,73711,0,,,,, +8/17/21,18099,33,Marshall County,IN,36.5,16891,16891,43.4,16078,46.3,6123,72.4,98.6,18392,39.8,18390,47.3,17259,49.7,6354,75.2,Mod-High,Low-Mod VC/Mod-High SVI,Mod-High VC/Mod-High SVI,Mod-High VC/Mod-High SVI,High VC/Mod-High SVI +8/17/21,55031,33,Douglas County,WI,35,15121,15121,40.2,14515,41.8,5168,62.4,97,17936,41.6,17936,47.7,17098,49.3,5771,69.6,Low-Mod,Low-Mod VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI +8/17/21,49031,33,Piute County,UT,29.1,431,431,33.1,427,38,238,57.2,97.1,521,35.2,521,40,514,45.8,275,66.1,Low-Mod,Low VC/Low-Mod SVI,Low-Mod VC/Low-Mod SVI,Low-Mod VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI +8/17/21,72129,33,San Lorenzo 
Municipio,PR,56.9,20465,20457,63.7,18973,64.2,4768,77.9,96,23962,66.6,23945,74.6,22040,74.6,5801,94.8,Low-Mod,High VC/Low-Mod SVI,High VC/Low-Mod SVI,High VC/Low-Mod SVI,High VC/Low-Mod SVI +8/17/21,UNK,33,Unknown County,TN,0,59345,59327,0,58576,0,15053,0,97.8,74078,0,74023,0,71776,0,17214,0,,,,, +8/17/21,56009,33,Converse County,WY,25.1,3463,3463,29.8,3394,32.5,1436,60.4,96.4,3961,28.7,3960,34.1,3854,36.9,1604,67.4,Low,Low VC/Low SVI,Low VC/Low SVI,Low-Mod VC/Low SVI,Mod-High VC/Low SVI +8/17/21,47067,33,Hancock County,TN,24.6,1629,1629,28.5,1608,30.7,675,46.9,97.8,1806,27.3,1806,31.6,1779,34,724,50.3,High,Low VC/High SVI,Low VC/High SVI,Low-Mod VC/High SVI,Low-Mod VC/High SVI +8/17/21,18109,33,Morgan County,IN,44.4,31316,31315,51.7,29996,54.9,10387,85.1,98.6,33662,47.8,33661,55.6,31960,58.4,10651,87.3,Low,Mod-High VC/Low SVI,High VC/Low SVI,High VC/Low SVI,High VC/Low SVI +8/17/21,48153,33,Floyd County,TX,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,High,,,, +8/17/21,1043,33,Cullman County,AL,27.5,23032,23031,32.3,22712,35,9875,62.8,91.9,29254,34.9,29249,41,28431,43.8,11126,70.7,Mod-High,Low VC/Mod-High SVI,Low-Mod VC/Mod-High SVI,Low-Mod VC/Mod-High SVI,Mod-High VC/Mod-High SVI +8/17/21,72121,33,Sabana Grande Municipio,PR,65.9,14319,14319,73.9,13369,75,4233,99.9,96,16232,74.8,16232,83.8,14988,84.1,4574,99.9,Mod-High,High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI +8/16/21,UNK,33,Unknown County,TN,0,59321,59303,0,58555,0,15048,0,97.8,73946,0,73891,0,71657,0,17203,0,,,,, +8/16/21,18099,33,Marshall County,IN,36.5,16879,16879,43.4,16068,46.3,6122,72.4,98.6,18369,39.7,18367,47.2,17239,49.7,6351,75.1,Mod-High,Low-Mod VC/Mod-High SVI,Mod-High VC/Mod-High SVI,Mod-High VC/Mod-High SVI,High VC/Mod-High SVI +8/16/21,1043,33,Cullman County,AL,27.4,22971,22970,32.2,22657,34.9,9869,62.7,91.9,29137,34.8,29132,40.9,28333,43.6,11111,70.6,Mod-High,Low VC/Mod-High SVI,Low-Mod VC/Mod-High SVI,Low-Mod VC/Mod-High SVI,Mod-High VC/Mod-High SVI 
+8/16/21,72129,33,San Lorenzo Municipio,PR,56.8,20426,20418,63.6,18941,64.1,4761,77.8,96,23869,66.3,23852,74.3,21971,74.4,5797,94.8,Low-Mod,High VC/Low-Mod SVI,High VC/Low-Mod SVI,High VC/Low-Mod SVI,High VC/Low-Mod SVI +8/16/21,55031,33,Douglas County,WI,35,15120,15120,40.2,14515,41.8,5168,62.4,97,17928,41.5,17928,47.7,17090,49.2,5771,69.6,Low-Mod,Low-Mod VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI +8/16/21,72121,33,Sabana Grande Municipio,PR,65.9,14305,14305,73.9,13359,75,4231,99.9,96,16159,74.4,16159,83.4,14924,83.8,4561,99.9,Mod-High,High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI +8/16/21,56009,33,Converse County,WY,24.8,3433,3433,29.6,3369,32.2,1431,60.2,96.4,3902,28.2,3901,33.6,3809,36.5,1590,66.8,Low,Low VC/Low SVI,Low VC/Low SVI,Low-Mod VC/Low SVI,Mod-High VC/Low SVI +8/16/21,18109,33,Morgan County,IN,44.4,31271,31270,51.6,29963,54.8,10384,85.1,98.6,33622,47.7,33621,55.5,31929,58.4,10648,87.3,Low,Mod-High VC/Low SVI,High VC/Low SVI,High VC/Low SVI,High VC/Low SVI +8/16/21,UNK,33,Unknown County,UNK,0,766588,766555,0,716121,0,54544,0,0,1085417,0,1085355,0,1008952,0,73510,0,,,,, +8/16/21,47067,33,Hancock County,TN,24.6,1629,1629,28.5,1608,30.7,676,47,97.8,1805,27.3,1805,31.6,1778,34,724,50.3,High,Low VC/High SVI,Low VC/High SVI,Low-Mod VC/High SVI,Low-Mod VC/High SVI +8/16/21,48153,33,Floyd County,TX,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,High,,,, +8/16/21,49031,33,Piute County,UT,29.1,431,431,33.1,427,38,238,57.2,97.1,521,35.2,521,40,514,45.8,275,66.1,Low-Mod,Low VC/Low-Mod SVI,Low-Mod VC/Low-Mod SVI,Low-Mod VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI +8/15/21,72129,33,San Lorenzo Municipio,PR,56.7,20413,20405,63.6,18928,64.1,4755,77.7,96,23830,66.2,23813,74.2,21945,74.3,5792,94.7,Low-Mod,High VC/Low-Mod SVI,High VC/Low-Mod SVI,High VC/Low-Mod SVI,High VC/Low-Mod SVI +8/15/21,48153,33,Floyd County,TX,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,High,,,, +8/15/21,47067,33,Hancock 
County,TN,24.6,1628,1628,28.5,1607,30.7,676,47,97.8,1804,27.3,1804,31.5,1777,33.9,723,50.2,High,Low VC/High SVI,Low VC/High SVI,Low-Mod VC/High SVI,Low-Mod VC/High SVI +8/15/21,1043,33,Cullman County,AL,27.3,22861,22860,32.1,22570,34.7,9861,62.7,91.9,28943,34.6,28938,40.6,28169,43.4,11095,70.5,Mod-High,Low VC/Mod-High SVI,Low-Mod VC/Mod-High SVI,Low-Mod VC/Mod-High SVI,Mod-High VC/Mod-High SVI +8/15/21,18109,33,Morgan County,IN,44.3,31220,31219,51.6,29919,54.7,10381,85.1,98.6,33573,47.6,33572,55.5,31893,58.3,10645,87.2,Low,Mod-High VC/Low SVI,High VC/Low SVI,High VC/Low SVI,High VC/Low SVI +8/15/21,55031,33,Douglas County,WI,34.9,15080,15080,40.1,14483,41.7,5162,62.3,97,17875,41.4,17875,47.5,17057,49.1,5765,69.6,Low-Mod,Low-Mod VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI +8/15/21,72121,33,Sabana Grande Municipio,PR,65.9,14299,14299,73.8,13355,75,4230,99.9,96,16119,74.2,16119,83.2,14893,83.6,4555,99.9,Mod-High,High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI +8/15/21,56009,33,Converse County,WY,24.8,3433,3433,29.6,3369,32.2,1431,60.2,96.4,3901,28.2,3900,33.6,3808,36.4,1590,66.8,Low,Low VC/Low SVI,Low VC/Low SVI,Low-Mod VC/Low SVI,Mod-High VC/Low SVI +8/15/21,49031,33,Piute County,UT,29.1,431,431,33.1,427,38,238,57.2,97.1,521,35.2,521,40,514,45.8,275,66.1,Low-Mod,Low VC/Low-Mod SVI,Low-Mod VC/Low-Mod SVI,Low-Mod VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI +8/15/21,UNK,33,Unknown County,UNK,0,764575,764542,0,714649,0,54426,0,0,1081257,0,1081195,0,1005831,0,73305,0,,,,, +8/15/21,UNK,33,Unknown County,TN,0,59136,59118,0,58379,0,15004,0,97.8,73626,0,73571,0,71349,0,17149,0,,,,, +8/15/21,18099,33,Marshall County,IN,36.5,16868,16868,43.3,16059,46.3,6122,72.4,98.6,18335,39.6,18333,47.1,17209,49.6,6350,75.1,Mod-High,Low-Mod VC/Mod-High SVI,Mod-High VC/Mod-High SVI,Mod-High VC/Mod-High SVI,High VC/Mod-High SVI +8/14/21,47067,32,Hancock 
County,TN,24.6,1626,1626,28.4,1605,30.6,676,47,97.8,1797,27.1,1797,31.4,1771,33.8,721,50.1,High,Low VC/High SVI,Low VC/High SVI,Low-Mod VC/High SVI,Low-Mod VC/High SVI +8/14/21,18099,32,Marshall County,IN,36.4,16854,16854,43.3,16047,46.2,6120,72.4,98.6,18308,39.6,18306,47,17186,49.5,6347,75.1,Mod-High,Low-Mod VC/Mod-High SVI,Mod-High VC/Mod-High SVI,Mod-High VC/Mod-High SVI,High VC/Mod-High SVI +8/14/21,72121,32,Sabana Grande Municipio,PR,65.8,14282,14282,73.8,13340,74.9,4230,99.9,96,16094,74.1,16094,83.1,14873,83.5,4554,99.9,Mod-High,High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI +8/14/21,55031,32,Douglas County,WI,34.9,15080,15080,40.1,14483,41.7,5162,62.3,97,17875,41.4,17875,47.5,17057,49.1,5765,69.6,Low-Mod,Low-Mod VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI +8/14/21,18109,32,Morgan County,IN,44.2,31191,31190,51.5,29898,54.7,10380,85.1,98.6,33520,47.6,33519,55.4,31853,58.3,10643,87.2,Low,Mod-High VC/Low SVI,High VC/Low SVI,High VC/Low SVI,High VC/Low SVI +8/14/21,72129,32,San Lorenzo Municipio,PR,56.6,20380,20372,63.5,18902,64,4751,77.7,96,23744,66,23727,73.9,21886,74.1,5789,94.6,Low-Mod,High VC/Low-Mod SVI,High VC/Low-Mod SVI,High VC/Low-Mod SVI,High VC/Low-Mod SVI +8/14/21,UNK,32,Unknown County,UNK,0,762688,762656,0,713187,0,54336,0,0,1077619,0,1077559,0,1002992,0,73097,0,,,,, +8/14/21,48153,32,Floyd County,TX,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,High,,,, +8/14/21,56009,32,Converse County,WY,24.8,3433,3433,29.6,3369,32.2,1431,60.2,96.4,3901,28.2,3900,33.6,3808,36.4,1590,66.8,Low,Low VC/Low SVI,Low VC/Low SVI,Low-Mod VC/Low SVI,Mod-High VC/Low SVI +8/14/21,1043,32,Cullman County,AL,27.2,22777,22776,31.9,22497,34.6,9848,62.6,91.8,28740,34.3,28735,40.3,27994,43.1,11077,70.4,Mod-High,Low VC/Mod-High SVI,Low-Mod VC/Mod-High SVI,Low-Mod VC/Mod-High SVI,Mod-High VC/Mod-High SVI +8/14/21,UNK,32,Unknown County,TN,0,59055,59037,0,58308,0,14997,0,97.8,73414,0,73359,0,71149,0,17126,0,,,,, 
+8/14/21,49031,32,Piute County,UT,29.1,431,431,33.1,427,38,238,57.2,97.1,521,35.2,521,40,514,45.8,275,66.1,Low-Mod,Low VC/Low-Mod SVI,Low-Mod VC/Low-Mod SVI,Low-Mod VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI +8/13/21,18099,32,Marshall County,IN,36.4,16832,16832,43.3,16028,46.2,6117,72.3,98.6,18263,39.5,18261,46.9,17147,49.4,6343,75,Mod-High,Low-Mod VC/Mod-High SVI,Mod-High VC/Mod-High SVI,Mod-High VC/Mod-High SVI,High VC/Mod-High SVI +8/13/21,1043,32,Cullman County,AL,27.2,22758,22757,31.9,22480,34.6,9843,62.6,91.8,28693,34.3,28688,40.2,27952,43,11073,70.4,Mod-High,Low VC/Mod-High SVI,Low-Mod VC/Mod-High SVI,Low-Mod VC/Mod-High SVI,Mod-High VC/Mod-High SVI +8/13/21,47067,32,Hancock County,TN,24.5,1624,1624,28.4,1605,30.6,676,47,97.8,1797,27.1,1797,31.4,1771,33.8,721,50.1,High,Low VC/High SVI,Low VC/High SVI,Low-Mod VC/High SVI,Low-Mod VC/High SVI +8/13/21,56009,32,Converse County,WY,24.8,3433,3433,29.6,3369,32.2,1431,60.2,96.4,3900,28.2,3899,33.6,3807,36.4,1590,66.8,Low,Low VC/Low SVI,Low VC/Low SVI,Low-Mod VC/Low SVI,Mod-High VC/Low SVI +8/13/21,UNK,32,Unknown County,TN,0,58888,58870,0,58148,0,14972,0,97.8,73059,0,73004,0,70807,0,17061,0,,,,, +8/13/21,18109,32,Morgan County,IN,44.2,31134,31133,51.4,29852,54.6,10377,85.1,98.6,33443,47.4,33442,55.2,31787,58.1,10635,87.2,Low,Mod-High VC/Low SVI,High VC/Low SVI,High VC/Low SVI,High VC/Low SVI +8/13/21,49031,32,Piute County,UT,29.1,431,431,33.1,427,38,238,57.2,97.1,521,35.2,521,40,514,45.8,275,66.1,Low-Mod,Low VC/Low-Mod SVI,Low-Mod VC/Low-Mod SVI,Low-Mod VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI +8/13/21,72129,32,San Lorenzo Municipio,PR,56.5,20335,20327,63.3,18865,63.9,4746,77.6,96,23666,65.8,23649,73.7,21826,73.9,5788,94.6,Low-Mod,High VC/Low-Mod SVI,High VC/Low-Mod SVI,High VC/Low-Mod SVI,High VC/Low-Mod SVI +8/13/21,48153,32,Floyd County,TX,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,High,,,, +8/13/21,55031,32,Douglas County,WI,34.9,15058,15058,40,14466,41.7,5158,62.2,97,17847,41.4,17847,47.5,17031,49.1,5762,69.5,Low-Mod,Low-Mod 
VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI +8/13/21,72121,32,Sabana Grande Municipio,PR,65.6,14253,14253,73.6,13316,74.7,4225,99.9,96,16071,74,16071,83,14852,83.4,4550,99.9,Mod-High,High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI +8/13/21,UNK,32,Unknown County,UNK,0,760315,760283,0,711222,0,54262,0,0,1072813,0,1072753,0,998950,0,72929,0,,,,, +8/12/21,UNK,32,Unknown County,UNK,0,758305,758273,0,709538,0,54190,0,0,1068845,0,1068785,0,995682,0,72750,0,,,,, +8/12/21,18099,32,Marshall County,IN,36.4,16817,16817,43.2,16013,46.1,6116,72.3,98.6,18242,39.4,18240,46.9,17129,49.4,6341,75,Mod-High,Low-Mod VC/Mod-High SVI,Mod-High VC/Mod-High SVI,Mod-High VC/Mod-High SVI,High VC/Mod-High SVI +8/12/21,49031,32,Piute County,UT,29.1,431,431,33.1,427,38,238,57.2,97.1,521,35.2,521,40,514,45.8,275,66.1,Low-Mod,Low VC/Low-Mod SVI,Low-Mod VC/Low-Mod SVI,Low-Mod VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI +8/12/21,72129,32,San Lorenzo Municipio,PR,56.4,20296,20288,63.2,18833,63.8,4740,77.5,96,23615,65.6,23598,73.5,21786,73.8,5784,94.5,Low-Mod,High VC/Low-Mod SVI,High VC/Low-Mod SVI,High VC/Low-Mod SVI,High VC/Low-Mod SVI +8/12/21,72121,32,Sabana Grande Municipio,PR,65.6,14234,14234,73.5,13303,74.7,4220,99.9,96,16057,74,16057,82.9,14840,83.3,4548,99.9,Mod-High,High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI +8/12/21,48153,32,Floyd County,TX,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,High,,,, +8/12/21,55031,32,Douglas County,WI,34.9,15040,15040,40,14452,41.6,5153,62.2,97,17820,41.3,17820,47.4,17007,49,5756,69.4,Low-Mod,Low-Mod VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI +8/12/21,18109,32,Morgan County,IN,44.1,31079,31078,51.3,29808,54.5,10372,85,98.6,33362,47.3,33361,55.1,31719,58,10630,87.1,Low,Mod-High VC/Low SVI,High VC/Low SVI,High VC/Low SVI,High VC/Low SVI +8/12/21,56009,32,Converse 
County,WY,24.8,3433,3433,29.6,3369,32.2,1431,60.2,96.4,3899,28.2,3898,33.6,3806,36.4,1589,66.8,Low,Low VC/Low SVI,Low VC/Low SVI,Low-Mod VC/Low SVI,Mod-High VC/Low SVI +8/12/21,UNK,32,Unknown County,TN,0,58763,58745,0,58026,0,14940,0,97.8,72726,0,72672,0,70489,0,17016,0,,,,, +8/12/21,1043,32,Cullman County,AL,27.1,22693,22692,31.8,22421,34.5,9833,62.5,91.8,28535,34.1,28530,40,27817,42.8,11062,70.3,Mod-High,Low VC/Mod-High SVI,Low-Mod VC/Mod-High SVI,Low-Mod VC/Mod-High SVI,Mod-High VC/Mod-High SVI +8/12/21,47067,32,Hancock County,TN,24.5,1623,1623,28.4,1604,30.6,676,47,97.8,1795,27.1,1795,31.4,1769,33.8,720,50,High,Low VC/High SVI,Low VC/High SVI,Low-Mod VC/High SVI,Low-Mod VC/High SVI +8/11/21,18099,32,Marshall County,IN,36.3,16794,16794,43.2,15996,46.1,6113,72.3,98.6,18199,39.3,18197,46.8,17097,49.3,6338,75,Mod-High,Low-Mod VC/Mod-High SVI,Mod-High VC/Mod-High SVI,Mod-High VC/Mod-High SVI,High VC/Mod-High SVI +8/11/21,72121,32,Sabana Grande Municipio,PR,65.2,14166,14166,73.2,13266,74.5,4214,99.9,96,16023,73.8,16023,82.7,14818,83.2,4545,99.9,Mod-High,High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI,High VC/Mod-High SVI +8/11/21,72129,32,San Lorenzo Municipio,PR,56.3,20279,20271,63.2,18821,63.7,4738,77.4,96,23592,65.6,23575,73.4,21767,73.7,5783,94.5,Low-Mod,High VC/Low-Mod SVI,High VC/Low-Mod SVI,High VC/Low-Mod SVI,High VC/Low-Mod SVI +8/11/21,UNK,32,Unknown County,TN,0,58601,58583,0,57870,0,14895,0,97.8,72472,0,72418,0,70251,0,16956,0,,,,, +8/11/21,47067,32,Hancock County,TN,24.5,1622,1622,28.4,1603,30.6,675,46.9,97.8,1794,27.1,1794,31.4,1768,33.8,720,50,High,Low VC/High SVI,Low VC/High SVI,Low-Mod VC/High SVI,Low-Mod VC/High SVI +8/11/21,48153,32,Floyd County,TX,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,High,,,, +8/11/21,UNK,32,Unknown County,UNK,0,756368,756338,0,707922,0,54140,0,0,1065083,0,1065026,0,992587,0,72616,0,,,,, +8/11/21,55031,32,Douglas 
County,WI,34.8,15014,15014,39.9,14434,41.6,5147,62.1,97,17792,41.2,17792,47.3,16986,48.9,5754,69.4,Low-Mod,Low-Mod VC/Low-Mod SVI,Low-Mod VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI +8/11/21,56009,32,Converse County,WY,24.8,3429,3429,29.5,3365,32.2,1429,60.1,96.4,3895,28.2,3894,33.5,3802,36.4,1587,66.7,Low,Low VC/Low SVI,Low VC/Low SVI,Low-Mod VC/Low SVI,Mod-High VC/Low SVI +8/11/21,18109,32,Morgan County,IN,44,31047,31046,51.3,29782,54.5,10371,85,98.6,33318,47.3,33317,55,31682,57.9,10627,87.1,Low,Mod-High VC/Low SVI,High VC/Low SVI,High VC/Low SVI,High VC/Low SVI +8/11/21,49031,32,Piute County,UT,29.1,431,431,33.1,427,38,238,57.2,97.1,521,35.2,521,40,514,45.8,275,66.1,Low-Mod,Low VC/Low-Mod SVI,Low-Mod VC/Low-Mod SVI,Low-Mod VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI +8/11/21,1043,32,Cullman County,AL,27,22603,22602,31.7,22338,34.4,9821,62.4,91.8,28324,33.8,28319,39.7,27622,42.5,11036,70.1,Mod-High,Low VC/Mod-High SVI,Low-Mod VC/Mod-High SVI,Low-Mod VC/Mod-High SVI,Mod-High VC/Mod-High SVI \ No newline at end of file diff --git a/cdc_vaccines/tests/test_data/small_pull.csv b/cdc_vaccines/tests/test_data/small_pull.csv new file mode 100644 index 000000000..812835e37 --- /dev/null +++ b/cdc_vaccines/tests/test_data/small_pull.csv @@ -0,0 +1,4 @@ +Date,FIPS,MMWR_week,Recip_County,Recip_State,Series_Complete_Pop_Pct,Series_Complete_Yes,Series_Complete_12Plus,Series_Complete_12PlusPop_Pct,Series_Complete_18Plus,Series_Complete_18PlusPop_Pct,Series_Complete_65Plus,Series_Complete_65PlusPop_Pct,Completeness_pct,Administered_Dose1_Recip,Administered_Dose1_Pop_Pct,Administered_Dose1_Recip_12Plus,Administered_Dose1_Recip_12PlusPop_Pct,Administered_Dose1_Recip_18Plus,Administered_Dose1_Recip_18PlusPop_Pct,Administered_Dose1_Recip_65Plus,Administered_Dose1_Recip_65PlusPop_Pct,SVI_CTGY,Series_Complete_Pop_Pct_SVI,Series_Complete_12PlusPop_Pct_SVI,Series_Complete_18PlusPop_Pct_SVI,Series_Complete_65PlusPop_Pct_SVI +8/26/21,UNK,34,Unknown 
County,UNK,0,789625,789591,0,733809,0,55620,0,0,1119266,0,1119203,0,1035082,0,75596,0,,,,, +8/26/21,32013,34,Humboldt County,NV,32.9,5537,5535,40.2,5368,43.6,1696,69.9,94.9,6293,37.4,6290,45.6,6014,48.9,1877,77.3,Mod-High,Low-Mod VC/Mod-High SVI,Mod-High VC/Mod-High SVI,Mod-High VC/Mod-High SVI,Mod-High VC/Mod-High SVI +8/26/21,48305,34,Lynn County,TX,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,High,,,, \ No newline at end of file diff --git a/cdc_vaccines/tests/test_pull.py b/cdc_vaccines/tests/test_pull.py new file mode 100644 index 000000000..de5297e35 --- /dev/null +++ b/cdc_vaccines/tests/test_pull.py @@ -0,0 +1,83 @@ +"""Tests for running the CDC Vaccine indicator.""" +import pytest +import logging +from unittest.mock import patch +import pandas as pd +import numpy as np +from delphi_cdc_vaccines.pull import pull_cdcvacc_data + +from test_run import local_fetch + +BASE_URL_GOOD = "test_data/small_pull.csv" + +BASE_URL_BAD = { + "missing_days": "test_data/bad_missing_days.csv", + "missing_cols": "test_data/bad_missing_cols.csv", + "extra_cols": "test_data/bad_extra_cols.csv" +} + +TEST_LOGGER = logging.getLogger() + +class TestPullCDCVaccines: + """Tests for the `pull_cdcvacc_data()` function.""" + def test_good_file(self): + """Test the expected output from a smaller file.""" + df = pull_cdcvacc_data(BASE_URL_GOOD, "", "", TEST_LOGGER) + expected_df = pd.DataFrame({ + "fips": ["00000","00000","32013","32013","48305","48305"], + "timestamp": [pd.Timestamp("2021-08-25"), pd.Timestamp("2021-08-26"), + pd.Timestamp("2021-08-25"), pd.Timestamp("2021-08-26"), + pd.Timestamp("2021-08-25"), pd.Timestamp("2021-08-26")], + + "incidence_counts_tot_vaccine": [np.nan,789625.0,np.nan,5537.0,np.nan,0.0], + "incidence_counts_tot_vaccine_12P": [np.nan,789591.0,np.nan,5535.0,np.nan,0.0], + "incidence_counts_tot_vaccine_18P": [np.nan,733809.0,np.nan,5368.0,np.nan,0.0], + "incidence_counts_tot_vaccine_65P": [np.nan,55620.0,np.nan,1696.0,np.nan,0.0], + "incidence_counts_part_vaccine": 
[np.nan,1119266.0,np.nan,6293.0,np.nan,0.0], + "incidence_counts_part_vaccine_12P": [np.nan,1119203.0,np.nan,6290.0,np.nan,0.0], + "incidence_counts_part_vaccine_18P": [np.nan,1035082.0,np.nan,6014.0,np.nan,0.0], + "incidence_counts_part_vaccine_65P": [np.nan,75596.0,np.nan,1877.0,np.nan,0.0], + + + + "cumulative_counts_tot_vaccine": [np.nan,789625.0,np.nan,5537.0,np.nan,0.0], + "cumulative_counts_tot_vaccine_12P": [np.nan,789591.0,np.nan,5535.0,np.nan,0.0], + "cumulative_counts_tot_vaccine_18P": [np.nan,733809.0,np.nan,5368.0,np.nan,0.0], + "cumulative_counts_tot_vaccine_65P": [np.nan,55620.0,np.nan,1696.0,np.nan,0.0], + "cumulative_counts_part_vaccine": [np.nan,1119266.0,np.nan,6293.0,np.nan,0.0], + "cumulative_counts_part_vaccine_12P": [np.nan,1119203.0,np.nan,6290.0,np.nan,0.0], + "cumulative_counts_part_vaccine_18P": [np.nan,1035082.0,np.nan,6014.0,np.nan,0.0], + "cumulative_counts_part_vaccine_65P": [np.nan,75596.0,np.nan,1877.0,np.nan,0.0]}, + + index=[0, 1, 2, 3, 4, 5]) + + # sort since rows order doesn't matter + pd.testing.assert_frame_equal(df.set_index(["fips", "timestamp"]).sort_index(), expected_df.set_index(["fips", "timestamp"]).sort_index()) + + def test_missing_days(self): + """Test if error is raised when there are missing days.""" + with pytest.raises(ValueError): + pull_cdcvacc_data( + BASE_URL_BAD["missing_days"], "","", TEST_LOGGER + ) + + def test_missing_cols(self): + """Test if error is raised when there are missing columns.""" + with pytest.raises(ValueError): + pull_cdcvacc_data( + BASE_URL_BAD["missing_cols"],"","",TEST_LOGGER + ) + + def test_start_date(self): + """ Test that there is an error if start date > end date. 
""" + with pytest.raises(ValueError): + pull_cdcvacc_data( + BASE_URL_BAD["missing_cols"],"2021-12-12","2021-08-25",TEST_LOGGER + ) + + def test_bad_start_only(self): + """ Test if there is an export_start_date which is not a date""" + with pytest.raises(ValueError): + pull_cdcvacc_data( + BASE_URL_BAD["missing_cols"],"abcd","2021-08-25",TEST_LOGGER + ) \ No newline at end of file diff --git a/cdc_vaccines/tests/test_run.py b/cdc_vaccines/tests/test_run.py new file mode 100644 index 000000000..f66dd0403 --- /dev/null +++ b/cdc_vaccines/tests/test_run.py @@ -0,0 +1,117 @@ +"""Tests for running the CDC Vaccine indicator.""" +from itertools import product +from os import listdir, remove +from os.path import join +from unittest.mock import patch +import pandas as pd + +from delphi_cdc_vaccines.run import run_module + +def local_fetch(url, cache): + return pd.read_csv(url) + +class TestRun: + """Tests for the `run_module()` function.""" + PARAMS = { + "common": { + "export_dir": "./receiving", + "input_dir": "./input_cache" + }, + "indicator": { + "base_url": "./test_data/small.csv", + "export_start_date": "2021-08-10", + "export_end_date": "2021-08-17" + } + } + + + + def test_output_files_exist(self): + """Test that the expected output files exist.""" + run_module(self.PARAMS) + + csv_files = [f for f in listdir("receiving") if f.endswith(".csv")] + + dates = [ + "20210810", + "20210811", + "20210812", + "20210813", + "20210814", + "20210815", + "20210816", + "20210817", + ] + geos = ["state", "hrr", "hhs", "nation", "msa"] + + expected_files = [] + for metric in ["cumulative_counts_tot_vaccine", + "incidence_counts_tot_vaccine", + "cumulative_counts_tot_vaccine_12P", + "incidence_counts_tot_vaccine_12P", + "cumulative_counts_tot_vaccine_18P", + "incidence_counts_tot_vaccine_18P", + "cumulative_counts_tot_vaccine_65P", + "incidence_counts_tot_vaccine_65P", + "cumulative_counts_part_vaccine", + "incidence_counts_part_vaccine", + "cumulative_counts_part_vaccine_12P", + 
"incidence_counts_part_vaccine_12P", + "cumulative_counts_part_vaccine_18P", + "incidence_counts_part_vaccine_18P", + "cumulative_counts_part_vaccine_65P", + "incidence_counts_part_vaccine_65P"]: + for date in dates: + for geo in geos: + expected_files += [date + "_" + geo + "_" + metric + ".csv"] + if not("cumulative" in metric) and not (date in dates[:6]): + expected_files += [date + "_" + geo + "_" + metric + "_7dav.csv"] + + print(set(csv_files)-set(expected_files)) + assert set(csv_files) == set(expected_files) + # Remove the csv_files from the directory + [remove(join("receiving", f)) for f in csv_files] + + def test_output_file_format(self): + """Test that the output files have the proper format.""" + self.PARAMS['indicator']['export_start_date'] = "2021-08-19" + self.PARAMS['indicator']['export_end_date'] = "2021-08-19" + run_module(self.PARAMS) + csv_files = [f for f in listdir("receiving") if f.endswith(".csv")] + df = pd.read_csv( + join("receiving", "20210819_state_cumulative_counts_tot_vaccine.csv") + ) + assert (df.columns.values == ["geo_id", "val", "se", "sample_size", "missing_val", "missing_se", "missing_sample_size"]).all() + # Remove the csv_files from the directory + [remove(join("receiving", f)) for f in csv_files] + + def test_end_date(self): + """ Test if there is only a end date, that the correct range of dates for the files are returned. """ + self.PARAMS['indicator']['export_start_date'] = "" + self.PARAMS['indicator']['export_end_date'] = "2021-08-11" + run_module(self.PARAMS) + csv_files = [f for f in listdir("receiving") if f.endswith(".csv")] + list_dates = set([f.split("_")[0] for f in csv_files]) + assert(list_dates == {"20210810", "20210811"}) + # Remove the .csv files from the directory + [remove(join("receiving", f)) for f in csv_files] + + def test_delta(self): + """ Test if the correct range of dates for the files are returned. 
""" + self.PARAMS['indicator']['export_start_date'] = "2021-08-10" + self.PARAMS['indicator']['export_end_date'] = "2021-08-11" + run_module(self.PARAMS) + csv_files = [f for f in listdir("receiving") if f.endswith(".csv")] + list_dates = set([f.split("_")[0] for f in csv_files]) + assert(list_dates == {'20210810', '20210811'}) + # Remove the .csv files from the directory + [remove(join("receiving", f)) for f in csv_files] + + + + + + + + +