diff --git a/README.md b/README.md index c67a285a..60436dcc 100644 --- a/README.md +++ b/README.md @@ -105,9 +105,9 @@ If the `remove_overlap` parameter is set to True (default), `stewicombo` combine ## Installation Instructions Install a release directly from github using pip. From a command line interface, run: -> pip install git+https://github.com/USEPA/standardizedinventories.git@v1.0.2#egg=StEWI +> pip install git+https://github.com/USEPA/standardizedinventories.git@v1.0.3#egg=StEWI -where you can replace 'v1.0.2' with the version you wish to use under [Releases](https://github.com/USEPA/standardizedinventories/releases). +where you can replace 'v1.0.3' with the version you wish to use under [Releases](https://github.com/USEPA/standardizedinventories/releases). Alternatively, to install from the most current point on the repository: ``` diff --git a/facilitymatcher/__init__.py b/facilitymatcher/__init__.py index ee513dd4..013adab3 100644 --- a/facilitymatcher/__init__.py +++ b/facilitymatcher/__init__.py @@ -23,7 +23,8 @@ def get_matches_for_inventories(inventory_list=stewi_inventories): def get_FRS_NAICSInfo_for_facility_list(frs_id_list, - inventories_of_interest_list=None): + inventories_of_interest_list=None, + download_if_missing=False): """Return the FRS NAICS codes for the facilities of interest. Optionally it will also filter that FRS info by inventories of interest @@ -31,10 +32,12 @@ def get_FRS_NAICSInfo_for_facility_list(frs_id_list, e.g. ['110000491735', '110000491744'] :param inventories_of_interest_list: list of inventories to filter NAICS info by using StEWI inventory names e.g. ['NEI'] + :param download_if_missing: bool, if True will attempt to load from + remote server prior to generating if file not found locally :return: dataframe with columns 'FRS_ID', 'Source', 'NAICS', 'PRIMARY_INDICATOR' """ - all_NAICS = get_fm_file('FRS_NAICSforStEWI') + all_NAICS = get_fm_file('FRS_NAICSforStEWI', download_if_missing) if frs_id_list is not None: NAICS_of_interest = filter_by_facility_list(all_NAICS, frs_id_list) else: diff --git a/facilitymatcher/globals.py b/facilitymatcher/globals.py index e14e1731..2b5d3d73 100644 --- a/facilitymatcher/globals.py +++ b/facilitymatcher/globals.py @@ -17,7 +17,8 @@ import facilitymatcher.WriteFacilityMatchesforStEWI as write_fm import facilitymatcher.WriteFRSNAICSforStEWI as write_naics from esupy.processed_data_mgmt import Paths, load_preprocessed_output,\ - write_df_to_file, write_metadata_to_file, read_source_metadata + write_df_to_file, write_metadata_to_file, read_source_metadata,\ + download_from_remote from esupy.util import strip_file_extension MODULEPATH = Path(__file__).resolve().parent @@ -97,14 +98,21 @@ def store_fm_file(df, file_name, category='', sources=None): log.error('Failed to save inventory') -def get_fm_file(file_name): - """Read facilitymatcher file, if not present, generate it.""" +def get_fm_file(file_name, download_if_missing=False): + """Read facilitymatcher file, if not present, generate it. + :param file_name: str, can be 'FacilityMatchList_forStEWI' or + 'FRS_NAICSforStEWI' + :param download_if_missing: bool, if True will attempt to load from + remote server prior to generating if file not found locally + """ file_meta = set_facilitymatcher_meta(file_name, category='') df = load_preprocessed_output(file_meta, paths) if df is None: log.info(f'{file_name} not found in {output_dir}, ' 'writing facility matches to file') - if file_name == 'FacilityMatchList_forStEWI': + if download_if_missing: + download_from_remote(file_meta, paths) + elif file_name == 'FacilityMatchList_forStEWI': write_fm.write_facility_matches() elif file_name == 'FRS_NAICSforStEWI': write_naics.write_NAICS_matches() diff --git a/setup.py b/setup.py index eef5588d..24a7b480 100644 --- a/setup.py +++ b/setup.py @@ -12,7 +12,7 @@ setup( name="StEWI", - version="1.0.2", + version="1.0.3", author="Ben Young, Wesley Ingwersen, Matthew Bergmann, Jose Hernandez-Betancur, Tapajyoti Ghosh, Eric Bell", author_email="ingwersen.wesley@epa.gov", description="Standardized Emission And Waste Inventories (StEWI)" diff --git a/stewi/__init__.py b/stewi/__init__.py index 6f69252a..5d3e89f4 100644 --- a/stewi/__init__.py +++ b/stewi/__init__.py @@ -60,7 +60,8 @@ def printAvailableInventories(stewiformat='flowbyfacility'): def getInventory(inventory_acronym, year, stewiformat='flowbyfacility', - filters=None, filter_for_LCI=False, US_States_Only=False): + filters=None, filter_for_LCI=False, US_States_Only=False, + download_if_missing=False): """Return or generate an inventory in a standard output format. :param inventory_acronym: like 'TRI' @@ -68,12 +69,16 @@ def getInventory(inventory_acronym, year, stewiformat='flowbyfacility', :param stewiformat: str e.g. 'flowbyfacility' or 'flow' :param filters: a list of named filters to apply to inventory :param filter_for_LCI: whether or not to filter inventory for life - cycle inventory creation - :param US_States_Only: includes only US states + cycle inventory creation, is DEPRECATED in favor of 'filters' + :param US_States_Only: includes only US states, is DEPRECATED in + favor of 'filters' + :param download_if_missing: bool, if True will attempt to load from + remote server prior to generating if file not found locally :return: dataframe with standard fields depending on output format """ f = ensure_format(stewiformat) - inventory = read_inventory(inventory_acronym, year, f) + inventory = read_inventory(inventory_acronym, year, f, + download_if_missing) if not filters: filters = [] if f.value > 2: # exclude FLOW and FACILITY @@ -92,7 +97,7 @@ def getInventory(inventory_acronym, year, stewiformat='flowbyfacility', filters.append('US_States_only') inventory = apply_filters_to_inventory(inventory, inventory_acronym, year, - filters) + filters, download_if_missing) # After filting, may be necessary to reaggregate inventory again inventory = aggregate(inventory) @@ -102,31 +107,39 @@ def getInventory(inventory_acronym, year, stewiformat='flowbyfacility', return inventory -def getInventoryFlows(inventory_acronym, year): +def getInventoryFlows(inventory_acronym, year, + download_if_missing=False): """Return flows for an inventory. :param inventory_acronym: e.g. 'TRI' :param year: e.g. 2014 + :param download_if_missing: bool, if True will attempt to load from + remote server prior to generating if file not found locally :return: dataframe with standard flows format """ - flows = read_inventory(inventory_acronym, year, StewiFormat.FLOW) + flows = read_inventory(inventory_acronym, year, StewiFormat.FLOW, + download_if_missing) if flows is None: - return None + return flows = add_missing_fields(flows, inventory_acronym, StewiFormat.FLOW, maintain_columns=False) return flows -def getInventoryFacilities(inventory_acronym, year): +def getInventoryFacilities(inventory_acronym, year, + download_if_missing=False): """Return flows for an inventory. :param inventory_acronym: e.g. 'TRI' :param year: e.g. 2014 + :param download_if_missing: bool, if True will attempt to load from + remote server prior to generating if file not found locally :return: dataframe with standard flows format """ - facilities = read_inventory(inventory_acronym, year, StewiFormat.FACILITY) + facilities = read_inventory(inventory_acronym, year, StewiFormat.FACILITY, + download_if_missing) if facilities is None: - return None + return facilities = add_missing_fields(facilities, inventory_acronym, StewiFormat.FACILITY, maintain_columns=True) return facilities diff --git a/stewi/filter.py b/stewi/filter.py index 33ac30c8..7f079628 100644 --- a/stewi/filter.py +++ b/stewi/filter.py @@ -12,13 +12,16 @@ filter_config = config(file='filter.yaml') -def apply_filters_to_inventory(inventory, inventory_acronym, year, filters): +def apply_filters_to_inventory(inventory, inventory_acronym, year, filters, + download_if_missing=False): """Apply one or more filters from a passed list to an inventory dataframe. :param inventory: df of stewi inventory of type flowbyfacility or flowbyprocess :param inventory_acronym: str of inventory e.g. 'NEI' :param year: year as number like 2010 :param filters: a list of named filters to apply to inventory + :param download_if_missing: bool, if True will attempt to load from + remote server prior to generating if file not found locally :return: DataFrame of filtered inventory """ if 'filter_for_LCI' in filters: @@ -30,7 +33,8 @@ def apply_filters_to_inventory(inventory, inventory_acronym, year, filters): if 'US_States_only' in filters: log.info('filtering for US states') inventory = filter_states(inventory, inventory_acronym=inventory_acronym, - year=year) + year=year, + download_if_missing=download_if_missing) if inventory_acronym == 'DMR' and 'remove_duplicate_organic_enrichment' in filters: from stewi.DMR import remove_duplicate_organic_enrichment @@ -38,7 +42,8 @@ def apply_filters_to_inventory(inventory, inventory_acronym, year, filters): if inventory_acronym == 'RCRAInfo' and 'National_Biennial_Report' in filters: log.info('filtering for National Biennial Report') - fac_list = read_inventory('RCRAInfo', year, StewiFormat.FACILITY) + fac_list = read_inventory('RCRAInfo', year, StewiFormat.FACILITY, + download_if_missing) fac_list = fac_list[['FacilityID', 'Generator ID Included in NBR'] ].drop_duplicates(ignore_index=True) @@ -62,7 +67,8 @@ def apply_filters_to_inventory(inventory, inventory_acronym, year, filters): def filter_states(inventory_df, inventory_acronym=None, year=None, - include_states=True, include_dc=True, include_territories=False): + include_states=True, include_dc=True, include_territories=False, + download_if_missing=False): """Remove records that are not included in the list of states. :param inventory_df: dataframe that includes column 'State' of 2 digit strings, @@ -71,13 +77,16 @@ def filter_states(inventory_df, inventory_acronym=None, year=None, :param include_states: bool, True to include data from 50 U.S. states :param include_dc: bool, True to include data from D.C. :param include_territories: bool, True to include data from U.S. territories + :param download_if_missing: bool, if True will attempt to load from + remote server prior to generating if file not found locally :return: DataFrame """ states_df = pd.read_csv(DATA_PATH.joinpath('state_codes.csv')) states_list = [] if 'State' not in inventory_df: if all(p is not None for p in [inventory_acronym, year]): - fac_list = read_inventory(inventory_acronym, year, StewiFormat.FACILITY) + fac_list = read_inventory(inventory_acronym, year, StewiFormat.FACILITY, + download_if_missing) fac_list = fac_list[['FacilityID', 'State']].drop_duplicates(ignore_index=True) inventory_df = inventory_df.merge(fac_list, how='left') else: diff --git a/stewi/globals.py b/stewi/globals.py index f64f1821..725bd8ad 100644 --- a/stewi/globals.py +++ b/stewi/globals.py @@ -18,7 +18,7 @@ from esupy.processed_data_mgmt import Paths, FileMeta,\ load_preprocessed_output, remove_extra_files,\ write_df_to_file, write_metadata_to_file,\ - read_source_metadata + read_source_metadata, download_from_remote from esupy.dqi import get_weighted_average from esupy.util import get_git_hash @@ -27,7 +27,7 @@ DATA_PATH = MODULEPATH / 'data' log.basicConfig(level=log.INFO, format='%(levelname)s %(message)s') -STEWI_VERSION = '1.0.2' +STEWI_VERSION = '1.0.3' # Conversion factors USton_kg = 907.18474 @@ -288,12 +288,14 @@ def store_inventory(df, file_name, f, replace_files=REPLACE_FILES): log.error('Failed to save inventory') -def read_inventory(inventory_acronym, year, f): +def read_inventory(inventory_acronym, year, f, download_if_missing=False): """Return the inventory from local directory. If not found, generate it. :param inventory_acronym: like 'TRI' :param year: year as number like 2010 :param f: object of class StewiFormat + :param download_if_missing: bool, if True will attempt to load from + remote server prior to generating if file not found locally :return: dataframe of stored inventory; if not present returns None """ file_name = inventory_acronym + '_' + str(year) @@ -302,9 +304,13 @@ def read_inventory(inventory_acronym, year, f): method_path = paths.local_path + '/' + meta.category if inventory is None: log.info(f'{meta.name_data} not found in {method_path}') - log.info('requested inventory does not exist in local directory, ' - 'it will be generated...') - generate_inventory(inventory_acronym, year) + if download_if_missing: + meta.tool = meta.tool.lower() # lower case for remote access + download_from_remote(meta, paths) + else: + log.info('requested inventory does not exist in local directory, ' + 'it will be generated...') + generate_inventory(inventory_acronym, year) inventory = load_preprocessed_output(meta, paths) if inventory is None: log.error('error generating inventory')