Skip to content

Commit

Permalink
Merge pull request #103 from USEPA/release_1.0.3
Browse files Browse the repository at this point in the history
Release v1.0.3
  • Loading branch information
bl-young authored Apr 7, 2022
2 parents 4e1b445 + 410a387 commit ac21490
Show file tree
Hide file tree
Showing 7 changed files with 70 additions and 31 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -105,9 +105,9 @@ If the `remove_overlap` parameter is set to True (default), `stewicombo` combine
## Installation Instructions

Install a release directly from GitHub using pip. From a command line interface, run:
> pip install git+https://github.com/USEPA/standardizedinventories@v1.0.2#egg=StEWI
> pip install git+https://github.com/USEPA/standardizedinventories@v1.0.3#egg=StEWI
where you can replace 'v1.0.2' with the version you wish to use under [Releases](https://github.com/USEPA/standardizedinventories/releases).
where you can replace 'v1.0.3' with the version you wish to use under [Releases](https://github.com/USEPA/standardizedinventories/releases).

Alternatively, to install from the most current point on the repository:
```
Expand Down
7 changes: 5 additions & 2 deletions facilitymatcher/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,18 +23,21 @@ def get_matches_for_inventories(inventory_list=stewi_inventories):


def get_FRS_NAICSInfo_for_facility_list(frs_id_list,
inventories_of_interest_list=None):
inventories_of_interest_list=None,
download_if_missing=False):
"""Return the FRS NAICS codes for the facilities of interest.
Optionally it will also filter that FRS info by inventories of interest
:param frs_id_list: list of FRS IDs
e.g. ['110000491735', '110000491744']
:param inventories_of_interest_list: list of inventories to filter NAICS
info by using StEWI inventory names e.g. ['NEI']
:param download_if_missing: bool, if True will attempt to load from
remote server prior to generating if file not found locally
:return: dataframe with columns 'FRS_ID', 'Source', 'NAICS',
'PRIMARY_INDICATOR'
"""
all_NAICS = get_fm_file('FRS_NAICSforStEWI')
all_NAICS = get_fm_file('FRS_NAICSforStEWI', download_if_missing)
if frs_id_list is not None:
NAICS_of_interest = filter_by_facility_list(all_NAICS, frs_id_list)
else:
Expand Down
16 changes: 12 additions & 4 deletions facilitymatcher/globals.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@
import facilitymatcher.WriteFacilityMatchesforStEWI as write_fm
import facilitymatcher.WriteFRSNAICSforStEWI as write_naics
from esupy.processed_data_mgmt import Paths, load_preprocessed_output,\
write_df_to_file, write_metadata_to_file, read_source_metadata
write_df_to_file, write_metadata_to_file, read_source_metadata,\
download_from_remote
from esupy.util import strip_file_extension

MODULEPATH = Path(__file__).resolve().parent
Expand Down Expand Up @@ -97,14 +98,21 @@ def store_fm_file(df, file_name, category='', sources=None):
log.error('Failed to save inventory')


def get_fm_file(file_name):
"""Read facilitymatcher file, if not present, generate it."""
def get_fm_file(file_name, download_if_missing=False):
"""Read facilitymatcher file, if not present, generate it.
:param file_name: str, can be 'FacilityMatchList_forStEWI' or
'FRS_NAICSforStEWI'
:param download_if_missing: bool, if True will attempt to load from
remote server prior to generating if file not found locally
"""
file_meta = set_facilitymatcher_meta(file_name, category='')
df = load_preprocessed_output(file_meta, paths)
if df is None:
log.info(f'{file_name} not found in {output_dir}, '
'writing facility matches to file')
if file_name == 'FacilityMatchList_forStEWI':
if download_if_missing:
download_from_remote(file_meta, paths)
elif file_name == 'FacilityMatchList_forStEWI':
write_fm.write_facility_matches()
elif file_name == 'FRS_NAICSforStEWI':
write_naics.write_NAICS_matches()
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

setup(
name="StEWI",
version="1.0.2",
version="1.0.3",
author="Ben Young, Wesley Ingwersen, Matthew Bergmann, Jose Hernandez-Betancur, Tapajyoti Ghosh, Eric Bell",
author_email="[email protected]",
description="Standardized Emission And Waste Inventories (StEWI)"
Expand Down
35 changes: 24 additions & 11 deletions stewi/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,20 +60,25 @@ def printAvailableInventories(stewiformat='flowbyfacility'):


def getInventory(inventory_acronym, year, stewiformat='flowbyfacility',
filters=None, filter_for_LCI=False, US_States_Only=False):
filters=None, filter_for_LCI=False, US_States_Only=False,
download_if_missing=False):
"""Return or generate an inventory in a standard output format.
:param inventory_acronym: like 'TRI'
:param year: year as number like 2010
:param stewiformat: str e.g. 'flowbyfacility' or 'flow'
:param filters: a list of named filters to apply to inventory
:param filter_for_LCI: whether or not to filter inventory for life
cycle inventory creation
:param US_States_Only: includes only US states
cycle inventory creation, is DEPRECATED in favor of 'filters'
:param US_States_Only: includes only US states, is DEPRECATED in
favor of 'filters'
:param download_if_missing: bool, if True will attempt to load from
remote server prior to generating if file not found locally
:return: dataframe with standard fields depending on output format
"""
f = ensure_format(stewiformat)
inventory = read_inventory(inventory_acronym, year, f)
inventory = read_inventory(inventory_acronym, year, f,
download_if_missing)
if not filters:
filters = []
if f.value > 2: # exclude FLOW and FACILITY
Expand All @@ -92,7 +97,7 @@ def getInventory(inventory_acronym, year, stewiformat='flowbyfacility',
filters.append('US_States_only')

inventory = apply_filters_to_inventory(inventory, inventory_acronym, year,
filters)
filters, download_if_missing)
# After filtering, it may be necessary to reaggregate the inventory
inventory = aggregate(inventory)

Expand All @@ -102,31 +107,39 @@ def getInventory(inventory_acronym, year, stewiformat='flowbyfacility',
return inventory


def getInventoryFlows(inventory_acronym, year):
def getInventoryFlows(inventory_acronym, year,
download_if_missing=False):
"""Return flows for an inventory.
:param inventory_acronym: e.g. 'TRI'
:param year: e.g. 2014
:param download_if_missing: bool, if True will attempt to load from
remote server prior to generating if file not found locally
:return: dataframe with standard flows format
"""
flows = read_inventory(inventory_acronym, year, StewiFormat.FLOW)
flows = read_inventory(inventory_acronym, year, StewiFormat.FLOW,
download_if_missing)
if flows is None:
return None
return
flows = add_missing_fields(flows, inventory_acronym, StewiFormat.FLOW,
maintain_columns=False)
return flows


def getInventoryFacilities(inventory_acronym, year):
def getInventoryFacilities(inventory_acronym, year,
download_if_missing=False):
"""Return flows for an inventory.
:param inventory_acronym: e.g. 'TRI'
:param year: e.g. 2014
:param download_if_missing: bool, if True will attempt to load from
remote server prior to generating if file not found locally
:return: dataframe with standard facilities format
"""
facilities = read_inventory(inventory_acronym, year, StewiFormat.FACILITY)
facilities = read_inventory(inventory_acronym, year, StewiFormat.FACILITY,
download_if_missing)
if facilities is None:
return None
return
facilities = add_missing_fields(facilities, inventory_acronym, StewiFormat.FACILITY,
maintain_columns=True)
return facilities
Expand Down
19 changes: 14 additions & 5 deletions stewi/filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,16 @@
filter_config = config(file='filter.yaml')


def apply_filters_to_inventory(inventory, inventory_acronym, year, filters):
def apply_filters_to_inventory(inventory, inventory_acronym, year, filters,
download_if_missing=False):
"""Apply one or more filters from a passed list to an inventory dataframe.
:param inventory: df of stewi inventory of type flowbyfacility or flowbyprocess
:param inventory_acronym: str of inventory e.g. 'NEI'
:param year: year as number like 2010
:param filters: a list of named filters to apply to inventory
:param download_if_missing: bool, if True will attempt to load from
remote server prior to generating if file not found locally
:return: DataFrame of filtered inventory
"""
if 'filter_for_LCI' in filters:
Expand All @@ -30,15 +33,17 @@ def apply_filters_to_inventory(inventory, inventory_acronym, year, filters):
if 'US_States_only' in filters:
log.info('filtering for US states')
inventory = filter_states(inventory, inventory_acronym=inventory_acronym,
year=year)
year=year,
download_if_missing=download_if_missing)

if inventory_acronym == 'DMR' and 'remove_duplicate_organic_enrichment' in filters:
from stewi.DMR import remove_duplicate_organic_enrichment
inventory = remove_duplicate_organic_enrichment(inventory)

if inventory_acronym == 'RCRAInfo' and 'National_Biennial_Report' in filters:
log.info('filtering for National Biennial Report')
fac_list = read_inventory('RCRAInfo', year, StewiFormat.FACILITY)
fac_list = read_inventory('RCRAInfo', year, StewiFormat.FACILITY,
download_if_missing)
fac_list = fac_list[['FacilityID',
'Generator ID Included in NBR']
].drop_duplicates(ignore_index=True)
Expand All @@ -62,7 +67,8 @@ def apply_filters_to_inventory(inventory, inventory_acronym, year, filters):


def filter_states(inventory_df, inventory_acronym=None, year=None,
include_states=True, include_dc=True, include_territories=False):
include_states=True, include_dc=True, include_territories=False,
download_if_missing=False):
"""Remove records that are not included in the list of states.
:param inventory_df: dataframe that includes column 'State' of 2 digit strings,
Expand All @@ -71,13 +77,16 @@ def filter_states(inventory_df, inventory_acronym=None, year=None,
:param include_states: bool, True to include data from 50 U.S. states
:param include_dc: bool, True to include data from D.C.
:param include_territories: bool, True to include data from U.S. territories
:param download_if_missing: bool, if True will attempt to load from
remote server prior to generating if file not found locally
:return: DataFrame
"""
states_df = pd.read_csv(DATA_PATH.joinpath('state_codes.csv'))
states_list = []
if 'State' not in inventory_df:
if all(p is not None for p in [inventory_acronym, year]):
fac_list = read_inventory(inventory_acronym, year, StewiFormat.FACILITY)
fac_list = read_inventory(inventory_acronym, year, StewiFormat.FACILITY,
download_if_missing)
fac_list = fac_list[['FacilityID', 'State']].drop_duplicates(ignore_index=True)
inventory_df = inventory_df.merge(fac_list, how='left')
else:
Expand Down
18 changes: 12 additions & 6 deletions stewi/globals.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from esupy.processed_data_mgmt import Paths, FileMeta,\
load_preprocessed_output, remove_extra_files,\
write_df_to_file, write_metadata_to_file,\
read_source_metadata
read_source_metadata, download_from_remote
from esupy.dqi import get_weighted_average
from esupy.util import get_git_hash

Expand All @@ -27,7 +27,7 @@
DATA_PATH = MODULEPATH / 'data'

log.basicConfig(level=log.INFO, format='%(levelname)s %(message)s')
STEWI_VERSION = '1.0.2'
STEWI_VERSION = '1.0.3'

# Conversion factors
USton_kg = 907.18474
Expand Down Expand Up @@ -288,12 +288,14 @@ def store_inventory(df, file_name, f, replace_files=REPLACE_FILES):
log.error('Failed to save inventory')


def read_inventory(inventory_acronym, year, f):
def read_inventory(inventory_acronym, year, f, download_if_missing=False):
"""Return the inventory from local directory. If not found, generate it.
:param inventory_acronym: like 'TRI'
:param year: year as number like 2010
:param f: object of class StewiFormat
:param download_if_missing: bool, if True will attempt to load from
remote server prior to generating if file not found locally
:return: dataframe of stored inventory; if not present returns None
"""
file_name = inventory_acronym + '_' + str(year)
Expand All @@ -302,9 +304,13 @@ def read_inventory(inventory_acronym, year, f):
method_path = paths.local_path + '/' + meta.category
if inventory is None:
log.info(f'{meta.name_data} not found in {method_path}')
log.info('requested inventory does not exist in local directory, '
'it will be generated...')
generate_inventory(inventory_acronym, year)
if download_if_missing:
meta.tool = meta.tool.lower() # lower case for remote access
download_from_remote(meta, paths)
else:
log.info('requested inventory does not exist in local directory, '
'it will be generated...')
generate_inventory(inventory_acronym, year)
inventory = load_preprocessed_output(meta, paths)
if inventory is None:
log.error('error generating inventory')
Expand Down

0 comments on commit ac21490

Please sign in to comment.