diff --git a/cs-config/cs_config/functions.py b/cs-config/cs_config/functions.py index b396fe2c..cef61206 100644 --- a/cs-config/cs_config/functions.py +++ b/cs-config/cs_config/functions.py @@ -11,9 +11,11 @@ from .helpers import retrieve_puf import cs2tc -AWS_ACCESS_KEY_ID = os.environ.get("AWS_ACCESS_KEY_ID", "") -AWS_SECRET_ACCESS_KEY = os.environ.get("AWS_SECRET_ACCESS_KEY", "") - +AWS_ACCESS_KEY_ID = os.environ.get("AWS_ACCESS_KEY_ID") +AWS_SECRET_ACCESS_KEY = os.environ.get("AWS_SECRET_ACCESS_KEY") +PUF_S3_FILE_LOCATION = os.environ.get( + "PUF_S3_LOCATION", "s3://ospc-data-files/puf.20210720.csv.gz" +) class MetaParams(paramtools.Parameters): """ @@ -174,7 +176,9 @@ def run_model(meta_param_dict, adjustment): meta_params.adjust(meta_param_dict) # Get data chosen by user if meta_params.data_source == "PUF": - data = retrieve_puf(AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY) + data = retrieve_puf( + PUF_S3_FILE_LOCATION, AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY + ) else: data = "cps" # Get TC params adjustments diff --git a/cs-config/cs_config/helpers.py b/cs-config/cs_config/helpers.py index dac7d0cd..f411efd0 100644 --- a/cs-config/cs_config/helpers.py +++ b/cs-config/cs_config/helpers.py @@ -5,18 +5,21 @@ import os from pathlib import Path import warnings +import os -try: - from s3fs import S3FileSystem -except ImportError: - S3FileSystem = None import pandas as pd from ccc.utils import TC_LAST_YEAR -AWS_ACCESS_KEY_ID = os.environ.get("AWS_ACCESS_KEY_ID") -AWS_SECRET_ACCESS_KEY = os.environ.get("AWS_SECRET_ACCESS_KEY") +try: + from s3fs import S3FileSystem +except ImportError as ie: + S3FileSystem = None -PUF_S3_FILE_NAME = "puf.20210720.csv.gz" +AWS_ACCESS_KEY_ID = os.environ.get("AWS_ACCESS_KEY_ID", None) +AWS_SECRET_ACCESS_KEY = os.environ.get("AWS_SECRET_ACCESS_KEY", None) +PUF_S3_FILE_LOCATION = os.environ.get( + "PUF_S3_LOCATION", "s3://ospc-data-files/puf.20210720.csv.gz" +) POLICY_SCHEMA = { "labels": { @@ -79,8 +82,7 @@ def retrieve_puf( - aws_access_key_id=AWS_ACCESS_KEY_ID, - aws_secret_access_key=AWS_SECRET_ACCESS_KEY, + puf_s3_file_location=PUF_S3_FILE_LOCATION, aws_access_key_id=AWS_ACCESS_KEY_ID, aws_secret_access_key=AWS_SECRET_ACCESS_KEY ): """ Function for retrieving the PUF from the OSPC S3 bucket @@ -89,15 +91,12 @@ def retrieve_puf( has_credentials = ( aws_access_key_id is not None and aws_secret_access_key is not None ) - if has_credentials and s3_reader_installed: - print("Reading puf from S3 bucket.") - fs = S3FileSystem( - key=AWS_ACCESS_KEY_ID, - secret=AWS_SECRET_ACCESS_KEY, - ) - with fs.open(f"s3://ospc-data-files/{PUF_S3_FILE_NAME}") as f: + if puf_s3_file_location and has_credentials and s3_reader_installed: + print("Reading puf from S3 bucket.", puf_s3_file_location) + fs = S3FileSystem(key=AWS_ACCESS_KEY_ID, secret=AWS_SECRET_ACCESS_KEY,) + with fs.open(puf_s3_file_location) as f: # Skips over header from top of file. - puf_df = pd.read_csv(f, compression="gzip") + puf_df = pd.read_csv(f) return puf_df elif Path("puf.csv.gz").exists(): print("Reading puf from puf.csv.gz.") @@ -107,7 +106,8 @@ def retrieve_puf( return pd.read_csv("puf.csv") else: warnings.warn( - f"PUF file not available (has_credentials={has_credentials}, " + f"PUF file not available (puf_location={puf_s3_file_location}, " + f"has_credentials={has_credentials}, " f"s3_reader_installed={s3_reader_installed})" ) - return None + return None \ No newline at end of file diff --git a/cs-config/install.sh b/cs-config/install.sh index 5c74c541..f93f1afa 100644 --- a/cs-config/install.sh +++ b/cs-config/install.sh @@ -1 +1,2 @@ -conda install -c conda-forge ccc s3fs +conda install -c conda-forge s3fs +pip install -e . \ No newline at end of file