Commit v0.8

tonyhollaar committed Aug 4, 2023
1 parent 1f3a162 commit 8989b2e
Showing 5 changed files with 213 additions and 50 deletions.
15 changes: 9 additions & 6 deletions README.md
@@ -17,7 +17,7 @@ To run the Streamlit app locally on your machine, follow these steps:
 1. Install the `streamlit-bls-connection` package and its dependencies by running the following command in your terminal or command prompt:
 
 ```bash
-pip install streamlit-bls-connection
+pip install streamlit_bls_connection
 ```
 
 ### Create .py file
@@ -27,16 +27,19 @@ import streamlit as st
 from streamlit_bls_connection import BLSConnection
 
 # Step 1: Setup connection to US Bureau of Labor Statistics
-connection = BLSConnection("bls_connection")
+conn = st.experimental_connection('bls', type=BLSConnection)
 
-# Step 2: Define Input parameters for the API call
+# Step 2: Define input parameters
+# Tip: one or multiple Series IDs* can be retrieved
 seriesids_list = ['APU000074714', 'APU000072610']
-start_year_str = '2014' # start of date range
-end_year_str = '2023' # end of date range
+start_year_str = '2014'  # start of date range
+end_year_str = '2023'    # end of date range
 
 # Step 3: Fetch data using the custom connection
-dataframes_dict = connection.query(seriesids_list, start_year_str, end_year_str)
+dataframes_dict = conn.query(seriesids_list,
+                             start_year_str,
+                             end_year_str,
+                             api_key=None)
 
 # Step 4: Create dataframes
 gas_df = dataframes_dict['APU000074714']
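As a follow-on to the README example above, the fetched DataFrame can be shown directly in the app. This is a minimal sketch, not part of the commit: it assumes the `gas_df` DataFrame from Step 4 and the 'date' and 'value' columns that `fetch_data` documents below.

```python
import streamlit as st

# Step 5 (illustrative): preview and chart the fetched series.
# 'gas_df' comes from Step 4 above; 'date' and 'value' are columns
# the connection returns for every series.
st.dataframe(gas_df.head())                       # tabular preview
st.line_chart(gas_df.set_index('date')['value'])  # price over time
```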
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [tool.poetry]
 name = "streamlit_bls_connection"
-version = "0.7"
+version = "0.8"
 description = "API for U.S. Bureau of Labor Statistics with Streamlit Connection"
 authors = ["Tony Hollaar <[email protected]>"]
2 changes: 1 addition & 1 deletion setup.py
@@ -5,7 +5,7 @@
 
 setup(
     name='streamlit_bls_connection',
-    version='0.7',
+    version='0.8',
     license='MIT',
     description='A package to fetch Bureau of Labor Statistics data using Streamlit',
     long_description=long_description,
242 changes: 201 additions & 41 deletions streamlit_bls_connection/bls_connection.py
@@ -1,8 +1,7 @@
 # -*- coding: utf-8 -*-
 """
-"""
 Created on Mon Jul 24 15:12:23 2023
-@author: tholl
+@author: tonyhollaar
 """
 import streamlit as st
 from streamlit.connections import ExperimentalBaseConnection
@@ -12,64 +11,225 @@
 
 class BLSConnection(ExperimentalBaseConnection):
     def __init__(self, connection_name, **kwargs):
+        """
+        Initializes the BLSConnection object.
+
+        Parameters:
+        ----------
+        connection_name : str
+            A name for the connection.
+        **kwargs : keyword arguments, optional
+            Additional keyword arguments that can be passed to the parent class constructor.
+
+        Returns:
+        -------
+        None
+        """
         super().__init__(connection_name=connection_name, **kwargs)
         # Load any connection-specific configuration or credentials here if needed.
 
     def _connect(self, **kwargs):
         # Implement the connection setup here.
         # We don't need to explicitly set up a connection in this case,
         # as we'll be making direct API calls in the methods below.
         pass
 
-    def fetch_data(self, seriesids, start_year, end_year):
+    def fetch_data(self, seriesids, start_year, end_year, api_key=None, **kwargs):
+        """
+        Fetches data from the Bureau of Labor Statistics (BLS) API and returns it as a dictionary of DataFrames.
+
+        Parameters:
+        ----------
+        seriesids : list of str
+            The series IDs representing the BLS time series data to fetch. Each ID should be a string.
+        start_year : str
+            The start year for the data retrieval (inclusive), represented as a string.
+        end_year : str
+            The end year for the data retrieval (inclusive), represented as a string.
+        api_key : str, optional
+            The API key for accessing the BLS API. If not provided, some restrictions may apply to the data retrieval.
+            Note: Without an API key, you might be subject to limitations on the number of requests you can make.
+        **kwargs : keyword arguments, optional
+            Additional keyword arguments to customize the data retrieval:
+            - catalog : bool, optional
+                Whether to include catalog data for the series. Default is False.
+            - calculations : bool, optional
+                Whether to include calculated data for the series. Default is False.
+            - annualaverage : bool, optional
+                Whether to include annual average data for the series. Default is False.
+            - aspects : bool, optional
+                Whether to include additional aspects data for the series. Default is False.
+
+        Returns:
+        -------
+        dict of DataFrame(s)
+            A dictionary with series IDs as keys and DataFrames as values, containing the fetched BLS data for each series.
+            Each DataFrame includes columns for 'date', 'value', '%_change_value', 'year', 'month', and 'period'.
+            If an API key is provided, 'seriesID', 'series_title', and 'survey_name' columns are also included.
+            Empty or all-None columns are excluded from the DataFrames.
+        """
         dataframes_dict = {}
-        headers = {'Content-type': 'application/json'}
+        headers = {
+            'Content-type': 'application/json',
+        }
+
+        # Build the payload with required parameters
+        payload = {
+            "seriesid": seriesids,
+            "startyear": start_year,
+            "endyear": end_year,
+            "registrationkey": api_key
+        }
+
+        # Update the payload with additional parameters from **kwargs
+        payload.update(kwargs)
+
+        # Make the API request using the POST method with the payload
+        p = requests.post('https://api.bls.gov/publicAPI/v2/timeseries/data/', json=payload, headers=headers)
+        json_data = json.loads(p.text)
 
-        # iterate over one or more timeseries
-        for series_id in seriesids:
-            # create empty list to save data for the current seriesId
+        # Iterate over the JSON response and extract data for each series
+        for series in json_data['Results']['series']:
+            series_id = series['seriesID']
             parsed_data = []
-            # set the variable to retrieve from the public dataset
-            data = json.dumps({"seriesid": [series_id], "startyear": start_year, "endyear": end_year})
-            p = requests.post('https://api.bls.gov/publicAPI/v2/timeseries/data/', data=data, headers=headers)
-            json_data = json.loads(p.text)
 
-            # iterate over the json file
-            for series in json_data['Results']['series']:
-                # iterate over the list of lists that contains the data
-                for item in series['data']:
-                    # within each list retrieve the year, period, value and footnotes
-                    year = item['year']
-                    period = item['period']
-                    value = item['value']
-                    footnotes = ""
-                    for footnote in item['footnotes']:
-                        if footnote:
-                            footnotes = footnotes + footnote['text'] + ','
-                    parsed_data.append([series_id, year, period, value, footnotes[0:-1]])
+            # Extract catalog data for the current series if available
+            series_title = series.get('catalog', {}).get('series_title')
+            survey_name = series.get('catalog', {}).get('survey_name')
 
+            for item in series['data']:
+                year = item['year']
+                period = item['period']
+                value = item['value']
+                footnotes = ",".join(footnote['text'] for footnote in item['footnotes'] if footnote)
+
+                # Create a dictionary with the common data fields
+                row_data = {
+                    'seriesID': series_id,
+                    'year': year,
+                    'period': period,
+                    'value': value,
+                    'footnotes': footnotes,
+                    'series_title': series_title,
+                    'survey_name': survey_name,
+                    'catalog': series.get('catalog'),
+                    'calculations': item.get('calculations'),
+                    'annualaverage': item.get('annualaverage'),
+                    'aspects': item.get('aspects')
+                }
+
+                parsed_data.append(row_data)
+
+            # Create DataFrame for the current series
+            columns = ['seriesID', 'series_title', 'survey_name', 'year', 'period', 'value', 'catalog', 'calculations', 'annualaverage', 'aspects', 'footnotes']
+            data = [[entry.get(i, None) for i in columns] for entry in parsed_data]
+            df = pd.DataFrame(data, columns=columns)
 
-            df = pd.DataFrame(parsed_data, columns=['seriesID', 'year', 'period', 'value', 'footnotes'])
             df['value'] = pd.to_numeric(df['value'])
             df['month'] = pd.to_numeric(df['period'].replace({'M': ''}, regex=True))
-            df['date'] = df['month'].map(str) + '-' + df['year'].map(str)
-            df['date'] = pd.to_datetime(pd.to_datetime(df['date'], format='%m-%Y').dt.strftime('%m-%Y'))
+            df['date'] = pd.to_datetime(df['month'].map(str) + '-' + df['year'].map(str), format='%m-%Y')
             df = df.sort_values(by='date', ascending=True)
-            df['perct_change_value'] = df['value'].pct_change()
+            df['%_change_value'] = df['value'].pct_change()
+
+            # Reorder the columns in the DataFrame
+            df = df[['date', 'value', '%_change_value', 'seriesID', 'series_title', 'year', 'month', 'period', 'survey_name', 'catalog', 'calculations', 'annualaverage', 'aspects', 'footnotes']]
+
+            # Reset the index to start from 0
+            df.reset_index(drop=True, inplace=True)
 
-            # add the dataframe to the dictionary with the seriesid as the key
+            # Replace empty strings with NaN
+            df.replace('', pd.NA, inplace=True)
+
+            # Drop columns where all values are either NaN or pd.NA
+            df = df.dropna(axis=1, how='all')
+
+            # Add the DataFrame to the dictionary with the seriesid as the key
             dataframes_dict[series_id] = df
 
         return dataframes_dict
 
-    @staticmethod
+    @classmethod
     @st.cache_data(ttl="1d")  # Cache the data for one day (24 hours)
-    def query(series_id, start_year, end_year):
-        # This method will be called by the Streamlit app to retrieve data using the custom connection.
-        # You can implement any caching logic or other data processing here.
-        connection = BLSConnection("bls_connection")
+    def query(cls, seriesids, start_year, end_year, api_key=None, **kwargs):
+        """
+        Fetches data from the Bureau of Labor Statistics (BLS) API.
+
+        Parameters:
+        ----------
+        seriesids : list of str
+            The series IDs representing the BLS time series data to fetch. Each ID should be a string.
+        start_year : str
+            The start year for the data retrieval (inclusive), represented as a string.
+        end_year : str
+            The end year for the data retrieval (inclusive), represented as a string.
+        api_key : str, optional
+            The API key for accessing the BLS API. If not provided, some restrictions may apply to the data retrieval.
+            Note: Without an API key, you might be subject to limitations on the number of requests you can make.
+        **kwargs : keyword arguments, optional
+            Additional keyword arguments to customize the data retrieval:
+            - catalog : bool, optional
+                Whether to include catalog data for the series. Default is False.
+            - calculations : bool, optional
+                Whether to include calculated data for the series. Default is False.
+            - annualaverage : bool, optional
+                Whether to include annual average data for the series. Default is False.
+            - aspects : bool, optional
+                Whether to include additional aspects data for the series. Default is False.
+
+        Returns:
+        -------
+        dict of DataFrame(s)
+            A dictionary with series IDs as keys and DataFrames as values, containing the fetched BLS data for each series.
+            Each DataFrame includes columns for 'date', 'value', '%_change_value', 'year', 'month', and 'period'.
+            If an API key is provided, 'seriesID', 'series_title', and 'survey_name' columns, plus any columns
+            enabled via **kwargs, are also included in the DataFrames.
+            Empty or all-None columns are excluded from the DataFrames.
+
+        Example:
+        --------
+        # Setup connection
+        conn = st.experimental_connection('bls', type=BLSConnection)
+
+        # Set your API key obtained from https://data.bls.gov/registrationEngine/
+        api_key = 'YOUR_API_KEY_HERE'  # Replace with your key or set to None.
+        # Optionally, store the API key in secrets.toml under [connections_bls] with api_key = 'YOUR_KEY'
+        # and set api_key = st.secrets["connections_bls"]["api_key"] to use it securely.
+
+        # Calling the query method with additional keyword arguments
+        seriesids_list = ['APU000074714', 'APU000074715']
+        dataframes_dict = conn.query(
+            seriesids=seriesids_list,
+            start_year='2014',
+            end_year='2023',
+            api_key=api_key,
+            catalog=True,
+            calculations=True,
+            annualaverage=True,
+            aspects=True
+        )
+
+        # Access the retrieved DataFrames using the 'dataframes_dict' dictionary.
+        first_series_dataframe = dataframes_dict[seriesids_list[0]]  # DataFrame for the first series ID.
+        # Alternatively, access the first DataFrame using its series ID.
+        first_series_dataframe = dataframes_dict['APU000074714']
+        """
         try:
-            return connection.fetch_data(series_id, start_year, end_year)
+            # This method will be called by the Streamlit app to retrieve data using the custom connection.
+            # You can implement any caching logic or other data processing here.
+            connection = cls("bls_connection")
+
+            # Fetch data using the custom connection
+            dataframes_dict = connection.fetch_data(
+                seriesids=seriesids,
+                start_year=start_year,
+                end_year=end_year,
+                api_key=api_key,  # Pass the api_key or set to None
+                **kwargs  # Pass any additional keyword arguments
+            )
+            return dataframes_dict
         except KeyError:
             with st.sidebar:
-                st.error("😒 **Error**: Failed to fetch latest data. Daily query limit is exceeded")
-                #st.stop() # Stop the app execution and display the error message to the user
-                return None
+                st.error("😒 **Error**: Failed to fetch latest data. Daily query limit is exceeded.")
+                return None
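For context on the parsing in `fetch_data` above: the BLS v2 timeseries endpoint returns JSON shaped roughly as below, which is why the code walks `json_data['Results']['series']` and each series' `data` list. This is an abbreviated, illustrative sketch; the field values are examples, not a real API response.

```python
# Rough shape of the BLS v2 response that fetch_data() parses.
# Field names mirror the code above; the values are illustrative only.
json_data = {
    "status": "REQUEST_SUCCEEDED",
    "Results": {
        "series": [
            {
                "seriesID": "APU000074714",
                "catalog": {                 # only present with an API key and catalog=True
                    "series_title": "...",   # mapped to the 'series_title' column
                    "survey_name": "...",    # mapped to the 'survey_name' column
                },
                "data": [
                    {
                        "year": "2023",
                        "period": "M06",     # 'M' prefix is stripped to derive 'month'
                        "value": "3.577",
                        "footnotes": [{}],   # empty dicts are skipped when joining text
                    },
                ],
            },
        ],
    },
}
```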
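The `query` docstring also mentions keeping the API key in `secrets.toml`. Below is a minimal sketch of that setup, assuming the `[connections_bls]` section name used in the docstring; adjust it to your own secrets layout.

```python
# .streamlit/secrets.toml (contents shown here as a comment):
#
#     [connections_bls]
#     api_key = "YOUR_API_KEY_HERE"

import streamlit as st
from streamlit_bls_connection import BLSConnection

conn = st.experimental_connection('bls', type=BLSConnection)
api_key = st.secrets["connections_bls"]["api_key"]  # read the key instead of hard-coding it
dataframes_dict = conn.query(['APU000074714'], '2014', '2023', api_key=api_key)
```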

2 changes: 1 addition & 1 deletion streamlit_bls_connection/version.py
@@ -1,2 +1,2 @@
 # version.py
-__version__ = "0.7"
+__version__ = "0.8"
