return csv-wide data

a2gov · Jul 22, 2024 · 5c61bf6 · 5c61bf6
1 parent 284ce9c
commit 5c61bf6
Show file tree

Hide file tree

Showing 4 changed files with 34 additions and 6 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -2,6 +2,10 @@
 
 <!--next-version-placeholder-->
 
+## v0.2.0 (2024-07-22)
+
+- Can retrieve data in `csv-wide` format
+
 ## v0.1.0 (2024-07-07)
 
 - Initial release
diff --git a/README.md b/README.md
@@ -43,18 +43,35 @@ api_connection = clarityio.ClarityAPIConnection(api_key='YOUR_API_KEY', org='YOU
 
 ### Retrieve recent measurements
 
-See API docs for valid arguments to pass, e.g., retrieve daily data instead of hourly, or in CSV format instead of JSON.
+See the API docs for valid arguments to pass, e.g., to retrieve daily data instead of hourly.
+
+The default value of `format` is `json-long`, which returns the data in long format (one row per combination of metric and time). Here is an example of such a call:
 
 ```python
 request_body = { # the required value for 'org' is automatically passed from the connection object
         'allDatasources': True,
         'outputFrequency': 'hour',
         'format': 'json-long',
-        'startTime': '2024-07-05T00:00:00Z'
+        'startTime': '2024-07-22T00:00:00Z'
 }
 response = api_connection.get_recent_measurements(data=request_body)
 df = pd.DataFrame(response['data'])
 ```
+
+To get the data in wide format, with one row per timestamp and each metric in its own column, use the `csv-wide` format option and convert the response to a pandas DataFrame:
+
+```python
+request_body = {
+        'allDatasources': True,
+        'outputFrequency': 'hour',
+        'format': 'csv-wide',
+        'metricSelect': 'only pm2_5ConcMass24HourRollingMean' # see API docs for how to specify specific variables
+}
+response_wide = api_connection.get_recent_measurements(data=request_body)
+from io import StringIO
+df_wide = pd.read_csv(StringIO(response_wide), sep=",")
+```
+
 ### List data sources
 ```python
 datasources_response = api_connection.get_datasources()

diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "clarityio"
-version = "0.1.0"
+version = "0.2.0"
 description = "Retrieve air quality data from the Clarity.io API"
 authors = [
   { name="Sam Firke", email="[email protected]" },

diff --git a/src/clarityio/clarityio.py b/src/clarityio/clarityio.py
@@ -1,4 +1,5 @@
 import requests
+import copy
 
 class ClarityAPIConnection:
     def __init__(self, api_key, org):
@@ -16,17 +17,23 @@ def get_recent_measurements(self, data=None):
         if data is None:
             print('No parameters provided, fetching hourly measurements for all datasources using API defaults.')
             data = {}
-            data['allDatasources'] = True
-            data['outputFrequency'] = 'hour' # API v2 docs say this should be a default value but requests fails w/o it
+            data = {'allDatasources': True, 'outputFrequency': 'hour'} # API v2 docs say outputFrequency should be 'hour' by default but requests fail if it is not specified
+        else:
+            data = copy.deepcopy(data)
         data['org'] = self.org
         try:
             response = requests.post(url, headers=self.headers, json=data)
             response.raise_for_status()  # Raises an HTTPError if the response status code is 4XX/5XX
-            return response.json()
         except requests.exceptions.HTTPError as err:
             print(f"HTTP error occurred: {err}")
+            return None
         except Exception as err:
             print(f"An error occurred: {err}")
+            return None
+        if data.get('format') == 'csv-wide':
+            return response.text
+        else:
+            return response.json()
 
     def get_datasources(self):
         """