From 5f90a39e5fec58fad2c00694dabfd0ece83effd0 Mon Sep 17 00:00:00 2001
From: Anthony Gagliardo <avgagliardo@gmail.com>
Date: Wed, 6 Nov 2024 17:12:47 -0500
Subject: [PATCH 1/4] added a NOTES.md file with some metadata and license info

---
 methane-data-collection/data/NOTES.md | 57 +++++++++++++++++++++++++++
 1 file changed, 57 insertions(+)
 create mode 100644 methane-data-collection/data/NOTES.md

diff --git a/methane-data-collection/data/NOTES.md b/methane-data-collection/data/NOTES.md
new file mode 100644
index 0000000..5942f82
--- /dev/null
+++ b/methane-data-collection/data/NOTES.md
@@ -0,0 +1,57 @@
+## This file contains the license, citation, and location for each data set used in this module.
+
+### The methane data was sourced from gml.noaa.gov:
+These data were produced by NOAA and are not subject to copyright
+protection in the United States. NOAA waives any potential copyright and
+related rights in these data worldwide through the Creative Commons Zero
+1.0 Universal Public Domain Dedication (CC0 1.0)
+
+### CC0 1.0 Universal
+
+The data citation for each set is:
+
+Lan, X., J.W. Mund, A.M. Crotwell, K.W. Thoning, E. Moglia, M. Madronich, K. Baugh,
+G. Petron, M.J. Crotwell, D. Neff, S. Wolter, T. Mefford and S. DeVogel (2024),
+Atmospheric Carbon Dioxide Dry Air Mole Fractions from the NOAA GML Carbon Cycle Cooperative
+Global Air Sampling Network, 1968-2023,  Version: 2024-07-30, https://doi.org/10.15138/wkgj-f215
+
+### South America Data:
+
+site_code : USH
+site_name : Ushuaia
+site_country : Argentina
+
+### Oceania Data:
+
+site_code : CGO
+site_name : Cape Grim, Tasmania
+site_country : Australia
+
+### North America Data:
+
+site_code : UTA
+site_name : Wendover, Utah
+site_country : United States
+
+### Africa Data:
+
+site_code : ASK
+site_name : Assekrem
+site_country : Algeria
+
+### Asia Data:
+
+site_code : AMY
+site_name : Anmyeon-do
+site_country : Republic of Korea
+
+### Antarctica Data:
+
+site_code : PSA
+site_name : Palmer Station, Antarctica
+site_country : United States
+
+### Europe Data:
+site_code : ZEP
+site_name : Ny-Alesund, Svalbard
+site_country : Norway and Sweden

From a09b41f71130ce5ae29369ba81d2039ec9bd2a3b Mon Sep 17 00:00:00 2001
From: ojha-aditya <adityaojha.science@gmail.com>
Date: Fri, 8 Nov 2024 16:36:23 +0000
Subject: [PATCH 2/4] changed preparation.py to accept custom datecolumn names

---
 preparation.py | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/preparation.py b/preparation.py
index 95ea377..73e6004 100644
--- a/preparation.py
+++ b/preparation.py
@@ -57,7 +57,7 @@ def calc_freq(data, tim):
         diftim = diftim/(60*60*24*30.4375)
     return np.fft.fftfreq(n, d = diftim)
 
-def get_timeseries(path):
+def get_timeseries(path, datecolumn = 'date'):
     '''
     This function reads json files from the data collection task 
     and returns a pandas time series with datetime as index and 
@@ -74,6 +74,7 @@ def get_timeseries(path):
 
     Parameters:
     - path: Stringlike. path/to/json/file.json
+    - datecolumn: String. Name of the datetime index column (default 'date', built from 'Year' and 'Month').
 
     Returns:
     - Pandas Time Series. Index = Datetime, Data = CO2/Methane Concentration
@@ -84,13 +85,15 @@ def get_timeseries(path):
     #Uses the month and year information from the json file,
     # assumes data was taken on the first of each month,
     # creates new column with datetime
-    data['date'] = pd.to_datetime(data[['Year', 'Month']].assign(Day=1))
+    
+    if datecolumn == "date": 
+        data['date'] = pd.to_datetime(data[['Year', 'Month']].assign(Day=1))
 
     #Sets datetime as index
-    data.set_index('date', inplace=True)
+    data.set_index(datecolumn, inplace=True)
 
     #Creates timeseries with co2 (ppm) as data and datetime as index
-    co2_series = data['CO2 (ppm)']
+    co2_series = data[datacolumn]
 
 
     return co2_series

From 565ade2604a06f1b679911a2fe51ed11d8dd2eff Mon Sep 17 00:00:00 2001
From: zbpetersbuf <zbpeters@buffalo.edu>
Date: Fri, 8 Nov 2024 18:16:12 +0000
Subject: [PATCH 3/4] added the error tolerance

---
 preparation.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/preparation.py b/preparation.py
index b0e7053..ac1ce60 100644
--- a/preparation.py
+++ b/preparation.py
@@ -23,7 +23,9 @@ def fft_mag(data):
     matrix in half or take the absolut values of the variables"""
     n = len(data)
     timestamp_sum = sum(data.index[i+1].timestamp() - data.index[i].timestamp() for i in range(n-1))
-    if not timestamp_sum/(n-1) == data.index[2].timestamp() - data.index[1].timestamp():
+    compare = np.isclose(timestamp_sum/(n-1),
+                          data.index[2].timestamp() - data.index[1].timestamp(), atol=1e-6)
+    if not compare:
         print("Data is not evenly spaced or data points are missing")
         return None
     return np.fft.fft(data.values)

From 5ab43dc9e186284ee80719dd4d3756b7477d6ac5 Mon Sep 17 00:00:00 2001
From: ojha-aditya <adityaojha.science@gmail.com>
Date: Fri, 8 Nov 2024 20:58:39 +0000
Subject: [PATCH 4/4] Incorporated the functionality of having different data
 column name in get_timeseries function from preparation.py

---
 preparation.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/preparation.py b/preparation.py
index a9adc26..b5e3bb0 100644
--- a/preparation.py
+++ b/preparation.py
@@ -59,7 +59,7 @@ def calc_freq(data, tim):
         diftim = diftim/(60*60*24*30.4375)
     return np.fft.fftfreq(n, d = diftim)
 
-def get_timeseries(path, datecolumn = 'date'):
+def get_timeseries(path, datecolumn = 'date', datacolumn = 'CO2 (ppm)'):
     '''
     This function reads json files from the data collection task 
     and returns a pandas time series with datetime as index and