From 5f90a39e5fec58fad2c00694dabfd0ece83effd0 Mon Sep 17 00:00:00 2001 From: Anthony Gagliardo Date: Wed, 6 Nov 2024 17:12:47 -0500 Subject: [PATCH 1/4] added a NOTES.md file with some metadata and license info --- methane-data-collection/data/NOTES.md | 57 +++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) create mode 100644 methane-data-collection/data/NOTES.md diff --git a/methane-data-collection/data/NOTES.md b/methane-data-collection/data/NOTES.md new file mode 100644 index 0000000..5942f82 --- /dev/null +++ b/methane-data-collection/data/NOTES.md @@ -0,0 +1,57 @@ +## This contains the license, citation, and location for each data set used in this module. + +### The methane data was sourced from gml.noaa.gov: +These data were produced by NOAA and are not subject to copyright +protection in the United States. NOAA waives any potential copyright and +related rights in these data worldwide through the Creative Commons Zero +1.0 Universal Public Domain Dedication (CC0 1.0) + +### CC0 1.0 Universal + +The data citation for each set is: + +Lan, X., J.W. Mund, A.M. Crotwell, K.W. Thoning, E. Moglia, M. Madronich, K. Baugh, +G. Petron, M.J. Crotwell, D. Neff, S. Wolter, T. Mefford and S. 
DeVogel (2024), +Atmospheric Carbon Dioxide Dry Air Mole Fractions from the NOAA GML Carbon Cycle Cooperative +Global Air Sampling Network, 1968-2023, Version: 2024-07-30, https://doi.org/10.15138/wkgj-f215 + +### South America Data: + +site_code : USH +site_name : Ushuaia +site_country : Argentina + +### Oceania Data: + +site_code : CGO +site_name : Cape Grim, Tasmania +site_country : Australia + +### North America Data: + +site_code : UTA +site_name : Wendover, Utah +site_country : United States + +### Africa Data: + +site_code : ASK +site_name : Assekrem +site_country : Algeria + +### Asia Data: + +site_code : AMY +site_name : Anmyeon-do +site_country : Republic of Korea + +### Antarctica Data: + +site_code : PSA +site_name : Palmer Station, Antarctica +site_country : United States + +### Europe Data: +site_code : ZEP +site_name : Ny-Alesund, Svalbard +site_country : Norway and Sweden From a09b41f71130ce5ae29369ba81d2039ec9bd2a3b Mon Sep 17 00:00:00 2001 From: ojha-aditya Date: Fri, 8 Nov 2024 16:36:23 +0000 Subject: [PATCH 2/4] changed preparation.py to accept custom datecolumn names --- preparation.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/preparation.py b/preparation.py index 95ea377..73e6004 100644 --- a/preparation.py +++ b/preparation.py @@ -57,7 +57,7 @@ def calc_freq(data, tim): diftim = diftim/(60*60*24*30.4375) return np.fft.fftfreq(n, d = diftim) -def get_timeseries(path): +def get_timeseries(path, datecolumn = 'date'): ''' This function reads json files from the data collection task and returns a pandas time series with datetime as index and @@ -74,6 +74,7 @@ def get_timeseries(path): Parameters: - path: Stringlike. path/to/json/file.json + - datecolumn: Stringlike. Name of the date column to set as the index Returns: - Pandas Time Series. 
Index = Datetime, Data = CO2/Methane Concentration @@ -84,13 +85,15 @@ def get_timeseries(path): #Uses the month and year information from the json file, # assumes data was taken on the first of each month, # creates new column with datetime - data['date'] = pd.to_datetime(data[['Year', 'Month']].assign(Day=1)) + + if datecolumn == "date": + data['date'] = pd.to_datetime(data[['Year', 'Month']].assign(Day=1)) #Sets datetime as index - data.set_index('date', inplace=True) + data.set_index(datecolumn, inplace=True) #Creates timeseries with co2 (ppm) as data and datetime as index - co2_series = data['CO2 (ppm)'] + co2_series = data[datacolumn] return co2_series From 565ade2604a06f1b679911a2fe51ed11d8dd2eff Mon Sep 17 00:00:00 2001 From: zbpetersbuf Date: Fri, 8 Nov 2024 18:16:12 +0000 Subject: [PATCH 3/4] added the error tolerance --- preparation.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/preparation.py b/preparation.py index b0e7053..ac1ce60 100644 --- a/preparation.py +++ b/preparation.py @@ -23,7 +23,9 @@ def fft_mag(data): matrix in half or take the absolut values of the variables""" n = len(data) timestamp_sum = sum(data.index[i+1].timestamp() - data.index[i].timestamp() for i in range(n-1)) - if not timestamp_sum/(n-1) == data.index[2].timestamp() - data.index[1].timestamp(): + compare = np.isclose(timestamp_sum/(n-1), + data.index[2].timestamp() - data.index[1].timestamp(), atol=1e-6) + if not compare: print("Data is not evenly spaced or data points are missing") return None return np.fft.fft(data.values) From 5ab43dc9e186284ee80719dd4d3756b7477d6ac5 Mon Sep 17 00:00:00 2001 From: ojha-aditya Date: Fri, 8 Nov 2024 20:58:39 +0000 Subject: [PATCH 4/4] Incorporated the functionality of having different data column name in get_timeseries function from preparation.py --- preparation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/preparation.py b/preparation.py index a9adc26..b5e3bb0 100644 --- a/preparation.py +++ 
b/preparation.py @@ -59,7 +59,7 @@ def calc_freq(data, tim): diftim = diftim/(60*60*24*30.4375) return np.fft.fftfreq(n, d = diftim) -def get_timeseries(path, datecolumn = 'date'): +def get_timeseries(path, datecolumn = 'date', datacolumn = 'CO2 (ppm)'): ''' This function reads json files from the data collection task and returns a pandas time series with datetime as index and