diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 4c2a7f9..53e94ad 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -2,7 +2,7 @@ repos: # Other file formatting, plus common Git mistakes & text file standardization: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.2.0 + rev: v5.0.0 hooks: - id: check-added-large-files # Don't accidentally commit giant files. - id: check-merge-conflict # Watch for lingering merge markers. @@ -16,13 +16,13 @@ repos: # Make sure import statements are sorted uniformly. - repo: https://github.com/PyCQA/isort - rev: 5.10.1 + rev: 5.13.2 hooks: - id: isort # Check for PEP8 non-compliance, code complexity, style, errors, etc: - repo: https://github.com/PyCQA/flake8 - rev: 4.0.1 + rev: 7.1.1 hooks: - id: flake8 args: @@ -43,7 +43,7 @@ repos: # Format the code - repo: https://github.com/psf/black - rev: 22.3.0 + rev: 24.10.0 hooks: - id: black args: diff --git a/LICENSE b/LICENSE index 661c756..0c26b0f 100644 --- a/LICENSE +++ b/LICENSE @@ -7,4 +7,3 @@ Permission is hereby granted, free of charge, to any person obtaining a copy of The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- diff --git a/data/raw/all_gsod_stations_in_wieb_territory.csv b/data/raw/all_gsod_stations_in_wieb_territory.csv index 053a09d..13fc1ab 100644 --- a/data/raw/all_gsod_stations_in_wieb_territory.csv +++ b/data/raw/all_gsod_stations_in_wieb_territory.csv @@ -2236,4 +2236,4 @@ A07049,00320,PETALUMA MUNICIPAL AIRPORT,US,CA,KO69 ,38.25,-122.6,+0027.1,2014073 715680,99999,FANNY ISLAND,CA,,CXFA,50.45,-125.983,+0008.0,20030617,20210920 712090,99999,SANDHEADS CS,CA,,CWVF,49.1,-123.3,+0011.0,20010705,20210920 710310,99999,DISCOVERY ISLAND,CA,,CWDR,48.42,-123.23,+0015.0,19921008,20210920 -714840,99999,BONILLA ISLAND (AUT) BC,CA,,CWWL,53.5,-130.633,+0015.0,19860331,20210920 \ No newline at end of file +714840,99999,BONILLA ISLAND (AUT) BC,CA,,CWWL,53.5,-130.633,+0015.0,19860331,20210920 diff --git a/references/gsod_documentation.txt b/references/gsod_documentation.txt index 3c1b61c..1d97665 100644 --- a/references/gsod_documentation.txt +++ b/references/gsod_documentation.txt @@ -1,22 +1,22 @@ NATIONAL CENTERS FOR ENVIRONMENTAL INFORMATION GLOBAL SURFACE SUMMARY OF DAY DATA (GSOD) (OVER 9000 WORLDWIDE STATIONS) - + 10/28/2020 ******************************************************************** - -SPECIAL NOTES -The data summaries provided here are based on data exchanged under the World -Meteorological Organization (WMO) World Weather Watch Program according to WMO -Resolution 40 (Cg-XII). This allows WMO member countries to place restrictions on the -use or re-export of their data for commercial purposes outside of the receiving country. -Data for selected countries may, at times, not be available through this system. +SPECIAL NOTES + +The data summaries provided here are based on data exchanged under the World +Meteorological Organization (WMO) World Weather Watch Program according to WMO +Resolution 40 (Cg-XII). This allows WMO member countries to place restrictions on the +use or re-export of their data for commercial purposes outside of the receiving country. 
+Data for selected countries may, at times, not be available through this system. Those countries' data summaries and products which are available here are intended for -free and unrestricted use in research, education, and other non-commercial activities. -However, for non-U.S. locations' data, the data or any derived product shall not be provided +free and unrestricted use in research, education, and other non-commercial activities. +However, for non-U.S. locations' data, the data or any derived product shall not be provided to other users or be used for the re-export of commercial services. To determine off-line availability of any country's data, please contact NCEI at @@ -28,7 +28,7 @@ See our contact page at: www.ncei.noaa.gov/contact The data are available via: - + Web Accessible Folder -- https://www.ncei.noaa.gov/data/global-summary-of-the-day/ Common Access -- https://www.ncei.noaa.gov/access/search/data-search/global-summary-of- the-day @@ -40,15 +40,15 @@ the-day OVERVIEW -The following is a description of the global surface summary of day product produced by the -National Centers for Environmental Information (NCEI) in Asheville, NC. The input data used in -building these daily summaries are the Integrated Surface Data (ISD), which includes global -data obtained from the USAF Climatology Center, located in the Federal Climate Complex with -NCEI. The latest daily summary data are normally available a few days after the date-time of the -observations used in the daily summaries. The online data files begin with 1929. Over 9000 -stations' data are typically available. +The following is a description of the global surface summary of day product produced by the +National Centers for Environmental Information (NCEI) in Asheville, NC. The input data used in +building these daily summaries are the Integrated Surface Data (ISD), which includes global +data obtained from the USAF Climatology Center, located in the Federal Climate Complex with +NCEI. 
The latest daily summary data are normally available a few days after the date-time of the +observations used in the daily summaries. The online data files begin with 1929. Over 9000 +stations' data are typically available. -The headers used in csv files and definition of each daily element included in the dataset (as +The headers used in csv files and definition of each daily element included in the dataset (as available from each station) are as follows: TEMP - Mean temperature (.1 Fahrenheit) @@ -64,15 +64,15 @@ MIN - Minimum temperature (.1 Fahrenheit) PRCP - Precipitation amount (.01 inches) SNDP - Snow depth (.1 inches) FRSHTT – Indicator for occurrence of: - Fog + Fog Rain or Drizzle Snow or Ice Pellets Hail Thunder Tornado/Funnel Cloud -For more detailed descriptions of data elements and contents of the dataset, see the format -documentation shown below. +For more detailed descriptions of data elements and contents of the dataset, see the format +documentation shown below. ******************************************************************** @@ -81,12 +81,12 @@ DETAILS/FORMAT Global summary of day data for 18 surface meteorological elements are derived from the synoptic/hourly observations contained in -USAF DATSAV3 Surface data and Federal Climate Complex Integrated +USAF DATSAV3 Surface data and Federal Climate Complex Integrated Surface Data (ISD). Historical data are generally available for 1929 to -the present, with data from 1973 to the present being the most complete. +the present, with data from 1973 to the present being the most complete. For some periods, one or more countries' data may not be available due to -data restrictions or communications problems. In deriving the summary of -day data, a minimum of 4 observations for the day must be present (allows +data restrictions or communications problems. 
In deriving the summary of +day data, a minimum of 4 observations for the day must be present (allows for stations which report 4 synoptic observations/day). Since the data are converted to constant units (e.g, knots), slight rounding error from the originally reported values may occur (e.g, 9.9 instead of 10.0). @@ -98,7 +98,7 @@ miles) due to the practice of not reporting visibilities greater than certain distances. The daily extremes and totals--maximum wind gust, precipitation amount, and snow depth--will only appear if the station reports the data sufficiently to provide a valid value. -Therefore, these three elements will appear less frequently than +Therefore, these three elements will appear less frequently than other values. Also, these elements are derived from the stations' reports during the day, and may comprise a 24-hour period which includes a portion of the previous day. The data are reported and @@ -109,11 +109,11 @@ As for quality control (QC), the input data undergo extensive automated QC to correctly 'decode' as much of the synoptic data as possible, and to eliminate many of the random errors found in the original data. Then, these data are QC'ed further as the summary of -day data are derived. However, we expect that a very small percent of +day data are derived. However, we expect that a very small percent of the errors will remain in the summary of day data. The data are strictly ASCII, with a mixture of character data, real -values, and integer values. +values, and integer values. Following is the data format: @@ -122,7 +122,7 @@ All 9's in a field (e.g., 99.99 for PRCP) indicates no report or insufficient da FIELD DESCRIPTION -STATION - Station number (WMO/DATSAV3 possibly combined w/WBAN number) +STATION - Station number (WMO/DATSAV3 possibly combined w/WBAN number) DATE - Given in mm/dd/yyyy format @@ -140,7 +140,7 @@ TEMP_ATTRIBUTES - Number of observations used in calculating mean temperature. 
DEWP - Mean dew point for the day in degrees Fahrenheit to tenths. Missing = 9999.9 -DEWP_ATTRIBUTES - Number of observations used in calculating mean dew point. +DEWP_ATTRIBUTES - Number of observations used in calculating mean dew point. SLP - Mean sea level pressure for the day in millibars to tenths. Missing = 9999.9 @@ -148,54 +148,54 @@ SLP_ATTRIBUTES - Number of observations used in calculating mean sea level press STP - Mean station pressure for the day in millibars to tenths. Missing = 9999.9 -STP_ATTRIBUTES - Number of observations used in calculating mean station pressure. +STP_ATTRIBUTES - Number of observations used in calculating mean station pressure. VISIB - Mean visibility for the day in miles to tenths. Missing = 999.9 -VISIB_ATTRIBUTES - Number of observations used in calculating mean visibility. +VISIB_ATTRIBUTES - Number of observations used in calculating mean visibility. -WDSP - Mean wind speed for the day in knots to tenths. Missing = 999.9 +WDSP - Mean wind speed for the day in knots to tenths. Missing = 999.9 WDSP_ATTRIBUTES - Number of observations used in calculating mean wind speed. -MXSPD - Maximum sustained wind speed reported for the day in knots to tenths. Missing = +MXSPD - Maximum sustained wind speed reported for the day in knots to tenths. Missing = 999.9 GUST - Maximum wind gust reported for the day in knots to tenths. Missing = 999.9 MAX - Maximum temperature reported during the day in Fahrenheit to tenths. Missing = 9999.9 -Note: Time of maximum temperature report varies by country and region, so this will sometimes -not be the maximum for the calendar day. +Note: Time of maximum temperature report varies by country and region, so this will sometimes +not be the maximum for the calendar day. + +MAX_ATTRIBUTES – +Blank indicates maximum temperature was taken from the explicit maximum +temperature report and not from the 'hourly' data. 
-MAX_ATTRIBUTES – -Blank indicates maximum temperature was taken from the explicit maximum -temperature report and not from the 'hourly' data. - * indicates maximum temperature was derived from the hourly data (i.e. highest hourly or synoptic-reported temperature). MIN - Minimum temperature reported during the day in Fahrenheit to tenths. Missing = 9999.9 -Note: Time of minimum temperature report varies by country and region, so this will sometimes +Note: Time of minimum temperature report varies by country and region, so this will sometimes not be the minimum for the calendar day. -MIN_ATTRIBUTES -Blank indicates minimum temperature was taken from the explicit minimum -temperature report and not from the 'hourly' data. - +MIN_ATTRIBUTES +Blank indicates minimum temperature was taken from the explicit minimum +temperature report and not from the 'hourly' data. + * indicates minimum temperature was derived from the hourly data (i.e. lowest hourly or synoptic-reported temperature). PRCP - Total precipitation (rain and/or melted snow) reported during the day in inches and hundredths; will usually not end with the midnight observation (i.e. may include -latter part of previous day). “0” indicates no measurable precipitation (includes a trace). +latter part of previous day). “0” indicates no measurable precipitation (includes a trace). Missing = 99.99 - -Note: Many stations do not report “0” on days with no precipitation, therefore “99.99” will often -appear on these days. Also, for example, a station may only report a 6-hour amount for the + +Note: Many stations do not report “0” on days with no precipitation, therefore “99.99” will often +appear on these days. Also, for example, a station may only report a 6-hour amount for the period during which rain fell. See attribute field for source of data. -PRCP_ATTRIBUTES - +PRCP_ATTRIBUTES - A = 1 report of 6-hour precipitation amount. B = Summation of 2 reports of 6-hour precipitation amount. 
C = Summation of 3 reports of 6-hour precipitation amount. @@ -204,8 +204,8 @@ PRCP_ATTRIBUTES - F = Summation of 2 reports of 12-hour precipitation amount. G = 1 report of 24-hour precipitation amount. H = Station reported '0' as the amount for the day (eg, from 6-hour reports), -but also reported at least one occurrence of precipitation in hourly observations. -This could indicate a trace occurred, but should be considered as incomplete +but also reported at least one occurrence of precipitation in hourly observations. +This could indicate a trace occurred, but should be considered as incomplete data for the day. I = Station did not report any precipitation data for the day and did not report any occurrences of precipitation in its hourly observations. It's still possible that @@ -213,8 +213,8 @@ data for the day. SNDP - Snow depth in inches to tenths. It is the last report for the day if reported more than once. Missing = 999.9 - -Note: Most stations do not report “0” on days with no snow on the ground, therefore, “999.9” will + +Note: Most stations do not report “0” on days with no snow on the ground, therefore, “999.9” will often appear on these days. FRSHTT - Indicators (1 = yes, 0 = no/not reported) for the occurrence during the day of: @@ -229,9 +229,9 @@ FRSHTT - Indicators (1 = yes, 0 = no/not reported) for the occurrence during the REFERENCE -The NCEI Climate Services Branch (CSB) is responsible for distribution of NCEI products to -users. NCEI's CSB can be contacted via the following phone number, internet address, or -fax number. +The NCEI Climate Services Branch (CSB) is responsible for distribution of NCEI products to +users. NCEI's CSB can be contacted via the following phone number, internet address, or +fax number. 
Telephone Number: 828-271-4800 Fax Number: 828-271-4876 @@ -241,10 +241,8 @@ Website: www.ncei.noaa.gov ******************************************************************** Mark Lackey -Meteorologist +Meteorologist NOAA’s National Centers for Environmental Information (NCEI) Center for Weather and Climate (CWC) 151 Patton Ave Asheville, NC 28801 - - diff --git a/setup.py b/setup.py index 16f35ed..62f1411 100644 --- a/setup.py +++ b/setup.py @@ -1,10 +1,10 @@ from setuptools import find_packages, setup setup( - name='src', + name="src", packages=find_packages(), - version='0.1.0', - description='Analysis of historical weather data from NOAAs Global Summary of the Day (GSOD).', - author='Catalyst Cooperative', - license='MIT', + version="0.1.0", + description="Analysis of historical weather data from NOAAs Global Summary of the Day (GSOD).", + author="Catalyst Cooperative", + license="MIT", ) diff --git a/sql_queries/all_wieb_stations.sql b/sql_queries/all_wieb_stations.sql index 27ef13a..789dd0c 100644 --- a/sql_queries/all_wieb_stations.sql +++ b/sql_queries/all_wieb_stations.sql @@ -7,9 +7,9 @@ wieb_member_stations AS ( -- WIEB members: US states WHERE country = 'US' AND state in ('WA', 'OR', 'CA', 'ID', 'NV', 'AZ', 'MT', 'WY', 'UT', 'CO', 'NM') - - UNION ALL - + + UNION ALL + SELECT * FROM `bigquery-public-data.noaa_gsod.stations` -- WIEB members: CA provinces