-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
11 changed files
with
724 additions
and
17 deletions.
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
from . import globals, helpers, downloaders |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,104 @@ | ||
""" | ||
This module contains the barra2 download function(s) . | ||
""" | ||
import requests | ||
from datetime import datetime, timedelta | ||
from pathlib import Path | ||
import calendar | ||
from .helpers import list_months | ||
from .globals import LatLonPoint, LatLonBBox, barra2_index | ||
|
||
|
||
def download_file(url: str, | ||
folder_path: str | Path, | ||
file_name: str, | ||
create_folder: bool = False) -> None: | ||
"""Download the file from the url and saves it as folder_path/filename. | ||
If the downloads folder does not exist, it will be created due to the create_folder=True argument. | ||
Args: | ||
url: The URL of the file to be downloaded. | ||
folder_path: The path where the file should be saved. | ||
file_name: The name to save the downloaded file as. | ||
create_folder: If True, creates the folder if it does not exist; otherwise, exits if the folder doesn't exist. | ||
Returns: | ||
None | ||
""" | ||
folder = Path(folder_path) | ||
file = folder / file_name | ||
|
||
# Check if the folder exists | ||
if not folder.exists(): | ||
if create_folder: | ||
folder.mkdir(parents=True) | ||
print(f"The folder '{folder_path}' was created.") | ||
else: | ||
print(f"The folder '{folder_path}' does not exist. Exiting...") | ||
return | ||
|
||
# Check if the file already exists | ||
if file.exists(): | ||
print(f"The file '{file_name}' already exists in the folder '{folder_path}'.") | ||
else: | ||
# Download the URL to the file | ||
response = requests.get(url) | ||
file.write_bytes(response.content) | ||
print(f"File '{file_name}' has been downloaded to '{folder_path}'.") | ||
|
||
return | ||
|
||
|
||
def barra2_point_downloader(base_url: str,
                            barra2_var: list,
                            lat_lon_point: LatLonPoint,
                            start_datetime: str | datetime,
                            end_datetime: str | datetime,
                            fileout_prefix: str,
                            fileout_folder: str = 'cache',
                            fileout_type: str = 'csv_file') -> None:
    """Download point data for every variable and every month in a period.

    One request is made per (variable, month) pair. Each request always covers
    the whole calendar month, from the first hour to the last hour.

    Args:
        base_url (str): URL template formatted with ``var``, ``year`` and
            ``month``. Use from barra2-dl.globals or set explicitly.
        barra2_var (list): Variable names to download. Use from
            barra2-dl.globals or set explicitly.
        lat_lon_point (LatLonPoint): Mapping with ``'lat'`` and ``'lon'`` keys.
        start_datetime (str | datetime): Start of the inclusive download
            period. The month containing this moment is included even when
            the value is not the first day of the month.
        end_datetime (str | datetime): End of the inclusive download period.
        fileout_prefix (str): Prefix for downloaded file names, e.g. a
            location reference.
        fileout_folder (str): Relative or absolute path for downloaded files;
            created if it does not exist.
        fileout_type (str): Value sent as the ``accept`` query parameter,
            e.g. 'csv_file'. The saved file name always ends in ``.csv``.

    Returns:
        None. Files are written into fileout_folder as
        f'{fileout_prefix}_{var}_{time_start[:10]}_{time_end[:10]}.csv'

    Todo:
        Add set list of output format options
    """
    # Imported locally because this module's top-level imports do not
    # include pandas.
    import pandas as pd

    # A month-start range skips the first month when the start is not the
    # 1st, so snap the start back to midnight on the 1st of its month.
    first_month = pd.Timestamp(start_datetime).normalize().replace(day=1)

    # The month list is the same for every variable, so build it once.
    months = list_months(first_month, end_datetime, freq="MS")

    # Each variable lives under its own URL.
    for var in barra2_var:
        # Each remote file holds one month.
        for date in months:
            year = date.year
            month = date.month
            time_start = date.isoformat() + 'Z'
            # Last hourly step of the month: month start + month length - 1 h.
            days_in_month = calendar.monthrange(year, month)[1]
            time_end = (date + timedelta(days=days_in_month) + timedelta(hours=-1)).isoformat() + 'Z'

            # Fill the template, then append the query parameters.
            url = base_url.format(var=var, year=year, month=month)
            url += f"?var={var}&latitude={lat_lon_point['lat']}&longitude={lat_lon_point['lon']}&time_start={time_start}&time_end={time_end}&accept={fileout_type}"

            fileout_name = f'{fileout_prefix}_{var}_{time_start[:10]}_{time_end[:10]}.csv'
            download_file(url, fileout_folder, fileout_name, create_folder=True)

    return
|
||
|
||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
""" | ||
This module contains global or default variables required to download barra2-dl data from thredds.nci.org.au. | ||
""" | ||
from typing import TypedDict | ||
|
||
# ----------------------------------------------------------------------------- | ||
# CLASSES | ||
# ----------------------------------------------------------------------------- | ||
|
||
class LatLonPoint(TypedDict):
    """TypedDict describing a single point by latitude and longitude.

    Attributes:
        lat (float): latitude.
        lon (float): longitude.
    """
    lat: float
    lon: float
|
||
|
||
class LatLonBBox(TypedDict):
    """TypedDict describing a north/south/east/west bounding box.

    Attributes:
        north (float): latitude of the northern edge.
        south (float): latitude of the southern edge.
        east (float): longitude of the eastern edge.
        west (float): longitude of the western edge.

    Todo:
        Add checks to make sure co-ordinates are correct with respect to
        each other (nothing is validated here).
    """
    north: float
    south: float
    east: float
    west: float
|
||
# ----------------------------------------------------------------------------- | ||
# VARIABLES | ||
# ----------------------------------------------------------------------------- | ||
|
||
# Default bounding box (1 degree x 1 degree).
# NOTE(review): originally labelled "barra2_aus11_extents" - confirm whether
# this is meant to be the full AUS-11 domain or just a sample area.
barra2_aus11_lat_lon_bbox = LatLonBBox(north=-23.0, west=133.0, east=134.0, south=-24.0)
|
||
# Base thredds URL template for BARRA2 11km 1hour reanalysis data.
# Fill in with str.format(var=..., year=..., month=...); month is zero-padded
# to two digits by the template itself.
barra2_aus11_csv_url = ("https://thredds.nci.org.au/thredds/ncss/grid/ob53/output/reanalysis/AUS-11/BOM/ERA5"
                        "/historical/hres/BARRA-R2/v1/1hr/{var}/latest/"
                        "{var}_AUS-11_ERA5_historical_hres_BOM_BARRA-R2_v1_1hr_{year}{month:02d}-{year}{month:02d}.nc")

# Index columns for barra2, used to join separate files.
barra2_index = ['time', 'station', 'latitude[unit="degrees_north"]', 'longitude[unit="degrees_east"]']

# Default list of BARRA2 variables to download: eastward wind (ua*) and
# northward wind (va*) at 50m, 100m and 150m, plus air temperature at 50m (ta50m).
barra2_var_wind_all = ["ua50m", "va50m", "ua100m", "va100m", "ua150m", "va150m", "ta50m"]

# Optional limited variable list for testing.
barra2_var_wind_50m = ["ua50m", "va50m", "ta50m"]

# Output file format. Todo: add list of output format options.
point_output_format = "csv_file"
# grid_output_format = "netcdf3"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,164 @@ | ||
""" | ||
This module contains helper functions. | ||
""" | ||
|
||
import pandas as pd | ||
import fnmatch | ||
from pathlib import Path | ||
from typing import List | ||
|
||
|
||
def list_months(start_datetime: str, end_datetime: str, freq: str = 'MS', **kwargs) -> list[pd.Timestamp]:
    """Generate the list of timestamps between two bounds for the url file loop.

    Thin wrapper around ``pd.date_range`` that returns a plain list.

    Args:
        start_datetime: str or datetime-like, left bound for generating dates.
        end_datetime: str or datetime-like, right bound for generating dates.
        freq: pandas frequency string; the default 'MS' yields month starts.
            With 'MS', a start that is not the first day of a month yields
            the following month as the first entry.
        **kwargs: Forwarded unchanged to ``pd.date_range``.

    Returns:
        list: The generated ``pd.Timestamp`` values, in ascending order.
    """
    return pd.date_range(start=start_datetime, end=end_datetime, freq=freq, **kwargs).tolist()
|
||
|
||
def list_csv_files(folder_path):
    """
    List all CSV files directly inside the given folder.

    Args:
        folder_path (str | Path): The path to the folder containing the CSV files.

    Returns:
        list: The names (not full paths) of entries matching ``*.csv``,
            sorted so the result does not depend on directory listing order.
    """
    folder = Path(folder_path)
    return sorted(entry.name for entry in folder.glob('*.csv'))
|
||
|
||
def filter_list_using_wildcard(input_list: list[str], pattern: str) -> list[str]:
    """
    Filter a list of strings using a shell-style wildcard pattern.

    Args:
        input_list (list[str]): The list of strings to be filtered.
        pattern (str): The wildcard pattern, interpreted by ``fnmatch``.

    Returns:
        list: The strings from ``input_list`` that match ``pattern``, in
            their original order.
    """
    return fnmatch.filter(input_list, pattern)
|
||
|
||
def merge_csv_files_to_dataframe(filein_folder: str, | ||
filename_pattern: str = '*.csv', | ||
index_for_join: str = None) -> pd.DataFrame: | ||
""" | ||
Merge csv files from a folder based on optional filename wildcard using fnmatch. | ||
If filename wildcard is omitted all csv files in the folder will be merged. | ||
If fileout_folder is omitted the merged file will be saved in the filein_folder. | ||
Args: | ||
filein_folder (str): Optional | ||
filename_pattern (str): | ||
index_for_join (str): | ||
Returns: | ||
return_type: None. | ||
Todo: | ||
Change from using os to pathlib | ||
""" | ||
|
||
# todo add .csv check for filename_prefix | ||
|
||
|
||
# list all csv files in folder | ||
csv_files = list_csv_files(filein_folder) | ||
|
||
# filter csv files | ||
csv_files_filtered = filter_list_using_wildcard(csv_files, filename_pattern) | ||
|
||
# initiate dataframe for combined csv results | ||
df_combined = pd.DataFrame() | ||
|
||
for file in Path(filein_folder).glob(filename_pattern): | ||
if df_combined.empty: | ||
# read csv file without indexing to retain time as column for join | ||
df_combined = pd.read_csv(file) | ||
else: | ||
# read next file into new df | ||
df_add = pd.read_csv(file) | ||
# combine on index join if not None, otherwise just concat together | ||
if index_for_join is not None: | ||
df_combined = df_combined.join(df_add.set_index(index_for_join),on=index_for_join) | ||
else: | ||
df_combined = pd.concat([df_combined, df_add], ignore_index = True) | ||
|
||
return df_combined | ||
|
||
|
||
def export_dataframe_to_csv(dataframe: pd.DataFrame, | ||
fileout_folder: str | Path, | ||
fileout_name: str, | ||
create_folder: bool = True) -> None: | ||
""" | ||
Export a DataFrame to a CSV file in the specified folder with the given file name. | ||
Args: | ||
dataframe (pd.DataFrame): The Pandas DataFrame to export. | ||
fileout_folder (str or Path): The path to the folder where the CSV file will be saved. | ||
fileout_name (str): The name of the CSV file to save. | ||
create_folder (bool): If True, creates the folder if it does not exist; otherwise, exits if the folder doesn't exist. | ||
Returns: | ||
Path: The path of the saved CSV file. | ||
""" | ||
fileout_folder = Path(fileout_folder) | ||
# Check if the folder exists | ||
if not fileout_folder.exists(): | ||
if create_folder: | ||
fileout_folder.mkdir(parents=True) | ||
print(f"The folder '{fileout_folder}' was created.") | ||
else: | ||
print(f"The folder '{fileout_folder}' does not exist. Exiting...") | ||
return | ||
|
||
# Define the full path for the CSV file | ||
fileout_path_name = fileout_folder / fileout_name | ||
|
||
# Export the DataFrame to CSV | ||
dataframe.to_csv(fileout_path_name, index=False) | ||
|
||
return fileout_path_name | ||
|
||
|
||
def get_timestamp_range_list(dataframe: pd.DataFrame, timestamp_column: str) -> List[pd.Timestamp]:
    """
    Get the earliest and latest timestamp found in a column of the DataFrame.

    The column is parsed with ``pd.to_datetime`` on a copy; the DataFrame
    passed in is left unmodified. Missing values (NaT) are ignored when
    picking the earliest and latest entries.

    Args:
        dataframe (pd.DataFrame): The DataFrame containing the timestamp column.
        timestamp_column (str): The name of the timestamp column in the DataFrame.

    Returns:
        list: ``[first_timestamp, last_timestamp]``.

    Raises:
        ValueError: If the column does not exist or the DataFrame has no rows.
    """
    if timestamp_column not in dataframe.columns:
        raise ValueError(f"Column '{timestamp_column}' does not exist in the DataFrame.")

    # Convert into a separate Series so the caller's column keeps its dtype.
    timestamps = pd.to_datetime(dataframe[timestamp_column])

    if timestamps.empty:
        raise ValueError(f"Column '{timestamp_column}' contains no rows.")

    # min/max give the range directly; no need to sort the whole frame.
    return [timestamps.min(), timestamps.max()]
Oops, something went wrong.