From ae01c6133174454a08cddcfbc756ddecd8a71b9e Mon Sep 17 00:00:00 2001 From: Argyrios Samourkasidis Date: Mon, 11 Oct 2021 21:22:38 +0200 Subject: [PATCH] WIP --- .gitignore | 225 ++++++++++++++++++ MANIFEST.in | 2 +- README.md | 2 +- bin/{edam.py => read.py} | 20 +- bin/viewer.py | 1 + edam/reader/SourceConfiguration.py | 4 +- edam/reader/TemplateReader.py | 5 +- edam/reader/manage.py | 122 +++++----- edam/reader/models.py | 2 +- edam/reader/utilities.py | 6 +- .../{configurations => metadata}/Agmip.yaml | 0 .../{configurations => metadata}/bom.yaml | 0 .../{configurations => metadata}/uk.yaml | 0 edam/settings.py | 2 +- edam/viewer/app/__init__.py | 22 +- edam/viewer/app/views.py | 2 +- requirements.txt | 5 +- setup.py | 14 +- 18 files changed, 331 insertions(+), 103 deletions(-) rename bin/{edam.py => read.py} (81%) rename edam/resources/{configurations => metadata}/Agmip.yaml (100%) rename edam/resources/{configurations => metadata}/bom.yaml (100%) rename edam/resources/{configurations => metadata}/uk.yaml (100%) diff --git a/.gitignore b/.gitignore index 1d65489..586f2cf 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,228 @@ venv .idea/ !/edam/viewer/app/templates/ /projectFilesBackup/ + +# Created by https://www.gitignore.io/api/macos,python,pycharm +# Edit at https://www.gitignore.io/?templates=macos,python,pycharm + +### macOS ### +# General +.DS_Store +.AppleDouble +.LSOverride + +# Icon must end with two \r +Icon + +# Thumbnails +._* + +# Files that might appear in the root of a volume +.DocumentRevisions-V100 +.fseventsd +.Spotlight-V100 +.TemporaryItems +.Trashes +.VolumeIcon.icns +.com.apple.timemachine.donotpresent + +# Directories potentially created on remote AFP share +.AppleDB +.AppleDesktop +Network Trash Folder +Temporary Items +.apdisk + +### PyCharm ### +# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and WebStorm +# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 + +# User-specific stuff +.idea/**/workspace.xml +.idea/**/tasks.xml +.idea/**/usage.statistics.xml +.idea/**/dictionaries +.idea/**/shelf + +# Generated files +.idea/**/contentModel.xml + +# Sensitive or high-churn files +.idea/**/dataSources/ +.idea/**/dataSources.ids +.idea/**/dataSources.local.xml +.idea/**/sqlDataSources.xml +.idea/**/dynamic.xml +.idea/**/uiDesigner.xml +.idea/**/dbnavigator.xml + +# Gradle +.idea/**/gradle.xml +.idea/**/libraries + +# Gradle and Maven with auto-import +# When using Gradle or Maven with auto-import, you should exclude module files, +# since they will be recreated, and may cause churn. Uncomment if using +# auto-import. +# .idea/modules.xml +# .idea/*.iml +# .idea/modules +# *.iml +# *.ipr + +# CMake +cmake-build-*/ + +# Mongo Explorer plugin +.idea/**/mongoSettings.xml + +# File-based project format +*.iws + +# IntelliJ +out/ + +# mpeltonen/sbt-idea plugin +.idea_modules/ + +# JIRA plugin +atlassian-ide-plugin.xml + +# Cursive Clojure plugin +.idea/replstate.xml + +# Crashlytics plugin (for Android Studio and IntelliJ) +com_crashlytics_export_strings.xml +crashlytics.properties +crashlytics-build.properties +fabric.properties + +# Editor-based Rest Client +.idea/httpRequests + +# Android studio 3.1+ serialized cache file +.idea/caches/build_file_checksums.ser + +### PyCharm Patch ### +# Comment Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-215987721 + +# *.iml +# modules.xml +# .idea/misc.xml +# *.ipr + +# Sonarlint plugin +.idea/**/sonarlint/ + +# SonarQube Plugin +.idea/**/sonarIssues.xml + +# Markdown Navigator plugin +.idea/**/markdown-navigator.xml +.idea/**/markdown-navigator/ + +### Python ### +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# celery beat schedule file +celerybeat-schedule + +# SageMath parsed files +*.sage.py + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# Mr Developer +.mr.developer.cfg +.project +.pydevproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ +# End of https://www.gitignore.io/api/macos,python,pycharm \ No newline at end of file diff --git a/MANIFEST.in b/MANIFEST.in index 11ba7e2..0da33c0 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,6 +1,6 @@ include README.md recursive-include edam/resources * -recursive-include edam/resources/configurations * +recursive-include edam/resources/metadata * recursive-include edam/resources/flask_related * recursive-include edam/resources/inputs * recursive-include edam/resources/templates * diff --git a/README.md b/README.md index a3f3fd8..1ef0d51 100644 --- a/README.md +++ b/README.md @@ -340,7 +340,7 @@ Units of Measurement: Following command downloads and stores data from 27 weather stations. It is executed in approximately 9 seconds. -`edam --input "http://www.metoffice.gov.uk/pub/data/weather/uk/climate/stationdata/{\$var}data.txt" --template uk.tmpl --metadata uk.yaml --var "aberporth,armagh, ballypatrick, camborne, cambridge, cardiff, chivenor, cwmystwyth, dunstaffnage, durham, eastbourne, eskdalemuir" --drop yes` +`edam --input "http://www.metoffice.gov.uk/pub/data/weather/uk/climate/stationdata/{{ var }}data.txt" --template uk.tmpl --metadata uk.yaml --var "aberporth,armagh, ballypatrick, camborne, cambridge, cardiff, chivenor, cwmystwyth, dunstaffnage, durham, eastbourne, eskdalemuir" --drop yes` ## **Australian Bureau of Meteorology (Online)** diff --git a/bin/edam.py b/bin/read.py similarity index 81% rename from bin/edam.py rename to bin/read.py index ccc00a9..b9f3059 100644 --- a/bin/edam.py +++ b/bin/read.py @@ -12,13 +12,13 @@ @click.option('--input', required=True, help='input string') @click.option('--query', required=False, help='SQL query') @click.option('--template', required=True, help='template file to parse data with') -@click.option('--config', required=True, help='configuration file to annotate data with') +@click.option('--metadata', required=True, help='configuration file to annotate data with') @click.option('--var', required=False, default="", help='Extra variables for URI generation') @click.option('--storage', required=False, default='file', type=click.Choice(['file', 'memory']), help="Whether input files to be stored or not") @click.option('--drop', required=False, default='no', type=click.Choice(['yes', 'no']), help="Whether to drop stored data or not") -def cli(input, template, query, config, var, storage, drop): +def cli(input, template, query, metadata, var, storage, drop): now = datetime.now() if drop == "yes": # Dropping database @@ -27,7 +27,7 @@ def cli(input, template, query, config, var, storage, drop): DatabaseInstantiation(drop=False) template_path, template_object = handle_input_files(template) - config_path, config_object = handle_input_files(config) + config_path, config_object = handle_input_files(metadata) success, inputs_path, file_type = identify_input_type(input_file=input, extra_variables=var, template=template_path, template_object=template_object, @@ -36,26 +36,26 @@ def cli(input, template, query, config, var, storage, drop): if success: if not inputs_path: # It means template and input file does not match - click.echo("I can't match template: %s with input: %s" % (template_path, input)) + click.echo(f"I can't match template: {template_path} with input: {input}") exit(3) elif inputs_path: if template_path and config_path: mid_time = datetime.now() Workflow(input_list=inputs_path, template_file=template_object, configuration_file=config_object) end_time = datetime.now() - print("Download all data: %s" % (mid_time - now)) + print(f"Download all data: {mid_time - now}") - print("Store all data: %s" % (end_time - mid_time)) - print("Total time: %s" % (end_time - now)) + print(f"Store all data: {end_time - mid_time}") + print(f"Total time: {end_time - now}") # run() else: click.echo("No template or config were given") else: - click.echo("%s does not exist" % input) + click.echo(f"{input} does not exist") exit(2) run() else: - click.echo("%s does not exist" % input) + click.echo(f"{input} does not exist") exit(4) @@ -68,7 +68,7 @@ def handle_input_files(filename): if exists: return file_path, file_object else: - click.echo("%s does not exist" % filename) + click.echo(f"{filename} does not exist") raise SystemExit(0) # raise Exception("File does not exit") diff --git a/bin/viewer.py b/bin/viewer.py index d00a44f..e5e348f 100644 --- a/bin/viewer.py +++ b/bin/viewer.py @@ -7,5 +7,6 @@ def run(): app.config['TEMPLATES_AUTO_RELOAD'] = True app.run(host=SERVER['host'], port=SERVER['port'], debug=SERVER['debug']) + if __name__ == "__main__": run() diff --git a/edam/reader/SourceConfiguration.py b/edam/reader/SourceConfiguration.py index f9eb887..6404c40 100644 --- a/edam/reader/SourceConfiguration.py +++ b/edam/reader/SourceConfiguration.py @@ -63,7 +63,7 @@ def __init__(self, input_yaml, input_file_data=io.StringIO(), input_preamble=io. def check_yaml(self): try: - return yaml.load(self.input_yaml) + return yaml.load(self.input_yaml, Loader=yaml.FullLoader) except yaml.YAMLError as exc: return exc @@ -375,4 +375,4 @@ def __check_type_of_field(self, field_name): if __name__ == "__main__": - test = SourceConfiguration('configurations/knmi.yaml', 'inputs/Yucheng.csv') + test = SourceConfiguration('metadata/knmi.yaml', 'inputs/Yucheng.csv') diff --git a/edam/reader/TemplateReader.py b/edam/reader/TemplateReader.py index 6e28dc2..5c63912 100644 --- a/edam/reader/TemplateReader.py +++ b/edam/reader/TemplateReader.py @@ -2,6 +2,7 @@ import io import logging import re +from datetime import datetime import numpy as np import pandas as pd @@ -288,7 +289,7 @@ def __create_dataframe_from_csv__(self): if self.parse_dates['timestamp']['format']: def date_parser(x): try: - return pd.datetime.strptime(x, ' '.join(self.parse_dates['timestamp']['format'])) + return datetime.strptime(x, ' '.join(self.parse_dates['timestamp']['format'])) except: # This exception catches the case where in datetime column we have litter (e.g. Site closed) return x @@ -549,7 +550,7 @@ def __update_helper_observable_id(self, helper_observable_id: HelperTemplateIDs, if __name__ == "__main__": - conf = SourceConfiguration(input_yaml="/Users/argyris/Documents/git/templateFramework/configurations/knmi.yaml", + conf = SourceConfiguration(input_yaml="/Users/argyris/Documents/git/templateFramework/metadata/knmi.yaml", input_file_data="/Users/argyris/Documents/git/templateFramework/inputs/knmi_alldata_data.txt") t = TemplateReader(config=conf, input_file="/Users/argyris/Documents/git/templateFramework/inputs/knmi_alldata_data.txt", diff --git a/edam/reader/manage.py b/edam/reader/manage.py index 38233f4..fd22d15 100644 --- a/edam/reader/manage.py +++ b/edam/reader/manage.py @@ -19,7 +19,7 @@ def db_connect(): Performs database connection using database settings from settings.py. Returns sqlalchemy engine instance """ - module_logger.debug('Received a call to `db_connect()`. Database url is: %s' % database_url) + module_logger.debug(f'Received a call to `db_connect()`. Database url is: {database_url}') return create_engine(database_url) @@ -58,18 +58,18 @@ class DatabaseHandler(object): """ This class handles database operations (add item, add dataframes, etc.) """ - + def __init__(self): """ Initializes database connection and sessionmaker. """ self.db_handler_logger = logging.getLogger('edam.reader.manage.DatabaseHandler') - + self.engine = db_connect() - + self.Session = db_session - - def clean_df_db_dups(self, df, tablename, dup_cols=[], filter_continuous_col=None, filter_categorical_col=None): + + def clean_df_db_dups(self, df, tablename, dup_cols=None, filter_continuous_col=None, filter_categorical_col=None): """ code: https://github.com/ryanbaumann/Pandas-to_sql-upsert/blob/master/to_sql_newrows.py Remove rows from a dataframe that already exist in a database @@ -87,37 +87,37 @@ def clean_df_db_dups(self, df, tablename, dup_cols=[], filter_continuous_col=Non Returns Unique list of values from dataframe compared to database table """ + if dup_cols is None: + dup_cols = [] engine = self.engine - args = 'SELECT %s FROM %s' % (', '.join(['"{0}"'.format(col) for col in dup_cols]), tablename) + args = 'SELECT {} FROM {}'.format(', '.join(['"{0}"'.format(col) for col in dup_cols]), tablename) args_contin_filter, args_cat_filter = None, None if filter_continuous_col is not None: if df[filter_continuous_col].dtype == 'datetime64[ns]': - args_contin_filter = """ "%s" BETWEEN Convert(datetime, '%s') - AND Convert(datetime, '%s')""" % (filter_continuous_col, - df[filter_continuous_col].min(), - df[filter_continuous_col].max()) - + args_contin_filter = f""" "{filter_continuous_col}" BETWEEN Convert(datetime, '{df[filter_continuous_col].min()}') + AND Convert(datetime, '{df[filter_continuous_col].max()}')""" + if filter_categorical_col is not None: - args_cat_filter = ' "%s" in(%s)' % (filter_categorical_col, - ', '.join(["'{0}'".format(value) for value in - df[filter_categorical_col].unique()])) - + args_cat_filter = ' "{}" in({})'.format(filter_categorical_col, + ', '.join(["'{0}'".format(value) for value in + df[filter_categorical_col].unique()])) + if args_contin_filter and args_cat_filter: args += ' Where ' + args_contin_filter + ' AND' + args_cat_filter elif args_contin_filter: args += ' Where ' + args_contin_filter elif args_cat_filter: args += ' Where ' + args_cat_filter - + df.drop_duplicates(dup_cols, keep='last', inplace=True) df2 = pd.read_sql(args, engine) for column in list(df): if df[column].dtype == "float64": df[column] = df[column].astype(str) df2[column] = df2[column].astype(str) - - df = pd.merge(df, df2, left_index=True, how='left', on=dup_cols, indicator=True) - + + df = pd.merge(df, df2, how='left', on=dup_cols, indicator=True) + df = df[df['_merge'] == 'left_only'] df.drop(['_merge'], axis=1, inplace=True) if "tation" in tablename: @@ -128,13 +128,13 @@ def clean_df_db_dups(self, df, tablename, dup_cols=[], filter_continuous_col=Non df['id'] = pd.read_sql_query('select coalesce(max(id),0)+1 from Station', engine).iloc[ 0, 0] + range(len(df)) df.set_index(['id'], inplace=True) - + return df - + @staticmethod def __add_dataframe__(dataframe: pd.DataFrame, table='Observations', index=True, index_label='timestamp'): engine = create_engine(database_url) - + def create_file_object(df, file_path=None, string_io=True, index=index): """Creates a csv file or writes to memory""" if string_io: @@ -147,23 +147,23 @@ def create_file_object(df, file_path=None, string_io=True, index=index): df = open(file_path) file_object = df return file_object - + def load_to_database(table1, unique_columns, file_object): - + connection = engine.raw_connection() try: cursor = connection.cursor() - + columns = ', '.join(['{}'.format(col) for col in unique_columns]) - sql = 'COPY "{}" ({}) FROM STDIN WITH CSV HEADER'.format(table1, columns) + sql = f'COPY "{table1}" ({columns}) FROM STDIN WITH CSV HEADER' cursor.copy_expert(sql=sql, file=file_object) - + connection.commit() - + connection.close() finally: pass - + if database_type == "postgresql": df_index_name = dataframe.index.name if df_index_name == "id" or df_index_name is None: @@ -178,7 +178,7 @@ def load_to_database(table1, unique_columns, file_object): elif database_type == "sqlite": dataframe.to_sql(name=table, con=engine, if_exists='append', index=index, index_label=index_label) - + def __add_item__(self, item): """ @@ -191,14 +191,14 @@ def __add_item__(self, item): session.add(item) session.commit() except: - self.db_handler_logger.error('Exception when adding %s. Check __add_item__()' % item) + self.db_handler_logger.error(f'Exception when adding {item}. Check __add_item__()') session.rollback() raise finally: session.flush() session.close() return True, item.id - + def __update_item__(self, item, metadata_dict): session = self.Session() returned_item = session.query(item.__class__).filter( @@ -206,7 +206,7 @@ def __update_item__(self, item, metadata_dict): returned_item.update_meta(metadata_dict) session.commit() session.close() - + def __get_observations_by_id_as_df__(self, observable_id): """ @@ -221,10 +221,10 @@ def __get_observations_by_id_as_df__(self, observable_id): session.rollback() session.close() return df - + def __check_station_is_in_db__(self, station: Station): session = self.Session() - + exists = session.query(Station.id).filter(and_(Station.name == station.name , Station.location == station.location , Station.mobile == station.mobile) @@ -232,7 +232,7 @@ def __check_station_is_in_db__(self, station: Station): session.rollback() session.close() return exists.scalar() is not None, exists.first() - + def __check_unit_is_in_db__(self, unit: UnitsOfMeasurement): session = self.Session() exists = session.query(UnitsOfMeasurement.id).filter(and_(UnitsOfMeasurement.name == unit.name @@ -242,7 +242,7 @@ def __check_unit_is_in_db__(self, unit: UnitsOfMeasurement): session.rollback() session.close() return exists.scalar() is not None, exists.first() - + def __check_observable_is_in_db__(self, observable: AbstractObservables): session = self.Session() exists = session.query(AbstractObservables.id).filter(and_(AbstractObservables.name == observable.name @@ -252,7 +252,7 @@ def __check_observable_is_in_db__(self, observable: AbstractObservables): session.rollback() session.close() return exists.scalar() is not None, exists.first() - + def __chech_helperTemplateID_is_in_db__(self, helperTemplateID: HelperTemplateIDs): session = self.Session() exists = session.query(HelperTemplateIDs.id). \ @@ -264,7 +264,7 @@ def __chech_helperTemplateID_is_in_db__(self, helperTemplateID: HelperTemplateID session.rollback() session.close() return exists.scalar() is not None, exists.first() - + def __check_sensor_is_in_db__(self, sensor: Sensors): session = self.Session() # TODO: Fix the filter parameters. Needs to be more generic @@ -277,9 +277,9 @@ def __check_sensor_is_in_db__(self, sensor: Sensors): session.rollback() session.close() return exists.scalar() is not None, exists.first() - + def __get_helper_table_row_input_file_observable_id__(self, observable_id, - station_id) -> HelperTemplateIDs: + station_id) -> HelperTemplateIDs: """ :param observable_id: This is usually a short of the Observables name, which is used in templates @@ -294,7 +294,7 @@ def __get_helper_table_row_input_file_observable_id__(self, observable_id, # session.rollback() session.close() return q - + def __get_observable_by__id__(self, id) -> AbstractObservables: """ @@ -303,41 +303,41 @@ def __get_observable_by__id__(self, id) -> AbstractObservables: :return: """ session = self.Session() - + q = session.query(AbstractObservables).filter(AbstractObservables.id == id) # type: Query session.rollback() session.close() return q.first() - + def __update_helper_observable_ids_with_unit_id__(self, station_id, observable_observable_id, unit_id): # TODO: This should be more generic session = self.Session() - + result = session.query(HelperTemplateIDs).filter(and_(HelperTemplateIDs.station_id == station_id, HelperTemplateIDs.observable_id == observable_observable_id )).first() # type: HelperTemplateIDs # Above query MUST return one result # We update this one result.unit_id = unit_id - + session.commit() session.close() - + def __update_helper_observable_ids_with_sensor_id__(self, station_id, observable_observable_id, - sensor_id): + sensor_id): # TODO: This should be more generic session = self.Session() - + result = session.query(HelperTemplateIDs).filter(and_(HelperTemplateIDs.station_id == station_id, HelperTemplateIDs.observable_id == observable_observable_id )).first() # type: HelperTemplateIDs # Above query MUST return one result # We update this one result.sensor_id = sensor_id - + session.commit() session.close() - + def __get_abstract_observable_id_from_observable_id__(self, station_id, observable_observable_id): session = self.Session() result = session.query(HelperTemplateIDs).filter(and_(HelperTemplateIDs.station_id == station_id, @@ -346,34 +346,34 @@ def __get_abstract_observable_id_from_observable_id__(self, station_id, observab session.expunge_all() session.close() return result.abstract_observable_id or None - + def __get_station_id_by_tags_station_id__(self, tags_station_id): session = self.Session() string_to_find = "\"station_id\":\"%d\"" % tags_station_id exists = session.query(Station.id).filter(Station.tags.contains(string_to_find)) - + if exists.first() is not None: database_station_id = exists.first().id else: database_station_id = None session.close() return database_station_id - + def get_all_observables(self): session = self.Session() session.close() return session.query(AbstractObservables).all() - + def get_all_stations(self): session = self.Session() session.close() return session.query(Station).all() - + def get_all_helper_observable_ids(self): session = self.Session() session.close() return session.query(HelperTemplateIDs).all() - + def get_helper_for_describe_sensor(self, station_id, sensor_id, observable_id): session = self.Session() exists = session.query(HelperTemplateIDs).filter( @@ -381,7 +381,7 @@ def get_helper_for_describe_sensor(self, station_id, sensor_id, observable_id): HelperTemplateIDs.sensor_id == sensor_id, HelperTemplateIDs.observable_id == observable_id )) - + if exists.first() is not None: print(exists.first().observable_id) session.expunge_all() @@ -389,11 +389,11 @@ def get_helper_for_describe_sensor(self, station_id, sensor_id, observable_id): return exists.first() else: return None - + def get_observations_by_helper_id(self, helper_id): session = self.Session() exists = session.query(Observations).filter(Observations.helper_observable_id == helper_id) - + if exists.first() is not None: session.expunge_all() session.close() diff --git a/edam/reader/models.py b/edam/reader/models.py index 7bdc732..75499fb 100644 --- a/edam/reader/models.py +++ b/edam/reader/models.py @@ -261,7 +261,7 @@ def __init__(self, observable_id=None, sensor_id=None, self.frequency = None def __repr__(self): - return '' % (self.id) + return f'' class Templates(Base): diff --git a/edam/reader/utilities.py b/edam/reader/utilities.py index 18af255..2a3c1c2 100644 --- a/edam/reader/utilities.py +++ b/edam/reader/utilities.py @@ -495,7 +495,7 @@ def generate_uri(uri: str(), static_variables=None): """ # variable_regex is more generic. It matches both {01-09} AND {$var} (i.e. static_variable_regex) variable_regex = r"({.*?})" - static_variable_regex = r"({\$var})" + static_variable_regex = r"({{ var }})" match = re.search(variable_regex, uri) uris_in_a_list = list() if match: @@ -505,7 +505,7 @@ def generate_uri(uri: str(), static_variables=None): static_variables_in_a_list = static_variables.split(',') for static_var in static_variables_in_a_list: iteration_uri = copy.deepcopy(uri) - iteration_uri = iteration_uri.replace("{$var}", static_var.strip()) + iteration_uri = iteration_uri.replace("{{ var }}", static_var.strip()) uris_in_a_list.append(iteration_uri) except AttributeError: utilities_logger.error('--extra parameter was not given (Station variables). generate_uri()') @@ -582,7 +582,7 @@ def download_and_check_with_tmpl_html_content_via_http(url: list(), template, te input_list.append(io.StringIO(data_input)) else: - utilities_logger.warning("There is an error with: %s" % unique_url) + utilities_logger.warning(f"There is an error with: {unique_url}") return input_list, template diff --git a/edam/resources/configurations/Agmip.yaml b/edam/resources/metadata/Agmip.yaml similarity index 100% rename from edam/resources/configurations/Agmip.yaml rename to edam/resources/metadata/Agmip.yaml diff --git a/edam/resources/configurations/bom.yaml b/edam/resources/metadata/bom.yaml similarity index 100% rename from edam/resources/configurations/bom.yaml rename to edam/resources/metadata/bom.yaml diff --git a/edam/resources/configurations/uk.yaml b/edam/resources/metadata/uk.yaml similarity index 100% rename from edam/resources/configurations/uk.yaml rename to edam/resources/metadata/uk.yaml diff --git a/edam/settings.py b/edam/settings.py index d88358e..1b03d3f 100644 --- a/edam/settings.py +++ b/edam/settings.py @@ -10,7 +10,7 @@ with open(settings, 'r') as stream: try: - settings_content = yaml.load(stream) + settings_content = yaml.load(stream, Loader=yaml.FullLoader) except yaml.YAMLError as exc: raise exc diff --git a/edam/viewer/app/__init__.py b/edam/viewer/app/__init__.py index 688ca7d..1786c31 100755 --- a/edam/viewer/app/__init__.py +++ b/edam/viewer/app/__init__.py @@ -9,7 +9,7 @@ import pandas as pd from flask import Flask, url_for, redirect from flask import make_response -from flask_cache import Cache +from flask_caching import Cache from flask_sqlalchemy import SQLAlchemy import edam.viewer.config as config @@ -94,7 +94,7 @@ def no_cache(*args, **kwargs): response.headers['Pragma'] = 'no-cache' response.headers['Expires'] = '-1' return response - + return update_wrapper(no_cache, view) @@ -124,13 +124,13 @@ def same_timestamp(*args): return args[0] else: return None - - + + app.jinja_env.globals.update(same_timestamp=same_timestamp) def resample(df: pd.DataFrame, rule, how=None, axis=0, fill_method=None, closed=None, label=None, convention='start', - kind=None, loffset=None, limit=None, base=0, on=None, level=None): + kind=None, loffset=None, limit=None, base=0, on=None, level=None): # observables_list = list(df) # pd.set_option('precision', 3) observables_list = ['timestamp', 'tmax', 'tmin', 'af', 'rain', 'sun'] @@ -141,14 +141,14 @@ def resample(df: pd.DataFrame, rule, how=None, axis=0, fill_method=None, closed= try: for observable in observables_list: df[observable] = df[observable].apply(lambda x: float(x)) - + except Exception as e: print(e.args) print("I can't transform string value to float. Wind maybe? Check edam.viewer.__init__.py - downsample func") exit() resampled = df.resample("A", None, axis, fill_method, closed, label, convention, kind, loffset, limit, base, on, level) - + resampled = resampled.mean() resampled = resampled.round(3) resampled = resampled.fillna('---') @@ -158,7 +158,7 @@ def resample(df: pd.DataFrame, rule, how=None, axis=0, fill_method=None, closed= # resampled = getattr(resampled, "interpolate")(method) resampled = getattr(resampled, how)() resampled["timestamp"] = resampled.index - + for observable in observables_list: resampled[observable] = resampled[observable].apply(lambda x: Measurement(x)) # TODO: This is soooooo dangerous. Please re-implement...... @@ -166,11 +166,11 @@ def resample(df: pd.DataFrame, rule, how=None, axis=0, fill_method=None, closed= observables_list = ['timestamp', 'tmax', 'tmin', 'af', 'rain', 'sun'] # observables_list = ['timestamp', 'radn', 'maxt', 'mint', 'rain', 'wind', 'RH'] zip_argument = map(lambda x: "resampled." + x, observables_list) - + zip_argument = ",".join(zip_argument) - + zip_argument = eval("zip(%s)" % zip_argument) - + return zip_argument diff --git a/edam/viewer/app/views.py b/edam/viewer/app/views.py index 1fb8466..da4a56a 100755 --- a/edam/viewer/app/views.py +++ b/edam/viewer/app/views.py @@ -79,7 +79,7 @@ def specific_template(template): response.headers['Content-type'] = 'text/plain' return response except KeyError: - raise InvalidUsage('%s template does not exist' % template, status_code=410) + raise InvalidUsage(f'{template} template does not exist', status_code=410) @app.route('/data/') diff --git a/requirements.txt b/requirements.txt index 3ba964f..61b8b6b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,7 +7,7 @@ numpy pandas numexpr flask -Flask-Cache +flask-caching pytest tox coverage @@ -17,4 +17,5 @@ jinja2 geopy records click -flask_googlemaps \ No newline at end of file +flask_googlemaps +Werkzeug \ No newline at end of file diff --git a/setup.py b/setup.py index 27e7795..0b7fb57 100644 --- a/setup.py +++ b/setup.py @@ -18,7 +18,7 @@ def copytree(src, dst, symlinks=False, ignore=None): s = os.path.join(src, item) d = os.path.join(dst, item) if os.path.isdir(s): - + try: shutil.copytree(s, d, symlinks, ignore) except OSError as e: @@ -45,11 +45,11 @@ def run(self): except OSError as e: if e.errno != errno.EEXIST: raise - + resources_directory = os.path.join(here, 'edam', 'resources') home_user_directory = os.path.expanduser("~/.edam/") directories_to_be_copied_from_resources = ['inputs', 'templates', 'metadata'] - + for directory in directories_to_be_copied_from_resources: copytree(os.path.join(resources_directory, directory), os.path.join(home_user_directory, directory)) shutil.copyfile(os.path.join(resources_directory, 'settings.yaml'), @@ -61,7 +61,7 @@ def run(self): shutil.copyfile(os.path.join(resources_directory, 'edam.owl'), os.path.join(home_user_directory, 'backup.owl')) # Copy flask_related contents into home_directory - + copytree(os.path.join(resources_directory, 'flask_related'), os.path.join(home_user_directory, '.viewer/')) # Copy edam templates into flask edam/templates @@ -87,7 +87,7 @@ def finalize_options(self): TestCommand.finalize_options(self) self.test_args = [] self.test_suite = True - + def run_tests(self): import pytest errcode = pytest.main(self.test_args) @@ -107,7 +107,7 @@ def run_tests(self): include_package_data=True, install_requires=['sqlalchemy', 'requests', 'numpy', 'pandas', 'numexpr', 'geopy', 'Flask-SQLAlchemy', 'Flask', 'Flask-Caching', 'jinja2', 'pyyaml', 'records', 'psycopg2', 'click', - 'Flask-GoogleMaps==0.2.4', 'owlready2', 'pint', 'oyaml' + 'Flask-GoogleMaps==0.2.4', 'owlready2', 'pint', 'oyaml', 'Werkzeug' ], cmdclass={'test': PyTest, 'install': CustomInstall}, python_requires='>=3.3', @@ -121,7 +121,7 @@ def run_tests(self): entry_points={ 'console_scripts': ['edam=bin.edam:cli', 'viewer=bin.viewer:run'], - + }, classifiers=[ 'Programming Language :: Python :: 3',