diff --git a/docker-image/Dockerfile b/docker-image/Dockerfile index b76848c..a1a8717 100644 --- a/docker-image/Dockerfile +++ b/docker-image/Dockerfile @@ -9,7 +9,7 @@ ENV OPENCORPORATES_API_KEY=secret ENV RML_FOLDER=/home/rml-mappings ENV START_DATE=2020-01-01 ENV END_DATE=2020-12-31 -ENV DAYS_DELAYED=1 +ENV DAYS_DELAYED=3 ENV DAILY_SCHEDULE=03:00 ENV OUTPUT_FOLDER=/ingestion ENV TBFY_FUSEKI_URL=http://52.19.213.234:3030 @@ -21,7 +21,7 @@ RUN rm /home/python-scripts/__pycache__/* RUN rm /home/python-scripts/shelve/* RUN rm /home/python-scripts/tbfy/__pycache__/* ADD rml-mappings /home/rml-mappings -ADD https://github.com/RMLio/rmlmapper-java/releases/download/v4.6.0/rmlmapper.jar /home/rml-mappings +ADD https://github.com/RMLio/rmlmapper-java/releases/download/v4.9.1/rmlmapper-4.9.1.jar /home/rml-mappings/rmlmapper.jar # Make ingestion dir RUN mkdir -p /ingestion diff --git a/docker-image/README.md b/docker-image/README.md index 7def194..f8a79cd 100644 --- a/docker-image/README.md +++ b/docker-image/README.md @@ -18,7 +18,8 @@ If you intend to run all the TBFY service on a local machine, then you are free In order to use the service you have to set the following variables in the `kg-ingestion-service.env` file in the [Docker scripts](https://github.com/TBFY/knowledge-graph/tree/master/docker-scripts) module: * `OPENOPPS_USERNAME:` OpenOpps API username. * `OPENOPPS_PASSWORD:` OpenOpps API password. -* `OPENCORPORATES_API_KEY:` OpenCorporates API key. +* `OPENCORPORATES_RECONCILE_API_KEY:` OpenCorporates API key for the reconciliation service. +* `OPENCORPORATES_COMPANIES_API_KEY:` OpenCorporates API key for the companies service. * `RML_FOLDER:` Folder location for RML inside the Docker container. Use the default value `/home/rml-mappings` that is configured for the Docker image. * `START_DATE:` Start date for data to be ingested. The default value is `2020-01-01`. Change this accordingly. * `END_DATE:` End date for data to no longer be ingested. The service terminates at this date (+ days delayed). The default value is `2020-12-31`. Change this accordingly. diff --git a/docker-scripts/kg-ingestion-service.env b/docker-scripts/kg-ingestion-service.env index 79d53c8..0f57c37 100644 --- a/docker-scripts/kg-ingestion-service.env +++ b/docker-scripts/kg-ingestion-service.env @@ -1,10 +1,11 @@ OPENOPPS_USERNAME=username OPENOPPS_PASSWORD=password -OPENCORPORATES_API_KEY=secret +OPENCORPORATES_RECONCILE_API_KEY=secret +OPENCORPORATES_COMPANIES_API_KEY=secret RML_FOLDER=/home/rml-mappings START_DATE=2020-01-01 END_DATE=2020-12-31 -DAYS_DELAYED=1 +DAYS_DELAYED=3 DAILY_SCHEDULE=03:00 OUTPUT_FOLDER=/ingestion TBFY_FUSEKI_URL=http://52.19.213.234:3030 diff --git a/python-scripts/kg_ingestion_service.py b/python-scripts/kg_ingestion_service.py index 4a17601..6e48e52 100644 --- a/python-scripts/kg_ingestion_service.py +++ b/python-scripts/kg_ingestion_service.py @@ -54,7 +54,8 @@ def main(argv): openopps_username = os.environ["OPENOPPS_USERNAME"] openopps_password = os.environ["OPENOPPS_PASSWORD"] - opencorporates_api_key = os.environ["OPENCORPORATES_API_KEY"] + opencorporates_reconcile_api_key = os.environ["OPENCORPORATES_RECONCILE_API_KEY"] + opencorporates_companies_api_key = os.environ["OPENCORPORATES_COMPANIES_API_KEY"] rml_folder = os.environ["RML_FOLDER"] start_date = os.environ["START_DATE"] end_date = os.environ["END_DATE"] @@ -66,7 +67,8 @@ def main(argv): logging.debug("kg_ingestion_service.py: openopps_username = " + openopps_username) logging.debug("kg_ingestion_service.py: openopps_password = " + openopps_password) - logging.debug("kg_ingestion_service.py: opencorporates_api_key = " + opencorporates_api_key) + logging.debug("kg_ingestion_service.py: opencorporates_reconcile_api_key = " + opencorporates_reconcile_api_key) + logging.debug("kg_ingestion_service.py: opencorporates_companies_api_key = " + opencorporates_companies_api_key) logging.debug("kg_ingestion_service.py: rml_folder = " + rml_folder) logging.debug("kg_ingestion_service.py: start_date = " + start_date) logging.debug("kg_ingestion_service.py: end_date = " + end_date) @@ -94,7 +96,7 @@ def main(argv): logging.info("kg_ingestion_service.py: date = " + created_date) # Run ingestion script - ingest_data_argv = ["ingest_data.py", "-u", openopps_username, "-p", openopps_password, "-a", opencorporates_api_key, "-r", rml_folder, "-s", created_date, "-e", created_date, "-o", output_folder] + ingest_data_argv = ["ingest_data.py", "-u", openopps_username, "-p", openopps_password, "-a", opencorporates_reconcile_api_key, "-b", opencorporates_companies_api_key, "-r", rml_folder, "-s", created_date, "-e", created_date, "-o", output_folder] ingest_data.main(ingest_data_argv[1:]) start = start + timedelta(days=1) # Increase date by one day diff --git a/python-scripts/replace_string_rdf.py b/python-scripts/replace_string_rdf.py index 70da0df..ff4c4d3 100644 --- a/python-scripts/replace_string_rdf.py +++ b/python-scripts/replace_string_rdf.py @@ -92,8 +92,8 @@ def main(argv): else: copy_command = "copy" - logging.info("main(): platform = " + sys.platform.lower()) - logging.info("main(): copy_command = " + copy_command) + logging.info("replace_string_rdf.py: platform = " + sys.platform.lower()) + logging.info("replace_string_rdf.py: copy_command = " + copy_command) start = datetime.strptime(start_date, "%Y-%m-%d") stop = datetime.strptime(end_date, "%Y-%m-%d") @@ -108,9 +108,9 @@ def main(argv): if not os.path.exists(outputDirPath): os.makedirs(outputDirPath) for filename in os.listdir(dirPath): - logging.info("main(): filename = " + filename) filePath = os.path.join(dirPath, filename) outputFilePath = os.path.join(outputDirPath, filename) + logging.info("replace_string_rdf.py: file = " + outputFilePath) os.system(copy_command + ' ' + filePath + ' ' + outputFilePath) inplace_change(outputFilePath, old_string, new_string) diff --git a/python-scripts/validate_rdf.py b/python-scripts/validate_rdf.py index 9ee1e2b..a968dc1 100644 --- a/python-scripts/validate_rdf.py +++ b/python-scripts/validate_rdf.py @@ -62,15 +62,15 @@ def main(argv): elif opt in ("-i"): input_folder = arg - logging.info("main(): start_date = " + start_date) - logging.info("main(): end_date = " + end_date) - logging.info("main(): input_folder = " + input_folder) + logging.info("validate_rdf.py: start_date = " + start_date) + logging.info("validate_rdf.py: end_date = " + end_date) + logging.info("validate_rdf.py: input_folder = " + input_folder) if sys.platform.lower().startswith("win"): global riot_command riot_command = riot_command + ".bat" - logging.info("main(): riot_command = " + riot_command) + logging.info("validate_rdf.py: riot_command = " + riot_command) start = datetime.strptime(start_date, "%Y-%m-%d") stop = datetime.strptime(end_date, "%Y-%m-%d") @@ -83,7 +83,7 @@ def main(argv): if os.path.isdir(dirPath): for filename in os.listdir(dirPath): filePath = os.path.join(dirPath, filename) - logging.info("main(): filePath = " + filePath) + logging.info("validate_rdf.py: file = " + filePath) subprocess.call([riot_command, "--validate", filePath]) start = start + timedelta(days=1) # Increase date by one day diff --git a/website/html/norway/statistics/tender.html b/website/html/norway/statistics/tender.html index e387274..f348dc4 100644 --- a/website/html/norway/statistics/tender.html +++ b/website/html/norway/statistics/tender.html @@ -130,7 +130,7 @@