diff --git a/ena-submission/Snakefile b/ena-submission/Snakefile index 63a3f34ae..0985f4689 100644 --- a/ena-submission/Snakefile +++ b/ena-submission/Snakefile @@ -72,6 +72,23 @@ rule get_ena_submission_list: --log-level {params.log_level} \ """ +rule submit_to_ena: + input: + script="scripts/submit_to_ena.py", + input_file="results/approved_ena_submission_list.json", + config="results/config.yaml", + output: + submitted="results/submitted", + params: + log_level=LOG_LEVEL, + shell: + """ + python {input.script} \ + --config-file {input.config} \ + --input-file {input.input_file} \ + --log-level {params.log_level} \ + """ + rule get_ena_submission_list_and_sleep: input: file="results/ena_submission_list.json" diff --git a/ena-submission/flyway/sql/V1__Initial_Schema.sql b/ena-submission/flyway/sql/V1__Initial_Schema.sql index 9c77e8c62..4b4a45e6a 100644 --- a/ena-submission/flyway/sql/V1__Initial_Schema.sql +++ b/ena-submission/flyway/sql/V1__Initial_Schema.sql @@ -2,7 +2,7 @@ CREATE TABLE submission_table ( accession text not null, version bigint not null, organism text not null, - groupId bigint not null, + group_id bigint not null, errors jsonb, warnings jsonb, status_all text not null, @@ -13,7 +13,7 @@ CREATE TABLE submission_table ( ); CREATE TABLE project_table ( - groupId bigint not null, + group_id bigint not null, organism text not null, errors jsonb, warnings jsonb, @@ -21,7 +21,7 @@ CREATE TABLE project_table ( started_at timestamp not null, finished_at timestamp, project_metadata jsonb, - primary key (groupId, organism) + primary key (group_id, organism) ); CREATE TABLE sample_table ( diff --git a/ena-submission/scripts/submission_db.py b/ena-submission/scripts/submission_db.py index f8ae8b97a..14b66d22a 100644 --- a/ena-submission/scripts/submission_db.py +++ b/ena-submission/scripts/submission_db.py @@ -1,7 +1,11 @@ import os -import psycopg2 -from enum import Enum from dataclasses import dataclass +from datetime import datetime +from enum import Enum +from typing import Dict + +import psycopg2 +import pytz @dataclass @@ -52,32 +56,68 @@ class Status(Enum): HAS_ERRORS = 3 -def connect_to_db(username="postgres", password="unsecure", host="127.0.0.1"): +@dataclass +class SubmissionTableEntry: + accession: str + version: str + organism: str + group_id: int + errors: str | None = None + warnings: str | None = None + status_all: StatusAll = StatusAll.READY_TO_SUBMIT + started_at: datetime | None = None + finished_at: datetime | None = None + external_metadata: str | None = None + + +def connect_to_db(db_config: DBConfig): """ Establish connection to ena_submitter DB, if DB doesn't exist create it. """ try: con = psycopg2.connect( dbname="loculus", - user=username, - host=host, - password=password, + user=db_config.username, + host=db_config.host, + password=db_config.password, options="-c search_path=ena-submission", ) except ConnectionError as e: - raise ConnectionError("Could not create ena_submitter DB") from e + raise ConnectionError("Could not connect to loculus DB") from e return con def in_submission_table(accession: str, version: int, db_config: DBConfig) -> bool: - con = connect_to_db( - db_config.username, - db_config.password, - db_config.host, - ) + con = connect_to_db(db_config) cur = con.cursor() cur.execute( "select * from submission_table where accession=%s and version=%s", (f"{accession}", f"{version}"), ) - return bool(cur.rowcount) + in_db = bool(cur.rowcount) + con.close() + return in_db + + +def add_to_submission_table(db_config: DBConfig, submission_table_entry: SubmissionTableEntry): + con = connect_to_db(db_config) + cur = con.cursor() + submission_table_entry.started_at = datetime.now(tz=pytz.utc) + + cur.execute( + "insert into submission_table values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)", + ( + submission_table_entry.accession, + submission_table_entry.version, + submission_table_entry.organism, + submission_table_entry.group_id, + submission_table_entry.errors, + submission_table_entry.warnings, + str(submission_table_entry.status_all), + submission_table_entry.started_at, + submission_table_entry.finished_at, + submission_table_entry.external_metadata, + ), + ) + con.commit() + con.close() diff --git a/ena-submission/scripts/create_project_xml.py b/ena-submission/scripts/submit_to_ena.py similarity index 74% rename from ena-submission/scripts/create_project_xml.py rename to ena-submission/scripts/submit_to_ena.py index 759900f37..0c157b25e 100644 --- a/ena-submission/scripts/create_project_xml.py +++ b/ena-submission/scripts/submit_to_ena.py @@ -6,8 +6,7 @@ import click import yaml -from submission_db import get_db_config, in_submission_table - +from submission_db import SubmissionTableEntry, add_to_submission_table, get_db_config logger = logging.getLogger(__name__) logging.basicConfig( @@ -77,9 +76,9 @@ def get_project_xml( required=True, type=click.Path(), ) -def create_project_xml(log_level, config_file, input_file): +def submit_to_ena(log_level, config_file, input_file): logger.setLevel(log_level) - logging.getLogger("requests").setLevel(logging.WARNING) + logging.getLogger("requests").setLevel(logging.INFO) with open(config_file) as file: full_config = yaml.safe_load(file) @@ -89,4 +88,17 @@ def create_project_xml(log_level, config_file, input_file): db_config = get_db_config(config.db_password, config.db_username, config.db_host) - sequences_to_upload = json.load(open(input_file, encoding="utf-8")) + sequences_to_upload: dict = json.load(open(input_file, encoding="utf-8")) + for accession, data in sequences_to_upload.items(): + entry = { + "accession": accession, + "version": data["metadata"]["version"], + "group_id": data["metadata"]["groupId"], + "organism": "cchf", # TODO: fix this + } + submission_table_entry = SubmissionTableEntry(**entry) + add_to_submission_table(db_config, submission_table_entry) + + +if __name__ == "__main__": + submit_to_ena()