Skip to content

Commit

Permalink
Add sequences approved for ena submission to submission table. This s…
Browse files Browse the repository at this point in the history
…hould trigger submission process.
  • Loading branch information
anna-parker committed Jul 16, 2024
1 parent cdf77ef commit 419ebdb
Show file tree
Hide file tree
Showing 4 changed files with 90 additions and 21 deletions.
17 changes: 17 additions & 0 deletions ena-submission/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,23 @@ rule get_ena_submission_list:
--log-level {params.log_level} \
"""

# Run scripts/submit_to_ena.py on the approved submission list.
# NOTE(review): per the commit message, adding the sequences to the submission
# table is what should trigger the submission process - confirm downstream.
rule submit_to_ena:
    input:
        script="scripts/submit_to_ena.py",
        input_file="results/approved_ena_submission_list.json",
        config="results/config.yaml",
    output:
        # Nothing in the shell command below creates this flag file, so let
        # Snakemake touch it once the command has exited successfully;
        # otherwise the rule fails its missing-output check.
        submitted=touch("results/submitted"),
    params:
        log_level=LOG_LEVEL,
    shell:
        """
        python {input.script} \
            --config-file {input.config} \
            --input-file {input.input_file} \
            --log-level {params.log_level}
        """

rule get_ena_submission_list_and_sleep:
input:
file="results/ena_submission_list.json"
Expand Down
6 changes: 3 additions & 3 deletions ena-submission/flyway/sql/V1__Initial_Schema.sql
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ CREATE TABLE submission_table (
accession text not null,
version bigint not null,
organism text not null,
groupId bigint not null,
group_id bigint not null,
errors jsonb,
warnings jsonb,
status_all text not null,
Expand All @@ -13,15 +13,15 @@ CREATE TABLE submission_table (
);

-- One row per (group_id, organism) pair, enforced by the composite primary key.
-- NOTE(review): presumably tracks the ENA project created for each group and
-- organism - confirm against the code that fills this table.
CREATE TABLE project_table (
    group_id bigint not null,
    organism text not null,
    errors jsonb,
    warnings jsonb,
    status text not null,
    started_at timestamp not null,
    finished_at timestamp,
    project_metadata jsonb,
    primary key (group_id, organism)
);

CREATE TABLE sample_table (
Expand Down
66 changes: 53 additions & 13 deletions ena-submission/scripts/submission_db.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
import os
import psycopg2
from enum import Enum
from dataclasses import dataclass
from datetime import datetime
from enum import Enum
from typing import Dict

import psycopg2
import pytz


@dataclass
Expand Down Expand Up @@ -52,32 +56,68 @@ class Status(Enum):
HAS_ERRORS = 3


def connect_to_db(username="postgres", password="unsecure", host="127.0.0.1"):
@dataclass
class SubmissionTableEntry:
    """One row to be written to ``submission_table``.

    The field order matches the positional ``insert`` issued by
    ``add_to_submission_table``. Only ``accession``, ``version``,
    ``organism`` and ``group_id`` are required; every other field has a
    default.
    """

    accession: str
    # The schema column is bigint and in_submission_table() takes an int,
    # so annotate the version as an integer rather than a string.
    version: int
    organism: str
    group_id: int
    errors: str | None = None
    warnings: str | None = None
    status_all: StatusAll = StatusAll.READY_TO_SUBMIT
    # Overwritten with the current UTC time by add_to_submission_table().
    started_at: datetime | None = None
    finished_at: datetime | None = None
    external_metadata: str | None = None


def connect_to_db(db_config: DBConfig):
    """Open a connection to the ``loculus`` database.

    The session's ``search_path`` is set to ``ena-submission`` through the
    connection options. The database is not created if it is missing.

    Args:
        db_config: Supplies the ``username``, ``password`` and ``host`` used
            to connect.

    Returns:
        An open psycopg2 connection. The caller is responsible for closing it.

    Raises:
        ConnectionError: If psycopg2 cannot establish the connection.
    """
    try:
        con = psycopg2.connect(
            dbname="loculus",
            user=db_config.username,
            host=db_config.host,
            password=db_config.password,
            options="-c search_path=ena-submission",
        )
    except psycopg2.OperationalError as e:
        # psycopg2 reports connection failures as OperationalError, which is
        # not a subclass of the built-in ConnectionError, so catching the
        # built-in here would never match.
        raise ConnectionError("Could not connect to loculus DB") from e
    return con


def in_submission_table(accession: str, version: int, db_config: DBConfig) -> bool:
    """Return whether ``submission_table`` has a row for this accession/version.

    Args:
        accession: Accession to look up.
        version: Version of that accession.
        db_config: Connection settings passed on to ``connect_to_db``.

    Returns:
        True if the select matched at least one row, False otherwise.
    """
    con = connect_to_db(db_config)
    try:
        with con.cursor() as cur:
            # Hand the raw values to psycopg2 so it adapts them itself
            # instead of receiving pre-formatted strings.
            cur.execute(
                "select * from submission_table where accession=%s and version=%s",
                (accession, version),
            )
            return bool(cur.rowcount)
    finally:
        # Close the connection even when the query raises.
        con.close()


def add_to_submission_table(db_config: DBConfig, submission_table_entry: SubmissionTableEntry):
    """Insert one entry into ``submission_table`` and commit it.

    Side effect: ``submission_table_entry.started_at`` is overwritten with the
    current UTC time before the row is written.

    Args:
        db_config: Connection settings passed on to ``connect_to_db``.
        submission_table_entry: Row to insert. Values are written positionally
            in the order the dataclass declares its fields.
    """
    con = connect_to_db(db_config)
    try:
        submission_table_entry.started_at = datetime.now(tz=pytz.utc)
        with con.cursor() as cur:
            cur.execute(
                "insert into submission_table values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)",
                (
                    submission_table_entry.accession,
                    submission_table_entry.version,
                    submission_table_entry.organism,
                    submission_table_entry.group_id,
                    submission_table_entry.errors,
                    submission_table_entry.warnings,
                    # Store the bare member name; str() on a plain Enum member
                    # yields the class-qualified "StatusAll.<NAME>" form.
                    submission_table_entry.status_all.name,
                    submission_table_entry.started_at,
                    submission_table_entry.finished_at,
                    submission_table_entry.external_metadata,
                ),
            )
        con.commit()
    finally:
        # Close the connection even when the insert or commit raises; closing
        # without a commit discards the pending transaction.
        con.close()
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,7 @@

import click
import yaml
from submission_db import get_db_config, in_submission_table

from submission_db import SubmissionTableEntry, add_to_submission_table, get_db_config

logger = logging.getLogger(__name__)
logging.basicConfig(
Expand Down Expand Up @@ -77,9 +76,9 @@ def get_project_xml(
required=True,
type=click.Path(),
)
def create_project_xml(log_level, config_file, input_file):
def submit_to_ena(log_level, config_file, input_file):
logger.setLevel(log_level)
logging.getLogger("requests").setLevel(logging.WARNING)
logging.getLogger("requests").setLevel(logging.INFO)

with open(config_file) as file:
full_config = yaml.safe_load(file)
Expand All @@ -89,4 +88,17 @@ def create_project_xml(log_level, config_file, input_file):

db_config = get_db_config(config.db_password, config.db_username, config.db_host)

sequences_to_upload = json.load(open(input_file, encoding="utf-8"))
sequences_to_upload: dict = json.load(open(input_file, encoding="utf-8"))
for accession, data in sequences_to_upload.items():
entry = {
"accession": accession,
"version": data["metadata"]["version"],
"group_id": data["metadata"]["groupId"],
"organism": "cchf", # TODO: fix this
}
submission_table_entry = SubmissionTableEntry(**entry)
add_to_submission_table(db_config, submission_table_entry)


# Script entry point: invoke submit_to_ena only when this file is executed
# directly, not when it is imported.
if __name__ == "__main__":
    submit_to_ena()

0 comments on commit 419ebdb

Please sign in to comment.