diff --git a/ena-submission/scripts/create_project.py b/ena-submission/scripts/create_project.py index 252ff397f..adb893065 100644 --- a/ena-submission/scripts/create_project.py +++ b/ena-submission/scripts/create_project.py @@ -109,13 +109,14 @@ def create_project(log_level, config_file): while True: # Check submission_table for newly added sequences - conditions = {"status_all": StatusAll.READY_TO_SUBMIT.name} + conditions = {"status_all": StatusAll.READY_TO_SUBMIT} ready_to_submit = find_conditions_in_db( db_config, table_name="submission_table", conditions=conditions ) - logger.debug( - f"Found {len(ready_to_submit)} entries in submission_table in status READY_TO_SUBMIT" - ) + if len(ready_to_submit) > 0: + logger.debug( + f"Found {len(ready_to_submit)} entries in submission_table in status READY_TO_SUBMIT" + ) for row in ready_to_submit: group_key = {"group_id": row["group_id"], "organism": row["organism"]} seq_key = {"accession": row["accession"], "version": row["version"]} @@ -125,9 +126,9 @@ def create_project(log_level, config_file): db_config, table_name="project_table", conditions=group_key ) if len(corresponding_project) == 1: - if corresponding_project[0]["status"] == Status.SUBMITTED.name: + if corresponding_project[0]["status"] == Status.SUBMITTED: update_values = { - "status_all": StatusAll.SUBMITTED_PROJECT.name, + "status_all": StatusAll.SUBMITTED_PROJECT, "center_name": corresponding_project[0]["center_name"], } number_rows_updated = update_db_where_conditions( @@ -137,7 +138,7 @@ def create_project(log_level, config_file): update_values=update_values, ) else: - update_values = {"status_all": StatusAll.SUBMITTING_PROJECT.name} + update_values = {"status_all": StatusAll.SUBMITTING_PROJECT} number_rows_updated = update_db_where_conditions( db_config, table_name="submission_table", @@ -152,21 +153,22 @@ def create_project(log_level, config_file): } project_table_entry = ProjectTableEntry(**entry) add_to_project_table(db_config, project_table_entry) - update_values = {"status_all": StatusAll.SUBMITTING_PROJECT.name} + update_values = {"status_all": StatusAll.SUBMITTING_PROJECT} number_rows_updated = update_db_where_conditions( db_config, table_name="submission_table", conditions=seq_key, update_values=update_values, ) - conditions = {"status_all": StatusAll.SUBMITTING_PROJECT.name} + conditions = {"status_all": StatusAll.SUBMITTING_PROJECT} submitting_project = find_conditions_in_db( db_config, table_name="submission_table", conditions=conditions ) - logger.debug( - f"Found {len(submitting_project)} entries in submission_table in" - " status SUBMITTING_PROJECT" - ) + if len(submitting_project) > 0: + logger.debug( + f"Found {len(submitting_project)} entries in submission_table in" + " status SUBMITTING_PROJECT" + ) for row in submitting_project: group_key = {"group_id": row["group_id"], "organism": row["organism"]} seq_key = {"accession": row["accession"], "version": row["version"]} @@ -177,10 +179,10 @@ def create_project(log_level, config_file): ) if ( len(corresponding_project) == 1 - and corresponding_project[0]["status"] == Status.SUBMITTED.name + and corresponding_project[0]["status"] == Status.SUBMITTED ): update_values = { - "status_all": StatusAll.SUBMITTED_PROJECT.name, + "status_all": StatusAll.SUBMITTED_PROJECT, "center_name": corresponding_project[0]["center_name"], } number_rows_updated = update_db_where_conditions( @@ -196,13 +198,14 @@ def create_project(log_level, config_file): ) raise RuntimeError(error_msg) # Check project_table for newly added sequences - conditions = {"status": Status.READY.name} + conditions = {"status": Status.READY} ready_to_submit_project = find_conditions_in_db( db_config, table_name="project_table", conditions=conditions ) - logger.debug( - f"Found {len(ready_to_submit_project)} entries in project_table in status READY" - ) + if len(ready_to_submit_project) > 0: + logger.debug( + f"Found {len(ready_to_submit_project)} entries in project_table in status READY" + ) for row in ready_to_submit_project: group_key = {"group_id": row["group_id"], "organism": row["organism"]} @@ -211,7 +214,7 @@ def create_project(log_level, config_file): project_set = construct_project_set_object(group_info, config, metadata_dict, row) update_values = { - "status": Status.SUBMITTING.name, + "status": Status.SUBMITTING, "center_name": group_info["institution"], } number_rows_updated = update_db_where_conditions( @@ -231,7 +234,7 @@ def create_project(log_level, config_file): project_creation_results: CreationResults = create_ena_project(config, project_set) if project_creation_results.results: update_values = { - "status": Status.SUBMITTED.name, + "status": Status.SUBMITTED, "result": json.dumps(project_creation_results.results), "finished_at": datetime.now(tz=pytz.utc), } @@ -253,7 +256,7 @@ def create_project(log_level, config_file): logger.info(f"Project creation for group_id {row["group_id"]} succeeded!") else: update_values = { - "status": Status.HAS_ERRORS.name, + "status": Status.HAS_ERRORS, "errors": json.dumps(project_creation_results.errors), } number_rows_updated = 0 @@ -275,10 +278,11 @@ def create_project(log_level, config_file): entries_with_errors = find_errors_in_db( db_config, "project_table", time_threshold=time_threshold ) - logger.info( - f"Found {len(entries_with_errors)} entries in project_table in status HAS_ERRORS" - f" for {time_threshold}m" - ) + if len(entries_with_errors) > 0: + logger.info( + f"Found {len(entries_with_errors)} entries in project_table in status HAS_ERRORS" + f" for {time_threshold}m" + ) for row in entries_with_errors: # TODO: Query ENA to check if project has in fact been created # If created update project_table diff --git a/ena-submission/scripts/create_sample.py b/ena-submission/scripts/create_sample.py index d0376761c..c85732570 100644 --- a/ena-submission/scripts/create_sample.py +++ b/ena-submission/scripts/create_sample.py @@ -153,13 +153,14 @@ def create_sample(log_level, config_file): while True: # Check submission_table for newly added sequences - conditions = {"status_all": StatusAll.SUBMITTED_PROJECT.name} + conditions = {"status_all": StatusAll.SUBMITTED_PROJECT} ready_to_submit = find_conditions_in_db( db_config, table_name="submission_table", conditions=conditions ) - logging.debug( - f"Found {len(ready_to_submit)} entries in submission_table in status SUBMITTED_PROJECT" - ) + if len(ready_to_submit) > 0: + logging.debug( + f"Found {len(ready_to_submit)} entries in submission_table in status SUBMITTED_PROJECT" + ) for row in ready_to_submit: seq_key = {"accession": row["accession"], "version": row["version"]} @@ -168,8 +169,8 @@ def create_sample(log_level, config_file): db_config, table_name="sample_table", conditions=seq_key ) if len(corresponding_sample) == 1: - if corresponding_sample[0]["status"] == Status.SUBMITTED.name: - update_values = {"status_all": StatusAll.SUBMITTED_SAMPLE.name} + if corresponding_sample[0]["status"] == Status.SUBMITTED: + update_values = {"status_all": StatusAll.SUBMITTED_SAMPLE} number_rows_updated = update_db_where_conditions( db_config, table_name="submission_table", @@ -177,7 +178,7 @@ def create_sample(log_level, config_file): update_values=update_values, ) else: - update_values = {"status_all": StatusAll.SUBMITTING_SAMPLE.name} + update_values = {"status_all": StatusAll.SUBMITTING_SAMPLE} number_rows_updated = update_db_where_conditions( db_config, table_name="submission_table", @@ -188,21 +189,22 @@ def create_sample(log_level, config_file): # If not: create sample_entry, change status to SUBMITTING_SAMPLE sample_table_entry = SampleTableEntry(**seq_key) add_to_sample_table(db_config, sample_table_entry) - update_values = {"status_all": StatusAll.SUBMITTING_SAMPLE.name} + update_values = {"status_all": StatusAll.SUBMITTING_SAMPLE} number_rows_updated = update_db_where_conditions( db_config, table_name="submission_table", conditions=seq_key, update_values=update_values, ) - conditions = {"status_all": StatusAll.SUBMITTING_SAMPLE.name} + conditions = {"status_all": StatusAll.SUBMITTING_SAMPLE} submitting_sample = find_conditions_in_db( db_config, table_name="submission_table", conditions=conditions ) - logger.debug( - f"Found {len(submitting_sample)} entries in submission_table in" - " status SUBMITTING_SAMPLE" - ) + if len(submitting_sample) > 0: + logger.debug( + f"Found {len(submitting_sample)} entries in submission_table in" + " status SUBMITTING_SAMPLE" + ) for row in submitting_sample: seq_key = {"accession": row["accession"], "version": row["version"]} @@ -212,9 +214,9 @@ def create_sample(log_level, config_file): ) if ( len(corresponding_sample) == 1 - and corresponding_sample[0]["status"] == Status.SUBMITTED.name + and corresponding_sample[0]["status"] == Status.SUBMITTED ): - update_values = {"status_all": StatusAll.SUBMITTED_SAMPLE.name} + update_values = {"status_all": StatusAll.SUBMITTED_SAMPLE} number_rows_updated = update_db_where_conditions( db_config, table_name="submission_table", @@ -228,11 +230,14 @@ def create_sample(log_level, config_file): ) raise RuntimeError(error_msg) # Check sample_table for newly added sequences - conditions = {"status": Status.READY.name} + conditions = {"status": Status.READY} ready_to_submit_sample = find_conditions_in_db( db_config, table_name="sample_table", conditions=conditions ) - logger.debug(f"Found {len(ready_to_submit_sample)} entries in sample_table in status READY") + if len(ready_to_submit_sample) > 0: + logger.debug( + f"Found {len(ready_to_submit_sample)} entries in sample_table in status READY" + ) for row in ready_to_submit_sample: seq_key = {"accession": row["accession"], "version": row["version"]} sample_data_in_submission_table = find_conditions_in_db( @@ -246,7 +251,7 @@ def create_sample(log_level, config_file): sample_set = construct_sample_set_object( config, organism_metadata, sample_metadata, center_name, row, organism ) - update_values = {"status": Status.SUBMITTING.name} + update_values = {"status": Status.SUBMITTING} number_rows_updated = update_db_where_conditions( db_config, table_name="sample_table", @@ -264,7 +269,7 @@ def create_sample(log_level, config_file): sample_creation_results: CreationResults = create_ena_sample(config, sample_set) if sample_creation_results.results: update_values = { - "status": Status.SUBMITTED.name, + "status": Status.SUBMITTED, "result": json.dumps(sample_creation_results.results), "finished_at": datetime.now(tz=pytz.utc), } @@ -286,7 +291,7 @@ def create_sample(log_level, config_file): logger.info(f"Sample creation for accession {row["accession"]} succeeded!") else: update_values = { - "status": Status.HAS_ERRORS.name, + "status": Status.HAS_ERRORS, "errors": json.dumps(sample_creation_results.errors), } number_rows_updated = 0 @@ -308,10 +313,11 @@ def create_sample(log_level, config_file): entries_with_errors = find_errors_in_db( db_config, "sample_table", time_threshold=time_threshold ) - logger.info( - f"Found {len(entries_with_errors)} entries in sample_table " - f"in status HAS_ERRORS for {time_threshold}m" - ) + if len(entries_with_errors) > 0: + logger.info( + f"Found {len(entries_with_errors)} entries in sample_table " + f"in status HAS_ERRORS for {time_threshold}m" + ) for row in entries_with_errors: # TODO: Query ENA to check if sample has in fact been created # If created update sample_table diff --git a/ena-submission/scripts/ena_types.py b/ena-submission/scripts/ena_types.py index 4dd84f21a..02df1fbf5 100644 --- a/ena-submission/scripts/ena_types.py +++ b/ena-submission/scripts/ena_types.py @@ -1,46 +1,46 @@ import dataclasses from dataclasses import dataclass, field -from typing import Dict, List, Optional +from enum import Enum @dataclass class XrefType: - db: Optional[str] = None - id: Optional[str] = None - label: Optional[str] = None + db: str | None = None + id: str | None = None + label: str | None = None @dataclass class UrlType: - label: Optional[str] = None - url: Optional[str] = None + label: str | None = None + url: str | None = None @dataclass class ProjectLink: - xref_link: Optional[XrefType] = None - url_link: Optional[UrlType] = None + xref_link: XrefType | None = None + url_link: UrlType | None = None @dataclass class ProjectLinks: - project_link: Optional[List[ProjectLink]] = None + project_link: list[ProjectLink] | None = None @dataclass class OrganismType: - taxon_id: Optional[int] = None - scientific_name: Optional[str] = None - common_name: Optional[str] = None - strain: Optional[str] = None - breed: Optional[str] = None - cultivar: Optional[str] = None - isolate: Optional[str] = None + taxon_id: int | None = None + scientific_name: str | None = None + common_name: str | None = None + strain: str | None = None + breed: str | None = None + cultivar: str | None = None + isolate: str | None = None @dataclass class SequencingProject: - locus_tag_prefix: List[str] = dataclasses.field(default_factory=list) + locus_tag_prefix: list[str] = dataclasses.field(default_factory=list) def default_sequencing_project() -> SequencingProject: @@ -50,29 +50,29 @@ def default_sequencing_project() -> SequencingProject: @dataclass class SubmissionProject: sequencing_project: SequencingProject = field(default_factory=default_sequencing_project) - organism: Optional[OrganismType] = None + organism: OrganismType | None = None @dataclass class UmbrellaProject: - organism: Optional[OrganismType] = None + organism: OrganismType | None = None @dataclass class RelatedProjectSubType: - accession: Optional[str] = None + accession: str | None = None @dataclass class RelatedProject: - parent_project: Optional[RelatedProjectSubType] = None - child_project: Optional[RelatedProjectSubType] = None - peer_project: Optional[RelatedProjectSubType] = None + parent_project: RelatedProjectSubType | None = None + child_project: RelatedProjectSubType | None = None + peer_project: RelatedProjectSubType | None = None @dataclass class ProjectTypeCollaborators: - collaborator: List[str] + collaborator: list[str] @dataclass @@ -89,14 +89,14 @@ class ProjectType: name: str title: str description: str - center_name: Optional[XmlAttribute] = None - alias: Optional[XmlAttribute] = None - collaborators: Optional[ProjectTypeCollaborators] = None - submission_project: Optional[SubmissionProject] = None - umbrella_project: Optional[UmbrellaProject] = None - related_projects: Optional[RelatedProject] = None - project_links: Optional[ProjectLinks] = None - project_attributes: Optional[Dict[str, str]] = None + center_name: XmlAttribute | None = None + alias: XmlAttribute | None = None + collaborators: ProjectTypeCollaborators | None = None + submission_project: SubmissionProject | None = None + umbrella_project: UmbrellaProject | None = None + related_projects: RelatedProject | None = None + project_links: ProjectLinks | None = None + project_attributes: dict[str, str] | None = None def default_project_type(): @@ -107,43 +107,43 @@ def default_project_type(): @dataclass class ProjectSet: - project: List[ProjectType] + project: list[ProjectType] @dataclass class SampleName: - taxon_id: Optional[int] = None - scientific_name: Optional[str] = None - common_name: Optional[str] = None - display_name: Optional[str] = None + taxon_id: int | None = None + scientific_name: str | None = None + common_name: str | None = None + display_name: str | None = None @dataclass class SampleAttribute: tag: str value: str - dd: Optional[str] = None + dd: str | None = None @dataclass class SampleAttributes: - sample_attribute: List[SampleAttribute] = None + sample_attribute: list[SampleAttribute] = None @dataclass class SampleLinks: - sample_link: List[ProjectLink] + sample_link: list[ProjectLink] @dataclass class SampleType: - center_name: Optional[XmlAttribute] = None - alias: Optional[XmlAttribute] = None - title: Optional[str] = None - sample_name: Optional[SampleName] = None - description: Optional[str] = None - sample_links: Optional[SampleLinks] = None - sample_attributes: Optional[SampleAttributes] = None + center_name: XmlAttribute | None = None + alias: XmlAttribute | None = None + title: str | None = None + sample_name: SampleName | None = None + description: str | None = None + sample_links: SampleLinks | None = None + sample_attributes: SampleAttributes | None = None def default_sample_type(): @@ -152,4 +152,89 @@ def default_sample_type(): @dataclass class SampleSetType: - sample: List[SampleType] + sample: list[SampleType] + + +class AssemblyType(Enum): + CLONE = "clone" + ISOLATE = "isolate" + + def __str__(self): + return self.value + + +class MoleculeType(Enum): + GENOMIC_DNA = "genomic DNA" + GENOMIC_RNA = "genomic RNA" + VIRAL_CRNA = "viral cRNA" + + def __str__(self): + return self.value + + +class AssemblyManifest: + study: str + sample: str + assemblyname: str # Note: this SHOULD be 1 word no hyphen + assembly_type: AssemblyType + coverage: str + program: str + platform: str + fasta: str + chromosome_list: str + mingaplength: int | None = None + moleculetype: MoleculeType | None = None + description: str | None = None + run_ref: list[str] | None = None + + +class ChromosomeType(Enum): + CHROMOSOME = "chromosome" + PLASMID = "plasmid" + LINKAGE_GROUP = "linkage_group" + MONOPARTITE = "monopartite" + SEGMENTED = "segmented" + MULTIPARTITE = "multipartite" + + def __str__(self): + return self.value + + +class ChromosomeLocation(Enum): + MACRONUCLEAR = "macronuclear" + NUCLEOMORPH = "nucleomorph" + MITOCHONDRION = "mitochondrion" + KINETOPLAST = "kinetoplast" + CHLOROPLAST = "chloroplast" + CHROMOPLAST = "chromoplast" + PLASTID = "plastid" + VIRION = "virion" + PHAGE = "phage" + PROVIRAL = "proviral" + PROPHAGE = "prophage" + VIROID = "viroid" + CYANELLE = "cyanelle" + APICOPLAST = "apicoplast" + LEUCOPLAST = "leucoplast" + PROPLASTID = "proplastid" + HYDROGENOSOME = "hydrogenosome" + CHROMATOPHORE = "chromatophore" + + def __str__(self): + return self.value + + +class Topology(Enum): + LINEAR = "linear" + CIRCULAR = "circular" + + def __str__(self): + return self.value + + +class AssemblyChromosomeListFile: + object_name: str + chromosome_name: str + chromosome_type: ChromosomeType + topology: Topology = Topology.LINEAR + chromosome_location: ChromosomeLocation | None = None diff --git a/ena-submission/scripts/submission_db_helper.py b/ena-submission/scripts/submission_db_helper.py index 3fc9f204b..91f9e071d 100644 --- a/ena-submission/scripts/submission_db_helper.py +++ b/ena-submission/scripts/submission_db_helper.py @@ -49,6 +49,9 @@ class StatusAll(Enum): HAS_ERRORS_ASSEMBLY = 9 HAS_ERRORS_SAMPLE = 10 + def __str__(self): + return self.name + class Status(Enum): READY = 0 @@ -56,6 +59,9 @@ class Status(Enum): SUBMITTED = 2 HAS_ERRORS = 3 + def __str__(self): + return self.name + @dataclass class SubmissionTableEntry: @@ -121,9 +127,9 @@ def find_conditions_in_db(db_config, table_name, conditions): cur = con.cursor() query = f"SELECT * FROM {table_name} WHERE " - query += " AND ".join([f"{key}=%s" for key in conditions]) + query += " AND ".join([f"{key}='{str(value)}'" for key, value in conditions.items()]) - cur.execute(query, tuple(conditions.values())) + cur.execute(query) rows = cur.fetchall() # Get column names from cursor @@ -165,7 +171,7 @@ def update_db_where_conditions(db_config, table_name, conditions, update_values) updated_row_count = 0 try: query = f"UPDATE {table_name} SET " - query += ", ".join([f"{key}='{value}'" for key, value in update_values.items()]) + query += ", ".join([f"{key}='{str(value)}'" for key, value in update_values.items()]) query += " WHERE " query += " AND ".join([f"{key}=%s" for key in conditions]) @@ -190,7 +196,7 @@ def add_to_project_table(db_config: DBConfig, project_table_entry: ProjectTableE project_table_entry.organism, project_table_entry.errors, project_table_entry.warnings, - project_table_entry.status.name, + str(project_table_entry.status), project_table_entry.started_at, project_table_entry.finished_at, project_table_entry.result, @@ -212,7 +218,7 @@ def add_to_sample_table(db_config: DBConfig, sample_table_entry: SampleTableEntr sample_table_entry.version, sample_table_entry.errors, sample_table_entry.warnings, - sample_table_entry.status.name, + str(sample_table_entry.status), sample_table_entry.started_at, sample_table_entry.finished_at, sample_table_entry.result, @@ -248,7 +254,7 @@ def add_to_submission_table(db_config: DBConfig, submission_table_entry: Submiss submission_table_entry.group_id, submission_table_entry.errors, submission_table_entry.warnings, - submission_table_entry.status_all.name, + str(submission_table_entry.status_all), submission_table_entry.started_at, submission_table_entry.finished_at, submission_table_entry.metadata,