Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(deposition): improvements and fixes #2960

Merged
merged 14 commits into from
Oct 7, 2024
6 changes: 5 additions & 1 deletion ena-submission/.gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
.snakemake/
results/
__pycache__
assembly/
project/
sample/
__pycache__
config/config.yaml
162 changes: 0 additions & 162 deletions ena-submission/config/config.yaml

This file was deleted.

21 changes: 18 additions & 3 deletions ena-submission/scripts/create_assembly.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import click
import pytz
import yaml
from call_loculus import get_group_info
from ena_submission_helper import (
CreationResult,
create_chromosome_list,
Expand Down Expand Up @@ -67,6 +68,7 @@ class Config:
slack_hook: str
slack_token: str
slack_channel_id: str
is_broker: bool


def create_chromosome_list_object(
Expand Down Expand Up @@ -133,15 +135,26 @@ def create_manifest_object(
sample_accession = sample_table_entry["result"]["ena_sample_accession"]
study_accession = project_table_entry["result"]["bioproject_accession"]

address_string = project_table_entry["center_name"]
if config.is_broker:
try:
group_info = get_group_info(config, project_table_entry["group_id"])[0]["group"]
address = group_info["address"]
address_string = (f'{address.get("line1", "")}, {address.get("line2", "")}, '
f'{address.get("city", "")}, {address.get("state", "")}, '
f'{address.get("postalCode", "")}, {address.get("country")}')
except Exception as e:
logger.error(f"Was unable to create address, setting address to center_name due to {e}")

metadata = submission_table_entry["metadata"]
unaligned_nucleotide_sequences = submission_table_entry["unaligned_nucleotide_sequences"]
organism_metadata = config.organisms[group_key["organism"]]["ingest"]
organism_metadata = config.organisms[group_key["organism"]]["enaDeposition"]
chromosome_list_object = create_chromosome_list_object(unaligned_nucleotide_sequences, seq_key)
chromosome_list_file = create_chromosome_list(list_object=chromosome_list_object, dir=dir)
authors = (
metadata["authors"] if metadata.get("authors") else metadata.get("submitter", "Unknown")
)
collection_date = metadata.get("collectionDate", "Unknown")
collection_date = metadata.get("sampleCollectionDate", "Unknown")
country = metadata.get("geoLocCountry", "Unknown")
admin1 = metadata.get("geoLocAdmin1", "")
admin2 = metadata.get("geoLocAdmin2", "")
Expand Down Expand Up @@ -203,6 +216,8 @@ def create_manifest_object(
chromosome_list=chromosome_list_file,
description=description,
moleculetype=moleculetype,
authors=authors,
address=address_string,
)


Expand Down Expand Up @@ -365,7 +380,7 @@ def assembly_table_create(
group_key,
test,
)
manifest_file = create_manifest(manifest_object)
manifest_file = create_manifest(manifest_object, is_broker=config.is_broker)
except Exception as e:
logger.error(
f"Manifest creation failed for accession {row["accession"]} with error {e}"
Expand Down
4 changes: 2 additions & 2 deletions ena-submission/scripts/create_project.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,13 +73,13 @@ def construct_project_set_object(
Construct project set object, using:
- entry in project_table
- group_info of corresponding group_id
- config information, such as ingest metadata for that organism
- config information, such as enaDeposition metadata for that organism

If test=True add a timestamp to the alias suffix to allow for multiple
submissions of the same project for testing.
(ENA blocks multiple submissions with the same alias)
"""
metadata_dict = config.organisms[entry["organism"]]["ingest"]
metadata_dict = config.organisms[entry["organism"]]["enaDeposition"]
if test:
alias = XmlAttribute(
f"{entry["group_id"]}:{entry["organism"]}:{config.unique_project_suffix}:{datetime.now(tz=pytz.utc)}"
Expand Down
4 changes: 2 additions & 2 deletions ena-submission/scripts/create_sample.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,15 +132,15 @@ def construct_sample_set_object(
Construct sample set object, using:
- entry in sample_table
- sample_data_in_submission_table: corresponding entry in submission_table
- config information, such as ingest metadata for that organism
- config information, such as enaDeposition metadata for that organism
If test=True add a timestamp to the alias suffix to allow for multiple
submissions of the same sample for testing.
(ENA blocks multiple submissions with the same alias)
"""
sample_metadata = sample_data_in_submission_table["metadata"]
center_name = sample_data_in_submission_table["center_name"]
organism = sample_data_in_submission_table["organism"]
organism_metadata = config.organisms[organism]["ingest"]
organism_metadata = config.organisms[organism]["enaDeposition"]
if test:
alias = XmlAttribute(
f"{entry["accession"]}:{organism}:{config.unique_project_suffix}:{datetime.now(tz=pytz.utc)}"
Expand Down
3 changes: 2 additions & 1 deletion ena-submission/scripts/deposition_dry_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ class Config:
metadata_mapping_mandatory_field_defaults: dict[str, str]
ena_checklist: str
use_ena_checklist: bool
is_broker: bool


@click.command()
Expand Down Expand Up @@ -158,7 +159,7 @@ def local_ena_submission_generator(
manifest_object = create_manifest_object(
config, dummy_sample_dict, dummy_project_dict, entry, entry, entry, dir=directory
)
create_manifest(manifest_object, dir=directory)
create_manifest(manifest_object, is_broker=config.is_broker, dir=directory)
logger.info(
"You can submit the assembly to ENA using the command: \n"
"java -jarwebin-cli.jar -username {ena_submission_username} "
Expand Down
14 changes: 13 additions & 1 deletion ena-submission/scripts/ena_submission_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -378,7 +378,9 @@ def create_fasta(
return filename


def create_manifest(manifest: AssemblyManifest, dir: str | None = None) -> str:
def create_manifest(
manifest: AssemblyManifest, is_broker: bool = False, dir: str | None = None
) -> str:
"""
Creates a temp manifest file:
https://ena-docs.readthedocs.io/en/latest/submit/assembly/genome.html#manifest-files
Expand Down Expand Up @@ -410,6 +412,16 @@ def create_manifest(manifest: AssemblyManifest, dir: str | None = None) -> str:
f.write(f"DESCRIPTION\t{manifest.description}\n")
if manifest.moleculetype:
f.write(f"MOLECULETYPE\t{manifest.moleculetype!s}\n")
if manifest.authors:
if not is_broker:
logger.error("Cannot set authors field for non broker")
else:
f.write(f"AUTHORS\t{manifest.authors}\n")
if manifest.address:
if not is_broker:
logger.error("Cannot set address field for non broker")
else:
f.write(f"ADDRESS\t{manifest.address}\n")

return filename

Expand Down
2 changes: 2 additions & 0 deletions ena-submission/scripts/ena_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,8 @@ class AssemblyManifest:
moleculetype: MoleculeType | None = None
description: str | None = None
run_ref: list[str] | None = None
address: str | None = None
authors: str | None = None


class ChromosomeType(Enum):
Expand Down
7 changes: 5 additions & 2 deletions ena-submission/scripts/test_ena_submission.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ def mock_config():
"scientific_name": "Test scientific name",
"molecule_type": "genomic RNA",
}
config.organisms = {"Test organism": {"ingest": metadata_dict}}
config.organisms = {"Test organism": {"enaDeposition": metadata_dict}}
config.metadata_mapping = defaults["metadata_mapping"]
config.metadata_mapping_mandatory_field_defaults = defaults[
"metadata_mapping_mandatory_field_defaults"
Expand Down Expand Up @@ -240,7 +240,10 @@ def test_create_manifest(self):
study_accession = "Test Study Accession"
sample_accession = "Test Sample Accession"
results_in_sample_table = {"result": {"ena_sample_accession": sample_accession}}
results_in_project_table = {"result": {"bioproject_accession": study_accession}}
results_in_project_table = {
"result": {"bioproject_accession": study_accession},
"center_name": "generic_center_name",
}
manifest = create_manifest_object(
config,
results_in_sample_table,
Expand Down
4 changes: 2 additions & 2 deletions kubernetes/loculus/templates/_common-metadata.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -370,12 +370,12 @@ organisms:
{{ $key }}:
{{- with $instance.schema }}
{{- $nucleotideSequences := .nucleotideSequences | default (list "main")}}
ingest: {{- $instance.ingest.configFile | toYaml | nindent 8 }}
enaDeposition: {{- $instance.enaDeposition.configFile | toYaml | nindent 6 }}
organismName: {{ quote .organismName }}
externalMetadata:
{{- $args := dict "metadata" (include "loculus.patchMetadataSchema" . | fromYaml).metadata "nucleotideSequences" $nucleotideSequences}}
{{- $metadata := include "loculus.generateBackendExternalMetadata" $args | fromYaml }}
{{- $metadata.fields | default list | toYaml | nindent 8 }}
{{- $metadata.fields | default list | toYaml | nindent 6 }}
{{- end }}
{{- end }}
{{- end }}
Expand Down
2 changes: 2 additions & 0 deletions kubernetes/loculus/templates/ena-submission-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
{{- $submitToEnaProduction := .Values.submitToEnaProduction | default false }}
{{- $enaDbName := .Values.enaDbName | default false }}
{{- $enaUniqueSuffix := .Values.enaUniqueSuffix | default false }}
{{- $enaIsBroker := .Values.enaIsBroker | default false }}
---
apiVersion: v1
kind: ConfigMap
Expand All @@ -13,6 +14,7 @@ data:
config.yaml: |
submit_to_ena_prod: {{ $submitToEnaProduction }}
db_name: {{ $enaDbName }}
is_broker: {{ $enaIsBroker }}
unique_project_suffix: {{ $enaUniqueSuffix }}
backend_url: {{ $backendHost }}
keycloak_token_url: {{ $keycloakHost -}}/realms/loculus/protocol/openid-connect/token
Expand Down
Loading
Loading