Skip to content

Commit

Permalink
Use externalMetadata in ena-submission-pod.
Browse files Browse the repository at this point in the history
  • Loading branch information
anna-parker committed Jul 12, 2024
1 parent 082c7d1 commit 7c7f896
Show file tree
Hide file tree
Showing 4 changed files with 134 additions and 149 deletions.
246 changes: 122 additions & 124 deletions ena-submission/config/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,128 +5,126 @@ db_password: unsecure
db_host: "127.0.0.1"
organisms:
cchf:
schema:
ingest:
nextclade_dataset_name: nextstrain/cchfv/linked
nextclade_dataset_server: https://raw.githubusercontent.com/nextstrain/nextclade_data/cornelius-cchfv/data_output
nucleotide_sequences:
- L
- M
- S
taxon_id: 3052518
organismName: "Crimean-Congo Hemorrhagic Fever Virus"
externalMetadata:
- externalMetadataUpdater: ena
name: ncbi_release_date
type: date
- externalMetadataUpdater: ena
name: ncbi_update_date
type: date
- externalMetadataUpdater: ena
name: ncbi_submitter_country
type: string
- externalMetadataUpdater: ena
name: insdc_accession_base_L
type: string
- externalMetadataUpdater: ena
name: insdc_accession_base_M
type: string
- externalMetadataUpdater: ena
name: insdc_accession_base_S
type: string
- externalMetadataUpdater: ena
name: insdc_version_L
type: int
- externalMetadataUpdater: ena
name: insdc_version_M
type: int
- externalMetadataUpdater: ena
name: insdc_version_S
type: int
- externalMetadataUpdater: ena
name: insdc_accession_full_L
type: string
- externalMetadataUpdater: ena
name: insdc_accession_full_M
type: string
- externalMetadataUpdater: ena
name: insdc_accession_full_S
type: string
- externalMetadataUpdater: ena
name: bioproject_accessions_L
type: string
- externalMetadataUpdater: ena
name: bioproject_accessions_M
type: string
- externalMetadataUpdater: ena
name: bioproject_accessions_S
type: string
- externalMetadataUpdater: ena
name: biosample_accession_L
type: string
- externalMetadataUpdater: ena
name: biosample_accession_M
type: string
- externalMetadataUpdater: ena
name: biosample_accession_S
type: string
- externalMetadataUpdater: ena
name: ncbi_protein_count
type: int
- externalMetadataUpdater: ena
name: ncbi_sourcedb
type: string
- externalMetadataUpdater: ena
name: ncbi_virus_name
type: string
- externalMetadataUpdater: ena
name: ncbi_virus_tax_id
type: int
- externalMetadataUpdater: ena
name: sra_run_accession
type: string
ingest:
nextclade_dataset_name: nextstrain/cchfv/linked
nextclade_dataset_server: https://raw.githubusercontent.com/nextstrain/nextclade_data/cornelius-cchfv/data_output
nucleotide_sequences:
- L
- M
- S
taxon_id: 3052518
organismName: "Crimean-Congo Hemorrhagic Fever Virus"
externalMetadata:
- externalMetadataUpdater: ena
name: ncbi_release_date
type: date
- externalMetadataUpdater: ena
name: ncbi_update_date
type: date
- externalMetadataUpdater: ena
name: ncbi_submitter_country
type: string
- externalMetadataUpdater: ena
name: insdc_accession_base_L
type: string
- externalMetadataUpdater: ena
name: insdc_accession_base_M
type: string
- externalMetadataUpdater: ena
name: insdc_accession_base_S
type: string
- externalMetadataUpdater: ena
name: insdc_version_L
type: int
- externalMetadataUpdater: ena
name: insdc_version_M
type: int
- externalMetadataUpdater: ena
name: insdc_version_S
type: int
- externalMetadataUpdater: ena
name: insdc_accession_full_L
type: string
- externalMetadataUpdater: ena
name: insdc_accession_full_M
type: string
- externalMetadataUpdater: ena
name: insdc_accession_full_S
type: string
- externalMetadataUpdater: ena
name: bioproject_accessions_L
type: string
- externalMetadataUpdater: ena
name: bioproject_accessions_M
type: string
- externalMetadataUpdater: ena
name: bioproject_accessions_S
type: string
- externalMetadataUpdater: ena
name: biosample_accession_L
type: string
- externalMetadataUpdater: ena
name: biosample_accession_M
type: string
- externalMetadataUpdater: ena
name: biosample_accession_S
type: string
- externalMetadataUpdater: ena
name: ncbi_protein_count
type: int
- externalMetadataUpdater: ena
name: ncbi_sourcedb
type: string
- externalMetadataUpdater: ena
name: ncbi_virus_name
type: string
- externalMetadataUpdater: ena
name: ncbi_virus_tax_id
type: int
- externalMetadataUpdater: ena
name: sra_run_accession
type: string
ebola-sudan:
schema:
ingest:
taxon_id: 3052460
organismName: "Ebola Sudan"
externalMetadata:
- externalMetadataUpdater: ena
name: ncbi_release_date
type: date
- externalMetadataUpdater: ena
name: ncbi_update_date
type: date
- externalMetadataUpdater: ena
name: ncbi_submitter_country
type: string
- externalMetadataUpdater: ena
name: insdc_accession_base
type: string
- externalMetadataUpdater: ena
name: insdc_version
type: int
- externalMetadataUpdater: ena
name: insdc_accession_full
type: string
- externalMetadataUpdater: ena
name: bioproject_accessions
type: string
- externalMetadataUpdater: ena
name: biosample_accession
type: string
- externalMetadataUpdater: ena
name: ncbi_protein_count
type: int
- externalMetadataUpdater: ena
name: ncbi_sourcedb
type: string
- externalMetadataUpdater: ena
name: ncbi_virus_name
type: string
- externalMetadataUpdater: ena
name: ncbi_virus_tax_id
type: int
- externalMetadataUpdater: ena
name: sra_run_accession
type: string
ingest:
taxon_id: 3052460
organismName: "Ebola Sudan"
externalMetadata:
- externalMetadataUpdater: ena
name: ncbi_release_date
type: date
- externalMetadataUpdater: ena
name: ncbi_update_date
type: date
- externalMetadataUpdater: ena
name: ncbi_submitter_country
type: string
- externalMetadataUpdater: ena
name: insdc_accession_base
type: string
- externalMetadataUpdater: ena
name: insdc_version
type: int
- externalMetadataUpdater: ena
name: insdc_accession_full
type: string
- externalMetadataUpdater: ena
name: bioproject_accessions
type: string
- externalMetadataUpdater: ena
name: biosample_accession
type: string
- externalMetadataUpdater: ena
name: ncbi_protein_count
type: int
- externalMetadataUpdater: ena
name: ncbi_sourcedb
type: string
- externalMetadataUpdater: ena
name: ncbi_virus_name
type: string
- externalMetadataUpdater: ena
name: ncbi_virus_tax_id
type: int
- externalMetadataUpdater: ena
name: sra_run_accession
type: string
15 changes: 0 additions & 15 deletions ena-submission/config/defaults.yaml
Original file line number Diff line number Diff line change
@@ -1,18 +1,3 @@
username: external_metadata_updater
password: external_metadata_updater
keycloak_client_id: backend-client
ena_specific_metadata:
- ncbi_release_date
- ncbi_update_date
- ncbi_submitter_country
- ncbi_protein_count
- ncbi_sourcedb
- ncbi_virus_name
- ncbi_virus_tax_id
- insdc_accession_base
- insdc_version
- insdc_accession_full
- bioproject_accessions
- biosample_accession
- ncbi_completeness
- sra_run_accession
3 changes: 3 additions & 0 deletions ena-submission/scripts/get_ena_submission_list.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,9 @@ def get_ena_submission_list(log_level, config_file, output_file):
entries_to_submit = {}
for organism in config.organisms:
config.organism = organism
config.ena_specific_metadata = [
value["name"] for value in config.organisms[organism]["externalMetadata"]
]
logging.info(f"Getting released sequences for organism: {organism}")
entries = get_released_data(config, remove_if_has_ena_specific_metadata=True)

Expand Down
19 changes: 9 additions & 10 deletions kubernetes/loculus/templates/_common-metadata.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -320,16 +320,15 @@ organisms:
{{- range $key, $instance := (.Values.organisms | default .Values.defaultOrganisms) }}
{{- if $instance.ingest }}
{{ $key }}:
schema:
{{- with $instance.schema }}
{{- $nucleotideSequences := .nucleotideSequences | default (list "main")}}
ingest: {{- $instance.ingest.configFile | toYaml | nindent 8 }}
organismName: {{ quote .organismName }}
externalMetadata:
{{- $args := dict "metadata" (include "loculus.patchMetadataSchema" . | fromYaml).metadata "nucleotideSequences" $nucleotideSequences}}
{{ $metadata := include "loculus.generateBackendExternalMetadata" $args | fromYaml }}
{{ $metadata.fields | default list | toYaml | nindent 8 }}
{{- end }}
{{- with $instance.schema }}
{{- $nucleotideSequences := .nucleotideSequences | default (list "main")}}
ingest: {{- $instance.ingest.configFile | toYaml | nindent 8 }}
organismName: {{ quote .organismName }}
externalMetadata:
{{- $args := dict "metadata" (include "loculus.patchMetadataSchema" . | fromYaml).metadata "nucleotideSequences" $nucleotideSequences}}
{{ $metadata := include "loculus.generateBackendExternalMetadata" $args | fromYaml }}
{{ $metadata.fields | default list | toYaml | nindent 8 }}
{{- end }}
{{- end }}
{{- end }}
{{- end }}

0 comments on commit 7c7f896

Please sign in to comment.