Skip to content

Commit

Permalink
Add create_sample updates as well
Browse files (browse the repository at this point in the history)
  • Loading branch information
anna-parker committed Jul 23, 2024
1 parent 7147777 commit 8db4aab
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 20 deletions.
49 changes: 31 additions & 18 deletions ena-submission/scripts/create_sample.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -41,7 +41,7 @@
@dataclass
class Config:
organisms: List[Dict[str, str]]
metadata_map: Dict[str, Dict[str, str]]
metadata_mapping: Dict[str, Dict[str, str]]
backend_url: str
keycloak_token_url: str
keycloak_client_id: str
Expand All @@ -57,30 +57,36 @@ class Config:
ena_submission_username: str


def construct_sample_set_object(config, organism_metadata, sample_metadata, center_name, row):
def get_sample_attributes(config, sample_metadata, row):
list_sample_attributes = []
for field in config.metadata_map:
loculus_metadata_field_names = config.metadata_map[field]["loculus_fields"]
for field in config.metadata_mapping:
loculus_metadata_field_names = config.metadata_mapping[field]["loculus_fields"]
loculus_metadata_field_values = [
sample_metadata.get(metadata, None) for metadata in loculus_metadata_field_names
]
if "function" in config.metadata_map[field] and "args" in config.metadata_map[field]:
function = config.metadata_map[field]["function"]
args = config.metadata_map[field]["args"]
if function == "match" and (len(loculus_metadata_field_names) == len(args)):
if (
"function" in config.metadata_mapping[field]
and "args" in config.metadata_mapping[field]
):
function = config.metadata_mapping[field]["function"]
args = [i for i in config.metadata_mapping[field]["args"] if i]
full_field_values = [i for i in loculus_metadata_field_values if i]
if function != "match":
logging.warning(
f"Unknown function: {function} with args: {args} for {row["accession"]}"
)
continue
if function == "match" and (len(full_field_values) == len(args)):
value = True
for i in range(len(loculus_metadata_field_names)):
for i in range(len(full_field_values)):
if not re.match(
args[i],
sample_metadata.get(loculus_metadata_field_names[i], None),
full_field_values[i],
re.IGNORECASE,
):
value = False
break
else:
logging.warning(
f"Could not calculate function {function} with args: {args} for {row["accession"]}"
)
continue
else:
value = ";".join([metadata for metadata in loculus_metadata_field_values if metadata])
Expand All @@ -91,9 +97,16 @@ def construct_sample_set_object(config, organism_metadata, sample_metadata, cent
value=value,
)
)
return list_sample_attributes


def construct_sample_set_object(
config, organism_metadata, sample_metadata, center_name, row, organism
):
list_sample_attributes = get_sample_attributes(config, sample_metadata, row)
sample_type = SampleType(
center_name=XmlAttribute(center_name),
alias=XmlAttribute(f"{row["accession"]}:{row["organism"]}:{config.unique_project_suffix}"),
alias=XmlAttribute(f"{row["accession"]}:{organism}:{config.unique_project_suffix}"),
title=f"{organism_metadata["scientific_name"]}: Genome sequencing",
description=(
f"Automated upload of {organism_metadata["scientific_name"]} sequences submitted by {center_name} from {config.db_name}",
Expand Down Expand Up @@ -135,12 +148,12 @@ def create_sample(log_level, config_file):

while True:
# Check submission_table for newly added sequences
conditions = {"status_all": StatusAll.READY_TO_SUBMIT.name}
conditions = {"status_all": StatusAll.SUBMITTED_PROJECT.name}
ready_to_submit = find_conditions_in_db(
db_config, table_name="submission_table", conditions=conditions
)
logging.debug(
f"Found {len(ready_to_submit)} entries in submission_table in status READY_TO_SUBMIT"
f"Found {len(ready_to_submit)} entries in submission_table in status SUBMITTED_PROJECT"
)
for row in ready_to_submit:
seq_key = {"accession": row["accession"], "version": row["version"]}
Expand Down Expand Up @@ -230,7 +243,7 @@ def create_sample(log_level, config_file):
organism_metadata = config.organisms[organism]["ingest"]

sample_set = construct_sample_set_object(
config, organism_metadata, sample_metadata, center_name, row
config, organism_metadata, sample_metadata, center_name, row, organism
)
update_values = {"status": Status.SUBMITTING.name}
number_rows_updated = update_db_where_conditions(
Expand Down Expand Up @@ -297,7 +310,7 @@ def create_sample(log_level, config_file):
f"Found {len(entries_with_errors)} entries in sample_table in status HAS_ERRORS",
f" for {time_threshold}m",
)
for row in ready_to_submit_sample:
for row in entries_with_errors:
# TODO: Query ENA to check if sample has in fact been created
# If created update sample_table
# If not retry 3 times, then raise for manual intervention
Expand Down
5 changes: 3 additions & 2 deletions ena-submission/scripts/test_ena_submission.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -195,7 +195,8 @@ def test_sample_set_construction(self):
config.unique_project_suffix = "test suffix"
row = {}
row["accession"] = "test_accession"
row["organism"] = "test organism"
organism = "test organism"
row["organism"] = organism
sample_metadata = {
"authors": "I. Kurane, M. Saijo, Q. Tang, S. Morikawa, T. Qing, Z. Xinqin",
"host_age": None,
Expand Down Expand Up @@ -337,7 +338,7 @@ def test_sample_set_construction(self):
"dataUseTermsUrl": "https://#TODO-MVP/open",
}
sample_set = construct_sample_set_object(
config, organism_metadata, sample_metadata, center_name, row
config, organism_metadata, sample_metadata, center_name, row, organism
)
assert xmltodict.parse(
dataclass_to_xml(sample_set, root_name="SAMPLE_SET")
Expand Down

0 comments on commit 8db4aab

Please sign in to comment.