Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(website): Create a standardized sequence name with the format: {country}/{AccessionVersion}/{date} #2246

Merged
merged 20 commits into from
Jul 9, 2024
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions kubernetes/loculus/templates/_preprocessingFromValues.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,12 @@
{{- if .type }}
type: {{ .type }}
{{- end }}
{{- if .order }}
order:
{{- range .order }}
- {{ . }}
{{- end }}
{{- end }}
{{- if .preprocessing }}
{{- if hasKey .preprocessing "function" }}
function: {{ index .preprocessing "function" }}
Expand Down
13 changes: 11 additions & 2 deletions kubernetes/loculus/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,15 @@ defaultOrganismConfig: &defaultOrganismConfig
inputs:
date: sample_collection_date
required: true
- name: display_name
header: "INSDC"
anna-parker marked this conversation as resolved.
Show resolved Hide resolved
order: [geo_loc_country, accession_version, sample_collection_date]
preprocessing:
function: concatenate
inputs:
string: geo_loc_country
date: sample_collection_date
anna-parker marked this conversation as resolved.
Show resolved Hide resolved
noInput: true
- name: ncbi_release_date
displayName: NCBI release date
type: date
Expand Down Expand Up @@ -1193,5 +1202,5 @@ enableCrossRefCredentials: true
runDevelopmentKeycloakDatabase: true
runDevelopmentMainDatabase: true
enforceHTTPS: true
registrationTermsMessage: >
You must agree to the <a href="http://main.loculus.org/terms">terms of use</a>.
registrationTermsMessage: >-
You must agree to the <a href="http://main.loculus.org/terms">terms of use</a>.
98 changes: 60 additions & 38 deletions preprocessing/nextclade/src/loculus_preprocessing/prepro.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import copy
import csv
import json
import logging
Expand Down Expand Up @@ -316,55 +317,75 @@ def null_per_backend(x: Any) -> bool:
return False


def add_InputMetadata(
spec: ProcessingSpec,
unprocessed: UnprocessedAfterNextclade,
errors: list[ProcessingAnnotation],
input_data: InputMetadata,
arg_name: str,
input_path: str,
) -> InputMetadata:
input_data[arg_name] = None
# If field starts with "nextclade.", take from nextclade metadata
nextclade_prefix = "nextclade."
if input_path.startswith(nextclade_prefix):
segment = spec.args.get("segment", "main")
if unprocessed.nextcladeMetadata is None:
errors.append(
ProcessingAnnotation(
source=[
AnnotationSource(
name="main",
type=AnnotationSourceType.NUCLEOTIDE_SEQUENCE,
)
],
message="Nucleotide sequence failed to align",
)
)
return input_data
sub_path = input_path[len(nextclade_prefix) :]
if segment in unprocessed.nextcladeMetadata:
input_data[arg_name] = str(
dpath.get(
unprocessed.nextcladeMetadata[segment],
sub_path,
separator=".",
default=None,
)
)
else:
input_data[arg_name] = None
return input_data
if input_path not in unprocessed.inputMetadata:
return input_data
input_data[arg_name] = unprocessed.inputMetadata[input_path]
return input_data


def get_metadata(
id: AccessionVersion,
spec: ProcessingSpec,
output_field: str,
unprocessed: UnprocessedAfterNextclade,
errors: list[ProcessingAnnotation],
warnings: list[ProcessingAnnotation],
) -> ProcessingResult:
input_data: InputMetadata = {}
args = {} if spec.args is None else copy.deepcopy(spec.args)
anna-parker marked this conversation as resolved.
Show resolved Hide resolved
for arg_name, input_path in spec.inputs.items():
input_data[arg_name] = None
# If field starts with "nextclade.", take from nextclade metadata
nextclade_prefix = "nextclade."
if input_path.startswith(nextclade_prefix):
# Remove "nextclade." prefix
if spec.args is None:
spec.args = {}
segment = spec.args.get("segment", "main")
if unprocessed.nextcladeMetadata is None:
errors.append(
ProcessingAnnotation(
source=[
AnnotationSource(
name="main",
type=AnnotationSourceType.NUCLEOTIDE_SEQUENCE,
)
],
message="Nucleotide sequence failed to align",
)
)
continue
sub_path = input_path[len(nextclade_prefix) :]
if segment in unprocessed.nextcladeMetadata:
input_data[arg_name] = str(
dpath.get(
unprocessed.nextcladeMetadata[segment],
sub_path,
separator=".",
default=None,
)
)
else:
input_data[arg_name] = None
continue
if input_path not in unprocessed.inputMetadata:
continue
input_data[arg_name] = unprocessed.inputMetadata[input_path]
input_data = add_InputMetadata(spec, unprocessed, errors, input_data, arg_name, input_path)
if spec.function == "concatenate":
args["accession_version"] = id
filledin_order: InputMetadata = {}
for item in spec.args["order"]:
filledin_order = add_InputMetadata(
copy.deepcopy(spec), unprocessed, errors, filledin_order, item, item
)
args["order"] = [filledin_order[item] for item in spec.args["order"]]

try:
processing_result = ProcessingFunctions.call_function(
spec.function, spec.args, input_data, output_field
spec.function, args, input_data, output_field
)
except Exception as e:
msg = f"Processing for spec: {spec} with input data: {input_data} failed with {e}"
Expand Down Expand Up @@ -403,6 +424,7 @@ def process_single(
)
spec.args = {} if spec.args is None else spec.args
processing_result = get_metadata(
id,
spec,
output_field,
unprocessed,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -273,6 +273,73 @@ def parse_timestamp(
errors=errors,
)

@staticmethod
def concatenate(
    input_data: InputMetadata, output_field: str, args: FunctionArgs = None
) -> ProcessingResult:
    """Concatenates input fields with accession_version using the "/" separator in the order
    specified by the order argument.

    Date and timestamp inputs are normalized via process_date/parse_timestamp
    before joining; errors and warnings from those sub-calls are propagated.
    Returns a ProcessingResult whose datum is the joined string, or None with
    a "Concatenation failed." error when the field count does not match the
    order list or the join itself fails.
    """
    warnings: list[ProcessingAnnotation] = []
    errors: list[ProcessingAnnotation] = []

    # +1 accounts for the accession_version inserted by the caller.
    number_fields = len(input_data.keys()) + 1

    accession_version = args["accession_version"]
    order = args["order"]

    # Check accessionVersion only exists once in the list:
    if number_fields != len(order):
        errors.append(
            ProcessingAnnotation(
                source=[
                    AnnotationSource(name=output_field, type=AnnotationSourceType.METADATA)
                ],
                message="Concatenation failed.",
            )
        )
        return ProcessingResult(
            datum=None,
            warnings=warnings,
            errors=errors,
        )

    # Normalize each input; keyed by the raw value because `order` holds
    # resolved values, not argument names (see get_metadata).
    formatted_input_data = {}
    for key, item in input_data.items():
        if key == "date":
            processed = ProcessingFunctions.process_date({key: item}, output_field)
            formatted_input_data[item] = "" if processed.datum is None else processed.datum
            errors += processed.errors
            warnings += processed.warnings
        elif key == "timestamp":
            processed = ProcessingFunctions.parse_timestamp({key: item}, output_field)
            formatted_input_data[item] = "" if processed.datum is None else processed.datum
            errors += processed.errors
            warnings += processed.warnings
        else:
            formatted_input_data[item] = item
    logging.debug(f"formatted input data:{formatted_input_data}")

    try:
        concatenation_order = [formatted_input_data.get(i, accession_version) for i in order]
        result = "/".join(concatenation_order)

        return ProcessingResult(datum=result, warnings=warnings, errors=errors)
    except (ValueError, TypeError):
        # str.join raises TypeError (not ValueError) on None/non-str
        # elements, e.g. when an input field could not be resolved; report
        # a processing error instead of crashing the pipeline.
        errors.append(
            ProcessingAnnotation(
                source=[
                    AnnotationSource(name=output_field, type=AnnotationSourceType.METADATA)
                ],
                message="Concatenation failed.",
            )
        )
        return ProcessingResult(
            datum=None,
            errors=errors,
            warnings=warnings,
        )

@staticmethod
def identity(
input_data: InputMetadata, output_field: str, args: FunctionArgs = None
Expand Down
3 changes: 3 additions & 0 deletions website/src/components/SequenceDetailsPage/DataTable.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@ interface Props {
const DataTableComponent: React.FC<Props> = ({ dataTableData, dataUseTermsHistory }) => {
return (
<div>
{dataTableData.topmatter.displayName !== undefined && (
<div className='px-6 mb-4 italic'>{dataTableData.topmatter.displayName}</div>
)}
{dataTableData.topmatter.authors !== undefined && dataTableData.topmatter.authors.length > 0 && (
<div className='px-6 mb-4'>
<AuthorList authors={dataTableData.topmatter.authors} />
Expand Down
11 changes: 11 additions & 0 deletions website/src/components/SequenceDetailsPage/getDataTableData.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import type { TableDataEntry } from './types.ts';
export type DataTableData = {
topmatter: {
authors: string[] | undefined;
displayName: string | undefined;
};
table: {
header: string;
Expand All @@ -14,6 +15,7 @@ export function getDataTableData(listTableDataEntries: TableDataEntry[]): DataTa
const result: DataTableData = {
topmatter: {
authors: undefined,
displayName: undefined,
anna-parker marked this conversation as resolved.
Show resolved Hide resolved
},
table: [],
};
Expand All @@ -33,6 +35,15 @@ export function getDataTableData(listTableDataEntries: TableDataEntry[]): DataTa
continue;
}

if (
result.topmatter.displayName === undefined &&
entry.type.kind === 'metadata' &&
entry.name === 'display_name'
) {
result.topmatter.displayName = entry.value.toString();
continue;
}

if (!tableHeaderMap.has(entry.header)) {
tableHeaderMap.set(entry.header, []);
}
Expand Down
Loading