Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(website): Create a standardized sequence name with the format: {country}/{AccessionVersion}/{date} #2246

Merged
merged 20 commits into from
Jul 9, 2024
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions kubernetes/loculus/templates/_preprocessingFromValues.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,12 @@
{{- if .type }}
type: {{ .type }}
{{- end }}
{{- if .order }}
order:
{{- range .order }}
- {{ . }}
{{- end }}
{{- end }}
{{- if .preprocessing }}
{{- if hasKey .preprocessing "function" }}
function: {{ index .preprocessing "function" }}
Expand Down
13 changes: 11 additions & 2 deletions kubernetes/loculus/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,15 @@ defaultOrganismConfig: &defaultOrganismConfig
inputs:
date: sample_collection_date
required: true
- name: display_name
header: "INSDC"
anna-parker marked this conversation as resolved.
Show resolved Hide resolved
order: [geo_loc_country, accession_version, sample_collection_date]
preprocessing:
function: concatenate
inputs:
string: geo_loc_country
date: sample_collection_date
anna-parker marked this conversation as resolved.
Show resolved Hide resolved
noInput: true
- name: ncbi_release_date
displayName: NCBI release date
type: date
Expand Down Expand Up @@ -1193,5 +1202,5 @@ enableCrossRefCredentials: true
runDevelopmentKeycloakDatabase: true
runDevelopmentMainDatabase: true
enforceHTTPS: true
registrationTermsMessage: >
You must agree to the <a href="http://main.loculus.org/terms">terms of use</a>.
registrationTermsMessage: >-
You must agree to the <a href="http://main.loculus.org/terms">terms of use</a>.
98 changes: 60 additions & 38 deletions preprocessing/nextclade/src/loculus_preprocessing/prepro.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import copy
import csv
import json
import logging
Expand Down Expand Up @@ -316,55 +317,75 @@ def null_per_backend(x: Any) -> bool:
return False


def add_InputMetadata(
spec: ProcessingSpec,
unprocessed: UnprocessedAfterNextclade,
errors: list[ProcessingAnnotation],
input_data: InputMetadata,
arg_name: str,
input_path: str,
) -> InputMetadata:
input_data[arg_name] = None
# If field starts with "nextclade.", take from nextclade metadata
nextclade_prefix = "nextclade."
if input_path.startswith(nextclade_prefix):
segment = spec.args.get("segment", "main")
if unprocessed.nextcladeMetadata is None:
errors.append(
ProcessingAnnotation(
source=[
AnnotationSource(
name="main",
type=AnnotationSourceType.NUCLEOTIDE_SEQUENCE,
)
],
message="Nucleotide sequence failed to align",
)
)
return input_data
sub_path = input_path[len(nextclade_prefix) :]
if segment in unprocessed.nextcladeMetadata:
input_data[arg_name] = str(
dpath.get(
unprocessed.nextcladeMetadata[segment],
sub_path,
separator=".",
default=None,
)
)
else:
input_data[arg_name] = None
return input_data
if input_path not in unprocessed.inputMetadata:
return input_data
input_data[arg_name] = unprocessed.inputMetadata[input_path]
return input_data


def get_metadata(
id: AccessionVersion,
spec: ProcessingSpec,
output_field: str,
unprocessed: UnprocessedAfterNextclade,
errors: list[ProcessingAnnotation],
warnings: list[ProcessingAnnotation],
) -> ProcessingResult:
input_data: InputMetadata = {}
args = {} if spec.args is None else copy.deepcopy(spec.args)
anna-parker marked this conversation as resolved.
Show resolved Hide resolved
for arg_name, input_path in spec.inputs.items():
input_data[arg_name] = None
# If field starts with "nextclade.", take from nextclade metadata
nextclade_prefix = "nextclade."
if input_path.startswith(nextclade_prefix):
# Remove "nextclade." prefix
if spec.args is None:
spec.args = {}
segment = spec.args.get("segment", "main")
if unprocessed.nextcladeMetadata is None:
errors.append(
ProcessingAnnotation(
source=[
AnnotationSource(
name="main",
type=AnnotationSourceType.NUCLEOTIDE_SEQUENCE,
)
],
message="Nucleotide sequence failed to align",
)
)
continue
sub_path = input_path[len(nextclade_prefix) :]
if segment in unprocessed.nextcladeMetadata:
input_data[arg_name] = str(
dpath.get(
unprocessed.nextcladeMetadata[segment],
sub_path,
separator=".",
default=None,
)
)
else:
input_data[arg_name] = None
continue
if input_path not in unprocessed.inputMetadata:
continue
input_data[arg_name] = unprocessed.inputMetadata[input_path]
input_data = add_InputMetadata(spec, unprocessed, errors, input_data, arg_name, input_path)
if spec.function == "concatenate":
args["accession_version"] = id
filledin_order: InputMetadata = {}
for item in spec.args["order"]:
filledin_order = add_InputMetadata(
copy.deepcopy(spec), unprocessed, errors, filledin_order, item, item
)
args["order"] = [filledin_order[item] for item in spec.args["order"]]

try:
processing_result = ProcessingFunctions.call_function(
spec.function, spec.args, input_data, output_field
spec.function, args, input_data, output_field
)
except Exception as e:
msg = f"Processing for spec: {spec} with input data: {input_data} failed with {e}"
Expand Down Expand Up @@ -403,6 +424,7 @@ def process_single(
)
spec.args = {} if spec.args is None else spec.args
processing_result = get_metadata(
id,
spec,
output_field,
unprocessed,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -273,6 +273,73 @@ def parse_timestamp(
errors=errors,
)

@staticmethod
def concatenate(
    input_data: InputMetadata, output_field: str, args: FunctionArgs = None
) -> ProcessingResult:
    """Concatenates input fields with accession_version using the "/" separator in the order
    specified by the order argument.

    Date and timestamp inputs are normalized via process_date/parse_timestamp
    before joining; errors and warnings from those sub-calls are propagated.
    Returns a ProcessingResult whose datum is the joined string, or None with
    a "Concatenation failed." error when the field count does not match the
    order list or the join itself fails.
    """
    warnings: list[ProcessingAnnotation] = []
    errors: list[ProcessingAnnotation] = []

    # +1 accounts for the accession_version inserted by the caller.
    number_fields = len(input_data.keys()) + 1

    accession_version = args["accession_version"]
    order = args["order"]

    # Check accessionVersion only exists once in the list:
    if number_fields != len(order):
        errors.append(
            ProcessingAnnotation(
                source=[
                    AnnotationSource(name=output_field, type=AnnotationSourceType.METADATA)
                ],
                message="Concatenation failed.",
            )
        )
        return ProcessingResult(
            datum=None,
            warnings=warnings,
            errors=errors,
        )

    # Normalize each input; keyed by the raw value because `order` holds
    # resolved values, not argument names (see get_metadata).
    formatted_input_data = {}
    for key, item in input_data.items():
        if key == "date":
            processed = ProcessingFunctions.process_date({key: item}, output_field)
            formatted_input_data[item] = "" if processed.datum is None else processed.datum
            errors += processed.errors
            warnings += processed.warnings
        elif key == "timestamp":
            processed = ProcessingFunctions.parse_timestamp({key: item}, output_field)
            formatted_input_data[item] = "" if processed.datum is None else processed.datum
            errors += processed.errors
            warnings += processed.warnings
        else:
            formatted_input_data[item] = item
    logging.debug(f"formatted input data:{formatted_input_data}")

    try:
        concatenation_order = [formatted_input_data.get(i, accession_version) for i in order]
        result = "/".join(concatenation_order)

        return ProcessingResult(datum=result, warnings=warnings, errors=errors)
    except (ValueError, TypeError):
        # str.join raises TypeError (not ValueError) on None/non-str
        # elements, e.g. when an input field could not be resolved; report
        # a processing error instead of crashing the pipeline.
        errors.append(
            ProcessingAnnotation(
                source=[
                    AnnotationSource(name=output_field, type=AnnotationSourceType.METADATA)
                ],
                message="Concatenation failed.",
            )
        )
        return ProcessingResult(
            datum=None,
            errors=errors,
            warnings=warnings,
        )

@staticmethod
def identity(
input_data: InputMetadata, output_field: str, args: FunctionArgs = None
Expand Down
3 changes: 3 additions & 0 deletions website/src/components/SequenceDetailsPage/DataTable.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@ interface Props {
const DataTableComponent: React.FC<Props> = ({ dataTableData, dataUseTermsHistory }) => {
return (
<div>
{dataTableData.topmatter.displayName !== undefined && (
<div className='px-6 mb-4 italic'>{dataTableData.topmatter.displayName}</div>
)}
{dataTableData.topmatter.authors !== undefined && dataTableData.topmatter.authors.length > 0 && (
<div className='px-6 mb-4'>
<AuthorList authors={dataTableData.topmatter.authors} />
Expand Down
11 changes: 11 additions & 0 deletions website/src/components/SequenceDetailsPage/getDataTableData.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import type { TableDataEntry } from './types.ts';
export type DataTableData = {
topmatter: {
authors: string[] | undefined;
displayName: string | undefined;
};
table: {
header: string;
Expand All @@ -14,6 +15,7 @@ export function getDataTableData(listTableDataEntries: TableDataEntry[]): DataTa
const result: DataTableData = {
topmatter: {
authors: undefined,
displayName: undefined,
anna-parker marked this conversation as resolved.
Show resolved Hide resolved
},
table: [],
};
Expand All @@ -33,6 +35,15 @@ export function getDataTableData(listTableDataEntries: TableDataEntry[]): DataTa
continue;
}

if (
result.topmatter.displayName === undefined &&
entry.type.kind === 'metadata' &&
entry.name === 'display_name'
) {
result.topmatter.displayName = entry.value.toString();
continue;
}

if (!tableHeaderMap.has(entry.header)) {
tableHeaderMap.set(entry.header, []);
}
Expand Down
Loading