Skip to content

Commit

Permalink
Other approach
Browse files Browse the repository at this point in the history
  • Loading branch information
anna-parker committed Oct 7, 2024
1 parent 6607877 commit 1eca007
Show file tree
Hide file tree
Showing 5 changed files with 447 additions and 140 deletions.
68 changes: 66 additions & 2 deletions preprocessing/nextclade/src/loculus_preprocessing/datatypes.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# ruff: noqa: N815
from collections import defaultdict
from dataclasses import dataclass, field
from enum import StrEnum, unique
from typing import Any
Expand Down Expand Up @@ -79,7 +79,7 @@ class UnprocessedAfterNextclade:
# Derived metadata produced by Nextclade
nextcladeMetadata: dict[SegmentName, Any] | None
unalignedNucleotideSequences: dict[SegmentName, NucleotideSequence | None]
alignedNucleotideSequences: dict[SegmentName, NucleotideSequence | None]
alignedNucleotideSequences: dict[GeneName, NucleotideSequence | None]
nucleotideInsertions: dict[SegmentName, list[NucleotideInsertion]]
alignedAminoAcidSequences: dict[GeneName, AminoAcidSequence | None]
aminoAcidInsertions: dict[GeneName, list[AminoAcidInsertion]]
Expand Down Expand Up @@ -115,3 +115,67 @@ class ProcessingResult:
datum: ProcessedMetadataValue
warnings: list[ProcessingAnnotation] = field(default_factory=list)
errors: list[ProcessingAnnotation] = field(default_factory=list)


@dataclass
class UnprocessedEntryFactory:
    """Build ``UnprocessedEntry`` objects with sequentially numbered accessions.

    The placeholder submitter name suggests this is a fixture helper for
    tests -- NOTE(review): confirm against the callers.
    """

    # Next number to hand out. The static method below reads and bumps this
    # attribute on the class itself, so the sequence is shared by every caller
    # rather than being per-instance state.
    _counter: int = 0

    @staticmethod
    def create_unprocessed_entry(
        metadata_dict: dict[str, str],
    ) -> UnprocessedEntry:
        """Return a new unprocessed entry wrapping ``metadata_dict``.

        Args:
            metadata_dict: Metadata stored on the entry's data as-is.

        Returns:
            An ``UnprocessedEntry`` whose ``accessionVersion`` is
            ``LOC_<n>.1``, where ``<n>`` is the class counter value before
            this call; the counter is incremented by one as a side effect.
        """
        sequence_number = UnprocessedEntryFactory._counter
        UnprocessedEntryFactory._counter = sequence_number + 1

        payload = UnprocessedData(
            submitter="test_submitter",
            metadata=metadata_dict,
            # Single "main" segment holding an empty sequence.
            unalignedNucleotideSequences={"main": ""},
        )
        return UnprocessedEntry(
            accessionVersion=f"LOC_{sequence_number}.1",
            data=payload,
        )


@dataclass
class ProcessedEntryFactory:
    """Build ``ProcessedEntry`` objects with sequentially numbered accessions.

    Each entry carries the given metadata, fixed placeholder sequence data
    for a single "main" segment, and optional metadata errors and warnings.
    """

    # Next number to hand out. The static method below reads and bumps this
    # attribute on the class itself, so the sequence is shared by every caller
    # rather than being per-instance state.
    _counter: int = 0

    @staticmethod
    def create_processed_entry(
        metadata_dict: dict[str, str],
        metadata_errors: list[tuple[str, str]] | None = None,
        metadata_warnings: list[tuple[str, str]] | None = None,
    ) -> ProcessedEntry:
        """Return a new processed entry wrapping ``metadata_dict``.

        Args:
            metadata_dict: Metadata stored on the entry's data as-is.
            metadata_errors: ``(name, message)`` pairs, each turned into one
                error annotation with a METADATA source. ``None`` means none.
            metadata_warnings: ``(name, message)`` pairs, each turned into one
                warning annotation with a METADATA source. ``None`` means none.

        Returns:
            A ``ProcessedEntry`` with ``version`` 1 and accession ``LOC_<n>``,
            where ``<n>`` is the class counter value before this call; the
            counter is incremented by one as a side effect.
        """

        def build_annotations(pairs: list[tuple[str, str]]) -> list[ProcessingAnnotation]:
            # Element 0 of each pair names the annotation source, element 1
            # is the message text.
            annotations = []
            for pair in pairs:
                origin = AnnotationSource(name=pair[0], type=AnnotationSourceType.METADATA)
                annotations.append(ProcessingAnnotation(source=[origin], message=pair[1]))
            return annotations

        error_pairs = [] if metadata_errors is None else metadata_errors
        warning_pairs = [] if metadata_warnings is None else metadata_warnings

        sequence_number = ProcessedEntryFactory._counter
        ProcessedEntryFactory._counter = sequence_number + 1

        payload = ProcessedData(
            metadata=metadata_dict,
            unalignedNucleotideSequences={"main": ""},
            alignedNucleotideSequences={"main": None},
            nucleotideInsertions={"main": []},
            alignedAminoAcidSequences={},
            aminoAcidInsertions={},
        )
        error_annotations = build_annotations(error_pairs)
        warning_annotations = build_annotations(warning_pairs)

        return ProcessedEntry(
            accession=f"LOC_{sequence_number}",
            version=1,
            data=payload,
            errors=error_annotations,
            warnings=warning_annotations,
        )
Original file line number Diff line number Diff line change
Expand Up @@ -625,7 +625,7 @@ def process_single(
ProcessingAnnotation(
source=[
AnnotationSource(
name="main",
name=output_field,
type=AnnotationSourceType.METADATA,
)
],
Expand Down
62 changes: 0 additions & 62 deletions preprocessing/nextclade/tests/expected_output.json

This file was deleted.

Loading

0 comments on commit 1eca007

Please sign in to comment.