diff --git a/bento_variant_service/search.py b/bento_variant_service/search.py index 4e37f3c..b67e865 100644 --- a/bento_variant_service/search.py +++ b/bento_variant_service/search.py @@ -54,6 +54,9 @@ def search_worker_prime( try: variant = next(possible_matches) + # TODO: Do we use as_chord_representation or as_augmented_chord_representation here? + # Maybe not augmented, since we won't allow querying augmented stuff. + match = rest_of_query is None or check_ast_against_data_structure( rest_of_query, variant.as_chord_representation(), VARIANT_SCHEMA) found = found or match @@ -62,7 +65,7 @@ def search_worker_prime( break if match: # implicitly internal_data is True here as well - matches.append(variant.as_chord_representation()) + matches.append(variant.as_augmented_chord_representation()) except StopIteration: break diff --git a/bento_variant_service/tables/vcf/table.py b/bento_variant_service/tables/vcf/table.py index ed903b1..e1bde7b 100644 --- a/bento_variant_service/tables/vcf/table.py +++ b/bento_variant_service/tables/vcf/table.py @@ -152,6 +152,7 @@ def _variants( start_pos=int(row[1]), ref_bases=row[3], alt_bases=tuple(row[4].split(",")), + file_uri=vcf.original_index_uri, ) variant.calls = tuple(VCFVariantTable._variant_calls(variant, vcf.sample_ids, row, diff --git a/bento_variant_service/variants/models.py b/bento_variant_service/variants/models.py index 7b54f15..82b43d9 100644 --- a/bento_variant_service/variants/models.py +++ b/bento_variant_service/variants/models.py @@ -14,7 +14,7 @@ class Variant: """ def __init__(self, assembly_id: str, chromosome: str, ref_bases: str, alt_bases: Tuple[str, ...], start_pos: int, - calls: Tuple["Call"] = ()): + calls: Tuple["Call"] = (), file_uri: Optional[str] = None): self.assembly_id: str = assembly_id # Assembly ID for context self.chromosome: str = chromosome # Chromosome where the variant occurs self.ref_bases: str = ref_bases # Reference bases @@ -22,6 +22,8 @@ def __init__(self, assembly_id: str, 
chromosome: str, ref_bases: str, alt_bases: self.start_pos: int = start_pos # Starting position on the chromosome w/r/t the reference, 0-indexed self.calls: Tuple["Call"] = calls # Variant calls, per sample TODO: Make this a dict? + self.file_uri: Optional[str] = file_uri # File URI (optional; None when not supplied) + @property def end_pos(self) -> int: """ @@ -40,6 +42,14 @@ def as_chord_representation(self): "calls": [c.as_chord_representation() for c in self.calls], } + def as_augmented_chord_representation(self): + return { + **self.as_chord_representation(), + # _ prefix is context dependent -> immune from equality, used by Bento in weird contexts + "_extra": { + "file_uri": self.file_uri, + }, + } def __eq__(self, other): # Use and shortcutting to return False early if the other instance isn't a Variant diff --git a/bento_variant_service/variants/schemas.py b/bento_variant_service/variants/schemas.py index 7735eda..9592e8e 100644 --- a/bento_variant_service/variants/schemas.py +++ b/bento_variant_service/variants/schemas.py @@ -193,9 +193,31 @@ "required": False, "type": "unlimited", "order": 6, - } - } - } + }, + }, + "_extra": { + "type": "object", + "properties": { + "file_uri": { + "type": "string", + "search": { + "operations": [op.SEARCH_OP_EQ], + "queryable": "internal", + "canNegate": True, + "required": False, + "type": "single", # single / unlimited + "order": 0, + }, + }, + }, + "search": { + "queryable": "internal", + "required": False, + "type": "unlimited", + "order": 8, + }, + }, + }, } VARIANT_TABLE_METADATA_SCHEMA = { diff --git a/tests/test_search.py b/tests/test_search.py index d61b30e..4e237af 100644 --- a/tests/test_search.py +++ b/tests/test_search.py @@ -170,8 +170,11 @@ def test_chord_variant_search(app, client, table_manager): assert "results" in data assert "fixed_id" in data["results"] assert len(data["results"]["fixed_id"]["matches"]) == 3 - assert json.dumps(data["results"]["fixed_id"]["matches"][0], sort_keys=True) == \ - 
json.dumps(VARIANT_1.as_chord_representation(), sort_keys=True) + first_match = data["results"]["fixed_id"]["matches"][0] + assert json.dumps(first_match, sort_keys=True) == \ + json.dumps(VARIANT_1.as_augmented_chord_representation(), sort_keys=True) + assert json.dumps(dict((k, v) for k, v in first_match.items() if not k.startswith("_")), + sort_keys=True) == json.dumps(VARIANT_1.as_chord_representation(), sort_keys=True) # Test private table search @@ -194,8 +197,8 @@ def test_chord_variant_search(app, client, table_manager): assert "results" in data assert len(data["results"]) == 3 - assert json.dumps(data["results"][0], sort_keys=True) == json.dumps(VARIANT_1.as_chord_representation(), - sort_keys=True) + assert json.dumps(data["results"][0], sort_keys=True) == \ + json.dumps(VARIANT_1.as_augmented_chord_representation(), sort_keys=True) for q, r in TEST_PRIVATE_QUERIES: qj = {"query": q} diff --git a/tests/test_vcf_file.py b/tests/test_vcf_file.py index 8b4f0f7..c20b4b4 100644 --- a/tests/test_vcf_file.py +++ b/tests/test_vcf_file.py @@ -7,6 +7,7 @@ def test_vcf_file(): file = VCFFile(VCF_ONE_VAR_FILE_URI) assert file.path == os.path.realpath(VCF_ONE_VAR_FILE_PATH) + assert file.original_uri == VCF_ONE_VAR_FILE_URI assert file.index_path is None assert file.assembly_id == "GRCh37" assert len(file.sample_ids) == 835