Skip to content
This repository has been archived by the owner on Jan 16, 2023. It is now read-only.

Commit

Permalink
Add file uri to _extra part of variant objects
Browse files Browse the repository at this point in the history
(for linking purposes)
  • Loading branch information
davidlougheed committed Jul 7, 2020
1 parent ffe8980 commit b60477f
Show file tree
Hide file tree
Showing 6 changed files with 49 additions and 9 deletions.
5 changes: 4 additions & 1 deletion bento_variant_service/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,9 @@ def search_worker_prime(
try:
variant = next(possible_matches)

# TODO: Do we use as_chord_representation or as_augmented_chord_representation here?
# Maybe not augmented, since we won't allow querying augmented stuff.

match = rest_of_query is None or check_ast_against_data_structure(
rest_of_query, variant.as_chord_representation(), VARIANT_SCHEMA)
found = found or match
Expand All @@ -62,7 +65,7 @@ def search_worker_prime(
break

if match: # implicitly internal_data is True here as well
matches.append(variant.as_chord_representation())
matches.append(variant.as_augmented_chord_representation())

except StopIteration:
break
Expand Down
1 change: 1 addition & 0 deletions bento_variant_service/tables/vcf/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,7 @@ def _variants(
start_pos=int(row[1]),
ref_bases=row[3],
alt_bases=tuple(row[4].split(",")),
file_uri=vcf.original_index_uri,
)

variant.calls = tuple(VCFVariantTable._variant_calls(variant, vcf.sample_ids, row,
Expand Down
12 changes: 11 additions & 1 deletion bento_variant_service/variants/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,16 @@ class Variant:
"""

def __init__(self, assembly_id: str, chromosome: str, ref_bases: str, alt_bases: Tuple[str, ...], start_pos: int,
calls: Tuple["Call"] = ()):
calls: Tuple["Call"] = (), file_uri: Optional[str] = None):
self.assembly_id: str = assembly_id # Assembly ID for context
self.chromosome: str = chromosome # Chromosome where the variant occurs
self.ref_bases: str = ref_bases # Reference bases
self.alt_bases: Tuple[str] = alt_bases # Alternate bases TODO: Structural variants
self.start_pos: int = start_pos # Starting position on the chromosome w/r/t the reference, 0-indexed
self.calls: Tuple["Call"] = calls # Variant calls, per sample TODO: Make this a dict?

self.file_uri: Optional[str] = file_uri # URI of the file this variant came from, if known; exposed under "_extra" for linking purposes

@property
def end_pos(self) -> int:
"""
Expand All @@ -40,6 +42,14 @@ def as_chord_representation(self):
"calls": [c.as_chord_representation() for c in self.calls],
}

def as_augmented_chord_representation(self):
    """
    Build the CHORD representation of this variant plus an "_extra" section.

    The result holds every key produced by as_chord_representation(), followed
    by an "_extra" object which currently carries only the variant's file_uri.
    """
    augmented = dict(self.as_chord_representation())
    # Keys with a leading underscore are context-dependent extras: they are
    # kept apart from the core representation (and so from equality checks)
    # and are used by Bento in special contexts.
    augmented["_extra"] = {"file_uri": self.file_uri}
    return augmented

def __eq__(self, other):
# Rely on `and` short-circuiting to return False early if the other instance isn't a Variant
Expand Down
28 changes: 25 additions & 3 deletions bento_variant_service/variants/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,9 +193,31 @@
"required": False,
"type": "unlimited",
"order": 6,
}
}
}
},
},
"_extra": {
"type": "object",
"properties": {
"file_uri": {
"type": "string",
"search": {
"operations": [op.SEARCH_OP_EQ],
"queryable": "internal",
"canNegate": True,
"required": False,
"type": "single", # single / unlimited
"order": 0,
},
},
},
"search": {
"queryable": "internal",
"required": False,
"type": "unlimited",
"order": 8,
},
},
},
}

VARIANT_TABLE_METADATA_SCHEMA = {
Expand Down
11 changes: 7 additions & 4 deletions tests/test_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,8 +170,11 @@ def test_chord_variant_search(app, client, table_manager):
assert "results" in data
assert "fixed_id" in data["results"]
assert len(data["results"]["fixed_id"]["matches"]) == 3
assert json.dumps(data["results"]["fixed_id"]["matches"][0], sort_keys=True) == \
json.dumps(VARIANT_1.as_chord_representation(), sort_keys=True)
first_match = data["results"]["fixed_id"]["matches"][0]
assert json.dumps(first_match, sort_keys=True) == \
json.dumps(VARIANT_1.as_augmented_chord_representation(), sort_keys=True)
assert json.dumps(dict((k, v) for k, v in first_match.items() if not k.startswith("_")),
sort_keys=True) == json.dumps(VARIANT_1.as_chord_representation(), sort_keys=True)

# Test private table search

Expand All @@ -194,8 +197,8 @@ def test_chord_variant_search(app, client, table_manager):
assert "results" in data

assert len(data["results"]) == 3
assert json.dumps(data["results"][0], sort_keys=True) == json.dumps(VARIANT_1.as_chord_representation(),
sort_keys=True)
assert json.dumps(data["results"][0], sort_keys=True) == \
json.dumps(VARIANT_1.as_augmented_chord_representation(), sort_keys=True)

for q, r in TEST_PRIVATE_QUERIES:
qj = {"query": q}
Expand Down
1 change: 1 addition & 0 deletions tests/test_vcf_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
def test_vcf_file():
file = VCFFile(VCF_ONE_VAR_FILE_URI)
assert file.path == os.path.realpath(VCF_ONE_VAR_FILE_PATH)
assert file.original_uri == VCF_ONE_VAR_FILE_URI
assert file.index_path is None
assert file.assembly_id == "GRCh37"
assert len(file.sample_ids) == 835
Expand Down

0 comments on commit b60477f

Please sign in to comment.