Skip to content

Commit

Permalink
Merge pull request #2418 from opencb/TASK-5877
Browse files Browse the repository at this point in the history
TASK-5877 - Accept non-normalized variant ID filtering by variant
  • Loading branch information
j-coll authored Apr 24, 2024
2 parents e7983a1 + 92492ec commit 33dd860
Show file tree
Hide file tree
Showing 5 changed files with 64 additions and 5 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -1200,7 +1200,7 @@ public DataResult<Variant> getSampleData(String variantStr, String study, QueryO
public Variant getVariant(String variantStr) {
final Variant variant;
if (VariantQueryUtils.isVariantId(variantStr)) {
variant = new Variant(variantStr);
variant = VariantQueryUtils.toVariant(variantStr, true);
} else if (VariantQueryUtils.isVariantAccession(variantStr)) {
VariantQueryResult<Variant> result = get(new Query(VariantQueryParam.ANNOT_XREF.key(), variantStr),
new QueryOptions(QueryOptions.INCLUDE, VariantField.ID).append(QueryOptions.LIMIT, 1).append(QueryOptions.COUNT, true));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@
import org.opencb.biodata.models.variant.Variant;
import org.opencb.biodata.models.variant.avro.ClinicalSignificance;
import org.opencb.biodata.models.variant.avro.VariantType;
import org.opencb.biodata.models.variant.exceptions.NonStandardCompliantSampleField;
import org.opencb.biodata.models.variant.metadata.VariantFileHeaderComplexLine;
import org.opencb.biodata.tools.variant.VariantNormalizer;
import org.opencb.commons.datastore.core.Query;
import org.opencb.commons.datastore.core.QueryOptions;
import org.opencb.commons.datastore.core.QueryParam;
Expand Down Expand Up @@ -862,8 +864,17 @@ public static ParsedVariantQuery.VariantQueryXref parseXrefs(Query query) {
}
}
}
}

if (!xrefs.getVariants().isEmpty()) {
List<Variant> normalizedVariants = normalizeVariants(xrefs.getVariants());
for (Variant normalizedVariant : normalizedVariants) {
if (!xrefs.getVariants().contains(normalizedVariant)) {
xrefs.getVariants().add(normalizedVariant);
}
}
}

if (isValidParam(query, ANNOT_GENE_ROLE_IN_CANER_GENES)) {
List<String> thisGenes = query.getAsStringList(ANNOT_GENE_ROLE_IN_CANER_GENES.key());
if (thisGenes.size() != 1 || !thisGenes.get(0).equals(NONE)) {
Expand All @@ -890,6 +901,21 @@ public static ParsedVariantQuery.VariantQueryXref parseXrefs(Query query) {
return xrefs;
}

/**
 * Normalizes a single variant.
 *
 * Wraps the variant in a one-element list, delegates to
 * {@link #normalizeVariants(List)} and returns the first element of the result.
 * NOTE(review): any further elements produced by the normalizer are ignored here —
 * confirm that a single input always yields exactly one normalized variant.
 *
 * @param variant variant to normalize
 * @return first variant of the normalized list
 */
public static Variant normalizeVariant(Variant variant) {
    List<Variant> singleInput = Collections.singletonList(variant);
    List<Variant> normalized = normalizeVariants(singleInput);
    return normalized.get(0);
}

/**
 * Runs a list of variants through a freshly created {@link VariantNormalizer}.
 *
 * A {@link NonStandardCompliantSampleField} raised during normalization is not
 * propagated as-is; it is rethrown as the exception built by
 * {@code VariantQueryException.internalException(e)}.
 *
 * @param variants variants to normalize
 * @return list returned by {@code VariantNormalizer.normalize(variants, false)}
 */
public static List<Variant> normalizeVariants(List<Variant> variants) {
    // The normalizer is created outside the try block: only normalize() is guarded.
    final VariantNormalizer normalizer = new VariantNormalizer();
    try {
        // NOTE(review): the meaning of the 'false' flag is defined by VariantNormalizer — confirm there.
        return normalizer.normalize(variants, false);
    } catch (NonStandardCompliantSampleField e) {
        throw VariantQueryException.internalException(e);
    }
}

public static ParsedQuery<KeyOpValue<String, Float>> parseFreqFilter(Query query, QueryParam queryParam) {
return VariantQueryUtils.splitValue(query, queryParam)
.map(VariantQueryUtils::parseKeyOpValue)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -578,6 +578,14 @@ public static Variant toVariant(String value) {
return variant;
}

/**
 * Parses a variant string with {@link #toVariant(String)} and optionally normalizes it.
 *
 * @param variantStr variant string to parse
 * @param normalize  when {@code true}, a non-null parsed variant is passed through
 *                   {@code VariantQueryParser.normalizeVariant}
 * @return the parsed variant unchanged when {@code normalize} is {@code false} or the
 *         parsed value is {@code null}; otherwise the normalized variant
 */
public static Variant toVariant(String variantStr, boolean normalize) {
    final Variant parsed = toVariant(variantStr);
    // Nothing to normalize: either not requested, or the string did not yield a variant.
    if (!normalize || parsed == null) {
        return parsed;
    }
    return VariantQueryParser.normalizeVariant(parsed);
}

public static String[] splitStudyResource(String value) {
int idx = value.lastIndexOf(STUDY_RESOURCE_SEPARATOR);
if (idx <= 0 || idx == value.length() - 1) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1333,6 +1333,16 @@ public void testSampleData() throws Exception {
assertEquals(0, variant.getStudies().get(0).getFiles().size());
}

@Test
public void testSampleDataUnnormalized() throws Exception {
    // Ask for sample data using a non-normalized variant ID ("1:10352:T:TA");
    // the result is expected to be reported as "1:10353:-:A".
    QueryOptions options = new QueryOptions();
    Variant sampleDataVariant = variantStorageEngine.getSampleData("1:10352:T:TA", study1, options).first();
    assertEquals("1:10353:-:A", sampleDataVariant.toString());
    System.out.println("variant = " + sampleDataVariant.toJson());

    // The first study must carry stats for the default cohort plus 4 samples and 4 files.
    assertNotNull(sampleDataVariant.getStudies().get(0).getStats(DEFAULT_COHORT));
    assertEquals(4, sampleDataVariant.getStudies().get(0).getSamples().size());
    assertEquals(4, sampleDataVariant.getStudies().get(0).getFiles().size());
}

@Test
public void testCount() throws StorageEngineException {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -529,11 +529,14 @@ public long filterPopulation(DataResult<Variant> queryResult, Predicate<Variant>
public void testGetAllVariants_variantId() {
int i = 0;
List<Variant> variants = new ArrayList<>();
Map<String, String> normalizedVariants = new HashMap<>();
for (Variant variant : allVariants.getResults()) {
if (i++ % 10 == 0) {
if (!variant.isSymbolic()) {
variants.add(variant);
}
if ((i++ % 10) == 0) {
variants.add(variant);
}
OriginalCall call = variant.getStudies().get(0).getFiles().get(0).getCall();
if (call != null) {
normalizedVariants.put(variant.toString(), call.getVariantId());
}
}
List<Variant> result = query(new Query(ID.key(), variants), new QueryOptions()).getResults();
Expand All @@ -552,6 +555,18 @@ public void testGetAllVariants_variantId() {
}
}
assertEquals(expectedList, actualList);

normalizedVariants.forEach((key, value) -> {
System.out.println(key + " = " + value);
});
List<Variant> resultNormalized = query(new Query(ID.key(), normalizedVariants.values()).append(INCLUDE_FILE.key(), ALL), new QueryOptions()).getResults();
assertEquals(normalizedVariants.size(), resultNormalized.size());
assertTrue(!resultNormalized.isEmpty());
for (Variant variant : resultNormalized) {
String expected = normalizedVariants.get(variant.toString());
String actual = variant.getStudies().get(0).getFiles().get(0).getCall().getVariantId();
assertEquals(expected, actual);
}
}

@Test
Expand Down

0 comments on commit 33dd860

Please sign in to comment.