Skip to content

Commit

Permalink
storage: Accept non-normalized variants when filtering by ID or XREF.…
Browse files Browse the repository at this point in the history
… #TASK-5877
  • Loading branch information
j-coll committed Mar 19, 2024
1 parent 85ff3e4 commit a37f1b4
Show file tree
Hide file tree
Showing 5 changed files with 65 additions and 8 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@
import org.opencb.biodata.models.variant.Variant;
import org.opencb.biodata.models.variant.avro.ClinicalSignificance;
import org.opencb.biodata.models.variant.avro.VariantType;
import org.opencb.biodata.models.variant.exceptions.NonStandardCompliantSampleField;
import org.opencb.biodata.models.variant.metadata.VariantFileHeaderComplexLine;
import org.opencb.biodata.tools.variant.VariantNormalizer;
import org.opencb.commons.datastore.core.Query;
import org.opencb.commons.datastore.core.QueryOptions;
import org.opencb.commons.datastore.core.QueryParam;
Expand Down Expand Up @@ -858,8 +860,17 @@ public static ParsedVariantQuery.VariantQueryXref parseXrefs(Query query) {
}
}
}
}

if (!xrefs.getVariants().isEmpty()) {
List<Variant> normalizedVariants = normalizeVariants(xrefs.getVariants());
for (Variant normalizedVariant : normalizedVariants) {
if (!xrefs.getVariants().contains(normalizedVariant)) {
xrefs.getVariants().add(normalizedVariant);
}
}
}

if (isValidParam(query, ANNOT_GENE_ROLE_IN_CANER_GENES)) {
List<String> thisGenes = query.getAsStringList(ANNOT_GENE_ROLE_IN_CANER_GENES.key());
if (thisGenes.size() != 1 || !thisGenes.get(0).equals(NONE)) {
Expand All @@ -886,6 +897,21 @@ public static ParsedVariantQuery.VariantQueryXref parseXrefs(Query query) {
return xrefs;
}

/**
 * Normalizes a single variant.
 *
 * <p>Wraps the variant in a singleton list, delegates to {@link #normalizeVariants(List)}
 * and returns the first element of the result.
 *
 * <p>NOTE(review): only the first normalized variant is returned. This presumably relies on
 * the normalizer producing exactly one result for a single input variant; confirm this
 * holds for the inputs accepted here.
 *
 * @param variant variant to normalize
 * @return the first variant produced by the normalization
 */
public static Variant normalizeVariant(Variant variant) {
    List<Variant> normalized = normalizeVariants(Collections.singletonList(variant));
    return normalized.get(0);
}

/**
 * Normalizes a list of variants using a freshly created {@link VariantNormalizer}.
 *
 * <p>The normalizer is invoked with its second argument set to {@code false}
 * (presumably the "reuse" flag, i.e. input objects are not modified in place; verify
 * against the VariantNormalizer API).
 *
 * @param variants variants to normalize
 * @return the list returned by the normalizer
 * @throws VariantQueryException wrapping any {@link NonStandardCompliantSampleField}
 *         raised by the normalizer, reported as an internal exception
 */
public static List<Variant> normalizeVariants(List<Variant> variants) {
    try {
        return new VariantNormalizer().normalize(variants, false);
    } catch (NonStandardCompliantSampleField e) {
        throw VariantQueryException.internalException(e);
    }
}

public static ParsedQuery<KeyOpValue<String, Float>> parseFreqFilter(Query query, QueryParam queryParam) {
return VariantQueryUtils.splitValue(query, queryParam)
.map(VariantQueryUtils::parseKeyOpValue)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -552,6 +552,14 @@ public static Variant toVariant(String value) {
return variant;
}

/**
 * Parses a variant string and optionally normalizes the result.
 *
 * @param variantStr variant string, parsed with {@link #toVariant(String)}
 * @param normalize  when {@code true}, a non-null parsed variant is passed through
 *                   {@code VariantQueryParser.normalizeVariant}
 * @return the parsed (and possibly normalized) variant, or {@code null} if
 *         {@link #toVariant(String)} returned {@code null}
 */
public static Variant toVariant(String variantStr, boolean normalize) {
    Variant parsed = toVariant(variantStr);
    // Nothing to normalize when parsing yielded no variant or normalization was not requested.
    if (!normalize || parsed == null) {
        return parsed;
    }
    return VariantQueryParser.normalizeVariant(parsed);
}

public static String[] splitStudyResource(String value) {
int idx = value.lastIndexOf(STUDY_RESOURCE_SEPARATOR);
if (idx <= 0 || idx == value.length() - 1) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1333,6 +1333,16 @@ public void testSampleData() throws Exception {
assertEquals(0, variant.getStudies().get(0).getFiles().size());
}

/**
 * Checks that sample data can be requested with a variant ID that is not in its
 * normalized form: the query uses "1:10352:T:TA" and the returned variant is expected
 * to print as "1:10353:-:A", with cohort stats, 4 samples and 4 files in its first study.
 */
@Test
public void testSampleDataUnnormalized() throws Exception {
// Query by a non-normalized variant ID; the engine is expected to resolve it to the normalized variant.
Variant variant = variantStorageEngine.getSampleData("1:10352:T:TA", study1, new QueryOptions()).first();
// The returned variant must be reported under its normalized representation.
assertEquals("1:10353:-:A", variant.toString());
// Debug output of the full variant for manual inspection of the test run.
System.out.println("variant = " + variant.toJson());
// The first study entry must carry stats for the default cohort plus all samples and files.
assertNotNull(variant.getStudies().get(0).getStats(DEFAULT_COHORT));
assertEquals(4, variant.getStudies().get(0).getSamples().size());
assertEquals(4, variant.getStudies().get(0).getFiles().size());
}

@Test
public void testCount() throws StorageEngineException {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,7 @@
import org.opencb.biodata.models.variant.Variant;
import org.opencb.biodata.models.variant.VariantFileMetadata;
import org.opencb.biodata.models.variant.avro.*;
import org.opencb.biodata.models.variant.exceptions.NonStandardCompliantSampleField;
import org.opencb.biodata.models.variant.stats.VariantStats;
import org.opencb.biodata.tools.variant.VariantNormalizer;
import org.opencb.commons.datastore.core.DataResult;
import org.opencb.commons.datastore.core.ObjectMap;
import org.opencb.commons.datastore.core.Query;
Expand Down Expand Up @@ -531,11 +529,14 @@ public long filterPopulation(DataResult<Variant> queryResult, Predicate<Variant>
public void testGetAllVariants_variantId() {
int i = 0;
List<Variant> variants = new ArrayList<>();
Map<String, String> normalizedVariants = new HashMap<>();
for (Variant variant : allVariants.getResults()) {
if (i++ % 10 == 0) {
if (!variant.isSymbolic()) {
variants.add(variant);
}
if ((i++ % 10) == 0) {
variants.add(variant);
}
OriginalCall call = variant.getStudies().get(0).getFiles().get(0).getCall();
if (call != null) {
normalizedVariants.put(variant.toString(), call.getVariantId());
}
}
List<Variant> result = query(new Query(ID.key(), variants), new QueryOptions()).getResults();
Expand All @@ -554,6 +555,18 @@ public void testGetAllVariants_variantId() {
}
}
assertEquals(expectedList, actualList);

normalizedVariants.forEach((key, value) -> {
System.out.println(key + " = " + value);
});
List<Variant> resultNormalized = query(new Query(ID.key(), normalizedVariants.values()).append(INCLUDE_FILE.key(), ALL), new QueryOptions()).getResults();
assertEquals(normalizedVariants.size(), resultNormalized.size());
assertTrue(!resultNormalized.isEmpty());
for (Variant variant : resultNormalized) {
String expected = normalizedVariants.get(variant.toString());
String actual = variant.getStudies().get(0).getFiles().get(0).getCall().getVariantId();
assertEquals(expected, actual);
}
}

@Test
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@
import org.opencb.opencga.storage.hadoop.variant.GenomeHelper;
import org.opencb.opencga.storage.hadoop.variant.adaptors.VariantHadoopDBAdaptor;
import org.opencb.opencga.storage.hadoop.variant.adaptors.phoenix.PhoenixHelper;
import org.opencb.opencga.storage.hadoop.variant.adaptors.phoenix.VariantPhoenixSchema;
import org.opencb.opencga.storage.hadoop.variant.adaptors.phoenix.VariantPhoenixKeyFactory;
import org.opencb.opencga.storage.hadoop.variant.adaptors.phoenix.VariantPhoenixSchema;
import org.opencb.opencga.storage.hadoop.variant.converters.HBaseVariantConverterConfiguration;
import org.opencb.opencga.storage.hadoop.variant.converters.VariantRow;
import org.opencb.opencga.storage.hadoop.variant.converters.annotation.HBaseToVariantAnnotationConverter;
Expand Down Expand Up @@ -64,7 +64,7 @@ protected DataResult<Variant> getSampleData(String variantStr, String study, Que

final Variant variant;
if (VariantQueryUtils.isVariantId(variantStr)) {
variant = new Variant(variantStr);
variant = VariantQueryUtils.toVariant(variantStr, true);
} else {
variant = cellBaseUtils.getVariant(variantStr);
}
Expand Down

0 comments on commit a37f1b4

Please sign in to comment.