diff --git a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/clinical/ClinicalInterpretationManager.java b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/clinical/ClinicalInterpretationManager.java index 3ca18fe0eb9..f240d877fc5 100644 --- a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/clinical/ClinicalInterpretationManager.java +++ b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/clinical/ClinicalInterpretationManager.java @@ -61,7 +61,7 @@ import org.opencb.opencga.core.models.study.Study; import org.opencb.opencga.core.models.user.User; import org.opencb.opencga.core.response.OpenCGAResult; -import org.opencb.opencga.core.response.VariantQueryResult; +import org.opencb.opencga.storage.core.variant.query.VariantQueryResult; import org.opencb.opencga.storage.core.StorageEngineFactory; import org.opencb.opencga.storage.core.clinical.ClinicalVariantEngine; import org.opencb.opencga.storage.core.clinical.ClinicalVariantException; diff --git a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/clinical/exomiser/ExomiserInterpretationAnalysis.java b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/clinical/exomiser/ExomiserInterpretationAnalysis.java index 5611d1c0d4f..55b291f156e 100644 --- a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/clinical/exomiser/ExomiserInterpretationAnalysis.java +++ b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/clinical/exomiser/ExomiserInterpretationAnalysis.java @@ -44,7 +44,7 @@ import org.opencb.opencga.core.models.common.Enums; import org.opencb.opencga.core.models.individual.Individual; import org.opencb.opencga.core.response.OpenCGAResult; -import org.opencb.opencga.core.response.VariantQueryResult; +import org.opencb.opencga.storage.core.variant.query.VariantQueryResult; import org.opencb.opencga.core.tools.annotations.Tool; import org.opencb.opencga.storage.core.exceptions.StorageEngineException; import org.opencb.opencga.storage.core.variant.adaptors.VariantQueryParam; diff --git a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/clinical/tiering/CancerTieringInterpretationAnalysisExecutor.java b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/clinical/tiering/CancerTieringInterpretationAnalysisExecutor.java index ce89757143a..ea8dc7c3c89 100644 --- a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/clinical/tiering/CancerTieringInterpretationAnalysisExecutor.java +++ b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/clinical/tiering/CancerTieringInterpretationAnalysisExecutor.java @@ -41,7 +41,7 @@ import org.opencb.opencga.core.models.individual.Individual; import org.opencb.opencga.core.models.sample.Sample; import org.opencb.opencga.core.response.OpenCGAResult; -import org.opencb.opencga.core.response.VariantQueryResult; +import org.opencb.opencga.storage.core.variant.query.VariantQueryResult; import org.opencb.opencga.core.tools.OpenCgaToolExecutor; import org.opencb.opencga.core.tools.annotations.ToolExecutor; import org.opencb.opencga.storage.core.exceptions.StorageEngineException; diff --git a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/clinical/zetta/ZettaInterpretationAnalysisExecutor.java b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/clinical/zetta/ZettaInterpretationAnalysisExecutor.java index c45f0b484a9..cbf7e8211d2 100644 --- a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/clinical/zetta/ZettaInterpretationAnalysisExecutor.java +++ b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/clinical/zetta/ZettaInterpretationAnalysisExecutor.java @@ -28,7 +28,7 @@ import org.opencb.opencga.analysis.clinical.ClinicalUtils; import org.opencb.opencga.catalog.exceptions.CatalogException; import org.opencb.opencga.core.exceptions.ToolException; -import org.opencb.opencga.core.response.VariantQueryResult; +import org.opencb.opencga.storage.core.variant.query.VariantQueryResult; import org.opencb.opencga.core.tools.OpenCgaToolExecutor; import org.opencb.opencga.core.tools.annotations.ToolExecutor; import org.opencb.opencga.storage.core.exceptions.StorageEngineException; @@ -40,7 +40,6 @@ import java.util.Map; import static org.opencb.opencga.analysis.clinical.InterpretationAnalysis.PRIMARY_FINDINGS_FILENAME; -import static org.opencb.opencga.analysis.clinical.InterpretationAnalysis.SECONDARY_FINDINGS_FILENAME; import static org.opencb.opencga.analysis.variant.manager.VariantCatalogQueryUtils.FAMILY_SEGREGATION; @ToolExecutor(id = "opencga-local", diff --git a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/manager/VariantStorageManager.java b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/manager/VariantStorageManager.java index 33de681f215..944bbc9b980 100644 --- a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/manager/VariantStorageManager.java +++ b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/manager/VariantStorageManager.java @@ -69,7 +69,7 @@ import org.opencb.opencga.core.models.study.Study; import org.opencb.opencga.core.models.study.StudyPermissions; import org.opencb.opencga.core.response.OpenCGAResult; -import org.opencb.opencga.core.response.VariantQueryResult; +import org.opencb.opencga.storage.core.variant.query.VariantQueryResult; import org.opencb.opencga.core.tools.ToolParams; import org.opencb.opencga.storage.core.StorageEngineFactory; import org.opencb.opencga.storage.core.StoragePipelineResult; @@ -646,6 +646,7 @@ public VariantQueryResult get(Query inputQuery, QueryOptions queryOptio @SuppressWarnings("unchecked") public VariantQueryResult get(Query query, QueryOptions queryOptions, String token, Class clazz) throws CatalogException, IOException, StorageEngineException { + VariantQueryResult result = get(query, queryOptions, token); List variants; if (clazz == Variant.class) { @@ -659,16 +660,7 @@ public VariantQueryResult get(Query query, QueryOptions queryOptions, Str } else { throw new IllegalArgumentException("Unknown variant format " + clazz); } - return new VariantQueryResult<>( - result.getTime(), - result.getNumResults(), - result.getNumMatches(), - result.getEvents(), - variants, - result.getSamples(), - result.getSource(), - result.getApproximateCount(), - result.getApproximateCountSamplingSize(), null); + return new VariantQueryResult<>(result, variants); } @@ -899,7 +891,7 @@ public DataResult getSampleData(String variant, String study, QueryOpti VariantQueryResult result = new VariantQueryResult<>( ((int) stopWatch.getTime(TimeUnit.MILLISECONDS)), - 1, 1, new ArrayList<>(), Collections.singletonList(variantResult), null, null) + 1, 1, new ArrayList<>(), Collections.singletonList(variantResult), engine.getStorageEngineId()) .setNumSamples(sampleEntries.size()); if (exactNumSamples) { result.setApproximateCount(false); diff --git a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/mutationalSignature/MutationalSignatureLocalAnalysisExecutor.java b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/mutationalSignature/MutationalSignatureLocalAnalysisExecutor.java index 042784b844a..5be8325e066 100644 --- a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/mutationalSignature/MutationalSignatureLocalAnalysisExecutor.java +++ b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/mutationalSignature/MutationalSignatureLocalAnalysisExecutor.java @@ -43,7 +43,7 @@ import org.opencb.opencga.core.models.sample.Sample; import org.opencb.opencga.core.models.variant.MutationalSignatureAnalysisParams; import org.opencb.opencga.core.response.OpenCGAResult; -import org.opencb.opencga.core.response.VariantQueryResult; +import org.opencb.opencga.storage.core.variant.query.VariantQueryResult; import org.opencb.opencga.core.tools.annotations.ToolExecutor; import org.opencb.opencga.core.tools.variant.MutationalSignatureAnalysisExecutor; import org.opencb.opencga.storage.core.exceptions.StorageEngineException; diff --git a/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/io/VcfOutputWriter.java b/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/io/VcfOutputWriter.java index 6093c975a32..0c398dd2ef6 100644 --- a/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/io/VcfOutputWriter.java +++ b/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/io/VcfOutputWriter.java @@ -9,7 +9,7 @@ import org.opencb.biodata.models.variant.metadata.VariantStudyMetadata; import org.opencb.biodata.models.variant.protobuf.VariantProto; import org.opencb.opencga.core.response.RestResponse; -import org.opencb.opencga.core.response.VariantQueryResult; +import org.opencb.opencga.storage.core.variant.query.VariantQueryResult; import org.opencb.opencga.storage.core.variant.io.VcfDataWriter; import java.io.PrintStream; diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/metadata/VariantStorageMetadataManager.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/metadata/VariantStorageMetadataManager.java index cf4477bb4f1..65812bd7017 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/metadata/VariantStorageMetadataManager.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/metadata/VariantStorageMetadataManager.java @@ -1135,6 +1135,11 @@ public Iterable getInvalidCohorts(int studyId) { return () -> Iterators.filter(cohortIterator(studyId), CohortMetadata::isInvalid); } + public Iterable getCalculatedOrInvalidCohorts(int studyId) { + return () -> Iterators.filter(cohortIterator(studyId), + cohortMetadata -> cohortMetadata.isStatsReady() || cohortMetadata.isInvalid()); + } + public CohortMetadata setSamplesToCohort(int studyId, String cohortName, Collection samples) throws StorageEngineException { return updateCohortSamples(studyId, cohortName, samples, false); } @@ -1244,6 +1249,7 @@ private CohortMetadata updateCohortSamples(int studyId, String cohortName, Colle if (!oldSamples.equals(sampleIdsList) || !oldFiles.equals(fileIds)) { // Cohort has been modified! Invalidate stats cohort.setStatsStatus(TaskMetadata.Status.ERROR); + cohort.getAttributes().put(CohortMetadata.INVALID_STATS_NUM_SAMPLES, oldSamples.size()); } } } diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/metadata/models/CohortMetadata.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/metadata/models/CohortMetadata.java index 4814fc82b63..a4e9de4116e 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/metadata/models/CohortMetadata.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/metadata/models/CohortMetadata.java @@ -8,6 +8,7 @@ * @author Jacobo Coll <jacobo167@gmail.com> */ public class CohortMetadata extends StudyResourceMetadata { + public static final String INVALID_STATS_NUM_SAMPLES = "invalidStatsNumSamples"; // private int studyId; // private int id; diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/VariantStorageEngine.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/VariantStorageEngine.java index 0ce77635eab..c295d086381 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/VariantStorageEngine.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/VariantStorageEngine.java @@ -31,7 +31,7 @@ import org.opencb.opencga.core.config.storage.StorageConfiguration; import org.opencb.opencga.core.models.operations.variant.VariantAggregateFamilyParams; import org.opencb.opencga.core.models.operations.variant.VariantAggregateParams; -import org.opencb.opencga.core.response.VariantQueryResult; +import org.opencb.opencga.storage.core.variant.query.VariantQueryResult; import org.opencb.opencga.storage.core.StorageEngine; import org.opencb.opencga.storage.core.StoragePipelineResult; import org.opencb.opencga.storage.core.exceptions.StorageEngineException; @@ -1233,8 +1233,8 @@ public VariantQueryResult get(Query query, QueryOptions options) { } addDefaultLimit(options, getOptions()); addDefaultSampleLimit(query, getOptions()); - query = preProcessQuery(query, options); - return getVariantQueryExecutor(query, options).get(query, options); + ParsedVariantQuery variantQuery = parseQuery(query, options); + return getVariantQueryExecutor(variantQuery).get(variantQuery); } @Override @@ -1251,8 +1251,8 @@ public MultiVariantDBIterator iterator(Iterator variants, Query query, QueryO public VariantDBIterator iterator(Query query, QueryOptions options) { query = VariantQueryUtils.copy(query); options = VariantQueryUtils.copy(options); - query = preProcessQuery(query, options); - return getVariantQueryExecutor(query, options).iterator(query, options); + ParsedVariantQuery variantQuery = parseQuery(query, options); + return getVariantQueryExecutor(variantQuery).iterator(variantQuery); } public final List getVariantQueryExecutors() throws StorageEngineException { @@ -1274,7 +1274,7 @@ protected List initVariantQueryExecutors() throws StorageE executors.add(new CompoundHeterozygousQueryExecutor( getMetadataManager(), getStorageEngineId(), getOptions(), this)); executors.add(new BreakendVariantQueryExecutor( - getMetadataManager(), getStorageEngineId(), getOptions(), new DBAdaptorVariantQueryExecutor( + getStorageEngineId(), getOptions(), new DBAdaptorVariantQueryExecutor( getDBAdaptor(), getStorageEngineId(), getOptions()), getDBAdaptor())); executors.add(new SamplesSearchIndexVariantQueryExecutor( getDBAdaptor(), getVariantSearchManager(), getStorageEngineId(), dbName, configuration, getOptions())); @@ -1293,12 +1293,22 @@ protected List initVariantQueryExecutors() throws StorageE * @return VariantQueryExecutor to use */ public VariantQueryExecutor getVariantQueryExecutor(Query query, QueryOptions options) { + return getVariantQueryExecutor(parseQuery(query, options)); + } + + /** + * Determine which {@link VariantQueryExecutor} should be used to execute the given query. + * + * @param variantQuery Parsed variant query + * @return VariantQueryExecutor to use + */ + public VariantQueryExecutor getVariantQueryExecutor(ParsedVariantQuery variantQuery) { try { for (VariantQueryExecutor executor : getVariantQueryExecutors()) { - if (executor.canUseThisExecutor(query, options)) { + if (executor.canUseThisExecutor(variantQuery.getQuery(), variantQuery.getInputOptions())) { logger.info("Using VariantQueryExecutor : " + executor.getClass().getName()); - logger.info(" Query : " + VariantQueryUtils.printQuery(query)); - logger.info(" Options : " + options.toJson()); + logger.info(" Query : " + VariantQueryUtils.printQuery(variantQuery.getInputQuery())); + logger.info(" Options : " + variantQuery.getInputOptions().toJson()); return executor; } } diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/adaptors/VariantDBAdaptor.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/adaptors/VariantDBAdaptor.java index c2ee42c9b00..e54efca912a 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/adaptors/VariantDBAdaptor.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/adaptors/VariantDBAdaptor.java @@ -23,12 +23,12 @@ import org.opencb.commons.datastore.core.DataResult; import org.opencb.commons.datastore.core.Query; import org.opencb.commons.datastore.core.QueryOptions; -import org.opencb.opencga.core.response.VariantQueryResult; import org.opencb.opencga.storage.core.metadata.VariantStorageMetadataManager; import org.opencb.opencga.storage.core.metadata.models.StudyMetadata; import org.opencb.opencga.storage.core.variant.adaptors.iterators.VariantDBIterator; import org.opencb.opencga.storage.core.variant.query.ParsedVariantQuery; import org.opencb.opencga.storage.core.variant.query.VariantQueryParser; +import org.opencb.opencga.storage.core.variant.query.VariantQueryResult; import org.opencb.opencga.storage.core.variant.query.projection.VariantQueryProjectionParser; import org.opencb.opencga.storage.core.variant.stats.VariantStatsWrapper; @@ -37,8 +37,6 @@ import java.util.List; import java.util.Map; -import static org.opencb.opencga.storage.core.variant.query.VariantQueryUtils.addSamplesMetadataIfRequested; - /** * @author Ignacio Medina * @author Jacobo Coll @@ -46,21 +44,6 @@ */ public interface VariantDBAdaptor extends VariantIterable, AutoCloseable { - /** - * This method inserts Variants into the given Study. If the Study already exists then it just adds the new Sample - * genotypes, also new variants are inserted. If it is a new Study then Sample genotypes are added to the new Study. - * - * @param variants List of variants in OpenCB data model to be inserted - * @param studyName Name or alias of the study - * @param options Query modifiers, accepted values are: include, exclude, limit, skip, sort and count - * @return A DataResult with the number of inserted variants - */ - @Deprecated - default DataResult insert(List variants, String studyName, QueryOptions options) { - throw new UnsupportedOperationException(); - } - - /** /** * Fetch all variants resulting of executing the query in the database. Returned fields are taken from * the 'include' and 'exclude' fields at options. @@ -71,26 +54,29 @@ default DataResult insert(List variants, String studyName, QueryOptions * @return A DataResult with the result of the query */ default VariantQueryResult get(Iterator variants, Query query, QueryOptions options) { - DataResult queryResult = iterator(variants, query, options).toDataResult(); - return addSamplesMetadataIfRequested(queryResult, query, options, getMetadataManager()); + ParsedVariantQuery variantQuery = new VariantQueryParser(null, getMetadataManager()).parseQuery(query, options, true); + try (VariantDBIterator iterator = iterator(variants, query, options)) { + return iterator.toDataResult(variantQuery); + } catch (Exception e) { + throw VariantQueryException.internalException(e); + } } @Deprecated default VariantDBIterator iterator(Query query, QueryOptions options) { - return iterator(new VariantQueryParser(null, getMetadataManager()).parseQuery(query, options, true), options); + return iterator(new VariantQueryParser(null, getMetadataManager()).parseQuery(query, options, true)); } - VariantDBIterator iterator(ParsedVariantQuery query, QueryOptions options); + VariantDBIterator iterator(ParsedVariantQuery query); /** * Fetch all variants resulting of executing the query in the database. Returned fields are taken from * the 'include' and 'exclude' fields at options. * * @param query Query to be executed in the database to filter variants - * @param options Query modifiers, accepted values are: include, exclude, limit, skip, sort and count * @return A DataResult with the result of the query */ - VariantQueryResult get(ParsedVariantQuery query, QueryOptions options); + VariantQueryResult get(ParsedVariantQuery query); /** * Fetch all variants resulting of executing the query in the database. Returned fields are taken from @@ -102,7 +88,7 @@ default VariantDBIterator iterator(Query query, QueryOptions options) { */ @Deprecated default VariantQueryResult get(Query query, QueryOptions options) { - return get(new VariantQueryParser(null, getMetadataManager()).parseQuery(query, options, true), options); + return get(new VariantQueryParser(null, getMetadataManager()).parseQuery(query, options, true)); } /** diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/adaptors/VariantQuery.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/adaptors/VariantQuery.java index 9c04ad9c9bf..09b1867854e 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/adaptors/VariantQuery.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/adaptors/VariantQuery.java @@ -50,6 +50,10 @@ public VariantQuery region(String value) { put(VariantQueryParam.REGION.key(), value); return this; } + public VariantQuery region(String... value) { + put(VariantQueryParam.REGION.key(), Arrays.asList(value)); + return this; + } public VariantQuery region(Region... value) { put(VariantQueryParam.REGION.key(), Arrays.asList(value)); return this; @@ -222,20 +226,24 @@ public boolean includeGenotype() { return getBoolean(VariantQueryParam.INCLUDE_GENOTYPE.key()); } - public VariantQuery sampleLimit(String value) { + public VariantQuery sampleLimit(int value) { put(VariantQueryParam.SAMPLE_LIMIT.key(), value); return this; } - public String sampleLimit() { - return getString(VariantQueryParam.SAMPLE_LIMIT.key()); + public int sampleLimit() { + return getInt(VariantQueryParam.SAMPLE_LIMIT.key()); } public VariantQuery sampleSkip(String value) { put(VariantQueryParam.SAMPLE_SKIP.key(), value); return this; } - public String sampleSkip() { - return getString(VariantQueryParam.SAMPLE_SKIP.key()); + public VariantQuery sampleSkip(int value) { + put(VariantQueryParam.SAMPLE_SKIP.key(), value); + return this; + } + public int sampleSkip() { + return getInt(VariantQueryParam.SAMPLE_SKIP.key()); } public VariantQuery file(String value) { @@ -633,12 +641,12 @@ public VariantQuery release(String value) { put(VariantQueryParam.RELEASE.key(), value); return this; } - public VariantQuery release(int value) { + public VariantQuery release(Integer value) { put(VariantQueryParam.RELEASE.key(), value); return this; } - public String release() { - return getString(VariantQueryParam.RELEASE.key()); + public Integer release() { + return getInt(VariantQueryParam.RELEASE.key()); } @Override diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/adaptors/iterators/LimitVariantDBIterator.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/adaptors/iterators/LimitVariantDBIterator.java index 3f69fe91924..07ece2a578d 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/adaptors/iterators/LimitVariantDBIterator.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/adaptors/iterators/LimitVariantDBIterator.java @@ -1,16 +1,32 @@ package org.opencb.opencga.storage.core.variant.adaptors.iterators; +import org.opencb.biodata.models.variant.Variant; + +import java.util.NoSuchElementException; + public class LimitVariantDBIterator extends DelegatedVariantDBIterator { private final int limit; + private int count; LimitVariantDBIterator(VariantDBIterator delegated, int limit) { super(delegated); this.limit = limit; + this.count = 0; } @Override public boolean hasNext() { - return getCount() < limit && super.hasNext(); + return count < limit && super.hasNext(); + } + + @Override + public Variant next() { + if (!this.hasNext()) { + throw new NoSuchElementException(); + } else { + ++this.count; + return super.next(); + } } } diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/adaptors/iterators/VariantDBIterator.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/adaptors/iterators/VariantDBIterator.java index eae841fc766..f070c4c7bb1 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/adaptors/iterators/VariantDBIterator.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/adaptors/iterators/VariantDBIterator.java @@ -16,14 +16,14 @@ package org.opencb.opencga.storage.core.variant.adaptors.iterators; -import com.google.common.collect.Iterators; import org.opencb.biodata.models.variant.Variant; import org.opencb.biodata.models.variant.avro.VariantAvro; import org.opencb.commons.datastore.core.DataResult; import org.opencb.commons.datastore.core.QueryOptions; -import org.opencb.opencga.core.response.VariantQueryResult; import org.opencb.opencga.storage.core.utils.iterators.CloseableIterator; import org.opencb.opencga.storage.core.variant.adaptors.VariantQueryException; +import org.opencb.opencga.storage.core.variant.query.ParsedVariantQuery; +import org.opencb.opencga.storage.core.variant.query.VariantQueryResult; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -131,21 +131,34 @@ public final void forEachRemaining(Consumer action) { } } - public final DataResult toDataResult() { + + // TODO: The VariantDBIterator should be able to return the samples in the result + // This class should contain a ParsedVariantQuery +// public final VariantQueryResult toVariantQueryResult() { +// } + + public final VariantQueryResult toDataResult(ParsedVariantQuery variantQuery) { List result = new ArrayList<>(); this.forEachRemaining(result::add); int numResults = result.size(); int numTotalResults = -1; // Unknown numTotalResults - return new DataResult<>((int) getTimeFetching(TimeUnit.MILLISECONDS), Collections.emptyList(), numResults, result, numTotalResults); + DataResult dataResult = new DataResult<>((int) + getTimeFetching(TimeUnit.MILLISECONDS), + Collections.emptyList(), + numResults, + result, + numTotalResults); + return new VariantQueryResult<>(dataResult, variantQuery); } - public final VariantQueryResult toDataResult(Map> samples) { - return new VariantQueryResult<>(toDataResult(), samples); + public final List toList() { + List result = new ArrayList<>(); + this.forEachRemaining(result::add); + return result; } - protected interface TimeFunction { R call() throws E; } @@ -192,7 +205,11 @@ public VariantDBIterator filter(Predicate filter) { } public VariantDBIterator localSkip(int skip) { - Iterators.advance(this, skip); + int i = 0; + while (i < skip && hasNext()) { + next(); + i++; + } return this; } diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/ParsedVariantQuery.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/ParsedVariantQuery.java index 5c90df943fc..8468ab34317 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/ParsedVariantQuery.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/ParsedVariantQuery.java @@ -1,53 +1,76 @@ package org.opencb.opencga.storage.core.variant.query; +import org.opencb.biodata.models.core.Region; import org.opencb.biodata.models.variant.Variant; +import org.opencb.commons.datastore.core.Event; import org.opencb.commons.datastore.core.Query; import org.opencb.commons.datastore.core.QueryOptions; import org.opencb.opencga.storage.core.metadata.models.SampleMetadata; import org.opencb.opencga.storage.core.metadata.models.StudyMetadata; import org.opencb.opencga.storage.core.metadata.models.StudyResourceMetadata; +import org.opencb.opencga.storage.core.variant.adaptors.VariantQuery; +import org.opencb.opencga.storage.core.variant.adaptors.VariantQueryException; import org.opencb.opencga.storage.core.variant.adaptors.VariantQueryParam; import org.opencb.opencga.storage.core.variant.query.projection.VariantQueryProjection; import java.util.*; import java.util.stream.Collectors; +import static org.opencb.opencga.storage.core.variant.adaptors.VariantQueryParam.*; import static org.opencb.opencga.storage.core.variant.query.VariantQueryUtils.ID_INTERSECT; public class ParsedVariantQuery { private Query inputQuery; private QueryOptions inputOptions; - private Query query; + private VariantQuery query; private boolean optimized = false; + private List events = new ArrayList<>(); + private VariantQueryProjection projection; private final VariantStudyQuery studyQuery; -// private VariantAnnotationQuery annotationQuery; + private Integer limit; + private int skip; + private boolean count; + private int approximateCountSamplingSize; + private List geneRegions; + private List regions; + private List> clinicalCombination; + private List clinicalCombinationList; + // private VariantAnnotationQuery annotationQuery; public ParsedVariantQuery() { this.inputQuery = new Query(); this.inputOptions = new QueryOptions(); - this.query = new Query(); + this.query = new VariantQuery(); studyQuery = new VariantStudyQuery(); } public ParsedVariantQuery(Query inputQuery, QueryOptions inputOptions) { this.inputQuery = inputQuery; this.inputOptions = inputOptions; - this.query = inputQuery; + this.query = new VariantQuery(inputQuery); studyQuery = new VariantStudyQuery(); } public ParsedVariantQuery(ParsedVariantQuery other) { this.inputQuery = new Query(other.inputQuery); this.inputOptions = new QueryOptions(other.inputOptions); - this.query = new Query(other.query); + this.query = new VariantQuery(other.query); this.projection = other.projection; - this.studyQuery = other.studyQuery; + this.studyQuery = new VariantStudyQuery(other.getStudyQuery()); this.optimized = other.optimized; + this.limit = other.limit; + this.skip = other.skip; + this.count = other.count; + this.approximateCountSamplingSize = other.approximateCountSamplingSize; + this.geneRegions = new ArrayList<>(other.geneRegions); + this.regions = new ArrayList<>(other.regions); + this.clinicalCombination = new ArrayList<>(other.clinicalCombination); + this.clinicalCombinationList = new ArrayList<>(other.clinicalCombinationList); } public Query getInputQuery() { @@ -59,11 +82,11 @@ public ParsedVariantQuery setInputQuery(Query inputQuery) { return this; } - public Query getQuery() { + public VariantQuery getQuery() { return query; } - public ParsedVariantQuery setQuery(Query query) { + public ParsedVariantQuery setQuery(VariantQuery query) { this.query = query; return this; } @@ -77,6 +100,15 @@ public ParsedVariantQuery setOptimized(boolean optimized) { return this; } + public List getEvents() { + return events; + } + + public ParsedVariantQuery setEvents(List events) { + this.events = events; + return this; + } + public VariantQueryProjection getProjection() { return projection; } @@ -107,6 +139,24 @@ public VariantQueryXref getXrefs() { return VariantQueryParser.parseXrefs(query); } + public List getRegions() { + return regions; + } + + public ParsedVariantQuery setRegions(List regions) { + this.regions = regions; + return this; + } + + public List getGeneRegions() { + return geneRegions; + } + + public ParsedVariantQuery setGeneRegions(List geneRegions) { + this.geneRegions = geneRegions; + return this; + } + public List getConsequenceTypes() { return VariantQueryUtils.parseConsequenceTypes(query.getAsStringList(VariantQueryParam.ANNOT_CONSEQUENCE_TYPE.key())); } @@ -119,6 +169,76 @@ public List getTranscriptFlags() { return query.getAsStringList(VariantQueryParam.ANNOT_TRANSCRIPT_FLAG.key()); } + public Integer getLimit() { + return limit; + } + + public int getLimitOr(int defaultValue) { + return limit == null ? defaultValue : limit; + } + + public ParsedVariantQuery setLimit(Integer limit) { + this.limit = limit; + return this; + } + + public int getSkip() { + return skip; + } + + public ParsedVariantQuery setSkip(int skip) { + this.skip = skip; + return this; + } + + public boolean getCount() { + return count; + } + + public ParsedVariantQuery setCount(boolean count) { + this.count = count; + return this; + } + + public int getApproximateCountSamplingSize() { + return approximateCountSamplingSize; + } + + public ParsedVariantQuery setApproximateCountSamplingSize(int approximateCountSamplingSize) { + this.approximateCountSamplingSize = approximateCountSamplingSize; + return this; + } + + public ParsedQuery> getPopulationFrequencyAlt() { + return VariantQueryParser.parseFreqFilter(query, ANNOT_POPULATION_ALTERNATE_FREQUENCY); + } + + public ParsedQuery> getPopulationFrequencyRef() { + return VariantQueryParser.parseFreqFilter(query, ANNOT_POPULATION_REFERENCE_FREQUENCY); + } + + public ParsedQuery> getPopulationFrequencyMaf() { + return VariantQueryParser.parseFreqFilter(query, ANNOT_POPULATION_MINOR_ALLELE_FREQUENCY); + } + + public List> getClinicalCombinations() { + return clinicalCombination; + } + + public ParsedVariantQuery setClinicalCombination(List> clinicalCombination) { + this.clinicalCombination = clinicalCombination; + return this; + } + + public List getClinicalCombinationsList() { + return clinicalCombinationList; + } + + public ParsedVariantQuery setClinicalCombinationList(List clinicalCombinationList) { + this.clinicalCombinationList = clinicalCombinationList; + return this; + } + public static class VariantStudyQuery { private ParsedQuery studies; private ParsedQuery>> genotypes; @@ -131,6 +251,13 @@ public static class VariantStudyQuery { public VariantStudyQuery() { } + public VariantStudyQuery(VariantStudyQuery studyQuery) { + this.studies = studyQuery.studies; + this.genotypes = studyQuery.genotypes; + this.sampleDataQuery = studyQuery.sampleDataQuery; + this.defaultStudy = studyQuery.defaultStudy; + } + public ParsedQuery getStudies() { return studies; } @@ -140,6 +267,14 @@ public VariantStudyQuery setStudies(ParsedQuery studies) { return this; } + public String getStudyOrFail() { + if (studies == null || studies.size() != 1) { + throw new VariantQueryException("Require exactly one study"); + } else { + return studies.get(0); + } + } + public ParsedQuery>> getGenotypes() { return genotypes; } diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/VariantQueryParser.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/VariantQueryParser.java index 6038330f9d4..a0f641fa987 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/VariantQueryParser.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/VariantQueryParser.java @@ -21,7 +21,9 @@ import org.opencb.opencga.storage.core.metadata.models.TaskMetadata; import org.opencb.opencga.storage.core.metadata.models.VariantScoreMetadata; import org.opencb.opencga.storage.core.utils.CellBaseUtils; +import org.opencb.opencga.storage.core.variant.VariantStorageOptions; import org.opencb.opencga.storage.core.variant.adaptors.GenotypeClass; +import org.opencb.opencga.storage.core.variant.adaptors.VariantQuery; import org.opencb.opencga.storage.core.variant.adaptors.VariantQueryException; import org.opencb.opencga.storage.core.variant.adaptors.VariantQueryParam; import org.opencb.opencga.storage.core.variant.query.projection.VariantQueryProjection; @@ -150,21 +152,39 @@ public ParsedVariantQuery parseQuery(Query query, QueryOptions options) { return parseQuery(query, options, false); } - public ParsedVariantQuery parseQuery(Query query, QueryOptions options, boolean skipPreProcess) { - if (query == null) { - query = new Query(); + public ParsedVariantQuery parseQuery(Query inputQuery, QueryOptions options, boolean skipPreProcess) { + if (inputQuery == null) { + inputQuery = new Query(); } if (options == null) { options = new QueryOptions(); } - ParsedVariantQuery variantQuery = new ParsedVariantQuery(new Query(query), new QueryOptions(options)); + ParsedVariantQuery variantQuery = new ParsedVariantQuery(new Query(inputQuery), new QueryOptions(options)); + int limit = options.getInt(QueryOptions.LIMIT, -1); + variantQuery.setLimit(limit == -1 ? null : limit); + variantQuery.setSkip(options.getInt(QueryOptions.SKIP, 0)); + variantQuery.setCount(options.getBoolean(QueryOptions.COUNT, false)); + variantQuery.setApproximateCountSamplingSize(options.getInt( + VariantStorageOptions.APPROXIMATE_COUNT_SAMPLING_SIZE.key(), + VariantStorageOptions.APPROXIMATE_COUNT_SAMPLING_SIZE.defaultValue())); + + variantQuery.setProjection(projectionParser.parseVariantQueryProjection(inputQuery, options)); + VariantQuery query; if (!skipPreProcess) { - query = preProcessQuery(query, options); + query = new VariantQuery(preProcessQuery(inputQuery, options, variantQuery.getProjection())); + } else { + query = new VariantQuery(inputQuery); } variantQuery.setQuery(query); - variantQuery.setProjection(projectionParser.parseVariantQueryProjection(query, options)); + + List geneRegions = Region.parseRegions(query.getString(ANNOT_GENE_REGIONS.key())); + variantQuery.setGeneRegions(geneRegions == null ? Collections.emptyList() : geneRegions); + List regions = Region.parseRegions(query.region(), true); + variantQuery.setRegions(regions == null ? Collections.emptyList() : regions); + variantQuery.setClinicalCombination(VariantQueryParser.parseClinicalCombination(query, false)); + variantQuery.setClinicalCombinationList(VariantQueryParser.parseClinicalCombinationsList(query, false)); ParsedVariantQuery.VariantStudyQuery studyQuery = variantQuery.getStudyQuery(); @@ -175,7 +195,7 @@ public ParsedVariantQuery parseQuery(Query query, QueryOptions options, boolean } if (isValidParam(query, GENOTYPE)) { HashMap> map = new HashMap<>(); - QueryOperation op = VariantQueryUtils.parseGenotypeFilter(query.getString(GENOTYPE.key()), map); + QueryOperation op = VariantQueryUtils.parseGenotypeFilter(query.genotype(), map); if (defaultStudy == null) { List studyNames = metadataManager.getStudyNames(); @@ -211,13 +231,17 @@ public ParsedVariantQuery parseQuery(Query query, QueryOptions options, boolean return variantQuery; } - public Query preProcessQuery(Query originalQuery, QueryOptions options) { + public final Query preProcessQuery(Query originalQuery, QueryOptions options) { + return preProcessQuery(originalQuery, options, null); + } + + protected Query preProcessQuery(Query originalQuery, QueryOptions options, VariantQueryProjection projection) { // Copy input query! Do not modify original query! Query query = VariantQueryUtils.copy(originalQuery); preProcessAnnotationParams(query); - preProcessStudyParams(query, options); + preProcessStudyParams(query, options, projection); if (options != null && options.getLong(QueryOptions.LIMIT) < 0) { throw VariantQueryException.malformedParam(QueryOptions.LIMIT, options.getString(QueryOptions.LIMIT), @@ -363,7 +387,7 @@ private VariantType parseVariantType(String type) { } } - protected void preProcessStudyParams(Query query, QueryOptions options) { + protected void preProcessStudyParams(Query query, QueryOptions options, VariantQueryProjection projection) { StudyMetadata defaultStudy = getDefaultStudy(query); QueryOperation formatOperator = null; if (isValidParam(query, SAMPLE_DATA)) { @@ -663,22 +687,23 @@ protected void preProcessStudyParams(Query query, QueryOptions options) { if (!isValidParam(query, INCLUDE_STUDY) || !isValidParam(query, INCLUDE_SAMPLE) || !isValidParam(query, INCLUDE_FILE) - || !isValidParam(query, SAMPLE_SKIP) - || !isValidParam(query, SAMPLE_LIMIT) + || isValidParam(query, SAMPLE_SKIP) + || isValidParam(query, SAMPLE_LIMIT) ) { - VariantQueryProjection selectVariantElements = - VariantQueryProjectionParser.parseVariantQueryFields(query, options, metadataManager); + if (projection == null) { + projection = projectionParser.parseVariantQueryProjection(query, options); + } // Apply the sample pagination. // Remove the sampleLimit and sampleSkip to avoid applying the pagination twice query.remove(SAMPLE_SKIP.key()); query.remove(SAMPLE_LIMIT.key()); - query.put(NUM_TOTAL_SAMPLES.key(), selectVariantElements.getNumTotalSamples()); - query.put(NUM_SAMPLES.key(), selectVariantElements.getNumSamples()); + query.put(NUM_TOTAL_SAMPLES.key(), projection.getNumTotalSamples()); + query.put(NUM_SAMPLES.key(), projection.getNumSamples()); if (!isValidParam(query, INCLUDE_STUDY)) { List includeStudy = new ArrayList<>(); - for (Integer studyId : selectVariantElements.getStudyIds()) { - includeStudy.add(selectVariantElements.getStudy(studyId).getStudyMetadata().getName()); + for (Integer studyId : projection.getStudyIds()) { + includeStudy.add(projection.getStudy(studyId).getStudyMetadata().getName()); } if (includeStudy.isEmpty()) { query.put(INCLUDE_STUDY.key(), NONE); @@ -686,22 +711,17 @@ protected void preProcessStudyParams(Query query, QueryOptions options) { query.put(INCLUDE_STUDY.key(), includeStudy); } } - if (!isValidParam(query, INCLUDE_SAMPLE) || selectVariantElements.getSamplePagination()) { - List includeSample = selectVariantElements.getSamples() - .entrySet() - .stream() - .flatMap(e -> e.getValue() - .stream() - .map(s -> metadataManager.getSampleName(e.getKey(), s))) - .collect(Collectors.toList()); + if (!isValidParam(query, INCLUDE_SAMPLE) || projection.getSamplePagination()) { + List includeSample = projection.getSampleNames().values() + .stream().flatMap(Collection::stream).collect(Collectors.toList()); if (includeSample.isEmpty()) { query.put(INCLUDE_SAMPLE.key(), NONE); } else { query.put(INCLUDE_SAMPLE.key(), includeSample); } } - if (!isValidParam(query, INCLUDE_FILE) || selectVariantElements.getSamplePagination()) { - List includeFile = selectVariantElements.getFiles() + if (!isValidParam(query, INCLUDE_FILE) || projection.getSamplePagination()) { + List includeFile = projection.getFiles() .entrySet() .stream() .flatMap(e -> e.getValue() diff --git a/opencga-core/src/main/java/org/opencb/opencga/core/response/VariantQueryResult.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/VariantQueryResult.java similarity index 64% rename from opencga-core/src/main/java/org/opencb/opencga/core/response/VariantQueryResult.java rename to opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/VariantQueryResult.java index c955b3ca379..ee432deb3b7 100644 --- a/opencga-core/src/main/java/org/opencb/opencga/core/response/VariantQueryResult.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/VariantQueryResult.java @@ -14,13 +14,17 @@ * limitations under the License. */ -package org.opencb.opencga.core.response; +package org.opencb.opencga.storage.core.variant.query; import com.fasterxml.jackson.annotation.JsonIgnoreProperties; import org.opencb.commons.datastore.core.DataResult; import org.opencb.commons.datastore.core.Event; import org.opencb.commons.datastore.core.ObjectMap; +import org.opencb.opencga.core.response.OpenCGAResult; +import org.opencb.opencga.storage.core.variant.query.projection.VariantQueryProjection; +import java.util.ArrayList; +import java.util.Collections; import java.util.List; import java.util.Map; @@ -45,25 +49,19 @@ public class VariantQueryResult extends OpenCGAResult { private static final String APPROXIMATE_COUNT = "approximateCount"; private static final String APPROXIMATE_COUNT_SAMPLING_SIZE = "approximateCountSamplingSize"; - public VariantQueryResult() { + protected VariantQueryResult() { } - public VariantQueryResult(long time, int numResults, long numMatches, List events, List result) { - this(time, numResults, numMatches, events, result, null, null, null, null, null); + public VariantQueryResult(long time, int numResults, long numMatches, List events, List result, String source, + ParsedVariantQuery variantQuery) { + this(time, numResults, numMatches, events, result, source, null, null, null, variantQuery); } public VariantQueryResult(long time, int numResults, long numMatches, List events, List result, - Map> samples, String source) { - this(time, numResults, numMatches, events, result, samples, source, null, null, null); - } - - public VariantQueryResult(long time, int numResults, long numMatches, List events, List result, - Map> samples, String source, Boolean approximateCount, - Integer approximateCountSamplingSize, Integer numTotalSamples) { + String source, Boolean approximateCount, Integer approximateCountSamplingSize, Integer numTotalSamples, + ParsedVariantQuery variantQuery) { super((int) time, events, numResults, result, numMatches); - if (samples != null) { - setSamples(samples); - } + if (source != null) { setSource(source); } @@ -73,15 +71,18 @@ public VariantQueryResult(long time, int numResults, long numMatches, List()); + } + if (variantQuery != null) { + addSamplesMetadataIfRequested(variantQuery); + } } - public VariantQueryResult(DataResult dataResult) { + private VariantQueryResult(DataResult dataResult, List results) { super(dataResult.getTime(), dataResult.getEvents(), dataResult.getNumMatches(), @@ -90,27 +91,66 @@ public VariantQueryResult(DataResult dataResult) { dataResult.getNumDeleted(), dataResult.getNumErrors(), dataResult.getAttributes()); - setResults(dataResult.getResults()); - setNumResults(dataResult.getNumResults()); + setResults(results); + setNumResults(results.size()); + if (getEvents() == null) { + setEvents(new ArrayList<>()); + } + } + + public VariantQueryResult(DataResult dataResult, ParsedVariantQuery variantQuery) { + this(dataResult, dataResult.getResults()); + if (variantQuery != null) { + addSamplesMetadataIfRequested(variantQuery); + } + } + + public VariantQueryResult(VariantQueryResult dataResult, List results) { + this((DataResult) dataResult, results); + } + + public VariantQueryResult(DataResult dataResult, String source, ParsedVariantQuery variantQuery) { + this(dataResult, variantQuery); + setSource(source); + } + + /* + * @deprecated Missing ParsedVariantQuery. + * Use {@link #VariantQueryResult(long, int, long, List, List, String, Boolean, Integer, Integer, ParsedVariantQuery)} + */ + @Deprecated + public VariantQueryResult(long time, int numResults, long numMatches, List events, List result, String source) { + this(time, numResults, numMatches, events, result, source, null, null, null, (ParsedVariantQuery) null); } - public VariantQueryResult(DataResult queryResult, Map> samples) { - this(queryResult, samples, null); + /* + * @deprecated Missing ParsedVariantQuery. + * Use {@link #VariantQueryResult(DataResult, ParsedVariantQuery)} + */ + @Deprecated + public VariantQueryResult(DataResult dataResult) { + this(dataResult, (ParsedVariantQuery) null); } - public VariantQueryResult(DataResult dataResult, Map> samples, String source) { - this(dataResult); - setSamples(samples); - if (getNumMatches() >= 0) { - setApproximateCount(false); + private void addSamplesMetadataIfRequested(ParsedVariantQuery query) { + VariantQueryProjection projection = query.getProjection(); + + // Ensure is modifiable + if (getEvents() == null || Collections.emptyList().getClass().equals(getEvents().getClass())) { + setEvents(new ArrayList<>()); } - if (samples != null) { - this.setNumSamples(samples.values().stream().mapToInt(List::size).sum()); - this.setNumTotalSamples(getNumSamples()); + if (!query.getEvents().isEmpty()) { + getEvents().addAll(query.getEvents()); } - if (source != null) { - this.setSource(source); + if (!projection.getEvents().isEmpty()) { + getEvents().addAll(projection.getEvents()); + } + + if (query.getQuery().sampleMetadata()) { + setSamples(query.getProjection().getSampleNames()); } + setNumSamples(projection.getNumSamples()); + setNumTotalSamples(projection.getNumTotalSamples()); } public Map> getSamples() { @@ -173,7 +213,9 @@ public VariantQueryResult setApproximateCount(Boolean approximateCount) { } public Integer getApproximateCountSamplingSize() { - return getAttributes().containsKey(APPROXIMATE_COUNT_SAMPLING_SIZE) ? getAttributes().getInt(APPROXIMATE_COUNT_SAMPLING_SIZE) : null; + return getAttributes().containsKey(APPROXIMATE_COUNT_SAMPLING_SIZE) + ? getAttributes().getInt(APPROXIMATE_COUNT_SAMPLING_SIZE) + : null; } public VariantQueryResult setApproximateCountSamplingSize(Integer approximateCountSamplingSize) { diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/VariantQueryUtils.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/VariantQueryUtils.java index 5a97b349b95..a538092ef4f 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/VariantQueryUtils.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/VariantQueryUtils.java @@ -28,12 +28,13 @@ import org.opencb.biodata.models.variant.VariantBuilder; import org.opencb.biodata.models.variant.annotation.ConsequenceTypeMappings; import org.opencb.biodata.models.variant.avro.*; -import org.opencb.commons.datastore.core.*; +import org.opencb.commons.datastore.core.ObjectMap; +import org.opencb.commons.datastore.core.Query; +import org.opencb.commons.datastore.core.QueryOptions; +import org.opencb.commons.datastore.core.QueryParam; import org.opencb.commons.utils.ListUtils; import org.opencb.opencga.core.api.ParamConstants; import org.opencb.opencga.core.models.variant.VariantAnnotationConstants; -import org.opencb.opencga.core.response.VariantQueryResult; -import org.opencb.opencga.storage.core.metadata.VariantStorageMetadataManager; import org.opencb.opencga.storage.core.utils.CellBaseUtils; import org.opencb.opencga.storage.core.variant.adaptors.VariantField; import org.opencb.opencga.storage.core.variant.adaptors.VariantQueryException; @@ -617,37 +618,6 @@ public static boolean isOutputMultiStudy(Query query, QueryOptions options, Coll } } - public static VariantQueryResult addSamplesMetadataIfRequested(DataResult result, Query query, QueryOptions options, - VariantStorageMetadataManager variantStorageMetadataManager) { - return addSamplesMetadataIfRequested(new VariantQueryResult<>(result, null), query, options, variantStorageMetadataManager); - } - - public static VariantQueryResult addSamplesMetadataIfRequested(VariantQueryResult result, Query query, QueryOptions options, - VariantStorageMetadataManager variantStorageMetadataManager) { - if (query.getBoolean(SAMPLE_METADATA.key(), false)) { - int numTotalSamples = query.getInt(NUM_TOTAL_SAMPLES.key(), -1); - int numSamples = query.getInt(NUM_SAMPLES.key(), -1); - Map> samplesMetadata = VariantQueryProjectionParser - .getIncludeSampleNames(query, options, variantStorageMetadataManager); - if (numTotalSamples < 0 && numSamples < 0) { - numTotalSamples = samplesMetadata.values().stream().mapToInt(List::size).sum(); - VariantQueryProjectionParser.skipAndLimitSamples(query, samplesMetadata); - numSamples = samplesMetadata.values().stream().mapToInt(List::size).sum(); - } - return result.setNumSamples(numSamples) - .setNumTotalSamples(numTotalSamples) - .setSamples(samplesMetadata); - } else { - int numTotalSamples = query.getInt(NUM_TOTAL_SAMPLES.key(), -1); - int numSamples = query.getInt(NUM_SAMPLES.key(), -1); - if (numTotalSamples >= 0 && numSamples >= 0) { - return result.setNumSamples(numSamples) - .setNumTotalSamples(numTotalSamples); - } - return result; - } - } - /** * Gets a list of elements sample data keys to return. * @@ -1606,6 +1576,13 @@ public static String printQuery(Query query) { query.put(ANNOT_GENE_REGIONS.key(), "numGeneRegions : " + ((Collection) geneRegions).size()); } } + if (isValidParam(query, ID_INTERSECT)) { + query = new Query(query); + Object idIntersect = query.get(ID_INTERSECT.key()); + if (idIntersect instanceof Collection) { + query.put(ID_INTERSECT.key(), "numIdIntersect : " + ((Collection) idIntersect).size()); + } + } try { return QUERY_MAPPER.writeValueAsString(query); } catch (JsonProcessingException e) { diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/executors/AbstractTwoPhasedVariantQueryExecutor.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/executors/AbstractTwoPhasedVariantQueryExecutor.java index 5697998edb1..d71311a107d 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/executors/AbstractTwoPhasedVariantQueryExecutor.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/executors/AbstractTwoPhasedVariantQueryExecutor.java @@ -7,7 +7,7 @@ import org.opencb.commons.datastore.core.Query; import org.opencb.commons.datastore.core.QueryOptions; import org.opencb.opencga.core.common.TimeUtils; -import org.opencb.opencga.core.response.VariantQueryResult; +import org.opencb.opencga.storage.core.variant.query.VariantQueryResult; import org.opencb.opencga.storage.core.metadata.VariantStorageMetadataManager; import org.opencb.opencga.storage.core.variant.VariantStorageOptions; import org.opencb.opencga.storage.core.variant.adaptors.iterators.VariantDBIteratorWithCounts; diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/executors/BreakendVariantQueryExecutor.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/executors/BreakendVariantQueryExecutor.java index 0e8c9ead98c..6eb237ea4b3 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/executors/BreakendVariantQueryExecutor.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/executors/BreakendVariantQueryExecutor.java @@ -1,6 +1,5 @@ package org.opencb.opencga.storage.core.variant.query.executors; -import com.google.common.collect.Iterators; import org.apache.commons.lang3.StringUtils; import org.opencb.biodata.models.core.Region; import org.opencb.biodata.models.variant.StudyEntry; @@ -13,14 +12,13 @@ import org.opencb.commons.datastore.core.Query; import org.opencb.commons.datastore.core.QueryOptions; import org.opencb.commons.datastore.core.QueryParam; -import org.opencb.opencga.core.response.VariantQueryResult; import org.opencb.opencga.storage.core.exceptions.StorageEngineException; -import org.opencb.opencga.storage.core.metadata.VariantStorageMetadataManager; -import org.opencb.opencga.storage.core.variant.VariantStorageOptions; import org.opencb.opencga.storage.core.variant.adaptors.VariantDBAdaptor; import org.opencb.opencga.storage.core.variant.adaptors.VariantQueryException; import org.opencb.opencga.storage.core.variant.adaptors.VariantQueryParam; import org.opencb.opencga.storage.core.variant.adaptors.iterators.VariantDBIterator; +import org.opencb.opencga.storage.core.variant.query.ParsedVariantQuery; +import org.opencb.opencga.storage.core.variant.query.VariantQueryResult; import org.opencb.opencga.storage.core.variant.query.VariantQueryUtils; import org.opencb.opencga.storage.core.variant.query.filters.VariantFilterBuilder; @@ -33,12 +31,12 @@ public class BreakendVariantQueryExecutor extends VariantQueryExecutor { private final VariantDBAdaptor variantDBAdaptor; private final VariantFilterBuilder filterBuilder; - public BreakendVariantQueryExecutor(VariantStorageMetadataManager metadataManager, String storageEngineId, ObjectMap options, + public BreakendVariantQueryExecutor(String storageEngineId, ObjectMap options, VariantQueryExecutor delegatedQueryExecutor, VariantDBAdaptor variantDBAdaptor) { - super(metadataManager, storageEngineId, options); + super(variantDBAdaptor.getMetadataManager(), storageEngineId, options); this.delegatedQueryExecutor = delegatedQueryExecutor; this.variantDBAdaptor = variantDBAdaptor; - filterBuilder = new VariantFilterBuilder(metadataManager); + filterBuilder = new VariantFilterBuilder(); } @Override @@ -48,35 +46,39 @@ public boolean canUseThisExecutor(Query query, QueryOptions options) throws Stor } @Override - protected Object getOrIterator(Query query, QueryOptions options, boolean getIterator) throws StorageEngineException { - int limit = options.getInt(QueryOptions.LIMIT); - int skip = options.getInt(QueryOptions.SKIP); - boolean count = options.getBoolean(QueryOptions.COUNT); - int approximateCountSamplingSize = options.getInt( - VariantStorageOptions.APPROXIMATE_COUNT_SAMPLING_SIZE.key(), - VariantStorageOptions.APPROXIMATE_COUNT_SAMPLING_SIZE.defaultValue()); - Query baseQuery = baseQuery(query); - Predicate variantLocalFilter = filterBuilder.buildFilter(query, options); - + protected Object getOrIterator(ParsedVariantQuery variantQuery, boolean getIterator) throws StorageEngineException { + int limit = variantQuery.getLimitOr(-1); + int skip = variantQuery.getSkip(); + boolean count = variantQuery.getCount() && !getIterator; + int approximateCountSamplingSize = variantQuery.getApproximateCountSamplingSize(); + Query baseQuery = baseQuery(variantQuery.getQuery()); + Predicate variantLocalFilter = filterBuilder.buildFilter(variantQuery); + + // Copy to avoid modifications to input query + ParsedVariantQuery delegatedVariantQuery = new ParsedVariantQuery(variantQuery); + QueryOptions options = new QueryOptions(variantQuery.getInputOptions()); + options.remove(QueryOptions.SKIP); + delegatedVariantQuery.setSkip(0); + if (limit >= 0) { + int tmpLimit = skip + limit * 4; + if (count && tmpLimit < approximateCountSamplingSize) { + tmpLimit = approximateCountSamplingSize; + } + options.put(QueryOptions.LIMIT, tmpLimit); + delegatedVariantQuery.setLimit(tmpLimit); + } + delegatedVariantQuery.setInputOptions(options); if (getIterator) { - VariantDBIterator iterator = delegatedQueryExecutor.iterator(query, options); + VariantDBIterator iterator = delegatedQueryExecutor.iterator(delegatedVariantQuery); iterator = iterator.mapBuffered(l -> getBreakendPairs(0, baseQuery, variantLocalFilter, l), 100); iterator = iterator.localLimitSkip(limit, skip); return iterator; } else { - // Copy to avoid modifications to input options - options = new QueryOptions(options); - options.remove(QueryOptions.SKIP); - int tmpLimit = skip + limit * 2; - if (count && tmpLimit < approximateCountSamplingSize) { - tmpLimit = approximateCountSamplingSize; - } - options.put(QueryOptions.LIMIT, tmpLimit); - VariantQueryResult queryResult = delegatedQueryExecutor.get(query, options); + VariantQueryResult queryResult = delegatedQueryExecutor.get(delegatedVariantQuery); List results = queryResult.getResults(); results = getBreakendPairs(0, baseQuery, variantLocalFilter, results); - if (queryResult.getNumMatches() < tmpLimit) { + if (queryResult.getNumMatches() < delegatedVariantQuery.getLimitOr(-1)) { // Exact count!! queryResult.setApproximateCount(false); queryResult.setNumMatches(results.size()); @@ -100,24 +102,6 @@ protected Object getOrIterator(Query query, QueryOptions options, boolean getIte } } - - protected VariantDBIterator iterator(Query query, QueryOptions options, int batchSize) throws StorageEngineException { - - int limit = options.getInt(QueryOptions.LIMIT); - int skip = options.getInt(QueryOptions.SKIP); - Query baseQuery = baseQuery(query); - Predicate variantLocalFilter = filterBuilder.buildFilter(query, options); - - VariantDBIterator iterator = delegatedQueryExecutor.iterator(query, options); - iterator = iterator.mapBuffered(l -> getBreakendPairs(0, baseQuery, variantLocalFilter, l), batchSize); - Iterators.advance(iterator, skip); - iterator = iterator.localSkip(skip); - if (limit > 0) { - iterator = iterator.localLimit(limit); - } - return iterator; - } - private Query baseQuery(Query query) { return subQuery(query, VariantQueryParam.STUDY, @@ -143,7 +127,6 @@ private List getBreakendPairs(int samplePosition, Query baseQuery, Pred } // Copy query to avoid propagating modifications baseQuery = new Query(baseQuery); -// System.out.println("variants = " + variants); List regions = new ArrayList<>(variants.size()); for (Variant variant : variants) { BreakendMate mate = variant.getSv().getBreakend().getMate(); @@ -190,7 +173,7 @@ private List getBreakendPairs(int samplePosition, Query baseQuery, Pred return variantPairs; } - private void addPair(Predicate filter, List variantPairs, Variant variant, Variant mateVariant) { + private boolean addPair(Predicate filter, List variantPairs, Variant variant, Variant mateVariant) { // Check for duplicated pairs if (VariantDBIterator.VARIANT_COMPARATOR.compare(variant, mateVariant) > 0) { // The mate variant is "before" the main variant @@ -200,10 +183,14 @@ private void addPair(Predicate filter, List variantPairs, Vari // But first the "mate" to respect order variantPairs.add(mateVariant); variantPairs.add(variant); + return true; + } else { + return false; } } else { variantPairs.add(variant); variantPairs.add(mateVariant); + return true; } } diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/executors/ChromDensityVariantAggregationExecutor.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/executors/ChromDensityVariantAggregationExecutor.java index b7a03ceac34..9606a74a85d 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/executors/ChromDensityVariantAggregationExecutor.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/executors/ChromDensityVariantAggregationExecutor.java @@ -8,7 +8,7 @@ import org.opencb.commons.datastore.core.Query; import org.opencb.commons.datastore.core.QueryOptions; import org.opencb.commons.datastore.solr.FacetQueryParser; -import org.opencb.opencga.core.response.VariantQueryResult; +import org.opencb.opencga.storage.core.variant.query.VariantQueryResult; import org.opencb.opencga.storage.core.metadata.VariantStorageMetadataManager; import org.opencb.opencga.storage.core.variant.adaptors.VariantField; import org.opencb.opencga.storage.core.variant.adaptors.VariantIterable; @@ -144,7 +144,7 @@ protected VariantQueryResult aggregation(Query query, QueryOptions o regionBuckets.size(), regionBuckets); return new VariantQueryResult<>((int) stopWatch.getTime(TimeUnit.MILLISECONDS), 1, numMatches, Collections.emptyList(), - Collections.singletonList(field), null, null); + Collections.singletonList(field), null); } private VariantQueryException invalidNestedField(String nestedFieldName) { diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/executors/CompoundHeterozygousQueryExecutor.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/executors/CompoundHeterozygousQueryExecutor.java index db0815b5135..c6f4b87a5f0 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/executors/CompoundHeterozygousQueryExecutor.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/executors/CompoundHeterozygousQueryExecutor.java @@ -3,21 +3,24 @@ import com.google.common.collect.Iterators; import org.opencb.biodata.models.variant.Variant; import org.opencb.biodata.tools.pedigree.ModeOfInheritance; -import org.opencb.opencga.core.models.variant.VariantAnnotationConstants; -import org.opencb.commons.datastore.core.DataResult; import org.opencb.commons.datastore.core.ObjectMap; import org.opencb.commons.datastore.core.Query; import org.opencb.commons.datastore.core.QueryOptions; -import org.opencb.opencga.core.response.VariantQueryResult; +import org.opencb.opencga.core.models.variant.VariantAnnotationConstants; import org.opencb.opencga.storage.core.exceptions.StorageEngineException; import org.opencb.opencga.storage.core.metadata.VariantStorageMetadataManager; import org.opencb.opencga.storage.core.metadata.models.SampleMetadata; import org.opencb.opencga.storage.core.metadata.models.Trio; import org.opencb.opencga.storage.core.variant.VariantStorageOptions; -import org.opencb.opencga.storage.core.variant.adaptors.*; +import org.opencb.opencga.storage.core.variant.adaptors.VariantField; +import org.opencb.opencga.storage.core.variant.adaptors.VariantIterable; +import org.opencb.opencga.storage.core.variant.adaptors.VariantQueryException; +import org.opencb.opencga.storage.core.variant.adaptors.VariantQueryParam; import org.opencb.opencga.storage.core.variant.adaptors.iterators.UnionMultiVariantKeyIterator; import org.opencb.opencga.storage.core.variant.adaptors.iterators.VariantDBIterator; import org.opencb.opencga.storage.core.variant.adaptors.iterators.VariantDBIteratorWithCounts; +import org.opencb.opencga.storage.core.variant.query.ParsedVariantQuery; +import org.opencb.opencga.storage.core.variant.query.VariantQueryResult; import org.opencb.opencga.storage.core.variant.query.VariantQueryUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -66,15 +69,11 @@ public boolean canUseThisExecutor(Query query, QueryOptions options) throws Stor } @Override - public DataResult count(Query query) { - throw new UnsupportedOperationException(); - } - - @Override - protected Object getOrIterator(Query query, QueryOptions options, boolean iterator) { - Trio trio = getCompHetTrio(query); - return getOrIterator(query.getString(VariantQueryParam.STUDY.key()), trio.getChild(), trio.getFather(), trio.getMother(), - query, options, iterator); + protected Object getOrIterator(ParsedVariantQuery variantQuery, boolean iterator) { + Trio trio = getCompHetTrio(variantQuery.getQuery()); + String study = variantQuery.getStudyQuery().getStudyOrFail(); + return getOrIterator(study, trio.getChild(), trio.getFather(), trio.getMother(), + variantQuery, iterator); } @Override @@ -84,25 +83,25 @@ protected long primaryCount(Query query, QueryOptions options) { .append(QueryOptions.INCLUDE, VariantField.ID.fieldName()))); } - public VariantQueryResult get(String study, String proband, String father, String mother, Query query, QueryOptions options) { - return (VariantQueryResult) getOrIterator(study, proband, father, mother, query, options, false); + public VariantQueryResult get(String study, String proband, String father, String mother, ParsedVariantQuery variantQuery) { + return (VariantQueryResult) getOrIterator(study, proband, father, mother, variantQuery, false); } - public VariantDBIterator iterator(String study, String proband, String father, String mother, Query query, QueryOptions options) { - return (VariantDBIterator) getOrIterator(study, proband, father, mother, query, options, true); + public VariantDBIterator iterator(String study, String proband, String father, String mother, ParsedVariantQuery variantQuery) { + return (VariantDBIterator) getOrIterator(study, proband, father, mother, variantQuery, true); } - private Object getOrIterator(String study, String proband, String father, String mother, Query query, QueryOptions inputOptions, + private Object getOrIterator(String study, String proband, String father, String mother, ParsedVariantQuery variantQuery, boolean iterator) { // Prepare query and options - int skip = getSkip(inputOptions); - int limit = inputOptions.containsKey(QueryOptions.LIMIT) ? getLimit(inputOptions) : (Integer.MAX_VALUE - skip); - int samplingSize = getSamplingSize(inputOptions, DEFAULT_SAMPLING_SIZE, iterator); - QueryOptions options = buildQueryOptions(inputOptions); + int skip = variantQuery.getSkip(); + int limit = variantQuery.getLimit() != null ? variantQuery.getLimit() : (Integer.MAX_VALUE - skip); + int samplingSize = getSamplingSize(variantQuery.getInputOptions(), DEFAULT_SAMPLING_SIZE, iterator); + QueryOptions options = buildQueryOptions(variantQuery.getInputOptions()); // Always sort results for compound heterozygous options.put(QueryOptions.SORT, true); - query = new Query(query); + Query query = new Query(variantQuery.getQuery()); List includeSample = getAndCheckIncludeSample(query, proband, father, mother); Set biotypes; @@ -158,12 +157,12 @@ private Object getOrIterator(String study, String proband, String father, String return VariantDBIterator.wrapper(variantIterator); } else { VariantQueryResult result = VariantDBIterator.wrapper(variantIterator) - .toDataResult(Collections.singletonMap(study, includeSample)); + .toDataResult(variantQuery); if ((limit + skip) < samplingSize && compoundHeterozygous.size() < samplingSize) { result.setApproximateCount(false); result.setNumMatches(compoundHeterozygous.size()); } else { - setNumTotalResults(unfilteredIterator, result, query, inputOptions, + setNumTotalResults(unfilteredIterator, result, query, variantQuery.getInputOptions(), unfilteredIterator.getCount(), compoundHeterozygous.size()); } diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/executors/DBAdaptorVariantQueryExecutor.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/executors/DBAdaptorVariantQueryExecutor.java index 08705ccf69e..474cbc3fa9f 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/executors/DBAdaptorVariantQueryExecutor.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/executors/DBAdaptorVariantQueryExecutor.java @@ -2,7 +2,9 @@ import org.opencb.biodata.models.variant.Variant; import org.opencb.commons.datastore.core.*; -import org.opencb.opencga.core.response.VariantQueryResult; +import org.opencb.opencga.storage.core.exceptions.StorageEngineException; +import org.opencb.opencga.storage.core.variant.query.ParsedVariantQuery; +import org.opencb.opencga.storage.core.variant.query.VariantQueryResult; import org.opencb.opencga.storage.core.variant.adaptors.VariantDBAdaptor; import org.opencb.opencga.storage.core.variant.adaptors.VariantQueryParam; import org.opencb.opencga.storage.core.variant.query.VariantQueryUtils; @@ -37,11 +39,11 @@ public DBAdaptorVariantQueryExecutor(VariantDBAdaptor dbAdaptor, String storageE } @Override - protected Object getOrIterator(Query query, QueryOptions options, boolean iterator) { + protected Object getOrIterator(ParsedVariantQuery variantQuery, boolean iterator) throws StorageEngineException { if (iterator) { - return dbAdaptor.iterator(query, options); + return dbAdaptor.iterator(variantQuery); } else { - VariantQueryResult result = dbAdaptor.get(query, options); + VariantQueryResult result = dbAdaptor.get(variantQuery); if (result.getSource() == null || result.getSource().isEmpty()) { result.setSource(storageEngineId); } @@ -49,11 +51,6 @@ protected Object getOrIterator(Query query, QueryOptions options, boolean iterat } } - @Override - public DataResult count(Query query) { - return dbAdaptor.count(query); - } - @Override public boolean canUseThisExecutor(Query query, QueryOptions options) { for (QueryParam unsupportedParam : UNSUPPORTED_PARAMS) { diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/executors/NoOpVariantQueryExecutor.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/executors/NoOpVariantQueryExecutor.java index 57b1e64ae58..e286b4a07ce 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/executors/NoOpVariantQueryExecutor.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/executors/NoOpVariantQueryExecutor.java @@ -1,10 +1,8 @@ package org.opencb.opencga.storage.core.variant.query.executors; -import org.opencb.biodata.models.variant.Variant; import org.opencb.commons.datastore.core.ObjectMap; import org.opencb.commons.datastore.core.Query; import org.opencb.commons.datastore.core.QueryOptions; -import org.opencb.opencga.core.response.VariantQueryResult; import org.opencb.opencga.storage.core.exceptions.StorageEngineException; import org.opencb.opencga.storage.core.metadata.VariantStorageMetadataManager; import org.opencb.opencga.storage.core.metadata.models.CohortMetadata; @@ -126,14 +124,11 @@ private boolean checkStatsFilter(Query query, String sample, VariantQueryParam p } @Override - protected Object getOrIterator(Query query, QueryOptions options, boolean iterator) throws StorageEngineException { + protected Object getOrIterator(ParsedVariantQuery variantQuery, boolean iterator) throws StorageEngineException { if (iterator) { return VariantDBIterator.emptyIterator(); } else { - VariantQueryResult result = new VariantQueryResult<>(0, 0, 0, Collections.emptyList(), Collections.emptyList()); - result.setSource(NO_OP); - VariantQueryUtils.addSamplesMetadataIfRequested(result, query, options, metadataManager); - return result; + return new VariantQueryResult<>(0, 0, 0, Collections.emptyList(), Collections.emptyList(), NO_OP, variantQuery); } } } diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/executors/VariantAggregationExecutor.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/executors/VariantAggregationExecutor.java index a2c7f67d54e..8479dd683f7 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/executors/VariantAggregationExecutor.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/executors/VariantAggregationExecutor.java @@ -4,7 +4,7 @@ import org.opencb.commons.datastore.core.FacetField; import org.opencb.commons.datastore.core.Query; import org.opencb.commons.datastore.core.QueryOptions; -import org.opencb.opencga.core.response.VariantQueryResult; +import org.opencb.opencga.storage.core.variant.query.VariantQueryResult; import org.opencb.opencga.storage.core.variant.adaptors.VariantQueryException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/executors/VariantQueryExecutor.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/executors/VariantQueryExecutor.java index 8e445cc30e2..26d53e89e5c 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/executors/VariantQueryExecutor.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/executors/VariantQueryExecutor.java @@ -1,18 +1,15 @@ package org.opencb.opencga.storage.core.variant.query.executors; import org.opencb.biodata.models.variant.Variant; -import org.opencb.commons.datastore.core.DataResult; import org.opencb.commons.datastore.core.ObjectMap; import org.opencb.commons.datastore.core.Query; import org.opencb.commons.datastore.core.QueryOptions; -import org.opencb.opencga.core.response.VariantQueryResult; import org.opencb.opencga.storage.core.exceptions.StorageEngineException; import org.opencb.opencga.storage.core.metadata.VariantStorageMetadataManager; -import org.opencb.opencga.storage.core.variant.adaptors.VariantIterable; import org.opencb.opencga.storage.core.variant.adaptors.VariantQueryException; import org.opencb.opencga.storage.core.variant.adaptors.iterators.VariantDBIterator; - -import java.util.Collections; +import org.opencb.opencga.storage.core.variant.query.ParsedVariantQuery; +import org.opencb.opencga.storage.core.variant.query.VariantQueryResult; import static org.opencb.opencga.storage.core.variant.VariantStorageOptions.QUERY_DEFAULT_TIMEOUT; import static org.opencb.opencga.storage.core.variant.VariantStorageOptions.QUERY_MAX_TIMEOUT; @@ -22,7 +19,7 @@ * * @author Jacobo Coll <jacobo167@gmail.com> */ -public abstract class VariantQueryExecutor implements VariantIterable { +public abstract class VariantQueryExecutor { protected final VariantStorageMetadataManager metadataManager; protected final String storageEngineId; @@ -34,19 +31,17 @@ public VariantQueryExecutor(VariantStorageMetadataManager metadataManager, Strin this.options = options; } - public final VariantQueryResult get(Query query, QueryOptions options) { + public final VariantQueryResult get(ParsedVariantQuery query) { try { - return (VariantQueryResult) getOrIterator(query, options, false); + return (VariantQueryResult) getOrIterator(query, false); } catch (StorageEngineException e) { throw VariantQueryException.internalException(e); } } - @Override - public final VariantDBIterator iterator(Query query, QueryOptions options) { + public final VariantDBIterator iterator(ParsedVariantQuery variantQuery) { try { -// query = parser.preProcessQuery(query, options); - return (VariantDBIterator) getOrIterator(query, options, true); + return (VariantDBIterator) getOrIterator(variantQuery, true); } catch (StorageEngineException e) { throw VariantQueryException.internalException(e); } @@ -77,19 +72,7 @@ public static void setDefaultTimeout(QueryOptions queryOptions, ObjectMap config */ public abstract boolean canUseThisExecutor(Query query, QueryOptions options) throws StorageEngineException; - @Deprecated - public DataResult count(Query query) { - VariantQueryResult result = get(query, new QueryOptions(QueryOptions.COUNT, true).append(QueryOptions.LIMIT, 0)); - return new DataResult<>( - result.getTime(), - result.getEvents(), - 1, - Collections.singletonList(result.getNumMatches()), - result.getNumMatches(), - result.getAttributes()); - } - - protected abstract Object getOrIterator(Query query, QueryOptions options, boolean iterator) throws StorageEngineException; + protected abstract Object getOrIterator(ParsedVariantQuery variantQuery, boolean iterator) throws StorageEngineException; protected VariantStorageMetadataManager getMetadataManager() { return metadataManager; diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/filters/VariantFilterBuilder.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/filters/VariantFilterBuilder.java index f8705d26eef..1f505864fd1 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/filters/VariantFilterBuilder.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/filters/VariantFilterBuilder.java @@ -5,32 +5,26 @@ import org.opencb.biodata.models.variant.Variant; import org.opencb.biodata.models.variant.avro.ConsequenceType; import org.opencb.biodata.models.variant.avro.SequenceOntologyTerm; -import org.opencb.commons.datastore.core.Query; -import org.opencb.commons.datastore.core.QueryOptions; -import org.opencb.opencga.storage.core.metadata.VariantStorageMetadataManager; -import org.opencb.opencga.storage.core.variant.adaptors.VariantQueryParam; -import org.opencb.opencga.storage.core.variant.query.*; +import org.opencb.opencga.storage.core.variant.query.KeyOpValue; +import org.opencb.opencga.storage.core.variant.query.ParsedQuery; +import org.opencb.opencga.storage.core.variant.query.ParsedVariantQuery; +import org.opencb.opencga.storage.core.variant.query.VariantQueryUtils; import java.util.*; import java.util.function.Predicate; import java.util.stream.Collectors; -import static org.opencb.opencga.storage.core.variant.adaptors.VariantQueryParam.*; - public class VariantFilterBuilder { - private final VariantStorageMetadataManager metadataManager; - - public VariantFilterBuilder(VariantStorageMetadataManager metadataManager) { - this.metadataManager = metadataManager; + public VariantFilterBuilder() { } - public Predicate buildFilter(Query query, QueryOptions queryOptions) { + public Predicate buildFilter(ParsedVariantQuery variantQuery) { List> filters = new LinkedList<>(); - addRegionFilters(query, filters); - addAnnotationFilters(query, filters); + addRegionFilters(variantQuery, filters); + addAnnotationFilters(variantQuery, filters); if (filters.isEmpty()) { return v -> true; @@ -39,18 +33,18 @@ public Predicate buildFilter(Query query, QueryOptions queryOptions) { } } - private void addRegionFilters(Query query, List> filters) { + private void addRegionFilters(ParsedVariantQuery variantQuery, List> filters) { List> regionFilters = new LinkedList<>(); - if (VariantQueryUtils.isValidParam(query, VariantQueryParam.REGION)) { - List regions = Region.parseRegions(query.getString(REGION.key()), true); + List regions = variantQuery.getRegions(); + if (!regions.isEmpty()) { regions = VariantQueryUtils.mergeRegions(regions); for (Region region : regions) { regionFilters.add(variant -> region.contains(variant.getChromosome(), variant.getStart())); } } - ParsedVariantQuery.VariantQueryXref variantQueryXref = VariantQueryParser.parseXrefs(query); - Predicate geneFilter = getGeneFilter(query, variantQueryXref.getGenes()); + ParsedVariantQuery.VariantQueryXref variantQueryXref = variantQuery.getXrefs(); + Predicate geneFilter = getGeneFilter(variantQuery, variantQueryXref.getGenes()); if (!variantQueryXref.getIds().isEmpty()) { Set ids = new HashSet<>(variantQueryXref.getIds()); regionFilters.add(variant -> ids.contains(variant.getAnnotation().getId())); @@ -65,15 +59,14 @@ private void addRegionFilters(Query query, List> filters) { if (!regionFilters.isEmpty()) { Set bts; - if (VariantQueryUtils.isValidParam(query, VariantQueryParam.ANNOT_BIOTYPE)) { - bts = new HashSet<>(query.getAsStringList(VariantQueryParam.ANNOT_BIOTYPE.key())); + if (!variantQuery.getBiotypes().isEmpty()) { + bts = new HashSet<>(variantQuery.getBiotypes()); } else { bts = null; } Set cts; - if (VariantQueryUtils.isValidParam(query, VariantQueryParam.ANNOT_CONSEQUENCE_TYPE)) { - cts = new HashSet<>(VariantQueryUtils - .parseConsequenceTypes(query.getAsStringList(VariantQueryParam.ANNOT_CONSEQUENCE_TYPE.key()))); + if (!variantQuery.getConsequenceTypes().isEmpty()) { + cts = new HashSet<>(variantQuery.getConsequenceTypes()); } else { cts = null; } @@ -99,12 +92,12 @@ private void addRegionFilters(Query query, List> filters) { } } - private Predicate getGeneFilter(Query query, List genes) { + private Predicate getGeneFilter(ParsedVariantQuery variantQuery, List genes) { if (genes.isEmpty()) { return null; } - List geneRegions = Region.parseRegions(query.getString(VariantQueryUtils.ANNOT_GENE_REGIONS.key())); + List geneRegions = variantQuery.getGeneRegions(); Predicate geneRegionFilter; if (CollectionUtils.isEmpty(geneRegions)) { geneRegionFilter = null; @@ -115,15 +108,14 @@ private Predicate getGeneFilter(Query query, List genes) { Predicate geneFilter; Set bts; - if (VariantQueryUtils.isValidParam(query, VariantQueryParam.ANNOT_BIOTYPE)) { - bts = new HashSet<>(query.getAsStringList(VariantQueryParam.ANNOT_BIOTYPE.key())); + if (!variantQuery.getBiotypes().isEmpty()) { + bts = new HashSet<>(variantQuery.getBiotypes()); } else { bts = null; } Set cts; - if (VariantQueryUtils.isValidParam(query, VariantQueryParam.ANNOT_CONSEQUENCE_TYPE)) { - cts = new HashSet<>(VariantQueryUtils - .parseConsequenceTypes(query.getAsStringList(VariantQueryParam.ANNOT_CONSEQUENCE_TYPE.key()))); + if (!variantQuery.getConsequenceTypes().isEmpty()) { + cts = new HashSet<>(variantQuery.getConsequenceTypes()); } else { cts = null; } @@ -151,31 +143,30 @@ private Predicate getGeneFilter(Query query, List genes) { } } - private void addAnnotationFilters(Query query, List> filters) { -// ParsedVariantQuery.VariantQueryXref variantQueryXref = VariantQueryParser.parseXrefs(query); - addClinicalFilters(query, filters); + private void addAnnotationFilters(ParsedVariantQuery variantQuery, List> filters) { +// ParsedVariantQuery.VariantQueryXref variantQueryXref = variantQuery.getXrefs(); + addClinicalFilters(variantQuery, filters); - if (VariantQueryUtils.isValidParam(query, ANNOT_POPULATION_ALTERNATE_FREQUENCY)) { - ParsedQuery> freqQuery - = VariantQueryParser.parseFreqFilter(query, ANNOT_POPULATION_ALTERNATE_FREQUENCY); + ParsedQuery> freqQuery = variantQuery.getPopulationFrequencyAlt(); + if (!freqQuery.isEmpty()) { List freqFilters = freqQuery.mapValues(popFreq -> { String[] split = popFreq.getKey().split(VariantQueryUtils.STUDY_POP_FREQ_SEPARATOR); return new PopulationFrequencyVariantFilter.AltFreqFilter(split[0], split[1], popFreq.getOp(), popFreq.getValue()); }); filters.add(new PopulationFrequencyVariantFilter(freqQuery.getOperation(), freqFilters)); } - if (VariantQueryUtils.isValidParam(query, ANNOT_POPULATION_REFERENCE_FREQUENCY)) { - ParsedQuery> freqQuery - = VariantQueryParser.parseFreqFilter(query, ANNOT_POPULATION_REFERENCE_FREQUENCY); + + freqQuery = variantQuery.getPopulationFrequencyRef(); + if (!freqQuery.isEmpty()) { List freqFilters = freqQuery.mapValues(popFreq -> { String[] split = popFreq.getKey().split(VariantQueryUtils.STUDY_POP_FREQ_SEPARATOR); return new PopulationFrequencyVariantFilter.RefFreqFilter(split[0], split[1], popFreq.getOp(), popFreq.getValue()); }); filters.add(new PopulationFrequencyVariantFilter(freqQuery.getOperation(), freqFilters)); } - if (VariantQueryUtils.isValidParam(query, ANNOT_POPULATION_MINOR_ALLELE_FREQUENCY)) { - ParsedQuery> freqQuery - = VariantQueryParser.parseFreqFilter(query, ANNOT_POPULATION_MINOR_ALLELE_FREQUENCY); + + freqQuery = variantQuery.getPopulationFrequencyMaf(); + if (!freqQuery.isEmpty()) { List freqFilters = freqQuery.mapValues(popFreq -> { String[] split = popFreq.getKey().split(VariantQueryUtils.STUDY_POP_FREQ_SEPARATOR); return new PopulationFrequencyVariantFilter.MafFreqFilter(split[0], split[1], popFreq.getOp(), popFreq.getValue()); @@ -185,8 +176,8 @@ private void addAnnotationFilters(Query query, List> filters) } - private void addClinicalFilters(Query query, List> filters) { - List> clinicalCombinations = VariantQueryParser.parseClinicalCombination(query) + private void addClinicalFilters(ParsedVariantQuery variantQuery, List> filters) { + List> clinicalCombinations = variantQuery.getClinicalCombinations() .stream().map(HashSet::new).collect(Collectors.toList()); if (clinicalCombinations.isEmpty()) { return; diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/projection/VariantQueryProjection.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/projection/VariantQueryProjection.java index 717967cc038..a31d810fffc 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/projection/VariantQueryProjection.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/projection/VariantQueryProjection.java @@ -1,6 +1,7 @@ package org.opencb.opencga.storage.core.variant.query.projection; import com.google.common.collect.Iterables; +import org.opencb.commons.datastore.core.Event; import org.opencb.opencga.storage.core.metadata.models.StudyMetadata; import org.opencb.opencga.storage.core.variant.adaptors.VariantField; @@ -21,6 +22,7 @@ public final class VariantQueryProjection { private final int numSamples; private final int numTotalSamples; + private List events = new ArrayList<>(); public VariantQueryProjection(StudyMetadata studyMetadata, List samples, List files) { this.fields = VariantField.getIncludeFields(null); @@ -77,14 +79,28 @@ public Map> getSamples() { return studies.values().stream().collect(Collectors.toMap(s -> s.studyMetadata.getId(), s -> s.samples)); } + public Map> getSampleNames() { + return studies.values().stream().collect(Collectors.toMap(s -> s.studyMetadata.getName(), s -> s.sampleNames)); + } + @Deprecated public Map> getFiles() { return studies.values().stream().collect(Collectors.toMap(s -> s.studyMetadata.getId(), s -> s.files)); } + public List getEvents() { + return events; + } + + public VariantQueryProjection setEvents(List events) { + this.events = events; + return this; + } + public static class StudyVariantQueryProjection { private StudyMetadata studyMetadata; private List samples = Collections.emptyList(); + private List sampleNames = Collections.emptyList(); private Map> multiFileSampleFiles = Collections.emptyMap(); private Set multiFileSamples = Collections.emptySet(); private List files = Collections.emptyList(); @@ -129,6 +145,15 @@ public StudyVariantQueryProjection setSamples(List samples) { return this; } + public List getSampleNames() { + return sampleNames; + } + + public StudyVariantQueryProjection setSampleNames(List sampleNames) { + this.sampleNames = sampleNames; + return this; + } + public Map> getMultiFileSampleFiles() { return multiFileSampleFiles; } diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/projection/VariantQueryProjectionParser.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/projection/VariantQueryProjectionParser.java index 371903a7626..3ec0c32c08a 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/projection/VariantQueryProjectionParser.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/projection/VariantQueryProjectionParser.java @@ -1,6 +1,7 @@ package org.opencb.opencga.storage.core.variant.query.projection; import org.apache.commons.collections4.CollectionUtils; +import org.opencb.commons.datastore.core.Event; import org.opencb.commons.datastore.core.Query; import org.opencb.commons.datastore.core.QueryOptions; import org.opencb.opencga.storage.core.metadata.VariantStorageMetadataManager; @@ -53,6 +54,7 @@ public static VariantQueryProjection parseVariantQueryFields( public VariantQueryProjection parseVariantQueryProjection(Query query, QueryOptions options) { Set includeFields = VariantField.getIncludeFields(options); + List events = new ArrayList<>(); List includeStudies = getIncludeStudies(query, options, metadataManager, includeFields); Map studies = new HashMap<>(includeStudies.size()); @@ -68,13 +70,20 @@ public VariantQueryProjection parseVariantQueryProjection(Query query, QueryOpti } Map> sampleIdsMap = getIncludeSampleIds(query, options, includeStudies, metadataManager); - for (VariantQueryProjection.StudyVariantQueryProjection study : studies.values()) { - study.setSamples(sampleIdsMap.get(study.getId())); - } int numTotalSamples = sampleIdsMap.values().stream().mapToInt(List::size).sum(); skipAndLimitSamples(query, sampleIdsMap); int numSamples = sampleIdsMap.values().stream().mapToInt(List::size).sum(); + for (VariantQueryProjection.StudyVariantQueryProjection study : studies.values()) { + List sampleIds = sampleIdsMap.get(study.getId()); + study.setSamples(sampleIds); + List sampleNames = new ArrayList<>(sampleIds.size()); + for (Integer sampleId : sampleIds) { + sampleNames.add(metadataManager.getSampleName(study.getId(), sampleId)); + } + study.setSampleNames(sampleNames); + } + Map> fileIdsMap = getIncludeFiles(query, includeStudies, includeFields, metadataManager, sampleIdsMap); for (VariantQueryProjection.StudyVariantQueryProjection study : studies.values()) { @@ -125,22 +134,30 @@ public VariantQueryProjection parseVariantQueryProjection(Query query, QueryOpti for (VariantQueryProjection.StudyVariantQueryProjection study : studies.values()) { int studyId = study.getId(); List cohorts = new LinkedList<>(); - for (CohortMetadata cohort : metadataManager.getCalculatedCohorts(studyId)) { + for (CohortMetadata cohort : metadataManager.getCalculatedOrInvalidCohorts(studyId)) { cohorts.add(cohort.getId()); + if (cohort.isInvalid()) { + String message = "Please note that the Cohort Stats for " + + "'" + study.getName() + ":" + cohort.getName() + "' are currently outdated."; + int numSampmles = cohort.getSamples().size(); + int invalidStatsNumSamples = cohort.getAttributes().getInt(CohortMetadata.INVALID_STATS_NUM_SAMPLES, -1); + if (invalidStatsNumSamples > 0) { + message += " The statistics have been calculated with " + invalidStatsNumSamples + " samples, " + + "while the total number of samples in the cohort is " + numSampmles + "."; + } + message += " To display updated statistics, please execute variant-stats-index."; + events.add(new Event(Event.Type.WARNING, message)); + } } -// metadataManager.cohortIterator(studyId).forEachRemaining(cohort -> { -// if (cohort.isReady()/* || cohort.isInvalid()*/) { -// cohorts.add(cohort.getId()); -// } -// }); study.setCohorts(cohorts); } } - return new VariantQueryProjection(includeFields, studies, numTotalSamples != numSamples, numSamples, numTotalSamples); + return new VariantQueryProjection(includeFields, studies, numTotalSamples != numSamples, numSamples, numTotalSamples) + .setEvents(events); } - public static void skipAndLimitSamples(Query query, Map> sampleIds) { + private void skipAndLimitSamples(Query query, Map> sampleIds) { if (VariantQueryUtils.isValidParam(query, VariantQueryParam.SAMPLE_SKIP)) { int skip = query.getInt(VariantQueryParam.SAMPLE_SKIP.key()); if (skip > 0) { @@ -483,6 +500,10 @@ public static boolean isIncludeSamplesDefined(Query query, Set fie return getIncludeSamplePartialStatus(query, fields) != null || getIncludeFilePartialStatus(query, fields) != null; } + /* + * @deprecated use VariantQueryProjection.getSampleNames() + */ + @Deprecated public static Map> getIncludeSampleNames(Query query, QueryOptions options, VariantStorageMetadataManager metadataManager) { if (VariantField.getIncludeFields(options).contains(VariantField.STUDIES)) { diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/search/SamplesSearchIndexVariantQueryExecutor.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/search/SamplesSearchIndexVariantQueryExecutor.java index dc278d1e3db..200b2eb463d 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/search/SamplesSearchIndexVariantQueryExecutor.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/search/SamplesSearchIndexVariantQueryExecutor.java @@ -1,7 +1,5 @@ package org.opencb.opencga.storage.core.variant.search; -import org.apache.commons.lang3.time.StopWatch; -import org.opencb.commons.datastore.core.DataResult; import org.opencb.commons.datastore.core.ObjectMap; import org.opencb.commons.datastore.core.Query; import org.opencb.commons.datastore.core.QueryOptions; @@ -10,11 +8,10 @@ import org.opencb.opencga.storage.core.exceptions.VariantSearchException; import org.opencb.opencga.storage.core.variant.adaptors.VariantDBAdaptor; import org.opencb.opencga.storage.core.variant.adaptors.VariantQueryException; +import org.opencb.opencga.storage.core.variant.query.ParsedVariantQuery; import org.opencb.opencga.storage.core.variant.search.solr.VariantSearchManager; import java.io.IOException; -import java.util.Collections; -import java.util.concurrent.TimeUnit; import static org.opencb.opencga.storage.core.variant.search.VariantSearchUtils.inferSpecificSearchIndexSamplesCollection; @@ -37,19 +34,9 @@ public boolean canUseThisExecutor(Query query, QueryOptions options) throws Stor } @Override - public DataResult count(Query query) { - try { - StopWatch watch = StopWatch.createStarted(); - long count = searchManager.count(dbName, query); - int time = (int) watch.getTime(TimeUnit.MILLISECONDS); - return new DataResult<>(time, Collections.emptyList(), 1, Collections.singletonList(count), 1); - } catch (IOException | VariantSearchException e) { - throw new VariantQueryException("Error querying Solr", e); - } - } - - @Override - protected Object getOrIterator(Query query, QueryOptions options, boolean iterator) throws StorageEngineException { + protected Object getOrIterator(ParsedVariantQuery variantQuery, boolean iterator) throws StorageEngineException { + Query query = variantQuery.getQuery(); + QueryOptions options = variantQuery.getInputOptions(); String specificSearchIndexSamples = inferSpecificSearchIndexSamplesCollection(query, options, getMetadataManager(), dbName); if (specificSearchIndexSamples == null) { @@ -59,7 +46,7 @@ protected Object getOrIterator(Query query, QueryOptions options, boolean iterat if (iterator) { return searchManager.iterator(specificSearchIndexSamples, query, options); } else { - return searchManager.query(specificSearchIndexSamples, query, options); + return searchManager.query(specificSearchIndexSamples, variantQuery); } } catch (IOException | VariantSearchException e) { throw VariantQueryException.internalException(e); diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/search/SearchIndexVariantAggregationExecutor.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/search/SearchIndexVariantAggregationExecutor.java index cd466046999..eba762ab0af 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/search/SearchIndexVariantAggregationExecutor.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/search/SearchIndexVariantAggregationExecutor.java @@ -4,7 +4,7 @@ import org.opencb.commons.datastore.core.FacetField; import org.opencb.commons.datastore.core.Query; import org.opencb.commons.datastore.core.QueryOptions; -import org.opencb.opencga.core.response.VariantQueryResult; +import org.opencb.opencga.storage.core.variant.query.VariantQueryResult; import org.opencb.opencga.storage.core.variant.query.executors.VariantAggregationExecutor; import org.opencb.opencga.storage.core.variant.search.solr.VariantSearchManager; import org.slf4j.Logger; diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/search/SearchIndexVariantQueryExecutor.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/search/SearchIndexVariantQueryExecutor.java index 137c48cd163..12c86cc4e2b 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/search/SearchIndexVariantQueryExecutor.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/search/SearchIndexVariantQueryExecutor.java @@ -8,7 +8,6 @@ import org.opencb.commons.datastore.core.Query; import org.opencb.commons.datastore.core.QueryOptions; import org.opencb.opencga.core.config.storage.StorageConfiguration; -import org.opencb.opencga.core.response.VariantQueryResult; import org.opencb.opencga.storage.core.exceptions.StorageEngineException; import org.opencb.opencga.storage.core.exceptions.VariantSearchException; import org.opencb.opencga.storage.core.variant.VariantStorageEngine; @@ -16,6 +15,8 @@ import org.opencb.opencga.storage.core.variant.adaptors.VariantField; import org.opencb.opencga.storage.core.variant.adaptors.VariantQueryException; import org.opencb.opencga.storage.core.variant.adaptors.VariantQueryParam; +import org.opencb.opencga.storage.core.variant.query.ParsedVariantQuery; +import org.opencb.opencga.storage.core.variant.query.VariantQueryResult; import org.opencb.opencga.storage.core.variant.query.VariantQueryUtils; import org.opencb.opencga.storage.core.variant.search.solr.SolrNativeIterator; import org.opencb.opencga.storage.core.variant.search.solr.VariantSearchManager; @@ -80,29 +81,16 @@ public boolean canUseThisExecutor(Query query, QueryOptions options) throws Stor } @Override - public DataResult count(Query query) { - try { - StopWatch watch = StopWatch.createStarted(); - long count = searchManager.count(dbName, query); - int time = (int) watch.getTime(TimeUnit.MILLISECONDS); - return new DataResult<>(time, Collections.emptyList(), 1, Collections.singletonList(count), 1); - } catch (IOException | VariantSearchException e) { - throw new VariantQueryException("Error querying Solr", e); - } - } - - @Override - protected Object getOrIterator(Query query, QueryOptions options, boolean iterator) { - if (options == null) { - options = QueryOptions.empty(); - } + protected Object getOrIterator(ParsedVariantQuery variantQuery, boolean iterator) { + Query query = variantQuery.getQuery(); + QueryOptions options = variantQuery.getInputOptions(); if (doQuerySearchManager(query, options)) { try { if (iterator) { return searchManager.iterator(dbName, query, options); } else { - return searchManager.query(dbName, query, options); + return searchManager.query(dbName, variantQuery); } } catch (IOException | VariantSearchException e) { throw new VariantQueryException("Error querying Solr", e); @@ -134,7 +122,7 @@ protected Object getOrIterator(Query query, QueryOptions options, boolean iterat approxCount = false; } else if (options.getBoolean(APPROXIMATE_COUNT.key()) || options.getBoolean(QueryOptions.COUNT)) { options.put(QueryOptions.COUNT, false); - VariantQueryResult result = approximateCount(query, options); + VariantQueryResult result = approximateCount(variantQuery); numTotalResults = result.first(); approxCount = result.getApproximateCount(); approxCountSamplingSize = result.getApproximateCountSamplingSize(); @@ -176,7 +164,9 @@ protected Object getOrIterator(Query query, QueryOptions options, boolean iterat } } - public VariantQueryResult approximateCount(Query query, QueryOptions options) { + public VariantQueryResult approximateCount(ParsedVariantQuery variantQuery) { + Query query = variantQuery.getQuery(); + QueryOptions options = variantQuery.getInputOptions(); long count; boolean approxCount = true; int sampling = 0; @@ -193,7 +183,7 @@ public VariantQueryResult approximateCount(Query query, QueryOptions optio Query searchEngineQuery = getSearchEngineQuery(query); Query engineQuery = getEngineQuery(query, options, getMetadataManager()); - VariantQueryResult nativeResult = searchManager + DataResult nativeResult = searchManager .nativeQuery(dbName, searchEngineQuery, queryOptions); List variantIds = nativeResult.getResults().stream().map(VariantSearchModel::getId).collect(Collectors.toList()); // Adjust numSamples if the results from SearchManager is smaller than numSamples @@ -202,7 +192,7 @@ public VariantQueryResult approximateCount(Query query, QueryOptions optio approxCount = false; sampling = variantIds.size(); } - long numSearchResults = nativeResult.getNumTotalResults(); + long numSearchResults = nativeResult.getNumMatches(); long numResults; if (variantIds.isEmpty()) { @@ -223,8 +213,8 @@ public VariantQueryResult approximateCount(Query query, QueryOptions optio throw new VariantQueryException("Error querying Solr", e); } int time = (int) watch.getTime(TimeUnit.MILLISECONDS); - return new VariantQueryResult<>(time, 1, 1, Collections.emptyList(), Collections.singletonList(count), null, - SEARCH_ENGINE_ID + '+' + getStorageEngineId(), approxCount, approxCount ? sampling : null, null); + return new VariantQueryResult<>(time, 1, 1, Collections.emptyList(), Collections.singletonList(count), + SEARCH_ENGINE_ID + '+' + getStorageEngineId(), approxCount, approxCount ? sampling : null, null, variantQuery); } /** @@ -308,7 +298,7 @@ protected Iterator variantIdIteratorFromSearch(Query query, int limit, i try { // Do not iterate for small queries if (limit < 10000) { - VariantQueryResult nativeResult = searchManager.nativeQuery(dbName, query, queryOptions); + DataResult nativeResult = searchManager.nativeQuery(dbName, query, queryOptions); if (numTotalResults != null) { numTotalResults.set(nativeResult.getNumMatches()); } diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/search/solr/VariantSearchManager.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/search/solr/VariantSearchManager.java index 74ee51bbe3a..374f5f9ffd6 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/search/solr/VariantSearchManager.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/search/solr/VariantSearchManager.java @@ -42,7 +42,8 @@ import org.opencb.commons.utils.ListUtils; import org.opencb.opencga.core.common.TimeUtils; import org.opencb.opencga.core.config.storage.StorageConfiguration; -import org.opencb.opencga.core.response.VariantQueryResult; +import org.opencb.opencga.storage.core.variant.query.ParsedVariantQuery; +import org.opencb.opencga.storage.core.variant.query.VariantQueryResult; import org.opencb.opencga.storage.core.exceptions.VariantSearchException; import org.opencb.opencga.storage.core.metadata.VariantStorageMetadataManager; import org.opencb.opencga.storage.core.utils.CellBaseUtils; @@ -278,25 +279,24 @@ public int delete(String collection, VariantDBIterator variantDBIterator, Progre * according a given query. * * @param collection Collection name - * @param query Query - * @param queryOptions Query options + * @param variantQuery Parsed variant query * @return List of Variant objects * @throws VariantSearchException VariantSearchException * @throws IOException IOException */ - public VariantQueryResult query(String collection, Query query, QueryOptions queryOptions) + public VariantQueryResult query(String collection, ParsedVariantQuery variantQuery) throws VariantSearchException, IOException { - SolrQuery solrQuery = solrQueryParser.parse(query, queryOptions); + SolrQuery solrQuery = solrQueryParser.parse(variantQuery.getQuery(), variantQuery.getInputOptions()); SolrCollection solrCollection = solrManager.getCollection(collection); DataResult queryResult; try { queryResult = solrCollection.query(solrQuery, VariantSearchModel.class, - new VariantSearchToVariantConverter(VariantField.getIncludeFields(queryOptions))); + new VariantSearchToVariantConverter(VariantField.getIncludeFields(variantQuery.getInputOptions()))); } catch (SolrServerException e) { throw new VariantSearchException("Error executing variant query", e); } - return new VariantQueryResult<>(queryResult, null, SEARCH_ENGINE_ID); + return new VariantQueryResult<>(queryResult, SEARCH_ENGINE_ID, variantQuery); } /** @@ -310,7 +310,7 @@ public VariantQueryResult query(String collection, Query query, QueryOp * @throws VariantSearchException VariantSearchException * @throws IOException IOException */ - public VariantQueryResult nativeQuery(String collection, Query query, QueryOptions queryOptions) + public DataResult nativeQuery(String collection, Query query, QueryOptions queryOptions) throws VariantSearchException, IOException { SolrQuery solrQuery = solrQueryParser.parse(query, queryOptions); SolrCollection solrCollection = solrManager.getCollection(collection); @@ -321,7 +321,7 @@ public VariantQueryResult nativeQuery(String collection, Que throw new VariantSearchException("Error executing variant query (nativeQuery)", e); } - return new VariantQueryResult<>(queryResult, null, SEARCH_ENGINE_ID); + return queryResult; } /** diff --git a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/VariantStorageEngineBNDTest.java b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/VariantStorageEngineBNDTest.java index 598e1fae586..08dabda7562 100644 --- a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/VariantStorageEngineBNDTest.java +++ b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/VariantStorageEngineBNDTest.java @@ -1,6 +1,7 @@ package org.opencb.opencga.storage.core.variant; import org.hamcrest.CoreMatchers; +import org.hamcrest.MatcherAssert; import org.junit.Before; import org.junit.Ignore; import org.junit.Test; @@ -17,6 +18,7 @@ import org.opencb.opencga.storage.core.StoragePipelineResult; import org.opencb.opencga.storage.core.exceptions.StorageEngineException; import org.opencb.opencga.storage.core.metadata.models.StudyMetadata; +import org.opencb.opencga.storage.core.variant.adaptors.VariantQuery; import org.opencb.opencga.storage.core.variant.adaptors.VariantQueryParam; import org.opencb.opencga.storage.core.variant.adaptors.iterators.VariantDBIterator; import org.opencb.opencga.storage.core.variant.io.VariantWriterFactory; @@ -75,13 +77,13 @@ public void checkAllAnnotated() throws Exception { @Test public void getPairs() throws Exception { - getPairs(new Query()); - getPairs(new Query(VariantQueryParam.REGION.key(), "2")); - getPairs(new Query(VariantQueryParam.REGION.key(), "17")); - getPairs(new Query(VariantQueryParam.REGION.key(), "2,13,5")); - getPairs(new Query(VariantQueryParam.REGION.key(), "2").append(VariantQueryParam.GENE.key(), "VPS53")); - getPairs(new Query(VariantQueryParam.GENE.key(), "VPS53")); - getPairs(new Query(VariantQueryParam.GENE.key(), "BRCA2")); + getPairs(new VariantQuery()); + getPairs(new VariantQuery().region("2")); + getPairs(new VariantQuery().region("17")); + getPairs(new VariantQuery().region("2", "13", "5")); + getPairs(new VariantQuery().region("2").gene("LINC00423")); + getPairs(new VariantQuery().gene("LINC00423")); + getPairs(new VariantQuery().gene("ENSG00000263015")); } public void getPairs(Query inputQuery) throws Exception { @@ -115,8 +117,8 @@ public void getPairs(Query inputQuery) throws Exception { } assertNull(prevMateid); assertNull(prevId); - assertThat(duplicatedVariants, CoreMatchers.not(CoreMatchers.hasItem(CoreMatchers.anything()))); - + MatcherAssert.assertThat(duplicatedVariants, CoreMatchers.not(CoreMatchers.hasItem(CoreMatchers.anything()))); + assertNotEquals(0, variants.size()); // Check pagination testPagination(variantsList, query, 1); testPagination(variantsList, query, 2); @@ -130,17 +132,19 @@ private void testPagination(List variantsList, Query query, int batchSiz List actualVariantsGet = new ArrayList<>(variantsList.size()); List actualVariantsIterator = new ArrayList<>(variantsList.size()); for (int i = 0; i < variantsList.size(); i += batchSize) { + System.out.println(" --- limit = " + batchSize + " skip = " + i); QueryOptions options = new QueryOptions(QueryOptions.LIMIT, batchSize) .append(QueryOptions.SKIP, i); - List results = variantStorageEngine.get(query, options).getResults(); - System.out.println("get = " + options.toJson() + " -> " + results.size() + " " + results); + List results; + results = variantStorageEngine.get(query, options).getResults(); + System.out.println("get = " + options.toJson() + " -> " + results.size() + " result " + results.stream().map(Variant::toString).collect(Collectors.joining("\", \"", "[ \"", "\" ]"))); results.stream().map(Variant::toString).forEach(actualVariantsGet::add); assertTrue(results.size() <= batchSize); results = new ArrayList<>(batchSize); variantStorageEngine.iterator(query, options).forEachRemaining(results::add); - System.out.println("it = " + options.toJson() + " -> " + results.size() + " " + results); - results.stream().map(Variant::toString).forEach(actualVariantsGet::add); + System.out.println("it = " + options.toJson() + " -> " + results.size() + " result " + results.stream().map(Variant::toString).collect(Collectors.joining("\", \"", "[ \"", "\" ]"))); + results.stream().map(Variant::toString).forEach(actualVariantsIterator::add); assertTrue(results.size() <= batchSize); } diff --git a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/VariantStorageSearchIntersectTest.java b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/VariantStorageSearchIntersectTest.java index 37c1c02b60d..3827a56da35 100644 --- a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/VariantStorageSearchIntersectTest.java +++ b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/VariantStorageSearchIntersectTest.java @@ -32,7 +32,7 @@ import org.opencb.commons.datastore.core.Query; import org.opencb.commons.datastore.core.QueryOptions; import org.opencb.opencga.core.api.ParamConstants; -import org.opencb.opencga.core.response.VariantQueryResult; +import org.opencb.opencga.storage.core.variant.query.VariantQueryResult; import org.opencb.opencga.storage.core.StoragePipelineResult; import org.opencb.opencga.storage.core.exceptions.StorageEngineException; import org.opencb.opencga.storage.core.metadata.VariantStorageMetadataManager; @@ -293,7 +293,7 @@ public void testCount() throws Exception { .append(ANNOT_CONSERVATION.key(), "gerp>0.1"); long realCount = dbAdaptor.count(query).first(); VariantQueryResult result = variantQueryExecutor - .get(query, new QueryOptions(COUNT, true).append(LIMIT, 0)); + .get(variantStorageEngine.parseQuery(query, new QueryOptions(COUNT, true).append(LIMIT, 0))); assertEquals(0, result.getResults().size()); assertEquals(realCount, result.getNumMatches()); } @@ -304,7 +304,7 @@ public void testApproxCount() throws Exception { .append(ANNOT_CONSERVATION.key(), "gerp>0.1"); long realCount = dbAdaptor.count(query).first(); VariantQueryResult result = variantQueryExecutor - .approximateCount(query, new QueryOptions(VariantStorageOptions.APPROXIMATE_COUNT_SAMPLING_SIZE.key(), realCount * 0.1)); + .approximateCount(variantStorageEngine.getVariantQueryParser().parseQuery(query, new QueryOptions(VariantStorageOptions.APPROXIMATE_COUNT_SAMPLING_SIZE.key(), realCount * 0.1))); long approxCount = result.first(); System.out.println("approxCount = " + approxCount); System.out.println("realCount = " + realCount); @@ -319,7 +319,7 @@ public void testExactApproxCount() throws Exception { .append(ANNOT_CONSERVATION.key(), "gerp>0.1"); long realCount = dbAdaptor.count(query).first(); VariantQueryResult result = variantQueryExecutor - .approximateCount(query, new QueryOptions(VariantStorageOptions.APPROXIMATE_COUNT_SAMPLING_SIZE.key(), allVariants.getNumResults())); + .approximateCount(variantStorageEngine.getVariantQueryParser().parseQuery(query, new QueryOptions(VariantStorageOptions.APPROXIMATE_COUNT_SAMPLING_SIZE.key(), allVariants.getNumResults()))); long approxCount = result.first(); System.out.println("approxCount = " + approxCount); System.out.println("realCount = " + realCount); @@ -333,7 +333,7 @@ public void testExactApproxCountToSearch() throws Exception { Query query = new Query(ANNOT_CONSERVATION.key(), "gerp>0.1"); long realCount = dbAdaptor.count(query).first(); VariantQueryResult result = variantQueryExecutor - .approximateCount(query, new QueryOptions(VariantStorageOptions.APPROXIMATE_COUNT_SAMPLING_SIZE.key(), allVariants.getNumResults())); + .approximateCount(variantStorageEngine.getVariantQueryParser().parseQuery(query, new QueryOptions(VariantStorageOptions.APPROXIMATE_COUNT_SAMPLING_SIZE.key(), allVariants.getNumResults()))); long approxCount = result.first(); System.out.println("approxCount = " + approxCount); System.out.println("realCount = " + realCount); diff --git a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/adaptors/VariantDBAdaptorMultiFileSpecificSamplesCollectionTest.java b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/adaptors/VariantDBAdaptorMultiFileSpecificSamplesCollectionTest.java index 4c267a35d15..58cc058dc4b 100644 --- a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/adaptors/VariantDBAdaptorMultiFileSpecificSamplesCollectionTest.java +++ b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/adaptors/VariantDBAdaptorMultiFileSpecificSamplesCollectionTest.java @@ -5,7 +5,8 @@ import org.opencb.biodata.models.variant.Variant; import org.opencb.commons.datastore.core.Query; import org.opencb.commons.datastore.core.QueryOptions; -import org.opencb.opencga.core.response.VariantQueryResult; +import org.opencb.opencga.storage.core.variant.query.ParsedVariantQuery; +import org.opencb.opencga.storage.core.variant.query.VariantQueryResult; import org.opencb.opencga.storage.core.exceptions.StorageEngineException; import org.opencb.opencga.storage.core.exceptions.VariantSearchException; import org.opencb.opencga.storage.core.metadata.VariantStorageMetadataManager; @@ -55,19 +56,19 @@ protected void load() throws Exception { protected VariantQueryResult query(Query query, QueryOptions options) { try { - query = variantStorageEngine.preProcessQuery(query, options); + ParsedVariantQuery variantQuery = variantStorageEngine.parseQuery(query, options); VariantStorageMetadataManager scm = dbAdaptor.getMetadataManager(); - String collection = VariantSearchUtils.inferSpecificSearchIndexSamplesCollection(query, options, scm, DB_NAME); + String collection = VariantSearchUtils.inferSpecificSearchIndexSamplesCollection(variantQuery.getQuery(), options, scm, DB_NAME); // Do not execute this test if the query is not covered by the specific search index collection - Assume.assumeThat(query.toJson(), collection, CoreMatchers.notNullValue()); + Assume.assumeThat(variantQuery.getQuery().toJson(), collection, CoreMatchers.notNullValue()); if (options.getInt(QueryOptions.LIMIT, 0) <= 0) { options = new QueryOptions(options); options.put(QueryOptions.LIMIT, 100000); } - return variantStorageEngine.getVariantSearchManager().query(collection, query, options); + return variantStorageEngine.getVariantSearchManager().query(collection, variantQuery); } catch (StorageEngineException | VariantSearchException | IOException e) { e.printStackTrace(); Assert.fail(e.getMessage()); diff --git a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/adaptors/VariantDBAdaptorMultiFileTest.java b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/adaptors/VariantDBAdaptorMultiFileTest.java index 8ad47c67c2c..9548338e3d8 100644 --- a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/adaptors/VariantDBAdaptorMultiFileTest.java +++ b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/adaptors/VariantDBAdaptorMultiFileTest.java @@ -10,7 +10,8 @@ import org.opencb.biodata.models.variant.avro.SampleEntry; import org.opencb.biodata.models.variant.stats.VariantStats; import org.opencb.commons.datastore.core.*; -import org.opencb.opencga.core.response.VariantQueryResult; +import org.opencb.opencga.storage.core.variant.query.ParsedVariantQuery; +import org.opencb.opencga.storage.core.variant.query.VariantQueryResult; import org.opencb.opencga.storage.core.exceptions.StorageEngineException; import org.opencb.opencga.storage.core.metadata.VariantStorageMetadataManager; import org.opencb.opencga.storage.core.metadata.models.FileMetadata; @@ -111,9 +112,8 @@ protected void load() throws Exception { } protected VariantQueryResult query(Query query, QueryOptions options) { - options = options == null ? QueryOptions.empty() : options; - query = variantStorageEngine.preProcessQuery(query, options); - return dbAdaptor.get(query, options); + ParsedVariantQuery variantQuery = variantStorageEngine.parseQuery(query, options); + return dbAdaptor.get(variantQuery); } protected ObjectMap getOptions() { @@ -382,13 +382,13 @@ public void testSampleLimitSkip() throws Exception { System.out.println("samples(ALL) = " + result.getSamples()); for (int i : new int[]{1, 3, 6, 8, 10}) { - result = query(new Query(VariantQueryParam.SAMPLE_SKIP.key(), i).append(VariantQueryParam.INCLUDE_SAMPLE.key(), ALL).append(SAMPLE_METADATA.key(), true), options); + result = query(new VariantQuery().sampleSkip(i).includeSampleAll().sampleMetadata(true), options); // System.out.println("samples(SKIP=" + i + ") = " + result.getSamples()); assertEquals(Math.max(0, 8 - i), result.getSamples().values().stream().mapToInt(List::size).sum()); assertEquals(Math.max(0, 8 - i), result.getNumSamples().intValue()); assertEquals(8, result.getNumTotalSamples().intValue()); - result = query(new Query(VariantQueryParam.SAMPLE_LIMIT.key(), i).append(VariantQueryParam.INCLUDE_SAMPLE.key(), ALL).append(SAMPLE_METADATA.key(), true), options); + result = query(new VariantQuery().sampleLimit(i).includeSampleAll().sampleMetadata(true), options); // System.out.println("samples(LIMIT=" + i + ") = " + result.getSamples()); assertEquals(Math.min(8, i), result.getSamples().values().stream().mapToInt(List::size).sum()); assertEquals(Math.min(8, i), result.getNumSamples().intValue()); diff --git a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/adaptors/VariantDBAdaptorTest.java b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/adaptors/VariantDBAdaptorTest.java index 800efb34e3e..984cc5bfb83 100644 --- a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/adaptors/VariantDBAdaptorTest.java +++ b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/adaptors/VariantDBAdaptorTest.java @@ -35,7 +35,6 @@ import org.opencb.commons.datastore.core.ObjectMap; import org.opencb.commons.datastore.core.Query; import org.opencb.commons.datastore.core.QueryOptions; -import org.opencb.opencga.core.response.VariantQueryResult; import org.opencb.opencga.storage.core.StoragePipelineResult; import org.opencb.opencga.storage.core.exceptions.StorageEngineException; import org.opencb.opencga.storage.core.metadata.VariantStorageMetadataManager; @@ -45,6 +44,8 @@ import org.opencb.opencga.storage.core.variant.VariantStorageOptions; import org.opencb.opencga.storage.core.variant.adaptors.iterators.VariantDBIterator; import org.opencb.opencga.storage.core.variant.annotation.annotators.CellBaseRestVariantAnnotator; +import org.opencb.opencga.storage.core.variant.query.ParsedVariantQuery; +import org.opencb.opencga.storage.core.variant.query.VariantQueryResult; import org.opencb.opencga.storage.core.variant.query.VariantQueryUtils; import org.opencb.opencga.storage.core.variant.query.executors.NoOpVariantQueryExecutor; import org.opencb.opencga.storage.core.variant.query.filters.VariantFilterBuilder; @@ -266,8 +267,8 @@ public void multiIterator() throws Exception { VariantDBIterator iterator = dbAdaptor.iterator(variantsToQuery.iterator(), new Query(), new QueryOptions()); - DataResult queryResult = iterator.toDataResult(); - assertEquals(variantsToQuery.size(), queryResult.getResults().size()); + List variants = iterator.toList(); + assertEquals(variantsToQuery.size(), variants.size()); } @Test @@ -2450,7 +2451,8 @@ private void testQuery(Query query, QueryOptions options) { } private Matcher withFilter(Query query) { - return VariantMatchers.withFilter(new VariantFilterBuilder(metadataManager).buildFilter(query, null), query.toJson()); + ParsedVariantQuery parsedVariantQuery = variantStorageEngine.parseQuery(query, new QueryOptions()); + return VariantMatchers.withFilter(new VariantFilterBuilder().buildFilter(parsedVariantQuery), query.toJson()); } /* @Test diff --git a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/adaptors/VariantQueryTest.java b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/adaptors/VariantQueryTest.java index de17c52a8a0..5cfdde4ca13 100644 --- a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/adaptors/VariantQueryTest.java +++ b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/adaptors/VariantQueryTest.java @@ -28,6 +28,12 @@ public void test() throws Exception { if (param.type() == QueryParam.Type.BOOLEAN || param.type() == QueryParam.Type.BOOLEAN_ARRAY) { methodSet = getMethodSafe(param.key(), boolean.class); expectedValue = true; + } else if (param.type() == QueryParam.Type.INTEGER || param.type() == QueryParam.Type.INTEGER_ARRAY) { + methodSet = getMethodSafe(param.key(), int.class); + expectedValue = 42; + if (methodSet == null) { + methodSet = getMethodSafe(param.key(), Integer.class); + } } else { expectedValue = RandomStringUtils.random(10); methodSet = getMethodSafe(param.key(), String.class); diff --git a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/adaptors/VariantQueryUsingSearchIndexTest.java b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/adaptors/VariantQueryUsingSearchIndexTest.java index 395e41d2675..6194d25c6d8 100644 --- a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/adaptors/VariantQueryUsingSearchIndexTest.java +++ b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/adaptors/VariantQueryUsingSearchIndexTest.java @@ -6,7 +6,7 @@ import org.opencb.commons.datastore.core.DataResult; import org.opencb.commons.datastore.core.Query; import org.opencb.commons.datastore.core.QueryOptions; -import org.opencb.opencga.core.response.VariantQueryResult; +import org.opencb.opencga.storage.core.variant.query.VariantQueryResult; import org.opencb.opencga.storage.core.variant.VariantStorageEngine; import org.opencb.opencga.storage.core.variant.adaptors.iterators.VariantDBIterator; import org.opencb.opencga.storage.core.variant.search.solr.VariantSearchManager; diff --git a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/dummy/DummyVariantDBAdaptor.java b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/dummy/DummyVariantDBAdaptor.java index 71fa34998de..b77ea4e08bc 100644 --- a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/dummy/DummyVariantDBAdaptor.java +++ b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/dummy/DummyVariantDBAdaptor.java @@ -28,7 +28,7 @@ import org.opencb.commons.datastore.core.DataResult; import org.opencb.commons.datastore.core.Query; import org.opencb.commons.datastore.core.QueryOptions; -import org.opencb.opencga.core.response.VariantQueryResult; +import org.opencb.opencga.storage.core.variant.query.VariantQueryResult; import org.opencb.opencga.storage.core.metadata.VariantStorageMetadataManager; import org.opencb.opencga.storage.core.metadata.models.CohortMetadata; import org.opencb.opencga.storage.core.metadata.models.StudyMetadata; @@ -72,13 +72,13 @@ public DummyVariantDBAdaptor(String dbName) { } @Override - public VariantQueryResult get(ParsedVariantQuery query, QueryOptions options) { + public VariantQueryResult get(ParsedVariantQuery query) { List variants = new ArrayList<>(); - iterator(query, options).forEachRemaining(variants::add); + iterator(query).forEachRemaining(variants::add); - return new VariantQueryResult<>(0, variants.size(), variants.size(), Collections.emptyList(), variants, null, - DummyVariantStorageEngine.STORAGE_ENGINE_ID); + return new VariantQueryResult<>(0, variants.size(), variants.size(), Collections.emptyList(), variants, + DummyVariantStorageEngine.STORAGE_ENGINE_ID, query); } @Override @@ -102,7 +102,8 @@ public DataResult distinct(Query query, String field) { } @Override - public VariantDBIterator iterator(ParsedVariantQuery variantQuery, QueryOptions options) { + public VariantDBIterator iterator(ParsedVariantQuery variantQuery) { + QueryOptions options = variantQuery.getInputOptions(); logger.info("Query " + variantQuery.getQuery().toJson()); logger.info("QueryOptions " + options.toJson()); logger.info("dbName " + dbName); diff --git a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/query/VariantQueryParserTest.java b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/query/VariantQueryParserTest.java index 8fae5f12310..2611cfc337a 100644 --- a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/query/VariantQueryParserTest.java +++ b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/query/VariantQueryParserTest.java @@ -101,10 +101,17 @@ public static Set set(T... ts) { @Test public void testParseClinicalCombinations() { - assertEquals(Arrays.asList("cosmic"), VariantQueryParser.parseClinicalCombinationsList(new Query(ANNOT_CLINICAL.key(), "cosmic"))); - assertEquals(Arrays.asList("clinvar", "cosmic"), VariantQueryParser.parseClinicalCombinationsList(new Query(ANNOT_CLINICAL.key(), "clinvar,cosmic"))); - assertEquals(Arrays.asList("cosmic_pathogenic"), VariantQueryParser.parseClinicalCombinationsList(new Query(ANNOT_CLINICAL.key(), "cosmic").append(ANNOT_CLINICAL_SIGNIFICANCE.key(), "pathogenic"))); - assertEquals(Arrays.asList("cosmic_confirmed"), VariantQueryParser.parseClinicalCombinationsList(new Query(ANNOT_CLINICAL.key(), "cosmic").append(ANNOT_CLINICAL_CONFIRMED_STATUS.key(), true))); - assertEquals(Arrays.asList("clinvar_pathogenic_confirmed", "clinvar_likely_pathogenic_confirmed", "cosmic_pathogenic_confirmed", "cosmic_likely_pathogenic_confirmed"), VariantQueryParser.parseClinicalCombinationsList(new Query(ANNOT_CLINICAL.key(), "clinvar,cosmic").append(ANNOT_CLINICAL_SIGNIFICANCE.key(), "pathogenic,likely_pathogenic").append(ANNOT_CLINICAL_CONFIRMED_STATUS.key(), true))); + assertEquals(Arrays.asList("cosmic"), variantQueryParser.parseQuery( + new Query(ANNOT_CLINICAL.key(), "cosmic"), new QueryOptions()).getClinicalCombinationsList()); + assertEquals(Arrays.asList("clinvar", "cosmic"), variantQueryParser.parseQuery( + new Query(ANNOT_CLINICAL.key(), "clinvar,cosmic"), new QueryOptions()).getClinicalCombinationsList()); + assertEquals(Arrays.asList("cosmic_pathogenic"), variantQueryParser.parseQuery( + new Query(ANNOT_CLINICAL.key(), "cosmic").append(ANNOT_CLINICAL_SIGNIFICANCE.key(), "pathogenic"), new QueryOptions()).getClinicalCombinationsList()); + assertEquals(Arrays.asList("cosmic_confirmed"), variantQueryParser.parseQuery( + new Query(ANNOT_CLINICAL.key(), "cosmic").append(ANNOT_CLINICAL_CONFIRMED_STATUS.key(), true), new QueryOptions()).getClinicalCombinationsList()); + assertEquals(Arrays.asList("clinvar_pathogenic_confirmed", "clinvar_likely_pathogenic_confirmed", "cosmic_pathogenic_confirmed", "cosmic_likely_pathogenic_confirmed"), variantQueryParser.parseQuery(new Query() + .append(ANNOT_CLINICAL.key(), "clinvar,cosmic") + .append(ANNOT_CLINICAL_SIGNIFICANCE.key(), "pathogenic,likely_pathogenic") + .append(ANNOT_CLINICAL_CONFIRMED_STATUS.key(), true), new QueryOptions()).getClinicalCombinationsList()); } } \ No newline at end of file diff --git a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/query/executors/VariantQueryExecutorTest.java b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/query/executors/VariantQueryExecutorTest.java index b5fdde0fb85..c58328d4469 100644 --- a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/query/executors/VariantQueryExecutorTest.java +++ b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/query/executors/VariantQueryExecutorTest.java @@ -8,7 +8,6 @@ import org.opencb.commons.datastore.core.ObjectMap; import org.opencb.commons.datastore.core.Query; import org.opencb.commons.datastore.core.QueryOptions; -import org.opencb.opencga.core.response.VariantQueryResult; import org.opencb.opencga.storage.core.StoragePipelineResult; import org.opencb.opencga.storage.core.exceptions.StorageEngineException; import org.opencb.opencga.storage.core.metadata.VariantStorageMetadataManager; @@ -20,6 +19,7 @@ import org.opencb.opencga.storage.core.variant.adaptors.VariantQuery; import org.opencb.opencga.storage.core.variant.adaptors.VariantQueryParam; import org.opencb.opencga.storage.core.variant.query.ParsedVariantQuery; +import org.opencb.opencga.storage.core.variant.query.VariantQueryResult; import org.opencb.opencga.storage.core.variant.query.VariantQueryUtils; import org.opencb.opencga.storage.core.variant.query.projection.VariantQueryProjection; import org.opencb.opencga.storage.core.variant.solr.VariantSolrExternalResource; @@ -198,7 +198,7 @@ public VariantQueryResult testQuery(Query query, QueryOptions options, Assert.assertTrue(dbQueryExecutor.canUseThisExecutor(query, options)); ParsedVariantQuery variantQuery = variantStorageEngine.parseQuery(query, options); - VariantQueryResult expected = dbQueryExecutor.get(new Query(variantQuery.getQuery()), new QueryOptions(options)); + VariantQueryResult expected = dbQueryExecutor.get(variantQuery); VariantQueryResult unfilteredResult = null; VariantQueryResult result = null; @@ -237,7 +237,7 @@ public VariantQueryResult testQuery(Query query, QueryOptions options, QueryOptions emptyOptions = new QueryOptions(); emptyOptions.putIfNotEmpty(QueryOptions.INCLUDE, options.getString(QueryOptions.INCLUDE)); emptyOptions.putIfNotEmpty(QueryOptions.EXCLUDE, options.getString(QueryOptions.EXCLUDE)); - unfilteredResult = dbQueryExecutor.get(emptyQuery, emptyOptions); + unfilteredResult = dbQueryExecutor.get(variantStorageEngine.parseQuery(emptyQuery, emptyOptions)); } for (VariantQueryExecutor variantQueryExecutor : variantQueryExecutors) { @@ -245,7 +245,7 @@ public VariantQueryResult testQuery(Query query, QueryOptions options, logger.info(""); logger.info("###################"); logger.info("### Testing " + variantQueryExecutor.getClass().getSimpleName()); - result = variantQueryExecutor.get(new Query(variantQuery.getQuery()), new QueryOptions(options)); + result = variantQueryExecutor.get(variantQuery); logger.info("### Num results : " + result.getNumResults()); logger.info("###################"); expected.getResults().sort(Comparator.comparing(Variant::toString)); diff --git a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/search/SearchIndexSamplesTest.java b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/search/SearchIndexSamplesTest.java index bd25023f6dc..12cb35f8714 100644 --- a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/search/SearchIndexSamplesTest.java +++ b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/search/SearchIndexSamplesTest.java @@ -229,7 +229,8 @@ protected void checkLoadedData(String collection, List samples) Query query = new Query(SAMPLE.key(), samples); int expectedCount = variantStorageEngine.getDBAdaptor().count(query).first().intValue(); - assertEquals(expectedCount, variantSearchManager.query(collection, new Query(), new QueryOptions()).getNumTotalResults()); + assertEquals(expectedCount, variantSearchManager.query(collection, variantStorageEngine.parseQuery(new Query(), new QueryOptions())) + .getNumTotalResults()); SolrVariantDBIterator solrIterator = variantSearchManager.iterator(collection, new Query(), new QueryOptions(QueryOptions.SORT, true)); diff --git a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/search/VariantSearchIndexTest.java b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/search/VariantSearchIndexTest.java index b83d1a9338e..4d5d8da6716 100644 --- a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/search/VariantSearchIndexTest.java +++ b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/search/VariantSearchIndexTest.java @@ -215,7 +215,7 @@ public void checkVariantSearchIndex(VariantDBAdaptor dbAdaptor) throws IOExcepti TreeSet variantsFromSearch = new TreeSet<>(Comparator.comparing(Variant::toString)); TreeSet variantsFromDB = new TreeSet<>(Comparator.comparing(Variant::toString)); - variantsFromSearch.addAll(variantStorageEngine.getVariantSearchManager().query(DB_NAME, query, queryOptions).getResults()); + variantsFromSearch.addAll(variantStorageEngine.getVariantSearchManager().query(DB_NAME, variantStorageEngine.parseQuery(query, queryOptions)).getResults()); variantsFromDB.addAll(dbAdaptor.get(query, queryOptions).getResults()); assertEquals(variantsFromDB.size(), variantsFromSearch.size()); diff --git a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/search/VariantSearchTest.java b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/search/VariantSearchTest.java index a8fe6d9790a..56588d25863 100644 --- a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/search/VariantSearchTest.java +++ b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/search/VariantSearchTest.java @@ -21,7 +21,7 @@ import org.opencb.commons.datastore.solr.FacetQueryParser; import org.opencb.commons.utils.ListUtils; import org.opencb.opencga.core.common.JacksonUtils; -import org.opencb.opencga.core.response.VariantQueryResult; +import org.opencb.opencga.storage.core.variant.query.VariantQueryResult; import org.opencb.opencga.core.testclassification.duration.MediumTests; import org.opencb.opencga.storage.core.metadata.VariantStorageMetadataManager; import org.opencb.opencga.storage.core.metadata.models.StudyMetadata; @@ -67,8 +67,8 @@ public void testTranscriptInfo() throws Exception { variantSearchManager.insert(collection, annotatedVariants); - VariantQueryResult results = variantSearchManager.query(collection, new Query(), - new QueryOptions(QueryOptions.LIMIT, limit)); + VariantQueryResult results = variantSearchManager.query(collection, variantStorageEngine.parseQuery(new Query(), + new QueryOptions(QueryOptions.LIMIT, limit))); for (int i = 0; i < limit; i++) { Variant expectedVariant = annotatedVariants.get(i); @@ -213,8 +213,8 @@ public void testSpecialCharacter() throws Exception { query.put(VariantQueryParam.FILE.key(), fileId); query.put(VariantQueryParam.FILTER.key(), "PASS"); query.put(VariantQueryParam.ANNOT_CLINICAL_SIGNIFICANCE.key(), "benign"); - VariantQueryResult results = variantSearchManager.query(collection, query, - new QueryOptions(QueryOptions.LIMIT, limit)); + VariantQueryResult results = variantSearchManager.query(collection, variantStorageEngine.parseQuery(query, + new QueryOptions(QueryOptions.LIMIT, limit))); if (results.getResults().size() > 0) { System.out.println(results.getResults().get(0).toJson()); diff --git a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/stats/VariantStatisticsManagerTest.java b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/stats/VariantStatisticsManagerTest.java index 5fb147d6d9e..baafe7d0ad6 100644 --- a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/stats/VariantStatisticsManagerTest.java +++ b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/stats/VariantStatisticsManagerTest.java @@ -27,8 +27,8 @@ import org.opencb.biodata.tools.variant.VariantNormalizer; import org.opencb.biodata.tools.variant.stats.VariantStatsCalculator; import org.opencb.commons.datastore.core.ObjectMap; +import org.opencb.commons.datastore.core.Query; import org.opencb.commons.datastore.core.QueryOptions; -import org.opencb.opencga.core.api.ParamConstants; import org.opencb.opencga.storage.core.exceptions.StorageEngineException; import org.opencb.opencga.storage.core.metadata.models.CohortMetadata; import org.opencb.opencga.storage.core.metadata.models.SampleMetadata; @@ -39,6 +39,7 @@ import org.opencb.opencga.storage.core.variant.adaptors.VariantDBAdaptor; import org.opencb.opencga.storage.core.variant.adaptors.VariantQuery; import org.opencb.opencga.storage.core.variant.adaptors.iterators.VariantDBIterator; +import org.opencb.opencga.storage.core.variant.query.VariantQueryResult; import java.io.IOException; import java.net.URI; @@ -112,6 +113,44 @@ public void calculateStatsMultiCohortsTest() throws Exception { checkCohorts(dbAdaptor, studyMetadata); } + @Test + public void queryInvalidStats() throws Exception { + //Calculate stats for 2 cohorts at one time + checkCohorts(dbAdaptor, studyMetadata); + + QueryOptions options = new QueryOptions(); + options.put(VariantStorageOptions.LOAD_BATCH_SIZE.key(), 100); + Iterator iterator = metadataManager.sampleMetadataIterator(studyMetadata.getId()); + + /** Create cohorts **/ + HashSet cohort1 = new HashSet<>(); + cohort1.add(iterator.next().getName()); + cohort1.add(iterator.next().getName()); + + HashSet cohort2 = new HashSet<>(); + cohort2.add(iterator.next().getName()); + cohort2.add(iterator.next().getName()); + + Map> cohorts = new HashMap<>(); + cohorts.put("cohort1", cohort1); + cohorts.put("cohort2", cohort2); + + //Calculate stats + stats(options, studyMetadata.getName(), cohorts, outputUri.resolve("cohort1.cohort2.stats")); + + checkCohorts(dbAdaptor, studyMetadata); + + List cohort1Samples = metadataManager.getCohortMetadata(studyMetadata.getId(), "cohort1").getSamples(); + CohortMetadata cohort = metadataManager.addSamplesToCohort(studyMetadata.getId(), "cohort2", cohort1Samples); + assertTrue(cohort.isInvalid()); + + VariantQueryResult result = variantStorageEngine.get(new Query(), new QueryOptions(QueryOptions.LIMIT, 1)); + assertEquals(1, result.getEvents().size()); + assertEquals("Please note that the Cohort Stats for '1000g:cohort2' are currently outdated." + + " The statistics have been calculated with 2 samples, while the total number of samples in the cohort is 4." + + " To display updated statistics, please execute variant-stats-index.", result.getEvents().get(0).getMessage()); + } + @Test public void calculateStatsSeparatedCohortsTest() throws Exception { //Calculate stats for 2 cohorts separately diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/HadoopVariantQueryParser.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/HadoopVariantQueryParser.java new file mode 100644 index 00000000000..6aa472ee735 --- /dev/null +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/HadoopVariantQueryParser.java @@ -0,0 +1,37 @@ +package org.opencb.opencga.storage.hadoop.variant; + +import org.opencb.commons.datastore.core.Query; +import org.opencb.commons.datastore.core.QueryOptions; +import org.opencb.opencga.storage.core.metadata.VariantStorageMetadataManager; +import org.opencb.opencga.storage.core.utils.CellBaseUtils; +import org.opencb.opencga.storage.core.variant.query.VariantQueryParser; +import org.opencb.opencga.storage.core.variant.query.projection.VariantQueryProjection; + +import java.util.List; + +import static org.opencb.opencga.storage.core.variant.adaptors.VariantQueryParam.STUDY; +import static org.opencb.opencga.storage.core.variant.query.VariantQueryUtils.*; + +public class HadoopVariantQueryParser extends VariantQueryParser { + public HadoopVariantQueryParser(CellBaseUtils cellBaseUtils, VariantStorageMetadataManager metadataManager) { + super(cellBaseUtils, metadataManager); + } + + @Override + protected Query preProcessQuery(Query originalQuery, QueryOptions options, VariantQueryProjection projection) { + Query query = super.preProcessQuery(originalQuery, options, projection); + List studyNames = metadataManager.getStudyNames(); + + if (isValidParam(query, STUDY) && studyNames.size() == 1) { + String study = query.getString(STUDY.key()); + if (!isNegated(study)) { + // Check that study exists + metadataManager.getStudyId(study); + query.remove(STUDY.key()); + } + } + + convertGenesToRegionsQuery(query, cellBaseUtils); + return query; + } +} diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/HadoopVariantStorageEngine.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/HadoopVariantStorageEngine.java index 2f0f6143b6b..65927c62fb2 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/HadoopVariantStorageEngine.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/HadoopVariantStorageEngine.java @@ -46,7 +46,6 @@ import org.opencb.opencga.storage.core.metadata.VariantMetadataFactory; import org.opencb.opencga.storage.core.metadata.VariantStorageMetadataManager; import org.opencb.opencga.storage.core.metadata.models.*; -import org.opencb.opencga.storage.core.utils.CellBaseUtils; import org.opencb.opencga.storage.core.variant.VariantStorageEngine; import org.opencb.opencga.storage.core.variant.VariantStorageOptions; import org.opencb.opencga.storage.core.variant.VariantStoragePipeline; @@ -59,6 +58,7 @@ import org.opencb.opencga.storage.core.variant.annotation.annotators.VariantAnnotator; import org.opencb.opencga.storage.core.variant.io.VariantExporter; import org.opencb.opencga.storage.core.variant.query.ParsedVariantQuery; +import org.opencb.opencga.storage.core.variant.query.VariantQueryParser; import org.opencb.opencga.storage.core.variant.query.executors.*; import org.opencb.opencga.storage.core.variant.score.VariantScoreFormatDescriptor; import org.opencb.opencga.storage.core.variant.search.SamplesSearchIndexVariantQueryExecutor; @@ -115,8 +115,6 @@ import static org.opencb.opencga.storage.core.variant.VariantStorageOptions.*; import static org.opencb.opencga.storage.core.variant.adaptors.VariantQueryParam.REGION; -import static org.opencb.opencga.storage.core.variant.adaptors.VariantQueryParam.STUDY; -import static org.opencb.opencga.storage.core.variant.query.VariantQueryUtils.*; import static org.opencb.opencga.storage.hadoop.variant.HadoopVariantStorageOptions.*; import static org.opencb.opencga.storage.hadoop.variant.gaps.FillGapsDriver.*; @@ -1031,46 +1029,48 @@ private synchronized HBaseManager getHBaseManager(Configuration configuration) { @Override public ParsedVariantQuery parseQuery(Query originalQuery, QueryOptions options) { try { - Query query = preProcessQuery(originalQuery, options); - ParsedVariantQuery parsedVariantQuery = getVariantQueryParser().parseQuery(query, options, true); - parsedVariantQuery.setInputQuery(originalQuery); - return parsedVariantQuery; + return getVariantQueryParser().parseQuery(originalQuery, options); } catch (StorageEngineException e) { throw VariantQueryException.internalException(e).setQuery(originalQuery); } } @Override - public Query preProcessQuery(Query originalQuery, QueryOptions options) { - Query query = super.preProcessQuery(originalQuery, options); - - VariantStorageMetadataManager metadataManager; - CellBaseUtils cellBaseUtils; - try { - metadataManager = getMetadataManager(); - cellBaseUtils = getCellBaseUtils(); - } catch (StorageEngineException e) { - throw VariantQueryException.internalException(e); - } - List studyNames = metadataManager.getStudyNames(); - - if (isValidParam(query, STUDY) && studyNames.size() == 1) { - String study = query.getString(STUDY.key()); - if (!isNegated(study)) { - try { - // Check that study exists - getMetadataManager().getStudyId(study); - } catch (StorageEngineException e) { - throw VariantQueryException.internalException(e); - } - query.remove(STUDY.key()); - } - } - - convertGenesToRegionsQuery(query, cellBaseUtils); - return query; + protected VariantQueryParser getVariantQueryParser() throws StorageEngineException { + return new HadoopVariantQueryParser(getCellBaseUtils(), getMetadataManager()); } +// @Override +// public Query preProcessQuery(Query originalQuery, QueryOptions options) { +// Query query = super.preProcessQuery(originalQuery, options); +// +// VariantStorageMetadataManager metadataManager; +// CellBaseUtils cellBaseUtils; +// try { +// metadataManager = getMetadataManager(); +// cellBaseUtils = getCellBaseUtils(); +// } catch (StorageEngineException e) { +// throw VariantQueryException.internalException(e); +// } +// List studyNames = metadataManager.getStudyNames(); +// +// if (isValidParam(query, STUDY) && studyNames.size() == 1) { +// String study = query.getString(STUDY.key()); +// if (!isNegated(study)) { +// try { +// // Check that study exists +// getMetadataManager().getStudyId(study); +// } catch (StorageEngineException e) { +// throw VariantQueryException.internalException(e); +// } +// query.remove(STUDY.key()); +// } +// } +// +// convertGenesToRegionsQuery(query, cellBaseUtils); +// return query; +// } + @Override protected List initVariantAggregationExecutors() { List executors = new ArrayList<>(3); @@ -1177,7 +1177,7 @@ protected List initVariantQueryExecutors() throws StorageE executors.add(new SampleIndexCompoundHeterozygousQueryExecutor( getMetadataManager(), getStorageEngineId(), getOptions(), this, getSampleIndexDBAdaptor(), getDBAdaptor())); executors.add(new BreakendVariantQueryExecutor( - getMetadataManager(), getStorageEngineId(), getOptions(), new SampleIndexVariantQueryExecutor( + getStorageEngineId(), getOptions(), new SampleIndexVariantQueryExecutor( getDBAdaptor(), getSampleIndexDBAdaptor(), getStorageEngineId(), getOptions()), getDBAdaptor())); executors.add(new SamplesSearchIndexVariantQueryExecutor( getDBAdaptor(), getVariantSearchManager(), getStorageEngineId(), dbName, getConfiguration(), getOptions())); diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/adaptors/HBaseColumnIntersectVariantQueryExecutor.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/adaptors/HBaseColumnIntersectVariantQueryExecutor.java index 40d45a70bee..c672e94fb58 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/adaptors/HBaseColumnIntersectVariantQueryExecutor.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/adaptors/HBaseColumnIntersectVariantQueryExecutor.java @@ -2,14 +2,14 @@ import com.google.common.collect.Iterators; import org.opencb.biodata.models.variant.Variant; -import org.opencb.commons.datastore.core.DataResult; import org.opencb.commons.datastore.core.ObjectMap; import org.opencb.commons.datastore.core.Query; import org.opencb.commons.datastore.core.QueryOptions; -import org.opencb.opencga.core.response.VariantQueryResult; import org.opencb.opencga.storage.core.variant.VariantStorageOptions; import org.opencb.opencga.storage.core.variant.adaptors.VariantDBAdaptor; import org.opencb.opencga.storage.core.variant.query.ParsedQuery; +import org.opencb.opencga.storage.core.variant.query.ParsedVariantQuery; +import org.opencb.opencga.storage.core.variant.query.VariantQueryResult; import org.opencb.opencga.storage.core.variant.query.VariantQueryUtils; import org.opencb.opencga.storage.core.variant.query.executors.VariantQueryExecutor; import org.slf4j.Logger; @@ -76,23 +76,19 @@ public boolean canUseThisExecutor(Query query, QueryOptions options) { return false; } - @Override - public DataResult count(Query query) { - throw new UnsupportedOperationException("Count not implemented in " + getClass()); - } - /** * Intersect result of column hbase scan and full phoenix query. * Use {@link org.opencb.opencga.storage.core.variant.adaptors.iterators.MultiVariantDBIterator}. * - * @param query Query - * @param options Options + * @param variantQuery Parsed query * @param iterator Shall the resulting object be an iterator instead of a DataResult * @return DataResult or Iterator with the variants that matches the query */ @Override - protected Object getOrIterator(Query query, QueryOptions options, boolean iterator) { + protected Object getOrIterator(ParsedVariantQuery variantQuery, boolean iterator) { logger.info("HBase column intersect"); + Query query = variantQuery.getQuery(); + QueryOptions options = variantQuery.getInputOptions(); // Build the query with only one query filter -> Single HBase column filter // diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/adaptors/VariantHadoopDBAdaptor.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/adaptors/VariantHadoopDBAdaptor.java index 3d6fe440da1..d3c608c8b42 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/adaptors/VariantHadoopDBAdaptor.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/adaptors/VariantHadoopDBAdaptor.java @@ -33,7 +33,6 @@ import org.opencb.commons.datastore.core.Query; import org.opencb.commons.datastore.core.*; import org.opencb.opencga.core.config.storage.StorageConfiguration; -import org.opencb.opencga.core.response.VariantQueryResult; import org.opencb.opencga.storage.core.exceptions.StorageEngineException; import org.opencb.opencga.storage.core.metadata.VariantStorageMetadataManager; import org.opencb.opencga.storage.core.metadata.models.ProjectMetadata; @@ -45,6 +44,7 @@ import org.opencb.opencga.storage.core.variant.annotation.VariantAnnotationManager; import org.opencb.opencga.storage.core.variant.query.ParsedVariantQuery; import org.opencb.opencga.storage.core.variant.query.VariantQueryParser; +import org.opencb.opencga.storage.core.variant.query.VariantQueryResult; import org.opencb.opencga.storage.core.variant.query.VariantQueryUtils; import org.opencb.opencga.storage.core.variant.query.projection.VariantQueryProjection; import org.opencb.opencga.storage.core.variant.query.projection.VariantQueryProjectionParser; @@ -52,6 +52,7 @@ import org.opencb.opencga.storage.hadoop.auth.HBaseCredentials; import org.opencb.opencga.storage.hadoop.utils.HBaseManager; import org.opencb.opencga.storage.hadoop.variant.GenomeHelper; +import org.opencb.opencga.storage.hadoop.variant.HadoopVariantQueryParser; import org.opencb.opencga.storage.hadoop.variant.HadoopVariantStorageEngine; import org.opencb.opencga.storage.hadoop.variant.HadoopVariantStorageOptions; import org.opencb.opencga.storage.hadoop.variant.adaptors.iterators.VariantHBaseResultSetIterator; @@ -221,33 +222,35 @@ public void close() throws IOException { } @Override - public VariantQueryResult get(ParsedVariantQuery query, QueryOptions options) { + public VariantQueryResult get(ParsedVariantQuery query) { List variants = new LinkedList<>(); - VariantDBIterator iterator = iterator(query, options); + VariantDBIterator iterator = iterator(query); + QueryOptions options = new QueryOptions(query.getInputOptions()); iterator.forEachRemaining(variants::add); long numTotalResults; - if (options == null) { - numTotalResults = variants.size(); - } else { - if (options.getInt(QueryOptions.LIMIT, -1) >= 0) { - if (options.getBoolean(QueryOptions.COUNT, false)) { - numTotalResults = count(query).first(); - } else { - numTotalResults = -1; - } + if (options.getInt(QueryOptions.LIMIT, -1) >= 0) { + if (options.getBoolean(QueryOptions.COUNT, false)) { + numTotalResults = count(query).first(); } else { - // There are no limit. Do not count. - numTotalResults = variants.size(); + numTotalResults = -1; } + } else { + // There are no limit. Do not count. + numTotalResults = variants.size(); } - VariantQueryResult result = new VariantQueryResult<>(iterator.getTime(TimeUnit.MILLISECONDS), variants.size(), - numTotalResults, null, variants, null, HadoopVariantStorageEngine.STORAGE_ENGINE_ID) + return new VariantQueryResult<>(iterator.getTime(TimeUnit.MILLISECONDS), variants.size(), + numTotalResults, null, variants, HadoopVariantStorageEngine.STORAGE_ENGINE_ID, query) .setFetchTime(iterator.getTimeFetching(TimeUnit.MILLISECONDS)) .setConvertTime(iterator.getTimeConverting(TimeUnit.MILLISECONDS)); - return addSamplesMetadataIfRequested(result, query.getQuery(), options, getMetadataManager()); + } + + @Override + @Deprecated + public VariantQueryResult get(Query query, QueryOptions options) { + return get(new HadoopVariantQueryParser(null, getMetadataManager()).parseQuery(query, options)); } @Override @@ -334,7 +337,8 @@ public DataResult distinct(Query query, String field) { } @Override - public VariantDBIterator iterator(ParsedVariantQuery variantQuery, QueryOptions options) { + public VariantDBIterator iterator(ParsedVariantQuery variantQuery) { + QueryOptions options = variantQuery.getInputOptions(); if (options == null) { options = new QueryOptions(); } else { @@ -385,7 +389,7 @@ public VariantDBIterator iterator(ParsedVariantQuery variantQuery, QueryOptions private VariantHBaseResultSetIterator phoenixIterator(ParsedVariantQuery variantQuery, QueryOptions options, HBaseVariantConverterConfiguration converterConfiguration) { VariantStorageMetadataManager metadataManager = getMetadataManager(); - new VariantQueryParser(null, metadataManager).optimize(variantQuery); + new HadoopVariantQueryParser(null, metadataManager).optimize(variantQuery); logger.debug("Table name = " + variantTable); logger.info("Query : " + VariantQueryUtils.printQuery(variantQuery.getQuery())); @@ -679,7 +683,7 @@ private class VariantQueryIteratorCustomSplit extends MultiVariantDBIterator.Var VariantQueryIteratorCustomSplit(Iterator variants, Query query, int batchSize, QueryOptions options) { super(variants, query, batchSize); - parser = new VariantQueryParser(null, getMetadataManager()); + parser = new HadoopVariantQueryParser(null, getMetadataManager()); variantQuery = parser.parseQuery(query, options); cts = sizeOrOne(variantQuery.getConsequenceTypes()); bts = sizeOrOne(variantQuery.getBiotypes()); diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/adaptors/phoenix/VariantPhoenixSchemaManager.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/adaptors/phoenix/VariantPhoenixSchemaManager.java index 2d1d4c59e04..4ffa3c7aa55 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/adaptors/phoenix/VariantPhoenixSchemaManager.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/adaptors/phoenix/VariantPhoenixSchemaManager.java @@ -79,7 +79,7 @@ public void registerStudyColumns(int studyId) throws StorageEngineException { registerNewFiles(studyId, new ArrayList<>(metadataManager.getIndexedFiles(studyId))); List cohortIds = new LinkedList<>(); - for (CohortMetadata cohort : metadataManager.getCalculatedCohorts(studyId)) { + for (CohortMetadata cohort : metadataManager.getCalculatedOrInvalidCohorts(studyId)) { cohortIds.add(cohort.getId()); } registerNewCohorts(studyId, cohortIds); diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/adaptors/phoenix/VariantSqlQueryParser.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/adaptors/phoenix/VariantSqlQueryParser.java index bb9737144d2..7cc9dd77bf4 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/adaptors/phoenix/VariantSqlQueryParser.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/adaptors/phoenix/VariantSqlQueryParser.java @@ -44,6 +44,7 @@ import org.opencb.opencga.storage.core.variant.query.*; import org.opencb.opencga.storage.core.variant.query.projection.VariantQueryProjection; import org.opencb.opencga.storage.core.variant.query.projection.VariantQueryProjectionParser; +import org.opencb.opencga.storage.hadoop.variant.HadoopVariantQueryParser; import org.opencb.opencga.storage.hadoop.variant.converters.HBaseToVariantConverter; import org.opencb.opencga.storage.hadoop.variant.converters.annotation.VariantAnnotationToPhoenixConverter; import org.opencb.opencga.storage.hadoop.variant.converters.study.HBaseToStudyEntryConverter; @@ -116,7 +117,7 @@ public VariantSqlQueryParser(String variantTable, @Deprecated public String parse(Query query, QueryOptions options) { - VariantQueryParser parser = new VariantQueryParser(null, metadataManager); + VariantQueryParser parser = new HadoopVariantQueryParser(null, metadataManager); ParsedVariantQuery variantQuery = parser.parseQuery(query, options, true); return parse(variantQuery, options); } @@ -124,8 +125,6 @@ public String parse(Query query, QueryOptions options) { public String parse(ParsedVariantQuery variantQuery, QueryOptions options) { Query query = variantQuery.getQuery(); - StringBuilder sb = new StringBuilder("SELECT "); - try { Set dynamicColumns = new HashSet<>(); @@ -144,21 +143,22 @@ public String parse(ParsedVariantQuery variantQuery, QueryOptions options) { hints.add(HintNode.Hint.valueOf(hint)); } } + + StringBuilder sb = new StringBuilder("SELECT "); if (!hints.isEmpty()) { sb.append("/*+ ").append(hints.stream().map(Object::toString).collect(Collectors.joining(","))).append(" */ "); } - appendProjectedColumns(sb, variantQuery.getProjection(), options); appendFromStatement(sb, dynamicColumns); appendWhereStatement(sb, regionFilters, filters); appendOrderby(options, sb); appendLimitSkip(options, sb); + return sb.toString(); } catch (VariantQueryException e) { e.setQuery(query); throw e; } - return sb.toString(); } private void appendOrderby(QueryOptions options, StringBuilder sb) { @@ -216,7 +216,6 @@ protected StringBuilder appendProjectedColumns(StringBuilder sb, VariantQueryPro return sb.append(" COUNT(*) "); } else { Set returnedFields = projection.getFields(); - Collection studyIds = projection.getStudyIds(); sb.append(VariantColumn.CHROMOSOME).append(',') .append(VariantColumn.POSITION).append(',') @@ -966,36 +965,39 @@ protected void addVariantFilters(ParsedVariantQuery variantQuery, QueryOptions o } boolean multiFileSample = VariantStorageEngine.SplitData.MULTI.equals(sampleMetadata.getSplitData()); List sampleFiles = new ArrayList<>(); + // First file does not have the fileID in the column name + Integer firstSampleFile = sampleMetadata.getFiles().get(0); + if (multiFileSample) { if (fileIds.isEmpty()) { - sampleFiles.add(null); // First file does not have the fileID in the column name List fileIdsFromSampleId = sampleMetadata.getFiles(); - sampleFiles.addAll(fileIdsFromSampleId.subList(1, fileIdsFromSampleId.size())); + sampleFiles.addAll(fileIdsFromSampleId); } else { for (Pair fileIdPair : fileIds) { - if (fileIdPair.getKey().equals(studyId)) { - Integer fileId = fileIdPair.getValue(); - int idx = sampleMetadata.getFiles().indexOf(fileId); - if (idx == 0) { - sampleFiles.add(null); // First file does not have the fileID in the column name - } else if (idx > 0) { - sampleFiles.add(fileId); // First file does not have the fileID in the column name - } + if (fileIdPair.getKey().equals(studyId) + && sampleMetadata.getFiles().contains(fileIdPair.getValue())) { + sampleFiles.add(fileIdPair.getValue()); } } } } else { - sampleFiles.add(null); // First file does not have the fileID in the column name + // Non multi file sample + sampleFiles.add(firstSampleFile); } for (Integer sampleFile : sampleFiles) { + if (!metadataManager.isFileIndexed(studyId, sampleFile)) { + // Skip non indexed files + continue; + } List sampleFileGtFilters = new ArrayList<>(genotypes.size()); for (String genotype : genotypes) { if (negated) { genotype = removeNegation(genotype); } String key; - if (sampleFile == null) { + if (Objects.equals(sampleFile, firstSampleFile)) { + // Special scenario for the first file. Column name does not contain the fileId key = buildSampleColumnKey(studyId, sampleId, new StringBuilder()).toString(); } else { key = buildSampleColumnKey(studyId, sampleId, sampleFile, new StringBuilder()).toString(); @@ -1230,27 +1232,24 @@ private void addSampleDataFilter(ParsedVariantQuery query, List filters, SampleMetadata sampleMetadata = sampleDataFilter.getKey(); boolean multiFileSample = VariantStorageEngine.SplitData.MULTI.equals(sampleMetadata.getSplitData()); + // First file does not have the fileID in the column name + Integer firstSampleFile = sampleMetadata.getFiles().get(0); List sampleFiles = new ArrayList<>(); if (multiFileSample) { if (fileIds.isEmpty()) { - sampleFiles.add(null); // First file does not have the fileID in the column name List fileIdsFromSampleId = sampleMetadata.getFiles(); - sampleFiles.addAll(fileIdsFromSampleId.subList(1, fileIdsFromSampleId.size())); + sampleFiles.addAll(fileIdsFromSampleId); } else { for (Pair fileIdPair : fileIds) { - if (fileIdPair.getKey().equals(sampleMetadata.getStudyId())) { - Integer fileId = fileIdPair.getValue(); - int idx = sampleMetadata.getFiles().indexOf(fileId); - if (idx == 0) { - sampleFiles.add(null); // First file does not have the fileID in the column name - } else if (idx > 0) { - sampleFiles.add(fileId); // First file does not have the fileID in the column name - } + if (fileIdPair.getKey().equals(sampleMetadata.getStudyId()) + && sampleMetadata.getFiles().contains(fileIdPair.getValue())) { + sampleFiles.add(fileIdPair.getValue()); } } } } else { - sampleFiles.add(null); // First file does not have the fileID in the column name + // Non multi file sample + sampleFiles.add(firstSampleFile); } for (Integer sampleFile : sampleFiles) { List sampleFileFilters = new LinkedList<>(); @@ -1270,10 +1269,11 @@ private void addSampleDataFilter(ParsedVariantQuery query, List filters, sb.append("TO_NUMBER("); } sb.append('"'); - if (sampleFile != null) { - buildSampleColumnKey(sampleMetadata.getStudyId(), sampleMetadata.getId(), sampleFile, sb); - } else { + if (Objects.equals(sampleFile, firstSampleFile)) { + // Special scenario for the first file. Column name does not contain the fileId buildSampleColumnKey(sampleMetadata.getStudyId(), sampleMetadata.getId(), sb); + } else { + buildSampleColumnKey(sampleMetadata.getStudyId(), sampleMetadata.getId(), sampleFile, sb); } sb.append('"'); @@ -1287,10 +1287,11 @@ private void addSampleDataFilter(ParsedVariantQuery query, List filters, if (op.startsWith(">>") || op.startsWith("<<")) { sb.append(" OR \""); - if (sampleFile != null) { - buildSampleColumnKey(sampleMetadata.getStudyId(), sampleMetadata.getId(), sampleFile, sb); - } else { + if (Objects.equals(sampleFile, firstSampleFile)) { + // Special scenario for the first file. Column name does not contain the fileId buildSampleColumnKey(sampleMetadata.getStudyId(), sampleMetadata.getId(), sb); + } else { + buildSampleColumnKey(sampleMetadata.getStudyId(), sampleMetadata.getId(), sampleFile, sb); } sb.append('"'); @@ -1307,8 +1308,8 @@ private void addSampleDataFilter(ParsedVariantQuery query, List filters, if (multiFileSample) { // The first file is null. Get the actual fileId Integer actualFileId; - if (sampleFile == null) { - actualFileId = sampleMetadata.getFiles().get(0); + if (Objects.equals(sampleFile, firstSampleFile)) { + actualFileId = firstSampleFile; } else { actualFileId = sampleFile; } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/analysis/julie/JulieToolDriver.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/analysis/julie/JulieToolDriver.java index 2321e25f6e1..4442bf48927 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/analysis/julie/JulieToolDriver.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/analysis/julie/JulieToolDriver.java @@ -68,6 +68,7 @@ protected void parseAndValidateParameters() throws IOException { for (Integer studyId : metadataManager.getStudies().values()) { List studyCohorts = new LinkedList<>(); cohorts.put(studyId, studyCohorts); + // Only READY cohort stats are used for JulieTool for (CohortMetadata c : metadataManager.getCalculatedCohorts(studyId)) { studyCohorts.add(c.getId()); } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/SampleIndexCompoundHeterozygousQueryExecutor.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/SampleIndexCompoundHeterozygousQueryExecutor.java index cb2019c2f6d..c7599e7e288 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/SampleIndexCompoundHeterozygousQueryExecutor.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/SampleIndexCompoundHeterozygousQueryExecutor.java @@ -5,7 +5,7 @@ import org.opencb.commons.datastore.core.ObjectMap; import org.opencb.commons.datastore.core.Query; import org.opencb.commons.datastore.core.QueryOptions; -import org.opencb.opencga.core.response.VariantQueryResult; +import org.opencb.opencga.storage.core.variant.query.VariantQueryResult; import org.opencb.opencga.storage.core.metadata.VariantStorageMetadataManager; import org.opencb.opencga.storage.core.metadata.models.Trio; import org.opencb.opencga.storage.core.variant.VariantStorageOptions; diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/SampleIndexMendelianErrorQueryExecutor.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/SampleIndexMendelianErrorQueryExecutor.java index 68eed2db889..4dd50c9eacf 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/SampleIndexMendelianErrorQueryExecutor.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/SampleIndexMendelianErrorQueryExecutor.java @@ -15,6 +15,7 @@ import org.opencb.opencga.storage.core.metadata.models.Trio; import org.opencb.opencga.storage.core.variant.adaptors.GenotypeClass; import org.opencb.opencga.storage.core.variant.adaptors.iterators.VariantDBIterator; +import org.opencb.opencga.storage.core.variant.query.ParsedVariantQuery; import org.opencb.opencga.storage.core.variant.query.VariantQueryUtils; import org.opencb.opencga.storage.hadoop.variant.adaptors.VariantHadoopDBAdaptor; import org.opencb.opencga.storage.hadoop.variant.index.query.SampleIndexQuery; @@ -43,7 +44,7 @@ public boolean canUseThisExecutor(Query query, QueryOptions options) { } @Override - protected Object getOrIterator(Query query, QueryOptions options, boolean iterator, SampleIndexQuery sampleIndexQuery) { + protected Object getOrIterator(ParsedVariantQuery variantQuery, boolean iterator, SampleIndexQuery sampleIndexQuery) { List trios = new ArrayList<>(sampleIndexQuery.getMendelianErrorSet().size()); int studyId = metadataManager.getStudyId(sampleIndexQuery.getStudy()); @@ -65,7 +66,7 @@ protected Object getOrIterator(Query query, QueryOptions options, boolean iterat trios.add(new Trio(null, father, mother, sampleMetadata.getName())); } - Object object = super.getOrIterator(query, options, iterator, sampleIndexQuery); + Object object = super.getOrIterator(variantQuery, iterator, sampleIndexQuery); if (iterator) { VariantDBIterator variantIterator = (VariantDBIterator) object; diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/SampleIndexOnlyVariantQueryExecutor.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/SampleIndexOnlyVariantQueryExecutor.java index 510085420dd..371d57ac4da 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/SampleIndexOnlyVariantQueryExecutor.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/SampleIndexOnlyVariantQueryExecutor.java @@ -9,7 +9,6 @@ import org.opencb.biodata.models.variant.avro.FileEntry; import org.opencb.biodata.models.variant.avro.SampleEntry; import org.opencb.biodata.tools.commons.Converter; -import org.opencb.commons.datastore.core.DataResult; import org.opencb.commons.datastore.core.ObjectMap; import org.opencb.commons.datastore.core.Query; import org.opencb.commons.datastore.core.QueryOptions; @@ -17,7 +16,6 @@ import org.opencb.opencga.core.common.BatchUtils; import org.opencb.opencga.core.common.TimeUtils; import org.opencb.opencga.core.config.storage.IndexFieldConfiguration; -import org.opencb.opencga.core.response.VariantQueryResult; import org.opencb.opencga.storage.core.io.bit.BitBuffer; import org.opencb.opencga.storage.core.metadata.VariantStorageMetadataManager; import org.opencb.opencga.storage.core.metadata.models.SampleMetadata; @@ -26,7 +24,7 @@ import org.opencb.opencga.storage.core.variant.adaptors.*; import org.opencb.opencga.storage.core.variant.adaptors.iterators.VariantDBIterator; import org.opencb.opencga.storage.core.variant.query.ParsedVariantQuery; -import org.opencb.opencga.storage.core.variant.query.VariantQueryParser; +import org.opencb.opencga.storage.core.variant.query.VariantQueryResult; import org.opencb.opencga.storage.core.variant.query.VariantQueryUtils; import org.opencb.opencga.storage.core.variant.query.executors.VariantQueryExecutor; import org.opencb.opencga.storage.core.variant.query.projection.VariantQueryProjection; @@ -50,7 +48,6 @@ import java.util.stream.Collectors; import static org.opencb.opencga.storage.core.variant.query.VariantQueryUtils.NONE; -import static org.opencb.opencga.storage.core.variant.query.VariantQueryUtils.addSamplesMetadataIfRequested; import static org.opencb.opencga.storage.hadoop.variant.HadoopVariantStorageOptions.SAMPLE_INDEX_QUERY_SAMPLE_INDEX_ONLY_PD_BATCH; import static org.opencb.opencga.storage.hadoop.variant.HadoopVariantStorageOptions.SAMPLE_INDEX_QUERY_SAMPLE_INDEX_ONLY_PD_BUFFER; import static org.opencb.opencga.storage.hadoop.variant.index.SampleIndexVariantQueryExecutor.SAMPLE_INDEX_TABLE_SOURCE; @@ -64,7 +61,6 @@ public class SampleIndexOnlyVariantQueryExecutor extends VariantQueryExecutor { private final SampleIndexDBAdaptor sampleIndexDBAdaptor; private final VariantHadoopDBAdaptor dbAdaptor; - private final VariantQueryParser variantQueryParser; private final VariantQueryProjectionParser variantQueryProjectionParser; private static Logger logger = LoggerFactory.getLogger(SampleIndexOnlyVariantQueryExecutor.class); @@ -82,7 +78,6 @@ public SampleIndexOnlyVariantQueryExecutor(VariantHadoopDBAdaptor dbAdaptor, Sam super(dbAdaptor.getMetadataManager(), storageEngineId, options); this.sampleIndexDBAdaptor = sampleIndexDBAdaptor; this.dbAdaptor = dbAdaptor; - variantQueryParser = new VariantQueryParser(null, getMetadataManager()); variantQueryProjectionParser = new VariantQueryProjectionParser(getMetadataManager()); partialDataBufferSize = options.getInt(SAMPLE_INDEX_QUERY_SAMPLE_INDEX_ONLY_PD_BUFFER.key(), SAMPLE_INDEX_QUERY_SAMPLE_INDEX_ONLY_PD_BUFFER.defaultValue()); @@ -101,26 +96,16 @@ public boolean canUseThisExecutor(Query query, QueryOptions options) { return false; } - @Override - public DataResult count(Query query) { - StopWatch stopWatch = StopWatch.createStarted(); - SampleIndexQuery sampleIndexQuery = sampleIndexDBAdaptor.parseSampleIndexQuery(query); - long count = sampleIndexDBAdaptor.count(sampleIndexQuery); - return new DataResult<>(((int) stopWatch.getTime(TimeUnit.MILLISECONDS)), Collections.emptyList(), 1, - Collections.singletonList(count), count); - } - /** * Fetch results exclusively from SampleSecondaryIndex. * - * @param inputQuery Query - * @param options Options + * @param variantQuery Query * @param iterator Shall the resulting object be an iterator instead of a DataResult * @return DataResult or Iterator with the variants that matches the query */ @Override - protected Object getOrIterator(Query inputQuery, QueryOptions options, boolean iterator) { - Query query = new Query(inputQuery); + protected Object getOrIterator(ParsedVariantQuery variantQuery, boolean iterator) { + Query query = new Query(variantQuery.getQuery()); query.put(SampleIndexQueryParser.INCLUDE_PARENTS_COLUMN, true); SampleIndexQuery sampleIndexQuery = sampleIndexDBAdaptor.parseSampleIndexQuery(query); @@ -128,7 +113,7 @@ protected Object getOrIterator(Query inputQuery, QueryOptions options, boolean i boolean count; Future asyncCountFuture; - if (shouldGetCount(options, iterator)) { + if (shouldGetCount(variantQuery.getInputOptions(), iterator)) { count = true; asyncCountFuture = THREAD_POOL.submit(() -> { StopWatch stopWatch = StopWatch.createStarted(); @@ -141,13 +126,12 @@ protected Object getOrIterator(Query inputQuery, QueryOptions options, boolean i asyncCountFuture = null; } - VariantDBIterator variantIterator = getVariantDBIterator(sampleIndexQuery, inputQuery, options); + VariantDBIterator variantIterator = getVariantDBIterator(sampleIndexQuery, variantQuery); if (iterator) { return variantIterator; } else { - VariantQueryResult result = - addSamplesMetadataIfRequested(variantIterator.toDataResult(), inputQuery, options, getMetadataManager()); + VariantQueryResult result = variantIterator.toDataResult(variantQuery); if (count) { result.setApproximateCount(false); try { @@ -161,8 +145,8 @@ protected Object getOrIterator(Query inputQuery, QueryOptions options, boolean i } } - private VariantDBIterator getVariantDBIterator(SampleIndexQuery sampleIndexQuery, Query inputQuery, QueryOptions options) { - ParsedVariantQuery parsedQuery = variantQueryParser.parseQuery(inputQuery, options, true); + private VariantDBIterator getVariantDBIterator(SampleIndexQuery sampleIndexQuery, ParsedVariantQuery parsedQuery) { + QueryOptions options = parsedQuery.getInputOptions(); VariantDBIterator variantIterator; if (parsedQuery.getProjection().getStudyIds().isEmpty()) { logger.info("Using sample index iterator Iterator"); @@ -174,9 +158,9 @@ private VariantDBIterator getVariantDBIterator(SampleIndexQuery sampleIndexQuery try { rawIterator = sampleIndexDBAdaptor.rawIterator(sampleIndexQuery, options); } catch (IOException e) { - throw VariantQueryException.internalException(e).setQuery(inputQuery); + throw VariantQueryException.internalException(e).setQuery(parsedQuery.getInputQuery()); } - boolean includeAll = inputQuery.getBoolean("includeAllFromSampleIndex", false); + boolean includeAll = parsedQuery.getInputQuery().getBoolean("includeAllFromSampleIndex", false); SampleVariantIndexEntryToVariantConverter converter = new SampleVariantIndexEntryToVariantConverter( parsedQuery, sampleIndexQuery, dbAdaptor.getMetadataManager(), includeAll); variantIterator = VariantDBIterator.wrapper(Iterators.transform(rawIterator, converter::convert)); diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/SampleIndexVariantAggregationExecutor.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/SampleIndexVariantAggregationExecutor.java index c27c2294e79..1bf855183b7 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/SampleIndexVariantAggregationExecutor.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/SampleIndexVariantAggregationExecutor.java @@ -9,7 +9,7 @@ import org.opencb.commons.datastore.core.FacetField; import org.opencb.commons.datastore.core.Query; import org.opencb.commons.datastore.core.QueryOptions; -import org.opencb.opencga.core.response.VariantQueryResult; +import org.opencb.opencga.storage.core.variant.query.VariantQueryResult; import org.opencb.opencga.storage.core.io.bit.BitBuffer; import org.opencb.opencga.storage.core.metadata.VariantStorageMetadataManager; import org.opencb.opencga.storage.core.utils.iterators.CloseableIterator; @@ -144,7 +144,7 @@ protected VariantQueryResult aggregation(Query query, QueryOptions o } return new VariantQueryResult<>((int) stopWatch.getTime(TimeUnit.MILLISECONDS), 1, numMatches, Collections.emptyList(), - fields, null, SampleIndexVariantQueryExecutor.SAMPLE_INDEX_TABLE_SOURCE); + fields, SampleIndexVariantQueryExecutor.SAMPLE_INDEX_TABLE_SOURCE); } } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/SampleIndexVariantQueryExecutor.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/SampleIndexVariantQueryExecutor.java index 2e1b74b9f88..25cb7052acd 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/SampleIndexVariantQueryExecutor.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/SampleIndexVariantQueryExecutor.java @@ -3,17 +3,17 @@ import org.apache.commons.lang3.concurrent.BasicThreadFactory; import org.apache.commons.lang3.time.StopWatch; import org.opencb.biodata.models.variant.Variant; -import org.opencb.commons.datastore.core.DataResult; import org.opencb.commons.datastore.core.ObjectMap; import org.opencb.commons.datastore.core.Query; import org.opencb.commons.datastore.core.QueryOptions; import org.opencb.opencga.core.common.TimeUtils; -import org.opencb.opencga.core.response.VariantQueryResult; import org.opencb.opencga.storage.core.variant.VariantStorageOptions; import org.opencb.opencga.storage.core.variant.adaptors.VariantQueryException; import org.opencb.opencga.storage.core.variant.adaptors.VariantQueryParam; import org.opencb.opencga.storage.core.variant.adaptors.iterators.MultiVariantDBIterator; import org.opencb.opencga.storage.core.variant.adaptors.iterators.VariantDBIteratorWithCounts; +import org.opencb.opencga.storage.core.variant.query.ParsedVariantQuery; +import org.opencb.opencga.storage.core.variant.query.VariantQueryResult; import org.opencb.opencga.storage.core.variant.query.VariantQueryUtils; import org.opencb.opencga.storage.core.variant.query.executors.AbstractTwoPhasedVariantQueryExecutor; import org.opencb.opencga.storage.hadoop.variant.adaptors.VariantHadoopDBAdaptor; @@ -31,7 +31,6 @@ import static org.opencb.opencga.storage.core.variant.adaptors.VariantQueryParam.ID; import static org.opencb.opencga.storage.core.variant.adaptors.VariantQueryParam.REGION; -import static org.opencb.opencga.storage.core.variant.query.VariantQueryUtils.addSamplesMetadataIfRequested; /** * Created on 01/04/19. @@ -66,11 +65,6 @@ public boolean canUseThisExecutor(Query query, QueryOptions options) { return false; } - @Override - public DataResult count(Query query) { - throw new UnsupportedOperationException("Count not implemented in " + getClass()); - } - @Override protected long primaryCount(Query query, QueryOptions options) { SampleIndexQuery sampleIndexQuery = sampleIndexDBAdaptor.parseSampleIndexQuery(query); @@ -81,24 +75,25 @@ protected long primaryCount(Query query, QueryOptions options) { * Intersect result of SampleIndexTable and full phoenix query. * Use {@link org.opencb.opencga.storage.core.variant.adaptors.iterators.MultiVariantDBIterator}. * - * @param inputQuery Query - * @param options Options + * @param variantQuery Parsed query * @param iterator Shall the resulting object be an iterator instead of a DataResult * @return DataResult or Iterator with the variants that matches the query */ @Override - protected Object getOrIterator(Query inputQuery, QueryOptions options, boolean iterator) { - Query query = new Query(inputQuery); + protected Object getOrIterator(ParsedVariantQuery variantQuery, boolean iterator) { + Query query = new Query(variantQuery.getQuery()); SampleIndexQuery sampleIndexQuery = sampleIndexDBAdaptor.parseSampleIndexQuery(query); - return getOrIterator(query, options, iterator, sampleIndexQuery); + return getOrIterator(variantQuery, iterator, sampleIndexQuery); } - protected Object getOrIterator(Query query, QueryOptions inputOptions, boolean iterator, SampleIndexQuery sampleIndexQuery) { + protected Object getOrIterator(ParsedVariantQuery variantQuery, boolean iterator, SampleIndexQuery sampleIndexQuery) { logger.info("HBase SampleIndex intersect"); + QueryOptions inputOptions = variantQuery.getInputOptions(); + Query uncoveredQuery = new Query(sampleIndexQuery.getUncoveredQuery()); Future asyncCountFuture; boolean asyncCount; - if (shouldGetApproximateCount(inputOptions, iterator) && queryFiltersCovered(query)) { + if (shouldGetApproximateCount(inputOptions, iterator) && queryFiltersCovered(uncoveredQuery)) { asyncCount = true; asyncCountFuture = THREAD_POOL.submit(() -> { StopWatch stopWatch = StopWatch.createStarted(); @@ -120,10 +115,10 @@ protected Object getOrIterator(Query query, QueryOptions inputOptions, boolean i int batchSize = inputOptions.getInt("multiIteratorBatchSize", 200); if (iterator) { // SampleIndex iterator will be closed when closing the variants iterator - return dbAdaptor.iterator(variants, query, inputOptions, batchSize); + return dbAdaptor.iterator(variants, uncoveredQuery, inputOptions, batchSize); } else { - int skip = getSkip(inputOptions); - int limit = getLimit(inputOptions); + int skip = variantQuery.getSkip(); + int limit = variantQuery.getLimitOr(-1); int samplingSize = asyncCount ? 0 : getSamplingSize(inputOptions, DEFAULT_SAMPLING_SIZE, iterator); int tmpLimit = Math.max(limit, samplingSize); @@ -141,9 +136,8 @@ protected Object getOrIterator(Query query, QueryOptions inputOptions, boolean i public void close() throws Exception { // Do not close this iterator! We'll need to keep iterating to get the approximate count } - }, query, options, batchSize); - VariantQueryResult result = - addSamplesMetadataIfRequested(variantDBIterator.toDataResult(), query, options, getMetadataManager()); + }, uncoveredQuery, options, batchSize); + VariantQueryResult result = variantDBIterator.toDataResult(variantQuery); if (result.getNumResults() < tmpLimit) { // Not an approximate count! @@ -164,7 +158,7 @@ public void close() throws Exception { inputOptions.get(QueryOptions.COUNT)); numTotalResultsOptions.put(VariantStorageOptions.APPROXIMATE_COUNT.key(), inputOptions.get(VariantStorageOptions.APPROXIMATE_COUNT.key())); - setNumTotalResults(variantDBIterator, variants, result, sampleIndexQuery, query, numTotalResultsOptions); + setNumTotalResults(variantDBIterator, variants, result, sampleIndexQuery, uncoveredQuery, numTotalResultsOptions); } // Ensure limit diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/query/SampleIndexQuery.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/query/SampleIndexQuery.java index 664ace07747..4a4803ff6a4 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/query/SampleIndexQuery.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/query/SampleIndexQuery.java @@ -4,6 +4,7 @@ import org.opencb.biodata.models.core.Region; import org.opencb.biodata.models.variant.Variant; import org.opencb.biodata.models.variant.avro.VariantType; +import org.opencb.commons.datastore.core.Query; import org.opencb.opencga.storage.core.variant.query.Values; import org.opencb.opencga.storage.hadoop.variant.index.family.GenotypeCodec; import org.opencb.opencga.storage.hadoop.variant.index.sample.SampleIndexSchema; @@ -49,6 +50,8 @@ public class SampleIndexQuery { private final Set mendelianErrorSet; private final MendelianErrorType mendelianErrorType; private final boolean includeParentColumns; + + private final Query uncoveredQuery; private final QueryOperation queryOperation; public enum MendelianErrorType { @@ -73,6 +76,7 @@ public SampleIndexQuery(Collection locusQueries, SampleIndexQuery qu this.mendelianErrorSet = query.mendelianErrorSet; this.mendelianErrorType = query.mendelianErrorType; this.includeParentColumns = query.includeParentColumns; + this.uncoveredQuery = query.uncoveredQuery; this.queryOperation = query.queryOperation; } @@ -83,7 +87,7 @@ public SampleIndexQuery(SampleIndexSchema schema, Collection locusQu Map> fileFilterMap, SampleAnnotationIndexQuery annotationIndexQuery, Set mendelianErrorSet, MendelianErrorType mendelianErrorType, boolean includeParentColumns, - QueryOperation queryOperation) { + QueryOperation queryOperation, Query uncoveredQuery) { this.schema = schema; this.locusQueries = locusQueries; this.extendedFilteringRegion = extendedFilteringRegion; @@ -100,6 +104,7 @@ public SampleIndexQuery(SampleIndexSchema schema, Collection locusQu this.mendelianErrorType = mendelianErrorType; this.includeParentColumns = includeParentColumns; this.queryOperation = queryOperation; + this.uncoveredQuery = uncoveredQuery; } public SampleIndexSchema getSchema() { @@ -227,6 +232,10 @@ public boolean isIncludeParentColumns() { return includeParentColumns; } + public Query getUncoveredQuery() { + return uncoveredQuery; + } + public QueryOperation getQueryOperation() { return queryOperation; } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/query/SingleSampleIndexQuery.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/query/SingleSampleIndexQuery.java index 35a9e67e718..51f44824872 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/query/SingleSampleIndexQuery.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/query/SingleSampleIndexQuery.java @@ -36,7 +36,8 @@ protected SingleSampleIndexQuery(SampleIndexQuery query, String sample, List findParents(Set childrenSet, Map> parentsMap) { diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/io/VariantExporterDriver.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/io/VariantExporterDriver.java index 196ab0e1689..aa342c10a92 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/io/VariantExporterDriver.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/io/VariantExporterDriver.java @@ -33,6 +33,7 @@ import org.opencb.opencga.storage.core.variant.query.ParsedVariantQuery; import org.opencb.opencga.storage.core.variant.query.VariantQueryParser; import org.opencb.opencga.storage.hadoop.variant.AbstractVariantsTableDriver; +import org.opencb.opencga.storage.hadoop.variant.HadoopVariantQueryParser; import org.opencb.opencga.storage.hadoop.variant.adaptors.VariantHBaseQueryParser; import org.opencb.opencga.storage.hadoop.variant.adaptors.phoenix.VariantSqlQueryParser; import org.opencb.opencga.storage.hadoop.variant.index.sample.SampleIndexDBAdaptor; @@ -188,7 +189,7 @@ protected Job setupJob(Job job, String archiveTable, String variantTable) throws VariantMapReduceUtil.setNoneReduce(job); } - VariantQueryParser variantQueryParser = new VariantQueryParser(null, getMetadataManager()); + VariantQueryParser variantQueryParser = new HadoopVariantQueryParser(null, getMetadataManager()); ParsedVariantQuery variantQuery = variantQueryParser.parseQuery(query, options); Query query = variantQuery.getQuery(); if (VariantHBaseQueryParser.isSupportedQuery(query)) { diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/VariantMapReduceUtil.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/VariantMapReduceUtil.java index 623deec75da..367196742db 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/VariantMapReduceUtil.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/VariantMapReduceUtil.java @@ -29,11 +29,11 @@ import org.opencb.opencga.core.config.storage.SampleIndexConfiguration; import org.opencb.opencga.storage.core.metadata.VariantStorageMetadataManager; import org.opencb.opencga.storage.core.variant.adaptors.VariantQueryParam; -import org.opencb.opencga.storage.core.variant.query.VariantQueryParser; import org.opencb.opencga.storage.core.variant.query.VariantQueryUtils; import org.opencb.opencga.storage.hadoop.utils.AbstractHBaseDriver; import org.opencb.opencga.storage.hadoop.variant.AbstractVariantsTableDriver; import org.opencb.opencga.storage.hadoop.variant.GenomeHelper; +import org.opencb.opencga.storage.hadoop.variant.HadoopVariantQueryParser; import org.opencb.opencga.storage.hadoop.variant.HadoopVariantStorageOptions; import org.opencb.opencga.storage.hadoop.variant.adaptors.VariantHBaseQueryParser; import org.opencb.opencga.storage.hadoop.variant.adaptors.VariantHadoopDBAdaptor; @@ -153,7 +153,7 @@ public static void initTableMapperJobFromPhoenix(Job job, String variantTable, S public static void initVariantMapperJob(Job job, Class mapperClass, String variantTable, VariantStorageMetadataManager metadataManager, Query query, QueryOptions queryOptions, boolean skipSampleIndex) throws IOException { - query = new VariantQueryParser(null, metadataManager).preProcessQuery(query, queryOptions); + query = new HadoopVariantQueryParser(null, metadataManager).preProcessQuery(query, queryOptions); setQuery(job, query); setQueryOptions(job, queryOptions); @@ -266,7 +266,7 @@ public static void initVariantRowMapperJob(Job job, Class mapperClass, String variantTable, VariantStorageMetadataManager metadataManager, Query query, QueryOptions queryOptions, boolean skipSampleIndex) throws IOException { - query = new VariantQueryParser(null, metadataManager).preProcessQuery(query, queryOptions); + query = new HadoopVariantQueryParser(null, metadataManager).preProcessQuery(query, queryOptions); setQuery(job, query); setQueryOptions(job, queryOptions); diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/search/SecondaryIndexPendingVariantsDescriptor.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/search/SecondaryIndexPendingVariantsDescriptor.java index 59c55cf8dda..604611ca93c 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/search/SecondaryIndexPendingVariantsDescriptor.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/search/SecondaryIndexPendingVariantsDescriptor.java @@ -61,7 +61,7 @@ public Scan configureScan(Scan scan, VariantStorageMetadataManager metadataManag scan.addColumn(GenomeHelper.COLUMN_FAMILY_BYTES, INDEX_STUDIES.bytes()); for (Integer studyId : metadataManager.getStudyIds()) { scan.addColumn(GenomeHelper.COLUMN_FAMILY_BYTES, VariantPhoenixSchema.getStudyColumn(studyId).bytes()); - for (CohortMetadata cohort : metadataManager.getCalculatedCohorts(studyId)) { + for (CohortMetadata cohort : metadataManager.getCalculatedOrInvalidCohorts(studyId)) { scan.addColumn(GenomeHelper.COLUMN_FAMILY_BYTES, VariantPhoenixSchema.getStatsColumn(studyId, cohort.getId()).bytes()); } } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/HadoopVariantStorageEngineSVTest.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/HadoopVariantStorageEngineSVTest.java index b1bd9d84879..09bbce14acd 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/HadoopVariantStorageEngineSVTest.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/HadoopVariantStorageEngineSVTest.java @@ -7,8 +7,7 @@ import org.opencb.biodata.models.variant.Variant; import org.opencb.commons.datastore.core.Query; import org.opencb.commons.datastore.core.QueryOptions; -import org.opencb.opencga.core.api.ParamConstants; -import org.opencb.opencga.core.response.VariantQueryResult; +import org.opencb.opencga.storage.core.variant.query.VariantQueryResult; import org.opencb.opencga.core.testclassification.duration.LongTests; import org.opencb.opencga.storage.core.variant.VariantStorageEngineSVTest; import org.opencb.opencga.storage.core.variant.adaptors.GenotypeClass; diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/adaptors/HadoopVariantDBAdaptorMultiFileTest.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/adaptors/HadoopVariantDBAdaptorMultiFileTest.java index be47dd08890..bebcb5ce4da 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/adaptors/HadoopVariantDBAdaptorMultiFileTest.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/adaptors/HadoopVariantDBAdaptorMultiFileTest.java @@ -10,8 +10,7 @@ import org.opencb.commons.datastore.core.ObjectMap; import org.opencb.commons.datastore.core.Query; import org.opencb.commons.datastore.core.QueryOptions; -import org.opencb.opencga.core.api.ParamConstants; -import org.opencb.opencga.core.response.VariantQueryResult; +import org.opencb.opencga.storage.core.variant.query.VariantQueryResult; import org.opencb.opencga.core.testclassification.duration.MediumTests; import org.opencb.opencga.storage.core.variant.VariantStorageEngine; import org.opencb.opencga.storage.core.variant.VariantStorageOptions; diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/adaptors/HadoopVariantDBAdaptorNativeTest.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/adaptors/HadoopVariantDBAdaptorNativeTest.java index 7d6b6f19ba5..e03253739d3 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/adaptors/HadoopVariantDBAdaptorNativeTest.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/adaptors/HadoopVariantDBAdaptorNativeTest.java @@ -9,7 +9,7 @@ import org.opencb.biodata.models.variant.Variant; import org.opencb.commons.datastore.core.Query; import org.opencb.commons.datastore.core.QueryOptions; -import org.opencb.opencga.core.response.VariantQueryResult; +import org.opencb.opencga.storage.core.variant.query.VariantQueryResult; import org.opencb.opencga.core.testclassification.duration.LongTests; import org.opencb.opencga.storage.core.variant.adaptors.VariantQueryParam; import org.opencb.opencga.storage.core.variant.query.VariantQueryUtils; diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/gaps/FillGapsTest.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/gaps/FillGapsTest.java index 3ae2a36bfbc..a2e6699180d 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/gaps/FillGapsTest.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/gaps/FillGapsTest.java @@ -18,7 +18,7 @@ import org.opencb.commons.datastore.core.QueryOptions; import org.opencb.opencga.core.models.operations.variant.VariantAggregateFamilyParams; import org.opencb.opencga.core.models.operations.variant.VariantAggregateParams; -import org.opencb.opencga.core.response.VariantQueryResult; +import org.opencb.opencga.storage.core.variant.query.VariantQueryResult; import org.opencb.opencga.core.testclassification.duration.LongTests; import org.opencb.opencga.storage.core.StoragePipelineResult; import org.opencb.opencga.storage.core.metadata.models.StudyMetadata; diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/family/FamilyIndexTest.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/family/FamilyIndexTest.java index ccafd14be63..f67d5734d7c 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/family/FamilyIndexTest.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/family/FamilyIndexTest.java @@ -16,7 +16,7 @@ import org.opencb.commons.datastore.core.QueryOptions; import org.opencb.opencga.core.api.ParamConstants; import org.opencb.opencga.core.models.operations.variant.VariantAggregateFamilyParams; -import org.opencb.opencga.core.response.VariantQueryResult; +import org.opencb.opencga.storage.core.variant.query.VariantQueryResult; import org.opencb.opencga.core.testclassification.duration.MediumTests; import org.opencb.opencga.storage.core.metadata.models.Trio; import org.opencb.opencga.storage.core.variant.VariantStorageBaseTest; diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexDBAdaptorTest.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexDBAdaptorTest.java index 03e8b3cd4fb..a46a3a5c99b 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexDBAdaptorTest.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexDBAdaptorTest.java @@ -45,7 +45,7 @@ public void testSampleIdFF() throws Exception { SampleIndexQuery query = new SampleIndexQuery(SampleIndexSchema.defaultSampleIndexSchema(), Collections.emptyList(), 0, null, "ST", Collections.singletonMap(sampleName, Collections.singletonList("0/1")), Collections.emptySet(), null, Collections.emptyMap(), Collections.emptyMap(), Collections.emptyMap(), new SampleAnnotationIndexQuery(SampleIndexSchema.defaultSampleIndexSchema()), - Collections.emptySet(), null, false, VariantQueryUtils.QueryOperation.AND); + Collections.emptySet(), null, false, VariantQueryUtils.QueryOperation.AND, null); new SampleIndexDBAdaptor(new HBaseManager(new Configuration()), null, metadataManager).parse(query.forSample(sampleName), null); } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexDuplicatedVariantsTest.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexDuplicatedVariantsTest.java index 6a9c35eb0da..be83e365bed 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexDuplicatedVariantsTest.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexDuplicatedVariantsTest.java @@ -63,7 +63,7 @@ public void test2FilesSampleIndex() throws Exception { SampleIndexOnlyVariantQueryExecutor queryExecutor = new SampleIndexOnlyVariantQueryExecutor(dbAdaptor, sampleIndexDBAdaptor, "", new ObjectMap()); List expectedVariants = new ArrayList<>(); - queryExecutor.iterator(new VariantQuery().sample("s1"), new QueryOptions()).forEachRemaining(expectedVariants::add); + queryExecutor.iterator(variantStorageEngine.parseQuery(new VariantQuery().sample("s1"), new QueryOptions())).forEachRemaining(expectedVariants::add); int studyId = engine.getMetadataManager().getStudyId(STUDY_NAME); String actualSampleIndexTableName = sampleIndexDBAdaptor.getSampleIndexTableNameLatest(studyId); @@ -81,7 +81,7 @@ public void test2FilesSampleIndex() throws Exception { VariantHbaseTestUtils.printVariants(dbAdaptor, newOutputUri()); List actualVariants = new ArrayList<>(); - queryExecutor.iterator(new VariantQuery().sample("s1"), new QueryOptions()).forEachRemaining(actualVariants::add); + queryExecutor.iterator(variantStorageEngine.parseQuery(new VariantQuery().sample("s1"), new QueryOptions())).forEachRemaining(actualVariants::add); Assert.assertEquals(expectedVariants, actualVariants); @@ -105,7 +105,7 @@ public void test3FilesSampleIndex() throws Exception { SampleIndexOnlyVariantQueryExecutor queryExecutor = new SampleIndexOnlyVariantQueryExecutor(dbAdaptor, sampleIndexDBAdaptor, "", new ObjectMap()); List expectedVariants = new ArrayList<>(); - queryExecutor.iterator(new VariantQuery().sample("s1"), new QueryOptions()).forEachRemaining(expectedVariants::add); + queryExecutor.iterator(variantStorageEngine.parseQuery(new VariantQuery().sample("s1"), new QueryOptions())).forEachRemaining(expectedVariants::add); int studyId = engine.getMetadataManager().getStudyId(STUDY_NAME); String actualSampleIndexTableName = sampleIndexDBAdaptor.getSampleIndexTableNameLatest(studyId); @@ -123,7 +123,7 @@ public void test3FilesSampleIndex() throws Exception { VariantHbaseTestUtils.printVariants(dbAdaptor, newOutputUri()); List actualVariants = new ArrayList<>(); - queryExecutor.iterator(new VariantQuery().sample("s1"), new QueryOptions()).forEachRemaining(actualVariants::add); + queryExecutor.iterator(variantStorageEngine.parseQuery(new VariantQuery().sample("s1"), new QueryOptions())).forEachRemaining(actualVariants::add); Assert.assertEquals(expectedVariants, actualVariants); diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexEntryFilterTest.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexEntryFilterTest.java index f4989f9ff79..76cc13feeb9 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexEntryFilterTest.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexEntryFilterTest.java @@ -279,7 +279,7 @@ private SingleSampleIndexQuery getSingleSampleIndexQuery(SampleAnnotationIndexQu private SingleSampleIndexQuery getSingleSampleIndexQuery(SampleAnnotationIndexQuery annotationIndexQuery, Map> fileFilterMap) { return new SampleIndexQuery( - schema, Collections.emptyList(), 0, null, "study", Collections.singletonMap("S1", Arrays.asList("0/1", "1/1")), Collections.emptySet(), null, Collections.emptyMap(), Collections.emptyMap(), fileFilterMap, annotationIndexQuery, Collections.emptySet(), null, false, VariantQueryUtils.QueryOperation.AND) + schema, Collections.emptyList(), 0, null, "study", Collections.singletonMap("S1", Arrays.asList("0/1", "1/1")), Collections.emptySet(), null, Collections.emptyMap(), Collections.emptyMap(), fileFilterMap, annotationIndexQuery, Collections.emptySet(), null, false, VariantQueryUtils.QueryOperation.AND, null) .forSample("S1"); } } \ No newline at end of file diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexQueryParserTest.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexQueryParserTest.java index 0eafe0ca875..7ae4e36f3ec 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexQueryParserTest.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexQueryParserTest.java @@ -26,8 +26,8 @@ import org.opencb.opencga.storage.core.variant.VariantStorageOptions; import org.opencb.opencga.storage.core.variant.dummy.DummyVariantStorageMetadataDBAdaptorFactory; import org.opencb.opencga.storage.core.variant.query.Values; -import org.opencb.opencga.storage.core.variant.query.VariantQueryParser; import org.opencb.opencga.storage.core.variant.query.VariantQueryUtils; +import org.opencb.opencga.storage.hadoop.variant.HadoopVariantQueryParser; import org.opencb.opencga.storage.hadoop.variant.index.annotation.AnnotationIndexConverter; import org.opencb.opencga.storage.hadoop.variant.index.core.IndexField; import org.opencb.opencga.storage.hadoop.variant.index.core.RangeIndexField; @@ -127,7 +127,7 @@ public void setUp() throws Exception { } private SampleIndexQuery parse(final Query query) { - Query newQuery = new VariantQueryParser(null, mm).preProcessQuery(query, new QueryOptions()); + Query newQuery = new HadoopVariantQueryParser(null, mm).preProcessQuery(query, new QueryOptions()); query.clear(); query.putAll(newQuery); return sampleIndexQueryParser.parse(query); diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexTest.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexTest.java index 922c2648f4d..e15cb6ae4bb 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexTest.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexTest.java @@ -26,7 +26,7 @@ import org.opencb.opencga.core.config.storage.IndexFieldConfiguration; import org.opencb.opencga.core.config.storage.SampleIndexConfiguration; import org.opencb.opencga.core.models.variant.VariantAnnotationConstants; -import org.opencb.opencga.core.response.VariantQueryResult; +import org.opencb.opencga.storage.core.variant.query.VariantQueryResult; import org.opencb.opencga.core.testclassification.duration.LongTests; import org.opencb.opencga.storage.core.exceptions.StorageEngineException; import org.opencb.opencga.storage.core.metadata.models.SampleMetadata; @@ -369,8 +369,8 @@ public void regenerateSampleIndex() throws Exception { } public VariantQueryResult dbAdaptorQuery(Query query, QueryOptions options) { - query = variantStorageEngine.preProcessQuery(query, options); - return dbAdaptor.get(query, options); + ParsedVariantQuery variantQuery = variantStorageEngine.parseQuery(query, options); + return dbAdaptor.get(variantQuery); } @Test @@ -463,26 +463,26 @@ public void testLocusQueryOverlap() throws Exception { VariantQuery query = new VariantQuery().study(STUDY_NAME_5).sample("NA19600"); // System.out.println("query = " + query.toJson()); List variants = sampleIndexDBAdaptor.iterator(new Query(query), new QueryOptions()) - .toDataResult().getResults(); + .toList(); assertEquals(2, variants.size()); query.region("1:2000200-5500000"); // System.out.println("query = " + query.toJson()); variants = sampleIndexDBAdaptor.iterator(new Query(query), new QueryOptions()) - .toDataResult().getResults(); + .toList(); assertEquals(2, variants.size()); query.region("1:200-2500000"); // System.out.println("query = " + query.toJson()); variants = sampleIndexDBAdaptor.iterator(new Query(query), new QueryOptions()) - .toDataResult().getResults(); + .toList(); assertEquals(1, variants.size()); assertEquals("1:1000001-4000000:-:", variants.get(0).toString()); query.region("1:2000200-2500000"); // System.out.println("query = " + query.toJson()); variants = sampleIndexDBAdaptor.iterator(new Query(query), new QueryOptions()) - .toDataResult().getResults(); + .toList(); assertEquals(1, variants.size()); assertEquals("1:1000001-4000000:-:", variants.get(0).toString()); } @@ -668,10 +668,10 @@ public SampleIndexQuery testQueryIndex(Query testQuery, Query query, boolean sam SampleIndexQuery indexQuery = sampleIndexDBAdaptor.parseSampleIndexQuery(sampleIndexVariantQuery); // int onlyIndex = (int) ((HadoopVariantStorageEngine) variantStorageEngine).getSampleIndexDBAdaptor() // .count(indexQuery, "NA19600"); - DataResult result = ((HadoopVariantStorageEngine) variantStorageEngine).getSampleIndexDBAdaptor() - .iterator(indexQuery).toDataResult(); + List result = ((HadoopVariantStorageEngine) variantStorageEngine).getSampleIndexDBAdaptor() + .iterator(indexQuery).toList(); // System.out.println("result.getResults() = " + result.getResults()); - List onlyIndex = result.getResults().stream().map(Variant::toString).sorted().collect(toList()); + List onlyIndex = result.stream().map(Variant::toString).sorted().collect(toList()); // Query SampleIndex+DBAdaptor System.out.println("#Query SampleIndex+DBAdaptor"); @@ -815,7 +815,7 @@ public void testCount() throws StorageEngineException { System.out.println("Count = " + actualCount); stopWatch = StopWatch.createStarted(); - long actualCountIterator = sampleIndexDBAdaptor.iterator(sampleIndexDBAdaptor.parseSampleIndexQuery(new Query(query))).toDataResult().getNumResults(); + long actualCountIterator = sampleIndexDBAdaptor.iterator(sampleIndexDBAdaptor.parseSampleIndexQuery(new Query(query))).toList().size(); System.out.println("---"); System.out.println("Count indexTable iterator " + stopWatch.getTime(TimeUnit.MILLISECONDS) / 1000.0); System.out.println("Count = " + actualCountIterator); @@ -1179,24 +1179,24 @@ private void testSampleIndexOnlyVariantQueryExecutor(VariantQuery query, QueryOp private void testSampleIndexOnlyVariantQueryExecutor(VariantQuery query, QueryOptions options, Class expected, Function mapper) { - VariantQueryExecutor variantQueryExecutor = variantStorageEngine.getVariantQueryExecutor( - query, - options); + ParsedVariantQuery variantQuery = variantStorageEngine.parseQuery(query, options); + + VariantQueryExecutor variantQueryExecutor = variantStorageEngine.getVariantQueryExecutor(variantQuery); assertEquals(expected, variantQueryExecutor.getClass()); - ParsedVariantQuery variantQuery = variantStorageEngine.parseQuery(query, options); List expectedVariants = new ArrayList<>(1000); - dbAdaptor.iterator(variantQuery, new QueryOptions(options)) + dbAdaptor.iterator(variantQuery) .forEachRemaining(expectedVariants::add); List actualVariants = new ArrayList<>(1000); - variantQueryExecutor.iterator(variantQuery.getQuery(), options) + variantQueryExecutor.iterator(variantStorageEngine.parseQuery(variantQuery.getQuery(), options)) .forEachRemaining(actualVariants::add); - VariantQueryResult result = variantQueryExecutor.get(variantQuery.getQuery(), new QueryOptions(options) + ParsedVariantQuery limitedQuery = variantStorageEngine.parseQuery(variantQuery.getQuery(), new QueryOptions(options) .append(QueryOptions.LIMIT, 10) .append(QueryOptions.COUNT, true)); + VariantQueryResult result = variantQueryExecutor.get(limitedQuery); assertEquals(10, result.getNumResults()); assertEquals(10, result.getResults().size()); long count = result.getNumMatches(); diff --git a/opencga-storage/opencga-storage-server/src/main/java/org/opencb/opencga/storage/server/rest/VariantRestWebService.java b/opencga-storage/opencga-storage-server/src/main/java/org/opencb/opencga/storage/server/rest/VariantRestWebService.java index 00832a76dd9..c6292d4f2f1 100644 --- a/opencga-storage/opencga-storage-server/src/main/java/org/opencb/opencga/storage/server/rest/VariantRestWebService.java +++ b/opencga-storage/opencga-storage-server/src/main/java/org/opencb/opencga/storage/server/rest/VariantRestWebService.java @@ -20,7 +20,7 @@ import org.opencb.commons.datastore.core.DataResult; import org.opencb.commons.datastore.core.Query; import org.opencb.commons.datastore.core.QueryOptions; -import org.opencb.opencga.core.response.VariantQueryResult; +import org.opencb.opencga.storage.core.variant.query.VariantQueryResult; import org.opencb.opencga.storage.core.StorageEngineFactory; import org.opencb.opencga.storage.core.exceptions.StorageEngineException; import org.opencb.opencga.storage.core.variant.adaptors.VariantDBAdaptor;