From 9a7dd156807cc9d16dea68b396d76156b54d9de0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Wed, 24 Jul 2024 12:59:25 +0100 Subject: [PATCH 1/4] storage: Hash large variants into solr collections. Add unhashed attr_id #TASK-6596 --- .../VariantStorageMetadataManager.java | 23 ++++++++++++ .../core/variant/VariantStorageEngine.java | 13 +++++++ .../core/variant/VariantStoragePipeline.java | 5 +-- .../SearchIndexVariantQueryExecutor.java | 14 ++++---- .../variant/search/VariantSearchModel.java | 28 +++++++++++++++ .../VariantSearchToVariantConverter.java | 29 +++++++++++---- .../variant/search/solr/SolrQueryParser.java | 27 +++++++------- .../variant/VariantStorageEngineSVTest.java | 36 +++++++++++++++---- .../VariantSearchToVariantConverterTest.java | 2 +- .../search/solr/SolrQueryParserTest.java | 4 +-- .../search/HadoopVariantSearchDataWriter.java | 2 +- 11 files changed, 143 insertions(+), 40 deletions(-) diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/metadata/VariantStorageMetadataManager.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/metadata/VariantStorageMetadataManager.java index 6c539e4b077..a65ea678479 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/metadata/VariantStorageMetadataManager.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/metadata/VariantStorageMetadataManager.java @@ -573,11 +573,15 @@ public ProjectMetadata getProjectMetadata() { public ProjectMetadata getAndUpdateProjectMetadata(ObjectMap options) throws StorageEngineException { ProjectMetadata projectMetadata = getProjectMetadata(); + + checkSameSpeciesAndAssembly(options, projectMetadata); if (options != null && (projectMetadata == null || StringUtils.isEmpty(projectMetadata.getSpecies()) && options.containsKey(SPECIES.key()) || 
StringUtils.isEmpty(projectMetadata.getAssembly()) && options.containsKey(ASSEMBLY.key()))) { projectMetadata = updateProjectMetadata(pm -> { + // Check again, in case it was updated by another thread + checkSameSpeciesAndAssembly(options, pm); if (pm == null) { pm = new ProjectMetadata(); } @@ -598,6 +602,25 @@ public ProjectMetadata getAndUpdateProjectMetadata(ObjectMap options) throws Sto return projectMetadata; } + private static void checkSameSpeciesAndAssembly(ObjectMap options, ProjectMetadata projectMetadata) throws StorageEngineException { + if (options != null && projectMetadata != null) { + if (options.containsKey(ASSEMBLY.key())) { + if (StringUtils.isNotEmpty(projectMetadata.getAssembly()) && !projectMetadata.getAssembly() + .equalsIgnoreCase(options.getString(ASSEMBLY.key()))) { + throw new StorageEngineException("Incompatible assembly change from '" + projectMetadata.getAssembly() + "' to '" + + options.getString(ASSEMBLY.key()) + "'"); + } + } + if (options.containsKey(SPECIES.key())) { + if (StringUtils.isNotEmpty(projectMetadata.getSpecies()) && !projectMetadata.getSpecies() + .equalsIgnoreCase(toCellBaseSpeciesName(options.getString(SPECIES.key())))) { + throw new StorageEngineException("Incompatible species change from '" + projectMetadata.getSpecies() + "' to '" + + options.getString(SPECIES.key()) + "'"); + } + } + } + } + public DataResult getVariantFileMetadata(int studyId, int fileId, QueryOptions options) throws StorageEngineException { return fileDBAdaptor.getVariantFileMetadata(studyId, fileId, options); diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/VariantStorageEngine.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/VariantStorageEngine.java index 0f51fbe39eb..b541b2f4ae4 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/VariantStorageEngine.java +++ 
b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/VariantStorageEngine.java @@ -1362,6 +1362,19 @@ public VariantQueryExecutor getVariantQueryExecutor(ParsedVariantQuery variantQu throw new VariantQueryException("No VariantQueryExecutor found to run the query!"); } + public final VariantQueryExecutor getVariantQueryExecutor(Class clazz) + throws StorageEngineException { + Optional first = getVariantQueryExecutors() + .stream() + .filter(clazz::isInstance) + .findFirst(); + if (first.isPresent()) { + return first.get(); + } else { + throw new StorageEngineException("VariantQueryExecutor " + clazz + " not found"); + } + } + public Query preProcessQuery(Query originalQuery, QueryOptions options) { try { return getVariantQueryParser().preProcessQuery(originalQuery, options); diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/VariantStoragePipeline.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/VariantStoragePipeline.java index 5b37d2512b1..722d79e59fd 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/VariantStoragePipeline.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/VariantStoragePipeline.java @@ -60,10 +60,7 @@ import org.opencb.opencga.storage.core.io.plain.StringDataReader; import org.opencb.opencga.storage.core.io.plain.StringDataWriter; import org.opencb.opencga.storage.core.metadata.VariantStorageMetadataManager; -import org.opencb.opencga.storage.core.metadata.models.CohortMetadata; -import org.opencb.opencga.storage.core.metadata.models.FileMetadata; -import org.opencb.opencga.storage.core.metadata.models.StudyMetadata; -import org.opencb.opencga.storage.core.metadata.models.TaskMetadata; +import org.opencb.opencga.storage.core.metadata.models.*; import 
org.opencb.opencga.storage.core.variant.adaptors.GenotypeClass; import org.opencb.opencga.storage.core.variant.adaptors.VariantDBAdaptor; import org.opencb.opencga.storage.core.variant.io.VariantReaderUtils; diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/search/SearchIndexVariantQueryExecutor.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/search/SearchIndexVariantQueryExecutor.java index bd6b8e6437f..899f71a9b72 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/search/SearchIndexVariantQueryExecutor.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/search/SearchIndexVariantQueryExecutor.java @@ -183,7 +183,9 @@ public VariantQueryResult approximateCount(ParsedVariantQuery variantQuery DataResult nativeResult = searchManager .nativeQuery(dbName, searchEngineQuery, queryOptions); - List variantIds = nativeResult.getResults().stream().map(VariantSearchModel::getId).collect(Collectors.toList()); + List variantIds = nativeResult.getResults().stream() + .map(VariantSearchModel::toVariantSimple) + .collect(Collectors.toList()); // Adjust numSamples if the results from SearchManager is smaller than numSamples // If this happens, the count is not approximated if (variantIds.size() < sampling) { @@ -283,12 +285,12 @@ public boolean doIntersectWithSearch(Query query, QueryOptions options) { return intersect; } - protected Iterator variantIdIteratorFromSearch(Query query) { + protected Iterator variantIdIteratorFromSearch(Query query) { return variantIdIteratorFromSearch(query, Integer.MAX_VALUE, 0, null); } - protected Iterator variantIdIteratorFromSearch(Query query, int limit, int skip, AtomicLong numTotalResults) { - Iterator variantsIterator; + protected Iterator variantIdIteratorFromSearch(Query query, int limit, int skip, AtomicLong numTotalResults) { + 
Iterator variantsIterator; QueryOptions queryOptions = new QueryOptions() .append(QueryOptions.LIMIT, limit) .append(QueryOptions.SKIP, skip) @@ -302,14 +304,14 @@ protected Iterator variantIdIteratorFromSearch(Query query, int limit, i } variantsIterator = nativeResult.getResults() .stream() - .map(VariantSearchModel::getId) + .map(VariantSearchModel::toVariantSimple) .iterator(); } else { SolrNativeIterator nativeIterator = searchManager.nativeIterator(dbName, query, queryOptions); if (numTotalResults != null) { numTotalResults.set(nativeIterator.getNumFound()); } - variantsIterator = Iterators.transform(nativeIterator, VariantSearchModel::getId); + variantsIterator = Iterators.transform(nativeIterator, VariantSearchModel::toVariantSimple); } } catch (VariantSearchException | IOException e) { throw new VariantQueryException("Error querying " + VariantSearchManager.SEARCH_ENGINE_ID, e); diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/search/VariantSearchModel.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/search/VariantSearchModel.java index 9b0bb69792c..835af18a0a0 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/search/VariantSearchModel.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/search/VariantSearchModel.java @@ -17,12 +17,15 @@ package org.opencb.opencga.storage.core.variant.search; import org.apache.solr.client.solrj.beans.Field; +import org.opencb.biodata.models.variant.Variant; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; +import static org.opencb.opencga.storage.core.variant.search.VariantSearchToVariantConverter.HASH_PREFIX; + /** * Created by wasim on 09/11/16. 
*/ @@ -140,6 +143,9 @@ public class VariantSearchModel { @Field("fileInfo_*") private Map fileInfo; + @Field("attr_*") + private Map attr; + public static final double MISSING_VALUE = -100.0; @@ -171,6 +177,7 @@ public VariantSearchModel() { this.qual = new HashMap<>(); this.filter = new HashMap<>(); this.fileInfo = new HashMap<>(); + this.attr = new HashMap<>(); } public VariantSearchModel(VariantSearchModel init) { @@ -210,6 +217,7 @@ public VariantSearchModel(VariantSearchModel init) { this.qual = init.getQual(); this.filter = init.getFilter(); this.fileInfo = init.getFileInfo(); + this.attr = init.getAttr(); } @Override @@ -251,6 +259,7 @@ public String toString() { sb.append(", qual=").append(qual); sb.append(", filter=").append(filter); sb.append(", fileInfo=").append(fileInfo); + sb.append(", attr=").append(attr); sb.append('}'); return sb.toString(); } @@ -259,6 +268,17 @@ public String getId() { return id; } + public Variant toVariantSimple() { + String variantId = getId(); + if (variantId.startsWith(HASH_PREFIX)) { + Object o = getAttr().get("attr_id"); + variantId = o instanceof String ? 
(String) o : ((List) o).get(0); + } + Variant variant = new Variant(variantId); + variant.setId(variantId); + return variant; + } + public VariantSearchModel setId(String id) { this.id = id; return this; @@ -579,4 +599,12 @@ public VariantSearchModel setFileInfo(Map fileInfo) { return this; } + public Map getAttr() { + return attr; + } + + public VariantSearchModel setAttr(Map attr) { + this.attr = attr; + return this; + } } diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/search/VariantSearchToVariantConverter.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/search/VariantSearchToVariantConverter.java index 62841c0a3b9..10f3b58ff3f 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/search/VariantSearchToVariantConverter.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/search/VariantSearchToVariantConverter.java @@ -56,6 +56,7 @@ public class VariantSearchToVariantConverter implements ComplexTypeConverter includeFields; @@ -79,10 +80,9 @@ public VariantSearchToVariantConverter(Set includeFields) { @Override public Variant convertToDataModelType(VariantSearchModel variantSearchModel) { // set chromosome, start, end, ref, alt from ID - Variant variant = new Variant(variantSearchModel.getId()); + Variant variant = variantSearchModel.toVariantSimple(); - // set ID, chromosome, start, end, ref, alt, type - variant.setId(variantSearchModel.getVariantId()); + // set chromosome, start, end, ref, alt, type // set variant type if (StringUtils.isNotEmpty(variantSearchModel.getType())) { @@ -662,8 +662,10 @@ public VariantSearchModel convertToStorageType(Variant variant) { List other = new ArrayList<>(); // Set general Variant attributes: id, dbSNP, chromosome, start, end, type - variantSearchModel.setId(variant.toString()); // Internal unique ID e.g. 
3:1000:AT:- - variantSearchModel.setVariantId(variant.getId()); + String variantId = getVariantId(variant); + variantSearchModel.setId(variantId); // Internal unique ID e.g. 3:1000:AT:- + variantSearchModel.setVariantId(variantId); + variantSearchModel.getAttr().put("attr_id", variant.toString()); variantSearchModel.setChromosome(variant.getChromosome()); variantSearchModel.setStart(variant.getStart()); variantSearchModel.setEnd(variant.getEnd()); @@ -1019,8 +1021,7 @@ public VariantSearchModel convertToStorageType(Variant variant) { // This field contains all possible IDs: id, dbSNP, names, genes, transcripts, protein, clinvar, hpo, ... // This will help when searching by variant id. This is added at the end of the method after collecting all IDs Set xrefs = variantAnnotationModelUtils.extractXRefs(variant.getAnnotation()); - xrefs.add(variantSearchModel.getId()); - xrefs.add(variantSearchModel.getVariantId()); + xrefs.add(variantId); if (variant.getNames() != null && !variant.getNames().isEmpty()) { variant.getNames().forEach(name -> { if (name != null) { @@ -1032,6 +1033,20 @@ public VariantSearchModel convertToStorageType(Variant variant) { return variantSearchModel; } + public static String getVariantId(Variant variant) { + String variantString = variant.toString(); + if (variantString.length() > 32766) { + // variantString.length() >= Short.MAX_VALUE + return hashVariantId(variant, variantString); + } else { + return variantString; + } + } + + public static String hashVariantId(Variant variant, String variantString) { + return HASH_PREFIX + variant.getChromosome() + ":" + variant.getStart() + ":" + Integer.toString(variantString.hashCode()); + } + private void convertStudies(Variant variant, VariantSearchModel variantSearchModel, List other) { // Sanity check if (CollectionUtils.isEmpty(variant.getStudies())) { diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/search/solr/SolrQueryParser.java 
b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/search/solr/SolrQueryParser.java index 0cf045ada4c..a618fa22af4 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/search/solr/SolrQueryParser.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/search/solr/SolrQueryParser.java @@ -25,7 +25,6 @@ import org.apache.solr.client.solrj.SolrQuery; import org.apache.solr.common.SolrException; import org.opencb.biodata.models.core.Region; -import org.opencb.biodata.models.variant.Variant; import org.opencb.commons.datastore.core.Query; import org.opencb.commons.datastore.core.QueryOptions; import org.opencb.commons.datastore.solr.FacetQueryParser; @@ -35,7 +34,10 @@ import org.opencb.opencga.storage.core.variant.adaptors.VariantField; import org.opencb.opencga.storage.core.variant.adaptors.VariantQueryException; import org.opencb.opencga.storage.core.variant.adaptors.VariantQueryParam; -import org.opencb.opencga.storage.core.variant.query.*; +import org.opencb.opencga.storage.core.variant.query.KeyOpValue; +import org.opencb.opencga.storage.core.variant.query.ParsedVariantQuery; +import org.opencb.opencga.storage.core.variant.query.Values; +import org.opencb.opencga.storage.core.variant.query.VariantQueryParser; import org.opencb.opencga.storage.core.variant.query.projection.VariantQueryProjectionParser; import org.opencb.opencga.storage.core.variant.search.VariantSearchToVariantConverter; import org.slf4j.Logger; @@ -79,7 +81,7 @@ public class SolrQueryParser { static { includeMap = new HashMap<>(); - includeMap.put("id", "id,variantId"); + includeMap.put("id", "id,variantId,attr_id"); includeMap.put("chromosome", "chromosome"); includeMap.put("start", "start"); includeMap.put("end", "end"); @@ -477,7 +479,9 @@ private String parseGenomicFilter(Query query) { genes.addAll(variantQueryXref.getGenes()); 
xrefs.addAll(variantQueryXref.getIds()); xrefs.addAll(variantQueryXref.getOtherXrefs()); - xrefs.addAll(variantQueryXref.getVariants().stream().map(Variant::toString).collect(Collectors.toList())); + xrefs.addAll(variantQueryXref.getVariants().stream() + .map(VariantSearchToVariantConverter::getVariantId) + .collect(Collectors.toList())); // Regions if (StringUtils.isNotEmpty(query.getString(REGION.key()))) { @@ -1616,15 +1620,12 @@ private String[] includeFieldsWithMandatory(String[] includes) { return new String[0]; } - String[] mandatoryIncludeFields = new String[]{"id", "chromosome", "start", "end", "type"}; - String[] includeWithMandatory = new String[includes.length + mandatoryIncludeFields.length]; - for (int i = 0; i < includes.length; i++) { - includeWithMandatory[i] = includes[i]; - } - for (int i = 0; i < mandatoryIncludeFields.length; i++) { - includeWithMandatory[includes.length + i] = mandatoryIncludeFields[i]; - } - return includeWithMandatory; + Set mandatoryIncludeFields = new HashSet<>(Arrays.asList("id", "attr_id", "chromosome", "start", "end", "type")); + Set includeWithMandatory = new LinkedHashSet<>(includes.length + mandatoryIncludeFields.size()); + + includeWithMandatory.addAll(Arrays.asList(includes)); + includeWithMandatory.addAll(mandatoryIncludeFields); + return includeWithMandatory.toArray(new String[0]); } /** diff --git a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/VariantStorageEngineSVTest.java b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/VariantStorageEngineSVTest.java index 64fc14c4c6a..05f090a162f 100644 --- a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/VariantStorageEngineSVTest.java +++ b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/VariantStorageEngineSVTest.java @@ -1,6 +1,7 @@ package org.opencb.opencga.storage.core.variant; import 
org.junit.Before; +import org.junit.ClassRule; import org.junit.Ignore; import org.junit.Test; import org.opencb.biodata.formats.variant.io.VariantReader; @@ -20,6 +21,11 @@ import org.opencb.opencga.storage.core.variant.adaptors.VariantQueryParam; import org.opencb.opencga.storage.core.variant.adaptors.iterators.VariantDBIterator; import org.opencb.opencga.storage.core.variant.io.VariantWriterFactory; +import org.opencb.opencga.storage.core.variant.query.ParsedVariantQuery; +import org.opencb.opencga.storage.core.variant.query.VariantQueryResult; +import org.opencb.opencga.storage.core.variant.query.executors.VariantQueryExecutor; +import org.opencb.opencga.storage.core.variant.search.SearchIndexVariantQueryExecutor; +import org.opencb.opencga.storage.core.variant.solr.VariantSolrExternalResource; import java.net.URI; import java.nio.file.Paths; @@ -49,22 +55,27 @@ public abstract class VariantStorageEngineSVTest extends VariantStorageBaseTest protected static URI input2; protected static URI input3; + @ClassRule + public static VariantSolrExternalResource solr = new VariantSolrExternalResource(); + @Before public void before() throws Exception { if (!loaded) { clearDB(DB_NAME); + } + variantStorageEngine.getConfiguration().getCellbase().setUrl(ParamConstants.CELLBASE_URL); + variantStorageEngine.getConfiguration().getCellbase().setVersion(ParamConstants.CELLBASE_VERSION); + variantStorageEngine.getConfiguration().getCellbase().setDataRelease(ParamConstants.CELLBASE_DATA_RELEASE); + variantStorageEngine.getOptions().put(VariantStorageOptions.ASSEMBLY.key(), "grch38"); + variantStorageEngine.reloadCellbaseConfiguration(); + solr.configure(variantStorageEngine); + if (!loaded) { loadFiles(); loaded = true; } } protected void loadFiles() throws Exception { - variantStorageEngine.getConfiguration().getCellbase().setUrl(ParamConstants.CELLBASE_URL); - variantStorageEngine.getConfiguration().getCellbase().setVersion("v5.2"); - 
variantStorageEngine.getConfiguration().getCellbase().setDataRelease("3"); - variantStorageEngine.getOptions().put(VariantStorageOptions.ASSEMBLY.key(), "grch38"); - variantStorageEngine.reloadCellbaseConfiguration(); - input1 = getResourceUri("variant-test-sv.vcf"); studyMetadata = new StudyMetadata(1, "s1"); variantStorageEngine.getOptions().append(VariantStorageOptions.ANNOTATOR_CELLBASE_EXCLUDE.key(), "expression,clinical"); @@ -86,6 +97,7 @@ protected void loadFiles() throws Exception { .append(VariantStorageOptions.STATS_CALCULATE.key(), true) .append(VariantStorageOptions.ASSEMBLY.key(), "grch38")); + variantStorageEngine.secondaryIndex(); } @Test @@ -106,6 +118,18 @@ public void checkCount() throws Exception { assertEquals(expected, count); } + @Test + public void checkSecondaryAnnotationIndex() throws Exception { + VariantQueryExecutor variantQueryExecutor = variantStorageEngine.getVariantQueryExecutor(SearchIndexVariantQueryExecutor.class); + for (Variant variant : variantStorageEngine) { + ParsedVariantQuery query = variantStorageEngine + .parseQuery(new Query(VariantQueryParam.ID.key(), variant.toString()), new QueryOptions()); + VariantQueryResult result = variantQueryExecutor.get(query); + assertEquals(1, result.getNumResults()); + assertEquals(variant.toString(), result.first().toString()); + } + } + @Test public void checkCorrectnessFile1() throws Exception { checkCorrectness(VariantStorageEngineSVTest.input1); diff --git a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/search/VariantSearchToVariantConverterTest.java b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/search/VariantSearchToVariantConverterTest.java index 5839db1745b..539ea8c22d6 100644 --- a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/search/VariantSearchToVariantConverterTest.java +++ 
b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/search/VariantSearchToVariantConverterTest.java @@ -49,7 +49,7 @@ public void test() throws Exception { expectedVariant.addStudyEntry(aux.getStudy("2")); VariantSearchModel variantSearchModel = converter.convertToStorageType(expectedVariant); - assertNull(variantSearchModel.getVariantId()); + assertNotNull(variantSearchModel.getVariantId()); assertEquals(variantId, variantSearchModel.getId()); Variant actualVariant = converter.convertToDataModelType(variantSearchModel); diff --git a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/search/solr/SolrQueryParserTest.java b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/search/solr/SolrQueryParserTest.java index a74bcd8f8ed..6890ad7534d 100644 --- a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/search/solr/SolrQueryParserTest.java +++ b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/search/solr/SolrQueryParserTest.java @@ -43,8 +43,8 @@ public class SolrQueryParserTest { private String studyName = "platinum"; private String flBase = "fl=other,geneToSoAcc,traits,type,soAcc,score_*,sift,passStats_*,caddRaw,biotypes,polyphenDesc,studies,end,id,variantId," - + "popFreq_*,caddScaled,genes,chromosome,xrefs,start,gerp,polyphen,siftDesc," - + "phastCons,phylop,altStats_*,id,chromosome,start,end,type"; + + "popFreq_*,caddScaled,genes,chromosome,xrefs,start,gerp,polyphen,attr_id,siftDesc," + + "phastCons,phylop,altStats_*"; private String flDefault1 = flBase + ",fileInfo__*,qual__*,filter__*,sampleFormat__*"; private String flDefaultStudy = flBase + ",fileInfo__" + studyName + "__*,qual__" + studyName + "__*," + "filter__" + studyName + "__*,sampleFormat__" + studyName + "__*"; diff --git 
a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/search/HadoopVariantSearchDataWriter.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/search/HadoopVariantSearchDataWriter.java index 17c61739496..39c63923c02 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/search/HadoopVariantSearchDataWriter.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/search/HadoopVariantSearchDataWriter.java @@ -68,7 +68,7 @@ protected void add(List batch) throws Exception { return PhoenixHelper.toBytes(studyIds, PIntegerArray.INSTANCE); }); - byte[] row = VariantPhoenixKeyFactory.generateVariantRowKey(new Variant(document.getFieldValue("id").toString())); + byte[] row = VariantPhoenixKeyFactory.generateVariantRowKey(new Variant(document.getFieldValue("attr_id").toString())); variantRows.add(row); mutations.add(new Put(row) .addColumn(family, VariantPhoenixSchema.VariantColumn.INDEX_STUDIES.bytes(), bytes)); From 5d3c7f461aa20e898bbd8e56b28e61bdc3cb65fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Wed, 24 Jul 2024 17:01:14 +0100 Subject: [PATCH 2/4] storage: Fix some tests. 
#TASK-6596 --- .../core/variant/VariantStorageEngineBNDTest.java | 2 ++ .../variant/dummy/DummyProjectMetadataAdaptor.java | 5 ++--- .../variant/HadoopVariantStorageEngineBNDTest.java | 2 +- .../variant/index/family/FamilyIndexTest.java | 14 +++++++------- 4 files changed, 12 insertions(+), 11 deletions(-) diff --git a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/VariantStorageEngineBNDTest.java b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/VariantStorageEngineBNDTest.java index 08dabda7562..0e90e5a174a 100644 --- a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/VariantStorageEngineBNDTest.java +++ b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/VariantStorageEngineBNDTest.java @@ -48,6 +48,7 @@ public void before() throws Exception { variantStorageEngine.getConfiguration().getCellbase().setUrl(ParamConstants.CELLBASE_URL); variantStorageEngine.getConfiguration().getCellbase().setVersion(ParamConstants.CELLBASE_VERSION); variantStorageEngine.getConfiguration().getCellbase().setDataRelease(ParamConstants.CELLBASE_DATA_RELEASE); + variantStorageEngine.getOptions().put(VariantStorageOptions.ASSEMBLY.key(), "grch38"); if (!loaded) { clearDB(DB_NAME); loadFiles(); @@ -59,6 +60,7 @@ protected void loadFiles() throws Exception { variantStorageEngine.getConfiguration().getCellbase().setUrl(ParamConstants.CELLBASE_URL); variantStorageEngine.getConfiguration().getCellbase().setVersion(ParamConstants.CELLBASE_VERSION); variantStorageEngine.getConfiguration().getCellbase().setDataRelease(ParamConstants.CELLBASE_DATA_RELEASE); + variantStorageEngine.getOptions().put(VariantStorageOptions.ASSEMBLY.key(), "grch38"); studyMetadata = new StudyMetadata(1, "s1"); // variantStorageEngine.getOptions().append(VariantStorageOptions.ANNOTATOR_CELLBASE_EXCLUDE.key(), "expression,clinical"); input1 = 
getResourceUri("variant-test-bnd.vcf"); diff --git a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/dummy/DummyProjectMetadataAdaptor.java b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/dummy/DummyProjectMetadataAdaptor.java index d223180d9d1..3ba92ed7f1c 100644 --- a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/dummy/DummyProjectMetadataAdaptor.java +++ b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/dummy/DummyProjectMetadataAdaptor.java @@ -45,10 +45,9 @@ public void refresh() { @Override public synchronized DataResult getProjectMetadata() { final DataResult result = new DataResult<>(); - if (projectMetadata == null) { - projectMetadata = new ProjectMetadata("hsapiens", "grch37", 1); + if (projectMetadata != null) { + result.setResults(Collections.singletonList(projectMetadata.copy())); } - result.setResults(Collections.singletonList(projectMetadata.copy())); return result; } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/HadoopVariantStorageEngineBNDTest.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/HadoopVariantStorageEngineBNDTest.java index aea720d356a..b613df935ba 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/HadoopVariantStorageEngineBNDTest.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/HadoopVariantStorageEngineBNDTest.java @@ -19,7 +19,7 @@ public class HadoopVariantStorageEngineBNDTest extends VariantStorageEngineBNDTe @Override protected void loadFiles() throws Exception { super.loadFiles(); - 
VariantHbaseTestUtils.printVariants(getVariantStorageEngine().getDBAdaptor(), newOutputUri()); + VariantHbaseTestUtils.printVariants(((HadoopVariantStorageEngine) variantStorageEngine).getDBAdaptor(), newOutputUri()); } } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/family/FamilyIndexTest.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/family/FamilyIndexTest.java index f67d5734d7c..a6aeba91145 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/family/FamilyIndexTest.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/family/FamilyIndexTest.java @@ -63,13 +63,13 @@ public class FamilyIndexTest extends VariantStorageBaseTest implements HadoopVar @Before public void before() throws Exception { + HadoopVariantStorageEngine variantStorageEngine = getVariantStorageEngine(); + variantStorageEngine.getConfiguration().getCellbase().setUrl(ParamConstants.CELLBASE_URL); + variantStorageEngine.getConfiguration().getCellbase().setVersion("v5.2"); + variantStorageEngine.getConfiguration().getCellbase().setDataRelease("3"); + variantStorageEngine.getOptions().put(VariantStorageOptions.ASSEMBLY.key(), "grch38"); + variantStorageEngine.reloadCellbaseConfiguration(); if (!loaded) { - HadoopVariantStorageEngine variantStorageEngine = getVariantStorageEngine(); - variantStorageEngine.getConfiguration().getCellbase().setUrl(ParamConstants.CELLBASE_URL); - variantStorageEngine.getConfiguration().getCellbase().setVersion("v5.2"); - variantStorageEngine.getConfiguration().getCellbase().setDataRelease("3"); - variantStorageEngine.getOptions().put(VariantStorageOptions.ASSEMBLY.key(), "grch38"); - 
variantStorageEngine.reloadCellbaseConfiguration(); URI outputUri = newOutputUri(); ObjectMap params = new ObjectMap(VariantStorageOptions.ANNOTATE.key(), false) @@ -91,7 +91,7 @@ public void before() throws Exception { variantStorageEngine.annotate(outputUri, new ObjectMap()); - VariantHbaseTestUtils.printVariants(getVariantStorageEngine().getDBAdaptor(), newOutputUri(getTestName().getMethodName())); + VariantHbaseTestUtils.printVariants(variantStorageEngine.getDBAdaptor(), newOutputUri(getTestName().getMethodName())); mendelianErrorVariants = new HashSet<>(); deNovoVariants = new HashSet<>(); From 84af7c166e5270391394cd995108b08c6c774ace Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Wed, 24 Jul 2024 18:51:37 +0100 Subject: [PATCH 3/4] storage: Fix NPE at some tests. #TASK-6596 --- .../core/metadata/VariantMetadataConverterTest.java | 2 ++ .../storage/core/variant/io/VariantWriterFactoryTest.java | 8 ++++++-- .../storage/hadoop/variant/gaps/FillGapsTaskTest.java | 2 ++ 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/metadata/VariantMetadataConverterTest.java b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/metadata/VariantMetadataConverterTest.java index 2aacde2e3ce..30e4bba2e5f 100644 --- a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/metadata/VariantMetadataConverterTest.java +++ b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/metadata/VariantMetadataConverterTest.java @@ -9,6 +9,7 @@ import org.junit.experimental.categories.Category; import org.opencb.biodata.models.variant.VariantFileMetadata; import org.opencb.biodata.models.variant.metadata.VariantMetadata; +import org.opencb.commons.datastore.core.ObjectMap; import org.opencb.opencga.core.testclassification.duration.ShortTests; import 
org.opencb.opencga.storage.core.io.managers.IOConnectorProvider; import org.opencb.opencga.storage.core.io.managers.LocalIOConnector; @@ -45,6 +46,7 @@ public class VariantMetadataConverterTest { @Before public void setUp() throws Exception { metadataManager = new VariantStorageMetadataManager(new DummyVariantStorageMetadataDBAdaptorFactory()); + projectMetadata = metadataManager.getAndUpdateProjectMetadata(new ObjectMap()); URI uri = VariantStorageBaseTest.getResourceUri("platinum/1K.end.platinum-genomes-vcf-NA12877_S1.genome.vcf.gz"); variantReaderUtils = new VariantReaderUtils(new IOConnectorProvider(LocalIOConnector.class)); diff --git a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/io/VariantWriterFactoryTest.java b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/io/VariantWriterFactoryTest.java index 32ef120f888..776abd3874d 100644 --- a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/io/VariantWriterFactoryTest.java +++ b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/io/VariantWriterFactoryTest.java @@ -23,11 +23,13 @@ import org.opencb.biodata.models.variant.Variant; import org.opencb.biodata.models.variant.metadata.VariantFileHeader; import org.opencb.biodata.models.variant.metadata.VariantFileHeaderComplexLine; +import org.opencb.commons.datastore.core.ObjectMap; import org.opencb.commons.datastore.core.Query; import org.opencb.commons.datastore.core.QueryOptions; import org.opencb.commons.io.DataWriter; import org.opencb.opencga.core.testclassification.duration.ShortTests; import org.opencb.opencga.storage.core.exceptions.StorageEngineException; +import org.opencb.opencga.storage.core.metadata.VariantStorageMetadataManager; import org.opencb.opencga.storage.core.metadata.models.StudyMetadata; import org.opencb.opencga.storage.core.variant.dummy.DummyVariantDBAdaptor; import 
org.opencb.opencga.storage.core.variant.dummy.DummyVariantStorageEngine; @@ -86,8 +88,10 @@ public void testContigLengthNull() throws IOException, StorageEngineException { new VariantFileHeaderComplexLine("contig", "chr3", null, null, null, Collections.singletonMap("length", ".")), new VariantFileHeaderComplexLine("contig", "chr4", null, null, null, Collections.singletonMap("length", "1234")) )); - StudyMetadata study = dbAdaptor.getMetadataManager().createStudy("study"); - dbAdaptor.getMetadataManager().unsecureUpdateStudyMetadata(study.setVariantHeader(header)); + VariantStorageMetadataManager metadataManager = dbAdaptor.getMetadataManager(); + metadataManager.getAndUpdateProjectMetadata(new ObjectMap()); + StudyMetadata study = metadataManager.createStudy("study"); + metadataManager.unsecureUpdateStudyMetadata(study.setVariantHeader(header)); ByteArrayOutputStream outputStream = new ByteArrayOutputStream(10000); DataWriter writer = new VariantWriterFactory(dbAdaptor).newDataWriter( VariantWriterFactory.VariantOutputFormat.VCF, diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/gaps/FillGapsTaskTest.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/gaps/FillGapsTaskTest.java index 3d9db73719e..63f579cb99e 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/gaps/FillGapsTaskTest.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/gaps/FillGapsTaskTest.java @@ -13,6 +13,7 @@ import org.opencb.biodata.models.variant.metadata.VariantFileHeaderComplexLine; import org.opencb.biodata.models.variant.protobuf.VcfSliceProtos; import org.opencb.biodata.tools.variant.converters.proto.VariantToVcfSliceConverter; +import 
org.opencb.commons.datastore.core.ObjectMap; import org.opencb.opencga.core.testclassification.duration.ShortTests; import org.opencb.opencga.storage.core.metadata.VariantStorageMetadataManager; import org.opencb.opencga.storage.core.metadata.models.StudyMetadata; @@ -44,6 +45,7 @@ public class FillGapsTaskTest { public void setUp() throws Exception { DummyVariantStorageMetadataDBAdaptorFactory.clear(); metadataManager = new VariantStorageMetadataManager(new DummyVariantStorageMetadataDBAdaptorFactory()); + metadataManager.getAndUpdateProjectMetadata(new ObjectMap()); studyMetadata = metadataManager.createStudy("S"); metadataManager.updateStudyMetadata("S", sm -> { sm.getAttributes().put(VariantStorageOptions.EXTRA_FORMAT_FIELDS.key(), "DP"); From f7b4b52883ab25868e61bdc6ba6b6a4f07f7548f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Thu, 25 Jul 2024 08:43:52 +0100 Subject: [PATCH 4/4] storage: Fix VariantDBAdaptorTest. #TASK-6596 --- .../hadoop/variant/adaptors/HadoopVariantDBAdaptorTest.java | 1 + 1 file changed, 1 insertion(+) diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/adaptors/HadoopVariantDBAdaptorTest.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/adaptors/HadoopVariantDBAdaptorTest.java index ca0d268d4f5..b2339a3cd7e 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/adaptors/HadoopVariantDBAdaptorTest.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/adaptors/HadoopVariantDBAdaptorTest.java @@ -119,6 +119,7 @@ public void before() throws Exception { e.printStackTrace(); } } + variantStorageEngine.getOptions().append(VariantStorageOptions.ASSEMBLY.key(), "GRCH38"); cellBaseUtils = 
variantStorageEngine.getCellBaseUtils(); expectedConnections = GlobalClientMetrics.GLOBAL_OPEN_PHOENIX_CONNECTIONS.getMetric().getTotalSum(); }