From 5f1029a6419951ec51f6806734439f239a1f2dec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Mon, 27 May 2024 11:29:23 +0100 Subject: [PATCH 01/21] storage: Fix VariantSearchTest. #TASK-6136 --- .../core/variant/search/VariantSearchTest.java | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/search/VariantSearchTest.java b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/search/VariantSearchTest.java index 56588d25863..17d7d073c51 100644 --- a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/search/VariantSearchTest.java +++ b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/search/VariantSearchTest.java @@ -187,15 +187,16 @@ public void testSpecialCharacter() throws Exception { List variants = getVariants(limit); List annotatedVariants = annotatedVariants(variants); - String studyId = "abyu12"; - String fileId = "a.vcf"; + String study = "abyu12"; + String file = "a.vcf"; - variants.get(0).getStudies().get(0).getFiles().get(0).setFileId(fileId); + variants.get(0).getStudies().get(0).getFiles().get(0).setFileId(file); System.out.println(variants.get(0).getStudies().get(0).getFiles().get(0).getFileId()); //System.exit(-1); - scm.createStudy(studyId); - + int studyId = scm.createStudy(study).getId(); + int fileId = scm.registerFile(studyId, file, Arrays.asList("A-A", "B", "C", "D")); + scm.addIndexedFiles(studyId, Collections.singletonList(fileId)); String collection = solr.coreName; variantSearchManager.create(collection); @@ -204,13 +205,13 @@ public void testSpecialCharacter() throws Exception { samplePosition.put("B", 1); samplePosition.put("C", 2); samplePosition.put("D", 3); - annotatedVariants.get(0).getStudies().get(0).setStudyId(studyId).setSortedSamplesPosition(samplePosition); + 
annotatedVariants.get(0).getStudies().get(0).setStudyId(study).setSortedSamplesPosition(samplePosition); variantSearchManager.insert(collection, annotatedVariants); Query query = new Query(); - query.put(VariantQueryParam.STUDY.key(), studyId); + query.put(VariantQueryParam.STUDY.key(), study); // query.put(VariantQueryParam.SAMPLE.key(), samplePosition.keySet().toArray()[0]); - query.put(VariantQueryParam.FILE.key(), fileId); + query.put(VariantQueryParam.FILE.key(), file); query.put(VariantQueryParam.FILTER.key(), "PASS"); query.put(VariantQueryParam.ANNOT_CLINICAL_SIGNIFICANCE.key(), "benign"); VariantQueryResult results = variantSearchManager.query(collection, variantStorageEngine.parseQuery(query, From 9f46a7cf2c3c95bfb6a10bd7d834e4b7c4d58b50 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Tue, 28 May 2024 17:20:02 +0100 Subject: [PATCH 02/21] storage: Fix tests from HadoopVariantStorageEngineSplitDataTest #TASK-6136 --- ...adoopVariantStorageEngineSplitDataTest.java | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/HadoopVariantStorageEngineSplitDataTest.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/HadoopVariantStorageEngineSplitDataTest.java index e4f1f2e14b4..1401a7b5ad9 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/HadoopVariantStorageEngineSplitDataTest.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/HadoopVariantStorageEngineSplitDataTest.java @@ -224,6 +224,10 @@ public void testMultiChromosomeSplitData() throws Exception { } private void failAtLoadingFile(String x, String file1, URI outputUri) throws Exception 
{ + failAtLoadingFile(x, file1, outputUri, 1); + } + + private void failAtLoadingFile(String x, String file1, URI outputUri, int expectedRunningTasks) throws Exception { try { VariantStorageEngine engine = getMockedStorageEngine(new ObjectMap(VariantStorageOptions.STUDY.key(), STUDY_NAME)); engine.index(Collections.singletonList(getResourceUri(x + file1)), outputUri); @@ -236,11 +240,13 @@ private void failAtLoadingFile(String x, String file1, URI outputUri) throws Exc assertEquals(TaskMetadata.Status.NONE, fileMetadata.getIndexStatus()); List runningTasks = new ArrayList<>(); metadataManager.getRunningTasks(studyId).forEach(runningTasks::add); - assertEquals(1, runningTasks.size()); - assertEquals(TaskMetadata.Type.LOAD, runningTasks.get(0).getType()); - assertEquals(TaskMetadata.Status.RUNNING, runningTasks.get(0).currentStatus()); - assertEquals(Arrays.asList(fileMetadata.getId()), runningTasks.get(0).getFileIds()); + assertEquals(expectedRunningTasks, runningTasks.size()); + TaskMetadata taskMetadata = runningTasks.get(runningTasks.size() - 1); + assertEquals(TaskMetadata.Type.LOAD, taskMetadata.getType()); + assertEquals(TaskMetadata.Status.RUNNING, taskMetadata.currentStatus()); + assertEquals(Arrays.asList(fileMetadata.getId()), taskMetadata.getFileIds()); } catch (AssertionError error) { + error.addSuppressed(e); e.printStackTrace(); throw error; } @@ -594,7 +600,7 @@ public void testLoadMultiFileDataConcurrencyDeleteMany() throws Exception { String file2 = "1K.end.platinum-genomes-vcf-NA12878_S1.vcf.gz"; failAtLoadingFile(resourceDir, file1, outDir); - failAtLoadingFile(resourceDir, file2, outDir); + failAtLoadingFile(resourceDir, file2, outDir, 2); // try { // getMockedStorageEngine().index(Collections.singletonList(getResourceUri(resourceDir + file1)), outDir); // fail("Should have thrown an exception"); @@ -656,7 +662,7 @@ public void testLoadMultiFileDataConcurrencyFail() throws Exception { 
variantStorageEngine.getOptions().put(VariantStorageOptions.LOAD_MULTI_FILE_DATA.key(), true); variantStorageEngine.getOptions().put(VariantStorageOptions.RESUME.key(), true); - variantStorageEngine.index(Collections.singletonList(getResourceUri(file1)), outDir); + variantStorageEngine.index(Collections.singletonList(getResourceUri(resourceDir + file1)), outDir); } From f15978bc1fc7d8ce41779b53c8d5d5956003fb1e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Fri, 31 May 2024 11:30:39 +0100 Subject: [PATCH 03/21] storage: Avoid unnecessary sample metadata updates updating cohorts #TASK-5895 --- .../VariantStorageMetadataManager.java | 87 ++++++++----------- .../VariantStorageMetadataManagerTest.java | 34 ++++++++ 2 files changed, 68 insertions(+), 53 deletions(-) diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/metadata/VariantStorageMetadataManager.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/metadata/VariantStorageMetadataManager.java index 89d86a63a89..a27f908173b 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/metadata/VariantStorageMetadataManager.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/metadata/VariantStorageMetadataManager.java @@ -56,7 +56,6 @@ import java.util.*; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ExecutionException; -import java.util.concurrent.TimeoutException; import java.util.concurrent.atomic.AtomicReference; import java.util.function.BiFunction; import java.util.function.BiPredicate; @@ -191,14 +190,7 @@ public ObjectMap getConfiguration() { public Lock lockGlobal(long lockDuration, long timeout, String lockName) throws StorageEngineException { - try { - return projectDBAdaptor.lockProject(lockDuration, timeout, lockName); - } catch (InterruptedException e) { - 
Thread.currentThread().interrupt(); - throw new StorageEngineException("Unable to lock the Project", e); - } catch (TimeoutException e) { - throw new StorageEngineException("Unable to lock the Project", e); - } + return projectDBAdaptor.lockProject(lockDuration, timeout, lockName); } public Lock lockStudy(int studyId) throws StorageEngineException { @@ -282,8 +274,7 @@ public StudyMetadata updateStudyMetadata(Object study, Upd throws StorageEngineException, E { int studyId = getStudyId(study); - Lock lock = lockStudy(studyId); - try { + try (Lock lock = lockStudy(studyId)) { StudyMetadata sm = getStudyMetadata(studyId); sm = updater.update(sm); @@ -291,8 +282,6 @@ public StudyMetadata updateStudyMetadata(Object study, Upd lock.checkLocked(); unsecureUpdateStudyMetadata(sm); return sm; - } finally { - lock.unlock(); } } @@ -557,16 +546,8 @@ public ProjectMetadata updateProjectMetadata(UpdateConsume public ProjectMetadata updateProjectMetadata(UpdateFunction function) throws StorageEngineException, E { Objects.requireNonNull(function); - Lock lock; - try { - lock = projectDBAdaptor.lockProject(lockDuration, lockTimeout); - } catch (InterruptedException e) { - Thread.currentThread().interrupt(); - throw new StorageEngineException("Unable to lock the Project", e); - } catch (TimeoutException e) { - throw new StorageEngineException("Unable to lock the Project", e); - } - try { + + try (Lock lock = projectDBAdaptor.lockProject(lockDuration, lockTimeout)) { ProjectMetadata projectMetadata = getProjectMetadata(); int countersHash = (projectMetadata == null ? 
Collections.emptyMap() : projectMetadata.getCounters()).hashCode(); @@ -579,8 +560,6 @@ public ProjectMetadata updateProjectMetadata(UpdateFunctio lock.checkLocked(); projectDBAdaptor.updateProjectMetadata(projectMetadata, updateCounters); return projectMetadata; - } finally { - lock.unlock(); } } @@ -673,16 +652,14 @@ public void unsecureUpdateFileMetadata(int studyId, FileMetadata file) { public FileMetadata updateFileMetadata(int studyId, int fileId, UpdateConsumer update) throws E, StorageEngineException { getFileName(studyId, fileId); // Check file exists - Lock lock = fileDBAdaptor.lock(studyId, fileId, lockDuration, lockTimeout); - try { + + try (Lock lock = fileDBAdaptor.lock(studyId, fileId, lockDuration, lockTimeout)) { FileMetadata fileMetadata = getFileMetadata(studyId, fileId); update.update(fileMetadata); lock.checkLocked(); unsecureUpdateFileMetadata(studyId, fileMetadata); fileIdIndexedCache.put(studyId, fileId, fileMetadata.isIndexed()); return fileMetadata; - } finally { - lock.unlock(); } } @@ -875,15 +852,13 @@ public void unsecureUpdateSampleMetadata(int studyId, SampleMetadata sample) { public SampleMetadata updateSampleMetadata(int studyId, int sampleId, UpdateConsumer consumer) throws E, StorageEngineException { getSampleName(studyId, sampleId); // Check sample exists - Lock lock = sampleDBAdaptor.lock(studyId, sampleId, lockDuration, lockTimeout); - try { + + try (Lock lock = sampleDBAdaptor.lock(studyId, sampleId, lockDuration, lockTimeout)) { SampleMetadata sample = getSampleMetadata(studyId, sampleId); sample = consumer.toFunction().update(sample); lock.checkLocked(); unsecureUpdateSampleMetadata(studyId, sample); return sample; - } finally { - lock.unlock(); } } @@ -1054,15 +1029,12 @@ public void unsecureUpdateCohortMetadata(int studyId, CohortMetadata cohort) { public CohortMetadata updateCohortMetadata(int studyId, int cohortId, UpdateConsumer update) throws E, StorageEngineException { getCohortName(studyId, cohortId); // Check 
cohort exists - Lock lock = cohortDBAdaptor.lock(studyId, cohortId, lockDuration, lockTimeout); - try { + try (Lock lock = cohortDBAdaptor.lock(studyId, cohortId, lockDuration, lockTimeout)) { CohortMetadata cohortMetadata = getCohortMetadata(studyId, cohortId); update.update(cohortMetadata); lock.checkLocked(); unsecureUpdateCohortMetadata(studyId, cohortMetadata); return cohortMetadata; - } finally { - lock.unlock(); } } @@ -1190,13 +1162,19 @@ private CohortMetadata updateCohortSamples(int studyId, String cohortName, Colle for (Integer sampleId : sampleIds) { Integer finalCohortId = cohortId; if (secondaryIndexCohort) { - updateSampleMetadata(studyId, sampleId, sampleMetadata -> { - sampleMetadata.addSecondaryIndexCohort(finalCohortId); - }); + if (!getSampleMetadata(studyId, sampleId).getSecondaryIndexCohorts().contains(finalCohortId)) { + // Avoid unnecessary updates + updateSampleMetadata(studyId, sampleId, sampleMetadata -> { + sampleMetadata.addSecondaryIndexCohort(finalCohortId); + }); + } } else { - updateSampleMetadata(studyId, sampleId, sampleMetadata -> { - sampleMetadata.addCohort(finalCohortId); - }); + if (!getSampleMetadata(studyId, sampleId).getCohorts().contains(finalCohortId)) { + // Avoid unnecessary updates + updateSampleMetadata(studyId, sampleId, sampleMetadata -> { + sampleMetadata.addCohort(finalCohortId); + }); + } } } @@ -1209,13 +1187,19 @@ private CohortMetadata updateCohortSamples(int studyId, String cohortName, Colle Integer finalCohortId = cohortId; if (!sampleIds.contains(sampleFromCohort)) { if (secondaryIndexCohort) { - updateSampleMetadata(studyId, sampleFromCohort, sampleMetadata -> { - sampleMetadata.getSecondaryIndexCohorts().remove(finalCohortId); - }); + if (getSampleMetadata(studyId, sampleFromCohort).getSecondaryIndexCohorts().contains(finalCohortId)) { + // Avoid unnecessary updates + updateSampleMetadata(studyId, sampleFromCohort, sampleMetadata -> { + sampleMetadata.getSecondaryIndexCohorts().remove(finalCohortId); + 
}); + } } else { - updateSampleMetadata(studyId, sampleFromCohort, sampleMetadata -> { - sampleMetadata.getCohorts().remove(finalCohortId); - }); + if (getSampleMetadata(studyId, sampleFromCohort).getCohorts().contains(finalCohortId)) { + // Avoid unnecessary updates + updateSampleMetadata(studyId, sampleFromCohort, sampleMetadata -> { + sampleMetadata.getCohorts().remove(finalCohortId); + }); + } } } } @@ -1326,15 +1310,12 @@ public void unsecureUpdateTask(int studyId, TaskMetadata task) throws StorageEng public TaskMetadata updateTask(int studyId, int taskId, UpdateConsumer consumer) throws E, StorageEngineException { getTask(studyId, taskId); // Check task exists - Lock lock = taskDBAdaptor.lock(studyId, taskId, lockDuration, lockTimeout); - try { + try (Lock lock = taskDBAdaptor.lock(studyId, taskId, lockDuration, lockTimeout)) { TaskMetadata task = getTask(studyId, taskId); consumer.update(task); lock.checkLocked(); unsecureUpdateTask(studyId, task); return task; - } finally { - lock.unlock(); } } diff --git a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/metadata/VariantStorageMetadataManagerTest.java b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/metadata/VariantStorageMetadataManagerTest.java index 1b3311958f4..71ea72de3c0 100644 --- a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/metadata/VariantStorageMetadataManagerTest.java +++ b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/metadata/VariantStorageMetadataManagerTest.java @@ -4,12 +4,14 @@ import org.junit.Assert; import org.junit.Before; import org.junit.Test; +import org.mockito.Mockito; import org.opencb.opencga.storage.core.exceptions.StorageEngineException; import org.opencb.opencga.storage.core.metadata.models.StudyMetadata; import org.opencb.opencga.storage.core.metadata.models.TaskMetadata; import 
org.opencb.opencga.storage.core.variant.VariantStorageBaseTest; import org.opencb.opencga.storage.core.variant.VariantStorageTest; +import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.List; @@ -103,4 +105,36 @@ public List getTasks(StudyMetadata study, List stat .map(TaskMetadata::getName) .collect(Collectors.toList()); } + + @Test + public void testAddSampleToCohort() throws Exception { + StudyMetadata study = metadataManager.createStudy("study"); + + metadataManager.registerCohort(study.getName(), "cohort1", Collections.emptyList()); + + int numSamples = 100; + List sampleIds = new ArrayList<>(numSamples); + for (int i = 0; i < numSamples; i++) { + sampleIds.add(metadataManager.registerSample(study.getId(), null, "sample_" + i)); + } + + metadataManager.addSamplesToCohort(study.getId(), "cohort1", sampleIds.subList(0, 10)); + VariantStorageMetadataManager metadataManager = Mockito.spy(this.metadataManager); + metadataManager.addSamplesToCohort(study.getId(), "cohort1", sampleIds.subList(0, 11)); + Mockito.verify(metadataManager, Mockito.times(1)).updateSampleMetadata(Mockito.anyInt(), Mockito.anyInt(), Mockito.any()); + + Mockito.reset(metadataManager); + metadataManager.addSamplesToCohort(study.getId(), "cohort1", sampleIds.subList(0, 11)); + Mockito.verify(metadataManager, Mockito.never()).updateSampleMetadata(Mockito.anyInt(), Mockito.anyInt(), Mockito.any()); + metadataManager.setSamplesToCohort(study.getId(), "cohort1", sampleIds.subList(0, 11)); + Mockito.verify(metadataManager, Mockito.never()).updateSampleMetadata(Mockito.anyInt(), Mockito.anyInt(), Mockito.any()); + + metadataManager.setSamplesToCohort(study.getId(), "cohort1", sampleIds.subList(0, 12)); + Mockito.verify(metadataManager, Mockito.times(1)).updateSampleMetadata(Mockito.anyInt(), Mockito.anyInt(), Mockito.any()); + + Mockito.reset(metadataManager); + metadataManager.setSamplesToCohort(study.getId(), "cohort1", sampleIds.subList(0, 6)); + 
Mockito.verify(metadataManager, Mockito.times(6)).updateSampleMetadata(Mockito.anyInt(), Mockito.anyInt(), Mockito.any()); + } + } \ No newline at end of file From e968bd29acea260b91e5e6b74262d4ea92660746 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Fri, 31 May 2024 12:09:18 +0100 Subject: [PATCH 04/21] storage: Improve HBaseLockManager errors and warn messages. #TASK-5895 --- .../adaptors/ProjectMetadataAdaptor.java | 7 +- .../dummy/DummyProjectMetadataAdaptor.java | 7 +- .../hadoop/utils/HBaseLockManager.java | 274 ++++++++++-------- .../metadata/AbstractHBaseDBAdaptor.java | 9 - .../HBaseProjectMetadataDBAdaptor.java | 16 +- .../hadoop/utils/HBaseLockManagerTest.java | 42 ++- 6 files changed, 195 insertions(+), 160 deletions(-) diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/metadata/adaptors/ProjectMetadataAdaptor.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/metadata/adaptors/ProjectMetadataAdaptor.java index dc88a85d338..3045ee8f3cd 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/metadata/adaptors/ProjectMetadataAdaptor.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/metadata/adaptors/ProjectMetadataAdaptor.java @@ -7,7 +7,6 @@ import org.opencb.opencga.storage.core.metadata.models.ProjectMetadata; import java.io.IOException; -import java.util.concurrent.TimeoutException; /** * Created on 02/05/18. 
@@ -17,14 +16,12 @@ public interface ProjectMetadataAdaptor extends AutoCloseable { default Lock lockProject(long lockDuration, long timeout) - throws InterruptedException, TimeoutException, StorageEngineException { + throws StorageEngineException { return lockProject(lockDuration, timeout, null); } Lock lockProject(long lockDuration, long timeout, String lockName) - throws InterruptedException, TimeoutException, StorageEngineException; - - void unLockProject(long lockId) throws StorageEngineException; + throws StorageEngineException; DataResult getProjectMetadata(); diff --git a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/dummy/DummyProjectMetadataAdaptor.java b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/dummy/DummyProjectMetadataAdaptor.java index bed8d419666..d223180d9d1 100644 --- a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/dummy/DummyProjectMetadataAdaptor.java +++ b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/dummy/DummyProjectMetadataAdaptor.java @@ -16,7 +16,6 @@ import java.util.Collections; import java.util.HashMap; import java.util.Map; -import java.util.concurrent.TimeoutException; import java.util.concurrent.atomic.AtomicInteger; /** @@ -29,7 +28,7 @@ public class DummyProjectMetadataAdaptor implements ProjectMetadataAdaptor { private static Map counters = new HashMap<>(); @Override - public Lock lockProject(long lockDuration, long timeout, String lockName) throws InterruptedException, TimeoutException { + public Lock lockProject(long lockDuration, long timeout, String lockName) { return new Lock(0) { @Override public void unlock0() { @@ -43,10 +42,6 @@ public void refresh() { }; } - @Override - public void unLockProject(long lockId) { - } - @Override public synchronized DataResult getProjectMetadata() { final DataResult result = new DataResult<>(); diff --git 
a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/utils/HBaseLockManager.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/utils/HBaseLockManager.java index 6fc7609b998..b1f827b2baf 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/utils/HBaseLockManager.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/utils/HBaseLockManager.java @@ -24,7 +24,6 @@ import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.filter.CompareFilter; import org.apache.hadoop.hbase.util.Bytes; -import org.apache.solr.common.StringUtils; import org.opencb.opencga.core.common.TimeUtils; import org.opencb.opencga.storage.core.metadata.models.Lock; import org.slf4j.Logger; @@ -134,18 +133,17 @@ public Lock lock(byte[] row, byte[] column, long lockDuration, long timeout) // Minimum lock duration of 100ms lockDuration = Math.max(lockDuration, 100); - byte[] lockValue; - String readToken = ""; + LockToken lockToken; StopWatch stopWatch = new StopWatch(); stopWatch.start(); do { - lockValue = readLockValue(row, column); + lockToken = readLockToken(row, column); // If the lock is taken, wait - while (isLockTaken(lockValue)) { + while (lockToken.isTaken()) { Thread.sleep(100); - lockValue = readLockValue(row, column); + lockToken = readLockToken(row, column); //Check if the lock is still valid if (stopWatch.getTime() > timeout) { throw new TimeoutException("Unable to get the lock"); @@ -157,19 +155,19 @@ public Lock lock(byte[] row, byte[] column, long lockDuration, long timeout) } // Try to lock cell - if (tryToPutToken(token, lockDuration, row, column, lockValue, CURRENT)) { - readToken = parseValidLockToken(readLockValue(row, column)); + if (tryToPutToken(token, lockDuration, row, column, 
lockToken, CURRENT)) { + lockToken = readLockToken(row, column); } - // You win the lock if the first available lock is yours. - } while (!token.equals(readToken)); + // You win the lock if you manage to write your lock. + } while (!lockToken.equals(token)); - boolean prevTokenExpired = lockValue != null && lockValue.length > 0; + boolean prevTokenExpired = !lockToken.isEmpty() && lockToken.isExpired(); boolean slowQuery = stopWatch.getTime() > 60000; if (prevTokenExpired || slowQuery) { StringBuilder msg = new StringBuilder("Lock column '").append(Bytes.toStringBinary(column)).append("'"); if (prevTokenExpired) { - long expireDate = parseExpireDate(lockValue); + long expireDate = lockToken.getExpireDate(); msg.append(". Previous token expired ") .append(TimeUtils.durationToString(System.currentTimeMillis() - expireDate)) .append(" ago"); @@ -181,105 +179,65 @@ public Lock lock(byte[] row, byte[] column, long lockDuration, long timeout) logger.warn(msg.toString()); } - long tokenHash = token.hashCode(); - logger.debug("Won the lock with token " + token + " (" + tokenHash + ")"); - - long finalLockDuration = lockDuration; - return new Lock(threadPool, (int) (finalLockDuration / 4), tokenHash) { - @Override - public void unlock0() { - try { - HBaseLockManager.this.unlock(row, column, tokenHash); - } catch (IOException e) { - throw new UncheckedIOException(e); - } - } + logger.debug("Won the lock with token " + token + " (" + token.hashCode() + ")"); - @Override - public synchronized void refresh() throws IOException { - HBaseLockManager.this.refresh(row, column, tokenHash, finalLockDuration); - } - }; + return new HBaseLock(lockDuration, token, row, column); } - /** - * Refreshes the lock. - * - * @param column Column to find the lock cell - * @param lockToken Lock token - * @param lockDuration Duration un milliseconds of the token. After this time the token is expired. - * @throws IOException if there is an error writing or reading from HBase. 
- */ - public void refresh(byte[] column, long lockToken, int lockDuration) throws IOException { - refresh(defaultRow, column, lockToken, lockDuration); - } - - /** * Refreshes the lock. * * @param row Row to find the lock cell * @param column Column to find the lock cell - * @param lockToken Lock token + * @param lockTokenHash Lock token * @param lockDuration Duration un milliseconds of the token. After this time the token is expired. * @throws IOException if there is an error writing or reading from HBase. */ - public void refresh(byte[] row, byte[] column, long lockToken, long lockDuration) throws IOException { + private void refresh(byte[] row, byte[] column, long lockTokenHash, long lockDuration) throws IOException { // Check token is valid - byte[] lockValue = readLockValue(row, column); - String currentLockToken = parseValidLockToken(lockValue); - if (currentLockToken == null || currentLockToken.hashCode() != lockToken) { - throw IllegalLockStatusException.inconsistentLock(row, column, lockToken, currentLockToken, lockValue); + LockToken currentLockToken = readLockToken(row, column); + if (currentLockToken.isEmpty() || currentLockToken.isExpired() || !currentLockToken.equals(lockTokenHash)) { + throw IllegalLockStatusException.inconsistentLock(row, column, lockTokenHash, currentLockToken); + } + if (currentLockToken.getRemainingTime() < lockDuration / 2) { + logger.warn("Refreshing lock with less than half of the duration remaining. Expected duration: {} Remaining time: {}ms", + lockDuration, + currentLockToken.getRemainingTime()); } - if (!tryToPutToken(currentLockToken, lockDuration, row, column, lockValue, REFRESH)) { + if (!tryToPutToken(currentLockToken.token, lockDuration, row, column, currentLockToken, REFRESH)) { // Error refreshing! 
- lockValue = readLockValue(row, column); - String newLockToken = parseValidLockToken(lockValue); + LockToken newLockToken = readLockToken(row, column); - logger.error("Current lock token:" + currentLockToken); - logger.error("New lock token: " + newLockToken); - throw IllegalLockStatusException.inconsistentLock(row, column, lockToken, currentLockToken, lockValue); + logger.error("Current lock token:" + currentLockToken.token); + logger.error("New lock token: " + newLockToken.token); + throw IllegalLockStatusException.inconsistentLock(row, column, lockTokenHash, currentLockToken); } } - /** - * Releases the lock. - * - * @param column Column to find the lock cell - * @param lockToken Lock token - * @throws IOException if there is an error writing or reading from HBase. - * @throws IllegalLockStatusException if the lockToken does not match with the current lockToken - */ - public void unlock(byte[] column, long lockToken) throws IOException, IllegalLockStatusException { - unlock(defaultRow, column, lockToken); - } - /** * Releases the lock. * * @param row Row to find the lock cell * @param column Column to find the lock cell - * @param lockToken Lock token + * @param lockTokenHash Lock token * @throws IOException if there is an error writing or reading from HBase. 
* @throws IllegalLockStatusException if the lockToken does not match with the current lockToken */ - public void unlock(byte[] row, byte[] column, long lockToken) throws IOException, IllegalLockStatusException { - byte[] lockValue = readLockValue(row, column); + private void unlock(byte[] row, byte[] column, long lockTokenHash) throws IOException, IllegalLockStatusException { + LockToken currentToken = readLockToken(row, column); - String currentToken = parseValidLockToken(lockValue); - - if (currentToken == null || currentToken.hashCode() != lockToken) { - throw IllegalLockStatusException.inconsistentLock(row, column, lockToken, currentToken, lockValue); + if (currentToken.isEmpty() || currentToken.isExpired() || !currentToken.equals(lockTokenHash)) { + throw IllegalLockStatusException.inconsistentLock(row, column, lockTokenHash, currentToken); } - logger.debug("Unlock lock with token " + lockToken); - if (!clearLock(row, column, lockValue)) { - throw IllegalLockStatusException.inconsistentLock(row, column, lockToken, currentToken, lockValue); + logger.debug("Unlock lock with token " + lockTokenHash); + if (!clearLock(row, column, currentToken)) { + throw IllegalLockStatusException.inconsistentLock(row, column, lockTokenHash, currentToken); } } - private Boolean tryToPutToken(String token, long lockDuration, byte[] row, byte[] qualifier, byte[] lockValue, String type) + private Boolean tryToPutToken(String token, long lockDuration, byte[] row, byte[] qualifier, LockToken currentLock, String type) throws IOException { return hbaseManager.act(tableName, table -> { Put put = new Put(row) @@ -288,30 +246,31 @@ private Boolean tryToPutToken(String token, long lockDuration, byte[] row, byte[ + token + LOCK_EXPIRING_DATE_SEPARATOR_STR + (System.currentTimeMillis() + lockDuration))); - return table.checkAndPut(row, columnFamily, qualifier, CompareFilter.CompareOp.EQUAL, lockValue, put); + return table.checkAndPut(row, columnFamily, qualifier, 
CompareFilter.CompareOp.EQUAL, currentLock.lockValue, put); }); } - private boolean clearLock(byte[] row, byte[] qualifier, byte[] lockValue) throws IOException { + private boolean clearLock(byte[] row, byte[] qualifier, LockToken lockToken) throws IOException { return hbaseManager.act(tableName, table -> { Put put = new Put(row) .addColumn(columnFamily, qualifier, Bytes.toBytes("")); - return table.checkAndPut(row, columnFamily, qualifier, CompareFilter.CompareOp.EQUAL, lockValue, put); + return table.checkAndPut(row, columnFamily, qualifier, CompareFilter.CompareOp.EQUAL, lockToken.lockValue, put); }); } /** - * Parse non-expired lock token. + * Parse lock token. * @param lockValue lock values - * @return Current lock token, if any + * @return Current lock token. */ - protected static String parseValidLockToken(byte[] lockValue) { + protected static LockToken parseLockToken(byte[] lockValue) { if (lockValue == null || lockValue.length == 0) { - return null; + return new LockToken(); } int idx1 = Bytes.indexOf(lockValue, LOCK_PREFIX_SEPARATOR_BYTE); int idx2 = Bytes.indexOf(lockValue, LOCK_EXPIRING_DATE_SEPARATOR_BYTE); + String type = Bytes.toString(lockValue, 0, idx1); String token = Bytes.toString(lockValue, idx1 + 1, idx2 - idx1 - 1); long expireDate; try { @@ -319,45 +278,82 @@ protected static String parseValidLockToken(byte[] lockValue) { } catch (NumberFormatException e) { // Deprecated token. 
Assume expired token if (Bytes.contains(lockValue, DEPRECATED_LOCK_SEPARATOR_BYTE)) { - return null; + return new LockToken(); } throw e; } + return new LockToken(lockValue, type, token, expireDate); + } + + protected static final class LockToken { + protected final byte[] lockValue; + protected final String type; + protected final String token; + protected final Long expireDate; + + private LockToken() { + this.lockValue = new byte[0]; + this.type = null; + this.token = null; + this.expireDate = null; + } + + private LockToken(byte[] lockValue, String type, String token, long expireDate) { + this.lockValue = lockValue; + this.type = type; + this.token = token; + this.expireDate = expireDate; + } + + /** + * A lock is taken if there is any lockValue, and + * the token has not expired. + * + * @return if the lock is taken + */ + public boolean isTaken() { + return token != null && !isExpired(); + } + + public boolean isExpired() { + return expireDate != null && expireDate < System.currentTimeMillis(); + } - if (isExpired(expireDate)) { - return null; - } else { + public boolean isEmpty() { + return token == null; + } + + public boolean equals(String token) { + return !isEmpty() && this.token.equals(token); + } + + public boolean equals(long tokenHash) { + return !isEmpty() && this.token.hashCode() == tokenHash; + } + + public byte[] getLockValue() { + return lockValue; + } + + public String getType() { + return type; + } + + public String getToken() { return token; } - } - protected static long parseExpireDate(byte[] lockValue) { - int idx2 = Bytes.indexOf(lockValue, LOCK_EXPIRING_DATE_SEPARATOR_BYTE); - try { - return Long.parseLong(Bytes.toString(lockValue, idx2 + 1)); - } catch (NumberFormatException e) { - // Deprecated token. 
Assume expired token - if (Bytes.contains(lockValue, DEPRECATED_LOCK_SEPARATOR_BYTE)) { - return -1; - } - throw e; + public Long getExpireDate() { + return expireDate; } - } - /** - * A lock is taken if there is any lockValue in the array, and - * the token has not expired. - * - * - * @param lockValue lock values - * @return if the lock is taken - */ - protected static boolean isLockTaken(byte[] lockValue) { - return parseValidLockToken(lockValue) != null; + public long getRemainingTime() { + return expireDate == null ? 0 : expireDate - System.currentTimeMillis(); + } } - private static boolean isExpired(long expireDate) { - return expireDate < System.currentTimeMillis(); + private LockToken readLockToken(byte[] row, byte[] qualifier) throws IOException { + return parseLockToken(readLockValue(row, qualifier)); } private byte[] readLockValue(byte[] row, byte[] qualifier) throws IOException { @@ -380,18 +376,22 @@ public IllegalLockStatusException(String s) { super(s); } - public static IllegalLockStatusException inconsistentLock(byte[] row, byte[] column, long lockToken, String currentLock, - byte[] lockValue) { - if (StringUtils.isEmpty(currentLock)) { - return new IllegalLockStatusException("Inconsistent lock status. You don't have the lock! " + private static IllegalLockStatusException inconsistentLock(byte[] row, byte[] column, long lockTokenHash, LockToken currentLock) { + if (currentLock.isEmpty()) { + return new IllegalLockStatusException("Inconsistent lock status. You don't have the lock! Empty lock. " + + "Row: '" + Bytes.toStringBinary(row) + "', " + + "column: '" + Bytes.toStringBinary(column) + "'. " + + "Lock: " + Bytes.toString(currentLock.lockValue) + "."); + } else if (currentLock.isExpired()) { + return new IllegalLockStatusException("Inconsistent lock status. You don't have the lock! Expired lock. " + "Row: '" + Bytes.toStringBinary(row) + "', " + "column: '" + Bytes.toStringBinary(column) + "'. 
" - + "Lock: " + Bytes.toString(lockValue) + "."); + + "Lock: " + Bytes.toString(currentLock.lockValue) + "."); } else { - return new IllegalLockStatusException("Inconsistent lock status. You don't have the lock! " + return new IllegalLockStatusException("Inconsistent lock status. You don't have the lock! Lock is taken. " + "Row: '" + Bytes.toStringBinary(row) + "', " + "column: '" + Bytes.toStringBinary(column) + "'. " - + lockToken + " != " + currentLock.hashCode() + " from " + Bytes.toString(lockValue)); + + lockTokenHash + " != " + currentLock.token.hashCode() + " from " + Bytes.toString(currentLock.lockValue)); } } } @@ -403,4 +403,38 @@ protected static ExecutorService buildThreadPool() { .build()); } + private final class HBaseLock extends Lock { + private final long lockDuration; + private final String token; + private final long tokenHash; + private final byte[] row; + private final byte[] column; + + private HBaseLock(long lockDuration, String token, byte[] row, byte[] column) { + super(HBaseLockManager.threadPool, (int) (lockDuration / 4), token.hashCode()); + this.lockDuration = lockDuration; + this.token = token; + this.tokenHash = token.hashCode(); + this.row = row; + this.column = column; + } + + @Override + public void unlock0() { + try { + synchronized (this) { + HBaseLockManager.this.unlock(row, column, tokenHash); + } + } catch (IOException e) { + throw new UncheckedIOException(e); + } + } + + @Override + public void refresh() throws IOException { + synchronized (this) { + HBaseLockManager.this.refresh(row, column, tokenHash, lockDuration); + } + } + } } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/metadata/AbstractHBaseDBAdaptor.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/metadata/AbstractHBaseDBAdaptor.java index 3f6a23c5abf..fbdfdf920ec 100644 --- 
a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/metadata/AbstractHBaseDBAdaptor.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/metadata/AbstractHBaseDBAdaptor.java @@ -279,14 +279,5 @@ protected Lock lockToken(byte[] rowKey, byte[] lockName, long lockDuration, long } } - protected void unLock(byte[] rowKey, byte[] lockName, long token) { - try { - this.lock.unlock(rowKey, lockName, token); - } catch (IOException e) { - throw new UncheckedIOException(e); - } - } - - } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/metadata/HBaseProjectMetadataDBAdaptor.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/metadata/HBaseProjectMetadataDBAdaptor.java index 85a1d0cec0c..6ddae38be65 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/metadata/HBaseProjectMetadataDBAdaptor.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/metadata/HBaseProjectMetadataDBAdaptor.java @@ -49,20 +49,14 @@ public HBaseProjectMetadataDBAdaptor(HBaseManager hBaseManager, String metaTable @Override public Lock lockProject(long lockDuration, long timeout, String lockName) - throws InterruptedException, TimeoutException, StorageEngineException { + throws StorageEngineException { try { ensureTableExists(); return lock.lock(getProjectRowKey(), getLockColumn(lockName), lockDuration, timeout); - } catch (IOException e) { - throw new StorageEngineException("Error locking project in HBase", e); - } - } - - @Override - public void unLockProject(long lockId) throws StorageEngineException { - try { - 
lock.unlock(getProjectRowKey(), getLockColumn(), lockId); - } catch (IOException e) { + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + throw new StorageEngineException("Unable to lock the Project", e); + } catch (IOException | TimeoutException e) { throw new StorageEngineException("Error locking project in HBase", e); } } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/utils/HBaseLockManagerTest.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/utils/HBaseLockManagerTest.java index 0f421081ff6..5943a43519f 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/utils/HBaseLockManagerTest.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/utils/HBaseLockManagerTest.java @@ -214,23 +214,47 @@ public void testLockRefreshExpiredRefresh() throws Exception { @Test public void testGetCurrent() { long e = System.currentTimeMillis() + 1000; - String s; + HBaseLockManager.LockToken s; + + // null token + s = HBaseLockManager.parseLockToken(null); + assertTrue(s.isEmpty()); + assertFalse(s.isTaken()); + assertEquals(null, s.getType()); + assertArrayEquals(new byte[0], s.getLockValue()); + + // Empty token + s = HBaseLockManager.parseLockToken(Bytes.toBytes("")); + assertTrue(s.isEmpty()); + assertFalse(s.isTaken()); + assertEquals(null, s.getType()); + assertArrayEquals(new byte[0], s.getLockValue()); // Expired current token - s = HBaseLockManager.parseValidLockToken(Bytes.toBytes("CURRENT-abc:123")); - assertNull(s); + s = HBaseLockManager.parseLockToken(Bytes.toBytes("CURRENT-abc:123")); + assertFalse(s.isEmpty()); + assertEquals("CURRENT", s.getType()); // Valid current token - s = HBaseLockManager.parseValidLockToken(Bytes.toBytes("CURRENT-abc:" + 
e)); - assertEquals("abc", s); + s = HBaseLockManager.parseLockToken(Bytes.toBytes("CURRENT-abc:" + e)); + assertEquals("abc", s.token); + assertEquals("CURRENT", s.getType()); + assertFalse(s.isExpired()); + assertTrue(s.isTaken()); // Current expired, first refresh valid - s = HBaseLockManager.parseValidLockToken(Bytes.toBytes("REFRESH-abc:" + e)); - assertEquals("abc", s); + s = HBaseLockManager.parseLockToken(Bytes.toBytes("REFRESH-abc:" + e)); + assertEquals("abc", s.token); + assertEquals("REFRESH", s.getType()); + assertFalse(s.isExpired()); + assertTrue(s.isTaken()); // Expired refresh - s = HBaseLockManager.parseValidLockToken(Bytes.toBytes("REFRESH-abc:200")); - assertNull(s); + s = HBaseLockManager.parseLockToken(Bytes.toBytes("REFRESH-abc:200")); + assertEquals("abc", s.token); + assertEquals("REFRESH", s.getType()); + assertTrue(s.isExpired()); + assertFalse(s.isTaken()); } } \ No newline at end of file From dd1dcec6de1746730e5e5a102cc3289194a59cf8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Fri, 31 May 2024 12:11:32 +0100 Subject: [PATCH 05/21] storage: Increase metadata lock duration and timeout. 
#TASK-5895 --- .../opencga/storage/core/variant/VariantStorageOptions.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/VariantStorageOptions.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/VariantStorageOptions.java index caefbb5260e..bc1be055466 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/VariantStorageOptions.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/VariantStorageOptions.java @@ -83,8 +83,8 @@ public enum VariantStorageOptions implements ConfigurationOption { INDEX_SEARCH("indexSearch", false), // Build secondary indexes using search engine. - METADATA_LOCK_DURATION("metadata.lock.duration", 5000), - METADATA_LOCK_TIMEOUT("metadata.lock.timeout", 60000), + METADATA_LOCK_DURATION("metadata.lock.duration", 60000), + METADATA_LOCK_TIMEOUT("metadata.lock.timeout", 600000), METADATA_LOAD_BATCH_SIZE("metadata.load.batchSize", 10), METADATA_LOAD_THREADS("metadata.load.numThreads", 4), From 1c7db628514e40d960c97fdea2b1a782e67b698a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Fri, 31 May 2024 14:48:15 +0100 Subject: [PATCH 06/21] storage: Improve testing SampleIndexAggregation of intergenic queries. 
#TASK-6136 --- .../index/sample/SampleIndexQueryParser.java | 90 ++++++++++++------- .../sample/SampleIndexQueryParserTest.java | 18 +++- .../variant/index/sample/SampleIndexTest.java | 44 ++++++--- 3 files changed, 109 insertions(+), 43 deletions(-) diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexQueryParser.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexQueryParser.java index 6137c5bd6c1..803fa75fea5 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexQueryParser.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexQueryParser.java @@ -1216,18 +1216,7 @@ protected SampleAnnotationIndexQuery parseAnnotationIndexQuery(SampleIndexSchema CtBtFtCombinationIndexSchema.Filter ctBtTfFilter = schema.getCtBtTfIndex().getField().noOpFilter(); IndexFilter clinicalFilter = schema.getClinicalIndexSchema().noOpFilter(); - Boolean intergenic = null; - - ParsedVariantQuery.VariantQueryXref variantQueryXref = VariantQueryParser.parseXrefs(query); - if (!isValidParam(query, REGION)) { - if (!variantQueryXref.getGenes().isEmpty() - && variantQueryXref.getIds().isEmpty() - && variantQueryXref.getOtherXrefs().isEmpty() - && variantQueryXref.getVariants().isEmpty()) { - // If only filtering by genes, is not intergenic. 
- intergenic = false; - } - } + final Boolean intergenic = isIntergenicQuery(query); // BiotypeConsquenceTypeFlagCombination combination = BiotypeConsquenceTypeFlagCombination // .fromQuery(query, Arrays.asList(schema.getTranscriptFlagIndexSchema().getField().getConfiguration().getValues())); @@ -1237,18 +1226,10 @@ protected SampleAnnotationIndexQuery parseAnnotationIndexQuery(SampleIndexSchema boolean tfCovered = false; if (isValidParam(query, ANNOT_CONSEQUENCE_TYPE)) { - List soNames = query.getAsStringList(VariantQueryParam.ANNOT_CONSEQUENCE_TYPE.key()); - soNames = soNames.stream() + List soNames = query.getAsStringList(VariantQueryParam.ANNOT_CONSEQUENCE_TYPE.key()) + .stream() .map(ct -> ConsequenceTypeMappings.accessionToTerm.get(VariantQueryUtils.parseConsequenceType(ct))) .collect(Collectors.toList()); - if (!soNames.contains(VariantAnnotationConstants.INTERGENIC_VARIANT) - && !soNames.contains(VariantAnnotationConstants.REGULATORY_REGION_VARIANT) - && !soNames.contains(VariantAnnotationConstants.TF_BINDING_SITE_VARIANT)) { - // All ct values but "intergenic_variant" and "regulatory_region_variant" are in genes (i.e. 
non-intergenic) - intergenic = false; - } else if (soNames.size() == 1 && soNames.contains(VariantAnnotationConstants.INTERGENIC_VARIANT)) { - intergenic = true; - } // else, leave undefined : intergenic = null boolean ctFilterCoveredBySummary = false; boolean ctBtCombinationCoveredBySummary = false; if (SampleIndexSchema.CUSTOM_LOF.containsAll(soNames)) { @@ -1295,14 +1276,17 @@ protected SampleAnnotationIndexQuery parseAnnotationIndexQuery(SampleIndexSchema } } - // Do not use ctIndex if the CT filter is covered by the summary - // Use the ctIndex if: + // Do not use ctIndex for intergenic queries (intergenic == true) + // or queries that might return intergenic variants (intergenic == null) + // + // Use the ctIndex if any of: // - The CtFilter is not covered by the summary // - The query has the combination CT+BT , and it is not covered by the summary // - The query has the combination CT+TF - boolean useCtIndexFilter = !ctFilterCoveredBySummary - || (!ctBtCombinationCoveredBySummary && combination.isBiotype()) - || combination.isFlag(); + boolean useCtIndexFilter = + intergenic == Boolean.FALSE && (!ctFilterCoveredBySummary + || (!ctBtCombinationCoveredBySummary && combination.isBiotype()) + || combination.isFlag()); if (useCtIndexFilter) { ctCovered = completeIndex; consequenceTypeFilter = schema.getCtIndex().getField().buildFilter(new OpValue<>("=", soNames)); @@ -1317,8 +1301,6 @@ protected SampleAnnotationIndexQuery parseAnnotationIndexQuery(SampleIndexSchema } if (isValidParam(query, ANNOT_BIOTYPE)) { - // All biotype values are in genes (i.e. 
non-intergenic) - intergenic = false; boolean biotypeFilterCoveredBySummary = false; List biotypes = query.getAsStringList(VariantQueryParam.ANNOT_BIOTYPE.key()); if (BIOTYPE_SET.containsAll(biotypes)) { @@ -1350,8 +1332,6 @@ protected SampleAnnotationIndexQuery parseAnnotationIndexQuery(SampleIndexSchema List transcriptFlags = query.getAsStringList(ANNOT_TRANSCRIPT_FLAG.key()); tfFilter = schema.getTranscriptFlagIndexSchema().getField().buildFilter(new OpValue<>("=", transcriptFlags)); tfCovered = completeIndex & tfFilter.isExactFilter(); - // Transcript flags are in transcripts/genes. (i.e. non-intergenic) - intergenic = false; // TranscriptFlag filter is covered by index if (tfCovered) { if (!isValidParam(query, GENE) && simpleCombination(combination)) { @@ -1538,12 +1518,60 @@ protected SampleAnnotationIndexQuery parseAnnotationIndexQuery(SampleIndexSchema // If intergenic is undefined, or true, CT and BT filters can not be used. biotypeFilter = schema.getBiotypeIndex().getField().noOpFilter(); consequenceTypeFilter = schema.getCtIndex().getField().noOpFilter(); + if (!biotypeFilter.isNoOp()) { + throw new IllegalStateException("Unexpected BT filter for intergenic=" + intergenic); + } + if (!consequenceTypeFilter.isNoOp()) { + throw new IllegalStateException("Unexpected CT filter for intergenic=" + intergenic); + } } return new SampleAnnotationIndexQuery(new byte[]{annotationIndexMask, annotationIndex}, consequenceTypeFilter, biotypeFilter, tfFilter, ctBtTfFilter, clinicalFilter, populationFrequencyFilter); } + private Boolean isIntergenicQuery(Query query) { + ParsedVariantQuery.VariantQueryXref variantQueryXref = VariantQueryParser.parseXrefs(query); + if (!isValidParam(query, REGION)) { + if (!variantQueryXref.getGenes().isEmpty() + && variantQueryXref.getIds().isEmpty() + && variantQueryXref.getOtherXrefs().isEmpty() + && variantQueryXref.getVariants().isEmpty()) { + // If only filtering by genes, is not intergenic. 
+ return false; + } + } + + if (isValidParam(query, ANNOT_BIOTYPE)) { + // All biotype values are in genes (i.e. non-intergenic) + return false; + } + if (isValidParam(query, ANNOT_BIOTYPE)) { + // All biotype values are in genes (i.e. non-intergenic) + return false; + } + if (isValidParam(query, ANNOT_TRANSCRIPT_FLAG)) { + // Transcript flags are in transcripts/genes. (i.e. non-intergenic) + return false; + } + if (isValidParam(query, ANNOT_CONSEQUENCE_TYPE)) { + List soNames = query.getAsStringList(VariantQueryParam.ANNOT_CONSEQUENCE_TYPE.key()); + soNames = soNames.stream() + .map(ct -> ConsequenceTypeMappings.accessionToTerm.get(VariantQueryUtils.parseConsequenceType(ct))) + .collect(Collectors.toList()); + if (!soNames.contains(VariantAnnotationConstants.INTERGENIC_VARIANT) + && !soNames.contains(VariantAnnotationConstants.REGULATORY_REGION_VARIANT) + && !soNames.contains(VariantAnnotationConstants.TF_BINDING_SITE_VARIANT)) { + // All ct values but "intergenic_variant" and "regulatory_region_variant" are in genes (i.e. non-intergenic) + return false; + } else if (soNames.size() == 1 && soNames.contains(VariantAnnotationConstants.INTERGENIC_VARIANT)) { + return true; + } // else, leave undefined : intergenic = null + } + // Unable to determine if the query is intergenic or not. Return null for uncertain. 
+ return null; + } + private boolean simpleCombination(BiotypeConsquenceTypeFlagCombination combination) { return combination.numParams() == 1; } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexQueryParserTest.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexQueryParserTest.java index 7ae4e36f3ec..212b0521b26 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexQueryParserTest.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexQueryParserTest.java @@ -1384,7 +1384,10 @@ public void parseIntergenicTest() { checkIntergenic(true, new Query(ANNOT_CONSEQUENCE_TYPE.key(), "intergenic_variant")); checkIntergenic(null, new Query(ANNOT_CONSEQUENCE_TYPE.key(), "missense_variant,intergenic_variant")); checkIntergenic(null, new Query(ANNOT_CONSEQUENCE_TYPE.key(), "intergenic_variant,missense_variant")); - + checkIntergenic(null, new Query(ANNOT_CONSEQUENCE_TYPE.key(), VariantAnnotationConstants.REGULATORY_REGION_VARIANT)); + checkIntergenic(false, new Query(ANNOT_CONSEQUENCE_TYPE.key(), VariantAnnotationConstants.REGULATORY_REGION_VARIANT) + .append(ANNOT_BIOTYPE.key(), "protein_coding")); + // Nonsense combination checkIntergenic(false, new Query(ANNOT_CONSEQUENCE_TYPE.key(), "intergenic_variant").append(ANNOT_BIOTYPE.key(), "protein_coding")); } @@ -1570,6 +1573,19 @@ public void testCoveredQuery_ct() { parseAnnotationIndexQuery(query, true); assertTrue(query.isEmpty()); + query = new Query().append(ANNOT_CONSEQUENCE_TYPE.key(), String.join(OR, VariantAnnotationConstants.REGULATORY_REGION_VARIANT)); + parseAnnotationIndexQuery(query, true); + 
indexQuery = parseAnnotationIndexQuery(query, true); + assertTrue(indexQuery.getConsequenceTypeFilter().isNoOp()); + assertFalse(query.isEmpty()); // regulatory_region_variant can't be used for CT filter alone + + query = new Query().append(ANNOT_CONSEQUENCE_TYPE.key(), String.join(OR, VariantAnnotationConstants.REGULATORY_REGION_VARIANT)) + .append(ANNOT_BIOTYPE.key(), "protein_coding"); + indexQuery = parseAnnotationIndexQuery(query, true); + assertFalse(indexQuery.getConsequenceTypeFilter().isNoOp()); + assertFalse(indexQuery.getBiotypeFilter().isNoOp()); + assertTrue(query.isEmpty()); // regulatory_region_variant can be used together with biotype + query = new Query().append(ANNOT_CONSEQUENCE_TYPE.key(), String.join(OR, VariantAnnotationConstants.STOP_LOST)); parseAnnotationIndexQuery(query, false); indexQuery = parseAnnotationIndexQuery(query, false); diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexTest.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexTest.java index e15cb6ae4bb..9ee362f8724 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexTest.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexTest.java @@ -897,29 +897,55 @@ public void testAggregationCorrectnessCt() throws Exception { @Test public void testAggregationCorrectnessTFBS() throws Exception { - testAggregationCorrectness(TF_BINDING_SITE_VARIANT, true); + // Special scenario. 
This CT might include intergenic values, so can't be used alone + testAggregationCorrectness(new Query(ANNOT_BIOTYPE.key(), "protein_coding"), TF_BINDING_SITE_VARIANT); } @Test public void testAggregationCorrectnessRegulatoryRegionVariant() throws Exception { - testAggregationCorrectness(REGULATORY_REGION_VARIANT); + // Special scenario. This CT might include intergenic values, so can't be used alone + testAggregationCorrectness(new Query(ANNOT_BIOTYPE.key(), "protein_coding"), + REGULATORY_REGION_VARIANT); + } + + @Test + public void testAggregationByIntergenicQuery() throws Exception { + SampleIndexVariantAggregationExecutor executor = new SampleIndexVariantAggregationExecutor(metadataManager, sampleIndexDBAdaptor); + + Query baseQuery = new Query(STUDY.key(), STUDY_NAME_3) + .append(SAMPLE.key(), "NA12877"); + + assertFalse(executor.canUseThisExecutor(new Query(baseQuery) + .append(ANNOT_CONSEQUENCE_TYPE.key(), REGULATORY_REGION_VARIANT), new QueryOptions(QueryOptions.FACET, "consequenceType"))); + assertFalse(executor.canUseThisExecutor(new Query(baseQuery) + .append(ANNOT_CONSEQUENCE_TYPE.key(), TF_BINDING_SITE_VARIANT), new QueryOptions(QueryOptions.FACET, "consequenceType"))); + + assertTrue(executor.canUseThisExecutor(new Query(baseQuery) + .append(ANNOT_CONSEQUENCE_TYPE.key(), REGULATORY_REGION_VARIANT) + .append(ANNOT_BIOTYPE.key(), "protein_coding"), + new QueryOptions(QueryOptions.FACET, "consequenceType"))); + assertTrue(executor.canUseThisExecutor(new Query(baseQuery) + .append(ANNOT_CONSEQUENCE_TYPE.key(), TF_BINDING_SITE_VARIANT) + .append(ANNOT_BIOTYPE.key(), "protein_coding"), + new QueryOptions(QueryOptions.FACET, "consequenceType"))); } private void testAggregationCorrectness(String ct) throws Exception { - testAggregationCorrectness(ct, false); + testAggregationCorrectness(new Query(), ct); } - private void testAggregationCorrectness(String ct, boolean sampleIndexMightBeMoreAccurate) throws Exception { + private void 
testAggregationCorrectness(Query baseQuery, String ct) throws Exception { SampleIndexVariantAggregationExecutor executor = new SampleIndexVariantAggregationExecutor(metadataManager, sampleIndexDBAdaptor); - Query query = new Query(STUDY.key(), STUDY_NAME_3) + Query query = new Query(baseQuery) + .append(STUDY.key(), STUDY_NAME_3) .append(SAMPLE.key(), "NA12877") .append(ANNOT_CONSEQUENCE_TYPE.key(), ct); assertTrue(executor.canUseThisExecutor(query, new QueryOptions(QueryOptions.FACET, "consequenceType"))); AtomicInteger count = new AtomicInteger(0); sampleIndexDBAdaptor.iterator(new Query(query), new QueryOptions()).forEachRemaining(v -> count.incrementAndGet()); - FacetField facet = executor.aggregation(query, new QueryOptions(QueryOptions.FACET, "consequenceType")).first(); + FacetField facet = executor.aggregation(new Query(query), new QueryOptions(QueryOptions.FACET, "consequenceType")).first(); assertEquals(count.get(), facet.getCount()); FacetField.Bucket bucket = facet.getBuckets().stream().filter(b -> b.getValue().equals(ct)).findFirst().orElse(null); @@ -934,11 +960,7 @@ private void testAggregationCorrectness(String ct, boolean sampleIndexMightBeMor } } else { assertNotNull(msg, bucket); - if (sampleIndexMightBeMoreAccurate) { - assertThat(msg, count.get(), gte(bucket.getCount())); - } else { - assertEquals(msg, count.get(), bucket.getCount()); - } + assertEquals(msg, count.get(), bucket.getCount()); } } From 9f2b06d543a52537c69829f6d7a372d694bed70e Mon Sep 17 00:00:00 2001 From: JuanfeSanahuja Date: Tue, 4 Jun 2024 12:32:48 +0200 Subject: [PATCH 07/21] Prepare next release 2.12.6-SNAPSHOT --- opencga-analysis/pom.xml | 2 +- opencga-app/pom.xml | 2 +- opencga-catalog/pom.xml | 2 +- opencga-client/pom.xml | 2 +- opencga-clinical/pom.xml | 2 +- opencga-core/pom.xml | 2 +- opencga-master/pom.xml | 2 +- opencga-server/pom.xml | 2 +- opencga-storage/opencga-storage-app/pom.xml | 2 +- opencga-storage/opencga-storage-benchmark/pom.xml | 2 +- 
opencga-storage/opencga-storage-core/pom.xml | 2 +- .../opencga-storage-hadoop-core/pom.xml | 2 +- .../opencga-storage-hadoop-deps-emr6.1/pom.xml | 2 +- .../opencga-storage-hadoop-deps-hdp2.6/pom.xml | 2 +- .../opencga-storage-hadoop-deps-hdp3.1/pom.xml | 2 +- .../opencga-storage-hadoop-deps/pom.xml | 2 +- opencga-storage/opencga-storage-hadoop/pom.xml | 2 +- opencga-storage/opencga-storage-server/pom.xml | 2 +- opencga-storage/pom.xml | 2 +- opencga-test/pom.xml | 2 +- pom.xml | 14 +++++++------- 21 files changed, 27 insertions(+), 27 deletions(-) diff --git a/opencga-analysis/pom.xml b/opencga-analysis/pom.xml index 01544f3538a..69a9b33089c 100644 --- a/opencga-analysis/pom.xml +++ b/opencga-analysis/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga - 2.12.5 + 2.12.6-SNAPSHOT ../pom.xml diff --git a/opencga-app/pom.xml b/opencga-app/pom.xml index afbb2c15342..18071afb30b 100644 --- a/opencga-app/pom.xml +++ b/opencga-app/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga - 2.12.5 + 2.12.6-SNAPSHOT ../pom.xml diff --git a/opencga-catalog/pom.xml b/opencga-catalog/pom.xml index f4971939bbb..46964bfae1f 100644 --- a/opencga-catalog/pom.xml +++ b/opencga-catalog/pom.xml @@ -23,7 +23,7 @@ org.opencb.opencga opencga - 2.12.5 + 2.12.6-SNAPSHOT ../pom.xml diff --git a/opencga-client/pom.xml b/opencga-client/pom.xml index 700c1de69c7..cfdf162a973 100644 --- a/opencga-client/pom.xml +++ b/opencga-client/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga - 2.12.5 + 2.12.6-SNAPSHOT ../pom.xml diff --git a/opencga-clinical/pom.xml b/opencga-clinical/pom.xml index bdae22a335a..ad02409329b 100644 --- a/opencga-clinical/pom.xml +++ b/opencga-clinical/pom.xml @@ -5,7 +5,7 @@ org.opencb.opencga opencga - 2.12.5 + 2.12.6-SNAPSHOT ../pom.xml 4.0.0 diff --git a/opencga-core/pom.xml b/opencga-core/pom.xml index dab473a7362..00abcbf61d7 100644 --- a/opencga-core/pom.xml +++ b/opencga-core/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga - 2.12.5 + 2.12.6-SNAPSHOT ../pom.xml 
diff --git a/opencga-master/pom.xml b/opencga-master/pom.xml index 83d7becd861..4e304b5e34c 100644 --- a/opencga-master/pom.xml +++ b/opencga-master/pom.xml @@ -22,7 +22,7 @@ opencga org.opencb.opencga - 2.12.5 + 2.12.6-SNAPSHOT ../pom.xml diff --git a/opencga-server/pom.xml b/opencga-server/pom.xml index e206eec5c95..798bdd26d41 100644 --- a/opencga-server/pom.xml +++ b/opencga-server/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga - 2.12.5 + 2.12.6-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-app/pom.xml b/opencga-storage/opencga-storage-app/pom.xml index 47262e2c647..b5d0d0fad60 100644 --- a/opencga-storage/opencga-storage-app/pom.xml +++ b/opencga-storage/opencga-storage-app/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga-storage - 2.12.5 + 2.12.6-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-benchmark/pom.xml b/opencga-storage/opencga-storage-benchmark/pom.xml index 6ebbd4567c9..50c8105886a 100644 --- a/opencga-storage/opencga-storage-benchmark/pom.xml +++ b/opencga-storage/opencga-storage-benchmark/pom.xml @@ -22,7 +22,7 @@ opencga-storage org.opencb.opencga - 2.12.5 + 2.12.6-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-core/pom.xml b/opencga-storage/opencga-storage-core/pom.xml index bb9e72406d9..130716cf970 100644 --- a/opencga-storage/opencga-storage-core/pom.xml +++ b/opencga-storage/opencga-storage-core/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga-storage - 2.12.5 + 2.12.6-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/pom.xml index 44301eb5b63..29e4ac40576 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/pom.xml @@ -23,7 +23,7 @@ org.opencb.opencga opencga-storage-hadoop - 2.12.5 + 2.12.6-SNAPSHOT ../pom.xml diff --git 
a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-deps/opencga-storage-hadoop-deps-emr6.1/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-deps/opencga-storage-hadoop-deps-emr6.1/pom.xml index 85bd6a7da66..e4d5e3c8439 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-deps/opencga-storage-hadoop-deps-emr6.1/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-deps/opencga-storage-hadoop-deps-emr6.1/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga-storage-hadoop-deps - 2.12.5 + 2.12.6-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-deps/opencga-storage-hadoop-deps-hdp2.6/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-deps/opencga-storage-hadoop-deps-hdp2.6/pom.xml index a15d1e80dbd..7831c67ec49 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-deps/opencga-storage-hadoop-deps-hdp2.6/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-deps/opencga-storage-hadoop-deps-hdp2.6/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga-storage-hadoop-deps - 2.12.5 + 2.12.6-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-deps/opencga-storage-hadoop-deps-hdp3.1/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-deps/opencga-storage-hadoop-deps-hdp3.1/pom.xml index 5a107fec8fc..f3792747d50 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-deps/opencga-storage-hadoop-deps-hdp3.1/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-deps/opencga-storage-hadoop-deps-hdp3.1/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga-storage-hadoop-deps - 2.12.5 + 2.12.6-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-deps/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-deps/pom.xml index 
0790a13a764..8b769184fa3 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-deps/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-deps/pom.xml @@ -50,7 +50,7 @@ org.opencb.opencga opencga-storage-hadoop - 2.12.5 + 2.12.6-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/pom.xml b/opencga-storage/opencga-storage-hadoop/pom.xml index a4e177c4403..1d3b4e5f795 100644 --- a/opencga-storage/opencga-storage-hadoop/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/pom.xml @@ -28,7 +28,7 @@ org.opencb.opencga opencga-storage - 2.12.5 + 2.12.6-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-server/pom.xml b/opencga-storage/opencga-storage-server/pom.xml index 9403f1ba8f9..6a725c2746e 100644 --- a/opencga-storage/opencga-storage-server/pom.xml +++ b/opencga-storage/opencga-storage-server/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga-storage - 2.12.5 + 2.12.6-SNAPSHOT ../pom.xml diff --git a/opencga-storage/pom.xml b/opencga-storage/pom.xml index 3c31682fb6e..1f00aeda658 100644 --- a/opencga-storage/pom.xml +++ b/opencga-storage/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga - 2.12.5 + 2.12.6-SNAPSHOT ../pom.xml diff --git a/opencga-test/pom.xml b/opencga-test/pom.xml index a1df570546a..953a68ee373 100644 --- a/opencga-test/pom.xml +++ b/opencga-test/pom.xml @@ -24,7 +24,7 @@ org.opencb.opencga opencga - 2.12.5 + 2.12.6-SNAPSHOT ../pom.xml diff --git a/pom.xml b/pom.xml index 1bc6c3f8de1..aef3f1a0bc0 100644 --- a/pom.xml +++ b/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga - 2.12.5 + 2.12.6-SNAPSHOT pom OpenCGA @@ -43,12 +43,12 @@ - 2.12.5 - 2.12.5 - 5.8.3 - 2.12.2 - 4.12.0 - 2.12.5 + 2.12.6_dev + 2.12.6_dev + 5.8.4-SNAPSHOT + 2.12.3-SNAPSHOT + 4.12.1-SNAPSHOT + 2.12.6-SNAPSHOT 0.2.0 2.11.4 From b6e6e868e4a59d5df2ef53f86555bf60b39c8349 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Thu, 20 Jun 2024 13:03:41 +0100 Subject: [PATCH 08/21] 
storage: Fix compoundHet query in single-study projects. #TASK-6311 --- .../VariantStorageMetadataRepairTool.java | 1 - .../VariantStorageMetadataManager.java | 13 +++ .../core/variant/VariantStorageEngine.java | 2 +- .../variant/query/ParsedVariantQuery.java | 21 +++-- .../variant/query/VariantQueryParser.java | 80 ++++++++++++------- .../BreakendVariantQueryExecutor.java | 4 +- .../CompoundHeterozygousQueryExecutor.java | 6 +- .../DBAdaptorVariantQueryExecutor.java | 4 +- .../executors/NoOpVariantQueryExecutor.java | 4 +- .../query/executors/VariantQueryExecutor.java | 5 +- ...amplesSearchIndexVariantQueryExecutor.java | 4 +- .../SearchIndexVariantQueryExecutor.java | 8 +- .../variant/search/VariantSearchUtils.java | 4 +- .../executors/VariantQueryExecutorTest.java | 57 ++++++++++--- .../variant/HadoopVariantQueryParser.java | 5 +- ...seColumnIntersectVariantQueryExecutor.java | 5 +- ...ampleIndexMendelianErrorQueryExecutor.java | 9 +-- .../SampleIndexOnlyVariantQueryExecutor.java | 8 +- .../SampleIndexVariantQueryExecutor.java | 4 +- .../index/family/FamilyIndexDriver.java | 5 +- .../sample/SampleIndexSchemaFactory.java | 3 +- .../variant/index/sample/SampleIndexTest.java | 12 ++- 22 files changed, 177 insertions(+), 87 deletions(-) diff --git a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/operations/VariantStorageMetadataRepairTool.java b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/operations/VariantStorageMetadataRepairTool.java index a8e9ba5b528..29ed6efb996 100644 --- a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/operations/VariantStorageMetadataRepairTool.java +++ b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/operations/VariantStorageMetadataRepairTool.java @@ -134,7 +134,6 @@ private void rebuildSampleFileIds(VariantStorageMetadataManager metadataManager, for (Map.Entry> entry : batch.entrySet()) { Integer sampleId = entry.getKey(); List fileIds = entry.getValue(); - 
List actualFiles = metadataManager.getSampleMetadata(studyId, sampleId).getFiles(); if (actualFiles.size() != fileIds.size() || !actualFiles.containsAll(fileIds)) { fixedSamples++; diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/metadata/VariantStorageMetadataManager.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/metadata/VariantStorageMetadataManager.java index a27f908173b..6c539e4b077 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/metadata/VariantStorageMetadataManager.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/metadata/VariantStorageMetadataManager.java @@ -840,6 +840,19 @@ public Iterator fileMetadataIterator(int studyId) { return fileDBAdaptor.fileIterator(studyId); } + public SampleMetadata getSampleMetadata(Integer studyId, Integer sampleId) { + return getSampleMetadata(studyId.intValue(), sampleId.intValue()); + } + + public SampleMetadata getSampleMetadata(int studyId, Integer sampleId) { + return getSampleMetadata(studyId, sampleId.intValue()); + } + + public SampleMetadata getSampleMetadata(int studyId, Object sample) { + int sampleId = getSampleIdOrFail(studyId, sample); + return getSampleMetadata(studyId, sampleId); + } + public SampleMetadata getSampleMetadata(int studyId, int sampleId) { return sampleDBAdaptor.getSampleMetadata(studyId, sampleId, null); } diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/VariantStorageEngine.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/VariantStorageEngine.java index dd24ee1334d..0f51fbe39eb 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/VariantStorageEngine.java +++ 
b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/VariantStorageEngine.java @@ -1348,7 +1348,7 @@ public VariantQueryExecutor getVariantQueryExecutor(Query query, QueryOptions op public VariantQueryExecutor getVariantQueryExecutor(ParsedVariantQuery variantQuery) { try { for (VariantQueryExecutor executor : getVariantQueryExecutors()) { - if (executor.canUseThisExecutor(variantQuery.getQuery(), variantQuery.getInputOptions())) { + if (executor.canUseThisExecutor(variantQuery, variantQuery.getInputOptions())) { logger.info("Using VariantQueryExecutor : " + executor.getClass().getName()); logger.info(" Query : " + VariantQueryUtils.printQuery(variantQuery.getInputQuery())); logger.info(" Options : " + variantQuery.getInputOptions().toJson()); diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/ParsedVariantQuery.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/ParsedVariantQuery.java index 8468ab34317..300f2e6b48a 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/ParsedVariantQuery.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/ParsedVariantQuery.java @@ -267,14 +267,6 @@ public VariantStudyQuery setStudies(ParsedQuery studies) { return this; } - public String getStudyOrFail() { - if (studies == null || studies.size() != 1) { - throw new VariantQueryException("Require exactly one study"); - } else { - return studies.get(0); - } - } - public ParsedQuery>> getGenotypes() { return genotypes; } @@ -311,6 +303,19 @@ public void setDefaultStudy(StudyMetadata defaultStudy) { public StudyMetadata getDefaultStudy() { return defaultStudy; } + + public StudyMetadata getDefaultStudyOrFail() { + if (defaultStudy == null) { + if (studies.size() != 1) { + throw new VariantQueryException("Only one study is 
allowed. Found " + studies.size() + " studies"); + } else { + throw new VariantQueryException("One study required. None provided"); + } + } else { + return defaultStudy; + } + } + } public static class VariantQueryXref { diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/VariantQueryParser.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/VariantQueryParser.java index 641e365a51d..bd02a6b4303 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/VariantQueryParser.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/VariantQueryParser.java @@ -17,10 +17,7 @@ import org.opencb.commons.datastore.core.QueryParam; import org.opencb.opencga.core.models.variant.VariantAnnotationConstants; import org.opencb.opencga.storage.core.metadata.VariantStorageMetadataManager; -import org.opencb.opencga.storage.core.metadata.models.SampleMetadata; -import org.opencb.opencga.storage.core.metadata.models.StudyMetadata; -import org.opencb.opencga.storage.core.metadata.models.TaskMetadata; -import org.opencb.opencga.storage.core.metadata.models.VariantScoreMetadata; +import org.opencb.opencga.storage.core.metadata.models.*; import org.opencb.opencga.storage.core.utils.CellBaseUtils; import org.opencb.opencga.storage.core.variant.VariantStorageOptions; import org.opencb.opencga.storage.core.variant.adaptors.GenotypeClass; @@ -170,15 +167,14 @@ public ParsedVariantQuery parseQuery(Query inputQuery, QueryOptions options, boo VariantStorageOptions.APPROXIMATE_COUNT_SAMPLING_SIZE.key(), VariantStorageOptions.APPROXIMATE_COUNT_SAMPLING_SIZE.defaultValue())); - variantQuery.setProjection(projectionParser.parseVariantQueryProjection(inputQuery, options)); - VariantQuery query; if (!skipPreProcess) { - query = new VariantQuery(preProcessQuery(inputQuery, options, 
variantQuery.getProjection())); + query = new VariantQuery(preProcessQuery(inputQuery, options)); } else { query = new VariantQuery(inputQuery); } variantQuery.setQuery(query); + variantQuery.setProjection(projectionParser.parseVariantQueryProjection(query, options)); List geneRegions = Region.parseRegions(query.getString(ANNOT_GENE_REGIONS.key())); variantQuery.setGeneRegions(geneRegions == null ? Collections.emptyList() : geneRegions); @@ -221,10 +217,7 @@ public ParsedVariantQuery parseQuery(Query inputQuery, QueryOptions options, boo = new ParsedQuery<>(sampleDataQuery.getKey(), sampleDataQuery.getOperation(), new ArrayList<>(sampleDataQuery.size())); for (KeyValues> keyValues : sampleDataQuery) { sampleDataQueryWithMetadata.getValues().add( - keyValues.mapKey(sample -> { - int sampleId = metadataManager.getSampleIdOrFail(defaultStudy.getId(), sample); - return metadataManager.getSampleMetadata(defaultStudy.getId(), sampleId); - })); + keyValues.mapKey(sample -> metadataManager.getSampleMetadata(defaultStudy.getId(), sample))); } studyQuery.setSampleDataQuery(sampleDataQueryWithMetadata); } @@ -232,17 +225,13 @@ public ParsedVariantQuery parseQuery(Query inputQuery, QueryOptions options, boo return variantQuery; } - public final Query preProcessQuery(Query originalQuery, QueryOptions options) { - return preProcessQuery(originalQuery, options, null); - } - - protected Query preProcessQuery(Query originalQuery, QueryOptions options, VariantQueryProjection projection) { + public Query preProcessQuery(Query originalQuery, QueryOptions options) { // Copy input query! Do not modify original query! 
Query query = VariantQueryUtils.copy(originalQuery); preProcessAnnotationParams(query); - preProcessStudyParams(query, options, projection); + preProcessStudyParams(query, options); if (options != null && options.getLong(QueryOptions.LIMIT) < 0) { throw VariantQueryException.malformedParam(QueryOptions.LIMIT, options.getString(QueryOptions.LIMIT), @@ -388,7 +377,7 @@ private VariantType parseVariantType(String type) { } } - protected void preProcessStudyParams(Query query, QueryOptions options, VariantQueryProjection projection) { + protected void preProcessStudyParams(Query query, QueryOptions options) { StudyMetadata defaultStudy = getDefaultStudy(query); QueryOperation formatOperator = null; if (isValidParam(query, SAMPLE_DATA)) { @@ -613,10 +602,13 @@ protected void preProcessStudyParams(Query query, QueryOptions options, VariantQ if (isValidParam(query, SAMPLE_MENDELIAN_ERROR) || isValidParam(query, SAMPLE_DE_NOVO) - || isValidParam(query, SAMPLE_DE_NOVO_STRICT)) { + || isValidParam(query, SAMPLE_DE_NOVO_STRICT) + || isValidParam(query, SAMPLE_COMPOUND_HETEROZYGOUS)) { + boolean requireMendelianReady = false; QueryParam param = null; if (isValidParam(query, SAMPLE_MENDELIAN_ERROR)) { param = SAMPLE_MENDELIAN_ERROR; + requireMendelianReady = true; } if (isValidParam(query, SAMPLE_DE_NOVO)) { if (param != null) { @@ -624,6 +616,7 @@ protected void preProcessStudyParams(Query query, QueryOptions options, VariantQ param, query.getString(param.key()), SAMPLE_DE_NOVO, query.getString(SAMPLE_DE_NOVO.key())); } + requireMendelianReady = true; param = SAMPLE_DE_NOVO; } if (isValidParam(query, SAMPLE_DE_NOVO_STRICT)) { @@ -632,8 +625,21 @@ protected void preProcessStudyParams(Query query, QueryOptions options, VariantQ param, query.getString(param.key()), SAMPLE_DE_NOVO_STRICT, query.getString(SAMPLE_DE_NOVO_STRICT.key())); } + requireMendelianReady = true; param = SAMPLE_DE_NOVO_STRICT; } + if (isValidParam(query, SAMPLE_COMPOUND_HETEROZYGOUS)) { + if (param != null) { 
+ throw VariantQueryException.unsupportedParamsCombination( + param, query.getString(param.key()), + SAMPLE_COMPOUND_HETEROZYGOUS, query.getString(SAMPLE_COMPOUND_HETEROZYGOUS.key())); + } + requireMendelianReady = false; + param = SAMPLE_COMPOUND_HETEROZYGOUS; + } + if (param == null) { + throw new IllegalStateException("Unknown param"); + } if (defaultStudy == null) { throw VariantQueryException.missingStudyForSamples(query.getAsStringList(param.key()), metadataManager.getStudyNames()); @@ -645,15 +651,18 @@ protected void preProcessStudyParams(Query query, QueryOptions options, VariantQ genotypeParam, query.getString(genotypeParam.key()) ); } - List samples = query.getAsStringList(param.key()); + Object value = query.get(param.key()); + List samples; + if (value instanceof Trio) { + samples = Collections.singletonList(((Trio) value).getChild()); + } else { + samples = query.getAsStringList(param.key()); + } Set samplesAndParents = new LinkedHashSet<>(samples); for (String sample : samples) { - Integer sampleId = metadataManager.getSampleId(defaultStudy.getId(), sample); - if (sampleId == null) { - throw VariantQueryException.sampleNotFound(sample, defaultStudy.getName()); - } - SampleMetadata sampleMetadata = metadataManager.getSampleMetadata(defaultStudy.getId(), sampleId); - if (TaskMetadata.Status.READY != sampleMetadata.getMendelianErrorStatus()) { + SampleMetadata sampleMetadata = metadataManager.getSampleMetadata(defaultStudy.getId(), sample); + if (requireMendelianReady + && TaskMetadata.Status.READY != sampleMetadata.getMendelianErrorStatus()) { throw VariantQueryException.malformedParam(param, "Sample \"" + sampleMetadata.getName() + "\" does not have the Mendelian Errors precomputed yet"); } @@ -674,6 +683,21 @@ protected void preProcessStudyParams(Query query, QueryOptions options, VariantQ } else { query.put(INCLUDE_SAMPLE.key(), new ArrayList<>(samplesAndParents)); } + if (param == SAMPLE_COMPOUND_HETEROZYGOUS) { + int studyId = 
defaultStudy.getId(); + if (!(value instanceof Trio)) { + if (samples.size() > 1) { + throw VariantQueryException.malformedParam(SAMPLE, value.toString(), + "More than one sample provided for compound heterozygous filter."); + } + SampleMetadata sm = metadataManager.getSampleMetadata(studyId, samples.get(0)); + Trio trio = new Trio(null, + metadataManager.getSampleName(studyId, sm.getFather()), + metadataManager.getSampleName(studyId, sm.getMother()), + sm.getName()); + query.put(SAMPLE_COMPOUND_HETEROZYGOUS.key(), trio); + } + } } if (isValidParam(query, SCORE)) { @@ -704,9 +728,7 @@ protected void preProcessStudyParams(Query query, QueryOptions options, VariantQ || isValidParam(query, SAMPLE_SKIP) || isValidParam(query, SAMPLE_LIMIT) ) { - if (projection == null) { - projection = projectionParser.parseVariantQueryProjection(query, options); - } + VariantQueryProjection projection = projectionParser.parseVariantQueryProjection(query, options); // Apply the sample pagination. // Remove the sampleLimit and sampleSkip to avoid applying the pagination twice query.remove(SAMPLE_SKIP.key()); diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/executors/BreakendVariantQueryExecutor.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/executors/BreakendVariantQueryExecutor.java index 6eb237ea4b3..bc40c5b6418 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/executors/BreakendVariantQueryExecutor.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/executors/BreakendVariantQueryExecutor.java @@ -14,6 +14,7 @@ import org.opencb.commons.datastore.core.QueryParam; import org.opencb.opencga.storage.core.exceptions.StorageEngineException; import org.opencb.opencga.storage.core.variant.adaptors.VariantDBAdaptor; +import 
org.opencb.opencga.storage.core.variant.adaptors.VariantQuery; import org.opencb.opencga.storage.core.variant.adaptors.VariantQueryException; import org.opencb.opencga.storage.core.variant.adaptors.VariantQueryParam; import org.opencb.opencga.storage.core.variant.adaptors.iterators.VariantDBIterator; @@ -40,7 +41,8 @@ public BreakendVariantQueryExecutor(String storageEngineId, ObjectMap options, } @Override - public boolean canUseThisExecutor(Query query, QueryOptions options) throws StorageEngineException { + public boolean canUseThisExecutor(ParsedVariantQuery variantQuery, QueryOptions options) throws StorageEngineException { + VariantQuery query = variantQuery.getQuery(); return query.getString(VariantQueryParam.TYPE.key()).equals(VariantType.BREAKEND.name()) && VariantQueryUtils.isValidParam(query, VariantQueryParam.GENOTYPE); } diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/executors/CompoundHeterozygousQueryExecutor.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/executors/CompoundHeterozygousQueryExecutor.java index c6f4b87a5f0..bcecac57b5a 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/executors/CompoundHeterozygousQueryExecutor.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/executors/CompoundHeterozygousQueryExecutor.java @@ -64,14 +64,14 @@ public CompoundHeterozygousQueryExecutor(VariantStorageMetadataManager metadataM } @Override - public boolean canUseThisExecutor(Query query, QueryOptions options) throws StorageEngineException { - return isValidParam(query, VariantQueryUtils.SAMPLE_COMPOUND_HETEROZYGOUS); + public boolean canUseThisExecutor(ParsedVariantQuery variantQuery, QueryOptions options) throws StorageEngineException { + return isValidParam(variantQuery.getQuery(), 
VariantQueryUtils.SAMPLE_COMPOUND_HETEROZYGOUS); } @Override protected Object getOrIterator(ParsedVariantQuery variantQuery, boolean iterator) { Trio trio = getCompHetTrio(variantQuery.getQuery()); - String study = variantQuery.getStudyQuery().getStudyOrFail(); + String study = variantQuery.getStudyQuery().getDefaultStudyOrFail().getName(); return getOrIterator(study, trio.getChild(), trio.getFather(), trio.getMother(), variantQuery, iterator); } diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/executors/DBAdaptorVariantQueryExecutor.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/executors/DBAdaptorVariantQueryExecutor.java index 474cbc3fa9f..e667e2a1485 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/executors/DBAdaptorVariantQueryExecutor.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/executors/DBAdaptorVariantQueryExecutor.java @@ -52,9 +52,9 @@ protected Object getOrIterator(ParsedVariantQuery variantQuery, boolean iterator } @Override - public boolean canUseThisExecutor(Query query, QueryOptions options) { + public boolean canUseThisExecutor(ParsedVariantQuery variantQuery, QueryOptions options) { for (QueryParam unsupportedParam : UNSUPPORTED_PARAMS) { - if (VariantQueryUtils.isValidParam(query, unsupportedParam)) { + if (VariantQueryUtils.isValidParam(variantQuery.getQuery(), unsupportedParam)) { logger.warn("Unsupported variant query param {} in {}", unsupportedParam.key(), DBAdaptorVariantQueryExecutor.class.getSimpleName()); diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/executors/NoOpVariantQueryExecutor.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/executors/NoOpVariantQueryExecutor.java index 
e286b4a07ce..5f2da2c2056 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/executors/NoOpVariantQueryExecutor.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/executors/NoOpVariantQueryExecutor.java @@ -8,6 +8,7 @@ import org.opencb.opencga.storage.core.metadata.models.CohortMetadata; import org.opencb.opencga.storage.core.metadata.models.StudyMetadata; import org.opencb.opencga.storage.core.variant.adaptors.GenotypeClass; +import org.opencb.opencga.storage.core.variant.adaptors.VariantQuery; import org.opencb.opencga.storage.core.variant.adaptors.VariantQueryParam; import org.opencb.opencga.storage.core.variant.adaptors.iterators.VariantDBIterator; import org.opencb.opencga.storage.core.variant.query.*; @@ -34,7 +35,8 @@ public NoOpVariantQueryExecutor(VariantStorageMetadataManager metadataManager, S } @Override - public boolean canUseThisExecutor(Query query, QueryOptions options) throws StorageEngineException { + public boolean canUseThisExecutor(ParsedVariantQuery variantQuery, QueryOptions options) throws StorageEngineException { + VariantQuery query = variantQuery.getQuery(); boolean sampleQuery = false; String sample = null; if (VariantQueryUtils.isValidParam(query, VariantQueryParam.GENOTYPE)) { diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/executors/VariantQueryExecutor.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/executors/VariantQueryExecutor.java index 26d53e89e5c..49ddc339658 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/executors/VariantQueryExecutor.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/executors/VariantQueryExecutor.java @@ -2,7 +2,6 @@ import 
org.opencb.biodata.models.variant.Variant; import org.opencb.commons.datastore.core.ObjectMap; -import org.opencb.commons.datastore.core.Query; import org.opencb.commons.datastore.core.QueryOptions; import org.opencb.opencga.storage.core.exceptions.StorageEngineException; import org.opencb.opencga.storage.core.metadata.VariantStorageMetadataManager; @@ -65,12 +64,12 @@ public static void setDefaultTimeout(QueryOptions queryOptions, ObjectMap config /** * Determine if this VariantQueryExecutor can run the given query. - * @param query Query to execute + * @param variantQuery Query to execute * @param options Options for the query * @return True if this variant query executor is valid for the query * @throws StorageEngineException if there is an error */ - public abstract boolean canUseThisExecutor(Query query, QueryOptions options) throws StorageEngineException; + public abstract boolean canUseThisExecutor(ParsedVariantQuery variantQuery, QueryOptions options) throws StorageEngineException; protected abstract Object getOrIterator(ParsedVariantQuery variantQuery, boolean iterator) throws StorageEngineException; diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/search/SamplesSearchIndexVariantQueryExecutor.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/search/SamplesSearchIndexVariantQueryExecutor.java index 200b2eb463d..cfa794cbe10 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/search/SamplesSearchIndexVariantQueryExecutor.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/search/SamplesSearchIndexVariantQueryExecutor.java @@ -7,6 +7,7 @@ import org.opencb.opencga.storage.core.exceptions.StorageEngineException; import org.opencb.opencga.storage.core.exceptions.VariantSearchException; import org.opencb.opencga.storage.core.variant.adaptors.VariantDBAdaptor; 
+import org.opencb.opencga.storage.core.variant.adaptors.VariantQuery; import org.opencb.opencga.storage.core.variant.adaptors.VariantQueryException; import org.opencb.opencga.storage.core.variant.query.ParsedVariantQuery; import org.opencb.opencga.storage.core.variant.search.solr.VariantSearchManager; @@ -28,7 +29,8 @@ public SamplesSearchIndexVariantQueryExecutor(VariantDBAdaptor dbAdaptor, Varian } @Override - public boolean canUseThisExecutor(Query query, QueryOptions options) throws StorageEngineException { + public boolean canUseThisExecutor(ParsedVariantQuery variantQuery, QueryOptions options) throws StorageEngineException { + VariantQuery query = variantQuery.getQuery(); String samplesCollection = inferSpecificSearchIndexSamplesCollection(query, options, getMetadataManager(), dbName); return samplesCollection != null && searchActiveAndAlive(samplesCollection); } diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/search/SearchIndexVariantQueryExecutor.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/search/SearchIndexVariantQueryExecutor.java index 12c86cc4e2b..bd6b8e6437f 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/search/SearchIndexVariantQueryExecutor.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/search/SearchIndexVariantQueryExecutor.java @@ -11,10 +11,7 @@ import org.opencb.opencga.storage.core.exceptions.StorageEngineException; import org.opencb.opencga.storage.core.exceptions.VariantSearchException; import org.opencb.opencga.storage.core.variant.VariantStorageEngine; -import org.opencb.opencga.storage.core.variant.adaptors.VariantDBAdaptor; -import org.opencb.opencga.storage.core.variant.adaptors.VariantField; -import org.opencb.opencga.storage.core.variant.adaptors.VariantQueryException; -import 
org.opencb.opencga.storage.core.variant.adaptors.VariantQueryParam; +import org.opencb.opencga.storage.core.variant.adaptors.*; import org.opencb.opencga.storage.core.variant.query.ParsedVariantQuery; import org.opencb.opencga.storage.core.variant.query.VariantQueryResult; import org.opencb.opencga.storage.core.variant.query.VariantQueryUtils; @@ -76,7 +73,8 @@ public SearchIndexVariantQueryExecutor setIntersectParamsThreshold(int intersect } @Override - public boolean canUseThisExecutor(Query query, QueryOptions options) throws StorageEngineException { + public boolean canUseThisExecutor(ParsedVariantQuery variantQuery, QueryOptions options) throws StorageEngineException { + VariantQuery query = variantQuery.getQuery(); return doQuerySearchManager(query, options) || doIntersectWithSearch(query, options); } diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/search/VariantSearchUtils.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/search/VariantSearchUtils.java index e46c6379dc6..5365643b1fa 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/search/VariantSearchUtils.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/search/VariantSearchUtils.java @@ -280,7 +280,9 @@ public static String inferSpecificSearchIndexSamplesCollection( Set samples = new HashSet<>(); if (isValidParam(query, VariantQueryParam.SAMPLE)) { String value = query.getString(VariantQueryParam.SAMPLE.key()); - samples.addAll(splitValue(value).getValue()); + for (String sample : splitValue(value).getValue()) { + samples.add(sample.split(IS)[0]); + } } if (isValidParam(query, VariantQueryParam.GENOTYPE)) { HashMap> map = new HashMap<>(); diff --git a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/query/executors/VariantQueryExecutorTest.java 
b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/query/executors/VariantQueryExecutorTest.java index 778257d5999..3d0b2524681 100644 --- a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/query/executors/VariantQueryExecutorTest.java +++ b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/query/executors/VariantQueryExecutorTest.java @@ -12,6 +12,7 @@ import org.opencb.opencga.storage.core.exceptions.StorageEngineException; import org.opencb.opencga.storage.core.metadata.VariantStorageMetadataManager; import org.opencb.opencga.storage.core.metadata.models.StudyMetadata; +import org.opencb.opencga.storage.core.metadata.models.Trio; import org.opencb.opencga.storage.core.variant.VariantStorageBaseTest; import org.opencb.opencga.storage.core.variant.VariantStorageOptions; import org.opencb.opencga.storage.core.variant.adaptors.GenotypeClass; @@ -72,6 +73,9 @@ public void setUp() throws Exception { fileIndexed = true; Integer indexedFileId = metadataManager.getIndexedFiles(studyMetadata.getId()).iterator().next(); + Trio trio = new Trio("NA19660", "NA19661", "NA19685"); + variantStorageEngine.familyIndex(studyMetadata.getName(), Collections.singletonList(trio), new ObjectMap()); + //Calculate stats QueryOptions options = new QueryOptions(VariantStorageOptions.STUDY.key(), STUDY_NAME) .append(VariantStorageOptions.LOAD_BATCH_SIZE.key(), 100) @@ -188,23 +192,53 @@ public void testXRefRs() throws StorageEngineException { } } + @Test + public void testCompHetQuery() throws StorageEngineException { +// Matcher matcher = allOf( +// anyOf( +// samePosition(new Variant("1:2441358:T:C")), +// samePosition(new Variant("1:2458010:G:C")), +// samePosition(new Variant("19:501725:G:A")), +// samePosition(new Variant("19:501900:C:A"))), +// withStudy(STUDY_NAME, withSampleGt("NA19685"))); + Matcher matcher = null; + testQuery(new 
VariantQuery().sample("NA19685:compoundheterozygous") + .study(STUDY_NAME) + .biotype("protein_coding"), + new QueryOptions(), + matcher, + false); + } + public VariantQueryResult testQuery(Query query, QueryOptions options, Matcher matcher) throws StorageEngineException { + return testQuery(query, options, matcher, true); + } + + public VariantQueryResult testQuery(Query query, QueryOptions options, Matcher matcher, boolean expectDBAdaptorExecutor) + throws StorageEngineException { logger.info(""); logger.info(""); logger.info("####################################################"); logger.info("########## TEST QUERY :" + query.toJson()); logger.info("####################################################"); logger.info("## Allowed VariantQueryExecutors:"); + ParsedVariantQuery variantQuery = variantStorageEngine.parseQuery(query, options); for (VariantQueryExecutor variantQueryExecutor : variantQueryExecutors) { - if (variantQueryExecutor.canUseThisExecutor(query, options)) { + if (variantQueryExecutor.canUseThisExecutor(variantQuery, options)) { logger.info("## - " + variantQueryExecutor.getClass().getSimpleName()); } } - logger.info("## Using DBAdaptorVariantQueryExecutor for expected results"); - Assert.assertTrue(dbQueryExecutor.canUseThisExecutor(query, options)); + VariantQueryResult expected; + if (expectDBAdaptorExecutor) { + logger.info("## Using DBAdaptorVariantQueryExecutor for expected results"); + Assert.assertTrue(dbQueryExecutor.canUseThisExecutor(variantQuery, options)); - ParsedVariantQuery variantQuery = variantStorageEngine.parseQuery(query, options); - VariantQueryResult expected = dbQueryExecutor.get(variantQuery); + expected = dbQueryExecutor.get(variantQuery); + } else { + logger.info("## DBAdaptorVariantQueryExecutor can not be used for expected results"); + Assert.assertFalse(dbQueryExecutor.canUseThisExecutor(variantQuery, options)); + expected = null; + } VariantQueryResult unfilteredResult = null; VariantQueryResult result = null; @@ 
-243,21 +277,24 @@ public VariantQueryResult testQuery(Query query, QueryOptions options, QueryOptions emptyOptions = new QueryOptions(); emptyOptions.putIfNotEmpty(QueryOptions.INCLUDE, options.getString(QueryOptions.INCLUDE)); emptyOptions.putIfNotEmpty(QueryOptions.EXCLUDE, options.getString(QueryOptions.EXCLUDE)); + logger.info("## unfiltered query " + VariantQueryUtils.printQuery(emptyQuery)); + logger.info("## unfiltered options " + emptyOptions.toJson()); unfilteredResult = dbQueryExecutor.get(variantStorageEngine.parseQuery(emptyQuery, emptyOptions)); } for (VariantQueryExecutor variantQueryExecutor : variantQueryExecutors) { - if (variantQueryExecutor.canUseThisExecutor(query, options)) { + if (variantQueryExecutor.canUseThisExecutor(variantQuery, options)) { logger.info(""); logger.info("###################"); logger.info("### Testing " + variantQueryExecutor.getClass().getSimpleName()); result = variantQueryExecutor.get(variantQuery); logger.info("### Num results : " + result.getNumResults()); logger.info("###################"); - expected.getResults().sort(Comparator.comparing(Variant::toString)); - result.getResults().sort(Comparator.comparing(Variant::toString)); - Assert.assertEquals(expected.getResults(), result.getResults()); - + if (expected != null) { + expected.getResults().sort(Comparator.comparing(Variant::toString)); + result.getResults().sort(Comparator.comparing(Variant::toString)); + Assert.assertEquals(expected.getResults(), result.getResults()); + } assertThat(result, numResults(gt(0))); if (matcher != null) { diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/HadoopVariantQueryParser.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/HadoopVariantQueryParser.java index 6aa472ee735..45682df31f4 100644 --- 
a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/HadoopVariantQueryParser.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/HadoopVariantQueryParser.java @@ -5,7 +5,6 @@ import org.opencb.opencga.storage.core.metadata.VariantStorageMetadataManager; import org.opencb.opencga.storage.core.utils.CellBaseUtils; import org.opencb.opencga.storage.core.variant.query.VariantQueryParser; -import org.opencb.opencga.storage.core.variant.query.projection.VariantQueryProjection; import java.util.List; @@ -18,8 +17,8 @@ public HadoopVariantQueryParser(CellBaseUtils cellBaseUtils, VariantStorageMetad } @Override - protected Query preProcessQuery(Query originalQuery, QueryOptions options, VariantQueryProjection projection) { - Query query = super.preProcessQuery(originalQuery, options, projection); + public Query preProcessQuery(Query originalQuery, QueryOptions options) { + Query query = super.preProcessQuery(originalQuery, options); List studyNames = metadataManager.getStudyNames(); if (isValidParam(query, STUDY) && studyNames.size() == 1) { diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/adaptors/HBaseColumnIntersectVariantQueryExecutor.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/adaptors/HBaseColumnIntersectVariantQueryExecutor.java index c672e94fb58..ceb52e5eeaa 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/adaptors/HBaseColumnIntersectVariantQueryExecutor.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/adaptors/HBaseColumnIntersectVariantQueryExecutor.java @@ -7,6 +7,7 @@ 
import org.opencb.commons.datastore.core.QueryOptions; import org.opencb.opencga.storage.core.variant.VariantStorageOptions; import org.opencb.opencga.storage.core.variant.adaptors.VariantDBAdaptor; +import org.opencb.opencga.storage.core.variant.adaptors.VariantQuery; import org.opencb.opencga.storage.core.variant.query.ParsedQuery; import org.opencb.opencga.storage.core.variant.query.ParsedVariantQuery; import org.opencb.opencga.storage.core.variant.query.VariantQueryResult; @@ -43,8 +44,8 @@ public HBaseColumnIntersectVariantQueryExecutor(VariantDBAdaptor dbAdaptor, Stri } @Override - public boolean canUseThisExecutor(Query query, QueryOptions options) { - + public boolean canUseThisExecutor(ParsedVariantQuery variantQuery, QueryOptions options) { + VariantQuery query = variantQuery.getQuery(); if (!options.getBoolean(HBASE_COLUMN_INTERSECT, ACTIVE_BY_DEFAULT)) { // HBase column intersect not active return false; diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/SampleIndexMendelianErrorQueryExecutor.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/SampleIndexMendelianErrorQueryExecutor.java index 4dd50c9eacf..5063ca1fe4b 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/SampleIndexMendelianErrorQueryExecutor.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/SampleIndexMendelianErrorQueryExecutor.java @@ -9,7 +9,6 @@ import org.opencb.biodata.tools.pedigree.MendelianError; import org.opencb.commons.datastore.core.DataResult; import org.opencb.commons.datastore.core.ObjectMap; -import org.opencb.commons.datastore.core.Query; import org.opencb.commons.datastore.core.QueryOptions; import 
org.opencb.opencga.storage.core.metadata.models.SampleMetadata; import org.opencb.opencga.storage.core.metadata.models.Trio; @@ -33,10 +32,10 @@ public SampleIndexMendelianErrorQueryExecutor(VariantHadoopDBAdaptor dbAdaptor, } @Override - public boolean canUseThisExecutor(Query query, QueryOptions options) { - if (VariantQueryUtils.isValidParam(query, VariantQueryUtils.SAMPLE_MENDELIAN_ERROR) - || VariantQueryUtils.isValidParam(query, VariantQueryUtils.SAMPLE_DE_NOVO) - || VariantQueryUtils.isValidParam(query, VariantQueryUtils.SAMPLE_DE_NOVO_STRICT)) { + public boolean canUseThisExecutor(ParsedVariantQuery query, QueryOptions options) { + if (VariantQueryUtils.isValidParam(query.getQuery(), VariantQueryUtils.SAMPLE_MENDELIAN_ERROR) + || VariantQueryUtils.isValidParam(query.getQuery(), VariantQueryUtils.SAMPLE_DE_NOVO) + || VariantQueryUtils.isValidParam(query.getQuery(), VariantQueryUtils.SAMPLE_DE_NOVO_STRICT)) { return super.canUseThisExecutor(query, options); } else { return false; diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/SampleIndexOnlyVariantQueryExecutor.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/SampleIndexOnlyVariantQueryExecutor.java index 371d57ac4da..3e9e0b9b75f 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/SampleIndexOnlyVariantQueryExecutor.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/SampleIndexOnlyVariantQueryExecutor.java @@ -86,7 +86,8 @@ public SampleIndexOnlyVariantQueryExecutor(VariantHadoopDBAdaptor dbAdaptor, Sam } @Override - public boolean canUseThisExecutor(Query query, QueryOptions options) { + public boolean canUseThisExecutor(ParsedVariantQuery variantQuery, 
QueryOptions options) { + VariantQuery query = variantQuery.getQuery(); if (SampleIndexQueryParser.validSampleIndexQuery(query)) { if (isFullyCoveredQuery(query, options)) { @@ -179,10 +180,13 @@ protected boolean shouldGetCount(QueryOptions options, boolean iterator) { private boolean isFullyCoveredQuery(Query inputQuery, QueryOptions options) { Query query = new Query(inputQuery); + if (!isQueryCovered(query)) { + return false; + } // ParsedVariantQuery parsedVariantQuery = variantQueryProjectionParser.parseQuery(query, options, true); SampleIndexQuery sampleIndexQuery = sampleIndexDBAdaptor.parseSampleIndexQuery(query); - return isQueryCovered(query) && isIncludeCovered(sampleIndexQuery, inputQuery, options); + return isIncludeCovered(sampleIndexQuery, inputQuery, options); } private boolean isQueryCovered(Query query) { diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/SampleIndexVariantQueryExecutor.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/SampleIndexVariantQueryExecutor.java index 6175128018d..03e8a5fe880 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/SampleIndexVariantQueryExecutor.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/SampleIndexVariantQueryExecutor.java @@ -58,9 +58,9 @@ public SampleIndexVariantQueryExecutor(VariantHadoopDBAdaptor dbAdaptor, SampleI } @Override - public boolean canUseThisExecutor(Query query, QueryOptions options) { + public boolean canUseThisExecutor(ParsedVariantQuery query, QueryOptions options) { if (options.getBoolean(SAMPLE_INDEX_INTERSECT, true)) { - return SampleIndexQueryParser.validSampleIndexQuery(query); + return 
SampleIndexQueryParser.validSampleIndexQuery(query.getQuery()); } return false; } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/family/FamilyIndexDriver.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/family/FamilyIndexDriver.java index af632d69436..ad09896e627 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/family/FamilyIndexDriver.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/family/FamilyIndexDriver.java @@ -112,10 +112,9 @@ protected void parseAndValidateParameters() throws IOException { } else { trioList.add(metadataManager.getSampleIdOrFail(getStudyId(), trio.getMother())); } - int childId = metadataManager.getSampleIdOrFail(getStudyId(), trio.getChild()); - trioList.add(childId); - SampleMetadata sampleMetadata = metadataManager.getSampleMetadata(getStudyId(), childId); + SampleMetadata sampleMetadata = metadataManager.getSampleMetadata(getStudyId(), trio.getChild()); + trioList.add(sampleMetadata.getId()); if (!overwrite && sampleMetadata.getFamilyIndexStatus(sampleIndexVersion) == TaskMetadata.Status.READY) { LOGGER.info("Skip sample " + sampleMetadata.getName() + ". 
Already precomputed!"); } else { diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexSchemaFactory.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexSchemaFactory.java index ef2aecb803b..a087420f425 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexSchemaFactory.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexSchemaFactory.java @@ -78,8 +78,7 @@ public Collection getSampleIndexConfigurationVersions(int studyId, Coll private Collection getSampleIndexConfigurationVersions(int studyId, Object sample, boolean withAnnotation, boolean withFamilyIndex) { - int sampleId = metadataManager.getSampleIdOrFail(studyId, sample); - SampleMetadata sampleMetadata = metadataManager.getSampleMetadata(studyId, sampleId); + SampleMetadata sampleMetadata = metadataManager.getSampleMetadata(studyId, sample); Collection versions = sampleMetadata.getSampleIndexVersions(); if (withAnnotation) { versions = CollectionUtils.intersection( diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexTest.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexTest.java index 9ee362f8724..144083e5bd0 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexTest.java +++ 
b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexTest.java @@ -1068,7 +1068,11 @@ public void testFamilyIndexQueryCount() { .append(QueryOptions.LIMIT, 10) .append(QueryOptions.COUNT, true)); - System.out.println(result.getResults().stream().map(Variant::getAnnotation).flatMap(v -> v.getConsequenceTypes().stream()).map(ConsequenceType::getGeneName).collect(Collectors.toSet())); + System.out.println(result.getResults().stream() + .map(Variant::getAnnotation) + .flatMap(v -> v.getConsequenceTypes().stream()) + .map(ConsequenceType::getGeneName) + .collect(Collectors.toSet())); result = variantStorageEngine.get( new Query() @@ -1082,7 +1086,11 @@ public void testFamilyIndexQueryCount() { .append(QueryOptions.LIMIT, 10) .append(QueryOptions.COUNT, true)); - System.out.println(result.getResults().stream().map(Variant::getAnnotation).flatMap(v -> v.getConsequenceTypes().stream()).map(ConsequenceType::getGeneName).collect(Collectors.toSet())); + System.out.println(result.getResults().stream() + .map(Variant::getAnnotation) + .flatMap(v -> v.getConsequenceTypes().stream()) + .map(ConsequenceType::getGeneName) + .collect(Collectors.toSet())); } @Test From f3ac930aced6ff2e4de8992a1c0b3a3693bca3cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Fri, 28 Jun 2024 14:53:30 +0100 Subject: [PATCH 09/21] storage: Fix numTotalSamples variant result value. 
#TASK-6436 --- .../VariantQueryProjectionParser.java | 9 ++++++++- .../VariantDBAdaptorMultiFileTest.java | 18 +++++++++++------- 2 files changed, 19 insertions(+), 8 deletions(-) diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/projection/VariantQueryProjectionParser.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/projection/VariantQueryProjectionParser.java index bdcd501ad88..95add22ec8e 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/projection/VariantQueryProjectionParser.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/projection/VariantQueryProjectionParser.java @@ -71,7 +71,14 @@ public VariantQueryProjection parseVariantQueryProjection(Query query, QueryOpti } Map> sampleIdsMap = getIncludeSampleIds(query, options, includeStudies, metadataManager); - int numTotalSamples = sampleIdsMap.values().stream().mapToInt(List::size).sum(); + int numTotalSamples; + if (isValidParam(query, NUM_TOTAL_SAMPLES)) { + // NUM_TOTAL_SAMPLES might have been defined in the PreProcess step. + // This implies that the current query has the samples already paginated. 
+ numTotalSamples = query.getInt(NUM_TOTAL_SAMPLES.key()); + } else { + numTotalSamples = sampleIdsMap.values().stream().mapToInt(List::size).sum(); + } skipAndLimitSamples(query, sampleIdsMap); int numSamples = sampleIdsMap.values().stream().mapToInt(List::size).sum(); diff --git a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/adaptors/VariantDBAdaptorMultiFileTest.java b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/adaptors/VariantDBAdaptorMultiFileTest.java index 9548338e3d8..903ba51f3a2 100644 --- a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/adaptors/VariantDBAdaptorMultiFileTest.java +++ b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/adaptors/VariantDBAdaptorMultiFileTest.java @@ -381,18 +381,22 @@ public void testSampleLimitSkip() throws Exception { VariantQueryResult result = query(new Query(SAMPLE_METADATA.key(), true).append(VariantQueryParam.INCLUDE_SAMPLE.key(), ALL), options); System.out.println("samples(ALL) = " + result.getSamples()); - for (int i : new int[]{1, 3, 6, 8, 10}) { + int numSamples = metadataManager.getStudyIds().stream().mapToInt(id -> metadataManager.getIndexedSamples(id).size()).sum(); + assertEquals(8, numSamples); + for (int i : new int[]{1, 3, 6, numSamples, 10}) { result = query(new VariantQuery().sampleSkip(i).includeSampleAll().sampleMetadata(true), options); // System.out.println("samples(SKIP=" + i + ") = " + result.getSamples()); - assertEquals(Math.max(0, 8 - i), result.getSamples().values().stream().mapToInt(List::size).sum()); - assertEquals(Math.max(0, 8 - i), result.getNumSamples().intValue()); - assertEquals(8, result.getNumTotalSamples().intValue()); + int expected = Math.max(0, numSamples - i); + assertEquals("Skip = " + i + " , expected " + expected + " out of 8 samples", expected, 
result.getSamples().values().stream().mapToInt(List::size).sum()); + assertEquals("Skip = " + i + " , expected " + expected + " out of 8 samples", expected, result.getNumSamples().intValue()); + assertEquals(numSamples, result.getNumTotalSamples().intValue()); result = query(new VariantQuery().sampleLimit(i).includeSampleAll().sampleMetadata(true), options); // System.out.println("samples(LIMIT=" + i + ") = " + result.getSamples()); - assertEquals(Math.min(8, i), result.getSamples().values().stream().mapToInt(List::size).sum()); - assertEquals(Math.min(8, i), result.getNumSamples().intValue()); - assertEquals(8, result.getNumTotalSamples().intValue()); + expected = Math.min(numSamples, i); + assertEquals("Limit = " + i + " , expected " + expected + " out of 8 samples", expected, result.getSamples().values().stream().mapToInt(List::size).sum()); + assertEquals("Limit = " + i + " , expected " + expected + " out of 8 samples", expected, result.getNumSamples().intValue()); + assertEquals(numSamples, result.getNumTotalSamples().intValue()); } } From e95bae415936c8718ac5f0396dc342aadf477585 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Fri, 28 Jun 2024 17:08:50 +0100 Subject: [PATCH 10/21] storage: Fix canUseThisExecutor on SampleIndexOnlyVariantQueryExecutor #TASK-6436 --- .../variant/index/SampleIndexOnlyVariantQueryExecutor.java | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/SampleIndexOnlyVariantQueryExecutor.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/SampleIndexOnlyVariantQueryExecutor.java index 3e9e0b9b75f..7b1cdc5dceb 100644 --- 
a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/SampleIndexOnlyVariantQueryExecutor.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/SampleIndexOnlyVariantQueryExecutor.java @@ -180,13 +180,11 @@ protected boolean shouldGetCount(QueryOptions options, boolean iterator) { private boolean isFullyCoveredQuery(Query inputQuery, QueryOptions options) { Query query = new Query(inputQuery); - if (!isQueryCovered(query)) { - return false; - } // ParsedVariantQuery parsedVariantQuery = variantQueryProjectionParser.parseQuery(query, options, true); SampleIndexQuery sampleIndexQuery = sampleIndexDBAdaptor.parseSampleIndexQuery(query); - return isIncludeCovered(sampleIndexQuery, inputQuery, options); + return isQueryCovered(sampleIndexQuery.getUncoveredQuery()) + && isIncludeCovered(sampleIndexQuery, inputQuery, options); } private boolean isQueryCovered(Query query) { From 54b667661428dd9ac4c9179f4c12e9f4e739dd05 Mon Sep 17 00:00:00 2001 From: JuanfeSanahuja Date: Fri, 5 Jul 2024 09:47:56 +0200 Subject: [PATCH 11/21] Prepare release 2.12.6 --- opencga-analysis/pom.xml | 2 +- opencga-app/pom.xml | 2 +- opencga-catalog/pom.xml | 2 +- opencga-client/pom.xml | 2 +- opencga-clinical/pom.xml | 2 +- opencga-core/pom.xml | 2 +- opencga-master/pom.xml | 2 +- opencga-server/pom.xml | 2 +- opencga-storage/opencga-storage-app/pom.xml | 2 +- opencga-storage/opencga-storage-benchmark/pom.xml | 2 +- opencga-storage/opencga-storage-core/pom.xml | 2 +- .../opencga-storage-hadoop-core/pom.xml | 2 +- .../opencga-storage-hadoop-deps-emr6.1/pom.xml | 2 +- .../opencga-storage-hadoop-deps-hdp2.6/pom.xml | 2 +- .../opencga-storage-hadoop-deps-hdp3.1/pom.xml | 2 +- .../opencga-storage-hadoop-deps/pom.xml | 2 +- opencga-storage/opencga-storage-hadoop/pom.xml | 2 +- opencga-storage/opencga-storage-server/pom.xml | 2 +- 
opencga-storage/pom.xml | 2 +- opencga-test/pom.xml | 2 +- pom.xml | 14 +++++++------- 21 files changed, 27 insertions(+), 27 deletions(-) diff --git a/opencga-analysis/pom.xml b/opencga-analysis/pom.xml index 69a9b33089c..e6cd5eabffb 100644 --- a/opencga-analysis/pom.xml +++ b/opencga-analysis/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga - 2.12.6-SNAPSHOT + 2.12.6 ../pom.xml diff --git a/opencga-app/pom.xml b/opencga-app/pom.xml index 18071afb30b..c17445c0570 100644 --- a/opencga-app/pom.xml +++ b/opencga-app/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga - 2.12.6-SNAPSHOT + 2.12.6 ../pom.xml diff --git a/opencga-catalog/pom.xml b/opencga-catalog/pom.xml index 46964bfae1f..986afb42ef4 100644 --- a/opencga-catalog/pom.xml +++ b/opencga-catalog/pom.xml @@ -23,7 +23,7 @@ org.opencb.opencga opencga - 2.12.6-SNAPSHOT + 2.12.6 ../pom.xml diff --git a/opencga-client/pom.xml b/opencga-client/pom.xml index cfdf162a973..4ba23d9eb1f 100644 --- a/opencga-client/pom.xml +++ b/opencga-client/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga - 2.12.6-SNAPSHOT + 2.12.6 ../pom.xml diff --git a/opencga-clinical/pom.xml b/opencga-clinical/pom.xml index ad02409329b..917c115a102 100644 --- a/opencga-clinical/pom.xml +++ b/opencga-clinical/pom.xml @@ -5,7 +5,7 @@ org.opencb.opencga opencga - 2.12.6-SNAPSHOT + 2.12.6 ../pom.xml 4.0.0 diff --git a/opencga-core/pom.xml b/opencga-core/pom.xml index 00abcbf61d7..d1a843a369a 100644 --- a/opencga-core/pom.xml +++ b/opencga-core/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga - 2.12.6-SNAPSHOT + 2.12.6 ../pom.xml diff --git a/opencga-master/pom.xml b/opencga-master/pom.xml index 4e304b5e34c..a0b09511331 100644 --- a/opencga-master/pom.xml +++ b/opencga-master/pom.xml @@ -22,7 +22,7 @@ opencga org.opencb.opencga - 2.12.6-SNAPSHOT + 2.12.6 ../pom.xml diff --git a/opencga-server/pom.xml b/opencga-server/pom.xml index 798bdd26d41..85ac42f2654 100644 --- a/opencga-server/pom.xml +++ b/opencga-server/pom.xml @@ -22,7 +22,7 @@ 
org.opencb.opencga opencga - 2.12.6-SNAPSHOT + 2.12.6 ../pom.xml diff --git a/opencga-storage/opencga-storage-app/pom.xml b/opencga-storage/opencga-storage-app/pom.xml index b5d0d0fad60..726d8456aa3 100644 --- a/opencga-storage/opencga-storage-app/pom.xml +++ b/opencga-storage/opencga-storage-app/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga-storage - 2.12.6-SNAPSHOT + 2.12.6 ../pom.xml diff --git a/opencga-storage/opencga-storage-benchmark/pom.xml b/opencga-storage/opencga-storage-benchmark/pom.xml index 50c8105886a..ac49b0167ac 100644 --- a/opencga-storage/opencga-storage-benchmark/pom.xml +++ b/opencga-storage/opencga-storage-benchmark/pom.xml @@ -22,7 +22,7 @@ opencga-storage org.opencb.opencga - 2.12.6-SNAPSHOT + 2.12.6 ../pom.xml diff --git a/opencga-storage/opencga-storage-core/pom.xml b/opencga-storage/opencga-storage-core/pom.xml index 130716cf970..290972cc572 100644 --- a/opencga-storage/opencga-storage-core/pom.xml +++ b/opencga-storage/opencga-storage-core/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga-storage - 2.12.6-SNAPSHOT + 2.12.6 ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/pom.xml index 29e4ac40576..ddcd73ee837 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/pom.xml @@ -23,7 +23,7 @@ org.opencb.opencga opencga-storage-hadoop - 2.12.6-SNAPSHOT + 2.12.6 ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-deps/opencga-storage-hadoop-deps-emr6.1/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-deps/opencga-storage-hadoop-deps-emr6.1/pom.xml index e4d5e3c8439..64269c04696 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-deps/opencga-storage-hadoop-deps-emr6.1/pom.xml +++ 
b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-deps/opencga-storage-hadoop-deps-emr6.1/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga-storage-hadoop-deps - 2.12.6-SNAPSHOT + 2.12.6 ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-deps/opencga-storage-hadoop-deps-hdp2.6/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-deps/opencga-storage-hadoop-deps-hdp2.6/pom.xml index 7831c67ec49..575b5ef7b73 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-deps/opencga-storage-hadoop-deps-hdp2.6/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-deps/opencga-storage-hadoop-deps-hdp2.6/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga-storage-hadoop-deps - 2.12.6-SNAPSHOT + 2.12.6 ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-deps/opencga-storage-hadoop-deps-hdp3.1/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-deps/opencga-storage-hadoop-deps-hdp3.1/pom.xml index f3792747d50..7583d28aa4f 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-deps/opencga-storage-hadoop-deps-hdp3.1/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-deps/opencga-storage-hadoop-deps-hdp3.1/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga-storage-hadoop-deps - 2.12.6-SNAPSHOT + 2.12.6 ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-deps/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-deps/pom.xml index 8b769184fa3..1198b5718ab 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-deps/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-deps/pom.xml @@ -50,7 +50,7 @@ org.opencb.opencga opencga-storage-hadoop - 2.12.6-SNAPSHOT + 2.12.6 ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/pom.xml 
b/opencga-storage/opencga-storage-hadoop/pom.xml index 1d3b4e5f795..f6cba2e9980 100644 --- a/opencga-storage/opencga-storage-hadoop/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/pom.xml @@ -28,7 +28,7 @@ org.opencb.opencga opencga-storage - 2.12.6-SNAPSHOT + 2.12.6 ../pom.xml diff --git a/opencga-storage/opencga-storage-server/pom.xml b/opencga-storage/opencga-storage-server/pom.xml index 6a725c2746e..31cf6d9531f 100644 --- a/opencga-storage/opencga-storage-server/pom.xml +++ b/opencga-storage/opencga-storage-server/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga-storage - 2.12.6-SNAPSHOT + 2.12.6 ../pom.xml diff --git a/opencga-storage/pom.xml b/opencga-storage/pom.xml index 1f00aeda658..669b237ff25 100644 --- a/opencga-storage/pom.xml +++ b/opencga-storage/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga - 2.12.6-SNAPSHOT + 2.12.6 ../pom.xml diff --git a/opencga-test/pom.xml b/opencga-test/pom.xml index 953a68ee373..7e4d5f81c7f 100644 --- a/opencga-test/pom.xml +++ b/opencga-test/pom.xml @@ -24,7 +24,7 @@ org.opencb.opencga opencga - 2.12.6-SNAPSHOT + 2.12.6 ../pom.xml diff --git a/pom.xml b/pom.xml index aef3f1a0bc0..3dfc10baa5a 100644 --- a/pom.xml +++ b/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga - 2.12.6-SNAPSHOT + 2.12.6 pom OpenCGA @@ -43,12 +43,12 @@ - 2.12.6_dev - 2.12.6_dev - 5.8.4-SNAPSHOT - 2.12.3-SNAPSHOT - 4.12.1-SNAPSHOT - 2.12.6-SNAPSHOT + 2.12.6 + 2.12.6 + 5.8.4 + 2.12.2 + 4.12.0 + 2.12.6 0.2.0 2.11.4 From f35ed460069549a6917b7ebbdee5ea3bed5ba731 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Wed, 24 Jul 2024 12:54:55 +0100 Subject: [PATCH 12/21] Prepare next release 2.12.6.1-SNAPSHOT --- opencga-analysis/pom.xml | 2 +- opencga-app/pom.xml | 2 +- opencga-catalog/pom.xml | 2 +- opencga-client/pom.xml | 2 +- opencga-clinical/pom.xml | 2 +- opencga-core/pom.xml | 2 +- opencga-master/pom.xml | 2 +- opencga-server/pom.xml | 2 +- opencga-storage/opencga-storage-app/pom.xml | 2 +- 
opencga-storage/opencga-storage-benchmark/pom.xml | 2 +- opencga-storage/opencga-storage-core/pom.xml | 2 +- .../opencga-storage-hadoop/opencga-storage-hadoop-core/pom.xml | 2 +- .../opencga-storage-hadoop-deps-emr6.1/pom.xml | 2 +- .../opencga-storage-hadoop-deps-hdp2.6/pom.xml | 2 +- .../opencga-storage-hadoop-deps-hdp3.1/pom.xml | 2 +- .../opencga-storage-hadoop/opencga-storage-hadoop-deps/pom.xml | 2 +- opencga-storage/opencga-storage-hadoop/pom.xml | 2 +- opencga-storage/opencga-storage-server/pom.xml | 2 +- opencga-storage/pom.xml | 2 +- opencga-test/pom.xml | 2 +- pom.xml | 2 +- 21 files changed, 21 insertions(+), 21 deletions(-) diff --git a/opencga-analysis/pom.xml b/opencga-analysis/pom.xml index e6cd5eabffb..d3036d76d4f 100644 --- a/opencga-analysis/pom.xml +++ b/opencga-analysis/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga - 2.12.6 + 2.12.6.1-SNAPSHOT ../pom.xml diff --git a/opencga-app/pom.xml b/opencga-app/pom.xml index c17445c0570..d1973d3109e 100644 --- a/opencga-app/pom.xml +++ b/opencga-app/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga - 2.12.6 + 2.12.6.1-SNAPSHOT ../pom.xml diff --git a/opencga-catalog/pom.xml b/opencga-catalog/pom.xml index 986afb42ef4..3e1b2d619b0 100644 --- a/opencga-catalog/pom.xml +++ b/opencga-catalog/pom.xml @@ -23,7 +23,7 @@ org.opencb.opencga opencga - 2.12.6 + 2.12.6.1-SNAPSHOT ../pom.xml diff --git a/opencga-client/pom.xml b/opencga-client/pom.xml index 4ba23d9eb1f..46b8cfd6f76 100644 --- a/opencga-client/pom.xml +++ b/opencga-client/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga - 2.12.6 + 2.12.6.1-SNAPSHOT ../pom.xml diff --git a/opencga-clinical/pom.xml b/opencga-clinical/pom.xml index 917c115a102..856af7335e3 100644 --- a/opencga-clinical/pom.xml +++ b/opencga-clinical/pom.xml @@ -5,7 +5,7 @@ org.opencb.opencga opencga - 2.12.6 + 2.12.6.1-SNAPSHOT ../pom.xml 4.0.0 diff --git a/opencga-core/pom.xml b/opencga-core/pom.xml index d1a843a369a..fb62d45951d 100644 --- a/opencga-core/pom.xml +++ 
b/opencga-core/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga - 2.12.6 + 2.12.6.1-SNAPSHOT ../pom.xml diff --git a/opencga-master/pom.xml b/opencga-master/pom.xml index a0b09511331..ae48d80da2e 100644 --- a/opencga-master/pom.xml +++ b/opencga-master/pom.xml @@ -22,7 +22,7 @@ opencga org.opencb.opencga - 2.12.6 + 2.12.6.1-SNAPSHOT ../pom.xml diff --git a/opencga-server/pom.xml b/opencga-server/pom.xml index 85ac42f2654..f02d2f42c4b 100644 --- a/opencga-server/pom.xml +++ b/opencga-server/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga - 2.12.6 + 2.12.6.1-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-app/pom.xml b/opencga-storage/opencga-storage-app/pom.xml index 726d8456aa3..3ccfe32b3ba 100644 --- a/opencga-storage/opencga-storage-app/pom.xml +++ b/opencga-storage/opencga-storage-app/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga-storage - 2.12.6 + 2.12.6.1-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-benchmark/pom.xml b/opencga-storage/opencga-storage-benchmark/pom.xml index ac49b0167ac..f4b18899665 100644 --- a/opencga-storage/opencga-storage-benchmark/pom.xml +++ b/opencga-storage/opencga-storage-benchmark/pom.xml @@ -22,7 +22,7 @@ opencga-storage org.opencb.opencga - 2.12.6 + 2.12.6.1-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-core/pom.xml b/opencga-storage/opencga-storage-core/pom.xml index 290972cc572..f5b784112fa 100644 --- a/opencga-storage/opencga-storage-core/pom.xml +++ b/opencga-storage/opencga-storage-core/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga-storage - 2.12.6 + 2.12.6.1-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/pom.xml index ddcd73ee837..c6f99ad3ea4 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/pom.xml @@ -23,7 
+23,7 @@ org.opencb.opencga opencga-storage-hadoop - 2.12.6 + 2.12.6.1-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-deps/opencga-storage-hadoop-deps-emr6.1/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-deps/opencga-storage-hadoop-deps-emr6.1/pom.xml index 64269c04696..b0143b64ede 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-deps/opencga-storage-hadoop-deps-emr6.1/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-deps/opencga-storage-hadoop-deps-emr6.1/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga-storage-hadoop-deps - 2.12.6 + 2.12.6.1-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-deps/opencga-storage-hadoop-deps-hdp2.6/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-deps/opencga-storage-hadoop-deps-hdp2.6/pom.xml index 575b5ef7b73..af8d695e22e 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-deps/opencga-storage-hadoop-deps-hdp2.6/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-deps/opencga-storage-hadoop-deps-hdp2.6/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga-storage-hadoop-deps - 2.12.6 + 2.12.6.1-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-deps/opencga-storage-hadoop-deps-hdp3.1/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-deps/opencga-storage-hadoop-deps-hdp3.1/pom.xml index 7583d28aa4f..40481f27836 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-deps/opencga-storage-hadoop-deps-hdp3.1/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-deps/opencga-storage-hadoop-deps-hdp3.1/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga-storage-hadoop-deps - 2.12.6 + 2.12.6.1-SNAPSHOT ../pom.xml diff --git 
a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-deps/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-deps/pom.xml index 1198b5718ab..7d2c4b3feb5 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-deps/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-deps/pom.xml @@ -50,7 +50,7 @@ org.opencb.opencga opencga-storage-hadoop - 2.12.6 + 2.12.6.1-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/pom.xml b/opencga-storage/opencga-storage-hadoop/pom.xml index f6cba2e9980..dc0ed33fa02 100644 --- a/opencga-storage/opencga-storage-hadoop/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/pom.xml @@ -28,7 +28,7 @@ org.opencb.opencga opencga-storage - 2.12.6 + 2.12.6.1-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-server/pom.xml b/opencga-storage/opencga-storage-server/pom.xml index 31cf6d9531f..a3708473206 100644 --- a/opencga-storage/opencga-storage-server/pom.xml +++ b/opencga-storage/opencga-storage-server/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga-storage - 2.12.6 + 2.12.6.1-SNAPSHOT ../pom.xml diff --git a/opencga-storage/pom.xml b/opencga-storage/pom.xml index 669b237ff25..5fcf5ffccf5 100644 --- a/opencga-storage/pom.xml +++ b/opencga-storage/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga - 2.12.6 + 2.12.6.1-SNAPSHOT ../pom.xml diff --git a/opencga-test/pom.xml b/opencga-test/pom.xml index 7e4d5f81c7f..9550459bafe 100644 --- a/opencga-test/pom.xml +++ b/opencga-test/pom.xml @@ -24,7 +24,7 @@ org.opencb.opencga opencga - 2.12.6 + 2.12.6.1-SNAPSHOT ../pom.xml diff --git a/pom.xml b/pom.xml index 3dfc10baa5a..3e367630b6a 100644 --- a/pom.xml +++ b/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga - 2.12.6 + 2.12.6.1-SNAPSHOT pom OpenCGA From 9a7dd156807cc9d16dea68b396d76156b54d9de0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Wed, 24 Jul 2024 12:59:25 +0100 Subject: [PATCH 13/21] storage: 
Hash large variants into solr collections. Add unhashed attr_id #TASK-6596 --- .../VariantStorageMetadataManager.java | 23 ++++++++++++ .../core/variant/VariantStorageEngine.java | 13 +++++++ .../core/variant/VariantStoragePipeline.java | 5 +-- .../SearchIndexVariantQueryExecutor.java | 14 ++++---- .../variant/search/VariantSearchModel.java | 28 +++++++++++++++ .../VariantSearchToVariantConverter.java | 29 +++++++++++---- .../variant/search/solr/SolrQueryParser.java | 27 +++++++------- .../variant/VariantStorageEngineSVTest.java | 36 +++++++++++++++---- .../VariantSearchToVariantConverterTest.java | 2 +- .../search/solr/SolrQueryParserTest.java | 4 +-- .../search/HadoopVariantSearchDataWriter.java | 2 +- 11 files changed, 143 insertions(+), 40 deletions(-) diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/metadata/VariantStorageMetadataManager.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/metadata/VariantStorageMetadataManager.java index 6c539e4b077..a65ea678479 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/metadata/VariantStorageMetadataManager.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/metadata/VariantStorageMetadataManager.java @@ -573,11 +573,15 @@ public ProjectMetadata getProjectMetadata() { public ProjectMetadata getAndUpdateProjectMetadata(ObjectMap options) throws StorageEngineException { ProjectMetadata projectMetadata = getProjectMetadata(); + + checkSameSpeciesAndAssembly(options, projectMetadata); if (options != null && (projectMetadata == null || StringUtils.isEmpty(projectMetadata.getSpecies()) && options.containsKey(SPECIES.key()) || StringUtils.isEmpty(projectMetadata.getAssembly()) && options.containsKey(ASSEMBLY.key()))) { projectMetadata = updateProjectMetadata(pm -> { + // Check again, in case it was updated by another thread +
checkSameSpeciesAndAssembly(options, pm); if (pm == null) { pm = new ProjectMetadata(); } @@ -598,6 +602,25 @@ public ProjectMetadata getAndUpdateProjectMetadata(ObjectMap options) throws Sto return projectMetadata; } + private static void checkSameSpeciesAndAssembly(ObjectMap options, ProjectMetadata projectMetadata) throws StorageEngineException { + if (options != null && projectMetadata != null) { + if (options.containsKey(ASSEMBLY.key())) { + if (StringUtils.isNotEmpty(projectMetadata.getAssembly()) && !projectMetadata.getAssembly() + .equalsIgnoreCase(options.getString(ASSEMBLY.key()))) { + throw new StorageEngineException("Incompatible assembly change from '" + projectMetadata.getAssembly() + "' to '" + + options.getString(ASSEMBLY.key()) + "'"); + } + } + if (options.containsKey(SPECIES.key())) { + if (StringUtils.isNotEmpty(projectMetadata.getSpecies()) && !projectMetadata.getSpecies() + .equalsIgnoreCase(toCellBaseSpeciesName(options.getString(SPECIES.key())))) { + throw new StorageEngineException("Incompatible species change from '" + projectMetadata.getSpecies() + "' to '" + + options.getString(SPECIES.key()) + "'"); + } + } + } + } + public DataResult getVariantFileMetadata(int studyId, int fileId, QueryOptions options) throws StorageEngineException { return fileDBAdaptor.getVariantFileMetadata(studyId, fileId, options); diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/VariantStorageEngine.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/VariantStorageEngine.java index 0f51fbe39eb..b541b2f4ae4 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/VariantStorageEngine.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/VariantStorageEngine.java @@ -1362,6 +1362,19 @@ public VariantQueryExecutor getVariantQueryExecutor(ParsedVariantQuery variantQu throw 
new VariantQueryException("No VariantQueryExecutor found to run the query!"); } + public final VariantQueryExecutor getVariantQueryExecutor(Class clazz) + throws StorageEngineException { + Optional first = getVariantQueryExecutors() + .stream() + .filter(clazz::isInstance) /* select by the requested executor type, not a hard-coded one */ + .findFirst(); + if (first.isPresent()) { + return first.get(); + } else { + throw new StorageEngineException("VariantQueryExecutor " + clazz + " not found"); + } + } + public Query preProcessQuery(Query originalQuery, QueryOptions options) { try { return getVariantQueryParser().preProcessQuery(originalQuery, options); diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/VariantStoragePipeline.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/VariantStoragePipeline.java index 5b37d2512b1..722d79e59fd 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/VariantStoragePipeline.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/VariantStoragePipeline.java @@ -60,10 +60,7 @@ import org.opencb.opencga.storage.core.io.plain.StringDataReader; import org.opencb.opencga.storage.core.io.plain.StringDataWriter; import org.opencb.opencga.storage.core.metadata.VariantStorageMetadataManager; -import org.opencb.opencga.storage.core.metadata.models.CohortMetadata; -import org.opencb.opencga.storage.core.metadata.models.FileMetadata; -import org.opencb.opencga.storage.core.metadata.models.StudyMetadata; -import org.opencb.opencga.storage.core.metadata.models.TaskMetadata; +import org.opencb.opencga.storage.core.metadata.models.*; import org.opencb.opencga.storage.core.variant.adaptors.GenotypeClass; import org.opencb.opencga.storage.core.variant.adaptors.VariantDBAdaptor; import org.opencb.opencga.storage.core.variant.io.VariantReaderUtils; diff --git
a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/search/SearchIndexVariantQueryExecutor.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/search/SearchIndexVariantQueryExecutor.java index bd6b8e6437f..899f71a9b72 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/search/SearchIndexVariantQueryExecutor.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/search/SearchIndexVariantQueryExecutor.java @@ -183,7 +183,9 @@ public VariantQueryResult approximateCount(ParsedVariantQuery variantQuery DataResult nativeResult = searchManager .nativeQuery(dbName, searchEngineQuery, queryOptions); - List variantIds = nativeResult.getResults().stream().map(VariantSearchModel::getId).collect(Collectors.toList()); + List variantIds = nativeResult.getResults().stream() + .map(VariantSearchModel::toVariantSimple) + .collect(Collectors.toList()); // Adjust numSamples if the results from SearchManager is smaller than numSamples // If this happens, the count is not approximated if (variantIds.size() < sampling) { @@ -283,12 +285,12 @@ public boolean doIntersectWithSearch(Query query, QueryOptions options) { return intersect; } - protected Iterator variantIdIteratorFromSearch(Query query) { + protected Iterator variantIdIteratorFromSearch(Query query) { return variantIdIteratorFromSearch(query, Integer.MAX_VALUE, 0, null); } - protected Iterator variantIdIteratorFromSearch(Query query, int limit, int skip, AtomicLong numTotalResults) { - Iterator variantsIterator; + protected Iterator variantIdIteratorFromSearch(Query query, int limit, int skip, AtomicLong numTotalResults) { + Iterator variantsIterator; QueryOptions queryOptions = new QueryOptions() .append(QueryOptions.LIMIT, limit) .append(QueryOptions.SKIP, skip) @@ -302,14 +304,14 @@ protected Iterator variantIdIteratorFromSearch(Query query, 
int limit, i } variantsIterator = nativeResult.getResults() .stream() - .map(VariantSearchModel::getId) + .map(VariantSearchModel::toVariantSimple) .iterator(); } else { SolrNativeIterator nativeIterator = searchManager.nativeIterator(dbName, query, queryOptions); if (numTotalResults != null) { numTotalResults.set(nativeIterator.getNumFound()); } - variantsIterator = Iterators.transform(nativeIterator, VariantSearchModel::getId); + variantsIterator = Iterators.transform(nativeIterator, VariantSearchModel::toVariantSimple); } } catch (VariantSearchException | IOException e) { throw new VariantQueryException("Error querying " + VariantSearchManager.SEARCH_ENGINE_ID, e); diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/search/VariantSearchModel.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/search/VariantSearchModel.java index 9b0bb69792c..835af18a0a0 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/search/VariantSearchModel.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/search/VariantSearchModel.java @@ -17,12 +17,15 @@ package org.opencb.opencga.storage.core.variant.search; import org.apache.solr.client.solrj.beans.Field; +import org.opencb.biodata.models.variant.Variant; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; +import static org.opencb.opencga.storage.core.variant.search.VariantSearchToVariantConverter.HASH_PREFIX; + /** * Created by wasim on 09/11/16. 
*/ @@ -140,6 +143,9 @@ public class VariantSearchModel { @Field("fileInfo_*") private Map fileInfo; + @Field("attr_*") + private Map attr; + public static final double MISSING_VALUE = -100.0; @@ -171,6 +177,7 @@ public VariantSearchModel() { this.qual = new HashMap<>(); this.filter = new HashMap<>(); this.fileInfo = new HashMap<>(); + this.attr = new HashMap<>(); } public VariantSearchModel(VariantSearchModel init) { @@ -210,6 +217,7 @@ public VariantSearchModel(VariantSearchModel init) { this.qual = init.getQual(); this.filter = init.getFilter(); this.fileInfo = init.getFileInfo(); + this.attr = init.getAttr(); } @Override @@ -251,6 +259,7 @@ public String toString() { sb.append(", qual=").append(qual); sb.append(", filter=").append(filter); sb.append(", fileInfo=").append(fileInfo); + sb.append(", attr=").append(attr); sb.append('}'); return sb.toString(); } @@ -259,6 +268,17 @@ public String getId() { return id; } + public Variant toVariantSimple() { + String variantId = getId(); + if (variantId.startsWith(HASH_PREFIX)) { + Object o = getAttr().get("attr_id"); + variantId = o instanceof String ? 
(String) o : ((List) o).get(0); + } + Variant variant = new Variant(variantId); + variant.setId(variantId); + return variant; + } + public VariantSearchModel setId(String id) { this.id = id; return this; @@ -579,4 +599,12 @@ public VariantSearchModel setFileInfo(Map fileInfo) { return this; } + public Map getAttr() { + return attr; + } + + public VariantSearchModel setAttr(Map attr) { + this.attr = attr; + return this; + } } diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/search/VariantSearchToVariantConverter.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/search/VariantSearchToVariantConverter.java index 62841c0a3b9..10f3b58ff3f 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/search/VariantSearchToVariantConverter.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/search/VariantSearchToVariantConverter.java @@ -56,6 +56,7 @@ public class VariantSearchToVariantConverter implements ComplexTypeConverter includeFields; @@ -79,10 +80,9 @@ public VariantSearchToVariantConverter(Set includeFields) { @Override public Variant convertToDataModelType(VariantSearchModel variantSearchModel) { // set chromosome, start, end, ref, alt from ID - Variant variant = new Variant(variantSearchModel.getId()); + Variant variant = variantSearchModel.toVariantSimple(); - // set ID, chromosome, start, end, ref, alt, type - variant.setId(variantSearchModel.getVariantId()); + // set chromosome, start, end, ref, alt, type // set variant type if (StringUtils.isNotEmpty(variantSearchModel.getType())) { @@ -662,8 +662,10 @@ public VariantSearchModel convertToStorageType(Variant variant) { List other = new ArrayList<>(); // Set general Variant attributes: id, dbSNP, chromosome, start, end, type - variantSearchModel.setId(variant.toString()); // Internal unique ID e.g. 
3:1000:AT:- - variantSearchModel.setVariantId(variant.getId()); + String variantId = getVariantId(variant); + variantSearchModel.setId(variantId); // Internal unique ID e.g. 3:1000:AT:- + variantSearchModel.setVariantId(variantId); + variantSearchModel.getAttr().put("attr_id", variant.toString()); variantSearchModel.setChromosome(variant.getChromosome()); variantSearchModel.setStart(variant.getStart()); variantSearchModel.setEnd(variant.getEnd()); @@ -1019,8 +1021,7 @@ public VariantSearchModel convertToStorageType(Variant variant) { // This field contains all possible IDs: id, dbSNP, names, genes, transcripts, protein, clinvar, hpo, ... // This will help when searching by variant id. This is added at the end of the method after collecting all IDs Set xrefs = variantAnnotationModelUtils.extractXRefs(variant.getAnnotation()); - xrefs.add(variantSearchModel.getId()); - xrefs.add(variantSearchModel.getVariantId()); + xrefs.add(variantId); if (variant.getNames() != null && !variant.getNames().isEmpty()) { variant.getNames().forEach(name -> { if (name != null) { @@ -1032,6 +1033,20 @@ public VariantSearchModel convertToStorageType(Variant variant) { return variantSearchModel; } + public static String getVariantId(Variant variant) { + String variantString = variant.toString(); + if (variantString.length() > 32766) { + // variantString.length() >= Short.MAX_VALUE + return hashVariantId(variant, variantString); + } else { + return variantString; + } + } + + public static String hashVariantId(Variant variant, String variantString) { + return HASH_PREFIX + variant.getChromosome() + ":" + variant.getStart() + ":" + Integer.toString(variantString.hashCode()); + } + private void convertStudies(Variant variant, VariantSearchModel variantSearchModel, List other) { // Sanity check if (CollectionUtils.isEmpty(variant.getStudies())) { diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/search/solr/SolrQueryParser.java 
b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/search/solr/SolrQueryParser.java index 0cf045ada4c..a618fa22af4 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/search/solr/SolrQueryParser.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/search/solr/SolrQueryParser.java @@ -25,7 +25,6 @@ import org.apache.solr.client.solrj.SolrQuery; import org.apache.solr.common.SolrException; import org.opencb.biodata.models.core.Region; -import org.opencb.biodata.models.variant.Variant; import org.opencb.commons.datastore.core.Query; import org.opencb.commons.datastore.core.QueryOptions; import org.opencb.commons.datastore.solr.FacetQueryParser; @@ -35,7 +34,10 @@ import org.opencb.opencga.storage.core.variant.adaptors.VariantField; import org.opencb.opencga.storage.core.variant.adaptors.VariantQueryException; import org.opencb.opencga.storage.core.variant.adaptors.VariantQueryParam; -import org.opencb.opencga.storage.core.variant.query.*; +import org.opencb.opencga.storage.core.variant.query.KeyOpValue; +import org.opencb.opencga.storage.core.variant.query.ParsedVariantQuery; +import org.opencb.opencga.storage.core.variant.query.Values; +import org.opencb.opencga.storage.core.variant.query.VariantQueryParser; import org.opencb.opencga.storage.core.variant.query.projection.VariantQueryProjectionParser; import org.opencb.opencga.storage.core.variant.search.VariantSearchToVariantConverter; import org.slf4j.Logger; @@ -79,7 +81,7 @@ public class SolrQueryParser { static { includeMap = new HashMap<>(); - includeMap.put("id", "id,variantId"); + includeMap.put("id", "id,variantId,attr_id"); includeMap.put("chromosome", "chromosome"); includeMap.put("start", "start"); includeMap.put("end", "end"); @@ -477,7 +479,9 @@ private String parseGenomicFilter(Query query) { genes.addAll(variantQueryXref.getGenes()); 
xrefs.addAll(variantQueryXref.getIds()); xrefs.addAll(variantQueryXref.getOtherXrefs()); - xrefs.addAll(variantQueryXref.getVariants().stream().map(Variant::toString).collect(Collectors.toList())); + xrefs.addAll(variantQueryXref.getVariants().stream() + .map(VariantSearchToVariantConverter::getVariantId) + .collect(Collectors.toList())); // Regions if (StringUtils.isNotEmpty(query.getString(REGION.key()))) { @@ -1616,15 +1620,12 @@ private String[] includeFieldsWithMandatory(String[] includes) { return new String[0]; } - String[] mandatoryIncludeFields = new String[]{"id", "chromosome", "start", "end", "type"}; - String[] includeWithMandatory = new String[includes.length + mandatoryIncludeFields.length]; - for (int i = 0; i < includes.length; i++) { - includeWithMandatory[i] = includes[i]; - } - for (int i = 0; i < mandatoryIncludeFields.length; i++) { - includeWithMandatory[includes.length + i] = mandatoryIncludeFields[i]; - } - return includeWithMandatory; + Set mandatoryIncludeFields = new HashSet<>(Arrays.asList("id", "attr_id", "chromosome", "start", "end", "type")); + Set includeWithMandatory = new LinkedHashSet<>(includes.length + mandatoryIncludeFields.size()); + + includeWithMandatory.addAll(Arrays.asList(includes)); + includeWithMandatory.addAll(mandatoryIncludeFields); + return includeWithMandatory.toArray(new String[0]); } /** diff --git a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/VariantStorageEngineSVTest.java b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/VariantStorageEngineSVTest.java index 64fc14c4c6a..05f090a162f 100644 --- a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/VariantStorageEngineSVTest.java +++ b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/VariantStorageEngineSVTest.java @@ -1,6 +1,7 @@ package org.opencb.opencga.storage.core.variant; import 
org.junit.Before; +import org.junit.ClassRule; import org.junit.Ignore; import org.junit.Test; import org.opencb.biodata.formats.variant.io.VariantReader; @@ -20,6 +21,11 @@ import org.opencb.opencga.storage.core.variant.adaptors.VariantQueryParam; import org.opencb.opencga.storage.core.variant.adaptors.iterators.VariantDBIterator; import org.opencb.opencga.storage.core.variant.io.VariantWriterFactory; +import org.opencb.opencga.storage.core.variant.query.ParsedVariantQuery; +import org.opencb.opencga.storage.core.variant.query.VariantQueryResult; +import org.opencb.opencga.storage.core.variant.query.executors.VariantQueryExecutor; +import org.opencb.opencga.storage.core.variant.search.SearchIndexVariantQueryExecutor; +import org.opencb.opencga.storage.core.variant.solr.VariantSolrExternalResource; import java.net.URI; import java.nio.file.Paths; @@ -49,22 +55,27 @@ public abstract class VariantStorageEngineSVTest extends VariantStorageBaseTest protected static URI input2; protected static URI input3; + @ClassRule + public static VariantSolrExternalResource solr = new VariantSolrExternalResource(); + @Before public void before() throws Exception { if (!loaded) { clearDB(DB_NAME); + } + variantStorageEngine.getConfiguration().getCellbase().setUrl(ParamConstants.CELLBASE_URL); + variantStorageEngine.getConfiguration().getCellbase().setVersion(ParamConstants.CELLBASE_VERSION); + variantStorageEngine.getConfiguration().getCellbase().setDataRelease(ParamConstants.CELLBASE_DATA_RELEASE); + variantStorageEngine.getOptions().put(VariantStorageOptions.ASSEMBLY.key(), "grch38"); + variantStorageEngine.reloadCellbaseConfiguration(); + solr.configure(variantStorageEngine); + if (!loaded) { loadFiles(); loaded = true; } } protected void loadFiles() throws Exception { - variantStorageEngine.getConfiguration().getCellbase().setUrl(ParamConstants.CELLBASE_URL); - variantStorageEngine.getConfiguration().getCellbase().setVersion("v5.2"); - 
variantStorageEngine.getConfiguration().getCellbase().setDataRelease("3"); - variantStorageEngine.getOptions().put(VariantStorageOptions.ASSEMBLY.key(), "grch38"); - variantStorageEngine.reloadCellbaseConfiguration(); - input1 = getResourceUri("variant-test-sv.vcf"); studyMetadata = new StudyMetadata(1, "s1"); variantStorageEngine.getOptions().append(VariantStorageOptions.ANNOTATOR_CELLBASE_EXCLUDE.key(), "expression,clinical"); @@ -86,6 +97,7 @@ protected void loadFiles() throws Exception { .append(VariantStorageOptions.STATS_CALCULATE.key(), true) .append(VariantStorageOptions.ASSEMBLY.key(), "grch38")); + variantStorageEngine.secondaryIndex(); } @Test @@ -106,6 +118,18 @@ public void checkCount() throws Exception { assertEquals(expected, count); } + @Test + public void checkSecondaryAnnotationIndex() throws Exception { + VariantQueryExecutor variantQueryExecutor = variantStorageEngine.getVariantQueryExecutor(SearchIndexVariantQueryExecutor.class); + for (Variant variant : variantStorageEngine) { + ParsedVariantQuery query = variantStorageEngine + .parseQuery(new Query(VariantQueryParam.ID.key(), variant.toString()), new QueryOptions()); + VariantQueryResult result = variantQueryExecutor.get(query); + assertEquals(1, result.getNumResults()); + assertEquals(variant.toString(), result.first().toString()); + } + } + @Test public void checkCorrectnessFile1() throws Exception { checkCorrectness(VariantStorageEngineSVTest.input1); diff --git a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/search/VariantSearchToVariantConverterTest.java b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/search/VariantSearchToVariantConverterTest.java index 5839db1745b..539ea8c22d6 100644 --- a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/search/VariantSearchToVariantConverterTest.java +++ 
b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/search/VariantSearchToVariantConverterTest.java @@ -49,7 +49,7 @@ public void test() throws Exception { expectedVariant.addStudyEntry(aux.getStudy("2")); VariantSearchModel variantSearchModel = converter.convertToStorageType(expectedVariant); - assertNull(variantSearchModel.getVariantId()); + assertNotNull(variantSearchModel.getVariantId()); assertEquals(variantId, variantSearchModel.getId()); Variant actualVariant = converter.convertToDataModelType(variantSearchModel); diff --git a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/search/solr/SolrQueryParserTest.java b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/search/solr/SolrQueryParserTest.java index a74bcd8f8ed..6890ad7534d 100644 --- a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/search/solr/SolrQueryParserTest.java +++ b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/search/solr/SolrQueryParserTest.java @@ -43,8 +43,8 @@ public class SolrQueryParserTest { private String studyName = "platinum"; private String flBase = "fl=other,geneToSoAcc,traits,type,soAcc,score_*,sift,passStats_*,caddRaw,biotypes,polyphenDesc,studies,end,id,variantId," - + "popFreq_*,caddScaled,genes,chromosome,xrefs,start,gerp,polyphen,siftDesc," - + "phastCons,phylop,altStats_*,id,chromosome,start,end,type"; + + "popFreq_*,caddScaled,genes,chromosome,xrefs,start,gerp,polyphen,attr_id,siftDesc," + + "phastCons,phylop,altStats_*"; private String flDefault1 = flBase + ",fileInfo__*,qual__*,filter__*,sampleFormat__*"; private String flDefaultStudy = flBase + ",fileInfo__" + studyName + "__*,qual__" + studyName + "__*," + "filter__" + studyName + "__*,sampleFormat__" + studyName + "__*"; diff --git 
a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/search/HadoopVariantSearchDataWriter.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/search/HadoopVariantSearchDataWriter.java index 17c61739496..39c63923c02 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/search/HadoopVariantSearchDataWriter.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/search/HadoopVariantSearchDataWriter.java @@ -68,7 +68,7 @@ protected void add(List batch) throws Exception { return PhoenixHelper.toBytes(studyIds, PIntegerArray.INSTANCE); }); - byte[] row = VariantPhoenixKeyFactory.generateVariantRowKey(new Variant(document.getFieldValue("id").toString())); + byte[] row = VariantPhoenixKeyFactory.generateVariantRowKey(new Variant(document.getFieldValue("attr_id").toString())); variantRows.add(row); mutations.add(new Put(row) .addColumn(family, VariantPhoenixSchema.VariantColumn.INDEX_STUDIES.bytes(), bytes)); From 5d3c7f461aa20e898bbd8e56b28e61bdc3cb65fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Wed, 24 Jul 2024 17:01:14 +0100 Subject: [PATCH 14/21] storage: Fix some tests. 
#TASK-6596 --- .../core/variant/VariantStorageEngineBNDTest.java | 2 ++ .../variant/dummy/DummyProjectMetadataAdaptor.java | 5 ++--- .../variant/HadoopVariantStorageEngineBNDTest.java | 2 +- .../variant/index/family/FamilyIndexTest.java | 14 +++++++------- 4 files changed, 12 insertions(+), 11 deletions(-) diff --git a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/VariantStorageEngineBNDTest.java b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/VariantStorageEngineBNDTest.java index 08dabda7562..0e90e5a174a 100644 --- a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/VariantStorageEngineBNDTest.java +++ b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/VariantStorageEngineBNDTest.java @@ -48,6 +48,7 @@ public void before() throws Exception { variantStorageEngine.getConfiguration().getCellbase().setUrl(ParamConstants.CELLBASE_URL); variantStorageEngine.getConfiguration().getCellbase().setVersion(ParamConstants.CELLBASE_VERSION); variantStorageEngine.getConfiguration().getCellbase().setDataRelease(ParamConstants.CELLBASE_DATA_RELEASE); + variantStorageEngine.getOptions().put(VariantStorageOptions.ASSEMBLY.key(), "grch38"); if (!loaded) { clearDB(DB_NAME); loadFiles(); @@ -59,6 +60,7 @@ protected void loadFiles() throws Exception { variantStorageEngine.getConfiguration().getCellbase().setUrl(ParamConstants.CELLBASE_URL); variantStorageEngine.getConfiguration().getCellbase().setVersion(ParamConstants.CELLBASE_VERSION); variantStorageEngine.getConfiguration().getCellbase().setDataRelease(ParamConstants.CELLBASE_DATA_RELEASE); + variantStorageEngine.getOptions().put(VariantStorageOptions.ASSEMBLY.key(), "grch38"); studyMetadata = new StudyMetadata(1, "s1"); // variantStorageEngine.getOptions().append(VariantStorageOptions.ANNOTATOR_CELLBASE_EXCLUDE.key(), "expression,clinical"); input1 = 
getResourceUri("variant-test-bnd.vcf"); diff --git a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/dummy/DummyProjectMetadataAdaptor.java b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/dummy/DummyProjectMetadataAdaptor.java index d223180d9d1..3ba92ed7f1c 100644 --- a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/dummy/DummyProjectMetadataAdaptor.java +++ b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/dummy/DummyProjectMetadataAdaptor.java @@ -45,10 +45,9 @@ public void refresh() { @Override public synchronized DataResult getProjectMetadata() { final DataResult result = new DataResult<>(); - if (projectMetadata == null) { - projectMetadata = new ProjectMetadata("hsapiens", "grch37", 1); + if (projectMetadata != null) { + result.setResults(Collections.singletonList(projectMetadata.copy())); } - result.setResults(Collections.singletonList(projectMetadata.copy())); return result; } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/HadoopVariantStorageEngineBNDTest.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/HadoopVariantStorageEngineBNDTest.java index aea720d356a..b613df935ba 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/HadoopVariantStorageEngineBNDTest.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/HadoopVariantStorageEngineBNDTest.java @@ -19,7 +19,7 @@ public class HadoopVariantStorageEngineBNDTest extends VariantStorageEngineBNDTe @Override protected void loadFiles() throws Exception { super.loadFiles(); - 
VariantHbaseTestUtils.printVariants(getVariantStorageEngine().getDBAdaptor(), newOutputUri()); + VariantHbaseTestUtils.printVariants(((HadoopVariantStorageEngine) variantStorageEngine).getDBAdaptor(), newOutputUri()); } } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/family/FamilyIndexTest.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/family/FamilyIndexTest.java index f67d5734d7c..a6aeba91145 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/family/FamilyIndexTest.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/family/FamilyIndexTest.java @@ -63,13 +63,13 @@ public class FamilyIndexTest extends VariantStorageBaseTest implements HadoopVar @Before public void before() throws Exception { + HadoopVariantStorageEngine variantStorageEngine = getVariantStorageEngine(); + variantStorageEngine.getConfiguration().getCellbase().setUrl(ParamConstants.CELLBASE_URL); + variantStorageEngine.getConfiguration().getCellbase().setVersion("v5.2"); + variantStorageEngine.getConfiguration().getCellbase().setDataRelease("3"); + variantStorageEngine.getOptions().put(VariantStorageOptions.ASSEMBLY.key(), "grch38"); + variantStorageEngine.reloadCellbaseConfiguration(); if (!loaded) { - HadoopVariantStorageEngine variantStorageEngine = getVariantStorageEngine(); - variantStorageEngine.getConfiguration().getCellbase().setUrl(ParamConstants.CELLBASE_URL); - variantStorageEngine.getConfiguration().getCellbase().setVersion("v5.2"); - variantStorageEngine.getConfiguration().getCellbase().setDataRelease("3"); - variantStorageEngine.getOptions().put(VariantStorageOptions.ASSEMBLY.key(), "grch38"); - 
variantStorageEngine.reloadCellbaseConfiguration(); URI outputUri = newOutputUri(); ObjectMap params = new ObjectMap(VariantStorageOptions.ANNOTATE.key(), false) @@ -91,7 +91,7 @@ public void before() throws Exception { variantStorageEngine.annotate(outputUri, new ObjectMap()); - VariantHbaseTestUtils.printVariants(getVariantStorageEngine().getDBAdaptor(), newOutputUri(getTestName().getMethodName())); + VariantHbaseTestUtils.printVariants(variantStorageEngine.getDBAdaptor(), newOutputUri(getTestName().getMethodName())); mendelianErrorVariants = new HashSet<>(); deNovoVariants = new HashSet<>(); From 84af7c166e5270391394cd995108b08c6c774ace Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Wed, 24 Jul 2024 18:51:37 +0100 Subject: [PATCH 15/21] storage: Fix NPE at some tests. #TASK-6596 --- .../core/metadata/VariantMetadataConverterTest.java | 2 ++ .../storage/core/variant/io/VariantWriterFactoryTest.java | 8 ++++++-- .../storage/hadoop/variant/gaps/FillGapsTaskTest.java | 2 ++ 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/metadata/VariantMetadataConverterTest.java b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/metadata/VariantMetadataConverterTest.java index 2aacde2e3ce..30e4bba2e5f 100644 --- a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/metadata/VariantMetadataConverterTest.java +++ b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/metadata/VariantMetadataConverterTest.java @@ -9,6 +9,7 @@ import org.junit.experimental.categories.Category; import org.opencb.biodata.models.variant.VariantFileMetadata; import org.opencb.biodata.models.variant.metadata.VariantMetadata; +import org.opencb.commons.datastore.core.ObjectMap; import org.opencb.opencga.core.testclassification.duration.ShortTests; import 
org.opencb.opencga.storage.core.io.managers.IOConnectorProvider; import org.opencb.opencga.storage.core.io.managers.LocalIOConnector; @@ -45,6 +46,7 @@ public class VariantMetadataConverterTest { @Before public void setUp() throws Exception { metadataManager = new VariantStorageMetadataManager(new DummyVariantStorageMetadataDBAdaptorFactory()); + projectMetadata = metadataManager.getAndUpdateProjectMetadata(new ObjectMap()); URI uri = VariantStorageBaseTest.getResourceUri("platinum/1K.end.platinum-genomes-vcf-NA12877_S1.genome.vcf.gz"); variantReaderUtils = new VariantReaderUtils(new IOConnectorProvider(LocalIOConnector.class)); diff --git a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/io/VariantWriterFactoryTest.java b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/io/VariantWriterFactoryTest.java index 32ef120f888..776abd3874d 100644 --- a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/io/VariantWriterFactoryTest.java +++ b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/io/VariantWriterFactoryTest.java @@ -23,11 +23,13 @@ import org.opencb.biodata.models.variant.Variant; import org.opencb.biodata.models.variant.metadata.VariantFileHeader; import org.opencb.biodata.models.variant.metadata.VariantFileHeaderComplexLine; +import org.opencb.commons.datastore.core.ObjectMap; import org.opencb.commons.datastore.core.Query; import org.opencb.commons.datastore.core.QueryOptions; import org.opencb.commons.io.DataWriter; import org.opencb.opencga.core.testclassification.duration.ShortTests; import org.opencb.opencga.storage.core.exceptions.StorageEngineException; +import org.opencb.opencga.storage.core.metadata.VariantStorageMetadataManager; import org.opencb.opencga.storage.core.metadata.models.StudyMetadata; import org.opencb.opencga.storage.core.variant.dummy.DummyVariantDBAdaptor; import 
org.opencb.opencga.storage.core.variant.dummy.DummyVariantStorageEngine; @@ -86,8 +88,10 @@ public void testContigLengthNull() throws IOException, StorageEngineException { new VariantFileHeaderComplexLine("contig", "chr3", null, null, null, Collections.singletonMap("length", ".")), new VariantFileHeaderComplexLine("contig", "chr4", null, null, null, Collections.singletonMap("length", "1234")) )); - StudyMetadata study = dbAdaptor.getMetadataManager().createStudy("study"); - dbAdaptor.getMetadataManager().unsecureUpdateStudyMetadata(study.setVariantHeader(header)); + VariantStorageMetadataManager metadataManager = dbAdaptor.getMetadataManager(); + metadataManager.getAndUpdateProjectMetadata(new ObjectMap()); + StudyMetadata study = metadataManager.createStudy("study"); + metadataManager.unsecureUpdateStudyMetadata(study.setVariantHeader(header)); ByteArrayOutputStream outputStream = new ByteArrayOutputStream(10000); DataWriter writer = new VariantWriterFactory(dbAdaptor).newDataWriter( VariantWriterFactory.VariantOutputFormat.VCF, diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/gaps/FillGapsTaskTest.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/gaps/FillGapsTaskTest.java index 3d9db73719e..63f579cb99e 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/gaps/FillGapsTaskTest.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/gaps/FillGapsTaskTest.java @@ -13,6 +13,7 @@ import org.opencb.biodata.models.variant.metadata.VariantFileHeaderComplexLine; import org.opencb.biodata.models.variant.protobuf.VcfSliceProtos; import org.opencb.biodata.tools.variant.converters.proto.VariantToVcfSliceConverter; +import 
org.opencb.commons.datastore.core.ObjectMap; import org.opencb.opencga.core.testclassification.duration.ShortTests; import org.opencb.opencga.storage.core.metadata.VariantStorageMetadataManager; import org.opencb.opencga.storage.core.metadata.models.StudyMetadata; @@ -44,6 +45,7 @@ public class FillGapsTaskTest { public void setUp() throws Exception { DummyVariantStorageMetadataDBAdaptorFactory.clear(); metadataManager = new VariantStorageMetadataManager(new DummyVariantStorageMetadataDBAdaptorFactory()); + metadataManager.getAndUpdateProjectMetadata(new ObjectMap()); studyMetadata = metadataManager.createStudy("S"); metadataManager.updateStudyMetadata("S", sm -> { sm.getAttributes().put(VariantStorageOptions.EXTRA_FORMAT_FIELDS.key(), "DP"); From f7b4b52883ab25868e61bdc6ba6b6a4f07f7548f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Thu, 25 Jul 2024 08:43:52 +0100 Subject: [PATCH 16/21] storage: Fix VariantDBAdaptorTest. #TASK-6596 --- .../hadoop/variant/adaptors/HadoopVariantDBAdaptorTest.java | 1 + 1 file changed, 1 insertion(+) diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/adaptors/HadoopVariantDBAdaptorTest.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/adaptors/HadoopVariantDBAdaptorTest.java index ca0d268d4f5..b2339a3cd7e 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/adaptors/HadoopVariantDBAdaptorTest.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/adaptors/HadoopVariantDBAdaptorTest.java @@ -119,6 +119,7 @@ public void before() throws Exception { e.printStackTrace(); } } + variantStorageEngine.getOptions().append(VariantStorageOptions.ASSEMBLY.key(), "GRCH38"); cellBaseUtils = 
variantStorageEngine.getCellBaseUtils(); expectedConnections = GlobalClientMetrics.GLOBAL_OPEN_PHOENIX_CONNECTIONS.getMetric().getTotalSum(); } From 8cdc46cb0f4eaecee5d5534881b52bcccf8e26de Mon Sep 17 00:00:00 2001 From: JuanfeSanahuja Date: Fri, 26 Jul 2024 11:19:16 +0200 Subject: [PATCH 17/21] Prepare release 2.12.6.1 --- opencga-analysis/pom.xml | 2 +- opencga-app/pom.xml | 2 +- opencga-catalog/pom.xml | 2 +- opencga-client/pom.xml | 2 +- opencga-clinical/pom.xml | 2 +- opencga-core/pom.xml | 2 +- opencga-master/pom.xml | 2 +- opencga-server/pom.xml | 2 +- opencga-storage/opencga-storage-app/pom.xml | 2 +- opencga-storage/opencga-storage-benchmark/pom.xml | 2 +- opencga-storage/opencga-storage-core/pom.xml | 2 +- .../opencga-storage-hadoop-core/pom.xml | 2 +- .../opencga-storage-hadoop-deps-emr6.1/pom.xml | 2 +- .../opencga-storage-hadoop-deps-hdp2.6/pom.xml | 2 +- .../opencga-storage-hadoop-deps-hdp3.1/pom.xml | 2 +- .../opencga-storage-hadoop-deps/pom.xml | 2 +- opencga-storage/opencga-storage-hadoop/pom.xml | 2 +- opencga-storage/opencga-storage-server/pom.xml | 2 +- opencga-storage/pom.xml | 2 +- opencga-test/pom.xml | 2 +- pom.xml | 6 +++--- 21 files changed, 23 insertions(+), 23 deletions(-) diff --git a/opencga-analysis/pom.xml b/opencga-analysis/pom.xml index d3036d76d4f..936c61eadeb 100644 --- a/opencga-analysis/pom.xml +++ b/opencga-analysis/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga - 2.12.6.1-SNAPSHOT + 2.12.6.1 ../pom.xml diff --git a/opencga-app/pom.xml b/opencga-app/pom.xml index d1973d3109e..bc0aa70b4d3 100644 --- a/opencga-app/pom.xml +++ b/opencga-app/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga - 2.12.6.1-SNAPSHOT + 2.12.6.1 ../pom.xml diff --git a/opencga-catalog/pom.xml b/opencga-catalog/pom.xml index 3e1b2d619b0..633264eb919 100644 --- a/opencga-catalog/pom.xml +++ b/opencga-catalog/pom.xml @@ -23,7 +23,7 @@ org.opencb.opencga opencga - 2.12.6.1-SNAPSHOT + 2.12.6.1 ../pom.xml diff --git a/opencga-client/pom.xml 
b/opencga-client/pom.xml index 46b8cfd6f76..56067742d78 100644 --- a/opencga-client/pom.xml +++ b/opencga-client/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga - 2.12.6.1-SNAPSHOT + 2.12.6.1 ../pom.xml diff --git a/opencga-clinical/pom.xml b/opencga-clinical/pom.xml index 856af7335e3..ecd91094c72 100644 --- a/opencga-clinical/pom.xml +++ b/opencga-clinical/pom.xml @@ -5,7 +5,7 @@ org.opencb.opencga opencga - 2.12.6.1-SNAPSHOT + 2.12.6.1 ../pom.xml 4.0.0 diff --git a/opencga-core/pom.xml b/opencga-core/pom.xml index fb62d45951d..fec3eae4696 100644 --- a/opencga-core/pom.xml +++ b/opencga-core/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga - 2.12.6.1-SNAPSHOT + 2.12.6.1 ../pom.xml diff --git a/opencga-master/pom.xml b/opencga-master/pom.xml index ae48d80da2e..83e92525e37 100644 --- a/opencga-master/pom.xml +++ b/opencga-master/pom.xml @@ -22,7 +22,7 @@ opencga org.opencb.opencga - 2.12.6.1-SNAPSHOT + 2.12.6.1 ../pom.xml diff --git a/opencga-server/pom.xml b/opencga-server/pom.xml index f02d2f42c4b..6967cfa50f1 100644 --- a/opencga-server/pom.xml +++ b/opencga-server/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga - 2.12.6.1-SNAPSHOT + 2.12.6.1 ../pom.xml diff --git a/opencga-storage/opencga-storage-app/pom.xml b/opencga-storage/opencga-storage-app/pom.xml index 3ccfe32b3ba..b95f3959f09 100644 --- a/opencga-storage/opencga-storage-app/pom.xml +++ b/opencga-storage/opencga-storage-app/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga-storage - 2.12.6.1-SNAPSHOT + 2.12.6.1 ../pom.xml diff --git a/opencga-storage/opencga-storage-benchmark/pom.xml b/opencga-storage/opencga-storage-benchmark/pom.xml index f4b18899665..ecffe332c36 100644 --- a/opencga-storage/opencga-storage-benchmark/pom.xml +++ b/opencga-storage/opencga-storage-benchmark/pom.xml @@ -22,7 +22,7 @@ opencga-storage org.opencb.opencga - 2.12.6.1-SNAPSHOT + 2.12.6.1 ../pom.xml diff --git a/opencga-storage/opencga-storage-core/pom.xml b/opencga-storage/opencga-storage-core/pom.xml index 
f5b784112fa..a649118bc8b 100644 --- a/opencga-storage/opencga-storage-core/pom.xml +++ b/opencga-storage/opencga-storage-core/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga-storage - 2.12.6.1-SNAPSHOT + 2.12.6.1 ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/pom.xml index c6f99ad3ea4..cc7eaacba34 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/pom.xml @@ -23,7 +23,7 @@ org.opencb.opencga opencga-storage-hadoop - 2.12.6.1-SNAPSHOT + 2.12.6.1 ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-deps/opencga-storage-hadoop-deps-emr6.1/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-deps/opencga-storage-hadoop-deps-emr6.1/pom.xml index b0143b64ede..8f1b560364a 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-deps/opencga-storage-hadoop-deps-emr6.1/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-deps/opencga-storage-hadoop-deps-emr6.1/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga-storage-hadoop-deps - 2.12.6.1-SNAPSHOT + 2.12.6.1 ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-deps/opencga-storage-hadoop-deps-hdp2.6/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-deps/opencga-storage-hadoop-deps-hdp2.6/pom.xml index af8d695e22e..b217db7ac21 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-deps/opencga-storage-hadoop-deps-hdp2.6/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-deps/opencga-storage-hadoop-deps-hdp2.6/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga-storage-hadoop-deps - 2.12.6.1-SNAPSHOT + 2.12.6.1 ../pom.xml diff --git 
a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-deps/opencga-storage-hadoop-deps-hdp3.1/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-deps/opencga-storage-hadoop-deps-hdp3.1/pom.xml index 40481f27836..954c3b945e1 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-deps/opencga-storage-hadoop-deps-hdp3.1/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-deps/opencga-storage-hadoop-deps-hdp3.1/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga-storage-hadoop-deps - 2.12.6.1-SNAPSHOT + 2.12.6.1 ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-deps/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-deps/pom.xml index 7d2c4b3feb5..8612e1a6661 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-deps/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-deps/pom.xml @@ -50,7 +50,7 @@ org.opencb.opencga opencga-storage-hadoop - 2.12.6.1-SNAPSHOT + 2.12.6.1 ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/pom.xml b/opencga-storage/opencga-storage-hadoop/pom.xml index dc0ed33fa02..b8cff73a473 100644 --- a/opencga-storage/opencga-storage-hadoop/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/pom.xml @@ -28,7 +28,7 @@ org.opencb.opencga opencga-storage - 2.12.6.1-SNAPSHOT + 2.12.6.1 ../pom.xml diff --git a/opencga-storage/opencga-storage-server/pom.xml b/opencga-storage/opencga-storage-server/pom.xml index a3708473206..8379af29027 100644 --- a/opencga-storage/opencga-storage-server/pom.xml +++ b/opencga-storage/opencga-storage-server/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga-storage - 2.12.6.1-SNAPSHOT + 2.12.6.1 ../pom.xml diff --git a/opencga-storage/pom.xml b/opencga-storage/pom.xml index 5fcf5ffccf5..c43e60d7a91 100644 --- a/opencga-storage/pom.xml +++ b/opencga-storage/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga - 2.12.6.1-SNAPSHOT + 
2.12.6.1 ../pom.xml diff --git a/opencga-test/pom.xml b/opencga-test/pom.xml index 9550459bafe..27934f088aa 100644 --- a/opencga-test/pom.xml +++ b/opencga-test/pom.xml @@ -24,7 +24,7 @@ org.opencb.opencga opencga - 2.12.6.1-SNAPSHOT + 2.12.6.1 ../pom.xml diff --git a/pom.xml b/pom.xml index 3e367630b6a..3b2c6ea239f 100644 --- a/pom.xml +++ b/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga - 2.12.6.1-SNAPSHOT + 2.12.6.1 pom OpenCGA @@ -43,8 +43,8 @@ - 2.12.6 - 2.12.6 + + 2.12.6.1 5.8.4 2.12.2 4.12.0 From 9dbf1f873e21a6d7f0881faeca3fd06fd0ec9a0e Mon Sep 17 00:00:00 2001 From: JuanfeSanahuja Date: Tue, 30 Jul 2024 09:58:46 +0200 Subject: [PATCH 18/21] Prepare Port Patch 1.10.6.1 -> 2.2.1 #TASK-6515 --- opencga-analysis/pom.xml | 2 +- opencga-app/pom.xml | 2 +- opencga-catalog/pom.xml | 2 +- opencga-client/pom.xml | 2 +- opencga-clinical/pom.xml | 2 +- opencga-core/pom.xml | 2 +- opencga-master/pom.xml | 2 +- opencga-server/pom.xml | 2 +- opencga-storage/opencga-storage-app/pom.xml | 2 +- opencga-storage/opencga-storage-benchmark/pom.xml | 2 +- opencga-storage/opencga-storage-core/pom.xml | 2 +- .../opencga-storage-hadoop/opencga-storage-hadoop-core/pom.xml | 2 +- .../opencga-storage-hadoop-deps-emr6.1/pom.xml | 2 +- .../opencga-storage-hadoop-deps-hdp2.6/pom.xml | 2 +- .../opencga-storage-hadoop-deps-hdp3.1/pom.xml | 2 +- .../opencga-storage-hadoop/opencga-storage-hadoop-deps/pom.xml | 2 +- opencga-storage/opencga-storage-hadoop/pom.xml | 2 +- opencga-storage/opencga-storage-server/pom.xml | 2 +- opencga-storage/pom.xml | 2 +- opencga-test/pom.xml | 2 +- pom.xml | 2 +- 21 files changed, 21 insertions(+), 21 deletions(-) diff --git a/opencga-analysis/pom.xml b/opencga-analysis/pom.xml index 936c61eadeb..7c32e987cdc 100644 --- a/opencga-analysis/pom.xml +++ b/opencga-analysis/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga - 2.12.6.1 + 3.2.1-SNAPSHOT ../pom.xml diff --git a/opencga-app/pom.xml b/opencga-app/pom.xml index bc0aa70b4d3..d0ab2466da5 100644 --- 
a/opencga-app/pom.xml +++ b/opencga-app/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga - 2.12.6.1 + 3.2.1-SNAPSHOT ../pom.xml diff --git a/opencga-catalog/pom.xml b/opencga-catalog/pom.xml index 633264eb919..4cbfa454138 100644 --- a/opencga-catalog/pom.xml +++ b/opencga-catalog/pom.xml @@ -23,7 +23,7 @@ org.opencb.opencga opencga - 2.12.6.1 + 3.2.1-SNAPSHOT ../pom.xml diff --git a/opencga-client/pom.xml b/opencga-client/pom.xml index 56067742d78..043c1c5788d 100644 --- a/opencga-client/pom.xml +++ b/opencga-client/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga - 2.12.6.1 + 3.2.1-SNAPSHOT ../pom.xml diff --git a/opencga-clinical/pom.xml b/opencga-clinical/pom.xml index ecd91094c72..02a59d91cf6 100644 --- a/opencga-clinical/pom.xml +++ b/opencga-clinical/pom.xml @@ -5,7 +5,7 @@ org.opencb.opencga opencga - 2.12.6.1 + 3.2.1-SNAPSHOT ../pom.xml 4.0.0 diff --git a/opencga-core/pom.xml b/opencga-core/pom.xml index fec3eae4696..c59da1f754d 100644 --- a/opencga-core/pom.xml +++ b/opencga-core/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga - 2.12.6.1 + 3.2.1-SNAPSHOT ../pom.xml diff --git a/opencga-master/pom.xml b/opencga-master/pom.xml index 83e92525e37..8c8ab880796 100644 --- a/opencga-master/pom.xml +++ b/opencga-master/pom.xml @@ -22,7 +22,7 @@ opencga org.opencb.opencga - 2.12.6.1 + 3.2.1-SNAPSHOT ../pom.xml diff --git a/opencga-server/pom.xml b/opencga-server/pom.xml index 6967cfa50f1..2b5d462abff 100644 --- a/opencga-server/pom.xml +++ b/opencga-server/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga - 2.12.6.1 + 3.2.1-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-app/pom.xml b/opencga-storage/opencga-storage-app/pom.xml index b95f3959f09..2f760ec9164 100644 --- a/opencga-storage/opencga-storage-app/pom.xml +++ b/opencga-storage/opencga-storage-app/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga-storage - 2.12.6.1 + 3.2.1-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-benchmark/pom.xml 
b/opencga-storage/opencga-storage-benchmark/pom.xml index ecffe332c36..7b26545a09d 100644 --- a/opencga-storage/opencga-storage-benchmark/pom.xml +++ b/opencga-storage/opencga-storage-benchmark/pom.xml @@ -22,7 +22,7 @@ opencga-storage org.opencb.opencga - 2.12.6.1 + 3.2.1-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-core/pom.xml b/opencga-storage/opencga-storage-core/pom.xml index a649118bc8b..06b0412be9d 100644 --- a/opencga-storage/opencga-storage-core/pom.xml +++ b/opencga-storage/opencga-storage-core/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga-storage - 2.12.6.1 + 3.2.1-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/pom.xml index cc7eaacba34..6d7a6649ad7 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/pom.xml @@ -23,7 +23,7 @@ org.opencb.opencga opencga-storage-hadoop - 2.12.6.1 + 3.2.1-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-deps/opencga-storage-hadoop-deps-emr6.1/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-deps/opencga-storage-hadoop-deps-emr6.1/pom.xml index 8f1b560364a..84c787463ec 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-deps/opencga-storage-hadoop-deps-emr6.1/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-deps/opencga-storage-hadoop-deps-emr6.1/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga-storage-hadoop-deps - 2.12.6.1 + 3.2.1-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-deps/opencga-storage-hadoop-deps-hdp2.6/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-deps/opencga-storage-hadoop-deps-hdp2.6/pom.xml index b217db7ac21..f9c584917b1 100644 --- 
a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-deps/opencga-storage-hadoop-deps-hdp2.6/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-deps/opencga-storage-hadoop-deps-hdp2.6/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga-storage-hadoop-deps - 2.12.6.1 + 3.2.1-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-deps/opencga-storage-hadoop-deps-hdp3.1/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-deps/opencga-storage-hadoop-deps-hdp3.1/pom.xml index 954c3b945e1..bc999c1656f 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-deps/opencga-storage-hadoop-deps-hdp3.1/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-deps/opencga-storage-hadoop-deps-hdp3.1/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga-storage-hadoop-deps - 2.12.6.1 + 3.2.1-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-deps/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-deps/pom.xml index 8612e1a6661..b35c5350d64 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-deps/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-deps/pom.xml @@ -50,7 +50,7 @@ org.opencb.opencga opencga-storage-hadoop - 2.12.6.1 + 3.2.1-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/pom.xml b/opencga-storage/opencga-storage-hadoop/pom.xml index b8cff73a473..52de24ee0b9 100644 --- a/opencga-storage/opencga-storage-hadoop/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/pom.xml @@ -28,7 +28,7 @@ org.opencb.opencga opencga-storage - 2.12.6.1 + 3.2.1-SNAPSHOT ../pom.xml diff --git a/opencga-storage/opencga-storage-server/pom.xml b/opencga-storage/opencga-storage-server/pom.xml index 8379af29027..2a91d2753e4 100644 --- a/opencga-storage/opencga-storage-server/pom.xml +++ 
b/opencga-storage/opencga-storage-server/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga-storage - 2.12.6.1 + 3.2.1-SNAPSHOT ../pom.xml diff --git a/opencga-storage/pom.xml b/opencga-storage/pom.xml index c43e60d7a91..4a47e44df35 100644 --- a/opencga-storage/pom.xml +++ b/opencga-storage/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga - 2.12.6.1 + 3.2.1-SNAPSHOT ../pom.xml diff --git a/opencga-test/pom.xml b/opencga-test/pom.xml index 27934f088aa..2e7253d9889 100644 --- a/opencga-test/pom.xml +++ b/opencga-test/pom.xml @@ -24,7 +24,7 @@ org.opencb.opencga opencga - 2.12.6.1 + 3.2.1-SNAPSHOT ../pom.xml diff --git a/pom.xml b/pom.xml index 3b2c6ea239f..5dd8f303bed 100644 --- a/pom.xml +++ b/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga - 2.12.6.1 + 3.2.1-SNAPSHOT pom OpenCGA From 00ddc0a0d6be471aaf41715595242c5b341c1fa2 Mon Sep 17 00:00:00 2001 From: JuanfeSanahuja Date: Fri, 9 Aug 2024 12:43:17 +0200 Subject: [PATCH 19/21] Fix compile error CELLBASE_DATA_RELEASE_GRCH38 #TASK-6515 --- .../storage/core/variant/VariantStorageEngineSVTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/VariantStorageEngineSVTest.java b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/VariantStorageEngineSVTest.java index 05f090a162f..1573b27fd1d 100644 --- a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/VariantStorageEngineSVTest.java +++ b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/VariantStorageEngineSVTest.java @@ -65,7 +65,7 @@ public void before() throws Exception { } variantStorageEngine.getConfiguration().getCellbase().setUrl(ParamConstants.CELLBASE_URL); variantStorageEngine.getConfiguration().getCellbase().setVersion(ParamConstants.CELLBASE_VERSION); - 
variantStorageEngine.getConfiguration().getCellbase().setDataRelease(ParamConstants.CELLBASE_DATA_RELEASE); + variantStorageEngine.getConfiguration().getCellbase().setDataRelease(ParamConstants.CELLBASE_DATA_RELEASE_GRCH38); variantStorageEngine.getOptions().put(VariantStorageOptions.ASSEMBLY.key(), "grch38"); variantStorageEngine.reloadCellbaseConfiguration(); solr.configure(variantStorageEngine); From d32104498c863eeed159d9ff4494e7aa8466f013 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Tue, 13 Aug 2024 10:42:24 +0100 Subject: [PATCH 20/21] storage: Fix merge issue with ASSEMBLY. #TASK-6515 --- .../storage/core/variant/VariantStorageEngineBNDTest.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/VariantStorageEngineBNDTest.java b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/VariantStorageEngineBNDTest.java index cd8e37b6444..a675e5304b0 100644 --- a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/VariantStorageEngineBNDTest.java +++ b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/VariantStorageEngineBNDTest.java @@ -48,6 +48,7 @@ public void before() throws Exception { variantStorageEngine.getConfiguration().getCellbase().setUrl(ParamConstants.CELLBASE_URL); variantStorageEngine.getConfiguration().getCellbase().setVersion(ParamConstants.CELLBASE_VERSION); variantStorageEngine.getConfiguration().getCellbase().setDataRelease(ParamConstants.CELLBASE_DATA_RELEASE_GRCH38); + variantStorageEngine.getOptions().put(VariantStorageOptions.ASSEMBLY.key(), "grch38"); if (!loaded) { clearDB(DB_NAME); @@ -60,7 +61,7 @@ protected void loadFiles() throws Exception { variantStorageEngine.getConfiguration().getCellbase().setUrl(ParamConstants.CELLBASE_URL); 
variantStorageEngine.getConfiguration().getCellbase().setVersion(ParamConstants.CELLBASE_VERSION); variantStorageEngine.getConfiguration().getCellbase().setDataRelease(ParamConstants.CELLBASE_DATA_RELEASE_GRCH38); - + variantStorageEngine.getOptions().put(VariantStorageOptions.ASSEMBLY.key(), "grch38"); studyMetadata = new StudyMetadata(1, "s1"); // variantStorageEngine.getOptions().append(VariantStorageOptions.ANNOTATOR_CELLBASE_EXCLUDE.key(), "expression,clinical"); input1 = getResourceUri("variant-test-bnd.vcf"); From 8c1cde14620673bba3cd6c25552abce2fb7ee757 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Tue, 13 Aug 2024 16:35:48 +0100 Subject: [PATCH 21/21] storage: Embedded solr should be optional in tests. #TASK-6515 --- .../variant/VariantStorageEngineSVTest.java | 10 ++--- .../HadoopVariantStorageEngineSVTest.java | 45 +++++++++++++++++-- 2 files changed, 44 insertions(+), 11 deletions(-) diff --git a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/VariantStorageEngineSVTest.java b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/VariantStorageEngineSVTest.java index 1573b27fd1d..2523fad5ba2 100644 --- a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/VariantStorageEngineSVTest.java +++ b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/VariantStorageEngineSVTest.java @@ -1,7 +1,7 @@ package org.opencb.opencga.storage.core.variant; +import org.junit.Assume; import org.junit.Before; -import org.junit.ClassRule; import org.junit.Ignore; import org.junit.Test; import org.opencb.biodata.formats.variant.io.VariantReader; @@ -25,7 +25,6 @@ import org.opencb.opencga.storage.core.variant.query.VariantQueryResult; import org.opencb.opencga.storage.core.variant.query.executors.VariantQueryExecutor; import 
org.opencb.opencga.storage.core.variant.search.SearchIndexVariantQueryExecutor; -import org.opencb.opencga.storage.core.variant.solr.VariantSolrExternalResource; import java.net.URI; import java.nio.file.Paths; @@ -55,9 +54,6 @@ public abstract class VariantStorageEngineSVTest extends VariantStorageBaseTest protected static URI input2; protected static URI input3; - @ClassRule - public static VariantSolrExternalResource solr = new VariantSolrExternalResource(); - @Before public void before() throws Exception { if (!loaded) { @@ -68,7 +64,7 @@ public void before() throws Exception { variantStorageEngine.getConfiguration().getCellbase().setDataRelease(ParamConstants.CELLBASE_DATA_RELEASE_GRCH38); variantStorageEngine.getOptions().put(VariantStorageOptions.ASSEMBLY.key(), "grch38"); variantStorageEngine.reloadCellbaseConfiguration(); - solr.configure(variantStorageEngine); + if (!loaded) { loadFiles(); loaded = true; @@ -97,7 +93,6 @@ protected void loadFiles() throws Exception { .append(VariantStorageOptions.STATS_CALCULATE.key(), true) .append(VariantStorageOptions.ASSEMBLY.key(), "grch38")); - variantStorageEngine.secondaryIndex(); } @Test @@ -120,6 +115,7 @@ public void checkCount() throws Exception { @Test public void checkSecondaryAnnotationIndex() throws Exception { + Assume.assumeTrue(variantStorageEngine.secondaryAnnotationIndexActiveAndAlive()); VariantQueryExecutor variantQueryExecutor = variantStorageEngine.getVariantQueryExecutor(SearchIndexVariantQueryExecutor.class); for (Variant variant : variantStorageEngine) { ParsedVariantQuery query = variantStorageEngine diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/HadoopVariantStorageEngineSVTest.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/HadoopVariantStorageEngineSVTest.java index fbcbd773477..26bcb49bb0f 100644 --- 
a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/HadoopVariantStorageEngineSVTest.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/HadoopVariantStorageEngineSVTest.java @@ -1,8 +1,6 @@ package org.opencb.opencga.storage.hadoop.variant; -import org.junit.Assert; -import org.junit.ClassRule; -import org.junit.Test; +import org.junit.*; import org.junit.experimental.categories.Category; import org.opencb.biodata.models.variant.StudyEntry; import org.opencb.biodata.models.variant.Variant; @@ -12,13 +10,15 @@ import org.opencb.opencga.storage.core.variant.adaptors.GenotypeClass; import org.opencb.opencga.storage.core.variant.adaptors.VariantQuery; import org.opencb.opencga.storage.core.variant.query.VariantQueryResult; +import org.opencb.opencga.storage.core.variant.solr.VariantSolrExternalResource; +import org.opencb.opencga.storage.hadoop.HBaseCompat; import org.opencb.opencga.storage.hadoop.variant.adaptors.VariantHadoopDBAdaptor; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.util.*; -import static org.junit.Assert.assertEquals; +import static org.junit.Assert.*; import static org.opencb.opencga.core.api.ParamConstants.OVERWRITE; /** @@ -33,9 +33,46 @@ public class HadoopVariantStorageEngineSVTest extends VariantStorageEngineSVTest public static HadoopExternalResource externalResource = new HadoopExternalResource(); private Logger logger = LoggerFactory.getLogger(getClass()); + public static VariantSolrExternalResource solr = new VariantSolrExternalResource(); + + @BeforeClass + public static void beforeClass() throws Exception { + if (HBaseCompat.getInstance().isSolrTestingAvailable()) { + solr.before(); + solr.configure(externalResource.getVariantStorageEngine()); + System.out.println("Start embedded solr"); + } else { + System.out.println("Skip embedded solr tests"); + } + } + + 
@AfterClass + public static void afterClass() throws Exception { + if (HBaseCompat.getInstance().isSolrTestingAvailable()) { + solr.after(); + } + } + + @Override + public void before() throws Exception { + super.before(); + if (HBaseCompat.getInstance().isSolrTestingAvailable()) { + solr.configure(variantStorageEngine); + } + } + @Override protected void loadFiles() throws Exception { + if (HBaseCompat.getInstance().isSolrTestingAvailable()) { + solr.configure(variantStorageEngine); + } super.loadFiles(); + if (HBaseCompat.getInstance().isSolrTestingAvailable()) { + variantStorageEngine.secondaryIndex(); + assertTrue(variantStorageEngine.secondaryAnnotationIndexActiveAndAlive()); + } else { + assertFalse(variantStorageEngine.secondaryAnnotationIndexActiveAndAlive()); + } VariantHbaseTestUtils.printVariants(getVariantStorageEngine().getDBAdaptor(), newOutputUri(getTestName().getMethodName())); }