diff --git a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/utils/ParamUtils.java b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/utils/ParamUtils.java index 7d6eb326acc..c39d111d8c7 100644 --- a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/utils/ParamUtils.java +++ b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/utils/ParamUtils.java @@ -124,9 +124,9 @@ public static void checkValidUserId(String userId) throws CatalogParameterExcept if (userId.equals(ParamConstants.ANONYMOUS_USER_ID) || userId.equals(ParamConstants.REGISTERED_USERS)) { throw new CatalogParameterException("User id cannot be one of the reserved OpenCGA users."); } - if (!userId.matches("^[A-Za-z0-9]([-_.]?[A-Za-z0-9])*$")) { + if (!userId.matches("^[A-Za-z0-9]([-_.@]?[A-Za-z0-9])*$")) { throw new CatalogParameterException("Invalid user id. Id needs to start by any character and might contain single '-', '_', " - + "'.', symbols followed by any character or number."); + + "'.' or '@' symbols followed by any character or number."); } } diff --git a/opencga-catalog/src/test/java/org/opencb/opencga/catalog/managers/CatalogManagerTest.java b/opencga-catalog/src/test/java/org/opencb/opencga/catalog/managers/CatalogManagerTest.java index 8e468806d04..2b403314f4a 100644 --- a/opencga-catalog/src/test/java/org/opencb/opencga/catalog/managers/CatalogManagerTest.java +++ b/opencga-catalog/src/test/java/org/opencb/opencga/catalog/managers/CatalogManagerTest.java @@ -49,6 +49,7 @@ import org.opencb.opencga.core.models.sample.*; import org.opencb.opencga.core.models.study.*; import org.opencb.opencga.core.models.user.Account; +import org.opencb.opencga.core.models.user.AuthenticationResponse; import org.opencb.opencga.core.models.user.User; import org.opencb.opencga.core.response.OpenCGAResult; import org.opencb.opencga.core.testclassification.duration.MediumTests; @@ -276,6 +277,16 @@ private String getAdminToken() throws CatalogException, IOException { return catalogManager.getUserManager().loginAsAdmin("admin").getToken(); } + @Test + public void createUserUsingMailAsId() throws CatalogException { + catalogManager.getUserManager().create(new User().setId("hello.mail@mymail.org").setName("Hello") + .setAccount(new Account().setType(Account.AccountType.GUEST)), TestParamConstants.PASSWORD, opencgaToken); + AuthenticationResponse login = catalogManager.getUserManager().login("hello.mail@mymail.org", TestParamConstants.PASSWORD); + assertNotNull(login); + User user = catalogManager.getUserManager().get("hello.mail@mymail.org", new QueryOptions(), login.getToken()).first(); + assertEquals("hello.mail@mymail.org", user.getId()); + } + @Test public void getGroupsTest() throws CatalogException { Group group = new Group("groupId", Arrays.asList("user2", "user3")).setSyncedFrom(new Group.Sync("ldap", "bio")); diff --git a/opencga-core/src/main/java/org/opencb/opencga/core/cellbase/CellBaseValidator.java b/opencga-core/src/main/java/org/opencb/opencga/core/cellbase/CellBaseValidator.java index 6d7eafd1503..5ed331143db 100644 --- a/opencga-core/src/main/java/org/opencb/opencga/core/cellbase/CellBaseValidator.java +++ b/opencga-core/src/main/java/org/opencb/opencga/core/cellbase/CellBaseValidator.java @@ -9,6 +9,7 @@ import org.opencb.cellbase.core.config.SpeciesProperties; import org.opencb.cellbase.core.models.DataRelease; import org.opencb.cellbase.core.result.CellBaseDataResponse; +import org.opencb.commons.datastore.core.ObjectMap; import org.opencb.commons.datastore.core.QueryOptions; import org.opencb.commons.utils.VersionUtils; @@ -307,9 +308,13 @@ private static String majorMinor(String version) { public String getVersionFromServer() throws IOException { if (serverVersion == null) { synchronized (this) { - String serverVersion = cellBaseClient.getMetaClient().about().firstResult().getString("Version"); + ObjectMap result = retryMetaAbout(3); + if (result == null) { + throw new IOException("Unable to get version from server for cellbase " + toString()); + } + String serverVersion = result.getString("Version"); if (StringUtils.isEmpty(serverVersion)) { - serverVersion = cellBaseClient.getMetaClient().about().firstResult().getString("Version: "); + serverVersion = result.getString("Version: "); } this.serverVersion = serverVersion; } @@ -317,6 +322,16 @@ public String getVersionFromServer() throws IOException { return serverVersion; } + private ObjectMap retryMetaAbout(int retries) throws IOException { + ObjectMap result = cellBaseClient.getMetaClient().about().firstResult(); + if (result == null && retries > 0) { + // Retry + logger.warn("Unable to get version from server for cellbase " + toString() + ". Retrying..."); + result = retryMetaAbout(retries - 1); + } + return result; + } + public boolean isMinVersion(String minVersion) throws IOException { String serverVersion = getVersionFromServer(); return VersionUtils.isMinVersion(minVersion, serverVersion); diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/metadata/VariantStorageMetadataManager.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/metadata/VariantStorageMetadataManager.java index 65812bd7017..46872f47103 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/metadata/VariantStorageMetadataManager.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/metadata/VariantStorageMetadataManager.java @@ -1135,9 +1135,14 @@ public Iterable getInvalidCohorts(int studyId) { return () -> Iterators.filter(cohortIterator(studyId), CohortMetadata::isInvalid); } - public Iterable getCalculatedOrInvalidCohorts(int studyId) { + public Iterable getCalculatedOrPartialCohorts(int studyId) { return () -> Iterators.filter(cohortIterator(studyId), - cohortMetadata -> cohortMetadata.isStatsReady() || cohortMetadata.isInvalid()); + cohortMetadata -> { + TaskMetadata.Status status = cohortMetadata.getStatsStatus(); + return status == TaskMetadata.Status.READY + || status == TaskMetadata.Status.RUNNING + || status == TaskMetadata.Status.ERROR; + }); } public CohortMetadata setSamplesToCohort(int studyId, String cohortName, Collection samples) throws StorageEngineException { diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/converters/VariantAnnotationModelUtils.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/converters/VariantAnnotationModelUtils.java index c31b7137236..a4d40cf3d56 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/converters/VariantAnnotationModelUtils.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/converters/VariantAnnotationModelUtils.java @@ -1,11 +1,15 @@ package org.opencb.opencga.storage.core.variant.annotation.converters; import org.apache.commons.collections4.CollectionUtils; +import org.apache.commons.lang3.StringUtils; import org.opencb.biodata.models.variant.avro.*; +import org.opencb.opencga.storage.core.variant.query.VariantQueryUtils; import java.util.HashSet; import java.util.List; import java.util.Set; +import java.util.regex.Matcher; +import java.util.regex.Pattern; public class VariantAnnotationModelUtils { @@ -21,19 +25,24 @@ public class VariantAnnotationModelUtils { * - annotation.consequenceTypes.transcriptId * - annotation.consequenceTypes.ensemblTranscriptId * - annotation.consequenceTypes.hgvs + * - annotation.consequenceTypes.proteinVariantAnnotation.proteinId * - annotation.consequenceTypes.proteinVariantAnnotation.uniprotAccession * - annotation.consequenceTypes.proteinVariantAnnotation.uniprotName * - annotation.consequenceTypes.proteinVariantAnnotation.uniprotVariantId * - annotation.consequenceTypes.proteinVariantAnnotation.features.id * - annotation.traitAssociation.id - * - annotation.geneTraitAssociation.hpo * - annotation.geneTraitAssociation.id + * - annotation.geneTraitAssociation.hpo + * - annotation.pharmacogenomics.id + * - annotation.pharmacogenomics.name * * @param variantAnnotation VariantAnnotation object * @return Set of XRefs */ + private static final Pattern HGVS_PATTERN = Pattern.compile("\\([^()]*\\)"); + public Set extractXRefs(VariantAnnotation variantAnnotation) { - Set xrefs = new HashSet<>(); + Set xrefs = new HashSet<>(100); if (variantAnnotation == null) { return xrefs; @@ -41,7 +50,7 @@ public Set extractXRefs(VariantAnnotation variantAnnotation) { xrefs.add(variantAnnotation.getId()); - if (variantAnnotation.getXrefs() != null) { + if (CollectionUtils.isNotEmpty(variantAnnotation.getXrefs())) { for (Xref xref : variantAnnotation.getXrefs()) { if (xref != null) { xrefs.add(xref.getId()); @@ -49,26 +58,55 @@ public Set extractXRefs(VariantAnnotation variantAnnotation) { } } - if (variantAnnotation.getHgvs() != null) { + List consequenceTypes = variantAnnotation.getConsequenceTypes(); + + if (CollectionUtils.isNotEmpty(variantAnnotation.getHgvs())) { xrefs.addAll(variantAnnotation.getHgvs()); + + // TODO Remove this code when CellBase 6.4.0 returns the expected HGVS + for (String hgvs: variantAnnotation.getHgvs()) { + if (VariantQueryUtils.isTranscript(hgvs)) { + // 1. Remove the content between parentheses, e.g. ENST00000680783.1(ENSG00000135744):c.776T>C + if (hgvs.contains("(")) { + Matcher matcher = HGVS_PATTERN.matcher(hgvs); + StringBuffer result = new StringBuffer(); + while (matcher.find()) { + matcher.appendReplacement(result, ""); + } + matcher.appendTail(result); + xrefs.add(result.toString()); + } + + // 2. Add the HGVS with the Ensembl and gene name, e.g. ENSG00000135744:c.776T>C, AGT:c.776T>C + if (CollectionUtils.isNotEmpty(consequenceTypes)) { + for (ConsequenceType conseqType : consequenceTypes) { + if (conseqType != null && conseqType.getHgvs() != null && conseqType.getHgvs().contains(hgvs)) { + String[] fields = hgvs.split(":", 2); + if (StringUtils.isNotEmpty(conseqType.getGeneId())) { + xrefs.add(conseqType.getGeneId() + ":" + fields[1]); + } + if (StringUtils.isNotEmpty(conseqType.getGeneName())) { + xrefs.add(conseqType.getGeneName() + ":" + fields[1]); + } + break; + } + } + } + } + } } - List consequenceTypes = variantAnnotation.getConsequenceTypes(); - if (consequenceTypes != null) { + if (CollectionUtils.isNotEmpty(consequenceTypes)) { for (ConsequenceType conseqType : consequenceTypes) { xrefs.add(conseqType.getGeneName()); xrefs.add(conseqType.getGeneId()); - xrefs.add(conseqType.getEnsemblGeneId()); xrefs.add(conseqType.getTranscriptId()); + xrefs.add(conseqType.getEnsemblGeneId()); xrefs.add(conseqType.getEnsemblTranscriptId()); - if (conseqType.getHgvs() != null) { - xrefs.addAll(conseqType.getHgvs()); - } - ProteinVariantAnnotation protVarAnnotation = conseqType.getProteinVariantAnnotation(); if (protVarAnnotation != null) { - + xrefs.add(protVarAnnotation.getProteinId()); xrefs.add(protVarAnnotation.getUniprotAccession()); xrefs.add(protVarAnnotation.getUniprotName()); xrefs.add(protVarAnnotation.getUniprotVariantId()); @@ -80,7 +118,6 @@ public Set extractXRefs(VariantAnnotation variantAnnotation) { } } } - } if (CollectionUtils.isNotEmpty(variantAnnotation.getTraitAssociation())) { @@ -89,10 +126,17 @@ public Set extractXRefs(VariantAnnotation variantAnnotation) { } } - if (variantAnnotation.getGeneTraitAssociation() != null) { + if (CollectionUtils.isNotEmpty(variantAnnotation.getGeneTraitAssociation())) { for (GeneTraitAssociation geneTrait : variantAnnotation.getGeneTraitAssociation()) { - xrefs.add(geneTrait.getHpo()); xrefs.add(geneTrait.getId()); + xrefs.add(geneTrait.getHpo()); + } + } + + if (CollectionUtils.isNotEmpty(variantAnnotation.getPharmacogenomics())) { + for (Pharmacogenomics pharmacogenomics : variantAnnotation.getPharmacogenomics()) { + xrefs.add(pharmacogenomics.getId()); + xrefs.add(pharmacogenomics.getName()); } } diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/VariantQueryUtils.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/VariantQueryUtils.java index a538092ef4f..9cf773d001f 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/VariantQueryUtils.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/VariantQueryUtils.java @@ -467,6 +467,19 @@ public static boolean isVariantAccession(String value) { return value.startsWith("rs") || value.startsWith("VAR_"); } + /** + * Determines if the given value might be a known transcript or not. + * Ensembl transcripts start with `ENST` + * RefSeq transcripts start with `NM_` and `XM_` + * See ... + * + * @param value Value to check + * @return If is a known transcript + */ + public static boolean isTranscript(String value) { + return value.startsWith("ENST") || value.startsWith("NM_") || value.startsWith("XM_"); + } + /** * Determines if the given value is a HGVS. * diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/projection/VariantQueryProjectionParser.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/projection/VariantQueryProjectionParser.java index 3ec0c32c08a..bdcd501ad88 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/projection/VariantQueryProjectionParser.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/projection/VariantQueryProjectionParser.java @@ -8,6 +8,7 @@ import org.opencb.opencga.storage.core.metadata.models.CohortMetadata; import org.opencb.opencga.storage.core.metadata.models.FileMetadata; import org.opencb.opencga.storage.core.metadata.models.StudyMetadata; +import org.opencb.opencga.storage.core.metadata.models.TaskMetadata; import org.opencb.opencga.storage.core.variant.VariantStorageEngine; import org.opencb.opencga.storage.core.variant.adaptors.VariantField; import org.opencb.opencga.storage.core.variant.adaptors.VariantQueryException; @@ -134,9 +135,10 @@ public VariantQueryProjection parseVariantQueryProjection(Query query, QueryOpti for (VariantQueryProjection.StudyVariantQueryProjection study : studies.values()) { int studyId = study.getId(); List cohorts = new LinkedList<>(); - for (CohortMetadata cohort : metadataManager.getCalculatedOrInvalidCohorts(studyId)) { + for (CohortMetadata cohort : metadataManager.getCalculatedOrPartialCohorts(studyId)) { cohorts.add(cohort.getId()); - if (cohort.isInvalid()) { + TaskMetadata.Status status = cohort.getStatsStatus(); + if (status == TaskMetadata.Status.ERROR) { String message = "Please note that the Cohort Stats for " + "'" + study.getName() + ":" + cohort.getName() + "' are currently outdated."; int numSampmles = cohort.getSamples().size(); @@ -147,6 +149,10 @@ public VariantQueryProjection parseVariantQueryProjection(Query query, QueryOpti } message += " To display updated statistics, please execute variant-stats-index."; events.add(new Event(Event.Type.WARNING, message)); + } else if (status == TaskMetadata.Status.RUNNING) { + String message = "Please note that the Cohort Stats for " + + "'" + study.getName() + ":" + cohort.getName() + "' are currently being calculated."; + events.add(new Event(Event.Type.WARNING, message)); } } study.setCohorts(cohorts); diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/stats/DefaultVariantStatisticsManager.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/stats/DefaultVariantStatisticsManager.java index c85c63b1505..db6ead71f58 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/stats/DefaultVariantStatisticsManager.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/stats/DefaultVariantStatisticsManager.java @@ -474,6 +474,7 @@ protected VariantStatsDBWriter newVariantStatisticsDBWriter(VariantDBAdaptor dbA // // } + @Deprecated void checkAndUpdateCalculatedCohorts(StudyMetadata studyMetadata, URI uri, boolean updateStats) throws IOException, StorageEngineException { Set cohortNames = readCohortsFromStatsFile(uri); diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/stats/VariantStatisticsManager.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/stats/VariantStatisticsManager.java index fe6765529fa..d4fdc948622 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/stats/VariantStatisticsManager.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/stats/VariantStatisticsManager.java @@ -112,6 +112,7 @@ public void postCalculateStats( } } + @Deprecated public static void checkAndUpdateCalculatedCohorts( VariantStorageMetadataManager metadataManager, StudyMetadata studyMetadata, Collection cohorts, boolean updateStats) throws StorageEngineException { diff --git a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/annotation/converters/VariantAnnotationModelUtilsTest.java b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/annotation/converters/VariantAnnotationModelUtilsTest.java new file mode 100644 index 00000000000..f1167213be9 --- /dev/null +++ b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/annotation/converters/VariantAnnotationModelUtilsTest.java @@ -0,0 +1,46 @@ +package org.opencb.opencga.storage.core.variant.annotation.converters; + +import junit.framework.TestCase; +import org.junit.experimental.categories.Category; +import org.opencb.biodata.models.variant.avro.ConsequenceType; +import org.opencb.biodata.models.variant.avro.VariantAnnotation; +import org.opencb.biodata.models.variant.avro.Xref; +import org.opencb.opencga.core.testclassification.duration.ShortTests; + +import java.util.Arrays; +import java.util.Collections; +import java.util.Set; + +@Category(ShortTests.class) +public class VariantAnnotationModelUtilsTest extends TestCase { + + public void testXrefsHgvs() throws Exception { + VariantAnnotation variantAnnotation = new VariantAnnotation(); + variantAnnotation.setId("id"); + variantAnnotation.setXrefs(Collections.singletonList(new Xref("xref1", "source"))); + variantAnnotation.setHgvs(Arrays.asList( + "ENST00000680783.1(ENSG00000135744):c.776T>C", + "ENSP00000451720.1:p.Asn134Lys" + )); + ConsequenceType ct = new ConsequenceType(); + ct.setGeneName("GENE"); + ct.setHgvs(variantAnnotation.getHgvs()); + ct.setGeneId(null); + variantAnnotation.setConsequenceTypes(Arrays.asList(ct, new ConsequenceType())); + Set xrefs = new VariantAnnotationModelUtils().extractXRefs(variantAnnotation); + + assertEquals(7, xrefs.size()); + // Default fields + assertTrue(xrefs.contains("id")); + assertTrue(xrefs.contains("xref1")); + assertTrue(xrefs.contains("GENE")); + + // Untouched hgvs, not starting with ENST or NM_ + assertTrue(xrefs.contains("ENSP00000451720.1:p.Asn134Lys")); + + assertTrue(xrefs.contains("ENST00000680783.1(ENSG00000135744):c.776T>C")); + assertTrue(xrefs.contains("ENST00000680783.1:c.776T>C")); + assertTrue(xrefs.contains("GENE:c.776T>C")); + + } +} \ No newline at end of file diff --git a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/query/executors/VariantQueryExecutorTest.java b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/query/executors/VariantQueryExecutorTest.java index c58328d4469..778257d5999 100644 --- a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/query/executors/VariantQueryExecutorTest.java +++ b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/query/executors/VariantQueryExecutorTest.java @@ -139,6 +139,12 @@ public void testXRefRs() throws StorageEngineException { with("Cosmic", EvidenceEntry::getId, is("COSV60260399")))))); matchers.put("ENST00000341832.11(ENSG00000248333):c.356-1170A>G", hasAnnotation(with("HGVS", VariantAnnotation::getHgvs, hasItem( is("ENST00000341832.11(ENSG00000248333):c.356-1170A>G"))))); + matchers.put("ENST00000341832.11:c.356-1170A>G", hasAnnotation(with("HGVS", VariantAnnotation::getHgvs, hasItem( + // The variant annotation may not have the "alternate" hgvs + is("ENST00000341832.11(ENSG00000248333):c.356-1170A>G"))))); + matchers.put("ENSG00000248333:c.356-1170A>G", hasAnnotation(with("HGVS", VariantAnnotation::getHgvs, hasItem( + // The variant annotation may not have the "alternate" hgvs + is("ENST00000341832.11(ENSG00000248333):c.356-1170A>G"))))); matchers.put("VSP_039324", hasAnnotation( with("ConsequenceType", VariantAnnotation::getConsequenceTypes, hasItem(with("ProteinVariantAnnotation", ConsequenceType::getProteinVariantAnnotation, diff --git a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/stats/VariantStatisticsManagerTest.java b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/stats/VariantStatisticsManagerTest.java index baafe7d0ad6..e90b49cff4d 100644 --- a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/stats/VariantStatisticsManagerTest.java +++ b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/stats/VariantStatisticsManagerTest.java @@ -18,6 +18,7 @@ import org.junit.*; import org.junit.rules.ExpectedException; +import org.mockito.Mockito; import org.opencb.biodata.models.variant.Genotype; import org.opencb.biodata.models.variant.StudyEntry; import org.opencb.biodata.models.variant.Variant; @@ -33,7 +34,9 @@ import org.opencb.opencga.storage.core.metadata.models.CohortMetadata; import org.opencb.opencga.storage.core.metadata.models.SampleMetadata; import org.opencb.opencga.storage.core.metadata.models.StudyMetadata; +import org.opencb.opencga.storage.core.metadata.models.TaskMetadata; import org.opencb.opencga.storage.core.variant.VariantStorageBaseTest; +import org.opencb.opencga.storage.core.variant.VariantStorageEngine; import org.opencb.opencga.storage.core.variant.VariantStorageEngineTest; import org.opencb.opencga.storage.core.variant.VariantStorageOptions; import org.opencb.opencga.storage.core.variant.adaptors.VariantDBAdaptor; @@ -49,8 +52,7 @@ import java.util.*; import java.util.stream.Collectors; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; +import static org.junit.Assert.*; /** * Created by hpccoll1 on 01/06/15. @@ -123,32 +125,76 @@ public void queryInvalidStats() throws Exception { Iterator iterator = metadataManager.sampleMetadataIterator(studyMetadata.getId()); /** Create cohorts **/ - HashSet cohort1 = new HashSet<>(); - cohort1.add(iterator.next().getName()); - cohort1.add(iterator.next().getName()); + HashSet cohort1Samples = new HashSet<>(); + cohort1Samples.add(iterator.next().getName()); + cohort1Samples.add(iterator.next().getName()); - HashSet cohort2 = new HashSet<>(); - cohort2.add(iterator.next().getName()); - cohort2.add(iterator.next().getName()); + HashSet cohort2Samples = new HashSet<>(); + cohort2Samples.add(iterator.next().getName()); + cohort2Samples.add(iterator.next().getName()); Map> cohorts = new HashMap<>(); - cohorts.put("cohort1", cohort1); - cohorts.put("cohort2", cohort2); + cohorts.put("cohort1", cohort1Samples); + cohorts.put("cohort2", cohort2Samples); + + // Just cohort ALL is expected + VariantQueryResult result = variantStorageEngine.get(new Query(), new QueryOptions(QueryOptions.LIMIT, 1)); + assertEquals(1, result.first().getStudies().get(0).getStats().size()); + assertEquals(0, result.getEvents().size()); + + metadataManager.registerCohort(studyMetadata.getName(), "cohort1", cohort1Samples); + + // Still just cohort ALL is expected, as cohort1 is not ready nor partial + result = variantStorageEngine.get(new Query(), new QueryOptions(QueryOptions.LIMIT, 1)); + assertEquals(1, result.first().getStudies().get(0).getStats().size()); + assertEquals(0, result.getEvents().size()); //Calculate stats stats(options, studyMetadata.getName(), cohorts, outputUri.resolve("cohort1.cohort2.stats")); checkCohorts(dbAdaptor, studyMetadata); - List cohort1Samples = metadataManager.getCohortMetadata(studyMetadata.getId(), "cohort1").getSamples(); - CohortMetadata cohort = metadataManager.addSamplesToCohort(studyMetadata.getId(), "cohort2", cohort1Samples); - assertTrue(cohort.isInvalid()); + // All 3 cohorts are ready and expected + result = variantStorageEngine.get(new Query(), new QueryOptions(QueryOptions.LIMIT, 1)); + assertEquals(3, result.first().getStudies().get(0).getStats().size()); + assertEquals(0, result.getEvents().size()); - VariantQueryResult result = variantStorageEngine.get(new Query(), new QueryOptions(QueryOptions.LIMIT, 1)); + List cohort1SampleIds = metadataManager.getCohortMetadata(studyMetadata.getId(), "cohort1").getSamples(); + CohortMetadata cohort2 = metadataManager.addSamplesToCohort(studyMetadata.getId(), "cohort2", cohort1SampleIds); + assertTrue(cohort2.isInvalid()); + + // Cohort2 is invalid, but still all cohorts are expected, but with a warning event + result = variantStorageEngine.get(new Query(), new QueryOptions(QueryOptions.LIMIT, 1)); + assertEquals(3, result.first().getStudies().get(0).getStats().size()); assertEquals(1, result.getEvents().size()); assertEquals("Please note that the Cohort Stats for '1000g:cohort2' are currently outdated." + " The statistics have been calculated with 2 samples, while the total number of samples in the cohort is 4." + " To display updated statistics, please execute variant-stats-index.", result.getEvents().get(0).getMessage()); + + VariantStorageEngine engineMock = Mockito.spy(variantStorageEngine); + VariantStatisticsManager statsManagerMock = Mockito.spy(variantStorageEngine.newVariantStatisticsManager()); + Mockito.doReturn(statsManagerMock).when(engineMock).newVariantStatisticsManager(); + Mockito.doAnswer(invocation -> { + invocation.callRealMethod(); + throw new StorageEngineException("Mock error calculating stats"); + }).when(statsManagerMock).preCalculateStats(Mockito.any(), Mockito.any(), Mockito.anyList(), Mockito.anyBoolean(), Mockito.any()); + + options.put(DefaultVariantStatisticsManager.OUTPUT, outputUri.resolve("stats_mock_fail").toString()); + try { + engineMock.calculateStats(studyMetadata.getName(), Collections.singletonList(cohort2.getName()), options); + fail("Expected to fail mock"); + } catch (Exception e) { + assertEquals("Mock error calculating stats", e.getMessage()); + } + + cohort2 = metadataManager.getCohortMetadata(studyMetadata.getId(), cohort2.getName()); + assertEquals(TaskMetadata.Status.RUNNING, cohort2.getStatsStatus()); + + result = variantStorageEngine.get(new Query(), new QueryOptions(QueryOptions.LIMIT, 1)); + assertEquals(3, result.first().getStudies().get(0).getStats().size()); + assertEquals(1, result.getEvents().size()); + assertEquals("Please note that the Cohort Stats for '1000g:cohort2' are currently being calculated.", + result.getEvents().get(0).getMessage()); } @Test diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/adaptors/phoenix/VariantPhoenixSchemaManager.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/adaptors/phoenix/VariantPhoenixSchemaManager.java index 4ffa3c7aa55..6f917dbabdd 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/adaptors/phoenix/VariantPhoenixSchemaManager.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/adaptors/phoenix/VariantPhoenixSchemaManager.java @@ -79,7 +79,7 @@ public void registerStudyColumns(int studyId) throws StorageEngineException { registerNewFiles(studyId, new ArrayList<>(metadataManager.getIndexedFiles(studyId))); List cohortIds = new LinkedList<>(); - for (CohortMetadata cohort : metadataManager.getCalculatedOrInvalidCohorts(studyId)) { + for (CohortMetadata cohort : metadataManager.getCalculatedOrPartialCohorts(studyId)) { cohortIds.add(cohort.getId()); } registerNewCohorts(studyId, cohortIds); diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/search/SecondaryIndexPendingVariantsDescriptor.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/search/SecondaryIndexPendingVariantsDescriptor.java index 604611ca93c..8b5134873ed 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/search/SecondaryIndexPendingVariantsDescriptor.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/search/SecondaryIndexPendingVariantsDescriptor.java @@ -61,7 +61,7 @@ public Scan configureScan(Scan scan, VariantStorageMetadataManager metadataManag scan.addColumn(GenomeHelper.COLUMN_FAMILY_BYTES, INDEX_STUDIES.bytes()); for (Integer studyId : metadataManager.getStudyIds()) { scan.addColumn(GenomeHelper.COLUMN_FAMILY_BYTES, VariantPhoenixSchema.getStudyColumn(studyId).bytes()); - for (CohortMetadata cohort : metadataManager.getCalculatedOrInvalidCohorts(studyId)) { + for (CohortMetadata cohort : metadataManager.getCalculatedOrPartialCohorts(studyId)) { scan.addColumn(GenomeHelper.COLUMN_FAMILY_BYTES, VariantPhoenixSchema.getStatsColumn(studyId, cohort.getId()).bytes()); } }