Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

TASK-6231 - Port Patch 2.12.4.1 -> 2.12.5 (XB 1.10.4.1 -> 1.10.5) #2448

Merged
merged 17 commits into from
May 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -124,9 +124,9 @@ public static void checkValidUserId(String userId) throws CatalogParameterExcept
if (userId.equals(ParamConstants.ANONYMOUS_USER_ID) || userId.equals(ParamConstants.REGISTERED_USERS)) {
throw new CatalogParameterException("User id cannot be one of the reserved OpenCGA users.");
}
if (!userId.matches("^[A-Za-z0-9]([-_.]?[A-Za-z0-9])*$")) {
if (!userId.matches("^[A-Za-z0-9]([-_.@]?[A-Za-z0-9])*$")) {
throw new CatalogParameterException("Invalid user id. Id needs to start by any character and might contain single '-', '_', "
+ "'.', symbols followed by any character or number.");
+ "'.' or '@' symbols followed by any character or number.");
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@
import org.opencb.opencga.core.models.sample.*;
import org.opencb.opencga.core.models.study.*;
import org.opencb.opencga.core.models.user.Account;
import org.opencb.opencga.core.models.user.AuthenticationResponse;
import org.opencb.opencga.core.models.user.User;
import org.opencb.opencga.core.response.OpenCGAResult;
import org.opencb.opencga.core.testclassification.duration.MediumTests;
Expand Down Expand Up @@ -276,6 +277,16 @@ private String getAdminToken() throws CatalogException, IOException {
return catalogManager.getUserManager().loginAsAdmin("admin").getToken();
}

/**
 * Creates a guest user whose id is given by a single literal, logs in with it and
 * checks that the user fetched back carries the very same id.
 */
@Test
public void createUserUsingMailAsId() throws CatalogException {
    // NOTE(review): this literal looks like an e-mail obfuscation placeholder rather than a
    // real address - confirm the intended id against the repository.
    String mailId = "[email protected]";

    Account guestAccount = new Account().setType(Account.AccountType.GUEST);
    User newUser = new User().setId(mailId).setName("Hello").setAccount(guestAccount);
    catalogManager.getUserManager().create(newUser, TestParamConstants.PASSWORD, opencgaToken);

    // The freshly created user must be able to authenticate with the same id.
    AuthenticationResponse session = catalogManager.getUserManager().login(mailId, TestParamConstants.PASSWORD);
    assertNotNull(session);

    // Fetch the user with its own token and verify that the id round-trips unchanged.
    User fetched = catalogManager.getUserManager().get(mailId, new QueryOptions(), session.getToken()).first();
    assertEquals(mailId, fetched.getId());
}

@Test
public void getGroupsTest() throws CatalogException {
Group group = new Group("groupId", Arrays.asList("user2", "user3")).setSyncedFrom(new Group.Sync("ldap", "bio"));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import org.opencb.cellbase.core.config.SpeciesProperties;
import org.opencb.cellbase.core.models.DataRelease;
import org.opencb.cellbase.core.result.CellBaseDataResponse;
import org.opencb.commons.datastore.core.ObjectMap;
import org.opencb.commons.datastore.core.QueryOptions;
import org.opencb.commons.utils.VersionUtils;

Expand Down Expand Up @@ -307,16 +308,30 @@ private static String majorMinor(String version) {
public String getVersionFromServer() throws IOException {
if (serverVersion == null) {
synchronized (this) {
String serverVersion = cellBaseClient.getMetaClient().about().firstResult().getString("Version");
ObjectMap result = retryMetaAbout(3);
if (result == null) {
throw new IOException("Unable to get version from server for cellbase " + toString());
}
String serverVersion = result.getString("Version");
if (StringUtils.isEmpty(serverVersion)) {
serverVersion = cellBaseClient.getMetaClient().about().firstResult().getString("Version: ");
serverVersion = result.getString("Version: ");
}
this.serverVersion = serverVersion;
}
}
return serverVersion;
}

/**
 * Asks the CellBase meta client for its "about" information, asking again while the
 * first result comes back null.
 *
 * The server is queried once unconditionally and then up to {@code retries} more times,
 * logging a warning before each additional attempt.
 *
 * @param retries Maximum number of additional attempts after the first one
 * @return The first result of the "about" call, or null if every attempt returned null
 * @throws IOException declared by the signature; presumably propagated from the client call - TODO confirm
 */
private ObjectMap retryMetaAbout(int retries) throws IOException {
    ObjectMap about = cellBaseClient.getMetaClient().about().firstResult();
    int remaining = retries;
    while (about == null && remaining > 0) {
        // Retry
        logger.warn("Unable to get version from server for cellbase " + toString() + ". Retrying...");
        remaining--;
        about = cellBaseClient.getMetaClient().about().firstResult();
    }
    return about;
}

public boolean isMinVersion(String minVersion) throws IOException {
String serverVersion = getVersionFromServer();
return VersionUtils.isMinVersion(minVersion, serverVersion);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1135,9 +1135,14 @@ public Iterable<CohortMetadata> getInvalidCohorts(int studyId) {
return () -> Iterators.filter(cohortIterator(studyId), CohortMetadata::isInvalid);
}

public Iterable<CohortMetadata> getCalculatedOrInvalidCohorts(int studyId) {
public Iterable<CohortMetadata> getCalculatedOrPartialCohorts(int studyId) {
return () -> Iterators.filter(cohortIterator(studyId),
cohortMetadata -> cohortMetadata.isStatsReady() || cohortMetadata.isInvalid());
cohortMetadata -> {
TaskMetadata.Status status = cohortMetadata.getStatsStatus();
return status == TaskMetadata.Status.READY
|| status == TaskMetadata.Status.RUNNING
|| status == TaskMetadata.Status.ERROR;
});
}

public CohortMetadata setSamplesToCohort(int studyId, String cohortName, Collection<Integer> samples) throws StorageEngineException {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,11 +1,15 @@
package org.opencb.opencga.storage.core.variant.annotation.converters;

import org.apache.commons.collections4.CollectionUtils;
import org.apache.commons.lang3.StringUtils;
import org.opencb.biodata.models.variant.avro.*;
import org.opencb.opencga.storage.core.variant.query.VariantQueryUtils;

import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class VariantAnnotationModelUtils {

Expand All @@ -21,54 +25,88 @@ public class VariantAnnotationModelUtils {
* - annotation.consequenceTypes.transcriptId
* - annotation.consequenceTypes.ensemblTranscriptId
* - annotation.consequenceTypes.hgvs
* - annotation.consequenceTypes.proteinVariantAnnotation.proteinId
* - annotation.consequenceTypes.proteinVariantAnnotation.uniprotAccession
* - annotation.consequenceTypes.proteinVariantAnnotation.uniprotName
* - annotation.consequenceTypes.proteinVariantAnnotation.uniprotVariantId
* - annotation.consequenceTypes.proteinVariantAnnotation.features.id
* - annotation.traitAssociation.id
* - annotation.geneTraitAssociation.hpo
* - annotation.geneTraitAssociation.id
* - annotation.geneTraitAssociation.hpo
* - annotation.pharmacogenomics.id
* - annotation.pharmacogenomics.name
*
* @param variantAnnotation VariantAnnotation object
* @return Set of XRefs
*/
private static final Pattern HGVS_PATTERN = Pattern.compile("\\([^()]*\\)");

public Set<String> extractXRefs(VariantAnnotation variantAnnotation) {
Set<String> xrefs = new HashSet<>();
Set<String> xrefs = new HashSet<>(100);

if (variantAnnotation == null) {
return xrefs;
}

xrefs.add(variantAnnotation.getId());

if (variantAnnotation.getXrefs() != null) {
if (CollectionUtils.isNotEmpty(variantAnnotation.getXrefs())) {
for (Xref xref : variantAnnotation.getXrefs()) {
if (xref != null) {
xrefs.add(xref.getId());
}
}
}

if (variantAnnotation.getHgvs() != null) {
List<ConsequenceType> consequenceTypes = variantAnnotation.getConsequenceTypes();

if (CollectionUtils.isNotEmpty(variantAnnotation.getHgvs())) {
xrefs.addAll(variantAnnotation.getHgvs());

// TODO Remove this code when CellBase 6.4.0 returns the expected HGVS
for (String hgvs: variantAnnotation.getHgvs()) {
if (VariantQueryUtils.isTranscript(hgvs)) {
// 1. Remove the content between parentheses, e.g. ENST00000680783.1(ENSG00000135744):c.776T>C
if (hgvs.contains("(")) {
Matcher matcher = HGVS_PATTERN.matcher(hgvs);
StringBuffer result = new StringBuffer();
while (matcher.find()) {
matcher.appendReplacement(result, "");
}
matcher.appendTail(result);
xrefs.add(result.toString());
}

// 2. Add the HGVS with the Ensembl and gene name, e.g. ENSG00000135744:c.776T>C, AGT:c.776T>C
if (CollectionUtils.isNotEmpty(consequenceTypes)) {
for (ConsequenceType conseqType : consequenceTypes) {
if (conseqType != null && conseqType.getHgvs() != null && conseqType.getHgvs().contains(hgvs)) {
String[] fields = hgvs.split(":", 2);
if (StringUtils.isNotEmpty(conseqType.getGeneId())) {
xrefs.add(conseqType.getGeneId() + ":" + fields[1]);
}
if (StringUtils.isNotEmpty(conseqType.getGeneName())) {
xrefs.add(conseqType.getGeneName() + ":" + fields[1]);
}
break;
}
}
}
}
}
}

List<ConsequenceType> consequenceTypes = variantAnnotation.getConsequenceTypes();
if (consequenceTypes != null) {
if (CollectionUtils.isNotEmpty(consequenceTypes)) {
for (ConsequenceType conseqType : consequenceTypes) {
xrefs.add(conseqType.getGeneName());
xrefs.add(conseqType.getGeneId());
xrefs.add(conseqType.getEnsemblGeneId());
xrefs.add(conseqType.getTranscriptId());
xrefs.add(conseqType.getEnsemblGeneId());
xrefs.add(conseqType.getEnsemblTranscriptId());

if (conseqType.getHgvs() != null) {
xrefs.addAll(conseqType.getHgvs());
}

ProteinVariantAnnotation protVarAnnotation = conseqType.getProteinVariantAnnotation();
if (protVarAnnotation != null) {

xrefs.add(protVarAnnotation.getProteinId());
xrefs.add(protVarAnnotation.getUniprotAccession());
xrefs.add(protVarAnnotation.getUniprotName());
xrefs.add(protVarAnnotation.getUniprotVariantId());
Expand All @@ -80,7 +118,6 @@ public Set<String> extractXRefs(VariantAnnotation variantAnnotation) {
}
}
}

}

if (CollectionUtils.isNotEmpty(variantAnnotation.getTraitAssociation())) {
Expand All @@ -89,10 +126,17 @@ public Set<String> extractXRefs(VariantAnnotation variantAnnotation) {
}
}

if (variantAnnotation.getGeneTraitAssociation() != null) {
if (CollectionUtils.isNotEmpty(variantAnnotation.getGeneTraitAssociation())) {
for (GeneTraitAssociation geneTrait : variantAnnotation.getGeneTraitAssociation()) {
xrefs.add(geneTrait.getHpo());
xrefs.add(geneTrait.getId());
xrefs.add(geneTrait.getHpo());
}
}

if (CollectionUtils.isNotEmpty(variantAnnotation.getPharmacogenomics())) {
for (Pharmacogenomics pharmacogenomics : variantAnnotation.getPharmacogenomics()) {
xrefs.add(pharmacogenomics.getId());
xrefs.add(pharmacogenomics.getName());
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -467,6 +467,19 @@ public static boolean isVariantAccession(String value) {
return value.startsWith("rs") || value.startsWith("VAR_");
}

/**
 * Tells whether the given value looks like a known transcript, judging only by its prefix.
 * <ul>
 *   <li>Ensembl transcripts start with {@code ENST}</li>
 *   <li>RefSeq transcripts start with {@code NM_} and {@code XM_}</li>
 * </ul>
 * See <a href="https://www.ncbi.nlm.nih.gov/books/NBK21091/table/ch18.T.refseq_accession_numbers_and_mole/?report=objectonly">...</a>
 *
 * @param value Value to check. Must not be null.
 * @return true if the value starts with one of the known transcript prefixes
 */
public static boolean isTranscript(String value) {
    // Ensembl
    if (value.startsWith("ENST")) {
        return true;
    }
    // RefSeq
    if (value.startsWith("NM_")) {
        return true;
    }
    return value.startsWith("XM_");
}

/**
* Determines if the given value is a HGVS.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import org.opencb.opencga.storage.core.metadata.models.CohortMetadata;
import org.opencb.opencga.storage.core.metadata.models.FileMetadata;
import org.opencb.opencga.storage.core.metadata.models.StudyMetadata;
import org.opencb.opencga.storage.core.metadata.models.TaskMetadata;
import org.opencb.opencga.storage.core.variant.VariantStorageEngine;
import org.opencb.opencga.storage.core.variant.adaptors.VariantField;
import org.opencb.opencga.storage.core.variant.adaptors.VariantQueryException;
Expand Down Expand Up @@ -134,9 +135,10 @@ public VariantQueryProjection parseVariantQueryProjection(Query query, QueryOpti
for (VariantQueryProjection.StudyVariantQueryProjection study : studies.values()) {
int studyId = study.getId();
List<Integer> cohorts = new LinkedList<>();
for (CohortMetadata cohort : metadataManager.getCalculatedOrInvalidCohorts(studyId)) {
for (CohortMetadata cohort : metadataManager.getCalculatedOrPartialCohorts(studyId)) {
cohorts.add(cohort.getId());
if (cohort.isInvalid()) {
TaskMetadata.Status status = cohort.getStatsStatus();
if (status == TaskMetadata.Status.ERROR) {
String message = "Please note that the Cohort Stats for "
+ "'" + study.getName() + ":" + cohort.getName() + "' are currently outdated.";
int numSampmles = cohort.getSamples().size();
Expand All @@ -147,6 +149,10 @@ public VariantQueryProjection parseVariantQueryProjection(Query query, QueryOpti
}
message += " To display updated statistics, please execute variant-stats-index.";
events.add(new Event(Event.Type.WARNING, message));
} else if (status == TaskMetadata.Status.RUNNING) {
String message = "Please note that the Cohort Stats for "
+ "'" + study.getName() + ":" + cohort.getName() + "' are currently being calculated.";
events.add(new Event(Event.Type.WARNING, message));
}
}
study.setCohorts(cohorts);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -474,6 +474,7 @@ protected VariantStatsDBWriter newVariantStatisticsDBWriter(VariantDBAdaptor dbA
//
// }

@Deprecated
void checkAndUpdateCalculatedCohorts(StudyMetadata studyMetadata, URI uri, boolean updateStats)
throws IOException, StorageEngineException {
Set<String> cohortNames = readCohortsFromStatsFile(uri);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,7 @@ public void postCalculateStats(
}
}

@Deprecated
public static void checkAndUpdateCalculatedCohorts(
VariantStorageMetadataManager metadataManager, StudyMetadata studyMetadata, Collection<String> cohorts, boolean updateStats)
throws StorageEngineException {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
package org.opencb.opencga.storage.core.variant.annotation.converters;

import junit.framework.TestCase;
import org.junit.experimental.categories.Category;
import org.opencb.biodata.models.variant.avro.ConsequenceType;
import org.opencb.biodata.models.variant.avro.VariantAnnotation;
import org.opencb.biodata.models.variant.avro.Xref;
import org.opencb.opencga.core.testclassification.duration.ShortTests;

import java.util.Arrays;
import java.util.Collections;
import java.util.Set;

/**
 * Checks the HGVS-derived entries produced by {@link VariantAnnotationModelUtils#extractXRefs}.
 */
@Category(ShortTests.class)
public class VariantAnnotationModelUtilsTest extends TestCase {

    /**
     * Builds an annotation with one transcript HGVS and one protein HGVS, plus a consequence
     * type that has a gene name but no gene id, and verifies the exact set of extracted xrefs.
     */
    public void testXrefsHgvs() throws Exception {
        VariantAnnotation annotation = new VariantAnnotation();
        annotation.setId("id");
        annotation.setXrefs(Collections.singletonList(new Xref("xref1", "source")));
        annotation.setHgvs(Arrays.asList(
                "ENST00000680783.1(ENSG00000135744):c.776T>C",
                "ENSP00000451720.1:p.Asn134Lys"
        ));

        // Consequence type sharing the annotation's HGVS list; only the gene name is set.
        ConsequenceType namedGene = new ConsequenceType();
        namedGene.setGeneName("GENE");
        namedGene.setHgvs(annotation.getHgvs());
        namedGene.setGeneId(null);
        ConsequenceType blank = new ConsequenceType();
        annotation.setConsequenceTypes(Arrays.asList(namedGene, blank));

        VariantAnnotationModelUtils utils = new VariantAnnotationModelUtils();
        Set<String> extracted = utils.extractXRefs(annotation);

        assertEquals(7, extracted.size());

        String[] expectedXrefs = {
                // Default fields
                "id",
                "xref1",
                "GENE",
                // Protein hgvs, not starting with a transcript prefix: kept as is
                "ENSP00000451720.1:p.Asn134Lys",
                // Transcript hgvs: the original, the one without the parenthesised part,
                // and the one prefixed with the gene name
                "ENST00000680783.1(ENSG00000135744):c.776T>C",
                "ENST00000680783.1:c.776T>C",
                "GENE:c.776T>C",
        };
        for (String expected : expectedXrefs) {
            assertTrue(extracted.contains(expected));
        }
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,12 @@ public void testXRefRs() throws StorageEngineException {
with("Cosmic", EvidenceEntry::getId, is("COSV60260399"))))));
matchers.put("ENST00000341832.11(ENSG00000248333):c.356-1170A>G", hasAnnotation(with("HGVS", VariantAnnotation::getHgvs, hasItem(
is("ENST00000341832.11(ENSG00000248333):c.356-1170A>G")))));
matchers.put("ENST00000341832.11:c.356-1170A>G", hasAnnotation(with("HGVS", VariantAnnotation::getHgvs, hasItem(
// The variant annotation may not have the "alternate" hgvs
is("ENST00000341832.11(ENSG00000248333):c.356-1170A>G")))));
matchers.put("ENSG00000248333:c.356-1170A>G", hasAnnotation(with("HGVS", VariantAnnotation::getHgvs, hasItem(
// The variant annotation may not have the "alternate" hgvs
is("ENST00000341832.11(ENSG00000248333):c.356-1170A>G")))));
matchers.put("VSP_039324", hasAnnotation(
with("ConsequenceType", VariantAnnotation::getConsequenceTypes,
hasItem(with("ProteinVariantAnnotation", ConsequenceType::getProteinVariantAnnotation,
Expand Down
Loading
Loading