Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

TASK-4872 - NullPointerException at variant-secondary-sample-index #2342

Merged
merged 2 commits into from
Oct 3, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -202,12 +202,12 @@ protected void run() throws Exception {
if (family == null || StringUtils.isEmpty(family.getId())) {
continue;
}
List<List<String>> trios = variantStorageManager.getTriosFromFamily(getStudy(), family, true, getToken());
for (List<String> trio : trios) {
String child = trio.get(2);
List<Trio> trios = variantStorageManager.getTriosFromFamily(getStudy(), family, true, getToken());
for (Trio trio : trios) {
String child = trio.getChild();
if (analysisParams.getSample().contains(child)) {
String father = trio.get(0);
String mother = trio.get(1);
String father = trio.getFather();
String mother = trio.getMother();
triosMap.put(child, new Trio(family.getId(),
"-".equals(father) ? null : father,
"-".equals(mother) ? null : mother,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1299,10 +1299,10 @@ public Integer getReleaseFilter(Query query, String sessionId) throws CatalogExc
return release;
}

public List<List<String>> getTriosFromFamily(
public List<Trio> getTriosFromFamily(
String studyFqn, Family family, VariantStorageMetadataManager metadataManager, boolean skipIncompleteFamily, String sessionId)
throws StorageEngineException, CatalogException {
List<List<String>> trios = getTrios(studyFqn, metadataManager, family.getMembers(), sessionId);
List<Trio> trios = getTrios(studyFqn, metadataManager, family.getMembers(), sessionId);
if (trios.size() == 0) {
if (skipIncompleteFamily) {
logger.debug("Skip family '" + family.getId() + "'. ");
Expand All @@ -1313,7 +1313,7 @@ public List<List<String>> getTriosFromFamily(
return trios;
}

public List<List<String>> getTriosFromSamples(
public List<Trio> getTriosFromSamples(
String studyFqn, VariantStorageMetadataManager metadataManager, Collection<String> sampleIds, String token)
throws CatalogException {
OpenCGAResult<Individual> individualResult = catalogManager.getIndividualManager()
Expand All @@ -1330,12 +1330,12 @@ public List<List<String>> getTriosFromSamples(
return getTrios(studyFqn, metadataManager, individualResult.getResults(), token);
}

public List<List<String>> getTrios(
public List<Trio> getTrios(
String studyFqn, VariantStorageMetadataManager metadataManager, List<Individual> membersList, String sessionId)
throws CatalogException {
int studyId = metadataManager.getStudyId(studyFqn);
Map<Long, Individual> membersMap = membersList.stream().collect(Collectors.toMap(Individual::getUid, i -> i));
List<List<String>> trios = new LinkedList<>();
List<Trio> trios = new LinkedList<>();
for (Individual individual : membersList) {
String fatherSample = null;
String motherSample = null;
Expand Down Expand Up @@ -1402,10 +1402,7 @@ public List<List<String>> getTrios(

// Allow one missing parent
if (childSample != null && (fatherSample != null || motherSample != null)) {
trios.add(Arrays.asList(
fatherSample == null ? "-" : fatherSample,
motherSample == null ? "-" : motherSample,
childSample));
trios.add(new Trio(fatherSample, motherSample, childSample));
}
}
return trios;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -76,10 +76,7 @@
import org.opencb.opencga.storage.core.exceptions.StorageEngineException;
import org.opencb.opencga.storage.core.metadata.VariantMetadataFactory;
import org.opencb.opencga.storage.core.metadata.VariantStorageMetadataManager;
import org.opencb.opencga.storage.core.metadata.models.ProjectMetadata;
import org.opencb.opencga.storage.core.metadata.models.SampleMetadata;
import org.opencb.opencga.storage.core.metadata.models.StudyMetadata;
import org.opencb.opencga.storage.core.metadata.models.VariantScoreMetadata;
import org.opencb.opencga.storage.core.metadata.models.*;
import org.opencb.opencga.storage.core.utils.CellBaseUtils;
import org.opencb.opencga.storage.core.variant.BeaconResponse;
import org.opencb.opencga.storage.core.variant.VariantStorageEngine;
Expand Down Expand Up @@ -397,19 +394,19 @@ public void sampleIndexAnnotate(String study, List<String> samples, ObjectMap pa
});
}

public DataResult<List<String>> familyIndexUpdate(String study,
public DataResult<Trio> familyIndexUpdate(String study,
ObjectMap params, String token)
throws CatalogException, StorageEngineException {
return secureOperation(VariantFamilyIndexOperationTool.ID, study, params, token, engine -> {
return engine.familyIndexUpdate(study, params);
});
}

public DataResult<List<String>> familyIndex(String study, List<String> familiesStr, boolean skipIncompleteFamilies,
public DataResult<Trio> familyIndex(String study, List<String> familiesStr, boolean skipIncompleteFamilies,
ObjectMap params, String token)
throws CatalogException, StorageEngineException {
return secureOperation(VariantFamilyIndexOperationTool.ID, study, params, token, engine -> {
List<List<String>> trios = new LinkedList<>();
List<Trio> trios = new LinkedList<>();
List<Event> events = new LinkedList<>();
VariantStorageMetadataManager metadataManager = engine.getMetadataManager();
VariantCatalogQueryUtils catalogUtils = new VariantCatalogQueryUtils(catalogManager);
Expand All @@ -425,9 +422,9 @@ public DataResult<List<String>> familyIndex(String study, List<String> familiesS
trios.addAll(catalogUtils.getTriosFromFamily(study, family, metadataManager, skipIncompleteFamilies, token));
}
}
DataResult<List<String>> dataResult = engine.familyIndex(study, trios, params);
DataResult<Trio> dataResult = engine.familyIndex(study, trios, params);
getSynchronizer(engine).synchronizeCatalogSamplesFromStorage(study, trios.stream()
.flatMap(Collection::stream)
.flatMap(t->t.toList().stream())
.collect(Collectors.toList()), token);
return dataResult;
});
Expand All @@ -439,25 +436,24 @@ private CatalogStorageMetadataSynchronizer getSynchronizer(VariantStorageEngine
return synchronizer;
}

public DataResult<List<String>> familyIndexBySamples(String study, Collection<String> samples, ObjectMap params, String token)
public DataResult<Trio> familyIndexBySamples(String study, Collection<String> samples, ObjectMap params, String token)
throws CatalogException, StorageEngineException {
return secureOperation(VariantFamilyIndexOperationTool.ID, study, params, token, engine -> {
Collection<String> thisSamples = samples;
if (CollectionUtils.size(thisSamples) == 1 && thisSamples.iterator().next().equals(ParamConstants.ALL)) {
thisSamples = getIndexedSamples(study, token);
}

List<List<String>> trios = catalogUtils.getTriosFromSamples(study, engine.getMetadataManager(), thisSamples, token);

DataResult<List<String>> dataResult = engine.familyIndex(study, trios, params);
List<Trio> trios = catalogUtils.getTriosFromSamples(study, engine.getMetadataManager(), thisSamples, token);
DataResult<Trio> dataResult = engine.familyIndex(study, trios, params);
getSynchronizer(engine).synchronizeCatalogSamplesFromStorage(study, trios.stream()
.flatMap(Collection::stream)
.flatMap(t -> t.toList().stream())
.collect(Collectors.toList()), token);
return dataResult;
});
}

public List<List<String>> getTriosFromFamily(String study, Family family, boolean skipIncompleteFamilies, String token)
public List<Trio> getTriosFromFamily(String study, Family family, boolean skipIncompleteFamilies, String token)
throws CatalogException, StorageEngineException {
VariantStorageEngine variantStorageEngine = getVariantStorageEngine(study, token);
return catalogUtils.getTriosFromFamily(study, family, variantStorageEngine.getMetadataManager(), skipIncompleteFamilies, token);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import org.opencb.opencga.core.models.operations.variant.VariantFamilyIndexParams;
import org.opencb.opencga.core.tools.annotations.Tool;
import org.opencb.opencga.core.tools.annotations.ToolParams;
import org.opencb.opencga.storage.core.metadata.models.Trio;

import java.util.Collections;
import java.util.List;
Expand Down Expand Up @@ -67,7 +68,7 @@ protected void check() throws Exception {
@Override
protected void run() throws Exception {
step(() -> {
DataResult<List<String>> trios;
DataResult<Trio> trios;
if (variantFamilyIndexParams.isUpdateIndex()) {
trios = variantStorageManager.familyIndexUpdate(study, params, token);
} else {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import org.opencb.opencga.core.models.operations.variant.VariantSecondarySampleIndexParams;
import org.opencb.opencga.core.tools.annotations.Tool;
import org.opencb.opencga.core.tools.annotations.ToolParams;
import org.opencb.opencga.storage.core.metadata.models.Trio;

import java.util.ArrayList;
import java.util.List;
Expand Down Expand Up @@ -80,7 +81,7 @@ protected void run() throws Exception {
}
if (sampleIndexParams.isFamilyIndex()) {
step("familyIndex", () -> {
DataResult<List<String>> result = variantStorageManager.familyIndexBySamples(study, sampleIndexParams.getSample(), params,
DataResult<Trio> result = variantStorageManager.familyIndexBySamples(study, sampleIndexParams.getSample(), params,
getToken());
if (result.getEvents() != null) {
for (Event event : result.getEvents()) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@
import org.opencb.opencga.storage.core.metadata.VariantStorageMetadataManager;
import org.opencb.opencga.storage.core.metadata.models.StudyMetadata;
import org.opencb.opencga.storage.core.metadata.models.TaskMetadata;
import org.opencb.opencga.storage.core.metadata.models.Trio;
import org.opencb.opencga.storage.core.utils.CellBaseUtils;
import org.opencb.opencga.storage.core.variant.adaptors.VariantField;
import org.opencb.opencga.storage.core.variant.adaptors.VariantQuery;
Expand Down Expand Up @@ -868,9 +869,9 @@ public void getTriosFromFamily() throws Exception {
sampleMetadata -> sampleMetadata.setIndexStatus(TaskMetadata.Status.READY));
}

List<List<String>> trios = queryUtils.getTriosFromFamily("s1", f1, metadataManager, true, sessionId);
List<Trio> trios = queryUtils.getTriosFromFamily("s1", f1, metadataManager, true, sessionId);
// System.out.println("trios = " + trios);
assertEquals(Arrays.asList(Arrays.asList("sample1", "sample2", "sample3"), Arrays.asList("sample1", "sample2", "sample4")), trios);
assertEquals(Arrays.asList(new Trio("sample1", "sample2", "sample3"), new Trio("sample1", "sample2", "sample4")), trios);

}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,9 +44,7 @@
import org.opencb.opencga.core.models.common.IndexStatus;
import org.opencb.opencga.core.models.family.Family;
import org.opencb.opencga.core.models.file.File;
import org.opencb.opencga.core.models.individual.Individual;
import org.opencb.opencga.core.models.individual.IndividualInternal;
import org.opencb.opencga.core.models.individual.Location;
import org.opencb.opencga.core.models.individual.*;
import org.opencb.opencga.core.models.job.Job;
import org.opencb.opencga.core.models.operations.variant.VariantAnnotationIndexParams;
import org.opencb.opencga.core.models.operations.variant.VariantSecondaryAnnotationIndexParams;
Expand Down Expand Up @@ -151,6 +149,16 @@ public void setUp() throws Throwable {
@After
public void tearDown() {
if (hadoopExternalResource != null) {

try {
VariantStorageEngine engine = opencga.getStorageEngineFactory().getVariantStorageEngine(storageEngine, DB_NAME);
if (storageEngine.equals(HadoopVariantStorageEngine.STORAGE_ENGINE_ID)) {
VariantHbaseTestUtils.printVariants(((VariantHadoopDBAdaptor) engine.getDBAdaptor()), Paths.get(opencga.createTmpOutdir("_hbase_print_variants_AFTER")).toUri());
}
} catch (Exception ignore) {
ignore.printStackTrace();
}

hadoopExternalResource.after();
hadoopExternalResource = null;
}
Expand Down Expand Up @@ -249,7 +257,7 @@ private void loadDataset() throws Throwable {
Collections.emptyList(), false, 0, Collections.emptyList(), Collections.emptyList(), Collections.emptyList(), IndividualInternal.init(), Collections.emptyMap()).setFather(individuals.get(0)).setMother(individuals.get(1)), Collections.singletonList(daughter), new QueryOptions(ParamConstants.INCLUDE_RESULT_PARAM, true), token).first());
catalogManager.getFamilyManager().create(
STUDY,
new Family("f1", "f1", Collections.singletonList(phenotype), Collections.singletonList(disorder), null, null, 3, null, null),
new Family("f1", "f1", Collections.singletonList(phenotype), Collections.singletonList(disorder), null, null, 4, null, null),
individuals.stream().map(Individual::getId).collect(Collectors.toList()), new QueryOptions(),
token);

Expand Down Expand Up @@ -401,6 +409,50 @@ public void testVariantSecondarySampleIndex() throws Exception {
}
}

@Test
public void testVariantSecondarySampleIndexPartialFamily() throws Exception {
Assume.assumeThat(storageEngine, anyOf(
// is(DummyVariantStorageEngine.STORAGE_ENGINE_ID),
is(HadoopVariantStorageEngine.STORAGE_ENGINE_ID)
));
for (String sample : samples) {
SampleInternalVariantSecondarySampleIndex sampleIndex = catalogManager.getSampleManager().get(STUDY, sample, new QueryOptions(), token).first().getInternal().getVariant().getSecondarySampleIndex();
assertEquals(sample, IndexStatus.READY, sampleIndex.getStatus().getId());
assertEquals(sample, IndexStatus.NONE, sampleIndex.getFamilyStatus().getId());
assertEquals(sample, 1, sampleIndex.getVersion().intValue());
}

Phenotype phenotype = new Phenotype("phenotype", "phenotype", "");
Disorder disorder = new Disorder("disorder", "disorder", "", "", Collections.singletonList(phenotype), Collections.emptyMap());

catalogManager.getFamilyManager().delete(STUDY, Collections.singletonList("f1"), null, token);
catalogManager.getIndividualManager().update(STUDY, daughter, new IndividualUpdateParams()
.setMother(new IndividualReferenceParam(null, null)), null, token);
catalogManager.getFamilyManager().create(
STUDY,
new Family("f2", "f2", Collections.singletonList(phenotype), Collections.singletonList(disorder), null, null, 2, null, null),
Arrays.asList(father, daughter), new QueryOptions(),
token);

// Run family index. The family index status should be READY on offspring
toolRunner.execute(VariantSecondarySampleIndexOperationTool.class, STUDY,
new VariantSecondarySampleIndexParams()
.setFamilyIndex(true)
.setSample(Arrays.asList(daughter)),
Paths.get(opencga.createTmpOutdir()), "index", token);

for (String sample : samples) {
SampleInternalVariantSecondarySampleIndex sampleIndex = catalogManager.getSampleManager().get(STUDY, sample, new QueryOptions(), token).first().getInternal().getVariant().getSecondarySampleIndex();
assertEquals(sample, IndexStatus.READY, sampleIndex.getStatus().getId());
if (sample.equals(daughter)) {
assertEquals(sample, IndexStatus.READY, sampleIndex.getFamilyStatus().getId());
} else {
assertEquals(sample, IndexStatus.NONE, sampleIndex.getFamilyStatus().getId());
}
assertEquals(sample, 1, sampleIndex.getVersion().intValue());
}
}

@Test
public void testGwasIndex() throws Exception {
// Variant scores can not be loaded in mongodb nor dummy
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,21 +6,42 @@
import java.util.List;
import java.util.Objects;

/**
* This class represents a family trio.
*
* - Father and mother can be null
* - Child cannot be null
* - All samples must be unique
* - All samples must be different to NA (dash)
*
*/
public class Trio {
public static final String NA = "-";
private final String id;
private final String father;
private final String mother;
private final String child;

public Trio(String trio) {
String[] split = trio.split(",");
if (split.length != 3) {
throw new IllegalArgumentException("Expected three samples in trio '" + trio + "'");
}
this.id = null;
this.child = split[0];
this.father = NA.equals(split[1]) ? null : split[1];
this.mother = NA.equals(split[2]) ? null : split[2];
}

public Trio(List<String> trio) {
this(null, trio);
}

public Trio(String id, List<String> trio) {
this.id = id;
this.father = trio.get(1);
this.mother = trio.get(2);
this.child = trio.get(0);
this.father = NA.equals(trio.get(1)) ? null : trio.get(1);
this.mother = NA.equals(trio.get(2)) ? null : trio.get(2);
}

public Trio(String father, String mother, String child) {
Expand All @@ -29,8 +50,8 @@ public Trio(String father, String mother, String child) {

public Trio(String id, String father, String mother, String child) {
this.id = id;
this.father = father;
this.mother = mother;
this.father = NA.equals(father) ? null : father;
this.mother = NA.equals(mother) ? null : mother;
this.child = child;
}

Expand All @@ -50,6 +71,11 @@ public String getChild() {
return child;
}

/**
* Returns a list with the non-null samples in the trio.
*
* @return List of samples
*/
public List<String> toList() {
ArrayList<String> list = new ArrayList<>(3);
list.add(getChild());
Expand Down Expand Up @@ -82,6 +108,31 @@ public int hashCode() {
return Objects.hash(id, father, mother, child);
}

/**
* Serialize the trio into a string contain the three samples separated by commas.
* order: child, father, mother.
* If the father or mother are null, they will be replaced by {@link #NA}.
*
* Can be deserialized using {@link #Trio(String)}.
*
* @return String
*/
public String serialize() {
ArrayList<String> list = new ArrayList<>(3);
list.add(getChild());
if (getFather() == null) {
list.add(NA);
} else {
list.add(getFather());
}
if (getMother() == null) {
list.add(NA);
} else {
list.add(getMother());
}
return Strings.join(list, ',');
}

@Override
public String toString() {
return Strings.join(toList(), ',');
Expand Down
Loading
Loading