Skip to content

Commit

Permalink
analysis: improvements by re-using code, #TASK-6772, #TASK-6766
Browse files Browse the repository at this point in the history
  • Loading branch information
jtarraga committed Sep 10, 2024
1 parent a6a8c86 commit 8df5100
Show file tree
Hide file tree
Showing 6 changed files with 63 additions and 75 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@
import org.opencb.commons.datastore.core.Query;
import org.opencb.commons.datastore.core.QueryOptions;
import org.opencb.opencga.analysis.variant.qc.VariantQcAnalysis;
import org.opencb.opencga.analysis.variant.relatedness.RelatednessAnalysis;
import org.opencb.opencga.catalog.exceptions.CatalogException;
import org.opencb.opencga.catalog.managers.CatalogManager;
import org.opencb.opencga.core.common.JacksonUtils;
Expand All @@ -38,7 +37,6 @@
import org.opencb.opencga.core.models.family.FamilyQualityControl;
import org.opencb.opencga.core.models.family.FamilyUpdateParams;
import org.opencb.opencga.core.models.variant.FamilyQcAnalysisParams;
import org.opencb.opencga.core.models.variant.QcRelatednessAnalysisParams;
import org.opencb.opencga.core.response.OpenCGAResult;
import org.opencb.opencga.core.tools.annotations.Tool;
import org.opencb.opencga.core.tools.annotations.ToolParams;
Expand All @@ -54,7 +52,6 @@
import java.util.stream.Collectors;

import static org.opencb.opencga.core.models.common.InternalStatus.READY;
import static org.opencb.opencga.core.models.common.QualityControlStatus.COMPUTING;
import static org.opencb.opencga.core.models.common.QualityControlStatus.NONE;
import static org.opencb.opencga.core.models.study.StudyPermissions.Permissions.WRITE_FAMILIES;
import static org.opencb.opencga.storage.core.variant.io.VariantWriterFactory.VariantOutputFormat.JSON;
Expand Down Expand Up @@ -105,7 +102,7 @@ protected void run() throws ToolException {

// Set quality control status to COMPUTING to prevent multiple family QCs from running simultaneously
// for the same family
if (!setComputingStatus(family)) {
if (!setComputingStatus(family.getId(), FAMILY_QC_TYPE)) {
continue;
}

Expand Down Expand Up @@ -175,21 +172,6 @@ protected void run() throws ToolException {
}
}

/**
 * Marks the given family as having a quality control computation in progress.
 * <p>
 * The family's quality control status is updated in the catalog to COMPUTING. If the catalog update fails, the
 * problem is logged, registered as a tool error, and the method reports failure instead of throwing.
 *
 * @param family Family whose quality control status is updated
 * @return true if the status was updated; false if the catalog update failed
 * @throws ToolException declared for callers; the catalog failure path itself is reported through the return value
 */
private boolean setComputingStatus(Family family) throws ToolException {
    try {
        QualityControlStatus computingStatus = new QualityControlStatus(COMPUTING, "Performing family QC");
        FamilyUpdateParams statusUpdate = new FamilyUpdateParams().setQualityControlStatus(computingStatus);
        catalogManager.getFamilyManager().update(getStudy(), family.getId(), statusUpdate, null, token);
        return true;
    } catch (CatalogException e) {
        // Do not abort the tool: record the failure and let the caller decide what to do with this family
        final String errorMsg = "Could not set status to COMPUTING before performing the QC for the family "
                + family.getId()
                + ": "
                + e.getMessage();
        logger.error(errorMsg);
        ToolException failure = new ToolException(errorMsg, e);
        addError(failure);
        return false;
    }
}

public static void checkParameters(FamilyQcAnalysisParams params, String studyId, CatalogManager catalogManager, String token)
throws ToolException {
// Check study
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,41 +17,19 @@
package org.opencb.opencga.analysis.family.qc;

import com.fasterxml.jackson.databind.ObjectWriter;
import org.apache.commons.collections4.CollectionUtils;
import org.apache.commons.lang.StringUtils;
import org.opencb.biodata.models.clinical.qc.RelatednessReport;
import org.opencb.commons.datastore.core.Query;
import org.opencb.commons.datastore.core.QueryOptions;
import org.opencb.commons.utils.DockerUtils;
import org.opencb.opencga.analysis.StorageToolExecutor;
import org.opencb.opencga.analysis.utils.VariantQcAnalysisExecutorUtils;
import org.opencb.opencga.analysis.variant.mutationalSignature.MutationalSignatureAnalysis;
import org.opencb.opencga.analysis.variant.mutationalSignature.MutationalSignatureLocalAnalysisExecutor;
import org.opencb.opencga.catalog.db.api.IndividualDBAdaptor;
import org.opencb.opencga.catalog.exceptions.CatalogException;
import org.opencb.opencga.catalog.managers.CatalogManager;
import org.opencb.opencga.core.common.GitRepositoryState;
import org.opencb.opencga.core.common.JacksonUtils;
import org.opencb.opencga.core.exceptions.ToolException;
import org.opencb.opencga.core.exceptions.ToolExecutorException;
import org.opencb.opencga.core.models.individual.Individual;
import org.opencb.opencga.core.models.sample.Sample;
import org.opencb.opencga.core.models.variant.FamilyQcAnalysisParams;
import org.opencb.opencga.core.response.OpenCGAResult;
import org.opencb.opencga.core.tools.ToolParams;
import org.opencb.opencga.core.tools.annotations.ToolExecutor;
import org.opencb.opencga.core.tools.variant.FamilyQcAnalysisExecutor;
import org.opencb.opencga.core.tools.variant.FamilyVariantQcAnalysisExecutor;

import java.io.IOException;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.AbstractMap;
import java.util.ArrayList;
import java.util.List;
import java.util.stream.Collectors;

import static org.opencb.opencga.analysis.utils.VariantQcAnalysisExecutorUtils.CONFIG_FILENAME;
import static org.opencb.opencga.analysis.variant.qc.VariantQcAnalysis.FAMILY_QC_TYPE;

@ToolExecutor(id="opencga-local", tool = FamilyVariantQcAnalysis.ID, framework = ToolExecutor.Framework.LOCAL,
source = ToolExecutor.Source.STORAGE)
Expand All @@ -67,6 +45,6 @@ public void run() throws ToolExecutorException {
throw new ToolExecutorException(e);
}

VariantQcAnalysisExecutorUtils.run(getVcfPaths(), getJsonPaths(), configPath, getOutDir(), getOpencgaHome());
VariantQcAnalysisExecutorUtils.run(FAMILY_QC_TYPE, getVcfPaths(), getJsonPaths(), configPath, getOutDir(), getOpencgaHome());
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,6 @@

import static org.opencb.opencga.analysis.utils.VariantQcAnalysisExecutorUtils.QC_JSON_EXTENSION;
import static org.opencb.opencga.core.models.common.InternalStatus.READY;
import static org.opencb.opencga.core.models.common.QualityControlStatus.COMPUTING;
import static org.opencb.opencga.core.models.common.QualityControlStatus.NONE;
import static org.opencb.opencga.core.models.study.StudyPermissions.Permissions.WRITE_INDIVIDUALS;
import static org.opencb.opencga.storage.core.variant.io.VariantWriterFactory.VariantOutputFormat.JSON;
Expand Down Expand Up @@ -118,7 +117,7 @@ protected void run() throws ToolException {

// Set quality control status to COMPUTING to prevent multiple individual QCs from running simultaneously
// for the same individual
if (!setComputingStatus(individual)) {
if (!setComputingStatus(individual.getId(), INDIVIDUAL_QC_TYPE)) {
continue;
}

Expand Down Expand Up @@ -200,21 +199,6 @@ protected void run() throws ToolException {
}
}

/**
 * Marks the given individual as having a quality control computation in progress.
 * <p>
 * The individual's quality control status is updated in the catalog to COMPUTING. If the catalog update fails, the
 * problem is logged, registered as a tool error, and the method reports failure instead of throwing.
 *
 * @param individual Individual whose quality control status is updated
 * @return true if the status was updated; false if the catalog update failed
 * @throws ToolException declared for callers; the catalog failure path itself is reported through the return value
 */
private boolean setComputingStatus(Individual individual) throws ToolException {
    try {
        QualityControlStatus computingStatus = new QualityControlStatus(COMPUTING, "Performing individual QC");
        IndividualUpdateParams statusUpdate = new IndividualUpdateParams().setQualityControlStatus(computingStatus);
        catalogManager.getIndividualManager().update(getStudy(), individual.getId(), statusUpdate, null, token);
        return true;
    } catch (CatalogException e) {
        // Do not abort the tool: record the failure and let the caller decide what to do with this individual
        final String errorMsg = "Could not set status to COMPUTING before performing the QC for the individual '"
                + individual.getId()
                + "': "
                + e.getMessage();
        logger.error(errorMsg);
        ToolException failure = new ToolException(errorMsg, e);
        addError(failure);
        return false;
    }
}

private void updateIndividualQualityControl(List<Individual> individuals) throws ToolException {
ObjectMapper objectMapper = JacksonUtils.getDefaultObjectMapper();
objectMapper.setSerializationInclusion(JsonInclude.Include.NON_NULL);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
import java.nio.file.Path;

import static org.opencb.opencga.analysis.utils.VariantQcAnalysisExecutorUtils.CONFIG_FILENAME;
import static org.opencb.opencga.analysis.variant.qc.VariantQcAnalysis.INDIVIDUAL_QC_TYPE;

@ToolExecutor(id="opencga-local", tool = IndividualVariantQcAnalysis.ID, framework = ToolExecutor.Framework.LOCAL,
source = ToolExecutor.Source.STORAGE)
Expand All @@ -44,6 +45,6 @@ public void run() throws ToolExecutorException {
throw new ToolExecutorException(e);
}

VariantQcAnalysisExecutorUtils.run(getVcfPaths(), getJsonPaths(), configPath, getOutDir(), getOpencgaHome());
VariantQcAnalysisExecutorUtils.run(INDIVIDUAL_QC_TYPE, getVcfPaths(), getJsonPaths(), configPath, getOutDir(), getOpencgaHome());
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,10 @@

package org.opencb.opencga.analysis.utils;

import com.fasterxml.jackson.databind.ObjectWriter;
import org.apache.commons.lang.StringUtils;
import org.opencb.commons.utils.DockerUtils;
import org.opencb.opencga.core.common.GitRepositoryState;
import org.opencb.opencga.core.common.JacksonUtils;
import org.opencb.opencga.core.exceptions.ToolExecutorException;
import org.opencb.opencga.core.models.variant.FamilyQcAnalysisParams;
import org.opencb.opencga.core.tools.ToolParams;

import java.io.IOException;
import java.nio.file.Path;
Expand All @@ -33,12 +29,19 @@
import java.util.List;
import java.util.stream.Collectors;

import static org.opencb.opencga.analysis.AnalysisUtils.ANALYSIS_FOLDER;
import static org.opencb.opencga.analysis.variant.qc.VariantQcAnalysis.QC_FOLDER;

public class VariantQcAnalysisExecutorUtils {

public static String CONFIG_FILENAME = "config.json";
public static String QC_JSON_EXTENSION = ".qc.json";

public static void run(LinkedList<Path> vcfPaths, LinkedList<Path> jsonPaths, Path configPath, Path outDir, Path opencgaHome)
private static String SCRIPT_VIRTUAL_FOLDER = "/script";
private static String JOB_VIRTUAL_FOLDER = "/jobdir";

public static void run(String qcType, LinkedList<Path> vcfPaths, LinkedList<Path> jsonPaths, Path configPath, Path outDir,
Path opencgaHome)
throws ToolExecutorException {
// Run the Python script responsible for performing the QC analyses for the given QC type
// variant_qc.main.py --vcf-file xxx --info-json xxx --bam-file xxx --qc-type xxx --config xxx --output-dir xxx
Expand All @@ -48,21 +51,21 @@ public static void run(LinkedList<Path> vcfPaths, LinkedList<Path> jsonPaths, Pa
try {
// Input binding
List<AbstractMap.SimpleEntry<String, String>> inputBindings = new ArrayList<>();
inputBindings.add(new AbstractMap.SimpleEntry<>(opencgaHome.resolve("analysis/variant-qc").toAbsolutePath().toString(),
"/script"));
inputBindings.add(new AbstractMap.SimpleEntry<>(opencgaHome.resolve(ANALYSIS_FOLDER).resolve(QC_FOLDER).toAbsolutePath()
.toString(), SCRIPT_VIRTUAL_FOLDER));

// Output binding
AbstractMap.SimpleEntry<String, String> outputBinding = new AbstractMap.SimpleEntry<>(outDir.toAbsolutePath().toString(),
"/jobdir");
JOB_VIRTUAL_FOLDER);

String params = "python3 /script/variant_qc.main.py"
String params = "python3 " + SCRIPT_VIRTUAL_FOLDER + "/variant_qc.main.py"
+ " --vcf-file " + StringUtils.join(vcfPaths.stream().map(p -> p.toAbsolutePath().toString().replace(
outDir.toAbsolutePath().toString(), "/jobdir")).collect(Collectors.toList()), ",")
outDir.toAbsolutePath().toString(), JOB_VIRTUAL_FOLDER)).collect(Collectors.toList()), ",")
+ " --info-json " + StringUtils.join(jsonPaths.stream().map(p -> p.toAbsolutePath().toString().replace(
outDir.toAbsolutePath().toString(), "/jobdir")).collect(Collectors.toList()), ",")
+ " --qc-type family"
+ " --config /jobdir/" + configPath.getFileName()
+ " --output-dir /jobdir";
outDir.toAbsolutePath().toString(), JOB_VIRTUAL_FOLDER)).collect(Collectors.toList()), ",")
+ " --qc-type " + qcType
+ " --config " + JOB_VIRTUAL_FOLDER + "/" + configPath.getFileName()
+ " --output-dir " + JOB_VIRTUAL_FOLDER;


// Execute Python script in docker
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,10 @@
import org.opencb.opencga.core.models.JwtPayload;
import org.opencb.opencga.core.models.common.QualityControlStatus;
import org.opencb.opencga.core.models.family.Family;
import org.opencb.opencga.core.models.family.FamilyUpdateParams;
import org.opencb.opencga.core.models.file.File;
import org.opencb.opencga.core.models.individual.Individual;
import org.opencb.opencga.core.models.individual.IndividualUpdateParams;
import org.opencb.opencga.core.models.sample.Sample;
import org.opencb.opencga.core.models.study.Study;
import org.opencb.opencga.core.models.study.StudyPermissions;
Expand All @@ -47,6 +49,7 @@
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;

Expand All @@ -62,6 +65,11 @@ public class VariantQcAnalysis extends OpenCgaToolScopeStudy {
public static final String QC_FOLDER = "qc/";
public static final String QC_DATA_FOLDER = QC_FOLDER + "data/";

// Data type
public static final String FAMILY_QC_TYPE = "family";
public static final String INDIVIDUAL_QC_TYPE = "individual";
public static final String SAMPLE_QC_TYPE = "sample";

// For relatedness analysis
public static final String RELATEDNESS_ANALYSIS_ID = "relatedness";
protected static final String RELATEDNESS_POP_FREQ_FILENAME = "autosomes_1000G_QC_prune_in.frq";
Expand All @@ -76,6 +84,9 @@ public class VariantQcAnalysis extends OpenCgaToolScopeStudy {
protected static final String INFERRED_SEX_THRESHOLDS_FILENAME = "karyotypic_sex_thresholds.json";
protected static final String INFERRED_SEX_THRESHOLDS_FILE_MSG = "Karyotypic sex thresholds file";

// For mendelian errors analysis
public static final String MENDELIAN_ERRORS_ANALYSIS_ID = "mendelian-errors";

@Override
protected void check() throws Exception {
super.check();
Expand Down Expand Up @@ -219,6 +230,35 @@ protected static Path checkFileParameter(String fileId, String msg, String study
return path;
}

/**
 * Sets the quality control status of a family or an individual to COMPUTING in the catalog.
 * <p>
 * Callers use this before launching a QC so that the entry is flagged as being processed. A catalog failure does
 * not abort the tool: it is logged, registered through {@code addError} and reported through the return value.
 *
 * @param id     ID of the family or individual whose quality control status is updated
 * @param qcType Kind of entry to update; must be {@code FAMILY_QC_TYPE} or {@code INDIVIDUAL_QC_TYPE}
 * @return true if the status was updated; false if the catalog update failed
 * @throws ToolException if {@code qcType} is null or is not one of the supported QC types
 */
protected boolean setComputingStatus(String id, String qcType) throws ToolException {
    // Validate the QC type up front with null-safe comparisons: a switch on a null String would throw a bare
    // NullPointerException instead of the intended internal-error ToolException
    final boolean familyQc = FAMILY_QC_TYPE.equals(qcType);
    final boolean individualQc = INDIVIDUAL_QC_TYPE.equals(qcType);
    if (!familyQc && !individualQc) {
        String msg = "Internal error: unknown QC type '" + qcType + "' (valid values are: " + StringUtils.join(
                Arrays.asList(FAMILY_QC_TYPE, INDIVIDUAL_QC_TYPE), ",") + ")";
        throw new ToolException(msg);
    }

    try {
        QualityControlStatus qcStatus = new QualityControlStatus(COMPUTING, "Performing " + qcType + " QC");
        if (familyQc) {
            FamilyUpdateParams updateParams = new FamilyUpdateParams().setQualityControlStatus(qcStatus);
            catalogManager.getFamilyManager().update(getStudy(), id, updateParams, null, token);
        } else {
            IndividualUpdateParams updateParams = new IndividualUpdateParams().setQualityControlStatus(qcStatus);
            catalogManager.getIndividualManager().update(getStudy(), id, updateParams, null, token);
        }
    } catch (CatalogException e) {
        // Keep the original exception as the cause so the catalog stack trace is not lost
        String msg = "Could not set status to COMPUTING before performing QC for " + qcType + " ID '" + id + "': " + e.getMessage();
        logger.error(msg);
        addError(new ToolException(msg, e));
        return false;
    }
    return true;
}

protected static List<String> getNoSomaticSampleIds(Family family, String studyId, CatalogManager catalogManager, String token)
throws CatalogException {
// Get list of individual IDs
Expand Down

0 comments on commit 8df5100

Please sign in to comment.