From 8df51005569e0f904155a8e5dcfb5152e2660297 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joaqu=C3=ADn=20T=C3=A1rraga=20Gim=C3=A9nez?= Date: Tue, 10 Sep 2024 17:56:17 +0200 Subject: [PATCH] analysis: improvements by re-using code, #TASK-6772, #TASK-6766 --- .../family/qc/FamilyVariantQcAnalysis.java | 20 +--------- .../FamilyVariantQcLocalAnalysisExecutor.java | 26 +----------- .../qc/IndividualVariantQcAnalysis.java | 18 +-------- ...ividualVariantQcLocalAnalysisExecutor.java | 3 +- .../utils/VariantQcAnalysisExecutorUtils.java | 31 +++++++------- .../variant/qc/VariantQcAnalysis.java | 40 +++++++++++++++++++ 6 files changed, 63 insertions(+), 75 deletions(-) diff --git a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/family/qc/FamilyVariantQcAnalysis.java b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/family/qc/FamilyVariantQcAnalysis.java index 0deaa0443d..7fd0c5c86e 100644 --- a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/family/qc/FamilyVariantQcAnalysis.java +++ b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/family/qc/FamilyVariantQcAnalysis.java @@ -27,7 +27,6 @@ import org.opencb.commons.datastore.core.Query; import org.opencb.commons.datastore.core.QueryOptions; import org.opencb.opencga.analysis.variant.qc.VariantQcAnalysis; -import org.opencb.opencga.analysis.variant.relatedness.RelatednessAnalysis; import org.opencb.opencga.catalog.exceptions.CatalogException; import org.opencb.opencga.catalog.managers.CatalogManager; import org.opencb.opencga.core.common.JacksonUtils; @@ -38,7 +37,6 @@ import org.opencb.opencga.core.models.family.FamilyQualityControl; import org.opencb.opencga.core.models.family.FamilyUpdateParams; import org.opencb.opencga.core.models.variant.FamilyQcAnalysisParams; -import org.opencb.opencga.core.models.variant.QcRelatednessAnalysisParams; import org.opencb.opencga.core.response.OpenCGAResult; import org.opencb.opencga.core.tools.annotations.Tool; import 
org.opencb.opencga.core.tools.annotations.ToolParams; @@ -54,7 +52,6 @@ import java.util.stream.Collectors; import static org.opencb.opencga.core.models.common.InternalStatus.READY; -import static org.opencb.opencga.core.models.common.QualityControlStatus.COMPUTING; import static org.opencb.opencga.core.models.common.QualityControlStatus.NONE; import static org.opencb.opencga.core.models.study.StudyPermissions.Permissions.WRITE_FAMILIES; import static org.opencb.opencga.storage.core.variant.io.VariantWriterFactory.VariantOutputFormat.JSON; @@ -105,7 +102,7 @@ protected void run() throws ToolException { // Set quality control status to COMPUTING to prevent multiple family QCs from running simultaneously // for the same family - if (!setComputingStatus(family)) { + if (!setComputingStatus(family.getId(), FAMILY_QC_TYPE)) { continue; } @@ -175,21 +172,6 @@ protected void run() throws ToolException { } } - private boolean setComputingStatus(Family family) throws ToolException { - try { - QualityControlStatus qcStatus = new QualityControlStatus(COMPUTING, "Performing family QC"); - FamilyUpdateParams updateParams = new FamilyUpdateParams().setQualityControlStatus(qcStatus); - catalogManager.getFamilyManager().update(getStudy(), family.getId(), updateParams, null, token); - } catch (CatalogException e) { - String msg = "Could not set status to COMPUTING before performing the QC for the family " + family.getId() + ": " - + e.getMessage(); - logger.error(msg); - addError(new ToolException(msg, e)); - return false; - } - return true; - } - public static void checkParameters(FamilyQcAnalysisParams params, String studyId, CatalogManager catalogManager, String token) throws ToolException { // Check study diff --git a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/family/qc/FamilyVariantQcLocalAnalysisExecutor.java b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/family/qc/FamilyVariantQcLocalAnalysisExecutor.java index 8f2ff864f2..b44efdbceb 100644 --- 
a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/family/qc/FamilyVariantQcLocalAnalysisExecutor.java +++ b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/family/qc/FamilyVariantQcLocalAnalysisExecutor.java @@ -17,41 +17,19 @@ package org.opencb.opencga.analysis.family.qc; import com.fasterxml.jackson.databind.ObjectWriter; -import org.apache.commons.collections4.CollectionUtils; -import org.apache.commons.lang.StringUtils; -import org.opencb.biodata.models.clinical.qc.RelatednessReport; -import org.opencb.commons.datastore.core.Query; -import org.opencb.commons.datastore.core.QueryOptions; -import org.opencb.commons.utils.DockerUtils; import org.opencb.opencga.analysis.StorageToolExecutor; import org.opencb.opencga.analysis.utils.VariantQcAnalysisExecutorUtils; -import org.opencb.opencga.analysis.variant.mutationalSignature.MutationalSignatureAnalysis; -import org.opencb.opencga.analysis.variant.mutationalSignature.MutationalSignatureLocalAnalysisExecutor; -import org.opencb.opencga.catalog.db.api.IndividualDBAdaptor; -import org.opencb.opencga.catalog.exceptions.CatalogException; -import org.opencb.opencga.catalog.managers.CatalogManager; -import org.opencb.opencga.core.common.GitRepositoryState; import org.opencb.opencga.core.common.JacksonUtils; -import org.opencb.opencga.core.exceptions.ToolException; import org.opencb.opencga.core.exceptions.ToolExecutorException; -import org.opencb.opencga.core.models.individual.Individual; -import org.opencb.opencga.core.models.sample.Sample; import org.opencb.opencga.core.models.variant.FamilyQcAnalysisParams; -import org.opencb.opencga.core.response.OpenCGAResult; -import org.opencb.opencga.core.tools.ToolParams; import org.opencb.opencga.core.tools.annotations.ToolExecutor; -import org.opencb.opencga.core.tools.variant.FamilyQcAnalysisExecutor; import org.opencb.opencga.core.tools.variant.FamilyVariantQcAnalysisExecutor; import java.io.IOException; import java.nio.file.Path; -import 
java.nio.file.Paths; -import java.util.AbstractMap; -import java.util.ArrayList; -import java.util.List; -import java.util.stream.Collectors; import static org.opencb.opencga.analysis.utils.VariantQcAnalysisExecutorUtils.CONFIG_FILENAME; +import static org.opencb.opencga.analysis.variant.qc.VariantQcAnalysis.FAMILY_QC_TYPE; @ToolExecutor(id="opencga-local", tool = FamilyVariantQcAnalysis.ID, framework = ToolExecutor.Framework.LOCAL, source = ToolExecutor.Source.STORAGE) @@ -67,6 +45,6 @@ public void run() throws ToolExecutorException { throw new ToolExecutorException(e); } - VariantQcAnalysisExecutorUtils.run(getVcfPaths(), getJsonPaths(), configPath, getOutDir(), getOpencgaHome()); + VariantQcAnalysisExecutorUtils.run(FAMILY_QC_TYPE, getVcfPaths(), getJsonPaths(), configPath, getOutDir(), getOpencgaHome()); } } diff --git a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/individual/qc/IndividualVariantQcAnalysis.java b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/individual/qc/IndividualVariantQcAnalysis.java index 043e998ac3..49be094537 100644 --- a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/individual/qc/IndividualVariantQcAnalysis.java +++ b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/individual/qc/IndividualVariantQcAnalysis.java @@ -54,7 +54,6 @@ import static org.opencb.opencga.analysis.utils.VariantQcAnalysisExecutorUtils.QC_JSON_EXTENSION; import static org.opencb.opencga.core.models.common.InternalStatus.READY; -import static org.opencb.opencga.core.models.common.QualityControlStatus.COMPUTING; import static org.opencb.opencga.core.models.common.QualityControlStatus.NONE; import static org.opencb.opencga.core.models.study.StudyPermissions.Permissions.WRITE_INDIVIDUALS; import static org.opencb.opencga.storage.core.variant.io.VariantWriterFactory.VariantOutputFormat.JSON; @@ -118,7 +117,7 @@ protected void run() throws ToolException { // Set quality control status to COMPUTING to prevent multiple 
individual QCs from running simultaneously // for the same individual - if (!setComputingStatus(individual)) { + if (!setComputingStatus(individual.getId(), INDIVIDUAL_QC_TYPE)) { continue; } @@ -200,21 +199,6 @@ protected void run() throws ToolException { } } - private boolean setComputingStatus(Individual individual) throws ToolException { - try { - QualityControlStatus qcStatus = new QualityControlStatus(COMPUTING, "Performing individual QC"); - IndividualUpdateParams updateParams = new IndividualUpdateParams().setQualityControlStatus(qcStatus); - catalogManager.getIndividualManager().update(getStudy(), individual.getId(), updateParams, null, token); - } catch (CatalogException e) { - String msg = "Could not set status to COMPUTING before performing the QC for the individual '" + individual.getId() + "': " - + e.getMessage(); - logger.error(msg); - addError(new ToolException(msg, e)); - return false; - } - return true; - } - private void updateIndividualQualityControl(List individuals) throws ToolException { ObjectMapper objectMapper = JacksonUtils.getDefaultObjectMapper(); objectMapper.setSerializationInclusion(JsonInclude.Include.NON_NULL); diff --git a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/individual/qc/IndividualVariantQcLocalAnalysisExecutor.java b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/individual/qc/IndividualVariantQcLocalAnalysisExecutor.java index ddf00c11cb..28ba991c42 100644 --- a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/individual/qc/IndividualVariantQcLocalAnalysisExecutor.java +++ b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/individual/qc/IndividualVariantQcLocalAnalysisExecutor.java @@ -29,6 +29,7 @@ import java.nio.file.Path; import static org.opencb.opencga.analysis.utils.VariantQcAnalysisExecutorUtils.CONFIG_FILENAME; +import static org.opencb.opencga.analysis.variant.qc.VariantQcAnalysis.INDIVIDUAL_QC_TYPE; @ToolExecutor(id="opencga-local", tool = 
IndividualVariantQcAnalysis.ID, framework = ToolExecutor.Framework.LOCAL, source = ToolExecutor.Source.STORAGE) @@ -44,6 +45,6 @@ public void run() throws ToolExecutorException { throw new ToolExecutorException(e); } - VariantQcAnalysisExecutorUtils.run(getVcfPaths(), getJsonPaths(), configPath, getOutDir(), getOpencgaHome()); + VariantQcAnalysisExecutorUtils.run(INDIVIDUAL_QC_TYPE, getVcfPaths(), getJsonPaths(), configPath, getOutDir(), getOpencgaHome()); } } diff --git a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/utils/VariantQcAnalysisExecutorUtils.java b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/utils/VariantQcAnalysisExecutorUtils.java index c0757693a7..e89fd31442 100644 --- a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/utils/VariantQcAnalysisExecutorUtils.java +++ b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/utils/VariantQcAnalysisExecutorUtils.java @@ -16,14 +16,10 @@ package org.opencb.opencga.analysis.utils; -import com.fasterxml.jackson.databind.ObjectWriter; import org.apache.commons.lang.StringUtils; import org.opencb.commons.utils.DockerUtils; import org.opencb.opencga.core.common.GitRepositoryState; -import org.opencb.opencga.core.common.JacksonUtils; import org.opencb.opencga.core.exceptions.ToolExecutorException; -import org.opencb.opencga.core.models.variant.FamilyQcAnalysisParams; -import org.opencb.opencga.core.tools.ToolParams; import java.io.IOException; import java.nio.file.Path; @@ -33,12 +29,19 @@ import java.util.List; import java.util.stream.Collectors; +import static org.opencb.opencga.analysis.AnalysisUtils.ANALYSIS_FOLDER; +import static org.opencb.opencga.analysis.variant.qc.VariantQcAnalysis.QC_FOLDER; + public class VariantQcAnalysisExecutorUtils { public static String CONFIG_FILENAME = "config.json"; public static String QC_JSON_EXTENSION = ".qc.json"; - public static void run(LinkedList vcfPaths, LinkedList jsonPaths, Path configPath, Path outDir, Path opencgaHome) + 
private static String SCRIPT_VIRTUAL_FOLDER = "/script"; + private static String JOB_VIRTUAL_FOLDER = "/jobdir"; + + public static void run(String qcType, LinkedList vcfPaths, LinkedList jsonPaths, Path configPath, Path outDir, + Path opencgaHome) throws ToolExecutorException { // Run the Python script responsible for performing the family QC analyses // variant_qc.main.py --vcf-file xxx --info-json xxx --bam-file xxx --qc-type xxx --config xxx --output-dir xxx @@ -48,21 +51,21 @@ public static void run(LinkedList vcfPaths, LinkedList jsonPaths, Pa try { // Input binding List> inputBindings = new ArrayList<>(); - inputBindings.add(new AbstractMap.SimpleEntry<>(opencgaHome.resolve("analysis/variant-qc").toAbsolutePath().toString(), - "/script")); + inputBindings.add(new AbstractMap.SimpleEntry<>(opencgaHome.resolve(ANALYSIS_FOLDER).resolve(QC_FOLDER).toAbsolutePath() + .toString(), SCRIPT_VIRTUAL_FOLDER)); // Output binding AbstractMap.SimpleEntry outputBinding = new AbstractMap.SimpleEntry<>(outDir.toAbsolutePath().toString(), - "/jobdir"); + JOB_VIRTUAL_FOLDER); - String params = "python3 /script/variant_qc.main.py" + String params = "python3 " + SCRIPT_VIRTUAL_FOLDER + "/variant_qc.main.py" + " --vcf-file " + StringUtils.join(vcfPaths.stream().map(p -> p.toAbsolutePath().toString().replace( - outDir.toAbsolutePath().toString(), "/jobdir")).collect(Collectors.toList()), ",") + outDir.toAbsolutePath().toString(), JOB_VIRTUAL_FOLDER)).collect(Collectors.toList()), ",") + " --info-json " + StringUtils.join(jsonPaths.stream().map(p -> p.toAbsolutePath().toString().replace( - outDir.toAbsolutePath().toString(), "/jobdir")).collect(Collectors.toList()), ",") - + " --qc-type family" - + " --config /jobdir/" + configPath.getFileName() - + " --output-dir /jobdir"; + outDir.toAbsolutePath().toString(), JOB_VIRTUAL_FOLDER)).collect(Collectors.toList()), ",") + + " --qc-type " + qcType + + " --config " + JOB_VIRTUAL_FOLDER + "/" + configPath.getFileName() + + " --output-dir " 
+ JOB_VIRTUAL_FOLDER; // Execute Pythong script in docker diff --git a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/qc/VariantQcAnalysis.java b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/qc/VariantQcAnalysis.java index 447080b1e6..1303304afb 100644 --- a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/qc/VariantQcAnalysis.java +++ b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/qc/VariantQcAnalysis.java @@ -32,8 +32,10 @@ import org.opencb.opencga.core.models.JwtPayload; import org.opencb.opencga.core.models.common.QualityControlStatus; import org.opencb.opencga.core.models.family.Family; +import org.opencb.opencga.core.models.family.FamilyUpdateParams; import org.opencb.opencga.core.models.file.File; import org.opencb.opencga.core.models.individual.Individual; +import org.opencb.opencga.core.models.individual.IndividualUpdateParams; import org.opencb.opencga.core.models.sample.Sample; import org.opencb.opencga.core.models.study.Study; import org.opencb.opencga.core.models.study.StudyPermissions; @@ -47,6 +49,7 @@ import java.nio.file.Path; import java.nio.file.Paths; import java.util.ArrayList; +import java.util.Arrays; import java.util.List; import java.util.stream.Collectors; @@ -62,6 +65,11 @@ public class VariantQcAnalysis extends OpenCgaToolScopeStudy { public static final String QC_FOLDER = "qc/"; public static final String QC_DATA_FOLDER = QC_FOLDER + "data/"; + // Data type + public static final String FAMILY_QC_TYPE = "family"; + public static final String INDIVIDUAL_QC_TYPE = "individual"; + public static final String SAMPLE_QC_TYPE = "sample"; + // For relatedness analysis public static final String RELATEDNESS_ANALYSIS_ID = "relatedness"; protected static final String RELATEDNESS_POP_FREQ_FILENAME = "autosomes_1000G_QC_prune_in.frq"; @@ -76,6 +84,9 @@ public class VariantQcAnalysis extends OpenCgaToolScopeStudy { protected static final String 
INFERRED_SEX_THRESHOLDS_FILENAME = "karyotypic_sex_thresholds.json"; protected static final String INFERRED_SEX_THRESHOLDS_FILE_MSG = "Karyotypic sex thresholds file"; + // For mendelian errors analysis + public static final String MENDELIAN_ERRORS_ANALYSIS_ID = "mendelian-errors"; + @Override protected void check() throws Exception { super.check(); @@ -219,6 +230,35 @@ protected static Path checkFileParameter(String fileId, String msg, String study return path; } + protected boolean setComputingStatus(String id, String qcType) throws ToolException { + try { + QualityControlStatus qcStatus = new QualityControlStatus(COMPUTING, "Performing " + qcType + " QC"); + switch (qcType) { + case FAMILY_QC_TYPE: { + FamilyUpdateParams updateParams = new FamilyUpdateParams().setQualityControlStatus(qcStatus); + catalogManager.getFamilyManager().update(getStudy(), id, updateParams, null, token); + break; + } + case INDIVIDUAL_QC_TYPE: { + IndividualUpdateParams updateParams = new IndividualUpdateParams().setQualityControlStatus(qcStatus); + catalogManager.getIndividualManager().update(getStudy(), id, updateParams, null, token); + break; + } + default: { + String msg = "Internal error: unknown QC type '" + qcType + "' (valid values are: " + StringUtils.join( + Arrays.asList(FAMILY_QC_TYPE, INDIVIDUAL_QC_TYPE), ",") + ")"; + throw new ToolException(msg); + } + } + } catch (CatalogException e) { + String msg = "Could not set status to COMPUTING before performing QC for " + qcType + " ID '" + id + "': " + e.getMessage(); + logger.error(msg); + addError(new ToolException(msg, e)); + return false; + } + return true; + } + protected static List getNoSomaticSampleIds(Family family, String studyId, CatalogManager catalogManager, String token) throws CatalogException { // Get list of individual IDs