diff --git a/opencga-analysis/src/main/R/mutational-signature/mutational-signature.r b/opencga-analysis/src/main/R/mutational-signature/mutational-signature.r deleted file mode 100644 index 30517e786f1..00000000000 --- a/opencga-analysis/src/main/R/mutational-signature/mutational-signature.r +++ /dev/null @@ -1,170 +0,0 @@ -#install.packages("nnls") -#install.packages("ggplot2") -#install.packages("jsonlite") - -library(nnls) -library(ggplot2) -suppressMessages(library(jsonlite)) - -# Getting command arguments -args <- commandArgs(trailingOnly = TRUE) - -context <- args[1] -signatures <- args[2] -outdir <- args[3] - -print(paste("Genome context file: ", context)) -print(paste("Mutational signatures file: ", signatures)) -print(paste("Output directory: ", outdir)) - - -# Getting absolute counts for each SNV trinucleotide context -#dataTable <- read.table(args[1], sep = "\t", header = TRUE) -dataTable <- read.table(context, sep = "\t", header = TRUE) -dataTable <- as.vector(dataTable$Count) - -# Normalising frequencies to values between 0 and 1 like the signature table is -dataTable <- dataTable / sum(dataTable) - -# Getting signature probabilities reference table -#signatureTable <- read.table(args[2], sep = "\t", header = TRUE) -signatureTable <- read.table(signatures, sep = "\t", header = TRUE) -header <- scan(args[2], nlines = 1, what = character()) -tags <- header[2:length(header)]; -signatureTable <- as.matrix(signatureTable[1:96,2:length(header)]) - -# Applying non-negative least squares (NNLS) -coefficients <- nnls(signatureTable, dataTable) - -################################# -# Calculating confidence values # -################################# - -# Getting the original RSS value -RSSOriginal <- coefficients$deviance - -# Optimising the RSS score by iteratively removing signatures with coefficients -# less than 0.06 and refitting the model. 
-# Optimisation is limitted to 10 iterations to avoid large computaional time -# and because model improvement will not be significant after 2-3 iterations -coefficientsCopy <- coefficients # Copy of the nnls object -signatureTableCopy <- signatureTable # Copy of COSMIC signature table -counter <- 0 # Iteration counter -toRemove <- which(coefficientsCopy$x < 0.06 & coefficientsCopy$x > 0 ) - -# Optimisation -while(length(toRemove) > 0 && counter < 10) { - toRemove <- which(coefficientsCopy$x < 0.06 & coefficientsCopy$x > 0 ) - # Removing - for (i in 1:length(toRemove)) { - signatureTableCopy[, toRemove[i]] <- 0 - } - # Recalculating coefficients - coefficientsCopy <- nnls(signatureTableCopy, dataTable) - counter <- counter + 1 - toRemove <- which(coefficientsCopy$x < 0.06 & coefficientsCopy$x > 0 ) -} - -# Explained Sum of Squares -#ESS <- sum((coefficientsCopy$fitted - apply(dataTable, 1, mean)) ^ 2) - -# Residual Sum of Squares -RSS <- sum(coefficientsCopy$residuals^2) - -# Total Sum of Squares -#TSS <- ESS + RSS - -# Value to be reported: RSS -RSS <- round(RSS, digits = 4) - -##################### -# Writing JSON file # -##################### - -# Normalising coefficients to values between 0 and 100 -coefficients <- coefficients$x / sum(coefficients$x) * 100 - -# Creating signature names -#tags <- paste("Signature", 1:30, sep = " ") - -# Writing coefficients and RSS to JSON file -dfCoeff <- as.data.frame(t(data.frame(coefficients))) -colnames(dfCoeff) <- sapply(sub(' ', '', tags), tolower) -rownames(dfCoeff) <- NULL -df <- NULL -df$coefficients <- dfCoeff -df$rss <- RSS -j <- gsub("\\[|\\]", "", toJSON(df)) -write(j, paste0(outdir, "/signature_coefficients.json")) - - -############ -# Plotting # -############ - -# Ordering coefficients by value -orderedIndex <- order(coefficients, decreasing = TRUE) -orderedCoefficients <- coefficients[orderedIndex] -orderedTags <- tags[orderedIndex] - -# Filtering out coefficients below 5 -coeffs2 <- 
orderedCoefficients[orderedCoefficients > 5] -tags2 <- orderedTags[orderedCoefficients > 5] - -# Setting up boxes y-limits -cumulativeSum <- cumsum(c(0, coeffs2, 100 - sum(coeffs2))) -starts <- cumulativeSum[1:length(cumulativeSum) - 1] -ends <- cumulativeSum[2:length(cumulativeSum)] - -# Setting up boxes colours -colours <- c("darkblue", "darkgreen", "gold2", "darkorange", "darkred", "purple4", "blue1", "chartreuse3", "darkgoldenrod1", "chocolate1", "firebrick1", "darkorchid3") - -# Setting up ticks and ticks labels -tickPos <- 0:5 * 20 -tickNames <- paste0(tickPos, "%") - -# Setting up legend positions -numLegends <- 1:(length(coeffs2) + 1) -legendPos <- ((numLegends - (mean(numLegends))) * 7) + 50 - -# PLOTTING -png(paste0(outdir, "/signature_summary.png"), width = 1028, height = 800, type='cairo') -ggplot() + - # Boxes - geom_rect(mapping=aes(xmin = 0, xmax = 1, ymin = starts, ymax = ends - 0.3), - fill = c(colours[1:length(starts) - 1], "white"), - color = "black", alpha = 0.7) + - # Plot width - xlim(-1, 2) + - # Y-axis line - geom_segment(mapping = aes(x = -0.1, xend = -0.1, y = 0, yend = 100)) + - # Y-axis ticks - geom_segment(mapping = aes(x = -0.1, xend = -0.15, - y = tickPos, yend = tickPos)) + - # Y-axis labels - geom_text(mapping = aes(x = -0.3, y = tickPos), label = tickNames) + - # Y-axis title - geom_text(mapping = aes(x = -0.5, y = 50), label = "Signature contribution", - angle = 90) + - # Legend boxes - geom_rect(mapping = aes(xmin = 1.2, xmax = 1.25, - ymin = (legendPos - 1), ymax = (legendPos + 1)), - color = "black", - fill = c(colours[1:length(legendPos) - 1], "white"), - alpha = 0.7) + - # Legend text - geom_text(mapping = aes(x = 1.28, y = legendPos), label = c(tags2, "Other"), - hjust = 0) + - # Removing every other ggplot element from the plot - theme(axis.text.x = element_blank(), - axis.ticks.y = element_blank(), - axis.ticks.x = element_blank(), - axis.title.x = element_blank(), - axis.title.y = element_blank(), - axis.text.y = 
element_blank(), - panel.background = element_blank(), - panel.grid.major = element_blank(), - panel.grid.minor = element_blank(), - plot.background = element_blank()) - -garbage <- dev.off() diff --git a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/alignment/qc/AlignmentFastQcMetricsAnalysis.java b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/alignment/qc/AlignmentFastQcMetricsAnalysis.java index 1dd71a315af..e63f3e91a42 100644 --- a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/alignment/qc/AlignmentFastQcMetricsAnalysis.java +++ b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/alignment/qc/AlignmentFastQcMetricsAnalysis.java @@ -22,6 +22,7 @@ import org.opencb.commons.datastore.core.Query; import org.opencb.commons.datastore.core.QueryOptions; import org.opencb.opencga.analysis.tools.OpenCgaToolScopeStudy; +import org.opencb.opencga.analysis.wrappers.executors.DockerWrapperAnalysisExecutor; import org.opencb.opencga.analysis.wrappers.fastqc.FastqcWrapperAnalysisExecutor; import org.opencb.opencga.catalog.db.api.FileDBAdaptor; import org.opencb.opencga.core.exceptions.ToolException; @@ -96,7 +97,7 @@ protected void run() throws ToolException { }); } - public static FastQcMetrics parseResults(Path outDir) throws ToolException { + public static FastQcMetrics parseResults(Path outDir, String confJobDir) throws ToolException { Path fastQcPath = null; Path imgPath = null; for (java.io.File file : outDir.toFile().listFiles()) { @@ -116,12 +117,16 @@ public static FastQcMetrics parseResults(Path outDir) throws ToolException { // Replace absolute paths to relative paths List relativePaths = new ArrayList<>(); for (String path : fastQcMetrics.getFiles()) { - int index = path.indexOf("JOBS/"); - relativePaths.add(index == -1 ? 
new java.io.File(path).getName() : path.substring(index)); + // Sanity check + if (!path.startsWith(confJobDir)) { + throw new ToolException("The FastQC file " + path + " is not in the configuration job folder "+ confJobDir); + } + relativePaths.add(path.substring(confJobDir.length() + 1)); } fastQcMetrics.setFiles(relativePaths); } else { - throw new ToolException("Something wrong happened: FastQC file " + fastQcPath.getFileName() + " not found!"); + String msg = DockerWrapperAnalysisExecutor.getStdErrMessage("Something wrong happened running FastQC analysis.", outDir); + throw new ToolException(msg); } } catch (IOException e) { new ToolException("Error parsing Alignment FastQC Metrics file: " + e.getMessage()); diff --git a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/alignment/qc/AlignmentQcAnalysis.java b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/alignment/qc/AlignmentQcAnalysis.java index 67ac836c36a..f6ae680c85f 100644 --- a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/alignment/qc/AlignmentQcAnalysis.java +++ b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/alignment/qc/AlignmentQcAnalysis.java @@ -16,33 +16,40 @@ package org.opencb.opencga.analysis.alignment.qc; +import org.apache.commons.collections4.CollectionUtils; +import org.apache.commons.io.FileUtils; import org.apache.commons.lang3.StringUtils; import org.opencb.biodata.formats.alignment.samtools.SamtoolsFlagstats; import org.opencb.biodata.formats.alignment.samtools.SamtoolsStats; import org.opencb.biodata.formats.sequence.fastqc.FastQcMetrics; -import org.opencb.commons.datastore.core.Event; -import org.opencb.commons.datastore.core.ObjectMap; -import org.opencb.commons.datastore.core.Query; import org.opencb.commons.datastore.core.QueryOptions; import org.opencb.opencga.analysis.AnalysisUtils; import org.opencb.opencga.analysis.tools.OpenCgaToolScopeStudy; +import org.opencb.opencga.analysis.tools.ToolRunner; +import 
org.opencb.opencga.analysis.wrappers.executors.DockerWrapperAnalysisExecutor; +import org.opencb.opencga.analysis.wrappers.fastqc.FastqcWrapperAnalysis; +import org.opencb.opencga.analysis.wrappers.samtools.SamtoolsWrapperAnalysis; import org.opencb.opencga.catalog.exceptions.CatalogException; -import org.opencb.opencga.core.api.ParamConstants; import org.opencb.opencga.core.exceptions.ToolException; -import org.opencb.opencga.core.models.alignment.*; +import org.opencb.opencga.core.models.alignment.AlignmentQcParams; +import org.opencb.opencga.core.models.alignment.FastqcWrapperParams; +import org.opencb.opencga.core.models.alignment.SamtoolsWrapperParams; import org.opencb.opencga.core.models.common.Enums; import org.opencb.opencga.core.models.file.File; +import org.opencb.opencga.core.models.file.FileLinkParams; import org.opencb.opencga.core.models.file.FileQualityControl; import org.opencb.opencga.core.models.file.FileUpdateParams; -import org.opencb.opencga.core.models.job.Job; -import org.opencb.opencga.core.response.OpenCGAResult; import org.opencb.opencga.core.tools.annotations.Tool; import org.opencb.opencga.core.tools.annotations.ToolParams; +import org.opencb.opencga.core.tools.result.ExecutionResult; +import org.opencb.opencga.core.tools.result.Status; +import java.io.IOException; +import java.nio.charset.Charset; import java.nio.file.Path; -import java.nio.file.Paths; import java.util.*; +import static org.apache.commons.io.FileUtils.readLines; import static org.opencb.opencga.core.api.ParamConstants.ALIGNMENT_QC_DESCRIPTION; @Tool(id = AlignmentQcAnalysis.ID, resource = Enums.Resource.ALIGNMENT) @@ -51,15 +58,25 @@ public class AlignmentQcAnalysis extends OpenCgaToolScopeStudy { public static final String ID = "alignment-qc"; public static final String DESCRIPTION = ALIGNMENT_QC_DESCRIPTION; + public static final String SAMTOOLS_STATS_STEP = "samtools-stats"; + public static final String SAMTOOLS_FLAGSTATS_STEP = "samtools-flagstats"; + public static 
final String PLOT_BAMSTATS_STEP = "plot-bamstats"; + public static final String FASTQC_METRICS_STEP = "fastqc-metrics"; + public static final String UPDATE_FILE_ALIGNMENT_QC_STEP = "update-file-alignment-qc"; + @ToolParams - protected final AlignmentQcParams analysisParams = new AlignmentQcParams(); + protected final AlignmentQcParams alignmentQcParams = new AlignmentQcParams(); - private File catalogBamFile; - private AlignmentFileQualityControl alignmentQc = null; + private ToolRunner toolRunner; + + private boolean runSamtoolsStatsStep = true; + private boolean runSamptoolsFlagstatsStep = true; + private boolean runFastqcMetricsStep = true; + private boolean updateQcStep = true; - private boolean runStats = true; - private boolean runFlagStats = true; - private boolean runFastqc = true; + private File catalogBamFile; + private File catalogStatsFile; + private FileQualityControl fileQc = null; @Override protected void check() throws Exception { @@ -70,214 +87,306 @@ protected void check() throws Exception { } try { - catalogBamFile = AnalysisUtils.getCatalogFile(analysisParams.getBamFile(), study, catalogManager.getFileManager(), token); - if (catalogBamFile.getQualityControl() != null) { - alignmentQc = catalogBamFile.getQualityControl().getAlignment(); - } + catalogBamFile = AnalysisUtils.getCatalogFile(alignmentQcParams.getBamFile(), study, catalogManager.getFileManager(), token); + fileQc = catalogBamFile.getQualityControl(); + } catch (CatalogException e) { + throw new ToolException("Error accessing to the BAM file '" + alignmentQcParams.getBamFile() + "'", e); + } - // Prepare flags - String skip = null; - if (StringUtils.isNotEmpty(analysisParams.getSkip())) { - skip = analysisParams.getSkip().toLowerCase().replace(" ", ""); + // Prepare flags from skip and overwrite + String skip = null; + if (StringUtils.isNotEmpty(alignmentQcParams.getSkip())) { + skip = alignmentQcParams.getSkip().toLowerCase().replace(" ", ""); + } + if (StringUtils.isNotEmpty(skip)) 
{ + Set skipValues = new HashSet<>(Arrays.asList(skip.split(","))); + if (skipValues.contains(AlignmentQcParams.STATS_SKIP_VALUE)) { + runSamtoolsStatsStep = false; + String msg = "Skipping Samtools stats (and plot) by user"; + addWarning(msg); + logger.warn(msg); } - if (StringUtils.isNotEmpty(skip)) { - Set skipValues = new HashSet<>(Arrays.asList(skip.split(","))); - if (skipValues.contains(AlignmentQcParams.STATS_SKIP_VALUE) - || - (!analysisParams.isOverwrite() && alignmentQc != null && alignmentQc.getSamtoolsStats() != null)) { - runStats = false; - } - if (skipValues.contains(AlignmentQcParams.FLAGSTATS_SKIP_VALUE) - || - (!analysisParams.isOverwrite() && alignmentQc != null && alignmentQc.getSamtoolsFlagStats() != null)) { - runFlagStats = false; - } - if (skipValues.contains(AlignmentQcParams.FASTQC_METRICS_SKIP_VALUE) - || - (!analysisParams.isOverwrite() && alignmentQc != null && alignmentQc.getFastQcMetrics() != null)) { - runFastqc = false; - } + if (skipValues.contains(AlignmentQcParams.FLAGSTATS_SKIP_VALUE)) { + runSamptoolsFlagstatsStep = false; + String msg = "Skipping Samtools flagstats by user"; + addWarning(msg); + logger.warn(msg); + } + if (skipValues.contains(AlignmentQcParams.FASTQC_METRICS_SKIP_VALUE)) { + runFastqcMetricsStep = false; + String msg = "Skipping FastQC metrics by user"; + addWarning(msg); + logger.warn(msg); + } + } + if (!alignmentQcParams.isOverwrite() && fileQc != null && fileQc.getAlignment() != null) { + if (runSamtoolsStatsStep && fileQc.getAlignment().getSamtoolsStats() != null) { + runSamtoolsStatsStep = false; + String msg = "Skipping Samtools stats (and plots) because they already exist and the overwrite flag is not set"; + addWarning(msg); + logger.warn(msg); + } + if (runSamptoolsFlagstatsStep && fileQc.getAlignment().getSamtoolsFlagStats() != null) { + runSamptoolsFlagstatsStep = false; + String msg = "Skipping Samtools flag stats because they already exist and the overwrite flag is not set"; + addWarning(msg); + 
logger.warn(msg); + } + if (runFastqcMetricsStep && fileQc.getAlignment().getFastQcMetrics() != null) { + runFastqcMetricsStep = false; + String msg = "Skipping FastQC metrics because they already exist and the overwrite flag is not set"; + addWarning(msg); + logger.warn(msg); } - } catch (CatalogException e) { - throw new ToolException("Error accessing to the BAM file '" + analysisParams.getBamFile() + "'", e); } + + updateQcStep = (runSamptoolsFlagstatsStep || runSamtoolsStatsStep || runFastqcMetricsStep) ? true : false; + } + + @Override + protected List getSteps() { + List steps = new ArrayList<>(); + if (runSamtoolsStatsStep) { + steps.add(SAMTOOLS_STATS_STEP); + steps.add(PLOT_BAMSTATS_STEP); + } + if (runSamptoolsFlagstatsStep) { + steps.add(SAMTOOLS_FLAGSTATS_STEP); + } + if (runFastqcMetricsStep) { + steps.add(FASTQC_METRICS_STEP); + } + if (updateQcStep) { + steps.add(UPDATE_FILE_ALIGNMENT_QC_STEP); + } + return steps; } @Override protected void run() throws ToolException { + // Create the tool runner + toolRunner = new ToolRunner(getOpencgaHome().toString(), catalogManager, variantStorageManager); - step(() -> { - Map params; - String statsJobId = null; - String flagStatsJobId = null; - String fastQcMetricsJobId = null; + // Get alignment QC metrics to update + if (catalogBamFile.getQualityControl() != null) { + fileQc = catalogBamFile.getQualityControl(); + } + if (fileQc == null) { + fileQc = new FileQualityControl(); + } - try { - if (runFlagStats) { - // Flag stats - params = new AlignmentFlagStatsParams(analysisParams.getBamFile(), null) - .toParams(new ObjectMap(ParamConstants.STUDY_PARAM, study)); - - OpenCGAResult flagStatsJobResult = catalogManager.getJobManager() - .submit(study, AlignmentFlagStatsAnalysis.ID, Enums.Priority.MEDIUM, params, null, "Job generated by " - + getId() + " - " + getJobId(), Collections.emptyList(), Collections.emptyList(), getJobId(), null, - false, token); - flagStatsJobId = flagStatsJobResult.first().getId(); - 
addEvent(Event.Type.INFO, "Submit job " + flagStatsJobId + " to compute stats (" + AlignmentFlagStatsAnalysis.ID - + ")"); - } - } catch (CatalogException e) { - addWarning("Error launching job for Alignment Flag Stats Analysis: " + e.getMessage()); - } + if (runSamtoolsStatsStep) { + step(SAMTOOLS_STATS_STEP, this::runSamtoolsStats); + step(PLOT_BAMSTATS_STEP, this::runPlotBamstats); + } + if (runSamptoolsFlagstatsStep) { + step(SAMTOOLS_FLAGSTATS_STEP, this::runSamtoolsFlagstats); + } + if (runFastqcMetricsStep) { + step(FASTQC_METRICS_STEP, this::runFastqcMetrics); + } + if (updateQcStep) { + step(UPDATE_FILE_ALIGNMENT_QC_STEP, this::updateAlignmentQc); + } + } + + private void runSamtoolsFlagstats() throws ToolException { + Path outPath = getOutDir().resolve(SAMTOOLS_FLAGSTATS_STEP); + try { + FileUtils.forceMkdir(outPath.toFile()); + } catch (IOException e) { + throw new ToolException("Error creating SAMtools flagstat output folder: " + outPath, e); + } + // Prepare parameters + SamtoolsWrapperParams samtoolsWrapperParams = new SamtoolsWrapperParams("flagstat", catalogBamFile.getId(), null, + new HashMap<>()); + + // Execute the Samtools flag stats analysis and add its step attributes if exist + ExecutionResult executionResult = toolRunner.execute(SamtoolsWrapperAnalysis.class, study, samtoolsWrapperParams, outPath, + null, false, token); + addStepAttribute(STEP_EXECUTION_RESULT_ATTRIBUTE_KEY, executionResult); + + // Check execution status + if (executionResult.getStatus().getName() != Status.Type.DONE) { + throw new ToolException("Something wrong happened running the Samtools flagstat analysis. 
Execution status = " + + executionResult.getStatus().getName()); + } + + // Check results and update QC file + Path flagStatsFile = AlignmentFlagStatsAnalysis.getResultPath(outPath.toAbsolutePath().toString(), catalogBamFile.getName()); + java.io.File stdoutFile = outPath.resolve(DockerWrapperAnalysisExecutor.STDOUT_FILENAME).toFile(); + List lines ; + try { + lines = readLines(stdoutFile, Charset.defaultCharset()); + } catch (IOException e) { + throw new ToolException("Error reading running Samtools flagstat results", e); + } + if (CollectionUtils.isNotEmpty(lines) && lines.get(0).contains("QC-passed")) { try { - if (runStats) { - // Stats - params = new AlignmentStatsParams(analysisParams.getBamFile(), null) - .toParams(new ObjectMap(ParamConstants.STUDY_PARAM, study)); - - OpenCGAResult statsJobResult = catalogManager.getJobManager() - .submit(study, AlignmentStatsAnalysis.ID, Enums.Priority.MEDIUM, params, null, "Job generated by " - + getId() + " - " + getJobId(), Collections.emptyList(), Collections.emptyList(), getJobId(), null, - false, token); - statsJobId = statsJobResult.first().getId(); - addEvent(Event.Type.INFO, "Submit job " + statsJobId + " to compute stats (" + AlignmentStatsAnalysis.ID + ")"); - } - } catch (CatalogException e) { - addWarning("Error launching job for Alignment Stats Analysis: " + e.getMessage()); + FileUtils.copyFile(stdoutFile, flagStatsFile.toFile()); + } catch (IOException e) { + throw new ToolException("Error copying Samtools flagstat results", e); } + } else { + String msg = DockerWrapperAnalysisExecutor.getStdErrMessage("Something wrong happened running Samtools flagstat analysis.", + outPath); + throw new ToolException(msg); + } + // Check results and update QC file + SamtoolsFlagstats samtoolsFlagstats = AlignmentFlagStatsAnalysis.parseResults(flagStatsFile); + fileQc.getAlignment().setSamtoolsFlagStats(samtoolsFlagstats); + } + + private void runSamtoolsStats() throws ToolException { + Path outPath = 
getOutDir().resolve(SAMTOOLS_STATS_STEP); + try { + FileUtils.forceMkdir(outPath.toFile()); + } catch (IOException e) { + throw new ToolException("Error creating SAMtools stats output folder: " + outPath, e); + } + + // Prepare parameters + Map statsParams = new HashMap<>(); + // Filter flag: + // - not primary alignment (0x100) + // - read fails platform/vendor quality checks (0x200) + // - supplementary alignment (0x800) + statsParams.put("F", "0xB00"); + SamtoolsWrapperParams samtoolsWrapperParams = new SamtoolsWrapperParams("stats", catalogBamFile.getId(), null, statsParams); + + // Execute the Samtools stats analysis and add its step attributes if exist + ExecutionResult executionResult = toolRunner.execute(SamtoolsWrapperAnalysis.class, study, samtoolsWrapperParams, outPath, + null, false, token); + addStepAttribute(STEP_EXECUTION_RESULT_ATTRIBUTE_KEY, executionResult); + + // Check execution status + if (executionResult.getStatus().getName() != Status.Type.DONE) { + throw new ToolException("Something wrong happened running the Samtools stats analysis. 
Execution status = " + + executionResult.getStatus().getName()); + } + + // Check results + Path statsFile = AlignmentStatsAnalysis.getResultPath(outPath.toAbsolutePath().toString(), catalogBamFile.getName()); + java.io.File stdoutFile = outPath.resolve(DockerWrapperAnalysisExecutor.STDOUT_FILENAME).toFile(); + List lines ; + try { + lines = readLines(stdoutFile, Charset.defaultCharset()); + } catch (IOException e) { + throw new ToolException("Error reading running samtools-stats results", e); + } + if (CollectionUtils.isNotEmpty(lines) && lines.get(0).startsWith("# This file was produced by samtools stats")) { try { - if (runFastqc) { - // FastQC metrics - params = new AlignmentFastQcMetricsParams(analysisParams.getBamFile(), null) - .toParams(new ObjectMap(ParamConstants.STUDY_PARAM, study)); - - OpenCGAResult fastQcMetricsJobResult = catalogManager.getJobManager() - .submit(study, AlignmentFastQcMetricsAnalysis.ID, Enums.Priority.MEDIUM, params, null, - "Job generated by " + getId() + " - " + getJobId(), Collections.emptyList(), Collections.emptyList(), - getJobId(), null, false, token); - fastQcMetricsJobId = fastQcMetricsJobResult.first().getId(); - addEvent(Event.Type.INFO, "Submit job " + fastQcMetricsJobId + " to compute FastQC metrics (" - + AlignmentFastQcMetricsAnalysis.ID + ")"); - } - } catch (CatalogException e) { - addWarning("Error launching job for Alignment FastQC Metrics Analysis: " + e.getMessage()); + FileUtils.copyFile(stdoutFile, statsFile.toFile()); + } catch (IOException e) { + throw new ToolException("Error copying Samtools stats results", e); } + } else { + String msg = DockerWrapperAnalysisExecutor.getStdErrMessage("Something wrong happened running Samtools stats analysis.", + outPath); + throw new ToolException(msg); + } - // Wait for those jobs before saving QC - SamtoolsFlagstats samtoolsFlagstats = null; - SamtoolsStats samtoolsStats = null; - FastQcMetrics fastQcMetrics = null; - - if (flagStatsJobId != null) { - try { - if 
(waitFor(flagStatsJobId)) { - Job job = getJob(flagStatsJobId); - Path resultPath = AlignmentFlagStatsAnalysis.getResultPath(job.getOutDir().getUri().getPath(), - catalogBamFile.getName()); - samtoolsFlagstats = AlignmentFlagStatsAnalysis.parseResults(resultPath); - } - } catch (Exception e) { - addWarning("Error waiting for job '" + flagStatsJobId + "' (Alignment Flag Stats Analysis): " + e.getMessage()); - } - } + // Check results and update QC file + SamtoolsStats samtoolsStats; + try { + samtoolsStats = SamtoolsWrapperAnalysis.parseSamtoolsStats(statsFile.toFile()); + } catch (IOException e) { + throw new ToolException("Error parsing Samtools stats results", e); + } - if (statsJobId != null) { - try { - if (waitFor(statsJobId)) { - Job job = getJob(statsJobId); - Path resultPath = AlignmentStatsAnalysis.getResultPath(job.getOutDir().getUri().getPath(), - catalogBamFile.getName()); - samtoolsStats = AlignmentStatsAnalysis.parseResults(resultPath, Paths.get(job.getOutDir().getUri().getPath())); - } - } catch (Exception e) { - addWarning("Error waiting for job '" + statsJobId + "' (Alignment Stats Analysis): " + e.getMessage()); - } + // Link the stats file to the OpenCGA catalog to be used by the plot-batmstats later + try { + String path; + if (outPath.startsWith(configuration.getJobDir())) { + path = outPath.toString().substring(configuration.getJobDir().length() + 1); + } else { + path = outPath.toString(); + logger.warn("Using path {} to link {} to OpenCGA catalog", outPath, catalogStatsFile.getName()); } + catalogStatsFile = catalogManager.getFileManager().link(study, new FileLinkParams(statsFile.toUri().toString(), path, "", "", + null, null, null, null, null), true, token).first(); + } catch (CatalogException e) { + throw new ToolException("Error linking the Samtools stats results to OpenCGA catalog", e); + } - if (fastQcMetricsJobId != null) { - try { - if (waitFor(fastQcMetricsJobId)) { - Job job = getJob(fastQcMetricsJobId); - fastQcMetrics = 
AlignmentFastQcMetricsAnalysis.parseResults(Paths.get(job.getOutDir().getUri().getPath())); - } - } catch (Exception e) { - addWarning("Error waiting for job '" + fastQcMetricsJobId + "' (Alignment FastQC Metrics Analysis): " + e.getMessage()); - } - } + fileQc.getAlignment().setSamtoolsStats(samtoolsStats); + } - // Update quality control for the catalog file - catalogBamFile = AnalysisUtils.getCatalogFile(analysisParams.getBamFile(), study, catalogManager.getFileManager(), token); - FileQualityControl qc = catalogBamFile.getQualityControl(); - // Sanity check - if (qc == null) { - qc = new FileQualityControl(); - } else if (qc.getAlignment() == null) { - qc.setAlignment(new AlignmentFileQualityControl()); - } + private void runPlotBamstats() throws ToolException { + Path outPath = getOutDir().resolve(PLOT_BAMSTATS_STEP); + try { + FileUtils.forceMkdir(outPath.toFile()); + } catch (IOException e) { + throw new ToolException("Error creating plot-bamstats output folder: " + outPath, e); + } - boolean saveQc = false; - if (samtoolsFlagstats != null) { - qc.getAlignment().setSamtoolsFlagStats(samtoolsFlagstats); - saveQc = true; - } - if (samtoolsStats != null) { - qc.getAlignment().setSamtoolsStats(samtoolsStats); - saveQc = true; - } - if (fastQcMetrics != null) { - qc.getAlignment().setFastQcMetrics(fastQcMetrics); - saveQc = true; - } + // Prepare parameters + SamtoolsWrapperParams samtoolsWrapperParams = new SamtoolsWrapperParams("plot-bamstats", catalogStatsFile.getId(), null, + new HashMap<>()); + + // Execute the plot-bamstats analysis and add its step attributes if exist + ExecutionResult executionResult = toolRunner.execute(SamtoolsWrapperAnalysis.class, study, samtoolsWrapperParams, outPath, + null, false, token); + addStepAttribute(STEP_EXECUTION_RESULT_ATTRIBUTE_KEY, executionResult); - if (saveQc) { - catalogManager.getFileManager().update(getStudy(), catalogBamFile.getId(), new FileUpdateParams().setQualityControl(qc), - QueryOptions.empty(), 
getToken()); + // Check execution status + if (executionResult.getStatus().getName() != Status.Type.DONE) { + throw new ToolException("Something wrong happened running the plot-bamstats analysis. Execution status = " + + executionResult.getStatus().getName()); + } + + // Add images from plot-bamstats to the QC alignment + List images = new ArrayList<>(); + for (java.io.File file : outPath.toFile().listFiles()) { + if (file.getName().endsWith("png")) { + // Sanity check + if (!file.getAbsolutePath().startsWith(configuration.getJobDir())) { + throw new ToolException("plot-bamstats image is not in the configuration job folder "+ configuration.getJobDir()); + } + images.add(file.getAbsolutePath().substring(configuration.getJobDir().length() + 1)); } - }); + } + fileQc.getAlignment().getSamtoolsStats().setFiles(images); } - private boolean waitFor(String jobId) throws ToolException { - Query query = new Query("id", jobId); - OpenCGAResult result = null; + private void runFastqcMetrics() throws ToolException { + Path outPath = getOutDir().resolve(FASTQC_METRICS_STEP); try { - result = catalogManager.getJobManager().search(study, query, QueryOptions.empty(), token); - } catch (CatalogException e) { - new ToolException("Error waiting for job '" + jobId + "': " + e.getMessage()); + FileUtils.forceMkdir(outPath.toFile()); + } catch (IOException e) { + throw new ToolException("Error creating FastQC output folder: " + outPath, e); } - Job job = result.first(); - String status = job.getInternal().getStatus().getId(); - while (status.equals(Enums.ExecutionStatus.PENDING) || status.equals(Enums.ExecutionStatus.RUNNING) - || status.equals(Enums.ExecutionStatus.QUEUED) || status.equals(Enums.ExecutionStatus.READY) - || status.equals(Enums.ExecutionStatus.REGISTERING)) { - // Sleep for 1 minute - try { - Thread.sleep(60000); - result = catalogManager.getJobManager().search(study, query, QueryOptions.empty(), token); - job = result.first(); - } catch (CatalogException | 
InterruptedException e) { - new ToolException("Error waiting for job '" + jobId + "': " + e.getMessage()); - } - status = job.getInternal().getStatus().getId(); + // Prepare parameters + Map fastQcParams = new HashMap<>(); + fastQcParams.put("extract", "true"); + FastqcWrapperParams fastqcWrapperParams = new FastqcWrapperParams(catalogBamFile.getId(), null, fastQcParams); + + // Execute the FastQC analysis and add its step attributes if exist + ExecutionResult executionResult = toolRunner.execute(FastqcWrapperAnalysis.class, study, fastqcWrapperParams, outPath, null, false, + token); + addStepAttribute(STEP_EXECUTION_RESULT_ATTRIBUTE_KEY, executionResult); + + // Check execution status + if (executionResult.getStatus().getName() != Status.Type.DONE) { + throw new ToolException("Something wrong happened running the FastQC analysis. Execution status = " + + executionResult.getStatus().getName()); } - return status.equals(Enums.ExecutionStatus.DONE) ? true : false; + // Check results and update QC file + FastQcMetrics fastQcMetrics = AlignmentFastQcMetricsAnalysis.parseResults(outPath, configuration.getJobDir()); + fileQc.getAlignment().setFastQcMetrics(fastQcMetrics); } - private Job getJob(String jobId) { - Job job = null; + private void updateAlignmentQc() throws ToolException { + // Finally, update file quality control try { - Query query = new Query("id", jobId); - OpenCGAResult result = catalogManager.getJobManager().search(study, query, QueryOptions.empty(), token); - job = result.first(); + FileUpdateParams fileUpdateParams = new FileUpdateParams().setQualityControl(fileQc); + catalogManager.getFileManager().update(study, catalogBamFile.getId(), fileUpdateParams, QueryOptions.empty(), token); } catch (CatalogException e) { - new ToolException("Error getting job '" + jobId + "' from catalog: " + e.getMessage()); - } - if (job == null) { - new ToolException("Error getting job '" + jobId + "' from catalog."); + throw new ToolException("Error updating alignment 
quality control", e); } - return job; } } diff --git a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/sample/qc/SampleQcAnalysis.java b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/sample/qc/SampleQcAnalysis.java index b8d460824c1..e59a7e4109d 100644 --- a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/sample/qc/SampleQcAnalysis.java +++ b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/sample/qc/SampleQcAnalysis.java @@ -17,40 +17,34 @@ package org.opencb.opencga.analysis.sample.qc; import org.apache.commons.collections4.CollectionUtils; +import org.apache.commons.io.FileUtils; import org.apache.commons.lang3.StringUtils; -import org.opencb.biodata.models.clinical.qc.GenomePlot; -import org.opencb.biodata.models.clinical.qc.GenomePlotConfig; import org.opencb.biodata.models.clinical.qc.SampleQcVariantStats; -import org.opencb.commons.datastore.core.Event; -import org.opencb.commons.datastore.core.ObjectMap; import org.opencb.commons.datastore.core.QueryOptions; import org.opencb.opencga.analysis.AnalysisUtils; import org.opencb.opencga.analysis.individual.qc.IndividualQcUtils; import org.opencb.opencga.analysis.tools.OpenCgaToolScopeStudy; +import org.opencb.opencga.analysis.tools.ToolRunner; import org.opencb.opencga.analysis.variant.genomePlot.GenomePlotAnalysis; import org.opencb.opencga.analysis.variant.mutationalSignature.MutationalSignatureAnalysis; import org.opencb.opencga.analysis.variant.stats.SampleVariantStatsAnalysis; import org.opencb.opencga.catalog.exceptions.CatalogException; import org.opencb.opencga.catalog.utils.CatalogFqn; -import org.opencb.opencga.core.api.ParamConstants; -import org.opencb.opencga.core.common.JacksonUtils; import org.opencb.opencga.core.exceptions.ToolException; import org.opencb.opencga.core.models.JwtPayload; import org.opencb.opencga.core.models.common.Enums; import org.opencb.opencga.core.models.file.File; -import org.opencb.opencga.core.models.job.Job; import 
org.opencb.opencga.core.models.sample.Sample; -import org.opencb.opencga.core.models.sample.SampleQualityControl; -import org.opencb.opencga.core.models.sample.SampleUpdateParams; -import org.opencb.opencga.core.models.sample.SampleVariantQualityControlMetrics; import org.opencb.opencga.core.models.variant.GenomePlotAnalysisParams; import org.opencb.opencga.core.models.variant.MutationalSignatureAnalysisParams; import org.opencb.opencga.core.models.variant.SampleQcAnalysisParams; import org.opencb.opencga.core.models.variant.SampleVariantStatsAnalysisParams; -import org.opencb.opencga.core.response.OpenCGAResult; import org.opencb.opencga.core.tools.annotations.Tool; import org.opencb.opencga.core.tools.annotations.ToolParams; +import org.opencb.opencga.core.tools.result.ExecutionResult; +import org.opencb.opencga.core.tools.result.Status; +import java.io.IOException; import java.nio.file.Path; import java.nio.file.Paths; import java.util.*; @@ -64,10 +58,14 @@ public class SampleQcAnalysis extends OpenCgaToolScopeStudy { public static final String DESCRIPTION = "Run quality control (QC) for a given sample. 
It includes variant stats, and if the sample " + "is somatic, mutational signature and genome plot are calculated."; + private static final String SAMPLE_VARIANT_STATS_STEP = "sample-variant-stats"; + private static final String MUTATIONAL_SIGNATURE_STEP = "mutational-signature"; + private static final String GENOME_PLOT_STEP = "genome-plot"; + @ToolParams - protected final SampleQcAnalysisParams analysisParams = new SampleQcAnalysisParams(); + protected final SampleQcAnalysisParams sampleQcParams = new SampleQcAnalysisParams(); - private Path genomePlotConfigPath; + private ToolRunner toolRunner; private boolean runVariantStats = true; private boolean runSignatureCatalogue = true; @@ -96,19 +94,19 @@ protected void check() throws Exception { } // Sanity check - if (StringUtils.isEmpty(analysisParams.getSample())) { + if (StringUtils.isEmpty(sampleQcParams.getSample())) { throw new ToolException("Missing sample ID."); } - Sample sample = IndividualQcUtils.getValidSampleById(getStudy(), analysisParams.getSample(), catalogManager, token); + Sample sample = IndividualQcUtils.getValidSampleById(getStudy(), sampleQcParams.getSample(), catalogManager, token); if (sample == null) { - throw new ToolException("Sample '" + analysisParams.getSample() + "' not found."); + throw new ToolException("Sample '" + sampleQcParams.getSample() + "' not found."); } // Prepare flags String skip = null; - if (StringUtils.isNotEmpty(analysisParams.getSkip())) { - skip = analysisParams.getSkip().toLowerCase().replace(" ", ""); + if (StringUtils.isNotEmpty(sampleQcParams.getSkip())) { + skip = sampleQcParams.getSkip().toLowerCase().replace(" ", ""); } if (StringUtils.isNotEmpty(skip)) { Set skipValues = new HashSet<>(Arrays.asList(skip.split(","))); @@ -131,239 +129,236 @@ protected void check() throws Exception { // Check variant stats if (runVariantStats) { final String OPENCGA_ALL = "ALL"; - if (OPENCGA_ALL.equals(analysisParams.getVsId())) { - new ToolException("Invalid parameters: " + 
OPENCGA_ALL + " is a reserved word, you can not use as a variant stats ID"); + if (OPENCGA_ALL.equals(sampleQcParams.getVsId())) { + throw new ToolException("Invalid parameters: " + OPENCGA_ALL + " is a reserved word, you can not use as a variant stats ID"); } - if (StringUtils.isEmpty(analysisParams.getVsId()) && analysisParams.getVsQuery() != null - && !analysisParams.getVsQuery().toParams().isEmpty()) { - new ToolException("Invalid parameters: if variant stats ID is empty, variant stats query must be empty"); + if (StringUtils.isEmpty(sampleQcParams.getVsId()) && sampleQcParams.getVsQuery() != null + && !sampleQcParams.getVsQuery().toParams().isEmpty()) { + throw new ToolException("Invalid parameters: if variant stats ID is empty, variant stats query must be empty"); } - if (StringUtils.isNotEmpty(analysisParams.getVsId()) - && (analysisParams.getVsQuery() == null || analysisParams.getVsQuery().toParams().isEmpty())) { - new ToolException("Invalid parameters: if you provide a variant stats ID, variant stats query can not be empty"); + if (StringUtils.isNotEmpty(sampleQcParams.getVsId()) + && (sampleQcParams.getVsQuery() == null || sampleQcParams.getVsQuery().toParams().isEmpty())) { + throw new ToolException("Invalid parameters: if you provide a variant stats ID, variant stats query can not be empty"); } - if (StringUtils.isEmpty(analysisParams.getVsId())) { - analysisParams.setVsId(OPENCGA_ALL); + if (StringUtils.isEmpty(sampleQcParams.getVsId())) { + sampleQcParams.setVsId(OPENCGA_ALL); } - if (analysisParams.getVsQuery() == null) { - new ToolException("Invalid parameters: variant stats query is empty"); + if (sampleQcParams.getVsQuery() == null) { + throw new ToolException("Invalid parameters: variant stats query is empty"); } if (sample.getQualityControl() != null && sample.getQualityControl().getVariant() != null) { if (CollectionUtils.isNotEmpty(sample.getQualityControl().getVariant().getVariantStats()) - && OPENCGA_ALL.equals(analysisParams.getVsId())) { 
+ && OPENCGA_ALL.equals(sampleQcParams.getVsId())) { runVariantStats = false; } else { for (SampleQcVariantStats variantStats : sample.getQualityControl().getVariant().getVariantStats()) { - if (variantStats.getId().equals(analysisParams.getVsId())) { - throw new ToolException("Invalid parameters: variant stats ID '" + analysisParams.getVsId() + if (variantStats.getId().equals(sampleQcParams.getVsId())) { + throw new ToolException("Invalid parameters: variant stats ID '" + sampleQcParams.getVsId() + "' is already used"); } } } } + } else { + String msg = "Skipping sample variant stats analysis by user"; + addWarning(msg); + logger.warn(msg); } // Check mutational signature if (runSignatureCatalogue) { - if (StringUtils.isEmpty(analysisParams.getMsQuery())) { - new ToolException("Invalid parameters: mutational signature query is empty"); + if (!sample.isSomatic()) { + String msg = "Skipping mutational signature catalog analysis:" + getSampleIsNotSomaticMsg(sample.getId()); + addWarning(msg); + logger.warn(msg); + runSignatureCatalogue = false; + } else if (StringUtils.isEmpty(sampleQcParams.getMsQuery())) { + throw new ToolException("Invalid parameters: mutational signature query is empty"); } - } - - if (runSignatureCatalogue && !sample.isSomatic()) { - String msg = "Skipping mutational signature catalog analysis: sample '" + sample.getId() + "' is not somatic."; + } else { + String msg = "Skipping mutational signature catalogue analysis by user"; addWarning(msg); logger.warn(msg); - runSignatureCatalogue = false; } - if (runSignatureFitting && !sample.isSomatic()) { - String msg = "Skipping mutational signature fitting analysis: sample '" + sample.getId() + "' is not somatic."; + if (runSignatureFitting) { + if (!sample.isSomatic()) { + String msg = "Skipping mutational signature fitting analysis:" + getSampleIsNotSomaticMsg(sample.getId()); + addWarning(msg); + logger.warn(msg); + runSignatureFitting = false; + } + } else { + String msg = "Skipping mutational 
signature fitting analysis by user"; addWarning(msg); logger.warn(msg); - runSignatureFitting = false; } // Check genome plot if (runGenomePlot) { - if (StringUtils.isEmpty(analysisParams.getGpConfigFile())) { - new ToolException("Invalid parameters: genome plot configuration file is empty"); - } - if (runGenomePlot && !sample.isSomatic()) { - String msg = "Skipping genome plot: sample '" + sample.getId() + "' is not somatic."; + if (!sample.isSomatic()) { + String msg = "Skipping genome plot: " + getSampleIsNotSomaticMsg(sample.getId()); addWarning(msg); logger.warn(msg); runGenomePlot = false; } else { - File genomePlotConfFile = AnalysisUtils.getCatalogFile(analysisParams.getGpConfigFile(), getStudy(), + if (StringUtils.isEmpty(sampleQcParams.getGpConfigFile())) { + throw new ToolException("Invalid parameters: genome plot configuration file is empty"); + } + + File genomePlotConfFile = AnalysisUtils.getCatalogFile(sampleQcParams.getGpConfigFile(), getStudy(), catalogManager.getFileManager(), getToken()); - genomePlotConfigPath = Paths.get(genomePlotConfFile.getUri().getPath()); + Path genomePlotConfigPath = Paths.get(genomePlotConfFile.getUri().getPath()); if (!genomePlotConfigPath.toFile().exists()) { - new ToolException("Invalid parameters: genome plot configuration file does not exist (" + genomePlotConfigPath + ")"); + throw new ToolException("Invalid parameters: genome plot configuration file does not exist (" + genomePlotConfigPath + + ")"); } } + } else { + String msg = "Skipping genome plot analysis by user"; + addWarning(msg); + logger.warn(msg); } } @Override - protected void run() throws ToolException { - step(() -> { - Map params; - String variantStatsJobId = null; - String signatureJobId = null; - String genomePlotJobId = null; - - try { - if (runVariantStats) { - // Run variant stats - params = new SampleVariantStatsAnalysisParams(Collections.singletonList(analysisParams.getSample()), null, null, true, - false, analysisParams.getVsId(), 
analysisParams.getVsDescription(), null, - analysisParams.getVsQuery()) - .toParams(new ObjectMap(ParamConstants.STUDY_PARAM, getStudy())); - - OpenCGAResult variantStatsJobResult = catalogManager.getJobManager() - .submit(study, SampleVariantStatsAnalysis.ID, Enums.Priority.MEDIUM, params, null, "Job generated by " - + getId() + " - " + getJobId(), Collections.emptyList(), Collections.emptyList(), getJobId(), null, - false, token); - variantStatsJobId = variantStatsJobResult.first().getId(); - addEvent(Event.Type.INFO, "Submit job " + variantStatsJobId + " to compute stats (" + SampleVariantStatsAnalysis.ID - + ")"); - } - } catch (CatalogException e) { - addWarning("Error launching job for sample variant stats analysis: " + e.getMessage()); - variantStatsJobId = null; - } - - try { - if (runSignatureCatalogue || runSignatureFitting) { - // Run mutational signature - logger.info("Preparing to submit the mutational signature analysis job"); - - String skip = null; - if (!runSignatureCatalogue) { - skip = MutationalSignatureAnalysisParams.SIGNATURE_CATALOGUE_SKIP_VALUE; - } else if (!runSignatureFitting) { - skip = MutationalSignatureAnalysisParams.SIGNATURE_FITTING_SKIP_VALUE; - } - - params = new MutationalSignatureAnalysisParams() - .setId(analysisParams.getMsId()) - .setDescription(analysisParams.getMsDescription()) - .setSample(analysisParams.getSample()) - .setQuery(analysisParams.getMsQuery()) - .setFitId(analysisParams.getMsFitId()) - .setFitMethod(analysisParams.getMsFitMethod()) - .setFitSigVersion(analysisParams.getMsFitSigVersion()) - .setFitOrgan(analysisParams.getMsFitOrgan()) - .setFitNBoot(analysisParams.getMsFitNBoot()) - .setFitThresholdPerc(analysisParams.getMsFitThresholdPerc()) - .setFitThresholdPval(analysisParams.getMsFitThresholdPval()) - .setFitMaxRareSigs(analysisParams.getMsFitMaxRareSigs()) - .setFitSignaturesFile(analysisParams.getMsFitSignaturesFile()) - .setFitRareSignaturesFile(analysisParams.getMsFitRareSignaturesFile()) - 
.setSkip(skip) - .toParams(new ObjectMap(ParamConstants.STUDY_PARAM, getStudy())); - - OpenCGAResult signatureJobResult = catalogManager.getJobManager() - .submit(getStudy(), MutationalSignatureAnalysis.ID, Enums.Priority.MEDIUM, params, null, "Job generated by " - + getId() + " - " + getJobId(), Collections.emptyList(), Collections.emptyList(), getJobId(), null, - false, token); - signatureJobId = signatureJobResult.first().getId(); - logger.info("Submitted job {} to compute the mutational signature analysis {}", signatureJobId, - MutationalSignatureAnalysis.ID); - addEvent(Event.Type.INFO, "Submit job " + signatureJobId + " to compute the mutational signature (" - + MutationalSignatureAnalysis.ID + ")"); - } - } catch (CatalogException e) { - throw new ToolException(e); - } + protected List getSteps() { + List steps = new ArrayList<>(); + if (runVariantStats) { + steps.add(SAMPLE_VARIANT_STATS_STEP); + } + if (runSignatureCatalogue || runSignatureFitting) { + steps.add(MUTATIONAL_SIGNATURE_STEP); + } + if (runGenomePlot) { + steps.add(GENOME_PLOT_STEP); + } + return steps; + } + @Override + protected void run() throws ToolException { + // Create the tool runner + toolRunner = new ToolRunner(getOpencgaHome().toString(), catalogManager, variantStorageManager); - try { - if (runGenomePlot) { - // Run genome plot - params = new GenomePlotAnalysisParams(analysisParams.getSample(), analysisParams.getGpId(), - analysisParams.getGpDescription(), analysisParams.getGpConfigFile(), null) - .toParams(new ObjectMap(ParamConstants.STUDY_PARAM, getStudy())); + // Sample variant stats + if (runVariantStats) { + step(SAMPLE_VARIANT_STATS_STEP, this::runSampleVariantStats); + } - OpenCGAResult genomePlotJobResult = catalogManager.getJobManager() - .submit(getStudy(), GenomePlotAnalysis.ID, Enums.Priority.MEDIUM, params, null, - "Job generated by " + getId() + " - " + getJobId(), Collections.emptyList(), Collections.emptyList(), - getJobId(), null, false, token); - genomePlotJobId 
= genomePlotJobResult.first().getId(); - addEvent(Event.Type.INFO, "Submit job " + genomePlotJobId + " to compute genome plot (" + GenomePlotAnalysis.ID - + ")"); - } - } catch (CatalogException e) { - addWarning("Error launching job for sample genome plot analysis: " + e.getMessage()); - genomePlotJobId = null; - } + // Mutational signature + if (runSignatureCatalogue || runSignatureFitting) { + step(MUTATIONAL_SIGNATURE_STEP, this::runMutationalSignature); + } + // Genome plot + if (runGenomePlot) { + step(GENOME_PLOT_STEP, this::runGenomePlot); + } + } - // Wait for those jobs before saving QC - GenomePlot genomePlot = null; + private void runSampleVariantStats() throws ToolException { + // Create out folder + Path outPath = getOutDir().resolve(SAMPLE_VARIANT_STATS_STEP); + try { + FileUtils.forceMkdir(outPath.toFile()); + } catch (IOException e) { + throw new ToolException("Error creating sample variant stats folder: " + outPath, e); + } - if (variantStatsJobId != null) { - try { - logger.info("Waiting for variant stats job: {} ...", variantStatsJobId); - AnalysisUtils.waitFor(variantStatsJobId, getStudy(), catalogManager.getJobManager(), getToken()); - // Sample quality control is updated in the variant stats analysis, nothing more to do here - } catch (Exception e) { - addWarning("Error waiting for job '" + variantStatsJobId + "' (sample variant stats): " + e.getMessage()); - } - } + // Prepare parameters + SampleVariantStatsAnalysisParams sampleVariantStatsParams = new SampleVariantStatsAnalysisParams( + Collections.singletonList(sampleQcParams.getSample()), null, null, true, false, sampleQcParams.getVsId(), + sampleQcParams.getVsDescription(), null, sampleQcParams.getVsQuery()); - if (signatureJobId != null) { - try { - logger.info("Waiting for mutational signature job: {} ...", signatureJobId); - AnalysisUtils.waitFor(signatureJobId, getStudy(), catalogManager.getJobManager(), getToken()); - } catch (Exception e) { - addWarning("Error waiting for job '" + 
signatureJobId + "' (mutational signature analysis): " + e.getMessage()); - } - } + // Execute the sample variant stats analysis and add its step attributes if exist + ExecutionResult executionResult = toolRunner.execute(SampleVariantStatsAnalysis.class, study, sampleVariantStatsParams, outPath, + null, false, token); + addStepAttribute(STEP_EXECUTION_RESULT_ATTRIBUTE_KEY, executionResult); - if (genomePlotJobId != null) { - try { - if (AnalysisUtils.waitFor(genomePlotJobId, getStudy(), catalogManager.getJobManager(), getToken())) { - Job job = AnalysisUtils.getJob(genomePlotJobId, getStudy(), catalogManager.getJobManager(), getToken()); + // Check execution status + if (executionResult.getStatus().getName() != Status.Type.DONE) { + throw new ToolException("Something wrong happened running the sample variant stats analysis. Execution status = " + + executionResult.getStatus().getName()); + } + } - // Parse configuration file - GenomePlotConfig plotConfig = JacksonUtils.getDefaultObjectMapper().readerFor(GenomePlotConfig.class) - .readValue(genomePlotConfigPath.toFile()); + private void runMutationalSignature() throws ToolException { + // Create the output folder + Path outPath = getOutDir().resolve(MUTATIONAL_SIGNATURE_STEP); + try { + FileUtils.forceMkdir(outPath.toFile()); + } catch (IOException e) { + throw new ToolException("Error creating mutational signature folder: " + outPath, e); + } - // Parse genome plot results - genomePlot = GenomePlotAnalysis.parseResults(Paths.get(job.getOutDir().getUri().getPath()), - analysisParams.getGpDescription(), plotConfig); - } - } catch (Exception e) { - addWarning("Error waiting for job '" + genomePlotJobId + "' (genome plot analysis): " + e.getMessage()); - } - } + // Prepare parameters + String skip = null; + if (!runSignatureCatalogue) { + skip = MutationalSignatureAnalysisParams.SIGNATURE_CATALOGUE_SKIP_VALUE; + } else if (!runSignatureFitting) { + skip = MutationalSignatureAnalysisParams.SIGNATURE_FITTING_SKIP_VALUE; 
+ } - // Update quality control for the sample - logger.info("Preparing to save quality control for sample {}", analysisParams.getSample()); - Sample sample = IndividualQcUtils.getValidSampleById(getStudy(), analysisParams.getSample(), catalogManager, token); - if (sample == null) { - throw new ToolException("Can not access to the sample " + analysisParams.getSample() + " in order to save quality control"); - } - SampleQualityControl qc = sample.getQualityControl(); + MutationalSignatureAnalysisParams mutationalSignatureParams = new MutationalSignatureAnalysisParams() + .setId(sampleQcParams.getMsId()) + .setDescription(sampleQcParams.getMsDescription()) + .setSample(sampleQcParams.getSample()) + .setQuery(sampleQcParams.getMsQuery()) + .setFitId(sampleQcParams.getMsFitId()) + .setFitMethod(sampleQcParams.getMsFitMethod()) + .setFitSigVersion(sampleQcParams.getMsFitSigVersion()) + .setFitOrgan(sampleQcParams.getMsFitOrgan()) + .setFitNBoot(sampleQcParams.getMsFitNBoot()) + .setFitThresholdPerc(sampleQcParams.getMsFitThresholdPerc()) + .setFitThresholdPval(sampleQcParams.getMsFitThresholdPval()) + .setFitMaxRareSigs(sampleQcParams.getMsFitMaxRareSigs()) + .setFitSignaturesFile(sampleQcParams.getMsFitSignaturesFile()) + .setFitRareSignaturesFile(sampleQcParams.getMsFitRareSignaturesFile()) + .setSkip(skip); + + // Execute the mutational signature analysis and add its step attributes if exist + ExecutionResult executionResult = toolRunner.execute(MutationalSignatureAnalysis.class, study, mutationalSignatureParams, outPath, + null, false, token); + addStepAttribute(STEP_EXECUTION_RESULT_ATTRIBUTE_KEY, executionResult); + + // Check execution status + if (executionResult.getStatus().getName() != Status.Type.DONE) { + throw new ToolException("Something wrong happened running the mutational signature analysis. 
Execution status = " + + executionResult.getStatus().getName()); + } + } - // Sanity check - if (qc == null) { - qc = new SampleQualityControl(); - } else if (qc.getVariant() == null) { - qc.setVariant(new SampleVariantQualityControlMetrics()); - } + private void runGenomePlot() throws ToolException { + Path outPath = getOutDir().resolve(GENOME_PLOT_STEP); + try { + FileUtils.forceMkdir(outPath.toFile()); + } catch (IOException e) { + throw new ToolException("Error creating genome plot folder: " + outPath, e); + } - if (genomePlot != null) { - qc.getVariant().setGenomePlot(genomePlot); + // Prepare parameters + GenomePlotAnalysisParams genomePlotParams = new GenomePlotAnalysisParams() + .setSample(sampleQcParams.getSample()) + .setId(sampleQcParams.getGpId()) + .setDescription(sampleQcParams.getGpDescription()) + .setConfigFile(sampleQcParams.getGpConfigFile()); + + // Execute the genome plot analysis and add its step attributes if exist + ExecutionResult executionResult = toolRunner.execute(GenomePlotAnalysis.class, study, genomePlotParams, outPath, null, false, + token); + addStepAttribute(STEP_EXECUTION_RESULT_ATTRIBUTE_KEY, executionResult); + + // Check execution status + if (executionResult.getStatus().getName() != Status.Type.DONE) { + throw new ToolException("Something wrong happened running the mutational signature analysis. 
Execution status = " + + executionResult.getStatus().getName()); + } + } - catalogManager.getSampleManager().update(getStudy(), sample.getId(), new SampleUpdateParams().setQualityControl(qc), - QueryOptions.empty(), getToken()); - logger.info("Quality control saved for sample {}", sample.getId()); - } - }); + public static String getSampleIsNotSomaticMsg(String id) { + return "sample '" + id + "' is not somatic."; } } diff --git a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/tools/OpenCgaTool.java b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/tools/OpenCgaTool.java index 5a88635e45c..cd77ec2cbd7 100644 --- a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/tools/OpenCgaTool.java +++ b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/tools/OpenCgaTool.java @@ -16,6 +16,8 @@ package org.opencb.opencga.analysis.tools; +import org.apache.commons.collections4.CollectionUtils; +import org.apache.commons.collections4.MapUtils; import org.apache.commons.io.FileUtils; import org.apache.commons.lang3.RandomStringUtils; import org.apache.commons.lang3.StringUtils; @@ -43,6 +45,7 @@ import org.opencb.opencga.core.tools.result.ExecutionResult; import org.opencb.opencga.core.tools.result.ExecutionResultManager; import org.opencb.opencga.core.tools.result.ExecutorInfo; +import org.opencb.opencga.core.tools.result.ToolStep; import org.opencb.opencga.storage.core.StorageEngineFactory; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -55,11 +58,14 @@ import java.nio.file.Paths; import java.util.ArrayList; import java.util.List; +import java.util.Map; import static org.opencb.opencga.core.tools.OpenCgaToolExecutor.EXECUTOR_ID; public abstract class OpenCgaTool { + protected final static String STEP_EXECUTION_RESULT_ATTRIBUTE_KEY = "STEP_EXECUTION_RESULT"; + protected CatalogManager catalogManager; protected Configuration configuration; protected StorageConfiguration storageConfiguration; @@ -111,7 +117,7 @@ public final 
OpenCgaTool setUp(String opencgaHome, CatalogManager catalogManager if (params != null) { this.params.putAll(params); } - this.executorParams = new ObjectMap(); + this.executorParams = getExecutorParams(params); this.outDir = outDir; //this.params.put("outDir", outDir.toAbsolutePath().toString()); @@ -126,7 +132,7 @@ public final OpenCgaTool setUp(String opencgaHome, ObjectMap params, Path outDir if (params != null) { this.params.putAll(params); } - this.executorParams = new ObjectMap(); + this.executorParams = getExecutorParams(params); this.outDir = outDir; //this.params.put("outDir", outDir.toAbsolutePath().toString()); @@ -510,6 +516,10 @@ protected final void addAttribute(String key, Object value) throws ToolException erm.addAttribute(key, value); } + protected final void addStepAttribute(String key, Object value) throws ToolException { + erm.addStepAttribute(key, value); + } + protected final void moveFile(String study, Path source, Path destiny, String catalogDirectoryPath, String token) throws ToolException { File file; try { @@ -600,6 +610,16 @@ private void loadStorageConfiguration() throws IOException { this.storageConfiguration = ConfigurationUtils.loadStorageConfiguration(opencgaHome); } + private ObjectMap getExecutorParams(ObjectMap params) { + ObjectMap executorParams = new ObjectMap(); + if (MapUtils.isNotEmpty(params)) { + if (params.containsKey(EXECUTOR_ID)) { + executorParams.put(EXECUTOR_ID, params.getString(EXECUTOR_ID)); + } + } + return executorParams; + } + // TODO can this method be removed? 
// protected final Analyst getAnalyst(String token) throws ToolException { // try { diff --git a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/circos/CircosLocalAnalysisExecutor.java b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/circos/CircosLocalAnalysisExecutor.java index 356d53d1d84..41cdd0135d7 100644 --- a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/circos/CircosLocalAnalysisExecutor.java +++ b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/circos/CircosLocalAnalysisExecutor.java @@ -30,6 +30,7 @@ import org.opencb.commons.utils.DockerUtils; import org.opencb.opencga.analysis.ResourceUtils; import org.opencb.opencga.analysis.StorageToolExecutor; +import org.opencb.opencga.analysis.variant.genomePlot.GenomePlotAnalysis; import org.opencb.opencga.analysis.variant.manager.VariantStorageManager; import org.opencb.opencga.catalog.exceptions.CatalogException; import org.opencb.opencga.core.common.GitRepositoryState; @@ -48,6 +49,7 @@ import java.io.File; import java.io.IOException; import java.io.PrintWriter; +import java.nio.file.Paths; import java.util.*; import java.util.concurrent.*; @@ -133,7 +135,8 @@ public void run() throws ToolException, IOException, CatalogException { if (MapUtils.isEmpty(errors)) { // Execute R script // circos.R ./snvs.tsv ./indels.tsv ./cnvs.tsv ./rearrs.tsv SampleId - String rScriptPath = getExecutorParams().getString("opencgaHome") + "/analysis/R/genome-plot"; + String rScriptPath = Paths.get(getExecutorParams().getString("opencgaHome")).resolve("analysis/" + GenomePlotAnalysis.ID) + .toAbsolutePath().toString(); List> inputBindings = new ArrayList<>(); inputBindings.add(new AbstractMap.SimpleEntry<>(rScriptPath, DOCKER_INPUT_PATH)); AbstractMap.SimpleEntry outputBinding = new AbstractMap.SimpleEntry<>(getOutDir() diff --git a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/genomePlot/GenomePlotAnalysis.java 
b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/genomePlot/GenomePlotAnalysis.java index 04a5241623f..cb7eb113591 100644 --- a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/genomePlot/GenomePlotAnalysis.java +++ b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/genomePlot/GenomePlotAnalysis.java @@ -97,7 +97,7 @@ protected void run() throws ToolException { if (imgFile.getName().endsWith(GenomePlotAnalysis.SUFFIX_FILENAME)) { int index = imgFile.getAbsolutePath().indexOf("JOBS/"); String relativeFilePath = (index == -1 ? imgFile.getName() : imgFile.getAbsolutePath().substring(index)); - genomePlot = new GenomePlot("", getGenomePlotParams().getDescription(), plotConfig, relativeFilePath); + genomePlot = new GenomePlot(getGenomePlotParams().getId(), getGenomePlotParams().getDescription(), plotConfig, relativeFilePath); break; } } @@ -116,27 +116,18 @@ protected void run() throws ToolException { }); } - public static GenomePlot parseResults(Path outDir, String description, GenomePlotConfig plotConfig) throws IOException { + public static GenomePlot parseResults(Path outDir, String id, String description, GenomePlotConfig plotConfig) throws IOException { // Get image file for (java.io.File imgFile : outDir.toFile().listFiles()) { if (imgFile.getName().endsWith(GenomePlotAnalysis.SUFFIX_FILENAME)) { int index = imgFile.getAbsolutePath().indexOf("JOBS/"); String relativeFilePath = (index == -1 ? 
imgFile.getName() : imgFile.getAbsolutePath().substring(index)); - return new GenomePlot("", description, plotConfig, relativeFilePath); + return new GenomePlot(id, description, plotConfig, relativeFilePath); } } return null; } - public String getStudy() { - return study; - } - - public GenomePlotAnalysis setStudy(String study) { - this.study = study; - return this; - } - public GenomePlotAnalysisParams getGenomePlotParams() { return genomePlotParams; } diff --git a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/genomePlot/GenomePlotLocalAnalysisExecutor.java b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/genomePlot/GenomePlotLocalAnalysisExecutor.java index a86cc797287..64e061c7668 100644 --- a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/genomePlot/GenomePlotLocalAnalysisExecutor.java +++ b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/genomePlot/GenomePlotLocalAnalysisExecutor.java @@ -33,6 +33,7 @@ import org.opencb.opencga.analysis.ResourceUtils; import org.opencb.opencga.analysis.StorageToolExecutor; import org.opencb.opencga.analysis.variant.manager.VariantStorageManager; +import org.opencb.opencga.analysis.variant.mutationalSignature.MutationalSignatureAnalysis; import org.opencb.opencga.catalog.exceptions.CatalogException; import org.opencb.opencga.core.common.GitRepositoryState; import org.opencb.opencga.core.common.JacksonUtils; @@ -48,6 +49,7 @@ import java.io.File; import java.io.IOException; import java.io.PrintWriter; +import java.nio.file.Paths; import java.util.*; import java.util.concurrent.*; @@ -79,6 +81,7 @@ public class GenomePlotLocalAnalysisExecutor extends GenomePlotAnalysisExecutor @Override public void run() throws ToolException, IOException, CatalogException { + addStepParams(); plotConfig = JacksonUtils.getDefaultObjectMapper().readerFor(GenomePlotConfig.class).readValue(getConfigFile()); @@ -119,7 +122,8 @@ public void run() throws ToolException, 
IOException, CatalogException { if (MapUtils.isEmpty(errors)) { // Execute R script // circos.R ./snvs.tsv ./indels.tsv ./cnvs.tsv ./rearrs.tsv SampleId - String rScriptPath = getExecutorParams().getString("opencgaHome") + "/analysis/R/" + getToolId(); + String rScriptPath = Paths.get(getExecutorParams().getString("opencgaHome")).resolve("analysis/" + GenomePlotAnalysis.ID) + .toAbsolutePath().toString(); List> inputBindings = new ArrayList<>(); inputBindings.add(new AbstractMap.SimpleEntry<>(rScriptPath, DOCKER_INPUT_PATH)); AbstractMap.SimpleEntry outputBinding = new AbstractMap.SimpleEntry<>(getOutDir() @@ -146,6 +150,7 @@ public void run() throws ToolException, IOException, CatalogException { StopWatch stopWatch = StopWatch.createStarted(); String cmdline = DockerUtils.run(R_DOCKER_IMAGE, inputBindings, outputBinding, scriptParams, null); + addAttribute("CIRCOS_CLI", cmdline); logger.info("Docker command line: " + cmdline); logger.info("Execution time: " + TimeUtils.durationToString(stopWatch)); } else { @@ -441,14 +446,14 @@ private boolean rearrangementQuery(Query query, VariantStorageManager storageMan + mate.getChromosome() + "\t" + mate.getPosition() + "\t" + mate.getPosition() + "\t" + variantType); } else { - pwOut.println(v.toString() + "\tBreakend mate is empty (variant type: " + variantType + pwOut.println(v + "\tBreakend mate is empty (variant type: " + variantType + ")"); } } else { - pwOut.println(v.toString() + "\tBreakend is empty (variant type: " + variantType + ")"); + pwOut.println(v + "\tBreakend is empty (variant type: " + variantType + ")"); } } else { - pwOut.println(v.toString() + "\tSV is empty (variant type: " + variantType + ")"); + pwOut.println(v + "\tSV is empty (variant type: " + variantType + ")"); } } } diff --git a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/mutationalSignature/MutationalSignatureAnalysis.java 
b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/mutationalSignature/MutationalSignatureAnalysis.java index 56a95cfa92e..81d65b14e60 100644 --- a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/mutationalSignature/MutationalSignatureAnalysis.java +++ b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/mutationalSignature/MutationalSignatureAnalysis.java @@ -64,6 +64,9 @@ public class MutationalSignatureAnalysis extends OpenCgaToolScopeStudy { public final static String MUTATIONAL_SIGNATURE_DATA_MODEL_FILENAME = "mutational_signature.json"; public final static String MUTATIONAL_SIGNATURE_FITTING_DATA_MODEL_FILENAME = "mutational_signature_fitting.json"; + public final static String SV_CLUSTERING_CLI_KEY = "SV_CLUSTERING_CLI"; + public final static String SIGNATURE_FIT_CLI_KEY = "SIGNATURE_FIT_CLI"; + public static final String CLUSTERED = "clustered"; public static final String NON_CLUSTERED = "non-clustered"; public static final String LENGTH_NA = "na"; @@ -185,20 +188,20 @@ protected void check() throws Exception { assembly = getAssembly(study, catalogManager, token); // Log messages - logger.info("Signagture id: {}", signatureParams.getId()); - logger.info("Signagture description: {}", signatureParams.getDescription()); - logger.info("Signagture sample: {}", signatureParams.getSample()); - logger.info("Signagture query: {}", signatureParams.getQuery()); - logger.info("Signagture fit id: {}", signatureParams.getFitId()); - logger.info("Signagture fit method: {}", signatureParams.getFitMethod()); - logger.info("Signagture fit sig. 
version: {}", signatureParams.getFitSigVersion()); - logger.info("Signagture fit organ: {}", signatureParams.getFitOrgan()); - logger.info("Signagture fit n boot: {}", signatureParams.getFitNBoot()); - logger.info("Signagture fit threshold percentage: {}", signatureParams.getFitThresholdPerc()); - logger.info("Signagture fit threshold p-value: {}", signatureParams.getFitThresholdPval()); - logger.info("Signagture fit max. rare sigs.: {}", signatureParams.getFitMaxRareSigs()); - logger.info("Signagture fit signatures file: {}", signaturesFile); - logger.info("Signagture fit rare signatures file: {}", rareSignaturesFile); + logger.info("Signature id: {}", signatureParams.getId()); + logger.info("Signature description: {}", signatureParams.getDescription()); + logger.info("Signature sample: {}", signatureParams.getSample()); + logger.info("Signature query: {}", signatureParams.getQuery()); + logger.info("Signature fit id: {}", signatureParams.getFitId()); + logger.info("Signature fit method: {}", signatureParams.getFitMethod()); + logger.info("Signature fit sig. version: {}", signatureParams.getFitSigVersion()); + logger.info("Signature fit organ: {}", signatureParams.getFitOrgan()); + logger.info("Signature fit n boot: {}", signatureParams.getFitNBoot()); + logger.info("Signature fit threshold percentage: {}", signatureParams.getFitThresholdPerc()); + logger.info("Signature fit threshold p-value: {}", signatureParams.getFitThresholdPval()); + logger.info("Signature fit max. 
rare sigs.: {}", signatureParams.getFitMaxRareSigs()); + logger.info("Signature fit signatures file: {}", signaturesFile); + logger.info("Signature fit rare signatures file: {}", rareSignaturesFile); logger.info("Skip: {}", signatureParams.getSkip()); } diff --git a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/mutationalSignature/MutationalSignatureLocalAnalysisExecutor.java b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/mutationalSignature/MutationalSignatureLocalAnalysisExecutor.java index 5be8325e066..bdb3510b3cd 100644 --- a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/mutationalSignature/MutationalSignatureLocalAnalysisExecutor.java +++ b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/mutationalSignature/MutationalSignatureLocalAnalysisExecutor.java @@ -77,6 +77,7 @@ public class MutationalSignatureLocalAnalysisExecutor extends MutationalSignatur @Override public void run() throws ToolException, CatalogException, IOException, StorageEngineException { opencgaHome = Paths.get(getExecutorParams().getString("opencgaHome")); + addStepParams(); // Check genome context file for that sample, and create it if necessary if (StringUtils.isNotEmpty(getSkip()) @@ -456,8 +457,11 @@ private File computeClusteredFile(Query query, QueryOptions queryOptions) throws + " /jobdir/" + inputFile.getName() + " /jobdir/" + outputFile.getName(); - // Execute R script in docker - DockerUtils.run(MutationalSignatureLocalAnalysisExecutor.R_DOCKER_IMAGE, inputBindings, outputBinding, rParams, null); + // Execute R script in docker and save the CLI as attribute + String cmdline = DockerUtils.run(MutationalSignatureLocalAnalysisExecutor.R_DOCKER_IMAGE, inputBindings, outputBinding, rParams, + null); + addAttribute(SV_CLUSTERING_CLI_KEY, cmdline); + logger.info("Docker command line: {}", cmdline); } catch (Exception e) { throw new ToolException(e); } @@ -634,8 +638,10 @@ private void 
computeSignatureFitting() throws IOException, ToolException, Catalo } } + // Execute docker and save the cli as attribute String cmdline = DockerUtils.run(R_DOCKER_IMAGE, inputBindings, outputBinding, scriptParams.toString(), null); + addAttribute(SIGNATURE_FIT_CLI_KEY, cmdline); logger.info("Docker command line: {}", cmdline); // Check fitting file before parsing and creating the mutational signature fitting data model diff --git a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/stats/SampleVariantStatsAnalysis.java b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/stats/SampleVariantStatsAnalysis.java index 3d91b877bdd..02160f20f89 100644 --- a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/stats/SampleVariantStatsAnalysis.java +++ b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/stats/SampleVariantStatsAnalysis.java @@ -332,4 +332,12 @@ protected void run() throws ToolException { } } + public SampleVariantStatsAnalysisParams getToolParams() { + return toolParams; + } + + public SampleVariantStatsAnalysis setToolParams(SampleVariantStatsAnalysisParams toolParams) { + this.toolParams = toolParams; + return this; + } } diff --git a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/stats/SampleVariantStatsLocalAnalysisExecutor.java b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/stats/SampleVariantStatsLocalAnalysisExecutor.java index 3009ae19d88..a73edcfbfd6 100644 --- a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/stats/SampleVariantStatsLocalAnalysisExecutor.java +++ b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/stats/SampleVariantStatsLocalAnalysisExecutor.java @@ -53,6 +53,7 @@ public int getMaxBatchSize() { @Override public void run() throws ToolException { + addStepParams(); VariantStorageManager variantStorageManager = getVariantStorageManager(); diff --git 
a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/wrappers/executors/DockerWrapperAnalysisExecutor.java b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/wrappers/executors/DockerWrapperAnalysisExecutor.java index d68f363b36d..740b66fbfc1 100644 --- a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/wrappers/executors/DockerWrapperAnalysisExecutor.java +++ b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/wrappers/executors/DockerWrapperAnalysisExecutor.java @@ -7,21 +7,20 @@ import org.apache.commons.lang3.tuple.Pair; import org.opencb.commons.datastore.core.ObjectMap; import org.opencb.commons.exec.Command; -import org.opencb.opencga.analysis.wrappers.deeptools.DeeptoolsWrapperAnalysis; import org.opencb.opencga.core.common.GitRepositoryState; import org.opencb.opencga.core.exceptions.ToolException; import org.opencb.opencga.core.tools.OpenCgaToolExecutor; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.io.DataOutputStream; -import java.io.File; -import java.io.FileNotFoundException; -import java.io.FileOutputStream; +import java.io.*; +import java.nio.charset.Charset; import java.nio.file.Files; import java.nio.file.Path; -import java.nio.file.Paths; import java.util.*; +import java.util.stream.Collectors; + +import static org.apache.commons.io.FileUtils.readLines; public abstract class DockerWrapperAnalysisExecutor extends OpenCgaToolExecutor { @@ -160,6 +159,7 @@ protected void appendOtherParams(Set skipParams, StringBuilder sb) { protected void runCommandLine(String cmdline) throws ToolException { checkDockerDaemonAlive(); try { + setCommandLine(cmdline); new Command(cmdline) .setOutputOutputStream( new DataOutputStream(new FileOutputStream(getOutDir().resolve(STDOUT_FILENAME).toFile()))) @@ -215,6 +215,25 @@ public static List> getInputFilenames(String inputFile, Set return inputFilenames; } + public static String getStdErrMessage(String title, Path outPath) { + List errMessages = new 
ArrayList<>(); + errMessages.add(title); + + java.io.File stderrFile = outPath.resolve(DockerWrapperAnalysisExecutor.STDERR_FILENAME).toFile(); + if (Files.exists(stderrFile.toPath())) { + try { + errMessages.addAll(readLines(stderrFile, Charset.defaultCharset())); + } catch (IOException e) { + errMessages.add("It could not read the stderr file '" + stderrFile + "' to retrieve error messages: " + e); + errMessages.addAll(Arrays.stream(e.getStackTrace()).map(StackTraceElement::toString).collect(Collectors.toList())); + } + } else { + errMessages.add("The stderr file '" + stderrFile + "' does not exist in order to retrieve error messages."); + } + + return StringUtils.join(errMessages, "\n"); + } + protected static boolean skipParameter(String param) { switch (param) { case "opencgaHome": diff --git a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/wrappers/fastqc/FastqcWrapperAnalysis.java b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/wrappers/fastqc/FastqcWrapperAnalysis.java index b829bc0d781..37962887253 100644 --- a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/wrappers/fastqc/FastqcWrapperAnalysis.java +++ b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/wrappers/fastqc/FastqcWrapperAnalysis.java @@ -32,16 +32,19 @@ @Tool(id = FastqcWrapperAnalysis.ID, resource = Enums.Resource.ALIGNMENT, description = FastqcWrapperAnalysis.DESCRIPTION) public class FastqcWrapperAnalysis extends OpenCgaToolScopeStudy { - public final static String ID = "fastqc"; - public final static String DESCRIPTION = "A high throughput sequence QC analysis tool"; + public static final String ID = "fastqc"; + public static final String DESCRIPTION = "A high throughput sequence QC analysis tool"; - public final static Set FILE_PARAM_NAMES = new HashSet<>(Arrays.asList("l", "limits", "a", "adapters", "c", "contaminants")); + protected static final Set FILE_PARAM_NAMES = new HashSet<>(Arrays.asList("l", "limits", "a", "adapters", "c", 
"contaminants")); + + public static final String FASTQC_DOCKER_CLI_KEY = "FASTQC_DOCKER_CLI"; @ToolParams protected final FastqcWrapperParams analysisParams = new FastqcWrapperParams(); private String inputFilePath = null; + @Override protected void check() throws Exception { super.check(); diff --git a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/wrappers/fastqc/FastqcWrapperAnalysisExecutor.java b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/wrappers/fastqc/FastqcWrapperAnalysisExecutor.java index e13d9468d13..7d5b10ba2cd 100644 --- a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/wrappers/fastqc/FastqcWrapperAnalysisExecutor.java +++ b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/wrappers/fastqc/FastqcWrapperAnalysisExecutor.java @@ -3,27 +3,31 @@ import org.apache.commons.lang3.tuple.ImmutablePair; import org.apache.commons.lang3.tuple.Pair; import org.opencb.opencga.analysis.wrappers.executors.DockerWrapperAnalysisExecutor; +import org.opencb.opencga.core.exceptions.ToolException; import org.opencb.opencga.core.tools.annotations.ToolExecutor; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.util.*; +import static org.opencb.opencga.analysis.wrappers.fastqc.FastqcWrapperAnalysis.FASTQC_DOCKER_CLI_KEY; + @ToolExecutor(id = FastqcWrapperAnalysisExecutor.ID, tool = FastqcWrapperAnalysis.ID, source = ToolExecutor.Source.STORAGE, framework = ToolExecutor.Framework.LOCAL) public class FastqcWrapperAnalysisExecutor extends DockerWrapperAnalysisExecutor { - public final static String ID = FastqcWrapperAnalysis.ID + "-local"; + public static final String ID = FastqcWrapperAnalysis.ID + "-local"; - private String study; private String inputFile; private Logger logger = LoggerFactory.getLogger(this.getClass()); @Override protected void run() throws Exception { + addStepParams(); + StringBuilder sb = initCommandLine(); // Append mounts @@ -46,17 +50,13 @@ protected void run() throws Exception { 
appendOtherParams(skipParams, sb); // Execute command and redirect stdout and stderr to the files - logger.info("Docker command line: " + sb.toString()); + logger.info("Docker command line: {}", sb); + addAttribute(FASTQC_DOCKER_CLI_KEY, sb); runCommandLine(sb.toString()); } - public String getStudy() { - return study; - } - - public FastqcWrapperAnalysisExecutor setStudy(String study) { - this.study = study; - return this; + private void addStepParams() throws ToolException { + addAttribute("INPUT_FILE", inputFile); } public String getInputFile() { diff --git a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/wrappers/samtools/SamtoolsWrapperAnalysis.java b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/wrappers/samtools/SamtoolsWrapperAnalysis.java index 25dabac64b9..0f1e4efa373 100644 --- a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/wrappers/samtools/SamtoolsWrapperAnalysis.java +++ b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/wrappers/samtools/SamtoolsWrapperAnalysis.java @@ -48,6 +48,8 @@ public class SamtoolsWrapperAnalysis extends OpenCgaToolScopeStudy { public final static String DESCRIPTION = "Samtools is a program for interacting with high-throughput sequencing data in SAM, BAM" + " and CRAM formats. 
" + SAMTOOLS_COMMAND_DESCRIPTION; + public final static String SAMTOOlS_DOCKER_CLI_KEY = "SAMTOOLS_DOCKER_CLI"; + @ToolParams protected final SamtoolsWrapperParams analysisParams = new SamtoolsWrapperParams(); diff --git a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/wrappers/samtools/SamtoolsWrapperAnalysisExecutor.java b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/wrappers/samtools/SamtoolsWrapperAnalysisExecutor.java index 67a7743df87..fbc984fea19 100644 --- a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/wrappers/samtools/SamtoolsWrapperAnalysisExecutor.java +++ b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/wrappers/samtools/SamtoolsWrapperAnalysisExecutor.java @@ -4,7 +4,6 @@ import org.apache.commons.lang3.tuple.ImmutablePair; import org.apache.commons.lang3.tuple.Pair; import org.opencb.opencga.analysis.wrappers.executors.DockerWrapperAnalysisExecutor; -import org.opencb.opencga.core.common.GitRepositoryState; import org.opencb.opencga.core.exceptions.ToolException; import org.opencb.opencga.core.tools.annotations.ToolExecutor; import org.slf4j.Logger; @@ -12,6 +11,8 @@ import java.util.*; +import static org.opencb.opencga.analysis.wrappers.samtools.SamtoolsWrapperAnalysis.SAMTOOlS_DOCKER_CLI_KEY; + @ToolExecutor(id = SamtoolsWrapperAnalysisExecutor.ID, tool = SamtoolsWrapperAnalysis.ID, source = ToolExecutor.Source.STORAGE, @@ -20,7 +21,6 @@ public class SamtoolsWrapperAnalysisExecutor extends DockerWrapperAnalysisExecut public final static String ID = SamtoolsWrapperAnalysis.ID + "-local"; - private String study; private String command; private String inputFile; @@ -28,6 +28,8 @@ public class SamtoolsWrapperAnalysisExecutor extends DockerWrapperAnalysisExecut @Override public void run() throws ToolException { + addStepParams(); + switch (command) { case "depth": runDepth(); @@ -82,7 +84,8 @@ private void runDepth() throws ToolException { appendOtherParams(skipParams, sb); // Execute command and 
redirect stdout and stderr to the files - logger.info("Docker command line: " + sb.toString()); + logger.info("Docker command line: {}", sb); + addAttribute(SAMTOOlS_DOCKER_CLI_KEY, sb); runCommandLine(sb.toString()); } @@ -120,7 +123,8 @@ private void runDict() throws ToolException { appendOtherParams(skipParams, sb); // Execute command and redirect stdout and stderr to the files - logger.info("Docker command line: " + sb.toString()); + logger.info("Docker command line: {}", sb); + addAttribute(SAMTOOlS_DOCKER_CLI_KEY, sb); runCommandLine(sb.toString()); } @@ -155,7 +159,8 @@ private void runView() throws ToolException { appendOtherParams(skipParams, sb); // Execute command and redirect stdout and stderr to the files - logger.info("Docker command line: " + sb.toString()); + logger.info("Docker command line: {}", sb); + addAttribute(SAMTOOlS_DOCKER_CLI_KEY, sb); runCommandLine(sb.toString()); } @@ -179,7 +184,8 @@ private void runIndex() throws ToolException { appendOtherParams(null, sb); // Execute command and redirect stdout and stderr to the files - logger.info("Docker command line: " + sb.toString()); + logger.info("Docker command line: {}", sb); + addAttribute(SAMTOOlS_DOCKER_CLI_KEY, sb); runCommandLine(sb.toString()); } @@ -215,7 +221,8 @@ private void runSort() throws ToolException { appendOtherParams(skipParams, sb); // Execute command and redirect stdout and stderr to the files - logger.info("Docker command line: " + sb.toString()); + logger.info("Docker command line: {}", sb); + addAttribute(SAMTOOlS_DOCKER_CLI_KEY, sb); runCommandLine(sb.toString()); } @@ -238,7 +245,8 @@ private void runFaidx() throws ToolException { appendOtherParams(null, sb); // Execute command and redirect stdout and stderr to the files - logger.info("Docker command line: " + sb.toString()); + logger.info("Docker command line: {}", sb); + addAttribute(SAMTOOlS_DOCKER_CLI_KEY, sb); runCommandLine(sb.toString()); } @@ -261,7 +269,8 @@ private void runStats() throws ToolException { 
appendOtherParams(null, sb); // Execute command and redirect stdout and stderr to the files - logger.info("Docker command line: " + sb.toString()); + logger.info("Docker command line: {}", sb); + addAttribute(SAMTOOlS_DOCKER_CLI_KEY, sb); runCommandLine(sb.toString()); } @@ -290,7 +299,8 @@ private void runPlotBamStats() throws ToolException { appendOtherParams(skipParams, sb); // Execute command and redirect stdout and stderr to the files - logger.info("Docker command line: " + sb.toString()); + logger.info("Docker command line: {}", sb); + addAttribute(SAMTOOlS_DOCKER_CLI_KEY, sb); runCommandLine(sb.toString()); } @@ -315,17 +325,14 @@ private void runFlagstat() throws ToolException { appendOtherParams(skipParams, sb); // Execute command and redirect stdout and stderr to the files - logger.info("Docker command line: " + sb.toString()); + logger.info("Docker command line: {}", sb); + addAttribute(SAMTOOlS_DOCKER_CLI_KEY, sb); runCommandLine(sb.toString()); } - public String getStudy() { - return study; - } - - public SamtoolsWrapperAnalysisExecutor setStudy(String study) { - this.study = study; - return this; + private void addStepParams() throws ToolException { + addAttribute("COMMAND", command); + addAttribute("INPUT_FILE", inputFile); } public String getCommand() { diff --git a/opencga-analysis/src/test/java/org/opencb/opencga/analysis/alignment/AlignmentAnalysisTest.java b/opencga-analysis/src/test/java/org/opencb/opencga/analysis/alignment/AlignmentAnalysisTest.java index 9339ddf40e2..314257c3ad0 100644 --- a/opencga-analysis/src/test/java/org/opencb/opencga/analysis/alignment/AlignmentAnalysisTest.java +++ b/opencga-analysis/src/test/java/org/opencb/opencga/analysis/alignment/AlignmentAnalysisTest.java @@ -16,15 +16,21 @@ package org.opencb.opencga.analysis.alignment; +import org.apache.commons.lang3.StringUtils; import org.junit.*; import org.junit.experimental.categories.Category; import org.junit.runner.RunWith; import org.junit.runners.Parameterized; 
+import org.opencb.biodata.formats.alignment.samtools.SamtoolsFlagstats; +import org.opencb.biodata.formats.alignment.samtools.SamtoolsStats; +import org.opencb.biodata.formats.sequence.fastqc.FastQcMetrics; import org.opencb.biodata.models.clinical.Phenotype; import org.opencb.commons.datastore.core.ObjectMap; +import org.opencb.commons.datastore.core.Query; import org.opencb.commons.datastore.core.QueryOptions; import org.opencb.opencga.TestParamConstants; import org.opencb.opencga.analysis.alignment.qc.AlignmentGeneCoverageStatsAnalysis; +import org.opencb.opencga.analysis.alignment.qc.AlignmentQcAnalysis; import org.opencb.opencga.analysis.tools.ToolRunner; import org.opencb.opencga.analysis.variant.OpenCGATestExternalResource; import org.opencb.opencga.analysis.variant.manager.VariantStorageManager; @@ -35,14 +41,15 @@ import org.opencb.opencga.core.exceptions.ToolException; import org.opencb.opencga.core.models.alignment.AlignmentGeneCoverageStatsParams; import org.opencb.opencga.core.models.alignment.AlignmentIndexParams; +import org.opencb.opencga.core.models.alignment.AlignmentQcParams; import org.opencb.opencga.core.models.alignment.CoverageIndexParams; -import org.opencb.opencga.core.models.file.File; -import org.opencb.opencga.core.models.file.FileLinkParams; -import org.opencb.opencga.core.models.file.FileRelatedFile; +import org.opencb.opencga.core.models.file.*; import org.opencb.opencga.core.models.organizations.OrganizationCreateParams; import org.opencb.opencga.core.models.organizations.OrganizationUpdateParams; import org.opencb.opencga.core.models.user.User; import org.opencb.opencga.core.testclassification.duration.MediumTests; +import org.opencb.opencga.core.tools.result.ExecutionResult; +import org.opencb.opencga.core.tools.result.ToolStep; import org.opencb.opencga.storage.core.StorageEngineFactory; import org.opencb.opencga.storage.core.variant.VariantStorageEngine; import 
org.opencb.opencga.storage.hadoop.variant.HadoopVariantStorageEngine; @@ -55,8 +62,10 @@ import java.util.Arrays; import java.util.Collections; import java.util.Map; +import java.util.stream.Collectors; -import static org.junit.Assert.assertEquals; +import static org.junit.Assert.*; +import static org.opencb.opencga.core.models.alignment.AlignmentQcParams.*; @RunWith(Parameterized.class) @Category(MediumTests.class) @@ -81,6 +90,8 @@ public class AlignmentAnalysisTest { private static String cancer_sample = "AR2.10039966-01T"; private static String germline_sample = "AR2.10039966-01G"; + private String bamFilename = "HG00096.chrom20.small.bam"; + private String baiFilename = "HG00096.chrom20.small.bam.bai"; @Parameterized.Parameters(name = "{0}") public static Object[][] parameters() { @@ -140,75 +151,10 @@ public void setUp() throws Throwable { setUpCatalogManager(); -// file = opencga.createFile(STUDY, "variant-test-file.vcf.gz", token); -// variantStorageManager.index(STUDY, file.getId(), opencga.createTmpOutdir("_index"), new ObjectMap(VariantStorageOptions.ANNOTATE.key(), true), token); - -// for (int i = 0; i < file.getSampleIds().size(); i++) { -// String id = file.getSampleIds().get(i); -// if (id.equals(son)) { -// SampleUpdateParams updateParams = new SampleUpdateParams().setSomatic(true); -// catalogManager.getSampleManager().update(STUDY, id, updateParams, null, token); -// } -// if (i % 2 == 0) { -// SampleUpdateParams updateParams = new SampleUpdateParams().setPhenotypes(Collections.singletonList(PHENOTYPE)); -// catalogManager.getSampleManager().update(STUDY, id, updateParams, null, token); -// } -// } - -// catalogManager.getCohortManager().create(STUDY, new CohortCreateParams().setId("c1") -// .setSamples(file.getSampleIds().subList(0, 2).stream().map(s -> new SampleReferenceParam().setId(s)).collect(Collectors.toList())), -// null, null, null, token); -// catalogManager.getCohortManager().create(STUDY, new CohortCreateParams().setId("c2") -// 
.setSamples(file.getSampleIds().subList(2, 4).stream().map(s -> new SampleReferenceParam().setId(s)).collect(Collectors.toList())), -// null, null, null, token); - -// Phenotype phenotype = new Phenotype("phenotype", "phenotype", ""); -// Disorder disorder = new Disorder("disorder", "disorder", "", "", Collections.singletonList(phenotype), Collections.emptyMap()); -// List individuals = new ArrayList<>(4); -// -// // Father -// individuals.add(catalogManager.getIndividualManager() -// .create(STUDY, new Individual(father, father, new Individual(), new Individual(), new Location(), SexOntologyTermAnnotation.initMale(), null, null, null, null, "", -// Collections.emptyList(), false, 0, Collections.emptyList(), Collections.emptyList(), Collections.emptyList(), IndividualInternal.init(), Collections.emptyMap()), Collections.singletonList(father), new QueryOptions(ParamConstants.INCLUDE_RESULT_PARAM, true), token).first()); -// // Mother -// individuals.add(catalogManager.getIndividualManager() -// .create(STUDY, new Individual(mother, mother, new Individual(), new Individual(), new Location(), SexOntologyTermAnnotation.initFemale(), null, null, null, null, "", -// Collections.emptyList(), false, 0, Collections.emptyList(), Collections.emptyList(), Collections.emptyList(), IndividualInternal.init(), Collections.emptyMap()), Collections.singletonList(mother), new QueryOptions(ParamConstants.INCLUDE_RESULT_PARAM, true), token).first()); -// // Son -// individuals.add(catalogManager.getIndividualManager() -// .create(STUDY, new Individual(son, son, new Individual(), new Individual(), new Location(), SexOntologyTermAnnotation.initMale(), null, null, null, null, "", -// Collections.emptyList(), false, 0, Collections.emptyList(), Collections.emptyList(), Collections.emptyList(), IndividualInternal.init(), Collections.emptyMap()).setFather(individuals.get(0)).setMother(individuals.get(1)).setDisorders(Collections.singletonList(disorder)), Collections.singletonList(son), new 
QueryOptions(ParamConstants.INCLUDE_RESULT_PARAM, true), token).first()); -// // Daughter -// individuals.add(catalogManager.getIndividualManager() -// .create(STUDY, new Individual(daughter, daughter, new Individual(), new Individual(), new Location(), SexOntologyTermAnnotation.initFemale(), null, null, null, null, "", -// Collections.emptyList(), false, 0, Collections.emptyList(), Collections.emptyList(), Collections.emptyList(), IndividualInternal.init(), Collections.emptyMap()).setFather(individuals.get(0)).setMother(individuals.get(1)), Collections.singletonList(daughter), new QueryOptions(ParamConstants.INCLUDE_RESULT_PARAM, true), token).first()); -// catalogManager.getFamilyManager().create( -// STUDY, -// new Family("f1", "f1", Collections.singletonList(phenotype), Collections.singletonList(disorder), null, null, 3, null, null), -// individuals.stream().map(Individual::getId).collect(Collectors.toList()), new QueryOptions(), -// token); -// -// // Cancer (SV) -// ObjectMap config = new ObjectMap(); -//// config.put(VariantStorageOptions.ANNOTATE.key(), true); -// config.put(VariantStorageOptions.LOAD_SPLIT_DATA.key(), VariantStorageEngine.SplitData.MULTI); -// -// file = opencga.createFile(CANCER_STUDY, "AR2.10039966-01T_vs_AR2.10039966-01G.annot.brass.vcf.gz", token); -// variantStorageManager.index(CANCER_STUDY, file.getId(), opencga.createTmpOutdir("_index"), config, token); -// file = opencga.createFile(CANCER_STUDY, "AR2.10039966-01T.copynumber.caveman.vcf.gz", token); -// variantStorageManager.index(CANCER_STUDY, file.getId(), opencga.createTmpOutdir("_index"), config, token); -// file = opencga.createFile(CANCER_STUDY, "AR2.10039966-01T_vs_AR2.10039966-01G.annot.pindel.vcf.gz", token); -// variantStorageManager.index(CANCER_STUDY, file.getId(), opencga.createTmpOutdir("_index"), config, token); -// -// SampleUpdateParams updateParams = new SampleUpdateParams().setSomatic(true); -// catalogManager.getSampleManager().update(CANCER_STUDY, 
cancer_sample, updateParams, null, token); - opencga.getStorageConfiguration().getVariant().setDefaultEngine(storageEngine); VariantStorageEngine engine = opencga.getStorageEngineFactory().getVariantStorageEngine(storageEngine, DB_NAME); -// if (storageEngine.equals(HadoopVariantStorageEngine.STORAGE_ENGINE_ID)) { -// VariantHbaseTestUtils.printVariants(((VariantHadoopDBAdaptor) engine.getDBAdaptor()), Paths.get(opencga.createTmpOutdir("_hbase_print_variants")).toUri()); -// } } + // Reset engines opencga.getStorageEngineFactory().close(); catalogManager = opencga.getCatalogManager(); @@ -225,7 +171,7 @@ public static void afterClass() { opencga.after(); } - public void setUpCatalogManager() throws CatalogException { + public void setUpCatalogManager() throws CatalogException, IOException { catalogManager.getOrganizationManager().create(new OrganizationCreateParams().setId("test"), null, opencga.getAdminToken()); catalogManager.getUserManager().create(new User().setId(USER).setName("User Name").setEmail("mail@ebi.ac.uk").setOrganization("test"), PASSWORD, opencga.getAdminToken()); @@ -237,28 +183,12 @@ public void setUpCatalogManager() throws CatalogException { null, "GRCh38", new QueryOptions(ParamConstants.INCLUDE_RESULT_PARAM, true), token).first().getId(); catalogManager.getStudyManager().create(projectId, STUDY, null, "Phase 1", "Done", null, null, null, null, null, token); - // Create 10 samples not indexed -// for (int i = 0; i < 10; i++) { -// Sample sample = new Sample().setId("SAMPLE_" + i); -// if (i % 2 == 0) { -// sample.setPhenotypes(Collections.singletonList(PHENOTYPE)); -// } -// catalogManager.getSampleManager().create(STUDY, sample, null, token); -// } -// -// // Cancer -// List samples = new ArrayList<>(); -// catalogManager.getStudyManager().create(projectId, CANCER_STUDY, null, "Phase 1", "Done", null, null, null, null, null, token); -// Sample sample = new Sample().setId(cancer_sample).setSomatic(true); -// samples.add(sample); -//// 
catalogManager.getSampleManager().create(CANCER_STUDY, sample, null, token); -// sample = new Sample().setId(germline_sample); -// samples.add(sample); -//// catalogManager.getSampleManager().create(CANCER_STUDY, sample, null, token); -// Individual individual = catalogManager.getIndividualManager() -// .create(CANCER_STUDY, new Individual("AR2.10039966-01", "AR2.10039966-01", new Individual(), new Individual(), new Location(), SexOntologyTermAnnotation.initMale(), null, null, null, null, "", -// samples, false, 0, Collections.emptyList(), Collections.emptyList(), Collections.emptyList(), IndividualInternal.init(), Collections.emptyMap()), Collections.emptyList(), new QueryOptions(ParamConstants.INCLUDE_RESULT_PARAM, true), token).first(); -// assertEquals(2, individual.getSamples().size()); + + // BAM and BAI files + catalogManager.getFileManager().link(STUDY, new FileLinkParams(opencga.getResourceUri("biofiles/" + bamFilename).toString(), + "", "", "", null, null, null, null, null), false, token).first(); + catalogManager.getFileManager().link(STUDY, new FileLinkParams(opencga.getResourceUri("biofiles/" + baiFilename).toString(), + "", "", "", null, null, null, null, null), false, token).first(); } @Test @@ -266,12 +196,13 @@ public void geneCoverageStatsTest() throws IOException, ToolException, CatalogEx Path outdir = Paths.get(opencga.createTmpOutdir("_genecoveragestats")); // setup BAM files - String bamFilename = opencga.getResourceUri("biofiles/HG00096.chrom20.small.bam").toString(); - String baiFilename = opencga.getResourceUri("biofiles/HG00096.chrom20.small.bam.bai").toString(); +// String bamFilename = opencga.getResourceUri("biofiles/HG00096.chrom20.small.bam").toString(); +// String baiFilename = opencga.getResourceUri("biofiles/HG00096.chrom20.small.bam.bai").toString(); //String bamFilename = getClass().getResource("/biofiles/NA19600.chrom20.small.bam").getFile(); - File bamFile = catalogManager.getFileManager().link(STUDY, new 
FileLinkParams(bamFilename, "", "", "", null, null, null, - null, null), false, token).first(); - assertEquals(0, bamFile.getQualityControl().getCoverage().getGeneCoverageStats().size()); +// File bamFile = catalogManager.getFileManager().link(STUDY, new FileLinkParams(bamFilename, "", "", "", null, null, null, +// null, null), false, token).first(); + File bamFile = getCatalogFile(bamFilename); + assertEquals(0, bamFile.getQualityControl().getCoverage().getGeneCoverageStats().size()); AlignmentGeneCoverageStatsParams params = new AlignmentGeneCoverageStatsParams(); params.setBamFile(bamFile.getId()); @@ -280,13 +211,254 @@ public void geneCoverageStatsTest() throws IOException, ToolException, CatalogEx toolRunner.execute(AlignmentGeneCoverageStatsAnalysis.class, params, new ObjectMap(), outdir, "coverage-job-id", false, token); - bamFile = catalogManager.getFileManager().link(STUDY, new FileLinkParams(bamFilename, "", "", "", null, null, null, - null, null), false, token).first(); + bamFile = getCatalogFile(bamFilename); assertEquals(1, bamFile.getQualityControl().getCoverage().getGeneCoverageStats().size()); assertEquals(geneName, bamFile.getQualityControl().getCoverage().getGeneCoverageStats().get(0).getGeneName()); assertEquals(10, bamFile.getQualityControl().getCoverage().getGeneCoverageStats().get(0).getStats().size()); } + @Test + public void testAlignmentQc() throws IOException, ToolException, CatalogException { + Path outDir = Paths.get(opencga.createTmpOutdir("_alignment_qc")); + + File bamFile = getCatalogFile(bamFilename); + resetAlignemntQc(bamFile); + + AlignmentQcParams params = new AlignmentQcParams(); + params.setBamFile(bamFile.getId()); + + ExecutionResult executionResult = toolRunner.execute(AlignmentQcAnalysis.class, params, new ObjectMap(), outDir, null, false, token); + assertTrue(executionResult.getSteps().stream().map(ToolStep::getId).collect(Collectors.toList()).contains(AlignmentQcAnalysis.SAMTOOLS_FLAGSTATS_STEP)); + 
assertTrue(executionResult.getSteps().stream().map(ToolStep::getId).collect(Collectors.toList()).contains(AlignmentQcAnalysis.SAMTOOLS_STATS_STEP)); + assertTrue(executionResult.getSteps().stream().map(ToolStep::getId).collect(Collectors.toList()).contains(AlignmentQcAnalysis.PLOT_BAMSTATS_STEP)); + assertTrue(executionResult.getSteps().stream().map(ToolStep::getId).collect(Collectors.toList()).contains(AlignmentQcAnalysis.FASTQC_METRICS_STEP)); + + // Check + bamFile = catalogManager.getFileManager().get(STUDY, bamFile.getId(), QueryOptions.empty(), token).first(); + checkSamtoolsFlagstats(bamFile.getQualityControl().getAlignment().getSamtoolsFlagStats()); + checkSamtoolsStats(bamFile.getQualityControl().getAlignment().getSamtoolsStats()); + checkFastQcMetrics(bamFile.getQualityControl().getAlignment().getFastQcMetrics()); + System.out.println("outdir = " + outDir); + } + + @Test + public void testAlignmentQcSamtoolsFlagstat() throws IOException, ToolException, CatalogException { + Path outDir = Paths.get(opencga.createTmpOutdir("_alignment_qc_samtools_flagstat")); + + File bamFile = getCatalogFile(bamFilename); + resetAlignemntQc(bamFile); + System.out.println("bamFile.getQualityControl().getAlignment() = " + bamFile.getQualityControl().getAlignment()); + + AlignmentQcParams params = new AlignmentQcParams(); + params.setBamFile(bamFile.getId()); + params.setSkip(StringUtils.join(Arrays.asList(STATS_SKIP_VALUE, FASTQC_METRICS_SKIP_VALUE), ",")); + + ExecutionResult executionResult = toolRunner.execute(AlignmentQcAnalysis.class, STUDY, params, outDir, null, false, token); + assertTrue(executionResult.getSteps().stream().map(ToolStep::getId).collect(Collectors.toList()).contains(AlignmentQcAnalysis.SAMTOOLS_FLAGSTATS_STEP)); + + // Check + bamFile = catalogManager.getFileManager().get(STUDY, bamFile.getId(), QueryOptions.empty(), token).first(); + checkSamtoolsFlagstats(bamFile.getQualityControl().getAlignment().getSamtoolsFlagStats()); + assertEquals(null, 
bamFile.getQualityControl().getAlignment().getSamtoolsStats()); + assertEquals(null, bamFile.getQualityControl().getAlignment().getFastQcMetrics()); + System.out.println("outdir = " + outDir); + } + + @Test + public void testAlignmentQcSamtoolsStatsPlots() throws IOException, ToolException, CatalogException { + Path outDir = Paths.get(opencga.createTmpOutdir("_alignment_qc_samtools_stats_plots")); + + File bamFile = getCatalogFile(bamFilename); + resetAlignemntQc(bamFile); + System.out.println("bamFile.getQualityControl().getAlignment() = " + bamFile.getQualityControl().getAlignment()); + + AlignmentQcParams params = new AlignmentQcParams(); + params.setBamFile(bamFile.getId()); + params.setSkip(StringUtils.join(Arrays.asList(FLAGSTATS_SKIP_VALUE, FASTQC_METRICS_SKIP_VALUE), ",")); + + ExecutionResult executionResult = toolRunner.execute(AlignmentQcAnalysis.class, STUDY, params, outDir, null, false, token); + assertTrue(executionResult.getSteps().stream().map(ToolStep::getId).collect(Collectors.toList()).contains(AlignmentQcAnalysis.SAMTOOLS_STATS_STEP)); + assertTrue(executionResult.getSteps().stream().map(ToolStep::getId).collect(Collectors.toList()).contains(AlignmentQcAnalysis.PLOT_BAMSTATS_STEP)); + + // Check + bamFile = catalogManager.getFileManager().get(STUDY, bamFile.getId(), QueryOptions.empty(), token).first(); + checkSamtoolsStats(bamFile.getQualityControl().getAlignment().getSamtoolsStats()); + assertEquals(null, bamFile.getQualityControl().getAlignment().getSamtoolsFlagStats()); + assertEquals(null, bamFile.getQualityControl().getAlignment().getFastQcMetrics()); + System.out.println("outdir = " + outDir); + } + + @Test + public void testAlignmentQcFastqc() throws IOException, ToolException, CatalogException { + Path outDir = Paths.get(opencga.createTmpOutdir("_alignment_qc_fastqc")); + + File bamFile = getCatalogFile(bamFilename); + resetAlignemntQc(bamFile); + + AlignmentQcParams params = new AlignmentQcParams(); + 
params.setBamFile(bamFile.getId()); + params.setSkip(StringUtils.join(Arrays.asList(STATS_SKIP_VALUE, FLAGSTATS_SKIP_VALUE), ",")); + + ExecutionResult executionResult = toolRunner.execute(AlignmentQcAnalysis.class, STUDY, params, outDir, null, false, token); + assertTrue(executionResult.getSteps().stream().map(ToolStep::getId).collect(Collectors.toList()).contains(AlignmentQcAnalysis.FASTQC_METRICS_STEP)); + + // Check + bamFile = catalogManager.getFileManager().get(STUDY, bamFile.getId(), QueryOptions.empty(), token).first(); + assertEquals(null, bamFile.getQualityControl().getAlignment().getSamtoolsStats()); + assertEquals(null, bamFile.getQualityControl().getAlignment().getSamtoolsFlagStats()); + checkFastQcMetrics(bamFile.getQualityControl().getAlignment().getFastQcMetrics()); + System.out.println("outdir = " + outDir); + } + + @Test + public void testAlignmentQcFastqcAndOverwrite() throws IOException, ToolException, CatalogException { + Path outDir = Paths.get(opencga.createTmpOutdir("_alignment_qc_fastqc_and_overwrite")); + + File bamFile = getCatalogFile(bamFilename); + resetAlignemntQc(bamFile); + + AlignmentQcParams params = new AlignmentQcParams(); + params.setBamFile(bamFile.getId()); + params.setSkip(StringUtils.join(Arrays.asList(STATS_SKIP_VALUE, FLAGSTATS_SKIP_VALUE), ",")); + + ExecutionResult executeResult = toolRunner.execute(AlignmentQcAnalysis.class, STUDY, params, outDir, null, false, token); + assertTrue(executeResult.getSteps().stream().map(ToolStep::getId).collect(Collectors.toList()).contains(AlignmentQcAnalysis.FASTQC_METRICS_STEP)); + + outDir = Paths.get(opencga.createTmpOutdir("_alignment_qc_fastqc_overwrite_and_overwrite_2")); + params.setOverwrite(true); + executeResult = toolRunner.execute(AlignmentQcAnalysis.class, STUDY, params, outDir, null, false, token); + assertTrue(executeResult.getSteps().stream().map(ToolStep::getId).collect(Collectors.toList()).contains(AlignmentQcAnalysis.FASTQC_METRICS_STEP)); + + // Check + bamFile = 
catalogManager.getFileManager().get(STUDY, bamFile.getId(), QueryOptions.empty(), token).first(); + assertEquals(null, bamFile.getQualityControl().getAlignment().getSamtoolsStats()); + assertEquals(null, bamFile.getQualityControl().getAlignment().getSamtoolsFlagStats()); + checkFastQcMetrics(bamFile.getQualityControl().getAlignment().getFastQcMetrics()); + System.out.println("outdir = " + outDir); + } + + @Test + public void testAlignmentQcFastqcAndDoNotOverwrite() throws IOException, ToolException, CatalogException { + Path outDir = Paths.get(opencga.createTmpOutdir("_alignment_qc_fastqc_and_do_not_overwrite")); + + File bamFile = getCatalogFile(bamFilename); + resetAlignemntQc(bamFile); + + AlignmentQcParams params = new AlignmentQcParams(); + params.setBamFile(bamFile.getId()); + params.setSkip(StringUtils.join(Arrays.asList(STATS_SKIP_VALUE, FLAGSTATS_SKIP_VALUE), ",")); + + ExecutionResult executeResult = toolRunner.execute(AlignmentQcAnalysis.class, STUDY, params, outDir, null, false, token); + assertTrue(executeResult.getSteps().stream().map(ToolStep::getId).collect(Collectors.toList()).contains(AlignmentQcAnalysis.FASTQC_METRICS_STEP)); + + // Check + bamFile = catalogManager.getFileManager().get(STUDY, bamFile.getId(), QueryOptions.empty(), token).first(); + assertEquals(null, bamFile.getQualityControl().getAlignment().getSamtoolsStats()); + assertEquals(null, bamFile.getQualityControl().getAlignment().getSamtoolsFlagStats()); + checkFastQcMetrics(bamFile.getQualityControl().getAlignment().getFastQcMetrics()); + + outDir = Paths.get(opencga.createTmpOutdir("_alignment_qc_fastqc_and_do_not_overwrite_2")); + + executeResult = toolRunner.execute(AlignmentQcAnalysis.class, STUDY, params, outDir, null, false, token); + assertFalse(executeResult.getSteps().stream().map(ToolStep::getId).collect(Collectors.toList()).contains(AlignmentQcAnalysis.FASTQC_METRICS_STEP)); + + System.out.println("outdir = " + outDir); + } + + @Test + public void 
testFailureOnAlignmentQcSamtoolsFlagstat() throws IOException, ToolException, CatalogException { + Path outDir = Paths.get(opencga.createTmpOutdir("_failure_on_alignment_qc_samtools_flagstat")); + + File bamFile = getCatalogFile(bamFilename); + + Path tmpDir = Files.createDirectories(outDir.resolve("tmp")); + if (!Files.exists(tmpDir)) { + throw new IOException("It could not create the directory " + tmpDir); + } + Path newBamFilePath = Files.copy(Paths.get(bamFile.getUri()), tmpDir.resolve(bamFile.getName())); + File newBamFile = catalogManager.getFileManager().link(STUDY, new FileLinkParams(newBamFilePath.toString(), "bam_to_file_on_flagstats", "", "", null, null, null, + null, null), true, token).first(); + + Paths.get(newBamFile.getUri()).toFile().delete(); + + AlignmentQcParams params = new AlignmentQcParams(); + params.setBamFile(newBamFile.getId()); + params.setSkip(StringUtils.join(Arrays.asList(STATS_SKIP_VALUE, FASTQC_METRICS_SKIP_VALUE), ",")); + + ExecutionResult executionResult; + try { + System.out.println("outdir = " + outDir); + executionResult = toolRunner.execute(AlignmentQcAnalysis.class, STUDY, params, outDir, null, false, token); + } catch (ToolException e) { + assertTrue(e.getMessage().contains("Cannot open input file")); + return; + } + fail(); + } + + @Test + public void testFailureOnAlignmentQcSamtoolsStats() throws IOException, CatalogException { + Path outDir = Paths.get(opencga.createTmpOutdir("_failure_on_alignment_qc_samtools_stats")); + + File bamFile = getCatalogFile(bamFilename); + + Path tmpDir = Files.createDirectories(outDir.resolve("tmp")); + if (!Files.exists(tmpDir)) { + throw new IOException("It could not create the directory " + tmpDir); + } + Path newBamFilePath = Files.copy(Paths.get(bamFile.getUri()), tmpDir.resolve(bamFile.getName())); + File newBamFile = catalogManager.getFileManager().link(STUDY, new FileLinkParams(newBamFilePath.toString(), "bam_to_file_on_stats", "", "", null, null, null, + null, null), true, 
token).first(); + + Paths.get(newBamFile.getUri()).toFile().delete(); + + AlignmentQcParams params = new AlignmentQcParams(); + params.setBamFile(newBamFile.getId()); + params.setSkip(StringUtils.join(Arrays.asList(FLAGSTATS_SKIP_VALUE, FASTQC_METRICS_SKIP_VALUE), ",")); + + ExecutionResult executionResult; + try { + System.out.println("outdir = " + outDir); + executionResult = toolRunner.execute(AlignmentQcAnalysis.class, STUDY, params, outDir, null, false, token); + } catch (ToolException e) { + assertTrue(e.getMessage().contains("No such file or directory")); + return; + } + fail(); + } + + @Test + public void testFailureOnAlignmentQcFastQc() throws IOException, CatalogException { + Path outDir = Paths.get(opencga.createTmpOutdir("_failure_on_alignment_qc_fastqc")); + + File bamFile = getCatalogFile(bamFilename); + + Path tmpDir = Files.createDirectories(outDir.resolve("tmp")); + if (!Files.exists(tmpDir)) { + throw new IOException("It could not create the directory " + tmpDir); + } + Path newBamFilePath = Files.copy(Paths.get(bamFile.getUri()), tmpDir.resolve(bamFile.getName())); + File newBamFile = catalogManager.getFileManager().link(STUDY, new FileLinkParams(newBamFilePath.toString(), "bam_to_file_on_fastqc", "", "", null, null, null, + null, null), true, token).first(); + + Paths.get(newBamFile.getUri()).toFile().delete(); + + AlignmentQcParams params = new AlignmentQcParams(); + params.setBamFile(newBamFile.getId()); + params.setSkip(StringUtils.join(Arrays.asList(STATS_SKIP_VALUE, FLAGSTATS_SKIP_VALUE), ",")); + + ExecutionResult executionResult; + try { + System.out.println("outdir = " + outDir); + executionResult = toolRunner.execute(AlignmentQcAnalysis.class, STUDY, params, outDir, null, false, token); + } catch (ToolException e) { + assertTrue(e.getMessage().contains("which didn't exist")); + System.out.println("e.getMessage() = " + e.getMessage()); + return; + } + fail(); + } + @Test public void testNonReadOnlyAlignmentIndex() throws Exception { Path 
nonReadOnlyDir = Paths.get(opencga.createTmpOutdir("_non_readonly_alignment_index")); @@ -432,4 +604,65 @@ public void testReadOnlyCoverageIndex() throws Exception { Runtime.getRuntime().exec("chmod 777 " + readOnlyDir.toAbsolutePath()); } + + //------------------------------------------------------------------------- + // U T I L S + //------------------------------------------------------------------------- + + private void checkSamtoolsStats(SamtoolsStats stats) { + System.out.println("stats = " + stats); + assertTrue(stats != null); + assertEquals(108, stats.getSequences()); + assertEquals(55, stats.getLastFragments()); + assertEquals(0, stats.getReadsDuplicated()); + assertEquals(0, stats.getReadsQcFailed()); + assertEquals(10800, stats.getTotalLength()); + assertEquals(10047, stats.getBasesMappedCigar()); + assertEquals(49, stats.getMismatches()); + assertEquals(31.0, stats.getAverageQuality(), 0.001f); + assertEquals(1, stats.getFiles().stream().filter(n -> n.endsWith("quals.png")).collect(Collectors.toList()).size()); + assertEquals(1, stats.getFiles().stream().filter(n -> n.endsWith("quals3.png")).collect(Collectors.toList()).size()); + assertEquals(1, stats.getFiles().stream().filter(n -> n.endsWith("coverage.png")).collect(Collectors.toList()).size()); + assertEquals(1, stats.getFiles().stream().filter(n -> n.endsWith("insert-size.png")).collect(Collectors.toList()).size()); + assertEquals(1, stats.getFiles().stream().filter(n -> n.endsWith("gc-content.png")).collect(Collectors.toList()).size()); + assertEquals(1, stats.getFiles().stream().filter(n -> n.endsWith("acgt-cycles.png")).collect(Collectors.toList()).size()); + assertEquals(1, stats.getFiles().stream().filter(n -> n.endsWith("quals2.png")).collect(Collectors.toList()).size()); + } + + private void checkSamtoolsFlagstats(SamtoolsFlagstats flagstats) { + System.out.println("flagstats = " + flagstats); + assertTrue(flagstats != null); + assertEquals(108, flagstats.getTotalReads()); + 
assertEquals(0, flagstats.getSecondaryAlignments()); + assertEquals(53, flagstats.getRead1()); + assertEquals(55, flagstats.getRead2()); + assertEquals(104, flagstats.getProperlyPaired()); + } + + private void checkFastQcMetrics(FastQcMetrics metrics) { + System.out.println("metrics = " + metrics); + assertTrue(metrics != null); + assertEquals("PASS", metrics.getSummary().getBasicStatistics()); + assertEquals("FAIL", metrics.getSummary().getPerSeqGcContent()); + assertEquals("WARN", metrics.getSummary().getOverrepresentedSeqs()); + assertEquals(7, metrics.getBasicStats().size()); + assertEquals("108", metrics.getBasicStats().get("Total Sequences")); + assertEquals("100", metrics.getBasicStats().get("Sequence length")); + assertEquals("46", metrics.getBasicStats().get("%GC")); + assertEquals(8, metrics.getFiles().size()); + assertEquals(1, metrics.getFiles().stream().filter(n -> n.endsWith("per_sequence_quality.png")).collect(Collectors.toList()).size()); + assertEquals(1, metrics.getFiles().stream().filter(n -> n.endsWith("duplication_levels.png")).collect(Collectors.toList()).size()); + assertEquals(1, metrics.getFiles().stream().filter(n -> n.endsWith("per_base_quality.png")).collect(Collectors.toList()).size()); + assertEquals(1, metrics.getFiles().stream().filter(n -> n.endsWith("adapter_content.png")).collect(Collectors.toList()).size()); + } + + private File getCatalogFile(String name) throws CatalogException { + return catalogManager.getFileManager().search(STUDY, new Query("name", name), QueryOptions.empty(), token).first(); + } + + private void resetAlignemntQc(File bamFile) throws CatalogException { + FileUpdateParams updateParams = new FileUpdateParams(); + updateParams.setQualityControl(new FileQualityControl()); + catalogManager.getFileManager().update(STUDY, new Query("id", bamFile.getId()), updateParams, QueryOptions.empty(), token); + } } \ No newline at end of file diff --git 
a/opencga-analysis/src/test/java/org/opencb/opencga/analysis/variant/OpenCGATestExternalResource.java b/opencga-analysis/src/test/java/org/opencb/opencga/analysis/variant/OpenCGATestExternalResource.java index 5c5956d7cf7..c5d1eb69b2c 100644 --- a/opencga-analysis/src/test/java/org/opencb/opencga/analysis/variant/OpenCGATestExternalResource.java +++ b/opencga-analysis/src/test/java/org/opencb/opencga/analysis/variant/OpenCGATestExternalResource.java @@ -250,12 +250,17 @@ public Path isolateOpenCGA() throws IOException { // Files.copy(inputStream, opencgaHome.resolve("examples") // .resolve("1k.chr1.phase3_shapeit2_mvncall_integrated_v5.20130502.genotypes.vcf.gz"), StandardCopyOption.REPLACE_EXISTING); - // Mutational signatue analysis + // Mutational signature analysis files Path analysisPath = Files.createDirectories(opencgaHome.resolve("analysis/mutational-signature")).toAbsolutePath(); inputStream = new FileInputStream("../opencga-app/app/analysis/mutational-signature/sv_clustering.R"); Files.copy(inputStream, analysisPath.resolve("sv_clustering.R"), StandardCopyOption.REPLACE_EXISTING); - // Pedigree graph analysis + // Genome plot analysis files + analysisPath = Files.createDirectories(opencgaHome.resolve("analysis/genome-plot")).toAbsolutePath(); + inputStream = new FileInputStream("../opencga-app/app/analysis/genome-plot/circos.R"); + Files.copy(inputStream, analysisPath.resolve("circos.R"), StandardCopyOption.REPLACE_EXISTING); + + // Pedigree graph analysis files analysisPath = Files.createDirectories(opencgaHome.resolve("analysis/pedigree-graph")).toAbsolutePath(); inputStream = new FileInputStream("../opencga-app/app/analysis/pedigree-graph/ped.R"); Files.copy(inputStream, analysisPath.resolve("ped.R"), StandardCopyOption.REPLACE_EXISTING); diff --git a/opencga-analysis/src/test/java/org/opencb/opencga/analysis/variant/VariantAnalysisTest.java b/opencga-analysis/src/test/java/org/opencb/opencga/analysis/variant/VariantAnalysisTest.java index 
c91909d3abb..4dbea240a4f 100644 --- a/opencga-analysis/src/test/java/org/opencb/opencga/analysis/variant/VariantAnalysisTest.java +++ b/opencga-analysis/src/test/java/org/opencb/opencga/analysis/variant/VariantAnalysisTest.java @@ -18,6 +18,7 @@ import org.apache.commons.collections4.CollectionUtils; import org.apache.commons.io.FileUtils; +import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.mutable.MutableInt; import org.hamcrest.CoreMatchers; import org.junit.*; @@ -35,11 +36,13 @@ import org.opencb.biodata.models.variant.StudyEntry; import org.opencb.biodata.models.variant.avro.VariantType; import org.opencb.biodata.models.variant.metadata.SampleVariantStats; +import org.opencb.commons.datastore.core.Event; import org.opencb.commons.datastore.core.ObjectMap; import org.opencb.commons.datastore.core.Query; import org.opencb.commons.datastore.core.QueryOptions; import org.opencb.opencga.TestParamConstants; import org.opencb.opencga.analysis.clinical.ClinicalAnalysisLoadTask; +import org.opencb.opencga.analysis.sample.qc.SampleQcAnalysis; import org.opencb.opencga.analysis.tools.ToolRunner; import org.opencb.opencga.analysis.variant.gwas.GwasAnalysis; import org.opencb.opencga.analysis.variant.hrdetect.HRDetectAnalysis; @@ -131,8 +134,10 @@ public class VariantAnalysisTest { private static String daughter = "NA19600"; public static final String CANCER_STUDY = "cancer"; - private static String cancer_sample = "AR2.10039966-01T"; - private static String germline_sample = "AR2.10039966-01G"; + private static String cancer_sample = "HCC1954"; // "AR2.10039966-01T"; + private static String germline_sample = "HCC1954BL"; //""AR2.10039966-01G"; + + private static String genomePlotConfigFilename = "genome-plot-config.json"; @Rule public ExpectedException thrown = ExpectedException.none(); @@ -235,16 +240,21 @@ public void setUp() throws Throwable { config.put(VariantStorageOptions.LOAD_SPLIT_DATA.key(), VariantStorageEngine.SplitData.MULTI); File 
file; - file = opencga.createFile(CANCER_STUDY, "AR2.10039966-01T_vs_AR2.10039966-01G.annot.brass.vcf.gz", token); + file = opencga.createFile(CANCER_STUDY, "cancer-rearrs.vcf.gz", token); + variantStorageManager.index(CANCER_STUDY, file.getId(), opencga.createTmpOutdir("_index"), config, token); + file = opencga.createFile(CANCER_STUDY, "cancer-cnvs.vcf.gz", token); variantStorageManager.index(CANCER_STUDY, file.getId(), opencga.createTmpOutdir("_index"), config, token); - file = opencga.createFile(CANCER_STUDY, "AR2.10039966-01T.copynumber.caveman.vcf.gz", token); + file = opencga.createFile(CANCER_STUDY, "cancer-indels.vcf.gz", token); variantStorageManager.index(CANCER_STUDY, file.getId(), opencga.createTmpOutdir("_index"), config, token); - file = opencga.createFile(CANCER_STUDY, "AR2.10039966-01T_vs_AR2.10039966-01G.annot.pindel.vcf.gz", token); + file = opencga.createFile(CANCER_STUDY, "cancer-snvs.vcf.gz", token); variantStorageManager.index(CANCER_STUDY, file.getId(), opencga.createTmpOutdir("_index"), config, token); SampleUpdateParams updateParams = new SampleUpdateParams().setSomatic(true); catalogManager.getSampleManager().update(CANCER_STUDY, cancer_sample, updateParams, null, token); + opencga.createFile(CANCER_STUDY, genomePlotConfigFilename, token); + assertEquals(genomePlotConfigFilename, catalogManager.getFileManager().get(CANCER_STUDY, genomePlotConfigFilename, QueryOptions.empty(), token).first().getName()); + opencga.getStorageConfiguration().getVariant().setDefaultEngine(storageEngine); VariantStorageEngine engine = opencga.getStorageEngineFactory().getVariantStorageEngine(storageEngine, DB_NAME); if (storageEngine.equals(HadoopVariantStorageEngine.STORAGE_ENGINE_ID)) { @@ -835,19 +845,8 @@ public void testMutationalSignatureCatalogueSV() throws Exception { VariantQuery query = new VariantQuery() .sample(cancer_sample) .type(VariantType.SV.name()) - //.file("AR2.10039966-01T_vs_AR2.10039966-01G.annot.brass.vcf.gz"); - 
.fileData("AR2.10039966-01T_vs_AR2.10039966-01G.annot.brass.vcf.gz:BAS>=0;BKDIST>=-1") .region("1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,X,Y"); - //https://ws.opencb.org/opencga-test/webservices/rest/v2/analysis/variant/mutationalSignature/query - // ?study=serena@cancer38:test38 - // &fitting=false - // &sample=AR2.10039966-01T - // &fileData=AR2.10039966-01T_vs_AR2.10039966-01G.annot.brass.vcf.gz:BAS>=0;BKDIST>=-1;EXT_PS_SOM>=4;EXT_RC_SOM>=0 - // ®ion=1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,X,Y - // &type=SV - - params.setQuery(query.toJson()); params.setSkip("fitting"); @@ -1029,7 +1028,7 @@ public void testHRDetect() throws Exception { System.out.println("\t" + entry.getKey() + ": " + entry.getValue()); } if (hrDetect.getScores().containsKey("del.mh.prop")) { - Assert.assertEquals(-1.5702984, hrDetect.getScores().getFloat("del.mh.prop"), 0.00001f); + Assert.assertEquals(-2.6106846, hrDetect.getScores().getFloat("del.mh.prop"), 0.00001f); return; } } @@ -1114,6 +1113,262 @@ public void testClinicalAnalysisLoading() throws IOException, ToolException, Cat Assert.assertEquals(ca2Id, clinicalAnalysis.getId()); } + @Test + public void testSampleQcVS() throws IOException, ToolException, CatalogException { + Path outDir = Paths.get(opencga.createTmpOutdir("_sample_qc_variant_stats")); + + String sampleId = file.getSampleIds().get(0); + Sample sample = catalogManager.getSampleManager().get(STUDY, sampleId, new QueryOptions(), token).first(); + System.out.println("sample.getQualityControl() = " + sample.getQualityControl()); + + SampleQcAnalysisParams params = new SampleQcAnalysisParams(); + params.setSample(sampleId); + params.setVsQuery(new AnnotationVariantQueryParams().setRegion("1,2")); + params.setVsId("regions-1-2"); + params.setVsDescription("Sample variant stats on regions 1,2"); + params.setSkip(SampleQcAnalysisParams.SIGNATURE_SKIP_VALUE + "," + SampleQcAnalysisParams.GENOME_PLOT_SKIP_VALUE); + + ExecutionResult result = 
toolRunner.execute(SampleQcAnalysis.class, STUDY, params, outDir, null, false, token); + System.out.println("outDir = " + outDir); + + sample = catalogManager.getSampleManager().get(STUDY, sampleId, new QueryOptions(), token).first(); + System.out.println("sample.getQualityControl() = " + sample.getQualityControl()); + assertTrue(sample.getQualityControl().getVariant().getVariantStats().stream().map(SampleQcVariantStats::getId).collect(Collectors.toList()).contains(params.getVsId())); + assertTrue(sample.getQualityControl().getVariant().getVariantStats().stream().map(SampleQcVariantStats::getDescription).collect(Collectors.toList()).contains(params.getVsDescription())); + } + + @Test + public void testSampleQcVSSignatureGenomePlotNoSomatic() throws IOException, ToolException, CatalogException { + Path outDir = Paths.get(opencga.createTmpOutdir("_sample_qc_variant_stats")); + + String sampleId = file.getSampleIds().get(0); + Sample sample = catalogManager.getSampleManager().get(STUDY, sampleId, new QueryOptions(), token).first(); + System.out.println("sample.getQualityControl() = " + sample.getQualityControl()); + + SampleQcAnalysisParams params = new SampleQcAnalysisParams(); + params.setSample(sampleId); + params.setVsQuery(new AnnotationVariantQueryParams().setRegion("2,1")); + params.setVsId("regions-2-1"); + params.setVsDescription("Sample variant stats on regions 2,1"); + + ExecutionResult result = toolRunner.execute(SampleQcAnalysis.class, STUDY, params, outDir, null, false, token); + System.out.println("outDir = " + outDir); + + // Expected three events with the message "is not somatic" (for catalogue, fitting and genome plot) + Assert.assertEquals(3, result.getEvents().stream().map(Event::getMessage).filter(msg -> msg.contains(SampleQcAnalysis.getSampleIsNotSomaticMsg(sampleId))).collect(Collectors.toList()).size()); + + sample = catalogManager.getSampleManager().get(STUDY, sampleId, new QueryOptions(), token).first(); + 
System.out.println("sample.getQualityControl() = " + sample.getQualityControl()); + assertTrue(sample.getQualityControl().getVariant().getVariantStats().stream().map(SampleQcVariantStats::getId).collect(Collectors.toList()).contains(params.getVsId())); + assertTrue(sample.getQualityControl().getVariant().getVariantStats().stream().map(SampleQcVariantStats::getDescription).collect(Collectors.toList()).contains(params.getVsDescription())); + } + + @Test + public void testSampleQcVSError() throws IOException, CatalogException { + Path outDir = Paths.get(opencga.createTmpOutdir("_sample_qc_variant_stats_error")); + + Sample sample = catalogManager.getSampleManager().get(CANCER_STUDY, cancer_sample, new QueryOptions(), token).first(); + System.out.println("sample.getQualityControl() = " + sample.getQualityControl()); + + SampleQcAnalysisParams params = new SampleQcAnalysisParams(); + params.setSample(cancer_sample); + params.setVsQuery(new AnnotationVariantQueryParams().setRegion("1,2")); + params.setVsId("regions-1-2"); + params.setVsDescription("Sample variant stats on regions 1,2"); + params.setSkip(SampleQcAnalysisParams.SIGNATURE_SKIP_VALUE + "," + SampleQcAnalysisParams.GENOME_PLOT_SKIP_VALUE); + + ExecutionResult result = null; + try { + result = toolRunner.execute(SampleQcAnalysis.class, CANCER_STUDY, params, outDir, null, false, token); + } catch (ToolException e) { + System.out.println("result = " + result); + e.printStackTrace(); + System.out.println("sample.getQualityControl() = " + sample.getQualityControl()); + System.out.println("outDir = " + outDir); + return; + } + fail(); + } + + @Test + public void testSampleQcMutationalSignatureCatalogueSNV() throws IOException, ToolException, CatalogException { + Path outDir = Paths.get(opencga.createTmpOutdir("_sample_qc_mutational_signature_catalogue_snv")); + + Sample sample = catalogManager.getSampleManager().get(CANCER_STUDY, cancer_sample, new QueryOptions(), token).first(); + 
System.out.println("sample.getQualityControl() = " + sample.getQualityControl()); + + SampleQcAnalysisParams params = new SampleQcAnalysisParams(); + params.setSample(cancer_sample); + params.setMsId("catalogue-snv-1"); + params.setMsDescription("Catalogue SNV #1"); + VariantQuery query = new VariantQuery() + .sample(cancer_sample) + .type(VariantType.SNV.name()) + .region("1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,X,Y"); + params.setMsQuery(query.toJson()); + + params.setSkip(StringUtils.join(Arrays.asList( + SampleQcAnalysisParams.VARIANT_STATS_SKIP_VALUE, + SampleQcAnalysisParams.SIGNATURE_FITTING_SKIP_VALUE, + SampleQcAnalysisParams.GENOME_PLOT_SKIP_VALUE), ",")); + + ExecutionResult result = toolRunner.execute(SampleQcAnalysis.class, CANCER_STUDY, params, outDir, null, false, token); + System.out.println("outDir = " + outDir); + + sample = catalogManager.getSampleManager().get(CANCER_STUDY, cancer_sample, new QueryOptions(), token).first(); + System.out.println("sample.getQualityControl() = " + sample.getQualityControl()); + assertTrue(sample.getQualityControl().getVariant().getSignatures().stream().map(Signature::getId).collect(Collectors.toList()).contains(params.getMsId())); + assertTrue(sample.getQualityControl().getVariant().getSignatures().stream().map(Signature::getDescription).collect(Collectors.toList()).contains(params.getMsDescription())); + } + + @Test + public void testSampleQcMutationalSignatureCatalogueSV() throws IOException, ToolException, CatalogException { + Path outDir = Paths.get(opencga.createTmpOutdir("_sample_qc_mutational_signature_catalogue_sv")); + + Sample sample = catalogManager.getSampleManager().get(CANCER_STUDY, cancer_sample, new QueryOptions(), token).first(); + System.out.println("sample.getQualityControl() = " + sample.getQualityControl()); + + SampleQcAnalysisParams params = new SampleQcAnalysisParams(); + params.setSample(cancer_sample); + params.setMsId("catalogue-sv-1"); + 
params.setMsDescription("Catalogue SV #1"); + VariantQuery query = new VariantQuery() + .sample(cancer_sample) + .type(VariantType.SV.name()) + .region("1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,X,Y"); + params.setMsQuery(query.toJson()); + + params.setSkip(StringUtils.join(Arrays.asList( + SampleQcAnalysisParams.VARIANT_STATS_SKIP_VALUE, + SampleQcAnalysisParams.SIGNATURE_FITTING_SKIP_VALUE, + SampleQcAnalysisParams.GENOME_PLOT_SKIP_VALUE), ",")); + + ExecutionResult result = toolRunner.execute(SampleQcAnalysis.class, CANCER_STUDY, params, outDir, null, false, token); + System.out.println("outDir = " + outDir); + + sample = catalogManager.getSampleManager().get(CANCER_STUDY, cancer_sample, new QueryOptions(), token).first(); + System.out.println("sample.getQualityControl() = " + sample.getQualityControl()); + assertTrue(sample.getQualityControl().getVariant().getSignatures().stream().map(Signature::getId).collect(Collectors.toList()).contains(params.getMsId())); + assertTrue(sample.getQualityControl().getVariant().getSignatures().stream().map(Signature::getDescription).collect(Collectors.toList()).contains(params.getMsDescription())); + } + + @Test + public void testSampleQcMutationalSignatureCatalogueFittingSV() throws IOException, ToolException, CatalogException { + Path outDir = Paths.get(opencga.createTmpOutdir("_sample_qc_mutational_signature_catalogue_fitting_sv")); + + Sample sample = catalogManager.getSampleManager().get(CANCER_STUDY, cancer_sample, new QueryOptions(), token).first(); + System.out.println("sample.getQualityControl() = " + sample.getQualityControl()); + + SampleQcAnalysisParams params = new SampleQcAnalysisParams(); + params.setSample(cancer_sample); + params.setMsId("catalogue-sv-2"); + params.setMsDescription("Catalogue SV #2"); + VariantQuery query = new VariantQuery() + .sample(cancer_sample) + .type(VariantType.SV.name()) + .region("1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,X,Y"); + 
params.setMsQuery(query.toJson()); + params.setMsFitId("fitting-2"); + params.setMsFitMethod("FitMS"); + params.setMsFitSigVersion("RefSigv2"); + params.setMsFitOrgan("Breast"); + params.setMsFitNBoot(200); + params.setMsFitThresholdPerc(5.0f); + params.setMsFitThresholdPval(0.05f); + params.setMsFitMaxRareSigs(1); +// params.setMsFitSignaturesFile(signatureFileId); +// params.setMsFitRareSignaturesFile(signatureFileId); + + params.setSkip(StringUtils.join(Arrays.asList( + SampleQcAnalysisParams.VARIANT_STATS_SKIP_VALUE, + SampleQcAnalysisParams.GENOME_PLOT_SKIP_VALUE), ",")); + + ExecutionResult result = toolRunner.execute(SampleQcAnalysis.class, CANCER_STUDY, params, outDir, null, false, token); + System.out.println("outDir = " + outDir); + + sample = catalogManager.getSampleManager().get(CANCER_STUDY, cancer_sample, new QueryOptions(), token).first(); + System.out.println("sample.getQualityControl() = " + sample.getQualityControl()); + assertTrue(sample.getQualityControl().getVariant().getSignatures().stream().map(Signature::getId).collect(Collectors.toList()).contains(params.getMsId())); + assertTrue(sample.getQualityControl().getVariant().getSignatures().stream().map(Signature::getDescription).collect(Collectors.toList()).contains(params.getMsDescription())); + assertTrue(sample.getQualityControl().getVariant().getSignatures().stream().flatMap(s -> s.getFittings().stream().map(SignatureFitting::getId)).collect(Collectors.toList()).contains(params.getMsFitId())); + } + + @Test + public void testSampleQcGenomePlot() throws IOException, ToolException, CatalogException { + Path outDir = Paths.get(opencga.createTmpOutdir("_sample_qc_genome_plot")); + + Sample sample = catalogManager.getSampleManager().get(CANCER_STUDY, cancer_sample, new QueryOptions(), token).first(); + System.out.println("sample.getQualityControl() = " + sample.getQualityControl()); + + File genomePlotConfigFile = catalogManager.getFileManager().get(CANCER_STUDY, genomePlotConfigFilename, 
QueryOptions.empty(), token).first(); + + SampleQcAnalysisParams params = new SampleQcAnalysisParams(); + params.setSample(cancer_sample); + params.setGpId("genome-plot1"); + params.setGpDescription("Genome plot description 1"); + params.setGpConfigFile(genomePlotConfigFile.getId()); + + params.setSkip(StringUtils.join(Arrays.asList( + SampleQcAnalysisParams.VARIANT_STATS_SKIP_VALUE, + SampleQcAnalysisParams.SIGNATURE_SKIP_VALUE), ",")); + + ExecutionResult result = toolRunner.execute(SampleQcAnalysis.class, CANCER_STUDY, params, outDir, null, false, token); + System.out.println("outDir = " + outDir); + + sample = catalogManager.getSampleManager().get(CANCER_STUDY, cancer_sample, new QueryOptions(), token).first(); + System.out.println("sample.getQualityControl() = " + sample.getQualityControl()); + assertEquals(params.getGpId(), sample.getQualityControl().getVariant().getGenomePlot().getId()); + assertEquals(params.getGpDescription(), sample.getQualityControl().getVariant().getGenomePlot().getDescription()); + } + + @Test + public void testSampleQcMutationalSignatureCatalogueFittingSVGenomePlot() throws IOException, ToolException, CatalogException { + Path outDir = Paths.get(opencga.createTmpOutdir("_sample_qc_mutational_signature_catalogue_fitting_sv")); + + Sample sample = catalogManager.getSampleManager().get(CANCER_STUDY, cancer_sample, new QueryOptions(), token).first(); + System.out.println("sample.getQualityControl() = " + sample.getQualityControl()); + + File genomePlotConfigFile = catalogManager.getFileManager().get(CANCER_STUDY, genomePlotConfigFilename, QueryOptions.empty(), token).first(); + + SampleQcAnalysisParams params = new SampleQcAnalysisParams(); + params.setSample(cancer_sample); + + // Mutational signature + params.setMsId("catalogue-sv-3"); + params.setMsDescription("Catalogue SV #3"); + VariantQuery query = new VariantQuery() + .sample(cancer_sample) + .type(VariantType.SV.name()) + 
.region("1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,X,Y"); + params.setMsQuery(query.toJson()); + params.setMsFitId("fitting-2"); + params.setMsFitMethod("FitMS"); + params.setMsFitSigVersion("RefSigv2"); + params.setMsFitOrgan("Breast"); + params.setMsFitNBoot(200); + params.setMsFitThresholdPerc(5.0f); + params.setMsFitThresholdPval(0.05f); + params.setMsFitMaxRareSigs(1); + + // Genome plot params + params.setGpId("genome-plot3"); + params.setGpDescription("Genome plot description 3"); + params.setGpConfigFile(genomePlotConfigFile.getId()); + + params.setSkip(SampleQcAnalysisParams.VARIANT_STATS_SKIP_VALUE); + + ExecutionResult result = toolRunner.execute(SampleQcAnalysis.class, CANCER_STUDY, params, outDir, null, false, token); + System.out.println("outDir = " + outDir); + + sample = catalogManager.getSampleManager().get(CANCER_STUDY, cancer_sample, new QueryOptions(), token).first(); + System.out.println("sample.getQualityControl() = " + sample.getQualityControl()); + assertTrue(sample.getQualityControl().getVariant().getSignatures().stream().map(Signature::getId).collect(Collectors.toList()).contains(params.getMsId())); + assertTrue(sample.getQualityControl().getVariant().getSignatures().stream().map(Signature::getDescription).collect(Collectors.toList()).contains(params.getMsDescription())); + assertTrue(sample.getQualityControl().getVariant().getSignatures().stream().flatMap(s -> s.getFittings().stream().map(SignatureFitting::getId)).collect(Collectors.toList()).contains(params.getMsFitId())); + assertEquals(params.getGpId(), sample.getQualityControl().getVariant().getGenomePlot().getId()); + assertEquals(params.getGpDescription(), sample.getQualityControl().getVariant().getGenomePlot().getDescription()); + } + @Test public void testCellbaseConfigure() throws Exception { String project = "Project_test_cellbase_configure"; diff --git a/opencga-analysis/src/main/R/genome-plot/circos.R b/opencga-app/app/analysis/genome-plot/circos.R similarity 
index 91% rename from opencga-analysis/src/main/R/genome-plot/circos.R rename to opencga-app/app/analysis/genome-plot/circos.R index c14d8ca921a..f9ecba14b02 100644 --- a/opencga-analysis/src/main/R/genome-plot/circos.R +++ b/opencga-app/app/analysis/genome-plot/circos.R @@ -95,7 +95,7 @@ RCircos.Chromosome.Ideogram.Plot.my <- function (chrTextColor = 'grey', gridLine chroms <- unique(RCircos.Cyto$Chromosome) for (a.chr in 1:length(chroms)) { the.chr <- RCircos.Cyto[RCircos.Cyto$Chromosome == chroms[a.chr], - ] + ] ##new RCircos version start <- the.chr$StartPoint[1] end <- the.chr$EndPoint[nrow(the.chr)] @@ -133,13 +133,13 @@ RCircos.Chromosome.Ideogram.Plot.my <- function (chrTextColor = 'grey', gridLine } -RCircos.Get.Plot.Data.nosort <- function (genomic.data, plot.type, validate=TRUE) +RCircos.Get.Plot.Data.nosort <- function (genomic.data, plot.type, validate=TRUE) { data.points <- rep(0, nrow(genomic.data)) for (a.row in 1:nrow(genomic.data)) { chromosome <- as.character(genomic.data[a.row, 1]) - location <- round((genomic.data[a.row, 2] + genomic.data[a.row, + location <- round((genomic.data[a.row, 2] + genomic.data[a.row, 3])/2, digits = 0) data.points[a.row] <- RCircos.Data.Point(chromosome, location) } @@ -153,7 +153,7 @@ RCircos.Heatmap.Plot.my <- function (heatmap.data, data.col, track.num, side, pl RCircos.Cyto <- RCircos.Get.Plot.Ideogram() RCircos.Pos <- RCircos.Get.Plot.Positions() RCircos.Par <- RCircos.Get.Plot.Parameters() - + min.with <- 1000000 heatmap.data$width <- heatmap.data$chromEnd - heatmap.data$chromStart heatmap.data <- heatmap.data[order(-heatmap.data$width),] # make sure the narrowest plots are drawn as last @@ -162,35 +162,35 @@ RCircos.Heatmap.Plot.my <- function (heatmap.data, data.col, track.num, side, pl heatmap.data$chromEnd[narrow.cn] <- heatmap.data$chromEnd[narrow.cn ] + flank heatmap.data$chromStart[narrow.cn ] <- heatmap.data$chromStart[narrow.cn ] - flank heatmap.data$chromStart[heatmap.data$chromStart<0] <- 0 - + 
heatmap.data <- RCircos.Get.Plot.Data.nosort(heatmap.data, "plot") heatmap.data1 <- RCircos.Get.Plot.Data.nosort(data.frame(Chromosome=heatmap.data$Chromosome, chromStart=heatmap.data$chromStart, chromEnd=heatmap.data$chromStart), "plot") heatmap.data2 <- RCircos.Get.Plot.Data.nosort(data.frame(Chromosome=heatmap.data$Chromosome, chromStart=heatmap.data$chromEnd, chromEnd=heatmap.data$chromEnd), "plot") - - + + if ((length(heatmap.ranges)==1) && (is.na(heatmap.ranges))) { ColorLevel <- RCircos.Par$heatmap.ranges } else { ColorLevel <- heatmap.ranges } - + if ((length(heatmap.color)==1) && (is.na(heatmap.color))) { ColorRamp <- RCircos.Get.Heatmap.ColorScales(RCircos.Par$heatmap.color) - } - + } + columns <- 5:(ncol(heatmap.data) - 1) min.value <- min(as.matrix(heatmap.data[, columns])) max.value <- max(as.matrix(heatmap.data[, columns])) - + heatmap.locations1 <- as.numeric(heatmap.data1[, ncol(heatmap.data2)]) heatmap.locations2 <- as.numeric(heatmap.data2[, ncol(heatmap.data2)]) - + start <- heatmap.locations1 # - RCircos.Par$heatmap.width/2 end <- heatmap.locations2 # + RCircos.Par$heatmap.width/2 data.chroms <- as.character(heatmap.data[, 1]) chromosomes <- unique(data.chroms) cyto.chroms <- as.character(RCircos.Cyto$Chromosome) - + for (a.chr in 1:length(chromosomes)) { cyto.rows <- which(cyto.chroms == chromosomes[a.chr]) locations <- as.numeric(RCircos.Cyto$EndPoint[cyto.rows]) # chromosome locations @@ -200,14 +200,14 @@ RCircos.Heatmap.Plot.my <- function (heatmap.data, data.col, track.num, side, pl start[data.rows[start[data.rows] < chr.start]] <- chr.start # chromosome starts for each point end[data.rows[end[data.rows] > chr.end]] <- chr.end # chromosome end for each point } - + locations <- RCircos.Track.Positions.my(side, track.num) # positions out.pos <- locations[1] in.pos <- locations[2] chroms <- unique(RCircos.Cyto$Chromosome) for (a.chr in 1:length(chroms)) { the.chr <- RCircos.Cyto[RCircos.Cyto$Chromosome == chroms[a.chr], - ] + ] the.start <- 
the.chr$StartPoint[1] the.end <- the.chr$EndPoint[nrow(the.chr)] polygon.x <- c(RCircos.Pos[the.start:the.end, 1] * out.pos, @@ -216,20 +216,20 @@ RCircos.Heatmap.Plot.my <- function (heatmap.data, data.col, track.num, side, pl RCircos.Pos[the.end:the.start, 2] * in.pos) polygon(polygon.x, polygon.y, col = "white", border = RCircos.Par$grid.line.color, lwd=0.3) } - - + + heatmap.value <- as.numeric(heatmap.data[, data.col]) for (a.point in 1:length(heatmap.value)) { - + the.level <- which(ColorLevel <= heatmap.value[a.point]) cell.color <- heatmap.color[max(the.level)] # establish the color - + the.start <- start[a.point] the.end <- end[a.point] #if (is.na(the.start) | is.na(the.end)) { # browser() #} - + #Catch positions that fall outside a band (eg when using exome ideogram) if (is.na(the.start) || (is.na(the.end))) { next; @@ -238,18 +238,18 @@ RCircos.Heatmap.Plot.my <- function (heatmap.data, data.col, track.num, side, pl polygon.y <- c(RCircos.Pos[the.start:the.end, 2] * out.pos, RCircos.Pos[the.end:the.start, 2] * in.pos) polygon(polygon.x, polygon.y, col = cell.color, border = NA) } - + } RCircos.Link.Plot.my <- function (link.data, track.num, by.chromosome = FALSE, link.colors=NA) { - + if (length(link.colors)==1) { link.colors <- rep('BurlyWood', nrow(link.data)) } - - + + RCircos.Pos <- RCircos.Get.Plot.Positions() RCircos.Par <- RCircos.Get.Plot.Parameters() locations <- RCircos.Track.Positions.my('in', track.num) @@ -282,14 +282,14 @@ RCircos.Link.Plot.my <- function (link.data, track.num, by.chromosome = FALSE, l } -RCircos.Scatter.Plot.color <- function (scatter.data, data.col, track.num, side, scatter.colors, draw.bg =TRUE, no.sort=FALSE) +RCircos.Scatter.Plot.color <- function (scatter.data, data.col, track.num, side, scatter.colors, draw.bg =TRUE, no.sort=FALSE) { RCircos.Pos <- RCircos.Get.Plot.Positions() pch <- RCircos.Get.Plot.Parameters()$point.type cex <- RCircos.Get.Plot.Parameters()$point.size scatter.data <- 
RCircos.Get.Plot.Data.nosort(scatter.data, "plot") - + locations <- RCircos.Track.Positions.my(side, track.num, track.heights = 4) out.pos <- locations[1] in.pos <- locations[2] @@ -297,9 +297,9 @@ RCircos.Scatter.Plot.color <- function (scatter.data, data.col, track.num, side, data.ceiling <- max(scatter.data[, data.col]) sub.height <- out.pos - point.bottom - + RCircos.Track.Outline.my(out.pos, in.pos) - + scatter.data[scatter.data[data.col]>data.ceiling, data.col] <- data.ceiling scatter.data[scatter.data[data.col]<(-data.ceiling), data.col] <- -data.ceiling scatter.data$height <- point.bottom + scatter.data[, data.col]/data.ceiling * sub.height @@ -309,12 +309,12 @@ RCircos.Scatter.Plot.color <- function (scatter.data, data.col, track.num, side, points(scatter.data$x_coord, scatter.data$y_coord, col = scatter.colors, - pch = pch, + pch = pch, cex = cex) } -RCircos.Track.Outline.my <- function (out.pos, in.pos, num.layers = 1) +RCircos.Track.Outline.my <- function (out.pos, in.pos, num.layers = 1) { RCircos.Cyto <- RCircos.Get.Plot.Ideogram() RCircos.Pos <- RCircos.Get.Plot.Positions() @@ -325,12 +325,12 @@ RCircos.Track.Outline.my <- function (out.pos, in.pos, num.layers = 1) the.chr <- RCircos.Cyto[RCircos.Cyto$Chromosome == chroms[a.chr], ] start <- the.chr$StartPoint[1] end <- the.chr$EndPoint[nrow(the.chr)] - polygon.x <- c(RCircos.Pos[start:end, 1] * out.pos, RCircos.Pos[end:start, + polygon.x <- c(RCircos.Pos[start:end, 1] * out.pos, RCircos.Pos[end:start, 1] * in.pos) - polygon.y <- c(RCircos.Pos[start:end, 2] * out.pos, RCircos.Pos[end:start, + polygon.y <- c(RCircos.Pos[start:end, 2] * out.pos, RCircos.Pos[end:start, 2] * in.pos) polygon(polygon.x, polygon.y, col = NULL, lwd=0.3, border=RCircos.Par$grid.line.color) - + for (a.line in 1:(num.layers - 1)) { height <- out.pos - a.line * subtrack.height lines(RCircos.Pos[start:end, 1] * height, RCircos.Pos[start:end, 2] * height, col = RCircos.Par$grid.line.color, lwd=0.3) @@ -339,18 +339,18 @@ 
RCircos.Track.Outline.my <- function (out.pos, in.pos, num.layers = 1) } -RCircos.Track.Positions.my <- function (side, track.num, track.heights = 1) +RCircos.Track.Positions.my <- function (side, track.num, track.heights = 1) { RCircos.Par <- RCircos.Get.Plot.Parameters() one.track <- RCircos.Par$track.height + RCircos.Par$track.padding side <- tolower(side) if (side == "in") { - out.pos <- RCircos.Par$track.in.start - (track.num - + out.pos <- RCircos.Par$track.in.start - (track.num - 1) * one.track - in.pos <- out.pos - RCircos.Par$track.height - + in.pos <- out.pos - RCircos.Par$track.height - one.track * ( track.heights - 1) } else if (side == "out") { - in.pos <- RCircos.Par$track.out.start + (track.num - + in.pos <- RCircos.Par$track.out.start + (track.num - 1) * one.track out.pos <- in.pos + RCircos.Par$track.height } else { @@ -361,14 +361,14 @@ RCircos.Track.Positions.my <- function (side, track.num, track.heights = 1) set.plot.circosParams <- function(){ - + # circos parameters circosParams.my <- list() - + #use these two circosParams to adjust circle size circosParams.my$plot.radius <- 2.15 circosParams.my$genomeplot.margin <- 0.25 - + circosParams.my$track.background <- 'white' circosParams.my$highlight.width <- 0.2 circosParams.my$point.size <- 0.3 @@ -379,53 +379,53 @@ set.plot.circosParams <- function(){ circosParams.my$chr.name.pos <- 2.14 #3.45 circosParams.my$track.in.start <- 3.05 circosParams.my$track.out.start <- 3.2 - + circosParams.my$tracks.inside <- 10 circosParams.my$tracks.outside <- 1 - + circosParams.my$line.width <- 1 circosParams.my$link.line.width <- 0.5 - + circosParams.my$text.size <- 0.6 - + circosParams.my$text.color <- 'black' - + circosParams.my$track.padding <- c(0.07, 0.0, 0.07, 0.0,0.07, 0) - + circosParams.my$grid.line.color <- 'lightgrey' circosParams.my$chr.text.color <- 'grey' - + circosParams.my$track.heights <- c(0.85, 0.07, 0.07, 0.1, 0.1, 0.1) circosParams.my$track.height <- 0.1 circosParams.my$sub.tracks <- 1 
circosParams.my$heatmap.cols <- c(alpha('lightcoral', 1), - alpha('lightcoral', 0.5), - alpha('lightgrey',0.10), - alpha('olivedrab2', 0.3), - alpha('olivedrab2', 0.5), - alpha('olivedrab2',.7), - alpha('olivedrab2', 0.75), - alpha('olivedrab3', 0.9), - alpha('olivedrab4', 0.9)) + alpha('lightcoral', 0.5), + alpha('lightgrey',0.10), + alpha('olivedrab2', 0.3), + alpha('olivedrab2', 0.5), + alpha('olivedrab2',.7), + alpha('olivedrab2', 0.75), + alpha('olivedrab3', 0.9), + alpha('olivedrab4', 0.9)) circosParams.my$heatmap.ranges <- c(0,1,3,4,8,16, 32,64,1000) - + #Set copynumber (and indel) colour scheme circosParams.my$heatmap.color.gain <- c( alpha('lightgrey',0.10), alpha('olivedrab2', 0.3), alpha('olivedrab2', 0.5), alpha('olivedrab2',.7), alpha('olivedrab2', 0.75), alpha('olivedrab3', 0.9), alpha('olivedrab4', 0.9)) circosParams.my$heatmap.ranges.gain <- c(0,2,4,8,16, 32,64,1000) - + circosParams.my$heatmap.ranges.loh <- c(0,1,1000) circosParams.my$heatmap.color.loh <- c(alpha('lightcoral', 1), alpha('lightgrey',0.10)) - + circosParams.my$heatmap.key.gain.col <- alpha('olivedrab2', 0.3) circosParams.my$heatmap.key.loh.col <- alpha('lightcoral', 1) circosParams.my$heatmap.key.gain.title <- 'gain' circosParams.my$heatmap.key.loh.title <- 'LOH' - + #tumour majorCN circosParams.my$heatmap.data.col.gain <- 8 #tumour minorCN circosParams.my$heatmap.data.col.loh <- 7 - + #Indel colours circosParams.my$indel.mhomology <- 'firebrick4' circosParams.my$indel.repeatmediated <- 'firebrick1' @@ -434,28 +434,28 @@ set.plot.circosParams <- function(){ circosParams.my$indel.complex <- 'grey' return(circosParams.my) - + } -genomePlot <- function(snvs.file, indels.file, cnvs.file, rearrs.file, - sampleID, genome.v="hg19", ..., plot_title = NULL, - no_copynumber = FALSE, no_rearrangements = FALSE, no_indels = FALSE, out_format = "png", out_path = ".") { - +genomePlot <- function(snvs.file, indels.file, cnvs.file, rearrs.file, + sampleID, genome.v="hg19", ..., plot_title = NULL, + 
no_snvs = FALSE, no_copynumber = FALSE, no_rearrangements = FALSE, no_indels = FALSE, out_format = "png", out_path = ".") { + genome.ideogram = switch(genome.v, "hg19" = "UCSC.HG19.Human.CytoBandIdeogram", "hg38" = "UCSC.HG38.Human.CytoBandIdeogram") data(list=genome.ideogram, package = "RCircos"); species.cyto <- get(genome.ideogram); - + circosParams.my <- set.plot.circosParams() - + # rearrangement links colors inv.col <- alpha('dodgerblue2', 1) del.col <- alpha('coral2', 1) dupl.col <- alpha('darkgreen', 1) transloc.colour <- alpha('gray35', 1) - + #Set up height, width and resolution parameters cPanelWidth = 0 graph.height = 4100 @@ -463,20 +463,22 @@ genomePlot <- function(snvs.file, indels.file, cnvs.file, rearrs.file, graph.width = graph.height * graph.wd_ht_ratio graph.wd_res_ratio = (4100/550) graph.res = graph.width/graph.wd_res_ratio - + graph.height.inches = graph.height/graph.res graph.width.inches = graph.width/graph.res # substitutions - subs <- read.table(file = snvs.file, sep = '\t', header = TRUE) - # subs <- read.table(file = '/home/dapregi/tmp/snvs.tsv', sep = '\t', header = TRUE) - subs$color[(subs$ref=='C' & subs$alt=='A') | (subs$ref=='G' & subs$alt=='T')] <- 'royalblue' - subs$color[(subs$ref=='C' & subs$alt=='G') | (subs$ref=='G' & subs$alt=='C')] <- 'black' - subs$color[(subs$ref=='C' & subs$alt=='T') | (subs$ref=='G' & subs$alt=='A')] <- 'red' - subs$color[(subs$ref=='T' & subs$alt=='A') | (subs$ref=='A' & subs$alt=='T')] <- 'grey' - subs$color[(subs$ref=='T' & subs$alt=='C') | (subs$ref=='A' & subs$alt=='G')] <- 'green2' - subs$color[(subs$ref=='T' & subs$alt=='G') | (subs$ref=='A' & subs$alt=='C')] <- 'hotpink' - + if (!no_snvs) { + subs <- read.table(file = snvs.file, sep = '\t', header = TRUE) + # subs <- read.table(file = '/home/dapregi/tmp/snvs.tsv', sep = '\t', header = TRUE) + subs$color[(subs$ref=='C' & subs$alt=='A') | (subs$ref=='G' & subs$alt=='T')] <- 'royalblue' + subs$color[(subs$ref=='C' & subs$alt=='G') | (subs$ref=='G' 
& subs$alt=='C')] <- 'black' + subs$color[(subs$ref=='C' & subs$alt=='T') | (subs$ref=='G' & subs$alt=='A')] <- 'red' + subs$color[(subs$ref=='T' & subs$alt=='A') | (subs$ref=='A' & subs$alt=='T')] <- 'grey' + subs$color[(subs$ref=='T' & subs$alt=='C') | (subs$ref=='A' & subs$alt=='G')] <- 'green2' + subs$color[(subs$ref=='T' & subs$alt=='G') | (subs$ref=='A' & subs$alt=='C')] <- 'hotpink' + } + # indels indels <- read.table(file = indels.file, sep = '\t', header = TRUE) # indels <- read.table(file = '/home/dapregi/tmp/indels.tsv', sep = '\t', header = TRUE) @@ -488,8 +490,8 @@ genomePlot <- function(snvs.file, indels.file, cnvs.file, rearrs.file, dels$color[dels$classification=='Microhomology-mediated'] <- circosParams.my$indel.mhomology dels$color[dels$classification=='Repeat-mediated'] <- circosParams.my$indel.repeatmediated dels$color[dels$classification=='None'] <- circosParams.my$indel.other - } - + } + # copy number cv.data <- data.frame() #Skip if no copynumber was requested @@ -500,7 +502,7 @@ genomePlot <- function(snvs.file, indels.file, cnvs.file, rearrs.file, no_copynumber <- TRUE } } - + # rearrangements rearrs <- data.frame() if (!no_rearrangements) { @@ -509,11 +511,11 @@ genomePlot <- function(snvs.file, indels.file, cnvs.file, rearrs.file, no_rearrangements <- TRUE } } - + ################################################################################ - + fn = file.path(out_path, paste(sampleID, ".genomePlot.", out_format, sep=''), fsep = .Platform$file.sep) - + if (out_format == 'png') { png(file=fn, height=graph.height, width=(graph.width*(1/(1-cPanelWidth))), res=graph.res) } else if (out_format == 'svg') { @@ -521,7 +523,7 @@ genomePlot <- function(snvs.file, indels.file, cnvs.file, rearrs.file, } else { stop("Invalid file type. 
Only png and svg are supported"); } - + RCircos.Set.Core.Components(cyto.info=species.cyto, chr.exclude=NULL, tracks.inside=circosParams.my$tracks.inside, tracks.outside=circosParams.my$tracks.outside); @@ -530,31 +532,23 @@ genomePlot <- function(snvs.file, indels.file, cnvs.file, rearrs.file, circosParams$point.type <- circosParams.my$point.type circosParams$point.size <- circosParams.my$point.size RCircos.Reset.Plot.Parameters(circosParams) - + par(mar=c(0.001, 0.001, 0.001, 0.001)) par(mai=c(circosParams.my$genomeplot.margin, circosParams.my$genomeplot.margin, circosParams.my$genomeplot.margin, circosParams.my$genomeplot.margin)) plot.new() plot.window(c(-circosParams.my$plot.radius,circosParams.my$plot.radius), c(-circosParams.my$plot.radius, circosParams.my$plot.radius)) RCircos.Chromosome.Ideogram.Plot.my(circosParams.my$chr.text.color, circosParams.my$grid.line.color, circosParams.my$text.size); - + title(main = sampleID) - + if (!is.null(plot_title)) { title(paste(plot_title, sep=''), line=-1); } - + # substitutions - # start.time <- Sys.time() - # summary(subs) - # subs$distance <- 10^subs$logDistPrev - # subs <- subs[subs$distance<400000,] - # print(nrow(subs)) - if (exists("subs")) { + if (exists("subs") && (nrow(subs)>0)) { RCircos.Scatter.Plot.color(scatter.data=subs, data.col=6, track.num=1, side="in", scatter.colors = subs$color); } - # end.time <- Sys.time() - # time.taken <- end.time - start.time - # print(time.taken) # Insertions circosParams <- RCircos.Get.Plot.Parameters(); @@ -566,7 +560,7 @@ genomePlot <- function(snvs.file, indels.file, cnvs.file, rearrs.file, if (exists("ins") && nrow(ins)>0) { my.RCircos.Tile.Plot(tile.data=ins, track.num=5, side="in"); } - + # Deletions circosParams <- RCircos.Get.Plot.Parameters(); circosParams$tile.color <- 'firebrick4' @@ -574,8 +568,8 @@ genomePlot <- function(snvs.file, indels.file, cnvs.file, rearrs.file, if (exists("dels") && nrow(dels)>0) { my.RCircos.Tile.Plot(tile.data=dels, track.num=6, 
side="in", tile.colors=dels$color); } - - + + # Copy number if (exists('cv.data') && (nrow(cv.data)>0)) { heatmap.ranges.major <-circosParams.my$heatmap.ranges.gain @@ -587,7 +581,7 @@ genomePlot <- function(snvs.file, indels.file, cnvs.file, rearrs.file, RCircos.Heatmap.Plot.my(heatmap.data=cv.data, data.col=6, track.num=8, side="in", heatmap.ranges=heatmap.ranges.minor , heatmap.color=heatmap.color.minor ); # minor copy number } - + # Rearrangement # Chromosome chromStart chromEnd Chromosome.1 chromStart.1 chromEnd.1 type link.colors <- vector() @@ -601,7 +595,7 @@ genomePlot <- function(snvs.file, indels.file, cnvs.file, rearrs.file, RCircos.Link.Plot.my(link.data = rearrs, track.num=9, by.chromosome=TRUE, link.colors); } } - + invisible(dev.off()) } @@ -614,6 +608,8 @@ option_list <- list( help="Genome version", metavar="character"), make_option(c("--plot_title"), type="character", default="", help="Plot title", metavar="character"), + make_option(c("--no_snvs"), action="store_true", default=FALSE, + help="No SNVs"), make_option(c("--no_copynumber"), action="store_true", default=FALSE, help="No CNV"), make_option(c("--no_rearrangements"), action="store_true", default=FALSE, @@ -624,7 +620,7 @@ option_list <- list( help="Output format", metavar="character"), make_option(c("--out_path"), type="character", default=".", help="Output file path", metavar="character") - ) +) parser <- OptionParser(usage = "%prog [options] snvs_file indels_file cnvs_file rearrs_file sampleId", option_list=option_list) arguments <- parse_args(parser, positional_arguments = 5) opt <- arguments$options @@ -632,5 +628,5 @@ args <- arguments$args genomePlot(args[1], args[2], args[3], args[4], args[5], genome.v=opt$genome_version, plot_title = opt$plot_title, - no_copynumber = opt$no_copynumber, no_rearrangements = opt$no_rearrangements, no_indels = opt$no_indels, + no_snvs = opt$no_snvs, no_copynumber = opt$no_copynumber, no_rearrangements = opt$no_rearrangements, no_indels = opt$no_indels, 
out_format = opt$out_format, out_path = opt$out_path) diff --git a/opencga-core/src/main/java/org/opencb/opencga/core/tools/OpenCgaToolExecutor.java b/opencga-core/src/main/java/org/opencb/opencga/core/tools/OpenCgaToolExecutor.java index 15cf7a51215..dbc936b9e7a 100644 --- a/opencga-core/src/main/java/org/opencb/opencga/core/tools/OpenCgaToolExecutor.java +++ b/opencga-core/src/main/java/org/opencb/opencga/core/tools/OpenCgaToolExecutor.java @@ -31,6 +31,7 @@ public abstract class OpenCgaToolExecutor { private ObjectMap executorParams; private Path outDir; private ExecutionResultManager arm; + private String commandLine; protected OpenCgaToolExecutor() { } @@ -81,6 +82,15 @@ protected final String getToken() { return getExecutorParams().getString("token"); } + public String getCommandLine() { + return commandLine; + } + + protected OpenCgaToolExecutor setCommandLine(String commandLine) { + this.commandLine = commandLine; + return this; + } + protected final void addWarning(String warning) throws ToolException { arm.addWarning(warning); } @@ -88,5 +98,4 @@ protected final void addWarning(String warning) throws ToolException { protected final void addAttribute(String key, Object value) throws ToolException { arm.addStepAttribute(key, value); } - } diff --git a/opencga-core/src/main/java/org/opencb/opencga/core/tools/variant/GenomePlotAnalysisExecutor.java b/opencga-core/src/main/java/org/opencb/opencga/core/tools/variant/GenomePlotAnalysisExecutor.java index 2205143c670..30a3c767ab1 100644 --- a/opencga-core/src/main/java/org/opencb/opencga/core/tools/variant/GenomePlotAnalysisExecutor.java +++ b/opencga-core/src/main/java/org/opencb/opencga/core/tools/variant/GenomePlotAnalysisExecutor.java @@ -16,6 +16,7 @@ package org.opencb.opencga.core.tools.variant; +import org.opencb.opencga.core.exceptions.ToolException; import org.opencb.opencga.core.tools.OpenCgaToolExecutor; import java.io.File; @@ -33,6 +34,11 @@ public GenomePlotAnalysisExecutor(String study, File 
configFile) { this.configFile = configFile; } + protected void addStepParams() throws ToolException { + addAttribute("STUDY", study); + addAttribute("CONFIG_FILE", configFile); + } + @Override public String toString() { final StringBuilder sb = new StringBuilder("GenomePlotAnalysisExecutor{"); diff --git a/opencga-core/src/main/java/org/opencb/opencga/core/tools/variant/MutationalSignatureAnalysisExecutor.java b/opencga-core/src/main/java/org/opencb/opencga/core/tools/variant/MutationalSignatureAnalysisExecutor.java index be8fdcf1a62..0d86487e77e 100644 --- a/opencga-core/src/main/java/org/opencb/opencga/core/tools/variant/MutationalSignatureAnalysisExecutor.java +++ b/opencga-core/src/main/java/org/opencb/opencga/core/tools/variant/MutationalSignatureAnalysisExecutor.java @@ -57,6 +57,26 @@ public abstract class MutationalSignatureAnalysisExecutor extends OpenCgaToolExe public MutationalSignatureAnalysisExecutor() { } + protected void addStepParams() throws ToolException { + addAttribute("STUDY", study); + addAttribute("SAMPLE", sample); + addAttribute("ASSEMBLY", assembly); + addAttribute("SIGNATURE_ID", queryId); + addAttribute("SIGNATURE_DESCRIPTION", queryDescription); + addAttribute("SIGNATURE_QUERY", query); + addAttribute("FIT_ID", fitId); + addAttribute("FIT_METHOD", fitMethod); + addAttribute("FIT_N_BOOT", nBoot); + addAttribute("FIT_SIG_VERSION", sigVersion); + addAttribute("FIT_ORGAN", organ); + addAttribute("FIT_THRESHOLD_PERC", thresholdPerc); + addAttribute("FIT_THRESHOLD_PVAL", thresholdPval); + addAttribute("FIT_MAX_RARE_SIGS", maxRareSigs); + addAttribute("FIT_SIGNATURES_FILE", signaturesFile); + addAttribute("FIT_RARE_SIGNATURES_FILE", rareSignaturesFile); + addAttribute("SKIP", skip); + } + protected static Map> initCountMap() { Map> map = new LinkedHashMap<>(); for (String firstKey : FIRST_LEVEL_KEYS) { diff --git a/opencga-core/src/main/java/org/opencb/opencga/core/tools/variant/SampleVariantStatsAnalysisExecutor.java 
b/opencga-core/src/main/java/org/opencb/opencga/core/tools/variant/SampleVariantStatsAnalysisExecutor.java index 9e06c1e389a..9f18ddc7b7f 100644 --- a/opencga-core/src/main/java/org/opencb/opencga/core/tools/variant/SampleVariantStatsAnalysisExecutor.java +++ b/opencga-core/src/main/java/org/opencb/opencga/core/tools/variant/SampleVariantStatsAnalysisExecutor.java @@ -95,6 +95,12 @@ protected void writeStatsToFile(List stats) throws ToolExcep } } + protected void addStepParams() throws ToolException { + addAttribute("STUDY", study); + addAttribute("SAMPLES", sampleNames); + addAttribute("QUERY", variantQuery); + } + public abstract int getDefaultBatchSize(); public abstract int getMaxBatchSize(); } diff --git a/opencga-storage/opencga-storage-core/src/test/resources/AR2.10039966-01T.copynumber.caveman.vcf.gz b/opencga-storage/opencga-storage-core/src/test/resources/AR2.10039966-01T.copynumber.caveman.vcf.gz deleted file mode 100644 index 3cd9a6f3494..00000000000 Binary files a/opencga-storage/opencga-storage-core/src/test/resources/AR2.10039966-01T.copynumber.caveman.vcf.gz and /dev/null differ diff --git a/opencga-storage/opencga-storage-core/src/test/resources/AR2.10039966-01T_vs_AR2.10039966-01G.annot.brass.vcf.gz b/opencga-storage/opencga-storage-core/src/test/resources/AR2.10039966-01T_vs_AR2.10039966-01G.annot.brass.vcf.gz deleted file mode 100644 index 10c8532fdd2..00000000000 Binary files a/opencga-storage/opencga-storage-core/src/test/resources/AR2.10039966-01T_vs_AR2.10039966-01G.annot.brass.vcf.gz and /dev/null differ diff --git a/opencga-storage/opencga-storage-core/src/test/resources/AR2.10039966-01T_vs_AR2.10039966-01G.annot.pindel.vcf.gz b/opencga-storage/opencga-storage-core/src/test/resources/AR2.10039966-01T_vs_AR2.10039966-01G.annot.pindel.vcf.gz deleted file mode 100644 index 69eb2b03002..00000000000 Binary files a/opencga-storage/opencga-storage-core/src/test/resources/AR2.10039966-01T_vs_AR2.10039966-01G.annot.pindel.vcf.gz and /dev/null 
differ diff --git a/opencga-storage/opencga-storage-core/src/test/resources/cancer-cnvs.vcf.gz b/opencga-storage/opencga-storage-core/src/test/resources/cancer-cnvs.vcf.gz new file mode 100644 index 00000000000..5de41e9ac58 Binary files /dev/null and b/opencga-storage/opencga-storage-core/src/test/resources/cancer-cnvs.vcf.gz differ diff --git a/opencga-storage/opencga-storage-core/src/test/resources/cancer-indels.vcf.gz b/opencga-storage/opencga-storage-core/src/test/resources/cancer-indels.vcf.gz new file mode 100644 index 00000000000..be382289e90 Binary files /dev/null and b/opencga-storage/opencga-storage-core/src/test/resources/cancer-indels.vcf.gz differ diff --git a/opencga-storage/opencga-storage-core/src/test/resources/cancer-rearrs.vcf.gz b/opencga-storage/opencga-storage-core/src/test/resources/cancer-rearrs.vcf.gz new file mode 100644 index 00000000000..1efdc418c9e Binary files /dev/null and b/opencga-storage/opencga-storage-core/src/test/resources/cancer-rearrs.vcf.gz differ diff --git a/opencga-storage/opencga-storage-core/src/test/resources/cancer-snvs.vcf.gz b/opencga-storage/opencga-storage-core/src/test/resources/cancer-snvs.vcf.gz new file mode 100644 index 00000000000..a9f89b6155e Binary files /dev/null and b/opencga-storage/opencga-storage-core/src/test/resources/cancer-snvs.vcf.gz differ diff --git a/opencga-storage/opencga-storage-core/src/test/resources/genome-plot-config.json b/opencga-storage/opencga-storage-core/src/test/resources/genome-plot-config.json new file mode 100644 index 00000000000..dbec75c380f --- /dev/null +++ b/opencga-storage/opencga-storage-core/src/test/resources/genome-plot-config.json @@ -0,0 +1,32 @@ +{"density": "MEDIUM", + "query": {"sample": "HCC1954"}, + "title": "HCC1954", + "tracks": [{"id": "snv", + "query": { + "region": "1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,X,Y", + "sample": "HCC1954", + "study": "test@project:cancer", + "type": "SNV"}, + "type": "SNV"}, + {"id": "indel", + "query": { + 
"region": "1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,X,Y", + "sample": "HCC1954", + "study": "test@project:cancer", + "type": "INDEL,INSERTION,DELETION"}, + "type": "INDEL"}, + {"id": "cnv1", + "query": { + "region": "1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,X,Y", + "sample": "HCC1954", + "study": "test@project:cancer", + "type": "COPY_NUMBER"}, + "type": "COPY-NUMBER"}, + {"id": "rearr1", + "query": { + "region": "1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,X,Y", + "sample": "HCC1954", + "study": "test@project:cancer", + "type": "SV"}, + "type": "REARRANGEMENT"}] +} \ No newline at end of file