From 520695eaded65a0491611797c1d6da289c3e3f1b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joaqu=C3=ADn=20T=C3=A1rraga=20Gim=C3=A9nez?= Date: Mon, 3 Apr 2023 10:04:52 +0200 Subject: [PATCH 1/4] analysis: fix SV catalog counting in signature analysis, #TASK-4144 --- ...ationalSignatureLocalAnalysisExecutor.java | 63 +++++++++++++++++-- .../analysis/variant/VariantAnalysisTest.java | 19 +++++- 2 files changed, 74 insertions(+), 8 deletions(-) diff --git a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/mutationalSignature/MutationalSignatureLocalAnalysisExecutor.java b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/mutationalSignature/MutationalSignatureLocalAnalysisExecutor.java index 55173c3bd3a..7666ee3cb34 100644 --- a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/mutationalSignature/MutationalSignatureLocalAnalysisExecutor.java +++ b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/mutationalSignature/MutationalSignatureLocalAnalysisExecutor.java @@ -296,6 +296,7 @@ public void computeSignatureCatalogueSV() throws ToolExecutorException { } // Overwrite study and types related to SV query.put(VariantQueryParam.STUDY.key(), getStudy()); +// query.put(VariantQueryParam.TYPE.key(), VariantType.BREAKEND + "," + VariantType.DELETION); query.put(VariantQueryParam.TYPE.key(), VariantType.DELETION + "," + VariantType.BREAKEND + "," + VariantType.DUPLICATION + "," + VariantType.TANDEM_DUPLICATION + "," + VariantType.INVERSION + "," + VariantType.TRANSLOCATION); @@ -384,19 +385,45 @@ private File computeClusteredFile(Query query, QueryOptions queryOptions) throws File inputFile = getOutDir().resolve("in.clustered.bedpe").toFile(); File outputFile = getOutDir().resolve("out.clustered.bedpe").toFile(); try { + String mateChrom; + int matePosition; + String lengthKey; + + Map> breakendMap = new HashMap<>(); PrintWriter pw = new PrintWriter(inputFile); pw.println("chrom1\tstart1\tend1\tchrom2\tstart2\tend2\tlength\ttype\tsample"); while (iterator.hasNext()) { Variant variant = iterator.next(); - if (variant.getSv() == null || variant.getSv().getBreakend() == null || variant.getSv().getBreakend().getMate() == null) { - continue; + if (breakendMap.containsKey(variant.getChromosome())) { + for (Integer position : breakendMap.get(variant.getChromosome())) { + if (Math.abs(variant.getStart() - position) <= 20) { + // Skipping since it is a mate + continue; + } + } + } + BreakendMate mate = null; + if (variant.getSv() != null && variant.getSv().getBreakend() != null && variant.getSv().getBreakend().getMate() != null) { + mate = variant.getSv().getBreakend().getMate(); + if (!breakendMap.containsKey(mate.getChromosome())) { + breakendMap.put(mate.getChromosome(), new ArrayList<>()); + } + breakendMap.get(mate.getChromosome()).add(mate.getPosition()); } String typeKey = getTypeKey(variant); - String lengthKey = getLengthKey(variant); + if (mate == null) { + mateChrom = "0"; + matePosition = 0; + lengthKey = LENGTH_NA; + } else { + mateChrom = mate.getChromosome(); + matePosition = mate.getPosition() == null ? 0 : mate.getPosition(); + lengthKey = getLengthKey(variant, typeKey); + } + if (typeKey != null && lengthKey != null) { - BreakendMate mate = variant.getSv().getBreakend().getMate(); pw.println(variant.getChromosome() + "\t" + variant.getStart() + "\t" + variant.getEnd() + "\t" - + mate.getChromosome() + "\t" + mate.getPosition() + "\t" + mate.getPosition() + "\t" + + mateChrom + "\t" + matePosition + "\t" + matePosition + "\t" + lengthKey + "\t" + typeKey + "\t" + getSample()); } } @@ -458,6 +485,7 @@ private String getTypeKey(Variant variant) { case "TDS": case "DUPLICATION": case "TANDEM_DUPLICATION": + case "TANDEM-DUPLICATION": return TYPE_TDS; case "INV": case "INVERSION": @@ -495,6 +523,31 @@ private String getLengthKey(Variant variant) { return null; } + private String getLengthKey(Variant variant, String type) { + if (type == null) { + return null; + } + if (type.equals(TYPE_TRANS)) { + return LENGTH_NA; + } else { + BreakendMate mate = variant.getSv().getBreakend().getMate(); + if (variant.getChromosome().equals(mate.getChromosome())) { + int length = Math.abs(mate.getPosition() - variant.getStart()); + if (length <= 10000) { + return LENGTH_1_10Kb; + } else if (length <= 100000) { + return LENGTH_10Kb_100Kb; + } else if (length <= 1000000) { + return LENGTH_100Kb_1Mb; + } else if (length <= 10000000) { + return LENGTH_1Mb_10Mb; + } + return LENGTH_10Mb; + } + } + return null; + } + private void computeSignatureFitting() throws IOException, ToolException, CatalogException { File cataloguesFile = getOutDir().resolve(CATALOGUES_FILENAME_DEFAULT).toFile(); if (!cataloguesFile.exists()) { diff --git a/opencga-analysis/src/test/java/org/opencb/opencga/analysis/variant/VariantAnalysisTest.java b/opencga-analysis/src/test/java/org/opencb/opencga/analysis/variant/VariantAnalysisTest.java index c0ce79b8374..31ac3ab30a2 100644 --- a/opencga-analysis/src/test/java/org/opencb/opencga/analysis/variant/VariantAnalysisTest.java +++ b/opencga-analysis/src/test/java/org/opencb/opencga/analysis/variant/VariantAnalysisTest.java @@ -858,9 +858,22 @@ public void testMutationalSignatureCatalogueSV() throws Exception { params.setSample(cancer_sample); params.setId("catalogue-1"); params.setDescription("Catalogue #1"); - VariantQuery query = new VariantQuery(); - query.sample(cancer_sample); - query.type(VariantType.SV.name()); + VariantQuery query = new VariantQuery() + .sample(cancer_sample) + .type(VariantType.SV.name()) + .file("AR2.10039966-01T_vs_AR2.10039966-01G.annot.brass.vcf.gz"); + //.fileData("AR2.10039966-01T_vs_AR2.10039966-01G.annot.brass.vcf.gz:BAS>=0;BKDIST>=-1") + //.region("1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,X,Y") + + //https://ws.opencb.org/opencga-test/webservices/rest/v2/analysis/variant/mutationalSignature/query + // ?study=serena@cancer38:test38 + // &fitting=false + // &sample=AR2.10039966-01T + // &fileData=AR2.10039966-01T_vs_AR2.10039966-01G.annot.brass.vcf.gz:BAS>=0;BKDIST>=-1;EXT_PS_SOM>=4;EXT_RC_SOM>=0 + // ®ion=1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,X,Y + // &type=SV + + params.setQuery(query.toJson()); params.setSkip("fitting"); From d36b6cded5e060fe2a17c393aab353aaeb3a3662 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joaqu=C3=ADn=20T=C3=A1rraga=20Gim=C3=A9nez?= Date: Tue, 4 Apr 2023 16:43:18 +0200 Subject: [PATCH 2/4] analysis: fix SV catalogue counting and clean code, #TASK-4144 --- ...ationalSignatureLocalAnalysisExecutor.java | 160 ++++++++---------- 1 file changed, 67 insertions(+), 93 deletions(-) diff --git a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/mutationalSignature/MutationalSignatureLocalAnalysisExecutor.java b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/mutationalSignature/MutationalSignatureLocalAnalysisExecutor.java index 7666ee3cb34..41778987bb3 100644 --- a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/mutationalSignature/MutationalSignatureLocalAnalysisExecutor.java +++ b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/mutationalSignature/MutationalSignatureLocalAnalysisExecutor.java @@ -64,8 +64,10 @@ public class MutationalSignatureLocalAnalysisExecutor extends MutationalSignatureAnalysisExecutor implements StorageToolExecutor { - public final static String R_DOCKER_IMAGE = "opencb/opencga-ext-tools:" - + GitRepositoryState.get().getBuildVersion(); + public static final String R_DOCKER_IMAGE = "opencb/opencga-ext-tools:" + GitRepositoryState.get().getBuildVersion(); + + private static final String SVCLASS = "SVCLASS"; + private static final String EXT_SVTYPE = "EXT_SVTYPE"; private Path opencgaHome; @@ -80,7 +82,6 @@ public void run() throws ToolException, CatalogException, IOException, StorageEn && getSkip().contains(MutationalSignatureAnalysisParams.SIGNATURE_CATALOGUE_SKIP_VALUE) && getSkip().contains(MutationalSignatureAnalysisParams.SIGNATURE_FITTING_SKIP_VALUE)) { // Only compute genome context file - // TODO: overwrite support ! File indexFile = checkGenomeContextFile(); logger.info("Checking genome context file {} for sample {}", indexFile.getAbsolutePath(), getSample()); } @@ -97,7 +98,6 @@ && getSkip().contains(MutationalSignatureAnalysisParams.SIGNATURE_FITTING_SKIP_V // SNV logger.info("Computing catalogue (mutational signature) for SNV variants"); - // TODO: overwrite support ! File indexFile = checkGenomeContextFile(); logger.info("Mutational signature analysis is using the genome context file {} for sample {}", indexFile.getAbsolutePath(), getSample()); @@ -191,15 +191,15 @@ refGenomePath, new FastaSequenceIndex(new File(base + ".fai")), try { // Accessing to the context sequence and write it into the context index file - ReferenceSequence refSeq = indexed.getSubsequenceAt(variant.getChromosome(), variant.getStart() - 1, - variant.getEnd() + 1); + ReferenceSequence refSeq = indexed.getSubsequenceAt(variant.getChromosome(), (long) variant.getStart() - 1, + (long) variant.getEnd() + 1); String sequence = new String(refSeq.getBases()); // Write context index pw.println(variant.toString() + "\t" + sequence); } catch (Exception e) { - logger.warn("When creating genome context file for mutational signature analysis, ignoring variant " - + variant.toStringSimple() + ". " + e.getMessage()); + logger.warn("When creating genome context file for mutational signature analysis, ignoring variant {}: {}", + variant.toStringSimple(), e.getMessage()); } } } @@ -211,7 +211,8 @@ refGenomePath, new FastaSequenceIndex(new File(base + ".fai")), private void updateCountMap(Variant variant, String sequence, Map> countMap) { try { - String k, seq; + String k; + String seq; String key = variant.getReference() + ">" + variant.getAlternate(); @@ -226,16 +227,15 @@ private void updateCountMap(Variant variant, String sequence, Map indexMap = new HashMap<>(); - BufferedReader br = new BufferedReader(new FileReader(indexFile)); String line; while ((line = br.readLine()) != null) { String[] parts = line.split("\t"); @@ -288,26 +288,30 @@ public void computeSignatureCatalogueSNV(File indexFile) throws ToolExecutorExce } public void computeSignatureCatalogueSV() throws ToolExecutorException { + Query query; + File clusteredFile; try { // Get variant iterator - Query query = new Query(); + query = new Query(); if (getQuery() != null) { query.putAll(getQuery()); } // Overwrite study and types related to SV query.put(VariantQueryParam.STUDY.key(), getStudy()); -// query.put(VariantQueryParam.TYPE.key(), VariantType.BREAKEND + "," + VariantType.DELETION); query.put(VariantQueryParam.TYPE.key(), VariantType.DELETION + "," + VariantType.BREAKEND + "," + VariantType.DUPLICATION + "," + VariantType.TANDEM_DUPLICATION + "," + VariantType.INVERSION + "," + VariantType.TRANSLOCATION); QueryOptions queryOptions = new QueryOptions(QueryOptions.INCLUDE, "id,sv,studies"); - logger.info("Query: {}", query.toJson()); - logger.info("Query options: {}", queryOptions.toJson()); + logger.info("Query: {}", query != null ? query.toJson() : null); + logger.info("Query options: {}", queryOptions != null ? queryOptions.toJson() : null); - File clusteredFile = computeClusteredFile(query, queryOptions); + clusteredFile = computeClusteredFile(query, queryOptions); + } catch (CatalogException | StorageEngineException | ToolException e) { + throw new ToolExecutorException(e); + } - BufferedReader br = FileUtils.newBufferedReader(clusteredFile.toPath()); + try (BufferedReader br = FileUtils.newBufferedReader(clusteredFile.toPath())) { // Skip header line // chrom1 start1 end1 chrom2 start2 end2 length type sample id is.clustered // 0 1 2 3 4 5 6 7 8 9 10 @@ -335,11 +339,6 @@ public void computeSignatureCatalogueSV() throws ToolExecutorException { } } -// logger.info("Count map size = {}", countMap.size()); -// for (Map.Entry entry : countMap.entrySet()) { -// logger.info("context = {}, count = {}", entry.getKey(), entry.getValue()); -// } - // Build teh genome context counts object for SV List genomeContextCounts = new LinkedList<>(); for (String clustered: new LinkedList<>(Arrays.asList(CLUSTERED, NON_CLUSTERED))) { @@ -372,7 +371,7 @@ public void computeSignatureCatalogueSV() throws ToolExecutorException { JacksonUtils.getDefaultObjectMapper().writerFor(Signature.class).writeValue(getOutDir() .resolve(MutationalSignatureAnalysis.MUTATIONAL_SIGNATURE_DATA_MODEL_FILENAME).toFile(), signature); - } catch (IOException | CatalogException | StorageEngineException | ToolException e) { + } catch (IOException e) { throw new ToolExecutorException(e); } } @@ -384,50 +383,54 @@ private File computeClusteredFile(Query query, QueryOptions queryOptions) throws // $ Rscript sv_clustering.R ./test.bedpe ./out.bedpe File inputFile = getOutDir().resolve("in.clustered.bedpe").toFile(); File outputFile = getOutDir().resolve("out.clustered.bedpe").toFile(); - try { + try (PrintWriter pw = new PrintWriter(inputFile);) { String mateChrom; int matePosition; String lengthKey; + boolean processVariant; Map> breakendMap = new HashMap<>(); - PrintWriter pw = new PrintWriter(inputFile); pw.println("chrom1\tstart1\tend1\tchrom2\tstart2\tend2\tlength\ttype\tsample"); while (iterator.hasNext()) { + processVariant = true; Variant variant = iterator.next(); if (breakendMap.containsKey(variant.getChromosome())) { for (Integer position : breakendMap.get(variant.getChromosome())) { if (Math.abs(variant.getStart() - position) <= 20) { // Skipping since it is a mate - continue; + processVariant = false; + break; } } } - BreakendMate mate = null; - if (variant.getSv() != null && variant.getSv().getBreakend() != null && variant.getSv().getBreakend().getMate() != null) { - mate = variant.getSv().getBreakend().getMate(); - if (!breakendMap.containsKey(mate.getChromosome())) { - breakendMap.put(mate.getChromosome(), new ArrayList<>()); + if (processVariant) { + BreakendMate mate = null; + if (variant.getSv() != null && variant.getSv().getBreakend() != null + && variant.getSv().getBreakend().getMate() != null) { + mate = variant.getSv().getBreakend().getMate(); + if (!breakendMap.containsKey(mate.getChromosome())) { + breakendMap.put(mate.getChromosome(), new ArrayList<>()); + } + breakendMap.get(mate.getChromosome()).add(mate.getPosition()); + } + String typeKey = getTypeKey(variant); + if (mate == null) { + mateChrom = "0"; + matePosition = 0; + lengthKey = LENGTH_NA; + } else { + mateChrom = mate.getChromosome(); + matePosition = mate.getPosition() == null ? 0 : mate.getPosition(); + lengthKey = getLengthKey(variant, typeKey); } - breakendMap.get(mate.getChromosome()).add(mate.getPosition()); - } - String typeKey = getTypeKey(variant); - if (mate == null) { - mateChrom = "0"; - matePosition = 0; - lengthKey = LENGTH_NA; - } else { - mateChrom = mate.getChromosome(); - matePosition = mate.getPosition() == null ? 0 : mate.getPosition(); - lengthKey = getLengthKey(variant, typeKey); - } - if (typeKey != null && lengthKey != null) { - pw.println(variant.getChromosome() + "\t" + variant.getStart() + "\t" + variant.getEnd() + "\t" - + mateChrom + "\t" + matePosition + "\t" + matePosition + "\t" - + lengthKey + "\t" + typeKey + "\t" + getSample()); + if (typeKey != null && lengthKey != null) { + pw.println(variant.getChromosome() + "\t" + variant.getStart() + "\t" + variant.getEnd() + "\t" + + mateChrom + "\t" + matePosition + "\t" + matePosition + "\t" + + lengthKey + "\t" + typeKey + "\t" + getSample()); + } } } - pw.close(); // Build command line to run R script via docker image // Input binding @@ -457,22 +460,17 @@ private File computeClusteredFile(Query query, QueryOptions queryOptions) throws return outputFile; } - private String getClusteredKey(Variant variant) { - return NON_CLUSTERED; - } - private String getTypeKey(Variant variant) { String variantType = variant.getType() != null ? variant.getType().name() : ""; if (CollectionUtils.isNotEmpty(variant.getStudies()) && CollectionUtils.isNotEmpty(variant.getStudies().get(0).getFiles())) { for (FileEntry file : variant.getStudies().get(0).getFiles()) { - if (file.getData() != null) { - if (file.getData().containsKey("EXT_SVTYPE")) { - variantType = file.getData().get("EXT_SVTYPE").toUpperCase(Locale.ROOT); - break; - } else if (file.getData().containsKey("SVCLASS")) { - variantType = file.getData().get("SVCLASS").toUpperCase(Locale.ROOT); - break; + if (file.getData() != null && (file.getData().containsKey(EXT_SVTYPE) || file.getData().containsKey(SVCLASS))) { + if (file.getData().containsKey(EXT_SVTYPE)) { + variantType = file.getData().get(EXT_SVTYPE).toUpperCase(Locale.ROOT); + } else if (file.getData().containsKey(SVCLASS)) { + variantType = file.getData().get(SVCLASS).toUpperCase(Locale.ROOT); } + break; } } } @@ -494,33 +492,9 @@ private String getTypeKey(Variant variant) { case "TRANS": case "TRANSLOCATION": return TYPE_TRANS; + default: + return null; } - return null; - } - - private String getLengthKey(Variant variant) { - if (variant.getSv() == null || variant.getSv().getBreakend() == null || variant.getSv().getBreakend().getMate() == null) { - return null; - } - BreakendMate mate = variant.getSv().getBreakend().getMate(); - if (variant.getChromosome().equals(mate.getChromosome())) { - int length = Math.abs(mate.getPosition() - variant.getStart()); - if (length <= 10000) { - return LENGTH_1_10Kb; - } else if (length <= 100000) { - return LENGTH_10Kb_100Kb; - } else if (length <= 1000000) { - return LENGTH_100Kb_1Mb; - } else if (length <= 10000000) { - return LENGTH_1Mb_10Mb; - } - return LENGTH_10Mb; - } else { - if (variant.getType() == VariantType.TRANSLOCATION) { - return LENGTH_NA; - } - } - return null; } private String getLengthKey(Variant variant, String type) { @@ -561,12 +535,12 @@ private void computeSignatureFitting() throws IOException, ToolException, Catalo throw new ToolException("Unable to compute mutational signature analysis. Sample '" + getSample() + "' not found"); } Sample sample = sampleResult.first(); - logger.info("Searching catalogue counts from quality control for sample " + getSample()); + logger.info("Searching catalogue counts from quality control for sample {}", getSample()); if (sample.getQualityControl() != null && sample.getQualityControl().getVariant() != null && CollectionUtils.isNotEmpty(sample.getQualityControl().getVariant().getSignatures())) { - logger.info("Searching in " + sample.getQualityControl().getVariant().getSignatures().size() + " signatures"); + logger.info("Searching in {} signatures", sample.getQualityControl().getVariant().getSignatures().size()); for (Signature signature : sample.getQualityControl().getVariant().getSignatures()) { - logger.info("Matching ? " + getQueryId() + " vs " + signature.getId()); + logger.info("Matching ? {} vs {}", getQueryId(), signature.getId()); if (getQueryId().equals(signature.getId())) { // Write catalogue file try (PrintWriter pw = new PrintWriter(cataloguesFile)) { @@ -574,7 +548,6 @@ private void computeSignatureFitting() throws IOException, ToolException, Catalo for (Signature.GenomeContextCount count : signature.getCounts()) { pw.println(count.getContext() + "\t" + count.getTotal()); } - pw.close(); } catch (Exception e) { throw new ToolException("Error writing catalogue output file: " + cataloguesFile.getName(), e); } @@ -640,7 +613,8 @@ private void computeSignatureFitting() throws IOException, ToolException, Catalo scriptParams.append(" --genomev=hg19"); break; } - case "GRCh38": { + case "GRCh38": + default: { scriptParams.append(" --genomev=hg38"); break; } @@ -648,7 +622,7 @@ private void computeSignatureFitting() throws IOException, ToolException, Catalo String cmdline = DockerUtils.run(R_DOCKER_IMAGE, inputBindings, outputBinding, scriptParams.toString(), null); - logger.info("Docker command line: " + cmdline); + logger.info("Docker command line: {}", cmdline); // Check fitting file before parsing and creating the mutational signature fitting data model File signatureCoeffsFile = getOutDir().resolve(SIGNATURE_COEFFS_FILENAME).toFile(); From 9f3ca93eb7d40a44566f28636b718ddc4a8a1388 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joaqu=C3=ADn=20T=C3=A1rraga=20Gim=C3=A9nez?= Date: Tue, 4 Apr 2023 17:19:58 +0200 Subject: [PATCH 3/4] analysis: fix try-catch issues, #TASK-4144 --- ...ationalSignatureLocalAnalysisExecutor.java | 41 +++++++++++-------- .../analysis/variant/VariantAnalysisTest.java | 6 +-- 2 files changed, 28 insertions(+), 19 deletions(-) diff --git a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/mutationalSignature/MutationalSignatureLocalAnalysisExecutor.java b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/mutationalSignature/MutationalSignatureLocalAnalysisExecutor.java index 41778987bb3..32b0bd92b75 100644 --- a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/mutationalSignature/MutationalSignatureLocalAnalysisExecutor.java +++ b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/mutationalSignature/MutationalSignatureLocalAnalysisExecutor.java @@ -203,7 +203,6 @@ refGenomePath, new FastaSequenceIndex(new File(base + ".fai")), } } } - } catch (IOException | CatalogException | ToolException | StorageEngineException e) { throw new ToolExecutorException(e); } @@ -311,11 +310,11 @@ public void computeSignatureCatalogueSV() throws ToolExecutorException { throw new ToolExecutorException(e); } + Map countMap = new HashMap<>(); try (BufferedReader br = FileUtils.newBufferedReader(clusteredFile.toPath())) { // Skip header line // chrom1 start1 end1 chrom2 start2 end2 length type sample id is.clustered // 0 1 2 3 4 5 6 7 8 9 10 - Map countMap = new HashMap<>(); // Skip first line String line = br.readLine(); while ((line = br.readLine()) != null) { @@ -338,30 +337,36 @@ public void computeSignatureCatalogueSV() throws ToolExecutorException { countMap.put(key, 1); } } + } catch (IOException e) { + throw new ToolExecutorException(e); + } - // Build teh genome context counts object for SV - List genomeContextCounts = new LinkedList<>(); - for (String clustered: new LinkedList<>(Arrays.asList(CLUSTERED, NON_CLUSTERED))) { - for (String type: new LinkedList<>(Arrays.asList(TYPE_DEL, TYPE_TDS, TYPE_INV))) { - for (String length : new LinkedList<>(Arrays.asList(LENGTH_1_10Kb, LENGTH_10Kb_100Kb, LENGTH_100Kb_1Mb, LENGTH_1Mb_10Mb, - LENGTH_10Mb))) { - String key = clustered + "_" + type + "_" + length; - genomeContextCounts.add(new Signature.GenomeContextCount(key, countMap.containsKey(key) ? countMap.get(key) : 0)); - } + // Build teh genome context counts object for SV + List genomeContextCounts = new LinkedList<>(); + for (String clustered: new LinkedList<>(Arrays.asList(CLUSTERED, NON_CLUSTERED))) { + for (String type: new LinkedList<>(Arrays.asList(TYPE_DEL, TYPE_TDS, TYPE_INV))) { + for (String length : new LinkedList<>(Arrays.asList(LENGTH_1_10Kb, LENGTH_10Kb_100Kb, LENGTH_100Kb_1Mb, LENGTH_1Mb_10Mb, + LENGTH_10Mb))) { + String key = clustered + "_" + type + "_" + length; + genomeContextCounts.add(new Signature.GenomeContextCount(key, countMap.containsKey(key) ? countMap.get(key) : 0)); } - String key = clustered + "_" + TYPE_TRANS; - genomeContextCounts.add(new Signature.GenomeContextCount(key, countMap.containsKey(key) ? countMap.get(key) : 0)); } + String key = clustered + "_" + TYPE_TRANS; + genomeContextCounts.add(new Signature.GenomeContextCount(key, countMap.containsKey(key) ? countMap.get(key) : 0)); + } - // Write catalogue file from the genome context counts - PrintWriter pw = new PrintWriter(getOutDir().resolve(CATALOGUES_FILENAME_DEFAULT).toFile()); + // Write catalogue file from the genome context counts + try (PrintWriter pw = new PrintWriter(getOutDir().resolve(CATALOGUES_FILENAME_DEFAULT).toFile())) { pw.write(query.getString(VariantQueryParam.SAMPLE.key())); pw.write("\n"); for (Signature.GenomeContextCount counts : genomeContextCounts) { pw.write(counts.getContext() + "\t" + counts.getTotal() + "\n"); } - pw.close(); + } catch (IOException e) { + throw new ToolExecutorException(e); + } + try { Signature signature = new Signature() .setId(getQueryId()) .setDescription(getQueryDescription()) @@ -431,7 +436,11 @@ private File computeClusteredFile(Query query, QueryOptions queryOptions) throws } } } + } catch (Exception e) { + throw new ToolException(e); + } + try { // Build command line to run R script via docker image // Input binding List> inputBindings = new ArrayList<>(); diff --git a/opencga-analysis/src/test/java/org/opencb/opencga/analysis/variant/VariantAnalysisTest.java b/opencga-analysis/src/test/java/org/opencb/opencga/analysis/variant/VariantAnalysisTest.java index 31ac3ab30a2..0dcee9ebfa6 100644 --- a/opencga-analysis/src/test/java/org/opencb/opencga/analysis/variant/VariantAnalysisTest.java +++ b/opencga-analysis/src/test/java/org/opencb/opencga/analysis/variant/VariantAnalysisTest.java @@ -861,9 +861,9 @@ public void testMutationalSignatureCatalogueSV() throws Exception { VariantQuery query = new VariantQuery() .sample(cancer_sample) .type(VariantType.SV.name()) - .file("AR2.10039966-01T_vs_AR2.10039966-01G.annot.brass.vcf.gz"); - //.fileData("AR2.10039966-01T_vs_AR2.10039966-01G.annot.brass.vcf.gz:BAS>=0;BKDIST>=-1") - //.region("1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,X,Y") + //.file("AR2.10039966-01T_vs_AR2.10039966-01G.annot.brass.vcf.gz"); + .fileData("AR2.10039966-01T_vs_AR2.10039966-01G.annot.brass.vcf.gz:BAS>=0;BKDIST>=-1") + .region("1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,X,Y"); //https://ws.opencb.org/opencga-test/webservices/rest/v2/analysis/variant/mutationalSignature/query // ?study=serena@cancer38:test38 From db6922c8eeefc7e56ed60a8276694f24fd21e83d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joaqu=C3=ADn=20T=C3=A1rraga=20Gim=C3=A9nez?= Date: Tue, 4 Jul 2023 09:25:39 +0200 Subject: [PATCH 4/4] analysis: improve log messages, #TASK-4144 --- .../MutationalSignatureLocalAnalysisExecutor.java | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/mutationalSignature/MutationalSignatureLocalAnalysisExecutor.java b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/mutationalSignature/MutationalSignatureLocalAnalysisExecutor.java index 32b0bd92b75..f2bf45cf487 100644 --- a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/mutationalSignature/MutationalSignatureLocalAnalysisExecutor.java +++ b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/mutationalSignature/MutationalSignatureLocalAnalysisExecutor.java @@ -199,7 +199,7 @@ refGenomePath, new FastaSequenceIndex(new File(base + ".fai")), pw.println(variant.toString() + "\t" + sequence); } catch (Exception e) { logger.warn("When creating genome context file for mutational signature analysis, ignoring variant {}: {}", - variant.toStringSimple(), e.getMessage()); + variant.toStringSimple(), Arrays.toString(e.getStackTrace())); } } } @@ -227,7 +227,7 @@ private void updateCountMap(Variant variant, String sequence, Map