From b0219abee1578e894331f7e18db9275bf896f65a Mon Sep 17 00:00:00 2001 From: Calvin Lu <59149377+calvinlu3@users.noreply.github.com> Date: Tue, 3 Sep 2024 10:52:05 -0400 Subject: [PATCH] Add protein start+end and add consequence --- .../domain/enumeration/CNAConsequence.java | 10 ++++---- .../domain/enumeration/SVConsequence.java | 14 +++++------ .../oncokb/curation/service/MainService.java | 15 ++++++++++- .../oncokb/curation/util/AlterationUtils.java | 25 ++++++++++++++++--- 4 files changed, 48 insertions(+), 16 deletions(-) diff --git a/src/main/java/org/mskcc/oncokb/curation/domain/enumeration/CNAConsequence.java b/src/main/java/org/mskcc/oncokb/curation/domain/enumeration/CNAConsequence.java index fc3a29e2d..781f4ee72 100644 --- a/src/main/java/org/mskcc/oncokb/curation/domain/enumeration/CNAConsequence.java +++ b/src/main/java/org/mskcc/oncokb/curation/domain/enumeration/CNAConsequence.java @@ -1,9 +1,9 @@ package org.mskcc.oncokb.curation.domain.enumeration; public enum CNAConsequence { - AMPLIFICATION, - DELETION, - GAIN, - LOSS, - UNKNOWN, + CNA_AMPLIFICATION, + CNA_DELETION, + CNA_GAIN, + CNA_LOSS, + CNA_UNKNOWN, } diff --git a/src/main/java/org/mskcc/oncokb/curation/domain/enumeration/SVConsequence.java b/src/main/java/org/mskcc/oncokb/curation/domain/enumeration/SVConsequence.java index aea1fd65b..e8db80387 100644 --- a/src/main/java/org/mskcc/oncokb/curation/domain/enumeration/SVConsequence.java +++ b/src/main/java/org/mskcc/oncokb/curation/domain/enumeration/SVConsequence.java @@ -1,11 +1,11 @@ package org.mskcc.oncokb.curation.domain.enumeration; public enum SVConsequence { - DELETION, - TRANSLOCATION, - DUPLICATION, - INSERTION, - INVERSION, - FUSION, - UNKNOWN, + SV_DELETION, + SV_TRANSLOCATION, + SV_DUPLICATION, + SV_INSERTION, + SV_INVERSION, + SV_FUSION, + SV_UNKNOWN, } diff --git a/src/main/java/org/mskcc/oncokb/curation/service/MainService.java b/src/main/java/org/mskcc/oncokb/curation/service/MainService.java index 48293e64d..c8cdb1aaf 100644 
--- a/src/main/java/org/mskcc/oncokb/curation/service/MainService.java +++ b/src/main/java/org/mskcc/oncokb/curation/service/MainService.java @@ -293,13 +293,26 @@ public AlterationAnnotationStatus annotateAlteration(ReferenceGenome referenceGe List problematicExonAlts = new ArrayList<>(); for (String exonAlterationString : Arrays.asList(alteration.getAlteration().split("\\s*\\+\\s*"))) { Integer exonNumber = Integer.parseInt(exonAlterationString.replaceAll("\\D*", "")); - if (exonNumber > 0 && exonNumber < proteinExons.size()) { + if (exonNumber > 0 && exonNumber < proteinExons.size() + 1) { overlap.add(proteinExons.get(exonNumber - 1)); } else { problematicExonAlts.add(exonAlterationString); } } if (problematicExonAlts.isEmpty()) { + overlap.sort(Comparator.comparingInt(ProteinExonDTO::getExon)); + Boolean isConsecutiveExonRange = + overlap + .stream() + .map(ProteinExonDTO::getExon) + .reduce((prev, curr) -> (curr - prev == 1) ? curr : Integer.MIN_VALUE) + .orElse(Integer.MIN_VALUE) != + Integer.MIN_VALUE; + if (isConsecutiveExonRange && overlap.size() > 0) { + alteration.setStart(overlap.get(0).getRange().getStart()); + alteration.setEnd(overlap.get(overlap.size() - 1).getRange().getEnd()); + } + annotationDTO.setExons(overlap); } else { StringBuilder sb = new StringBuilder(); diff --git a/src/main/java/org/mskcc/oncokb/curation/util/AlterationUtils.java b/src/main/java/org/mskcc/oncokb/curation/util/AlterationUtils.java index 8fc5a7436..965b74cf9 100644 --- a/src/main/java/org/mskcc/oncokb/curation/util/AlterationUtils.java +++ b/src/main/java/org/mskcc/oncokb/curation/util/AlterationUtils.java @@ -28,7 +28,7 @@ private Alteration parseFusion(String alteration) { Alteration alt = new Alteration(); Consequence consequence = new Consequence(); - consequence.setTerm(SVConsequence.FUSION.name()); + consequence.setTerm(SVConsequence.SV_FUSION.name()); alt.setType(AlterationType.STRUCTURAL_VARIANT); alt.setConsequence(consequence); @@ -52,7 +52,7 @@ private 
Alteration parseFusion(String alteration) { } private Alteration parseCopyNumberAlteration(String alteration) { - CNAConsequence cnaTerm = CNAConsequence.UNKNOWN; + CNAConsequence cnaTerm = CNAConsequence.CNA_UNKNOWN; Optional cnaConsequenceOptional = getCNAConsequence(alteration); if (cnaConsequenceOptional.isPresent()) { @@ -96,19 +96,38 @@ private Alteration parseGenomicChange(String genomicChange) { private Alteration parseExonAlteration(String alteration) { Alteration alt = new Alteration(); Consequence consequence = new Consequence(); - consequence.setTerm(UNKNOWN.name()); + consequence.setTerm(SVConsequence.SV_UNKNOWN.name()); alt.setType(AlterationType.STRUCTURAL_VARIANT); alt.setConsequence(consequence); Pattern pattern = Pattern.compile(EXON_ALT_REGEX); Matcher matcher = pattern.matcher(alteration); List splitResults = new ArrayList<>(); + Set consequenceTermSet = new HashSet<>(); while (matcher.find()) { String startExonStr = matcher.group(1); // The start exon number String endExonStr = matcher.group(3); // The end exon number (if present) String consequenceTerm = matcher.group(4); // consequence term + switch (consequenceTerm.toLowerCase()) { + case "insertion": + consequence.setTerm(SVConsequence.SV_INSERTION.name()); + break; + case "duplication": + consequence.setTerm(SVConsequence.SV_DUPLICATION.name()); + break; + case "deletion": + consequence.setTerm(SVConsequence.SV_DELETION.name()); + default: + break; + } + + consequenceTermSet.add(consequenceTerm); + if (consequenceTermSet.size() > 1) { // more than one distinct term seen so far: consequence is ambiguous + consequence.setTerm(SVConsequence.SV_UNKNOWN.name()); + } + int startExon = Integer.parseInt(startExonStr); int endExon = (endExonStr != null) ? Integer.parseInt(endExonStr) : startExon;