Skip to content

Commit

Permalink
Add protein start+end and add consequence
Browse files Browse the repository at this point in the history
  • Loading branch information
calvinlu3 committed Sep 3, 2024
1 parent f43db10 commit b0219ab
Show file tree
Hide file tree
Showing 4 changed files with 48 additions and 16 deletions.
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
package org.mskcc.oncokb.curation.domain.enumeration;

/**
 * Consequence terms for copy number alterations (CNA).
 *
 * NOTE(review): this listing is a rendered diff whose +/- markers were stripped. Per the
 * "@@ -1,9 +1,9 @@" hunk header, the first five constants appear to be the removed
 * (pre-commit) names and the five CNA_-prefixed constants their replacements — confirm
 * against the repository before relying on the unprefixed names.
 */
public enum CNAConsequence {
// Unprefixed names — presumably the pre-commit side of the diff.
AMPLIFICATION,
DELETION,
GAIN,
LOSS,
UNKNOWN,
// CNA_-prefixed names — presumably the post-commit side of the diff.
CNA_AMPLIFICATION,
CNA_DELETION,
CNA_GAIN,
CNA_LOSS,
CNA_UNKNOWN,
}
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
package org.mskcc.oncokb.curation.domain.enumeration;

/**
 * Consequence terms for structural variants (SV).
 *
 * Callers in this commit pass the constant's {@code name()} to {@code Consequence.setTerm},
 * so the constant name itself is the term string that gets set.
 *
 * NOTE(review): this listing is a rendered diff whose +/- markers were stripped. Per the
 * "@@ -1,11 +1,11 @@" hunk header, the first seven constants appear to be the removed
 * (pre-commit) names and the seven SV_-prefixed constants their replacements — confirm
 * against the repository before relying on the unprefixed names.
 */
public enum SVConsequence {
// Unprefixed names — presumably the pre-commit side of the diff.
DELETION,
TRANSLOCATION,
DUPLICATION,
INSERTION,
INVERSION,
FUSION,
UNKNOWN,
// SV_-prefixed names — presumably the post-commit side of the diff.
SV_DELETION,
SV_TRANSLOCATION,
SV_DUPLICATION,
SV_INSERTION,
SV_INVERSION,
SV_FUSION,
SV_UNKNOWN,
}
15 changes: 14 additions & 1 deletion src/main/java/org/mskcc/oncokb/curation/service/MainService.java
Original file line number Diff line number Diff line change
Expand Up @@ -293,13 +293,26 @@ public AlterationAnnotationStatus annotateAlteration(ReferenceGenome referenceGe
List<String> problematicExonAlts = new ArrayList<>();
for (String exonAlterationString : Arrays.asList(alteration.getAlteration().split("\\s*\\+\\s*"))) {
Integer exonNumber = Integer.parseInt(exonAlterationString.replaceAll("\\D*", ""));
if (exonNumber > 0 && exonNumber < proteinExons.size()) {
if (exonNumber > 0 && exonNumber < proteinExons.size() + 1) {
overlap.add(proteinExons.get(exonNumber - 1));
} else {
problematicExonAlts.add(exonAlterationString);
}
}
if (problematicExonAlts.isEmpty()) {
overlap.sort(Comparator.comparingInt(ProteinExonDTO::getExon));
Boolean isConsecutiveExonRange =
overlap
.stream()
.map(ProteinExonDTO::getExon)
.reduce((prev, curr) -> (curr - prev == 1) ? curr : Integer.MIN_VALUE)
.orElse(Integer.MIN_VALUE) !=
Integer.MIN_VALUE;
if (isConsecutiveExonRange && overlap.size() > 0) {
alteration.setStart(overlap.get(0).getRange().getStart());
alteration.setEnd(overlap.get(overlap.size() - 1).getRange().getEnd());
}

annotationDTO.setExons(overlap);
} else {
StringBuilder sb = new StringBuilder();
Expand Down
25 changes: 22 additions & 3 deletions src/main/java/org/mskcc/oncokb/curation/util/AlterationUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ private Alteration parseFusion(String alteration) {
Alteration alt = new Alteration();

Consequence consequence = new Consequence();
consequence.setTerm(SVConsequence.FUSION.name());
consequence.setTerm(SVConsequence.SV_FUSION.name());
alt.setType(AlterationType.STRUCTURAL_VARIANT);
alt.setConsequence(consequence);

Expand All @@ -52,7 +52,7 @@ private Alteration parseFusion(String alteration) {
}

private Alteration parseCopyNumberAlteration(String alteration) {
CNAConsequence cnaTerm = CNAConsequence.UNKNOWN;
CNAConsequence cnaTerm = CNAConsequence.CNA_UNKNOWN;

Optional<CNAConsequence> cnaConsequenceOptional = getCNAConsequence(alteration);
if (cnaConsequenceOptional.isPresent()) {
Expand Down Expand Up @@ -96,19 +96,38 @@ private Alteration parseGenomicChange(String genomicChange) {
private Alteration parseExonAlteration(String alteration) {
Alteration alt = new Alteration();
Consequence consequence = new Consequence();
consequence.setTerm(UNKNOWN.name());
consequence.setTerm(SVConsequence.SV_UNKNOWN.name());
alt.setType(AlterationType.STRUCTURAL_VARIANT);
alt.setConsequence(consequence);

Pattern pattern = Pattern.compile(EXON_ALT_REGEX);
Matcher matcher = pattern.matcher(alteration);
List<String> splitResults = new ArrayList<>();
Set<String> consequenceTermSet = new HashSet<>();

while (matcher.find()) {
String startExonStr = matcher.group(1); // The start exon number
String endExonStr = matcher.group(3); // The end exon number (if present)
String consequenceTerm = matcher.group(4); // consequence term

switch (consequenceTerm.toLowerCase()) {
case "insertion":
consequence.setTerm(SVConsequence.SV_INSERTION.name());
break;
case "duplication":
consequence.setTerm(SVConsequence.SV_DUPLICATION.name());
break;
case "deletion":
consequence.setTerm(SVConsequence.SV_DELETION.name());
default:
break;
}

consequenceTermSet.add(consequenceTerm);
if (consequenceTermSet.size() > 0) {
consequence.setTerm(SVConsequence.SV_UNKNOWN.name());
}

int startExon = Integer.parseInt(startExonStr);
int endExon = (endExonStr != null) ? Integer.parseInt(endExonStr) : startExon;

Expand Down

0 comments on commit b0219ab

Please sign in to comment.