Skip to content

Commit

Permalink
Initial commit for exon curation
Browse files Browse the repository at this point in the history
  • Loading branch information
calvinlu3 committed Sep 3, 2024
1 parent 61f38b9 commit f43db10
Show file tree
Hide file tree
Showing 9 changed files with 262 additions and 135 deletions.
100 changes: 25 additions & 75 deletions src/main/java/org/mskcc/oncokb/curation/service/MainService.java
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
import org.mskcc.oncokb.curation.domain.dto.HotspotInfoDTO;
import org.mskcc.oncokb.curation.domain.dto.ProteinExonDTO;
import org.mskcc.oncokb.curation.domain.enumeration.*;
import org.mskcc.oncokb.curation.model.IntegerRange;
import org.mskcc.oncokb.curation.service.dto.TranscriptDTO;
import org.mskcc.oncokb.curation.service.mapper.TranscriptMapper;
import org.mskcc.oncokb.curation.util.AlterationUtils;
Expand Down Expand Up @@ -280,87 +279,38 @@ public AlterationAnnotationStatus annotateAlteration(ReferenceGenome referenceGe
}
annotationDTO.setHotspot(hotspotInfoDTO);

if (
annotatedGenes.size() == 1 &&
PROTEIN_CHANGE.equals(alteration.getType()) &&
alteration.getStart() != null &&
alteration.getEnd() != null
) {
Optional<TranscriptDTO> transcriptOptional = transcriptService.findByGeneAndReferenceGenomeAndCanonicalIsTrue(
annotatedGenes.stream().iterator().next(),
referenceGenome
);
if (transcriptOptional.isPresent()) {
List<GenomeFragment> utrs = transcriptOptional.orElseThrow().getUtrs();
List<GenomeFragment> exons = transcriptOptional.orElseThrow().getExons();
exons.sort((o1, o2) -> {
int diff = o1.getStart() - o2.getStart();
if (diff == 0) {
diff = o1.getEnd() - o2.getEnd();
}
if (diff == 0) {
diff = (int) (o1.getId() - o2.getId());
}
return diff;
});

List<GenomeFragment> codingExons = new ArrayList<>();
exons.forEach(exon -> {
Integer start = exon.getStart();
Integer end = exon.getEnd();
for (GenomeFragment utr : utrs) {
if (utr.getStart().equals(exon.getStart())) {
start = utr.getEnd() + 1;
}
if (utr.getEnd().equals(exon.getEnd())) {
end = utr.getStart() - 1;
}
}
if (start < end) {
GenomeFragment genomeFragment = new GenomeFragment();
genomeFragment.setType(GenomeFragmentType.EXON);
genomeFragment.setStart(start);
genomeFragment.setEnd(end);
codingExons.add(genomeFragment);
} else {
GenomeFragment genomeFragment = new GenomeFragment();
genomeFragment.setType(GenomeFragmentType.EXON);
genomeFragment.setStart(0);
genomeFragment.setEnd(0);
codingExons.add(genomeFragment);
}
});

if (transcriptOptional.orElseThrow().getStrand() == -1) {
Collections.reverse(codingExons);
}

List<ProteinExonDTO> proteinExons = new ArrayList<>();
int startAA = 1;
int previousExonCodonResidues = 0;
for (int i = 0; i < codingExons.size(); i++) {
GenomeFragment genomeFragment = codingExons.get(i);
if (genomeFragment.getStart() == 0) {
continue;
}
int proteinLength = (previousExonCodonResidues + (genomeFragment.getEnd() - genomeFragment.getStart() + 1)) / 3;
previousExonCodonResidues = (previousExonCodonResidues + (genomeFragment.getEnd() - genomeFragment.getStart() + 1)) % 3;
ProteinExonDTO proteinExonDTO = new ProteinExonDTO();
proteinExonDTO.setExon(i + 1);
IntegerRange integerRange = new IntegerRange();
integerRange.setStart(startAA);
integerRange.setEnd(startAA + proteinLength - 1 + (previousExonCodonResidues > 0 ? 1 : 0));
proteinExonDTO.setRange(integerRange);
proteinExons.add(proteinExonDTO);
startAA += proteinLength;
}
if (annotatedGenes.size() == 1) {
    // Protein-coordinate exon ranges for the single annotated gene; the list is empty when
    // transcriptService.getExons finds no canonical transcript.
    List<ProteinExonDTO> proteinExons = transcriptService.getExons(annotatedGenes.stream().iterator().next(), referenceGenome);
    if (PROTEIN_CHANGE.equals(alteration.getType()) && alteration.getStart() != null && alteration.getEnd() != null) {
        // Keep only the exons whose amino-acid range intersects [alteration start, alteration end].
        List<ProteinExonDTO> overlap = proteinExons
            .stream()
            .filter(exon -> alteration.getStart() <= exon.getRange().getEnd() && alteration.getEnd() >= exon.getRange().getStart())
            .collect(Collectors.toList());
        annotationDTO.setExons(overlap);
    } else if (AlterationUtils.isExon(alteration.getAlteration())) {
        List<ProteinExonDTO> overlap = new ArrayList<>();
        List<String> problematicExonAlts = new ArrayList<>();
        // The alteration is a "+"-separated list of exon alterations; resolve each one separately.
        for (String exonAlterationString : alteration.getAlteration().split("\\s*\\+\\s*")) {
            // Strip every non-digit character to obtain the exon number.
            // NOTE(review): a range such as "Exon 4-8 Deletion" would collapse to 48 here; this
            // assumes ranges were already expanded into single-exon terms - confirm with the parser.
            int exonNumber = Integer.parseInt(exonAlterationString.replaceAll("\\D*", ""));
            // Exon numbers are 1-based, so the highest valid number equals the list size
            // (the previous strict "<" rejected the last exon).
            // NOTE(review): the list is indexed by position; verify that position and exon number
            // always agree for the list returned by getExons.
            if (exonNumber > 0 && exonNumber <= proteinExons.size()) {
                overlap.add(proteinExons.get(exonNumber - 1));
            } else {
                problematicExonAlts.add(exonAlterationString);
            }
        }
        if (problematicExonAlts.isEmpty()) {
            annotationDTO.setExons(overlap);
        } else {
            // At least one exon could not be resolved: report all of them and flag the result as an error.
            alterationWithStatus.setMessage("The following exon(s) do not exist: " + String.join(", ", problematicExonAlts));
            alterationWithStatus.setType(EntityStatusType.ERROR);
        }
    }
}

alterationWithStatus.setAnnotation(annotationDTO);
return alterationWithStatus;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,6 @@
import static org.mskcc.oncokb.curation.config.Constants.ENSEMBL_POST_THRESHOLD;

import java.util.*;
import java.util.List;
import java.util.Optional;
import java.util.stream.Collectors;
import org.apache.commons.lang3.StringUtils;
import org.genome_nexus.ApiException;
Expand All @@ -13,9 +11,11 @@
import org.mskcc.oncokb.curation.config.cache.CacheCategory;
import org.mskcc.oncokb.curation.config.cache.CacheNameResolver;
import org.mskcc.oncokb.curation.domain.*;
import org.mskcc.oncokb.curation.domain.dto.ProteinExonDTO;
import org.mskcc.oncokb.curation.domain.enumeration.GenomeFragmentType;
import org.mskcc.oncokb.curation.domain.enumeration.ReferenceGenome;
import org.mskcc.oncokb.curation.domain.enumeration.SequenceType;
import org.mskcc.oncokb.curation.model.IntegerRange;
import org.mskcc.oncokb.curation.repository.TranscriptRepository;
import org.mskcc.oncokb.curation.service.dto.ClustalOResp;
import org.mskcc.oncokb.curation.service.dto.TranscriptDTO;
Expand Down Expand Up @@ -582,6 +582,77 @@ public List<EnrichedAlignmentResult> getAlignmentResult(
}
}

/**
 * Maps the exons of the gene's canonical transcript to protein (amino-acid) coordinates.
 *
 * <p>Exons are ordered by genomic start, then end, then id. UTR fragments that share an exon's
 * start or end are trimmed off to obtain the coding part. For transcripts on strand -1 the order
 * is reversed. Exons left without a coding part are not returned, but they still count towards
 * the exon number of the ones that follow.
 *
 * @param gene the gene whose canonical transcript is looked up
 * @param referenceGenome the reference genome of the transcript
 * @return one entry per coding exon with its exon number and amino-acid range; an empty list when
 *     no canonical transcript is found
 */
public List<ProteinExonDTO> getExons(Gene gene, ReferenceGenome referenceGenome) {
    Optional<TranscriptDTO> transcriptOptional = this.findByGeneAndReferenceGenomeAndCanonicalIsTrue(gene, referenceGenome);
    if (!transcriptOptional.isPresent()) {
        return new ArrayList<>();
    }
    TranscriptDTO transcript = transcriptOptional.get();

    List<GenomeFragment> utrs = transcript.getUtrs();
    // Sort a copy so the transcript DTO's own exon list is left untouched.
    List<GenomeFragment> exons = new ArrayList<>(transcript.getExons());
    // Integer.compare/Long.compare instead of subtraction: subtraction can overflow, and casting
    // a long id difference to int can flip its sign.
    exons.sort((o1, o2) -> {
        int diff = Integer.compare(o1.getStart(), o2.getStart());
        if (diff == 0) {
            diff = Integer.compare(o1.getEnd(), o2.getEnd());
        }
        if (diff == 0) {
            diff = Long.compare(o1.getId(), o2.getId());
        }
        return diff;
    });

    List<GenomeFragment> codingExons = new ArrayList<>();
    for (GenomeFragment exon : exons) {
        Integer start = exon.getStart();
        Integer end = exon.getEnd();
        for (GenomeFragment utr : utrs) {
            // A UTR beginning at the exon start pushes the coding start past the UTR.
            if (utr.getStart().equals(exon.getStart())) {
                start = utr.getEnd() + 1;
            }
            // A UTR finishing at the exon end pulls the coding end before the UTR.
            if (utr.getEnd().equals(exon.getEnd())) {
                end = utr.getStart() - 1;
            }
        }
        boolean hasCodingPart = start < end;
        GenomeFragment codingExon = new GenomeFragment();
        codingExon.setType(GenomeFragmentType.EXON);
        // Exons without a coding part are kept as a 0/0 placeholder so that the positions of
        // the remaining exons (and therefore their exon numbers) are preserved.
        codingExon.setStart(hasCodingPart ? start : 0);
        codingExon.setEnd(hasCodingPart ? end : 0);
        codingExons.add(codingExon);
    }

    if (transcript.getStrand() == -1) {
        Collections.reverse(codingExons);
    }

    List<ProteinExonDTO> proteinExons = new ArrayList<>();
    int startAA = 1;
    // Bases of an unfinished codon carried over from the previous coding exon (0, 1 or 2).
    int previousExonCodonResidues = 0;
    for (int i = 0; i < codingExons.size(); i++) {
        GenomeFragment codingExon = codingExons.get(i);
        if (codingExon.getStart() == 0) {
            // Placeholder for an exon without a coding part.
            continue;
        }
        int availableBases = previousExonCodonResidues + (codingExon.getEnd() - codingExon.getStart() + 1);
        int proteinLength = availableBases / 3;
        previousExonCodonResidues = availableBases % 3;

        IntegerRange integerRange = new IntegerRange();
        integerRange.setStart(startAA);
        // When the exon ends in the middle of a codon, the range is extended by that residue,
        // which is then also the first residue of the next coding exon.
        integerRange.setEnd(startAA + proteinLength - 1 + (previousExonCodonResidues > 0 ? 1 : 0));

        ProteinExonDTO proteinExonDTO = new ProteinExonDTO();
        proteinExonDTO.setExon(i + 1);
        proteinExonDTO.setRange(integerRange);
        proteinExons.add(proteinExonDTO);
        startAA += proteinLength;
    }
    return proteinExons;
}

private Optional<EnsemblTranscript> getEnsemblTranscriptBySequence(
List<EnsemblTranscript> availableEnsemblTranscripts,
EnsemblSequence sequence
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
import java.util.stream.Collectors;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.text.similarity.JaroWinklerSimilarity;
import org.checkerframework.checker.regex.qual.Regex;
import org.mskcc.oncokb.curation.domain.*;
import org.mskcc.oncokb.curation.domain.enumeration.*;
import org.springframework.stereotype.Component;
Expand All @@ -21,6 +20,10 @@ public class AlterationUtils {
private static final String FUSION_REGEX = "\\s*(\\w*)" + FUSION_SEPARATOR + "(\\w*)\\s*(?i)(fusion)?\\s*";
private static final String FUSION_ALT_REGEX = "\\s*(\\w*)" + FUSION_ALTERNATIVE_SEPARATOR + "(\\w*)\\s+(?i)fusion\\s*";

// A single exon-level alteration such as "Exon 14 Deletion" or "Exon 4-8 Duplication".
// Capture groups: 1 = first exon number, 3 = last exon number of a range (absent for a single exon),
// 4 = alteration kind (Deletion, Insertion or Duplication). The pattern carries no case-insensitive flag.
private static final String EXON_ALT_REGEX = "Exon\\s+(\\d+)(-(\\d+))?\\s+(Deletion|Insertion|Duplication)";

// One or more EXON_ALT_REGEX alterations joined by "+" with optional whitespace around it,
// e.g. "Exon 4 Deletion + Exon 5 Deletion".
private static final String EXON_ALTS_REGEX = "(" + EXON_ALT_REGEX + ")(\\s*\\+\\s*" + EXON_ALT_REGEX + ")*";

private Alteration parseFusion(String alteration) {
Alteration alt = new Alteration();

Expand Down Expand Up @@ -90,6 +93,36 @@ private Alteration parseGenomicChange(String genomicChange) {
return alt;
}

/**
 * Builds a structural-variant {@link Alteration} from an exon-level alteration string.
 *
 * <p>Every match of {@code EXON_ALT_REGEX} is expanded into one term per exon, so
 * "Exon 4-6 Deletion" becomes "Exon 4 Deletion + Exon 5 Deletion + Exon 6 Deletion". The original
 * string is kept as the alteration name and the consequence term is set to UNKNOWN.
 *
 * @param alteration the raw alteration string, e.g. "Exon 4-8 Deletion + Exon 10 Deletion"
 * @return the alteration carrying the expanded, "+"-joined exon terms
 * @throws NumberFormatException if an exon number does not fit into an int
 */
private Alteration parseExonAlteration(String alteration) {
    Consequence consequence = new Consequence();
    consequence.setTerm(UNKNOWN.name());

    Alteration alt = new Alteration();
    alt.setType(AlterationType.STRUCTURAL_VARIANT);
    alt.setConsequence(consequence);

    List<String> splitResults = new ArrayList<>();
    Matcher matcher = Pattern.compile(EXON_ALT_REGEX).matcher(alteration);
    while (matcher.find()) {
        int firstExon = Integer.parseInt(matcher.group(1));
        // Group 3 is only present for a range ("Exon 4-8 ..."); a single exon is a range of one.
        String lastExonStr = matcher.group(3);
        int lastExon = (lastExonStr != null) ? Integer.parseInt(lastExonStr) : firstExon;
        String consequenceTerm = matcher.group(4);

        // Accept a range written in either order: without this, "Exon 8-4 Deletion" expanded to
        // nothing and the term was silently dropped from the result.
        int lowExon = Math.min(firstExon, lastExon);
        int highExon = Math.max(firstExon, lastExon);
        for (int exon = lowExon; exon <= highExon; exon++) {
            splitResults.add("Exon " + exon + " " + consequenceTerm);
        }
    }

    alt.setAlteration(String.join(" + ", splitResults));
    alt.setName(alteration);
    return alt;
}

public EntityStatus<Alteration> parseAlteration(String alteration) {
EntityStatus<Alteration> entityWithStatus = new EntityStatus<>();
String message = "";
Expand Down Expand Up @@ -130,6 +163,14 @@ public EntityStatus<Alteration> parseAlteration(String alteration) {
return entityWithStatus;
}

if (isExon(alteration)) {
Alteration alt = parseExonAlteration(alteration);
entityWithStatus.setEntity(alt);
entityWithStatus.setType(status);
entityWithStatus.setMessage(message);
return entityWithStatus;
}

// the following is to parse the alteration as protein change
MutationConsequence term = UNKNOWN;
String ref = null;
Expand Down Expand Up @@ -474,6 +515,12 @@ public static Boolean isGenomicChange(String alteration) {
return m.matches();
}

/**
 * Tells whether the whole string is an exon-level alteration, i.e. it matches
 * {@code EXON_ALTS_REGEX} from its first to its last character.
 *
 * @param alteration the alteration string to check
 * @return {@code true} when the entire string matches, {@code false} otherwise
 */
public static Boolean isExon(String alteration) {
    return Pattern.matches(EXON_ALTS_REGEX, alteration);
}

public static String removeExclusionCriteria(String proteinChange) {
Matcher exclusionMatch = getExclusionCriteriaMatcher(proteinChange);
if (exclusionMatch.matches()) {
Expand Down
20 changes: 19 additions & 1 deletion src/main/webapp/app/config/constants/regex.spec.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { REFERENCE_LINK_REGEX, FDA_SUBMISSION_REGEX } from './regex';
import { REFERENCE_LINK_REGEX, FDA_SUBMISSION_REGEX, EXON_ALTERATION_REGEX } from './regex';

describe('Regex constants test', () => {
describe('Reference link regex', () => {
Expand Down Expand Up @@ -75,4 +75,22 @@ describe('Regex constants test', () => {
expect(FDA_SUBMISSION_REGEX.test(submission)).toEqual(expected);
});
});

describe('Exon alteration regex', () => {
  // Each row is [alteration string, whether EXON_ALTERATION_REGEX is expected to match it].
  test.each([
    ['Exon 14 Deletion', true],
    ['Exon 14 Duplication', true],
    ['Exon 4 Insertion', true],
    ['Exon 4-8 Deletion', true],
    ['Exon 4 InSERTion', true],
    ['Exon 4 Duplication', true],
    ['Exon 4 Deletion + Exon 5 Deletion + Exon 6 Deletion', true],
    ['Exon 4-8 Deletion + Exon 10 Deletion', true],
    ['Exon 4 Deletion+Exon 5 Deletion', true],
    ['Exon 14 Del', false],
    ['Exon 4 8 Insertion', false],
    // Placeholders are filled in argument order (alteration, then expected); "%b" is not a
    // placeholder Jest understands, so "%s" is used for both values.
  ])('"%s" should match: %s', (alteration, expected) => {
    expect(EXON_ALTERATION_REGEX.test(alteration)).toEqual(expected);
  });
});
});
2 changes: 2 additions & 0 deletions src/main/webapp/app/config/constants/regex.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,5 @@ export const UUID_REGEX = new RegExp('\\w{8}-\\w{4}-\\w{4}-\\w{4}-\\w{12}');
export const WHOLE_NUMBER_REGEX = new RegExp('^\\d+$');

export const INTEGER_REGEX = /^-?\d+$/;

// Matches one or more exon alterations joined by "+", e.g. "Exon 4-8 Deletion + Exon 10 Deletion".
// Capture groups of the first alteration: 1 = whole term, 2 = first exon, 4 = last exon of a range,
// 5 = alteration kind. The following terms repeat the pattern itself; a "\1" backreference would
// only accept a literal repetition of the first term's text.
// The expression is case-insensitive and not anchored, so `test` succeeds for any string that
// contains an exon alteration.
export const EXON_ALTERATION_REGEX =
  /(Exon\s+(\d+)(-(\d+))?\s+(Deletion|Insertion|Duplication))(\s*\+\s*Exon\s+\d+(?:-\d+)?\s+(?:Deletion|Insertion|Duplication))*/i;
26 changes: 26 additions & 0 deletions src/main/webapp/app/hooks/useTextareaAutoHeight.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
import React, { useEffect } from 'react';
import { InputType } from 'zlib';

/**
 * Grows or shrinks a textarea so that its height always fits its content.
 *
 * The effect runs once after mount (empty dependency list): it does nothing unless the ref holds
 * an element at that time and `type` is 'textarea'. While active, every resize notification for
 * the element schedules a height recalculation on the next animation frame.
 *
 * NOTE(review): `InputType` is imported from 'zlib' in this file; it was presumably meant to be
 * reactstrap's input type - confirm and adjust the import.
 *
 * @param inputRef ref to the input or textarea element to resize
 * @param type the input type; only 'textarea' enables the behaviour
 */
export const useTextareaAutoHeight = (
  inputRef: React.MutableRefObject<HTMLInputElement | HTMLTextAreaElement | null>,
  type: InputType | undefined,
) => {
  useEffect(() => {
    const input = inputRef.current;
    if (!input || type !== 'textarea') {
      return;
    }

    // Id of the animation frame that is scheduled but has not run yet, if any.
    let pendingFrame: number | undefined;

    const resizeObserver = new ResizeObserver(() => {
      // Only the latest request matters; every frame performs the same recalculation.
      if (pendingFrame !== undefined) {
        window.cancelAnimationFrame(pendingFrame);
      }
      pendingFrame = window.requestAnimationFrame(() => {
        pendingFrame = undefined;
        // Reset first so that scrollHeight reflects the content rather than the current height.
        input.style.height = 'auto';
        input.style.height = `${input.scrollHeight}px`;
      });
    });
    resizeObserver.observe(input);

    return () => {
      resizeObserver.disconnect();
      // Do not let a queued frame touch the element after the component is gone.
      if (pendingFrame !== undefined) {
        window.cancelAnimationFrame(pendingFrame);
      }
    };
  }, []);
};
20 changes: 2 additions & 18 deletions src/main/webapp/app/shared/firebase/input/RealtimeBasicInput.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import { FormFeedback, Input, Label, LabelProps } from 'reactstrap';
import { InputType } from 'reactstrap/types/lib/Input';
import * as styles from './styles.module.scss';
import { Unsubscribe } from 'firebase/database';
import { useTextareaAutoHeight } from 'app/hooks/useTextareaAutoHeight';

export enum RealtimeInputType {
TEXT = 'text',
Expand Down Expand Up @@ -116,24 +117,7 @@ const RealtimeBasicInput: React.FunctionComponent<IRealtimeBasicInput> = (props:
};
}, [firebasePath, db]);

useEffect(() => {
const input = inputRef.current;
if (!input || type !== RealtimeInputType.TEXTAREA) {
return;
}

const resizeObserver = new ResizeObserver(() => {
window.requestAnimationFrame(() => {
input.style.height = 'auto';
input.style.height = `${input.scrollHeight}px`;
});
});
resizeObserver.observe(input);

return () => {
resizeObserver.disconnect();
};
}, []);
useTextareaAutoHeight(inputRef, type);

const labelComponent = label && (
<RealtimeBasicLabel label={label} labelIcon={labelIcon} id={id} labelClass={isCheckType ? 'mb-0' : 'fw-bold'} />
Expand Down
Loading

0 comments on commit f43db10

Please sign in to comment.