Skip to content

Commit

Permalink
Parse protein change case insensitive
Browse files Browse the repository at this point in the history
  • Loading branch information
zhx828 committed Sep 18, 2024
1 parent 860dbab commit d54b6c1
Show file tree
Hide file tree
Showing 8 changed files with 800 additions and 265 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ public AlterationAnnotationStatus annotateAlteration(ReferenceGenome referenceGe

// update associated genes
Set<Gene> genes = alteration.getGenes();
if (parsedAlteration.getType().equals(STRUCTURAL_VARIANT) && !parsedAlteration.getGenes().isEmpty()) {
if (STRUCTURAL_VARIANT.equals(parsedAlteration.getType()) && !parsedAlteration.getGenes().isEmpty()) {
genes = parsedAlteration.getGenes();
}
Set<Gene> annotatedGenes = genes
Expand Down Expand Up @@ -230,7 +230,8 @@ public AlterationAnnotationStatus annotateAlteration(ReferenceGenome referenceGe
) {
String refRe = String.valueOf(canonicalSequenceOptional.orElseThrow().getSequence().charAt(alteration.getStart() - 1));
if (!StringUtils.isEmpty(refRe)) {
if (StringUtils.isEmpty(alteration.getRefResidues())) {
// only set the reference AA when the alteration happens on one position
if (StringUtils.isEmpty(alteration.getRefResidues()) && alteration.getStart().equals(alteration.getEnd())) {
alteration.setRefResidues(refRe);
} else {
// If the AA in the alteration differs from the canonical transcript, and it's not X, we give a warning
Expand Down
381 changes: 118 additions & 263 deletions src/main/java/org/mskcc/oncokb/curation/util/AlterationUtils.java

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
package org.mskcc.oncokb.curation.util.parser;

import org.mskcc.oncokb.curation.domain.enumeration.EntityStatusType;

/**
 * Mutable holder for the outcome of a parsing attempt: a status, an optional
 * message, and the entity the outcome refers to.
 *
 * @param <T> type of the entity carried alongside the status
 */
public class ParsingStatus<T> {

    /** Outcome of the parse; stays {@code null} until {@link #setStatus} is called with a value. */
    EntityStatusType status;

    /** Free-form text accompanying the status. */
    String message;

    /** The entity this status refers to. */
    T entity;

    /**
     * Reports whether a status has been recorded.
     *
     * @return {@code true} when the status is non-null, {@code false} otherwise
     */
    public Boolean isParsed() {
        return status != null;
    }

    /** @return the recorded status, or {@code null} if none has been set */
    public EntityStatusType getStatus() {
        return this.status;
    }

    /** @param status the status to record */
    public void setStatus(EntityStatusType status) {
        this.status = status;
    }

    /** @return the message stored with the status, or {@code null} if none has been set */
    public String getMessage() {
        return this.message;
    }

    /** @param message the message to store with the status */
    public void setMessage(String message) {
        this.message = message;
    }

    /** @return the carried entity, or {@code null} if none has been set */
    public T getEntity() {
        return this.entity;
    }

    /** @param entity the entity to carry */
    public void setEntity(T entity) {
        this.entity = entity;
    }
}

Large diffs are not rendered by default.

20 changes: 20 additions & 0 deletions src/test/java/org/mskcc/oncokb/curation/TestHelper.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
package org.mskcc.oncokb.curation;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;

/**
 * Shared helpers for tests that read fixture files from disk.
 */
public class TestHelper {

    /**
     * Opens a test fixture file for buffered, line-oriented reading.
     *
     * <p>The file is decoded explicitly as UTF-8 instead of with the platform default charset, so a
     * fixture reads the same way on every machine. The caller owns the returned reader and is
     * responsible for closing it.
     *
     * @param filePath path of the file to open; may be {@code null}
     * @return a reader positioned at the start of the file, or {@code null} (after printing a hint
     *     to standard output) when {@code filePath} is {@code null}
     * @throws FileNotFoundException if the file does not exist, is a directory, or cannot be opened
     *     for reading
     */
    public static BufferedReader getTestFileBufferedReader(String filePath) throws FileNotFoundException {
        if (filePath == null) {
            System.out.println("Please specify the testing file path");
            return null;
        }

        // FileInputStream keeps the declared FileNotFoundException contract; the charset-aware
        // FileReader constructor and Files.newBufferedReader both throw the broader IOException.
        FileInputStream input = new FileInputStream(new File(filePath));
        return new BufferedReader(new InputStreamReader(input, StandardCharsets.UTF_8));
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
package org.mskcc.oncokb.curation.test;

import static org.junit.Assert.assertEquals;
import static org.mskcc.oncokb.curation.util.AlterationUtils.parseProteinChange;

import org.junit.jupiter.api.Test;
import org.mskcc.oncokb.curation.domain.Alteration;
import org.mskcc.oncokb.curation.domain.AlterationAnnotationStatus;
import org.mskcc.oncokb.curation.domain.EntityStatus;

/**
 * Checks the protein change that {@code parseProteinChange} stores on the parsed alteration when
 * the input is given in a different letter case.
 */
public class AlterationUtilsTest {

    /**
     * Parses three protein changes in turn and verifies the revised protein change kept on the entity.
     */
    @Test
    public void testRevisedProteinChangeInParseProteinChange() {
        // One status object is deliberately shared by all three parses, in this order.
        EntityStatus<Alteration> parsingResult = new AlterationAnnotationStatus();

        assertRevisedProteinChange(parsingResult, "v600e", "V600E");
        assertRevisedProteinChange(parsingResult, "*757kext*", "*757Kext*");
        assertRevisedProteinChange(parsingResult, "T599delinsip", "T599delinsIP");
    }

    /**
     * Parses {@code input} into {@code parsingResult} and asserts the protein change on its entity.
     *
     * @param parsingResult status object handed to the parser and read back afterwards
     * @param input protein change passed to the parser
     * @param expected protein change expected on the parsed entity
     */
    private static void assertRevisedProteinChange(EntityStatus<Alteration> parsingResult, String input, String expected) {
        parseProteinChange(parsingResult, input);
        assertEquals(expected, parsingResult.getEntity().getProteinChange());
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
package org.mskcc.oncokb.curation.test;

import static org.junit.Assert.assertEquals;
import static org.mskcc.oncokb.curation.TestHelper.getTestFileBufferedReader;
import static org.mskcc.oncokb.curation.util.AlterationUtils.parseProteinChange;

import java.io.BufferedReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import org.apache.commons.lang3.StringUtils;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.mskcc.oncokb.curation.domain.Alteration;
import org.mskcc.oncokb.curation.domain.AlterationAnnotationStatus;
import org.mskcc.oncokb.curation.domain.EntityStatus;

/**
 * Data-driven test for {@code parseProteinChange}.
 *
 * <p>Each non-comment, non-blank line of the TSV fixture becomes one test case. Columns, in order:
 * protein change, expected consequence, expected reference allele, expected variant allele,
 * expected protein start, expected protein end. Missing trailing columns are treated as empty.
 */
@RunWith(Parameterized.class)
public class ParseProteinChangeParameterizedTest {

    private static final String TEST_FILE_PATH = "src/test/resources/data/test_parse_protein_change.tsv";

    private final String proteinChange;
    private final String expectedConsequence;
    private final String expectedRefAllele;
    private final String expectedVarAllele;
    private final String expectedProteinStart;
    private final String expectedProteinEnd;

    /**
     * Receives one fixture row from the parameterized runner.
     *
     * @param proteinChange protein change string handed to the parser
     * @param expectedConsequence expected consequence term (upper-cased by the loader), or empty
     * @param expectedRefAllele expected reference residues, or empty
     * @param expectedVarAllele expected variant residues, or empty
     * @param expectedProteinStart expected protein start as text, or empty
     * @param expectedProteinEnd expected protein end as text, or empty
     */
    public ParseProteinChangeParameterizedTest(
        String proteinChange,
        String expectedConsequence,
        String expectedRefAllele,
        String expectedVarAllele,
        String expectedProteinStart,
        String expectedProteinEnd
    ) {
        this.proteinChange = proteinChange;
        this.expectedConsequence = expectedConsequence;
        this.expectedRefAllele = expectedRefAllele;
        this.expectedVarAllele = expectedVarAllele;
        this.expectedProteinStart = expectedProteinStart;
        this.expectedProteinEnd = expectedProteinEnd;
    }

    /**
     * Supplies the test cases to the parameterized runner.
     *
     * @return one six-element array per fixture row, in file order
     * @throws IOException if the fixture file cannot be opened or read
     */
    @Parameterized.Parameters
    public static Collection<String[]> getParameters() throws IOException {
        return importer();
    }

    /**
     * Reads the TSV fixture into constructor argument arrays.
     *
     * <p>Lines starting with {@code #} and blank lines are skipped. The number of loaded cases is
     * reported on standard error.
     *
     * @return one six-element array per fixture row, in file order
     * @throws IOException if the fixture file cannot be opened or read
     */
    private static List<String[]> importer() throws IOException {
        List<String[]> queries = new ArrayList<>();

        // try-with-resources closes the fixture reader even when reading fails part-way through.
        try (BufferedReader buf = getTestFileBufferedReader(TEST_FILE_PATH)) {
            String line;
            while ((line = buf.readLine()) != null) {
                if (line.startsWith("#") || line.trim().isEmpty()) {
                    continue;
                }
                // A non-blank line always yields at least one column, so parts[0] is safe.
                // String.split drops trailing empty columns; columnOrEmpty fills those back in.
                String[] parts = line.split("\t");
                queries.add(
                    new String[] {
                        parts[0],
                        columnOrEmpty(parts, 1).toUpperCase(),
                        columnOrEmpty(parts, 2),
                        columnOrEmpty(parts, 3),
                        columnOrEmpty(parts, 4),
                        columnOrEmpty(parts, 5),
                    }
                );
            }
        }

        System.err.println("Contains " + queries.size() + " queries.");
        System.err.println("Done.");

        return queries;
    }

    /**
     * Returns the column at {@code index}, or an empty string when the row has fewer columns.
     */
    private static String columnOrEmpty(String[] parts, int index) {
        return index < parts.length ? parts[index] : "";
    }

    /**
     * Compares every parsed field of the alteration with the expected fixture values.
     * A {@code null} (or empty) parsed value is compared as the empty string.
     */
    private void testSuite(
        Alteration annotatedAlteration,
        String proteinChange,
        String expectedConsequence,
        String expectedRefAllele,
        String expectedVarAllele,
        String expectedProteinStart,
        String expectedProteinEnd
    ) {
        assertEquals(
            "Not expected consequence. Query: " + proteinChange,
            expectedConsequence,
            annotatedAlteration.getConsequence() == null ? "" : annotatedAlteration.getConsequence().getTerm()
        );
        assertEquals(
            "Not expected ref allele. Query: " + proteinChange,
            expectedRefAllele,
            StringUtils.isEmpty(annotatedAlteration.getRefResidues()) ? "" : annotatedAlteration.getRefResidues()
        );
        assertEquals(
            "Not expected var allele. Query: " + proteinChange,
            expectedVarAllele,
            StringUtils.isEmpty(annotatedAlteration.getVariantResidues()) ? "" : annotatedAlteration.getVariantResidues()
        );
        assertEquals(
            "Not expected protein start. Query: " + proteinChange,
            expectedProteinStart,
            annotatedAlteration.getStart() == null ? "" : Integer.toString(annotatedAlteration.getStart())
        );
        assertEquals(
            "Not expected protein end. Query: " + proteinChange,
            expectedProteinEnd,
            annotatedAlteration.getEnd() == null ? "" : Integer.toString(annotatedAlteration.getEnd())
        );
    }

    /**
     * Parses this case's protein change and checks the result against the fixture expectations.
     */
    @Test
    public void testSummary() {
        EntityStatus<Alteration> alterationEntityStatus = new AlterationAnnotationStatus();
        parseProteinChange(alterationEntityStatus, proteinChange);
        testSuite(
            alterationEntityStatus.getEntity(),
            proteinChange,
            expectedConsequence,
            expectedRefAllele,
            expectedVarAllele,
            expectedProteinStart,
            expectedProteinEnd
        );
    }
}
92 changes: 92 additions & 0 deletions src/test/resources/data/test_parse_protein_change.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
#
# List of alterations for testing. Each line has the following format:
#
# Protein Change\tExpected consequence\tExpected Reference Allele\tExpected Variant Allele\tExpected Protein Start\tExpected Protein End
#

# missense variants
N505I missense_variant N I 505 505
E323_D324delinsKN missense_variant 323 324
814_852mis missense_variant 814 852
IK744KI missense_variant IK KI 744 745

# inframes
Q58_Q59insL inframe_insertion 58 59
58_Q59insL inframe_insertion 58 59
Q58_59insL inframe_insertion 58 59
58_59insL inframe_insertion 58 59
P68_C77dup inframe_insertion 68 77
P68_77dup inframe_insertion 68 77
68_C77dup inframe_insertion 68 77
68_77dup inframe_insertion 68 77
T599delinsIP inframe_insertion T 599 599
599delinsIP inframe_insertion 599 599
599delins UNKNOWN 599 599
599delins2 UNKNOWN 599 599
I744KI inframe_insertion I KI 744 744
M1ext-1 inframe_insertion 1 1
1ext-1 inframe_insertion 1 1
1ext inframe_insertion 1 1
C359del inframe_deletion C 359 359
359del inframe_deletion 359 359
IK744A inframe_deletion IK A 744 745
1459_1468del inframe_deletion 1459 1468
A1459_D1468del inframe_deletion 1459 1468
1459_D1468del inframe_deletion 1459 1468
A1459_1468del inframe_deletion 1459 1468
D1161_S1172delinsE inframe_deletion 1161 1172
D1161_1172delinsE inframe_deletion 1161 1172
1161_S1172delinsE inframe_deletion 1161 1172
1161_1172delinsE inframe_deletion 1161 1172

# frame shifts
S859Afs*12 frameshift_variant S 859 859
859Afs*12 frameshift_variant 859 859
S859fs*12 frameshift_variant S 859 859
G314fs frameshift_variant G 314 314
314fs frameshift_variant 314 314
S330_S352fs frameshift_variant 330 352
330_S352fs frameshift_variant 330 352
S330_352fs frameshift_variant 330 352
330_352fs frameshift_variant 330 352

# truncating mutations
W143_A314trunc feature_truncation 143 314
W143_314trunc feature_truncation 143 314
143_A314trunc feature_truncation 143 314
422_605trunc feature_truncation 422 605
Truncating Mutations feature_truncation

# splice variants
596_619splice splice_region_variant 596 619
X1429_splice splice_region_variant 1429 1429
1429_splice splice_region_variant 1429 1429
M1I start_lost M I 1 1
1I start_lost I 1 1
M1? start_lost M ? 1 1
A149* stop_gained A * 149 149
149* stop_gained * 149 149
*149* stop_lost * * 149 149

# extension
*757Kext*36 stop_lost * 757 757
757Kext*36 stop_lost * 757 757
*757ext*36 stop_lost * 757 757
*757Kext* stop_lost * 757 757
*757Kext*? stop_lost * 757 757

# synonymous variants
K24K synonymous_variant K K 24 24

# any
449_514mut any 449 514

# Unknown variants
D399 NA D 399 399
MCUR1-AKT1 fusion UNKNOWN
Fusions UNKNOWN
Deletion UNKNOWN
Amplification UNKNOWN
Oncogenic Mutations UNKNOWN
Oncogenic Mutations {excluding V600} UNKNOWN

0 comments on commit d54b6c1

Please sign in to comment.