-
Notifications
You must be signed in to change notification settings - Fork 0
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Enhancement/agfusion expansion #138
base: develop
Are you sure you want to change the base?
Changes from all commits
ecd4504
413d9ff
d3d4b6b
0697238
b276b38
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,172 @@ | ||
ABL1 | ||
ACVR2A | ||
AKT1 | ||
ALK | ||
AR | ||
ARAF | ||
ARHGAP26 | ||
ATM | ||
BARD1 | ||
BCL10 | ||
BCL2 | ||
BCL6 | ||
BCOR | ||
BCORL1 | ||
BRAF | ||
BRCA1 | ||
BRCA2 | ||
BRD3 | ||
BRD4 | ||
BRIP1 | ||
CAMTA1 | ||
CBFB | ||
CCNB3 | ||
CCND1 | ||
CCND3 | ||
CDK12 | ||
CHEK1 | ||
CHEK2 | ||
CIC | ||
CREBBP | ||
CRLF2 | ||
CSF1 | ||
CSF1R | ||
DEK | ||
DNAJB1 | ||
EGFR | ||
EPC1 | ||
ERBB2 | ||
ERBB3 | ||
ERBB4 | ||
ERG | ||
ESR1 | ||
ETV1 | ||
ETV4 | ||
ETV5 | ||
ETV6 | ||
EWSR1 | ||
EZH2 | ||
FGF1 | ||
FGFR1 | ||
FGFR2 | ||
FGFR3 | ||
FGFR4 | ||
FGR | ||
FLT1 | ||
FLT3 | ||
FOS | ||
FOSB | ||
FOXO1 | ||
FOXP1 | ||
FOXR2 | ||
FUS | ||
GATA1 | ||
GLI1 | ||
GRB7 | ||
GRM1 | ||
HMGA2 | ||
IDH1 | ||
IDH2 | ||
IGF1R | ||
IRF4 | ||
JAK2 | ||
JAK3 | ||
JAZF1 | ||
KIT | ||
KMT2A | ||
KRAS | ||
LTK | ||
MALT1 | ||
MAML2 | ||
MAP2K1 | ||
MAP2K2 | ||
MAST1 | ||
MBTD1 | ||
MEAF6 | ||
MET | ||
MGEA5 | ||
MKL2 | ||
MN1 | ||
MUSK | ||
MYB | ||
MYBL1 | ||
MYC | ||
NCOA1 | ||
NCOA2 | ||
NCOA3 | ||
NF1 | ||
NFATC2 | ||
NFE2L2 | ||
NOTCH1 | ||
NOTCH2 | ||
NR4A3 | ||
NRAS | ||
NRG1 | ||
NRG2 | ||
NSD3 | ||
NTRK1 | ||
NTRK2 | ||
NTRK3 | ||
NUP214 | ||
NUTM1 | ||
NUTM2A | ||
NUTM2B | ||
PALB2 | ||
PAX3 | ||
PAX5 | ||
PAX7 | ||
PDGFB | ||
PDGFD | ||
PDGFRA | ||
PDGFRB | ||
PGR | ||
PHF1 | ||
PIK3CA | ||
PLAG1 | ||
PPARG | ||
PRB3 | ||
PRKACA | ||
PRKACB | ||
PRKCA | ||
PRKCB | ||
PRKD1 | ||
PRKD2 | ||
PRKD3 | ||
PTCH1 | ||
PTPN1 | ||
RAD51B | ||
RAD51C | ||
RAD51D | ||
RAD54L | ||
RAF1 | ||
RARA | ||
RELA | ||
RET | ||
RLF | ||
ROS1 | ||
RSPO2 | ||
RSPO3 | ||
RUNX1 | ||
SMARCB1 | ||
SRF | ||
SS18 | ||
SS18L1 | ||
STAT6 | ||
TAF15 | ||
TCF12 | ||
TCF3 | ||
TERT | ||
TFE3 | ||
TFEB | ||
TFG | ||
THADA | ||
TMPRSS2 | ||
TP63 | ||
TSC1 | ||
TSC2 | ||
USP6 | ||
VGLL2 | ||
VGLL3 | ||
WWTR1 | ||
YAP1 | ||
YWHAE |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -15,7 +15,7 @@ RT_call_filter=1 | |
blck_filter=1 | ||
ANC_filter=1 | ||
usage() { | ||
echo "Usage: Metafusion_forte.sh --num_tools=<minNumToolsCalled> --genome_fasta <FASTA adds SEQ to fusion> --recurrent_bedpe <blacklistFusions> --outdir <outputDirectory> --cff <cffFile> --gene_bed <geneBedFile> --gene_info <geneInfoFile> --clinical_genes <clinicalGenes>" 1>&2; | ||
echo "Usage: Metafusion_forte.sh --num_tools=<minNumToolsCalled> --genome_fasta <FASTA adds SEQ to fusion> --recurrent_bedpe <blacklistFusions> --outdir <outputDirectory> --cff <cffFile> --gene_bed <geneBedFile> --gene_info <geneInfoFile> --clinical_genes <transcript_allowlist>" 1>&2; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Should we also rename the parameter? |
||
exit 1; | ||
} | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,109 @@ | ||
#!/usr/local/bin/Rscript | ||
# __author__ = "Alexandria Dymun" | ||
# __email__ = "[email protected]" | ||
# __version__ = "0.0.1" | ||
|
||
|
||
suppressPackageStartupMessages({ | ||
library(dplyr) | ||
library(data.table) | ||
}) | ||
|
||
#' Print command-line usage for this script as messages (stderr).
#'
#' The option names shown are the ones the argument validation in this script
#' accepts: `cff`, `agfusion`, `transcript_allowlist` and `out_prefix`.
#' `parse_args()` converts hyphens in option names to underscores, so
#' `--out-prefix` is read as `out_prefix`.
#'
#' @return Called for its side effect of emitting the usage text.
usage <- function() {
  message("Usage:")
  # Previously advertised --cff-file / --agfusion-file, which the option
  # validation rejects as "Invalid options".
  message("add_flags_agfusion_clinical.R --cff <file.cff> --agfusion <agfusion.tsv> --transcript_allowlist <transcript_allowlist.txt> --out-prefix <prefix>")
}
|
||
# Trailing command-line arguments only (those supplied after the script name).
args <- commandArgs(TRUE)

# Nothing to parse: show usage and stop. `||` is the scalar, short-circuiting
# operator appropriate for an `if` condition. Exit with a non-zero status so a
# calling pipeline does not mistake an empty invocation for a successful run.
if (is.null(args) || length(args) < 1) {
  usage()
  quit(status = 1)
}
|
||
#' Minimal long-option parser (no optparse dependency)
#'
#' Splits a single command-line string on `--` markers and reads each chunk
#' as whitespace-separated tokens: the first token is the option name, the
#' second its value (any further tokens are ignored). Hyphens in option names
#' are replaced by underscores. Options given without a value are dropped.
#'
#' @param x A single string of long-form options, e.g. `--opt1 val1 --opt2 val2`
#'
#' @return A named list mapping option names to their character values

parse_args <- function(x){
  # Text before the first `--` is discarded; each remaining chunk is "key value"
  chunks <- strsplit(x, ' ?--')[[1]][-1]
  tokens <- lapply(chunks, function(chunk) {
    scan(text = chunk, what = 'character', quiet = TRUE)
  })
  # Force every token vector to length 2 so a missing value shows up as NA
  pairs <- lapply(tokens, function(tok) {
    length(tok) <- 2
    tok
  })

  opt_names <- lapply(pairs, function(p) gsub('-', '_', p[1]))
  opt_values <- lapply(pairs, function(p) p[2])
  opts <- structure(opt_values, names = opt_names)

  # Keep only the options that actually received a value
  has_value <- !is.na(opts)
  opts[has_value]
}
|
||
# Re-join the argument vector into one string so parse_args() can split it
# on the `--` markers.
args_opt <- parse_args(paste(args, collapse = " "))

# Reject any option that is not recognised. Names are compared after
# parse_args() has converted hyphens to underscores. Exit non-zero so the
# caller can detect the failed invocation.
possible_args <- c("cff", "agfusion", "transcript_allowlist", "out_prefix")
unknown_args <- setdiff(names(args_opt), possible_args)
if (length(unknown_args) > 0) {
  message("Invalid options")
  message("Unrecognised: ", paste(unknown_args, collapse = ", "))
  usage()
  quit(status = 1)
}

# Every option is mandatory; an option passed without a value is dropped by
# parse_args() and therefore also reported here.
required_args <- c("cff", "agfusion", "transcript_allowlist", "out_prefix")
missing_args <- setdiff(required_args, names(args_opt))
if (length(missing_args) > 0) {
  message("Missing required arguments")
  message("Missing: ", paste(missing_args, collapse = ", "))
  usage()
  quit(status = 1)
}
|
||
# Pull the four option values out of the parsed argument list.
cff_file <- args_opt$cff
agfusion_file <- args_opt$agfusion
transcript_allowlist_file <- args_opt$transcript_allowlist
out_prefix <- args_opt$out_prefix

# CFF table: keep the transcript, breakpoint, gene-symbol and cluster columns,
# then build a space-separated key from the transcript pair and breakpoints.
cff <- fread(cff_file)
cff <- select(
  cff,
  gene5_transcript_id,
  gene3_transcript_id,
  gene5_breakpoint,
  gene3_breakpoint,
  gene5_renamed_symbol,
  gene3_renamed_symbol,
  cluster
)
cff <- mutate(
  cff,
  ID = paste(gene5_transcript_id, gene3_transcript_id, gene5_breakpoint, gene3_breakpoint)
)

# AGFusion table: same key, built from its own transcript/breakpoint columns.
agfusion_tab <- fread(agfusion_file)
agfusion_tab <- mutate(
  agfusion_tab,
  ID = paste(`5'_transcript`, `3'_transcript`, `5'_breakpoint`, `3'_breakpoint`)
)

# Allowlist: only the Ensembl -> RefSeq transcript mapping columns are used.
transcript_allowlist <- fread(transcript_allowlist_file)
transcript_allowlist <- select(transcript_allowlist, ensembl_transcript, refseq_transcript)
|
||
|
||
# Flag each AGFusion row whose key (transcript pair + breakpoints) also
# appears among the CFF keys.
agfusion_tab <- agfusion_tab %>%
  mutate(Metafusion_Transcript_Pair = ID %in% cff$ID)

# The key was only needed for the flag above; remove the column.
agfusion_tab[, ID := NULL]

# Drop the transcript columns from the CFF table and de-duplicate, leaving the
# distinct breakpoint / gene-symbol / cluster combinations.
cff <- cff %>%
  select(
    gene5_breakpoint,
    gene3_breakpoint,
    gene5_renamed_symbol,
    gene3_renamed_symbol,
    cluster
  ) %>%
  distinct()

# Left join on gene symbols and breakpoints to attach `cluster`; AGFusion rows
# without a CFF match are kept. TRUE/FALSE are spelled out because T/F are
# ordinary variables that can be reassigned.
agfusion_tab <- merge(
  agfusion_tab,
  cff,
  by.x = c("5'_gene", "3'_gene", "5'_breakpoint", "3'_breakpoint"),
  by.y = c("gene5_renamed_symbol", "gene3_renamed_symbol", "gene5_breakpoint", "gene3_breakpoint"),
  all.x = TRUE,
  all.y = FALSE
)
|
||
# Attach the RefSeq ID for the 5' transcript via a left join on the Ensembl
# transcript ID, then rename the joined column so the second join below does
# not collide with it. TRUE/FALSE are spelled out because T/F are ordinary
# variables that can be reassigned.
agfusion_tab <- merge(
  agfusion_tab,
  transcript_allowlist,
  by.x = "5'_transcript",
  by.y = "ensembl_transcript",
  all.x = TRUE,
  all.y = FALSE
)
setnames(agfusion_tab, "refseq_transcript", "refseq_transcript_id_5")

# Same lookup for the 3' transcript.
# NOTE(review): because all.x = TRUE, rows whose 5' and/or 3' transcript is
# not on the allowlist are kept rather than filtered out (their RefSeq column
# is left unmatched). Raised in review as a question -- confirm this is the
# intended behaviour.
agfusion_tab <- merge(
  agfusion_tab,
  transcript_allowlist,
  by.x = "3'_transcript",
  by.y = "ensembl_transcript",
  all.x = TRUE,
  all.y = FALSE
)
setnames(agfusion_tab, "refseq_transcript", "refseq_transcript_id_3")
|
||
# Write the annotated AGFusion table as a tab-separated file named
# "<out_prefix>.expanded_agfusion_transcripts.tsv", overwriting any existing
# file, without row names or quoting. FALSE is spelled out because F is an
# ordinary variable that can be reassigned.
write.table(
  agfusion_tab,
  paste0(out_prefix, ".expanded_agfusion_transcripts.tsv"),
  row.names = FALSE,
  append = FALSE,
  quote = FALSE,
  sep = "\t"
)
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Add a new line for this PR 138