Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

added simple-event-annotation as a command line script #618

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 64 additions & 0 deletions src/main/r/simple-event-annotation.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# if (!requireNamespace("BiocManager", quietly = TRUE)) install.packages("BiocManager")
#BiocManager::install("StructuralVariantAnnotation")
#install.packages("stringr")
suppressMessages(library(VariantAnnotation))
suppressMessages(library(StructuralVariantAnnotation))
suppressMessages(library(stringr))
args=commandArgs(trailingOnly = TRUE)

################################################################################
# Check arguments
if(length(args)!=3){
print("Incorrect number of arguments.\nUsage:\nRscript simple-event-annotation.R <PATH_TO_INPUT_VCF> <GENOME_NAME> <OUTPUT_PREFIX>")
quit()
}
if(!(file.exists(args[1]))){
print(paste("Cannot open input file:", args[1]))
quit()
}
fin=args[1]
genome=args[2]
prefix=args[3]
################################################################################
#' Simple SV type classifier
simpleEventType <- function(gr) {
pgr = partner(gr)
return(ifelse(seqnames(gr) != seqnames(pgr), "CTX", # inter-chromosomosal
ifelse(strand(gr) == strand(pgr), "INV",
ifelse(gr$insLen >= abs(gr$svLen) * 0.7, "INS", # TODO: improve classification of complex events
ifelse(xor(start(gr) < start(pgr), strand(gr) == "-"), "DEL",
"DUP")))))
}
# using the example in the GRIDSS /example directory
vcf <- readVcf(file=fin, genome=genome)
info(header(vcf)) = unique(as(rbind(as.data.frame(info(header(vcf))), data.frame(
row.names=c("SIMPLE_TYPE"),
Number=c("1"),
Type=c("String"),
Description=c("Simple event type annotation based purely on breakend position and orientation."))), "DataFrame"))
gr <- breakpointRanges(vcf)
svtype <- simpleEventType(gr)
info(vcf)$SIMPLE_TYPE <- NA_character_
info(vcf[gr$sourceId])$SIMPLE_TYPE <- svtype
# info(vcf[gr$sourceId])$SVLEN <- gr$svLen
vcf@info[gr$sourceId, 'SVLEN'] <- gr$svLen # Create a new entry for SVLEN and add corresponding data
writeVcf(vcf, paste0(prefix, ".annotated.vcf")) # generated by example/gridss.sh

# TODO: perform event filtering here
# By default, GRIDSS is very sensitive but this comes at the cost of a high false discovery rate
gr <- gr[gr$FILTER == "PASS" & partner(gr)$FILTER == "PASS"] # Remove low confidence calls

simplegr <- gr[simpleEventType(gr) %in% c("INS", "INV", "DEL", "DUP")]
simplebed <- data.frame(
chrom=seqnames(simplegr),
# call the centre of the homology/inexact interval
start=as.integer((start(simplegr) + end(simplegr)) / 2),
end=as.integer((start(partner(simplegr)) + end(partner(simplegr))) / 2),
name=simpleEventType(simplegr),
score=simplegr$QUAL,
strand="."
)
# Just the lower of the two breakends so we don't output everything twice
simplebed <- simplebed[simplebed$start < simplebed$end,]
write.table(simplebed, paste0(prefix,".simple.bed"), quote=FALSE, sep='\t', row.names=FALSE, col.names=FALSE)