Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add hrd score #27

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion R/calc.ai_new.R
Original file line number Diff line number Diff line change
Expand Up @@ -76,8 +76,10 @@ calc.ai_new<-function(seg, chrominfo, min.size=1e6, cont = 0,ploidyByChromosome=
no.events <- matrix(0, nrow=length(samples), ncol=12)
rownames(no.events) <- samples
colnames(no.events) <- c("Telomeric AI", "Mean size", "Interstitial AI", "Mean Size", "Whole chr AI", "Telomeric LOH", "Mean size", "Interstitial LOH", "Mean Size", "Whole chr LOH", "Ploidy", "Aberrant cell fraction")
TAI_seg = ""
for(j in samples){
sample.seg <- seg[seg[,1] %in% j,]
TAI_seg = sample.seg[sample.seg[,'AI'] == 1,]
no.events[j,1] <- nrow(sample.seg[sample.seg[,'AI'] == 1,])
no.events[j,2] <- mean(sample.seg[sample.seg[,'AI'] == 1,4] - sample.seg[sample.seg[,'AI'] == 1,3])
no.events[j,3] <- nrow(sample.seg[sample.seg[,'AI'] == 2,])
Expand All @@ -93,5 +95,5 @@ calc.ai_new<-function(seg, chrominfo, min.size=1e6, cont = 0,ploidyByChromosome=
no.events[j,9] <- mean(sample.seg[sample.seg[,'AI'] == 2,4] - sample.seg[sample.seg[,'AI'] == 2,3])
no.events[j,10] <- nrow(sample.seg[sample.seg[,'AI'] == 3,])
}
return(no.events)
return(list(no.events,TAI_seg))
}
3 changes: 2 additions & 1 deletion R/calc.hrd.R
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,8 @@ calc.hrd<-function(seg, nA=7, return.loc=FALSE,sizelimit1){
if(return.loc){
return(out.seg)
} else {
return(output)
out = list(output, segLOH)
return(out)
}
}

7 changes: 6 additions & 1 deletion R/calc.lst.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ calc.lst<-function(seg, chrominfo=chrominfo,nA=7,chr.arm='no'){
nB <- nA+1
samples <- unique(seg[,1])
output <- setNames(rep(0,length(samples)), samples)
cnvseg <- NULL
for(j in samples){
sample.seg <- seg[seg[,1] %in% j,]
sample.lst <- c()
Expand Down Expand Up @@ -52,6 +53,8 @@ calc.lst<-function(seg, chrominfo=chrominfo,nA=7,chr.arm='no'){
p.arm <- cbind(p.arm[,1:8], c(0,1)[match((p.arm[,4]-p.arm[,3]) >= 10e6, c('FALSE','TRUE'))])
for(k in 2:nrow(p.arm)){
if(p.arm[k,9] == 1 & p.arm[(k-1),9]==1 & (p.arm[k,3]-p.arm[(k-1),4]) < 3e6){
cnvseg <- rbind(cnvseg,c(p.arm[k,1:8],paste(c(p.arm[k,2],p.arm[k,3],p.arm[k,4]),collapse="_")))
cnvseg <- rbind(cnvseg,c(p.arm[k-1,1:8],paste(c(p.arm[k,2],p.arm[k,3],p.arm[k,4]),collapse="_")))
sample.lst <- c(sample.lst, 1)
}
}
Expand All @@ -66,6 +69,8 @@ calc.lst<-function(seg, chrominfo=chrominfo,nA=7,chr.arm='no'){
q.arm <- cbind(q.arm[,1:8], c(0,1)[match((q.arm[,4]-q.arm[,3]) >= 10e6, c('FALSE','TRUE'))])
for(k in 2:nrow(q.arm)){
if(q.arm[k,9] == 1 & q.arm[(k-1),9]==1 & (q.arm[k,3]-q.arm[(k-1),4]) < 3e6){
cnvseg <- rbind(cnvseg,c(q.arm[k,1:8],paste(c(q.arm[k,2],q.arm[k,3],q.arm[k,4]),collapse="_")))
cnvseg <- rbind(cnvseg,c(q.arm[k-1,1:8],paste(c(q.arm[k,2],q.arm[k,3],q.arm[k,4]),collapse="_")))
sample.lst <- c(sample.lst, 1)

}
Expand All @@ -74,5 +79,5 @@ calc.lst<-function(seg, chrominfo=chrominfo,nA=7,chr.arm='no'){
}
output[j] <- sum(sample.lst)
}
return(output)
return(list(output, cnvseg))
}
81 changes: 81 additions & 0 deletions R/calc.lst.bak.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
#' Determine large-scale transitions
#'
#' @param seg segmentation data
#' @param chrominfo reference genome version
#' @param nA column number of copy number of A allele
#' @param chr.arm option to use chromosome arms defined during segmentation
#' @return number of LSTs
calc.lst<-function(seg, chrominfo=chrominfo,nA=7,chr.arm='no'){
nB <- nA+1
samples <- unique(seg[,1])
output <- setNames(rep(0,length(samples)), samples)
cnvseg <- NULL
for(j in samples){
sample.seg <- seg[seg[,1] %in% j,]
sample.lst <- c()
chroms <- unique(sample.seg[,2])
chroms <- chroms[!chroms %in% c(23,24,'chr23','chr24','chrX','chrx','chrY','chry')]
for(i in chroms){
sample.chrom.seg <- sample.seg[sample.seg[,2] %in% i,,drop=F]
if(chr.arm !='no'){
p.max <- if(any(sample.chrom.seg[,chr.arm] == 'p')){max(sample.chrom.seg[sample.chrom.seg[,chr.arm] == 'p',4])}
q.min <- min(sample.chrom.seg[sample.chrom.seg[,chr.arm] == 'q',3])
}
if(nrow(sample.chrom.seg) < 2) {next}
sample.chrom.seg.new <- sample.chrom.seg
if(chr.arm == 'no'){
p.arm <- sample.chrom.seg.new[sample.chrom.seg.new[,3] <= chrominfo[i,2],,drop=F] # split into chromosome arms
q.arm <- sample.chrom.seg.new[sample.chrom.seg.new[,4] >= chrominfo[i,3],,drop=F]
q.arm<- shrink.seg.ai(q.arm)
q.arm[1,3] <- chrominfo[i,3]
if(nrow(p.arm) > 0){
p.arm<- shrink.seg.ai(p.arm)
p.arm[nrow(p.arm),4] <- chrominfo[i,2]
}
}
if(chr.arm != 'no'){
q.arm <- sample.chrom.seg.new[sample.chrom.seg.new[,chr.arm] == 'q',,drop=F]
q.arm<- shrink.seg.ai(q.arm)
q.arm[1,3] <- q.min
if(any(sample.chrom.seg.new[,chr.arm] == 'p')){
p.arm <- sample.chrom.seg.new[sample.chrom.seg.new[,chr.arm] == 'p',,drop=F] # split into chromosome arms
p.arm<- shrink.seg.ai(p.arm)
p.arm[nrow(p.arm),4] <- p.max
}
}
n.3mb <- which((p.arm[,4] - p.arm[,3]) < 3e6)
while(length(n.3mb) > 0){
p.arm <- p.arm[-(n.3mb[1]),]
p.arm <- shrink.seg.ai(p.arm)
n.3mb <- which((p.arm[,4] - p.arm[,3]) < 3e6)
}
if(nrow(p.arm) >= 2){
p.arm <- cbind(p.arm[,1:8], c(0,1)[match((p.arm[,4]-p.arm[,3]) >= 10e6, c('FALSE','TRUE'))])
for(k in 2:nrow(p.arm)){
if(p.arm[k,9] == 1 & p.arm[(k-1),9]==1 & (p.arm[k,3]-p.arm[(k-1),4]) < 3e6){
cnvseg <- rbind(cnvseg,p.arm[k,1:8])
sample.lst <- c(sample.lst, 1)
}
}
}
n.3mb <- which((q.arm[,4] - q.arm[,3]) < 3e6)
while(length(n.3mb) > 0){
q.arm <- q.arm[-(n.3mb[1]),]
q.arm <- shrink.seg.ai(q.arm)
n.3mb <- which((q.arm[,4] - q.arm[,3]) < 3e6)
}
if(nrow(q.arm) >= 2){
q.arm <- cbind(q.arm[,1:8], c(0,1)[match((q.arm[,4]-q.arm[,3]) >= 10e6, c('FALSE','TRUE'))])
for(k in 2:nrow(q.arm)){
if(q.arm[k,9] == 1 & q.arm[(k-1),9]==1 & (q.arm[k,3]-q.arm[(k-1),4]) < 3e6){
cnvseg <- rbind(cnvseg,q.arm[k,1:8])
sample.lst <- c(sample.lst, 1)

}
}
}
}
output[j] <- sum(sample.lst)
}
return(list(output, cnvseg))
}
25 changes: 25 additions & 0 deletions R/chrominfo_grch37.2.list
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
chrom centstart centend
chr1 121500000 128900000
chr2 90500000 96800000
chr3 87900000 93900000
chr4 48200000 52700000
chr5 46100000 50700000
chr6 58700000 63300000
chr7 58000000 61700000
chr8 43100000 48100000
chr9 47300000 50700000
chr10 38000000 42300000
chr11 51600000 55700000
chr12 33300000 38200000
chr13 16300000 19500000
chr14 16100000 19100000
chr15 15800000 20700000
chr16 34600000 38600000
chr17 22200000 25800000
chr18 15400000 19000000
chr19 24400000 28600000
chr20 25600000 29400000
chr21 10900000 14300000
chr22 12200000 17900000
chrX 58100000 63000000
chrY 11600000 13400000
25 changes: 25 additions & 0 deletions R/chrominfo_grch37.list
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
chrom centstart centend
chr1 chr1 121500000 128900000
chr2 chr2 90500000 96800000
chr3 chr3 87900000 93900000
chr4 chr4 48200000 52700000
chr5 chr5 46100000 50700000
chr6 chr6 58700000 63300000
chr7 chr7 58000000 61700000
chr8 chr8 43100000 48100000
chr9 chr9 47300000 50700000
chr10 chr10 38000000 42300000
chr11 chr11 51600000 55700000
chr12 chr12 33300000 38200000
chr13 chr13 16300000 19500000
chr14 chr14 16100000 19100000
chr15 chr15 15800000 20700000
chr16 chr16 34600000 38600000
chr17 chr17 22200000 25800000
chr18 chr18 15400000 19000000
chr19 chr19 24400000 28600000
chr20 chr20 25600000 29400000
chr21 chr21 10900000 14300000
chr22 chr22 12200000 17900000
chrX chrX 58100000 63000000
chrY chrY 11600000 13400000
25 changes: 25 additions & 0 deletions R/chrominfo_grch38.2.list
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
chrom centstart centend
chr1 121700000 125100000
chr2 91800000 96000000
chr3 87800000 94000000
chr4 48200000 51800000
chr5 46100000 51400000
chr6 58500000 62600000
chr7 58100000 62100000
chr8 43200000 47200000
chr9 42200000 45500000
chr10 38000000 41600000
chr11 51000000 55800000
chr12 33200000 37800000
chr13 16500000 18900000
chr14 16100000 18200000
chr15 17500000 20500000
chr16 35300000 38400000
chr17 22700000 27400000
chr18 15400000 21500000
chr19 24200000 28100000
chr20 25700000 30400000
chr21 10900000 13000000
chr22 13700000 17400000
chrX 58100000 63800000
chrY 10300000 10600000
25 changes: 25 additions & 0 deletions R/chrominfo_grch38.list
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
chrom centstart centend
chr1 chr1 121700000 125100000
chr2 chr2 91800000 96000000
chr3 chr3 87800000 94000000
chr4 chr4 48200000 51800000
chr5 chr5 46100000 51400000
chr6 chr6 58500000 62600000
chr7 chr7 58100000 62100000
chr8 chr8 43200000 47200000
chr9 chr9 42200000 45500000
chr10 chr10 38000000 41600000
chr11 chr11 51000000 55800000
chr12 chr12 33200000 37800000
chr13 chr13 16500000 18900000
chr14 chr14 16100000 18200000
chr15 chr15 17500000 20500000
chr16 chr16 35300000 38400000
chr17 chr17 22700000 27400000
chr18 chr18 15400000 21500000
chr19 chr19 24200000 28100000
chr20 chr20 25700000 30400000
chr21 chr21 10900000 13000000
chr22 chr22 13700000 17400000
chrX chrX 58100000 63800000
chrY chrY 10300000 10600000
2 changes: 1 addition & 1 deletion R/preprocess.hrd.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,13 @@
preprocess.hrd<-function(seg){
seg <- seg[!seg[,2] %in% c(paste('chr',c('X','Y','x','y',23,24),sep=''),c('X','Y','x','y',23,24)),]
seg[,1] <- as.character(seg[,1])

if(! all(seg[,8] <= seg[,7]) ){
tmp <- seg
seg[tmp[,8] > tmp[,7],7] <- tmp[tmp[,8] > tmp[,7],8]
seg[tmp[,8] > tmp[,7],8] <- tmp[tmp[,8] > tmp[,7],7]
}
seg <- shrink.seg.ai.wrapper(seg)
print(seg)
return(seg)

}
101 changes: 51 additions & 50 deletions R/preprocess.seqz.R
Original file line number Diff line number Diff line change
@@ -1,50 +1,51 @@
#' Preprocessing seqz files
#'
#' Preprocesses seqz files
#' @param seg A segmentation file
#' @param ploidy0 ploidy, optional
#' @return Output is
preprocess.seqz<-function(seg, ploidy0=NULL, chr.in.names=TRUE, outputdir=NULL){
if (is.null(ploidy0)){
ploidy01 = seq(1, 5.5, 0.1)
} else {
ploidy01= seq(ploidy0-0.5,ploidy0+0.5,0.1)
}

if (is.null(outputdir)){
outputdir = getwd()
}

run_name<-gsub(".*/","",gsub("_small.seqz","",gsub("gz","",seg)))
if(chr.in.names){
extract<-sequenza.extract(seg, chromosome.list=paste('chr',c(1:24),sep=''),gamma = 60, kmin = 50)
} else {
extract<-sequenza.extract(seg, chromosome.list=c(1:24),gamma = 60, kmin = 50)
}
extract.fit<-sequenza::sequenza.fit(extract, N.ratio.filter = 10, N.BAF.filter = 1, segment.filter = 3e6, mufreq.treshold = 0.10, ratio.priority = FALSE,ploidy=ploidy01, mc.cores = 1)
# sequenza.results(extract, extract.fit, out.dir = getwd(),sample.id =run_name)

seg.tab <- do.call(rbind, extract$segments[extract$chromosomes])
seg.len <- (seg.tab$end.pos - seg.tab$start.pos)/1e+06
cint <- get.ci(extract.fit)
cellularity <- cint$max.cellularity
ploidy <- cint$max.ploidy
avg.depth.ratio <- mean(extract$gc$adj[, 2])
info_seg<-c(cellularity,ploidy,avg.depth.ratio)
names(info_seg)<-c("cellularity","ploidy","avg.depth.ratio")
write.table(t(info_seg),paste0(outputdir,"/",run_name,"_info_seg.txt"),sep="\t",row.names=F)
allele.cn <- sequenza:::baf.bayes(Bf = seg.tab$Bf, CNt.max = 20, depth.ratio = seg.tab$depth.ratio, avg.depth.ratio = 1,
cellularity = cint$max.cellularity, ploidy = cint$max.ploidy,
sd.ratio = seg.tab$sd.ratio, weight.ratio = seg.len, sd.Bf = seg.tab$sd.BAF,
weight.Bf = 1, ratio.priority = FALSE, CNn = 2)
seg.tab$CN <- allele.cn[,1]
allele.cn <- as.data.table(allele.cn)
#Making imput file
seg <- data.frame(SampleID = as.character(run_name), Chromosome = seg.tab$chromosome, Start_position = seg.tab$start.pos,
End_position = seg.tab$end.pos, Nprobes = 1, total_cn = allele.cn$CNt, A_cn = allele.cn$B,
B_cn = allele.cn$A, ploidy = ploidy)
seg$contamination <- 1
seg<-seg[!is.na(seg$A_cn),]
seg<-seg[!is.na(seg$B_cn),]
return(seg)
}
#' Preprocessing seqz files
#'
#' Preprocesses seqz files
#' @param seg A segmentation file
#' @param ploidy0 ploidy, optional
#' @return Output is
preprocess.seqz<-function(seg, ploidy0=NULL, chr.in.names=TRUE, outputdir=NULL){
if (is.null(ploidy0)){
ploidy01 = seq(1, 5.5, 0.1)
} else {
ploidy01= seq(ploidy0-0.5,ploidy0+0.5,0.1)
}

if (is.null(outputdir)){
outputdir = getwd()
}

run_name<-gsub(".*/","",gsub("_small.seqz","",gsub("gz","",seg)))
if(chr.in.names){
extract<-sequenza.extract(seg, chromosome.list=paste('chr',c(1:24),sep=''),gamma = 60, kmin = 50)
} else {
extract<-sequenza.extract(seg, chromosome.list=c(1:24),gamma = 60, kmin = 50)
}
extract.fit<-sequenza::sequenza.fit(extract, N.ratio.filter = 10, N.BAF.filter = 1, segment.filter = 3e6, mufreq.treshold = 0.10, ratio.priority = FALSE,ploidy=ploidy01, mc.cores = 1)
# sequenza.results(extract, extract.fit, out.dir = getwd(),sample.id =run_name)

seg.tab <- do.call(rbind, extract$segments[extract$chromosomes])
seg.len <- (seg.tab$end.pos - seg.tab$start.pos)/1e+06
cint <- get.ci(extract.fit)
cellularity <- cint$max.cellularity
ploidy <- cint$max.ploidy
#avg.depth.ratio <- mean(extract$gc$adj[, 2])
avg.depth.ratio <- extract$avg.depth.ratio
info_seg<-c(cellularity,ploidy,avg.depth.ratio)
names(info_seg)<-c("cellularity","ploidy","avg.depth.ratio")
write.table(t(info_seg),paste0(outputdir,"/",run_name,"_info_seg.txt"),sep="\t",row.names=F)
allele.cn <- sequenza:::baf.bayes(Bf = seg.tab$Bf, CNt.max = 20, depth.ratio = seg.tab$depth.ratio, avg.depth.ratio = 1,
cellularity = cint$max.cellularity, ploidy = cint$max.ploidy,
sd.ratio = seg.tab$sd.ratio, weight.ratio = seg.len, sd.Bf = seg.tab$sd.BAF,
weight.Bf = 1, ratio.priority = FALSE, CNn = 2)
seg.tab$CN <- allele.cn[,1]
allele.cn <- as.data.frame(allele.cn)
#Making imput file
seg <- data.frame(SampleID = as.character(run_name), Chromosome = seg.tab$chromosome, Start_position = seg.tab$start.pos,
End_position = seg.tab$end.pos, Nprobes = 1, total_cn = allele.cn$CNt, A_cn = allele.cn$B,
B_cn = allele.cn$A, ploidy = ploidy)
seg$contamination <- 1
seg<-seg[!is.na(seg$A_cn),]
seg<-seg[!is.na(seg$B_cn),]
return(seg)
}
Loading