Skip to content

Commit

Permalink
DeBUG with hapmap2numeric
Browse files Browse the repository at this point in the history
  • Loading branch information
jiabowang committed Oct 2, 2024
1 parent e7bbf5e commit 19e3d14
Show file tree
Hide file tree
Showing 2 changed files with 109 additions and 12 deletions.
98 changes: 98 additions & 0 deletions R/GAPIT.HMP.Amplification.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
`GAPIT.HMP.Amplification`<-function(x
)
{
#Object: To amplificate HapMap file from MNPs
#Output: New HapMap file without MNPs
#Authors: Jiabo Wang
#Writen by Jiabo Wang
#Last update: Oct 1, 2024
##############################################################################################
# setwd("/Users/Jiabo/Documents/Data/Meiai")
# myG=data.table::fread("gene.hmp.txt",
# header = F,
# na.strings = c("NA", "NaN"),
# data.table = F)
# X=myG[-1,]
muta=x[2]
n.muta=nchar(muta)

if(n.muta==3)
{
map=as.character(x[c(1:11)])
genotype=as.character(x[-c(1:11)])
genotype[genotype=="A"]="AA"
genotype[genotype=="C"]="CC"
genotype[genotype=="T"]="TT"
genotype[genotype=="G"]="GG"
genotype[genotype=="R"]="AG"
genotype[genotype=="Y"]="CT"
genotype[genotype=="S"]="CG"
genotype[genotype=="W"]="AT"
genotype[genotype=="K"]="GT"
genotype[genotype=="M"]="AC"
genotype[genotype=="N"]="NN"


new.X=append(map,genotype)
new.X=matrix(new.X,1,length(new.X))
return(as.data.frame(new.X))
}else{
# muta2=strsplit(muta,"/")[[1]]
# genotype=X
map=as.character(x[c(1:11)])
genotype=as.character(x[-c(1:11)])
genotype[genotype=="A"]="AA"
genotype[genotype=="C"]="CC"
genotype[genotype=="T"]="TT"
genotype[genotype=="G"]="GG"
genotype[genotype=="R"]="AG"
genotype[genotype=="Y"]="CT"
genotype[genotype=="S"]="CG"
genotype[genotype=="W"]="AT"
genotype[genotype=="K"]="GT"
genotype[genotype=="M"]="AC"
genotype[genotype=="N"]="NN"
genotypes=unlist(strsplit(genotype, ""))
genotypes2=genotypes[genotypes!="N"]
lev=levels(as.factor(genotypes2))
len=length(lev)
count=1:len
for(i in 1:len){
count[i]=length(genotypes2[(genotypes2==lev[i])])
}
count.temp = cbind(lev,count)
order.index=order(as.numeric(count.temp[,2]), decreasing = FALSE)
count.temp <- count.temp[order.index,]
count = count[order.index]
lev = lev[order.index]
MAF.lev=lev[1]
heter=c("AT","AG","AC","TA","GA","CA","GT","TG","GC","CG","CT","TC")
homo=paste(lev[1],lev[1],sep="")
homo0=NULL
for(i in 2:len)
{
homo0=c(homo0,paste(lev[i],lev[i],sep=""))
}
new.genotype=matrix(homo,len-1,length(genotype))
for(i in 2:len)
{
heter0=c(paste(lev[i],lev[-c(i)],sep=""),paste(lev[-c(i)],lev[i],sep=""))
# heter2=setdiff(heter,heter0)
homo1=paste(lev[i],lev[i],sep="")
homo2=setdiff(homo0,homo1)
# homo2=c(homo2,)
# new.genotype[i-1,]=genotype
new.genotype[i-1,genotype%in%heter0]=heter0[1]
new.genotype[i-1,genotype%in%homo2]=homo
new.genotype[i-1,genotype%in%homo1]=homo1
}
maps=matrix(NA,len-1,11)
maps[,1]=paste(map[1],"_",1:(len-1),sep="")
maps[,2]=paste(MAF.lev,"/",lev[-1],sep="")
maps[,3]=rep(map[3],len-1)
maps[,4]=as.numeric(map[4])+(1:(len-1))
new.X=cbind(maps,new.genotype)
return(as.data.frame(new.X))
}

}
23 changes: 11 additions & 12 deletions R/GAPIT.Numericalization.R
Original file line number Diff line number Diff line change
Expand Up @@ -65,20 +65,19 @@ if(Major.allele.zero){
#One bit: Make sure that the SNP with the major allele is on the top, and the SNP with the minor allele is on the second position
# if(bit==1){
count.temp = cbind(lev,count)
if(length(inter)!=0&lev[1]!=inter)count.temp = count.temp[-which(lev==inter),,drop=FALSE]
# if(nrow(count.temp)==0) return()
if(length(inter)!=0)
{
if(lev[1]!=inter)
{
count.temp = count.temp[-which(lev==inter),,drop=FALSE]
count=count[-which(lev==inter)]
lev=lev[-which(lev==inter)]
len=length(lev)
}
} # if(nrow(count.temp)==0) return()
# print("!!!")
order.index=order(as.numeric(count.temp[,2]), decreasing = FALSE)
count.temp <- count.temp[order.index,]
# if(len==3)order = c(count.temp[,2],3)else order = count.temp[,2]
# }
#Two bit: Make sure that the SNP with the major allele is on the top, and the SNP with the minor allele is on the third position
# if(bit==2){
# count.temp = cbind(count, seq(1:len))
# if(len==3) count.temp = count.temp[-2,]
# count.temp <- count.temp[order(count.temp[,1], decreasing = FALSE),]
# if(len==3) order = c(count.temp[1,2],2,count.temp[2,2])else order = count.temp[,2]
# }

count = count[order.index]
# print(count)
Expand Down Expand Up @@ -126,7 +125,7 @@ if(len==2)
{
if(!setequal(character(0),inter))
{
x=ifelse(x=="N",NA,ifelse(x==inter,1,2))
x=ifelse(x=="N",NA,ifelse(x==inter,1,ifelse(x==lev[1],2,0)))
# if(bit==2)x=ifelse(x=="NN",NA,ifelse(x==inter,1,2))
}else{
x=ifelse(x=="N",NA,ifelse(x==lev[1],2,0)) # the most is set 0, the least is set 2
Expand Down

0 comments on commit 19e3d14

Please sign in to comment.