diff --git a/R/GAPIT.HMP.Amplification.R b/R/GAPIT.HMP.Amplification.R new file mode 100644 index 0000000..85eed8f --- /dev/null +++ b/R/GAPIT.HMP.Amplification.R @@ -0,0 +1,98 @@ +`GAPIT.HMP.Amplification`<-function(x + ) +{ +#Object: To amplificate HapMap file from MNPs +#Output: New HapMap file without MNPs +#Authors: Jiabo Wang +#Writen by Jiabo Wang +#Last update: Oct 1, 2024 +############################################################################################## +# setwd("/Users/Jiabo/Documents/Data/Meiai") +# myG=data.table::fread("gene.hmp.txt", + # header = F, + # na.strings = c("NA", "NaN"), + # data.table = F) +# X=myG[-1,] +muta=x[2] +n.muta=nchar(muta) + +if(n.muta==3) + { + map=as.character(x[c(1:11)]) + genotype=as.character(x[-c(1:11)]) + genotype[genotype=="A"]="AA" + genotype[genotype=="C"]="CC" + genotype[genotype=="T"]="TT" + genotype[genotype=="G"]="GG" + genotype[genotype=="R"]="AG" + genotype[genotype=="Y"]="CT" + genotype[genotype=="S"]="CG" + genotype[genotype=="W"]="AT" + genotype[genotype=="K"]="GT" + genotype[genotype=="M"]="AC" + genotype[genotype=="N"]="NN" + + + new.X=append(map,genotype) + new.X=matrix(new.X,1,length(new.X)) + return(as.data.frame(new.X)) + }else{ + # muta2=strsplit(muta,"/")[[1]] + # genotype=X + map=as.character(x[c(1:11)]) + genotype=as.character(x[-c(1:11)]) + genotype[genotype=="A"]="AA" + genotype[genotype=="C"]="CC" + genotype[genotype=="T"]="TT" + genotype[genotype=="G"]="GG" + genotype[genotype=="R"]="AG" + genotype[genotype=="Y"]="CT" + genotype[genotype=="S"]="CG" + genotype[genotype=="W"]="AT" + genotype[genotype=="K"]="GT" + genotype[genotype=="M"]="AC" + genotype[genotype=="N"]="NN" + genotypes=unlist(strsplit(genotype, "")) + genotypes2=genotypes[genotypes!="N"] + lev=levels(as.factor(genotypes2)) + len=length(lev) + count=1:len + for(i in 1:len){ + count[i]=length(genotypes2[(genotypes2==lev[i])]) + } + count.temp = cbind(lev,count) + order.index=order(as.numeric(count.temp[,2]), decreasing = FALSE) + count.temp <- count.temp[order.index,] + count = count[order.index] + lev = lev[order.index] + MAF.lev=lev[1] + heter=c("AT","AG","AC","TA","GA","CA","GT","TG","GC","CG","CT","TC") + homo=paste(lev[1],lev[1],sep="") + homo0=NULL + for(i in 2:len) + { + homo0=c(homo0,paste(lev[i],lev[i],sep="")) + } + new.genotype=matrix(homo,len-1,length(genotype)) + for(i in 2:len) + { + heter0=c(paste(lev[i],lev[-c(i)],sep=""),paste(lev[-c(i)],lev[i],sep="")) +# heter2=setdiff(heter,heter0) + homo1=paste(lev[i],lev[i],sep="") + homo2=setdiff(homo0,homo1) +# homo2=c(homo2,) +# new.genotype[i-1,]=genotype + new.genotype[i-1,genotype%in%heter0]=heter0[1] + new.genotype[i-1,genotype%in%homo2]=homo + new.genotype[i-1,genotype%in%homo1]=homo1 + } + maps=matrix(NA,len-1,11) + maps[,1]=paste(map[1],"_",1:(len-1),sep="") + maps[,2]=paste(MAF.lev,"/",lev[-1],sep="") + maps[,3]=rep(map[3],len-1) + maps[,4]=as.numeric(map[4])+(1:(len-1)) + new.X=cbind(maps,new.genotype) + return(as.data.frame(new.X)) + } + +} \ No newline at end of file diff --git a/R/GAPIT.Numericalization.R b/R/GAPIT.Numericalization.R index 033bceb..d43fcf4 100644 --- a/R/GAPIT.Numericalization.R +++ b/R/GAPIT.Numericalization.R @@ -65,20 +65,19 @@ if(Major.allele.zero){ #One bit: Make sure that the SNP with the major allele is on the top, and the SNP with the minor allele is on the second position # if(bit==1){ count.temp = cbind(lev,count) - if(length(inter)!=0&lev[1]!=inter)count.temp = count.temp[-which(lev==inter),,drop=FALSE] - # if(nrow(count.temp)==0) return() + if(length(inter)!=0) + { + if(lev[1]!=inter) + { + count.temp = count.temp[-which(lev==inter),,drop=FALSE] + count=count[-which(lev==inter)] + lev=lev[-which(lev==inter)] + len=length(lev) + } + } # if(nrow(count.temp)==0) return() # print("!!!") order.index=order(as.numeric(count.temp[,2]), decreasing = FALSE) count.temp <- count.temp[order.index,] - # if(len==3)order = c(count.temp[,2],3)else order = count.temp[,2] - # } - #Two bit: Make sure that the SNP with the major allele is on the top, and the SNP with the minor allele is on the third position - # if(bit==2){ - # count.temp = cbind(count, seq(1:len)) - # if(len==3) count.temp = count.temp[-2,] - # count.temp <- count.temp[order(count.temp[,1], decreasing = FALSE),] - # if(len==3) order = c(count.temp[1,2],2,count.temp[2,2])else order = count.temp[,2] - # } count = count[order.index] # print(count) @@ -126,7 +125,7 @@ if(len==2) { if(!setequal(character(0),inter)) { - x=ifelse(x=="N",NA,ifelse(x==inter,1,2)) + x=ifelse(x=="N",NA,ifelse(x==inter,1,ifelse(x==lev[1],2,0))) # if(bit==2)x=ifelse(x=="NN",NA,ifelse(x==inter,1,2)) }else{ x=ifelse(x=="N",NA,ifelse(x==lev[1],2,0)) # the most is set 0, the least is set 2