Skip to content

Commit

Permalink
DeBUG taxa index and GTindex
Browse files Browse the repository at this point in the history
  • Loading branch information
jiabowang committed Apr 17, 2024
1 parent cd01e77 commit 46c8911
Show file tree
Hide file tree
Showing 7 changed files with 262 additions and 249 deletions.
4 changes: 2 additions & 2 deletions R/GAPIT.Bread.R
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ if(method=="MLM"){
GM=GM,
model="MLM",
QC=FALSE,
CV.Extragenetic=CV.Extragenetic,
CV.Extragenetic=CV.Extragenetic,
# GTindex=GTindex,
file.output=file.output
)
Expand Down Expand Up @@ -110,7 +110,7 @@ if(method=="FaST" | method=="SUPER"| method=="DC")
#GPS=myFaSTREML$GPS
}

mySUPERFaST=GAPIT.SUPER.FastMLM(ys=matrix(Y[,-1],nrow(Y),1),X0=as.matrix(cbind(matrix(1,nrow(CV),1),CV[,-1])),snp.pool=as.matrix(GK[-1]), xs=as.matrix(GD[GTindex,-1]),vg=vg,delta=delta,LD=LD,method=method)
mySUPERFaST=GAPIT.SUPER.FastMLM(ys=matrix(Y[,-1],nrow(Y),1),X0=as.matrix(cbind(matrix(1,nrow(CV),1),CV[,-1])),snp.pool=as.matrix(GK[-1]), xs=as.matrix(GD[,-1]),vg=vg,delta=delta,LD=LD,method=method)

GWAS=cbind(GM,mySUPERFaST$ps,mySUPERFaST$stats,mySUPERFaST$dfs,mySUPERFaST$effect)
}#End of if(method=="FaST" | method=="SUPER")
Expand Down
2 changes: 1 addition & 1 deletion R/GAPIT.EMMAxP3D.R
Original file line number Diff line number Diff line change
Expand Up @@ -387,7 +387,7 @@
{
xs=NULL
}else{
xs=myFRG$GD[GTindex,]
xs=myFRG$GD
}

if(!is.null(myFRG$GI))
Expand Down
12 changes: 11 additions & 1 deletion R/GAPIT.IC.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ print("GAPIT.IC in process...")
CV=DP$CV
GD=DP$GD
noCV=FALSE
# print(dim(CV))
if(is.null(CV))
{
noCV=TRUE
Expand All @@ -38,16 +39,21 @@ print("GAPIT.IC in process...")
taxa_GD=as.character(GD[,1])
taxa_KI=as.character(DP$KI[,1])
taxa_CV=as.character(CV[,1])
# print(Y)
# print(tail(taxa_Y))
# print(tail(taxa_GD))
# print(tail(taxa_CV))

taxa_comall=intersect(intersect(intersect(taxa_KI,taxa_GD),taxa_Y),taxa_CV)
taxa_g_cv=intersect(intersect(taxa_KI,taxa_GD),taxa_CV)
# print(length(taxa_comall))
# print(length(taxa_g_cv))
comCV=CV[taxa_CV%in%taxa_comall,]
comCV <- comCV[match(taxa_comall,as.character(comCV[,1])),]
comY=Y[taxa_Y%in%taxa_comall,]
comY <- comY[match(taxa_comall,as.character(comY[,1])),]
comGD=GD[taxa_GD%in%taxa_comall,]
comGD <- comGD[match(taxa_comall,as.character(comGD[,1])),]# comGD=NULL

}else{
# print("@@@@")
taxa_GD=as.character(GD[,1])
Expand Down Expand Up @@ -94,6 +100,10 @@ print("GAPIT.IC in process...")
}#end of K
}# end of GD
# print(DP$KI[1:5,1:5])
# print(tail(comY[,1]))
# print(tail(comGD[,1]))
# print(tail(comCV[,1]))
# print(tail(GD[,1]))
GT=as.matrix(as.character(taxa_comall))
print(paste("There are ",length(GT)," common individuals in genotype , phenotype and CV files.",sep=""))
if(nrow(comCV)!=length(GT))stop ("GAPIT says: The number of individuals in CV does not match to the number of individuals in genotype files.")
Expand Down
164 changes: 85 additions & 79 deletions R/GAPIT.Main.R
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,8 @@
function(Y,
G=NULL,
GD=NULL,
allGD=NULL,
allCV=NULL,
GM=NULL,
KI=NULL,
Z=NULL,
Expand Down Expand Up @@ -243,10 +245,12 @@ function(Y,
#print(model)
SUPER_GS_GAPIT = GAPIT.SUPER.GS(Y=Y,
GD=GD,
allGD=allGD,
GM=GM,
KI=KI,
Z=Z,
CV=CV,
allCV=allCV,
GK=GK,
kinship.algorithm=kinship.algorithm,
bin.from=bin.from,
Expand All @@ -271,7 +275,7 @@ function(Y,
sangwich.bottom=sangwich.bottom,
QC=QC,
QTN.gs=QTN.gs,
GTindex=GTindex,
# GTindex=GTindex,
LD=LD,
file.output=FALSE,
GAPIT3.output=GAPIT3.output,
Expand Down Expand Up @@ -347,27 +351,27 @@ function(Y,
}

if(is.null(GT)) {
GT=as.character(GD[,1])
GT=as.character(CV[,1])
}
#print("@@@@@@@@")
#print(GD)
#merge CV with PC: Put CV infront of PC
# if(PCA.total>0&!is.null(CV))CV=GAPIT.CVMergePC(CV,PC)
# if(PCA.total>0&is.null(CV))CV=PC
#for GS merge CV with GD name
if (is.null(CV)){
my_allCV=CV
}else{
taxa_GD=rownames(GD)
my_allCV=CV[order(CV[,1]),]
my_allCV=my_allCV[my_allCV[,1]%in%taxa_GD,]
# if (is.null(CV)){
# my_allCV=CV
# }else{
# taxa_GD=rownames(GD)
my_allCV=allCV
# my_allCV=my_allCV[my_allCV[,1]%in%taxa_GD,]
#print(dim(my_allCV))
}
# }

#Handler of CV.Extragenetic
if(is.null(CV) & !is.null(CV.Extragenetic)){
stop ("GAPIT says: CV.Extragenetic is more than avaiables.")
}
# if(is.null(CV) & !is.null(CV.Extragenetic)){
# stop ("GAPIT says: CV.Extragenetic is more than avaiables.")
# }

if(!is.null(CV)& !is.null(CV.Extragenetic)){
if(CV.Extragenetic>(ncol(CV)-1)){
Expand All @@ -377,8 +381,8 @@ function(Y,

#Create Z as identity matrix from Y if it is not provided
if(kinship.algorithm!="None" & kinship.algorithm!="SUPER" & is.null(Z)){
taxa=as.character(Y[,1]) #this part will make GS without CV not present all prediction
Z=as.data.frame(diag(1,nrow(Y)))
taxa=as.character(CV[,1]) #this part will make GS without CV not present all prediction
Z=as.data.frame(diag(1,nrow(CV)))
#taxa=as.character(KI[,1])
#Z=as.data.frame(diag(1,nrow(KI)))
Z=rbind(taxa,Z)
Expand All @@ -394,16 +398,16 @@ function(Y,
}

#Create CV with all 1's if it is not provided
noCV=FALSE
if(is.null(CV)){
noCV=TRUE
CV=Y[,1:2]
CV[,2]=1
colnames(CV)=c("taxa","overall")
}
# noCV=FALSE
# if(is.null(CV)){
# noCV=TRUE
# CV=Y[,1:2]
# CV[,2]=1
# colnames(CV)=c("taxa","overall")
# }

#Remove duplicat and integragation of data
print("QC is in process...")
# print("QC is in process...")

CVI <- CV

Expand All @@ -422,13 +426,13 @@ function(Y,
# }

# print(length(GK))
GTindex=match(as.character(KI[,1]),as.character(Y[,1]))
GTindex=GTindex[!is.na(GTindex)]
# GTindex=match(as.character(KI[,1]),as.character(Y[,1]))
# GTindex=GTindex[!is.na(GTindex)]
# KI=KI[GTindex,GTindex+1]
my_taxa=as.character(KI[,1])
CV=CV[as.character(CV[,1])%in%as.character(Y[,1]),]
# my_taxa=as.character(KI[,1])
# CV=CV[as.character(CV[,1])%in%as.character(Y[,1]),]
#Output phenotype
colnames(Y)=c("Taxa",name.of.trait)
# colnames(Y)=c("Taxa",name.of.trait)
# if(file.output){
# try(utils::write.table(Y, paste("GAPIT.", name.of.trait,".phenotype.csv" ,sep = ""),
# quote = FALSE, sep = ",",
Expand All @@ -439,45 +443,45 @@ function(Y,
# Default kinship.algorithm = "VanRaden".
# This if() may be seldom used.
#TDP
if( kinship.algorithm =="None" ){
if(min(CV[,2])==max(CV[,2])) CV=NULL
# if( kinship.algorithm =="None" ){
# if(min(CV[,2])==max(CV[,2])) CV=NULL

# GAPIT.TDP() does not appear to exist.
# theTDP = GAPIT.TDP(Y=Y,
# CV=CV,
# SNP = as.data.frame(cbind(GT[GTindex],as.matrix(as.data.frame(GD[GTindex,])))),
# QTN=QTN,
# Round=QTN.round,
# QTN.limit=QTN.limit,
# QTN.update=QTN.update,
# Method=QTN.method
# )
#print(dim(GM))
#print(length(theTDP$p))

#theGWAS=cbind(GM,theTDP$p,NA,NA,NA)
theGWAS=cbind(GM,NA,NA,NA,NA)
# # GAPIT.TDP() does not appear to exist.
# # theTDP = GAPIT.TDP(Y=Y,
# # CV=CV,
# # SNP = as.data.frame(cbind(GT[GTindex],as.matrix(as.data.frame(GD[GTindex,])))),
# # QTN=QTN,
# # Round=QTN.round,
# # QTN.limit=QTN.limit,
# # QTN.update=QTN.update,
# # Method=QTN.method
# # )
# #print(dim(GM))
# #print(length(theTDP$p))

# #theGWAS=cbind(GM,theTDP$p,NA,NA,NA)
# theGWAS=cbind(GM,NA,NA,NA,NA)

return (list(Compression = NULL,
kinship.optimum = NULL,
kinship = NULL,
PC = NULL,
GWAS = theGWAS,
GPS = NULL,
Pred = NULL,
REMLs = NULL,
#QTN = theTDP$QTN,
QTN = NULL,
Timmer = Timmer,
Memory = Memory,
h2 = NULL))
}
# return (list(Compression = NULL,
# kinship.optimum = NULL,
# kinship = NULL,
# PC = NULL,
# GWAS = theGWAS,
# GPS = NULL,
# Pred = NULL,
# REMLs = NULL,
# #QTN = theTDP$QTN,
# QTN = NULL,
# Timmer = Timmer,
# Memory = Memory,
# h2 = NULL))
# }

#rm(qc)
gc()
# gc()

Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="QC")
Memory=GAPIT.Memory(Memory=Memory,Infor="QC")
# Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="QC")
# Memory=GAPIT.Memory(Memory=Memory,Infor="QC")

#Get indicator of sangwich top and bottom
byPass.top=FALSE
Expand Down Expand Up @@ -509,29 +513,32 @@ function(Y,
#print(GD[1:5,1:5])

#Create GK if not provided
if(is.null(GK)){
if(is.null(GK))
{
# set.seed(1)
nY=floor(nrow(Y)*.9)
nG=ncol(GD)
if(nG>nY){
if(nG>nY)
{
snpsam=sample(1:nG,nY)
}else{
snpsam=1:nG
}
GK=GD[GTindex,snpsam]
GK=GD
# print(dim(GK))
# print(GK[270:279,1:5])
SNPVar=apply(as.matrix(GK), 2, stats::var)
# print(SNPVar)
GK=GK[,SNPVar>0]
GK=cbind(as.data.frame(GT[GTindex]),as.data.frame(GK)) #add taxa
GK=cbind(as.data.frame(GT),as.data.frame(GK)) #add taxa
}

#myGD=cbind(as.data.frame(GT),as.data.frame(GD))
# file.output.temp=file.output
# file.output=FALSE
#print(sangwich.top)[GTindex,c(1,GTindex+1)]
GP=GAPIT.Bread(Y=Y,CV=CV,Z=Z,KI=KI,GK=GK,GD=cbind(as.data.frame(GT[GTindex]),as.data.frame(GD[GTindex,])),GM=GI,method=sangwich.top,GTindex=GTindex,LD=LD,file.output=FALSE)$GWAS
print(tail(Y[,1]))
print(tail(CV[,1]))
print(tail(GT))
# print(dim(cbind(as.data.frame(GT[GTindex]),as.data.frame(GD[GTindex,]))))
GP=GAPIT.Bread(Y=Y,CV=CV,Z=Z,KI=KI,GK=GK,GD=cbind(as.data.frame(GT),as.data.frame(GD)),GM=GI,method=sangwich.top,LD=LD,file.output=FALSE)$GWAS
# file.output=file.output.temp


Expand Down Expand Up @@ -695,7 +702,7 @@ function(Y,
# print(optOnly)
# print(head(CVI))
p3d <- GAPIT.EMMAxP3D(ys=ys,
xs=as.matrix(as.data.frame(GD[GTindex,colInclude])),
xs=as.matrix(as.data.frame(GD[,colInclude])),
K = as.matrix(bk$KW),
Z=matrix(as.numeric(as.matrix(zc$Z[,-1])),nrow=zrow,ncol=zcol),
X0=X0,
Expand All @@ -721,7 +728,7 @@ function(Y,
file.GM=file.GM,
file.Ext.GD=file.Ext.GD,
file.Ext.GM=file.Ext.GM,
GTindex=GTindex,
# GTindex=GTindex,
genoFormat=genoFormat,
optOnly=optOnly,
SNP.effect=SNP.effect,
Expand Down Expand Up @@ -828,16 +835,15 @@ function(Y,
Memory=GAPIT.Memory(Memory=Memory,Infor="Genotype for burger")

print(paste("bin---",bin,"---inc---",inc,sep=""))
GK=GD[GTindex,myGenotype$SNP.QTN]
GK=GD[,myGenotype$SNP.QTN]
SUPER_GD=GD[,myGenotype$SNP.QTN]
SNPVar=apply(as.matrix(GK), 2, stats::var)

GK=GK[,SNPVar>0]
SUPER_GD=SUPER_GD[,SNPVar>0]
GK=cbind(as.data.frame(GT[GTindex]),as.data.frame(GK)) #add taxa
GK=cbind(as.data.frame(GT),as.data.frame(GK)) #add taxa
# print(length(GT))
# print(dim(SUPER_GD))
SUPER_GD=cbind(as.data.frame(GT[GTindex]),as.data.frame(SUPER_GD)) #add taxa
SUPER_GD=cbind(as.data.frame(GT),as.data.frame(SUPER_GD)) #add taxa
# print(dim(GK))
#GP=NULL
}# end of if(is.null(GK))
Expand Down Expand Up @@ -946,7 +952,7 @@ if(Model.selection == TRUE){
p3d <- GAPIT.EMMAxP3D(ys=ys,xs=as.matrix(as.data.frame(GD[,1])),K = as.matrix(bk$KW) ,Z=Z1,X0=X0.test,CVI=CVI,CV.Extragenetic=CV.Extragenetic,GI=GI,SNP.P3D=SNP.P3D,Timmer=Timmer,Memory=Memory,fullGD=fullGD,
SNP.permutation=SNP.permutation, GP=GP,
file.path=file.path,file.from=file.from,file.to=file.to,file.total=file.total, file.fragment = file.fragment, byFile=byFile, file.G=file.G,file.Ext.G=file.Ext.G,file.GD=file.GD, file.GM=file.GM, file.Ext.GD=file.Ext.GD,file.Ext.GM=file.Ext.GM,
GTindex=GTindex,genoFormat=genoFormat,optOnly=TRUE,SNP.effect=SNP.effect,SNP.impute=SNP.impute,name.of.trait=name.of.trait, Create.indicator = Create.indicator, Major.allele.zero = Major.allele.zero)
genoFormat=genoFormat,optOnly=TRUE,SNP.effect=SNP.effect,SNP.impute=SNP.impute,name.of.trait=name.of.trait, Create.indicator = Create.indicator, Major.allele.zero = Major.allele.zero)



Expand Down Expand Up @@ -1122,10 +1128,10 @@ print("-------------- Sandwich bottom with raw burger------------------------")
print("--------------EMMAxP3D with the optimum setting-----------------------")
#print(dim(ys))
#print(dim(as.matrix(as.data.frame(GD[GTindex,colInclude]))))
p3d <- GAPIT.EMMAxP3D(ys=ys,xs=as.matrix(as.data.frame(GD[GTindex,colInclude])) ,K = as.matrix(bk$KW) ,Z=Z1,X0=as.matrix(X0),CVI=CVI, CV.Extragenetic=CV.Extragenetic,GI=GI,SNP.P3D=SNP.P3D,Timmer=Timmer,Memory=Memory,fullGD=fullGD,
p3d <- GAPIT.EMMAxP3D(ys=ys,xs=as.matrix(as.data.frame(GD[,colInclude])) ,K = as.matrix(bk$KW) ,Z=Z1,X0=as.matrix(X0),CVI=CVI, CV.Extragenetic=CV.Extragenetic,GI=GI,SNP.P3D=SNP.P3D,Timmer=Timmer,Memory=Memory,fullGD=fullGD,
SNP.permutation=SNP.permutation, GP=GP,
file.path=file.path,file.from=file.from,file.to=file.to,file.total=file.total, file.fragment = file.fragment, byFile=byFile, file.G=file.G,file.Ext.G=file.Ext.G,file.GD=file.GD, file.GM=file.GM, file.Ext.GD=file.Ext.GD,file.Ext.GM=file.Ext.GM,
GTindex=GTindex,genoFormat=genoFormat,optOnly=optOnly,SNP.effect=SNP.effect,SNP.impute=SNP.impute,name.of.trait=name.of.trait, Create.indicator = Create.indicator, Major.allele.zero = Major.allele.zero)
genoFormat=genoFormat,optOnly=optOnly,SNP.effect=SNP.effect,SNP.impute=SNP.impute,name.of.trait=name.of.trait, Create.indicator = Create.indicator, Major.allele.zero = Major.allele.zero)

Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="GWAS")
Memory=GAPIT.Memory(Memory=Memory,Infor="GWAS")
Expand All @@ -1144,7 +1150,7 @@ print("---------------Sandwich bottom: reload bins ---------------------------")
#SUPER: Final screening
GK=GK.save
# print(GK)
myBread=GAPIT.Bread(Y=Y,CV=CV,Z=Z,GK=GK,GD=cbind(as.data.frame(GT[GTindex]),as.data.frame(GD)),GM=GI,method=sangwich.bottom,GTindex=GTindex,LD=LD,file.output=FALSE)
myBread=GAPIT.Bread(Y=Y,CV=CV,Z=Z,GK=GK,GD=cbind(as.data.frame(GT),as.data.frame(GD)),GM=GI,method=sangwich.bottom,LD=LD,file.output=FALSE)

print("SUPER saving results...")

Expand Down
1 change: 1 addition & 0 deletions R/GAPIT.Power.compare.R
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ colnames(Y)=c("Taxa","Simu")
GD=GD,
GM=GM,
PCA.total=PCA.total,
cutOff=0.01,
CV=CV,
file.output=FALSE,
model=all.method[j],
Expand Down
Loading

0 comments on commit 46c8911

Please sign in to comment.