Skip to content

Commit

Permalink
Add get_full_genotype func
Browse files Browse the repository at this point in the history
  • Loading branch information
bschilder committed Mar 8, 2023
1 parent 59b74af commit 8a7748c
Show file tree
Hide file tree
Showing 38 changed files with 659 additions and 434 deletions.
9 changes: 6 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: ThreeWayTest
Type: Package
Title: Three Way Test for Identifying Pleiotropy
Version: 1.0.1
Version: 1.0.2
Authors@R: c(
person(given = "Deliang",
family = "Bu",
Expand All @@ -19,7 +19,8 @@ biocViews:
Imports:
MASS,
methods,
data.table
data.table,
tools
Suggests:
rworkflows,
markdown,
Expand All @@ -33,7 +34,9 @@ Suggests:
ggplot2,
tidygraph,
ggnetwork,
pals
pals,
piggyback,
gh
Remotes:
github::baolinwu/MSKAT
RoxygenNote: 7.2.3
Expand Down
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,12 @@ export(clustermap)
export(coefficient_estimate)
export(gen_autoregressive)
export(generate_null_distribution_T3)
export(get_full_genotype)
export(metaCCA)
export(networkmap)
export(postprocess_data)
import(data.table)
importFrom(MASS,ginv)
importFrom(MASS,mvrnorm)
importFrom(stats,pchisq)
importFrom(tools,R_user_dir)
14 changes: 14 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,17 @@
# ThreeWayTest 1.0.2

## New features

* Split functions into separate files (make easier to find them).
* Increase coverage to 100%
* Update README with icicle plot.
* Add `get_full_genotype` func to get full 1KG genotype data.
- Added support function `get_data`

## Bug fixes

* Add missing *tests/testthat.R* script.

# ThreeWayTest 1.0.1

## New features
Expand Down
57 changes: 57 additions & 0 deletions R/MGAS.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
#' MGAS
#'
#' This is the method proposed by Van_MGAS
#' @param z_vector Column vectorized data matrix with rows represent
#' phenotype and columns represent genotype.
#' @param est_genetic_cor Estimated phenotype correlation matrix.
#' @param est_pheno_cor Estimated genotype correlation matrix.
#' @returns A numeric value represents the p value of MGAS.
#'
#' @export
#' @importFrom stats pchisq
#' @examples
#' z_vector<-MASS::mvrnorm(1,mu=rep(0,9),Sigma = diag(nrow = 9, ncol = 9))
#' genotype_covariance<-diag(nrow = 3,ncol = 3)
#' phenotype_covariance<-diag(nrow = 3,ncol = 3)
#' ThreeWayTest::MGAS(z_vector=z_vector,
#' est_genetic_cor = genotype_covariance,
#' est_pheno_cor =phenotype_covariance)
MGAS<-function(z_vector,est_genetic_cor,est_pheno_cor){
X <- kronecker(est_genetic_cor,est_pheno_cor,FUN = "*")
beta = c(0.3867,0.0021,-0.1347,-0.0104,0.7276,0.0068)
p_value<-stats::pchisq(z_vector^2,1,lower.tail = FALSE)
tmp=sort(p_value,index.return=TRUE)
pj=tmp$x
iorder=tmp$ix
length = ncol(est_genetic_cor)*ncol(est_pheno_cor)
r2=matrix(0,length,length)
r2=X[iorder,iorder]
ro=diag(length)
for (i1 in 1:length) {
for (i2 in 1:i1) {
if (i1>i2) {
er=r2[i1,i2]
ro[i1,i2]=ro[i2,i1]= beta[1]*er^6+beta[2]*er^5+beta[3]*er^4+beta[4]*
er^3+beta[5]*er^2+beta[6]*er
}}}
alllam=eigen(ro[1:length,1:length])$values
qepj=length
for (i1 in 1:length) {
qepj=qepj-(alllam[i1]>1)*(alllam[i1]-1) }

qej=matrix(c(seq(1,length,1)),length,1,byrow=T)
for (j in 1:length) {
sellam=eigen(ro[1:j,1:j])$values
id=j
for (i1 in 1:id) {
qej[j,1]=qej[j,1]-(sellam[i1]>1)*(sellam[i1]-1)
}
}
pg=matrix(0,length,1)
for (i in 1:length) {
pg[i,1]=(qepj/qej[i,1])*pj[i]
}
pg=pg[iorder]
p_mgas<-min(pg)
return(p_mgas)
}
50 changes: 50 additions & 0 deletions R/TWT.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
#' TWT
#'
#' This is the method of TWT.
#' @param z_mat is the matrix of z_scores with row number of rows stands for
#' the number of phenotypes and number of columns
#' stands for the number of variants.
#' @param est_genetic_cor Estimated correlation matrix of genetic variants.
#' @param est_pheno_cor Estimated correlation matrix of phenotypes.
#' @param cutoff_value Set Omega.
#' @param coefficient_matrix Calculated based on function
#' \link[ThreeWayTest]{approximate_distribution_coefficient_estimate_T3}.
#' @returns p_value_final P_value of TWT.
#' @returns p_1 P_value of T_1.
#' @returns p_2 P_value of T_2.
#' @returns p_3 P_value of T_3.
#'
#' @export
#' @examples
#' z_mat<-MASS::mvrnorm(5,mu=rep(0,5), Sigma = diag(nrow = 5, ncol = 5))
#' null_distribution<-ThreeWayTest::generate_null_distribution_T3(m=25,
#' n=1000,cov_mat=diag(nrow = 25,ncol= 25), cutoff_value=c(0.2,0.4,0.6,0.8,1))
#' coefficient_matrix<-
#' ThreeWayTest::approximate_distribution_coefficient_estimate_T3(
#' null_distribution_matrix = null_distribution)
#' ThreeWayTest::TWT(z_mat=z_mat, est_genetic_cor=diag(nrow = 5, ncol = 5),
#' est_pheno_cor=diag(nrow = 5, ncol = 5), cutoff_value=c(0.2,0.4,0.6,0.8,1),
#' coefficient_matrix=coefficient_matrix)
TWT<-function(z_mat,
est_genetic_cor,
est_pheno_cor,
cutoff_value,
coefficient_matrix){
number_of_snp<-ncol(z_mat)
number_of_pheno<-nrow(z_mat)
pvalue_genetic<-apply(z_mat,2,chisq_test, cov_mat=est_pheno_cor)
cauchy_statistic_1<-(1/number_of_snp)*sum(tan((0.5-pvalue_genetic)*pi))
p_pleiotropic<-0.5-(atan(cauchy_statistic_1)/pi)
pvalue_pheno<-apply(z_mat,1,chisq_test, cov_mat=est_genetic_cor)
cauchy_statistic_2<-(1/number_of_pheno)*sum(tan((0.5-pvalue_pheno)*pi))
p_genetic_structure<-0.5-(atan(cauchy_statistic_2)/pi)
z_vec<-as.vector(z_mat)
est_total_cov_mat<-methods::kronecker(est_genetic_cor,est_pheno_cor)
p_3<-T_3(z_vec,est_total_cov_mat,cutoff_value,coefficient_matrix)
final_p_vec<-c(p_genetic_structure,p_pleiotropic,p_3)
cauchy_statistic_final<-(1/3)*sum(tan((0.5-final_p_vec)*pi))
p_value_final<-0.5-(atan(cauchy_statistic_final)/pi)
return(list(p_value_final=p_value_final,
p_1=p_genetic_structure,
p_2=p_pleiotropic, p_3=p_3))
}
36 changes: 36 additions & 0 deletions R/T_3.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
#' T_3
#'
#' This is the function calculating T3.
#' @param z_vector Column vectorized data matrix with rows
#' represent phenotype and columns represent genotype.
#' @param cov_mat Estimated covariance matrix of z_vector.
#' @param cutoff_value Vector of truncated value eta.
#' @param coefficient_matrix Matrix of alpha, beta and d.
#' @returns A numeric value represent p value of T3.
#' @importFrom MASS ginv
#' @export
#' @examples
#' z_vector<-MASS::mvrnorm(1,mu=rep(0,10), Sigma = diag(nrow = 10, ncol = 10))
#' null_distribution<-ThreeWayTest::generate_null_distribution_T3(m=10,n=1000,
#' cov_mat=diag(nrow = 10,ncol= 10), cutoff_value=c(0.2,0.4,0.6,0.8,1))
#' coefficient_matrix<-
#' ThreeWayTest::approximate_distribution_coefficient_estimate_T3(
#' null_distribution_matrix = null_distribution)
#' ThreeWayTest::T_3(z_vector=z_vector, cov_mat=diag(nrow = 10, ncol = 10),
#' cutoff_value=c(0.2,0.4,0.6,0.8,1), coefficient_matrix=coefficient_matrix)
T_3<-function(z_vector,
cov_mat,
cutoff_value,
coefficient_matrix){
pvalue<-rep(0,length(cutoff_value))
for (i in 1:(length(cutoff_value)-1)) {
stat_TWT<-T_eta(z_vector,cov_mat,cutoff_value[i])
pvalue[i]<-stats::pgamma(
(stat_TWT-coefficient_matrix[2,i])/(coefficient_matrix[1,i]),
shape = coefficient_matrix[3,i]/2,rate = 1/2,lower.tail = FALSE)
}
pvalue[length(cutoff_value)]<-chisq_test(z_vector,cov_mat)
cauchy_statistic<-(1/length(pvalue))*sum(tan((0.5-pvalue)*pi))
pvalue<-0.5-(atan(cauchy_statistic)/pi)
return(pvalue)
}
27 changes: 27 additions & 0 deletions R/T_eta.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
#' T_eta
#'
#' Calculation of T_eta with eta fixed.
#' @param z_vector Column vectorized data matrix with rows represent
#' phenotype and columns represent genotype.
#' @param cov_mat Estimated covariance matrix of z_vector.
#' @param eta Truncated value.
#' @returns A numeric value represent calculated statistic T_eta.
#' @importFrom MASS ginv
#' @export
#' @examples
#' z_vector<-MASS::mvrnorm(1,mu=rep(0,9),Sigma = diag(nrow = 9, ncol = 9))
#' ThreeWayTest::T_eta(z_vector=z_vector,
#' cov_mat=diag(nrow = 9, ncol = 9), eta=0.5)
T_eta<-function(z_vector,
cov_mat,
eta){
index<-ceiling(length(z_vector)*eta)
z_vector_order<-order(abs(z_vector),decreasing = TRUE)
ordered_z_vector<-z_vector[z_vector_order]
ordered_cov_mat<-cov_mat[z_vector_order,z_vector_order]
select_vector<-ordered_z_vector[1:index]
select_cov_mat<-ordered_cov_mat[(1:index),(1:index)]
stat<-t(select_vector)%*%MASS::ginv(select_cov_mat)%*%select_vector
stat<-as.numeric(stat)
return(stat)
}
25 changes: 25 additions & 0 deletions R/approximate_distribution_coefficient_estimate_T3.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
#' approximate_distribution_coefficient_estimate_T3
#'
#' This function estimate the coeffcient of T3 with null distribution
#' generated by generate_null_distribution_T3.
#' Please see Setp 2 in our manuscript for more information.
#' @param null_distribution_matrix Generated by generate_null_distribution_T3
#' @returns Coefficient matrix of alpha, beta and d.
#' @export
#' @examples
#' null_distribution<-ThreeWayTest::generate_null_distribution_T3(m=6,n=1000,
#' cov_mat=diag(nrow = 6, ncol = 6), cutoff_value=c(0.2,0.4,0.6,0.8,1))
#' coefficient_matrix <-
#' ThreeWayTest::approximate_distribution_coefficient_estimate_T3(
#' null_distribution_matrix = null_distribution)
approximate_distribution_coefficient_estimate_T3<-
function(null_distribution_matrix){
coefficient_matrix<-matrix(0,nrow = 3,ncol = ncol(null_distribution_matrix))
for (i in 1:ncol(null_distribution_matrix)) {
coefficient<-coefficient_estimate(null_distribution_matrix[,i])
coefficient_matrix[1,i]<-coefficient$alpha
coefficient_matrix[2,i]<-coefficient$beta
coefficient_matrix[3,i]<-coefficient$d
}
return(coefficient_matrix)
}
22 changes: 22 additions & 0 deletions R/chisq_test.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#' chisq_test
#'
#' This is traditional chisq_test.
#' @param z_vector Column vectorized data matrix with rows represent
#' phenotype and columns represent genotype.
#' @param cov_mat Estimated covariance matrix of z_vector.
#' @returns A numeric value represent the p value of chi-square test.
#' @importFrom MASS ginv
#' @export
#' @examples
#' z_vector<-MASS::mvrnorm(n = 1,
#' mu=rep(0,9),
#' Sigma = diag(nrow = 9, ncol = 9))
#' ThreeWayTest::chisq_test(z_vector=z_vector,
#' cov_mat=diag(nrow = 9, ncol = 9))
chisq_test<-function(z_vector,
cov_mat){
chi_stat<-t(z_vector)%*%MASS::ginv(cov_mat)%*%z_vector
pvalue<-stats::pchisq(chi_stat,length(z_vector),lower.tail = FALSE)
pvalue<-as.numeric(pvalue)
return(pvalue)
}
25 changes: 25 additions & 0 deletions R/coefficient_estimate.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
#' coefficient_estimate
#'
#' This function estimate the coeffcient of T3 with null distribution
#' generated by generate_null_distribution_T3.
#' Please see Setp 2 in our manuscript for more information.
#' @param null_distribution Generated by generate_null_distribution_T3
#' @returns List of coefficient of alpha, beta and d.
#' @export
#' @examples
#' null_distribution<-ThreeWayTest::generate_null_distribution_T3(m=6,n=1000,
#' cov_mat=diag(nrow = 6, ncol = 6), cutoff_value=c(0.2,0.4,0.6,0.8,1))
#' ThreeWayTest::coefficient_estimate(null_distribution[,1])
coefficient_estimate<-function(null_distribution){
n<-length(null_distribution)
K1<-(1/n)*sum(null_distribution)
K2<-(1/n)*sum((null_distribution-K1)^2)
K3<-(1/n)*sum((null_distribution-K1)^3)
K4<-(1/n)*sum((null_distribution-K1)^4)-3*(K2^2)
alpha<-K3/(4*K2)
beta<-K1-2*(K2^2)/K3
d<-8*(K2^3)/(K3^2)
return(list(alpha=alpha,
beta=beta,
d=d))
}
6 changes: 3 additions & 3 deletions R/data.R
Original file line number Diff line number Diff line change
Expand Up @@ -40,13 +40,13 @@
#' @description
#' 2504 samples in 1000 genomes project for
#' calculating covariance matrix of genotypes.
#' CHR is the chromesome number.
#' CHR is the chromosome number.
#' SNP is the number of SNP.
#' BP is the poisition of the SNP.
#' BP is the position of the SNP.
#' Counted is the allele that counts as 1.
#' ALT is the allele that counts as 0.
#' Due to R package limit of data, this is a part-version with 9 SNPs of gene
#' FADS2 to run getting_start.R in the vigneets.
#' FADS2 to run getting_start.R in the vignettes
#' For full Data, Please go https://github.com/bschilder/ThreeWayTest.
#' \code{
#' load(file.path("data","selected_genotype.Rdata"))
Expand Down
8 changes: 8 additions & 0 deletions R/expM.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
expM <- function(X,
e) {

v = La.svd(X)
res = v$u %*% diag(v$d^e) %*% v$vt

return(res)
}
Loading

0 comments on commit 8a7748c

Please sign in to comment.