Skip to content

Commit

Permalink
test new layout
Browse files Browse the repository at this point in the history
  • Loading branch information
yiqunchen committed Nov 25, 2023
1 parent a46569f commit 2d0c233
Show file tree
Hide file tree
Showing 15 changed files with 142 additions and 26 deletions.
3 changes: 3 additions & 0 deletions .Rproj.user/shared/notebooks/paths
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
/Users/yiquntchen/Desktop/Desktop - Yiqun’s MacBook Pro/fall_2022/CADET/DESCRIPTION="0C15EB70"
/Users/yiquntchen/Desktop/Desktop - Yiqun’s MacBook Pro/fall_2022/CADET/NAMESPACE="3EEB5E98"
/Users/yiquntchen/Desktop/Desktop - Yiqun’s MacBook Pro/fall_2022/CADET/R/helper_cutree.R="757CC7BD"
/Users/yiquntchen/Desktop/Desktop - Yiqun’s MacBook Pro/fall_2022/CADET/R/trunc_dist.R="60B4C270"
Expand All @@ -11,6 +12,8 @@
/Users/yiquntchen/Desktop/Desktop - Yiqun’s MacBook Pro/fall_2022/CADET/vignettes/Tutorials.Rmd="009CB925"
/Users/yiquntchen/Desktop/Desktop - Yiqun’s MacBook Pro/fall_2022/CADET/vignettes/real_data_example.Rmd="106C91B5"
/Users/yiquntchen/Desktop/Desktop - Yiqun’s MacBook Pro/fall_2022/CADET/vignettes/technical_details.Rmd="7B3007C2"
/Users/yiquntchen/Desktop/Desktop - Yiqun’s MacBook Pro/fall_2022/gen_paper_figure_CADET/generate_figure_for_cadet.R="E22F1510"
/Users/yiquntchen/Desktop/fall_2022/get_intuition_about_phi.R="ABD04DA0"
/Users/yiquntchen/Desktop/fall_2023/hex_sticker_genePT.R="A90FC0E2"
/Users/yiquntchen/Downloads/Selective inference for the mean of a single feature in two estimated clusters/temp_code/test_1f_type_1_exchangeable.R="531D99BC"
/Users/yiquntchen/Library/CloudStorage/Box-Box/Derm Referrals Prediction Project/Materials_for_transition/Analysis Codes/Analysis_final_set_01052023/SL_analysis_v8.R="38018C8D"
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
.Rproj.user/
.Rproj.user/*
# produced vignettes
vignettes/*.html
#vignettes/*.html

# OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3
.httr-oauth
Expand Down
2 changes: 1 addition & 1 deletion R/kmeans_inference_1f.R
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,7 @@ kmeans_estimation <- function(X, k, iter.max = 10, seed = 1234){
#' sig=sig,
#' covMat=NULL, seed=2023,
#' iter.max = 30)
#' summary(cl_1_2_feat_1)
#' cl_1_2_feat_1
#' @references
#' Lloyd, S. P. (1957, 1982). Least squares quantization in PCM. Technical Note, Bell Laboratories.
#' Published in 1982 in IEEE Transactions on Information Theory, 28, 128–137.
Expand Down
18 changes: 15 additions & 3 deletions R/trunc_inf.R
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
#' @examples
#' # Simulates a 100 x 2 data set with three clusters
#' set.seed(123)
#' library(CADET)
#' dat <- rbind(c(-1, 0), c(0, sqrt(3)), c(1, 0))[rep(1:3, length=100), ] +
#' matrix(0.2*rnorm(200), 100, 2)
#'
Expand All @@ -61,7 +62,7 @@
#' @seealso \code{\link{rect_hier_clusters}} for visualizing clusters \code{k1} and \code{k2} in the dendrogram;
#'
#' @references Yiqun T. Chen and Lucy L. Gao "Testing for a difference in means of a single feature after clustering". arXiv preprint (2023).
test_hier_clusters_exact_1f <- function(X, link, hcl, K, k1, k2, feat, indpt=TRUE, sig=NULL, covMat=NULL) {
test_hier_clusters_exact_1f <- structure(function(X, link, hcl, K, k1, k2, feat, indpt=TRUE, sig=NULL, covMat=NULL) {
if(!is.matrix(X)) stop("X should be a matrix")

n <- nrow(X)
Expand Down Expand Up @@ -125,5 +126,16 @@ test_hier_clusters_exact_1f <- function(X, link, hcl, K, k1, k2, feat, indpt=TRU
TNSurv(-stat, 0, sqrt(scale_factor), intervals::Intervals(as.matrix(-S)[, 2:1]))
}

return(list(stat=abs(stat), pval=pval, trunc=S))
}
p_naive <- naive.two.sided.pval(z = stat,
mean = 0,
sd = sqrt(scale_factor))
result_list <- list("stat"=stat,
"cluster_1" = k1,
"cluster_2" = k2,
"pval"=pval,
"p_naive"=p_naive,
"trunc"=S,
"linkage"=link)
class(result_list) <- "hier_inference"
return(result_list)
})
89 changes: 89 additions & 0 deletions R/util.R
Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,92 @@ naive.two.sided.pval <- function(z, mean, sd){
return(two_sided_p_val)
}


#' Summarize the inferential result for k-means clustering
#' @param object output from running kmeans_inference
#' @param ... to be passed to methods
#' @return A data frame with summarized results
#' @export
#' @examples
#' library(CADET)
#' library(ggplot2)
#' set.seed(2022)
#' n <- 150
#' true_clusters <- c(rep(1, 50), rep(2, 50), rep(3, 50))
#' delta <- 10
#' q <- 2
#' mu <- rbind(c(delta/2,rep(0,q-1)),
#' c(rep(0,q-1), sqrt(3)*delta/2),
#' c(-delta/2,rep(0,q-1)) )
#' sig <- 1
#' # Generate a matrix normal sample
#' X <- matrix(rnorm(n*q, sd=sig), n, q) + mu[true_clusters, ]
#' # Visualize the data
#' ggplot(data.frame(X), aes(x=X1, y=X2)) +
#' geom_point(cex=2) + xlab("Feature 1") + ylab("Feature 2") +
#' theme_classic(base_size=18) + theme(legend.position="none") +
#' scale_colour_manual(values=c("dodgerblue3", "rosybrown", "orange")) +
#' theme(legend.title = element_blank(),
#' plot.title = element_text(hjust = 0.5))
#' k <- 3
#' # Run k-means clustering with K=3
#' estimated_clusters <- kmeans_estimation(X, k,iter.max = 20,seed = 2023)$final_cluster
#' table(true_clusters,estimated_clusters)
#' # Visualize the clusters
#' ggplot(data.frame(X), aes(x=X1, y=X2, col=as.factor(estimated_clusters))) +
#' geom_point(cex=2) + xlab("Feature 1") + ylab("Feature 2") +
#' theme_classic(base_size=18) + theme(legend.position="none") +
#' scale_colour_manual(values=c("dodgerblue3", "rosybrown", "orange")) +
#' theme(legend.title = element_blank(), plot.title = element_text(hjust = 0.5))
#' # Let's test the difference between first feature across estimated clusters 1 and 2:
#' cl_1_2_feat_1 <- kmeans_inference_1f(X, k=3, 1, 2,
#' feat=1, iso=TRUE,
#' sig=sig,
#' covMat=NULL, seed=2023,
#' iter.max = 30)
#' summary(cl_1_2_feat_1)
summary.kmeans_inference <- function(object, ...){
result <- data.frame(cluster_1 = object$cluster_1,
cluster_2 = object$cluster_2,
test_stat = object$test_stat,
p_selective = object$pval,
p_naive = object$p_naive)
return(result)
}




#' Summarize the inferential result for hierarhical clustering
#' @param object output from running `test_hier_clusters_exact_1f`
#' @param ... to be passed to methods
#' @return A data frame with summarized results
#' @export
#' @examples
#' # Simulates a 100 x 2 data set with three clusters
#' set.seed(123)
#' library(CADET)
#' dat <- rbind(c(-1, 0), c(0, sqrt(3)), c(1, 0))[rep(1:3, length=100), ] +
#' matrix(0.2*rnorm(200), 100, 2)
#'
#' # Average linkage hierarchical clustering
#' hcl <- hclust(dist(dat, method="euclidean")^2, method="average")
#'
#' # plot dendrograms with the 1st and 2nd clusters (cut at the third split)
#' # displayed in blue and orange
#' plot(hcl)
#' rect_hier_clusters(hcl, k=3, which=1:2, border=c("blue", "orange"))
#'
#' # tests for a difference in means between the blue and orange clusters
#' # with respect to the 1st feature
#' cl_1_2_feat_1 <- test_hier_clusters_exact_1f(X=dat, link="average", hcl=hcl, K=3, k1=1, k2=2, feat=1)
#' summary(cl_1_2_feat_1)
summary.hier_inference <- function(object, ...){
result <- data.frame(cluster_1 = object$cluster_1,
cluster_2 = object$cluster_2,
test_stat = object$stat,
p_selective = object$pval,
p_naive = object$p_naive)
return(result)
}

2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# CADET (Clustering And Differential Expression Testing) <img src="./cadet_sticker.png" align="right" width="150px"/>
# CADET (Clustering And Differential Expression Testing) <img src="./man/figures/cadet_sticker.png" align="right" width="150px"/>

### What is CADET?

Expand Down
Binary file added docs/articles/figures/fig_1.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
22 changes: 11 additions & 11 deletions docs/articles/technical_details.html

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion docs/index.html

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion docs/pkgdown.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ pkgdown_sha: ~
articles:
Tutorials: Tutorials.html
technical_details: technical_details.html
last_built: 2023-11-25T23:12Z
last_built: 2023-11-25T23:47Z
urls:
reference: https://yiqunchen.github.io/CADET/reference
article: https://yiqunchen.github.io/CADET/articles
Expand Down
16 changes: 15 additions & 1 deletion docs/reference/test_hier_clusters_exact_1f.html

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Binary file added man/figures/cadet_sticker.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
2 changes: 1 addition & 1 deletion vignettes/Tutorials.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ title: "Software tutorials"
output: rmarkdown::html_vignette
vignette: >
%\VignetteIndexEntry{Software tutorials}
%\VignetteEngine{knitr::rmarkdown}
%\VignetteEngine{knitr::knitr}
%\VignetteEncoding{UTF-8}
---

Expand Down
Binary file added vignettes/figures/fig_1.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
8 changes: 3 additions & 5 deletions vignettes/technical_details.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ output:
rmarkdown::html_vignette
vignette: >
%\VignetteIndexEntry{Technical details}
%\VignetteEngine{knitr::rmarkdown}
%\VignetteEngine{knitr::knitr}
%\VignetteEncoding{UTF-8}
---

Expand All @@ -18,10 +18,8 @@ knitr::opts_chunk$set(

<center>

![](../man/figures/figure_1_a.png){width=30%}
![](../man/figures/figure_1_b.png){width=30%}
![](../man/figures/figure_1_c.png){width=30%}
<figcaption>Figure 1: *Left*: One simulated dataset generated according to $\mathcal{MN}_{100\times 2}(\textbf{0}_{100\times 2}, \textbf{I}_{100}, \sigma^2 \textbf{I}_{2})$. We apply $k$-means clustering to obtain three clusters. The cluster centroids are displayed as triangles. *Center*: Quantile-quantile plot of the Wald p-values applied to 2,000 simulated datasets from $\mathcal{MN}_{100\times 2}(\textbf{0}_{100\times 2}, \textbf{I}_{100}, \sigma^2 \textbf{I}_{2})$. *Right*: Quantile-quantile plot of our proposed p-values applied to the same simulated datasets as in center.</figcaption>
![](./figures/fig_1.png){width=90%}
<figcaption>Figure 1: We simulated one dataset according to $\mathcal{MN}_{100\times 10}(\textbf{\mu}, \textbf{I}_{100}, \Sigma)$, where $\mu_i = (1,0_9)^T$ for $i=1,\ldots, 50$ and $\mu_i = (0_9,1)^T$ for $i=51,\ldots, 100$, and $\Sigma_{ij} = 1\{i=j\}+0.4\cdot 1\{i\neq j\}$. *(a)*: Empirical distribution of feature 2 based on the simulated data set. In this case, all observations have the same mean for feature 2. *(b)*: We apply k-means clustering to obtain two clusters and plot the empirical distribution of feature 2 stratified by the clusters. *(c)*: Quantile-quantile plot of naive z-test (black) our proposed p-values (orange) applied to the simulated data sets for testing the null hypotheses for a difference in means for features 2--8. </figcaption>
</center>


Expand Down

0 comments on commit 2d0c233

Please sign in to comment.