-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* Fixed compilation error in R 4.2.0. See https://cran.r-project.org/doc/manuals/r-devel/R-exts.html#Fortran-character-strings and https://www.stats.ox.ac.uk/pub/bdr/BLAS/README.txt * Use roxygen2 and markdown for documentation * Changed maintainer * import head and tail from utils in order to suppress cran note * remove news.md from Rbuildignore * updated cran-comments and README * Fixed BLAS compilation error also on dist.cpp * Added a comment regarding rchk * update CRAN-SUBMISSION
- Loading branch information
Showing
38 changed files
with
1,157 additions
and
568 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,11 @@ | ||
^.*\.Rproj$ | ||
^\.Rproj\.user$ | ||
README\.md | ||
NEWS\.md | ||
^README\.md$ | ||
^README\.Rmd$ | ||
^push_misha_manual$ | ||
^_pkgdown\.yml$ | ||
^\.git\.* | ||
^\.gitignore\.* | ||
^README_cache$ | ||
^cran-comments\.md$ | ||
^CRAN-SUBMISSION$ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,8 +1,11 @@ | ||
.Rproj.user | ||
.Rhistory | ||
.RData | ||
.Ruserdata | ||
src/*.o | ||
src/*.so | ||
src/*.dll | ||
README_cache/* | ||
.Rproj.user | ||
.Rhistory | ||
.RData | ||
.Ruserdata | ||
src/*.o | ||
src/*.so | ||
src/*.dll | ||
README_cache/* | ||
inst/doc | ||
README_cache/ | ||
..Rcheck |
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
Version: 2.3.17 | ||
Date: 2022-04-14 11:57:00 UTC | ||
SHA: 0d3cde73826253c86ffa98007855f54c44b37278 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,25 +1,36 @@ | ||
Package: tgstat | ||
Type: Package | ||
Package: tgstat | ||
Title: Amos Tanay's Group High Performance Statistical Utilities | ||
Version: 2.3.16 | ||
Depends: R (>= 3.5.0) | ||
Imports: utils | ||
SystemRequirements: C++11 | ||
OS_type: unix | ||
Date: 2020-09-02 | ||
Author: Michael Hoichman | ||
Maintainer: Michael Hoichman <[email protected]> | ||
Description: A collection of high performance utilities to compute distance, | ||
correlation, auto correlation, clustering and other tasks. | ||
Contains graph clustering algorithm described in "MetaCell: analysis of | ||
single-cell RNA-seq data using K-nn graph partitions" (Yael Baran, | ||
Akhiad Bercovich, Arnau Sebe-Pedros, Yaniv Lubling, Amir Giladi, | ||
Elad Chomsky, Zohar Meir, Michael Hoichman, Aviezer Lifshitz & Amos Tanay, | ||
Version: 2.3.17 | ||
Date: 2022-04-13 | ||
Authors@R: c( | ||
person("Michael", "Hoichman", , "[email protected]", role = "aut"), | ||
person("Aviezer", "Lifshitz", , "[email protected]", role = c("aut", "cre")) | ||
) | ||
Author: Michael Hoichman [aut], Aviezer Lifshitz [aut, cre] | ||
Maintainer: Aviezer Lifshitz <[email protected]> | ||
Description: A collection of high performance utilities to compute | ||
distance, correlation, auto correlation, clustering and other tasks. | ||
Contains graph clustering algorithm described in "MetaCell: analysis | ||
of single-cell RNA-seq data using K-nn graph partitions" (Yael Baran, | ||
Akhiad Bercovich, Arnau Sebe-Pedros, Yaniv Lubling, Amir Giladi, Elad | ||
Chomsky, Zohar Meir, Michael Hoichman, Aviezer Lifshitz & Amos Tanay, | ||
2019 <doi:10.1186/s13059-019-1812-2>). | ||
License: GPL-2 | ||
BugReports: https://github.com/tanaylab/tgstat/issues | ||
Depends: | ||
R (>= 3.5.0) | ||
Imports: | ||
utils | ||
Suggests: | ||
knitr, | ||
rmarkdown | ||
VignetteBuilder: | ||
knitr | ||
Encoding: UTF-8 | ||
LazyLoad: yes | ||
RoxygenNote: 6.1.1 | ||
NeedsCompilation: yes | ||
Packaged: 2020-09-02 18:10:14 UTC; hoichman | ||
Authors@R: person("Misha", "Hoichman", email = "[email protected]", role = c("aut", "cre")) | ||
BugReports: https://github.com/tanaylab/tgstat/issues | ||
OS_type: unix | ||
Roxygen: list(markdown = TRUE) | ||
RoxygenNote: 7.1.2 | ||
SystemRequirements: C++11 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,23 @@ | ||
useDynLib(tgstat, .registration = TRUE) | ||
exportPattern("^[[:alpha:]]+") | ||
# Generated by roxygen2: do not edit by hand | ||
|
||
export(tgs_cor) | ||
export(tgs_cor_knn) | ||
export(tgs_dist) | ||
export(tgs_finite) | ||
export(tgs_graph) | ||
export(tgs_graph_cover) | ||
export(tgs_graph_cover_resample) | ||
export(tgs_knn) | ||
export(tgs_matrix_tapply) | ||
importFrom(utils,head) | ||
importFrom(utils,tail) | ||
useDynLib(tgstat,tgs_cor_blas) | ||
useDynLib(tgstat,tgs_cor_graph) | ||
useDynLib(tgstat,tgs_cross_cor) | ||
useDynLib(tgstat,tgs_cross_cor_blas) | ||
useDynLib(tgstat,tgs_cross_cor_knn) | ||
useDynLib(tgstat,tgs_dist_blas) | ||
useDynLib(tgstat,tgs_graph2cluster) | ||
useDynLib(tgstat,tgs_graph2cluster_multi_edges) | ||
useDynLib(tgstat,tgs_graph2cluster_multi_full) | ||
useDynLib(tgstat,tgs_graph2cluster_multi_hash) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,125 @@ | ||
#' Calculates correlation or auto-correlation | ||
#' | ||
#' Calculates correlation between two matrices columns or auto-correlation | ||
#' between a matrix columns. | ||
#' | ||
#' 'tgs_cor' is very similar to 'stats::cor'. Unlike the latter it uses | ||
#' all available CPU cores to compute the correlation in a much faster way. The | ||
#' basic implementation of 'pairwise.complete.obs' is also more efficient | ||
#' giving overall great run-time advantage. | ||
#' | ||
#' Unlike 'stats::cor', 'tgs_cor' implements only two modes of treating | ||
#' data containing NA, which are equivalent to 'use="everything"' and | ||
#' 'use="pairwise.complete.obs"'. Please refer to the documentation of | ||
#' 'stats::cor' for more details. | ||
#' | ||
#' 'tgs_cor(x, y, spearman = FALSE)' is equivalent to 'cor(x, y, method = | ||
#' "pearson")'; 'tgs_cor(x, y, spearman = TRUE)' is equivalent to 'cor(x, y, method | ||
#' = "spearman")'; 'tgs_cor(x, y, pairwise.complete.obs = TRUE, spearman = TRUE)' is | ||
#' equivalent to 'cor(x, y, use = "pairwise.complete.obs", method = | ||
#' "spearman")'; and 'tgs_cor(x, y, pairwise.complete.obs = TRUE, spearman = FALSE)' is | ||
#' equivalent to 'cor(x, y, use = "pairwise.complete.obs", method = "pearson")'. | ||
#' | ||
#' 'tgs_cor' can output its result in "tidy" format: a data frame with three | ||
#' columns named 'col1', 'col2' and 'cor'. Only the correlation values whose | ||
#' absolute value is equal to or above the 'threshold' are reported. For | ||
#' auto-correlation (i.e. when 'y=NULL') a pair of columns numbered X and Y is | ||
#' reported only if X < Y. | ||
#' | ||
#' 'tgs_cor_knn' works similarly to 'tgs_cor'. Unlike the latter it returns | ||
#' only the highest 'knn' correlations for each column in 'x'. The result of | ||
#' 'tgs_cor_knn' is always returned in "tidy" format. | ||
#' | ||
#' One of the reasons to opt for 'tgs_cor_knn' over a pair of calls to 'tgs_cor' | ||
#' and 'tgs_knn' is the reduced memory consumption of the former. For the | ||
#' auto-correlation case (i.e. 'y=NULL'), given that the number of columns NC | ||
#' exceeds the number of rows NR, 'tgs_cor' memory consumption becomes a | ||
#' factor of NCxNC. In contrast, 'tgs_cor_knn' would consume in a similar | ||
#' scenario a factor of max(NCxNR,NCxKNN). Similarly, 'tgs_cor(x,y)' would | ||
#' consume memory as a factor of NCXxNCY, whereas 'tgs_cor_knn(x,y,knn)' would | ||
#' reduce that to max((NCX+NCY)xNR,NCXxKNN). | ||
#' | ||
#' @aliases tgs_cor tgs_cor_knn | ||
#' @param x numeric matrix | ||
#' @param y numeric matrix | ||
#' @param pairwise.complete.obs see below | ||
#' @param spearman if 'TRUE' Spearman correlation is computed, otherwise | ||
#' Pearson | ||
#' @param tidy if 'TRUE' data is returned in tidy format | ||
#' @param threshold absolute threshold above which values are returned in tidy | ||
#' format | ||
#' @param knn the number of highest correlations returned per column | ||
#' @return 'tgs_cor_knn' or 'tgs_cor' with 'tidy=TRUE' return a data frame, | ||
#' where each row represents the correlation between a pair of columns from 'x' | ||
#' and 'y' (or two columns of 'x' itself if 'y==NULL'). 'tgs_cor' with | ||
#' 'tidy=FALSE' returns a matrix of correlation values, where \code{val[X,Y]} | ||
#' represents the correlation between columns X and Y of the input matrices (if | ||
#' 'y' is not 'NULL') or the correlation between columns X and Y of 'x' (if 'y' | ||
#' is 'NULL'). | ||
#' @keywords ~correlation | ||
#' @examples | ||
#' \donttest{ | ||
#' # Note: all the available CPU cores might be used | ||
#' | ||
#' set.seed(seed = 0) | ||
#' rows <- 100 | ||
#' cols <- 1000 | ||
#' vals <- sample(1:(rows * cols / 2), rows * cols, replace = TRUE) | ||
#' m <- matrix(vals, nrow = rows, ncol = cols) | ||
#' m[sample(1:(rows * cols), rows * cols / 1000)] <- NA | ||
#' | ||
#' r1 <- tgs_cor(m, spearman = FALSE) | ||
#' r2 <- tgs_cor(m, pairwise.complete.obs = TRUE, spearman = TRUE) | ||
#' r3 <- tgs_cor_knn(m, NULL, 5, spearman = FALSE) | ||
#' } | ||
#' | ||
#' \dontshow{ | ||
#' options(tgs_use.blas = FALSE) | ||
#' options(tgs_max.processes = 1) | ||
#' | ||
#' set.seed(seed = 0) | ||
#' rows <- 100 | ||
#' cols <- 100 | ||
#' vals <- sample(1:(rows * cols / 2), rows * cols, replace = TRUE) | ||
#' m <- matrix(vals, nrow = rows, ncol = cols) | ||
#' m[sample(1:(rows * cols), rows * cols / 1000)] <- NA | ||
#' | ||
#' r1 <- tgs_cor(m, spearman = FALSE) | ||
#' r2 <- tgs_cor(m, pairwise.complete.obs = TRUE, spearman = TRUE) | ||
#' r3 <- tgs_cor_knn(m, NULL, 5, spearman = FALSE) | ||
#' } | ||
#' | ||
#' @export tgs_cor | ||
tgs_cor <- function(x, y = NULL, pairwise.complete.obs = FALSE, spearman = FALSE, tidy = FALSE, threshold = 0) {
    # Called without the mandatory matrix: report the usage line and stop.
    if (missing(x)) {
        stop("Usage: tgs_cor(x, y = NULL, pairwise.complete.obs = FALSE, spearman = FALSE, tidy = FALSE, threshold = 0)", call. = FALSE)
    }

    # With no second matrix the columns of 'x' are correlated with each other.
    auto_cor <- is.null(y)

    # The non-BLAS entry points are used when BLAS is switched off, or when
    # pairwise-complete Spearman correlation is requested and tgs_finite()
    # reports FALSE for one of the inputs. 'y' is only inspected in the
    # cross-correlation case.
    needs_plain <- !.tgs_use_blas() ||
        (pairwise.complete.obs && spearman &&
            (!tgs_finite(x) || (!auto_cor && !tgs_finite(y))))

    # Fresh environment, child of the caller's frame, handed to the native code.
    call_env <- new.env(parent = parent.frame())

    if (auto_cor && needs_plain) {
        return(.Call("tgs_cor", x, pairwise.complete.obs, spearman, tidy, threshold, call_env))
    }
    if (auto_cor) {
        return(.Call("tgs_cor_blas", x, pairwise.complete.obs, spearman, tidy, threshold, call_env))
    }
    if (needs_plain) {
        return(.Call("tgs_cross_cor", x, y, pairwise.complete.obs, spearman, tidy, threshold, call_env))
    }
    .Call("tgs_cross_cor_blas", x, y, pairwise.complete.obs, spearman, tidy, threshold, call_env)
}
|
||
#' @rdname tgs_cor | ||
#' @export | ||
tgs_cor_knn <- function(x, y, knn, pairwise.complete.obs = FALSE, spearman = FALSE, threshold = 0) {
    # Both the data matrix and the neighbour count are mandatory; report the
    # usage line when either is absent.
    if (missing(x) || missing(knn)) {
        stop("Usage: tgs_cor_knn(x, y, knn, pairwise.complete.obs = FALSE, spearman = FALSE, threshold = 0)", call. = FALSE)
    }

    # Fresh environment, child of the caller's frame, handed to the native code.
    call_env <- new.env(parent = parent.frame())

    # A non-NULL 'y' selects the cross-correlation entry point; otherwise the
    # columns of 'x' are compared with each other.
    if (!is.null(y)) {
        return(.Call("tgs_cross_cor_knn", x, y, knn, pairwise.complete.obs, spearman, threshold, call_env))
    }
    .Call("tgs_cor_knn", x, knn, pairwise.complete.obs, spearman, threshold, call_env)
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
#' Calculates distances between the matrix rows | ||
#' | ||
#' Calculates distances between the matrix rows. | ||
#' | ||
#' This function is very similar to 'package:stats::dist'. Unlike the latter it | ||
#' uses all available CPU cores to compute the distances in a much faster way. | ||
#' | ||
#' Unlike 'package:stats::dist', 'tgs_dist' always uses the "euclidean" metric (see | ||
#' the 'method' parameter of the 'dist' function). Thus: | ||
#' | ||
#' 'tgs_dist(x)' is equivalent to 'dist(x, method = "euclidean")' | ||
#' | ||
#' 'tgs_dist' can output its result in "tidy" format: a data frame with three | ||
#' columns named 'row1', 'row2' and 'dist'. Only the distances that are less than | ||
#' or equal to the 'threshold' are reported. Distance between row number X and Y | ||
#' is reported only if X < Y. 'diag' and 'upper' parameters are ignored when | ||
#' the result is returned in "tidy" format. | ||
#' | ||
#' @param x numeric matrix | ||
#' @param diag see 'dist' documentation | ||
#' @param upper see 'dist' documentation | ||
#' @param tidy if 'TRUE' data is returned in tidy format | ||
#' @param threshold threshold below which values are returned in tidy format | ||
#' @return If 'tidy' is 'FALSE' - the output is similar to that of the 'dist' | ||
#' function. If 'tidy' is 'TRUE' - 'tgs_dist' returns a data frame, where each | ||
#' row represents the distance between a pair of original rows. | ||
#' @keywords ~distance | ||
#' @examples | ||
#' \donttest{ | ||
#' # Note: all the available CPU cores might be used | ||
#' | ||
#' set.seed(seed = 0) | ||
#' rows <- 100 | ||
#' cols <- 1000 | ||
#' vals <- sample(1:(rows * cols / 2), rows * cols, replace = TRUE) | ||
#' m <- matrix(vals, nrow = rows, ncol = cols) | ||
#' m[sample(1:(rows * cols), rows * cols / 1000)] <- NA | ||
#' r <- tgs_dist(m) | ||
#' } | ||
#' | ||
#' \dontshow{ | ||
#' options(tgs_use.blas = FALSE) | ||
#' options(tgs_max.processes = 1) | ||
#' | ||
#' set.seed(seed = 0) | ||
#' rows <- 100 | ||
#' cols <- 100 | ||
#' vals <- sample(1:(rows * cols / 2), rows * cols, replace = TRUE) | ||
#' m <- matrix(vals, nrow = rows, ncol = cols) | ||
#' m[sample(1:(rows * cols), rows * cols / 1000)] <- NA | ||
#' r <- tgs_dist(m) | ||
#' } | ||
#' | ||
#' @export tgs_dist | ||
tgs_dist <- function(x, diag = FALSE, upper = FALSE, tidy = FALSE, threshold = Inf) {
    # Called without the mandatory matrix: report the usage line and stop.
    if (missing(x)) {
        stop("Usage: tgs_dist(x, diag = FALSE, upper = FALSE, tidy = FALSE, threshold = Inf)", call. = FALSE)
    }

    # Row names of the input (NULL when 'x' has no dimnames); used both as the
    # 'Labels' attribute and as a separate argument to the native routine.
    row_labels <- dimnames(x)[[1L]]

    # Attribute list passed to the native routine together with the data
    # (presumably attached to the non-tidy result -- confirm in the C++ code).
    # The names follow the attributes of a 'stats::dist' object, so 'method'
    # uses the same spelling as stats::dist(): "euclidean".
    attrs <- list(
        Size = nrow(x), Labels = row_labels, Diag = diag,
        Upper = upper, method = "euclidean", call = match.call(), class = "dist"
    )

    # Fresh environment, child of the caller's frame, handed to the native code.
    call_env <- new.env(parent = parent.frame())

    # Pick the BLAS-backed entry point when .tgs_use_blas() says so.
    if (.tgs_use_blas()) {
        .Call("tgs_dist_blas", x, attrs, tidy, threshold, row_labels, call_env)
    } else {
        .Call("tgs_dist", x, attrs, tidy, threshold, row_labels, call_env)
    }
}
Oops, something went wrong.