Skip to content

Commit

Permalink
Merge pull request #16 from tanaylab/fix@clustering-crashed-when-hclu…
Browse files Browse the repository at this point in the history
…st_intra_clusters-was-TRUE-and-input-was-a-matrix

Fix: clustering crashed when `hclust_intra_clusters` was TRUE and inp…
  • Loading branch information
aviezerl authored May 15, 2024
2 parents dec5a80 + 3020ae0 commit 51a5cb2
Show file tree
Hide file tree
Showing 4 changed files with 26 additions and 13 deletions.
5 changes: 3 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: tglkmeans
Title: Efficient Implementation of K-Means++ Algorithm
Version: 0.5.4
Version: 0.5.5
Authors@R: c(
person("Aviezer", "Lifshitz", , "[email protected]", role = c("aut", "cre")),
person("Amos", "Tanay", role = "aut"),
Expand All @@ -26,6 +26,7 @@ Depends:
Imports:
cli,
doFuture,
doRNG,
dplyr (>= 0.5.0),
future,
ggplot2 (>= 2.2.0),
Expand Down Expand Up @@ -56,5 +57,5 @@ Config/testthat/parallel: false
Encoding: UTF-8
NeedsCompilation: yes
OS_type: unix
RoxygenNote: 7.2.3
RoxygenNote: 7.3.1
SystemRequirements: GNU make
4 changes: 4 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
# tglkmeans 0.5.5

* Fix: clustering crashed when `hclust_intra_clusters` was TRUE and input was a matrix.

# tglkmeans 0.5.4

* Fixed usage of more than 2 cores when testing on CRAN.
Expand Down
1 change: 1 addition & 0 deletions R/TGL_kmeans.R
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,7 @@ TGL_kmeans_tidy <- function(df,

add_data_to_km_object <- function(df, cluster, ids, id_column_name) {
df %>%
as.data.frame() %>%
# add the ids at id_column_name
mutate(!!id_column_name := as.character(ids)) %>%
left_join(cluster, by = id_column_name) %>%
Expand Down
29 changes: 18 additions & 11 deletions R/intra_clustering.R
Original file line number Diff line number Diff line change
@@ -1,15 +1,22 @@
hclust_every_cluster <- function(km, df, parallel = TRUE) {
all_hc <- df %>%
plyr::dlply(plyr::.(clust), function(x) {
ids <- x$id
hc <- as.matrix(x[, -1:-2]) %>%
t() %>%
tgs_cor(pairwise.complete.obs = TRUE, spearman = TRUE) %>%
tgs_dist() %>%
hclust(method = "ward.D2")
return(tibble(clust = x$clust[1], id = ids, intra_clust_order = hc$order))
}, .parallel = parallel) %>%
purrr::map_df(~.x)
doFuture::withDoRNG({
all_hc <- df %>%
plyr::dlply(plyr::.(clust), function(x) {
ids <- x$id
dist <- as.matrix(x[, -1:-2]) %>%
t() %>%
tgs_cor(pairwise.complete.obs = TRUE, spearman = TRUE) %>%
tgs_dist()

if (length(dist) == 0 || any(is.na(dist))) {
return(tibble(clust = x$clust[1], id = ids, intra_clust_order = 1:length(ids)))
}

hc <- hclust(dist, method = "ward.D2")
return(tibble(clust = x$clust[1], id = ids, intra_clust_order = hc$order))
}, .parallel = parallel) %>%
purrr::map_df(~.x)
})

res <- df %>%
select(id, clust) %>%
Expand Down

0 comments on commit 51a5cb2

Please sign in to comment.