From a3c692b9fe3bf19c1102f91f2676d304c0d22819 Mon Sep 17 00:00:00 2001 From: aviezerl Date: Mon, 18 Dec 2023 15:05:22 +0200 Subject: [PATCH 1/5] Added validity checks for `k` and the number of observations --- DESCRIPTION | 2 +- NEWS.md | 4 +++- R/TGL_kmeans.R | 9 +++++++++ tests/testthat/test-clustering.R | 5 +++++ 4 files changed, 18 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 3981cb1..7f63a2c 100755 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: tglkmeans Title: Efficient Implementation of K-Means++ Algorithm -Version: 0.3.11 +Version: 0.3.12 Authors@R: c(person(given = "Aviezer", family = "Lifshitz", diff --git a/NEWS.md b/NEWS.md index e171a23..e3a9fbb 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,6 @@ -# tglkmeans (development version) +# tglkmeans 0.3.12 + +* Added validity checks for `k` and the number of observations. # tgkmeans 0.3.11 diff --git a/R/TGL_kmeans.R b/R/TGL_kmeans.R index 09735c4..df3cc98 100755 --- a/R/TGL_kmeans.R +++ b/R/TGL_kmeans.R @@ -76,6 +76,15 @@ TGL_kmeans_tidy <- function(df, df <- add_id_column(df) } } + + if (k < 1) { + stop("k must be greater than 0") + } + + if (nrow(df) < k) { + stop(paste0("number of observations (", nrow(df), ") must be greater than or equal to k (", k, ")")) + } + mat <- t(df[, -1]) # Thorw an error if there are rows that do not contain any value diff --git a/tests/testthat/test-clustering.R b/tests/testthat/test-clustering.R index ef30f85..858b69b 100755 --- a/tests/testthat/test-clustering.R +++ b/tests/testthat/test-clustering.R @@ -240,3 +240,8 @@ test_that("true_clust column is not added when add_true_clust is FALSE", { data <- simulate_data(n = 100, sd = 0.3, nclust = 30, frac_na = NULL, add_true_clust = FALSE) expect_true(!("true_clust" %in% colnames(data))) }) + +test_that("and error is thrown when number of observations is less than number of clusters", { + expect_error(TGL_kmeans(data.frame(id = 1:10, V1 = rnorm(10)), 30, metric = "euclid", verbose = FALSE, seed = 
60427)) + expect_error(TGL_kmeans(data.frame(id = numeric(0)))) +}) \ No newline at end of file From 0ac80b0a32c24e7fd75e75b8fda60c6c889e19fd Mon Sep 17 00:00:00 2001 From: aviezerl Date: Mon, 18 Dec 2023 15:05:29 +0200 Subject: [PATCH 2/5] added github actions --- .Rbuildignore | 1 + .github/.gitignore | 1 + .github/workflows/R-CMD-check.yaml | 49 ++++++++++++++++++++ .github/workflows/pkgdown.yaml | 48 ++++++++++++++++++++ .github/workflows/style.yaml | 73 ++++++++++++++++++++++++++++++ README.Rmd | 1 + 6 files changed, 173 insertions(+) create mode 100644 .github/.gitignore create mode 100644 .github/workflows/R-CMD-check.yaml create mode 100644 .github/workflows/pkgdown.yaml create mode 100644 .github/workflows/style.yaml diff --git a/.Rbuildignore b/.Rbuildignore index 0cd4c56..6533a25 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -15,3 +15,4 @@ ^Meta$ ^cran-comments\.md$ ^CRAN-SUBMISSION$ +^\.github$ diff --git a/.github/.gitignore b/.github/.gitignore new file mode 100644 index 0000000..2d19fc7 --- /dev/null +++ b/.github/.gitignore @@ -0,0 +1 @@ +*.html diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml new file mode 100644 index 0000000..a3ac618 --- /dev/null +++ b/.github/workflows/R-CMD-check.yaml @@ -0,0 +1,49 @@ +# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples +# Need help debugging build failures? 
Start at https://github.com/r-lib/actions#where-to-find-help +on: + push: + branches: [main, master] + pull_request: + branches: [main, master] + +name: R-CMD-check + +jobs: + R-CMD-check: + runs-on: ${{ matrix.config.os }} + + name: ${{ matrix.config.os }} (${{ matrix.config.r }}) + + strategy: + fail-fast: false + matrix: + config: + - {os: macos-latest, r: 'release'} + - {os: windows-latest, r: 'release'} + - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} + - {os: ubuntu-latest, r: 'release'} + - {os: ubuntu-latest, r: 'oldrel-1'} + + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + R_KEEP_PKG_SOURCE: yes + + steps: + - uses: actions/checkout@v3 + + - uses: r-lib/actions/setup-pandoc@v2 + + - uses: r-lib/actions/setup-r@v2 + with: + r-version: ${{ matrix.config.r }} + http-user-agent: ${{ matrix.config.http-user-agent }} + use-public-rspm: true + + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + extra-packages: any::rcmdcheck + needs: check + + - uses: r-lib/actions/check-r-package@v2 + with: + upload-snapshots: true diff --git a/.github/workflows/pkgdown.yaml b/.github/workflows/pkgdown.yaml new file mode 100644 index 0000000..ed7650c --- /dev/null +++ b/.github/workflows/pkgdown.yaml @@ -0,0 +1,48 @@ +# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples +# Need help debugging build failures? 
Start at https://github.com/r-lib/actions#where-to-find-help +on: + push: + branches: [main, master] + pull_request: + branches: [main, master] + release: + types: [published] + workflow_dispatch: + +name: pkgdown + +jobs: + pkgdown: + runs-on: ubuntu-latest + # Only restrict concurrency for non-PR jobs + concurrency: + group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }} + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + permissions: + contents: write + steps: + - uses: actions/checkout@v3 + + - uses: r-lib/actions/setup-pandoc@v2 + + - uses: r-lib/actions/setup-r@v2 + with: + use-public-rspm: true + + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + extra-packages: any::pkgdown, local::. + needs: website + + - name: Build site + run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE) + shell: Rscript {0} + + - name: Deploy to GitHub pages 🚀 + if: github.event_name != 'pull_request' + uses: JamesIves/github-pages-deploy-action@v4.4.1 + with: + clean: false + branch: gh-pages + folder: docs diff --git a/.github/workflows/style.yaml b/.github/workflows/style.yaml new file mode 100644 index 0000000..005b32d --- /dev/null +++ b/.github/workflows/style.yaml @@ -0,0 +1,73 @@ +# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples +# Need help debugging build failures? 
Start at https://github.com/r-lib/actions#where-to-find-help +on: + push: + paths: ["**.[rR]", "**.[qrR]md", "**.[rR]markdown", "**.[rR]nw", "**.[rR]profile"] + +name: Style + +jobs: + style: + runs-on: ubuntu-latest + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + steps: + - name: Checkout repo + uses: actions/checkout@v3 + with: + fetch-depth: 0 + + - name: Setup R + uses: r-lib/actions/setup-r@v2 + with: + use-public-rspm: true + + - name: Install dependencies + uses: r-lib/actions/setup-r-dependencies@v2 + with: + extra-packages: any::styler, any::roxygen2 + needs: styler + + - name: Enable styler cache + run: styler::cache_activate() + shell: Rscript {0} + + - name: Determine cache location + id: styler-location + run: | + cat( + "location=", + styler::cache_info(format = "tabular")$location, + "\n", + file = Sys.getenv("GITHUB_OUTPUT"), + append = TRUE, + sep = "" + ) + shell: Rscript {0} + + - name: Cache styler + uses: actions/cache@v3 + with: + path: ${{ steps.styler-location.outputs.location }} + key: ${{ runner.os }}-styler-${{ github.sha }} + restore-keys: | + ${{ runner.os }}-styler- + ${{ runner.os }}- + + - name: Style + run: styler::style_pkg(indent_by = 4) + shell: Rscript {0} + + - name: Commit and push changes + run: | + if FILES_TO_COMMIT=($(git diff-index --name-only ${{ github.sha }} \ + | egrep --ignore-case '\.(R|[qR]md|Rmarkdown|Rnw|Rprofile)$')) + then + git config --local user.name "$GITHUB_ACTOR" + git config --local user.email "$GITHUB_ACTOR@users.noreply.github.com" + git commit ${FILES_TO_COMMIT[*]} -m "Style code (GHA)" + git pull --ff-only + git push origin + else + echo "No changes to commit." 
+ fi diff --git a/README.Rmd b/README.Rmd index c7f0dd0..4f127fa 100755 --- a/README.Rmd +++ b/README.Rmd @@ -13,6 +13,7 @@ knitr::opts_chunk$set( [![CRAN status](https://www.r-pkg.org/badges/version/tglkmeans)](https://CRAN.R-project.org/package=tglkmeans) [![Codecov test coverage](https://codecov.io/gh/tanaylab/tglkmeans/branch/master/graph/badge.svg)](https://app.codecov.io/gh/tanaylab/tglkmeans?branch=master) +[![R-CMD-check](https://github.com/tanaylab/tglkmeans/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/tanaylab/tglkmeans/actions/workflows/R-CMD-check.yaml) From f43a0f5a1994fa0837d84eb65da9cbbde41bea08 Mon Sep 17 00:00:00 2001 From: aviezerl Date: Mon, 18 Dec 2023 13:08:52 +0000 Subject: [PATCH 3/5] Style code (GHA) --- tests/testthat/test-clustering.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/test-clustering.R b/tests/testthat/test-clustering.R index 858b69b..f740046 100755 --- a/tests/testthat/test-clustering.R +++ b/tests/testthat/test-clustering.R @@ -244,4 +244,4 @@ test_that("true_clust column is not added when add_true_clust is FALSE", { test_that("and error is thrown when number of observations is less than number of clusters", { expect_error(TGL_kmeans(data.frame(id = 1:10, V1 = rnorm(10)), 30, metric = "euclid", verbose = FALSE, seed = 60427)) expect_error(TGL_kmeans(data.frame(id = numeric(0)))) -}) \ No newline at end of file +}) From b3a5a71cbebb000170c522493042b74ee6b688a0 Mon Sep 17 00:00:00 2001 From: Aviezer Lifshitz Date: Mon, 18 Dec 2023 21:40:52 +0200 Subject: [PATCH 4/5] Update R-CMD-check.yaml --- .github/workflows/R-CMD-check.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index a3ac618..7259924 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -19,7 +19,6 @@ jobs: matrix: config: - {os: macos-latest, r: 'release'} - - {os: windows-latest, r: 'release'} - 
{os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} - {os: ubuntu-latest, r: 'release'} - {os: ubuntu-latest, r: 'oldrel-1'} From d4cd945a2536369aeb7c6648fd93a7e546cdd21d Mon Sep 17 00:00:00 2001 From: olivroy <52606734+olivroy@users.noreply.github.com> Date: Wed, 20 Dec 2023 14:16:42 -0500 Subject: [PATCH 5/5] Add website link to DESCRIPTION --- DESCRIPTION | 1 + 1 file changed, 1 insertion(+) diff --git a/DESCRIPTION b/DESCRIPTION index 7f63a2c..ee3c6a9 100755 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -15,6 +15,7 @@ Maintainer: Aviezer Lifshitz Description: Efficient implementation of K-Means++ algorithm. For more information see (1) "kmeans++ the advantages of the k-means++ algorithm" by David Arthur and Sergei Vassilvitskii (2007), Proceedings of the eighteenth annual ACM-SIAM symposium on Discrete algorithms, Society for Industrial and Applied Mathematics, Philadelphia, PA, USA, pp. 1027-1035, and (2) "The Effectiveness of Lloyd-Type Methods for the k-Means Problem" by Rafail Ostrovsky, Yuval Rabani, Leonard J. Schulman and Chaitanya Swamy . License: MIT + file LICENSE BugReports: https://github.com/tanaylab/tglkmeans/issues +URL: https://tanaylab.github.io/tglkmeans/, https://github.com/tanaylab/tglkmeans OS_type: unix Depends: R (>= 4.0.0)