Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

1.1.2 #67

Merged
merged 3 commits into from
Oct 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ Encoding: UTF-8
Depends:
R (>= 4.0.0)
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.2.3
RoxygenNote: 7.3.2
Suggests:
testthat (>= 3.0.0),
Matrix,
Expand Down
2 changes: 1 addition & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ export(select_assay)
export(select_clusters)
export(select_projections)
export(setup)
export(validate_barcodes)
export(validate_clusters)
export(validate_count_mat)
export(validate_projections)
Expand All @@ -24,4 +25,3 @@ importFrom(utils,download.file)
importFrom(utils,packageVersion)
importFrom(utils,read.csv)
importFrom(utils,sessionInfo)
importFrom(utils,strcapture)
8 changes: 3 additions & 5 deletions R/hdf5.R
Original file line number Diff line number Diff line change
Expand Up @@ -46,17 +46,15 @@ create_hdf5 <- function(
#' @noRd
write_mat <- function(f, count_mat, feature_ids) {
features <- rownames(count_mat)
barcodes_unmodified <- colnames(count_mat)
barcodes_formatted <- sanitize_barcodes(barcodes_unmodified)
barcodes <- colnames(count_mat)
feature_count <- length(features)
barcode_count <- length(barcodes_formatted )
barcode_count <- length(barcodes)

# create groups
matrix_group <- f$create_group("matrix")
features_group <- matrix_group$create_group("features")

create_str_dataset(matrix_group, "barcodes", barcodes_formatted )
create_str_dataset(matrix_group, "barcodes_unmodified", barcodes_unmodified)
create_str_dataset(matrix_group, "barcodes", barcodes)
create_dataset(matrix_group, "data", as.integer(count_mat@x))
create_dataset(matrix_group, "indices", as.integer(count_mat@i))
create_dataset(matrix_group, "indptr", as.integer(count_mat@p))
Expand Down
12 changes: 6 additions & 6 deletions R/setup.R
Original file line number Diff line number Diff line change
Expand Up @@ -171,16 +171,16 @@ executable_basename <- function() {
#' @noRd
artifacts = list(
linux = list(
url = "https://github.com/10XGenomics/loupeR/releases/download/v1.1.1/louper-linux-x64",
md5 = "99903df7a3bc7b1b06d7509ddddf9a13"
url = "https://github.com/10XGenomics/loupeR/releases/download/v1.1.2/louper-linux-x64",
md5 = "b3fd93fd88a43fbcf3f6e40af3186eaa"
),
mac = list(
url = "https://github.com/10XGenomics/loupeR/releases/download/v1.1.1/louper-macos-x64",
md5 = "bf4ff652b88e0b9a88fb306b11a9c066"
url = "https://github.com/10XGenomics/loupeR/releases/download/v1.1.2/louper-macos-x64",
md5 = "ea65a2ec372d623c54d45c51793014e2"
),
windows = list(
url = "https://github.com/10XGenomics/loupeR/releases/download/v1.1.1/louper-windows-x64.exe",
md5 = "f40833260e3d4c14d8534a1f3349096d"
url = "https://github.com/10XGenomics/loupeR/releases/download/v1.1.2/louper-windows-x64.exe",
md5 = "f5d1e99138e840169a19191d10bb25ab"
)
)

Expand Down
59 changes: 0 additions & 59 deletions R/util.R
Original file line number Diff line number Diff line change
Expand Up @@ -250,65 +250,6 @@ cluster_levels_word_like <- function(cluster) {
})
}

#' Sanitize barcodes into expected format
#'
#' Barcodes that already satisfy `are_barcodes_valid()` are returned untouched.
#' Otherwise, when every barcode matches the
#' "prefix / base pairs / optional -number / suffix" pattern below, each one is
#' rewritten as `BARCODE[-NUM][-PREFIX][-SUFFIX]`. If any barcode does not
#' match, the input is returned unchanged.
#'
#' @param barcodes character vector of barcodes names
#'
#' @importFrom utils strcapture
#'
#' @return character vector of sanitized barcode names
#'
#' @noRd
sanitize_barcodes <- function(barcodes) {
  if (are_barcodes_valid(barcodes)) {
    return(barcodes)
  }

  # Some examples that we have seen
  #
  # Seurat Integrate will add a prefix to the barcode "12U_ACTGACTGACTG-1"
  # Other users tend to add a prefix "SOMEPREFIX:ACTGACTGACTG"
  pattern <- "^(.*?)(_|-|:)?([ACTG]{6,})(-\\d+)?(_|-|:)?(.*?)$"

  # Only sanitize barcodes if all of them match the pattern. The check uses the
  # same (perl) engine as the capture below, so a barcode that passes here is
  # guaranteed to yield non-NA capture groups.
  if (!all(grepl(pattern, barcodes, perl = TRUE))) {
    return(barcodes)
  }

  # capture subgroups of pattern (prefix, barcode, suffix)
  # NOTE: need to use perl regexs to support non-greedy matching
  groups <- strcapture(
    pattern = pattern,
    x = barcodes,
    perl = TRUE,
    proto = list(
      prefix = character(),
      sep1 = character(),
      barcode = character(),
      barcodeDashNum = character(),
      sep2 = character(),
      suffix = character()
    )
  )

  # rewrite barcodes "BARCODE-PREFIX-SUFFIX"; an empty prefix or suffix
  # contributes nothing (no dangling hyphen). Done on whole columns rather
  # than row by row.
  prefix <- ifelse(nzchar(groups$prefix), paste0("-", groups$prefix), "")
  suffix <- ifelse(nzchar(groups$suffix), paste0("-", groups$suffix), "")

  paste0(groups$barcode, groups$barcodeDashNum, prefix, suffix)
}

#' Gets the systems OS.
#'
#' @return "windows", "mac", "unix"
Expand Down
42 changes: 31 additions & 11 deletions R/validate.R
Original file line number Diff line number Diff line change
Expand Up @@ -45,13 +45,12 @@ validate_count_mat <- function(count_mat, feature_ids = NULL) {
return(err("features cannot be the empty string"))
}

barcodes <- sanitize_barcodes(barcodes)

if (!are_barcodes_valid(barcodes)) {
resp <- validate_barcodes(barcodes)
if (!resp$success) {
barcode_msg <- paste(
'There is an issue with the formatting of your barcodes.',
'Barcodes should begin with base pairs and end with an optional hyphen and suffix.',
'For further information, please see the documentation: 10xgen.com/louper'
'There is an issue with the formatting of your barcodes:',
resp$msg,
'Please see the readme at github.com/10xGenomics/loupeR'
)

return(err(barcode_msg))
Expand Down Expand Up @@ -81,12 +80,33 @@ validate_count_mat <- function(count_mat, feature_ids = NULL) {
#'
#' @param barcodes a character vector
#'
#' @return A boolean true or false
#' @return A list with two elements:
#' \itemize{
#' \item success: a logical value indicating success (TRUE) or failure (FALSE)
#' \item msg: an optional error message (NULL if success is TRUE)
#' }
#'
#' @importFrom methods is
#'
#' @noRd
are_barcodes_valid <- function(barcodes) {
pattern <-"^([ACTG]{6,})(-.*?)?$"
return(all(grepl(pattern, barcodes)))
#' @export
validate_barcodes <- function(barcodes) {
  # Accepted barcode shapes. Every pattern allows an optional prefix that ends
  # in ":" or "_" and an optional suffix that starts with ":" or "_".
  patterns <- c(
    # 14 or more A/C/G/T characters
    barcode = "^(.*[:_])?([ACGT]{14,})([:_].*)?$",
    # same, followed by "-<digits>"
    barcodeGem = "^(.*[:_])?([ACGT]{14,})-(\\d+)([:_].*)?$",
    # "s_<3 digits>um_<5 digits>_<5 digits>"
    visiumHD = "^(.*[:_])?(s_\\d{3}um_\\d{5}_\\d{5})([:_].*)?$",
    # same, followed by "-<digits>"
    visiumHDGem = "^(.*[:_])?(s_\\d{3}um_\\d{5}_\\d{5})-(\\d+)([:_].*)?$",
    # 1 to 8 letters in a-p, followed by "-<digits>"
    xeniumCellId = "^(.*[:_])?([a-p]{1,8})-(\\d+)([:_].*)?$"
  )

  # A barcode is valid when it matches at least one pattern. grepl() is
  # vectorised over its input, so each pattern is applied to all barcodes at
  # once instead of testing the barcodes one at a time.
  is_valid <- rep(FALSE, length(barcodes))
  for (pattern in patterns) {
    is_valid <- is_valid | grepl(pattern, barcodes)
  }

  # Report the first offending barcode, in input order.
  invalid <- which(!is_valid)
  if (length(invalid) > 0) {
    return(err(paste("Invalid barcode:", barcodes[[invalid[[1]]]])))
  }

  SUCCESS
}

#' Validate the seurat clusters
Expand Down
53 changes: 45 additions & 8 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
<a href="#troubleshooting">Troubleshooting</a>
</p>

`loupeR` creates a 10x Genomics Loupe file from a Seurat object. 10x Genomics Loupe Browser can visualize single-cell and spatial data from 10x Genomics. *Only single-cell gene expression datasets are supported*.
`loupeR` creates 10x Genomics Loupe files from Seurat objects and other 10x Genomics data in R. 10x Genomics Loupe Browser can visualize single-cell and spatial data from 10x Genomics. _Only single-cell gene expression datasets are supported in LoupeR_.

## How to Use

Expand Down Expand Up @@ -54,7 +54,7 @@ create_loupe(
)
```

Additionally, use the utility function `read_feature_ids_from_tsv` to read the Ensembl IDs from the 10x dataset. A Seurat object will only have imported the feature names or IDs and attached these as rownames to the count matrix. In order for the Ensembl ID links to work correctly within Loupe Browser, one must manually import them and include them.
Additionally, use the utility function `read_feature_ids_from_tsv` to read the Ensembl IDs from the 10x dataset. A Seurat object will only have imported the feature names or IDs and attached these as rownames to the count matrix. In order for the Ensembl ID links to work correctly within Loupe Browser, one must manually import them and include them.

```R
# import the library
Expand All @@ -73,7 +73,7 @@ create_loupe_from_seurat(seurat_obj, feature_ids = feature_ids)

### HDF5

Before using `loupeR`, make sure that your system has installed [HDF5](https://www.hdfgroup.org/downloads/hdf5). The HDF5 organization requires registration before being able to download the installer. Below are some other more convenient methods for installing HDF5 if you happen to have these package managers installed.
Before using `loupeR`, make sure that your system has installed [HDF5](https://www.hdfgroup.org/downloads/hdf5). The HDF5 organization requires registration before being able to download the installer. Below are some other more convenient methods for installing HDF5 if you happen to have these package managers installed.

- macOS with [Homebrew](https://brew.sh/) - `brew install hdf5` <br>
- windows with [vcpkg](https://vcpkg.io/en/index.html) - `.\vcpkg install hdf5`
Expand All @@ -95,26 +95,63 @@ install.packages(url, repos = NULL, type = "source")

### Installing loupeR using the `remotes` package

Another installation option is to use the `remotes` package to directly install `loupeR` and its dependencies. The installed package won't include the prebundled louper executable, so you must invoke the `loupeR::setup()` function which will go and download it.
Another installation option is to use the `remotes` package to directly install `loupeR` and its dependencies. The installed package won't include the prebundled louper executable, so you must invoke the `loupeR::setup()` function which will go and download it.

``` r
```r
remotes::install_github("10XGenomics/loupeR")
loupeR::setup()
```

### Automated installation and execution

If you are interested in automating LoupeR installation and execution (and are blocked by interactive license acceptance), please write to [[email protected]](mailto:[email protected]) for further assistance.

## Loupe Browser Compatibility

With new versions of the Loupe Browser, new versions of LoupeR need to be released. The table below shows version requirements between the two.

| LoupeR Version | Loupe Browser Version |
| ------------- | ------------- |
| -------------- | --------------------- |
| v1.0.x | Loupe Browser >= 7.0 |
| v1.1.x | Loupe Browser >= 8.0 |
| v1.1.1 | Loupe Browser >= 8.0 |
| v1.1.2 | Loupe Browser >= 8.1 |

## Tutorials

* [Demo notebook](https://colab.research.google.com/github/10XGenomics/loupeR/blob/main/misc/tutorials/5k_mouse_brain.ipynb) with basic processing of an example 10x dataset [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/10XGenomics/loupeR/blob/main/misc/tutorials/5k_mouse_brain.ipynb)
- [Demo notebook](https://colab.research.google.com/github/10XGenomics/loupeR/blob/main/misc/tutorials/5k_mouse_brain.ipynb) with basic processing of an example 10x dataset [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/10XGenomics/loupeR/blob/main/misc/tutorials/5k_mouse_brain.ipynb)

## Barcode Formatting

Barcodes must be from 10x Genomics experiments to work with LoupeR. Valid 10x Genomics Single Cell Gene Expression barcodes are 16 characters long, each one of `A`, `C`, `G`, or `T`, followed by an optional GEM well suffix, for example:

```
AAACCCAAGAAATTGC
AAACCCAAGAAATTGC-1
```

Barcodes can also have an additional optional prefix or suffix. These optional prefixes and suffixes must be delineated either by a `:` or a `_`:

```
prefix_AAACCCAAGAAATTGC
AAACCCAAGAAATTGC_suffix
prefix_AAACCCAAGAAATTGC_suffix

prefix:AAACATACAAACAG
AAACATACAAACAG:suffix
prefix:AAACATACAAACAG:suffix

prefix_AAACCCAAGAAATTGC-1
AAACCCAAGAAATTGC-1_suffix
prefix_AAACCCAAGAAATTGC-1_suffix

prefix:AAACCCAAGAAATTGC-1
AAACCCAAGAAATTGC-1:suffix
prefix:AAACCCAAGAAATTGC-1:suffix
```

**Note**: Visium and Xenium barcodes are formatted differently. Visium and Xenium data are currently enabled for use with LoupeR, but **_not_** fully supported. Expression data for these assays can be processed by loupeR, but **_not_** image data.

See `test-validate.R` for further examples of both valid and invalid barcode formatting, as well as `validate.R` for the exact formatting requirements as code.

## Troubleshooting

Expand Down
17 changes: 17 additions & 0 deletions man/validate_barcodes.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions tests/testthat/helper.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#' Create random barcode
random_barcode <- function(size = 10) {
random_barcode <- function(size = 14) {
  # Draw `size` bases with replacement and glue them into a single string.
  bases <- c("A", "C", "T", "G")
  drawn <- sample(bases, size, replace = TRUE)
  paste(drawn, collapse = "")
}

Expand All @@ -17,7 +17,7 @@ create_count_mat <- function(rows, cols, valid_barcodes = FALSE) {
colnames <- as.character()
if (cols > 0) {
if (valid_barcodes) {
colnames <- lapply(rep(10, cols), random_barcode)
colnames <- lapply(rep(14, cols), random_barcode)
} else {
colnames <- paste0("col", 1:cols)
}
Expand Down
38 changes: 0 additions & 38 deletions tests/testthat/test-util.R
Original file line number Diff line number Diff line change
Expand Up @@ -95,41 +95,3 @@ test_that("deduplicate_clusters prefers named factors", {
expect_length(clusters, 1)
expect_equal(clusters[[1]], cell_types)
})

# Checks sanitize_barcodes() on single barcodes: inputs that are already in
# the expected form come back unchanged, and prefixed/suffixed inputs are
# rewritten with the base pairs (and optional "-<number>") first.
test_that("sanitize_barcodes corrects barcodes", {
# no change
expect_equal(sanitize_barcodes("ACTGAA"), "ACTGAA")

# no change + lane numbers
expect_equal(sanitize_barcodes("ACTGAA-1"), "ACTGAA-1")

# prefix, joined with each supported separator ("_", "-", ":")
expect_equal(sanitize_barcodes("prefix_ACTGAA"), "ACTGAA-prefix")
expect_equal(sanitize_barcodes("prefix-ACTGAA"), "ACTGAA-prefix")
expect_equal(sanitize_barcodes("prefix:ACTGAA"), "ACTGAA-prefix")

# barcodes with lane numbers + prefix
expect_equal(sanitize_barcodes("prefix_ACTGAA-1"), "ACTGAA-1-prefix")
expect_equal(sanitize_barcodes("prefix-ACTGAA-1"), "ACTGAA-1-prefix")
expect_equal(sanitize_barcodes("prefix:ACTGAA-1"), "ACTGAA-1-prefix")

# barcodes + prefix_with_underscore (underscore inside the prefix is kept)
expect_equal(sanitize_barcodes("pre_fix_ACTGAA"), "ACTGAA-pre_fix")
expect_equal(sanitize_barcodes("pre_fix-ACTGAA"), "ACTGAA-pre_fix")
expect_equal(sanitize_barcodes("pre_fix:ACTGAA"), "ACTGAA-pre_fix")

# barcodes with lane numbers + prefix_with_underscore
expect_equal(sanitize_barcodes("pre_fix_ACTGAA-1"), "ACTGAA-1-pre_fix")
expect_equal(sanitize_barcodes("pre_fix-ACTGAA-1"), "ACTGAA-1-pre_fix")
expect_equal(sanitize_barcodes("pre_fix:ACTGAA-1"), "ACTGAA-1-pre_fix")

# barcodes with prefix_with_underscore and suffix_with_underscore
expect_equal(sanitize_barcodes("pre_fix_ACTGAA-suf_fix"), "ACTGAA-pre_fix-suf_fix")
expect_equal(sanitize_barcodes("pre_fix-ACTGAA-suf_fix"), "ACTGAA-pre_fix-suf_fix")
expect_equal(sanitize_barcodes("pre_fix:ACTGAA-suf_fix"), "ACTGAA-pre_fix-suf_fix")

# barcodes with lane numbers with prefix_with_underscore and suffix_with_underscore
expect_equal(sanitize_barcodes("pre_fix_ACTGAA-1-suf_fix"), "ACTGAA-1-pre_fix-suf_fix")
expect_equal(sanitize_barcodes("pre_fix-ACTGAA-1-suf_fix"), "ACTGAA-1-pre_fix-suf_fix")
expect_equal(sanitize_barcodes("pre_fix:ACTGAA-1-suf_fix"), "ACTGAA-1-pre_fix-suf_fix")
})
Loading
Loading