diff --git a/.DS_Store b/.DS_Store index cb3f848e..16a4f428 100644 Binary files a/.DS_Store and b/.DS_Store differ diff --git a/DESCRIPTION b/DESCRIPTION index 6953679c..b73375a1 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,8 +1,8 @@ Package: ArchR Type: Package -Date: 2022-04-03 +Date: 2022-07-17 Title: Analyzing single-cell regulatory chromatin in R. -Version: 1.0.2 +Version: 1.0.3 Authors@R: c( person("Jeffrey", "Granja", email = "jgranja.stanford@gmail.com", role = c("aut","cre")), person("Ryan", "Corces", role = "aut")) @@ -14,6 +14,7 @@ LazyData: TRUE RoxygenNote: 7.1.2 Encoding: UTF-8 Imports: + devtools, ggplot2, SummarizedExperiment, data.table, @@ -83,6 +84,7 @@ Collate: 'RNAIntegration.R' 'RcppExports.R' 'ReproduciblePeakSet.R' + 'SparseUtils.R' 'Trajectory.R' 'ValidationUtils.R' 'VisualizeData.R' diff --git a/NAMESPACE b/NAMESPACE index 5fec629b..df517f94 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -9,12 +9,15 @@ export("%ni%") export(.DollarNames.ArchRProject) export(ArchRBrowser) export(ArchRBrowserTrack) +export(ArchRHeatmap) export(ArchRPalettes) export(ArchRProject) export(addArchRAnnotations) export(addArchRChrPrefix) export(addArchRDebugging) export(addArchRGenome) +export(addArchRH5Level) +export(addArchRLocking) export(addArchRLogging) export(addArchRThreads) export(addArchRVerbose) @@ -57,6 +60,7 @@ export(createArrowFiles) export(createGeneAnnotation) export(createGenomeAnnotation) export(createLogFile) +export(customEnrichment) export(enrichHeatmap) export(exportGroupSE) export(exportPeakMatrixForSTREAM) @@ -67,6 +71,7 @@ export(findMacs2) export(getArchRChrPrefix) export(getArchRDebugging) export(getArchRGenome) +export(getArchRH5Level) export(getArchRLogging) export(getArchRThreads) export(getArchRVerbose) @@ -90,6 +95,7 @@ export(getGenes) export(getGenome) export(getGenomeAnnotation) export(getGroupBW) +export(getGroupFragments) export(getGroupSE) export(getGroupSummary) export(getImputeWeights) @@ -101,6 +107,7 @@ export(getMatrixFromArrow) export(getMatrixFromProject) export(getMonocleTrajectories) export(getOutputDirectory) +export(getPBGroupSE) export(getPeak2GeneLinks) export(getPeakAnnotation) export(getPeakSet) @@ -111,6 +118,7 @@ export(getSampleColData) export(getSampleNames) export(getSeqnames) export(getTSS) +export(getTestArrow) export(getTestFragments) export(getTestProject) export(getTrajectory) @@ -153,12 +161,14 @@ export(projectBulkATAC) export(recoverArchRProject) export(reformatFragmentFiles) export(saveArchRProject) +export(setArchRLocking) export(subsetArchRProject) export(subsetCells) export(theme_ArchR) export(trajectoryHeatmap) export(validBSgenome) import(data.table) +importClassesFrom(GenomicRanges,GRanges) importFrom(GenomicRanges,GRanges) importFrom(Rcpp,sourceCpp) useDynLib(ArchR) diff --git a/R/AllClasses.R b/R/AllClasses.R index 314c4182..38c7436b 100644 --- a/R/AllClasses.R +++ b/R/AllClasses.R @@ -1,5 +1,6 @@ #' @useDynLib ArchR #' @importFrom Rcpp sourceCpp +#' @importClassesFrom GenomicRanges GRanges #' @importFrom GenomicRanges GRanges #' @import data.table NULL @@ -87,6 +88,15 @@ setMethod("show", "ArchRProject", #' genome information such as nucleotide information or chromosome sizes. #' @param showLogo A boolean value indicating whether to show the ascii ArchR logo after successful creation of an `ArchRProject`. #' @param threads The number of threads to use for parallel execution. +#' +#' @examples +#' +#' # Get Test Arrow +#' arrow <- getTestArrow() +#' +#' # Create ArchR Project for Analysis +#' proj <- ArchRProject(arrow) +#' #' @export ArchRProject <- function( ArrowFiles = NULL, @@ -209,6 +219,15 @@ ArchRProject <- function( #' This function will recover an ArchRProject if it has broken sampleColData or cellColData due to different versions of bioconductor s4vectors. #' #' @param ArchRProj An `ArchRProject` object. +#' +#' @examples +#' +#' # Get Test Project +#' proj <- getTestProject() +#' +#' # Try to Recover ArchR Project +#' proj <- recoverArchRProject(proj) +#' #' @export recoverArchRProject <- function(ArchRProj){ @@ -230,6 +249,11 @@ recoverArchRProject <- function(ArchRProj){ } } + #Try to make sure that DataFrame matches currently loaded + #S4Vectors Package + ArchRProj@cellColData <- DataFrame(ArchRProj@cellColData) + ArchRProj@sampleColData <- DataFrame(ArchRProj@sampleColData) + if(inherits(ArchRProj@peakSet, "GRanges")){ peakSet <- tryCatch({ @@ -355,6 +379,24 @@ recoverArchRProject <- function(ArchRProj){ #' background peaks) should be ignored when re-normalizing file paths. If set to `FALSE` loading of the `ArchRProject` #' will fail unless all components can be found. #' @param showLogo A boolean value indicating whether to show the ascii ArchR logo after successful creation of an `ArchRProject`. +#' +#' @examples +#' +#' # Get Small PBMC Project Location +#' zipProj <- file.path(system.file("testdata", package="ArchR"), "PBSmall.zip") +#' +#' # Copy to current directory +#' file.copy(zipProj, basename(zipProj), overwrite = TRUE) +#' +#' # Unzip +#' unzip(basename(zipProj), overwrite = TRUE) +#' +#' # Remove +#' file.remove(basename(zipProj)) +#' +#' # Load +#' loadArchRProject("PBSmall") +#' #' @export loadArchRProject <- function( path = "./", @@ -482,6 +524,14 @@ loadArchRProject <- function( #' @param dropCells A boolean indicating whether to drop cells that are not in `ArchRProject` from corresponding Arrow Files. #' @param logFile The path to a file to be used for logging ArchR output. #' @param threads The number of threads to use for parallel execution. +#' @examples +#' +#' # Get Small Test Project +#' proj <- getTestProject() +#' +#' # Save +#' saveArchRProject(proj) +#' #' @export saveArchRProject <- function( ArchRProj = NULL, @@ -617,6 +667,15 @@ saveArchRProject <- function( #' @param logFile The path to a file to be used for logging ArchR output. #' @param threads The number of threads to use for parallel execution. #' @param force If output directory exists overwrite. +#' +#' @examples +#' +#' # Get Small Test Project +#' proj <- getTestProject() +#' +#' #Subset +#' proj <- subsetArchRProject(proj, cells = getCellNames(proj)[1:50]) +#' #' @export subsetArchRProject <- function( ArchRProj = NULL, @@ -784,7 +843,6 @@ subsetArchRProject <- function( } - setMethod( f = "colnames", signature = c("x" = "ArchRProject"), diff --git a/R/AnnotationGenome.R b/R/AnnotationGenome.R index e76026de..6956ec8f 100644 --- a/R/AnnotationGenome.R +++ b/R/AnnotationGenome.R @@ -10,6 +10,21 @@ #' @param filterChr A character vector indicating the seqlevels that should be removed if manual removal is desired for certain seqlevels. #' If no manual removal is desired, `filterChr` should be set to `NULL`. If `filter` is set to `TRUE` but `filterChr` is set to `NULL`, #' non-standard chromosomes will still be removed as defined in `filterChrGR()`. +#' +#' @examples +#' +#' if (!require("BSgenome.Hsapiens.UCSC.hg19", quietly = TRUE)) BiocManager::install("BSgenome.Hsapiens.UCSC.hg19") +#' library(BSgenome.Hsapiens.UCSC.hg19) +#' +#' # Get Genome +#' genome <- BSgenome.Hsapiens.UCSC.hg19 +#' +#' # Create Genome Annotation +#' genomeAnno <- createGenomeAnnotation(genome) +#' +#' # Also can create from a string if BSgenome exists +#' genomeAnno <- createGenomeAnnotation("hg19") +#' #' @export createGenomeAnnotation <- function( genome = NULL, @@ -78,6 +93,27 @@ createGenomeAnnotation <- function( #' @param exons A `GRanges` object containing gene exon coordinates. Must have a symbols column matching the symbols column of `genes`. #' @param TSS A `GRanges` object containing standed transcription start site coordinates for computing TSS enrichment scores downstream. #' @param annoStyle annotation style to map between gene names and various gene identifiers e.g. "ENTREZID", "ENSEMBL". +#' @param singleStrand A boolean for GenomicFeatures::genes(`single.strand.genes.only`) parameter +#' +#' @examples +#' +#' if (!require("TxDb.Hsapiens.UCSC.hg19.knownGene", quietly = TRUE)) BiocManager::install("TxDb.Hsapiens.UCSC.hg19.knownGene") +#' if (!require("org.Hs.eg.db", quietly = TRUE)) BiocManager::install("org.Hs.eg.db") +#' library(TxDb.Hsapiens.UCSC.hg19.knownGene) +#' library(org.Hs.eg.db) +#' +#' # Get Txdb +#' TxDb <- TxDb.Hsapiens.UCSC.hg19.knownGene +#' +#' # Get OrgDb +#' OrgDb <- org.Hs.eg.db +#' +#' # Create Genome Annotation +#' geneAnno <- createGeneAnnotation(TxDb=TxDb, OrgDb=OrgDb) +#' +#' # Also can create from a string if BSgenome exists +#' geneAnno <- createGeneAnnotation("hg19") +#' #' @export createGeneAnnotation <- function( genome = NULL, @@ -86,7 +122,8 @@ createGeneAnnotation <- function( genes = NULL, exons = NULL, TSS = NULL, - annoStyle = NULL + annoStyle = NULL, + singleStrand = FALSE ){ .validInput(input = genome, name = "genome", valid = c("character", "null")) @@ -118,7 +155,11 @@ createGeneAnnotation <- function( ########################### message("Getting Genes..") - genes <- GenomicFeatures::genes(TxDb) + genes <- tryCatch({ #Legacy Catch In Case + GenomicFeatures::genes(TxDb, single.strand.genes.only = singleStrand) + }, error = function(e){ + GenomicFeatures::genes(TxDb) + }) if(is.null(annoStyle)){ isEntrez <- mcols(genes)$symbol <- tryCatch({ diff --git a/R/AnnotationPeaks.R b/R/AnnotationPeaks.R index 2e13ff91..ec204d87 100644 --- a/R/AnnotationPeaks.R +++ b/R/AnnotationPeaks.R @@ -8,6 +8,15 @@ #' #' @param ArchRProj An `ArchRProject` object. #' @param name The name of the `peakAnnotation` object (i.e. Motifs) to retrieve from the designated `ArchRProject`. +#' +#' @examples +#' +#' # Get Test ArchR Project +#' proj <- getTestProject() +#' +#' # Get Peak Annotations +#' peakAnno <- getPeakAnnotation(proj) +#' #' @export getPeakAnnotation <- function(ArchRProj = NULL, name = NULL){ .validInput(input = ArchRProj, name = "ArchRProj", valid = c("ArchRProj")) @@ -29,6 +38,15 @@ getPeakAnnotation <- function(ArchRProj = NULL, name = NULL){ #' @param ArchRProj An `ArchRProject` object. #' @param name The name of the `peakAnnotation` object (i.e. Motifs) to retrieve from the designated `ArchRProject`. #' @param annoName The name of a specific annotation to subset within the `peakAnnotation`. +#' +#' @examples +#' +#' # Get Test ArchR Project +#' proj <- getTestProject() +#' +#' # Get Annotation Positions +#' positions <- getPositions(proj) +#' #' @export getPositions <- function(ArchRProj = NULL, name = NULL, annoName = NULL){ .validInput(input = ArchRProj, name = "ArchRProj", valid = c("ArchRProj")) @@ -65,6 +83,15 @@ getPositions <- function(ArchRProj = NULL, name = NULL, annoName = NULL){ #' @param ArchRProj An `ArchRProject` object. #' @param name The name of the `peakAnnotation` object (i.e. Motifs) to retrieve from the designated `ArchRProject`. #' @param annoName The name of a specific annotation to subset within the `peakAnnotation`. +#' +#' @examples +#' +#' # Get Test ArchR Project +#' proj <- getTestProject() +#' +#' # Get Annotation Matches +#' matches <- getMatches(proj) +#' #' @export getMatches <- function(ArchRProj = NULL, name = NULL, annoName = NULL){ .validInput(input = ArchRProj, name = "ArchRProj", valid = c("ArchRProj")) @@ -90,7 +117,17 @@ getMatches <- function(ArchRProj = NULL, name = NULL, annoName = NULL){ } matches <- matches[, idx, drop=FALSE] } + #Check + rr1 <- paste0(rowRanges(matches)) + rr2 <- paste0(getPeakSet(ArchRProj)) + if(!all(rr1 %in% rr2)){ + stop("Not all matches in PeakSet") + } + rownames(matches) <- rr1 + matches <- matches[rr2,] + rownames(matches) <- NULL matches + } #' Add peak annotations to an ArchRProject @@ -104,6 +141,18 @@ getMatches <- function(ArchRProj = NULL, name = NULL, annoName = NULL){ #' @param force A boolean value indicating whether to force the `peakAnnotation` object indicated by `name` to be overwritten #' if it already exists in the given `ArchRProject`. #' @param logFile The path to a file to be used for logging ArchR output. +#' +#' @examples +#' +#' # Get Test ArchR Project +#' proj <- getTestProject() +#' +#' # Get Motif Positions Can Be Any Interval GRanges List +#' positions <- getPositions(proj) +#' +#' # Add Peak Annotations +#' proj <- addPeakAnnotations(proj, regions = positions) +#' #' @export addPeakAnnotations <- function( ArchRProj = NULL, @@ -276,8 +325,9 @@ addPeakAnnotations <- function( #' corresponding motif sets from the `chromVAR` package, or (iii) "vierstra" which gives the clustered archetype motifs #' created by Jeff Vierstra (https://github.com/jvierstra/motif-clustering). #' @param annoName The name of the `peakAnnotation` object to be stored in the provided `ArchRProject` -#' @param species The name of the species relevant to the supplied `ArchRProject`. This is used for identifying which motif to be -#' used from CisBP/JASPAR. By default, this function will attempt to guess the species based on the value from `getGenome()`. +#' @param species The latin name of the species relevant to the supplied `ArchRProject`. This is used for identifying which motif to be +#' used from CisBP/JASPAR. For JASPAR, `species` is passed to `TFBS::getMatrixSet` and some species names are not recognized. In these cases +#' it is possible to use the NCBI taxonomy ID. By default, this function will attempt to guess the species based on the value from `getGenome()`. #' @param collection If one of the JASPAR motif sets is used via `motifSet`, this parameter allows you to indicate the JASPAR #' collection to be used. See `getMatrixSet()` from `TFBSTools` for all options to supply for collection. If `motifSet` is #' "vierstra", then this must either be "archetype" (for the v2.1 clustered models) or "individual" (for the original v1 individual motif models). @@ -291,6 +341,15 @@ addPeakAnnotations <- function( #' it already exists in the given `ArchRProject`. #' @param logFile The path to a file to be used for logging ArchR output. #' @param ... Additional parameters to be passed to `TFBSTools::getMatrixSet` for getting a JASPAR PWM object. +#' +#' @examples +#' +#' # Get Test ArchR Project +#' proj <- getTestProject() +#' +#' # Add Motif Annotations +#' proj <- addMotifAnnotations(proj, motifSet = "cisbptest", annoName = "test") +#' #' @export addMotifAnnotations <- function( ArchRProj = NULL, @@ -390,41 +449,57 @@ addMotifAnnotations <- function( motifs <- obj$motifs motifSummary <- obj$motifSummary - }else if(tolower(motifSet)=="cisbp"){ + }else if(tolower(motifSet) %in% c("cisbp", "cisbptest")){ .requirePackage("chromVARmotifs",installInfo='devtools::install_github("GreenleafLab/chromVARmotifs")') - if(tolower(species) == "mus musculus"){ - if(version == 1){ - message("Using version 1 motifs!") - data("mouse_pwms_v1") - motifs <- mouse_pwms_v1 - }else if(version == 2){ - message("Using version 2 motifs!") - data("mouse_pwms_v2") - motifs <- mouse_pwms_v2 - }else{ - stop("Only versions 1 and 2 exist!") - } - obj <- .summarizeChromVARMotifs(motifs) - motifs <- obj$motifs - motifSummary <- obj$motifSummary - }else if(tolower(species) == "homo sapiens"){ - if(version == 1){ - message("Using version 1 motifs!") - data("human_pwms_v1") - motifs <- human_pwms_v1 - }else if(version == 2){ + + if(tolower(motifSet) == "cisbptest"){ + message("Using version 2 motifs!") data("human_pwms_v2") motifs <- human_pwms_v2 + subset <- grep("PAX5|CEBPA|CEBPB|IRF4|ETS1|EOMES", names(motifs), value=TRUE) + motifs <- motifs[subset] + obj <- .summarizeChromVARMotifs(motifs) + motifs <- obj$motifs + motifSummary <- obj$motifSummary + + }else{ + + if(tolower(species) == "mus musculus"){ + if(version == 1){ + message("Using version 1 motifs!") + data("mouse_pwms_v1") + motifs <- mouse_pwms_v1 + }else if(version == 2){ + message("Using version 2 motifs!") + data("mouse_pwms_v2") + motifs <- mouse_pwms_v2 + }else{ + stop("Only versions 1 and 2 exist!") + } + obj <- .summarizeChromVARMotifs(motifs) + motifs <- obj$motifs + motifSummary <- obj$motifSummary + }else if(tolower(species) == "homo sapiens"){ + if(version == 1){ + message("Using version 1 motifs!") + data("human_pwms_v1") + motifs <- human_pwms_v1 + }else if(version == 2){ + message("Using version 2 motifs!") + data("human_pwms_v2") + motifs <- human_pwms_v2 + }else{ + stop("Only versions 1 and 2 exist!") + } + obj <- .summarizeChromVARMotifs(motifs) + motifs <- obj$motifs + motifSummary <- obj$motifSummary }else{ - stop("Only versions 1 and 2 exist!") + stop("Species not recognized homo sapiens, mus musculus supported by CisBP!") } - obj <- .summarizeChromVARMotifs(motifs) - motifs <- obj$motifs - motifSummary <- obj$motifSummary - }else{ - stop("Species not recognized homo sapiens, mus musculus supported by CisBP!") + } }else if(tolower(motifSet)=="encode"){ @@ -495,7 +570,11 @@ addMotifAnnotations <- function( # Get BSgenome Information! ############################################################# genome <- ArchRProj@genomeAnnotation$genome - BSgenome <- eval(parse(text = genome)) + BSgenome <- tryCatch({ + eval(parse(text = paste0(genome))) + }, error = function(e){ + eval(parse(text = paste0(genome,"::",genome))) + }) BSgenome <- validBSgenome(BSgenome) ############################################################# @@ -674,6 +753,15 @@ addMotifAnnotations <- function( #' @param force A boolean value indicating whether to force the `peakAnnotation` object indicated by `name` to be #' overwritten if it already exists in the given `ArchRProject`. #' @param logFile The path to a file to be used for logging ArchR output. +#' +#' @examples +#' +#' # Get Test ArchR Project +#' proj <- getTestProject() +#' +#' # Add Motif Annotations +#' proj <- addArchRAnnotations(proj, name = "test") +#' #' @export addArchRAnnotations <- function( ArchRProj = NULL, @@ -961,6 +1049,101 @@ addArchRAnnotations <- function( } +#' Hypergeometric Enrichment in input peak ranges. +#' +#' This function will perform hypergeometric enrichment of a given peak matches object and ranges. +#' +#' @param ranges A `GenomicRanges` object of peaks/regions to overlap with peaks. +#' @param matches A custom `peakAnnotation` matches object used as input for the hypergeometric test. See +#' `motifmatchr::matchmotifs()` for additional information. +#' @param bgdPeaks A `SummarizedExperiment` of background peaks from `getBgdPeaks` can be NULL for using all peaks. +#' +#' @examples +#' #Project +#' proj <- getTestProject() +#' +#' #Get Peaks +#' peaks <- getPeakSet(proj) +#' +#' #Custom C1 Mono +#' peaks1 <- peaks[names(peaks)=="C1"] +#' +#' #All Peaks +#' customEnrichment( +#' ranges = peaks1, +#' matches = getMatches(proj) +#' ) +#' # feature CompareFrequency nCompare CompareProportion BackgroundFrequency +#' # CEBPB_1 CEBPB_1 70 635 0.11023622 122 +#' # CEBPA_2 CEBPA_2 81 635 0.12755906 156 +#' # IRF4_4 IRF4_4 103 635 0.16220472 276 +#' # EOMES_6 EOMES_6 36 635 0.05669291 120 +#' # ETS1_3 ETS1_3 38 635 0.05984252 149 +#' # PAX5_5 PAX5_5 23 635 0.03622047 124 +#' # nBackground BackgroundProporition Enrichment mlog10p mlog10Padj +#' # CEBPB_1 2142 0.05695612 1.9354589 10.3279 9.373657 +#' # CEBPA_2 2142 0.07282913 1.7514839 8.9394 7.985157 +#' # IRF4_4 2142 0.12885154 1.2588497 2.6938 1.739557 +#' # EOMES_6 2142 0.05602241 1.0119685 0.3001 0.000000 +#' # ETS1_3 2142 0.06956116 0.8602864 0.0487 0.000000 +#' # PAX5_5 2142 0.05788982 0.6256795 0.0006 0.000000 +#' +#' #Background Peaks +#' customEnrichment( +#' ranges = peaks1, +#' matches = getMatches(proj), +#' bgdPeaks = getBgdPeaks(proj, force=TRUE) +#' ) +#' # feature CompareFrequency nCompare CompareProportion BackgroundFrequency +#' # CEBPB_1 CEBPB_1 70 635 0.11023622 2459 +#' # CEBPA_2 CEBPA_2 81 635 0.12755906 3154 +#' # IRF4_4 IRF4_4 103 635 0.16220472 4836 +#' # ETS1_3 ETS1_3 38 635 0.05984252 1781 +#' # EOMES_6 EOMES_6 36 635 0.05669291 1975 +#' # PAX5_5 PAX5_5 23 635 0.03622047 1495 +#' # nBackground BackgroundProporition Enrichment mlog10p mlog10Padj +#' # CEBPB_1 32385 0.07593021 1.4518097 2.9544 2.000157 +#' # CEBPA_2 32385 0.09739077 1.3097654 2.1341 1.179857 +#' # IRF4_4 32385 0.14932839 1.0862283 0.7142 0.000000 +#' # ETS1_3 32385 0.05499460 1.0881527 0.4975 0.000000 +#' # EOMES_6 32385 0.06098502 0.9296203 0.1551 0.000000 +#' # PAX5_5 32385 0.04616335 0.7846154 0.0422 0.000000 +#' # +#' @export +customEnrichment <- function( + ranges = NULL, + matches = NULL, + bgdPeaks = NULL + ){ + + if(is.null(matches)){ + stop("Please supply matches! Try `matches` = getMatches(ArchRProj)!") + } + + .validInput(input = ranges, name = "ranges", valid = c("granges")) + .validInput(input = matches, name = "matches", valid = c("SummarizedExperiment")) + .validInput(input = bgdPeaks, name = "bgdPeaks", valid = c("SummarizedExperiment", "null")) + + if(!is.null(bgdPeaks)){ + + rownames(matches) <- paste0(rowRanges(matches)) + rownames(bgdPeaks) <- paste0(rowRanges(bgdPeaks)) + + bgdPeaks <- bgdPeaks[rownames(matches), ] + idx <- unique(queryHits(findOverlaps(matches, ranges, ignore.strand=TRUE))) + + .computeEnrichment(matches, idx, c(idx, as.vector(assay(bgdPeaks)[idx,]))) + + }else{ + + idx <- unique(queryHits(findOverlaps(matches, ranges, ignore.strand=TRUE))) + + .computeEnrichment(matches, idx, seq_len(nrow(matches))) + + } + +} + #' Peak Annotation Hypergeometric Enrichment in Marker Peaks. #' #' This function will perform hypergeometric enrichment of a given peak annotation within the defined marker peaks. @@ -974,6 +1157,27 @@ addArchRAnnotations <- function( #' `cutoff` can contain any of the `assayNames` from `seMarker`. #' @param background A string that indicates whether to use a background set of matched peaks to compare against ("bgdPeaks") or all peaks ("all"). #' @param logFile The path to a file to be used for logging ArchR output. +#' +#' @examples +#' +#' # Get Test ArchR Project +#' proj <- getTestProject() +#' +#' # Get Markers +#' seMarker <- getMarkerFeatures( +#' ArchRProj = proj, +#' useMatrix = "PeakMatrix", +#' testMethod = "binomial", +#' binarize = TRUE +#' ) +#' +#' # Get Peak Annotation Enrichment +#' annoEnrich <- peakAnnoEnrichment( +#' seMarker = seMarker, +#' ArchRProj = proj, +#' cutOff = "FDR <= 0.1 & Log2FC >= 0" +#' ) +#' #' @export peakAnnoEnrichment <- function( seMarker = NULL, @@ -1149,6 +1353,36 @@ enrichHeatmap <- function(...){ #' @param transpose A boolean determining whether to transpose the heatmap in the plot. #' @param returnMatrix A boolean determining whether to return the matrix corresponding to the heatmap rather than generate a plot. #' @param logFile The path to a file to be used for logging ArchR output. +#' +#' @examples +#' +#' # Get Test ArchR Project +#' proj <- getTestProject() +#' +#' # Get Markers +#' seMarker <- getMarkerFeatures( +#' ArchRProj = proj, +#' useMatrix = "PeakMatrix", +#' testMethod = "binomial", +#' binarize = TRUE +#' ) +#' +#' # Get Peak Annotation Enrichment +#' annoEnrich <- peakAnnoEnrichment( +#' seMarker = seMarker, +#' ArchRProj = proj, +#' cutOff = "FDR <= 0.1 & Log2FC >= 0" +#' ) +#' +#' # Multiply by 50 since this is a super small test sample +#' assay(annoEnrich) <- assay(annoEnrich) * 50 +#' +#' #Plot +#' p <- plotEnrichHeatmap(annoEnrich) +#' +#' #PDF +#' plotPDF(p, name = "PeakAnnoEnrich", ArchRProj = proj) +#' #' @export plotEnrichHeatmap <- function( seEnrich = NULL, diff --git a/R/ArchRBrowser.R b/R/ArchRBrowser.R index 41df3315..36dfb94b 100644 --- a/R/ArchRBrowser.R +++ b/R/ArchRBrowser.R @@ -27,6 +27,14 @@ #' @param threads The number of threads to use for parallel execution. #' @param verbose A boolean value that determines whether standard output should be printed. #' @param logFile The path to a file to be used for logging ArchR output. +#' +#' @examples +#' +# #Get Test ArchR Project +#' proj <- getTestProject() +#' +#' #Launch Browser with `ArchRBrowser(proj)` +#' #' @export ArchRBrowser <- function( ArchRProj = NULL, @@ -67,7 +75,9 @@ ArchRBrowser <- function( #Determine Grouping Methods ccd <- getCellColData(ArchRProj) discreteCols <- lapply(seq_len(ncol(ccd)), function(x){ - .isDiscrete(ccd[, x]) + check1 <- .isDiscrete(ccd[, x]) + check2 <- max(table(ccd[, x])) > minCells + check1 & check2 }) %>% unlist %>% {colnames(ccd)[.]} if("Clusters" %in% discreteCols){ selectCols <- "Clusters" @@ -80,22 +90,24 @@ ArchRBrowser <- function( .validInput(input = gr, name = "gr", valid = c("GRanges")) .validInput(input = upstream, name = "upstream", valid = c("integer")) .validInput(input = downstream, name = "downstream", valid = c("integer")) - #Get Info From gr - st <- start(gr) - ed <- end(gr) - #https://bioinformatics.stackexchange.com/questions/4390/expand-granges-object-different-amounts-upstream-vs-downstream - isMinus <- BiocGenerics::which(strand(gr) == "-") - isOther <- BiocGenerics::which(strand(gr) != "-") - #Forward - st[isOther] <- st[isOther] - upstream - ed[isOther] <- ed[isOther] + downstream - #Reverse - ed[isMinus] <- ed[isMinus] + upstream - st[isMinus] <- st[isMinus] - downstream - #If Any extensions now need to be flipped. - end(gr) <- pmax(st, ed) - start(gr) <- pmin(st, ed) - return(gr) + suppressWarnings({ + #Get Info From gr + st <- start(gr) + ed <- end(gr) + #https://bioinformatics.stackexchange.com/questions/4390/expand-granges-object-different-amounts-upstream-vs-downstream + isMinus <- BiocGenerics::which(strand(gr) == "-") + isOther <- BiocGenerics::which(strand(gr) != "-") + #Forward + st[isOther] <- st[isOther] - upstream + ed[isOther] <- ed[isOther] + downstream + #Reverse + ed[isMinus] <- ed[isMinus] + upstream + st[isMinus] <- st[isMinus] - downstream + #If Any extensions now need to be flipped. + end(gr) <- pmax(st, ed) + start(gr) <- pmin(st, ed) + gr + }) } @@ -311,7 +323,11 @@ ArchRBrowser <- function( groupBy <- isolate(input$grouping) groupDF <- tryCatch({ - isolate(hot_to_r(input$Metadata)) + o <- isolate(hot_to_r(input$Metadata)) + if(is.null(o)){ + stop() #switch methods! + } + o },error=function(x){ groups <- gtools::mixedsort(unique(ccd[,isolate(input$grouping)])) mdata <- data.frame( @@ -342,7 +358,6 @@ ArchRBrowser <- function( useGroups <- groupDF[groupDF[,"include"],"group"] - if(!all(.isColor(groupDF[groupDF[,"include"], "color"]))){ p <- ggplot() + xlim(c(-5,5)) + ylim(c(-5,5)) + @@ -666,6 +681,8 @@ ArchRBrowserTrack <- function(...){ #' used to exclude pseudo-bulk replicates generated from low numbers of cells. #' @param normMethod The name of the column in `cellColData` by which normalization should be performed. The recommended and default value #' is "ReadsInTSS" which simultaneously normalizes tracks based on sequencing depth and sample data quality. +#' @param highlight A `GRanges` object containing regions to highlight. +#' @param highlightFill A `character` color for filling highlihgted regions. #' @param threads The number of threads to use for parallel execution. #' @param ylim The numeric quantile y-axis limit to be used for for "bulkTrack" plotting. This should be expressed as `c(lower limit, upper limit)` such as `c(0,0.99)`. If not provided, the y-axis limit will be c(0, 0.999). #' @param pal A custom palette (see `paletteDiscrete` or `ArchRPalettes`) used to override coloring for groups. @@ -679,6 +696,22 @@ ArchRBrowserTrack <- function(...){ #' @param title The title to add at the top of the plot next to the plot's genomic coordinates. #' @param verbose A boolean value that determines whether standard output should be printed. #' @param logFile The path to a file to be used for logging ArchR output. +#' +#' @examples +#' +#' #Get Test ArchR Project +#' proj <- getTestProject() +#' +#' #Highlight +#' genes <- getGenes() +#' genes <- genes[which(genes$symbol %in% c("CD3D", "MS4A1"))] +#' +#' #Plot Track +#' p <- plotBrowserTrack(proj, geneSymbol = c("CD3D", "MS4A1"), groupBy = "CellType", highlight = genes, highlightFill = "dodgerblue3") +#' +#' #Plot PDF +#' plotPDF(p, name = "Track-CD3D-MS4A1", ArchRProj = proj) +#' #' @export plotBrowserTrack <- function( ArchRProj = NULL, @@ -697,6 +730,8 @@ plotBrowserTrack <- function( tileSize = 250, minCells = 25, normMethod = "ReadsInTSS", + highlight = NULL, + highlightFill = "firebrick3", threads = getArchRThreads(), ylim = NULL, pal = NULL, @@ -728,6 +763,8 @@ plotBrowserTrack <- function( .validInput(input = tileSize, name = "tileSize", valid = c("integer")) .validInput(input = minCells, name = "minCells", valid = c("integer")) .validInput(input = normMethod, name = "normMethod", valid = c("character")) + .validInput(input = highlight, name = "highlight", valid = c("granges", "null")) + .validInput(input = highlightFill, name = "highlightFill", valid = c("character")) .validInput(input = threads, name = "threads", valid = c("integer")) .validInput(input = ylim, name = "ylim", valid = c("numeric", "null")) .validInput(input = pal, name = "pal", valid = c("palette", "null")) @@ -806,6 +843,8 @@ plotBrowserTrack <- function( title = title, useGroups = useGroups, tstart = tstart, + highlight = highlight, + highlightFill = highlightFill, logFile = logFile) + theme(plot.margin = unit(c(0.35, 0.75, 0.35, 0.75), "cm")) } @@ -832,6 +871,8 @@ plotBrowserTrack <- function( title = title, useGroups = useGroups, tstart = tstart, + highlight = highlight, + highlightFill = highlightFill, logFile = logFile) + theme(plot.margin = unit(c(0.35, 0.75, 0.35, 0.75), "cm")) } @@ -847,6 +888,8 @@ plotBrowserTrack <- function( facetbaseSize = facetbaseSize, hideX = TRUE, title = "Peaks", + highlight = highlight, + highlightFill = highlightFill, logFile = logFile) + theme(plot.margin = unit(c(0.1, 0.75, 0.1, 0.75), "cm")) } } @@ -864,6 +907,8 @@ plotBrowserTrack <- function( hideX = TRUE, hideY = TRUE, title = "Loops", + highlight = highlight, + highlightFill = highlightFill, logFile = logFile) + theme(plot.margin = unit(c(0.1, 0.75, 0.1, 0.75), "cm")) } } @@ -878,6 +923,8 @@ plotBrowserTrack <- function( region = region[x], facetbaseSize = facetbaseSize, title = "Genes", + highlight = highlight, + highlightFill = highlightFill, logFile = logFile) + theme(plot.margin = unit(c(0.1, 0.75, 0.1, 0.75), "cm")) } @@ -972,6 +1019,8 @@ plotBrowserTrack <- function( pal = NULL, tstart = NULL, verbose = FALSE, + highlight = NULL, + highlightFill = NULL, logFile = NULL ){ @@ -1044,7 +1093,23 @@ plotBrowserTrack <- function( margin = margin(0,0.35,0,0.35, "cm")), strip.text.y = element_text(angle = 0), strip.background = element_rect(color="black")) + - guides(fill = "none", colour = "none") + ggtitle(title) + .gg_guides(fill = FALSE, colour = FALSE) + ggtitle(title) + + #Determine Whether To Highlight + highlight <- subsetByOverlaps(highlight, region, ignore.strand=TRUE) + if(length(highlight) > 0){ + + #Data Frame + dfH <- data.frame(highlight) + dfH$start <- pmax(dfH$start, start(region)) + dfH$end <- pmin(dfH$end, end(region)) + + #Plot Highlight + p <- p + + geom_rect(data = dfH, aes(xmin = start, xmax = end, ymin = -Inf, ymax = Inf), + alpha=0.2, fill=highlightFill, inherit.aes = FALSE) + + } p @@ -1268,6 +1333,8 @@ plotBrowserTrack <- function( facetbaseSize, colorMinus = "dodgerblue2", colorPlus = "red", + highlight = NULL, + highlightFill = NULL, logFile = NULL ){ @@ -1346,7 +1413,7 @@ plotBrowserTrack <- function( theme(axis.title.x=element_blank(), axis.text.x=element_blank(),axis.ticks.x=element_blank()) + theme(axis.title.y=element_blank(), axis.text.y=element_blank(),axis.ticks.y=element_blank()) + theme(legend.text = element_text(size = baseSize), strip.text.y = element_text(size = facetbaseSize, angle = 0)) + - guides(fill = guide_legend(override.aes = list(colour = NA, shape = "c", size=3)), color = "none") + + .gg_guides(fill = guide_legend(override.aes = list(colour = NA, shape = "c", size=3)), color = FALSE) + theme(legend.position="bottom") + theme(legend.title=element_text(size=5), legend.text=element_text(size=7), legend.key.size = unit(0.75,"line"), legend.background = element_rect(color =NA), strip.background = element_blank()) @@ -1385,6 +1452,22 @@ plotBrowserTrack <- function( } + #Determine Whether To Highlight + highlight <- subsetByOverlaps(highlight, region, ignore.strand=TRUE) + if(length(highlight) > 0){ + + #Data Frame + dfH <- data.frame(highlight) + dfH$start <- pmax(dfH$start, start(region)) + dfH$end <- pmin(dfH$end, end(region)) + + #Plot Highlight + p <- p + + geom_rect(data = dfH, aes(xmin = start, xmax = end, ymin = -Inf, ymax = Inf), + alpha=0.2, fill=highlightFill, inherit.aes = FALSE) + + } + if(!is.ggplot(p)){ .logError("geneTrack is not a ggplot!", fn = ".geneTracks", info = "", errorList = NULL, logFile = logFile) } @@ -1407,6 +1490,8 @@ plotBrowserTrack <- function( borderWidth = 0.4, hideX = FALSE, hideY = FALSE, + highlight = NULL, + highlightFill = NULL, logFile = NULL ){ @@ -1475,7 +1560,8 @@ plotBrowserTrack <- function( scale_color_manual(values = pal) + theme(legend.text = element_text(size = baseSize)) + theme_ArchR(baseSize = baseSize, baseLineSize = borderWidth, baseRectSize = borderWidth) + - guides(color = "none", fill = "none") + theme(strip.text.y = element_text(size = facetbaseSize, angle = 0), strip.background = element_blank()) + .gg_guides(color = FALSE, fill = FALSE) + + theme(strip.text.y = element_text(size = facetbaseSize, angle = 0), strip.background = element_blank()) }else{ @@ -1499,6 +1585,22 @@ plotBrowserTrack <- function( p <- p + theme(axis.title.y=element_blank(), axis.text.y=element_blank(), axis.ticks.y=element_blank()) } + #Determine Whether To Highlight + highlight <- subsetByOverlaps(highlight, region, ignore.strand=TRUE) + if(length(highlight) > 0){ + + #Data Frame + dfH <- data.frame(highlight) + dfH$start <- pmax(dfH$start, start(region)) + dfH$end <- pmin(dfH$end, end(region)) + + #Plot Highlight + p <- p + + geom_rect(data = dfH, aes(xmin = start, xmax = end, ymin = -Inf, ymax = Inf), + alpha=0.2, fill=highlightFill, inherit.aes = FALSE) + + } + if(!is.ggplot(p)){ .logError("featureTrack is not a ggplot!", fn = ".featureTracks", info = "", errorList = NULL, logFile = logFile) } @@ -1521,6 +1623,8 @@ plotBrowserTrack <- function( borderWidth = 0.4, hideX = FALSE, hideY = FALSE, + highlight = NULL, + highlightFill = NULL, logFile = NULL ){ @@ -1594,7 +1698,7 @@ plotBrowserTrack <- function( theme_ArchR(baseSize = baseSize, baseLineSize = borderWidth, baseRectSize = borderWidth, legendPosition = "right") + theme(strip.text.y = element_text(size = facetbaseSize, angle = 0), strip.background = element_blank(), legend.box.background = element_rect(color = NA)) + - guides(color= guide_colorbar(barwidth = 0.75, barheight = 3)) + .gg_guides(color= guide_colorbar(barwidth = 0.75, barheight = 3)) }else{ @@ -1632,6 +1736,22 @@ plotBrowserTrack <- function( p <- p + theme(axis.title.y=element_blank(), axis.text.y=element_blank(), axis.ticks.y=element_blank()) } + #Determine Whether To Highlight + highlight <- subsetByOverlaps(highlight, region, ignore.strand=TRUE) + if(length(highlight) > 0){ + + #Data Frame + dfH <- data.frame(highlight) + dfH$start <- pmax(dfH$start, start(region)) + dfH$end <- pmin(dfH$end, end(region)) + + #Plot Highlight + p <- p + + geom_rect(data = dfH, aes(xmin = start, xmax = end, ymin = -Inf, ymax = Inf), + alpha=0.2, fill=highlightFill, inherit.aes = FALSE) + + } + if(!is.ggplot(p)){ .logError("loopTracks is not a ggplot!", fn = ".loopTracks", info = "", errorList = NULL, logFile = logFile) } @@ -1667,6 +1787,8 @@ plotBrowserTrack <- function( tickWidth = 0.4, facetbaseSize = 7, geneAnnotation = getGeneAnnotation(ArchRProj), + highlight = NULL, + highlightFill = NULL, title = "", pal = NULL, tstart = NULL, @@ -1795,9 +1917,25 @@ plotBrowserTrack <- function( margin = margin(0,0.35,0,0.35, "cm")), strip.text.y = element_text(angle = 0), strip.background = element_rect(color="black")) + - guides(fill = "none", colour = "none") + ggtitle(title) + .gg_guides(fill = FALSE, colour = FALSE) + ggtitle(title) + + #Determine Whether To Highlight + highlight <- subsetByOverlaps(highlight, region, ignore.strand=TRUE) + if(length(highlight) > 0){ + + #Data Frame + dfH <- data.frame(highlight) + dfH$start <- pmax(dfH$start, start(region)) + dfH$end <- pmin(dfH$end, end(region)) - p + #Plot Highlight + p <- p + + geom_rect(data = dfH, aes(xmin = start, xmax = end, ymin = -Inf, ymax = Inf), + alpha=0.2, fill=highlightFill, inherit.aes = FALSE) + + } + + p } @@ -1884,7 +2022,7 @@ plotBrowserTrack <- function( pal = pal ) + facet_wrap(x~., ncol=1,scales="free_y",strip.position="right") + - guides(fill = "none", colour = "none") + + .gg_guides(fill = FALSE, colour = FALSE) + theme_ArchR(baseSize = baseSize, baseRectSize = borderWidth, baseLineSize = tickWidth, diff --git a/R/ArchRHeatmap.R b/R/ArchRHeatmap.R index 98ee029a..9c8d5a59 100644 --- a/R/ArchRHeatmap.R +++ b/R/ArchRHeatmap.R @@ -1,6 +1,101 @@ ######################################################################################################## # Helpers for Nice Heatmap with Bioconductors ComplexHeamtap ######################################################################################################## +#' Plot Nice Lookng Heatmap Using Complex Heatmap +#' @param mat A matrix to plot heatmap from. +#' @param scale A boolean whether to convert to row Z-scores +#' @param limits A vector of two values describing min and mix limits to plot +#' @param colData A DataFrame matching the columns of the input matrix to overlay on the columns of the heatmap +#' @param color A palette to use for heatmap continuous color scheme. See `paletteContinuous`. +#' @param clusterCols A boolean describing whether to cluster columns for heatmap. +#' @param clusterRows A boolean describing whether to cluster rows for heatmap. +#' @param colorMap A list of color mappings matching the column names in colData. +#' @param useRaster A boolean whether to use rastering when plotting heatmap. +#' @param rasterQuality Raster resolution of raster. Default is set to 5 higher numbers increase resolution. +#' @param split A vector of groupings that will split the rows of heatmap (must be equal to nrow(mat)). +#' @param fontSizeRows A numeric value representing the font size for rownames. +#' @param fontSizeCols A numeric value representing the font size for colnames. +#' @param fontSizeLabels A numeric value representing the font size for labels. +#' @param colAnnoPerRow An integer value describing the number of column annotations per row in the legend. +#' @param showRowDendrogram A boolean whether to show row dendrogram in heatmap. +#' @param showColDendrogram A boolean whether to show column dendrogram in heatmap. +#' @param customRowLabel A vector of indices to custom label from rows. +#' @param customRowLabelIDs A vector of custom labels to overlay. Should match length of `customRowLabel`. +#' @param customColLabel A vector of indices to custom label from columns. +#' @param customColLabelIDs A vector of custom labels to overlay. Should match length of `customColLabel`. +#' @param customLabelWidth A numeric describing the width of the column labels. +#' @param rasterDevice Which device to use for rastering see `ComplexHeatmap`. +#' @param padding A numeric (in cm) to pad the heatmap ie adding white space so that the final plot doesnt cutoff. +#' @param borderColor A character representing the border color for each cell in the heatmap. +#' @param draw A boolean whether to draw the heatmap immediately. If FALSE this will return a ComplexHeatmap object. +#' @param name A character that will appear above color bar legend in heatmap. +#' @export +ArchRHeatmap <- function( + mat = NULL, + scale = FALSE, + limits = c(min(mat), max(mat)), + colData = NULL, + color = paletteContinuous(set = "solarExtra", n = 100), + clusterCols = TRUE, + clusterRows = FALSE, + labelCols = FALSE, + labelRows = FALSE, + colorMap = NULL, + useRaster = TRUE, + rasterQuality = 5, + split = NULL, + fontSizeRows = 10, + fontSizeCols = 10, + fontSizeLabels = 8, + colAnnoPerRow = 4, + showRowDendrogram = FALSE, + showColDendrogram = FALSE, + customRowLabel = NULL, + customRowLabelIDs = NULL, + customColLabel = NULL, + customColLabelIDs = NULL, + customLabelWidth = 0.75, + rasterDevice = "png", + padding = 45, + borderColor = NA, + draw = TRUE, + name = "Heatmap" + ){ + + .ArchRHeatmap( + mat = mat, + scale = scale, + limits = limits, + colData = colData, + color = color, + clusterCols = clusterCols, + clusterRows = clusterRows, + labelCols = labelCols, + labelRows = labelRows, + colorMap = colorMap, + useRaster = useRaster, + rasterQuality = rasterQuality, + split = split, + fontSizeRows = fontSizeRows, + fontSizeCols = fontSizeCols, + fontSizeLabels = fontSizeLabels, + colAnnoPerRow = colAnnoPerRow, + showRowDendrogram = showRowDendrogram, + showColDendrogram = showColDendrogram, + customRowLabel = customRowLabel, + customRowLabelIDs = customRowLabelIDs, + customColLabel = customColLabel, + customColLabelIDs = customColLabelIDs, + customLabelWidth = customLabelWidth, + rasterDevice = rasterDevice, + padding = padding, + borderColor = borderColor, + draw = draw, + name = name + ) + +} + .ArchRHeatmap <- function( mat = NULL, scale = FALSE, diff --git a/R/ArrowRead.R b/R/ArrowRead.R index 8fbb09aa..9fe93d8e 100644 --- a/R/ArrowRead.R +++ b/R/ArrowRead.R @@ -13,6 +13,15 @@ #' from the provided ArrowFile using `getCellNames()`. #' @param verbose A boolean value indicating whether to use verbose output during execution of this function. Can be set to `FALSE` for a cleaner output. #' @param logFile The path to a file to be used for logging ArchR output. +#' +#' @examples +#' +#' #Get Test Project +#' proj <- getTestProject() +#' +#' # Get Fragments +#' frags <- getFragmentsFromProject(proj) +#' #' @export getFragmentsFromProject <- function( ArchRProj = NULL, @@ -69,6 +78,15 @@ getFragmentsFromProject <- function( #' from the provided ArrowFile using `getCellNames()`. #' @param verbose A boolean value indicating whether to use verbose output during execution of this function. Can be set to `FALSE` for a cleaner output. #' @param logFile The path to a file to be used for logging ArchR output. +#' +#' @examples +#' +#' #Get Test Arrow +#' arrow <- getTestArrow() +#' +#' # Get Fragments +#' frags <- getFragmentsFromArrow(arrow) +#' #' @export getFragmentsFromArrow <- function( ArrowFile = NULL, @@ -249,15 +267,26 @@ getFragmentsFromArrow <- function( #' @param ArchRProj An `ArchRProject` object to get data matrix from. #' @param useMatrix The name of the data matrix to retrieve from the given ArrowFile. Options include "TileMatrix", "GeneScoreMatrix", etc. #' @param useSeqnames A character vector of chromosome names to be used to subset the data matrix being obtained. +#' @param excludeChr A character vector containing the `seqnames` of the chromosomes that should be excluded from this analysis. #' @param verbose A boolean value indicating whether to use verbose output during execution of this function. Can be set to FALSE for a cleaner output. #' @param binarize A boolean value indicating whether the matrix should be binarized before return. #' This is often desired when working with insertion counts. Note that if the matrix has already been binarized previously, this should be set to `TRUE`. #' @param logFile The path to a file to be used for logging ArchR output. +#' +#' @examples +#' +#' #Get Test Project +#' proj <- getTestProject() +#' +#' # Get Fragments +#' se <- getMatrixFromProject(proj) +#' #' @export getMatrixFromProject <- function( ArchRProj = NULL, useMatrix = "GeneScoreMatrix", useSeqnames = NULL, + excludeChr = NULL, verbose = TRUE, binarize = FALSE, threads = getArchRThreads(), @@ -267,6 +296,7 @@ getMatrixFromProject <- function( .validInput(input = ArchRProj, name = "ArchRProj", valid = c("ArchRProj")) .validInput(input = useMatrix, name = "useMatrix", valid = c("character")) .validInput(input = useSeqnames, name = "useSeqnames", valid = c("character","null")) + .validInput(input = excludeChr, name = "excludeChr", valid = c("character", "null")) .validInput(input = verbose, name = "verbose", valid = c("boolean")) .validInput(input = binarize, name = "binarize", valid = c("boolean")) .validInput(input = threads, name = "threads", valid = c("integer")) @@ -298,6 +328,7 @@ getMatrixFromProject <- function( ArrowFile = ArrowFiles[x], useMatrix = useMatrix, useSeqnames = useSeqnames, + excludeChr = excludeChr, cellNames = allCells, ArchRProj = ArchRProj, verbose = FALSE, @@ -378,6 +409,7 @@ getMatrixFromProject <- function( #' @param ArrowFile The path to an ArrowFile from which the selected data matrix should be obtained. #' @param useMatrix The name of the data matrix to retrieve from the given ArrowFile. Options include "TileMatrix", "GeneScoreMatrix", etc. #' @param useSeqnames A character vector of chromosome names to be used to subset the data matrix being obtained. +#' @param excludeChr A character vector containing the `seqnames` of the chromosomes that should be excluded from this analysis. #' @param cellNames A character vector indicating the cell names of a subset of cells from which fragments whould be extracted. #' This allows for extraction of fragments from only a subset of selected cells. By default, this function will extract all cells from #' the provided ArrowFile using `getCellNames()`. @@ -387,11 +419,21 @@ getMatrixFromProject <- function( #' @param verbose A boolean value indicating whether to use verbose output during execution of this function. Can be set to FALSE for a cleaner output. #' @param binarize A boolean value indicating whether the matrix should be binarized before return. This is often desired when working with insertion counts. #' @param logFile The path to a file to be used for logging ArchR output. +#' +#' @examples +#' +#' #Get Test Arrow +#' arrow <- getTestArrow() +#' +#' # Get Fragments +#' se <- getMatrixFromArrow(arrow) +#' #' @export getMatrixFromArrow <- function( ArrowFile = NULL, useMatrix = "GeneScoreMatrix", useSeqnames = NULL, + excludeChr = NULL, cellNames = NULL, ArchRProj = NULL, verbose = TRUE, @@ -402,6 +444,7 @@ getMatrixFromArrow <- function( .validInput(input = ArrowFile, name = "ArrowFile", valid = "character") .validInput(input = useMatrix, name = "useMatrix", valid = "character") .validInput(input = useSeqnames, name = "useSeqnames", valid = c("character","null")) + .validInput(input = excludeChr, name = "excludeChr", valid = c("character", "null")) .validInput(input = cellNames, name = "cellNames", valid = c("character","null")) .validInput(input = ArchRProj, name = "ArchRProj", valid = c("ArchRProj","null")) .validInput(input = verbose, name = "verbose", valid = c("boolean")) @@ -422,11 +465,27 @@ getMatrixFromArrow <- function( seqnames <- seqnames[seqnames %in% useSeqnames] } + if(!is.null(excludeChr)){ + seqnames <- seqnames[seqnames %ni% excludeChr] + } + if(length(seqnames) == 0){ stop("No seqnames available!") } - featureDF <- featureDF[BiocGenerics::which(featureDF$seqnames %bcin% seqnames), ] + # if(!force){ + # #Check that the seqnames that will be used actually exist in the ArrowFiles + # missSeq <- .validateSeqNotEmpty(ArrowFile = ArrowFile, seqnames = seqnames) + # if(!is.null(missSeq)) { + # stop("The following seqnames do not have fragment information in ArrowFile ",ArrowFile,":\n", + # paste(missSeq, collapse = ","), + # "\nYou can proceed with the analysis by ignoring these seqnames by passing them to the 'excludeChr' parameter.") + # } + # }else{ + # message("Skipping validation of empty chromosomes since `force` = TRUE!") + # } + + featureDF <- featureDF[BiocGenerics::which(paste0(featureDF$seqnames) %bcin% seqnames), ] .logDiffTime(paste0("Getting ",useMatrix," from ArrowFile : ", basename(ArrowFile)), t1 = tstart, verbose = verbose, logFile = logFile) @@ -436,6 +495,10 @@ getMatrixFromArrow <- function( if(!all(cellNames %in% allCells)){ stop("cellNames must all be within the ArrowFile!!!!") } + if(sum(allCells %in% cellNames) == 0){ + message("Warning: No cellNames in ",useMatrix," for ",basename(ArrowFile)," returning NULL") + return(NULL) + } } mat <- .getMatFromArrow( @@ -446,6 +509,9 @@ getMatrixFromArrow <- function( binarize = binarize, useIndex = FALSE ) + if(is.null(mat)){ + return(NULL) + } .logThis(mat, paste0("mat ", sampleName), logFile = logFile) .logDiffTime(paste0("Organizing SE ",useMatrix," from ArrowFile : ", basename(ArrowFile)), @@ -539,49 +605,142 @@ getMatrixFromArrow <- function( idxCols <- seq_along(matColNames) } + #Check Any Cells Are Present Exist + if(length(idxCols) == 0){ + message("Warning: No CellNames found in ", basename(ArrowFile), " Returning NULL") + return(NULL) + } + seqnames <- unique(featureDF$seqnames) + h5df <- h5ls(ArrowFile) + if(all(c("indptr", "indices", "data") %in% h5df[,2])){ + version <- 2 + }else if(all(c("i", "jValues", "jLengths") %in% h5df[,2])){ + version <- 1 + }else{ + version <- 1 #Lets Test this out more before throwing a version error + } + mat <- .safelapply(seq_along(seqnames), function(x){ + #Feature Info seqnamex <- seqnames[x] featureDFx <- featureDF[BiocGenerics::which(featureDF$seqnames %bcin% seqnamex),] idxRows <- featureDFx$idx - j <- Rle( - values = h5read(ArrowFile, paste0(useMatrix,"/",seqnamex,"/jValues")), - lengths = h5read(ArrowFile, paste0(useMatrix,"/",seqnamex,"/jLengths")) + #Version 1 vs Version 2 + if(version == 1){ + + #Get J + j <- tryCatch({ + Rle( + values = h5read(ArrowFile, paste0(useMatrix,"/",seqnamex,"/jValues")), + lengths = h5read(ArrowFile, paste0(useMatrix,"/",seqnamex,"/jLengths")) + ) + }, error = function(e){ + NULL + }) + if(is.null(j)){ + message("Found 0 Column in ", seqnamex, " for ", basename(ArrowFile)," creating 0 value sparseMatrix!") + #Make 0 matrix + mat <- Matrix::sparseMatrix( + i=1, + j=1, + x=0, + dims = c(length(idxRows), length(idxCols)) + ) + rownames(mat) <- rownames(featureDFx) + return(mat) + } + + #Match J + matchJ <- S4Vectors::match(j, idxCols, nomatch = 0) + idxJ <- BiocGenerics::which(matchJ > 0) + if(useIndex){ + i <- h5read(ArrowFile, paste0(useMatrix,"/",seqnamex,"/i"), index = list(idxJ, 1)) + }else{ + i <- h5read(ArrowFile, paste0(useMatrix,"/",seqnamex,"/i"))[idxJ] + } + j <- matchJ[idxJ] + + #Match I + matchI <- match(i, idxRows, nomatch = 0) + idxI <- which(matchI > 0) + i <- i[idxI] + j <- j[idxI] + i <- matchI[idxI] + + if(!binarize){ + x <- h5read(ArrowFile, paste0(useMatrix,"/",seqnamex,"/x"))[idxJ][idxI] + }else{ + x <- rep(1, length(j)) + } + + mat <- Matrix::sparseMatrix( + i=as.vector(i), + j=as.vector(j), + x=as.numeric(x), + dims = c(length(idxRows), length(idxCols)) ) + rownames(mat) <- rownames(featureDFx) - #Match J - matchJ <- S4Vectors::match(j, idxCols, nomatch = 0) - idxJ <- BiocGenerics::which(matchJ > 0) - if(useIndex){ - i <- h5read(ArrowFile, paste0(useMatrix,"/",seqnamex,"/i"), index = list(idxJ, 1)) - }else{ - i <- h5read(ArrowFile, paste0(useMatrix,"/",seqnamex,"/i"))[idxJ] - } - j <- matchJ[idxJ] - - #Match I - matchI <- match(i, idxRows, nomatch = 0) - idxI <- which(matchI > 0) - i <- i[idxI] - j <- j[idxI] - i <- matchI[idxI] - - if(!binarize){ - x <- h5read(ArrowFile, paste0(useMatrix,"/",seqnamex,"/x"))[idxJ][idxI] - }else{ - x <- rep(1, length(j)) - } + }else if(version == 2){ - mat <- Matrix::sparseMatrix( - i=as.vector(i), - j=j, - x=x, - dims = c(length(idxRows), length(idxCols)) - ) - rownames(mat) <- rownames(featureDFx) + #Get J + j <- tryCatch({ + .sparseMatrixPToJRle( + h5read(ArrowFile, paste0(useMatrix,"/",seqnamex,"/indptr")) + ) + }, error = function(e){ + NULL + }) + if(is.null(j)){ + message("Found 0 Column in ", seqnamex, " for ", basename(ArrowFile)," creating 0 value sparseMatrix!") + #Make 0 matrix + mat <- Matrix::sparseMatrix( + i=1, + j=1, + x=0, #Set 0 + dims = c(length(idxRows), length(idxCols)) + ) + rownames(mat) <- rownames(featureDFx) + return(mat) + } + + #Match J + matchJ <- S4Vectors::match(j, idxCols, nomatch = 0) + idxJ <- BiocGenerics::which(matchJ > 0) + #I is no longer 1 indexed in this version + if(useIndex){ + i <- h5read(ArrowFile, paste0(useMatrix,"/",seqnamex,"/indices"), index = list(idxJ, 1)) + 1 + }else{ + i <- h5read(ArrowFile, paste0(useMatrix,"/",seqnamex,"/indices"))[idxJ] + 1 + } + j <- matchJ[idxJ] + + #Match I + matchI <- match(i, idxRows, nomatch = 0) + idxI <- which(matchI > 0) + i <- i[idxI] + j <- j[idxI] + i <- matchI[idxI] + + if(!binarize){ + x <- h5read(ArrowFile, paste0(useMatrix,"/",seqnamex,"/data"))[idxJ][idxI] + }else{ + x <- rep(1, length(j)) + } + + mat <- Matrix::sparseMatrix( + i=as.vector(i), + j=as.vector(j), + x=as.numeric(x), + dims = c(length(idxRows), length(idxCols)) + ) + rownames(mat) <- rownames(featureDFx) + + } rm(matchI, idxI, matchJ, idxJ, featureDFx, idxRows) @@ -605,6 +764,14 @@ getMatrixFromArrow <- function( } +#Adapted from +#https://github.com/mojaveazure/seurat-disk/blob/master/R/sparse_matrix.R +.sparseMatrixPToJRle <- function(p) { + dp <- diff(x = p) + j <- Rle(rep.int(x = seq_along(along.with = dp), times = dp)) + return(j) +} + #################################################################### # Helper read functioning #################################################################### @@ -612,6 +779,7 @@ getMatrixFromArrow <- function( ArrowFiles = NULL, featureDF = NULL, groupList = NULL, + excludeSeqnames = NULL, threads = 1, useIndex = FALSE, verbose = TRUE, @@ -631,6 +799,11 @@ getMatrixFromArrow <- function( # Construct Matrix ######################################### seqnames <- unique(featureDF$seqnames) + if(!is.null(excludeSeqnames)) { + seqnames <- seqnames[which(seqnames %ni% excludeSeqnames)] + featureDF <- featureDF[BiocGenerics::which(paste0(featureDF$seqnames) %bcni% excludeSeqnames),,drop=FALSE] + } + rownames(featureDF) <- paste0("f", seq_len(nrow(featureDF))) cellNames <- unlist(groupList, use.names = FALSE) ### UNIQUE here? doublet check QQQ @@ -658,7 +831,7 @@ getMatrixFromArrow <- function( for(y in seq_along(ArrowFiles)){ - allCells <- allCellsList[[y]] + allCells <- allCellsList[[y]] #These should be at least 1 cell so getMatFromArrow should never be NULL if(!is.null(allCells)){ @@ -748,6 +921,7 @@ getMatrixFromArrow <- function( allCells <- .availableCells(ArrowFile = ArrowFiles[x], subGroup = useMatrix) allCells <- allCells[allCells %in% cellNames] + #Handled 0 Cells if(length(allCells) == 0){ if(doSampleCells){ return(list(mat = NULL, out = NULL)) @@ -909,39 +1083,18 @@ getMatrixFromArrow <- function( seqnames = NULL, useMatrix = NULL, useLog2 = FALSE, + useGeo = FALSE, + useLog2Norm = FALSE, threads = 1 ){ - .combineVariances <- function(dfMeans = NULL, dfVars = NULL, ns = NULL){ - - #https://rdrr.io/cran/fishmethods/src/R/combinevar.R - - if(ncol(dfMeans) != ncol(dfVars) | ncol(dfMeans) != length(ns)){ - stop("Means Variances and Ns lengths not identical") - } - - #Check if samples have NAs due to N = 1 sample or some other weird thing. - #Set it to min non NA variance - dfVars <- lapply(seq_len(nrow(dfVars)), function(x){ - vx <- dfVars[x, , drop = FALSE] - if(any(is.na(vx))){ - vx[is.na(vx)] <- min(vx[!is.na(vx)]) - } - vx - }) %>% Reduce("rbind", .) - - combinedMeans <- rowSums(t(t(dfMeans) * ns)) / sum(ns) - summedVars <- rowSums(t(t(dfVars) * (ns - 1)) + t(t(dfMeans^2) * ns)) - combinedVars <- (summedVars - sum(ns)*combinedMeans^2)/(sum(ns)-1) - - data.frame(combinedVars = combinedVars, combinedMeans = combinedMeans) - - } + stopifnot(useLog2 + useGeo + useLog2Norm <= 1) + #Feature DF featureDF <- .getFeatureDF(ArrowFiles, useMatrix) if(!is.null(seqnames)){ - featureDF <- featureDF[BiocGenerics::which(featureDF$seqnames %bcin% seqnames),] + featureDF <- featureDF[BiocGenerics::which(paste0(featureDF$seqnames) %bcin% seqnames),] } rownames(featureDF) <- paste0("f", seq_len(nrow(featureDF))) @@ -966,6 +1119,12 @@ getMatrixFromArrow <- function( if(useLog2){ meanx[, y] <- h5read(ArrowFiles[y], paste0(useMatrix, "/", seqx, "/rowMeansLog2")) varx[, y] <- h5read(ArrowFiles[y], paste0(useMatrix, "/", seqx, "/rowVarsLog2")) + }else if(useGeo){ + meanx[, y] <- h5read(ArrowFiles[y], paste0(useMatrix, "/", seqx, "/rowMeansGeo")) + varx[, y] <- h5read(ArrowFiles[y], paste0(useMatrix, "/", seqx, "/rowVarsGeo")) + }else if(useLog2Norm){ + meanx[, y] <- h5read(ArrowFiles[y], paste0(useMatrix, "/", seqx, "/rowMeansLog2Norm")) + varx[, y] <- h5read(ArrowFiles[y], paste0(useMatrix, "/", seqx, "/rowVarsLog2Norm")) }else{ meanx[, y] <- h5read(ArrowFiles[y], paste0(useMatrix, "/", seqx, "/rowMeans")) varx[, y] <- h5read(ArrowFiles[y], paste0(useMatrix, "/", seqx, "/rowVars")) @@ -983,6 +1142,33 @@ getMatrixFromArrow <- function( } +.combineVariances <- function(dfMeans = NULL, dfVars = NULL, ns = NULL){ + + #https://rdrr.io/cran/fishmethods/src/R/combinevar.R + + if(ncol(dfMeans) != ncol(dfVars) | ncol(dfMeans) != length(ns)){ + stop("Means Variances and Ns lengths not identical") + } + + #Check if samples have NAs due to N = 1 sample or some other weird thing. + #Set it to 0 + if(any(is.na(dfVars))){ + idx <- which(rowSums(is.na(dfVars)) > 0) + dfVars[is.na(dfVars)] <- 0 + } + + #Compute + combinedMeans <- rowSums(t(t(dfMeans) * ns)) / sum(ns) + summedVars <- rowSums(t(t(dfVars) * (ns - 1)) + t(t(dfMeans^2) * ns)) + combinedVars <- (summedVars - sum(ns)*combinedMeans^2)/(sum(ns)-1) + + data.frame( + combinedVars = combinedVars, + combinedMeans = combinedMeans + ) + +} + .getColSums <- function( ArrowFiles = NULL, seqnames = NULL, @@ -1085,4 +1271,3 @@ getMatrixFromArrow <- function( } - diff --git a/R/ArrowUtils.R b/R/ArrowUtils.R index 0e8a0d63..2fd8aab9 100644 --- a/R/ArrowUtils.R +++ b/R/ArrowUtils.R @@ -65,6 +65,20 @@ return(seqnames) } +#check if an ArrowFile lacks fragment info on a set a seqnames and return a vector of empty seqnames +#returns NULL if all seqnames are ok +.validateSeqNotEmpty <- function(ArrowFile = NULL, seqnames = NULL, threads = getArchRThreads()) { + failed_chr <- .safelapply(seq_along(seqnames), function(x){ + tryCatch({ + .h5read(ArrowFile, paste0("Fragments/", seqnames[x], "/RGLengths"), method = "fast") + return(NULL) + },error=function(e){ + return(seqnames[x]) + }) + }, threads = threads) + return(unlist(failed_chr)) +} + .availableCells <- function(ArrowFile = NULL, subGroup = NULL, passQC = TRUE){ if(is.null(subGroup)){ o <- h5closeAll() @@ -162,11 +176,26 @@ threads <- min(threads, length(ArrowFiles)) .helpFeatureDF <- function(ArrowFile = NULL, subGroup = NULL){ + o <- h5closeAll() + + #Read featureDF <- DataFrame(h5read(ArrowFile, paste0(subGroup,"/Info/FeatureDF"))) + + #Make Sure this is not an array + for(i in seq_len(ncol(featureDF))){ + if(is(featureDF[,i], "array")){ + featureDF[,i] <- as.vector(featureDF[,i]) + } + } + + #Seqnames featureDF$seqnames <- Rle(as.character(featureDF$seqnames)) + o <- h5closeAll() + return(featureDF) + } fdf <- .helpFeatureDF(ArrowFiles[1], subGroup = subGroup) @@ -231,7 +260,7 @@ .dropGroupsFromArrow <- function( ArrowFile = NULL, dropGroups = NULL, - level = 0, + level = getArchRH5Level(), verbose = FALSE, logFile = NULL ){ @@ -328,7 +357,7 @@ inArrows = NULL, outArrows = NULL, cellsKeep = NULL, - level = 0, + level = getArchRH5Level(), verbose = FALSE, logFile = NULL, threads = 1 @@ -353,7 +382,7 @@ inArrow = NULL, outArrow = NULL, cellsKeep = NULL, - level = 0, + level = getArchRH5Level(), verbose = FALSE, logFile = NULL ){ @@ -425,6 +454,11 @@ #Read In Fragments RGLengths <- .h5read(inArrow, paste0(groupJ, "/RGLengths")) RGValues <- .h5read(inArrow, paste0(groupJ, "/RGValues")) + + #Check Cells Exist + if(length(RGLengths) == 0 | length(RGValues) == 0){ + next + } RGRle <- Rle(paste0(sampleName, "#", RGValues), RGLengths) #Determine Which to Keep diff --git a/R/ArrowWrite.R b/R/ArrowWrite.R index 41a216b8..c9aea5a3 100644 --- a/R/ArrowWrite.R +++ b/R/ArrowWrite.R @@ -111,8 +111,19 @@ addColSums = FALSE, addRowMeans = FALSE, addRowVars = FALSE, + addColMeansLog2 = FALSE, #New + addRowMeansLog2 = FALSE, #New addRowVarsLog2 = FALSE, - logFile = NULL + addRowMeansGeo = FALSE, #New + addRowVarsGeo = FALSE, #New + addRowSumsBinary = FALSE, #New + addColSumsBinary = FALSE, #New + addRowMeansLog2Norm = FALSE, #New + addRowVarsLog2Norm = FALSE, #New + scaleTo = 10000, #New + colSm = NULL, #New + logFile = NULL, + version = 2 #New ){ stopifnot(inherits(mat, "dgCMatrix")) @@ -130,105 +141,213 @@ o <- h5closeAll() o <- h5createGroup(ArrowFile, Group) - #Convert Columns to Rle - j <- Rle(findInterval(seq(mat@x)-1,mat@p[-1]) + 1) + if(version == 1){ - #Info - lengthRle <- length(j@lengths) - lengthI <- length(mat@i) + #Convert Columns to Rle + j <- Rle(findInterval(seq(mat@x)-1,mat@p[-1]) + 1) - #Create Data Set - o <- .suppressAll(h5createDataset(ArrowFile, paste0(Group,"/i"), storage.mode = "integer", - dims = c(lengthI, 1), level = 0)) + #Info + lengthRle <- length(j@lengths) + lengthI <- length(mat@i) - o <- .suppressAll(h5createDataset(ArrowFile, paste0(Group,"/jLengths"), storage.mode = "integer", - dims = c(lengthRle, 1), level = 0)) + #Create Data Set + o <- .suppressAll(h5createDataset(ArrowFile, paste0(Group,"/i"), storage.mode = "integer", + dims = c(lengthI, 1), level = getArchRH5Level())) - o <- .suppressAll(h5createDataset(ArrowFile, paste0(Group,"/jValues"), storage.mode = "integer", - dims = c(lengthRle, 1), level = 0)) - - #Write Data Set - o <- .suppressAll(h5write(obj = mat@i + 1, file = ArrowFile, name = paste0(Group,"/i"))) - o <- .suppressAll(h5write(obj = j@lengths, file = ArrowFile, name = paste0(Group,"/jLengths"))) - o <- .suppressAll(h5write(obj = j@values, file = ArrowFile, name = paste0(Group,"/jValues"))) - - #If binary dont store x - if(!binarize){ + o <- .suppressAll(h5createDataset(ArrowFile, paste0(Group,"/jLengths"), storage.mode = "integer", + dims = c(lengthRle, 1), level = getArchRH5Level())) + + o <- .suppressAll(h5createDataset(ArrowFile, paste0(Group,"/jValues"), storage.mode = "integer", + dims = c(lengthRle, 1), level = getArchRH5Level())) + + #Write Data Set + o <- .suppressAll(h5write(obj = mat@i + 1, file = ArrowFile, name = paste0(Group,"/i"))) + o <- .suppressAll(h5write(obj = j@lengths, file = ArrowFile, name = paste0(Group,"/jLengths"))) + o <- .suppressAll(h5write(obj = j@values, file = ArrowFile, name = paste0(Group,"/jValues"))) + + #If binary dont store x + if(!binarize){ + + o <- .suppressAll(h5createDataset(ArrowFile, paste0(Group, "/x"), storage.mode = "double", + dims = c(lengthI, 1), level = getArchRH5Level())) + + o <- .suppressAll(h5write(obj = mat@x, file = ArrowFile, name = paste0(Group, "/x"))) + + }else{ + + mat@x[mat@x > 0] <- 1 + + } + + rm(j) + + }else if(version == 2){ + + #Info + lengthP <- length(mat@p) + lengthI <- length(mat@i) - o <- .suppressAll(h5createDataset(ArrowFile, paste0(Group, "/x"), storage.mode = "double", - dims = c(lengthI, 1), level = 0)) + if(binarize){ + mat@x[mat@x > 0] <- 1 + } - o <- .suppressAll(h5write(obj = mat@x, file = ArrowFile, name = paste0(Group, "/x"))) + #Write Data Set + o <- .suppressAll(h5write(obj = mat@i, file = ArrowFile, name = paste0(Group,"/indices"))) + o <- .suppressAll(h5write(obj = mat@p, file = ArrowFile, name = paste0(Group,"/indptr"))) + o <- .suppressAll(h5write(obj = mat@x, file = ArrowFile, name = paste0(Group, "/data"))) + o <- .suppressAll(h5write(obj = dim(mat), file = ArrowFile, name = paste0(Group, "/shape"))) }else{ - mat@x[mat@x > 0] <- 1 + stop("ArchR Write Method Does Not Exist! Only versions = 1 and 2 exist!") } - + + #################### + # Normal + #################### + + cS <- 0 + rS <- 0 + rM <- 0 + rV <- 0 + if(addColSums){ cS <- Matrix::colSums(mat) o <- .suppressAll(h5createDataset(ArrowFile, paste0(Group, "/colSums"), storage.mode = "double", - dims = c(ncol(mat), 1), level = 0)) + dims = c(ncol(mat), 1), level = getArchRH5Level())) o <- .suppressAll(h5write(obj = cS, file = ArrowFile, name = paste0(Group, "/colSums"))) - } if(addRowSums){ rS <- Matrix::rowSums(mat) o <- .suppressAll(h5createDataset(ArrowFile, paste0(Group, "/rowSums"), storage.mode = "double", - dims = c(nrow(mat), 1), level = 0)) + dims = c(nrow(mat), 1), level = getArchRH5Level())) o <- .suppressAll(h5write(obj = rS, file = ArrowFile, name = paste0(Group, "/rowSums"))) - } - if(addRowMeans){ + if(addRowMeans | addRowVars){ rM <- Matrix::rowMeans(mat) o <- .suppressAll(h5createDataset(ArrowFile, paste0(Group, "/rowMeans"), storage.mode = "double", - dims = c(nrow(mat), 1), level = 0)) + dims = c(nrow(mat), 1), level = getArchRH5Level())) o <- .suppressAll(h5write(obj = rM, file = ArrowFile, name = paste0(Group, "/rowMeans"))) - } if(addRowVars){ - if(!addRowMeans){ - rM <- Matrix::rowMeans(mat) - } - rV <- computeSparseRowVariances(mat@i + 1, mat@x, rM, n = ncol(mat)) + rV <- .sparseRowVars(m=mat, rM=rM) o <- .suppressAll(h5createDataset(ArrowFile, paste0(Group, "/rowVars"), storage.mode = "double", - dims = c(nrow(mat), 1), level = 0)) + dims = c(nrow(mat), 1), level = getArchRH5Level())) o <- .suppressAll(h5write(obj = rV, file = ArrowFile, name = paste0(Group, "/rowVars"))) + } + + rm(cS, rS, rM, rV) + + #################### + # Binary + #################### + + cSB <- 0 + rSB <- 0 + + if(addColSumsBinary){ + cSB <- .colBinarySums(mat) + o <- .suppressAll(h5createDataset(ArrowFile, paste0(Group, "/colSumsBinary"), storage.mode = "double", + dims = c(ncol(mat), 1), level = getArchRH5Level())) + o <- .suppressAll(h5write(obj = cSB, file = ArrowFile, name = paste0(Group, "/colSumsBinary"))) } - if(addRowVarsLog2){ + if(addRowSumsBinary){ + rSB <- .rowBinarySums(mat) + o <- .suppressAll(h5createDataset(ArrowFile, paste0(Group, "/rowSumsBinary"), storage.mode = "double", + dims = c(nrow(mat), 1), level = getArchRH5Level())) + o <- .suppressAll(h5write(obj = rSB, file = ArrowFile, name = paste0(Group, "/rowSumsBinary"))) + } - mat@x <- log2(mat@x + 1) #log-normalize - rM <- Matrix::rowMeans(mat) - idx <- seq_along(rM) - idxSplit <- .splitEvery(idx, 2000) - - #Make sure not too much memory so split into 2000 gene chunks - #Check this doesnt cause memory mapping issues! - rV <- lapply(seq_along(idxSplit), function(x){ - idxX <- idxSplit[[x]] - matx <- mat[idxX, , drop = FALSE] - computeSparseRowVariances(matx@i + 1, matx@x, rM[idxX], n = ncol(mat)) - }) %>% unlist - - #Have to write rowMeansLog2 as well + rm(cSB, rSB) + + #################### + # Log2 + #################### + + cMLog <- 0 + rMLog <- 0 + rVLog <- 0 + + if(addColMeansLog2){ + cMLog <- .sparseColLog2p1Means(mat) + o <- .suppressAll(h5createDataset(ArrowFile, paste0(Group, "/colMeansLog2"), storage.mode = "double", + dims = c(nrow(mat), 1), level = getArchRH5Level())) + o <- .suppressAll(h5write(obj = cMLog, file = ArrowFile, name = paste0(Group, "/colMeansLog2"))) + } + + if(addRowMeansLog2 | addRowVarsLog2){ + rMLog <- .sparseRowLog2p1Means(mat) o <- .suppressAll(h5createDataset(ArrowFile, paste0(Group, "/rowMeansLog2"), storage.mode = "double", - dims = c(nrow(mat), 1), level = 0)) - o <- .suppressAll(h5write(obj = rM, file = ArrowFile, name = paste0(Group, "/rowMeansLog2"))) + dims = c(nrow(mat), 1), level = getArchRH5Level())) + o <- .suppressAll(h5write(obj = rMLog, file = ArrowFile, name = paste0(Group, "/rowMeansLog2"))) + } - #Write rowVarsLog2 + if(addRowVarsLog2){ + rVLog <- .sparseRowLog2p1Vars(mat, rM = rMLog) o <- .suppressAll(h5createDataset(ArrowFile, paste0(Group, "/rowVarsLog2"), storage.mode = "double", - dims = c(nrow(mat), 1), level = 0)) - o <- .suppressAll(h5write(obj = rV, file = ArrowFile, name = paste0(Group, "/rowVarsLog2"))) + dims = c(nrow(mat), 1), level = getArchRH5Level())) + o <- .suppressAll(h5write(obj = rVLog, file = ArrowFile, name = paste0(Group, "/rowVarsLog2"))) + } + + rm(cMLog, rMLog, rVLog) + + #################### + # Geo + #################### + + rMGeo <- 0 + rVGeo <- 0 + + if(addRowMeansGeo | addRowVarsGeo){ + rMGeo <- .sparseRowGeoMeans(mat) + o <- .suppressAll(h5createDataset(ArrowFile, paste0(Group, "/rowMeansGeo"), storage.mode = "double", + dims = c(nrow(mat), 1), level = getArchRH5Level())) + o <- .suppressAll(h5write(obj = rMGeo, file = ArrowFile, name = paste0(Group, "/rowMeansGeo"))) } + if(addRowVarsGeo){ + rVGeo <- .sparseRowGeoVars(mat, rM = rMGeo) + o <- .suppressAll(h5createDataset(ArrowFile, paste0(Group, "/rowVarsGeo"), storage.mode = "double", + dims = c(nrow(mat), 1), level = getArchRH5Level())) + o <- .suppressAll(h5write(obj = rVGeo, file = ArrowFile, name = paste0(Group, "/rowVarsGeo"))) + } + + rm(rMGeo, rVGeo) + + #################### + # Log2 Depth Norm + #################### + + rMLogNorm <- 0 + rVLogNorm <- 0 + + if(addRowMeansLog2Norm | addRowVarsLog2Norm){ + mat <- .normalizeCols(mat, colSm = colSm, scaleTo = scaleTo) + rMLogNorm <- .sparseRowLog2p1Means(mat) + o <- .suppressAll(h5createDataset(ArrowFile, paste0(Group, "/rowMeansLog2Norm"), storage.mode = "double", + dims = c(nrow(mat), 1), level = getArchRH5Level())) + o <- .suppressAll(h5write(obj = rMLogNorm, file = ArrowFile, name = paste0(Group, "/rowMeansLog2Norm"))) + } + + if(addRowVarsLog2Norm){ + rVLogNorm <- .sparseRowLog2p1Vars(mat, rM = rMLogNorm) + o <- .suppressAll(h5createDataset(ArrowFile, paste0(Group, "/rowVarsLog2Norm"), storage.mode = "double", + dims = c(nrow(mat), 1), level = getArchRH5Level())) + o <- .suppressAll(h5write(obj = rVLogNorm, file = ArrowFile, name = paste0(Group, "/rowVarsLog2Norm"))) + } + + rm(rMLogNorm, rVLogNorm) + + #################### #Clean Up Memorys - rm(j,mat) + #################### + rm(mat) gc() o <- h5closeAll() diff --git a/R/BulkProjection.R b/R/BulkProjection.R index 882dd9a4..a74276cd 100644 --- a/R/BulkProjection.R +++ b/R/BulkProjection.R @@ -12,7 +12,6 @@ #' @param force A boolean value indicating whether to force the projection of bulk ATAC data even if fewer than 25% of the features are present in the bulk ATAC data set. #' @param logFile The path to a file to be used for logging ArchR output. #' @export -#' projectBulkATAC <- function( ArchRProj = NULL, seATAC = NULL, diff --git a/R/Clustering.R b/R/Clustering.R index d0bb4160..408942a8 100644 --- a/R/Clustering.R +++ b/R/Clustering.R @@ -51,6 +51,15 @@ #' exists as a column name in `cellColData`. #' @param logFile The path to a file to be used for logging ArchR output. #' @param ... Additional arguments to be provided to Seurat::FindClusters or scran::buildSNNGraph (for example, knn = 50, jaccard = TRUE) +#' +#' @examples +#' +#' # Get Test ArchR Project +#' proj <- getTestProject() +#' +#' # Get Peak Annotations +#' proj <- addClusters(proj, force = TRUE) +#' #' @export #' addClusters <- function( diff --git a/R/ColorPalettes.R b/R/ColorPalettes.R index 81694ee3..ddad7017 100644 --- a/R/ColorPalettes.R +++ b/R/ColorPalettes.R @@ -101,6 +101,15 @@ ArchRPalettes <- list( #' given a unique color from the designated palette set. #' @param set The name of a color palette provided in the `ArchRPalettes` list object. #' @param reverse A boolean variable that indicates whether to return the palette colors in reverse order. +#' +#' @examples +#' +#' # Vector +#' v <- c("A", "B") +#' +#' # Color Palette +#' pal <- paletteDiscrete(values = v) +#' #' @export paletteDiscrete <- function( values = NULL, @@ -140,6 +149,12 @@ paletteDiscrete <- function( #' @param set The name of a color palette provided in the `ArchRPalettes` list object. #' @param n The number of unique colors to generate as part of this continuous color palette. #' @param reverse A boolean variable that indicates whether to return the palette colors in reverse order. +#' +#' @examples +#' +#' # Color Palette +#' pal <- paletteContinuous() +#' #' @export paletteContinuous <- function( set = "solarExtra", diff --git a/R/CreateArrow.R b/R/CreateArrow.R index 08243139..023c8cc0 100644 --- a/R/CreateArrow.R +++ b/R/CreateArrow.R @@ -63,6 +63,22 @@ #' @param verbose A boolean value that determines whether standard output should be printed. #' @param cleamTmp A boolean value that determines whether to clean temp folder of all intermediate ".arrow" files. #' @param logFile The path to a file to be used for logging ArchR output. +#' +#' @examples +#' +#' # Get Test Fragments +#' fragments <- getTestFragments() +#' +#' # Create Arrow Files +#' arrowFiles <- createArrowFiles( +#' inputFiles = fragments, +#' sampleNames = "PBSmall", +#' minFrags = 100, +#' nChunk = 1, +#' TileMatParams=list(tileSize=10000), +#' force = TRUE +#' ) +#' #' @export #' createArrowFiles <- function( @@ -202,12 +218,21 @@ createArrowFiles <- function( args$registryDir <- file.path(QCDir, "CreateArrowsRegistry") args$cleanTmp <- NULL - if(subThreading){ - h5disableFileLocking() - }else{ + #H5 File Lock Check + h5lock <- setArchRLocking() + if(h5lock){ + if(subThreading){ + message("subThreadhing Disabled since ArchRLocking is TRUE see `addArchRLocking`") + subThreading <- FALSE + } args$threads <- min(length(inputFiles), threads) + }else{ + if(subThreading){ + message("subThreadhing Enabled since ArchRLocking is FALSE see `addArchRLocking`") + } } + #Default Param args$minTSS <- NULL #Run With Parallel or lapply @@ -228,10 +253,6 @@ createArrowFiles <- function( paste0(args$outputNames,".arrow")[file.exists(paste0(args$outputNames,".arrow"))] }) - if(subThreading){ - h5enableFileLocking() - } - .endLogging(logFile = logFile) return(outArrows) @@ -1295,10 +1316,10 @@ createArrowFiles <- function( chrRGValues <- paste0("Fragments/",chrTmp,"/RGValues") lengthRG <- length(RG@lengths) o <- h5createGroup(tmpFile, paste0("Fragments/",chrTmp)) - o <- .suppressAll(h5createDataset(tmpFile, chrPos, storage.mode = "integer", dims = c(nrow(dt), 2), level = 0)) - o <- .suppressAll(h5createDataset(tmpFile, chrRGLengths, storage.mode = "integer", dims = c(lengthRG, 1), level = 0)) + o <- .suppressAll(h5createDataset(tmpFile, chrPos, storage.mode = "integer", dims = c(nrow(dt), 2), level = getArchRH5Level())) + o <- .suppressAll(h5createDataset(tmpFile, chrRGLengths, storage.mode = "integer", dims = c(lengthRG, 1), level = getArchRH5Level())) o <- .suppressAll(h5createDataset(tmpFile, chrRGValues, storage.mode = "character", - dims = c(lengthRG, 1), level = 0, size = max(nchar(RG@values)) + 1)) + dims = c(lengthRG, 1), level = getArchRH5Level(), size = max(nchar(RG@values)) + 1)) o <- h5write(obj = cbind(dt$V2,dt$V3 - dt$V2 + 1), file = tmpFile, name = chrPos) o <- h5write(obj = RG@lengths, file = tmpFile, name = chrRGLengths) o <- h5write(obj = RG@values, file = tmpFile, name = chrRGValues) @@ -1330,10 +1351,10 @@ createArrowFiles <- function( chrRGValues <- paste0(chrTmp, "._.RGValues") lengthRG <- length(RG@lengths) - o <- .suppressAll(h5createDataset(tmpChrFile, chrPos, storage.mode = "integer", dims = c(nrow(dt), 2), level = 0)) - o <- .suppressAll(h5createDataset(tmpChrFile, chrRGLengths, storage.mode = "integer", dims = c(lengthRG, 1), level = 0)) + o <- .suppressAll(h5createDataset(tmpChrFile, chrPos, storage.mode = "integer", dims = c(nrow(dt), 2), level = getArchRH5Level())) + o <- .suppressAll(h5createDataset(tmpChrFile, chrRGLengths, storage.mode = "integer", dims = c(lengthRG, 1), level = getArchRH5Level())) o <- .suppressAll(h5createDataset(tmpChrFile, chrRGValues, storage.mode = "character", - dims = c(lengthRG, 1), level = 0, size = max(nchar(RG@values)) + 1)) + dims = c(lengthRG, 1), level = getArchRH5Level(), size = max(nchar(RG@values)) + 1)) o <- h5write(obj = cbind(dt$V2,dt$V3 - dt$V2 + 1), file = tmpChrFile, name = chrPos) o <- h5write(obj = RG@lengths, file = tmpChrFile, name = chrRGLengths) @@ -1671,10 +1692,10 @@ createArrowFiles <- function( chrRGValues <- paste0("Fragments/",chrTmp,"/RGValues") lengthRG <- length(RG@lengths) o <- h5createGroup(tmpFile, paste0("Fragments/",chrTmp)) - o <- .suppressAll(h5createDataset(tmpFile, chrPos, storage.mode = "integer", dims = c(nrow(dt), 2), level = 0)) - o <- .suppressAll(h5createDataset(tmpFile, chrRGLengths, storage.mode = "integer", dims = c(lengthRG, 1), level = 0)) + o <- .suppressAll(h5createDataset(tmpFile, chrPos, storage.mode = "integer", dims = c(nrow(dt), 2), level = getArchRH5Level())) + o <- .suppressAll(h5createDataset(tmpFile, chrRGLengths, storage.mode = "integer", dims = c(lengthRG, 1), level = getArchRH5Level())) o <- .suppressAll(h5createDataset(tmpFile, chrRGValues, storage.mode = "character", - dims = c(lengthRG, 1), level = 0, size = max(nchar(RG@values)) + 1)) + dims = c(lengthRG, 1), level = getArchRH5Level(), size = max(nchar(RG@values)) + 1)) o <- h5write(obj = cbind(dt$start, dt$end - dt$start + 1), file = tmpFile, name = chrPos) o <- h5write(obj = RG@lengths, file = tmpFile, name = chrRGLengths) @@ -1707,10 +1728,10 @@ createArrowFiles <- function( chrRGValues <- paste0(chrTmp, "._.RGValues") lengthRG <- length(RG@lengths) - o <- .suppressAll(h5createDataset(tmpChrFile, chrPos, storage.mode = "integer", dims = c(nrow(dt), 2), level = 0)) - o <- .suppressAll(h5createDataset(tmpChrFile, chrRGLengths, storage.mode = "integer", dims = c(lengthRG, 1), level = 0)) + o <- .suppressAll(h5createDataset(tmpChrFile, chrPos, storage.mode = "integer", dims = c(nrow(dt), 2), level = getArchRH5Level())) + o <- .suppressAll(h5createDataset(tmpChrFile, chrRGLengths, storage.mode = "integer", dims = c(lengthRG, 1), level = getArchRH5Level())) o <- .suppressAll(h5createDataset(tmpChrFile, chrRGValues, storage.mode = "character", - dims = c(lengthRG, 1), level = 0, size = max(nchar(RG@values)) + 1)) + dims = c(lengthRG, 1), level = getArchRH5Level(), size = max(nchar(RG@values)) + 1)) o <- h5write(obj = cbind(dt$start, dt$end - dt$start + 1), file = tmpChrFile, name = chrPos) o <- h5write(obj = RG@lengths, file = tmpChrFile, name = chrRGLengths) @@ -1914,7 +1935,7 @@ createArrowFiles <- function( #Determine Ranges and RG Pre-Allocation chr <- uniqueChr[x] - ix <- BiocGenerics::which(chunkChr == chr) + ix <- BiocGenerics::which(paste0(chunkChr) == paste0(chr)) if(threads == 1){ @@ -1927,9 +1948,9 @@ createArrowFiles <- function( chrRGLengths <- paste0("Fragments/",chr,"/RGLengths") chrRGValues <- paste0("Fragments/",chr,"/RGValues") o <- h5createGroup(outArrow, paste0("Fragments/",chr)) - o <- .suppressAll(h5createDataset(outArrow, chrPos, storage.mode = "integer", dims = c(0, 2), level = 0)) - o <- .suppressAll(h5createDataset(outArrow, chrRGLengths, storage.mode = "integer", dims = c(0, 1), level = 0)) - o <- .suppressAll(h5createDataset(outArrow, chrRGValues, storage.mode = "character", dims = c(0, 1), level = 0, size = 4)) + o <- .suppressAll(h5createDataset(outArrow, chrPos, storage.mode = "integer", dims = c(0, 2), level = getArchRH5Level())) + o <- .suppressAll(h5createDataset(outArrow, chrRGLengths, storage.mode = "integer", dims = c(0, 1), level = getArchRH5Level())) + o <- .suppressAll(h5createDataset(outArrow, chrRGValues, storage.mode = "character", dims = c(0, 1), level = getArchRH5Level(), size = 4)) return(NULL) @@ -1945,9 +1966,11 @@ createArrowFiles <- function( mcols(fragments)$RG@values <- stringr::str_split(mcols(fragments)$RG@values, pattern = "#", simplify=TRUE)[,2] #Order RG RLE based on bcPass - fragments <- fragments[BiocGenerics::which(mcols(fragments)$RG %bcin% bcPass)] - fragments <- fragments[order(S4Vectors::match(mcols(fragments)$RG, bcPass))] - + fragments <- fragments[BiocGenerics::which(paste0(mcols(fragments)$RG) %bcin% bcPass)] + if(length(fragments) > 0){ + fragments <- fragments[order(S4Vectors::match(paste0(mcols(fragments)$RG), bcPass))] + } + #Check if Fragments are greater than minFragSize and smaller than maxFragSize fragments <- fragments[width(fragments) >= minFragSize] fragments <- fragments[width(fragments) <= maxFragSize] @@ -1970,17 +1993,17 @@ createArrowFiles <- function( .logMessage(msg = paste0(prefix, " detected 0 Fragments in cells passing filtering threshold for ", chr), logFile = logFile) o <- h5createGroup(outArrow, paste0("Fragments/",chr)) - o <- .suppressAll(h5createDataset(outArrow, chrPos, storage.mode = "integer", dims = c(0, 2), level = 0)) - o <- .suppressAll(h5createDataset(outArrow, chrRGLengths, storage.mode = "integer", dims = c(0, 1), level = 0)) - o <- .suppressAll(h5createDataset(outArrow, chrRGValues, storage.mode = "character", dims = c(0, 1), level = 0, + o <- .suppressAll(h5createDataset(outArrow, chrPos, storage.mode = "integer", dims = c(0, 2), level = getArchRH5Level())) + o <- .suppressAll(h5createDataset(outArrow, chrRGLengths, storage.mode = "integer", dims = c(0, 1), level = getArchRH5Level())) + o <- .suppressAll(h5createDataset(outArrow, chrRGValues, storage.mode = "character", dims = c(0, 1), level = getArchRH5Level(), size = 10)) }else{ o <- h5createGroup(outArrow, paste0("Fragments/",chr)) - o <- .suppressAll(h5createDataset(outArrow, chrPos, storage.mode = "integer", dims = c(length(fragments), 2), level = 0)) - o <- .suppressAll(h5createDataset(outArrow, chrRGLengths, storage.mode = "integer", dims = c(lengthRG, 1), level = 0)) - o <- .suppressAll(h5createDataset(outArrow, chrRGValues, storage.mode = "character", dims = c(lengthRG, 1), level = 0, + o <- .suppressAll(h5createDataset(outArrow, chrPos, storage.mode = "integer", dims = c(length(fragments), 2), level = getArchRH5Level())) + o <- .suppressAll(h5createDataset(outArrow, chrRGLengths, storage.mode = "integer", dims = c(lengthRG, 1), level = getArchRH5Level())) + o <- .suppressAll(h5createDataset(outArrow, chrRGValues, storage.mode = "character", dims = c(lengthRG, 1), level = getArchRH5Level(), size = max(nchar(mcols(fragments)$RG@values)) + 1)) o <- h5write(obj = cbind(start(fragments),width(fragments)), file = outArrow, name = chrPos) @@ -2013,9 +2036,9 @@ createArrowFiles <- function( chrRGLengths <- paste0(chr, "._.RGLengths") chrRGValues <- paste0(chr, "._.RGValues") - o <- .suppressAll(h5createDataset(tmpChrFile, chrPos, storage.mode = "integer", dims = c(0, 2), level = 0)) - o <- .suppressAll(h5createDataset(tmpChrFile, chrRGLengths, storage.mode = "integer", dims = c(0, 1), level = 0)) - o <- .suppressAll(h5createDataset(tmpChrFile, chrRGValues, storage.mode = "character", dims = c(0, 1), level = 0, size = 4)) + o <- .suppressAll(h5createDataset(tmpChrFile, chrPos, storage.mode = "integer", dims = c(0, 2), level = getArchRH5Level())) + o <- .suppressAll(h5createDataset(tmpChrFile, chrRGLengths, storage.mode = "integer", dims = c(0, 1), level = getArchRH5Level())) + o <- .suppressAll(h5createDataset(tmpChrFile, chrRGValues, storage.mode = "character", dims = c(0, 1), level = getArchRH5Level(), size = 4)) return(tmpChrFile) @@ -2030,9 +2053,17 @@ createArrowFiles <- function( }) %>% Reduce("c", .) mcols(fragments)$RG@values <- stringr::str_split(mcols(fragments)$RG@values, pattern = "#", simplify=TRUE)[,2] + if(x == 1){ + .logThis(fragments, name = paste0(prefix, " .tmpToArrow Fragments0-Chr-(",x," of ",length(uniqueChr),")-", uniqueChr[x]), logFile = logFile) + .logThis(data.frame(bc = as.vector(mcols(fragments)$RG@values)), name = paste0(prefix, " .tmpToArrow Barcodes0-Chr-(",x," of ",length(uniqueChr),")-", uniqueChr[x]), logFile = logFile) + .logThis(data.frame(bc = as.vector(bcPass)), name = paste0(prefix, " .tmpToArrow bcPass0-Chr-(",x," of ",length(uniqueChr),")-", uniqueChr[x]), logFile = logFile) + } + #Order RG RLE based on bcPass - fragments <- fragments[BiocGenerics::which(mcols(fragments)$RG %bcin% bcPass)] - fragments <- fragments[order(S4Vectors::match(mcols(fragments)$RG, bcPass))] + fragments <- fragments[BiocGenerics::which(paste0(mcols(fragments)$RG) %bcin% bcPass)] + if(length(fragments) > 0){ + fragments <- fragments[order(S4Vectors::match(paste0(mcols(fragments)$RG), bcPass))] + } #Check if Fragments are greater than minFragSize and smaller than maxFragSize fragments <- fragments[width(fragments) >= minFragSize] @@ -2053,17 +2084,17 @@ createArrowFiles <- function( if(lengthRG == 0){ #HDF5 Write - o <- .suppressAll(h5createDataset(tmpChrFile, chrPos, storage.mode = "integer", dims = c(0, 2), level = 0)) - o <- .suppressAll(h5createDataset(tmpChrFile, chrRGLengths, storage.mode = "integer", dims = c(0, 1), level = 0)) - o <- .suppressAll(h5createDataset(tmpChrFile, chrRGValues, storage.mode = "character", dims = c(0, 1), level = 0, + o <- .suppressAll(h5createDataset(tmpChrFile, chrPos, storage.mode = "integer", dims = c(0, 2), level = getArchRH5Level())) + o <- .suppressAll(h5createDataset(tmpChrFile, chrRGLengths, storage.mode = "integer", dims = c(0, 1), level = getArchRH5Level())) + o <- .suppressAll(h5createDataset(tmpChrFile, chrRGValues, storage.mode = "character", dims = c(0, 1), level = getArchRH5Level(), size = 10)) }else{ #HDF5 Write - o <- .suppressAll(h5createDataset(tmpChrFile, chrPos, storage.mode = "integer", dims = c(length(fragments), 2), level = 0)) - o <- .suppressAll(h5createDataset(tmpChrFile, chrRGLengths, storage.mode = "integer", dims = c(lengthRG, 1), level = 0)) - o <- .suppressAll(h5createDataset(tmpChrFile, chrRGValues, storage.mode = "character", dims = c(lengthRG, 1), level = 0, + o <- .suppressAll(h5createDataset(tmpChrFile, chrPos, storage.mode = "integer", dims = c(length(fragments), 2), level = getArchRH5Level())) + o <- .suppressAll(h5createDataset(tmpChrFile, chrRGLengths, storage.mode = "integer", dims = c(lengthRG, 1), level = getArchRH5Level())) + o <- .suppressAll(h5createDataset(tmpChrFile, chrRGValues, storage.mode = "character", dims = c(lengthRG, 1), level = getArchRH5Level(), size = max(nchar(mcols(fragments)$RG@values)) + 1)) o <- h5write(obj = cbind(start(fragments),width(fragments)), file = tmpChrFile, name = chrPos) @@ -2206,9 +2237,9 @@ createArrowFiles <- function( chrRGLengths <- paste0("Fragments/",chr,"/RGLengths") chrRGValues <- paste0("Fragments/",chr,"/RGValues") o <- h5createGroup(outArrow, paste0("Fragments/",chr)) - o <- .suppressAll(h5createDataset(outArrow, chrPos, storage.mode = "integer", dims = c(0, 2), level = 0)) - o <- .suppressAll(h5createDataset(outArrow, chrRGLengths, storage.mode = "integer", dims = c(0, 1), level = 0)) - o <- .suppressAll(h5createDataset(outArrow, chrRGValues, storage.mode = "character", dims = c(0, 1), level = 0, size = 4)) + o <- .suppressAll(h5createDataset(outArrow, chrPos, storage.mode = "integer", dims = c(0, 2), level = getArchRH5Level())) + o <- .suppressAll(h5createDataset(outArrow, chrRGLengths, storage.mode = "integer", dims = c(0, 1), level = getArchRH5Level())) + o <- .suppressAll(h5createDataset(outArrow, chrRGValues, storage.mode = "character", dims = c(0, 1), level = getArchRH5Level(), size = 4)) }else{ @@ -2225,9 +2256,9 @@ createArrowFiles <- function( chrRGLengths <- paste0("Fragments/",chr,"/RGLengths") chrRGValues <- paste0("Fragments/",chr,"/RGValues") o <- h5createGroup(outArrow, paste0("Fragments/",chr)) - o <- .suppressAll(h5createDataset(outArrow, chrPos, storage.mode = "integer", dims = c(length(fragments), 2), level = 0)) - o <- .suppressAll(h5createDataset(outArrow, chrRGLengths, storage.mode = "integer", dims = c(lengthRG, 1), level = 0)) - o <- .suppressAll(h5createDataset(outArrow, chrRGValues, storage.mode = "character", dims = c(lengthRG, 1), level = 0, + o <- .suppressAll(h5createDataset(outArrow, chrPos, storage.mode = "integer", dims = c(length(fragments), 2), level = getArchRH5Level())) + o <- .suppressAll(h5createDataset(outArrow, chrRGLengths, storage.mode = "integer", dims = c(lengthRG, 1), level = getArchRH5Level())) + o <- .suppressAll(h5createDataset(outArrow, chrRGValues, storage.mode = "character", dims = c(lengthRG, 1), level = getArchRH5Level(), size = max(nchar(mcols(fragments)$RG@values)) + 1)) o <- h5write(obj = cbind(start(fragments),width(fragments)), file = outArrow, name = chrPos) o <- h5write(obj = mcols(fragments)$RG@lengths, file = outArrow, name = chrRGLengths) diff --git a/R/DoubletsScores.R b/R/DoubletsScores.R index 9275bd5f..fb8be364 100644 --- a/R/DoubletsScores.R +++ b/R/DoubletsScores.R @@ -31,6 +31,15 @@ #' @param parallelParam A list of parameters to be passed for biocparallel/batchtools parallel computing. #' @param verbose A boolean value that determines whether standard output is printed. #' @param logFile The path to a file to be used for logging ArchR output. +#' +#' @examples +#' +#' # Get Test ArchR Project +#' proj <- getTestProject() +#' +#' # Add Doublet Scores for Small Project +#' proj <- addDoubletScores(proj, dimsToUse = 1:5, LSIParams = list(dimsToUse = 1:5, varFeatures=1000, iterations = 2)) +#' #' @export addDoubletScores <- function( input = NULL, @@ -378,50 +387,13 @@ addDoubletScores <- function( scale_colour_gradientn(colors = pal) + xlab("UMAP Dimension 1") + ylab("UMAP Dimension 2") + labs(color = "Simulated Doublet Density") + - guides(fill = "none") + theme_ArchR(baseSize = 10) + + .gg_guides(fill = FALSE) + theme_ArchR(baseSize = 10) + theme(axis.text.x = element_blank(), axis.ticks.x = element_blank(), axis.text.y = element_blank(), axis.ticks.y = element_blank()) + coord_equal(ratio = diff(xlim)/diff(ylim), xlim = xlim, ylim = ylim, expand = FALSE) + ggtitle("Simulated and LSI-Projected Density Overlayed") + theme(legend.direction = "horizontal", legend.box.background = element_rect(color = NA)) - # if(!requireNamespace("ggrastr", quietly = TRUE)){ - - # message("ggrastr is not available for rastr of points, continuing without rastr!") - # message("To install ggrastr try : devtools::install_github('VPetukhov/ggrastr')") - - # pdensity <- ggplot() + - # geom_point(data = df, aes(x=X1,y=X2),color="lightgrey", size = 0.5) + - # geom_point(data = dfDoub, aes(x=x,y=y,colour=color), size = 0.5) + - # scale_colour_gradientn(colors = pal) + - # xlab("UMAP Dimension 1") + ylab("UMAP Dimension 2") + - # guides(fill = "none") + theme_ArchR(baseSize = 10) + - # labs(color = "Simulated Doublet Density") + - # theme(axis.text.x = element_blank(), axis.ticks.x = element_blank(), - # axis.text.y = element_blank(), axis.ticks.y = element_blank()) + - # coord_equal(ratio = diff(xlim)/diff(ylim), xlim = xlim, ylim = ylim, expand = FALSE) + - # ggtitle("Simulated and LSI-Projected Doublet Density Overlayed") + theme(legend.direction = "horizontal", - # legend.box.background = element_rect(color = NA)) - - # }else{ - - # #.requirePackage("ggrastr", installInfo = "devtools::install_github('VPetukhov/ggrastr')") - - # pdensity <- ggplot() + - # .geom_point_rast2(data = df, aes(x=X1,y=X2),color="lightgrey", size = 0.5) + - # .geom_point_rast2(data = dfDoub, aes(x=x,y=y,colour=color), size = 0.5) + - # scale_colour_gradientn(colors = pal) + - # xlab("UMAP Dimension 1") + ylab("UMAP Dimension 2") + - # labs(color = "Simulated Doublet Density") + - # guides(fill = "none") + theme_ArchR(baseSize = 10) + - # theme(axis.text.x = element_blank(), axis.ticks.x = element_blank(), - # axis.text.y = element_blank(), axis.ticks.y = element_blank()) + - # coord_equal(ratio = diff(xlim)/diff(ylim), xlim = xlim, ylim = ylim, expand = FALSE) + - # ggtitle("Simulated and LSI-Projected Density Overlayed") + theme(legend.direction = "horizontal", - # legend.box.background = element_rect(color = NA)) - - # } - #Plot Doublet Score pscore <- ggPoint( x = df[,1], @@ -588,14 +560,21 @@ addDoubletScores <- function( .logError(e, fn = "uwot::umap_transform", info = prefix, errorList = errorList, logFile = logFile) }) + #make sure rownames are the same here + #simulated LSI are first so lets remove + corLSI <- allLSI[-seq_len(nSimLSI), , drop = FALSE] + corUMAP <- umapProject[-seq_len(nSimLSI), , drop = FALSE] + idx <- intersect(rownames(corLSI), rownames(LSI$matSVD)) corProjection <- list( - LSI = unlist(lapply(seq_len(ncol(allLSI)), function(x) cor(allLSI[-seq_len(nSimLSI), x], LSI$matSVD[, x]) )), + LSI = unlist(lapply(seq_len(ncol(allLSI)), function(x) cor(corLSI[idx, x, drop=FALSE], LSI$matSVD[idx, x, drop=FALSE]) )), UMAP = c( - dim1 = cor(uwotUmap[[1]][,1], umapProject[-seq_len(nSimLSI), 1]), - dim2 = cor(uwotUmap[[1]][,2], umapProject[-seq_len(nSimLSI), 2]) + dim1 = cor(uwotUmap[[1]][,1], corUMAP[idx, 1]), + dim2 = cor(uwotUmap[[1]][,2], corUMAP[idx, 2]) ) ) names(corProjection[[1]]) <- paste0("SVD", LSI$dimsKept) + rm(corLSI) + rm(corUMAP) .logThis(uwotUmap[[1]], name = paste0(prefix, "OriginalUMAP"), logFile = logFile) .logThis(umapProject[-seq_len(nSimLSI), ], name = paste0(prefix, "OriginalReProjectedUMAP"), logFile = logFile) diff --git a/R/Embedding.R b/R/Embedding.R index c71026f4..5dd79ec8 100644 --- a/R/Embedding.R +++ b/R/Embedding.R @@ -33,6 +33,15 @@ #' `name` already exists. #' @param threads The number of threads to be used for parallel computing. Default set to 1 because if set to high can cause C stack usage errors. #' @param ... Additional parameters to pass to `uwot::umap()` +#' +#' @examples +#' +#' # Get Test ArchR Project +#' proj <- getTestProject() +#' +#' # Add UMAP for Small Project +#' proj <- addUMAP(proj, force = TRUE) +#' #' @export addUMAP <- function( ArchRProj = NULL, @@ -346,6 +355,15 @@ addUMAP <- function( #' `name` already exists. #' @param threads The number of threads to be used for parallel computing. #' @param ... Additional parameters for computing the TSNE embedding to pass to `Rtsne::Rtsne()` (when `method = "RTSNE"`) or to `Seurat::RunTSNE()` (when method = "FFRTSNE"). +#' +#' @examples +#' +#' # Get Test ArchR Project +#' proj <- getTestProject() +#' +#' # Add UMAP for Small Project +#' proj <- addTSNE(proj, force = TRUE) +#' #' @export addTSNE <- function( ArchRProj = NULL, diff --git a/R/FilterCells.R b/R/FilterCells.R index 54a89ac1..8be9ee97 100644 --- a/R/FilterCells.R +++ b/R/FilterCells.R @@ -8,6 +8,15 @@ #' #' @param ArchRProj An `ArchRProject` object. #' @param cellNames A character vector of `cellNames` that will be subsetted of the current `ArchRProject`. +#' +#' @examples +#' +#' # Get Test ArchR Project +#' proj <- getTestProject() +#' +#' # Get Peak Annotations +#' proj <- subsetCells(proj, getCellNames(proj)[1:50]) +#' #' @export subsetCells <- function( ArchRProj = NULL, @@ -41,6 +50,18 @@ subsetCells <- function( #' This `filterRatio` allows you to apply a consistent filter across multiple different samples that may have different #' percentages of doublets because they were run with different cell loading concentrations. #' The higher the `filterRatio`, the greater the number of cells potentially removed as doublets. +#' +#' @examples +#' +#' # Get Test ArchR Project +#' proj <- getTestProject() +#' +#' # Add Doublet Scores for Small Project +#' proj <- addDoubletScores(proj, dimsToUse = 1:5, LSIParams = list(dimsToUse = 1:5, varFeatures=1000, iterations = 2)) +#' +#' # Filter Doublets (Since Low Cells filterRatio has to be high before removing 1 cell!) +#' proj <- filterDoublets(proj, filterRatio=10) +#' #' @export filterDoublets <- function(ArchRProj = NULL, cutEnrich = 1, cutScore = -Inf, filterRatio = 1){ diff --git a/R/Footprinting.R b/R/Footprinting.R index 566c41bf..e66f722f 100644 --- a/R/Footprinting.R +++ b/R/Footprinting.R @@ -19,6 +19,18 @@ #' @param threads The number of threads to be used for parallel computing. #' @param verbose A boolean value that determines whether standard output includes verbose sections. #' @param logFile The path to a file to be used for logging ArchR output. +#' +#' @examples +#' +#' # Get Test ArchR Project +#' proj <- getTestProject() +#' +#' # Get Motif Positions +#' positions <- getPositions(proj) +#' +#' # Get Footprints +#' seFoot <- getFootprints(ArchRProj = proj, positions = positions, groupBy = "Clusters", minCells = 10) +#' #' @export getFootprints <- function( ArchRProj = NULL, @@ -50,6 +62,29 @@ getFootprints <- function( .startLogging(logFile = logFile) .logThis(mget(names(formals()),sys.frame(sys.nframe())), "Input-Parameters", logFile=logFile) + #Validate Positions + chromLengths <- getChromLengths(ArchRProj) + positions0 <- positions + positions <- lapply(seq_along(positions), function(x){ + + if(x %% 100 == 0) message("Checking Postions ", x, " of ", length(positions)) + + #Check All Positions Are at least 50 + flank from chromSize start! + idx1 <- start(positions[[x]]) > flank + 50 + + #Check End + 50 + flank less than chromSize end! + idx2 <- end(positions[[x]]) + flank + 50 < chromLengths[paste0(seqnames(positions[[x]]))] + + if(sum(idx1 & idx2)==0){ + NULL + }else{ + positions[[x]][idx1 & idx2] + } + + }) + names(positions) <- names(positions0) + positions <- as(positions, "GRangesList") + ##################################################### # Compute Kmer Frequency Table ##################################################### @@ -69,7 +104,11 @@ getFootprints <- function( genome <- getGenome(ArchRProj) .requirePackage("Biostrings", source = "bioc") - BSgenome <- eval(parse(text = genome)) + BSgenome <- tryCatch({ + eval(parse(text = paste0(genome))) + }, error = function(e){ + eval(parse(text = paste0(genome,"::",genome))) + }) BSgenome <- validBSgenome(BSgenome) .logDiffTime("Computing Kmer Bias Table", tstart, verbose = verbose, logFile = logFile) @@ -345,6 +384,21 @@ getFootprints <- function( #' @param force If many footprints are requested when plot = FALSE, please set force = TRUE. #' This prevents large amount of footprint plots stored as an object. #' @param logFile The path to a file to be used for logging ArchR output. +#' +#' @examples +#' +#' # Get Test ArchR Project +#' proj <- getTestProject() +#' +#' # Get Motif Positions +#' positions <- getPositions(proj) +#' +#' # Get Footprints +#' seFoot <- getFootprints(ArchRProj = proj, positions = positions, groupBy = "Clusters", minCells = 10) +#' +#' # Plot Footprints +#' plotFootprints(seFoot, smoothWindow = 11) +#' #' @export plotFootprints <- function( seFoot = NULL, @@ -579,8 +633,8 @@ plotFootprints <- function( ylim = c(quantile(plotFootDF$mean, 0.0001), 1.15*quantile(smoothFoot, 0.999)), xlim = c(min(plotFootDF$x),max(plotFootDF$x)) ) + theme_ArchR(baseSize = baseSize) + ggtitle(name) + - guides(fill = "none") + - guides(color = "none") + ylab(paste0(title,"Normalized Insertions")) + .gg_guides(fill = FALSE, color = FALSE) + + ylab(paste0(title,"Normalized Insertions")) #removed ggrepel due to incompatibility with coord_cartesian - see https://github.com/GreenleafLab/ArchR/issues/493#issuecomment-870012873 #ggrepel::geom_label_repel(data = plotMax, aes(label = group), size = 3, xlim = c(75, NA)) @@ -617,7 +671,7 @@ plotFootprints <- function( ) { #https://stackoverflow.com/questions/52297978/decrease-overal-legend-size-elements-and-text gg + - guides(shape = guide_legend(override.aes = list(size = pointSize)), + .gg_guides(shape = guide_legend(override.aes = list(size = pointSize)), color = guide_legend(override.aes = list(size = pointSize))) + theme(legend.title = element_text(size = baseSize), legend.text = element_text(size = baseSize), diff --git a/R/GRangesUtils.R b/R/GRangesUtils.R index 23cd8990..282f6ab5 100644 --- a/R/GRangesUtils.R +++ b/R/GRangesUtils.R @@ -15,6 +15,15 @@ #' @param pruningMode The name of the pruning method to use (from`GenomeInfoDb::seqinfo()`) when seqlevels must be removed from a `GRanges` object. #' When some of the seqlevels to drop from the given `GRanges` object are in use (i.e. have ranges on them), the ranges on these sequences need #' to be removed before the seqlevels can be dropped. Four pruning modes are currently defined: "error", "coarse", "fine", and "tidy". +#' +#' @examples +#' +#' # Add ArchR Genome +#' addArchRGenome("hg19test2") +#' +#' # Filter Chr +#' filterChrGR(getChromSizes(), remove = "chr5") +#' #' @export filterChrGR <- function( gr = NULL, @@ -24,7 +33,7 @@ filterChrGR <- function( pruningMode="coarse" ){ - .validInput(input = gr, name = "gr", valid = c("GRanges")) + .validInput(input = gr, name = "gr", valid = c("GRanges", "TxDb")) .validInput(input = remove, name = "remove", valid = c("character", "null")) .validInput(input = underscore, name = "underscore", valid = c("boolean")) .validInput(input = standard, name = "standard", valid = c("boolean")) @@ -69,6 +78,22 @@ filterChrGR <- function( #' @param decreasing A boolean value indicating whether the values in the column indicated via `by` should be ordered in decreasing #' order. If `TRUE`, the higher value in `by` will be retained. #' @param verbose A boolean value indicating whether the output should include extra reporting. +#' +#' @examples +#' +#' # Dummy GR +#' gr <- GRanges( +#' seqnames = "chr1", +#' ranges = IRanges( +#' start = c(1, 4, 11), +#' end = c(10, 12, 20) +#' ), +#' score = c(1, 2, 3) +#' ) +#' +#' # Non Overlapping +#' nonOverlappingGR(gr) +#' #' @export nonOverlappingGR <- function( gr = NULL, @@ -147,6 +172,22 @@ nonOverlappingGR <- function( #' @param gr A `GRanges` object. #' @param upstream The number of basepairs upstream (5') to extend each region in `gr` in a strand-aware fashion. #' @param downstream The number of basepairs downstream (3') to extend each region in `gr` in a strand-aware fashion. +#' +#' @examples +#' +#' # Dummy GR +#' gr <- GRanges( +#' seqnames = "chr1", +#' ranges = IRanges( +#' start = c(1, 4, 11), +#' end = c(10, 12, 20) +#' ), +#' score = c(1, 2, 3) +#' ) +#' +#' # Non Overlapping +#' extendGR(gr, 1, 2) +#' #' @export extendGR <- function(gr = NULL, upstream = NULL, downstream = NULL){ .validInput(input = gr, name = "gr", valid = c("GRanges")) diff --git a/R/GgplotUtils.R b/R/GgplotUtils.R index ec808b4b..7a5b1c5a 100644 --- a/R/GgplotUtils.R +++ b/R/GgplotUtils.R @@ -50,6 +50,21 @@ #' @param rastr A boolean value that indicates whether the plot should be rasterized using `ggrastr`. This does not rasterize #' lines and labels, just the internal portions of the plot. #' @param dpi The resolution in dots per inch to use for the plot. +#' +#' @examples +#' +#' # Create Random Data +#' m <- data.frame(matrix(rnorm(20, 2),ncol=2)) +#' m$color <- sample(c("A", "B"), 10, replace = TRUE) +#' +#' # Plot +#' p <- ggPoint(x = m[,1], y = m[,2], color = m[,3]) +#' +#' # To PDF +#' pdf("test.pdf", width = 4, height = 4) +#' p +#' dev.off() +#' #' @export ggPoint <- function( x = NULL, @@ -267,19 +282,6 @@ ggPoint <- function( raster.width = min(par('fin')), raster.height = (ratioYX * min(par('fin'))) ) - - # if(!requireNamespace("ggrastr", quietly = TRUE)){ - # message("ggrastr is not available for rastr of points, continuing without rastr!") - # message("To install ggrastr try : devtools::install_github('VPetukhov/ggrastr')") - # p <- p + geom_point(size = size, alpha = alpha) - # }else{ - # .requirePackage("ggrastr", installInfo = "devtools::install_github('VPetukhov/ggrastr')") - # p <- p + geom_point_rast( - # size = size, raster.dpi = dpi, alpha = alpha, - # raster.width=par('fin')[1], - # raster.height = (ratioYX * par('fin')[2]) - # ) - # } }else{ @@ -298,7 +300,7 @@ ggPoint <- function( } #print(pal) p <- p + scale_color_manual(values = pal) + - guides(color = guide_legend(override.aes = list(size = legendSize, shape = 15))) + .gg_guides(color = guide_legend(override.aes = list(size = legendSize, shape = 15))) } if (labelMeans) { @@ -394,6 +396,20 @@ ggPoint <- function( #' @param rastr A boolean value that indicates whether the plot should be rasterized. This does not rasterize lines and labels, just the internal portions of the plot. #' @param pal A custom palette from `ArchRPalettes` used to display the density of points on the plot. #' @param ... Additional params to be supplied to ggPoint +#' +#' @examples +#' +#' # Create Random Data +#' m <- data.frame(matrix(rnorm(20, 2),ncol=2)) +#' +#' # Plot +#' p <- ggOneToOne(x = m[,1], y = m[,2]) +#' +#' # To PDF +#' pdf("test.pdf", width = 4, height = 4) +#' p +#' dev.off() +#' #' @export ggOneToOne <- function ( x = NULL, @@ -516,6 +532,20 @@ ggOneToOne <- function ( #' them to the value of the 97.5th and 2.5th percentile values respectively. #' @param addPoints A boolean value indicating whether individual points should be shown on the hexplot. #' @param ... Additional params for plotting +#' +#' @examples +#' +#' # Create Random Data +#' m <- data.frame(matrix(rnorm(300, 2),ncol=3)) +#' +#' # Plot +#' p <- ggHex(x = m[,1], y = m[,2], color = m[,3]) +#' +#' # To PDF +#' pdf("test.pdf", width = 4, height = 4) +#' p +#' dev.off() +#' #' @export ggHex <- function( x = NULL, @@ -643,6 +673,21 @@ ggHex <- function( #' @param addBoxPlot A boolean indicating whether to add a boxplot to the plot if `plotAs="violin"`. #' @param plotAs A string indicating how the groups should be plotted. Acceptable values are "ridges" (for a `ggrides`-style plot) or "violin" (for a violin plot). #' @param ... Additional parameters to pass to `ggplot2` for plotting. +#' +#' @examples +#' +#' # Create Random Data +#' m <- data.frame(x=matrix(rnorm(10, 2),ncol=1)) +#' m$color <- sample(c("A", "B"), 10, replace = TRUE) +#' +#' # Plot +#' p <- ggGroup(x = m$color, y = m$x) +#' +#' # To PDF +#' pdf("test.pdf", width = 4, height = 4) +#' p +#' dev.off() +#' #' @export ggGroup <- function( x = NULL, @@ -787,6 +832,20 @@ ggGroup <- function( #' @param sizes A numeric vector or list of values indicating the relative size for each of the objects in `plotList` or supplied in `...`. If the plot is supplied in `...` the order is the same as the input in this function. If set to NULL all plots will be evenly distributed. #' @param type A string indicating wheter vertical ("v") or horizontal ("h") alignment should be used for the multi-plot layout. #' @param draw A boolean value indicating whether to draw the plot(s) (`TRUE`) or return a graphical object (`FALSE`). +#' @examples +#' +#' # Create Random Data +#' m <- data.frame(x=matrix(rnorm(10, 2),ncol=1)) +#' m$color <- sample(c("A", "B"), 10, replace = TRUE) +#' +#' # Plot +#' p <- ggGroup(x = m$color, y = m$x) +#' +#' # To PDF +#' pdf("test.pdf", width = 4, height = 7) +#' ggAlignPlots(p, p) +#' dev.off() +#' #' @export ggAlignPlots <- function( ..., @@ -887,6 +946,20 @@ ggAlignPlots <- function( #' @param axisTickCm The length in centimeters to be used for the axis ticks. #' @param xText90 A boolean value indicating whether the x-axis text should be rotated 90 degrees counterclockwise. #' @param yText90 A boolean value indicating whether the y-axis text should be rotated 90 degrees counterclockwise. +#' @examples +#' +#' # Create Random Data +#' m <- data.frame(x=matrix(rnorm(10, 2),ncol=1)) +#' m$color <- sample(c("A", "B"), 10, replace = TRUE) +#' +#' # Plot +#' p <- ggGroup(x = m$color, y = m$x) + theme_ArchR() +#' +#' # To PDF +#' pdf("test.pdf", width = 4, height = 7) +#' p +#' dev.off() +#' #' @export theme_ArchR <- function( color = "black", @@ -947,12 +1020,32 @@ theme_ArchR <- function( } - - ########################################################################################## # ggplot2 helper functions ########################################################################################## +#ggplot2 guides +#this funciton handles the deprecation of guides scale = FALSE -> 'none' +#I'm not sure if I change to FALSE if its backwards so this is safer for now +.gg_guides <- function(...){ + args2 <- lapply(list(...), function(a){ + if(is(a, "logical")){ + if(!a){ + a <- "none" + } + } + a + }) + tryCatch({ + guides(...) + }, warning = function(w){ + supppressWarnings(guides(...)) + }, error = function(e){ + do.call(guides, args2) + }) +} + +#Check Cairo .checkCairo <- function(){ tryCatch({ tmp <- dev.cur() diff --git a/R/GlobalDefaults.R b/R/GlobalDefaults.R index aab0b8a5..0cc4758e 100644 --- a/R/GlobalDefaults.R +++ b/R/GlobalDefaults.R @@ -5,7 +5,9 @@ ######################################################## ArchRDefaults <- list( ArchR.threads = 1, + ArchR.locking = FALSE, ArchR.logging = TRUE, + ArchR.h5level = 0, ArchR.genome = NA, ArchR.chrPrefix = TRUE, ArchR.debugging = FALSE, @@ -13,6 +15,7 @@ ArchRDefaults <- list( ) ArchRDependency <- c( + "devtools", "grid", "gridExtra", "gtools", @@ -102,6 +105,12 @@ ArchRDependency <- c( #' This function will install extra packages used in ArchR that are not installed by default. #' #' @param force If you want to force a reinstall of these pacakges. +#' +#' @examples +#' +#' # Install +#' installExtraPackages() +#' #' @export installExtraPackages <- function(force = FALSE){ @@ -262,6 +271,12 @@ installExtraPackages <- function(force = FALSE){ #' This function will set the default requirement of chromosomes to have a "chr" prefix. #' #' @param chrPrefix A boolean describing the requirement of chromosomes to have a "chr" prefix. +#' +#' @examples +#' +#' # Add ArchR Chr Prefix +#' addArchRChrPrefix() +#' #' @export addArchRChrPrefix <- function(chrPrefix = TRUE){ @@ -281,6 +296,11 @@ addArchRChrPrefix <- function(chrPrefix = TRUE){ #' #' This function will get the default requirement of chromosomes to have a "chr" prefix. #' +#' @examples +#' +#' # Get ArchR Chr Prefix +#' getArchRChrPrefix() +#' #' @export getArchRChrPrefix <- function(){ @@ -311,6 +331,12 @@ getArchRChrPrefix <- function(){ #' This can be overwritten on a per-function basis using the given function's `threads` parameter. #' @param force If you request more than the total number of CPUs minus 2, ArchR will set `threads` to `(nCPU - 2)`. #' To bypass this, setting `force = TRUE` will use the number provided to `threads`. +#' +#' @examples +#' +#' # Add ArchR Threads +#' addArchRThreads() +#' #' @export addArchRThreads <- function(threads = floor(parallel::detectCores()/ 2), force = FALSE){ @@ -343,6 +369,12 @@ addArchRThreads <- function(threads = floor(parallel::detectCores()/ 2), force = #' #' This function will get the number of threads to be used for parallel execution across all ArchR functions. #' +#' +#' @examples +#' +#' # Get ArchR Threads +#' getArchRThreads() +#' #' @export getArchRThreads <- function(){ .ArchRThreads <- options()[["ArchR.threads"]] @@ -359,6 +391,141 @@ getArchRThreads <- function(){ } } +########################################################################################## +# h5 compression level +########################################################################################## + +#' Add a globally-applied compression level for h5 files +#' +#' This function will set the default compression level to be used for h5 file execution across all ArchR functions. +#' +#' @param level The default compression level to be used for h5 file execution across all ArchR functions. +#' +#' @examples +#' +#' # Add ArchR H5 Compression level +#' addArchRH5Level() +#' +#' @export +addArchRH5Level <- function(level = 0){ + + .validInput(input = level, name = "level", valid = "integer") + message("Setting default h5 compression to ", level, ".") + options(ArchR.h5level = as.integer(round(level))) + +} + +#' Get globally-applied compression level for h5 files +#' +#' This function will get the default compression level to be used for h5 file execution across all ArchR functions. +#' +#' @examples +#' +#' # Get ArchR H5 Compression level +#' getArchRH5Level() +#' +#' @export +getArchRH5Level <- function(){ + .ArchRH5Level <- options()[["ArchR.h5level"]] + if(!is.null(.ArchRH5Level)){ + if(!.isWholenumber(.ArchRH5Level)){ + message("option(.ArchRH5Level) : ", .ArchRThreads, " is not an integer. \nDid you mistakenly set this to a value without addArchRH5Level? Reseting to default!") + addArchRH5Level() + options()[["ArchR.threads"]] + }else{ + .ArchRH5Level + } + }else{ + 0 + } +} + +########################################################################################## +# H5 File Locking +########################################################################################## + +#' Add a globally-applied H5 file locking setup +#' +#' This function will set the default H5 file locking parameters +#' +#' @param locking The default value for H5 File Locking +#' +#' @examples +#' +#' # Disable/Add ArchR H5 Locking Globally +#' addArchRLocking(locking=FALSE) +#' +#' @export +addArchRLocking <- function(locking=FALSE){ + + .validInput(input = locking, name = "locking", valid = "boolean") + + #Check if Lockign is Valid + h5test <- h5testFileLocking(".") + if(!h5test){ + message( + "H5 Locking is not enabled based on 'h5testFileLocking'.\nSetting ArchRLocking locking to ", + locking, "." + ) + locking <- TRUE + }else{ + message("Setting ArchRLocking to ", locking, ".") + } + options(ArchR.locking = locking) + +} + +#' Set a globally-applied H5 file locking setup +#' +#' This function will set the default H5 file locking parameters to the system +#' +#' +#' @examples +#' +#' # Set ArchR H5 Locking Globally +#' setArchRLocking() +#' +#' @export +setArchRLocking <- function(){ + + #Get Value + .ArchRLocking <- options()[["ArchR.locking"]] + if(is.null(.ArchRLocking)){ + .ArchRLocking <- TRUE + }else if(!is.logical(.ArchRLocking)){ + .ArchRLocking <- TRUE + } + + #Get Environment Value + h5lock <- tryCatch({ + Sys.getenv("HDF5_USE_FILE_LOCKING") + }, error = function(e){ + "" + }) + if(h5lock=="FALSE"){ + h5lock <- FALSE + }else if(h5lock==""){ + h5lock <- TRUE + }else{ + stop("H5 Locking Not Valid!") + } + + #Set Environmental Value + if(.ArchRLocking != h5lock){ + if(.ArchRLocking){ + message("Enabling H5 File Locking. If this is not desired check `addArchRLocking`.") + h5enableFileLocking() + }else{ + message("Disabling H5 File Locking. If this is not desired check `addArchRLocking`.") + h5disableFileLocking() + } + } + + #Return Value + .ArchRLocking + +} + ########################################################################################## # Create Gene/Genome Annotation ########################################################################################## @@ -374,13 +541,19 @@ getArchRThreads <- function(){ #' For something other than one of the currently supported, see `createGeneAnnnotation()` and `createGenomeAnnnotation()`. #' @param install A boolean value indicating whether the `BSgenome` object associated with the provided `genome` should be #' automatically installed if it is not currently installed. This is useful for helping reduce user download requirements. +#' +#' @examples +#' +#' # Add ArchR Genome to use globally +#' addArchRGenome("hg19test2") +#' #' @export addArchRGenome <- function(genome = NULL, install = TRUE){ .validInput(input = genome, name = "genome", valid = "character") .validInput(input = install, name = "install", valid = c("boolean")) - supportedGenomes <- c("hg19","hg38","mm9","mm10","hg19test") + supportedGenomes <- c("hg19","hg38","mm9","mm10","hg19test", "hg19test2") if(tolower(genome) %ni% supportedGenomes){ @@ -400,7 +573,7 @@ addArchRGenome <- function(genome = NULL, install = TRUE){ stop("BSgenome for hg19 not installed! Please install by setting install = TRUE or by the following:\n\tBiocManager::install(\"BSgenome.Hsapiens.UCSC.hg19\")") } } - }else if(tolower(genome)=="hg19test"){ + }else if(tolower(genome) %in% c("hg19test", "hg19test2")){ if(!requireNamespace("BSgenome.Hsapiens.UCSC.hg19", quietly = TRUE)){ if(install){ message("BSgenome for hg19 not installed! Now installing by the following:\n\tBiocManager::install(\"BSgenome.Hsapiens.UCSC.hg19\")") @@ -461,6 +634,12 @@ addArchRGenome <- function(genome = NULL, install = TRUE){ #' @param genomeAnnotation A boolean value indicating whether the `genomeAnnotation` associated with the ArchRGenome should be returned #' instead of the globally defined genome. The `genomeAnnotation` is used downstream to determine things like chromosome sizes and nucleotide content. #' This function is not meant to be run with both `geneAnnotation` and `genomeAnnotation` set to `TRUE` (it is an either/or return value). +#' +#' @examples +#' +#' # Get ArchR Genome to use globally +#' getArchRGenome() +#' #' @export getArchRGenome <- function( geneAnnotation=FALSE, @@ -470,7 +649,7 @@ getArchRGenome <- function( .validInput(input = geneAnnotation, name = "geneAnnotation", valid = "boolean") .validInput(input = genomeAnnotation, name = "genomeAnnotation", valid = "boolean") - supportedGenomes <- c("hg19","hg38","mm9","mm10","hg19test") + supportedGenomes <- c("hg19","hg38","mm9","mm10","hg19test", "hg19test2") .ArchRGenome <- options()[["ArchR.genome"]] if(!is.null(.ArchRGenome)){ diff --git a/R/GroupCoverages.R b/R/GroupCoverages.R index 9b3f1170..eb34a1c8 100644 --- a/R/GroupCoverages.R +++ b/R/GroupCoverages.R @@ -17,16 +17,26 @@ #' @param minReplicates The minimum number of pseudo-bulk replicates to be generated. #' @param maxReplicates The maximum number of pseudo-bulk replicates to be generated. #' @param sampleRatio The fraction of the total cells that can be sampled to generate any given pseudo-bulk replicate. +#' @param excludeChr A character vector containing the `seqnames` of the chromosomes that should be excluded from this analysis. #' @param kmerLength The length of the k-mer used for estimating Tn5 bias. #' @param threads The number of threads to be used for parallel computing. #' @param returnGroups A boolean value that indicates whether to return sample-guided cell-groupings without creating coverages. #' This is used mainly in `addReproduciblePeakSet()` when MACS2 is not being used to call peaks but rather peaks are called from a #' TileMatrix (`peakMethod = "Tiles"`). #' @param parallelParam A list of parameters to be passed for biocparallel/batchtools parallel computing. -#' @param force A boolean value that indicates whether or not to overwrite the relevant data in the `ArchRProject` object if +#' @param force A boolean value that indicates whether or not to skip validation and overwrite the relevant data in the `ArchRProject` object if #' insertion coverage / pseudo-bulk replicate information already exists. #' @param verbose A boolean value that determines whether standard output includes verbose sections. #' @param logFile The path to a file to be used for logging ArchR output. +#' +#' @examples +#' +#' # Get Test ArchR Project +#' proj <- getTestProject() +#' +#' # Add Group Coverages +#' proj <- addGroupCoverages(proj, force = TRUE) +#' #' @export addGroupCoverages <- function( ArchRProj = NULL, @@ -39,6 +49,7 @@ addGroupCoverages <- function( minReplicates = 2, maxReplicates = 5, sampleRatio = 0.8, + excludeChr = NULL, kmerLength = 6, threads = getArchRThreads(), returnGroups = FALSE, @@ -58,6 +69,7 @@ addGroupCoverages <- function( .validInput(input = minReplicates, name = "minReplicates", valid = c("integer")) .validInput(input = maxReplicates, name = "maxReplicates", valid = c("integer")) .validInput(input = sampleRatio, name = "sampleRatio", valid = c("numeric")) + .validInput(input = excludeChr, name = "excludeChr", valid = c("character", "null")) .validInput(input = kmerLength, name = "kmerLength", valid = c("integer")) .validInput(input = threads, name = "threads", valid = c("integer")) .validInput(input = returnGroups, name = "returnGroups", valid = c("boolean")) @@ -106,6 +118,26 @@ addGroupCoverages <- function( } } + #I'm 99% sure ArchR Should handle Empty Seqnames but lets leave this here since it cant hurt + if(!force){ + #Check that the seqnames that will be used actually exist in the ArrowFiles + seqnames <- getSeqnames(ArchRProj, "Fragments") + if(!is.null(excludeChr)){ + seqnames <- seqnames[paste0(seqnames) %ni% excludeChr] + } + ArrowFiles <- getArrowFiles(ArchRProj) + missSeqAll <- .safelapply(seq_along(ArrowFiles), function(x){ + .validateSeqNotEmpty(ArrowFile = ArrowFiles[x], seqnames = seqnames) + }, threads = threads) %>% unlist %>% unique + if(!is.null(missSeqAll)) { + stop("The following seqnames do not have fragment information in one or more ArrowFiles:\n", + paste(missSeqAll, collapse = ","), + "\nYou can proceed with the analysis by ignoring these seqnames by passing them to the 'excludeChr' parameter.") + } + }else{ + message("Skipping validation of empty chromosomes since `force` = TRUE!") + } + ##################################################### #Groups ##################################################### @@ -204,6 +236,10 @@ addGroupCoverages <- function( args$kmerLength <- kmerLength args$ArrowFiles <- getArrowFiles(ArchRProj) args$availableChr <- .availableSeqnames(getArrowFiles(ArchRProj)) + #Filter Chromosomes + if(!is.null(excludeChr)){ + args$availableChr <- args$availableChr[BiocGenerics::which(paste0(args$availableChr) %ni% excludeChr)] + } args$chromLengths <- getChromLengths(ArchRProj) #args$cellsInArrow <- split(rownames(getCellColData(ArchRProj)), getCellColData(ArchRProj)$Sample) args$cellsInArrow <- cellsInArrow <- split( @@ -212,19 +248,26 @@ addGroupCoverages <- function( ) args$covDir <- file.path(getOutputDirectory(ArchRProj), "GroupCoverages", groupBy) args$parallelParam <- parallelParam - args$threads <- threads args$verbose <- verbose args$tstart <- tstart args$logFile <- logFile args$registryDir <- file.path(getOutputDirectory(ArchRProj), "GroupCoverages", "batchRegistry") + #H5 File Lock Check + h5lock <- setArchRLocking() + if(h5lock){ + args$threads <- 1 + }else{ + if(threads > 1){ + message("subThreadhing Enabled since ArchRLocking is FALSE see `addArchRLocking`") + } + args$threads <- threads + } + ##################################################### # Batch Apply to Create Insertion Coverage Files ##################################################### - #Disable Hdf5 File Locking - h5disableFileLocking() - #Batch Apply .logDiffTime(sprintf("Creating Coverage Files!"), tstart, addHeader = FALSE) batchOut <- .batchlapply(args) @@ -256,9 +299,6 @@ addGroupCoverages <- function( ArchRProj@projectMetadata$GroupCoverages[[groupBy]] <- SimpleList(Params = Params, coverageMetadata = coverageMetadata) - #Enable Hdf5 File Locking - h5enableFileLocking() - .logDiffTime(sprintf("Finished Creation of Coverage Files!"), tstart, addHeader = FALSE) .endLogging(logFile = logFile) @@ -388,8 +428,8 @@ addGroupCoverages <- function( chrValues <- paste0("Coverage/",availableChr[k],"/Values") lengthRle <- length(covk@lengths) o <- h5createGroup(covFile, paste0("Coverage/",availableChr[k])) - o <- .suppressAll(h5createDataset(covFile, chrLengths, storage.mode = "integer", dims = c(lengthRle, 1), level = 0)) - o <- .suppressAll(h5createDataset(covFile, chrValues, storage.mode = "integer", dims = c(lengthRle, 1), level = 0)) + o <- .suppressAll(h5createDataset(covFile, chrLengths, storage.mode = "integer", dims = c(lengthRle, 1), level = getArchRH5Level())) + o <- .suppressAll(h5createDataset(covFile, chrValues, storage.mode = "integer", dims = c(lengthRle, 1), level = getArchRH5Level())) o <- h5write(obj = covk@lengths, file = covFile, name = chrLengths) o <- h5write(obj = covk@values, file = covFile, name = chrValues) diff --git a/R/GroupExport.R b/R/GroupExport.R index fdb15a07..8b56acb3 100644 --- a/R/GroupExport.R +++ b/R/GroupExport.R @@ -19,6 +19,15 @@ exportGroupSE <- function(...){ #' @param threads An integer specifying the number of threads for parallel. #' @param verbose A boolean specifying to print messages during computation. #' @param logFile The path to a file to be used for logging ArchR output. +#' +#' @examples +#' +#' # Get Test ArchR Project +#' proj <- getTestProject() +#' +#' # Get Group SE +#' se <- getGroupSE(proj, useMatrix = "PeakMatrix", groupBy = "Clusters") +#' #' @export getGroupSE <- function( ArchRProj = NULL, @@ -131,6 +140,95 @@ getGroupSE <- function( } +#' Export PseudoBulk Group Summarized Experiment +#' +#' This function will determine cell groups for pseudobulk, summarize and export a summarized experiment for a assay in a ArchRProject. +#' +#' @param ArchRProj An `ArchRProject` object. +#' @param useMatrix The name of the matrix in the ArrowFiles. See getAvailableMatrices to see options +#' @param groupBy The name of the column in `cellColData` to use for grouping cells together for summarizing. +#' @param divideN A boolean describing whether to divide by the number of cells. +#' @param scaleTo Depth normalize to this value if not NULL. +#' @param useLabels A boolean value indicating whether to use sample labels to create sample-aware subgroupings during as pseudo-bulk replicate generation. +#' @param sampleLabels The name of a column in `cellColData` to use to identify samples. In most cases, this parameter should be left as `NULL` and you +#' should only use this parameter if you do not want to use the default sample labels stored in `cellColData$Sample`. However, if your individual Arrow +#' files do not map to individual samples, then you should set this parameter to accurately identify your samples. This is the case in (for example) +#' multiplexing applications where cells from different biological samples are mixed into the same reaction and demultiplexed based on a lipid barcode or genotype. +#' @param minCells The minimum number of cells required in a given cell group to permit insertion coverage file generation. +#' @param maxCells The maximum number of cells to use during insertion coverage file generation. +#' @param minReplicates The minimum number of pseudo-bulk replicates to be generated. +#' @param maxReplicates The maximum number of pseudo-bulk replicates to be generated. +#' @param sampleRatio The fraction of the total cells that can be sampled to generate any given pseudo-bulk replicate. +#' @param verbose A boolean specifying to print messages during computation. +#' @param threads An integer specifying the number of threads for parallel. +#' @param logFile The path to a file to be used for logging ArchR output. +#' +#' @examples +#' +#' # Get Test ArchR Project +#' proj <- getTestProject() +#' +#' # Get Group SE +#' se <- getPBGroupSE(proj, useMatrix = "PeakMatrix", groupBy = "Clusters") +#' +#' @export +getPBGroupSE <- function( + ArchRProj = NULL, + useMatrix = "GeneScoreMatrix", + groupBy = "Clusters", + divideN = TRUE, + scaleTo = 10000, + useLabels = TRUE, + sampleLabels = "Sample", + minCells = 40, + maxCells = 500, + minReplicates = 2, + maxReplicates = 5, + sampleRatio = 0.8, + verbose = TRUE, + threads = getArchRThreads(), + logFile = createLogFile("getPBGroupSE") + ){ + + #Get PB + cellGroups <- suppressMessages(addGroupCoverages( + ArchRProj = ArchRProj, + groupBy = groupBy, + useLabels = useLabels, + sampleLabels = sampleLabels, + minCells = minCells, + maxCells = maxCells, + maxFragments = 10^9, + minReplicates = minReplicates, + maxReplicates = maxReplicates, + sampleRatio = sampleRatio, + returnGroups = TRUE, + force = TRUE, + logFile = logFile + )) + labeledCells <- unlist(unlist(cellGroups,use.names=TRUE), use.names=TRUE) + labeledGroups <- names(labeledCells) + names(labeledGroups) <- labeledCells + ArchRProj@cellColData$TMP_PB_12312412312312312 <- paste0(as.vector(labeledGroups[rownames(ArchRProj@cellColData)])) + + #Matrix + sePB <- getGroupSE( + ArchRProj = ArchRProj, + useMatrix = useMatrix, + groupBy = "TMP_PB_12312412312312312", + divideN = divideN, + scaleTo = scaleTo, + threads = threads, + verbose = verbose, + logFile = logFile + ) + sePB <- sePB[,colnames(sePB) != "NA"] + colData(sePB)$Group <- stringr::str_split(colnames(sePB), pattern="\\.Rep", simplify=TRUE)[,1] + + sePB + +} + #' Export Group BigWigs #' #' This function will group, summarize and export a bigwig for each group in an ArchRProject. @@ -148,6 +246,15 @@ getGroupSE <- function( #' @param verbose A boolean specifying to print messages during computation. #' @param threads An integer specifying the number of threads for parallel. #' @param logFile The path to a file to be used for logging ArchR output. +#' +#' @examples +#' +#' # Get Test ArchR Project +#' proj <- getTestProject() +#' +#' # Get Group BW +#' bw <- getGroupBW(proj, groupBy = "Clusters") +#' #' @export getGroupBW <- function( ArchRProj = NULL, @@ -233,8 +340,15 @@ getGroupBW <- function( chromSizes <- getChromSizes(ArchRProj) tiles <- unlist(slidingWindows(chromSizes, width = tileSize, step = tileSize)) - if(threads > 1){ - h5disableFileLocking() + #H5 File Lock Check + h5lock <- setArchRLocking() + if(h5lock){ + threads <- 1 + }else{ + if(threads > 1){ + message("subThreadhing Enabled since ArchRLocking is FALSE see `addArchRLocking`") + } + threads <- threads } covFiles <- c() @@ -264,10 +378,6 @@ getGroupBW <- function( } - if(threads > 1){ - h5enableFileLocking() - } - .endLogging(logFile = logFile) covFiles @@ -380,4 +490,77 @@ getGroupBW <- function( } +#' Export Group Fragment Files +#' +#' This function will group export fragment files for each group in an ArchRProject. +#' +#' @param ArchRProj An `ArchRProject` object. +#' @param groupBy A string that indicates how cells should be grouped. This string corresponds to one of the standard or +#' user-supplied `cellColData` metadata columns (for example, "Clusters"). Cells with the same value annotated in this metadata +#' column will be grouped together and their fragments exported to `outputDirectory`/GroupFragments. +#' @param threads An integer specifying the number of threads for parallel. +#' @param logFile The path to a file to be used for logging ArchR output. +#' +#' @examples +#' +#' # Get Test ArchR Project +#' proj <- getTestProject() +#' +#' # Get Group BW +#' frags <- getGroupFragments(proj, groupBy = "Clusters") +#' +#' @export +getGroupFragments <- function( + ArchRProj = NULL, + groupBy = "Clusters", + threads = getArchRThreads(), + logFile = createLogFile("getGroupFragments") + ){ + + #Cell Col Data + ccd <- getCellColData(ArchRProj = ArchRProj) + + #Get Groups + Groups <- getCellColData(ArchRProj = ArchRProj, select = groupBy, drop = TRUE) + + #Cell Split + cellGroups <- split(getCellNames(ArchRProj), Groups) + + #Outdir + outDir <- file.path(getOutputDirectory(ArchRProj), "GroupFragments") + dir.create(outDir, showWarnings = FALSE) + #Read Fragments From Each Sample Export + outList <- .safelapply(seq_along(cellGroups), function(x){ + + message("Export Fragments : ", x, " of ", length(cellGroups)) + + #Get Fragments + frags <- suppressMessages(getFragmentsFromProject( + ArchRProj = ArchRProj, + cellNames = cellGroups[[x]], + logFile = logFile, + ) %>% Reduce("c", .)) + + #Export Fragments + dt <- data.frame( + V1 = seqnames(frags), + V2 = start(frags) - 1, + V3 = end(frags), + V4 = mcols(frags)$RG + ) %>% data.table + + #Write Bgzip Etc + groupFile <- file.path(outDir, paste0(groupBy, ".", names(cellGroups)[x], ".tsv")) + data.table::fwrite(dt, groupFile, sep = "\t", col.names = FALSE) + Rsamtools::bgzip(groupFile) + file.remove(groupFile) + .fileRename(paste0(groupFile, ".bgz"), paste0(groupFile, ".gz")) + paste0(groupFile, ".gz") + + }, threads = threads) + + #Return + unlist(outList) + +} diff --git a/R/Harmony.R b/R/Harmony.R index ca9fbabe..d4b7deef 100644 --- a/R/Harmony.R +++ b/R/Harmony.R @@ -20,7 +20,19 @@ #' exists as a column name in `cellColData`. #' @param ... Additional arguments to be provided to harmony::HarmonyMatrix #' @export +#' +#' @examples +#' +#' # Get Test ArchR Project +#' proj <- getTestProject() +#' +#' # Add Confounder +#' proj <- addCellColData(proj, data = proj$TSSEnrichment > 10, name = "TSSQC", cells = getCellNames(proj)) #' +#' # Run Harmony +#' proj <- addHarmony(proj, groupBy = "TSSQC") +#' +#' @export addHarmony <- function( ArchRProj = NULL, reducedDims = "IterativeLSI", diff --git a/R/HelperUtils.R b/R/HelperUtils.R index eee04e65..c3a33cf8 100644 --- a/R/HelperUtils.R +++ b/R/HelperUtils.R @@ -8,6 +8,12 @@ #' #' @param x The value to search for in `table`. #' @param table The set of values to serve as the base for the match function. +#' +#' @examples +#' +#' #Test +#' c("A", "B", "C") %ni% c("A", "C") +#' #' @export "%ni%" <- function(x, table) !(match(x, table, nomatch = 0) > 0) @@ -17,6 +23,12 @@ #' #' @param x An `S4Vector` object to search for in `table`. #' @param table The set of `S4Vector` objects to serve as the base for the match function. +#' +#' @examples +#' +#' #Test +#' Rle(c("A", "B", "C")) %bcin% Rle(c("A", "C")) +#' #' @export '%bcin%' <- function(x, table) S4Vectors::match(x, table, nomatch = 0) > 0 @@ -26,6 +38,12 @@ #' #' @param x An `S4Vector` object to search for in `table`. #' @param table The set of `S4Vector` objects to serve as the base for the match function. +#' +#' @examples +#' +#' #Test +#' Rle(c("A", "B", "C")) %bcni% Rle(c("A", "C")) +#' #' @export '%bcni%' <- function(x, table) !(S4Vectors::match(x, table, nomatch = 0) > 0) @@ -41,6 +59,15 @@ #' @param fragmentFiles A character vector the paths to fragment files to be reformatted #' @param checkChrPrefix A boolean value that determines whether seqnames should be checked to contain #' "chr". IF set to `TRUE`, any seqnames that do not contain "chr" will be removed from the fragment files. +#' +#' @examples +#' +#' # Get Test Fragments +#' fragments <- getTestFragments() +#' +#' # Get Peak Annotations +#' fragments2 <- reformatFragmentFiles(fragments) +#' #' @export reformatFragmentFiles <- function( fragmentFiles = NULL, @@ -97,10 +124,22 @@ reformatFragmentFiles <- function( #' #' @param i A character/numeric value vector to see concordance with j. #' @param j A character/numeric value vector to see concordance with i. +#' +#' @examples +#' +#' # Get Test ArchR Project +#' proj <- getTestProject() +#' +#' # Overlap of Clusters and CellType +#' confusionMatrix(proj$Clusters, proj$CellType) +#' +#' # Overlap of Cell Type and RNA Predict +#' confusionMatrix(proj$CellType, proj$predictedGroup_Un) +#' #' @export confusionMatrix <- function( - i = NULL, - j = NULL + i = NULL, + j = NULL ){ ui <- unique(i) uj <- unique(j) @@ -124,6 +163,15 @@ confusionMatrix <- function( #' @param labels A character vector containing lables to map. #' @param newLabels A character vector (same length as oldLabels) to map labels to from oldLabels. #' @param oldLabels A character vector (same length as newLabels) to map labels from to newLabels +#' +#' @examples +#' +#' # Get Test ArchR Project +#' proj <- getTestProject() +#' +#' # Get Peak Annotations +#' proj$ClusterLabels <- mapLabels(proj$Clusters, c("T", "B", "M"), c("C1", "C2", "C3")) +#' #' @export mapLabels <- function(labels = NULL, newLabels = NULL, oldLabels = names(newLabels)){ diff --git a/R/HiddenUtils.R b/R/HiddenUtils.R index 1e7844dc..d7a7f9cf 100644 --- a/R/HiddenUtils.R +++ b/R/HiddenUtils.R @@ -170,49 +170,10 @@ return(x) } -.normalizeCols <- function(mat = NULL, colSm = NULL, scaleTo = NULL){ - if(is.null(colSm)){ - colSm <- Matrix::colSums(mat) - } - if(!is.null(scaleTo)){ - mat@x <- scaleTo * mat@x / rep.int(colSm, Matrix::diff(mat@p)) - }else{ - mat@x <- mat@x / rep.int(colSm, Matrix::diff(mat@p)) - } - return(mat) -} - -.safeSubset <- function(mat = NULL, subsetRows = NULL, subsetCols = NULL){ - - if(!is.null(subsetRows)){ - idxNotIn <- which(subsetRows %ni% rownames(mat)) - if(length(idxNotIn) > 0){ - subsetNamesNotIn <- subsetRows[idxNotIn] - matNotIn <- Matrix::sparseMatrix(i=1,j=1,x=0,dims=c(length(idxNotIn), ncol = ncol(mat))) - rownames(matNotIn) <- subsetNamesNotIn - mat <- rbind(mat, matNotIn) - } - mat <- mat[subsetRows,] - } - - if(!is.null(subsetCols)){ - idxNotIn <- which(subsetCols %ni% colnames(mat)) - if(length(idxNotIn) > 0){ - subsetNamesNotIn <- subsetCols[idxNotIn] - matNotIn <- Matrix::sparseMatrix(i=1,j=1,x=0,dims=c(nrow(mat), ncol = length(idxNotIn))) - colnames(matNotIn) <- subsetNamesNotIn - mat <- cbind(mat, matNotIn) - } - mat <- mat[,subsetCols] - } - - mat - -} - .groupMeans <- function(mat = NULL, groups=NULL, na.rm = TRUE, sparse = FALSE){ stopifnot(!is.null(groups)) stopifnot(length(groups)==ncol(mat)) + sparse <- is(mat, "sparseMatrix") gm <- lapply(unique(groups), function(x){ if(sparse){ Matrix::rowMeans(mat[,which(groups==x),drop=F], na.rm=na.rm) @@ -227,6 +188,7 @@ .groupSums <- function(mat = NULL, groups=NULL, na.rm = TRUE, sparse = FALSE){ stopifnot(!is.null(groups)) stopifnot(length(groups)==ncol(mat)) + sparse <- is(mat, "sparseMatrix") gm <- lapply(unique(groups), function(x){ if(sparse){ Matrix::rowSums(mat[,which(groups==x),drop=F], na.rm=na.rm) @@ -241,9 +203,10 @@ .groupSds <- function(mat = NULL, groups = NULL, na.rm = TRUE, sparse = FALSE){ stopifnot(!is.null(groups)) stopifnot(length(groups)==ncol(mat)) + sparse <- is(mat, "sparseMatrix") gs <- lapply(unique(groups), function(x){ - if (sparse){ - matrixStats::rowSds(as.matrix(mat[, which(groups == x), drop = F]), na.rm = na.rm) + if(sparse){ + .sparesRowSds(mat[, which(groups == x), drop = F], na.rm = na.rm) }else{ matrixStats::rowSds(mat[, which(groups == x), drop = F], na.rm = na.rm) } diff --git a/R/Imputation.R b/R/Imputation.R index bda3e0da..e6fe0e37 100644 --- a/R/Imputation.R +++ b/R/Imputation.R @@ -31,6 +31,15 @@ #' @param seed A number to be used as the seed for random number generation. It is recommended to keep track of the seed used so that you can #' reproduce results downstream. #' @param logFile The path to a file to be used for logging ArchR output. +#' +#' @examples +#' +#' # Get Test ArchR Project +#' proj <- getTestProject() +#' +#' # Add Impute Weights +#' proj <- addImputeWeights(proj) +#' #' @export addImputeWeights <- function( ArchRProj = NULL, @@ -180,8 +189,8 @@ addImputeWeights <- function( if(useHdf5){ o <- .suppressAll(h5createGroup(file = weightFile, paste0("block", x))) - o <- .suppressAll(h5write(obj = ix, file = weightFile, name = paste0("block", x, "/Names"), level = 0)) - o <- .suppressAll(h5write(obj = as.matrix(Wt), file = weightFile, name = paste0("block", x, "/Weights"), level = 0)) + o <- .suppressAll(h5write(obj = ix, file = weightFile, name = paste0("block", x, "/Names"), level = getArchRH5Level())) + o <- .suppressAll(h5write(obj = as.matrix(Wt), file = weightFile, name = paste0("block", x, "/Weights"), level = getArchRH5Level())) return(weightFile) }else{ Wt @@ -222,6 +231,18 @@ addImputeWeights <- function( #' This function gets imputation weights from an ArchRProject to impute numeric values. #' #' @param ArchRProj An `ArchRProject` object. +#' +#' @examples +#' +#' # Get Test ArchR Project +#' proj <- getTestProject() +#' +#' # Add Impute Weights +#' proj <- addImputeWeights(proj) +#' +#' # Get Impute Weights +#' getImputeWeights(proj) +#' #' @export getImputeWeights <- function(ArchRProj = NULL){ .validInput(input = ArchRProj, name = "ArchRProj", valid = c("ArchRProj")) @@ -242,6 +263,24 @@ getImputeWeights <- function(ArchRProj = NULL){ #' @param threads The number of threads to be used for parallel computing. #' @param verbose A boolean value indicating whether to use verbose output during execution of this function. Can be set to FALSE for a cleaner output. #' @param logFile The path to a file to be used for logging ArchR output. +#' +#' @examples +#' +#' # Get Test ArchR Project +#' proj <- getTestProject() +#' +#' # Add Impute Weights +#' proj <- addImputeWeights(proj) +#' +#' # Get Impute Weights +#' iW <- getImputeWeights(proj) +#' +#' # Get Matrix +#' se <- getMatrixFromProject(proj, useMatrix = "GeneScoreMatrix") +#' +#' # Impute +#' mat <- imputeMatrix(assay(se), iW) +#' #' @export imputeMatrix <- function( mat = NULL, diff --git a/R/InputData.R b/R/InputData.R index ce42a3a2..15134e91 100644 --- a/R/InputData.R +++ b/R/InputData.R @@ -2,10 +2,17 @@ #' #' This function will download data for a given tutorial and return the input files required for ArchR. #' -#' @param tutorial The name of the available tutorial for which to retreive the tutorial data. Currently, the only available option is "Hematopoiesis". +#' @param tutorial The name of the available tutorial for which to retreive the tutorial data. The main option is "Hematopoiesis". #' "Hematopoiesis" is a small scATAC-seq dataset that spans the hematopoieitic hierarchy from stem cells to differentiated cells. -#' This dataset is made up of cells from peripheral blood, bone marrow, and CD34+ sorted bone marrow. +#' This dataset is made up of cells from peripheral blood, bone marrow, and CD34+ sorted bone marrow. The second option is "Test" +#' which is downloading a small test PBMC fragments file mainly used to test the url capabilities of this function. #' @param threads The number of threads to be used for parallel computing. +#' +#' @examples +#' +#' # Get Tutorial Fragments using `test` since its smaller +#' fragments <- getTutorialData(tutorial = "test") +#' #' @export getTutorialData <- function( tutorial = "hematopoiesis", @@ -13,8 +20,8 @@ getTutorialData <- function( ){ #Validate - .validInput(input = tutorial, name = "tutorial", valid = "character") - .validInput(input = threads, name = "threads", valid = c("integer")) + ArchR:::.validInput(input = tutorial, name = "tutorial", valid = "character") + ArchR:::.validInput(input = threads, name = "threads", valid = c("integer")) ######### #Make Sure URL doesnt timeout @@ -79,7 +86,29 @@ getTutorialData <- function( inputFiles <- c(fragFiles, geneFiles) - } else{ + }else if(tolower(tutorial) == "test"){ + + filesUrl <- data.frame( + fileUrl = c( + "https://jeffgranja.s3.amazonaws.com/ArchR/TestData/PBMCSmall.tsv.gz" + ), + md5sum = c( + "0a7a7052b83218667e525127c684aa83" + ), + stringsAsFactors = FALSE + ) + + pathDownload <- "TestFragments" + + dir.create(pathDownload, showWarnings = FALSE) + + downloadFiles <- .downloadFiles(filesUrl = filesUrl, pathDownload = pathDownload, threads = threads) + + inputFiles <- list.files(pathDownload, pattern = "\\.gz$", full.names = TRUE) + names(inputFiles) <- gsub(".fragments.tsv.gz|.tsv.gz", "", list.files(pathDownload, pattern = "\\.gz$")) + inputFiles <- inputFiles[!grepl(".tbi", inputFiles)] + + }else{ stop("There is no tutorial data for : ", tutorial) @@ -94,34 +123,42 @@ getTutorialData <- function( #helper for file downloads .downloadFiles <- function(filesUrl = NULL, pathDownload = NULL, threads = 1){ + if(is.null(filesUrl)) { stop("No value supplied to filesUrl in .downloadFiles()!") } + if(is.null(pathDownload)) { stop("No value supplied to pathDownload in .downloadFiles()!") } + if(length(which(c("fileUrl","md5sum") %ni% colnames(filesUrl))) != 0) { cat(colnames(filesUrl)) stop("File download dataframe does not include columns named 'fileUrl' and 'md5sum' which are required!") } + message(paste0("Downloading files to ",pathDownload,"...")) - downloadFiles <- .safelapply(seq_along(filesUrl$fileUrl), function(x){ + + downloadFiles <- ArchR:::.safelapply(seq_along(filesUrl$fileUrl), function(x){ + if(file.exists(file.path(pathDownload, basename(filesUrl$fileUrl[x])))){ if(tools::md5sum(file.path(pathDownload, basename(filesUrl$fileUrl[x]))) != filesUrl$md5sum[x]) { message(paste0("File ",basename(filesUrl$fileUrl[x])," exists but has an incorrect md5sum. Removing...")) file.remove(file.path(pathDownload, basename(filesUrl$fileUrl[x]))) } } + if(!file.exists(file.path(pathDownload, basename(filesUrl$fileUrl[x])))){ message(paste0("Downloading file ", basename(filesUrl$fileUrl[x]),"...")) download.file( url = filesUrl$fileUrl[x], destfile = file.path(pathDownload, basename(filesUrl$fileUrl[x])) ) - } else { + }else{ message(paste0("File exists! Skipping file ", basename(filesUrl$fileUrl[x]),"...")) } - }, threads = min(threads, length(filesUrl))) + + }, threads = min(threads, nrow(filesUrl))) #check for success of file download if(!all(unlist(downloadFiles) == 0)) { @@ -132,38 +169,100 @@ getTutorialData <- function( } +#' Get PBMC Small Test Arrow file +#' +#' V2 : This function will return a test arrow file in your cwd. +#' +#' @param version version of test arrow to return +#' +#' @examples +#' +#' # Get Test Arrow +#' arrow <- getTestArrow() +#' +#' @export +getTestArrow <- function(version = 2){ + + if(version == 2){ + #Add Genome Return Arrow + addArchRGenome("hg19test2") + arrow <- file.path(system.file("testdata", package="ArchR"), "PBSmall.arrow") + file.copy(arrow, basename(arrow), overwrite = TRUE) + basename(arrow) + }else{ + stop("test version doesnt exist!") + } + +} + #' Get PBMC Small Test Fragments #' -#' This function will download fragments for a small PBMC test dataset (2k Cells) spanning chr1 and 2 (~20MB). +#' V1 : This function will download fragments for a small PBMC test dataset. +#' V2 : This function will return test fragments for a small PBMC test dataset in your cwd. +#' +#' @param version version of test fragments to return +#' +#' @examples +#' +#' # Get Test Fragments +#' fragments <- getTestFragments() #' #' @export -getTestFragments <- function(x){ +getTestFragments <- function(version = 2){ - #Make Sure URL doesnt timeout - oldTimeout <- getOption('timeout') - options(timeout=100000) + if(version == 1){ - if(!file.exists("PBMCSmall.tsv.gz")){ - download.file( - url = "https://jeffgranja.s3.amazonaws.com/ArchR/TestData/PBMCSmall.tsv.gz", - destfile = "PBMCSmall.tsv.gz" - ) - } - #Set back URL Options - options(timeout=oldTimeout) + #Make Sure URL doesnt timeout + oldTimeout <- getOption('timeout') + options(timeout=100000) - #Add Genome Return Name Vector - addArchRGenome("hg19test") - c("PBMC" = "PBMCSmall.tsv.gz") + if(!file.exists("PBMCSmall.tsv.gz")){ + download.file( + url = "https://jeffgranja.s3.amazonaws.com/ArchR/TestData/PBMCSmall.tsv.gz", + destfile = "PBMCSmall.tsv.gz" + ) + } + #Set back URL Options + options(timeout=oldTimeout) + + #Add Genome Return Name Vector + addArchRGenome("hg19test") + c("PBMC" = "PBMCSmall.tsv.gz") + + }else if(version == 2){ + + #Add Genome Return Name Vector + addArchRGenome("hg19test2") + fragments <- file.path(system.file("testdata", package="ArchR"), "PBSmall.tsv.gz") + file.copy(fragments, basename(fragments), overwrite = TRUE) + c("PBMC" = basename(fragments)) + + }else{ + + stop("test version doesnt exist!") + + } } + #' Get PBMC Small Test Project #' -#' This function will download an ArchRProject for a small PBMC test dataset (2k Cells) spanning chr1 and 2 (~2-300MB). +#' V1 : This function will download an ArchRProject for a small PBMC test dataset. +#' V2 : This function will return an ArchRProject for a small PBMC test dataset in your cwd. +#' +#' @param version version of test fragments to return +#' +#' @examples +#' +#' # Get Test Project +#' proj <- getTestProject() #' #' @export -getTestProject <- function(){ +getTestProject <- function(version = 2){ + + if(version == 1){ + #Make Sure URL doesnt timeout oldTimeout <- getOption('timeout') options(timeout=100000) @@ -181,6 +280,24 @@ getTestProject <- function(){ #Load addArchRGenome("hg19test") loadArchRProject("PBMCSmall") + + + }else if(version == 2){ + + #Add Genome Return Name Vector + addArchRGenome("hg19test2") + archrproj <- file.path(system.file("testdata", package="ArchR"), "PBSmall.zip") + file.copy(archrproj, basename(archrproj), overwrite = TRUE) + unzip(basename(archrproj), overwrite = TRUE) + file.remove(basename(archrproj)) + loadArchRProject("PBSmall") + + }else{ + + stop("test version doesnt exist!") + + } + } #' Get Input Files from paths to create arrows @@ -247,14 +364,20 @@ getValidBarcodes <- function( } barcodeList <- lapply(seq_along(csvFiles), function(x){ - df <- .suppressAll(data.frame(readr::read_csv(csvFiles[x]))) - if("cell_id" %ni% colnames(df)){ - stop("cell_id not in colnames of 10x singlecell.csv file! Are you sure inut is correct?") + df <- ArchR:::.suppressAll(data.frame(readr::read_csv(csvFiles[x]))) + if("cell_id" %in% colnames(df)){ + as.character(df[which(paste0(df$cell_id) != "None"),]$barcode) + }else if("is__cell_barcode" %in% colnames(df)){ + as.character(df[which(paste0(df$is__cell_barcode) == 1),]$barcode) + }else{ + stop("cell_id and is__cell_barcode not in colnames of 10x singlecell.csv file! Are you sure inut is correct?") } - as.character(df[which(paste0(df$cell_id) != "None"),]$barcode) }) %>% SimpleList names(barcodeList) <- sampleNames barcodeList } + + + diff --git a/R/IntegrativeAnalysis.R b/R/IntegrativeAnalysis.R index 94116f75..8eda7c9d 100644 --- a/R/IntegrativeAnalysis.R +++ b/R/IntegrativeAnalysis.R @@ -34,6 +34,21 @@ #' @param threads The number of threads to be used for parallel computing. #' @param verbose A boolean value that determines whether standard output should be printed. #' @param logFile The path to a file to be used for logging ArchR output. +#' +#' @examples +#' +#' # Get Test ArchR Project +#' proj <- getTestProject() +#' +#' # Correlate Matrices +#' dfCor <- correlateMatrices( +#' ArchRProj = proj, +#' useMatrix1 = "GeneScoreMatrix", +#' useMatrix2 = "GeneIntegrationMatrix", +#' dimsToUse = 1:5, +#' k = 20 +#' ) +#' #' @export correlateMatrices <- function( ArchRProj = NULL, @@ -381,6 +396,22 @@ correlateMatrices <- function( #' @param force A boolean value that determines whether analysis should continue if resizing coordinates in `seTrajectory1` or #' `seTrajectory2` does not align with the strandedness. Only when `useRanges = TRUE`. #' @param logFile The path to a file to be used for logging ArchR output. +#' +#' @examples +#' +#' # Get Test ArchR Project +#' proj <- getTestProject() +#' +#' #Add Trajectory +#' proj <- addTrajectory(proj, trajectory = c("C1", "C2", "C3"), embedding = "UMAP", force = TRUE) +#' +#' #Get Trajectories +#' seTraj1 <- getTrajectory(proj, useMatrix = "GeneScoreMatrix") +#' seTraj2 <- getTrajectory(proj, useMatrix = "GeneIntegrationMatrix") +#' +#' #Correlate +#' corTraj <- correlateTrajectories(seTraj1, seTraj2, corCutOff = 0.35, varCutOff1 = 0.6, varCutOff2 = 0.6) +#' #' @export correlateTrajectories <- function( seTrajectory1 = NULL, @@ -669,6 +700,7 @@ correlateTrajectories <- function( #' @param corCutOff A numeric cutoff for the correlation of each dimension to the sequencing depth. If the dimension has a correlation to #' sequencing depth that is greater than the `corCutOff`, it will be excluded from analysis. #' @param cellsToUse A character vector of cellNames to compute coAccessibility on if desired to run on a subset of the total cells. +#' @param excludeChr A character vector containing the `seqnames` of the chromosomes that should be excluded from this analysis. #' @param k The number of k-nearest neighbors to use for creating single-cell groups for correlation analyses. #' @param knnIteration The number of k-nearest neighbor groupings to test for passing the supplied `overlapCutoff`. #' @param overlapCutoff The maximum allowable overlap between the current group and all previous groups to permit the current group be @@ -682,6 +714,15 @@ correlateTrajectories <- function( #' @param threads The number of threads to be used for parallel computing. #' @param verbose A boolean value that determines whether standard output should be printed. #' @param logFile The path to a file to be used for logging ArchR output. +#' +#' @examples +#' +#' # Get Test ArchR Project +#' proj <- getTestProject() +#' +#' # Co-Accessibility +#' proj <- addCoAccessibility(proj, k = 20) +#' #' @export addCoAccessibility <- function( ArchRProj = NULL, @@ -690,6 +731,7 @@ addCoAccessibility <- function( scaleDims = NULL, corCutOff = 0.75, cellsToUse = NULL, + excludeChr = NULL, k = 100, knnIteration = 500, overlapCutoff = 0.8, @@ -708,6 +750,7 @@ addCoAccessibility <- function( .validInput(input = scaleDims, name = "scaleDims", valid = c("boolean", "null")) .validInput(input = corCutOff, name = "corCutOff", valid = c("numeric", "null")) .validInput(input = cellsToUse, name = "cellsToUse", valid = c("character", "null")) + .validInput(input = excludeChr, name = "excludeChr", valid = c("character", "null")) .validInput(input = k, name = "k", valid = c("integer")) .validInput(input = knnIteration, name = "knnIteration", valid = c("integer")) .validInput(input = overlapCutoff, name = "overlapCutoff", valid = c("numeric")) @@ -758,6 +801,12 @@ addCoAccessibility <- function( chrj <- gtools::mixedsort(unique(paste0(seqnames(getPeakSet(ArchRProj))))) stopifnot(identical(chri,chrj)) + #Filter Chromosomes + if(!is.null(excludeChr)){ + chri <- chri[which(paste0(chri) %ni% excludeChr)] + chrj <- chrj[which(paste0(chrj) %ni% excludeChr)] + } + #Create Ranges peakSummits <- resize(peakSet, 1, "center") peakWindows <- resize(peakSummits, 2*maxDist + 1, "center") @@ -783,7 +832,7 @@ addCoAccessibility <- function( .logDiffTime(sprintf("Computing Co-Accessibility %s (%s of %s)", chri[x], x, length(chri)), t1=tstart, verbose=verbose, logFile=logFile) #Features - featureDF <- mcols(peakSet)[BiocGenerics::which(seqnames(peakSet) == chri[x]),] + featureDF <- mcols(peakSet)[BiocGenerics::which(seqnames(peakSet) == chri[x]),,drop=FALSE] featureDF$seqnames <- chri[x] #Group Matrix @@ -850,6 +899,18 @@ addCoAccessibility <- function( #' This only takes affect if `returnLoops = TRUE`. #' @param returnLoops A boolean indicating to return the co-accessibility signal as a `GRanges` "loops" object designed for use with #' the `ArchRBrowser()` or as an `ArchRBrowserTrack()`. +#' +#' @examples +#' +#' # Get Test ArchR Project +#' proj <- getTestProject() +#' +#' # Add Co Accessibility +#' proj <- addCoAccessibility(proj, k = 20) +#' +#' # Get Co Accessibility +#' CoA <- getCoAccessibility(proj) +#' #' @export getCoAccessibility <- function( ArchRProj = NULL, @@ -958,6 +1019,7 @@ getCoAccessibility <- function( #' @param corCutOff A numeric cutoff for the correlation of each dimension to the sequencing depth. If the dimension has a #' correlation to sequencing depth that is greater than the `corCutOff`, it will be excluded from analysis. #' @param cellsToUse A character vector of cellNames to compute coAccessibility on if desired to run on a subset of the total cells. +#' @param excludeChr A character vector containing the `seqnames` of the chromosomes that should be excluded from this analysis. #' @param k The number of k-nearest neighbors to use for creating single-cell groups for correlation analyses. #' @param knnIteration The number of k-nearest neighbor groupings to test for passing the supplied `overlapCutoff`. #' @param overlapCutoff The maximum allowable overlap between the current group and all previous groups to permit the current @@ -968,11 +1030,26 @@ getCoAccessibility <- function( #' @param log2Norm A boolean value indicating whether to log2 transform the single-cell groups prior to computing co-accessibility correlations. #' @param predictionCutoff A numeric describing the cutoff for RNA integration to use when picking cells for groupings. #' @param addEmpiricalPval Add empirical p-values based on randomly correlating peaks and genes not on the same seqname. +#' @param addPermutedPval Add permuted p-values based on shuffle sample correlating peaks and genes. This approach was adapted from +#' Regner et al 2021 "A multi-omic single-cell landscape of human gynecologic malignancies". +#' @param nperm An integer representing the number of permutations to run for Regner et al 2021 approach. #' @param seed A number to be used as the seed for random number generation required in knn determination. It is recommended #' to keep track of the seed used so that you can reproduce results downstream. #' @param threads The number of threads to be used for parallel computing. #' @param verbose A boolean value that determines whether standard output should be printed. #' @param logFile The path to a file to be used for logging ArchR output. +#' +#' @examples +#' +#' # Get Test ArchR Project +#' proj <- getTestProject() +#' +#' # Add P2G Links +#' proj <- addPeak2GeneLinks(proj, k = 20) +#' +#' # Get P2G Links +#' p2g <- getPeak2GeneLinks(proj) +#' #' @export addPeak2GeneLinks <- function( ArchRProj = NULL, @@ -982,6 +1059,7 @@ addPeak2GeneLinks <- function( scaleDims = NULL, corCutOff = 0.75, cellsToUse = NULL, + excludeChr = NULL, k = 100, knnIteration = 500, overlapCutoff = 0.8, @@ -990,6 +1068,8 @@ addPeak2GeneLinks <- function( log2Norm = TRUE, predictionCutoff = 0.4, addEmpiricalPval = FALSE, + addPermutedPval = FALSE, + nperm = 100, seed = 1, threads = max(floor(getArchRThreads() / 2), 1), verbose = TRUE, @@ -1003,12 +1083,17 @@ addPeak2GeneLinks <- function( .validInput(input = scaleDims, name = "scaleDims", valid = c("boolean", "null")) .validInput(input = corCutOff, name = "corCutOff", valid = c("numeric", "null")) .validInput(input = cellsToUse, name = "cellsToUse", valid = c("character", "null")) + .validInput(input = excludeChr, name = "excludeChr", valid = c("character", "null")) .validInput(input = k, name = "k", valid = c("integer")) .validInput(input = knnIteration, name = "knnIteration", valid = c("integer")) .validInput(input = overlapCutoff, name = "overlapCutoff", valid = c("numeric")) .validInput(input = maxDist, name = "maxDist", valid = c("integer")) .validInput(input = scaleTo, name = "scaleTo", valid = c("numeric")) .validInput(input = log2Norm, name = "log2Norm", valid = c("boolean")) + .validInput(input = predictionCutoff, name = "predictionCutoff", valid = c("numeric", "null")) + .validInput(input = addEmpiricalPval, name = "addEmpiricalPval", valid = c("boolean")) + .validInput(input = addPermutedPval, name = "addPermutedPval", valid = c("boolean")) + .validInput(input = nperm, name = "nperm", valid = c("integer")) .validInput(input = threads, name = "threads", valid = c("integer")) .validInput(input = verbose, name = "verbose", valid = c("boolean")) .validInput(input = logFile, name = "logFile", valid = c("character")) @@ -1060,10 +1145,16 @@ addPeak2GeneLinks <- function( #Get Peak Set peakSet <- getPeakSet(ArchRProj) + if(!is.null(excludeChr)){ + peakSet <- peakSet[BiocGenerics::which(paste0(seqnames(peakSet)) %bcni% excludeChr)] + } .logThis(peakSet, "peakSet", logFile = logFile) #Gene Info geneSet <- .getFeatureDF(ArrowFiles, useMatrix, threads = threads) + if(!is.null(excludeChr)){ + geneSet <- geneSet[BiocGenerics::which(geneSet$seqnames %bcni% excludeChr),,drop=FALSE] + } geneStart <- GRanges(geneSet$seqnames, IRanges(geneSet$start, width = 1), name = geneSet$name, idx = geneSet$idx) .logThis(geneStart, "geneStart", logFile = logFile) @@ -1110,6 +1201,7 @@ addPeak2GeneLinks <- function( ArrowFiles = getArrowFiles(ArchRProj), featureDF = geneDF, groupList = knnObj, + excludeSeqnames = excludeChr, useMatrix = useMatrix, threads = threads, verbose = FALSE @@ -1123,6 +1215,7 @@ addPeak2GeneLinks <- function( ArrowFiles = getArrowFiles(ArchRProj), featureDF = peakDF, groupList = knnObj, + excludeSeqnames = excludeChr, useMatrix = "PeakMatrix", threads = threads, verbose = FALSE @@ -1175,12 +1268,7 @@ addPeak2GeneLinks <- function( o$distance <- distance(rowRanges(seRNA)[o[,1]] , rowRanges(seATAC)[o[,2]] ) colnames(o) <- c("B", "A", "distance") - #Null Correlations - if(addEmpiricalPval){ - .logDiffTime(main="Computing Background Correlations", t1=tstart, verbose=verbose, logFile=logFile) - nullCor <- .getNullCorrelations(seATAC, seRNA, o, 1000) - } - + #Compute PVal Stats .logDiffTime(main="Computing Correlations", t1=tstart, verbose=verbose, logFile=logFile) o$Correlation <- rowCorCpp(as.integer(o$A), as.integer(o$B), assay(seATAC), assay(seRNA)) o$VarAssayA <- .getQuantiles(matrixStats::rowVars(assay(seATAC)))[o$A] @@ -1195,11 +1283,33 @@ addPeak2GeneLinks <- function( metadata(out)$peakSet <- peakSet metadata(out)$geneSet <- geneStart + #Null Correlations if(addEmpiricalPval){ + .logDiffTime(main="Computing Background Correlations", t1=tstart, verbose=verbose, logFile=logFile) + nullCor <- .getNullCorrelations(seATAC, seRNA, o, 1000) out$EmpPval <- 2*pnorm(-abs(((out$Correlation - mean(nullCor[[2]])) / sd(nullCor[[2]])))) out$EmpFDR <- p.adjust(out$EmpPval, method = "fdr") } - + + #Permuted Pval + if(addPermutedPval){ + message("Performing Permuted P-values similar to Regner et al., 2021") + #Permute + p <- o + o$PermPval <- 0 + for(i in seq_len(nperm)){ + message("Running Permutation ", i, " of ", nperm) + idx <- sample(ncol(seATAC)) + p$Correlation <- rowCorCpp(as.integer(p$A), as.integer(p$B), assay(seATAC)[,idx,drop=FALSE], assay(seRNA)) + p$TStat <- (p$Correlation / sqrt((1-p$Correlation^2)/(ncol(seATAC)-2))) #T-statistic P-value + p$Pval <- 2*pt(-abs(p$TStat), ncol(seATAC) - 2) + cdf <- ecdf(p$Pval) + o$PermPval <- o$PvalPerm + p$Pval + } + o$PermPval <- (o$PermPval / nperm) #Average + o$PermFDR <- pmin(ecdf(o$PermPval)(o$Pval) / ecdf(o$Pval)(o$Pval), 1) + } + #Save Group Matrices dir.create(file.path(getOutputDirectory(ArchRProj), "Peak2GeneLinks"), showWarnings = FALSE) outATAC <- file.path(getOutputDirectory(ArchRProj), "Peak2GeneLinks", "seATAC-Group-KNN.rds") @@ -1277,6 +1387,18 @@ addPeak2GeneLinks <- function( #' @param resolution A numeric describing the bp resolution to return loops as. This helps with overplotting of correlated regions. #' @param returnLoops A boolean indicating to return the peak-to-gene links as a `GRanges` "loops" object designed for use with #' the `ArchRBrowser()` or as an `ArchRBrowserTrack()`. +#' +#' @examples +#' +#' # Get Test ArchR Project +#' proj <- getTestProject() +#' +#' # Add P2G Links +#' proj <- addPeak2GeneLinks(proj, k = 20) +#' +#' # Get P2G Links +#' p2g <- getPeak2GeneLinks(proj) +#' #' @export getPeak2GeneLinks <- function( ArchRProj = NULL, @@ -1385,6 +1507,22 @@ peak2GeneHeatmap <- function(...){ #' @param seed A number to be used as the seed for random number generation. It is recommended to keep track of the seed used so that you can #' reproduce results downstream. #' @param logFile The path to a file to be used for logging ArchR output. +#' +#' @examples +#' +#' # Get Test ArchR Project +#' proj <- getTestProject() +#' +#' # Add P2G Links +#' proj <- addPeak2GeneLinks(proj, k = 20) +#' +#' # Get P2G Links +#' p2g <- getPeak2GeneLinks(proj) +#' +#' # Plot P2G +#' p <- plotPeak2GeneHeatmap(proj) +#' plotPDF(p, name = "P2G-Heatmap", ArchRProj = proj) +#' #' @export plotPeak2GeneHeatmap <- function( ArchRProj = NULL, diff --git a/R/IterativeLSI.R b/R/IterativeLSI.R index c32f7795..1b43f669 100644 --- a/R/IterativeLSI.R +++ b/R/IterativeLSI.R @@ -20,7 +20,8 @@ #' @param depthCol A column in the `ArchRProject` that represents the coverage (scATAC = unique fragments, scRNA = unique molecular identifiers) per cell. #' These values are used to minimize the related biases in the reduction related. For scATAC we recommend "nFrags" and for scRNA we recommend "Gex_nUMI". #' @param varFeatures The number of N variable features to use for LSI. The top N features will be used based on the `selectionMethod`. -#' @param dimsToUse A vector containing the dimensions from the `reducedDims` object to use in clustering. +#' @param dimsToUse A vector containing the dimensions to use in LSI. The total dimensions used in LSI will be `max(dimsToUse)`. If you set this too high, +#' it could impact downstream functionalities including increasing the time required to run `addClusters()`. #' @param LSIMethod A number or string indicating the order of operations in the TF-IDF normalization. #' Possible values are: 1 or "tf-logidf", 2 or "log(tf-idf)", and 3 or "logtf-logidf". #' @param scaleDims A boolean that indicates whether to z-score the reduced dimensions for each cell. This is useful forminimizing the contribution @@ -50,6 +51,7 @@ #' to the first iteration of the iterative LSI paradigm. For example, if `filterQuantile = 0.99`, any features above the 99th percentile in #' insertion counts will be ignored for the first LSI iteration. #' @param excludeChr A string of chromosomes to exclude for iterativeLSI procedure. +#' @param keep0lsi A boolean whether to keep cells with no reads in features used for LSI. #' @param saveIterations A boolean value indicating whether the results of each LSI iterations should be saved as compressed `.rds` files in #' the designated `outDir`. #' @param UMAPParams The list of parameters to pass to the UMAP function if "UMAP" if `saveIterations=TRUE`. See the function `uwot::umap()`. @@ -61,6 +63,15 @@ #' @param verbose A boolean value that determines whether standard output includes verbose sections. #' @param force A boolean value that indicates whether or not to overwrite relevant data in the `ArchRProject` object. #' @param logFile The path to a file to be used for logging ArchR output. +#' +#' @examples +#' +#' # Get Test ArchR Project +#' proj <- getTestProject() +#' +#' # Iterative LSI +#' proj <- addIterativeLSI(proj, dimsToUse = 1:5, varFeatures=1000, iterations = 2, force=TRUE) +#' #' @export addIterativeLSI <- function( ArchRProj = NULL, @@ -91,6 +102,7 @@ addIterativeLSI <- function( totalFeatures = 500000, filterQuantile = 0.995, excludeChr = c(), + keep0lsi = FALSE, saveIterations = TRUE, UMAPParams = list( n_neighbors = 40, @@ -129,6 +141,7 @@ addIterativeLSI <- function( .validInput(input = totalFeatures, name = "totalFeatures", valid = c("integer")) .validInput(input = filterQuantile, name = "filterQuantile", valid = c("numeric")) .validInput(input = excludeChr, name = "excludeChr", valid = c("character", "null")) + .validInput(input = keep0lsi, name = "keep0lsi", valid = c("boolean")) .validInput(input = saveIterations, name = "saveIterations", valid = c("boolean")) .validInput(input = UMAPParams, name = "UMAPParams", valid = c("list")) .validInput(input = nPlot, name = "nPlot", valid = c("integer")) @@ -144,6 +157,20 @@ addIterativeLSI <- function( stop("Please provide more than 1000 varFeatures!") } + if(nCells(ArchRProj) < 500){ + message( + "Detected less than 500 Cells.\n", + "\t`filterBias` disabled.\n", + "\t`outlierQuantiles` disabled\n", + "\t`sampleCellsPre` disabled\n", + "\t`testBias` in `addClusters` disabled\n" + ) + filterBias <- FALSE + outlierQuantiles <- c(0, 1) + sampleCellsPre <- NULL + clusterParams$testBias <- FALSE + } + .startLogging(logFile = logFile) .logThis(mget(names(formals()),sys.frame(sys.nframe())), "IterativeLSI Input-Parameters", logFile=logFile) @@ -235,8 +262,14 @@ addIterativeLSI <- function( .logDiffTime("Computing Top Features", tstart, addHeader = FALSE, verbose = verbose, logFile = logFile) nFeature <- varFeatures[1] rmTop <- floor((1-filterQuantile) * totalFeatures) - topIdx <- head(order(totalAcc$rowSums, decreasing=TRUE), nFeature + rmTop)[-seq_len(rmTop)] + if(sum(totalAcc$rowSums > 0) > 2.25 * varFeatures){ + topIdx <- head(order(totalAcc$rowSums, decreasing=TRUE), nFeature + rmTop)[-seq_len(rmTop)] + }else{ + message("Not Enough Non-Zero Features to Filter!") + topIdx <- head(order(totalAcc$rowSums, decreasing=TRUE), nFeature) + } topFeatures <- totalAcc[sort(topIdx),] + topFeatures <- topFeatures[topFeatures$rowSums > 0,] gc() @@ -272,6 +305,7 @@ addIterativeLSI <- function( } topIdx <- head(order(totalAcc$combinedVars, decreasing=TRUE), nFeature) topFeatures <- totalAcc[sort(topIdx),] + topFeatures <- topFeatures[topFeatures$combinedMeans > 0,] gc() @@ -281,6 +315,10 @@ addIterativeLSI <- function( } + if(nrow(topFeatures) < varFeatures){ + stop(sprintf("Not Enough Features Found in data (%s)!", nrow(topFeatures))) + } + cellDepth <- tryCatch({ df <- getCellColData(ArchRProj = ArchRProj, select = depthCol) v <- df[,1] @@ -326,6 +364,7 @@ addIterativeLSI <- function( dimsToUse = dimsToUse, binarize = binarize, outlierQuantiles = outlierQuantiles, + keep0lsi = keep0lsi, sampleCells = if(j != iterations) sampleCellsPre else sampleCellsFinal, projectAll = j == iterations | projectCellsPre | sampleJ > sampleCellsPre, threads = threads, @@ -440,6 +479,7 @@ addIterativeLSI <- function( dimsToUse = dimsToUse, binarize = binarize, outlierQuantiles = outlierQuantiles, + keep0lsi = keep0lsi, sampleCells = if(j != iterations) sampleCellsPre else sampleCellsFinal, projectAll = j == iterations | projectCellsPre | sampleJ > sampleCellsPre, threads = threads, @@ -515,6 +555,7 @@ addIterativeLSI <- function( dimsToUse = NULL, binarize = TRUE, outlierQuantiles = c(0.02, 0.98), + keep0lsi = FALSE, LSIMethod = FALSE, scaleTo = 10^4, sampleCells = 5000, @@ -560,6 +601,7 @@ addIterativeLSI <- function( nDimensions = max(dimsToUse), binarize = binarize, outlierQuantiles = outlierQuantiles, + keep0lsi = keep0lsi, verbose = FALSE, seed = seed, tstart = tstart, @@ -608,6 +650,7 @@ addIterativeLSI <- function( nDimensions = max(dimsToUse), binarize = binarize, outlierQuantiles = outlierQuantiles, + keep0lsi = keep0lsi, seed = seed, tstart = tstart, logFile = logFile @@ -647,6 +690,7 @@ addIterativeLSI <- function( nDimensions = max(dimsToUse), binarize = binarize, outlierQuantiles = outlierQuantiles, + keep0lsi = keep0lsi, seed = seed, tstart = tstart, logFile = logFile @@ -662,7 +706,7 @@ addIterativeLSI <- function( .logDiffTime("Projecting Matrices with LSI-Projection (Granja* et al 2019)", tstart, addHeader = FALSE, verbose = verbose, logFile = logFile) pLSI <- .safelapply(seq_along(tmpMatFiles), function(x){ .logDiffTime(sprintf("Projecting Matrix (%s of %s) with LSI-Projection", x, length(tmpMatFiles)), tstart, addHeader = FALSE, verbose = FALSE, logFile = logFile) - .projectLSI(mat = readRDS(tmpMatFiles[x]), LSI = outLSI, verbose = FALSE, tstart = tstart, logFile = logFile) + .projectLSI(mat = readRDS(tmpMatFiles[x]), LSI = outLSI, keep0lsi = keep0lsi, verbose = FALSE, tstart = tstart, logFile = logFile) }, threads = threads2) %>% Reduce("rbind", .) #Remove Temporary Matrices @@ -1057,6 +1101,7 @@ addIterativeLSI <- function( nDimensions = 50, binarize = TRUE, outlierQuantiles = c(0.02, 0.98), + keep0lsi = FALSE, seed = 1, verbose = FALSE, tstart = NULL, @@ -1084,7 +1129,22 @@ addIterativeLSI <- function( #Compute Col Sums .logDiffTime("Computing Term Frequency", tstart, addHeader = FALSE, verbose = verbose, logFile = logFile) colSm <- Matrix::colSums(mat) + + #Check + if(keep0lsi){ + colSm[colSm==0] <- 1 + } + if(any(colSm == 0)){ + ############ + wng <- paste0( + "Filtering ", sum(colSm==0), " of ", ncol(mat), " used in LSI since 0 reads were found in the features used!", + "\nPlease consider increasing the number of varFeatures to cleanly handle this issue or", + "\nUse argument `keep0lsi` to keep 0 sum cells by setting their colSums to 1 artificially!" + ) + .logDiffTime(wng, tstart, addHeader = verbose, verbose = verbose, logFile = logFile) + warnings(wng) + ############ exclude <- which(colSm==0) mat <- mat[,-exclude, drop = FALSE] colSm <- colSm[-exclude] @@ -1188,19 +1248,21 @@ addIterativeLSI <- function( scaleTo = scaleTo, nDimensions = nDimensions, LSIMethod = LSIMethod, + keep0lsi = keep0lsi, outliers = NA, date = Sys.Date(), seed = seed ) if(filterOutliers == 1){ - .logDiffTime("Projecting Outliers with LSI-Projection (Granja* et al 2019)", tstart, addHeader = FALSE, verbose = verbose, logFile = logFile) + .logDiffTime("Projecting Outliers with LSI-Projection (Granja* et al 2019)", tstart, addHeader = FALSE, verbose = FALSE, logFile = logFile) #Quick Check LSI-Projection Works - pCheck <- .projectLSI(mat = mat2, LSI = out, verbose = verbose, logFile = logFile) + pCheck <- .projectLSI(mat = mat2, LSI = out, keep0lsi = keep0lsi, verbose = FALSE, logFile = logFile) #Dont Neeed This pCheck2 <- out[[1]][rownames(pCheck), ] pCheck3 <- lapply(seq_len(ncol(pCheck)), function(x){ cor(pCheck[,x], pCheck2[,x]) }) %>% unlist + .logThis(pCheck3, "Projection Correlation Test", logFile=logFile) if(min(pCheck3) < 0.95){ .logThis(pCheck, "pCheck", logFile=logFile) .logThis(pCheck2, "pCheck2", logFile=logFile) @@ -1209,7 +1271,7 @@ addIterativeLSI <- function( } #Project LSI Outliers out$outliers <- colnames(matO) - outlierLSI <- .projectLSI(mat = matO, LSI = out, verbose = verbose, logFile = logFile) + outlierLSI <- .projectLSI(mat = matO, LSI = out, keep0lsi = keep0lsi, verbose = FALSE, logFile = logFile) allLSI <- rbind(out[[1]], outlierLSI) allLSI <- allLSI[cn, , drop = FALSE] #Re-Order Correctly to original out[[1]] <- allLSI @@ -1244,6 +1306,7 @@ addIterativeLSI <- function( mat = NULL, LSI = NULL, returnModel = FALSE, + keep0lsi = FALSE, verbose = FALSE, tstart = NULL, logFile = NULL @@ -1275,7 +1338,22 @@ addIterativeLSI <- function( #TF .logDiffTime("Computing Term Frequency", tstart, addHeader = FALSE, verbose = verbose, logFile = logFile) colSm <- Matrix::colSums(mat) + + #Check + if(keep0lsi){ + colSm[colSm==0] <- 1 + } + if(any(colSm == 0)){ + ############ + wng <- paste0( + "Filtering ", sum(colSm==0), " of ", ncol(mat), " used for LSI Projection since 0 reads were found in the features used!", + "\nPlease consider increasing the number of varFeatures to cleanly handle this issue or", + "\nUse argument `keep0lsi` to keep 0 sum cells by setting their colSums to 1 artificially!" + ) + .logDiffTime(wng, tstart, addHeader = verbose, verbose = verbose, logFile = logFile) + warnings(wng) + ############ exclude <- which(colSm==0) mat <- mat[,-exclude] colSm <- colSm[-exclude] @@ -1332,7 +1410,7 @@ addIterativeLSI <- function( gc() #Clean Up Matrix - idxNA <- Matrix::which(is.na(mat),arr.ind=TRUE) + idxNA <- Matrix::which(is.na(mat), arr.ind=TRUE) if(length(idxNA) > 0){ .logDiffTime(sprintf("Zeroing %s NA elements", length(idxNA)), tstart, addHeader = FALSE, verbose = verbose, logFile = logFile) mat[idxNA] <- 0 diff --git a/R/LoggerUtils.R b/R/LoggerUtils.R index bc498e08..72fc0b28 100644 --- a/R/LoggerUtils.R +++ b/R/LoggerUtils.R @@ -7,6 +7,12 @@ #' This function will set ArchR logging #' #' @param useLogs A boolean describing whether to use logging with ArchR. +#' +#' @examples +#' +#' # Add ArchR Logging +#' addArchRLogging() +#' #' @export addArchRLogging <- function(useLogs = TRUE){ .validInput(input = useLogs, name = "useLogs", valid = "boolean") @@ -18,6 +24,11 @@ addArchRLogging <- function(useLogs = TRUE){ #' Get ArchR Logging #' #' This function will get ArchR logging +#' +#' @examples +#' +#' # Get ArchR Logging +#' getArchRLogging() #' #' @export getArchRLogging <- function(){ @@ -34,6 +45,12 @@ getArchRLogging <- function(){ #' This function will set ArchR Debugging which will save an RDS if an error is encountered. #' #' @param debug A boolean describing whether to use logging with ArchR. +#' +#' @examples +#' +#' # Add ArchR Debugging +#' addArchRDebugging() +#' #' @export addArchRDebugging <- function(debug = FALSE){ .validInput(input = debug, name = "debug", valid = "boolean") @@ -45,6 +62,11 @@ addArchRDebugging <- function(debug = FALSE){ #' Get ArchR Debugging #' #' This function will get ArchR Debugging which will save an RDS if an error is encountered. +#' +#' @examples +#' +#' # Get ArchR Debugging +#' getArchRDebugging() #' #' @export getArchRDebugging <- function(){ @@ -61,6 +83,12 @@ getArchRDebugging <- function(){ #' This function will set ArchR logging verbosity. #' #' @param verbose A boolean describing whether to printMessages in addition to logging with ArchR. +#' +#' @examples +#' +#' # Add ArchR Verbose +#' addArchRVerbose() +#' #' @export addArchRVerbose <- function(verbose = TRUE){ .validInput(input = verbose, name = "verbose", valid = "boolean") @@ -72,6 +100,11 @@ addArchRVerbose <- function(verbose = TRUE){ #' Set ArchR Verbosity for Log Messaging #' #' This function will get ArchR logging verbosity. +#' +#' @examples +#' +#' # Get ArchR Verbose +#' addArchRVerbose() #' #' @export getArchRVerbose <- function(){ @@ -90,6 +123,12 @@ getArchRVerbose <- function(){ #' #' @param name A character string to add a more descriptive name in log file. #' @param logDir The path to a directory where log files should be written. +#' +#' @examples +#' +#' # Create Log File +#' createLogFile(name = "test") +#' #' @export createLogFile <- function( name = NULL, diff --git a/R/MarkerFeatures.R b/R/MarkerFeatures.R index b6fdbab9..e1a9e660 100644 --- a/R/MarkerFeatures.R +++ b/R/MarkerFeatures.R @@ -41,8 +41,24 @@ markerFeatures <- function(...){ #' `seqnames` that are not listed will be ignored. In the context of a `Sparse.Assays.Matrix`, such as a matrix containing chromVAR #' deviations, the `seqnames` do not correspond to chromosomes, rather they correspond to the sub-portions of the matrix, for example #' raw deviations ("deviations") or deviation z-scores ("z") for a chromVAR deviations matrix. +#' @param closest A boolean value that indicated whether to use closest cells from foreground and background instead of random sampling +#' of the foreground cells. #' @param verbose A boolean value that determines whether standard output is printed. #' @param logFile The path to a file to be used for logging ArchR output. +#' +#' @examples +#' +#' # Get Test ArchR Project +#' proj <- getTestProject() +#' +#' # Get Markers +#' seMarker <- getMarkerFeatures( +#' ArchRProj = proj, +#' useMatrix = "PeakMatrix", +#' testMethod = "binomial", +#' binarize = TRUE +#' ) +#' #' @export getMarkerFeatures <- function( ArchRProj = NULL, @@ -60,6 +76,7 @@ getMarkerFeatures <- function( bufferRatio = 0.8, binarize = FALSE, useSeqnames = NULL, + closest = FALSE, verbose = TRUE, logFile = createLogFile("getMarkerFeatures") ){ @@ -79,6 +96,7 @@ getMarkerFeatures <- function( .validInput(input = bufferRatio, name = "bufferRatio", valid = c("numeric")) .validInput(input = binarize, name = "binarize", valid = c("boolean")) .validInput(input = useSeqnames, name = "useSeqnames", valid = c("character", "null")) + .validInput(input = closest, name = "closest", valid = c("boolean")) .validInput(input = verbose, name = "verbose", valid = c("boolean")) .validInput(input = logFile, name = "logFile", valid = c("character", "null")) @@ -110,6 +128,7 @@ getMarkerFeatures <- function( threads = 1, binarize = FALSE, useSeqnames = NULL, + closest = FALSE, testMethod = "wilcoxon", useMatrix = "GeneScoreMatrix", markerParams = list(), @@ -184,6 +203,7 @@ getMarkerFeatures <- function( bias = bias, k = k, n = maxCells, + closest = closest, bufferRatio = bufferRatio, logFile = logFile ) @@ -576,6 +596,7 @@ getMarkerFeatures <- function( n = 500, seed = 1, bufferRatio = 0.8, + closest = FALSE, logFile = NULL ){ @@ -670,71 +691,119 @@ getMarkerFeatures <- function( }else{ k2 <- k } - - knnx <- .computeKNN(inputNormQ[idB, ,drop=FALSE], inputNormQ[idF, ,drop=FALSE], k = k2) - sx <- sample(seq_len(nrow(knnx)), nrow(knnx)) - - minTotal <- min(n, length(sx) * bufferRatio) - nx <- sort(floor(minTotal * bgdProbx)) - ############### - # ID Matching - ############### - idX <- c() - idY <- c() - it <- 0 - - if(any(nx <= 0)){ - nx[which(nx <= 0)] <- Inf - nx <- sort(nx) - } - - while(it < length(sx) & length(idX) < minTotal){ + if (closest){ + + .logMessage("Using the closest cells identified by KKN between the foreground and background", verbose = TRUE, logFile = logFile) + sortedCells <- .computeClostestCellsList(inputNormQ[idB, ,drop=FALSE], inputNormQ[idF, ,drop=FALSE], k = k2) + idX <- c() + inspected_cells <- c() + idY <- c() + i <- 1 + df_counter <- 1 - it <- it + 1 - knnit <- knnx[sx[it],] - groupit <- match(groups[idB][knnit],names(nx)) - selectUnique <- FALSE - selectit <- 0 - oit <- order(groupit) + sx <- idF + minTotal <- min(n, length(sx) * bufferRatio) + nx <- sort(floor(minTotal * bgdProbx)) - while(!selectUnique){ - selectit <- selectit + 1 - itx <- which(oit==selectit) - cellx <- knnit[itx] - groupitx <- groupit[itx] - if(is.infinite(nx[groupitx])){ - if(selectit == k2){ - itx <- NA - cellx <- NA - selectUnique <- TRUE + if (length(bgdGroups)==1){ + + upper_border <- min(length(idB), length(idF)) + while (i <= upper_border & df_counter <= nrow(sortedCells)){ + inspected_cells <- append(inspected_cells, sortedCells$Cells[df_counter]) + if (sortedCells$Cells[df_counter] %ni% idX && sortedCells$Bgd[df_counter] %ni% idY){ + idX <- append(idX, sortedCells$Cells[df_counter]) + idY <- append(idY, sortedCells$Bgd[df_counter]) + i <- i + 1 } - }else{ - if(cellx %ni% idY){ - selectUnique <- TRUE - } - if(selectit == k2){ - itx <- NA - cellx <- NA - selectUnique <- TRUE + df_counter <- df_counter + 1 + } + + }else{ + + while (i <= floor(minTotal) & df_counter <= nrow(sortedCells)){ + inspected_cells <- append(inspected_cells, sortedCells$Cells[df_counter]) + if (sortedCells$Cells[df_counter] %ni% idX && sortedCells$Bgd[df_counter] %ni% idY){ + groupitx <- match(groups[idB][sortedCells$Bgd[df_counter]],names(nx)) + if (nx[groupitx] > 0){ + idX <- append(idX, sortedCells$Cells[df_counter]) + idY <- append(idY, sortedCells$Bgd[df_counter]) + i <- i + 1 + nx[groupitx] <- nx[groupitx]-1 + } } + df_counter <- df_counter + 1 } } + + it <- length(unique(inspected_cells)) - if(!is.na(itx)){ - idX <- c(idX, sx[it]) - idY <- c(idY, cellx) - nx[groupitx] <- nx[groupitx] - 1 - if(any(nx <= 0)){ - nx[which(nx <= 0)] <- Inf - nx <- sort(nx) - } + } else{ + + knnx <- .computeKNN(inputNormQ[idB, ,drop=FALSE], inputNormQ[idF, ,drop=FALSE], k = k2) + sx <- sample(seq_len(nrow(knnx)), nrow(knnx)) + + minTotal <- min(n, length(sx) * bufferRatio) + nx <- sort(floor(minTotal * bgdProbx)) + + ############### + # ID Matching + ############### + idX <- c() + idY <- c() + it <- 0 + + if(any(nx <= 0)){ + nx[which(nx <= 0)] <- Inf + nx <- sort(nx) } - - if(all(is.infinite(nx))){ - it <- length(sx) + + while(it < length(sx) & length(idX) < minTotal){ + + it <- it + 1 + knnit <- knnx[sx[it],] + groupit <- match(groups[idB][knnit],names(nx)) + selectUnique <- FALSE + selectit <- 0 + + while(!selectUnique){ + selectit <- selectit + 1 + itx <- selectit + cellx <- knnit[itx] + groupitx <- groupit[itx] + if(is.infinite(nx[groupitx])){ + if(selectit == k2){ + itx <- NA + cellx <- NA + selectUnique <- TRUE + } + }else{ + if(cellx %ni% idY){ + selectUnique <- TRUE + } + if(selectit == k2){ + itx <- NA + cellx <- NA + selectUnique <- TRUE + } + } + } + + if(!is.na(itx)){ + idX <- c(idX, sx[it]) + idY <- c(idY, cellx) + nx[groupitx] <- nx[groupitx] - 1 + if(any(nx <= 0)){ + nx[which(nx <= 0)] <- Inf + nx <- sort(nx) + } + } + + if(all(is.infinite(nx))){ + it <- length(sx) + } + } - } ##################### @@ -767,14 +836,18 @@ getMarkerFeatures <- function( summaryBgd = bgdBias, bgdGroups = rbind(estbgd, obsbgd), bgdGroupsProbs = rbind(estbgdP, obsbgdP), - corbgdGroups = suppressWarnings(cor(estbgdP, obsbgdP)), n = length(sx), p = it / length(sx), group = groupx, k = k2 ) - .logThis(out, paste0("MatchSummary ", useGroups[x]), logFile = logFile) + .logThis(out, paste0("MatchSummary : Pre", useGroups[x]), logFile = logFile) + + out$corbgdGroups <- suppressWarnings(cor(estbgdP, obsbgdP)) + + .logThis(out, paste0("MatchSummary : Post", useGroups[x]), logFile = logFile) + return(out) }) %>% SimpleList @@ -794,6 +867,32 @@ getMarkerFeatures <- function( } +#from @anastasiya-pendragon +.computeClostestCellsList <- function( + data = NULL, + query = NULL, + k = 50, + ... +){ + .validInput(input = data, name = "data", valid = c("dataframe", "matrix")) + .validInput(input = query, name = "query", valid = c("dataframe", "matrix")) + .validInput(input = k, name = "k", valid = c("integer")) + .requirePackage("nabor", source = "cran") + + nn1 <- nabor::knn(data = data, query = query, k = k, ...) + dists <- nn1$nn.dists + indxs <- nn1$nn.idx + data <- c() + elements_len <- dim(indxs)[2] + for (i in seq_len(dim(indxs)[1])){ + new_part <- cbind(rep(i, elements_len), indxs[i,], dists[i,]) + data <- rbind(data, new_part) + } + pairs_dist_df <- as.data.frame(data) + colnames(pairs_dist_df) <- c("Cells", "Bgd", "Dist") + pairs_dist_df <- pairs_dist_df[order(pairs_dist_df$Dist),,drop=FALSE] + pairs_dist_df +} #################################################################################################### # Applications of Markers! @@ -838,6 +937,26 @@ markerHeatmap <- function(...){ #' group compared to all other cell groups. Additionally, the color palette is inverted for visualization. This is useful when #' looking for down-regulated markers (`log2(fold change) < 0`) instead of up-regulated markers (`log2(fold change) > 0`). #' @param logFile The path to a file to be used for logging ArchR output. +#' +#' @examples +#' +#' #Get Test Project +#' proj <- getTestProject() +#' +#' #Get Markers +#' seMarker <- getMarkerFeatures( +#' ArchRProj = proj, +#' useMatrix = "PeakMatrix", +#' testMethod = "binomial", +#' binarize = TRUE +#' ) +#' +#' #Plot Markers +#' p <- plotMarkerHeatmap(seMarker) +#' +#' #PDF +#' plotPDF(p, name = "Marker-Heatmap", ArchRProj = proj) +#' #' @export plotMarkerHeatmap <- function( seMarker = NULL, @@ -1156,6 +1275,23 @@ plotMarkerHeatmap <- function( #' of the `assayNames` from `seMarker`. #' @param n An integer that indicates the maximum number of features to return per group. #' @param returnGR A boolean indicating whether to return as a `GRanges` object. Only valid when `seMarker` is computed for a PeakMatrix. +#' +#' @examples +#' +#' #Get Test Project +#' proj <- getTestProject() +#' +#' #Get Markers +#' seMarker <- getMarkerFeatures( +#' ArchRProj = proj, +#' useMatrix = "PeakMatrix", +#' testMethod = "binomial", +#' binarize = TRUE +#' ) +#' +#' #Get Markers +#' getMarkers(seMarker) +#' #' @export getMarkers <- function( seMarker = NULL, @@ -1255,6 +1391,26 @@ markerPlot <- function(...){ #' @param plotAs A string indicating whether to plot a volcano plot ("Volcano") or an MA plot ("MA"). #' @param rastr A boolean value that indicates whether the plot should be rasterized using `ggrastr`. This does not rasterize #' lines and labels, just the internal portions of the plot. +#' +#' @examples +#' +#' #Get Test Project +#' proj <- getTestProject() +#' +#' #Get Markers +#' seMarker <- getMarkerFeatures( +#' ArchRProj = proj, +#' useMatrix = "PeakMatrix", +#' testMethod = "binomial", +#' binarize = TRUE +#' ) +#' +#' #Plot Markers +#' p <- plotMarkers(seMarker, name = "C1") +#' +#' #PDF +#' plotPDF(p, name = "Marker-Plot", ArchRProj = proj) +#' #' @export plotMarkers <- function( seMarker = NULL, @@ -1372,5 +1528,3 @@ plotMarkers <- function( } } - - diff --git a/R/MatrixDeviations.R b/R/MatrixDeviations.R index 904c049f..732d422c 100644 --- a/R/MatrixDeviations.R +++ b/R/MatrixDeviations.R @@ -23,6 +23,22 @@ #' @param force A boolean value indicating whether to force the matrix indicated by `matrixName` to be overwritten if it #' already exists in the ArrowFiles associated with the given `ArchRProject`. #' @param logFile The path to a file to be used for logging ArchR output. +#' +#' @examples +#' +#' # Get Test ArchR Project +#' proj <- getTestProject() +#' +#' # Add Background Peaks +#' proj <- addBgdPeaks(proj, force = TRUE) +#' +#' # Add Motif Deviations +#' proj <- addDeviationsMatrix( +#' ArchRProj = proj, +#' peakAnnotation = "Motif", +#' force = TRUE +#' ) +#' #' @export addDeviationsMatrix <- function( ArchRProj = NULL, @@ -107,7 +123,12 @@ addDeviationsMatrix <- function( rS$GC <- ArchRProj@peakSet$GC rownames(rS) <- paste0(rS$seqnames, "_", rS$start, "_", rS$end) - annotationsMatrix <- annotationsMatrix[rownames(rS), ] + #Check Anno Matrix + annotationsMatrix <- annotationsMatrix[rownames(rS), , drop = FALSE] + + #Check Bgd Peaks + rownames(bgdPeaks) <- paste0(seqnames(bgdPeaks), "_", start(bgdPeaks), "_", end(bgdPeaks)) + bgdPeaks <- bgdPeaks[rownames(rS), , drop=FALSE] #Create args list args <- mget(names(formals()),sys.frame(sys.nframe())) @@ -117,6 +138,7 @@ addDeviationsMatrix <- function( rm(peakAnnotation) args$annotationsMatrix <- annotationsMatrix + args$bgdPeaks <- bgdPeaks args$featureDF <- rS args$useMatrix <- useMatrix args$ArrowFiles <- ArrowFiles @@ -559,6 +581,15 @@ addDeviationsMatrix <- function( #' @param name The name of the `DeviationsMatrix` object stored in the `ArchRProject`. See `addDeviationsMatrix()`. #' @param plot A boolean value indicating whether the ranked variability should be plotted for each peakAnnotation in `DeviationsMatrix`. #' @param n The number of annotations to label with `ggrepel`. +#' +#' @examples +#' +#' # Get Test ArchR Project +#' proj <- getTestProject() +#' +#' # Get Variable Motif Deviations +#' varDev <- getVarDeviations(proj) +#' #' @export getVarDeviations <- function(ArchRProj = NULL, name = "MotifMatrix", plot = TRUE, n = 25){ @@ -606,6 +637,15 @@ getVarDeviations <- function(ArchRProj = NULL, name = "MotifMatrix", plot = TRUE #' is to save this file in the `outputDirectory` of the `ArchRProject`. #' @param method A string indicating whether to use chromVAR or ArchR for background peak identification. #' @param force A boolean value indicating whether to force the file indicated by `outFile` to be overwritten if it already exists. +#' +#' @examples +#' +#' # Get Test ArchR Project +#' proj <- getTestProject() +#' +#' # Add Background Peaks +#' proj <- addBgdPeaks(proj, force = TRUE) +#' #' @export addBgdPeaks <- function( ArchRProj = NULL, @@ -642,8 +682,28 @@ addBgdPeaks <- function( if(force){ message("Previous Background Peaks file does not exist! Identifying Background Peaks!") - bgdPeaks <- .computeBgdPeaks(ArchRProj=ArchRProj, nIterations=nIterations, w=w, binSize=binSize, seed = seed, outFile = outFile, method = method) - + + #Force + if(!is.null(metadata(getPeakSet(ArchRProj))$bgdPeaks)){ + if(file.exists(metadata(getPeakSet(ArchRProj))$bgdPeaks)){ + file.remove(metadata(getPeakSet(ArchRProj))$bgdPeaks) + } + } + if(file.exists(outFile)){ + file.remove(outFile) + } + metadata(getPeakSet(ArchRProj))$bgdPeaks <- NULL + + bgdPeaks <- .computeBgdPeaks( + ArchRProj = ArchRProj, + nIterations = nIterations, + w = w, + binSize = binSize, + seed = seed, + outFile = outFile, + method = method + ) + }else{ stop("Previous Background Peaks file does not exist! set force = TRUE to addBgdPeaks!") @@ -654,8 +714,26 @@ addBgdPeaks <- function( }else{ + #Force + if(!is.null(metadata(getPeakSet(ArchRProj))$bgdPeaks)){ + if(file.exists(metadata(getPeakSet(ArchRProj))$bgdPeaks)){ + file.remove(metadata(getPeakSet(ArchRProj))$bgdPeaks) + } + } + if(file.exists(outFile)){ + file.remove(outFile) + } + message("Identifying Background Peaks!") - bgdPeaks <- .computeBgdPeaks(ArchRProj=ArchRProj, nIterations=nIterations, w=w, binSize=binSize, seed = seed, outFile = outFile, method = method) + bgdPeaks <- .computeBgdPeaks( + ArchRProj = ArchRProj, + nIterations = nIterations, + w = w, + binSize = binSize, + seed = seed, + outFile = outFile, + method = method + ) } @@ -681,6 +759,15 @@ addBgdPeaks <- function( #' so that you can reproduce results downstream. #' @param method A string indicating whether to use chromVAR or ArchR for background peak identification. #' @param force A boolean value indicating whether to force the file indicated by `outFile` to be overwritten if it already exists. +#' +#' @examples +#' +#' # Get Test ArchR Project +#' proj <- getTestProject() +#' +#' # Get Background Peaks +#' bgdPeaks <- getBgdPeaks(proj, force = TRUE) +#' #' @export getBgdPeaks <- function( ArchRProj = NULL, @@ -711,7 +798,15 @@ getBgdPeaks <- function( if(force){ message("Previous Background Peaks file does not exist! Identifying Background Peaks!") - bgdPeaks <- .computeBgdPeaks(ArchRProj=ArchRProj, nIterations=nIterations, w=w, binSize=binSize, seed = seed, outFile = NULL, method = method) + bgdPeaks <- .computeBgdPeaks( + ArchRProj=ArchRProj, + nIterations=nIterations, + w=w, + binSize=binSize, + seed = seed, + outFile = NULL, + method = method + ) }else{ @@ -724,7 +819,15 @@ getBgdPeaks <- function( }else{ message("Identifying Background Peaks!") - bgdPeaks <- .computeBgdPeaks(ArchRProj=ArchRProj, nIterations=nIterations, w=w, binSize=binSize, seed = seed, outFile = NULL, method = method) + bgdPeaks <- .computeBgdPeaks( + ArchRProj=ArchRProj, + nIterations=nIterations, + w=w, + binSize=binSize, + seed = seed, + outFile = NULL, + method = method + ) } @@ -732,6 +835,16 @@ getBgdPeaks <- function( stop("Number of rows in Background Peaks does not match peakSet!") } + #Check + rr1 <- paste0(getPeakSet(ArchRProj)) + rr2 <- paste0(rowRanges(bgdPeaks)) + if(!all(rr1 %in% rr2)){ + stop("Background Peaks Do Not Match Current ArchRPeakSet! Re-run `addBgdPeaks` with force = TRUE!") + } + rownames(bgdPeaks) <- rr2 + bgdPeaks <- bgdPeaks[rr1, , drop=FALSE] + rownames(bgdPeaks) <- NULL + bgdPeaks } @@ -758,26 +871,23 @@ getBgdPeaks <- function( ArrowFiles = ArrowFiles, seqnames = availableChr, useMatrix = useMatrix, - filter0 = FALSE + filter0 = FALSE, + addInfo = TRUE )) - all1 <- all( - paste0(rS$seqnames, ":", rS$idx) %in% - paste0(seqnames(ArchRProj@peakSet), ":", ArchRProj@peakSet$idx) - ) - - all2 <- all( - paste0(seqnames(ArchRProj@peakSet), ":", ArchRProj@peakSet$idx) %in% - paste0(rS$seqnames, ":", rS$idx) - ) - - if(!(all1 & all2)){ + #Check + rr1 <- paste0(rS$seqnames, ":", rS$start, "-", rS$end) + rr2 <- paste0(ArchRProj@peakSet) + if(!all(rr1 %in% rr2)){ stop("PeakSet in Arrows does not match PeakSet in ArchRProject! - To try to solve this, try re-running addPeakMatrix(ArchRProj, force=TRUE)") + To try to solve this, try re-running addPeakMatrix(ArchRProj, force=TRUE)") } - rS$start <- start(ArchRProj@peakSet) - rS$end <- end(ArchRProj@peakSet) + #ReOrder + rownames(rS) <- rr1 + rS <- rS[rr2, ,drop=FALSE] + + #Add GC rS$GC <- ArchRProj@peakSet$GC uniqueDist <- unique(rS$end - rS$start) diff --git a/R/MatrixFeatures.R b/R/MatrixFeatures.R index 15e50137..68e80e55 100644 --- a/R/MatrixFeatures.R +++ b/R/MatrixFeatures.R @@ -17,6 +17,15 @@ #' @param parallelParam A list of parameters to be passed for biocparallel/batchtools parallel computing. #' @param force A boolean value indicating whether to force the matrix indicated by `matrixName` to be overwritten if it already exists in the `input`. #' @param logFile The path to a file to be used for logging ArchR output. +#' +#' @examples +#' +#' # Get Test ArchR Project +#' proj <- getTestProject() +#' +#' # Add Custom Matrix Which Is Just Peak Set +#' proj <- addFeatureMatrix(proj, features = getPeakSet(proj)) +#' #' @export addFeatureMatrix <- function( input = NULL, @@ -111,6 +120,15 @@ addFeatureMatrix <- function( #' @param parallelParam A list of parameters to be passed for biocparallel/batchtools parallel computing. #' @param force A boolean value indicating whether to force the "PeakMatrix" to be overwritten if it already exist in the given `ArchRProject`. #' @param logFile The path to a file to be used for logging ArchR output. +#' +#' @examples +#' +#' # Get Test ArchR Project +#' proj <- getTestProject() +#' +#' # Add Peak Matrix +#' proj <- addPeakMatrix(proj) +#' #' @export addPeakMatrix <- function( ArchRProj = NULL, diff --git a/R/MatrixGeneExpression.R b/R/MatrixGeneExpression.R index b54f409f..39083e5a 100644 --- a/R/MatrixGeneExpression.R +++ b/R/MatrixGeneExpression.R @@ -49,7 +49,6 @@ addGeneExpressionMatrix <- function( .validInput(input = force, name = "force", valid = c("boolean")) .validInput(input = logFile, name = "logFile", valid = c("character")) - if(inherits(input, "ArchRProject")){ ArrowFiles <- getArrowFiles(input) allCells <- rownames(getCellColData(input)) @@ -74,6 +73,15 @@ addGeneExpressionMatrix <- function( .startLogging(logFile = logFile) .logThis(mget(names(formals()),sys.frame(sys.nframe())), "addGeneExpressionMatrix Input-Parameters", logFile = logFile) + seqRNA <- paste0(unique(seqnames(seRNA))) + if(sum(seqRNA %in% paste0(seqnames(chromSizes))) <= 0.5 * length(seqRNA)){ + if(force){ + stop(paste0("Detected less than 50% of seqnames for seRNA in chromSizes! Are you sure the seqnames of your seRNA are correct?!")) + }else{ + message(paste0("Detected less than 50% of seqnames for seRNA in chromSizes! Continuing since `force` = TRUE!")) + } + } + cellsInArrows <- unlist(lapply(ArrowFiles, .availableCells), use.names=FALSE) if(!is.null(allCells)){ cellsInArrows <- allCells diff --git a/R/MatrixGeneScores.R b/R/MatrixGeneScores.R index fd342a1a..2c197979 100644 --- a/R/MatrixGeneScores.R +++ b/R/MatrixGeneScores.R @@ -15,7 +15,7 @@ #' @param extendUpstream The minimum and maximum number of basepairs upstream of the transcription start site to consider for gene #' activity score calculation. #' @param extendDownstream The minimum and maximum number of basepairs downstream of the transcription start site or transcription termination site -#' (based on 'useTSS') to consider for gene activity score calculation. +#' (based on 'useTSS' and 'extendTSS') to consider for gene activity score calculation. #' @param useGeneBoundaries A boolean value indicating whether gene boundaries should be employed during gene activity score #' calculation. Gene boundaries refers to the process of preventing tiles from contributing to the gene score of a given gene #' if there is a second gene's transcription start site between the tile and the gene of interest. @@ -40,6 +40,15 @@ #' @param subThreading A boolean determining whether possible use threads within each multi-threaded subprocess if greater than the number of input samples. #' @param force A boolean value indicating whether to force the matrix indicated by `matrixName` to be overwritten if it already exist in the given `input`. #' @param logFile The path to a file to be used for logging ArchR output. +#' +#' @examples +#' +#' # Get Test ArchR Project +#' proj <- getTestProject() +#' +#' # Add Gene Score Matrix With New Model +#' proj <- addGeneScoreMatrix(proj, matrixName = "GeneScoreMatrix2", geneModel = "exp(-abs(x)/10000) + exp(-1)") +#' #' @export addGeneScoreMatrix <- function( input = NULL, @@ -52,7 +61,7 @@ addGeneScoreMatrix <- function( geneDownstream = 0, #New Param useGeneBoundaries = TRUE, useTSS = FALSE, #New Param - extendTSS = FALSE, + extendTSS = TRUE, #Make TRUE so if you useTSS it will extend if that is desired... tileSize = 500, ceiling = 4, geneScaleFactor = 5, #New Param @@ -113,6 +122,11 @@ addGeneScoreMatrix <- function( #Valid GRanges genes <- .validGRanges(genes) + #We are going to remove seqlengths from the genes to ensure now errors + seql <- rep(NA, length(seqlengths(genes))) + names(seql) <- names(seqlengths(genes)) + seqlengths(genes) <- seql + #Add args to list args <- mget(names(formals()),sys.frame(sys.nframe()))#as.list(match.call()) args$ArrowFiles <- ArrowFiles @@ -122,10 +136,18 @@ addGeneScoreMatrix <- function( args$registryDir <- file.path(outDir, "GeneScoresRegistry") args$logFile <- logFile - if(subThreading){ - h5disableFileLocking() - }else{ + #H5 File Lock Check + h5lock <- setArchRLocking() + if(h5lock){ + if(subThreading){ + message("subThreadhing Disabled since ArchRLocking is TRUE see `addArchRLocking`") + subThreading <- FALSE + } args$threads <- min(length(ArrowFiles), threads) + }else{ + if(subThreading){ + message("subThreadhing Enabled since ArchRLocking is FALSE see `addArchRLocking`") + } } #Remove Input from args @@ -355,6 +377,9 @@ addGeneScoreMatrix <- function( ) e <- pmax(pmaxGene + pForwardMin, e) + #Check + s <- pmax(1, s) #Must Be higher than 0! + e <- pmin(e, 2147483647) #Maximum allowable Integer! extendedGeneRegion <- IRanges(start = s, end = e) idx1 <- which(pminGene - pReverseMin < start(extendedGeneRegion)) diff --git a/R/MatrixTiles.R b/R/MatrixTiles.R index 4b420c50..bf47f0eb 100644 --- a/R/MatrixTiles.R +++ b/R/MatrixTiles.R @@ -18,6 +18,15 @@ #' @param parallelParam A list of parameters to be passed for biocparallel/batchtools parallel computing. #' @param force A boolean value indicating whether to force the "TileMatrix' to be overwritten if it already exist in the given `input`. #' @param logFile The path to a file to be used for logging ArchR output. +#' +#' @examples +#' +#' # Get Test ArchR Project +#' proj <- getTestProject() +#' +#' # Add Tile Matrix +#' proj <- addTileMatrix(proj, force = TRUE, tileSize = 25000) +#' #' @export addTileMatrix <- function( input = NULL, @@ -172,7 +181,7 @@ addTileMatrix <- function( cellNames = cellNames, params = dfParams, featureDF = featureDF, - force = force + force = TRUE ) ###################################### diff --git a/R/ModuleScore.R b/R/ModuleScore.R index 725276ab..6cb19f64 100644 --- a/R/ModuleScore.R +++ b/R/ModuleScore.R @@ -17,6 +17,37 @@ #' to keep track of the seed used so that you can reproduce results downstream. #' @param threads The number of threads to be used for parallel computing. #' @param logFile The path to a file to be used for logging ArchR output. +#' +#' @examples +#' +#' # Get Test ArchR Project +#' proj <- getTestProject() +#' +#' # Add Module Score +#' proj <- addModuleScore(proj, useMatrix = "GeneScoreMatrix", nBin = 25, nBgd = 25, features = list(TScore = c('CD3D', 'CD3E'))) +#' +#' #Check +#' split(proj@cellColData$Module.TScore, proj@cellColData$CellType) %>% lapply(mean) %>% unlist +#' # B M T +#' # -4.352769 -8.438259 9.942678 +#' +#' #Get T cell Features +#' features <- getGenes() +#' T <- features[features$symbol %in% c("CD3D", "CD3E")] +#' B <- features[features$symbol %in% c("MS4A1")] +#' +#' # Add Module Score +#' proj <- addModuleScore(proj, useMatrix = "TileMatrix", nBin = 25, nBgd = 25, features = list(TScore = T, BScore = B)) +#' +#' #Check +#' split(proj@cellColData$Module.TScore, proj@cellColData$CellType) %>% lapply(mean) %>% unlist +#' # B M T +#' # -0.03866667 -0.05303030 0.10306122 +#' +#' split(proj@cellColData$Module.BScore, proj@cellColData$CellType) %>% lapply(mean) %>% unlist +#' # B M T +#' # 0.10000000 -0.03939394 -0.05387755 +#' #' @export addModuleScore <- function( ArchRProj = NULL, @@ -48,41 +79,119 @@ addModuleScore <- function( #Get Feature DF featureDF <- ArchR:::.getFeatureDF(head(getArrowFiles(ArchRProj),2), subGroup=useMatrix) - rownames(featureDF) <- paste0(featureDF$seqnames, ":", featureDF$idx) - featureDF$Match <- seq_len(nrow(featureDF)) + featureDF$Match <- seq_len(nrow(featureDF)) + + if("name" %in% colnames(featureDF)){ + + type <- "name" + featureData <- featureDF + featureData$Match <- seq_len(nrow(featureDF)) + + }else{ + + if(all(c("start", "end") %in% colnames(featureDF))){ + type <- "GRanges" + featureData <- GRanges( + seqnames = featureDF$seqnames, + ranges = IRanges( + start = featureDF$start, + end = featureDF$end + ) + ) + mcols(featureData)$idx <- featureDF$idx + mcols(featureData)$Match <- seq_len(nrow(featureDF)) + mcols(featureData)$name <- paste0(featureDF$seqnames, ":", featureDF$idx) + }else if(c("start") %in% colnames(featureDF)){ + type <- "GRanges" + featureData <- GRanges( + seqnames = featureDF$seqnames, + ranges = IRanges( + start = featureDF$start, + width = diff(featureDF$start)[1] + ) + ) + mcols(featureData)$idx <- featureDF$idx + mcols(featureData)$Match <- seq_len(nrow(featureDF)) + mcols(featureData)$name <- paste0(featureDF$seqnames, ":", featureDF$idx) + }else{ + + stop("Error Unrecognized Feature Type!") + + } + + } matrixClass <- h5read(getArrowFiles(ArchRProj)[1], paste0(useMatrix, "/Info/Class")) - if(matrixClass == "Sparse.Assays.Matrix"){ - if(!all(unlist(lapply(unlist(features), function(x) grepl(":",x))))){ - .logMessage("When accessing features from a matrix of class Sparse.Assays.Matrix it requires seqnames\n(denoted by seqnames:name) specifying to which assay to pull the feature from.\nIf confused, try getFeatures(ArchRProj, useMatrix) to list out available formats for input!", logFile = logFile) - stop("When accessing features from a matrix of class Sparse.Assays.Matrix it requires seqnames\n(denoted by seqnames:name) specifying to which assay to pull the feature from.\nIf confused, try getFeatures(ArchRProj, useMatrix) to list out available formats for input!") + if(type == "name"){ + if(matrixClass == "Sparse.Assays.Matrix"){ + if(!all(unlist(lapply(unlist(features), function(x) grepl(":",x))))){ + .logMessage("When accessing features from a matrix of class Sparse.Assays.Matrix it requires seqnames\n(denoted by seqnames:name) specifying to which assay to pull the feature from.\nIf confused, try getFeatures(ArchRProj, useMatrix) to list out available formats for input!", logFile = logFile) + stop("When accessing features from a matrix of class Sparse.Assays.Matrix it requires seqnames\n(denoted by seqnames:name) specifying to which assay to pull the feature from.\nIf confused, try getFeatures(ArchRProj, useMatrix) to list out available formats for input!") + } } } - #Figure out the index numbers of the selected features within the given matrix - if(grepl(":",unlist(features)[1])){ + if(type == "name"){ + + if(is(features[[1]], "GRanges")){ + stop("Feature Input is Not A character of names!") + } - sname <- stringr::str_split(unlist(features),pattern=":",simplify=TRUE)[,1] - name <- stringr::str_split(unlist(features),pattern=":",simplify=TRUE)[,2] + #Figure out the index numbers of the selected features within the given matrix + if(grepl(":",unlist(features)[1])){ - idx <- lapply(seq_along(name), function(x){ - ix <- intersect(which(tolower(name[x]) == tolower(featureDF$name)), BiocGenerics::which(tolower(sname[x]) == tolower(featureDF$seqnames))) - if(length(ix)==0){ - .logStop(sprintf("FeatureName (%s) does not exist! See available features using getFeatures()", name[x]), logFile = logFile) - } - ix - }) %>% unlist + sname <- stringr::str_split(unlist(features),pattern=":",simplify=TRUE)[,1] + name <- stringr::str_split(unlist(features),pattern=":",simplify=TRUE)[,2] + + idx <- lapply(seq_along(name), function(x){ + ix <- intersect( + which(tolower(name[x]) == tolower(featureDF$name)), + BiocGenerics::which(tolower(sname[x]) == tolower(featureDF$seqnames)) + ) + if(length(ix)==0){ + .logStop(sprintf("FeatureName (%s) does not exist! See available features using getFeatures()", name[x]), logFile = logFile) + } + ix + }) + + }else{ + + idx <- lapply(seq_along(unlist(features)), function(x){ + + ix <- which(tolower(unlist(features)[x]) == tolower(featureDF$name))[1] + + if(length(ix) == 0){ + .logStop(sprintf("FeatureName (%s) no regions found overlapping! See available features using getFeatures()", unlist(features)[x]), logFile = logFile) + } + + ix + + }) + + } }else{ + if(!is(features[[1]], "GRanges")){ + stop("Feature Input is Not A GRanges object!") + } + idx <- lapply(seq_along(unlist(features)), function(x){ - ix <- which(tolower(unlist(features)[x]) == tolower(featureDF$name))[1] - if(is.na(ix)){ + + #Check + o <- tryCatch({GenomeInfoDb::seqlevelsStyle(features[[x]]) <- "UCSC"}, warning = function(w) 0, error = function(e) 0) + + #Overlaps + ix <- which(overlapsAny(featureData, features[[x]], ignore.strand=TRUE)) + + if(length(ix)==0){ .logStop(sprintf("FeatureName (%s) does not exist! See available features using getFeatures()", unlist(features)[x]), logFile = logFile) } + ix - }) %>% unlist + + }) } @@ -92,16 +201,26 @@ addModuleScore <- function( names(features) <- paste0(name, ".", names(features)) } - featuresUse <- featureDF[idx,] - featuresUse$Module <- Rle(stack(features)[,2]) + featuresUse <- featureDF[unlist(idx),] + featuresUse$Module <- Rle(unlist(lapply(seq_along(features), function(z) rep(names(features)[z], length(idx[[z]]))))) #Get average values for all features and then order the features based on their average values #so that the features can be binned into nBins rS <- ArchR:::.getRowSums(ArrowFiles = getArrowFiles(ArchRProj), useMatrix = useMatrix) rS <- rS[order(rS[,3]), ] - rS$Bins <- Rle(ggplot2::cut_number(x = rS[,3] + rnorm(length(rS[,3]))/1e30, n = nBin, labels = FALSE, right = FALSE)) - rS$Match <- match(paste0(rS$seqnames, ":", rS$idx), rownames(featureDF)) + if(is(featureData, "GRanges")){ + rS$Match <- match(paste0(rS$seqnames, ":", rS$idx), paste0(seqnames(featureData), ":", featureData$idx)) + }else{ + rS$Match <- match(paste0(rS$seqnames, ":", rS$idx), paste0(featureData$seqnames, ":", featureData$idx)) + } + #Determine Bins + rS$Bins <- 0 + idx <- which(rS$rowSums > 0) + rS$Bins[idx] <- ceiling(seq_along(idx) / ceiling(length(idx)/nBin)) + rS$Bins <- Rle(rS$Bins + 1) + #rS$Bins <- Rle(ggplot2::cut_number(x = rS[,3] + rnorm(length(rS[,3]))/1e30, n = nBin, labels = FALSE, right = FALSE)) + #check that the number of selected background features isnt bigger than the size of each bin if(nBgd > min(rS$Bins@lengths)){ stop("nBgd must be lower than ", min(rS$Bins@lengths), "!") diff --git a/R/MultiModal.R b/R/MultiModal.R index 6733d78c..b474e205 100644 --- a/R/MultiModal.R +++ b/R/MultiModal.R @@ -16,13 +16,16 @@ #' @param verbose Only relevant when multiple input files are used. A boolean that indicates whether messaging about mismatches should be verbose (`TRUE`) or minimal (`FALSE`) #' @param featureType The name of the feature to extract from the 10x feature file. #' See https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/advanced/h5_matrices for more information. +#' @param features A genomic ranges object containing a "name" column to help fill missing 10x intervals for RSE. +#' For example, in hg38 features provided could be using Bioconductors `genes(EnsDb.Hsapiens.v86::EnsDb.Hsapiens.v86)`. #' @export import10xFeatureMatrix <- function( input = NULL, names = NULL, strictMatch = TRUE, verbose = TRUE, - featureType = "Gene Expression" + featureType = "Gene Expression", + features = NULL ){ .validInput(input = input, name = "input", valid = c("character")) @@ -30,42 +33,51 @@ import10xFeatureMatrix <- function( .validInput(input = strictMatch, name = "strictMatch", valid = c("boolean")) .validInput(input = verbose, name = "verbose", valid = c("boolean")) .validInput(input = featureType, name = "featureType", valid = c("character")) + .validInput(input = features, name = "features", valid = c("GRanges", "NULL")) if (!all(file.exists(input))) { stop("Not all input file paths exist!") } - featureMats <- lapply(seq_along(input), function(y) { - message("Importing Feature Matrix ", y, " of ", length(input)) - .importFM(featureMatrix = input[y], featureType = featureType, - name = names[y]) - }) - - message("Re-ordering RNA matricies for consistency.") - for(j in 1:length(featureMats)) { - featureMats[[j]] <- sort.GenomicRanges(sortSeqlevels(featureMats[[j]]), ignore.strand = TRUE) - } + + message("Importing Feature Matrix ", 1, " of ", length(input)) + rse_final <- .import10xToSE( + h5 = input[1], + type10x = featureType, + name = names[1], + ranges = features + ) #if more than one filtered feature barcode matrix is supplied, then merge the RSE objects - if (length(featureMats) > 1) { + if (length(input) > 1) { message("Merging individual RNA objects...") - #make the first matrix the base matrix and merge all others into it - rse_final <- featureMats[[1]] + + #merge all others into 1st rowsToRemove <- c() #rows that have previously been removed from rse_final #for each additional feature matrix (starting with the second), look for mismatches with rse_final and merge accordingly - for (i in 2:length(featureMats)) { - mismatchWarning <- TRUE #a boolean to prevent output of the warning message many times and only output it once - + for (i in seq(2, length(input))){ + message(sprintf("\nMerging %s", names[i])) - - if (!identical(rownames(rse_final), rownames(featureMats[[i]]))) { + message("Importing Feature Matrix ", i, " of ", length(input)) + + #Read RSE + res_i <- .import10xToSE( + h5 = input[i], + type10x = featureType, + name = names[i], + ranges = features + ) + + mismatchWarning <- TRUE #a boolean to prevent output of the warning message many times and only output it once + + if (!identical(rownames(rse_final), rownames(res_i))) { stop("Error - rownames (genes) of individual RNA objects are not equivalent.") } - if (!identical(colnames(rowData(rse_final)), colnames(rowData(featureMats[[i]])))) { + if (!identical(colnames(rowData(rse_final)), colnames(rowData(res_i)))) { stop("Error - rowData (gene metadata) of individual RNA objects have different columns. This is highly unusual and merging has been aborted.") } - if (!identical(names(assays(rse_final)), names(assays(featureMats[[i]])))) { + if (!identical(names(assays(rse_final)), names(assays(res_i)))) { stop("Error - available assays of individual RNA objects are not equivalent. Each object is expected to only have one assay named 'counts'.") } @@ -73,8 +85,8 @@ import10xFeatureMatrix <- function( #occasionally, it seems like 10x is annotating different ensembl IDs to the same gene which seems like a bad way to go #this is a bit heavy-handed but it seems like the safest thing to do is report any mismatch rather than merge blindly - for (x in 1:ncol(rowData(rse_final))) { - if (!identical(rowData(rse_final)[,x], rowData(featureMats[[i]])[,x])) { + for (x in seq_len(ncol(rowData(rse_final)))){ + if (!identical(rowData(rse_final)[,x], rowData(res_i)[,x])) { if(mismatchWarning) { message(sprintf("Warning! Some values within column \"%s\" of the rowData (gene metadata) of your objects do not precisely match!", colnames(rowData(rse_final))[x])) message("This is often caused by slight variations in Ensembl IDs and gene locations used by cellranger across different samples. ArchR will ignore these mismatches and allow merging to proceed but you should check to make sure that these are ok for your data.\n") @@ -82,7 +94,7 @@ import10xFeatureMatrix <- function( } #detect all of the mismatches betwenn rse_final and the current featureMat - mismatch <- which(rowData(rse_final)[,x] != rowData(featureMats[[i]])[,x]) + mismatch <- which(rowData(rse_final)[,x] != rowData(res_i)[,x]) #for each detected mismatch, handle the mismatch according to the value of strictMatch for (y in 1:length(mismatch)) { if (verbose) { @@ -94,107 +106,176 @@ import10xFeatureMatrix <- function( } rowsToRemove <- unique(c(rowsToRemove, mismatch[y])) #temporarily force the data to match so that merging can occur easily. Mismatched rows will be removed later - rowData(featureMats[[i]])[mismatch[y],] <- rowData(rse_final)[mismatch[y],] - rowRanges(featureMats[[i]])[mismatch[y]] <- rowRanges(rse_final)[mismatch[y]] + rowData(res_i)[mismatch[y],] <- rowData(rse_final)[mismatch[y],] + rowRanges(res_i)[mismatch[y]] <- rowRanges(rse_final)[mismatch[y]] } else { if (verbose) { message("strictMatch = FALSE so mismatching information will be coerced to match the first sample provided.") } - rowData(featureMats[[i]])[mismatch[y],] <- rowData(rse_final)[mismatch[y],] - rowRanges(featureMats[[i]])[mismatch[y]] <- rowRanges(rse_final)[mismatch[y]] + rowData(res_i)[mismatch[y],] <- rowData(rse_final)[mismatch[y],] + rowRanges(res_i)[mismatch[y]] <- rowRanges(rse_final)[mismatch[y]] } } } } - rse_final <- SummarizedExperiment::cbind(rse_final, featureMats[[i]]) + rse_final <- SummarizedExperiment::cbind(rse_final, res_i) + gc() } - if (strictMatch) { - if(length(rowsToRemove) > 0) { - rse_final <- rse_final[-rowsToRemove,] - } - } - return(rse_final) - } - else { - return(featureMats[[1]]) - } -} + } -.importFM <- function(featureMatrix = NULL, featureType = NULL, name = NULL){ - - o <- h5closeAll() - barcodes <- h5read(featureMatrix, "/matrix/barcodes") - data <- h5read(featureMatrix, "/matrix/data") - indices <- h5read(featureMatrix, "/matrix/indices") - indptr <- h5read(featureMatrix, "/matrix/indptr") - shape <- h5read(featureMatrix, "/matrix/shape") - - spMat <- sparseMatrix( - i = indices, - p = indptr, - x = data, - dims = shape, - index1 = FALSE - ) + if (strictMatch) { + if(length(rowsToRemove) > 0) { + rse_final <- rse_final[-rowsToRemove,] + } + } - colnames(spMat) <- paste0(name, "#", barcodes) + rse_final + +} - features <- h5read(featureMatrix, "/matrix/features") - features <- lapply(seq_along(features), function(x){ - if(length(features[[x]]) == nrow(spMat)){ - if(object.size(features[[x]]) > object.size(Rle(features[[x]]))){ - df <- DataFrame(x = Rle(features[[x]])) +.import10xToSE <- function( + h5 = NULL, + type10x = NULL, + name = NULL, + ranges = NULL + ){ + + #Shape + shape <- h5read(h5, "/matrix/shape") + + #Read features10x + features10x <- h5read(h5, "/matrix/features") + features10x <- lapply(seq_along(features10x), function(x){ + if(length(features10x[[x]]) == shape[1]){ + if(object.size(features10x[[x]]) > object.size(Rle(features10x[[x]]))){ + df <- DataFrame(x = Rle(as.vector(features10x[[x]]))) }else{ - df <- DataFrame(x = features[[x]]) + df <- DataFrame(x = as.vector(features10x[[x]])) } - colnames(df) <- names(features)[x] + colnames(df) <- names(as.vector(features10x))[x] df }else{ NULL } }) - features <- Reduce("cbind",features[!unlist(lapply(features,is.null))]) + features10x <- Reduce("cbind",features10x[!unlist(lapply(features10x,is.null))]) - se <- SummarizedExperiment(assays = SimpleList(counts = spMat), rowData = features) + #Determine Idx + if(!is.null(type10x)){ + idx <- which(paste0(features10x$feature_type) %in% type10x) + }else{ + idx <- seq_len(nrow(features10x)) + } + if(length(idx)==0){ + stop( + paste0( + h5, + "\nMissing `type10x`! Feature Types in h5:\n", + "\t", paste0(unique(features10x$feature_type),collapse="; ") + ) + ) + } - rownames(se) <- features$name + #Subset + features10x <- features10x[idx, , drop=FALSE] - if("feature_type" %in% colnames(rowData(se))){ - if(!is.null(featureType)){ - idx <- BiocGenerics::which(rowData(se)$feature_type %bcin% featureType) - if(length(idx) == 0){ - stop("Error featureType not within provided features!") - } - se <- se[idx] - } - } + #Interval + if("interval" %in% colnames(features10x)){ + + idxNA <- which(features10x$interval=="NA") - if("interval" %in% colnames(rowData(se))){ - idxNA <- which(rowData(se)$interval=="NA") if(length(idxNA) > 0){ - se <- se[-idxNA, ] + + #Fix ranges + idx1 <- paste0(seqnames(ranges)) %in% c(1:22, "X", "Y", "MT") + if(length(idx1) > 0){ + ranges2 <- GRanges( + seqnames = ifelse(idx1, paste0("chr",seqnames(ranges)), paste0(seqnames(ranges))), + ranges = GenomicRanges::ranges(ranges) + ) + mcols(ranges2) <- mcols(ranges) + } + + #Try To Use Ranges To Match + features10xNA <- features10x[which(features10x$interval=="NA"),,drop=FALSE] + namesNA <- features10xNA$name + idxFix <- match(namesNA, mcols(ranges2)[, grep("name", colnames(mcols(ranges)), ignore.case=TRUE)]) + if(length(idxFix[!is.na(idxFix)]) > 0){ + message("Correcting missing intervals...") + idx2 <- which(!is.na(idxFix)) + rangesFix <- ranges2[idxFix[idx2]] + strand(rangesFix) <- "*" + features10xNA$interval[idx2] <- paste0(rangesFix) + features10x[which(features10x$interval=="NA"), ] <- features10xNA + } + + #NA add Fake Chromosome + features10xNA <- features10x[which(features10x$interval=="NA"),,drop=FALSE] + if(nrow(features10xNA) > 0){ + features10xNA$interval <- paste0("chrUNK:1-1") + features10x[which(features10x$interval=="NA"), ] <- features10xNA + } + } - rr <- GRanges(paste0(rowData(se)$interval)) - mcols(rr) <- rowData(se) - se <- SummarizedExperiment(assays = SimpleList(counts = assay(se)), rowRanges = rr) + + features10x$ranges <- GRanges(paste0(features10x$interval)) + features10x$interval <- NULL + } - idxDup <- which(rownames(se) %in% rownames(se[duplicated(rownames(se))])) - names(idxDup) <- rownames(se)[idxDup] - if(length(idxDup) > 0){ - dupOrder <- idxDup[order(Matrix::rowSums(assay(se[idxDup])),decreasing=TRUE)] - dupOrder <- dupOrder[!duplicated(names(dupOrder))] - se <- se[-as.vector(idxDup[idxDup %ni% dupOrder])] + #Read Matrix + mat <- sparseMatrix( + i = h5read(h5, "/matrix/indices"), + p = h5read(h5, "/matrix/indptr"), + x = h5read(h5, "/matrix/data"), + dims = shape, + index1 = FALSE + ) + barcodes <- h5read(h5, "/matrix/barcodes") + if(!is.null(name)){ + colnames(mat) <- paste0(name, "#", barcodes) + }else{ + colnames(mat) <- barcodes } + #Subset + mat <- mat[idx, , drop = FALSE] gc() - se + #Summarized Experiment + if("ranges" %in% colnames(features10x)){ + mat <- SummarizedExperiment( + assays = SimpleList( + data = mat + ), + rowRanges = features10x$ranges + ) + rowData(mat) <- features10x + rowData(mat)$ranges <- NULL + }else{ + mat <- SummarizedExperiment( + assays = SimpleList( + data = mat + ), + rowData = features10x + ) + } + + rownames(mat) <- rowData(mat)$name + .sortRSE(mat) } +.sortRSE <- function(rse){ + if(!is.null(rowRanges(rse))){ + sort.GenomicRanges(sortSeqlevels(rse), ignore.strand = TRUE) + }else{ + rse[order(rowData(rse)$name)] + } +} + #################################################################### # Combined Modalities #################################################################### diff --git a/R/ProjectMethods.R b/R/ProjectMethods.R index 0ac8258c..9d641ec6 100644 --- a/R/ProjectMethods.R +++ b/R/ProjectMethods.R @@ -7,6 +7,15 @@ #' This function gets the outputDirectory from a given ArchRProject. If null this returns "QualityControl" directory. #' #' @param ArchRProj An `ArchRProject` object. +#' +#' @examples +#' +#' # Get Test ArchR Project +#' proj <- getTestProject() +#' +#' # Get Output Directory +#' getOutputDirectory(proj) +#' #' @export getOutputDirectory <- function( ArchRProj = NULL @@ -33,6 +42,15 @@ getOutputDirectory <- function( #' This function gets the names of all ArrowFiles associated with a given ArchRProject. #' #' @param ArchRProj An `ArchRProject` object. +#' +#' @examples +#' +#' # Get Test ArchR Project +#' proj <- getTestProject() +#' +#' # Get Arrow Files +#' getArrowFiles(proj) +#' #' @export getArrowFiles <- function( ArchRProj = NULL @@ -63,6 +81,15 @@ getArrowFiles <- function( #' This function gets the names of all samples from a given ArchRProject. #' #' @param ArchRProj An `ArchRProject` object. +#' +#' @examples +#' +#' # Get Test ArchR Project +#' proj <- getTestProject() +#' +#' # Get Sample Names +#' getSampleNames(proj) +#' #' @export getSampleNames <- function( ArchRProj = NULL @@ -81,6 +108,15 @@ getSampleNames <- function( #' This function gets number of cells from an ArchRProject or ArrowFile #' #' @param input An `ArchRProject` object or the path to an ArrowFile. +#' +#' @examples +#' +#' # Get Test ArchR Project +#' proj <- getTestProject() +#' +#' # Get Number of Cells +#' nCells(proj) +#' #' @export nCells <- function( input = NULL @@ -111,6 +147,15 @@ nCells <- function( #' @param select A character vector containing the column names to select from `cellColData`. #' @param summary A character vector describing which method for summarizing across group. Options include "median", "mean", or "sum". #' @param removeNA Remove NA's from summary method. +#' +#' @examples +#' +#' # Get Test ArchR Project +#' proj <- getTestProject() +#' +#' # Get Group Summary +#' getGroupSummary(proj, groupBy = "Clusters", select = "TSSEnrichment", summary = "mean") +#' #' @export getGroupSummary <- function( ArchRProj = NULL, @@ -163,6 +208,15 @@ getGroupSummary <- function( #' @param ArchRProj An `ArchRProject` object. #' @param select A character vector containing the column names to select from `sampleColData`. #' @param drop A boolean value that indicates whether to drop the `dataframe` structure and convert to a vector if selecting only one column. +#' +#' @examples +#' +#' # Get Test ArchR Project +#' proj <- getTestProject() +#' +#' # Get Sample Column Data +#' getSampleColData(proj) +#' #' @export getSampleColData <- function( ArchRProj = NULL, @@ -200,6 +254,15 @@ getSampleColData <- function( #' use this argument to only add data to a subset of samples. Samples where `data` is not added are set to `NA`. #' @param force A boolean value that indicates whether or not to overwrite data in a given column when the value passed to `name` #' already exists as a column name in `sampleColData`. +#' +#' @examples +#' +#' # Get Test ArchR Project +#' proj <- getTestProject() +#' +#' # Add Sample Column Data +#' addSampleColData(proj, data = 1, name = "Test", samples = "PBSmall") +#' #' @export addSampleColData <- function(ArchRProj = NULL, data = NULL, name = NULL, samples = NULL, force = FALSE){ @@ -246,6 +309,15 @@ addSampleColData <- function(ArchRProj = NULL, data = NULL, name = NULL, samples #' This function gets the cellNames from a given ArchRProject object. #' #' @param ArchRProj An `ArchRProject` object. +#' +#' @examples +#' +#' # Get Test ArchR Project +#' proj <- getTestProject() +#' +#' # Get Cell Names +#' getCellNames(proj) +#' #' @export getCellNames <- function(ArchRProj = NULL){ .validInput(input = ArchRProj, name = "ArchRProj", valid = "ArchRProject") @@ -260,6 +332,15 @@ getCellNames <- function(ArchRProj = NULL){ #' @param ArchRProj An `ArchRProject` object. #' @param select A character vector of column names to select from `cellColData` if you would like to subset the returned data. #' @param drop A boolean value that indicates whether to drop the `dataframe` structure and convert to a vector if selecting only one column. +#' +#' @examples +#' +#' # Get Test ArchR Project +#' proj <- getTestProject() +#' +#' # Get Cell Column Data +#' getCellColData(proj) +#' #' @export getCellColData <- function(ArchRProj = NULL, select = NULL, drop = FALSE){ @@ -299,6 +380,15 @@ getCellColData <- function(ArchRProj = NULL, select = NULL, drop = FALSE){ #' argument to only add data to a subset of cells. Cells where `data` is not added are set to `NA`. #' @param force A boolean value indicating whether or not to overwrite data in a given column when the value passed to `name` #' already exists as a column name in `cellColData`. +#' +#' @examples +#' +#' # Get Test ArchR Project +#' proj <- getTestProject() +#' +#' # Add Cell Column Data +#' addCellColData(proj, data = proj$TSSEnrichment, name = "TSS2", cells = getCellNames(proj)) +#' #' @export addCellColData <- function(ArchRProj = NULL, data = NULL, name = NULL, cells = NULL, force = FALSE){ @@ -351,6 +441,15 @@ addCellColData <- function(ArchRProj = NULL, data = NULL, name = NULL, cells = #' This function gets the peak set as a GRanges object from an ArchRProject. #' #' @param ArchRProj An `ArchRProject` object. +#' +#' @examples +#' +#' # Get Test ArchR Project +#' proj <- getTestProject() +#' +#' # Get PeakSet +#' getPeakSet(proj) +#' #' @export getPeakSet <- function(ArchRProj = NULL){ .validInput(input = ArchRProj, name = "ArchRProj", valid = "ArchRProject") @@ -367,6 +466,15 @@ getPeakSet <- function(ArchRProj = NULL){ #' information (GC content) or chromosome sizes. #' @param force If a `peakSet` object has already been added to the given `ArchRProject`, the value of `force` determines #' whether or not to overwrite this `peakSet`. +#' +#' @examples +#' +#' # Get Test ArchR Project +#' proj <- getTestProject() +#' +#' # Add PeakSet +#' addPeakSet(proj, peakSet = getPeakSet(proj), force = TRUE) +#' #' @export addPeakSet <- function( ArchRProj = NULL, @@ -391,7 +499,11 @@ addPeakSet <- function( #Get NucleoTide Content peakSet <- tryCatch({ .requirePackage("Biostrings",source="bioc") - BSgenome <- eval(parse(text = genomeAnnotation$genome)) + BSgenome <- tryCatch({ + eval(parse(text = paste0(genomeAnnotation$genome))) + }, error = function(e){ + eval(parse(text = paste0(genomeAnnotation$genome,"::",genomeAnnotation$genome))) + }) BSgenome <- validBSgenome(BSgenome) nucFreq <- BSgenome::alphabetFrequency(getSeq(BSgenome, peakSet)) mcols(peakSet)$GC <- round(rowSums(nucFreq[,c("G","C")]) / rowSums(nucFreq),4) @@ -423,6 +535,18 @@ addPeakSet <- function( #' This function gets the genomeAnnotation from a given ArchRProject. #' #' @param ArchRProj An `ArchRProject` object. +#' +#' @examples +#' +#' # Get Test ArchR Project +#' proj <- getTestProject() +#' +#' # Get Genome Annotation in ArchRProj +#' getGenomeAnnotation(proj) +#' +#' # Get Genome Annotation loaded globally +#' getGenomeAnnotation() +#' #' @export getGenomeAnnotation <- function(ArchRProj = NULL){ .validInput(input = ArchRProj, name = "ArchRProj", valid = c("ArchRProject","null")) @@ -442,6 +566,18 @@ getGenomeAnnotation <- function(ArchRProj = NULL){ #' This function gets the blacklist (the regions to be excluded from analysis) as a GRanges object from the genomeAnnotation of a given ArchRProject. #' #' @param ArchRProj An `ArchRProject` object. +#' +#' @examples +#' +#' # Get Test ArchR Project +#' proj <- getTestProject() +#' +#' # Get Blacklist in ArchRProj +#' getBlacklist(proj) +#' +#' # Get Blacklist loaded globally +#' getBlacklist() +#' #' @export getBlacklist <- function(ArchRProj = NULL){ if(is.character(ArchRProj)){ @@ -464,6 +600,18 @@ getBlacklist <- function(ArchRProj = NULL){ #' This function gets the name of the genome from the genomeAnnotation used by a given ArchRProject. #' #' @param ArchRProj An `ArchRProject` object. +#' +#' @examples +#' +#' # Get Test ArchR Project +#' proj <- getTestProject() +#' +#' # Get Genome in ArchRProj +#' getGenome(proj) +#' +#' # Get Genome loaded globally +#' getGenome() +#' #' @export getGenome <- function(ArchRProj = NULL){ .validInput(input = ArchRProj, name = "ArchRProj", valid = c("ArchRProject","null")) @@ -483,6 +631,18 @@ getGenome <- function(ArchRProj = NULL){ #' This function gets the chromosome lengths as a GRanges object from the genomeAnnotation of a given ArchRProject. #' #' @param ArchRProj An `ArchRProject` object. +#' +#' @examples +#' +#' # Get Test ArchR Project +#' proj <- getTestProject() +#' +#' # Get chromSizes in ArchRProj +#' getChromSizes(proj) +#' +#' # Get chromSizes loaded globally +#' getChromSizes() +#' #' @export getChromSizes <- function(ArchRProj = NULL){ .validInput(input = ArchRProj, name = "ArchRProj", valid = c("ArchRProject","null")) @@ -502,6 +662,18 @@ getChromSizes <- function(ArchRProj = NULL){ #' This function gets the chromosome lengths as a vector from the genomeAnnotation of a given ArchRProject. #' #' @param ArchRProj An `ArchRProject` object. +#' +#' @examples +#' +#' # Get Test ArchR Project +#' proj <- getTestProject() +#' +#' # Get chromLengths in ArchRProj +#' getChromLengths(proj) +#' +#' # Get chromLengths loaded globally +#' getChromLengths() +#' #' @export getChromLengths <- function(ArchRProj = NULL){ .validInput(input = ArchRProj, name = "ArchRProj", valid = c("ArchRProject","null")) @@ -538,6 +710,18 @@ getChromLengths <- function(ArchRProj = NULL){ #' This function gets the geneAnnotation from a given ArchRProject #' #' @param ArchRProj An `ArchRProject` object. +#' +#' @examples +#' +#' # Get Test ArchR Project +#' proj <- getTestProject() +#' +#' # Get Gene Annotation in ArchRProj +#' getGeneAnnotation(proj) +#' +#' # Get Gene Annotation loaded globally +#' getGeneAnnotation() +#' #' @export getGeneAnnotation <- function(ArchRProj = NULL){ .validInput(input = ArchRProj, name = "ArchRProj", valid = c("ArchRProject","null")) @@ -557,6 +741,18 @@ getGeneAnnotation <- function(ArchRProj = NULL){ #' This function gets the transcription start sites (TSSs) as a GRanges object of all genes from the geneAnnotation of a given ArchRProject. #' #' @param ArchRProj An `ArchRProject` object. +#' +#' @examples +#' +#' # Get Test ArchR Project +#' proj <- getTestProject() +#' +#' # Get TSS in ArchRProj +#' getTSS(proj) +#' +#' # Get TSS loaded globally +#' getTSS() +#' #' @export getTSS <- function(ArchRProj = NULL){ .validInput(input = ArchRProj, name = "ArchRProj", valid = c("ArchRProject","null")) @@ -577,6 +773,18 @@ getTSS <- function(ArchRProj = NULL){ #' #' @param ArchRProj An `ArchRProject` object. #' @param symbols A character vector containing the gene symbols to subset from the `geneAnnotation`. +#' +#' @examples +#' +#' # Get Test ArchR Project +#' proj <- getTestProject() +#' +#' # Get Genes in ArchRProj +#' getGenes(proj) +#' +#' # Get Genes globally +#' getGenes() +#' #' @export getGenes <- function(ArchRProj = NULL, symbols = NULL){ .validInput(input = ArchRProj, name = "ArchRProj", valid = c("ArchRProject","null")) @@ -616,6 +824,18 @@ getGenes <- function(ArchRProj = NULL, symbols = NULL){ #' #' @param ArchRProj An `ArchRProject` object. #' @param symbols A character vector containing the gene symbols for the genes where exons should be extracted. +#' +#' @examples +#' +#' # Get Test ArchR Project +#' proj <- getTestProject() +#' +#' # Get Exons in ArchRProj +#' getExons(proj) +#' +#' # Get Exons globally +#' getExons() +#' #' @export getExons <- function(ArchRProj = NULL, symbols = NULL){ .validInput(input = ArchRProj, name = "ArchRProj", valid = c("ArchRProject","null")) @@ -669,6 +889,15 @@ getExons <- function(ArchRProj = NULL, symbols = NULL){ #' `reducedDims` were created by the dimensionality reduction method. This idea was introduced by Timothy Stuart. #' @param corCutOff A numeric cutoff for the correlation of each dimension to the sequencing depth. If the dimension has a correlation #' to sequencing depth that is greater than the `corCutOff`, it will be excluded. +#' +#' @examples +#' +#' # Get Test ArchR Project +#' proj <- getTestProject() +#' +#' # Get Iterative LSI +#' getReducedDims(proj, reducedDims = "IterativeLSI") +#' #' @export getReducedDims <- function( ArchRProj = NULL, @@ -690,6 +919,14 @@ getReducedDims <- function( if(reducedDims %in% names(ArchRProj@reducedDims)){ + #check if the dimsToUse are all present in the available dims + #if not, provide a warning + dimsAvailable <- 1:length(ArchRProj@reducedDims[[reducedDims]]$corToDepth$none) + if(!length(which(dimsToUse %in% dimsAvailable)) == length(dimsToUse)) { + message("Warning! Not all requested dimsToUse are available! The follow dims were not found in ", reducedDims,":\n", + paste(which(dimsToUse %ni% dimsAvailable), collapse = ", "),"\nUnavailable dims will be automatically excluded...") + } + if(is.na(ArchRProj@reducedDims[[reducedDims]]$scaleDims[1])){ scaleDims <- FALSE # if na this means dont scaleDims ever. } @@ -714,7 +951,7 @@ getReducedDims <- function( #Determine PCs to Keep if(!is.null(dimsToUse)){ - corToUse <- dimsToUse + corToUse <- intersect(dimsToUse, seq_along(corToDepth)) }else{ corToUse <- seq_along(corToDepth) } @@ -766,6 +1003,15 @@ getReducedDims <- function( #' retrieve from the designated `ArchRProject`. #' @param returnDF A boolean value indicating whether to return the embedding object as a `data.frame`. Otherwise, it will return #' the full embedding object. +#' +#' @examples +#' +#' # Get Test ArchR Project +#' proj <- getTestProject() +#' +#' # Get UMAP +#' getEmbedding(proj, embedding = "UMAP") +#' #' @export getEmbedding <- function(ArchRProj = NULL, embedding = "UMAP", returnDF = TRUE){ @@ -797,6 +1043,15 @@ getEmbedding <- function(ArchRProj = NULL, embedding = "UMAP", returnDF = TRUE){ #' #' @param ArchRProj An `ArchRProject` object. #' @param returnSummary A boolean value indicating whether to return a summary of the `ArchRProject` or to just print the summary. +#' +#' @examples +#' +#' # Get Test ArchR Project +#' proj <- getTestProject() +#' +#' # Get Project Summary +#' getProjectSummary(proj) +#' #' @export getProjectSummary <- function(ArchRProj = NULL, returnSummary = FALSE){ @@ -827,6 +1082,15 @@ getProjectSummary <- function(ArchRProj = NULL, returnSummary = FALSE){ #' @param ArchRProj An `ArchRProject` object. #' @param name The name of the summary information to add to the `ArchRProject` object. #' @param summary A vector to add as summary information to the `ArchRProject` object. +#' +#' @examples +#' +#' # Get Test ArchR Project +#' proj <- getTestProject() +#' +#' # Add Project Summary +#' addProjectSummary(proj, name = "test", summary = "test successful") +#' #' @export addProjectSummary <- function(ArchRProj = NULL, name = NULL, summary = NULL){ @@ -856,37 +1120,82 @@ addProjectSummary <- function(ArchRProj = NULL, name = NULL, summary = NULL){ #' #' @param ArchRProj An `ArchRProject` object. #' @param useMatrix The name of the data matrix as stored in the ArrowFiles of the `ArchRProject`. Options include "TileMatrix", "GeneScoreMatrix", etc. -#' @param select A string specifying a specific feature name (or rowname) to be found with `grep`. +#' @param select A string specifying a specific feature name (or rowname) to be found with `grep` or granges to overlap. #' @param ignoreCase A boolean value indicating whether to ignore the case (upper-case / lower-case) when searching via grep for the string passed to `select`. +#' +#' @examples +#' +#' # Get Test ArchR Project +#' proj <- getTestProject() +#' +#' # Get Features +#' getFeatures(proj, useMatrix = "GeneScoreMatrix", select = 'CD3') +#' #' @export getFeatures <- function(ArchRProj = NULL, useMatrix = "GeneScoreMatrix", select = NULL, ignoreCase = TRUE){ #Validate .validInput(input = ArchRProj, name = "ArchRProj", valid = "ArchRProject") .validInput(input = useMatrix, name = "useMatrix", valid = "character") - .validInput(input = select, name = "select", valid = c("character", "null")) + .validInput(input = select, name = "select", valid = c("character", "null", "granges")) .validInput(input = ignoreCase, name = "ignoreCase", valid = "boolean") ######### fdf <- .getFeatureDF(getArrowFiles(ArchRProj), useMatrix) matrixClass <- h5read(getArrowFiles(ArchRProj)[1], paste0(useMatrix, "/Info/Class")) - if(is.null(select)){ - if(any(duplicated(paste0(fdf$name))) | matrixClass == "Sparse.Assays.Matrix"){ - paste0(fdf$seqnames,":",fdf$name) + + if("name" %in% colnames(fdf)){ + + if(is.null(select)){ + if(any(duplicated(paste0(fdf$name))) | matrixClass == "Sparse.Assays.Matrix"){ + return(paste0(fdf$seqnames,":",fdf$name)) + }else{ + return(fdf$name) + } }else{ - fdf$name + grepNames <- grep(select, fdf$name, value = TRUE, ignore.case = ignoreCase) + if(any(duplicated(grepNames))){ + grepIdx <- grep(select, fdf$name, ignore.case = ignoreCase) + grepNames <- paste0(fdf$seqnames[grepIdx],":",fdf$name[grepIdx]) + } + if(all(c("deviations", "z") %in% unique(paste0(fdf$seqnames)))){ + grepNames <- rev(grepNames) + } + return(grepNames) } + }else{ - grepNames <- grep(select, fdf$name, value = TRUE, ignore.case = ignoreCase) - if(any(duplicated(grepNames))){ - grepIdx <- grep(select, fdf$name, ignore.case = ignoreCase) - grepNames <- paste0(fdf$seqnames[grepIdx],":",fdf$name[grepIdx]) - } - if(all(c("deviations", "z") %in% unique(paste0(fdf$seqnames)))){ - grepNames <- rev(grepNames) + + if(all(c("start", "end") %in% colnames(fdf))){ + featureData <- GRanges( + seqnames = fdf$seqnames, + ranges = IRanges( + start = fdf$start, + end = fdf$end + ) + ) + mcols(featureData)$idx <- fdf$idx + if(!is.null(select)){ + featureData <- featureData[overlapsAny(featureData, select, ignore.strand=TRUE)] + } + return(featureData) + }else if(c("start") %in% colnames(fdf)){ + featureData <- GRanges( + seqnames = fdf$seqnames, + ranges = IRanges( + start = fdf$start, + width = diff(fdf$start)[1] + ) + ) + mcols(featureData)$idx <- fdf$idx + if(!is.null(select)){ + featureData <- featureData[overlapsAny(featureData, select, ignore.strand=TRUE)] + } + return(featureData) } - grepNames + } + } #' Get the seqnames that could be selected from a given data matrix within an ArchRProject @@ -895,15 +1204,29 @@ getFeatures <- function(ArchRProj = NULL, useMatrix = "GeneScoreMatrix", select #' them for downstream plotting utilities. #' #' @param ArchRProj An `ArchRProject` object. -#' @param useMatrix The name of the data matrix as stored in the ArrowFiles of the `ArchRProject`. Options include "TileMatrix", "GeneScoreMatrix", etc. +#' @param useMatrix The name of the data matrix or "Fragments" as stored in the ArrowFiles of the `ArchRProject`. +#' Options include "TileMatrix", "GeneScoreMatrix", etc. +#' +#' @examples +#' +#' # Get Test ArchR Project +#' proj <- getTestProject() +#' +#' # Get Seqnames in Matrix +#' getSeqnames(proj, useMatrix = "GeneScoreMatrix") +#' #' @export getSeqnames <- function(ArchRProj = NULL, useMatrix = "GeneScoreMatrix"){ #Validate .validInput(input = ArchRProj, name = "ArchRProj", valid = "ArchRProject") .validInput(input = useMatrix, name = "useMatrix", valid = "character") ######### - fdf <- .getFeatureDF(getArrowFiles(ArchRProj), useMatrix) - unique(paste0(fdf$seqnames)) + if(useMatrix=="Fragments"){ + unique(.availableSeqnames(getArrowFiles(ArchRProj))) + }else{ + fdf <- .getFeatureDF(getArrowFiles(ArchRProj), useMatrix) + unique(paste0(fdf$seqnames)) + } } #' Get a list available matrices in the ArrowFiles storted in an ArchRProject @@ -911,6 +1234,15 @@ getSeqnames <- function(ArchRProj = NULL, useMatrix = "GeneScoreMatrix"){ #' This function gets the available matrices from the ArrowFiles in a given ArchRProject object. #' #' @param ArchRProj An `ArchRProject` object. +#' +#' @examples +#' +#' # Get Test ArchR Project +#' proj <- getTestProject() +#' +#' # Get Available Matrices in ArchR Project +#' getAvailableMatrices(proj) +#' #' @export getAvailableMatrices <- function(ArchRProj = NULL){ .validInput(input = ArchRProj, name = "ArchRProj", valid = "ArchRProject") @@ -927,6 +1259,15 @@ getAvailableMatrices <- function(ArchRProj = NULL){ #' @param addRatio A boolean indicating whether to add the "`name`Ratio" to the `ArchRProject`. #' @param threads The number of threads to use for parallel execution. #' @param logFile The path to a file to be used for logging ArchR output. +#' +#' @examples +#' +#' # Get Test ArchR Project +#' proj <- getTestProject() +#' +#' # Add Feature Counts +#' proj <- addFeatureCounts(proj, features = getPeakSet(proj), name = 'ReadsInPeaks') +#' #' @export addFeatureCounts <- function( ArchRProj = NULL, @@ -948,8 +1289,19 @@ addFeatureCounts <- function( ArrowFiles <- getArrowFiles(ArchRProj) cellNames <- ArchRProj$cellNames featuresList <- split(features, seqnames(features)) - - h5disableFileLocking() + + #H5 File Lock Check + h5lock <- setArchRLocking() + if(h5lock){ + if(threads > 1){ + message("subThreadhing Disabled since ArchRLocking is TRUE see `addArchRLocking`") + threads <- 1 + } + }else{ + if(threads > 1){ + message("subThreadhing Enabled since ArchRLocking is FALSE see `addArchRLocking`") + } + } countsDF <- .safelapply(seq_along(featuresList), function(i){ @@ -1007,19 +1359,3 @@ addFeatureCounts <- function( ArchRProj } - - -# addColorPalette <- function( -# ArchRProj = NULL, -# pal = NULL -# ){ - -# } - -# getColorPalette <- function( -# ArchRProj = NULL, -# name = NULL -# ){ - -# } - diff --git a/R/QualityControl.R b/R/QualityControl.R index 39672ab5..45cdf404 100644 --- a/R/QualityControl.R +++ b/R/QualityControl.R @@ -17,6 +17,18 @@ #' instead of plotting the TSS enrichment plot. #' @param threads An integer specifying the number of threads to use for calculation. By default this uses the number of threads set by `addArchRThreads()`. #' @param logFile The path to a file to be used for logging ArchR output. +#' +#' @examples +#' +#' # Get Test ArchR Project +#' proj <- getTestProject() +#' +#' # Plot TSS +#' p <- plotTSSEnrichment(proj, groupBy = "Clusters") +#' +#' # PDF +#' plotPDF(p, name = "TSS-Enrich", ArchRProj = proj) +#' #' @export plotTSSEnrichment <- function( ArchRProj = NULL, @@ -55,10 +67,21 @@ plotTSSEnrichment <- function( groups <- getCellColData(ArchRProj = ArchRProj, select = groupBy, drop = FALSE) uniqGroups <- gtools::mixedsort(unique(groups[,1])) - if(threads > 1){ - h5disableFileLocking() + #H5 File Lock Check + h5lock <- setArchRLocking() + if(h5lock){ + if(threads > 1){ + message("subThreadhing Disabled since ArchRLocking is TRUE see `addArchRLocking`") + threads <- 1 + } + }else{ + if(threads > 1){ + message("subThreadhing Enabled since ArchRLocking is FALSE see `addArchRLocking`") + } } + chromLengths <- getChromLengths(ArchRProj) + dfTSS <- .safelapply(seq_along(uniqGroups), function(z){ .logDiffTime(paste0(uniqGroups[z], " Computing TSS (",z," of ",length(uniqGroups),")!"), t1 = tstart, logFile = logFile) @@ -70,6 +93,12 @@ plotTSSEnrichment <- function( #TSS for Chr TSSi <- splitTSS[[chr[k]]] + #Check All Positions Are at least 50 + flank from chromSize start! + idx1 <- start(TSSi) > flank + 50 + + #Check End + 50 + flank less than chromSize end! + idx2 <- end(TSSi) + flank + 50 < chromLengths[paste0(seqnames(TSSi))] + #Set TSS To be a dummy chr1 TSSi <- GRanges(seqnames=rep("chr1",length(TSSi)), ranges = ranges(TSSi), strand = strand(TSSi)) .logThis(TSSi, paste0(uniqGroups[z], " : TSSi : ", chr[k]), logFile = logFile) @@ -129,10 +158,6 @@ plotTSSEnrichment <- function( .logThis(dfTSS, paste0("All : TSSDf"), logFile = logFile) .endLogging(logFile = logFile) - - if(threads > 1){ - h5enableFileLocking() - } if(returnDF){ @@ -176,6 +201,18 @@ plotTSSEnrichment <- function( #' instead of plotting the fragment size distribution. #' @param threads An integer specifying the number of threads to use for calculation. By default this uses the number of threads set by `addArchRThreads()`. #' @param logFile The path to a file to be used for logging ArchR output. +#' +#' @examples +#' +#' # Get Test ArchR Project +#' proj <- getTestProject() +#' +#' # Plot Frag Sizes +#' p <- plotFragmentSizes(proj, groupBy = "Clusters") +#' +#' # PDF +#' plotPDF(p, name = "Frag-Sizes", ArchRProj = proj) +#' #' @export plotFragmentSizes <- function( ArchRProj = NULL, diff --git a/R/RNAIntegration.R b/R/RNAIntegration.R index d1137e79..315b7709 100644 --- a/R/RNAIntegration.R +++ b/R/RNAIntegration.R @@ -54,6 +54,34 @@ #' @param force A boolean value indicating whether to force the matrix indicated by `matrixName` to be overwritten if it already exists in the given `input`. #' @param logFile The path to a file to be used for logging ArchR output. #' @param ... Additional params to be added to `Seurat::FindTransferAnchors` +#' +#' @examples +#' +#' #Get Test Project +#' proj <- getTestProject() +#' +#' #Get RNA Matrix +#' sePBMC <- readRDS( +#' file.path(system.file("testdata", package = "ArchR"), "seRNA_PBMC.rds") +#' ) +#' +#' #Gene Integration Matrix +#' proj <- addGeneIntegrationMatrix( +#' ArchRProj = proj, +#' useMatrix = "GeneScoreMatrix", +#' matrixName = "GeneIntegrationMatrix", +#' reducedDims = "IterativeLSI", +#' seRNA = sePBMC, +#' addToArrow = FALSE, +#' groupRNA = "CellType", +#' nameCell = "predictedCell_Un2", +#' nameGroup = "predictedGroup_Un2", +#' nameScore = "predictedScore_Un2", +#' dimsToUse = 1:10, +#' nGenes = 250, +#' force = TRUE +#' ) +#' #' @export addGeneIntegrationMatrix <- function( ArchRProj = NULL, @@ -353,8 +381,17 @@ addGeneIntegrationMatrix <- function( tmpFile <- .tempfile() o <- suppressWarnings(file.remove(paste0(tmpFile, "-IntegrationBlock-", seq_along(blockList), ".h5"))) - if(threads > 1){ - h5disableFileLocking() + #H5 File Lock Check + h5lock <- setArchRLocking() + if(h5lock){ + if(threads > 1){ + message("subThreadhing Disabled since ArchRLocking is TRUE see `addArchRLocking`") + threads <- 1 + } + }else{ + if(threads > 1){ + message("subThreadhing Enabled since ArchRLocking is FALSE see `addArchRLocking`") + } } rD <- getReducedDims(ArchRProj = ArchRProj, reducedDims = reducedDims, corCutOff = corCutOff, dimsToUse = dimsToUse) @@ -540,6 +577,8 @@ addGeneIntegrationMatrix <- function( subsetCols = matchDF$cellNames ) + #Since this is a temporary addition this does not need the ArchR Arrow version update! + for(z in seq_along(uniqueSamples)){ mat <- matchedRNA[, which(sampleNames == uniqueSamples[z]), drop = FALSE] @@ -557,16 +596,16 @@ addGeneIntegrationMatrix <- function( #Create Data Set o <- .suppressAll(h5createDataset(tmpFilei, paste0(Group,"/i"), storage.mode = "integer", - dims = c(lengthI, 1), level = 0)) + dims = c(lengthI, 1), level = getArchRH5Level())) o <- .suppressAll(h5createDataset(tmpFilei, paste0(Group,"/jLengths"), storage.mode = "integer", - dims = c(lengthRle, 1), level = 0)) + dims = c(lengthRle, 1), level = getArchRH5Level())) o <- .suppressAll(h5createDataset(tmpFilei, paste0(Group,"/jValues"), storage.mode = "integer", - dims = c(lengthRle, 1), level = 0)) + dims = c(lengthRle, 1), level = getArchRH5Level())) o <- .suppressAll(h5createDataset(tmpFilei, paste0(Group, "/x"), storage.mode = "double", - dims = c(lengthI, 1), level = 0)) + dims = c(lengthI, 1), level = getArchRH5Level())) #Write Data Set o <- .suppressAll(h5write(obj = mat@i + 1, file = tmpFilei, name = paste0(Group,"/i"))) diff --git a/R/ReproduciblePeakSet.R b/R/ReproduciblePeakSet.R index a5e47a53..9d1791ae 100644 --- a/R/ReproduciblePeakSet.R +++ b/R/ReproduciblePeakSet.R @@ -45,6 +45,18 @@ #' @param logFile The path to a file to be used for logging ArchR output. #' @param ... Additional parameters to be pass to `addGroupCoverages()` to get sample-guided pseudobulk cell-groupings. Only used for TileMatrix-based #' peak calling (not for MACS2). See `addGroupCoverages()` for more info. +#' +#' @examples +#' +#' # Get Test ArchR Project +#' proj <- getTestProject() +#' +#' # Add Peak Matrix Tiles +#' proj <- addReproduciblePeakSet(proj, peakMethod = "tiles") +#' +#' # Add Peak Matrix Macs2 (Preferred) +#' proj <- addReproduciblePeakSet(proj, peakMethod = "macs2") +#' #' @export addReproduciblePeakSet <- function( ArchRProj = NULL, @@ -66,7 +78,7 @@ addReproduciblePeakSet <- function( promoterRegion = c(2000, 100), genomeAnnotation = getGenomeAnnotation(ArchRProj), geneAnnotation = getGeneAnnotation(ArchRProj), - plot = TRUE, + plot = TRUE, threads = getArchRThreads(), parallelParam = NULL, force = FALSE, @@ -103,7 +115,7 @@ addReproduciblePeakSet <- function( geneAnnotation <- .validGeneAnnotation(geneAnnotation) genomeAnnotation <- .validGenomeAnnotation(genomeAnnotation) geneAnnotation <- .validGeneAnnoByGenomeAnno(geneAnnotation = geneAnnotation, genomeAnnotation = genomeAnnotation) - .validInput(input = plot, name = "plot", valid = c("boolean")) + .validInput(input = plot, name = "plot", valid = c("boolean")) .validInput(input = threads, name = "threads", valid = c("integer")) .validInput(input = parallelParam, name = "parallelParam", valid = c("parallelparam", "null")) .validInput(input = force, name = "force", valid = c("boolean")) @@ -112,7 +124,19 @@ addReproduciblePeakSet <- function( tstart <- Sys.time() .startLogging(logFile = logFile) - .logThis(mget(names(formals()),sys.frame(sys.nframe())), "ReproduciblePeakSet Args", logFile=logFile) + .logThis(mget(names(formals()),sys.frame(sys.nframe())), "ReproduciblePeakSet Args", logFile=logFile) + + ##################################################### + # Create Output Directory + ##################################################### + outDir0 <- file.path(getOutputDirectory(ArchRProj), "PeakCalls") + outDir <- file.path(getOutputDirectory(ArchRProj), "PeakCalls", groupBy) + outSubDir <- file.path(getOutputDirectory(ArchRProj), "PeakCalls", groupBy, "ReplicateCalls") + outBedDir <- file.path(getOutputDirectory(ArchRProj), "PeakCalls", groupBy, "InsertionBeds") + dir.create(outDir0, showWarnings = FALSE) + dir.create(outDir, showWarnings = FALSE) + dir.create(outSubDir, showWarnings = FALSE) + dir.create(outBedDir, showWarnings = FALSE) if(tolower(peakMethod) == "macs2"){ @@ -148,16 +172,6 @@ addReproduciblePeakSet <- function( .logThis(groupSummary, "PeakCallSummary", logFile = logFile) if(verbose) print(groupSummary) - ##################################################### - # Create Output Directory - ##################################################### - outDir <- file.path(getOutputDirectory(ArchRProj), "PeakCalls") - outSubDir <- file.path(getOutputDirectory(ArchRProj), "PeakCalls", "ReplicateCalls") - outBedDir <- file.path(getOutputDirectory(ArchRProj), "PeakCalls", "InsertionBeds") - dir.create(outDir, showWarnings = FALSE) - dir.create(outSubDir, showWarnings = FALSE) - dir.create(outBedDir, showWarnings = FALSE) - ##################################################### # Genome Size Presets ##################################################### @@ -228,10 +242,10 @@ addReproduciblePeakSet <- function( summitFiles = outSummitList[[i]], summitNames = summitNamesList[[i]], reproducibility = reproducibility, - extendSummits = extendSummits, - blacklist = genomeAnnotation$blacklist, - prefix = prefix, - logFile = logFile + extendSummits = extendSummits, + blacklist = genomeAnnotation$blacklist, + prefix = prefix, + logFile = logFile )) .logDiffTime(sprintf("%s Annotating and Filtering Peaks", prefix), tstart, verbose = FALSE, logFile = logFile) peaks <- sort(sortSeqlevels(peaks)) @@ -813,6 +827,11 @@ addReproduciblePeakSet <- function( #' Find the installed location of the MACS2 executable #' #' This function attempts to find the path to the MACS2 executable by serting the path and python's pip. +#' +#' @examples +#' +#' # Get Macs2 +#' findMacs2() #' #' @export findMacs2 <- function(){ diff --git a/R/SparseUtils.R b/R/SparseUtils.R new file mode 100644 index 00000000..32687840 --- /dev/null +++ b/R/SparseUtils.R @@ -0,0 +1,247 @@ +########################################################################################## +# SparseMatrix Utilities +########################################################################################## + +.normalizeCols <- function(mat = NULL, colSm = NULL, scaleTo = NULL){ + if(is.null(colSm)){ + colSm <- Matrix::colSums(mat) + } + if(!is.null(scaleTo)){ + mat@x <- scaleTo * mat@x / rep.int(colSm, Matrix::diff(mat@p)) + }else{ + mat@x <- mat@x / rep.int(colSm, Matrix::diff(mat@p)) + } + return(mat) +} + +.safeSubset <- function(mat = NULL, subsetRows = NULL, subsetCols = NULL){ + + if(!is.null(subsetRows)){ + idxNotIn <- which(subsetRows %ni% rownames(mat)) + if(length(idxNotIn) > 0){ + subsetNamesNotIn <- subsetRows[idxNotIn] + matNotIn <- Matrix::sparseMatrix(i=1,j=1,x=0,dims=c(length(idxNotIn), ncol = ncol(mat))) + rownames(matNotIn) <- subsetNamesNotIn + mat <- rbind(mat, matNotIn) + } + mat <- mat[subsetRows,,drop=FALSE] + } + + if(!is.null(subsetCols)){ + idxNotIn <- which(subsetCols %ni% colnames(mat)) + if(length(idxNotIn) > 0){ + subsetNamesNotIn <- subsetCols[idxNotIn] + matNotIn <- Matrix::sparseMatrix(i=1,j=1,x=0,dims=c(nrow(mat), ncol = length(idxNotIn))) + colnames(matNotIn) <- subsetNamesNotIn + mat <- cbind(mat, matNotIn) + } + mat <- mat[,subsetCols,drop=FALSE] + } + + mat + +} + +################### +# Binary +################### + +.colBinarySums <- function(m){ + m@x <- 1 * (m@x > 0) + Matrix::colSums(m) +} + +.rowBinarySums <- function(m){ + m@x <- 1 * (m@x > 0) + Matrix::rowSums(m) +} + +################### +# rowVars +################### + +.sparseRowVars <- function(m, rM = NULL){ + if(require("sparseMatrixStats", quietly = TRUE)){ + sparseMatrixStats::rowVars(m) + }else{ + message("Using ArchR sparse rowVars recommended to install `sparseMatrixStats`!") + if(is.null(rM)){ + rM <- Matrix::rowMeans(m) + } + computeSparseRowVariances(m@i + 1, m@x, rM, ncol(m)) + } +} + +.sparesRowSds <- function(m){ + sqrt(.sparseRowVars(m=m)) +} + +################### +# Geo +################### + +.expm1 <- function(x, cap = 100){ + x[x > cap] <- cap + expm1(x) +} + +.sparseRowGeoVars <- function(m, rM = NULL){ + m@x <- .expm1(m@x) + .sparseRowVars(m, rM = rM) +} + +.sparseRowGeoMeans <- function(m){ + m@x <- .expm1(m@x) + Matrix::rowMeans(m) +} + +################### +# Log2 +################### + +.log2p1 <- function(m){ + m@x <- log2(m@x + 1) + m +} + +.sparseRowLog2p1Means <- function(m){ + m@x <- log2(m@x + 1) + Matrix::rowMeans(m) +} + +.sparseColLog2p1Means <- function(m){ + m@x <- log2(m@x + 1) + Matrix::rowMeans(m) +} + +.sparseRowLog2p1Vars <- function(m, rM = NULL){ + m@x <- log2(m@x + 1) + .sparseRowVars(m, rM = rM) +} + +################### +# Other +################### +.sparseRowVarsStd <- function( + m = NULL, + rM = NULL, + expSd = NULL, + vmax = sqrt(ncol(m)), + method = "Seurat" + ){ + + if(is.null(rM)){ + rM <- Matrix::rowMeans(m) + } + + if(require("Seurat", quietly = TRUE) & tolower(method) == "seurat"){ + + #Since these Seurat Functions Are Not Exported + #We will have a backup which is slower but should give identical results + o <- tryCatch({ + + Seurat:::SparseRowVarStd( + mat = m, + mu = rM, + sd = expSd, + vmax = vmax, + display_progress = FALSE + ) + + }, error = function(e){ + + #Vals + m@x <- m@x - rM[m@i + 1] + m@x <- m@x / expSd[m@i + 1] + m@x <- (pmin(m@x, vmax))^2 + + #Compute Row Vars + v <- Matrix::rowSums(m) + + #Determine 0s + m@x <- rep(1, length(m@x)) + n <- (ncol(m) - Matrix::rowSums(m)) + v <- v + n * (rM / expSd)^2 + v[is.na(v)] <- 0 + o <- as.vector(v / (ncol(m)-1)) + o + + }) + + }else{ + + #Vals + m@x <- m@x - rM[m@i + 1] + m@x <- m@x / expSd[m@i + 1] + m@x <- (pmin(m@x, vmax))^2 + + #Compute Row Vars + v <- Matrix::rowSums(m) + + #Determine 0s + m@x <- rep(1, length(m@x)) + n <- (ncol(m) - Matrix::rowSums(m)) + v <- v + n * (rM / expSd)^2 + v[is.na(v)] <- 0 + o <- as.vector(v / (ncol(m)-1)) + + } + + o + +} + +.sparseRowScale <- function(m, max = 10, method = "seurat", stats = FALSE){ + + #Since these Seurat Functions Are Not Exported + #We will have a backup which is slower but should give identical results + if(stats){ + method <- "ArchR" + } + + if(require("Seurat", quietly = TRUE) & tolower(method) == "seurat"){ + m <- tryCatch({ + rn <- rownames(m) + cn <- colnames(m) + m <- Seurat:::FastSparseRowScale(m, scale_max = max) + colnames(m) <- cn + rownames(m) <- rn + m + }, error = function(e){ + rM <- Matrix::rowMeans(m) + rV <- .sparseRowVars(m, rM = rM) + m <- m - rM + m <- m / sqrt(rV) + m@x[m@x > max] <- max + m@x[m@x < -max] <- -max + m + }) + }else{ + rM <- Matrix::rowMeans(m) + rV <- .sparseRowVars(m, rM = rM) + m <- m - rM + m <- m / sqrt(rV) + m@x[m@x > max] <- max + m@x[m@x < -max] <- -max + } + + if(stats){ + list(m=m, rM = rM, rV=rV) + }else{ + m + } + +} + +.sparseRowGeoDisp <- function( + m = NULL + ){ + rM <- .sparseRowGeoMeans(m) + rV <- .sparseRowGeoVars(m, rM = m) + rD <- log(rV / rM) + as.vector(rD) +} + + + + diff --git a/R/Trajectory.R b/R/Trajectory.R index ceee126d..d6936b71 100644 --- a/R/Trajectory.R +++ b/R/Trajectory.R @@ -26,6 +26,15 @@ #' @param force A boolean value indicating whether to force the trajactory indicated by `name` to be overwritten if it already exists in the given `ArchRProject`. #' @param seed A number to be used as the seed for random number generation for trajectory creation. #' @param logFile The path to a file to be used for logging ArchR output. +#' +#' @examples +#' +#' # Get Test ArchR Project +#' proj <- getTestProject() +#' +#' #Add Trajectory +#' proj <- addTrajectory(proj, trajectory = c("C1", "C2", "C3"), embedding = "UMAP", force = TRUE) +#' #' @export addTrajectory <- function( ArchRProj = NULL, @@ -138,23 +147,25 @@ addTrajectory <- function( ###################################################### .logMessage("Spline Fit", logFile = logFile) matSpline <- lapply(seq_len(ncol(matFilter)), function(x){ - tryCatch({ - stats::smooth.spline( + suppressWarnings( + tryCatch({ + stats::smooth.spline( + x = initialTime, + y = matFilter[names(initialTime), x], + df = dof, + spar = spar + )[[2]] + }, error = function(e){ + errorList <- list( + it = x, x = initialTime, y = matFilter[names(initialTime), x], df = dof, spar = spar - )[[2]] - }, error = function(e){ - errorList <- list( - it = x, - x = initialTime, - y = matFilter[names(initialTime), x], - df = dof, - spar = spar - ) - .logError(e, fn = "smooth.spline", info = "", errorList = errorList, logFile = logFile) - }) + ) + .logError(e, fn = "smooth.spline", info = "", errorList = errorList, logFile = logFile) + }) + ) }) %>% Reduce("cbind",.) %>% data.frame() ###################################################### @@ -262,6 +273,18 @@ addTrajectory <- function( #' @param smoothWindow An integer value indicating the smoothing window in size (relaive to `groupEvery`) for the sequential #' trajectory matrix to better reveal temporal dynamics. #' @param threads The number of threads to be used for parallel computing. +#' +#' @examples +#' +#' # Get Test ArchR Project +#' proj <- getTestProject() +#' +#' #Add Trajectory +#' proj <- addTrajectory(proj, trajectory = c("C1", "C2", "C3"), embedding = "UMAP", force = TRUE) +#' +#' #Get Trajectory +#' seTraj <- getTrajectory(proj) +#' #' @export getTrajectory <- function( ArchRProj = NULL, @@ -418,6 +441,24 @@ trajectoryHeatmap <- function(...){ #' @param force If useSeqnames is longer than 1 if matrixClass is "Sparse.Assays.Matrix" to continue. This is not recommended because these matrices #' can be in different units. #' @param logFile The path to a file to be used for logging ArchR output. +#' +#' @examples +#' +#' # Get Test ArchR Project +#' proj <- getTestProject() +#' +#' #Add Trajectory +#' proj <- addTrajectory(proj, trajectory = c("C1", "C2", "C3"), embedding = "UMAP", force = TRUE) +#' +#' #Get Trajectory +#' seTraj <- getTrajectory(proj) +#' +#' #Plot Trajectory Heatmap +#' p <- plotTrajectoryHeatmap(seTraj) +#' +#' #Plot PDF +#' plotPDF(p, name = "Trajectory-Heatmap", ArchRProj = proj) +#' #' @export plotTrajectoryHeatmap <- function( seTrajectory = NULL, @@ -649,6 +690,21 @@ plotTrajectoryHeatmap <- function( #' @param smoothWindow An integer value indicating the smoothing window for creating inferred Arrow overlay on to embedding. #' @param logFile The path to a file to be used for logging ArchR output. #' @param ... Additional parameters to pass to `ggPoint()` or `ggHex()`. +#' +#' @examples +#' +#' # Get Test ArchR Project +#' proj <- getTestProject() +#' +#' #Add Trajectory +#' proj <- addTrajectory(proj, trajectory = c("C1", "C2", "C3"), embedding = "UMAP", force = TRUE) +#' +#' #Plot Trajectory +#' p <- plotTrajectory(proj, smoothWindow = 20) +#' +#' #PDF +#' plotPDF(p, name = "Trajcetory", ArchRProj = proj) +#' #' @export plotTrajectory <- function( ArchRProj = NULL, @@ -850,11 +906,11 @@ plotTrajectory <- function( } message("Plotting") .logThis(plotParams, name = "PlotParams", logFile = logFile) - out <- do.call(ggHex, plotParams) + out <- suppressWarnings(do.call(ggHex, plotParams)) }else{ message("Plotting") .logThis(plotParams, name = "PlotParams", logFile = logFile) - out <- do.call(ggPoint, plotParams) + out <- suppressWarnings(do.call(ggPoint, plotParams)) } }else{ @@ -864,7 +920,8 @@ plotTrajectory <- function( } if(!keepAxis){ - out <- out + theme(axis.text.x=element_blank(), axis.ticks.x=element_blank(), axis.text.y=element_blank(), axis.ticks.y=element_blank()) + out <- out + theme(axis.text.x=element_blank(), axis.ticks.x=element_blank(), + axis.text.y=element_blank(), axis.ticks.y=element_blank()) } .logMessage("Plotting Trajectory", logFile = logFile) @@ -877,7 +934,7 @@ plotTrajectory <- function( .logThis(dfT, "TrajectoryDF", logFile = logFile) #Plot Pseudo-Time - out2 <- ggPoint( + out2 <- suppressWarnings(ggPoint( x = dfT$PseudoTime, y = dfT$value, color = dfT$PseudoTime, @@ -887,7 +944,7 @@ plotTrajectory <- function( pal = plotParams$pal, ratioYX = 0.5, rastr = TRUE - ) + geom_smooth(color = "black") + ) + geom_smooth(color = "black")) attr(out2, "ratioYX") <- 0.5 @@ -943,6 +1000,21 @@ plotTrajectory <- function( #' @param clusterParams A list of parameters to be added when clustering cells for monocle3 with `monocle3::cluster_cells`. #' @param graphParams A list of parameters to be added when learning graphs for monocle3 with `monocle3::learn_graph`. #' @param seed A number to be used as the seed for random number generation for trajectory creation. +#' +#' @examples +#' +#' # Get Test ArchR Project +#' proj <- getTestProject() +#' +#' #Create Monocole Trajectory +#' cds <- getMonocleTrajectories( +#' ArchRProj = proj, +#' useGroups = c("C1", "C2", "C3"), +#' principalGroup = "C1", +#' groupBy = "Clusters", +#' embedding = "UMAP" +#' ) +#' #' @export getMonocleTrajectories <- function( ArchRProj = NULL, @@ -966,7 +1038,7 @@ getMonocleTrajectories <- function( .validInput(input = graphParams, name = "graphParams", valid = c("list")) .validInput(input = seed, name = "seed", valid = c("numeric")) - .requirePackage("monocle3") + .requirePackage("monocle3", installInfo = "devtools::install_github('cole-trapnell-lab/monocle3')") set.seed(seed) @@ -1019,7 +1091,7 @@ getMonocleTrajectories <- function( message("Learning Graphs") graphParams$cds <- cds - cds <- do.call(monocle3::learn_graph, graphParams) + cds <- suppressWarnings(do.call(monocle3::learn_graph, graphParams)) rm(graphParams) gc() @@ -1059,7 +1131,7 @@ getMonocleTrajectories <- function( theme(axis.text.x = element_blank(), axis.ticks.x = element_blank(), axis.text.y = element_blank(), axis.ticks.y = element_blank()) - + dir.create(file.path(getOutputDirectory(ArchRProj), "Monocole3"), showWarnings=FALSE) path <- file.path(getOutputDirectory(ArchRProj), "Monocole3", paste0("Plot-Results-", name, ".pdf")) message("Plotting Results - ", path) @@ -1084,6 +1156,29 @@ getMonocleTrajectories <- function( #' `useGroups` to constrain trajectory analysis. #' @param monocleCDS A monocle CDS object created from `getMonocleTrajectories`. #' @param force A boolean value indicating whether to force the trajactory indicated by `name` to be overwritten if it already exists in the given `ArchRProject`. +#' +#' @examples +#' +#' # Get Test ArchR Project +#' proj <- getTestProject() +#' +#' #Create Monocole Trajectory +#' cds <- getMonocleTrajectories( +#' ArchRProj = proj, +#' useGroups = c("C1", "C2", "C3"), +#' principalGroup = "C1", +#' groupBy = "Clusters", +#' embedding = "UMAP" +#' ) +#' +#' # Add Monocole Trajectory +#' proj <- addMonocleTrajectory( +#' ArchRProj = proj, +#' name = "Trajectory_Monocole", +#' useGroups = c("C1", "C2", "C3"), +#' monocleCDS = cds +#' ) +#' #' @export addMonocleTrajectory <- function( ArchRProj = NULL, @@ -1146,6 +1241,22 @@ addMonocleTrajectory <- function( #' @param reducedDims A string indicating the name of the `reducedDims` object from the `ArchRProject` that should be used for trajectory analysis. `embedding` must equal NULL to use. #' @param force A boolean value indicating whether to force the trajactory indicated by `name` to be overwritten if it already exists in the given `ArchRProject`. #' @param seed A number to be used as the seed for random number generation for trajectory creation. +#' +#' @examples +#' +#' # Get Test ArchR Project +#' proj <- getTestProject() +#' +#' #Add SlingShot Trajectory +#' proj <- addSlingShotTrajectories( +#' ArchRProj = proj, +#' name = "Trajectory_SlingShot", +#' useGroups = c("C1", "C2", "C3"), +#' principalGroup = "C1", +#' groupBy = "Clusters", +#' embedding = "UMAP" +#' ) +#' #' @export addSlingShotTrajectories <- function( ArchRProj = NULL, @@ -1169,7 +1280,7 @@ addSlingShotTrajectories <- function( .validInput(input = force, name = "force", valid = c("boolean")) .validInput(input = seed, name = "seed", valid = c("numeric")) - .requirePackage("slingshot") + .requirePackage("slingshot", installInfo = "BiocManager::install('slingshot')") set.seed(seed) @@ -1229,6 +1340,15 @@ addSlingShotTrajectories <- function( #' @param verbose A boolean value indicating whether to use verbose output during execution of this function. Can be set to FALSE for a cleaner output. #' @param binarize A boolean value indicating whether the matrix should be binarized before return. This is often desired when working with insertion counts. #' @param logFile The path to a file to be used for logging ArchR output. +#' +#' @examples +#' +#' # Get Test ArchR Project +#' proj <- getTestProject() +#' +#' # Export Matrix For Stream +#' exportPeakMatrixForSTREAM(proj) +#' #' @export exportPeakMatrixForSTREAM <- function( ArchRProj = NULL, @@ -1249,12 +1369,8 @@ exportPeakMatrixForSTREAM <- function( logFile = logFile ) - featureDF <- ArchR:::.getFeatureDF(getArrowFiles(ArchRProj)[1], "PeakMatrix") - - stopifnot(all(featureDF$idx == rowData(mat)$idx)) - countsDF <- Matrix::summary(assay(mat)) - peaksDF <- data.frame(as.vector(featureDF[,1]), featureDF[,3], featureDF[,4]) + peaksDF <- data.frame(rowRanges(mat))[,1:3] cellsDF <- data.frame(colnames(mat)) data.table::fwrite(countsDF, file = "STREAM_Counts.tsv.gz", sep = "\t", row.names = FALSE, col.names = FALSE) diff --git a/R/ValidationUtils.R b/R/ValidationUtils.R index 3daeeabe..335eab96 100644 --- a/R/ValidationUtils.R +++ b/R/ValidationUtils.R @@ -225,7 +225,11 @@ validBSgenome <- function(genome = NULL, masked = FALSE){ }else if(is.character(genome)){ genome <- tryCatch({ .requirePackage(genome) - bsg <- eval(parse(text = genome)) + bsg <- tryCatch({ + eval(parse(text = paste0(genome))) + }, error = function(e){ + eval(parse(text = paste0(genome,"::",genome))) + }) if(inherits(bsg, "BSgenome")){ return(bsg) }else{ diff --git a/R/VisualizeData.R b/R/VisualizeData.R index ae9eff3e..d84d63c6 100644 --- a/R/VisualizeData.R +++ b/R/VisualizeData.R @@ -17,6 +17,18 @@ #' @param useDingbats A boolean variable that determines wheter to use dingbats characters for plotting points. #' @param plotList A `list` of plots to be printed to the output PDF file. Each element of `plotList` should be a printable plot formatted #' object (ggplot2, plot, heatmap, etc). +#' +#' @examples +#' +#' #Get Test Project +#' proj <- getTestProject() +#' +#' #Plot UMAP +#' p <- plotEmbedding(proj, name = "Clusters") +#' +#' #PDF +#' plotPDF(p, name = "UMAP-Clusters", ArchRProj = proj) +#' #' @export plotPDF <- function( ..., @@ -86,7 +98,7 @@ plotPDF <- function( filename <- file.path(outDir, paste0(name, ".pdf")) } - o <- tryCatch({ + o <- suppressWarnings(tryCatch({ pdf(filename, width = width, height = height, useDingbats = useDingbats) for(i in seq_along(plotList)){ @@ -149,7 +161,7 @@ plotPDF <- function( if(getArchRVerbose()) message(x) - }) + })) return(invisible(0)) @@ -201,6 +213,18 @@ plotPDF <- function( #' @param threads The number of threads to be used for parallel computing. #' @param logFile The path to a file to be used for logging ArchR output. #' @param ... Additional parameters to pass to `ggPoint()` or `ggHex()`. +#' +#' @examples +#' +#' #Get Test Project +#' proj <- getTestProject() +#' +#' #Plot UMAP +#' p <- plotEmbedding(proj, name = "Clusters") +#' +#' #PDF +#' plotPDF(p, name = "UMAP-Clusters", ArchRProj = proj) +#' #' @export plotEmbedding <- function( ArchRProj = NULL, @@ -424,7 +448,7 @@ plotEmbedding <- function( if(!is.null(quantCut)){ plotParamsx$color <- .quantileCut(plotParamsx$color, min(quantCut), max(quantCut)) } - + plotParamsx$pal <- paletteContinuous(set = plotParamsx$continuousSet) if(!is.null(pal)){ @@ -532,6 +556,18 @@ plotEmbedding <- function( #' @param plotAs A string that indicates whether a rigdge plot ("ridges") should be plotted or a violin plot ("violin") should be plotted. #' @param threads The number of threads to be used for parallel computing. #' @param ... Additional parameters to pass to `ggGroup()`. +#' +#' @examples +#' +#' #Get Test Project +#' proj <- getTestProject() +#' +#' #Plot Groups +#' p <- plotGroups(proj, groupBy = "Clusters", colorBy = "colData", name = "TSSEnrichment", plotAs = "violin", alpha = 0.5) +#' +#' #PDF +#' plotPDF(p, name = "Clusters-TSS", ArchRProj = proj) +#' #' @export plotGroups <- function( ArchRProj = NULL, @@ -948,7 +984,7 @@ plotGroups <- function( legend.spacing.x = unit(0, 'cm'), legend.spacing.y = unit(0, 'cm'), legend.text = element_text(size = max(size, 2)) - ) + guides(fill = guide_legend(ncol = 4), color = guide_legend(ncol = 4)) + ) + .gg_guides(fill = guide_legend(ncol = 4), color = guide_legend(ncol = 4)) )$grobs[[legend]] slh <- convertHeight( diff --git a/data/.DS_Store b/data/.DS_Store index 5008ddfc..3048b847 100644 Binary files a/data/.DS_Store and b/data/.DS_Store differ diff --git a/data/geneAnnoHg19test2.rda b/data/geneAnnoHg19test2.rda new file mode 100644 index 00000000..ef66d413 Binary files /dev/null and b/data/geneAnnoHg19test2.rda differ diff --git a/data/genomeAnnoHg19test2.rda b/data/genomeAnnoHg19test2.rda new file mode 100644 index 00000000..f0104434 Binary files /dev/null and b/data/genomeAnnoHg19test2.rda differ diff --git a/data/m1.rda b/data/m1.rda new file mode 100644 index 00000000..fbfe2577 Binary files /dev/null and b/data/m1.rda differ diff --git a/docs/.DS_Store b/docs/.DS_Store index a0589a20..c76c5c78 100644 Binary files a/docs/.DS_Store and b/docs/.DS_Store differ diff --git a/docs/articles/.DS_Store b/docs/articles/.DS_Store index 4a2be0f2..48fc8ecd 100644 Binary files a/docs/articles/.DS_Store and b/docs/articles/.DS_Store differ diff --git a/inst/testdata/PBSmall.arrow b/inst/testdata/PBSmall.arrow new file mode 100644 index 00000000..b22f61fa Binary files /dev/null and b/inst/testdata/PBSmall.arrow differ diff --git a/inst/testdata/PBSmall.tsv.gz b/inst/testdata/PBSmall.tsv.gz new file mode 100644 index 00000000..e82006d5 Binary files /dev/null and b/inst/testdata/PBSmall.tsv.gz differ diff --git a/inst/testdata/PBSmall.tsv.gz.tbi b/inst/testdata/PBSmall.tsv.gz.tbi new file mode 100644 index 00000000..dc7b622c Binary files /dev/null and b/inst/testdata/PBSmall.tsv.gz.tbi differ diff --git a/inst/testdata/PBSmall.zip b/inst/testdata/PBSmall.zip new file mode 100644 index 00000000..6e26590e Binary files /dev/null and b/inst/testdata/PBSmall.zip differ diff --git a/inst/testdata/seRNA_PBMC.rds b/inst/testdata/seRNA_PBMC.rds new file mode 100644 index 00000000..945b5814 Binary files /dev/null and b/inst/testdata/seRNA_PBMC.rds differ diff --git a/man/.DS_Store b/man/.DS_Store index 5008ddfc..c1cc8fd8 100644 Binary files a/man/.DS_Store and b/man/.DS_Store differ diff --git a/man/ArchRBrowser.Rd b/man/ArchRBrowser.Rd index bbe9fa5e..de4eac18 100644 --- a/man/ArchRBrowser.Rd +++ b/man/ArchRBrowser.Rd @@ -58,3 +58,10 @@ This function will open an interactive shiny session in style of a browser track enables direct comparison across samples. Note that the genes displayed in this browser are derived from your \code{geneAnnotation} (i.e. the \code{BSgenome} object you used) so they may not match other online genome browsers that use different gene annotations. } +\examples{ + +proj <- getTestProject() + +#Launch Browser with `ArchRBrowser(proj)` + +} diff --git a/man/ArchRHeatmap.Rd b/man/ArchRHeatmap.Rd new file mode 100644 index 00000000..b69d0e81 --- /dev/null +++ b/man/ArchRHeatmap.Rd @@ -0,0 +1,96 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ArchRHeatmap.R +\name{ArchRHeatmap} +\alias{ArchRHeatmap} +\title{Plot Nice Lookng Heatmap Using Complex Heatmap} +\usage{ +ArchRHeatmap( + mat = NULL, + scale = FALSE, + limits = c(min(mat), max(mat)), + colData = NULL, + color = paletteContinuous(set = "solarExtra", n = 100), + clusterCols = TRUE, + clusterRows = FALSE, + labelCols = FALSE, + labelRows = FALSE, + colorMap = NULL, + useRaster = TRUE, + rasterQuality = 5, + split = NULL, + fontSizeRows = 10, + fontSizeCols = 10, + fontSizeLabels = 8, + colAnnoPerRow = 4, + showRowDendrogram = FALSE, + showColDendrogram = FALSE, + customRowLabel = NULL, + customRowLabelIDs = NULL, + customColLabel = NULL, + customColLabelIDs = NULL, + customLabelWidth = 0.75, + rasterDevice = "png", + padding = 45, + borderColor = NA, + draw = TRUE, + name = "Heatmap" +) +} +\arguments{ +\item{mat}{A matrix to plot heatmap from.} + +\item{scale}{A boolean whether to convert to row Z-scores} + +\item{limits}{A vector of two values describing min and mix limits to plot} + +\item{colData}{A DataFrame matching the columns of the input matrix to overlay on the columns of the heatmap} + +\item{color}{A palette to use for heatmap continuous color scheme. See \code{paletteContinuous}.} + +\item{clusterCols}{A boolean describing whether to cluster columns for heatmap.} + +\item{clusterRows}{A boolean describing whether to cluster rows for heatmap.} + +\item{colorMap}{A list of color mappings matching the column names in colData.} + +\item{useRaster}{A boolean whether to use rastering when plotting heatmap.} + +\item{rasterQuality}{Raster resolution of raster. Default is set to 5 higher numbers increase resolution.} + +\item{split}{A vector of groupings that will split the rows of heatmap (must be equal to nrow(mat)).} + +\item{fontSizeRows}{A numeric value representing the font size for rownames.} + +\item{fontSizeCols}{A numeric value representing the font size for colnames.} + +\item{fontSizeLabels}{A numeric value representing the font size for labels.} + +\item{colAnnoPerRow}{An integer value describing the number of column annotations per row in the legend.} + +\item{showRowDendrogram}{A boolean whether to show row dendrogram in heatmap.} + +\item{showColDendrogram}{A boolean whether to show column dendrogram in heatmap.} + +\item{customRowLabel}{A vector of indices to custom label from rows.} + +\item{customRowLabelIDs}{A vector of custom labels to overlay. Should match length of \code{customRowLabel}.} + +\item{customColLabel}{A vector of indices to custom label from columns.} + +\item{customColLabelIDs}{A vector of custom labels to overlay. Should match length of \code{customColLabel}.} + +\item{customLabelWidth}{A numeric describing the width of the column labels.} + +\item{rasterDevice}{Which device to use for rastering see \code{ComplexHeatmap}.} + +\item{padding}{A numeric (in cm) to pad the heatmap ie adding white space so that the final plot doesnt cutoff.} + +\item{borderColor}{A character representing the border color for each cell in the heatmap.} + +\item{draw}{A boolean whether to draw the heatmap immediately. If FALSE this will return a ComplexHeatmap object.} + +\item{name}{A character that will appear above color bar legend in heatmap.} +} +\description{ +Plot Nice Lookng Heatmap Using Complex Heatmap +} diff --git a/man/ArchRProject.Rd b/man/ArchRProject.Rd index 931a246e..791be003 100644 --- a/man/ArchRProject.Rd +++ b/man/ArchRProject.Rd @@ -34,3 +34,12 @@ genome information such as nucleotide information or chromosome sizes.} \description{ This function will create an ArchRProject from the provided ArrowFiles. } +\examples{ + +# Get Test Arrow +arrow <- getTestArrow() + +# Create ArchR Project for Analysis +proj <- ArchRProject(arrow) + +} diff --git a/man/addArchRAnnotations.Rd b/man/addArchRAnnotations.Rd index 3c69569e..508f5db6 100644 --- a/man/addArchRAnnotations.Rd +++ b/man/addArchRAnnotations.Rd @@ -35,3 +35,12 @@ overwritten if it already exists in the given \code{ArchRProject}.} This function adds information about which peaks in the ArchR database contain input regions to a given ArchRProject. For each peak, a binary value is stored indicating whether each region is observed within the peak region. } +\examples{ + +# Get Test ArchR Project +proj <- getTestProject() + +# Add Motif Annotations +proj <- addArchRAnnotations(proj, name = "test") + +} diff --git a/man/addArchRChrPrefix.Rd b/man/addArchRChrPrefix.Rd index d192f5a5..5748e41d 100644 --- a/man/addArchRChrPrefix.Rd +++ b/man/addArchRChrPrefix.Rd @@ -12,3 +12,9 @@ addArchRChrPrefix(chrPrefix = TRUE) \description{ This function will set the default requirement of chromosomes to have a "chr" prefix. } +\examples{ + +# Add ArchR Chr Prefix +addArchRChrPrefix() + +} diff --git a/man/addArchRDebugging.Rd b/man/addArchRDebugging.Rd index 811a6b42..38695c29 100644 --- a/man/addArchRDebugging.Rd +++ b/man/addArchRDebugging.Rd @@ -12,3 +12,9 @@ addArchRDebugging(debug = FALSE) \description{ This function will set ArchR Debugging which will save an RDS if an error is encountered. } +\examples{ + +# Add ArchR Debugging +addArchRDebugging() + +} diff --git a/man/addArchRGenome.Rd b/man/addArchRGenome.Rd index 152a7e25..5f661e0b 100644 --- a/man/addArchRGenome.Rd +++ b/man/addArchRGenome.Rd @@ -19,3 +19,9 @@ automatically installed if it is not currently installed. This is useful for hel \description{ This function will set the genome across all ArchR functions. } +\examples{ + +# Add ArchR Genome to use globally +addArchRGenome("hg19test2") + +} diff --git a/man/addArchRH5Level.Rd b/man/addArchRH5Level.Rd new file mode 100644 index 00000000..0b667b7c --- /dev/null +++ b/man/addArchRH5Level.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/GlobalDefaults.R +\name{addArchRH5Level} +\alias{addArchRH5Level} +\title{Add a globally-applied compression level for h5 files} +\usage{ +addArchRH5Level(level = 0) +} +\arguments{ +\item{level}{The default compression level to be used for h5 file execution across all ArchR functions.} +} +\description{ +This function will set the default compression level to be used for h5 file execution across all ArchR functions. +} +\examples{ + +# Add ArchR H5 Compression level +addArchRH5Level() + +} diff --git a/man/addArchRLocking.Rd b/man/addArchRLocking.Rd new file mode 100644 index 00000000..3ac07801 --- /dev/null +++ b/man/addArchRLocking.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/GlobalDefaults.R +\name{addArchRLocking} +\alias{addArchRLocking} +\title{Add a globally-applied H5 file locking setup} +\usage{ +addArchRLocking(locking = FALSE) +} +\arguments{ +\item{locking}{The default value for H5 File Locking} +} +\description{ +This function will set the default H5 file locking parameters +} +\examples{ + +# Disable/Add ArchR H5 Locking Globally +addArchRLocking(locking=FALSE) + +} diff --git a/man/addArchRLogging.Rd b/man/addArchRLogging.Rd index b72349fa..f4514fc7 100644 --- a/man/addArchRLogging.Rd +++ b/man/addArchRLogging.Rd @@ -12,3 +12,9 @@ addArchRLogging(useLogs = TRUE) \description{ This function will set ArchR logging } +\examples{ + +# Add ArchR Logging +addArchRLogging() + +} diff --git a/man/addArchRThreads.Rd b/man/addArchRThreads.Rd index ab93e8f5..4c2b6779 100644 --- a/man/addArchRThreads.Rd +++ b/man/addArchRThreads.Rd @@ -17,3 +17,9 @@ To bypass this, setting \code{force = TRUE} will use the number provided to \cod \description{ This function will set the number of threads to be used for parallel computing across all ArchR functions. } +\examples{ + +# Add ArchR Threads +addArchRThreads() + +} diff --git a/man/addArchRVerbose.Rd b/man/addArchRVerbose.Rd index dadc3508..5676708b 100644 --- a/man/addArchRVerbose.Rd +++ b/man/addArchRVerbose.Rd @@ -12,3 +12,9 @@ addArchRVerbose(verbose = TRUE) \description{ This function will set ArchR logging verbosity. } +\examples{ + +# Add ArchR Verbose +addArchRVerbose() + +} diff --git a/man/addBgdPeaks.Rd b/man/addBgdPeaks.Rd index 1a2c4c37..3e991fce 100644 --- a/man/addBgdPeaks.Rd +++ b/man/addBgdPeaks.Rd @@ -37,3 +37,12 @@ is to save this file in the \code{outputDirectory} of the \code{ArchRProject}.} \description{ This function will compute background peaks controlling for total accessibility and GC-content and add this information to an ArchRProject. } +\examples{ + +# Get Test ArchR Project +proj <- getTestProject() + +# Add Background Peaks +proj <- addBgdPeaks(proj, force = TRUE) + +} diff --git a/man/addCellColData.Rd b/man/addCellColData.Rd index f37a47e3..019a08a8 100644 --- a/man/addCellColData.Rd +++ b/man/addCellColData.Rd @@ -29,3 +29,12 @@ already exists as a column name in \code{cellColData}.} \description{ This function adds new data to cellColData in a given ArchRProject. } +\examples{ + +# Get Test ArchR Project +proj <- getTestProject() + +# Add Cell Column Data +addCellColData(proj, data = proj$TSSEnrichment, name = "TSS2", cells = getCellNames(proj)) + +} diff --git a/man/addClusters.Rd b/man/addClusters.Rd index f9b78ebc..f9b96e44 100644 --- a/man/addClusters.Rd +++ b/man/addClusters.Rd @@ -114,3 +114,12 @@ exists as a column name in \code{cellColData}.} \description{ This function will identify clusters from a reduced dimensions object in an ArchRProject or from a supplied reduced dimensions matrix. } +\examples{ + +# Get Test ArchR Project +proj <- getTestProject() + +# Get Peak Annotations +proj <- addClusters(proj, force = TRUE) + +} diff --git a/man/addCoAccessibility.Rd b/man/addCoAccessibility.Rd index 881e697f..4ff15bc7 100644 --- a/man/addCoAccessibility.Rd +++ b/man/addCoAccessibility.Rd @@ -11,10 +11,11 @@ addCoAccessibility( scaleDims = NULL, corCutOff = 0.75, cellsToUse = NULL, + excludeChr = NULL, k = 100, knnIteration = 500, overlapCutoff = 0.8, - maxDist = 1e+05, + maxDist = 100000, scaleTo = 10^4, log2Norm = TRUE, seed = 1, @@ -40,6 +41,8 @@ sequencing depth that is greater than the \code{corCutOff}, it will be excluded \item{cellsToUse}{A character vector of cellNames to compute coAccessibility on if desired to run on a subset of the total cells.} +\item{excludeChr}{A character vector containing the \code{seqnames} of the chromosomes that should be excluded from this analysis.} + \item{k}{The number of k-nearest neighbors to use for creating single-cell groups for correlation analyses.} \item{knnIteration}{The number of k-nearest neighbor groupings to test for passing the supplied \code{overlapCutoff}.} @@ -66,3 +69,12 @@ of the seed used so that you can reproduce results downstream.} \description{ This function will add co-accessibility scores to peaks in a given ArchRProject } +\examples{ + +# Get Test ArchR Project +proj <- getTestProject() + +# Co-Accessibility +proj <- addCoAccessibility(proj, k = 20) + +} diff --git a/man/addDeviationsMatrix.Rd b/man/addDeviationsMatrix.Rd index 054260c2..aeff4fc3 100644 --- a/man/addDeviationsMatrix.Rd +++ b/man/addDeviationsMatrix.Rd @@ -52,3 +52,19 @@ already exists in the ArrowFiles associated with the given \code{ArchRProject}.} \description{ This function will compute peakAnnotation deviations for each ArrowFiles independently while controlling for global biases (low-memory requirement). } +\examples{ + +# Get Test ArchR Project +proj <- getTestProject() + +# Add Background Peaks +proj <- addBgdPeaks(proj, force = TRUE) + +# Add Motif Deviations +proj <- addDeviationsMatrix( + ArchRProj = proj, + peakAnnotation = "Motif", + force = TRUE +) + +} diff --git a/man/addDoubletScores.Rd b/man/addDoubletScores.Rd index 2d1e5bef..c7c03e25 100644 --- a/man/addDoubletScores.Rd +++ b/man/addDoubletScores.Rd @@ -71,3 +71,12 @@ For each sample in the ArrowFiles or ArchRProject provided, this function will i to each cell. This allows for removing strong heterotypic doublet-based clusters downstream. A doublet results from a droplet that contained two cells, causing the ATAC-seq data to be a mixture of the signal from each cell. } +\examples{ + +# Get Test ArchR Project +proj <- getTestProject() + +# Add Doublet Scores for Small Project +proj <- addDoubletScores(proj, dimsToUse = 1:5, LSIParams = list(dimsToUse = 1:5, varFeatures=1000, iterations = 2)) + +} diff --git a/man/addFeatureCounts.Rd b/man/addFeatureCounts.Rd index 93ea8082..dc80932c 100644 --- a/man/addFeatureCounts.Rd +++ b/man/addFeatureCounts.Rd @@ -29,3 +29,12 @@ addFeatureCounts( \description{ This function will add total counts of scATAC cells in provided features into ArchRProject. } +\examples{ + +# Get Test ArchR Project +proj <- getTestProject() + +# Add Feature Counts +proj <- addFeatureCounts(proj, features = getPeakSet(proj), name = 'ReadsInPeaks') + +} diff --git a/man/addFeatureMatrix.Rd b/man/addFeatureMatrix.Rd index 17b876ed..10627baf 100644 --- a/man/addFeatureMatrix.Rd +++ b/man/addFeatureMatrix.Rd @@ -42,3 +42,12 @@ downstream analyses when working with insertion counts.} \description{ This function for each sample will independently compute counts for each feature per cell in the provided ArchRProject or set of ArrowFiles. } +\examples{ + +# Get Test ArchR Project +proj <- getTestProject() + +# Add Custom Matrix Which Is Just Peak Set +proj <- addFeatureMatrix(proj, features = getPeakSet(proj)) + +} diff --git a/man/addGeneExpressionMatrix.Rd b/man/addGeneExpressionMatrix.Rd index 06365823..f56b681a 100644 --- a/man/addGeneExpressionMatrix.Rd +++ b/man/addGeneExpressionMatrix.Rd @@ -38,7 +38,7 @@ for Seurat Objects (see \code{Seurat::as.SingleCellExperiment}). The provided va \item{parallelParam}{A list of parameters to be passed for biocparallel/batchtools parallel computing.} \item{strictMatch}{A boolean value indicating whether every cell in \code{input} must be represented in \code{seRNA}. If set to \code{FALSE}, -this and this \code{GeneExpressionMatrix} is used for certain downstream analyses such as \code{addIterativeLSI()}, then errors may occur +and this \code{GeneExpressionMatrix} is used for certain downstream analyses such as \code{addIterativeLSI()}, then errors may occur because not all cells will have relevant information.} \item{force}{A boolean value indicating whether to force the matrix indicated by \code{matrixName} to be overwritten if it already exist in the given \code{input}.} diff --git a/man/addGeneIntegrationMatrix.Rd b/man/addGeneIntegrationMatrix.Rd index dba7957d..722c1561 100644 --- a/man/addGeneIntegrationMatrix.Rd +++ b/man/addGeneIntegrationMatrix.Rd @@ -124,3 +124,31 @@ the assignment accuracy of the group in the RNA cells. Lower scores represent am This function, will integrate multiple subsets of scATAC cells with a scRNA experiment, compute matched scRNA profiles and then store this in each samples ArrowFile. } +\examples{ + +#Get Test Project +proj <- getTestProject() + +#Get RNA Matrix +sePBMC <- readRDS( + file.path(system.file("testdata", package = "ArchR"), "seRNA_PBMC.rds") +) + +#Gene Integration Matrix +proj <- addGeneIntegrationMatrix( + ArchRProj = proj, + useMatrix = "GeneScoreMatrix", + matrixName = "GeneIntegrationMatrix", + reducedDims = "IterativeLSI", + seRNA = sePBMC, + addToArrow = FALSE, + groupRNA = "CellType", + nameCell = "predictedCell_Un2", + nameGroup = "predictedGroup_Un2", + nameScore = "predictedScore_Un2", + dimsToUse = 1:10, + nGenes = 250, + force = TRUE +) + +} diff --git a/man/addGeneScoreMatrix.Rd b/man/addGeneScoreMatrix.Rd index 04e50485..cf27e559 100644 --- a/man/addGeneScoreMatrix.Rd +++ b/man/addGeneScoreMatrix.Rd @@ -9,13 +9,13 @@ addGeneScoreMatrix( genes = getGenes(input), geneModel = "exp(-abs(x)/5000) + exp(-1)", matrixName = "GeneScoreMatrix", - extendUpstream = c(1000, 1e+05), - extendDownstream = c(1000, 1e+05), + extendUpstream = c(1000, 100000), + extendDownstream = c(1000, 100000), geneUpstream = 5000, geneDownstream = 0, useGeneBoundaries = TRUE, useTSS = FALSE, - extendTSS = FALSE, + extendTSS = TRUE, tileSize = 500, ceiling = 4, geneScaleFactor = 5, @@ -43,7 +43,7 @@ should be a function of \code{x}, where \code{x} is the stranded distance from t activity score calculation.} \item{extendDownstream}{The minimum and maximum number of basepairs downstream of the transcription start site or transcription termination site -(based on 'useTSS') to consider for gene activity score calculation.} +(based on 'useTSS' and 'extendTSS') to consider for gene activity score calculation.} \item{geneUpstream}{An integer describing the number of bp upstream the gene to extend the gene body. This effectively makes the gene body larger as there are proximal peaks that should be weighted equally to the gene body. This parameter is used if 'useTSS=FALSE'.} @@ -89,3 +89,12 @@ biasing the geneScores for genes nearby that locus.} This function, for each sample, will independently compute counts for each tile per cell and then infer gene activity scores. } +\examples{ + +# Get Test ArchR Project +proj <- getTestProject() + +# Add Gene Score Matrix With New Model +proj <- addGeneScoreMatrix(proj, matrixName = "GeneScoreMatrix2", geneModel = "exp(-abs(x)/10000) + exp(-1)") + +} diff --git a/man/addGroupCoverages.Rd b/man/addGroupCoverages.Rd index 3622b119..f2ea3e3d 100644 --- a/man/addGroupCoverages.Rd +++ b/man/addGroupCoverages.Rd @@ -15,6 +15,7 @@ addGroupCoverages( minReplicates = 2, maxReplicates = 5, sampleRatio = 0.8, + excludeChr = NULL, kmerLength = 6, threads = getArchRThreads(), returnGroups = FALSE, @@ -49,6 +50,8 @@ of excessively large files which would negatively impact memory requirements.} \item{sampleRatio}{The fraction of the total cells that can be sampled to generate any given pseudo-bulk replicate.} +\item{excludeChr}{A character vector containing the \code{seqnames} of the chromosomes that should be excluded from this analysis.} + \item{kmerLength}{The length of the k-mer used for estimating Tn5 bias.} \item{threads}{The number of threads to be used for parallel computing.} @@ -59,7 +62,7 @@ TileMatrix (\code{peakMethod = "Tiles"}).} \item{parallelParam}{A list of parameters to be passed for biocparallel/batchtools parallel computing.} -\item{force}{A boolean value that indicates whether or not to overwrite the relevant data in the \code{ArchRProject} object if +\item{force}{A boolean value that indicates whether or not to skip validation and overwrite the relevant data in the \code{ArchRProject} object if insertion coverage / pseudo-bulk replicate information already exists.} \item{verbose}{A boolean value that determines whether standard output includes verbose sections.} @@ -70,3 +73,12 @@ insertion coverage / pseudo-bulk replicate information already exists.} This function will merge cells within each designated cell group for the generation of pseudo-bulk replicates and then merge these replicates into a single insertion coverage file. } +\examples{ + +# Get Test ArchR Project +proj <- getTestProject() + +# Add Group Coverages +proj <- addGroupCoverages(proj, force = TRUE) + +} diff --git a/man/addHarmony.Rd b/man/addHarmony.Rd index 260514d4..06e3f48d 100644 --- a/man/addHarmony.Rd +++ b/man/addHarmony.Rd @@ -48,3 +48,15 @@ exists as a column name in \code{cellColData}.} \description{ This function will add the Harmony batch-corrected reduced dimensions to an ArchRProject. } +\examples{ + +# Get Test ArchR Project +proj <- getTestProject() + +# Add Confounder +proj <- addCellColData(proj, data = proj$TSSEnrichment > 10, name = "TSSQC", cells = getCellNames(proj)) + +# Run Harmony +proj <- addHarmony(proj, groupBy = "TSSQC") + +} diff --git a/man/addImputeWeights.Rd b/man/addImputeWeights.Rd index 0be5e36b..e06154c7 100644 --- a/man/addImputeWeights.Rd +++ b/man/addImputeWeights.Rd @@ -70,3 +70,12 @@ reproduce results downstream.} \description{ This function computes imputations weights that describe each cell as a linear combination of many cells based on a MAGIC diffusion matrix. } +\examples{ + +# Get Test ArchR Project +proj <- getTestProject() + +# Add Impute Weights +proj <- addImputeWeights(proj) + +} diff --git a/man/addIterativeLSI.Rd b/man/addIterativeLSI.Rd index ac601e66..2b467c7f 100644 --- a/man/addIterativeLSI.Rd +++ b/man/addIterativeLSI.Rd @@ -26,9 +26,10 @@ addIterativeLSI( sampleCellsFinal = NULL, selectionMethod = "var", scaleTo = 10000, - totalFeatures = 5e+05, + totalFeatures = 500000, filterQuantile = 0.995, excludeChr = c(), + keep0lsi = FALSE, saveIterations = TRUE, UMAPParams = list(n_neighbors = 40, min_dist = 0.4, metric = "cosine", verbose = FALSE, fast_sgd = TRUE), @@ -108,6 +109,8 @@ insertion counts will be ignored for the first LSI iteration.} \item{excludeChr}{A string of chromosomes to exclude for iterativeLSI procedure.} +\item{keep0lsi}{A boolean whether to keep cells with no reads in features used for LSI.} + \item{saveIterations}{A boolean value indicating whether the results of each LSI iterations should be saved as compressed \code{.rds} files in the designated \code{outDir}.} @@ -131,3 +134,12 @@ reproduce results downstream.} \description{ This function will compute an iterative LSI dimensionality reduction on an ArchRProject. } +\examples{ + +# Get Test ArchR Project +proj <- getTestProject() + +# Iterative LSI +proj <- addIterativeLSI(proj, dimsToUse = 1:5, varFeatures=1000, iterations = 2, force=TRUE) + +} diff --git a/man/addModuleScore.Rd b/man/addModuleScore.Rd index 2a27fe91..9814eda6 100644 --- a/man/addModuleScore.Rd +++ b/man/addModuleScore.Rd @@ -44,3 +44,34 @@ grouping of multiple features together into a single quantitative measurement. C function only works for modules derived from the \code{GeneScoreMatrix}. Each module is added as a new column in \code{cellColData} } +\examples{ + +# Get Test ArchR Project +proj <- getTestProject() + +# Add Module Score +proj <- addModuleScore(proj, useMatrix = "GeneScoreMatrix", nBin = 25, nBgd = 25, features = list(TScore = c('CD3D', 'CD3E'))) + +#Check +split(proj@cellColData$Module.TScore, proj@cellColData$CellType) \%>\% lapply(mean) \%>\% unlist +# B M T +# -4.352769 -8.438259 9.942678 + +#Get T cell Features +features <- getGenes() +T <- features[features$symbol \%in\% c("CD3D", "CD3E")] +B <- features[features$symbol \%in\% c("MS4A1")] + +# Add Module Score +proj <- addModuleScore(proj, useMatrix = "TileMatrix", nBin = 25, nBgd = 25, features = list(TScore = T, BScore = B)) + +#Check +split(proj@cellColData$Module.TScore, proj@cellColData$CellType) \%>\% lapply(mean) \%>\% unlist +# B M T +# -0.03866667 -0.05303030 0.10306122 + +split(proj@cellColData$Module.BScore, proj@cellColData$CellType) \%>\% lapply(mean) \%>\% unlist +# B M T +# 0.10000000 -0.03939394 -0.05387755 + +} diff --git a/man/addMonocleTrajectory.Rd b/man/addMonocleTrajectory.Rd index 3364de0c..c1554b13 100644 --- a/man/addMonocleTrajectory.Rd +++ b/man/addMonocleTrajectory.Rd @@ -31,3 +31,26 @@ addMonocleTrajectory( This function will add a trajectory from a monocle CDS created from \code{getMonocleTrajectories} to an ArchRProject. } +\examples{ + +# Get Test ArchR Project +proj <- getTestProject() + +#Create Monocole Trajectory +cds <- getMonocleTrajectories( + ArchRProj = proj, + useGroups = c("C1", "C2", "C3"), + principalGroup = "C1", + groupBy = "Clusters", + embedding = "UMAP" +) + +# Add Monocole Trajectory +proj <- addMonocleTrajectory( + ArchRProj = proj, + name = "Trajectory_Monocole", + useGroups = c("C1", "C2", "C3"), + monocleCDS = cds +) + +} diff --git a/man/addMotifAnnotations.Rd b/man/addMotifAnnotations.Rd index 296da8cc..2d4fecdd 100644 --- a/man/addMotifAnnotations.Rd +++ b/man/addMotifAnnotations.Rd @@ -11,7 +11,7 @@ addMotifAnnotations( species = NULL, collection = "CORE", motifPWMs = NULL, - cutOff = 5e-05, + cutOff = 0.00005, width = 7, version = 2, force = FALSE, @@ -51,9 +51,18 @@ it already exists in the given \code{ArchRProject}.} \item{logFile}{The path to a file to be used for logging ArchR output.} -\item{...}{Additional parameters to be passed to \code{TFBSTools::getMatrixSet} for getting a PWM object.} +\item{...}{Additional parameters to be passed to \code{TFBSTools::getMatrixSet} for getting a JASPAR PWM object.} } \description{ This function adds information about which peaks contain motifs to a given ArchRProject. For each peak, a binary value is stored indicating whether each motif is observed within the peak region. } +\examples{ + +# Get Test ArchR Project +proj <- getTestProject() + +# Add Motif Annotations +proj <- addMotifAnnotations(proj, motifSet = "cisbptest", annoName = "test") + +} diff --git a/man/addPeak2GeneLinks.Rd b/man/addPeak2GeneLinks.Rd index 891f9ff8..2e8dcb55 100644 --- a/man/addPeak2GeneLinks.Rd +++ b/man/addPeak2GeneLinks.Rd @@ -12,6 +12,7 @@ addPeak2GeneLinks( scaleDims = NULL, corCutOff = 0.75, cellsToUse = NULL, + excludeChr = NULL, k = 100, knnIteration = 500, overlapCutoff = 0.8, @@ -20,6 +21,8 @@ addPeak2GeneLinks( log2Norm = TRUE, predictionCutoff = 0.4, addEmpiricalPval = FALSE, + addPermutedPval = FALSE, + nperm = 100, seed = 1, threads = max(floor(getArchRThreads()/2), 1), verbose = TRUE, @@ -45,6 +48,8 @@ correlation to sequencing depth that is greater than the \code{corCutOff}, it wi \item{cellsToUse}{A character vector of cellNames to compute coAccessibility on if desired to run on a subset of the total cells.} +\item{excludeChr}{A character vector containing the \code{seqnames} of the chromosomes that should be excluded from this analysis.} + \item{k}{The number of k-nearest neighbors to use for creating single-cell groups for correlation analyses.} \item{knnIteration}{The number of k-nearest neighbor groupings to test for passing the supplied \code{overlapCutoff}.} @@ -63,6 +68,11 @@ from the \code{peakSet} of the \code{ArchRProject} and normalized to the total d \item{addEmpiricalPval}{Add empirical p-values based on randomly correlating peaks and genes not on the same seqname.} +\item{addPermutedPval}{Add permuted p-values based on shuffle sample correlating peaks and genes. This approach was adapted from +Regner et al 2021 "A multi-omic single-cell landscape of human gynecologic malignancies".} + +\item{nperm}{An integer representing the number of permutations to run for Regner et al 2021 approach.} + \item{seed}{A number to be used as the seed for random number generation required in knn determination. It is recommended to keep track of the seed used so that you can reproduce results downstream.} @@ -75,3 +85,15 @@ to keep track of the seed used so that you can reproduce results downstream.} \description{ This function will add peak-to-gene links to a given ArchRProject } +\examples{ + +# Get Test ArchR Project +proj <- getTestProject() + +# Add P2G Links +proj <- addPeak2GeneLinks(proj, k = 20) + +# Get P2G Links +p2g <- getPeak2GeneLinks(proj) + +} diff --git a/man/addPeakAnnotations.Rd b/man/addPeakAnnotations.Rd index 72775a9f..45233572 100644 --- a/man/addPeakAnnotations.Rd +++ b/man/addPeakAnnotations.Rd @@ -28,3 +28,15 @@ if it already exists in the given \code{ArchRProject}.} This function adds information about which peaks contain input regions to a given ArchRProject. For each peak, a binary value is stored indicating whether each region is observed within the peak region. } +\examples{ + +# Get Test ArchR Project +proj <- getTestProject() + +# Get Motif Positions Can Be Any Interval GRanges List +positions <- getPositions(proj) + +# Add Peak Annotations +proj <- addPeakAnnotations(proj, regions = positions) + +} diff --git a/man/addPeakMatrix.Rd b/man/addPeakMatrix.Rd index c58ecfa6..711b99f2 100644 --- a/man/addPeakMatrix.Rd +++ b/man/addPeakMatrix.Rd @@ -37,3 +37,12 @@ for downstream analyses when working with insertion counts.} This function, for each sample, will independently compute counts for each peak per cell in the provided ArchRProject using the "PeakMatrix". } +\examples{ + +# Get Test ArchR Project +proj <- getTestProject() + +# Add Peak Matrix +proj <- addPeakMatrix(proj) + +} diff --git a/man/addPeakSet.Rd b/man/addPeakSet.Rd index 81dcd3e6..ea07e8c2 100644 --- a/man/addPeakSet.Rd +++ b/man/addPeakSet.Rd @@ -25,3 +25,12 @@ whether or not to overwrite this \code{peakSet}.} \description{ This function adds a peak set as a GRanges object to a given ArchRProject. } +\examples{ + +# Get Test ArchR Project +proj <- getTestProject() + +# Add PeakSet +addPeakSet(proj, peakSet = getPeakSet(proj), force = TRUE) + +} diff --git a/man/addProjectSummary.Rd b/man/addProjectSummary.Rd index 9acf1d85..db668e8c 100644 --- a/man/addProjectSummary.Rd +++ b/man/addProjectSummary.Rd @@ -16,3 +16,12 @@ addProjectSummary(ArchRProj = NULL, name = NULL, summary = NULL) \description{ This function adds info to the projectSummary of an ArchRProject } +\examples{ + +# Get Test ArchR Project +proj <- getTestProject() + +# Add Project Summary +addProjectSummary(proj, name = "test", summary = "test successful") + +} diff --git a/man/addReproduciblePeakSet.Rd b/man/addReproduciblePeakSet.Rd index 7bd22e82..eef6b5d1 100644 --- a/man/addReproduciblePeakSet.Rd +++ b/man/addReproduciblePeakSet.Rd @@ -102,3 +102,15 @@ peak calling (not for MACS2). See \code{addGroupCoverages()} for more info.} This function will get insertions from coverage files, call peaks, and merge peaks to get a "Union Reproducible Peak Set". } +\examples{ + +# Get Test ArchR Project +proj <- getTestProject() + +# Add Peak Matrix Tiles +proj <- addReproduciblePeakSet(proj, peakMethod = "tiles") + +# Add Peak Matrix Macs2 (Preferred) +proj <- addReproduciblePeakSet(proj, peakMethod = "macs2") + +} diff --git a/man/addSampleColData.Rd b/man/addSampleColData.Rd index a15dfaba..768537e9 100644 --- a/man/addSampleColData.Rd +++ b/man/addSampleColData.Rd @@ -29,3 +29,12 @@ already exists as a column name in \code{sampleColData}.} \description{ This function adds new data to sampleColData in an ArchRProject. } +\examples{ + +# Get Test ArchR Project +proj <- getTestProject() + +# Add Sample Column Data +addSampleColData(proj, data = 1, name = "Test", samples = "PBSmall") + +} diff --git a/man/addSlingShotTrajectories.Rd b/man/addSlingShotTrajectories.Rd index 037a6ce0..0167b76f 100644 --- a/man/addSlingShotTrajectories.Rd +++ b/man/addSlingShotTrajectories.Rd @@ -41,3 +41,19 @@ in \code{cellColData}. This limits the groups used to identify trajectories.} This function will fit a supervised trajectory in a lower dimensional space that can then be used for downstream analyses. } +\examples{ + +# Get Test ArchR Project +proj <- getTestProject() + +#Add SlingShot Trajectory +proj <- addSlingShotTrajectories( + ArchRProj = proj, + name = "Trajectory_SlingShot", + useGroups = c("C1", "C2", "C3"), + principalGroup = "C1", + groupBy = "Clusters", + embedding = "UMAP" +) + +} diff --git a/man/addTSNE.Rd b/man/addTSNE.Rd index 4697a3f3..8c9a6ac2 100644 --- a/man/addTSNE.Rd +++ b/man/addTSNE.Rd @@ -64,3 +64,12 @@ reproduce results downstream.} \description{ This function will compute a TSNE embedding and add it to an ArchRProject. } +\examples{ + +# Get Test ArchR Project +proj <- getTestProject() + +# Add UMAP for Small Project +proj <- addTSNE(proj, force = TRUE) + +} diff --git a/man/addTileMatrix.Rd b/man/addTileMatrix.Rd index b1e71406..3096d32c 100644 --- a/man/addTileMatrix.Rd +++ b/man/addTileMatrix.Rd @@ -43,3 +43,12 @@ is to retrieve this from the \code{ArchRProject} using \code{getBlacklist()}.} \description{ This function, for each sample, will independently compute counts for each tile } +\examples{ + +# Get Test ArchR Project +proj <- getTestProject() + +# Add Tile Matrix +proj <- addTileMatrix(proj, force = TRUE, tileSize = 25000) + +} diff --git a/man/addTrajectory.Rd b/man/addTrajectory.Rd index 1ac0b760..7d518071 100644 --- a/man/addTrajectory.Rd +++ b/man/addTrajectory.Rd @@ -59,3 +59,12 @@ is above the provided quantile will be excluded.} This function will fit a supervised trajectory in a lower dimensional space that can then be used for downstream analyses. } +\examples{ + +# Get Test ArchR Project +proj <- getTestProject() + +#Add Trajectory +proj <- addTrajectory(proj, trajectory = c("C1", "C2", "C3"), embedding = "UMAP", force = TRUE) + +} diff --git a/man/addUMAP.Rd b/man/addUMAP.Rd index 00ca5503..f72cca57 100644 --- a/man/addUMAP.Rd +++ b/man/addUMAP.Rd @@ -72,3 +72,12 @@ reproduce results downstream.} \description{ This function will compute a UMAP embedding and add it to an ArchRProject. } +\examples{ + +# Get Test ArchR Project +proj <- getTestProject() + +# Add UMAP for Small Project +proj <- addUMAP(proj, force = TRUE) + +} diff --git a/man/confusionMatrix.Rd b/man/confusionMatrix.Rd index a56920e9..987f3fcf 100644 --- a/man/confusionMatrix.Rd +++ b/man/confusionMatrix.Rd @@ -14,3 +14,15 @@ confusionMatrix(i = NULL, j = NULL) \description{ This function creates a confusion matrix based on two value vectors. } +\examples{ + +# Get Test ArchR Project +proj <- getTestProject() + +# Overlap of Clusters and CellType +confusionMatrix(proj$Clusters, proj$CellType) + +# Overlap of Cell Type and RNA Predict +confusionMatrix(proj$CellType, proj$predictedGroup_Un) + +} diff --git a/man/correlateMatrices.Rd b/man/correlateMatrices.Rd index 6adb8515..4a8716e3 100644 --- a/man/correlateMatrices.Rd +++ b/man/correlateMatrices.Rd @@ -79,3 +79,18 @@ of the seed used so that you can reproduce results downstream.} \description{ This function will correlate 2 matrices within an ArchRProject by name matching. } +\examples{ + +# Get Test ArchR Project +proj <- getTestProject() + +# Correlate Matrices +dfCor <- correlateMatrices( + ArchRProj = proj, + useMatrix1 = "GeneScoreMatrix", + useMatrix2 = "GeneIntegrationMatrix", + dimsToUse = 1:5, + k = 20 +) + +} diff --git a/man/correlateTrajectories.Rd b/man/correlateTrajectories.Rd index f824e7d3..0d080c45 100644 --- a/man/correlateTrajectories.Rd +++ b/man/correlateTrajectories.Rd @@ -62,3 +62,19 @@ computing correlations.} \description{ This function will correlate 2 trajectory matrices from getTrajectory. } +\examples{ + +# Get Test ArchR Project +proj <- getTestProject() + +#Add Trajectory +proj <- addTrajectory(proj, trajectory = c("C1", "C2", "C3"), embedding = "UMAP", force = TRUE) + +#Get Trajectories +seTraj1 <- getTrajectory(proj, useMatrix = "GeneScoreMatrix") +seTraj2 <- getTrajectory(proj, useMatrix = "GeneIntegrationMatrix") + +#Correlate +corTraj <- correlateTrajectories(seTraj1, seTraj2, corCutOff = 0.35, varCutOff1 = 0.6, varCutOff2 = 0.6) + +} diff --git a/man/createArrowFiles.Rd b/man/createArrowFiles.Rd index f1bd3018..be28b415 100644 --- a/man/createArrowFiles.Rd +++ b/man/createArrowFiles.Rd @@ -13,7 +13,7 @@ createArrowFiles( genomeAnnotation = getGenomeAnnotation(), minTSS = 4, minFrags = 1000, - maxFrags = 1e+05, + maxFrags = 100000, minFragSize = 10, maxFragSize = 2000, QCDir = "QualityControl", @@ -140,3 +140,19 @@ ATAC-seq signal proximal to the TSS to estimate gene activity.} \description{ This function will create ArrowFiles from input files. These ArrowFiles are the main constituent for downstream analysis in ArchR. } +\examples{ + +# Get Test Fragments +fragments <- getTestFragments() + +# Create Arrow Files +arrowFiles <- createArrowFiles( + inputFiles = fragments, + sampleNames = "PBSmall", + minFrags = 100, + nChunk = 1, + TileMatParams=list(tileSize=10000), + force = TRUE +) + +} diff --git a/man/createGeneAnnotation.Rd b/man/createGeneAnnotation.Rd index a4fd536e..5309a3e6 100644 --- a/man/createGeneAnnotation.Rd +++ b/man/createGeneAnnotation.Rd @@ -11,7 +11,8 @@ createGeneAnnotation( genes = NULL, exons = NULL, TSS = NULL, - annoStyle = NULL + annoStyle = NULL, + singleStrand = FALSE ) } \arguments{ @@ -31,7 +32,29 @@ For example, from \code{orgdb <- org.Hs.eg.db}.} \item{TSS}{A \code{GRanges} object containing standed transcription start site coordinates for computing TSS enrichment scores downstream.} \item{annoStyle}{annotation style to map between gene names and various gene identifiers e.g. "ENTREZID", "ENSEMBL".} + +\item{singleStrand}{A boolean for GenomicFeatures::genes(\code{single.strand.genes.only}) parameter} } \description{ This function will create a gene annotation object that can be used for creating ArrowFiles or an ArchRProject, etc. } +\examples{ + +if (!require("TxDb.Hsapiens.UCSC.hg19.knownGene", quietly = TRUE)) BiocManager::install("TxDb.Hsapiens.UCSC.hg19.knownGene") +if (!require("org.Hs.eg.db", quietly = TRUE)) BiocManager::install("org.Hs.eg.db") +library(TxDb.Hsapiens.UCSC.hg19.knownGene) +library(org.Hs.eg.db) + +# Get Txdb +TxDb <- TxDb.Hsapiens.UCSC.hg19.knownGene + +# Get OrgDb +OrgDb <- org.Hs.eg.db + +# Create Genome Annotation +geneAnno <- createGeneAnnotation(TxDb=TxDb, OrgDb=OrgDb) + +# Also can create from a string if BSgenome exists +geneAnno <- createGeneAnnotation("hg19") + +} diff --git a/man/createGenomeAnnotation.Rd b/man/createGenomeAnnotation.Rd index e52160c0..1a39a28d 100644 --- a/man/createGenomeAnnotation.Rd +++ b/man/createGenomeAnnotation.Rd @@ -29,3 +29,18 @@ non-standard chromosomes will still be removed as defined in \code{filterChrGR() \description{ This function will create a genome annotation object that can be used for creating ArrowFiles or an ArchRProject, etc. } +\examples{ + +if (!require("BSgenome.Hsapiens.UCSC.hg19", quietly = TRUE)) BiocManager::install("BSgenome.Hsapiens.UCSC.hg19") +library(BSgenome.Hsapiens.UCSC.hg19) + +# Get Genome +genome <- BSgenome.Hsapiens.UCSC.hg19 + +# Create Genome Annotation +genomeAnno <- createGenomeAnnotation(genome) + +# Also can create from a string if BSgenome exists +genomeAnno <- createGenomeAnnotation("hg19") + +} diff --git a/man/createLogFile.Rd b/man/createLogFile.Rd index 1c46c0e6..0ee8cd79 100644 --- a/man/createLogFile.Rd +++ b/man/createLogFile.Rd @@ -15,3 +15,9 @@ createLogFile(name = NULL, logDir = "ArchRLogs", useLogs = getArchRLogging()) This function will create a log file for ArchR functions. If ArchRLogging is not TRUE this function will return NULL. } +\examples{ + +# Create Log File +createLogFile(name = "test") + +} diff --git a/man/customEnrichment.Rd b/man/customEnrichment.Rd new file mode 100644 index 00000000..4bed553d --- /dev/null +++ b/man/customEnrichment.Rd @@ -0,0 +1,71 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/AnnotationPeaks.R +\name{customEnrichment} +\alias{customEnrichment} +\title{Hypergeometric Enrichment in input peak ranges.} +\usage{ +customEnrichment(ranges = NULL, matches = NULL, bgdPeaks = NULL) +} +\arguments{ +\item{ranges}{A \code{GenomicRanges} object of peaks/regions to overlap with peaks.} + +\item{matches}{A custom \code{peakAnnotation} matches object used as input for the hypergeometric test. See +\code{motifmatchr::matchmotifs()} for additional information.} + +\item{bgdPeaks}{A \code{SummarizedExperiment} of background peaks from \code{getBgdPeaks} can be NULL for using all peaks.} +} +\description{ +This function will perform hypergeometric enrichment of a given peak matches object and ranges. +} +\examples{ +#Project +proj <- getTestProject() + +#Get Peaks +peaks <- getPeakSet(proj) + +#Custom C1 Mono +peaks1 <- peaks[names(peaks)=="C1"] + +#All Peaks +customEnrichment( + ranges = peaks1, + matches = getMatches(proj) +) +# feature CompareFrequency nCompare CompareProportion BackgroundFrequency +# CEBPB_1 CEBPB_1 70 635 0.11023622 122 +# CEBPA_2 CEBPA_2 81 635 0.12755906 156 +# IRF4_4 IRF4_4 103 635 0.16220472 276 +# EOMES_6 EOMES_6 36 635 0.05669291 120 +# ETS1_3 ETS1_3 38 635 0.05984252 149 +# PAX5_5 PAX5_5 23 635 0.03622047 124 +# nBackground BackgroundProporition Enrichment mlog10p mlog10Padj +# CEBPB_1 2142 0.05695612 1.9354589 10.3279 9.373657 +# CEBPA_2 2142 0.07282913 1.7514839 8.9394 7.985157 +# IRF4_4 2142 0.12885154 1.2588497 2.6938 1.739557 +# EOMES_6 2142 0.05602241 1.0119685 0.3001 0.000000 +# ETS1_3 2142 0.06956116 0.8602864 0.0487 0.000000 +# PAX5_5 2142 0.05788982 0.6256795 0.0006 0.000000 + +#Background Peaks +customEnrichment( + ranges = peaks1, + matches = getMatches(proj), + bgdPeaks = getBgdPeaks(proj, force=TRUE) +) +# feature CompareFrequency nCompare CompareProportion BackgroundFrequency +# CEBPB_1 CEBPB_1 70 635 0.11023622 2459 +# CEBPA_2 CEBPA_2 81 635 0.12755906 3154 +# IRF4_4 IRF4_4 103 635 0.16220472 4836 +# ETS1_3 ETS1_3 38 635 0.05984252 1781 +# EOMES_6 EOMES_6 36 635 0.05669291 1975 +# PAX5_5 PAX5_5 23 635 0.03622047 1495 +# nBackground BackgroundProporition Enrichment mlog10p mlog10Padj +# CEBPB_1 32385 0.07593021 1.4518097 2.9544 2.000157 +# CEBPA_2 32385 0.09739077 1.3097654 2.1341 1.179857 +# IRF4_4 32385 0.14932839 1.0862283 0.7142 0.000000 +# ETS1_3 32385 0.05499460 1.0881527 0.4975 0.000000 +# EOMES_6 32385 0.06098502 0.9296203 0.1551 0.000000 +# PAX5_5 32385 0.04616335 0.7846154 0.0422 0.000000 +# +} diff --git a/man/exportPeakMatrixForSTREAM.Rd b/man/exportPeakMatrixForSTREAM.Rd index d8585568..aad27451 100644 --- a/man/exportPeakMatrixForSTREAM.Rd +++ b/man/exportPeakMatrixForSTREAM.Rd @@ -27,3 +27,12 @@ exportPeakMatrixForSTREAM( \description{ This function gets a PeakMatrix from an \code{ArchRProject} and writes it to a set of files for STREAM (https://github.com/pinellolab/STREAM) } +\examples{ + +# Get Test ArchR Project +proj <- getTestProject() + +# Export Matrix For Stream +exportPeakMatrixForSTREAM(proj) + +} diff --git a/man/extendGR.Rd b/man/extendGR.Rd index 6b161bed..8e2718ea 100644 --- a/man/extendGR.Rd +++ b/man/extendGR.Rd @@ -16,3 +16,19 @@ extendGR(gr = NULL, upstream = NULL, downstream = NULL) \description{ This function extends each region in a Genomic Ranges object by a designated upstream and downstream extension in a strand-aware fashion } +\examples{ + +# Dummy GR +gr <- GRanges( + seqnames = "chr1", + ranges = IRanges( + start = c(1, 4, 11), + end = c(10, 12, 20) + ), + score = c(1, 2, 3) +) + +# Non Overlapping +extendGR(gr, 1, 2) + +} diff --git a/man/filterChrGR.Rd b/man/filterChrGR.Rd index 07aad629..68adc22a 100644 --- a/man/filterChrGR.Rd +++ b/man/filterChrGR.Rd @@ -30,3 +30,12 @@ to be removed before the seqlevels can be dropped. Four pruning modes are curren \description{ This function allows for removal of manually designated or more broadly undesirable seqlevels from a Genomic Ranges object or similar object } +\examples{ + +# Add ArchR Genome +addArchRGenome("hg19test2") + +# Filter Chr +filterChrGR(getChromSizes(), remove = "chr5") + +} diff --git a/man/filterDoublets.Rd b/man/filterDoublets.Rd index 269c5c5f..051da595 100644 --- a/man/filterDoublets.Rd +++ b/man/filterDoublets.Rd @@ -28,3 +28,15 @@ The higher the \code{filterRatio}, the greater the number of cells potentially r \description{ This function will filter doublets from an ArchRProject after addDoubletScores() has been run. } +\examples{ + +# Get Test ArchR Project +proj <- getTestProject() + +# Add Doublet Scores for Small Project +proj <- addDoubletScores(proj, dimsToUse = 1:5, LSIParams = list(dimsToUse = 1:5, varFeatures=1000, iterations = 2)) + +# Filter Doublets (Since Low Cells filterRatio has to be high before removing 1 cell!) +proj <- filterDoublets(proj, filterRatio=10) + +} diff --git a/man/findMacs2.Rd b/man/findMacs2.Rd index 551b7eb2..8763e98b 100644 --- a/man/findMacs2.Rd +++ b/man/findMacs2.Rd @@ -9,3 +9,9 @@ findMacs2() \description{ This function attempts to find the path to the MACS2 executable by serting the path and python's pip. } +\examples{ + +# Get Macs2 +findMacs2() + +} diff --git a/man/getArchRChrPrefix.Rd b/man/getArchRChrPrefix.Rd index 58d93982..0443046d 100644 --- a/man/getArchRChrPrefix.Rd +++ b/man/getArchRChrPrefix.Rd @@ -9,3 +9,9 @@ getArchRChrPrefix() \description{ This function will get the default requirement of chromosomes to have a "chr" prefix. } +\examples{ + +# Get ArchR Chr Prefix +getArchRChrPrefix() + +} diff --git a/man/getArchRDebugging.Rd b/man/getArchRDebugging.Rd index b0ea6d56..bb4825b7 100644 --- a/man/getArchRDebugging.Rd +++ b/man/getArchRDebugging.Rd @@ -9,3 +9,9 @@ getArchRDebugging() \description{ This function will get ArchR Debugging which will save an RDS if an error is encountered. } +\examples{ + +# Get ArchR Debugging +getArchRDebugging() + +} diff --git a/man/getArchRGenome.Rd b/man/getArchRGenome.Rd index 799a909b..38b0f3f5 100644 --- a/man/getArchRGenome.Rd +++ b/man/getArchRGenome.Rd @@ -19,3 +19,9 @@ This function is not meant to be run with both \code{geneAnnotation} and \code{g This function will retrieve the genome that is currently in use by ArchR. Alternatively, this function can return either the \code{geneAnnotation} or the \code{genomeAnnotation} associated with the globally defined genome if desired. } +\examples{ + +# Get ArchR Genome to use globally +getArchRGenome() + +} diff --git a/man/getArchRH5Level.Rd b/man/getArchRH5Level.Rd new file mode 100644 index 00000000..e13fda8f --- /dev/null +++ b/man/getArchRH5Level.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/GlobalDefaults.R +\name{getArchRH5Level} +\alias{getArchRH5Level} +\title{Get globally-applied compression level for h5 files} +\usage{ +getArchRH5Level() +} +\description{ +This function will get the default compression level to be used for h5 file execution across all ArchR functions. +} +\examples{ + +# Get ArchR H5 Compression level +getArchRH5Level() + +} diff --git a/man/getArchRLogging.Rd b/man/getArchRLogging.Rd index dfbdf2b5..8fe01b06 100644 --- a/man/getArchRLogging.Rd +++ b/man/getArchRLogging.Rd @@ -9,3 +9,9 @@ getArchRLogging() \description{ This function will get ArchR logging } +\examples{ + +# Get ArchR Logging +getArchRLogging() + +} diff --git a/man/getArchRThreads.Rd b/man/getArchRThreads.Rd index 0ae1a620..aba9f69e 100644 --- a/man/getArchRThreads.Rd +++ b/man/getArchRThreads.Rd @@ -9,3 +9,9 @@ getArchRThreads() \description{ This function will get the number of threads to be used for parallel execution across all ArchR functions. } +\examples{ + +# Get ArchR Threads +getArchRThreads() + +} diff --git a/man/getArchRVerbose.Rd b/man/getArchRVerbose.Rd index 0ed46322..04f6e42a 100644 --- a/man/getArchRVerbose.Rd +++ b/man/getArchRVerbose.Rd @@ -9,3 +9,9 @@ getArchRVerbose() \description{ This function will get ArchR logging verbosity. } +\examples{ + +# Get ArchR Verbose +addArchRVerbose() + +} diff --git a/man/getArrowFiles.Rd b/man/getArrowFiles.Rd index f225f936..29ab82b7 100644 --- a/man/getArrowFiles.Rd +++ b/man/getArrowFiles.Rd @@ -12,3 +12,12 @@ getArrowFiles(ArchRProj = NULL) \description{ This function gets the names of all ArrowFiles associated with a given ArchRProject. } +\examples{ + +# Get Test ArchR Project +proj <- getTestProject() + +# Get Arrow Files +getArrowFiles(proj) + +} diff --git a/man/getAvailableMatrices.Rd b/man/getAvailableMatrices.Rd index 706cf739..2a4f4096 100644 --- a/man/getAvailableMatrices.Rd +++ b/man/getAvailableMatrices.Rd @@ -12,3 +12,12 @@ getAvailableMatrices(ArchRProj = NULL) \description{ This function gets the available matrices from the ArrowFiles in a given ArchRProject object. } +\examples{ + +# Get Test ArchR Project +proj <- getTestProject() + +# Get Available Matrices in ArchR Project +getAvailableMatrices(proj) + +} diff --git a/man/getBgdPeaks.Rd b/man/getBgdPeaks.Rd index 1378b3ec..134cdf92 100644 --- a/man/getBgdPeaks.Rd +++ b/man/getBgdPeaks.Rd @@ -33,3 +33,12 @@ so that you can reproduce results downstream.} \description{ This function will get/compute background peaks controlling for total accessibility and GC-content from an ArchRProject. } +\examples{ + +# Get Test ArchR Project +proj <- getTestProject() + +# Get Background Peaks +bgdPeaks <- getBgdPeaks(proj, force = TRUE) + +} diff --git a/man/getBlacklist.Rd b/man/getBlacklist.Rd index e9e9a442..e2dcafad 100644 --- a/man/getBlacklist.Rd +++ b/man/getBlacklist.Rd @@ -12,3 +12,15 @@ getBlacklist(ArchRProj = NULL) \description{ This function gets the blacklist (the regions to be excluded from analysis) as a GRanges object from the genomeAnnotation of a given ArchRProject. } +\examples{ + +# Get Test ArchR Project +proj <- getTestProject() + +# Get Blacklist in ArchRProj +getBlacklist(proj) + +# Get Blacklist loaded globally +getBlacklist() + +} diff --git a/man/getCellColData.Rd b/man/getCellColData.Rd index 378fd207..518499a0 100644 --- a/man/getCellColData.Rd +++ b/man/getCellColData.Rd @@ -16,3 +16,12 @@ getCellColData(ArchRProj = NULL, select = NULL, drop = FALSE) \description{ This function gets the cellColData from a given ArchRProject. } +\examples{ + +# Get Test ArchR Project +proj <- getTestProject() + +# Get Cell Column Data +getCellColData(proj) + +} diff --git a/man/getCellNames.Rd b/man/getCellNames.Rd index 1ecbe4b1..9df8039c 100644 --- a/man/getCellNames.Rd +++ b/man/getCellNames.Rd @@ -12,3 +12,12 @@ getCellNames(ArchRProj = NULL) \description{ This function gets the cellNames from a given ArchRProject object. } +\examples{ + +# Get Test ArchR Project +proj <- getTestProject() + +# Get Cell Names +getCellNames(proj) + +} diff --git a/man/getChromLengths.Rd b/man/getChromLengths.Rd index 3911630c..0b28a406 100644 --- a/man/getChromLengths.Rd +++ b/man/getChromLengths.Rd @@ -12,3 +12,15 @@ getChromLengths(ArchRProj = NULL) \description{ This function gets the chromosome lengths as a vector from the genomeAnnotation of a given ArchRProject. } +\examples{ + +# Get Test ArchR Project +proj <- getTestProject() + +# Get chromLengths in ArchRProj +getChromLengths(proj) + +# Get chromLengths loaded globally +getChromLengths() + +} diff --git a/man/getChromSizes.Rd b/man/getChromSizes.Rd index c1897c3a..7d98cc5b 100644 --- a/man/getChromSizes.Rd +++ b/man/getChromSizes.Rd @@ -12,3 +12,15 @@ getChromSizes(ArchRProj = NULL) \description{ This function gets the chromosome lengths as a GRanges object from the genomeAnnotation of a given ArchRProject. } +\examples{ + +# Get Test ArchR Project +proj <- getTestProject() + +# Get chromSizes in ArchRProj +getChromSizes(proj) + +# Get chromSizes loaded globally +getChromSizes() + +} diff --git a/man/getCoAccessibility.Rd b/man/getCoAccessibility.Rd index b97163e2..fa2ae8c9 100644 --- a/man/getCoAccessibility.Rd +++ b/man/getCoAccessibility.Rd @@ -25,3 +25,15 @@ the \code{ArchRBrowser()} or as an \code{ArchRBrowserTrack()}.} \description{ This function obtains co-accessibility data from an ArchRProject. } +\examples{ + +# Get Test ArchR Project +proj <- getTestProject() + +# Add Co Accessibility +proj <- addCoAccessibility(proj, k = 20) + +# Get Co Accessibility +CoA <- getCoAccessibility(proj) + +} diff --git a/man/getEmbedding.Rd b/man/getEmbedding.Rd index c08b3ca5..456f26b9 100644 --- a/man/getEmbedding.Rd +++ b/man/getEmbedding.Rd @@ -18,3 +18,12 @@ the full embedding object.} \description{ This function gets an embedding (i.e. UMAP) from a given ArchRProject. } +\examples{ + +# Get Test ArchR Project +proj <- getTestProject() + +# Get UMAP +getEmbedding(proj, embedding = "UMAP") + +} diff --git a/man/getExons.Rd b/man/getExons.Rd index 60cfb17d..bf927e9f 100644 --- a/man/getExons.Rd +++ b/man/getExons.Rd @@ -14,3 +14,15 @@ getExons(ArchRProj = NULL, symbols = NULL) \description{ This function gets the exons coordinates as a GRanges object from the geneAnnotation of a given ArchRProject. } +\examples{ + +# Get Test ArchR Project +proj <- getTestProject() + +# Get Exons in ArchRProj +getExons(proj) + +# Get Exons globally +getExons() + +} diff --git a/man/getFeatures.Rd b/man/getFeatures.Rd index a9112e33..10c8dac4 100644 --- a/man/getFeatures.Rd +++ b/man/getFeatures.Rd @@ -16,7 +16,7 @@ getFeatures( \item{useMatrix}{The name of the data matrix as stored in the ArrowFiles of the \code{ArchRProject}. Options include "TileMatrix", "GeneScoreMatrix", etc.} -\item{select}{A string specifying a specific feature name (or rowname) to be found with \code{grep}.} +\item{select}{A string specifying a specific feature name (or rowname) to be found with \code{grep} or granges to overlap.} \item{ignoreCase}{A boolean value indicating whether to ignore the case (upper-case / lower-case) when searching via grep for the string passed to \code{select}.} } @@ -24,3 +24,12 @@ getFeatures( This function will identify available features from a given data matrix (i.e. "GeneScoreMatrix", or "TileMatrix") and return them for downstream plotting utilities. } +\examples{ + +# Get Test ArchR Project +proj <- getTestProject() + +# Get Features +getFeatures(proj, useMatrix = "GeneScoreMatrix", select = 'CD3') + +} diff --git a/man/getFootprints.Rd b/man/getFootprints.Rd index 232a1fb0..af2adcf0 100644 --- a/man/getFootprints.Rd +++ b/man/getFootprints.Rd @@ -46,3 +46,15 @@ object will be considered for the footprint.} \description{ This function will get footprints for all samples in a given ArchRProject and return a summarized experiment object that can be used for downstream analyses } +\examples{ + +# Get Test ArchR Project +proj <- getTestProject() + +# Get Motif Positions +positions <- getPositions(proj) + +# Get Footprints +seFoot <- getFootprints(ArchRProj = proj, positions = positions, groupBy = "Clusters", minCells = 10) + +} diff --git a/man/getFragmentsFromArrow.Rd b/man/getFragmentsFromArrow.Rd index e8cb1c75..c83a8336 100644 --- a/man/getFragmentsFromArrow.Rd +++ b/man/getFragmentsFromArrow.Rd @@ -28,3 +28,12 @@ from the provided ArrowFile using \code{getCellNames()}.} \description{ This function retrieves the fragments from a given ArrowFile as a GRanges object. } +\examples{ + +#Get Test Arrow +arrow <- getTestArrow() + +# Get Fragments +frags <- getFragmentsFromArrow(arrow) + +} diff --git a/man/getFragmentsFromProject.Rd b/man/getFragmentsFromProject.Rd index 15ecf282..5645a2fa 100644 --- a/man/getFragmentsFromProject.Rd +++ b/man/getFragmentsFromProject.Rd @@ -28,3 +28,12 @@ from the provided ArrowFile using \code{getCellNames()}.} \description{ This function retrieves the fragments from a given ArchRProject as a GRangesList object. } +\examples{ + +#Get Test Project +proj <- getTestProject() + +# Get Fragments +frags <- getFragmentsFromProject(proj) + +} diff --git a/man/getGeneAnnotation.Rd b/man/getGeneAnnotation.Rd index 6555ce69..eae5338b 100644 --- a/man/getGeneAnnotation.Rd +++ b/man/getGeneAnnotation.Rd @@ -12,3 +12,15 @@ getGeneAnnotation(ArchRProj = NULL) \description{ This function gets the geneAnnotation from a given ArchRProject } +\examples{ + +# Get Test ArchR Project +proj <- getTestProject() + +# Get Gene Annotation in ArchRProj +getGeneAnnotation(proj) + +# Get Gene Annotation loaded globally +getGeneAnnotation() + +} diff --git a/man/getGenes.Rd b/man/getGenes.Rd index e33a3ced..03ba390d 100644 --- a/man/getGenes.Rd +++ b/man/getGenes.Rd @@ -14,3 +14,15 @@ getGenes(ArchRProj = NULL, symbols = NULL) \description{ This function gets the genes start and end coordinates as a GRanges object from the geneAnnotation of a given ArchRProject. } +\examples{ + +# Get Test ArchR Project +proj <- getTestProject() + +# Get Genes in ArchRProj +getGenes(proj) + +# Get Genes globally +getGenes() + +} diff --git a/man/getGenome.Rd b/man/getGenome.Rd index f9527b19..2606e002 100644 --- a/man/getGenome.Rd +++ b/man/getGenome.Rd @@ -12,3 +12,15 @@ getGenome(ArchRProj = NULL) \description{ This function gets the name of the genome from the genomeAnnotation used by a given ArchRProject. } +\examples{ + +# Get Test ArchR Project +proj <- getTestProject() + +# Get Genome in ArchRProj +getGenome(proj) + +# Get Genome loaded globally +getGenome() + +} diff --git a/man/getGenomeAnnotation.Rd b/man/getGenomeAnnotation.Rd index bb2892f7..53e4584a 100644 --- a/man/getGenomeAnnotation.Rd +++ b/man/getGenomeAnnotation.Rd @@ -12,3 +12,15 @@ getGenomeAnnotation(ArchRProj = NULL) \description{ This function gets the genomeAnnotation from a given ArchRProject. } +\examples{ + +# Get Test ArchR Project +proj <- getTestProject() + +# Get Genome Annotation in ArchRProj +getGenomeAnnotation(proj) + +# Get Genome Annotation loaded globally +getGenomeAnnotation() + +} diff --git a/man/getGroupBW.Rd b/man/getGroupBW.Rd index f90166fb..c93aa62a 100644 --- a/man/getGroupBW.Rd +++ b/man/getGroupBW.Rd @@ -42,3 +42,12 @@ is "ReadsInTSS" which simultaneously normalizes tracks based on sequencing depth \description{ This function will group, summarize and export a bigwig for each group in an ArchRProject. } +\examples{ + +# Get Test ArchR Project +proj <- getTestProject() + +# Get Group BW +bw <- getGroupBW(proj, groupBy = "Clusters") + +} diff --git a/man/getGroupFragments.Rd b/man/getGroupFragments.Rd new file mode 100644 index 00000000..2c5df16e --- /dev/null +++ b/man/getGroupFragments.Rd @@ -0,0 +1,36 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/GroupExport.R +\name{getGroupFragments} +\alias{getGroupFragments} +\title{Export Group Fragment Files} +\usage{ +getGroupFragments( + ArchRProj = NULL, + groupBy = "Clusters", + threads = getArchRThreads(), + logFile = createLogFile("getGroupFragments") +) +} +\arguments{ +\item{ArchRProj}{An \code{ArchRProject} object.} + +\item{groupBy}{A string that indicates how cells should be grouped. This string corresponds to one of the standard or +user-supplied \code{cellColData} metadata columns (for example, "Clusters"). Cells with the same value annotated in this metadata +column will be grouped together and their fragments exported to \code{outputDirectory}/GroupFragments.} + +\item{threads}{An integer specifying the number of threads for parallel.} + +\item{logFile}{The path to a file to be used for logging ArchR output.} +} +\description{ +This function will group export fragment files for each group in an ArchRProject. +} +\examples{ + +# Get Test ArchR Project +proj <- getTestProject() + +# Get Group BW +frags <- getGroupFragments(proj, groupBy = "Clusters") + +} diff --git a/man/getGroupSE.Rd b/man/getGroupSE.Rd index 0759a4e4..d67e6dc2 100644 --- a/man/getGroupSE.Rd +++ b/man/getGroupSE.Rd @@ -35,3 +35,12 @@ getGroupSE( \description{ This function will group, summarize and export a summarized experiment for a assay in a ArchRProject. } +\examples{ + +# Get Test ArchR Project +proj <- getTestProject() + +# Get Group SE +se <- getGroupSE(proj, useMatrix = "PeakMatrix", groupBy = "Clusters") + +} diff --git a/man/getGroupSummary.Rd b/man/getGroupSummary.Rd index 2c9c24b9..96e25d3f 100644 --- a/man/getGroupSummary.Rd +++ b/man/getGroupSummary.Rd @@ -26,3 +26,12 @@ getGroupSummary( \description{ This function summarizes a numeric cellColData entry across groupings in a ArchRProject. } +\examples{ + +# Get Test ArchR Project +proj <- getTestProject() + +# Get Group Summary +getGroupSummary(proj, groupBy = "Clusters", select = "TSSEnrichment", summary = "mean") + +} diff --git a/man/getImputeWeights.Rd b/man/getImputeWeights.Rd index 2ad4864e..34ec0331 100644 --- a/man/getImputeWeights.Rd +++ b/man/getImputeWeights.Rd @@ -12,3 +12,15 @@ getImputeWeights(ArchRProj = NULL) \description{ This function gets imputation weights from an ArchRProject to impute numeric values. } +\examples{ + +# Get Test ArchR Project +proj <- getTestProject() + +# Add Impute Weights +proj <- addImputeWeights(proj) + +# Get Impute Weights +getImputeWeights(proj) + +} diff --git a/man/getMarkerFeatures.Rd b/man/getMarkerFeatures.Rd index 5961795c..af34a116 100644 --- a/man/getMarkerFeatures.Rd +++ b/man/getMarkerFeatures.Rd @@ -20,6 +20,7 @@ getMarkerFeatures( bufferRatio = 0.8, binarize = FALSE, useSeqnames = NULL, + closest = FALSE, verbose = TRUE, logFile = createLogFile("getMarkerFeatures") ) @@ -69,6 +70,9 @@ the stringency of also maintaining the group proportions from \code{bgdGroups}.} deviations, the \code{seqnames} do not correspond to chromosomes, rather they correspond to the sub-portions of the matrix, for example raw deviations ("deviations") or deviation z-scores ("z") for a chromVAR deviations matrix.} +\item{closest}{A boolean value that indicated whether to use closest cells from foreground and background instead of random sampling +of the foreground cells.} + \item{verbose}{A boolean value that determines whether standard output is printed.} \item{logFile}{The path to a file to be used for logging ArchR output.} @@ -76,3 +80,17 @@ raw deviations ("deviations") or deviation z-scores ("z") for a chromVAR deviati \description{ This function will identify features that are definitional of each provided cell grouping where possible } +\examples{ + +# Get Test ArchR Project +proj <- getTestProject() + +# Get Markers +seMarker <- getMarkerFeatures( + ArchRProj = proj, + useMatrix = "PeakMatrix", + testMethod = "binomial", + binarize = TRUE +) + +} diff --git a/man/getMarkers.Rd b/man/getMarkers.Rd index 6bf0fc3f..b84c12b5 100644 --- a/man/getMarkers.Rd +++ b/man/getMarkers.Rd @@ -24,3 +24,20 @@ of the \code{assayNames} from \code{seMarker}.} \description{ This function will identify Markers and return a List of Features or a GRangesList for each group of significant marker features. } +\examples{ + +#Get Test Project +proj <- getTestProject() + +#Get Markers +seMarker <- getMarkerFeatures( + ArchRProj = proj, + useMatrix = "PeakMatrix", + testMethod = "binomial", + binarize = TRUE +) + +#Get Markers +getMarkers(seMarker) + +} diff --git a/man/getMatches.Rd b/man/getMatches.Rd index 36e369ab..86f18ada 100644 --- a/man/getMatches.Rd +++ b/man/getMatches.Rd @@ -17,3 +17,12 @@ getMatches(ArchRProj = NULL, name = NULL, annoName = NULL) This function gets peak annotation matches from a given ArchRProject. The peaks in the returned object are in the same order as the peaks returned by \code{getPeakSet()}. } +\examples{ + +# Get Test ArchR Project +proj <- getTestProject() + +# Get Annotation Matches +matches <- getMatches(proj) + +} diff --git a/man/getMatrixFromArrow.Rd b/man/getMatrixFromArrow.Rd index 302302ff..8168fde0 100644 --- a/man/getMatrixFromArrow.Rd +++ b/man/getMatrixFromArrow.Rd @@ -8,6 +8,7 @@ getMatrixFromArrow( ArrowFile = NULL, useMatrix = "GeneScoreMatrix", useSeqnames = NULL, + excludeChr = NULL, cellNames = NULL, ArchRProj = NULL, verbose = TRUE, @@ -22,6 +23,8 @@ getMatrixFromArrow( \item{useSeqnames}{A character vector of chromosome names to be used to subset the data matrix being obtained.} +\item{excludeChr}{A character vector containing the \code{seqnames} of the chromosomes that should be excluded from this analysis.} + \item{cellNames}{A character vector indicating the cell names of a subset of cells from which fragments whould be extracted. This allows for extraction of fragments from only a subset of selected cells. By default, this function will extract all cells from the provided ArrowFile using \code{getCellNames()}.} @@ -39,3 +42,12 @@ provide the \code{ArchRProject} object here.} \description{ This function gets a given data matrix from an individual ArrowFile. } +\examples{ + +#Get Test Arrow +arrow <- getTestArrow() + +# Get Fragments +se <- getMatrixFromArrow(arrow) + +} diff --git a/man/getMatrixFromProject.Rd b/man/getMatrixFromProject.Rd index 2e96d395..a2ad3e74 100644 --- a/man/getMatrixFromProject.Rd +++ b/man/getMatrixFromProject.Rd @@ -8,6 +8,7 @@ getMatrixFromProject( ArchRProj = NULL, useMatrix = "GeneScoreMatrix", useSeqnames = NULL, + excludeChr = NULL, verbose = TRUE, binarize = FALSE, threads = getArchRThreads(), @@ -21,6 +22,8 @@ getMatrixFromProject( \item{useSeqnames}{A character vector of chromosome names to be used to subset the data matrix being obtained.} +\item{excludeChr}{A character vector containing the \code{seqnames} of the chromosomes that should be excluded from this analysis.} + \item{verbose}{A boolean value indicating whether to use verbose output during execution of this function. Can be set to FALSE for a cleaner output.} \item{binarize}{A boolean value indicating whether the matrix should be binarized before return. @@ -36,3 +39,12 @@ For example, if you added your \code{PeakMatrix} using \code{addPeakMatrix()} wi in the parameters passed to \code{getMatrixFromProject()} and the \code{PeakMatrix} will be binarized as you pull it out. No other normalization is applied to the matrix by this function. } +\examples{ + +#Get Test Project +proj <- getTestProject() + +# Get Fragments +se <- getMatrixFromProject(proj) + +} diff --git a/man/getMonocleTrajectories.Rd b/man/getMonocleTrajectories.Rd index 385e51e9..b8e84296 100644 --- a/man/getMonocleTrajectories.Rd +++ b/man/getMonocleTrajectories.Rd @@ -41,3 +41,18 @@ in \code{cellColData}. This limits the groups used to identify trajectories.} This function will use monocle3 to find trajectories and then returns a monocle CDS object that can be used as input for \code{addMonocleTrajectory}. } +\examples{ + +# Get Test ArchR Project +proj <- getTestProject() + +#Create Monocole Trajectory +cds <- getMonocleTrajectories( + ArchRProj = proj, + useGroups = c("C1", "C2", "C3"), + principalGroup = "C1", + groupBy = "Clusters", + embedding = "UMAP" +) + +} diff --git a/man/getOutputDirectory.Rd b/man/getOutputDirectory.Rd index 080c22db..167a4db6 100644 --- a/man/getOutputDirectory.Rd +++ b/man/getOutputDirectory.Rd @@ -12,3 +12,12 @@ getOutputDirectory(ArchRProj = NULL) \description{ This function gets the outputDirectory from a given ArchRProject. If null this returns "QualityControl" directory. } +\examples{ + +# Get Test ArchR Project +proj <- getTestProject() + +# Get Output Directory +getOutputDirectory(proj) + +} diff --git a/man/getPBGroupSE.Rd b/man/getPBGroupSE.Rd new file mode 100644 index 00000000..1f43ae63 --- /dev/null +++ b/man/getPBGroupSE.Rd @@ -0,0 +1,70 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/GroupExport.R +\name{getPBGroupSE} +\alias{getPBGroupSE} +\title{Export PseudoBulk Group Summarized Experiment} +\usage{ +getPBGroupSE( + ArchRProj = NULL, + useMatrix = "GeneScoreMatrix", + groupBy = "Clusters", + divideN = TRUE, + scaleTo = 10000, + useLabels = TRUE, + sampleLabels = "Sample", + minCells = 40, + maxCells = 500, + minReplicates = 2, + maxReplicates = 5, + sampleRatio = 0.8, + verbose = TRUE, + threads = getArchRThreads(), + logFile = createLogFile("getPBGroupSE") +) +} +\arguments{ +\item{ArchRProj}{An \code{ArchRProject} object.} + +\item{useMatrix}{The name of the matrix in the ArrowFiles. See getAvailableMatrices to see options} + +\item{groupBy}{The name of the column in \code{cellColData} to use for grouping cells together for summarizing.} + +\item{divideN}{A boolean describing whether to divide by the number of cells.} + +\item{scaleTo}{Depth normalize to this value if not NULL.} + +\item{useLabels}{A boolean value indicating whether to use sample labels to create sample-aware subgroupings during as pseudo-bulk replicate generation.} + +\item{sampleLabels}{The name of a column in \code{cellColData} to use to identify samples. In most cases, this parameter should be left as \code{NULL} and you +should only use this parameter if you do not want to use the default sample labels stored in \code{cellColData$Sample}. However, if your individual Arrow +files do not map to individual samples, then you should set this parameter to accurately identify your samples. This is the case in (for example) +multiplexing applications where cells from different biological samples are mixed into the same reaction and demultiplexed based on a lipid barcode or genotype.} + +\item{minCells}{The minimum number of cells required in a given cell group to permit insertion coverage file generation.} + +\item{maxCells}{The maximum number of cells to use during insertion coverage file generation.} + +\item{minReplicates}{The minimum number of pseudo-bulk replicates to be generated.} + +\item{maxReplicates}{The maximum number of pseudo-bulk replicates to be generated.} + +\item{sampleRatio}{The fraction of the total cells that can be sampled to generate any given pseudo-bulk replicate.} + +\item{verbose}{A boolean specifying to print messages during computation.} + +\item{threads}{An integer specifying the number of threads for parallel.} + +\item{logFile}{The path to a file to be used for logging ArchR output.} +} +\description{ +This function will determine cell groups for pseudobulk, summarize and export a summarized experiment for a assay in a ArchRProject. +} +\examples{ + +# Get Test ArchR Project +proj <- getTestProject() + +# Get Group SE +se <- getPBGroupSE(proj, useMatrix = "PeakMatrix", groupBy = "Clusters") + +} diff --git a/man/getPeak2GeneLinks.Rd b/man/getPeak2GeneLinks.Rd index 66e712f5..665f9f4c 100644 --- a/man/getPeak2GeneLinks.Rd +++ b/man/getPeak2GeneLinks.Rd @@ -7,7 +7,7 @@ getPeak2GeneLinks( ArchRProj = NULL, corCutOff = 0.45, - FDRCutOff = 1e-04, + FDRCutOff = 0.0001, varCutOffATAC = 0.25, varCutOffRNA = 0.25, resolution = 1, @@ -33,3 +33,15 @@ the \code{ArchRBrowser()} or as an \code{ArchRBrowserTrack()}.} \description{ This function obtains peak-to-gene links from an ArchRProject. } +\examples{ + +# Get Test ArchR Project +proj <- getTestProject() + +# Add P2G Links +proj <- addPeak2GeneLinks(proj, k = 20) + +# Get P2G Links +p2g <- getPeak2GeneLinks(proj) + +} diff --git a/man/getPeakAnnotation.Rd b/man/getPeakAnnotation.Rd index 94ebae04..e1f7001c 100644 --- a/man/getPeakAnnotation.Rd +++ b/man/getPeakAnnotation.Rd @@ -14,3 +14,12 @@ getPeakAnnotation(ArchRProj = NULL, name = NULL) \description{ This function gets a peakAnnotation from a given ArchRProject. } +\examples{ + +# Get Test ArchR Project +proj <- getTestProject() + +# Get Peak Annotations +peakAnno <- getPeakAnnotation(proj) + +} diff --git a/man/getPeakSet.Rd b/man/getPeakSet.Rd index 307ada51..790bac88 100644 --- a/man/getPeakSet.Rd +++ b/man/getPeakSet.Rd @@ -12,3 +12,12 @@ getPeakSet(ArchRProj = NULL) \description{ This function gets the peak set as a GRanges object from an ArchRProject. } +\examples{ + +# Get Test ArchR Project +proj <- getTestProject() + +# Get PeakSet +getPeakSet(proj) + +} diff --git a/man/getPositions.Rd b/man/getPositions.Rd index fde37d94..3b98ba91 100644 --- a/man/getPositions.Rd +++ b/man/getPositions.Rd @@ -16,3 +16,12 @@ getPositions(ArchRProj = NULL, name = NULL, annoName = NULL) \description{ This function gets the peak annotation positions (i.e. Motifs) from a given ArchRProject. } +\examples{ + +# Get Test ArchR Project +proj <- getTestProject() + +# Get Annotation Positions +positions <- getPositions(proj) + +} diff --git a/man/getProjectSummary.Rd b/man/getProjectSummary.Rd index 86c514de..7daf9266 100644 --- a/man/getProjectSummary.Rd +++ b/man/getProjectSummary.Rd @@ -14,3 +14,12 @@ getProjectSummary(ArchRProj = NULL, returnSummary = FALSE) \description{ This function prints the projectSummary from an ArchRProject } +\examples{ + +# Get Test ArchR Project +proj <- getTestProject() + +# Get Project Summary +getProjectSummary(proj) + +} diff --git a/man/getReducedDims.Rd b/man/getReducedDims.Rd index 4ba7dedf..084f387e 100644 --- a/man/getReducedDims.Rd +++ b/man/getReducedDims.Rd @@ -34,3 +34,12 @@ to sequencing depth that is greater than the \code{corCutOff}, it will be exclud \description{ This function gets a dimensionality reduction object (i.e. UMAP, tSNE, etc) from a given ArchRProject. } +\examples{ + +# Get Test ArchR Project +proj <- getTestProject() + +# Get Iterative LSI +getReducedDims(proj, reducedDims = "IterativeLSI") + +} diff --git a/man/getSampleColData.Rd b/man/getSampleColData.Rd index d1c776a7..9b0672ba 100644 --- a/man/getSampleColData.Rd +++ b/man/getSampleColData.Rd @@ -16,3 +16,12 @@ getSampleColData(ArchRProj = NULL, select = NULL, drop = FALSE) \description{ This function gets the sampleColData from a given ArchRProject. } +\examples{ + +# Get Test ArchR Project +proj <- getTestProject() + +# Get Sample Column Data +getSampleColData(proj) + +} diff --git a/man/getSampleNames.Rd b/man/getSampleNames.Rd index a7b827c0..d60331ad 100644 --- a/man/getSampleNames.Rd +++ b/man/getSampleNames.Rd @@ -12,3 +12,12 @@ getSampleNames(ArchRProj = NULL) \description{ This function gets the names of all samples from a given ArchRProject. } +\examples{ + +# Get Test ArchR Project +proj <- getTestProject() + +# Get Sample Names +getSampleNames(proj) + +} diff --git a/man/getSeqnames.Rd b/man/getSeqnames.Rd index 3306ea7c..66987657 100644 --- a/man/getSeqnames.Rd +++ b/man/getSeqnames.Rd @@ -9,9 +9,19 @@ getSeqnames(ArchRProj = NULL, useMatrix = "GeneScoreMatrix") \arguments{ \item{ArchRProj}{An \code{ArchRProject} object.} -\item{useMatrix}{The name of the data matrix as stored in the ArrowFiles of the \code{ArchRProject}. Options include "TileMatrix", "GeneScoreMatrix", etc.} +\item{useMatrix}{The name of the data matrix or "Fragments" as stored in the ArrowFiles of the \code{ArchRProject}. +Options include "TileMatrix", "GeneScoreMatrix", etc.} } \description{ This function will identify available seqnames from a given data matrix (i.e. "GeneScoreMatrix", or "TileMatrix") and return them for downstream plotting utilities. } +\examples{ + +# Get Test ArchR Project +proj <- getTestProject() + +# Get Seqnames in Matrix +getSeqnames(proj, useMatrix = "GeneScoreMatrix") + +} diff --git a/man/getTSS.Rd b/man/getTSS.Rd index 370f53b8..03d70166 100644 --- a/man/getTSS.Rd +++ b/man/getTSS.Rd @@ -12,3 +12,15 @@ getTSS(ArchRProj = NULL) \description{ This function gets the transcription start sites (TSSs) as a GRanges object of all genes from the geneAnnotation of a given ArchRProject. } +\examples{ + +# Get Test ArchR Project +proj <- getTestProject() + +# Get TSS in ArchRProj +getTSS(proj) + +# Get TSS loaded globally +getTSS() + +} diff --git a/man/getTestArrow.Rd b/man/getTestArrow.Rd new file mode 100644 index 00000000..9eb17db1 --- /dev/null +++ b/man/getTestArrow.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/InputData.R +\name{getTestArrow} +\alias{getTestArrow} +\title{Get PBMC Small Test Arrow file} +\usage{ +getTestArrow(version = 2) +} +\arguments{ +\item{version}{version of test arrow to return} +} +\description{ +V2 : This function will return a test arrow file in your cwd. +} +\examples{ + +# Get Test Arrow +arrow <- getTestArrow() + +} diff --git a/man/getTestFragments.Rd b/man/getTestFragments.Rd index 4de51bcc..1064a815 100644 --- a/man/getTestFragments.Rd +++ b/man/getTestFragments.Rd @@ -4,8 +4,18 @@ \alias{getTestFragments} \title{Get PBMC Small Test Fragments} \usage{ -getTestFragments(x) +getTestFragments(version = 2) +} +\arguments{ +\item{version}{version of test fragments to return} } \description{ -This function will download fragments for a small PBMC test dataset (2k Cells) spanning chr1 and 2 (~20MB). +V1 : This function will download fragments for a small PBMC test dataset. +V2 : This function will return test fragments for a small PBMC test dataset in your cwd. +} +\examples{ + +# Get Test Fragments +fragments <- getTestFragments() + } diff --git a/man/getTestProject.Rd b/man/getTestProject.Rd index fce9f764..3619e02b 100644 --- a/man/getTestProject.Rd +++ b/man/getTestProject.Rd @@ -4,8 +4,18 @@ \alias{getTestProject} \title{Get PBMC Small Test Project} \usage{ -getTestProject() +getTestProject(version = 2) +} +\arguments{ +\item{version}{version of test fragments to return} } \description{ -This function will download an ArchRProject for a small PBMC test dataset (2k Cells) spanning chr1 and 2 (~2-300MB). +V1 : This function will download an ArchRProject for a small PBMC test dataset. +V2 : This function will return an ArchRProject for a small PBMC test dataset in your cwd. +} +\examples{ + +# Get Test Project +proj <- getTestProject() + } diff --git a/man/getTrajectory.Rd b/man/getTrajectory.Rd index 9765e0d8..bc51bb75 100644 --- a/man/getTrajectory.Rd +++ b/man/getTrajectory.Rd @@ -43,3 +43,15 @@ trajectory matrix to better reveal temporal dynamics.} This function will get a supervised trajectory from an \code{ArchRProject} (see \code{addTrajectory}), get data from a desired matrix, and smooth each value across the input trajectory. } +\examples{ + +# Get Test ArchR Project +proj <- getTestProject() + +#Add Trajectory +proj <- addTrajectory(proj, trajectory = c("C1", "C2", "C3"), embedding = "UMAP", force = TRUE) + +#Get Trajectory +seTraj <- getTrajectory(proj) + +} diff --git a/man/getTutorialData.Rd b/man/getTutorialData.Rd index 2100812c..96990334 100644 --- a/man/getTutorialData.Rd +++ b/man/getTutorialData.Rd @@ -7,12 +7,19 @@ getTutorialData(tutorial = "hematopoiesis", threads = getArchRThreads()) } \arguments{ -\item{tutorial}{The name of the available tutorial for which to retreive the tutorial data. Currently, the only available option is "Hematopoiesis". +\item{tutorial}{The name of the available tutorial for which to retreive the tutorial data. The main option is "Hematopoiesis". "Hematopoiesis" is a small scATAC-seq dataset that spans the hematopoieitic hierarchy from stem cells to differentiated cells. -This dataset is made up of cells from peripheral blood, bone marrow, and CD34+ sorted bone marrow.} +This dataset is made up of cells from peripheral blood, bone marrow, and CD34+ sorted bone marrow. The second option is "Test" +which is downloading a small test PBMC fragments file mainly used to test the url capabilities of this function.} \item{threads}{The number of threads to be used for parallel computing.} } \description{ This function will download data for a given tutorial and return the input files required for ArchR. } +\examples{ + +# Get Tutorial Fragments using `test` since its smaller +fragments <- getTutorialData(tutorial = "test") + +} diff --git a/man/getVarDeviations.Rd b/man/getVarDeviations.Rd index 9e13e70e..b7e5b9b8 100644 --- a/man/getVarDeviations.Rd +++ b/man/getVarDeviations.Rd @@ -18,3 +18,12 @@ getVarDeviations(ArchRProj = NULL, name = "MotifMatrix", plot = TRUE, n = 25) \description{ This function will rank the variability of the deviations computed by ArchR and label the top variable annotations. } +\examples{ + +# Get Test ArchR Project +proj <- getTestProject() + +# Get Variable Motif Deviations +varDev <- getVarDeviations(proj) + +} diff --git a/man/ggAlignPlots.Rd b/man/ggAlignPlots.Rd index 25c7e94c..ccc223cd 100644 --- a/man/ggAlignPlots.Rd +++ b/man/ggAlignPlots.Rd @@ -20,3 +20,18 @@ ggAlignPlots(..., plotList = NULL, sizes = NULL, type = "v", draw = TRUE) \description{ This function aligns ggplots vertically or horizontally } +\examples{ + +# Create Random Data +m <- data.frame(x=matrix(rnorm(10, 2),ncol=1)) +m$color <- sample(c("A", "B"), 10, replace = TRUE) + +# Plot +p <- ggGroup(x = m$color, y = m$x) + +# To PDF +pdf("test.pdf", width = 4, height = 7) +ggAlignPlots(p, p) +dev.off() + +} diff --git a/man/ggGroup.Rd b/man/ggGroup.Rd index d32d3e49..c493cf40 100644 --- a/man/ggGroup.Rd +++ b/man/ggGroup.Rd @@ -60,3 +60,18 @@ values of \code{x} in the desired order.} \description{ This function is a wrapper around ggplot geom_density_ridges or geom_violin to allow for plotting group distribution plots in ArchR. } +\examples{ + +# Create Random Data +m <- data.frame(x=matrix(rnorm(10, 2),ncol=1)) +m$color <- sample(c("A", "B"), 10, replace = TRUE) + +# Plot +p <- ggGroup(x = m$color, y = m$x) + +# To PDF +pdf("test.pdf", width = 4, height = 4) +p +dev.off() + +} diff --git a/man/ggHex.Rd b/man/ggHex.Rd index f690a5bd..8c758f22 100644 --- a/man/ggHex.Rd +++ b/man/ggHex.Rd @@ -68,3 +68,17 @@ them to the value of the 97.5th and 2.5th percentile values respectively.} \description{ This function will plot x,y coordinate values summarized in hexagons in a standardized manner } +\examples{ + +# Create Random Data +m <- data.frame(matrix(rnorm(300, 2),ncol=3)) + +# Plot +p <- ggHex(x = m[,1], y = m[,2], color = m[,3]) + +# To PDF +pdf("test.pdf", width = 4, height = 4) +p +dev.off() + +} diff --git a/man/ggOneToOne.Rd b/man/ggOneToOne.Rd index 3c1b79dc..28ec9830 100644 --- a/man/ggOneToOne.Rd +++ b/man/ggOneToOne.Rd @@ -62,3 +62,17 @@ ggOneToOne( \description{ This function is a wrapper around ggplot geom_point to allow for plotting one-to-one sample comparisons in ArchR. } +\examples{ + +# Create Random Data +m <- data.frame(matrix(rnorm(20, 2),ncol=2)) + +# Plot +p <- ggOneToOne(x = m[,1], y = m[,2]) + +# To PDF +pdf("test.pdf", width = 4, height = 4) +p +dev.off() + +} diff --git a/man/ggPoint.Rd b/man/ggPoint.Rd index 3adcaaff..4c5953d2 100644 --- a/man/ggPoint.Rd +++ b/man/ggPoint.Rd @@ -126,3 +126,18 @@ lines and labels, just the internal portions of the plot.} \description{ This function is a wrapper around ggplot geom_point to allow for a more intuitive plotting of ArchR data. } +\examples{ + +# Create Random Data +m <- data.frame(matrix(rnorm(20, 2),ncol=2)) +m$color <- sample(c("A", "B"), 10, replace = TRUE) + +# Plot +p <- ggPoint(x = m[,1], y = m[,2], color = m[,3]) + +# To PDF +pdf("test.pdf", width = 4, height = 4) +p +dev.off() + +} diff --git a/man/grapes-bcin-grapes.Rd b/man/grapes-bcin-grapes.Rd index 5bcf1ead..01335aef 100644 --- a/man/grapes-bcin-grapes.Rd +++ b/man/grapes-bcin-grapes.Rd @@ -14,3 +14,9 @@ x \%bcin\% table \description{ This function provides a generic matching function for S4Vector objects primarily to avoid ambiguity. } +\examples{ + +#Test +Rle(c("A", "B", "C")) \%bcin\% Rle(c("A", "C")) + +} diff --git a/man/grapes-bcni-grapes.Rd b/man/grapes-bcni-grapes.Rd index 8365ee9b..c48d64d9 100644 --- a/man/grapes-bcni-grapes.Rd +++ b/man/grapes-bcni-grapes.Rd @@ -14,3 +14,9 @@ x \%bcni\% table \description{ This function provides the reciprocal of \%bcin\% for S4Vector objects primarily to avoid ambiguity. } +\examples{ + +#Test +Rle(c("A", "B", "C")) \%bcni\% Rle(c("A", "C")) + +} diff --git a/man/grapes-ni-grapes.Rd b/man/grapes-ni-grapes.Rd index 39ea1864..22483b5c 100644 --- a/man/grapes-ni-grapes.Rd +++ b/man/grapes-ni-grapes.Rd @@ -14,3 +14,9 @@ x \%ni\% table \description{ This function is the reciprocal of \%in\%. See the match funciton in base R. } +\examples{ + +#Test +c("A", "B", "C") \%ni\% c("A", "C") + +} diff --git a/man/import10xFeatureMatrix.Rd b/man/import10xFeatureMatrix.Rd index 61261bd5..e93b76c6 100644 --- a/man/import10xFeatureMatrix.Rd +++ b/man/import10xFeatureMatrix.Rd @@ -9,7 +9,8 @@ import10xFeatureMatrix( names = NULL, strictMatch = TRUE, verbose = TRUE, - featureType = "Gene Expression" + featureType = "Gene Expression", + features = NULL ) } \arguments{ @@ -27,6 +28,9 @@ cannot tolerate mismatched gene names, only mismatched metadata for the same gen \item{featureType}{The name of the feature to extract from the 10x feature file. See https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/advanced/h5_matrices for more information.} + +\item{features}{A genomic ranges object containing a "name" column to help fill missing 10x intervals for RSE. +For example, in hg38 features provided could be using Bioconductors \code{genes(EnsDb.Hsapiens.v86::EnsDb.Hsapiens.v86)}.} } \description{ This function will import the feature matrix from a 10x feature hdf5 file. diff --git a/man/imputeMatrix.Rd b/man/imputeMatrix.Rd index 28f094c1..58b35aa9 100644 --- a/man/imputeMatrix.Rd +++ b/man/imputeMatrix.Rd @@ -26,3 +26,21 @@ imputeMatrix( \description{ This function gets imputation weights from an ArchRProject to impute a numerical matrix } +\examples{ + +# Get Test ArchR Project +proj <- getTestProject() + +# Add Impute Weights +proj <- addImputeWeights(proj) + +# Get Impute Weights +iW <- getImputeWeights(proj) + +# Get Matrix +se <- getMatrixFromProject(proj, useMatrix = "GeneScoreMatrix") + +# Impute +mat <- imputeMatrix(assay(se), iW) + +} diff --git a/man/installExtraPackages.Rd b/man/installExtraPackages.Rd index 9a3a5559..02d7b72f 100644 --- a/man/installExtraPackages.Rd +++ b/man/installExtraPackages.Rd @@ -12,3 +12,9 @@ installExtraPackages(force = FALSE) \description{ This function will install extra packages used in ArchR that are not installed by default. } +\examples{ + +# Install +installExtraPackages() + +} diff --git a/man/loadArchRProject.Rd b/man/loadArchRProject.Rd index 58dabb3b..84f0c88c 100644 --- a/man/loadArchRProject.Rd +++ b/man/loadArchRProject.Rd @@ -18,3 +18,21 @@ will fail unless all components can be found.} \description{ This function will load a previously saved ArchRProject and re-normalize paths for usage. } +\examples{ + +# Get Small PBMC Project Location +zipProj <- file.path(system.file("testdata", package="ArchR"), "PBSmall.zip") + +# Copy to current directory +file.copy(zipProj, basename(zipProj), overwrite = TRUE) + +# Unzip +unzip(basename(zipProj), overwrite = TRUE) + +# Remove +file.remove(basename(zipProj)) + +# Load +loadArchRProject("PBSmall") + +} diff --git a/man/mapLabels.Rd b/man/mapLabels.Rd index 825225aa..ac99d9d6 100644 --- a/man/mapLabels.Rd +++ b/man/mapLabels.Rd @@ -17,3 +17,12 @@ mapLabels(labels = NULL, newLabels = NULL, oldLabels = names(newLabels)) This function takes a character vector of labels and uses a set of old and new labels to re-map from the old label set to the new label set. } +\examples{ + +# Get Test ArchR Project +proj <- getTestProject() + +# Get Peak Annotations +proj$ClusterLabels <- mapLabels(proj$Clusters, c("T", "B", "M"), c("C1", "C2", "C3")) + +} diff --git a/man/nCells.Rd b/man/nCells.Rd index 804bef9a..c494b818 100644 --- a/man/nCells.Rd +++ b/man/nCells.Rd @@ -12,3 +12,12 @@ nCells(input = NULL) \description{ This function gets number of cells from an ArchRProject or ArrowFile } +\examples{ + +# Get Test ArchR Project +proj <- getTestProject() + +# Get Number of Cells +nCells(proj) + +} diff --git a/man/nonOverlappingGR.Rd b/man/nonOverlappingGR.Rd index 367e74e3..34b7d67d 100644 --- a/man/nonOverlappingGR.Rd +++ b/man/nonOverlappingGR.Rd @@ -22,3 +22,19 @@ order. If \code{TRUE}, the higher value in \code{by} will be retained.} \description{ This function returns a GRanges object containing a non-overlapping set regions derived from a supplied Genomic Ranges object. } +\examples{ + +# Dummy GR +gr <- GRanges( + seqnames = "chr1", + ranges = IRanges( + start = c(1, 4, 11), + end = c(10, 12, 20) + ), + score = c(1, 2, 3) +) + +# Non Overlapping +nonOverlappingGR(gr) + +} diff --git a/man/paletteContinuous.Rd b/man/paletteContinuous.Rd index 31c720b1..8098d201 100644 --- a/man/paletteContinuous.Rd +++ b/man/paletteContinuous.Rd @@ -16,3 +16,9 @@ paletteContinuous(set = "solarExtra", n = 256, reverse = FALSE) \description{ Continuous Color Palette } +\examples{ + +# Color Palette +pal <- paletteContinuous() + +} diff --git a/man/paletteDiscrete.Rd b/man/paletteDiscrete.Rd index d457f233..4b0953a6 100644 --- a/man/paletteDiscrete.Rd +++ b/man/paletteDiscrete.Rd @@ -18,3 +18,12 @@ given a unique color from the designated palette set.} This function assesses the number of inputs and returns a discrete color palette that is tailored to provide the most possible color contrast from the designated color set. } +\examples{ + +# Vector +v <- c("A", "B") + +# Color Palette +pal <- paletteDiscrete(values = v) + +} diff --git a/man/peakAnnoEnrichment.Rd b/man/peakAnnoEnrichment.Rd index 655e8be9..65593603 100644 --- a/man/peakAnnoEnrichment.Rd +++ b/man/peakAnnoEnrichment.Rd @@ -34,3 +34,24 @@ peakAnnoEnrichment( \description{ This function will perform hypergeometric enrichment of a given peak annotation within the defined marker peaks. } +\examples{ + +# Get Test ArchR Project +proj <- getTestProject() + +# Get Markers +seMarker <- getMarkerFeatures( + ArchRProj = proj, + useMatrix = "PeakMatrix", + testMethod = "binomial", + binarize = TRUE +) + +# Get Peak Annotation Enrichment +annoEnrich <- peakAnnoEnrichment( + seMarker = seMarker, + ArchRProj = proj, + cutOff = "FDR <= 0.1 & Log2FC >= 0" +) + +} diff --git a/man/plotBrowserTrack.Rd b/man/plotBrowserTrack.Rd index ada70e90..33b3426a 100644 --- a/man/plotBrowserTrack.Rd +++ b/man/plotBrowserTrack.Rd @@ -21,6 +21,8 @@ plotBrowserTrack( tileSize = 250, minCells = 25, normMethod = "ReadsInTSS", + highlight = NULL, + highlightFill = "firebrick3", threads = getArchRThreads(), ylim = NULL, pal = NULL, @@ -57,8 +59,10 @@ Blue-colored genes are on the minus strand and red-colored genes are on the plus \item{sizes}{A numeric vector containing up to 3 values that indicate the sizes of the individual components passed to \code{plotSummary}. The order must be the same as \code{plotSummary}.} -\item{features}{A \code{GRanges} object containing the "features" to be plotted via the "featureTrack". This should be thought of as a -bed track. i.e. the set of peaks obtained using \verb{getPeakSet(ArchRProj))}.} +\item{features}{A \code{GRanges} (for a single feature track) or \code{GRangesList} (for multiple feature tracks) object containing the "features" to +be plotted via the "featureTrack". This should be thought of as a bed track. i.e. the set of peaks obtained using \verb{getPeakSet(ArchRProj))}. +If you provide a \code{GRangesList}, then each element of that object must be named and this name will be used on the plot. +For example - \code{GRangesList("peaks" = peak_gr, "other" = other_gr)}.} \item{loops}{A \code{GRanges} object containing the "loops" to be plotted via the "loopTrack". This \code{GRanges} object start represents the center position of one loop anchor and the end represents the center position of another loop anchor. @@ -85,6 +89,10 @@ used to exclude pseudo-bulk replicates generated from low numbers of cells.} \item{normMethod}{The name of the column in \code{cellColData} by which normalization should be performed. The recommended and default value is "ReadsInTSS" which simultaneously normalizes tracks based on sequencing depth and sample data quality.} +\item{highlight}{A \code{GRanges} object containing regions to highlight.} + +\item{highlightFill}{A \code{character} color for filling highlihgted regions.} + \item{threads}{The number of threads to use for parallel execution.} \item{ylim}{The numeric quantile y-axis limit to be used for for "bulkTrack" plotting. This should be expressed as \verb{c(lower limit, upper limit)} such as \code{c(0,0.99)}. If not provided, the y-axis limit will be c(0, 0.999).} @@ -116,3 +124,19 @@ This function will plot the coverage at an input region in the style of a browse which enables direct comparison across samples. Note that the genes displayed in these plots are derived from your \code{geneAnnotation} (i.e. the \code{BSgenome} object you used) so they may not match other online genome browsers that use different gene annotations. } +\examples{ + +#Get Test ArchR Project +proj <- getTestProject() + +#Highlight +genes <- getGenes() +genes <- genes[which(genes$symbol \%in\% c("CD3D", "MS4A1"))] + +#Plot Track +p <- plotBrowserTrack(proj, geneSymbol = c("CD3D", "MS4A1"), groupBy = "CellType", highlight = genes, highlightFill = "dodgerblue3") + +#Plot PDF +plotPDF(p, name = "Track-CD3D-MS4A1", ArchRProj = proj) + +} diff --git a/man/plotEmbedding.Rd b/man/plotEmbedding.Rd index 4ed4836a..8280117d 100644 --- a/man/plotEmbedding.Rd +++ b/man/plotEmbedding.Rd @@ -92,3 +92,15 @@ if \code{colorBy} is numeric, then \code{plotAs} is set to "hex".} \description{ This function will plot an embedding stored in an ArchRProject } +\examples{ + +#Get Test Project +proj <- getTestProject() + +#Plot UMAP +p <- plotEmbedding(proj, name = "Clusters") + +#PDF +plotPDF(p, name = "UMAP-Clusters", ArchRProj = proj) + +} diff --git a/man/plotEnrichHeatmap.Rd b/man/plotEnrichHeatmap.Rd index ec4de9fe..77f8b6d5 100644 --- a/man/plotEnrichHeatmap.Rd +++ b/man/plotEnrichHeatmap.Rd @@ -49,3 +49,33 @@ lines and labels, just the internal portions of the plot.} \description{ This function will plot a heatmap of hypergeometric enrichment of a given peakAnnotation within the defined marker peaks. } +\examples{ + +# Get Test ArchR Project +proj <- getTestProject() + +# Get Markers +seMarker <- getMarkerFeatures( + ArchRProj = proj, + useMatrix = "PeakMatrix", + testMethod = "binomial", + binarize = TRUE +) + +# Get Peak Annotation Enrichment +annoEnrich <- peakAnnoEnrichment( + seMarker = seMarker, + ArchRProj = proj, + cutOff = "FDR <= 0.1 & Log2FC >= 0" +) + +# Multiply by 50 since this is a super small test sample +assay(annoEnrich) <- assay(annoEnrich) * 50 + +#Plot +p <- plotEnrichHeatmap(annoEnrich) + +#PDF +plotPDF(p, name = "PeakAnnoEnrich", ArchRProj = proj) + +} diff --git a/man/plotFootprints.Rd b/man/plotFootprints.Rd index 62f4e3f8..3df108d7 100644 --- a/man/plotFootprints.Rd +++ b/man/plotFootprints.Rd @@ -43,7 +43,8 @@ include "none", "subtract", "divide". "Subtract" means subtracting the normalize \item{plot}{A boolean value indicating whether or not the footprints should be plotted (\code{TRUE}) or returned as grob objects (\code{FALSE}).} -\item{ArchRProj}{An \code{ArchRProject} object to be used for plotting directory in \code{getOutputDirectory}.} +\item{ArchRProj}{An \code{ArchRProject} object to be used for plotting directory in \code{getOutputDirectory}. If no \code{ArchRProj} is supplied, +then plots will be stored in a directory called "Plots" in the current working directory.} \item{plotName}{A string indicating the name/prefix of the file to be used for output plots.} @@ -62,3 +63,18 @@ This prevents large amount of footprint plots stored as an object.} \description{ This function will get footprints for all samples in a given ArchRProject or a properly-formatted Summarized Experiment } +\examples{ + +# Get Test ArchR Project +proj <- getTestProject() + +# Get Motif Positions +positions <- getPositions(proj) + +# Get Footprints +seFoot <- getFootprints(ArchRProj = proj, positions = positions, groupBy = "Clusters", minCells = 10) + +# Plot Footprints +plotFootprints(seFoot, smoothWindow = 11) + +} diff --git a/man/plotFragmentSizes.Rd b/man/plotFragmentSizes.Rd index 87e0ebd2..149ad9b1 100644 --- a/man/plotFragmentSizes.Rd +++ b/man/plotFragmentSizes.Rd @@ -36,3 +36,15 @@ instead of plotting the fragment size distribution.} \description{ This function will plot a fragment size distribution for each sample. Only cells in the \code{ArchRProject} are used when making this plot. } +\examples{ + +# Get Test ArchR Project +proj <- getTestProject() + +# Plot Frag Sizes +p <- plotFragmentSizes(proj, groupBy = "Clusters") + +# PDF +plotPDF(p, name = "Frag-Sizes", ArchRProj = proj) + +} diff --git a/man/plotGroups.Rd b/man/plotGroups.Rd index a16f907d..c0f0b6f1 100644 --- a/man/plotGroups.Rd +++ b/man/plotGroups.Rd @@ -72,3 +72,15 @@ to the value of the 97.5th and 2.5th percentile values respectively.} \description{ This function will group, summarize and then plot data from an ArchRProject for visual comparison. } +\examples{ + +#Get Test Project +proj <- getTestProject() + +#Plot Groups +p <- plotGroups(proj, groupBy = "Clusters", colorBy = "colData", name = "TSSEnrichment", plotAs = "violin", alpha = 0.5) + +#PDF +plotPDF(p, name = "Clusters-TSS", ArchRProj = proj) + +} diff --git a/man/plotMarkerHeatmap.Rd b/man/plotMarkerHeatmap.Rd index 7ad7c9d7..626d4925 100644 --- a/man/plotMarkerHeatmap.Rd +++ b/man/plotMarkerHeatmap.Rd @@ -16,6 +16,7 @@ plotMarkerHeatmap( pal = NULL, binaryClusterRows = TRUE, clusterCols = TRUE, + subsetMarkers = NULL, labelMarkers = NULL, nLabel = 15, nPrint = 15, @@ -51,11 +52,16 @@ rownames from \code{seMarker} to be excluded from the heatmap.} \item{clusterCols}{A boolean value that indicates whether the columns of the marker heatmap should be clustered.} +\item{subsetMarkers}{A vector of rownames from seMarker to use for subsetting of seMarker to only plot specific features on the heatmap. +Note that these rownames are expected to be integers that come from \code{rownames(rowData(seMarker))}. If this parameter is used for +subsetting, then the values provided to \code{cutOff} are effectively ignored.} + \item{labelMarkers}{A character vector listing the \code{rownames} of \code{seMarker} that should be labeled on the side of the heatmap.} -\item{nLabel}{An integer value that indicates whether the top \code{n} features for each column in \code{seMarker} should be labeled on the side of the heatmap.} +\item{nLabel}{An integer value that indicates how many of the top \code{n} features for each column in \code{seMarker} should be labeled on the side of the heatmap. +To remove all feature labels, set \code{nLabel = 0}.} -\item{nPrint}{If provided \code{seMarker} is from "GeneScoreMatrix" print the top n genes for each group based on how uniquely up-regulated the gene is.} +\item{nPrint}{If provided \code{seMarker} is from "GeneScoreMatrix" print the top \code{n} genes for each group based on how uniquely up-regulated the gene is.} \item{labelRows}{A boolean value that indicates whether all rows should be labeled on the side of the heatmap.} @@ -73,3 +79,23 @@ looking for down-regulated markers (\verb{log2(fold change) < 0}) instead of up- \description{ This function will plot a heatmap of the results from markerFeatures } +\examples{ + +#Get Test Project +proj <- getTestProject() + +#Get Markers +seMarker <- getMarkerFeatures( + ArchRProj = proj, + useMatrix = "PeakMatrix", + testMethod = "binomial", + binarize = TRUE +) + +#Plot Markers +p <- plotMarkerHeatmap(seMarker) + +#PDF +plotPDF(p, name = "Marker-Heatmap", ArchRProj = proj) + +} diff --git a/man/plotMarkers.Rd b/man/plotMarkers.Rd index 6b1a0078..ffe9d0f2 100644 --- a/man/plotMarkers.Rd +++ b/man/plotMarkers.Rd @@ -30,3 +30,23 @@ lines and labels, just the internal portions of the plot.} \description{ This function will plot one group/column of a differential markers as an MA or Volcano plot. } +\examples{ + +#Get Test Project +proj <- getTestProject() + +#Get Markers +seMarker <- getMarkerFeatures( + ArchRProj = proj, + useMatrix = "PeakMatrix", + testMethod = "binomial", + binarize = TRUE +) + +#Plot Markers +p <- plotMarkers(seMarker, name = "C1") + +#PDF +plotPDF(p, name = "Marker-Plot", ArchRProj = proj) + +} diff --git a/man/plotPDF.Rd b/man/plotPDF.Rd index 6ea4bb86..4c57c7da 100644 --- a/man/plotPDF.Rd +++ b/man/plotPDF.Rd @@ -38,3 +38,15 @@ object (ggplot2, plot, heatmap, etc).} \description{ This function will save a plot or set of plots as a PDF file in the outputDirectory of a given ArchRProject. } +\examples{ + +#Get Test Project +proj <- getTestProject() + +#Plot UMAP +p <- plotEmbedding(proj, name = "Clusters") + +#PDF +plotPDF(p, name = "UMAP-Clusters", ArchRProj = proj) + +} diff --git a/man/plotPeak2GeneHeatmap.Rd b/man/plotPeak2GeneHeatmap.Rd index 52d77d6b..a3a6b93e 100644 --- a/man/plotPeak2GeneHeatmap.Rd +++ b/man/plotPeak2GeneHeatmap.Rd @@ -7,7 +7,7 @@ plotPeak2GeneHeatmap( ArchRProj = NULL, corCutOff = 0.45, - FDRCutOff = 1e-04, + FDRCutOff = 0.0001, varCutOffATAC = 0.25, varCutOffRNA = 0.25, k = 25, @@ -63,3 +63,19 @@ reproduce results downstream.} \description{ This function plots side by side heatmaps of linked ATAC and Gene regions from \code{addPeak2GeneLinks}. } +\examples{ + +# Get Test ArchR Project +proj <- getTestProject() + +# Add P2G Links +proj <- addPeak2GeneLinks(proj, k = 20) + +# Get P2G Links +p2g <- getPeak2GeneLinks(proj) + +# Plot P2G +p <- plotPeak2GeneHeatmap(proj) +plotPDF(p, name = "P2G-Heatmap", ArchRProj = proj) + +} diff --git a/man/plotTSSEnrichment.Rd b/man/plotTSSEnrichment.Rd index 264645fa..22fe3f23 100644 --- a/man/plotTSSEnrichment.Rd +++ b/man/plotTSSEnrichment.Rd @@ -48,3 +48,15 @@ instead of plotting the TSS enrichment plot.} This function will plot a TSS enrichment plot for each sample. Cells in \code{ArchRProject} are the only ones used when making this plot. } +\examples{ + +# Get Test ArchR Project +proj <- getTestProject() + +# Plot TSS +p <- plotTSSEnrichment(proj, groupBy = "Clusters") + +# PDF +plotPDF(p, name = "TSS-Enrich", ArchRProj = proj) + +} diff --git a/man/plotTrajectory.Rd b/man/plotTrajectory.Rd index bc6137f8..18a26dba 100644 --- a/man/plotTrajectory.Rd +++ b/man/plotTrajectory.Rd @@ -89,3 +89,18 @@ if \code{colorBy} is numeric, then \code{plotAs} is set to "hex".} \description{ This function will plot a trajectory that was created onto an embedding. } +\examples{ + +# Get Test ArchR Project +proj <- getTestProject() + +#Add Trajectory +proj <- addTrajectory(proj, trajectory = c("C1", "C2", "C3"), embedding = "UMAP", force = TRUE) + +#Plot Trajectory +p <- plotTrajectory(proj, smoothWindow = 20) + +#PDF +plotPDF(p, name = "Trajcetory", ArchRProj = proj) + +} diff --git a/man/plotTrajectoryHeatmap.Rd b/man/plotTrajectoryHeatmap.Rd index 4a22c00f..407d5587 100644 --- a/man/plotTrajectoryHeatmap.Rd +++ b/man/plotTrajectoryHeatmap.Rd @@ -64,3 +64,21 @@ can be in different units.} \description{ This function will plot a heatmap of the results from getTrajectory } +\examples{ + +# Get Test ArchR Project +proj <- getTestProject() + +#Add Trajectory +proj <- addTrajectory(proj, trajectory = c("C1", "C2", "C3"), embedding = "UMAP", force = TRUE) + +#Get Trajectory +seTraj <- getTrajectory(proj) + +#Plot Trajectory Heatmap +p <- plotTrajectoryHeatmap(seTraj) + +#Plot PDF +plotPDF(p, name = "Trajectory-Heatmap", ArchRProj = proj) + +} diff --git a/man/recoverArchRProject.Rd b/man/recoverArchRProject.Rd index 80fbefe9..2da818f2 100644 --- a/man/recoverArchRProject.Rd +++ b/man/recoverArchRProject.Rd @@ -12,3 +12,12 @@ recoverArchRProject(ArchRProj) \description{ This function will recover an ArchRProject if it has broken sampleColData or cellColData due to different versions of bioconductor s4vectors. } +\examples{ + +# Get Test Project +proj <- getTestProject() + +# Try to Recover ArchR Project +proj <- recoverArchRProject(proj) + +} diff --git a/man/reformatFragmentFiles.Rd b/man/reformatFragmentFiles.Rd index 933b70c2..11d43031 100644 --- a/man/reformatFragmentFiles.Rd +++ b/man/reformatFragmentFiles.Rd @@ -19,3 +19,12 @@ reformatFragmentFiles( This function provides help in reformatting Fragment Files for reading in createArrowFiles. It will handle weird anomalies found that cause errors in reading tabix bgzip'd fragment files. } +\examples{ + +# Get Test Fragments +fragments <- getTestFragments() + +# Get Peak Annotations +fragments2 <- reformatFragmentFiles(fragments) + +} diff --git a/man/saveArchRProject.Rd b/man/saveArchRProject.Rd index d9a717ab..d0aa6a98 100644 --- a/man/saveArchRProject.Rd +++ b/man/saveArchRProject.Rd @@ -30,3 +30,12 @@ saveArchRProject( \description{ This function will organize arrows and project output into a directory and save the ArchRProject for later usage. } +\examples{ + +# Get Small Test Project +proj <- getTestProject() + +# Save +saveArchRProject(proj) + +} diff --git a/man/setArchRLocking.Rd b/man/setArchRLocking.Rd new file mode 100644 index 00000000..4533dd45 --- /dev/null +++ b/man/setArchRLocking.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/GlobalDefaults.R +\name{setArchRLocking} +\alias{setArchRLocking} +\title{Set a globally-applied H5 file locking setup} +\usage{ +setArchRLocking() +} +\description{ +This function will set the default H5 file locking parameters to the system +} +\examples{ + +# Set ArchR H5 Locking Globally +setArchRLocking() + +} diff --git a/man/subsetArchRProject.Rd b/man/subsetArchRProject.Rd index 9f75916f..ff76b0c0 100644 --- a/man/subsetArchRProject.Rd +++ b/man/subsetArchRProject.Rd @@ -32,3 +32,12 @@ subsetArchRProject( \description{ This function will subset and ArchRProject by cells and save the output to a new directory and re-load the subsetted ArchRProject. } +\examples{ + +# Get Small Test Project +proj <- getTestProject() + +#Subset +proj <- subsetArchRProject(proj, cells = getCellNames(proj)[1:50]) + +} diff --git a/man/subsetCells.Rd b/man/subsetCells.Rd index cac3ee10..0313a905 100644 --- a/man/subsetCells.Rd +++ b/man/subsetCells.Rd @@ -14,3 +14,12 @@ subsetCells(ArchRProj = NULL, cellNames = NULL) \description{ This function returns an ArchRProject object that contains a specified subset of cells. } +\examples{ + +# Get Test ArchR Project +proj <- getTestProject() + +# Get Peak Annotations +proj <- subsetCells(proj, getCellNames(proj)[1:50]) + +} diff --git a/man/theme_ArchR.Rd b/man/theme_ArchR.Rd index eac42e87..043fd1bd 100644 --- a/man/theme_ArchR.Rd +++ b/man/theme_ArchR.Rd @@ -44,3 +44,18 @@ theme_ArchR( \description{ This function returns a ggplot2 theme that is black borded with black font. } +\examples{ + +# Create Random Data +m <- data.frame(x=matrix(rnorm(10, 2),ncol=1)) +m$color <- sample(c("A", "B"), 10, replace = TRUE) + +# Plot +p <- ggGroup(x = m$color, y = m$x) + theme_ArchR() + +# To PDF +pdf("test.pdf", width = 4, height = 7) +p +dev.off() + +} diff --git a/tests/testthat.R b/tests/testthat.R new file mode 100644 index 00000000..25f5e362 --- /dev/null +++ b/tests/testthat.R @@ -0,0 +1,4 @@ +library(testthat) +library(ArchR) + +test_check("ArchR") \ No newline at end of file diff --git a/tests/testthat/test_1_arrow.R b/tests/testthat/test_1_arrow.R new file mode 100644 index 00000000..63bab730 --- /dev/null +++ b/tests/testthat/test_1_arrow.R @@ -0,0 +1,278 @@ +# Tests for arrow +# change in random number generation in R3.6, this ensures tests will pass under older and newer Rs +# (this was implemented in Seurat's testthat) +library(ArchR) +library(testthat) +library(Matrix) +suppressWarnings(RNGversion(vstr = "3.5.3")) +set.seed(1) + +#Context +context("test_arrow") + +#Fragments +fragments <- file.path(system.file("testdata", package="ArchR"), "PBSmall.tsv.gz") + +#Check +test_that("Test Fragments Exist...", { + expect_equal(file.exists(fragments), TRUE) +}) + +################################################ +# Testing Create Arrow File +################################################ + +addArchRGenome("hg19test2") + +test_that("Test Genome is Correct Exist...", { + check1 <- all(paste0(seqnames(getGenes())) %in% c("chr5", "chr11")) + check2 <- all(paste0(seqnames(getChromSizes())) %in% c("chr5", "chr11")) + expect_equal(check1, TRUE) + expect_equal(check2, TRUE) +}) + +#Create Arrow Files +arrowFiles <- createArrowFiles( + inputFiles = fragments, + sampleNames = "PBSmall", + minFrags = 100, + nChunk = 1, + TileMatParams=list(tileSize=10000), + force = TRUE +) + +#Get Contents +test_that("Checking Arrow Contents...", { + + expect_equal( + ArchR:::.validArrow(arrowFiles), + arrowFiles + ) + + expect_equal( + ArchR:::.availableArrays(arrowFiles), + c("GeneScoreMatrix", "TileMatrix") + ) + + expect_equal( + nrow(ArchR:::.getFeatureDF(arrowFiles, "TileMatrix")), + 31593 + ) + + expect_equal( + nrow(ArchR:::.getFeatureDF(arrowFiles, "GeneScoreMatrix")), + 2454 + ) + + expect_equal( + ArchR:::.availableSeqnames(arrowFiles), + c("chr11", "chr5") + ) + + expect_equal( + ArchR:::.availableChr(arrowFiles), + c("chr11", "chr5") + ) + + expect_equal( + as.vector(table(substr(ArchR:::.availableCells(arrowFiles), 9, 9))[c("B", "M", "T")]), + c(45, 33, 49) + ) + + expect_equal( + paste0(ArchR:::.sampleName(arrowFiles)), + "PBSmall" + ) + +}) + +################################################ +# Testing Dropping Matrices +################################################ + +arrowFiles <- ArchR:::.dropGroupsFromArrow( + ArrowFile = arrowFiles, + dropGroups = c("GeneScoreMatrix", "TileMatrix") +) + +#Get Contents +test_that("Checking Arrow Contents After Drop...", { + + expect_equal( + ArchR:::.validArrow(arrowFiles), + arrowFiles + ) + + expect_equal( + paste0(ArchR:::.availableArrays(arrowFiles)), + c("")[-1] + ) + + expect_equal( + ArchR:::.availableSeqnames(arrowFiles), + c("chr11", "chr5") + ) + + expect_equal( + ArchR:::.availableChr(arrowFiles), + c("chr11", "chr5") + ) + + expect_equal( + as.vector(table(substr(ArchR:::.availableCells(arrowFiles), 9, 9))[c("B", "M", "T")]), + c(45, 33, 49) + ) + + expect_equal( + paste0(ArchR:::.sampleName(arrowFiles)), + "PBSmall" + ) + +}) + +################################################ +# Adding Tile Matrix +################################################ + +arrowFiles <- addTileMatrix( + input = arrowFiles, + tileSize = 25000, + chromSizes = getChromSizes(), + force = TRUE +) + +#Get Contents +test_that("Checking Arrow Contents after addTileMatrix", { + + expect_equal( + ArchR:::.validArrow(arrowFiles), + arrowFiles + ) + + expect_equal( + paste0(ArchR:::.availableArrays(arrowFiles)), + "TileMatrix" + ) + + expect_equal( + nrow(ArchR:::.getFeatureDF(arrowFiles, "TileMatrix")), + 12638 + ) + + expect_equal( + ArchR:::.availableSeqnames(arrowFiles), + c("chr11", "chr5") + ) + + expect_equal( + ArchR:::.availableChr(arrowFiles), + c("chr11", "chr5") + ) + + expect_equal( + as.vector(table(substr(ArchR:::.availableCells(arrowFiles), 9, 9))[c("B", "M", "T")]), + c(45, 33, 49) + ) + + expect_equal( + paste0(ArchR:::.sampleName(arrowFiles)), + "PBSmall" + ) + +}) + + +################################################ +# Adding Gene Score Matrix +################################################ + +arrowFiles <- addGeneScoreMatrix( + input = arrowFiles, + genes = getGenes() +) + +#Get Contents +test_that("Checking Arrow Contents after addGeneScoreMatrix...", { + + expect_equal( + ArchR:::.validArrow(arrowFiles), + arrowFiles + ) + + expect_equal( + ArchR:::.availableArrays(arrowFiles), + c("GeneScoreMatrix", "TileMatrix") + ) + + expect_equal( + nrow(ArchR:::.getFeatureDF(arrowFiles, "TileMatrix")), + 12638 + ) + + expect_equal( + nrow(ArchR:::.getFeatureDF(arrowFiles, "GeneScoreMatrix")), + 2454 + ) + + expect_equal( + ArchR:::.availableSeqnames(arrowFiles), + c("chr11", "chr5") + ) + + expect_equal( + ArchR:::.availableChr(arrowFiles), + c("chr11", "chr5") + ) + + expect_equal( + as.vector(table(substr(ArchR:::.availableCells(arrowFiles), 9, 9))[c("B", "M", "T")]), + c(45, 33, 49) + ) + + expect_equal( + paste0(ArchR:::.sampleName(arrowFiles)), + "PBSmall" + ) + +}) + +################################################ +# Final Checks +################################################ + +test_that("Final Checks...", { + + expect_equal( + length(getFragmentsFromArrow(arrowFiles)), + 26443 + ) + + expect_equal( + getMatrixFromArrow(arrowFiles, "GeneScoreMatrix") %>% {c(nrow(.), ncol(.))}, + c(2454, 127) + ) + + expect_equal( + getMatrixFromArrow(arrowFiles, "TileMatrix", binarize = TRUE) %>% {c(nrow(.), ncol(.))}, + c(12638, 127) + ) + +}) + +################################################ +# Clear +################################################ + +files <- list.files() +files <- files[!grepl("\\.R", files)] +for(i in seq_along(files)){ + if(dir.exists(files[i])){ + unlink(files[i], recursive=TRUE) + }else if(file.exists(files[i])){ + file.remove(files[i]) + } +} + + + + diff --git a/tests/testthat/test_2_chromVAR.R b/tests/testthat/test_2_chromVAR.R new file mode 100644 index 00000000..34591366 --- /dev/null +++ b/tests/testthat/test_2_chromVAR.R @@ -0,0 +1,106 @@ +# Tests for functions in src directory +# change in random number generation in R3.6, this ensures tests will pass under older and newer Rs +# (this was implemented in Seurat's testthat) +library(ArchR) +library(testthat) +library(Matrix) +library(chromVAR) +suppressWarnings(RNGversion(vstr = "3.5.3")) +set.seed(1) + +#Context +context("test_chromVAR") + +#Test Project +proj <- getTestProject() + +#Matrix +se <- getMatrixFromProject(proj, "PeakMatrix") +names(assays(se)) <- "counts" + +#Bias +se <- addGCBias(se, genome = BSgenome.Hsapiens.UCSC.hg19::BSgenome.Hsapiens.UCSC.hg19) +rowData(se)$bias <- round(rowData(se)$bias, 4) + +#Motif Matches +matches <- getMatches(proj) + +#Check +stopifnot(all(paste0(rowRanges(se))==paste0(rowRanges(matches)))) + +#Background Peaks +se2 <- SummarizedExperiment(assays=SimpleList(counts=cbind(Matrix::rowSums(assay(se)), 1))) +rowRanges(se2) <- rowRanges(se) +bgdPeaks <- getBackgroundPeaks(se2) + +#Computing deviations +dev <- computeDeviations( + object = se, + annotations = matches, + background_peaks = bgdPeaks +) + +#Background Peaks ArchR +bgdPeaks2 <- getBgdPeaks(proj, force = TRUE) + +#Computing deviations +dev2 <- computeDeviations( + object = se, + annotations = matches, + background_peaks = assay(bgdPeaks2) +) + +#Compute Deviations ArchR +proj <- ArchR::addDeviationsMatrix(proj, force=TRUE, bgdPeaks = bgdPeaks2) + +#Compare +dev3 <- getMatrixFromProject(proj, "MotifMatrix") + +#Check Deviations +corDev_12 <- lapply(seq_len(nrow(dev)), function(x){ + cor(assay(dev2)[x,], assay(dev)[x,]) +}) %>% unlist + +corZ_12 <- lapply(seq_len(nrow(dev)), function(x){ + cor(assays(dev2)[[2]][x,], assays(dev)[[2]][x,]) +}) %>% unlist + +corDev_13 <- lapply(seq_len(nrow(dev)), function(x){ + cor(assay(dev3)[x,], assay(dev)[x,]) +}) %>% unlist + +corZ_13 <- lapply(seq_len(nrow(dev)), function(x){ + cor(assays(dev3)[[2]][x,], assays(dev)[[2]][x,]) +}) %>% unlist + +corDev_23 <- lapply(seq_len(nrow(dev)), function(x){ + cor(assay(dev2)[x,], assay(dev3)[x,]) +}) %>% unlist + +corZ_23 <- lapply(seq_len(nrow(dev)), function(x){ + cor(assays(dev2)[[2]][x,], assays(dev3)[[2]][x,]) +}) %>% unlist + +#Test +test_that("Testing ArchR ChromVAR", { + expect_equal(all(corDev_12 > 0.95), TRUE) + expect_equal(all(corZ_12 > 0.95), TRUE) + expect_equal(all(corDev_13 > 0.95), TRUE) + expect_equal(all(corZ_13 > 0.95), TRUE) + expect_equal(all(corDev_23 > 0.99), TRUE) + expect_equal(all(corZ_23 > 0.99), TRUE) +}) + +################################################ +# Clear +################################################ + +files <- list.files() +files <- files[!grepl("\\.R", files)] +for(i in seq_along(files)){ + if(dir.exists(files[i])){ + unlink(files[i], recursive=TRUE) + }else if(file.exists(files[i])){ + file.remove(files[i]) + } +} diff --git a/tests/testthat/test_3_cpp.R b/tests/testthat/test_3_cpp.R new file mode 100644 index 00000000..ecb4685f --- /dev/null +++ b/tests/testthat/test_3_cpp.R @@ -0,0 +1,99 @@ +# Tests for functions in src directory +# change in random number generation in R3.6, this ensures tests will pass under older and newer Rs +# (this was implemented in Seurat's testthat) +library(ArchR) +library(testthat) +library(Matrix) +suppressWarnings(RNGversion(vstr = "3.5.3")) +set.seed(1) + +#Context +context("test_cpp") + +#Default Matrices +data("m1") +m2 <- m1[rev(1:10), rev(1:10)] + +################################################ +# Testing Row Correlation +################################################ + +#Correlations +c1 <- ArchR:::rowCorCpp(1:10, 1:10, m1, m2) +c2 <- lapply(1:10, function(x){ + cor(m1[x, ], m2[x, ]) +}) %>% unlist + +#Test +test_that("Row-wise Correlation is working...", { + expect_equal(c1, c2) +}) + +################################################ +# Testing KNN Utils +################################################ + +#KNN +knnObj <- ArchR:::.computeKNN(m1, m2, k = 5) + +#Check Knn Overlap Cpp +overlapCpp <- ArchR:::determineOverlapCpp(knnObj, 3) + +#Check Knn Overlap R +overlapR <- lapply(seq_len(nrow(knnObj)), function(x){ + o <- lapply(seq_len(x-1), function(y){ + sum(knnObj[x, ] %in% knnObj[y, ]) + }) %>% unlist + if(any(o > 3)){ + -1 + }else{ + 0 + } +}) %>% unlist + +#Test +test_that("KNN Utils is working...", { + expect_equal(overlapCpp, overlapR) +}) + +################################################ +# Testing General Utils +################################################ + +#tabulate2dCpp +tab2d <- as.matrix(ArchR:::tabulate2dCpp( + x = c(0,0,2,2,3), + xmin = 0, + xmax = 3, + y = c(1,1,2,3,3), + ymin = 1, + ymax = 3 +)) + +tabSm <- as.matrix(Matrix::sparseMatrix( + i = c(1,1,2,3,3), + j = c(0,0,2,2,3) + 1, + x = c(1,1,1,1,1) +)) + +#Test +test_that("Tabulate Utils is working...", { + expect_equal(max(tab2d-tabSm) <= testthat_tolerance(), TRUE) +}) + +#rowSparseVariances +sm1 <- as(m1, "dgCMatrix") + +#Variances +var1 <- ArchR:::computeSparseRowVariances( + sm1@i + 1, sm1@x, Matrix::rowMeans(sm1), ncol(sm1)) + +var2 <- apply(m1, 1, var) + +test_that("Variance Utils is working...", { + expect_equal(max(var1-var2) <= testthat_tolerance(), TRUE) +}) + + + + diff --git a/tests/testthat/test_4_worklow.R b/tests/testthat/test_4_worklow.R new file mode 100644 index 00000000..10fce071 --- /dev/null +++ b/tests/testthat/test_4_worklow.R @@ -0,0 +1,119 @@ +# Tests for ArchR +# change in random number generation in R3.6, this ensures tests will pass under older and newer Rs +# (this was implemented in Seurat's testthat) +library(ArchR) +library(testthat) +library(Matrix) +suppressWarnings(RNGversion(vstr = "3.5.3")) +set.seed(1) + +#Context +context("test_workflow") + +#Genome +addArchRGenome("hg19test2") + +#Arrows +arrows <- file.path(system.file("testdata", package="ArchR"), "PBSmall.arrow") + +#Project +proj <- ArchRProject(arrows, outputDirectory = "Test") + +#LSI +proj <- addIterativeLSI(proj, dimsToUse = 1:5, varFeatures=1000, iterations = 2, force=TRUE) + +#Clusters +proj <- addClusters(proj, force=TRUE, dimsToUse = 1:5) + +#Cell type +proj$CellType <- substr(getCellNames(proj), 9, 9) + +#Check Clusters +cM <- confusionMatrix(proj$CellType, proj$Clusters) + +test_that("Test Cluster Purity...", { + expect_equal(all(apply(cM / Matrix::rowSums(cM), 1, max) > 0.9), TRUE) +}) + +#UMAP +proj <- addUMAP(proj, nNeighbors = 40, dimsToUse = 1:5, minDist=0.1, force=TRUE) + +#Plot UMAP +p1 <- plotEmbedding(proj, name = "CellType", size = 3) +p2 <- plotEmbedding(proj, name = "Clusters", size = 3) +plotPDF(p1, p2, name = "UMAP", addDOC = FALSE, ArchRProj = proj) + +#Group Coverages +proj <- addGroupCoverages(proj) + +#Custom Peak Calling +proj <- addReproduciblePeakSet( + ArchRProj = proj, + groupBy = "Clusters", + peakMethod = "tiles", + minCells = 20, + cutOff = 0.1 +) + +#Macs2 Peak Calling +pathToMacs2 <- findMacs2() +proj <- addReproduciblePeakSet( + ArchRProj = proj, + groupBy = "Clusters", + pathToMacs2 = pathToMacs2, + minCells = 20, + cutOff = 0.1 +) + +#Add Peak Matrix +proj <- addPeakMatrix(proj) + +#Motif Annotations +proj <- addMotifAnnotations(ArchRProj = proj, motifSet = "cisbpTest", name = "Motif", force=TRUE) + +#Motif Deviations +proj <- addBgdPeaks(proj) +proj <- addDeviationsMatrix( + ArchRProj = proj, + peakAnnotation = "Motif", + force = TRUE +) + +#Check Matrices +se <- getMatrixFromProject(proj, "MotifMatrix") + +pval_CEBP <- t.test( + assays(se)$z["CEBPB_1", se$CellType=="T"], + assays(se)$z["CEBPB_1", se$CellType=="M"] +)$p.value + +pval_EOMES <- t.test( + assays(se)$z["EOMES_6", se$CellType=="T"], + assays(se)$z["EOMES_6", se$CellType=="M"] +)$p.value + +pval_PAX <- t.test( + assays(se)$z["PAX5_5", se$CellType=="M"], + assays(se)$z["PAX5_5", se$CellType=="B"] +)$p.value + +test_that("Test Motif Deviations...", { + expect_equal(pval_CEBP < 0.01, TRUE) + expect_equal(pval_EOMES < 0.1, TRUE) + expect_equal(pval_PAX < 0.01, TRUE) +}) + +################################################ +# Clear +################################################ + +files <- list.files() +files <- files[!grepl("\\.R", files)] +for(i in seq_along(files)){ + if(dir.exists(files[i])){ + unlink(files[i], recursive=TRUE) + }else if(file.exists(files[i])){ + file.remove(files[i]) + } +} + diff --git a/tests/testthat/test_5_rd.R b/tests/testthat/test_5_rd.R new file mode 100644 index 00000000..9d64bd90 --- /dev/null +++ b/tests/testthat/test_5_rd.R @@ -0,0 +1,46 @@ +# Tests for ArchR +# change in random number generation in R3.6, this ensures tests will pass under older and newer Rs +# (this was implemented in Seurat's testthat) +library(ArchR) +library(testthat) +library(Matrix) +suppressWarnings(RNGversion(vstr = "3.5.3")) +set.seed(1) + +clear_test_dir <- function(){ + files <- list.files() + files <- files[!grepl("\\.R", files)] + for(i in seq_along(files)){ + if(dir.exists(files[i])){ + unlink(files[i], recursive=TRUE) + }else if(file.exists(files[i])){ + file.remove(files[i]) + } + } +} + +################################################ +# Test Rd +################################################ + +fn <- list.files(system.file("man", package="ArchR"), full.names=TRUE) +fn <- grep("\\.Rd", fn, value=TRUE) +fn <- c( + fn[grepl("createArrow", fn)], + fn[!grepl("createArrow", fn)] +) +for(i in seq_along(fn)){ + context(paste0(basename(fn[i]))) + test_example(path = fn[i], basename(fn[i])) + clear_test_dir() + gc() +} + +################################################ +# Remove +################################################ + +if(dir.exists("../../data/Annotations")){ + system("rm -r ../../data/Annotations") +} +