morris-lab · KaetheKong · Apr 19, 2019 · Apr 19, 2019 · Apr 30, 2019 · Apr 30, 2019
diff --git a/.Rhistory b/.Rhistory
@@ -0,0 +1,301 @@
+library(roxygen2)
+library(devtools)
+setwd("~/Desktop/CloneHunterNew_5/CloneHunterNew/")  # NOTE(review): machine-specific path; presumably the package root, since devtools::document() runs next
+devtools::document()
+library(tidyverse)
+library(foreach)
+library(networkD3)
+bam.test.obj <- readRDS("~/Desktop/bam_v123_obj.Rds")
+bam.test.obj <- convertCellTagMatrix2LinkList(bam.test.obj)
+bam.test.obj <- getNodesfromLinkList(bam.test.obj)
+additional_data <- data.frame(sample(1:10, size = length(rownames([email protected])), replace = TRUE), row.names = rownames([email protected]))  # one random integer in 1:10 per row name; NOTE(review): accessor is mangled in this copy
+colnames(additional_data) <- "Cluster"
+bam.test.obj <- addData2Nodes(bam.test.obj, additional_data)
+bam.test.obj <- drawSubnet(tag = "CellTagV1_2", overlay = "Cluster", celltag.obj = bam.test.obj)  # overlay names the Cluster column created above
+bam.test.obj@network  # print the network slot
+bar.data <- [email protected]
+bar.data$Cell.BC <- rownames(bar.data)
+bar.data <- gather(bar.data, key = "CellTag", value = "Clone", 1:3, na.rm = FALSE)  # columns 1:3 become long CellTag/Clone pairs; NA values are kept
+ggplot(data = bar.data) +
+geom_bar(mapping = aes(x = CellTag, fill = factor(Clone)), position = "fill", show.legend = FALSE) +  # bars are normalised to proportions, one bar per CellTag value
+scale_y_continuous(labels = scales::percent_format()) +
+theme_bw()
+library(roxygen2)  # from here on: a second recorded run of the same command sequence
+library(devtools)
+setwd("~/Desktop/CloneHunterNew_5/CloneHunterNew/")
+devtools::document()
+library(tidyverse)
+library(foreach)
+library(networkD3)
+bam.test.obj <- readRDS("~/Desktop/bam_v123_obj.Rds")
+bam.test.obj <- convertCellTagMatrix2LinkList(bam.test.obj)
+bam.test.obj <- getNodesfromLinkList(bam.test.obj)
+additional_data <- data.frame(sample(1:10, size = length(rownames([email protected])), replace = TRUE), row.names = rownames([email protected]))
+colnames(additional_data) <- "Cluster"
+bam.test.obj <- addData2Nodes(bam.test.obj, additional_data)
+bam.test.obj <- drawSubnet(tag = "CellTagV1_2", overlay = "Cluster", celltag.obj = bam.test.obj)
+bam.test.obj@network
+bar.data <- [email protected]
+bar.data$Cell.BC <- rownames(bar.data)
+bar.data <- gather(bar.data, key = "CellTag", value = "Clone", 1:3, na.rm = FALSE)
+ggplot(data = bar.data) +
+geom_bar(mapping = aes(x = CellTag, fill = factor(Clone)), position = "fill", show.legend = FALSE) +
+scale_y_continuous(labels = scales::percent_format()) +
+theme_bw()
+archs4 <- readRDS("~/Desktop/data mining/ARCHS4_polyA_samples_quantile_norm.Rds")
+rownames(archs4)
+colnames(archs4)
+x <- matrix(c(1,2,3,4,5,6), 2,3)  # toy 2 x 3 matrix to look at what cor() returns
+x
+cor(x)
+rm(x)
+# ----
+# Data mining check: cor() on archs4, first on everything and then on growing column subsets
+# ----
+library(Matrix)
+archs4 <- as(archs4, "dgCMatrix")  # convert to a sparse column-compressed matrix
+cor(archs4)
+cor(archs4[,c(1,2)])
+cor(archs4[,c(1:10)])
+cor(archs4[,c(1:100)])
+archs4.first.1000 <- archs4[,c(1:1000)]
+cor.first.1000 <- cor(archs4.first.1000)  # correlation among the first 1000 columns
+library(corrplot)
+corrplot(cor.first.1000)
+mem.limits  # NOTE(review): bare symbol typed at the console; presumably a memory-limit probe - confirm
+gc()
+R_MAX_VSIZE  # NOTE(review): normally an environment variable, so this bare symbol probably errored - confirm
+memory.size()
+library(CloneHunter)
+#' Apply a collapsing result and rebuild the CellTag count matrix
+#'
+#' @param celltag.obj CellTag object; it is passed to SetCellTagCurrentVersionWorkingMatrix to receive the rebuilt matrix
+#' @param collapsed.rslt.file File path to the tab-separated, header-less collapsed result file
+#' @return The object returned by SetCellTagCurrentVersionWorkingMatrix after the collapsed.count matrix is stored
+#' @keywords single-cell RNA-seq data, CellTagging
+#' @export
+#' @examples
+#' CellTagDataPostCollapsing(celltag.obj = bam.test.obj, collapsed.rslt.file = "~/Desktop/collapsing_rslt.txt")
+#'
+CellTagDataPostCollapsing <- function(celltag.obj, collapsed.rslt.file) {
+# Read in the collapsed result (its columns are addressed below as V1, V2 and V3)
+collapsed <- read.table(collapsed.rslt.file, sep = "\t", header = F, stringsAsFactors = F)
+# Read in the table prepared for collapsing; NOTE(review): the slot accessors on the next line are mangled in this copy - confirm against the package source
+collapsing <- [email protected][[[email protected]]]
+colnames(collapsing)[c(1:2)] <- c("CellTag", "Cell.Barcode")
+new.collapsing.df <- collapsing  # working copy that gets relabelled; collapsing itself stays unchanged and is used for lookups
+final.collapsing.df <- data.frame()
+# Process the collapsing data: one iteration per row of the collapsed result
+for (i in 1:nrow(collapsed)) {
+curr.row <- collapsed[i,]
+curr.centroid <- curr.row$V1
+curr.count <- curr.row$V2
+curr.ct <- substring(curr.centroid, 1, 8)  # first 8 characters of the centroid key; presumably the CellTag part - TODO confirm
+if (curr.count > 1) {
+curr.collapse.set <- strsplit(curr.row$V3, ",")[[1]]  # V3 is split on commas into individual keys
+curr.to.collapse <- setdiff(curr.collapse.set, curr.centroid)  # every listed key except the centroid itself
+for (j in 1:length(curr.to.collapse)) {  # NOTE(review): 1:length() iterates over c(1, 0) if this vector is empty
+curr.for.c <- curr.to.collapse[j]
+curr.for.c.ct <- substring(curr.for.c, 1, 8)
+if (curr.for.c.ct != curr.ct) {  # only keys whose 8-character prefix differs from that of the centroid are relabelled
+ind <- which(collapsing$concat == curr.to.collapse[j])
+ind.cent <- which(collapsing$concat == curr.centroid)
+new.collapsing.df[ind, "concat"] <- curr.centroid
+new.collapsing.df[ind, "CellTag"] <- collapsing[ind.cent[1], "CellTag"]  # identifiers are copied from the first centroid row
+new.collapsing.df[ind, "Cell.Barcode"] <- collapsing[ind.cent[1], "Cell.Barcode"]
+}
+}
+curr.centroid.sub <- new.collapsing.df[which(new.collapsing.df$concat == curr.centroid), ]
+curr.count.new <- sum(curr.centroid.sub$value)  # total of value over all rows that now carry the centroid key
+curr.new.row <- data.frame(concat = curr.centroid, CellTag = unique(curr.centroid.sub$CellTag),
+Cell.Barcode = unique(curr.centroid.sub$Cell.Barcode), value = curr.count.new,
+stringsAsFactors = F)
+} else {curr.new.row <- new.collapsing.df[which(new.collapsing.df$concat == curr.centroid), ]}  # count of 1 or less: keep the existing row(s) for this key
+if (nrow(final.collapsing.df) <= 0){
+final.collapsing.df <- curr.new.row
+} else {
+final.collapsing.df <- rbind(final.collapsing.df, curr.new.row)
+}
+}
+#new.collapsing.df <- setDT(new.collapsing.df)
+final.collapsing.df <- setDT(final.collapsing.df)
+# Regenerate the matrix: one row per Cell.Barcode, one column per CellTag
+#new.matrix <- dcast(new.collapsing.df, Cell.Barcode~CellTag)
+new.matrix <- dcast(final.collapsing.df, Cell.Barcode~CellTag)
+# Move the Cell.Barcode column into the row names and keep the remaining columns as a plain matrix
+cell.rnm <- new.matrix$Cell.Barcode
+cnms <- colnames(new.matrix)[2:ncol(new.matrix)]
+new.matrix <- as.matrix(new.matrix[, ..cnms])
+rownames(new.matrix) <- cell.rnm
+# Save the new matrix to the object as a sparse dgCMatrix under the name collapsed.count
+new.obj <- SetCellTagCurrentVersionWorkingMatrix(celltag.obj, "collapsed.count", as(new.matrix, "dgCMatrix"))
+return(new.obj)
+}
+bam.test.obj <- readRDS("~/Desktop/demo_object.Rds")
+head([email protected][["v1"]])
+bam.test.obj <- CellTagMatrixCount(celltag.obj = bam.test.obj, barcodes.file = "Desktop/barcodes.tsv")  # NOTE(review): relative path; the same call is repeated further down with a ~/ prefix
+dim([email protected])
+bam.test.obj <- CellTagDataForCollapsing(celltag.obj = bam.test.obj, output.file = "~/Desktop/collapsing.txt")
+bam.test.obj <- CellTagDataPostCollapsing(celltag.obj = bam.test.obj, collapsed.rslt.file = "~/Desktop/collapsing_rslt.txt")
+head([email protected])
+library(CloneHunter)
+## Might need to download these two
+# install.packages("roxygen2")
+library(roxygen2)
+library(devtools)
+## Change the working directory to where you have stored your CloneHunter folder
+setwd("~/Desktop/Morris Lab/")
+devtools::install("CloneHunter")  # installs from the CloneHunter folder inside the directory set above
+setwd("CloneHunter/")
+devtools::document()
+library(CloneHunter)
+bam.test.obj <- CellTagMatrixCount(celltag.obj = bam.test.obj, barcodes.file = "Desktop/barcodes.tsv")
+bam.test.obj <- CellTagMatrixCount(celltag.obj = bam.test.obj, barcodes.file = "~/Desktop/barcodes.tsv")  # same call with a home-anchored path
+dim([email protected])
+bam.test.obj <- CellTagDataForCollapsing(celltag.obj = bam.test.obj, output.file = "~/Desktop/collapsing.txt")
+bam.test.obj <- CellTagDataPostCollapsing(celltag.obj = bam.test.obj, collapsed.rslt.file = "~/Desktop/collapsing_rslt.txt")
+head([email protected])
+#' Apply a collapsing result and rebuild the CellTag count matrix
+#'
+#' @param celltag.obj CellTag object; it is passed to SetCellTagCurrentVersionWorkingMatrix to receive the rebuilt matrix
+#' @param collapsed.rslt.file File path to the tab-separated, header-less collapsed result file
+#' @return The object returned by SetCellTagCurrentVersionWorkingMatrix after the collapsed.count matrix is stored
+#' @keywords single-cell RNA-seq data, CellTagging
+#' @export
+#' @examples
+#' CellTagDataPostCollapsing(celltag.obj = bam.test.obj, collapsed.rslt.file = "~/Desktop/collapsing_rslt.txt")
+#'
+CellTagDataPostCollapsing <- function(celltag.obj, collapsed.rslt.file) {
+# Read in the collapsed result (its columns are addressed below as V1, V2 and V3)
+collapsed <- read.table(collapsed.rslt.file, sep = "\t", header = F, stringsAsFactors = F)
+# Read in the table prepared for collapsing; NOTE(review): the slot accessors on the next line are mangled in this copy - confirm against the package source
+collapsing <- [email protected][[[email protected]]]
+colnames(collapsing)[c(1:2)] <- c("CellTag", "Cell.Barcode")
+new.collapsing.df <- collapsing  # working copy that gets relabelled; collapsing itself stays unchanged and is used for lookups
+final.collapsing.df <- data.frame()
+# Process the collapsing data: one iteration per row of the collapsed result
+for (i in 1:nrow(collapsed)) {
+curr.row <- collapsed[i,]
+curr.centroid <- curr.row$V1
+curr.count <- curr.row$V2
+curr.ct <- substring(curr.centroid, 1, 8)  # first 8 characters of the centroid key; presumably the CellTag part - TODO confirm
+if (curr.count > 1) {
+curr.collapse.set <- strsplit(curr.row$V3, ",")[[1]]  # V3 is split on commas into individual keys
+curr.to.collapse <- setdiff(curr.collapse.set, curr.centroid)  # every listed key except the centroid itself
+for (j in 1:length(curr.to.collapse)) {  # NOTE(review): 1:length() iterates over c(1, 0) if this vector is empty
+curr.for.c <- curr.to.collapse[j]
+curr.for.c.ct <- substring(curr.for.c, 1, 8)
+if (curr.for.c.ct != curr.ct) {  # only keys whose 8-character prefix differs from that of the centroid are relabelled
+ind <- which(collapsing$concat == curr.to.collapse[j])
+ind.cent <- which(collapsing$concat == curr.centroid)
+new.collapsing.df[ind, "concat"] <- curr.centroid
+new.collapsing.df[ind, "CellTag"] <- collapsing[ind.cent[1], "CellTag"]  # identifiers are copied from the first centroid row
+new.collapsing.df[ind, "Cell.Barcode"] <- collapsing[ind.cent[1], "Cell.Barcode"]
+}
+}
+curr.centroid.sub <- new.collapsing.df[which(new.collapsing.df$concat == curr.centroid), ]
+curr.count.new <- sum(curr.centroid.sub$value)  # total of value over all rows that now carry the centroid key
+curr.new.row <- data.frame(concat = curr.centroid, CellTag = unique(curr.centroid.sub$CellTag),
+Cell.Barcode = unique(curr.centroid.sub$Cell.Barcode), value = curr.count.new,
+stringsAsFactors = F)
+} else {curr.new.row <- new.collapsing.df[which(new.collapsing.df$concat == curr.centroid), ]}  # count of 1 or less: keep the existing row(s) for this key
+if (nrow(final.collapsing.df) <= 0){
+final.collapsing.df <- curr.new.row
+} else {
+final.collapsing.df <- rbind(final.collapsing.df, curr.new.row)
+}
+}
+#new.collapsing.df <- setDT(new.collapsing.df)
+final.collapsing.df <- setDT(final.collapsing.df)
+print(head(final.collapsing.df))  # prints the first rows of the collapsed table on every call
+# Regenerate the matrix: one row per Cell.Barcode, one column per CellTag
+#new.matrix <- dcast(new.collapsing.df, Cell.Barcode~CellTag)
+new.matrix <- dcast(final.collapsing.df, Cell.Barcode~CellTag)
+# Move the Cell.Barcode column into the row names and keep the remaining columns as a plain matrix
+cell.rnm <- new.matrix$Cell.Barcode
+cnms <- colnames(new.matrix)[2:ncol(new.matrix)]
+new.matrix <- as.matrix(new.matrix[, ..cnms])
+rownames(new.matrix) <- cell.rnm
+# Save the new matrix to the object as a sparse dgCMatrix under the name collapsed.count
+new.obj <- SetCellTagCurrentVersionWorkingMatrix(celltag.obj, "collapsed.count", as(new.matrix, "dgCMatrix"))
+return(new.obj)
+}
+bam.test.obj <- CellTagDataPostCollapsing(celltag.obj = bam.test.obj, collapsed.rslt.file = "~/Desktop/collapsing_rslt.txt")
+?dcast  # open the help page for dcast
+#' Apply a collapsing result and rebuild the CellTag count matrix
+#'
+#' @param celltag.obj CellTag object; it is passed to SetCellTagCurrentVersionWorkingMatrix to receive the rebuilt matrix
+#' @param collapsed.rslt.file File path to the tab-separated, header-less collapsed result file
+#' @return The object returned by SetCellTagCurrentVersionWorkingMatrix after the collapsed.count matrix is stored
+#' @keywords single-cell RNA-seq data, CellTagging
+#' @export
+#' @examples
+#' CellTagDataPostCollapsing(celltag.obj = bam.test.obj, collapsed.rslt.file = "~/Desktop/collapsing_rslt.txt")
+#'
+CellTagDataPostCollapsing <- function(celltag.obj, collapsed.rslt.file) {
+# Read in the collapsed result (its columns are addressed below as V1, V2 and V3)
+collapsed <- read.table(collapsed.rslt.file, sep = "\t", header = F, stringsAsFactors = F)
+# Read in the table prepared for collapsing; NOTE(review): the slot accessors on the next line are mangled in this copy - confirm against the package source
+collapsing <- [email protected][[[email protected]]]
+colnames(collapsing)[c(1:2)] <- c("CellTag", "Cell.Barcode")
+new.collapsing.df <- collapsing  # working copy that gets relabelled; collapsing itself stays unchanged and is used for lookups
+final.collapsing.df <- data.frame()
+# Process the collapsing data: one iteration per row of the collapsed result
+for (i in 1:nrow(collapsed)) {
+curr.row <- collapsed[i,]
+curr.centroid <- curr.row$V1
+curr.count <- curr.row$V2
+curr.ct <- substring(curr.centroid, 1, 8)  # first 8 characters of the centroid key; presumably the CellTag part - TODO confirm
+if (curr.count > 1) {
+curr.collapse.set <- strsplit(curr.row$V3, ",")[[1]]  # V3 is split on commas into individual keys
+curr.to.collapse <- setdiff(curr.collapse.set, curr.centroid)  # every listed key except the centroid itself
+for (j in 1:length(curr.to.collapse)) {  # NOTE(review): 1:length() iterates over c(1, 0) if this vector is empty
+curr.for.c <- curr.to.collapse[j]
+curr.for.c.ct <- substring(curr.for.c, 1, 8)
+if (curr.for.c.ct != curr.ct) {  # only keys whose 8-character prefix differs from that of the centroid are relabelled
+ind <- which(collapsing$concat == curr.to.collapse[j])
+ind.cent <- which(collapsing$concat == curr.centroid)
+new.collapsing.df[ind, "concat"] <- curr.centroid
+new.collapsing.df[ind, "CellTag"] <- collapsing[ind.cent[1], "CellTag"]  # identifiers are copied from the first centroid row
+new.collapsing.df[ind, "Cell.Barcode"] <- collapsing[ind.cent[1], "Cell.Barcode"]
+}
+}
+curr.centroid.sub <- new.collapsing.df[which(new.collapsing.df$concat == curr.centroid), ]
+curr.count.new <- sum(curr.centroid.sub$value)  # total of value over all rows that now carry the centroid key
+curr.new.row <- data.frame(concat = curr.centroid, CellTag = unique(curr.centroid.sub$CellTag),
+Cell.Barcode = unique(curr.centroid.sub$Cell.Barcode), value = curr.count.new,
+stringsAsFactors = F)
+} else {curr.new.row <- new.collapsing.df[which(new.collapsing.df$concat == curr.centroid), ]}  # count of 1 or less: keep the existing row(s) for this key
+if (nrow(final.collapsing.df) <= 0){
+final.collapsing.df <- curr.new.row
+} else {
+final.collapsing.df <- rbind(final.collapsing.df, curr.new.row)
+}
+}
+#new.collapsing.df <- setDT(new.collapsing.df)
+final.collapsing.df <- setDT(final.collapsing.df)
+# Regenerate the matrix: one row per Cell.Barcode, one column per CellTag
+#new.matrix <- dcast(new.collapsing.df, Cell.Barcode~CellTag)
+new.matrix <- dcast(final.collapsing.df, Cell.Barcode~CellTag, fill = 0)  # combinations with no row are filled with 0
+# Move the Cell.Barcode column into the row names and keep the remaining columns as a plain matrix
+cell.rnm <- new.matrix$Cell.Barcode
+cnms <- colnames(new.matrix)[2:ncol(new.matrix)]
+new.matrix <- as.matrix(new.matrix[, ..cnms])
+rownames(new.matrix) <- cell.rnm
+# Save the new matrix to the object as a sparse dgCMatrix under the name collapsed.count
+new.obj <- SetCellTagCurrentVersionWorkingMatrix(celltag.obj, "collapsed.count", as(new.matrix, "dgCMatrix"))
+return(new.obj)
+}
+bam.test.obj <- CellTagDataPostCollapsing(celltag.obj = bam.test.obj, collapsed.rslt.file = "~/Desktop/collapsing_rslt.txt")
+head([email protected])
+plot(density(colSums([email protected])))  # density curve of the per-column sums
+bam.test.obj <- SingleCellDataBinatization(bam.test.obj, 2)  # presumably 2 is a count cutoff - TODO confirm against the package docs
+MetricPlots(bam.test.obj)
+bam.test.obj <- SingleCellDataWhitelist(bam.test.obj, "~/Desktop/CloneHunterTest/v1_whitelist.csv")
+bam.test.obj <- MetricBasedFiltering(bam.test.obj, 20, comparison = "less")
+bam.test.obj <- MetricBasedFiltering(bam.test.obj, 2, comparison = "greater")
+MetricPlots(bam.test.obj)  # plotted again after the two filtering calls
+bam.test.obj <- JaccardAnalysis(bam.test.obj)
+bam.test.obj <- CloneCalling(celltag.obj = bam.test.obj, correlation.cutoff=0.7)
+[email protected][["v1"]]
+[email protected][["v1"]]
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -11,8 +11,12 @@ Depends: R (>= 3.5.0),
     igraph,
     data.table,
     plyr,
-    reshape
+    reshape,
+    Matrix,
+    tidyverse,
+    foreach,
+    networkD3
 License: What license is it under?
 Encoding: UTF-8
 LazyData: true
-RoxygenNote: 6.1.0.9000
+RoxygenNote: 6.1.1
diff --git a/Exmples/.DS_Store b/Exmples/.DS_Store
diff --git a/Exmples/CloneHunterWhitelistTestRun.Rmd b/Exmples/CloneHunterWhitelistTestRun.Rmd
@@ -0,0 +1,76 @@
+---
+title: "Whitelist Regeneration Notebook"
+output: html_notebook
+---
+
+### Load the Package
+```{r}
+library(roxygen2)
+library(devtools)
+setwd("~/Desktop/CloneHunterNew_5/")
+setwd("CloneHunterNew/")
+devtools::document()
+```
+
+## V1
+### Create CellTag Object
+```{r}
+v1.whitelist <- CellTagObject("v1.whitelist", "~/Desktop/CloneHunterTest/V1_S1_L001_R1_001.fastq")
+```
+
+### Extract the CellTag Reads
+```{r}
+v1.whitelist <- CellTagExtraction(v1.whitelist, "v1")
+```
+
+### Sort by CellTag Frequency
+```{r}
+v1.whitelist <- AddCellTagFreqSort(v1.whitelist)
+```
+
+### V1 Whitelist Generation
+```{r}
+v1.whitelist <- CellTagWhitelistFiltering(v1.whitelist, 0.9)
+```
+
+## V2
+### Create CellTag Object
+```{r}
+v2.whitelist <- CellTagObject("v2.whitelist", "~/Desktop/CloneHunterTest/V2-1_S2_L001_R1_001.fastq")  # object name now matches the V2 library, as the V1 and V3 chunks do
+```
+
+### Extract the CellTag Reads
+```{r}
+v2.whitelist <- CellTagExtraction(v2.whitelist, "v2")
+```
+
+### Sort by CellTag Frequency
+```{r}
+v2.whitelist <- AddCellTagFreqSort(v2.whitelist)
+```
+
+### V2 Whitelist Generation
+```{r}
+v2.whitelist <- CellTagWhitelistFiltering(v2.whitelist, 0.9)
+```
+
+## V3
+### Create CellTag Object
+```{r}
+v3.whitelist <- CellTagObject("v3.whitelist", "~/Desktop/CloneHunterTest/V2-2_S3_L001_R1_001.fastq")
+```
+
+### Extract the CellTag Reads
+```{r}
+v3.whitelist <- CellTagExtraction(v3.whitelist, "v3")
+```
+
+### Sort by CellTag Frequency
+```{r}
+v3.whitelist <- AddCellTagFreqSort(v3.whitelist)
+```
+
+### V3 Whitelist Generation
+```{r}
+v3.whitelist <- CellTagWhitelistFiltering(v3.whitelist, 0.9)
+```