Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Kongw clonehunter #3

Open
wants to merge 32 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
001cb1a
Change to CellTag Object
KaetheKong Apr 19, 2019
c3d7ef6
Update README.md
KaetheKong Apr 19, 2019
fea2877
Update README.md
KaetheKong Apr 30, 2019
7cd9764
modified CellTag Collapsing
KaetheKong Apr 30, 2019
a2d656d
new infrastructure
KaetheKong May 2, 2019
a8d1f55
whitelist tested
KaetheKong May 2, 2019
ee17c75
tested network construction and visualization
KaetheKong May 3, 2019
cb5c10c
modified collapsing
KaetheKong May 8, 2019
7aa6fce
add filter
KaetheKong May 10, 2019
0edc9a8
Update README.md
KaetheKong May 14, 2019
b660be0
Update README.md
KaetheKong May 16, 2019
2957181
Update README.md
KaetheKong May 16, 2019
b3c67cb
Update README.md
KaetheKong May 16, 2019
6c8d913
Update README.md
KaetheKong May 16, 2019
0753cd4
Update README.md
KaetheKong May 16, 2019
e048735
Update README.md
KaetheKong May 16, 2019
c885be7
Update README.md
KaetheKong May 16, 2019
d7de020
Update README.md
KaetheKong May 16, 2019
83d9670
Update README.md
KaetheKong May 16, 2019
8ee64e5
Update README.md
KaetheKong May 16, 2019
f58bcb2
Update README.md
KaetheKong May 16, 2019
00e25a4
Update README.md
KaetheKong May 16, 2019
f68217b
Added comments and changed axis labels
KaetheKong May 20, 2019
fdcfbd5
added sample plots
KaetheKong May 20, 2019
b26de67
Update README.md
KaetheKong May 20, 2019
17d72ad
added sample plots
KaetheKong May 20, 2019
d73b598
Merge branch 'kongw_clonehunter' of https://github.com/morris-lab/Clo…
KaetheKong May 20, 2019
6f9d023
added sample plots
KaetheKong May 20, 2019
3ac71ec
added sample plots
KaetheKong May 20, 2019
30b3070
Update README.md
KaetheKong May 20, 2019
a8407b1
Update README.md
KaetheKong May 20, 2019
8f112e5
fixed collapsing
KaetheKong May 29, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
301 changes: 301 additions & 0 deletions .Rhistory
Original file line number Diff line number Diff line change
@@ -0,0 +1,301 @@
# NOTE(review): these lines are a recorded interactive console history
# (.Rhistory), not a runnable script; they rely on machine-specific paths under
# ~/Desktop and on objects that existed only in that session.
# Load the packaging tools and regenerate the package documentation.
library(roxygen2)
library(devtools)
setwd("~/Desktop/CloneHunterNew_5/CloneHunterNew/")
devtools::document()
library(tidyverse)
library(foreach)
library(networkD3)
# Load a saved test object and run convertCellTagMatrix2LinkList and
# getNodesfromLinkList on it.
bam.test.obj <- readRDS("~/Desktop/bam_v123_obj.Rds")
bam.test.obj <- convertCellTagMatrix2LinkList(bam.test.obj)
bam.test.obj <- getNodesfromLinkList(bam.test.obj)
# Build a one-column data frame of random labels 1-10 (named "Cluster" below),
# one per row name of the slot referenced here.
# NOTE(review): the slot accesses in this line are garbled in this copy of the
# source -- confirm against the original history.
additional_data <- data.frame(sample(1:10, size = length(rownames([email protected])), replace = TRUE), row.names = rownames([email protected]))
colnames(additional_data) <- "Cluster"
bam.test.obj <- addData2Nodes(bam.test.obj, additional_data)
# Draw the sub-network for tag "CellTagV1_2" with the "Cluster" overlay and
# display the stored network slot.
bam.test.obj <- drawSubnet(tag = "CellTagV1_2", overlay = "Cluster", celltag.obj = bam.test.obj)
bam.test.obj@network
# NOTE(review): garbled slot access on the next line as well.
bar.data <- [email protected]
bar.data$Cell.BC <- rownames(bar.data)
# Reshape the first three columns to long format (CellTag / Clone pairs).
bar.data <- gather(bar.data, key = "CellTag", value = "Clone", 1:3, na.rm = FALSE)
# Stacked bar chart per CellTag column, scaled to proportions (position =
# "fill") with a percent axis.
ggplot(data = bar.data) +
geom_bar(mapping = aes(x = CellTag, fill = factor(Clone)), position = "fill", show.legend = FALSE) +
scale_y_continuous(labels = scales::percent_format()) +
theme_bw()
# NOTE(review): recorded interactive console history; this run repeats the
# same package-loading, network-drawing and bar-plot commands.
# Load the packaging tools and regenerate the package documentation.
library(roxygen2)
library(devtools)
setwd("~/Desktop/CloneHunterNew_5/CloneHunterNew/")
devtools::document()
library(tidyverse)
library(foreach)
library(networkD3)
# Load a saved test object and run convertCellTagMatrix2LinkList and
# getNodesfromLinkList on it.
bam.test.obj <- readRDS("~/Desktop/bam_v123_obj.Rds")
bam.test.obj <- convertCellTagMatrix2LinkList(bam.test.obj)
bam.test.obj <- getNodesfromLinkList(bam.test.obj)
# Random labels 1-10 (named "Cluster" below), one per row name of the slot
# referenced here.
# NOTE(review): the slot accesses in this line are garbled in this copy of the
# source -- confirm against the original history.
additional_data <- data.frame(sample(1:10, size = length(rownames([email protected])), replace = TRUE), row.names = rownames([email protected]))
colnames(additional_data) <- "Cluster"
bam.test.obj <- addData2Nodes(bam.test.obj, additional_data)
# Draw the sub-network for tag "CellTagV1_2" with the "Cluster" overlay and
# display the stored network slot.
bam.test.obj <- drawSubnet(tag = "CellTagV1_2", overlay = "Cluster", celltag.obj = bam.test.obj)
bam.test.obj@network
# NOTE(review): garbled slot access on the next line as well.
bar.data <- [email protected]
bar.data$Cell.BC <- rownames(bar.data)
# Reshape the first three columns to long format (CellTag / Clone pairs).
bar.data <- gather(bar.data, key = "CellTag", value = "Clone", 1:3, na.rm = FALSE)
# Stacked bar chart per CellTag column, scaled to proportions with a percent axis.
ggplot(data = bar.data) +
geom_bar(mapping = aes(x = CellTag, fill = factor(Clone)), position = "fill", show.legend = FALSE) +
scale_y_continuous(labels = scales::percent_format()) +
theme_bw()
# Load the ARCHS4 data set from a local RDS file and inspect its dimension names.
archs4 <- readRDS("~/Desktop/data mining/ARCHS4_polyA_samples_quantile_norm.Rds")
rownames(archs4)
colnames(archs4)
# Try cor() on a small 2 x 3 matrix, then remove it.
x <- matrix(c(1,2,3,4,5,6), 2,3)
x
cor(x)
rm(x)
####
# Data mining check
###
library(Matrix)
# Convert to a sparse dgCMatrix and attempt correlations on the full object
# and on progressively larger column subsets.
archs4 <- as(archs4, "dgCMatrix")
cor(archs4)
cor(archs4[,c(1,2)])
cor(archs4[,c(1:10)])
cor(archs4[,c(1:100)])
# Correlation of the first 1000 columns, plotted with corrplot.
archs4.first.1000 <- archs4[,c(1:1000)]
cor.first.1000 <- cor(archs4.first.1000)
library(corrplot)
corrplot(cor.first.1000)
# Memory diagnostics.
# NOTE(review): mem.limits and R_MAX_VSIZE are evaluated as bare symbols here;
# presumably these were exploratory attempts that did not resolve -- confirm.
mem.limits
gc()
R_MAX_VSIZE
memory.size()
library(CloneHunter)
#' Apply collapsing results to the CellTag count data
#'
#' Reads the tab-separated, headerless collapsing result file, folds the counts
#' of collapsed entries into their centroid, rebuilds a Cell.Barcode x CellTag
#' matrix and stores it through SetCellTagCurrentVersionWorkingMatrix under
#' "collapsed.count".
#'
#' @param celltag.obj CellTag object to update; it is passed to SetCellTagCurrentVersionWorkingMatrix (the data frame for collapsing is presumably read from one of its slots)
#' @param collapsed.rslt.file File path to the collapsed result file
#' @return The object returned by SetCellTagCurrentVersionWorkingMatrix after the collapsed matrix has been stored
#' @keywords single-cell RNA-seq data, CellTagging
#' @export
#' @examples
#' CellTagDataPostCollapsing(celltag.obj, "collapsed_test.txt")
#'
CellTagDataPostCollapsing <- function(celltag.obj, collapsed.rslt.file) {
# Read in the collapsed result: V1 is used as the centroid key, V2 as its count
# and V3 as the comma-separated list of keys in the collapse set
collapsed <- read.table(collapsed.rslt.file, sep = "\t", header = F, stringsAsFactors = F)
# Read in the data frame for collapsing
# NOTE(review): the slot accesses on the next line are garbled in this copy of
# the source; the intended expression presumably indexes a list slot of
# celltag.obj by its current version -- confirm against the package source
collapsing <- [email protected][[[email protected]]]
# Rename the first two columns; the code below also relies on existing
# "concat" and "value" columns
colnames(collapsing)[c(1:2)] <- c("CellTag", "Cell.Barcode")
# Working copy that receives the reassignments; lookups keep using `collapsing`
new.collapsing.df <- collapsing
final.collapsing.df <- data.frame()
# Process the collapsing data file
for (i in 1:nrow(collapsed)) {
curr.row <- collapsed[i,]
curr.centroid <- curr.row$V1
curr.count <- curr.row$V2
# First 8 characters of the centroid key (presumably the CellTag part of the
# concatenated key -- confirm)
curr.ct <- substring(curr.centroid, 1, 8)
# Merge only when the reported count is above 1
if (curr.count > 1) {
curr.collapse.set <- strsplit(curr.row$V3, ",")[[1]]
# Every key in the set except the centroid itself
curr.to.collapse <- setdiff(curr.collapse.set, curr.centroid)
# NOTE(review): if curr.to.collapse is empty, 1:length() yields c(1, 0) and the
# comparison below evaluates to NA, which makes `if` fail; seq_along() would
# be safer
for (j in 1:length(curr.to.collapse)) {
curr.for.c <- curr.to.collapse[j]
curr.for.c.ct <- substring(curr.for.c, 1, 8)
# Reassign only entries whose 8-character prefix differs from the centroid's
if (curr.for.c.ct != curr.ct) {
ind <- which(collapsing$concat == curr.to.collapse[j])
ind.cent <- which(collapsing$concat == curr.centroid)
# Point the matching rows at the centroid's key, CellTag and Cell.Barcode
new.collapsing.df[ind, "concat"] <- curr.centroid
new.collapsing.df[ind, "CellTag"] <- collapsing[ind.cent[1], "CellTag"]
new.collapsing.df[ind, "Cell.Barcode"] <- collapsing[ind.cent[1], "Cell.Barcode"]
}
}
# Sum the values of all rows now keyed by the centroid into a single row
curr.centroid.sub <- new.collapsing.df[which(new.collapsing.df$concat == curr.centroid), ]
curr.count.new <- sum(curr.centroid.sub$value)
curr.new.row <- data.frame(concat = curr.centroid, CellTag = unique(curr.centroid.sub$CellTag),
Cell.Barcode = unique(curr.centroid.sub$Cell.Barcode), value = curr.count.new,
stringsAsFactors = F)
# Count of 1 or less: carry the centroid's existing row(s) over unchanged
} else {curr.new.row <- new.collapsing.df[which(new.collapsing.df$concat == curr.centroid), ]}
# Append the row(s); the first result initialises the accumulator
if (nrow(final.collapsing.df) <= 0){
final.collapsing.df <- curr.new.row
} else {
final.collapsing.df <- rbind(final.collapsing.df, curr.new.row)
}
}
#new.collapsing.df <- setDT(new.collapsing.df)
final.collapsing.df <- setDT(final.collapsing.df)
# Regenerate the new matrix: one row per Cell.Barcode, one column per CellTag
# NOTE(review): no `fill` is given, so barcode/tag combinations that are absent
# keep dcast's default (presumably NA) -- confirm this is intended
#new.matrix <- dcast(new.collapsing.df, Cell.Barcode~CellTag)
new.matrix <- dcast(final.collapsing.df, Cell.Barcode~CellTag)
# Give the matrix rownames
cell.rnm <- new.matrix$Cell.Barcode
# All columns except the first (Cell.Barcode)
cnms <- colnames(new.matrix)[2:ncol(new.matrix)]
new.matrix <- as.matrix(new.matrix[, ..cnms])
rownames(new.matrix) <- cell.rnm
# Save the new matrix to the object as a sparse dgCMatrix
new.obj <- SetCellTagCurrentVersionWorkingMatrix(celltag.obj, "collapsed.count", as(new.matrix, "dgCMatrix"))
return(new.obj)
}
# Load the demo object and run the counting / collapsing steps on it.
# NOTE(review): the slot accesses inside head() and dim() in this session are
# garbled in this copy of the source -- confirm against the original history.
bam.test.obj <- readRDS("~/Desktop/demo_object.Rds")
head([email protected][["v1"]])
# NOTE(review): "Desktop/barcodes.tsv" is relative to the working directory;
# the same call is repeated further down with "~/Desktop/barcodes.tsv".
bam.test.obj <- CellTagMatrixCount(celltag.obj = bam.test.obj, barcodes.file = "Desktop/barcodes.tsv")
dim([email protected])
bam.test.obj <- CellTagDataForCollapsing(celltag.obj = bam.test.obj, output.file = "~/Desktop/collapsing.txt")
bam.test.obj <- CellTagDataPostCollapsing(celltag.obj = bam.test.obj, collapsed.rslt.file = "~/Desktop/collapsing_rslt.txt")
head([email protected])
library(CloneHunter)
## Might need to download these two
# install.packages("roxygen2")
library(roxygen2)
library(devtools)
## Change the working directory to where you have stored your CloneHunter folder
setwd("~/Desktop/Morris Lab/")
# Install the package from the local folder, regenerate its documentation and
# load it.
devtools::install("CloneHunter")
setwd("CloneHunter/")
devtools::document()
library(CloneHunter)
# Same pipeline again; the second CellTagMatrixCount call uses the
# home-relative barcodes path.
bam.test.obj <- CellTagMatrixCount(celltag.obj = bam.test.obj, barcodes.file = "Desktop/barcodes.tsv")
bam.test.obj <- CellTagMatrixCount(celltag.obj = bam.test.obj, barcodes.file = "~/Desktop/barcodes.tsv")
dim([email protected])
bam.test.obj <- CellTagDataForCollapsing(celltag.obj = bam.test.obj, output.file = "~/Desktop/collapsing.txt")
bam.test.obj <- CellTagDataPostCollapsing(celltag.obj = bam.test.obj, collapsed.rslt.file = "~/Desktop/collapsing_rslt.txt")
head([email protected])
#' Apply collapsing results to the CellTag count data
#'
#' Reads the tab-separated, headerless collapsing result file, folds the counts
#' of collapsed entries into their centroid, rebuilds a Cell.Barcode x CellTag
#' matrix and stores it through SetCellTagCurrentVersionWorkingMatrix under
#' "collapsed.count". This version also prints the head of the collapsed table.
#'
#' @param celltag.obj CellTag object to update; it is passed to SetCellTagCurrentVersionWorkingMatrix (the data frame for collapsing is presumably read from one of its slots)
#' @param collapsed.rslt.file File path to the collapsed result file
#' @return The object returned by SetCellTagCurrentVersionWorkingMatrix after the collapsed matrix has been stored
#' @keywords single-cell RNA-seq data, CellTagging
#' @export
#' @examples
#' CellTagDataPostCollapsing(celltag.obj, "collapsed_test.txt")
#'
CellTagDataPostCollapsing <- function(celltag.obj, collapsed.rslt.file) {
# Read in the collapsed result: V1 is used as the centroid key, V2 as its count
# and V3 as the comma-separated list of keys in the collapse set
collapsed <- read.table(collapsed.rslt.file, sep = "\t", header = F, stringsAsFactors = F)
# Read in the data frame for collapsing
# NOTE(review): the slot accesses on the next line are garbled in this copy of
# the source; the intended expression presumably indexes a list slot of
# celltag.obj by its current version -- confirm against the package source
collapsing <- [email protected][[[email protected]]]
# Rename the first two columns; the code below also relies on existing
# "concat" and "value" columns
colnames(collapsing)[c(1:2)] <- c("CellTag", "Cell.Barcode")
# Working copy that receives the reassignments; lookups keep using `collapsing`
new.collapsing.df <- collapsing
final.collapsing.df <- data.frame()
# Process the collapsing data file
for (i in 1:nrow(collapsed)) {
curr.row <- collapsed[i,]
curr.centroid <- curr.row$V1
curr.count <- curr.row$V2
# First 8 characters of the centroid key (presumably the CellTag part of the
# concatenated key -- confirm)
curr.ct <- substring(curr.centroid, 1, 8)
# Merge only when the reported count is above 1
if (curr.count > 1) {
curr.collapse.set <- strsplit(curr.row$V3, ",")[[1]]
# Every key in the set except the centroid itself
curr.to.collapse <- setdiff(curr.collapse.set, curr.centroid)
# NOTE(review): if curr.to.collapse is empty, 1:length() yields c(1, 0) and the
# comparison below evaluates to NA, which makes `if` fail; seq_along() would
# be safer
for (j in 1:length(curr.to.collapse)) {
curr.for.c <- curr.to.collapse[j]
curr.for.c.ct <- substring(curr.for.c, 1, 8)
# Reassign only entries whose 8-character prefix differs from the centroid's
if (curr.for.c.ct != curr.ct) {
ind <- which(collapsing$concat == curr.to.collapse[j])
ind.cent <- which(collapsing$concat == curr.centroid)
# Point the matching rows at the centroid's key, CellTag and Cell.Barcode
new.collapsing.df[ind, "concat"] <- curr.centroid
new.collapsing.df[ind, "CellTag"] <- collapsing[ind.cent[1], "CellTag"]
new.collapsing.df[ind, "Cell.Barcode"] <- collapsing[ind.cent[1], "Cell.Barcode"]
}
}
# Sum the values of all rows now keyed by the centroid into a single row
curr.centroid.sub <- new.collapsing.df[which(new.collapsing.df$concat == curr.centroid), ]
curr.count.new <- sum(curr.centroid.sub$value)
curr.new.row <- data.frame(concat = curr.centroid, CellTag = unique(curr.centroid.sub$CellTag),
Cell.Barcode = unique(curr.centroid.sub$Cell.Barcode), value = curr.count.new,
stringsAsFactors = F)
# Count of 1 or less: carry the centroid's existing row(s) over unchanged
} else {curr.new.row <- new.collapsing.df[which(new.collapsing.df$concat == curr.centroid), ]}
# Append the row(s); the first result initialises the accumulator
if (nrow(final.collapsing.df) <= 0){
final.collapsing.df <- curr.new.row
} else {
final.collapsing.df <- rbind(final.collapsing.df, curr.new.row)
}
}
#new.collapsing.df <- setDT(new.collapsing.df)
final.collapsing.df <- setDT(final.collapsing.df)
# Debug output: show the first rows of the collapsed table
print(head(final.collapsing.df))
# Regenerate the new matrix: one row per Cell.Barcode, one column per CellTag
# NOTE(review): no `fill` is given, so barcode/tag combinations that are absent
# keep dcast's default (presumably NA) -- confirm this is intended
#new.matrix <- dcast(new.collapsing.df, Cell.Barcode~CellTag)
new.matrix <- dcast(final.collapsing.df, Cell.Barcode~CellTag)
# Give the matrix rownames
cell.rnm <- new.matrix$Cell.Barcode
# All columns except the first (Cell.Barcode)
cnms <- colnames(new.matrix)[2:ncol(new.matrix)]
new.matrix <- as.matrix(new.matrix[, ..cnms])
rownames(new.matrix) <- cell.rnm
# Save the new matrix to the object as a sparse dgCMatrix
new.obj <- SetCellTagCurrentVersionWorkingMatrix(celltag.obj, "collapsed.count", as(new.matrix, "dgCMatrix"))
return(new.obj)
}
# Re-run the post-collapsing step on the session object, then open the help
# page for dcast.
bam.test.obj <- CellTagDataPostCollapsing(celltag.obj = bam.test.obj, collapsed.rslt.file = "~/Desktop/collapsing_rslt.txt")
?dcast
#' Apply collapsing results to the CellTag count data
#'
#' Reads the tab-separated, headerless collapsing result file, folds the counts
#' of collapsed entries into their centroid, rebuilds a Cell.Barcode x CellTag
#' matrix (absent combinations filled with 0) and stores it through
#' SetCellTagCurrentVersionWorkingMatrix under "collapsed.count".
#'
#' @param celltag.obj CellTag object to update; it is passed to SetCellTagCurrentVersionWorkingMatrix (the data frame for collapsing is presumably read from one of its slots)
#' @param collapsed.rslt.file File path to the collapsed result file
#' @return The object returned by SetCellTagCurrentVersionWorkingMatrix after the collapsed matrix has been stored
#' @keywords single-cell RNA-seq data, CellTagging
#' @export
#' @examples
#' CellTagDataPostCollapsing(celltag.obj, "collapsed_test.txt")
#'
CellTagDataPostCollapsing <- function(celltag.obj, collapsed.rslt.file) {
# Read in the collapsed result: V1 is used as the centroid key, V2 as its count
# and V3 as the comma-separated list of keys in the collapse set
collapsed <- read.table(collapsed.rslt.file, sep = "\t", header = F, stringsAsFactors = F)
# Read in the data frame for collapsing
# NOTE(review): the slot accesses on the next line are garbled in this copy of
# the source; the intended expression presumably indexes a list slot of
# celltag.obj by its current version -- confirm against the package source
collapsing <- [email protected][[[email protected]]]
# Rename the first two columns; the code below also relies on existing
# "concat" and "value" columns
colnames(collapsing)[c(1:2)] <- c("CellTag", "Cell.Barcode")
# Working copy that receives the reassignments; lookups keep using `collapsing`
new.collapsing.df <- collapsing
final.collapsing.df <- data.frame()
# Process the collapsing data file
for (i in 1:nrow(collapsed)) {
curr.row <- collapsed[i,]
curr.centroid <- curr.row$V1
curr.count <- curr.row$V2
# First 8 characters of the centroid key (presumably the CellTag part of the
# concatenated key -- confirm)
curr.ct <- substring(curr.centroid, 1, 8)
# Merge only when the reported count is above 1
if (curr.count > 1) {
curr.collapse.set <- strsplit(curr.row$V3, ",")[[1]]
# Every key in the set except the centroid itself
curr.to.collapse <- setdiff(curr.collapse.set, curr.centroid)
# NOTE(review): if curr.to.collapse is empty, 1:length() yields c(1, 0) and the
# comparison below evaluates to NA, which makes `if` fail; seq_along() would
# be safer
for (j in 1:length(curr.to.collapse)) {
curr.for.c <- curr.to.collapse[j]
curr.for.c.ct <- substring(curr.for.c, 1, 8)
# Reassign only entries whose 8-character prefix differs from the centroid's
if (curr.for.c.ct != curr.ct) {
ind <- which(collapsing$concat == curr.to.collapse[j])
ind.cent <- which(collapsing$concat == curr.centroid)
# Point the matching rows at the centroid's key, CellTag and Cell.Barcode
new.collapsing.df[ind, "concat"] <- curr.centroid
new.collapsing.df[ind, "CellTag"] <- collapsing[ind.cent[1], "CellTag"]
new.collapsing.df[ind, "Cell.Barcode"] <- collapsing[ind.cent[1], "Cell.Barcode"]
}
}
# Sum the values of all rows now keyed by the centroid into a single row
curr.centroid.sub <- new.collapsing.df[which(new.collapsing.df$concat == curr.centroid), ]
curr.count.new <- sum(curr.centroid.sub$value)
curr.new.row <- data.frame(concat = curr.centroid, CellTag = unique(curr.centroid.sub$CellTag),
Cell.Barcode = unique(curr.centroid.sub$Cell.Barcode), value = curr.count.new,
stringsAsFactors = F)
# Count of 1 or less: carry the centroid's existing row(s) over unchanged
} else {curr.new.row <- new.collapsing.df[which(new.collapsing.df$concat == curr.centroid), ]}
# Append the row(s); the first result initialises the accumulator
if (nrow(final.collapsing.df) <= 0){
final.collapsing.df <- curr.new.row
} else {
final.collapsing.df <- rbind(final.collapsing.df, curr.new.row)
}
}
#new.collapsing.df <- setDT(new.collapsing.df)
final.collapsing.df <- setDT(final.collapsing.df)
# Regenerate the new matrix: one row per Cell.Barcode, one column per CellTag;
# barcode/tag combinations that are absent are filled with 0
#new.matrix <- dcast(new.collapsing.df, Cell.Barcode~CellTag)
new.matrix <- dcast(final.collapsing.df, Cell.Barcode~CellTag, fill = 0)
# Give the matrix rownames
cell.rnm <- new.matrix$Cell.Barcode
# All columns except the first (Cell.Barcode)
cnms <- colnames(new.matrix)[2:ncol(new.matrix)]
new.matrix <- as.matrix(new.matrix[, ..cnms])
rownames(new.matrix) <- cell.rnm
# Save the new matrix to the object as a sparse dgCMatrix
new.obj <- SetCellTagCurrentVersionWorkingMatrix(celltag.obj, "collapsed.count", as(new.matrix, "dgCMatrix"))
return(new.obj)
}
# Re-run the post-collapsing step and inspect the result.
# NOTE(review): the slot accesses in head(), colSums() and the last two lines
# of this session are garbled in this copy of the source -- confirm against the
# original history.
bam.test.obj <- CellTagDataPostCollapsing(celltag.obj = bam.test.obj, collapsed.rslt.file = "~/Desktop/collapsing_rslt.txt")
head([email protected])
# Density plot of the column sums of the referenced matrix.
plot(density(colSums([email protected])))
# Binarize with the value 2, then plot the metrics.
bam.test.obj <- SingleCellDataBinatization(bam.test.obj, 2)
MetricPlots(bam.test.obj)
# Apply the V1 whitelist, then filter with cutoff 20 ("less") and cutoff 2
# ("greater") and plot the metrics again.
bam.test.obj <- SingleCellDataWhitelist(bam.test.obj, "~/Desktop/CloneHunterTest/v1_whitelist.csv")
bam.test.obj <- MetricBasedFiltering(bam.test.obj, 20, comparison = "less")
bam.test.obj <- MetricBasedFiltering(bam.test.obj, 2, comparison = "greater")
MetricPlots(bam.test.obj)
# Run the Jaccard analysis and call clones with a correlation cutoff of 0.7.
bam.test.obj <- JaccardAnalysis(bam.test.obj)
bam.test.obj <- CloneCalling(celltag.obj = bam.test.obj, correlation.cutoff=0.7)
[email protected][["v1"]]
[email protected][["v1"]]
8 changes: 6 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,12 @@ Depends: R (>= 3.5.0),
igraph,
data.table,
plyr,
reshape
reshape,
Matrix,
tidyverse,
foreach,
networkD3
License: What license is it under?
Encoding: UTF-8
LazyData: true
RoxygenNote: 6.1.0.9000
RoxygenNote: 6.1.1
Binary file added Exmples/.DS_Store
Binary file not shown.
76 changes: 76 additions & 0 deletions Exmples/CloneHunterWhitelistTestRun.Rmd
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
---
title: "Whitelist Regeneration Notebook"
output: html_notebook
---

### Load the Package
```{r}
library(roxygen2)
library(devtools)
setwd("~/Desktop/CloneHunterNew_5/")
setwd("CloneHunterNew/")
devtools::document()
```

## V1
### Create CellTag Object
```{r}
v1.whitelist <- CellTagObject("v1.whitelist", "~/Desktop/CloneHunterTest/V1_S1_L001_R1_001.fastq")
```

### Extract the CellTag Reads
```{r}
v1.whitelist <- CellTagExtraction(v1.whitelist, "v1")
```

### Sort by CellTag Frequency
```{r}
v1.whitelist <- AddCellTagFreqSort(v1.whitelist)
```

### V1 Whitelist Generation
```{r}
v1.whitelist <- CellTagWhitelistFiltering(v1.whitelist, 0.9)
```

## V2
### Create CellTag Object
```{r}
# Label the V2 object "v2.whitelist", in line with the "v1.whitelist" and
# "v3.whitelist" labels used in the V1 and V3 sections (the first argument is
# presumably the object name -- confirm against CellTagObject).
v2.whitelist <- CellTagObject("v2.whitelist", "~/Desktop/CloneHunterTest/V2-1_S2_L001_R1_001.fastq")
```

### Extract the CellTag Reads
```{r}
v2.whitelist <- CellTagExtraction(v2.whitelist, "v2")
```

### Sort by CellTag Frequency
```{r}
v2.whitelist <- AddCellTagFreqSort(v2.whitelist)
```

### V2 Whitelist Generation
```{r}
v2.whitelist <- CellTagWhitelistFiltering(v2.whitelist, 0.9)
```

## V3
### Create CellTag Object
```{r}
v3.whitelist <- CellTagObject("v3.whitelist", "~/Desktop/CloneHunterTest/V2-2_S3_L001_R1_001.fastq")
```

### Extract the CellTag Reads
```{r}
v3.whitelist <- CellTagExtraction(v3.whitelist, "v3")
```

### Sort by CellTag Frequency
```{r}
v3.whitelist <- AddCellTagFreqSort(v3.whitelist)
```

### V3 Whitelist Generation
```{r}
v3.whitelist <- CellTagWhitelistFiltering(v3.whitelist, 0.9)
```
Loading