Skip to content

Commit

Permalink
make the spacing follow a more consistent style
Browse files Browse the repository at this point in the history
  • Loading branch information
malachig committed Jun 19, 2024
1 parent 3f2182e commit 317f7af
Showing 1 changed file with 32 additions and 32 deletions.
64 changes: 32 additions & 32 deletions _posts/0003-03-02-Differential_Expression-edgeR.md
Original file line number Diff line number Diff line change
Expand Up @@ -65,34 +65,34 @@ R
R code has been provided below. If you wish to have a script with all of the code, it can be found [here](https://github.com/griffithlab/rnabio.org/blob/master/assets/scripts/Tutorial_edgeR.R). Run the R commands below.

```R
###R code###
#Set working directory where output will go

# set working directory where output will go
working_dir = "~/workspace/rnaseq/de/htseq_counts"
setwd(working_dir)

#Read in gene mapping
mapping=read.table("~/workspace/rnaseq/de/htseq_counts/ENSG_ID2Name.txt", header=FALSE, stringsAsFactors=FALSE, row.names=1)
# read in gene mapping
mapping = read.table("~/workspace/rnaseq/de/htseq_counts/ENSG_ID2Name.txt", header = FALSE, stringsAsFactors = FALSE, row.names = 1)

# Read in count matrix
rawdata=read.table("~/workspace/rnaseq/expression/htseq_counts/gene_read_counts_table_all_final.tsv", header=TRUE, stringsAsFactors=FALSE, row.names=1)
# read in count matrix
rawdata = read.table("~/workspace/rnaseq/expression/htseq_counts/gene_read_counts_table_all_final.tsv", header = TRUE, stringsAsFactors = FALSE, row.names = 1)

# Check dimensions
dim(rawdata)

# Require at least 1/6 of samples to have expressed count >= 10
sample_cutoff <- (1/6)
count_cutoff <- 10
sample_cutoff = (1/6)
count_cutoff = 10

#Define a function to calculate the fraction of values expressed above the count cutoff
getFE <- function(data,count_cutoff){
FE <- (sum(data >= count_cutoff)/length(data))
getFE = function(data,count_cutoff){
FE = (sum(data >= count_cutoff) / length(data))
return(FE)
}

#Apply the function to all genes, and filter out genes not meeting the sample cutoff
fraction_expressed <- apply(rawdata, 1, getFE, count_cutoff)
keep <- which(fraction_expressed >= sample_cutoff)
rawdata <- rawdata[keep,]
fraction_expressed = apply(rawdata, 1, getFE, count_cutoff)
keep = which(fraction_expressed >= sample_cutoff)
rawdata = rawdata[keep, ]

# Check dimensions again to see effect of filtering
dim(rawdata)
Expand All @@ -102,57 +102,57 @@ dim(rawdata)
#################

# load edgeR
library('edgeR')
library("edgeR")

# make class labels
class <- c( rep("UHR",3), rep("HBR",3) )
class = c(rep("UHR", 3), rep("HBR", 3))

# Get common gene names
Gene=rownames(rawdata)
Symbol=mapping[Gene,1]
gene_annotations=cbind(Gene,Symbol)
Gene = rownames(rawdata)
Symbol = mapping[Gene, 1]
gene_annotations = cbind(Gene, Symbol)

# Make DGEList object
y <- DGEList(counts=rawdata, genes=gene_annotations, group=class)
y = DGEList(counts = rawdata, genes = gene_annotations, group = class)
nrow(y)

# TMM Normalization
y <- calcNormFactors(y)
y = calcNormFactors(y)

# Estimate dispersion
y <- estimateCommonDisp(y, verbose=TRUE)
y <- estimateTagwiseDisp(y)
y = estimateCommonDisp(y, verbose = TRUE)
y = estimateTagwiseDisp(y)

# Differential expression test
et <- exactTest(y)
et = exactTest(y)

# Extract raw counts to add back onto DE results
counts <- getCounts(y)
counts = getCounts(y)

# Print top genes
topTags(et)

# Print number of up/down significant genes at FDR = 0.05 significance level
summary(de <- decideTestsDGE(et, adjust.method="BH", p=.05))
summary(de = decideTestsDGE(et, adjust.method = "BH", p = 0.05))

#Get output with BH-adjusted FDR values - all genes, any p-value, unsorted
out <- topTags(et, n = "Inf", adjust.method="BH", sort.by="none", p.value=1)$table
out = topTags(et, n = "Inf", adjust.method = "BH", sort.by = "none", p.value = 1)$table

#Add raw counts back onto results for convenience (make sure sort and total number of elements allows proper join)
out2 <- cbind(out, counts)
out2 = cbind(out, counts)

#Limit to significantly DE genes
out3 <- out2[as.logical(de),]
out3 = out2[as.logical(de), ]

# Order by p-value
o <- order(et$table$PValue[as.logical(de)],decreasing=FALSE)
out4 <- out3[o,]
o = order(et$table$PValue[as.logical(de)], decreasing=FALSE)
out4 = out3[o, ]

# Save table
write.table(out4, file="DE_genes.txt", quote=FALSE, row.names=FALSE, sep="\t")
write.table(out4, file = "DE_genes.txt", quote = FALSE, row.names = FALSE, sep = "\t")

#To exit R type the following
quit(save="no")
quit(save = "no")
```

Once you have run the edgeR tutorial, compare the sigDE genes to those saved earlier from ballgown:
Expand Down

0 comments on commit 317f7af

Please sign in to comment.