From 317f7af4989ce5a790431dfda5fd4569b74b9852 Mon Sep 17 00:00:00 2001 From: Malachi Griffith Date: Wed, 19 Jun 2024 11:39:17 -0400 Subject: [PATCH] make the spacing follow a more consistent style --- ...003-03-02-Differential_Expression-edgeR.md | 64 +++++++++---------- 1 file changed, 32 insertions(+), 32 deletions(-) diff --git a/_posts/0003-03-02-Differential_Expression-edgeR.md b/_posts/0003-03-02-Differential_Expression-edgeR.md index b5a87b59..693f6172 100644 --- a/_posts/0003-03-02-Differential_Expression-edgeR.md +++ b/_posts/0003-03-02-Differential_Expression-edgeR.md @@ -65,34 +65,34 @@ R R code has been provided below. If you wish to have a script with all of the code, it can be found [here](https://github.com/griffithlab/rnabio.org/blob/master/assets/scripts/Tutorial_edgeR.R). Run the R commands below. ```R -###R code### -#Set working directory where output will go + +# set working directory where output will go working_dir = "~/workspace/rnaseq/de/htseq_counts" setwd(working_dir) -#Read in gene mapping -mapping=read.table("~/workspace/rnaseq/de/htseq_counts/ENSG_ID2Name.txt", header=FALSE, stringsAsFactors=FALSE, row.names=1) +# read in gene mapping +mapping = read.table("~/workspace/rnaseq/de/htseq_counts/ENSG_ID2Name.txt", header = FALSE, stringsAsFactors = FALSE, row.names = 1) -# Read in count matrix -rawdata=read.table("~/workspace/rnaseq/expression/htseq_counts/gene_read_counts_table_all_final.tsv", header=TRUE, stringsAsFactors=FALSE, row.names=1) +# read in count matrix +rawdata = read.table("~/workspace/rnaseq/expression/htseq_counts/gene_read_counts_table_all_final.tsv", header = TRUE, stringsAsFactors = FALSE, row.names = 1) # Check dimensions dim(rawdata) # Require at least 1/6 of samples to have expressed count >= 10 -sample_cutoff <- (1/6) -count_cutoff <- 10 +sample_cutoff = (1/6) +count_cutoff = 10 #Define a function to calculate the fraction of values expressed above the count cutoff -getFE <- function(data,count_cutoff){ - FE <- (sum(data >= count_cutoff)/length(data)) +getFE = function(data,count_cutoff){ + FE = (sum(data >= count_cutoff) / length(data)) return(FE) } #Apply the function to all genes, and filter out genes not meeting the sample cutoff -fraction_expressed <- apply(rawdata, 1, getFE, count_cutoff) -keep <- which(fraction_expressed >= sample_cutoff) -rawdata <- rawdata[keep,] +fraction_expressed = apply(rawdata, 1, getFE, count_cutoff) +keep = which(fraction_expressed >= sample_cutoff) +rawdata = rawdata[keep, ] # Check dimensions again to see effect of filtering dim(rawdata) @@ -102,57 +102,57 @@ dim(rawdata) ################# # load edgeR -library('edgeR') +library("edgeR") # make class labels -class <- c( rep("UHR",3), rep("HBR",3) ) +class = c(rep("UHR", 3), rep("HBR", 3)) # Get common gene names -Gene=rownames(rawdata) -Symbol=mapping[Gene,1] -gene_annotations=cbind(Gene,Symbol) +Gene = rownames(rawdata) +Symbol = mapping[Gene, 1] +gene_annotations = cbind(Gene, Symbol) # Make DGEList object -y <- DGEList(counts=rawdata, genes=gene_annotations, group=class) +y = DGEList(counts = rawdata, genes = gene_annotations, group = class) nrow(y) # TMM Normalization -y <- calcNormFactors(y) +y = calcNormFactors(y) # Estimate dispersion -y <- estimateCommonDisp(y, verbose=TRUE) -y <- estimateTagwiseDisp(y) +y = estimateCommonDisp(y, verbose = TRUE) +y = estimateTagwiseDisp(y) # Differential expression test -et <- exactTest(y) +et = exactTest(y) # Extract raw counts to add back onto DE results -counts <- getCounts(y) +counts = getCounts(y) # Print top genes topTags(et) # Print number of up/down significant genes at FDR = 0.05 significance level -summary(de <- decideTestsDGE(et, adjust.method="BH", p=.05)) +summary(de = decideTestsDGE(et, adjust.method = "BH", p = 0.05)) #Get output with BH-adjusted FDR values - all genes, any p-value, unsorted -out <- topTags(et, n = "Inf", adjust.method="BH", sort.by="none", p.value=1)$table +out = topTags(et, n = "Inf", adjust.method = "BH", sort.by = "none", p.value = 1)$table #Add raw counts back onto results for convenience (make sure sort and total number of elements allows proper join) -out2 <- cbind(out, counts) +out2 = cbind(out, counts) #Limit to significantly DE genes -out3 <- out2[as.logical(de),] +out3 = out2[as.logical(de), ] # Order by p-value -o <- order(et$table$PValue[as.logical(de)],decreasing=FALSE) -out4 <- out3[o,] +o = order(et$table$PValue[as.logical(de)], decreasing=FALSE) +out4 = out3[o, ] # Save table -write.table(out4, file="DE_genes.txt", quote=FALSE, row.names=FALSE, sep="\t") +write.table(out4, file = "DE_genes.txt", quote = FALSE, row.names = FALSE, sep = "\t") #To exit R type the following -quit(save="no") +quit(save = "no") ``` Once you have run the edgeR tutorial, compare the sigDE genes to those saved earlier from ballgown: