From 317f7af4989ce5a790431dfda5fd4569b74b9852 Mon Sep 17 00:00:00 2001
From: Malachi Griffith <malachig@gmail.com>
Date: Wed, 19 Jun 2024 11:39:17 -0400
Subject: [PATCH] make the spacing follow a more consistent style

---
 ...003-03-02-Differential_Expression-edgeR.md | 64 +++++++++----------
 1 file changed, 32 insertions(+), 32 deletions(-)

diff --git a/_posts/0003-03-02-Differential_Expression-edgeR.md b/_posts/0003-03-02-Differential_Expression-edgeR.md
index b5a87b59..693f6172 100644
--- a/_posts/0003-03-02-Differential_Expression-edgeR.md
+++ b/_posts/0003-03-02-Differential_Expression-edgeR.md
@@ -65,34 +65,34 @@ R
 R code has been provided below. If you wish to have a script with all of the code, it can be found [here](https://github.com/griffithlab/rnabio.org/blob/master/assets/scripts/Tutorial_edgeR.R). Run the R commands below.
 
 ```R
-###R code###
-#Set working directory where output will go
+
+# set working directory where output will go
 working_dir = "~/workspace/rnaseq/de/htseq_counts"
 setwd(working_dir)
 
-#Read in gene mapping
-mapping=read.table("~/workspace/rnaseq/de/htseq_counts/ENSG_ID2Name.txt", header=FALSE, stringsAsFactors=FALSE, row.names=1)
+# read in gene mapping
+mapping = read.table("~/workspace/rnaseq/de/htseq_counts/ENSG_ID2Name.txt", header = FALSE, stringsAsFactors = FALSE, row.names = 1)
 
-# Read in count matrix
-rawdata=read.table("~/workspace/rnaseq/expression/htseq_counts/gene_read_counts_table_all_final.tsv", header=TRUE, stringsAsFactors=FALSE, row.names=1)
+# read in count matrix
+rawdata = read.table("~/workspace/rnaseq/expression/htseq_counts/gene_read_counts_table_all_final.tsv", header = TRUE, stringsAsFactors = FALSE, row.names = 1)
 
 # Check dimensions
 dim(rawdata)
 
 # Require at least 1/6 of samples to have expressed count >= 10
-sample_cutoff <- (1/6)
-count_cutoff <- 10
+sample_cutoff = (1/6)
+count_cutoff = 10
 
 #Define a function to calculate the fraction of values expressed above the count cutoff
-getFE <- function(data,count_cutoff){
- FE <- (sum(data >= count_cutoff)/length(data))
+getFE = function(data,count_cutoff){
+ FE = (sum(data >= count_cutoff) / length(data))
  return(FE)
 }
 
 #Apply the function to all genes, and filter out genes not meeting the sample cutoff
-fraction_expressed <- apply(rawdata, 1, getFE, count_cutoff)
-keep <- which(fraction_expressed >= sample_cutoff)
-rawdata <- rawdata[keep,]
+fraction_expressed = apply(rawdata, 1, getFE, count_cutoff)
+keep = which(fraction_expressed >= sample_cutoff)
+rawdata = rawdata[keep, ]
 
 # Check dimensions again to see effect of filtering
 dim(rawdata)
@@ -102,57 +102,57 @@ dim(rawdata)
 #################
 
 # load edgeR
-library('edgeR')
+library("edgeR")
 
 # make class labels
-class <- c( rep("UHR",3), rep("HBR",3) )
+class = c(rep("UHR", 3), rep("HBR", 3))
 
 # Get common gene names
-Gene=rownames(rawdata)
-Symbol=mapping[Gene,1]
-gene_annotations=cbind(Gene,Symbol)
+Gene = rownames(rawdata)
+Symbol = mapping[Gene, 1]
+gene_annotations = cbind(Gene, Symbol)
 
 # Make DGEList object
-y <- DGEList(counts=rawdata, genes=gene_annotations, group=class)
+y = DGEList(counts = rawdata, genes = gene_annotations, group = class)
 nrow(y)
 
 # TMM Normalization
-y <- calcNormFactors(y)
+y = calcNormFactors(y)
 
 # Estimate dispersion
-y <- estimateCommonDisp(y, verbose=TRUE)
-y <- estimateTagwiseDisp(y)
+y = estimateCommonDisp(y, verbose = TRUE)
+y = estimateTagwiseDisp(y)
 
 # Differential expression test
-et <- exactTest(y)
+et = exactTest(y)
 
 # Extract raw counts to add back onto DE results
-counts <- getCounts(y)
+counts = getCounts(y)
 
 # Print top genes
 topTags(et)
 
 # Print number of up/down significant genes at FDR = 0.05  significance level
-summary(de <- decideTestsDGE(et, adjust.method="BH", p=.05))
+summary(de = decideTestsDGE(et, adjust.method = "BH", p = 0.05))
 
 #Get output with BH-adjusted FDR values - all genes, any p-value, unsorted
-out <- topTags(et, n = "Inf", adjust.method="BH", sort.by="none", p.value=1)$table
+out = topTags(et, n = "Inf", adjust.method = "BH", sort.by = "none", p.value = 1)$table
 
 #Add raw counts back onto results for convenience (make sure sort and total number of elements allows proper join)
-out2 <- cbind(out, counts)
+out2 = cbind(out, counts)
 
 #Limit to significantly DE genes
-out3 <- out2[as.logical(de),]
+out3 = out2[as.logical(de), ]
 
 # Order by p-value
-o <- order(et$table$PValue[as.logical(de)],decreasing=FALSE)
-out4 <- out3[o,]
+o = order(et$table$PValue[as.logical(de)], decreasing=FALSE)
+out4 = out3[o, ]
 
 # Save table
-write.table(out4, file="DE_genes.txt", quote=FALSE, row.names=FALSE, sep="\t")
+write.table(out4, file = "DE_genes.txt", quote = FALSE, row.names = FALSE, sep = "\t")
 
 #To exit R type the following
-quit(save="no")
+quit(save = "no")
 ```
 
 Once you have run the edgeR tutorial, compare the sigDE genes to those saved earlier from ballgown: