Merge pull request #7 from LeeBergstrand/develop

Develop
LeeBergstrand · Jun 3, 2015 · 440f2e3 · 440f2e3
2 parents b2528f6 + f191bd9
commit 440f2e3
Showing 9 changed files with 325 additions and 299 deletions.
diff --git a/BackBLAST.py b/BackBLAST.py
@@ -1,196 +1,210 @@
 #!/usr/bin/env python 
 # Created by: Lee Bergstrand
-# Descript: A Biopython program that takes a list of query proteins and uses local BLASTp to search 
-#	    for highly similer proteins within a local blast database (usally a local db of a target 
-#	    proteome). The program then BLASTps backwards from the found subject proteins to the query 
-#	    proteome to confirm gene orthology.
+# Description: A Biopython program that takes a list of query proteins and uses local BLASTp to search
+#              for highly similar proteins within a local blast database (usually a local db of a target
+#              proteome). The program then BLASTps backwards from the found subject proteins to the query
+#              proteome to confirm gene orthology.
 #             
 # Requirements: - This program requires the Biopython module: http://biopython.org/wiki/Download
 #               - This script requires BLAST+ 2.2.9 or later.
-#               - All operations are done with protien sequences.
+#               - All operations are done with protein sequences.
 #               - All query proteins should be from sequenced genomes in order to facilitate backwards BLAST. 
 #               - MakeAABlastDB must be used to create BLASTn databases for both query and subject proteomes.
 #               - BLAST databases require that the FASTA file they were made from remain in the same directory.
 #  
 # Usage: BackBLAST.py <queryGeneList.faa> <queryBLASTDB.faa> <subjectBLASTDB.faa> 
 # Example: BackBLAST.py queryGeneList.faa AL123456.3.faa AUUJ00000000.faa
-#----------------------------------------------------------------------------------------
-#===========================================================================================================
-#Imports & Setup:
+# ----------------------------------------------------------------------------------------
+# ===========================================================================================================
+# Imports & Setup:
 import sys
 import csv
 import subprocess
+import tempfile
 from Bio import SeqIO
-from Graph import Vertex
 from Graph import Graph
 from multiprocessing import cpu_count
 
-processors = cpu_count() # Gets number of processor cores for BLAST.
+processors = cpu_count()  # Gets number of processor cores for BLAST.
 
-# Dev Imports:
-import time # For profiling purposes.
-#===========================================================================================================
+# ===========================================================================================================
 # Functions:
 
 # 1: Checks if in proper number of arguments are passed gives instructions on proper use.
 def argsCheck(argsCount):
 	if len(sys.argv) < argsCount:
-		print "Orthologous Gene Finder"
-		print "By Lee Bergstrand\n"
-		print "Please refer to source code for documentation\n"
-		print "Usage: " + sys.argv[0] + " <queryGeneList.faa> <queryBLASTDB.faa> <subjectBLASTDB.faa> \n"
-		print "Examples:" + sys.argv[0] + " queryGeneList.faa AL123456.3.faa AUUJ00000000.faa"
-		sys.exit(1) # Aborts program. (exit(1) indicates that an error occured)
-#-------------------------------------------------------------------------------------------------
-# 2: Runs BLAST, can either be sent a fasta formatted string or a file ...
+		print("Orthologous Gene Finder")
+		print("By Lee Bergstrand\n")
+		print("Please refer to source code for documentation\n")
+		print("Usage: " + sys.argv[0] + " <queryGeneList.faa> <queryBLASTDB.faa> <subjectBLASTDB.faa> \n")
+		print("Examples:" + sys.argv[0] + " queryGeneList.faa AL123456.3.faa AUUJ00000000.faa")
+		sys.exit(1)  # Aborts program. (exit(1) indicates that an error occurred)
+
+
+# -------------------------------------------------------------------------------------------------
+# 2: Runs BLAST, can either be sent a FASTA formatted string or a file ...
 def runBLAST(query, BLASTDBFile):
-	# Runs BLASTp and saves the output to a string. Blastp is set to output a csv which can be parsed by Pythons CSV module.
-	BLASTOut = subprocess.check_output(["blastp", "-db", BLASTDBFile, "-query", query, "-evalue", "1e-25", "-num_threads", str(processors), "-outfmt", "10 qseqid sseqid pident evalue qcovhsp bitscore"]) 
+	# Runs BLASTp and saves the output to a string.
+	# BLASTp is set to output a csv which can be parsed by Pythons CSV module.
+	BLASTOut = subprocess.check_output(
+		["blastp", "-db", BLASTDBFile, "-query", query, "-evalue", "1e-25", "-num_threads", str(processors), "-outfmt",
+		 "10 qseqid sseqid pident evalue qcovhsp bitscore"])
 	return BLASTOut
-#-------------------------------------------------------------------------------------------------
+
+
+# -------------------------------------------------------------------------------------------------
 # 3: Filters HSPs by Percent Identity...
-def filtreBLASTCSV(BLASTOut):
-
+def filterBLASTCSV(BLASTOut):
 	minIdent = 25
-
-	BLASTCSVOut = BLASTOut.splitlines(True) # Converts raw BLAST csv output into list of csv rows.
-	BLASTreader = csv.reader(BLASTCSVOut) # Reads BLAST csv rows as a csv.
 
-	BLASTCSVOutFiltred = [] # Note should simply delete unwanted HSPs from curent list rather than making new list.
-							# Rather than making a new one.
+	BLASTCSVOut = BLASTOut.splitlines(True)  # Converts raw BLAST csv output into list of csv rows.
+	BLASTreader = csv.reader(BLASTCSVOut)    # Reads BLAST csv rows as a csv.
+
+	BLASTCSVOutFiltred = []  # Note should simply delete unwanted HSPs from current list rather than making new list.
+	# Rather than making a new one.
 	for HSP in BLASTreader:
-		if HSP[2] >= minIdent: # Filtres by minimum ident.
+		if HSP[2] >= minIdent:  # Filters by minimum identity.
 			# Converts each HSP parameter that should be a number to a number.
-			HSP[2] = float(HSP[2]) 
+			HSP[2] = float(HSP[2])
 			HSP[3] = float(HSP[3])
 			HSP[4] = float(HSP[4])
-			HSP[5] = float(HSP[5]) 
-			BLASTCSVOutFiltred.append(HSP) # Appends to output array.
-	
+			HSP[5] = float(HSP[5])
+			BLASTCSVOutFiltred.append(HSP)  # Appends to output array.
+
 	return BLASTCSVOutFiltred
-#-------------------------------------------------------------------------------------------------
-# 5: Creates a python dictionary (hash table) that contains the the fasta for each protien in the proteome.
+
+
+# -------------------------------------------------------------------------------------------------
+# 5: Creates a python dictionary (hash table) that contains the the FASTA for each protein in the proteome.
 def createProteomeHash(ProteomeFile):
-	ProteomeHash = dict() 
+	ProteomeHash = dict()
 	try:
 		handle = open(ProteomeFile, "rU")
 		proteome = SeqIO.parse(handle, "fasta")
 		for record in proteome:
-			ProteomeHash.update({ record.id : record.format("fasta") })
+			ProteomeHash.update({record.id: record.format("fasta")})
 		handle.close()
 	except IOError:
-		print "Failed to open " + ProteomeFile
+		print("Failed to open " + ProteomeFile)
 		sys.exit(1)
-		
+
 	return ProteomeHash
-#===========================================================================================================
+
+
+# ===========================================================================================================
 # Main program code:
 # House keeping...
-argsCheck(4) # Checks if the number of arguments are correct.
+argsCheck(4)  # Checks if the number of arguments are correct.
 
-queryFile   = sys.argv[1]
+queryFile = sys.argv[1]
 queryBLASTDBFile = sys.argv[2]
 subjectBLASTDBFile = sys.argv[3]
 
-print "Opening " + subjectBLASTDBFile + "..."
+print("Opening " + subjectBLASTDBFile + "...")
 
 # File extension checks
 if not queryFile.endswith(".faa"):
-	print "[Warning] " + queryFile + " may not be a amino acid fasta file!"
+	print("[Warning] " + queryFile + " may not be a amino acid FASTA file!")
 if not queryBLASTDBFile.endswith(".faa"):
-	print "[Warning] " + queryBLASTDBFile + " may not be a amino acid fasta file!"
+	print("[Warning] " + queryBLASTDBFile + " may not be a amino acid FASTA file!")
 if not subjectBLASTDBFile.endswith(".faa"):
-	print "[Warning] " + subjectBLASTDBFile + " may not be a amino acid fasta file!"
+	print("[Warning] " + subjectBLASTDBFile + " may not be a amino acid FASTA file!")
 
-OutFile = subjectBLASTDBFile.rstrip(".faa") + ".csv" 
+OutFile = subjectBLASTDBFile.rstrip(".faa") + ".csv"
 
-BLASTGraph = Graph() # Creates graph to map BLAST hits.
+BLASTGraph = Graph()  # Creates graph to map BLAST hits.
 
-print ">> Forward Blasting to subject proteome..."
-BLASTForward = runBLAST(queryFile, subjectBLASTDBFile) # Forward BLASTs from query protiens to subject proteome
-BLASTForward = filtreBLASTCSV(BLASTForward) # Filtres BLAST results by PIdnet.
+print(">> Forward Blasting to subject proteome...")
+BLASTForward = runBLAST(queryFile, subjectBLASTDBFile)  # Forward BLASTs from query proteins to subject proteome
+BLASTForward = filterBLASTCSV(BLASTForward)  # Filters BLAST results by percent identity.
 
 if len(BLASTForward) == 0:
-	print ">> No Forward hits in subject proteome were found."
+	print(">> No Forward hits in subject proteome were found.")
 	# Writes empty file for easier data processing.
 	try:
-		writeFile = open(OutFile, "w") 	
+		writeFile = open(OutFile, "w")
 		writer = csv.writer(writeFile)
 	except IOError:
-		print ">> Failed to create " + outFile
+		print(">> Failed to create " + outFile)
 		sys.exit(1)
-	print ">> Exiting.\n\n"
-	sys.exit(0) # Aborts program. (exit(0) indicates that no error occured)
+	print(">> Exiting.\n\n")
+	sys.exit(0)  # Aborts program. (exit(0) indicates that no error occurred)
 
-SubjectProteomeHash = createProteomeHash(subjectBLASTDBFile) # Creates python dictionary contianing every protien in the subject Proteome.
+SubjectProteomeHash = createProteomeHash(
+	subjectBLASTDBFile)  # Creates python dictionary confining every protein in the subject Proteome.
 BackBlastQueryFASTAs = []
 
-print ">> Creating Back-Blasting Query from found subject protiens..."
+print(">> Creating Back-Blasting Query from found subject proteins...")
 # For each top Hit...
+
+print(BLASTForward)
+
 for hit in BLASTForward:
 	subjectProtein = hit[1]
 	queryProtein = hit[0]
-	subjectProtienFASTA = SubjectProteomeHash.get(subjectProtein) # Extracts subjectProtien from python dictionary.
-	subjectProtienFASTA.strip()
-	BackBlastQueryFASTAs.append(subjectProtienFASTA) # Addes current subject to overall protien list.
-	
+	subjectProteinFASTA = SubjectProteomeHash.get(subjectProtein)  # Extracts subjectProtein from python dictionary.
+	subjectProteinFASTA.strip()
+	BackBlastQueryFASTAs.append(subjectProteinFASTA)  # Adds current subject to overall protein list.
+
 CompleteBackBlastQuery = "".join(BackBlastQueryFASTAs)
 
-# Attempt to write a temporary fasta file for the reverse BLAST to use. 
+# Attempt to write a temporary FASTA file for the reverse BLAST to use.
 try:
-	writeFile = open("tempQuery.faa", "w") 
-	writeFile.write(CompleteBackBlastQuery) 
+	writeFile = open("tempQuery.faa", "w")
+	writeFile.write(CompleteBackBlastQuery)
 	writeFile.close()
 except IOError:
-	print "Failed to create tempQuery.faa"
+	print("Failed to create tempQuery.faa")
 	sys.exit(1)
 
-print ">> Blasting backwards from subject genome to query genome."
+print(">> Blasting backwards from subject genome to query genome.")
 # Run backwards BLAST towards query proteome.
 BLASTBackward = runBLAST("tempQuery.faa", queryBLASTDBFile)
-BLASTBackward = filtreBLASTCSV(BLASTBackward) # Filtres BLAST results by PIdnet.
+BLASTBackward = filterBLASTCSV(BLASTBackward)  # Filters BLAST results by percent identity.
 
-print ">> Creating Graph..."
+print(">> Creating Graph...")
 for hit in BLASTForward:
-	BLASTGraph.addEdge(hit[0],hit[1],hit[5])
+	BLASTGraph.addEdge(hit[0], hit[1], hit[5])
 for hit in BLASTBackward:
-	BLASTGraph.addEdge(hit[0],hit[1],hit[5])
-	
+	BLASTGraph.addEdge(hit[0], hit[1], hit[5])
+
 BackBlastOutput = list(BLASTForward)
 
-print ">> Checking if forward hit subjects have better reciprocal hits than query."
-for hit in BLASTForward: 
-	queryProtien = BLASTGraph.getVertex(hit[0])
-	subjectProtien = BLASTGraph.getVertex(hit[1])
-	
+print(">> Checking if forward hit subjects have better reciprocal hits than query.")
+for hit in BLASTForward:
+	queryProtein = BLASTGraph.getVertex(hit[0])
+	subjectProtein = BLASTGraph.getVertex(hit[1])
+
 	topBackHitScore = 0
-	# Find the top score of the best reciprical BLAST hit.
-	for backHit in subjectProtien.getConnections():
-		backHitScore = subjectProtien.getWeight(backHit) # The edge weight between the subject and its reciprocal BLAST hit is the BLAST score.
+	# Find the top score of the best reciprocal BLAST hit.
+	for backHit in subjectProtein.getConnections():
+		backHitScore = subjectProtein.getWeight(
+			backHit)  # The edge weight between the subject and its reciprocal BLAST hit is the BLAST score.
 		if backHitScore >= topBackHitScore:
 			topBackHitScore = backHitScore
-	
-	# Check if the query is the best reciprical BLAST hit for the subject.
+
+	# Check if the query is the best reciprocal BLAST hit for the subject.
 	deleteHit = False
-	if queryProtien in subjectProtien.getConnections():
-		BackHitToQueryScore = subjectProtien.getWeight(queryProtien) # The edge weight between the subject and the query is the reciprocal BLAST score.
-		if BackHitToQueryScore < topBackHitScore: 
+	if queryProtein in subjectProtein.getConnections():
+		BackHitToQueryScore = subjectProtein.getWeight(
+			queryProtein)  # The edge weight between the subject and the query is the reciprocal BLAST score.
+		if BackHitToQueryScore < topBackHitScore:
 			deleteHit = True  # If the query is not the best reciprocal BLAST hit simply delete it from the BackBlastOutput.
 	else:
-		deleteHit = True # If the query is not a reciprocal BLAST hit simply delete it from the BackBlastOutput.
-	
-	if deleteHit == True:
-		del BackBlastOutput[BackBlastOutput.index(hit)] # Delete the forward BLAST hit from BackBlastOutput.
+		deleteHit = True  # If the query is not a reciprocal BLAST hit simply delete it from the BackBlastOutput.
+
+	if deleteHit:
+		del BackBlastOutput[BackBlastOutput.index(hit)]  # Delete the forward BLAST hit from BackBlastOutput.
 
-#Attempts to write reciprocal BLAST output to file.
+# Attempts to write reciprocal BLAST output to file.
 try:
-	writeFile = open(OutFile, "w") 	
+	writeFile = open(OutFile, "w")
 	writer = csv.writer(writeFile)
-	print ">> Output file created."
-	print ">> Writing Data..."
+	print(">> Output file created.")
+	print(">> Writing Data...")
 	for row in BackBlastOutput:
 		writer.writerow(row)
 except IOError:
-	print ">> Failed to create " + outFile
+	print(">> Failed to create " + outFile)
 	sys.exit(1)
-print ">> Done\n"	
+print(">> Done\n")
diff --git a/Graph.py b/Graph.py
@@ -1,57 +1,58 @@
 #!/usr/bin/env python 
 # From: http://interactivepython.org/courselib/static/pythonds/Graphs/graphintro.html#graphs
-# Descript: Lite Weight Script For Creating Graph Objects.           
-#----------------------------------------------------------------------------------------
+# Description: Lite Weight Script For Creating Graph Objects.
+# ----------------------------------------------------------------------------------------
 
 class Vertex:
-    def __init__(self,key):
-        self.id = key
-        self.connectedTo = {}
+	def __init__(self, key):
+		self.id = key
+		self.connectedTo = {}
 
-    def addNeighbor(self,nbr,weight=0):
-        self.connectedTo[nbr] = weight
+	def addNeighbor(self, nbr, weight=0):
+		self.connectedTo[nbr] = weight
 
-    def __str__(self):
-        return str(self.id) + ' connectedTo: ' + str([x.id for x in self.connectedTo])
+	def __str__(self):
+		return str(self.id) + ' connectedTo: ' + str([x.id for x in self.connectedTo])
 
-    def getConnections(self):
-        return self.connectedTo.keys()
+	def getConnections(self):
+		return self.connectedTo.keys()
 
-    def getId(self):
-        return self.id
+	def getId(self):
+		return self.id
+
+	def getWeight(self, nbr):
+		return self.connectedTo[nbr]
 
-    def getWeight(self,nbr):
-        return self.connectedTo[nbr]
 
 class Graph:
-    def __init__(self):
-        self.vertList = {}
-        self.numVertices = 0
-
-    def addVertex(self,key):
-        self.numVertices = self.numVertices + 1
-        newVertex = Vertex(key)
-        self.vertList[key] = newVertex
-        return newVertex
-
-    def getVertex(self,n):
-        if n in self.vertList:
-            return self.vertList[n]
-        else:
-            return None
-
-    def __contains__(self,n):
-        return n in self.vertList
-
-    def addEdge(self,f,t,cost=0):
-        if f not in self.vertList:
-            nv = self.addVertex(f)
-        if t not in self.vertList:
-            nv = self.addVertex(t)
-        self.vertList[f].addNeighbor(self.vertList[t], cost)
-
-    def getVertices(self):
-        return self.vertList.keys()
-
-    def __iter__(self):
-        return iter(self.vertList.values())
+	def __init__(self):
+		self.vertList = {}
+		self.numVertices = 0
+
+	def addVertex(self, key):
+		self.numVertices = self.numVertices + 1
+		newVertex = Vertex(key)
+		self.vertList[key] = newVertex
+		return newVertex
+
+	def getVertex(self, n):
+		if n in self.vertList:
+			return self.vertList[n]
+		else:
+			return None
+
+	def __contains__(self, n):
+		return n in self.vertList
+
+	def addEdge(self, f, t, cost=0):
+		if f not in self.vertList:
+			nv = self.addVertex(f)
+		if t not in self.vertList:
+			nv = self.addVertex(t)
+		self.vertList[f].addNeighbor(self.vertList[t], cost)
+
+	def getVertices(self):
+		return self.vertList.keys()
+
+	def __iter__(self):
+		return iter(self.vertList.values())
diff --git a/PhyloTree b/PhyloTree
diff --git a/Statistics/BackBLASTStats.py b/Statistics/BackBLASTStats.py
@@ -1,61 +1,63 @@
 #!/usr/bin/env python 
 # Created by: Lee Bergstrand 
-# Descript: Counts up BLAST hits per subject.
+# Description: Counts up BLAST hits per subject.
 #
-# Usage: QueryProtienRepeats.py <input.csv> 
-# Example: QueryProtienRepeats.py AUUJ0000000000.csv
-#----------------------------------------------------------------------------------------
+# Usage: BackBLASTStats.py <input.csv>
+# Example: BackBLASTStats.py AUUJ0000000000.csv
+# ----------------------------------------------------------------------------------------
 # Imports 
 import csv
 import sys
-#========================================================================================
+# ========================================================================================
 
 # If in proper number of arguments are passed gives instructions on proper use.
 if len(sys.argv) < 3 or len(sys.argv) > 3:
-	print "Filtres Blast Hits"
-	print "By Lee Bergstrand\n"
-	print "Filtres Blast Hits to ...\n"
-	print "Usage: " + sys.argv[0] + " <input.csv>"
-	print "Examples: " + sys.argv[0] + ' AUUJ0000000000.csv'
-	exit(1) # Aborts program. (exit(1) indicates that an error occured)
+	print("Filters Blast Hits")
+	print("By Lee Bergstrand\n")
+	print("Filters Blast Hits to ...\n")
+	print("Usage: " + sys.argv[0] + " <input.csv>")
+	print("Examples: " + sys.argv[0] + ' AUUJ0000000000.csv')
+	exit(1)  # Aborts program. (exit(1) indicates that an error occurred)
 
 # Stores stores argument data.
-print ">> Opening CSV Blast Result file..."
-#ClusterBoundries
+print(">> Opening CSV Blast Result file...")
+# ClusterBoundaries
 BLASTResults = sys.argv[1]
 outFile = BLASTResults + ".out"
 
 # File extension check
 if not BLASTResults.endswith(".csv"):
-	print "[Warning] " + BLASTResults + " may not be a CSV file!"
+	print("[Warning] " + BLASTResults + " may not be a CSV file!")
 else:
-	print ">> Good file extention."	
-	
+	print(">> Good file extension.")
+
 # Opens CSV file for reading.
 try:
 	readFile = open(BLASTResults, "r")
-	reader  = csv.reader(readFile) # opens file with csv module which takes into account verying csv formats and parses correctly
-	print ">> Good CSV file."
-except IOError:
-	print "Failed to open " + BLASTResults
-	exit(1)
+	reader = csv.reader(
+		readFile)  # opens file with csv module which takes into account varying csv formats and parses correctly
+	print(">> Good CSV file.")
 
-BlastHits = []
+	BlastHits = []
 
-print ">> Reading BLAST hits..."
-# Modifies changes elements in column according to regex. New modified rows to new file.
-for row in reader:
-	BlastHits.append(row)
-readFile.close()
+	print(">> Reading BLAST hits...")
+	# Modifies changes elements in column according to regex. New modified rows to new file.
+	for row in reader:
+		BlastHits.append(row)
+	readFile.close()
 
-QueryCounter = {}
+	QueryCounter = {}
 
-for hit in BlastHits:
-	QueryProtien = hit[0]
-	if QueryProtien in QueryCounter:
-		QueryCounter[QueryProtien] += 1
-	else:
-		QueryCounter.update({QueryProtien:1})
+	for hit in BlastHits:
+		QueryProtein = hit[0]
+		if QueryProtein in QueryCounter:
+			QueryCounter[QueryProtein] += 1
+		else:
+			QueryCounter.update({QueryProtein: 1})
 
-for x in QueryCounter:
-	print x + ":", QueryCounter[x]
+	for x in QueryCounter:
+		print(x + ":", QueryCounter[x])
+
+except IOError:
+	print("Failed to open " + BLASTResults)
+	exit(1)
diff --git a/Statistics/QueryProtienRepeats.py b/Statistics/QueryProtienRepeats.py
@@ -1,60 +1,60 @@
 #!/usr/bin/env python 
 # Created by: Lee Bergstrand 
-# Descript: Counts up BLAST hits per subject.
+# Description: Counts up BLAST hits per subject.
 #
-# Usage: QueryProtienRepeats.py <input.csv> 
-# Example: QueryProtienRepeats.py AUUJ0000000000.csv
-#----------------------------------------------------------------------------------------
+# Usage: QueryProteinRepeats.py <input.csv>
+# Example: QueryProteinRepeats.py AUUJ0000000000.csv
+# ----------------------------------------------------------------------------------------
 # Imports 
 import csv
 import sys
-#========================================================================================
+# ========================================================================================
 
 # If in proper number of arguments are passed gives instructions on proper use.
 if len(sys.argv) < 2 or len(sys.argv) > 2:
-	print "Filtres Blast Hits"
-	print "By Lee Bergstrand\n"
-	print "Filtres Blast Hits to ...\n"
-	print "Usage: " + sys.argv[0] + " <input.csv>"
-	print "Examples: " + sys.argv[0] + ' AUUJ0000000000.csv'
-	exit(1) # Aborts program. (exit(1) indicates that an error occured)
+	print("Filters Blast Hits")
+	print("By Lee Bergstrand\n")
+	print("Filters Blast Hits to ...\n")
+	print("Usage: " + sys.argv[0] + " <input.csv>")
+	print("Examples: " + sys.argv[0] + ' AUUJ0000000000.csv')
+	exit(1)  # Aborts program. (exit(1) indicates that an error occurred)
 
 # Stores stores argument data.
-print ">> Opening CSV Blast Result file..."
-inFile  = sys.argv[1]
+print(">> Opening CSV Blast Result file...")
+inFile = sys.argv[1]
 outFile = inFile + ".out"
 
 # File extension check
 if not inFile.endswith(".csv"):
-	print "[Warning] " + inFile + " may not be a CSV file!"
+	print("[Warning] " + inFile + " may not be a CSV file!")
 else:
-	print ">> Good file extention."	
-	
+	print(">> Good file extension.")
+
 # Opens CSV file for reading.
 try:
 	readFile = open(inFile, "r")
-	reader  = csv.reader(readFile) # opens file with csv module which takes into account verying csv formats and parses correctly
-	print ">> Good CSV file."
+	reader = csv.reader(
+		readFile)  # opens file with csv module which takes into account varying csv formats and parses correctly
+	print(">> Good CSV file.")
+
+	print(">> Reading BLAST hits...")
+	# Modifies changes elements in column according to regex. New modified rows to new file.
+	BlastHits = []
+	for row in reader:
+		BlastHits.append(row)
+	readFile.close()
+
+	QueryCounter = {}
+
+	for hit in BlastHits:
+		QueryProtein = hit[0]
+		if QueryProtein in QueryCounter:
+			QueryCounter[QueryProtein] += 1
+		else:
+			QueryCounter.update({QueryProtein: 1})
+
+	for x in QueryCounter:
+		print(x + ":", QueryCounter[x])
 except IOError:
-	print "Failed to open " + inFile
+	print("Failed to open " + inFile)
 	exit(1)
-
-BlastHits = []
-
-print ">> Reading BLAST hits..."
-# Modifies changes elements in column according to regex. New modified rows to new file.
-for row in reader:
-	BlastHits.append(row)
-readFile.close()
-
-QueryCounter = {}
-
-for hit in BlastHits:
-	QueryProtien = hit[0]
-	if QueryProtien in QueryCounter:
-		QueryCounter[QueryProtien] += 1
-	else:
-		QueryCounter.update({QueryProtien:1})
-
-for x in QueryCounter:
-	print x + ":", QueryCounter[x]
diff --git a/Visualization/CreateBlankResults.py b/Visualization/CreateBlankResults.py
@@ -1,83 +1,86 @@
 #!/usr/bin/env python 
 # Created by: Lee Bergstrand 
-# Descript: A simple program that takes a FASTA file protien list and makes a csv of fake BLAST results.
+# Description: A simple program that takes a FASTA file protein list and makes a csv of fake BLAST results.
 #
 # Requirements: - This script requires the Biopython module: http://biopython.org/wiki/Download
 #     
 # Usage: getGenbankSeqs.py <sequences.txt> <geneList.fna>
 # Example: getGenbankSeqs.py mySeqs.txt geneList.fna
-#----------------------------------------------------------------------------------------
-#===========================================================================================================
-#Imports:
-	
+# ----------------------------------------------------------------------------------------
+# ===========================================================================================================
+# Imports:
+
 import sys
 from Bio import SeqIO
-#===========================================================================================================
+# ===========================================================================================================
 # Functions:
 
 # 1: Checks if in proper number of arguments are passed gives instructions on proper use.
 def argsCheck(numArgs):
 	if len(sys.argv) < numArgs or len(sys.argv) > numArgs:
-		print "Takes a nucleotide FASTA file and returns the exact same FASTA file with a reverse complemented sequence."
-		print "By Lee Bergstrand\n"
-		print "Usage: " + sys.argv[0] + " <sequences.txt> <geneList.fna>"
-		print "Examples: " + sys.argv[0] + " mySeq.txt geneList.fna\n"
-		exit(1) # Aborts program. (exit(1) indicates that an error occurred)
-#===========================================================================================================
+		print(
+			"Takes a nucleotide FASTA file and returns the exact same FASTA file with a reverse complemented sequence.")
+		print("By Lee Bergstrand\n")
+		print("Usage: " + sys.argv[0] + " <sequences.txt> <geneList.fna>")
+		print("Examples: " + sys.argv[0] + " mySeq.txt geneList.fna\n")
+		exit(1)  # Aborts program. (exit(1) indicates that an error occurred)
+
+# ===========================================================================================================
 # Main program code:
-	
+
 # House keeping...
-argsCheck(3) # Checks if the number of arguments are correct.
-	
+argsCheck(3)  # Checks if the number of arguments are correct.
+
 # Stores file one for input checking.
-print ">> Opening FASTA file..."
+print(">> Opening FASTA file...")
 filesToReplace = sys.argv[1]
 geneList = sys.argv[2]
 
 # File extension check
 if not filesToReplace.endswith(".txt"):
-	print "[Warning] " + geneList + " may not be a text file!"
+	print("[Warning] " + geneList + " may not be a text file!")
 
 # File extension check
 if not geneList.endswith(".faa"):
-	print "[Warning] " + geneList + " may not be a FASTA file!"
-	
+	print("[Warning] " + geneList + " may not be a FASTA file!")
+
 # Reads sequence file list and stores it as a string object. Safely closes file.try:
-try:	
-	with open(filesToReplace,"rU") as newFile:
+try:
+	with open(filesToReplace, "rU") as newFile:
 		filesToReplace = newFile.read()
 		newFile.close()
 except IOError:
-	print "Failed to open " + inFile
+	print("Failed to open " + inFile)
 	exit(1)
 
-filesToReplaceList = filesToReplace.splitlines() # Splits string into a list. Each element is a single line from the string.
+# Splits string into a list. Each element is a single line from the string.
+filesToReplaceList = filesToReplace.splitlines()
 
 FakeResults = []
 try:
 	handle = open(geneList, "rU")
 	SeqRecords = SeqIO.parse(handle, "fasta")
 	for record in SeqRecords:
-		FakeResults.append(record.id + ",sseqid,0,evalue,qcovhsp,bitscore") # qseqid sseqid pident evalue qcovhsp bitscore
+		FakeResults.append(
+			record.id + ",sseqid,0,evalue,qcovhsp,bitscore")  # qseqid sseqid pident evalue qcovhsp bitscore
 	handle.close()
 except IOError:
-	print "Failed to open " + inFile + " or " + outFile
+	print("Failed to open " + inFile + " or " + outFile)
 	exit(1)
-	
+
 FakeResultsOut = "\n".join(FakeResults)
 
-print ">> Writing Fake Results..."
+print(">> Writing Fake Results...")
 
 for x in filesToReplaceList:
 	replacementFileName = x + ".csv"
-	
+
 	try:
 		writeFile = open(replacementFileName, "w")
 		writeFile.write(FakeResultsOut)
 		writeFile.close()
-	except IOError:	
-		print "Failed to create " + outFile
-		exit(1)	
-
+	except IOError:
+		print("Failed to create " + outFile)
+		exit(1)
 
-print ">> Done."
+print(">> Done.")
diff --git a/Visualization/ExtractBackBlastSubject.py b/Visualization/ExtractBackBlastSubject.py
@@ -1,73 +1,79 @@
 #!/usr/bin/env python 
 # Created by: Lee Bergstrand 
-# Descript: A simple script that extracts the subject BLAST hits for a given subject protien from the csv results from BackBLAST.py.
+# Description: A simple script that extracts the subject BLAST hits for
+#              a given subject protein from the csv results from BackBLAST.py.
 #
 # Usage: ExtractBackBlastSubject.py <myInputList.txt> <myInputBLASTCSV.csv>
 # Example: ExtractBackBlastSubject.py myInputList.txt myInputBLASTCSV.csv 
-#----------------------------------------------------------------------------------------
+# ----------------------------------------------------------------------------------------
 
 import csv
 import sys
 
 # If in proper number of arguments are passed gives instructions on proper use.
 if len(sys.argv) < 2 or len(sys.argv) > 2:
-	print "BLAST Subject Extractor"
-	print "By Lee Bergstrand\n"
-	print "A simple script that extracts the subject BLAST hits for a given subject protien from the csv results from BackBLAST.py.\n"
-	print "Usage: " + sys.argv[0] + "<myInputList.txt> <myInputBLASTCSV.csv>"
-	print "Examples: " + sys.argv[0] + 'myInputList.txt myInputBLASTCSV.csv'
-	exit(1) # Aborts program. (exit(1) indicates that an error occured)
+	print("BLAST Subject Extractor")
+	print("By Lee Bergstrand\n")
+	print(
+		"A simple script that extracts the subject BLAST hits for a given subject protien from the csv results from BackBLAST.py.\n")
+	print("Usage: " + sys.argv[0] + "<myInputList.txt> <myInputBLASTCSV.csv>")
+	print("Examples: " + sys.argv[0] + 'myInputList.txt myInputBLASTCSV.csv')
+	exit(1)  # Aborts program. (exit(1) indicates that an error occurred)
 
 # Store argument data.
-inListFile  = sys.argv[1]
-inCSVFile   = sys.argv[2]
+inListFile = sys.argv[1]
+inCSVFile = sys.argv[2]
 outListFile = sys.argv[1] + ".out"
 
-print ">> Reading accession list from " + inListFile + "..." # File extension check
+print(">> Reading accession list from " + inListFile + "...")  # File extension check
 if not inListFile.endswith(".txt"):
-	print "[Warning] " + inListFile + "May not be a txt file!"
+	print("[Warning] " + inListFile + "May not be a txt file!")
 else:
-	print ">> Good file extention."	
+	print(">> Good file extension.")
 
-print ">> Reading back blast results from " + inCSVFile + "..." # File extension check
+print(">> Reading back blast results from " + inCSVFile + "...")  # File extension check
 if not inCSVFile.endswith(".csv"):
-	print "[Warning] " + inCSVFile + "May not be a CSV file!"
+	print("[Warning] " + inCSVFile + "May not be a CSV file!")
 else:
-	print ">> Good file extention."	
-
+	print(">> Good file extension.")
+
+seqList = []
+
 # Reads query sequence file list and stores it as a string object. Safely closes file.try:
-try:	
-	with open(inListFile,"r") as newFile:
+try:
+	with open(inListFile, "r") as newFile:
 		sequences = newFile.read()
-		seqList = sequences.splitlines() # Splits string into a list. Each element is a single line from the string.
+		seqList = sequences.splitlines()  # Splits string into a list. Each element is a single line from the string.
 		newFile.close()
 except IOError:
-	print "Failed to open " + inListFile
+	print("Failed to open " + inListFile)
 	exit(1)
 
 # Opens BLAST CSV output for reading.
 try:
 	csvFile = open(inCSVFile, "r")
-	BLASTreader  = csv.reader(csvFile) # opens file with csv module which takes into account verying csv formats and parses correctly
-	print ">> Good CSV file."
-except IOError:
-	print "Failed to open " + inCSVFile
-	exit(1)
+	BLASTreader = csv.reader(
+		csvFile)  # opens file with csv module which takes into account varying csv formats and parses correctly
+	print(">> Good CSV file.")
+
+	accessionsToAdd = []
+
+	for row in BLASTreader:
+		if row[0] in seqList:
+			if float(row[2]) >= 30:
+				if (row[1] + "\n") not in accessionsToAdd:
+					accessionsToAdd.append(row[1] + "\n")
+
+	try:
+		print(">> Output file created.")
+		print(">> Writing Data...")
+		writeFile = open(outListFile, "w")
+		writeFile.writelines(accessionsToAdd)
+		writeFile.close()
+	except IOError:
+		print("Failed to create " + outListFile)
+		exit(1)
 
-accessionsToAdd = []	
-
-for row in BLASTreader:
-	if row[0] in seqList:
-		if float(row[2]) >= 30:
-			if (row[1] + "\n") not in accessionsToAdd:
-				accessionsToAdd.append(row[1] + "\n")
-
-try:
-	print ">> Output file created."
-	print ">> Writing Data..."
-	writeFile = open(outListFile, "w") 	 
-	writeFile.writelines(accessionsToAdd)	
-	writeFile.close()
 except IOError:
-	print "Failed to create " + outListFile
-	exit(1)
+	print("Failed to open " + inCSVFile)
+	exit(1)
diff --git a/Visualization/PIndentHeatMapper.R b/Visualization/PIndentHeatMapper.R
@@ -10,8 +10,8 @@ library(ape)
 library(gplots)
 
 # Sets the working directory. 
-setwd("/Users/lee/Data/SecondBDHAnalysis/Proteobacteria/Results/New/Filtered/")
-tree = read.tree("/Users/lee/Data/SecondBDHAnalysis/Proteobacteria/16SRRNA/New/PhyloTree/Bootstrap/Proteo16STree.nwk")
+setwd("/Users/lee/Google Drive/Mohn_Lab_Steroid_Research/SecondBDBHAnalysis/Proteobacteria/OldResults/14-09-18/Filtered")
+tree = read.tree("/Users/lee/Google Drive/Mohn_Lab_Steroid_Research/SecondBDBHAnalysis/Proteobacteria/16SRRNA/New/PhyloTree/Old14-08-19/Bootstrap/Proteo16STree.nwk")
 
 # Gets a list files from the working directory.
 fileList = list.files(path = getwd(), all.files = FALSE, pattern = "\\.csv$") 

diff --git a/Visualization/PIndentHeatMapperNoTree.R b/Visualization/PIndentHeatMapperNoTree.R
@@ -10,7 +10,7 @@ library(ape)
 library(gplots)
 
 # Sets the working directory. 
-setwd("/Users/lee/Data/SecondBDHAnalysis/InDraft/Results/Filtered")
+setwd("/Users/lee/Dropbox/RandD/Repositories/BackBLAST_Reciprocal_BLAST/TestData/Old_Run/Filtered")
 
 # Gets a list files from the working directory.
 fileList = list.files(path = getwd(), all.files = FALSE, pattern = "\\.csv$") 
@@ -40,5 +40,5 @@ HeatmapMatrix = HeatmapMatrix[order(rownames(HeatmapMatrix)), ] # Reorders rows
 ColourPal = brewer.pal(9,"YlGn") # Gets Colour Palete from R colour brewer.
 ColourPal[1] = "#F4F5F6" # Swaps lowest colour for off white.
 ColourPal = append(ColourPal, "#00311d")
-heatmap.2(HeatmapMatrix, Rowv = NA, col = ColourPal, 
+heatmap.2(HeatmapMatrix, Rowv=FALSE, Colv=FALSE, col = ColourPal, 
           trace="none", xlab = "Genome", ylab = "Steroid Degrading Gene", margins = c(10,9))
+65 −54		BLASTToFind16S/16SBLAST.py
+103 −76		HMMToFind16S/16SHMMER.py
+28 −25		HMMToFind16S/GetReverseComplement.py
+29 −27		HMMToFind16S/RNAStockholmToFASTA.py
+24 −23		SupplementaryScripts/NewickToPhyloXML.py