Skip to content

Commit

Permalink
Merge pull request #7 from LeeBergstrand/develop
Browse files Browse the repository at this point in the history
Develop
LeeBergstrand committed Jun 3, 2015
2 parents b2528f6 + f191bd9 commit 440f2e3
Showing 9 changed files with 325 additions and 299 deletions.
210 changes: 112 additions & 98 deletions BackBLAST.py
Original file line number Diff line number Diff line change
@@ -1,196 +1,210 @@
#!/usr/bin/env python
# Created by: Lee Bergstrand
# Descript: A Biopython program that takes a list of query proteins and uses local BLASTp to search
# for highly similer proteins within a local blast database (usally a local db of a target
# proteome). The program then BLASTps backwards from the found subject proteins to the query
# proteome to confirm gene orthology.
# Description: A Biopython program that takes a list of query proteins and uses local BLASTp to search
# for highly similar proteins within a local blast database (usually a local db of a target
# proteome). The program then BLASTps backwards from the found subject proteins to the query
# proteome to confirm gene orthology.
#
# Requirements: - This program requires the Biopython module: http://biopython.org/wiki/Download
# - This script requires BLAST+ 2.2.9 or later.
# - All operations are done with protien sequences.
# - All operations are done with protein sequences.
# - All query proteins should be from sequenced genomes in order to facilitate backwards BLAST.
# - MakeAABlastDB must be used to create BLASTn databases for both query and subject proteomes.
# - BLAST databases require that the FASTA file they were made from remain in the same directory.
#
# Usage: BackBLAST.py <queryGeneList.faa> <queryBLASTDB.faa> <subjectBLASTDB.faa>
# Example: BackBLAST.py queryGeneList.faa AL123456.3.faa AUUJ00000000.faa
#----------------------------------------------------------------------------------------
#===========================================================================================================
#Imports & Setup:
# ----------------------------------------------------------------------------------------
# ===========================================================================================================
# Imports & Setup:
import sys
import csv
import subprocess
import tempfile
from Bio import SeqIO
from Graph import Vertex
from Graph import Graph
from multiprocessing import cpu_count

processors = cpu_count() # Gets number of processor cores for BLAST.
processors = cpu_count() # Gets number of processor cores for BLAST.

# Dev Imports:
import time # For profiling purposes.
#===========================================================================================================
# ===========================================================================================================
# Functions:

# 1: Checks if in proper number of arguments are passed gives instructions on proper use.
def argsCheck(argsCount):
if len(sys.argv) < argsCount:
print "Orthologous Gene Finder"
print "By Lee Bergstrand\n"
print "Please refer to source code for documentation\n"
print "Usage: " + sys.argv[0] + " <queryGeneList.faa> <queryBLASTDB.faa> <subjectBLASTDB.faa> \n"
print "Examples:" + sys.argv[0] + " queryGeneList.faa AL123456.3.faa AUUJ00000000.faa"
sys.exit(1) # Aborts program. (exit(1) indicates that an error occured)
#-------------------------------------------------------------------------------------------------
# 2: Runs BLAST, can either be sent a fasta formatted string or a file ...
print("Orthologous Gene Finder")
print("By Lee Bergstrand\n")
print("Please refer to source code for documentation\n")
print("Usage: " + sys.argv[0] + " <queryGeneList.faa> <queryBLASTDB.faa> <subjectBLASTDB.faa> \n")
print("Examples:" + sys.argv[0] + " queryGeneList.faa AL123456.3.faa AUUJ00000000.faa")
sys.exit(1) # Aborts program. (exit(1) indicates that an error occurred)


# -------------------------------------------------------------------------------------------------
# 2: Runs BLAST, can either be sent a FASTA formatted string or a file ...
def runBLAST(query, BLASTDBFile):
# Runs BLASTp and saves the output to a string. Blastp is set to output a csv which can be parsed by Pythons CSV module.
BLASTOut = subprocess.check_output(["blastp", "-db", BLASTDBFile, "-query", query, "-evalue", "1e-25", "-num_threads", str(processors), "-outfmt", "10 qseqid sseqid pident evalue qcovhsp bitscore"])
# Runs BLASTp and saves the output to a string.
# BLASTp is set to output a csv which can be parsed by Pythons CSV module.
BLASTOut = subprocess.check_output(
["blastp", "-db", BLASTDBFile, "-query", query, "-evalue", "1e-25", "-num_threads", str(processors), "-outfmt",
"10 qseqid sseqid pident evalue qcovhsp bitscore"])
return BLASTOut
#-------------------------------------------------------------------------------------------------


# -------------------------------------------------------------------------------------------------
# 3: Filters HSPs by Percent Identity...
def filtreBLASTCSV(BLASTOut):

def filterBLASTCSV(BLASTOut):
minIdent = 25

BLASTCSVOut = BLASTOut.splitlines(True) # Converts raw BLAST csv output into list of csv rows.
BLASTreader = csv.reader(BLASTCSVOut) # Reads BLAST csv rows as a csv.

BLASTCSVOutFiltred = [] # Note should simply delete unwanted HSPs from curent list rather than making new list.
# Rather than making a new one.
BLASTCSVOut = BLASTOut.splitlines(True) # Converts raw BLAST csv output into list of csv rows.
BLASTreader = csv.reader(BLASTCSVOut) # Reads BLAST csv rows as a csv.

BLASTCSVOutFiltred = [] # Note should simply delete unwanted HSPs from current list rather than making new list.
# Rather than making a new one.
for HSP in BLASTreader:
if HSP[2] >= minIdent: # Filtres by minimum ident.
if HSP[2] >= minIdent: # Filters by minimum identity.
# Converts each HSP parameter that should be a number to a number.
HSP[2] = float(HSP[2])
HSP[2] = float(HSP[2])
HSP[3] = float(HSP[3])
HSP[4] = float(HSP[4])
HSP[5] = float(HSP[5])
BLASTCSVOutFiltred.append(HSP) # Appends to output array.
HSP[5] = float(HSP[5])
BLASTCSVOutFiltred.append(HSP) # Appends to output array.

return BLASTCSVOutFiltred
#-------------------------------------------------------------------------------------------------
# 5: Creates a python dictionary (hash table) that contains the the fasta for each protien in the proteome.


# -------------------------------------------------------------------------------------------------
# 5: Creates a python dictionary (hash table) that contains the the FASTA for each protein in the proteome.
def createProteomeHash(ProteomeFile):
ProteomeHash = dict()
ProteomeHash = dict()
try:
handle = open(ProteomeFile, "rU")
proteome = SeqIO.parse(handle, "fasta")
for record in proteome:
ProteomeHash.update({ record.id : record.format("fasta") })
ProteomeHash.update({record.id: record.format("fasta")})
handle.close()
except IOError:
print "Failed to open " + ProteomeFile
print("Failed to open " + ProteomeFile)
sys.exit(1)

return ProteomeHash
#===========================================================================================================


# ===========================================================================================================
# Main program code:
# House keeping...
argsCheck(4) # Checks if the number of arguments are correct.
argsCheck(4) # Checks if the number of arguments are correct.

queryFile = sys.argv[1]
queryFile = sys.argv[1]
queryBLASTDBFile = sys.argv[2]
subjectBLASTDBFile = sys.argv[3]

print "Opening " + subjectBLASTDBFile + "..."
print("Opening " + subjectBLASTDBFile + "...")

# File extension checks
if not queryFile.endswith(".faa"):
print "[Warning] " + queryFile + " may not be a amino acid fasta file!"
print("[Warning] " + queryFile + " may not be a amino acid FASTA file!")
if not queryBLASTDBFile.endswith(".faa"):
print "[Warning] " + queryBLASTDBFile + " may not be a amino acid fasta file!"
print("[Warning] " + queryBLASTDBFile + " may not be a amino acid FASTA file!")
if not subjectBLASTDBFile.endswith(".faa"):
print "[Warning] " + subjectBLASTDBFile + " may not be a amino acid fasta file!"
print("[Warning] " + subjectBLASTDBFile + " may not be a amino acid FASTA file!")

OutFile = subjectBLASTDBFile.rstrip(".faa") + ".csv"
OutFile = subjectBLASTDBFile.rstrip(".faa") + ".csv"

BLASTGraph = Graph() # Creates graph to map BLAST hits.
BLASTGraph = Graph() # Creates graph to map BLAST hits.

print ">> Forward Blasting to subject proteome..."
BLASTForward = runBLAST(queryFile, subjectBLASTDBFile) # Forward BLASTs from query protiens to subject proteome
BLASTForward = filtreBLASTCSV(BLASTForward) # Filtres BLAST results by PIdnet.
print(">> Forward Blasting to subject proteome...")
BLASTForward = runBLAST(queryFile, subjectBLASTDBFile) # Forward BLASTs from query proteins to subject proteome
BLASTForward = filterBLASTCSV(BLASTForward) # Filters BLAST results by percent identity.

if len(BLASTForward) == 0:
print ">> No Forward hits in subject proteome were found."
print(">> No Forward hits in subject proteome were found.")
# Writes empty file for easier data processing.
try:
writeFile = open(OutFile, "w")
writeFile = open(OutFile, "w")
writer = csv.writer(writeFile)
except IOError:
print ">> Failed to create " + outFile
print(">> Failed to create " + outFile)
sys.exit(1)
print ">> Exiting.\n\n"
sys.exit(0) # Aborts program. (exit(0) indicates that no error occured)
print(">> Exiting.\n\n")
sys.exit(0) # Aborts program. (exit(0) indicates that no error occurred)

SubjectProteomeHash = createProteomeHash(subjectBLASTDBFile) # Creates python dictionary contianing every protien in the subject Proteome.
SubjectProteomeHash = createProteomeHash(
subjectBLASTDBFile) # Creates python dictionary confining every protein in the subject Proteome.
BackBlastQueryFASTAs = []

print ">> Creating Back-Blasting Query from found subject protiens..."
print(">> Creating Back-Blasting Query from found subject proteins...")
# For each top Hit...

print(BLASTForward)

for hit in BLASTForward:
subjectProtein = hit[1]
queryProtein = hit[0]
subjectProtienFASTA = SubjectProteomeHash.get(subjectProtein) # Extracts subjectProtien from python dictionary.
subjectProtienFASTA.strip()
BackBlastQueryFASTAs.append(subjectProtienFASTA) # Addes current subject to overall protien list.
subjectProteinFASTA = SubjectProteomeHash.get(subjectProtein) # Extracts subjectProtein from python dictionary.
subjectProteinFASTA.strip()
BackBlastQueryFASTAs.append(subjectProteinFASTA) # Adds current subject to overall protein list.

CompleteBackBlastQuery = "".join(BackBlastQueryFASTAs)

# Attempt to write a temporary fasta file for the reverse BLAST to use.
# Attempt to write a temporary FASTA file for the reverse BLAST to use.
try:
writeFile = open("tempQuery.faa", "w")
writeFile.write(CompleteBackBlastQuery)
writeFile = open("tempQuery.faa", "w")
writeFile.write(CompleteBackBlastQuery)
writeFile.close()
except IOError:
print "Failed to create tempQuery.faa"
print("Failed to create tempQuery.faa")
sys.exit(1)

print ">> Blasting backwards from subject genome to query genome."
print(">> Blasting backwards from subject genome to query genome.")
# Run backwards BLAST towards query proteome.
BLASTBackward = runBLAST("tempQuery.faa", queryBLASTDBFile)
BLASTBackward = filtreBLASTCSV(BLASTBackward) # Filtres BLAST results by PIdnet.
BLASTBackward = filterBLASTCSV(BLASTBackward) # Filters BLAST results by percent identity.

print ">> Creating Graph..."
print(">> Creating Graph...")
for hit in BLASTForward:
BLASTGraph.addEdge(hit[0],hit[1],hit[5])
BLASTGraph.addEdge(hit[0], hit[1], hit[5])
for hit in BLASTBackward:
BLASTGraph.addEdge(hit[0],hit[1],hit[5])
BLASTGraph.addEdge(hit[0], hit[1], hit[5])

BackBlastOutput = list(BLASTForward)

print ">> Checking if forward hit subjects have better reciprocal hits than query."
for hit in BLASTForward:
queryProtien = BLASTGraph.getVertex(hit[0])
subjectProtien = BLASTGraph.getVertex(hit[1])
print(">> Checking if forward hit subjects have better reciprocal hits than query.")
for hit in BLASTForward:
queryProtein = BLASTGraph.getVertex(hit[0])
subjectProtein = BLASTGraph.getVertex(hit[1])

topBackHitScore = 0
# Find the top score of the best reciprical BLAST hit.
for backHit in subjectProtien.getConnections():
backHitScore = subjectProtien.getWeight(backHit) # The edge weight between the subject and its reciprocal BLAST hit is the BLAST score.
# Find the top score of the best reciprocal BLAST hit.
for backHit in subjectProtein.getConnections():
backHitScore = subjectProtein.getWeight(
backHit) # The edge weight between the subject and its reciprocal BLAST hit is the BLAST score.
if backHitScore >= topBackHitScore:
topBackHitScore = backHitScore
# Check if the query is the best reciprical BLAST hit for the subject.

# Check if the query is the best reciprocal BLAST hit for the subject.
deleteHit = False
if queryProtien in subjectProtien.getConnections():
BackHitToQueryScore = subjectProtien.getWeight(queryProtien) # The edge weight between the subject and the query is the reciprocal BLAST score.
if BackHitToQueryScore < topBackHitScore:
if queryProtein in subjectProtein.getConnections():
BackHitToQueryScore = subjectProtein.getWeight(
queryProtein) # The edge weight between the subject and the query is the reciprocal BLAST score.
if BackHitToQueryScore < topBackHitScore:
deleteHit = True # If the query is not the best reciprocal BLAST hit simply delete it from the BackBlastOutput.
else:
deleteHit = True # If the query is not a reciprocal BLAST hit simply delete it from the BackBlastOutput.
if deleteHit == True:
del BackBlastOutput[BackBlastOutput.index(hit)] # Delete the forward BLAST hit from BackBlastOutput.
deleteHit = True # If the query is not a reciprocal BLAST hit simply delete it from the BackBlastOutput.

if deleteHit:
del BackBlastOutput[BackBlastOutput.index(hit)] # Delete the forward BLAST hit from BackBlastOutput.

#Attempts to write reciprocal BLAST output to file.
# Attempts to write reciprocal BLAST output to file.
try:
writeFile = open(OutFile, "w")
writeFile = open(OutFile, "w")
writer = csv.writer(writeFile)
print ">> Output file created."
print ">> Writing Data..."
print(">> Output file created.")
print(">> Writing Data...")
for row in BackBlastOutput:
writer.writerow(row)
except IOError:
print ">> Failed to create " + outFile
print(">> Failed to create " + outFile)
sys.exit(1)
print ">> Done\n"
print(">> Done\n")
93 changes: 47 additions & 46 deletions Graph.py
Original file line number Diff line number Diff line change
@@ -1,57 +1,58 @@
#!/usr/bin/env python
# From: http://interactivepython.org/courselib/static/pythonds/Graphs/graphintro.html#graphs
# Descript: Lite Weight Script For Creating Graph Objects.
#----------------------------------------------------------------------------------------
# Description: Lite Weight Script For Creating Graph Objects.
# ----------------------------------------------------------------------------------------

class Vertex:
def __init__(self,key):
self.id = key
self.connectedTo = {}
def __init__(self, key):
self.id = key
self.connectedTo = {}

def addNeighbor(self,nbr,weight=0):
self.connectedTo[nbr] = weight
def addNeighbor(self, nbr, weight=0):
self.connectedTo[nbr] = weight

def __str__(self):
return str(self.id) + ' connectedTo: ' + str([x.id for x in self.connectedTo])
def __str__(self):
return str(self.id) + ' connectedTo: ' + str([x.id for x in self.connectedTo])

def getConnections(self):
return self.connectedTo.keys()
def getConnections(self):
return self.connectedTo.keys()

def getId(self):
return self.id
def getId(self):
return self.id

def getWeight(self, nbr):
return self.connectedTo[nbr]

def getWeight(self,nbr):
return self.connectedTo[nbr]

class Graph:
def __init__(self):
self.vertList = {}
self.numVertices = 0

def addVertex(self,key):
self.numVertices = self.numVertices + 1
newVertex = Vertex(key)
self.vertList[key] = newVertex
return newVertex

def getVertex(self,n):
if n in self.vertList:
return self.vertList[n]
else:
return None

def __contains__(self,n):
return n in self.vertList

def addEdge(self,f,t,cost=0):
if f not in self.vertList:
nv = self.addVertex(f)
if t not in self.vertList:
nv = self.addVertex(t)
self.vertList[f].addNeighbor(self.vertList[t], cost)

def getVertices(self):
return self.vertList.keys()

def __iter__(self):
return iter(self.vertList.values())
def __init__(self):
self.vertList = {}
self.numVertices = 0

def addVertex(self, key):
self.numVertices = self.numVertices + 1
newVertex = Vertex(key)
self.vertList[key] = newVertex
return newVertex

def getVertex(self, n):
if n in self.vertList:
return self.vertList[n]
else:
return None

def __contains__(self, n):
return n in self.vertList

def addEdge(self, f, t, cost=0):
if f not in self.vertList:
nv = self.addVertex(f)
if t not in self.vertList:
nv = self.addVertex(t)
self.vertList[f].addNeighbor(self.vertList[t], cost)

def getVertices(self):
return self.vertList.keys()

def __iter__(self):
return iter(self.vertList.values())
74 changes: 38 additions & 36 deletions Statistics/BackBLASTStats.py
Original file line number Diff line number Diff line change
@@ -1,61 +1,63 @@
#!/usr/bin/env python
# Created by: Lee Bergstrand
# Descript: Counts up BLAST hits per subject.
# Description: Counts up BLAST hits per subject.
#
# Usage: QueryProtienRepeats.py <input.csv>
# Example: QueryProtienRepeats.py AUUJ0000000000.csv
#----------------------------------------------------------------------------------------
# Usage: BackBLASTStats.py <input.csv>
# Example: BackBLASTStats.py AUUJ0000000000.csv
# ----------------------------------------------------------------------------------------
# Imports
import csv
import sys
#========================================================================================
# ========================================================================================

# If in proper number of arguments are passed gives instructions on proper use.
if len(sys.argv) < 3 or len(sys.argv) > 3:
print "Filtres Blast Hits"
print "By Lee Bergstrand\n"
print "Filtres Blast Hits to ...\n"
print "Usage: " + sys.argv[0] + " <input.csv>"
print "Examples: " + sys.argv[0] + ' AUUJ0000000000.csv'
exit(1) # Aborts program. (exit(1) indicates that an error occured)
print("Filters Blast Hits")
print("By Lee Bergstrand\n")
print("Filters Blast Hits to ...\n")
print("Usage: " + sys.argv[0] + " <input.csv>")
print("Examples: " + sys.argv[0] + ' AUUJ0000000000.csv')
exit(1) # Aborts program. (exit(1) indicates that an error occurred)

# Stores stores argument data.
print ">> Opening CSV Blast Result file..."
#ClusterBoundries
print(">> Opening CSV Blast Result file...")
# ClusterBoundaries
BLASTResults = sys.argv[1]
outFile = BLASTResults + ".out"

# File extension check
if not BLASTResults.endswith(".csv"):
print "[Warning] " + BLASTResults + " may not be a CSV file!"
print("[Warning] " + BLASTResults + " may not be a CSV file!")
else:
print ">> Good file extention."
print(">> Good file extension.")

# Opens CSV file for reading.
try:
readFile = open(BLASTResults, "r")
reader = csv.reader(readFile) # opens file with csv module which takes into account verying csv formats and parses correctly
print ">> Good CSV file."
except IOError:
print "Failed to open " + BLASTResults
exit(1)
reader = csv.reader(
readFile) # opens file with csv module which takes into account varying csv formats and parses correctly
print(">> Good CSV file.")

BlastHits = []
BlastHits = []

print ">> Reading BLAST hits..."
# Modifies changes elements in column according to regex. New modified rows to new file.
for row in reader:
BlastHits.append(row)
readFile.close()
print(">> Reading BLAST hits...")
# Modifies changes elements in column according to regex. New modified rows to new file.
for row in reader:
BlastHits.append(row)
readFile.close()

QueryCounter = {}
QueryCounter = {}

for hit in BlastHits:
QueryProtien = hit[0]
if QueryProtien in QueryCounter:
QueryCounter[QueryProtien] += 1
else:
QueryCounter.update({QueryProtien:1})
for hit in BlastHits:
QueryProtein = hit[0]
if QueryProtein in QueryCounter:
QueryCounter[QueryProtein] += 1
else:
QueryCounter.update({QueryProtein: 1})

for x in QueryCounter:
print x + ":", QueryCounter[x]
for x in QueryCounter:
print(x + ":", QueryCounter[x])

except IOError:
print("Failed to open " + BLASTResults)
exit(1)
78 changes: 39 additions & 39 deletions Statistics/QueryProtienRepeats.py
Original file line number Diff line number Diff line change
@@ -1,60 +1,60 @@
#!/usr/bin/env python
# Created by: Lee Bergstrand
# Descript: Counts up BLAST hits per subject.
# Description: Counts up BLAST hits per subject.
#
# Usage: QueryProtienRepeats.py <input.csv>
# Example: QueryProtienRepeats.py AUUJ0000000000.csv
#----------------------------------------------------------------------------------------
# Usage: QueryProteinRepeats.py <input.csv>
# Example: QueryProteinRepeats.py AUUJ0000000000.csv
# ----------------------------------------------------------------------------------------
# Imports
import csv
import sys
#========================================================================================
# ========================================================================================

# If in proper number of arguments are passed gives instructions on proper use.
if len(sys.argv) < 2 or len(sys.argv) > 2:
print "Filtres Blast Hits"
print "By Lee Bergstrand\n"
print "Filtres Blast Hits to ...\n"
print "Usage: " + sys.argv[0] + " <input.csv>"
print "Examples: " + sys.argv[0] + ' AUUJ0000000000.csv'
exit(1) # Aborts program. (exit(1) indicates that an error occured)
print("Filters Blast Hits")
print("By Lee Bergstrand\n")
print("Filters Blast Hits to ...\n")
print("Usage: " + sys.argv[0] + " <input.csv>")
print("Examples: " + sys.argv[0] + ' AUUJ0000000000.csv')
exit(1) # Aborts program. (exit(1) indicates that an error occurred)

# Stores stores argument data.
print ">> Opening CSV Blast Result file..."
inFile = sys.argv[1]
print(">> Opening CSV Blast Result file...")
inFile = sys.argv[1]
outFile = inFile + ".out"

# File extension check
if not inFile.endswith(".csv"):
print "[Warning] " + inFile + " may not be a CSV file!"
print("[Warning] " + inFile + " may not be a CSV file!")
else:
print ">> Good file extention."
print(">> Good file extension.")

# Opens CSV file for reading.
try:
readFile = open(inFile, "r")
reader = csv.reader(readFile) # opens file with csv module which takes into account verying csv formats and parses correctly
print ">> Good CSV file."
reader = csv.reader(
readFile) # opens file with csv module which takes into account varying csv formats and parses correctly
print(">> Good CSV file.")

print(">> Reading BLAST hits...")
# Modifies changes elements in column according to regex. New modified rows to new file.
BlastHits = []
for row in reader:
BlastHits.append(row)
readFile.close()

QueryCounter = {}

for hit in BlastHits:
QueryProtein = hit[0]
if QueryProtein in QueryCounter:
QueryCounter[QueryProtein] += 1
else:
QueryCounter.update({QueryProtein: 1})

for x in QueryCounter:
print(x + ":", QueryCounter[x])
except IOError:
print "Failed to open " + inFile
print("Failed to open " + inFile)
exit(1)

BlastHits = []

print ">> Reading BLAST hits..."
# Modifies changes elements in column according to regex. New modified rows to new file.
for row in reader:
BlastHits.append(row)
readFile.close()

QueryCounter = {}

for hit in BlastHits:
QueryProtien = hit[0]
if QueryProtien in QueryCounter:
QueryCounter[QueryProtien] += 1
else:
QueryCounter.update({QueryProtien:1})

for x in QueryCounter:
print x + ":", QueryCounter[x]
69 changes: 36 additions & 33 deletions Visualization/CreateBlankResults.py
Original file line number Diff line number Diff line change
@@ -1,83 +1,86 @@
#!/usr/bin/env python
# Created by: Lee Bergstrand
# Descript: A simple program that takes a FASTA file protien list and makes a csv of fake BLAST results.
# Description: A simple program that takes a FASTA file protein list and makes a csv of fake BLAST results.
#
# Requirements: - This script requires the Biopython module: http://biopython.org/wiki/Download
#
# Usage: getGenbankSeqs.py <sequences.txt> <geneList.fna>
# Example: getGenbankSeqs.py mySeqs.txt geneList.fna
#----------------------------------------------------------------------------------------
#===========================================================================================================
#Imports:
# ----------------------------------------------------------------------------------------
# ===========================================================================================================
# Imports:

import sys
from Bio import SeqIO
#===========================================================================================================
# ===========================================================================================================
# Functions:

# 1: Checks if in proper number of arguments are passed gives instructions on proper use.
def argsCheck(numArgs):
if len(sys.argv) < numArgs or len(sys.argv) > numArgs:
print "Takes a nucleotide FASTA file and returns the exact same FASTA file with a reverse complemented sequence."
print "By Lee Bergstrand\n"
print "Usage: " + sys.argv[0] + " <sequences.txt> <geneList.fna>"
print "Examples: " + sys.argv[0] + " mySeq.txt geneList.fna\n"
exit(1) # Aborts program. (exit(1) indicates that an error occurred)
#===========================================================================================================
print(
"Takes a nucleotide FASTA file and returns the exact same FASTA file with a reverse complemented sequence.")
print("By Lee Bergstrand\n")
print("Usage: " + sys.argv[0] + " <sequences.txt> <geneList.fna>")
print("Examples: " + sys.argv[0] + " mySeq.txt geneList.fna\n")
exit(1) # Aborts program. (exit(1) indicates that an error occurred)

# ===========================================================================================================
# Main program code:

# House keeping...
argsCheck(3) # Checks if the number of arguments are correct.
argsCheck(3) # Checks if the number of arguments are correct.

# Stores file one for input checking.
print ">> Opening FASTA file..."
print(">> Opening FASTA file...")
filesToReplace = sys.argv[1]
geneList = sys.argv[2]

# File extension check
if not filesToReplace.endswith(".txt"):
print "[Warning] " + geneList + " may not be a text file!"
print("[Warning] " + geneList + " may not be a text file!")

# File extension check
if not geneList.endswith(".faa"):
print "[Warning] " + geneList + " may not be a FASTA file!"
print("[Warning] " + geneList + " may not be a FASTA file!")

# Reads sequence file list and stores it as a string object. Safely closes file.try:
try:
with open(filesToReplace,"rU") as newFile:
try:
with open(filesToReplace, "rU") as newFile:
filesToReplace = newFile.read()
newFile.close()
except IOError:
print "Failed to open " + inFile
print("Failed to open " + inFile)
exit(1)

filesToReplaceList = filesToReplace.splitlines() # Splits string into a list. Each element is a single line from the string.
# Splits string into a list. Each element is a single line from the string.
filesToReplaceList = filesToReplace.splitlines()

FakeResults = []
try:
handle = open(geneList, "rU")
SeqRecords = SeqIO.parse(handle, "fasta")
for record in SeqRecords:
FakeResults.append(record.id + ",sseqid,0,evalue,qcovhsp,bitscore") # qseqid sseqid pident evalue qcovhsp bitscore
FakeResults.append(
record.id + ",sseqid,0,evalue,qcovhsp,bitscore") # qseqid sseqid pident evalue qcovhsp bitscore
handle.close()
except IOError:
print "Failed to open " + inFile + " or " + outFile
print("Failed to open " + inFile + " or " + outFile)
exit(1)

FakeResultsOut = "\n".join(FakeResults)

print ">> Writing Fake Results..."
print(">> Writing Fake Results...")

for x in filesToReplaceList:
replacementFileName = x + ".csv"

try:
writeFile = open(replacementFileName, "w")
writeFile.write(FakeResultsOut)
writeFile.close()
except IOError:
print "Failed to create " + outFile
exit(1)

except IOError:
print("Failed to create " + outFile)
exit(1)

print ">> Done."
print(">> Done.")
90 changes: 48 additions & 42 deletions Visualization/ExtractBackBlastSubject.py
Original file line number Diff line number Diff line change
@@ -1,73 +1,79 @@
#!/usr/bin/env python
# Created by: Lee Bergstrand
# Descript: A simple script that extracts the subject BLAST hits for a given subject protien from the csv results from BackBLAST.py.
# Description: A simple script that extracts the subject BLAST hits for
# a given subject protein from the csv results from BackBLAST.py.
#
# Usage: ExtractBackBlastSubject.py <myInputList.txt> <myInputBLASTCSV.csv>
# Example: ExtractBackBlastSubject.py myInputList.txt myInputBLASTCSV.csv
#----------------------------------------------------------------------------------------
# ----------------------------------------------------------------------------------------

import csv
import sys

# If in proper number of arguments are passed gives instructions on proper use.
if len(sys.argv) < 2 or len(sys.argv) > 2:
print "BLAST Subject Extractor"
print "By Lee Bergstrand\n"
print "A simple script that extracts the subject BLAST hits for a given subject protien from the csv results from BackBLAST.py.\n"
print "Usage: " + sys.argv[0] + "<myInputList.txt> <myInputBLASTCSV.csv>"
print "Examples: " + sys.argv[0] + 'myInputList.txt myInputBLASTCSV.csv'
exit(1) # Aborts program. (exit(1) indicates that an error occured)
print("BLAST Subject Extractor")
print("By Lee Bergstrand\n")
print(
"A simple script that extracts the subject BLAST hits for a given subject protien from the csv results from BackBLAST.py.\n")
print("Usage: " + sys.argv[0] + "<myInputList.txt> <myInputBLASTCSV.csv>")
print("Examples: " + sys.argv[0] + 'myInputList.txt myInputBLASTCSV.csv')
exit(1) # Aborts program. (exit(1) indicates that an error occurred)

# Store argument data.
inListFile = sys.argv[1]
inCSVFile = sys.argv[2]
inListFile = sys.argv[1]
inCSVFile = sys.argv[2]
outListFile = sys.argv[1] + ".out"

print ">> Reading accession list from " + inListFile + "..." # File extension check
print(">> Reading accession list from " + inListFile + "...") # File extension check
if not inListFile.endswith(".txt"):
print "[Warning] " + inListFile + "May not be a txt file!"
print("[Warning] " + inListFile + "May not be a txt file!")
else:
print ">> Good file extention."
print(">> Good file extension.")

print ">> Reading back blast results from " + inCSVFile + "..." # File extension check
print(">> Reading back blast results from " + inCSVFile + "...") # File extension check
if not inCSVFile.endswith(".csv"):
print "[Warning] " + inCSVFile + "May not be a CSV file!"
print("[Warning] " + inCSVFile + "May not be a CSV file!")
else:
print ">> Good file extention."

print(">> Good file extension.")

seqList = []

# Reads query sequence file list and stores it as a string object. Safely closes file.try:
try:
with open(inListFile,"r") as newFile:
try:
with open(inListFile, "r") as newFile:
sequences = newFile.read()
seqList = sequences.splitlines() # Splits string into a list. Each element is a single line from the string.
seqList = sequences.splitlines() # Splits string into a list. Each element is a single line from the string.
newFile.close()
except IOError:
print "Failed to open " + inListFile
print("Failed to open " + inListFile)
exit(1)

# Opens BLAST CSV output for reading.
try:
csvFile = open(inCSVFile, "r")
BLASTreader = csv.reader(csvFile) # opens file with csv module which takes into account verying csv formats and parses correctly
print ">> Good CSV file."
except IOError:
print "Failed to open " + inCSVFile
exit(1)
BLASTreader = csv.reader(
csvFile) # opens file with csv module which takes into account varying csv formats and parses correctly
print(">> Good CSV file.")

accessionsToAdd = []

for row in BLASTreader:
if row[0] in seqList:
if float(row[2]) >= 30:
if (row[1] + "\n") not in accessionsToAdd:
accessionsToAdd.append(row[1] + "\n")

try:
print(">> Output file created.")
print(">> Writing Data...")
writeFile = open(outListFile, "w")
writeFile.writelines(accessionsToAdd)
writeFile.close()
except IOError:
print("Failed to create " + outListFile)
exit(1)

accessionsToAdd = []

for row in BLASTreader:
if row[0] in seqList:
if float(row[2]) >= 30:
if (row[1] + "\n") not in accessionsToAdd:
accessionsToAdd.append(row[1] + "\n")

try:
print ">> Output file created."
print ">> Writing Data..."
writeFile = open(outListFile, "w")
writeFile.writelines(accessionsToAdd)
writeFile.close()
except IOError:
print "Failed to create " + outListFile
exit(1)
print("Failed to open " + inCSVFile)
exit(1)
4 changes: 2 additions & 2 deletions Visualization/PIndentHeatMapper.R
Original file line number Diff line number Diff line change
@@ -10,8 +10,8 @@ library(ape)
library(gplots)

# Sets the working directory.
setwd("/Users/lee/Data/SecondBDHAnalysis/Proteobacteria/Results/New/Filtered/")
tree = read.tree("/Users/lee/Data/SecondBDHAnalysis/Proteobacteria/16SRRNA/New/PhyloTree/Bootstrap/Proteo16STree.nwk")
setwd("/Users/lee/Google Drive/Mohn_Lab_Steroid_Research/SecondBDBHAnalysis/Proteobacteria/OldResults/14-09-18/Filtered")
tree = read.tree("/Users/lee/Google Drive/Mohn_Lab_Steroid_Research/SecondBDBHAnalysis/Proteobacteria/16SRRNA/New/PhyloTree/Old14-08-19/Bootstrap/Proteo16STree.nwk")

# Gets a list files from the working directory.
fileList = list.files(path = getwd(), all.files = FALSE, pattern = "\\.csv$")
4 changes: 2 additions & 2 deletions Visualization/PIndentHeatMapperNoTree.R
Original file line number Diff line number Diff line change
@@ -10,7 +10,7 @@ library(ape)
library(gplots)

# Sets the working directory.
setwd("/Users/lee/Data/SecondBDHAnalysis/InDraft/Results/Filtered")
setwd("/Users/lee/Dropbox/RandD/Repositories/BackBLAST_Reciprocal_BLAST/TestData/Old_Run/Filtered")

# Gets a list files from the working directory.
fileList = list.files(path = getwd(), all.files = FALSE, pattern = "\\.csv$")
@@ -40,5 +40,5 @@ HeatmapMatrix = HeatmapMatrix[order(rownames(HeatmapMatrix)), ] # Reorders rows
ColourPal = brewer.pal(9,"YlGn") # Gets Colour Palete from R colour brewer.
ColourPal[1] = "#F4F5F6" # Swaps lowest colour for off white.
ColourPal = append(ColourPal, "#00311d")
heatmap.2(HeatmapMatrix, Rowv = NA, col = ColourPal,
heatmap.2(HeatmapMatrix, Rowv=FALSE, Colv=FALSE, col = ColourPal,
trace="none", xlab = "Genome", ylab = "Steroid Degrading Gene", margins = c(10,9))

0 comments on commit 440f2e3

Please sign in to comment.