-
Notifications
You must be signed in to change notification settings - Fork 8
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #7 from LeeBergstrand/develop
Develop
Showing
9 changed files
with
325 additions
and
299 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,196 +1,210 @@ | ||
#!/usr/bin/env python | ||
# Created by: Lee Bergstrand | ||
# Descript: A Biopython program that takes a list of query proteins and uses local BLASTp to search | ||
# for highly similer proteins within a local blast database (usally a local db of a target | ||
# proteome). The program then BLASTps backwards from the found subject proteins to the query | ||
# proteome to confirm gene orthology. | ||
# Description: A Biopython program that takes a list of query proteins and uses local BLASTp to search | ||
# for highly similar proteins within a local blast database (usually a local db of a target | ||
# proteome). The program then BLASTps backwards from the found subject proteins to the query | ||
# proteome to confirm gene orthology. | ||
# | ||
# Requirements: - This program requires the Biopython module: http://biopython.org/wiki/Download | ||
# - This script requires BLAST+ 2.2.9 or later. | ||
# - All operations are done with protien sequences. | ||
# - All operations are done with protein sequences. | ||
# - All query proteins should be from sequenced genomes in order to facilitate backwards BLAST. | ||
# - MakeAABlastDB must be used to create BLASTn databases for both query and subject proteomes. | ||
# - BLAST databases require that the FASTA file they were made from remain in the same directory. | ||
# | ||
# Usage: BackBLAST.py <queryGeneList.faa> <queryBLASTDB.faa> <subjectBLASTDB.faa> | ||
# Example: BackBLAST.py queryGeneList.faa AL123456.3.faa AUUJ00000000.faa | ||
#---------------------------------------------------------------------------------------- | ||
#=========================================================================================================== | ||
#Imports & Setup: | ||
# ---------------------------------------------------------------------------------------- | ||
# =========================================================================================================== | ||
# Imports & Setup: | ||
import sys | ||
import csv | ||
import subprocess | ||
import tempfile | ||
from Bio import SeqIO | ||
from Graph import Vertex | ||
from Graph import Graph | ||
from multiprocessing import cpu_count | ||
|
||
processors = cpu_count() # Gets number of processor cores for BLAST. | ||
processors = cpu_count() # Gets number of processor cores for BLAST. | ||
|
||
# Dev Imports: | ||
import time # For profiling purposes. | ||
#=========================================================================================================== | ||
# =========================================================================================================== | ||
# Functions: | ||
|
||
# 1: Checks if in proper number of arguments are passed gives instructions on proper use. | ||
def argsCheck(argsCount): | ||
if len(sys.argv) < argsCount: | ||
print "Orthologous Gene Finder" | ||
print "By Lee Bergstrand\n" | ||
print "Please refer to source code for documentation\n" | ||
print "Usage: " + sys.argv[0] + " <queryGeneList.faa> <queryBLASTDB.faa> <subjectBLASTDB.faa> \n" | ||
print "Examples:" + sys.argv[0] + " queryGeneList.faa AL123456.3.faa AUUJ00000000.faa" | ||
sys.exit(1) # Aborts program. (exit(1) indicates that an error occured) | ||
#------------------------------------------------------------------------------------------------- | ||
# 2: Runs BLAST, can either be sent a fasta formatted string or a file ... | ||
print("Orthologous Gene Finder") | ||
print("By Lee Bergstrand\n") | ||
print("Please refer to source code for documentation\n") | ||
print("Usage: " + sys.argv[0] + " <queryGeneList.faa> <queryBLASTDB.faa> <subjectBLASTDB.faa> \n") | ||
print("Examples:" + sys.argv[0] + " queryGeneList.faa AL123456.3.faa AUUJ00000000.faa") | ||
sys.exit(1) # Aborts program. (exit(1) indicates that an error occurred) | ||
|
||
|
||
# ------------------------------------------------------------------------------------------------- | ||
# 2: Runs BLAST, can either be sent a FASTA formatted string or a file ... | ||
def runBLAST(query, BLASTDBFile): | ||
# Runs BLASTp and saves the output to a string. Blastp is set to output a csv which can be parsed by Pythons CSV module. | ||
BLASTOut = subprocess.check_output(["blastp", "-db", BLASTDBFile, "-query", query, "-evalue", "1e-25", "-num_threads", str(processors), "-outfmt", "10 qseqid sseqid pident evalue qcovhsp bitscore"]) | ||
# Runs BLASTp and saves the output to a string. | ||
# BLASTp is set to output a csv which can be parsed by Pythons CSV module. | ||
BLASTOut = subprocess.check_output( | ||
["blastp", "-db", BLASTDBFile, "-query", query, "-evalue", "1e-25", "-num_threads", str(processors), "-outfmt", | ||
"10 qseqid sseqid pident evalue qcovhsp bitscore"]) | ||
return BLASTOut | ||
#------------------------------------------------------------------------------------------------- | ||
|
||
|
||
# ------------------------------------------------------------------------------------------------- | ||
# 3: Filters HSPs by Percent Identity... | ||
def filtreBLASTCSV(BLASTOut): | ||
|
||
def filterBLASTCSV(BLASTOut): | ||
minIdent = 25 | ||
|
||
BLASTCSVOut = BLASTOut.splitlines(True) # Converts raw BLAST csv output into list of csv rows. | ||
BLASTreader = csv.reader(BLASTCSVOut) # Reads BLAST csv rows as a csv. | ||
|
||
BLASTCSVOutFiltred = [] # Note should simply delete unwanted HSPs from curent list rather than making new list. | ||
# Rather than making a new one. | ||
BLASTCSVOut = BLASTOut.splitlines(True) # Converts raw BLAST csv output into list of csv rows. | ||
BLASTreader = csv.reader(BLASTCSVOut) # Reads BLAST csv rows as a csv. | ||
|
||
BLASTCSVOutFiltred = [] # Note should simply delete unwanted HSPs from current list rather than making new list. | ||
# Rather than making a new one. | ||
for HSP in BLASTreader: | ||
if HSP[2] >= minIdent: # Filtres by minimum ident. | ||
if HSP[2] >= minIdent: # Filters by minimum identity. | ||
# Converts each HSP parameter that should be a number to a number. | ||
HSP[2] = float(HSP[2]) | ||
HSP[2] = float(HSP[2]) | ||
HSP[3] = float(HSP[3]) | ||
HSP[4] = float(HSP[4]) | ||
HSP[5] = float(HSP[5]) | ||
BLASTCSVOutFiltred.append(HSP) # Appends to output array. | ||
HSP[5] = float(HSP[5]) | ||
BLASTCSVOutFiltred.append(HSP) # Appends to output array. | ||
|
||
return BLASTCSVOutFiltred | ||
#------------------------------------------------------------------------------------------------- | ||
# 5: Creates a python dictionary (hash table) that contains the the fasta for each protien in the proteome. | ||
|
||
|
||
# ------------------------------------------------------------------------------------------------- | ||
# 5: Creates a python dictionary (hash table) that contains the the FASTA for each protein in the proteome. | ||
def createProteomeHash(ProteomeFile): | ||
ProteomeHash = dict() | ||
ProteomeHash = dict() | ||
try: | ||
handle = open(ProteomeFile, "rU") | ||
proteome = SeqIO.parse(handle, "fasta") | ||
for record in proteome: | ||
ProteomeHash.update({ record.id : record.format("fasta") }) | ||
ProteomeHash.update({record.id: record.format("fasta")}) | ||
handle.close() | ||
except IOError: | ||
print "Failed to open " + ProteomeFile | ||
print("Failed to open " + ProteomeFile) | ||
sys.exit(1) | ||
|
||
return ProteomeHash | ||
#=========================================================================================================== | ||
|
||
|
||
# =========================================================================================================== | ||
# Main program code: | ||
# House keeping... | ||
argsCheck(4) # Checks if the number of arguments are correct. | ||
argsCheck(4) # Checks if the number of arguments are correct. | ||
|
||
queryFile = sys.argv[1] | ||
queryFile = sys.argv[1] | ||
queryBLASTDBFile = sys.argv[2] | ||
subjectBLASTDBFile = sys.argv[3] | ||
|
||
print "Opening " + subjectBLASTDBFile + "..." | ||
print("Opening " + subjectBLASTDBFile + "...") | ||
|
||
# File extension checks | ||
if not queryFile.endswith(".faa"): | ||
print "[Warning] " + queryFile + " may not be a amino acid fasta file!" | ||
print("[Warning] " + queryFile + " may not be a amino acid FASTA file!") | ||
if not queryBLASTDBFile.endswith(".faa"): | ||
print "[Warning] " + queryBLASTDBFile + " may not be a amino acid fasta file!" | ||
print("[Warning] " + queryBLASTDBFile + " may not be a amino acid FASTA file!") | ||
if not subjectBLASTDBFile.endswith(".faa"): | ||
print "[Warning] " + subjectBLASTDBFile + " may not be a amino acid fasta file!" | ||
print("[Warning] " + subjectBLASTDBFile + " may not be a amino acid FASTA file!") | ||
|
||
OutFile = subjectBLASTDBFile.rstrip(".faa") + ".csv" | ||
OutFile = subjectBLASTDBFile.rstrip(".faa") + ".csv" | ||
|
||
BLASTGraph = Graph() # Creates graph to map BLAST hits. | ||
BLASTGraph = Graph() # Creates graph to map BLAST hits. | ||
|
||
print ">> Forward Blasting to subject proteome..." | ||
BLASTForward = runBLAST(queryFile, subjectBLASTDBFile) # Forward BLASTs from query protiens to subject proteome | ||
BLASTForward = filtreBLASTCSV(BLASTForward) # Filtres BLAST results by PIdnet. | ||
print(">> Forward Blasting to subject proteome...") | ||
BLASTForward = runBLAST(queryFile, subjectBLASTDBFile) # Forward BLASTs from query proteins to subject proteome | ||
BLASTForward = filterBLASTCSV(BLASTForward) # Filters BLAST results by percent identity. | ||
|
||
if len(BLASTForward) == 0: | ||
print ">> No Forward hits in subject proteome were found." | ||
print(">> No Forward hits in subject proteome were found.") | ||
# Writes empty file for easier data processing. | ||
try: | ||
writeFile = open(OutFile, "w") | ||
writeFile = open(OutFile, "w") | ||
writer = csv.writer(writeFile) | ||
except IOError: | ||
print ">> Failed to create " + outFile | ||
print(">> Failed to create " + outFile) | ||
sys.exit(1) | ||
print ">> Exiting.\n\n" | ||
sys.exit(0) # Aborts program. (exit(0) indicates that no error occured) | ||
print(">> Exiting.\n\n") | ||
sys.exit(0) # Aborts program. (exit(0) indicates that no error occurred) | ||
|
||
SubjectProteomeHash = createProteomeHash(subjectBLASTDBFile) # Creates python dictionary contianing every protien in the subject Proteome. | ||
SubjectProteomeHash = createProteomeHash( | ||
subjectBLASTDBFile) # Creates python dictionary confining every protein in the subject Proteome. | ||
BackBlastQueryFASTAs = [] | ||
|
||
print ">> Creating Back-Blasting Query from found subject protiens..." | ||
print(">> Creating Back-Blasting Query from found subject proteins...") | ||
# For each top Hit... | ||
|
||
print(BLASTForward) | ||
|
||
for hit in BLASTForward: | ||
subjectProtein = hit[1] | ||
queryProtein = hit[0] | ||
subjectProtienFASTA = SubjectProteomeHash.get(subjectProtein) # Extracts subjectProtien from python dictionary. | ||
subjectProtienFASTA.strip() | ||
BackBlastQueryFASTAs.append(subjectProtienFASTA) # Addes current subject to overall protien list. | ||
subjectProteinFASTA = SubjectProteomeHash.get(subjectProtein) # Extracts subjectProtein from python dictionary. | ||
subjectProteinFASTA.strip() | ||
BackBlastQueryFASTAs.append(subjectProteinFASTA) # Adds current subject to overall protein list. | ||
|
||
CompleteBackBlastQuery = "".join(BackBlastQueryFASTAs) | ||
|
||
# Attempt to write a temporary fasta file for the reverse BLAST to use. | ||
# Attempt to write a temporary FASTA file for the reverse BLAST to use. | ||
try: | ||
writeFile = open("tempQuery.faa", "w") | ||
writeFile.write(CompleteBackBlastQuery) | ||
writeFile = open("tempQuery.faa", "w") | ||
writeFile.write(CompleteBackBlastQuery) | ||
writeFile.close() | ||
except IOError: | ||
print "Failed to create tempQuery.faa" | ||
print("Failed to create tempQuery.faa") | ||
sys.exit(1) | ||
|
||
print ">> Blasting backwards from subject genome to query genome." | ||
print(">> Blasting backwards from subject genome to query genome.") | ||
# Run backwards BLAST towards query proteome. | ||
BLASTBackward = runBLAST("tempQuery.faa", queryBLASTDBFile) | ||
BLASTBackward = filtreBLASTCSV(BLASTBackward) # Filtres BLAST results by PIdnet. | ||
BLASTBackward = filterBLASTCSV(BLASTBackward) # Filters BLAST results by percent identity. | ||
|
||
print ">> Creating Graph..." | ||
print(">> Creating Graph...") | ||
for hit in BLASTForward: | ||
BLASTGraph.addEdge(hit[0],hit[1],hit[5]) | ||
BLASTGraph.addEdge(hit[0], hit[1], hit[5]) | ||
for hit in BLASTBackward: | ||
BLASTGraph.addEdge(hit[0],hit[1],hit[5]) | ||
BLASTGraph.addEdge(hit[0], hit[1], hit[5]) | ||
|
||
BackBlastOutput = list(BLASTForward) | ||
|
||
print ">> Checking if forward hit subjects have better reciprocal hits than query." | ||
for hit in BLASTForward: | ||
queryProtien = BLASTGraph.getVertex(hit[0]) | ||
subjectProtien = BLASTGraph.getVertex(hit[1]) | ||
print(">> Checking if forward hit subjects have better reciprocal hits than query.") | ||
for hit in BLASTForward: | ||
queryProtein = BLASTGraph.getVertex(hit[0]) | ||
subjectProtein = BLASTGraph.getVertex(hit[1]) | ||
|
||
topBackHitScore = 0 | ||
# Find the top score of the best reciprical BLAST hit. | ||
for backHit in subjectProtien.getConnections(): | ||
backHitScore = subjectProtien.getWeight(backHit) # The edge weight between the subject and its reciprocal BLAST hit is the BLAST score. | ||
# Find the top score of the best reciprocal BLAST hit. | ||
for backHit in subjectProtein.getConnections(): | ||
backHitScore = subjectProtein.getWeight( | ||
backHit) # The edge weight between the subject and its reciprocal BLAST hit is the BLAST score. | ||
if backHitScore >= topBackHitScore: | ||
topBackHitScore = backHitScore | ||
# Check if the query is the best reciprical BLAST hit for the subject. | ||
|
||
# Check if the query is the best reciprocal BLAST hit for the subject. | ||
deleteHit = False | ||
if queryProtien in subjectProtien.getConnections(): | ||
BackHitToQueryScore = subjectProtien.getWeight(queryProtien) # The edge weight between the subject and the query is the reciprocal BLAST score. | ||
if BackHitToQueryScore < topBackHitScore: | ||
if queryProtein in subjectProtein.getConnections(): | ||
BackHitToQueryScore = subjectProtein.getWeight( | ||
queryProtein) # The edge weight between the subject and the query is the reciprocal BLAST score. | ||
if BackHitToQueryScore < topBackHitScore: | ||
deleteHit = True # If the query is not the best reciprocal BLAST hit simply delete it from the BackBlastOutput. | ||
else: | ||
deleteHit = True # If the query is not a reciprocal BLAST hit simply delete it from the BackBlastOutput. | ||
if deleteHit == True: | ||
del BackBlastOutput[BackBlastOutput.index(hit)] # Delete the forward BLAST hit from BackBlastOutput. | ||
deleteHit = True # If the query is not a reciprocal BLAST hit simply delete it from the BackBlastOutput. | ||
|
||
if deleteHit: | ||
del BackBlastOutput[BackBlastOutput.index(hit)] # Delete the forward BLAST hit from BackBlastOutput. | ||
|
||
#Attempts to write reciprocal BLAST output to file. | ||
# Attempts to write reciprocal BLAST output to file. | ||
try: | ||
writeFile = open(OutFile, "w") | ||
writeFile = open(OutFile, "w") | ||
writer = csv.writer(writeFile) | ||
print ">> Output file created." | ||
print ">> Writing Data..." | ||
print(">> Output file created.") | ||
print(">> Writing Data...") | ||
for row in BackBlastOutput: | ||
writer.writerow(row) | ||
except IOError: | ||
print ">> Failed to create " + outFile | ||
print(">> Failed to create " + outFile) | ||
sys.exit(1) | ||
print ">> Done\n" | ||
print(">> Done\n") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,57 +1,58 @@ | ||
#!/usr/bin/env python | ||
# From: http://interactivepython.org/courselib/static/pythonds/Graphs/graphintro.html#graphs | ||
# Descript: Lite Weight Script For Creating Graph Objects. | ||
#---------------------------------------------------------------------------------------- | ||
# Description: Lite Weight Script For Creating Graph Objects. | ||
# ---------------------------------------------------------------------------------------- | ||
|
||
class Vertex: | ||
def __init__(self,key): | ||
self.id = key | ||
self.connectedTo = {} | ||
def __init__(self, key): | ||
self.id = key | ||
self.connectedTo = {} | ||
|
||
def addNeighbor(self,nbr,weight=0): | ||
self.connectedTo[nbr] = weight | ||
def addNeighbor(self, nbr, weight=0): | ||
self.connectedTo[nbr] = weight | ||
|
||
def __str__(self): | ||
return str(self.id) + ' connectedTo: ' + str([x.id for x in self.connectedTo]) | ||
def __str__(self): | ||
return str(self.id) + ' connectedTo: ' + str([x.id for x in self.connectedTo]) | ||
|
||
def getConnections(self): | ||
return self.connectedTo.keys() | ||
def getConnections(self): | ||
return self.connectedTo.keys() | ||
|
||
def getId(self): | ||
return self.id | ||
def getId(self): | ||
return self.id | ||
|
||
def getWeight(self, nbr): | ||
return self.connectedTo[nbr] | ||
|
||
def getWeight(self,nbr): | ||
return self.connectedTo[nbr] | ||
|
||
class Graph: | ||
def __init__(self): | ||
self.vertList = {} | ||
self.numVertices = 0 | ||
|
||
def addVertex(self,key): | ||
self.numVertices = self.numVertices + 1 | ||
newVertex = Vertex(key) | ||
self.vertList[key] = newVertex | ||
return newVertex | ||
|
||
def getVertex(self,n): | ||
if n in self.vertList: | ||
return self.vertList[n] | ||
else: | ||
return None | ||
|
||
def __contains__(self,n): | ||
return n in self.vertList | ||
|
||
def addEdge(self,f,t,cost=0): | ||
if f not in self.vertList: | ||
nv = self.addVertex(f) | ||
if t not in self.vertList: | ||
nv = self.addVertex(t) | ||
self.vertList[f].addNeighbor(self.vertList[t], cost) | ||
|
||
def getVertices(self): | ||
return self.vertList.keys() | ||
|
||
def __iter__(self): | ||
return iter(self.vertList.values()) | ||
def __init__(self): | ||
self.vertList = {} | ||
self.numVertices = 0 | ||
|
||
def addVertex(self, key): | ||
self.numVertices = self.numVertices + 1 | ||
newVertex = Vertex(key) | ||
self.vertList[key] = newVertex | ||
return newVertex | ||
|
||
def getVertex(self, n): | ||
if n in self.vertList: | ||
return self.vertList[n] | ||
else: | ||
return None | ||
|
||
def __contains__(self, n): | ||
return n in self.vertList | ||
|
||
def addEdge(self, f, t, cost=0): | ||
if f not in self.vertList: | ||
nv = self.addVertex(f) | ||
if t not in self.vertList: | ||
nv = self.addVertex(t) | ||
self.vertList[f].addNeighbor(self.vertList[t], cost) | ||
|
||
def getVertices(self): | ||
return self.vertList.keys() | ||
|
||
def __iter__(self): | ||
return iter(self.vertList.values()) |
Submodule PhyloTree
updated
5 files
+65 −54 | BLASTToFind16S/16SBLAST.py | |
+103 −76 | HMMToFind16S/16SHMMER.py | |
+28 −25 | HMMToFind16S/GetReverseComplement.py | |
+29 −27 | HMMToFind16S/RNAStockholmToFASTA.py | |
+24 −23 | SupplementaryScripts/NewickToPhyloXML.py |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,61 +1,63 @@ | ||
#!/usr/bin/env python | ||
# Created by: Lee Bergstrand | ||
# Descript: Counts up BLAST hits per subject. | ||
# Description: Counts up BLAST hits per subject. | ||
# | ||
# Usage: QueryProtienRepeats.py <input.csv> | ||
# Example: QueryProtienRepeats.py AUUJ0000000000.csv | ||
#---------------------------------------------------------------------------------------- | ||
# Usage: BackBLASTStats.py <input.csv> | ||
# Example: BackBLASTStats.py AUUJ0000000000.csv | ||
# ---------------------------------------------------------------------------------------- | ||
# Imports | ||
import csv | ||
import sys | ||
#======================================================================================== | ||
# ======================================================================================== | ||
|
||
# If in proper number of arguments are passed gives instructions on proper use. | ||
if len(sys.argv) < 3 or len(sys.argv) > 3: | ||
print "Filtres Blast Hits" | ||
print "By Lee Bergstrand\n" | ||
print "Filtres Blast Hits to ...\n" | ||
print "Usage: " + sys.argv[0] + " <input.csv>" | ||
print "Examples: " + sys.argv[0] + ' AUUJ0000000000.csv' | ||
exit(1) # Aborts program. (exit(1) indicates that an error occured) | ||
print("Filters Blast Hits") | ||
print("By Lee Bergstrand\n") | ||
print("Filters Blast Hits to ...\n") | ||
print("Usage: " + sys.argv[0] + " <input.csv>") | ||
print("Examples: " + sys.argv[0] + ' AUUJ0000000000.csv') | ||
exit(1) # Aborts program. (exit(1) indicates that an error occurred) | ||
|
||
# Stores stores argument data. | ||
print ">> Opening CSV Blast Result file..." | ||
#ClusterBoundries | ||
print(">> Opening CSV Blast Result file...") | ||
# ClusterBoundaries | ||
BLASTResults = sys.argv[1] | ||
outFile = BLASTResults + ".out" | ||
|
||
# File extension check | ||
if not BLASTResults.endswith(".csv"): | ||
print "[Warning] " + BLASTResults + " may not be a CSV file!" | ||
print("[Warning] " + BLASTResults + " may not be a CSV file!") | ||
else: | ||
print ">> Good file extention." | ||
print(">> Good file extension.") | ||
|
||
# Opens CSV file for reading. | ||
try: | ||
readFile = open(BLASTResults, "r") | ||
reader = csv.reader(readFile) # opens file with csv module which takes into account verying csv formats and parses correctly | ||
print ">> Good CSV file." | ||
except IOError: | ||
print "Failed to open " + BLASTResults | ||
exit(1) | ||
reader = csv.reader( | ||
readFile) # opens file with csv module which takes into account varying csv formats and parses correctly | ||
print(">> Good CSV file.") | ||
|
||
BlastHits = [] | ||
BlastHits = [] | ||
|
||
print ">> Reading BLAST hits..." | ||
# Modifies changes elements in column according to regex. New modified rows to new file. | ||
for row in reader: | ||
BlastHits.append(row) | ||
readFile.close() | ||
print(">> Reading BLAST hits...") | ||
# Modifies changes elements in column according to regex. New modified rows to new file. | ||
for row in reader: | ||
BlastHits.append(row) | ||
readFile.close() | ||
|
||
QueryCounter = {} | ||
QueryCounter = {} | ||
|
||
for hit in BlastHits: | ||
QueryProtien = hit[0] | ||
if QueryProtien in QueryCounter: | ||
QueryCounter[QueryProtien] += 1 | ||
else: | ||
QueryCounter.update({QueryProtien:1}) | ||
for hit in BlastHits: | ||
QueryProtein = hit[0] | ||
if QueryProtein in QueryCounter: | ||
QueryCounter[QueryProtein] += 1 | ||
else: | ||
QueryCounter.update({QueryProtein: 1}) | ||
|
||
for x in QueryCounter: | ||
print x + ":", QueryCounter[x] | ||
for x in QueryCounter: | ||
print(x + ":", QueryCounter[x]) | ||
|
||
except IOError: | ||
print("Failed to open " + BLASTResults) | ||
exit(1) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,60 +1,60 @@ | ||
#!/usr/bin/env python | ||
# Created by: Lee Bergstrand | ||
# Descript: Counts up BLAST hits per subject. | ||
# Description: Counts up BLAST hits per subject. | ||
# | ||
# Usage: QueryProtienRepeats.py <input.csv> | ||
# Example: QueryProtienRepeats.py AUUJ0000000000.csv | ||
#---------------------------------------------------------------------------------------- | ||
# Usage: QueryProteinRepeats.py <input.csv> | ||
# Example: QueryProteinRepeats.py AUUJ0000000000.csv | ||
# ---------------------------------------------------------------------------------------- | ||
# Imports | ||
import csv | ||
import sys | ||
#======================================================================================== | ||
# ======================================================================================== | ||
|
||
# If in proper number of arguments are passed gives instructions on proper use. | ||
if len(sys.argv) < 2 or len(sys.argv) > 2: | ||
print "Filtres Blast Hits" | ||
print "By Lee Bergstrand\n" | ||
print "Filtres Blast Hits to ...\n" | ||
print "Usage: " + sys.argv[0] + " <input.csv>" | ||
print "Examples: " + sys.argv[0] + ' AUUJ0000000000.csv' | ||
exit(1) # Aborts program. (exit(1) indicates that an error occured) | ||
print("Filters Blast Hits") | ||
print("By Lee Bergstrand\n") | ||
print("Filters Blast Hits to ...\n") | ||
print("Usage: " + sys.argv[0] + " <input.csv>") | ||
print("Examples: " + sys.argv[0] + ' AUUJ0000000000.csv') | ||
exit(1) # Aborts program. (exit(1) indicates that an error occurred) | ||
|
||
# Stores stores argument data. | ||
print ">> Opening CSV Blast Result file..." | ||
inFile = sys.argv[1] | ||
print(">> Opening CSV Blast Result file...") | ||
inFile = sys.argv[1] | ||
outFile = inFile + ".out" | ||
|
||
# File extension check | ||
if not inFile.endswith(".csv"): | ||
print "[Warning] " + inFile + " may not be a CSV file!" | ||
print("[Warning] " + inFile + " may not be a CSV file!") | ||
else: | ||
print ">> Good file extention." | ||
print(">> Good file extension.") | ||
|
||
# Opens CSV file for reading. | ||
try: | ||
readFile = open(inFile, "r") | ||
reader = csv.reader(readFile) # opens file with csv module which takes into account verying csv formats and parses correctly | ||
print ">> Good CSV file." | ||
reader = csv.reader( | ||
readFile) # opens file with csv module which takes into account varying csv formats and parses correctly | ||
print(">> Good CSV file.") | ||
|
||
print(">> Reading BLAST hits...") | ||
# Modifies changes elements in column according to regex. New modified rows to new file. | ||
BlastHits = [] | ||
for row in reader: | ||
BlastHits.append(row) | ||
readFile.close() | ||
|
||
QueryCounter = {} | ||
|
||
for hit in BlastHits: | ||
QueryProtein = hit[0] | ||
if QueryProtein in QueryCounter: | ||
QueryCounter[QueryProtein] += 1 | ||
else: | ||
QueryCounter.update({QueryProtein: 1}) | ||
|
||
for x in QueryCounter: | ||
print(x + ":", QueryCounter[x]) | ||
except IOError: | ||
print "Failed to open " + inFile | ||
print("Failed to open " + inFile) | ||
exit(1) | ||
|
||
BlastHits = [] | ||
|
||
print ">> Reading BLAST hits..." | ||
# Modifies changes elements in column according to regex. New modified rows to new file. | ||
for row in reader: | ||
BlastHits.append(row) | ||
readFile.close() | ||
|
||
QueryCounter = {} | ||
|
||
for hit in BlastHits: | ||
QueryProtien = hit[0] | ||
if QueryProtien in QueryCounter: | ||
QueryCounter[QueryProtien] += 1 | ||
else: | ||
QueryCounter.update({QueryProtien:1}) | ||
|
||
for x in QueryCounter: | ||
print x + ":", QueryCounter[x] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,83 +1,86 @@ | ||
#!/usr/bin/env python | ||
# Created by: Lee Bergstrand | ||
# Descript: A simple program that takes a FASTA file protien list and makes a csv of fake BLAST results. | ||
# Description: A simple program that takes a FASTA file protein list and makes a csv of fake BLAST results. | ||
# | ||
# Requirements: - This script requires the Biopython module: http://biopython.org/wiki/Download | ||
# | ||
# Usage: getGenbankSeqs.py <sequences.txt> <geneList.fna> | ||
# Example: getGenbankSeqs.py mySeqs.txt geneList.fna | ||
#---------------------------------------------------------------------------------------- | ||
#=========================================================================================================== | ||
#Imports: | ||
# ---------------------------------------------------------------------------------------- | ||
# =========================================================================================================== | ||
# Imports: | ||
|
||
import sys | ||
from Bio import SeqIO | ||
#=========================================================================================================== | ||
# =========================================================================================================== | ||
# Functions: | ||
|
||
# 1: Checks if in proper number of arguments are passed gives instructions on proper use. | ||
def argsCheck(numArgs): | ||
if len(sys.argv) < numArgs or len(sys.argv) > numArgs: | ||
print "Takes a nucleotide FASTA file and returns the exact same FASTA file with a reverse complemented sequence." | ||
print "By Lee Bergstrand\n" | ||
print "Usage: " + sys.argv[0] + " <sequences.txt> <geneList.fna>" | ||
print "Examples: " + sys.argv[0] + " mySeq.txt geneList.fna\n" | ||
exit(1) # Aborts program. (exit(1) indicates that an error occurred) | ||
#=========================================================================================================== | ||
print( | ||
"Takes a nucleotide FASTA file and returns the exact same FASTA file with a reverse complemented sequence.") | ||
print("By Lee Bergstrand\n") | ||
print("Usage: " + sys.argv[0] + " <sequences.txt> <geneList.fna>") | ||
print("Examples: " + sys.argv[0] + " mySeq.txt geneList.fna\n") | ||
exit(1) # Aborts program. (exit(1) indicates that an error occurred) | ||
|
||
# =========================================================================================================== | ||
# Main program code: | ||
|
||
# House keeping... | ||
argsCheck(3) # Checks if the number of arguments are correct. | ||
argsCheck(3) # Checks if the number of arguments are correct. | ||
|
||
# Stores file one for input checking. | ||
print ">> Opening FASTA file..." | ||
print(">> Opening FASTA file...") | ||
filesToReplace = sys.argv[1] | ||
geneList = sys.argv[2] | ||
|
||
# File extension check | ||
if not filesToReplace.endswith(".txt"): | ||
print "[Warning] " + geneList + " may not be a text file!" | ||
print("[Warning] " + geneList + " may not be a text file!") | ||
|
||
# File extension check | ||
if not geneList.endswith(".faa"): | ||
print "[Warning] " + geneList + " may not be a FASTA file!" | ||
print("[Warning] " + geneList + " may not be a FASTA file!") | ||
|
||
# Reads sequence file list and stores it as a string object. Safely closes file.try: | ||
try: | ||
with open(filesToReplace,"rU") as newFile: | ||
try: | ||
with open(filesToReplace, "rU") as newFile: | ||
filesToReplace = newFile.read() | ||
newFile.close() | ||
except IOError: | ||
print "Failed to open " + inFile | ||
print("Failed to open " + inFile) | ||
exit(1) | ||
|
||
filesToReplaceList = filesToReplace.splitlines() # Splits string into a list. Each element is a single line from the string. | ||
# Splits string into a list. Each element is a single line from the string. | ||
filesToReplaceList = filesToReplace.splitlines() | ||
|
||
FakeResults = [] | ||
try: | ||
handle = open(geneList, "rU") | ||
SeqRecords = SeqIO.parse(handle, "fasta") | ||
for record in SeqRecords: | ||
FakeResults.append(record.id + ",sseqid,0,evalue,qcovhsp,bitscore") # qseqid sseqid pident evalue qcovhsp bitscore | ||
FakeResults.append( | ||
record.id + ",sseqid,0,evalue,qcovhsp,bitscore") # qseqid sseqid pident evalue qcovhsp bitscore | ||
handle.close() | ||
except IOError: | ||
print "Failed to open " + inFile + " or " + outFile | ||
print("Failed to open " + inFile + " or " + outFile) | ||
exit(1) | ||
|
||
FakeResultsOut = "\n".join(FakeResults) | ||
|
||
print ">> Writing Fake Results..." | ||
print(">> Writing Fake Results...") | ||
|
||
for x in filesToReplaceList: | ||
replacementFileName = x + ".csv" | ||
|
||
try: | ||
writeFile = open(replacementFileName, "w") | ||
writeFile.write(FakeResultsOut) | ||
writeFile.close() | ||
except IOError: | ||
print "Failed to create " + outFile | ||
exit(1) | ||
|
||
except IOError: | ||
print("Failed to create " + outFile) | ||
exit(1) | ||
|
||
print ">> Done." | ||
print(">> Done.") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,73 +1,79 @@ | ||
#!/usr/bin/env python | ||
# Created by: Lee Bergstrand | ||
# Descript: A simple script that extracts the subject BLAST hits for a given subject protien from the csv results from BackBLAST.py. | ||
# Description: A simple script that extracts the subject BLAST hits for | ||
# a given subject protein from the csv results from BackBLAST.py. | ||
# | ||
# Usage: ExtractBackBlastSubject.py <myInputList.txt> <myInputBLASTCSV.csv> | ||
# Example: ExtractBackBlastSubject.py myInputList.txt myInputBLASTCSV.csv | ||
#---------------------------------------------------------------------------------------- | ||
# ---------------------------------------------------------------------------------------- | ||
|
||
import csv | ||
import sys | ||
|
||
# If in proper number of arguments are passed gives instructions on proper use. | ||
if len(sys.argv) < 2 or len(sys.argv) > 2: | ||
print "BLAST Subject Extractor" | ||
print "By Lee Bergstrand\n" | ||
print "A simple script that extracts the subject BLAST hits for a given subject protien from the csv results from BackBLAST.py.\n" | ||
print "Usage: " + sys.argv[0] + "<myInputList.txt> <myInputBLASTCSV.csv>" | ||
print "Examples: " + sys.argv[0] + 'myInputList.txt myInputBLASTCSV.csv' | ||
exit(1) # Aborts program. (exit(1) indicates that an error occured) | ||
print("BLAST Subject Extractor") | ||
print("By Lee Bergstrand\n") | ||
print( | ||
"A simple script that extracts the subject BLAST hits for a given subject protien from the csv results from BackBLAST.py.\n") | ||
print("Usage: " + sys.argv[0] + "<myInputList.txt> <myInputBLASTCSV.csv>") | ||
print("Examples: " + sys.argv[0] + 'myInputList.txt myInputBLASTCSV.csv') | ||
exit(1) # Aborts program. (exit(1) indicates that an error occurred) | ||
|
||
# Store argument data. | ||
inListFile = sys.argv[1] | ||
inCSVFile = sys.argv[2] | ||
inListFile = sys.argv[1] | ||
inCSVFile = sys.argv[2] | ||
outListFile = sys.argv[1] + ".out" | ||
|
||
print ">> Reading accession list from " + inListFile + "..." # File extension check | ||
print(">> Reading accession list from " + inListFile + "...") # File extension check | ||
if not inListFile.endswith(".txt"): | ||
print "[Warning] " + inListFile + "May not be a txt file!" | ||
print("[Warning] " + inListFile + "May not be a txt file!") | ||
else: | ||
print ">> Good file extention." | ||
print(">> Good file extension.") | ||
|
||
print ">> Reading back blast results from " + inCSVFile + "..." # File extension check | ||
print(">> Reading back blast results from " + inCSVFile + "...") # File extension check | ||
if not inCSVFile.endswith(".csv"): | ||
print "[Warning] " + inCSVFile + "May not be a CSV file!" | ||
print("[Warning] " + inCSVFile + "May not be a CSV file!") | ||
else: | ||
print ">> Good file extention." | ||
|
||
print(">> Good file extension.") | ||
|
||
seqList = [] | ||
|
||
# Reads query sequence file list and stores it as a string object. Safely closes file.try: | ||
try: | ||
with open(inListFile,"r") as newFile: | ||
try: | ||
with open(inListFile, "r") as newFile: | ||
sequences = newFile.read() | ||
seqList = sequences.splitlines() # Splits string into a list. Each element is a single line from the string. | ||
seqList = sequences.splitlines() # Splits string into a list. Each element is a single line from the string. | ||
newFile.close() | ||
except IOError: | ||
print "Failed to open " + inListFile | ||
print("Failed to open " + inListFile) | ||
exit(1) | ||
|
||
# Opens BLAST CSV output for reading. | ||
try: | ||
csvFile = open(inCSVFile, "r") | ||
BLASTreader = csv.reader(csvFile) # opens file with csv module which takes into account verying csv formats and parses correctly | ||
print ">> Good CSV file." | ||
except IOError: | ||
print "Failed to open " + inCSVFile | ||
exit(1) | ||
BLASTreader = csv.reader( | ||
csvFile) # opens file with csv module which takes into account varying csv formats and parses correctly | ||
print(">> Good CSV file.") | ||
|
||
accessionsToAdd = [] | ||
|
||
for row in BLASTreader: | ||
if row[0] in seqList: | ||
if float(row[2]) >= 30: | ||
if (row[1] + "\n") not in accessionsToAdd: | ||
accessionsToAdd.append(row[1] + "\n") | ||
|
||
try: | ||
print(">> Output file created.") | ||
print(">> Writing Data...") | ||
writeFile = open(outListFile, "w") | ||
writeFile.writelines(accessionsToAdd) | ||
writeFile.close() | ||
except IOError: | ||
print("Failed to create " + outListFile) | ||
exit(1) | ||
|
||
accessionsToAdd = [] | ||
|
||
for row in BLASTreader: | ||
if row[0] in seqList: | ||
if float(row[2]) >= 30: | ||
if (row[1] + "\n") not in accessionsToAdd: | ||
accessionsToAdd.append(row[1] + "\n") | ||
|
||
try: | ||
print ">> Output file created." | ||
print ">> Writing Data..." | ||
writeFile = open(outListFile, "w") | ||
writeFile.writelines(accessionsToAdd) | ||
writeFile.close() | ||
except IOError: | ||
print "Failed to create " + outListFile | ||
exit(1) | ||
print("Failed to open " + inCSVFile) | ||
exit(1) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters