Skip to content

Commit

Permalink
FIX : Raise errors if the input files do not exist or if the organism…
Browse files Browse the repository at this point in the history
… names contain spaces
  • Loading branch information
Adelme Bazin committed Nov 14, 2019
1 parent 043f96c commit c0a4535
Show file tree
Hide file tree
Showing 4 changed files with 37 additions and 9 deletions.
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
1.0.4
1.0.1
2 changes: 1 addition & 1 deletion ppanggolin/annotate/annotate.py
Original file line number Diff line number Diff line change
Expand Up @@ -284,7 +284,7 @@ def readAnnotations(pangenome, organisms_file, getSeq = True, pseudo = False):
for line in bar:
elements = [el.strip() for el in line.split("\t")]
if len(elements)<=1:
logging.getLogger().error("No tabulation separator found in organisms file")
logging.getLogger().error(f"No tabulation separator found in given --fasta file: '{organisms_file}'")
exit(1)
bar.set_description("Processing "+elements[1].split("/")[-1])
bar.refresh()
Expand Down
6 changes: 2 additions & 4 deletions ppanggolin/cluster/cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ def read_gene2fam(pangenome, gene2fam):
link = True if pangenome.status["genomesAnnotated"] in ["Computed","Loaded"] else False
if link:
if len(gene2fam) != len(pangenome.genes):#then maybe there are genes with identical IDs
raise Exception("Something unexpected happened during clustering (have less genes clustered than genes in the pangenome). A probable reason is that two genes in two different organisms have the same IDs; If you are sure that all of your genes have an identical IDs, please post an issue at https://github.com/labgem/PPanGGOLiN/")
raise Exception("Something unexpected happened during clustering (have less genes clustered than genes in the pangenome). A probable reason is that two genes in two different organisms have the same IDs; If you are sure that all of your genes have non identical IDs, please post an issue at https://github.com/labgem/PPanGGOLiN/")
bar = tqdm(gene2fam.items(), unit = "gene")
for gene, (family, is_frag) in bar:
fam = pangenome.addGeneFamily(family)
Expand All @@ -167,7 +167,7 @@ def writeGeneSequencesFromAnnotations(pangenome, fileObj):
"""
logging.getLogger().info("Writing all of the CDS sequences for clustering...")
bar = tqdm(pangenome.genes, unit="gene")
for gene in bar:#reading the table chunk per chunk otherwise RAM dies on big pangenomes
for gene in bar:
if gene.type == "CDS":
fileObj.write('>' + gene.ID + "\n")
fileObj.write(gene.dna + "\n")
Expand Down Expand Up @@ -210,8 +210,6 @@ def clustering(pangenome, tmpdir, cpu , defrag = False, code = "11", coverage =

checkPangenomeForClustering(pangenome, tmpFile, force)



logging.getLogger().info("Clustering all of the genes sequences...")
rep, tsv = firstClustering(tmpFile, newtmpdir, cpu, code, coverage, identity)
fam2seq = read_faa(rep)
Expand Down
36 changes: 33 additions & 3 deletions ppanggolin/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import resource
import pkg_resources
import tempfile
import os

#local modules
import ppanggolin.pangenome
Expand All @@ -24,11 +25,32 @@
import ppanggolin.info
import ppanggolin.align

def requirements():
def checkTsvSanity(tsv):
    """
        Checks that every line of the given tab-separated file is usable.

        Each line is split on tabulations and every field is stripped of its
        surrounding whitespace before being checked.

        :param tsv: path to the tab-separated file to check
        :raises Exception: if a line does not contain any tabulation separator,
            or if the first field of a line (reported as the genome name in the
            error message) contains a space
    """
    # 'with' guarantees that the handle is closed, including when one of the
    # checks below raises; a bare open() left it open until garbage collection.
    with open(tsv, "r") as f:
        for line in f:
            elements = [el.strip() for el in line.split("\t")]
            if len(elements) <= 1:
                raise Exception(f"No tabulation separator found in given file: {tsv}")
            if " " in elements[0]:
                raise Exception(f"Your genome names contain spaces (The first encountered genome name that had this string : '{elements[0]}'). To ensure compatibility with all of the dependencies of PPanGGOLiN this is not allowed. Please remove spaces from your genome names.")

def checkInputFiles(anno=None, pangenome=None, fasta=None):
    """
        Checks if the provided input files exist and are of the proper format.

        Arguments left to None are ignored. The files are checked in this
        order: pangenome, anno, fasta; the first problem found is raised.

        :param anno: path to the annotation list file; must exist and pass checkTsvSanity
        :param pangenome: path to the pangenome file; only its existence is checked
        :param fasta: path to the fasta list file; must exist and pass checkTsvSanity
        :raises FileNotFoundError: if a provided path does not exist
        :raises Exception: propagated from checkTsvSanity for a malformed anno or fasta file
    """
    # (path, whether the file is a tab-separated list that needs a format check)
    inputs = ((pangenome, False), (anno, True), (fasta, True))
    for path, is_tsv in inputs:
        if path is None:
            continue
        if not os.path.exists(path):
            raise FileNotFoundError(f"No such file or directory: '{path}'")
        if is_tsv:
            checkTsvSanity(path)

def cmdLine():

Expand Down Expand Up @@ -95,6 +117,14 @@ def cmdLine():

def main():
args = cmdLine()

if hasattr(args, "pangenome"):
checkInputFiles(pangenome = args.pangenome)
if hasattr(args, "fasta"):
checkInputFiles(fasta = args.fasta)
if hasattr(args,"anno"):
checkInputFiles(anno = args.anno)

if hasattr(args, "verbose"):
if args.verbose == 2:
level = logging.DEBUG#info, debug, warnings and errors
Expand Down

0 comments on commit c0a4535

Please sign in to comment.