Skip to content

Commit

Permalink
fixes typos and adds missing variables
Browse files: browse the repository at this point in the history
  • Loading branch information
raphenya committed Aug 24, 2023
1 parent 08f6c7a commit 9b6951e
Show file tree
Hide file tree
Showing 4 changed files with 63 additions and 46 deletions.
2 changes: 1 addition & 1 deletion app/ConvertRGIJsonToTSV.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ def __init__(self, filepath, homolog_file=None, variant_file=None, overexpressio
name, ext = os.path.splitext(f_name)
self.filepath = os.path.join(f_path, "{}.json".format(f_name))
if ext.lower() == ".json":
self.filepath = os.path.join(f_path, "{}{}".format(name,ext))
self.filepath = os.path.join(f_path, "{}{}".format(name,ext))
self.homolog_file = homolog_file
self.variant_file = variant_file
self.overexpression_file = overexpression_file
Expand Down
27 changes: 17 additions & 10 deletions app/HomologModel.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,9 +65,9 @@ def run(self):
else:
c += 1
orffrom = orfInfo[c:]

modelTypeID = self.extract_nth_bar(alignTitle, 0)

if modelTypeID == 40292:
spacepos = alignTitle.index(' ')
hitid = alignTitle[0:spacepos]
Expand All @@ -78,7 +78,7 @@ def run(self):
seqinModel = modelDescrpt[underscoreinMD+1: modelDescrpt.index(' ')]

pass_bitscore = "{}".format(self.extract_nth_bar(alignment.title, 1))
pass_evalue = "{}".format("n/a")
pass_evalue = "{}".format("n/a")

# logger.info("pass_evalue: {}".format(pass_evalue))
# logger.info("pass_bitscore: {}".format(pass_bitscore))
Expand All @@ -94,7 +94,7 @@ def run(self):
card_sequence = str(json_data[modelID]["model_sequences"]["sequence"][seqinModel]["protein_sequence"]["sequence"])
except Exception as e:
logger.warning("Exception : {} -> {} -> Model({}) missing in database. Please generate new database.".format(type(e), e, modelID))

# if predicted_genes_dict:
# if orfInfo.strip() in predicted_genes_dict.keys():
# orf_protein_sequence = str(Seq(predicted_genes_dict[orfInfo.decode()], generic_dna).translate(table=11)).strip("*")
Expand Down Expand Up @@ -153,7 +153,7 @@ def run(self):
ppinsidedict["hit_end"] = (hsp.sbjct_end)*3

if orfInfo.decode().split(' # ')[0] in predicted_genes_dict:
ppinsidedict["orf_dna_sequence"] = predicted_genes_dict[orfInfo.decode().split(' # ')[0]]
ppinsidedict["orf_dna_sequence"] = predicted_genes_dict[orfInfo.decode().split(' # ')[0]]
# ppinsidedict["orf_prot_sequence"] = str(Seq(predicted_genes_dict[orfInfo.decode().split(' # ')[0]], generic_dna).translate(table=11)).strip("*")
ppinsidedict["orf_prot_sequence"] = orf_protein_sequence
else:
Expand All @@ -165,6 +165,9 @@ def run(self):
ppinsidedict["query_end"] = hsp.query_start + realQueryLength
ppinsidedict["query_from"] = blast_record.query
ppinsidedict["orf_prot_sequence"] = orf_protein_sequence
ppinsidedict["hit_start"] = ""
ppinsidedict["hit_end"] = ""


elif self.input_type == 'read':
pass
Expand All @@ -179,7 +182,7 @@ def run(self):
insidedict = {}
insidedict["type_match"] = "Strict"
insidedict["orf_strand"] = self.extract_nth_bar(orfInfo.decode(), 0)
insidedict["orf_start"] = self.extract_nth_bar(orfInfo.decode(), 1)
insidedict["orf_start"] = self.extract_nth_bar(orfInfo.decode(), 1)
insidedict["orf_end"] = self.extract_nth_bar(orfInfo.decode(), 2)
insidedict["orf_from"] = orffrom.decode()
insidedict["model_name"] = json_data[modelID]["model_name"]
Expand Down Expand Up @@ -214,20 +217,22 @@ def run(self):
insidedict["orf_from"] = self.extract_nth_hash(orfInfo.decode(), 0).rstrip()
insidedict["hit_start"] = (hsp.sbjct_start-1)*3
insidedict["hit_end"] = (hsp.sbjct_end)*3

if orfInfo.decode().split(' # ')[0] in predicted_genes_dict:
insidedict["orf_dna_sequence"] = predicted_genes_dict[orfInfo.decode().split(' # ')[0]]
insidedict["orf_dna_sequence"] = predicted_genes_dict[orfInfo.decode().split(' # ')[0]]
# insidedict["orf_prot_sequence"] = str(Seq(predicted_genes_dict[orfInfo.decode().split(' # ')[0]], generic_dna).translate(table=11)).strip("*")
insidedict["orf_prot_sequence"] = orf_protein_sequence
else:
insidedict["orf_dna_sequence"] = ""
insidedict["orf_prot_sequence"] = ""
insidedict["orf_prot_sequence"] = ""

elif self.input_type == 'protein':
insidedict["query_start"] = hsp.query_start
insidedict["query_end"] = hsp.query_start + realQueryLength
insidedict["query_from"] = blast_record.query
insidedict["orf_prot_sequence"] = orf_protein_sequence
insidedict["hit_start"] = ""
insidedict["hit_end"] = ""

elif self.input_type == 'read':
pass
Expand Down Expand Up @@ -293,6 +298,9 @@ def run(self):
linsidedict["query_end"] = hsp.query_start + realQueryLength
linsidedict["query_from"] = blast_record.query
linsidedict["orf_prot_sequence"] = orf_protein_sequence
linsidedict["hit_start"] = ""
linsidedict["hit_end"] = ""


elif self.input_type == 'read':
pass
Expand All @@ -308,4 +316,3 @@ def run(self):
blastResults = self.results(blastResults, blast_record.query, perfect, strict , loose, self.include_nudge)

return blastResults

41 changes: 24 additions & 17 deletions app/OverexpressionModel.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ def run(self):
temp = ""

pass_bitscore = "{}".format(self.extract_nth_bar(alignment.title, 1))
pass_evalue = "{}".format("n/a")
pass_evalue = "{}".format("n/a")

# logger.debug("pass_evalue: {}".format(pass_evalue))
# logger.debug("pass_bitscore: {}".format(pass_bitscore))
Expand All @@ -110,7 +110,7 @@ def run(self):
card_sequence = ""

orf_protein_sequence = ""

if predicted_genes_dict:
if orfInfo.strip() in predicted_genes_dict.keys():
orf_protein_sequence = str(Seq(predicted_genes_dict[orfInfo.decode()]).translate(table=11)).strip("*")
Expand Down Expand Up @@ -168,10 +168,10 @@ def run(self):
ppinsidedict["orf_from"] = self.extract_nth_hash(orfInfo.decode(), 0).rstrip()
ppinsidedict["hit_start"] = (hsp.sbjct_start-1)*3
ppinsidedict["hit_end"] = (hsp.sbjct_end)*3


if orfInfo.decode().split(' # ')[0] in predicted_genes_dict:
ppinsidedict["orf_dna_sequence"] = predicted_genes_dict[orfInfo.decode().split(' # ')[0]]
ppinsidedict["orf_dna_sequence"] = predicted_genes_dict[orfInfo.decode().split(' # ')[0]]
ppinsidedict["orf_prot_sequence"] = str(Seq(predicted_genes_dict[orfInfo.decode().split(' # ')[0]]).translate(table=11)).strip("*")
# ppinsidedict["orf_prot_sequence"] = orf_protein_sequence
else:
Expand All @@ -183,6 +183,8 @@ def run(self):
ppinsidedict["query_end"] = hsp.query_start + realQueryLength
ppinsidedict["query_from"] = blast_record.query
ppinsidedict["orf_prot_sequence"] = orf_protein_sequence
ppinsidedict["hit_start"] = ""
ppinsidedict["hit_end"] = ""

elif self.input_type == 'read':
pass
Expand Down Expand Up @@ -212,14 +214,14 @@ def run(self):
sinsidedict = {}
sinsidedict["type_match"] = "Strict"
sinsidedict["orf_strand"] = self.extract_nth_bar(orfInfo.decode(), 0)
sinsidedict["orf_start"] = self.extract_nth_bar(orfInfo.decode(), 1)
sinsidedict["orf_start"] = self.extract_nth_bar(orfInfo.decode(), 1)
sinsidedict["orf_end"] = self.extract_nth_bar(orfInfo.decode(), 2)
sinsidedict["orf_from"] = orffrom.decode()
sinsidedict["model_name"] = json_data[modelID]["model_name"]
sinsidedict["model_type"] = json_data[modelID]["model_type"]
sinsidedict["model_type_id"] = modelTypeID
sinsidedict["model_id"] = modelID
sinsidedict["snp"] = eachs
sinsidedict["snp"] = eachs
sinsidedict["pass_evalue"] = pass_evalue
sinsidedict["pass_bitscore"] = pass_bitscore
sinsidedict["ARO_accession"] = json_data[modelID]["ARO_accession"]
Expand Down Expand Up @@ -247,21 +249,23 @@ def run(self):
sinsidedict["orf_end"] = self.extract_nth_hash(orfInfo.decode(), 2)
sinsidedict["orf_from"] = self.extract_nth_hash(orfInfo.decode(), 0).rstrip()
sinsidedict["hit_start"] = (hsp.sbjct_start-1)*3
snsidedict["hit_end"] = (hsp.sbjct_end)*3
sinsidedict["hit_end"] = (hsp.sbjct_end)*3

if orfInfo.decode().split(' # ')[0] in predicted_genes_dict:
sinsidedict["orf_dna_sequence"] = predicted_genes_dict[orfInfo.decode().split(' # ')[0]]
sinsidedict["orf_dna_sequence"] = predicted_genes_dict[orfInfo.decode().split(' # ')[0]]
sinsidedict["orf_prot_sequence"] = str(Seq(predicted_genes_dict[orfInfo.decode().split(' # ')[0]]).translate(table=11)).strip("*")
# sinsidedict["orf_prot_sequence"] = orf_protein_sequence
else:
sinsidedict["orf_dna_sequence"] = ""
sinsidedict["orf_prot_sequence"] = ""
sinsidedict["orf_prot_sequence"] = ""

elif self.input_type == 'protein':
sinsidedict["query_start"] = hsp.query_start
sinsidedict["query_end"] = hsp.query_start + realQueryLength
sinsidedict["query_from"] = blast_record.query
sinsidedict["orf_prot_sequence"] = orf_protein_sequence
sinsidedict["hit_start"] = ""
sinsidedict["hit_end"] = ""

elif self.input_type == 'read':
pass
Expand All @@ -273,11 +277,11 @@ def run(self):
else:
if snp_counter == 0:
"""If no SNP detected in strict hit."""
# logger.debug("Strict hits - no SNP")
# logger.debug("Strict hits - no SNP")
insidedict = {}
insidedict["type_match"] = "Strict"
insidedict["orf_strand"] = self.extract_nth_bar(orfInfo.decode(), 0)
insidedict["orf_start"] = self.extract_nth_bar(orfInfo.decode(), 1)
insidedict["orf_start"] = self.extract_nth_bar(orfInfo.decode(), 1)
insidedict["orf_end"] = self.extract_nth_bar(orfInfo.decode(), 2)
insidedict["orf_from"] = orffrom.decode()
insidedict["model_name"] = json_data[modelID]["model_name"]
Expand Down Expand Up @@ -312,20 +316,22 @@ def run(self):
insidedict["orf_from"] = self.extract_nth_hash(orfInfo.decode(), 0).rstrip()
insidedict["hit_start"] = (hsp.sbjct_start-1)*3
insidedict["hit_end"] = (hsp.sbjct_end)*3

if orfInfo.decode().split(' # ')[0] in predicted_genes_dict:
insidedict["orf_dna_sequence"] = predicted_genes_dict[orfInfo.decode().split(' # ')[0]]
insidedict["orf_dna_sequence"] = predicted_genes_dict[orfInfo.decode().split(' # ')[0]]
insidedict["orf_prot_sequence"] = str(Seq(predicted_genes_dict[orfInfo.decode().split(' # ')[0]]).translate(table=11)).strip("*")
# insidedict["orf_prot_sequence"] = orf_protein_sequence
else:
insidedict["orf_dna_sequence"] = ""
insidedict["orf_prot_sequence"] = ""
insidedict["orf_prot_sequence"] = ""

elif self.input_type == 'protein':
insidedict["query_start"] = hsp.query_start
insidedict["query_end"] = hsp.query_start + realQueryLength
insidedict["query_from"] = blast_record.query
insidedict["orf_prot_sequence"] = orf_protein_sequence
insidedict["hit_start"] = ""
insidedict["hit_end"] = ""

elif self.input_type == 'read':
pass
Expand Down Expand Up @@ -390,6 +396,8 @@ def run(self):
linsidedict["query_end"] = hsp.query_start + realQueryLength
linsidedict["query_from"] = blast_record.query
linsidedict["orf_prot_sequence"] = orf_protein_sequence
linsidedict["hit_start"] = ""
linsidedict["hit_end"] = ""

elif self.input_type == 'read':
pass
Expand All @@ -403,6 +411,5 @@ def run(self):
logger.warning("{} ---> hsp.bits: {} {} ? {}".format(json_data[modelID]["model_name"],hsp.bits, type(hsp.bits), type(pass_bitscore)))

blastResults = self.results(blastResults, blast_record.query, perfect, strict , loose, self.include_nudge)

return blastResults

return blastResults
39 changes: 21 additions & 18 deletions app/VariantModel.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ def run(self):
else:
c += 1
orf_from = orf_info[c:]

model_type_id = self.extract_nth_bar(align_title, 0)
# logger.info("model_type_id: {} ".format(model_type_id))
space_pos = align_title.index(' ')
Expand All @@ -81,23 +81,23 @@ def run(self):
except ValueError:
true_pass_evalue = float(pass_value[0:pass_value.find(' ')])

# logger.info("mutation | model_type_id = " + str(align_title))
# logger.info("mutation | model_type_id = " + str(align_title))
init = 0
evalue_snp = self.extract_nth_bar(align_title, 2)
snpl = []
snp_dict_list = []
temp = ""
evalue_snp_dec = evalue_snp
snpl = evalue_snp_dec.split(',')

for each_snp in snpl:
snp_dict_list.append({"original": each_snp[0], "change": each_snp[-1], "position": int(each_snp[1:-1])})

for hsp in alignment.hsps:
query_seq = hsp.query.replace('-', '')
real_query_length = len(query_seq)
sbjct_seq = hsp.sbjct.replace('-', '')
real_sbjct_length = len(sbjct_seq)
real_query_length = len(query_seq)
sbjct_seq = hsp.sbjct.replace('-', '')
real_sbjct_length = len(sbjct_seq)

for eachs in snp_dict_list:
pos = eachs["position"]
Expand All @@ -121,7 +121,7 @@ def run(self):

if submitted_proteins_dict:
orf_protein_sequence = str(submitted_proteins_dict[orf_info.decode().split(" ")[0]])

# logger.info("mutation | Model:"+str(model_id) + " | pos:" +str(pos) +" | change: "+str(hsp.query[pos - hsp.sbjct_start + \
# self.find_num_dash(hsp.sbjct, (pos-hsp.sbjct_start))]) + "=" + str(chan) + " AND wildtype: " + str(hsp.sbjct[pos - hsp.sbjct_start \
# +self.find_num_dash(hsp.sbjct, (pos-hsp.sbjct_start))]) + "=" + str(ori))
Expand All @@ -131,7 +131,7 @@ def run(self):
sbj = int(pos) - hsp.sbjct_start + self.find_num_dash(hsp.sbjct, (int(pos) - hsp.sbjct_start))

if hsp.query[qry] == chan:
query_snps = {}
query_snps = {}
# logger.debug("mutation | Model:"+str(model_id) + " | pos:" +str(pos) +" | change: "+str(hsp.query[pos - hsp.sbjct_start + \
# self.find_num_dash(hsp.sbjct, (pos-hsp.sbjct_start))]) + "=" + str(chan) + " AND wildtype: " + str(hsp.sbjct[pos - hsp.sbjct_start \
# +self.find_num_dash(hsp.sbjct, (pos-hsp.sbjct_start))]) + "=" + str(ori))
Expand All @@ -143,7 +143,7 @@ def run(self):
# logger.debug("query_snp on frame {} {}".format(hsp.frame, json.dumps(query_snps, indent=2)))

try:
if float(hsp.bits) >= float(true_pass_evalue):
if float(hsp.bits) >= float(true_pass_evalue):
sinsidedict = {}
sinsidedict["type_match"] = "Strict"
sinsidedict["snp"] = eachs
Expand Down Expand Up @@ -174,7 +174,7 @@ def run(self):
sinsidedict["partial"] = json_data[model_id]["model_sequences"]["sequence"][seq_in_model]["dna_sequence"]["partial"]
else:
sinsidedict["partial"] = "0"

if self.input_type == 'contig':
sinsidedict["query_start"] = self.extract_nth_hash(orf_info.decode(), 1) + (hsp.query_start - 1)*3
sinsidedict["query_end"] = self.extract_nth_hash(orf_info.decode(), 1) + (hsp.query_start - 1)*3 + real_query_length*3 - 1
Expand All @@ -184,22 +184,24 @@ def run(self):
sinsidedict["orf_from"] = self.extract_nth_hash(orf_info.decode(), 0)
sinsidedict["hit_start"] = (hsp.sbjct_start-1)*3
sinsidedict["hit_end"] = (hsp.sbjct_end)*3


if orf_info.decode().split(' # ')[0] in predicted_genes_dict:
sinsidedict["orf_dna_sequence"] = predicted_genes_dict[orf_info.decode().split(' # ')[0]]
sinsidedict["orf_dna_sequence"] = predicted_genes_dict[orf_info.decode().split(' # ')[0]]
# sinsidedict["orf_prot_sequence"] = str(Seq(predicted_genes_dict[orf_info.decode().split(' # ')[0]], generic_dna).translate(table=11)).strip("*")
sinsidedict["orf_prot_sequence"] = orf_protein_sequence
else:
sinsidedict["orf_dna_sequence"] = ""
sinsidedict["orf_prot_sequence"] = ""
sinsidedict["orf_prot_sequence"] = ""


elif self.input_type == 'protein':
sinsidedict["query_start"] = hsp.query_start
sinsidedict["query_end"] = hsp.query_start + real_query_length
sinsidedict["query_from"] = blast_record.query
sinsidedict["orf_prot_sequence"] = orf_protein_sequence
sinsidedict["hit_start"] = ""
sinsidedict["hit_end"] = ""

elif self.input_type == 'read':
pass
Expand All @@ -215,7 +217,7 @@ def run(self):
slinsidedict["snp"] = eachs
slinsidedict["query_snp"] = query_snps
slinsidedict["orf_strand"] = self.extract_nth_bar(orf_info.decode(), 0)
slinsidedict["orf_start"] = self.extract_nth_bar(orf_info.decode(), 1)
slinsidedict["orf_start"] = self.extract_nth_bar(orf_info.decode(), 1)
slinsidedict["orf_end"] = self.extract_nth_bar(orf_info.decode(), 2)
slinsidedict["orf_from"] = orf_from.decode()
slinsidedict["model_name"] = json_data[model_id]["model_name"]
Expand Down Expand Up @@ -252,18 +254,20 @@ def run(self):
slinsidedict["hit_end"] = (hsp.sbjct_end)*3

if orf_info.decode().split(' # ')[0] in predicted_genes_dict:
slinsidedict["orf_dna_sequence"] = predicted_genes_dict[orf_info.decode().split(' # ')[0]]
slinsidedict["orf_dna_sequence"] = predicted_genes_dict[orf_info.decode().split(' # ')[0]]
# slinsidedict["orf_prot_sequence"] = str(Seq(predicted_genes_dict[orf_info.decode().split(' # ')[0]], generic_dna).translate(table=11)).strip("*")
slinsidedict["orf_prot_sequence"] = orf_protein_sequence
else:
slinsidedict["orf_dna_sequence"] = ""
slinsidedict["orf_prot_sequence"] = ""
slinsidedict["orf_prot_sequence"] = ""

elif self.input_type == 'protein':
slinsidedict["query_start"] = hsp.query_start
slinsidedict["query_end"] = hsp.query_start + real_query_length
slinsidedict["query_from"] = blast_record.query
slinsidedict["orf_prot_sequence"] = orf_protein_sequence
slinsidedict["hit_start"] = ""
slinsidedict["hit_end"] = ""

elif self.input_type == 'read':
pass
Expand All @@ -277,6 +281,5 @@ def run(self):
logger.warning("{} ---> hsp.bits: {} {} ? {}".format(json_data[model_id]["model_name"],hsp.bits,type(hsp.bits), type(true_pass_evalue)))

blastResults = self.results(blastResults, blast_record.query, perfect, strict , loose, self.include_nudge)

return blastResults

0 comments on commit 9b6951e

Please sign in to comment.