Merge pull request #70 from sudlab/ns-rse/65-logging

sudlab · Nov 22, 2024 · 3fab8c8 · 3fab8c8
2 parents e12e78d + 2ae9aa6
commit 3fab8c8
Show file tree

Hide file tree

Showing 5 changed files with 185 additions and 163 deletions.
diff --git a/isoslam/all_introns_counts_and_info.py b/isoslam/all_introns_counts_and_info.py
@@ -118,7 +118,7 @@ def fragment_iterator(read_iterator):
     with open(argv_as_dictionary["outfile_tsv"], "w") as outfile:
         # Add column headers
         outfile.write(
-            "Read_UID\tTranscript_id\tStart\tEnd\tChr\tStrand\tAssignment\tConversions\tConvertable\tCoverage\n"
+            "Read_UID\tTranscript_id\tStart\tEnd\tChr\tStrand\tAssignment\tConversions\tConvertible\tCoverage\n"
         )
         results = pd.DataFrame()
 
@@ -143,7 +143,7 @@ def fragment_iterator(read_iterator):
 
             if i_progress == 10000:
                 # E.debug(str(i_total_progress) + " pairs processed")
-                # E.debug(str(i) + "spliced/retained pairs proccessed")
+                # E.debug(str(i) + "spliced/retained pairs processed")
                 i_progress = 0
 
             read1_start = read1.reference_start
@@ -268,7 +268,7 @@ def fragment_iterator(read_iterator):
                 continue
             first_matched += 1
 
-            # Create a set of tupples: (tx_id,(start,end))
+            # Create a set of tuples: (tx_id,(start,end))
             # Retained
             assign_conversions_to_retained = []
 
@@ -355,7 +355,7 @@ def fragment_iterator(read_iterator):
             # in the forward read.
             if strand == "+":
                 # pass if mapped to +ve transcript
-                convertable = set()
+                convertible = set()
                 # create a set (list that only allows unique values to be added)
                 # we will add the genome_pos at each point for both reads
                 # len(coverage) will be the # of uniquely covered positions
@@ -376,7 +376,7 @@ def fragment_iterator(read_iterator):
                     read_seq = forward_read.query_sequence[read_pos]
 
                     if genome_seq.upper() == "T":
-                        convertable.add(genome_pos)
+                        convertible.add(genome_pos)
 
                     if read_seq == "C" and genome_seq == "t":
                         variants_at_position = list(
@@ -400,7 +400,7 @@ def fragment_iterator(read_iterator):
                     read_seq = reverse_read.query_sequence[read_pos]
 
                     if genome_seq.upper() == "A":
-                        convertable.add(genome_pos)
+                        convertible.add(genome_pos)
 
                     if read_seq == "G" and genome_seq == "a":
                         variants_at_position = list(
@@ -417,7 +417,7 @@ def fragment_iterator(read_iterator):
 
             elif strand == "-":
                 # pass if mapped to -ve transcript
-                convertable = set()
+                convertible = set()
                 coverage = set()
                 converted_position = set()
                 for base in forward_read.get_aligned_pairs(with_seq=True):
@@ -430,7 +430,7 @@ def fragment_iterator(read_iterator):
                     read_seq = forward_read.query_sequence[read_pos]
 
                     if genome_seq.upper() == "A":
-                        convertable.add(genome_pos)
+                        convertible.add(genome_pos)
 
                     if read_seq == "G" and genome_seq == "a":
                         variants_at_position = list(
@@ -455,7 +455,7 @@ def fragment_iterator(read_iterator):
                     read_seq = reverse_read.query_sequence[read_pos]
 
                     if genome_seq.upper() == "T":
-                        convertable.add(genome_pos)
+                        convertible.add(genome_pos)
 
                     if read_seq == "C" and genome_seq == "t":
                         variants_at_position = list(
@@ -476,15 +476,15 @@ def fragment_iterator(read_iterator):
             i_output += 1
 
             # Stream output as a tsv
-            # Format: read_uid, transcript_id, start, end, ret/spl, conversions, convertable, coverage
+            # Format: read_uid, transcript_id, start, end, ret/spl, conversions, convertible, coverage
             # A read pair will cover multiple lines if it matches multiple events (but metadata will be same)
             # ns-rse : Add in building Pandas dataframe so the function can return something that is testable
             for transcript_id, position in assign_conversions_to_retained:
                 start, end, chr, strand = position
                 outfile.write(
                     f"{i_output}\t{transcript_id}\t"
                     f"{start}\t{end}\t{chr}\t{strand}\tRet\t{len(converted_position)}\t"
-                    f"{len(convertable)}\t{len(coverage)}\n"
+                    f"{len(convertible)}\t{len(coverage)}\n"
                 )
                 row = pd.DataFrame(
                     [
@@ -497,7 +497,7 @@ def fragment_iterator(read_iterator):
                             "Strand": strand,
                             "Assignment": "Ret",
                             "Conversions": len(converted_position),
-                            "Convertable": len(convertable),
+                            "Convertible": len(convertible),
                             "Coverage": len(coverage),
                         }
                     ]
@@ -509,7 +509,7 @@ def fragment_iterator(read_iterator):
                 outfile.write(
                     f"{i_output}\t{transcript_id}\t"
                     f"{start}\t{end}\t{chr}\t{strand}\tSpl\t{len(converted_position)}\t"
-                    f"{len(convertable)}\t{len(coverage)}\n"
+                    f"{len(convertible)}\t{len(coverage)}\n"
                 )
                 row = pd.DataFrame(
                     [
@@ -522,7 +522,7 @@ def fragment_iterator(read_iterator):
                             "Strand": strand,
                             "Assignment": "Spl",
                             "Conversions": len(converted_position),
-                            "Convertable": len(convertable),
+                            "Convertible": len(convertible),
                             "Coverage": len(coverage),
                         }
                     ]

diff --git a/isoslam/logging.py b/isoslam/logging.py
@@ -0,0 +1,14 @@
+"""Configure logging."""
+
+import sys
+
+from loguru import logger
+
+# Replace loguru's default sink with ONE formatted stderr sink; a second add() would print every record twice.
+logger.remove()
+logger.add(
+    sys.stderr,
+    colorize=True,
+    format="{time:HH:mm:ss} | <level>{level}</level> |<magenta>{file}</magenta>:<magenta>{module}</magenta>:"
+    "<magenta>{function}</magenta>:<magenta>{line}</magenta> | <level>{message}</level>",
+)
diff --git a/pyproject.toml b/pyproject.toml
@@ -40,6 +40,7 @@ dependencies = [
   "cgat",
   "cgatcore",
   "gevent",
+  "loguru",
   "matplotlib",
   "numpy",
   "pandas",
@@ -80,6 +81,7 @@ dev = [
   "pyupgrade",
   "pytest-durations",
   "pytest-icdiff",
+  "pytest-testmon",
   "pytest-xdist",
 ]
 pypi = [
@@ -129,6 +131,7 @@ xfail_strict = true
 source = ["isoslam"]
 omit = [
   "isoslam/_version.py",
+  "isoslam/all_introns_counts_and_info.py",
   "*tests*",
   "**/__init__*",
 ]
@@ -170,6 +173,7 @@ exclude = [
   "build",
   "dist",
   "docs/source/conf.py",
+  "isoslam/all_introns_counts_and_info.py",
 ]
 # per-file-ignores = []
 line-length = 120
@@ -251,6 +255,7 @@ exclude = [  # don't report on objects that match any of these regex
     "^test_",
     "^conftest",
     "^conf$",
+    "isoslam/all_introns_counts_and_info.py",
 ]
 override_SS05 = [  # override SS05 to allow docstrings starting with these words
     "^Process ",
@@ -267,6 +272,9 @@ python_version = "3.9"
 strict = true
 enable_error_code = ["ignore-without-code", "redundant-expr", "truthy-bool"]
 warn_unreachable = true
+exclude = [
+  "isoslam/all_introns_counts_and_info.py",
+]
 
 [[tool.mypy.overrides]]
 module = [ "numpy.*", ]