From 7fb27d86217a07986c1ba97d0328d72a71289f36 Mon Sep 17 00:00:00 2001
From: Andrey Prjibelski <andrewprzh@gmail.com>
Date: Sun, 8 Sep 2024 14:11:51 +0300
Subject: [PATCH] switch to simple id distributor as thread safety is not
 needed acrros differnt processes

---
 src/common.py                         | 19 +++++++------------
 src/gene_info.py                      |  7 +++----
 src/graph_based_model_construction.py | 19 ++++++++-----------
 src/id_policy.py                      | 21 ++++++++++++++++-----
 src/isoform_assignment.py             |  5 +++--
 5 files changed, 37 insertions(+), 34 deletions(-)

diff --git a/src/common.py b/src/common.py
index 9be377b6..c0df0348 100644
--- a/src/common.py
+++ b/src/common.py
@@ -9,7 +9,6 @@
 import os
 import re
 import subprocess
-import threading
 import math
 from collections import defaultdict
 from enum import Enum
@@ -17,17 +16,6 @@
 logger = logging.getLogger('IsoQuant')
 
 
-class AtomicIDDistributor(object):
-    def __init__(self):
-        self.value = 0
-        self._lock = threading.Lock()
-
-    def increment(self):
-        with self._lock:
-            self.value += 1
-            return self.value
-
-
 class CigarEvent(Enum):
     match = 0
     insertion = 1
@@ -48,6 +36,13 @@ def get_ins_del_match_events(cls):
         return {cls.match, cls.insertion, cls.deletion, cls.seq_match, cls.seq_mismatch}
 
 
+class TranscriptNaming:
+    transcript_prefix = "transcript"
+    novel_gene_prefix = "novel_gene_"
+    nic_transcript_suffix = ".nic"
+    nnic_transcript_suffix = ".nnic"
+
+
 # key, value
 def get_first_best_from_sorted(sorted_list_of_pairs):
     if not sorted_list_of_pairs:
diff --git a/src/gene_info.py b/src/gene_info.py
index 277f3c21..469bceae 100644
--- a/src/gene_info.py
+++ b/src/gene_info.py
@@ -16,11 +16,10 @@
     equal_ranges,
     get_intron_strand,
     intervals_total_length,
-    is_subprofile,
     overlaps,
-    junctions_from_blocks,
-    AtomicIDDistributor
+    junctions_from_blocks
 )
+from .id_policy import SimpleIDDistributor
 
 logger = logging.getLogger('IsoQuant')
 
@@ -121,7 +120,7 @@ def print_debug(self):
 
 # exon/intron info
 class FeatureInfo:
-    feature_id_counter = AtomicIDDistributor()
+    feature_id_counter = SimpleIDDistributor()
     def __init__(self, chr_id, start, end, strand, type, gene_ids):
         self.id = FeatureInfo.feature_id_counter.increment()
         self.chr_id = chr_id
diff --git a/src/graph_based_model_construction.py b/src/graph_based_model_construction.py
index d9f71c86..1273ac18 100644
--- a/src/graph_based_model_construction.py
+++ b/src/graph_based_model_construction.py
@@ -11,6 +11,7 @@
 from enum import unique, Enum
 
 from .common import (
+    TranscriptNaming,
     cmp,
     get_exons,
     intersection_len,
@@ -46,10 +47,6 @@ class StrandnessReportingLevel(Enum):
 
 
 class GraphBasedModelConstructor:
-    transcript_prefix = "transcript"
-    novel_gene_prefix = "novel_gene_"
-    nic_transcript_suffix = ".nic"
-    nnic_transcript_suffix = ".nnic"
     detected_known_isoforms = set()
     extended_transcript_ids = set()
 
@@ -411,7 +408,7 @@ def construct_fl_isoforms(self):
             transcript_range = (path[0][1], path[-1][1])
             novel_exons = get_exons(transcript_range, list(intron_path))
             count = self.path_storage.paths[path]
-            new_transcript_id = self.transcript_prefix + str(self.get_transcript_id())
+            new_transcript_id = TranscriptNaming.transcript_prefix + str(self.get_transcript_id())
             # logger.debug("uuu %s: %s" % (new_transcript_id, str(novel_exons)))
 
             reference_isoform = None
@@ -482,17 +479,17 @@ def construct_fl_isoforms(self):
 
                     transcript_gene = self.select_reference_gene(intron_path, transcript_range, transcript_strand)
                     if transcript_gene is None:
-                        transcript_gene = (GraphBasedModelConstructor.novel_gene_prefix + self.gene_info.chr_id +
+                        transcript_gene = (TranscriptNaming.novel_gene_prefix + self.gene_info.chr_id +
                                            "_" + str(self.get_transcript_id()))
                     elif transcript_strand == '.':
                         transcript_strand = self.gene_info.gene_strands[transcript_gene]
 
                     if all(intron in self.known_introns for intron in intron_path):
                         transcript_type = TranscriptModelType.novel_in_catalog
-                        id_suffix = self.nic_transcript_suffix
+                        id_suffix = TranscriptNaming.nic_transcript_suffix
                     else:
                         transcript_type = TranscriptModelType.novel_not_in_catalog
-                        id_suffix = self.nnic_transcript_suffix
+                        id_suffix = TranscriptNaming.nnic_transcript_suffix
 
                     new_model = TranscriptModel(self.gene_info.chr_id, transcript_strand,
                                                 new_transcript_id + ".%s" % self.gene_info.chr_id + id_suffix,
@@ -644,11 +641,11 @@ def generate_monoexon_from_clustered(self, clustered_reads, forward=True):
 
             strand = '+' if forward else '-'
             coordinates = (five_prime_pos, three_prime_pos) if forward else (three_prime_pos, five_prime_pos)
-            new_transcript_id = self.transcript_prefix + str(self.get_transcript_id())
-            transcript_gene = (GraphBasedModelConstructor.novel_gene_prefix + self.gene_info.chr_id +
+            new_transcript_id = TranscriptNaming.transcript_prefix + str(self.get_transcript_id())
+            transcript_gene = (TranscriptNaming.novel_gene_prefix + self.gene_info.chr_id +
                                "_" + str(self.get_transcript_id()))
             transcript_type = TranscriptModelType.novel_not_in_catalog
-            id_suffix = self.nnic_transcript_suffix
+            id_suffix = TranscriptNaming.nnic_transcript_suffix
 
             is_valid = True
             half_len = interval_len(coordinates) / 2
diff --git a/src/id_policy.py b/src/id_policy.py
index 029da66b..9c725830 100644
--- a/src/id_policy.py
+++ b/src/id_policy.py
@@ -3,9 +3,9 @@
 # # All Rights Reserved
 # See file LICENSE for details.
 ############################################################################
+import threading
 
-
-from .graph_based_model_construction import GraphBasedModelConstructor
+from src.common import TranscriptNaming
 
 
 class SimpleIDDistributor(object):
@@ -25,7 +25,7 @@ def __init__(self, genedb, chr_id):
             return
 
         for g in genedb.region(seqid=chr_id, start=1, featuretype="gene"):
-            if g.id.startswith(GraphBasedModelConstructor.novel_gene_prefix):
+            if g.id.startswith(TranscriptNaming.novel_gene_prefix):
                 try:
                     gene_num = int(g.id.split("_")[-1])
                     self.forbidden_ids.add(gene_num)
@@ -34,9 +34,9 @@ def __init__(self, genedb, chr_id):
                 except ValueError:
                     pass
 
-        transcript_num_start_pos = len(GraphBasedModelConstructor.transcript_prefix)
+        transcript_num_start_pos = len(TranscriptNaming.transcript_prefix)
         for t in genedb.region(seqid=chr_id, start=1, featuretype=("transcript", "mRNA")):
-            if t.id.startswith(GraphBasedModelConstructor.transcript_prefix):
+            if t.id.startswith(TranscriptNaming.transcript_prefix):
                 try:
                     transcript_num = int(t.id.split(".")[0][transcript_num_start_pos:])
                     self.forbidden_ids.add(transcript_num)
@@ -78,3 +78,14 @@ def get_id(self, chr_id, feature, strand):
             feature_id =  self.id_dict[feature_tuple]
 
         return feature_id
+
+
+class AtomicIDDistributor(object):
+    def __init__(self):
+        self.value = 0
+        self._lock = threading.Lock()
+
+    def increment(self):
+        with self._lock:
+            self.value += 1
+            return self.value
diff --git a/src/isoform_assignment.py b/src/isoform_assignment.py
index 1f6250f1..db1ba011 100644
--- a/src/isoform_assignment.py
+++ b/src/isoform_assignment.py
@@ -7,7 +7,8 @@
 
 import logging
 from enum import Enum, unique
-from src.common import AtomicIDDistributor, junctions_from_blocks
+from src.common import junctions_from_blocks
+from src.id_policy import SimpleIDDistributor
 from src.serialization import *
 from src.polya_finder import PolyAInfo
 
@@ -619,7 +620,7 @@ def serialize(self, outfile):
 
 
 class ReadAssignment:
-    assignment_id_generator = AtomicIDDistributor()
+    assignment_id_generator = SimpleIDDistributor()
 
     def __init__(self, read_id, assignment_type, match=None):
         self.assignment_id = ReadAssignment.assignment_id_generator.increment()