From c3adc5c94c2603f248f551d2693d8649d4f2deed Mon Sep 17 00:00:00 2001 From: Johannes Alneberg Date: Thu, 18 Apr 2024 16:02:17 +0200 Subject: [PATCH 1/7] Changed name of database variable for backup --- taca/backup/backup.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/taca/backup/backup.py b/taca/backup/backup.py index 2105e17d..34d253a9 100644 --- a/taca/backup/backup.py +++ b/taca/backup/backup.py @@ -265,12 +265,14 @@ def _log_pdc_statusdb(self, run): run_date = run_vals[0] run_fc = f"{run_date}_{run_vals[-1]}" couch_connection = statusdb.StatusdbSession(self.couch_info).connection - db = couch_connection[self.couch_info["db"]] - fc_names = {e.key: e.id for e in db.view("names/name", reduce=False)} + x_flowcells_db = couch_connection[self.couch_info["db"]] + fc_names = { + e.key: e.id for e in x_flowcells_db.view("names/name", reduce=False) + } d_id = fc_names[run_fc] - doc = db.get(d_id) + doc = x_flowcells_db.get(d_id) doc["pdc_archived"] = datetime.now().strftime("%Y-%m-%d %H:%M:%S") - db.save(doc) + x_flowcells_db.save(doc) logger.info( f'Logged "pdc_archived" timestamp for fc {run} in statusdb doc "{d_id}"' ) From 4c361456fb576c1e5d42d6740e860998e67fde9f Mon Sep 17 00:00:00 2001 From: Johannes Alneberg Date: Thu, 18 Apr 2024 16:06:12 +0200 Subject: [PATCH 2/7] Changed name of db variable to bioinfo_db --- taca/utils/bioinfo_tab.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/taca/utils/bioinfo_tab.py b/taca/utils/bioinfo_tab.py index 47eee5db..c2d46fb1 100644 --- a/taca/utils/bioinfo_tab.py +++ b/taca/utils/bioinfo_tab.py @@ -55,8 +55,8 @@ def update_statusdb(run_dir): statusdb_conf = CONFIG.get("statusdb") couch_connection = statusdb.StatusdbSession(statusdb_conf).connection valueskey = datetime.datetime.now().isoformat() - db = couch_connection["bioinfo_analysis"] - view = db.view("latest_data/sample_id") + bioinfo_db = couch_connection["bioinfo_analysis"] + view = bioinfo_db.view("latest_data/sample_id") # Construction and sending of individual records, if samplesheet is incorrectly formatted the loop is skipped if project_info: for flowcell in project_info: @@ -87,8 +87,8 @@ def update_statusdb(run_dir): if len(view[[project, run_id, lane, sample]].rows) >= 1: remote_id = view[[project, run_id, lane, sample]].rows[0].id lane = str(lane) - remote_doc = db[remote_id]["values"] - remote_status = db[remote_id]["status"] + remote_doc = bioinfo_db[remote_id]["values"] + remote_status = bioinfo_db[remote_id]["status"] # Only updates the listed statuses if ( remote_status @@ -110,16 +110,16 @@ def update_statusdb(run_dir): ) ) # Update record cluster - obj["_rev"] = db[remote_id].rev + obj["_rev"] = bioinfo_db[remote_id].rev obj["_id"] = remote_id - db.save(obj) + bioinfo_db.save(obj) # Creates new entry else: logger.info( f"Creating {run_id} {project} {flowcell} {lane} {sample} as {sample_status}" ) # Creates record - db.save(obj) + bioinfo_db.save(obj) # Sets FC error flag if project_info[flowcell].value is not None: if ( From 785c44bfc0d5db5ef30010709e654cea4376c973 Mon Sep 17 00:00:00 2001 From: Johannes Alneberg Date: Thu, 18 Apr 2024 16:08:08 +0200 Subject: [PATCH 3/7] Changed fc_db variable name to x_flowcells_db --- taca/utils/misc.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/taca/utils/misc.py b/taca/utils/misc.py index a180bcfd..cdd61237 100755 --- a/taca/utils/misc.py +++ b/taca/utils/misc.py @@ -214,11 +214,11 @@ def run_is_demuxed(run, couch_info=None, seq_run_type=None): run_name = f"{run_date}_{run_fc}" try: couch_connection = statusdb.StatusdbSession(couch_info).connection - fc_db = couch_connection[couch_info["xten_db"]] - for fc in fc_db.view("names/name", reduce=False, descending=True): + x_flowcells_db = couch_connection[couch_info["xten_db"]] + for fc in x_flowcells_db.view("names/name", reduce=False, descending=True): if fc.key != run_name: continue - fc_doc = fc_db.get(fc.id) + fc_doc = x_flowcells_db.get(fc.id) if not fc_doc or not fc_doc.get("illumina", {}).get( "Demultiplex_Stats", {} ): From 370277531a9a6f5f6dfa3c80183cfb900af38d1b Mon Sep 17 00:00:00 2001 From: Johannes Alneberg Date: Thu, 18 Apr 2024 16:22:47 +0200 Subject: [PATCH 4/7] Removed dbname option and made it fixed --- taca/utils/statusdb.py | 42 +++++++++++++++++++++++------------------- 1 file changed, 23 insertions(+), 19 deletions(-) diff --git a/taca/utils/statusdb.py b/taca/utils/statusdb.py index 939e0606..b9607b8c 100644 --- a/taca/utils/statusdb.py +++ b/taca/utils/statusdb.py @@ -83,25 +83,27 @@ def get_project_flowcell( class ProjectSummaryConnection(StatusdbSession): - def __init__(self, config, dbname="projects"): + def __init__(self, config): super().__init__(config) - self.db = self.connection[dbname] + self.projects_db = self.connection["projects"] self.name_view = { - k.key: k.id for k in self.db.view("project/project_name", reduce=False) + k.key: k.id + for k in self.projects_db.view("project/project_name", reduce=False) } self.id_view = { - k.key: k.id for k in self.db.view("project/project_id", reduce=False) + k.key: k.id + for k in self.projects_db.view("project/project_id", reduce=False) } class FlowcellRunMetricsConnection(StatusdbSession): - def __init__(self, config, dbname="flowcells"): + def __init__(self, config): super().__init__(config) - self.db = self.connection[dbname] + self.flowcells_db = self.connection["flowcells"] self.name_view = {k.key: k.id for k in self.db.view("names/name", reduce=False)} self.proj_list = { k.key: k.value - for k in self.db.view("names/project_ids_list", reduce=False) + for k in self.flowcells_db.view("names/project_ids_list", reduce=False) if k.key } @@ -109,31 +111,33 @@ def __init__(self, config, dbname="flowcells"): class X_FlowcellRunMetricsConnection(StatusdbSession): def __init__(self, config, dbname="x_flowcells"): super().__init__(config) - self.db = self.connection[dbname] - self.name_view = {k.key: k.id for k in self.db.view("names/name", reduce=False)} + self.x_flowcells_db = self.connection["x_flowcells"] + self.name_view = { + k.key: k.id for k in self.x_flowcells_db.view("names/name", reduce=False) + } self.proj_list = { k.key: k.value - for k in self.db.view("names/project_ids_list", reduce=False) + for k in self.x_flowcells_db.view("names/project_ids_list", reduce=False) if k.key } class NanoporeRunsConnection(StatusdbSession): - def __init__(self, config, dbname="nanopore_runs"): + def __init__(self, config): super().__init__(config) - self.db = self.connection[dbname] + self.nanopore_runs_db = self.connection["nanopore_runs"] def check_run_exists(self, ont_run) -> bool: - view_names = self.db.view("names/name") + view_names = self.nanopore_runs_db.view("names/name") if len(view_names[ont_run.run_name].rows) > 0: return True else: return False def check_run_status(self, ont_run) -> str: - view_all_stats = self.db.view("names/name") + view_all_stats = self.nanopore_runs_db.view("names/name") doc_id = view_all_stats[ont_run.run_name].rows[0].id - return self.db[doc_id]["run_status"] + return self.nanopore_runs_db[doc_id]["run_status"] def create_ongoing_run( self, ont_run, run_path_file: str, pore_count_history_file: str @@ -151,19 +155,19 @@ def create_ongoing_run( "pore_count_history": pore_counts, } - new_doc_id, new_doc_rev = self.db.save(new_doc) + new_doc_id, new_doc_rev = self.nanopore_runs_db.save(new_doc) logger.info( f"New database entry created: {ont_run.run_name}, id {new_doc_id}, rev {new_doc_rev}" ) def finish_ongoing_run(self, ont_run, dict_json: dict): - view_names = self.db.view("names/name") + view_names = self.nanopore_runs_db.view("names/name") doc_id = view_names[ont_run.run_name].rows[0].id - doc = self.db[doc_id] + doc = self.nanopore_runs_db[doc_id] doc.update(dict_json) doc["run_status"] = "finished" - self.db[doc.id] = doc + self.nanopore_runs_db[doc.id] = doc def update_doc(db, obj, over_write_db_entry=False): From 2f3d8f3aead98bba3535f5439c3c97dc49cde318 Mon Sep 17 00:00:00 2001 From: Johannes Alneberg Date: Thu, 18 Apr 2024 16:29:28 +0200 Subject: [PATCH 5/7] Changed another variable name db to x_flowcells_db --- taca/analysis/analysis.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/taca/analysis/analysis.py b/taca/analysis/analysis.py index d4ff702e..120991fa 100755 --- a/taca/analysis/analysis.py +++ b/taca/analysis/analysis.py @@ -110,7 +110,7 @@ def _upload_to_statusdb(run): """ couch_conf = CONFIG["statusdb"] couch_connection = statusdb.StatusdbSession(couch_conf).connection - db = couch_connection[couch_conf["xten_db"]] + x_flowcells_db = couch_connection[couch_conf["xten_db"]] parser = run.runParserObj # Check if I have NoIndex lanes for element in parser.obj["samplesheet_csv"]: @@ -155,7 +155,7 @@ def _upload_to_statusdb(run): parser.obj["DemultiplexConfig"] = { "Setup": {"Software": run.CONFIG.get("bcl2fastq", {})} } - statusdb.update_doc(db, parser.obj, over_write_db_entry=True) + statusdb.update_doc(x_flowcells_db, parser.obj, over_write_db_entry=True) def transfer_run(run_dir, software): From 49abfc57597094fe39939f8b2b0620eb1a5cd34f Mon Sep 17 00:00:00 2001 From: Johannes Alneberg Date: Thu, 18 Apr 2024 16:35:58 +0200 Subject: [PATCH 6/7] Updated versionlog --- VERSIONLOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/VERSIONLOG.md b/VERSIONLOG.md index ac48eec6..ccb0c055 100644 --- a/VERSIONLOG.md +++ b/VERSIONLOG.md @@ -1,5 +1,8 @@ # TACA Version Log +## 20240418.1 +Removed dbname option from classes where it was not used. Renamed StatusDB database variables to attempt to standardise them. + ## 20240410.1 Expand test coverage by starting and checking demultiplexing for a NovaSeqXPlus run. From e0c1afcd4780a4ca5606da40cbd574bbce13bc74 Mon Sep 17 00:00:00 2001 From: Johannes Alneberg Date: Thu, 18 Apr 2024 17:03:10 +0200 Subject: [PATCH 7/7] Trying to make prettier happy --- VERSIONLOG.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/VERSIONLOG.md b/VERSIONLOG.md index ccb0c055..d51563d8 100644 --- a/VERSIONLOG.md +++ b/VERSIONLOG.md @@ -1,7 +1,8 @@ # TACA Version Log ## 20240418.1 -Removed dbname option from classes where it was not used. Renamed StatusDB database variables to attempt to standardise them. + +Removed dbname option from classes where it was not used. Renamed StatusDB database variables to attempt to standardize them. ## 20240410.1