Added quickstart configs for tutorial, fixed incorrect print stmt in initialize.sh, added timer in BLRunner to compute algorithms execution time
Murali-group · Jun 27, 2022 · b62c67c · b62c67c
1 parent 503bc81
commit b62c67c
Show file tree

Hide file tree

Showing 12 changed files with 1,655 additions and 4 deletions.
diff --git a/BLRunner.py b/BLRunner.py
@@ -22,6 +22,7 @@
 from BLRun.runner import Runner
 import os
 import pandas as pd
+import time
 
 import BLRun as br
 yaml.warnings({'YAMLLoadWarning': False})
@@ -57,8 +58,9 @@ def main():
 
     with open(config_file, 'r') as conf:
         evaluation = br.ConfigParser.parse(conf)
-    print(evaluation)
-    print('Evaluation started')
+    # print(evaluation)
+    start_time = time.process_time()
+    print('Execution of algorithms started')
 
 
     for idx in range(len(evaluation.runners)):
@@ -70,7 +72,8 @@ def main():
     for idx in range(len(evaluation.runners)):
         evaluation.runners[idx].parseOutput()
 
-    print('Evaluation complete')
+    end_time = time.process_time()
+    print(f'Execution of algorithms completed in {end_time-start_time:0.2f} seconds')
 
 
 if __name__ == '__main__':

diff --git a/config-files/Quickstart/Curated/GSD-quickstart.yaml b/config-files/Quickstart/Curated/GSD-quickstart.yaml
@@ -0,0 +1,169 @@
+# Input Settings: initialize base input folder names,
+# dataset collections, and algorithms to run over
+input_settings:
+
+    # Base input directory
+    input_dir : "inputs"
+
+    # Subdirectory of inputs that datasets are placed in
+    dataset_dir: "Curated/GSD"
+
+    # Denotes a list of datasets, each with the following parameters:
+    #   name: Name of the dataset. May be used in logging or other
+    #       messages written during execution
+    #
+    #   exprData: scRNA-Seq expression file name
+    #
+    #   cellData: a file containing pseudotime ordering
+    #   trueEdges: reference network file name (e.g. refNetwork.csv)
+    datasets:      
+        - name: "GSD-2000-1"
+          exprData: "ExpressionData.csv"
+          cellData: "PseudoTime.csv"
+          trueEdges: "refNetwork.csv"
+
+        - name: "GSD-2000-2"
+          exprData: "ExpressionData.csv"
+          cellData: "PseudoTime.csv"
+          trueEdges: "refNetwork.csv"
+
+        - name: "GSD-2000-3"
+          exprData: "ExpressionData.csv"
+          cellData: "PseudoTime.csv"
+          trueEdges: "refNetwork.csv"
+
+        - name: "GSD-2000-4"
+          exprData: "ExpressionData.csv"
+          cellData: "PseudoTime.csv"
+          trueEdges: "refNetwork.csv"
+
+        - name: "GSD-2000-5"
+          exprData: "ExpressionData.csv"
+          cellData: "PseudoTime.csv"
+          trueEdges: "refNetwork.csv"
+
+
+    # Denotes a list of algorithms to run. Each has the following parameters:
+    #   name: Name of the algorithm. Must be recognized by the pipeline, see
+    #       code for acceptable values
+    #
+    #   should_run: whether or not to run the algorithm
+    #
+    #   params: any additional, algorithm-specific parameters
+    #       should be specified in the params map for a given algorithm
+    #
+    # Denotes a list of algorithms to run. Each has the following parameters:
+    #   name: Name of the algorithm. Must be recognized by the pipeline, see
+    #       code for acceptable values
+    #
+    #   should_run: whether or not to run the algorithm
+    #
+    #   params: any additional, algorithm-specific parameters
+    #       should be specified in the params map for a given algorithm
+    #
+    algorithms:
+
+        - name: "PIDC"
+          params: 
+              should_run: [True]
+
+
+        - name: "GRNVBEM"
+          params: 
+              should_run: [False]
+
+
+
+        - name: "GENIE3"
+          params: 
+              should_run: [False]
+
+
+
+        - name: "GRNBOOST2"
+          params: 
+              should_run: [True]
+
+
+        - name: "PPCOR"
+          params: 
+              should_run: [False]
+              # Used in parsing output
+              pVal: [0.01]
+
+
+        - name: "SCODE"
+          params:
+              should_run: [True]
+              z: [2]
+              nIter: [100]
+              nRep: [5]
+
+        - name: "SCNS"
+          params: 
+              should_run: [False]
+
+
+        - name: "SINCERITIES"
+          params: 
+              should_run: [True]
+              nBins: [6]
+
+
+        - name: "LEAP"
+          params: 
+              should_run: [False]
+              # Default maxLag value is 0.33
+              maxLag: [0.1]
+
+
+        - name: "GRISLI"
+          params: 
+              should_run: [False]
+              L: [10]
+              R: [1500]
+              alphaMin: [0.0]
+
+
+        - name: "SINGE"
+          params: 
+              should_run: [False]
+              lambda: [0.01]
+              dT: [15]
+              num_lags: [5]
+              kernel_width: [0.5]
+              prob_zero_removal: [0]
+              prob_remove_samples: [0.0]
+              family: ["gaussian"]
+              num_replicates: [6]
+
+
+        - name: "SCRIBE"
+          params: 
+              should_run: [False]
+              ### required parameters
+              # a list of delay values
+              delay: ["5,25,50,75,100"]
+              # any of 'RDI', 'uRDI', 'cRDI', or 'ucRDI'
+              method: ['ucRDI']
+              # lower detection limit (expression below this
+              # will be treated as zero).
+              lowerDetectionLimit: [0]
+              # expressionFamily: for synthetic data use uninormal
+              #  for mRNA count data use negbinomial.size()
+              expressionFamily: ['uninormal']
+              ### optional but recommended parameters
+              # log transform expression values or not
+              log: [False]
+              # ignore pseudotime values (and use experimental
+              # time points instead); recommended True for synthetic data,
+              # False for real mRNA data
+              ignorePT: [True]
+
+
+# Output Settings: initialize base output folder names
+output_settings:
+
+    # Base output directory
+    output_dir: "outputs"
+    output_prefix: "GSD"
diff --git a/config-files/Quickstart/Curated/HSC-quickstart.yaml b/config-files/Quickstart/Curated/HSC-quickstart.yaml
@@ -0,0 +1,170 @@
+# Input Settings: initialize base input folder names,
+# dataset collections, and algorithms to run over
+input_settings:
+
+    # Base input directory
+    input_dir : "inputs"
+
+    # Subdirectory of inputs that datasets are placed in
+    dataset_dir: "Curated/HSC"
+
+    # Denotes a list of datasets, each with the following parameters:
+    #   name: Name of the dataset. May be used in logging or other
+    #       messages written during execution
+    #
+    #   exprData: scRNA-Seq expression file name
+    #
+    #   cellData: a file containing pseudotime ordering
+    #   trueEdges: reference network file name (e.g. refNetwork.csv)
+    datasets:
+
+        - name: "HSC-2000-1"
+          exprData: "ExpressionData.csv"
+          cellData: "PseudoTime.csv"
+          trueEdges: "refNetwork.csv"
+
+        - name: "HSC-2000-2"
+          exprData: "ExpressionData.csv"
+          cellData: "PseudoTime.csv"
+          trueEdges: "refNetwork.csv"
+
+        - name: "HSC-2000-3"
+          exprData: "ExpressionData.csv"
+          cellData: "PseudoTime.csv"
+          trueEdges: "refNetwork.csv"
+
+        - name: "HSC-2000-4"
+          exprData: "ExpressionData.csv"
+          cellData: "PseudoTime.csv"
+          trueEdges: "refNetwork.csv"
+
+        - name: "HSC-2000-5"
+          exprData: "ExpressionData.csv"
+          cellData: "PseudoTime.csv"
+          trueEdges: "refNetwork.csv"
+
+    # Denotes a list of algorithms to run. Each has the following parameters:
+    #   name: Name of the algorithm. Must be recognized by the pipeline, see
+    #       code for acceptable values
+    #
+    #   should_run: whether or not to run the algorithm
+    #
+    #   params: any additional, algorithm-specific parameters
+    #       should be specified in the params map for a given algorithm
+    #
+    # Denotes a list of algorithms to run. Each has the following parameters:
+    #   name: Name of the algorithm. Must be recognized by the pipeline, see
+    #       code for acceptable values
+    #
+    #   should_run: whether or not to run the algorithm
+    #
+    #   params: any additional, algorithm-specific parameters
+    #       should be specified in the params map for a given algorithm
+    #
+    algorithms:
+
+
+        - name: "PIDC"
+          params: 
+              should_run: [True]
+
+
+        - name: "GRNVBEM"
+          params: 
+              should_run: [False]
+
+
+
+        - name: "GENIE3"
+          params: 
+              should_run: [False]
+
+
+
+        - name: "GRNBOOST2"
+          params: 
+              should_run: [True]
+
+
+        - name: "PPCOR"
+          params: 
+              should_run: [False]
+              # Used in parsing output
+              pVal: [0.01]
+
+
+        - name: "SCODE"
+          params:
+              should_run: [True]
+              z: [2]
+              nIter: [100]
+              nRep: [5]
+
+        - name: "SCNS"
+          params: 
+              should_run: [False]
+
+
+        - name: "SINCERITIES"
+          params: 
+              should_run: [True]
+              nBins: [20]
+
+
+        - name: "LEAP"
+          params: 
+              should_run: [False]
+              # Default maxLag value is 0.33
+              maxLag: [0.05]
+
+
+        - name: "GRISLI"
+          params: 
+              should_run: [False]
+              L: [5]
+              R: [1500]
+              alphaMin: [0.25]
+
+
+        - name: "SINGE"
+          params: 
+              should_run: [False]
+              lambda: [0.01]
+              dT: [3]
+              num_lags: [5]
+              kernel_width: [1]
+              prob_zero_removal: [0]
+              prob_remove_samples: [0.0]
+              family: ["gaussian"]
+              num_replicates: [2]
+
+
+        - name: "SCRIBE"
+          params: 
+              should_run: [False]
+              ### required parameters
+              # a list of delay values
+              delay: ["5"]
+              # any of 'RDI', 'uRDI', 'cRDI', or 'ucRDI'
+              method: ['ucRDI']
+              # lower detection limit (expression below this
+              # will be treated as zero).
+              lowerDetectionLimit: [0]
+              # expressionFamily: for synthetic data use uninormal
+              #  for mRNA count data use negbinomial.size()
+              expressionFamily: ['uninormal']
+              ### optional but recommended parameters
+              # log transform expression values or not
+              log: [False]
+              # ignore pseudotime values (and use experimental
+              # time points instead); recommended True for synthetic data,
+              # False for real mRNA data
+              ignorePT: [True]
+
+
+# Output Settings: initialize base output folder names
+output_settings:
+
+    # Base output directory
+    output_dir: "outputs"
+    output_prefix: "HSC"