Skip to content

Commit

Permalink
Added quickstart configs for tutorial, fixed incorrect print stmt in …
Browse files Browse the repository at this point in the history
…initialize.sh, added timer in BLRunner to compute algorithms execution time
  • Loading branch information
Blessy Antony committed Jun 27, 2022
1 parent 503bc81 commit b62c67c
Show file tree
Hide file tree
Showing 12 changed files with 1,655 additions and 4 deletions.
9 changes: 6 additions & 3 deletions BLRunner.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
from BLRun.runner import Runner
import os
import pandas as pd
import time

import BLRun as br
yaml.warnings({'YAMLLoadWarning': False})
Expand Down Expand Up @@ -57,8 +58,9 @@ def main():

with open(config_file, 'r') as conf:
evaluation = br.ConfigParser.parse(conf)
print(evaluation)
print('Evaluation started')
# print(evaluation)
start_time = time.process_time()
print('Execution of algorithms started')


for idx in range(len(evaluation.runners)):
Expand All @@ -70,7 +72,8 @@ def main():
for idx in range(len(evaluation.runners)):
evaluation.runners[idx].parseOutput()

print('Evaluation complete')
end_time = time.process_time()
print(f'Execution of algorithms completed in {end_time-start_time:0.2f} seconds')


if __name__ == '__main__':
Expand Down
169 changes: 169 additions & 0 deletions config-files/Quickstart/Curated/GSD-quickstart.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,169 @@
# Input Settings: initialize base input folder names,
# dataset collections, and algorithms to run over
input_settings:

# Base input directory
input_dir : "inputs"

# Subdirectory of inputs that datasets are placed in
dataset_dir: "Curated/GSD"

# Denotes a list of datasets, each with the following parameters:
# name: Name of the dataset. May be used in logging or other
# messages written during execution
#
# exprData: scRNA-Seq expression file name
#
# cellData: a file containing pseudotime ordering
#
# trueEdges: a file containing the reference network edges
#
datasets:
- name: "GSD-2000-1"
exprData: "ExpressionData.csv"
cellData: "PseudoTime.csv"
trueEdges: "refNetwork.csv"

- name: "GSD-2000-2"
exprData: "ExpressionData.csv"
cellData: "PseudoTime.csv"
trueEdges: "refNetwork.csv"

- name: "GSD-2000-3"
exprData: "ExpressionData.csv"
cellData: "PseudoTime.csv"
trueEdges: "refNetwork.csv"

- name: "GSD-2000-4"
exprData: "ExpressionData.csv"
cellData: "PseudoTime.csv"
trueEdges: "refNetwork.csv"

- name: "GSD-2000-5"
exprData: "ExpressionData.csv"
cellData: "PseudoTime.csv"
trueEdges: "refNetwork.csv"


# Denotes a list of algorithms to run. Each has the following parameters:
# name: Name of the algorithm. Must be recognized by the pipeline, see
# code for acceptable values
#
# should_run: whether or not to run the algorithm
#
# params: any additional, algorithm-specific parameters
# should be specified in the params map for a given algorithm
#
algorithms:

- name: "PIDC"
params:
should_run: [True]


- name: "GRNVBEM"
params:
should_run: [False]



- name: "GENIE3"
params:
should_run: [False]



- name: "GRNBOOST2"
params:
should_run: [True]


- name: "PPCOR"
params:
should_run: [False]
# Used in parsing output
pVal: [0.01]


- name: "SCODE"
params:
should_run: [True]
z: [2]
nIter: [100]
nRep: [5]

- name: "SCNS"
params:
should_run: [False]


- name: "SINCERITIES"
params:
should_run: [True]
nBins: [6]


- name: "LEAP"
params:
should_run: [False]
# Default maxLag value is 0.33
maxLag: [0.1]


- name: "GRISLI"
params:
should_run: [False]
L: [10]
R: [1500]
alphaMin: [0.0]


- name: "SINGE"
params:
should_run: [False]
lambda: [0.01]
dT: [15]
num_lags: [5]
kernel_width: [0.5]
prob_zero_removal: [0]
prob_remove_samples: [0.0]
family: ["gaussian"]
num_replicates: [6]


- name: "SCRIBE"
params:
should_run: [False]
### required parameters
# a list of delay values
delay: ["5,25,50,75,100"]
# any of 'RDI', 'uRDI', 'cRDI', or 'ucRDI'
method: ['ucRDI']
# lower detection limit (expression below this
# will be treated as zero).
lowerDetectionLimit: [0]
# expressionFamily: for synthetic data use uninormal
# for mRNA count data use negbinomial.size()
expressionFamily: ['uninormal']
### optional but recommended parameters
# log transform expression values or not
log: [False]
# ignore pseudotime values (and use experimental
# time points instead); recommended True for synthetic data,
# False for real mRNA data
ignorePT: [True]


# Output Settings: initialize base output folder names
output_settings:

# Base output directory
output_dir: "outputs"
output_prefix: "GSD"
170 changes: 170 additions & 0 deletions config-files/Quickstart/Curated/HSC-quickstart.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,170 @@
# Input Settings: initialize base input folder names,
# dataset collections, and algorithms to run over
input_settings:

# Base input directory
input_dir : "inputs"

# Subdirectory of inputs that datasets are placed in
dataset_dir: "Curated/HSC"

# Denotes a list of datasets, each with the following parameters:
# name: Name of the dataset. May be used in logging or other
# messages written during execution
#
# exprData: scRNA-Seq expression file name
#
# cellData: a file containing pseudotime ordering
#
# trueEdges: a file containing the reference network edges
#
datasets:

- name: "HSC-2000-1"
exprData: "ExpressionData.csv"
cellData: "PseudoTime.csv"
trueEdges: "refNetwork.csv"

- name: "HSC-2000-2"
exprData: "ExpressionData.csv"
cellData: "PseudoTime.csv"
trueEdges: "refNetwork.csv"

- name: "HSC-2000-3"
exprData: "ExpressionData.csv"
cellData: "PseudoTime.csv"
trueEdges: "refNetwork.csv"

- name: "HSC-2000-4"
exprData: "ExpressionData.csv"
cellData: "PseudoTime.csv"
trueEdges: "refNetwork.csv"

- name: "HSC-2000-5"
exprData: "ExpressionData.csv"
cellData: "PseudoTime.csv"
trueEdges: "refNetwork.csv"

# Denotes a list of algorithms to run. Each has the following parameters:
# name: Name of the algorithm. Must be recognized by the pipeline, see
# code for acceptable values
#
# should_run: whether or not to run the algorithm
#
# params: any additional, algorithm-specific parameters
# should be specified in the params map for a given algorithm
#
algorithms:


- name: "PIDC"
params:
should_run: [True]


- name: "GRNVBEM"
params:
should_run: [False]



- name: "GENIE3"
params:
should_run: [False]



- name: "GRNBOOST2"
params:
should_run: [True]


- name: "PPCOR"
params:
should_run: [False]
# Used in parsing output
pVal: [0.01]


- name: "SCODE"
params:
should_run: [True]
z: [2]
nIter: [100]
nRep: [5]

- name: "SCNS"
params:
should_run: [False]


- name: "SINCERITIES"
params:
should_run: [True]
nBins: [20]


- name: "LEAP"
params:
should_run: [False]
# Default maxLag value is 0.33
maxLag: [0.05]


- name: "GRISLI"
params:
should_run: [False]
L: [5]
R: [1500]
alphaMin: [0.25]


- name: "SINGE"
params:
should_run: [False]
lambda: [0.01]
dT: [3]
num_lags: [5]
kernel_width: [1]
prob_zero_removal: [0]
prob_remove_samples: [0.0]
family: ["gaussian"]
num_replicates: [2]


- name: "SCRIBE"
params:
should_run: [False]
### required parameters
# a list of delay values
delay: ["5"]
# any of 'RDI', 'uRDI', 'cRDI', or 'ucRDI'
method: ['ucRDI']
# lower detection limit (expression below this
# will be treated as zero).
lowerDetectionLimit: [0]
# expressionFamily: for synthetic data use uninormal
# for mRNA count data use negbinomial.size()
expressionFamily: ['uninormal']
### optional but recommended parameters
# log transform expression values or not
log: [False]
# ignore pseudotime values (and use experimental
# time points instead); recommended True for synthetic data,
# False for real mRNA data
ignorePT: [True]


# Output Settings: initialize base output folder names
output_settings:

# Base output directory
output_dir: "outputs"
output_prefix: "HSC"
Loading

0 comments on commit b62c67c

Please sign in to comment.