
Commit

Merge pull request #4 from 4dn-dcic/v0.5.24
fix tests, improve Hi-C bam-processing benchmarking for new pipeline
clarabakker authored Jun 12, 2024
2 parents 90709bb + a4ef1d0 commit cf673c6
Showing 4 changed files with 23 additions and 17 deletions.
2 changes: 0 additions & 2 deletions README.md
@@ -1,7 +1,5 @@
 The repo contains a benchmarking script for some of the CWL workflows used by 4DN-DCIC (https://github.com/4dn-dcic/pipelines-cwl), that returns total space, mem and CPUs required per given input size and a recommended AWS EC2 instance type.
 
-[![Build Status](https://travis-ci.org/SooLee/Benchmark.svg?branch=master)](https://travis-ci.org/SooLee/Benchmark)
-
 ### Example usage of benchmarking script
 * importing the module
 ```python
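For orientation (not part of the commit), here is a minimal sketch of the usage the truncated README section goes on to show, using the call shape and result keys exercised in the updated tests below; the import path is an assumption for illustration, not taken from this diff.

```python
# Import path is illustrative; the tests in this commit refer to the module as B.
from Benchmark import run as B

# Estimate resources for one of the workflows exercised in the tests
# (input sizes in bytes, thread count passed as a workflow parameter).
res = B.benchmark('pairsam-parse-sort',
                  {'input_size_in_bytes': {'bam': 1000000000},
                   'parameters': {'nThreads': 16}})

print(res['aws']['recommended_instance_type'])  # e.g. 'c6a.8xlarge' per the updated test
print(res['min_CPU'], res['total_size_in_GB'])  # CPU count and disk estimate, as asserted in test.py
```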
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -4,7 +4,7 @@
 
 [tool.poetry]
 name = "Benchmark-4dn"
-version = "0.5.23"
+version = "0.5.24"
 description = """Benchmark functions that returns total space, mem, cpu given \
 input size and parameters for the CWL workflows"""
 authors = ["Soo Lee <[email protected]>"]
2 changes: 1 addition & 1 deletion src/Benchmark/bfunctions.py
@@ -532,7 +532,7 @@ def hi_c_processing_bam(input_json):
     # input and output are copied once
     total_size = (bamsize + other_inputsize + outsize) * 2 + tmp_pairsamsize
     safe_total_size = total_size * 2
-    mem = 2000 # very rough number
+    mem = 48000 # required for sorting buffer
 
     r = BenchmarkResult(size=safe_total_size, mem=mem, cpu=nthreads)
 
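As a quick sanity check of the space estimate in the hunk above, here is the same arithmetic with made-up input sizes; treating the sizes as GB matches the total_size_in_GB field checked in test.py.

```python
# Hypothetical input sizes, purely to illustrate the arithmetic above.
bamsize, other_inputsize, outsize, tmp_pairsamsize = 50, 1, 60, 80

total_size = (bamsize + other_inputsize + outsize) * 2 + tmp_pairsamsize  # (50+1+60)*2 + 80 = 302
safe_total_size = total_size * 2                                          # 604, doubled as a safety margin
mem = 48000  # raised from 2000 in this commit to cover the sorting buffer

print(safe_total_size, mem)  # 604 48000
```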
34 changes: 21 additions & 13 deletions tests/test.py
@@ -12,8 +12,9 @@ def test_get_optimal_instance_type1(self):
 
     def test_get_optimal_instance_type2(self):
         res = C.get_optimal_instance_type(cpu=32, mem_in_gb=16)
+        print(res)
         assert 'recommended_instance_type' in res
-        assert res['recommended_instance_type'] == 'c5.9xlarge'
+        assert res['recommended_instance_type'] == 'c6a.8xlarge'
 
 
 class TestBenchmark(unittest.TestCase):
@@ -81,7 +82,7 @@ def test_benchmark_atacseq_aln(self):
         print(res)
         assert 'aws' in res
         assert 'recommended_instance_type' in res['aws']
-        assert res['aws']['recommended_instance_type'] == 'c5.xlarge'
+        assert res['aws']['recommended_instance_type'] == 'c6a.xlarge'
 
     def test_benchmark_atacseq_postaln(self):
         print("testing atacseq-postaln")
@@ -91,7 +92,7 @@ def test_benchmark_atacseq_postaln(self):
         print(res)
         assert 'aws' in res
         assert 'recommended_instance_type' in res['aws']
-        assert res['aws']['recommended_instance_type'] == 'c5.4xlarge'
+        assert res['aws']['recommended_instance_type'] == 'c6a.4xlarge'
 
     def test_benchmark_atacseq(self):
         print("testing atacseq")
@@ -100,9 +101,10 @@ def test_benchmark_atacseq(self):
         res = B.benchmark('encode-atacseq',
                           {'input_size_in_bytes': input_sizes,
                            'parameters': {'atac.bowtie2.cpu': 4}})
+        print(res)
         assert 'aws' in res
         assert 'recommended_instance_type' in res['aws']
-        assert res['aws']['recommended_instance_type'] == 't3.2xlarge'
+        assert res['aws']['recommended_instance_type'] == 'c6a.2xlarge'
         assert res['min_CPU'] == 6
         assert int(res['total_size_in_GB']) == 55
 
@@ -113,9 +115,10 @@ def test_benchmark_chipseq_aln_chip(self):
         res = B.benchmark('encode-chipseq-aln-chip',
                           {'input_size_in_bytes': input_sizes,
                            'parameters': {'chip.bwa.cpu': 16}})
+        print(res)
         assert 'aws' in res
         assert 'recommended_instance_type' in res['aws']
-        assert res['aws']['recommended_instance_type'] == 'c5.4xlarge'
+        assert res['aws']['recommended_instance_type'] == 'c6a.4xlarge'
 
     def test_benchmark_chipseq_aln_ctl(self):
         print("testing chipseq")
@@ -124,9 +127,10 @@ def test_benchmark_chipseq_aln_ctl(self):
         res = B.benchmark('encode-chipseq-aln-ctl',
                           {'input_size_in_bytes': input_sizes,
                            'parameters': {'chip.bwa_ctl.cpu': 16}})
+        print(res)
         assert 'aws' in res
         assert 'recommended_instance_type' in res['aws']
-        assert res['aws']['recommended_instance_type'] == 'c5.4xlarge'
+        assert res['aws']['recommended_instance_type'] == 'c6a.4xlarge'
 
     def test_benchmark_chipseq_postaln(self):
         print("testing chipseq")
@@ -136,9 +140,10 @@ def test_benchmark_chipseq_postaln(self):
         res = B.benchmark('encode-chipseq-postaln',
                           {'input_size_in_bytes': input_sizes,
                            'parameters': {'chip.spp_cpu': 4}})
+        print(res)
         assert 'aws' in res
         assert 'recommended_instance_type' in res['aws']
-        assert res['aws']['recommended_instance_type'] == 'c5.4xlarge'
+        assert res['aws']['recommended_instance_type'] == 'c6a.4xlarge'
 
     def test_benchmark_chipseq_postaln2(self):
         print("testing chipseq")
@@ -148,6 +153,7 @@ def test_benchmark_chipseq_postaln2(self):
         res = B.benchmark('encode-chipseq-postaln',
                           {'input_size_in_bytes': input_sizes,
                            'parameters': {'chip.spp_cpu': 4, 'chip.pipeline_type': 'tf'}})
+        print(res)
         assert 'aws' in res
         assert 'recommended_instance_type' in res['aws']
         assert res['aws']['recommended_instance_type'] == 'r5a.4xlarge'
@@ -162,7 +168,7 @@ def test_benchmark_chipseq(self):
         print(res)
         assert 'aws' in res
         assert 'recommended_instance_type' in res['aws']
-        assert res['aws']['recommended_instance_type'] == 'c5.4xlarge'
+        assert res['aws']['recommended_instance_type'] == 'c6a.4xlarge'
 
     def test_benchmark1(self):
         res = B.benchmark('md5',
@@ -202,24 +208,26 @@ def test_benchmark3(self):
         print(res)
 
     def test_benchmark4(self):
+        print("pairsam_parse")
         res = B.benchmark('pairsam-parse-sort',
                           {'input_size_in_bytes': {'bam': 1000000000},
                            'parameters': {'nThreads': 16}})
+        print(res)
         assert 'aws' in res
         assert 'recommended_instance_type' in res['aws']
-        assert res['aws']['recommended_instance_type'] == 'c5.9xlarge'
-        print(res)
+        assert res['aws']['recommended_instance_type'] == 'c6a.8xlarge'
 
     def test_benchmark5(self):
+        print("pairsam-merge")
         input_json = {'input_size_in_bytes': {'input_pairsams': [1000000000,
                                                                  2000000000,
                                                                  3000000000]},
                       'parameters': {'nThreads': 32}}
         res = B.benchmark('pairsam-merge', input_json)
+        print(res)
         assert 'aws' in res
         assert 'recommended_instance_type' in res['aws']
-        assert res['aws']['recommended_instance_type'] == 'c5.9xlarge'
-        print(res)
+        assert res['aws']['recommended_instance_type'] == 'c6a.8xlarge'
 
     def test_benchmark6(self):
         input_json = {'input_size_in_bytes': {'input_pairsam': 1000000000}}
@@ -285,7 +293,7 @@ def test_benchmark12(self):
         print(res)
         assert 'aws' in res
         assert 'recommended_instance_type' in res['aws']
-        assert res['aws']['recommended_instance_type'] == 't3.2xlarge'
+        assert res['aws']['recommended_instance_type'] == 'r5a.2xlarge'
         assert res['min_CPU'] == 8
 
     def test_benchmark13(self):
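The recurring change in these assertions is that the expected recommendation moves from the c5/t3 families to c6a/r5a. A quick way to check what the library currently recommends before pinning a value in a test is sketched below, reusing the call signature and result key from test_get_optimal_instance_type2; the import line is an assumption for illustration, not taken from this diff.

```python
# Import path is illustrative; the tests refer to this helper as C.
from Benchmark import classes as C

# Direct query by resource requirements: 32 CPUs and 16 GB of memory
# now map to a c6a-family instance according to the updated test.
res = C.get_optimal_instance_type(cpu=32, mem_in_gb=16)
print(res['recommended_instance_type'])
```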
