
Commit

Merge pull request #4 from 4dn-dcic/v0.5.24
fix tests, improve Hi-C bam-processing benchmarking for new pipeline
clarabakker authored Jun 12, 2024
2 parents 90709bb + a4ef1d0 commit cf673c6
Showing 4 changed files with 23 additions and 17 deletions.
2 changes: 0 additions & 2 deletions README.md
@@ -1,7 +1,5 @@
 The repo contains a benchmarking script for some of the CWL workflows used by 4DN-DCIC (https://github.com/4dn-dcic/pipelines-cwl), that returns total space, mem and CPUs required per given input size and a recommended AWS EC2 instance type.
 
-[![Build Status](https://travis-ci.org/SooLee/Benchmark.svg?branch=master)](https://travis-ci.org/SooLee/Benchmark)
-
 ### Example usage of benchmarking script
 * importing the module
 ```python
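For orientation (not part of the commit), here is a minimal sketch of the usage the truncated README section goes on to show, using the call shape and result keys exercised in the updated tests below; the import path is an assumption for illustration, not taken from this diff.

```python
# Import path is illustrative; the tests in this commit refer to the module as B.
from Benchmark import run as B

# Estimate resources for one of the workflows exercised in the tests
# (input sizes in bytes, thread count passed as a workflow parameter).
res = B.benchmark('pairsam-parse-sort',
                  {'input_size_in_bytes': {'bam': 1000000000},
                   'parameters': {'nThreads': 16}})

print(res['aws']['recommended_instance_type'])  # e.g. 'c6a.8xlarge' per the updated test
print(res['min_CPU'], res['total_size_in_GB'])  # CPU count and disk estimate, as asserted in test.py
```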
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -4,7 +4,7 @@
 
 [tool.poetry]
 name = "Benchmark-4dn"
-version = "0.5.23"
+version = "0.5.24"
 description = """Benchmark functions that returns total space, mem, cpu given \
 input size and parameters for the CWL workflows"""
 authors = ["Soo Lee <[email protected]>"]
2 changes: 1 addition & 1 deletion src/Benchmark/bfunctions.py
@@ -532,7 +532,7 @@ def hi_c_processing_bam(input_json):
     # input and output are copied once
     total_size = (bamsize + other_inputsize + outsize) * 2 + tmp_pairsamsize
     safe_total_size = total_size * 2
-    mem = 2000 # very rough number
+    mem = 48000 # required for sorting buffer
 
     r = BenchmarkResult(size=safe_total_size, mem=mem, cpu=nthreads)
 
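As a quick sanity check of the space estimate in the hunk above, here is the same arithmetic with made-up input sizes; treating the sizes as GB matches the total_size_in_GB field checked in test.py.

```python
# Hypothetical input sizes, purely to illustrate the arithmetic above.
bamsize, other_inputsize, outsize, tmp_pairsamsize = 50, 1, 60, 80

total_size = (bamsize + other_inputsize + outsize) * 2 + tmp_pairsamsize  # (50+1+60)*2 + 80 = 302
safe_total_size = total_size * 2                                          # 604, doubled as a safety margin
mem = 48000  # raised from 2000 in this commit to cover the sorting buffer

print(safe_total_size, mem)  # 604 48000
```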
34 changes: 21 additions & 13 deletions tests/test.py
@@ -12,8 +12,9 @@ def test_get_optimal_instance_type1(self):
 
     def test_get_optimal_instance_type2(self):
         res = C.get_optimal_instance_type(cpu=32, mem_in_gb=16)
+        print(res)
         assert 'recommended_instance_type' in res
-        assert res['recommended_instance_type'] == 'c5.9xlarge'
+        assert res['recommended_instance_type'] == 'c6a.8xlarge'
 
 
 class TestBenchmark(unittest.TestCase):
@@ -81,7 +82,7 @@ def test_benchmark_atacseq_aln(self):
         print(res)
         assert 'aws' in res
         assert 'recommended_instance_type' in res['aws']
-        assert res['aws']['recommended_instance_type'] == 'c5.xlarge'
+        assert res['aws']['recommended_instance_type'] == 'c6a.xlarge'
 
     def test_benchmark_atacseq_postaln(self):
         print("testing atacseq-postaln")
@@ -91,7 +92,7 @@ def test_benchmark_atacseq_postaln(self):
         print(res)
         assert 'aws' in res
         assert 'recommended_instance_type' in res['aws']
-        assert res['aws']['recommended_instance_type'] == 'c5.4xlarge'
+        assert res['aws']['recommended_instance_type'] == 'c6a.4xlarge'
 
     def test_benchmark_atacseq(self):
         print("testing atacseq")
@@ -100,9 +101,10 @@ def test_benchmark_atacseq(self):
         res = B.benchmark('encode-atacseq',
                           {'input_size_in_bytes': input_sizes,
                            'parameters': {'atac.bowtie2.cpu': 4}})
+        print(res)
         assert 'aws' in res
         assert 'recommended_instance_type' in res['aws']
-        assert res['aws']['recommended_instance_type'] == 't3.2xlarge'
+        assert res['aws']['recommended_instance_type'] == 'c6a.2xlarge'
         assert res['min_CPU'] == 6
         assert int(res['total_size_in_GB']) == 55
 
@@ -113,9 +115,10 @@ def test_benchmark_chipseq_aln_chip(self):
         res = B.benchmark('encode-chipseq-aln-chip',
                           {'input_size_in_bytes': input_sizes,
                            'parameters': {'chip.bwa.cpu': 16}})
+        print(res)
         assert 'aws' in res
         assert 'recommended_instance_type' in res['aws']
-        assert res['aws']['recommended_instance_type'] == 'c5.4xlarge'
+        assert res['aws']['recommended_instance_type'] == 'c6a.4xlarge'
 
     def test_benchmark_chipseq_aln_ctl(self):
         print("testing chipseq")
@@ -124,9 +127,10 @@ def test_benchmark_chipseq_aln_ctl(self):
         res = B.benchmark('encode-chipseq-aln-ctl',
                           {'input_size_in_bytes': input_sizes,
                            'parameters': {'chip.bwa_ctl.cpu': 16}})
+        print(res)
         assert 'aws' in res
         assert 'recommended_instance_type' in res['aws']
-        assert res['aws']['recommended_instance_type'] == 'c5.4xlarge'
+        assert res['aws']['recommended_instance_type'] == 'c6a.4xlarge'
 
     def test_benchmark_chipseq_postaln(self):
         print("testing chipseq")
@@ -136,9 +140,10 @@ def test_benchmark_chipseq_postaln(self):
         res = B.benchmark('encode-chipseq-postaln',
                           {'input_size_in_bytes': input_sizes,
                            'parameters': {'chip.spp_cpu': 4}})
+        print(res)
         assert 'aws' in res
         assert 'recommended_instance_type' in res['aws']
-        assert res['aws']['recommended_instance_type'] == 'c5.4xlarge'
+        assert res['aws']['recommended_instance_type'] == 'c6a.4xlarge'
 
     def test_benchmark_chipseq_postaln2(self):
         print("testing chipseq")
@@ -148,6 +153,7 @@ def test_benchmark_chipseq_postaln2(self):
         res = B.benchmark('encode-chipseq-postaln',
                           {'input_size_in_bytes': input_sizes,
                            'parameters': {'chip.spp_cpu': 4, 'chip.pipeline_type': 'tf'}})
+        print(res)
         assert 'aws' in res
         assert 'recommended_instance_type' in res['aws']
         assert res['aws']['recommended_instance_type'] == 'r5a.4xlarge'
@@ -162,7 +168,7 @@ def test_benchmark_chipseq(self):
         print(res)
         assert 'aws' in res
         assert 'recommended_instance_type' in res['aws']
-        assert res['aws']['recommended_instance_type'] == 'c5.4xlarge'
+        assert res['aws']['recommended_instance_type'] == 'c6a.4xlarge'
 
     def test_benchmark1(self):
         res = B.benchmark('md5',
@@ -202,24 +208,26 @@ def test_benchmark3(self):
         print(res)
 
     def test_benchmark4(self):
+        print("pairsam_parse")
         res = B.benchmark('pairsam-parse-sort',
                           {'input_size_in_bytes': {'bam': 1000000000},
                            'parameters': {'nThreads': 16}})
+        print(res)
         assert 'aws' in res
         assert 'recommended_instance_type' in res['aws']
-        assert res['aws']['recommended_instance_type'] == 'c5.9xlarge'
-        print(res)
+        assert res['aws']['recommended_instance_type'] == 'c6a.8xlarge'
 
     def test_benchmark5(self):
+        print("pairsam-merge")
         input_json = {'input_size_in_bytes': {'input_pairsams': [1000000000,
                                                                  2000000000,
                                                                  3000000000]},
                       'parameters': {'nThreads': 32}}
         res = B.benchmark('pairsam-merge', input_json)
+        print(res)
         assert 'aws' in res
         assert 'recommended_instance_type' in res['aws']
-        assert res['aws']['recommended_instance_type'] == 'c5.9xlarge'
-        print(res)
+        assert res['aws']['recommended_instance_type'] == 'c6a.8xlarge'
 
     def test_benchmark6(self):
         input_json = {'input_size_in_bytes': {'input_pairsam': 1000000000}}
@@ -285,7 +293,7 @@ def test_benchmark12(self):
         print(res)
         assert 'aws' in res
         assert 'recommended_instance_type' in res['aws']
-        assert res['aws']['recommended_instance_type'] == 't3.2xlarge'
+        assert res['aws']['recommended_instance_type'] == 'r5a.2xlarge'
         assert res['min_CPU'] == 8
 
     def test_benchmark13(self):
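The recurring change in these assertions is that the expected recommendation moves from the c5/t3 families to c6a/r5a. A quick way to check what the library currently recommends before pinning a value in a test is sketched below, reusing the call signature and result key from test_get_optimal_instance_type2; the import line is an assumption for illustration, not taken from this diff.

```python
# Import path is illustrative; the tests refer to this helper as C.
from Benchmark import classes as C

# Direct query by resource requirements: 32 CPUs and 16 GB of memory
# now map to a c6a-family instance according to the updated test.
res = C.get_optimal_instance_type(cpu=32, mem_in_gb=16)
print(res['recommended_instance_type'])
```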
