diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml
new file mode 100644
index 0000000..36c09e4
--- /dev/null
+++ b/.github/workflows/benchmark.yml
@@ -0,0 +1,47 @@
+name: Benchmark
+
+on:
+  push:
+    branches: [ main ]
+  pull_request:
+
+jobs:
+  benchmark:
+    runs-on: ubuntu-latest
+    timeout-minutes: 30
+    steps:
+      - uses: actions/checkout@v4
+      - name: Install juliaup
+        uses: julia-actions/install-juliaup@v2.1.2
+        with:
+          channel: '1'
+      - name: Update Julia registry
+        shell: julia --project=. --color=yes {0}
+        run: |
+          using Pkg
+          Pkg.Registry.update()
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.9'
+      - name: Install dependencies
+        run: |
+          pip install -e .[test] # to put juliapkg.json in sys.path
+          python -c 'import juliacall' # force install of all deps
+      - name: Benchmark
+        run: |
+          pytest -n 0 benchmark/benchmark.py --benchmark-json=benchmark/output.json
+      - name: Store benchmark result
+        uses: benchmark-action/github-action-benchmark@v1
+        with:
+          name: Python Benchmark with pytest-benchmark
+          tool: 'pytest'
+          output-file-path: benchmark/output.json
+          github-token: ${{ secrets.GITHUB_TOKEN }}
+          # Only publish results for pushes to main; pull-request runs must not
+          # write to the benchmark data branch
+          auto-push: ${{ github.event_name == 'push' }}
+          # Show alert with commit comment on detecting possible performance regression
+          alert-threshold: '200%'
+          comment-on-alert: true
+          fail-on-alert: true
diff --git a/benchmark/benchmark.py b/benchmark/benchmark.py
new file mode 100644
index 0000000..d8894ed
--- /dev/null
+++ b/benchmark/benchmark.py
@@ -0,0 +1,170 @@
+"""pytest-benchmark suite for the Braket local simulator backends."""
+
+import numpy as np
+import pytest
+from braket.devices import LocalSimulator
+from braket.ir.openqasm import Program
+
+# always the same for repeatability
+np.random.seed(0x1C2C6D66)
+
+batch_size = (10, 100)
+n_qubits = range(3, 16)
+exact_shots_results = (
+    "state_vector",
+    "density_matrix q[0], q[1]",
+    "probability",
+    "expectation z(q[0])",
+    "variance y(q[0])",
+)
+nonzero_shots_results = (
+    "probability",
+    "expectation z(q[0])",
+    "variance y(q[0])",
+    "sample z(q[0])",
+)
+
+
+def ghz(nq: int, result_type: str) -> str:
+    """Return OpenQASM 3 source preparing a GHZ state on ``nq`` qubits.
+
+    ``result_type`` is appended verbatim as a ``#pragma braket result`` line.
+    """
+    source = f"OPENQASM 3.0;\nqubit[{nq}] q;\nh q[0];\n"
+    # entangle every remaining qubit, including the last one, with q[0]
+    for q in range(1, nq):
+        source += f"cnot q[0], q[{q}];\n"
+
+    source += f"#pragma braket result {result_type}\n"
+    return source
+
+
+def qft(nq: int, result_type: str) -> str:
+    """Return OpenQASM 3 source for a QFT-style circuit on ``nq`` qubits.
+
+    Each qubit gets a Hadamard followed by controlled phase shifts from every
+    higher-indexed qubit, with the angle halving per control (no final swaps).
+    ``result_type`` is appended verbatim as a ``#pragma braket result`` line.
+    """
+    source = f"OPENQASM 3.0;\nqubit[{nq}] q;\n"
+    for q in range(nq):
+        angle = np.pi / 2.0
+        source += f"h q[{q}];\n"
+        for ctrl_q in range(q + 1, nq):
+            source += f"cphaseshift({angle}) q[{ctrl_q}], q[{q}];\n"
+            angle /= 2.0
+
+    source += f"#pragma braket result {result_type}\n"
+    return source
+
+
+def run_sim(oq3_prog, sim, shots):
+    """Run a single program on ``sim``; the result is discarded."""
+    sim.run(oq3_prog, shots=shots)
+    return
+
+
+def run_sim_batch(oq3_prog, sim, shots):
+    """Run a list of programs on ``sim`` as one batch; results are discarded."""
+    sim.run_batch(oq3_prog, shots=shots)
+    return
+
+
+device_ids = ("braket_sv", "braket_sv_v2", "braket_dm", "braket_dm_v2")
+
+generators = (ghz, qft)
+
+
+@pytest.mark.parametrize("device_id", device_ids)
+@pytest.mark.parametrize("nq", n_qubits)
+@pytest.mark.parametrize("exact_results", exact_shots_results)
+@pytest.mark.parametrize("circuit", generators)
+def test_exact_shots(benchmark, device_id, nq, exact_results, circuit):
+    """Benchmark a single exact (shots=0) simulation run."""
+    if device_id in ("braket_dm_v2", "braket_dm") and (
+        exact_results in ("state_vector",) or nq > 10
+    ):
+        pytest.skip()
+    if (
+        device_id in ("braket_sv",)
+        and exact_results in ("density_matrix q[0], q[1]",)
+        and nq >= 17
+    ):
+        pytest.skip()
+    result_type = exact_results
+    oq3_prog = Program(source=circuit(nq, result_type))
+    sim = LocalSimulator(device_id)
+    benchmark.pedantic(run_sim, args=(oq3_prog, sim, 0), iterations=5, warmup_rounds=1)
+
+
+@pytest.mark.parametrize("device_id", device_ids)
+@pytest.mark.parametrize("nq", n_qubits)
+@pytest.mark.parametrize("batch_size", batch_size)
+@pytest.mark.parametrize("exact_results", exact_shots_results)
+@pytest.mark.parametrize("circuit", generators)
+def test_exact_shots_batched(
+    benchmark, device_id, nq, batch_size, exact_results, circuit
+):
+    """Benchmark an exact (shots=0) batch run; currently always skipped."""
+    if device_id in ("braket_dm_v2", "braket_dm") and (
+        exact_results in ("state_vector",) or nq >= 5
+    ):
+        pytest.skip()
+    if nq >= 10:
+        pytest.skip()
+    # skip all for now as this is very expensive
+    pytest.skip()
+    result_type = exact_results
+    oq3_prog = [Program(source=circuit(nq, result_type)) for _ in range(batch_size)]
+    sim = LocalSimulator(device_id)
+    benchmark.pedantic(
+        run_sim_batch, args=(oq3_prog, sim, 0), iterations=5, warmup_rounds=1
+    )
+
+
+shots = (100,)
+
+
+@pytest.mark.parametrize("device_id", device_ids)
+@pytest.mark.parametrize("nq", n_qubits)
+@pytest.mark.parametrize("shots", shots)
+@pytest.mark.parametrize("nonzero_shots_results", nonzero_shots_results)
+@pytest.mark.parametrize("circuit", generators)
+def test_nonzero_shots(benchmark, device_id, nq, shots, nonzero_shots_results, circuit):
+    """Benchmark a single sampled (shots > 0) simulation run."""
+    if device_id in ("braket_dm_v2", "braket_dm") and nq > 10:
+        pytest.skip()
+    result_type = nonzero_shots_results
+    oq3_prog = Program(source=circuit(nq, result_type))
+    sim = LocalSimulator(device_id)
+    benchmark.pedantic(
+        run_sim, args=(oq3_prog, sim, shots), iterations=5, warmup_rounds=1
+    )
+    del sim
+
+
+@pytest.mark.parametrize("device_id", device_ids)
+@pytest.mark.parametrize("nq", n_qubits)
+@pytest.mark.parametrize("batch_size", batch_size)
+@pytest.mark.parametrize("shots", shots)
+@pytest.mark.parametrize("nonzero_shots_results", nonzero_shots_results)
+@pytest.mark.parametrize("circuit", generators)
+def test_nonzero_shots_batched(
+    benchmark, device_id, nq, batch_size, shots, nonzero_shots_results, circuit
+):
+    """Benchmark a sampled batch run; currently always skipped."""
+    if device_id in ("braket_dm_v2", "braket_dm") and nq >= 5:
+        pytest.skip()
+    if nq >= 10:
+        pytest.skip()
+
+    # skip all for now as this is very expensive
+    pytest.skip()
+
+    result_type = nonzero_shots_results
+    oq3_prog = [Program(source=circuit(nq, result_type)) for _ in range(batch_size)]
+    sim = LocalSimulator(device_id)
+    benchmark.pedantic(
+        run_sim_batch, args=(oq3_prog, sim, shots), iterations=5, warmup_rounds=1
+    )
+    del sim
diff --git a/requirements.txt b/requirements.txt
index 40f36cd..83c0e17 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,4 @@
-juliacall==0.9.22
+juliacall==0.9.23
 numpy
 amazon-braket-schemas>=1.20.2
 amazon-braket-sdk>=1.83.0
diff --git a/src/braket/juliapkg.json b/src/braket/juliapkg.json
index 9ad41c3..407b497 100644
--- a/src/braket/juliapkg.json
+++ b/src/braket/juliapkg.json
@@ -1,9 +1,9 @@
 {
-    "julia": "1.9",
+    "julia": "1.10",
     "packages": {
         "BraketSimulator": {
             "uuid": "76d27892-9a0b-406c-98e4-7c178e9b3dff",
-            "version": "0.0.4"
+            "version": "0.0.5"
         },
         "JSON3": {
             "uuid": "0f8b85d8-7281-11e9-16c2-39a750bddbf1",
diff --git a/src/braket/simulator_v2/base_simulator_v2.py b/src/braket/simulator_v2/base_simulator_v2.py
index 5ab4bc8..faf0799 100644
--- a/src/braket/simulator_v2/base_simulator_v2.py
+++ b/src/braket/simulator_v2/base_simulator_v2.py
@@ -26,7 +26,7 @@ def setup_julia():
     # don't reimport if we don't have to
     if "juliacall" in sys.modules:
         os.environ["PYTHON_JULIACALL_HANDLE_SIGNALS"] = "yes"
-        return sys.modules["juliacall"].Main
+        return
     else:
         for k, default in (
             ("PYTHON_JULIACALL_HANDLE_SIGNALS", "yes"),
@@ -40,40 +40,19 @@ def setup_julia():
         import juliacall
 
         jl = juliacall.Main
-        jl.seval("using JSON3, BraketSimulator")
-        sv_stock_oq3 = """
-        OPENQASM 3.0;
-        input float theta;
-        qubit[2] q;
-        h q[0];
-        cnot q;
-        x q[0];
-        xx(theta) q;
-        yy(theta) q;
-        zz(theta) q;
-        #pragma braket result expectation z(q[0])
-        """
-        dm_stock_oq3 = """
+        jl.seval("using BraketSimulator, JSON3")
+        stock_oq3 = """
         OPENQASM 3.0;
-        input float theta;
         qubit[2] q;
         h q[0];
-        x q[0];
+        cphaseshift(1.5707963267948966) q[1], q[0];
         cnot q;
-        xx(theta) q;
-        yy(theta) q;
-        zz(theta) q;
         #pragma braket noise bit_flip(0.1) q[0]
+        #pragma braket result variance y(q[0])
+        #pragma braket result density_matrix q[0], q[1]
         #pragma braket result probability
         """
-        r = jl.BraketSimulator.simulate(
-            "braket_sv_v2", sv_stock_oq3, '{"theta": 0.1}', 0
-        )
-        jl.JSON3.write(r)
-        r = jl.BraketSimulator.simulate(
-            "braket_dm_v2", dm_stock_oq3, '{"theta": 0.1}', 0
-        )
-        jl.JSON3.write(r)
+        jl.BraketSimulator.simulate("braket_dm_v2", stock_oq3, "{}", 0)
         return
 
 
@@ -86,6 +65,39 @@ def setup_pool():
     return
 
 
+def _handle_mmaped_result(raw_result, mmap_paths, obj_lengths):
+    """Build a ``GateModelTaskResult``, attaching memory-mapped result values.
+
+    ``raw_result`` is the decoded JSON result. Result types whose value is
+    empty are filled, in order, with read-only ``np.memmap`` arrays opened
+    from ``mmap_paths`` using the matching flat length from ``obj_lengths``.
+    """
+    result = GateModelTaskResult(**raw_result)
+    if not mmap_paths:
+        return result
+    mmap_index = 0
+    for result_ind, result_type in enumerate(result.resultTypes):
+        value = result_type.value
+        # A numeric zero (e.g. an expectation of exactly 0.0) is a real value,
+        # not a placeholder, and must not consume a memory-mapped file.
+        # NOTE(review): assumes placeholders are empty containers or None - confirm
+        if value or isinstance(value, (int, float)):
+            continue
+        d_type = (
+            np.complex128
+            if isinstance(result_type.type, (DensityMatrix, StateVector))
+            else np.float64
+        )
+        result.resultTypes[result_ind].value = np.memmap(
+            mmap_paths[mmap_index],
+            dtype=d_type,
+            mode="r",
+            shape=(obj_lengths[mmap_index],),
+        )
+        mmap_index += 1
+    return result
+
+
 class BaseLocalSimulatorV2(BaseLocalSimulator):
     def __init__(self, device: str):
         global __JULIA_POOL__
@@ -126,8 +138,8 @@ def run_openqasm(
         except Exception as e:
             _handle_julia_error(e)
 
-        result = GateModelTaskResult(**json.loads(jl_result))
-        jl_result = None
+        loaded_result = json.loads(jl_result[0])
+        result = _handle_mmaped_result(loaded_result, jl_result[1], jl_result[2])
         result.additionalMetadata.action = openqasm_ir
 
         # attach the result types
@@ -165,8 +177,15 @@ def run_multiple(
         except Exception as e:
             _handle_julia_error(e)
 
+        loaded_result = json.loads(jl_results[0])
+        paths_and_lens = json.loads(jl_results[1])
+        results_paths_lens = [
+            (loaded_result[r_ix], paths_and_lens[r_ix][0], paths_and_lens[r_ix][1])
+            for r_ix in range(len(loaded_result))
+        ]
         results = [
-            GateModelTaskResult(**json.loads(jl_result)) for jl_result in jl_results
+            _handle_mmaped_result(*result_path_len)
+            for result_path_len in results_paths_lens
         ]
         jl_results = None
         for p_ix, program in enumerate(programs):
@@ -204,9 +223,9 @@ def reconstruct_complex(v):
             }
         if isinstance(result_type.type, StateVector):
             val = task_result.resultTypes[result_ind].value
-            # complex are stored as tuples of reals
-            fixed_val = [reconstruct_complex(v) for v in val]
-            task_result.resultTypes[result_ind].value = np.asarray(fixed_val)
+            if isinstance(val, list):
+                fixed_val = [reconstruct_complex(v) for v in val]
+                task_result.resultTypes[result_ind].value = np.asarray(fixed_val)
         if isinstance(result_type.type, DensityMatrix):
             val = task_result.resultTypes[result_ind].value
             # complex are stored as tuples of reals
diff --git a/src/braket/simulator_v2/julia_workers.py b/src/braket/simulator_v2/julia_workers.py
index 6189f7d..f85289e 100644
--- a/src/braket/simulator_v2/julia_workers.py
+++ b/src/braket/simulator_v2/julia_workers.py
@@ -29,18 +29,22 @@ def translate_and_run(
     device_id: str, openqasm_ir: OpenQASMProgram, shots: int = 0
 ) -> str:
     jl = sys.modules["juliacall"].Main
-    jl_shots = shots
     jl_inputs = json.dumps(openqasm_ir.inputs) if openqasm_ir.inputs else "{}"
 
+    # disable the Julia GC only after input serialization, so a failure there
+    # cannot leave it switched off
+    jl.GC.enable(False)
     try:
         result = jl.BraketSimulator.simulate(
             device_id,
             openqasm_ir.source,
             jl_inputs,
-            jl_shots,
+            shots,
         )
     except Exception as e:
         _handle_julia_error(e)
+    finally:
+        jl.GC.enable(True)
 
     return result
 