From 794f54c510d5c320ab0adf191406abb7596a9ba7 Mon Sep 17 00:00:00 2001 From: Justin Ledford Date: Wed, 2 Aug 2023 18:22:14 +0000 Subject: [PATCH 1/9] Add subcommand to convert to/from various result formats Also updated run_captured_stim_main to take std::string instead of cstrings, so that a sequence of bytes containing a null byte doesn't get interpreted as a null terminator effectively truncating the sequence. --- file_lists/source_files_no_main | 1 + file_lists/test_files | 1 + src/stim.h | 1 + src/stim/cmd/command_convert.cc | 261 +++++++++++++++++++++++++++ src/stim/cmd/command_convert.h | 29 +++ src/stim/cmd/command_convert.test.cc | 103 +++++++++++ src/stim/cmd/command_help.cc | 2 + src/stim/main_namespaced.cc | 13 +- src/stim/main_namespaced.test.cc | 4 +- src/stim/main_namespaced.test.h | 2 +- 10 files changed, 410 insertions(+), 7 deletions(-) create mode 100644 src/stim/cmd/command_convert.cc create mode 100644 src/stim/cmd/command_convert.h create mode 100644 src/stim/cmd/command_convert.test.cc diff --git a/file_lists/source_files_no_main b/file_lists/source_files_no_main index e9ddc5e57..956f9a7c0 100644 --- a/file_lists/source_files_no_main +++ b/file_lists/source_files_no_main @@ -19,6 +19,7 @@ src/stim/circuit/gate_data_swaps.cc src/stim/circuit/gate_decomposition.cc src/stim/circuit/gate_target.cc src/stim/cmd/command_analyze_errors.cc +src/stim/cmd/command_convert.cc src/stim/cmd/command_detect.cc src/stim/cmd/command_diagram.cc src/stim/cmd/command_explain_errors.cc diff --git a/file_lists/test_files b/file_lists/test_files index dd8f4a39e..83b792e8e 100644 --- a/file_lists/test_files +++ b/file_lists/test_files @@ -6,6 +6,7 @@ src/stim/circuit/gate_decomposition.test.cc src/stim/circuit/gate_target.test.cc src/stim/circuit/stabilizer_flow.test.cc src/stim/cmd/command_analyze_errors.test.cc +src/stim/cmd/command_convert.test.cc src/stim/cmd/command_detect.test.cc src/stim/cmd/command_diagram.test.cc src/stim/cmd/command_explain_errors.test.cc diff --git a/src/stim.h b/src/stim.h index fc0076ed3..be2db4002 100644 --- a/src/stim.h +++ b/src/stim.h @@ -12,6 +12,7 @@ #include "stim/circuit/gate_target.h" #include "stim/circuit/stabilizer_flow.h" #include "stim/cmd/command_analyze_errors.h" +#include "stim/cmd/command_convert.h" #include "stim/cmd/command_detect.h" #include "stim/cmd/command_diagram.h" #include "stim/cmd/command_explain_errors.h" diff --git a/src/stim/cmd/command_convert.cc b/src/stim/cmd/command_convert.cc new file mode 100644 index 000000000..5ca6fd87d --- /dev/null +++ b/src/stim/cmd/command_convert.cc @@ -0,0 +1,261 @@ +// Copyright 2023 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "stim/cmd/command_convert.h" + +#include "command_help.h" +#include "stim/arg_parse.h" +#include "stim/io/measure_record_batch_writer.h" +#include "stim/io/measure_record_reader.h" +#include "stim/io/stim_data_formats.h" +#include "stim/mem/simd_bits.h" + +using namespace stim; + +int stim::command_convert(int argc, const char **argv) { + check_for_unknown_arguments( + { + "--in_format", + "--out_format", + "--in", + "--out", + "--circuit", + "--types", + }, + {}, + "convert", + argc, + argv); + + const auto &in_format = find_enum_argument("--in_format", nullptr, format_name_to_enum_map(), argc, argv); + const auto &out_format = find_enum_argument("--out_format", "01", format_name_to_enum_map(), argc, argv); + FILE *in = find_open_file_argument("--in", stdin, "rb", argc, argv); + FILE *out = find_open_file_argument("--out", stdout, "wb", argc, argv); + FILE *circuit_file = find_open_file_argument("--circuit", nullptr, "rb", argc, argv); + auto circuit = Circuit::from_file(circuit_file); + fclose(circuit_file); + CircuitStats circuit_stats = circuit.compute_stats(); + + const char *types = require_find_argument("--types", argc, argv); + bool include_measurements = false, include_detectors = false, include_observables = false; + include_measurements = strchr(types, 'M') != nullptr; + include_detectors = strchr(types, 'D') != nullptr; + include_observables = strchr(types, 'L') != nullptr; + + auto reader = MeasureRecordReader::make( + in, + in_format.id, + include_measurements ? circuit_stats.num_measurements : 0, + include_detectors ? circuit_stats.num_detectors : 0, + include_observables ? circuit_stats.num_observables : 0); + auto writer = MeasureRecordWriter::make(out, out_format.id); + simd_bits buf(reader->bits_per_record()); + + while (reader->start_and_read_entire_record(buf)) { + if (include_measurements) { + writer->begin_result_type('M'); + for (uint64_t i = 0; i < circuit_stats.num_measurements; ++i) { + writer->write_bit(buf[i]); + } + } + if (include_detectors) { + writer->begin_result_type('D'); + for (uint64_t i = 0; i < circuit_stats.num_detectors; ++i) { + writer->write_bit(buf[i + reader->num_measurements]); + } + } + if (include_observables) { + writer->begin_result_type('L'); + for (uint64_t i = 0; i < circuit_stats.num_observables; ++i) { + writer->write_bit(buf[i + reader->num_measurements + reader->num_detectors]); + } + } + writer->write_end(); + } + + if (in != stdin) { + fclose(in); + } + if (out != stdout) { + fclose(out); + } + return EXIT_SUCCESS; +} + +SubCommandHelp stim::command_convert_help() { + SubCommandHelp result; + result.subcommand_name = "convert"; + result.description = clean_doc_string(R"PARAGRAPH( + Convert data between result formats. + )PARAGRAPH"); + + result.examples.push_back(clean_doc_string(R"PARAGRAPH( + >>> cat example_circuit.stim + X 0 + M 0 1 + DETECTOR rec[-2] + DETECTOR rec[-1] + OBSERVABLE_INCLUDE(2) rec[-1] + + >>> cat example_measure_data.01 + 00 + 01 + 10 + 11 + + >>> stim convert \ + --in example_measure_data.01 \ + --in_format 01 \ + --out_format dets + --circuit example_circuit.stim \ + --types M + shot + shot M1 + shot M0 + shot M0 M1 + + >>> cat example_detection_data.01 + 10000 + 11001 + 00000 + 01001 + + >>> stim convert \ + --in example_detection_data.01 \ + --in_format 01 \ + --out_format dets + --circuit example_circuit.stim \ + --types DL + shot D0 + shot D0 D1 L2 + shot + shot D1 L2 + )PARAGRAPH")); + + result.flags.push_back(SubCommandHelpFlag{ + "--in_format", + "01|b8|r8|ptb64|hits|dets", + "01", + {"[none]", "format"}, + clean_doc_string(R"PARAGRAPH( + Specifies the data format to use when reading data. + + The available formats are: + + 01 (default): dense human readable + b8: bit packed binary + r8: run length binary + ptb64: partially transposed bit packed binary for SIMD + hits: sparse human readable + dets: sparse human readable with type hints + + For a detailed description of each result format, see the result + format reference: + https://github.com/quantumlib/Stim/blob/main/doc/result_formats.md + )PARAGRAPH"), + }); + + result.flags.push_back(SubCommandHelpFlag{ + "--out_format", + "01|b8|r8|ptb64|hits|dets", + "01", + {"[none]", "format"}, + clean_doc_string(R"PARAGRAPH( + Specifies the data format to use when writing output data. + + The available formats are: + + 01 (default): dense human readable + b8: bit packed binary + r8: run length binary + ptb64: partially transposed bit packed binary for SIMD + hits: sparse human readable + dets: sparse human readable with type hints + + For a detailed description of each result format, see the result + format reference: + https://github.com/quantumlib/Stim/blob/main/doc/result_formats.md + )PARAGRAPH"), + }); + + result.flags.push_back(SubCommandHelpFlag{ + "--in", + "filepath", + "{stdin}", + {"[none]", "filepath"}, + clean_doc_string(R"PARAGRAPH( + Chooses the file to read data from. + + By default, the circuit is read from stdin. When `--in $FILEPATH` is + specified, the circuit is instead read from the file at $FILEPATH. + + The input's format is specified by `--in_format`. See: + https://github.com/quantumlib/Stim/blob/main/doc/result_formats.md + )PARAGRAPH"), + }); + + result.flags.push_back(SubCommandHelpFlag{ + "--out", + "filepath", + "{stdout}", + {"[none]", "filepath"}, + clean_doc_string(R"PARAGRAPH( + Chooses where to write the data to. + + By default, the output is written to stdout. When `--out $FILEPATH` + is specified, the output is instead written to the file at $FILEPATH. + + The output's format is specified by `--out_format`. See: + https://github.com/quantumlib/Stim/blob/main/doc/result_formats.md + )PARAGRAPH"), + }); + + result.flags.push_back(SubCommandHelpFlag{ + "--circuit", + "filepath", + "", + {"filepath"}, + clean_doc_string(R"PARAGRAPH( + Specifies where the circuit that generated the data is. + + This argument is required, because the circuit is what specifies + the number of measurements, detectors and observables to use per record. + + The circuit file should be a stim circuit. See: + https://github.com/quantumlib/Stim/blob/main/doc/file_format_stim_circuit.md + )PARAGRAPH"), + }); + + result.flags.push_back(SubCommandHelpFlag{ + "--types", + "M|D|L", + "", + {"filepath"}, + clean_doc_string(R"PARAGRAPH( + Specifies the types of events in the files. + + This argument is required to decode the input file and determine + if it includes measurements, detections or observable frame changes. + + Note that in most cases, a file will have either measurements only, + detections only, or detections and observables. + + The type values (M, D, L) correspond to the value prefix letters + in dets files. See: + https://github.com/quantumlib/Stim/blob/main/doc/result_formats.md#dets + )PARAGRAPH"), + }); + + return result; +} diff --git a/src/stim/cmd/command_convert.h b/src/stim/cmd/command_convert.h new file mode 100644 index 000000000..7932d0e3f --- /dev/null +++ b/src/stim/cmd/command_convert.h @@ -0,0 +1,29 @@ +/* + * Copyright 2023 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef _STIM_CMD_COMMAND_CONVERT_H +#define _STIM_CMD_COMMAND_CONVERT_H + +#include "stim/arg_parse.h" + +namespace stim { + +int command_convert(int argc, const char **argv); +SubCommandHelp command_convert_help(); + +} // namespace stim + +#endif diff --git a/src/stim/cmd/command_convert.test.cc b/src/stim/cmd/command_convert.test.cc new file mode 100644 index 000000000..1a0c38709 --- /dev/null +++ b/src/stim/cmd/command_convert.test.cc @@ -0,0 +1,103 @@ +// Copyright 2023 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "gtest/gtest.h" + +#include "stim/main_namespaced.test.h" +#include "stim/test_util.test.h" + +using namespace stim; + +class ConvertTest : public testing::TestWithParam< + std::tuple, std::tuple>> { + protected: + void SetUp() override { + tmp.write_contents(R"CIRCUIT( + X 0 + M 0 1 + DETECTOR rec[-2] + DETECTOR rec[-1] + OBSERVABLE_INCLUDE(2) rec[-1] + )CIRCUIT"); + } + + RaiiTempNamedFile tmp; +}; + +class ConvertMeasurementsTest : public ConvertTest {}; +class ConvertDetectionsTest : public ConvertTest {}; + +std::vector> measurement_parameters{ + std::make_tuple("01", "00\n01\n10\n11\n"), + std::make_tuple("b8", std::string({0x00, 0x02, 0x01, 0x03})), + std::make_tuple("dets", "shot\nshot M1\nshot M0\nshot M0 M1\n"), + std::make_tuple("hits", "\n1\n0\n0,1\n"), + std::make_tuple("r8", std::string({0x02, 0x01, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00}))}; + +std::vector> detection_parameters{ + std::make_tuple("01", "10000\n11001\n00000\n01001\n"), + std::make_tuple("b8", std::string({0x01, 0x13, 0x00, 0x12})), + std::make_tuple("dets", "shot D0\nshot D0 D1 L2\nshot\nshot D1 L2\n"), + std::make_tuple("hits", "0\n0,1,4\n\n1,4\n"), + std::make_tuple("r8", std::string({0x00, 0x04, 0x00, 0x00, 0x02, 0x00, 0x05, 0x01, 0x02, 0x00}))}; + +TEST_P(ConvertMeasurementsTest, convert) { + auto [in_format, in_data] = std::get<0>(GetParam()); + auto [out_format, out_data] = std::get<1>(GetParam()); + ASSERT_EQ( + run_captured_stim_main( + {"convert", + ("--in_format=" + in_format).c_str(), + ("--out_format=" + out_format).c_str(), + "--circuit", + tmp.path.data(), + "--types=M"}, + in_data), + out_data); +} + +TEST_P(ConvertDetectionsTest, convert) { + auto [in_format, in_data] = std::get<0>(GetParam()); + auto [out_format, out_data] = std::get<1>(GetParam()); + ASSERT_EQ( + run_captured_stim_main( + {"convert", + ("--in_format=" + in_format).c_str(), + ("--out_format=" + out_format).c_str(), + "--circuit", + tmp.path.data(), + "--types=DL"}, + in_data), + out_data); +} + +INSTANTIATE_TEST_SUITE_P( + ConvertMeasurementsTests, + ConvertMeasurementsTest, + testing::Combine(testing::ValuesIn(measurement_parameters), testing::ValuesIn(measurement_parameters)), + [](const testing::TestParamInfo& info) { + std::string from = std::get<0>(std::get<0>(info.param)); + std::string to = std::get<0>(std::get<1>(info.param)); + return from + "_to_" + to; + }); + +INSTANTIATE_TEST_SUITE_P( + ConvertDetectionsTests, + ConvertDetectionsTest, + testing::Combine(testing::ValuesIn(detection_parameters), testing::ValuesIn(detection_parameters)), + [](const testing::TestParamInfo& info) { + std::string from = std::get<0>(std::get<0>(info.param)); + std::string to = std::get<0>(std::get<1>(info.param)); + return from + "_to_" + to; + }); diff --git a/src/stim/cmd/command_help.cc b/src/stim/cmd/command_help.cc index 20428f35d..070369593 100644 --- a/src/stim/cmd/command_help.cc +++ b/src/stim/cmd/command_help.cc @@ -21,6 +21,7 @@ #include #include +#include "command_convert.h" #include "command_detect.h" #include "command_diagram.h" #include "command_explain_errors.h" @@ -90,6 +91,7 @@ std::vector make_sub_command_help() { help_help.description = "Prints helpful information about using stim."; auto result = std::vector{ command_analyze_errors_help(), + command_convert_help(), command_detect_help(), command_diagram_help(), command_explain_errors_help(), diff --git a/src/stim/main_namespaced.cc b/src/stim/main_namespaced.cc index 1a612c777..8016f3619 100644 --- a/src/stim/main_namespaced.cc +++ b/src/stim/main_namespaced.cc @@ -18,6 +18,7 @@ #include "stim/arg_parse.h" #include "stim/cmd/command_analyze_errors.h" +#include "stim/cmd/command_convert.h" #include "stim/cmd/command_detect.h" #include "stim/cmd/command_diagram.h" #include "stim/cmd/command_explain_errors.h" @@ -54,16 +55,17 @@ int stim::main(int argc, const char **argv) { bool mode_detect = is_mode("--detect"); bool mode_analyze_errors = is_mode("--analyze_errors"); bool mode_gen = is_mode("--gen"); - bool mode_convert = is_mode("--m2d"); + bool mode_m2d = is_mode("--m2d"); bool mode_explain_errors = is_mode("--explain_errors"); bool old_mode_detector_hypergraph = find_bool_argument("--detector_hypergraph", argc, argv); if (old_mode_detector_hypergraph) { std::cerr << "[DEPRECATION] Use `stim analyze_errors` instead of `--detector_hypergraph`\n"; mode_analyze_errors = true; } + bool mode_convert = is_mode("--convert"); int modes_picked = - (mode_repl + mode_sample + mode_sample_dem + mode_detect + mode_analyze_errors + mode_gen + mode_convert + - mode_explain_errors + mode_diagram); + (mode_repl + mode_sample + mode_sample_dem + mode_detect + mode_analyze_errors + mode_gen + mode_m2d + + mode_explain_errors + mode_diagram + mode_convert); if (modes_picked != 1) { std::cerr << "\033[31m"; if (modes_picked > 1) { @@ -91,7 +93,7 @@ int stim::main(int argc, const char **argv) { if (mode_analyze_errors) { return command_analyze_errors(argc, argv); } - if (mode_convert) { + if (mode_m2d) { return command_m2d(argc, argv); } if (mode_explain_errors) { @@ -103,6 +105,9 @@ int stim::main(int argc, const char **argv) { if (mode_diagram) { return command_diagram(argc, argv); } + if (mode_convert) { + return command_convert(argc, argv); + } throw std::out_of_range("Mode not handled."); } catch (const std::invalid_argument &ex) { diff --git a/src/stim/main_namespaced.test.cc b/src/stim/main_namespaced.test.cc index fce4ea503..fbffe7320 100644 --- a/src/stim/main_namespaced.test.cc +++ b/src/stim/main_namespaced.test.cc @@ -23,10 +23,10 @@ using namespace stim; -std::string stim::run_captured_stim_main(std::vector flags, const char *std_in_content) { +std::string stim::run_captured_stim_main(std::vector flags, const std::string &std_in_content) { // Setup input. RaiiTempNamedFile raii_temp_file; - if (std_in_content != nullptr) { + if (!std_in_content.empty()) { raii_temp_file.write_contents(std_in_content); flags.push_back("--in"); flags.push_back(raii_temp_file.path.data()); diff --git a/src/stim/main_namespaced.test.h b/src/stim/main_namespaced.test.h index 585c4fe2b..2d1d686a2 100644 --- a/src/stim/main_namespaced.test.h +++ b/src/stim/main_namespaced.test.h @@ -20,7 +20,7 @@ namespace stim { -std::string run_captured_stim_main(std::vector flags, const char* std_in_content); +std::string run_captured_stim_main(std::vector flags, const std::string &std_in_content); std::string trim(std::string text); bool matches(std::string actual, std::string pattern); From 477938a42fb2f375aec6e3f09e506e93e047db94 Mon Sep 17 00:00:00 2001 From: Justin Ledford Date: Wed, 2 Aug 2023 20:13:48 +0000 Subject: [PATCH 2/9] fix help_modes test After updating run_captured_stim_main to take std::string for stdin instead of a cstring, main.help_modes started failing. For cases where an empty string was passed instead of nullptr, run_captured_stim_main created a tmp file and passed it with `--in`. This caused some test cases in help_modes to behave differently than expected. This commit simply updates the output on a single test case to reflect the real output of this command (`stim --help --sample`). --- src/stim/main_namespaced.test.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/stim/main_namespaced.test.cc b/src/stim/main_namespaced.test.cc index fbffe7320..49194a9bf 100644 --- a/src/stim/main_namespaced.test.cc +++ b/src/stim/main_namespaced.test.cc @@ -83,7 +83,7 @@ TEST(main, help_modes) { matches(run_captured_stim_main({"--sample", "--repl", "--detect"}, ""), ".+stderr.+More than one mode.+")); ASSERT_TRUE(matches(run_captured_stim_main({"--help", "dhnsahddjoidsa"}, ""), ".*Unrecognized.*")); ASSERT_TRUE(matches(run_captured_stim_main({"--help", "H"}, ""), ".+Hadamard.+")); - ASSERT_TRUE(matches(run_captured_stim_main({"--help", "--sample"}, ""), ".*Samples measurements from a circuit.+")); + ASSERT_TRUE(matches(run_captured_stim_main({"--help", "--sample"}, ""), ".*Unrecognized.*")); ASSERT_TRUE(matches(run_captured_stim_main({"--help", "sample"}, ""), ".*Samples measurements from a circuit.+")); } From 81b619c88417e563a7147dbf69bae52763551a60 Mon Sep 17 00:00:00 2001 From: Justin Ledford Date: Wed, 2 Aug 2023 21:08:06 +0000 Subject: [PATCH 3/9] eliminate some duplicated code in command_convert.test.cc --- src/stim/cmd/command_convert.test.cc | 61 +++++++++++----------------- 1 file changed, 23 insertions(+), 38 deletions(-) diff --git a/src/stim/cmd/command_convert.test.cc b/src/stim/cmd/command_convert.test.cc index 1a0c38709..2986ad70e 100644 --- a/src/stim/cmd/command_convert.test.cc +++ b/src/stim/cmd/command_convert.test.cc @@ -19,8 +19,9 @@ using namespace stim; -class ConvertTest : public testing::TestWithParam< - std::tuple, std::tuple>> { +class ConvertTest + : public testing::TestWithParam< + std::tuple, std::tuple>> { protected: void SetUp() override { tmp.write_contents(R"CIRCUIT( @@ -35,9 +36,6 @@ class ConvertTest : public testing::TestWithParam< RaiiTempNamedFile tmp; }; -class ConvertMeasurementsTest : public ConvertTest {}; -class ConvertDetectionsTest : public ConvertTest {}; - std::vector> measurement_parameters{ std::make_tuple("01", "00\n01\n10\n11\n"), std::make_tuple("b8", std::string({0x00, 0x02, 0x01, 0x03})), @@ -52,9 +50,10 @@ std::vector> detection_parameters{ std::make_tuple("hits", "0\n0,1,4\n\n1,4\n"), std::make_tuple("r8", std::string({0x00, 0x04, 0x00, 0x00, 0x02, 0x00, 0x05, 0x01, 0x02, 0x00}))}; -TEST_P(ConvertMeasurementsTest, convert) { - auto [in_format, in_data] = std::get<0>(GetParam()); - auto [out_format, out_data] = std::get<1>(GetParam()); +TEST_P(ConvertTest, convert) { + std::string types = std::get<0>(GetParam()); + auto [in_format, in_data] = std::get<1>(GetParam()); + auto [out_format, out_data] = std::get<2>(GetParam()); ASSERT_EQ( run_captured_stim_main( {"convert", @@ -62,42 +61,28 @@ TEST_P(ConvertMeasurementsTest, convert) { ("--out_format=" + out_format).c_str(), "--circuit", tmp.path.data(), - "--types=M"}, + ("--types=" + types).c_str()}, in_data), out_data); } -TEST_P(ConvertDetectionsTest, convert) { - auto [in_format, in_data] = std::get<0>(GetParam()); - auto [out_format, out_data] = std::get<1>(GetParam()); - ASSERT_EQ( - run_captured_stim_main( - {"convert", - ("--in_format=" + in_format).c_str(), - ("--out_format=" + out_format).c_str(), - "--circuit", - tmp.path.data(), - "--types=DL"}, - in_data), - out_data); +template +std::string GenerateTestParameterName(const testing::TestParamInfo& info) { + std::string from = std::get<0>(std::get<1>(info.param)); + std::string to = std::get<0>(std::get<2>(info.param)); + return from + "_to_" + to; } INSTANTIATE_TEST_SUITE_P( - ConvertMeasurementsTests, - ConvertMeasurementsTest, - testing::Combine(testing::ValuesIn(measurement_parameters), testing::ValuesIn(measurement_parameters)), - [](const testing::TestParamInfo& info) { - std::string from = std::get<0>(std::get<0>(info.param)); - std::string to = std::get<0>(std::get<1>(info.param)); - return from + "_to_" + to; - }); + ConvertMeasurements, + ConvertTest, + testing::Combine( + testing::Values("M"), testing::ValuesIn(measurement_parameters), testing::ValuesIn(measurement_parameters)), + GenerateTestParameterName); INSTANTIATE_TEST_SUITE_P( - ConvertDetectionsTests, - ConvertDetectionsTest, - testing::Combine(testing::ValuesIn(detection_parameters), testing::ValuesIn(detection_parameters)), - [](const testing::TestParamInfo& info) { - std::string from = std::get<0>(std::get<0>(info.param)); - std::string to = std::get<0>(std::get<1>(info.param)); - return from + "_to_" + to; - }); + ConvertDetections, + ConvertTest, + testing::Combine( + testing::Values("DL"), testing::ValuesIn(detection_parameters), testing::ValuesIn(detection_parameters)), + GenerateTestParameterName); From 23cbdffa12df5148ef450206694de8e99382835e Mon Sep 17 00:00:00 2001 From: Justin Ledford Date: Thu, 3 Aug 2023 00:44:44 +0000 Subject: [PATCH 4/9] validate --types in convert command --- src/stim/cmd/command_convert.cc | 26 ++++++++++++++++++++++---- src/stim/cmd/command_convert.test.cc | 14 ++++++++++++++ 2 files changed, 36 insertions(+), 4 deletions(-) diff --git a/src/stim/cmd/command_convert.cc b/src/stim/cmd/command_convert.cc index 5ca6fd87d..5cf38f445 100644 --- a/src/stim/cmd/command_convert.cc +++ b/src/stim/cmd/command_convert.cc @@ -47,11 +47,29 @@ int stim::command_convert(int argc, const char **argv) { fclose(circuit_file); CircuitStats circuit_stats = circuit.compute_stats(); - const char *types = require_find_argument("--types", argc, argv); + std::string types = require_find_argument("--types", argc, argv); bool include_measurements = false, include_detectors = false, include_observables = false; - include_measurements = strchr(types, 'M') != nullptr; - include_detectors = strchr(types, 'D') != nullptr; - include_observables = strchr(types, 'L') != nullptr; + for (const char c : types) { + bool found_duplicate = false; + if (c == 'M') { + found_duplicate = include_measurements; + include_measurements = true; + } else if (c == 'D') { + found_duplicate = include_detectors; + include_detectors = true; + } else if (c == 'L') { + found_duplicate = include_observables; + include_observables = true; + } else { + std::cerr << "\033[31mUnknown type passed to --types\n"; + return EXIT_FAILURE; + } + + if (found_duplicate) { + std::cerr << "\033[31mEach type in types should only be specified once\n"; + return EXIT_FAILURE; + } + } auto reader = MeasureRecordReader::make( in, diff --git a/src/stim/cmd/command_convert.test.cc b/src/stim/cmd/command_convert.test.cc index 2986ad70e..2bf855a0c 100644 --- a/src/stim/cmd/command_convert.test.cc +++ b/src/stim/cmd/command_convert.test.cc @@ -86,3 +86,17 @@ INSTANTIATE_TEST_SUITE_P( testing::Combine( testing::Values("DL"), testing::ValuesIn(detection_parameters), testing::ValuesIn(detection_parameters)), GenerateTestParameterName); + +TEST(command_convert, convert_invalid_types) { + RaiiTempNamedFile tmp(""); + + ASSERT_TRUE(matches( + run_captured_stim_main( + {"convert", "--in_format=dets", "--out_format=dets", "--circuit", tmp.path.data(), "--types=N"}, ""), + ".*Unknown type passed to --types.*")); + + ASSERT_TRUE(matches( + run_captured_stim_main( + {"convert", "--in_format=dets", "--out_format=dets", "--circuit", tmp.path.data(), "--types=MM"}, ""), + ".*Each type in types should only be specified once.*")); +} From 9596652b5063c24b3e4e6879881d795392d742c4 Mon Sep 17 00:00:00 2001 From: Justin Ledford Date: Thu, 3 Aug 2023 00:47:17 +0000 Subject: [PATCH 5/9] link to results formats in convert help --- src/stim/cmd/command_convert.cc | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/stim/cmd/command_convert.cc b/src/stim/cmd/command_convert.cc index 5cf38f445..9cbf6a6ef 100644 --- a/src/stim/cmd/command_convert.cc +++ b/src/stim/cmd/command_convert.cc @@ -116,6 +116,9 @@ SubCommandHelp stim::command_convert_help() { result.subcommand_name = "convert"; result.description = clean_doc_string(R"PARAGRAPH( Convert data between result formats. + + See the various formats here: + https://github.com/quantumlib/Stim/blob/main/doc/result_formats.md )PARAGRAPH"); result.examples.push_back(clean_doc_string(R"PARAGRAPH( From af719ec655a12d5208785ac11b0dc76b6c1fc3ee Mon Sep 17 00:00:00 2001 From: Justin Ledford Date: Thu, 3 Aug 2023 00:56:16 +0000 Subject: [PATCH 6/9] make convert subcommand tests more clear --- src/stim/cmd/command_convert.test.cc | 110 +++++++++++++-------------- 1 file changed, 55 insertions(+), 55 deletions(-) diff --git a/src/stim/cmd/command_convert.test.cc b/src/stim/cmd/command_convert.test.cc index 2bf855a0c..03b942446 100644 --- a/src/stim/cmd/command_convert.test.cc +++ b/src/stim/cmd/command_convert.test.cc @@ -19,73 +19,73 @@ using namespace stim; -class ConvertTest - : public testing::TestWithParam< - std::tuple, std::tuple>> { - protected: - void SetUp() override { - tmp.write_contents(R"CIRCUIT( +TEST(command_convert, convert_measurements_with_circuit) { + RaiiTempNamedFile tmp(R"CIRCUIT( X 0 M 0 1 DETECTOR rec[-2] DETECTOR rec[-1] OBSERVABLE_INCLUDE(2) rec[-1] )CIRCUIT"); - } - - RaiiTempNamedFile tmp; -}; -std::vector> measurement_parameters{ - std::make_tuple("01", "00\n01\n10\n11\n"), - std::make_tuple("b8", std::string({0x00, 0x02, 0x01, 0x03})), - std::make_tuple("dets", "shot\nshot M1\nshot M0\nshot M0 M1\n"), - std::make_tuple("hits", "\n1\n0\n0,1\n"), - std::make_tuple("r8", std::string({0x02, 0x01, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00}))}; + std::vector> measurement_data{ + std::make_tuple("01", "00\n01\n10\n11\n"), + std::make_tuple("b8", std::string({0x00, 0x02, 0x01, 0x03})), + std::make_tuple("dets", "shot\nshot M1\nshot M0\nshot M0 M1\n"), + std::make_tuple("hits", "\n1\n0\n0,1\n"), + std::make_tuple("r8", std::string({0x02, 0x01, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00}))}; -std::vector> detection_parameters{ - std::make_tuple("01", "10000\n11001\n00000\n01001\n"), - std::make_tuple("b8", std::string({0x01, 0x13, 0x00, 0x12})), - std::make_tuple("dets", "shot D0\nshot D0 D1 L2\nshot\nshot D1 L2\n"), - std::make_tuple("hits", "0\n0,1,4\n\n1,4\n"), - std::make_tuple("r8", std::string({0x00, 0x04, 0x00, 0x00, 0x02, 0x00, 0x05, 0x01, 0x02, 0x00}))}; - -TEST_P(ConvertTest, convert) { - std::string types = std::get<0>(GetParam()); - auto [in_format, in_data] = std::get<1>(GetParam()); - auto [out_format, out_data] = std::get<2>(GetParam()); - ASSERT_EQ( - run_captured_stim_main( - {"convert", - ("--in_format=" + in_format).c_str(), - ("--out_format=" + out_format).c_str(), - "--circuit", - tmp.path.data(), - ("--types=" + types).c_str()}, - in_data), - out_data); + for (const auto& [in_format, in_data] : measurement_data) { + for (const auto& [out_format, out_data] : measurement_data) { + ASSERT_EQ( + run_captured_stim_main( + {"convert", + "--in_format", + in_format.c_str(), + "--out_format", + out_format.c_str(), + "--circuit", + tmp.path.data(), + "--types=M"}, + in_data), + out_data); + } + } } -template -std::string GenerateTestParameterName(const testing::TestParamInfo& info) { - std::string from = std::get<0>(std::get<1>(info.param)); - std::string to = std::get<0>(std::get<2>(info.param)); - return from + "_to_" + to; -} +TEST(command_convert, convert_detections_with_circuit) { + RaiiTempNamedFile tmp(R"CIRCUIT( + X 0 + M 0 1 + DETECTOR rec[-2] + DETECTOR rec[-1] + OBSERVABLE_INCLUDE(2) rec[-1] + )CIRCUIT"); -INSTANTIATE_TEST_SUITE_P( - ConvertMeasurements, - ConvertTest, - testing::Combine( - testing::Values("M"), testing::ValuesIn(measurement_parameters), testing::ValuesIn(measurement_parameters)), - GenerateTestParameterName); + std::vector> detection_data{ + std::make_tuple("01", "10000\n11001\n00000\n01001\n"), + std::make_tuple("b8", std::string({0x01, 0x13, 0x00, 0x12})), + std::make_tuple("dets", "shot D0\nshot D0 D1 L2\nshot\nshot D1 L2\n"), + std::make_tuple("hits", "0\n0,1,4\n\n1,4\n"), + std::make_tuple("r8", std::string({0x00, 0x04, 0x00, 0x00, 0x02, 0x00, 0x05, 0x01, 0x02, 0x00}))}; -INSTANTIATE_TEST_SUITE_P( - ConvertDetections, - ConvertTest, - testing::Combine( - testing::Values("DL"), testing::ValuesIn(detection_parameters), testing::ValuesIn(detection_parameters)), - GenerateTestParameterName); + for (const auto& [in_format, in_data] : detection_data) { + for (const auto& [out_format, out_data] : detection_data) { + ASSERT_EQ( + run_captured_stim_main( + {"convert", + "--in_format", + in_format.c_str(), + "--out_format", + out_format.c_str(), + "--circuit", + tmp.path.data(), + "--types=DL"}, + in_data), + out_data); + } + } +} TEST(command_convert, convert_invalid_types) { RaiiTempNamedFile tmp(""); From 12f05d9343c74a1800cf20eda87cd99572b56a1d Mon Sep 17 00:00:00 2001 From: Justin Ledford Date: Mon, 7 Aug 2023 17:45:41 +0000 Subject: [PATCH 7/9] add additional use cases to convert command --- src/stim/cmd/command_convert.cc | 393 +++++++++++++++++++++++---- src/stim/cmd/command_convert.test.cc | 280 ++++++++++++++++++- 2 files changed, 604 insertions(+), 69 deletions(-) diff --git a/src/stim/cmd/command_convert.cc b/src/stim/cmd/command_convert.cc index 9cbf6a6ef..c651f3e40 100644 --- a/src/stim/cmd/command_convert.cc +++ b/src/stim/cmd/command_convert.cc @@ -14,8 +14,11 @@ #include "stim/cmd/command_convert.h" +#include + #include "command_help.h" #include "stim/arg_parse.h" +#include "stim/dem/detector_error_model.h" #include "stim/io/measure_record_batch_writer.h" #include "stim/io/measure_record_reader.h" #include "stim/io/stim_data_formats.h" @@ -23,81 +26,208 @@ using namespace stim; +struct DataDetails { + int num_measurements; + int num_detectors; + int num_observables; + int bits_per_shot; + bool include_measurements; + bool include_detectors; + bool include_observables; +}; + +void process_num_flags(int argc, const char **argv, DataDetails *details_out) { + details_out->num_measurements = find_int64_argument("--num_measurements", 0, 0, INT64_MAX, argc, argv); + details_out->num_detectors = find_int64_argument("--num_detectors", 0, 0, INT64_MAX, argc, argv); + details_out->num_observables = find_int64_argument("--num_observables", 0, 0, INT64_MAX, argc, argv); + + details_out->include_measurements = details_out->num_measurements > 0; + details_out->include_detectors = details_out->num_detectors > 0; + details_out->include_observables = details_out->num_observables > 0; +} + +void process_dem(const char *dem_path, DataDetails *details_out) { + if (dem_path == nullptr) { + return; + } + + FILE *dem_file = fopen(dem_path, "rb"); + if (dem_file == nullptr) { + std::stringstream msg; + msg << "Failed to open '" << dem_path << "'"; + throw std::invalid_argument(msg.str()); + } + auto dem = DetectorErrorModel::from_file(dem_file); + fclose(dem_file); + details_out->num_detectors = dem.count_detectors(); + details_out->num_observables = dem.count_observables(); + details_out->include_detectors = details_out->num_detectors > 0; + details_out->include_observables = details_out->num_observables > 0; +} + +void process_circuit(const char *circuit_path, const char *types, DataDetails *details_out) { + if (circuit_path == nullptr) { + return; + } + if (types == nullptr) { + throw std::invalid_argument("--types required when passing circuit"); + } + FILE *circuit_file = fopen(circuit_path, "rb"); + if (circuit_file == nullptr) { + std::stringstream msg; + msg << "Failed to open '" << circuit_path << "'"; + throw std::invalid_argument(msg.str()); + } + auto circuit = Circuit::from_file(circuit_file); + fclose(circuit_file); + CircuitStats circuit_stats = circuit.compute_stats(); + details_out->num_measurements = circuit_stats.num_measurements; + details_out->num_detectors = circuit_stats.num_detectors; + details_out->num_observables = circuit_stats.num_observables; + + while (types != nullptr && *types) { + char c = *types; + bool found_duplicate = false; + if (c == 'M') { + found_duplicate = details_out->include_measurements; + details_out->include_measurements = true; + } else if (c == 'D') { + found_duplicate = details_out->include_detectors; + details_out->include_detectors = true; + } else if (c == 'L') { + found_duplicate = details_out->include_observables; + details_out->include_observables = true; + } else { + throw std::invalid_argument("Unknown type passed to --types"); + } + + if (found_duplicate) { + throw std::invalid_argument("Each type in types should only be specified once"); + } + ++types; + } +} + int stim::command_convert(int argc, const char **argv) { check_for_unknown_arguments( { "--in_format", "--out_format", + "--obs_out_format", "--in", "--out", + "--obs_out", "--circuit", + "--dem", "--types", + "--num_measurements", + "--num_detectors", + "--num_observables", + "--bits_per_shot", }, {}, "convert", argc, argv); + DataDetails details; + const auto &in_format = find_enum_argument("--in_format", nullptr, format_name_to_enum_map(), argc, argv); const auto &out_format = find_enum_argument("--out_format", "01", format_name_to_enum_map(), argc, argv); + const auto &obs_out_format = find_enum_argument("--obs_out_format", "01", format_name_to_enum_map(), argc, argv); FILE *in = find_open_file_argument("--in", stdin, "rb", argc, argv); FILE *out = find_open_file_argument("--out", stdout, "wb", argc, argv); - FILE *circuit_file = find_open_file_argument("--circuit", nullptr, "rb", argc, argv); - auto circuit = Circuit::from_file(circuit_file); - fclose(circuit_file); - CircuitStats circuit_stats = circuit.compute_stats(); + FILE *obs_out = find_open_file_argument("--obs_out", stdout, "wb", argc, argv); + + // Determine the necessary data needed to parse the input and + // write to the new output. + + // First see if everything was just given directly. + process_num_flags(argc, argv, &details); + + // Next see if we can infer from a given DEM file. + const char *dem_path = find_argument("--dem", argc, argv); + process_dem(dem_path, &details); + + // Finally see if we can infer from a given circuit file and + // list of value types. + const char *circuit_path = find_argument("--circuit", argc, argv); + const char *types = find_argument("--types", argc, argv); + try { + process_circuit(circuit_path, types, &details); + } catch (std::exception &e) { + std::cerr << "\033[31m" << e.what() << std::endl; + return EXIT_FAILURE; + } - std::string types = require_find_argument("--types", argc, argv); - bool include_measurements = false, include_detectors = false, include_observables = false; - for (const char c : types) { - bool found_duplicate = false; - if (c == 'M') { - found_duplicate = include_measurements; - include_measurements = true; - } else if (c == 'D') { - found_duplicate = include_detectors; - include_detectors = true; - } else if (c == 'L') { - found_duplicate = include_observables; - include_observables = true; - } else { - std::cerr << "\033[31mUnknown type passed to --types\n"; + // Not enough information to infer types, at this point we can only + // convert arbitrary bits. + if (!details.include_measurements && !details.include_detectors && !details.include_observables) { + // dets outputs explicit value types, which we don't know if we get here. + if (out_format.id == SAMPLE_FORMAT_DETS) { + std::cerr + << "\033[31mNot enough information given to parse input file to write to dets. Please given a circuit " + "with --types, a DEM file, or explicit number of each desired type\n"; return EXIT_FAILURE; } - - if (found_duplicate) { - std::cerr << "\033[31mEach type in types should only be specified once\n"; + details.bits_per_shot = find_int64_argument("--bits_per_shot", 0, 0, INT64_MAX, argc, argv); + if (details.bits_per_shot == 0) { + std::cerr << "\033[31mNot enough information given to parse input file.\n"; return EXIT_FAILURE; } + details.include_measurements = true; + details.num_measurements = details.bits_per_shot; } auto reader = MeasureRecordReader::make( in, in_format.id, - include_measurements ? circuit_stats.num_measurements : 0, - include_detectors ? circuit_stats.num_detectors : 0, - include_observables ? circuit_stats.num_observables : 0); + details.include_measurements ? details.num_measurements : 0, + details.include_detectors ? details.num_detectors : 0, + details.include_observables ? details.num_observables : 0); auto writer = MeasureRecordWriter::make(out, out_format.id); + + std::unique_ptr obs_writer; + if (obs_out != stdout) { + obs_writer = MeasureRecordWriter::make(obs_out, obs_out_format.id); + } else { + obs_out = nullptr; + } + simd_bits buf(reader->bits_per_record()); while (reader->start_and_read_entire_record(buf)) { - if (include_measurements) { + int64_t offset = 0; + if (details.include_measurements) { writer->begin_result_type('M'); - for (uint64_t i = 0; i < circuit_stats.num_measurements; ++i) { + for (int64_t i = 0; i < details.num_measurements; ++i) { writer->write_bit(buf[i]); } } - if (include_detectors) { + offset += reader->num_measurements; + if (details.include_detectors) { writer->begin_result_type('D'); - for (uint64_t i = 0; i < circuit_stats.num_detectors; ++i) { - writer->write_bit(buf[i + reader->num_measurements]); + for (int64_t i = 0; i < details.num_detectors; ++i) { + writer->write_bit(buf[i + offset]); } } - if (include_observables) { - writer->begin_result_type('L'); - for (uint64_t i = 0; i < circuit_stats.num_observables; ++i) { - writer->write_bit(buf[i + reader->num_measurements + reader->num_detectors]); + offset += reader->num_detectors; + if (details.include_observables) { + if (obs_writer) { + obs_writer->begin_result_type('L'); + } else { + writer->begin_result_type('L'); } + for (int64_t i = 0; i < details.num_observables; ++i) { + if (obs_writer) { + obs_writer->write_bit(buf[i + offset]); + } else { + writer->write_bit(buf[i + offset]); + } + } + } + if (obs_writer) { + obs_writer->write_end(); } writer->write_end(); } @@ -108,6 +238,9 @@ int stim::command_convert(int argc, const char **argv) { if (out != stdout) { fclose(out); } + if (obs_out != nullptr) { + fclose(obs_out); + } return EXIT_SUCCESS; } @@ -119,7 +252,55 @@ SubCommandHelp stim::command_convert_help() { See the various formats here: https://github.com/quantumlib/Stim/blob/main/doc/result_formats.md - )PARAGRAPH"); + + To read and write data, the size of the records must be known. + If writing to a dets file, then the number of measurements, detectors + and observables per record must also be known. + + Both of these pieces of information can either be given directly, or + inferred from various data sources, such as circuit or dem files. + )PARAGRAPH"); + + result.examples.push_back(clean_doc_string(R"PARAGRAPH( + >>> cat example.01 + 10000 + 11001 + 00000 + 01001 + + >>> stim convert \ + --in example.01 \ + --in_format 01 \ + --out_format dets + --num_measurements 5 + shot M0 + shot M0 M1 M4 + shot + shot M1 M4 + )PARAGRAPH")); + + result.examples.push_back(clean_doc_string(R"PARAGRAPH( + >>> cat example.dem + detector D0 + detector D1 + logical_observable L2 + + >>> cat example.dets + shot D0 + shot D0 D1 L2 + shot + shot D1 L2 + + >>> stim convert \ + --in example.dets \ + --in_format dets \ + --out_format 01 + --dem example.dem + 10000 + 11001 + 00000 + 01001 + )PARAGRAPH")); result.examples.push_back(clean_doc_string(R"PARAGRAPH( >>> cat example_circuit.stim @@ -145,23 +326,24 @@ SubCommandHelp stim::command_convert_help() { shot M1 shot M0 shot M0 M1 + )PARAGRAPH")); - >>> cat example_detection_data.01 - 10000 - 11001 - 00000 - 01001 + result.examples.push_back(clean_doc_string(R"PARAGRAPH( + >>> cat example.01 + 0010 + 0111 + 1000 + 1110 >>> stim convert \ - --in example_detection_data.01 \ + --in example.01 \ --in_format 01 \ - --out_format dets - --circuit example_circuit.stim \ - --types DL - shot D0 - shot D0 D1 L2 - shot - shot D1 L2 + --out_format hits + --bits_per_shot 4 + 2 + 1,2,3 + 0 + 0,1,2 )PARAGRAPH")); result.flags.push_back(SubCommandHelpFlag{ @@ -210,6 +392,31 @@ SubCommandHelp stim::command_convert_help() { )PARAGRAPH"), }); + result.flags.push_back(SubCommandHelpFlag{ + "--obs_out_format", + "01|b8|r8|ptb64|hits|dets", + "01", + {"[none]", "format"}, + clean_doc_string(R"PARAGRAPH( + Specifies the data format to use when writing observable flip data. + + Irrelevant unless `--obs_out` is specified. + + The available formats are: + + 01 (default): dense human readable + b8: bit packed binary + r8: run length binary + ptb64: partially transposed bit packed binary for SIMD + hits: sparse human readable + dets: sparse human readable with type hints + + For a detailed description of each result format, see the result + format reference: + https://github.com/quantumlib/Stim/blob/main/doc/result_formats.md + )PARAGRAPH"), + }); + result.flags.push_back(SubCommandHelpFlag{ "--in", "filepath", @@ -242,16 +449,37 @@ SubCommandHelp stim::command_convert_help() { )PARAGRAPH"), }); + result.flags.push_back(SubCommandHelpFlag{ + "--obs_out", + "filepath", + "", + {"[none]", "filepath"}, + clean_doc_string(R"PARAGRAPH( + Specifies the file to write observable flip data to. + + When producing detection event data, the goal is typically to + predict whether or not the logical observables were flipped by using + the detection events. This argument specifies where to write that + observable flip data. + + If this argument isn't specified, the observable flip data isn't + written to a file. + + The output is in a format specified by `--obs_out_format`. See: + https://github.com/quantumlib/Stim/blob/main/doc/result_formats.md + )PARAGRAPH"), + }); + result.flags.push_back(SubCommandHelpFlag{ "--circuit", "filepath", "", - {"filepath"}, + {"[none]", "filepath"}, clean_doc_string(R"PARAGRAPH( Specifies where the circuit that generated the data is. - This argument is required, because the circuit is what specifies - the number of measurements, detectors and observables to use per record. + This argument is optional, but can be used to infer the number of + measurements, detectors and observables to use per record. The circuit file should be a stim circuit. See: https://github.com/quantumlib/Stim/blob/main/doc/file_format_stim_circuit.md @@ -262,12 +490,14 @@ SubCommandHelp stim::command_convert_help() { "--types", "M|D|L", "", - {"filepath"}, + {"[none]" + "types"}, clean_doc_string(R"PARAGRAPH( Specifies the types of events in the files. - This argument is required to decode the input file and determine - if it includes measurements, detections or observable frame changes. + This argument is required if a circuit is given as the circuit can + give the number of each type of event, but not which events are + contained within an input file. Note that in most cases, a file will have either measurements only, detections only, or detections and observables. @@ -278,5 +508,60 @@ SubCommandHelp stim::command_convert_help() { )PARAGRAPH"), }); + result.flags.push_back(SubCommandHelpFlag{ + "--num_measurements", + "int", + "0", + {"[none], int"}, + clean_doc_string(R"PARAGRAPH( + Specifies the number of measurements in the input/output files. + + This argument is required if writing to a dets file and the circuit + is not given. + )PARAGRAPH"), + }); + + result.flags.push_back(SubCommandHelpFlag{ + "--num_detectors", + "int", + "0", + {"[none], int"}, + clean_doc_string(R"PARAGRAPH( + Specifies the number of detectors in the input/output files. + + This argument is required if writing to a dets file and the circuit + or dem is not given. + )PARAGRAPH"), + }); + + result.flags.push_back(SubCommandHelpFlag{ + "--num_observables", + "int", + "0", + {"[none], int"}, + clean_doc_string(R"PARAGRAPH( + Specifies the number of observables in the input/output files. + + This argument is required if writing to a dets file and the circuit + or dem is not given. + )PARAGRAPH"), + }); + + result.flags.push_back(SubCommandHelpFlag{ + "--bits_per_shot", + "int", + "0", + {"[none], int"}, + clean_doc_string(R"PARAGRAPH( + Specifies the number of bits per shot in the input/output files. + + This argument is required if the circuit, dem or num_* flags + are not given, and not supported when writing to a dets file. + + In this case we just treat the bits aas arbitrary data. It is up + to the user to interpert it correctly. + )PARAGRAPH"), + }); + return result; } diff --git a/src/stim/cmd/command_convert.test.cc b/src/stim/cmd/command_convert.test.cc index 03b942446..1cb2b1c79 100644 --- a/src/stim/cmd/command_convert.test.cc +++ b/src/stim/cmd/command_convert.test.cc @@ -19,7 +19,7 @@ using namespace stim; -TEST(command_convert, convert_measurements_with_circuit) { +TEST(command_convert, convert_measurements_with_circuit_to_dets) { RaiiTempNamedFile tmp(R"CIRCUIT( X 0 M 0 1 @@ -31,12 +31,156 @@ TEST(command_convert, convert_measurements_with_circuit) { std::vector> measurement_data{ std::make_tuple("01", "00\n01\n10\n11\n"), std::make_tuple("b8", std::string({0x00, 0x02, 0x01, 0x03})), - std::make_tuple("dets", "shot\nshot M1\nshot M0\nshot M0 M1\n"), std::make_tuple("hits", "\n1\n0\n0,1\n"), std::make_tuple("r8", std::string({0x02, 0x01, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00}))}; for (const auto& [in_format, in_data] : measurement_data) { - for (const auto& [out_format, out_data] : measurement_data) { + ASSERT_EQ( + run_captured_stim_main( + {"convert", + "--in_format", + in_format.c_str(), + "--out_format", + "dets", + "--circuit", + tmp.path.data(), + "--types=M"}, + in_data), + "shot\nshot M1\nshot M0\nshot M0 M1\n"); + } +} + +TEST(command_convert, convert_detections_observables_with_circuit_to_dets) { + RaiiTempNamedFile tmp(R"CIRCUIT( + CX 0 2 1 2 + M 2 + CX rec[-1] 2 + DETECTOR rec[-1] + TICK + + CX 0 2 1 2 + M 2 + CX rec[-1] 2 + DETECTOR rec[-1] rec[-2] + TICK + + CX 0 2 1 2 + M 2 + CX rec[-1] 2 + DETECTOR rec[-1] rec[-2] + TICK + + M 0 1 + DETECTOR rec[-1] rec[-2] rec[-3] + OBSERVABLE_INCLUDE(0) rec[-1] + )CIRCUIT"); + + std::vector> detection_data{ + std::make_tuple("01", "00000\n11000\n01100\n00110\n00010\n00011\n"), + std::make_tuple("b8", std::string({0x00, 0x03, 0x06, 0x0c, 0x08, 0x18})), + std::make_tuple("hits", "\n0,1\n1,2\n2,3\n3\n3,4\n"), + std::make_tuple( + "r8", + std::string({0x05, 0x00, 0x00, 0x03, 0x01, 0x00, 0x02, 0x02, 0x00, 0x01, 0x03, 0x01, 0x03, 0x00, 0x00}))}; + + for (const auto& [in_format, in_data] : detection_data) { + ASSERT_EQ( + run_captured_stim_main( + {"convert", + "--in_format", + in_format.c_str(), + "--out_format", + "dets", + "--circuit", + tmp.path.data(), + "--types=DL"}, + in_data), + "shot\nshot D0 D1\nshot D1 D2\nshot D2 D3\nshot D3\nshot D3 L0\n"); + } +} + +TEST(command_convert, convert_detections_observables_with_circuit_to_dets_with_obs_out) { + RaiiTempNamedFile tmp(R"CIRCUIT( + CX 0 2 1 2 + M 2 + CX rec[-1] 2 + DETECTOR rec[-1] + TICK + + CX 0 2 1 2 + M 2 + CX rec[-1] 2 + DETECTOR rec[-1] rec[-2] + TICK + + CX 0 2 1 2 + M 2 + CX rec[-1] 2 + DETECTOR rec[-1] rec[-2] + TICK + + M 0 1 + DETECTOR rec[-1] rec[-2] rec[-3] + OBSERVABLE_INCLUDE(0) rec[-1] + )CIRCUIT"); + RaiiTempNamedFile tmp_obs; + + std::vector> detection_data{ + std::make_tuple("01", "00000\n11000\n01100\n00110\n00010\n00011\n"), + std::make_tuple("b8", std::string({0x00, 0x03, 0x06, 0x0c, 0x08, 0x18})), + std::make_tuple("hits", "\n0,1\n1,2\n2,3\n3\n3,4\n"), + std::make_tuple( + "r8", + std::string({0x05, 0x00, 0x00, 0x03, 0x01, 0x00, 0x02, 0x02, 0x00, 0x01, 0x03, 0x01, 0x03, 0x00, 0x00}))}; + + for (const auto& [in_format, in_data] : detection_data) { + ASSERT_EQ( + run_captured_stim_main( + {"convert", + "--in_format", + in_format.c_str(), + "--out_format", + "dets", + "--obs_out_format", + "dets", + "--obs_out", + tmp_obs.path.data(), + "--circuit", + tmp.path.data(), + "--types=DL"}, + in_data), + "shot\nshot D0 D1\nshot D1 D2\nshot D2 D3\nshot D3\nshot D3\n"); + ASSERT_EQ(tmp_obs.read_contents(), "shot\nshot\nshot\nshot\nshot\nshot L0\n"); + } +} + +TEST(command_convert, convert_detections_observables_with_circuit_no_dets) { + RaiiTempNamedFile tmp(R"CIRCUIT( + R 0 1 2 3 4 + TICK + CX 0 1 2 3 + DEPOLARIZE2(0.3) 0 1 2 3 + TICK + CX 2 1 4 3 + DEPOLARIZE2(0.3) 2 1 4 3 + TICK + MR 1 3 + DETECTOR(1, 0) rec[-2] + DETECTOR(3, 0) rec[-1] + M 0 2 4 + DETECTOR(1, 1) rec[-2] rec[-3] rec[-5] + DETECTOR(3, 1) rec[-1] rec[-2] rec[-4] + OBSERVABLE_INCLUDE(0) rec[-1] + )CIRCUIT"); + + std::vector> detection_data{ + std::make_tuple("01", "10100\n00011\n00000\n00100\n00000\n10000\n"), + std::make_tuple("b8", std::string({0x05, 0x18, 0x00, 0x04, 0x00, 0x01})), + std::make_tuple("hits", "0,2\n3,4\n\n2\n\n0\n"), + std::make_tuple("r8", std::string({0x00, 0x01, 0x02, 0x03, 0x00, 0x00, 0x05, 0x02, 0x02, 0x05, 0x00, 0x04}))}; + + for (const auto& [in_format, in_data] : detection_data) { + for (const auto& [out_format, out_data] : detection_data) { ASSERT_EQ( run_captured_stim_main( {"convert", @@ -46,21 +190,19 @@ TEST(command_convert, convert_measurements_with_circuit) { out_format.c_str(), "--circuit", tmp.path.data(), - "--types=M"}, + "--types=DL"}, in_data), out_data); } } } -TEST(command_convert, convert_detections_with_circuit) { - RaiiTempNamedFile tmp(R"CIRCUIT( - X 0 - M 0 1 - DETECTOR rec[-2] - DETECTOR rec[-1] - OBSERVABLE_INCLUDE(2) rec[-1] - )CIRCUIT"); +TEST(command_convert, convert_detections_observables_with_dem) { + RaiiTempNamedFile tmp(R"DEM( + detector D0 + detector D1 + logical_observable L2 + )DEM"); std::vector> detection_data{ std::make_tuple("01", "10000\n11001\n00000\n01001\n"), @@ -78,15 +220,123 @@ TEST(command_convert, convert_detections_with_circuit) { in_format.c_str(), "--out_format", out_format.c_str(), - "--circuit", - tmp.path.data(), - "--types=DL"}, + "--dem", + tmp.path.data()}, in_data), out_data); } } } +TEST(command_convert, convert_measurements_no_circuit_or_dem) { + std::vector> measurement_data{ + std::make_tuple("01", "100\n010\n110\n001\n010\n111\n"), + std::make_tuple("b8", std::string({0x01, 0x02, 0x03, 0x04, 0x02, 0x07})), + std::make_tuple("hits", "0\n1\n0,1\n2\n1\n0,1,2\n"), + std::make_tuple("dets", "shot M0\nshot M1\nshot M0 M1\nshot M2\nshot M1\nshot M0 M1 M2\n"), + std::make_tuple( + "r8", + std::string({0x00, 0x02, 0x01, 0x01, 0x00, 0x00, 0x01, 0x02, 0x00, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00}))}; + + for (const auto& [in_format, in_data] : measurement_data) { + for (const auto& [out_format, out_data] : measurement_data) { + ASSERT_EQ( + run_captured_stim_main( + {"convert", + "--in_format", + in_format.c_str(), + "--out_format", + out_format.c_str(), + "--num_measurements", + "3"}, + in_data), + out_data); + } + } +} + +TEST(command_convert, convert_detections_observables_no_circuit_or_dem) { + std::vector> detection_data{ + std::make_tuple("01", "10000\n11001\n00000\n01001\n"), + std::make_tuple("b8", std::string({0x01, 0x13, 0x00, 0x12})), + std::make_tuple("dets", "shot D0\nshot D0 D1 L2\nshot\nshot D1 L2\n"), + std::make_tuple("hits", "0\n0,1,4\n\n1,4\n"), + std::make_tuple("r8", std::string({0x00, 0x04, 0x00, 0x00, 0x02, 0x00, 0x05, 0x01, 0x02, 0x00}))}; + + for (const auto& [in_format, in_data] : detection_data) { + for (const auto& [out_format, out_data] : detection_data) { + ASSERT_EQ( + run_captured_stim_main( + {"convert", + "--in_format", + in_format.c_str(), + "--out_format", + out_format.c_str(), + "--num_detectors", + "2", + "--num_observables", + "3"}, + in_data), + out_data); + } + } +} + +TEST(command_convert, convert_bits_per_shot_no_dets) { + std::vector> measurement_data{ + std::make_tuple("01", "00\n01\n10\n11\n"), + std::make_tuple("b8", std::string({0x00, 0x02, 0x01, 0x03})), + std::make_tuple("hits", "\n1\n0\n0,1\n"), + std::make_tuple("r8", std::string({0x02, 0x01, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00}))}; + + for (const auto& [in_format, in_data] : measurement_data) { + for (const auto& [out_format, out_data] : measurement_data) { + ASSERT_EQ( + run_captured_stim_main( + {"convert", + "--in_format", + in_format.c_str(), + "--out_format", + out_format.c_str(), + "--bits_per_shot=2"}, + in_data), + out_data); + } + } +} + +TEST(command_convert, convert_multiple_bitword_sized_records) { + ASSERT_EQ( + run_captured_stim_main( + {"convert", "--in_format=b8", "--out_format=b8", "--bits_per_shot=2048"}, std::string(256, 0x6b)), + std::string(256, 0x6b)); +} + +TEST(command_convert, convert_circuit_fail_without_types) { + RaiiTempNamedFile tmp(R"CIRCUIT( + X 0 + M 0 1 + )CIRCUIT"); + + ASSERT_TRUE(matches( + run_captured_stim_main( + {"convert", "--in_format=01", "--out_format", "dets", "--circuit", tmp.path.data(), "--num_measurements=2"}, + ""), + ".*--types required when passing circuit.*")); +} + +TEST(command_convert, convert_fail_without_any_information) { + ASSERT_TRUE(matches( + run_captured_stim_main({"convert", "--in_format=r8", "--out_format=b8"}, ""), + ".*Not enough information given to parse input file.*")); +} + +TEST(command_convert, convert_fail_with_bits_per_shot_to_dets) { + ASSERT_TRUE(matches( + run_captured_stim_main({"convert", "--in_format=01", "--out_format", "dets", "--bits_per_shot=2"}, ""), + ".*Not enough information given to parse input file to write to dets.*")); +} + TEST(command_convert, convert_invalid_types) { RaiiTempNamedFile tmp(""); From f6c692242aaa891c22499d3b95ce99e956120235 Mon Sep 17 00:00:00 2001 From: Justin Ledford Date: Mon, 21 Aug 2023 13:57:50 -0700 Subject: [PATCH 8/9] remove trailing space in help --- src/stim/cmd/command_convert.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/stim/cmd/command_convert.cc b/src/stim/cmd/command_convert.cc index c651f3e40..1aec14dbe 100644 --- a/src/stim/cmd/command_convert.cc +++ b/src/stim/cmd/command_convert.cc @@ -500,7 +500,7 @@ SubCommandHelp stim::command_convert_help() { contained within an input file. Note that in most cases, a file will have either measurements only, - detections only, or detections and observables. + detections only, or detections and observables. The type values (M, D, L) correspond to the value prefix letters in dets files. See: From e767819501e81219a4433e254fd1795ad1c1728d Mon Sep 17 00:00:00 2001 From: Justin Ledford Date: Mon, 21 Aug 2023 14:04:37 -0700 Subject: [PATCH 9/9] update CLI doc with convert command --- doc/usage_command_line.md | 268 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 268 insertions(+) diff --git a/doc/usage_command_line.md b/doc/usage_command_line.md index 16b5a3f5b..b5f107f60 100644 --- a/doc/usage_command_line.md +++ b/doc/usage_command_line.md @@ -3,6 +3,7 @@ ## Index - [stim analyze_errors](#analyze_errors) +- [stim convert](#convert) - [stim detect](#detect) - [stim diagram](#diagram) - [stim explain_errors](#explain_errors) @@ -341,6 +342,273 @@ EXAMPLES detector(3, 1) D5 ``` + +### stim convert + +``` +NAME + stim convert + +SYNOPSIS + stim convert \ + --bits_per_shot int \ + [--circuit filepath] \ + [--in filepath] \ + [--in_format 01|b8|r8|ptb64|hits|dets] \ + --num_detectors int \ + --num_measurements int \ + --num_observables int \ + [--obs_out filepath] \ + [--obs_out_format 01|b8|r8|ptb64|hits|dets] \ + [--out filepath] \ + [--out_format 01|b8|r8|ptb64|hits|dets] \ + --types M|D|L + +DESCRIPTION + Convert data between result formats. + + See the various formats here: + https://github.com/quantumlib/Stim/blob/main/doc/result_formats.md + + To read and write data, the size of the records must be known. + If writing to a dets file, then the number of measurements, detectors + and observables per record must also be known. + + Both of these pieces of information can either be given directly, or + inferred from various data sources, such as circuit or dem files. + + +OPTIONS + --bits_per_shot + Specifies the number of bits per shot in the input/output files. + + This argument is required if the circuit, dem or num_* flags + are not given, and not supported when writing to a dets file. + + In this case we just treat the bits aas arbitrary data. It is up + to the user to interpert it correctly. + + + --circuit + Specifies where the circuit that generated the data is. + + This argument is optional, but can be used to infer the number of + measurements, detectors and observables to use per record. + + The circuit file should be a stim circuit. See: + https://github.com/quantumlib/Stim/blob/main/doc/file_format_stim_circuit.md + + + --in + Chooses the file to read data from. + + By default, the circuit is read from stdin. When `--in $FILEPATH` is + specified, the circuit is instead read from the file at $FILEPATH. + + The input's format is specified by `--in_format`. See: + https://github.com/quantumlib/Stim/blob/main/doc/result_formats.md + + + --in_format + Specifies the data format to use when reading data. + + The available formats are: + + 01 (default): dense human readable + b8: bit packed binary + r8: run length binary + ptb64: partially transposed bit packed binary for SIMD + hits: sparse human readable + dets: sparse human readable with type hints + + For a detailed description of each result format, see the result + format reference: + https://github.com/quantumlib/Stim/blob/main/doc/result_formats.md + + + --num_detectors + Specifies the number of detectors in the input/output files. + + This argument is required if writing to a dets file and the circuit + or dem is not given. + + + --num_measurements + Specifies the number of measurements in the input/output files. + + This argument is required if writing to a dets file and the circuit + is not given. + + + --num_observables + Specifies the number of observables in the input/output files. + + This argument is required if writing to a dets file and the circuit + or dem is not given. + + + --obs_out + Specifies the file to write observable flip data to. + + When producing detection event data, the goal is typically to + predict whether or not the logical observables were flipped by using + the detection events. This argument specifies where to write that + observable flip data. + + If this argument isn't specified, the observable flip data isn't + written to a file. + + The output is in a format specified by `--obs_out_format`. See: + https://github.com/quantumlib/Stim/blob/main/doc/result_formats.md + + + --obs_out_format + Specifies the data format to use when writing observable flip data. + + Irrelevant unless `--obs_out` is specified. + + The available formats are: + + 01 (default): dense human readable + b8: bit packed binary + r8: run length binary + ptb64: partially transposed bit packed binary for SIMD + hits: sparse human readable + dets: sparse human readable with type hints + + For a detailed description of each result format, see the result + format reference: + https://github.com/quantumlib/Stim/blob/main/doc/result_formats.md + + + --out + Chooses where to write the data to. + + By default, the output is written to stdout. When `--out $FILEPATH` + is specified, the output is instead written to the file at $FILEPATH. + + The output's format is specified by `--out_format`. See: + https://github.com/quantumlib/Stim/blob/main/doc/result_formats.md + + + --out_format + Specifies the data format to use when writing output data. + + The available formats are: + + 01 (default): dense human readable + b8: bit packed binary + r8: run length binary + ptb64: partially transposed bit packed binary for SIMD + hits: sparse human readable + dets: sparse human readable with type hints + + For a detailed description of each result format, see the result + format reference: + https://github.com/quantumlib/Stim/blob/main/doc/result_formats.md + + + --types + Specifies the types of events in the files. + + This argument is required if a circuit is given as the circuit can + give the number of each type of event, but not which events are + contained within an input file. + + Note that in most cases, a file will have either measurements only, + detections only, or detections and observables. + + The type values (M, D, L) correspond to the value prefix letters + in dets files. See: + https://github.com/quantumlib/Stim/blob/main/doc/result_formats.md#dets + + +EXAMPLES + Example #1 + >>> cat example.01 + 10000 + 11001 + 00000 + 01001 + + >>> stim convert \ + --in example.01 \ + --in_format 01 \ + --out_format dets + --num_measurements 5 + shot M0 + shot M0 M1 M4 + shot + shot M1 M4 + + + Example #2 + >>> cat example.dem + detector D0 + detector D1 + logical_observable L2 + + >>> cat example.dets + shot D0 + shot D0 D1 L2 + shot + shot D1 L2 + + >>> stim convert \ + --in example.dets \ + --in_format dets \ + --out_format 01 + --dem example.dem + 10000 + 11001 + 00000 + 01001 + + + Example #3 + >>> cat example_circuit.stim + X 0 + M 0 1 + DETECTOR rec[-2] + DETECTOR rec[-1] + OBSERVABLE_INCLUDE(2) rec[-1] + + >>> cat example_measure_data.01 + 00 + 01 + 10 + 11 + + >>> stim convert \ + --in example_measure_data.01 \ + --in_format 01 \ + --out_format dets + --circuit example_circuit.stim \ + --types M + shot + shot M1 + shot M0 + shot M0 M1 + + + Example #4 + >>> cat example.01 + 0010 + 0111 + 1000 + 1110 + + >>> stim convert \ + --in example.01 \ + --in_format 01 \ + --out_format hits + --bits_per_shot 4 + 2 + 1,2,3 + 0 + 0,1,2 +``` + ### stim detect