From 21eb0377e8257318380403b30b51e1286cb662dc Mon Sep 17 00:00:00 2001
From: Corey Ostrove
Date: Wed, 22 May 2024 16:50:42 -0600
Subject: [PATCH 01/30] Initial implementation of caching for layout creation

The creation of COPA layouts relies on a number of specialized circuit
structures which require non-trivial time to construct. In the context of
iterative GST estimation with nested circuit lists (i.e. the default) this
results in unnecessary repeated construction of these objects. This is an
initial implementation of a caching scheme allowing for more efficient re-use
of these circuit structures across iterations.
---
 pygsti/algorithms/core.py              |  13 ++-
 pygsti/circuits/__init__.py            |   2 +-
 pygsti/circuits/circuit.py             |   1 -
 pygsti/forwardsims/matrixforwardsim.py |   9 +-
 pygsti/layouts/matrixlayout.py         | 120 ++++++++++++++++++-------
 pygsti/models/model.py                 | 108 ++++++++++++++++++++--
 6 files changed, 208 insertions(+), 45 deletions(-)

diff --git a/pygsti/algorithms/core.py b/pygsti/algorithms/core.py
index f2b749136..a2c6e0038 100644
--- a/pygsti/algorithms/core.py
+++ b/pygsti/algorithms/core.py
@@ -33,6 +33,8 @@
 from pygsti.baseobjs.resourceallocation import ResourceAllocation as _ResourceAllocation
 from pygsti.optimize.customlm import CustomLMOptimizer as _CustomLMOptimizer
 from pygsti.optimize.customlm import Optimizer as _Optimizer
+from pygsti import forwardsims as _fwdsims
+from pygsti import layouts as _layouts

 _dummy_profiler = _DummyProfiler()

@@ -888,9 +890,18 @@ def _max_array_types(artypes_list):  # get the maximum number of each array type
     #The ModelDatasetCircuitsStore
     printer.log('Precomputing CircuitOutcomeProbabilityArray layouts for each iteration.', 2)
     precomp_layouts = []
+    #pre-compute a dictionary caching completed circuits for layout construction performance.
+    unique_circuits = {ckt for circuit_list in circuit_lists for ckt in circuit_list}
+    print(f'{len(unique_circuits)=}')
+    if isinstance(mdl.sim, _fwdsims.MatrixForwardSimulator):
+        precomp_layout_circuit_cache = _layouts.matrixlayout.create_matrix_copa_layout_circuit_cache(unique_circuits, mdl)
+    else:
+        precomp_layout_circuit_cache = None
+    #print(completed_circuit_cache)
     for i, circuit_list in enumerate(circuit_lists):
         printer.log(f'Layout for iteration {i}', 2)
-        precomp_layouts.append(mdl.sim.create_layout(circuit_list, dataset, resource_alloc, array_types, verbosity= printer - 1))
+        precomp_layouts.append(mdl.sim.create_layout(circuit_list, dataset, resource_alloc, array_types, verbosity= printer - 1,
+                                                     layout_creation_circuit_cache = precomp_layout_circuit_cache))

     with printer.progress_logging(1):
         for i in range(starting_index, len(circuit_lists)):
diff --git a/pygsti/circuits/__init__.py b/pygsti/circuits/__init__.py
index 46be7652f..28cd30f66 100644
--- a/pygsti/circuits/__init__.py
+++ b/pygsti/circuits/__init__.py
@@ -10,7 +10,7 @@
 # http://www.apache.org/licenses/LICENSE-2.0 or in the LICENSE file in the root pyGSTi directory.
#*************************************************************************************************** -from .circuit import Circuit +from .circuit import Circuit, SeparatePOVMCircuit from .circuitlist import CircuitList from .circuitstructure import CircuitPlaquette, FiducialPairPlaquette, \ GermFiducialPairPlaquette, PlaquetteGridCircuitStructure diff --git a/pygsti/circuits/circuit.py b/pygsti/circuits/circuit.py index 211405356..f09f51a2d 100644 --- a/pygsti/circuits/circuit.py +++ b/pygsti/circuits/circuit.py @@ -4412,7 +4412,6 @@ def add_expanded_circuit_outcomes(circuit, running_outcomes, ootree, start): return expanded_circuit_outcomes - class CompressedCircuit(object): """ A "compressed" Circuit that requires less disk space. diff --git a/pygsti/forwardsims/matrixforwardsim.py b/pygsti/forwardsims/matrixforwardsim.py index ddc18270a..e47ad0bb5 100644 --- a/pygsti/forwardsims/matrixforwardsim.py +++ b/pygsti/forwardsims/matrixforwardsim.py @@ -1025,7 +1025,7 @@ def _compute_hproduct_cache(self, layout_atom_tree, prod_cache, d_prod_cache1, return hProdCache def create_layout(self, circuits, dataset=None, resource_alloc=None, array_types=('E',), - derivative_dimensions=None, verbosity=0): + derivative_dimensions=None, verbosity=0, layout_creation_circuit_cache= None): """ Constructs an circuit-outcome-probability-array (COPA) layout for a list of circuits. @@ -1056,6 +1056,10 @@ def create_layout(self, circuits, dataset=None, resource_alloc=None, array_types Determines how much output to send to stdout. 0 means no output, higher integers mean more output. + layout_creation_circuit_cache : dict, optional (default None) + A precomputed dictionary serving as a cache for completed + circuits. I.e. circuits with prep labels and POVM labels appended. + Returns ------- MatrixCOPALayout @@ -1105,7 +1109,8 @@ def create_layout(self, circuits, dataset=None, resource_alloc=None, array_types assert(_np.prod((na,) + npp) <= nprocs), "Processor grid size exceeds available processors!" 
layout = _MatrixCOPALayout(circuits, self.model, dataset, natoms, - na, npp, param_dimensions, param_blk_sizes, resource_alloc, verbosity) + na, npp, param_dimensions, param_blk_sizes, resource_alloc, verbosity, + layout_creation_circuit_cache=layout_creation_circuit_cache) if mem_limit is not None: loc_nparams1 = num_params / npp[0] if len(npp) > 0 else 0 diff --git a/pygsti/layouts/matrixlayout.py b/pygsti/layouts/matrixlayout.py index 4e6cf9266..0b6a86116 100644 --- a/pygsti/layouts/matrixlayout.py +++ b/pygsti/layouts/matrixlayout.py @@ -68,7 +68,7 @@ class _MatrixCOPALayoutAtom(_DistributableAtom): """ def __init__(self, unique_complete_circuits, unique_nospam_circuits, circuits_by_unique_nospam_circuits, - ds_circuits, group, helpful_scratch, model, dataset): + ds_circuits, group, helpful_scratch, model, dataset=None, expanded_and_separated_circuit_cache=None): #Note: group gives unique_nospam_circuits indices, which circuits_by_unique_nospam_circuits # turns into "unique complete circuit" indices, which the layout via it's to_unique can map @@ -78,10 +78,15 @@ def add_expanded_circuits(indices, add_to_this_dict): for i in indices: nospam_c = unique_nospam_circuits[i] for unique_i in circuits_by_unique_nospam_circuits[nospam_c]: # "unique" circuits: add SPAM to nospam_c - observed_outcomes = None if (dataset is None) else dataset[ds_circuits[unique_i]].unique_outcomes - expc_outcomes = unique_complete_circuits[unique_i].expand_instruments_and_separate_povm( - model, observed_outcomes) - #Note: unique_complete_circuits may have duplicates (they're only unique *pre*-completion) + if expanded_and_separated_circuit_cache is None: + observed_outcomes = None if (dataset is None) else dataset[ds_circuits[unique_i]].unique_outcomes + expc_outcomes = model.expand_instruments_and_separate_povm(unique_complete_circuits[unique_i], observed_outcomes) + #Note: unique_complete_circuits may have duplicates (they're only unique *pre*-completion) + else: + expc_outcomes = expanded_and_separated_circuit_cache.get(unique_complete_circuits[unique_i], None) + if expc_outcomes is None: #fall back on original non-cache behavior. + observed_outcomes = None if (dataset is None) else dataset[ds_circuits[unique_i]].unique_outcomes + expc_outcomes = model.expand_instruments_and_separate_povm(unique_complete_circuits[unique_i], observed_outcomes) for sep_povm_c, outcomes in expc_outcomes.items(): # for each expanded cir from unique_i-th circuit prep_lbl = sep_povm_c.circuit_without_povm[0] @@ -271,11 +276,16 @@ class MatrixCOPALayout(_DistributableCOPALayout): verbosity : int or VerbosityPrinter Determines how much output to send to stdout. 0 means no output, higher integers mean more output. + + layout_creation_circuit_cache : dict, optional (default None) + A precomputed dictionary serving as a cache for completed + circuits. I.e. circuits with prep labels and POVM labels appended. """ def __init__(self, circuits, model, dataset=None, num_sub_trees=None, num_tree_processors=1, num_param_dimension_processors=(), param_dimensions=(), - param_dimension_blk_sizes=(), resource_alloc=None, verbosity=0): + param_dimension_blk_sizes=(), resource_alloc=None, verbosity=0, + layout_creation_circuit_cache = None): #OUTDATED: TODO - revise this: # 1. 
pre-process => get complete circuits => spam-tuples list for each no-spam circuit (no expanding yet) @@ -290,17 +300,48 @@ def __init__(self, circuits, model, dataset=None, num_sub_trees=None, num_tree_p unique_circuits, to_unique = self._compute_unique_circuits(circuits) aliases = circuits.op_label_aliases if isinstance(circuits, _CircuitList) else None ds_circuits = _lt.apply_aliases_to_circuits(unique_circuits, aliases) - unique_complete_circuits = [model.complete_circuit(c) for c in unique_circuits] + + #extract subcaches from layout_creation_circuit_cache: + if layout_creation_circuit_cache is not None: + completed_circuit_cache = layout_creation_circuit_cache.get('completed_circuits', None) + split_circuit_cache = layout_creation_circuit_cache.get('split_circuits', None) + expanded_and_separated_circuits_cache = layout_creation_circuit_cache.get('expanded_and_separated_circuits', None) + else: + completed_circuit_cache = None + split_circuit_cache = None + expanded_and_separated_circuits_cache = None + + if completed_circuit_cache is None: + unique_complete_circuits = [model.complete_circuit(c) for c in unique_circuits] + else: + unique_complete_circuits = [] + for c in unique_circuits: + comp_ckt = completed_circuit_cache.get(c, None) + if completed_circuit_cache is not None: + unique_complete_circuits.append(comp_ckt) + else: + unique_complete_circuits.append(model.complete_circuit(c)) #Note: "unique" means a unique circuit *before* circuit-completion, so there could be duplicate # "unique circuits" after completion, e.g. "rho0Gx" and "Gx" could both complete to "rho0GxMdefault_0". circuits_by_unique_nospam_circuits = _collections.OrderedDict() - for i, c in enumerate(unique_complete_circuits): - _, nospam_c, _ = model.split_circuit(c) - if nospam_c in circuits_by_unique_nospam_circuits: - circuits_by_unique_nospam_circuits[nospam_c].append(i) - else: - circuits_by_unique_nospam_circuits[nospam_c] = [i] + if completed_circuit_cache is None: + for i, c in enumerate(unique_complete_circuits): + _, nospam_c, _ = model.split_circuit(c) + if nospam_c in circuits_by_unique_nospam_circuits: + circuits_by_unique_nospam_circuits[nospam_c].append(i) + else: + circuits_by_unique_nospam_circuits[nospam_c] = [i] + else: + for i, c in enumerate(unique_complete_circuits): + _, nospam_c, _ = split_circuit_cache.get(c, None) + if nospam_c is None: + _, nospam_c, _ = model.split_circuit(c) + if nospam_c in circuits_by_unique_nospam_circuits: + circuits_by_unique_nospam_circuits[nospam_c].append(i) + else: + circuits_by_unique_nospam_circuits[nospam_c] = [i] + unique_nospam_circuits = list(circuits_by_unique_nospam_circuits.keys()) # Split circuits into groups that will make good subtrees (all procs do this) @@ -315,32 +356,45 @@ def __init__(self, circuits, model, dataset=None, num_sub_trees=None, num_tree_p helpful_scratch = [set()] # (elements of `groups` contain indices into `unique_nospam_circuits`) - # Divide `groups` into num_tree_processors roughly equal sets (each containing - # potentially multiple groups) - #my_group_indices, group_owners, grp_subcomm = self._distribute(num_tree_processors, len(groups), - # resource_alloc, verbosity) - #my_group_indices = set(my_group_indices) - - #my_atoms = [] - #elindex_outcome_tuples = _collections.OrderedDict([ - # (orig_i, list()) for orig_i in range(len(unique_circuits))]) - # - #offset = 0 - #for i, (group, helpful_scratch_group) in enumerate(zip(groups, helpful_scratch)): - # if i not in my_group_indices: continue - # 
my_atoms.append(_MatrixCOPALayoutAtom(unique_complete_circuits, unique_nospam_circuits, - # circuits_by_unique_nospam_circuits, ds_circuits, - # group, helpful_scratch_group, model, dataset, offset, - # elindex_outcome_tuples)) - # offset += my_atoms[-1].num_elements - def _create_atom(args): group, helpful_scratch_group = args return _MatrixCOPALayoutAtom(unique_complete_circuits, unique_nospam_circuits, circuits_by_unique_nospam_circuits, ds_circuits, - group, helpful_scratch_group, model, dataset) + group, helpful_scratch_group, model, dataset, + expanded_and_separated_circuits_cache) super().__init__(circuits, unique_circuits, to_unique, unique_complete_circuits, _create_atom, list(zip(groups, helpful_scratch)), num_tree_processors, num_param_dimension_processors, param_dimensions, param_dimension_blk_sizes, resource_alloc, verbosity) + +def create_matrix_copa_layout_circuit_cache(circuits, model, dataset=None): + """ + Helper function for pre-computing/pre-processing circuits structures + used in matrix layout creation. + """ + cache = dict() + completed_circuits = {ckt: model.complete_circuit(ckt) for ckt in circuits} + cache['completed_circuits'] = completed_circuits + split_circuits = {ckt: model.split_circuit(ckt) for ckt in completed_circuits.values()} + cache['split_circuits'] = split_circuits + + expanded_circuit_cache = dict() + #There is some potential aliasing that happens in the init that I am not + #doing here, but I think 90+% of the time this ought to be fine. + if dataset is not None: + for ckt in completed_circuits.values(): + ds_row = dataset.get(ckt, None) + if ds_row is not None: + expanded_circuit_cache[ckt] = model.expand_instruments_and_separate_povm(ckt, ds_row.unique_outcomes) + else: + expanded_circuit_cache = {ckt: model.expand_instruments_and_separate_povm(ckt, None) + for ckt in completed_circuits.values()} + + cache['expanded_and_separated_circuits'] = expanded_circuit_cache + + return cache + + + + diff --git a/pygsti/models/model.py b/pygsti/models/model.py index dbc799a29..cb688fdcb 100644 --- a/pygsti/models/model.py +++ b/pygsti/models/model.py @@ -32,6 +32,7 @@ from pygsti.baseobjs.resourceallocation import ResourceAllocation as _ResourceAllocation from pygsti.tools import slicetools as _slct from pygsti.tools import matrixtools as _mt +from pygsti.circuits import Circuit as _Circuit, SeparatePOVMCircuit as _SeparatePOVMCircuit MEMLIMIT_FOR_NONGAUGE_PARAMS = None @@ -1234,7 +1235,6 @@ def complete_circuit(self, circuit): if len(circuit) == 0 or not self._is_primitive_prep_layer_lbl(circuit[0]): prep_lbl_to_prepend = self._default_primitive_prep_layer_lbl() if prep_lbl_to_prepend is None: - #raise ValueError(f"Missing state prep in {circuit.str} and there's no default!") raise ValueError("Missing state prep in %s and there's no default!" % circuit.str) if len(circuit) == 0 or not self._is_primitive_povm_layer_lbl(circuit[-1]): @@ -1242,19 +1242,113 @@ def complete_circuit(self, circuit): povm_lbl_to_append = self._default_primitive_povm_layer_lbl(sslbls) if povm_lbl_to_append is None: - #raise ValueError(f"Missing POVM in {circuit.str} and there's no default!") raise ValueError("Missing POVM in %s and there's no default!" 
% circuit.str) if prep_lbl_to_prepend or povm_lbl_to_append: - #SLOW way: - #circuit = circuit.copy(editable=True) - #if prep_lbl_to_prepend: circuit.insert_layer_inplace(prep_lbl_to_prepend, 0) - #if povm_lbl_to_append: circuit.insert_layer_inplace(povm_lbl_to_append, len(circuit)) - #circuit.done_editing() if prep_lbl_to_prepend: circuit = (prep_lbl_to_prepend,) + circuit if povm_lbl_to_append: circuit = circuit + (povm_lbl_to_append,) return circuit + + def expand_instruments_and_separate_povm(self, circuit, observed_outcomes=None): + """ + Creates a dictionary of :class:`SeparatePOVMCircuit` objects from expanding the instruments of this circuit. + + Each key of the returned dictionary replaces the instruments in this circuit with a selection + of their members. (The size of the resulting dictionary is the product of the sizes of + each instrument appearing in this circuit when `observed_outcomes is None`). Keys are stored + as :class:`SeparatePOVMCircuit` objects so it's easy to keep track of which POVM outcomes (effects) + correspond to observed data. This function is, for the most part, used internally to process + a circuit before computing its outcome probabilities. + + Parameters + ---------- + model : Model + The model used to provide necessary details regarding the expansion, including: + + - default SPAM layers + - definitions of instrument-containing layers + - expansions of individual instruments and POVMs + + Returns + ------- + OrderedDict + A dict whose keys are :class:`SeparatePOVMCircuit` objects and whose + values are tuples of the outcome labels corresponding to this circuit, + one per POVM effect held in the key. + """ + complete_circuit = self.complete_circuit(circuit) + expanded_circuit_outcomes = _collections.OrderedDict() + povm_lbl = complete_circuit[-1] # "complete" circuits always end with a POVM label + circuit_without_povm = complete_circuit[0:len(complete_circuit) - 1] + + def create_tree(lst): + subs = _collections.OrderedDict() + for el in lst: + if len(el) > 0: + if el[0] not in subs: subs[el[0]] = [] + subs[el[0]].append(el[1:]) + return _collections.OrderedDict([(k, create_tree(sub_lst)) for k, sub_lst in subs.items()]) + + def add_expanded_circuit_outcomes(circuit, running_outcomes, ootree, start): + """ + """ + cir = circuit if start == 0 else circuit[start:] # for performance, avoid uneeded slicing + for k, layer_label in enumerate(cir, start=start): + components = layer_label.components + #instrument_inds = _np.nonzero([model._is_primitive_instrument_layer_lbl(component) + # for component in components])[0] # SLOWER than statement below + instrument_inds = _np.array([i for i, component in enumerate(components) + if self._is_primitive_instrument_layer_lbl(component)]) + if instrument_inds.size > 0: + # This layer contains at least one instrument => recurse with instrument(s) replaced with + # all combinations of their members. 
+ component_lookup = {i: comp for i, comp in enumerate(components)} + instrument_members = [self._member_labels_for_instrument(components[i]) + for i in instrument_inds] # also components of outcome labels + for selected_instrmt_members in _itertools.product(*instrument_members): + expanded_layer_lbl = component_lookup.copy() + expanded_layer_lbl.update({i: components[i] + "_" + sel + for i, sel in zip(instrument_inds, selected_instrmt_members)}) + expanded_layer_lbl = _Label([expanded_layer_lbl[i] for i in range(len(components))]) + + if ootree is not None: + new_ootree = ootree + for sel in selected_instrmt_members: + new_ootree = new_ootree.get(sel, {}) + if len(new_ootree) == 0: continue # no observed outcomes along this outcome-tree path + else: + new_ootree = None + + add_expanded_circuit_outcomes(circuit[0:k] + _Circuit((expanded_layer_lbl,)) + circuit[k + 1:], + running_outcomes + selected_instrmt_members, new_ootree, k + 1) + break + + else: # no more instruments to process: `cir` contains no instruments => add an expanded circuit + assert(circuit not in expanded_circuit_outcomes) # shouldn't be possible to generate duplicates... + elabels = self._effect_labels_for_povm(povm_lbl) if (observed_outcomes is None) \ + else tuple(ootree.keys()) + outcomes = tuple((running_outcomes + (elabel,) for elabel in elabels)) + expanded_circuit_outcomes[_SeparatePOVMCircuit(circuit, povm_lbl, elabels)] = outcomes + + ootree = create_tree(observed_outcomes) if observed_outcomes is not None else None # tree of observed outcomes + # e.g. [('0','00'), ('0','01'), ('1','10')] ==> {'0': {'00': {}, '01': {}}, '1': {'10': {}}} + + if self._has_instruments(): + add_expanded_circuit_outcomes(circuit_without_povm, (), ootree, start=0) + else: + # It may be helpful to cache the set of elabels for a POVM (maybe within the model?) because + # currently the call to _effect_labels_for_povm may be a bottleneck. It's needed, even when we have + # observed outcomes, because there may be some observed outcomes that aren't modeled (e.g. leakage states) + if observed_outcomes is None: + elabels = self._effect_labels_for_povm(povm_lbl) + else: + possible_lbls = set(self._effect_labels_for_povm(povm_lbl)) + elabels = tuple([oo for oo in ootree.keys() if oo in possible_lbls]) + outcomes = tuple(((elabel,) for elabel in elabels)) + expanded_circuit_outcomes[_SeparatePOVMCircuit(circuit_without_povm, povm_lbl, elabels)] = outcomes + + return expanded_circuit_outcomes # ---- Operation container interface ---- # These functions allow oracle access to whether a label of a given type From 9bc47bc35347dc538bc6147a08ad0f656261014e Mon Sep 17 00:00:00 2001 From: Corey Ostrove Date: Wed, 29 May 2024 17:15:06 -0600 Subject: [PATCH 02/30] Add caching for spam-free circuit expansion Cache the expanded SPAM-free circuits to reduce recomputing things unnecessarily. --- pygsti/algorithms/core.py | 2 +- pygsti/layouts/matrixlayout.py | 60 ++++++++++++++++++++++++---------- 2 files changed, 44 insertions(+), 18 deletions(-) diff --git a/pygsti/algorithms/core.py b/pygsti/algorithms/core.py index a2c6e0038..b4f67c286 100644 --- a/pygsti/algorithms/core.py +++ b/pygsti/algorithms/core.py @@ -890,9 +890,9 @@ def _max_array_types(artypes_list): # get the maximum number of each array type #The ModelDatasetCircuitsStore printer.log('Precomputing CircuitOutcomeProbabilityArray layouts for each iteration.', 2) precomp_layouts = [] + #pre-compute a dictionary caching completed circuits for layout construction performance. 
unique_circuits = {ckt for circuit_list in circuit_lists for ckt in circuit_list} - print(f'{len(unique_circuits)=}') if isinstance(mdl.sim, _fwdsims.MatrixForwardSimulator): precomp_layout_circuit_cache = _layouts.matrixlayout.create_matrix_copa_layout_circuit_cache(unique_circuits, mdl) else: diff --git a/pygsti/layouts/matrixlayout.py b/pygsti/layouts/matrixlayout.py index c9986e477..4438ff7c8 100644 --- a/pygsti/layouts/matrixlayout.py +++ b/pygsti/layouts/matrixlayout.py @@ -68,7 +68,8 @@ class _MatrixCOPALayoutAtom(_DistributableAtom): """ def __init__(self, unique_complete_circuits, unique_nospam_circuits, circuits_by_unique_nospam_circuits, - ds_circuits, group, helpful_scratch, model, dataset=None, expanded_and_separated_circuit_cache=None): + ds_circuits, group, helpful_scratch, model, dataset=None, expanded_and_separated_circuit_cache=None, + double_expanded_nospam_circuits_cache = None): #Note: group gives unique_nospam_circuits indices, which circuits_by_unique_nospam_circuits # turns into "unique complete circuit" indices, which the layout via it's to_unique can map @@ -119,20 +120,34 @@ def add_expanded_circuits(indices, add_to_this_dict): expanded_nospam_circuits = _collections.OrderedDict( [(i, cir) for i, cir in enumerate(expanded_nospam_circuit_outcomes.keys())]) + #print(f'{expanded_nospam_circuits=}') + # add suggested scratch to the "final" elements as far as the tree creation is concerned # - this allows these scratch element to help balance the tree. - expanded_nospam_circuit_outcomes_plus_scratch = expanded_nospam_circuit_outcomes.copy() - add_expanded_circuits(helpful_scratch, expanded_nospam_circuit_outcomes_plus_scratch) - expanded_nospam_circuits_plus_scratch = _collections.OrderedDict( - [(i, cir) for i, cir in enumerate(expanded_nospam_circuit_outcomes_plus_scratch.keys())]) + if helpful_scratch: + expanded_nospam_circuit_outcomes_plus_scratch = expanded_nospam_circuit_outcomes.copy() + add_expanded_circuits(helpful_scratch, expanded_nospam_circuit_outcomes_plus_scratch) + expanded_nospam_circuits_plus_scratch = _collections.OrderedDict( + [(i, cir) for i, cir in enumerate(expanded_nospam_circuit_outcomes_plus_scratch.keys())]) + else: + expanded_nospam_circuits_plus_scratch = expanded_nospam_circuits.copy() double_expanded_nospam_circuits_plus_scratch = _collections.OrderedDict() - for i, cir in expanded_nospam_circuits_plus_scratch.items(): - cir = cir.copy(editable=True) - cir.expand_subcircuits() # expand sub-circuits for a more efficient tree - cir.done_editing() - double_expanded_nospam_circuits_plus_scratch[i] = cir + if double_expanded_nospam_circuits_cache is not None: + for i, cir in expanded_nospam_circuits_plus_scratch.items(): + # expand sub-circuits for a more efficient tree + double_expanded_ckt = double_expanded_nospam_circuits_cache.get(cir, None) + if double_expanded_ckt is None: #Fall back to standard behavior and do expansion. 
+ double_expanded_nospam_circuits_plus_scratch[i] = cir.expand_subcircuits() + else: + double_expanded_nospam_circuits_plus_scratch[i] = double_expanded_ckt + else: + for i, cir in expanded_nospam_circuits_plus_scratch.items(): + # expand sub-circuits for a more efficient tree + double_expanded_nospam_circuits_plus_scratch[i] = cir.expand_subcircuits() + #print(f'{double_expanded_nospam_circuits_plus_scratch=}') + #print(f'{double_expanded_nospam_circuits_plus_scratch == expanded_nospam_circuits}') self.tree = _EvalTree.create(double_expanded_nospam_circuits_plus_scratch) #print("Atom tree: %d circuits => tree of size %d" % (len(expanded_nospam_circuits), len(self.tree))) @@ -306,10 +321,12 @@ def __init__(self, circuits, model, dataset=None, num_sub_trees=None, num_tree_p completed_circuit_cache = layout_creation_circuit_cache.get('completed_circuits', None) split_circuit_cache = layout_creation_circuit_cache.get('split_circuits', None) expanded_and_separated_circuits_cache = layout_creation_circuit_cache.get('expanded_and_separated_circuits', None) + expanded_subcircuits_no_spam_cache = layout_creation_circuit_cache.get('expanded_subcircuits_no_spam', None) else: completed_circuit_cache = None split_circuit_cache = None expanded_and_separated_circuits_cache = None + expanded_subcircuits_no_spam_cache = None if completed_circuit_cache is None: unique_complete_circuits, split_unique_circuits = model.complete_circuits(unique_circuits, return_split=True) @@ -360,7 +377,8 @@ def _create_atom(args): return _MatrixCOPALayoutAtom(unique_complete_circuits, unique_nospam_circuits, circuits_by_unique_nospam_circuits, ds_circuits, group, helpful_scratch_group, model, dataset, - expanded_and_separated_circuits_cache) + expanded_and_separated_circuits_cache, + expanded_subcircuits_no_spam_cache) super().__init__(circuits, unique_circuits, to_unique, unique_complete_circuits, _create_atom, list(zip(groups, helpful_scratch)), num_tree_processors, @@ -373,10 +391,10 @@ def create_matrix_copa_layout_circuit_cache(circuits, model, dataset=None): used in matrix layout creation. 
""" cache = dict() - completed_circuits = {ckt: model.complete_circuit(ckt) for ckt in circuits} - cache['completed_circuits'] = completed_circuits - split_circuits = {ckt: model.split_circuit(ckt) for ckt in completed_circuits.values()} - cache['split_circuits'] = split_circuits + completed_circuits, split_circuits = model.complete_circuits(circuits, return_split=True) + + cache['completed_circuits'] = {ckt: comp_ckt for ckt, comp_ckt in zip(circuits, completed_circuits)} + cache['split_circuits'] = {ckt: split_ckt for ckt, split_ckt in zip(cache['completed_circuits'].values(), split_circuits)} expanded_circuit_cache = dict() #There is some potential aliasing that happens in the init that I am not @@ -388,10 +406,18 @@ def create_matrix_copa_layout_circuit_cache(circuits, model, dataset=None): expanded_circuit_cache[ckt] = model.expand_instruments_and_separate_povm(ckt, ds_row.unique_outcomes) else: expanded_circuit_cache = {ckt: model.expand_instruments_and_separate_povm(ckt, None) - for ckt in completed_circuits.values()} + for ckt in cache['completed_circuits'].values()} cache['expanded_and_separated_circuits'] = expanded_circuit_cache - + + expanded_subcircuits_no_spam_cache = dict() + for expc_outcomes in cache['expanded_and_separated_circuits'].values(): + for sep_povm_c, _ in expc_outcomes.items(): # for each expanded cir from unique_i-th circuit + exp_nospam_c = sep_povm_c.circuit_without_povm[1:] + expanded_subcircuits_no_spam_cache[exp_nospam_c] = exp_nospam_c.expand_subcircuits() + + cache['expanded_subcircuits_no_spam'] = expanded_subcircuits_no_spam_cache + return cache From bbf29ab5012e3823c5bf32ab65e08b586e36b2fc Mon Sep 17 00:00:00 2001 From: Corey Ostrove Date: Wed, 29 May 2024 23:45:38 -0600 Subject: [PATCH 03/30] Improve SeparatePOVMCircuit internals This updates the implementation of the SeparatePOVMCircuit containter class. The most important change is adding an attribute for the full_effect_labels that avoids uneeded reconstruction. To add protection then, to ensure that this is kept in sync with everything else, the povm_label and effect_labels attributes (which feed into full_effect_labels) have been promoted to properties with setters that ensure the full_effect_labels are kept synced. 
---
 pygsti/circuits/circuit.py | 30 ++++++++++++++++++++++++++----
 1 file changed, 26 insertions(+), 4 deletions(-)

diff --git a/pygsti/circuits/circuit.py b/pygsti/circuits/circuit.py
index 863b66949..0d314ff11 100644
--- a/pygsti/circuits/circuit.py
+++ b/pygsti/circuits/circuit.py
@@ -4661,12 +4661,35 @@ class SeparatePOVMCircuit(object):
     """
     def __init__(self, circuit_without_povm, povm_label, effect_labels):
         self.circuit_without_povm = circuit_without_povm
-        self.povm_label = povm_label
-        self.effect_labels = effect_labels
+        self._povm_label = povm_label
+        self._effect_labels = effect_labels
+        self._full_effect_labels = tuple([(self.povm_label + "_" + el) for el in self._effect_labels])

     @property
     def full_effect_labels(self):
-        return [(self.povm_label + "_" + el) for el in self.effect_labels]
+        return self._full_effect_labels
+
+    @property
+    def effect_labels(self):
+        return self._effect_labels
+
+    @property
+    def povm_label(self):
+        return self._povm_label
+
+    @effect_labels.setter
+    def effect_labels(self, value):
+        self._effect_labels = value
+        self._full_effect_labels = tuple([(self._povm_label + "_" + el) for el in value])
+
+    @povm_label.setter
+    def povm_label(self, value):
+        self._povm_label = value
+        self._full_effect_labels = tuple([(value + "_" + el) for el in self._effect_labels])
+
+    @full_effect_labels.setter
+    def full_effect_labels(self, value):
+        self._full_effect_labels = value

     def __len__(self):
         return len(self.circuit_without_povm)  # don't count POVM in length, so slicing works as expected
@@ -4681,4 +4704,3 @@
     def __str__(self):
         return "SeparatePOVM(" + self.circuit_without_povm.str + "," \
             + str(self.povm_label) + "," + str(self.effect_labels) + ")"
-    #LATER: add a method for getting the "POVM_effect" labels?

From a79ac039e1d073472edfd2cbba50884da5f544d8 Mon Sep 17 00:00:00 2001
From: Corey Ostrove
Date: Wed, 29 May 2024 23:47:58 -0600
Subject: [PATCH 04/30] New method for doing bulk instrument/effect expansion

Adds a new method to OpModel that allows for doing instrument expansion and
POVM expansion in bulk, speeding things up by avoiding recomputation of shared
quantities. Also adds a pipeline for re-using completed or split circuits (as
produced by the related OpModel methods) for more efficient re-use of
previously completed work.
---
 pygsti/layouts/matrixlayout.py |  16 ++--
 pygsti/models/model.py         | 151 ++++++++++++++++++++++++++++++++-
 2 files changed, 159 insertions(+), 8 deletions(-)

diff --git a/pygsti/layouts/matrixlayout.py b/pygsti/layouts/matrixlayout.py
index 4438ff7c8..40bcc82a9 100644
--- a/pygsti/layouts/matrixlayout.py
+++ b/pygsti/layouts/matrixlayout.py
@@ -396,18 +396,22 @@ def create_matrix_copa_layout_circuit_cache(circuits, model, dataset=None):
     cache['completed_circuits'] = {ckt: comp_ckt for ckt, comp_ckt in zip(circuits, completed_circuits)}
     cache['split_circuits'] = {ckt: split_ckt for ckt, split_ckt in zip(cache['completed_circuits'].values(), split_circuits)}

-    expanded_circuit_cache = dict()
     #There is some potential aliasing that happens in the init that I am not
     #doing here, but I think 90+% of the time this ought to be fine.
if dataset is not None: + unique_outcomes_list = [] for ckt in completed_circuits.values(): ds_row = dataset.get(ckt, None) - if ds_row is not None: - expanded_circuit_cache[ckt] = model.expand_instruments_and_separate_povm(ckt, ds_row.unique_outcomes) + unique_outcomes_list.append(ds_row.unique_outcomes if ds_row is not None else None) else: - expanded_circuit_cache = {ckt: model.expand_instruments_and_separate_povm(ckt, None) - for ckt in cache['completed_circuits'].values()} - + unique_outcomes_list = [None]*len(circuits) + + expanded_circuit_outcome_list = model.bulk_expand_instruments_and_separate_povm(circuits, + observed_outcomes_list = unique_outcomes_list, + split_circuits = split_circuits) + + expanded_circuit_cache = {ckt: expanded_ckt for ckt,expanded_ckt in zip(cache['completed_circuits'].values(), expanded_circuit_outcome_list)} + cache['expanded_and_separated_circuits'] = expanded_circuit_cache expanded_subcircuits_no_spam_cache = dict() diff --git a/pygsti/models/model.py b/pygsti/models/model.py index b60e558a9..641bb8d05 100644 --- a/pygsti/models/model.py +++ b/pygsti/models/model.py @@ -1391,13 +1391,16 @@ def expand_instruments_and_separate_povm(self, circuit, observed_outcomes=None): Parameters ---------- - model : Model - The model used to provide necessary details regarding the expansion, including: + circuit : Circuit + The circuit to expand, using necessary details regarding the expansion from this model, including: - default SPAM layers - definitions of instrument-containing layers - expansions of individual instruments and POVMs + observed_outcomes : iterable, optional (default None) + If specified an iterable over the subset of outcomes empirically observed for this circuit. + Returns ------- OrderedDict @@ -1477,6 +1480,150 @@ def add_expanded_circuit_outcomes(circuit, running_outcomes, ootree, start): expanded_circuit_outcomes[_SeparatePOVMCircuit(circuit_without_povm, povm_lbl, elabels)] = outcomes return expanded_circuit_outcomes + + def bulk_expand_instruments_and_separate_povm(self, circuits, observed_outcomes_list=None, split_circuits = None, + completed_circuits = None): + """ + Creates a list of dictionaries mapping from :class:`SeparatePOVMCircuit` + objects from expanding the instruments of this circuit. + + Each key of the returned dictionary replaces the instruments in this circuit with a selection + of their members. (The size of the resulting dictionary is the product of the sizes of + each instrument appearing in this circuit when `observed_outcomes is None`). Keys are stored + as :class:`SeparatePOVMCircuit` objects so it's easy to keep track of which POVM outcomes (effects) + correspond to observed data. This function is, for the most part, used internally to process + a circuit before computing its outcome probabilities. + + This function works similarly to expand_instruments_and_separate_povm, except it operates on + an entire list of circuits at once, and provides additional kwargs to accelerate computation. + + Parameters + ---------- + circuit : Circuit + The circuit to expand, using necessary details regarding the expansion from this model, including: + + - default SPAM layers + - definitions of instrument-containing layers + - expansions of individual instruments and POVMs + + observed_outcomes_list : list of iterables, optional (default None) + If specified a list of iterables over the subset of outcomes empirically observed for each circuit. 
+ + split_circuits : list of tuples, optional (default None) + If specified, this is a list of tuples for each circuit corresponding to the splitting of + the circuit into the prep label, spam-free circuit, and povm label. This is the same format + produced by the :meth:split_circuit(s) method, and so this option can allow for accelerating this + method when that has previously been run. When using this kwarg only one of this or + the `complete_circuits` kwargs should be used. + + complete_circuits : list of Circuits, optional (default None) + If specified, this is a list of compeleted circuits with prep and povm labels included. + This is the format produced by the :meth:complete_circuit(s) method, and this can + be used to accelerate this method call when that has been previously run. Should not + be used in conjunction with `split_circuits`. + + Returns + ------- + OrderedDict + A dict whose keys are :class:`SeparatePOVMCircuit` objects and whose + values are tuples of the outcome labels corresponding to this circuit, + one per POVM effect held in the key. + """ + + assert(not (completed_circuits is not None and split_circuits is not None)), "Inclusion of non-trivial values"\ + +" for both `complete_circuits` and `split_circuits` is not supported. Please use only one of these two arguments." + + if split_circuits is not None: + povm_lbls = [split_ckt[2] for split_ckt in split_circuits] + circuits_without_povm = [(split_ckt[0],) + split_ckt[1] for split_ckt in split_circuits] + elif completed_circuits is not None: + povm_lbls = [comp_ckt[-1] for comp_ckt in completed_circuits] + circuits_without_povm = [comp_ckt[:-1] for comp_ckt in completed_circuits] + else: + completed_circuits = self.complete_circuits(circuits) + povm_lbls = [comp_ckt[-1] for comp_ckt in completed_circuits] + circuits_without_povm = [comp_ckt[:-1] for comp_ckt in completed_circuits] + + if observed_outcomes_list is None: + observed_outcomes_list = [None]*len(circuits) + + + expanded_circuit_outcomes_list = [_collections.OrderedDict() for _ in range(len(circuits))] + + def create_tree(lst): + subs = _collections.OrderedDict() + for el in lst: + if len(el) > 0: + if el[0] not in subs: subs[el[0]] = [] + subs[el[0]].append(el[1:]) + return _collections.OrderedDict([(k, create_tree(sub_lst)) for k, sub_lst in subs.items()]) + + def add_expanded_circuit_outcomes(circuit, running_outcomes, ootree, start): + """ + """ + cir = circuit if start == 0 else circuit[start:] # for performance, avoid uneeded slicing + for k, layer_label in enumerate(cir, start=start): + components = layer_label.components + #instrument_inds = _np.nonzero([model._is_primitive_instrument_layer_lbl(component) + # for component in components])[0] # SLOWER than statement below + instrument_inds = _np.array([i for i, component in enumerate(components) + if self._is_primitive_instrument_layer_lbl(component)]) + if instrument_inds.size > 0: + # This layer contains at least one instrument => recurse with instrument(s) replaced with + # all combinations of their members. 
+ component_lookup = {i: comp for i, comp in enumerate(components)} + instrument_members = [self._member_labels_for_instrument(components[i]) + for i in instrument_inds] # also components of outcome labels + for selected_instrmt_members in _itertools.product(*instrument_members): + expanded_layer_lbl = component_lookup.copy() + expanded_layer_lbl.update({i: components[i] + "_" + sel + for i, sel in zip(instrument_inds, selected_instrmt_members)}) + expanded_layer_lbl = _Label([expanded_layer_lbl[i] for i in range(len(components))]) + + if ootree is not None: + new_ootree = ootree + for sel in selected_instrmt_members: + new_ootree = new_ootree.get(sel, {}) + if len(new_ootree) == 0: continue # no observed outcomes along this outcome-tree path + else: + new_ootree = None + + add_expanded_circuit_outcomes(circuit[0:k] + _Circuit((expanded_layer_lbl,)) + circuit[k + 1:], + running_outcomes + selected_instrmt_members, new_ootree, k + 1) + break + + else: # no more instruments to process: `cir` contains no instruments => add an expanded circuit + assert(circuit not in expanded_circuit_outcomes) # shouldn't be possible to generate duplicates... + elabels = self._effect_labels_for_povm(povm_lbl) if (observed_outcomes is None) \ + else tuple(ootree.keys()) + outcomes = tuple((running_outcomes + (elabel,) for elabel in elabels)) + expanded_circuit_outcomes[_SeparatePOVMCircuit(circuit, povm_lbl, elabels)] = outcomes + + has_instruments = self._has_instruments() + unique_povm_labels = set(povm_lbls) + effect_label_dict = {povm_lbl: self._effect_labels_for_povm(povm_lbl) for povm_lbl in unique_povm_labels} + + for povm_lbl, circuit_without_povm, expanded_circuit_outcomes, observed_outcomes in zip(povm_lbls, circuits_without_povm, + expanded_circuit_outcomes_list, + observed_outcomes_list): + ootree = create_tree(observed_outcomes) if observed_outcomes is not None else None # tree of observed outcomes + # e.g. [('0','00'), ('0','01'), ('1','10')] ==> {'0': {'00': {}, '01': {}}, '1': {'10': {}}} + + if has_instruments: + add_expanded_circuit_outcomes(circuit_without_povm, (), ootree, start=0) + else: + # It may be helpful to cache the set of elabels for a POVM (maybe within the model?) because + # currently the call to _effect_labels_for_povm may be a bottleneck. It's needed, even when we have + # observed outcomes, because there may be some observed outcomes that aren't modeled (e.g. leakage states) + if observed_outcomes is None: + elabels = effect_label_dict[povm_lbl] + else: + possible_lbls = set(effect_label_dict[povm_lbl]) + elabels = tuple([oo for oo in ootree.keys() if oo in possible_lbls]) + outcomes = tuple(((elabel,) for elabel in elabels)) + expanded_circuit_outcomes[_SeparatePOVMCircuit(circuit_without_povm, povm_lbl, elabels)] = outcomes + + return expanded_circuit_outcomes_list def complete_circuits(self, circuits, prep_lbl_to_prepend=None, povm_lbl_to_append=None, return_split = False): """ From d97f7869fdb05d4b1e438132bd0af3545236e59f Mon Sep 17 00:00:00 2001 From: Corey Ostrove Date: Thu, 30 May 2024 12:33:37 -0600 Subject: [PATCH 05/30] Minor COPA Layout __init__ tweaks Some minor performance oriented tweaks to the init for COPA layouts. 
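One of these tweaks replaces two separate traversals of elindex_outcome_tuples
(one to find the maximum element index, one to collect the distinct indices)
with a single flattened list from which both quantities are derived. A small
self-contained illustration, using made-up data in the same shape:

    # Hypothetical stand-in for elindex_outcome_tuples: unique-circuit index
    # -> list of (element index, outcome label) pairs.
    elindex_outcome_tuples = {0: [(0, ('0',)), (1, ('1',))],
                              1: [(2, ('0',)), (3, ('1',))]}

    # Flatten once; derive the max and the distinct-index set from one list.
    indices = [i for tuples in elindex_outcome_tuples.values() for i, _ in tuples]
    max_element_index = max(indices) if len(elindex_outcome_tuples) > 0 else -1
    size = max_element_index + 1  # the -1 sentinel makes size == 0 when empty
    assert len(set(indices)) == size  # element indices must cover 0..size-1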
--- pygsti/layouts/copalayout.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/pygsti/layouts/copalayout.py b/pygsti/layouts/copalayout.py index 430fb6734..97b6b021a 100644 --- a/pygsti/layouts/copalayout.py +++ b/pygsti/layouts/copalayout.py @@ -190,24 +190,26 @@ def __init__(self, circuits, unique_circuits, to_unique, elindex_outcome_tuples, if unique_circuits is None and to_unique is None: unique_circuits, to_unique = self._compute_unique_circuits(circuits) self._unique_circuits = unique_circuits - self._unique_circuit_index = _collections.OrderedDict( - [(c, i) for i, c in enumerate(self._unique_circuits)]) # original circuits => unique circuit indices + self._unique_circuit_index = {c:i for i, c in enumerate(self._unique_circuits)} # original circuits => unique circuit indices self._to_unique = to_unique # original indices => unique circuit indices self._unique_complete_circuits = unique_complete_circuits # Note: can be None self._param_dimensions = param_dimensions self._resource_alloc = _ResourceAllocation.cast(resource_alloc) - max_element_index = max(_it.chain(*[[ei for ei, _ in pairs] for pairs in elindex_outcome_tuples.values()])) \ - if len(elindex_outcome_tuples) > 0 else -1 # -1 makes _size = 0 below - indices = set(i for tuples in elindex_outcome_tuples.values() for i, o in tuples) + indices = [i for tuples in elindex_outcome_tuples.values() for i, _ in tuples] + max_element_index = max(indices) if len(elindex_outcome_tuples) > 0 else -1 # -1 makes _size = 0 below + indices = set(indices) + + self._size = max_element_index + 1 assert(len(indices) == self._size), \ "Inconsistency: %d distinct indices but max index + 1 is %d!" % (len(indices), self._size) - self._outcomes = _collections.OrderedDict() - self._element_indices = _collections.OrderedDict() + self._outcomes = dict() #_collections.OrderedDict() + self._element_indices = dict() #_collections.OrderedDict() + sort_idx_func = lambda x: x[0] for i_unique, tuples in elindex_outcome_tuples.items(): - sorted_tuples = sorted(tuples, key=lambda x: x[0]) # sort by element index + sorted_tuples = sorted(tuples, key=sort_idx_func) # sort by element index elindices, outcomes = zip(*sorted_tuples) # sorted by elindex so we make slices whenever possible self._outcomes[i_unique] = tuple(outcomes) self._element_indices[i_unique] = _slct.list_to_slice(elindices, array_ok=True) From 544fb55c9f7103cbd5382037b38e7519143203bb Mon Sep 17 00:00:00 2001 From: Corey Ostrove Date: Thu, 30 May 2024 13:01:33 -0600 Subject: [PATCH 06/30] Refactor some OrderedDicts into regular ones Refactor some of the ordered dictionaries in matrix layout creation into regular ones. --- pygsti/layouts/copalayout.py | 4 ++-- pygsti/layouts/matrixlayout.py | 27 +++++++++------------------ 2 files changed, 11 insertions(+), 20 deletions(-) diff --git a/pygsti/layouts/copalayout.py b/pygsti/layouts/copalayout.py index 97b6b021a..bd5020aa8 100644 --- a/pygsti/layouts/copalayout.py +++ b/pygsti/layouts/copalayout.py @@ -205,8 +205,8 @@ def __init__(self, circuits, unique_circuits, to_unique, elindex_outcome_tuples, assert(len(indices) == self._size), \ "Inconsistency: %d distinct indices but max index + 1 is %d!" 
% (len(indices), self._size) - self._outcomes = dict() #_collections.OrderedDict() - self._element_indices = dict() #_collections.OrderedDict() + self._outcomes = dict() + self._element_indices = dict() sort_idx_func = lambda x: x[0] for i_unique, tuples in elindex_outcome_tuples.items(): sorted_tuples = sorted(tuples, key=sort_idx_func) # sort by element index diff --git a/pygsti/layouts/matrixlayout.py b/pygsti/layouts/matrixlayout.py index 40bcc82a9..2825eaa51 100644 --- a/pygsti/layouts/matrixlayout.py +++ b/pygsti/layouts/matrixlayout.py @@ -93,15 +93,13 @@ def add_expanded_circuits(indices, add_to_this_dict): prep_lbl = sep_povm_c.circuit_without_povm[0] exp_nospam_c = sep_povm_c.circuit_without_povm[1:] # sep_povm_c *always* has prep lbl spam_tuples = [(prep_lbl, elabel) for elabel in sep_povm_c.full_effect_labels] - outcome_by_spamtuple = _collections.OrderedDict([(st, outcome) - for st, outcome in zip(spam_tuples, outcomes)]) + outcome_by_spamtuple = {st:outcome for st, outcome in zip(spam_tuples, outcomes)} #Now add these outcomes to `expanded_nospam_circuit_outcomes` - note that multiple "unique_i"'s # may exist for the same expanded & without-spam circuit (exp_nospam_c) and so we need to # keep track of a list of unique_i indices for each circut and spam tuple below. if exp_nospam_c not in _expanded_nospam_circuit_outcomes: - _expanded_nospam_circuit_outcomes[exp_nospam_c] = _collections.OrderedDict( - [(st, (outcome, [unique_i])) for st, outcome in zip(spam_tuples, outcomes)]) + _expanded_nospam_circuit_outcomes[exp_nospam_c] = {st:(outcome, [unique_i]) for st, outcome in zip(spam_tuples, outcomes)} else: for st, outcome in outcome_by_spamtuple.items(): if st in _expanded_nospam_circuit_outcomes[exp_nospam_c]: @@ -115,24 +113,20 @@ def add_expanded_circuits(indices, add_to_this_dict): # keys = expanded circuits w/out SPAM layers; values = spamtuple => (outcome, unique_is) dictionary that # keeps track of which "unique" circuit indices having each spamtuple / outcome. - expanded_nospam_circuit_outcomes = _collections.OrderedDict() + expanded_nospam_circuit_outcomes = dict() add_expanded_circuits(group, expanded_nospam_circuit_outcomes) - expanded_nospam_circuits = _collections.OrderedDict( - [(i, cir) for i, cir in enumerate(expanded_nospam_circuit_outcomes.keys())]) - - #print(f'{expanded_nospam_circuits=}') + expanded_nospam_circuits = {i:cir for i, cir in enumerate(expanded_nospam_circuit_outcomes.keys())} # add suggested scratch to the "final" elements as far as the tree creation is concerned # - this allows these scratch element to help balance the tree. 
if helpful_scratch: expanded_nospam_circuit_outcomes_plus_scratch = expanded_nospam_circuit_outcomes.copy() add_expanded_circuits(helpful_scratch, expanded_nospam_circuit_outcomes_plus_scratch) - expanded_nospam_circuits_plus_scratch = _collections.OrderedDict( - [(i, cir) for i, cir in enumerate(expanded_nospam_circuit_outcomes_plus_scratch.keys())]) + expanded_nospam_circuits_plus_scratch = {i:cir for i, cir in enumerate(expanded_nospam_circuit_outcomes_plus_scratch.keys())} else: expanded_nospam_circuits_plus_scratch = expanded_nospam_circuits.copy() - double_expanded_nospam_circuits_plus_scratch = _collections.OrderedDict() + double_expanded_nospam_circuits_plus_scratch = dict() if double_expanded_nospam_circuits_cache is not None: for i, cir in expanded_nospam_circuits_plus_scratch.items(): # expand sub-circuits for a more efficient tree @@ -146,8 +140,6 @@ def add_expanded_circuits(indices, add_to_this_dict): # expand sub-circuits for a more efficient tree double_expanded_nospam_circuits_plus_scratch[i] = cir.expand_subcircuits() - #print(f'{double_expanded_nospam_circuits_plus_scratch=}') - #print(f'{double_expanded_nospam_circuits_plus_scratch == expanded_nospam_circuits}') self.tree = _EvalTree.create(double_expanded_nospam_circuits_plus_scratch) #print("Atom tree: %d circuits => tree of size %d" % (len(expanded_nospam_circuits), len(self.tree))) @@ -158,7 +150,7 @@ def add_expanded_circuits(indices, add_to_this_dict): # quantity plus a spam-tuple. We order the final indices so that all the outcomes corresponding to a # given spam-tuple are contiguous. - tree_indices_by_spamtuple = _collections.OrderedDict() # "tree" indices index expanded_nospam_circuits + tree_indices_by_spamtuple = dict() # "tree" indices index expanded_nospam_circuits for i, c in expanded_nospam_circuits.items(): for spam_tuple in expanded_nospam_circuit_outcomes[c].keys(): if spam_tuple not in tree_indices_by_spamtuple: tree_indices_by_spamtuple[spam_tuple] = [] @@ -167,7 +159,7 @@ def add_expanded_circuits(indices, add_to_this_dict): #Assign element indices, starting at `offset` # now that we know how many of each spamtuple there are, assign final element indices. local_offset = 0 - self.indices_by_spamtuple = _collections.OrderedDict() # values are (element_indices, tree_indices) tuples. + self.indices_by_spamtuple = dict() # values are (element_indices, tree_indices) tuples. 
for spam_tuple, tree_indices in tree_indices_by_spamtuple.items(): self.indices_by_spamtuple[spam_tuple] = (slice(local_offset, local_offset + len(tree_indices)), _slct.list_to_slice(tree_indices, array_ok=True)) @@ -177,8 +169,7 @@ def add_expanded_circuits(indices, add_to_this_dict): element_slice = None # slice(offset, offset + local_offset) # *global* (of parent layout) element-index slice num_elements = local_offset - elindex_outcome_tuples = _collections.OrderedDict([ - (unique_i, list()) for unique_i in range(len(unique_complete_circuits))]) + elindex_outcome_tuples = {unique_i: list() for unique_i in range(len(unique_complete_circuits))} for spam_tuple, (element_indices, tree_indices) in self.indices_by_spamtuple.items(): for elindex, tree_index in zip(_slct.indices(element_indices), _slct.to_array(tree_indices)): From 91d5ebb4644a123fc7dcdbf117b49781e562a63f Mon Sep 17 00:00:00 2001 From: Corey Ostrove Date: Fri, 31 May 2024 18:32:55 -0600 Subject: [PATCH 07/30] Start the process of adding caching to MDC store creation Start adding infrastructure for caching things used in MDC store creation and for plumbing in stuff from layout creation. --- pygsti/algorithms/core.py | 18 ++++++++- pygsti/layouts/matrixlayout.py | 61 +++++++++++++++++------------ pygsti/models/model.py | 47 +++++++++++++++++++--- pygsti/objectivefns/objectivefns.py | 47 +++++++++++++--------- 4 files changed, 122 insertions(+), 51 deletions(-) diff --git a/pygsti/algorithms/core.py b/pygsti/algorithms/core.py index b4f67c286..59a696d85 100644 --- a/pygsti/algorithms/core.py +++ b/pygsti/algorithms/core.py @@ -897,12 +897,25 @@ def _max_array_types(artypes_list): # get the maximum number of each array type precomp_layout_circuit_cache = _layouts.matrixlayout.create_matrix_copa_layout_circuit_cache(unique_circuits, mdl) else: precomp_layout_circuit_cache = None - #print(completed_circuit_cache) + for i, circuit_list in enumerate(circuit_lists): printer.log(f'Layout for iteration {i}', 2) precomp_layouts.append(mdl.sim.create_layout(circuit_list, dataset, resource_alloc, array_types, verbosity= printer - 1, layout_creation_circuit_cache = precomp_layout_circuit_cache)) + #precompute a cache of possible outcome counts for each circuits to accelerate MDC store creation + if isinstance(mdl, _models.model.OpModel): + if precomp_layout_circuit_cache is not None: #then grab the split circuits from there. + expanded_circuit_outcome_list = mdl.bulk_expand_instruments_and_separate_povm(unique_circuits, + split_circuits = precomp_layout_circuit_cache['split_circuits']) + outcome_count_by_circuit_cache = {ckt: len(outcome_tup) for ckt,outcome_tup in zip(unique_circuits, expanded_circuit_outcome_list)} + else: + expanded_circuit_outcome_list = mdl.bulk_expand_instruments_and_separate_povm(unique_circuits) + outcome_count_by_circuit_cache = {ckt: len(outcome_tup) for ckt,outcome_tup in zip(unique_circuits, expanded_circuit_outcome_list)} + else: + outcome_count_by_circuit_cache = {ckt: mdl.compute_num_outcomes(ckt) for ckt in unique_circuits} + + with printer.progress_logging(1): for i in range(starting_index, len(circuit_lists)): circuitsToEstimate = circuit_lists[i] @@ -919,7 +932,8 @@ def _max_array_types(artypes_list): # get the maximum number of each array type mdl.basis = start_model.basis # set basis in case of CPTP constraints (needed?) 
initial_mdc_store = _objfns.ModelDatasetCircuitsStore(mdl, dataset, circuitsToEstimate, resource_alloc, array_types=array_types, verbosity=printer - 1, - precomp_layout = precomp_layouts[i]) + precomp_layout = precomp_layouts[i], + outcome_count_by_circuit=outcome_count_by_circuit_cache) mdc_store = initial_mdc_store for j, obj_fn_builder in enumerate(iteration_objfn_builders): diff --git a/pygsti/layouts/matrixlayout.py b/pygsti/layouts/matrixlayout.py index 2825eaa51..654f32c86 100644 --- a/pygsti/layouts/matrixlayout.py +++ b/pygsti/layouts/matrixlayout.py @@ -61,6 +61,9 @@ class _MatrixCOPALayoutAtom(_DistributableAtom): model : Model The model being used to construct this layout. Used for expanding instruments within the circuits. + + unique_circuits : list of Circuits + A list of the unique :class:`Circuit` objects representing the circuits this layout will include. dataset : DataSet The dataset, used to include only observed circuit outcomes in this atom @@ -68,7 +71,7 @@ class _MatrixCOPALayoutAtom(_DistributableAtom): """ def __init__(self, unique_complete_circuits, unique_nospam_circuits, circuits_by_unique_nospam_circuits, - ds_circuits, group, helpful_scratch, model, dataset=None, expanded_and_separated_circuit_cache=None, + ds_circuits, group, helpful_scratch, model, unique_circuits, dataset=None, expanded_and_separated_circuit_cache=None, double_expanded_nospam_circuits_cache = None): #Note: group gives unique_nospam_circuits indices, which circuits_by_unique_nospam_circuits @@ -84,11 +87,13 @@ def add_expanded_circuits(indices, add_to_this_dict): expc_outcomes = model.expand_instruments_and_separate_povm(unique_complete_circuits[unique_i], observed_outcomes) #Note: unique_complete_circuits may have duplicates (they're only unique *pre*-completion) else: - expc_outcomes = expanded_and_separated_circuit_cache.get(unique_complete_circuits[unique_i], None) + #the cache is indexed into using the (potentially) incomplete circuits + expc_outcomes = expanded_and_separated_circuit_cache.get(unique_circuits[unique_i], None) if expc_outcomes is None: #fall back on original non-cache behavior. observed_outcomes = None if (dataset is None) else dataset[ds_circuits[unique_i]].unique_outcomes expc_outcomes = model.expand_instruments_and_separate_povm(unique_complete_circuits[unique_i], observed_outcomes) - + #and add this new value to the cache. + expanded_and_separated_circuit_cache[unique_circuits[unique_i]] = expc_outcomes for sep_povm_c, outcomes in expc_outcomes.items(): # for each expanded cir from unique_i-th circuit prep_lbl = sep_povm_c.circuit_without_povm[0] exp_nospam_c = sep_povm_c.circuit_without_povm[1:] # sep_povm_c *always* has prep lbl @@ -242,7 +247,7 @@ class MatrixCOPALayout(_DistributableCOPALayout): Parameters ---------- circuits : list - A list of:class:`Circuit` objects representing the circuits this layout will include. + A list of :class:`Circuit` objects representing the circuits this layout will include. model : Model The model that will be used to compute circuit outcome probabilities using this layout. 
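The atom's cache lookups above follow a get-or-compute-and-backfill pattern:
on a miss the original computation runs and its result is written back into
the shared cache so later atoms benefit. A minimal generic sketch (the helper
name is illustrative, not pyGSTi API):

    def get_or_compute(cache, key, compute):
        value = cache.get(key, None)
        if value is None:  # cache miss: fall back to direct computation
            value = compute(key)
            cache[key] = value  # backfill so subsequent lookups hit
        return value

    cache = {}
    first = get_or_compute(cache, 'Gx', lambda k: ('expanded', k))
    second = get_or_compute(cache, 'Gx', lambda k: ('expanded', k))
    assert first is second  # the second call was served from the cache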
@@ -309,23 +314,23 @@ def __init__(self, circuits, model, dataset=None, num_sub_trees=None, num_tree_p #extract subcaches from layout_creation_circuit_cache: if layout_creation_circuit_cache is not None: - completed_circuit_cache = layout_creation_circuit_cache.get('completed_circuits', None) - split_circuit_cache = layout_creation_circuit_cache.get('split_circuits', None) - expanded_and_separated_circuits_cache = layout_creation_circuit_cache.get('expanded_and_separated_circuits', None) - expanded_subcircuits_no_spam_cache = layout_creation_circuit_cache.get('expanded_subcircuits_no_spam', None) + self.completed_circuit_cache = layout_creation_circuit_cache.get('completed_circuits', None) + self.split_circuit_cache = layout_creation_circuit_cache.get('split_circuits', None) + self.expanded_and_separated_circuits_cache = layout_creation_circuit_cache.get('expanded_and_separated_circuits', None) + self.expanded_subcircuits_no_spam_cache = layout_creation_circuit_cache.get('expanded_subcircuits_no_spam', None) else: - completed_circuit_cache = None - split_circuit_cache = None - expanded_and_separated_circuits_cache = None - expanded_subcircuits_no_spam_cache = None + self.completed_circuit_cache = None + self.split_circuit_cache = None + self.expanded_and_separated_circuits_cache = None + self.expanded_subcircuits_no_spam_cache = None - if completed_circuit_cache is None: + if self.completed_circuit_cache is None: unique_complete_circuits, split_unique_circuits = model.complete_circuits(unique_circuits, return_split=True) else: unique_complete_circuits = [] for c in unique_circuits: - comp_ckt = completed_circuit_cache.get(c, None) - if completed_circuit_cache is not None: + comp_ckt = self.completed_circuit_cache.get(c, None) + if comp_ckt is not None: unique_complete_circuits.append(comp_ckt) else: unique_complete_circuits.append(model.complete_circuit(c)) @@ -334,17 +339,24 @@ def __init__(self, circuits, model, dataset=None, num_sub_trees=None, num_tree_p # "unique circuits" after completion, e.g. "rho0Gx" and "Gx" could both complete to "rho0GxMdefault_0". circuits_by_unique_nospam_circuits = _collections.OrderedDict() - if completed_circuit_cache is None: + if self.completed_circuit_cache is None: for i, (_, nospam_c, _) in enumerate(split_unique_circuits): if nospam_c in circuits_by_unique_nospam_circuits: circuits_by_unique_nospam_circuits[nospam_c].append(i) else: circuits_by_unique_nospam_circuits[nospam_c] = [i] + #also create the split circuit cache at this point for future use. + self.split_circuit_cache = {unique_ckt:split_ckt for unique_ckt, split_ckt in zip(unique_circuits, split_unique_circuits)} + else: - for i, c in enumerate(unique_complete_circuits): - _, nospam_c, _ = split_circuit_cache.get(c, None) + for i, (c_unique_complete, c_unique) in enumerate(zip(unique_complete_circuits, unique_circuits)): + split_ckt_tup = self.split_circuit_cache.get(c_unique, None) + nospam_c= split_ckt_tup[1] if nospam_c is None: - _, nospam_c, _ = model.split_circuit(c) + split_ckt_tup = model.split_circuit(c_unique_complete) + nospam_c= split_ckt_tup[1] + #also add this missing circuit to the cache for future use. 
+                    self.split_circuit_cache[c_unique] = split_ckt_tup
+                nospam_c = split_ckt_tup[1]
                 if nospam_c in circuits_by_unique_nospam_circuits:
                     circuits_by_unique_nospam_circuits[nospam_c].append(i)
                 else:
@@ -367,9 +379,10 @@ def _create_atom(args):
             group, helpful_scratch_group = args
             return _MatrixCOPALayoutAtom(unique_complete_circuits, unique_nospam_circuits,
                                          circuits_by_unique_nospam_circuits, ds_circuits,
-                                         group, helpful_scratch_group, model, dataset,
-                                         expanded_and_separated_circuits_cache,
-                                         expanded_subcircuits_no_spam_cache)
+                                         group, helpful_scratch_group, model,
+                                         unique_circuits, dataset,
+                                         self.expanded_and_separated_circuits_cache,
+                                         self.expanded_subcircuits_no_spam_cache)
 
         super().__init__(circuits, unique_circuits, to_unique, unique_complete_circuits,
                          _create_atom, list(zip(groups, helpful_scratch)), num_tree_processors,
@@ -385,7 +398,7 @@ def create_matrix_copa_layout_circuit_cache(circuits, model, dataset=None):
     completed_circuits, split_circuits = model.complete_circuits(circuits, return_split=True)
 
     cache['completed_circuits'] = {ckt: comp_ckt for ckt, comp_ckt in zip(circuits, completed_circuits)}
-    cache['split_circuits'] = {ckt: split_ckt for ckt, split_ckt in zip(cache['completed_circuits'].values(), split_circuits)}
+    cache['split_circuits'] = {ckt: split_ckt for ckt, split_ckt in zip(circuits, split_circuits)}
 
     #There is some potential aliasing that happens in the init that I am not
     #doing here, but I think 90+% of the time this ought to be fine.
@@ -401,7 +414,7 @@ def create_matrix_copa_layout_circuit_cache(circuits, model, dataset=None):
                                                                               observed_outcomes_list = unique_outcomes_list,
                                                                               split_circuits = split_circuits)
 
-    expanded_circuit_cache = {ckt: expanded_ckt for ckt,expanded_ckt in zip(cache['completed_circuits'].values(), expanded_circuit_outcome_list)}
+    expanded_circuit_cache = {ckt: expanded_ckt for ckt, expanded_ckt in zip(circuits, expanded_circuit_outcome_list)}
 
     cache['expanded_and_separated_circuits'] = expanded_circuit_cache
 
diff --git a/pygsti/models/model.py b/pygsti/models/model.py
index 641bb8d05..940e31303 100644
--- a/pygsti/models/model.py
+++ b/pygsti/models/model.py
@@ -1141,10 +1141,45 @@ def circuit_outcomes(self, circuit):
 
         Returns
         -------
-        tuple
+        tuple corresponding to the possible outcomes for circuit.
         """
-        outcomes = circuit.expand_instruments_and_separate_povm(self)  # dict w/keys=sep-povm-circuits, vals=outcomes
+        outcomes = self.expand_instruments_and_separate_povm(circuit)  # dict w/keys=sep-povm-circuits, vals=outcomes
         return tuple(_itertools.chain(*outcomes.values()))  # concatenate outputs from all sep-povm-circuits
+
+    def bulk_circuit_outcomes(self, circuits, split_circuits=None, completed_circuits=None):
+        """
+        Get all the possible outcome labels produced by simulating each of the circuits
+        in this list of circuits.
+
+        Parameters
+        ----------
+        circuits : list of Circuits
+            A list of Circuits to get the outcomes of.
+
+        split_circuits : list of tuples, optional (default None)
+            If specified, this is a list of tuples for each circuit corresponding to the splitting of
+            the circuit into the prep label, spam-free circuit, and povm label. This is the same format
+            produced by the :meth:`split_circuits` method, and so this option can allow for accelerating this
+            method when that has previously been run. When using this kwarg only one of this or
+            the `completed_circuits` kwarg should be used.
+
+        completed_circuits : list of Circuits, optional (default None)
+            If specified, this is a list of completed circuits with prep and povm labels included.
+            This is the format produced by the :meth:`complete_circuits` method, and this can
+            be used to accelerate this method call when that has been previously run. Should not
+            be used in conjunction with `split_circuits`.
+
+        Returns
+        -------
+        list of tuples corresponding to the possible outcomes for each circuit.
+        """
+
+        # list of dict w/keys=sep-povm-circuits, vals=outcomes
+        outcomes_list = self.bulk_expand_instruments_and_separate_povm(circuits,
+                                                                       split_circuits=split_circuits,
+                                                                       completed_circuits=completed_circuits)
+
+        return [tuple(_itertools.chain(*outcomes.values())) for outcomes in outcomes_list]  # concatenate outputs from all sep-povm-circuits
 
     def split_circuit(self, circuit, erroron=('prep', 'povm'), split_prep=True, split_povm=True):
         """
@@ -1516,7 +1551,7 @@ def bulk_expand_instruments_and_separate_povm(self, circuits, observed_outcomes_
             method when that has previously been run. When using this kwarg only one of this or
             the `completed_circuits` kwarg should be used.
 
-        complete_circuits : list of Circuits, optional (default None)
+        completed_circuits : list of Circuits, optional (default None)
             If specified, this is a list of completed circuits with prep and povm labels included.
             This is the format produced by the :meth:`complete_circuits` method, and this can
             be used to accelerate this method call when that has been previously run. Should not
             be used in conjunction with `split_circuits`.
 
         Returns
         -------
-        OrderedDict
-            A dict whose keys are :class:`SeparatePOVMCircuit` objects and whose
-            values are tuples of the outcome labels corresponding to this circuit,
+        list of OrderedDicts
+            A list of dictionaries whose keys are :class:`SeparatePOVMCircuit` objects and whose
+            values are tuples of the outcome labels corresponding to each circuit,
             one per POVM effect held in the key.
         """
 
diff --git a/pygsti/objectivefns/objectivefns.py b/pygsti/objectivefns/objectivefns.py
index 191fd736b..9476f1c1c 100644
--- a/pygsti/objectivefns/objectivefns.py
+++ b/pygsti/objectivefns/objectivefns.py
@@ -19,11 +19,13 @@
 from pygsti import tools as _tools
 from pygsti.layouts.distlayout import DistributableCOPALayout as _DistributableCOPALayout
+from pygsti.layouts.matrixlayout import MatrixCOPALayout as _MatrixCOPALayout
 from pygsti.tools import slicetools as _slct, mpitools as _mpit, sharedmemtools as _smt
 from pygsti.circuits.circuitlist import CircuitList as _CircuitList
 from pygsti.baseobjs.resourceallocation import ResourceAllocation as _ResourceAllocation
 from pygsti.baseobjs.nicelyserializable import NicelySerializable as _NicelySerializable
 from pygsti.baseobjs.verbosityprinter import VerbosityPrinter as _VerbosityPrinter
+from pygsti.models.model import OpModel as _OpModel
 
 
 def _objfn(objfn_cls, model, dataset, circuits=None,
@@ -843,12 +845,10 @@ class ModelDatasetCircuitsStore(object):
     point.
     """
     def __init__(self, model, dataset, circuits=None, resource_alloc=None, array_types=(),
-                 precomp_layout=None, verbosity=0):
+                 precomp_layout=None, outcome_count_by_circuit=None, verbosity=0):
         self.dataset = dataset
         self.model = model
-        #self.opBasis = mdl.basis
         self.resource_alloc = _ResourceAllocation.cast(resource_alloc)
-        # expand = ??? get from model based on fwdsim type?
circuit_list = circuits if (circuits is not None) else list(dataset.keys()) bulk_circuit_list = circuit_list if isinstance( @@ -872,8 +872,21 @@ def __init__(self, model, dataset, circuits=None, resource_alloc=None, array_typ else: self.global_circuits = self.circuits - #self.circuits = bulk_circuit_list[:] - #self.circuit_weights = bulk_circuit_list.circuit_weights + #If a matrix layout then we have some precached circuit structures we can + #grab to speed up store generation. + if isinstance(self.layout, _MatrixCOPALayout): + #Grab the split_circuit_cache and down select to those in + #self.circuits + self.split_circuit_cache = self.layout.split_circuit_cache + self.split_circuits = [self.split_circuit_cache[ckt] for ckt in self.circuits] + + #currently only implemented for matrix, will eventually add map support. + else: + self.split_circuit_cache = None + + #set the value of the circuit outcome count cache (can be None) + self.outcome_count_by_circuit_cache = outcome_count_by_circuit + self.ds_circuits = self.circuits.apply_aliases() # computed by add_count_vectors @@ -888,18 +901,6 @@ def __init__(self, model, dataset, circuits=None, resource_alloc=None, array_typ self.time_dependent = False # indicates whether the data should be treated as time-resolved - #if not self.cache.has_evaltree(): - # subcalls = self.get_evaltree_subcalls() - # evt_resource_alloc = _ResourceAllocation(self.raw_objfn.comm, evt_mlim, - # self.raw_objfn.profiler, self.raw_objfn.distribute_method) - # self.cache.add_evaltree(self.mdl, self.dataset, bulk_circuit_list, evt_resource_alloc, - # subcalls, self.raw_objfn.printer - 1) - #self.eval_tree = self.cache.eval_tree - #self.lookup = self.cache.lookup - #self.outcomes_lookup = self.cache.outcomes_lookup - #self.wrt_block_size = self.cache.wrt_block_size - #self.wrt_block_size2 = self.cache.wrt_block_size2 - #convenience attributes (could make properties?) if isinstance(self.layout, _DistributableCOPALayout): self.global_nelements = self.layout.global_num_elements @@ -941,10 +942,18 @@ def add_omitted_freqs(self, printer=None, force=False): if self.firsts is None or force: # FUTURE: add any tracked memory? self.resource_alloc.add_tracked_memory(...) self.firsts = []; self.indicesOfCircuitsWithOmittedData = [] - for i, c in enumerate(self.circuits): + + #bulk compute the number of outcomes. + if isinstance(self.model, _OpModel): + bulk_outcomes_list = self.model.bulk_circuit_outcomes(self.circuits, split_circuits=self.split_circuits) + num_outcomes_list = [len(outcome_tup) for outcome_tup in bulk_outcomes_list] + else: + num_outcomes_list = [self.model.compute_num_outcomes(c) for c in self.circuits] + + for i in range(len(self.circuits)): indices = _slct.to_array(self.layout.indices_for_index(i)) lklen = _slct.length(self.layout.indices_for_index(i)) - if 0 < lklen < self.model.compute_num_outcomes(c): + if 0 < lklen < num_outcomes_list[i]: self.firsts.append(indices[0]) self.indicesOfCircuitsWithOmittedData.append(i) if len(self.firsts) > 0: From cfb323a36eb3386222ebecc23fe7ece1853e44f9 Mon Sep 17 00:00:00 2001 From: Corey Ostrove Date: Sun, 2 Jun 2024 18:11:48 -0600 Subject: [PATCH 08/30] Tweak omitted freqs and counts + DataSet and slicetools Performance optimization for the method for adding omitted frequencies to incorporate caching of the number of outcomes per circuit (which is somewhat expensive since it goes through the instrument/povm expansion code). Additionally refactor some other parts of this code for improved efficiency. 
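In sketch form, the outcome-count caching pattern described above amounts to the
following (illustrative helper only, not the exact pyGSTi API; here
`compute_num_outcomes` stands in for the expensive instrument/POVM expansion call):

    def cached_outcome_counts(circuits, compute_num_outcomes, cache):
        #Look up each circuit's outcome count, computing (and storing) it only on
        #a cache miss, so repeated GST iterations over nested circuit lists pay
        #the expansion cost at most once per unique circuit.
        counts = []
        for ckt in circuits:
            num_outcomes = cache.get(ckt, None)
            if num_outcomes is None:
                num_outcomes = compute_num_outcomes(ckt)
                cache[ckt] = num_outcomes
            counts.append(num_outcomes)
        return counts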
Also makes a few minor tweaks to the method for adding counts to speed that up as well. Can probably make this a bit faster still by merging the two calls to reduce redundancy, but that is a future us problem. Additionally make a few microoptimizations to the dataset code for grabbing counts, and to slicetools adding a function for directly giving a numpy array for a slice (instead of needing to cast from a list). Miscellaneous cleanup of old commented out code that doesn't appear needed any longer. --- pygsti/algorithms/core.py | 18 ++----- pygsti/data/dataset.py | 50 ++++++------------- pygsti/objectivefns/objectivefns.py | 50 ++++++++++++------- pygsti/tools/slicetools.py | 74 ++++++++++++++++++++++++----- 4 files changed, 114 insertions(+), 78 deletions(-) diff --git a/pygsti/algorithms/core.py b/pygsti/algorithms/core.py index 59a696d85..691da91b5 100644 --- a/pygsti/algorithms/core.py +++ b/pygsti/algorithms/core.py @@ -679,16 +679,10 @@ def run_gst_fit(mdc_store, optimizer, objective_function_builder, verbosity=0): if _np.linalg.norm(mdc_store.model.to_vector() - v_cmp) > 1e-6: raise ValueError("MPI ERROR: *different* MC2GST start models" " given to different processors!") # pragma: no cover - - #MEM from ..baseobjs.profiler import Profiler - #MEM debug_prof = Profiler(comm) - #MEM debug_prof.print_memory("run_gst_fit1", True) - + if objective_function_builder is not None: objective_function_builder = _objfns.ObjectiveFunctionBuilder.cast(objective_function_builder) - #MEM debug_prof.print_memory("run_gst_fit2", True) objective = objective_function_builder.build_from_store(mdc_store, printer) # (objective is *also* a store) - #MEM debug_prof.print_memory("run_gst_fit3", True) else: assert(isinstance(mdc_store, _objfns.ObjectiveFunction)), \ "When `objective_function_builder` is None, `mdc_store` must be an objective fn!" @@ -707,14 +701,8 @@ def run_gst_fit(mdc_store, optimizer, objective_function_builder, verbosity=0): printer.log("Completed in %.1fs" % (_time.time() - tStart), 1) - #if target_model is not None: - # target_vec = target_model.to_vector() - # targetErrVec = _objective_func(target_vec) - # return minErrVec, soln_gs, targetErrVec profiler.add_time("do_mc2gst: total time", tStart) - #TODO: evTree.permute_computation_to_original(minErrVec) #Doesn't work b/c minErrVec is flattened - # but maybe best to just remove minErrVec from return value since this isn't very useful - # anyway? + return opt_result, objective @@ -907,7 +895,7 @@ def _max_array_types(artypes_list): # get the maximum number of each array type if isinstance(mdl, _models.model.OpModel): if precomp_layout_circuit_cache is not None: #then grab the split circuits from there. 
expanded_circuit_outcome_list = mdl.bulk_expand_instruments_and_separate_povm(unique_circuits,
-                                                                                          split_circuits = precomp_layout_circuit_cache['split_circuits'])
+                                                                                          split_circuits = precomp_layout_circuit_cache['split_circuits'].values())
             outcome_count_by_circuit_cache = {ckt: len(outcome_tup) for ckt, outcome_tup in zip(unique_circuits, expanded_circuit_outcome_list)}
         else:
             expanded_circuit_outcome_list = mdl.bulk_expand_instruments_and_separate_povm(unique_circuits)
diff --git a/pygsti/data/dataset.py b/pygsti/data/dataset.py
index 6214fbf41..c3d6c1671 100644
--- a/pygsti/data/dataset.py
+++ b/pygsti/data/dataset.py
@@ -296,34 +296,10 @@ def timeseries_for_outcomes(self):
         last_time = None
         seriesDict = {self.dataset.olIndex[ol]: [] for ol in self.dataset.outcome_labels}
 
-        #REMOVED: (though this gives slightly different behavior)
-        #for outcome_label in self.outcomes:
-        #    if outcome_label not in seriesDict.keys():
-        #        seriesDict[outcome_label] = []
-
         if self.reps is None:
             reps = _np.ones(len(self.time), _np.int64)
         else:
             reps = self.reps
 
-        # An alternate implementation that appears to be (surprisingly?) slower...
-        ##Get time bin locations
-        #time_bins_borders = []
-        #last_time = None
-        #for i, t in enumerate(self.time):
-        #    if t != last_time:
-        #        time_bins_borders.append(i)
-        #        last_time = t
-        #time_bins_borders.append(len(self.time))
-        #nTimes = len(time_bins_borders) - 1
-        #
-        #seriesDict = {self.dataset.olIndex[ol]: _np.zeros(nTimes, _np.int64) for ol in self.dataset.outcome_labels}
-        #
-        #for i in range(nTimes):
-        #    slc = slice(time_bins_borders[i],time_bins_borders[i+1])
-        #    times.append( self.time[slc.start] )
-        #    for oli, rep in zip(self.oli[slc], reps[slc]):
-        #        seriesDict[oli][i] += rep
-
         for t, oli, rep in zip(self.time, self.oli, reps):
 
             if t != last_time:
@@ -586,19 +562,22 @@ def _get_counts(self, timestamp=None, all_outcomes=False):
             tslc = _np.where(_np.isclose(self.time, timestamp))[0]
         else:
             tslc = slice(None)
+        oli_tslc = self.oli[tslc]
+        rep_tslc = self.reps[tslc]
 
         nOutcomes = len(self.dataset.olIndex)
-        nIndices = len(self.oli[tslc])
+        nIndices = len(oli_tslc)
+
         if nOutcomes <= nIndices or all_outcomes:
             if self.reps is None:
                 for ol, i in self.dataset.olIndex.items():
-                    cnt = float(_np.count_nonzero(_np.equal(self.oli[tslc], i)))
-                    if all_outcomes or cnt > 0:
+                    cnt = float(_np.count_nonzero(_np.equal(oli_tslc, i)))
+                    if cnt > 0 or all_outcomes:
                         cntDict.setitem_unsafe(ol, cnt)
             else:
                 for ol, i in self.dataset.olIndex.items():
-                    inds = _np.nonzero(_np.equal(self.oli[tslc], i))[0]
-                    if all_outcomes or len(inds) > 0:
-                        cntDict.setitem_unsafe(ol, float(sum(self.reps[tslc][inds])))
+                    inds = _np.nonzero(_np.equal(oli_tslc, i))[0]
+                    if len(inds) > 0 or all_outcomes:
+                        cntDict.setitem_unsafe(ol, float(sum(rep_tslc[inds])))
         else:
             if self.reps is None:
                 for ol_index in self.oli[tslc]:
@@ -616,7 +595,8 @@ def counts(self):
         """
         Dictionary of per-outcome counts.
""" - if self._cntcache: return self._cntcache # if not None *and* len > 0 + if self._cntcache: + return self._cntcache # if not None *and* len > 0 ret = self._get_counts() if self._cntcache is not None: # == and empty dict {} self._cntcache.update(ret) @@ -1199,10 +1179,10 @@ def _get_row(self, circuit): circuit = _cir.Circuit.cast(circuit) #Note: cirIndex value is either an int (non-static) or a slice (static) - repData = self.repData[self.cirIndex[circuit]] \ - if (self.repData is not None) else None - return _DataSetRow(self, self.oliData[self.cirIndex[circuit]], - self.timeData[self.cirIndex[circuit]], repData, + cirIndex = self.cirIndex[circuit] + repData = self.repData[cirIndex] if (self.repData is not None) else None + return _DataSetRow(self, self.oliData[cirIndex], + self.timeData[cirIndex], repData, self.cnt_cache[circuit] if self.bStatic else None, self.auxInfo[circuit]) diff --git a/pygsti/objectivefns/objectivefns.py b/pygsti/objectivefns/objectivefns.py index 9476f1c1c..609286e4c 100644 --- a/pygsti/objectivefns/objectivefns.py +++ b/pygsti/objectivefns/objectivefns.py @@ -845,7 +845,7 @@ class ModelDatasetCircuitsStore(object): point. """ def __init__(self, model, dataset, circuits=None, resource_alloc=None, array_types=(), - precomp_layout=None, outcome_count_by_circuit=None, verbosity=0): + precomp_layout=None, verbosity=0, outcome_count_by_circuit=None): self.dataset = dataset self.model = model self.resource_alloc = _ResourceAllocation.cast(resource_alloc) @@ -941,22 +941,36 @@ def add_omitted_freqs(self, printer=None, force=False): """ if self.firsts is None or force: # FUTURE: add any tracked memory? self.resource_alloc.add_tracked_memory(...) - self.firsts = []; self.indicesOfCircuitsWithOmittedData = [] - - #bulk compute the number of outcomes. - if isinstance(self.model, _OpModel): - bulk_outcomes_list = self.model.bulk_circuit_outcomes(self.circuits, split_circuits=self.split_circuits) - num_outcomes_list = [len(outcome_tup) for outcome_tup in bulk_outcomes_list] + self.firsts = [] + self.indicesOfCircuitsWithOmittedData = [] + + if self.outcome_count_by_circuit_cache is None: + #bulk compute the number of outcomes. + if isinstance(self.model, _OpModel) and self.split_circuits is not None: + bulk_outcomes_list = self.model.bulk_circuit_outcomes(self.circuits, split_circuits=self.split_circuits) + num_outcomes_list = [len(outcome_tup) for outcome_tup in bulk_outcomes_list] + else: + num_outcomes_list = [self.model.compute_num_outcomes(c) for c in self.circuits] else: - num_outcomes_list = [self.model.compute_num_outcomes(c) for c in self.circuits] + num_outcomes_list = [] + for ckt in self.circuits: + num_outcomes = self.outcome_count_by_circuit_cache.get(ckt, None) + if num_outcomes is None: + num_outcomes = self.model.compute_num_outcomes(ckt) + #also add this to the cache, just in case it is later needed. + self.outcome_count_by_circuit_cache[ckt] = num_outcomes + num_outcomes_list.append(num_outcomes) for i in range(len(self.circuits)): - indices = _slct.to_array(self.layout.indices_for_index(i)) - lklen = _slct.length(self.layout.indices_for_index(i)) - if 0 < lklen < num_outcomes_list[i]: + indices = self.layout.indices_for_index(i) + #The return types of indices_for_index are either ndarrays + #or slices. 
+ if isinstance(indices, slice): + indices = _slct.indices(indices) + if 0 < len(indices) < num_outcomes_list[i]: self.firsts.append(indices[0]) self.indicesOfCircuitsWithOmittedData.append(i) - if len(self.firsts) > 0: + if self.firsts: self.firsts = _np.array(self.firsts, 'i') self.indicesOfCircuitsWithOmittedData = _np.array(self.indicesOfCircuitsWithOmittedData, 'i') self.dprobs_omitted_rowsum = _np.empty((len(self.firsts), self.nparams), 'd') @@ -983,13 +997,15 @@ def add_count_vectors(self, force=False): for (i, circuit) in enumerate(self.ds_circuits): cnts = self.dataset[circuit].counts - totals[self.layout.indices_for_index(i)] = sum(cnts.values()) # dataset[opStr].total - counts[self.layout.indices_for_index(i)] = [cnts.get(x, 0) for x in self.layout.outcomes_for_index(i)] + idcs_for_idx = self.layout.indices_for_index(i) + totals[idcs_for_idx] = sum(cnts.values()) # dataset[opStr]. + counts[idcs_for_idx] = [cnts.getitem_unsafe(x, 0) for x in self.layout.outcomes_for_index(i)] if self.circuits.circuit_weights is not None: for i in range(len(self.ds_circuits)): # multiply N's by weights - counts[self.layout.indices_for_index(i)] *= self.circuits.circuit_weights[i] - totals[self.layout.indices_for_index(i)] *= self.circuits.circuit_weights[i] + idcs_for_idx = self.layout.indices_for_index(i) + counts[idcs_for_idx] *= self.circuits.circuit_weights[i] + totals[idcs_for_idx] *= self.circuits.circuit_weights[i] self.counts = counts self.total_counts = totals @@ -1003,7 +1019,7 @@ class EvaluatedModelDatasetCircuitsStore(ModelDatasetCircuitsStore): def __init__(self, mdc_store, verbosity): super().__init__(mdc_store.model, mdc_store.dataset, mdc_store.global_circuits, mdc_store.resource_alloc, - mdc_store.array_types, mdc_store.layout, verbosity) + mdc_store.array_types, mdc_store.layout, verbosity, mdc_store.outcome_count_by_circuit_cache) # Memory check - see if there's enough memory to hold all the evaluated quantities #persistent_mem = self.layout.memory_estimate() diff --git a/pygsti/tools/slicetools.py b/pygsti/tools/slicetools.py index ba49b1056..506045182 100644 --- a/pygsti/tools/slicetools.py +++ b/pygsti/tools/slicetools.py @@ -26,7 +26,8 @@ def length(s): ------- int """ - if not isinstance(s, slice): return len(s) + if not isinstance(s, slice): + return len(s) if s.start is None or s.stop is None: return 0 if s.step is None: @@ -191,7 +192,8 @@ def indices(s, n=None): elif s.start < 0: assert(n is not None), "Must supply `n` to obtain indices of a slice with negative start point!" start = n + s.start - else: start = s.start + else: + start = s.start if s.stop is None: assert(n is not None), "Must supply `n` to obtain indices of a slice with unspecified stop point!" @@ -199,12 +201,56 @@ def indices(s, n=None): elif s.stop < 0: assert(n is not None), "Must supply `n` to obtain indices of a slice with negative stop point!" stop = n + s.stop - else: stop = s.stop + else: + stop = s.stop if s.step is None: return list(range(start, stop)) - return list(range(start, stop, s.step)) + else: + return list(range(start, stop, s.step)) + +def indices_as_array(s, n=None): + """ + Returns a numpy array of the indices specified by slice `s`. + + Parameters + ---------- + s : slice + The slice to operate upon. + + n : int, optional + The number of elements in the array being indexed, + used for computing *negative* start/stop points. 
+
+    Returns
+    -------
+    numpy.ndarray of integers
+    """
+    if s.start is None and s.stop is None:
+        return _np.empty(0, _np.int64)
+
+    if s.start is None:
+        start = 0
+    elif s.start < 0:
+        assert(n is not None), "Must supply `n` to obtain indices of a slice with negative start point!"
+        start = n + s.start
+    else:
+        start = s.start
+
+    if s.stop is None:
+        assert(n is not None), "Must supply `n` to obtain indices of a slice with unspecified stop point!"
+        stop = n
+    elif s.stop < 0:
+        assert(n is not None), "Must supply `n` to obtain indices of a slice with negative stop point!"
+        stop = n + s.stop
+    else:
+        stop = s.stop
+
+    if s.step is None:
+        return _np.arange(start, stop, dtype=_np.int64)
+    else:
+        return _np.arange(start, stop, s.step, dtype=_np.int64)
+
 
 def list_to_slice(lst, array_ok=False, require_contiguous=True):
     """
@@ -240,17 +286,23 @@ def list_to_slice(lst, array_ok=False, require_contiguous=True):
         else: raise ValueError("Slice must be contiguous!")
         return lst
-    if lst is None or len(lst) == 0: return slice(0, 0)
+    if lst is None or len(lst) == 0:
+        return slice(0, 0)
     start = lst[0]
-    if len(lst) == 1: return slice(start, start + 1)
-    step = lst[1] - lst[0]; stop = start + step * len(lst)
+    if len(lst) == 1:
+        return slice(start, start + 1)
+    step = lst[1] - lst[0]
+    stop = start + step * len(lst)
 
     if list(lst) == list(range(start, stop, step)):
         if require_contiguous and step != 1:
-            if array_ok: return _np.array(lst, _np.int64)
-            else: raise ValueError("Slice must be contiguous (or array_ok must be True)!")
-        if step == 1: step = None
+            if array_ok:
+                return _np.array(lst, _np.int64)
+            else:
+                raise ValueError("Slice must be contiguous (or array_ok must be True)!")
+        if step == 1:
+            step = None
         return slice(start, stop, step)
     elif array_ok:
         return _np.array(lst, _np.int64)
@@ -272,7 +324,7 @@ def to_array(slc_or_list_like):
     numpy.ndarray
     """
     if isinstance(slc_or_list_like, slice):
-        return _np.array(indices(slc_or_list_like), _np.int64)
+        return indices_as_array(slc_or_list_like)
     else:
         return _np.array(slc_or_list_like, _np.int64)

From e8e70048a10395c338082b3fa55ebd6fe96e5c8a Mon Sep 17 00:00:00 2001
From: Corey Ostrove
Date: Sun, 2 Jun 2024 18:23:33 -0600
Subject: [PATCH 09/30] Fix dataset bug

Fix a bug I introduced in the dataset code, where we could end up
indexing into something (self.reps) that could be None.
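For orientation, a small usage sketch of the slice helper added above (assuming
`indices_as_array` lands as written in the diff; `n` is only required for
negative or open-ended bounds):

    from pygsti.tools import slicetools as slct

    #a fully specified slice converts directly to an int64 index array
    print(slct.indices_as_array(slice(2, 8, 2)))         # [2 4 6]

    #to_array now routes slices through indices_as_array, avoiding the
    #intermediate Python list that indices() builds
    print(slct.to_array(slice(0, 4)))                    # [0 1 2 3]

    #negative or open-ended bounds require the sequence length n
    print(slct.indices_as_array(slice(-3, None), n=10))  # [7 8 9]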
---
 pygsti/data/dataset.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/pygsti/data/dataset.py b/pygsti/data/dataset.py
index c3d6c1671..ce7bb52c6 100644
--- a/pygsti/data/dataset.py
+++ b/pygsti/data/dataset.py
@@ -563,7 +563,6 @@ def _get_counts(self, timestamp=None, all_outcomes=False):
         else:
             tslc = slice(None)
         oli_tslc = self.oli[tslc]
-        rep_tslc = self.reps[tslc]
 
         nOutcomes = len(self.dataset.olIndex)
         nIndices = len(oli_tslc)
@@ -577,14 +576,14 @@ def _get_counts(self, timestamp=None, all_outcomes=False):
             for ol, i in self.dataset.olIndex.items():
                 inds = _np.nonzero(_np.equal(oli_tslc, i))[0]
                 if len(inds) > 0 or all_outcomes:
-                    cntDict.setitem_unsafe(ol, float(sum(rep_tslc[inds])))
+                    cntDict.setitem_unsafe(ol, float(sum(self.reps[tslc][inds])))
         else:
             if self.reps is None:
-                for ol_index in self.oli[tslc]:
+                for ol_index in oli_tslc:
                     ol = self.dataset.ol[ol_index]
                     cntDict.setitem_unsafe(ol, 1.0 + cntDict.getitem_unsafe(ol, 0.0))
             else:
-                for ol_index, reps in zip(self.oli[tslc], self.reps[tslc]):
+                for ol_index, reps in zip(oli_tslc, self.reps[tslc]):
                     ol = self.dataset.ol[ol_index]
                     cntDict.setitem_unsafe(ol, reps + cntDict.getitem_unsafe(ol, 0.0))

From aa22c3c23b842fb974a94da9cb6cec984b63d38d Mon Sep 17 00:00:00 2001
From: Corey Ostrove
Date: Sun, 2 Jun 2024 18:49:01 -0600
Subject: [PATCH 10/30] Another minor bugfix caught by testing

Another minor bug caught by testing.
---
 pygsti/objectivefns/objectivefns.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pygsti/objectivefns/objectivefns.py b/pygsti/objectivefns/objectivefns.py
index 609286e4c..208bdb46d 100644
--- a/pygsti/objectivefns/objectivefns.py
+++ b/pygsti/objectivefns/objectivefns.py
@@ -879,9 +879,9 @@ def __init__(self, model, dataset, circuits=None, resource_alloc=None, array_typ
             #self.circuits
             self.split_circuit_cache = self.layout.split_circuit_cache
             self.split_circuits = [self.split_circuit_cache[ckt] for ckt in self.circuits]
-
         #currently only implemented for matrix, will eventually add map support.
else: + self.split_circuits = None self.split_circuit_cache = None #set the value of the circuit outcome count cache (can be None) From be8025559dae38bb30567817db4d4cef54ca0816 Mon Sep 17 00:00:00 2001 From: Corey Ostrove Date: Sun, 2 Jun 2024 20:58:11 -0600 Subject: [PATCH 11/30] Another minor bugfix caught by testing --- pygsti/algorithms/core.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/pygsti/algorithms/core.py b/pygsti/algorithms/core.py index 691da91b5..4db4bfb02 100644 --- a/pygsti/algorithms/core.py +++ b/pygsti/algorithms/core.py @@ -888,8 +888,11 @@ def _max_array_types(artypes_list): # get the maximum number of each array type for i, circuit_list in enumerate(circuit_lists): printer.log(f'Layout for iteration {i}', 2) - precomp_layouts.append(mdl.sim.create_layout(circuit_list, dataset, resource_alloc, array_types, verbosity= printer - 1, - layout_creation_circuit_cache = precomp_layout_circuit_cache)) + if isinstance(mdl.sim, _fwdsims.MatrixForwardSimulator): + precomp_layouts.append(mdl.sim.create_layout(circuit_list, dataset, resource_alloc, array_types, verbosity= printer - 1, + layout_creation_circuit_cache = precomp_layout_circuit_cache)) + else: + precomp_layouts.append(mdl.sim.create_layout(circuit_list, dataset, resource_alloc, array_types, verbosity= printer - 1)) #precompute a cache of possible outcome counts for each circuits to accelerate MDC store creation if isinstance(mdl, _models.model.OpModel): From ff13da64be2ede80c35f56d32944cc7668606128 Mon Sep 17 00:00:00 2001 From: Corey Ostrove Date: Sun, 2 Jun 2024 21:01:45 -0600 Subject: [PATCH 12/30] Update test_stdinputparser.py Not sure why this didn't get caught on the circuit update branch, but oh well... --- test/test_packages/iotest/test_stdinputparser.py | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/test/test_packages/iotest/test_stdinputparser.py b/test/test_packages/iotest/test_stdinputparser.py index fffa1a259..8131621f1 100644 --- a/test/test_packages/iotest/test_stdinputparser.py +++ b/test/test_packages/iotest/test_stdinputparser.py @@ -28,17 +28,9 @@ def test_strings(self): ("G1*((G2G3)^2G4G5)^2G7", ('G1', 'G2', 'G3', 'G2', 'G3', 'G4', 'G5', 'G2', 'G3', 'G2', 'G3', 'G4', 'G5', 'G7')), ("G1(G2^2(G3G4)^2)^2", ('G1', 'G2', 'G2', 'G3', 'G4', 'G3', 'G4', 'G2', 'G2', 'G3', 'G4', 'G3', 'G4')), ("G1*G2", ('G1','G2')), - #("S<1>",('G1',)), - #("S<2>",('G1','G2')), - #("G1S<2>^2G3", ('G1', 'G1', 'G2', 'G1', 'G2', 'G3')), - #("G1S<1>G3",('G1','G1','G3')), - #("S<3>[0:4]",('G1', 'G2', 'G3', 'G4')), ("G_my_xG_my_y", ('G_my_x', 'G_my_y')), ("G_my_x*G_my_y", ('G_my_x', 'G_my_y')), ("GsG___", ('Gs', 'G___')), - #("S<2>G3", ('G1', 'G2', 'G3')), - #("S", ('G1', 'G2')), - #("S", ('G2', 'G3')), ("G1G2", ('G1', 'G2')), ("rho0*Gx", ('rho0','Gx')), ("rho0*Gx*Mdefault", ('rho0','Gx','Mdefault'))] @@ -50,7 +42,7 @@ def test_strings(self): #print("%s ==> " % s, expected) result, line_labels, occurrence_id, compilable_indices = std.parse_circuit_raw(s, lookup=lkup, create_subcircuits=False) self.assertEqual(line_labels, None) - self.assertEqual(compilable_indices, None) + self.assertEqual(compilable_indices, ()) circuit_result = pygsti.circuits.Circuit(result, line_labels="auto", expand_subcircuits=True) #use "auto" line labels since none are parsed. 
        self.assertEqual(circuit_result.tup, expected)

From f8c58406f2c55070308f2a2653fae3afb9b6fbd2 Mon Sep 17 00:00:00 2001
From: Corey Ostrove
Date: Tue, 4 Jun 2024 19:37:51 -0600
Subject: [PATCH 13/30] Fix indentation error

Fixes a minor indentation error in split_circuits.
---
 pygsti/models/model.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pygsti/models/model.py b/pygsti/models/model.py
index c7a5bf863..d6e28add6 100644
--- a/pygsti/models/model.py
+++ b/pygsti/models/model.py
@@ -1347,7 +1347,7 @@ def split_circuits(self, circuits, erroron=('prep', 'povm'), split_prep=True, sp
                 else:
                     povm_lbl = None
                     circuit = ckt
-                split_circuits.append((None, circuit, povm_lbl))
+                    split_circuits.append((None, circuit, povm_lbl))

From 0417c20850b05da161d3d23f86d7cba8457ac737 Mon Sep 17 00:00:00 2001
From: Corey Ostrove
Date: Tue, 4 Jun 2024 22:41:54 -0600
Subject: [PATCH 14/30] Faster implementation of __getitem__

Improve the performance of __getitem__ when indexing into static circuits
by making use of the _copy_init code path.
---
 pygsti/circuits/circuit.py | 47 +++++++++++++++++++++++++-------------
 1 file changed, 31 insertions(+), 16 deletions(-)

diff --git a/pygsti/circuits/circuit.py b/pygsti/circuits/circuit.py
index b10a963f4..c5df6bc5f 100644
--- a/pygsti/circuits/circuit.py
+++ b/pygsti/circuits/circuit.py
@@ -1095,13 +1095,12 @@ def _proc_lines_arg(self, lines):
     def _proc_key_arg(self, key):
         """ Pre-process the key argument used by many methods """
         if isinstance(key, tuple):
-            if len(key) != 2: return IndexError("Index must be of the form <layers>,<lines>")
-            layers = key[0]
-            lines = key[1]
+            if len(key) != 2:
+                raise IndexError("Index must be of the form <layers>,<lines>")
+            else:
+                return key[0], key[1]
         else:
-            layers = key
-            lines = None
-        return layers, lines
+            return key, None
 
     def _layer_components(self, ilayer):
         """ Get the components of the `ilayer`-th layer as a list/tuple. """
@@ -1191,22 +1190,38 @@ def extract_labels(self, layers=None, lines=None, strict=True):
         `layers` is a single integer and as a `Circuit` otherwise.
         Note: if you want a `Circuit` when only selecting one layer,
         set `layers` to a slice or tuple containing just a single index.
+        Note that the returned circuit doesn't retain any original
+        metadata, such as the compilable layer indices or occurrence id.
""" - nonint_layers = not isinstance(layers, int) #Shortcut for common case when lines == None and when we're only taking a layer slice/index - if lines is None: - assert(layers is not None) - if nonint_layers is False: return self.layertup[layers] - if isinstance(layers, slice) and strict is True: # if strict=False, then need to recompute line labels - return Circuit._fastinit(self._labels[layers], self._line_labels, not self._static) + if lines is None and layers is not None: + if self._static: + if isinstance(layers, int): + return self._labels[layers] + if isinstance(layers, slice) and strict is True: # if strict=False, then need to recompute line labels + #can speed this up a measurably by manually computing the new hashable tuple value and hash + new_hashable_tup = self._labels[layers] + ('@',) + self._line_labels + ret = Circuit.__new__(Circuit) + return ret._copy_init(self._labels[layers], self._line_labels, not self._static, + hashable_tup= new_hashable_tup, + precomp_hash=hash(new_hashable_tup)) + else: + if isinstance(layers, int): + return self.layertup[layers] + if isinstance(layers, slice) and strict is True: # if strict=False, then need to recompute line labels + return Circuit._fastinit(self._labels[layers], self._line_labels, not self._static) + #otherwise assert both are not None: layers = self._proc_layers_arg(layers) lines = self._proc_lines_arg(lines) if len(layers) == 0 or len(lines) == 0: - return Circuit._fastinit(() if self._static else [], - tuple(lines) if self._static else lines, - not self._static) if nonint_layers else None # zero-area region + if self._static: + return Circuit._fastinit((), tuple(lines), False) # zero-area region + else: + return Circuit._fastinit(() if self._static else [], + tuple(lines) if self._static else lines, + not self._static) # zero-area region ret = [] if self._static: @@ -1230,7 +1245,7 @@ def get_sslbls(lbl): return lbl.sslbls ret_layer.append(l) ret.append(_Label(ret_layer) if len(ret_layer) != 1 else ret_layer[0]) # Labels b/c we use _fastinit - if nonint_layers: + if not isinstance(layers, int): if not strict: lines = "auto" # since we may have included lbls on other lines # don't worry about string rep for now... From c39101dd7baa093f0068907199da10c172a4cfd2 Mon Sep 17 00:00:00 2001 From: Corey Ostrove Date: Tue, 4 Jun 2024 22:44:17 -0600 Subject: [PATCH 15/30] Implement caching for map layout creation Implement caching of circuit structures tailored to the map forward simulator's requirements. --- pygsti/algorithms/core.py | 10 ++- pygsti/forwardsims/mapforwardsim.py | 10 ++- pygsti/forwardsims/matrixforwardsim.py | 2 + pygsti/layouts/distlayout.py | 39 ---------- pygsti/layouts/maplayout.py | 100 ++++++++++++++++++++----- pygsti/layouts/matrixlayout.py | 4 +- 6 files changed, 100 insertions(+), 65 deletions(-) diff --git a/pygsti/algorithms/core.py b/pygsti/algorithms/core.py index 4db4bfb02..3b15797bc 100644 --- a/pygsti/algorithms/core.py +++ b/pygsti/algorithms/core.py @@ -880,15 +880,17 @@ def _max_array_types(artypes_list): # get the maximum number of each array type precomp_layouts = [] #pre-compute a dictionary caching completed circuits for layout construction performance. 
-    unique_circuits = {ckt for circuit_list in circuit_lists for ckt in circuit_list}
+    unique_circuits = list({ckt for circuit_list in circuit_lists for ckt in circuit_list})
     if isinstance(mdl.sim, _fwdsims.MatrixForwardSimulator):
-        precomp_layout_circuit_cache = _layouts.matrixlayout.create_matrix_copa_layout_circuit_cache(unique_circuits, mdl)
+        precomp_layout_circuit_cache = _layouts.matrixlayout.create_matrix_copa_layout_circuit_cache(unique_circuits, mdl, dataset=dataset)
+    elif isinstance(mdl.sim, _fwdsims.MapForwardSimulator):
+        precomp_layout_circuit_cache = _layouts.maplayout.create_map_copa_layout_circuit_cache(unique_circuits, mdl, dataset=dataset)
     else:
         precomp_layout_circuit_cache = None
 
     for i, circuit_list in enumerate(circuit_lists):
         printer.log(f'Layout for iteration {i}', 2)
-        if isinstance(mdl.sim, _fwdsims.MatrixForwardSimulator):
+        if isinstance(mdl.sim, (_fwdsims.MatrixForwardSimulator, _fwdsims.MapForwardSimulator)):
            precomp_layouts.append(mdl.sim.create_layout(circuit_list, dataset, resource_alloc, array_types, verbosity= printer - 1,
                                                         layout_creation_circuit_cache = precomp_layout_circuit_cache))
         else:
            precomp_layouts.append(mdl.sim.create_layout(circuit_list, dataset, resource_alloc, array_types, verbosity= printer - 1))
 
     #precompute a cache of possible outcome counts for each circuit to accelerate MDC store creation
     if isinstance(mdl, _models.model.OpModel):
         if precomp_layout_circuit_cache is not None: #then grab the split circuits from there.
             expanded_circuit_outcome_list = mdl.bulk_expand_instruments_and_separate_povm(unique_circuits,
-                                                                                          split_circuits = precomp_layout_circuit_cache['split_circuits'].values())
+                                                                                          completed_circuits= precomp_layout_circuit_cache['completed_circuits'].values())
             outcome_count_by_circuit_cache = {ckt: len(outcome_tup) for ckt,outcome_tup in zip(unique_circuits, expanded_circuit_outcome_list)}
         else:
             expanded_circuit_outcome_list = mdl.bulk_expand_instruments_and_separate_povm(unique_circuits)
diff --git a/pygsti/forwardsims/mapforwardsim.py b/pygsti/forwardsims/mapforwardsim.py
index 6b19e8d39..c8f8a043b 100644
--- a/pygsti/forwardsims/mapforwardsim.py
+++ b/pygsti/forwardsims/mapforwardsim.py
@@ -193,7 +193,7 @@ def copy(self):
                                     self._processor_grid, self._pblk_sizes)
 
     def create_layout(self, circuits, dataset=None, resource_alloc=None, array_types=('E',),
-                      derivative_dimensions=None, verbosity=0):
+                      derivative_dimensions=None, verbosity=0, layout_creation_circuit_cache=None):
         """
         Constructs a circuit-outcome-probability-array (COPA) layout for a list of circuits.
 
@@ -223,6 +223,11 @@ def create_layout(self, circuits, dataset=None, resource_alloc=None, array_types
         verbosity : int or VerbosityPrinter
             Determines how much output to send to stdout.  0 means no output, higher
             integers mean more output.
+
+        layout_creation_circuit_cache : dict, optional (default None)
+            A precomputed dictionary serving as a cache for completed
+            circuits, i.e. circuits with prep labels and POVM labels appended,
+            along with other useful pre-computed circuit structures used in
+            layout creation.
 
         Returns
         -------
@@ -265,7 +270,8 @@ def create_layout(self, circuits, dataset=None, resource_alloc=None, array_types
         assert(_np.prod((na,) + npp) <= nprocs), "Processor grid size exceeds available processors!"
layout = _MapCOPALayout(circuits, self.model, dataset, self._max_cache_size, natoms, na, npp, - param_dimensions, param_blk_sizes, resource_alloc, verbosity) + param_dimensions, param_blk_sizes, resource_alloc, verbosity, + layout_creation_circuit_cache= layout_creation_circuit_cache) if mem_limit is not None: loc_nparams1 = num_params / npp[0] if len(npp) > 0 else 0 diff --git a/pygsti/forwardsims/matrixforwardsim.py b/pygsti/forwardsims/matrixforwardsim.py index e47ad0bb5..61fa4022f 100644 --- a/pygsti/forwardsims/matrixforwardsim.py +++ b/pygsti/forwardsims/matrixforwardsim.py @@ -1059,6 +1059,8 @@ def create_layout(self, circuits, dataset=None, resource_alloc=None, array_types layout_creation_circuit_cache : dict, optional (default None) A precomputed dictionary serving as a cache for completed circuits. I.e. circuits with prep labels and POVM labels appended. + Along with other useful pre-computed circuit structures used in layout + creation. Returns ------- diff --git a/pygsti/layouts/distlayout.py b/pygsti/layouts/distlayout.py index 7a7184529..9db1150d8 100644 --- a/pygsti/layouts/distlayout.py +++ b/pygsti/layouts/distlayout.py @@ -360,9 +360,6 @@ def __init__(self, circuits, unique_circuits, to_unique, unique_complete_circuit to_send = 0 # default = contribute nothing to MPI.SUM below if i in atoms_dict: - #print("DB (%d): updating elindex_outcome_tuples w/Atom %d:\n%s" - # % (rank, i, "\n".join(["%d: %s" % (indx, str(tups)) - # for indx, tups in atoms_dict[i].elindex_outcome_tuples.items()]))) if start is None: start = stop = offset assert(stop == offset) # This should be checked by _assert_sequential(myAtomIndices) above @@ -810,42 +807,6 @@ def __init__(self, circuits, unique_circuits, to_unique, unique_complete_circuit super().__init__(local_circuits, local_unique_circuits, local_to_unique, local_elindex_outcome_tuples, local_unique_complete_circuits, param_dimensions, resource_alloc) - #DEBUG LAYOUT PRINTING - #def cnt_str(cnt): - # if cnt is None: return "%4s" % '-' - # return "%4d" % cnt - #def slc_str(slc): - # if slc is None: return "%14s" % '--' - # return "%3d->%3d (%3d)" % (slc.start, slc.stop, slc.stop - slc.start) \ - # if isinstance(slc, slice) else "%14s" % str(slc) - #shm = bool(resource_alloc.host_comm is not None) # shared mem? 
- #if rank == 0: - # print("%11s %-14s %-14s %-14s %-14s %-4s %-14s %-4s %-14s %-4s" % ( - # '#', 'g-elements', 'g-params', 'g-param2s', - # 'h-elements','tot', 'h-params','tot', 'h-params2','tot'), - # flush=True) - #resource_alloc.comm.barrier() - #for r in range(resource_alloc.comm.size): - # if r == rank: - # my_desc = ("%3d (%2d.%2d)" % (rank, resource_alloc.host_index, resource_alloc.host_comm.rank)) \ - # if shm else ("%11d" % rank) - # print(my_desc, slc_str(self.global_element_slice), slc_str(self.global_param_slice), - # slc_str(self.global_param2_slice), ' ', - # slc_str(self.host_element_slice), cnt_str(self.host_num_elements), - # slc_str(self.host_param_slice), cnt_str(self.host_num_params), - # slc_str(self.host_param2_slice), cnt_str(self.host_num_params2), flush=True) - # resource_alloc.comm.barrier() - # - #if rank == 0: - # print("%11s %-14s %-14s %-4s" % ('#', 'g-pfine', 'h-pfine', 'tot'), flush=True) - #resource_alloc.comm.barrier() - #for r in range(resource_alloc.comm.size): - # if r == rank: - # my_desc = ("%3d (%2d.%2d)" % (rank, resource_alloc.host_index, resource_alloc.host_comm.rank)) \ - # if shm else ("%11d" % rank) - # print(my_desc, slc_str(self.global_param_fine_slice), slc_str(self.host_param_fine_slice), - # cnt_str(self.host_num_params_fine), flush=True) - # resource_alloc.comm.barrier() @property def max_atom_elements(self): diff --git a/pygsti/layouts/maplayout.py b/pygsti/layouts/maplayout.py index ca5ca642e..8f9805e89 100644 --- a/pygsti/layouts/maplayout.py +++ b/pygsti/layouts/maplayout.py @@ -51,16 +51,23 @@ class _MapCOPALayoutAtom(_DistributableAtom): """ def __init__(self, unique_complete_circuits, ds_circuits, group, model, - dataset, max_cache_size): + dataset, max_cache_size, expanded_complete_circuit_cache = None): expanded_circuit_info_by_unique = _collections.OrderedDict() expanded_circuit_set = _collections.OrderedDict() # only use SeparatePOVMCircuit keys as ordered set + for i in group: - observed_outcomes = None if (dataset is None) else dataset[ds_circuits[i]].outcomes - d = unique_complete_circuits[i].expand_instruments_and_separate_povm(model, observed_outcomes) + if expanded_complete_circuit_cache is None: + observed_outcomes = None if (dataset is None) else dataset[ds_circuits[i]].outcomes + d = model.expand_instruments_and_separate_povm(unique_complete_circuits[i], observed_outcomes) + else: + d = expanded_complete_circuit_cache.get(unique_complete_circuits[i], None) + if d is None: + observed_outcomes = None if (dataset is None) else dataset[ds_circuits[i]].outcomes + d = model.expand_instruments_and_separate_povm(unique_complete_circuits[i], observed_outcomes) expanded_circuit_info_by_unique[i] = d # a dict of SeparatePOVMCircuits => tuples of outcome labels expanded_circuit_set.update(d) - + expanded_circuits = list(expanded_circuit_set.keys()) self.table = _PrefixTable(expanded_circuits, max_cache_size) @@ -206,13 +213,45 @@ class MapCOPALayout(_DistributableCOPALayout): def __init__(self, circuits, model, dataset=None, max_cache_size=None, num_sub_tables=None, num_table_processors=1, num_param_dimension_processors=(), - param_dimensions=(), param_dimension_blk_sizes=(), resource_alloc=None, verbosity=0): + param_dimensions=(), param_dimension_blk_sizes=(), resource_alloc=None, verbosity=0, + layout_creation_circuit_cache=None): unique_circuits, to_unique = self._compute_unique_circuits(circuits) aliases = circuits.op_label_aliases if isinstance(circuits, _CircuitList) else None ds_circuits = 
_lt.apply_aliases_to_circuits(unique_circuits, aliases)
-        unique_complete_circuits = [model.complete_circuit(c) for c in unique_circuits]
-        unique_povmless_circuits = [model.split_circuit(c, split_prep=False)[1] for c in unique_complete_circuits]
+
+        #extract subcaches from layout_creation_circuit_cache:
+        if layout_creation_circuit_cache is not None:
+            self.completed_circuit_cache = layout_creation_circuit_cache.get('completed_circuits', None)
+            self.split_circuit_cache = layout_creation_circuit_cache.get('split_circuits', None)
+            self.expanded_and_separated_circuits_cache = layout_creation_circuit_cache.get('expanded_and_separated_circuits', None)
+        else:
+            self.completed_circuit_cache = None
+            self.split_circuit_cache = None
+            self.expanded_and_separated_circuits_cache = None
+
+        if self.completed_circuit_cache is None:
+            unique_complete_circuits = model.complete_circuits(unique_circuits)
+            split_circuits = model.split_circuits(unique_complete_circuits, split_prep=False)
+        else:
+            unique_complete_circuits = []
+            for c in unique_circuits:
+                comp_ckt = self.completed_circuit_cache.get(c, None)
+                if comp_ckt is not None:
+                    unique_complete_circuits.append(comp_ckt)
+                else:
+                    unique_complete_circuits.append(model.complete_circuit(c))
+            split_circuits = []
+            for c, c_complete in zip(unique_circuits, unique_complete_circuits):
+                split_ckt = self.split_circuit_cache.get(c, None)
+                if split_ckt is not None:
+                    split_circuits.append(split_ckt)
+                else:
+                    split_circuits.append(model.split_circuit(c_complete, split_prep=False))
+
+
+        #construct list of unique POVM-less circuits.
+        unique_povmless_circuits = [ckt_tup[1] for ckt_tup in split_circuits]
 
         max_sub_table_size = None  # was an argument but never used; remove in future
         if (num_sub_tables is not None and num_sub_tables > 1) or max_sub_table_size is not None:
@@ -221,19 +260,10 @@ def __init__(self, circuits, model, dataset=None, max_cache_size=None,
         else:
             groups = [set(range(len(unique_complete_circuits)))]
 
-        #atoms = []
-        #elindex_outcome_tuples = _collections.OrderedDict(
-        #    [(unique_i, list()) for unique_i in range(len(unique_circuits))])
-
-        #offset = 0
-        #for group in groups:
-        #    atoms.append(_MapCOPALayoutAtom(unique_complete_circuits, ds_circuits, to_orig, group,
-        #                                    model, dataset, offset, elindex_outcome_tuples, max_cache_size))
-        #    offset += atoms[-1].num_elements
-
         def _create_atom(group):
             return _MapCOPALayoutAtom(unique_complete_circuits, ds_circuits, group,
-                                      model, dataset, max_cache_size)
+                                      model, dataset, max_cache_size,
+                                      expanded_complete_circuit_cache=self.expanded_and_separated_circuits_cache)
 
         super().__init__(circuits, unique_circuits, to_unique, unique_complete_circuits,
                          _create_atom, groups, num_table_processors,
@@ -248,3 +278,37 @@ def _create_atom(group):
     for atom in self.atoms:
         for expanded_circuit_i, unique_i in atom.unique_indices_by_expcircuit.items():
             atom.orig_indices_by_expcircuit[expanded_circuit_i] = unique_to_orig[unique_i]
+
+
+def create_map_copa_layout_circuit_cache(circuits, model, dataset=None):
+    """
+    Helper function for pre-computing/pre-processing circuit structures
+    used in map layout creation.
+    """
+    cache = dict()
+    completed_circuits = model.complete_circuits(circuits)
+
+    cache['completed_circuits'] = {ckt: comp_ckt for ckt, comp_ckt in zip(circuits, completed_circuits)}
+
+    split_circuits = model.split_circuits(completed_circuits, split_prep=False)
+    cache['split_circuits'] = {ckt: split_ckt for ckt, split_ckt in zip(circuits, split_circuits)}
+
+
+    if dataset is not None:
+        outcomes_list = []
+        for ckt in circuits:
+            ds_row = dataset[ckt]
+            outcomes_list.append(ds_row.outcomes if ds_row is not None else None)
+        #slightly different from matrix, for some reason outcomes is used in this class
+        #and unique_outcomes is used in matrix.
+    else:
+        outcomes_list = [None]*len(circuits)
+
+    expanded_circuit_outcome_list = model.bulk_expand_instruments_and_separate_povm(circuits,
+                                                                              observed_outcomes_list = outcomes_list,
+                                                                              completed_circuits= completed_circuits)
+
+    expanded_circuit_cache = {ckt: expanded_ckt for ckt, expanded_ckt in zip(completed_circuits, expanded_circuit_outcome_list)}
+    cache['expanded_and_separated_circuits'] = expanded_circuit_cache
+
+    return cache
\ No newline at end of file
diff --git a/pygsti/layouts/matrixlayout.py b/pygsti/layouts/matrixlayout.py
index 654f32c86..c76e0d9fb 100644
--- a/pygsti/layouts/matrixlayout.py
+++ b/pygsti/layouts/matrixlayout.py
@@ -404,8 +404,8 @@ def create_matrix_copa_layout_circuit_cache(circuits, model, dataset=None):
     #doing here, but I think 90+% of the time this ought to be fine.
     if dataset is not None:
         unique_outcomes_list = []
-        for ckt in completed_circuits.values():
-            ds_row = dataset.get(ckt, None)
+        for ckt in circuits:
+            ds_row = dataset[ckt]
             unique_outcomes_list.append(ds_row.unique_outcomes if ds_row is not None else None)
     else:
         unique_outcomes_list = [None]*len(circuits)

From 6cc69bcbdd4783ab8a171e3b5dcf5d82610a61e4 Mon Sep 17 00:00:00 2001
From: Corey Ostrove
Date: Tue, 4 Jun 2024 23:21:33 -0600
Subject: [PATCH 16/30] Fix bugs in new extract_labels implementation

---
 pygsti/circuits/circuit.py | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/pygsti/circuits/circuit.py b/pygsti/circuits/circuit.py
index c5df6bc5f..a65629953 100644
--- a/pygsti/circuits/circuit.py
+++ b/pygsti/circuits/circuit.py
@@ -1193,26 +1193,29 @@ def extract_labels(self, layers=None, lines=None, strict=True):
         Note that the returned circuit doesn't retain any original
         metadata, such as the compilable layer indices or occurrence id.
""" + nonint_layers = not isinstance(layers, int) #Shortcut for common case when lines == None and when we're only taking a layer slice/index if lines is None and layers is not None: if self._static: - if isinstance(layers, int): + if not nonint_layers: return self._labels[layers] if isinstance(layers, slice) and strict is True: # if strict=False, then need to recompute line labels #can speed this up a measurably by manually computing the new hashable tuple value and hash - new_hashable_tup = self._labels[layers] + ('@',) + self._line_labels + if not self._line_labels in (('*',), ()): + new_hashable_tup = self._labels[layers] + ('@',) + self._line_labels + else: + new_hashable_tup = self._labels[layers] ret = Circuit.__new__(Circuit) - return ret._copy_init(self._labels[layers], self._line_labels, not self._static, - hashable_tup= new_hashable_tup, - precomp_hash=hash(new_hashable_tup)) + return ret._copy_init(self._labels[layers], self._line_labels, not self._static, hashable_tup= new_hashable_tup, precomp_hash=hash(new_hashable_tup)) else: - if isinstance(layers, int): + if not nonint_layers: return self.layertup[layers] if isinstance(layers, slice) and strict is True: # if strict=False, then need to recompute line labels return Circuit._fastinit(self._labels[layers], self._line_labels, not self._static) #otherwise assert both are not None: + layers = self._proc_layers_arg(layers) lines = self._proc_lines_arg(lines) if len(layers) == 0 or len(lines) == 0: @@ -1245,7 +1248,7 @@ def get_sslbls(lbl): return lbl.sslbls ret_layer.append(l) ret.append(_Label(ret_layer) if len(ret_layer) != 1 else ret_layer[0]) # Labels b/c we use _fastinit - if not isinstance(layers, int): + if nonint_layers: if not strict: lines = "auto" # since we may have included lbls on other lines # don't worry about string rep for now... From 1ff8aeb6a4be7c242ca91afc302540a446f5cb1c Mon Sep 17 00:00:00 2001 From: Corey Ostrove Date: Tue, 4 Jun 2024 23:54:40 -0600 Subject: [PATCH 17/30] Finish refactoring expand_instruments_and_separate_povm This finishes the process of refactoring expand_instruments_and_separate_povm from a circuit method to a method of OpModel. --- pygsti/algorithms/core.py | 4 +- pygsti/circuits/circuit.py | 100 ------------------------- pygsti/forwardsims/mapforwardsim.py | 2 +- pygsti/forwardsims/matrixforwardsim.py | 2 +- pygsti/forwardsims/weakforwardsim.py | 4 +- pygsti/layouts/termlayout.py | 2 +- 6 files changed, 7 insertions(+), 107 deletions(-) diff --git a/pygsti/algorithms/core.py b/pygsti/algorithms/core.py index 3b15797bc..cc888c50a 100644 --- a/pygsti/algorithms/core.py +++ b/pygsti/algorithms/core.py @@ -402,7 +402,7 @@ def _construct_ab(prep_fiducials, effect_fiducials, model, dataset, op_label_ali for j, rhostr in enumerate(prep_fiducials): opLabelString = rhostr + estr # LEXICOGRAPHICAL VS MATRIX ORDER dsStr = opLabelString.replace_layers_with_aliases(op_label_aliases) - expd_circuit_outcomes = opLabelString.expand_instruments_and_separate_povm(model) + expd_circuit_outcomes = model.expand_instruments_and_separate_povm(opLabelString) assert(len(expd_circuit_outcomes) == 1), "No instruments are allowed in LGST fiducials!" 
unique_key = next(iter(expd_circuit_outcomes.keys())) outcomes = expd_circuit_outcomes[unique_key] @@ -431,7 +431,7 @@ def _construct_x_matrix(prep_fiducials, effect_fiducials, model, op_label_tuple, for j, rhostr in enumerate(prep_fiducials): opLabelString = rhostr + _circuits.Circuit(op_label_tuple, line_labels=rhostr.line_labels) + estr dsStr = opLabelString.replace_layers_with_aliases(op_label_aliases) - expd_circuit_outcomes = opLabelString.expand_instruments_and_separate_povm(model) + expd_circuit_outcomes = model.expand_instruments_and_separate_povm(opLabelString) dsRow_fractions = dataset[dsStr].fractions assert(len(expd_circuit_outcomes) == nVariants) diff --git a/pygsti/circuits/circuit.py b/pygsti/circuits/circuit.py index a65629953..0f30872f4 100644 --- a/pygsti/circuits/circuit.py +++ b/pygsti/circuits/circuit.py @@ -4414,106 +4414,6 @@ def done_editing(self): self._hashable_tup = self.tup self._hash = hash(self._hashable_tup) - def expand_instruments_and_separate_povm(self, model, observed_outcomes=None): - """ - Creates a dictionary of :class:`SeparatePOVMCircuit` objects from expanding the instruments of this circuit. - - Each key of the returned dictionary replaces the instruments in this circuit with a selection - of their members. (The size of the resulting dictionary is the product of the sizes of - each instrument appearing in this circuit when `observed_outcomes is None`). Keys are stored - as :class:`SeparatePOVMCircuit` objects so it's easy to keep track of which POVM outcomes (effects) - correspond to observed data. This function is, for the most part, used internally to process - a circuit before computing its outcome probabilities. - - Parameters - ---------- - model : Model - The model used to provide necessary details regarding the expansion, including: - - - default SPAM layers - - definitions of instrument-containing layers - - expansions of individual instruments and POVMs - - Returns - ------- - OrderedDict - A dict whose keys are :class:`SeparatePOVMCircuit` objects and whose - values are tuples of the outcome labels corresponding to this circuit, - one per POVM effect held in the key. - """ - complete_circuit = model.complete_circuit(self) - expanded_circuit_outcomes = _collections.OrderedDict() - povm_lbl = complete_circuit[-1] # "complete" circuits always end with a POVM label - circuit_without_povm = complete_circuit[0:len(complete_circuit) - 1] - - def create_tree(lst): - subs = _collections.OrderedDict() - for el in lst: - if len(el) > 0: - if el[0] not in subs: subs[el[0]] = [] - subs[el[0]].append(el[1:]) - return _collections.OrderedDict([(k, create_tree(sub_lst)) for k, sub_lst in subs.items()]) - - def add_expanded_circuit_outcomes(circuit, running_outcomes, ootree, start): - """ - """ - cir = circuit if start == 0 else circuit[start:] # for performance, avoid uneeded slicing - for k, layer_label in enumerate(cir, start=start): - components = layer_label.components - #instrument_inds = _np.nonzero([model._is_primitive_instrument_layer_lbl(component) - # for component in components])[0] # SLOWER than statement below - instrument_inds = _np.array([i for i, component in enumerate(components) - if model._is_primitive_instrument_layer_lbl(component)]) - if instrument_inds.size > 0: - # This layer contains at least one instrument => recurse with instrument(s) replaced with - # all combinations of their members. 
- component_lookup = {i: comp for i, comp in enumerate(components)} - instrument_members = [model._member_labels_for_instrument(components[i]) - for i in instrument_inds] # also components of outcome labels - for selected_instrmt_members in _itertools.product(*instrument_members): - expanded_layer_lbl = component_lookup.copy() - expanded_layer_lbl.update({i: components[i] + "_" + sel - for i, sel in zip(instrument_inds, selected_instrmt_members)}) - expanded_layer_lbl = _Label([expanded_layer_lbl[i] for i in range(len(components))]) - - if ootree is not None: - new_ootree = ootree - for sel in selected_instrmt_members: - new_ootree = new_ootree.get(sel, {}) - if len(new_ootree) == 0: continue # no observed outcomes along this outcome-tree path - else: - new_ootree = None - - add_expanded_circuit_outcomes(circuit[0:k] + Circuit((expanded_layer_lbl,)) + circuit[k + 1:], - running_outcomes + selected_instrmt_members, new_ootree, k + 1) - break - - else: # no more instruments to process: `cir` contains no instruments => add an expanded circuit - assert(circuit not in expanded_circuit_outcomes) # shouldn't be possible to generate duplicates... - elabels = model._effect_labels_for_povm(povm_lbl) if (observed_outcomes is None) \ - else tuple(ootree.keys()) - outcomes = tuple((running_outcomes + (elabel,) for elabel in elabels)) - expanded_circuit_outcomes[SeparatePOVMCircuit(circuit, povm_lbl, elabels)] = outcomes - - ootree = create_tree(observed_outcomes) if observed_outcomes is not None else None # tree of observed outcomes - # e.g. [('0','00'), ('0','01'), ('1','10')] ==> {'0': {'00': {}, '01': {}}, '1': {'10': {}}} - - if model._has_instruments(): - add_expanded_circuit_outcomes(circuit_without_povm, (), ootree, start=0) - else: - # It may be helpful to cache the set of elabels for a POVM (maybe within the model?) because - # currently the call to _effect_labels_for_povm may be a bottleneck. It's needed, even when we have - # observed outcomes, because there may be some observed outcomes that aren't modeled (e.g. leakage states) - if observed_outcomes is None: - elabels = model._effect_labels_for_povm(povm_lbl) - else: - possible_lbls = set(model._effect_labels_for_povm(povm_lbl)) - elabels = tuple([oo for oo in ootree.keys() if oo in possible_lbls]) - outcomes = tuple(((elabel,) for elabel in elabels)) - expanded_circuit_outcomes[SeparatePOVMCircuit(circuit_without_povm, povm_lbl, elabels)] = outcomes - - return expanded_circuit_outcomes - class CompressedCircuit(object): """ A "compressed" Circuit that requires less disk space. diff --git a/pygsti/forwardsims/mapforwardsim.py b/pygsti/forwardsims/mapforwardsim.py index c8f8a043b..a03bd239d 100644 --- a/pygsti/forwardsims/mapforwardsim.py +++ b/pygsti/forwardsims/mapforwardsim.py @@ -49,7 +49,7 @@ class SimpleMapForwardSimulator(_ForwardSimulator): # If this is done, then MapForwardSimulator wouldn't need to separately subclass DistributableForwardSimulator def _compute_circuit_outcome_probabilities(self, array_to_fill, circuit, outcomes, resource_alloc, time=None): - expanded_circuit_outcomes = circuit.expand_instruments_and_separate_povm(self.model, outcomes) + expanded_circuit_outcomes = self.model.expand_instruments_and_separate_povm(circuit, outcomes) outcome_to_index = {outc: i for i, outc in enumerate(outcomes)} for spc, spc_outcomes in expanded_circuit_outcomes.items(): # spc is a SeparatePOVMCircuit # Note: `spc.circuit_without_povm` *always* begins with a prep label. 
diff --git a/pygsti/forwardsims/matrixforwardsim.py b/pygsti/forwardsims/matrixforwardsim.py index 61fa4022f..64e26936c 100644 --- a/pygsti/forwardsims/matrixforwardsim.py +++ b/pygsti/forwardsims/matrixforwardsim.py @@ -590,7 +590,7 @@ def _compute_circuit_outcome_probabilities(self, array_to_fill, circuit, outcome use_scaling = False # Hardcoded for now assert(time is None), "MatrixForwardSimulator cannot be used to simulate time-dependent circuits" - expanded_circuit_outcomes = circuit.expand_instruments_and_separate_povm(self.model, outcomes) + expanded_circuit_outcomes = self.model.expand_instruments_and_separate_povm(circuit, outcomes) outcome_to_index = {outc: i for i, outc in enumerate(outcomes)} for spc, spc_outcomes in expanded_circuit_outcomes.items(): # spc is a SeparatePOVMCircuit indices = [outcome_to_index[o] for o in spc_outcomes] diff --git a/pygsti/forwardsims/weakforwardsim.py b/pygsti/forwardsims/weakforwardsim.py index 017973a1e..32d0e4bc6 100644 --- a/pygsti/forwardsims/weakforwardsim.py +++ b/pygsti/forwardsims/weakforwardsim.py @@ -55,7 +55,7 @@ def _compute_circuit_outcome_for_shot(self, circuit, resource_alloc, time=None, circuit : Circuit A tuple-like object of *simplified* gates (e.g. may include instrument elements like 'Imyinst_0') generated by - Circuit.expand_instruments_and_separate_povm() + OpModel.expand_instruments_and_separate_povm() resource_alloc: ResourceAlloc Currently not used @@ -77,7 +77,7 @@ def _compute_circuit_outcome_for_shot(self, circuit, resource_alloc, time=None, assert(resource_alloc is None), "WeakForwardSimulator cannot use a resource_alloc for one shot." #prep_label, op_labels, povm_label = self.model.split_circuit(spc_circuit) - spc_dict = circuit.expand_instruments_and_separate_povm(self.model, + spc_dict = self.model.expand_instruments_and_separate_povm(circuit, observed_outcomes=None) # FUTURE: observed outcomes? assert(len(spc_dict) == 1), "Circuits with instruments are not supported by weak forward simulator (yet)" spc = next(iter(spc_dict.keys())) # first & only SeparatePOVMCircuit diff --git a/pygsti/layouts/termlayout.py b/pygsti/layouts/termlayout.py index 10b5458b7..95835c79a 100644 --- a/pygsti/layouts/termlayout.py +++ b/pygsti/layouts/termlayout.py @@ -57,7 +57,7 @@ def __init__(self, unique_complete_circuits, ds_circuits, group, model, dataset) expanded_circuit_outcomes = _collections.OrderedDict() for i in group: observed_outcomes = None if (dataset is None) else dataset[ds_circuits[i]].outcomes - d = unique_complete_circuits[i].expand_instruments_and_separate_povm(model, observed_outcomes) + d = model.expand_instruments_and_separate_povm(unique_complete_circuits[i], observed_outcomes) expanded_circuit_outcomes_by_unique[i] = d expanded_circuit_outcomes.update(d) From 5db3e5913f07fe1499cd0b0de39d1303a29c5cdc Mon Sep 17 00:00:00 2001 From: Corey Ostrove Date: Wed, 5 Jun 2024 00:33:40 -0600 Subject: [PATCH 18/30] Refactor expand_instruments_and_separate_povm Refactor expand_instruments_and_separate_povm to use the multi-circuit version under the hood to reduce code duplication. 
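With this change the single-circuit method becomes a thin wrapper over the bulk implementation, so the expansion logic lives in one place. A minimal sketch of the user-facing call pattern after the refactor (illustrative only, assuming the smq1Q_XYI model pack):

    from pygsti.modelpacks import smq1Q_XYI

    model = smq1Q_XYI.target_model()
    circuit = smq1Q_XYI.prep_fiducials()[0] + smq1Q_XYI.meas_fiducials()[0]

    # Previously: circuit.expand_instruments_and_separate_povm(model)
    # Now the model owns the expansion:
    expanded = model.expand_instruments_and_separate_povm(circuit)
    for sep_povm_circuit, outcome_labels in expanded.items():
        # keys are SeparatePOVMCircuit objects; values are outcome-label tuples
        print(sep_povm_circuit.circuit_without_povm, outcome_labels)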
--- pygsti/models/model.py | 74 ++---------------------------------------- 1 file changed, 2 insertions(+), 72 deletions(-) diff --git a/pygsti/models/model.py b/pygsti/models/model.py index d6e28add6..f877b5776 100644 --- a/pygsti/models/model.py +++ b/pygsti/models/model.py @@ -1415,78 +1415,8 @@ def expand_instruments_and_separate_povm(self, circuit, observed_outcomes=None): values are tuples of the outcome labels corresponding to this circuit, one per POVM effect held in the key. """ - complete_circuit = self.complete_circuit(circuit) - expanded_circuit_outcomes = _collections.OrderedDict() - povm_lbl = complete_circuit[-1] # "complete" circuits always end with a POVM label - circuit_without_povm = complete_circuit[0:len(complete_circuit) - 1] - - def create_tree(lst): - subs = _collections.OrderedDict() - for el in lst: - if len(el) > 0: - if el[0] not in subs: subs[el[0]] = [] - subs[el[0]].append(el[1:]) - return _collections.OrderedDict([(k, create_tree(sub_lst)) for k, sub_lst in subs.items()]) - - def add_expanded_circuit_outcomes(circuit, running_outcomes, ootree, start): - """ - """ - cir = circuit if start == 0 else circuit[start:] # for performance, avoid uneeded slicing - for k, layer_label in enumerate(cir, start=start): - components = layer_label.components - #instrument_inds = _np.nonzero([model._is_primitive_instrument_layer_lbl(component) - # for component in components])[0] # SLOWER than statement below - instrument_inds = _np.array([i for i, component in enumerate(components) - if self._is_primitive_instrument_layer_lbl(component)]) - if instrument_inds.size > 0: - # This layer contains at least one instrument => recurse with instrument(s) replaced with - # all combinations of their members. - component_lookup = {i: comp for i, comp in enumerate(components)} - instrument_members = [self._member_labels_for_instrument(components[i]) - for i in instrument_inds] # also components of outcome labels - for selected_instrmt_members in _itertools.product(*instrument_members): - expanded_layer_lbl = component_lookup.copy() - expanded_layer_lbl.update({i: components[i] + "_" + sel - for i, sel in zip(instrument_inds, selected_instrmt_members)}) - expanded_layer_lbl = _Label([expanded_layer_lbl[i] for i in range(len(components))]) - - if ootree is not None: - new_ootree = ootree - for sel in selected_instrmt_members: - new_ootree = new_ootree.get(sel, {}) - if len(new_ootree) == 0: continue # no observed outcomes along this outcome-tree path - else: - new_ootree = None - - add_expanded_circuit_outcomes(circuit[0:k] + _Circuit((expanded_layer_lbl,)) + circuit[k + 1:], - running_outcomes + selected_instrmt_members, new_ootree, k + 1) - break - - else: # no more instruments to process: `cir` contains no instruments => add an expanded circuit - assert(circuit not in expanded_circuit_outcomes) # shouldn't be possible to generate duplicates... - elabels = self._effect_labels_for_povm(povm_lbl) if (observed_outcomes is None) \ - else tuple(ootree.keys()) - outcomes = tuple((running_outcomes + (elabel,) for elabel in elabels)) - expanded_circuit_outcomes[_SeparatePOVMCircuit(circuit, povm_lbl, elabels)] = outcomes - - ootree = create_tree(observed_outcomes) if observed_outcomes is not None else None # tree of observed outcomes - # e.g. 
[('0','00'), ('0','01'), ('1','10')] ==> {'0': {'00': {}, '01': {}}, '1': {'10': {}}} - - if self._has_instruments(): - add_expanded_circuit_outcomes(circuit_without_povm, (), ootree, start=0) - else: - # It may be helpful to cache the set of elabels for a POVM (maybe within the model?) because - # currently the call to _effect_labels_for_povm may be a bottleneck. It's needed, even when we have - # observed outcomes, because there may be some observed outcomes that aren't modeled (e.g. leakage states) - if observed_outcomes is None: - elabels = self._effect_labels_for_povm(povm_lbl) - else: - possible_lbls = set(self._effect_labels_for_povm(povm_lbl)) - elabels = tuple([oo for oo in ootree.keys() if oo in possible_lbls]) - outcomes = tuple(((elabel,) for elabel in elabels)) - expanded_circuit_outcomes[_SeparatePOVMCircuit(circuit_without_povm, povm_lbl, elabels)] = outcomes - - return expanded_circuit_outcomes + expanded_circuit_outcomes = self.bulk_expand_instruments_and_separate_povm([circuit], [observed_outcomes]) + return expanded_circuit_outcomes[0] def bulk_expand_instruments_and_separate_povm(self, circuits, observed_outcomes_list=None, split_circuits = None, completed_circuits = None): From 53e2da62422699e0af04dafd588776a7abeec7cc Mon Sep 17 00:00:00 2001 From: Corey Ostrove Date: Wed, 5 Jun 2024 16:46:52 -0600 Subject: [PATCH 19/30] Refactor cache creation functions Refactor cache creation functions into static methods of the corresponding forward simulator class. Also add an empty base version of this method, and clean up a few miscellaneous things caught by review. --- pygsti/algorithms/core.py | 21 +++++--------- pygsti/forwardsims/forwardsim.py | 9 ++++++ pygsti/forwardsims/mapforwardsim.py | 35 ++++++++++++++++++++++ pygsti/forwardsims/matrixforwardsim.py | 40 ++++++++++++++++++++++++++ pygsti/layouts/maplayout.py | 36 +---------------------- pygsti/layouts/matrixlayout.py | 39 ------------------------- 6 files changed, 92 insertions(+), 88 deletions(-) diff --git a/pygsti/algorithms/core.py b/pygsti/algorithms/core.py index cc888c50a..dd0a21ef7 100644 --- a/pygsti/algorithms/core.py +++ b/pygsti/algorithms/core.py @@ -881,34 +881,27 @@ def _max_array_types(artypes_list): # get the maximum number of each array type #pre-compute a dictionary caching completed circuits for layout construction performance. 
unique_circuits = list({ckt for circuit_list in circuit_lists for ckt in circuit_list}) - if isinstance(mdl.sim, _fwdsims.MatrixForwardSimulator): - precomp_layout_circuit_cache = _layouts.matrixlayout.create_matrix_copa_layout_circuit_cache(unique_circuits, mdl, dataset=dataset) - elif isinstance(mdl.sim, _fwdsims.MapForwardSimulator): - precomp_layout_circuit_cache = _layouts.maplayout.create_map_copa_layout_circuit_cache(unique_circuits, mdl, dataset=dataset) + if isinstance(mdl.sim, (_fwdsims.MatrixForwardSimulator, _fwdsims.MapForwardSimulator)): + precomp_layout_circuit_cache = mdl.sim.create_copa_layout_circuit_cache(unique_circuits, mdl, dataset=dataset) else: precomp_layout_circuit_cache = None for i, circuit_list in enumerate(circuit_lists): printer.log(f'Layout for iteration {i}', 2) - if isinstance(mdl.sim, (_fwdsims.MatrixForwardSimulator, _fwdsims.MapForwardSimulator)): - precomp_layouts.append(mdl.sim.create_layout(circuit_list, dataset, resource_alloc, array_types, verbosity= printer - 1, - layout_creation_circuit_cache = precomp_layout_circuit_cache)) - else: - precomp_layouts.append(mdl.sim.create_layout(circuit_list, dataset, resource_alloc, array_types, verbosity= printer - 1)) - + precomp_layouts.append(mdl.sim.create_layout(circuit_list, dataset, resource_alloc, array_types, verbosity= printer - 1, + layout_creation_circuit_cache = precomp_layout_circuit_cache)) + 
#precompute a cache of possible outcome counts for each circuit to accelerate MDC store creation if isinstance(mdl, _models.model.OpModel): if precomp_layout_circuit_cache is not None: #then grab the split circuits from there. expanded_circuit_outcome_list = mdl.bulk_expand_instruments_and_separate_povm(unique_circuits, completed_circuits= precomp_layout_circuit_cache['completed_circuits'].values()) - outcome_count_by_circuit_cache = {ckt: len(outcome_tup) for ckt,outcome_tup in zip(unique_circuits, expanded_circuit_outcome_list)} else: - expanded_circuit_outcome_list = mdl.bulk_expand_instruments_and_separate_povm(unique_circuits) - outcome_count_by_circuit_cache = {ckt: len(outcome_tup) for ckt,outcome_tup in zip(unique_circuits, expanded_circuit_outcome_list)} + expanded_circuit_outcome_list = mdl.bulk_expand_instruments_and_separate_povm(unique_circuits) + outcome_count_by_circuit_cache = {ckt: len(outcome_tup) for ckt,outcome_tup in zip(unique_circuits, expanded_circuit_outcome_list)} else: outcome_count_by_circuit_cache = {ckt: mdl.compute_num_outcomes(ckt) for ckt in unique_circuits} - with printer.progress_logging(1): for i in range(starting_index, len(circuit_lists)): circuitsToEstimate = circuit_lists[i] diff --git a/pygsti/forwardsims/forwardsim.py b/pygsti/forwardsims/forwardsim.py index c5e61b057..37e5504c4 100644 --- a/pygsti/forwardsims/forwardsim.py +++ b/pygsti/forwardsims/forwardsim.py @@ -378,6 +378,15 @@ def create_layout(self, circuits, dataset=None, resource_alloc=None, derivative_dimensions = tuple() return _CircuitOutcomeProbabilityArrayLayout.create_from(circuits, self.model, dataset, derivative_dimensions, resource_alloc=resource_alloc) + + @staticmethod + def create_copa_layout_circuit_cache(circuits, model, dataset=None): + """ + Helper function for pre-computing/pre-processing circuit structures + used in COPA layout creation. + """ + msg = "Not currently implemented for this forward simulator class." 
+ raise NotImplementedError(msg) def bulk_probs(self, circuits, clip_to=None, resource_alloc=None, smartc=None): """ diff --git a/pygsti/forwardsims/mapforwardsim.py b/pygsti/forwardsims/mapforwardsim.py index a03bd239d..83a5a869a 100644 --- a/pygsti/forwardsims/mapforwardsim.py +++ b/pygsti/forwardsims/mapforwardsim.py @@ -312,6 +312,41 @@ def create_layout(self, circuits, dataset=None, resource_alloc=None, array_types printer.log(" Esimated memory required = %.1fGB" % (mem_estimate * GB)) return layout + + @staticmethod + def create_copa_layout_circuit_cache(circuits, model, dataset=None): + """ + Helper function for pre-computing/pre-processing circuit structures + used in map layout creation. + """ + cache = dict() + completed_circuits = model.complete_circuits(circuits) + + cache['completed_circuits'] = {ckt: comp_ckt for ckt, comp_ckt in zip(circuits, completed_circuits)} + + split_circuits = model.split_circuits(completed_circuits, split_prep=False) + cache['split_circuits'] = {ckt: split_ckt for ckt, split_ckt in zip(circuits, split_circuits)} + + + if dataset is not None: + outcomes_list = [] + for ckt in circuits: + ds_row = dataset[ckt] + outcomes_list.append(ds_row.outcomes if ds_row is not None else None) + #slightly different than matrix, for some reason outcomes is used in this class + #and unique_outcomes is used in matrix. + else: + outcomes_list = [None]*len(circuits) + + expanded_circuit_outcome_list = model.bulk_expand_instruments_and_separate_povm(circuits, + observed_outcomes_list = outcomes_list, + completed_circuits= completed_circuits) + + expanded_circuit_cache = {ckt: expanded_ckt for ckt,expanded_ckt in zip(completed_circuits, expanded_circuit_outcome_list)} + cache['expanded_and_separated_circuits'] = expanded_circuit_cache + + return cache + def _bulk_fill_probs_atom(self, array_to_fill, layout_atom, resource_alloc): # Note: *don't* set dest_indices arg = layout.element_slice, as this is already done by caller diff --git a/pygsti/forwardsims/matrixforwardsim.py b/pygsti/forwardsims/matrixforwardsim.py index 64e26936c..944207cf6 100644 --- a/pygsti/forwardsims/matrixforwardsim.py +++ b/pygsti/forwardsims/matrixforwardsim.py @@ -1154,6 +1154,46 @@ def create_layout(self, circuits, dataset=None, resource_alloc=None, array_types printer.log(" Esimated memory required = %.1fGB" % (mem_estimate * GB)) return layout + + @staticmethod + def create_copa_layout_circuit_cache(circuits, model, dataset=None): + """ + Helper function for pre-computing/pre-processing circuit structures + used in matrix layout creation. + """ + cache = dict() + completed_circuits, split_circuits = model.complete_circuits(circuits, return_split=True) + + cache['completed_circuits'] = {ckt: comp_ckt for ckt, comp_ckt in zip(circuits, completed_circuits)} + cache['split_circuits'] = {ckt: split_ckt for ckt, split_ckt in zip(circuits, split_circuits)} + + #There is some potential aliasing that happens in the init that I am not + #doing here, but I think 90+% of the time this ought to be fine. 
+ if dataset is not None: + unique_outcomes_list = [] + for ckt in circuits: + ds_row = dataset[ckt] + unique_outcomes_list.append(ds_row.unique_outcomes if ds_row is not None else None) + else: + unique_outcomes_list = [None]*len(circuits) + + expanded_circuit_outcome_list = model.bulk_expand_instruments_and_separate_povm(circuits, + observed_outcomes_list = unique_outcomes_list, + split_circuits = split_circuits) + + expanded_circuit_cache = {ckt: expanded_ckt for ckt,expanded_ckt in zip(circuits, expanded_circuit_outcome_list)} + + cache['expanded_and_separated_circuits'] = expanded_circuit_cache + + expanded_subcircuits_no_spam_cache = dict() + for expc_outcomes in cache['expanded_and_separated_circuits'].values(): + for sep_povm_c, _ in expc_outcomes.items(): # for each expanded cir from unique_i-th circuit + exp_nospam_c = sep_povm_c.circuit_without_povm[1:] + expanded_subcircuits_no_spam_cache[exp_nospam_c] = exp_nospam_c.expand_subcircuits() + + cache['expanded_subcircuits_no_spam'] = expanded_subcircuits_no_spam_cache + + return cache def _scale_exp(self, scale_exps): old_err = _np.seterr(over='ignore') diff --git a/pygsti/layouts/maplayout.py b/pygsti/layouts/maplayout.py index 8f9805e89..e6cbc25f9 100644 --- a/pygsti/layouts/maplayout.py +++ b/pygsti/layouts/maplayout.py @@ -277,38 +277,4 @@ def _create_atom(group): unique_to_orig = {unique_i: orig_i for orig_i, unique_i in self._to_unique.items()} # unique => orig. indices for atom in self.atoms: for expanded_circuit_i, unique_i in atom.unique_indices_by_expcircuit.items(): - atom.orig_indices_by_expcircuit[expanded_circuit_i] = unique_to_orig[unique_i] - - -def create_map_copa_layout_circuit_cache(circuits, model, dataset=None): - """ - Helper function for pre-computing/pre-processing circuits structures - used in matrix layout creation. - """ - cache = dict() - completed_circuits = model.complete_circuits(circuits) - - cache['completed_circuits'] = {ckt: comp_ckt for ckt, comp_ckt in zip(circuits, completed_circuits)} - - split_circuits = model.split_circuits(completed_circuits, split_prep=False) - cache['split_circuits'] = {ckt: split_ckt for ckt, split_ckt in zip(circuits, split_circuits)} - - - if dataset is not None: - outcomes_list = [] - for ckt in circuits: - ds_row = dataset[ckt] - outcomes_list.append(ds_row.outcomes if ds_row is not None else None) - #slightly different than matrix, for some reason outcomes is used in this class - #and unique_outcomes is used in matrix. 
- else: - outcomes_list = [None]*len(circuits) - - expanded_circuit_outcome_list = model.bulk_expand_instruments_and_separate_povm(circuits, - observed_outcomes_list = outcomes_list, - completed_circuits= completed_circuits) - - expanded_circuit_cache = {ckt: expanded_ckt for ckt,expanded_ckt in zip(completed_circuits, expanded_circuit_outcome_list)} - cache['expanded_and_separated_circuits'] = expanded_circuit_cache - - return cache \ No newline at end of file + atom.orig_indices_by_expcircuit[expanded_circuit_i] = unique_to_orig[unique_i] \ No newline at end of file diff --git a/pygsti/layouts/matrixlayout.py b/pygsti/layouts/matrixlayout.py index c76e0d9fb..8364c1a4d 100644 --- a/pygsti/layouts/matrixlayout.py +++ b/pygsti/layouts/matrixlayout.py @@ -388,45 +388,6 @@ def _create_atom(args): _create_atom, list(zip(groups, helpful_scratch)), num_tree_processors, num_param_dimension_processors, param_dimensions, param_dimension_blk_sizes, resource_alloc, verbosity) - -def create_matrix_copa_layout_circuit_cache(circuits, model, dataset=None): - """ - Helper function for pre-computing/pre-processing circuits structures - used in matrix layout creation. - """ - cache = dict() - completed_circuits, split_circuits = model.complete_circuits(circuits, return_split=True) - - cache['completed_circuits'] = {ckt: comp_ckt for ckt, comp_ckt in zip(circuits, completed_circuits)} - cache['split_circuits'] = {ckt: split_ckt for ckt, split_ckt in zip(circuits, split_circuits)} - - #There is some potential aliasing that happens in the init that I am not - #doing here, but I think 90+% of the time this ought to be fine. - if dataset is not None: - unique_outcomes_list = [] - for ckt in circuits: - ds_row = dataset[ckt] - unique_outcomes_list.append(ds_row.unique_outcomes if ds_row is not None else None) - else: - unique_outcomes_list = [None]*len(circuits) - - expanded_circuit_outcome_list = model.bulk_expand_instruments_and_separate_povm(circuits, - observed_outcomes_list = unique_outcomes_list, - split_circuits = split_circuits) - - expanded_circuit_cache = {ckt: expanded_ckt for ckt,expanded_ckt in zip(circuits, expanded_circuit_outcome_list)} - - cache['expanded_and_separated_circuits'] = expanded_circuit_cache - - expanded_subcircuits_no_spam_cache = dict() - for expc_outcomes in cache['expanded_and_separated_circuits'].values(): - for sep_povm_c, _ in expc_outcomes.items(): # for each expanded cir from unique_i-th circuit - exp_nospam_c = sep_povm_c.circuit_without_povm[1:] - expanded_subcircuits_no_spam_cache[exp_nospam_c] = exp_nospam_c.expand_subcircuits() - - cache['expanded_subcircuits_no_spam'] = expanded_subcircuits_no_spam_cache - - return cache From 74dc2152fcb485f9ca3a80e4e6c805d3a8713c0e Mon Sep 17 00:00:00 2001 From: Corey Ostrove Date: Tue, 30 Jul 2024 11:59:14 -0600 Subject: [PATCH 20/30] Minor updates and unit test fixes Fix a few minor issues related to refactored code and updates made in this branch. 
--- pygsti/circuits/circuit.py | 2 +- pygsti/forwardsims/forwardsim.py | 5 ++++- pygsti/forwardsims/torchfwdsim.py | 3 ++- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/pygsti/circuits/circuit.py b/pygsti/circuits/circuit.py index 9cf19b839..2ccb7aaae 100644 --- a/pygsti/circuits/circuit.py +++ b/pygsti/circuits/circuit.py @@ -3465,7 +3465,7 @@ def num_gates(self): """ if self._static: def cnt(lbl): # obj a Label, perhaps compound - if lbl.is_simple(): # a simple label + if lbl.IS_SIMPLE: # a simple label return 1 if (lbl.sslbls is not None) else 0 else: return sum([cnt(sublbl) for sublbl in lbl.components]) diff --git a/pygsti/forwardsims/forwardsim.py b/pygsti/forwardsims/forwardsim.py index 37e5504c4..2ae19f2f3 100644 --- a/pygsti/forwardsims/forwardsim.py +++ b/pygsti/forwardsims/forwardsim.py @@ -323,7 +323,8 @@ def hprobs(self, circuit, resource_alloc=None): # --------------------------------------------------------------------------- def create_layout(self, circuits, dataset=None, resource_alloc=None, - array_types=(), derivative_dimensions=None, verbosity=0): + array_types=(), derivative_dimensions=None, verbosity=0, + layout_creation_circuit_cache = None): """ Constructs an circuit-outcome-probability-array (COPA) layout for `circuits` and `dataset`. @@ -364,6 +365,8 @@ def create_layout(self, circuits, dataset=None, resource_alloc=None, verbosity : int or VerbosityPrinter Determines how much output to send to stdout. 0 means no output, higher integers mean more output. + + Returns ------- diff --git a/pygsti/forwardsims/torchfwdsim.py b/pygsti/forwardsims/torchfwdsim.py index 1285e51de..5d61c3f0d 100644 --- a/pygsti/forwardsims/torchfwdsim.py +++ b/pygsti/forwardsims/torchfwdsim.py @@ -74,8 +74,9 @@ class StatelessModel: def __init__(self, model: ExplicitOpModel, layout: CircuitOutcomeProbabilityArrayLayout): circuits = [] self.outcome_probs_dim = 0 + #TODO: Refactor this to use the bulk_expand_instruments_and_separate_povm codepath for _, circuit, outcomes in layout.iter_unique_circuits(): - expanded_circuits = circuit.expand_instruments_and_separate_povm(model, outcomes) + expanded_circuits = model.expand_instruments_and_separate_povm(circuit, outcomes) if len(expanded_circuits) > 1: raise NotImplementedError("I don't know what to do with this.") spc = next(iter(expanded_circuits)) From 6f4af73d72d9f1ca75a1a5c0916a5a363ee51289 Mon Sep 17 00:00:00 2001 From: Corey Ostrove Date: Tue, 30 Jul 2024 20:29:37 -0600 Subject: [PATCH 21/30] Add in DataSet key aliasing Add in support for data set key aliasing in COPA layout cache creation. 
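The aliasing logic being added is sketched below as a stand-alone helper (the helper name is hypothetical; the real code lives inside the create_copa_layout_circuit_cache static methods): op-label aliases carried by a CircuitList are applied before indexing into the DataSet, so the cache is keyed the same way the layout code keys its lookups.

    from pygsti.circuits import CircuitList
    from pygsti.tools import listtools as lt

    def lookup_unique_outcomes(circuits, dataset):
        # A CircuitList may carry aliases translating model-side op labels
        # into the labels actually used as dataset keys.
        aliases = circuits.op_label_aliases if isinstance(circuits, CircuitList) else None
        ds_circuits = lt.apply_aliases_to_circuits(circuits, aliases)
        unique_outcomes_list = []
        for ckt in ds_circuits:
            ds_row = dataset[ckt]
            unique_outcomes_list.append(ds_row.unique_outcomes if ds_row is not None else None)
        return unique_outcomes_list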
--- pygsti/forwardsims/mapforwardsim.py | 16 +++++++++------- pygsti/forwardsims/matrixforwardsim.py | 8 +++++--- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/pygsti/forwardsims/mapforwardsim.py b/pygsti/forwardsims/mapforwardsim.py index 83a5a869a..6e5ed4f83 100644 --- a/pygsti/forwardsims/mapforwardsim.py +++ b/pygsti/forwardsims/mapforwardsim.py @@ -26,6 +26,8 @@ from pygsti.tools import sharedmemtools as _smt from pygsti.tools import slicetools as _slct from pygsti.tools.matrixtools import _fas +from pygsti.tools import listtools as _lt +from pygsti.circuits import CircuitList as _CircuitList _dummy_profiler = _DummyProfiler() @@ -329,17 +331,17 @@ def create_copa_layout_circuit_cache(circuits, model, dataset=None): if dataset is not None: - outcomes_list = [] - for ckt in circuits: + aliases = circuits.op_label_aliases if isinstance(circuits, _CircuitList) else None + ds_circuits = _lt.apply_aliases_to_circuits(circuits, aliases) + unique_outcomes_list = [] + for ckt in ds_circuits: ds_row = dataset[ckt] - outcomes_list.append(ds_row.outcomes if ds_row is not None else None) - #slightly different than matrix, for some reason outcomes is used in this class - #and unique_outcomes is used in matrix. + unique_outcomes_list.append(ds_row.unique_outcomes if ds_row is not None else None) else: - outcomes_list = [None]*len(circuits) + unique_outcomes_list = [None]*len(circuits) expanded_circuit_outcome_list = model.bulk_expand_instruments_and_separate_povm(circuits, - observed_outcomes_list = outcomes_list, + observed_outcomes_list = unique_outcomes_list, completed_circuits= completed_circuits) expanded_circuit_cache = {ckt: expanded_ckt for ckt,expanded_ckt in zip(completed_circuits, expanded_circuit_outcome_list)} diff --git a/pygsti/forwardsims/matrixforwardsim.py b/pygsti/forwardsims/matrixforwardsim.py index 944207cf6..a24c1322d 100644 --- a/pygsti/forwardsims/matrixforwardsim.py +++ b/pygsti/forwardsims/matrixforwardsim.py @@ -29,6 +29,8 @@ from pygsti.tools import sharedmemtools as _smt from pygsti.tools import slicetools as _slct from pygsti.tools.matrixtools import _fas +from pygsti.tools import listtools as _lt +from pygsti.circuits import CircuitList as _CircuitList _dummy_profiler = _DummyProfiler() @@ -1167,11 +1169,11 @@ def create_copa_layout_circuit_cache(circuits, model, dataset=None): cache['completed_circuits'] = {ckt: comp_ckt for ckt, comp_ckt in zip(circuits, completed_circuits)} cache['split_circuits'] = {ckt: split_ckt for ckt, split_ckt in zip(circuits, split_circuits)} - #There is some potential aliasing that happens in the init that I am not - #doing here, but I think 90+% of the time this ought to be fine. if dataset is not None: + aliases = circuits.op_label_aliases if isinstance(circuits, _CircuitList) else None + ds_circuits = _lt.apply_aliases_to_circuits(circuits, aliases) unique_outcomes_list = [] - for ckt in circuits: + for ckt in ds_circuits: ds_row = dataset[ckt] unique_outcomes_list.append(ds_row.unique_outcomes if ds_row is not None else None) else: From e7bad833e1ad47ddcf06f5bee11f67bcfcc47993 Mon Sep 17 00:00:00 2001 From: Corey Ostrove Date: Tue, 30 Jul 2024 21:41:57 -0600 Subject: [PATCH 22/30] Minor refactors and updates Rework some of the if-statement branching in the layout creation to instead make more use of the fallback behavior of dict.get. 
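The recurring motif is sketched below with generic names (not pyGSTi code): treating a missing cache as an empty dict lets every lookup share a single get-with-fallback path instead of branching on whether a cache was supplied.

    def cached_lookup(cache, key, compute):
        if cache is None:
            cache = dict()      # no cache behaves like an empty cache
        value = cache.get(key, None)
        if value is None:       # cache miss: fall back to direct computation
            value = compute(key)
        return value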
--- pygsti/layouts/maplayout.py | 35 ++++++++---------- pygsti/layouts/matrixlayout.py | 66 +++++++++++++++------------------- 2 files changed, 44 insertions(+), 57 deletions(-) diff --git a/pygsti/layouts/maplayout.py b/pygsti/layouts/maplayout.py index e6cbc25f9..b2142a5e0 100644 --- a/pygsti/layouts/maplayout.py +++ b/pygsti/layouts/maplayout.py @@ -53,18 +53,17 @@ class _MapCOPALayoutAtom(_DistributableAtom): def __init__(self, unique_complete_circuits, ds_circuits, group, model, dataset, max_cache_size, expanded_complete_circuit_cache = None): - expanded_circuit_info_by_unique = _collections.OrderedDict() - expanded_circuit_set = _collections.OrderedDict() # only use SeparatePOVMCircuit keys as ordered set + expanded_circuit_info_by_unique = dict() + expanded_circuit_set = dict() # only use SeparatePOVMCircuit keys as ordered set + + if expanded_complete_circuit_cache is None: + expanded_complete_circuit_cache = dict() for i in group: - if expanded_complete_circuit_cache is None: - observed_outcomes = None if (dataset is None) else dataset[ds_circuits[i]].outcomes - d = model.expand_instruments_and_separate_povm(unique_complete_circuits[i], observed_outcomes) - else: - d = expanded_complete_circuit_cache.get(unique_complete_circuits[i], None) - if d is None: - observed_outcomes = None if (dataset is None) else dataset[ds_circuits[i]].outcomes - d = model.expand_instruments_and_separate_povm(unique_complete_circuits[i], observed_outcomes) + d = expanded_complete_circuit_cache.get(unique_complete_circuits[i], None) + if d is None: + unique_observed_outcomes = None if (dataset is None) else dataset[ds_circuits[i]].unique_outcomes + d = model.expand_instruments_and_separate_povm(unique_complete_circuits[i], unique_observed_outcomes) expanded_circuit_info_by_unique[i] = d # a dict of SeparatePOVMCircuits => tuples of outcome labels expanded_circuit_set.update(d) @@ -98,8 +97,7 @@ def __init__(self, unique_complete_circuits, ds_circuits, group, model, self.outcomes_by_expcircuit = {} self.povm_and_elbls_by_expcircuit = {} - elindex_outcome_tuples = _collections.OrderedDict([ - (unique_i, list()) for unique_i in range(len(unique_complete_circuits))]) + elindex_outcome_tuples = {unique_i: list() for unique_i in range(len(unique_complete_circuits))} #Assign element indices, "global" indices starting at `offset` local_offset = 0 @@ -221,14 +219,11 @@ def __init__(self, circuits, model, dataset=None, max_cache_size=None, ds_circuits = _lt.apply_aliases_to_circuits(unique_circuits, aliases) #extract subcaches from layout_creation_circuit_cache: - if layout_creation_circuit_cache is not None: - self.completed_circuit_cache = layout_creation_circuit_cache.get('completed_circuits', None) - self.split_circuit_cache = layout_creation_circuit_cache.get('split_circuits', None) - self.expanded_and_separated_circuits_cache = layout_creation_circuit_cache.get('expanded_and_separated_circuits', None) - else: - self.completed_circuit_cache = None - self.split_circuit_cache = None - self.expanded_and_separated_circuits_cache = None + if layout_creation_circuit_cache is None: + layout_creation_circuit_cache = dict() + self.completed_circuit_cache = layout_creation_circuit_cache.get('completed_circuits', None) + self.split_circuit_cache = layout_creation_circuit_cache.get('split_circuits', None) + self.expanded_and_separated_circuits_cache = layout_creation_circuit_cache.get('expanded_and_separated_circuits', None) if self.completed_circuit_cache is None: unique_complete_circuits = 
model.complete_circuits(unique_circuits) diff --git a/pygsti/layouts/matrixlayout.py b/pygsti/layouts/matrixlayout.py index 8364c1a4d..bfff25a31 100644 --- a/pygsti/layouts/matrixlayout.py +++ b/pygsti/layouts/matrixlayout.py @@ -74,6 +74,9 @@ def __init__(self, unique_complete_circuits, unique_nospam_circuits, circuits_by ds_circuits, group, helpful_scratch, model, unique_circuits, dataset=None, expanded_and_separated_circuit_cache=None, double_expanded_nospam_circuits_cache = None): + if expanded_and_separated_circuit_cache is None: + expanded_and_separated_circuit_cache = dict() + #Note: group gives unique_nospam_circuits indices, which circuits_by_unique_nospam_circuits # turns into "unique complete circuit" indices, which the layout via it's to_unique can map # to original circuit indices. @@ -82,18 +85,13 @@ def add_expanded_circuits(indices, add_to_this_dict): for i in indices: nospam_c = unique_nospam_circuits[i] for unique_i in circuits_by_unique_nospam_circuits[nospam_c]: # "unique" circuits: add SPAM to nospam_c - if expanded_and_separated_circuit_cache is None: + #the cache is indexed into using the (potentially) incomplete circuits + expc_outcomes = expanded_and_separated_circuit_cache.get(unique_circuits[unique_i], None) + if expc_outcomes is None: #fall back on original non-cache behavior. observed_outcomes = None if (dataset is None) else dataset[ds_circuits[unique_i]].unique_outcomes expc_outcomes = model.expand_instruments_and_separate_povm(unique_complete_circuits[unique_i], observed_outcomes) - #Note: unique_complete_circuits may have duplicates (they're only unique *pre*-completion) - else: - #the cache is indexed into using the (potentially) incomplete circuits - expc_outcomes = expanded_and_separated_circuit_cache.get(unique_circuits[unique_i], None) - if expc_outcomes is None: #fall back on original non-cache behavior. - observed_outcomes = None if (dataset is None) else dataset[ds_circuits[unique_i]].unique_outcomes - expc_outcomes = model.expand_instruments_and_separate_povm(unique_complete_circuits[unique_i], observed_outcomes) - #and add this new value to the cache. - expanded_and_separated_circuit_cache[unique_circuits[unique_i]] = expc_outcomes + #and add this new value to the cache. + expanded_and_separated_circuit_cache[unique_circuits[unique_i]] = expc_outcomes for sep_povm_c, outcomes in expc_outcomes.items(): # for each expanded cir from unique_i-th circuit prep_lbl = sep_povm_c.circuit_without_povm[0] exp_nospam_c = sep_povm_c.circuit_without_povm[1:] # sep_povm_c *always* has prep lbl @@ -130,21 +128,16 @@ def add_expanded_circuits(indices, add_to_this_dict): expanded_nospam_circuits_plus_scratch = {i:cir for i, cir in enumerate(expanded_nospam_circuit_outcomes_plus_scratch.keys())} else: expanded_nospam_circuits_plus_scratch = expanded_nospam_circuits.copy() - + + if double_expanded_nospam_circuits_cache is None: + double_expanded_nospam_circuits_cache = dict() double_expanded_nospam_circuits_plus_scratch = dict() - if double_expanded_nospam_circuits_cache is not None: - for i, cir in expanded_nospam_circuits_plus_scratch.items(): - # expand sub-circuits for a more efficient tree - double_expanded_ckt = double_expanded_nospam_circuits_cache.get(cir, None) - if double_expanded_ckt is None: #Fall back to standard behavior and do expansion. 
- double_expanded_nospam_circuits_plus_scratch[i] = cir.expand_subcircuits() - else: - double_expanded_nospam_circuits_plus_scratch[i] = double_expanded_ckt - else: - for i, cir in expanded_nospam_circuits_plus_scratch.items(): - # expand sub-circuits for a more efficient tree - double_expanded_nospam_circuits_plus_scratch[i] = cir.expand_subcircuits() - + for i, cir in expanded_nospam_circuits_plus_scratch.items(): + # expand sub-circuits for a more efficient tree + double_expanded_ckt = double_expanded_nospam_circuits_cache.get(cir, None) + if double_expanded_ckt is None: #Fall back to standard behavior and do expansion. + double_expanded_ckt = cir.expand_subcircuits() + double_expanded_nospam_circuits_plus_scratch[i] = double_expanded_ckt self.tree = _EvalTree.create(double_expanded_nospam_circuits_plus_scratch) #print("Atom tree: %d circuits => tree of size %d" % (len(expanded_nospam_circuits), len(self.tree))) @@ -313,16 +306,12 @@ def __init__(self, circuits, model, dataset=None, num_sub_trees=None, num_tree_p ds_circuits = _lt.apply_aliases_to_circuits(unique_circuits, aliases) #extract subcaches from layout_creation_circuit_cache: - if layout_creation_circuit_cache is not None: - self.completed_circuit_cache = layout_creation_circuit_cache.get('completed_circuits', None) - self.split_circuit_cache = layout_creation_circuit_cache.get('split_circuits', None) - self.expanded_and_separated_circuits_cache = layout_creation_circuit_cache.get('expanded_and_separated_circuits', None) - self.expanded_subcircuits_no_spam_cache = layout_creation_circuit_cache.get('expanded_subcircuits_no_spam', None) - else: - self.completed_circuit_cache = None - self.split_circuit_cache = None - self.expanded_and_separated_circuits_cache = None - self.expanded_subcircuits_no_spam_cache = None + if layout_creation_circuit_cache is None: + layout_creation_circuit_cache = dict() + self.completed_circuit_cache = layout_creation_circuit_cache.get('completed_circuits', None) + self.split_circuit_cache = layout_creation_circuit_cache.get('split_circuits', None) + self.expanded_and_separated_circuits_cache = layout_creation_circuit_cache.get('expanded_and_separated_circuits', None) + self.expanded_subcircuits_no_spam_cache = layout_creation_circuit_cache.get('expanded_subcircuits_no_spam', None) if self.completed_circuit_cache is None: unique_complete_circuits, split_unique_circuits = model.complete_circuits(unique_circuits, return_split=True) @@ -338,7 +327,7 @@ def __init__(self, circuits, model, dataset=None, num_sub_trees=None, num_tree_p #Note: "unique" means a unique circuit *before* circuit-completion, so there could be duplicate # "unique circuits" after completion, e.g. "rho0Gx" and "Gx" could both complete to "rho0GxMdefault_0". - circuits_by_unique_nospam_circuits = _collections.OrderedDict() + circuits_by_unique_nospam_circuits = dict() if self.completed_circuit_cache is None: for i, (_, nospam_c, _) in enumerate(split_unique_circuits): if nospam_c in circuits_by_unique_nospam_circuits: @@ -346,12 +335,15 @@ def __init__(self, circuits, model, dataset=None, num_sub_trees=None, num_tree_p else: circuits_by_unique_nospam_circuits[nospam_c] = [i] #also create the split circuit cache at this point for future use. 
- self.split_circuit_cache = {unique_ckt:split_ckt for unique_ckt, split_ckt in zip(unique_circuits, split_unique_circuits)} + if self.split_circuit_cache is None: + self.split_circuit_cache = {unique_ckt:split_ckt for unique_ckt, split_ckt in zip(unique_circuits, split_unique_circuits)} else: + if self.split_circuit_cache is None: + self.split_circuit_cache = dict() for i, (c_unique_complete, c_unique) in enumerate(zip(unique_complete_circuits, unique_circuits)): split_ckt_tup = self.split_circuit_cache.get(c_unique, None) - nospam_c= split_ckt_tup[1] + nospam_c= split_ckt_tup[1] if split_ckt_tup is not None else None if nospam_c is None: split_ckt_tup = model.split_circuit(c_unique_complete) nospam_c= split_ckt_tup[1] From e0d3c476af65f5ab3835432781e3281b6bbe4519 Mon Sep 17 00:00:00 2001 From: Corey Ostrove Date: Tue, 30 Jul 2024 21:42:45 -0600 Subject: [PATCH 23/30] Unrelated RB testing fix I accidentally put down the wrong directory for temp testing files in the RB testing code. --- test/unit/protocols/test_rb.py | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/test/unit/protocols/test_rb.py b/test/unit/protocols/test_rb.py index fcb616cf2..0460f14b8 100644 --- a/test/unit/protocols/test_rb.py +++ b/test/unit/protocols/test_rb.py @@ -111,9 +111,9 @@ def test_serialization(self): citerations=self.citerations, compilerargs=self.compiler_args, seed=self.seed, verbosity=self.verbosity, num_processes=1) - crb_design.write('../../test_packages/temp_test_files/test_CliffordRBDesign_serialization') + crb_design.write('../../test/test_packages/temp_test_files/test_CliffordRBDesign_serialization') #then read this back in - crb_design_read = _rb.CliffordRBDesign.from_dir('../../test_packages/temp_test_files/test_CliffordRBDesign_serialization') + crb_design_read = _rb.CliffordRBDesign.from_dir('../../test/test_packages/temp_test_files/test_CliffordRBDesign_serialization') self.assertEqual(crb_design.all_circuits_needing_data, crb_design_read.all_circuits_needing_data) self.assertEqual(crb_design.interleaved_circuit, crb_design_read.interleaved_circuit) @@ -163,9 +163,9 @@ def test_combined_design_access(self): def test_serialization(self): - self.irb_design.write('../../test_packages/temp_test_files/test_InterleavedRBDesign_serialization') + self.irb_design.write('../../test/test_packages/temp_test_files/test_InterleavedRBDesign_serialization') #then read this back in - irb_design_read = _rb.InterleavedRBDesign.from_dir('../../test_packages/temp_test_files/test_InterleavedRBDesign_serialization') + irb_design_read = _rb.InterleavedRBDesign.from_dir('../../test/test_packages/temp_test_files/test_InterleavedRBDesign_serialization') self.assertEqual(self.irb_design.all_circuits_needing_data, irb_design_read.all_circuits_needing_data) self.assertEqual(self.irb_design['crb'].all_circuits_needing_data, irb_design_read['crb'].all_circuits_needing_data) @@ -248,9 +248,9 @@ def test_serialization(self): conditionaltwirl=True, citerations=self.citerations, compilerargs=self.compiler_args, partitioned=False, seed=self.seed, verbosity=self.verbosity, num_processes=1) - drb_design.write('../../test_packages/temp_test_files/test_DirectRBDesign_serialization') + drb_design.write('../../test/test_packages/temp_test_files/test_DirectRBDesign_serialization') #then read this back in - drb_design_read = _rb.DirectRBDesign.from_dir('../../test_packages/temp_test_files/test_DirectRBDesign_serialization') + drb_design_read = 
_rb.DirectRBDesign.from_dir('../../test/test_packages/temp_test_files/test_DirectRBDesign_serialization') self.assertEqual(drb_design.all_circuits_needing_data, drb_design_read.all_circuits_needing_data) @@ -375,9 +375,9 @@ def test_serialization(self): localclifford=True, paulirandomize=True, seed=self.seed, verbosity=self.verbosity, num_processes=1) - mrb_design.write('../../test_packages/temp_test_files/test_MirrorRBDesign_serialization') + mrb_design.write('../../test/test_packages/temp_test_files/test_MirrorRBDesign_serialization') #then read this back in - mrb_design_read = _rb.MirrorRBDesign.from_dir('../../test_packages/temp_test_files/test_MirrorRBDesign_serialization') + mrb_design_read = _rb.MirrorRBDesign.from_dir('../../test/test_packages/temp_test_files/test_MirrorRBDesign_serialization') self.assertEqual(mrb_design.all_circuits_needing_data, mrb_design_read.all_circuits_needing_data) @@ -424,9 +424,9 @@ def test_serialization(self): sampler=self.sampler, samplerargs=self.samplerargs, seed=self.seed, verbosity=0) - birb_design.write('../../test_packages/temp_test_files/test_BinaryRBDesign_serialization') + birb_design.write('../../test/test_packages/temp_test_files/test_BinaryRBDesign_serialization') #then read this back in - birb_design_read = _rb.BinaryRBDesign.from_dir('../../test_packages/temp_test_files/test_BinaryRBDesign_serialization') + birb_design_read = _rb.BinaryRBDesign.from_dir('../../test/test_packages/temp_test_files/test_BinaryRBDesign_serialization') self.assertEqual(birb_design.all_circuits_needing_data, birb_design_read.all_circuits_needing_data) @@ -533,8 +533,8 @@ def test_cliffordrb_protocol_ideal(self): self.assertTrue(abs(result.fits['A-fixed'].estimates['r'])<=3e-5) #also test writing and reading the results from disk. - result.write('../../test_packages/temp_test_files/test_RandomizedBenchmarking_results') - result_read = pygsti.io.read_results_from_dir('../../test_packages/temp_test_files/test_RandomizedBenchmarking_results') + result.write('../../test/test_packages/temp_test_files/test_RandomizedBenchmarking_results') + result_read = pygsti.io.read_results_from_dir('../../test/test_packages/temp_test_files/test_RandomizedBenchmarking_results') def test_cliffordrb_protocol_noisy(self): proto = pygsti.protocols.rb.RandomizedBenchmarking(datatype='success_probabilities', defaultfit='A-fixed', rtype='EI', @@ -703,8 +703,8 @@ def test_interleavedrb_protocol_ideal(self): self.assertTrue(abs(estimated_irb_num) <= 1e-5) #also test writing and reading the results from disk. - result.write('../../test_packages/temp_test_files/test_InterleavedRandomizedBenchmarking_results') - result_read = pygsti.io.read_results_from_dir('../../test_packages/temp_test_files/test_InterleavedRandomizedBenchmarking_results') + result.write('../../test/test_packages/temp_test_files/test_InterleavedRandomizedBenchmarking_results') + result_read = pygsti.io.read_results_from_dir('../../test/test_packages/temp_test_files/test_InterleavedRandomizedBenchmarking_results') def test_interleavedrb_protocol_noisy(self): From e670aeb8eeae5207d0c6db12683139c247ccc4bc Mon Sep 17 00:00:00 2001 From: "Stefan K. Seritan" Date: Thu, 12 Sep 2024 13:33:11 -0700 Subject: [PATCH 24/30] Allow SIGINT set to be skipped via env variable. This is relevant when attempting to use Dask outside of pyGSTi, where signals cannot be set in the workers. Setting the PYGSTI_NO_CUSTOMLM_SIGINT env variable now skips this behavior. 
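A minimal sketch of the intended opt-out, assuming the variable is exported before pyGSTi is first imported (only the variable's presence is checked, not its value):

    import os
    os.environ['PYGSTI_NO_CUSTOMLM_SIGINT'] = '1'

    import pygsti  # the customlm module now leaves the SIGINT handler untouched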
--- pygsti/optimize/customlm.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pygsti/optimize/customlm.py b/pygsti/optimize/customlm.py index 41c565cd2..56b36dfc2 100644 --- a/pygsti/optimize/customlm.py +++ b/pygsti/optimize/customlm.py @@ -10,6 +10,7 @@ # http://www.apache.org/licenses/LICENSE-2.0 or in the LICENSE file in the root pyGSTi directory. #*************************************************************************************************** +import os as _os import signal as _signal import time as _time @@ -25,7 +26,10 @@ # from scipy.optimize import OptimizeResult as _optResult #Make sure SIGINT will generate a KeyboardInterrupt (even if we're launched in the background) -_signal.signal(_signal.SIGINT, _signal.default_int_handler) +#This may be problematic for multithreaded parallelism above pyGSTi, e.g. Dask, +#so this can be turned off by setting the PYGSTI_NO_CUSTOMLM_SIGINT environment variable +if 'PYGSTI_NO_CUSTOMLM_SIGINT' in _os.environ: + _signal.signal(_signal.SIGINT, _signal.default_int_handler) #constants _MACH_PRECISION = 1e-12 From f93fd3cd791fe11ae5df9605438018b3ca1ce113 Mon Sep 17 00:00:00 2001 From: "Stefan K. Seritan" Date: Thu, 12 Sep 2024 13:34:10 -0700 Subject: [PATCH 25/30] Logic bugfix --- pygsti/optimize/customlm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pygsti/optimize/customlm.py b/pygsti/optimize/customlm.py index 56b36dfc2..cbaa9b513 100644 --- a/pygsti/optimize/customlm.py +++ b/pygsti/optimize/customlm.py @@ -28,7 +28,7 @@ #Make sure SIGINT will generate a KeyboardInterrupt (even if we're launched in the background) #This may be problematic for multithreaded parallelism above pyGSTi, e.g. Dask, #so this can be turned off by setting the PYGSTI_NO_CUSTOMLM_SIGINT environment variable -if 'PYGSTI_NO_CUSTOMLM_SIGINT' in _os.environ: +if 'PYGSTI_NO_CUSTOMLM_SIGINT' not in _os.environ: _signal.signal(_signal.SIGINT, _signal.default_int_handler) #constants From 549f9019aa364a5f82c85453cb6ef31ea6423027 Mon Sep 17 00:00:00 2001 From: Corey Ostrove Date: Wed, 18 Sep 2024 21:02:40 -0600 Subject: [PATCH 26/30] Fix a bug with parameter label management for interposers A bug in the parameter label handling code was causing parameter labels to explode exponentially in size when _rebuild_paramvec was called, leading to major memory issues. This now makes it so that the value of _paramlbls is fixed to that of the underlying operations and adds a new version of the parameter_labels property that goes through the interposer (making the interposer labels something generated on demand). Also add a threshold for coefficient printing in the LinearInterposer to avoid obnoxious labels. --- pygsti/models/model.py | 9 ++++++++- pygsti/models/modelparaminterposer.py | 2 +- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/pygsti/models/model.py b/pygsti/models/model.py index 368308a46..792107ba0 100644 --- a/pygsti/models/model.py +++ b/pygsti/models/model.py @@ -488,6 +488,13 @@ def __setstate__(self, state_dict): ## Get/Set methods ########################################## + @property + def parameter_labels(self): + """ + A list of labels, usually of the form `(op_label, string_description)` describing this model's parameters. 
+ """ + return self._ops_paramlbls_to_model_paramlbls(self._paramlbls) + @property def sim(self): """ Forward simulator for this model """ @@ -1037,7 +1044,7 @@ def _get_shift(j): return _bisect.bisect_left(indices_to_remove, j) obj.set_gpindices(new_inds, self, memo) self._paramvec = self._ops_paramvec_to_model_paramvec(w) - self._paramlbls = self._ops_paramlbls_to_model_paramlbls(wl) + self._paramlbls = wl self._param_bounds = wb if _param_bounds_are_nontrivial(wb) else None if debug: print("DEBUG: Done rebuild: %d op params" % len(w)) diff --git a/pygsti/models/modelparaminterposer.py b/pygsti/models/modelparaminterposer.py index f92f56a70..aa86fc5f3 100644 --- a/pygsti/models/modelparaminterposer.py +++ b/pygsti/models/modelparaminterposer.py @@ -77,7 +77,7 @@ def ops_paramlbls_to_model_paramlbls(self, wl): # This can and should be improved later - particularly this will be awful when labels (els of wl) are tuples. ret = [] for irow in range(self.inv_transform_matrix.shape[0]): - lbl = ' + '.join(["%g%s" % (coeff, str(lbl)) for coeff, lbl in zip(self.inv_transform_matrix[irow, :], wl)]) + lbl = ' + '.join(["%g%s" % (coeff, str(lbl)) for coeff, lbl in zip(self.inv_transform_matrix[irow, :], wl) if abs(coeff)>1e-10]) ret.append(lbl) return ret From 2af7e118fe5263cf5c59541240091fcc1f70ef35 Mon Sep 17 00:00:00 2001 From: Corey Ostrove Date: Wed, 18 Sep 2024 21:37:09 -0600 Subject: [PATCH 27/30] Fix a serialization bug for trivial gauge optimizations Serialization wasn't working correctly for GSTGaugeOptSuite with the trivial gauge optimization argument dict. This should fix that bug. --- pygsti/protocols/gst.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pygsti/protocols/gst.py b/pygsti/protocols/gst.py index 74e2d9a6f..9255943d3 100644 --- a/pygsti/protocols/gst.py +++ b/pygsti/protocols/gst.py @@ -1133,6 +1133,9 @@ def _to_nice_serialization(self): for lbl, goparams in self.gaugeopt_argument_dicts.items(): goparams_list = [goparams] if hasattr(goparams, 'keys') else goparams serialize_list = [] + if lbl == 'trivial_gauge_opt': + dicts_to_serialize[lbl] = None + continue for goparams_dict in goparams_list: to_add = goparams_dict.copy() if 'target_model' in to_add: @@ -1164,6 +1167,9 @@ def _to_nice_serialization(self): def _from_nice_serialization(cls, state): # memo holds already de-serialized objects gaugeopt_argument_dicts = {} for lbl, serialized_goparams_list in state['gaugeopt_argument_dicts'].items(): + if lbl == 'trivial_gauge_opt': + gaugeopt_argument_dicts[lbl] = None + continue goparams_list = [] for serialized_goparams in serialized_goparams_list: to_add = serialized_goparams.copy() From db5f4b4b8079ea9c3c3147025ec2b9f29de78b99 Mon Sep 17 00:00:00 2001 From: "Stefan K. Seritan" Date: Thu, 19 Sep 2024 09:43:42 -0700 Subject: [PATCH 28/30] Bugfix for deterministic Clifford RB test. Fix was a missing Label.is_simple() -> Label.IS_SIMPLE. 
--- .gitignore | 2 ++ pygsti/circuits/circuit.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 46f0b7850..acf96801b 100644 --- a/.gitignore +++ b/.gitignore @@ -31,6 +31,7 @@ doc/build *gst_checkpoints* *model_test_checkpoints* *standard_gst_checkpoints* +*ibmqexperiment_checkpoint* # Serialization Testing Artifacts # ################################### @@ -43,6 +44,7 @@ test/output/pylint/* test/output/individual_coverage/*/* test/test_packages/cmp_chk_files/Fake_Dataset_none.txt.cache **.noseids +**test_ibmq** # Tutorial Notebook Untracked Files # #################################### diff --git a/pygsti/circuits/circuit.py b/pygsti/circuits/circuit.py index 2977dddac..ffacd8846 100644 --- a/pygsti/circuits/circuit.py +++ b/pygsti/circuits/circuit.py @@ -3447,7 +3447,7 @@ def num_gates(self): """ if self._static: def cnt(lbl): # obj a Label, perhaps compound - if lbl.is_simple(): # a simple label + if lbl.IS_SIMPLE: # a simple label return 1 if (lbl.sslbls is not None) else 0 else: return sum([cnt(sublbl) for sublbl in lbl.components]) From 76ff5bf2b9628a8033b172782cfb00f3690c8ac5 Mon Sep 17 00:00:00 2001 From: "Stefan K. Seritan" Date: Thu, 19 Sep 2024 10:25:43 -0700 Subject: [PATCH 29/30] Make test_rb paths absolute. --- test/unit/protocols/test_rb.py | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/test/unit/protocols/test_rb.py b/test/unit/protocols/test_rb.py index 0460f14b8..9f461f098 100644 --- a/test/unit/protocols/test_rb.py +++ b/test/unit/protocols/test_rb.py @@ -1,6 +1,7 @@ from ..util import BaseCase import numpy as _np +from pathlib import Path import pygsti from pygsti.protocols import rb as _rb @@ -9,6 +10,8 @@ from pygsti.circuits import Circuit from pygsti.baseobjs import Label +FILE_PATH = str(Path(__file__).resolve().parent) + class TestCliffordRBDesign(BaseCase): def setUp(self): @@ -111,9 +114,9 @@ def test_serialization(self): citerations=self.citerations, compilerargs=self.compiler_args, seed=self.seed, verbosity=self.verbosity, num_processes=1) - crb_design.write('../../test/test_packages/temp_test_files/test_CliffordRBDesign_serialization') + crb_design.write(f'{FILE_PATH}/../../test_packages/temp_test_files/test_CliffordRBDesign_serialization') #then read this back in - crb_design_read = _rb.CliffordRBDesign.from_dir('../../test/test_packages/temp_test_files/test_CliffordRBDesign_serialization') + crb_design_read = _rb.CliffordRBDesign.from_dir(f'{FILE_PATH}/../../test_packages/temp_test_files/test_CliffordRBDesign_serialization') self.assertEqual(crb_design.all_circuits_needing_data, crb_design_read.all_circuits_needing_data) self.assertEqual(crb_design.interleaved_circuit, crb_design_read.interleaved_circuit) @@ -163,9 +166,9 @@ def test_combined_design_access(self): def test_serialization(self): - self.irb_design.write('../../test/test_packages/temp_test_files/test_InterleavedRBDesign_serialization') + self.irb_design.write(f'{FILE_PATH}/../../test_packages/temp_test_files/test_InterleavedRBDesign_serialization') #then read this back in - irb_design_read = _rb.InterleavedRBDesign.from_dir('../../test/test_packages/temp_test_files/test_InterleavedRBDesign_serialization') + irb_design_read = _rb.InterleavedRBDesign.from_dir(f'{FILE_PATH}/../../test_packages/temp_test_files/test_InterleavedRBDesign_serialization') self.assertEqual(self.irb_design.all_circuits_needing_data, irb_design_read.all_circuits_needing_data) 
self.assertEqual(self.irb_design['crb'].all_circuits_needing_data, irb_design_read['crb'].all_circuits_needing_data) @@ -248,9 +251,9 @@ def test_serialization(self): conditionaltwirl=True, citerations=self.citerations, compilerargs=self.compiler_args, partitioned=False, seed=self.seed, verbosity=self.verbosity, num_processes=1) - drb_design.write('../../test/test_packages/temp_test_files/test_DirectRBDesign_serialization') + drb_design.write(f'{FILE_PATH}/../../test_packages/temp_test_files/test_DirectRBDesign_serialization') #then read this back in - drb_design_read = _rb.DirectRBDesign.from_dir('../../test/test_packages/temp_test_files/test_DirectRBDesign_serialization') + drb_design_read = _rb.DirectRBDesign.from_dir(f'{FILE_PATH}/../../test_packages/temp_test_files/test_DirectRBDesign_serialization') self.assertEqual(drb_design.all_circuits_needing_data, drb_design_read.all_circuits_needing_data) @@ -375,9 +378,9 @@ def test_serialization(self): localclifford=True, paulirandomize=True, seed=self.seed, verbosity=self.verbosity, num_processes=1) - mrb_design.write('../../test/test_packages/temp_test_files/test_MirrorRBDesign_serialization') + mrb_design.write(f'{FILE_PATH}/../../test_packages/temp_test_files/test_MirrorRBDesign_serialization') #then read this back in - mrb_design_read = _rb.MirrorRBDesign.from_dir('../../test/test_packages/temp_test_files/test_MirrorRBDesign_serialization') + mrb_design_read = _rb.MirrorRBDesign.from_dir(f'{FILE_PATH}/../../test_packages/temp_test_files/test_MirrorRBDesign_serialization') self.assertEqual(mrb_design.all_circuits_needing_data, mrb_design_read.all_circuits_needing_data) @@ -424,9 +427,9 @@ def test_serialization(self): sampler=self.sampler, samplerargs=self.samplerargs, seed=self.seed, verbosity=0) - birb_design.write('../../test/test_packages/temp_test_files/test_BinaryRBDesign_serialization') + birb_design.write(f'{FILE_PATH}/../../test_packages/temp_test_files/test_BinaryRBDesign_serialization') #then read this back in - birb_design_read = _rb.BinaryRBDesign.from_dir('../../test/test_packages/temp_test_files/test_BinaryRBDesign_serialization') + birb_design_read = _rb.BinaryRBDesign.from_dir(f'{FILE_PATH}/../../test_packages/temp_test_files/test_BinaryRBDesign_serialization') self.assertEqual(birb_design.all_circuits_needing_data, birb_design_read.all_circuits_needing_data) @@ -533,8 +536,8 @@ def test_cliffordrb_protocol_ideal(self): self.assertTrue(abs(result.fits['A-fixed'].estimates['r'])<=3e-5) #also test writing and reading the results from disk. - result.write('../../test/test_packages/temp_test_files/test_RandomizedBenchmarking_results') - result_read = pygsti.io.read_results_from_dir('../../test/test_packages/temp_test_files/test_RandomizedBenchmarking_results') + result.write(f'{FILE_PATH}/../../test_packages/temp_test_files/test_RandomizedBenchmarking_results') + result_read = pygsti.io.read_results_from_dir(f'{FILE_PATH}/../../test_packages/temp_test_files/test_RandomizedBenchmarking_results') def test_cliffordrb_protocol_noisy(self): proto = pygsti.protocols.rb.RandomizedBenchmarking(datatype='success_probabilities', defaultfit='A-fixed', rtype='EI', @@ -703,8 +706,8 @@ def test_interleavedrb_protocol_ideal(self): self.assertTrue(abs(estimated_irb_num) <= 1e-5) #also test writing and reading the results from disk. 
- result.write('../../test/test_packages/temp_test_files/test_InterleavedRandomizedBenchmarking_results') - result_read = pygsti.io.read_results_from_dir('../../test/test_packages/temp_test_files/test_InterleavedRandomizedBenchmarking_results') + result.write(f'{FILE_PATH}/../../test_packages/temp_test_files/test_InterleavedRandomizedBenchmarking_results') + result_read = pygsti.io.read_results_from_dir(f'{FILE_PATH}/../../test_packages/temp_test_files/test_InterleavedRandomizedBenchmarking_results') def test_interleavedrb_protocol_noisy(self): From 548703bb3fb993b0c3be89b13edecd0cfb4b8697 Mon Sep 17 00:00:00 2001 From: "Stefan K. Seritan" Date: Thu, 19 Sep 2024 14:35:14 -0700 Subject: [PATCH 30/30] Fix beta tests. --- pygsti/forwardsims/mapforwardsim.py | 3 ++- pygsti/forwardsims/termforwardsim.py | 9 ++++++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/pygsti/forwardsims/mapforwardsim.py b/pygsti/forwardsims/mapforwardsim.py index 6e5ed4f83..8a7140291 100644 --- a/pygsti/forwardsims/mapforwardsim.py +++ b/pygsti/forwardsims/mapforwardsim.py @@ -226,7 +226,8 @@ def create_layout(self, circuits, dataset=None, resource_alloc=None, array_types Determines how much output to send to stdout. 0 means no output, higher integers mean more output. - A precomputed dictionary serving as a cache for completed + layout_creation_circuit_cache: + A precomputed dictionary serving as a cache for completed circuits. I.e. circuits with prep labels and POVM labels appended. Along with other useful pre-computed circuit structures used in layout creation. diff --git a/pygsti/forwardsims/termforwardsim.py b/pygsti/forwardsims/termforwardsim.py index 3d4669d2a..d6b00b4fc 100644 --- a/pygsti/forwardsims/termforwardsim.py +++ b/pygsti/forwardsims/termforwardsim.py @@ -267,7 +267,7 @@ def copy(self): self.oob_check_interval, self.cache) def create_layout(self, circuits, dataset=None, resource_alloc=None, array_types=('E',), - derivative_dimension=None, verbosity=0): + derivative_dimension=None, verbosity=0, layout_creation_circuit_cache=None): """ Constructs an circuit-outcome-probability-array (COPA) layout for a list of circuits. @@ -296,6 +296,12 @@ def create_layout(self, circuits, dataset=None, resource_alloc=None, array_types verbosity : int or VerbosityPrinter Determines how much output to send to stdout. 0 means no output, higher integers mean more output. + + layout_creation_circuit_cache: + A precomputed dictionary serving as a cache for completed + circuits. I.e. circuits with prep labels and POVM labels appended. + Along with other useful pre-computed circuit structures used in layout + creation. Returns ------- @@ -330,6 +336,7 @@ def create_layout(self, circuits, dataset=None, resource_alloc=None, array_types printer.log(" %d atoms, parameter block size limits %s" % (natoms, str(param_blk_sizes))) assert(_np.prod((na,) + npp) <= nprocs), "Processor grid size exceeds available processors!" + # TODO: Layout circuit creation cache unused for TermCOPALayout layout = _TermCOPALayout(circuits, self.model, dataset, natoms, na, npp, param_dimensions, param_blk_sizes, resource_alloc, printer) #MEM debug_prof.print_memory("CreateLayout2 - nAtoms = %d" % len(layout.atoms), True)
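Taken together, the caching pieces in this series compose as in the sketch below (illustrative driver code under the API added here, not a pyGSTi entry point; assumes the smq1Q_XYI model pack, whose explicit model defaults to a matrix forward simulator):

    from pygsti.modelpacks import smq1Q_XYI

    model = smq1Q_XYI.target_model()
    circuits = smq1Q_XYI.prep_fiducials() + smq1Q_XYI.meas_fiducials()

    # Build the reusable circuit-structure cache once...
    cache = model.sim.create_copa_layout_circuit_cache(circuits, model, dataset=None)

    # ...then share it across layout creations (e.g. one per GST iteration).
    layout = model.sim.create_layout(circuits, dataset=None,
                                     layout_creation_circuit_cache=cache)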