-
Notifications
You must be signed in to change notification settings - Fork 59
WIP: TVD objective in GST #506
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
bc64ecb
b6e540f
8f59891
986e528
ccf17ef
5959a53
19316be
9b73727
fb4818c
122fd11
2ef13e2
1c23251
6e9d366
7d20e53
d15110e
b0e72b5
2719da0
c33304a
f8fea8e
a67bece
2ed1c21
d028a77
48fb7ae
14c1a51
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Need to document why changes to this file were needed. |
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can move this change to another PR. |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -555,6 +555,8 @@ def lsvec(self, probs, counts, total_counts, freqs, intermediates=None): | |
""" | ||
return _np.sqrt(self.terms(probs, counts, total_counts, freqs, intermediates)) | ||
|
||
# Infinite loop in evaluation of "dterms" and "dlsvec". | ||
|
||
def dterms(self, probs, counts, total_counts, freqs, intermediates=None): | ||
""" | ||
Compute the derivatives of the terms of this objective function. | ||
|
@@ -589,10 +591,11 @@ def dterms(self, probs, counts, total_counts, freqs, intermediates=None): | |
""" | ||
if intermediates is None: | ||
intermediates = self._intermediates(probs, counts, total_counts, freqs) | ||
lsvec = self.lsvec(probs, counts, total_counts, freqs, intermediates) | ||
u = self.lsvec(probs, counts, total_counts, freqs, intermediates) | ||
v = self.dlsvec(probs, counts, total_counts, freqs, intermediates) | ||
# NOTE: this function is correct under the assumption that terms == lsvec**2, | ||
# independent of whether or not lsvec is nonnegative. | ||
return 2 * lsvec * self.dlsvec(probs, counts, total_counts, freqs, intermediates) | ||
return 2 * u * v | ||
|
||
def dlsvec(self, probs, counts, total_counts, freqs, intermediates=None): | ||
""" | ||
|
@@ -627,7 +630,8 @@ def dlsvec(self, probs, counts, total_counts, freqs, intermediates=None): | |
A 1D array of length equal to that of each array argument. | ||
""" | ||
# lsvec = sqrt(terms) | ||
# NOTE: ^ That's only correct if lsvec is >= 0, and some classes don't satisfy that. | ||
# NOTE: ^ That's only correct if lsvec is >= 0. | ||
# Any class that doesn't ensure lsvec >= 0 must override this function. | ||
# dlsvec = 0.5/lsvec * dterms | ||
# | ||
if intermediates is None: | ||
|
@@ -2671,36 +2675,37 @@ def zero_freq_hterms(self, total_counts, probs): | |
return 2 * _np.ones(len(probs)) | ||
|
||
|
||
# The log(Likelihood) within the Poisson picture is: # noqa | ||
# # noqa | ||
# L = prod_{i,sl} lambda_{i,sl}^N_{i,sl} e^{-lambda_{i,sl}} / N_{i,sl}! # noqa | ||
# # noqa | ||
# Where lambda_{i,sl} := p_{i,sl}*N[i] is a rate, i indexes the operation sequence, # noqa | ||
# and sl indexes the spam label. N[i] is the total counts for the i-th circuit, and # noqa | ||
# so sum_{sl} N_{i,sl} == N[i]. We can ignore the p-independent N_j! and take the log: # noqa | ||
# # noqa | ||
# log L = sum_{i,sl} N_{i,sl} log(N[i]*p_{i,sl}) - N[i]*p_{i,sl} # noqa | ||
# = sum_{i,sl} N_{i,sl} log(p_{i,sl}) - N[i]*p_{i,sl} (where we ignore the p-independent log(N[i]) terms) # noqa | ||
# # noqa | ||
# The objective function computes the negative log(Likelihood) as a vector of leastsq # noqa | ||
# terms, where each term == sqrt( N_{i,sl} * -log(p_{i,sl}) + N[i] * p_{i,sl} ) # noqa | ||
# # noqa | ||
# See LikelihoodFunctions.py for details on patching # noqa | ||
# The log(Likelihood) within the standard picture is: | ||
# | ||
# L = prod_{i,sl} p_{i,sl}^N_{i,sl} | ||
# | ||
# Where i indexes the operation sequence, and sl indexes the spam label. | ||
# N[i] is the total counts for the i-th circuit, and | ||
# so sum_{sl} N_{i,sl} == N[i]. We take the log: | ||
# | ||
# log L = sum_{i,sl} N_{i,sl} log(p_{i,sl}) | ||
# | ||
# The objective function computes the negative log(Likelihood) as a vector of leastsq | ||
# terms, where each term == sqrt( N_{i,sl} * -log(p_{i,sl}) ) | ||
# | ||
# See LikelihoodFunction.py for details on patching | ||
|
||
""" | ||
The log(Likelihood) within the Poisson picture is: # noqa | ||
# noqa | ||
L = prod_{i,sl} lambda_{i,sl}^N_{i,sl} e^{-lambda_{i,sl}} / N_{i,sl}! # noqa | ||
# noqa | ||
Where lambda_{i,sl} := p_{i,sl}*N[i] is a rate, i indexes the operation sequence, # noqa | ||
and sl indexes the spam label. N[i] is the total counts for the i-th circuit, and # noqa | ||
so sum_{sl} N_{i,sl} == N[i]. We can ignore the p-independent N_j! and take the log: # noqa | ||
# noqa | ||
log L = sum_{i,sl} N_{i,sl} log(N[i]*p_{i,sl}) - N[i]*p_{i,sl} # noqa | ||
= sum_{i,sl} N_{i,sl} log(p_{i,sl}) - N[i]*p_{i,sl} (where we ignore the p-independent log(N[i]) terms) # noqa | ||
# noqa | ||
The objective function computes the negative log(Likelihood) as a vector of leastsq # noqa | ||
terms, where each term == sqrt( N_{i,sl} * -log(p_{i,sl}) + N[i] * p_{i,sl} ) # noqa | ||
# noqa | ||
See LikelihoodFunctions.py for details on patching # noqa | ||
The log(Likelihood) within the standard picture is: | ||
|
||
L = prod_{i,sl} p_{i,sl}^N_{i,sl} | ||
|
||
Where i indexes the operation sequence, and sl indexes the spam label. | ||
N[i] is the total counts for the i-th circuit, and | ||
so sum_{sl} N_{i,sl} == N[i]. We take the log: | ||
|
||
log L = sum_{i,sl} N_{i,sl} log(p_{i,sl}) | ||
|
||
The objective function computes the negative log(Likelihood) as a vector of leastsq | ||
terms, where each term == sqrt( N_{i,sl} * -log(p_{i,sl}) ) | ||
|
||
See LikelihoodFunction.py for details on patching | ||
""" | ||
|
||
class RawPoissonPicDeltaLogLFunction(RawObjectiveFunction): | ||
""" | ||
|
@@ -3938,6 +3943,9 @@ def __init__(self, regularization=None, | |
resource_alloc=None, name='tvd', description="Total Variational Distance (TVD)", verbosity=0): | ||
super().__init__(regularization, resource_alloc, name, description, verbosity) | ||
|
||
def chi2k_distributed_qty(self, objective_function_value): | ||
return -1 | ||
|
||
def terms(self, probs, counts, total_counts, freqs, intermediates=None): | ||
""" | ||
Compute the terms of the objective function. | ||
|
@@ -4003,7 +4011,11 @@ def dterms(self, probs, counts, total_counts, freqs, intermediates=None): | |
numpy.ndarray | ||
A 1D array of length equal to that of each array argument. | ||
""" | ||
raise NotImplementedError("Derivatives not implemented for TVD yet!") | ||
_warnings.warn('This derivative is discontinuous and does not return a full subgradient.') | ||
t = probs - freqs | ||
d = 0.5*_np.ones_like(t) | ||
d[t < 0] *= -1 | ||
return d | ||
|
||
def hterms(self, probs, counts, total_counts, freqs, intermediates=None): | ||
""" | ||
|
@@ -5208,6 +5220,109 @@ def create_from(cls, model, dataset, circuits, regularization=None, penalties=No | |
def __init__(self, mdc_store, regularization=None, penalties=None, name=None, description=None, verbosity=0): | ||
raw_objfn = RawTVDFunction(regularization, mdc_store.resource_alloc, name, description, verbosity) | ||
super().__init__(raw_objfn, mdc_store, penalties, verbosity) | ||
self.num_circuits = 0 | ||
self.terms_weights = _np.array([]) | ||
self._update_terms_weights() # <-- properly computes the two properties above. | ||
|
||
def _update_terms_weights(self): | ||
# if self.num_circuits == self.layout.num_circuits: | ||
# # Assume there's nothing to do. | ||
# return | ||
# else, assume that self.layout has been updated. | ||
self.num_circuits = self.layout.num_circuits | ||
num_elements = self.layout.num_elements | ||
circuit_sizes = _np.zeros((num_elements,)) | ||
for elind, circuit, _ in self.layout: | ||
circuit_sizes[elind] = 1 + circuit.num_gates | ||
assert _np.all(circuit_sizes > 0) | ||
self.terms_weights = 1 / circuit_sizes | ||
self.terms_weights /= _np.mean(self.terms_weights) | ||
# ^ Make the weights mean-1 to avoid unintended changes to Levenberg-Marquardt | ||
# stopping criteria. | ||
return | ||
|
||
# QUESTION: could I just call the base class's implementation of terms and then scale the | ||
# result? If so then I can avoid a ton of code duplication. | ||
Comment on lines
+5244
to
+5245
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Need to resolve this question before merge. |
||
def terms(self, paramvec=None, oob_check=False, profiler_str="TERMS OBJECTIVE"): | ||
tm = _time.time() | ||
if paramvec is None: | ||
paramvec = self.model.to_vector() | ||
else: | ||
self.model.from_vector(paramvec) | ||
terms = self.obj.view() | ||
|
||
unit_ralloc = self.layout.resource_alloc('atom-processing') | ||
shared_mem_leader = unit_ralloc.is_host_leader | ||
|
||
with self.resource_alloc.temporarily_track_memory(self.nelements): # 'e' (terms) | ||
self.model.sim.bulk_fill_probs(self.probs, self.layout) | ||
self._clip_probs() | ||
|
||
if oob_check: # Only used for termgap cases | ||
if not self.model.sim.bulk_test_if_paths_are_sufficient(self.layout, self.probs, verbosity=1): | ||
raise ValueError("Out of bounds!") # signals LM optimizer | ||
|
||
if shared_mem_leader: | ||
terms_no_penalty = self.raw_objfn.terms(self.probs, self.counts, self.total_counts, self.freqs) | ||
terms[:self.nelements] = terms_no_penalty | ||
if self._process_penalties: | ||
terms[self.nelements:] = self._terms_penalty(paramvec) | ||
|
||
if self.firsts is not None and shared_mem_leader: | ||
omitted_probs = 1.0 - _np.array([self.probs[self.layout.indices_for_index(i)].sum() for i in self.indicesOfCircuitsWithOmittedData]) | ||
omitted_probs_firsts_terms = self.raw_objfn.zero_freq_terms(self.total_counts[self.firsts], omitted_probs) | ||
terms[self.firsts] += omitted_probs_firsts_terms | ||
|
||
self._update_terms_weights() | ||
terms[:self.nelements] *= self.terms_weights | ||
|
||
unit_ralloc.host_comm_barrier() | ||
|
||
self.raw_objfn.resource_alloc.profiler.add_time(profiler_str, tm) | ||
assert(terms.shape == (self.nelements + self.local_ex,)) | ||
terms *= self.terms_weights | ||
return terms | ||
|
||
# QUESTION: could I just call the base class's implementation of dterms and then scale the | ||
# leading rows of the result? If so then I can avoid a ton of code duplication. | ||
Comment on lines
+5286
to
+5287
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Need to resolve this before merge. |
||
def dterms(self, paramvec=None): | ||
tm = _time.time() | ||
unit_ralloc = self.layout.resource_alloc('param-processing') | ||
shared_mem_leader = unit_ralloc.is_host_leader | ||
|
||
if paramvec is None: | ||
paramvec = self.model.to_vector() | ||
else: | ||
self.model.from_vector(paramvec) | ||
|
||
dprobs = self.jac[0:self.nelements, :] | ||
dprobs.shape = (self.nelements, self.nparams) | ||
|
||
with self.resource_alloc.temporarily_track_memory(2 * self.nelements): | ||
self.model.sim.bulk_fill_dprobs(dprobs, self.layout, self.probs) | ||
self._clip_probs() | ||
if shared_mem_leader: | ||
if self.firsts is not None: | ||
for ii, i in enumerate(self.indicesOfCircuitsWithOmittedData): | ||
self.dprobs_omitted_rowsum[ii, :] = _np.sum(dprobs[self.layout.indices_for_index(i), :], axis=0) | ||
dg_probs = self.raw_objfn.dterms(self.probs, self.counts, self.total_counts, self.freqs) | ||
dprobs *= dg_probs[:, None] | ||
|
||
if shared_mem_leader and self.firsts is not None: | ||
total_counts_firsts = self.total_counts[self.firsts] | ||
omitted_probs = 1.0 - _np.array([_np.sum(self.probs[self.layout.indices_for_index(i)]) for i in self.indicesOfCircuitsWithOmittedData]) | ||
omitted_dprobs_firsts_dterms = self.raw_objfn.zero_freq_dterms(total_counts_firsts, omitted_probs) | ||
dprobs[self.firsts] -= omitted_dprobs_firsts_dterms[:, None] * self.dprobs_omitted_rowsum | ||
|
||
self._update_terms_weights() | ||
dprobs[:self.nelements] *= self.terms_weights[:, None] | ||
|
||
if shared_mem_leader and self._process_penalties: | ||
self._dterms_fill_penalty(paramvec, self.jac[self.nelements:, :]) | ||
|
||
unit_ralloc.host_comm_barrier() | ||
self.raw_objfn.resource_alloc.profiler.add_time("JACOBIAN", tm) | ||
return self.jac | ||
|
||
|
||
class TimeDependentMDCObjectiveFunction(MDCObjectiveFunction): | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Could move this change (and related changes below) to another PR.