diff --git a/pygsti/objectivefns/objectivefns.py b/pygsti/objectivefns/objectivefns.py index 0c44fe74c..fd24655c1 100644 --- a/pygsti/objectivefns/objectivefns.py +++ b/pygsti/objectivefns/objectivefns.py @@ -14,6 +14,7 @@ import sys as _sys import time as _time import pathlib as _pathlib +import warnings as _warnings import numpy as _np @@ -588,8 +589,10 @@ def dterms(self, probs, counts, total_counts, freqs, intermediates=None): """ if intermediates is None: intermediates = self._intermediates(probs, counts, total_counts, freqs) - return 2 * self.lsvec(probs, counts, total_counts, freqs, intermediates) \ - * self.dlsvec(probs, counts, total_counts, freqs, intermediates) + lsvec = self.lsvec(probs, counts, total_counts, freqs, intermediates) + # NOTE: this function is correct under the assumption that terms == lsvec**2, + # independent of whether or not lsvec is nonnegative. + return 2 * lsvec * self.dlsvec(probs, counts, total_counts, freqs, intermediates) def dlsvec(self, probs, counts, total_counts, freqs, intermediates=None): """ @@ -624,7 +627,9 @@ def dlsvec(self, probs, counts, total_counts, freqs, intermediates=None): A 1D array of length equal to that of each array argument. """ # lsvec = sqrt(terms) + # NOTE: ^ That's only correct if lsvec is >= 0, and some classes don't satisfy that. # dlsvec = 0.5/lsvec * dterms + # if intermediates is None: intermediates = self._intermediates(probs, counts, total_counts, freqs) lsvec = self.lsvec(probs, counts, total_counts, freqs, intermediates) @@ -1630,6 +1635,8 @@ def _gather_hessian(self, local_hessian): # = (p - f)^2 * ( ((1-p) + p)/(p*(1-p)) ) # = 1/(p*(1-p)) * (p - f)^2 + +# negative lsvec possible class RawChi2Function(RawObjectiveFunction): """ The function `N(p-f)^2 / p` @@ -1720,7 +1727,8 @@ def lsvec(self, probs, counts, total_counts, freqs, intermediates=None): numpy.ndarray A 1D array of length equal to that of each array argument. """ - return (probs - freqs) * self._weights(probs, freqs, total_counts) # Note: ok if this is negative + out = (probs - freqs) * self._weights(probs, freqs, total_counts) # Note: ok if this is negative + return out def dlsvec(self, probs, counts, total_counts, freqs, intermediates=None): """ @@ -1755,7 +1763,8 @@ def dlsvec(self, probs, counts, total_counts, freqs, intermediates=None): A 1D array of length equal to that of each array argument. """ weights = self._weights(probs, freqs, total_counts) - return weights + (probs - freqs) * self._dweights(probs, freqs, weights) + out = weights + (probs - freqs) * self._dweights(probs, freqs, weights) + return out def hlsvec(self, probs, counts, total_counts, freqs, intermediates=None): """ @@ -2275,6 +2284,7 @@ def _zero_freq_dterms_relaxed(self, total_counts, probs): return total_counts * _np.where(probs > p0, 1.0, 2 * c1 * probs) +# negative lsvec possible class RawFreqWeightedChi2Function(RawChi2Function): """ @@ -2469,6 +2479,7 @@ def zero_freq_hterms(self, total_counts, probs): return 2 * total_counts / self.min_freq_clip_for_weighting +# negative lsvec possible class RawCustomWeightedChi2Function(RawChi2Function): """ @@ -2689,6 +2700,8 @@ def zero_freq_hterms(self, total_counts, probs): # terms, where each term == sqrt( N_{i,sl} * -log(p_{i,sl}) ) # # See LikelihoodFunction.py for details on patching + + class RawPoissonPicDeltaLogLFunction(RawObjectiveFunction): """ The function `N*f*log(f/p) - N*(f-p)`. 
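The NOTE added above relies on the identity terms == lsvec**2 holding elementwise, which is why dterms can apply the chain rule 2 * lsvec * dlsvec even for the chi2-style classes tagged "# negative lsvec possible". The following standalone sketch (not pyGSTi code; toy_lsvec and toy_dlsvec are invented names for a chi2-like residual) checks that relationship against a finite difference, including an entry where the residual is negative.

import numpy as _np

def toy_lsvec(p, f, n):
    # chi2-style residual sqrt(n/p) * (p - f); negative wherever p < f
    return (p - f) * _np.sqrt(n / p)

def toy_dlsvec(p, f, n):
    # analytic d(toy_lsvec)/dp
    return _np.sqrt(n / p) - 0.5 * (p - f) * _np.sqrt(n) * p**(-1.5)

p = _np.array([0.10, 0.45, 0.45])   # first entry gives a negative residual
f = _np.array([0.20, 0.50, 0.30])
n = _np.array([100., 100., 100.])

# chain rule for terms = lsvec**2, valid regardless of the sign of lsvec
dterms_chain = 2 * toy_lsvec(p, f, n) * toy_dlsvec(p, f, n)

eps = 1e-6                            # central finite difference of terms
dterms_fd = (toy_lsvec(p + eps, f, n)**2 - toy_lsvec(p - eps, f, n)**2) / (2 * eps)
assert _np.allclose(dterms_chain, dterms_fd)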
@@ -4100,6 +4113,13 @@ def zero_freq_hterms(self, total_counts, probs): raise NotImplementedError("Derivatives not implemented for TVD yet!") +###################################################### +# +# Start MDCObjectiveFunction subclasses +# +###################################################### + + class TimeIndependentMDCObjectiveFunction(MDCObjectiveFunction): """ A time-independent model-based (:class:`MDCObjectiveFunction`-derived) objective function. @@ -4260,7 +4280,8 @@ def __del__(self): self.layout.free_local_array(self.obj) self.layout.free_local_array(self.jac) - #Model-based regularization and penalty support functions + # Main public instance functions + def set_penalties(self, regularize_factor=0, cptp_penalty_factor=0, spam_penalty_factor=0, errorgen_penalty_factor=0, forcefn_grad=None, shift_fctr=100, prob_clip_interval=(-10000, 1000)): @@ -4324,8 +4345,10 @@ def set_penalties(self, regularize_factor=0, cptp_penalty_factor=0, spam_penalty self.prob_clip_interval = prob_clip_interval # not really a "penalty" per se, but including it as one # gives the user the ability to easily set it if they ever need to (unlikely) - self._process_penalties = self.layout.part_of_final_atom_processor \ - if isinstance(self.layout, _DistributableCOPALayout) else True + if isinstance(self.layout, _DistributableCOPALayout): + self._process_penalties = self.layout.part_of_final_atom_processor + else: + self._process_penalties = True ex = 0 # Compute "extra" number of terms/lsvec-element/rows-of-jacobian beyond evaltree elements @@ -4343,309 +4366,184 @@ def set_penalties(self, regularize_factor=0, cptp_penalty_factor=0, spam_penalty return ex - def _lspenaltyvec(self, paramvec): - """ - The least-squares penalty vector, an array of the square roots of the penalty terms. - - Parameters - ---------- - paramvec : numpy.ndarray - The vector of (model) parameters to evaluate the objective function at. - - Returns - ------- - numpy.ndarray - """ - if self.forcefn_grad is not None: - force_vec = self.forceShift - _np.dot(self.forcefn_grad, self.model.to_vector()) - assert(_np.all(force_vec >= 0)), "Inadequate forcing shift!" 
- forcefn_penalty = _np.sqrt(force_vec) - else: forcefn_penalty = [] + def terms(self, paramvec=None, oob_check=False, profiler_str="TERMS OBJECTIVE"): + tm = _time.time() + if paramvec is None: + paramvec = self.model.to_vector() + else: + self.model.from_vector(paramvec) + terms = self.obj.view() - if self.regularize_factor != 0: - paramvec_norm = self.regularize_factor * _np.array([max(0, absx - 1.0) for absx in map(abs, paramvec)], 'd') - else: paramvec_norm = [] # so concatenate ignores + unit_ralloc = self.layout.resource_alloc('atom-processing') + shared_mem_leader = unit_ralloc.is_host_leader - if self.cptp_penalty_factor > 0: - cp_penalty_vec = _cptp_penalty(self.model, self.cptp_penalty_factor, self.opBasis) - else: cp_penalty_vec = [] # so concatenate ignores + with self.resource_alloc.temporarily_track_memory(self.nelements): # 'e' (terms) + self.model.sim.bulk_fill_probs(self.probs, self.layout) + self._clip_probs() + + if oob_check: # Only used for termgap cases + if not self.model.sim.bulk_test_if_paths_are_sufficient(self.layout, self.probs, verbosity=1): + raise ValueError("Out of bounds!") # signals LM optimizer - if self.spam_penalty_factor > 0: - spam_penalty_vec = _spam_penalty(self.model, self.spam_penalty_factor, self.opBasis) - else: spam_penalty_vec = [] # so concatenate ignores + if shared_mem_leader: + terms_no_penalty = self.raw_objfn.terms(self.probs, self.counts, self.total_counts, self.freqs) + terms[:self.nelements] = terms_no_penalty + if self._process_penalties: + terms[self.nelements:] = self._terms_penalty(paramvec) - if self.errorgen_penalty_factor > 0: - errorgen_penalty_vec = _errorgen_penalty(self.model, self.errorgen_penalty_factor) - else: - errorgen_penalty_vec = [] + if self.firsts is not None and shared_mem_leader: + omitted_probs = 1.0 - _np.array([self.probs[self.layout.indices_for_index(i)].sum() for i in self.indicesOfCircuitsWithOmittedData]) + omitted_probs_firsts_terms = self.raw_objfn.zero_freq_terms(self.total_counts[self.firsts], omitted_probs) + terms[self.firsts] += omitted_probs_firsts_terms + + unit_ralloc.host_comm_barrier() - return _np.concatenate((forcefn_penalty, paramvec_norm, cp_penalty_vec, spam_penalty_vec, errorgen_penalty_vec)) + self.raw_objfn.resource_alloc.profiler.add_time(profiler_str, tm) + assert(terms.shape == (self.nelements + self.local_ex,)) + return terms - def _penaltyvec(self, paramvec): - """ - The penalty vector, an array of all the penalty terms. + def lsvec(self, paramvec=None, oob_check=False, raw_objfn_lsvec_signs=True): + lsvec = self.terms(paramvec, oob_check, "LS OBJECTIVE") + if _np.any(lsvec < 0): + bad_locs = _np.where(lsvec < 0)[0] + msg = f""" + lsvec is only defined when terms is elementwise nonnegative. + We encountered negative values for terms[i] for indices i + in {bad_locs}. + """ + raise RuntimeError(msg) + lsvec **= 0.5 + if raw_objfn_lsvec_signs: + if self.layout.resource_alloc('atom-processing').is_host_leader: + raw_lsvec = self.raw_objfn.lsvec(self.probs, self.counts, self.total_counts, self.freqs) + lsvec[:self.nelements][raw_lsvec < 0] *= -1 + return lsvec - Parameters - ---------- - paramvec : numpy.ndarray - The vector of (model) parameters to evaluate the objective function at. 
+ def dterms(self, paramvec=None): + tm = _time.time() + unit_ralloc = self.layout.resource_alloc('param-processing') + shared_mem_leader = unit_ralloc.is_host_leader - Returns - ------- - numpy.ndarray - """ - return self._lspenaltyvec(paramvec)**2 + if paramvec is None: + paramvec = self.model.to_vector() + else: + self.model.from_vector(paramvec) + + dprobs = self.jac[0:self.nelements, :] + dprobs.shape = (self.nelements, self.nparams) - def _fill_lspenaltyvec_jac(self, paramvec, lspenaltyvec_jac): - """ - Fill `lspenaltyvec_jac` with the jacobian of the least-squares (sqrt of the) penalty vector. + with self.resource_alloc.temporarily_track_memory(2 * self.nelements): + self.model.sim.bulk_fill_dprobs(dprobs, self.layout, self.probs) + self._clip_probs() + if shared_mem_leader: + if self.firsts is not None: + for ii, i in enumerate(self.indicesOfCircuitsWithOmittedData): + self.dprobs_omitted_rowsum[ii, :] = _np.sum(dprobs[self.layout.indices_for_index(i), :], axis=0) + dg_probs = self.raw_objfn.dterms(self.probs, self.counts, self.total_counts, self.freqs) + dprobs *= dg_probs[:, None] + + if shared_mem_leader and self.firsts is not None: + total_counts_firsts = self.total_counts[self.firsts] + omitted_probs = 1.0 - _np.array([_np.sum(self.probs[self.layout.indices_for_index(i)]) for i in self.indicesOfCircuitsWithOmittedData]) + omitted_dprobs_firsts_dterms = self.raw_objfn.zero_freq_dterms(total_counts_firsts, omitted_probs) + dprobs[self.firsts] -= omitted_dprobs_firsts_dterms[:, None] * self.dprobs_omitted_rowsum + + if shared_mem_leader and self._process_penalties: + self._dterms_fill_penalty(paramvec, self.jac[self.nelements:, :]) + + unit_ralloc.host_comm_barrier() + self.raw_objfn.resource_alloc.profiler.add_time("JACOBIAN", tm) + return self.jac + + def dlsvec(self, paramvec=None): + tm = _time.time() + unit_ralloc = self.layout.resource_alloc('param-processing') + shared_mem_leader = unit_ralloc.is_host_leader - Parameters - ---------- - paramvec : numpy.ndarray - The vector of (model) parameters to evaluate the objective function at. + if paramvec is None: + paramvec = self.model.to_vector() + else: + self.model.from_vector(paramvec) - lspenaltyvec_jac : numpy.ndarray - The array to fill. + jac = self.dterms(paramvec) + if shared_mem_leader: + lsvec = self.lsvec(paramvec) + p5over_lsvec = 0.5/lsvec + p5over_lsvec[_np.abs(lsvec) < 1e-100] = 0.0 + jac *= p5over_lsvec[:, None] - Returns - ------- - None - """ + unit_ralloc.host_comm_barrier() + self.raw_objfn.resource_alloc.profiler.add_time("JACOBIAN", tm) + return self.jac + + # Helpers, supporting main public instance functions + + def _dterms_fill_penalty(self, paramvec, terms_jac): + wrtslice = self.layout.global_param_slice if isinstance(self.layout, _DistributableCOPALayout) else slice(0, len(paramvec)) # all params off = 0 - # wrtslice gives the subset of all the model parameters that this processor is responsible for - # computing derivatives with respect to. - wrtslice = self.layout.global_param_slice if isinstance(self.layout, _DistributableCOPALayout) \ - else slice(0, len(paramvec)) # all params - if self.forcefn_grad is not None: n = self.forcefn_grad.shape[0] - lspenaltyvec_jac[off:off + n, :] = -self.forcefn_grad + terms_jac[off:off + n, :] = -self.forcefn_grad + force = self.forceShift - _np.dot(self.forcefn_grad, self.model.to_vector()) + assert(_np.all(force >= 0)), "Inadequate forcing shift!" 
+ terms_jac[off:off + n, :] *= 2*_np.sqrt(force)[:, None] off += n if self.regularize_factor > 0: n = len(paramvec) - lspenaltyvec_jac[off:off + n, :] = _np.diag([(self.regularize_factor * _np.sign(x) if abs(x) > 1.0 else 0.0) - for x in paramvec[wrtslice]]) # (N,N) + terms_jac[off:off + n, :] = _np.diag([(self.regularize_factor * _np.sign(x) if abs(x) > 1.0 else 0.0) for x in paramvec[wrtslice]]) # (N,N) + paramvec_norm = _paramvec_norm_penalty(self.regularize_factor, paramvec) + terms_jac[off:off + n, :] *= 2*paramvec_norm[:, None] off += n if self.cptp_penalty_factor > 0: - off += _cptp_penalty_jac_fill( - lspenaltyvec_jac[off:, :], self.model, self.cptp_penalty_factor, self.opBasis, wrtslice) + n = _cptp_penalty_jac_fill(terms_jac[off:, :], self.model, self.cptp_penalty_factor, self.opBasis, wrtslice) + cp_penalty = _cptp_penalty(self.model, self.cptp_penalty_factor, self.opBasis) + terms_jac[off:off+n, :] *= 2*cp_penalty[:, None] + off += n if self.spam_penalty_factor > 0: - off += _spam_penalty_jac_fill( - lspenaltyvec_jac[off:, :], self.model, self.spam_penalty_factor, self.opBasis, wrtslice) + n = _spam_penalty_jac_fill(terms_jac[off:, :], self.model, self.spam_penalty_factor, self.opBasis, wrtslice) + spam_penalty = _spam_penalty(self.model, self.spam_penalty_factor, self.opBasis) + terms_jac[off:off+n, :] *= 2*spam_penalty[:, None] + off += n if self.errorgen_penalty_factor > 0: - off += _errorgen_penalty_jac_fill( - lspenaltyvec_jac[off:, :], self.model, self.errorgen_penalty_factor, wrtslice) + n = _errorgen_penalty_jac_fill(terms_jac[off:, :], self.model, self.errorgen_penalty_factor, wrtslice) + errorgen_penalty = _errorgen_penalty(self.model, self.errorgen_penalty_factor) + terms_jac[off:off+n, :] *= 2*errorgen_penalty[:, None] + off += n assert(off == self.local_ex) + return - def _fill_dterms_penalty(self, paramvec, terms_jac): - """ - Fill `terms_jac` with the jacobian of the penalty vector. + def _terms_penalty(self, paramvec): + blocks = [_np.zeros(shape=(0,))] - Parameters - ---------- - paramvec : numpy.ndarray - The vector of (model) parameters to evaluate the objective function at. - - terms_jac : numpy.ndarray - The array to fill. - - Returns - ------- - None - """ - # terms_penalty = ls_penalty**2 - # terms_penalty_jac = 2 * ls_penalty * ls_penalty_jac - self._fill_lspenaltyvec_jac(paramvec, terms_jac) - terms_jac[:, :] *= 2 * self._lspenaltyvec(paramvec)[:, None] - - #Omitted-probability support functions - - def _omitted_prob_first_terms(self, probs): - """ - Extracts the value of the first term for each circuit that has omitted probabilities. - - Nonzero probabilities may be predicted for circuit outcomes that - never occur in the data, and therefore do not produce "terms" for - the objective function sum. Yet, in many objective functions, zero- - frequency terms that have non-zero probabilities still produce a - non-zero contribution and must be included. This is performed by - adding these "omitted-probability" contributions to the first - (nonzero-frequncy, thus present) term corresponding to the given - circuit. This function computes these omitted (zero-frequency) - terms and returns them in an array of length equal to the number - of circuits with omitted-probability contributions. - - Parameters - ---------- - probs : numpy.ndarray - The (full) vector of probabilities. Length is equal to the - total number of circuit outcomes (not the length of the - returned array). 
- - Returns - ------- - numpy.ndarray - """ - omitted_probs = 1.0 - _np.array([probs[self.layout.indices_for_index(i)].sum() - for i in self.indicesOfCircuitsWithOmittedData]) - return self.raw_objfn.zero_freq_terms(self.total_counts[self.firsts], omitted_probs) - - def _update_lsvec_for_omitted_probs(self, lsvec, probs): - """ - Updates the least-squares vector `lsvec`, adding the omitted-probability contributions. - - Parameters - ---------- - lsvec : numpy.ndarray - Vector of least-squares (sqrt of terms) objective function values *before* adding - omitted-probability contributions. This function updates this array. - - probs : numpy.ndarray - The (full) vector of probabilities. Length is equal to the - total number of circuit outcomes. - - Returns - ------- - None - """ - # lsvec = sqrt(terms) => sqrt(terms + zerofreqfn(omitted)) - lsvec[self.firsts] = _np.sqrt(lsvec[self.firsts]**2 + self._omitted_prob_first_terms(probs)) - - def _update_terms_for_omitted_probs(self, terms, probs): - """ - Updates the terms vector `terms`, adding the omitted-probability contributions. - - Parameters - ---------- - terms : numpy.ndarray - Vector of objective function term values *before* adding - omitted-probability contributions. This function updates this array. - - probs : numpy.ndarray - The (full) vector of probabilities. Length is equal to the - total number of circuit outcomes. - - Returns - ------- - None - """ - # terms => terms + zerofreqfn(omitted) - terms[self.firsts] += self._omitted_prob_first_terms(probs) - - def _omitted_prob_first_dterms(self, probs): - """ - Compute the derivative of the first-terms vector returned by :meth:`_omitted_prob_first_terms`. - - This derivative is just with respect to the *probabilities*, not the - model parameters, as it anticipates a final dot product with the jacobian - of the computed probabilities with respect to the model parameters (see - :meth:`_update_dterms_for_omitted_probs`). - - Parameters - ---------- - probs : numpy.ndarray - The (full) vector of probabilities. Length is equal to the - total number of circuit outcomes. - - Returns - ------- - numpy.ndarray - Vector of the derivatives of the term values with respect - to the corresponding probability. As such, this is a 1D - array of length equal to the number of circuits with omitted - contributions. - """ - omitted_probs = 1.0 - _np.array([_np.sum(probs[self.layout.indices_for_index(i)]) - for i in self.indicesOfCircuitsWithOmittedData]) - return self.raw_objfn.zero_freq_dterms(self.total_counts[self.firsts], omitted_probs) - - def _update_dterms_for_omitted_probs(self, dterms, probs, dprobs_omitted_rowsum): - # terms => terms + zerofreqfn(omitted) - # dterms => dterms + dzerofreqfn(omitted) * domitted (and domitted = (-omitted_rowsum)) - """ - Updates term jacobian to account for omitted probabilities. - - Parameters - ---------- - dterms : numpy.ndarray - Jacobian of terms before and omitted-probability contributions are added. - This array is updated by this function. - - probs : numpy.ndarray - The (full) vector of probabilities. Length is equal to the - total number of circuit outcomes. - - dprobs_omitted_rowsum : numpy.ndarray - An array of shape `(M,N)` where `M` is the number of circuits with - omitted contributions and `N` is the number of model parameters. This - matrix results from summing up the jacobian rows of all the *present* - probabilities for the circuit corresponding to the row. 
That is, the - i-th row of this matrix contains the summed-up derivatives of all the - computed probabilities (i.e. present outcomes) for the i-th circuit with - omitted probabilities. These omitted probabilities are never computed, but - are inferred as 1.0 minus the present probabilities, so this matrix gives - the negative of the derivative of the omitted probabilities. - - Returns - ------- - None - """ - dterms[self.firsts] -= self._omitted_prob_first_dterms(probs)[:, None] * dprobs_omitted_rowsum - - def _update_dlsvec_for_omitted_probs(self, dlsvec, lsvec, probs, dprobs_omitted_rowsum): - """ - Updates least-squares vector's jacobian to account for omitted probabilities. - - Parameters - ---------- - dlsvec : numpy.ndarray - Jacobian of least-squares vector before and omitted-probability contributions - are added. This array is updated by this function. + if self.forcefn_grad is not None: + forcefn_penalty = self.forceShift - _np.dot(self.forcefn_grad, self.model.to_vector()) + assert(_np.all(forcefn_penalty >= 0)), "Inadequate forcing shift!" + blocks.append(forcefn_penalty) - lsvec : numpy.ndarray - The least-squares vector itself, as this is often helpful in this computation. - Length is equal to the total number of circuit outcomes. + if self.regularize_factor != 0: + paramvec_norm = _paramvec_norm_penalty(self.regularize_factor, paramvec) + paramvec_norm **= 2 + blocks.append(paramvec_norm) - probs : numpy.ndarray - The (full) vector of probabilities. Length is equal to the - total number of circuit outcomes. - - dprobs_omitted_rowsum : numpy.ndarray - An array of shape `(M,N)` where `M` is the number of circuits with - omitted contributions and `N` is the number of model parameters. This - matrix results from summing up the jacobian rows of all the *present* - probabilities for the circuit corresponding to the row. That is, the - i-th row of this matrix contains the summed-up derivatives of all the - computed probabilities (i.e. present outcomes) for the i-th circuit with - omitted probabilities. These omitted probabilities are never computed, but - are inferred as 1.0 minus the present probabilities, so this matrix gives - the negative of the derivative of the omitted probabilities. + if self.cptp_penalty_factor > 0: + cp_penalty = _cptp_penalty(self.model, self.cptp_penalty_factor, self.opBasis) ** 2 + blocks.append(cp_penalty) - Returns - ------- - None - """ - # lsvec = sqrt(terms) => sqrt(terms + zerofreqfn(omitted)) - # dlsvec = 0.5 / sqrt(terms) * dterms = 0.5 / lsvec * dterms - # 0.5 / sqrt(terms + zerofreqfn(omitted)) * (dterms + dzerofreqfn(omitted) * domitted) - # so dterms = 2 * lsvec * dlsvec, and - # new_dlsvec = 0.5 / sqrt(...) 
* (2 * lsvec * dlsvec + dzerofreqfn(omitted) * domitted) + if self.spam_penalty_factor > 0: + spam_penalty = _spam_penalty(self.model, self.spam_penalty_factor, self.opBasis) ** 2 + blocks.append(spam_penalty) - lsvec_firsts = lsvec[self.firsts] - updated_lsvec = _np.sqrt(lsvec_firsts**2 + self._omitted_prob_first_terms(probs)) - updated_lsvec = _np.where(updated_lsvec == 0, 1.0, updated_lsvec) # avoid 0/0 where lsvec & deriv == 0 + if self.errorgen_penalty_factor > 0: + errorgen_penalty = _errorgen_penalty(self.model, self.errorgen_penalty_factor) ** 2 + blocks.append(errorgen_penalty) - # dlsvec => 0.5 / updated_lsvec * (2 * lsvec * dlsvec + dzerofreqfn(omitted) * domitted) memory efficient: - dlsvec[self.firsts] *= (lsvec_firsts / updated_lsvec)[:, None] - dlsvec[self.firsts] -= ((0.5 / updated_lsvec) * self._omitted_prob_first_dterms(probs))[:, None] \ - * dprobs_omitted_rowsum + return _np.concatenate(blocks) def _clip_probs(self): """ Clips the potentially shared-mem self.probs according to self.prob_clip_interval """ @@ -4657,266 +4555,7 @@ def _clip_probs(self): else: _np.clip(self.probs, self.prob_clip_interval[0], self.prob_clip_interval[1], out=self.probs) - #Objective Function - - def lsvec(self, paramvec=None, oob_check=False): - """ - Compute the least-squares vector of the objective function. - - This is the square-root of the terms-vector returned from :meth:`terms`. - This vector is the objective function value used by a least-squares - optimizer when optimizing this objective function. Note that the existence - of this quantity requires that the terms be non-negative. If this is not - the case, an error is raised. - - Parameters - ---------- - paramvec : numpy.ndarray, optional - The vector of (model) parameters to evaluate the objective function at. - If `None`, then the model's current parameter vector is used (held internally). - - oob_check : bool, optional - Whether the objective function should raise an error if it is being - evaluated in an "out of bounds" region. - - Returns - ------- - numpy.ndarray - An array of shape `(nElements,)` where `nElements` is the number - of circuit outcomes. - """ - - #DEBUG REMOVE - used for memory profiling - #import os, psutil - #process = psutil.Process(os.getpid()) - #def print_mem_usage(prefix): - # print("%s: mem usage = %.3f GB" % (prefix, process.memory_info().rss / (1024.0**3))) - - tm = _time.time() - if paramvec is not None: - self.model.from_vector(paramvec) - else: - paramvec = self.model.to_vector() - lsvec = self.obj.view() - - # Whether this rank is the "leader" of all the processors accessing the same shared self.jac and self.probs mem. 
- # Only leader processors should modify the contents of the shared memory, so we only apply operations *once* - # `unit_ralloc` is the group of all the procs targeting same destination into self.obj - unit_ralloc = self.layout.resource_alloc('atom-processing') - shared_mem_leader = unit_ralloc.is_host_leader - - with self.resource_alloc.temporarily_track_memory(self.nelements): # 'e' (lsvec) - self.model.sim.bulk_fill_probs(self.probs, self.layout) # syncs shared mem - self._clip_probs() # clips self.probs in place w/shared mem sync - - if oob_check: # Only used for termgap cases - if not self.model.sim.bulk_test_if_paths_are_sufficient(self.layout, self.probs, verbosity=1): - raise ValueError("Out of bounds!") # signals LM optimizer - - if shared_mem_leader: - lsvec_no_penalty = self.raw_objfn.lsvec(self.probs, self.counts, self.total_counts, self.freqs) - lsvec[:] = _np.concatenate((lsvec_no_penalty, self._lspenaltyvec(paramvec))) \ - if self._process_penalties else lsvec_no_penalty - - if self.firsts is not None and shared_mem_leader: - self._update_lsvec_for_omitted_probs(lsvec, self.probs) - unit_ralloc.host_comm_barrier() # have non-leader procs wait for leaders to set shared mem - - self.raw_objfn.resource_alloc.profiler.add_time("LS OBJECTIVE", tm) - assert(lsvec.shape == (self.nelements + self.local_ex,)) - return lsvec - - def terms(self, paramvec=None): - """ - Compute the terms of the objective function. - - The "terms" are the per-circuit-outcome values that get summed together - to result in the objective function value. - - Parameters - ---------- - paramvec : numpy.ndarray, optional - The vector of (model) parameters to evaluate the objective function at. - If `None`, then the model's current parameter vector is used (held internally). - - Returns - ------- - numpy.ndarray - An array of shape `(nElements,)` where `nElements` is the number - of circuit outcomes. - """ - tm = _time.time() - if paramvec is not None: self.model.from_vector(paramvec) - else: paramvec = self.model.to_vector() - terms = self.obj.view() - - # Whether this rank is the "leader" of all the processors accessing the same shared self.jac and self.probs mem. - # Only leader processors should modify the contents of the shared memory, so we only apply operations *once* - # `unit_ralloc` is the group of all the procs targeting same destination into self.obj - unit_ralloc = self.layout.resource_alloc('atom-processing') - shared_mem_leader = unit_ralloc.is_host_leader - - with self.resource_alloc.temporarily_track_memory(self.nelements): # 'e' (terms) - self.model.sim.bulk_fill_probs(self.probs, self.layout) - self._clip_probs() # clips self.probs in place w/shared mem sync - - if shared_mem_leader: - terms_no_penalty = self.raw_objfn.terms(self.probs, self.counts, self.total_counts, self.freqs) - terms[:] = _np.concatenate((terms_no_penalty, self._penaltyvec(paramvec))) \ - if self._process_penalties else terms_no_penalty - - if self.firsts is not None and shared_mem_leader: - self._update_terms_for_omitted_probs(terms, self.probs) - unit_ralloc.host_comm_barrier() # have non-leader procs wait for leaders to set shared mem - - self.raw_objfn.resource_alloc.profiler.add_time("TERMS OBJECTIVE", tm) - assert(terms.shape == (self.nelements + self.local_ex,)) - return terms - - # Jacobian function - def dlsvec(self, paramvec=None): - """ - The derivative (jacobian) of the least-squares vector. - - Derivatives are taken with respect to model parameters. 
- - Parameters - ---------- - paramvec : numpy.ndarray, optional - The vector of (model) parameters to evaluate the objective function at. - If `None`, then the model's current parameter vector is used (held internally). - - Returns - ------- - numpy.ndarray - An array of shape `(nElements,nParams)` where `nElements` is the number - of circuit outcomes and `nParams` is the number of model parameters. - """ - tm = _time.time() - dprobs = self.jac[0:self.nelements, :] # avoid mem copying: use jac mem for dprobs - dprobs.shape = (self.nelements, self.nparams) - if paramvec is not None: - self.model.from_vector(paramvec) - else: - paramvec = self.model.to_vector() - - # Whether this rank is the "leader" of all the processors accessing the same shared self.jac and self.probs mem. - # Only leader processors should modify the contents of the shared memory, so we only apply operations *once* - # `unit_ralloc` is the group of all the procs targeting same destination into self.jac - unit_ralloc = self.layout.resource_alloc('param-processing') - shared_mem_leader = unit_ralloc.is_host_leader - - with self.resource_alloc.temporarily_track_memory(2 * self.nelements): # 'e' (dg_dprobs, lsvec) - #OLD jac distribution method - # Usual: rootcomm -this_fn-> atom_comm (sub_comm) -> block_comm - # New: rootcomm -> jac_slice_comm -this_fn-> atom_comm (sub_comm) -> block_comm (??) - # it could be that the comm in resource_alloc is already split for jac distribution, but this - # would mean code that doesn't compute jacobians would be less efficient. - # maybe hold a jac_slice_comm that is different? - # break up: N procs = Natom_eaters * Nprocs_per_atom_eater - # Nprocs_per_atom_eater = Nparamblk_eaters * Nprocs_per_paramblk_eater - #wrtSlice = resource_alloc.jac_slice if (resource_alloc.jac_distribution_method == "columns") \ - # else slice(0, self.model.num_params) - - self.model.sim.bulk_fill_dprobs(dprobs, self.layout, self.probs) # wrtSlice) - self._clip_probs() # clips self.probs in place w/shared mem sync - - if shared_mem_leader: - if self.firsts is not None: - for ii, i in enumerate(self.indicesOfCircuitsWithOmittedData): - self.dprobs_omitted_rowsum[ii, :] = _np.sum(dprobs[self.layout.indices_for_index(i), :], axis=0) - - #if shared_mem_leader: # Note: no need for barrier directly below as barrier further down suffices - dg_dprobs, lsvec = self.raw_objfn.dlsvec_and_lsvec(self.probs, self.counts, self.total_counts, - self.freqs) - dprobs *= dg_dprobs[:, None] - # (nelements,N) * (nelements,1) (N = dim of vectorized model) - # this multiply also computes jac, which is just dprobs - # with a different shape (jac.shape == [nelements,nparams]) - - if shared_mem_leader: # only "leader" modifies shared mem (dprobs & self.jac) - if self.firsts is not None: - #Note: lsvec is assumed to be *not* updated w/omitted probs contribution - self._update_dlsvec_for_omitted_probs(dprobs, lsvec, self.probs, self.dprobs_omitted_rowsum) - - if self._process_penalties and shared_mem_leader: - self._fill_lspenaltyvec_jac(paramvec, self.jac[self.nelements:, :]) # jac.shape == (nelements+N,N) - unit_ralloc.host_comm_barrier() # have non-leader procs wait for leaders to set shared mem - - # REMOVE => unit tests? 
- #if self.check_jacobian: _opt.check_jac(lambda v: self.lsvec( - # v), paramvec, self.jac, tol=1e-3, eps=1e-6, errType='abs') # TO FIX - - # dpr has shape == (nCircuits, nDerivCols), weights has shape == (nCircuits,) - # return shape == (nCircuits, nDerivCols) where ret[i,j] = dP[i,j]*(weights+dweights*(p-f))[i] - self.raw_objfn.resource_alloc.profiler.add_time("JACOBIAN", tm) - return self.jac - - def dterms(self, paramvec=None): - """ - Compute the jacobian of the terms of the objective function. - - The "terms" are the per-circuit-outcome values that get summed together - to result in the objective function value. Differentiation is with - respect to model parameters. - - Parameters - ---------- - paramvec : numpy.ndarray, optional - The vector of (model) parameters to evaluate the objective function at. - If `None`, then the model's current parameter vector is used (held internally). - - Returns - ------- - numpy.ndarray - An array of shape `(nElements,nParams)` where `nElements` is the number - of circuit outcomes and `nParams` is the number of model parameters. - """ - tm = _time.time() - dprobs = self.jac[0:self.nelements, :] # avoid mem copying: use jac mem for dprobs - dprobs.shape = (self.nelements, self.nparams) - if paramvec is not None: - self.model.from_vector(paramvec) - else: - paramvec = self.model.to_vector() - - # Whether this rank is the "leader" of all the processors accessing the same shared self.jac and self.probs mem. - # Only leader processors should modify the contents of the shared memory, so we only apply operations *once* - # `unit_ralloc` is the group of all the procs targeting same destination into self.jac - unit_ralloc = self.layout.resource_alloc('param-processing') - shared_mem_leader = unit_ralloc.is_host_leader - - with self.resource_alloc.temporarily_track_memory(2 * self.nelements): # 'e' (dg_dprobs, lsvec) - self.model.sim.bulk_fill_dprobs(dprobs, self.layout, self.probs) - self._clip_probs() # clips self.probs in place w/shared mem sync - - if shared_mem_leader: - if self.firsts is not None: - for ii, i in enumerate(self.indicesOfCircuitsWithOmittedData): - self.dprobs_omitted_rowsum[ii, :] = _np.sum(dprobs[self.layout.indices_for_index(i), :], axis=0) - - #if shared_mem_leader: # Note: barrier below work suffices for this condition too - dprobs *= self.raw_objfn.dterms(self.probs, self.counts, self.total_counts, self.freqs)[:, None] - # (nelements,N) * (nelements,1) (N = dim of vectorized model) - # this multiply also computes jac, which is just dprobs - # with a different shape (jac.shape == [nelements,nparams]) - - if shared_mem_leader: - if self.firsts is not None: - self._update_dterms_for_omitted_probs(dprobs, self.probs, self.dprobs_omitted_rowsum) - - if self._process_penalties: - self._fill_dterms_penalty(paramvec, self.jac[self.nelements:, :]) # jac.shape == (nelements+N,N) - unit_ralloc.host_comm_barrier() # have non-leader procs wait for leaders to set shared mem - - # REMOVE => unit tests - #if self.check_jacobian: _opt.check_jac(lambda v: self.lsvec( - # v), paramvec, self.jac, tol=1e-3, eps=1e-6, errType='abs') # TO FIX - - # dpr has shape == (nCircuits, nDerivCols), weights has shape == (nCircuits,) - # return shape == (nCircuits, nDerivCols) where ret[i,j] = dP[i,j]*(weights+dweights*(p-f))[i] - self.raw_objfn.resource_alloc.profiler.add_time("JACOBIAN", tm) - return self.jac + # Hessians, public and private instance functions def hessian_brute(self, paramvec=None): """ @@ -6183,6 +5822,11 @@ def _errorgen_penalty(mdl, prefactor): 
return prefactor * _np.array([_np.sqrt(val)], 'd') +def _paramvec_norm_penalty(reg_factor, paramvec): + out = reg_factor * _np.array([max(0, absx - 1.0) for absx in map(abs, paramvec)], 'd') + return out + + def _cptp_penalty_jac_fill(cp_penalty_vec_grad_to_fill, mdl, prefactor, op_basis, wrt_slice): """ Helper function - jacobian of CPTP penalty (sum of tracenorms of gates) diff --git a/pygsti/optimize/customlm.py b/pygsti/optimize/customlm.py index c1cb81d0c..b3fd03a71 100644 --- a/pygsti/optimize/customlm.py +++ b/pygsti/optimize/customlm.py @@ -1124,13 +1124,22 @@ def dclip(ar): return ar printer.log(" Accepted%s! gain ratio=%g mu * %g => %g" % (" UPHILL" if uphill_ok else "", dF / dL, mu_factor, mu), 2) last_accepted_dx = dx.copy() - if new_x_is_known_inbounds and norm_f < min_norm_f: - min_norm_f = norm_f - best_x[:] = x[:] - best_x_state = (mu, nu, norm_f, f.copy(), spow, None) - #Note: we use rawJTJ=None above because the current `JTJ` was evaluated - # at the *last* x-value -- we need to wait for the next outer loop - # to compute the JTJ for this best_x_state + if norm_f < min_norm_f: + if not new_x_is_known_inbounds: + try: + _ = obj_fn(global_x, oob_check=True) + # ^ Dead-store the return value. + new_x_is_known_inbounds = True + except ValueError: + # Then we keep new_x_is_known_inbounds==False. + pass + if new_x_is_known_inbounds: + min_norm_f = norm_f + best_x[:] = x[:] + best_x_state = (mu, nu, norm_f, f.copy(), spow, None) + # ^ Note: we use rawJTJ=None above because the current `JTJ` was evaluated + # at the *last* x-value -- we need to wait for the next outer loop + # to compute the JTJ for this best_x_state #assert(_np.isfinite(x).all()), "Non-finite x!" # NaNs tracking #assert(_np.isfinite(f).all()), "Non-finite f!" # NaNs tracking diff --git a/pygsti/optimize/simplerlm.py b/pygsti/optimize/simplerlm.py index 44a185285..7d3fd8913 100644 --- a/pygsti/optimize/simplerlm.py +++ b/pygsti/optimize/simplerlm.py @@ -600,7 +600,6 @@ def simplish_leastsq( #determing increment using adaptive damping while True: # inner loop - if profiler: profiler.memory_check("simplish_leastsq: begin inner iter") # ok if assume fine-param-proc.size == 1 (otherwise need to sync setting local JTJ) @@ -783,10 +782,19 @@ def simplish_leastsq( norm_f = norm_new_f global_x[:] = global_new_x[:] printer.log(" Accepted%s! gain ratio=%g mu * %g => %g" % ("", dF / dL, mu_factor, mu), 2) - if new_x_is_known_inbounds and norm_f < min_norm_f: - min_norm_f = norm_f - best_x[:] = x[:] - best_x_state = (mu, nu, norm_f, f.copy()) + if norm_f < min_norm_f: + if not new_x_is_known_inbounds: + try: + _ = obj_fn(global_x, oob_check=True) + # ^ Dead-store the return value. + new_x_is_known_inbounds = True + except ValueError: + # Then we keep new_x_is_known_inbounds==False. + pass + if new_x_is_known_inbounds: + min_norm_f = norm_f + best_x[:] = x[:] + best_x_state = (mu, nu, norm_f, f.copy()) #assert(_np.isfinite(x).all()), "Non-finite x!" # NaNs tracking #assert(_np.isfinite(f).all()), "Non-finite f!" # NaNs tracking @@ -794,6 +802,10 @@ def simplish_leastsq( break # ^ exit inner loop normally ... 
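The customlm.py and simplerlm.py hunks above change when an improved point may be recorded as the best-so-far: if the point is not already known to be in bounds, the objective is re-evaluated with oob_check=True and a raised ValueError is treated as out-of-bounds. A minimal standalone sketch of that acceptance pattern follows; the toy objective and bound are invented for illustration, while the real loops call obj_fn(global_x, oob_check=True).

import numpy as _np

def toy_obj_fn(x, oob_check=False):
    if oob_check and _np.any(_np.abs(x) > 1.0):   # pretend |x_i| <= 1.0 is "in bounds"
        raise ValueError("Out of bounds!")        # same signal the LM loops catch
    return float(_np.sum(x**2))

def maybe_record_best(x, norm_f, min_norm_f, new_x_is_known_inbounds):
    """Return (min_norm_f, best_x or None) after the in-bounds check added above."""
    if norm_f < min_norm_f:
        if not new_x_is_known_inbounds:
            try:
                _ = toy_obj_fn(x, oob_check=True)   # dead-store, as in the patch
                new_x_is_known_inbounds = True
            except ValueError:
                pass                                # keep the flag False
        if new_x_is_known_inbounds:
            return norm_f, x.copy()
    return min_norm_f, None

min_norm_f, best_x = maybe_record_best(_np.array([0.3, -0.2]), 0.13, _np.inf, False)
assert best_x is not None and _np.isclose(min_norm_f, 0.13)
min_norm_f, best_x = maybe_record_best(_np.array([1.3, 0.0]), 0.09, min_norm_f, False)
assert best_x is None and _np.isclose(min_norm_f, 0.13)   # improved, but out of bounds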
# end of inner loop + # + # x[:] = best_x[:] + # mu, nu, norm_f, f[:] = best_x_state + # # end of outer loop else: #if no break stmt hit, then we've exceeded max_iter diff --git a/test/performance/mpi_2D_scaling/run.sh b/test/performance/mpi_2D_scaling/run.sh index b5c77dd13..d1b1a0d3e 100755 --- a/test/performance/mpi_2D_scaling/run.sh +++ b/test/performance/mpi_2D_scaling/run.sh @@ -29,4 +29,4 @@ export MKL_NUM_THREADS=1 # Note: This flags are useful on Kahuna to avoid error messages # But the --mca flags are not necessary for performance mpirun -np ${NUM_PROCS} --mca pml ucx --mca btl '^openib' \ - python ./mpi_test.py &> ${PREFIX}.out + python ./run_me_with_mpirun.py &> ${PREFIX}.out diff --git a/test/performance/mpi_2D_scaling/mpi_test.py b/test/performance/mpi_2D_scaling/run_me_with_mpirun.py similarity index 100% rename from test/performance/mpi_2D_scaling/mpi_test.py rename to test/performance/mpi_2D_scaling/run_me_with_mpirun.py diff --git a/test/test_packages/mpi/test_mpi.py b/test/unit/mpi/run_me_with_mpiexec.py similarity index 98% rename from test/test_packages/mpi/test_mpi.py rename to test/unit/mpi/run_me_with_mpiexec.py index 995b3a92f..04a781d28 100644 --- a/test/test_packages/mpi/test_mpi.py +++ b/test/unit/mpi/run_me_with_mpiexec.py @@ -1,4 +1,4 @@ -# This file is designed to be run via: mpiexec -np 4 python -W ignore testMPI.py +# This file is designed to be run via: mpiexec -np 4 python -W ignore run_me_with_mpiexec.py # This does not use nosetests because I want to set verbosity differently based on rank (quiet if not rank 0) # By wrapping asserts in comm.rank == 0, only rank 0 should fail (should help with output) # Can run with different number of procs, but 4 is minimum to test all modes (pure MPI, pure shared mem, and mixed) @@ -226,7 +226,7 @@ def run_fills(self, sim, natoms, nparams): else: raise RuntimeError("Improper sim type passed by test_fills_generator") - serial_layout = mdl.sim.create_layout(circuits, array_types=('E','EP','EPP'), derivative_dimension=nP) + serial_layout = mdl.sim.create_layout(circuits, array_types=('E','EP','EPP'), derivative_dimensions=(nP,)) nE = serial_layout.num_elements nC = len(circuits) @@ -246,7 +246,7 @@ def run_fills(self, sim, natoms, nparams): global_serial_layout = serial_layout.global_layout #Use a parallel layout to compute the same probabilities & their derivatives - local_layout = mdl.sim.create_layout(circuits, array_types=('E','EP','EPP'), derivative_dimension=nP, + local_layout = mdl.sim.create_layout(circuits, array_types=('E','EP','EPP'), derivative_dimensions=(nP,), resource_alloc=self.ralloc) vp_local = local_layout.allocate_local_array('e', 'd') @@ -334,7 +334,7 @@ def setup_class(cls): tester = PureMPIParallel_Test() tester.setup_class() tester.ralloc = pygsti.baseobjs.ResourceAllocation(wcomm) - #tester.run_objfn_values('matrix','logl',4) + tester.run_objfn_values('matrix','logl',4) tester.run_fills('map', 1, None) tester.run_fills('map', 4, None) tester.run_fills('matrix', 4, 15) diff --git a/test/unit/mpi/test_mpi.py b/test/unit/mpi/test_mpi.py new file mode 100644 index 000000000..e4e285148 --- /dev/null +++ b/test/unit/mpi/test_mpi.py @@ -0,0 +1,26 @@ +import subprocess +import pytest +import os +from pathlib import Path + +try: + from mpi4py import MPI +except (ImportError, RuntimeError): + MPI = None + + +class MPITester: + + @pytest.mark.skipif(MPI is None, reason="mpi4py could not be imported") + def test_all(self, capfd: pytest.LogCaptureFixture): + current_filepath = Path(os.path.abspath(__file__)) 
+ to_run = current_filepath.parents[0] / Path('run_me_with_mpiexec.py') + subprocess_args = (f"mpiexec -np 4 python -W ignore {str(to_run)}").split(' ') + + result = subprocess.run(subprocess_args, capture_output=False, text=True) + out, err = capfd.readouterr() + if len(out) + len(err) > 0: + msg = out + '\n'+ 80*'-' + err + raise RuntimeError(msg) + return + diff --git a/test/unit/objects/test_objectivefns.py b/test/unit/objects/test_objectivefns.py index 6e2e5549c..df5873705 100644 --- a/test/unit/objects/test_objectivefns.py +++ b/test/unit/objects/test_objectivefns.py @@ -300,10 +300,12 @@ def test_derivative(self): places=3) # each *element* should match to 3 places if self.computes_lsvec: - lsvec = objfn.lsvec().copy() - dlsvec = objfn.dlsvec().copy() - self.assertArraysAlmostEqual(dterms / nEls, 2 * lsvec[:, None] * dlsvec / nEls, - places=4) # each *element* should match to 4 places + arg1 = dterms / nEls + lsvec = objfn.lsvec(v0).copy() + dlsvec = objfn.dlsvec(v0).copy() + arg2 = 2 * lsvec[:, None] * dlsvec / nEls + self.assertArraysAlmostEqual(arg1, arg2, places=4) # each *element* should match to 4 places + return def test_approximate_hessian(self): if not self.enable_hessian_tests:
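The refactored penalty handling in objectivefns.py splits the work between _terms_penalty (which appends the squared penalty blocks) and _dterms_fill_penalty (which scales each penalty jacobian by 2 * penalty, the chain rule for terms = penalty**2). The standalone sketch below reproduces that bookkeeping for just the paramvec-norm penalty, reg * max(0, |x| - 1), and spot-checks the jacobian with a finite difference; the helper names are local to the sketch, with only _paramvec_norm_penalty mirroring the helper introduced in this diff.

import numpy as _np

def paramvec_norm_penalty(reg, paramvec):
    # mirrors _paramvec_norm_penalty: reg * max(0, |x| - 1), elementwise
    return reg * _np.array([max(0.0, abs(x) - 1.0) for x in paramvec], 'd')

def paramvec_norm_penalty_jac(reg, paramvec):
    # d(penalty)/d(paramvec); diagonal, and zero inside the unit box
    return _np.diag([reg * _np.sign(x) if abs(x) > 1.0 else 0.0 for x in paramvec])

reg = 0.5
x = _np.array([1.7, -0.3, -2.4])

pen = paramvec_norm_penalty(reg, x)
terms_pen = pen**2                                                  # what _terms_penalty appends
dterms_pen = 2 * pen[:, None] * paramvec_norm_penalty_jac(reg, x)   # what _dterms_fill_penalty builds

eps = 1e-7
fd = _np.zeros_like(dterms_pen)
for j in range(len(x)):                                             # finite-difference check
    xp = x.copy(); xp[j] += eps
    fd[:, j] = (paramvec_norm_penalty(reg, xp)**2 - terms_pen) / eps
assert _np.allclose(dterms_pen, fd, atol=1e-5)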