Skip to content

Commit 17bcc19

Browse files
committed
Fix calculation of fit weights
1 parent ad8aa79 commit 17bcc19

File tree

4 files changed

+69
-7
lines changed

4 files changed

+69
-7
lines changed

qiskit_experiments/curve_analysis/curve_analysis.py

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -316,20 +316,30 @@ def _run_curve_fit(
316316

317317
valid_uncertainty = np.all(np.isfinite(curve_data.y_err))
318318

319+
model_weights = {}
320+
if valid_uncertainty:
321+
for model in models:
322+
sub_yerr = curve_data.get_subset_of(model._name).y_err
323+
if len(sub_yerr) == 0:
324+
continue
325+
nonzero_yerr = np.where(np.isclose(sub_yerr, 0.0), np.finfo(float).eps, sub_yerr)
326+
raw_weights = 1 / nonzero_yerr
327+
# Remove outlier. When all sample values are the same with sample average,
328+
# or sampling error is zero with shot-weighted average,
329+
# some yerr values might be very close to zero, yielding significant weights.
330+
# With such an outlier, the fit doesn't sense the residuals of the other data points.
331+
maximum_weight = np.percentile(raw_weights, 90)
332+
model_weights[model._name] = np.clip(raw_weights, 0.0, maximum_weight)
333+
319334
# Objective function for minimize. This computes composite residuals of sub models.
320335
def _objective(_params):
321336
ys = []
322337
for model in models:
323338
sub_data = curve_data.get_subset_of(model._name)
324-
with np.errstate(divide="ignore"):
325-
# Ignore numpy runtime warning.
326-
# Zero y_err point introduces infinite weight,
327-
# but this should be managed by LMFIT.
328-
weights = 1.0 / sub_data.y_err if valid_uncertainty else None
329339
yi = model._residual(
330340
params=_params,
331341
data=sub_data.y,
332-
weights=weights,
342+
weights=model_weights.get(model._name, None),
333343
x=sub_data.x,
334344
)
335345
ys.append(yi)
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
---
2+
fixes:
3+
- |
4+
Fix the calculation of weights for curve fitting. Previously, the weights of data points used to obtain
5+
the residual of the fit curve were computed as the inverse of the error bars of the y data.
6+
This may yield significant weights on certain data points when their error bar is small or zero,
7+
and this can cause local overfitting to these data points.
8+
To avoid this edge case of small error bars, computed weights are now clipped at the 90th percentile.
9+
This update might slightly change the outcome of the fit.

test/curve_analysis/test_baseclass.py

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -440,6 +440,49 @@ def test_end_to_end_parallel_analysis(self):
440440
self.assertAlmostEqual(taus[0].value.nominal_value, tau1, delta=0.1)
441441
self.assertAlmostEqual(taus[1].value.nominal_value, tau2, delta=0.1)
442442

443+
def test_end_to_end_zero_yerr(self):
444+
"""Integration test for an edge case of having zero y error.
445+
446+
When the error bar is zero, the fit weights to compute residual tend to become larger.
447+
When the weight is too significant, the result locally overfits to
448+
certain data points with smaller or zero y error.
449+
"""
450+
analysis = CurveAnalysis(models=[ExpressionModel(expr="amp * x**2", name="test")])
451+
analysis.set_options(
452+
data_processor=DataProcessor(input_key="counts", data_actions=[Probability("1")]),
453+
result_parameters=["amp"],
454+
average_method="sample", # Use sample average to make some yerr = 0
455+
plot=False,
456+
p0={"amp": 0.2},
457+
)
458+
459+
amp = 0.3
460+
x = np.linspace(0, 1, 100)
461+
y = amp * x**2
462+
463+
# Replace small y values with zero.
464+
# Since mock function samples count dictionary from binomial distribution,
465+
# y=0 (or 1) always yields the same count dictionary
466+
# and hence y error becomes zero with sample averaging.
467+
# In this case, amp = 0 may yield the best result.
468+
y[0] = 0
469+
y[1] = 0
470+
y[2] = 0
471+
472+
test_data1 = self.single_sampler(x, y, seed=123)
473+
test_data2 = self.single_sampler(x, y, seed=124)
474+
test_data3 = self.single_sampler(x, y, seed=125)
475+
476+
expdata = ExperimentData(experiment=FakeExperiment())
477+
expdata.add_data(test_data1.data())
478+
expdata.add_data(test_data2.data())
479+
expdata.add_data(test_data3.data())
480+
481+
result = analysis.run(expdata)
482+
self.assertExperimentDone(result)
483+
484+
self.assertAlmostEqual(result.analysis_results("amp").value.nominal_value, amp, delta=0.1)
485+
443486
def test_get_init_params(self):
444487
"""Integration test for getting initial parameter from overview entry."""
445488

test/library/calibration/test_ramsey_xy.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ def test_end_to_end(self, freq_shift: float):
5656
5757
This test also checks that we can pickup frequency shifts with different signs.
5858
"""
59-
test_tol = 0.01
59+
test_tol = 0.03
6060
abs_tol = max(1e3, abs(freq_shift) * test_tol)
6161

6262
exp_helper = RamseyXYHelper()

0 commit comments

Comments
 (0)