sdv-dev
diff --git a/‎HISTORY.md‎
Lines changed: 13 additions & 0 deletions b/‎HISTORY.md‎
Lines changed: 13 additions & 0 deletions
diff --git a/‎README.md‎
Lines changed: 1 addition & 0 deletions b/‎README.md‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎conda/meta.yaml‎
Lines changed: 1 addition & 1 deletion b/‎conda/meta.yaml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎pyproject.toml‎
Lines changed: 1 addition & 1 deletion b/‎pyproject.toml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎sdmetrics/__init__.py‎
Lines changed: 1 addition & 1 deletion b/‎sdmetrics/__init__.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎sdmetrics/single_table/__init__.py‎
Lines changed: 6 additions & 0 deletions b/‎sdmetrics/single_table/__init__.py‎
Lines changed: 6 additions & 0 deletions
diff --git a/‎sdmetrics/single_table/privacy/__init__.py‎
Lines changed: 6 additions & 0 deletions b/‎sdmetrics/single_table/privacy/__init__.py‎
Lines changed: 6 additions & 0 deletions
diff --git a/‎sdmetrics/single_table/privacy/base.py‎
Lines changed: 1 addition & 1 deletion b/‎sdmetrics/single_table/privacy/base.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎sdmetrics/single_table/privacy/cap.py‎
Lines changed: 224 additions & 0 deletions b/‎sdmetrics/single_table/privacy/cap.py‎
Lines changed: 224 additions & 0 deletions
@@ -1,5 +1,18 @@
 # History
 
+## v0.18.0 - 2024-12-13
+
+### Bugs Fixed
+
+* Missing whitespace in `DisclosureProtection` warning - Issue [#694](https://github.com/sdv-dev/SDMetrics/issues/694) by @frances-h
+* `DisclosureProtection` should be NaN if baseline score is zero - Issue [#693](https://github.com/sdv-dev/SDMetrics/issues/693) by @frances-h
+* `CategoricalCAP` metric returns 0 if no overlap in known fields - Issue [#692](https://github.com/sdv-dev/SDMetrics/issues/692) by @frances-h
+
+### New Features
+
+* Add `DisclosureProtectionEstimate` metric - Issue [#676](https://github.com/sdv-dev/SDMetrics/issues/676) by @frances-h
+* Add `DisclosureProtection` metric - Issue [#675](https://github.com/sdv-dev/SDMetrics/issues/675) by @frances-h
+
 ## v0.17.1 - 2024-12-04
 
 ### Maintenance
 
@@ -11,6 +11,7 @@
 [![Coverage Status](https://codecov.io/gh/sdv-dev/SDMetrics/branch/main/graph/badge.svg)](https://codecov.io/gh/sdv-dev/SDMetrics)
 [![Slack](https://img.shields.io/badge/Community-Slack-blue?style=plastic&logo=slack)](https://bit.ly/sdv-slack-invite)
 [![Tutorial](https://img.shields.io/badge/Demo-Get%20started-orange?style=plastic&logo=googlecolab)](https://bit.ly/sdmetrics-demo)
+[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.14279167.svg)](https://doi.org/10.5281/zenodo.14279167)
 
 <div align="left">
 <br/>
 
@@ -1,4 +1,4 @@
-{% set version = '0.17.1' %}
+{% set version = '0.18.0.dev1' %}
 
 package:
   name: "{{ name|lower }}"
 
@@ -134,7 +134,7 @@ convention = 'google'
 add-ignore = ['D107', 'D407', 'D417']
 
 [tool.bumpversion]
-current_version = "0.17.1"
+current_version = "0.18.0.dev1"
 parse = '(?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\.(?P<release>[a-z]+)(?P<candidate>\d+))?'
 serialize = [
     '{major}.{minor}.{patch}.{release}{candidate}',
 
@@ -4,7 +4,7 @@
 
 __author__ = 'MIT Data To AI Lab'
 __email__ = '[email protected]'
-__version__ = '0.17.1'
+__version__ = '0.18.0.dev1'
 
 import sys
 import warnings as python_warnings
 
@@ -67,6 +67,10 @@
     CategoricalRF,
     CategoricalSVM,
 )
+from sdmetrics.single_table.privacy.disclosure_protection import (
+    DisclosureProtection,
+    DisclosureProtectionEstimate,
+)
 from sdmetrics.single_table.privacy.ensemble import CategoricalEnsemble
 from sdmetrics.single_table.privacy.numerical_sklearn import NumericalLR, NumericalMLP, NumericalSVR
 from sdmetrics.single_table.privacy.radius_nearest_neighbor import NumericalRadiusNearestNeighbor
@@ -109,6 +113,8 @@
     'CategoricalCAP',
     'CategoricalZeroCAP',
     'CategoricalGeneralizedCAP',
+    'DisclosureProtection',
+    'DisclosureProtectionEstimate',
     'NumericalMLP',
     'NumericalLR',
     'NumericalSVR',
 
@@ -12,6 +12,10 @@
     CategoricalRF,
     CategoricalSVM,
 )
+from sdmetrics.single_table.privacy.disclosure_protection import (
+    DisclosureProtection,
+    DisclosureProtectionEstimate,
+)
 from sdmetrics.single_table.privacy.ensemble import CategoricalEnsemble
 from sdmetrics.single_table.privacy.numerical_sklearn import NumericalLR, NumericalMLP, NumericalSVR
 from sdmetrics.single_table.privacy.radius_nearest_neighbor import NumericalRadiusNearestNeighbor
@@ -26,6 +30,8 @@
     'CategoricalRF',
     'CategoricalSVM',
     'CategoricalZeroCAP',
+    'DisclosureProtection',
+    'DisclosureProtectionEstimate',
     'NumericalLR',
     'NumericalMLP',
     'NumericalPrivacyMetric',
 
@@ -166,7 +166,7 @@ def compute(
                     score += row_score
 
             if count == 0:
-                return 0
+                return np.nan
 
             return 1.0 - score / count
 
 
@@ -1,8 +1,16 @@
 """CAP modules and their attackers."""
 
+import warnings
+
 from sdmetrics.single_table.privacy.base import CategoricalPrivacyMetric, PrivacyAttackerModel
 from sdmetrics.single_table.privacy.util import closest_neighbors, count_frequency, majority
 
+DEPRECATION_MSG = (
+    'Computing CAP metrics directly is deprecated. For improved privacy metrics, '
+    "please use the 'DisclosureProtection' and 'DisclosureProtectionEstimate' "
+    'metrics instead.'
+)
+
 
 class CAPAttacker(PrivacyAttackerModel):
     """The CAP (Correct Attribution Probability) privacy attacker.
@@ -78,6 +86,78 @@ class CategoricalCAP(CategoricalPrivacyMetric):
     MODEL = CAPAttacker
     ACCURACY_BASE = False
 
+    @classmethod
+    def _compute(
+        cls,
+        real_data,
+        synthetic_data,
+        metadata=None,
+        key_fields=None,
+        sensitive_fields=None,
+        model_kwargs=None,
+    ):
+        return super().compute(
+            real_data=real_data,
+            synthetic_data=synthetic_data,
+            metadata=metadata,
+            key_fields=key_fields,
+            sensitive_fields=sensitive_fields,
+            model_kwargs=model_kwargs,
+        )
+
+    @classmethod
+    def compute(
+        cls,
+        real_data,
+        synthetic_data,
+        metadata=None,
+        key_fields=None,
+        sensitive_fields=None,
+        model_kwargs=None,
+    ):
+        """Compute this metric.
+
+        This fits an adversarial attacker model on the synthetic data and
+        then evaluates it making predictions on the real data.
+
+        A ``key_fields`` column(s) name must be given, either directly or as a first level
+        entry in the ``metadata`` dict, which will be used as the key column(s) for the
+        attack.
+
+        A ``sensitive_fields`` column(s) name must be given, either directly or as a first level
+        entry in the ``metadata`` dict, which will be used as the sensitive_fields column(s)
+        for the attack.
+
+        Args:
+            real_data (Union[numpy.ndarray, pandas.DataFrame]):
+                The values from the real dataset.
+            synthetic_data (Union[numpy.ndarray, pandas.DataFrame]):
+                The values from the synthetic dataset.
+            metadata (dict):
+                Table metadata dict. If not passed, it is built based on the
+                real_data fields and dtypes.
+            key_fields (list(str)):
+                Name of the column(s) to use as the key attributes.
+            sensitive_fields (list(str)):
+                Name of the column(s) to use as the sensitive attributes.
+            model_kwargs (dict):
+                Key word arguments of the attacker model. cls.MODEL_KWARGS will be used
+                if none is provided.
+
+        Returns:
+            Union[float, tuple[float]]:
+                Scores obtained by the attackers when evaluated on the real data.
+        """
+        warnings.warn(DEPRECATION_MSG, DeprecationWarning)
+        return cls._compute(
+            real_data=real_data,
+            synthetic_data=synthetic_data,
+            metadata=metadata,
+            key_fields=key_fields,
+            sensitive_fields=sensitive_fields,
+            model_kwargs=model_kwargs,
+        )
+
 
 class ZeroCAPAttacker(CAPAttacker):
     """The 0CAP privacy attacker, which operates in the same way as CAP does.
@@ -113,6 +193,78 @@ class CategoricalZeroCAP(CategoricalPrivacyMetric):
     MODEL = ZeroCAPAttacker
     ACCURACY_BASE = False
 
+    @classmethod
+    def _compute(
+        cls,
+        real_data,
+        synthetic_data,
+        metadata=None,
+        key_fields=None,
+        sensitive_fields=None,
+        model_kwargs=None,
+    ):
+        return super().compute(
+            real_data=real_data,
+            synthetic_data=synthetic_data,
+            metadata=metadata,
+            key_fields=key_fields,
+            sensitive_fields=sensitive_fields,
+            model_kwargs=model_kwargs,
+        )
+
+    @classmethod
+    def compute(
+        cls,
+        real_data,
+        synthetic_data,
+        metadata=None,
+        key_fields=None,
+        sensitive_fields=None,
+        model_kwargs=None,
+    ):
+        """Compute this metric.
+
+        This fits an adversarial attacker model on the synthetic data and
+        then evaluates it making predictions on the real data.
+
+        A ``key_fields`` column(s) name must be given, either directly or as a first level
+        entry in the ``metadata`` dict, which will be used as the key column(s) for the
+        attack.
+
+        A ``sensitive_fields`` column(s) name must be given, either directly or as a first level
+        entry in the ``metadata`` dict, which will be used as the sensitive_fields column(s)
+        for the attack.
+
+        Args:
+            real_data (Union[numpy.ndarray, pandas.DataFrame]):
+                The values from the real dataset.
+            synthetic_data (Union[numpy.ndarray, pandas.DataFrame]):
+                The values from the synthetic dataset.
+            metadata (dict):
+                Table metadata dict. If not passed, it is built based on the
+                real_data fields and dtypes.
+            key_fields (list(str)):
+                Name of the column(s) to use as the key attributes.
+            sensitive_fields (list(str)):
+                Name of the column(s) to use as the sensitive attributes.
+            model_kwargs (dict):
+                Key word arguments of the attacker model. cls.MODEL_KWARGS will be used
+                if none is provided.
+
+        Returns:
+            Union[float, tuple[float]]:
+                Scores obtained by the attackers when evaluated on the real data.
+        """
+        warnings.warn(DEPRECATION_MSG, DeprecationWarning)
+        return cls._compute(
+            real_data=real_data,
+            synthetic_data=synthetic_data,
+            metadata=metadata,
+            key_fields=key_fields,
+            sensitive_fields=sensitive_fields,
+            model_kwargs=model_kwargs,
+        )
+
 
 class GeneralizedCAPAttacker(CAPAttacker):
     """The GeneralizedCAP privacy attacker.
@@ -169,3 +321,75 @@ class CategoricalGeneralizedCAP(CategoricalPrivacyMetric):
     name = 'Categorical GeneralizedCAP'
     MODEL = GeneralizedCAPAttacker
     ACCURACY_BASE = False
+
+    @classmethod
+    def _compute(
+        cls,
+        real_data,
+        synthetic_data,
+        metadata=None,
+        key_fields=None,
+        sensitive_fields=None,
+        model_kwargs=None,
+    ):
+        return super().compute(
+            real_data=real_data,
+            synthetic_data=synthetic_data,
+            metadata=metadata,
+            key_fields=key_fields,
+            sensitive_fields=sensitive_fields,
+            model_kwargs=model_kwargs,
+        )
+
+    @classmethod
+    def compute(
+        cls,
+        real_data,
+        synthetic_data,
+        metadata=None,
+        key_fields=None,
+        sensitive_fields=None,
+        model_kwargs=None,
+    ):
+        """Compute this metric.
+
+        This fits an adversarial attacker model on the synthetic data and
+        then evaluates it making predictions on the real data.
+
+        A ``key_fields`` column(s) name must be given, either directly or as a first level
+        entry in the ``metadata`` dict, which will be used as the key column(s) for the
+        attack.
+
+        A ``sensitive_fields`` column(s) name must be given, either directly or as a first level
+        entry in the ``metadata`` dict, which will be used as the sensitive_fields column(s)
+        for the attack.
+
+        Args:
+            real_data (Union[numpy.ndarray, pandas.DataFrame]):
+                The values from the real dataset.
+            synthetic_data (Union[numpy.ndarray, pandas.DataFrame]):
+                The values from the synthetic dataset.
+            metadata (dict):
+                Table metadata dict. If not passed, it is built based on the
+                real_data fields and dtypes.
+            key_fields (list(str)):
+                Name of the column(s) to use as the key attributes.
+            sensitive_fields (list(str)):
+                Name of the column(s) to use as the sensitive attributes.
+            model_kwargs (dict):
+                Key word arguments of the attacker model. cls.MODEL_KWARGS will be used
+                if none is provided.
+
+        Returns:
+            Union[float, tuple[float]]:
+                Scores obtained by the attackers when evaluated on the real data.
+        """
+        warnings.warn(DEPRECATION_MSG, DeprecationWarning)
+        return cls._compute(
+            real_data=real_data,
+            synthetic_data=synthetic_data,
+            metadata=metadata,
+            key_fields=key_fields,
+            sensitive_fields=sensitive_fields,
+            model_kwargs=model_kwargs,
+        )
Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,4 @@`
`1`		`-{% set version = '0.17.1' %}`
	`1`	`+{% set version = '0.18.0.dev1' %}`
`2`	`2`
`3`	`3`	`package:`
`4`	`4`	`name: "{{ name\|lower }}"`