blob: c82a3b6fc0c8676fa783eb56b74554c026dc5238 [file] [log] [blame]
# Copyright 2024 Google LLC.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Testers type aliases used throughout the dp-auditorium library."""
import abc
from typing import Any, final
import numpy as np
import tensorflow as tf
from typing_extensions import override
from dp_auditorium import interfaces
from dp_auditorium.testers import property_tester_utils
class DivergencePropertyTester(interfaces.PropertyTester, abc.ABC):
  """Base property tester that bounds a divergence via a trainable model.

  Concrete subclasses supply (1) a threshold above which the privacy test
  fails, (2) a routine that fits a model to maximize a divergence lower
  bound on training samples, and (3) a routine that evaluates that lower
  bound on held-out samples using the fitted model.
  """

  def __init__(self, config: Any, base_model: tf.keras.Model):
    """Initializes the instance.

    Args:
      config: Configuration object used to set up the property tester.
      base_model: Keras model that discriminates between samples produced
        by a mechanism run on two different datasets.
    """

  @property
  @abc.abstractmethod
  def _test_threshold(self) -> float:
    """Divergence value above which the privacy test is rejected."""

  @abc.abstractmethod
  def _get_optimized_divergence_estimation_model(
      self,
      samples_first_distribution: np.ndarray,
      samples_second_distribution: np.ndarray,
  ) -> tf.keras.Model:
    """Trains a model to maximize a divergence lower bound on the samples.

    Optimizes the base model's parameters so that it maximizes a lower
    bound on the divergence between the two sample distributions.

    Args:
      samples_first_distribution: Training samples drawn from the first
        distribution.
      samples_second_distribution: Training samples drawn from the second
        distribution.

    Returns:
      The fitted tf.keras.Model maximizing the divergence estimator on the
      given samples.
    """

  @abc.abstractmethod
  def _compute_divergence_on_samples(
      self,
      model: tf.keras.Model,
      samples_first_distribution: np.ndarray,
      samples_second_distribution: np.ndarray,
      failure_probability: float,
  ) -> float:
    """Evaluates a divergence lower bound on held-out samples.

    Args:
      model: Fitted model used to score the test samples.
      samples_first_distribution: Samples from the first distribution.
      samples_second_distribution: Samples from the second distribution.
      failure_probability: Allowed probability that the test fails.

    Returns:
      The estimated divergence lower bound.
    """

  @override
  @final
  def estimate_lower_bound(
      self,
      samples_first_distribution: np.ndarray,
      samples_second_distribution: np.ndarray,
      failure_probability: float,
  ) -> float:
    # Hold out part of each sample set so the divergence is evaluated on
    # data the model was not trained on.
    first_train, first_test = property_tester_utils.split_train_test_samples(
        samples_first_distribution
    )
    second_train, second_test = property_tester_utils.split_train_test_samples(
        samples_second_distribution
    )
    fitted_model = self._get_optimized_divergence_estimation_model(
        first_train, second_train
    )
    return self._compute_divergence_on_samples(
        fitted_model, first_test, second_test, failure_probability
    )

  @override
  @final
  def reject_property(self, lower_bound: float) -> bool:
    # Reject privacy whenever the estimated divergence exceeds the
    # subclass-defined threshold.
    return self._test_threshold < lower_bound