python/dp_accounting/dp_event.py - third_party/github.com/google/differential-privacy - Git at Google

 # Copyright 2021, The TensorFlow Authors.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     https://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """Standard DpEvent classes.

 A `DpEvent` represents the (hyper)parameters of a differentially
 private query, amplification mechanism, or composition, that are necessary
 and sufficient for privacy accounting. Various independent implementations of DP
 algorithms that are functionally equivalent from an accounting perspective may
 correspond to the same `DpEvent`. Similarly, various independent implementations
 of accounting algorithms may consume the same `DpEvent`.

 All `DpEvents` processed together are assumed to take place on a single dataset
 of records. `DpEvents` fall into roughly three categories:
  - `DpEvents` that release an output, and incur a privacy cost,
     e.g., `GaussianDpEvent`.
  - `DpEvents` that select a subset (or subsets) of the dataset, and run nested
     `DpEvents` on those subsets, e.g., `PoissonSampledDpEvent`.
  - `DpEvents` that represent (possibly sequentially) applying (multiple)
    mechanisms to the dataset (or currently active subset). Currently, this is
    only `ComposedDpEvent` and `SelfComposedDpEvent`.

 Each `DpEvent` should completely document the mathematical behavior and
 assumptions of the mechanism it represents so that the writer of an accountant
 class can implement the accounting correctly without knowing any other
 implementation details of the algorithm that produced it.

 New mechanism types should be given a corresponding `DpEvent` class, although
 not all accountants will be required to support them. In general,
 `PrivacyAccountant` implementations are not required to be aware of all
 `DpEvent` classes, but they should support the following basic events and handle
 them appropriately: `NoOpDpEvent`, `NonPrivateDpEvent`, `ComposedDpEvent`, and
 `SelfComposedDpEvent`. They should return `supports(event)` is False for
 `UnsupportedDpEvent` or any other event type they have not been designed to
 handle.

 To ensure that a `PrivacyAccountant` does not accidentally start to return
 incorrect results, the following should be enforced:
  * `DpEvent` classes and their parameters should never be removed, barring some
    extended, onerous deprecation process.
  * New parameters cannot be added to existing mechanisms unless they are
    optional. That is, old composed `DpEvent` objects that do not include them
    must remain valid.
  * The meaning of existing mechanisms or parameters must not change. That is,
    existing mechanisms should not have their implementations change in ways that
    alter their privacy properties; new `DpEvent` classes should be added
    instead.
  * `PrivacyAccountant` implementations are expected to return `supports(event)`
    is `False` when processing unknown mechanisms.
 """

 from typing import List, Union

 import attr


 class DpEvent(object):
   """Represents application of a private mechanism.

   A `DpEvent` describes a differentially private mechanism sufficiently for
   computing the associated privacy losses, both in isolation and in combination
   with other `DpEvent`s.
   """


 @attr.s(frozen=True)
 class NoOpDpEvent(DpEvent):
   """Represents appplication of an operation with no privacy impact.

   A `NoOpDpEvent` is generally never required, but it can be useful as a
   placeholder where a `DpEvent` is expected, such as in tests or some live
   accounting pipelines.
   """


 @attr.s(frozen=True)
 class NonPrivateDpEvent(DpEvent):
   """Represents application of a non-private operation.

   This `DpEvent` should be used when an operation is performed that does not
   satisfy (epsilon, delta)-DP. All `PrivacyAccountant`s should return infinite
   epsilon/delta when encountering a `NonPrivateDpEvent`.
   """


 @attr.s(frozen=True)
 class UnsupportedDpEvent(DpEvent):
   """Represents application of an as-yet unsupported operation.

   This `DpEvent` should be used when an operation is performed that does not yet
   have any associated DP description, or if the description is temporarily
   inaccessible, for example, during development. All `PrivacyAccountant`s should
   return `supports(event) == False` for `UnsupportedDpEvent`.
   """


 @attr.s(frozen=True, slots=True, auto_attribs=True)
 class GaussianDpEvent(DpEvent):
   """Represents an application of the Gaussian mechanism.

   For values v_i and noise z ~ N(0, s^2I), this mechanism returns sum_i v_i + z.
   If the norms of the values are bounded ||v_i|| <= C, the noise_multiplier is
   defined as s / C.
   """
   noise_multiplier: float


 @attr.s(frozen=True, slots=True, auto_attribs=True)
 class LaplaceDpEvent(DpEvent):
   """Represents an application of the Laplace mechanism.

   For values v_i and noise z sampled coordinate-wise from the Laplace
   distribution L(0, s), this mechanism returns sum_i v_i + z.
   The probability density function of the Laplace distribution L(0, s) with
   parameter s is given as exp(-|x|/s) * (0.5/s) at x for any real value x.
   If the L_1 norm of the values are bounded ||v_i||_1 <= C, the noise_multiplier
   is defined as s / C.
   """
   noise_multiplier: float


 @attr.s(frozen=True, slots=True, auto_attribs=True)
 class SelfComposedDpEvent(DpEvent):
   """Represents repeated application of a mechanism.

   The repeated applications may be adaptive, where the query producing each
   event depends on the results of prior queries.

   This is equivalent to `ComposedDpEvent` that contains a list of length `count`
   of identical copies of `event`.
   """
   event: DpEvent
   count: int


 @attr.s(frozen=True, slots=True, auto_attribs=True)
 class ComposedDpEvent(DpEvent):
   """Represents application of a series of composed mechanisms.

   The composition may be adaptive, where the query producing each event depends
   on the results of prior queries.
   """
   events: List[DpEvent]


 @attr.s(frozen=True, slots=True, auto_attribs=True)
 class PoissonSampledDpEvent(DpEvent):
   """Represents an application of Poisson subsampling.

   Each record in the dataset is included in the sample independently with
   probability `sampling_probability`. Then the `DpEvent` `event` is applied
   to the sample of records.
   """
   sampling_probability: float
   event: DpEvent


 @attr.s(frozen=True, slots=True, auto_attribs=True)
 class SampledWithReplacementDpEvent(DpEvent):
   """Represents sampling a fixed sized batch of records with replacement.

   A sample of `sample_size` (possibly repeated) records is drawn uniformly at
   random from the set of possible samples of a source dataset of size
   `source_dataset_size`. Then the `DpEvent` `event` is applied to the sample of
   records.
   """
   source_dataset_size: int
   sample_size: int
   event: DpEvent


 @attr.s(frozen=True, slots=True, auto_attribs=True)
 class SampledWithoutReplacementDpEvent(DpEvent):
   """Represents sampling a fixed sized batch of records without replacement.

   A sample of `sample_size` unique records is drawn uniformly at random from the
   set of possible samples of a source dataset of size `source_dataset_size`.
   Then the `DpEvent` `event` is applied to the sample of records.
   """
   source_dataset_size: int
   sample_size: int
   event: DpEvent


 @attr.s(frozen=True, slots=True, auto_attribs=True)
 class SingleEpochTreeAggregationDpEvent(DpEvent):
   """Represents aggregation for a single epoch using one or more trees.

   Multiple tree-aggregation steps can occur, but it is required that each
   record occurs at most once *across all trees*. See appendix D of
   "Practical and Private (Deep) Learning without Sampling or Shuffling"
   https://arxiv.org/abs/2103.00039.

   To represent the common case where the same record can occur in multiple
   trees (but still at most once per tree), wrap this with `SelfComposedDpEvent`
   or `ComposedDpEvent` and use a scalar for `step_counts`.

   Attributes:
     noise_multiplier: The ratio of the noise per node to the sensitivity.
     step_counts: The number of steps in each tree. May be a scalar for a single
       tree.
   """
   noise_multiplier: float
   step_counts: Union[int, List[int]]


 @attr.s(frozen=True, slots=True, auto_attribs=True)
 class RepeatAndSelectDpEvent(DpEvent):
   """Represents repeatedly running the mechanism and selecting the best output.

   The total number of runs is randomized and drawn from a distribution
   with the given parameters: Poisson (shape=infinity), Geometric (shape=1),
   Logarithmic (shape=0), or Truncated Negative binomial (0<shape<infinity).

   See https://arxiv.org/abs/2110.03620 for details.

   Attributes:
     event: The DpEvent that is being repeated.
     mean: The mean number of repetitions.
     shape: The shape of the distribution of the number of repetitions.
   """
   event: DpEvent
   mean: float
   shape: float
	# Copyright 2021, The TensorFlow Authors.
	#
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# https://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.
	"""Standard DpEvent classes.

	A `DpEvent` represents the (hyper)parameters of a differentially
	private query, amplification mechanism, or composition, that are necessary
	and sufficient for privacy accounting. Various independent implementations of DP
	algorithms that are functionally equivalent from an accounting perspective may
	correspond to the same `DpEvent`. Similarly, various independent implementations
	of accounting algorithms may consume the same `DpEvent`.

	All `DpEvents` processed together are assumed to take place on a single dataset
	of records. `DpEvents` fall into roughly three categories:
	- `DpEvents` that release an output, and incur a privacy cost,
	e.g., `GaussianDpEvent`.
	- `DpEvents` that select a subset (or subsets) of the dataset, and run nested
	`DpEvents` on those subsets, e.g., `PoissonSampledDpEvent`.
	- `DpEvents` that represent (possibly sequentially) applying (multiple)
	mechanisms to the dataset (or currently active subset). Currently, this is
	only `ComposedDpEvent` and `SelfComposedDpEvent`.

	Each `DpEvent` should completely document the mathematical behavior and
	assumptions of the mechanism it represents so that the writer of an accountant
	class can implement the accounting correctly without knowing any other
	implementation details of the algorithm that produced it.

	New mechanism types should be given a corresponding `DpEvent` class, although
	not all accountants will be required to support them. In general,
	`PrivacyAccountant` implementations are not required to be aware of all
	`DpEvent` classes, but they should support the following basic events and handle
	them appropriately: `NoOpDpEvent`, `NonPrivateDpEvent`, `ComposedDpEvent`, and
	`SelfComposedDpEvent`. They should return `supports(event)` is False for
	`UnsupportedDpEvent` or any other event type they have not been designed to
	handle.

	To ensure that a `PrivacyAccountant` does not accidentally start to return
	incorrect results, the following should be enforced:
	* `DpEvent` classes and their parameters should never be removed, barring some
	extended, onerous deprecation process.
	* New parameters cannot be added to existing mechanisms unless they are
	optional. That is, old composed `DpEvent` objects that do not include them
	must remain valid.
	* The meaning of existing mechanisms or parameters must not change. That is,
	existing mechanisms should not have their implementations change in ways that
	alter their privacy properties; new `DpEvent` classes should be added
	instead.
	* `PrivacyAccountant` implementations are expected to return `supports(event)`
	is `False` when processing unknown mechanisms.
	"""

	from typing import List, Union

	import attr


	class DpEvent(object):
	"""Represents application of a private mechanism.

	A `DpEvent` describes a differentially private mechanism sufficiently for
	computing the associated privacy losses, both in isolation and in combination
	with other `DpEvent`s.
	"""


	@attr.s(frozen=True)
	class NoOpDpEvent(DpEvent):
	"""Represents appplication of an operation with no privacy impact.

	A `NoOpDpEvent` is generally never required, but it can be useful as a
	placeholder where a `DpEvent` is expected, such as in tests or some live
	accounting pipelines.
	"""


	@attr.s(frozen=True)
	class NonPrivateDpEvent(DpEvent):
	"""Represents application of a non-private operation.

	This `DpEvent` should be used when an operation is performed that does not
	satisfy (epsilon, delta)-DP. All `PrivacyAccountant`s should return infinite
	epsilon/delta when encountering a `NonPrivateDpEvent`.
	"""


	@attr.s(frozen=True)
	class UnsupportedDpEvent(DpEvent):
	"""Represents application of an as-yet unsupported operation.

	This `DpEvent` should be used when an operation is performed that does not yet
	have any associated DP description, or if the description is temporarily
	inaccessible, for example, during development. All `PrivacyAccountant`s should
	return `supports(event) == False` for `UnsupportedDpEvent`.
	"""


	@attr.s(frozen=True, slots=True, auto_attribs=True)
	class GaussianDpEvent(DpEvent):
	"""Represents an application of the Gaussian mechanism.

	For values v_i and noise z ~ N(0, s^2I), this mechanism returns sum_i v_i + z.
	If the norms of the values are bounded \|\|v_i\|\| <= C, the noise_multiplier is
	defined as s / C.
	"""
	noise_multiplier: float


	@attr.s(frozen=True, slots=True, auto_attribs=True)
	class LaplaceDpEvent(DpEvent):
	"""Represents an application of the Laplace mechanism.

	For values v_i and noise z sampled coordinate-wise from the Laplace
	distribution L(0, s), this mechanism returns sum_i v_i + z.
	The probability density function of the Laplace distribution L(0, s) with
	parameter s is given as exp(-\|x\|/s) * (0.5/s) at x for any real value x.
	If the L_1 norm of the values are bounded \|\|v_i\|\|_1 <= C, the noise_multiplier
	is defined as s / C.
	"""
	noise_multiplier: float


	@attr.s(frozen=True, slots=True, auto_attribs=True)
	class SelfComposedDpEvent(DpEvent):
	"""Represents repeated application of a mechanism.

	The repeated applications may be adaptive, where the query producing each
	event depends on the results of prior queries.

	This is equivalent to `ComposedDpEvent` that contains a list of length `count`
	of identical copies of `event`.
	"""
	event: DpEvent
	count: int


	@attr.s(frozen=True, slots=True, auto_attribs=True)
	class ComposedDpEvent(DpEvent):
	"""Represents application of a series of composed mechanisms.

	The composition may be adaptive, where the query producing each event depends
	on the results of prior queries.
	"""
	events: List[DpEvent]


	@attr.s(frozen=True, slots=True, auto_attribs=True)
	class PoissonSampledDpEvent(DpEvent):
	"""Represents an application of Poisson subsampling.

	Each record in the dataset is included in the sample independently with
	probability `sampling_probability`. Then the `DpEvent` `event` is applied
	to the sample of records.
	"""
	sampling_probability: float
	event: DpEvent


	@attr.s(frozen=True, slots=True, auto_attribs=True)
	class SampledWithReplacementDpEvent(DpEvent):
	"""Represents sampling a fixed sized batch of records with replacement.

	A sample of `sample_size` (possibly repeated) records is drawn uniformly at
	random from the set of possible samples of a source dataset of size
	`source_dataset_size`. Then the `DpEvent` `event` is applied to the sample of
	records.
	"""
	source_dataset_size: int
	sample_size: int
	event: DpEvent


	@attr.s(frozen=True, slots=True, auto_attribs=True)
	class SampledWithoutReplacementDpEvent(DpEvent):
	"""Represents sampling a fixed sized batch of records without replacement.

	A sample of `sample_size` unique records is drawn uniformly at random from the
	set of possible samples of a source dataset of size `source_dataset_size`.
	Then the `DpEvent` `event` is applied to the sample of records.
	"""
	source_dataset_size: int
	sample_size: int
	event: DpEvent


	@attr.s(frozen=True, slots=True, auto_attribs=True)
	class SingleEpochTreeAggregationDpEvent(DpEvent):
	"""Represents aggregation for a single epoch using one or more trees.

	Multiple tree-aggregation steps can occur, but it is required that each
	record occurs at most once across all trees. See appendix D of
	"Practical and Private (Deep) Learning without Sampling or Shuffling"
	https://arxiv.org/abs/2103.00039.

	To represent the common case where the same record can occur in multiple
	trees (but still at most once per tree), wrap this with `SelfComposedDpEvent`
	or `ComposedDpEvent` and use a scalar for `step_counts`.

	Attributes:
	noise_multiplier: The ratio of the noise per node to the sensitivity.
	step_counts: The number of steps in each tree. May be a scalar for a single
	tree.
	"""
	noise_multiplier: float
	step_counts: Union[int, List[int]]


	@attr.s(frozen=True, slots=True, auto_attribs=True)
	class RepeatAndSelectDpEvent(DpEvent):
	"""Represents repeatedly running the mechanism and selecting the best output.

	The total number of runs is randomized and drawn from a distribution
	with the given parameters: Poisson (shape=infinity), Geometric (shape=1),
	Logarithmic (shape=0), or Truncated Negative binomial (0<shape<infinity).

	See https://arxiv.org/abs/2110.03620 for details.

	Attributes:
	event: The DpEvent that is being repeated.
	mean: The mean number of repetitions.
	shape: The shape of the distribution of the number of repetitions.
	"""
	event: DpEvent
	mean: float
	shape: float