| { |
| "cells": [ |
| { |
| "cell_type": "markdown", |
| "metadata": { |
| "id": "I0Z7vNS_ybbU" |
| }, |
| "source": [ |
| "This colab notebook uses DP-auditorium to test differentially private mechanisms computing aggregate statistics using PipelineDP." |
| ] |
| }, |
| { |
| "cell_type": "markdown", |
| "metadata": { |
| "id": "wUtLsXpF9q4D" |
| }, |
| "source": [ |
| "\u003ctable class=\"tfo-notebook-buttons\" align=\"left\"\u003e\n", |
| " \u003ctd\u003e\n", |
| " \u003ca target=\"_blank\" href=\"https://colab.research.google.com/github/google/differential-privacy/blob/main/python/dp_auditorium/dp_auditorium/examples/pipelinedp_mean_mechanism_example.ipynb\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/colab_logo_32px.png\" /\u003eRun in Google Colab\u003c/a\u003e\n", |
| " \u003c/td\u003e\n", |
| " \u003ctd\u003e\n", |
| " \u003ca target=\"_blank\" href=\"https://github.com/google/differential-privacy/blob/main/python/dp_auditorium/dp_auditorium/examples/pipelinedp_mean_mechanism_example.ipynb\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/GitHub-Mark-32px.png\" /\u003eView source on GitHub\u003c/a\u003e\n", |
| " \u003c/td\u003e\n", |
| "\u003c/table\u003e\n", |
| "\n", |
| "\u003cbr\u003e\n", |
| "\u003cbr\u003e" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": null, |
| "metadata": { |
| "id": "WPLSKwjEHfXI" |
| }, |
| "outputs": [], |
| "source": [ |
| "#@title Install and import dp_auditorium and all necessary libraries.\n", |
| "!pip install google-vizier equinox pipeline_dp\n", |
| "!git clone https://github.com/google/differential-privacy.git\n", |
| "import sys\n", |
| "sys.path.append('differential-privacy/python/dp_auditorium')\n", |
| "\n", |
| "from dp_auditorium import privacy_test_runner\n", |
| "from dp_auditorium.generators import pipeline_dp_vizier_dataset_generator\n", |
| "from dp_auditorium.configs import dataset_generator_config\n", |
| "from dp_auditorium.configs import privacy_property\n", |
| "from dp_auditorium.configs import privacy_test_runner_config\n", |
| "from dp_auditorium.configs import property_tester_config\n", |
| "from dp_auditorium.mechanisms.pipeline_dp import aggregation as pipeline_dp_mechanism\n", |
| "from dp_auditorium.testers import hockey_stick_tester\n", |
| "\n", |
| "import pipeline_dp\n", |
| "import tensorflow as tf\n", |
| "tf.compat.v1.enable_eager_execution()" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": null, |
| "metadata": { |
| "id": "dr5A5W7Aq2SO" |
| }, |
| "outputs": [], |
| "source": [ |
| "#@title Example of testing PipelineDP mean mechanism\n", |
| "import time\n", |
| "\n", |
| "def pipeline_dp_mean_mechanism_report(\n", |
| " epsilon: float,\n", |
| " delta: float,\n", |
| " seed: int,\n", |
| " max_number_partitions: int = 10,\n", |
| ") -\u003e privacy_test_runner_config.PrivacyTestRunnerResults:\n", |
| " \"\"\"Runs the example code for a mean mechanism.\n", |
| "\n", |
| " Args:\n", |
| " epsilon: standard approximate DP parameter.\n", |
| " delta: standard approximate DP parameter.\n", |
| " seed: seed to initialize the random number generator.\n", |
| " max_number_partitions: maximum number of partitions which can be used by\n", |
| " dataset generator.\n", |
| "\n", |
| " Returns:\n", |
| " The result of the example code as PrivacyTestRunnerResults.\n", |
| " \"\"\"\n", |
| " tf.random.set_seed(seed)\n", |
| "\n", |
| "  # Specify a config for computing with PipelineDP Mean aggregation, namely\n", |
| " # computing mean aggregation per partition, i.e. in SQL terms DP version of\n", |
| " # SELECT partition_key, mean(value)\n", |
| " # GROUP BY partition_key\n", |
| " # is computed.\n", |
| " # See https://pipelinedp.io/key-definitions/ on more details of PipelineDP terminology.\n", |
| " mech_config = pipeline_dp.AggregateParams(\n", |
| " metrics=[pipeline_dp.Metrics.MEAN],\n", |
| " # Laplace noise is used for ensuring DP\n", |
| " noise_kind=pipeline_dp.NoiseKind.LAPLACE,\n", |
| " # Set contribution bounds:\n", |
| "\n", |
| "      # 1. If some privacy unit contributes to more than 1 partition then\n", |
| "      # PipelineDP will randomly choose 1 partition; contributions to the\n", |
| "      # others will be dropped.\n", |
| " max_partitions_contributed=1,\n", |
| "      # 2. If some privacy unit contributes more than 1 time to some\n", |
| "      # partition then PipelineDP will randomly choose 1 contribution; the\n", |
| "      # other contributions will be dropped.\n", |
| " max_contributions_per_partition=1,\n", |
| "\n", |
| "      # 3. Each contribution will be clipped to [-1, 1].\n", |
| " min_value=-1.0,\n", |
| " max_value=1.0)\n", |
| "\n", |
| " # Initialize the mechanism.\n", |
| " public_partitions = list(range(max_number_partitions))\n", |
| " mechanism = pipeline_dp_mechanism.AggregationMechanism(mech_config,\n", |
| " privacy_property.ApproximateDp(\n", |
| " epsilon=epsilon,\n", |
| " delta=delta,\n", |
| " ), public_partitions)\n", |
| "\n", |
| " # Configuration for a Hockey-Stick property tester. Given arrays s1 and s2\n", |
| " # with samples from two distributions it will estimate the hockey-stick\n", |
| " # divergence between the underlying distributions. It checks if the estimated\n", |
| " # divergence is bounded by delta.\n", |
| " tester_config = property_tester_config.HockeyStickPropertyTesterConfig(\n", |
| " training_config=hockey_stick_tester.make_default_hs_training_config(),\n", |
| " approximate_dp=privacy_property.ApproximateDp(\n", |
| " epsilon=epsilon,\n", |
| " delta=delta,\n", |
| " ),\n", |
| " )\n", |
| "\n", |
| " # Initialize a classifier model for the Hockey-Stick property tester.\n", |
| " # This classifier will learn to distinguish between samples of the mechanism\n", |
| " # on adjacent datasets. Its accuracy level should be controlled by the privacy\n", |
| " # guarantee.\n", |
| " base_model = hockey_stick_tester.make_default_hs_base_model()\n", |
| " # Initialize a property tester.\n", |
| " property_tester = hockey_stick_tester.HockeyStickPropertyTester(\n", |
| " config=tester_config,\n", |
| " base_model=base_model,\n", |
| " )\n", |
| "\n", |
| " # Configuration for dataset generator. It generates neighboring datasets under\n", |
| " # the add/remove definition. Unique study name prevents using cached results\n", |
| " # from previous runs.\n", |
| " generator_config = dataset_generator_config.VizierDatasetGeneratorConfig(\n", |
| " study_name=str(time.time()),\n", |
| " study_owner=\"owner\",\n", |
| " num_vizier_parameters=2,\n", |
| " data_type=dataset_generator_config.DataType.DATA_TYPE_FLOAT,\n", |
| " min_value=-1.0,\n", |
| " max_value=1.0,\n", |
| " search_algorithm=\"RANDOM_SEARCH\",\n", |
| " metric_name=\"hockey_stick_divergence\",\n", |
| " )\n", |
| "\n", |
| " # Dataset generator will generate datasets of not more than\n", |
| " # max_number_partitions partitions and not more than 10 privacy units.\n", |
| " # The same partitions are used as public_partitions and as partitions in\n", |
| " # dataset. So the mechanism will not drop the partitions. We do not check\n", |
| " # partition selection. We focus only on checking noise.\n", |
| " pipeline_dp_generator_config = pipeline_dp_vizier_dataset_generator.PipelineDpDatasetGeneratorConfig(\n", |
| " max_num_privacy_ids=10, max_num_partitions=max_number_partitions)\n", |
| "\n", |
| " # Initialize the dataset generator.\n", |
| " dataset_generator = pipeline_dp_vizier_dataset_generator.PipelineDpDatasetGenerator(\n", |
| " generator_config, pipeline_dp_generator_config)\n", |
| "\n", |
| " # Configuration for the test runner.\n", |
| " # The test runner coordinates how the test is evaluated. It receives a\n", |
| " # dataset generator, a property tester and a configuration (see base class for\n", |
| " # details on these parameters), and runs privacy tests using the property\n", |
| " # tester on datasets generated by the dataset generator.\n", |
| " test_runner_config = privacy_test_runner_config.PrivacyTestRunnerConfig(\n", |
| " property_tester=privacy_test_runner_config.PropertyTester.HOCKEY_STICK_TESTER,\n", |
| " max_num_trials=10,\n", |
| " failure_probability=0.05,\n", |
| " num_samples=10_000,\n", |
| " # Apply a hyperbolic tangent function to the output of the mechanism\n", |
| " post_processing=privacy_test_runner_config.PostProcessing.TANH,\n", |
| " )\n", |
| " # Initialize the test runner.\n", |
| " test_runner = privacy_test_runner.PrivacyTestRunner(\n", |
| " config=test_runner_config,\n", |
| " dataset_generator=dataset_generator,\n", |
| " property_tester=property_tester,\n", |
| " )\n", |
| "\n", |
| " return test_runner.test_privacy(mechanism, \"pipeline_dp-mean-mechanism\")\n", |
| "\n", |
| "\n", |
| "EPSILON = 1.0\n", |
| "DELTA = 1e-5\n", |
| "SEED = 1\n", |
| "\n", |
| "# The results indicate whether a privacy violation was identified within the\n", |
| "# designated number of trials defined in the configuration. In the absence of a\n", |
| "# violation, a message is returned indicating that the limit of the number of\n", |
| "# trials has been reached. For reference, all computed divergences across all\n", |
| "# trials are also reported.\n", |
| "results = pipeline_dp_mean_mechanism_report(EPSILON, DELTA, SEED)\n", |
| "print(f\" \\nResults: \\n{results}\")\n", |
| "if results.found_privacy_violation is not None:\n", |
| " print(\"Privacy violations found!\")\n" |
| ] |
| } |
| ], |
| "metadata": { |
| "colab": { |
| "private_outputs": true, |
| "provenance": [ |
| { |
| "file_id": "1QyFD_doucyHewiRMtxGvFxNrFlgbCqQa", |
| "timestamp": 1708693099970 |
| }, |
| { |
| "file_id": "1pBgTlH19OwJ3diUYf3m3QaZcVNQGeB8B", |
| "timestamp": 1708692052606 |
| } |
| ] |
| }, |
| "kernelspec": { |
| "display_name": "Python 3", |
| "name": "python3" |
| }, |
| "language_info": { |
| "name": "python" |
| } |
| }, |
| "nbformat": 4, |
| "nbformat_minor": 0 |
| } |