{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "I0Z7vNS_ybbU"
},
"source": [
"This colab notebook uses DP-auditorium to test differentially private mechanisms computing aggregate statistics using PipelineDP."
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "wUtLsXpF9q4D"
},
"source": [
"\u003ctable class=\"tfo-notebook-buttons\" align=\"left\"\u003e\n",
" \u003ctd\u003e\n",
" \u003ca target=\"_blank\" href=\"https://colab.research.google.com/github/google/differential-privacy/blob/main/python/dp_auditorium/dp_auditorium/examples/pipelinedp_mean_mechanism_example.ipynb\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/colab_logo_32px.png\" /\u003eRun in Google Colab\u003c/a\u003e\n",
" \u003c/td\u003e\n",
" \u003ctd\u003e\n",
" \u003ca target=\"_blank\" href=\"https://github.com/google/differential-privacy/blob/main/python/dp_auditorium/dp_auditorium/examples/pipelinedp_mean_mechanism_example.ipynb\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/GitHub-Mark-32px.png\" /\u003eView source on GitHub\u003c/a\u003e\n",
" \u003c/td\u003e\n",
"\u003c/table\u003e\n",
"\n",
"\u003cbr\u003e\n",
"\u003cbr\u003e"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "WPLSKwjEHfXI"
},
"outputs": [],
"source": [
"#@title Install and import dp_auditorium and all necessary libraries.\n",
"!pip install google-vizier equinox pipeline_dp\n",
"!git clone https://github.com/google/differential-privacy.git\n",
"import sys\n",
"sys.path.append('differential-privacy/python/dp_auditorium')\n",
"\n",
"from dp_auditorium import privacy_test_runner\n",
"from dp_auditorium.generators import pipeline_dp_vizier_dataset_generator\n",
"from dp_auditorium.configs import dataset_generator_config\n",
"from dp_auditorium.configs import privacy_property\n",
"from dp_auditorium.configs import privacy_test_runner_config\n",
"from dp_auditorium.configs import property_tester_config\n",
"from dp_auditorium.mechanisms.pipeline_dp import aggregation as pipeline_dp_mechanism\n",
"from dp_auditorium.testers import hockey_stick_tester\n",
"\n",
"import pipeline_dp\n",
"import tensorflow as tf\n",
"tf.compat.v1.enable_eager_execution()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "dr5A5W7Aq2SO"
},
"outputs": [],
"source": [
"#@title Example of testing PipelineDP mean mechanism\n",
"import time\n",
"\n",
"def pipeline_dp_mean_mechanism_report(\n",
" epsilon: float,\n",
" delta: float,\n",
" seed: int,\n",
" max_number_partitions: int = 10,\n",
") -\u003e privacy_test_runner_config.PrivacyTestRunnerResults:\n",
" \"\"\"Runs the example code for a mean mechanism.\n",
"\n",
" Args:\n",
" epsilon: standard approximate DP parameter.\n",
" delta: standard approximate DP parameter.\n",
" seed: seed to initialize the random number generator.\n",
" max_number_partitions: maximum number of partitions which can be used by\n",
" dataset generator.\n",
"\n",
" Returns:\n",
" The result of the example code as PrivacyTestRunnerResults.\n",
" \"\"\"\n",
" tf.random.set_seed(seed)\n",
"\n",
" # Specify a config for computing with PipeineDP Mean aggregation, namely\n",
" # computing mean aggregation per partition, i.e. in SQL terms DP version of\n",
" # SELECT partition_key, mean(value)\n",
" # GROUP BY partition_key\n",
" # is computed.\n",
" # See https://pipelinedp.io/key-definitions/ on more details of PipelineDP terminology.\n",
" mech_config = pipeline_dp.AggregateParams(\n",
" metrics=[pipeline_dp.Metrics.MEAN],\n",
" # Laplace noise is used for ensuring DP\n",
" noise_kind=pipeline_dp.NoiseKind.LAPLACE,\n",
" # Set contribution bounds:\n",
"\n",
" # 1. If some privacy unit contributes more than to 1 partition then\n",
" # PipelineDP will choose randomly 1 partition, contributions to others\n",
" # will be dropped.\n",
" max_partitions_contributed=1,\n",
" # 2. If some privacy unit contributes to more than 1 time to some\n",
" # partition then PipelineDP will choose randomly 1 contribution, others\n",
" # contribution will be dropped\n",
" max_contributions_per_partition=1,\n",
"\n",
" # 3. Each contributions will be clipped to [-1, 1].\n",
" min_value=-1.0,\n",
" max_value=1.0)\n",
"\n",
" # Initialize the mechanism.\n",
" public_partitions = list(range(max_number_partitions))\n",
" mechanism = pipeline_dp_mechanism.AggregationMechanism(mech_config,\n",
" privacy_property.ApproximateDp(\n",
" epsilon=epsilon,\n",
" delta=delta,\n",
" ), public_partitions)\n",
"\n",
" # Configuration for a Hockey-Stick property tester. Given arrays s1 and s2\n",
" # with samples from two distributions it will estimate the hockey-stick\n",
" # divergence between the underlying distributions. It checks if the estimated\n",
" # divergence is bounded by delta.\n",
" tester_config = property_tester_config.HockeyStickPropertyTesterConfig(\n",
" training_config=hockey_stick_tester.make_default_hs_training_config(),\n",
" approximate_dp=privacy_property.ApproximateDp(\n",
" epsilon=epsilon,\n",
" delta=delta,\n",
" ),\n",
" )\n",
"\n",
" # Initialize a classifier model for the Hockey-Stick property tester.\n",
" # This classifier will learn to distinguish between samples of the mechanism\n",
" # on adjacent datasets. Its accuracy level should be controlled by the privacy\n",
" # guarantee.\n",
" base_model = hockey_stick_tester.make_default_hs_base_model()\n",
" # Initialize a property tester.\n",
" property_tester = hockey_stick_tester.HockeyStickPropertyTester(\n",
" config=tester_config,\n",
" base_model=base_model,\n",
" )\n",
"\n",
" # Configuration for dataset generator. It generates neighboring datasets under\n",
" # the add/remove definition. Unique study name prevents using cached results\n",
" # from previous runs.\n",
" generator_config = dataset_generator_config.VizierDatasetGeneratorConfig(\n",
" study_name=str(time.time()),\n",
" study_owner=\"owner\",\n",
" num_vizier_parameters=2,\n",
" data_type=dataset_generator_config.DataType.DATA_TYPE_FLOAT,\n",
" min_value=-1.0,\n",
" max_value=1.0,\n",
" search_algorithm=\"RANDOM_SEARCH\",\n",
" metric_name=\"hockey_stick_divergence\",\n",
" )\n",
"\n",
" # Dataset generator will generate datasets of not more than\n",
" # max_number_partitions partitions and not more than 10 privacy units.\n",
" # The same partitions are used as public_partitions and as partitions in\n",
" # dataset. So the mechanism will not drop the partitions. We do not check\n",
" # partition selection. We focus only on checking noise.\n",
" pipeline_dp_generator_config = pipeline_dp_vizier_dataset_generator.PipelineDpDatasetGeneratorConfig(\n",
" max_num_privacy_ids=10, max_num_partitions=max_number_partitions)\n",
"\n",
" # Initialize the dataset generator.\n",
" dataset_generator = pipeline_dp_vizier_dataset_generator.PipelineDpDatasetGenerator(\n",
" generator_config, pipeline_dp_generator_config)\n",
"\n",
" # Configuration for the test runner.\n",
" # The test runner coordinates how the test is evaluated. It receives a\n",
" # dataset generator, a property tester and a configuration (see base class for\n",
" # details on these parameters), and runs privacy tests using the property\n",
" # tester on datasets generated by the dataset generator.\n",
" test_runner_config = privacy_test_runner_config.PrivacyTestRunnerConfig(\n",
" property_tester=privacy_test_runner_config.PropertyTester.HOCKEY_STICK_TESTER,\n",
" max_num_trials=10,\n",
" failure_probability=0.05,\n",
" num_samples=10_000,\n",
" # Apply a hyperbolic tangent function to the output of the mechanism\n",
" post_processing=privacy_test_runner_config.PostProcessing.TANH,\n",
" )\n",
" # Initialize the test runner.\n",
" test_runner = privacy_test_runner.PrivacyTestRunner(\n",
" config=test_runner_config,\n",
" dataset_generator=dataset_generator,\n",
" property_tester=property_tester,\n",
" )\n",
"\n",
" return test_runner.test_privacy(mechanism, \"pipeline_dp-mean-mechanism\")\n",
"\n",
"\n",
"EPSILON = 1.0\n",
"DELTA = 1e-5\n",
"SEED = 1\n",
"\n",
"# The results indicate whether a privacy violation was identified within the\n",
"# designated number of trials defined in the configuration. In the absence of a\n",
"# violation, a message is returned indicating that the limit of the number of\n",
"# trials has been reached. For reference, all computed divergences across all\n",
"# trials are also reported.\n",
"results = pipeline_dp_mean_mechanism_report(EPSILON, DELTA, SEED)\n",
"print(f\" \\nResults: \\n{results}\")\n",
"if results.found_privacy_violation is not None:\n",
" print(\"Privacy violations found!\")\n"
]
}
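,
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The cell below is a minimal, self-contained sketch of the quantity the Hockey-Stick property tester estimates; it is an illustration only, not part of the DP-Auditorium API, and the helper `empirical_hockey_stick` is hypothetical. It Monte-Carlo-estimates D_{e^eps}(P || Q) for a one-dimensional Laplace mechanism on two neighboring sensitivity-1 sum queries: a correctly calibrated scale of 1/eps yields a divergence close to 0 (the mechanism is eps-DP), while undersized noise yields a clearly positive divergence, i.e. a privacy violation."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#@title Standalone sketch (illustration only) of the hockey-stick divergence\n",
"import numpy as np\n",
"\n",
"\n",
"def laplace_logpdf(x, mu, b):\n",
"  \"\"\"Log-density of Laplace(mu, b) at x.\"\"\"\n",
"  return -np.abs(x - mu) / b - np.log(2.0 * b)\n",
"\n",
"\n",
"def empirical_hockey_stick(eps, scale, num_samples=200_000, seed=0):\n",
"  \"\"\"Monte-Carlo estimate of D_{e^eps}(P || Q) with P = Laplace(0, scale)\n",
"  and Q = Laplace(1, scale): the output distributions of a Laplace\n",
"  mechanism on two neighboring sensitivity-1 sum queries.\"\"\"\n",
"  rng = np.random.default_rng(seed)\n",
"  x = rng.laplace(loc=0.0, scale=scale, size=num_samples)  # x ~ P\n",
"  log_p = laplace_logpdf(x, 0.0, scale)\n",
"  log_q = laplace_logpdf(x, 1.0, scale)\n",
"  # D_{e^eps}(P || Q) = E_{x~P}[max(1 - e^eps * q(x) / p(x), 0)].\n",
"  return np.mean(np.maximum(1.0 - np.exp(eps + log_q - log_p), 0.0))\n",
"\n",
"\n",
"eps = 1.0\n",
"# Correctly calibrated Laplace noise (scale 1/eps) is eps-DP, so the\n",
"# divergence should be close to 0.\n",
"print(empirical_hockey_stick(eps, scale=1.0 / eps))\n",
"# Undersized noise (scale 0.5/eps) violates eps-DP, so the divergence is\n",
"# clearly positive (analytically about 0.39 for eps=1).\n",
"print(empirical_hockey_stick(eps, scale=0.5 / eps))"
]
}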
],
"metadata": {
"colab": {
"private_outputs": true,
"provenance": [
{
"file_id": "1QyFD_doucyHewiRMtxGvFxNrFlgbCqQa",
"timestamp": 1708693099970
},
{
"file_id": "1pBgTlH19OwJ3diUYf3m3QaZcVNQGeB8B",
"timestamp": 1708692052606
}
]
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 0
}