| #!/usr/bin/env python3 |
| # Copyright 2016 The Fuchsia Authors. All rights reserved. |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| |
| # Computes the Maximum-Likelihood Estimator for Basic Rappor |
| # This script is only for manual use by Cobalt developers. It is not part |
| # of the Cobalt runtime. It is not part of any automated system. It is not |
| # tested as part of the standard Cobalt tests but it does include some |
| # tests that may be run manually. |
| |
| # Computes n-choose-k. This code was copied from: |
| # http://stackoverflow.com/questions/3025162/statistics-combinations-in-python |
| |
| from __future__ import print_function |
| |
| |
| def choose(n, k): |
| """ |
| A fast way to calculate binomial coefficients by Andrew Dalke (contrib). |
| """ |
| if 0 <= k <= n: |
| ntok = 1 |
| ktok = 1 |
| for t in xrange(1, min(k, n - k) + 1): |
| ntok *= n |
| ktok *= t |
| n -= 1 |
| return ntok // ktok |
| else: |
| return 0 |
| |
| |
| # Tests the function choose(). The test prints an error message and returns |
| # False if it finds an error in the choose() function. |
| def test_choose(): |
| for n in xrange(0, 100, 11): |
| sum = 0 |
| for k in xrange(0, n + 1): |
| sum = sum + choose(n, k) |
| if sum != 2**n: |
| print("**** test_choose failed for n=%d with sum=" % sum) |
| return False |
| return True |
| |
| |
| # Computes and returns the value of the probability mass function |
| # Prob(Y=y) where Y ~ Binomial(q, lam) + Binomial(p, n - lam) |
| # p and q must be floats in the range [0, 1] |
| # lam, n and y must be integers satisfying |
| # n > 0 |
| # 0 <= lam <= n |
| # 0 <= y <= n |
| def pmf(lam, n, y, p, q): |
| prob = 0 |
| # "lam" stands for lambda which is a reserved word in Python. |
| # |
| # Consider Y to be the sum of lambda Bernoulli(q) variables and n - lambda |
| # Bernoulli(p) variables. We think of the desired probability as being |
| # the sum of terms from min_i to max_i where each term is the probability |
| # that i of the Bernoulli(q) variables and y - i of the Bernoulli(p) variables |
| # equal 1 and the other Bernoulli variables equal 0. |
| min_i = max(0, y + lam - n) |
| max_i = min(y, lam) |
| for i in xrange(min_i, max_i + 1): |
| prob = ( |
| prob + choose(lam, i) * q**i * |
| (1.0 - q)**(lam - i) * choose(n - lam, y - i) * p**(y - i) * |
| (1.0 - p)**(n - lam - y + i)) |
| return prob |
| |
| |
| # Computes and returns the value of lambda that maximizes the value |
| # of pmf(lambda, n, y, p, q) over all lambda from 0 to n. |
| def mle(n, y, p, q): |
| best_estimate = 0 |
| max_prob = 0 |
| for lam in xrange(0, n + 1): |
| prob = pmf(lam, n, y, p, q) |
| if prob > max_prob: |
| max_prob = prob |
| best_estimate = lam |
| return best_estimate |
| |
| |
| # Tests that the function pmf is in fact a probability mass function for |
| # the given parameters. The test prints a failure message and returns False |
| # if the sum of pmf(lam, n, y, p, q) over y in [0, n] is not equal to 1 |
| def do_pmf_test(lam, n, p, q): |
| prob = 0 |
| for y in xrange(0, n + 1): |
| prob = prob + pmf(lam, n, y, p, q) |
| if prob < 0.999999 or prob > 1.000001: |
| print("**** test_pmf failed for lam=%d, n=%d, p=%f, q=%f, prob=%f" % |
| (lam, n, p, q, prob)) |
| return False |
| return True |
| |
| |
| def test_pmf(): |
| for n in {1, 10, 31, 54}: |
| for lam in xrange(0, n + 1): |
| q = 0.9 |
| p = 0.1 |
| if not do_pmf_test(lam, n, p, q): |
| return False |
| q = 0.75 |
| p = 0.25 |
| if not do_pmf_test(lam, n, p, q): |
| return False |
| q = 0.7 |
| p = 0.4 |
| if not do_pmf_test(lam, n, p, q): |
| return False |
| q = 0.7 |
| p = 0.2 |
| if not do_pmf_test(lam, n, p, q): |
| return False |
| q = 0.5 |
| p = 0.0 |
| if not do_pmf_test(lam, n, p, q): |
| return False |
| return True |
| |
| |
| def main(): |
| n = 100 |
| p = 0.2 |
| q = 0.8 |
| print("\n") |
| print("Basic RAPPOR Maximum-Likelihood Estimates") |
| print("p=%f q=%f n=%d\n" % (p, q, n)) |
| for y in xrange(0, n + 1): |
| print("--------------------------------------") |
| print("For y=%d..." % y) |
| print("unbiased estimate=%f" % ((float(y) - n * p) / (q - p))) |
| print("maximum-likelihood estimate=%d" % mle(n, y, p, q)) |
| |
| print("\n") |
| print("--------------------------------------") |
| print("--------------------------------------") |
| print("Running tests of this script") |
| if test_choose(): |
| print("test_choose passed") |
| if test_pmf(): |
| print("test_pmf passed") |
| |
| |
| if __name__ == "__main__": |
| main() |