blob: 52681f0a2a297ca01c8456b016ddaded83d9d8ae [file] [log] [blame]
#!/usr/bin/env python3
# Copyright 2016 The Fuchsia Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
# Computes the Maximum-Likelihood Estimator for Basic Rappor
# This script is only for manual use by Cobalt developers. It is not part
# of the Cobalt runtime. It is not part of any automated system. It is not
# tested as part of the standard Cobalt tests but it does include some
# tests that may be run manually.
# Computes n-choose-k. This code was copied from:
# http://stackoverflow.com/questions/3025162/statistics-combinations-in-python
from __future__ import print_function
def choose(n, k):
"""
A fast way to calculate binomial coefficients by Andrew Dalke (contrib).
"""
if 0 <= k <= n:
ntok = 1
ktok = 1
for t in xrange(1, min(k, n - k) + 1):
ntok *= n
ktok *= t
n -= 1
return ntok // ktok
else:
return 0
# Tests the function choose(). The test prints an error message and returns
# False if it finds an error in the choose() function.
def test_choose():
for n in xrange(0, 100, 11):
sum = 0
for k in xrange(0, n + 1):
sum = sum + choose(n, k)
if sum != 2**n:
print("**** test_choose failed for n=%d with sum=" % sum)
return False
return True
# Computes and returns the value of the probability mass function
# Prob(Y=y) where Y ~ Binomial(q, lam) + Binomial(p, n - lam)
# p and q must be floats in the range [0, 1]
# lam, n and y must be integers satisfying
# n > 0
# 0 <= lam <= n
# 0 <= y <= n
def pmf(lam, n, y, p, q):
prob = 0
# "lam" stands for lambda which is a reserved word in Python.
#
# Consider Y to be the sum of lambda Bernoulli(q) variables and n - lambda
# Bernoulli(p) variables. We think of the desired probability as being
# the sum of terms from min_i to max_i where each term is the probability
# that i of the Bernoulli(q) variables and y - i of the Bernoulli(p) variables
# equal 1 and the other Bernoulli variables equal 0.
min_i = max(0, y + lam - n)
max_i = min(y, lam)
for i in xrange(min_i, max_i + 1):
prob = (
prob + choose(lam, i) * q**i *
(1.0 - q)**(lam - i) * choose(n - lam, y - i) * p**(y - i) *
(1.0 - p)**(n - lam - y + i))
return prob
# Computes and returns the value of lambda that maximizes the value
# of pmf(lambda, n, y, p, q) over all lambda from 0 to n.
def mle(n, y, p, q):
best_estimate = 0
max_prob = 0
for lam in xrange(0, n + 1):
prob = pmf(lam, n, y, p, q)
if prob > max_prob:
max_prob = prob
best_estimate = lam
return best_estimate
# Tests that the function pmf is in fact a probability mass function for
# the given parameters. The test prints a failure message and returns False
# if the sum of pmf(lam, n, y, p, q) over y in [0, n] is not equal to 1
def do_pmf_test(lam, n, p, q):
prob = 0
for y in xrange(0, n + 1):
prob = prob + pmf(lam, n, y, p, q)
if prob < 0.999999 or prob > 1.000001:
print("**** test_pmf failed for lam=%d, n=%d, p=%f, q=%f, prob=%f" %
(lam, n, p, q, prob))
return False
return True
def test_pmf():
for n in {1, 10, 31, 54}:
for lam in xrange(0, n + 1):
q = 0.9
p = 0.1
if not do_pmf_test(lam, n, p, q):
return False
q = 0.75
p = 0.25
if not do_pmf_test(lam, n, p, q):
return False
q = 0.7
p = 0.4
if not do_pmf_test(lam, n, p, q):
return False
q = 0.7
p = 0.2
if not do_pmf_test(lam, n, p, q):
return False
q = 0.5
p = 0.0
if not do_pmf_test(lam, n, p, q):
return False
return True
def main():
n = 100
p = 0.2
q = 0.8
print("\n")
print("Basic RAPPOR Maximum-Likelihood Estimates")
print("p=%f q=%f n=%d\n" % (p, q, n))
for y in xrange(0, n + 1):
print("--------------------------------------")
print("For y=%d..." % y)
print("unbiased estimate=%f" % ((float(y) - n * p) / (q - p)))
print("maximum-likelihood estimate=%d" % mle(n, y, p, q))
print("\n")
print("--------------------------------------")
print("--------------------------------------")
print("Running tests of this script")
if test_choose():
print("test_choose passed")
if test_pmf():
print("test_pmf passed")
if __name__ == "__main__":
main()