tools/basic_rappor_mle.py - cobalt - Git at Google

 #!/usr/bin/env python
 # Copyright 2016 The Fuchsia Authors
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #    http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.

 # Computes the Maximum-Likelihood Estimator for Basic Rappor
 # This script is only for manual use by Cobalt developers. It is not part
 # of the Cobalt runtime. It is not part of any automated system. It is not
 # tested as part of the standard Cobalt tests but it does include some
 # tests that may be run manually.

 # Computes n-choose-k. This code was copied from:
 # http://stackoverflow.com/questions/3025162/statistics-combinations-in-python
 def choose(n, k):
     """
     A fast way to calculate binomial coefficients by Andrew Dalke (contrib).
     """
     if 0 <= k <= n:
         ntok = 1
         ktok = 1
         for t in xrange(1, min(k, n - k) + 1):
             ntok *= n
             ktok *= t
             n -= 1
         return ntok // ktok
     else:
         return 0

 # Tests the function choose(). The test prints an error message and returns
 # False if it finds an error in the choose() function.
 def test_choose():
   for n in xrange(0, 100, 11):
     sum = 0
     for k in xrange(0, n+1):
       sum = sum + choose(n, k)
     if sum != 2**n:
       print "**** test_choose failed for n=%d with sum=" % sum
       return False
   return True

 # Computes and returns the value of the probability mass function
 # Prob(Y=y) where Y ~ Binomial(q, lam) + Binomial(p, n - lam)
 # p and q must be floats in the range [0, 1]
 # lam, n and y must be integers satisfying
 # n > 0
 # 0 <= lam <= n
 # 0 <= y <= n
 def pmf(lam, n, y, p, q):
   prob = 0
   # "lam" stands for lambda which is a reserved word in Python.
   #
   # Consider Y to be the sum of lambda Bernoulli(q) variables and n - lambda
   # Bernoulli(p) variables. We think of the desired probability as being
   # the sum of terms from min_i to max_i where each term is the probability
   # that i of the Bernoulli(q) variables and y - i of the Bernoulli(p) variables
   # equal 1 and the other Bernoulli variables equal 0.
   min_i = max(0, y + lam - n)
   max_i = min(y, lam)
   for i in xrange(min_i, max_i + 1):
     prob = (prob + choose(lam, i) * q**i * (1.0 - q)**(lam - i) *
       choose(n - lam, y - i) * p**(y-i) * (1.0 - p)**(n - lam - y + i))
   return prob

 # Computes and returns the value of lambda that maximizes the value
 # of pmf(lambda, n, y, p, q) over all lambda from 0 to n.
 def mle(n, y, p, q):
   best_estimate = 0
   max_prob = 0
   for lam in xrange(0, n+1):
     prob = pmf(lam, n, y, p, q)
     if prob > max_prob:
       max_prob = prob
       best_estimate = lam
   return best_estimate

 # Tests that the function pmf is in fact a probability mass function for
 # the given parameters. The test prints a failure message and returns False
 # if the sum of pmf(lam, n, y, p, q) over y in [0, n] is not equal to 1
 def do_pmf_test(lam, n, p, q):
   prob = 0;
   for y in xrange(0, n+1):
     prob = prob + pmf(lam, n, y, p, q)
   if prob < 0.999999 or prob > 1.000001:
     print "**** test_pmf failed for lam=%d, n=%d, p=%f, q=%f, prob=%f" % (
         lam, n, p, q, prob)
     return False
   return True

 def test_pmf():
   for n in {1, 10, 31, 54}:
     for lam in xrange(0, n+1):
       q = 0.9
       p = 0.1
       if not do_pmf_test(lam, n, p, q):
         return False
       q = 0.75
       p = 0.25
       if not do_pmf_test(lam, n, p, q):
         return False
       q = 0.7
       p = 0.4
       if not do_pmf_test(lam, n, p, q):
         return False
       q = 0.7
       p = 0.2
       if not do_pmf_test(lam, n, p, q):
         return False
       q = 0.5
       p = 0.0
       if not do_pmf_test(lam, n, p, q):
         return False
   return True

 def main():
   n = 100
   p = 0.2
   q = 0.8
   print "\n"
   print "Basic RAPPOR Maximum-Likelihood Estimates"
   print "p=%f q=%f n=%d\n" % (p, q, n)
   for y in xrange(0, n+1):
     print "--------------------------------------"
     print "For y=%d..." % y
     print "unbiased estimate=%f" % ((float(y) - n*p)/(q-p))
     print "maximum-likelihood estimate=%d" % mle(n, y, p, q)

   print "\n"
   print "--------------------------------------"
   print "--------------------------------------"
   print "Running tests of this script"
   if test_choose():
     print "test_choose passed"
   if test_pmf():
     print "test_pmf passed"


 if __name__ == '__main__':
   main()
	#!/usr/bin/env python
	# Copyright 2016 The Fuchsia Authors
	#
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.

	# Computes the Maximum-Likelihood Estimator for Basic Rappor
	# This script is only for manual use by Cobalt developers. It is not part
	# of the Cobalt runtime. It is not part of any automated system. It is not
	# tested as part of the standard Cobalt tests but it does include some
	# tests that may be run manually.

	# Computes n-choose-k. This code was copied from:
	# http://stackoverflow.com/questions/3025162/statistics-combinations-in-python
	def choose(n, k):
	"""
	A fast way to calculate binomial coefficients by Andrew Dalke (contrib).
	"""
	if 0 <= k <= n:
	ntok = 1
	ktok = 1
	for t in xrange(1, min(k, n - k) + 1):
	ntok *= n
	ktok *= t
	n -= 1
	return ntok // ktok
	else:
	return 0

	# Tests the function choose(). The test prints an error message and returns
	# False if it finds an error in the choose() function.
	def test_choose():
	for n in xrange(0, 100, 11):
	sum = 0
	for k in xrange(0, n+1):
	sum = sum + choose(n, k)
	if sum != 2**n:
	print "**** test_choose failed for n=%d with sum=" % sum
	return False
	return True

	# Computes and returns the value of the probability mass function
	# Prob(Y=y) where Y ~ Binomial(q, lam) + Binomial(p, n - lam)
	# p and q must be floats in the range [0, 1]
	# lam, n and y must be integers satisfying
	# n > 0
	# 0 <= lam <= n
	# 0 <= y <= n
	def pmf(lam, n, y, p, q):
	prob = 0
	# "lam" stands for lambda which is a reserved word in Python.
	#
	# Consider Y to be the sum of lambda Bernoulli(q) variables and n - lambda
	# Bernoulli(p) variables. We think of the desired probability as being
	# the sum of terms from min_i to max_i where each term is the probability
	# that i of the Bernoulli(q) variables and y - i of the Bernoulli(p) variables
	# equal 1 and the other Bernoulli variables equal 0.
	min_i = max(0, y + lam - n)
	max_i = min(y, lam)
	for i in xrange(min_i, max_i + 1):
	prob = (prob + choose(lam, i) * q*i (1.0 - q)*(lam - i)
	choose(n - lam, y - i) * p*(y-i) (1.0 - p)**(n - lam - y + i))
	return prob

	# Computes and returns the value of lambda that maximizes the value
	# of pmf(lambda, n, y, p, q) over all lambda from 0 to n.
	def mle(n, y, p, q):
	best_estimate = 0
	max_prob = 0
	for lam in xrange(0, n+1):
	prob = pmf(lam, n, y, p, q)
	if prob > max_prob:
	max_prob = prob
	best_estimate = lam
	return best_estimate

	# Tests that the function pmf is in fact a probability mass function for
	# the given parameters. The test prints a failure message and returns False
	# if the sum of pmf(lam, n, y, p, q) over y in [0, n] is not equal to 1
	def do_pmf_test(lam, n, p, q):
	prob = 0;
	for y in xrange(0, n+1):
	prob = prob + pmf(lam, n, y, p, q)
	if prob < 0.999999 or prob > 1.000001:
	print "**** test_pmf failed for lam=%d, n=%d, p=%f, q=%f, prob=%f" % (
	lam, n, p, q, prob)
	return False
	return True

	def test_pmf():
	for n in {1, 10, 31, 54}:
	for lam in xrange(0, n+1):
	q = 0.9
	p = 0.1
	if not do_pmf_test(lam, n, p, q):
	return False
	q = 0.75
	p = 0.25
	if not do_pmf_test(lam, n, p, q):
	return False
	q = 0.7
	p = 0.4
	if not do_pmf_test(lam, n, p, q):
	return False
	q = 0.7
	p = 0.2
	if not do_pmf_test(lam, n, p, q):
	return False
	q = 0.5
	p = 0.0
	if not do_pmf_test(lam, n, p, q):
	return False
	return True

	def main():
	n = 100
	p = 0.2
	q = 0.8
	print "\n"
	print "Basic RAPPOR Maximum-Likelihood Estimates"
	print "p=%f q=%f n=%d\n" % (p, q, n)
	for y in xrange(0, n+1):
	print "--------------------------------------"
	print "For y=%d..." % y
	print "unbiased estimate=%f" % ((float(y) - n*p)/(q-p))
	print "maximum-likelihood estimate=%d" % mle(n, y, p, q)

	print "\n"
	print "--------------------------------------"
	print "--------------------------------------"
	print "Running tests of this script"
	if test_choose():
	print "test_choose passed"
	if test_pmf():
	print "test_pmf passed"


	if __name__ == '__main__':
	main()