// Copyright 2018 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "algorithms/rappor/rappor_config_helper.h"
#include <string>
#include "./logging.h"
namespace cobalt {
namespace rappor {
// Value that ProbBitFlip() hands back when the ReportDefinition's
// local_privacy_noise_level is not one of the recognized levels, so that no
// bit-flip probability can be derived from it. It is negative and therefore
// can never be mistaken for a real probability.
const float RapporConfigHelper::kInvalidProbability = -1.0f;
// Cobalt does not support RAPPOR's PRR, so this constant is zero.
const float RapporConfigHelper::kProbRR = 0.0f;
//////////////////////////// p, q and f //////////////////////////////////
// This is relevant to Basic RAPPOR (via the SIMPLE_OCCURRENCE_COUNT
// Report type) and String RAPPOR (via the HIGH_FREQUENCY_STRING_COUNTS Report
// type.)
// The mapping from different values of LocalPrivacyNoiseLevel to
// RAPPOR parameters p and q. The user sets LocalPrivacyNoiseLevel on
// a per-report basis. We always use q = 1 - p and so we don't need to
// configure two values, only a single value we call ProbBitFlip. We
// set p = ProbBitFlip, q = 1 - ProbBitFlip.
// Bit-flip probability associated with each LocalPrivacyNoiseLevel, listed
// from least to most noise. ProbBitFlip() returns the entry that matches the
// report's configured level.
constexpr float kLocalPrivacyNoneProbBitFlip = 0.0f;    // NONE
constexpr float kLocalPrivacySmallProbBitFlip = 0.01f;  // SMALL
constexpr float kLocalPrivacyMediumProbBitFlip = 0.1f;  // MEDIUM
constexpr float kLocalPrivacyLargeProbBitFlip = 0.25f;  // LARGE
//////////////////////// m = num_cohorts ///////////////////////////////
// This is relevant to String RAPPOR (via the HIGH_FREQUENCY_STRING_COUNTS
// Report type.)
// |expected_population_size| is the user's estimate of the number of Fuchsia
// devices from which data is collected.
// The mapping from expected_population_size to num_cohorts
// Cohort counts selected by StringRapporNumCohorts() from the report's
// expected_population_size. Each k*PopulationSize constant is the exclusive
// upper bound of its bucket.
//
// expected_population_size == 0 (i.e. not set): 50 cohorts.
constexpr size_t kDefaultNumCohorts = 50;
// 0 < expected_population_size < 100: 5 cohorts.
constexpr size_t kTinyPopulationSize = 100;
constexpr size_t kTinyNumCohorts = 5;
// 100 <= expected_population_size < 1,000: 10 cohorts.
constexpr size_t kSmallPopulationSize = 1000;
constexpr size_t kSmallNumCohorts = 10;
// 1,000 <= expected_population_size < 10,000: 50 cohorts.
constexpr size_t kMediumPopulationSize = 10000;
constexpr size_t kMediumNumCohorts = 50;
// expected_population_size >= 10,000: 100 cohorts.
constexpr size_t kLargeNumCohorts = 100;
//////////////////////// k = num_bloom_bits //////////////////////////////
// This is relevant to String RAPPOR (via the HIGH_FREQUENCY_STRING_COUNTS
// Report type.)
// |expected_string_set_size| is the user's estimate of the number of candidate
// strings. This cannot be changed once data collection begins.
// The mapping from expected_string_set_size to num_bloom_bits
// Bloom filter sizes (in bits) selected by StringRapporNumBloomBits() from the
// report's expected_string_set_size. Each k*NumCandidates constant is the
// exclusive upper bound of its bucket.
//
// expected_string_set_size == 0 (i.e. not set): 32 bits.
constexpr size_t kDefaultNumBits = 32;
// 0 < expected_string_set_size < 100: 8 bits.
constexpr size_t kTinyNumCandidates = 100;
constexpr size_t kTinyNumBits = 8;
// 100 <= expected_string_set_size < 1,000: 16 bits.
constexpr size_t kSmallNumCandidates = 1000;
constexpr size_t kSmallNumBits = 16;
// 1,000 <= expected_string_set_size < 10,000: 64 bits.
constexpr size_t kMediumNumCandidates = 10000;
constexpr size_t kMediumNumBits = 64;
// expected_string_set_size >= 10,000: 128 bits.
constexpr size_t kLargeNumBits = 128;
// Translates the report's local_privacy_noise_level into the probability of
// flipping a bit.
//
// |report_definition| supplies the noise level (and the report name used in
// the error message); |metric_debug_name| is only used in the error message.
//
// Returns the probability for NONE, SMALL, MEDIUM or LARGE. For any other
// level an error is logged and kInvalidProbability is returned.
float RapporConfigHelper::ProbBitFlip(const ReportDefinition& report_definition,
                                      const std::string& metric_debug_name) {
  const auto noise_level = report_definition.local_privacy_noise_level();
  if (noise_level == ReportDefinition::NONE) {
    return kLocalPrivacyNoneProbBitFlip;
  }
  if (noise_level == ReportDefinition::SMALL) {
    return kLocalPrivacySmallProbBitFlip;
  }
  if (noise_level == ReportDefinition::MEDIUM) {
    return kLocalPrivacyMediumProbBitFlip;
  }
  if (noise_level == ReportDefinition::LARGE) {
    return kLocalPrivacyLargeProbBitFlip;
  }

  // Anything else is a configuration problem: report it and signal failure
  // with the sentinel.
  LOG(ERROR) << "Invalid Cobalt config: Report "
             << report_definition.report_name() << " from metric "
             << metric_debug_name
             << " does not have local_privacy_noise_level set to a "
                "recognized value.";
  return kInvalidProbability;
}
// Derives the number of Basic RAPPOR categories from |metric_definition|,
// based on how many metric_dimensions it declares:
//
// - None: the metric's own max_event_code() + 1 (this assumes an old
//   registry).
//
// - Exactly one: metric_dimensions(0).max_event_code() + 1 (this is the new
//   registry).
//
// - More than one: unsupported. An error is logged and 0 is returned.
//
// NOTE(review): if max_event_code() is a 32-bit unsigned value, the "+ 1"
// would wrap to 0 at its maximum -- confirm against the proto definition.
size_t RapporConfigHelper::BasicRapporNumCategories(
    const MetricDefinition& metric_definition) {
  switch (metric_definition.metric_dimensions_size()) {
    case 0:
      return metric_definition.max_event_code() + 1;
    case 1:
      return metric_definition.metric_dimensions(0).max_event_code() + 1;
    default:
      LOG(ERROR) << "Invalid Cobalt registry: Metric "
                 << metric_definition.metric_name()
                 << " has too many metric_dimensions.";
      return 0;
  }
}
// Chooses the number of String RAPPOR cohorts for |report_definition| by
// bucketing its expected_population_size. A value of 0 selects
// kDefaultNumCohorts; otherwise the value is compared against the
// k*PopulationSize thresholds in increasing order and the cohort count of the
// first bucket it falls below is returned, with kLargeNumCohorts used for
// anything at or above the last threshold.
size_t RapporConfigHelper::StringRapporNumCohorts(
    const ReportDefinition& report_definition) {
  const auto population_size = report_definition.expected_population_size();
  if (population_size == 0) {
    return kDefaultNumCohorts;
  }
  if (population_size < kTinyPopulationSize) {
    return kTinyNumCohorts;
  }
  if (population_size < kSmallPopulationSize) {
    return kSmallNumCohorts;
  }
  if (population_size < kMediumPopulationSize) {
    return kMediumNumCohorts;
  }
  return kLargeNumCohorts;
}
// Chooses the number of String RAPPOR Bloom filter bits for
// |report_definition| by bucketing its expected_string_set_size. A value of 0
// selects kDefaultNumBits; otherwise the value is compared against the
// k*NumCandidates thresholds in increasing order and the bit count of the
// first bucket it falls below is returned, with kLargeNumBits used for
// anything at or above the last threshold.
size_t RapporConfigHelper::StringRapporNumBloomBits(
    const ReportDefinition& report_definition) {
  const auto string_set_size = report_definition.expected_string_set_size();
  if (string_set_size == 0) {
    return kDefaultNumBits;
  }
  if (string_set_size < kTinyNumCandidates) {
    return kTinyNumBits;
  }
  if (string_set_size < kSmallNumCandidates) {
    return kSmallNumBits;
  }
  if (string_set_size < kMediumNumCandidates) {
    return kMediumNumBits;
  }
  return kLargeNumBits;
}
} // namespace rappor
} // namespace cobalt