| #!/usr/bin/env python |
| # Copyright 2016 The Fuchsia Authors |
| # |
| # Licensed under the Apache License, Version 2.0 (the "License"); |
| # you may not use this file except in compliance with the License. |
| # You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, software |
| # distributed under the License is distributed on an "AS IS" BASIS, |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| # See the License for the specific language governing permissions and |
| # limitations under the License. |
| |
| ''' Contains utilities for reading and writing files. Also contains constants |
| for the names of directories and files that are used by more than one |
| component. |
| ''' |
| |
| import csv |
| import os |
| import sys |
| |
# Directories. All of them are derived from the location of this file:
# ROOT_DIR is the parent of the directory that contains this file.
THIS_DIR = os.path.dirname(__file__)
ROOT_DIR = os.path.abspath(os.path.join(THIS_DIR, os.path.pardir))
OUT_DIR = os.path.abspath(os.path.join(ROOT_DIR, 'out'))

ANALYZER_TMP_OUT_DIR = os.path.abspath(os.path.join(OUT_DIR, 'analyzer_tmp'))
CACHE_DIR = os.path.abspath(os.path.join(ROOT_DIR, 'cache'))
CONFIG_DIR = os.path.abspath(os.path.join(ROOT_DIR, 'config_files'))
R_TO_S_DIR = os.path.abspath(os.path.join(OUT_DIR, 'r_to_s'))
S_TO_A_DIR = os.path.abspath(os.path.join(OUT_DIR, 's_to_a'))
VISUALIZATION_DIR = os.path.abspath(os.path.join(ROOT_DIR, 'visualization'))

# The name of the file we write containing the synthetic, random input data.
# This file is the input to both the straight counting pipeline and the
# Cobalt prototype pipeline.
GENERATED_INPUT_DATA_FILE_NAME = 'input_data.csv'

# The names of the config files for RAPPOR.
# Each config file should comprise two lines:
#   Line 1: k,h,m,p,q,f
#   Line 2: RAPPOR params (# bloom bits, # hashes, # cohorts, and noise
#           probabilities p, q, and f.)
RAPPOR_CITY_NAME_CONFIG = 'rappor_city_config.csv'
RAPPOR_RATING_CONFIG = 'rappor_rating_config.csv'
RAPPOR_CITY_RATINGS_ASSOC_CONFIG = 'rappor_city_ratings_assoc.csv'
RAPPOR_MODULE_NAME_CONFIG = 'rappor_module_config.csv'
RAPPOR_HOUR_CONFIG = 'rappor_hour_config.csv'
# RAPPOR config file for the private-release pipeline. We use very weak
# RAPPOR params because we will add Laplace noise at the end.
RAPPOR_MODULE_NAME_PR_CONFIG = 'rappor_module_pr_config.csv'

# The names of the RAPPOR candidate files. (Not needed for basic RAPPOR.)
CITY_CANDIDATES_FILE_NAME = 'rappor_city_candidates.csv'
MODULE_CANDIDATES_FILE_NAME = 'rappor_module_candidates.csv'

# The names of the basic RAPPOR map files. (Only needed for basic RAPPOR.)
HOUR_BASIC_MAP_FILE_NAME = 'rappor_hour_map.csv'

# The names of the Forculus config files.
FORCULUS_HELP_QUERY_CONFIG = 'forculus_help_query_config.csv'
FORCULUS_URL_CONFIG = 'forculus_url_config.csv'

# The names of the randomizer output files.
HELP_QUERY_RANDOMIZER_OUTPUT_FILE_NAME = 'help_query_randomizer_out.csv'
CITY_RANDOMIZER_OUTPUT_FILE_NAME = 'city_randomizer_out.csv'
MODULE_NAME_RANDOMIZER_OUTPUT_FILE_NAME = 'module_name_randomizer_out.csv'
MODULE_NAME_PR_RANDOMIZER_OUTPUT_FILE_NAME = (
    'module_name_pr_randomizer_out.csv')
HOUR_RANDOMIZER_OUTPUT_FILE_NAME = 'hour_randomizer_out.csv'
URL_RANDOMIZER_OUTPUT_FILE_NAME = 'url_randomizer_out.csv'

# The names of the shuffler output files.
HELP_QUERY_SHUFFLER_OUTPUT_FILE_NAME = 'help_query_shuffler_out.csv'
CITY_SHUFFLER_OUTPUT_FILE_NAME = 'city_shuffler_out.csv'
MODULE_NAME_SHUFFLER_OUTPUT_FILE_NAME = 'module_name_shuffler_out.csv'
MODULE_NAME_PR_SHUFFLER_OUTPUT_FILE_NAME = 'module_name_pr_shuffler_out.csv'
HOUR_SHUFFLER_OUTPUT_FILE_NAME = 'hour_shuffler_out.csv'
URL_SHUFFLER_OUTPUT_FILE_NAME = 'url_shuffler_out.csv'

# The names of the analyzer output files.
HELP_QUERY_ANALYZER_OUTPUT_FILE_NAME = 'help_query_analyzer_out.csv'
CITY_NAMES_ANALYZER_OUTPUT_FILE_NAME = 'city_names_analyzer_out.csv'
CITY_RATINGS_ANALYZER_OUTPUT_FILE_NAME = 'city_ratings_analyzer_out.csv'
MODULE_NAME_ANALYZER_OUTPUT_FILE_NAME = 'module_name_analyzer_out.csv'
MODULE_NAME_PR_ANALYZER_OUTPUT_FILE_NAME = 'module_name_pr_analyzer_out.csv'
HOUR_ANALYZER_OUTPUT_FILE_NAME = 'hour_analyzer_out.csv'
URL_ANALYZER_OUTPUT_FILE_NAME = 'url_analyzer_out.csv'


# The names of the csv files written by the direct-counting pipeline.
USAGE_BY_MODULE_CSV_FILE_NAME = 'usage_by_module.csv'
USAGE_BY_CITY_CSV_FILE_NAME = 'usage_and_rating_by_city.csv'
USAGE_BY_HOUR_CSV_FILE_NAME = 'usage_by_hour.csv'
POPULAR_HELP_QUERIES_CSV_FILE_NAME = 'popular_help_queries.csv'
POPULAR_URLS_CSV_FILE_NAME = 'popular_urls.csv'

# The full path of the visualization page inside VISUALIZATION_DIR.
VISUALIZATION_FILE = os.path.join(VISUALIZATION_DIR, 'visualization.html')
| |
def openFileForReading(file_name, dir_path):
    """Returns a binary-mode read handle for |file_name| inside |dir_path|.

    The existence of the file is checked first; a missing file is reported
    with an Exception whose message contains the full path.

    Args:
      file_name {string} The simple name of the file.
      dir_path {string} The path of the directory containing the file.

    Returns:
      The file object returned by open(), opened in 'rb' mode.

    Raises:
      Exception: if nothing exists at the joined path.
    """
    full_path = os.path.join(dir_path, file_name)
    if os.path.exists(full_path):
        return open(full_path, 'rb')
    raise Exception('File does not exist: %s' % full_path)
| |
def openForReading(name):
    """Returns a read handle for the file called |name| in the |out| directory.

    This is openFileForReading() with the directory fixed to OUT_DIR, so an
    exception is thrown if the file does not exist.

    Args:
      name {string} The simple name of the file to be found in the |out| dir.
    """
    return openFileForReading(file_name=name, dir_path=OUT_DIR)
| |
def openForShufflerReading(name):
    """Returns a read handle for the file called |name| in out/r_to_s.

    This is openFileForReading() with the directory fixed to R_TO_S_DIR, so
    an exception is thrown if the file does not exist.

    Args:
      name {string} The simple name of the file to be found in the
          out/r_to_s dir.
    """
    return openFileForReading(file_name=name, dir_path=R_TO_S_DIR)
| |
def openForAnalyzerReading(name):
    """Returns a read handle for the file called |name| in out/s_to_a.

    This is openFileForReading() with the directory fixed to S_TO_A_DIR, so
    an exception is thrown if the file does not exist.

    Args:
      name {string} The simple name of the file to be found in the
          out/s_to_a dir.
    """
    return openFileForReading(file_name=name, dir_path=S_TO_A_DIR)
| |
def ensureDir(dir_path):
    """Ensures that the directory at |dir_path| exists. If not it is created.

    Missing intermediate directories are created as well. Calling this for a
    directory that already exists is a no-op.

    Args:
      dir_path {string} The path to a directory. If it does not exist it will
          be created.

    Raises:
      OSError: if the directory could not be created and no directory exists
          at |dir_path| afterwards (for example because a regular file
          already occupies that path).
    """
    # Attempt the creation directly instead of checking for existence first:
    # a separate check leaves a window in which another process can create
    # the directory, which would make makedirs() fail spuriously.
    try:
        os.makedirs(dir_path)
    except OSError:
        # The directory already being there is the outcome we want; any
        # other failure is a real error and is propagated unchanged.
        if not os.path.isdir(dir_path):
            raise
| |
def openFileForWriting(file_name, dir_path):
    """Opens the file with the given name in the given directory for writing.

    The directory is created first if it does not exist. The file is opened
    in 'w+b' mode.

    Args:
      file_name {string} The simple name of the file.
      dir_path {string} The path of the directory.

    Returns:
      The file object returned by open().
    """
    # The directory has to be in place before the file can be opened in it.
    ensureDir(dir_path)
    target_path = os.path.join(dir_path, file_name)
    return open(target_path, 'w+b')
| |
def openForWriting(name):
    """Opens the file with the given name in the |out| directory for writing.

    This is openFileForWriting() with the directory fixed to OUT_DIR.

    Args:
      name {string} The simple name of the file to write in the |out| dir.
    """
    return openFileForWriting(file_name=name, dir_path=OUT_DIR)
| |
def openForAnalyzerTempWriting(name):
    """Opens the file with the given name in out/analyzer_tmp for writing.

    This is openFileForWriting() with the directory fixed to
    ANALYZER_TMP_OUT_DIR.

    Args:
      name {string} The simple name of the file to write in the
          out/analyzer_tmp dir.
    """
    return openFileForWriting(file_name=name, dir_path=ANALYZER_TMP_OUT_DIR)
| |
def openForRandomizerWriting(name):
    """Opens the file with the given name in out/r_to_s for writing.

    This is openFileForWriting() with the directory fixed to R_TO_S_DIR,
    the same directory that openForShufflerReading() reads from.

    Args:
      name {string} The simple name of the file to write in the
          out/r_to_s dir.
    """
    return openFileForWriting(file_name=name, dir_path=R_TO_S_DIR)
| |
def openForShufflerWriting(name):
    """Opens the file with the given name in out/s_to_a for writing.

    This is openFileForWriting() with the directory fixed to S_TO_A_DIR,
    the same directory that openForAnalyzerReading() reads from.

    Args:
      name {string} The simple name of the file to write in the
          out/s_to_a dir.
    """
    return openFileForWriting(file_name=name, dir_path=S_TO_A_DIR)