Change DB API to find constant key IDs.

 - This changes the BuildEngine <-> DB interaction to rely on first getting a
   unique (interned, effectively) ID for all keys.

 - That change allows us to then change the BuildEngine-level result to store
   dependencies in terms of key IDs, instead of expanded rules. This results in
   dramatically reduced memory usage, but also since this matches the underlying
   database schema it allows for much more efficient loading of the result
   dependencies.

 - In particular, this eliminates several joins from the database
   implementation.

 - There is potentially more we could do here to eliminate the ancillary key
   table entirely and simply make the results themselves serve as the canonical
   identifier for a rule, but that requires a little more work.

 - This is good for a 35% speedup on null builds of large build graphs.

 - <rdar://problem/32139612> Change RuleResult to store key IDs
diff --git a/include/llbuild/Core/BuildDB.h b/include/llbuild/Core/BuildDB.h
index a6dd467..af08ce3 100644
--- a/include/llbuild/Core/BuildDB.h
+++ b/include/llbuild/Core/BuildDB.h
@@ -16,6 +16,8 @@
 #include "llbuild/Basic/LLVM.h"
 #include "llvm/ADT/StringRef.h"
 
+#include "llbuild/Core/BuildEngine.h"
+
 #include <cstdint>
 #include <memory>
 #include <string>
@@ -30,6 +32,16 @@
 public:
   virtual ~BuildDB();
 
+  /// Get a unique ID for the given key.
+  ///
+  /// This method is thread safe, and cannot fail.
+  virtual KeyID getKeyID(const KeyType& key) = 0;
+
+  /// Get the key corresponding to a key ID.
+  ///
+  /// This method is thread safe, and cannot fail.
+  virtual KeyType getKeyForID(KeyID key) = 0;
+  
   /// Get the current build iteration.
   ///
   /// \param success_out [out] Whether or not the query succeeded.
@@ -43,6 +55,7 @@
 
   /// Look up the result for a rule.
   ///
+  /// \param keyID The keyID for the rule.
   /// \param rule The rule to look up the result for.
   /// \param result_out [out] The result, if found.
   /// \param error_out [out] Error string if an error occurred.
@@ -53,7 +66,7 @@
   // FIXME: Figure out if we want a more lazy approach where we make the
   // database cache result objects and we query them only when needed. This may
   // scale better to very large build graphs.
-  virtual bool lookupRuleResult(const Rule& rule, Result* result_out, std::string* error_out) = 0;
+  virtual bool lookupRuleResult(KeyID keyID, const Rule& rule, Result* result_out, std::string* error_out) = 0;
 
   /// Update the stored result for a rule.
   ///
@@ -64,8 +77,9 @@
   /// The database *MUST*, however, correctly maintain the order of the
   /// dependencies.
   ///
+  /// \param keyID The keyID for the rule.
   /// \param error_out [out] Error string if return value is false.
-  virtual bool setRuleResult(const Rule& rule, const Result& result, std::string* error_out) = 0;
+  virtual bool setRuleResult(KeyID keyID, const Rule& rule, const Result& result, std::string* error_out) = 0;
 
   /// Called by the build engine to indicate that a build has started.
   ///
diff --git a/include/llbuild/Core/BuildEngine.h b/include/llbuild/Core/BuildEngine.h
index b42ce59..1154471 100644
--- a/include/llbuild/Core/BuildEngine.h
+++ b/include/llbuild/Core/BuildEngine.h
@@ -20,6 +20,7 @@
 #include <utility>
 #include <vector>
 
+#include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Twine.h"
 
 namespace llbuild {
@@ -27,6 +28,7 @@
 
 // FIXME: Need to abstract KeyType;
 typedef std::string KeyType;
+typedef uint64_t KeyID;
 typedef std::vector<uint8_t> ValueType;
 
 class BuildDB;
@@ -61,7 +63,7 @@
   //
   // FIXME: At some point, figure out the optimal representation for this field,
   // which is likely to be a lot of the resident memory size.
-  std::vector<KeyType> dependencies;
+  std::vector<KeyID> dependencies;
 };
 
 /// A task object represents an abstract in-progress computation in the build
@@ -87,8 +89,6 @@
 /// complete its computation and provide the output. The Task is responsible for
 /// providing the engine with the computed value when ready using \see
 /// BuildEngine::taskIsComplete().
-//
-// FIXME: Define parallel execution semantics.
 class Task {
 public:
   Task() {}
diff --git a/lib/Core/BuildEngine.cpp b/lib/Core/BuildEngine.cpp
index 6175471..3b34a3c 100644
--- a/lib/Core/BuildEngine.cpp
+++ b/lib/Core/BuildEngine.cpp
@@ -2,7 +2,7 @@
 //
 // This source file is part of the Swift.org open source project
 //
-// Copyright (c) 2014 - 2015 Apple Inc. and the Swift project authors
+// Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors
 // Licensed under Apache License v2.0 with Runtime Library Exception
 //
 // See http://swift.org/LICENSE.txt for license information
@@ -15,6 +15,7 @@
 #include "llbuild/Core/BuildDB.h"
 
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringMap.h"
 
 #include "BuildEngineTrace.h"
 
@@ -54,6 +55,12 @@
 
   BuildEngineDelegate& delegate;
 
+  /// The key table, used when there is no database.
+  llvm::StringMap<bool> keyTable;
+
+  /// The mutex that protects the key table.
+  std::mutex keyTableMutex;
+  
   /// The build database, if attached.
   std::unique_ptr<BuildDB> db;
 
@@ -127,9 +134,14 @@
       Complete
     };
 
-    RuleInfo(Rule&& rule) : rule(rule) {}
+    RuleInfo(KeyID keyID, Rule&& rule) : keyID(keyID), rule(rule) {}
 
+    /// The ID for the rule key.
+    KeyID keyID;
+
+    /// The rule this information describes.
     Rule rule;
+    
     /// The state dependent record for in-progress information.
     union {
       RuleScanRecord* pendingScanRecord;
@@ -214,8 +226,9 @@
   // NOTE: We currently rely on the unordered_map behavior that ensures that
   // references to elements are not invalidated by insertion. We will need to
   // move to an alternate allocation strategy if we switch to DenseMap style
-  // table.
-  std::unordered_map<KeyType, RuleInfo> ruleInfos;
+  // table. This is probably a good idea in any case, because we would benefit
+  // from pool allocating RuleInfo instances.
+  std::unordered_map<KeyID, RuleInfo> ruleInfos;
 
   /// Information tracked for executing tasks.
   //
@@ -244,7 +257,7 @@
     /// provided.
     unsigned waitCount = 0;
     /// The list of discovered dependencies found during execution of the task.
-    std::vector<KeyType> discoveredDependencies;
+    std::vector<KeyID> discoveredDependencies;
 
 #ifndef NDEBUG
     void dump() const {
@@ -503,8 +516,8 @@
     do {
       // Look up the input rule info, if not yet cached.
       if (!request.inputRuleInfo) {
-        const auto& inputKey = ruleInfo.result.dependencies[request.inputIndex];
-        request.inputRuleInfo = &getRuleInfoForKey(inputKey);
+        const auto& inputID = ruleInfo.result.dependencies[request.inputIndex];
+        request.inputRuleInfo = &getRuleInfoForKey(inputID);
       }
       auto& inputRuleInfo = *request.inputRuleInfo;
 
@@ -716,7 +729,7 @@
         // * Record at the time it is popped off the input request queue.
         // * Record at the time the input is supplied (here).
         request.taskInfo->forRuleInfo->result.dependencies.push_back(
-          request.inputRuleInfo->rule.key);
+            request.inputRuleInfo->keyID);
 
         // Provide the requesting task with the input.
         //
@@ -811,14 +824,15 @@
         // approach for discovered dependencies instead of just providing
         // support for taskNeedsInput() even after the task has started
         // computing and from parallel contexts.
-        for (const auto& inputKey: taskInfo->discoveredDependencies) {
-          inputRequests.push_back({ nullptr, &getRuleInfoForKey(inputKey) });
+        for (KeyID inputID: taskInfo->discoveredDependencies) {
+          inputRequests.push_back({ nullptr, &getRuleInfoForKey(inputID) });
         }
 
         // Update the database record, if attached.
         if (db) {
           std::string error;
-          bool result = db->setRuleResult(ruleInfo->rule, ruleInfo->result, &error);
+          bool result = db->setRuleResult(
+              ruleInfo->keyID, ruleInfo->rule, ruleInfo->result, &error);
           if (!result) {
             delegate.error(error);
             completeRemainingTasks();
@@ -1060,14 +1074,50 @@
     return currentTimestamp;
   }
 
+  KeyID getKeyID(const KeyType& key) {
+    // Delegate if we have a database.
+    if (db) {
+      return db->getKeyID(key);
+    }
+
+    // Otherwise use our builtin key table.
+    {
+      std::lock_guard<std::mutex> guard(keyTableMutex);
+      auto it = keyTable.insert(std::make_pair(key, false)).first;
+      return (KeyID)(uintptr_t)it->getKey().data();
+    }
+  }
+
   RuleInfo& getRuleInfoForKey(const KeyType& key) {
+    auto keyID = getKeyID(key);
+    
     // Check if we have already found the rule.
-    auto it = ruleInfos.find(key);
+    auto it = ruleInfos.find(keyID);
     if (it != ruleInfos.end())
       return it->second;
 
     // Otherwise, request it from the delegate and add it.
-    return addRule(delegate.lookupRule(key));
+    return addRule(keyID, delegate.lookupRule(key));
+  }
+
+  RuleInfo& getRuleInfoForKey(KeyID keyID) {
+    // Check if we have already found the rule.
+    auto it = ruleInfos.find(keyID);
+    if (it != ruleInfos.end())
+      return it->second;
+
+    // Otherwise, we need to resolve the full key so we can request it from the
+    // delegate.
+    KeyType key;
+    if (db) {
+      key = db->getKeyForID(keyID);
+    } else {
+      // Note that we don't need to lock `keyTable` here because the key entries
+      // themselves don't change once created.
+      key = llvm::StringMapEntry<bool>::GetStringMapEntryFromKeyData(
+          (const char*)(uintptr_t)keyID).getKey();
+    }
+    return addRule(keyID, delegate.lookupRule(key));
   }
 
   TaskInfo* getTaskInfo(Task* task) {
@@ -1080,7 +1130,11 @@
   /// @{
 
   RuleInfo& addRule(Rule&& rule) {
-    auto result = ruleInfos.emplace(rule.key, RuleInfo(std::move(rule)));
+    return addRule(getKeyID(rule.key), std::move(rule));
+  }
+  
+  RuleInfo& addRule(KeyID keyID, Rule&& rule) {
+    auto result = ruleInfos.emplace(keyID, RuleInfo(keyID, std::move(rule)));
     if (!result.second) {
       // FIXME: Error handling.
       std::cerr << "error: attempt to register duplicate rule \""
@@ -1096,7 +1150,7 @@
     RuleInfo& ruleInfo = result.first->second;
     if (db) {
       std::string error;
-      db->lookupRuleResult(ruleInfo.rule, &ruleInfo.result, &error);
+      db->lookupRuleResult(ruleInfo.keyID, ruleInfo.rule, &ruleInfo.result, &error);
       if (!error.empty()) {
         delegate.error(error);
         completeRemainingTasks();
@@ -1225,9 +1279,10 @@
     // Write out all of the rules.
     for (const auto& ruleInfo: orderedRuleInfos) {
       fprintf(fp, "\"%s\"\n", ruleInfo->rule.key.c_str());
-      for (auto& input: ruleInfo->result.dependencies) {
+      for (KeyID inputID: ruleInfo->result.dependencies) {
+        const auto& dependency = getRuleInfoForKey(inputID);
         fprintf(fp, "\"%s\" -> \"%s\"\n", ruleInfo->rule.key.c_str(),
-                input.c_str());
+                dependency.rule.key.c_str());
       }
       fprintf(fp, "\n");
     }
@@ -1294,7 +1349,8 @@
       return;
     }
 
-    taskInfo->discoveredDependencies.push_back(key);
+    auto dependencyID = getKeyID(key);
+    taskInfo->discoveredDependencies.push_back(dependencyID);
   }
 
   void taskIsComplete(Task* task, ValueType&& value, bool forceChange) {
diff --git a/lib/Core/SQLiteBuildDB.cpp b/lib/Core/SQLiteBuildDB.cpp
index 216bb9f..c956a44 100644
--- a/lib/Core/SQLiteBuildDB.cpp
+++ b/lib/Core/SQLiteBuildDB.cpp
@@ -2,7 +2,7 @@
 //
 // This source file is part of the Swift.org open source project
 //
-// Copyright (c) 2014 - 2015 Apple Inc. and the Swift project authors
+// Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors
 // Licensed under Apache License v2.0 with Runtime Library Exception
 //
 // See http://swift.org/LICENSE.txt for license information
@@ -16,10 +16,12 @@
 #include "llbuild/Core/BuildEngine.h"
 
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringMap.h"
 
 #include <cassert>
 #include <cerrno>
 #include <cstring>
+#include <mutex>
 #include <sstream>
 
 #include <sqlite3.h>
@@ -40,6 +42,10 @@
 
   sqlite3 *db = nullptr;
 
+  /// The mutex that protects the key ID table (which must support concurrent
+  /// access).
+  std::mutex keyIDMutex;
+
   std::string getCurrentErrorMessage() {
     int err_code = sqlite3_errcode(db);
     const char* err_message = sqlite3_errmsg(db);
@@ -229,6 +235,11 @@
       db, findKeyIDForKeyStmtSQL,
       -1, &findKeyIDForKeyStmt, nullptr);
     assert(result == SQLITE_OK);
+
+    result = sqlite3_prepare_v2(
+      db, findKeyNameForKeyIDStmtSQL,
+      -1, &findKeyNameForKeyIDStmt, nullptr);
+    assert(result == SQLITE_OK);
     
     result = sqlite3_prepare_v2(
       db, findIDForKeyInRuleResultsStmtSQL,
@@ -283,6 +294,7 @@
 
     // Destroy prepared statements.
     sqlite3_finalize(findKeyIDForKeyStmt);
+    sqlite3_finalize(findKeyNameForKeyIDStmt);
     sqlite3_finalize(findRuleDependenciesStmt);
     sqlite3_finalize(findRuleResultStmt);
     sqlite3_finalize(deleteFromKeysStmt);
@@ -347,22 +359,23 @@
     return true;
   }
 
-  static constexpr const char *deleteFromKeysStmtSQL =
-  "DELETE FROM key_names WHERE key == ?;";
+  static constexpr const char *deleteFromKeysStmtSQL = (
+      "DELETE FROM key_names WHERE key == ?;");
   sqlite3_stmt* deleteFromKeysStmt = nullptr;
   
-  static constexpr const char *findRuleResultStmtSQL =
-  "SELECT rule_results.id, value, built_at, computed_at FROM rule_results "
-  "INNER JOIN key_names ON key_names.id = rule_results.key_id WHERE key == ?;";
+  static constexpr const char *findRuleResultStmtSQL = (
+      "SELECT id, value, built_at, computed_at FROM rule_results "
+      "WHERE key_id == ?;");
   sqlite3_stmt* findRuleResultStmt = nullptr;
 
-  static constexpr const char *findRuleDependenciesStmtSQL =
-  "SELECT key FROM key_names INNER JOIN rule_dependencies "
-  "ON key_names.id = rule_dependencies.key_id WHERE rule_id == ?"
-  "ORDER BY rule_dependencies.ordinal;";
+  static constexpr const char *findRuleDependenciesStmtSQL = (
+      "SELECT key_id FROM rule_dependencies WHERE rule_id == ?"
+      "ORDER BY rule_dependencies.ordinal;");
   sqlite3_stmt* findRuleDependenciesStmt = nullptr;
 
-  virtual bool lookupRuleResult(const Rule& rule, Result* result_out, std::string *error_out) override {
+  virtual bool lookupRuleResult(KeyID keyID, const Rule& rule,
+                                Result* result_out,
+                                std::string *error_out) override {
     assert(result_out->builtAt == 0);
 
     // Fetch the basic rule information.
@@ -372,9 +385,7 @@
     assert(result == SQLITE_OK);
     result = sqlite3_clear_bindings(findRuleResultStmt);
     assert(result == SQLITE_OK);
-    result = sqlite3_bind_text(findRuleResultStmt, /*index=*/1,
-                               rule.key.data(), rule.key.size(),
-                               SQLITE_STATIC);
+    result = sqlite3_bind_int64(findRuleResultStmt, /*index=*/1, keyID);
     assert(result == SQLITE_OK);
 
     // If the rule wasn't found, we are done.
@@ -417,9 +428,8 @@
         return false;
       }
       assert(sqlite3_column_count(findRuleDependenciesStmt) == 1);
-      result_out->dependencies.push_back(std::string(
-                (const char*)sqlite3_column_text(findRuleDependenciesStmt, 0),
-                sqlite3_column_bytes(findRuleDependenciesStmt, 0)));
+      auto dependencyID = sqlite3_column_int64(findRuleDependenciesStmt, 0);
+      result_out->dependencies.push_back(dependencyID);
     }
 
     return true;
@@ -452,11 +462,16 @@
     "DELETE FROM rule_dependencies WHERE rule_id == ?;";
   sqlite3_stmt* deleteFromRuleDependenciesStmt = nullptr;
 
-  static constexpr const char *findKeyIDForKeyStmtSQL =
-  "SELECT id FROM key_names "
-  "WHERE key == ? LIMIT 1;";
+  static constexpr const char *findKeyIDForKeyStmtSQL = (
+      "SELECT id FROM key_names "
+      "WHERE key == ? LIMIT 1;");
   sqlite3_stmt* findKeyIDForKeyStmt = nullptr;
 
+  static constexpr const char *findKeyNameForKeyIDStmtSQL = (
+      "SELECT key FROM key_names "
+      "WHERE id == ? LIMIT 1;");
+  sqlite3_stmt* findKeyNameForKeyIDStmt = nullptr;
+
   static constexpr const char *insertIntoKeysStmtSQL =
   "INSERT OR IGNORE INTO key_names(key) VALUES (?);";
   sqlite3_stmt* insertIntoKeysStmt = nullptr;
@@ -464,7 +479,8 @@
   /// Inserts a key if not present and always returns keyID
   /// Sometimes key will be inserted for a lookup operation
   /// but that is okay because it'll be added at somepoint anyway
-  uint64_t getOrInsertKey(const KeyType& key, std::string *error_out) {
+  virtual KeyID getKeyID(const KeyType& key) override {
+    std::lock_guard<std::mutex> guard(keyIDMutex);
     int result;
 
     // Seach for the key.
@@ -495,24 +511,43 @@
     assert(result == SQLITE_OK);
     result = sqlite3_step(insertIntoKeysStmt);
     if (result != SQLITE_DONE) {
-      *error_out = getCurrentErrorMessage();
-      return UINT64_MAX;
+      // FIXME: We need to support error reporting here, but the engine cannot
+      // handle it currently.
+      abort();
     }
 
     return sqlite3_last_insert_rowid(db);
   }
-  
-  virtual bool setRuleResult(const Rule& rule,
+
+  virtual KeyType getKeyForID(KeyID keyID) override {
+    std::lock_guard<std::mutex> guard(keyIDMutex);
+    int result;
+
+    // Seach for the key.
+    result = sqlite3_reset(findKeyNameForKeyIDStmt);
+    assert(result == SQLITE_OK);
+    result = sqlite3_clear_bindings(findKeyNameForKeyIDStmt);
+    assert(result == SQLITE_OK);
+    result = sqlite3_bind_int64(findKeyNameForKeyIDStmt, /*index=*/1, keyID);
+    assert(result == SQLITE_OK);
+
+    result = sqlite3_step(findKeyNameForKeyIDStmt);
+    assert(result == SQLITE_ROW);
+    assert(sqlite3_column_count(findKeyNameForKeyIDStmt) == 1);
+
+    // Found a keyID, return.
+    auto size = sqlite3_column_bytes(findKeyNameForKeyIDStmt, 0);
+    auto text = (const char*) sqlite3_column_text(findKeyNameForKeyIDStmt, 0);
+    return KeyType(text, size);
+  }
+
+  virtual bool setRuleResult(KeyID keyID,
+                             const Rule& rule,
                              const Result& ruleResult,
                              std::string *error_out) override {
     int result;
     uint64_t ruleID = 0;
 
-    uint64_t keyID = getOrInsertKey(rule.key, error_out);
-    if (keyID == UINT64_MAX) {
-      return false;
-    }
-
     // Find the existing rule id, if present.
     //
     // We rely on SQLite3 not using 0 as a valid rule ID.
@@ -598,7 +633,7 @@
 
     // Insert all the dependencies.
     for (unsigned i = 0; i != ruleResult.dependencies.size(); ++i) {
-      const auto& dependency = ruleResult.dependencies[i];
+      KeyID dependencyKeyID = ruleResult.dependencies[i];
 
       // Reset the insert statement.
       result = sqlite3_reset(insertIntoRuleDependenciesStmt);
@@ -612,12 +647,8 @@
       assert(result == SQLITE_OK);
 
       // Bind the dependency key ID.
-      uint64_t dependencyKeyID = getOrInsertKey(dependency, error_out);
-      if (dependencyKeyID == UINT64_MAX) {
-        return false;
-      }
       result = sqlite3_bind_int64(insertIntoRuleDependenciesStmt, /*index=*/2,
-                                 dependencyKeyID);
+                                  dependencyKeyID);
       assert(result == SQLITE_OK);
 
       // Bind the index of the dependency.
diff --git a/unittests/Core/BuildEngineTest.cpp b/unittests/Core/BuildEngineTest.cpp
index ed1dea1..74968eb 100644
--- a/unittests/Core/BuildEngineTest.cpp
+++ b/unittests/Core/BuildEngineTest.cpp
@@ -14,6 +14,7 @@
 
 #include "llbuild/Core/BuildDB.h"
 
+#include "llvm/ADT/StringMap.h"
 #include "llvm/Support/ErrorHandling.h"
 
 #include "gtest/gtest.h"
@@ -419,18 +420,32 @@
   // Attach a custom database, used to get the results.
   class CustomDB : public BuildDB {
   public:
-    std::unordered_map<core::KeyType, Result> ruleResults;
+    llvm::StringMap<bool> keyTable;
+    
+    std::unordered_map<KeyType, Result> ruleResults;
 
+    virtual KeyID getKeyID(const KeyType& key) override {
+      auto it = keyTable.insert(std::make_pair(key, false)).first;
+      return (KeyID)(uintptr_t)it->getKey().data();
+    }
+
+    virtual KeyType getKeyForID(KeyID keyID) override {
+      return llvm::StringMapEntry<bool>::GetStringMapEntryFromKeyData(
+          (const char*)(uintptr_t)keyID).getKey();
+    }
+    
     virtual uint64_t getCurrentIteration(bool* success_out, std::string* error_out) override {
       return 0;
     }
     virtual bool setCurrentIteration(uint64_t value, std::string* error_out) override { return true; }
-    virtual bool lookupRuleResult(const Rule& rule,
+    virtual bool lookupRuleResult(KeyID keyID,
+                                  const Rule& rule,
                                   Result* result_out,
                                   std::string* error_out) override {
       return false;
     }
-    virtual bool setRuleResult(const Rule& rule,
+    virtual bool setRuleResult(KeyID key,
+                               const Rule& rule,
                                const Result& result,
                                std::string* error_out) override {
       ruleResults[rule.key] = result;