Release LevelDB 1.15

- switched from mmap based writing to simpler stdio based writing. Has a
  minor impact (0.5 microseconds) on microbenchmarks for asynchronous
  writes. Synchronous writes speed up from 30ms to 10ms on linux/ext4.
  Should be much more reliable on diverse platforms.
- compaction errors now immediately put the database into a read-only
  mode (until it is re-opened). As a downside, a disk going out of
  space and then space being created will require a re-open to recover
  from, whereas previously that would happen automatically. On the
  plus side, many corruption possibilities go away.
- force the DB to enter an error-state so that all future writes fail
  when a synchronous log write succeeds but the sync fails.
- repair now regenerates sstables that exhibit problems
- fix issue 218 - Use native memory barriers on OSX
- fix issue 212 - QNX build is broken
- fix build on iOS with xcode 5
- make tests compile and pass on windows
diff --git a/Makefile b/Makefile
index 26de8c2..344ff29 100644
--- a/Makefile
+++ b/Makefile
@@ -72,7 +72,7 @@
 else
 # Update db.h if you change these.
 SHARED_MAJOR = 1
-SHARED_MINOR = 14
+SHARED_MINOR = 15
 SHARED1 = libleveldb.$(PLATFORM_SHARED_EXT)
 SHARED2 = $(SHARED1).$(SHARED_MAJOR)
 SHARED3 = $(SHARED1).$(SHARED_MAJOR).$(SHARED_MINOR)
diff --git a/build_detect_platform b/build_detect_platform
index d50869d..6e59c6f 100755
--- a/build_detect_platform
+++ b/build_detect_platform
@@ -131,6 +131,16 @@
         # man ld: +h internal_name
         PLATFORM_SHARED_LDFLAGS="-shared -Wl,+h -Wl,"
         ;;
+    IOS)
+        PLATFORM=IOS
+        COMMON_FLAGS="$MEMCMP_FLAG -DOS_MACOSX"
+        [ -z "$INSTALL_PATH" ] && INSTALL_PATH=`pwd`
+        PORT_FILE=port/port_posix.cc
+        PLATFORM_SHARED_EXT=
+        PLATFORM_SHARED_LDFLAGS=
+        PLATFORM_SHARED_CFLAGS=
+        PLATFORM_SHARED_VERSIONED=
+        ;;
     *)
         echo "Unknown platform!" >&2
         exit 1
diff --git a/db/corruption_test.cc b/db/corruption_test.cc
index b37ffdf..96afc68 100644
--- a/db/corruption_test.cc
+++ b/db/corruption_test.cc
@@ -75,7 +75,13 @@
       Slice key = Key(i, &key_space);
       batch.Clear();
       batch.Put(key, Value(i, &value_space));
-      ASSERT_OK(db_->Write(WriteOptions(), &batch));
+      WriteOptions options;
+      // Corrupt() doesn't work without this sync on windows; stat reports 0 for
+      // the file size.
+      if (i == n - 1) {
+        options.sync = true;
+      }
+      ASSERT_OK(db_->Write(options, &batch));
     }
   }
 
@@ -125,7 +131,7 @@
     FileType type;
     std::string fname;
     int picked_number = -1;
-    for (int i = 0; i < filenames.size(); i++) {
+    for (size_t i = 0; i < filenames.size(); i++) {
       if (ParseFileName(filenames[i], &number, &type) &&
           type == filetype &&
           int(number) > picked_number) {  // Pick latest file
@@ -238,6 +244,22 @@
   Check(90, 99);
 }
 
+TEST(CorruptionTest, TableFileRepair) {
+  options_.block_size = 2 * kValueSize;  // Limit scope of corruption
+  options_.paranoid_checks = true;
+  Reopen();
+  Build(100);
+  DBImpl* dbi = reinterpret_cast<DBImpl*>(db_);
+  dbi->TEST_CompactMemTable();
+  dbi->TEST_CompactRange(0, NULL, NULL);
+  dbi->TEST_CompactRange(1, NULL, NULL);
+
+  Corrupt(kTableFile, 100, 1);
+  RepairDB();
+  Reopen();
+  Check(95, 99);
+}
+
 TEST(CorruptionTest, TableFileIndexData) {
   Build(10000);  // Enough to build multiple Tables
   DBImpl* dbi = reinterpret_cast<DBImpl*>(db_);
diff --git a/db/db_bench.cc b/db/db_bench.cc
index 7abdf87..fc46d89 100644
--- a/db/db_bench.cc
+++ b/db/db_bench.cc
@@ -128,7 +128,7 @@
     pos_ = 0;
   }
 
-  Slice Generate(int len) {
+  Slice Generate(size_t len) {
     if (pos_ + len > data_.size()) {
       pos_ = 0;
       assert(len < data_.size());
@@ -139,11 +139,11 @@
 };
 
 static Slice TrimSpace(Slice s) {
-  int start = 0;
+  size_t start = 0;
   while (start < s.size() && isspace(s[start])) {
     start++;
   }
-  int limit = s.size();
+  size_t limit = s.size();
   while (limit > start && isspace(s[limit-1])) {
     limit--;
   }
@@ -399,7 +399,7 @@
     heap_counter_(0) {
     std::vector<std::string> files;
     Env::Default()->GetChildren(FLAGS_db, &files);
-    for (int i = 0; i < files.size(); i++) {
+    for (size_t i = 0; i < files.size(); i++) {
       if (Slice(files[i]).starts_with("heap-")) {
         Env::Default()->DeleteFile(std::string(FLAGS_db) + "/" + files[i]);
       }
diff --git a/db/db_impl.cc b/db/db_impl.cc
index fa13510..faf5e7d 100644
--- a/db/db_impl.cc
+++ b/db/db_impl.cc
@@ -133,8 +133,7 @@
       seed_(0),
       tmp_batch_(new WriteBatch),
       bg_compaction_scheduled_(false),
-      manual_compaction_(NULL),
-      consecutive_compaction_errors_(0) {
+      manual_compaction_(NULL) {
   mem_->Ref();
   has_imm_.Release_Store(NULL);
 
@@ -217,6 +216,12 @@
 }
 
 void DBImpl::DeleteObsoleteFiles() {
+  if (!bg_error_.ok()) {
+    // After a background error, we don't know whether a new version may
+    // or may not have been committed, so we cannot safely garbage collect.
+    return;
+  }
+
   // Make a set of all of the live files
   std::set<uint64_t> live = pending_outputs_;
   versions_->AddLiveFiles(&live);
@@ -495,7 +500,7 @@
   return s;
 }
 
-Status DBImpl::CompactMemTable() {
+void DBImpl::CompactMemTable() {
   mutex_.AssertHeld();
   assert(imm_ != NULL);
 
@@ -523,9 +528,9 @@
     imm_ = NULL;
     has_imm_.Release_Store(NULL);
     DeleteObsoleteFiles();
+  } else {
+    RecordBackgroundError(s);
   }
-
-  return s;
 }
 
 void DBImpl::CompactRange(const Slice* begin, const Slice* end) {
@@ -568,15 +573,17 @@
   }
 
   MutexLock l(&mutex_);
-  while (!manual.done) {
-    while (manual_compaction_ != NULL) {
+  while (!manual.done && !shutting_down_.Acquire_Load() && bg_error_.ok()) {
+    if (manual_compaction_ == NULL) {  // Idle
+      manual_compaction_ = &manual;
+      MaybeScheduleCompaction();
+    } else {  // Running either my compaction or another compaction.
       bg_cv_.Wait();
     }
-    manual_compaction_ = &manual;
-    MaybeScheduleCompaction();
-    while (manual_compaction_ == &manual) {
-      bg_cv_.Wait();
-    }
+  }
+  if (manual_compaction_ == &manual) {
+    // Cancel my manual compaction since we aborted early for some reason.
+    manual_compaction_ = NULL;
   }
 }
 
@@ -596,12 +603,22 @@
   return s;
 }
 
+void DBImpl::RecordBackgroundError(const Status& s) {
+  mutex_.AssertHeld();
+  if (bg_error_.ok()) {
+    bg_error_ = s;
+    bg_cv_.SignalAll();
+  }
+}
+
 void DBImpl::MaybeScheduleCompaction() {
   mutex_.AssertHeld();
   if (bg_compaction_scheduled_) {
     // Already scheduled
   } else if (shutting_down_.Acquire_Load()) {
     // DB is being deleted; no more background compactions
+  } else if (!bg_error_.ok()) {
+    // Already got an error; no more changes
   } else if (imm_ == NULL &&
              manual_compaction_ == NULL &&
              !versions_->NeedsCompaction()) {
@@ -619,30 +636,12 @@
 void DBImpl::BackgroundCall() {
   MutexLock l(&mutex_);
   assert(bg_compaction_scheduled_);
-  if (!shutting_down_.Acquire_Load()) {
-    Status s = BackgroundCompaction();
-    if (s.ok()) {
-      // Success
-      consecutive_compaction_errors_ = 0;
-    } else if (shutting_down_.Acquire_Load()) {
-      // Error most likely due to shutdown; do not wait
-    } else {
-      // Wait a little bit before retrying background compaction in
-      // case this is an environmental problem and we do not want to
-      // chew up resources for failed compactions for the duration of
-      // the problem.
-      bg_cv_.SignalAll();  // In case a waiter can proceed despite the error
-      Log(options_.info_log, "Waiting after background compaction error: %s",
-          s.ToString().c_str());
-      mutex_.Unlock();
-      ++consecutive_compaction_errors_;
-      int seconds_to_sleep = 1;
-      for (int i = 0; i < 3 && i < consecutive_compaction_errors_ - 1; ++i) {
-        seconds_to_sleep *= 2;
-      }
-      env_->SleepForMicroseconds(seconds_to_sleep * 1000000);
-      mutex_.Lock();
-    }
+  if (shutting_down_.Acquire_Load()) {
+    // No more background work when shutting down.
+  } else if (!bg_error_.ok()) {
+    // No more background work after a background error.
+  } else {
+    BackgroundCompaction();
   }
 
   bg_compaction_scheduled_ = false;
@@ -653,11 +652,12 @@
   bg_cv_.SignalAll();
 }
 
-Status DBImpl::BackgroundCompaction() {
+void DBImpl::BackgroundCompaction() {
   mutex_.AssertHeld();
 
   if (imm_ != NULL) {
-    return CompactMemTable();
+    CompactMemTable();
+    return;
   }
 
   Compaction* c;
@@ -691,6 +691,9 @@
     c->edit()->AddFile(c->level() + 1, f->number, f->file_size,
                        f->smallest, f->largest);
     status = versions_->LogAndApply(c->edit(), &mutex_);
+    if (!status.ok()) {
+      RecordBackgroundError(status);
+    }
     VersionSet::LevelSummaryStorage tmp;
     Log(options_.info_log, "Moved #%lld to level-%d %lld bytes %s: %s\n",
         static_cast<unsigned long long>(f->number),
@@ -701,6 +704,9 @@
   } else {
     CompactionState* compact = new CompactionState(c);
     status = DoCompactionWork(compact);
+    if (!status.ok()) {
+      RecordBackgroundError(status);
+    }
     CleanupCompaction(compact);
     c->ReleaseInputs();
     DeleteObsoleteFiles();
@@ -714,9 +720,6 @@
   } else {
     Log(options_.info_log,
         "Compaction error: %s", status.ToString().c_str());
-    if (options_.paranoid_checks && bg_error_.ok()) {
-      bg_error_ = status;
-    }
   }
 
   if (is_manual) {
@@ -732,7 +735,6 @@
     }
     manual_compaction_ = NULL;
   }
-  return status;
 }
 
 void DBImpl::CleanupCompaction(CompactionState* compact) {
@@ -1002,6 +1004,9 @@
   if (status.ok()) {
     status = InstallCompactionResults(compact);
   }
+  if (!status.ok()) {
+    RecordBackgroundError(status);
+  }
   VersionSet::LevelSummaryStorage tmp;
   Log(options_.info_log,
       "compacted to: %s", versions_->LevelSummary(&tmp));
@@ -1185,13 +1190,23 @@
     {
       mutex_.Unlock();
       status = log_->AddRecord(WriteBatchInternal::Contents(updates));
+      bool sync_error = false;
       if (status.ok() && options.sync) {
         status = logfile_->Sync();
+        if (!status.ok()) {
+          sync_error = true;
+        }
       }
       if (status.ok()) {
         status = WriteBatchInternal::InsertInto(updates, mem_);
       }
       mutex_.Lock();
+      if (sync_error) {
+        // The state of the log file is indeterminate: the log record we
+        // just added may or may not show up when the DB is re-opened.
+        // So we force the DB into a mode where all future writes fail.
+        RecordBackgroundError(status);
+      }
     }
     if (updates == tmp_batch_) tmp_batch_->Clear();
 
diff --git a/db/db_impl.h b/db/db_impl.h
index 75fd30a..cfc9981 100644
--- a/db/db_impl.h
+++ b/db/db_impl.h
@@ -87,8 +87,8 @@
 
   // Compact the in-memory write buffer to disk.  Switches to a new
   // log-file/memtable and writes a new descriptor iff successful.
-  Status CompactMemTable()
-      EXCLUSIVE_LOCKS_REQUIRED(mutex_);
+  // Errors are recorded in bg_error_.
+  void CompactMemTable() EXCLUSIVE_LOCKS_REQUIRED(mutex_);
 
   Status RecoverLogFile(uint64_t log_number,
                         VersionEdit* edit,
@@ -102,10 +102,12 @@
       EXCLUSIVE_LOCKS_REQUIRED(mutex_);
   WriteBatch* BuildBatchGroup(Writer** last_writer);
 
+  void RecordBackgroundError(const Status& s);
+
   void MaybeScheduleCompaction() EXCLUSIVE_LOCKS_REQUIRED(mutex_);
   static void BGWork(void* db);
   void BackgroundCall();
-  Status BackgroundCompaction() EXCLUSIVE_LOCKS_REQUIRED(mutex_);
+  void  BackgroundCompaction() EXCLUSIVE_LOCKS_REQUIRED(mutex_);
   void CleanupCompaction(CompactionState* compact)
       EXCLUSIVE_LOCKS_REQUIRED(mutex_);
   Status DoCompactionWork(CompactionState* compact)
@@ -170,7 +172,6 @@
 
   // Have we encountered a background error in paranoid mode?
   Status bg_error_;
-  int consecutive_compaction_errors_;
 
   // Per level compaction stats.  stats_[level] stores the stats for
   // compactions that produced data for the specified "level".
diff --git a/db/db_test.cc b/db/db_test.cc
index 848a038..280b01c 100644
--- a/db/db_test.cc
+++ b/db/db_test.cc
@@ -57,8 +57,11 @@
 // Special Env used to delay background operations
 class SpecialEnv : public EnvWrapper {
  public:
-  // sstable Sync() calls are blocked while this pointer is non-NULL.
-  port::AtomicPointer delay_sstable_sync_;
+  // sstable/log Sync() calls are blocked while this pointer is non-NULL.
+  port::AtomicPointer delay_data_sync_;
+
+  // sstable/log Sync() calls return an error.
+  port::AtomicPointer data_sync_error_;
 
   // Simulate no-space errors while this pointer is non-NULL.
   port::AtomicPointer no_space_;
@@ -75,11 +78,9 @@
   bool count_random_reads_;
   AtomicCounter random_read_counter_;
 
-  AtomicCounter sleep_counter_;
-  AtomicCounter sleep_time_counter_;
-
   explicit SpecialEnv(Env* base) : EnvWrapper(base) {
-    delay_sstable_sync_.Release_Store(NULL);
+    delay_data_sync_.Release_Store(NULL);
+    data_sync_error_.Release_Store(NULL);
     no_space_.Release_Store(NULL);
     non_writable_.Release_Store(NULL);
     count_random_reads_ = false;
@@ -88,17 +89,17 @@
   }
 
   Status NewWritableFile(const std::string& f, WritableFile** r) {
-    class SSTableFile : public WritableFile {
+    class DataFile : public WritableFile {
      private:
       SpecialEnv* env_;
       WritableFile* base_;
 
      public:
-      SSTableFile(SpecialEnv* env, WritableFile* base)
+      DataFile(SpecialEnv* env, WritableFile* base)
           : env_(env),
             base_(base) {
       }
-      ~SSTableFile() { delete base_; }
+      ~DataFile() { delete base_; }
       Status Append(const Slice& data) {
         if (env_->no_space_.Acquire_Load() != NULL) {
           // Drop writes on the floor
@@ -110,7 +111,10 @@
       Status Close() { return base_->Close(); }
       Status Flush() { return base_->Flush(); }
       Status Sync() {
-        while (env_->delay_sstable_sync_.Acquire_Load() != NULL) {
+        if (env_->data_sync_error_.Acquire_Load() != NULL) {
+          return Status::IOError("simulated data sync error");
+        }
+        while (env_->delay_data_sync_.Acquire_Load() != NULL) {
           DelayMilliseconds(100);
         }
         return base_->Sync();
@@ -147,8 +151,9 @@
 
     Status s = target()->NewWritableFile(f, r);
     if (s.ok()) {
-      if (strstr(f.c_str(), ".ldb") != NULL) {
-        *r = new SSTableFile(this, *r);
+      if (strstr(f.c_str(), ".ldb") != NULL ||
+          strstr(f.c_str(), ".log") != NULL) {
+        *r = new DataFile(this, *r);
       } else if (strstr(f.c_str(), "MANIFEST") != NULL) {
         *r = new ManifestFile(this, *r);
       }
@@ -179,12 +184,6 @@
     }
     return s;
   }
-
-  virtual void SleepForMicroseconds(int micros) {
-    sleep_counter_.Increment();
-    sleep_time_counter_.IncrementBy(micros);
-  }
-
 };
 
 class DBTest {
@@ -322,7 +321,7 @@
     }
 
     // Check reverse iteration results are the reverse of forward results
-    int matched = 0;
+    size_t matched = 0;
     for (iter->SeekToLast(); iter->Valid(); iter->Prev()) {
       ASSERT_LT(matched, forward.size());
       ASSERT_EQ(IterStatus(iter), forward[forward.size() - matched - 1]);
@@ -543,11 +542,11 @@
     ASSERT_OK(Put("foo", "v1"));
     ASSERT_EQ("v1", Get("foo"));
 
-    env_->delay_sstable_sync_.Release_Store(env_);   // Block sync calls
+    env_->delay_data_sync_.Release_Store(env_);      // Block sync calls
     Put("k1", std::string(100000, 'x'));             // Fill memtable
     Put("k2", std::string(100000, 'y'));             // Trigger compaction
     ASSERT_EQ("v1", Get("foo"));
-    env_->delay_sstable_sync_.Release_Store(NULL);   // Release sync calls
+    env_->delay_data_sync_.Release_Store(NULL);      // Release sync calls
   } while (ChangeOptions());
 }
 
@@ -1534,41 +1533,13 @@
   Compact("a", "z");
   const int num_files = CountFiles();
   env_->no_space_.Release_Store(env_);   // Force out-of-space errors
-  env_->sleep_counter_.Reset();
-  for (int i = 0; i < 5; i++) {
+  for (int i = 0; i < 10; i++) {
     for (int level = 0; level < config::kNumLevels-1; level++) {
       dbfull()->TEST_CompactRange(level, NULL, NULL);
     }
   }
   env_->no_space_.Release_Store(NULL);
   ASSERT_LT(CountFiles(), num_files + 3);
-
-  // Check that compaction attempts slept after errors
-  ASSERT_GE(env_->sleep_counter_.Read(), 5);
-}
-
-TEST(DBTest, ExponentialBackoff) {
-  Options options = CurrentOptions();
-  options.env = env_;
-  Reopen(&options);
-
-  ASSERT_OK(Put("foo", "v1"));
-  ASSERT_EQ("v1", Get("foo"));
-  Compact("a", "z");
-  env_->non_writable_.Release_Store(env_);  // Force errors for new files
-  env_->sleep_counter_.Reset();
-  env_->sleep_time_counter_.Reset();
-  for (int i = 0; i < 5; i++) {
-    dbfull()->TEST_CompactRange(2, NULL, NULL);
-  }
-  env_->non_writable_.Release_Store(NULL);
-
-  // Wait for compaction to finish
-  DelayMilliseconds(1000);
-
-  ASSERT_GE(env_->sleep_counter_.Read(), 5);
-  ASSERT_LT(env_->sleep_counter_.Read(), 10);
-  ASSERT_GE(env_->sleep_time_counter_.Read(), 10e6);
 }
 
 TEST(DBTest, NonWritableFileSystem) {
@@ -1591,6 +1562,37 @@
   env_->non_writable_.Release_Store(NULL);
 }
 
+TEST(DBTest, WriteSyncError) {
+  // Check that log sync errors cause the DB to disallow future writes.
+
+  // (a) Cause log sync calls to fail
+  Options options = CurrentOptions();
+  options.env = env_;
+  Reopen(&options);
+  env_->data_sync_error_.Release_Store(env_);
+
+  // (b) Normal write should succeed
+  WriteOptions w;
+  ASSERT_OK(db_->Put(w, "k1", "v1"));
+  ASSERT_EQ("v1", Get("k1"));
+
+  // (c) Do a sync write; should fail
+  w.sync = true;
+  ASSERT_TRUE(!db_->Put(w, "k2", "v2").ok());
+  ASSERT_EQ("v1", Get("k1"));
+  ASSERT_EQ("NOT_FOUND", Get("k2"));
+
+  // (d) make sync behave normally
+  env_->data_sync_error_.Release_Store(NULL);
+
+  // (e) Do a non-sync write; should fail
+  w.sync = false;
+  ASSERT_TRUE(!db_->Put(w, "k3", "v3").ok());
+  ASSERT_EQ("v1", Get("k1"));
+  ASSERT_EQ("NOT_FOUND", Get("k2"));
+  ASSERT_EQ("NOT_FOUND", Get("k3"));
+}
+
 TEST(DBTest, ManifestWriteError) {
   // Test for the following problem:
   // (a) Compaction produces file F
@@ -1697,7 +1699,7 @@
   dbfull()->TEST_CompactMemTable();
 
   // Prevent auto compactions triggered by seeks
-  env_->delay_sstable_sync_.Release_Store(env_);
+  env_->delay_data_sync_.Release_Store(env_);
 
   // Lookup present keys.  Should rarely read from small sstable.
   env_->random_read_counter_.Reset();
@@ -1718,7 +1720,7 @@
   fprintf(stderr, "%d missing => %d reads\n", N, reads);
   ASSERT_LE(reads, 3*N/100);
 
-  env_->delay_sstable_sync_.Release_Store(NULL);
+  env_->delay_data_sync_.Release_Store(NULL);
   Close();
   delete options.block_cache;
   delete options.filter_policy;
@@ -1778,7 +1780,7 @@
         ASSERT_EQ(k, key);
         ASSERT_GE(w, 0);
         ASSERT_LT(w, kNumThreads);
-        ASSERT_LE(c, reinterpret_cast<uintptr_t>(
+        ASSERT_LE(static_cast<uintptr_t>(c), reinterpret_cast<uintptr_t>(
             t->state->counter[w].Acquire_Load()));
       }
     }
diff --git a/db/repair.cc b/db/repair.cc
index dc93fb8..96c9b37 100644
--- a/db/repair.cc
+++ b/db/repair.cc
@@ -244,68 +244,133 @@
   void ExtractMetaData() {
     std::vector<TableInfo> kept;
     for (size_t i = 0; i < table_numbers_.size(); i++) {
-      TableInfo t;
-      t.meta.number = table_numbers_[i];
-      Status status = ScanTable(&t);
-      if (!status.ok()) {
-        std::string fname = TableFileName(dbname_, table_numbers_[i]);
-        Log(options_.info_log, "Table #%llu: ignoring %s",
-            (unsigned long long) table_numbers_[i],
-            status.ToString().c_str());
-        ArchiveFile(fname);
-      } else {
-        tables_.push_back(t);
-      }
+      ScanTable(table_numbers_[i]);
     }
   }
 
-  Status ScanTable(TableInfo* t) {
-    std::string fname = TableFileName(dbname_, t->meta.number);
-    int counter = 0;
-    Status status = env_->GetFileSize(fname, &t->meta.file_size);
-    if (!status.ok()) {
-      fname = SSTTableFileName(dbname_, t->meta.number);
-      Status s2 = env_->GetFileSize(fname, &t->meta.file_size);
-      if (s2.ok())
-        status = Status::OK();
-    }
-    if (status.ok()) {
-      Iterator* iter = table_cache_->NewIterator(
-          ReadOptions(), t->meta.number, t->meta.file_size);
-      bool empty = true;
-      ParsedInternalKey parsed;
-      t->max_sequence = 0;
-      for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
-        Slice key = iter->key();
-        if (!ParseInternalKey(key, &parsed)) {
-          Log(options_.info_log, "Table #%llu: unparsable key %s",
-              (unsigned long long) t->meta.number,
-              EscapeString(key).c_str());
-          continue;
-        }
+  Iterator* NewTableIterator(const FileMetaData& meta) {
+    // Same as compaction iterators: if paranoid_checks are on, turn
+    // on checksum verification.
+    ReadOptions r;
+    r.verify_checksums = options_.paranoid_checks;
+    return table_cache_->NewIterator(r, meta.number, meta.file_size);
+  }
 
-        counter++;
-        if (empty) {
-          empty = false;
-          t->meta.smallest.DecodeFrom(key);
-        }
-        t->meta.largest.DecodeFrom(key);
-        if (parsed.sequence > t->max_sequence) {
-          t->max_sequence = parsed.sequence;
-        }
+  void ScanTable(uint64_t number) {
+    TableInfo t;
+    t.meta.number = number;
+    std::string fname = TableFileName(dbname_, number);
+    Status status = env_->GetFileSize(fname, &t.meta.file_size);
+    if (!status.ok()) {
+      // Try alternate file name.
+      fname = SSTTableFileName(dbname_, number);
+      Status s2 = env_->GetFileSize(fname, &t.meta.file_size);
+      if (s2.ok()) {
+        status = Status::OK();
       }
-      if (!iter->status().ok()) {
-        status = iter->status();
-      }
-      delete iter;
     }
-    // If there was trouble opening an .sst file this will report that the .ldb
-    // file was not found, which is kind of lame but shouldn't happen often.
+    if (!status.ok()) {
+      ArchiveFile(TableFileName(dbname_, number));
+      ArchiveFile(SSTTableFileName(dbname_, number));
+      Log(options_.info_log, "Table #%llu: dropped: %s",
+          (unsigned long long) t.meta.number,
+          status.ToString().c_str());
+      return;
+    }
+
+    // Extract metadata by scanning through table.
+    int counter = 0;
+    Iterator* iter = NewTableIterator(t.meta);
+    bool empty = true;
+    ParsedInternalKey parsed;
+    t.max_sequence = 0;
+    for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
+      Slice key = iter->key();
+      if (!ParseInternalKey(key, &parsed)) {
+        Log(options_.info_log, "Table #%llu: unparsable key %s",
+            (unsigned long long) t.meta.number,
+            EscapeString(key).c_str());
+        continue;
+      }
+
+      counter++;
+      if (empty) {
+        empty = false;
+        t.meta.smallest.DecodeFrom(key);
+      }
+      t.meta.largest.DecodeFrom(key);
+      if (parsed.sequence > t.max_sequence) {
+        t.max_sequence = parsed.sequence;
+      }
+    }
+    if (!iter->status().ok()) {
+      status = iter->status();
+    }
+    delete iter;
     Log(options_.info_log, "Table #%llu: %d entries %s",
-        (unsigned long long) t->meta.number,
+        (unsigned long long) t.meta.number,
         counter,
         status.ToString().c_str());
-    return status;
+
+    if (status.ok()) {
+      tables_.push_back(t);
+    } else {
+      RepairTable(fname, t);  // RepairTable archives input file.
+    }
+  }
+
+  void RepairTable(const std::string& src, TableInfo t) {
+    // We will copy src contents to a new table and then rename the
+    // new table over the source.
+
+    // Create builder.
+    std::string copy = TableFileName(dbname_, next_file_number_++);
+    WritableFile* file;
+    Status s = env_->NewWritableFile(copy, &file);
+    if (!s.ok()) {
+      return;
+    }
+    TableBuilder* builder = new TableBuilder(options_, file);
+
+    // Copy data.
+    Iterator* iter = NewTableIterator(t.meta);
+    int counter = 0;
+    for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
+      builder->Add(iter->key(), iter->value());
+      counter++;
+    }
+    delete iter;
+
+    ArchiveFile(src);
+    if (counter == 0) {
+      builder->Abandon();  // Nothing to save
+    } else {
+      s = builder->Finish();
+      if (s.ok()) {
+        t.meta.file_size = builder->FileSize();
+      }
+    }
+    delete builder;
+    builder = NULL;
+
+    if (s.ok()) {
+      s = file->Close();
+    }
+    delete file;
+    file = NULL;
+
+    if (counter > 0 && s.ok()) {
+      std::string orig = TableFileName(dbname_, t.meta.number);
+      s = env_->RenameFile(copy, orig);
+      if (s.ok()) {
+        Log(options_.info_log, "Table #%llu: %d entries repaired",
+            (unsigned long long) t.meta.number, counter);
+        tables_.push_back(t);
+      }
+    }
+    if (!s.ok()) {
+      env_->DeleteFile(copy);
+    }
   }
 
   Status WriteDescriptor() {
diff --git a/db/version_set.cc b/db/version_set.cc
index 66d73be..517edd3 100644
--- a/db/version_set.cc
+++ b/db/version_set.cc
@@ -876,12 +876,6 @@
       }
       if (!s.ok()) {
         Log(options_->info_log, "MANIFEST write: %s\n", s.ToString().c_str());
-        if (ManifestContains(record)) {
-          Log(options_->info_log,
-              "MANIFEST contains log record despite error; advancing to new "
-              "version to prevent mismatch between in-memory and logged state");
-          s = Status::OK();
-        }
       }
     }
 
@@ -889,8 +883,6 @@
     // new CURRENT file that points to it.
     if (s.ok() && !new_manifest_file.empty()) {
       s = SetCurrentFile(env_, dbname_, manifest_file_number_);
-      // No need to double-check MANIFEST in case of error since it
-      // will be discarded below.
     }
 
     mu->Lock();
@@ -1124,31 +1116,6 @@
   return scratch->buffer;
 }
 
-// Return true iff the manifest contains the specified record.
-bool VersionSet::ManifestContains(const std::string& record) const {
-  std::string fname = DescriptorFileName(dbname_, manifest_file_number_);
-  Log(options_->info_log, "ManifestContains: checking %s\n", fname.c_str());
-  SequentialFile* file = NULL;
-  Status s = env_->NewSequentialFile(fname, &file);
-  if (!s.ok()) {
-    Log(options_->info_log, "ManifestContains: %s\n", s.ToString().c_str());
-    return false;
-  }
-  log::Reader reader(file, NULL, true/*checksum*/, 0);
-  Slice r;
-  std::string scratch;
-  bool result = false;
-  while (reader.ReadRecord(&r, &scratch)) {
-    if (r == Slice(record)) {
-      result = true;
-      break;
-    }
-  }
-  delete file;
-  Log(options_->info_log, "ManifestContains: result = %d\n", result ? 1 : 0);
-  return result;
-}
-
 uint64_t VersionSet::ApproximateOffsetOf(Version* v, const InternalKey& ikey) {
   uint64_t result = 0;
   for (int level = 0; level < config::kNumLevels; level++) {
diff --git a/db/version_set.h b/db/version_set.h
index 20de0e2..8dc14b8 100644
--- a/db/version_set.h
+++ b/db/version_set.h
@@ -292,8 +292,6 @@
 
   void AppendVersion(Version* v);
 
-  bool ManifestContains(const std::string& record) const;
-
   Env* const env_;
   const std::string dbname_;
   const Options* const options_;
diff --git a/include/leveldb/db.h b/include/leveldb/db.h
index 259a81f..5ffb29d 100644
--- a/include/leveldb/db.h
+++ b/include/leveldb/db.h
@@ -14,7 +14,7 @@
 
 // Update Makefile if you change these
 static const int kMajorVersion = 1;
-static const int kMinorVersion = 14;
+static const int kMinorVersion = 15;
 
 struct Options;
 struct ReadOptions;
diff --git a/include/leveldb/env.h b/include/leveldb/env.h
index fa32289..b2072d0 100644
--- a/include/leveldb/env.h
+++ b/include/leveldb/env.h
@@ -13,9 +13,9 @@
 #ifndef STORAGE_LEVELDB_INCLUDE_ENV_H_
 #define STORAGE_LEVELDB_INCLUDE_ENV_H_
 
-#include <cstdarg>
 #include <string>
 #include <vector>
+#include <stdarg.h>
 #include <stdint.h>
 #include "leveldb/status.h"
 
diff --git a/port/atomic_pointer.h b/port/atomic_pointer.h
index e17bf43..a9866b2 100644
--- a/port/atomic_pointer.h
+++ b/port/atomic_pointer.h
@@ -50,6 +50,13 @@
 // http://msdn.microsoft.com/en-us/library/ms684208(v=vs.85).aspx
 #define LEVELDB_HAVE_MEMORY_BARRIER
 
+// Mac OS
+#elif defined(OS_MACOSX)
+inline void MemoryBarrier() {
+  OSMemoryBarrier();
+}
+#define LEVELDB_HAVE_MEMORY_BARRIER
+
 // Gcc on x86
 #elif defined(ARCH_CPU_X86_FAMILY) && defined(__GNUC__)
 inline void MemoryBarrier() {
@@ -68,13 +75,6 @@
 }
 #define LEVELDB_HAVE_MEMORY_BARRIER
 
-// Mac OS
-#elif defined(OS_MACOSX)
-inline void MemoryBarrier() {
-  OSMemoryBarrier();
-}
-#define LEVELDB_HAVE_MEMORY_BARRIER
-
 // ARM Linux
 #elif defined(ARCH_CPU_ARM_FAMILY) && defined(__linux__)
 typedef void (*LinuxKernelMemoryBarrierFunc)(void);
diff --git a/table/filter_block_test.cc b/table/filter_block_test.cc
index 3a2a07c..8c4a474 100644
--- a/table/filter_block_test.cc
+++ b/table/filter_block_test.cc
@@ -29,7 +29,7 @@
 
   virtual bool KeyMayMatch(const Slice& key, const Slice& filter) const {
     uint32_t h = Hash(key.data(), key.size(), 1);
-    for (int i = 0; i + 4 <= filter.size(); i += 4) {
+    for (size_t i = 0; i + 4 <= filter.size(); i += 4) {
       if (h == DecodeFixed32(filter.data() + i)) {
         return true;
       }
diff --git a/util/arena.h b/util/arena.h
index 8f7dde2..73bbf1c 100644
--- a/util/arena.h
+++ b/util/arena.h
@@ -5,9 +5,9 @@
 #ifndef STORAGE_LEVELDB_UTIL_ARENA_H_
 #define STORAGE_LEVELDB_UTIL_ARENA_H_
 
-#include <cstddef>
 #include <vector>
 #include <assert.h>
+#include <stddef.h>
 #include <stdint.h>
 
 namespace leveldb {
diff --git a/util/arena_test.cc b/util/arena_test.cc
index 63d1778..58e870e 100644
--- a/util/arena_test.cc
+++ b/util/arena_test.cc
@@ -40,7 +40,7 @@
       r = arena.Allocate(s);
     }
 
-    for (int b = 0; b < s; b++) {
+    for (size_t b = 0; b < s; b++) {
       // Fill the "i"th allocation with a known bit pattern
       r[b] = i % 256;
     }
@@ -51,10 +51,10 @@
       ASSERT_LE(arena.MemoryUsage(), bytes * 1.10);
     }
   }
-  for (int i = 0; i < allocated.size(); i++) {
+  for (size_t i = 0; i < allocated.size(); i++) {
     size_t num_bytes = allocated[i].first;
     const char* p = allocated[i].second;
-    for (int b = 0; b < num_bytes; b++) {
+    for (size_t b = 0; b < num_bytes; b++) {
       // Check the "i"th allocation for the known bit pattern
       ASSERT_EQ(int(p[b]) & 0xff, i % 256);
     }
diff --git a/util/bloom_test.cc b/util/bloom_test.cc
index 0bf8e8d..77fb1b3 100644
--- a/util/bloom_test.cc
+++ b/util/bloom_test.cc
@@ -126,7 +126,8 @@
     }
     Build();
 
-    ASSERT_LE(FilterSize(), (length * 10 / 8) + 40) << length;
+    ASSERT_LE(FilterSize(), static_cast<size_t>((length * 10 / 8) + 40))
+        << length;
 
     // All added keys must match
     for (int i = 0; i < length; i++) {
diff --git a/util/coding_test.cc b/util/coding_test.cc
index fb5726e..521541e 100644
--- a/util/coding_test.cc
+++ b/util/coding_test.cc
@@ -112,13 +112,13 @@
   }
 
   std::string s;
-  for (int i = 0; i < values.size(); i++) {
+  for (size_t i = 0; i < values.size(); i++) {
     PutVarint64(&s, values[i]);
   }
 
   const char* p = s.data();
   const char* limit = p + s.size();
-  for (int i = 0; i < values.size(); i++) {
+  for (size_t i = 0; i < values.size(); i++) {
     ASSERT_TRUE(p < limit);
     uint64_t actual;
     const char* start = p;
@@ -143,7 +143,7 @@
   std::string s;
   PutVarint32(&s, large_value);
   uint32_t result;
-  for (int len = 0; len < s.size() - 1; len++) {
+  for (size_t len = 0; len < s.size() - 1; len++) {
     ASSERT_TRUE(GetVarint32Ptr(s.data(), s.data() + len, &result) == NULL);
   }
   ASSERT_TRUE(GetVarint32Ptr(s.data(), s.data() + s.size(), &result) != NULL);
@@ -162,7 +162,7 @@
   std::string s;
   PutVarint64(&s, large_value);
   uint64_t result;
-  for (int len = 0; len < s.size() - 1; len++) {
+  for (size_t len = 0; len < s.size() - 1; len++) {
     ASSERT_TRUE(GetVarint64Ptr(s.data(), s.data() + len, &result) == NULL);
   }
   ASSERT_TRUE(GetVarint64Ptr(s.data(), s.data() + s.size(), &result) != NULL);
diff --git a/util/env_posix.cc b/util/env_posix.cc
index 3e2925d..e1cbebd 100644
--- a/util/env_posix.cc
+++ b/util/env_posix.cc
@@ -175,147 +175,43 @@
   }
 };
 
-// We preallocate up to an extra megabyte and use memcpy to append new
-// data to the file.  This is safe since we either properly close the
-// file before reading from it, or for log files, the reading code
-// knows enough to skip zero suffixes.
-class PosixMmapFile : public WritableFile {
+class PosixWritableFile : public WritableFile {
  private:
   std::string filename_;
-  int fd_;
-  size_t page_size_;
-  size_t map_size_;       // How much extra memory to map at a time
-  char* base_;            // The mapped region
-  char* limit_;           // Limit of the mapped region
-  char* dst_;             // Where to write next  (in range [base_,limit_])
-  char* last_sync_;       // Where have we synced up to
-  uint64_t file_offset_;  // Offset of base_ in file
-
-  // Have we done an munmap of unsynced data?
-  bool pending_sync_;
-
-  // Roundup x to a multiple of y
-  static size_t Roundup(size_t x, size_t y) {
-    return ((x + y - 1) / y) * y;
-  }
-
-  size_t TruncateToPageBoundary(size_t s) {
-    s -= (s & (page_size_ - 1));
-    assert((s % page_size_) == 0);
-    return s;
-  }
-
-  bool UnmapCurrentRegion() {
-    bool result = true;
-    if (base_ != NULL) {
-      if (last_sync_ < limit_) {
-        // Defer syncing this data until next Sync() call, if any
-        pending_sync_ = true;
-      }
-      if (munmap(base_, limit_ - base_) != 0) {
-        result = false;
-      }
-      file_offset_ += limit_ - base_;
-      base_ = NULL;
-      limit_ = NULL;
-      last_sync_ = NULL;
-      dst_ = NULL;
-
-      // Increase the amount we map the next time, but capped at 1MB
-      if (map_size_ < (1<<20)) {
-        map_size_ *= 2;
-      }
-    }
-    return result;
-  }
-
-  bool MapNewRegion() {
-    assert(base_ == NULL);
-    if (ftruncate(fd_, file_offset_ + map_size_) < 0) {
-      return false;
-    }
-    void* ptr = mmap(NULL, map_size_, PROT_READ | PROT_WRITE, MAP_SHARED,
-                     fd_, file_offset_);
-    if (ptr == MAP_FAILED) {
-      return false;
-    }
-    base_ = reinterpret_cast<char*>(ptr);
-    limit_ = base_ + map_size_;
-    dst_ = base_;
-    last_sync_ = base_;
-    return true;
-  }
+  FILE* file_;
 
  public:
-  PosixMmapFile(const std::string& fname, int fd, size_t page_size)
-      : filename_(fname),
-        fd_(fd),
-        page_size_(page_size),
-        map_size_(Roundup(65536, page_size)),
-        base_(NULL),
-        limit_(NULL),
-        dst_(NULL),
-        last_sync_(NULL),
-        file_offset_(0),
-        pending_sync_(false) {
-    assert((page_size & (page_size - 1)) == 0);
-  }
+  PosixWritableFile(const std::string& fname, FILE* f)
+      : filename_(fname), file_(f) { }
 
-
-  ~PosixMmapFile() {
-    if (fd_ >= 0) {
-      PosixMmapFile::Close();
+  ~PosixWritableFile() {
+    if (file_ != NULL) {
+      // Ignoring any potential errors
+      fclose(file_);
     }
   }
 
   virtual Status Append(const Slice& data) {
-    const char* src = data.data();
-    size_t left = data.size();
-    while (left > 0) {
-      assert(base_ <= dst_);
-      assert(dst_ <= limit_);
-      size_t avail = limit_ - dst_;
-      if (avail == 0) {
-        if (!UnmapCurrentRegion() ||
-            !MapNewRegion()) {
-          return IOError(filename_, errno);
-        }
-      }
-
-      size_t n = (left <= avail) ? left : avail;
-      memcpy(dst_, src, n);
-      dst_ += n;
-      src += n;
-      left -= n;
+    size_t r = fwrite_unlocked(data.data(), 1, data.size(), file_);
+    if (r != data.size()) {
+      return IOError(filename_, errno);
     }
     return Status::OK();
   }
 
   virtual Status Close() {
-    Status s;
-    size_t unused = limit_ - dst_;
-    if (!UnmapCurrentRegion()) {
-      s = IOError(filename_, errno);
-    } else if (unused > 0) {
-      // Trim the extra space at the end of the file
-      if (ftruncate(fd_, file_offset_ - unused) < 0) {
-        s = IOError(filename_, errno);
-      }
+    Status result;
+    if (fclose(file_) != 0) {
+      result = IOError(filename_, errno);
     }
-
-    if (close(fd_) < 0) {
-      if (s.ok()) {
-        s = IOError(filename_, errno);
-      }
-    }
-
-    fd_ = -1;
-    base_ = NULL;
-    limit_ = NULL;
-    return s;
+    file_ = NULL;
+    return result;
   }
 
   virtual Status Flush() {
+    if (fflush_unlocked(file_) != 0) {
+      return IOError(filename_, errno);
+    }
     return Status::OK();
   }
 
@@ -352,26 +248,10 @@
     if (!s.ok()) {
       return s;
     }
-
-    if (pending_sync_) {
-      // Some unmapped data was not synced
-      pending_sync_ = false;
-      if (fdatasync(fd_) < 0) {
-        s = IOError(filename_, errno);
-      }
+    if (fflush_unlocked(file_) != 0 ||
+        fdatasync(fileno(file_)) != 0) {
+      s = Status::IOError(filename_, strerror(errno));
     }
-
-    if (dst_ > last_sync_) {
-      // Find the beginnings of the pages that contain the first and last
-      // bytes to be synced.
-      size_t p1 = TruncateToPageBoundary(last_sync_ - base_);
-      size_t p2 = TruncateToPageBoundary(dst_ - base_ - 1);
-      last_sync_ = dst_;
-      if (msync(base_ + p1, p2 - p1 + page_size_, MS_SYNC) < 0) {
-        s = IOError(filename_, errno);
-      }
-    }
-
     return s;
   }
 };
@@ -462,12 +342,12 @@
   virtual Status NewWritableFile(const std::string& fname,
                                  WritableFile** result) {
     Status s;
-    const int fd = open(fname.c_str(), O_CREAT | O_RDWR | O_TRUNC, 0644);
-    if (fd < 0) {
+    FILE* f = fopen(fname.c_str(), "w");
+    if (f == NULL) {
       *result = NULL;
       s = IOError(fname, errno);
     } else {
-      *result = new PosixMmapFile(fname, fd, page_size_);
+      *result = new PosixWritableFile(fname, f);
     }
     return s;
   }
@@ -630,7 +510,6 @@
     return NULL;
   }
 
-  size_t page_size_;
   pthread_mutex_t mu_;
   pthread_cond_t bgsignal_;
   pthread_t bgthread_;
@@ -645,8 +524,7 @@
   MmapLimiter mmap_limit_;
 };
 
-PosixEnv::PosixEnv() : page_size_(getpagesize()),
-                       started_bgthread_(false) {
+PosixEnv::PosixEnv() : started_bgthread_(false) {
   PthreadCall("mutex_init", pthread_mutex_init(&mu_, NULL));
   PthreadCall("cvar_init", pthread_cond_init(&bgsignal_, NULL));
 }
diff --git a/util/testharness.cc b/util/testharness.cc
index eb1bdd5..402fab3 100644
--- a/util/testharness.cc
+++ b/util/testharness.cc
@@ -38,7 +38,7 @@
 
   int num = 0;
   if (tests != NULL) {
-    for (int i = 0; i < tests->size(); i++) {
+    for (size_t i = 0; i < tests->size(); i++) {
       const Test& t = (*tests)[i];
       if (matcher != NULL) {
         std::string name = t.base;
diff --git a/util/testutil.cc b/util/testutil.cc
index 538d095..bee56bf 100644
--- a/util/testutil.cc
+++ b/util/testutil.cc
@@ -32,7 +32,7 @@
 
 
 extern Slice CompressibleString(Random* rnd, double compressed_fraction,
-                                int len, std::string* dst) {
+                                size_t len, std::string* dst) {
   int raw = static_cast<int>(len * compressed_fraction);
   if (raw < 1) raw = 1;
   std::string raw_data;
diff --git a/util/testutil.h b/util/testutil.h
index 824e655..adad3fc 100644
--- a/util/testutil.h
+++ b/util/testutil.h
@@ -24,7 +24,7 @@
 // "N*compressed_fraction" bytes and return a Slice that references
 // the generated data.
 extern Slice CompressibleString(Random* rnd, double compressed_fraction,
-                                int len, std::string* dst);
+                                size_t len, std::string* dst);
 
 // A wrapper that allows injection of errors.
 class ErrorEnv : public EnvWrapper {