[f2fs] Add a unit test for the NAT bitmap and correct NAT flushing

This unit test checks the validity of kNatBitmap, the bitmap that
a checkpoint pack includes to indicate the valid NAT blocks among the two NAT areas.

This change also corrects NAT flushing so that it uses only the NAT areas,
not the journal area, when the number of dirty NAT entries exceeds the capacity of the journal area.

Test: fx test f2fs-unittest fs-tests large-fs-tests

Change-Id: I3e33b046f8f41b04508feb6098068b752e4185ab
Reviewed-on: https://fuchsia-review.googlesource.com/c/third_party/f2fs/+/548261
Reviewed-by: Brett Wilson <brettw@google.com>
diff --git a/node.cc b/node.cc
index aed5abd..8ba62fe 100644
--- a/node.cc
+++ b/node.cc
@@ -1860,46 +1860,46 @@
   SummaryBlock *sum = curseg->sum_blk;
   int i;
 
-  mtx_lock(&curseg->curseg_mutex);
+  fbl::AutoLock lock(&curseg->curseg_mutex);
 
-  if (NatsInCursum(sum) < static_cast<int>(kNatJournalEntries)) {
-    mtx_unlock(&curseg->curseg_mutex);
+  ReadLock(&nm_i->nat_tree_lock);
+  size_t dirty_nat_cnt = list_length(&nm_i->dirty_nat_entries);
+  if ((NatsInCursum(sum) + dirty_nat_cnt) <= kNatJournalEntries) {
+    ReadUnlock(&nm_i->nat_tree_lock);
     return false;
   }
+  ReadUnlock(&nm_i->nat_tree_lock);
 
   for (i = 0; i < NatsInCursum(sum); i++) {
-    NatEntry *ne;
-    RawNatEntry raw_ne;
+    NatEntry *ne = nullptr;
+    RawNatEntry raw_ne = NatInJournal(sum, i);
     nid_t nid = LeToCpu(NidInJournal(sum, i));
 
-    raw_ne = NatInJournal(sum, i);
-  retry:
-    WriteLock(&nm_i->nat_tree_lock);
-    ne = LookupNatCache(nm_i, nid);
-    if (ne) {
-      SetNatCacheDirty(nm_i, ne);
-      WriteUnlock(&nm_i->nat_tree_lock);
-      continue;
+    while (!ne) {
+      WriteLock(&nm_i->nat_tree_lock);
+      ne = LookupNatCache(nm_i, nid);
+      if (ne) {
+        SetNatCacheDirty(nm_i, ne);
+        WriteUnlock(&nm_i->nat_tree_lock);
+      } else {
+        ne = GrabNatEntry(nm_i, nid);
+        if (!ne) {
+          WriteUnlock(&nm_i->nat_tree_lock);
+          continue;
+        }
+        NatSetBlkaddr(ne, LeToCpu(raw_ne.block_addr));
+        NatSetIno(ne, LeToCpu(raw_ne.ino));
+        NatSetVersion(ne, raw_ne.version);
+        SetNatCacheDirty(nm_i, ne);
+        WriteUnlock(&nm_i->nat_tree_lock);
+      }
     }
-    ne = GrabNatEntry(nm_i, nid);
-    if (!ne) {
-      WriteUnlock(&nm_i->nat_tree_lock);
-      goto retry;
-    }
-    NatSetBlkaddr(ne, LeToCpu(raw_ne.block_addr));
-    NatSetIno(ne, LeToCpu(raw_ne.ino));
-    NatSetVersion(ne, raw_ne.version);
-    SetNatCacheDirty(nm_i, ne);
-    WriteUnlock(&nm_i->nat_tree_lock);
   }
   UpdateNatsInCursum(sum, -i);
-  mtx_unlock(&curseg->curseg_mutex);
   return true;
 }
 
-/**
- * This function is called during the checkpointing process.
- */
+// This function is called during the checkpointing process.
 void NodeMgr::FlushNatEntries() {
   SbInfo &sbi = fs_->GetSbInfo();
   NmInfo *nm_i = GetNmInfo(&sbi);
@@ -1916,9 +1916,9 @@
 #if 0  // porting needed
   //	if (!flushed)
 #endif
-  mtx_lock(&curseg->curseg_mutex);
+  fbl::AutoLock lock(&curseg->curseg_mutex);
 
-  /* 1) flush dirty nat caches */
+  // 1) flush dirty nat caches
   list_for_every_safe(&nm_i->dirty_nat_entries, cur, n) {
     NatEntry *ne;
     nid_t nid;
@@ -1931,41 +1931,39 @@
 
     if (NatGetBlkaddr(ne) == kNewAddr)
       continue;
-    if (flushed)
-      goto to_nat_page;
 
-    /* if there is room for nat enries in curseg->sumpage */
-    offset = fs_->Segmgr().LookupJournalInCursum(sum, JournalType::kNatJournal, nid, 1);
-    if (offset >= 0) {
+    if (!flushed) {
+      // if there is room for nat entries in curseg->sumpage
+      offset = fs_->Segmgr().LookupJournalInCursum(sum, JournalType::kNatJournal, nid, 1);
+    }
+
+    if (offset >= 0) {  // flush to journal
       raw_ne = NatInJournal(sum, offset);
       old_blkaddr = LeToCpu(raw_ne.block_addr);
-      goto flush_now;
-    }
-  to_nat_page:
-    if (!page || (start_nid > nid || nid > end_nid)) {
-      if (page) {
+    } else {  // flush to NAT block
+      if (!page || (start_nid > nid || nid > end_nid)) {
+        if (page) {
 #if 0  // porting needed
        // set_page_dirty(page, fs_);
 #endif
-        FlushDirtyMetaPage(fs_, page);
-        F2fsPutPage(page, 1);
-        page = nullptr;
-      }
-      start_nid = StartNid(nid);
-      end_nid = start_nid + kNatEntryPerBlock - 1;
+          FlushDirtyMetaPage(fs_, page);
+          F2fsPutPage(page, 1);
+          page = nullptr;
+        }
+        start_nid = StartNid(nid);
+        end_nid = start_nid + kNatEntryPerBlock - 1;
 
-      /*
-       * get nat block with dirty flag, increased reference
-       * count, mapped and lock
-       */
-      page = GetNextNatPage(start_nid);
-      nat_blk = static_cast<NatBlock *>(PageAddress(page));
+        // get nat block with dirty flag, increased reference
+        // count, mapped and lock
+        page = GetNextNatPage(start_nid);
+        nat_blk = static_cast<NatBlock *>(PageAddress(page));
+      }
+
+      ZX_ASSERT(nat_blk);
+      raw_ne = nat_blk->entries[nid - start_nid];
+      old_blkaddr = LeToCpu(raw_ne.block_addr);
     }
 
-    ZX_ASSERT(nat_blk);
-    raw_ne = nat_blk->entries[nid - start_nid];
-    old_blkaddr = LeToCpu(raw_ne.block_addr);
-  flush_now:
     new_blkaddr = NatGetBlkaddr(ne);
 
     raw_ne.ino = CpuToLe(NatGetIno(ne));
@@ -1984,7 +1982,7 @@
       DelFromNatCache(nm_i, ne);
       WriteUnlock(&nm_i->nat_tree_lock);
 
-      /* We can reuse this freed nid at this point */
+      // We can reuse this freed nid at this point
       AddFreeNid(GetNmInfo(&sbi), nid);
     } else {
       WriteLock(&nm_i->nat_tree_lock);
@@ -1996,14 +1994,14 @@
 #if 0  // porting needed
   //	if (!flushed)
 #endif
-  mtx_unlock(&curseg->curseg_mutex);
+
 #if 0  // porting needed
   // set_page_dirty(page, fs_);
 #endif
   FlushDirtyMetaPage(fs_, page);
   F2fsPutPage(page, 1);
 
-  /* 2) shrink nat caches if necessary */
+  // 2) shrink nat caches if necessary
   TryToFreeNats(nm_i->nat_cnt - kNmWoutThreshold);
 }
 
diff --git a/test/unit/checkpoint.cc b/test/unit/checkpoint.cc
index e83bbff..f56372c 100644
--- a/test/unit/checkpoint.cc
+++ b/test/unit/checkpoint.cc
@@ -16,11 +16,13 @@
 
 using block_client::FakeBlockDevice;
 constexpr uint32_t kCheckpointVersionTest = 0;
+constexpr uint32_t kCheckpointNatBitmapTest = 1;
 
 constexpr uint32_t kCheckpointPack0 = 0;
 constexpr uint32_t kCheckpointPack1 = 1;
 
-constexpr uint32_t kCheckpointLoopCnt = 2;
+constexpr uint32_t kCheckpointLoopCnt = 20;
+constexpr uint8_t kRootDirNatBit = 0x80;
 
 void ReadCheckpoint(F2fs *fs, block_t cp_addr, Page **cp_out) {
   Page *cp_page[2];  // cp_page[0]: header, cp_page[1]: footer
@@ -101,10 +103,44 @@
   *cp_out = cur_cp_page;
 
   if (!after_mkfs) {
-    if (cp_position == kCheckpointPack0)
+    if (cp_position == kCheckpointPack0) {
       F2fsPutPage(cp_page2, 1);
-    else
+    } else {
       F2fsPutPage(cp_page1, 1);
+    }
+  }
+}
+
+inline void *GetBitmapPrt(Checkpoint *ckpt, MetaBitmap flag) {
+  uint32_t offset = (flag == MetaBitmap::kNatBitmap) ? ckpt->sit_ver_bitmap_bytesize : 0;
+  return &ckpt->sit_nat_version_bitmap + offset;
+}
+
+void CreateDirs(F2fs *fs, int dir_cnt, uint64_t version) {
+  fbl::RefPtr<VnodeF2fs> data_root;
+  ASSERT_EQ(VnodeF2fs::Vget(fs, fs->RawSb().root_ino, &data_root), ZX_OK);
+  Dir *root_dir = static_cast<Dir *>(data_root.get());
+  std::string filename;
+
+  for (int i = 0; i < dir_cnt; i++) {
+    fbl::RefPtr<fs::Vnode> vnode;
+    filename = "dir_" + std::to_string(version) + "_" + std::to_string(i);
+    ASSERT_EQ(root_dir->Create(filename.c_str(), S_IFDIR, &vnode), ZX_OK);
+    vnode.reset();
+  }
+}
+
+void CreateFiles(F2fs *fs, int file_cnt, uint64_t version) {
+  fbl::RefPtr<VnodeF2fs> data_root;
+  ASSERT_EQ(VnodeF2fs::Vget(fs, fs->RawSb().root_ino, &data_root), ZX_OK);
+  Dir *root_dir = static_cast<Dir *>(data_root.get());
+  std::string filename;
+
+  for (int i = 0; i < file_cnt; i++) {
+    fbl::RefPtr<fs::Vnode> vnode;
+    filename = "file_" + std::to_string(version) + "_" + std::to_string(i);
+    ASSERT_EQ(root_dir->Create(filename.c_str(), S_IFREG, &vnode), ZX_OK);
+    vnode.reset();
   }
 }
 
@@ -121,7 +157,99 @@
   F2fsPutPage(cp_page, 1);
 }
 
-void CheckpointTestMain(MountOptions &options, uint32_t test, uint32_t priv) {
+void CheckpointTestNatBitmap(F2fs *fs, uint32_t expect_cp_position, uint32_t expect_cp_ver,
+                             bool after_mkfs, uint8_t *&pre_bitmap) {
+  Page *cp_page = nullptr;
+  uint8_t *version_bitmap;
+  uint32_t cur_nat_block = 0;
+  uint8_t cur_nat_bit = 0;
+
+  // 1. Get last checkpoint
+  GetLastCheckpoint(fs, expect_cp_position, after_mkfs, &cp_page);
+  Checkpoint *cp = static_cast<Checkpoint *>(PageAddress(cp_page));
+  ASSERT_EQ(cp->checkpoint_ver, expect_cp_ver);
+
+  // 2. Get NAT version bitmap
+  version_bitmap = static_cast<uint8_t *>(GetBitmapPrt(cp, MetaBitmap::kNatBitmap));
+  ASSERT_NE(version_bitmap, nullptr);
+
+  if (pre_bitmap == nullptr)
+    pre_bitmap = new uint8_t[cp->nat_ver_bitmap_bytesize]();
+
+#ifdef F2FS_BU_DEBUG
+  std::cout << "CP ver= " << cp->checkpoint_ver << ",     pre_bitmap = " << std::hex;
+  printf("%02x ", (static_cast<uint8_t *>(pre_bitmap))[0]);
+
+  for (uint32_t i = 0; i < 8 /*cp->nat_ver_bitmap_bytesize*/; i++) {
+    std::cout << std::bitset<8>((static_cast<uint8_t *>(pre_bitmap))[i]) << " ";
+  }
+  std::cout << std::dec << std::endl;
+
+  std::cout << "CP ver= " << cp->checkpoint_ver << ", version_bitmap = " << std::hex;
+  printf("%02x ", (static_cast<uint8_t *>(version_bitmap))[0]);
+
+  for (uint32_t i = 0; i < 8 /*cp->nat_ver_bitmap_bytesize*/; i++) {
+    std::cout << std::bitset<8>((static_cast<uint8_t *>(version_bitmap))[i]) << " ";
+  }
+  std::cout << std::dec << std::endl;
+#endif
+
+  // 3. Validate version bitmap
+  // Check root dir version bitmap
+  ASSERT_EQ((static_cast<uint8_t *>(version_bitmap))[0] & kRootDirNatBit,
+            cp->checkpoint_ver % 2 ? 0x00 : kRootDirNatBit);
+
+  // Check dir and file inode version bitmap
+  if (!after_mkfs) {
+    if (cp->checkpoint_ver % 2) {
+      (static_cast<uint8_t *>(pre_bitmap))[0] &= ~kRootDirNatBit;
+    } else {
+      (static_cast<uint8_t *>(pre_bitmap))[0] |= kRootDirNatBit;
+    }
+
+    cur_nat_block = cp->checkpoint_ver - 2;
+    cur_nat_bit = 0x80 >> (cur_nat_block % 8);
+    (static_cast<uint8_t *>(pre_bitmap))[cur_nat_block / 8] |= cur_nat_bit;
+
+    ASSERT_EQ((static_cast<uint8_t *>(version_bitmap))[cur_nat_block / 8],
+              (static_cast<uint8_t *>(pre_bitmap))[cur_nat_block / 8]);
+
+#ifdef F2FS_BU_DEBUG
+    std::cout << "CP ver= " << cp->checkpoint_ver << ", exp pre_bitmap = " << std::hex;
+    printf("%02x ", (static_cast<uint8_t *>(pre_bitmap))[0]);
+
+    for (uint32_t i = 0; i < 8 /*cp->nat_ver_bitmap_bytesize*/; i++) {
+      std::cout << std::bitset<8>((static_cast<uint8_t *>(pre_bitmap))[i]) << " ";
+    }
+    std::cout << std::dec << std::endl;
+#endif
+
+    ASSERT_EQ(memcmp(pre_bitmap, version_bitmap, cp->nat_ver_bitmap_bytesize), 0);
+  }
+
+  memcpy(pre_bitmap, version_bitmap, cp->nat_ver_bitmap_bytesize);
+
+  // 4. Creates inodes and triggers checkpoint
+  // It creates 455 inodes in the root dir to make one dirty NAT block, and
+  // it triggers checkpoint. It results in one bit triggered in NAT bitmap.
+  // Since the current F2FS impl. supports only sync IO, every file creation results in
+  // updating the root inode, and thus the first bit (root inode) in NAT bitmap is also triggered.
+  for (int i = 0; i < 4; i++) {
+    CreateDirs(fs, 1, cp->checkpoint_ver * 10 + i);
+    CreateFiles(fs, 100, cp->checkpoint_ver * 10 + i);
+  }
+
+  CreateDirs(fs, 1, cp->checkpoint_ver * 10 + 4);
+  if (after_mkfs) {
+    CreateFiles(fs, 46, cp->checkpoint_ver * 10 + 4);  // Mkfs uses 4 nids
+  } else {
+    CreateFiles(fs, 50, cp->checkpoint_ver * 10 + 4);  // 5 dirs + 450 files = 455
+  }
+
+  F2fsPutPage(cp_page, 1);
+}
+
+void CheckpointTestMain(MountOptions &options, uint32_t test, uint8_t *&priv) {
   std::unique_ptr<f2fs::Bcache> bc;
   bool readonly_device = false;
   bool after_mkfs = true;
@@ -160,6 +288,9 @@
         case kCheckpointVersionTest:
           CheckpointTestVersion(fs.get(), checkpoint_pack, i, after_mkfs);
           break;
+        case kCheckpointNatBitmapTest:
+          CheckpointTestNatBitmap(fs.get(), checkpoint_pack, i, after_mkfs, priv);
+          break;
         default:
           ASSERT_EQ(0, 1);
           break;
@@ -168,10 +299,11 @@
       if (after_mkfs)
         after_mkfs = false;
 
-      if (checkpoint_pack == kCheckpointPack0)
+      if (checkpoint_pack == kCheckpointPack0) {
         checkpoint_pack = kCheckpointPack1;
-      else
+      } else {
         checkpoint_pack = kCheckpointPack0;
+      }
     }
   }
   fs->Shutdown([&loop](zx_status_t) { loop.Quit(); });
@@ -182,7 +314,18 @@
 
 TEST(CheckpointTest, Version) {
   MountOptions options;
-  CheckpointTestMain(options, kCheckpointVersionTest, 0);
+  uint8_t *priv = nullptr;
+
+  CheckpointTestMain(options, kCheckpointVersionTest, priv);
+}
+
+TEST(CheckpointTest, NatBitmap) {
+  MountOptions options;
+  uint8_t *pre_bitmap = nullptr;
+
+  CheckpointTestMain(options, kCheckpointNatBitmapTest, pre_bitmap);
+
+  delete[] pre_bitmap;
 }
 
 }  // namespace