libsnapshot_cow: Prepare device to boot from pre-created snapshots

Two new API's have been added:

1: BootFromSnapshotsWithoutSlotSwitch: This will create a new marker
which indicates first-stage init to mount the partitions off snapshots.

We need this marker as during boot, there are couple of places during
mounting snapshots wherein the marker is used. However, there is no
change in the existing I/O path related to OTA.

2: PrepareDeviceToBootWithoutSnapshot: This will delete the marker so
that subsequent reboot will not have the partitions mounted without the
snapshots.

VTS tests covers both these API's. Additionally, when these
markers are present, new OTA's cannot be installed. All these
are covered in VTS tests.

===========================================================

snapshotctl: General flow to apply and revert pre-created snapshots

1: To install the pre-created snapshots:

$snapshotctl map-snapshots <directory path containing snapshots patches>

Now the device is ready to boot from snapshots.

2: After device reboots, partitions are mounted off the snapshots. There
   is no snapshot-merge.

3: In order to go back to previous build:

$snapshotctl revert-snapshots

Now the device is ready to boot from base build.

4: After device reboots back to previous build, all the snapshot states
   and COW images are removed.

============================================

Additional commands:

To delete the pre-created snapshots:

$snapshotctl delete-snapshots

======================================

Tested it on Pixel 6 Pro between two builds which are ~24 hours apart.

1: Creating snapshots on a linux-host - ~4-6 seconds
2: Applying pre-created snapshots - ~10-15 seconds (includes intermediate
   transfer of patches to the device). This depends on the size of snapshot patches.
3: Device reboot - ~12-14 seconds.

Bug: 299011882
Test: 1: Apply pre-created snapshots
2: Reboot device: Verify new build
3: Apply OTA when partitions are mounted of snapshots and verify OTA
   fails.
3: Revert-snapshot and reboot.
4: Verify device goes back to base build.

Full OTA on Pixel. vts_libsnapshot_test

Change-Id: I36a72d973d8f70ae49773ebd45dd996fac22a4e3
Signed-off-by: Akilesh Kailash <akailash@google.com>
diff --git a/fs_mgr/libsnapshot/include/libsnapshot/snapshot.h b/fs_mgr/libsnapshot/include/libsnapshot/snapshot.h
index 6bc8b9b..e7b0020 100644
--- a/fs_mgr/libsnapshot/include/libsnapshot/snapshot.h
+++ b/fs_mgr/libsnapshot/include/libsnapshot/snapshot.h
@@ -73,6 +73,9 @@
 class SnapshotMergeStats;
 class SnapshotStatus;
 
+using std::chrono::duration_cast;
+using namespace std::chrono_literals;
+
 static constexpr const std::string_view kCowGroupName = "cow";
 static constexpr char kVirtualAbCompressionProp[] = "ro.virtual_ab.compression.enabled";
 
@@ -424,6 +427,7 @@
     FRIEND_TEST(SnapshotTest, MergeFailureCode);
     FRIEND_TEST(SnapshotTest, NoMergeBeforeReboot);
     FRIEND_TEST(SnapshotTest, UpdateBootControlHal);
+    FRIEND_TEST(SnapshotTest, BootSnapshotWithoutSlotSwitch);
     FRIEND_TEST(SnapshotUpdateTest, AddPartition);
     FRIEND_TEST(SnapshotUpdateTest, ConsistencyCheckResume);
     FRIEND_TEST(SnapshotUpdateTest, DaemonTransition);
@@ -436,6 +440,7 @@
     FRIEND_TEST(SnapshotUpdateTest, QueryStatusError);
     FRIEND_TEST(SnapshotUpdateTest, SnapshotStatusFileWithoutCow);
     FRIEND_TEST(SnapshotUpdateTest, SpaceSwapUpdate);
+    FRIEND_TEST(SnapshotUpdateTest, MapAllSnapshotsWithoutSlotSwitch);
     friend class SnapshotTest;
     friend class SnapshotUpdateTest;
     friend class FlashAfterUpdateTest;
@@ -456,7 +461,7 @@
     bool EnsureImageManager();
 
     // Ensure we're connected to snapuserd.
-    bool EnsureSnapuserdConnected();
+    bool EnsureSnapuserdConnected(std::chrono::milliseconds timeout_ms = 10s);
 
     // Helpers for first-stage init.
     const std::unique_ptr<IDeviceInfo>& device() const { return device_; }
@@ -549,6 +554,16 @@
     // Unmap and remove all known snapshots.
     bool RemoveAllSnapshots(LockedFile* lock);
 
+    // Boot device off snapshots without slot switch
+    bool BootFromSnapshotsWithoutSlotSwitch();
+
+    // Remove kBootSnapshotsWithoutSlotSwitch so that device can boot
+    // without snapshots on the current slot
+    bool PrepareDeviceToBootWithoutSnapshot();
+
+    // Is the kBootSnapshotsWithoutSlotSwitch present
+    bool IsSnapshotWithoutSlotSwitch();
+
     // List the known snapshot names.
     bool ListSnapshots(LockedFile* lock, std::vector<std::string>* snapshots,
                        const std::string& suffix = "");
@@ -663,6 +678,7 @@
     std::string GetRollbackIndicatorPath();
     std::string GetForwardMergeIndicatorPath();
     std::string GetOldPartitionMetadataPath();
+    std::string GetBootSnapshotsWithoutSlotSwitchPath();
 
     const LpMetadata* ReadOldPartitionMetadata(LockedFile* lock);
 
diff --git a/fs_mgr/libsnapshot/snapshot.cpp b/fs_mgr/libsnapshot/snapshot.cpp
index 51389a0..4743a42 100644
--- a/fs_mgr/libsnapshot/snapshot.cpp
+++ b/fs_mgr/libsnapshot/snapshot.cpp
@@ -83,6 +83,8 @@
 using namespace std::chrono_literals;
 using namespace std::string_literals;
 
+static constexpr char kBootSnapshotsWithoutSlotSwitch[] =
+        "/metadata/ota/snapshot-boot-without-slot-switch";
 static constexpr char kBootIndicatorPath[] = "/metadata/ota/snapshot-boot";
 static constexpr char kRollbackIndicatorPath[] = "/metadata/ota/rollback-indicator";
 static constexpr auto kUpdateStateCheckInterval = 2s;
@@ -217,6 +219,12 @@
     auto file = LockExclusive();
     if (!file) return false;
 
+    if (IsSnapshotWithoutSlotSwitch()) {
+        LOG(ERROR) << "Cannot cancel the snapshots as partitions are mounted off the snapshots on "
+                      "current slot.";
+        return false;
+    }
+
     UpdateState state = ReadUpdateState(file.get());
     if (state == UpdateState::None) {
         RemoveInvalidSnapshots(file.get());
@@ -299,10 +307,9 @@
     // - For ForwardMerge, FinishedSnapshotWrites asserts that the existence of the indicator
     // matches the incoming update.
     std::vector<std::string> files = {
-            GetSnapshotBootIndicatorPath(),
-            GetRollbackIndicatorPath(),
-            GetForwardMergeIndicatorPath(),
-            GetOldPartitionMetadataPath(),
+            GetSnapshotBootIndicatorPath(),          GetRollbackIndicatorPath(),
+            GetForwardMergeIndicatorPath(),          GetOldPartitionMetadataPath(),
+            GetBootSnapshotsWithoutSlotSwitchPath(),
     };
     for (const auto& file : files) {
         RemoveFileIfExists(file);
@@ -483,6 +490,32 @@
             LOG(ERROR) << "Failed to retrieve base_sectors from Snapuserd";
             return false;
         }
+    } else if (IsSnapshotWithoutSlotSwitch()) {
+        // When snapshots are on current slot, we determine the size
+        // of block device based on the number of COW operations. We cannot
+        // use base device as it will be from older image.
+        size_t num_ops = 0;
+        uint64_t dev_sz = 0;
+        unique_fd fd(open(cow_file.c_str(), O_RDONLY | O_CLOEXEC));
+        if (fd < 0) {
+            PLOG(ERROR) << "Failed to open " << cow_file;
+            return false;
+        }
+
+        CowReader reader;
+        if (!reader.Parse(std::move(fd))) {
+            LOG(ERROR) << "Failed to parse cow " << cow_file;
+            return false;
+        }
+
+        const auto& header = reader.GetHeader();
+        if (header.prefix.major_version > 2) {
+            LOG(ERROR) << "COW format not supported";
+            return false;
+        }
+        num_ops = reader.get_num_total_data_ops();
+        dev_sz = (num_ops * header.block_size);
+        base_sectors = dev_sz >> 9;
     } else {
         // For userspace snapshots, the size of the base device is taken as the
         // size of the dm-user block device. Since there is no pseudo mapping
@@ -1479,6 +1512,10 @@
     return result;
 }
 
+std::string SnapshotManager::GetBootSnapshotsWithoutSlotSwitchPath() {
+    return metadata_dir_ + "/" + android::base::Basename(kBootSnapshotsWithoutSlotSwitch);
+}
+
 std::string SnapshotManager::GetSnapshotBootIndicatorPath() {
     return metadata_dir_ + "/" + android::base::Basename(kBootIndicatorPath);
 }
@@ -2120,6 +2157,10 @@
     return state;
 }
 
+bool SnapshotManager::IsSnapshotWithoutSlotSwitch() {
+    return (access(GetBootSnapshotsWithoutSlotSwitchPath().c_str(), F_OK) == 0);
+}
+
 bool SnapshotManager::UpdateUsesCompression() {
     auto lock = LockShared();
     if (!lock) return false;
@@ -2212,6 +2253,13 @@
 }
 
 bool SnapshotManager::NeedSnapshotsInFirstStageMount() {
+    if (IsSnapshotWithoutSlotSwitch()) {
+        if (GetCurrentSlot() != Slot::Source) {
+            LOG(ERROR) << "Snapshots marked to boot without slot switch; but slot is wrong";
+            return false;
+        }
+        return true;
+    }
     // If we fail to read, we'll wind up using CreateLogicalPartitions, which
     // will create devices that look like the old slot, except with extra
     // content at the end of each device. This will confuse dm-verity, and
@@ -2347,7 +2395,8 @@
     // completed, live_snapshot_status is set to nullopt.
     std::optional<SnapshotStatus> live_snapshot_status;
     do {
-        if (!(params.partition->attributes & LP_PARTITION_ATTR_UPDATED)) {
+        if (!IsSnapshotWithoutSlotSwitch() &&
+            !(params.partition->attributes & LP_PARTITION_ATTR_UPDATED)) {
             LOG(INFO) << "Detected re-flashing of partition, will skip snapshot: "
                       << params.GetPartitionName();
             break;
@@ -2703,7 +2752,7 @@
     // to unmap; hence, we can't be deleting the device
     // as the table would be mounted off partitions and will fail.
     if (snapshot_status.state() != SnapshotState::MERGE_COMPLETED) {
-        if (!DeleteDeviceIfExists(dm_user_name)) {
+        if (!DeleteDeviceIfExists(dm_user_name, 4000ms)) {
             LOG(ERROR) << "Cannot unmap " << dm_user_name;
             return false;
         }
@@ -3098,7 +3147,7 @@
     return true;
 }
 
-bool SnapshotManager::EnsureSnapuserdConnected() {
+bool SnapshotManager::EnsureSnapuserdConnected(std::chrono::milliseconds timeout_ms) {
     if (snapuserd_client_) {
         return true;
     }
@@ -3107,7 +3156,7 @@
         return false;
     }
 
-    snapuserd_client_ = SnapuserdClient::Connect(kSnapuserdSocket, 10s);
+    snapuserd_client_ = SnapuserdClient::Connect(kSnapuserdSocket, timeout_ms);
     if (!snapuserd_client_) {
         LOG(ERROR) << "Unable to connect to snapuserd";
         return false;
@@ -4372,13 +4421,70 @@
 bool SnapshotManager::IsUserspaceSnapshotUpdateInProgress() {
     auto slot = GetCurrentSlot();
     if (slot == Slot::Target) {
+        // Merge in-progress
         if (IsSnapuserdRequired()) {
             return true;
         }
     }
 
+    // Let's check more deeper to see if snapshots are mounted
+    auto lock = LockExclusive();
+    if (!lock) {
+        return false;
+    }
+
+    std::vector<std::string> snapshots;
+    if (!ListSnapshots(lock.get(), &snapshots)) {
+        return false;
+    }
+
+    for (const auto& snapshot : snapshots) {
+        // Active snapshot and daemon is alive
+        if (IsSnapshotDevice(snapshot) && EnsureSnapuserdConnected(2s)) {
+            return true;
+        }
+    }
+
     return false;
 }
 
+bool SnapshotManager::BootFromSnapshotsWithoutSlotSwitch() {
+    auto lock = LockExclusive();
+    if (!lock) return false;
+
+    auto contents = device_->GetSlotSuffix();
+    // This is the indicator which tells first-stage init
+    // to boot from snapshots even though there was no slot-switch
+    auto boot_file = GetBootSnapshotsWithoutSlotSwitchPath();
+    if (!WriteStringToFileAtomic(contents, boot_file)) {
+        PLOG(ERROR) << "write failed: " << boot_file;
+        return false;
+    }
+
+    SnapshotUpdateStatus update_status = ReadSnapshotUpdateStatus(lock.get());
+    update_status.set_state(UpdateState::Initiated);
+    update_status.set_userspace_snapshots(true);
+    update_status.set_using_snapuserd(true);
+    if (!WriteSnapshotUpdateStatus(lock.get(), update_status)) {
+        return false;
+    }
+    return true;
+}
+
+bool SnapshotManager::PrepareDeviceToBootWithoutSnapshot() {
+    auto lock = LockExclusive();
+    if (!lock) return false;
+
+    android::base::RemoveFileIfExists(GetSnapshotBootIndicatorPath());
+    android::base::RemoveFileIfExists(GetBootSnapshotsWithoutSlotSwitchPath());
+
+    SnapshotUpdateStatus update_status = ReadSnapshotUpdateStatus(lock.get());
+    update_status.set_state(UpdateState::Cancelled);
+    if (!WriteSnapshotUpdateStatus(lock.get(), update_status)) {
+        return false;
+    }
+    return true;
+}
+
 }  // namespace snapshot
 }  // namespace android
diff --git a/fs_mgr/libsnapshot/snapshot_test.cpp b/fs_mgr/libsnapshot/snapshot_test.cpp
index 3b6d26a..e506110 100644
--- a/fs_mgr/libsnapshot/snapshot_test.cpp
+++ b/fs_mgr/libsnapshot/snapshot_test.cpp
@@ -2559,6 +2559,56 @@
     }
 }
 
+TEST_F(SnapshotUpdateTest, MapAllSnapshotsWithoutSlotSwitch) {
+    MountMetadata();
+    AddOperationForPartitions();
+    // Execute the update.
+    ASSERT_TRUE(sm->BeginUpdate());
+    ASSERT_TRUE(sm->CreateUpdateSnapshots(manifest_));
+
+    if (!sm->UpdateUsesUserSnapshots()) {
+        GTEST_SKIP() << "Test does not apply as UserSnapshots aren't enabled.";
+    }
+
+    ASSERT_TRUE(WriteSnapshots());
+    ASSERT_TRUE(sm->FinishedSnapshotWrites(false));
+
+    if (ShouldSkipLegacyMerging()) {
+        GTEST_SKIP() << "Skipping legacy merge test";
+    }
+    // Mark the indicator
+    ASSERT_TRUE(sm->BootFromSnapshotsWithoutSlotSwitch());
+
+    ASSERT_TRUE(sm->EnsureSnapuserdConnected());
+    sm->set_use_first_stage_snapuserd(true);
+
+    ASSERT_TRUE(sm->NeedSnapshotsInFirstStageMount());
+
+    // Map snapshots
+    ASSERT_TRUE(sm->MapAllSnapshots(10s));
+
+    // New updates should fail
+    ASSERT_FALSE(sm->BeginUpdate());
+
+    // Snapshots cannot be cancelled
+    ASSERT_FALSE(sm->CancelUpdate());
+
+    // Merge cannot start
+    ASSERT_FALSE(sm->InitiateMerge());
+
+    // Read bytes back and verify they match the cache.
+    ASSERT_TRUE(IsPartitionUnchanged("sys_b"));
+
+    // Remove the indicators
+    ASSERT_TRUE(sm->PrepareDeviceToBootWithoutSnapshot());
+
+    // Ensure snapshots are still mounted
+    ASSERT_TRUE(sm->IsUserspaceSnapshotUpdateInProgress());
+
+    // Cleanup snapshots
+    ASSERT_TRUE(sm->UnmapAllSnapshots());
+}
+
 TEST_F(SnapshotUpdateTest, MapAllSnapshots) {
     AddOperationForPartitions();
     // Execute the update.
diff --git a/fs_mgr/libsnapshot/snapshotctl.cpp b/fs_mgr/libsnapshot/snapshotctl.cpp
index 1323b0b..ebaca2d 100644
--- a/fs_mgr/libsnapshot/snapshotctl.cpp
+++ b/fs_mgr/libsnapshot/snapshotctl.cpp
@@ -75,7 +75,11 @@
                  "  unmap-snapshots\n"
                  "    Unmap all pre-created snapshots\n"
                  "  delete-snapshots\n"
-                 "    Delete all pre-created snapshots\n";
+                 "    Delete all pre-created snapshots\n"
+                 "  revert-snapshots\n"
+                 "    Prepares devices to boot without snapshots on next boot.\n"
+                 "    This does not delete the snapshot. It only removes the indicators\n"
+                 "    so that first stage init will not mount from snapshots.\n";
     return EX_USAGE;
 }
 
@@ -87,9 +91,11 @@
     MapSnapshots(std::string path = "");
     bool CreateSnapshotDevice(std::string& partition_name, std::string& patch);
     bool InitiateThreadedSnapshotWrite(std::string& pname, std::string& snapshot_patch);
-    bool WaitForSnapshotWritesToComplete();
+    bool FinishSnapshotWrites();
     bool UnmapCowImagePath(std::string& name);
-    bool DeleteCowImage(std::string& name);
+    bool DeleteSnapshots();
+    bool CleanupSnapshot() { return sm_->PrepareDeviceToBootWithoutSnapshot(); }
+    bool BeginUpdate();
 
   private:
     std::optional<std::string> GetCowImagePath(std::string& name);
@@ -107,7 +113,24 @@
         exit(1);
     }
     snapshot_dir_path_ = path + "/";
+}
+
+bool MapSnapshots::BeginUpdate() {
     lock_ = sm_->LockExclusive();
+    std::vector<std::string> snapshots;
+    sm_->ListSnapshots(lock_.get(), &snapshots);
+    if (!snapshots.empty()) {
+        // Snapshots are already present.
+        return true;
+    }
+
+    lock_ = nullptr;
+    if (!sm_->BeginUpdate()) {
+        LOG(ERROR) << "BeginUpdate failed";
+        return false;
+    }
+    lock_ = sm_->LockExclusive();
+    return true;
 }
 
 bool MapSnapshots::CreateSnapshotDevice(std::string& partition_name, std::string& patchfile) {
@@ -130,6 +153,9 @@
     dev_sz &= ~(block_sz - 1);
 
     SnapshotStatus status;
+    status.set_state(SnapshotState::CREATED);
+    status.set_using_snapuserd(true);
+    status.set_old_partition_size(0);
     status.set_name(partition_name);
     status.set_cow_file_size(dev_sz);
     status.set_cow_partition_size(0);
@@ -216,27 +242,33 @@
     return true;
 }
 
-bool MapSnapshots::WaitForSnapshotWritesToComplete() {
+bool MapSnapshots::FinishSnapshotWrites() {
     bool ret = true;
     for (auto& t : threads_) {
         ret = t.get() && ret;
     }
 
+    lock_ = nullptr;
     if (ret) {
         LOG(INFO) << "Pre-created snapshots successfully copied";
-    } else {
-        LOG(ERROR) << "Snapshot copy failed";
+        if (!sm_->FinishedSnapshotWrites(false)) {
+            return false;
+        }
+        return sm_->BootFromSnapshotsWithoutSlotSwitch();
     }
-    return ret;
+
+    LOG(ERROR) << "Snapshot copy failed";
+    return false;
 }
 
 bool MapSnapshots::UnmapCowImagePath(std::string& name) {
     return sm_->UnmapCowImage(name);
 }
 
-bool MapSnapshots::DeleteCowImage(std::string& name) {
-    if (!sm_->DeleteSnapshot(lock_.get(), name)) {
-        LOG(ERROR) << "Delete snapshot failed";
+bool MapSnapshots::DeleteSnapshots() {
+    lock_ = sm_->LockExclusive();
+    if (!sm_->RemoveAllUpdateState(lock_.get())) {
+        LOG(ERROR) << "Remove All Update State failed";
         return false;
     }
     return true;
@@ -281,7 +313,8 @@
     return true;
 }
 
-bool UnMapPrecreatedSnapshots(int, char**) {
+bool UnMapPrecreatedSnapshots(int, char** argv) {
+    android::base::InitLogging(argv, &android::base::KernelLogger);
     // Make sure we are root.
     if (::getuid() != 0) {
         LOG(ERROR) << "Not running as root. Try \"adb root\" first.";
@@ -302,29 +335,36 @@
     return true;
 }
 
-bool DeletePrecreatedSnapshots(int, char**) {
+bool RemovePrecreatedSnapshots(int, char** argv) {
+    android::base::InitLogging(argv, &android::base::KernelLogger);
+    // Make sure we are root.
+    if (::getuid() != 0) {
+        LOG(ERROR) << "Not running as root. Try \"adb root\" first.";
+        return false;
+    }
+
+    MapSnapshots snapshot;
+    if (!snapshot.CleanupSnapshot()) {
+        LOG(ERROR) << "CleanupSnapshot failed";
+        return false;
+    }
+    return true;
+}
+
+bool DeletePrecreatedSnapshots(int, char** argv) {
+    android::base::InitLogging(argv, &android::base::KernelLogger);
     // Make sure we are root.
     if (::getuid() != 0) {
         LOG(ERROR) << "Not running as root. Try \"adb root\" first.";
         return EXIT_FAILURE;
     }
 
-    std::vector<std::string> partitions;
-    if (!GetVerityPartitions(partitions)) {
-        return false;
-    }
-
     MapSnapshots snapshot;
-    for (auto partition : partitions) {
-        if (!snapshot.DeleteCowImage(partition)) {
-            LOG(ERROR) << "DeleteCowImage failed: " << partition;
-        }
-    }
-    return true;
+    return snapshot.DeleteSnapshots();
 }
 
 bool MapPrecreatedSnapshots(int argc, char** argv) {
-    android::base::InitLogging(argv, &android::base::StderrLogger);
+    android::base::InitLogging(argv, &android::base::KernelLogger);
 
     // Make sure we are root.
     if (::getuid() != 0) {
@@ -365,6 +405,11 @@
     }
 
     MapSnapshots cow(path);
+    if (!cow.BeginUpdate()) {
+        LOG(ERROR) << "BeginUpdate failed";
+        return false;
+    }
+
     for (auto& pair : partitions) {
         if (!cow.CreateSnapshotDevice(pair.first, pair.second)) {
             LOG(ERROR) << "CreateSnapshotDevice failed for: " << pair.first;
@@ -376,7 +421,7 @@
         }
     }
 
-    return cow.WaitForSnapshotWritesToComplete();
+    return cow.FinishSnapshotWrites();
 }
 
 #ifdef SNAPSHOTCTL_USERDEBUG_OR_ENG
@@ -508,6 +553,7 @@
         {"map-snapshots", MapPrecreatedSnapshots},
         {"unmap-snapshots", UnMapPrecreatedSnapshots},
         {"delete-snapshots", DeletePrecreatedSnapshots},
+        {"revert-snapshots", RemovePrecreatedSnapshots},
         // clang-format on
 };