src/storage/blobfs/blob.cc - fuchsia - Git at Google

 // Copyright 2017 The Fuchsia Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.

 #include "src/storage/blobfs/blob.h"

 #include <assert.h>
 #include <ctype.h>
 #include <fuchsia/device/c/fidl.h>
 #include <fuchsia/io/llcpp/fidl.h>
 #include <lib/fit/defer.h>
 #include <lib/sync/completion.h>
 #include <lib/syslog/cpp/macros.h>
 #include <lib/zx/status.h>
 #include <stdlib.h>
 #include <string.h>
 #include <strings.h>
 #include <zircon/assert.h>
 #include <zircon/device/vfs.h>
 #include <zircon/errors.h>
 #include <zircon/status.h>
 #include <zircon/syscalls.h>

 #include <algorithm>
 #include <memory>
 #include <string_view>
 #include <utility>
 #include <vector>

 #include <fbl/algorithm.h>
 #include <fbl/ref_ptr.h>
 #include <fbl/string_buffer.h>
 #include <safemath/checked_math.h>

 #include "src/lib/digest/digest.h"
 #include "src/lib/digest/merkle-tree.h"
 #include "src/lib/digest/node-digest.h"
 #include "src/lib/storage/vfs/cpp/journal/data_streamer.h"
 #include "src/lib/storage/vfs/cpp/metrics/events.h"
 #include "src/lib/storage/vfs/cpp/transaction/writeback.h"
 #include "src/lib/storage/vfs/cpp/vfs_types.h"
 #include "src/storage/blobfs/blob_data_producer.h"
 #include "src/storage/blobfs/blob_layout.h"
 #include "src/storage/blobfs/blob_verifier.h"
 #include "src/storage/blobfs/blobfs.h"
 #include "src/storage/blobfs/common.h"
 #include "src/storage/blobfs/compression/chunked.h"
 #include "src/storage/blobfs/compression_settings.h"
 #include "src/storage/blobfs/format.h"
 #include "src/storage/blobfs/iterator/allocated_extent_iterator.h"
 #include "src/storage/blobfs/iterator/block_iterator.h"
 #include "src/storage/blobfs/iterator/extent_iterator.h"
 #include "src/storage/blobfs/iterator/node_populator.h"
 #include "src/storage/blobfs/iterator/vector_extent_iterator.h"
 #include "src/storage/blobfs/metrics.h"

 namespace blobfs {

 using ::digest::Digest;
 using ::digest::MerkleTreeCreator;

 // Data used exclusively during writeback.
 struct Blob::WriteInfo {
   // See comment for merkle_tree() below.
   static constexpr size_t kPreMerkleTreePadding = kBlobfsBlockSize;

   WriteInfo() = default;

   // Not copyable or movable because merkle_tree_creator has a pointer to digest.
   WriteInfo(const WriteInfo&) = delete;
   WriteInfo& operator=(const WriteInfo&) = delete;

   // We leave room in the merkle tree buffer to add padding before the merkle tree which might be
   // required with the compact blob layout.
   uint8_t* merkle_tree() const {
     ZX_ASSERT(merkle_tree_buffer);
     return merkle_tree_buffer.get() + kPreMerkleTreePadding;
   }

   uint64_t bytes_written = 0;

   fbl::Vector<ReservedExtent> extents;
   fbl::Vector<ReservedNode> node_indices;

   std::optional<BlobCompressor> compressor;

   // Target compressed size for this blob indicates the possible on-disk compressed size in bytes.
   std::optional<uint64_t> target_compression_size_;

   // The fused write error.  Once writing has failed, we return the same error on subsequent
   // writes in case a higher layer dropped the error and returned a short write instead.
   zx_status_t write_error = ZX_OK;

   // As data is written, we build the merkle tree using this.
   digest::MerkleTreeCreator merkle_tree_creator;

   // The merkle tree creator stores the root digest here.
   uint8_t digest[digest::kSha256Length];

   // The merkle tree creator stores the rest of the tree here.  The buffer includes space for
   // padding.  See the comment for merkle_tree() above.
   std::unique_ptr<uint8_t[]> merkle_tree_buffer;

   // The old blob that this write is replacing.
   fbl::RefPtr<Blob> old_blob;

   // Sets the target_compression_size_ field.
   void SetTargetCompressionSize(uint64_t size) {
     target_compression_size_ = std::make_optional(size);
   }
 };

 bool SupportsPaging(const Inode& inode) {
   zx::status<CompressionAlgorithm> status = AlgorithmForInode(inode);
   if (status.is_ok() && (status.value() == CompressionAlgorithm::UNCOMPRESSED ||
                          status.value() == CompressionAlgorithm::CHUNKED)) {
     return true;
   }
   return false;
 }

 zx_status_t Blob::VerifyNullBlob() const {
   ZX_ASSERT(inode_.blob_size == 0);
   std::unique_ptr<BlobVerifier> verifier;
   if (zx_status_t status =
           BlobVerifier::CreateWithoutTree(digest(), blobfs_->Metrics(), inode_.blob_size,
                                           &blobfs_->blob_corruption_notifier(), &verifier);
       status != ZX_OK) {
     return status;
   }
   return verifier->Verify(nullptr, 0, 0);
 }

 uint64_t Blob::SizeData() const {
   std::lock_guard lock(mutex_);
   if (state() == BlobState::kReadable) {
     return inode_.blob_size;
   }
   return 0;
 }

 Blob::Blob(Blobfs* bs, const Digest& digest)
     : CacheNode(bs->vfs(), digest), blobfs_(bs), clone_watcher_(this) {
   write_info_ = std::make_unique<WriteInfo>();
 }

 Blob::Blob(Blobfs* bs, uint32_t node_index, const Inode& inode)
     : CacheNode(bs->vfs(), Digest(inode.merkle_root_hash)),
       blobfs_(bs),
       state_(BlobState::kReadable),
       syncing_state_(SyncingState::kDone),
       map_index_(node_index),
       clone_watcher_(this),
       inode_(inode) {
   write_info_ = std::make_unique<WriteInfo>();
 }

 zx_status_t Blob::WriteNullBlob() {
   ZX_DEBUG_ASSERT(inode_.blob_size == 0);
   ZX_DEBUG_ASSERT(inode_.block_count == 0);

   if (zx_status_t status = VerifyNullBlob(); status != ZX_OK) {
     return status;
   }

   BlobTransaction transaction;
   if (zx_status_t status = WriteMetadata(transaction); status != ZX_OK) {
     return status;
   }
   transaction.Commit(*blobfs_->journal(), {},
                      [blob = fbl::RefPtr(this)]() { blob->CompleteSync(); });

   return MarkReadable();
 }

 zx_status_t Blob::PrepareWrite(uint64_t size_data, bool compress) {
   if (size_data > 0 && fbl::round_up(size_data, kBlobfsBlockSize) == 0) {
     // Fail early if |size_data| would overflow when rounded up to block size.
     return ZX_ERR_OUT_OF_RANGE;
   }

   std::lock_guard lock(mutex_);
   if (state() != BlobState::kEmpty) {
     return ZX_ERR_BAD_STATE;
   }

   // Fail early if target_compression_size is set is not sane.
   if (write_info_->target_compression_size_.has_value() &&
       (write_info_->target_compression_size_.value() == 0 ||
        (write_info_->target_compression_size_.value()) == std::numeric_limits<uint64_t>::max())) {
     FX_LOGS(ERROR) << "Target compressed size is invalid: "
                    << write_info_->target_compression_size_.value();
     return ZX_ERR_INVALID_ARGS;
   }

   memset(inode_.merkle_root_hash, 0, sizeof(inode_.merkle_root_hash));
   inode_.blob_size = size_data;

   // Reserve a node for blob's inode. We might need more nodes for extents later.
   zx_status_t status = blobfs_->GetAllocator()->ReserveNodes(1, &write_info_->node_indices);
   if (status != ZX_OK) {
     return status;
   }
   map_index_ = write_info_->node_indices[0].index();

   // For compressed blobs, we only write into the compression buffer.  For uncompressed blobs we
   // write into the data vmo.
   if (compress) {
     write_info_->compressor =
         BlobCompressor::Create(blobfs_->write_compression_settings(), inode_.blob_size);
     if (!write_info_->compressor) {
       // TODO(fxbug.dev/70356)Make BlobCompressor::Create return the actual error instead.
       // Replace ZX_ERR_INTERNAL with the correct error once fxbug.dev/70356 is fixed.
       FX_LOGS(ERROR) << "Failed to initialize compressor: " << ZX_ERR_INTERNAL;
       return ZX_ERR_INTERNAL;
     }
   } else if (inode_.blob_size != 0) {
     if ((status = PrepareDataVmoForWriting()) != ZX_OK) {
       return status;
     }
   }

   write_info_->merkle_tree_creator.SetUseCompactFormat(
       ShouldUseCompactMerkleTreeFormat(GetBlobLayoutFormat(blobfs_->Info())));
   if ((status = write_info_->merkle_tree_creator.SetDataLength(inode_.blob_size)) != ZX_OK) {
     return status;
   }
   const size_t tree_len = write_info_->merkle_tree_creator.GetTreeLength();
   // Allow for zero padding before and after.
   write_info_->merkle_tree_buffer =
       std::make_unique<uint8_t[]>(tree_len + WriteInfo::kPreMerkleTreePadding);
   if ((status = write_info_->merkle_tree_creator.SetTree(write_info_->merkle_tree(), tree_len,
                                                          &write_info_->digest,
                                                          sizeof(write_info_->digest))) != ZX_OK) {
     return status;
   }

   set_state(BlobState::kDataWrite);

   // Special case for the null blob: We skip the write phase.
   return inode_.blob_size == 0 ? WriteNullBlob() : ZX_OK;
 }

 void Blob::SetOldBlob(Blob& blob) {
   std::lock_guard lock(mutex_);
   write_info_->old_blob = fbl::RefPtr(&blob);
 }

 zx_status_t Blob::SpaceAllocate(uint32_t block_count) {
   TRACE_DURATION("blobfs", "Blobfs::SpaceAllocate", "block_count", block_count);
   ZX_ASSERT(block_count != 0);

   fs::Ticker ticker(blobfs_->Metrics()->Collecting());

   // Initialize the inode with known fields. The block count may change if the
   // blob is compressible.
   inode_.block_count = block_count;

   fbl::Vector<ReservedExtent> extents;
   fbl::Vector<ReservedNode> nodes;

   // Reserve space for the blob.
   const uint64_t reserved_blocks = blobfs_->GetAllocator()->ReservedBlockCount();
   zx_status_t status = blobfs_->GetAllocator()->ReserveBlocks(inode_.block_count, &extents);
   if (status == ZX_ERR_NO_SPACE && reserved_blocks > 0) {
     // It's possible that a blob has just been unlinked but has yet to be flushed through the
     // journal, and the blocks are still reserved, so if that looks likely, force a flush and then
     // try again.  This might need to be revisited if/when blobfs becomes multi-threaded.
     sync_completion_t sync;
     blobfs_->Sync([&](zx_status_t) { sync_completion_signal(&sync); });
     sync_completion_wait(&sync, ZX_TIME_INFINITE);
     status = blobfs_->GetAllocator()->ReserveBlocks(inode_.block_count, &extents);
   }
   if (status != ZX_OK) {
     return status;
   }
   if (extents.size() > kMaxBlobExtents) {
     FX_LOGS(ERROR) << "Error: Block reservation requires too many extents (" << extents.size()
                    << " vs " << kMaxBlobExtents << " max)";
     return ZX_ERR_BAD_STATE;
   }
   const ExtentCountType extent_count = static_cast<ExtentCountType>(extents.size());

   // Reserve space for all additional nodes necessary to contain this blob.
   // The inode has already been reserved in Blob::PrepareWrite.
   // Hence, we need to reserve one less node here.
   size_t node_count = NodePopulator::NodeCountForExtents(extent_count) - 1;
   status = blobfs_->GetAllocator()->ReserveNodes(node_count, &nodes);
   if (status != ZX_OK) {
     return status;
   }

   write_info_->extents = std::move(extents);
   while (!nodes.is_empty()) {
     write_info_->node_indices.push_back(nodes.erase(0));
   }
   blobfs_->Metrics()->UpdateAllocation(inode_.blob_size, ticker.End());
   return ZX_OK;
 }

 bool Blob::IsDataLoaded() const { return vmo().is_valid(); }

 bool Blob::IsPagerBacked() const {
   return SupportsPaging(inode_) && state() == BlobState::kReadable;
 }

 zx_status_t Blob::WriteMetadata(BlobTransaction& transaction) {
   TRACE_DURATION("blobfs", "Blobfs::WriteMetadata");
   assert(state() == BlobState::kDataWrite);

   // Update the on-disk hash.
   digest().CopyTo(inode_.merkle_root_hash);

   if (inode_.block_count) {
     // We utilize the NodePopulator class to take our reserved blocks and nodes and fill the
     // persistent map with an allocated inode / container.

     // If |on_node| is invoked on a node, it means that node was necessary to represent this
     // blob. Persist the node back to durable storage.
     auto on_node = [this, &transaction](uint32_t node_index) {
       blobfs_->PersistNode(node_index, transaction);
     };

     // If |on_extent| is invoked on an extent, it was necessary to represent this blob. Persist
     // the allocation of these blocks back to durable storage.
     //
     // Additionally, because of the compression feature of blobfs, it is possible we reserved
     // more extents than this blob ended up using. Decrement |remaining_blocks| to track if we
     // should exit early.
     size_t remaining_blocks = inode_.block_count;
     auto on_extent = [this, &transaction, &remaining_blocks](ReservedExtent& extent) {
       ZX_DEBUG_ASSERT(remaining_blocks > 0);
       if (remaining_blocks >= extent.extent().Length()) {
         // Consume the entire extent.
         remaining_blocks -= extent.extent().Length();
       } else {
         // Consume only part of the extent; we're done iterating.
         extent.SplitAt(static_cast<BlockCountType>(remaining_blocks));
         remaining_blocks = 0;
       }
       blobfs_->PersistBlocks(extent, transaction);
       if (remaining_blocks == 0) {
         return NodePopulator::IterationCommand::Stop;
       }
       return NodePopulator::IterationCommand::Continue;
     };

     auto mapped_inode_or_error = blobfs_->GetNode(map_index_);
     if (mapped_inode_or_error.is_error()) {
       return mapped_inode_or_error.status_value();
     }
     InodePtr mapped_inode = std::move(mapped_inode_or_error).value();
     *mapped_inode = inode_;
     NodePopulator populator(blobfs_->GetAllocator(), std::move(write_info_->extents),
                             std::move(write_info_->node_indices));
     ZX_ASSERT(populator.Walk(on_node, on_extent) == ZX_OK);

     // Ensure all non-allocation flags are propagated to the inode.
     const uint16_t non_allocation_flags = kBlobFlagMaskAnyCompression;
     {
       const uint16_t compression_flags = inode_.header.flags & kBlobFlagMaskAnyCompression;
       // Kernighan's algorithm for bit counting, returns 0 when zero or one bits are set.
       ZX_DEBUG_ASSERT((compression_flags & (compression_flags - 1)) == 0);
     }
     mapped_inode->header.flags &= ~non_allocation_flags;  // Clear any existing flags first.
     mapped_inode->header.flags |= (inode_.header.flags & non_allocation_flags);
   } else {
     // Special case: Empty node.
     ZX_DEBUG_ASSERT(write_info_->node_indices.size() == 1);
     auto mapped_inode_or_error = blobfs_->GetNode(map_index_);
     if (mapped_inode_or_error.is_error()) {
       return mapped_inode_or_error.status_value();
     }
     InodePtr mapped_inode = std::move(mapped_inode_or_error).value();
     *mapped_inode = inode_;
     blobfs_->GetAllocator()->MarkInodeAllocated(std::move(write_info_->node_indices[0]));
     blobfs_->PersistNode(map_index_, transaction);
   }
   return ZX_OK;
 }

 zx_status_t Blob::WriteInternal(const void* data, size_t len, size_t* actual) {
   TRACE_DURATION("blobfs", "Blobfs::WriteInternal", "data", data, "len", len);

   *actual = 0;
   if (len == 0) {
     return ZX_OK;
   }

   if (state() != BlobState::kDataWrite) {
     if (state() == BlobState::kError && write_info_ && write_info_->write_error != ZX_OK) {
       return write_info_->write_error;
     }
     return ZX_ERR_BAD_STATE;
   }

   const size_t to_write = std::min(len, inode_.blob_size - write_info_->bytes_written);
   const size_t offset = write_info_->bytes_written;

   *actual = to_write;
   write_info_->bytes_written += to_write;

   if (write_info_->compressor) {
     if (zx_status_t status = write_info_->compressor->Update(data, to_write); status != ZX_OK) {
       return status;
     }
   } else {
     if (zx_status_t status = vmo().write(data, offset, to_write); status != ZX_OK) {
       FX_LOGS(ERROR) << "blob: VMO write failed: " << zx_status_get_string(status);
       return status;
     }
   }

   if (zx_status_t status = write_info_->merkle_tree_creator.Append(data, to_write);
       status != ZX_OK) {
     FX_LOGS(ERROR) << "blob: MerkleTreeCreator::Append failed: " << zx_status_get_string(status);
     return status;
   }

   // More data to write.
   if (write_info_->bytes_written < inode_.blob_size) {
     return ZX_OK;
   }

   zx_status_t status = Commit();
   if (status != ZX_OK) {
     // Record the status so that if called again, we return the same status again.  This is done
     // because it's possible that the end-user managed to partially write some data to this blob in
     // which case the error could be dropped (by zxio or some other layer) and a short write
     // returned instead.  If this happens, the end-user will retry at which point it's helpful if we
     // return the same error rather than ZX_ERR_BAD_STATE (see above).
     write_info_->write_error = status;
     set_state(BlobState::kError);
   }

   return status;
 }

 zx_status_t Blob::Commit() {
   if (digest() != write_info_->digest) {
     // Downloaded blob did not match provided digest.
     FX_LOGS(ERROR) << "downloaded blob did not match provided digest " << digest();
     return ZX_ERR_IO_DATA_INTEGRITY;
   }

   const size_t merkle_size = write_info_->merkle_tree_creator.GetTreeLength();
   bool compress = write_info_->compressor.has_value();
   if (compress) {
     if (zx_status_t status = write_info_->compressor->End(); status != ZX_OK) {
       return status;
     }
     // If we're using the chunked compressor, abort compression if we're not going to get any
     // savings.  We can't easily do it for the other compression formats without changing the
     // decompression API to support streaming.
     if (write_info_->compressor->algorithm() == CompressionAlgorithm::CHUNKED &&
         fbl::round_up(write_info_->compressor->Size() + merkle_size, kBlobfsBlockSize) >=
             fbl::round_up(inode_.blob_size + merkle_size, kBlobfsBlockSize)) {
       compress = false;
     }
   }

   fs::Duration generation_time;

   const uint64_t data_size = compress ? write_info_->compressor->Size() : inode_.blob_size;
   auto blob_layout = BlobLayout::CreateFromSizes(GetBlobLayoutFormat(blobfs_->Info()),
                                                  inode_.blob_size, data_size, GetBlockSize());
   if (blob_layout.is_error()) {
     FX_LOGS(ERROR) << "Failed to create blob layout: " << blob_layout.status_string();
     return blob_layout.status_value();
   }

   const uint32_t total_block_count = blob_layout->TotalBlockCount();
   if (zx_status_t status = SpaceAllocate(total_block_count); status != ZX_OK) {
     FX_LOGS(ERROR) << "Failed to allocate " << total_block_count
                    << " blocks for the blob: " << zx_status_get_string(status);
     return status;
   }

   std::variant<std::monostate, DecompressBlobDataProducer, SimpleBlobDataProducer> data;
   BlobDataProducer* data_ptr = nullptr;

   if (compress) {
     // The data comes from the compression buffer.
     data_ptr = &data.emplace<SimpleBlobDataProducer>(
         fbl::Span(static_cast<const uint8_t*>(write_info_->compressor->Data()),
                   write_info_->compressor->Size()));
     SetCompressionAlgorithm(&inode_, write_info_->compressor->algorithm());
   } else if (write_info_->compressor) {
     // In this case, we've decided against compressing because there are no savings, so we have to
     // decompress.
     if (auto producer_or =
             DecompressBlobDataProducer::Create(*write_info_->compressor, inode_.blob_size);
         producer_or.is_error()) {
       return producer_or.error_value();
     } else {
       data_ptr = &data.emplace<DecompressBlobDataProducer>(std::move(producer_or).value());
     }
   } else {
     // The data comes from the data buffer.
     data_ptr = &data.emplace<SimpleBlobDataProducer>(
         fbl::Span(static_cast<const uint8_t*>(data_mapping_.start()), inode_.blob_size));
   }

   SimpleBlobDataProducer merkle(fbl::Span(write_info_->merkle_tree(), merkle_size));

   MergeBlobDataProducer producer = [&]() {
     switch (blob_layout->Format()) {
       case BlobLayoutFormat::kPaddedMerkleTreeAtStart:
         // Write the merkle data first followed by the data.  The merkle data should be a multiple
         // of the block size so we don't need any padding.
         ZX_ASSERT(merkle.GetRemainingBytes() % kBlobfsBlockSize == 0);
         return MergeBlobDataProducer(merkle, *data_ptr, /*padding=*/0);
       case BlobLayoutFormat::kCompactMerkleTreeAtEnd:
         // Write the data followed by the merkle tree.  There might be some padding between the
         // data and the merkle tree.
         return MergeBlobDataProducer(
             *data_ptr, merkle, blob_layout->MerkleTreeOffset() - data_ptr->GetRemainingBytes());
     }
   }();

   fs::DataStreamer streamer(blobfs_->journal(), blobfs_->WriteBufferBlockCount());
   if (zx_status_t status = WriteData(total_block_count, producer, streamer); status != ZX_OK) {
     return status;
   }

   // No more data to write. Flush to disk.
   fs::Ticker ticker(blobfs_->Metrics()->Collecting());  // Tracking enqueue time.

   // Enqueue the blob's final data work. Metadata must be enqueued separately.
   zx_status_t data_status = ZX_ERR_IO;
   sync_completion_t data_written;
   // Issue the signal when the callback is destroyed rather than in the callback because the
   // callback won't get called in some error paths.
   auto data_written_finished = fit::defer([&] { sync_completion_signal(&data_written); });
   auto write_all_data = streamer.Flush().then(
       [&data_status, data_written_finished = std::move(data_written_finished)](
           const fit::result<void, zx_status_t>& result) {
         data_status = result.is_ok() ? ZX_OK : result.error();
         return result;
       });

   // Discard things we don't need any more.  This has to be after the Flush call above to ensure
   // all data has been copied from these buffers.
   data_mapping_.Unmap();
   FreeVmo();
   write_info_->compressor.reset();

   // Wrap all pending writes with a strong reference to this Blob, so that it stays
   // alive while there are writes in progress acting on it.
   BlobTransaction transaction;
   if (zx_status_t status = WriteMetadata(transaction); status != ZX_OK) {
     return status;
   }
   if (write_info_->old_blob) {
     ZX_ASSERT(blobfs_->FreeInode(write_info_->old_blob->Ino(), transaction) == ZX_OK);
     auto& cache = Cache();
     ZX_ASSERT(cache.Evict(write_info_->old_blob) == ZX_OK);
     ZX_ASSERT(cache.Add(fbl::RefPtr(this)) == ZX_OK);
   }
   transaction.Commit(*blobfs_->journal(), std::move(write_all_data),
                      [self = fbl::RefPtr(this)]() {});

   // It's not safe to continue until all data has been written because we might need to reload it
   // (e.g. if the blob is immediately read after writing), and the journal caches data in ring
   // buffers, so wait until that has happened.  We don't need to wait for the metadata because we
   // cache that.
   sync_completion_wait(&data_written, ZX_TIME_INFINITE);
   if (data_status != ZX_OK) {
     return data_status;
   }

   blobfs_->Metrics()->UpdateClientWrite(inode_.block_count * kBlobfsBlockSize, merkle_size,
                                         ticker.End(), generation_time);
   return MarkReadable();
 }

 zx_status_t Blob::WriteData(uint32_t block_count, BlobDataProducer& producer,
                             fs::DataStreamer& streamer) {
   BlockIterator block_iter(std::make_unique<VectorExtentIterator>(write_info_->extents));
   const uint64_t data_start = DataStartBlock(blobfs_->Info());
   return StreamBlocks(
       &block_iter, block_count,
       [&](uint64_t vmo_offset, uint64_t dev_offset, uint32_t block_count) {
         while (block_count) {
           if (producer.NeedsFlush()) {
             // Queued operations might point at buffers that are about to be invalidated, so we have
             // to force those operations to be issued which will cause them to be copied.
             streamer.IssueOperations();
           }
           auto data = producer.Consume(block_count * kBlobfsBlockSize);
           if (data.is_error())
             return data.error_value();
           ZX_ASSERT(!data->empty());
           storage::UnbufferedOperation op = {.data = data->data(),
                                              .op = {
                                                  .type = storage::OperationType::kWrite,
                                                  .dev_offset = dev_offset + data_start,
                                                  .length = data->size() / kBlobfsBlockSize,
                                              }};
           // Pad if necessary.
           const size_t alignment = data->size() % kBlobfsBlockSize;
           if (alignment > 0) {
             memset(const_cast<uint8_t*>(data->end()), 0, kBlobfsBlockSize - alignment);
             ++op.op.length;
           }
           block_count -= op.op.length;
           dev_offset += op.op.length;
           streamer.StreamData(std::move(op));
         }  // while (block_count)
         return ZX_OK;
       });
 }

 zx_status_t Blob::MarkReadable() {
   if (readable_event_.is_valid()) {
     zx_status_t status = readable_event_.signal(0u, ZX_USER_SIGNAL_0);
     if (status != ZX_OK) {
       set_state(BlobState::kError);
       return status;
     }
   }
   set_state(BlobState::kReadable);
   { syncing_state_ = SyncingState::kSyncing; }
   write_info_.reset();
   return ZX_OK;
 }

 zx_status_t Blob::GetReadableEvent(zx::event* out) {
   TRACE_DURATION("blobfs", "Blobfs::GetReadableEvent");
   zx_status_t status;
   // This is the first 'wait until read event' request received.
   if (!readable_event_.is_valid()) {
     status = zx::event::create(0, &readable_event_);
     if (status != ZX_OK) {
       return status;
     } else if (state() == BlobState::kReadable) {
       readable_event_.signal(0u, ZX_USER_SIGNAL_0);
     }
   }
   zx::event out_event;
   status = readable_event_.duplicate(ZX_RIGHTS_BASIC, &out_event);
   if (status != ZX_OK) {
     return status;
   }

   *out = std::move(out_event);
   return ZX_OK;
 }

 zx_status_t Blob::CloneDataVmo(zx_rights_t rights, zx::vmo* out_vmo, size_t* out_size) {
   TRACE_DURATION("blobfs", "Blobfs::CloneVmo", "rights", rights);

   if (state() != BlobState::kReadable) {
     return ZX_ERR_BAD_STATE;
   }
   if (inode_.blob_size == 0) {
     return ZX_ERR_BAD_STATE;
   }

   auto status = LoadVmosFromDisk();
   if (status != ZX_OK) {
     return status;
   }

   zx::vmo clone;
   status = vmo().create_child(ZX_VMO_CHILD_COPY_ON_WRITE, 0, inode_.blob_size, &clone);
   if (status != ZX_OK) {
     FX_LOGS(ERROR) << "Failed to create child VMO: " << zx_status_get_string(status);
     return status;
   }

   // Only add exec right to VMO if explictly requested.  (Saves a syscall if
   // we're just going to drop the right back again in replace() call below.)
   if (rights & ZX_RIGHT_EXECUTE) {
     // Check if the VMEX resource held by Blobfs is valid and fail if it isn't. We do this to make
     // sure that we aren't implicitly relying on the ZX_POL_AMBIENT_MARK_VMO_EXEC job policy.
     const zx::resource& vmex = blobfs_->vmex_resource();
     if (!vmex.is_valid()) {
       FX_LOGS(ERROR) << "No VMEX resource available, executable blobs unsupported";
       return ZX_ERR_NOT_SUPPORTED;
     }
     if ((status = clone.replace_as_executable(vmex, &clone)) != ZX_OK) {
       return status;
     }
   }

   // Narrow rights to those requested.
   if ((status = clone.replace(rights, &clone)) != ZX_OK) {
     return status;
   }
   *out_vmo = std::move(clone);
   *out_size = inode_.blob_size;

   if (clone_watcher_.object() == ZX_HANDLE_INVALID) {
     clone_watcher_.set_object(vmo().get());
     clone_watcher_.set_trigger(ZX_VMO_ZERO_CHILDREN);

     // Keep a reference to "this" alive, preventing the blob
     // from being closed while someone may still be using the
     // underlying memory.
     //
     // We'll release it when no client-held VMOs are in use.
     clone_ref_ = fbl::RefPtr<Blob>(this);
     clone_watcher_.Begin(blobfs_->dispatcher());
   }

   return ZX_OK;
 }

 // TODO(fxbug.dev/51111) This is not used with the new pager. Remove this code when the transition
 // is complete.
 void Blob::HandleNoClones(async_dispatcher_t* dispatcher, async::WaitBase* wait, zx_status_t status,
                           const zx_packet_signal_t* signal) {
   fbl::RefPtr<Blob> local_clone_ref;
   {
     std::lock_guard<std::mutex> lock(mutex_);
     if (IsDataLoaded()) {
       zx_info_vmo_t info;
       zx_status_t info_status = vmo().get_info(ZX_INFO_VMO, &info, sizeof(info), nullptr, nullptr);
       if (info_status == ZX_OK) {
         if (info.num_children > 0) {
           // A clone was added at some point since the asynchronous HandleNoClones call was
           // enqueued. Re-arm the watcher, and return.
           //
           // clone_watcher_ is level triggered, so even if there are no clones now (since the call
           // to get_info), HandleNoClones will still be enqueued.
           //
           // No new clones can be added during this function, since clones are added on the main
           // dispatch thread which is currently running this function. If blobfs becomes
           // multi-threaded, locking will be necessary here.
           clone_watcher_.set_object(vmo().get());
           clone_watcher_.set_trigger(ZX_VMO_ZERO_CHILDREN);
           clone_watcher_.Begin(blobfs_->dispatcher());
           return;
         }
       } else {
         FX_LOGS(WARNING) << "Failed to get_info for vmo (" << zx_status_get_string(info_status)
                          << "); unable to verify VMO has no clones.";
       }
     }
     if (!tearing_down_) {
       ZX_DEBUG_ASSERT(status == ZX_OK);
       ZX_DEBUG_ASSERT((signal->observed & ZX_VMO_ZERO_CHILDREN) != 0);
       ZX_DEBUG_ASSERT(clone_watcher_.object() != ZX_HANDLE_INVALID);
     }
     clone_watcher_.set_object(ZX_HANDLE_INVALID);

     // Save the clone ref to the stack to prevent releasing it (and hence ourselves) inside the
     // lock.
     local_clone_ref = std::move(clone_ref_);
   }

   // Free the clone ref (outside the lock).
   //
   // This will trigger recycling which will call back into us via an external (non-lock-held)
   // function, so must be done outside of the lock to avoid reentrant locking.
   local_clone_ref = nullptr;

   // This might have been the last reference to a deleted blob, so try purging it.
   std::lock_guard<std::mutex> lock(mutex_);
   if (zx_status_t status = TryPurge(); status != ZX_OK) {
     FX_LOGS(WARNING) << "Purging blob " << digest() << " failed: " << zx_status_get_string(status);
   }
   if (!HasReferences()) {
     fbl::StringBuffer<ZX_MAX_NAME_LEN> data_vmo_name;
     FormatInactiveBlobDataVmoName(inode_, &data_vmo_name);
     if (zx_status_t status =
             vmo().set_property(ZX_PROP_NAME, data_vmo_name.c_str(), data_vmo_name.size());
         status != ZX_OK) {
       FX_LOGS(WARNING) << "Failed to update blob VMO name: " << zx_status_get_string(status);
     }
   }
 }

 zx_status_t Blob::ReadInternal(void* data, size_t len, size_t off, size_t* actual) {
   TRACE_DURATION("blobfs", "Blobfs::ReadInternal", "len", len, "off", off);

   // TODO(fxbug.dev/74088) allow multiple reads at a time as long as the blob is completely
   // loaded. This may involve moving the lock to LoadVmosFromDisk and auditing the inode_ and vmo_
   // usage in the code below to see if we can guarantee it won't change out from under us outside
   // the lock.
   std::lock_guard lock(mutex_);

   if (state() != BlobState::kReadable) {
     return ZX_ERR_BAD_STATE;
   }

   auto status = LoadVmosFromDisk();
   if (status != ZX_OK) {
     return status;
   }

   if (inode_.blob_size == 0) {
     *actual = 0;
     return ZX_OK;
   }
   if (off >= inode_.blob_size) {
     *actual = 0;
     return ZX_OK;
   }
   if (len > (inode_.blob_size - off)) {
     len = inode_.blob_size - off;
   }

   status = vmo().read(data, off, len);
   if (status == ZX_OK) {
     *actual = len;
   }
   return status;
 }

 #if defined(ENABLE_BLOBFS_NEW_PAGER)

 // New pager implementation.
 zx_status_t Blob::LoadPagedVmosFromDisk() {
   ZX_ASSERT(!IsDataLoaded());

   // If there is an overriden cache policy for pager-backed blobs, apply it now. Otherwise the
   // system-wide default will be used.
   std::optional<CachePolicy> cache_policy = blobfs_->pager_backed_cache_policy();
   if (cache_policy) {
     set_overridden_cache_policy(*cache_policy);
   }

   zx::status<BlobLoader::PagedLoadResult> load_result =
       blobfs_->loader().LoadBlobPaged(map_index_, &blobfs_->blob_corruption_notifier());
   if (load_result.is_error())
     return load_result.error_value();

   // Make the vmo.
   if (auto status = EnsureCreateVmo(load_result->layout->FileBlockAlignedSize()); status.is_error())
     return status.take_error();

   // Map the result.
   // TODO(fxbug.dev/74061): See if we can avoid doing this mapping.
   if (zx_status_t status = data_mapping_.Map(vmo()); status != ZX_OK) {
     FX_LOGS(ERROR) << "Failed to create mapping for data vmo: " << zx_status_get_string(status);
     FreeVmo();  // Rollback the allocated vmo.
     return status;
   }

   // Commit the other load information.
   pager_info_ = std::move(load_result->pager_info);
   merkle_mapping_ = std::move(load_result->merkle);

   return ZX_OK;
 }

 #else

 // Old pager.
 zx_status_t Blob::LoadPagedVmosFromDisk() {
   ZX_ASSERT(!IsDataLoaded());

   // If there is an overriden cache policy for pager-backed blobs, apply it now. Otherwise the
   // system-wide default will be used.
   std::optional<CachePolicy> cache_policy = blobfs_->pager_backed_cache_policy();
   if (cache_policy) {
     set_overridden_cache_policy(*cache_policy);
   }

   zx::status<BlobLoader::PagedLoadResult> load_result =
       blobfs_->loader().LoadBlobPaged(map_index_, &blobfs_->blob_corruption_notifier());
   if (load_result.is_error())
     return load_result.error_value();

   auto created_page_watcher =
       std::make_unique<pager::PageWatcher>(blobfs_->pager(), std::move(load_result->pager_info));

   // Make the vmo.
   zx::vmo data_vmo;
   if (zx_status_t status = created_page_watcher->CreatePagedVmo(
           load_result->layout->FileBlockAlignedSize(), &data_vmo);
       status != ZX_OK) {
     return status;
   }

   // Map the result.
   // TODO(fxbug.dev/74061): See if we can avoid doing this mapping.
   if (zx_status_t status = data_mapping_.Map(data_vmo); status != ZX_OK) {
     FX_LOGS(ERROR) << "Failed to create mapping for data vmo: " << zx_status_get_string(status);
     return status;
   }

   // Commit the load artifacts now that all setup has succeeded.
   merkle_mapping_ = std::move(load_result->merkle);
   vmo_ = std::move(data_vmo);
   page_watcher_ = std::move(created_page_watcher);

   return ZX_OK;
 }

 #endif

 zx_status_t Blob::LoadUnpagedVmosFromDisk() {
   ZX_ASSERT(!IsDataLoaded());

   zx::status<BlobLoader::UnpagedLoadResult> load_result =
       blobfs_->loader().LoadBlob(map_index_, &blobfs_->blob_corruption_notifier());
   if (load_result.is_ok()) {
     data_mapping_ = std::move(load_result->data_mapper);
     merkle_mapping_ = std::move(load_result->merkle);
     vmo_ = std::move(load_result->data_vmo);
   }

   return load_result.status_value();
 }

 zx_status_t Blob::LoadVmosFromDisk() {
   if (IsDataLoaded())
     return ZX_OK;

   if (inode_.blob_size == 0) {
     // Null blobs don't need any loading, just verification that they're correct.
     return VerifyNullBlob();
   }

   zx_status_t status;
   if (IsPagerBacked()) {
     status = LoadPagedVmosFromDisk();
   } else {
     status = LoadUnpagedVmosFromDisk();
   }

   if (status == ZX_OK)
     SetVmoName();

   syncing_state_ = SyncingState::kDone;
   return status;
 }

 zx_status_t Blob::PrepareDataVmoForWriting() {
   if (IsDataLoaded())
     return ZX_OK;

 #if defined(ENABLE_BLOBFS_NEW_PAGER)
   // TODO(fxbug.dev/51111): Blob writing needs to be addressed in the new pager. The problem is
   // that we can not write directly into the paged VMO (this will cause the kernel to try to fault
   // it in so we can write to it, which will call back into us).
   //
   // The write path either needs to have a different mode where the vmo() isn't registered with the
   // pager, or we need to write into a parallel write VMO. The latter option would be better
   // because it would enable us to page from just-written blobs (currently you have to close and
   // reopen the blob to support paging).
   return ZX_ERR_NOT_SUPPORTED;
 #else

   uint64_t block_aligned_size = fbl::round_up(inode_.blob_size, kBlobfsBlockSize);
   if (zx_status_t status = data_mapping_.CreateAndMap(
           block_aligned_size, ZX_VM_PERM_READ | ZX_VM_PERM_WRITE, nullptr, &vmo_);
       status != ZX_OK) {
     FX_LOGS(ERROR) << "Failed to map data vmo: " << zx_status_get_string(status);
     return status;
   }

   SetVmoName();

   return ZX_OK;
 #endif
 }

 zx_status_t Blob::QueueUnlink() {
   std::lock_guard lock(mutex_);

   deletable_ = true;
   // Attempt to purge in case the blob has been unlinked with no open fds
   return TryPurge();
 }

 zx_status_t Blob::CommitDataBuffer() {
   if (inode_.blob_size == 0) {
     // It's the null blob, so just verify.
     return VerifyNullBlob();
   }
   return vmo().op_range(ZX_VMO_OP_COMMIT, 0, inode_.blob_size, nullptr, 0);
 }

 zx_status_t Blob::Verify() {
   std::lock_guard lock(mutex_);
   if (auto status = LoadVmosFromDisk(); status != ZX_OK) {
     return status;
   }

   // Blobs that are not pager-backed are already verified when they are loaded.
   // For pager-backed blobs, commit the entire blob in memory. This will cause all of the pages to
   // be verified as they are read in (or for the null bob we just verify immediately).
   // If the commit operation fails due to a verification failure, we do propagate the error back via
   // the return status.
   if (IsPagerBacked()) {
     return CommitDataBuffer();
   }
   return ZX_OK;
 }

 #if defined(ENABLE_BLOBFS_NEW_PAGER)
 void Blob::OnNoClones() {
   // Do nothing. The default implementation will free the VMO but we always need to keep it
   // around and registered with the pager because FIDL reads go through VMO read code path.
 }
 #endif

 BlobCache& Blob::Cache() { return blobfs_->Cache(); }

 bool Blob::ShouldCache() const {
   std::lock_guard lock(mutex_);
   switch (state()) {
     // All "Valid", cacheable states, where the blob still exists on storage.
     case BlobState::kReadable:
       return true;
     default:
       return false;
   }
 }

 void Blob::ActivateLowMemory() {
   std::lock_guard<std::mutex> lock(mutex_);

   // We shouldn't be putting the blob into a low-memory state while it is still mapped.
   ZX_ASSERT(!has_clones());

 #if !defined(ENABLE_BLOBFS_NEW_PAGER)
   // This must happen before freeing the vmo for the PageWatcher's pager synchronization code in
   // its destructor to work.
   page_watcher_.reset();
 #endif

   FreeVmo();

   data_mapping_.Unmap();
   merkle_mapping_.Reset();
 }

 Blob::~Blob() { ActivateLowMemory(); }

 fs::VnodeProtocolSet Blob::GetProtocols() const { return fs::VnodeProtocol::kFile; }

 bool Blob::ValidateRights(fs::Rights rights) {
   // To acquire write access to a blob, it must be empty.
   //
   // TODO(fxbug.dev/67659) If we run FIDL on multiple threads (we currently don't) there is a race
   // condition here where another thread could start writing at the same time. Decide whether we
   // support FIDL from multiple threads and if so, whether this condition is important.
   std::lock_guard lock(mutex_);
   return !rights.write || state() == BlobState::kEmpty;
 }

 zx_status_t Blob::GetNodeInfoForProtocol([[maybe_unused]] fs::VnodeProtocol protocol,
                                          [[maybe_unused]] fs::Rights rights,
                                          fs::VnodeRepresentation* info) {
   std::lock_guard lock(mutex_);

   zx::event observer;
   if (zx_status_t status = GetReadableEvent(&observer); status != ZX_OK) {
     return status;
   }
   *info = fs::VnodeRepresentation::File{.observer = std::move(observer)};
   return ZX_OK;
 }

 zx_status_t Blob::Read(void* data, size_t len, size_t off, size_t* out_actual) {
   TRACE_DURATION("blobfs", "Blob::Read", "len", len, "off", off);
   auto event = blobfs_->Metrics()->NewLatencyEvent(fs_metrics::Event::kRead);

   return ReadInternal(data, len, off, out_actual);
 }

 zx_status_t Blob::Write(const void* data, size_t len, size_t offset, size_t* out_actual) {
   TRACE_DURATION("blobfs", "Blob::Write", "len", len, "off", offset);

   std::lock_guard lock(mutex_);
   auto event = blobfs_->Metrics()->NewLatencyEvent(fs_metrics::Event::kWrite);
   return WriteInternal(data, len, out_actual);
 }

 zx_status_t Blob::Append(const void* data, size_t len, size_t* out_end, size_t* out_actual) {
   auto event = blobfs_->Metrics()->NewLatencyEvent(fs_metrics::Event::kAppend);

   std::lock_guard lock(mutex_);

   zx_status_t status = WriteInternal(data, len, out_actual);
   if (state() == BlobState::kDataWrite) {
     ZX_DEBUG_ASSERT(write_info_ != nullptr);
     *out_actual = write_info_->bytes_written;
   } else {
     *out_actual = inode_.blob_size;
   }
   return status;
 }

 zx_status_t Blob::GetAttributes(fs::VnodeAttributes* a) {
   auto event = blobfs_->Metrics()->NewLatencyEvent(fs_metrics::Event::kGetAttr);

   // SizeData() expects to be called outside the lock.
   auto content_size = SizeData();

   std::lock_guard lock(mutex_);

   *a = fs::VnodeAttributes();
   a->mode = V_TYPE_FILE | V_IRUSR;
   a->inode = map_index_;
   a->content_size = content_size;
   a->storage_size = inode_.block_count * kBlobfsBlockSize;
   a->link_count = 1;
   a->creation_time = 0;
   a->modification_time = 0;
   return ZX_OK;
 }

 zx_status_t Blob::Truncate(size_t len) {
   TRACE_DURATION("blobfs", "Blob::Truncate", "len", len);
   auto event = blobfs_->Metrics()->NewLatencyEvent(fs_metrics::Event::kTruncate);
   return PrepareWrite(len, blobfs_->ShouldCompress() && len > kCompressionSizeThresholdBytes);
 }

 void Blob::SetTargetCompressionSize(uint64_t size) {
   std::lock_guard lock(mutex_);
   write_info_.get()->SetTargetCompressionSize(size);
 }

 #ifdef __Fuchsia__

 zx_status_t Blob::QueryFilesystem(fuchsia_io::wire::FilesystemInfo* info) {
   blobfs_->GetFilesystemInfo(info);
   return ZX_OK;
 }

 zx_status_t Blob::GetDevicePath(size_t buffer_len, char* out_name, size_t* out_len) {
   return blobfs_->Device()->GetDevicePath(buffer_len, out_name, out_len);
 }

 zx_status_t Blob::GetVmo(int flags, zx::vmo* out_vmo, size_t* out_size) {
   TRACE_DURATION("blobfs", "Blob::GetVmo", "flags", flags);

   std::lock_guard lock(mutex_);

   if (flags & fuchsia_io::wire::VMO_FLAG_WRITE) {
     return ZX_ERR_NOT_SUPPORTED;
   } else if (flags & fuchsia_io::wire::VMO_FLAG_EXACT) {
     return ZX_ERR_NOT_SUPPORTED;
   }

   // Let clients map and set the names of their VMOs.
   zx_rights_t rights = ZX_RIGHTS_BASIC | ZX_RIGHT_MAP | ZX_RIGHTS_PROPERTY;
   // We can ignore fuchsia_io_VMO_FLAG_PRIVATE, since private / shared access
   // to the underlying VMO can both be satisfied with a clone due to
   // the immutability of blobfs blobs.
   rights |= (flags & fuchsia_io::wire::VMO_FLAG_READ) ? ZX_RIGHT_READ : 0;
   rights |= (flags & fuchsia_io::wire::VMO_FLAG_EXEC) ? ZX_RIGHT_EXECUTE : 0;
   return CloneDataVmo(rights, out_vmo, out_size);
 }

 #endif  // defined(__Fuchsia__)

 void Blob::Sync(SyncCallback on_complete) {
   // This function will issue its callbacks on either the current thread or the journal thread. The
   // vnode interface says this is OK.
   TRACE_DURATION("blobfs", "Blob::Sync");
   auto event = blobfs_->Metrics()->NewLatencyEvent(fs_metrics::Event::kSync);

   SyncingState state;
   {
     std::scoped_lock guard(mutex_);
     state = syncing_state_;
   }

   switch (state) {
     case SyncingState::kDataIncomplete: {
       // It doesn't make sense to sync a partial blob since it can't have its proper
       // content-addressed name without all the data.
       on_complete(ZX_ERR_BAD_STATE);
       break;
     }
     case SyncingState::kSyncing: {
       // The blob data is complete. When this happens the Blob object will automatically write its
       // metadata, but it may not get flushed for some time. This call both encourages the sync to
       // happen "soon" and provides a way to get notified when it does.
       auto trace_id = TRACE_NONCE();
       TRACE_FLOW_BEGIN("blobfs", "Blob.sync", trace_id);
       blobfs_->Sync([evt = std::move(event),
                      on_complete = std::move(on_complete)](zx_status_t status) mutable {
         // Note: this may be executed on an arbitrary thread.
         on_complete(status);
       });
       break;
     }
     case SyncingState::kDone: {
       // All metadata has already been synced. Calling Sync() is a no-op.
       on_complete(ZX_OK);
       break;
     }
   }
 }

 #if defined(ENABLE_BLOBFS_NEW_PAGER)
 // This function will get called on an arbitrary pager worker thread.
 void Blob::VmoRead(uint64_t offset, uint64_t length) {
   TRACE_DURATION("blobfs", "Blob::VmoRead", "offset", offset, "length", length);

   std::lock_guard<std::mutex> lock(mutex_);

   ZX_DEBUG_ASSERT(IsDataLoaded());

   if (is_corrupt_) {
     FX_LOGS(ERROR) << "Blobfs failing page request because blob was previously found corrupt.";
     if (auto error_result = paged_vfs()->ReportPagerError(vmo(), offset, length, ZX_ERR_BAD_STATE);
         error_result.is_error()) {
       FX_LOGS(ERROR) << "Failed to report pager error to kernel: " << error_result.status_string();
     }
     return;
   }

   // TODO(fxbug.dev/51111) This gets the pager state out of the PageWatcher to avoid forking the
   // code too much during the pager code transition. When the old pager is not needed, the
   // PageWatcher should be deleted and its state moved into this class.
   pager::PagerErrorStatus pager_error_status =
       blobfs_->pager()->TransferPagesToVmo(offset, length, vmo(), pager_info_);
   if (pager_error_status != pager::PagerErrorStatus::kOK) {
     FX_LOGS(ERROR) << "Pager failed to transfer pages to the blob, error: "
                    << zx_status_get_string(static_cast<zx_status_t>(pager_error_status));
     if (auto error_result = paged_vfs()->ReportPagerError(
             vmo(), offset, length, static_cast<zx_status_t>(pager_error_status));
         error_result.is_error()) {
       FX_LOGS(ERROR) << "Failed to report pager error to kernel: " << error_result.status_string();
     }

     // We've signaled a failure and unblocked outstanding page requests for this range. If the pager
     // error was a verification error, fail future requests as well - we should not service further
     // page requests on a corrupt blob.
     //
     // Note that we cannot simply detach the VMO from the pager here. There might be outstanding
     // page requests which have been queued but are yet to be serviced. These need to be handled
     // correctly - if the VMO is detached, there will be no way for us to communicate failure to
     // the kernel, since zx_pager_op_range() requires a valid pager VMO handle. Without being able
     // to make a call to zx_pager_op_range() to indicate a failed page request, the faulting thread
     // would hang indefinitely.
     if (pager_error_status == pager::PagerErrorStatus::kErrDataIntegrity)
       is_corrupt_ = true;
   }
 }
 #endif

 bool Blob::HasReferences() const { return open_count() > 0 || has_clones(); }

 void Blob::CompleteSync() {
   // Called on the journal thread when the syncing is complete.
   {
     std::scoped_lock guard(mutex_);
     syncing_state_ = SyncingState::kDone;
   }
 }

 // TODO(fxbug.dev/51111) This is not used with the new pager. Remove this code when the transition
 // is complete.
 fbl::RefPtr<Blob> Blob::CloneWatcherTeardown() {
   std::lock_guard lock(mutex_);
   if (clone_watcher_.is_pending()) {
     clone_watcher_.Cancel();
     clone_watcher_.set_object(ZX_HANDLE_INVALID);
     tearing_down_ = true;
     return std::move(clone_ref_);
   }
   return nullptr;
 }

 zx_status_t Blob::OpenNode([[maybe_unused]] ValidatedOptions options,
                            fbl::RefPtr<Vnode>* out_redirect) {
   std::lock_guard lock(mutex_);
   if (IsDataLoaded()) {
     SetVmoName();
   }
   return ZX_OK;
 }

 zx_status_t Blob::CloseNode() {
   std::lock_guard lock(mutex_);

   auto event = blobfs_->Metrics()->NewLatencyEvent(fs_metrics::Event::kClose);
   if (IsDataLoaded() && !HasReferences()) {
     fbl::StringBuffer<ZX_MAX_NAME_LEN> data_vmo_name;
     FormatInactiveBlobDataVmoName(inode_, &data_vmo_name);
     if (zx_status_t status =
             vmo().set_property(ZX_PROP_NAME, data_vmo_name.data(), data_vmo_name.size());
         status != ZX_OK) {
       FX_LOGS(WARNING) << "Failed to update blob VMO name: " << zx_status_get_string(status);
     }
   }
   // Attempt purge in case blob was unlinked prior to close
   return TryPurge();
 }

 zx_status_t Blob::TryPurge() {
   if (Purgeable()) {
     return Purge();
   }
   return ZX_OK;
 }

 zx_status_t Blob::Purge() {
   ZX_DEBUG_ASSERT(Purgeable());

   if (state() == BlobState::kReadable) {
     // A readable blob should only be purged if it has been unlinked.
     ZX_ASSERT(deletable_);
     BlobTransaction transaction;
     ZX_ASSERT(blobfs_->FreeInode(map_index_, transaction) == ZX_OK);
     transaction.Commit(*blobfs_->journal());
   }
   ZX_ASSERT(Cache().Evict(fbl::RefPtr(this)) == ZX_OK);
   set_state(BlobState::kPurged);
   return ZX_OK;
 }

 uint32_t Blob::GetBlockSize() const { return blobfs_->Info().block_size; }

 void Blob::SetVmoName() {
   fbl::StringBuffer<ZX_MAX_NAME_LEN> name;
   FormatBlobDataVmoName(inode_, &name);
   vmo().set_property(ZX_PROP_NAME, name.data(), name.size());
 }

 }  // namespace blobfs