| // Copyright 2019 The Fuchsia Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #include <zircon/types.h> |
| |
| #include <optional> |
| |
| #include <fbl/vector.h> |
| #include <fs/journal/format.h> |
| #include <fs/journal/replay.h> |
| #include <fs/journal/superblock.h> |
| #include <fs/transaction/writeback.h> |
| #include <storage/operation/buffered-operation.h> |
| |
| #include "entry_view.h" |
| #include "replay_tree.h" |
| |
| namespace fs { |
| namespace { |
| |
| // Reads and validates the length of the entry from a header. |
| // Ensures the payload length is not zero, and that the entry length does not overflow |
| // the journal buffer. |
| uint64_t ParseEntryLength(const storage::VmoBuffer* journal_buffer, |
| const JournalHeaderView& header) { |
| uint64_t entry_length = 0; |
| if (unlikely(add_overflow(header.PayloadBlocks(), kEntryMetadataBlocks, &entry_length))) { |
| return 0; |
| } |
| if (header.PayloadBlocks() == 0 || entry_length > journal_buffer->capacity()) { |
| // Zero-length entries and larger-than-buffer entries disallowed. |
| return 0; |
| } |
| ZX_DEBUG_ASSERT(entry_length != 0); |
| return entry_length; |
| } |
| |
| std::optional<const JournalEntryView> ParseEntry(storage::VmoBuffer* journal_buffer, uint64_t start, |
| uint64_t sequence_number) { |
| // To know how much of the journal we need to parse, first observe only one block. |
| storage::BlockBufferView small_view(journal_buffer, start, 1); |
| const auto header = JournalHeaderView::Create( |
| fbl::Span<uint8_t>(static_cast<uint8_t*>(small_view.Data(0)), small_view.BlockSize()), |
| sequence_number); |
| |
| // This is not a header block. |
| if (header.is_error()) { |
| return std::nullopt; |
| } |
| |
| uint64_t entry_length = ParseEntryLength(journal_buffer, header.value()); |
| if (!entry_length) { |
| return std::nullopt; |
| } |
| |
| // Looks good enough. Create a JournalEntryView that now includes the footer. |
| storage::BlockBufferView view(journal_buffer, start, entry_length); |
| JournalEntryView entry_view(std::move(view)); |
| auto& const_entry_view = const_cast<const JournalEntryView&>(entry_view); |
| |
| // Validate the footer. |
| if (const_entry_view.footer()->prefix.magic != kJournalEntryMagic) { |
| return std::nullopt; |
| } |
| if (header.value().SequenceNumber() != const_entry_view.footer()->prefix.sequence_number) { |
| return std::nullopt; |
| } |
| // Validate the contents of the entry itself. |
| if (const_entry_view.footer()->checksum != const_entry_view.CalculateChecksum()) { |
| return std::nullopt; |
| } |
| |
| // Decode any blocks within the entry which were previously encoded (escaped). |
| // |
| // This way, the internal details of on-disk journal storage are hidden from the public |
| // API of parsing entries. |
| entry_view.DecodePayloadBlocks(); |
| |
| return entry_view; |
| } |
| |
| bool IsSubsequentEntryValid(storage::VmoBuffer* journal_buffer, uint64_t start, |
| uint64_t sequence_number) { |
| // Access the current entry, but ignore everything except the "length" field. |
| // WARNING: This (intentionally) does not validate the current entry. |
| storage::BlockBufferView small_view(journal_buffer, start, kJournalEntryHeaderBlocks); |
| const auto header = JournalHeaderView::Create( |
| fbl::Span<uint8_t>(reinterpret_cast<uint8_t*>(small_view.Data(0)), small_view.BlockSize()), |
| sequence_number); |
| |
| if (header.is_error()) { |
| // If this isn't a header, we can't find the subsequent entry. |
| return false; |
| } |
| |
| // Check the next entry, if the current entry's length field is (somehow) valid. |
| uint64_t entry_length = ParseEntryLength(journal_buffer, header.value()); |
| if (!entry_length) { |
| // If we can't parse the length, then we can't check the subsequent entry. |
| // If two neighboring entries are corrupted, this is treated as an interruption. |
| return false; |
| } |
| start = (start + entry_length) % journal_buffer->capacity(); |
| return JournalHeaderView::Create( |
| fbl::Span<uint8_t>(reinterpret_cast<uint8_t*>(journal_buffer->Data(start)), |
| journal_buffer->BlockSize()), |
| sequence_number + 1) |
| .is_ok(); |
| } |
| |
| void ParseBlocks(const storage::VmoBuffer& journal_buffer, const JournalEntryView& entry, |
| uint64_t entry_start, ReplayTree* operation_tree) { |
| // Collect all the operations to be replayed from this entry into |operation_tree|. |
| storage::BufferedOperation operation; |
| for (uint32_t i = 0; i < entry.header().PayloadBlocks(); i++) { |
| operation.vmoid = journal_buffer.vmoid(); |
| operation.op.type = storage::OperationType::kWrite; |
| operation.op.vmo_offset = |
| (entry_start + kJournalEntryHeaderBlocks + i) % journal_buffer.capacity(); |
| operation.op.dev_offset = entry.header().TargetBlock(i); |
| operation.op.length = 1; |
| |
| operation_tree->insert(operation); |
| } |
| } |
| |
| } // namespace |
| |
| zx_status_t ParseJournalEntries(const JournalSuperblock* info, storage::VmoBuffer* journal_buffer, |
| fbl::Vector<storage::BufferedOperation>* operations, |
| uint64_t* out_sequence_number, uint64_t* out_start) { |
| // Validate |info| before using it. |
| zx_status_t status = info->Validate(); |
| if (status != ZX_OK) { |
| FS_TRACE_ERROR("Journal Superblock does not validate: %d\n", status); |
| return status; |
| } |
| if (info->start() >= journal_buffer->capacity()) { |
| FS_TRACE_ERROR("Journal entries start beyond end of journal capacity (%zu vs %zu)\n", |
| info->start(), journal_buffer->capacity()); |
| return ZX_ERR_IO_DATA_INTEGRITY; |
| } |
| |
| // Start parsing the journal, and replay as many entries as possible. |
| uint64_t entry_start = info->start(); |
| uint64_t sequence_number = info->sequence_number(); |
| FS_TRACE_INFO("replay: entry_start: %zu, sequence_number: %zu\n", entry_start, sequence_number); |
| ReplayTree operation_tree; |
| while (true) { |
| // Attempt to parse the next entry in the journal. Eventually, we expect this to fail. |
| std::optional<const JournalEntryView> entry = |
| ParseEntry(journal_buffer, entry_start, sequence_number); |
| if (!entry) { |
| // Typically, an invalid entry will imply that the entry was interrupted |
| // partway through being written. However, if the subsequent entry in the journal |
| // looks valid, that implies the entry at |entry_start| was corrupted for some unknown |
| // reason. The inability to replay committed journal entries may lead to filesystem |
| // corruption, so we return an explicit error in this case. |
| if (IsSubsequentEntryValid(journal_buffer, entry_start, sequence_number)) { |
| return ZX_ERR_IO_DATA_INTEGRITY; |
| } |
| break; |
| } |
| |
| if (entry->header().ObjectType() == JournalObjectType::kRevocation) { |
| // TODO(ZX-4752): Revocation records advise us to avoid replaying the provided |
| // operations. |
| // |
| // We should implement this by: |
| // 1) Parsing all blocks into a non-|operations| vector |
| // 2) Iterate over |operations| and look for collision |
| // 3) Omit the intersect |
| return ZX_ERR_NOT_SUPPORTED; |
| } else { |
| // Replay all operations within this entry. |
| ParseBlocks(*journal_buffer, *entry, entry_start, &operation_tree); |
| } |
| |
| // Move to the next entry. |
| auto entry_blocks = entry->header().PayloadBlocks() + kEntryMetadataBlocks; |
| entry_start = (entry_start + entry_blocks) % journal_buffer->capacity(); |
| |
| // Move the sequence_number forward beyond the most recently seen entry. |
| sequence_number = entry->header().SequenceNumber() + 1; |
| } |
| |
| // Now that we've finished replaying entries, return the next sequence_number to use. |
| // It is the responsibility of the caller to update the info block, but only after |
| // all prior operations have been replayed. |
| *out_sequence_number = sequence_number; |
| *out_start = entry_start; |
| |
| for (const auto& [_, range] : operation_tree) { |
| operations->push_back(range.container().operation); |
| } |
| |
| return ZX_OK; |
| } |
| |
| zx_status_t ReplayJournal(fs::TransactionHandler* transaction_handler, |
| storage::VmoidRegistry* registry, uint64_t journal_start, |
| uint64_t journal_length, JournalSuperblock* out_journal_superblock) { |
| const uint64_t journal_entry_start = journal_start + kJournalMetadataBlocks; |
| const uint64_t journal_entry_blocks = journal_length - kJournalMetadataBlocks; |
| FS_TRACE_DEBUG("replay: Initializing journal superblock\n"); |
| |
| // Initialize and read the journal superblock and journal buffer. |
| auto journal_superblock_buffer = std::make_unique<storage::VmoBuffer>(); |
| zx_status_t status = journal_superblock_buffer->Initialize( |
| registry, kJournalMetadataBlocks, transaction_handler->FsBlockSize(), "journal-superblock"); |
| if (status != ZX_OK) { |
| FS_TRACE_ERROR("journal: Cannot initialize journal info block: %d\n", status); |
| return status; |
| } |
| // Initialize and read the journal itself. |
| FS_TRACE_INFO("replay: Initializing journal buffer (%zu blocks)\n", journal_entry_blocks); |
| storage::VmoBuffer journal_buffer; |
| status = journal_buffer.Initialize(registry, journal_entry_blocks, |
| transaction_handler->FsBlockSize(), "journal-buffer"); |
| if (status != ZX_OK) { |
| FS_TRACE_ERROR("journal: Cannot initialize journal buffer: %d\n", status); |
| return status; |
| } |
| |
| FS_TRACE_DEBUG("replay: Reading from storage\n"); |
| fs::ReadTxn transaction(transaction_handler); |
| transaction.Enqueue(journal_superblock_buffer->vmoid(), 0, journal_start, kJournalMetadataBlocks); |
| transaction.Enqueue(journal_buffer.vmoid(), 0, journal_entry_start, journal_entry_blocks); |
| status = transaction.Transact(); |
| if (status != ZX_OK) { |
| FS_TRACE_ERROR("journal: Cannot load journal: %d\n", status); |
| return status; |
| } |
| |
| // Parse the journal, deciding which entries should be replayed. |
| // |
| // NOTE(ZX-4737): This current implementation of replay is built against the specification of |
| // the journaling format, not against how the journaling writeback code happens to be |
| // implemented. In the current implementation, "write to journal" and "write to final location" |
| // are tightly coupled, so although we will replay a multi-entry journal, it is unlikely the |
| // disk will end up in that state. However, this use case is supported by this replay code |
| // regardless. |
| fbl::Vector<storage::BufferedOperation> operations; |
| uint64_t sequence_number = 0; |
| uint64_t next_entry_start = 0; |
| FS_TRACE_DEBUG("replay: Parsing journal entries\n"); |
| JournalSuperblock journal_superblock(std::move(journal_superblock_buffer)); |
| status = ParseJournalEntries(&journal_superblock, &journal_buffer, &operations, &sequence_number, |
| &next_entry_start); |
| if (status != ZX_OK) { |
| FS_TRACE_ERROR("journal: Cannot parse journal entries: %d\n", status); |
| return status; |
| } |
| |
| // Replay the requested journal entries, then the new header. |
| if (operations.size() > 0) { |
| // Update to the new sequence_number (in-memory). |
| journal_superblock.Update(next_entry_start, sequence_number); |
| |
| for (auto& op : operations) { |
| FS_TRACE_INFO("replay: writing operation @ dev_offset: %zu, vmo_offset: %zu, length: %zu\n", |
| op.op.dev_offset, op.op.vmo_offset, op.op.length); |
| } |
| |
| status = FlushRequests(transaction_handler, operations); |
| if (status != ZX_OK) { |
| FS_TRACE_ERROR("journal: Cannot replay entries: %d\n", status); |
| return status; |
| } |
| |
| operations.reset(); |
| FS_TRACE_INFO("replay: New start: %zu, sequence_number: %zu\n", next_entry_start, |
| sequence_number); |
| storage::BufferedOperation operation; |
| operation.vmoid = journal_superblock.buffer().vmoid(); |
| operation.op.type = storage::OperationType::kWrite; |
| operation.op.vmo_offset = 0; |
| operation.op.dev_offset = journal_start; |
| operation.op.length = kJournalMetadataBlocks; |
| operations.push_back(std::move(operation)); |
| status = FlushRequests(transaction_handler, operations); |
| if (status != ZX_OK) { |
| FS_TRACE_ERROR("journal: Cannot update journal superblock: %d\n", status); |
| return status; |
| } |
| |
| } else { |
| FS_TRACE_DEBUG("replay: Not replaying entries\n"); |
| } |
| |
| if (out_journal_superblock) { |
| *out_journal_superblock = std::move(journal_superblock); |
| } |
| return ZX_OK; |
| } |
| |
| } // namespace fs |