[ledger] Generate piece references in Piece instead of Split
This will allow a unified handling of piece references for local and
downloaded pieces.
Change-Id: I98ff4ad486ed2ff35034d460ced2c3a53c9fad94
Bug: LE-697
Tested: fx run-test ledger_tests -t ledger_unittests
diff --git a/src/ledger/bin/storage/fake/fake_object.cc b/src/ledger/bin/storage/fake/fake_object.cc
index 3c14440..b927ee1 100644
--- a/src/ledger/bin/storage/fake/fake_object.cc
+++ b/src/ledger/bin/storage/fake/fake_object.cc
@@ -12,6 +12,11 @@
fxl::StringView FakePiece::GetData() const { return content_; }
+Status FakePiece::AppendReferences(
+ ObjectReferencesAndPriority* references) const {
+ return Status::OK;
+}
+
ObjectIdentifier FakePiece::GetIdentifier() const { return identifier_; }
FakeObject::FakeObject(ObjectIdentifier identifier, fxl::StringView content)
diff --git a/src/ledger/bin/storage/fake/fake_object.h b/src/ledger/bin/storage/fake/fake_object.h
index 59ed980..1b074ac 100644
--- a/src/ledger/bin/storage/fake/fake_object.h
+++ b/src/ledger/bin/storage/fake/fake_object.h
@@ -15,8 +15,11 @@
class FakePiece : public Piece {
public:
FakePiece(ObjectIdentifier identifier, fxl::StringView content);
+
fxl::StringView GetData() const override;
ObjectIdentifier GetIdentifier() const override;
+ Status AppendReferences(
+ ObjectReferencesAndPriority* references) const override;
private:
ObjectIdentifier identifier_;
diff --git a/src/ledger/bin/storage/impl/object_impl.cc b/src/ledger/bin/storage/impl/object_impl.cc
index 87a14fd..a981454 100644
--- a/src/ledger/bin/storage/impl/object_impl.cc
+++ b/src/ledger/bin/storage/impl/object_impl.cc
@@ -8,9 +8,13 @@
#include <utility>
+#include "src/ledger/bin/storage/impl/file_index.h"
+#include "src/ledger/bin/storage/impl/file_index_generated.h"
#include "src/ledger/bin/storage/impl/object_digest.h"
+#include "src/ledger/bin/storage/impl/object_identifier_encoding.h"
#include "src/ledger/bin/storage/public/data_source.h"
#include "src/ledger/bin/storage/public/types.h"
+#include "src/lib/fxl/logging.h"
namespace storage {
@@ -20,6 +24,34 @@
}
} // namespace
+Status BasePiece::AppendReferences(
+ ObjectReferencesAndPriority* references) const {
+ // Chunks have no references.
+ const auto digest_info = GetObjectDigestInfo(GetIdentifier().object_digest());
+ if (digest_info.is_chunk()) {
+ return Status::OK;
+ }
+ FXL_DCHECK(digest_info.piece_type == PieceType::INDEX);
+ // The piece is an index: parse it and append its children to references.
+ const FileIndex* file_index;
+ Status status =
+ FileIndexSerialization::ParseFileIndex(GetData(), &file_index);
+ if (status != Status::OK) {
+ return status;
+ }
+ for (const auto* child : *file_index->children()) {
+ ObjectDigest child_digest =
+ ToObjectIdentifier(child->object_identifier()).object_digest();
+ // References must not contain inline pieces.
+ if (GetObjectDigestInfo(child_digest).is_inlined()) {
+ continue;
+ }
+ // Piece references are always eager.
+ references->emplace(child_digest, KeyPriority::EAGER);
+ }
+ return Status::OK;
+}
+
InlinePiece::InlinePiece(ObjectIdentifier identifier)
: identifier_(std::move(identifier)) {}
diff --git a/src/ledger/bin/storage/impl/object_impl.h b/src/ledger/bin/storage/impl/object_impl.h
index 1f1444d..d5a06b4 100644
--- a/src/ledger/bin/storage/impl/object_impl.h
+++ b/src/ledger/bin/storage/impl/object_impl.h
@@ -18,8 +18,15 @@
#include "third_party/leveldb/include/leveldb/iterator.h"
namespace storage {
+// Common methods shared by all piece implementations.
+class BasePiece : public Piece {
+ public:
+ Status AppendReferences(
+ ObjectReferencesAndPriority* references) const override;
+};
+
// Piece whose data is equal to its id.
-class InlinePiece : public Piece {
+class InlinePiece : public BasePiece {
public:
explicit InlinePiece(ObjectIdentifier identifier);
@@ -32,7 +39,7 @@
};
// Piece whose data is backed by a DataChunk.
-class DataChunkPiece : public Piece {
+class DataChunkPiece : public BasePiece {
public:
explicit DataChunkPiece(ObjectIdentifier identifier,
std::unique_ptr<DataSource::DataChunk> chunk);
@@ -47,7 +54,7 @@
};
// Piece whose data is backed by a value in LevelDB.
-class LevelDBPiece : public Piece {
+class LevelDBPiece : public BasePiece {
public:
explicit LevelDBPiece(ObjectIdentifier identifier,
std::unique_ptr<leveldb::Iterator> iterator);
diff --git a/src/ledger/bin/storage/impl/object_impl_unittest.cc b/src/ledger/bin/storage/impl/object_impl_unittest.cc
index 40e3364..4f2c788 100644
--- a/src/ledger/bin/storage/impl/object_impl_unittest.cc
+++ b/src/ledger/bin/storage/impl/object_impl_unittest.cc
@@ -9,17 +9,24 @@
#include <memory>
+#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "peridot/lib/scoped_tmpfs/scoped_tmpfs.h"
+#include "src/ledger/bin/storage/impl/constants.h"
+#include "src/ledger/bin/storage/impl/file_index.h"
#include "src/ledger/bin/storage/impl/object_digest.h"
#include "src/ledger/bin/storage/impl/storage_test_utils.h"
#include "src/ledger/bin/storage/public/data_source.h"
+#include "src/ledger/bin/storage/public/types.h"
#include "src/ledger/bin/testing/test_with_environment.h"
+#include "src/lib/fxl/logging.h"
#include "third_party/leveldb/include/leveldb/db.h"
#include "util/env_fuchsia.h"
namespace storage {
namespace {
+using ::testing::Pair;
+using ::testing::UnorderedElementsAre;
ObjectIdentifier CreateObjectIdentifier(ObjectDigest digest) {
return {1u, 2u, std::move(digest)};
@@ -139,6 +146,59 @@
EXPECT_TRUE(CheckPieceValue(piece, identifier, data));
}
+TEST_F(ObjectImplTest, PieceReferences) {
+ // Create various types of identifiers for the piece children. Small pieces
+ // fit in chunks, while bigger ones are split and yield identifiers of index
+ // pieces.
+ constexpr size_t kInlineSize = kStorageHashSize;
+ const ObjectIdentifier inline_chunk = CreateObjectIdentifier(
+ ComputeObjectDigest(PieceType::CHUNK, ObjectType::BLOB,
+ RandomString(environment_.random(), kInlineSize)));
+ ASSERT_TRUE(GetObjectDigestInfo(inline_chunk.object_digest()).is_chunk());
+ ASSERT_TRUE(GetObjectDigestInfo(inline_chunk.object_digest()).is_inlined());
+
+ const ObjectIdentifier inline_index = CreateObjectIdentifier(
+ ComputeObjectDigest(PieceType::INDEX, ObjectType::BLOB,
+ RandomString(environment_.random(), kInlineSize)));
+ ASSERT_FALSE(GetObjectDigestInfo(inline_index.object_digest()).is_chunk());
+ ASSERT_TRUE(GetObjectDigestInfo(inline_index.object_digest()).is_inlined());
+
+ constexpr size_t kNoInlineSize = kStorageHashSize + 1;
+ const ObjectIdentifier noinline_chunk = CreateObjectIdentifier(
+ ComputeObjectDigest(PieceType::CHUNK, ObjectType::BLOB,
+ RandomString(environment_.random(), kNoInlineSize)));
+ ASSERT_TRUE(GetObjectDigestInfo(noinline_chunk.object_digest()).is_chunk());
+ ASSERT_FALSE(
+ GetObjectDigestInfo(noinline_chunk.object_digest()).is_inlined());
+
+ const ObjectIdentifier noinline_index = CreateObjectIdentifier(
+ ComputeObjectDigest(PieceType::INDEX, ObjectType::BLOB,
+ RandomString(environment_.random(), kNoInlineSize)));
+ ASSERT_FALSE(GetObjectDigestInfo(noinline_index.object_digest()).is_chunk());
+ ASSERT_FALSE(
+ GetObjectDigestInfo(noinline_index.object_digest()).is_inlined());
+
+ // Create the parent piece.
+ std::unique_ptr<DataSource::DataChunk> data;
+ size_t total_size;
+ FileIndexSerialization::BuildFileIndex({{inline_chunk, kInlineSize},
+ {noinline_chunk, kInlineSize},
+ {inline_index, kNoInlineSize},
+ {noinline_index, kNoInlineSize}},
+ &data, &total_size);
+ const ObjectIdentifier identifier = CreateObjectIdentifier(
+ ComputeObjectDigest(PieceType::INDEX, ObjectType::BLOB, data->Get()));
+ DataChunkPiece piece(identifier, std::move(data));
+
+ // Check that inline children are not included in the references.
+ ObjectReferencesAndPriority references;
+ ASSERT_EQ(Status::OK, piece.AppendReferences(&references));
+ EXPECT_THAT(references,
+ UnorderedElementsAre(
+ Pair(noinline_chunk.object_digest(), KeyPriority::EAGER),
+ Pair(noinline_index.object_digest(), KeyPriority::EAGER)));
+}
+
TEST_F(ObjectImplTest, ChunkObject) {
std::string data = RandomString(environment_.random(), 12);
ObjectIdentifier identifier = CreateObjectIdentifier(
diff --git a/src/ledger/bin/storage/impl/page_storage_impl.cc b/src/ledger/bin/storage/impl/page_storage_impl.cc
index e728ada..1f22c2f 100644
--- a/src/ledger/bin/storage/impl/page_storage_impl.cc
+++ b/src/ledger/bin/storage/impl/page_storage_impl.cc
@@ -414,18 +414,23 @@
[this, waiter, managed_data_source = std::move(managed_data_source),
tree_references = std::move(tree_references),
callback = std::move(traced_callback)](
- IterationStatus status, std::unique_ptr<Piece> piece,
- ObjectReferencesAndPriority piece_references) mutable {
+ IterationStatus status, std::unique_ptr<Piece> piece) mutable {
if (status == IterationStatus::ERROR) {
callback(Status::IO_ERROR, ObjectIdentifier());
return;
}
FXL_DCHECK(piece != nullptr);
-
ObjectIdentifier identifier = piece->GetIdentifier();
auto object_info = GetObjectDigestInfo(identifier.object_digest());
if (!object_info.is_inlined()) {
+ ObjectReferencesAndPriority piece_references;
+ if (piece->AppendReferences(&piece_references) != Status::OK) {
+ // The piece is generated internally by splitting, not coming from
+ // untrusted source, so decoding should never fail.
+ callback(Status::INTERNAL_ERROR, ObjectIdentifier());
+ return;
+ }
if (object_info.object_type == ObjectType::TREE_NODE) {
// There is at most one TREE_NODE, and it must be the last piece, so
// it is safe to add tree_references to piece_references there.
diff --git a/src/ledger/bin/storage/impl/split.cc b/src/ledger/bin/storage/impl/split.cc
index b61fc43..9e47d12 100644
--- a/src/ledger/bin/storage/impl/split.cc
+++ b/src/ledger/bin/storage/impl/split.cc
@@ -64,9 +64,7 @@
public:
explicit SplitContext(
fit::function<ObjectIdentifier(ObjectDigest)> make_object_identifier,
- fit::function<void(IterationStatus, std::unique_ptr<Piece>,
- ObjectReferencesAndPriority references)>
- callback,
+ fit::function<void(IterationStatus, std::unique_ptr<Piece>)> callback,
ObjectType object_type)
: make_object_identifier_(std::move(make_object_identifier)),
callback_(std::move(callback)),
@@ -78,7 +76,7 @@
void AddChunk(std::unique_ptr<DataSource::DataChunk> chunk,
DataSource::Status status) {
if (status == DataSource::Status::ERROR) {
- callback_(IterationStatus::ERROR, nullptr, {});
+ callback_(IterationStatus::ERROR, nullptr);
return;
}
@@ -133,7 +131,6 @@
// Information about a piece of data (chunk or index) to be sent.
struct PendingPiece {
ObjectIdentifier identifier;
- ObjectReferencesAndPriority references;
std::unique_ptr<DataSource::DataChunk> data;
bool ready() { return data != nullptr; }
@@ -145,14 +142,12 @@
// treated differently in |SendDone|. |children| must contain the identifiers
// of the children pieces if |type| is INDEX, and be empty otherwise.
ObjectIdentifier SendInProgress(PieceType type,
- ObjectReferencesAndPriority references,
std::unique_ptr<DataSource::DataChunk> data) {
if (latest_piece_.ready()) {
callback_(
IterationStatus::IN_PROGRESS,
std::make_unique<DataChunkPiece>(std::move(latest_piece_.identifier),
- std::move(latest_piece_.data)),
- std::move(latest_piece_.references));
+ std::move(latest_piece_.data)));
}
auto data_view = data->Get();
// object_type for inner (IN_PROGRESS) pieces is always BLOB, regardless of
@@ -165,7 +160,6 @@
ComputeObjectDigest(type, ObjectType::BLOB, data_view);
latest_piece_.identifier =
make_object_identifier_(std::move(object_digest));
- latest_piece_.references = std::move(references);
latest_piece_.data = std::move(data);
return latest_piece_.identifier;
}
@@ -182,11 +176,9 @@
object_type_, data_view);
latest_piece_.identifier =
make_object_identifier_(std::move(object_digest));
- callback_(
- IterationStatus::DONE,
- std::make_unique<DataChunkPiece>(std::move(latest_piece_.identifier),
- std::move(latest_piece_.data)),
- std::move(latest_piece_.references));
+ callback_(IterationStatus::DONE, std::make_unique<DataChunkPiece>(
+ std::move(latest_piece_.identifier),
+ std::move(latest_piece_.data)));
}
std::vector<ObjectIdentifierAndSize>& GetCurrentIdentifiersAtLevel(
@@ -226,7 +218,7 @@
void BuildAndSendNextChunk(size_t split_index) {
auto data = BuildNextChunk(split_index);
auto size = data->Get().size();
- auto identifier = SendInProgress(PieceType::CHUNK, {}, std::move(data));
+ auto identifier = SendInProgress(PieceType::CHUNK, std::move(data));
AddIdentifierAtLevel(0, {std::move(identifier), size});
}
@@ -276,18 +268,7 @@
<< "Expected maximum of: " << kMaxChunkSize
<< ", but got: " << chunk->Get().size();
- ObjectReferencesAndPriority references;
- for (const auto& [identifier, size] : identifiers_and_sizes) {
- const auto& digest = identifier.object_digest();
- // Skip inline pieces from references.
- if (GetObjectDigestInfo(digest).is_inlined())
- continue;
- // Links between pieces are eager, only node-value links can be lazy.
- references.emplace(digest, KeyPriority::EAGER);
- }
-
- auto identifier = SendInProgress(PieceType::INDEX, std::move(references),
- std::move(chunk));
+ auto identifier = SendInProgress(PieceType::INDEX, std::move(chunk));
return {std::move(identifier), total_size};
}
@@ -341,9 +322,7 @@
}
fit::function<ObjectIdentifier(ObjectDigest)> make_object_identifier_;
- fit::function<void(IterationStatus, std::unique_ptr<Piece>,
- ObjectReferencesAndPriority references)>
- callback_;
+ fit::function<void(IterationStatus, std::unique_ptr<Piece>)> callback_;
// The object encoded by DataSource.
const ObjectType object_type_;
bup::RollSumSplit roll_sum_split_;
@@ -422,9 +401,7 @@
void SplitDataSource(
DataSource* source, ObjectType object_type,
fit::function<ObjectIdentifier(ObjectDigest)> make_object_identifier,
- fit::function<void(IterationStatus, std::unique_ptr<Piece>,
- ObjectReferencesAndPriority)>
- callback) {
+ fit::function<void(IterationStatus, std::unique_ptr<Piece>)> callback) {
SplitContext context(std::move(make_object_identifier), std::move(callback),
object_type);
source->Get([context = std::move(context)](
diff --git a/src/ledger/bin/storage/impl/split.h b/src/ledger/bin/storage/impl/split.h
index eb91397..9ede678 100644
--- a/src/ledger/bin/storage/impl/split.h
+++ b/src/ledger/bin/storage/impl/split.h
@@ -25,19 +25,13 @@
// builds a multi-level index from the content. The |source| is consumed and
// split using a rolling hash. Each chunk and each index file is returned. On
// each iteration, |make_object_identifier| is called first and must return the
-// |ObjectIdentifier| to use to reference the given content id. The piece
-// (second argument) and the identifiers of its children (third argument) are
+// |ObjectIdentifier| to use to reference the given content id. The piece is
// then passed to |callback|, along with a status of |IN_PROGRESS|, except for
-// the last piece which has a status of |DONE|. The returned children are only
-// children pieces of INDEX pieces; children tree nodes are not included, even
-// if |type| is TREE_NODE. |callback| is not called anymore once |source| is
-// deleted.
+// the last piece which has a status of |DONE|.
void SplitDataSource(
DataSource* source, ObjectType type,
fit::function<ObjectIdentifier(ObjectDigest)> make_object_identifier,
- fit::function<void(IterationStatus, std::unique_ptr<Piece>,
- ObjectReferencesAndPriority)>
- callback);
+ fit::function<void(IterationStatus, std::unique_ptr<Piece>)> callback);
// Iterates over the pieces of an index object.
Status ForEachPiece(fxl::StringView index_content,
diff --git a/src/ledger/bin/storage/impl/split_unittest.cc b/src/ledger/bin/storage/impl/split_unittest.cc
index c989aa7..82249fe 100644
--- a/src/ledger/bin/storage/impl/split_unittest.cc
+++ b/src/ledger/bin/storage/impl/split_unittest.cc
@@ -86,7 +86,6 @@
struct SplitResult {
std::vector<Call> calls;
- std::map<ObjectDigest, ObjectReferencesAndPriority> references;
std::map<ObjectDigest, std::unique_ptr<Piece>> pieces;
};
@@ -99,28 +98,14 @@
return encryption::MakeDefaultObjectIdentifier(std::move(digest));
},
[result = std::move(result), callback = std::move(callback)](
- IterationStatus status, std::unique_ptr<Piece> piece,
- ObjectReferencesAndPriority references) mutable {
+ IterationStatus status, std::unique_ptr<Piece> piece) mutable {
EXPECT_TRUE(result);
const auto digest =
piece ? piece->GetIdentifier().object_digest() : ObjectDigest();
if (status != IterationStatus::ERROR) {
EXPECT_LE(piece->GetData().size(), kMaxChunkSize);
- // Accumulate returned references and data in result, checking that
- // they match if we have already seen this digest.
- if (result->references.count(digest) != 0) {
- EXPECT_THAT(result->references[digest],
- UnorderedElementsAreArray(references));
- } else {
- // Check that references do not point to inline pieces.
- for (const auto& [reference, priority] : references) {
- EXPECT_FALSE(GetObjectDigestInfo(reference).is_inlined())
- << "SplitDataSource returned a reference to an inline "
- "object: "
- << reference;
- }
- result->references[digest] = std::move(references);
- }
+ // Accumulate pieces in result, checking that they match if we have
+ // already seen this digest.
if (result->pieces.count(digest) != 0) {
EXPECT_EQ(result->pieces[digest]->GetData(), piece->GetData());
} else {
@@ -218,20 +203,6 @@
fxl::StringView current = content;
for (const auto& call : split_result.calls) {
- // Check that chunks have no references and indexes have at least one, with
- // associated data.
- if (call.status != IterationStatus::ERROR) {
- if (GetObjectDigestInfo(call.digest).is_chunk()) {
- EXPECT_THAT(split_result.references[call.digest], IsEmpty());
- } else {
- EXPECT_THAT(split_result.references[call.digest], Not(IsEmpty()));
- for (const auto& [child, priority] :
- split_result.references[call.digest]) {
- EXPECT_THAT(split_result.pieces, Contains(Key(child)));
- EXPECT_EQ(priority, KeyPriority::EAGER);
- }
- }
- }
if (call.status == IterationStatus::IN_PROGRESS &&
GetObjectDigestInfo(call.digest).is_chunk()) {
EXPECT_EQ(
@@ -324,11 +295,6 @@
EXPECT_FALSE(GetObjectDigestInfo(split_result.calls[2].digest).is_chunk());
EXPECT_EQ(GetObjectDigestInfo(split_result.calls[2].digest).object_type,
ObjectType::TREE_NODE);
- // The reference from the root piece to the inline one should be skipped, so
- // we expect only one reference here.
- EXPECT_THAT(
- split_result.references[split_result.calls[2].digest],
- ElementsAre(Pair(split_result.calls[0].digest, KeyPriority::EAGER)));
}
TEST(SplitTest, Error) {
diff --git a/src/ledger/bin/storage/impl/storage_test_utils.cc b/src/ledger/bin/storage/impl/storage_test_utils.cc
index 5392c32..913050a 100644
--- a/src/ledger/bin/storage/impl/storage_test_utils.cc
+++ b/src/ledger/bin/storage/impl/storage_test_utils.cc
@@ -52,8 +52,7 @@
return encryption::MakeDefaultObjectIdentifier(
std::move(object_digest));
},
- [&result](IterationStatus status, std::unique_ptr<Piece> piece,
- ObjectReferencesAndPriority references) {
+ [&result](IterationStatus status, std::unique_ptr<Piece> piece) {
if (status == IterationStatus::DONE) {
result = piece->GetIdentifier().object_digest();
}
@@ -122,9 +121,8 @@
return encryption::MakeDefaultObjectIdentifier(
std::move(object_digest));
},
- [&result, callback = std::move(callback)](
- IterationStatus status, std::unique_ptr<Piece> piece,
- ObjectReferencesAndPriority /*references*/) {
+ [&result, callback = std::move(callback)](IterationStatus status,
+ std::unique_ptr<Piece> piece) {
callback(piece->GetIdentifier(), piece->GetData().ToString());
if (status == IterationStatus::DONE) {
result = piece->GetIdentifier().object_digest();
diff --git a/src/ledger/bin/storage/impl/storage_test_utils.h b/src/ledger/bin/storage/impl/storage_test_utils.h
index b64a2972..603197e 100644
--- a/src/ledger/bin/storage/impl/storage_test_utils.h
+++ b/src/ledger/bin/storage/impl/storage_test_utils.h
@@ -47,13 +47,13 @@
const ObjectIdentifier object_identifier;
};
-// Builder the object digest for the given content. If |inline_behavior| is
+// Computes the object digest for the given content. If |inline_behavior| is
// InlineBehavior::PREVENT, resize |content| so that it cannot be inlined.
ObjectDigest MakeObjectDigest(
std::string content,
InlineBehavior inline_behavior = InlineBehavior::ALLOW);
-// Builder the object identifier for the given content. If |inline_behavior| is
+// Computes the object identifier for the given content. If |inline_behavior| is
// InlineBehavior::PREVENT, resize |content| so that it cannot be inlined.
ObjectIdentifier MakeObjectIdentifier(
std::string content,
diff --git a/src/ledger/bin/storage/public/object.h b/src/ledger/bin/storage/public/object.h
index c335031..d6dbd5e 100644
--- a/src/ledger/bin/storage/public/object.h
+++ b/src/ledger/bin/storage/public/object.h
@@ -50,6 +50,13 @@
// piece is not deleted.
virtual fxl::StringView GetData() const = 0;
+ // If this piece references other pieces (eg. an index referencing some
+ // chunks), adds them to |references|. Does not clear |references|. Does not
+ // add |Object| references even if the piece represents a full object of its
+ // own.
+ virtual Status AppendReferences(
+ ObjectReferencesAndPriority* references) const = 0;
+
FXL_DISALLOW_COPY_AND_ASSIGN(Piece);
};