blob: a14136aa221a66129f0a30baf2f8cc8cce751750 [file] [log] [blame]
/*
* Copyright (C) 2025 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "src/trace_processor/dataframe/dataframe.h"
#include <cstddef>
#include <cstdint>
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include "perfetto/base/logging.h"
#include "perfetto/ext/base/status_macros.h"
#include "perfetto/ext/base/status_or.h"
#include "perfetto/ext/base/variant.h"
#include "src/trace_processor/containers/string_pool.h"
#include "src/trace_processor/dataframe/cursor_impl.h" // IWYU pragma: keep
#include "src/trace_processor/dataframe/impl/query_plan.h"
#include "src/trace_processor/dataframe/impl/types.h"
#include "src/trace_processor/dataframe/specs.h"
#include "src/trace_processor/dataframe/typed_cursor.h"
#include "src/trace_processor/dataframe/types.h"
namespace perfetto::trace_processor::dataframe {
// Creates an empty, not-yet-finalized dataframe.
//
// `column_names` and `column_specs` are parallel arrays which are each read
// for exactly `column_count` entries. The names are copied into owned strings
// and one column is created per spec via CreateColumnVector(). All the work is
// delegated to the main constructor with `finalized` = false and zero rows.
Dataframe::Dataframe(StringPool* string_pool,
                     uint32_t column_count,
                     const char* const* column_names,
                     const ColumnSpec* column_specs)
    : Dataframe(/*finalized=*/false,
                std::vector<std::string>(column_names,
                                         column_names + column_count),
                CreateColumnVector(column_specs, column_count),
                /*row_count=*/0,
                string_pool) {}
// Main constructor: stores the given names, columns, row count and string
// pool, then caches one raw (non-owning) pointer per column in
// `column_ptrs_`. When `finalized` is true the dataframe is finalized right
// away through Finalize().
Dataframe::Dataframe(bool finalized,
                     std::vector<std::string> column_names,
                     std::vector<std::shared_ptr<impl::Column>> columns,
                     uint32_t row_count,
                     StringPool* string_pool)
    : column_names_(std::move(column_names)),
      columns_(std::move(columns)),
      row_count_(row_count),
      string_pool_(string_pool) {
  // Mirror `columns_` as raw pointers, preserving the column order.
  column_ptrs_.reserve(columns_.size());
  for (const auto& owned_column : columns_) {
    column_ptrs_.emplace_back(owned_column.get());
  }
  if (!finalized) {
    return;
  }
  Finalize();
}
// Builds a query plan for this dataframe from the given filter, distinct,
// sort and limit specifications plus the bitmask of used columns.
//
// The work is forwarded to impl::QueryPlanBuilder::Build together with the
// current row count, columns and indexes. `filter_specs` is taken by
// non-const reference and handed to the builder as-is. If the builder fails,
// its error status is returned to the caller; otherwise the built plan is
// wrapped in a QueryPlan.
base::StatusOr<Dataframe::QueryPlan> Dataframe::PlanQuery(
    std::vector<FilterSpec>& filter_specs,
    const std::vector<DistinctSpec>& distinct_specs,
    const std::vector<SortSpec>& sort_specs,
    const LimitSpec& limit_spec,
    uint64_t cols_used) const {
  ASSIGN_OR_RETURN(
      auto built_plan,
      impl::QueryPlanBuilder::Build(row_count_, columns_, indexes_,
                                    filter_specs, distinct_specs, sort_specs,
                                    limit_spec, cols_used));
  return QueryPlan(std::move(built_plan));
}
void Dataframe::Clear() {
PERFETTO_DCHECK(!finalized_);
for (const auto& c : columns_) {
switch (c->storage.type().index()) {
case StorageType::GetTypeIndex<Uint32>():
c->storage.unchecked_get<Uint32>().clear();
break;
case StorageType::GetTypeIndex<Int32>():
c->storage.unchecked_get<Int32>().clear();
break;
case StorageType::GetTypeIndex<Int64>():
c->storage.unchecked_get<Int64>().clear();
break;
case StorageType::GetTypeIndex<Double>():
c->storage.unchecked_get<Double>().clear();
break;
case StorageType::GetTypeIndex<String>():
c->storage.unchecked_get<String>().clear();
break;
case StorageType::GetTypeIndex<Id>():
c->storage.unchecked_get<Id>().size = 0;
break;
default:
PERFETTO_FATAL("Invalid storage type");
}
switch (c->null_storage.nullability().index()) {
case Nullability::GetTypeIndex<NonNull>():
break;
case Nullability::GetTypeIndex<SparseNull>():
case Nullability::GetTypeIndex<SparseNullWithPopcountUntilFinalization>():
case Nullability::GetTypeIndex<SparseNullWithPopcountAlways>(): {
auto& null = c->null_storage.unchecked_get<SparseNull>();
null.bit_vector.clear();
null.prefix_popcount_for_cell_get.clear();
break;
}
case Nullability::GetTypeIndex<DenseNull>():
c->null_storage.unchecked_get<DenseNull>().bit_vector.clear();
break;
default:
PERFETTO_FATAL("Invalid nullability type");
}
}
row_count_ = 0;
++non_column_mutations_;
}
// Builds an index over the columns in [columns_start, columns_end).
//
// The dataframe is sorted ascending on each of the given columns, in the
// order they are provided, with no filters applied. The row indices are then
// collected in the order the cursor yields them and stored, alongside the
// column list, in the returned Index.
base::StatusOr<Index> Dataframe::BuildIndex(const uint32_t* columns_start,
                                            const uint32_t* columns_end) const {
  std::vector<uint32_t> indexed_cols(columns_start, columns_end);

  // One ascending sort spec per indexed column.
  std::vector<SortSpec> sort_specs;
  sort_specs.reserve(indexed_cols.size());
  for (uint32_t col_idx : indexed_cols) {
    sort_specs.push_back(SortSpec{col_idx, SortDirection::kAscending});
  }

  // Heap allocate to avoid potential stack overflows due to large cursor
  // object.
  auto cursor = std::make_unique<TypedCursor>(this, std::vector<FilterSpec>(),
                                              std::move(sort_specs));
  cursor->ExecuteUnchecked();

  // Drain the cursor, recording each row index as it is visited.
  std::vector<uint32_t> row_order;
  row_order.reserve(row_count_);
  while (!cursor->Eof()) {
    row_order.push_back(cursor->RowIndex());
    cursor->Next();
  }

  auto shared_order =
      std::make_shared<std::vector<uint32_t>>(std::move(row_order));
  return Index(std::move(indexed_cols), std::move(shared_order));
}
// Appends `index` to the list of indexes held by this dataframe and records
// the change in the non-column mutation counter. Crashes (PERFETTO_CHECK) if
// the dataframe has not been finalized.
void Dataframe::AddIndex(Index index) {
  PERFETTO_CHECK(finalized_);
  indexes_.push_back(std::move(index));
  ++non_column_mutations_;
}
// Removes the index stored at position `index` in the list of indexes held by
// this dataframe and records the change in the non-column mutation counter.
//
// Crashes (PERFETTO_CHECK) if the dataframe has not been finalized or if
// `index` does not refer to an existing entry.
void Dataframe::RemoveIndexAt(uint32_t index) {
  PERFETTO_CHECK(finalized_);
  // Erasing at or past end() is undefined behaviour for std::vector, so fail
  // loudly instead of silently corrupting memory on a bad position.
  PERFETTO_CHECK(index < indexes_.size());
  indexes_.erase(indexes_.begin() + static_cast<std::ptrdiff_t>(index));
  ++non_column_mutations_;
}
void Dataframe::Finalize() {
if (finalized_) {
return;
}
finalized_ = true;
for (const auto& c : columns_) {
switch (c->storage.type().index()) {
case StorageType::GetTypeIndex<Uint32>():
c->storage.unchecked_get<Uint32>().shrink_to_fit();
break;
case StorageType::GetTypeIndex<Int32>():
c->storage.unchecked_get<Int32>().shrink_to_fit();
break;
case StorageType::GetTypeIndex<Int64>():
c->storage.unchecked_get<Int64>().shrink_to_fit();
break;
case StorageType::GetTypeIndex<Double>():
c->storage.unchecked_get<Double>().shrink_to_fit();
break;
case StorageType::GetTypeIndex<String>():
c->storage.unchecked_get<String>().shrink_to_fit();
break;
case StorageType::GetTypeIndex<Id>():
break;
default:
PERFETTO_FATAL("Invalid storage type");
}
switch (c->null_storage.nullability().index()) {
case Nullability::GetTypeIndex<NonNull>():
break;
case Nullability::GetTypeIndex<SparseNull>():
c->null_storage.unchecked_get<SparseNull>().bit_vector.shrink_to_fit();
break;
case Nullability::GetTypeIndex<SparseNullWithPopcountAlways>(): {
auto& null = c->null_storage.unchecked_get<SparseNull>();
null.bit_vector.shrink_to_fit();
null.prefix_popcount_for_cell_get.shrink_to_fit();
break;
}
case Nullability::GetTypeIndex<
SparseNullWithPopcountUntilFinalization>(): {
auto& null = c->null_storage.unchecked_get<SparseNull>();
null.bit_vector.shrink_to_fit();
null.prefix_popcount_for_cell_get.clear();
null.prefix_popcount_for_cell_get.shrink_to_fit();
break;
}
case Nullability::GetTypeIndex<DenseNull>():
c->null_storage.unchecked_get<DenseNull>().bit_vector.shrink_to_fit();
break;
default:
PERFETTO_FATAL("Invalid nullability type");
}
}
}
// Returns a copy of this dataframe made with the copy constructor. Crashes
// (PERFETTO_CHECK) if the dataframe has not been finalized.
dataframe::Dataframe Dataframe::CopyFinalized() const {
  PERFETTO_CHECK(finalized_);
  return Dataframe(*this);
}
// Produces a DataframeSpec describing this dataframe: a copy of the column
// names plus, for each column in order, its storage type, nullability, sort
// state and duplicate state.
DataframeSpec Dataframe::CreateSpec() const {
  DataframeSpec result{column_names_, {}};
  auto& specs = result.column_specs;
  specs.reserve(columns_.size());
  for (const auto& column : columns_) {
    specs.push_back({
        column->storage.type(),
        column->null_storage.nullability(),
        column->sort_state,
        column->duplicate_state,
    });
  }
  return result;
}
// Creates one empty column per entry of `column_specs`, which is read for
// exactly `column_count` entries.
//
// Each column gets empty value storage matching the spec's storage type,
// null storage matching the spec's nullability, and the spec's sort and
// duplicate states. Crashes (PERFETTO_FATAL) on an unrecognised storage type
// or nullability.
std::vector<std::shared_ptr<impl::Column>> Dataframe::CreateColumnVector(
    const ColumnSpec* column_specs,
    uint32_t column_count) {
  // Maps the spec's storage type onto empty storage of the same type.
  auto storage_for = [](const ColumnSpec& spec) -> impl::Storage {
    switch (spec.type.index()) {
      case StorageType::GetTypeIndex<Id>():
        return impl::Storage(impl::Storage::Id{});
      case StorageType::GetTypeIndex<String>():
        return impl::Storage(impl::Storage::String{});
      case StorageType::GetTypeIndex<Double>():
        return impl::Storage(impl::Storage::Double{});
      case StorageType::GetTypeIndex<Int64>():
        return impl::Storage(impl::Storage::Int64{});
      case StorageType::GetTypeIndex<Int32>():
        return impl::Storage(impl::Storage::Int32{});
      case StorageType::GetTypeIndex<Uint32>():
        return impl::Storage(impl::Storage::Uint32{});
      default:
        PERFETTO_FATAL("Invalid storage type");
    }
  };

  // Maps the spec's nullability onto empty null storage. The three sparse
  // flavours share the SparseNull storage and differ only in the tag passed
  // alongside it.
  auto null_storage_for = [](const ColumnSpec& spec) -> impl::NullStorage {
    switch (spec.nullability.index()) {
      case Nullability::GetTypeIndex<NonNull>():
        return impl::NullStorage(impl::NullStorage::NonNull{});
      case Nullability::GetTypeIndex<DenseNull>():
        return impl::NullStorage(impl::NullStorage::DenseNull{});
      case Nullability::GetTypeIndex<SparseNull>():
        return impl::NullStorage(impl::NullStorage::SparseNull{}, SparseNull{});
      case Nullability::GetTypeIndex<SparseNullWithPopcountUntilFinalization>():
        return impl::NullStorage(impl::NullStorage::SparseNull{},
                                 SparseNullWithPopcountUntilFinalization{});
      case Nullability::GetTypeIndex<SparseNullWithPopcountAlways>():
        return impl::NullStorage(impl::NullStorage::SparseNull{},
                                 SparseNullWithPopcountAlways{});
      default:
        PERFETTO_FATAL("Invalid nullability type");
    }
  };

  std::vector<std::shared_ptr<impl::Column>> result;
  result.reserve(column_count);
  for (uint32_t col = 0; col < column_count; ++col) {
    const ColumnSpec& spec = column_specs[col];
    result.emplace_back(std::make_shared<impl::Column>(impl::Column{
        storage_for(spec),
        null_storage_for(spec),
        spec.sort_state,
        spec.duplicate_state,
    }));
  }
  return result;
}
} // namespace perfetto::trace_processor::dataframe