//===- Storage.h - TACO-flavored sparse tensor representation ---*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains definitions for the following classes:
//
// * `SparseTensorStorageBase`
// * `SparseTensorStorage<P, C, V>`
//
//===----------------------------------------------------------------------===//
#ifndef MLIR_EXECUTIONENGINE_SPARSETENSOR_STORAGE_H
#define MLIR_EXECUTIONENGINE_SPARSETENSOR_STORAGE_H
#include "mlir/Dialect/SparseTensor/IR/Enums.h"
#include "mlir/ExecutionEngine/Float16bits.h"
#include "mlir/ExecutionEngine/SparseTensor/ArithmeticUtils.h"
#include "mlir/ExecutionEngine/SparseTensor/COO.h"
#include "mlir/ExecutionEngine/SparseTensor/MapRef.h"
namespace mlir {
namespace sparse_tensor {
//===----------------------------------------------------------------------===//
//
// SparseTensorStorage Classes
//
//===----------------------------------------------------------------------===//
/// Abstract base class for `SparseTensorStorage<P,C,V>`. This class
/// takes responsibility for all the `<P,C,V>`-independent aspects
/// of the tensor (e.g., sizes, sparsity, mapping). In addition,
/// we use function overloading to implement "partial" method
/// specialization, which the C-API relies on to catch type errors
/// arising from our use of opaque pointers.
///
/// Because this class forms a bridge between the denotational semantics
/// of "tensors" and the operational semantics of how we store and
/// compute with them, it also distinguishes between two different
/// coordinate spaces (and their associated rank, sizes, etc).
/// Denotationally, we have the *dimensions* of the tensor represented
/// by this object. Operationally, we have the *levels* of the storage
/// representation itself.
///
/// The *size* of an axis is the cardinality of possible coordinate
/// values along that axis (regardless of which coordinates have stored
/// element values). As such, each size must be non-zero, since if any
/// axis had size zero then the whole tensor would have trivial storage
/// (there being no possible coordinates). We therefore use the plural
/// term *sizes* for a collection of non-zero cardinalities, and use
/// this term whenever referring to run-time cardinalities, whereas we
/// use the term *shape* for a collection of compile-time cardinalities,
/// where zero indicates a cardinality that is dynamic (i.e.,
/// unknown/unspecified at compile-time). At run-time, these dynamic
/// cardinalities are inferred from or checked against sizes otherwise
/// specified. Thus dynamic cardinalities always have an "immutable but
/// unknown" value, and the term "dynamic" should not be taken to
/// indicate run-time mutability.
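///
/// For example (an illustrative sketch): a CSR matrix of shape 3x4 has
/// dimRank == lvlRank == 2, dimSizes == lvlSizes == {3, 4}, and level
/// types (dense, compressed), so dimensions and levels happen to
/// coincide; a blocked layout, by contrast, may map 2 dimensions onto
/// 4 levels, so the two coordinate spaces need not share rank or sizes.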
class SparseTensorStorageBase {
protected:
SparseTensorStorageBase(const SparseTensorStorageBase &) = default;
SparseTensorStorageBase &operator=(const SparseTensorStorageBase &) = delete;
public:
/// Constructs a new sparse-tensor storage object with the given encoding.
SparseTensorStorageBase(uint64_t dimRank, const uint64_t *dimSizes,
uint64_t lvlRank, const uint64_t *lvlSizes,
const LevelType *lvlTypes, const uint64_t *dim2lvl,
const uint64_t *lvl2dim);
virtual ~SparseTensorStorageBase() = default;
/// Gets the number of tensor-dimensions.
uint64_t getDimRank() const { return dimSizes.size(); }
/// Gets the number of storage-levels.
uint64_t getLvlRank() const { return lvlSizes.size(); }
/// Gets the tensor-dimension sizes array.
const std::vector<uint64_t> &getDimSizes() const { return dimSizes; }
/// Safely looks up the size of the given tensor-dimension.
uint64_t getDimSize(uint64_t d) const {
assert(d < getDimRank());
return dimSizes[d];
}
/// Gets the storage-level sizes array.
const std::vector<uint64_t> &getLvlSizes() const { return lvlSizes; }
/// Safely looks up the size of the given storage-level.
uint64_t getLvlSize(uint64_t l) const {
assert(l < getLvlRank());
return lvlSizes[l];
}
/// Gets the level-types array.
const std::vector<LevelType> &getLvlTypes() const { return lvlTypes; }
/// Safely looks up the type of the given level.
LevelType getLvlType(uint64_t l) const {
assert(l < getLvlRank());
return lvlTypes[l];
}
/// Safely checks if the level uses dense storage.
bool isDenseLvl(uint64_t l) const { return isDenseLT(getLvlType(l)); }
/// Safely checks if the level uses compressed storage.
bool isCompressedLvl(uint64_t l) const {
return isCompressedLT(getLvlType(l));
}
/// Safely checks if the level uses loose compressed storage.
bool isLooseCompressedLvl(uint64_t l) const {
return isLooseCompressedLT(getLvlType(l));
}
/// Safely checks if the level uses singleton storage.
bool isSingletonLvl(uint64_t l) const { return isSingletonLT(getLvlType(l)); }
/// Safely checks if the level uses n out of m storage.
bool isNOutOfMLvl(uint64_t l) const { return isNOutOfMLT(getLvlType(l)); }
/// Safely checks if the level is ordered.
bool isOrderedLvl(uint64_t l) const { return isOrderedLT(getLvlType(l)); }
/// Safely checks if the level is unique.
bool isUniqueLvl(uint64_t l) const { return isUniqueLT(getLvlType(l)); }
/// Gets positions-overhead storage for the given level.
#define DECL_GETPOSITIONS(PNAME, P) \
virtual void getPositions(std::vector<P> **, uint64_t);
MLIR_SPARSETENSOR_FOREVERY_FIXED_O(DECL_GETPOSITIONS)
#undef DECL_GETPOSITIONS
/// Gets coordinates-overhead storage for the given level.
#define DECL_GETCOORDINATES(INAME, C) \
virtual void getCoordinates(std::vector<C> **, uint64_t);
MLIR_SPARSETENSOR_FOREVERY_FIXED_O(DECL_GETCOORDINATES)
#undef DECL_GETCOORDINATES
/// Gets coordinates-overhead storage buffer for the given level.
#define DECL_GETCOORDINATESBUFFER(INAME, C) \
virtual void getCoordinatesBuffer(std::vector<C> **, uint64_t);
MLIR_SPARSETENSOR_FOREVERY_FIXED_O(DECL_GETCOORDINATESBUFFER)
#undef DECL_GETCOORDINATESBUFFER
/// Gets primary storage.
#define DECL_GETVALUES(VNAME, V) virtual void getValues(std::vector<V> **);
MLIR_SPARSETENSOR_FOREVERY_V(DECL_GETVALUES)
#undef DECL_GETVALUES
/// Element-wise insertion in lexicographic coordinate order. The first
/// argument is the level-coordinates for the value being inserted.
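/// For example, the 2-D entries (0,1)=a, (0,3)=b, (2,2)=c must be
/// inserted in exactly that order, since coordinate tuples are required
/// to arrive in increasing lexicographic order.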
#define DECL_LEXINSERT(VNAME, V) virtual void lexInsert(const uint64_t *, V);
MLIR_SPARSETENSOR_FOREVERY_V(DECL_LEXINSERT)
#undef DECL_LEXINSERT
/// Expanded insertion. Note that this method resets the
/// values/filled-switch array back to all-zero/false while only
/// iterating over the nonzero elements.
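/// As a sketch of the expected arguments: for an innermost level of size
/// `expsz`, the values/filled arrays are expanded arrays of that size,
/// `added` lists the `count` coordinates that were filled (in arbitrary
/// order), and the coordinates of all enclosing levels stay fixed for
/// the duration of the call.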
#define DECL_EXPINSERT(VNAME, V) \
virtual void expInsert(uint64_t *, V *, bool *, uint64_t *, uint64_t, \
uint64_t);
MLIR_SPARSETENSOR_FOREVERY_V(DECL_EXPINSERT)
#undef DECL_EXPINSERT
/// Finalizes lexicographic insertions.
virtual void endLexInsert() = 0;
private:
const std::vector<uint64_t> dimSizes;
const std::vector<uint64_t> lvlSizes;
const std::vector<LevelType> lvlTypes;
const std::vector<uint64_t> dim2lvlVec;
const std::vector<uint64_t> lvl2dimVec;
protected:
const MapRef map; // non-owning pointers into dim2lvl/lvl2dim vectors
const bool allDense;
};
/// A memory-resident sparse tensor using a storage scheme based on
/// per-level sparse/dense annotations. This data structure provides
/// a bufferized form of a sparse tensor type. In contrast to generating
/// setup methods for each differently annotated sparse tensor, this
/// class provides a convenient "one-size-fits-all" solution that simply
/// takes an input tensor and annotations to implement all required setup
/// in a general manner.
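///
/// For example (a minimal sketch, not normative), a 3x4 CSR matrix with
/// 32-bit overhead and double values could be allocated and populated as
/// follows, assuming an identity dim2lvl/lvl2dim mapping and placeholder
/// names `denseLT`/`compressedLT` for the dense and compressed level-types
/// obtained from Enums.h:
///
/// \code
///   uint64_t dimSizes[] = {3, 4};
///   uint64_t lvlSizes[] = {3, 4};
///   uint64_t dim2lvl[] = {0, 1};
///   uint64_t lvl2dim[] = {0, 1};
///   // Placeholders for the dense/compressed LevelType values.
///   LevelType lvlTypes[] = {denseLT, compressedLT};
///   auto *csr = SparseTensorStorage<uint32_t, uint32_t, double>::newEmpty(
///       /*dimRank=*/2, dimSizes, /*lvlRank=*/2, lvlSizes, lvlTypes,
///       dim2lvl, lvl2dim);
///   uint64_t coords[] = {0, 1};
///   csr->lexInsert(coords, 1.0); // insertions in lexicographic order
///   csr->endLexInsert();
/// \endcode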
template <typename P, typename C, typename V>
class SparseTensorStorage final : public SparseTensorStorageBase {
/// Private constructor to share code between the other constructors.
/// Beware that the object is not guaranteed to be in a valid state
/// after this constructor alone; e.g., `isCompressedLvl(l)`
/// doesn't entail `!(positions[l].empty())`.
SparseTensorStorage(uint64_t dimRank, const uint64_t *dimSizes,
uint64_t lvlRank, const uint64_t *lvlSizes,
const LevelType *lvlTypes, const uint64_t *dim2lvl,
const uint64_t *lvl2dim)
: SparseTensorStorageBase(dimRank, dimSizes, lvlRank, lvlSizes, lvlTypes,
dim2lvl, lvl2dim),
positions(lvlRank), coordinates(lvlRank), lvlCursor(lvlRank) {}
public:
/// Constructs a sparse tensor with the given encoding, and allocates
/// overhead storage according to some simple heuristics. When `lvlCOO`
/// is set, the sparse tensor is initialized with the contents of that
/// data structure; otherwise, an empty sparse tensor results.
SparseTensorStorage(uint64_t dimRank, const uint64_t *dimSizes,
uint64_t lvlRank, const uint64_t *lvlSizes,
const LevelType *lvlTypes, const uint64_t *dim2lvl,
const uint64_t *lvl2dim, SparseTensorCOO<V> *lvlCOO);
/// Constructs a sparse tensor with the given encoding, and initializes
/// the contents from the level buffers. The constructor assumes that the
/// data provided by `lvlBufs` can be directly used to interpret the
/// resulting sparse tensor, and performs no integrity test on the input data.
SparseTensorStorage(uint64_t dimRank, const uint64_t *dimSizes,
uint64_t lvlRank, const uint64_t *lvlSizes,
const LevelType *lvlTypes, const uint64_t *dim2lvl,
const uint64_t *lvl2dim, const intptr_t *lvlBufs);
/// Allocates a new empty sparse tensor.
static SparseTensorStorage<P, C, V> *
newEmpty(uint64_t dimRank, const uint64_t *dimSizes, uint64_t lvlRank,
const uint64_t *lvlSizes, const LevelType *lvlTypes,
const uint64_t *dim2lvl, const uint64_t *lvl2dim);
/// Allocates a new sparse tensor and initializes it from the given COO.
static SparseTensorStorage<P, C, V> *
newFromCOO(uint64_t dimRank, const uint64_t *dimSizes, uint64_t lvlRank,
const uint64_t *lvlSizes, const LevelType *lvlTypes,
const uint64_t *dim2lvl, const uint64_t *lvl2dim,
SparseTensorCOO<V> *lvlCOO);
/// Allocates a new sparse tensor and initializes it from the given buffers.
static SparseTensorStorage<P, C, V> *
newFromBuffers(uint64_t dimRank, const uint64_t *dimSizes, uint64_t lvlRank,
const uint64_t *lvlSizes, const LevelType *lvlTypes,
const uint64_t *dim2lvl, const uint64_t *lvl2dim,
uint64_t srcRank, const intptr_t *buffers);
~SparseTensorStorage() final = default;
/// Partially specialize these getter methods based on template types.
void getPositions(std::vector<P> **out, uint64_t lvl) final {
assert(out && "Received nullptr for out parameter");
assert(lvl < getLvlRank());
*out = &positions[lvl];
}
void getCoordinates(std::vector<C> **out, uint64_t lvl) final {
assert(out && "Received nullptr for out parameter");
assert(lvl < getLvlRank());
*out = &coordinates[lvl];
}
void getCoordinatesBuffer(std::vector<C> **out, uint64_t lvl) final {
assert(out && "Received nullptr for out parameter");
assert(lvl < getLvlRank());
// Note that the sparse tensor support library always stores COO in SoA
// format, even when AoS is requested. This is never an issue, since all
// actual code/library generation requests "views" into the coordinate
// storage for the individual levels, which is trivially provided for
// both AoS and SoA (as well as all the other storage formats). The only
// exception is when the buffer version of coordinate storage is requested
// (currently only for printing). In that case, we do the following
// potentially expensive transformation to provide that view. If this
// operation becomes more common beyond debugging, we should consider
// implementing proper AoS in the support library as well.
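// For example (with lvl == 0, nnz == 2, and SoA storage
// coordinates[0] == {0, 1}, coordinates[1] == {2, 3}), the AoS view
// produced below is crdBuffer == {0, 2, 1, 3}.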
uint64_t lvlRank = getLvlRank();
uint64_t nnz = values.size();
crdBuffer.clear();
crdBuffer.reserve(nnz * (lvlRank - lvl));
for (uint64_t i = 0; i < nnz; i++) {
for (uint64_t l = lvl; l < lvlRank; l++) {
assert(i < coordinates[l].size());
crdBuffer.push_back(coordinates[l][i]);
}
}
*out = &crdBuffer;
}
void getValues(std::vector<V> **out) final {
assert(out && "Received nullptr for out parameter");
*out = &values;
}
/// Partially specialize lexicographical insertions based on template types.
void lexInsert(const uint64_t *lvlCoords, V val) final {
assert(lvlCoords);
if (allDense) {
uint64_t lvlRank = getLvlRank();
uint64_t valIdx = 0;
// Linearize the address.
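// For example (a 2-level all-dense tensor with lvlSizes == {3, 4} and
// lvlCoords == {1, 2}): valIdx == (0 * 3 + 1) * 4 + 2 == 6.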
for (uint64_t l = 0; l < lvlRank; l++)
valIdx = valIdx * getLvlSize(l) + lvlCoords[l];
values[valIdx] = val;
return;
}
// First, wrap up pending insertion path.
uint64_t diffLvl = 0;
uint64_t full = 0;
if (!values.empty()) {
diffLvl = lexDiff(lvlCoords);
endPath(diffLvl + 1);
full = lvlCursor[diffLvl] + 1;
}
// Then continue with insertion path.
insPath(lvlCoords, diffLvl, full, val);
}
/// Partially specialize expanded insertions based on template types.
void expInsert(uint64_t *lvlCoords, V *values, bool *filled, uint64_t *added,
uint64_t count, uint64_t expsz) final {
assert((lvlCoords && values && filled && added) && "Received nullptr");
if (count == 0)
return;
// Sort.
std::sort(added, added + count);
// Restore insertion path for first insert.
const uint64_t lastLvl = getLvlRank() - 1;
uint64_t c = added[0];
assert(c <= expsz);
assert(filled[c] && "added coordinate is not filled");
lvlCoords[lastLvl] = c;
lexInsert(lvlCoords, values[c]);
values[c] = 0;
filled[c] = false;
// Subsequent insertions are quick.
for (uint64_t i = 1; i < count; i++) {
assert(c < added[i] && "non-lexicographic insertion");
c = added[i];
assert(c <= expsz);
assert(filled[c] && "added coordinate is not filled");
lvlCoords[lastLvl] = c;
insPath(lvlCoords, lastLvl, added[i - 1] + 1, values[c]);
values[c] = 0;
filled[c] = false;
}
}
/// Finalizes lexicographic insertions.
void endLexInsert() final {
if (!allDense) {
if (values.empty())
finalizeSegment(0);
else
endPath(0);
}
}
/// Sorts the unordered tensor in place; the method assumes that it is
/// an unordered COO tensor.
void sortInPlace() {
uint64_t nnz = values.size();
#ifndef NDEBUG
for (uint64_t l = 0; l < getLvlRank(); l++)
assert(nnz == coordinates[l].size());
#endif
// In-place permutation.
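// On return from the lambda below, position `i` holds the element that
// was originally at `perm[i]`; e.g., perm == {2, 0, 1} moves element 2
// to position 0, element 0 to position 1, and element 1 to position 2.
// Applying it to `sortedIdx` therefore places the data in sorted order.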
auto applyPerm = [this](std::vector<uint64_t> &perm) {
uint64_t length = perm.size();
uint64_t lvlRank = getLvlRank();
// Cache for the current level coordinates.
std::vector<P> lvlCrds(lvlRank);
for (uint64_t i = 0; i < length; i++) {
uint64_t current = i;
if (i != perm[current]) {
for (uint64_t l = 0; l < lvlRank; l++)
lvlCrds[l] = coordinates[l][i];
V val = values[i];
// Deals with a permutation cycle.
while (i != perm[current]) {
uint64_t next = perm[current];
// Swaps the level coordinates and value.
for (uint64_t l = 0; l < lvlRank; l++)
coordinates[l][current] = coordinates[l][next];
values[current] = values[next];
perm[current] = current;
current = next;
}
for (uint64_t l = 0; l < lvlRank; l++)
coordinates[l][current] = lvlCrds[l];
values[current] = val;
perm[current] = current;
}
}
};
std::vector<uint64_t> sortedIdx(nnz, 0);
for (uint64_t i = 0; i < nnz; i++)
sortedIdx[i] = i;
std::sort(sortedIdx.begin(), sortedIdx.end(),
[this](uint64_t lhs, uint64_t rhs) {
for (uint64_t l = 0; l < getLvlRank(); l++) {
if (coordinates[l][lhs] == coordinates[l][rhs])
continue;
return coordinates[l][lhs] < coordinates[l][rhs];
}
assert(lhs == rhs && "duplicate coordinates");
return false;
});
applyPerm(sortedIdx);
}
private:
/// Appends coordinate `crd` to level `lvl`, in the semantically
/// general sense. For non-dense levels, that means appending to the
/// `coordinates[lvl]` array, checking that `crd` is representable in
/// the `C` type; however, we do not verify other semantic requirements
/// (e.g., that `crd` is in bounds for `lvlSizes[lvl]`, and not previously
/// occurring in the same segment). For dense levels, this method instead
/// appends the appropriate number of zeros to the `values` array, where
/// `full` is the number of "entries" already written to `values` for this
/// segment (aka one after the highest coordinate previously appended).
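/// For example, on an innermost dense level with `full == 2` and
/// `crd == 4`, two zeros are appended to `values` (for the skipped
/// coordinates 2 and 3) before the caller stores the value for
/// coordinate 4.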
void appendCrd(uint64_t lvl, uint64_t full, uint64_t crd) {
if (!isDenseLvl(lvl)) {
assert(isCompressedLvl(lvl) || isLooseCompressedLvl(lvl) ||
isSingletonLvl(lvl) || isNOutOfMLvl(lvl));
coordinates[lvl].push_back(detail::checkOverflowCast<C>(crd));
} else { // Dense level.
assert(crd >= full && "Coordinate was already filled");
if (crd == full)
return; // Short-circuit, since it'll be a nop.
if (lvl + 1 == getLvlRank())
values.insert(values.end(), crd - full, 0);
else
finalizeSegment(lvl + 1, 0, crd - full);
}
}
/// Computes the assembled-size associated with the `l`-th level,
/// given the assembled-size associated with the `(l-1)`-th level.
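/// For example, a compressed level `l` with `positions[l] == {0, 2, 2, 3}`
/// under `parentSz == 3` has assembled-size `positions[l][3] == 3`, i.e.,
/// the number of stored coordinates.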
uint64_t assembledSize(uint64_t parentSz, uint64_t l) const {
if (isCompressedLvl(l))
return positions[l][parentSz];
if (isLooseCompressedLvl(l))
return positions[l][2 * parentSz - 1];
if (isSingletonLvl(l) || isNOutOfMLvl(l))
return parentSz; // new size same as the parent
assert(isDenseLvl(l));
return parentSz * getLvlSize(l);
}
/// Initializes sparse tensor storage scheme from a memory-resident sparse
/// tensor in coordinate scheme. This method prepares the positions and
/// coordinates arrays under the given per-level dense/sparse annotations.
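/// For example, a 3x4 matrix with levels (dense, compressed) and sorted
/// COO elements (0,1)=a, (0,3)=b, (2,2)=c produces the CSR storage
/// positions[1] == {0, 2, 2, 3}, coordinates[1] == {1, 3, 2}, and
/// values == {a, b, c}.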
void fromCOO(const std::vector<Element<V>> &lvlElements, uint64_t lo,
uint64_t hi, uint64_t l) {
const uint64_t lvlRank = getLvlRank();
assert(l <= lvlRank && hi <= lvlElements.size());
// Once levels are exhausted, insert the numerical values.
if (l == lvlRank) {
assert(lo < hi);
values.push_back(lvlElements[lo].value);
return;
}
// Visit all elements in this interval.
uint64_t full = 0;
while (lo < hi) { // If `hi` is unchanged, then `lo < lvlElements.size()`.
// Find segment in interval with same coordinate at this level.
const uint64_t c = lvlElements[lo].coords[l];
uint64_t seg = lo + 1;
if (isUniqueLvl(l))
while (seg < hi && lvlElements[seg].coords[l] == c)
seg++;
// Handle segment in interval for sparse or dense level.
appendCrd(l, full, c);
full = c + 1;
fromCOO(lvlElements, lo, seg, l + 1);
// And move on to next segment in interval.
lo = seg;
}
// Finalize the sparse position structure at this level.
finalizeSegment(l, full);
}
/// Finalizes the sparse position structure at this level.
void finalizeSegment(uint64_t l, uint64_t full = 0, uint64_t count = 1) {
if (count == 0)
return; // Short-circuit, since it'll be a nop.
if (isCompressedLvl(l)) {
uint64_t pos = coordinates[l].size();
positions[l].insert(positions[l].end(), count,
detail::checkOverflowCast<P>(pos));
} else if (isLooseCompressedLvl(l)) {
// Finish this level, push pairs for the empty ones, and one
// more for the next level. Note that this always leaves one extra
// unused element at the end.
uint64_t pos = coordinates[l].size();
positions[l].insert(positions[l].end(), 2 * count,
detail::checkOverflowCast<P>(pos));
} else if (isSingletonLvl(l) || isNOutOfMLvl(l)) {
return; // Nothing to finalize.
} else { // Dense level.
assert(isDenseLvl(l));
const uint64_t sz = getLvlSizes()[l];
assert(sz >= full && "Segment is overfull");
count = detail::checkedMul(count, sz - full);
// For dense storage we must enumerate all the remaining coordinates
// in this level (i.e., coordinates after the last non-zero
// element), and either fill in their zero values or else recurse
// to finalize some deeper level.
if (l + 1 == getLvlRank())
values.insert(values.end(), count, 0);
else
finalizeSegment(l + 1, 0, count);
}
}
/// Wraps up a single insertion path, inner to outer.
void endPath(uint64_t diffLvl) {
const uint64_t lvlRank = getLvlRank();
const uint64_t lastLvl = lvlRank - 1;
assert(diffLvl <= lvlRank);
const uint64_t stop = lvlRank - diffLvl;
for (uint64_t i = 0; i < stop; i++) {
const uint64_t l = lastLvl - i;
finalizeSegment(l, lvlCursor[l] + 1);
}
}
/// Continues a single insertion path, outer to inner. The first
/// argument is the level-coordinates for the value being inserted.
void insPath(const uint64_t *lvlCoords, uint64_t diffLvl, uint64_t full,
V val) {
const uint64_t lvlRank = getLvlRank();
assert(diffLvl <= lvlRank);
for (uint64_t l = diffLvl; l < lvlRank; l++) {
const uint64_t c = lvlCoords[l];
appendCrd(l, full, c);
full = 0;
lvlCursor[l] = c;
}
values.push_back(val);
}
/// Finds the lexicographically first level where the level-coordinates
/// in the argument differ from those in the current cursor.
uint64_t lexDiff(const uint64_t *lvlCoords) const {
const uint64_t lvlRank = getLvlRank();
for (uint64_t l = 0; l < lvlRank; l++) {
const auto crd = lvlCoords[l];
const auto cur = lvlCursor[l];
if (crd > cur || (crd == cur && !isUniqueLvl(l)) ||
(crd < cur && !isOrderedLvl(l))) {
return l;
}
if (crd < cur) {
assert(false && "non-lexicographic insertion");
return -1u;
}
}
assert(false && "duplicate insertion");
return -1u;
}
// Sparse tensor storage components.
std::vector<std::vector<P>> positions;
std::vector<std::vector<C>> coordinates;
std::vector<V> values;
// Auxiliary data structures.
std::vector<uint64_t> lvlCursor;
std::vector<C> crdBuffer; // just for AoS view
};
//===----------------------------------------------------------------------===//
//
// SparseTensorStorage Factories
//
//===----------------------------------------------------------------------===//
template <typename P, typename C, typename V>
SparseTensorStorage<P, C, V> *SparseTensorStorage<P, C, V>::newEmpty(
uint64_t dimRank, const uint64_t *dimSizes, uint64_t lvlRank,
const uint64_t *lvlSizes, const LevelType *lvlTypes,
const uint64_t *dim2lvl, const uint64_t *lvl2dim) {
SparseTensorCOO<V> *noLvlCOO = nullptr;
return new SparseTensorStorage<P, C, V>(dimRank, dimSizes, lvlRank, lvlSizes,
lvlTypes, dim2lvl, lvl2dim, noLvlCOO);
}
template <typename P, typename C, typename V>
SparseTensorStorage<P, C, V> *SparseTensorStorage<P, C, V>::newFromCOO(
uint64_t dimRank, const uint64_t *dimSizes, uint64_t lvlRank,
const uint64_t *lvlSizes, const LevelType *lvlTypes,
const uint64_t *dim2lvl, const uint64_t *lvl2dim,
SparseTensorCOO<V> *lvlCOO) {
assert(lvlCOO);
return new SparseTensorStorage<P, C, V>(dimRank, dimSizes, lvlRank, lvlSizes,
lvlTypes, dim2lvl, lvl2dim, lvlCOO);
}
template <typename P, typename C, typename V>
SparseTensorStorage<P, C, V> *SparseTensorStorage<P, C, V>::newFromBuffers(
uint64_t dimRank, const uint64_t *dimSizes, uint64_t lvlRank,
const uint64_t *lvlSizes, const LevelType *lvlTypes,
const uint64_t *dim2lvl, const uint64_t *lvl2dim, uint64_t srcRank,
const intptr_t *buffers) {
return new SparseTensorStorage<P, C, V>(dimRank, dimSizes, lvlRank, lvlSizes,
lvlTypes, dim2lvl, lvl2dim, buffers);
}
//===----------------------------------------------------------------------===//
//
// SparseTensorStorage Constructors
//
//===----------------------------------------------------------------------===//
template <typename P, typename C, typename V>
SparseTensorStorage<P, C, V>::SparseTensorStorage(
uint64_t dimRank, const uint64_t *dimSizes, uint64_t lvlRank,
const uint64_t *lvlSizes, const LevelType *lvlTypes,
const uint64_t *dim2lvl, const uint64_t *lvl2dim,
SparseTensorCOO<V> *lvlCOO)
: SparseTensorStorage(dimRank, dimSizes, lvlRank, lvlSizes, lvlTypes,
dim2lvl, lvl2dim) {
// Provide hints on capacity of positions and coordinates.
// TODO: needs much fine-tuning based on actual sparsity; currently
// we reserve position/coordinate space based on all previous dense
// levels, which works well up to first sparse level; but we should
// really use nnz and dense/sparse distribution.
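// For example, for a dense-then-compressed (CSR-like) layout with
// lvlSizes == {3, 4}, the loop below reserves positions[1] for 3 + 1
// entries and coordinates[1] for 3 entries.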
uint64_t sz = 1;
for (uint64_t l = 0; l < lvlRank; l++) {
if (isCompressedLvl(l)) {
positions[l].reserve(sz + 1);
positions[l].push_back(0);
coordinates[l].reserve(sz);
sz = 1;
} else if (isLooseCompressedLvl(l)) {
positions[l].reserve(2 * sz + 1); // last one unused
positions[l].push_back(0);
coordinates[l].reserve(sz);
sz = 1;
} else if (isSingletonLvl(l)) {
coordinates[l].reserve(sz);
sz = 1;
} else if (isNOutOfMLvl(l)) {
assert(l == lvlRank - 1 && "unexpected n:m usage");
sz = detail::checkedMul(sz, lvlSizes[l]) / 2;
coordinates[l].reserve(sz);
values.reserve(sz);
} else { // Dense level.
assert(isDenseLvl(l));
sz = detail::checkedMul(sz, lvlSizes[l]);
}
}
if (lvlCOO) {
/* New from COO: ensure it is sorted. */
assert(lvlCOO->getRank() == lvlRank);
lvlCOO->sort();
// Now actually insert the `elements`.
const auto &elements = lvlCOO->getElements();
const uint64_t nse = elements.size();
assert(values.size() == 0);
values.reserve(nse);
fromCOO(elements, 0, nse, 0);
} else if (allDense) {
/* New empty (all dense) */
values.resize(sz, 0);
}
}
template <typename P, typename C, typename V>
SparseTensorStorage<P, C, V>::SparseTensorStorage(
uint64_t dimRank, const uint64_t *dimSizes, uint64_t lvlRank,
const uint64_t *lvlSizes, const LevelType *lvlTypes,
const uint64_t *dim2lvl, const uint64_t *lvl2dim, const intptr_t *lvlBufs)
: SparseTensorStorage(dimRank, dimSizes, lvlRank, lvlSizes, lvlTypes,
dim2lvl, lvl2dim) {
// Note that none of the buffers can be reused because ownership
// of the memory passed from clients is not necessarily transferred.
// Therefore, all data is copied over into a new SparseTensorStorage.
uint64_t trailCOOLen = 0, parentSz = 1, bufIdx = 0;
for (uint64_t l = 0; l < lvlRank; l++) {
if (!isUniqueLvl(l) && (isCompressedLvl(l) || isLooseCompressedLvl(l))) {
// A `(loose)compressed_nu` level marks the start of the trailing COO
// region. Since the coordinate buffer used for the trailing COO is
// passed in as an AoS scheme and SparseTensorStorage uses a SoA
// scheme, we cannot simply copy the values from the provided buffers.
trailCOOLen = lvlRank - l;
break;
}
if (isCompressedLvl(l) || isLooseCompressedLvl(l)) {
P *posPtr = reinterpret_cast<P *>(lvlBufs[bufIdx++]);
C *crdPtr = reinterpret_cast<C *>(lvlBufs[bufIdx++]);
if (isLooseCompressedLvl(l)) {
positions[l].assign(posPtr, posPtr + 2 * parentSz);
coordinates[l].assign(crdPtr, crdPtr + positions[l][2 * parentSz - 1]);
} else {
positions[l].assign(posPtr, posPtr + parentSz + 1);
coordinates[l].assign(crdPtr, crdPtr + positions[l][parentSz]);
}
} else if (isSingletonLvl(l)) {
assert(0 && "general singleton not supported yet");
} else if (isNOutOfMLvl(l)) {
assert(0 && "n ouf of m not supported yet");
} else {
assert(isDenseLvl(l));
}
parentSz = assembledSize(parentSz, l);
}
// Handle AoS vs. SoA mismatch for COO.
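// For example, a trailing 2-level COO whose AoS buffer holds
// {x0, y0, x1, y1, ...} is split below into coordinates[cooStartLvl]
// == {x0, x1, ...} and coordinates[cooStartLvl + 1] == {y0, y1, ...}.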
if (trailCOOLen != 0) {
uint64_t cooStartLvl = lvlRank - trailCOOLen;
assert(!isUniqueLvl(cooStartLvl) &&
(isCompressedLvl(cooStartLvl) || isLooseCompressedLvl(cooStartLvl)));
P *posPtr = reinterpret_cast<P *>(lvlBufs[bufIdx++]);
C *aosCrdPtr = reinterpret_cast<C *>(lvlBufs[bufIdx++]);
P crdLen;
if (isLooseCompressedLvl(cooStartLvl)) {
positions[cooStartLvl].assign(posPtr, posPtr + 2 * parentSz);
crdLen = positions[cooStartLvl][2 * parentSz - 1];
} else {
positions[cooStartLvl].assign(posPtr, posPtr + parentSz + 1);
crdLen = positions[cooStartLvl][parentSz];
}
for (uint64_t l = cooStartLvl; l < lvlRank; l++) {
coordinates[l].resize(crdLen);
for (uint64_t n = 0; n < crdLen; n++) {
coordinates[l][n] = *(aosCrdPtr + (l - cooStartLvl) + n * trailCOOLen);
}
}
parentSz = assembledSize(parentSz, cooStartLvl);
}
// Copy the values buffer.
V *valPtr = reinterpret_cast<V *>(lvlBufs[bufIdx]);
values.assign(valPtr, valPtr + parentSz);
}
} // namespace sparse_tensor
} // namespace mlir
#endif // MLIR_EXECUTIONENGINE_SPARSETENSOR_STORAGE_H