//===- Storage.h - TACO-flavored sparse tensor representation ---*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains definitions for the following classes:
//
// * `SparseTensorStorageBase`
// * `SparseTensorStorage<P, C, V>`
//
//===----------------------------------------------------------------------===//

#ifndef MLIR_EXECUTIONENGINE_SPARSETENSOR_STORAGE_H
#define MLIR_EXECUTIONENGINE_SPARSETENSOR_STORAGE_H

#include "mlir/Dialect/SparseTensor/IR/Enums.h"
#include "mlir/ExecutionEngine/Float16bits.h"
#include "mlir/ExecutionEngine/SparseTensor/ArithmeticUtils.h"
#include "mlir/ExecutionEngine/SparseTensor/COO.h"
#include "mlir/ExecutionEngine/SparseTensor/MapRef.h"

namespace mlir {
namespace sparse_tensor {

//===----------------------------------------------------------------------===//
//
//  SparseTensorStorage Classes
//
//===----------------------------------------------------------------------===//

/// Abstract base class for `SparseTensorStorage<P,C,V>`. This class
/// takes responsibility for all the `<P,C,V>`-independent aspects
/// of the tensor (e.g., sizes, sparsity, mapping). In addition,
/// we use function overloading to implement "partial" method
/// specialization, which the C-API relies on to catch type errors
/// arising from our use of opaque pointers.
///
/// Because this class forms a bridge between the denotational semantics
/// of "tensors" and the operational semantics of how we store and
/// compute with them, it also distinguishes between two different
/// coordinate spaces (and their associated rank, sizes, etc.).
/// Denotationally, we have the *dimensions* of the tensor represented
/// by this object.  Operationally, we have the *levels* of the storage
/// representation itself.
///
/// The *size* of an axis is the cardinality of possible coordinate
/// values along that axis (regardless of which coordinates have stored
/// element values). As such, each size must be non-zero, since if any
/// axis had size zero then the whole tensor would have trivial storage
/// (there being no possible coordinates). We therefore use the plural
/// term *sizes* for a collection of non-zero cardinalities, and use this
/// term whenever referring to run-time cardinalities. In contrast, we
/// use the term *shape* for a collection of compile-time cardinalities,
/// where zero indicates a cardinality that is dynamic (i.e.,
/// unknown/unspecified at compile-time). At run-time, these dynamic
/// cardinalities are inferred from or checked against the sizes otherwise
/// specified. Thus, a dynamic cardinality always has an "immutable but
/// unknown" value; the term "dynamic" should not be taken to indicate
/// run-time mutability.
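///
/// For illustration only (a sketch, not a normative example): a 100x200
/// matrix stored as CSR has dimRank == lvlRank == 2, dimSizes == lvlSizes
/// == {100, 200}, lvlTypes == {dense, compressed}, and an identity
/// dim2lvl/lvl2dim mapping; storing the same matrix as CSC keeps the
/// dimension sizes but permutes the level side, so lvlSizes == {200, 100}
/// under a transposing dim2lvl/lvl2dim mapping.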
class SparseTensorStorageBase {
protected:
  SparseTensorStorageBase(const SparseTensorStorageBase &) = default;
  SparseTensorStorageBase &operator=(const SparseTensorStorageBase &) = delete;

public:
  /// Constructs a new sparse-tensor storage object with the given encoding.
  SparseTensorStorageBase(uint64_t dimRank, const uint64_t *dimSizes,
                          uint64_t lvlRank, const uint64_t *lvlSizes,
                          const LevelType *lvlTypes, const uint64_t *dim2lvl,
                          const uint64_t *lvl2dim);
  virtual ~SparseTensorStorageBase() = default;

  /// Gets the number of tensor-dimensions.
  uint64_t getDimRank() const { return dimSizes.size(); }

  /// Gets the number of storage-levels.
  uint64_t getLvlRank() const { return lvlSizes.size(); }

  /// Gets the tensor-dimension sizes array.
  const std::vector<uint64_t> &getDimSizes() const { return dimSizes; }

  /// Safely looks up the size of the given tensor-dimension.
  uint64_t getDimSize(uint64_t d) const {
    assert(d < getDimRank());
    return dimSizes[d];
  }

  /// Gets the storage-level sizes array.
  const std::vector<uint64_t> &getLvlSizes() const { return lvlSizes; }

  /// Safely looks up the size of the given storage-level.
  uint64_t getLvlSize(uint64_t l) const {
    assert(l < getLvlRank());
    return lvlSizes[l];
  }

  /// Gets the level-types array.
  const std::vector<LevelType> &getLvlTypes() const { return lvlTypes; }

  /// Safely looks up the type of the given level.
  LevelType getLvlType(uint64_t l) const {
    assert(l < getLvlRank());
    return lvlTypes[l];
  }

  /// Safely checks if the level uses dense storage.
  bool isDenseLvl(uint64_t l) const { return isDenseLT(getLvlType(l)); }

  /// Safely checks if the level uses compressed storage.
  bool isCompressedLvl(uint64_t l) const {
    return isCompressedLT(getLvlType(l));
  }

  /// Safely checks if the level uses loose compressed storage.
  bool isLooseCompressedLvl(uint64_t l) const {
    return isLooseCompressedLT(getLvlType(l));
  }

  /// Safely checks if the level uses singleton storage.
  bool isSingletonLvl(uint64_t l) const { return isSingletonLT(getLvlType(l)); }

  /// Safely checks if the level uses n out of m storage.
  bool isNOutOfMLvl(uint64_t l) const { return isNOutOfMLT(getLvlType(l)); }

  /// Safely checks if the level is ordered.
  bool isOrderedLvl(uint64_t l) const { return isOrderedLT(getLvlType(l)); }

  /// Safely checks if the level is unique.
  bool isUniqueLvl(uint64_t l) const { return isUniqueLT(getLvlType(l)); }

  /// Gets positions-overhead storage for the given level.
#define DECL_GETPOSITIONS(PNAME, P)                                            \
  virtual void getPositions(std::vector<P> **, uint64_t);
  MLIR_SPARSETENSOR_FOREVERY_FIXED_O(DECL_GETPOSITIONS)
#undef DECL_GETPOSITIONS

  /// Gets coordinates-overhead storage for the given level.
#define DECL_GETCOORDINATES(INAME, C)                                          \
  virtual void getCoordinates(std::vector<C> **, uint64_t);
  MLIR_SPARSETENSOR_FOREVERY_FIXED_O(DECL_GETCOORDINATES)
#undef DECL_GETCOORDINATES

  /// Gets coordinates-overhead storage buffer for the given level.
#define DECL_GETCOORDINATESBUFFER(INAME, C)                                    \
  virtual void getCoordinatesBuffer(std::vector<C> **, uint64_t);
  MLIR_SPARSETENSOR_FOREVERY_FIXED_O(DECL_GETCOORDINATESBUFFER)
#undef DECL_GETCOORDINATESBUFFER

  /// Gets primary storage.
#define DECL_GETVALUES(VNAME, V) virtual void getValues(std::vector<V> **);
  MLIR_SPARSETENSOR_FOREVERY_V(DECL_GETVALUES)
#undef DECL_GETVALUES

  /// Element-wise insertion in lexicographic coordinate order. The first
  /// argument is the level-coordinates for the value being inserted.
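  ///
  /// Illustrative sketch only (`tensor` is a hypothetical pointer to a
  /// storage object with double values): coordinates must arrive in
  /// lexicographic order of the level-coordinates, e.g.
  ///   uint64_t c0[] = {0, 1};
  ///   tensor->lexInsert(c0, 1.0);
  ///   uint64_t c1[] = {2, 0};
  ///   tensor->lexInsert(c1, 2.0);
  /// with a final call to endLexInsert() to finalize the storage.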
#define DECL_LEXINSERT(VNAME, V) virtual void lexInsert(const uint64_t *, V);
  MLIR_SPARSETENSOR_FOREVERY_V(DECL_LEXINSERT)
#undef DECL_LEXINSERT

  /// Expanded insertion.  Note that this method resets the
  /// values/filled-switch array back to all-zero/false while only
  /// iterating over the nonzero elements.
#define DECL_EXPINSERT(VNAME, V)                                               \
  virtual void expInsert(uint64_t *, V *, bool *, uint64_t *, uint64_t,        \
                         uint64_t);
  MLIR_SPARSETENSOR_FOREVERY_V(DECL_EXPINSERT)
#undef DECL_EXPINSERT

  /// Finalizes lexicographic insertions.
  virtual void endLexInsert() = 0;

private:
  const std::vector<uint64_t> dimSizes;
  const std::vector<uint64_t> lvlSizes;
  const std::vector<LevelType> lvlTypes;
  const std::vector<uint64_t> dim2lvlVec;
  const std::vector<uint64_t> lvl2dimVec;

protected:
  const MapRef map; // non-owning pointers into dim2lvl/lvl2dim vectors
  const bool allDense;
};

/// A memory-resident sparse tensor using a storage scheme based on
/// per-level sparse/dense annotations. This data structure provides
/// a bufferized form of a sparse tensor type. In contrast to generating
/// setup methods for each differently annotated sparse tensor, this
/// class provides a convenient "one-size-fits-all" solution that simply
/// takes an input tensor and annotations to implement all required setup
/// in a general manner.
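///
/// A usage sketch (illustrative only; the template arguments and variable
/// names here are assumptions, not requirements): a CSR matrix with 64-bit
/// overhead types and double values could be materialized from a (possibly
/// unsorted) `SparseTensorCOO<double>` via
///   auto *csr = SparseTensorStorage<uint64_t, uint64_t, double>::newFromCOO(
///       dimRank, dimSizes, lvlRank, lvlSizes, lvlTypes, dim2lvl, lvl2dim,
///       lvlCOO);
/// with all argument arrays set up by the caller.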
template <typename P, typename C, typename V>
class SparseTensorStorage final : public SparseTensorStorageBase {
  /// Private constructor to share code between the other constructors.
  /// Beware that the object is not guaranteed to be in a valid state
  /// after this constructor alone; e.g., `isCompressedLvl(l)` does not
  /// entail `!(positions[l].empty())`.
  SparseTensorStorage(uint64_t dimRank, const uint64_t *dimSizes,
                      uint64_t lvlRank, const uint64_t *lvlSizes,
                      const LevelType *lvlTypes, const uint64_t *dim2lvl,
                      const uint64_t *lvl2dim)
      : SparseTensorStorageBase(dimRank, dimSizes, lvlRank, lvlSizes, lvlTypes,
                                dim2lvl, lvl2dim),
        positions(lvlRank), coordinates(lvlRank), lvlCursor(lvlRank) {}

public:
  /// Constructs a sparse tensor with the given encoding, and allocates
  /// overhead storage according to some simple heuristics. When lvlCOO
  /// is set, the sparse tensor is initialized with the contents of that
  /// data structure; otherwise, an empty sparse tensor results.
  SparseTensorStorage(uint64_t dimRank, const uint64_t *dimSizes,
                      uint64_t lvlRank, const uint64_t *lvlSizes,
                      const LevelType *lvlTypes, const uint64_t *dim2lvl,
                      const uint64_t *lvl2dim, SparseTensorCOO<V> *lvlCOO);

  /// Constructs a sparse tensor with the given encoding, and initializes
  /// the contents from the level buffers. The constructor assumes that the
  /// data provided by `lvlBufs` can be used directly to interpret the
  /// resulting sparse tensor and performs no integrity test on the input data.
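  ///
  /// For example (a sketch under the assumption of a 2-D CSR encoding):
  /// `lvlBufs` would hold three pointers, in order the positions buffer of
  /// the compressed level, its coordinates buffer, and the values buffer:
  ///   intptr_t bufs[] = {reinterpret_cast<intptr_t>(posBuf),
  ///                      reinterpret_cast<intptr_t>(crdBuf),
  ///                      reinterpret_cast<intptr_t>(valBuf)};
  /// where `posBuf`, `crdBuf`, and `valBuf` are hypothetical caller-owned
  /// arrays of types `P`, `C`, and `V`; dense levels contribute no buffers.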
  SparseTensorStorage(uint64_t dimRank, const uint64_t *dimSizes,
                      uint64_t lvlRank, const uint64_t *lvlSizes,
                      const LevelType *lvlTypes, const uint64_t *dim2lvl,
                      const uint64_t *lvl2dim, const intptr_t *lvlBufs);

  /// Allocates a new empty sparse tensor.
  static SparseTensorStorage<P, C, V> *
  newEmpty(uint64_t dimRank, const uint64_t *dimSizes, uint64_t lvlRank,
           const uint64_t *lvlSizes, const LevelType *lvlTypes,
           const uint64_t *dim2lvl, const uint64_t *lvl2dim);

  /// Allocates a new sparse tensor and initializes it from the given COO.
  static SparseTensorStorage<P, C, V> *
  newFromCOO(uint64_t dimRank, const uint64_t *dimSizes, uint64_t lvlRank,
             const uint64_t *lvlSizes, const LevelType *lvlTypes,
             const uint64_t *dim2lvl, const uint64_t *lvl2dim,
             SparseTensorCOO<V> *lvlCOO);

  /// Allocates a new sparse tensor and initializes it from the given buffers.
  static SparseTensorStorage<P, C, V> *
  newFromBuffers(uint64_t dimRank, const uint64_t *dimSizes, uint64_t lvlRank,
                 const uint64_t *lvlSizes, const LevelType *lvlTypes,
                 const uint64_t *dim2lvl, const uint64_t *lvl2dim,
                 uint64_t srcRank, const intptr_t *buffers);

  ~SparseTensorStorage() final = default;

  /// Partially specialize these getter methods based on template types.
  void getPositions(std::vector<P> **out, uint64_t lvl) final {
    assert(out && "Received nullptr for out parameter");
    assert(lvl < getLvlRank());
    *out = &positions[lvl];
  }
  void getCoordinates(std::vector<C> **out, uint64_t lvl) final {
    assert(out && "Received nullptr for out parameter");
    assert(lvl < getLvlRank());
    *out = &coordinates[lvl];
  }
  void getCoordinatesBuffer(std::vector<C> **out, uint64_t lvl) final {
    assert(out && "Received nullptr for out parameter");
    assert(lvl < getLvlRank());
    // Note that the sparse tensor support library always stores COO in SoA
    // format, even when AoS is requested. This is never an issue, since all
    // actual code/library generation requests "views" into the coordinate
    // storage for the individual levels, which is trivially provided for
    // both AoS and SoA (as well as all the other storage formats). The only
    // exception is when the buffer version of coordinate storage is requested
    // (currently only for printing). In that case, we do the following
    // potentially expensive transformation to provide that view. If this
    // operation becomes more common beyond debugging, we should consider
    // implementing proper AoS in the support library as well.
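    //
    // A small illustration (layout assumption, not an API contract): for a
    // rank-2 COO region holding entries (0,1), (2,0), (2,2), SoA keeps the
    // per-level arrays {0,2,2} and {1,0,2}, whereas the AoS view built below
    // interleaves them as {0,1, 2,0, 2,2}.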
    uint64_t lvlRank = getLvlRank();
    uint64_t nnz = values.size();
    crdBuffer.clear();
    crdBuffer.reserve(nnz * (lvlRank - lvl));
    for (uint64_t i = 0; i < nnz; i++) {
      for (uint64_t l = lvl; l < lvlRank; l++) {
        assert(i < coordinates[l].size());
        crdBuffer.push_back(coordinates[l][i]);
      }
    }
    *out = &crdBuffer;
  }
  void getValues(std::vector<V> **out) final {
    assert(out && "Received nullptr for out parameter");
    *out = &values;
  }

  /// Partially specialize lexicographical insertions based on template types.
  void lexInsert(const uint64_t *lvlCoords, V val) final {
    assert(lvlCoords);
    if (allDense) {
      uint64_t lvlRank = getLvlRank();
      uint64_t valIdx = 0;
      // Linearize the address.
      for (uint64_t l = 0; l < lvlRank; l++)
        valIdx = valIdx * getLvlSize(l) + lvlCoords[l];
      values[valIdx] = val;
      return;
    }
    // First, wrap up pending insertion path.
    uint64_t diffLvl = 0;
    uint64_t full = 0;
    if (!values.empty()) {
      diffLvl = lexDiff(lvlCoords);
      endPath(diffLvl + 1);
      full = lvlCursor[diffLvl] + 1;
    }
    // Then continue with insertion path.
    insPath(lvlCoords, diffLvl, full, val);
  }

  /// Partially specialize expanded insertions based on template types.
  void expInsert(uint64_t *lvlCoords, V *values, bool *filled, uint64_t *added,
                 uint64_t count, uint64_t expsz) final {
    assert((lvlCoords && values && filled && added) && "Received nullptr");
    if (count == 0)
      return;
    // Sort.
    std::sort(added, added + count);
    // Restore insertion path for first insert.
    const uint64_t lastLvl = getLvlRank() - 1;
    uint64_t c = added[0];
    assert(c <= expsz);
    assert(filled[c] && "added coordinate is not filled");
    lvlCoords[lastLvl] = c;
    lexInsert(lvlCoords, values[c]);
    values[c] = 0;
    filled[c] = false;
    // Subsequent insertions are quick.
    for (uint64_t i = 1; i < count; i++) {
      assert(c < added[i] && "non-lexicographic insertion");
      c = added[i];
      assert(c <= expsz);
      assert(filled[c] && "added coordinate is not filled");
      lvlCoords[lastLvl] = c;
      insPath(lvlCoords, lastLvl, added[i - 1] + 1, values[c]);
      values[c] = 0;
      filled[c] = false;
    }
  }

  /// Finalizes lexicographic insertions.
  void endLexInsert() final {
    if (!allDense) {
      if (values.empty())
        finalizeSegment(0);
      else
        endPath(0);
    }
  }

  /// Sorts the unordered tensor in place; this method assumes the tensor
  /// is an unordered COO tensor.
  void sortInPlace() {
    uint64_t nnz = values.size();
#ifndef NDEBUG
    for (uint64_t l = 0; l < getLvlRank(); l++)
      assert(nnz == coordinates[l].size());
#endif

    // In-place permutation.
    auto applyPerm = [this](std::vector<uint64_t> &perm) {
      uint64_t length = perm.size();
      uint64_t lvlRank = getLvlRank();
      // Cache for the current level coordinates.
      std::vector<P> lvlCrds(lvlRank);
      for (uint64_t i = 0; i < length; i++) {
        uint64_t current = i;
        if (i != perm[current]) {
          for (uint64_t l = 0; l < lvlRank; l++)
            lvlCrds[l] = coordinates[l][i];
          V val = values[i];
          // Deals with a permutation cycle.
          while (i != perm[current]) {
            uint64_t next = perm[current];
            // Swaps the level coordinates and value.
            for (uint64_t l = 0; l < lvlRank; l++)
              coordinates[l][current] = coordinates[l][next];
            values[current] = values[next];
            perm[current] = current;
            current = next;
          }
          for (uint64_t l = 0; l < lvlRank; l++)
            coordinates[l][current] = lvlCrds[l];
          values[current] = val;
          perm[current] = current;
        }
      }
    };

    std::vector<uint64_t> sortedIdx(nnz, 0);
    for (uint64_t i = 0; i < nnz; i++)
      sortedIdx[i] = i;

    std::sort(sortedIdx.begin(), sortedIdx.end(),
              [this](uint64_t lhs, uint64_t rhs) {
                for (uint64_t l = 0; l < getLvlRank(); l++) {
                  if (coordinates[l][lhs] == coordinates[l][rhs])
                    continue;
                  return coordinates[l][lhs] < coordinates[l][rhs];
                }
                assert(lhs == rhs && "duplicate coordinates");
                return false;
              });

    applyPerm(sortedIdx);
  }

private:
  /// Appends coordinate `crd` to level `lvl`, in the semantically
  /// general sense.  For non-dense levels, that means appending to the
  /// `coordinates[lvl]` array, checking that `crd` is representable in
  /// the `C` type; however, we do not verify other semantic requirements
  /// (e.g., that `crd` is in bounds for `lvlSizes[lvl]`, and not previously
  /// occurring in the same segment).  For dense levels, this method instead
  /// appends the appropriate number of zeros to the `values` array, where
  /// `full` is the number of "entries" already written to `values` for this
  /// segment (aka one after the highest coordinate previously appended).
  void appendCrd(uint64_t lvl, uint64_t full, uint64_t crd) {
    if (!isDenseLvl(lvl)) {
      assert(isCompressedLvl(lvl) || isLooseCompressedLvl(lvl) ||
             isSingletonLvl(lvl) || isNOutOfMLvl(lvl));
      coordinates[lvl].push_back(detail::checkOverflowCast<C>(crd));
    } else { // Dense level.
      assert(crd >= full && "Coordinate was already filled");
      if (crd == full)
        return; // Short-circuit, since it'll be a nop.
      if (lvl + 1 == getLvlRank())
        values.insert(values.end(), crd - full, 0);
      else
        finalizeSegment(lvl + 1, 0, crd - full);
    }
  }

  /// Computes the assembled-size associated with the `l`-th level,
  /// given the assembled-size associated with the `(l-1)`-th level.
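  ///
  /// For example (a sketch, not an invariant promised by this class): for a
  /// compressed level with positions[l] == {0, 2, 2, 5} and parentSz == 3,
  /// the assembled size is positions[l][3] == 5, i.e. the number of
  /// coordinates/entries stored at this level.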
  uint64_t assembledSize(uint64_t parentSz, uint64_t l) const {
    if (isCompressedLvl(l))
      return positions[l][parentSz];
    if (isLooseCompressedLvl(l))
      return positions[l][2 * parentSz - 1];
    if (isSingletonLvl(l) || isNOutOfMLvl(l))
      return parentSz; // new size same as the parent
    assert(isDenseLvl(l));
    return parentSz * getLvlSize(l);
  }

  /// Initializes sparse tensor storage scheme from a memory-resident sparse
  /// tensor in coordinate scheme. This method prepares the positions and
  /// coordinates arrays under the given per-level dense/sparse annotations.
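  ///
  /// A worked sketch (illustrative only): for a 3x3 CSR tensor built from
  /// the sorted elements {(0,1)->a, (2,0)->b, (2,2)->c}, the recursion
  /// produces positions[1] == {0, 1, 1, 3}, coordinates[1] == {1, 0, 2},
  /// and values == {a, b, c}.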
  void fromCOO(const std::vector<Element<V>> &lvlElements, uint64_t lo,
               uint64_t hi, uint64_t l) {
    const uint64_t lvlRank = getLvlRank();
    assert(l <= lvlRank && hi <= lvlElements.size());
    // Once levels are exhausted, insert the numerical values.
    if (l == lvlRank) {
      assert(lo < hi);
      values.push_back(lvlElements[lo].value);
      return;
    }
    // Visit all elements in this interval.
    uint64_t full = 0;
    while (lo < hi) { // If `hi` is unchanged, then `lo < lvlElements.size()`.
      // Find segment in interval with same coordinate at this level.
      const uint64_t c = lvlElements[lo].coords[l];
      uint64_t seg = lo + 1;
      if (isUniqueLvl(l))
        while (seg < hi && lvlElements[seg].coords[l] == c)
          seg++;
      // Handle segment in interval for sparse or dense level.
      appendCrd(l, full, c);
      full = c + 1;
      fromCOO(lvlElements, lo, seg, l + 1);
      // And move on to next segment in interval.
      lo = seg;
    }
    // Finalize the sparse position structure at this level.
    finalizeSegment(l, full);
  }

  /// Finalizes the sparse position structure at this level.
  void finalizeSegment(uint64_t l, uint64_t full = 0, uint64_t count = 1) {
    if (count == 0)
      return; // Short-circuit, since it'll be a nop.
    if (isCompressedLvl(l)) {
      uint64_t pos = coordinates[l].size();
      positions[l].insert(positions[l].end(), count,
                          detail::checkOverflowCast<P>(pos));
    } else if (isLooseCompressedLvl(l)) {
      // Finish this level, push pairs for the empty ones, and one more
      // pair for the next level. Note that this always leaves one extra
      // unused element at the end.
      uint64_t pos = coordinates[l].size();
      positions[l].insert(positions[l].end(), 2 * count,
                          detail::checkOverflowCast<P>(pos));
    } else if (isSingletonLvl(l) || isNOutOfMLvl(l)) {
      return; // Nothing to finalize.
    } else { // Dense level.
      assert(isDenseLvl(l));
      const uint64_t sz = getLvlSizes()[l];
      assert(sz >= full && "Segment is overfull");
      count = detail::checkedMul(count, sz - full);
      // For dense storage we must enumerate all the remaining coordinates
      // in this level (i.e., coordinates after the last non-zero
      // element), and either fill in their zero values or else recurse
      // to finalize some deeper level.
      if (l + 1 == getLvlRank())
        values.insert(values.end(), count, 0);
      else
        finalizeSegment(l + 1, 0, count);
    }
  }

  /// Wraps up a single insertion path, inner to outer.
  void endPath(uint64_t diffLvl) {
    const uint64_t lvlRank = getLvlRank();
    const uint64_t lastLvl = lvlRank - 1;
    assert(diffLvl <= lvlRank);
    const uint64_t stop = lvlRank - diffLvl;
    for (uint64_t i = 0; i < stop; i++) {
      const uint64_t l = lastLvl - i;
      finalizeSegment(l, lvlCursor[l] + 1);
    }
  }

  /// Continues a single insertion path, outer to inner. The first
  /// argument is the level-coordinates for the value being inserted.
  void insPath(const uint64_t *lvlCoords, uint64_t diffLvl, uint64_t full,
               V val) {
    const uint64_t lvlRank = getLvlRank();
    assert(diffLvl <= lvlRank);
    for (uint64_t l = diffLvl; l < lvlRank; l++) {
      const uint64_t c = lvlCoords[l];
      appendCrd(l, full, c);
      full = 0;
      lvlCursor[l] = c;
    }
    values.push_back(val);
  }

  /// Finds the lexicographically first level where the level-coordinates
  /// in the argument differ from those in the current cursor.
  uint64_t lexDiff(const uint64_t *lvlCoords) const {
    const uint64_t lvlRank = getLvlRank();
    for (uint64_t l = 0; l < lvlRank; l++) {
      const auto crd = lvlCoords[l];
      const auto cur = lvlCursor[l];
      if (crd > cur || (crd == cur && !isUniqueLvl(l)) ||
          (crd < cur && !isOrderedLvl(l))) {
        return l;
      }
      if (crd < cur) {
        assert(false && "non-lexicographic insertion");
        return -1u;
      }
    }
    assert(false && "duplicate insertion");
    return -1u;
  }

  // Sparse tensor storage components.
  std::vector<std::vector<P>> positions;
  std::vector<std::vector<C>> coordinates;
  std::vector<V> values;

  // Auxiliary data structures.
  std::vector<uint64_t> lvlCursor;
  std::vector<C> crdBuffer; // just for AoS view
};

//===----------------------------------------------------------------------===//
//
//  SparseTensorStorage Factories
//
//===----------------------------------------------------------------------===//

template <typename P, typename C, typename V>
SparseTensorStorage<P, C, V> *SparseTensorStorage<P, C, V>::newEmpty(
    uint64_t dimRank, const uint64_t *dimSizes, uint64_t lvlRank,
    const uint64_t *lvlSizes, const LevelType *lvlTypes,
    const uint64_t *dim2lvl, const uint64_t *lvl2dim) {
  SparseTensorCOO<V> *noLvlCOO = nullptr;
  return new SparseTensorStorage<P, C, V>(dimRank, dimSizes, lvlRank, lvlSizes,
                                          lvlTypes, dim2lvl, lvl2dim, noLvlCOO);
}

template <typename P, typename C, typename V>
SparseTensorStorage<P, C, V> *SparseTensorStorage<P, C, V>::newFromCOO(
    uint64_t dimRank, const uint64_t *dimSizes, uint64_t lvlRank,
    const uint64_t *lvlSizes, const LevelType *lvlTypes,
    const uint64_t *dim2lvl, const uint64_t *lvl2dim,
    SparseTensorCOO<V> *lvlCOO) {
  assert(lvlCOO);
  return new SparseTensorStorage<P, C, V>(dimRank, dimSizes, lvlRank, lvlSizes,
                                          lvlTypes, dim2lvl, lvl2dim, lvlCOO);
}

template <typename P, typename C, typename V>
SparseTensorStorage<P, C, V> *SparseTensorStorage<P, C, V>::newFromBuffers(
    uint64_t dimRank, const uint64_t *dimSizes, uint64_t lvlRank,
    const uint64_t *lvlSizes, const LevelType *lvlTypes,
    const uint64_t *dim2lvl, const uint64_t *lvl2dim, uint64_t srcRank,
    const intptr_t *buffers) {
  return new SparseTensorStorage<P, C, V>(dimRank, dimSizes, lvlRank, lvlSizes,
                                          lvlTypes, dim2lvl, lvl2dim, buffers);
}

//===----------------------------------------------------------------------===//
//
//  SparseTensorStorage Constructors
//
//===----------------------------------------------------------------------===//

template <typename P, typename C, typename V>
SparseTensorStorage<P, C, V>::SparseTensorStorage(
    uint64_t dimRank, const uint64_t *dimSizes, uint64_t lvlRank,
    const uint64_t *lvlSizes, const LevelType *lvlTypes,
    const uint64_t *dim2lvl, const uint64_t *lvl2dim,
    SparseTensorCOO<V> *lvlCOO)
    : SparseTensorStorage(dimRank, dimSizes, lvlRank, lvlSizes, lvlTypes,
                          dim2lvl, lvl2dim) {
  // Provide hints on capacity of positions and coordinates.
  // TODO: needs much fine-tuning based on actual sparsity; currently
  // we reserve position/coordinate space based on all previous dense
  // levels, which works well up to first sparse level; but we should
  // really use nnz and dense/sparse distribution.
  uint64_t sz = 1;
  for (uint64_t l = 0; l < lvlRank; l++) {
    if (isCompressedLvl(l)) {
      positions[l].reserve(sz + 1);
      positions[l].push_back(0);
      coordinates[l].reserve(sz);
      sz = 1;
    } else if (isLooseCompressedLvl(l)) {
      positions[l].reserve(2 * sz + 1); // last one unused
      positions[l].push_back(0);
      coordinates[l].reserve(sz);
      sz = 1;
    } else if (isSingletonLvl(l)) {
      coordinates[l].reserve(sz);
      sz = 1;
    } else if (isNOutOfMLvl(l)) {
      assert(l == lvlRank - 1 && "unexpected n:m usage");
      sz = detail::checkedMul(sz, lvlSizes[l]) / 2;
      coordinates[l].reserve(sz);
      values.reserve(sz);
    } else { // Dense level.
      assert(isDenseLvl(l));
      sz = detail::checkedMul(sz, lvlSizes[l]);
    }
  }
  if (lvlCOO) {
    /* New from COO: ensure it is sorted. */
    assert(lvlCOO->getRank() == lvlRank);
    lvlCOO->sort();
    // Now actually insert the `elements`.
    const auto &elements = lvlCOO->getElements();
    const uint64_t nse = elements.size();
    assert(values.size() == 0);
    values.reserve(nse);
    fromCOO(elements, 0, nse, 0);
  } else if (allDense) {
    /* New empty (all dense) */
    values.resize(sz, 0);
  }
}

template <typename P, typename C, typename V>
SparseTensorStorage<P, C, V>::SparseTensorStorage(
    uint64_t dimRank, const uint64_t *dimSizes, uint64_t lvlRank,
    const uint64_t *lvlSizes, const LevelType *lvlTypes,
    const uint64_t *dim2lvl, const uint64_t *lvl2dim, const intptr_t *lvlBufs)
    : SparseTensorStorage(dimRank, dimSizes, lvlRank, lvlSizes, lvlTypes,
                          dim2lvl, lvl2dim) {
  // Note that none of the buffers can be reused because ownership
  // of the memory passed from clients is not necessarily transferred.
  // Therefore, all data is copied over into a new SparseTensorStorage.
  uint64_t trailCOOLen = 0, parentSz = 1, bufIdx = 0;
  for (uint64_t l = 0; l < lvlRank; l++) {
    if (!isUniqueLvl(l) && (isCompressedLvl(l) || isLooseCompressedLvl(l))) {
      // A `(loose)compressed_nu` level marks the start of the trailing COO
      // levels. Since the coordinate buffer used for trailing COO is passed
      // in as an AoS scheme while SparseTensorStorage uses a SoA scheme, we
      // cannot simply copy the values from the provided buffers.
      trailCOOLen = lvlRank - l;
      break;
    }
    if (isCompressedLvl(l) || isLooseCompressedLvl(l)) {
      P *posPtr = reinterpret_cast<P *>(lvlBufs[bufIdx++]);
      C *crdPtr = reinterpret_cast<C *>(lvlBufs[bufIdx++]);
      if (isLooseCompressedLvl(l)) {
        positions[l].assign(posPtr, posPtr + 2 * parentSz);
        coordinates[l].assign(crdPtr, crdPtr + positions[l][2 * parentSz - 1]);
      } else {
        positions[l].assign(posPtr, posPtr + parentSz + 1);
        coordinates[l].assign(crdPtr, crdPtr + positions[l][parentSz]);
      }
    } else if (isSingletonLvl(l)) {
      assert(0 && "general singleton not supported yet");
    } else if (isNOutOfMLvl(l)) {
      assert(0 && "n out of m not supported yet");
    } else {
      assert(isDenseLvl(l));
    }
    parentSz = assembledSize(parentSz, l);
  }

  // Handle AoS vs. SoA mismatch for COO.
  if (trailCOOLen != 0) {
    uint64_t cooStartLvl = lvlRank - trailCOOLen;
    assert(!isUniqueLvl(cooStartLvl) &&
           (isCompressedLvl(cooStartLvl) || isLooseCompressedLvl(cooStartLvl)));
    P *posPtr = reinterpret_cast<P *>(lvlBufs[bufIdx++]);
    C *aosCrdPtr = reinterpret_cast<C *>(lvlBufs[bufIdx++]);
    P crdLen;
    if (isLooseCompressedLvl(cooStartLvl)) {
      positions[cooStartLvl].assign(posPtr, posPtr + 2 * parentSz);
      crdLen = positions[cooStartLvl][2 * parentSz - 1];
    } else {
      positions[cooStartLvl].assign(posPtr, posPtr + parentSz + 1);
      crdLen = positions[cooStartLvl][parentSz];
    }
    for (uint64_t l = cooStartLvl; l < lvlRank; l++) {
      coordinates[l].resize(crdLen);
      for (uint64_t n = 0; n < crdLen; n++) {
        coordinates[l][n] = *(aosCrdPtr + (l - cooStartLvl) + n * trailCOOLen);
      }
    }
    parentSz = assembledSize(parentSz, cooStartLvl);
  }

  // Copy the values buffer.
  V *valPtr = reinterpret_cast<V *>(lvlBufs[bufIdx]);
  values.assign(valPtr, valPtr + parentSz);
}

} // namespace sparse_tensor
} // namespace mlir

#endif // MLIR_EXECUTIONENGINE_SPARSETENSOR_STORAGE_H