/****************************************************************************
 * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * @file arena.h
 *
 * @brief Arena memory manager
 *        The arena is a convenient, fast way to manage allocations that are
 *        tied to an operation and can all be freed at once when the operation
 *        completes. Allocation is cheap: most of the time it is simply a bump
 *        of an offset into the current block, and individual allocations are
 *        never freed. All of the arena's memory is released in one shot.
 *        (See the usage sketch at the end of this file.)
 *
 ******************************************************************************/
#pragma once

#include <mutex>
#include <algorithm>
#include <atomic>
#include "core/utils.h"

static const size_t ARENA_BLOCK_ALIGN = 64;

struct ArenaBlock
{
    size_t      blockSize = 0;
    ArenaBlock* pNext     = nullptr;
};
static_assert(sizeof(ArenaBlock) <= ARENA_BLOCK_ALIGN,
              "Increase ARENA_BLOCK_ALIGN size");

class DefaultAllocator
{
public:
    ArenaBlock* AllocateAligned(size_t size, size_t align)
    {
        SWR_ASSUME_ASSERT(size >= sizeof(ArenaBlock));

        // Place the ArenaBlock header at the start of the aligned allocation
        ArenaBlock* p = new (AlignedMalloc(size, align)) ArenaBlock();
        p->blockSize = size;
        return p;
    }

    void Free(ArenaBlock* pMem)
    {
        if (pMem)
        {
            // Sanity check: a blockSize this large most likely means the
            // header was already trashed or freed (debug fill pattern)
            SWR_ASSUME_ASSERT(pMem->blockSize < size_t(0xdddddddd));
            AlignedFree(pMem);
        }
    }
};

// Caching allocator for the arena: instead of returning freed blocks to the
// system, it keeps them in power-of-two size buckets for fast reuse.
template <uint32_t NumBucketsT = 8, uint32_t StartBucketBitT = 12>
struct CachingAllocatorT : DefaultAllocator
{
    ArenaBlock* AllocateAligned(size_t size, size_t align)
    {
        SWR_ASSUME_ASSERT(size >= sizeof(ArenaBlock));
        SWR_ASSUME_ASSERT(size <= uint32_t(-1));

        uint32_t bucket = GetBucketId(size);

        {
            // search cached blocks
            std::lock_guard<std::mutex> l(m_mutex);
            ArenaBlock* pPrevBlock = &m_cachedBlocks[bucket];
            ArenaBlock* pBlock     = SearchBlocks(pPrevBlock, size, align);

            if (pBlock)
            {
                m_cachedSize -= pBlock->blockSize;
                if (pBlock == m_pLastCachedBlocks[bucket])
                {
                    m_pLastCachedBlocks[bucket] = pPrevBlock;
                }
            }
            else
            {
                // Nothing in the current generation; try the old one
                pPrevBlock = &m_oldCachedBlocks[bucket];
                pBlock     = SearchBlocks(pPrevBlock, size, align);

                if (pBlock)
                {
                    m_oldCachedSize -= pBlock->blockSize;
                    if (pBlock == m_pOldLastCachedBlocks[bucket])
                    {
                        m_pOldLastCachedBlocks[bucket] = pPrevBlock;
                    }
                }
            }

            if (pBlock)
            {
                // Unlink the cached block and hand it out
                SWR_ASSUME_ASSERT(pPrevBlock && pPrevBlock->pNext == pBlock);
                pPrevBlock->pNext = pBlock->pNext;
                pBlock->pNext     = nullptr;

                return pBlock;
            }

            m_totalAllocated += size;

#if 0
            {
                static uint32_t count = 0;
                char buf[128];
                sprintf_s(buf, "Arena Alloc %d 0x%llx bytes - 0x%llx total\n", ++count, uint64_t(size), uint64_t(m_totalAllocated));
                OutputDebugStringA(buf);
            }
#endif
        }

        if (bucket && bucket < (CACHE_NUM_BUCKETS - 1))
        {
            // Make all blocks in this bucket the same size
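            // so that any cached block in the bucket can satisfy any request
            // that maps to it. E.g., with the default StartBucketBitT of 12,
            // a 9000-byte request maps to bucket 1 and is rounded up to
            // (1 << 14) = 16KB here.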
            size = size_t(1) << (bucket + 1 + CACHE_START_BUCKET_BIT);
        }

        return this->DefaultAllocator::AllocateAligned(size, align);
    }

    void Free(ArenaBlock* pMem)
    {
        if (pMem)
        {
            std::unique_lock<std::mutex> l(m_mutex);
            InsertCachedBlock(GetBucketId(pMem->blockSize), pMem);
        }
    }

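    // Two-generation trimming of the block cache: blocks freed since the
    // last call sit in m_cachedBlocks; each call demotes them to
    // m_oldCachedBlocks, and the old generation is released back to the
    // system once its total size exceeds MAX_UNUSED_SIZE. A block therefore
    // has to sit unused across two trim cycles before it is freed.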
    void FreeOldBlocks()
    {
        // Unsynchronized fast-path check; racing with Free() here is benign,
        // the blocks will simply be aged on a later call.
        if (!m_cachedSize)
        {
            return;
        }
        std::lock_guard<std::mutex> l(m_mutex);

        bool doFree = (m_oldCachedSize > MAX_UNUSED_SIZE);

        for (uint32_t i = 0; i < CACHE_NUM_BUCKETS; ++i)
        {
            if (doFree)
            {
                // Release the entire old generation for this bucket
                ArenaBlock* pBlock = m_oldCachedBlocks[i].pNext;
                while (pBlock)
                {
                    ArenaBlock* pNext = pBlock->pNext;
                    m_oldCachedSize -= pBlock->blockSize;
                    m_totalAllocated -= pBlock->blockSize;
                    this->DefaultAllocator::Free(pBlock);
                    pBlock = pNext;
                }
                m_oldCachedBlocks[i].pNext = nullptr;
                m_pOldLastCachedBlocks[i]  = &m_oldCachedBlocks[i];
            }

            // Demote the current generation to the old generation
            if (m_pLastCachedBlocks[i] != &m_cachedBlocks[i])
            {
                if (i && i < (CACHE_NUM_BUCKETS - 1))
                {
                    // We know that all blocks in this bucket are the same
                    // size, so just splice the whole list over.
                    m_pLastCachedBlocks[i]->pNext = m_oldCachedBlocks[i].pNext;
                    m_oldCachedBlocks[i].pNext    = m_cachedBlocks[i].pNext;
                    m_cachedBlocks[i].pNext       = nullptr;
                    if (m_pOldLastCachedBlocks[i]->pNext)
                    {
                        m_pOldLastCachedBlocks[i] = m_pLastCachedBlocks[i];
                    }
                    m_pLastCachedBlocks[i] = &m_cachedBlocks[i];
                }
                else
                {
                    // The end buckets can hold variable-sized lists;
                    // insert each block individually, keeping the size order.
                    ArenaBlock* pBlock = m_cachedBlocks[i].pNext;
                    while (pBlock)
                    {
                        ArenaBlock* pNext = pBlock->pNext;
                        pBlock->pNext     = nullptr;
                        m_cachedSize -= pBlock->blockSize;
                        InsertCachedBlock<true>(i, pBlock);
                        pBlock = pNext;
                    }

                    m_pLastCachedBlocks[i]  = &m_cachedBlocks[i];
                    m_cachedBlocks[i].pNext = nullptr;
                }
            }
        }

        m_oldCachedSize += m_cachedSize;
        m_cachedSize = 0;
    }

    CachingAllocatorT()
    {
        for (uint32_t i = 0; i < CACHE_NUM_BUCKETS; ++i)
        {
            m_pLastCachedBlocks[i]    = &m_cachedBlocks[i];
            m_pOldLastCachedBlocks[i] = &m_oldCachedBlocks[i];
        }
    }

    ~CachingAllocatorT()
    {
        // Free all cached blocks
        for (uint32_t i = 0; i < CACHE_NUM_BUCKETS; ++i)
        {
            ArenaBlock* pBlock = m_cachedBlocks[i].pNext;
            while (pBlock)
            {
                ArenaBlock* pNext = pBlock->pNext;
                this->DefaultAllocator::Free(pBlock);
                pBlock = pNext;
            }
            pBlock = m_oldCachedBlocks[i].pNext;
            while (pBlock)
            {
                ArenaBlock* pNext = pBlock->pNext;
                this->DefaultAllocator::Free(pBlock);
                pBlock = pNext;
            }
        }
    }

private:
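    // Maps a block size to a cache bucket. With the default template
    // parameters (NumBucketsT = 8, StartBucketBitT = 12): sizes up to 8KB
    // map to bucket 0, (8KB, 16KB] to bucket 1, (16KB, 32KB] to bucket 2,
    // and so on; anything larger than 512KB is clamped into bucket 7.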
    static uint32_t GetBucketId(size_t blockSize)
    {
        uint32_t bucketId = 0;

#if defined(BitScanReverseSizeT)
        BitScanReverseSizeT((unsigned long*)&bucketId, (blockSize - 1) >> CACHE_START_BUCKET_BIT);
        bucketId = std::min<uint32_t>(bucketId, CACHE_NUM_BUCKETS - 1);
#else
        // Without a bit-scan intrinsic, every block degenerates into bucket 0
        (void)blockSize;
#endif

        return bucketId;
    }

    template <bool OldBlockT = false>
    void InsertCachedBlock(uint32_t bucketId, ArenaBlock* pNewBlock)
    {
        SWR_ASSUME_ASSERT(bucketId < CACHE_NUM_BUCKETS);

        ArenaBlock* pPrevBlock =
            OldBlockT ? &m_oldCachedBlocks[bucketId] : &m_cachedBlocks[bucketId];
        ArenaBlock* pBlock = pPrevBlock->pNext;

        // Keep each bucket's list sorted by size, largest first
        while (pBlock)
        {
            if (pNewBlock->blockSize >= pBlock->blockSize)
            {
                // Insert here
                break;
            }
            pPrevBlock = pBlock;
            pBlock     = pBlock->pNext;
        }

        // Insert into list
        SWR_ASSUME_ASSERT(pPrevBlock);
        pPrevBlock->pNext = pNewBlock;
        pNewBlock->pNext  = pBlock;

        if (OldBlockT)
        {
            if (m_pOldLastCachedBlocks[bucketId] == pPrevBlock)
            {
                m_pOldLastCachedBlocks[bucketId] = pNewBlock;
            }

            m_oldCachedSize += pNewBlock->blockSize;
        }
        else
        {
            if (m_pLastCachedBlocks[bucketId] == pPrevBlock)
            {
                m_pLastCachedBlocks[bucketId] = pNewBlock;
            }

            m_cachedSize += pNewBlock->blockSize;
        }
    }

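    // Best-fit search over a bucket's size-descending list: returns an
    // exact-size, suitably aligned block if one exists, otherwise the
    // smallest aligned block that is still large enough (or nullptr).
    // On success, pPrevBlock points at the returned block's predecessor so
    // the caller can unlink it.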
    static ArenaBlock* SearchBlocks(ArenaBlock*& pPrevBlock, size_t blockSize, size_t align)
    {
        ArenaBlock* pBlock          = pPrevBlock->pNext;
        ArenaBlock* pPotentialBlock = nullptr;
        ArenaBlock* pPotentialPrev  = nullptr;

        while (pBlock)
        {
            if (pBlock->blockSize >= blockSize)
            {
                if (pBlock == AlignUp(pBlock, align))
                {
                    if (pBlock->blockSize == blockSize)
                    {
                        // Exact match; won't find a better one
                        break;
                    }

                    // We could use this as it is larger than we wanted, but
                    // continue to search for a better match
                    pPotentialBlock = pBlock;
                    pPotentialPrev  = pPrevBlock;
                }
            }
            else
            {
                // Blocks are sorted by size (biggest first), so if we get
                // here there are no blocks large enough; fall through to
                // allocation.
                pBlock = nullptr;
                break;
            }

            pPrevBlock = pBlock;
            pBlock     = pBlock->pNext;
        }

        if (!pBlock)
        {
            // No exact match; use the smallest oversized block found (if any)
            pBlock     = pPotentialBlock;
            pPrevBlock = pPotentialPrev;
        }

        return pBlock;
    }

    // Buckets for block sizes <= (1 << (start + 1)), <= (1 << (start + 2)), ...
    static const uint32_t CACHE_NUM_BUCKETS      = NumBucketsT;
    static const uint32_t CACHE_START_BUCKET_BIT = StartBucketBitT;

    // MEGABYTE is assumed to be an array typedef from core/utils.h, so
    // sizeof(MEGABYTE) yields the byte count (1MB), not the size of an
    // integer constant.
    static const size_t MAX_UNUSED_SIZE = sizeof(MEGABYTE);

    ArenaBlock  m_cachedBlocks[CACHE_NUM_BUCKETS];
    ArenaBlock* m_pLastCachedBlocks[CACHE_NUM_BUCKETS];
    ArenaBlock  m_oldCachedBlocks[CACHE_NUM_BUCKETS];
    ArenaBlock* m_pOldLastCachedBlocks[CACHE_NUM_BUCKETS];
    std::mutex  m_mutex;

    size_t m_totalAllocated = 0;

    size_t m_cachedSize    = 0;
    size_t m_oldCachedSize = 0;
};
using CachingAllocator = CachingAllocatorT<>;

// KILOBYTE is assumed to be an array typedef from core/utils.h, making the
// default block size 128KB.
template <typename T = DefaultAllocator, size_t BlockSizeT = 128 * sizeof(KILOBYTE)>
class TArena
{
public:
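    // The arena either borrows a caller-owned allocator or falls back to a
    // private DefaultAllocator. Note that the default constructor only
    // instantiates when T is DefaultAllocator (the fallback member cannot
    // bind to any other T&), so a CachingArena must be given its allocator.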
    TArena(T& in_allocator) : m_allocator(in_allocator) {}
    TArena() : m_allocator(m_defAllocator) {}
    ~TArena()
    {
        Reset(true);
    }

    void* AllocAligned(size_t size, size_t align)
    {
        if (0 == size)
        {
            return nullptr;
        }

        SWR_ASSERT(align <= ARENA_BLOCK_ALIGN);

        if (m_pCurBlock)
        {
            // Fast path: bump the offset within the current block
            ArenaBlock* pCurBlock = m_pCurBlock;
            size_t      offset    = AlignUp(m_offset, align);

            if ((offset + size) <= pCurBlock->blockSize)
            {
                void* pMem = PtrAdd(pCurBlock, offset);
                m_offset   = offset + size;
                return pMem;
            }

            // Not enough memory in this block, fall through to allocate
            // a new block
        }

        static const size_t ArenaBlockSize = BlockSizeT;

        // Reserve one ARENA_BLOCK_ALIGN unit to store the ArenaBlock header
        // in, then round the total up to the alignment unit.
        size_t blockSize = std::max(size + ARENA_BLOCK_ALIGN, ArenaBlockSize);
        blockSize        = AlignUp(blockSize, ARENA_BLOCK_ALIGN);
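        // For example, with the default 128KB BlockSizeT, a 200000-byte
        // request yields blockSize = AlignUp(200000 + 64, 64) = 200064,
        // whose first ARENA_BLOCK_ALIGN bytes hold the ArenaBlock header.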
        ArenaBlock* pNewBlock =
            m_allocator.AllocateAligned(blockSize, ARENA_BLOCK_ALIGN); // Arena blocks are always SIMD-width aligned.
        SWR_ASSERT(pNewBlock != nullptr);

        if (pNewBlock != nullptr)
        {
            // Skip the header slot; the first user allocation starts at the
            // next aligned offset.
            m_offset = ARENA_BLOCK_ALIGN;
            pNewBlock->pNext = m_pCurBlock;

            m_pCurBlock = pNewBlock;
        }

        // Retry the allocation in the new current block. The new block is
        // sized to fit the request, so (assuming the allocation succeeded)
        // this recurses at most once.
        return AllocAligned(size, align);
    }

    void* Alloc(size_t size)
    {
        return AllocAligned(size, 1);
    }

    void* AllocAlignedSync(size_t size, size_t align)
    {
        std::lock_guard<std::mutex> l(m_mutex);
        return AllocAligned(size, align);
    }

    void* AllocSync(size_t size)
    {
        std::lock_guard<std::mutex> l(m_mutex);
        return Alloc(size);
    }

    void Reset(bool removeAll = false)
    {
        m_offset = ARENA_BLOCK_ALIGN;

        if (m_pCurBlock)
        {
            // Free every block except the current one, which is kept for
            // reuse unless removeAll is set.
            ArenaBlock* pUsedBlocks = m_pCurBlock->pNext;
            m_pCurBlock->pNext      = nullptr;
            while (pUsedBlocks)
            {
                ArenaBlock* pBlock = pUsedBlocks;
                pUsedBlocks        = pBlock->pNext;

                m_allocator.Free(pBlock);
            }

            if (removeAll)
            {
                m_allocator.Free(m_pCurBlock);
                m_pCurBlock = nullptr;
            }
        }
    }

    bool IsEmpty()
    {
        return (m_pCurBlock == nullptr) ||
               (m_offset == ARENA_BLOCK_ALIGN && m_pCurBlock->pNext == nullptr);
    }

private:
    ArenaBlock* m_pCurBlock = nullptr;
    size_t      m_offset    = ARENA_BLOCK_ALIGN;

    /// @note Mutex is only used by the sync allocation functions.
    std::mutex m_mutex;

    DefaultAllocator m_defAllocator;
    T&               m_allocator;
};

using StdArena     = TArena<DefaultAllocator>;
using CachingArena = TArena<CachingAllocator>;
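
//////////////////////////////////////////////////////////////////////////////
// Usage sketch (illustrative only, not part of the API):
//
//     CachingAllocator allocator;        // long-lived; may be shared by
//     CachingArena     arena(allocator); // many arenas
//
//     void* pData = arena.Alloc(1024);           // cheap bump-pointer alloc
//     void* pSimd = arena.AllocAligned(256, 64); // aligned variant
//     // ... use the allocations for the duration of one operation ...
//     arena.Reset();             // release all blocks back to the allocator
//     allocator.FreeOldBlocks(); // call periodically to trim the block cache
//
// StdArena can instead be default-constructed and uses its own
// DefaultAllocator, paying a system allocation for each new block.
//////////////////////////////////////////////////////////////////////////////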