/****************************************************************************
* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
* @file arena.h
*
* @brief Arena memory manager
* The arena is a convenient, fast allocator for memory that is tied to a
* single operation and can be released in bulk once that operation has
* completed. Allocations are cheap: most of the time an allocation is just
* an increment of an offset, and individual allocations never need to be
* freed; all of the arena memory is released at once.
*
******************************************************************************/
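//
// Illustrative usage sketch (typical use only, not part of the header itself);
// StdArena, CachingAllocator, CachingArena and the calls below are all defined
// further down in this file:
//
//     StdArena arena;                                // arena backed by the default allocator
//     void* pState = arena.Alloc(256);               // cheap bump-pointer allocation
//     void* pWork  = arena.AllocAligned(1024, 32);   // aligned allocation (align <= ARENA_BLOCK_ALIGN)
//     // ... use the memory for the duration of the operation ...
//     arena.Reset();                                 // releases every allocation at once
//
//     CachingAllocator cache;                        // block cache, typically shared between arenas
//     CachingArena cachingArena(cache);              // arena that recycles its blocks through the cache
//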
#pragma once
#include <mutex>
#include <algorithm>
#include <atomic>
#include "core/utils.h"
static const size_t ARENA_BLOCK_ALIGN = 64;
struct ArenaBlock
{
size_t blockSize = 0;
ArenaBlock* pNext = nullptr;
};
static_assert(sizeof(ArenaBlock) <= ARENA_BLOCK_ALIGN,
"Increase ARENA_BLOCK_ALIGN size");
class DefaultAllocator
{
public:
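// Carve a block straight out of the system heap with AlignedMalloc and
// record its size in the ArenaBlock header placement-constructed at the
// start of the allocation.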
ArenaBlock* AllocateAligned(size_t size, size_t align)
{
SWR_ASSUME_ASSERT(size >= sizeof(ArenaBlock));
ArenaBlock* p = new (AlignedMalloc(size, align)) ArenaBlock();
p->blockSize = size;
return p;
}
void Free(ArenaBlock* pMem)
{
if (pMem)
{
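// Sanity check: a huge blockSize (e.g. the 0xdd freed-memory fill pattern)
// suggests the header was already freed or has been trashed.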
SWR_ASSUME_ASSERT(pMem->blockSize < size_t(0xdddddddd));
AlignedFree(pMem);
}
}
};
// Caching Allocator for Arena
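// Freed blocks are not returned to the system immediately; they are parked in
// per-size buckets and handed back out on later allocations.  Two generations
// are kept: recently freed blocks (m_cachedBlocks) and blocks that survived a
// previous FreeOldBlocks() pass (m_oldCachedBlocks).  Old blocks are released
// to the system on the next FreeOldBlocks() call once their combined size
// exceeds MAX_UNUSED_SIZE.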
template<uint32_t NumBucketsT = 8, uint32_t StartBucketBitT = 12>
struct CachingAllocatorT : DefaultAllocator
{
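// Satisfy the request from the cache when possible; otherwise fall through to
// the default allocator.  On a cache miss, requests that map to the middle
// buckets are rounded up to the bucket's full size before allocation, so the
// new block can later satisfy any request in that bucket.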
ArenaBlock* AllocateAligned(size_t size, size_t align)
{
SWR_ASSUME_ASSERT(size >= sizeof(ArenaBlock));
SWR_ASSUME_ASSERT(size <= uint32_t(-1));
uint32_t bucket = GetBucketId(size);
{
// search cached blocks
std::lock_guard<std::mutex> l(m_mutex);
ArenaBlock* pPrevBlock = &m_cachedBlocks[bucket];
ArenaBlock* pBlock = SearchBlocks(pPrevBlock, size, align);
if (pBlock)
{
m_cachedSize -= pBlock->blockSize;
if (pBlock == m_pLastCachedBlocks[bucket])
{
m_pLastCachedBlocks[bucket] = pPrevBlock;
}
}
else
{
pPrevBlock = &m_oldCachedBlocks[bucket];
pBlock = SearchBlocks(pPrevBlock, size, align);
if (pBlock)
{
m_oldCachedSize -= pBlock->blockSize;
if (pBlock == m_pOldLastCachedBlocks[bucket])
{
m_pOldLastCachedBlocks[bucket] = pPrevBlock;
}
}
}
if (pBlock)
{
SWR_ASSUME_ASSERT(pPrevBlock && pPrevBlock->pNext == pBlock);
pPrevBlock->pNext = pBlock->pNext;
pBlock->pNext = nullptr;
return pBlock;
}
m_totalAllocated += size;
#if 0
{
static uint32_t count = 0;
char buf[128];
sprintf_s(buf, "Arena Alloc %d 0x%llx bytes - 0x%llx total\n", ++count, uint64_t(size), uint64_t(m_totalAllocated));
OutputDebugStringA(buf);
}
#endif
}
if (bucket && bucket < (CACHE_NUM_BUCKETS - 1))
{
// Make all blocks in this bucket the same size
size = size_t(1) << (bucket + 1 + CACHE_START_BUCKET_BIT);
}
return this->DefaultAllocator::AllocateAligned(size, align);
}
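// Return a block to the cache; it is only handed back to the system later,
// by FreeOldBlocks() or the destructor.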
void Free(ArenaBlock* pMem)
{
if (pMem)
{
std::unique_lock<std::mutex> l(m_mutex);
InsertCachedBlock(GetBucketId(pMem->blockSize), pMem);
}
}
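// Age the cache: drop the old generation back to the system once it has grown
// past MAX_UNUSED_SIZE, then demote the current generation to "old" so it
// becomes a candidate for release next time around.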
void FreeOldBlocks()
{
if (!m_cachedSize) { return; }
std::lock_guard<std::mutex> l(m_mutex);
bool doFree = (m_oldCachedSize > MAX_UNUSED_SIZE);
for (uint32_t i = 0; i < CACHE_NUM_BUCKETS; ++i)
{
if (doFree)
{
ArenaBlock* pBlock = m_oldCachedBlocks[i].pNext;
while (pBlock)
{
ArenaBlock* pNext = pBlock->pNext;
m_oldCachedSize -= pBlock->blockSize;
m_totalAllocated -= pBlock->blockSize;
this->DefaultAllocator::Free(pBlock);
pBlock = pNext;
}
m_oldCachedBlocks[i].pNext = nullptr;
m_pOldLastCachedBlocks[i] = &m_oldCachedBlocks[i];
}
if (m_pLastCachedBlocks[i] != &m_cachedBlocks[i])
{
if (i && i < (CACHE_NUM_BUCKETS - 1))
{
// We know that all blocks are the same size.
// Just move the list over.
m_pLastCachedBlocks[i]->pNext = m_oldCachedBlocks[i].pNext;
m_oldCachedBlocks[i].pNext = m_cachedBlocks[i].pNext;
m_cachedBlocks[i].pNext = nullptr;
if (m_pOldLastCachedBlocks[i]->pNext)
{
m_pOldLastCachedBlocks[i] = m_pLastCachedBlocks[i];
}
m_pLastCachedBlocks[i] = &m_cachedBlocks[i];
}
else
{
// The end buckets can have variable sized lists.
// Insert each block based on size
ArenaBlock* pBlock = m_cachedBlocks[i].pNext;
while (pBlock)
{
ArenaBlock* pNext = pBlock->pNext;
pBlock->pNext = nullptr;
m_cachedSize -= pBlock->blockSize;
InsertCachedBlock<true>(i, pBlock);
pBlock = pNext;
}
m_pLastCachedBlocks[i] = &m_cachedBlocks[i];
m_cachedBlocks[i].pNext = nullptr;
}
}
}
m_oldCachedSize += m_cachedSize;
m_cachedSize = 0;
}
CachingAllocatorT()
{
for (uint32_t i = 0; i < CACHE_NUM_BUCKETS; ++i)
{
m_pLastCachedBlocks[i] = &m_cachedBlocks[i];
m_pOldLastCachedBlocks[i] = &m_oldCachedBlocks[i];
}
}
~CachingAllocatorT()
{
// Free all cached blocks
for (uint32_t i = 0; i < CACHE_NUM_BUCKETS; ++i)
{
ArenaBlock* pBlock = m_cachedBlocks[i].pNext;
while (pBlock)
{
ArenaBlock* pNext = pBlock->pNext;
this->DefaultAllocator::Free(pBlock);
pBlock = pNext;
}
pBlock = m_oldCachedBlocks[i].pNext;
while (pBlock)
{
ArenaBlock* pNext = pBlock->pNext;
this->DefaultAllocator::Free(pBlock);
pBlock = pNext;
}
}
}
private:
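// Map a block size to its cache bucket: bucket i covers sizes up to
// (1 << (CACHE_START_BUCKET_BIT + i + 1)) bytes, and anything larger falls
// into the last bucket (when BitScanReverseSizeT is available; otherwise
// everything maps to bucket 0).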
static uint32_t GetBucketId(size_t blockSize)
{
uint32_t bucketId = 0;
#if defined(BitScanReverseSizeT)
BitScanReverseSizeT((unsigned long*)&bucketId, (blockSize - 1) >> CACHE_START_BUCKET_BIT);
bucketId = std::min<uint32_t>(bucketId, CACHE_NUM_BUCKETS - 1);
#endif
return bucketId;
}
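// Link a freed block into its bucket's list, which is kept sorted largest
// first so SearchBlocks can stop as soon as the remaining blocks are too
// small.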
template <bool OldBlockT = false>
void InsertCachedBlock(uint32_t bucketId, ArenaBlock* pNewBlock)
{
SWR_ASSUME_ASSERT(bucketId < CACHE_NUM_BUCKETS);
ArenaBlock* pPrevBlock = OldBlockT ? &m_oldCachedBlocks[bucketId] : &m_cachedBlocks[bucketId];
ArenaBlock* pBlock = pPrevBlock->pNext;
while (pBlock)
{
if (pNewBlock->blockSize >= pBlock->blockSize)
{
// Insert here
break;
}
pPrevBlock = pBlock;
pBlock = pBlock->pNext;
}
// Insert into list
SWR_ASSUME_ASSERT(pPrevBlock);
pPrevBlock->pNext = pNewBlock;
pNewBlock->pNext = pBlock;
if (OldBlockT)
{
if (m_pOldLastCachedBlocks[bucketId] == pPrevBlock)
{
m_pOldLastCachedBlocks[bucketId] = pNewBlock;
}
m_oldCachedSize += pNewBlock->blockSize;
}
else
{
if (m_pLastCachedBlocks[bucketId] == pPrevBlock)
{
m_pLastCachedBlocks[bucketId] = pNewBlock;
}
m_cachedSize += pNewBlock->blockSize;
}
}
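// Scan a bucket's largest-first list for a suitably aligned block of at least
// blockSize bytes.  An exact size match wins; otherwise the smallest adequate
// block seen is returned.  pPrevBlock is left pointing at the node preceding
// the result so the caller can unlink it.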
static ArenaBlock* SearchBlocks(ArenaBlock*& pPrevBlock, size_t blockSize, size_t align)
{
ArenaBlock* pBlock = pPrevBlock->pNext;
ArenaBlock* pPotentialBlock = nullptr;
ArenaBlock* pPotentialPrev = nullptr;
while (pBlock)
{
if (pBlock->blockSize >= blockSize)
{
if (pBlock == AlignUp(pBlock, align))
{
if (pBlock->blockSize == blockSize)
{
// Won't find a better match
break;
}
// We could use this as it is larger than we wanted, but
// continue to search for a better match
pPotentialBlock = pBlock;
pPotentialPrev = pPrevBlock;
}
}
else
{
// Blocks are sorted by size (biggest first)
// So, if we get here, there are no blocks
// large enough, fall through to allocation.
pBlock = nullptr;
break;
}
pPrevBlock = pBlock;
pBlock = pBlock->pNext;
}
if (!pBlock)
{
// Couldn't find an exact match, use next biggest size
pBlock = pPotentialBlock;
pPrevBlock = pPotentialPrev;
}
return pBlock;
}
// buckets, for block sizes < (1 << (start+1)), < (1 << (start+2)), ...
static const uint32_t CACHE_NUM_BUCKETS = NumBucketsT;
static const uint32_t CACHE_START_BUCKET_BIT = StartBucketBitT;
static const size_t MAX_UNUSED_SIZE = sizeof(MEGABYTE);
ArenaBlock m_cachedBlocks[CACHE_NUM_BUCKETS];
ArenaBlock* m_pLastCachedBlocks[CACHE_NUM_BUCKETS];
ArenaBlock m_oldCachedBlocks[CACHE_NUM_BUCKETS];
ArenaBlock* m_pOldLastCachedBlocks[CACHE_NUM_BUCKETS];
std::mutex m_mutex;
size_t m_totalAllocated = 0;
size_t m_cachedSize = 0;
size_t m_oldCachedSize = 0;
};
typedef CachingAllocatorT<> CachingAllocator;
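// TArena - the arena itself: a bump-pointer allocator layered on top of an
// ArenaBlock allocator.  Allocations advance an offset within the current
// block; when it is exhausted, a new block of at least BlockSizeT bytes is
// pushed onto the block list.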
template<typename T = DefaultAllocator, size_t BlockSizeT = 128 * sizeof(KILOBYTE)>
class TArena
{
public:
TArena(T& in_allocator) : m_allocator(in_allocator) {}
TArena() : m_allocator(m_defAllocator) {}
~TArena()
{
Reset(true);
}
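// Allocate size bytes at the requested alignment (align must not exceed
// ARENA_BLOCK_ALIGN).  Fast path: bump m_offset within the current block.
// Slow path: pull a new block from the allocator and retry.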
void* AllocAligned(size_t size, size_t align)
{
if (0 == size)
{
return nullptr;
}
SWR_ASSERT(align <= ARENA_BLOCK_ALIGN);
if (m_pCurBlock)
{
ArenaBlock* pCurBlock = m_pCurBlock;
size_t offset = AlignUp(m_offset, align);
if ((offset + size) <= pCurBlock->blockSize)
{
void* pMem = PtrAdd(pCurBlock, offset);
m_offset = offset + size;
return pMem;
}
// Not enough memory in this block, fall through to allocate
// a new block
}
static const size_t ArenaBlockSize = BlockSizeT;
size_t blockSize = std::max(size + ARENA_BLOCK_ALIGN, ArenaBlockSize);
// Add in one BLOCK_ALIGN unit to store ArenaBlock in.
blockSize = AlignUp(blockSize, ARENA_BLOCK_ALIGN);
ArenaBlock* pNewBlock = m_allocator.AllocateAligned(blockSize, ARENA_BLOCK_ALIGN); // Arena blocks are always simd byte aligned.
SWR_ASSERT(pNewBlock != nullptr);
if (pNewBlock != nullptr)
{
m_offset = ARENA_BLOCK_ALIGN;
pNewBlock->pNext = m_pCurBlock;
m_pCurBlock = pNewBlock;
}
return AllocAligned(size, align);
}
void* Alloc(size_t size)
{
return AllocAligned(size, 1);
}
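// Thread-safe variants: serialize on m_mutex so multiple threads can share
// one arena.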
void* AllocAlignedSync(size_t size, size_t align)
{
void* pAlloc = nullptr;
m_mutex.lock();
pAlloc = AllocAligned(size, align);
m_mutex.unlock();
return pAlloc;
}
void* AllocSync(size_t size)
{
void* pAlloc = nullptr;
m_mutex.lock();
pAlloc = Alloc(size);
m_mutex.unlock();
return pAlloc;
}
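// Rewind the arena.  Every block except the current one is handed back to the
// allocator; pass removeAll = true to release the current block as well.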
void Reset(bool removeAll = false)
{
m_offset = ARENA_BLOCK_ALIGN;
if (m_pCurBlock)
{
ArenaBlock *pUsedBlocks = m_pCurBlock->pNext;
m_pCurBlock->pNext = nullptr;
while (pUsedBlocks)
{
ArenaBlock* pBlock = pUsedBlocks;
pUsedBlocks = pBlock->pNext;
m_allocator.Free(pBlock);
}
if (removeAll)
{
m_allocator.Free(m_pCurBlock);
m_pCurBlock = nullptr;
}
}
}
bool IsEmpty()
{
return (m_pCurBlock == nullptr) || (m_offset == ARENA_BLOCK_ALIGN && m_pCurBlock->pNext == nullptr);
}
private:
ArenaBlock* m_pCurBlock = nullptr;
size_t m_offset = ARENA_BLOCK_ALIGN;
/// @note Mutex is only used by sync allocation functions.
std::mutex m_mutex;
DefaultAllocator m_defAllocator;
T& m_allocator;
};
using StdArena = TArena<DefaultAllocator>;
using CachingArena = TArena<CachingAllocator>;