/****************************************************************************
* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
* @file arena.h
*
* @brief Arena memory manager
* The arena is a convenient, fast allocator for memory that is tied to a
* single operation and can be released in bulk once that operation has
* completed. Allocations are cheap: most of the time an allocation is just
* an increment of an offset, and individual allocations never need to be
* freed; all of the arena memory is released at once.
*
******************************************************************************/
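//
// Illustrative usage sketch (typical use only, not part of the header itself);
// StdArena, CachingAllocator, CachingArena and the calls below are all defined
// further down in this file:
//
//     StdArena arena;                                // arena backed by the default allocator
//     void* pState = arena.Alloc(256);               // cheap bump-pointer allocation
//     void* pWork  = arena.AllocAligned(1024, 32);   // aligned allocation (align <= ARENA_BLOCK_ALIGN)
//     // ... use the memory for the duration of the operation ...
//     arena.Reset();                                 // releases every allocation at once
//
//     CachingAllocator cache;                        // block cache, typically shared between arenas
//     CachingArena cachingArena(cache);              // arena that recycles its blocks through the cache
//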
#pragma once
#include <mutex>
#include <algorithm>
#include <atomic>
#include "core/utils.h"
static const size_t ARENA_BLOCK_ALIGN = 64;
struct ArenaBlock
{
size_t blockSize = 0;
ArenaBlock* pNext = nullptr;
};
static_assert(sizeof(ArenaBlock) <= ARENA_BLOCK_ALIGN,
"Increase ARENA_BLOCK_ALIGN size");
class DefaultAllocator
{
public:
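// Carve a block straight out of the system heap with AlignedMalloc and
// record its size in the ArenaBlock header placement-constructed at the
// start of the allocation.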
ArenaBlock* AllocateAligned(size_t size, size_t align)
{
SWR_ASSUME_ASSERT(size >= sizeof(ArenaBlock));
ArenaBlock* p = new (AlignedMalloc(size, align)) ArenaBlock();
p->blockSize = size;
return p;
}
void Free(ArenaBlock* pMem)
{
if (pMem)
{
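// Sanity check: a huge blockSize (e.g. the 0xdd freed-memory fill pattern)
// suggests the header was already freed or has been trashed.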
SWR_ASSUME_ASSERT(pMem->blockSize < size_t(0xdddddddd));
AlignedFree(pMem);
}
}
};
// Caching Allocator for Arena
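// Freed blocks are not returned to the system immediately; they are parked in
// per-size buckets and handed back out on later allocations.  Two generations
// are kept: recently freed blocks (m_cachedBlocks) and blocks that survived a
// previous FreeOldBlocks() pass (m_oldCachedBlocks).  Old blocks are released
// to the system on the next FreeOldBlocks() call once their combined size
// exceeds MAX_UNUSED_SIZE.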
template<uint32_t NumBucketsT = 8, uint32_t StartBucketBitT = 12>
struct CachingAllocatorT : DefaultAllocator
{
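// Satisfy the request from the cache when possible; otherwise fall through to
// the default allocator.  On a cache miss, requests that map to the middle
// buckets are rounded up to the bucket's full size before allocation, so the
// new block can later satisfy any request in that bucket.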
ArenaBlock* AllocateAligned(size_t size, size_t align)
{
SWR_ASSUME_ASSERT(size >= sizeof(ArenaBlock));
SWR_ASSUME_ASSERT(size <= uint32_t(-1));
uint32_t bucket = GetBucketId(size);
{
// search cached blocks
std::lock_guard<std::mutex> l(m_mutex);
ArenaBlock* pPrevBlock = &m_cachedBlocks[bucket];
ArenaBlock* pBlock = SearchBlocks(pPrevBlock, size, align);
if (pBlock)
{
m_cachedSize -= pBlock->blockSize;
if (pBlock == m_pLastCachedBlocks[bucket])
{
m_pLastCachedBlocks[bucket] = pPrevBlock;
}
}
else
{
pPrevBlock = &m_oldCachedBlocks[bucket];
pBlock = SearchBlocks(pPrevBlock, size, align);
if (pBlock)
{
m_oldCachedSize -= pBlock->blockSize;
if (pBlock == m_pOldLastCachedBlocks[bucket])
{
m_pOldLastCachedBlocks[bucket] = pPrevBlock;
}
}
}
if (pBlock)
{
SWR_ASSUME_ASSERT(pPrevBlock && pPrevBlock->pNext == pBlock);
pPrevBlock->pNext = pBlock->pNext;
pBlock->pNext = nullptr;
return pBlock;
}
m_totalAllocated += size;
#if 0
{
static uint32_t count = 0;
char buf[128];
sprintf_s(buf, "Arena Alloc %d 0x%llx bytes - 0x%llx total\n", ++count, uint64_t(size), uint64_t(m_totalAllocated));
OutputDebugStringA(buf);
}
#endif
}
if (bucket && bucket < (CACHE_NUM_BUCKETS - 1))
{
// Make all blocks in this bucket the same size
size = size_t(1) << (bucket + 1 + CACHE_START_BUCKET_BIT);
}
return this->DefaultAllocator::AllocateAligned(size, align);
}
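// Return a block to the cache; it is only handed back to the system later,
// by FreeOldBlocks() or the destructor.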
void Free(ArenaBlock* pMem)
{
if (pMem)
{
std::unique_lock<std::mutex> l(m_mutex);
InsertCachedBlock(GetBucketId(pMem->blockSize), pMem);
}
}
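// Age the cache: drop the old generation back to the system once it has grown
// past MAX_UNUSED_SIZE, then demote the current generation to "old" so it
// becomes a candidate for release next time around.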
void FreeOldBlocks()
{
if (!m_cachedSize) { return; }
std::lock_guard<std::mutex> l(m_mutex);
bool doFree = (m_oldCachedSize > MAX_UNUSED_SIZE);
for (uint32_t i = 0; i < CACHE_NUM_BUCKETS; ++i)
{
if (doFree)
{
ArenaBlock* pBlock = m_oldCachedBlocks[i].pNext;
while (pBlock)
{
ArenaBlock* pNext = pBlock->pNext;
m_oldCachedSize -= pBlock->blockSize;
m_totalAllocated -= pBlock->blockSize;
this->DefaultAllocator::Free(pBlock);
pBlock = pNext;
}
m_oldCachedBlocks[i].pNext = nullptr;
m_pOldLastCachedBlocks[i] = &m_oldCachedBlocks[i];
}
if (m_pLastCachedBlocks[i] != &m_cachedBlocks[i])
{
if (i && i < (CACHE_NUM_BUCKETS - 1))
{
// We know that all blocks are the same size.
// Just move the list over.
m_pLastCachedBlocks[i]->pNext = m_oldCachedBlocks[i].pNext;
m_oldCachedBlocks[i].pNext = m_cachedBlocks[i].pNext;
m_cachedBlocks[i].pNext = nullptr;
if (m_pOldLastCachedBlocks[i]->pNext)
{
m_pOldLastCachedBlocks[i] = m_pLastCachedBlocks[i];
}
m_pLastCachedBlocks[i] = &m_cachedBlocks[i];
}
else
{
// The end buckets can have variable sized lists.
// Insert each block based on size
ArenaBlock* pBlock = m_cachedBlocks[i].pNext;
while (pBlock)
{
ArenaBlock* pNext = pBlock->pNext;
pBlock->pNext = nullptr;
m_cachedSize -= pBlock->blockSize;
InsertCachedBlock<true>(i, pBlock);
pBlock = pNext;
}
m_pLastCachedBlocks[i] = &m_cachedBlocks[i];
m_cachedBlocks[i].pNext = nullptr;
}
}
}
m_oldCachedSize += m_cachedSize;
m_cachedSize = 0;
}
CachingAllocatorT()
{
for (uint32_t i = 0; i < CACHE_NUM_BUCKETS; ++i)
{
m_pLastCachedBlocks[i] = &m_cachedBlocks[i];
m_pOldLastCachedBlocks[i] = &m_oldCachedBlocks[i];
}
}
~CachingAllocatorT()
{
// Free all cached blocks
for (uint32_t i = 0; i < CACHE_NUM_BUCKETS; ++i)
{
ArenaBlock* pBlock = m_cachedBlocks[i].pNext;
while (pBlock)
{
ArenaBlock* pNext = pBlock->pNext;
this->DefaultAllocator::Free(pBlock);
pBlock = pNext;
}
pBlock = m_oldCachedBlocks[i].pNext;
while (pBlock)
{
ArenaBlock* pNext = pBlock->pNext;
this->DefaultAllocator::Free(pBlock);
pBlock = pNext;
}
}
}
private:
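// Map a block size to its cache bucket: bucket i covers sizes up to
// (1 << (CACHE_START_BUCKET_BIT + i + 1)) bytes, and anything larger falls
// into the last bucket (when BitScanReverseSizeT is available; otherwise
// everything maps to bucket 0).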
static uint32_t GetBucketId(size_t blockSize)
{
uint32_t bucketId = 0;
#if defined(BitScanReverseSizeT)
BitScanReverseSizeT((unsigned long*)&bucketId, (blockSize - 1) >> CACHE_START_BUCKET_BIT);
bucketId = std::min<uint32_t>(bucketId, CACHE_NUM_BUCKETS - 1);
#endif
return bucketId;
}
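// Link a freed block into its bucket's list, which is kept sorted largest
// first so SearchBlocks can stop as soon as the remaining blocks are too
// small.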
template <bool OldBlockT = false>
void InsertCachedBlock(uint32_t bucketId, ArenaBlock* pNewBlock)
{
SWR_ASSUME_ASSERT(bucketId < CACHE_NUM_BUCKETS);
ArenaBlock* pPrevBlock = OldBlockT ? &m_oldCachedBlocks[bucketId] : &m_cachedBlocks[bucketId];
ArenaBlock* pBlock = pPrevBlock->pNext;
while (pBlock)
{
if (pNewBlock->blockSize >= pBlock->blockSize)
{
// Insert here
break;
}
pPrevBlock = pBlock;
pBlock = pBlock->pNext;
}
// Insert into list
SWR_ASSUME_ASSERT(pPrevBlock);
pPrevBlock->pNext = pNewBlock;
pNewBlock->pNext = pBlock;
if (OldBlockT)
{
if (m_pOldLastCachedBlocks[bucketId] == pPrevBlock)
{
m_pOldLastCachedBlocks[bucketId] = pNewBlock;
}
m_oldCachedSize += pNewBlock->blockSize;
}
else
{
if (m_pLastCachedBlocks[bucketId] == pPrevBlock)
{
m_pLastCachedBlocks[bucketId] = pNewBlock;
}
m_cachedSize += pNewBlock->blockSize;
}
}
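// Scan a bucket's largest-first list for a suitably aligned block of at least
// blockSize bytes.  An exact size match wins; otherwise the smallest adequate
// block seen is returned.  pPrevBlock is left pointing at the node preceding
// the result so the caller can unlink it.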
static ArenaBlock* SearchBlocks(ArenaBlock*& pPrevBlock, size_t blockSize, size_t align)
{
ArenaBlock* pBlock = pPrevBlock->pNext;
ArenaBlock* pPotentialBlock = nullptr;
ArenaBlock* pPotentialPrev = nullptr;
while (pBlock)
{
if (pBlock->blockSize >= blockSize)
{
if (pBlock == AlignUp(pBlock, align))
{
if (pBlock->blockSize == blockSize)
{
// Won't find a better match
break;
}
// We could use this as it is larger than we wanted, but
// continue to search for a better match
pPotentialBlock = pBlock;
pPotentialPrev = pPrevBlock;
}
}
else
{
// Blocks are sorted by size (biggest first)
// So, if we get here, there are no blocks
// large enough, fall through to allocation.
pBlock = nullptr;
break;
}
pPrevBlock = pBlock;
pBlock = pBlock->pNext;
}
if (!pBlock)
{
// Couldn't find an exact match, use next biggest size
pBlock = pPotentialBlock;
pPrevBlock = pPotentialPrev;
}
return pBlock;
}
// buckets, for block sizes < (1 << (start+1)), < (1 << (start+2)), ...
static const uint32_t CACHE_NUM_BUCKETS = NumBucketsT;
static const uint32_t CACHE_START_BUCKET_BIT = StartBucketBitT;
static const size_t MAX_UNUSED_SIZE = sizeof(MEGABYTE);
ArenaBlock m_cachedBlocks[CACHE_NUM_BUCKETS];
ArenaBlock* m_pLastCachedBlocks[CACHE_NUM_BUCKETS];
ArenaBlock m_oldCachedBlocks[CACHE_NUM_BUCKETS];
ArenaBlock* m_pOldLastCachedBlocks[CACHE_NUM_BUCKETS];
std::mutex m_mutex;
size_t m_totalAllocated = 0;
size_t m_cachedSize = 0;
size_t m_oldCachedSize = 0;
};
typedef CachingAllocatorT<> CachingAllocator;
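// TArena - the arena itself: a bump-pointer allocator layered on top of an
// ArenaBlock allocator.  Allocations advance an offset within the current
// block; when it is exhausted, a new block of at least BlockSizeT bytes is
// pushed onto the block list.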
template<typename T = DefaultAllocator, size_t BlockSizeT = 128 * sizeof(KILOBYTE)>
class TArena
{
public:
TArena(T& in_allocator) : m_allocator(in_allocator) {}
TArena() : m_allocator(m_defAllocator) {}
~TArena()
{
Reset(true);
}
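// Allocate size bytes at the requested alignment (align must not exceed
// ARENA_BLOCK_ALIGN).  Fast path: bump m_offset within the current block.
// Slow path: pull a new block from the allocator and retry.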
void* AllocAligned(size_t size, size_t align)
{
if (0 == size)
{
return nullptr;
}
SWR_ASSERT(align <= ARENA_BLOCK_ALIGN);
if (m_pCurBlock)
{
ArenaBlock* pCurBlock = m_pCurBlock;
size_t offset = AlignUp(m_offset, align);
if ((offset + size) <= pCurBlock->blockSize)
{
void* pMem = PtrAdd(pCurBlock, offset);
m_offset = offset + size;
return pMem;
}
// Not enough memory in this block, fall through to allocate
// a new block
}
static const size_t ArenaBlockSize = BlockSizeT;
size_t blockSize = std::max(size + ARENA_BLOCK_ALIGN, ArenaBlockSize);
// Add in one BLOCK_ALIGN unit to store ArenaBlock in.
blockSize = AlignUp(blockSize, ARENA_BLOCK_ALIGN);
ArenaBlock* pNewBlock = m_allocator.AllocateAligned(blockSize, ARENA_BLOCK_ALIGN); // Arena blocks are always simd byte aligned.
SWR_ASSERT(pNewBlock != nullptr);
if (pNewBlock != nullptr)
{
m_offset = ARENA_BLOCK_ALIGN;
pNewBlock->pNext = m_pCurBlock;
m_pCurBlock = pNewBlock;
}
return AllocAligned(size, align);
}
void* Alloc(size_t size)
{
return AllocAligned(size, 1);
}
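// Thread-safe variants: serialize on m_mutex so multiple threads can share
// one arena.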
void* AllocAlignedSync(size_t size, size_t align)
{
void* pAlloc = nullptr;
m_mutex.lock();
pAlloc = AllocAligned(size, align);
m_mutex.unlock();
return pAlloc;
}
void* AllocSync(size_t size)
{
void* pAlloc = nullptr;
m_mutex.lock();
pAlloc = Alloc(size);
m_mutex.unlock();
return pAlloc;
}
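// Rewind the arena.  Every block except the current one is handed back to the
// allocator; pass removeAll = true to release the current block as well.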
void Reset(bool removeAll = false)
{
m_offset = ARENA_BLOCK_ALIGN;
if (m_pCurBlock)
{
ArenaBlock *pUsedBlocks = m_pCurBlock->pNext;
m_pCurBlock->pNext = nullptr;
while (pUsedBlocks)
{
ArenaBlock* pBlock = pUsedBlocks;
pUsedBlocks = pBlock->pNext;
m_allocator.Free(pBlock);
}
if (removeAll)
{
m_allocator.Free(m_pCurBlock);
m_pCurBlock = nullptr;
}
}
}
bool IsEmpty()
{
return (m_pCurBlock == nullptr) || (m_offset == ARENA_BLOCK_ALIGN && m_pCurBlock->pNext == nullptr);
}
private:
ArenaBlock* m_pCurBlock = nullptr;
size_t m_offset = ARENA_BLOCK_ALIGN;
/// @note Mutex is only used by sync allocation functions.
std::mutex m_mutex;
DefaultAllocator m_defAllocator;
T& m_allocator;
};
using StdArena = TArena<DefaultAllocator>;
using CachingArena = TArena<CachingAllocator>;