| /**************************************************************************** |
| * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved. |
| * |
| * Permission is hereby granted, free of charge, to any person obtaining a |
| * copy of this software and associated documentation files (the "Software"), |
| * to deal in the Software without restriction, including without limitation |
| * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
| * and/or sell copies of the Software, and to permit persons to whom the |
| * Software is furnished to do so, subject to the following conditions: |
| * |
| * The above copyright notice and this permission notice (including the next |
| * paragraph) shall be included in all copies or substantial portions of the |
| * Software. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
| * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
| * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS |
| * IN THE SOFTWARE. |
| * |
| * @file tilemgr.h |
| * |
| * @brief Definitions for Macro Tile Manager which provides the facilities |
| * for threads to work on a macro tile. |
| * |
| ******************************************************************************/ |
| #pragma once |
| |
| #include <set> |
| #include <unordered_map> |
| #include "common/formats.h" |
| #include "fifo.hpp" |
| #include "context.h" |
| #include "format_traits.h" |
| |
| ////////////////////////////////////////////////////////////////////////// |
| /// MacroTile - work queue for a tile. |
| ////////////////////////////////////////////////////////////////////////// |
| struct MacroTileQueue |
| { |
| MacroTileQueue() { } |
| ~MacroTileQueue() { } |
| |
| ////////////////////////////////////////////////////////////////////////// |
| /// @brief Returns number of work items queued for this tile. |
| uint32_t getNumQueued() |
| { |
| return mFifo.getNumQueued(); |
| } |
| |
| ////////////////////////////////////////////////////////////////////////// |
| /// @brief Attempt to lock the work fifo. If already locked then return false. |
| bool tryLock() |
| { |
| return mFifo.tryLock(); |
| } |
| |
| ////////////////////////////////////////////////////////////////////////// |
| /// @brief Clear fifo and unlock it. |
| template <typename ArenaT> |
| void clear(ArenaT& arena) |
| { |
| mFifo.clear(arena); |
| } |
| |
| ////////////////////////////////////////////////////////////////////////// |
| /// @brief Peek at work sitting at the front of the fifo. |
| BE_WORK* peek() |
| { |
| return mFifo.peek(); |
| } |
| |
| template <typename ArenaT> |
| bool enqueue_try_nosync(ArenaT& arena, const BE_WORK* entry) |
| { |
| return mFifo.enqueue_try_nosync(arena, entry); |
| } |
| |
| ////////////////////////////////////////////////////////////////////////// |
| /// @brief Move to next work item |
| void dequeue() |
| { |
| mFifo.dequeue_noinc(); |
| } |
| |
| ////////////////////////////////////////////////////////////////////////// |
| /// @brief Destroy fifo |
| void destroy() |
| { |
| mFifo.destroy(); |
| } |
| |
| ///@todo This will all be private. |
| uint32_t mWorkItemsFE = 0; |
| uint32_t mWorkItemsBE = 0; |
| uint32_t mId = 0; |
| |
| private: |
| QUEUE<BE_WORK> mFifo; |
| }; |
| |
| ////////////////////////////////////////////////////////////////////////// |
| /// MacroTileMgr - Manages macrotiles for a draw. |
| ////////////////////////////////////////////////////////////////////////// |
| class MacroTileMgr |
| { |
| public: |
| MacroTileMgr(CachingArena& arena); |
| ~MacroTileMgr() |
| { |
| for (auto &tile : mTiles) |
| { |
| tile.second.destroy(); |
| } |
| } |
| |
| INLINE void initialize() |
| { |
| mWorkItemsProduced = 0; |
| mWorkItemsConsumed = 0; |
| |
| mDirtyTiles.clear(); |
| } |
| |
| INLINE std::vector<MacroTileQueue*>& getDirtyTiles() { return mDirtyTiles; } |
| void markTileComplete(uint32_t id); |
| |
| INLINE bool isWorkComplete() |
| { |
| return mWorkItemsProduced == mWorkItemsConsumed; |
| } |
| |
| void enqueue(uint32_t x, uint32_t y, BE_WORK *pWork); |
| |
| static INLINE void getTileIndices(uint32_t tileID, uint32_t &x, uint32_t &y) |
| { |
| y = tileID & 0xffff; |
| x = (tileID >> 16) & 0xffff; |
| } |
| |
| private: |
| CachingArena& mArena; |
| std::unordered_map<uint32_t, MacroTileQueue> mTiles; |
| |
| // Any tile that has work queued to it is a dirty tile. |
| std::vector<MacroTileQueue*> mDirtyTiles; |
| |
| OSALIGNLINE(LONG) mWorkItemsProduced { 0 }; |
| OSALIGNLINE(volatile LONG) mWorkItemsConsumed { 0 }; |
| }; |
| |
| typedef void(*PFN_DISPATCH)(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t threadGroupId, void*& pSpillFillBuffer, void*& pScratchSpace); |
| |
| ////////////////////////////////////////////////////////////////////////// |
| /// DispatchQueue - work queue for dispatch |
| ////////////////////////////////////////////////////////////////////////// |
| class DispatchQueue |
| { |
| public: |
| DispatchQueue() {} |
| |
| ////////////////////////////////////////////////////////////////////////// |
| /// @brief Setup the producer consumer counts. |
| void initialize(uint32_t totalTasks, void* pTaskData, PFN_DISPATCH pfnDispatch) |
| { |
| // The available and outstanding counts start with total tasks. |
| // At the start there are N tasks available and outstanding. |
| // When both the available and outstanding counts have reached 0 then all work has completed. |
| // When a worker starts on a threadgroup then it decrements the available count. |
| // When a worker completes a threadgroup then it decrements the outstanding count. |
| |
| mTasksAvailable = totalTasks; |
| mTasksOutstanding = totalTasks; |
| |
| mpTaskData = pTaskData; |
| mPfnDispatch = pfnDispatch; |
| } |
| |
| ////////////////////////////////////////////////////////////////////////// |
| /// @brief Returns number of tasks available for this dispatch. |
| uint32_t getNumQueued() |
| { |
| return (mTasksAvailable > 0) ? mTasksAvailable : 0; |
| } |
| |
| ////////////////////////////////////////////////////////////////////////// |
| /// @brief Atomically decrement the work available count. If the result |
| // is greater than 0 then we can on the associated thread group. |
| // Otherwise, there is no more work to do. |
| bool getWork(uint32_t& groupId) |
| { |
| LONG result = InterlockedDecrement(&mTasksAvailable); |
| |
| if (result >= 0) |
| { |
| groupId = result; |
| return true; |
| } |
| |
| return false; |
| } |
| |
| ////////////////////////////////////////////////////////////////////////// |
| /// @brief Atomically decrement the outstanding count. A worker is notifying |
| /// us that he just finished some work. Also, return true if we're |
| /// the last worker to complete this dispatch. |
| bool finishedWork() |
| { |
| LONG result = InterlockedDecrement(&mTasksOutstanding); |
| SWR_ASSERT(result >= 0, "Should never oversubscribe work"); |
| |
| return (result == 0) ? true : false; |
| } |
| |
| ////////////////////////////////////////////////////////////////////////// |
| /// @brief Work is complete once both the available/outstanding counts have reached 0. |
| bool isWorkComplete() |
| { |
| return ((mTasksAvailable <= 0) && |
| (mTasksOutstanding <= 0)); |
| } |
| |
| ////////////////////////////////////////////////////////////////////////// |
| /// @brief Return pointer to task data. |
| const void* GetTasksData() |
| { |
| return mpTaskData; |
| } |
| |
| ////////////////////////////////////////////////////////////////////////// |
| /// @brief Dispatches a unit of work |
| void dispatch(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t threadGroupId, void*& pSpillFillBuffer, void*& pScratchSpace) |
| { |
| SWR_ASSERT(mPfnDispatch != nullptr); |
| mPfnDispatch(pDC, workerId, threadGroupId, pSpillFillBuffer, pScratchSpace); |
| } |
| |
| void* mpTaskData{ nullptr }; // The API thread will set this up and the callback task function will interpet this. |
| PFN_DISPATCH mPfnDispatch{ nullptr }; // Function to call per dispatch |
| |
| OSALIGNLINE(volatile LONG) mTasksAvailable{ 0 }; |
| OSALIGNLINE(volatile LONG) mTasksOutstanding{ 0 }; |
| }; |
| |
| |
/// State of a single hot tile. HotTileMgr zero-fills its tile table on
/// construction, so HOTTILE_INVALID (0) is the state every tile starts in.
enum HOTTILE_STATE
{
    /// Tile is in an uninitialized state and should be loaded with surface
    /// contents before rendering.
    HOTTILE_INVALID = 0,

    /// Tile should be cleared.
    HOTTILE_CLEAR = 1,

    /// Tile has been rendered to.
    HOTTILE_DIRTY = 2,

    /// Tile has been stored to memory.
    HOTTILE_RESOLVED = 3,
};
| |
| struct HOTTILE |
| { |
| uint8_t *pBuffer; |
| HOTTILE_STATE state; |
| DWORD clearData[4]; // May need to change based on pfnClearTile implementation. Reorder for alignment? |
| uint32_t numSamples; |
| uint32_t renderTargetArrayIndex; // current render target array index loaded |
| }; |
| |
| union HotTileSet |
| { |
| struct |
| { |
| HOTTILE Color[SWR_NUM_RENDERTARGETS]; |
| HOTTILE Depth; |
| HOTTILE Stencil; |
| }; |
| HOTTILE Attachment[SWR_NUM_ATTACHMENTS]; |
| }; |
| |
| class HotTileMgr |
| { |
| public: |
| HotTileMgr() |
| { |
| memset(mHotTiles, 0, sizeof(mHotTiles)); |
| |
| // cache hottile size |
| for (uint32_t i = SWR_ATTACHMENT_COLOR0; i <= SWR_ATTACHMENT_COLOR7; ++i) |
| { |
| mHotTileSize[i] = KNOB_MACROTILE_X_DIM * KNOB_MACROTILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8; |
| } |
| mHotTileSize[SWR_ATTACHMENT_DEPTH] = KNOB_MACROTILE_X_DIM * KNOB_MACROTILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8; |
| mHotTileSize[SWR_ATTACHMENT_STENCIL] = KNOB_MACROTILE_X_DIM * KNOB_MACROTILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8; |
| } |
| |
| ~HotTileMgr() |
| { |
| for (int x = 0; x < KNOB_NUM_HOT_TILES_X; ++x) |
| { |
| for (int y = 0; y < KNOB_NUM_HOT_TILES_Y; ++y) |
| { |
| for (int a = 0; a < SWR_NUM_ATTACHMENTS; ++a) |
| { |
| FreeHotTileMem(mHotTiles[x][y].Attachment[a].pBuffer); |
| } |
| } |
| } |
| } |
| |
| void InitializeHotTiles(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroID); |
| |
| HOTTILE *GetHotTile(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t macroID, SWR_RENDERTARGET_ATTACHMENT attachment, bool create, uint32_t numSamples = 1, |
| uint32_t renderTargetArrayIndex = 0); |
| |
| HOTTILE *GetHotTileNoLoad(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t macroID, SWR_RENDERTARGET_ATTACHMENT attachment, bool create, uint32_t numSamples = 1); |
| |
| static void ClearColorHotTile(const HOTTILE* pHotTile); |
| static void ClearDepthHotTile(const HOTTILE* pHotTile); |
| static void ClearStencilHotTile(const HOTTILE* pHotTile); |
| |
| private: |
| HotTileSet mHotTiles[KNOB_NUM_HOT_TILES_X][KNOB_NUM_HOT_TILES_Y]; |
| uint32_t mHotTileSize[SWR_NUM_ATTACHMENTS]; |
| |
| void* AllocHotTileMem(size_t size, uint32_t align, uint32_t numaNode) |
| { |
| void* p = nullptr; |
| #if defined(_WIN32) |
| HANDLE hProcess = GetCurrentProcess(); |
| p = VirtualAllocExNuma(hProcess, nullptr, size, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE, numaNode); |
| #else |
| p = AlignedMalloc(size, align); |
| #endif |
| |
| return p; |
| } |
| |
| void FreeHotTileMem(void* pBuffer) |
| { |
| if (pBuffer) |
| { |
| #if defined(_WIN32) |
| VirtualFree(pBuffer, 0, MEM_RELEASE); |
| #else |
| AlignedFree(pBuffer); |
| #endif |
| } |
| } |
| }; |
| |