| /*------------------------------------------------------------------------- |
| * drawElements Quality Program OpenGL ES 2.0 Module |
| * ------------------------------------------------- |
| * |
| * Copyright 2014 The Android Open Source Project |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| * |
| *//*! |
| * \file |
| * \brief Shader operator performance tests. |
| *//*--------------------------------------------------------------------*/ |
| |
| #include "es2pShaderOperatorTests.hpp" |
| #include "glsCalibration.hpp" |
| #include "gluShaderUtil.hpp" |
| #include "gluShaderProgram.hpp" |
| #include "gluPixelTransfer.hpp" |
| #include "tcuTestLog.hpp" |
| #include "tcuRenderTarget.hpp" |
| #include "tcuCommandLine.hpp" |
| #include "tcuSurface.hpp" |
| #include "deStringUtil.hpp" |
| #include "deSharedPtr.hpp" |
| #include "deClock.h" |
| #include "deMath.h" |
| |
| #include "glwEnums.hpp" |
| #include "glwFunctions.hpp" |
| |
| #include <map> |
| #include <algorithm> |
| #include <limits> |
| #include <set> |
| |
| namespace deqp |
| { |
| namespace gles2 |
| { |
| namespace Performance |
| { |
| |
| using namespace gls; |
| using namespace glu; |
| using tcu::Vec2; |
| using tcu::Vec4; |
| using tcu::TestLog; |
| using de::SharedPtr; |
| |
| using std::string; |
| using std::vector; |
| |
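| // Aborts the case with an internal error when no sensible measurement can be obtained. |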
| #define MEASUREMENT_FAIL() throw tcu::InternalError("Unable to get sensible measurements for estimation", DE_NULL, __FILE__, __LINE__) |
| |
| // Number of measurements in OperatorPerformanceCase for each workload size, unless specified otherwise by a command line argument. |
| static const int DEFAULT_NUM_MEASUREMENTS_PER_WORKLOAD = 3; |
| // How many different workload sizes are used by OperatorPerformanceCase. |
| static const int NUM_WORKLOADS = 8; |
| // Maximum workload size that can be attempted. In a sensible case, this most likely won't be reached. |
| static const int MAX_WORKLOAD_SIZE = 1<<29; |
| |
| // BinaryOpCase-specific constants for shader generation. |
| static const int BINARY_OPERATOR_CASE_NUM_INDEPENDENT_CALCULATIONS = 4; |
| static const int BINARY_OPERATOR_CASE_SMALL_PROGRAM_UNROLL_AMOUNT = 2; |
| static const int BINARY_OPERATOR_CASE_BIG_PROGRAM_UNROLL_AMOUNT = 4; |
| |
| // FunctionCase-specific constants for shader generation. |
| static const int FUNCTION_CASE_NUM_INDEPENDENT_CALCULATIONS = 4; |
| |
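| // Swizzle strings indexed as s_swizzles[inputNdx % 4][numComponents-1]; used to vary how consecutive inputs are read. |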
| static const char* const s_swizzles[][4] = |
| { |
| { "x", "yx", "yzx", "wzyx" }, |
| { "y", "zy", "wyz", "xwzy" }, |
| { "z", "wy", "zxy", "yzwx" }, |
| { "w", "xw", "yxw", "zyxw" } |
| }; |
| |
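| // Component-wise arithmetic mean of a list of vectors. |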
| template <int N> |
| static tcu::Vector<float, N> mean (const vector<tcu::Vector<float, N> >& data) |
| { |
| tcu::Vector<float, N> sum(0.0f); |
| for (int i = 0; i < (int)data.size(); i++) |
| sum += data[i]; |
| return sum / tcu::Vector<float, N>((float)data.size()); |
| } |
| |
| static void uniformNfv (const glw::Functions& gl, int n, int location, int count, const float* data) |
| { |
| switch (n) |
| { |
| case 1: gl.uniform1fv(location, count, data); break; |
| case 2: gl.uniform2fv(location, count, data); break; |
| case 3: gl.uniform3fv(location, count, data); break; |
| case 4: gl.uniform4fv(location, count, data); break; |
| default: DE_ASSERT(false); |
| } |
| } |
| |
| static void uniformNiv (const glw::Functions& gl, int n, int location, int count, const int* data) |
| { |
| switch (n) |
| { |
| case 1: gl.uniform1iv(location, count, data); break; |
| case 2: gl.uniform2iv(location, count, data); break; |
| case 3: gl.uniform3iv(location, count, data); break; |
| case 4: gl.uniform4iv(location, count, data); break; |
| default: DE_ASSERT(false); |
| } |
| } |
| |
| static void uniformMatrixNfv (const glw::Functions& gl, int n, int location, int count, const float* data) |
| { |
| switch (n) |
| { |
| case 2: gl.uniformMatrix2fv(location, count, GL_FALSE, &data[0]); break; |
| case 3: gl.uniformMatrix3fv(location, count, GL_FALSE, &data[0]); break; |
| case 4: gl.uniformMatrix4fv(location, count, GL_FALSE, &data[0]); break; |
| default: DE_ASSERT(false); |
| } |
| } |
| |
| static glu::DataType getDataTypeFloatOrVec (int size) |
| { |
| return size == 1 ? glu::TYPE_FLOAT : glu::getDataTypeFloatVec(size); |
| } |
| |
| static int getIterationCountOrDefault (const tcu::CommandLine& cmdLine, int def) |
| { |
| const int cmdLineVal = cmdLine.getTestIterationCount(); |
| return cmdLineVal > 0 ? cmdLineVal : def; |
| } |
| |
| static string lineParamsString (const LineParameters& params) |
| { |
| return "y = " + de::toString(params.offset) + " + " + de::toString(params.coefficient) + "*x"; |
| } |
| |
| namespace |
| { |
| |
| /*--------------------------------------------------------------------*//*! |
| * \brief Abstract class for measuring shader operator performance. |
| * |
| * This class draws multiple times with different workload sizes (set |
| * via a uniform, by subclass). Time for each frame is measured, and the |
| * slope of the workload size vs frame time data is estimated. This slope |
| * tells us the estimated increase in frame time caused by a workload |
| * increase of 1 unit (what 1 workload unit means is up to subclass). |
| * |
| * Generally, the shaders contain not just the operation we're interested |
| * in (e.g. addition) but also some other stuff (e.g. loop overhead). To |
| * eliminate this cost, we actually do the stuff described in the above |
| * paragraph with multiple programs (usually two), which contain different |
| * kinds of workload (e.g. different loop contents). Then we can (in |
| * theory) compute the cost of just one operation in a subclass-dependent |
| * manner. |
| * |
| * At this point, the result tells us the increase in frame time caused |
| * by the addition of one operation. Dividing this by the number of |
| * draw calls in a frame, and further by the number of vertices or |
| * fragments in a draw call, we get the time cost of one operation. |
| * |
| * In reality, there sometimes isn't just a trivial linear dependence |
| * between workload size and frame time. Instead, there tends to be some |
| * number of initial "free" operations. That is, it may be that all |
| * workload sizes below some positive integer C yield the same frame time, |
| * and only workload sizes beyond C increase the frame time in a supposedly |
| * linear manner. Graphically, this means that the graph consists of two |
| * parts: a horizontal left part, and a linearly increasing right part; the |
| * right part starts where the left part ends. The principal task of these |
| * tests is to estimate the slope of the increasing right part. Additionally, |
| * an estimate of the number of initial free operations is calculated. |
| * Note that it is also normal to get graphs where the horizontal left part |
| * is of zero width, i.e. there are no free operations. |
| *//*--------------------------------------------------------------------*/ |
| class OperatorPerformanceCase : public tcu::TestCase |
| { |
| public: |
| enum CaseType |
| { |
| CASETYPE_VERTEX = 0, |
| CASETYPE_FRAGMENT, |
| |
| CASETYPE_LAST |
| }; |
| |
| struct InitialCalibration |
| { |
| int initialNumCalls; |
| InitialCalibration (void) : initialNumCalls(1) {} |
| }; |
| |
| typedef SharedPtr<InitialCalibration> InitialCalibrationStorage; |
| |
| OperatorPerformanceCase (tcu::TestContext& testCtx, glu::RenderContext& renderCtx, const char* name, const char* description, |
| CaseType caseType, int numWorkloads, const InitialCalibrationStorage& initialCalibrationStorage); |
| ~OperatorPerformanceCase (void); |
| |
| void init (void); |
| void deinit (void); |
| |
| IterateResult iterate (void); |
| |
| struct AttribSpec |
| { |
| AttribSpec (const char* name_, const tcu::Vec4& p00_, const tcu::Vec4& p01_, const tcu::Vec4& p10_, const tcu::Vec4& p11_) |
| : name (name_) |
| , p00 (p00_) |
| , p01 (p01_) |
| , p10 (p10_) |
| , p11 (p11_) |
| { |
| } |
| |
| AttribSpec (void) {} |
| |
| std::string name; |
| tcu::Vec4 p00; //!< Bottom left. |
| tcu::Vec4 p01; //!< Bottom right. |
| tcu::Vec4 p10; //!< Top left. |
| tcu::Vec4 p11; //!< Top right. |
| }; |
| |
| protected: |
| struct ProgramContext |
| { |
| string vertShaderSource; |
| string fragShaderSource; |
| vector<AttribSpec> attributes; |
| |
| string description; |
| |
| ProgramContext (void) {} |
| ProgramContext (const string& vs, const string& fs, const vector<AttribSpec>& attrs, const string& desc) |
| : vertShaderSource(vs), fragShaderSource(fs), attributes(attrs), description(desc) {} |
| }; |
| |
| virtual vector<ProgramContext> generateProgramData (void) const = 0; |
| //! Sets program-specific uniforms that don't depend on the workload size. |
| virtual void setGeneralUniforms (deUint32 program) const = 0; |
| //! Sets the uniform(s) that specifies the workload size in the shader. |
| virtual void setWorkloadSizeUniform (deUint32 program, int workload) const = 0; |
| //! Computes the cost of a single operation, given the workload costs per program. |
| virtual float computeSingleOperationTime (const vector<float>& perProgramWorkloadCosts) const = 0; |
| //! Logs a human-readable description of what computeSingleOperationTime does. |
| virtual void logSingleOperationCalculationInfo (void) const = 0; |
| |
| glu::RenderContext& m_renderCtx; |
| |
| CaseType m_caseType; |
| |
| private: |
| enum State |
| { |
| STATE_CALIBRATING = 0, //!< Calibrate draw call count, using first program in m_programs, with workload size 1. |
| STATE_FIND_HIGH_WORKLOAD, //!< Find an appropriate lower bound for the highest workload size we intend to use (one with high-enough frame time compared to workload size 1) for each program. |
| STATE_MEASURING, //!< Do actual measurements, for each program in m_programs. |
| STATE_REPORTING, //!< Measurements are done; calculate results and log. |
| STATE_FINISHED, //!< All done. |
| |
| STATE_LAST |
| }; |
| |
| struct WorkloadRecord |
| { |
| int workloadSize; |
| vector<float> frameTimes; //!< In microseconds. |
| |
| WorkloadRecord (int workloadSize_) : workloadSize(workloadSize_) {} |
| bool operator< (const WorkloadRecord& other) const { return this->workloadSize < other.workloadSize; } |
| void addFrameTime (float time) { frameTimes.push_back(time); } |
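| //! Median of the recorded frame times (average of the two middle values for an even count). |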
| float getMedianTime (void) const |
| { |
| vector<float> times = frameTimes; |
| std::sort(times.begin(), times.end()); |
| return times.size() % 2 == 0 ? |
| (times[times.size()/2-1] + times[times.size()/2])*0.5f : |
| times[times.size()/2]; |
| } |
| }; |
| |
| void prepareProgram (int progNdx); //!< Sets attributes and uniforms for m_programs[progNdx]. |
| void prepareWorkload (int progNdx, int workload); //!< Calls setWorkloadSizeUniform and draws, in case the implementation does some draw-time compilation. |
| void prepareNextRound (void); //!< Increases workload and/or updates m_state. |
| void render (int numDrawCalls); |
| deUint64 renderAndMeasure (int numDrawCalls); |
| void adjustAndLogGridAndViewport (void); //!< Log grid and viewport sizes, after possibly reducing them to reduce draw time. |
| |
| vector<Vec2> getWorkloadMedianDataPoints (int progNdx) const; //!< [ Vec2(r.workloadSize, r.getMedianTime()) for r in m_workloadRecords[progNdx] ] |
| |
| const int m_numMeasurementsPerWorkload; |
| const int m_numWorkloads; //!< How many different workload sizes are used for measurement for each program. |
| |
| int m_workloadNdx; //!< Runs from 0 to m_numWorkloads-1. |
| |
| int m_workloadMeasurementNdx; |
| vector<vector<WorkloadRecord> > m_workloadRecordsFindHigh; //!< The measurements done during STATE_FIND_HIGH_WORKLOAD. |
| vector<vector<WorkloadRecord> > m_workloadRecords; //!< The measurements of each program in m_programs. Generated during STATE_MEASURING, into index specified by m_measureProgramNdx. |
| |
| State m_state; |
| int m_measureProgramNdx; //!< When m_state is STATE_FIND_HIGH_WORKLOAD or STATE_MEASURING, this tells which program in m_programs is being measured. |
| |
| vector<int> m_highWorkloadSizes; //!< The first workload size encountered during STATE_FIND_HIGH_WORKLOAD that was determined suitable, for each program. |
| |
| TheilSenCalibrator m_calibrator; |
| InitialCalibrationStorage m_initialCalibrationStorage; |
| |
| int m_viewportWidth; |
| int m_viewportHeight; |
| int m_gridSizeX; |
| int m_gridSizeY; |
| |
| vector<ProgramContext> m_programData; |
| vector<SharedPtr<ShaderProgram> > m_programs; |
| |
| std::vector<deUint32> m_attribBuffers; |
| }; |
| |
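| // Interpolate within a triangle: returns v0 at (0,0), v2 at (1,0) and v1 at (0,1). |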
| static inline float triangleInterpolate (float v0, float v1, float v2, float x, float y) |
| { |
| return v0 + (v2-v0)*x + (v1-v0)*y; |
| } |
| |
| static inline float triQuadInterpolate (float x, float y, const tcu::Vec4& quad) |
| { |
| // \note Top left fill rule. |
| if (x + y < 1.0f) |
| return triangleInterpolate(quad.x(), quad.y(), quad.z(), x, y); |
| else |
| return triangleInterpolate(quad.w(), quad.z(), quad.y(), 1.0f-x, 1.0f-y); |
| } |
| |
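| // Two triangles per grid cell, three vertices per triangle. |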
| static inline int getNumVertices (int gridSizeX, int gridSizeY) |
| { |
| return gridSizeX * gridSizeY * 2 * 3; |
| } |
| |
| static void generateVertices (std::vector<float>& dst, int gridSizeX, int gridSizeY, const OperatorPerformanceCase::AttribSpec& spec) |
| { |
| const int numComponents = 4; |
| |
| DE_ASSERT(gridSizeX >= 1 && gridSizeY >= 1); |
| dst.resize(getNumVertices(gridSizeX, gridSizeY) * numComponents); |
| |
| { |
| int dstNdx = 0; |
| |
| for (int baseY = 0; baseY < gridSizeY; baseY++) |
| for (int baseX = 0; baseX < gridSizeX; baseX++) |
| { |
| const float xf0 = (float)(baseX + 0) / (float)gridSizeX; |
| const float yf0 = (float)(baseY + 0) / (float)gridSizeY; |
| const float xf1 = (float)(baseX + 1) / (float)gridSizeX; |
| const float yf1 = (float)(baseY + 1) / (float)gridSizeY; |
| |
| #define ADD_VERTEX(XF, YF) \ |
| for (int compNdx = 0; compNdx < numComponents; compNdx++) \ |
| dst[dstNdx++] = triQuadInterpolate((XF), (YF), tcu::Vec4(spec.p00[compNdx], spec.p01[compNdx], spec.p10[compNdx], spec.p11[compNdx])) |
| |
| ADD_VERTEX(xf0, yf0); |
| ADD_VERTEX(xf1, yf0); |
| ADD_VERTEX(xf0, yf1); |
| |
| ADD_VERTEX(xf1, yf0); |
| ADD_VERTEX(xf1, yf1); |
| ADD_VERTEX(xf0, yf1); |
| |
| #undef ADD_VERTEX |
| } |
| } |
| } |
| |
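| // X coordinate at which two lines y = offset + coefficient*x intersect. |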
| static float intersectionX (const gls::LineParameters& a, const gls::LineParameters& b) |
| { |
| return (a.offset - b.offset) / (b.coefficient - a.coefficient); |
| } |
| |
| static int numDistinctX (const vector<Vec2>& data) |
| { |
| std::set<float> xs; |
| for (int i = 0; i < (int)data.size(); i++) |
| xs.insert(data[i].x()); |
| return (int)xs.size(); |
| } |
| |
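| // Ordinary least-squares fit: slope = cov(x,y) / var(x), with the line passing through the mean point. |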
| static gls::LineParameters simpleLinearRegression (const vector<Vec2>& data) |
| { |
| const Vec2 mid = mean(data); |
| |
| float slopeNumerator = 0.0f; |
| float slopeDenominator = 0.0f; |
| |
| for (int i = 0; i < (int)data.size(); i++) |
| { |
| const Vec2 diff = data[i] - mid; |
| |
| slopeNumerator += diff.x()*diff.y(); |
| slopeDenominator += diff.x()*diff.x(); |
| } |
| |
| const float slope = slopeNumerator / slopeDenominator; |
| const float offset = mid.y() - slope*mid.x(); |
| |
| return gls::LineParameters(offset, slope); |
| } |
| |
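| // Mean squared error of the least-squares fit; zero when there are too few distinct x values for a meaningful fit. |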
| static float simpleLinearRegressionError (const vector<Vec2>& data) |
| { |
| if (numDistinctX(data) <= 2) |
| return 0.0f; |
| else |
| { |
| const gls::LineParameters estimator = simpleLinearRegression(data); |
| float error = 0.0f; |
| |
| for (int i = 0; i < (int)data.size(); i++) |
| { |
| const float estY = estimator.offset + estimator.coefficient*data[i].x(); |
| const float diff = estY - data[i].y(); |
| error += diff*diff; |
| } |
| |
| return error / (float)data.size(); |
| } |
| } |
| |
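| // Mean squared deviation from the mean y value, i.e. the error of the best horizontal-line fit. |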
| static float verticalVariance (const vector<Vec2>& data) |
| { |
| if (numDistinctX(data) <= 2) |
| return 0.0f; |
| else |
| { |
| const float meanY = mean(data).y(); |
| float error = 0.0f; |
| |
| for (int i = 0; i < (int)data.size(); i++) |
| { |
| const float diff = meanY - data[i].y(); |
| error += diff*diff; |
| } |
| |
| return error / (float)data.size(); |
| } |
| } |
| |
| /*--------------------------------------------------------------------*//*! |
| * \brief Find the x coord that divides the input data into two slopes. |
| * |
| * The operator performance measurements tend to produce results where |
| * we get small operation counts "for free" (e.g. because the operations |
| * overlap with memory transfers or other fixed overhead), |
| * resulting in a curve with two parts: an initial horizontal line segment, |
| * and a rising line. |
| * |
| * This function finds the x coordinate that divides the input data into |
| * two parts such that the sum of the mean square errors for the |
| * least-squares estimated lines for the two parts is minimized, under the |
| * additional condition that the left line is horizontal. |
| * |
| * This function returns a number X s.t. { pt | pt is in data, pt.x >= X } |
| * is the right line, and the rest of data is the left line. |
| *//*--------------------------------------------------------------------*/ |
| static float findSlopePivotX (const vector<Vec2>& data) |
| { |
| std::set<float> xCoords; |
| for (int i = 0; i < (int)data.size(); i++) |
| xCoords.insert(data[i].x()); |
| |
| float lowestError = std::numeric_limits<float>::infinity(); |
| float bestPivotX = -std::numeric_limits<float>::infinity(); |
| |
| for (std::set<float>::const_iterator pivotX = xCoords.begin(); pivotX != xCoords.end(); ++pivotX) |
| { |
| vector<Vec2> leftData; |
| vector<Vec2> rightData; |
| for (int i = 0; i < (int)data.size(); i++) |
| { |
| if (data[i].x() < *pivotX) |
| leftData.push_back(data[i]); |
| else |
| rightData.push_back(data[i]); |
| } |
| |
| if (numDistinctX(rightData) < 3) // We don't trust the right data if there's too little of it. |
| break; |
| |
| { |
| const float totalError = verticalVariance(leftData) + simpleLinearRegressionError(rightData); |
| |
| if (totalError < lowestError) |
| { |
| lowestError = totalError; |
| bestPivotX = *pivotX; |
| } |
| } |
| } |
| |
| DE_ASSERT(lowestError < std::numeric_limits<float>::infinity()); |
| |
| return bestPivotX; |
| } |
| |
| struct SegmentedEstimator |
| { |
| float pivotX; //!< Value returned by findSlopePivotX, or -infinity if only single line. |
| gls::LineParameters left; |
| gls::LineParameters right; |
| SegmentedEstimator (const gls::LineParameters& l, const gls::LineParameters& r, float pivotX_) : pivotX(pivotX_), left(l), right(r) {} |
| }; |
| |
| /*--------------------------------------------------------------------*//*! |
| * \brief Compute line estimators for (potentially) two-segment data. |
| * |
| * Splits the given data into left and right parts (using findSlopePivotX) |
| * and returns the line estimates for them. |
| * |
| * Sometimes, however (especially in fragment shader cases) the data is |
| * in fact not segmented, but a straight line. This function attempts to |
| * detect if this is the case, and if so, sets left.offset = right.offset and |
| * left.coefficient = 0, meaning essentially that the initial "flat" part of the |
| * data has zero width. |
| *//*--------------------------------------------------------------------*/ |
| static SegmentedEstimator computeSegmentedEstimator (const vector<Vec2>& data) |
| { |
| const float pivotX = findSlopePivotX(data); |
| vector<Vec2> leftData; |
| vector<Vec2> rightData; |
| |
| for (int i = 0; i < (int)data.size(); i++) |
| { |
| if (data[i].x() < pivotX) |
| leftData.push_back(data[i]); |
| else |
| rightData.push_back(data[i]); |
| } |
| |
| { |
| const gls::LineParameters leftLine = gls::theilSenLinearRegression(leftData); |
| const gls::LineParameters rightLine = gls::theilSenLinearRegression(rightData); |
| |
| if (numDistinctX(leftData) < 2 || leftLine.coefficient > rightLine.coefficient*0.5f) |
| { |
| // Left data doesn't seem credible; assume the data is just a single line. |
| const gls::LineParameters entireLine = gls::theilSenLinearRegression(data); |
| return SegmentedEstimator(gls::LineParameters(entireLine.offset, 0.0f), entireLine, -std::numeric_limits<float>::infinity()); |
| } |
| else |
| return SegmentedEstimator(leftLine, rightLine, pivotX); |
| } |
| } |
| |
| OperatorPerformanceCase::OperatorPerformanceCase (tcu::TestContext& testCtx, glu::RenderContext& renderCtx, const char* name, const char* description, |
| CaseType caseType, int numWorkloads, const InitialCalibrationStorage& initialCalibrationStorage) |
| : tcu::TestCase (testCtx, tcu::NODETYPE_PERFORMANCE, name, description) |
| , m_renderCtx (renderCtx) |
| , m_caseType (caseType) |
| , m_numMeasurementsPerWorkload (getIterationCountOrDefault(m_testCtx.getCommandLine(), DEFAULT_NUM_MEASUREMENTS_PER_WORKLOAD)) |
| , m_numWorkloads (numWorkloads) |
| , m_workloadNdx (-1) |
| , m_workloadMeasurementNdx (-1) |
| , m_state (STATE_LAST) |
| , m_measureProgramNdx (-1) |
| , m_initialCalibrationStorage (initialCalibrationStorage) |
| , m_viewportWidth (caseType == CASETYPE_VERTEX ? 32 : renderCtx.getRenderTarget().getWidth()) |
| , m_viewportHeight (caseType == CASETYPE_VERTEX ? 32 : renderCtx.getRenderTarget().getHeight()) |
| , m_gridSizeX (caseType == CASETYPE_FRAGMENT ? 1 : 100) |
| , m_gridSizeY (caseType == CASETYPE_FRAGMENT ? 1 : 100) |
| { |
| DE_ASSERT(m_numWorkloads > 0); |
| } |
| |
| OperatorPerformanceCase::~OperatorPerformanceCase (void) |
| { |
| if (!m_attribBuffers.empty()) |
| { |
| m_renderCtx.getFunctions().deleteBuffers((glw::GLsizei)m_attribBuffers.size(), &m_attribBuffers[0]); |
| m_attribBuffers.clear(); |
| } |
| } |
| |
| static void logRenderTargetInfo (TestLog& log, const tcu::RenderTarget& renderTarget) |
| { |
| log << TestLog::Section("RenderTarget", "Render target") |
| << TestLog::Message << "size: " << renderTarget.getWidth() << "x" << renderTarget.getHeight() << TestLog::EndMessage |
| << TestLog::Message << "bits:" |
| << " R" << renderTarget.getPixelFormat().redBits |
| << " G" << renderTarget.getPixelFormat().greenBits |
| << " B" << renderTarget.getPixelFormat().blueBits |
| << " A" << renderTarget.getPixelFormat().alphaBits |
| << " D" << renderTarget.getDepthBits() |
| << " S" << renderTarget.getStencilBits() |
| << TestLog::EndMessage; |
| |
| if (renderTarget.getNumSamples() != 0) |
| log << TestLog::Message << renderTarget.getNumSamples() << "x MSAA" << TestLog::EndMessage; |
| else |
| log << TestLog::Message << "No MSAA" << TestLog::EndMessage; |
| |
| log << TestLog::EndSection; |
| } |
| |
| vector<Vec2> OperatorPerformanceCase::getWorkloadMedianDataPoints (int progNdx) const |
| { |
| const vector<WorkloadRecord>& records = m_workloadRecords[progNdx]; |
| vector<Vec2> result; |
| |
| for (int i = 0; i < (int)records.size(); i++) |
| result.push_back(Vec2((float)records[i].workloadSize, records[i].getMedianTime())); |
| |
| return result; |
| } |
| |
| void OperatorPerformanceCase::prepareProgram (int progNdx) |
| { |
| DE_ASSERT(progNdx < (int)m_programs.size()); |
| DE_ASSERT(m_programData.size() == m_programs.size()); |
| |
| const glw::Functions& gl = m_renderCtx.getFunctions(); |
| const ShaderProgram& program = *m_programs[progNdx]; |
| |
| vector<AttribSpec> attributes = m_programData[progNdx].attributes; |
| |
| attributes.push_back(AttribSpec("a_position", |
| Vec4(-1.0f, -1.0f, 0.0f, 1.0f), |
| Vec4( 1.0f, -1.0f, 0.0f, 1.0f), |
| Vec4(-1.0f, 1.0f, 0.0f, 1.0f), |
| Vec4( 1.0f, 1.0f, 0.0f, 1.0f))); |
| |
| DE_ASSERT(program.isOk()); |
| |
| // Generate vertices. |
| if (!m_attribBuffers.empty()) |
| gl.deleteBuffers((glw::GLsizei)m_attribBuffers.size(), &m_attribBuffers[0]); |
| m_attribBuffers.resize(attributes.size(), 0); |
| gl.genBuffers((glw::GLsizei)m_attribBuffers.size(), &m_attribBuffers[0]); |
| GLU_EXPECT_NO_ERROR(gl.getError(), "glGenBuffers()"); |
| |
| for (int attribNdx = 0; attribNdx < (int)attributes.size(); attribNdx++) |
| { |
| std::vector<float> vertices; |
| generateVertices(vertices, m_gridSizeX, m_gridSizeY, attributes[attribNdx]); |
| |
| gl.bindBuffer(GL_ARRAY_BUFFER, m_attribBuffers[attribNdx]); |
| gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(vertices.size()*sizeof(float)), &vertices[0], GL_STATIC_DRAW); |
| GLU_EXPECT_NO_ERROR(gl.getError(), "Upload buffer data"); |
| } |
| |
| // Setup attribute bindings. |
| for (int attribNdx = 0; attribNdx < (int)attributes.size(); attribNdx++) |
| { |
| int location = gl.getAttribLocation(program.getProgram(), attributes[attribNdx].name.c_str()); |
| |
| if (location >= 0) |
| { |
| gl.enableVertexAttribArray(location); |
| gl.bindBuffer(GL_ARRAY_BUFFER, m_attribBuffers[attribNdx]); |
| gl.vertexAttribPointer(location, 4, GL_FLOAT, GL_FALSE, 0, DE_NULL); |
| } |
| } |
| GLU_EXPECT_NO_ERROR(gl.getError(), "Setup vertex input state"); |
| |
| gl.useProgram(program.getProgram()); |
| setGeneralUniforms(program.getProgram()); |
| gl.viewport(0, 0, m_viewportWidth, m_viewportHeight); |
| } |
| |
| void OperatorPerformanceCase::prepareWorkload (int progNdx, int workload) |
| { |
| setWorkloadSizeUniform(m_programs[progNdx]->getProgram(), workload); |
| render(m_calibrator.getCallCount()); |
| } |
| |
| void OperatorPerformanceCase::prepareNextRound (void) |
| { |
| DE_ASSERT(m_state == STATE_CALIBRATING || |
| m_state == STATE_FIND_HIGH_WORKLOAD || |
| m_state == STATE_MEASURING); |
| |
| TestLog& log = m_testCtx.getLog(); |
| |
| if (m_state == STATE_CALIBRATING && m_calibrator.getState() == TheilSenCalibrator::STATE_FINISHED) |
| { |
| m_measureProgramNdx = 0; |
| m_state = STATE_FIND_HIGH_WORKLOAD; |
| } |
| |
| if (m_state == STATE_CALIBRATING) |
| prepareWorkload(0, 1); |
| else if (m_state == STATE_FIND_HIGH_WORKLOAD) |
| { |
| vector<WorkloadRecord>& records = m_workloadRecordsFindHigh[m_measureProgramNdx]; |
| |
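| // Keep doubling the workload size until the median frame time reaches twice that of workload size 1 (bailing out if MAX_WORKLOAD_SIZE would be exceeded). |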
| if (records.empty() || records.back().getMedianTime() < 2.0f*records[0].getMedianTime()) |
| { |
| int workloadSize; |
| |
| if (records.empty()) |
| workloadSize = 1; |
| else |
| { |
| workloadSize = records.back().workloadSize*2; |
| |
| if (workloadSize > MAX_WORKLOAD_SIZE) |
| { |
| log << TestLog::Message << "Even workload size " << records.back().workloadSize |
| << " doesn't give high enough frame time for program " << m_measureProgramNdx |
| << ". Can't get sensible result." << TestLog::EndMessage; |
| MEASUREMENT_FAIL(); |
| } |
| } |
| |
| records.push_back(WorkloadRecord(workloadSize)); |
| prepareWorkload(0, workloadSize); |
| m_workloadMeasurementNdx = 0; |
| } |
| else |
| { |
| m_highWorkloadSizes[m_measureProgramNdx] = records.back().workloadSize; |
| m_measureProgramNdx++; |
| |
| if (m_measureProgramNdx >= (int)m_programs.size()) |
| { |
| m_state = STATE_MEASURING; |
| m_workloadNdx = -1; |
| m_measureProgramNdx = 0; |
| } |
| |
| prepareProgram(m_measureProgramNdx); |
| prepareNextRound(); |
| } |
| } |
| else |
| { |
| m_workloadNdx++; |
| |
| if (m_workloadNdx < m_numWorkloads) |
| { |
| DE_ASSERT(m_numWorkloads > 1); |
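| // Spread the m_numWorkloads measured workload sizes evenly over [1, highWorkload]; fall back to 1, 2, 3, ... if highWorkload is small. |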
| const int highWorkload = m_highWorkloadSizes[m_measureProgramNdx]; |
| const int workload = highWorkload > m_numWorkloads ? |
| 1 + m_workloadNdx*(highWorkload-1)/(m_numWorkloads-1) : |
| 1 + m_workloadNdx; |
| |
| prepareWorkload(m_measureProgramNdx, workload); |
| |
| m_workloadMeasurementNdx = 0; |
| |
| m_workloadRecords[m_measureProgramNdx].push_back(WorkloadRecord(workload)); |
| } |
| else |
| { |
| m_measureProgramNdx++; |
| |
| if (m_measureProgramNdx < (int)m_programs.size()) |
| { |
| m_workloadNdx = -1; |
| m_workloadMeasurementNdx = 0; |
| prepareProgram(m_measureProgramNdx); |
| prepareNextRound(); |
| } |
| else |
| m_state = STATE_REPORTING; |
| } |
| } |
| } |
| |
| void OperatorPerformanceCase::init (void) |
| { |
| TestLog& log = m_testCtx.getLog(); |
| const glw::Functions& gl = m_renderCtx.getFunctions(); |
| |
| // Validate that we have sane grid and viewport setup. |
| DE_ASSERT(de::inBounds(m_gridSizeX, 1, 256) && de::inBounds(m_gridSizeY, 1, 256)); |
| TCU_CHECK(de::inRange(m_viewportWidth, 1, m_renderCtx.getRenderTarget().getWidth()) && |
| de::inRange(m_viewportHeight, 1, m_renderCtx.getRenderTarget().getHeight())); |
| |
| logRenderTargetInfo(log, m_renderCtx.getRenderTarget()); |
| |
| log << TestLog::Message << "Using additive blending." << TestLog::EndMessage; |
| gl.enable(GL_BLEND); |
| gl.blendEquation(GL_FUNC_ADD); |
| gl.blendFunc(GL_ONE, GL_ONE); |
| |
| // Generate programs. |
| DE_ASSERT(m_programs.empty()); |
| m_programData = generateProgramData(); |
| DE_ASSERT(!m_programData.empty()); |
| |
| for (int progNdx = 0; progNdx < (int)m_programData.size(); progNdx++) |
| { |
| const string& vert = m_programData[progNdx].vertShaderSource; |
| const string& frag = m_programData[progNdx].fragShaderSource; |
| |
| m_programs.push_back(SharedPtr<ShaderProgram>(new ShaderProgram(m_renderCtx, glu::makeVtxFragSources(vert, frag)))); |
| |
| if (!m_programs.back()->isOk()) |
| { |
| log << *m_programs.back(); |
| TCU_FAIL("Compile failed"); |
| } |
| } |
| |
| // Log all programs. |
| for (int progNdx = 0; progNdx < (int)m_programs.size(); progNdx++) |
| log << TestLog::Section("Program" + de::toString(progNdx), "Program " + de::toString(progNdx)) |
| << TestLog::Message << m_programData[progNdx].description << TestLog::EndMessage |
| << *m_programs[progNdx] |
| << TestLog::EndSection; |
| |
| m_highWorkloadSizes.resize(m_programData.size()); |
| m_workloadRecordsFindHigh.resize(m_programData.size()); |
| m_workloadRecords.resize(m_programData.size()); |
| |
| m_calibrator.clear(CalibratorParameters(m_initialCalibrationStorage->initialNumCalls, 10 /* calibrate iteration frames */, 2000.0f /* calibrate iteration shortcut threshold (ms) */, 16 /* max calibrate iterations */, |
| 1000.0f/30.0f /* frame time (ms) */, 1000.0f/60.0f /* frame time cap (ms) */, 1000.0f /* target measure duration (ms) */)); |
| m_state = STATE_CALIBRATING; |
| |
| prepareProgram(0); |
| prepareNextRound(); |
| } |
| |
| void OperatorPerformanceCase::deinit (void) |
| { |
| if (!m_attribBuffers.empty()) |
| { |
| m_renderCtx.getFunctions().deleteBuffers((glw::GLsizei)m_attribBuffers.size(), &m_attribBuffers[0]); |
| m_attribBuffers.clear(); |
| } |
| |
| m_programs.clear(); |
| } |
| |
| void OperatorPerformanceCase::render (int numDrawCalls) |
| { |
| const glw::Functions& gl = m_renderCtx.getFunctions(); |
| const int numVertices = getNumVertices(m_gridSizeX, m_gridSizeY); |
| |
| for (int callNdx = 0; callNdx < numDrawCalls; callNdx++) |
| gl.drawArrays(GL_TRIANGLES, 0, numVertices); |
| |
| glu::readPixels(m_renderCtx, 0, 0, tcu::Surface(1, 1).getAccess()); // \note Serves as a more reliable replacement for glFinish(). |
| } |
| |
| deUint64 OperatorPerformanceCase::renderAndMeasure (int numDrawCalls) |
| { |
| const deUint64 startTime = deGetMicroseconds(); |
| render(numDrawCalls); |
| return deGetMicroseconds() - startTime; |
| } |
| |
| void OperatorPerformanceCase::adjustAndLogGridAndViewport (void) |
| { |
| TestLog& log = m_testCtx.getLog(); |
| |
| // If call count is just 1, and the target frame time still wasn't reached, reduce grid or viewport size. |
| if (m_calibrator.getCallCount() == 1) |
| { |
| const gls::MeasureState& calibratorMeasure = m_calibrator.getMeasureState(); |
| const float drawCallTime = (float)calibratorMeasure.getTotalTime() / (float)calibratorMeasure.frameTimes.size(); |
| const float targetDrawCallTime = m_calibrator.getParameters().targetFrameTimeUs; |
| const float targetRatio = targetDrawCallTime / drawCallTime; |
| |
| if (targetRatio < 0.95f) |
| { |
| // Reduce grid or viewport size assuming draw call time scales proportionally. |
| if (m_caseType == CASETYPE_VERTEX) |
| { |
| const float targetRatioSqrt = deFloatSqrt(targetRatio); |
| m_gridSizeX = (int)(targetRatioSqrt * (float)m_gridSizeX); |
| m_gridSizeY = (int)(targetRatioSqrt * (float)m_gridSizeY); |
| TCU_CHECK_MSG(m_gridSizeX >= 1 && m_gridSizeY >= 1, "Can't decrease grid size enough to achieve low-enough draw times"); |
| log << TestLog::Message << "Note: triangle grid size reduced from original; it's now smaller than during calibration." << TestLog::EndMessage; |
| } |
| else |
| { |
| const float targetRatioSqrt = deFloatSqrt(targetRatio); |
| m_viewportWidth = (int)(targetRatioSqrt * (float)m_viewportWidth); |
| m_viewportHeight = (int)(targetRatioSqrt * (float)m_viewportHeight); |
| TCU_CHECK_MSG(m_viewportWidth >= 1 && m_viewportHeight >= 1, "Can't decrease viewport size enough to achieve low-enough draw times"); |
| log << TestLog::Message << "Note: viewport size reduced from original; it's now smaller than during calibration." << TestLog::EndMessage; |
| } |
| } |
| } |
| |
| prepareProgram(0); |
| |
| // Log grid and viewport sizes. |
| log << TestLog::Message << "Grid size: " << m_gridSizeX << "x" << m_gridSizeY << TestLog::EndMessage; |
| log << TestLog::Message << "Viewport: " << m_viewportWidth << "x" << m_viewportHeight << TestLog::EndMessage; |
| } |
| |
| OperatorPerformanceCase::IterateResult OperatorPerformanceCase::iterate (void) |
| { |
| const TheilSenCalibrator::State calibratorState = m_calibrator.getState(); |
| |
| if (calibratorState != TheilSenCalibrator::STATE_FINISHED) |
| { |
| if (calibratorState == TheilSenCalibrator::STATE_RECOMPUTE_PARAMS) |
| m_calibrator.recomputeParameters(); |
| else if (calibratorState == TheilSenCalibrator::STATE_MEASURE) |
| m_calibrator.recordIteration(renderAndMeasure(m_calibrator.getCallCount())); |
| else |
| DE_ASSERT(false); |
| |
| if (m_calibrator.getState() == TheilSenCalibrator::STATE_FINISHED) |
| { |
| logCalibrationInfo(m_testCtx.getLog(), m_calibrator); |
| adjustAndLogGridAndViewport(); |
| prepareNextRound(); |
| m_initialCalibrationStorage->initialNumCalls = m_calibrator.getCallCount(); |
| } |
| } |
| else if (m_state == STATE_FIND_HIGH_WORKLOAD || m_state == STATE_MEASURING) |
| { |
| if (m_workloadMeasurementNdx < m_numMeasurementsPerWorkload) |
| { |
| vector<WorkloadRecord>& records = m_state == STATE_FIND_HIGH_WORKLOAD ? m_workloadRecordsFindHigh[m_measureProgramNdx] : m_workloadRecords[m_measureProgramNdx]; |
| records.back().addFrameTime((float)renderAndMeasure(m_calibrator.getCallCount())); |
| m_workloadMeasurementNdx++; |
| } |
| else |
| prepareNextRound(); |
| } |
| else |
| { |
| DE_ASSERT(m_state == STATE_REPORTING); |
| |
| TestLog& log = m_testCtx.getLog(); |
| const int drawCallCount = m_calibrator.getCallCount(); |
| |
| { |
| // Compute per-program estimators for measurements. |
| vector<SegmentedEstimator> estimators; |
| for (int progNdx = 0; progNdx < (int)m_programs.size(); progNdx++) |
| estimators.push_back(computeSegmentedEstimator(getWorkloadMedianDataPoints(progNdx))); |
| |
| // Log measurements and their estimators for all programs. |
| for (int progNdx = 0; progNdx < (int)m_programs.size(); progNdx++) |
| { |
| const SegmentedEstimator& estimator = estimators[progNdx]; |
| const string progNdxStr = de::toString(progNdx); |
| vector<WorkloadRecord> records = m_workloadRecords[progNdx]; |
| std::sort(records.begin(), records.end()); |
| |
| { |
| const tcu::ScopedLogSection section(log, |
| "Program" + progNdxStr + "Measurements", |
| "Measurements for program " + progNdxStr); |
| |
| // Sample list of individual frame times. |
| |
| log << TestLog::SampleList("Program" + progNdxStr + "IndividualFrameTimes", "Individual frame times") |
| << TestLog::SampleInfo << TestLog::ValueInfo("Workload", "Workload", "", QP_SAMPLE_VALUE_TAG_PREDICTOR) |
| << TestLog::ValueInfo("FrameTime", "Frame time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) |
| << TestLog::EndSampleInfo; |
| |
| for (int i = 0; i < (int)records.size(); i++) |
| for (int j = 0; j < (int)records[i].frameTimes.size(); j++) |
| log << TestLog::Sample << records[i].workloadSize << records[i].frameTimes[j] << TestLog::EndSample; |
| |
| log << TestLog::EndSampleList; |
| |
| // Sample list of median frame times. |
| |
| log << TestLog::SampleList("Program" + progNdxStr + "MedianFrameTimes", "Median frame times") |
| << TestLog::SampleInfo << TestLog::ValueInfo("Workload", "Workload", "", QP_SAMPLE_VALUE_TAG_PREDICTOR) |
| << TestLog::ValueInfo("MedianFrameTime", "Median frame time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) |
| << TestLog::EndSampleInfo; |
| |
| for (int i = 0; i < (int)records.size(); i++) |
| log << TestLog::Sample << records[i].workloadSize << records[i].getMedianTime() << TestLog::EndSample; |
| |
| log << TestLog::EndSampleList; |
| |
| log << TestLog::Float("Program" + progNdxStr + "WorkloadCostEstimate", "Workload cost estimate", "us / workload", QP_KEY_TAG_TIME, estimator.right.coefficient); |
| |
| if (estimator.pivotX > -std::numeric_limits<float>::infinity()) |
| log << TestLog::Message << "Note: the data points with x coordinate greater than or equal to " << estimator.pivotX |
| << " seem to form a rising line, and the rest of data points seem to form a near-horizontal line" << TestLog::EndMessage |
| << TestLog::Message << "Note: the left line is estimated to be " << lineParamsString(estimator.left) |
| << " and the right line " << lineParamsString(estimator.right) << TestLog::EndMessage; |
| else |
| log << TestLog::Message << "Note: the data seem to form a single line: " << lineParamsString(estimator.right) << TestLog::EndMessage; |
| } |
| } |
| |
| for (int progNdx = 0; progNdx < (int)m_programs.size(); progNdx++) |
| { |
| if (estimators[progNdx].right.coefficient <= 0.0f) |
| { |
| log << TestLog::Message << "Slope of measurements for program " << progNdx << " isn't positive. Can't get sensible result." << TestLog::EndMessage; |
| MEASUREMENT_FAIL(); |
| } |
| } |
| |
| // \note For each estimator, .right.coefficient is the increase in draw time (in microseconds) when |
| // incrementing shader workload size by 1, when D draw calls are done, with a vertex/fragment count |
| // of R. |
| // |
| // The measurements of any single program can't tell us the final result (time of single operation), |
| // so we use computeSingleOperationTime to compute it from multiple programs' measurements in a |
| // subclass-defined manner. |
| // |
| // After that, microseconds per operation can be calculated as singleOperationTime / (D * R). |
| |
| { |
| vector<float> perProgramSlopes; |
| for (int i = 0; i < (int)m_programs.size(); i++) |
| perProgramSlopes.push_back(estimators[i].right.coefficient); |
| |
| logSingleOperationCalculationInfo(); |
| |
| const float maxSlope = *std::max_element(perProgramSlopes.begin(), perProgramSlopes.end()); |
| const float usecsPerFramePerOp = computeSingleOperationTime(perProgramSlopes); |
| const int vertexOrFragmentCount = m_caseType == CASETYPE_VERTEX ? |
| getNumVertices(m_gridSizeX, m_gridSizeY) : |
| m_viewportWidth*m_viewportHeight; |
| const double usecsPerDrawCallPerOp = usecsPerFramePerOp / (double)drawCallCount; |
| const double usecsPerSingleOp = usecsPerDrawCallPerOp / (double)vertexOrFragmentCount; |
| const double megaOpsPerSecond = (double)(drawCallCount*vertexOrFragmentCount) / usecsPerFramePerOp; |
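| // Estimate the number of "free" operations as the x coordinate where the flat left line meets a line of slope usecsPerFramePerOp through the right line's offset. |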
| const int numFreeOps = de::max(0, (int)deFloatFloor(intersectionX(estimators[0].left, |
| LineParameters(estimators[0].right.offset, |
| usecsPerFramePerOp)))); |
| |
| log << TestLog::Integer("VertexOrFragmentCount", |
| "R = " + string(m_caseType == CASETYPE_VERTEX ? "Vertex" : "Fragment") + " count", |
| "", QP_KEY_TAG_NONE, vertexOrFragmentCount) |
| |
| << TestLog::Integer("DrawCallsPerFrame", "D = Draw calls per frame", "", QP_KEY_TAG_NONE, drawCallCount) |
| |
| << TestLog::Integer("VerticesOrFragmentsPerFrame", |
| "R*D = " + string(m_caseType == CASETYPE_VERTEX ? "Vertices" : "Fragments") + " per frame", |
| "", QP_KEY_TAG_NONE, vertexOrFragmentCount*drawCallCount) |
| |
| << TestLog::Float("TimePerFramePerOp", |
| "Estimated cost of R*D " + string(m_caseType == CASETYPE_VERTEX ? "vertices" : "fragments") |
| + " (i.e. one frame) with one shader operation", |
| "us", QP_KEY_TAG_TIME, (float)usecsPerFramePerOp) |
| |
| << TestLog::Float("TimePerDrawcallPerOp", |
| "Estimated cost of one draw call with one shader operation", |
| "us", QP_KEY_TAG_TIME, (float)usecsPerDrawCallPerOp) |
| |
| << TestLog::Float("TimePerSingleOp", |
| "Estimated cost of a single shader operation", |
| "us", QP_KEY_TAG_TIME, (float)usecsPerSingleOp); |
| |
| // \note Sometimes, when the operation is free or very cheap, it can happen that the shader with the operation runs, |
| // for some reason, a bit faster than the shader without the operation, and thus we get a negative result. The |
| // following threshold values for accepting a negative or almost-zero result are rather quick and dirty. |
| if (usecsPerFramePerOp <= -0.1f*maxSlope) |
| { |
| log << TestLog::Message << "Got strongly negative result." << TestLog::EndMessage; |
| MEASUREMENT_FAIL(); |
| } |
| else if (usecsPerFramePerOp <= 0.001*maxSlope) |
| { |
| log << TestLog::Message << "Cost of operation seems to be approximately zero." << TestLog::EndMessage; |
| m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass"); |
| } |
| else |
| { |
| log << TestLog::Float("OpsPerSecond", |
| "Operations per second", |
| "Million/s", QP_KEY_TAG_PERFORMANCE, (float)megaOpsPerSecond) |
| |
| << TestLog::Integer("NumFreeOps", |
| "Estimated number of \"free\" operations", |
| "", QP_KEY_TAG_PERFORMANCE, numFreeOps); |
| |
| m_testCtx.setTestResult(QP_TEST_RESULT_PASS, de::floatToString((float)megaOpsPerSecond, 2).c_str()); |
| } |
| |
| m_state = STATE_FINISHED; |
| } |
| } |
| |
| return STOP; |
| } |
| |
| return CONTINUE; |
| } |
| |
| // Binary operator case. |
| class BinaryOpCase : public OperatorPerformanceCase |
| { |
| public: |
| BinaryOpCase (Context& context, const char* name, const char* description, const char* op, |
| glu::DataType type, glu::Precision precision, bool useSwizzle, bool isVertex, const InitialCalibrationStorage& initialCalibration); |
| |
| protected: |
| vector<ProgramContext> generateProgramData (void) const; |
| void setGeneralUniforms (deUint32 program) const; |
| void setWorkloadSizeUniform (deUint32 program, int numOperations) const; |
| float computeSingleOperationTime (const vector<float>& perProgramOperationCosts) const; |
| void logSingleOperationCalculationInfo (void) const; |
| |
| private: |
| enum ProgramID |
| { |
| // \note 0-based sequential numbering is relevant, because these are also used as vector indices. |
| // \note The first program should be the heaviest, because OperatorPerformanceCase uses it to reduce grid/viewport size when going too slow. |
| PROGRAM_WITH_BIGGER_LOOP = 0, |
| PROGRAM_WITH_SMALLER_LOOP, |
| |
| PROGRAM_LAST |
| }; |
| |
| ProgramContext generateSingleProgramData (ProgramID) const; |
| |
| const string m_op; |
| const glu::DataType m_type; |
| const glu::Precision m_precision; |
| const bool m_useSwizzle; |
| }; |
| |
| BinaryOpCase::BinaryOpCase (Context& context, const char* name, const char* description, const char* op, |
| glu::DataType type, glu::Precision precision, bool useSwizzle, bool isVertex, const InitialCalibrationStorage& initialCalibration) |
| : OperatorPerformanceCase (context.getTestContext(), context.getRenderContext(), name, description, |
| isVertex ? CASETYPE_VERTEX : CASETYPE_FRAGMENT, NUM_WORKLOADS, initialCalibration) |
| , m_op (op) |
| , m_type (type) |
| , m_precision (precision) |
| , m_useSwizzle (useSwizzle) |
| { |
| } |
| |
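| // Builds a shader whose loop body applies the operator twice per independent calculation per unroll step; the two programs differ only in the unroll amount. |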
| BinaryOpCase::ProgramContext BinaryOpCase::generateSingleProgramData (ProgramID programID) const |
| { |
| DE_ASSERT(glu::isDataTypeFloatOrVec(m_type) || glu::isDataTypeIntOrIVec(m_type)); |
| |
| const bool isVertexCase = m_caseType == CASETYPE_VERTEX; |
| const char* const precision = glu::getPrecisionName(m_precision); |
| const char* const inputPrecision = glu::isDataTypeIntOrIVec(m_type) && m_precision == glu::PRECISION_LOWP ? "mediump" : precision; |
| const char* const typeName = getDataTypeName(m_type); |
| |
| std::ostringstream vtx; |
| std::ostringstream frag; |
| std::ostringstream& op = isVertexCase ? vtx : frag; |
| |
| // Attributes. |
| vtx << "attribute highp vec4 a_position;\n"; |
| for (int i = 0; i < BINARY_OPERATOR_CASE_NUM_INDEPENDENT_CALCULATIONS+1; i++) |
| vtx << "attribute " << inputPrecision << " vec4 a_in" << i << ";\n"; |
| |
| if (isVertexCase) |
| { |
| vtx << "varying mediump vec4 v_color;\n"; |
| frag << "varying mediump vec4 v_color;\n"; |
| } |
| else |
| { |
| for (int i = 0; i < BINARY_OPERATOR_CASE_NUM_INDEPENDENT_CALCULATIONS+1; i++) |
| { |
| vtx << "varying " << inputPrecision << " vec4 v_in" << i << ";\n"; |
| frag << "varying " << inputPrecision << " vec4 v_in" << i << ";\n"; |
| } |
| } |
| |
| op << "uniform mediump int u_numLoopIterations;\n"; |
| if (isVertexCase) |
| op << "uniform mediump float u_zero;\n"; |
| |
| vtx << "\n"; |
| vtx << "void main()\n"; |
| vtx << "{\n"; |
| |
| if (!isVertexCase) |
| vtx << "\tgl_Position = a_position;\n"; |
| |
| frag << "\n"; |
| frag << "void main()\n"; |
| frag << "{\n"; |
| |
| // Expression inputs. |
| const char* const prefix = isVertexCase ? "a_" : "v_"; |
| for (int i = 0; i < BINARY_OPERATOR_CASE_NUM_INDEPENDENT_CALCULATIONS+1; i++) |
| { |
| const int inSize = getDataTypeScalarSize(m_type); |
| const bool isInt = de::inRange<int>(m_type, TYPE_INT, TYPE_INT_VEC4); |
| const bool cast = isInt || (!m_useSwizzle && m_type != TYPE_FLOAT_VEC4); |
| |
| op << "\t" << precision << " " << typeName << " in" << i << " = "; |
| |
| if (cast) |
| op << typeName << "("; |
| |
| op << prefix << "in" << i; |
| |
| if (m_useSwizzle) |
| op << "." << s_swizzles[i % DE_LENGTH_OF_ARRAY(s_swizzles)][inSize-1]; |
| |
| if (cast) |
| op << ")"; |
| |
| op << ";\n"; |
| } |
| |
| // Operation accumulation variables. |
| for (int i = 0; i < BINARY_OPERATOR_CASE_NUM_INDEPENDENT_CALCULATIONS; i++) |
| { |
| op << "\t" << precision << " " << typeName << " acc" << i << "a" << " = in" << i+0 << ";\n"; |
| op << "\t" << precision << " " << typeName << " acc" << i << "b" << " = in" << i+1 << ";\n"; |
| } |
| |
| // Loop, with expressions in it. |
| op << "\tfor (int i = 0; i < u_numLoopIterations; i++)\n"; |
| op << "\t{\n"; |
| { |
| const int unrollAmount = programID == PROGRAM_WITH_SMALLER_LOOP ? BINARY_OPERATOR_CASE_SMALL_PROGRAM_UNROLL_AMOUNT : BINARY_OPERATOR_CASE_BIG_PROGRAM_UNROLL_AMOUNT; |
| for (int unrollNdx = 0; unrollNdx < unrollAmount; unrollNdx++) |
| { |
| for (int i = 0; i < BINARY_OPERATOR_CASE_NUM_INDEPENDENT_CALCULATIONS; i++) |
| { |
| if (i > 0 || unrollNdx > 0) |
| op << "\n"; |
| op << "\t\tacc" << i << "a = acc" << i << "b " << m_op << " acc" << i << "a" << ";\n"; |
| op << "\t\tacc" << i << "b = acc" << i << "a " << m_op << " acc" << i << "b" << ";\n"; |
| } |
| } |
| } |
| op << "\t}\n"; |
| op << "\n"; |
| |
| // Result variable (sum of accumulation variables). |
| op << "\t" << precision << " " << typeName << " res ="; |
| for (int i = 0; i < BINARY_OPERATOR_CASE_NUM_INDEPENDENT_CALCULATIONS; i++) |
| op << (i > 0 ? " "+m_op : "") << " acc" << i << "b"; |
| op << ";\n"; |
| |
| // Convert to color. |
| op << "\tmediump vec4 color = "; |
| if (m_type == TYPE_FLOAT_VEC4) |
| op << "res"; |
| else |
| { |
| int size = getDataTypeScalarSize(m_type); |
| op << "vec4(res"; |
| |
| for (int i = size; i < 4; i++) |
| op << ", " << (i == 3 ? "1.0" : "0.0"); |
| |
| op << ")"; |
| } |
| op << ";\n"; |
| op << "\t" << (isVertexCase ? "v_color" : "gl_FragColor") << " = color;\n"; |
| |
| if (isVertexCase) |
| { |
| vtx << " gl_Position = a_position + u_zero*color;\n"; |
| frag << " gl_FragColor = v_color;\n"; |
| } |
| else |
| { |
| for (int i = 0; i < BINARY_OPERATOR_CASE_NUM_INDEPENDENT_CALCULATIONS+1; i++) |
| vtx << " v_in" << i << " = a_in" << i << ";\n"; |
| } |
| |
| vtx << "}\n"; |
| frag << "}\n"; |
| |
| { |
| vector<AttribSpec> attributes; |
| for (int i = 0; i < BINARY_OPERATOR_CASE_NUM_INDEPENDENT_CALCULATIONS+1; i++) |
| attributes.push_back(AttribSpec(("a_in" + de::toString(i)).c_str(), |
| Vec4(2.0f, 2.0f, 2.0f, 1.0f).swizzle((i+0)%4, (i+1)%4, (i+2)%4, (i+3)%4), |
| Vec4(1.0f, 2.0f, 1.0f, 2.0f).swizzle((i+0)%4, (i+1)%4, (i+2)%4, (i+3)%4), |
| Vec4(2.0f, 1.0f, 2.0f, 2.0f).swizzle((i+0)%4, (i+1)%4, (i+2)%4, (i+3)%4), |
| Vec4(1.0f, 1.0f, 2.0f, 1.0f).swizzle((i+0)%4, (i+1)%4, (i+2)%4, (i+3)%4))); |
| |
| { |
| string description = "This is the program with the "; |
| |
| description += programID == PROGRAM_WITH_SMALLER_LOOP ? "smaller" |
| : programID == PROGRAM_WITH_BIGGER_LOOP ? "bigger" |
| : DE_NULL; |
| |
| description += " loop.\n" |
| "Note: workload size for this program means the number of loop iterations."; |
| |
| return ProgramContext(vtx.str(), frag.str(), attributes, description); |
| } |
| } |
| } |
| |
| vector<BinaryOpCase::ProgramContext> BinaryOpCase::generateProgramData (void) const |
| { |
| vector<ProgramContext> progData; |
| for (int i = 0; i < PROGRAM_LAST; i++) |
| progData.push_back(generateSingleProgramData((ProgramID)i)); |
| return progData; |
| } |
| |
| void BinaryOpCase::setGeneralUniforms (deUint32 program) const |
| { |
| const glw::Functions& gl = m_renderCtx.getFunctions(); |
| gl.uniform1f(gl.getUniformLocation(program, "u_zero"), 0.0f); |
| } |
| |
| void BinaryOpCase::setWorkloadSizeUniform (deUint32 program, int numLoopIterations) const |
| { |
| const glw::Functions& gl = m_renderCtx.getFunctions(); |
| gl.uniform1i(gl.getUniformLocation(program, "u_numLoopIterations"), numLoopIterations); |
| } |
| |
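| // Loop overhead is identical in both programs, so the cost of one operation is the difference of the per-iteration costs divided by the difference in operation counts. |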
| float BinaryOpCase::computeSingleOperationTime (const vector<float>& perProgramOperationCosts) const |
| { |
| DE_ASSERT(perProgramOperationCosts.size() == PROGRAM_LAST); |
| |
| const int baseNumOpsInsideLoop = 2 * BINARY_OPERATOR_CASE_NUM_INDEPENDENT_CALCULATIONS; |
| const int numOpsInsideLoopInSmallProgram = baseNumOpsInsideLoop * BINARY_OPERATOR_CASE_SMALL_PROGRAM_UNROLL_AMOUNT; |
| const int numOpsInsideLoopInBigProgram = baseNumOpsInsideLoop * BINARY_OPERATOR_CASE_BIG_PROGRAM_UNROLL_AMOUNT; |
| DE_STATIC_ASSERT(numOpsInsideLoopInBigProgram > numOpsInsideLoopInSmallProgram); |
| const int opDiff = numOpsInsideLoopInBigProgram - numOpsInsideLoopInSmallProgram; |
| const float programOperationCostDiff = perProgramOperationCosts[PROGRAM_WITH_BIGGER_LOOP] - perProgramOperationCosts[PROGRAM_WITH_SMALLER_LOOP]; |
| |
| return programOperationCostDiff / (float)opDiff; |
| } |
| |
| void BinaryOpCase::logSingleOperationCalculationInfo (void) const |
| { |
| const int baseNumOpsInsideLoop = 2 * BINARY_OPERATOR_CASE_NUM_INDEPENDENT_CALCULATIONS; |
| const int numOpsInsideLoopInSmallProgram = baseNumOpsInsideLoop * BINARY_OPERATOR_CASE_SMALL_PROGRAM_UNROLL_AMOUNT; |
| const int numOpsInsideLoopInBigProgram = baseNumOpsInsideLoop * BINARY_OPERATOR_CASE_BIG_PROGRAM_UNROLL_AMOUNT; |
| const int opDiff = numOpsInsideLoopInBigProgram - numOpsInsideLoopInSmallProgram; |
| const char* const opName = m_op == "+" ? "addition" |
| : m_op == "-" ? "subtraction" |
| : m_op == "*" ? "multiplication" |
| : m_op == "/" ? "division" |
| : DE_NULL; |
| DE_ASSERT(opName != DE_NULL); |
| |
| m_testCtx.getLog() << TestLog::Message << "Note: the bigger program contains " << opDiff << " more " |
| << opName << " operations in one loop iteration than the small program; " |
| << "cost of one operation is calculated as (cost_of_bigger_workload - cost_of_smaller_workload) / " << opDiff |
| << TestLog::EndMessage; |
| } |
| |
| // Built-in function case. |
| class FunctionCase : public OperatorPerformanceCase |
| { |
| public: |
| enum |
| { |
| MAX_PARAMS = 3 |
| }; |
| |
| FunctionCase (Context& context, |
| const char* name, |
| const char* description, |
| const char* func, |
| glu::DataType returnType, |
| const glu::DataType paramTypes[MAX_PARAMS], |
| const Vec4& attribute, |
| int modifyParamNdx, //!< Add a compile-time constant (2.0) to the parameter at this index. This is ignored if negative. |
| bool useNearlyConstantInputs, //!< Function inputs shouldn't be much bigger than 'attribute'. |
| glu::Precision precision, |
| bool isVertex, |
| const InitialCalibrationStorage& initialCalibration); |
| |
| protected: |
| vector<ProgramContext> generateProgramData (void) const; |
| void setGeneralUniforms (deUint32 program) const; |
| void setWorkloadSizeUniform (deUint32 program, int numOperations) const; |
| float computeSingleOperationTime (const vector<float>& perProgramOperationCosts) const; |
| void logSingleOperationCalculationInfo (void) const; |
| |
| private: |
| enum ProgramID |
| { |
| // \note 0-based sequential numbering is relevant, because these are also used as vector indices. |
| // \note The first program should be the heaviest, because OperatorPerformanceCase uses it to reduce grid/viewport size when going too slow. |
| PROGRAM_WITH_FUNCTION_CALLS = 0, |
| PROGRAM_WITHOUT_FUNCTION_CALLS, |
| |
| PROGRAM_LAST |
| }; |
| |
| //! Forms a "sum" expression from aExpr and bExpr; for booleans, this is "equal(a,b)", otherwise actual sum. |
| static string sumExpr (const string& aExpr, const string& bExpr, glu::DataType type); |
| //! Forms an expression used to increment an input value in the shader. If type is boolean, this is just |
| //! baseExpr; otherwise, baseExpr is modified by multiplication or division by a loop index, |
| //! to prevent simple compiler optimizations. See m_useNearlyConstantInputs for more explanation. |
| static string incrementExpr (const string& baseExpr, glu::DataType type, bool divide); |
| |
| ProgramContext generateSingleProgramData (ProgramID) const; |
| |
| const string m_func; |
| const glu::DataType m_returnType; |
| glu::DataType m_paramTypes[MAX_PARAMS]; |
| // \note m_modifyParamNdx, if not negative, specifies the index of the parameter to which a |
| // compile-time constant (2.0) is added. This is a quick and dirty way to deal with |
| // functions like clamp or smoothstep that require that a certain parameter is |
| // greater than a certain other parameter. |
| const int m_modifyParamNdx; |
| // \note m_useNearlyConstantInputs determines whether the inputs given to the function |
| // should increase (w.r.t m_attribute) only by very small amounts. This is relevant |
| // for functions like asin, which requires its inputs to be in a specific range. |
| // In practice, this affects whether expressions used to increment the input |
| // variables use division instead of multiplication; normally, multiplication is used, |
| // but it's hard to keep the increments very small that way, and division shouldn't |
| // be the default, since for many functions (probably not asin, luckily), division |
| // is too heavy and dominates time-wise. |
| const bool m_useNearlyConstantInputs; |
| const Vec4 m_attribute; |
| const glu::Precision m_precision; |
| }; |
| |
| FunctionCase::FunctionCase (Context& context, |
| const char* name, |
| const char* description, |
| const char* func, |
| glu::DataType returnType, |
| const glu::DataType paramTypes[MAX_PARAMS], |
| const Vec4& attribute, |
| int modifyParamNdx, |
| bool useNearlyConstantInputs, |
| glu::Precision precision, |
| bool isVertex, |
| const InitialCalibrationStorage& initialCalibration) |
| : OperatorPerformanceCase (context.getTestContext(), context.getRenderContext(), name, description, |
| isVertex ? CASETYPE_VERTEX : CASETYPE_FRAGMENT, NUM_WORKLOADS, initialCalibration) |
| , m_func (func) |
| , m_returnType (returnType) |
| , m_modifyParamNdx (modifyParamNdx) |
| , m_useNearlyConstantInputs (useNearlyConstantInputs) |
| , m_attribute (attribute) |
| , m_precision (precision) |
| { |
| for (int i = 0; i < MAX_PARAMS; i++) |
| m_paramTypes[i] = paramTypes[i]; |
| } |
| |
| string FunctionCase::sumExpr (const string& aExpr, const string& bExpr, glu::DataType type) |
| { |
| if (glu::isDataTypeBoolOrBVec(type)) |
| { |
| if (type == glu::TYPE_BOOL) |
| return "(" + aExpr + " == " + bExpr + ")"; |
| else |
| return "equal(" + aExpr + ", " + bExpr + ")"; |
| } |
| else |
| return "(" + aExpr + " + " + bExpr + ")"; |
| } |
| |
| string FunctionCase::incrementExpr (const string& baseExpr, glu::DataType type, bool divide) |
| { |
| const string mulOrDiv = divide ? "/" : "*"; |
| |
| return glu::isDataTypeBoolOrBVec(type) ? baseExpr |
| : glu::isDataTypeIntOrIVec(type) ? "(" + baseExpr + mulOrDiv + "(i+1))" |
| : "(" + baseExpr + mulOrDiv + "float(i+1))"; |
| } |
| |
| FunctionCase::ProgramContext FunctionCase::generateSingleProgramData (ProgramID programID) const |
| { |
| const bool isVertexCase = m_caseType == CASETYPE_VERTEX; |
| const char* const precision = glu::getPrecisionName(m_precision); |
| const char* const returnTypeName = getDataTypeName(m_returnType); |
| const string returnPrecisionMaybe = glu::isDataTypeBoolOrBVec(m_returnType) ? "" : string() + precision + " "; |
| const char* inputPrecision = DE_NULL; |
| const bool isMatrixReturn = isDataTypeMatrix(m_returnType); |
| int numParams = 0; |
| const char* paramTypeNames[MAX_PARAMS]; |
| string paramPrecisionsMaybe[MAX_PARAMS]; |
| |
| for (int i = 0; i < MAX_PARAMS; i++) |
| { |
| paramTypeNames[i] = getDataTypeName(m_paramTypes[i]); |
| paramPrecisionsMaybe[i] = glu::isDataTypeBoolOrBVec(m_paramTypes[i]) ? "" : string() + precision + " "; |
| |
| if (inputPrecision == DE_NULL && isDataTypeIntOrIVec(m_paramTypes[i]) && m_precision == glu::PRECISION_LOWP) |
| inputPrecision = "mediump"; |
| |
| if (m_paramTypes[i] != TYPE_INVALID) |
| numParams = i+1; |
| } |
| |
| DE_ASSERT(numParams > 0); |
| |
| if (inputPrecision == DE_NULL) |
| inputPrecision = precision; |
| |
| int numAttributes = FUNCTION_CASE_NUM_INDEPENDENT_CALCULATIONS + numParams - 1; |
| std::ostringstream vtx; |
| std::ostringstream frag; |
| std::ostringstream& op = isVertexCase ? vtx : frag; |
| |
| // Attributes. |
| vtx << "attribute highp vec4 a_position;\n"; |
| for (int i = 0; i < numAttributes; i++) |
| vtx << "attribute " << inputPrecision << " vec4 a_in" << i << ";\n"; |
| |
| if (isVertexCase) |
| { |
| vtx << "varying mediump vec4 v_color;\n"; |
| frag << "varying mediump vec4 v_color;\n"; |
| } |
| else |
| { |
| for (int i = 0; i < numAttributes; i++) |
| { |
| vtx << "varying " << inputPrecision << " vec4 v_in" << i << ";\n"; |
| frag << "varying " << inputPrecision << " vec4 v_in" << i << ";\n"; |
| } |
| } |
| |
| op << "uniform mediump int u_numLoopIterations;\n"; |
| if (isVertexCase) |
| op << "uniform mediump float u_zero;\n"; |
| |
| for (int paramNdx = 0; paramNdx < numParams; paramNdx++) |
| op << "uniform " << paramPrecisionsMaybe[paramNdx] << paramTypeNames[paramNdx] << " u_inc" << (char)('A'+paramNdx) << ";\n"; |
| |
| vtx << "\n"; |
| vtx << "void main()\n"; |
| vtx << "{\n"; |
| |
| if (!isVertexCase) |
| vtx << "\tgl_Position = a_position;\n"; |
| |
| frag << "\n"; |
| frag << "void main()\n"; |
| frag << "{\n"; |
| |
| // Function call input and return value accumulation variables. |
| { |
| const char* const inPrefix = isVertexCase ? "a_" : "v_"; |
| |
| for (int calcNdx = 0; calcNdx < FUNCTION_CASE_NUM_INDEPENDENT_CALCULATIONS; calcNdx++) |
| { |
| for (int paramNdx = 0; paramNdx < numParams; paramNdx++) |
| { |
| const glu::DataType paramType = m_paramTypes[paramNdx]; |
| const bool mustCast = paramType != glu::TYPE_FLOAT_VEC4; |
| |
| op << "\t" << paramPrecisionsMaybe[paramNdx] << paramTypeNames[paramNdx] << " in" << calcNdx << (char)('a'+paramNdx) << " = "; |
| |
| if (mustCast) |
| op << paramTypeNames[paramNdx] << "("; |
| |
| if (glu::isDataTypeMatrix(paramType)) |
| { |
| static const char* const swizzles[3] = { "x", "xy", "xyz" }; |
| const int numRows = glu::getDataTypeMatrixNumRows(paramType); |
| const int numCols = glu::getDataTypeMatrixNumColumns(paramType); |
| const string swizzle = numRows < 4 ? string() + "." + swizzles[numRows-1] : ""; |
| |
| for (int i = 0; i < numCols; i++) |
| op << (i > 0 ? ", " : "") << inPrefix << "in" << calcNdx+paramNdx << swizzle; |
| } |
| else |
| { |
| op << inPrefix << "in" << calcNdx+paramNdx; |
| |
| if (paramNdx == m_modifyParamNdx) |
| { |
| DE_ASSERT(glu::isDataTypeFloatOrVec(paramType)); |
| op << " + 2.0"; |
| } |
| } |
| |
| if (mustCast) |
| op << ")"; |
| |
| op << ";\n"; |
| } |
| |
| op << "\t" << returnPrecisionMaybe << returnTypeName << " res" << calcNdx << " = " << returnTypeName << "(0);\n"; |
| } |
| } |
| |
| // Workload loop: each iteration increments the inputs and accumulates either the function's result or a constant, depending on the program variant. |
| op << "\tfor (int i = 0; i < u_numLoopIterations; i++)\n"; |
| op << "\t{\n"; |
| for (int calcNdx = 0; calcNdx < FUNCTION_CASE_NUM_INDEPENDENT_CALCULATIONS; calcNdx++) |
| { |
| if (calcNdx > 0) |
| op << "\n"; |
| |
| op << "\t\t{\n"; |
| |
| for (int inputNdx = 0; inputNdx < numParams; inputNdx++) |
| { |
| const string inputName = "in" + de::toString(calcNdx) + (char)('a'+inputNdx); |
| const string incName = string() + "u_inc" + (char)('A'+inputNdx); |
| const string incExpr = incrementExpr(incName, m_paramTypes[inputNdx], m_useNearlyConstantInputs); |
| |
| op << "\t\t\t" << inputName << " = " << sumExpr(inputName, incExpr, m_paramTypes[inputNdx]) << ";\n"; |
| } |
| |
| op << "\t\t\t" << returnPrecisionMaybe << returnTypeName << " eval" << calcNdx << " = "; |
| |
| if (programID == PROGRAM_WITH_FUNCTION_CALLS) |
| { |
| op << m_func << "("; |
| |
| for (int paramNdx = 0; paramNdx < numParams; paramNdx++) |
| { |
| if (paramNdx > 0) |
| op << ", "; |
| |
| op << "in" << calcNdx << (char)('a'+paramNdx); |
| } |
| |
| op << ")"; |
| } |
| else |
| { |
| DE_ASSERT(programID == PROGRAM_WITHOUT_FUNCTION_CALLS); |
| op << returnTypeName << "(1)"; |
| } |
| |
| op << ";\n"; |
| |
| { |
| const string resName = "res" + de::toString(calcNdx); |
| const string evalName = "eval" + de::toString(calcNdx); |
| const string incExpr = incrementExpr(evalName, m_returnType, m_useNearlyConstantInputs); |
| |
| op << "\t\t\tres" << calcNdx << " = " << sumExpr(resName, incExpr, m_returnType) << ";\n"; |
| } |
| |
| op << "\t\t}\n"; |
| } |
| op << "\t}\n"; |
| op << "\n"; |
| |
| // Sum the per-calculation inputs and results so that every computation contributes to the final output color. |
| for (int inputNdx = 0; inputNdx < numParams; inputNdx++) |
| { |
| op << "\t" << paramPrecisionsMaybe[inputNdx] << paramTypeNames[inputNdx] << " sumIn" << (char)('A'+inputNdx) << " = "; |
| { |
| string expr = string() + "in0" + (char)('a'+inputNdx); |
| for (int i = 1; i < FUNCTION_CASE_NUM_INDEPENDENT_CALCULATIONS; i++) |
| expr = sumExpr(expr, string() + "in" + de::toString(i) + (char)('a'+inputNdx), m_paramTypes[inputNdx]); |
| op << expr; |
| } |
| op << ";\n"; |
| } |
| |
| op << "\t" << returnPrecisionMaybe << returnTypeName << " sumRes = "; |
| { |
| string expr = "res0"; |
| for (int i = 1; i < FUNCTION_CASE_NUM_INDEPENDENT_CALCULATIONS; i++) |
| expr = sumExpr(expr, "res" + de::toString(i), m_returnType); |
| op << expr; |
| } |
| op << ";\n"; |
| |
| { |
| glu::DataType finalResultDataType = glu::TYPE_LAST; |
| |
| if (glu::isDataTypeMatrix(m_returnType)) |
| { |
| finalResultDataType = m_returnType; |
| |
| op << "\t" << precision << " " << returnTypeName << " finalRes = "; |
| |
| for (int inputNdx = 0; inputNdx < numParams; inputNdx++) |
| { |
| DE_ASSERT(m_paramTypes[inputNdx] == m_returnType); |
| op << "sumIn" << (char)('A'+inputNdx) << " + "; |
| } |
| op << "sumRes;\n"; |
| } |
| else |
| { |
| int numFinalResComponents = glu::getDataTypeScalarSize(m_returnType); |
| for (int inputNdx = 0; inputNdx < numParams; inputNdx++) |
| numFinalResComponents = de::max(numFinalResComponents, glu::getDataTypeScalarSize(m_paramTypes[inputNdx])); |
| |
| finalResultDataType = getDataTypeFloatOrVec(numFinalResComponents); |
| |
| { |
| const string finalResType = glu::getDataTypeName(finalResultDataType); |
| op << "\t" << precision << " " << finalResType << " finalRes = "; |
| for (int inputNdx = 0; inputNdx < numParams; inputNdx++) |
| op << finalResType << "(sumIn" << (char)('A'+inputNdx) << ") + "; |
| op << finalResType << "(sumRes);\n"; |
| } |
| } |
| |
| // Convert to color. |
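| // For example, a scalar result becomes "vec4(finalRes, 0.0, 0.0, 1.0)", a vec2 result |
| // "vec4(finalRes, 0.0, 1.0)", and a mat3 result "vec4(finalRes[0] + finalRes[1] + finalRes[2], 1.0)". |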
| op << "\tmediump vec4 color = "; |
| if (finalResultDataType == TYPE_FLOAT_VEC4) |
| op << "finalRes"; |
| else |
| { |
| int size = isMatrixReturn ? getDataTypeMatrixNumRows(finalResultDataType) : getDataTypeScalarSize(finalResultDataType); |
| |
| op << "vec4("; |
| |
| if (isMatrixReturn) |
| { |
| for (int i = 0; i < getDataTypeMatrixNumColumns(finalResultDataType); i++) |
| { |
| if (i > 0) |
| op << " + "; |
| op << "finalRes[" << i << "]"; |
| } |
| } |
| else |
| op << "finalRes"; |
| |
| for (int i = size; i < 4; i++) |
| op << ", " << (i == 3 ? "1.0" : "0.0"); |
| |
| op << ")"; |
| } |
| op << ";\n"; |
| op << "\t" << (isVertexCase ? "v_color" : "gl_FragColor") << " = color;\n"; |
| |
| if (isVertexCase) |
| { |
| vtx << "\tgl_Position = a_position + u_zero*color;\n"; |
| frag << "\tgl_FragColor = v_color;\n"; |
| } |
| else |
| { |
| for (int i = 0; i < numAttributes; i++) |
| vtx << "\tv_in" << i << " = a_in" << i << ";\n"; |
| } |
| |
| vtx << "}\n"; |
| frag << "}\n"; |
| } |
| |
| { |
| vector<AttribSpec> attributes; |
| for (int i = 0; i < numAttributes; i++) |
| attributes.push_back(AttribSpec(("a_in" + de::toString(i)).c_str(), |
| m_attribute.swizzle((i+0)%4, (i+1)%4, (i+2)%4, (i+3)%4), |
| m_attribute.swizzle((i+1)%4, (i+2)%4, (i+3)%4, (i+0)%4), |
| m_attribute.swizzle((i+2)%4, (i+3)%4, (i+0)%4, (i+1)%4), |
| m_attribute.swizzle((i+3)%4, (i+0)%4, (i+1)%4, (i+2)%4))); |
| |
| { |
| string description = "This is the program "; |
| |
| description += programID == PROGRAM_WITHOUT_FUNCTION_CALLS ? "without" |
| : programID == PROGRAM_WITH_FUNCTION_CALLS ? "with" |
| : DE_NULL; |
| |
| description += " '" + m_func + "' function calls.\n" |
| "Note: workload size for this program means the number of loop iterations."; |
| |
| return ProgramContext(vtx.str(), frag.str(), attributes, description); |
| } |
| } |
| } |
| |
| vector<FunctionCase::ProgramContext> FunctionCase::generateProgramData (void) const |
| { |
| vector<ProgramContext> progData; |
| for (int i = 0; i < PROGRAM_LAST; i++) |
| progData.push_back(generateSingleProgramData((ProgramID)i)); |
| return progData; |
| } |
| |
| void FunctionCase::setGeneralUniforms (deUint32 program) const |
| { |
| const glw::Functions& gl = m_renderCtx.getFunctions(); |
| |
| gl.uniform1f(gl.getUniformLocation(program, "u_zero"), 0.0f); |
| |
| for (int paramNdx = 0; paramNdx < MAX_PARAMS; paramNdx++) |
| { |
| if (m_paramTypes[paramNdx] != glu::TYPE_INVALID) |
| { |
| const glu::DataType paramType = m_paramTypes[paramNdx]; |
| const int scalarSize = glu::getDataTypeScalarSize(paramType); |
| const int location = gl.getUniformLocation(program, (string() + "u_inc" + (char)('A'+paramNdx)).c_str()); |
| |
| if (glu::isDataTypeFloatOrVec(paramType)) |
| { |
| float values[4]; |
| for (int i = 0; i < DE_LENGTH_OF_ARRAY(values); i++) |
| values[i] = (float)paramNdx*0.01f + (float)i*0.001f; // Arbitrary small values. |
| uniformNfv(gl, scalarSize, location, 1, &values[0]); |
| } |
| else if (glu::isDataTypeIntOrIVec(paramType)) |
| { |
| int values[4]; |
| for (int i = 0; i < DE_LENGTH_OF_ARRAY(values); i++) |
| values[i] = paramNdx*100 + i; // Arbitrary values. |
| uniformNiv(gl, scalarSize, location, 1, &values[0]); |
| } |
| else if (glu::isDataTypeBoolOrBVec(paramType)) |
| { |
| int values[4]; |
| for (int i = 0; i < DE_LENGTH_OF_ARRAY(values); i++) |
| values[i] = (paramNdx >> i) & 1; // Arbitrary values. |
| uniformNiv(gl, scalarSize, location, 1, &values[0]); |
| } |
| else if (glu::isDataTypeMatrix(paramType)) |
| { |
| const int size = glu::getDataTypeMatrixNumRows(paramType); |
| DE_ASSERT(size == glu::getDataTypeMatrixNumColumns(paramType)); |
| float values[4*4]; |
| for (int i = 0; i < DE_LENGTH_OF_ARRAY(values); i++) |
| values[i] = (float)paramNdx*0.01f + (float)i*0.001f; // Arbitrary values. |
| uniformMatrixNfv(gl, size, location, 1, &values[0]); |
| } |
| else |
| DE_ASSERT(false); |
| } |
| } |
| } |
| |
| void FunctionCase::setWorkloadSizeUniform (deUint32 program, int numLoopIterations) const |
| { |
| const glw::Functions& gl = m_renderCtx.getFunctions(); |
| const int loc = gl.getUniformLocation(program, "u_numLoopIterations"); |
| |
| gl.uniform1i(loc, numLoopIterations); |
| } |
| |
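| // For example (hypothetical numbers), if one loop iteration costs 8.0 units in the program with |
| // function calls and 2.0 units in the program without them, the estimated cost of a single call |
| // is (8.0 - 2.0) / 4 = 1.5 units, since each iteration contains four independent calls. |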
| float FunctionCase::computeSingleOperationTime (const vector<float>& perProgramOperationCosts) const |
| { |
| DE_ASSERT(perProgramOperationCosts.size() == PROGRAM_LAST); |
| const int numFunctionCalls = FUNCTION_CASE_NUM_INDEPENDENT_CALCULATIONS; |
| const float programOperationCostDiff = perProgramOperationCosts[PROGRAM_WITH_FUNCTION_CALLS] - perProgramOperationCosts[PROGRAM_WITHOUT_FUNCTION_CALLS]; |
| |
| return programOperationCostDiff / (float)numFunctionCalls; |
| } |
| |
| void FunctionCase::logSingleOperationCalculationInfo (void) const |
| { |
| const int numFunctionCalls = FUNCTION_CASE_NUM_INDEPENDENT_CALCULATIONS; |
| |
| m_testCtx.getLog() << TestLog::Message << "Note: program " << (int)PROGRAM_WITH_FUNCTION_CALLS << " contains " |
| << numFunctionCalls << " calls to '" << m_func << "' in one loop iteration; " |
| << "cost of one operation is calculated as " |
| << "(cost_of_workload_with_calls - cost_of_workload_without_calls) / " << numFunctionCalls << TestLog::EndMessage; |
| } |
| |
| } // anonymous |
| |
| ShaderOperatorTests::ShaderOperatorTests (Context& context) |
| : TestCaseGroup(context, "operator", "Operator Performance Tests") |
| { |
| } |
| |
| ShaderOperatorTests::~ShaderOperatorTests (void) |
| { |
| } |
| |
| void ShaderOperatorTests::init (void) |
| { |
| // Binary operator cases |
| |
| static const DataType binaryOpTypes[] = |
| { |
| TYPE_FLOAT, |
| TYPE_FLOAT_VEC2, |
| TYPE_FLOAT_VEC3, |
| TYPE_FLOAT_VEC4, |
| TYPE_INT, |
| TYPE_INT_VEC2, |
| TYPE_INT_VEC3, |
| TYPE_INT_VEC4, |
| }; |
| static const Precision precisions[] = |
| { |
| PRECISION_LOWP, |
| PRECISION_MEDIUMP, |
| PRECISION_HIGHP |
| }; |
| static const struct |
| { |
| const char* name; |
| const char* op; |
| bool swizzle; |
| } binaryOps[] = |
| { |
| { "add", "+", false }, |
| { "sub", "-", true }, |
| { "mul", "*", false }, |
| { "div", "/", true } |
| }; |
| |
| tcu::TestCaseGroup* const binaryOpsGroup = new tcu::TestCaseGroup(m_testCtx, "binary_operator", "Binary Operator Performance Tests"); |
| addChild(binaryOpsGroup); |
| |
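| // The resulting case hierarchy is e.g. binary_operator.add.vertex.lowp_float (operator -> |
| // shader stage -> precision_type), and all cases within one vertex/fragment subgroup share |
| // the same InitialCalibration object. |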
| for (int opNdx = 0; opNdx < DE_LENGTH_OF_ARRAY(binaryOps); opNdx++) |
| { |
| tcu::TestCaseGroup* const opGroup = new tcu::TestCaseGroup(m_testCtx, binaryOps[opNdx].name, ""); |
| binaryOpsGroup->addChild(opGroup); |
| |
| for (int isFrag = 0; isFrag <= 1; isFrag++) |
| { |
| const BinaryOpCase::InitialCalibrationStorage shaderGroupCalibrationStorage (new BinaryOpCase::InitialCalibration); |
| const bool isVertex = isFrag == 0; |
| tcu::TestCaseGroup* const shaderGroup = new tcu::TestCaseGroup(m_testCtx, isVertex ? "vertex" : "fragment", ""); |
| opGroup->addChild(shaderGroup); |
| |
| for (int typeNdx = 0; typeNdx < DE_LENGTH_OF_ARRAY(binaryOpTypes); typeNdx++) |
| { |
| for (int precNdx = 0; precNdx < DE_LENGTH_OF_ARRAY(precisions); precNdx++) |
| { |
| const DataType type = binaryOpTypes[typeNdx]; |
| const Precision precision = precisions[precNdx]; |
| const char* const op = binaryOps[opNdx].op; |
| const bool useSwizzle = binaryOps[opNdx].swizzle; |
| std::ostringstream name; |
| |
| name << getPrecisionName(precision) << "_" << getDataTypeName(type); |
| |
| shaderGroup->addChild(new BinaryOpCase(m_context, name.str().c_str(), "", op, type, precision, useSwizzle, isVertex, shaderGroupCalibrationStorage)); |
| } |
| } |
| } |
| } |
| |
| // Built-in function cases. |
| |
| // Non-specific (i.e. including gentypes) return and parameter types for the functions. |
| enum ValueType |
| { |
| VALUE_NONE = 0, |
| VALUE_FLOAT = (1<<0), // float scalar |
| VALUE_FLOAT_VEC = (1<<1), // float vector |
| VALUE_FLOAT_VEC34 = (1<<2), // float vector of size 3 or 4 |
| VALUE_FLOAT_GENTYPE = (1<<3), // float scalar/vector |
| VALUE_VEC3 = (1<<4), // vec3 only |
| VALUE_VEC4 = (1<<5), // vec4 only |
| VALUE_MATRIX = (1<<6), // matrix |
| VALUE_BOOL = (1<<7), // boolean scalar |
| VALUE_BOOL_VEC = (1<<8), // boolean vector |
| VALUE_BOOL_GENTYPE = (1<<9), // boolean scalar/vector |
| VALUE_INT = (1<<10), // int scalar |
| VALUE_INT_VEC = (1<<11), // int vector |
| VALUE_INT_GENTYPE = (1<<12), // int scalar/vector |
| |
| // Shorthands. |
| N = VALUE_NONE, |
| F = VALUE_FLOAT, |
| FV = VALUE_FLOAT_VEC, |
| VL = VALUE_FLOAT_VEC34, // L for "large" |
| GT = VALUE_FLOAT_GENTYPE, |
| V3 = VALUE_VEC3, |
| V4 = VALUE_VEC4, |
| M = VALUE_MATRIX, |
| B = VALUE_BOOL, |
| BV = VALUE_BOOL_VEC, |
| BGT = VALUE_BOOL_GENTYPE, |
| I = VALUE_INT, |
| IV = VALUE_INT_VEC, |
| IGT = VALUE_INT_GENTYPE, |
| |
| VALUE_ANY_FLOAT = VALUE_FLOAT | VALUE_FLOAT_VEC | VALUE_FLOAT_GENTYPE | VALUE_VEC3 | VALUE_VEC4 | VALUE_FLOAT_VEC34, |
| VALUE_ANY_INT = VALUE_INT | VALUE_INT_VEC | VALUE_INT_GENTYPE, |
| VALUE_ANY_BOOL = VALUE_BOOL | VALUE_BOOL_VEC | VALUE_BOOL_GENTYPE, |
| |
| VALUE_ANY_GENTYPE = VALUE_FLOAT_VEC | VALUE_FLOAT_GENTYPE | VALUE_FLOAT_VEC34 | |
| VALUE_BOOL_VEC | VALUE_BOOL_GENTYPE | |
| VALUE_INT_VEC | VALUE_INT_GENTYPE | |
| VALUE_MATRIX |
| }; |
| enum PrecisionMask |
| { |
| PRECMASK_NA = 0, //!< Precision not applicable (booleans) |
| PRECMASK_LOWP = (1<<PRECISION_LOWP), |
| PRECMASK_MEDIUMP = (1<<PRECISION_MEDIUMP), |
| PRECMASK_HIGHP = (1<<PRECISION_HIGHP), |
| |
| PRECMASK_MEDIUMP_HIGHP = (1<<PRECISION_MEDIUMP) | (1<<PRECISION_HIGHP), |
| PRECMASK_ALL = (1<<PRECISION_LOWP) | (1<<PRECISION_MEDIUMP) | (1<<PRECISION_HIGHP) |
| }; |
| |
| static const DataType floatTypes[] = |
| { |
| TYPE_FLOAT, |
| TYPE_FLOAT_VEC2, |
| TYPE_FLOAT_VEC3, |
| TYPE_FLOAT_VEC4 |
| }; |
| static const DataType intTypes[] = |
| { |
| TYPE_INT, |
| TYPE_INT_VEC2, |
| TYPE_INT_VEC3, |
| TYPE_INT_VEC4 |
| }; |
| static const DataType boolTypes[] = |
| { |
| TYPE_BOOL, |
| TYPE_BOOL_VEC2, |
| TYPE_BOOL_VEC3, |
| TYPE_BOOL_VEC4 |
| }; |
| static const DataType matrixTypes[] = |
| { |
| TYPE_FLOAT_MAT2, |
| TYPE_FLOAT_MAT3, |
| TYPE_FLOAT_MAT4 |
| }; |
| |
| tcu::TestCaseGroup* const angleAndTrigonometryGroup = new tcu::TestCaseGroup(m_testCtx, "angle_and_trigonometry", "Built-In Angle and Trigonometry Function Performance Tests"); |
| tcu::TestCaseGroup* const exponentialGroup = new tcu::TestCaseGroup(m_testCtx, "exponential", "Built-In Exponential Function Performance Tests"); |
| tcu::TestCaseGroup* const commonFunctionsGroup = new tcu::TestCaseGroup(m_testCtx, "common_functions", "Built-In Common Function Performance Tests"); |
| tcu::TestCaseGroup* const geometricFunctionsGroup = new tcu::TestCaseGroup(m_testCtx, "geometric", "Built-In Geometric Function Performance Tests"); |
| tcu::TestCaseGroup* const matrixFunctionsGroup = new tcu::TestCaseGroup(m_testCtx, "matrix", "Built-In Matrix Function Performance Tests"); |
| tcu::TestCaseGroup* const floatCompareGroup = new tcu::TestCaseGroup(m_testCtx, "float_compare", "Built-In Floating Point Comparison Function Performance Tests"); |
| tcu::TestCaseGroup* const intCompareGroup = new tcu::TestCaseGroup(m_testCtx, "int_compare", "Built-In Integer Comparison Function Performance Tests"); |
| tcu::TestCaseGroup* const boolCompareGroup = new tcu::TestCaseGroup(m_testCtx, "bool_compare", "Built-In Boolean Comparison Function Performance Tests"); |
| |
| addChild(angleAndTrigonometryGroup); |
| addChild(exponentialGroup); |
| addChild(commonFunctionsGroup); |
| addChild(geometricFunctionsGroup); |
| addChild(matrixFunctionsGroup); |
| addChild(floatCompareGroup); |
| addChild(intCompareGroup); |
| addChild(boolCompareGroup); |
| |
| // Some attributes to be used as parameters for the functions. |
| const Vec4 attrPos = Vec4( 2.3f, 1.9f, 0.8f, 0.7f); |
| const Vec4 attrNegPos = Vec4(-1.3f, 2.5f, -3.5f, 4.3f); |
| const Vec4 attrSmall = Vec4(-0.9f, 0.8f, -0.4f, 0.2f); |
| |
| // Function name, return type and parameter type information; also, what attribute should be used in the test. |
| // \note Different versions of the same function (i.e. with the same group name) can be defined by putting them successively in this array. |
| // \note In order to reduce case count and thus total execution time, we don't test all input type combinations for every function. |
| static const struct |
| { |
| tcu::TestCaseGroup* parentGroup; |
| const char* groupName; |
| const char* func; |
| const ValueType types[FunctionCase::MAX_PARAMS + 1]; // Return type and parameter types, in that order. |
| const Vec4& attribute; |
| int modifyParamNdx; |
| bool useNearlyConstantInputs; |
| bool booleanCase; |
| PrecisionMask precMask; |
| } functionCaseGroups[] = |
| { |
| { angleAndTrigonometryGroup, "radians", "radians", { F, F, N, N }, attrNegPos, -1, false, false, PRECMASK_ALL }, |
| { angleAndTrigonometryGroup, "degrees", "degrees", { F, F, N, N }, attrNegPos, -1, false, false, PRECMASK_ALL }, |
| { angleAndTrigonometryGroup, "sin", "sin", { F, F, N, N }, attrNegPos, -1, false, false, PRECMASK_ALL }, |
| { angleAndTrigonometryGroup, "cos", "cos", { F, F, N, N }, attrNegPos, -1, false, false, PRECMASK_ALL }, |
| { angleAndTrigonometryGroup, "tan", "tan", { F, F, N, N }, attrNegPos, -1, false, false, PRECMASK_ALL }, |
| { angleAndTrigonometryGroup, "asin", "asin", { F, F, N, N }, attrSmall, -1, true, false, PRECMASK_ALL }, |
| { angleAndTrigonometryGroup, "acos", "acos", { F, F, N, N }, attrSmall, -1, true, false, PRECMASK_ALL }, |
| { angleAndTrigonometryGroup, "atan2", "atan", { F, F, F, N }, attrNegPos, -1, false, false, PRECMASK_ALL }, |
| { angleAndTrigonometryGroup, "atan", "atan", { F, F, N, N }, attrNegPos, -1, false, false, PRECMASK_ALL }, |
| |
| { exponentialGroup, "pow", "pow", { F, F, F, N }, attrPos, -1, false, false, PRECMASK_ALL }, |
| { exponentialGroup, "exp", "exp", { F, F, N, N }, attrNegPos, -1, false, false, PRECMASK_ALL }, |
| { exponentialGroup, "log", "log", { F, F, N, N }, attrPos, -1, false, false, PRECMASK_ALL }, |
| { exponentialGroup, "exp2", "exp2", { F, F, N, N }, attrNegPos, -1, false, false, PRECMASK_ALL }, |
| { exponentialGroup, "log2", "log2", { F, F, N, N }, attrPos, -1, false, false, PRECMASK_ALL }, |
| { exponentialGroup, "sqrt", "sqrt", { F, F, N, N }, attrPos, -1, false, false, PRECMASK_ALL }, |
| { exponentialGroup, "inversesqrt", "inversesqrt", { F, F, N, N }, attrPos, -1, false, false, PRECMASK_ALL }, |
| |
| { commonFunctionsGroup, "abs", "abs", { F, F, N, N }, attrNegPos, -1, false, false, PRECMASK_MEDIUMP_HIGHP }, |
| { commonFunctionsGroup, "abs", "abs", { V4, V4, N, N }, attrNegPos, -1, false, false, PRECMASK_ALL }, |
| { commonFunctionsGroup, "sign", "sign", { F, F, N, N }, attrNegPos, -1, false, false, PRECMASK_MEDIUMP_HIGHP }, |
| { commonFunctionsGroup, "sign", "sign", { V4, V4, N, N }, attrNegPos, -1, false, false, PRECMASK_ALL }, |
| { commonFunctionsGroup, "floor", "floor", { F, F, N, N }, attrNegPos, -1, false, false, PRECMASK_MEDIUMP_HIGHP }, |
| { commonFunctionsGroup, "floor", "floor", { V4, V4, N, N }, attrNegPos, -1, false, false, PRECMASK_ALL }, |
| { commonFunctionsGroup, "ceil", "ceil", { F, F, N, N }, attrNegPos, -1, false, false, PRECMASK_MEDIUMP_HIGHP }, |
| { commonFunctionsGroup, "ceil", "ceil", { V4, V4, N, N }, attrNegPos, -1, false, false, PRECMASK_ALL }, |
| { commonFunctionsGroup, "fract", "fract", { F, F, N, N }, attrNegPos, -1, false, false, PRECMASK_MEDIUMP_HIGHP }, |
| { commonFunctionsGroup, "fract", "fract", { V4, V4, N, N }, attrNegPos, -1, false, false, PRECMASK_ALL }, |
| { commonFunctionsGroup, "mod", "mod", { GT, GT, GT, N }, attrNegPos, -1, false, false, PRECMASK_ALL }, |
| { commonFunctionsGroup, "min", "min", { F, F, F, N }, attrNegPos, -1, false, false, PRECMASK_MEDIUMP_HIGHP }, |
| { commonFunctionsGroup, "min", "min", { V4, V4, V4, N }, attrNegPos, -1, false, false, PRECMASK_ALL }, |
| { commonFunctionsGroup, "max", "max", { F, F, F, N }, attrNegPos, -1, false, false, PRECMASK_MEDIUMP_HIGHP }, |
| { commonFunctionsGroup, "max", "max", { V4, V4, V4, N }, attrNegPos, -1, false, false, PRECMASK_ALL }, |
| { commonFunctionsGroup, "clamp", "clamp", { F, F, F, F }, attrSmall, 2, false, false, PRECMASK_MEDIUMP_HIGHP }, |
| { commonFunctionsGroup, "clamp", "clamp", { V4, V4, V4, V4 }, attrSmall, 2, false, false, PRECMASK_ALL }, |
| { commonFunctionsGroup, "mix", "mix", { F, F, F, F }, attrNegPos, -1, false, false, PRECMASK_MEDIUMP_HIGHP }, |
| { commonFunctionsGroup, "mix", "mix", { V4, V4, V4, V4 }, attrNegPos, -1, false, false, PRECMASK_ALL }, |
| { commonFunctionsGroup, "step", "step", { F, F, F, N }, attrNegPos, -1, false, false, PRECMASK_MEDIUMP_HIGHP }, |
| { commonFunctionsGroup, "step", "step", { V4, V4, V4, N }, attrNegPos, -1, false, false, PRECMASK_ALL }, |
| { commonFunctionsGroup, "smoothstep", "smoothstep", { F, F, F, F }, attrSmall, 1, false, false, PRECMASK_MEDIUMP_HIGHP }, |
| { commonFunctionsGroup, "smoothstep", "smoothstep", { V4, V4, V4, V4 }, attrSmall, 1, false, false, PRECMASK_ALL }, |
| |
| { geometricFunctionsGroup, "length", "length", { F, VL, N, N }, attrNegPos, -1, false, false, PRECMASK_ALL }, |
| { geometricFunctionsGroup, "distance", "distance", { F, VL, VL, N }, attrNegPos, -1, false, false, PRECMASK_ALL }, |
| { geometricFunctionsGroup, "dot", "dot", { F, VL, VL, N }, attrNegPos, -1, false, false, PRECMASK_ALL }, |
| { geometricFunctionsGroup, "cross", "cross", { V3, V3, V3, N }, attrNegPos, -1, false, false, PRECMASK_ALL }, |
| { geometricFunctionsGroup, "normalize", "normalize", { VL, VL, N, N }, attrNegPos, -1, false, false, PRECMASK_ALL }, |
| { geometricFunctionsGroup, "faceforward", "faceforward", { VL, VL, VL, VL }, attrNegPos, -1, false, false, PRECMASK_ALL }, |
| { geometricFunctionsGroup, "reflect", "reflect", { VL, VL, VL, N }, attrNegPos, -1, false, false, PRECMASK_ALL }, |
| { geometricFunctionsGroup, "refract", "refract", { VL, VL, VL, F }, attrNegPos, -1, false, false, PRECMASK_ALL }, |
| |
| { matrixFunctionsGroup, "matrixCompMult", "matrixCompMult", { M, M, M, N }, attrNegPos, -1, false, false, PRECMASK_ALL }, |
| |
| { floatCompareGroup, "lessThan", "lessThan", { BV, FV, FV, N }, attrNegPos, -1, false, false, PRECMASK_ALL }, |
| { floatCompareGroup, "lessThanEqual", "lessThanEqual", { BV, FV, FV, N }, attrNegPos, -1, false, false, PRECMASK_ALL }, |
| { floatCompareGroup, "greaterThan", "greaterThan", { BV, FV, FV, N }, attrNegPos, -1, false, false, PRECMASK_ALL }, |
| { floatCompareGroup, "greaterThanEqual", "greaterThanEqual", { BV, FV, FV, N }, attrNegPos, -1, false, false, PRECMASK_ALL }, |
| { floatCompareGroup, "equal", "equal", { BV, FV, FV, N }, attrNegPos, -1, false, false, PRECMASK_ALL }, |
| { floatCompareGroup, "notEqual", "notEqual", { BV, FV, FV, N }, attrNegPos, -1, false, false, PRECMASK_ALL }, |
| |
| { intCompareGroup, "lessThan", "lessThan", { BV, IV, IV, N }, attrNegPos, -1, false, false, PRECMASK_ALL }, |
| { intCompareGroup, "lessThanEqual", "lessThanEqual", { BV, IV, IV, N }, attrNegPos, -1, false, false, PRECMASK_ALL }, |
| { intCompareGroup, "greaterThan", "greaterThan", { BV, IV, IV, N }, attrNegPos, -1, false, false, PRECMASK_ALL }, |
| { intCompareGroup, "greaterThanEqual", "greaterThanEqual", { BV, IV, IV, N }, attrNegPos, -1, false, false, PRECMASK_ALL }, |
| { intCompareGroup, "equal", "equal", { BV, IV, IV, N }, attrNegPos, -1, false, false, PRECMASK_ALL }, |
| { intCompareGroup, "notEqual", "notEqual", { BV, IV, IV, N }, attrNegPos, -1, false, false, PRECMASK_ALL }, |
| |
| { boolCompareGroup, "equal", "equal", { BV, BV, BV, N }, attrNegPos, -1, false, true, PRECMASK_MEDIUMP }, |
| { boolCompareGroup, "notEqual", "notEqual", { BV, BV, BV, N }, attrNegPos, -1, false, true, PRECMASK_MEDIUMP }, |
| { boolCompareGroup, "any", "any", { B, BV, N, N }, attrNegPos, -1, false, true, PRECMASK_MEDIUMP }, |
| { boolCompareGroup, "all", "all", { B, BV, N, N }, attrNegPos, -1, false, true, PRECMASK_MEDIUMP }, |
| { boolCompareGroup, "not", "not", { BV, BV, N, N }, attrNegPos, -1, false, true, PRECMASK_MEDIUMP } |
| }; |
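| // For example, the two successive "abs" rows above end up in the same "abs" group under |
| // commonFunctionsGroup: the first contributes the scalar float cases (mediump and highp only) |
| // and the second the vec4 cases at all three precisions. |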
| |
| // vertexSubGroup and fragmentSubGroup are the groups where the various vertex/fragment cases of a single function are added. |
| // \note These are defined here so that different versions (different entries in the functionCaseGroups array) of the same function can be put in the same group. |
| tcu::TestCaseGroup* vertexSubGroup = DE_NULL; |
| tcu::TestCaseGroup* fragmentSubGroup = DE_NULL; |
| FunctionCase::InitialCalibrationStorage vertexSubGroupCalibrationStorage; |
| FunctionCase::InitialCalibrationStorage fragmentSubGroupCalibrationStorage; |
| for (int funcNdx = 0; funcNdx < DE_LENGTH_OF_ARRAY(functionCaseGroups); funcNdx++) |
| { |
| tcu::TestCaseGroup* const parentGroup = functionCaseGroups[funcNdx].parentGroup; |
| const char* const groupName = functionCaseGroups[funcNdx].groupName; |
| const char* const groupFunc = functionCaseGroups[funcNdx].func; |
| const ValueType* const funcTypes = functionCaseGroups[funcNdx].types; |
| const Vec4& groupAttribute = functionCaseGroups[funcNdx].attribute; |
| const int modifyParamNdx = functionCaseGroups[funcNdx].modifyParamNdx; |
| const bool useNearlyConstantInputs = functionCaseGroups[funcNdx].useNearlyConstantInputs; |
| const bool booleanCase = functionCaseGroups[funcNdx].booleanCase; |
| const PrecisionMask precMask = functionCaseGroups[funcNdx].precMask; |
| |
| // If this is a new function and not just a different version of the previously defined function, create a new group. |
| if (funcNdx == 0 || parentGroup != functionCaseGroups[funcNdx-1].parentGroup || string(groupName) != functionCaseGroups[funcNdx-1].groupName) |
| { |
| tcu::TestCaseGroup* const funcGroup = new tcu::TestCaseGroup(m_testCtx, groupName, ""); |
| functionCaseGroups[funcNdx].parentGroup->addChild(funcGroup); |
| |
| vertexSubGroup = new tcu::TestCaseGroup(m_testCtx, "vertex", ""); |
| fragmentSubGroup = new tcu::TestCaseGroup(m_testCtx, "fragment", ""); |
| |
| funcGroup->addChild(vertexSubGroup); |
| funcGroup->addChild(fragmentSubGroup); |
| |
| vertexSubGroupCalibrationStorage = FunctionCase::InitialCalibrationStorage(new FunctionCase::InitialCalibration); |
| fragmentSubGroupCalibrationStorage = FunctionCase::InitialCalibrationStorage(new FunctionCase::InitialCalibration); |
| } |
| |
| DE_ASSERT(vertexSubGroup != DE_NULL); |
| DE_ASSERT(fragmentSubGroup != DE_NULL); |
| |
| // Find the type size range of parameters (e.g. from 2 to 4 in case of vectors). |
| int genTypeFirstSize = 1; |
| int genTypeLastSize = 1; |
| |
| // Find the first return value or parameter with a gentype (if any) and set sizes accordingly. |
| // \note Assumes that any gentypes found have matching sizes, e.g. no "genType func (vec param)". |
| for (int i = 0; i < FunctionCase::MAX_PARAMS + 1 && genTypeLastSize == 1; i++) |
| { |
| switch (funcTypes[i]) |
| { |
| case VALUE_FLOAT_VEC: |
| case VALUE_BOOL_VEC: |
| case VALUE_INT_VEC: // \note Fall-through. |
| genTypeFirstSize = 2; |
| genTypeLastSize = 4; |
| break; |
| case VALUE_FLOAT_VEC34: |
| genTypeFirstSize = 3; |
| genTypeLastSize = 4; |
| break; |
| case VALUE_FLOAT_GENTYPE: |
| case VALUE_BOOL_GENTYPE: |
| case VALUE_INT_GENTYPE: // \note Fall-through. |
| genTypeFirstSize = 1; |
| genTypeLastSize = 4; |
| break; |
| case VALUE_MATRIX: |
| genTypeFirstSize = 2; |
| genTypeLastSize = 4; |
| break; |
| // If none of the above, keep looping. |
| default: |
| break; |
| } |
| } |
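| // For example, "dot" declares { F, VL, VL, N }, so the first gentype found is VALUE_FLOAT_VEC34 |
| // and cases are generated for sizes 3 and 4; "radians" declares only scalars, so the size range |
| // stays at 1..1 and a single scalar case is generated per precision. |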
| |
| // Create a case for each possible size of the gentype. |
| for (int curSize = genTypeFirstSize; curSize <= genTypeLastSize; curSize++) |
| { |
| // Determine specific types for the return value and the parameters according to curSize. Non-gentypes are not affected by curSize. |
| DataType types[FunctionCase::MAX_PARAMS + 1]; |
| for (int i = 0; i < FunctionCase::MAX_PARAMS + 1; i++) |
| { |
| if (funcTypes[i] == VALUE_NONE) |
| types[i] = TYPE_INVALID; |
| else |
| { |
| int isFloat = funcTypes[i] & VALUE_ANY_FLOAT; |
| int isBool = funcTypes[i] & VALUE_ANY_BOOL; |
| int isInt = funcTypes[i] & VALUE_ANY_INT; |
| int isMat = funcTypes[i] == VALUE_MATRIX; |
| int inSize = (funcTypes[i] & VALUE_ANY_GENTYPE) ? curSize |
| : funcTypes[i] == VALUE_VEC3 ? 3 |
| : funcTypes[i] == VALUE_VEC4 ? 4 |
| : 1; |
| int typeArrayNdx = isMat ? inSize - 2 : inSize - 1; // \note No matrices of size 1. |
| |
| types[i] = isFloat ? floatTypes[typeArrayNdx] |
| : isBool ? boolTypes[typeArrayNdx] |
| : isInt ? intTypes[typeArrayNdx] |
| : isMat ? matrixTypes[typeArrayNdx] |
| : TYPE_LAST; |
| } |
| |
| DE_ASSERT(types[i] != TYPE_LAST); |
| } |
| |
| // Array for just the parameter types. |
| DataType paramTypes[FunctionCase::MAX_PARAMS]; |
| for (int i = 0; i < FunctionCase::MAX_PARAMS; i++) |
| paramTypes[i] = types[i+1]; |
| |
| for (int prec = (int)PRECISION_LOWP; prec < (int)PRECISION_LAST; prec++) |
| { |
| if ((precMask & (1 << prec)) == 0) |
| continue; |
| |
| const string precisionPrefix = booleanCase ? "" : (string(getPrecisionName((Precision)prec)) + "_"); |
| std::ostringstream caseName; |
| |
| caseName << precisionPrefix; |
| |
| // Write the name of each distinct parameter data type into the test case name. |
| for (int i = 1; i < FunctionCase::MAX_PARAMS + 1 && types[i] != TYPE_INVALID; i++) |
| { |
| if (i == 1 || types[i] != types[i-1]) |
| { |
| if (i > 1) |
| caseName << "_"; |
| |
| caseName << getDataTypeName(types[i]); |
| } |
| } |
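| // For example, mediump "lessThan" on 3-component vectors gets the case name "mediump_vec3", |
| // while a boolean case such as "any" omits the precision prefix and is named simply "bvec3". |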
| |
| for (int fragI = 0; fragI <= 1; fragI++) |
| { |
| const bool vert = fragI == 0; |
| tcu::TestCaseGroup* const group = vert ? vertexSubGroup : fragmentSubGroup; |
| group->addChild (new FunctionCase(m_context, |
| caseName.str().c_str(), "", |
| groupFunc, |
| types[0], paramTypes, |
| groupAttribute, modifyParamNdx, useNearlyConstantInputs, |
| (Precision)prec, vert, |
| vert ? vertexSubGroupCalibrationStorage : fragmentSubGroupCalibrationStorage)); |
| } |
| } |
| } |
| } |
| } |
| |
| } // Performance |
| } // gles2 |
| } // deqp |