// modules/gles3/performance/es3pBufferDataUploadTests.cpp - third_party/vulkan-cts - Git at Google

 /*-------------------------------------------------------------------------
  * drawElements Quality Program OpenGL ES 3.0 Module
  * -------------------------------------------------
  *
  * Copyright 2014 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
  *      http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  *
  *//*!
  * \file
  * \brief Buffer data upload performance tests.
  *//*--------------------------------------------------------------------*/

 #include "es3pBufferDataUploadTests.hpp"
 #include "glsCalibration.hpp"
 #include "tcuTestLog.hpp"
 #include "tcuVectorUtil.hpp"
 #include "tcuSurface.hpp"
 #include "tcuCPUWarmup.hpp"
 #include "tcuRenderTarget.hpp"
 #include "gluRenderContext.hpp"
 #include "gluShaderProgram.hpp"
 #include "gluStrUtil.hpp"
 #include "gluPixelTransfer.hpp"
 #include "gluObjectWrapper.hpp"
 #include "glwFunctions.hpp"
 #include "glwEnums.hpp"
 #include "deClock.h"
 #include "deMath.h"
 #include "deStringUtil.hpp"
 #include "deRandom.hpp"
 #include "deMemory.h"
 #include "deThread.h"
 #include "deMeta.hpp"

 #include <algorithm>
 #include <iomanip>
 #include <limits>

 namespace deqp
 {
 namespace gles3
 {
 namespace Performance
 {
 namespace
 {

 using gls::theilSenSiegelLinearRegression;
 using gls::LineParametersWithConfidence;
 using de::meta::EnableIf;
 using de::meta::Not;

 // GLSL ES 3.00 vertex shader source: forwards the a_position attribute to gl_Position unchanged.
 static const char* const s_dummyVertexShader =		"#version 300 es\n"
 													"in highp vec4 a_position;\n"
 													"void main (void)\n"
 													"{\n"
 													"	gl_Position = a_position;\n"
 													"}\n";

 // GLSL ES 3.00 fragment shader source: writes opaque red (1, 0, 0, 1) to color output 0.
 // NOTE(review): the identifier is spelled "Fragnent"; it is kept as-is because users of the
 // name are outside this section.
 static const char* const s_dummyFragnentShader =	"#version 300 es\n"
 													"layout(location = 0) out mediump vec4 dEQP_FragColor;\n"
 													"void main (void)\n"
 													"{\n"
 													"	dEQP_FragColor = vec4(1.0, 0.0, 0.0, 1.0);\n"
 													"}\n";

 // GLSL ES 3.00 vertex shader source: forwards a_position to gl_Position and passes the
 // a_color attribute on to the fragment stage as v_color.
 static const char* const s_colorVertexShader =		"#version 300 es\n"
 													"in highp vec4 a_position;\n"
 													"in highp vec4 a_color;\n"
 													"out highp vec4 v_color;\n"
 													"void main (void)\n"
 													"{\n"
 													"	gl_Position = a_position;\n"
 													"	v_color = a_color;\n"
 													"}\n";

 // GLSL ES 3.00 fragment shader source: writes the interpolated v_color input to color output 0.
 static const char* const s_colorFragmentShader =	"#version 300 es\n"
 													"layout(location = 0) out mediump vec4 dEQP_FragColor;\n"
 													"in mediump vec4 v_color;\n"
 													"void main (void)\n"
 													"{\n"
 													"	dEQP_FragColor = v_color;\n"
 													"}\n";

 // Timing record for a sample that consists of one timed operation.
 // fitResponseDuration is treated as microseconds by the statistics code in this file.
 struct SingleOperationDuration
 {
 	deUint64 totalDuration;			// presumably the same unit as fitResponseDuration - TODO confirm at the sampling site
 	deUint64 fitResponseDuration; // used for fitting
 };

 // Timing record for a map-buffer-range style upload sample that also records an allocation
 // time. Each member is summarized separately by calculateSampleStatistics().
 // NOTE(review): the code that fills these members is outside this section; units are
 // presumably microseconds, as fitResponseDuration is treated by the statistics code.
 struct MapBufferRangeDuration
 {
 	deUint64 mapDuration;
 	deUint64 unmapDuration;
 	deUint64 writeDuration;
 	deUint64 allocDuration;
 	deUint64 totalDuration;

 	// Response value used for line fitting (default target of fitLineToSamples)
 	deUint64 fitResponseDuration;
 };

 // Same members as MapBufferRangeDuration except that no allocation time is recorded.
 // NOTE(review): units presumably microseconds - confirm at the sampling site.
 struct MapBufferRangeDurationNoAlloc
 {
 	deUint64 mapDuration;
 	deUint64 unmapDuration;
 	deUint64 writeDuration;
 	deUint64 totalDuration;

 	// Response value used for line fitting (default target of fitLineToSamples)
 	deUint64 fitResponseDuration;
 };

 // Timing record for a map-buffer-range style upload sample with separate flush and
 // allocation times. Each member is summarized separately by calculateSampleStatistics().
 // NOTE(review): units presumably microseconds - confirm at the sampling site.
 struct MapBufferRangeFlushDuration
 {
 	deUint64 mapDuration;
 	deUint64 unmapDuration;
 	deUint64 writeDuration;
 	deUint64 flushDuration;
 	deUint64 allocDuration;
 	deUint64 totalDuration;

 	// Response value used for line fitting (default target of fitLineToSamples)
 	deUint64 fitResponseDuration;
 };

 // Same members as MapBufferRangeFlushDuration except that no allocation time is recorded.
 // NOTE(review): units presumably microseconds - confirm at the sampling site.
 struct MapBufferRangeFlushDurationNoAlloc
 {
 	deUint64 mapDuration;
 	deUint64 unmapDuration;
 	deUint64 writeDuration;
 	deUint64 flushDuration;
 	deUint64 totalDuration;

 	// Response value used for line fitting (default target of fitLineToSamples)
 	deUint64 fitResponseDuration;
 };

 // Timing record for a render + read sample. renderDuration, readDuration and totalDuration
 // are summarized by calculateSampleStatistics(); renderReadDuration is not read in this section.
 // NOTE(review): units presumably microseconds - confirm at the sampling site.
 struct RenderReadDuration
 {
 	deUint64 renderDuration;
 	deUint64 readDuration;
 	deUint64 renderReadDuration;
 	deUint64 totalDuration;

 	// Response value used for line fitting (default target of fitLineToSamples)
 	deUint64 fitResponseDuration;
 };

 // Timing record with the same members as RenderReadDuration, kept as a distinct type so it
 // gets its own SampleTypeTraits specialization and calculateSampleStatistics() overload.
 // NOTE(review): units presumably microseconds - confirm at the sampling site.
 struct UnrelatedUploadRenderReadDuration
 {
 	deUint64 renderDuration;
 	deUint64 readDuration;
 	deUint64 renderReadDuration;
 	deUint64 totalDuration;

 	// Response value used for line fitting (default target of fitLineToSamples)
 	deUint64 fitResponseDuration;
 };

 // Timing record for an upload + render + read sample. uploadDuration, renderDuration,
 // readDuration and totalDuration are summarized by calculateSampleStatistics().
 // NOTE(review): units presumably microseconds - confirm at the sampling site.
 struct UploadRenderReadDuration
 {
 	deUint64 uploadDuration;
 	deUint64 renderDuration;
 	deUint64 readDuration;
 	deUint64 totalDuration;
 	deUint64 renderReadDuration;

 	// Response value used for line fitting (default target of fitLineToSamples)
 	deUint64 fitResponseDuration;
 };

 // Timing record with the same members as UploadRenderReadDuration, kept as a distinct type;
 // its SampleTypeTraits specialization differs only in LOG_UNRELATED_UPLOAD_SIZE.
 // NOTE(review): units presumably microseconds - confirm at the sampling site.
 struct UploadRenderReadDurationWithUnrelatedUploadSize
 {
 	deUint64 uploadDuration;
 	deUint64 renderDuration;
 	deUint64 readDuration;
 	deUint64 totalDuration;
 	deUint64 renderReadDuration;

 	// Response value used for line fitting (default target of fitLineToSamples)
 	deUint64 fitResponseDuration;
 };

 // Timing record for a render + upload + render + read sample. The two render times, the
 // upload time, the read time and the total are summarized by calculateSampleStatistics().
 // NOTE(review): units presumably microseconds - confirm at the sampling site.
 struct RenderUploadRenderReadDuration
 {
 	deUint64 firstRenderDuration;
 	deUint64 uploadDuration;
 	deUint64 secondRenderDuration;
 	deUint64 readDuration;
 	deUint64 totalDuration;
 	deUint64 renderReadDuration;

 	// Response value used for line fitting (default target of fitLineToSamples)
 	deUint64 fitResponseDuration;
 };

 // One upload measurement: the sizes involved plus a timing record of type SampleT.
 // writtenSize is the predictor (x value) used for line fitting and transfer statistics.
 template <typename SampleT>
 struct UploadSampleResult
 {
 	typedef SampleT SampleType;

 	int			bufferSize;
 	int			allocatedSize;
 	int			writtenSize;	// x value for fitLineToSamples / calculateBasicTransferStatistics
 	SampleType	duration;
 };

 // One render measurement: the sizes involved plus a timing record of type SampleT.
 // renderDataSize is the predictor (x value) used for line fitting and render statistics.
 template <typename SampleT>
 struct RenderSampleResult
 {
 	typedef SampleT SampleType;

 	int			uploadedDataSize;
 	int			renderDataSize;		// x value for fitLineToSamples / calculateBasicRenderStatistics
 	int			unrelatedDataSize;
 	int			numVertices;
 	SampleT		duration;
 };

 // Order statistics of one timed quantity over all samples. Values are the sampled
 // durations converted to float; the interpolated ones come from linearSample() on the
 // sorted durations.
 struct SingleOperationStatistics
 {
 	float minTime;				//!< smallest sampled value
 	float maxTime;				//!< largest sampled value
 	float medianTime;			//!< sorted values sampled at position 0.5
 	float min2DecileTime;		//!< minimum value in the 2nd decile (sorted values sampled at 0.1)
 	float max9DecileTime;		//!< maximum value in the 9th decile (sorted values sampled at 0.9)
 };

 // Statistics for samples with a single timed operation. All members are filled in by
 // calculateBasicStatistics().
 struct SingleCallStatistics
 {
 	SingleOperationStatistics	result;					// order statistics of fitResponseDuration

 	float						medianRate;				// median of (predictor size / fit response time in seconds)
 	float						maxDiffTime;			// largest (actual - fitted) time
 	float						maxDiff9DecileTime;		// sorted (actual - fitted) times sampled at 0.9
 	float						medianDiffTime;			// median (actual - fitted) time

 	// Same differences divided by the fitted time; a sample counts as 0 when its fitted time is below 1
 	float						maxRelDiffTime;
 	float						max9DecileRelDiffTime;
 	float						medianRelDiffTime;
 };

 // Statistics for map/unmap style upload samples: per-operation order statistics plus the
 // fit-based figures produced by calculateBasicStatistics().
 struct MapCallStatistics
 {
 	SingleOperationStatistics	map;
 	SingleOperationStatistics	unmap;
 	SingleOperationStatistics	write;
 	SingleOperationStatistics	alloc;					// only computed for sample types that record an allocation time
 	SingleOperationStatistics	result;					// order statistics of fitResponseDuration

 	float						medianRate;				// median of (predictor size / fit response time in seconds)
 	float						maxDiffTime;			// largest (actual - fitted) time
 	float						maxDiff9DecileTime;		// sorted (actual - fitted) times sampled at 0.9
 	float						medianDiffTime;			// median (actual - fitted) time

 	// Same differences divided by the fitted time; a sample counts as 0 when its fitted time is below 1
 	float						maxRelDiffTime;
 	float						max9DecileRelDiffTime;
 	float						medianRelDiffTime;
 };

 // Statistics for map/flush/unmap style upload samples: per-operation order statistics plus
 // the fit-based figures produced by calculateBasicStatistics().
 struct MapFlushCallStatistics
 {
 	SingleOperationStatistics	map;
 	SingleOperationStatistics	unmap;
 	SingleOperationStatistics	write;
 	SingleOperationStatistics	flush;
 	SingleOperationStatistics	alloc;					// only computed for sample types that record an allocation time
 	SingleOperationStatistics	result;					// order statistics of fitResponseDuration

 	float						medianRate;				// median of (predictor size / fit response time in seconds)
 	float						maxDiffTime;			// largest (actual - fitted) time
 	float						maxDiff9DecileTime;		// sorted (actual - fitted) times sampled at 0.9
 	float						medianDiffTime;			// median (actual - fitted) time

 	// Same differences divided by the fitted time; a sample counts as 0 when its fitted time is below 1
 	float						maxRelDiffTime;
 	float						max9DecileRelDiffTime;
 	float						medianRelDiffTime;
 };

 // Statistics for render + read samples: per-operation order statistics plus the fit-based
 // figures produced by calculateBasicStatistics().
 struct RenderReadStatistics
 {
 	SingleOperationStatistics	render;
 	SingleOperationStatistics	read;
 	SingleOperationStatistics	result;					// order statistics of fitResponseDuration
 	SingleOperationStatistics	total;					// order statistics of totalDuration

 	float						medianRate;				// median of (predictor size / fit response time in seconds)
 	float						maxDiffTime;			// largest (actual - fitted) time
 	float						maxDiff9DecileTime;		// sorted (actual - fitted) times sampled at 0.9
 	float						medianDiffTime;			// median (actual - fitted) time

 	// Same differences divided by the fitted time; a sample counts as 0 when its fitted time is below 1
 	float						maxRelDiffTime;
 	float						max9DecileRelDiffTime;
 	float						medianRelDiffTime;
 };

 // Statistics for upload + render + read samples: per-operation order statistics plus the
 // fit-based figures produced by calculateBasicStatistics().
 struct UploadRenderReadStatistics
 {
 	SingleOperationStatistics	upload;
 	SingleOperationStatistics	render;
 	SingleOperationStatistics	read;
 	SingleOperationStatistics	result;					// order statistics of fitResponseDuration
 	SingleOperationStatistics	total;					// order statistics of totalDuration

 	float						medianRate;				// median of (predictor size / fit response time in seconds)
 	float						maxDiffTime;			// largest (actual - fitted) time
 	float						maxDiff9DecileTime;		// sorted (actual - fitted) times sampled at 0.9
 	float						medianDiffTime;			// median (actual - fitted) time

 	// Same differences divided by the fitted time; a sample counts as 0 when its fitted time is below 1
 	float						maxRelDiffTime;
 	float						max9DecileRelDiffTime;
 	float						medianRelDiffTime;
 };

 // Statistics for render + upload + render + read samples: per-operation order statistics
 // plus the fit-based figures produced by calculateBasicStatistics().
 struct RenderUploadRenderReadStatistics
 {
 	SingleOperationStatistics	firstRender;
 	SingleOperationStatistics	upload;
 	SingleOperationStatistics	secondRender;
 	SingleOperationStatistics	read;
 	SingleOperationStatistics	result;					// order statistics of fitResponseDuration
 	SingleOperationStatistics	total;					// order statistics of totalDuration

 	float						medianRate;				// median of (predictor size / fit response time in seconds)
 	float						maxDiffTime;			// largest (actual - fitted) time
 	float						maxDiff9DecileTime;		// sorted (actual - fitted) times sampled at 0.9
 	float						medianDiffTime;			// median (actual - fitted) time

 	// Same differences divided by the fitted time; a sample counts as 0 when its fitted time is below 1
 	float						maxRelDiffTime;
 	float						max9DecileRelDiffTime;
 	float						medianRelDiffTime;
 };

 // Compile-time description of a sample duration type. The primary template is intentionally
 // empty; each duration struct gets an explicit specialization that names its statistics
 // type (StatsType) and a set of 0/1 flags.
 // NOTE(review): the code that reads the flags is outside this section.
 template <typename T>
 struct SampleTypeTraits
 {
 };

 // Traits for SingleOperationDuration: statistics go into SingleCallStatistics and every
 // per-operation flag is off, as is LOG_CONTRIBUTIONS.
 template <>
 struct SampleTypeTraits<SingleOperationDuration>
 {
 	typedef SingleCallStatistics StatsType;

 	enum { HAS_MAP_STATS		= 0	};
 	enum { HAS_UNMAP_STATS		= 0	};
 	enum { HAS_WRITE_STATS		= 0	};
 	enum { HAS_FLUSH_STATS		= 0	};
 	enum { HAS_ALLOC_STATS		= 0	};
 	enum { LOG_CONTRIBUTIONS	= 0	};
 };

 // Traits for MapBufferRangeDuration: statistics go into MapCallStatistics; map, unmap,
 // write and alloc flags are on, flush is off (the struct has no flushDuration).
 template <>
 struct SampleTypeTraits<MapBufferRangeDuration>
 {
 	typedef MapCallStatistics StatsType;

 	enum { HAS_MAP_STATS		= 1	};
 	enum { HAS_UNMAP_STATS		= 1	};
 	enum { HAS_WRITE_STATS		= 1	};
 	enum { HAS_FLUSH_STATS		= 0	};
 	enum { HAS_ALLOC_STATS		= 1	};
 	enum { LOG_CONTRIBUTIONS	= 1	};
 };

 // Traits for MapBufferRangeDurationNoAlloc: statistics go into MapCallStatistics; map,
 // unmap and write flags are on, flush and alloc are off (the struct has neither member).
 template <>
 struct SampleTypeTraits<MapBufferRangeDurationNoAlloc>
 {
 	typedef MapCallStatistics StatsType;

 	enum { HAS_MAP_STATS		= 1	};
 	enum { HAS_UNMAP_STATS		= 1	};
 	enum { HAS_WRITE_STATS		= 1	};
 	enum { HAS_FLUSH_STATS		= 0	};
 	enum { HAS_ALLOC_STATS		= 0	};
 	enum { LOG_CONTRIBUTIONS	= 1	};
 };

 // Traits for MapBufferRangeFlushDuration: statistics go into MapFlushCallStatistics and
 // all per-operation flags (map, unmap, write, flush, alloc) are on.
 template <>
 struct SampleTypeTraits<MapBufferRangeFlushDuration>
 {
 	typedef MapFlushCallStatistics StatsType;

 	enum { HAS_MAP_STATS		= 1	};
 	enum { HAS_UNMAP_STATS		= 1	};
 	enum { HAS_WRITE_STATS		= 1	};
 	enum { HAS_FLUSH_STATS		= 1	};
 	enum { HAS_ALLOC_STATS		= 1	};
 	enum { LOG_CONTRIBUTIONS	= 1	};
 };

 // Traits for MapBufferRangeFlushDurationNoAlloc: statistics go into MapFlushCallStatistics;
 // map, unmap, write and flush flags are on, alloc is off (the struct has no allocDuration).
 template <>
 struct SampleTypeTraits<MapBufferRangeFlushDurationNoAlloc>
 {
 	typedef MapFlushCallStatistics StatsType;

 	enum { HAS_MAP_STATS		= 1	};
 	enum { HAS_UNMAP_STATS		= 1	};
 	enum { HAS_WRITE_STATS		= 1	};
 	enum { HAS_FLUSH_STATS		= 1	};
 	enum { HAS_ALLOC_STATS		= 0	};
 	enum { LOG_CONTRIBUTIONS	= 1	};
 };

 // Traits for RenderReadDuration: statistics go into RenderReadStatistics; render, read
 // and total flags are on, upload and first/second render flags are off.
 template <>
 struct SampleTypeTraits<RenderReadDuration>
 {
 	typedef RenderReadStatistics StatsType;

 	enum { HAS_RENDER_STATS			= 1	};
 	enum { HAS_READ_STATS			= 1	};
 	enum { HAS_UPLOAD_STATS			= 0	};
 	enum { HAS_TOTAL_STATS			= 1	};
 	enum { HAS_FIRST_RENDER_STATS	= 0	};
 	enum { HAS_SECOND_RENDER_STATS	= 0	};

 	enum { LOG_CONTRIBUTIONS	= 1	};
 };

 // Traits for UnrelatedUploadRenderReadDuration: same values as the RenderReadDuration
 // traits (RenderReadStatistics; render, read and total flags on, the rest off).
 template <>
 struct SampleTypeTraits<UnrelatedUploadRenderReadDuration>
 {
 	typedef RenderReadStatistics StatsType;

 	enum { HAS_RENDER_STATS			= 1	};
 	enum { HAS_READ_STATS			= 1	};
 	enum { HAS_UPLOAD_STATS			= 0	};
 	enum { HAS_TOTAL_STATS			= 1	};
 	enum { HAS_FIRST_RENDER_STATS	= 0	};
 	enum { HAS_SECOND_RENDER_STATS	= 0	};

 	enum { LOG_CONTRIBUTIONS	= 1	};
 };

 // Traits for UploadRenderReadDuration: statistics go into UploadRenderReadStatistics;
 // render, read, upload and total flags are on. LOG_UNRELATED_UPLOAD_SIZE is off.
 template <>
 struct SampleTypeTraits<UploadRenderReadDuration>
 {
 	typedef UploadRenderReadStatistics StatsType;

 	enum { HAS_RENDER_STATS			= 1	};
 	enum { HAS_READ_STATS			= 1	};
 	enum { HAS_UPLOAD_STATS			= 1	};
 	enum { HAS_TOTAL_STATS			= 1	};
 	enum { HAS_FIRST_RENDER_STATS	= 0	};
 	enum { HAS_SECOND_RENDER_STATS	= 0	};

 	enum { LOG_CONTRIBUTIONS			= 1	};
 	enum { LOG_UNRELATED_UPLOAD_SIZE	= 0 };
 };

 // Traits for UploadRenderReadDurationWithUnrelatedUploadSize: identical to the
 // UploadRenderReadDuration traits except that LOG_UNRELATED_UPLOAD_SIZE is on.
 template <>
 struct SampleTypeTraits<UploadRenderReadDurationWithUnrelatedUploadSize>
 {
 	typedef UploadRenderReadStatistics StatsType;

 	enum { HAS_RENDER_STATS			= 1	};
 	enum { HAS_READ_STATS			= 1	};
 	enum { HAS_UPLOAD_STATS			= 1	};
 	enum { HAS_TOTAL_STATS			= 1	};
 	enum { HAS_FIRST_RENDER_STATS	= 0	};
 	enum { HAS_SECOND_RENDER_STATS	= 0	};

 	enum { LOG_CONTRIBUTIONS			= 1	};
 	enum { LOG_UNRELATED_UPLOAD_SIZE	= 1 };
 };

 // Traits for RenderUploadRenderReadDuration: statistics go into
 // RenderUploadRenderReadStatistics. The single render flag is off because this sample type
 // has separate first/second render timings, whose flags are on, along with read, upload
 // and total.
 template <>
 struct SampleTypeTraits<RenderUploadRenderReadDuration>
 {
 	typedef RenderUploadRenderReadStatistics StatsType;

 	enum { HAS_RENDER_STATS			= 0	};
 	enum { HAS_READ_STATS			= 1	};
 	enum { HAS_UPLOAD_STATS			= 1	};
 	enum { HAS_TOTAL_STATS			= 1	};
 	enum { HAS_FIRST_RENDER_STATS	= 1	};
 	enum { HAS_SECOND_RENDER_STATS	= 1	};

 	enum { LOG_CONTRIBUTIONS			= 1	};
 	enum { LOG_UNRELATED_UPLOAD_SIZE	= 1 };
 };

 // Summary figures of an upload sample analysis.
 // NOTE(review): the code that fills and reads these members is outside this section;
 // judging by the names they are transfer rates - confirm units at the producer.
 struct UploadSampleAnalyzeResult
 {
 	float transferRateMedian;
 	float transferRateAtRange;
 	float transferRateAtInfinity;
 };

 // Summary figures of a render sample analysis.
 // NOTE(review): the code that fills and reads these members is outside this section;
 // judging by the names they are render rates - confirm units at the producer.
 struct RenderSampleAnalyzeResult
 {
 	float renderRateMedian;
 	float renderRateAtRange;
 	float renderRateAtInfinity;
 };

 // Dedicated exception type derived from std::exception; it carries no message or state.
 // NOTE(review): throw and catch sites are outside this section - presumably raised when
 // unmapping a buffer fails.
 class UnmapFailureError : public std::exception
 {
 public:
 	UnmapFailureError (void)
 	{
 	}
 };

 // Formats a byte count for log output: plain "N byte(s)" below 1024, otherwise KiB or MiB
 // formatted by de::floatToString with precision argument 1.
 static std::string getHumanReadableByteSize (int numBytes)
 {
 	const int			oneKiB	= 1024;
 	const int			oneMiB	= 1024 * 1024;
 	std::ostringstream	text;

 	if (numBytes >= oneMiB)
 		text << de::floatToString((float)numBytes/1024.0f/1024.0f, 1) << " MiB";
 	else if (numBytes >= oneKiB)
 		text << de::floatToString((float)numBytes/1024.0f, 1) << " KiB";
 	else
 		text << numBytes << " byte(s)";

 	return text.str();
 }

 // Copies numBytes from src to dst and returns the time the copy took, in microseconds.
 //
 // Copies below 1 MiB are timed directly. Larger copies are split into numSections parts
 // that are timed separately; the return value is the median section time multiplied by the
 // number of sections, i.e. an estimate of the copy time at the median transfer rate. All of
 // the data is still copied.
 static deUint64 medianTimeMemcpy (void* dst, const void* src, int numBytes)
 {
 	// Time used by memcpy is assumed to be asymptotically linear

 	// With large numBytes, the probability of context switch or other random
 	// event is high. Apply memcpy in parts and report how much time would
 	// memcpy have used with the median transfer rate.

 	// Less than 1MiB, no need to do anything special
 	if (numBytes < 1048576)
 	{
 		deUint64 startTime;
 		deUint64 endTime;

 		deYield();

 		startTime = deGetMicroseconds();
 		deMemcpy(dst, src, numBytes);
 		endTime = deGetMicroseconds();

 		return endTime - startTime;
 	}
 	else
 	{
 		// Do memcpy in multiple parts

 		const int	numSections		= 5;
 		const int	sectionAlign	= 16;

 		int			sectionStarts[numSections+1];
 		int			sectionLens[numSections];
 		deUint64	sectionTimes[numSections];
 		deUint64	medianTime;
 		deUint64	bestTime		= 0; // 0 doubles as "no section timed yet"

 		for (int sectionNdx = 0; sectionNdx < numSections; ++sectionNdx)
 		{
 			// Use a 64-bit intermediate: numBytes * sectionNdx does not fit in an int once
 			// numBytes exceeds roughly 512 MiB. numBytes is at least 1 MiB in this branch, so
 			// the unsigned conversion is safe.
 			const deUint64 unalignedStart = (deUint64)numBytes * (deUint64)sectionNdx / (deUint64)numSections;

 			sectionStarts[sectionNdx] = deAlign32((int)unalignedStart, sectionAlign);
 		}
 		sectionStarts[numSections] = numBytes;

 		for (int sectionNdx = 0; sectionNdx < numSections; ++sectionNdx)
 			sectionLens[sectionNdx] = sectionStarts[sectionNdx+1] - sectionStarts[sectionNdx];

 		// Memcpy is usually called after mapbuffer range which may take
 		// a lot of time. To prevent power management from kicking in during
 		// copy, warm up more.
 		{
 			deYield();
 			tcu::warmupCPU();
 			deYield();
 		}

 		for (int sectionNdx = 0; sectionNdx < numSections; ++sectionNdx)
 		{
 			deUint64 startTime;
 			deUint64 endTime;

 			startTime = deGetMicroseconds();
 			deMemcpy((deUint8*)dst + sectionStarts[sectionNdx], (const deUint8*)src + sectionStarts[sectionNdx], sectionLens[sectionNdx]);
 			endTime = deGetMicroseconds();

 			sectionTimes[sectionNdx] = endTime - startTime;

 			if (!bestTime || sectionTimes[sectionNdx] < bestTime)
 				bestTime = sectionTimes[sectionNdx];

 			// Detect if write takes 50% longer than it should, and warm up if that happened
 			if (sectionNdx != numSections-1 && (float)sectionTimes[sectionNdx] > 1.5f * (float)bestTime)
 			{
 				deYield();
 				tcu::warmupCPU();
 				deYield();
 			}
 		}

 		std::sort(sectionTimes, sectionTimes + numSections);

 		if ((numSections % 2) == 0)
 			medianTime = (sectionTimes[numSections / 2 - 1] + sectionTimes[numSections / 2]) / 2;
 		else
 			medianTime = sectionTimes[numSections / 2];

 		return medianTime*numSections;
 	}
 }

 // Runs workSize rounds of throwaway cosine/modulo arithmetic seeded with 'initial' and
 // returns a value that depends on every round. busyWait() uses it as its unit of work.
 static float dummyCalculation (float initial, int workSize)
 {
 	float	accum	= initial;
 	int		mixer	= 123;

 	for (int roundsLeft = workSize; roundsLeft > 0; --roundsLeft)
 	{
 		accum = deFloatCos(accum + (float)mixer);
 		mixer = (mixer + 63) % 107 + de::abs((int)(accum*10.0f));
 	}

 	return accum + (float)mixer;
 }

 static void busyWait (int microseconds)
 {
 	const deUint64	maxSingleWaitTime	= 1000; // 1ms
 	const deUint64	endTime				= deGetMicroseconds() + microseconds;
 	float			dummy				= *tcu::warmupCPUInternal::g_dummy.m_v;
 	int				workSize			= 500;

 	// exponentially increase work, cap to 1ms
 	while (deGetMicroseconds() < endTime)
 	{
 		const deUint64	startTime		= deGetMicroseconds();
 		deUint64		totalTime;

 		dummy = dummyCalculation(dummy, workSize);

 		totalTime = deGetMicroseconds() - startTime;

 		if (totalTime >= maxSingleWaitTime)
 			break;
 		else
 			workSize *= 2;
 	}

 	// "wait"
 	while (deGetMicroseconds() < endTime)
 		dummy = dummyCalculation(dummy, workSize);

 	*tcu::warmupCPUInternal::g_dummy.m_v = dummy;
 }

 // Sample from given values using linear interpolation at a given position as if values were laid to range [0, 1]
 //
 // \param values	Non-empty sequence; callers in this file pass it sorted.
 // \param position	Sampling position in [0, 1]; 0 maps to the first value, 1 to the last.
 // \return Interpolated value converted to float.
 //
 // The upper interpolation index is clamped to the last element, so position == 1 and
 // single-element inputs return the last value instead of reading past the end.
 template <typename T>
 static float linearSample (const std::vector<T>& values, float position)
 {
 	DE_ASSERT(!values.empty());
 	DE_ASSERT(position >= 0.0f);
 	DE_ASSERT(position <= 1.0f);

 	const int	lastNdx				= (int)values.size() - 1;
 	const float	floatNdx			= (float)(values.size() - 1) * position;
 	const int	lowerNdx			= (int)deFloatFloor(floatNdx);
 	const int	higherNdx			= std::min(lowerNdx + 1, lastNdx); // clamp: lowerNdx may already be the last index
 	const float	interpolationFactor = floatNdx - (float)lowerNdx;

 	DE_ASSERT(lowerNdx >= 0 && lowerNdx <= lastNdx);
 	DE_ASSERT(higherNdx >= 0 && higherNdx <= lastNdx);
 	DE_ASSERT(interpolationFactor >= 0 && interpolationFactor < 1.0f);

 	return tcu::mix((float)values[lowerNdx], (float)values[higherNdx], interpolationFactor);
 }

 // Collects one duration member (selected by 'target') from every sample and returns its
 // minimum, maximum, median and the values sampled at 0.1 and 0.9 of the sorted range.
 // The sample vector must not be empty.
 template <typename T>
 SingleOperationStatistics calculateSingleOperationStatistics (const std::vector<T>& samples, deUint64 T::SampleType::*target)
 {
 	std::vector<deUint64> durations;

 	durations.reserve(samples.size());
 	for (typename std::vector<T>::const_iterator sample = samples.begin(); sample != samples.end(); ++sample)
 		durations.push_back(sample->duration.*target);

 	std::sort(durations.begin(), durations.end());

 	SingleOperationStatistics summary;

 	summary.minTime			= (float)durations.front();
 	summary.maxTime			= (float)durations.back();
 	summary.medianTime		= linearSample(durations, 0.5f);
 	summary.min2DecileTime	= linearSample(durations, 0.1f);
 	summary.max9DecileTime	= linearSample(durations, 0.9f);

 	return summary;
 }

 template <typename StatisticsType, typename SampleType>
 void calculateBasicStatistics (StatisticsType& stats, const LineParametersWithConfidence& fit, const std::vector<SampleType>& samples, int SampleType::*predictor)
 {
 	std::vector<deUint64> values(samples.size());

 	for (int ndx = 0; ndx < (int)samples.size(); ++ndx)
 		values[ndx] = samples[ndx].duration.fitResponseDuration;

 	// median rate
 	{
 		std::vector<float> processingRates(samples.size());

 		for (int ndx = 0; ndx < (int)samples.size(); ++ndx)
 		{
 			const float timeInSeconds = (float)values[ndx] / 1000.0f / 1000.0f;
 			processingRates[ndx] = (float)(samples[ndx].*predictor) / timeInSeconds;
 		}

 		std::sort(processingRates.begin(), processingRates.end());

 		stats.medianRate = linearSample(processingRates, 0.5f);
 	}

 	// results compared to the approximation
 	{
 		std::vector<float> timeDiffs(samples.size());

 		for (int ndx = 0; ndx < (int)samples.size(); ++ndx)
 		{
 			const float prediction	= (float)(samples[ndx].*predictor) * fit.coefficient + fit.offset;
 			const float actual		= (float)values[ndx];
 			timeDiffs[ndx] = actual - prediction;
 		}
 		std::sort(timeDiffs.begin(), timeDiffs.end());

 		stats.maxDiffTime			= timeDiffs.back();
 		stats.maxDiff9DecileTime	= linearSample(timeDiffs, 0.9f);
 		stats.medianDiffTime		= linearSample(timeDiffs, 0.5f);
 	}

 	// relative comparison to the approximation
 	{
 		std::vector<float> relativeDiffs(samples.size());

 		for (int ndx = 0; ndx < (int)samples.size(); ++ndx)
 		{
 			const float prediction	= (float)(samples[ndx].*predictor) * fit.coefficient + fit.offset;
 			const float actual		= (float)values[ndx];

 			// Ignore cases where we predict negative times, or if
 			// ratio would be (nearly) infinite: ignore if predicted
 			// time is less than 1 microsecond
 			if (prediction < 1.0f)
 				relativeDiffs[ndx] = 0.0f;
 			else
 				relativeDiffs[ndx] = (actual - prediction) / prediction;
 		}
 		std::sort(relativeDiffs.begin(), relativeDiffs.end());

 		stats.maxRelDiffTime		= relativeDiffs.back();
 		stats.max9DecileRelDiffTime	= linearSample(relativeDiffs, 0.9f);
 		stats.medianRelDiffTime		= linearSample(relativeDiffs, 0.5f);
 	}

 	// values calculated using sorted timings

 	std::sort(values.begin(), values.end());

 	stats.result.minTime = (float)values.front();
 	stats.result.maxTime = (float)values.back();
 	stats.result.medianTime = linearSample(values, 0.5f);
 	stats.result.min2DecileTime = linearSample(values, 0.1f);
 	stats.result.max9DecileTime = linearSample(values, 0.9f);
 }

 // calculateBasicStatistics() for upload samples: the written byte count is the predictor.
 template <typename StatisticsType, typename SampleType>
 void calculateBasicTransferStatistics (StatisticsType& stats, const LineParametersWithConfidence& fit, const std::vector<SampleType>& samples)
 {
 	int SampleType::* const transferPredictor = &SampleType::writtenSize;

 	calculateBasicStatistics(stats, fit, samples, transferPredictor);
 }

 // calculateBasicStatistics() for render samples: the rendered data size is the predictor.
 template <typename StatisticsType, typename SampleType>
 void calculateBasicRenderStatistics (StatisticsType& stats, const LineParametersWithConfidence& fit, const std::vector<SampleType>& samples)
 {
 	int SampleType::* const renderPredictor = &SampleType::renderDataSize;

 	calculateBasicStatistics(stats, fit, samples, renderPredictor);
 }

 // Statistics for single-operation upload samples. This sample type has no sub-operations,
 // so only the fit-based figures are computed.
 static SingleCallStatistics calculateSampleStatistics (const LineParametersWithConfidence& fit, const std::vector<UploadSampleResult<SingleOperationDuration> >& samples)
 {
 	SingleCallStatistics callStats;

 	calculateBasicTransferStatistics(callStats, fit, samples);
 	return callStats;
 }

 // Statistics for map-buffer-range upload samples with allocation timing: the fit-based
 // figures plus order statistics of the map, unmap, write and alloc durations.
 static MapCallStatistics calculateSampleStatistics (const LineParametersWithConfidence& fit, const std::vector<UploadSampleResult<MapBufferRangeDuration> >& samples)
 {
 	typedef MapBufferRangeDuration Duration;

 	MapCallStatistics mapStats;

 	calculateBasicTransferStatistics(mapStats, fit, samples);

 	// Per-operation summaries; each call is independent of the others
 	mapStats.map	= calculateSingleOperationStatistics(samples, &Duration::mapDuration);
 	mapStats.unmap	= calculateSingleOperationStatistics(samples, &Duration::unmapDuration);
 	mapStats.write	= calculateSingleOperationStatistics(samples, &Duration::writeDuration);
 	mapStats.alloc	= calculateSingleOperationStatistics(samples, &Duration::allocDuration);

 	return mapStats;
 }

 // Statistics for map/flush upload samples with allocation timing: the fit-based figures
 // plus order statistics of the map, unmap, write, flush and alloc durations.
 static MapFlushCallStatistics calculateSampleStatistics (const LineParametersWithConfidence& fit, const std::vector<UploadSampleResult<MapBufferRangeFlushDuration> >& samples)
 {
 	typedef MapBufferRangeFlushDuration Duration;

 	MapFlushCallStatistics flushStats;

 	calculateBasicTransferStatistics(flushStats, fit, samples);

 	// Per-operation summaries; each call is independent of the others
 	flushStats.map		= calculateSingleOperationStatistics(samples, &Duration::mapDuration);
 	flushStats.unmap	= calculateSingleOperationStatistics(samples, &Duration::unmapDuration);
 	flushStats.write	= calculateSingleOperationStatistics(samples, &Duration::writeDuration);
 	flushStats.flush	= calculateSingleOperationStatistics(samples, &Duration::flushDuration);
 	flushStats.alloc	= calculateSingleOperationStatistics(samples, &Duration::allocDuration);

 	return flushStats;
 }

 // Statistics for map-buffer-range upload samples without allocation timing: the fit-based
 // figures plus order statistics of the map, unmap and write durations.
 //
 // The sample type has no allocation time, so stats.alloc is never computed. The struct is
 // value-initialized so that alloc is returned as zeros instead of indeterminate values.
 static MapCallStatistics calculateSampleStatistics (const LineParametersWithConfidence& fit, const std::vector<UploadSampleResult<MapBufferRangeDurationNoAlloc> >& samples)
 {
 	MapCallStatistics stats = MapCallStatistics();

 	calculateBasicTransferStatistics(stats, fit, samples);

 	stats.map	= calculateSingleOperationStatistics(samples, &MapBufferRangeDurationNoAlloc::mapDuration);
 	stats.unmap	= calculateSingleOperationStatistics(samples, &MapBufferRangeDurationNoAlloc::unmapDuration);
 	stats.write	= calculateSingleOperationStatistics(samples, &MapBufferRangeDurationNoAlloc::writeDuration);

 	return stats;
 }

 // Statistics for map/flush upload samples without allocation timing: the fit-based figures
 // plus order statistics of the map, unmap, write and flush durations.
 //
 // The sample type has no allocation time, so stats.alloc is never computed. The struct is
 // value-initialized so that alloc is returned as zeros instead of indeterminate values.
 static MapFlushCallStatistics calculateSampleStatistics (const LineParametersWithConfidence& fit, const std::vector<UploadSampleResult<MapBufferRangeFlushDurationNoAlloc> >& samples)
 {
 	MapFlushCallStatistics stats = MapFlushCallStatistics();

 	calculateBasicTransferStatistics(stats, fit, samples);

 	stats.map	= calculateSingleOperationStatistics(samples, &MapBufferRangeFlushDurationNoAlloc::mapDuration);
 	stats.unmap	= calculateSingleOperationStatistics(samples, &MapBufferRangeFlushDurationNoAlloc::unmapDuration);
 	stats.write	= calculateSingleOperationStatistics(samples, &MapBufferRangeFlushDurationNoAlloc::writeDuration);
 	stats.flush	= calculateSingleOperationStatistics(samples, &MapBufferRangeFlushDurationNoAlloc::flushDuration);

 	return stats;
 }

 // Statistics for render + read samples: the fit-based figures plus order statistics of the
 // render, read and total durations.
 static RenderReadStatistics calculateSampleStatistics (const LineParametersWithConfidence& fit, const std::vector<RenderSampleResult<RenderReadDuration> >& samples)
 {
 	typedef RenderReadDuration Duration;

 	RenderReadStatistics renderStats;

 	calculateBasicRenderStatistics(renderStats, fit, samples);

 	renderStats.render	= calculateSingleOperationStatistics(samples, &Duration::renderDuration);
 	renderStats.read	= calculateSingleOperationStatistics(samples, &Duration::readDuration);
 	renderStats.total	= calculateSingleOperationStatistics(samples, &Duration::totalDuration);

 	return renderStats;
 }

 // Statistics for UnrelatedUploadRenderReadDuration samples: the fit-based figures plus
 // order statistics of the render, read and total durations.
 static RenderReadStatistics calculateSampleStatistics (const LineParametersWithConfidence& fit, const std::vector<RenderSampleResult<UnrelatedUploadRenderReadDuration> >& samples)
 {
 	typedef UnrelatedUploadRenderReadDuration Duration;

 	RenderReadStatistics renderStats;

 	calculateBasicRenderStatistics(renderStats, fit, samples);

 	renderStats.render	= calculateSingleOperationStatistics(samples, &Duration::renderDuration);
 	renderStats.read	= calculateSingleOperationStatistics(samples, &Duration::readDuration);
 	renderStats.total	= calculateSingleOperationStatistics(samples, &Duration::totalDuration);

 	return renderStats;
 }

 // Statistics for upload + render + read samples: the fit-based figures plus order
 // statistics of the upload, render, read and total durations.
 static UploadRenderReadStatistics calculateSampleStatistics (const LineParametersWithConfidence& fit, const std::vector<RenderSampleResult<UploadRenderReadDuration> >& samples)
 {
 	typedef UploadRenderReadDuration Duration;

 	UploadRenderReadStatistics uploadRenderStats;

 	calculateBasicRenderStatistics(uploadRenderStats, fit, samples);

 	uploadRenderStats.upload	= calculateSingleOperationStatistics(samples, &Duration::uploadDuration);
 	uploadRenderStats.render	= calculateSingleOperationStatistics(samples, &Duration::renderDuration);
 	uploadRenderStats.read		= calculateSingleOperationStatistics(samples, &Duration::readDuration);
 	uploadRenderStats.total		= calculateSingleOperationStatistics(samples, &Duration::totalDuration);

 	return uploadRenderStats;
 }

 // Statistics for UploadRenderReadDurationWithUnrelatedUploadSize samples: the fit-based
 // figures plus order statistics of the upload, render, read and total durations.
 static UploadRenderReadStatistics calculateSampleStatistics (const LineParametersWithConfidence& fit, const std::vector<RenderSampleResult<UploadRenderReadDurationWithUnrelatedUploadSize> >& samples)
 {
 	typedef UploadRenderReadDurationWithUnrelatedUploadSize Duration;

 	UploadRenderReadStatistics uploadRenderStats;

 	calculateBasicRenderStatistics(uploadRenderStats, fit, samples);

 	uploadRenderStats.upload	= calculateSingleOperationStatistics(samples, &Duration::uploadDuration);
 	uploadRenderStats.render	= calculateSingleOperationStatistics(samples, &Duration::renderDuration);
 	uploadRenderStats.read		= calculateSingleOperationStatistics(samples, &Duration::readDuration);
 	uploadRenderStats.total		= calculateSingleOperationStatistics(samples, &Duration::totalDuration);

 	return uploadRenderStats;
 }

 // Statistics for render + upload + render + read samples: the fit-based figures plus order
 // statistics of both render durations and of the upload, read and total durations.
 static RenderUploadRenderReadStatistics calculateSampleStatistics (const LineParametersWithConfidence& fit, const std::vector<RenderSampleResult<RenderUploadRenderReadDuration> >& samples)
 {
 	typedef RenderUploadRenderReadDuration Duration;

 	RenderUploadRenderReadStatistics sequenceStats;

 	calculateBasicRenderStatistics(sequenceStats, fit, samples);

 	sequenceStats.firstRender	= calculateSingleOperationStatistics(samples, &Duration::firstRenderDuration);
 	sequenceStats.upload		= calculateSingleOperationStatistics(samples, &Duration::uploadDuration);
 	sequenceStats.secondRender	= calculateSingleOperationStatistics(samples, &Duration::secondRenderDuration);
 	sequenceStats.read			= calculateSingleOperationStatistics(samples, &Duration::readDuration);
 	sequenceStats.total			= calculateSingleOperationStatistics(samples, &Duration::totalDuration);

 	return sequenceStats;
 }

 // Fits a line to upload samples with theilSenSiegelLinearRegression (second argument 0.6):
 // x is the written size, y is the duration member selected by 'target'. Only samples
 // beginNdx, beginNdx+step, ... below endNdx take part.
 template <typename DurationType>
 static LineParametersWithConfidence fitLineToSamples (const std::vector<UploadSampleResult<DurationType> >& samples, int beginNdx, int endNdx, int step, deUint64 DurationType::*target = &DurationType::fitResponseDuration)
 {
 	std::vector<tcu::Vec2>	fitPoints;
 	int						ndx			= beginNdx;

 	while (ndx < endNdx)
 	{
 		const UploadSampleResult<DurationType>&	sample = samples[ndx];
 		tcu::Vec2								xy;

 		xy.x() = (float)(sample.writtenSize);
 		xy.y() = (float)(sample.duration.*target);
 		fitPoints.push_back(xy);

 		ndx += step;
 	}

 	return theilSenSiegelLinearRegression(fitPoints, 0.6f);
 }

 // Fits a line to render samples with theilSenSiegelLinearRegression (second argument 0.6):
 // x is the render data size, y is the duration member selected by 'target'. Only samples
 // beginNdx, beginNdx+step, ... below endNdx take part.
 template <typename DurationType>
 static LineParametersWithConfidence fitLineToSamples (const std::vector<RenderSampleResult<DurationType> >& samples, int beginNdx, int endNdx, int step, deUint64 DurationType::*target = &DurationType::fitResponseDuration)
 {
 	std::vector<tcu::Vec2>	fitPoints;
 	int						ndx			= beginNdx;

 	while (ndx < endNdx)
 	{
 		const RenderSampleResult<DurationType>&	sample = samples[ndx];
 		tcu::Vec2								xy;

 		xy.x() = (float)(sample.renderDataSize);
 		xy.y() = (float)(sample.duration.*target);
 		fitPoints.push_back(xy);

 		ndx += step;
 	}

 	return theilSenSiegelLinearRegression(fitPoints, 0.6f);
 }

 // Fits a line to every sample in [beginNdx, endNdx), i.e. with a step of one.
 template <typename T>
 static LineParametersWithConfidence fitLineToSamples (const std::vector<T>& samples, int beginNdx, int endNdx, deUint64 T::SampleType::*target = &T::SampleType::fitResponseDuration)
 {
 	const int everySample = 1;

 	return fitLineToSamples(samples, beginNdx, endNdx, everySample, target);
 }

 // Fits a line to all samples.
 template <typename T>
 static LineParametersWithConfidence fitLineToSamples (const std::vector<T>& samples, deUint64 T::SampleType::*target = &T::SampleType::fitResponseDuration)
 {
 	const int numSamples = (int)samples.size();

 	return fitLineToSamples(samples, 0, numSamples, target);
 }

 // Returns the (unsigned) area enclosed between two lines over the interval [xmin, xmax].
 // Line A is y = lineAOffset + lineACoefficient * x; line B likewise.
 //
 // If the lines do not cross inside the interval, the area is a trapezoid: the width times
 // the gap at the midpoint. If they cross, the area is two triangles that meet at the
 // crossing point. The crossing point is located from the gaps at the two ends of the
 // interval and not from the slope difference, so the result stays correct for lines
 // with very small slopes (no absolute epsilon on the slope is involved).
 static float getAreaBetweenLines (float xmin, float xmax, float lineAOffset, float lineACoefficient, float lineBOffset, float lineBCoefficient)
 {
 	const float lineAMin		= lineAOffset + lineACoefficient * xmin;
 	const float lineAMax		= lineAOffset + lineACoefficient * xmax;
 	const float lineBMin		= lineBOffset + lineBCoefficient * xmin;
 	const float lineBMax		= lineBOffset + lineBCoefficient * xmax;
 	const bool	aOverBAtBegin	= (lineAMin > lineBMin);
 	const bool	aOverBAtEnd		= (lineAMax > lineBMax);
 	const float width			= (xmax - xmin);

 	if (aOverBAtBegin == aOverBAtEnd)
 	{
 		// lines do not intersect

 		const float midpoint	= (xmin + xmax) / 2.0f;

 		const float lineAHeight	= lineAOffset + lineACoefficient * midpoint;
 		const float lineBHeight	= lineBOffset + lineBCoefficient * midpoint;
 		const float midGap		= lineAHeight - lineBHeight;

 		return width * ((midGap < 0.0f) ? (-midGap) : (midGap));
 	}
 	else
 	{
 		// lines intersect

 		const float leftGap		= lineAMin - lineBMin;
 		const float rightGap	= lineAMax - lineBMax;
 		const float leftHeight	= (leftGap < 0.0f) ? (-leftGap) : (leftGap);
 		const float rightHeight	= (rightGap < 0.0f) ? (-rightGap) : (rightGap);
 		const float totalHeight	= leftHeight + rightHeight;

 		// The gaps have opposite signs here, so the crossing splits the width in the ratio
 		// leftHeight : rightHeight
 		if (totalHeight <= 0.0f)
 			return 0.0f;

 		const float leftBase	= width * (leftHeight / totalHeight);
 		const float rightBase	= width * (rightHeight / totalHeight);

 		return (0.5f * leftHeight * leftBase) + (0.5f * rightHeight * rightBase);
 	}
 }

 template <typename T>
 static float calculateSampleFitLinearity (const std::vector<T>& samples, int T::*predictor)
 {
 	// Compare the fitted line of first half of the samples to the fitted line of
 	// the second half of the samples. Calculate a AABB that fully contains every
 	// sample's x component and both fit lines in this range. Calculate the ratio
 	// of the area between the lines and the AABB.

 	const float				epsilon				= 1.e-6f;
 	const int				midPoint			= (int)samples.size() / 2;
 	const LineParametersWithConfidence	startApproximation	= fitLineToSamples(samples, 0, midPoint, &T::SampleType::fitResponseDuration);
 	const LineParametersWithConfidence	endApproximation	= fitLineToSamples(samples, midPoint, (int)samples.size(), &T::SampleType::fitResponseDuration);

 	const float				aabbMinX			= (float)(samples.front().*predictor);
 	const float				aabbMinY			= de::min(startApproximation.offset + startApproximation.coefficient*aabbMinX, endApproximation.offset + endApproximation.coefficient*aabbMinX);
 	const float				aabbMaxX			= (float)(samples.back().*predictor);
 	const float				aabbMaxY			= de::max(startApproximation.offset + startApproximation.coefficient*aabbMaxX, endApproximation.offset + endApproximation.coefficient*aabbMaxX);

 	const float				aabbArea			= (aabbMaxX - aabbMinX) * (aabbMaxY - aabbMinY);
 	const float				areaBetweenLines	= getAreaBetweenLines(aabbMinX, aabbMaxX, startApproximation.offset, startApproximation.coefficient, endApproximation.offset, endApproximation.coefficient);
 	const float				errorAreaRatio		= (aabbArea < epsilon) ? (1.0f) : (areaBetweenLines / aabbArea);

 	return de::clamp(1.0f - errorAreaRatio, 0.0f, 1.0f);
 }

 template <typename DurationType>
 static float calculateSampleFitLinearity (const std::vector<UploadSampleResult<DurationType> >& samples)
 {
 	// Upload samples: use the writtenSize member as the predictor.
 	typedef UploadSampleResult<DurationType> Sample;

 	int Sample::* const predictor = &Sample::writtenSize;

 	return calculateSampleFitLinearity(samples, predictor);
 }

 template <typename DurationType>
 static float calculateSampleFitLinearity (const std::vector<RenderSampleResult<DurationType> >& samples)
 {
 	// Render samples: use the renderDataSize member as the predictor.
 	typedef RenderSampleResult<DurationType> Sample;

 	int Sample::* const predictor = &Sample::renderDataSize;

 	return calculateSampleFitLinearity(samples, predictor);
 }

 template <typename T>
 static float calculateSampleTemporalStability (const std::vector<T>& samples, int T::*predictor)
 {
 	// Samples are sampled in the following order: 1) even samples (in random order) 2) odd samples (in random order)
 	// Compare the fitted line of even samples to the fitted line of the odd samples. Calculate a AABB that fully
 	// contains every sample's x component and both fit lines in this range. Calculate the ratio of the area between
 	// the lines and the AABB.

 	const float				epsilon				= 1.e-6f;
 	const LineParametersWithConfidence	evenApproximation	= fitLineToSamples(samples, 0, (int)samples.size(), 2, &T::SampleType::fitResponseDuration);
 	const LineParametersWithConfidence	oddApproximation	= fitLineToSamples(samples, 1, (int)samples.size(), 2, &T::SampleType::fitResponseDuration);

 	const float				aabbMinX			= (float)(samples.front().*predictor);
 	const float				aabbMinY			= de::min(evenApproximation.offset + evenApproximation.coefficient*aabbMinX, oddApproximation.offset + oddApproximation.coefficient*aabbMinX);
 	const float				aabbMaxX			= (float)(samples.back().*predictor);
 	const float				aabbMaxY			= de::max(evenApproximation.offset + evenApproximation.coefficient*aabbMaxX, oddApproximation.offset + oddApproximation.coefficient*aabbMaxX);

 	const float				aabbArea			= (aabbMaxX - aabbMinX) * (aabbMaxY - aabbMinY);
 	const float				areaBetweenLines	= getAreaBetweenLines(aabbMinX, aabbMaxX, evenApproximation.offset, evenApproximation.coefficient, oddApproximation.offset, oddApproximation.coefficient);
 	const float				errorAreaRatio		= (aabbArea < epsilon) ? (1.0f) : (areaBetweenLines / aabbArea);

 	return de::clamp(1.0f - errorAreaRatio, 0.0f, 1.0f);
 }

 template <typename DurationType>
 static float calculateSampleTemporalStability (const std::vector<UploadSampleResult<DurationType> >& samples)
 {
 	// Upload samples: use the writtenSize member as the predictor.
 	typedef UploadSampleResult<DurationType> Sample;

 	int Sample::* const predictor = &Sample::writtenSize;

 	return calculateSampleTemporalStability(samples, predictor);
 }

 template <typename DurationType>
 static float calculateSampleTemporalStability (const std::vector<RenderSampleResult<DurationType> >& samples)
 {
 	// Render samples: use the renderDataSize member as the predictor.
 	typedef RenderSampleResult<DurationType> Sample;

 	int Sample::* const predictor = &Sample::renderDataSize;

 	return calculateSampleTemporalStability(samples, predictor);
 }

 // Distributes samples into numBuckets buckets by allocatedSize. Each bucket covers an
 // equal share of the [minBufferSize, maxBufferSize] range found in the samples.
 //
 // \param samples		Samples to distribute; every allocatedSize is asserted to be non-zero.
 // \param buckets		Array of bucket vectors indexed 0..numBuckets-1; samples are appended to them.
 // \param numBuckets	Number of buckets.
 // \param minBufferSize	Receives the smallest allocatedSize seen (0 if samples is empty).
 // \param maxBufferSize	Receives the largest allocatedSize seen (0 if samples is empty).
 template <typename DurationType>
 static void bucketizeSamplesUniformly (const std::vector<UploadSampleResult<DurationType> >& samples, std::vector<UploadSampleResult<DurationType> >* buckets, int numBuckets, int& minBufferSize, int& maxBufferSize)
 {
 	minBufferSize = 0;
 	maxBufferSize = 0;

 	// First pass: find the size range. Zero doubles as "not set yet", which is
 	// why sizes are asserted to be non-zero.
 	for (int sampleNdx = 0; sampleNdx < (int)samples.size(); ++sampleNdx)
 	{
 		DE_ASSERT(samples[sampleNdx].allocatedSize != 0);

 		if (!minBufferSize || samples[sampleNdx].allocatedSize < minBufferSize)
 			minBufferSize = samples[sampleNdx].allocatedSize;
 		if (!maxBufferSize || samples[sampleNdx].allocatedSize > maxBufferSize)
 			maxBufferSize = samples[sampleNdx].allocatedSize;
 	}

 	const int sizeRange = maxBufferSize - minBufferSize;

 	// Second pass: map each sample's position within the range to a bucket index.
 	for (int sampleNdx = 0; sampleNdx < (int)samples.size(); ++sampleNdx)
 	{
 		int bucketNdx = 0;

 		// If every sample has the same size the range is zero. Dividing would give
 		// 0/0 = NaN, and converting NaN to int is undefined, so such samples go
 		// to the first bucket instead.
 		if (sizeRange != 0)
 		{
 			const float bucketNdxFloat = (float)(samples[sampleNdx].allocatedSize - minBufferSize) / (float)sizeRange * (float)numBuckets;

 			// The largest sample maps to exactly numBuckets; clamp it into the last bucket.
 			bucketNdx = de::clamp((int)deFloatFloor(bucketNdxFloat), 0, numBuckets-1);
 		}

 		buckets[bucketNdx].push_back(samples[sampleNdx]);
 	}
 }

 template <typename SampleType>
 static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_MAP_STATS>::Type logMapRangeStats (tcu::TestLog& log, const typename SampleTypeTraits<SampleType>::StatsType& stats)
 {
 	// Variant for sample types with HAS_MAP_STATS set: writes each stats.map
 	// timing as its own float log entry with unit "us".
 	log << tcu::TestLog::Float("MapRangeMin", "MapRange: Min time", "us", QP_KEY_TAG_TIME, stats.map.minTime);
 	log << tcu::TestLog::Float("MapRangeMax", "MapRange: Max time", "us", QP_KEY_TAG_TIME, stats.map.maxTime);
 	log << tcu::TestLog::Float("MapRangeMin90", "MapRange: 90%-Min time", "us", QP_KEY_TAG_TIME, stats.map.min2DecileTime);
 	log << tcu::TestLog::Float("MapRangeMax90", "MapRange: 90%-Max time", "us", QP_KEY_TAG_TIME, stats.map.max9DecileTime);
 	log << tcu::TestLog::Float("MapRangeMedian", "MapRange: Median time", "us", QP_KEY_TAG_TIME, stats.map.medianTime);
 }

 template <typename SampleType>
 static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_UNMAP_STATS>::Type logUnmapStats (tcu::TestLog& log, const typename SampleTypeTraits<SampleType>::StatsType& stats)
 {
 	// Variant for sample types with HAS_UNMAP_STATS set: writes each stats.unmap
 	// timing as its own float log entry with unit "us".
 	log << tcu::TestLog::Float("UnmapMin", "Unmap: Min time", "us", QP_KEY_TAG_TIME, stats.unmap.minTime);
 	log << tcu::TestLog::Float("UnmapMax", "Unmap: Max time", "us", QP_KEY_TAG_TIME, stats.unmap.maxTime);
 	log << tcu::TestLog::Float("UnmapMin90", "Unmap: 90%-Min time", "us", QP_KEY_TAG_TIME, stats.unmap.min2DecileTime);
 	log << tcu::TestLog::Float("UnmapMax90", "Unmap: 90%-Max time", "us", QP_KEY_TAG_TIME, stats.unmap.max9DecileTime);
 	log << tcu::TestLog::Float("UnmapMedian", "Unmap: Median time", "us", QP_KEY_TAG_TIME, stats.unmap.medianTime);
 }

 template <typename SampleType>
 static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_WRITE_STATS>::Type logWriteStats (tcu::TestLog& log, const typename SampleTypeTraits<SampleType>::StatsType& stats)
 {
 	// Variant for sample types with HAS_WRITE_STATS set: writes each stats.write
 	// timing as its own float log entry with unit "us".
 	log << tcu::TestLog::Float("WriteMin", "Write: Min time", "us", QP_KEY_TAG_TIME, stats.write.minTime);
 	log << tcu::TestLog::Float("WriteMax", "Write: Max time", "us", QP_KEY_TAG_TIME, stats.write.maxTime);
 	log << tcu::TestLog::Float("WriteMin90", "Write: 90%-Min time", "us", QP_KEY_TAG_TIME, stats.write.min2DecileTime);
 	log << tcu::TestLog::Float("WriteMax90", "Write: 90%-Max time", "us", QP_KEY_TAG_TIME, stats.write.max9DecileTime);
 	log << tcu::TestLog::Float("WriteMedian", "Write: Median time", "us", QP_KEY_TAG_TIME, stats.write.medianTime);
 }

 template <typename SampleType>
 static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_FLUSH_STATS>::Type logFlushStats (tcu::TestLog& log, const typename SampleTypeTraits<SampleType>::StatsType& stats)
 {
 	// Variant for sample types with HAS_FLUSH_STATS set: writes each stats.flush
 	// timing as its own float log entry with unit "us".
 	log << tcu::TestLog::Float("FlushMin", "Flush: Min time", "us", QP_KEY_TAG_TIME, stats.flush.minTime);
 	log << tcu::TestLog::Float("FlushMax", "Flush: Max time", "us", QP_KEY_TAG_TIME, stats.flush.maxTime);
 	log << tcu::TestLog::Float("FlushMin90", "Flush: 90%-Min time", "us", QP_KEY_TAG_TIME, stats.flush.min2DecileTime);
 	log << tcu::TestLog::Float("FlushMax90", "Flush: 90%-Max time", "us", QP_KEY_TAG_TIME, stats.flush.max9DecileTime);
 	log << tcu::TestLog::Float("FlushMedian", "Flush: Median time", "us", QP_KEY_TAG_TIME, stats.flush.medianTime);
 }

 template <typename SampleType>
 static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_ALLOC_STATS>::Type logAllocStats (tcu::TestLog& log, const typename SampleTypeTraits<SampleType>::StatsType& stats)
 {
 	// Variant for sample types with HAS_ALLOC_STATS set: writes each stats.alloc
 	// timing as its own float log entry with unit "us".
 	log << tcu::TestLog::Float("AllocMin", "Alloc: Min time", "us", QP_KEY_TAG_TIME, stats.alloc.minTime);
 	log << tcu::TestLog::Float("AllocMax", "Alloc: Max time", "us", QP_KEY_TAG_TIME, stats.alloc.maxTime);
 	log << tcu::TestLog::Float("AllocMin90", "Alloc: 90%-Min time", "us", QP_KEY_TAG_TIME, stats.alloc.min2DecileTime);
 	log << tcu::TestLog::Float("AllocMax90", "Alloc: 90%-Max time", "us", QP_KEY_TAG_TIME, stats.alloc.max9DecileTime);
 	log << tcu::TestLog::Float("AllocMedian", "Alloc: Median time", "us", QP_KEY_TAG_TIME, stats.alloc.medianTime);
 }

 template <typename SampleType>
 static typename EnableIf<void, Not<SampleTypeTraits<SampleType>::HAS_MAP_STATS>::Value>::Type logMapRangeStats (tcu::TestLog& log, const typename SampleTypeTraits<SampleType>::StatsType& stats)
 {
 	// Variant for sample types without HAS_MAP_STATS: intentionally logs nothing.
 	DE_UNREF(stats);
 	DE_UNREF(log);
 }

 template <typename SampleType>
 static typename EnableIf<void, Not<SampleTypeTraits<SampleType>::HAS_UNMAP_STATS>::Value>::Type logUnmapStats (tcu::TestLog& log, const typename SampleTypeTraits<SampleType>::StatsType& stats)
 {
 	// Variant for sample types without HAS_UNMAP_STATS: intentionally logs nothing.
 	DE_UNREF(stats);
 	DE_UNREF(log);
 }

 template <typename SampleType>
 static typename EnableIf<void, Not<SampleTypeTraits<SampleType>::HAS_WRITE_STATS>::Value>::Type logWriteStats (tcu::TestLog& log, const typename SampleTypeTraits<SampleType>::StatsType& stats)
 {
 	// Variant for sample types without HAS_WRITE_STATS: intentionally logs nothing.
 	DE_UNREF(stats);
 	DE_UNREF(log);
 }

 template <typename SampleType>
 static typename EnableIf<void, Not<SampleTypeTraits<SampleType>::HAS_FLUSH_STATS>::Value>::Type logFlushStats (tcu::TestLog& log, const typename SampleTypeTraits<SampleType>::StatsType& stats)
 {
 	// Variant for sample types without HAS_FLUSH_STATS: intentionally logs nothing.
 	DE_UNREF(stats);
 	DE_UNREF(log);
 }

 template <typename SampleType>
 static typename EnableIf<void, Not<SampleTypeTraits<SampleType>::HAS_ALLOC_STATS>::Value>::Type logAllocStats (tcu::TestLog& log, const typename SampleTypeTraits<SampleType>::StatsType& stats)
 {
 	// Variant for sample types without HAS_ALLOC_STATS: intentionally logs nothing.
 	DE_UNREF(stats);
 	DE_UNREF(log);
 }

 template <typename SampleType>
 static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_MAP_STATS>::Type logMapContribution (tcu::TestLog& log, const std::vector<UploadSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
 {
 	// Fit a line to the samples' mapDuration values. The fit's offset is logged as
 	// the constant cost and its coefficient, scaled by 1024*1024, as the linear
 	// cost. The median is taken from stats.
 	const LineParametersWithConfidence	mapFit			= fitLineToSamples(samples, &SampleType::mapDuration);
 	const float							linearCostPerMB	= mapFit.coefficient * 1024.0f * 1024.0f;

 	log << tcu::TestLog::Float("MapConstantCost", "Map: Approximated contant cost", "us", QP_KEY_TAG_TIME, mapFit.offset);
 	log << tcu::TestLog::Float("MapLinearCost", "Map: Approximated linear cost", "us / MB", QP_KEY_TAG_TIME, linearCostPerMB);
 	log << tcu::TestLog::Float("MapMedianCost", "Map: Median cost", "us", QP_KEY_TAG_TIME, stats.map.medianTime);
 }

 template <typename SampleType>
 static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_UNMAP_STATS>::Type logUnmapContribution (tcu::TestLog& log, const std::vector<UploadSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
 {
 	// Fit a line to the samples' unmapDuration values. The fit's offset is logged as
 	// the constant cost and its coefficient, scaled by 1024*1024, as the linear
 	// cost. The median is taken from stats.
 	const LineParametersWithConfidence	unmapFit		= fitLineToSamples(samples, &SampleType::unmapDuration);
 	const float							linearCostPerMB	= unmapFit.coefficient * 1024.0f * 1024.0f;

 	log << tcu::TestLog::Float("UnmapConstantCost", "Unmap: Approximated contant cost", "us", QP_KEY_TAG_TIME, unmapFit.offset);
 	log << tcu::TestLog::Float("UnmapLinearCost", "Unmap: Approximated linear cost", "us / MB", QP_KEY_TAG_TIME, linearCostPerMB);
 	log << tcu::TestLog::Float("UnmapMedianCost", "Unmap: Median cost", "us", QP_KEY_TAG_TIME, stats.unmap.medianTime);
 }

 template <typename SampleType>
 static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_WRITE_STATS>::Type logWriteContribution (tcu::TestLog& log, const std::vector<UploadSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
 {
 	// Fit a line to the samples' writeDuration values. The fit's offset is logged as
 	// the constant cost and its coefficient, scaled by 1024*1024, as the linear
 	// cost. The median is taken from stats.
 	const LineParametersWithConfidence	writeFit		= fitLineToSamples(samples, &SampleType::writeDuration);
 	const float							linearCostPerMB	= writeFit.coefficient * 1024.0f * 1024.0f;

 	log << tcu::TestLog::Float("WriteConstantCost", "Write: Approximated contant cost", "us", QP_KEY_TAG_TIME, writeFit.offset);
 	log << tcu::TestLog::Float("WriteLinearCost", "Write: Approximated linear cost", "us / MB", QP_KEY_TAG_TIME, linearCostPerMB);
 	log << tcu::TestLog::Float("WriteMedianCost", "Write: Median cost", "us", QP_KEY_TAG_TIME, stats.write.medianTime);
 }

 template <typename SampleType>
 static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_FLUSH_STATS>::Type logFlushContribution (tcu::TestLog& log, const std::vector<UploadSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
 {
 	// Fit a line to the samples' flushDuration values. The fit's offset is logged as
 	// the constant cost and its coefficient, scaled by 1024*1024, as the linear
 	// cost. The median is taken from stats.
 	const LineParametersWithConfidence	flushFit		= fitLineToSamples(samples, &SampleType::flushDuration);
 	const float							linearCostPerMB	= flushFit.coefficient * 1024.0f * 1024.0f;

 	log << tcu::TestLog::Float("FlushConstantCost", "Flush: Approximated contant cost", "us", QP_KEY_TAG_TIME, flushFit.offset);
 	log << tcu::TestLog::Float("FlushLinearCost", "Flush: Approximated linear cost", "us / MB", QP_KEY_TAG_TIME, linearCostPerMB);
 	log << tcu::TestLog::Float("FlushMedianCost", "Flush: Median cost", "us", QP_KEY_TAG_TIME, stats.flush.medianTime);
 }

 template <typename SampleType>
 static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_ALLOC_STATS>::Type logAllocContribution (tcu::TestLog& log, const std::vector<UploadSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
 {
 	// Fit a line to the samples' allocDuration values. The fit's offset is logged as
 	// the constant cost and its coefficient, scaled by 1024*1024, as the linear
 	// cost. The median is taken from stats.
 	const LineParametersWithConfidence	allocFit		= fitLineToSamples(samples, &SampleType::allocDuration);
 	const float							linearCostPerMB	= allocFit.coefficient * 1024.0f * 1024.0f;

 	log << tcu::TestLog::Float("AllocConstantCost", "Alloc: Approximated contant cost", "us", QP_KEY_TAG_TIME, allocFit.offset);
 	log << tcu::TestLog::Float("AllocLinearCost", "Alloc: Approximated linear cost", "us / MB", QP_KEY_TAG_TIME, linearCostPerMB);
 	log << tcu::TestLog::Float("AllocMedianCost", "Alloc: Median cost", "us", QP_KEY_TAG_TIME, stats.alloc.medianTime);
 }

 template <typename SampleType>
 static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_RENDER_STATS>::Type logRenderContribution (tcu::TestLog& log, const std::vector<RenderSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
 {
 	// Fit a line to the samples' renderDuration values. The fit's offset is logged as
 	// the constant cost and its coefficient, scaled by 1024*1024, as the linear
 	// cost. The median is taken from stats.
 	const LineParametersWithConfidence	renderFit		= fitLineToSamples(samples, &SampleType::renderDuration);
 	const float							linearCostPerMB	= renderFit.coefficient * 1024.0f * 1024.0f;

 	log << tcu::TestLog::Float("DrawCallConstantCost", "DrawCall: Approximated contant cost", "us", QP_KEY_TAG_TIME, renderFit.offset);
 	log << tcu::TestLog::Float("DrawCallLinearCost", "DrawCall: Approximated linear cost", "us / MB", QP_KEY_TAG_TIME, linearCostPerMB);
 	log << tcu::TestLog::Float("DrawCallMedianCost", "DrawCall: Median cost", "us", QP_KEY_TAG_TIME, stats.render.medianTime);
 }

 template <typename SampleType>
 static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_READ_STATS>::Type logReadContribution (tcu::TestLog& log, const std::vector<RenderSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
 {
 	// Fit a line to the samples' readDuration values. The fit's offset is logged as
 	// the constant cost and its coefficient, scaled by 1024*1024, as the linear
 	// cost. The median is taken from stats.
 	const LineParametersWithConfidence	readFit			= fitLineToSamples(samples, &SampleType::readDuration);
 	const float							linearCostPerMB	= readFit.coefficient * 1024.0f * 1024.0f;

 	log << tcu::TestLog::Float("ReadConstantCost", "Read: Approximated contant cost", "us", QP_KEY_TAG_TIME, readFit.offset);
 	log << tcu::TestLog::Float("ReadLinearCost", "Read: Approximated linear cost", "us / MB", QP_KEY_TAG_TIME, linearCostPerMB);
 	log << tcu::TestLog::Float("ReadMedianCost", "Read: Median cost", "us", QP_KEY_TAG_TIME, stats.read.medianTime);
 }

 template <typename SampleType>
 static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_UPLOAD_STATS>::Type logUploadContribution (tcu::TestLog& log, const std::vector<RenderSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
 {
 	// Fit a line to the samples' uploadDuration values. The fit's offset is logged as
 	// the constant cost and its coefficient, scaled by 1024*1024, as the linear
 	// cost. The median is taken from stats.
 	const LineParametersWithConfidence	uploadFit		= fitLineToSamples(samples, &SampleType::uploadDuration);
 	const float							linearCostPerMB	= uploadFit.coefficient * 1024.0f * 1024.0f;

 	log << tcu::TestLog::Float("UploadConstantCost", "Upload: Approximated contant cost", "us", QP_KEY_TAG_TIME, uploadFit.offset);
 	log << tcu::TestLog::Float("UploadLinearCost", "Upload: Approximated linear cost", "us / MB", QP_KEY_TAG_TIME, linearCostPerMB);
 	log << tcu::TestLog::Float("UploadMedianCost", "Upload: Median cost", "us", QP_KEY_TAG_TIME, stats.upload.medianTime);
 }

 template <typename SampleType>
 static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_TOTAL_STATS>::Type logTotalContribution (tcu::TestLog& log, const std::vector<RenderSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
 {
 	// Fit a line to the samples' totalDuration values. The fit's offset is logged as
 	// the constant cost and its coefficient, scaled by 1024*1024, as the linear
 	// cost. The median is taken from stats.
 	const LineParametersWithConfidence	totalFit		= fitLineToSamples(samples, &SampleType::totalDuration);
 	const float							linearCostPerMB	= totalFit.coefficient * 1024.0f * 1024.0f;

 	log << tcu::TestLog::Float("TotalConstantCost", "Total: Approximated contant cost", "us", QP_KEY_TAG_TIME, totalFit.offset);
 	log << tcu::TestLog::Float("TotalLinearCost", "Total: Approximated linear cost", "us / MB", QP_KEY_TAG_TIME, linearCostPerMB);
 	log << tcu::TestLog::Float("TotalMedianCost", "Total: Median cost", "us", QP_KEY_TAG_TIME, stats.total.medianTime);
 }

 template <typename SampleType>
 static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_FIRST_RENDER_STATS>::Type logFirstRenderContribution (tcu::TestLog& log, const std::vector<RenderSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
 {
 	// Fit a line to the samples' firstRenderDuration values. The fit's offset is
 	// logged as the constant cost and its coefficient, scaled by 1024*1024, as the
 	// linear cost. The median is taken from stats.
 	const LineParametersWithConfidence	firstRenderFit	= fitLineToSamples(samples, &SampleType::firstRenderDuration);
 	const float							linearCostPerMB	= firstRenderFit.coefficient * 1024.0f * 1024.0f;

 	log << tcu::TestLog::Float("FirstDrawCallConstantCost", "First DrawCall: Approximated contant cost", "us", QP_KEY_TAG_TIME, firstRenderFit.offset);
 	log << tcu::TestLog::Float("FirstDrawCallLinearCost", "First DrawCall: Approximated linear cost", "us / MB", QP_KEY_TAG_TIME, linearCostPerMB);
 	log << tcu::TestLog::Float("FirstDrawCallMedianCost", "First DrawCall: Median cost", "us", QP_KEY_TAG_TIME, stats.firstRender.medianTime);
 }

 template <typename SampleType>
 static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_SECOND_RENDER_STATS>::Type logSecondRenderContribution (tcu::TestLog& log, const std::vector<RenderSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
 {
 	// Fit a line to the samples' secondRenderDuration values. The fit's offset is
 	// logged as the constant cost and its coefficient, scaled by 1024*1024, as the
 	// linear cost. The median is taken from stats.
 	const LineParametersWithConfidence	secondRenderFit	= fitLineToSamples(samples, &SampleType::secondRenderDuration);
 	const float							linearCostPerMB	= secondRenderFit.coefficient * 1024.0f * 1024.0f;

 	log << tcu::TestLog::Float("SecondDrawCallConstantCost", "Second DrawCall: Approximated contant cost", "us", QP_KEY_TAG_TIME, secondRenderFit.offset);
 	log << tcu::TestLog::Float("SecondDrawCallLinearCost", "Second DrawCall: Approximated linear cost", "us / MB", QP_KEY_TAG_TIME, linearCostPerMB);
 	log << tcu::TestLog::Float("SecondDrawCallMedianCost", "Second DrawCall: Median cost", "us", QP_KEY_TAG_TIME, stats.secondRender.medianTime);
 }

 template <typename SampleType>
 static typename EnableIf<void, Not<SampleTypeTraits<SampleType>::HAS_MAP_STATS>::Value>::Type logMapContribution (tcu::TestLog& log, const std::vector<UploadSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
 {
 	// Variant for sample types without HAS_MAP_STATS: intentionally logs nothing.
 	DE_UNREF(stats);
 	DE_UNREF(samples);
 	DE_UNREF(log);
 }

 template <typename SampleType>
 static typename EnableIf<void, Not<SampleTypeTraits<SampleType>::HAS_UNMAP_STATS>::Value>::Type logUnmapContribution (tcu::TestLog& log, const std::vector<UploadSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
 {
 	// Variant for sample types without HAS_UNMAP_STATS: intentionally logs nothing.
 	DE_UNREF(stats);
 	DE_UNREF(samples);
 	DE_UNREF(log);
 }

 template <typename SampleType>
 static typename EnableIf<void, Not<SampleTypeTraits<SampleType>::HAS_WRITE_STATS>::Value>::Type logWriteContribution (tcu::TestLog& log, const std::vector<UploadSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
 {
 	// Variant for sample types without HAS_WRITE_STATS: intentionally logs nothing.
 	DE_UNREF(stats);
 	DE_UNREF(samples);
 	DE_UNREF(log);
 }

 template <typename SampleType>
 static typename EnableIf<void, Not<SampleTypeTraits<SampleType>::HAS_FLUSH_STATS>::Value>::Type logFlushContribution (tcu::TestLog& log, const std::vector<UploadSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
 {
 	// Variant for sample types without HAS_FLUSH_STATS: intentionally logs nothing.
 	DE_UNREF(stats);
 	DE_UNREF(samples);
 	DE_UNREF(log);
 }

 template <typename SampleType>
 static typename EnableIf<void, Not<SampleTypeTraits<SampleType>::HAS_ALLOC_STATS>::Value>::Type logAllocContribution (tcu::TestLog& log, const std::vector<UploadSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
 {
 	// Variant for sample types without HAS_ALLOC_STATS: intentionally logs nothing.
 	DE_UNREF(stats);
 	DE_UNREF(samples);
 	DE_UNREF(log);
 }

 template <typename SampleType>
 static typename EnableIf<void, Not<SampleTypeTraits<SampleType>::HAS_RENDER_STATS>::Value>::Type logRenderContribution (tcu::TestLog& log, const std::vector<RenderSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
 {
 	// Variant for sample types without HAS_RENDER_STATS: intentionally logs nothing.
 	DE_UNREF(stats);
 	DE_UNREF(samples);
 	DE_UNREF(log);
 }

 template <typename SampleType>
 static typename EnableIf<void, Not<SampleTypeTraits<SampleType>::HAS_READ_STATS>::Value>::Type logReadContribution (tcu::TestLog& log, const std::vector<RenderSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
 {
 	// Variant for sample types without HAS_READ_STATS: intentionally logs nothing.
 	DE_UNREF(stats);
 	DE_UNREF(samples);
 	DE_UNREF(log);
 }

 template <typename SampleType>
 static typename EnableIf<void, Not<SampleTypeTraits<SampleType>::HAS_UPLOAD_STATS>::Value>::Type logUploadContribution (tcu::TestLog& log, const std::vector<RenderSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
 {
 	// Variant for sample types without HAS_UPLOAD_STATS: intentionally logs nothing.
 	DE_UNREF(stats);
 	DE_UNREF(samples);
 	DE_UNREF(log);
 }

 template <typename SampleType>
 static typename EnableIf<void, Not<SampleTypeTraits<SampleType>::HAS_TOTAL_STATS>::Value>::Type logTotalContribution (tcu::TestLog& log, const std::vector<RenderSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
 {
 	// Variant for sample types without HAS_TOTAL_STATS: intentionally logs nothing.
 	DE_UNREF(stats);
 	DE_UNREF(samples);
 	DE_UNREF(log);
 }

 template <typename SampleType>
 static typename EnableIf<void, Not<SampleTypeTraits<SampleType>::HAS_FIRST_RENDER_STATS>::Value>::Type logFirstRenderContribution (tcu::TestLog& log, const std::vector<RenderSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
 {
 	// Variant for sample types without HAS_FIRST_RENDER_STATS: intentionally logs nothing.
 	DE_UNREF(stats);
 	DE_UNREF(samples);
 	DE_UNREF(log);
 }

 template <typename SampleType>
 static typename EnableIf<void, Not<SampleTypeTraits<SampleType>::HAS_SECOND_RENDER_STATS>::Value>::Type logSecondRenderContribution (tcu::TestLog& log, const std::vector<RenderSampleResult<SampleType> >& samples, const typename SampleTypeTraits<SampleType>::StatsType& stats)
 {
 	// Variant for sample types without HAS_SECOND_RENDER_STATS: intentionally logs nothing.
 	DE_UNREF(stats);
 	DE_UNREF(samples);
 	DE_UNREF(log);
 }

 void logSampleList (tcu::TestLog& log, const LineParametersWithConfidence& theilSenFitting, const std::vector<UploadSampleResult<SingleOperationDuration> >& samples)
 {
 	// Column header: every sample row below must emit its values in this order.
 	log << tcu::TestLog::SampleList("Samples", "Samples")
 		<< tcu::TestLog::SampleInfo
 		<< tcu::TestLog::ValueInfo("WrittenSize",		"Written size",			"bytes",	QP_SAMPLE_VALUE_TAG_PREDICTOR)
 		<< tcu::TestLog::ValueInfo("BufferSize",		"Buffer size",			"bytes",	QP_SAMPLE_VALUE_TAG_PREDICTOR)
 		<< tcu::TestLog::ValueInfo("UploadTime",		"Upload time",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
 		<< tcu::TestLog::ValueInfo("FitResidual",		"Fit residual",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
 		<< tcu::TestLog::EndSampleInfo;

 	const int numSamples = (int)samples.size();

 	for (int ndx = 0; ndx < numSamples; ++ndx)
 	{
 		const UploadSampleResult<SingleOperationDuration>& sample = samples[ndx];

 		// Residual: measured fit response minus the fitted line's value at this written size.
 		const float predicted	= theilSenFitting.offset + theilSenFitting.coefficient * (float)sample.writtenSize;
 		const float residual	= (float)sample.duration.fitResponseDuration - predicted;

 		log	<< tcu::TestLog::Sample
 			<< sample.writtenSize
 			<< sample.bufferSize
 			<< (int)sample.duration.totalDuration
 			<< residual
 			<< tcu::TestLog::EndSample;
 	}

 	log << tcu::TestLog::EndSampleList;
 }

 void logSampleList (tcu::TestLog& log, const LineParametersWithConfidence& theilSenFitting, const std::vector<UploadSampleResult<MapBufferRangeDuration> >& samples)
 {
 	// Column header: every sample row below must emit its values in this order.
 	log << tcu::TestLog::SampleList("Samples", "Samples")
 		<< tcu::TestLog::SampleInfo
 		<< tcu::TestLog::ValueInfo("WrittenSize",		"Written size",			"bytes",	QP_SAMPLE_VALUE_TAG_PREDICTOR)
 		<< tcu::TestLog::ValueInfo("BufferSize",		"Buffer size",			"bytes",	QP_SAMPLE_VALUE_TAG_PREDICTOR)
 		<< tcu::TestLog::ValueInfo("TotalTime",			"Total time",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
 		<< tcu::TestLog::ValueInfo("AllocTime",			"Alloc time",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
 		<< tcu::TestLog::ValueInfo("MapTime",			"Map time",				"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
 		<< tcu::TestLog::ValueInfo("UnmapTime",			"Unmap time",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
 		<< tcu::TestLog::ValueInfo("WriteTime",			"Write time",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
 		<< tcu::TestLog::ValueInfo("FitResidual",		"Fit residual",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
 		<< tcu::TestLog::EndSampleInfo;

 	const int numSamples = (int)samples.size();

 	for (int ndx = 0; ndx < numSamples; ++ndx)
 	{
 		const UploadSampleResult<MapBufferRangeDuration>& sample = samples[ndx];

 		// Residual: measured fit response minus the fitted line's value at this written size.
 		const float predicted	= theilSenFitting.offset + theilSenFitting.coefficient * (float)sample.writtenSize;
 		const float residual	= (float)sample.duration.fitResponseDuration - predicted;

 		log	<< tcu::TestLog::Sample
 			<< sample.writtenSize
 			<< sample.bufferSize
 			<< (int)sample.duration.totalDuration
 			<< (int)sample.duration.allocDuration
 			<< (int)sample.duration.mapDuration
 			<< (int)sample.duration.unmapDuration
 			<< (int)sample.duration.writeDuration
 			<< residual
 			<< tcu::TestLog::EndSample;
 	}

 	log << tcu::TestLog::EndSampleList;
 }

 void logSampleList (tcu::TestLog& log, const LineParametersWithConfidence& theilSenFitting, const std::vector<UploadSampleResult<MapBufferRangeDurationNoAlloc> >& samples)
 {
 	// Column header: every sample row below must emit its values in this order.
 	log << tcu::TestLog::SampleList("Samples", "Samples")
 		<< tcu::TestLog::SampleInfo
 		<< tcu::TestLog::ValueInfo("WrittenSize",		"Written size",			"bytes",	QP_SAMPLE_VALUE_TAG_PREDICTOR)
 		<< tcu::TestLog::ValueInfo("BufferSize",		"Buffer size",			"bytes",	QP_SAMPLE_VALUE_TAG_PREDICTOR)
 		<< tcu::TestLog::ValueInfo("TotalTime",			"Total time",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
 		<< tcu::TestLog::ValueInfo("MapTime",			"Map time",				"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
 		<< tcu::TestLog::ValueInfo("UnmapTime",			"Unmap time",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
 		<< tcu::TestLog::ValueInfo("WriteTime",			"Write time",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
 		<< tcu::TestLog::ValueInfo("FitResidual",		"Fit residual",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
 		<< tcu::TestLog::EndSampleInfo;

 	const int numSamples = (int)samples.size();

 	for (int ndx = 0; ndx < numSamples; ++ndx)
 	{
 		const UploadSampleResult<MapBufferRangeDurationNoAlloc>& sample = samples[ndx];

 		// Residual: measured fit response minus the fitted line's value at this written size.
 		const float predicted	= theilSenFitting.offset + theilSenFitting.coefficient * (float)sample.writtenSize;
 		const float residual	= (float)sample.duration.fitResponseDuration - predicted;

 		log	<< tcu::TestLog::Sample
 			<< sample.writtenSize
 			<< sample.bufferSize
 			<< (int)sample.duration.totalDuration
 			<< (int)sample.duration.mapDuration
 			<< (int)sample.duration.unmapDuration
 			<< (int)sample.duration.writeDuration
 			<< residual
 			<< tcu::TestLog::EndSample;
 	}

 	log << tcu::TestLog::EndSampleList;
 }

 void logSampleList (tcu::TestLog& log, const LineParametersWithConfidence& theilSenFitting, const std::vector<UploadSampleResult<MapBufferRangeFlushDuration> >& samples)
 {
 	// Column header: every sample row below must emit its values in this order.
 	log << tcu::TestLog::SampleList("Samples", "Samples")
 		<< tcu::TestLog::SampleInfo
 		<< tcu::TestLog::ValueInfo("WrittenSize",		"Written size",			"bytes",	QP_SAMPLE_VALUE_TAG_PREDICTOR)
 		<< tcu::TestLog::ValueInfo("BufferSize",		"Buffer size",			"bytes",	QP_SAMPLE_VALUE_TAG_PREDICTOR)
 		<< tcu::TestLog::ValueInfo("TotalTime",			"Total time",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
 		<< tcu::TestLog::ValueInfo("AllocTime",			"Alloc time",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
 		<< tcu::TestLog::ValueInfo("MapTime",			"Map time",				"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
 		<< tcu::TestLog::ValueInfo("UnmapTime",			"Unmap time",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
 		<< tcu::TestLog::ValueInfo("WriteTime",			"Write time",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
 		<< tcu::TestLog::ValueInfo("FlushTime",			"Flush time",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
 		<< tcu::TestLog::ValueInfo("FitResidual",		"Fit residual",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
 		<< tcu::TestLog::EndSampleInfo;

 	const int numSamples = (int)samples.size();

 	for (int ndx = 0; ndx < numSamples; ++ndx)
 	{
 		const UploadSampleResult<MapBufferRangeFlushDuration>& sample = samples[ndx];

 		// Residual: measured fit response minus the fitted line's value at this written size.
 		const float predicted	= theilSenFitting.offset + theilSenFitting.coefficient * (float)sample.writtenSize;
 		const float residual	= (float)sample.duration.fitResponseDuration - predicted;

 		log	<< tcu::TestLog::Sample
 			<< sample.writtenSize
 			<< sample.bufferSize
 			<< (int)sample.duration.totalDuration
 			<< (int)sample.duration.allocDuration
 			<< (int)sample.duration.mapDuration
 			<< (int)sample.duration.unmapDuration
 			<< (int)sample.duration.writeDuration
 			<< (int)sample.duration.flushDuration
 			<< residual
 			<< tcu::TestLog::EndSample;
 	}

 	log << tcu::TestLog::EndSampleList;
 }

 void logSampleList (tcu::TestLog& log, const LineParametersWithConfidence& theilSenFitting, const std::vector<UploadSampleResult<MapBufferRangeFlushDurationNoAlloc> >& samples)
 {
 	// Column header: every sample row below must emit its values in this order.
 	log << tcu::TestLog::SampleList("Samples", "Samples")
 		<< tcu::TestLog::SampleInfo
 		<< tcu::TestLog::ValueInfo("WrittenSize",		"Written size",			"bytes",	QP_SAMPLE_VALUE_TAG_PREDICTOR)
 		<< tcu::TestLog::ValueInfo("BufferSize",		"Buffer size",			"bytes",	QP_SAMPLE_VALUE_TAG_PREDICTOR)
 		<< tcu::TestLog::ValueInfo("TotalTime",			"Total time",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
 		<< tcu::TestLog::ValueInfo("MapTime",			"Map time",				"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
 		<< tcu::TestLog::ValueInfo("UnmapTime",			"Unmap time",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
 		<< tcu::TestLog::ValueInfo("WriteTime",			"Write time",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
 		<< tcu::TestLog::ValueInfo("FlushTime",			"Flush time",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
 		<< tcu::TestLog::ValueInfo("FitResidual",		"Fit residual",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
 		<< tcu::TestLog::EndSampleInfo;

 	const int numSamples = (int)samples.size();

 	for (int ndx = 0; ndx < numSamples; ++ndx)
 	{
 		const UploadSampleResult<MapBufferRangeFlushDurationNoAlloc>& sample = samples[ndx];

 		// Residual: measured fit response minus the fitted line's value at this written size.
 		const float predicted	= theilSenFitting.offset + theilSenFitting.coefficient * (float)sample.writtenSize;
 		const float residual	= (float)sample.duration.fitResponseDuration - predicted;

 		log	<< tcu::TestLog::Sample
 			<< sample.writtenSize
 			<< sample.bufferSize
 			<< (int)sample.duration.totalDuration
 			<< (int)sample.duration.mapDuration
 			<< (int)sample.duration.unmapDuration
 			<< (int)sample.duration.writeDuration
 			<< (int)sample.duration.flushDuration
 			<< residual
 			<< tcu::TestLog::EndSample;
 	}

 	log << tcu::TestLog::EndSampleList;
 }

 void logSampleList (tcu::TestLog& log, const LineParametersWithConfidence& theilSenFitting, const std::vector<RenderSampleResult<RenderReadDuration> >& samples)
 {
 	// Column header: every sample row below must emit its values in this order.
 	log << tcu::TestLog::SampleList("Samples", "Samples")
 		<< tcu::TestLog::SampleInfo
 		<< tcu::TestLog::ValueInfo("DataSize",			"Data processed",		"bytes",	QP_SAMPLE_VALUE_TAG_PREDICTOR)
 		<< tcu::TestLog::ValueInfo("VertexCount",		"Number of vertices",	"vertices",	QP_SAMPLE_VALUE_TAG_PREDICTOR)
 		<< tcu::TestLog::ValueInfo("TotalTime",			"Total time",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
 		<< tcu::TestLog::ValueInfo("DrawCallTime",		"Draw call time",		"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
 		<< tcu::TestLog::ValueInfo("ReadTime",			"ReadPixels time",		"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
 		<< tcu::TestLog::ValueInfo("FitResidual",		"Fit residual",			"us",		QP_SAMPLE_VALUE_TAG_RESPONSE)
 		<< tcu::TestLog::EndSampleInfo;

 	const int numSamples = (int)samples.size();

 	for (int ndx = 0; ndx < numSamples; ++ndx)
 	{
 		const RenderSampleResult<RenderReadDuration>& sample = samples[ndx];

 		// Residual: measured fit response minus the fitted line's value at this render data size.
 		const float predicted	= theilSenFitting.offset + theilSenFitting.coefficient * (float)sample.renderDataSize;
 		const float residual	= (float)sample.duration.fitResponseDuration - predicted;

 		// The "TotalTime" column carries renderReadDuration for this sample type.
 		log	<< tcu::TestLog::Sample
 			<< sample.renderDataSize
 			<< sample.numVertices
 			<< (int)sample.duration.renderReadDuration
 			<< (int)sample.duration.renderDuration
 			<< (int)sample.duration.readDuration
 			<< residual
 			<< tcu::TestLog::EndSample;
 	}

 	log << tcu::TestLog::EndSampleList;
 }

 // Writes the raw render + read samples (measured alongside an unrelated upload) to the
 // log as a sample list. Each row also carries the sample's residual against the fitted
 // line (theilSenFitting).
 void logSampleList (tcu::TestLog& log, const LineParametersWithConfidence& theilSenFitting, const std::vector<RenderSampleResult<UnrelatedUploadRenderReadDuration> >& samples)
 {
 	const int sampleCount = (int)samples.size();

 	// Column declarations
 	log << tcu::TestLog::SampleList("Samples", "Samples")
 		<< tcu::TestLog::SampleInfo
 		<< tcu::TestLog::ValueInfo("DataSize", "Data processed", "bytes", QP_SAMPLE_VALUE_TAG_PREDICTOR)
 		<< tcu::TestLog::ValueInfo("VertexCount", "Number of vertices", "vertices", QP_SAMPLE_VALUE_TAG_PREDICTOR)
 		<< tcu::TestLog::ValueInfo("UnrelatedUploadSize", "Unrelated upload size", "bytes", QP_SAMPLE_VALUE_TAG_PREDICTOR)
 		<< tcu::TestLog::ValueInfo("TotalTime", "Total time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
 		<< tcu::TestLog::ValueInfo("DrawCallTime", "Draw call time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
 		<< tcu::TestLog::ValueInfo("ReadTime", "ReadPixels time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
 		<< tcu::TestLog::ValueInfo("FitResidual", "Fit residual", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
 		<< tcu::TestLog::EndSampleInfo;

 	// One row per sample; values are streamed in the same order as the columns declared above
 	for (int ndx = 0; ndx < sampleCount; ++ndx)
 	{
 		const RenderSampleResult<UnrelatedUploadRenderReadDuration>& sample = samples[ndx];

 		// Measured response minus the value the fitted line predicts for this data size
 		const float residual = (float)sample.duration.fitResponseDuration - (theilSenFitting.offset + theilSenFitting.coefficient * (float)sample.renderDataSize);

 		log	<< tcu::TestLog::Sample
 			<< sample.renderDataSize
 			<< sample.numVertices
 			<< sample.unrelatedDataSize
 			<< (int)sample.duration.renderReadDuration
 			<< (int)sample.duration.renderDuration
 			<< (int)sample.duration.readDuration
 			<< residual
 			<< tcu::TestLog::EndSample;
 	}

 	log << tcu::TestLog::EndSampleList;
 }

 // Writes the raw upload + render + read samples to the log as a sample list. Each row
 // also carries the sample's residual against the fitted line (theilSenFitting).
 void logSampleList (tcu::TestLog& log, const LineParametersWithConfidence& theilSenFitting, const std::vector<RenderSampleResult<UploadRenderReadDuration> >& samples)
 {
 	const int sampleCount = (int)samples.size();

 	// Column declarations
 	// \note The value name "Upload time" contains a space, unlike the other names. It is
 	//       reproduced as-is because the name is part of the emitted log.
 	log << tcu::TestLog::SampleList("Samples", "Samples")
 		<< tcu::TestLog::SampleInfo
 		<< tcu::TestLog::ValueInfo("DataSize", "Data processed", "bytes", QP_SAMPLE_VALUE_TAG_PREDICTOR)
 		<< tcu::TestLog::ValueInfo("UploadSize", "Data uploaded", "bytes", QP_SAMPLE_VALUE_TAG_PREDICTOR)
 		<< tcu::TestLog::ValueInfo("VertexCount", "Number of vertices", "vertices", QP_SAMPLE_VALUE_TAG_PREDICTOR)
 		<< tcu::TestLog::ValueInfo("DrawReadTime", "Draw call and ReadPixels time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
 		<< tcu::TestLog::ValueInfo("TotalTime", "Total time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
 		<< tcu::TestLog::ValueInfo("Upload time", "Upload time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
 		<< tcu::TestLog::ValueInfo("DrawCallTime", "Draw call time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
 		<< tcu::TestLog::ValueInfo("ReadTime", "ReadPixels time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
 		<< tcu::TestLog::ValueInfo("FitResidual", "Fit residual", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
 		<< tcu::TestLog::EndSampleInfo;

 	// One row per sample; values are streamed in the same order as the columns declared above
 	for (int ndx = 0; ndx < sampleCount; ++ndx)
 	{
 		const RenderSampleResult<UploadRenderReadDuration>& sample = samples[ndx];

 		// Measured response minus the value the fitted line predicts for this data size
 		const float residual = (float)sample.duration.fitResponseDuration - (theilSenFitting.offset + theilSenFitting.coefficient * (float)sample.renderDataSize);

 		log	<< tcu::TestLog::Sample
 			<< sample.renderDataSize
 			<< sample.uploadedDataSize
 			<< sample.numVertices
 			<< (int)sample.duration.renderReadDuration
 			<< (int)sample.duration.totalDuration
 			<< (int)sample.duration.uploadDuration
 			<< (int)sample.duration.renderDuration
 			<< (int)sample.duration.readDuration
 			<< residual
 			<< tcu::TestLog::EndSample;
 	}

 	log << tcu::TestLog::EndSampleList;
 }

 // Writes the raw upload + render + read samples, including the unrelated upload size,
 // to the log as a sample list. Each row also carries the sample's residual against the
 // fitted line (theilSenFitting).
 void logSampleList (tcu::TestLog& log, const LineParametersWithConfidence& theilSenFitting, const std::vector<RenderSampleResult<UploadRenderReadDurationWithUnrelatedUploadSize> >& samples)
 {
 	const int sampleCount = (int)samples.size();

 	// Column declarations
 	// \note The value name "Upload time" contains a space, unlike the other names. It is
 	//       reproduced as-is because the name is part of the emitted log.
 	log << tcu::TestLog::SampleList("Samples", "Samples")
 		<< tcu::TestLog::SampleInfo
 		<< tcu::TestLog::ValueInfo("DataSize", "Data processed", "bytes", QP_SAMPLE_VALUE_TAG_PREDICTOR)
 		<< tcu::TestLog::ValueInfo("UploadSize", "Data uploaded", "bytes", QP_SAMPLE_VALUE_TAG_PREDICTOR)
 		<< tcu::TestLog::ValueInfo("VertexCount", "Number of vertices", "vertices", QP_SAMPLE_VALUE_TAG_PREDICTOR)
 		<< tcu::TestLog::ValueInfo("UnrelatedUploadSize", "Unrelated upload size", "bytes", QP_SAMPLE_VALUE_TAG_PREDICTOR)
 		<< tcu::TestLog::ValueInfo("DrawReadTime", "Draw call and ReadPixels time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
 		<< tcu::TestLog::ValueInfo("TotalTime", "Total time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
 		<< tcu::TestLog::ValueInfo("Upload time", "Upload time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
 		<< tcu::TestLog::ValueInfo("DrawCallTime", "Draw call time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
 		<< tcu::TestLog::ValueInfo("ReadTime", "ReadPixels time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
 		<< tcu::TestLog::ValueInfo("FitResidual", "Fit residual", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
 		<< tcu::TestLog::EndSampleInfo;

 	// One row per sample; values are streamed in the same order as the columns declared above
 	for (int ndx = 0; ndx < sampleCount; ++ndx)
 	{
 		const RenderSampleResult<UploadRenderReadDurationWithUnrelatedUploadSize>& sample = samples[ndx];

 		// Measured response minus the value the fitted line predicts for this data size
 		const float residual = (float)sample.duration.fitResponseDuration - (theilSenFitting.offset + theilSenFitting.coefficient * (float)sample.renderDataSize);

 		log	<< tcu::TestLog::Sample
 			<< sample.renderDataSize
 			<< sample.uploadedDataSize
 			<< sample.numVertices
 			<< sample.unrelatedDataSize
 			<< (int)sample.duration.renderReadDuration
 			<< (int)sample.duration.totalDuration
 			<< (int)sample.duration.uploadDuration
 			<< (int)sample.duration.renderDuration
 			<< (int)sample.duration.readDuration
 			<< residual
 			<< tcu::TestLog::EndSample;
 	}

 	log << tcu::TestLog::EndSampleList;
 }

 // Writes the raw render + upload + render + read samples to the log as a sample list.
 // Each row also carries the sample's residual against the fitted line (theilSenFitting).
 void logSampleList (tcu::TestLog& log, const LineParametersWithConfidence& theilSenFitting, const std::vector<RenderSampleResult<RenderUploadRenderReadDuration> >& samples)
 {
 	const int sampleCount = (int)samples.size();

 	// Column declarations
 	// \note The value name "Upload time" contains a space, unlike the other names. It is
 	//       reproduced as-is because the name is part of the emitted log.
 	log << tcu::TestLog::SampleList("Samples", "Samples")
 		<< tcu::TestLog::SampleInfo
 		<< tcu::TestLog::ValueInfo("DataSize", "Data processed", "bytes", QP_SAMPLE_VALUE_TAG_PREDICTOR)
 		<< tcu::TestLog::ValueInfo("UploadSize", "Data uploaded", "bytes", QP_SAMPLE_VALUE_TAG_PREDICTOR)
 		<< tcu::TestLog::ValueInfo("VertexCount", "Number of vertices", "vertices", QP_SAMPLE_VALUE_TAG_PREDICTOR)
 		<< tcu::TestLog::ValueInfo("DrawReadTime", "Second draw call and ReadPixels time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
 		<< tcu::TestLog::ValueInfo("TotalTime", "Total time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
 		<< tcu::TestLog::ValueInfo("FirstDrawCallTime", "First draw call time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
 		<< tcu::TestLog::ValueInfo("Upload time", "Upload time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
 		<< tcu::TestLog::ValueInfo("SecondDrawCallTime", "Second draw call time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
 		<< tcu::TestLog::ValueInfo("ReadTime", "ReadPixels time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
 		<< tcu::TestLog::ValueInfo("FitResidual", "Fit residual", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
 		<< tcu::TestLog::EndSampleInfo;

 	// One row per sample; values are streamed in the same order as the columns declared above
 	for (int ndx = 0; ndx < sampleCount; ++ndx)
 	{
 		const RenderSampleResult<RenderUploadRenderReadDuration>& sample = samples[ndx];

 		// Measured response minus the value the fitted line predicts for this data size
 		const float residual = (float)sample.duration.fitResponseDuration - (theilSenFitting.offset + theilSenFitting.coefficient * (float)sample.renderDataSize);

 		log	<< tcu::TestLog::Sample
 			<< sample.renderDataSize
 			<< sample.uploadedDataSize
 			<< sample.numVertices
 			<< (int)sample.duration.renderReadDuration
 			<< (int)sample.duration.totalDuration
 			<< (int)sample.duration.firstRenderDuration
 			<< (int)sample.duration.uploadDuration
 			<< (int)sample.duration.secondRenderDuration
 			<< (int)sample.duration.readDuration
 			<< residual
 			<< tcu::TestLog::EndSample;
 	}

 	log << tcu::TestLog::EndSampleList;
 }

 // Analyzes a set of upload samples and writes the analysis to the log.
 //
 // A line is fitted to the samples with fitLineToSamples() and statistics for the whole
 // set are computed against that fit. The function then logs, in order:
 //  - the raw sample list,
 //  - per-bucket statistics over 4 buffer size buckets (only if logBucketPerformance),
 //  - per-stage contributions (only if SampleTypeTraits<SampleType>::LOG_CONTRIBUTIONS),
 //  - a "Results" section with fit quality, fitted costs and approximated transfer rates.
 //
 // \param log					Log the sections are written to.
 // \param samples				Sample results. Must not be empty: the first and last
 //								elements are read to pick the size the rates are evaluated at.
 // \param logBucketPerformance	Whether to log the per-bucket statistics.
 // \return	transferRateMedian is the median rate from the statistics; transferRateAtRange
 //			and transferRateAtInfinity are the rates approximated from the fitted line with
 //			and without the fitted constant cost, respectively.
 template <typename SampleType>
 static UploadSampleAnalyzeResult analyzeSampleResults (tcu::TestLog& log, const std::vector<UploadSampleResult<SampleType> >& samples, bool logBucketPerformance)
 {
 	// front() / back() below are undefined for an empty sample set
 	DE_ASSERT(!samples.empty());

 	// Assume data is linear with some outliers, fit a line
 	const LineParametersWithConfidence						theilSenFitting						= fitLineToSamples(samples);
 	const typename SampleTypeTraits<SampleType>::StatsType	resultStats							= calculateSampleStatistics(theilSenFitting, samples);
 	float													approximatedTransferRate;
 	float													approximatedTransferRateNoConstant;

 	// Output raw samples
 	{
 		const tcu::ScopedLogSection	section(log, "Samples", "Samples");
 		logSampleList(log, theilSenFitting, samples);
 	}

 	// Calculate results for different ranges
 	if (logBucketPerformance)
 	{
 		const int										numBuckets				= 4;
 		int												minBufferSize			= 0;
 		int												maxBufferSize			= 0;
 		std::vector<UploadSampleResult<SampleType> >	buckets[numBuckets];

 		// NOTE(review): minBufferSize / maxBufferSize are presumably filled in by this call - confirm against its definition
 		bucketizeSamplesUniformly(samples, &buckets[0], numBuckets, minBufferSize, maxBufferSize);

 		for (int bucketNdx = 0; bucketNdx < numBuckets; ++bucketNdx)
 		{
 			if (buckets[bucketNdx].empty())
 				continue;

 			// Print a nice result summary

 			// Bucket bounds: [minBufferSize, maxBufferSize] split into numBuckets equal-width ranges
 			const int												bucketRangeMin	= minBufferSize + (int)(((float) bucketNdx    / (float)numBuckets) * (float)(maxBufferSize - minBufferSize));
 			const int												bucketRangeMax	= minBufferSize + (int)(((float)(bucketNdx+1) / (float)numBuckets) * (float)(maxBufferSize - minBufferSize));
 			// \note Bucket statistics are computed against the fit of the whole sample set
 			const typename SampleTypeTraits<SampleType>::StatsType	stats			= calculateSampleStatistics(theilSenFitting, buckets[bucketNdx]);
 			const tcu::ScopedLogSection								section			(log, "BufferSizeRange", std::string("Transfer performance with buffer size in range [").append(getHumanReadableByteSize(bucketRangeMin).append(", ").append(getHumanReadableByteSize(bucketRangeMax).append("]"))));

 			logMapRangeStats<SampleType>(log, stats);
 			logUnmapStats<SampleType>(log, stats);
 			logWriteStats<SampleType>(log, stats);
 			logFlushStats<SampleType>(log, stats);
 			logAllocStats<SampleType>(log, stats);

 			log	<< tcu::TestLog::Float("Min", "Total: Min time", "us", QP_KEY_TAG_TIME, stats.result.minTime)
 				<< tcu::TestLog::Float("Max", "Total: Max time", "us", QP_KEY_TAG_TIME, stats.result.maxTime)
 				<< tcu::TestLog::Float("Min90", "Total: 90%-Min time", "us", QP_KEY_TAG_TIME, stats.result.min2DecileTime)
 				<< tcu::TestLog::Float("Max90", "Total: 90%-Max time", "us", QP_KEY_TAG_TIME, stats.result.max9DecileTime)
 				<< tcu::TestLog::Float("Median", "Total: Median time", "us", QP_KEY_TAG_TIME, stats.result.medianTime)
 				<< tcu::TestLog::Float("MedianTransfer", "Median transfer rate", "MB / s", QP_KEY_TAG_PERFORMANCE, stats.medianRate / 1024.0f / 1024.0f)
 				<< tcu::TestLog::Float("MaxDiff", "Max difference to approximated", "us", QP_KEY_TAG_TIME, stats.maxDiffTime)
 				<< tcu::TestLog::Float("Max90Diff", "90%-Max difference to approximated", "us", QP_KEY_TAG_TIME, stats.maxDiff9DecileTime)
 				<< tcu::TestLog::Float("MedianDiff", "Median difference to approximated", "us", QP_KEY_TAG_TIME, stats.medianDiffTime)
 				<< tcu::TestLog::Float("MaxRelDiff", "Max relative difference to approximated", "%", QP_KEY_TAG_NONE, stats.maxRelDiffTime * 100.0f)
 				<< tcu::TestLog::Float("Max90RelDiff", "90%-Max relative difference to approximated", "%", QP_KEY_TAG_NONE, stats.max9DecileRelDiffTime * 100.0f)
 				<< tcu::TestLog::Float("MedianRelDiff", "Median relative difference to approximated", "%", QP_KEY_TAG_NONE, stats.medianRelDiffTime * 100.0f);
 		}
 	}

 	// Contributions
 	if (SampleTypeTraits<SampleType>::LOG_CONTRIBUTIONS)
 	{
 		const tcu::ScopedLogSection	section(log, "Contribution", "Contributions");

 		logMapContribution(log, samples, resultStats);
 		logUnmapContribution(log, samples, resultStats);
 		logWriteContribution(log, samples, resultStats);
 		logFlushContribution(log, samples, resultStats);
 		logAllocContribution(log, samples, resultStats);
 	}

 	// Print results
 	{
 		const tcu::ScopedLogSection	section(log, "Results", "Results");

 		// \note Midpoint of the first and last sample's buffer size rather than a true median.
 		//       NOTE(review): presumably callers pass samples ordered by buffer size - confirm.
 		const int	medianBufferSize					= (samples.front().bufferSize + samples.back().bufferSize) / 2;
 		// The fit is logged in microseconds below; dividing by 1000 twice gives seconds
 		const float	approximatedTransferTime			= (theilSenFitting.offset + theilSenFitting.coefficient * (float)medianBufferSize) / 1000.0f / 1000.0f;
 		const float	approximatedTransferTimeNoConstant	= (theilSenFitting.coefficient * (float)medianBufferSize) / 1000.0f / 1000.0f;
 		const float	sampleLinearity						= calculateSampleFitLinearity(samples);
 		const float	sampleTemporalStability				= calculateSampleTemporalStability(samples);

 		// Bytes per second; converted to MB / s only when logged
 		approximatedTransferRateNoConstant				= (float)medianBufferSize / approximatedTransferTimeNoConstant;
 		approximatedTransferRate						= (float)medianBufferSize / approximatedTransferTime;

 		log	<< tcu::TestLog::Float("ResultLinearity", "Sample linearity", "%", QP_KEY_TAG_QUALITY, sampleLinearity * 100.0f)
 			<< tcu::TestLog::Float("SampleTemporalStability", "Sample temporal stability", "%", QP_KEY_TAG_QUALITY, sampleTemporalStability * 100.0f)
 			<< tcu::TestLog::Float("ApproximatedConstantCost", "Approximated constant cost", "us", QP_KEY_TAG_TIME, theilSenFitting.offset)
 			<< tcu::TestLog::Float("ApproximatedConstantCostConfidence60Lower", "Approximated constant cost 60% confidence lower limit", "us", QP_KEY_TAG_TIME, theilSenFitting.offsetConfidenceLower)
 			<< tcu::TestLog::Float("ApproximatedConstantCostConfidence60Upper", "Approximated constant cost 60% confidence upper limit", "us", QP_KEY_TAG_TIME, theilSenFitting.offsetConfidenceUpper)
 			<< tcu::TestLog::Float("ApproximatedLinearCost", "Approximated linear cost", "us / MB", QP_KEY_TAG_TIME, theilSenFitting.coefficient * 1024.0f * 1024.0f)
 			<< tcu::TestLog::Float("ApproximatedLinearCostConfidence60Lower", "Approximated linear cost 60% confidence lower limit", "us / MB", QP_KEY_TAG_TIME, theilSenFitting.coefficientConfidenceLower * 1024.0f * 1024.0f)
 			<< tcu::TestLog::Float("ApproximatedLinearCostConfidence60Upper", "Approximated linear cost 60% confidence upper limit", "us / MB", QP_KEY_TAG_TIME, theilSenFitting.coefficientConfidenceUpper * 1024.0f * 1024.0f)
 			<< tcu::TestLog::Float("ApproximatedTransferRate", "Approximated transfer rate", "MB / s", QP_KEY_TAG_PERFORMANCE, approximatedTransferRate / 1024.0f / 1024.0f)
 			<< tcu::TestLog::Float("ApproximatedTransferRateNoConstant", "Approximated transfer rate without constant cost", "MB / s", QP_KEY_TAG_PERFORMANCE, approximatedTransferRateNoConstant / 1024.0f / 1024.0f)
 			<< tcu::TestLog::Float("SampleMedianTime", "Median sample time", "us", QP_KEY_TAG_TIME, resultStats.result.medianTime)
 			<< tcu::TestLog::Float("SampleMedianTransfer", "Median transfer rate", "MB / s", QP_KEY_TAG_PERFORMANCE, resultStats.medianRate / 1024.0f / 1024.0f);
 	}

 	// return approximated transfer rate
 	{
 		UploadSampleAnalyzeResult result;

 		result.transferRateMedian = resultStats.medianRate;
 		result.transferRateAtRange = approximatedTransferRate;
 		result.transferRateAtInfinity = approximatedTransferRateNoConstant;

 		return result;
 	}
 }

 // Analyzes a set of render samples and writes the analysis to the log.
 //
 // A line is fitted to the samples with fitLineToSamples() and statistics for the whole
 // set are computed against that fit. The function then logs, in order:
 //  - the raw sample list,
 //  - per-stage contributions (only if SampleTypeTraits<SampleType>::LOG_CONTRIBUTIONS),
 //  - a "Results" section with fit quality, fitted costs and approximated processing rates.
 //
 // \param log		Log the sections are written to.
 // \param samples	Sample results. Must not be empty: the first and last elements are
 //					read to pick the data size the rates are evaluated at.
 // \return	renderRateMedian is the median rate from the statistics; renderRateAtRange and
 //			renderRateAtInfinity are the rates approximated from the fitted line with and
 //			without the fitted constant cost, respectively.
 template <typename SampleType>
 static RenderSampleAnalyzeResult analyzeSampleResults (tcu::TestLog& log, const std::vector<RenderSampleResult<SampleType> >& samples)
 {
 	// front() / back() below are undefined for an empty sample set
 	DE_ASSERT(!samples.empty());

 	// Assume data is linear with some outliers, fit a line
 	const LineParametersWithConfidence						theilSenFitting						= fitLineToSamples(samples);
 	const typename SampleTypeTraits<SampleType>::StatsType	resultStats							= calculateSampleStatistics(theilSenFitting, samples);
 	float													approximatedProcessingRate;
 	float													approximatedProcessingRateNoConstant;

 	// output raw samples
 	{
 		const tcu::ScopedLogSection	section(log, "Samples", "Samples");
 		logSampleList(log, theilSenFitting, samples);
 	}

 	// Contributions
 	if (SampleTypeTraits<SampleType>::LOG_CONTRIBUTIONS)
 	{
 		const tcu::ScopedLogSection	section(log, "Contribution", "Contributions");

 		logFirstRenderContribution(log, samples, resultStats);
 		logUploadContribution(log, samples, resultStats);
 		logRenderContribution(log, samples, resultStats);
 		logSecondRenderContribution(log, samples, resultStats);
 		logReadContribution(log, samples, resultStats);
 		logTotalContribution(log, samples, resultStats);
 	}

 	// print results
 	{
 		const tcu::ScopedLogSection	section(log, "Results", "Results");

 		// \note Midpoint of the first and last sample's data size rather than a true median.
 		//       NOTE(review): presumably callers pass samples ordered by data size - confirm.
 		const int	medianDataSize						= (samples.front().renderDataSize + samples.back().renderDataSize) / 2;
 		// The fit is logged in microseconds below; dividing by 1000 twice gives seconds
 		const float	approximatedRenderTime				= (theilSenFitting.offset + theilSenFitting.coefficient * (float)medianDataSize) / 1000.0f / 1000.0f;
 		const float	approximatedRenderTimeNoConstant	= (theilSenFitting.coefficient * (float)medianDataSize) / 1000.0f / 1000.0f;
 		const float	sampleLinearity						= calculateSampleFitLinearity(samples);
 		const float	sampleTemporalStability				= calculateSampleTemporalStability(samples);

 		// Bytes per second; converted to MB / s only when logged
 		approximatedProcessingRateNoConstant			= (float)medianDataSize / approximatedRenderTimeNoConstant;
 		approximatedProcessingRate						= (float)medianDataSize / approximatedRenderTime;

 		log	<< tcu::TestLog::Float("ResultLinearity", "Sample linearity", "%", QP_KEY_TAG_QUALITY, sampleLinearity * 100.0f)
 			<< tcu::TestLog::Float("SampleTemporalStability", "Sample temporal stability", "%", QP_KEY_TAG_QUALITY, sampleTemporalStability * 100.0f)
 			<< tcu::TestLog::Float("ApproximatedConstantCost", "Approximated constant cost", "us", QP_KEY_TAG_TIME, theilSenFitting.offset)
 			<< tcu::TestLog::Float("ApproximatedConstantCostConfidence60Lower", "Approximated constant cost 60% confidence lower limit", "us", QP_KEY_TAG_TIME, theilSenFitting.offsetConfidenceLower)
 			<< tcu::TestLog::Float("ApproximatedConstantCostConfidence60Upper", "Approximated constant cost 60% confidence upper limit", "us", QP_KEY_TAG_TIME, theilSenFitting.offsetConfidenceUpper)
 			<< tcu::TestLog::Float("ApproximatedLinearCost", "Approximated linear cost", "us / MB", QP_KEY_TAG_TIME, theilSenFitting.coefficient * 1024.0f * 1024.0f)
 			<< tcu::TestLog::Float("ApproximatedLinearCostConfidence60Lower", "Approximated linear cost 60% confidence lower limit", "us / MB", QP_KEY_TAG_TIME, theilSenFitting.coefficientConfidenceLower * 1024.0f * 1024.0f)
 			<< tcu::TestLog::Float("ApproximatedLinearCostConfidence60Upper", "Approximated linear cost 60% confidence upper limit", "us / MB", QP_KEY_TAG_TIME, theilSenFitting.coefficientConfidenceUpper * 1024.0f * 1024.0f)
 			<< tcu::TestLog::Float("ApproximatedProcessRate", "Approximated processing rate", "MB / s", QP_KEY_TAG_PERFORMANCE, approximatedProcessingRate / 1024.0f / 1024.0f)
 			<< tcu::TestLog::Float("ApproximatedProcessRateNoConstant", "Approximated processing rate without constant cost", "MB / s", QP_KEY_TAG_PERFORMANCE, approximatedProcessingRateNoConstant / 1024.0f / 1024.0f)
 			<< tcu::TestLog::Float("SampleMedianTime", "Median sample time", "us", QP_KEY_TAG_TIME, resultStats.result.medianTime)
 			<< tcu::TestLog::Float("SampleMedianProcess", "Median processing rate", "MB / s", QP_KEY_TAG_PERFORMANCE, resultStats.medianRate / 1024.0f / 1024.0f);
 	}

 	// return approximated render rate
 	{
 		RenderSampleAnalyzeResult result;

 		result.renderRateMedian		= resultStats.medianRate;
 		result.renderRateAtRange	= approximatedProcessingRate;
 		result.renderRateAtInfinity = approximatedProcessingRateNoConstant;

 		return result;
 	}
 }

 // Fills iterationOrder (which must already hold numSamples elements) with a
 // deterministic ordering of the sample indices: the first half holds the even
 // indices, the second half the odd ones, and each half is shuffled on its own
 // with a fixed-seed generator.
 static void generateTwoPassRandomIterationOrder (std::vector<int>& iterationOrder, int numSamples)
 {
 	const int	firstPassSize	= (numSamples+1) / 2;		// !< ceil(numSamples / 2)
 	const int	orderSize		= (int)iterationOrder.size();
 	de::Random	rnd				(0xabc);

 	DE_ASSERT(orderSize == numSamples);

 	// Two "passes" over the range, with the order randomized inside both passes.
 	// This allows us to detect if iterations are not independent
 	// (first run and later run samples differ significantly?)

 	// Even sample indices go to the first pass, odd ones to the second
 	for (int slot = 0; slot < numSamples; ++slot)
 	{
 		if (slot < firstPassSize)
 			iterationOrder[slot] = slot * 2;
 		else
 			iterationOrder[slot] = (slot - firstPassSize) * 2 + 1;
 	}

 	// Shuffle the first pass within itself
 	for (int slot = 0; slot < firstPassSize; ++slot)
 	{
 		const int partner = rnd.getInt(0, firstPassSize - 1);
 		std::swap(iterationOrder[slot], iterationOrder[partner]);
 	}

 	// Shuffle the second pass within itself
 	for (int slot = firstPassSize; slot < orderSize; ++slot)
 	{
 		const int partner = rnd.getInt(firstPassSize, orderSize - 1);
 		std::swap(iterationOrder[slot], iterationOrder[partner]);
 	}
 }

 // Base class for buffer performance cases that measure one sample per iterate() call.
 //
 // The constructor picks a buffer size for each of the numSamples samples and a
 // deterministic, shuffled order to run them in. iterate() interleaves buffer warm-up
 // and randomization work with calls to runSample(), and hands the collected results
 // to logAndSetTestResult() once every sample has succeeded.
 template <typename SampleType>
 class BasicBufferCase : public TestCase
 {
 public:

 	enum Flags
 	{
 		// Sets each sample's allocatedSize to 1.5x its bufferSize (then aligned to 16) instead of bufferSize
 		FLAG_ALLOCATE_LARGER_BUFFER = 0x01,
 	};
 							BasicBufferCase		(Context& context, const char* name, const char* desc, int bufferSizeMin, int bufferSizeMax, int numSamples, int flags);
 							~BasicBufferCase	(void);

 	// init() builds the dummy shader program unless GL usage was disabled; deinit() releases the buffer and the program
 	virtual void			init				(void);
 	virtual void			deinit				(void);

 protected:
 	IterateResult			iterate				(void);

 	// Runs one sample. Returning false makes iterate() retry the same sample on its next call.
 	virtual bool			runSample			(int iteration, UploadSampleResult<SampleType>& sample) = 0;
 	// Called once by iterate(), after the last sample has succeeded
 	virtual void			logAndSetTestResult	(const std::vector<UploadSampleResult<SampleType> >& results) = 0;

 	// Clears m_useGL: init() then creates no program and iterate() skips its buffer warm-up and randomization steps
 	void					disableGLWarmup		(void);
 	// Reads back a DUMMY_RENDER_AREA_SIZE square of pixels; presumably used to wait for pending GL work
 	void					waitGLResults		(void);

 	enum
 	{
 		DUMMY_RENDER_AREA_SIZE = 32
 	};

 	glu::ShaderProgram*		m_dummyProgram;			// Owned; created in init(), deleted in deinit()
 	deInt32					m_dummyProgramPosLoc;	// Location of "a_position" in m_dummyProgram, -1 until init()
 	deUint32				m_bufferID;				// GL buffer name deleted by deinit() when non-zero

 	const int				m_numSamples;
 	const int				m_bufferSizeMin;
 	const int				m_bufferSizeMax;
 	const bool				m_allocateLargerBuffer;	// Set from FLAG_ALLOCATE_LARGER_BUFFER

 private:
 	int						m_iteration;			// Number of samples that have completed successfully
 	std::vector<int>		m_iterationOrder;		// Maps iteration number to index in m_results
 	std::vector<UploadSampleResult<SampleType> > m_results;

 	bool					m_useGL;				// False after disableGLWarmup()
 	int						m_bufferRandomizerTimer;	// Counter driving the every-8th-call buffer randomization in iterate()
 };

 // Sets up the sample order and the per-sample buffer sizes. No GL work is done here.
 template <typename SampleType>
 BasicBufferCase<SampleType>::BasicBufferCase (Context& context, const char* name, const char* desc, int bufferSizeMin, int bufferSizeMax, int numSamples, int flags)
 	: TestCase					(context, tcu::NODETYPE_PERFORMANCE, name, desc)
 	, m_dummyProgram			(DE_NULL)
 	, m_dummyProgramPosLoc		(-1)
 	, m_bufferID				(0)
 	, m_numSamples				(numSamples)
 	, m_bufferSizeMin			(bufferSizeMin)
 	, m_bufferSizeMax			(bufferSizeMax)
 	, m_allocateLargerBuffer	((flags & FLAG_ALLOCATE_LARGER_BUFFER) != 0)
 	, m_iteration				(0)
 	, m_iterationOrder			(numSamples)
 	, m_results					(numSamples)
 	, m_useGL					(true)
 	, m_bufferRandomizerTimer	(0)
 {
 	const float sizeRange = (float)(bufferSizeMax - bufferSizeMin);

 	// Sample order is shuffled, but deterministic and patternless
 	generateTwoPassRandomIterationOrder(m_iterationOrder, m_numSamples);

 	// Sizes step linearly from bufferSizeMin towards bufferSizeMax; the last sample
 	// lands on bufferSizeMax (before alignment).
 	for (int ndx = 0; ndx < m_numSamples; ++ndx)
 	{
 		UploadSampleResult<SampleType>&	result		= m_results[ndx];
 		const float						fraction	= (float)(ndx + 1) / (float)m_numSamples;
 		const int						bufferSize	= deAlign32((int)deFloatFloor((float)bufferSizeMin + sizeRange * fraction), 16);
 		int								allocSize	= bufferSize;

 		// Optionally allocate 50% more than is going to be used
 		if (m_allocateLargerBuffer)
 			allocSize = (int)((float)bufferSize * 1.5f);

 		result.bufferSize		= bufferSize;
 		result.allocatedSize	= deAlign32(allocSize, 16);
 		result.writtenSize		= -1;	// not written yet
 	}
 }

 template <typename SampleType>
 BasicBufferCase<SampleType>::~BasicBufferCase (void)
 {
 	// Release whatever init() left behind. Inside a destructor the call binds
 	// to this class's own deinit(), which the qualified name spells out.
 	BasicBufferCase<SampleType>::deinit();
 }

 // Builds the dummy shader program and looks up its position attribute.
 // Does nothing when GL usage has been disabled with disableGLWarmup().
 // Throws tcu::TestError if the program fails to build or "a_position" is not found.
 template <typename SampleType>
 void BasicBufferCase<SampleType>::init (void)
 {
 	const glw::Functions& gl = m_context.getRenderContext().getFunctions();

 	if (m_useGL)
 	{
 		// \note Viewport size is not checked, it won't matter if the render target actually is smaller than DUMMY_RENDER_AREA_SIZE

 		// dummy shader

 		glu::ProgramSources shaderSources;
 		shaderSources << glu::VertexSource(s_dummyVertexShader) << glu::FragmentSource(s_dummyFragnentShader);

 		m_dummyProgram = new glu::ShaderProgram(m_context.getRenderContext(), shaderSources);

 		const bool programOk = m_dummyProgram->isOk();
 		if (!programOk)
 		{
 			// Put the build log into the test log before failing
 			m_testCtx.getLog() << *m_dummyProgram;
 			throw tcu::TestError("failed to build shader program");
 		}

 		m_dummyProgramPosLoc = gl.getAttribLocation(m_dummyProgram->getProgram(), "a_position");

 		const bool positionFound = (m_dummyProgramPosLoc != -1);
 		if (!positionFound)
 			throw tcu::TestError("a_position location was -1");
 	}
 }

 // Deletes the GL buffer (if one exists) and the dummy program, and resets both
 // members so that a repeated call has nothing left to release.
 template <typename SampleType>
 void BasicBufferCase<SampleType>::deinit (void)
 {
 	if (m_bufferID != 0)
 	{
 		const glw::Functions& gl = m_context.getRenderContext().getFunctions();

 		gl.deleteBuffers(1, &m_bufferID);
 		m_bufferID = 0;
 	}

 	// delete on a null pointer is a no-op, so no check is needed
 	delete m_dummyProgram;
 	m_dummyProgram = DE_NULL;
 }

 // Runs one step of the test. Each call does exactly one of the following:
 //  1. One-time buffer warm-up (only when m_useGL is set). The guard is a function-local
 //     static, so it is shared by all cases of the same template instantiation.
 //  2. Random buffer operations (only when m_useGL is set), on every 8th call that
 //     reaches this check, starting with the first one.
 //  3. One sample via runSample(). A failed sample is retried on the next call.
 // Returns STOP after logAndSetTestResult() has been called, i.e. once m_numSamples
 // samples have succeeded, and CONTINUE otherwise.
 template <typename SampleType>
 TestCase::IterateResult BasicBufferCase<SampleType>::iterate (void)
 {
 	const glw::Functions&	gl				= m_context.getRenderContext().getFunctions();
 	static bool				buffersWarmedUp	= false;

 	// Usage hints picked at random for the throwaway buffers below
 	static const deUint32	usages[] =
 	{
 		GL_STREAM_DRAW, GL_STREAM_READ, GL_STREAM_COPY,
 		GL_STATIC_DRAW, GL_STATIC_READ, GL_STATIC_COPY,
 		GL_DYNAMIC_DRAW, GL_DYNAMIC_READ, GL_DYNAMIC_COPY,
 	};

 	// Allocate some random sized buffers and remove them to
 	// make sure the first samples too have some buffers removed
 	// just before their allocation. This is only needed by the
 	// first test.

 	if (m_useGL && !buffersWarmedUp)
 	{
 		const int					numRandomBuffers				= 6;
 		const int					numRepeats						= 10;
 		const int					maxBufferSize					= 16777216;
 		const std::vector<deUint8>	zeroData						(maxBufferSize, 0x00);
 		de::Random					rnd								(0x1234);
 		deUint32					bufferIDs[numRandomBuffers]		= {0};

 		gl.useProgram(m_dummyProgram->getProgram());
 		gl.viewport(0, 0, DUMMY_RENDER_AREA_SIZE, DUMMY_RENDER_AREA_SIZE);
 		gl.enableVertexAttribArray(m_dummyProgramPosLoc);

 		for (int ndx = 0; ndx < numRepeats; ++ndx)
 		{
 			// Create buffer and maybe draw from it
 			for (int randomBufferNdx = 0; randomBufferNdx < numRandomBuffers; ++randomBufferNdx)
 			{
 				// Size is aligned to one vec4 of floats (4*4 bytes)
 				const int		randomSize	= deAlign32(rnd.getInt(1, maxBufferSize), 4*4);
 				const deUint32	usage		= usages[rnd.getUint32() % (deUint32)DE_LENGTH_OF_ARRAY(usages)];

 				gl.genBuffers(1, &bufferIDs[randomBufferNdx]);
 				gl.bindBuffer(GL_ARRAY_BUFFER, bufferIDs[randomBufferNdx]);
 				gl.bufferData(GL_ARRAY_BUFFER, randomSize, &zeroData[0], usage);

 				if (rnd.getBool())
 				{
 					// Draw one point from the first and one from the last vec4 in the buffer
 					gl.vertexAttribPointer(m_dummyProgramPosLoc, 4, GL_FLOAT, GL_FALSE, 0, DE_NULL);
 					gl.drawArrays(GL_POINTS, 0, 1);
 					gl.drawArrays(GL_POINTS, randomSize / (int)sizeof(float[4]) - 1, 1);
 				}
 			}

 			for (int randomBufferNdx = 0; randomBufferNdx < numRandomBuffers; ++randomBufferNdx)
 				gl.deleteBuffers(1, &bufferIDs[randomBufferNdx]);

 			waitGLResults();
 			GLU_EXPECT_NO_ERROR(gl.getError(), "Buffer gen");

 			// The warm-up can take a while; keep the watchdog from firing
 			m_testCtx.touchWatchdog();
 		}

 		buffersWarmedUp = true;
 		return CONTINUE;
 	}
 	else if (m_useGL && m_bufferRandomizerTimer++ % 8 == 0)
 	{
 		// Do some random buffer operations every now and then
 		// to make sure the previous test iterations won't affect
 		// following test runs.

 		const int					numRandomBuffers				= 3;
 		const int					maxBufferSize					= 16777216;
 		const std::vector<deUint8>	zeroData						(maxBufferSize, 0x00);
 		// Seed depends on the (already incremented) timer, so every round uses a different sequence
 		de::Random					rnd								(0x1234 + 0xabc * m_bufferRandomizerTimer);

 		// BufferData
 		{
 			deUint32 bufferIDs[numRandomBuffers] = {0};

 			for (int randomBufferNdx = 0; randomBufferNdx < numRandomBuffers; ++randomBufferNdx)
 			{
 				const int		randomSize	= deAlign32(rnd.getInt(1, maxBufferSize), 4*4);
 				const deUint32	usage		= usages[rnd.getUint32() % (deUint32)DE_LENGTH_OF_ARRAY(usages)];

 				gl.genBuffers(1, &bufferIDs[randomBufferNdx]);
 				gl.bindBuffer(GL_ARRAY_BUFFER, bufferIDs[randomBufferNdx]);
 				gl.bufferData(GL_ARRAY_BUFFER, randomSize, &zeroData[0], usage);
 			}

 			for (int randomBufferNdx = 0; randomBufferNdx < numRandomBuffers; ++randomBufferNdx)
 				gl.deleteBuffers(1, &bufferIDs[randomBufferNdx]);
 		}

 		GLU_EXPECT_NO_ERROR(gl.getError(), "buffer ops");

 		// Do some memory mappings
 		{
 			deUint32 bufferIDs[numRandomBuffers] = {0};

 			for (int randomBufferNdx = 0; randomBufferNdx < numRandomBuffers; ++randomBufferNdx)
 			{
 				const int		randomSize	= deAlign32(rnd.getInt(1, maxBufferSize), 4*4);
 				const deUint32	usage		= usages[rnd.getUint32() % (deUint32)DE_LENGTH_OF_ARRAY(usages)];
 				void*			ptr;

 				gl.genBuffers(1, &bufferIDs[randomBufferNdx]);
 				gl.bindBuffer(GL_ARRAY_BUFFER, bufferIDs[randomBufferNdx]);
 				gl.bufferData(GL_ARRAY_BUFFER, randomSize, &zeroData[0], usage);

 				// Draw one point from the first and one from the last vec4 in the buffer
 				gl.vertexAttribPointer(m_dummyProgramPosLoc, 4, GL_FLOAT, GL_FALSE, 0, DE_NULL);
 				gl.drawArrays(GL_POINTS, 0, 1);
 				gl.drawArrays(GL_POINTS, randomSize / (int)sizeof(float[4]) - 1, 1);

 				// Randomly map either right after the draws or only after a read back
 				if (rnd.getBool())
 					waitGLResults();

 				ptr = gl.mapBufferRange(GL_ARRAY_BUFFER, 0, randomSize, GL_MAP_WRITE_BIT);
 				// A failed mapping is skipped here; any GL error is reported by the check after this block
 				if (ptr)
 				{
 					medianTimeMemcpy(ptr, &zeroData[0], randomSize);
 					gl.unmapBuffer(GL_ARRAY_BUFFER);
 				}
 			}

 			for (int randomBufferNdx = 0; randomBufferNdx < numRandomBuffers; ++randomBufferNdx)
 				gl.deleteBuffers(1, &bufferIDs[randomBufferNdx]);

 			waitGLResults();
 		}

 		GLU_EXPECT_NO_ERROR(gl.getError(), "buffer maps");
 		return CONTINUE;
 	}
 	else
 	{
 		// Run the next sample in the shuffled order; the result is stored at the sample's own index
 		const int	currentIteration	= m_iteration;
 		const int	sampleNdx			= m_iterationOrder[currentIteration];
 		const bool	sampleRunSuccessful	= runSample(currentIteration, m_results[sampleNdx]);

 		GLU_EXPECT_NO_ERROR(gl.getError(), "post runSample()");

 		// Retry failed samples
 		if (!sampleRunSuccessful)
 			return CONTINUE;

 		if (++m_iteration >= m_numSamples)
 		{
 			logAndSetTestResult(m_results);
 			return STOP;
 		}
 		else
 			return CONTINUE;
 	}
 }

 // Turns off the GL-dependent parts of init() and iterate() for this case.
 template <typename SampleType>
 void BasicBufferCase<SampleType>::disableGLWarmup (void)
 {
 	this->m_useGL = false;
 }

 // Reads back a DUMMY_RENDER_AREA_SIZE x DUMMY_RENDER_AREA_SIZE area from the origin
 // of the render target. The pixels themselves are discarded.
 template <typename SampleType>
 void BasicBufferCase<SampleType>::waitGLResults (void)
 {
 	const int		areaSize	= DUMMY_RENDER_AREA_SIZE;
 	tcu::Surface	readTarget	(areaSize, areaSize);

 	glu::readPixels(m_context.getRenderContext(), 0, 0, readTarget.getAccess());
 }

 // Base class for upload performance cases. Implements runSample() and
 // logAndSetTestResult() of BasicBufferCase; subclasses supply the actual
 // upload through testBufferUpload().
 template <typename SampleType>
 class BasicUploadCase : public BasicBufferCase<SampleType>
 {
 public:
 	// State of the target buffer before the measured upload.
 	// NOTE(review): the exact meaning of each value is defined by code outside this declaration - confirm there.
 	enum CaseType
 	{
 		CASE_NO_BUFFERS = 0,
 		CASE_NEW_BUFFER,
 		CASE_UNSPECIFIED_BUFFER,
 		CASE_SPECIFIED_BUFFER,
 		CASE_USED_BUFFER,
 		CASE_USED_LARGER_BUFFER,	// Also makes the base class use FLAG_ALLOCATE_LARGER_BUFFER

 		CASE_LAST
 	};

 	enum CaseFlags
 	{
 		FLAG_DONT_LOG_BUFFER_INFO				= 0x01,	// Clears m_logBufferInfo
 		FLAG_RESULT_BUFFER_UNSPECIFIED_CONTENT	= 0x02,	// Sets m_bufferUnspecifiedContent
 	};

 	// Which transfer rate is used as the test result; presumably consumed by logAndSetTestResult()
 	enum ResultType
 	{
 		RESULT_MEDIAN_TRANSFER_RATE = 0,
 		RESULT_ASYMPTOTIC_TRANSFER_RATE,
 	};

 						BasicUploadCase		(Context& context,
 											 const char* name,
 											 const char* desc,
 											 int bufferSizeMin,
 											 int bufferSizeMax,
 											 int numSamples,
 											 deUint32 bufferUsage,
 											 CaseType caseType,
 											 ResultType resultType,
 											 int flags = 0);

 						~BasicUploadCase	(void);

 	virtual void		init				(void);
 	virtual void		deinit				(void);

 private:
 	bool				runSample			(int iteration, UploadSampleResult<SampleType>& sample);
 	void				createBuffer		(int bufferSize, int iteration);
 	void				deleteBuffer		(int bufferSize);
 	void				useBuffer			(int bufferSize);

 	// The measured upload itself, implemented by subclasses
 	virtual void		testBufferUpload	(UploadSampleResult<SampleType>& result, int writeSize) = 0;
 	void				logAndSetTestResult	(const std::vector<UploadSampleResult<SampleType> >& results);

 	deUint32			m_dummyBufferID;	// GL buffer name generated in init()

 protected:
 	const CaseType		m_caseType;
 	const ResultType	m_resultType;
 	const deUint32		m_bufferUsage;
 	const bool			m_logBufferInfo;
 	const bool			m_bufferUnspecifiedContent;
 	std::vector<deUint8> m_zeroData;		// Zero-filled upload source, resized to m_bufferSizeMax bytes in init()

 	// The base class is a dependent type, so its members must be brought into
 	// scope explicitly to be usable by unqualified name.
 	using BasicBufferCase<SampleType>::m_testCtx;
 	using BasicBufferCase<SampleType>::m_context;

 	using BasicBufferCase<SampleType>::DUMMY_RENDER_AREA_SIZE;
 	using BasicBufferCase<SampleType>::m_dummyProgram;
 	using BasicBufferCase<SampleType>::m_dummyProgramPosLoc;
 	using BasicBufferCase<SampleType>::m_bufferID;
 	using BasicBufferCase<SampleType>::m_numSamples;
 	using BasicBufferCase<SampleType>::m_bufferSizeMin;
 	using BasicBufferCase<SampleType>::m_bufferSizeMax;
 	using BasicBufferCase<SampleType>::m_allocateLargerBuffer;
 };

 // Stores the case configuration. FLAG_ALLOCATE_LARGER_BUFFER is forwarded to
 // the base class only for CASE_USED_LARGER_BUFFER; the two CaseFlags bits are
 // decoded into m_logBufferInfo and m_bufferUnspecifiedContent.
 template <typename SampleType>
 BasicUploadCase<SampleType>::BasicUploadCase (Context& context, const char* name, const char* desc, int bufferSizeMin, int bufferSizeMax, int numSamples, deUint32 bufferUsage, CaseType caseType, ResultType resultType, int flags)
 	: BasicBufferCase<SampleType>	(context,
 									 name,
 									 desc,
 									 bufferSizeMin,
 									 bufferSizeMax,
 									 numSamples,
 									 (caseType != CASE_USED_LARGER_BUFFER) ? (0) : (BasicBufferCase<SampleType>::FLAG_ALLOCATE_LARGER_BUFFER))
 	, m_dummyBufferID				(0)
 	, m_caseType					(caseType)
 	, m_resultType					(resultType)
 	, m_bufferUsage					(bufferUsage)
 	, m_logBufferInfo				(!(flags & FLAG_DONT_LOG_BUFFER_INFO))
 	, m_bufferUnspecifiedContent	((flags & FLAG_RESULT_BUFFER_UNSPECIFIED_CONTENT) == FLAG_RESULT_BUFFER_UNSPECIFIED_CONTENT)
 	, m_zeroData					()
 {
 	DE_ASSERT(m_caseType < CASE_LAST);
 }

 // Releases the resources of this class on destruction. Inside a destructor
 // the call resolves to this class' own deinit(), which is spelled out here.
 template <typename SampleType>
 BasicUploadCase<SampleType>::~BasicUploadCase (void)
 {
 	BasicUploadCase<SampleType>::deinit();
 }

 // Initializes the base class, allocates the zero-filled upload source and the
 // dummy buffer name, and logs a description of the test configuration.
 template <typename SampleType>
 void BasicUploadCase<SampleType>::init (void)
 {
 	const glw::Functions& gl = m_context.getRenderContext().getFunctions();

 	BasicBufferCase<SampleType>::init();

 	// upload source: m_bufferSizeMax zero bytes
 	m_zeroData.resize(m_bufferSizeMax, 0x00);

 	// name for the dummy buffer
 	gl.genBuffers(1, &m_dummyBufferID);
 	GLU_EXPECT_NO_ERROR(gl.getError(), "Gen buf");

 	tcu::TestLog& log = m_testCtx.getLog();

 	// general description
 	log	<< tcu::TestLog::Message
 		<< "Testing performance with " << m_numSamples << " test samples. Sample order is randomized. All samples at even positions (first = 0) are tested before samples at odd positions.\n"
 		<< "Buffer sizes are in range [" << getHumanReadableByteSize(m_bufferSizeMin) << ", " << getHumanReadableByteSize(m_bufferSizeMax) << "]."
 		<< tcu::TestLog::EndMessage;

 	// description of the target buffer state, unless suppressed
 	if (m_logBufferInfo)
 	{
 		const char* bufferInfo = DE_NULL;

 		switch (m_caseType)
 		{
 			case CASE_NO_BUFFERS:
 				// nothing to describe
 				break;

 			case CASE_NEW_BUFFER:
 				bufferInfo = "Target buffer is generated but not specified (i.e glBufferData() not called).";
 				break;

 			case CASE_UNSPECIFIED_BUFFER:
 				bufferInfo = "Target buffer is allocated with glBufferData(NULL).";
 				break;

 			case CASE_SPECIFIED_BUFFER:
 				bufferInfo = "Target buffer contents are specified prior testing with glBufferData(data).";
 				break;

 			case CASE_USED_BUFFER:
 				bufferInfo = "Target buffer has been used in drawing before testing.";
 				break;

 			case CASE_USED_LARGER_BUFFER:
 				bufferInfo = "Target buffer is larger and has been used in drawing before testing.";
 				break;

 			default:
 				DE_ASSERT(false);
 				break;
 		}

 		if (bufferInfo)
 			log << tcu::TestLog::Message << bufferInfo << tcu::TestLog::EndMessage;
 	}

 	// description of the reported value
 	switch (m_resultType)
 	{
 		case RESULT_MEDIAN_TRANSFER_RATE:
 			log << tcu::TestLog::Message << "Test result is the median transfer rate of the test samples." << tcu::TestLog::EndMessage;
 			break;

 		case RESULT_ASYMPTOTIC_TRANSFER_RATE:
 			log << tcu::TestLog::Message << "Test result is the asymptotic transfer rate as the buffer size approaches infinity." << tcu::TestLog::EndMessage;
 			break;

 		default:
 			DE_ASSERT(false);
 			break;
 	}
 }

 // Deletes the dummy buffer if one exists, drops the upload source data and
 // then lets the base class clean up.
 template <typename SampleType>
 void BasicUploadCase<SampleType>::deinit (void)
 {
 	if (m_dummyBufferID != 0)
 	{
 		const glw::Functions& gl = m_context.getRenderContext().getFunctions();

 		gl.deleteBuffers(1, &m_dummyBufferID);
 		m_dummyBufferID = 0;
 	}

 	m_zeroData = std::vector<deUint8>();

 	BasicBufferCase<SampleType>::deinit();
 }

 // Runs one sample: prepares the target buffer (unless CASE_NO_BUFFERS), warms
 // up the CPU, runs the subclass upload and finally uses and deletes the
 // target buffer. Always returns true; GL errors are reported through
 // GLU_EXPECT_NO_ERROR.
 template <typename SampleType>
 bool BasicUploadCase<SampleType>::runSample (int iteration, UploadSampleResult<SampleType>& sample)
 {
 	const glw::Functions&	gl				= m_context.getRenderContext().getFunctions();
 	const int				allocSize		= sample.allocatedSize;
 	const int				uploadSize		= sample.bufferSize;
 	const bool				hasTargetBuffer	= (m_caseType != CASE_NO_BUFFERS);

 	if (hasTargetBuffer)
 		createBuffer(iteration, allocSize);

 	// Keep the CPU busy right before the measurement so that the power
 	// management governor stays in its "high performance" mode.
 	deYield();
 	tcu::warmupCPU();
 	deYield();

 	testBufferUpload(sample, uploadSize);
 	GLU_EXPECT_NO_ERROR(gl.getError(), "Buffer upload sample");

 	if (hasTargetBuffer)
 		deleteBuffer(uploadSize);

 	return true;
 }

 // Creates the target buffer m_bufferID and brings it into the state selected
 // by m_caseType:
 //  - CASE_NEW_BUFFER: the name is generated only; a varying-size upload goes
 //    to the dummy buffer instead, which is then drawn from
 //  - CASE_UNSPECIFIED_BUFFER: storage of 'bufferSize' bytes, no data
 //  - CASE_SPECIFIED_BUFFER: storage filled with 0x20 bytes
 //  - CASE_USED_BUFFER / CASE_USED_LARGER_BUFFER: filled and drawn from
 // 'iteration' only seeds the size variation of the dummy upload.
 template <typename SampleType>
 void BasicUploadCase<SampleType>::createBuffer (int iteration, int bufferSize)
 {
 	DE_ASSERT(!m_bufferID);
 	DE_ASSERT(m_caseType != CASE_NO_BUFFERS);

 	const glw::Functions& gl = m_context.getRenderContext().getFunctions();

 	// nothing to create without buffers (guards builds where asserts are compiled out)
 	if (m_caseType == CASE_NO_BUFFERS)
 		return;

 	// generate and bind the target buffer name
 	gl.genBuffers(1, &m_bufferID);
 	gl.bindBuffer(GL_ARRAY_BUFFER, m_bufferID);
 	GLU_EXPECT_NO_ERROR(gl.getError(), "Buffer gen");

 	if (m_caseType == CASE_NEW_BUFFER)
 	{
 		// Upload something else first, this should reduce noise in samples.
 		// The size varies per iteration so that the dummy buffer is always reallocated.
 		de::Random					sizeRng			(0xbadc * iteration);
 		const int					extraBytes		= sizeRng.getInt(0, 2097140);
 		const int					noiseSize		= deAlign32(1048576 + extraBytes, 4*4);
 		const std::vector<deUint8>	noiseData		(noiseSize, 0x20);

 		gl.bindBuffer(GL_ARRAY_BUFFER, m_dummyBufferID);
 		gl.bufferData(GL_ARRAY_BUFFER, noiseSize, &noiseData[0], m_bufferUsage);

 		// Draw from the dummy buffer so that its upload won't interfere with the test.
 		// The dummy buffer is kept alive so that the following upload cannot reuse it.
 		useBuffer(noiseSize);
 		return;
 	}

 	if (m_caseType == CASE_UNSPECIFIED_BUFFER)
 	{
 		// allocate storage only
 		gl.bufferData(GL_ARRAY_BUFFER, bufferSize, DE_NULL, m_bufferUsage);
 		return;
 	}

 	// all remaining cases specify the contents
 	{
 		const std::vector<deUint8> fillData(bufferSize, 0x20);
 		gl.bufferData(GL_ARRAY_BUFFER, bufferSize, &fillData[0], m_bufferUsage);
 	}

 	if (m_caseType == CASE_SPECIFIED_BUFFER)
 		return;

 	// "used" cases: draw from the buffer and wait for the results
 	useBuffer(bufferSize);
 	DE_ASSERT(m_caseType == CASE_USED_BUFFER || m_caseType == CASE_USED_LARGER_BUFFER);
 }

 // Draws from the target buffer and then deletes it.
 //
 // Rendering from the buffer makes sure the data actually made it to the GPU:
 // if the upload happens later, or is still running in the background, it
 // must not interfere with further test runs.
 //
 // Sourcing data from a buffer with unspecified content gives undefined
 // results, possibly including program termination. When the case was flagged
 // with FLAG_RESULT_BUFFER_UNSPECIFIED_CONTENT the whole buffer is therefore
 // re-specified with 'bufferSize' bytes before drawing.
 template <typename SampleType>
 void BasicUploadCase<SampleType>::deleteBuffer (int bufferSize)
 {
 	DE_ASSERT(m_bufferID);
 	DE_ASSERT(m_caseType != CASE_NO_BUFFERS);

 	const glw::Functions& gl = m_context.getRenderContext().getFunctions();

 	gl.bindBuffer(GL_ARRAY_BUFFER, m_bufferID);

 	if (m_bufferUnspecifiedContent)
 	{
 		const std::vector<deUint8> fillData(bufferSize, 0x20);

 		gl.bufferData(GL_ARRAY_BUFFER, bufferSize, &fillData[0], m_bufferUsage);
 		GLU_EXPECT_NO_ERROR(gl.getError(), "re-specify buffer");
 	}

 	useBuffer(bufferSize);

 	gl.deleteBuffers(1, &m_bufferID);
 	m_bufferID = 0;
 }

 // Draws one point from the first and one from the last vec4 of the buffer
 // currently bound to GL_ARRAY_BUFFER and then reads back pixels through
 // waitGLResults(). 'bufferSize' must be a multiple of sizeof(float[4]).
 template <typename SampleType>
 void BasicUploadCase<SampleType>::useBuffer (int bufferSize)
 {
 	const glw::Functions&	gl				= m_context.getRenderContext().getFunctions();
 	const int				bytesPerVertex	= (int)sizeof(float[4]);

 	gl.useProgram(m_dummyProgram->getProgram());

 	gl.viewport(0, 0, DUMMY_RENDER_AREA_SIZE, DUMMY_RENDER_AREA_SIZE);
 	gl.vertexAttribPointer(m_dummyProgramPosLoc, 4, GL_FLOAT, GL_FALSE, 0, DE_NULL);
 	gl.enableVertexAttribArray(m_dummyProgramPosLoc);

 	// touch both ends of the buffer so that the whole buffer has to be uploaded
 	DE_ASSERT(bufferSize % bytesPerVertex == 0);
 	gl.drawArrays(GL_POINTS, 0, 1);
 	gl.drawArrays(GL_POINTS, bufferSize / bytesPerVertex - 1, 1);

 	BasicBufferCase<SampleType>::waitGLResults();
 }

 // Analyzes the collected samples and reports the transfer rate selected by
 // m_resultType as the (passing) test result, formatted in MB / s with two
 // decimals. An infinite rate is reported as 0.
 template <typename SampleType>
 void BasicUploadCase<SampleType>::logAndSetTestResult (const std::vector<UploadSampleResult<SampleType> >& results)
 {
 	const UploadSampleAnalyzeResult	analysis		= analyzeSampleResults(m_testCtx.getLog(), results, true);

 	// with small buffers, report the median transfer rate of the samples
 	// with large buffers, report the expected performance of infinitely large buffers
 	const bool						useAsymptotic	= (m_resultType == RESULT_ASYMPTOTIC_TRANSFER_RATE);
 	const float						rate			= (useAsymptotic) ? (analysis.transferRateAtInfinity) : (analysis.transferRateMedian);

 	// An infinite rate means that the sample times are invalid or that the timer
 	// resolution is too low. The real value cannot be determined, report 0 bytes / s.
 	// Otherwise the rate is converted to MB / s.
 	const bool						rateUnknown		= (rate == std::numeric_limits<float>::infinity());
 	const float						reportedRate	= (rateUnknown) ? (0.0f) : (rate / 1024.0f / 1024.0f);

 	m_testCtx.setTestResult(QP_TEST_RESULT_PASS, de::floatToString(reportedRate, 2).c_str());
 }

 // Reference case: measures a plain memcpy() between two host memory blocks
 // instead of a GL upload. Constructed with CASE_NO_BUFFERS, so no target
 // buffer is created for the samples.
 class ReferenceMemcpyCase : public BasicUploadCase<SingleOperationDuration>
 {
 public:
 				ReferenceMemcpyCase		(Context& ctx, const char* name, const char* desc, int minBufferSize, int maxBufferSize, int numSamples, bool largeBuffersCase);
 				~ReferenceMemcpyCase	(void);

 	void		init					(void);
 	void		deinit					(void);
 private:
 	// Times the copy of bufferSize bytes from m_zeroData to m_dstBuf.
 	void		testBufferUpload		(UploadSampleResult<SingleOperationDuration>& result, int bufferSize);

 	// Copy destination, sized to m_bufferSizeMax in init().
 	std::vector<deUint8> m_dstBuf;
 };

 // Configures the base class for a case without GL buffers (usage 0,
 // CASE_NO_BUFFERS). Large buffer cases report the asymptotic transfer rate,
 // the others the median. GL warm-up is disabled.
 ReferenceMemcpyCase::ReferenceMemcpyCase (Context& ctx, const char* name, const char* desc, int minBufferSize, int maxBufferSize, int numSamples, bool largeBuffersCase)
 	: BasicUploadCase<SingleOperationDuration>	(ctx,
 												 name,
 												 desc,
 												 minBufferSize,
 												 maxBufferSize,
 												 numSamples,
 												 0,
 												 CASE_NO_BUFFERS,
 												 (!largeBuffersCase) ? (RESULT_MEDIAN_TRANSFER_RATE) : (RESULT_ASYMPTOTIC_TRANSFER_RATE))
 	, m_dstBuf									()
 {
 	disableGLWarmup();
 }

 // Nothing to release here; the BasicUploadCase destructor calls deinit().
 ReferenceMemcpyCase::~ReferenceMemcpyCase (void)
 {
 }

 void ReferenceMemcpyCase::init (void)
 {
 	// Describe what the test tries to do
 	m_testCtx.getLog() << tcu::TestLog::Message << "Testing performance of memcpy()." << tcu::TestLog::EndMessage;

 	m_dstBuf.resize(m_bufferSizeMax, 0x00);

 	BasicUploadCase<SingleOperationDuration>::init();
 }

 // Drops the copy destination and then runs the common upload case cleanup.
 void ReferenceMemcpyCase::deinit (void)
 {
 	m_dstBuf = std::vector<deUint8>();

 	BasicUploadCase<SingleOperationDuration>::deinit();
 }

 // Measures copying bufferSize bytes from the zero data to the destination
 // buffer with medianTimeMemcpy() and stores the value as both the total and
 // the fit response duration of the sample.
 void ReferenceMemcpyCase::testBufferUpload (UploadSampleResult<SingleOperationDuration>& result, int bufferSize)
 {
 	deUint8* const			copyDst	= &m_dstBuf[0];
 	const deUint8* const	copySrc	= &m_zeroData[0];

 	// write
 	result.duration.totalDuration		= medianTimeMemcpy(copyDst, copySrc, bufferSize);
 	result.duration.fitResponseDuration	= result.duration.totalDuration;

 	result.writtenSize = bufferSize;
 }

 // Measures the duration of a single glBufferData() call that uploads the
 // zero data to the target buffer. The result is the median transfer rate.
 class BufferDataUploadCase : public BasicUploadCase<SingleOperationDuration>
 {
 public:
 				BufferDataUploadCase	(Context& ctx, const char* name, const char* desc, int minBufferSize, int maxBufferSize, int numSamples, deUint32 bufferUsage, CaseType caseType);
 				~BufferDataUploadCase	(void);

 	void		init					(void);
 private:
 	// Times glBufferData() with bufferSize bytes.
 	void		testBufferUpload		(UploadSampleResult<SingleOperationDuration>& result, int bufferSize);
 };

 // Forwards all arguments to the base class and selects the median transfer
 // rate as the reported result.
 BufferDataUploadCase::BufferDataUploadCase (Context& ctx, const char* name, const char* desc, int minBufferSize, int maxBufferSize, int numSamples, deUint32 bufferUsage, CaseType caseType)
 	: BasicUploadCase<SingleOperationDuration>	(ctx,
 												 name,
 												 desc,
 												 minBufferSize,
 												 maxBufferSize,
 												 numSamples,
 												 bufferUsage,
 												 caseType,
 												 RESULT_MEDIAN_TRANSFER_RATE)
 {
 }

 // Nothing to release here; the BasicUploadCase destructor calls deinit().
 BufferDataUploadCase::~BufferDataUploadCase (void)
 {
 }

 void BufferDataUploadCase::init (void)
 {
 	// Describe what the test tries to do
 	m_testCtx.getLog() << tcu::TestLog::Message << "Testing glBufferData() function." << tcu::TestLog::EndMessage;

 	BasicUploadCase<SingleOperationDuration>::init();
 }

 // Binds the target buffer and measures one glBufferData() call uploading
 // bufferSize zero bytes. Only the glBufferData() call lies between the two
 // timestamps.
 void BufferDataUploadCase::testBufferUpload (UploadSampleResult<SingleOperationDuration>& result, int bufferSize)
 {
 	const glw::Functions& gl = m_context.getRenderContext().getFunctions();

 	gl.bindBuffer(GL_ARRAY_BUFFER, m_bufferID);

 	// timed upload
 	const deUint64 uploadBegin = deGetMicroseconds();
 	gl.bufferData(GL_ARRAY_BUFFER, bufferSize, &m_zeroData[0], m_bufferUsage);
 	const deUint64 uploadEnd = deGetMicroseconds();

 	result.duration.totalDuration		= uploadEnd - uploadBegin;
 	result.duration.fitResponseDuration	= result.duration.totalDuration;
 	result.writtenSize					= bufferSize;
 }

 // Measures the duration of a single glBufferSubData() call, updating either
 // the whole buffer or the middle half of it.
 class BufferSubDataUploadCase : public BasicUploadCase<SingleOperationDuration>
 {
 public:
 	// Exactly one of FLAG_FULL_UPLOAD and FLAG_PARTIAL_UPLOAD must be set
 	// (asserted in the constructor).
 	enum Flags
 	{
 		FLAG_FULL_UPLOAD			= 0x01,
 		FLAG_PARTIAL_UPLOAD			= 0x02,
 		FLAG_INVALIDATE_BEFORE_USE	= 0x04,
 	};

 				BufferSubDataUploadCase		(Context& ctx, const char* name, const char* desc, int minBufferSize, int maxBufferSize, int numSamples, deUint32 bufferUsage, CaseType parentCase, int flags);
 				~BufferSubDataUploadCase	(void);

 	void		init						(void);
 private:
 	// Times glBufferSubData(); the optional invalidation is done before the timed part.
 	void		testBufferUpload			(UploadSampleResult<SingleOperationDuration>& result, int bufferSize);

 	const bool	m_fullUpload;				// FLAG_FULL_UPLOAD was given
 	const bool	m_invalidateBeforeUse;		// FLAG_INVALIDATE_BEFORE_USE was given
 };

 // Configures the base class for a glBufferSubData() case that reports the
 // median transfer rate.
 //
 // A partial upload writes only the middle half of the buffer. If the buffer
 // contents were unspecified before the upload (the buffer is invalidated with
 // glBufferData(NULL) first, or it was created as CASE_UNSPECIFIED_BUFFER), the
 // first and last vertices are still unspecified when the base class later
 // draws from them. FLAG_RESULT_BUFFER_UNSPECIFIED_CONTENT is passed in that
 // situation so that the base class re-specifies the buffer before drawing,
 // the same way the partial map buffer cases signal it. The re-specification
 // happens after the measurement and does not affect the measured duration.
 BufferSubDataUploadCase::BufferSubDataUploadCase (Context& ctx, const char* name, const char* desc, int minBufferSize, int maxBufferSize, int numSamples, deUint32 bufferUsage, CaseType parentCase, int flags)
 	: BasicUploadCase<SingleOperationDuration>	(ctx, name, desc, minBufferSize, maxBufferSize, numSamples, bufferUsage, parentCase, RESULT_MEDIAN_TRANSFER_RATE,
 												 (((flags & FLAG_PARTIAL_UPLOAD) != 0) && (((flags & FLAG_INVALIDATE_BEFORE_USE) != 0) || (parentCase == CASE_UNSPECIFIED_BUFFER))) ? (FLAG_RESULT_BUFFER_UNSPECIFIED_CONTENT) : (0))
 	, m_fullUpload								((flags & FLAG_FULL_UPLOAD) != 0)
 	, m_invalidateBeforeUse						((flags & FLAG_INVALIDATE_BEFORE_USE) != 0)
 {
 	// exactly one of the upload modes must be selected
 	DE_ASSERT((flags & (FLAG_FULL_UPLOAD | FLAG_PARTIAL_UPLOAD)) != 0);
 	DE_ASSERT((flags & (FLAG_FULL_UPLOAD | FLAG_PARTIAL_UPLOAD)) != (FLAG_FULL_UPLOAD | FLAG_PARTIAL_UPLOAD));
 }

 // Nothing to release here; the BasicUploadCase destructor calls deinit().
 BufferSubDataUploadCase::~BufferSubDataUploadCase (void)
 {
 }

 // Logs which part of the buffer is updated and whether the buffer is
 // invalidated first, then runs the common upload case initialization.
 void BufferSubDataUploadCase::init (void)
 {
 	const char*	uploadRangeText;
 	const char*	invalidationText;

 	if (m_fullUpload)
 		uploadRangeText = "The whole buffer is updated with glBufferSubData. ";
 	else
 		uploadRangeText = "Half of the buffer data is updated with glBufferSubData. ";

 	if (m_invalidateBeforeUse)
 		invalidationText = "The buffer is cleared with glBufferData(..., NULL) before glBufferSubData upload.";
 	else
 		invalidationText = "";

 	// Describe what the test tries to do
 	m_testCtx.getLog()
 		<< tcu::TestLog::Message
 		<< "Testing glBufferSubData() function call performance. "
 		<< uploadRangeText
 		<< invalidationText << "\n"
 		<< tcu::TestLog::EndMessage;

 	BasicUploadCase<SingleOperationDuration>::init();
 }

 // Binds the target buffer, optionally invalidates it with glBufferData(NULL)
 // (not timed) and measures one glBufferSubData() call. A full upload writes
 // bufferSize bytes at offset 0, a partial upload writes bufferSize / 2 bytes
 // at offset bufferSize / 4.
 void BufferSubDataUploadCase::testBufferUpload (UploadSampleResult<SingleOperationDuration>& result, int bufferSize)
 {
 	const glw::Functions& gl = m_context.getRenderContext().getFunctions();

 	gl.bindBuffer(GL_ARRAY_BUFFER, m_bufferID);

 	// "invalidate", upload null
 	if (m_invalidateBeforeUse)
 		gl.bufferData(GL_ARRAY_BUFFER, bufferSize, DE_NULL, m_bufferUsage);

 	// timed upload
 	const deUint64 uploadBegin = deGetMicroseconds();

 	if (!m_fullUpload)
 	{
 		// upload to buffer center
 		gl.bufferSubData(GL_ARRAY_BUFFER, bufferSize / 4, bufferSize / 2, &m_zeroData[0]);
 	}
 	else
 		gl.bufferSubData(GL_ARRAY_BUFFER, 0, bufferSize, &m_zeroData[0]);

 	const deUint64 uploadEnd = deGetMicroseconds();

 	result.duration.totalDuration		= uploadEnd - uploadBegin;
 	result.duration.fitResponseDuration	= result.duration.totalDuration;
 	result.writtenSize					= (m_fullUpload) ? (bufferSize) : (bufferSize / 2);
 }

 // Measures uploading through glMapBufferRange(), a memcpy into the mapping
 // and glUnmapBuffer(). Optionally the buffer storage is (re)allocated with
 // glBufferData() first; that call is timed separately as allocDuration.
 class MapBufferRangeCase : public BasicUploadCase<MapBufferRangeDuration>
 {
 public:
 	// Bits accepted in the constructor's 'caseFlags' argument.
 	enum Flags
 	{
 		FLAG_PARTIAL						= 0x01,	// map only the middle half of the buffer
 		FLAG_MANUAL_INVALIDATION			= 0x02,	// call glBufferData(NULL) before mapping
 		FLAG_USE_UNUSED_UNSPECIFIED_BUFFER	= 0x04,	// map a new buffer allocated with glBufferData(NULL)
 		FLAG_USE_UNUSED_SPECIFIED_BUFFER	= 0x08,	// map a new buffer allocated with glBufferData(data)
 	};

 					MapBufferRangeCase			(Context& ctx, const char* name, const char* desc, int minBufferSize, int maxBufferSize, int numSamples, deUint32 bufferUsage, deUint32 mapFlags, int caseFlags);
 					~MapBufferRangeCase			(void);

 	void			init						(void);
 private:
 	// Translate the case flags into the base class' CaseType and CaseFlags.
 	static CaseType getBaseCaseType				(int caseFlags);
 	static int		getBaseFlags				(deUint32 mapFlags, int caseFlags);

 	// Retries attemptBufferMap() when unmapping fails, up to a failure limit.
 	void			testBufferUpload			(UploadSampleResult<MapBufferRangeDuration>& result, int bufferSize);
 	// One alloc / map / write / unmap sequence; throws UnmapFailureError if unmapping fails.
 	void			attemptBufferMap			(UploadSampleResult<MapBufferRangeDuration>& result, int bufferSize);

 	const bool		m_manualInvalidation;
 	const bool		m_fullUpload;					// FLAG_PARTIAL was not given
 	const bool		m_useUnusedUnspecifiedBuffer;
 	const bool		m_useUnusedSpecifiedBuffer;
 	const deUint32	m_mapFlags;						// access bits passed to glMapBufferRange()
 	int				m_unmapFailures;				// unmap failures seen so far, never reset by this class
 };

 // Decodes the case flags into members and derives the base class case type
 // and flags from them. The two "unused buffer" modes exclude each other and
 // neither may be combined with manual invalidation.
 MapBufferRangeCase::MapBufferRangeCase (Context& ctx, const char* name, const char* desc, int minBufferSize, int maxBufferSize, int numSamples, deUint32 bufferUsage, deUint32 mapFlags, int caseFlags)
 	: BasicUploadCase<MapBufferRangeDuration>	(ctx,
 												 name,
 												 desc,
 												 minBufferSize,
 												 maxBufferSize,
 												 numSamples,
 												 bufferUsage,
 												 getBaseCaseType(caseFlags),
 												 RESULT_MEDIAN_TRANSFER_RATE,
 												 getBaseFlags(mapFlags, caseFlags))
 	, m_manualInvalidation						((caseFlags & FLAG_MANUAL_INVALIDATION) != 0)
 	, m_fullUpload								(!(caseFlags & FLAG_PARTIAL))
 	, m_useUnusedUnspecifiedBuffer				((caseFlags & FLAG_USE_UNUSED_UNSPECIFIED_BUFFER) != 0)
 	, m_useUnusedSpecifiedBuffer				((caseFlags & FLAG_USE_UNUSED_SPECIFIED_BUFFER) != 0)
 	, m_mapFlags								(mapFlags)
 	, m_unmapFailures							(0)
 {
 	DE_ASSERT(!m_useUnusedUnspecifiedBuffer || !m_useUnusedSpecifiedBuffer);
 	DE_ASSERT((!m_useUnusedUnspecifiedBuffer && !m_useUnusedSpecifiedBuffer) || !m_manualInvalidation);
 }

 // Nothing to release here; the BasicUploadCase destructor calls deinit().
 MapBufferRangeCase::~MapBufferRangeCase (void)
 {
 }

 // Logs the mapped range, the state of the buffer before mapping and the map
 // access bits, then runs the common upload case initialization.
 void MapBufferRangeCase::init (void)
 {
 	const bool			usedBefore			= !m_useUnusedSpecifiedBuffer && !m_useUnusedUnspecifiedBuffer;
 	const char* const	mappedRangeText		= (m_fullUpload) ? ("The whole buffer is mapped.") : ("Half of the buffer is mapped.");
 	const char* const	unspecifiedText		= (m_useUnusedUnspecifiedBuffer) ? ("The buffer has not been used before mapping and is allocated with unspecified contents.\n") : ("");
 	const char* const	specifiedText		= (m_useUnusedSpecifiedBuffer) ? ("The buffer has not been used before mapping and is allocated with specified contents.\n") : ("");
 	const char* const	usedText			= (usedBefore) ? ("The buffer has previously been used in a drawing operation.\n") : ("");
 	const char* const	invalidationText	= (m_manualInvalidation) ? ("The buffer is cleared with glBufferData(..., NULL) before mapping.\n") : ("");

 	// Describe what the test tries to do
 	m_testCtx.getLog()
 		<< tcu::TestLog::Message
 		<< "Testing glMapBufferRange() and glUnmapBuffer() function call performance.\n"
 		<< mappedRangeText << "\n"
 		<< unspecifiedText
 		<< specifiedText
 		<< usedText
 		<< invalidationText
 		<< "Map bits:\n"
 		<< ((m_mapFlags & GL_MAP_WRITE_BIT) ? ("\tGL_MAP_WRITE_BIT\n") : (""))
 		<< ((m_mapFlags & GL_MAP_READ_BIT) ? ("\tGL_MAP_READ_BIT\n") : (""))
 		<< ((m_mapFlags & GL_MAP_INVALIDATE_RANGE_BIT) ? ("\tGL_MAP_INVALIDATE_RANGE_BIT\n") : (""))
 		<< ((m_mapFlags & GL_MAP_INVALIDATE_BUFFER_BIT) ? ("\tGL_MAP_INVALIDATE_BUFFER_BIT\n") : (""))
 		<< ((m_mapFlags & GL_MAP_UNSYNCHRONIZED_BIT) ? ("\tGL_MAP_UNSYNCHRONIZED_BIT\n") : (""))
 		<< tcu::TestLog::EndMessage;

 	BasicUploadCase<MapBufferRangeDuration>::init();
 }

 // Selects the base class buffer preparation: a new (only generated) buffer
 // when either "unused buffer" flag is set, otherwise a used buffer.
 MapBufferRangeCase::CaseType MapBufferRangeCase::getBaseCaseType (int caseFlags)
 {
 	const int unusedBufferBits = FLAG_USE_UNUSED_UNSPECIFIED_BUFFER | FLAG_USE_UNUSED_SPECIFIED_BUFFER;

 	if ((caseFlags & unusedBufferBits) != 0)
 		return CASE_NEW_BUFFER;

 	return CASE_USED_BUFFER;
 }

 // Computes the CaseFlags for the base class. Buffer info logging is always
 // suppressed.
 //
 // If the buffer contains unspecified data when it is sourced (i.e. drawn),
 // results are undefined and system errors may occur. This is signalled to the
 // parent class for partial mappings whose surrounding contents are discarded:
 // by GL_MAP_INVALIDATE_BUFFER_BIT, by manual invalidation or by mapping a
 // buffer allocated without data.
 int MapBufferRangeCase::getBaseFlags (deUint32 mapFlags, int caseFlags)
 {
 	const bool	partialMapping		= (caseFlags & FLAG_PARTIAL) != 0;
 	const bool	contentsDiscarded	= (mapFlags & GL_MAP_INVALIDATE_BUFFER_BIT) != 0 ||
 									  (caseFlags & (FLAG_MANUAL_INVALIDATION | FLAG_USE_UNUSED_UNSPECIFIED_BUFFER)) != 0;

 	if (partialMapping && contentsDiscarded)
 		return FLAG_DONT_LOG_BUFFER_INFO | FLAG_RESULT_BUFFER_UNSPECIFIED_CONTENT;

 	return FLAG_DONT_LOG_BUFFER_INFO;
 }

 // Runs attemptBufferMap() and retries when it fails with UnmapFailureError.
 // The failure counter is a member that this function never resets. Once it
 // reaches the threshold a tcu::TestError is thrown. Other exceptions
 // propagate unchanged.
 void MapBufferRangeCase::testBufferUpload (UploadSampleResult<MapBufferRangeDuration>& result, int bufferSize)
 {
 	const int unmapFailureThreshold = 4;

 	while (m_unmapFailures < unmapFailureThreshold)
 	{
 		try
 		{
 			attemptBufferMap(result, bufferSize);
 			return;
 		}
 		catch (UnmapFailureError&)
 		{
 			// count the failure and try again
 			++m_unmapFailures;
 		}
 	}

 	throw tcu::TestError("Unmapping failures exceeded limit");
 }

 // Performs one measured upload: optional (re)allocation with glBufferData(),
 // glMapBufferRange(), memcpy of zero data into the mapping and
 // glUnmapBuffer(). Each step is timed separately and the sum is stored as
 // the total duration.
 //
 // Throws tcu::Exception if mapping returns NULL and UnmapFailureError if
 // glUnmapBuffer() reports failure; in both cases the total duration of
 // 'result' is not written.
 void MapBufferRangeCase::attemptBufferMap (UploadSampleResult<MapBufferRangeDuration>& result, int bufferSize)
 {
 	const glw::Functions& gl = m_context.getRenderContext().getFunctions();

 	gl.bindBuffer(GL_ARRAY_BUFFER, m_bufferID);

 	// a partial upload writes only half of the buffer
 	if (m_fullUpload)
 		result.writtenSize = bufferSize;
 	else
 		result.writtenSize = bufferSize / 2;

 	// Create unused buffer

 	if (m_manualInvalidation || m_useUnusedUnspecifiedBuffer)
 	{
 		deUint64 startTime;
 		deUint64 endTime;

 		// "invalidate" or allocate, upload null
 		startTime = deGetMicroseconds();
 		gl.bufferData(GL_ARRAY_BUFFER, bufferSize, DE_NULL, m_bufferUsage);
 		endTime = deGetMicroseconds();

 		result.duration.allocDuration = endTime - startTime;
 	}
 	else if (m_useUnusedSpecifiedBuffer)
 	{
 		deUint64 startTime;
 		deUint64 endTime;

 		// Specify buffer contents
 		startTime = deGetMicroseconds();
 		gl.bufferData(GL_ARRAY_BUFFER, bufferSize, &m_zeroData[0], m_bufferUsage);
 		endTime = deGetMicroseconds();

 		result.duration.allocDuration = endTime - startTime;
 	}
 	else
 	{
 		// No alloc, no time
 		result.duration.allocDuration = 0;
 	}

 	// upload
 	{
 		void* mapPtr;

 		// Map
 		{
 			deUint64 startTime;
 			deUint64 endTime;

 			startTime = deGetMicroseconds();
 			if (m_fullUpload)
 				mapPtr = gl.mapBufferRange(GL_ARRAY_BUFFER, 0, result.writtenSize, m_mapFlags);
 			else
 			{
 				// upload to buffer center
 				mapPtr = gl.mapBufferRange(GL_ARRAY_BUFFER, bufferSize / 4, result.writtenSize, m_mapFlags);
 			}
 			endTime = deGetMicroseconds();

 			if (!mapPtr)
 				throw tcu::Exception("MapBufferRange returned NULL");

 			result.duration.mapDuration = endTime - startTime;
 		}

 		// Write
 		{
 			// the write time is whatever medianTimeMemcpy() reports for this copy
 			result.duration.writeDuration = medianTimeMemcpy(mapPtr, &m_zeroData[0], result.writtenSize);
 		}

 		// Unmap
 		{
 			deUint64		startTime;
 			deUint64		endTime;
 			glw::GLboolean	unmapSuccessful;

 			startTime = deGetMicroseconds();
 			unmapSuccessful = gl.unmapBuffer(GL_ARRAY_BUFFER);
 			endTime = deGetMicroseconds();

 			// if unmapping fails, just try again later
 			// (testBufferUpload() catches the error and retries)
 			if (!unmapSuccessful)
 				throw UnmapFailureError();

 			result.duration.unmapDuration = endTime - startTime;
 		}

 		// total is the sum of all separately timed steps
 		result.duration.totalDuration = result.duration.mapDuration + result.duration.writeDuration + result.duration.unmapDuration + result.duration.allocDuration;
 		result.duration.fitResponseDuration = result.duration.totalDuration;
 	}
 }

 // Measures uploading through glMapBufferRange(), a memcpy into the mapping,
 // glFlushMappedBufferRange() and glUnmapBuffer(). Optionally the buffer
 // storage is allocated with glBufferData() first; that call is timed
 // separately as allocDuration.
 class MapBufferRangeFlushCase : public BasicUploadCase<MapBufferRangeFlushDuration>
 {
 public:
 	// Bits accepted in the constructor's 'caseFlags' argument.
 	enum Flags
 	{
 		FLAG_PARTIAL						= 0x01,	// map only the middle half of the buffer
 		FLAG_FLUSH_IN_PARTS					= 0x02,	// flush the mapped range with several flush calls
 		FLAG_USE_UNUSED_UNSPECIFIED_BUFFER	= 0x04,	// map a new buffer allocated with glBufferData(NULL)
 		FLAG_USE_UNUSED_SPECIFIED_BUFFER	= 0x08,	// map a new buffer allocated with glBufferData(data)
 		FLAG_FLUSH_PARTIAL					= 0x10,	// write and flush only the middle half of the mapped range
 	};

 					MapBufferRangeFlushCase		(Context& ctx, const char* name, const char* desc, int minBufferSize, int maxBufferSize, int numSamples, deUint32 bufferUsage, deUint32 mapFlags, int caseFlags);
 					~MapBufferRangeFlushCase	(void);

 	void			init						(void);
 private:
 	// Translate the case flags into the base class' CaseType and CaseFlags.
 	static CaseType getBaseCaseType				(int caseFlags);
 	static int		getBaseFlags				(deUint32 mapFlags, int caseFlags);

 	// Retries attemptBufferMap() when unmapping fails, up to a failure limit.
 	void			testBufferUpload			(UploadSampleResult<MapBufferRangeFlushDuration>& result, int bufferSize);
 	// One alloc / map / write / flush / unmap sequence; throws UnmapFailureError if unmapping fails.
 	void			attemptBufferMap			(UploadSampleResult<MapBufferRangeFlushDuration>& result, int bufferSize);

 	const bool		m_fullUpload;					// FLAG_PARTIAL was not given
 	const bool		m_flushInParts;
 	const bool		m_flushPartial;
 	const bool		m_useUnusedUnspecifiedBuffer;
 	const bool		m_useUnusedSpecifiedBuffer;
 	const deUint32	m_mapFlags;						// access bits passed to glMapBufferRange()
 	int				m_unmapFailures;				// unmap failures seen so far, never reset by this class
 };

 // Decodes the case flags into members and derives the base class case type
 // and flags from them. A partial flush may not be combined with flushing in
 // parts and requires that the whole buffer is mapped.
 MapBufferRangeFlushCase::MapBufferRangeFlushCase (Context& ctx, const char* name, const char* desc, int minBufferSize, int maxBufferSize, int numSamples, deUint32 bufferUsage, deUint32 mapFlags, int caseFlags)
 	: BasicUploadCase<MapBufferRangeFlushDuration>	(ctx,
 													 name,
 													 desc,
 													 minBufferSize,
 													 maxBufferSize,
 													 numSamples,
 													 bufferUsage,
 													 getBaseCaseType(caseFlags),
 													 RESULT_MEDIAN_TRANSFER_RATE,
 													 getBaseFlags(mapFlags, caseFlags))
 	, m_fullUpload									(!(caseFlags & FLAG_PARTIAL))
 	, m_flushInParts								((caseFlags & FLAG_FLUSH_IN_PARTS) != 0)
 	, m_flushPartial								((caseFlags & FLAG_FLUSH_PARTIAL) != 0)
 	, m_useUnusedUnspecifiedBuffer					((caseFlags & FLAG_USE_UNUSED_UNSPECIFIED_BUFFER) != 0)
 	, m_useUnusedSpecifiedBuffer					((caseFlags & FLAG_USE_UNUSED_SPECIFIED_BUFFER) != 0)
 	, m_mapFlags									(mapFlags)
 	, m_unmapFailures								(0)
 {
 	DE_ASSERT(!m_flushPartial || !m_flushInParts);
 	DE_ASSERT(!m_flushPartial || m_fullUpload);
 }

 // Nothing to release here; the BasicUploadCase destructor calls deinit().
 MapBufferRangeFlushCase::~MapBufferRangeFlushCase (void)
 {
 }

 // Logs the mapped range, the flush strategy, the state of the buffer before
 // mapping and the map access bits, then runs the common upload case
 // initialization.
 void MapBufferRangeFlushCase::init (void)
 {
 	const bool			usedBefore			= !m_useUnusedSpecifiedBuffer && !m_useUnusedUnspecifiedBuffer;
 	const char* const	mappedRangeText		= (m_fullUpload) ? ("The whole buffer is mapped.") : ("Half of the buffer is mapped.");
 	const char* const	unspecifiedText		= (m_useUnusedUnspecifiedBuffer) ? ("The buffer has not been used before mapping and is allocated with unspecified contents.\n") : ("");
 	const char* const	specifiedText		= (m_useUnusedSpecifiedBuffer) ? ("The buffer has not been used before mapping and is allocated with specified contents.\n") : ("");
 	const char* const	usedText			= (usedBefore) ? ("The buffer has previously been used in a drawing operation.\n") : ("");
 	const char*			flushText;

 	// flushing in parts takes precedence over the partial flush description
 	if (m_flushInParts)
 		flushText = "The mapped range is partitioned to 4 subranges and each partition is flushed separately.";
 	else if (m_flushPartial)
 		flushText = "Half of the buffer range is flushed.";
 	else
 		flushText = "The whole mapped range is flushed in one flush call.";

 	// Describe what the test tries to do
 	m_testCtx.getLog()
 		<< tcu::TestLog::Message
 		<< "Testing glMapBufferRange(), glFlushMappedBufferRange() and glUnmapBuffer() function call performance.\n"
 		<< mappedRangeText << "\n"
 		<< flushText << "\n"
 		<< unspecifiedText
 		<< specifiedText
 		<< usedText
 		<< "Map bits:\n"
 		<< ((m_mapFlags & GL_MAP_WRITE_BIT) ? ("\tGL_MAP_WRITE_BIT\n") : (""))
 		<< ((m_mapFlags & GL_MAP_READ_BIT) ? ("\tGL_MAP_READ_BIT\n") : (""))
 		<< ((m_mapFlags & GL_MAP_INVALIDATE_RANGE_BIT) ? ("\tGL_MAP_INVALIDATE_RANGE_BIT\n") : (""))
 		<< ((m_mapFlags & GL_MAP_INVALIDATE_BUFFER_BIT) ? ("\tGL_MAP_INVALIDATE_BUFFER_BIT\n") : (""))
 		<< ((m_mapFlags & GL_MAP_UNSYNCHRONIZED_BIT) ? ("\tGL_MAP_UNSYNCHRONIZED_BIT\n") : (""))
 		<< ((m_mapFlags & GL_MAP_FLUSH_EXPLICIT_BIT) ? ("\tGL_MAP_FLUSH_EXPLICIT_BIT\n") : (""))
 		<< tcu::TestLog::EndMessage;

 	BasicUploadCase<MapBufferRangeFlushDuration>::init();
 }

 // Selects the base class buffer preparation: a new (only generated) buffer
 // when either "unused buffer" flag is set, otherwise a used buffer.
 MapBufferRangeFlushCase::CaseType MapBufferRangeFlushCase::getBaseCaseType (int caseFlags)
 {
 	const int unusedBufferBits = FLAG_USE_UNUSED_UNSPECIFIED_BUFFER | FLAG_USE_UNUSED_SPECIFIED_BUFFER;

 	if ((caseFlags & unusedBufferBits) != 0)
 		return CASE_NEW_BUFFER;

 	return CASE_USED_BUFFER;
 }

 // Computes the CaseFlags for the base class. Buffer info logging is always
 // suppressed.
 //
 // If the buffer contains unspecified data when it is sourced (i.e. drawn),
 // results are undefined and system errors may occur. This is signalled to the
 // parent class for partial mappings combined with one of the conditions below.
 //
 // NOTE(review): the constructor asserts that FLAG_FLUSH_PARTIAL is only used
 // when the whole buffer is mapped, i.e. without FLAG_PARTIAL. The
 // FLAG_FLUSH_PARTIAL term can therefore not be true here for configurations
 // that satisfy that assertion - confirm whether it was meant to be checked
 // independently of FLAG_PARTIAL.
 int MapBufferRangeFlushCase::getBaseFlags (deUint32 mapFlags, int caseFlags)
 {
 	const bool	partialMapping		= (caseFlags & FLAG_PARTIAL) != 0;
 	const bool	contentsUnspecified	= (mapFlags & GL_MAP_INVALIDATE_BUFFER_BIT) != 0 ||
 									  (caseFlags & (FLAG_USE_UNUSED_UNSPECIFIED_BUFFER | FLAG_FLUSH_PARTIAL)) != 0;

 	if (partialMapping && contentsUnspecified)
 		return FLAG_DONT_LOG_BUFFER_INFO | FLAG_RESULT_BUFFER_UNSPECIFIED_CONTENT;

 	return FLAG_DONT_LOG_BUFFER_INFO;
 }

 // Runs attemptBufferMap() and retries when it fails with UnmapFailureError.
 // The failure counter is a member that this function never resets. Once it
 // reaches the threshold a tcu::TestError is thrown. Other exceptions
 // propagate unchanged.
 void MapBufferRangeFlushCase::testBufferUpload (UploadSampleResult<MapBufferRangeFlushDuration>& result, int bufferSize)
 {
 	const int unmapFailureThreshold = 4;

 	while (m_unmapFailures < unmapFailureThreshold)
 	{
 		try
 		{
 			attemptBufferMap(result, bufferSize);
 			return;
 		}
 		catch (UnmapFailureError&)
 		{
 			// count the failure and try again
 			++m_unmapFailures;
 		}
 	}

 	throw tcu::TestError("Unmapping failures exceeded limit");
 }

 // Performs one measured upload: optional allocation with glBufferData(),
 // glMapBufferRange(), memcpy of zero data into the mapping,
 // glFlushMappedBufferRange() and glUnmapBuffer(). Each step is timed
 // separately and the sum is stored as the total duration.
 //
 // Throws tcu::Exception if mapping returns NULL and UnmapFailureError if
 // glUnmapBuffer() reports failure; in both cases the total duration of
 // 'result' is not written.
 void MapBufferRangeFlushCase::attemptBufferMap (UploadSampleResult<MapBufferRangeFlushDuration>& result, int bufferSize)
 {
 	const glw::Functions&	gl			= m_context.getRenderContext().getFunctions();
 	const int				mappedSize	= (m_fullUpload) ? (bufferSize) : (bufferSize / 2);

 	// Only half of the buffer is written when either the mapping or the
 	// flush is partial.
 	if (m_fullUpload && !m_flushPartial)
 		result.writtenSize = bufferSize;
 	else
 		result.writtenSize = bufferSize / 2;

 	gl.bindBuffer(GL_ARRAY_BUFFER, m_bufferID);

 	// Create unused buffer

 	if (m_useUnusedUnspecifiedBuffer)
 	{
 		deUint64 startTime;
 		deUint64 endTime;

 		// Don't specify contents
 		startTime = deGetMicroseconds();
 		gl.bufferData(GL_ARRAY_BUFFER, bufferSize, DE_NULL, m_bufferUsage);
 		endTime = deGetMicroseconds();

 		result.duration.allocDuration = endTime - startTime;
 	}
 	else if (m_useUnusedSpecifiedBuffer)
 	{
 		deUint64 startTime;
 		deUint64 endTime;

 		// Specify buffer contents
 		startTime = deGetMicroseconds();
 		gl.bufferData(GL_ARRAY_BUFFER, bufferSize, &m_zeroData[0], m_bufferUsage);
 		endTime = deGetMicroseconds();

 		result.duration.allocDuration = endTime - startTime;
 	}
 	else
 	{
 		// No alloc, no time
 		result.duration.allocDuration = 0;
 	}

 	// upload
 	{
 		void* mapPtr;

 		// Map
 		{
 			deUint64 startTime;
 			deUint64 endTime;

 			startTime = deGetMicroseconds();
 			if (m_fullUpload)
 				mapPtr = gl.mapBufferRange(GL_ARRAY_BUFFER, 0, mappedSize, m_mapFlags);
 			else
 			{
 				// upload to buffer center
 				mapPtr = gl.mapBufferRange(GL_ARRAY_BUFFER, bufferSize / 4, mappedSize, m_mapFlags);
 			}
 			endTime = deGetMicroseconds();

 			if (!mapPtr)
 				throw tcu::Exception("MapBufferRange returned NULL");

 			result.duration.mapDuration = endTime - startTime;
 		}

 		// Write
 		{
 			// With a partial flush only the middle half of the mapping is
 			// written, matching the range that is flushed below.
 			if (!m_flushPartial)
 				result.duration.writeDuration = medianTimeMemcpy(mapPtr, &m_zeroData[0], result.writtenSize);
 			else
 				result.duration.writeDuration = medianTimeMemcpy((deUint8*)mapPtr + bufferSize / 4, &m_zeroData[0], result.writtenSize);
 		}

 		// Flush
 		{
 			deUint64	startTime;
 			deUint64	endTime;

 			startTime = deGetMicroseconds();

 			if (m_flushPartial)
 				gl.flushMappedBufferRange(GL_ARRAY_BUFFER, mappedSize/4, mappedSize/2);
 			else if (!m_flushInParts)
 				gl.flushMappedBufferRange(GL_ARRAY_BUFFER, 0, mappedSize);
 			else
 			{
 				// NOTE(review): p3 (mappedSize / 2) and p4 (mappedSize * 2 / 4)
 				// evaluate to the same value, so the flush of [p3, p4) below has
 				// length zero and only three non-empty subranges are flushed,
 				// while init() logs "4 subranges". Confirm the intended
 				// partition points before changing them, as this affects what
 				// the test measures.
 				const int p1 = 0;
 				const int p2 = mappedSize / 3;
 				const int p3 = mappedSize / 2;
 				const int p4 = mappedSize * 2 / 4;
 				const int p5 = mappedSize;

 				// flush in mixed order
 				gl.flushMappedBufferRange(GL_ARRAY_BUFFER, p2,	p3-p2);
 				gl.flushMappedBufferRange(GL_ARRAY_BUFFER, p1,	p2-p1);
 				gl.flushMappedBufferRange(GL_ARRAY_BUFFER, p4,	p5-p4);
 				gl.flushMappedBufferRange(GL_ARRAY_BUFFER, p3,	p4-p3);
 			}

 			endTime = deGetMicroseconds();

 			result.duration.flushDuration = endTime - startTime;
 		}

 		// Unmap
 		{
 			deUint64		startTime;
 			deUint64		endTime;
 			glw::GLboolean	unmapSuccessful;

 			startTime = deGetMicroseconds();
 			unmapSuccessful = gl.unmapBuffer(GL_ARRAY_BUFFER);
 			endTime = deGetMicroseconds();

 			// if unmapping fails, just try again later
 			// (testBufferUpload() catches the error and retries)
 			if (!unmapSuccessful)
 				throw UnmapFailureError();

 			result.duration.unmapDuration = endTime - startTime;
 		}

 		// total is the sum of all separately timed steps
 		result.duration.totalDuration = result.duration.mapDuration + result.duration.writeDuration + result.duration.flushDuration + result.duration.unmapDuration + result.duration.allocDuration;
 		result.duration.fitResponseDuration = result.duration.totalDuration;
 	}
 }

 // Base class for the "modify after" cases. Only the declaration and the
 // constructor / destructor are visible in this part of the file; the notes
 // below are limited to what those show.
 template <typename SampleType>
 class ModifyAfterBasicCase : public BasicBufferCase<SampleType>
 {
 public:
 						ModifyAfterBasicCase	(Context& context, const char* name, const char* description, int bufferSizeMin, int bufferSizeMax, deUint32 usage, bool bufferUnspecifiedAfterTest);
 						~ModifyAfterBasicCase	(void);

 	void				init					(void);
 	void				deinit					(void);

 protected:
 	void				drawBufferRange			(int begin, int end);

 private:
 	enum
 	{
 		// sample count passed to the base class by the constructor
 		NUM_SAMPLES = 20,
 	};


 	bool				runSample				(int iteration, UploadSampleResult<SampleType>& sample);
 	bool				prepareAndRunTest		(int iteration, UploadSampleResult<SampleType>& result, int bufferSize);
 	void				logAndSetTestResult		(const std::vector<UploadSampleResult<SampleType> >& results);

 	// Subclass hook; presumably performs the measured operation for one
 	// buffer size - confirm against the implementations.
 	virtual void		testWithBufferSize		(UploadSampleResult<SampleType>& result, int bufferSize) = 0;

 	// initialized to 0 by the constructor
 	int					m_unmappingErrors;

 protected:
 	const bool			m_bufferUnspecifiedAfterTest;	// constructor argument, stored as is
 	const deUint32		m_bufferUsage;					// 'usage' constructor argument
 	std::vector<deUint8> m_zeroData;

 	using BasicBufferCase<SampleType>::m_testCtx;
 	using BasicBufferCase<SampleType>::m_context;

 	using BasicBufferCase<SampleType>::DUMMY_RENDER_AREA_SIZE;
 	using BasicBufferCase<SampleType>::m_dummyProgram;
 	using BasicBufferCase<SampleType>::m_dummyProgramPosLoc;
 	using BasicBufferCase<SampleType>::m_bufferID;
 	using BasicBufferCase<SampleType>::m_numSamples;
 	using BasicBufferCase<SampleType>::m_bufferSizeMin;
 	using BasicBufferCase<SampleType>::m_bufferSizeMax;
 	using BasicBufferCase<SampleType>::m_allocateLargerBuffer;
 };

 // Forwards the case identity and buffer size range to BasicBufferCase, requesting
 // NUM_SAMPLES samples, and stores the buffer usage and the "unspecified after test" flag.
 // The zero-filled source data is left empty here; init() sizes it.
 template <typename SampleType>
 ModifyAfterBasicCase<SampleType>::ModifyAfterBasicCase (Context& context, const char* name, const char* description, int bufferSizeMin, int bufferSizeMax, deUint32 usage, bool bufferUnspecifiedAfterTest)
 	: BasicBufferCase<SampleType>	(context, name, description, bufferSizeMin, bufferSizeMax, NUM_SAMPLES, 0)
 	, m_unmappingErrors				(0)
 	, m_bufferUnspecifiedAfterTest	(bufferUnspecifiedAfterTest)
 	, m_bufferUsage					(usage)
 	, m_zeroData					()
 {
 }

 // Runs the base-class deinit explicitly; m_zeroData is released by its own destructor.
 template <typename SampleType>
 ModifyAfterBasicCase<SampleType>::~ModifyAfterBasicCase (void)
 {
 	BasicBufferCase<SampleType>::deinit();
 }

 // Initializes the base case, allocates the zero-filled upload source, logs the test
 // parameters and sets the GL state that stays constant over all samples.
 template <typename SampleType>
 void ModifyAfterBasicCase<SampleType>::init (void)
 {
 	const glw::Functions&	gl	= m_context.getRenderContext().getFunctions();

 	// Base class setup comes first
 	BasicBufferCase<SampleType>::init();

 	// Upload source: zeros, large enough for the biggest buffer in the tested range
 	m_zeroData.resize(m_bufferSizeMax, 0x00);

 	tcu::TestLog&			log	= m_testCtx.getLog();

 	// Sampling scheme and buffer size range
 	log	<< tcu::TestLog::Message
 		<< "Testing performance with " << (int)NUM_SAMPLES << " test samples. Sample order is randomized. All samples at even positions (first = 0) are tested before samples at odd positions.\n"
 		<< "Buffer sizes are in range [" << getHumanReadableByteSize(m_bufferSizeMin) << ", " << getHumanReadableByteSize(m_bufferSizeMax) << "]."
 		<< tcu::TestLog::EndMessage;

 	// What the reported result means, and which usage hint the buffers get
 	log	<< tcu::TestLog::Message
 		<< "Test result is the median transfer rate of the test samples.\n"
 		<< "Buffer usage = " << glu::getUsageName(m_bufferUsage)
 		<< tcu::TestLog::EndMessage;

 	// Draw state is set once here so the per-sample code does not have to touch it
 	gl.useProgram(m_dummyProgram->getProgram());
 	gl.viewport(0, 0, DUMMY_RENDER_AREA_SIZE, DUMMY_RENDER_AREA_SIZE);
 	gl.enableVertexAttribArray(m_dummyProgramPosLoc);
 }

 // Resets the upload source to an empty vector and then deinitializes the base case.
 template <typename SampleType>
 void ModifyAfterBasicCase<SampleType>::deinit (void)
 {
 	// drop the zero-filled source data allocated in init()
 	m_zeroData = std::vector<deUint8>();

 	BasicBufferCase<SampleType>::deinit();
 }

 // Draws one point from the first vec4 and one point from the last vec4 of the byte range
 // [begin, end). Both bounds must be multiples of sizeof(float[4]).
 template <typename SampleType>
 void ModifyAfterBasicCase<SampleType>::drawBufferRange (int begin, int end)
 {
 	const int				vertexSize	= (int)sizeof(float[4]);

 	DE_ASSERT(begin % vertexSize == 0);
 	DE_ASSERT(end % vertexSize == 0);

 	const glw::Functions&	gl			= m_context.getRenderContext().getFunctions();
 	const int				firstVertex	= begin / vertexSize;
 	const int				lastVertex	= end / vertexSize - 1;

 	// touch only the two ends of the range
 	gl.drawArrays(GL_POINTS, firstVertex, 1);
 	gl.drawArrays(GL_POINTS, lastVertex, 1);
 }

 // Runs a single sample with the buffer size stored in the sample. Returns true on success
 // and false when the sample failed and should be run again. Throws tcu::TestError once
 // the number of failed samples reaches the failure limit.
 template <typename SampleType>
 bool ModifyAfterBasicCase<SampleType>::runSample (int iteration, UploadSampleResult<SampleType>& sample)
 {
 	const int				maxUnmapFailures	= 4;
 	const glw::Functions&	gl					= m_context.getRenderContext().getFunctions();
 	const bool				sampleOk			= prepareAndRunTest(iteration, sample, sample.bufferSize);

 	GLU_EXPECT_NO_ERROR(gl.getError(), "Buffer upload sample");

 	if (sampleOk)
 		return true;

 	// An unmap failure is the only thing that makes prepareAndRunTest() return false
 	++m_unmappingErrors;
 	if (m_unmappingErrors >= maxUnmapFailures)
 		throw tcu::TestError("Too many unmapping errors, cannot continue.");

 	// caller retries the sample
 	return false;
 }

 // Creates a buffer of bufferSize bytes, uploads zeros to it, draws from it, and then runs
 // the timed testWithBufferSize() step. Afterwards the buffer is drawn from once more and
 // deleted. Returns false if the timed step threw UnmapFailureError, true otherwise.
 // bufferSize must be vec4 aligned and no buffer may exist on entry.
 template <typename SampleType>
 bool ModifyAfterBasicCase<SampleType>::prepareAndRunTest (int iteration, UploadSampleResult<SampleType>& result, int bufferSize)
 {
 	DE_UNREF(iteration);

 	DE_ASSERT(!m_bufferID);
 	DE_ASSERT(deIsAligned32(bufferSize, 4*4)); // aligned to vec4

 	const glw::Functions&	gl			= m_context.getRenderContext().getFunctions();
 	bool					unmapFailed	= false;

 	// Create the buffer with its initial (zero) contents
 	gl.genBuffers(1, &m_bufferID);
 	gl.bindBuffer(GL_ARRAY_BUFFER, m_bufferID);
 	gl.bufferData(GL_ARRAY_BUFFER, bufferSize, &m_zeroData[0], m_bufferUsage);

 	// Source the buffer from a draw and wait for the results so the upload has taken place
 	gl.vertexAttribPointer(m_dummyProgramPosLoc, 4, GL_FLOAT, GL_FALSE, 0, DE_NULL);
 	drawBufferRange(0, bufferSize);
 	BasicBufferCase<SampleType>::waitGLResults();

 	// Warm up the CPU right before measuring so that the power management governor
 	// keeps us in the "high performance" mode
 	deYield();
 	tcu::warmupCPU();
 	deYield();

 	try
 	{
 		// queue a draw that uses the buffer, then run the timed modification
 		drawBufferRange(0, bufferSize);
 		testWithBufferSize(result, bufferSize);
 	}
 	catch (UnmapFailureError&)
 	{
 		unmapFailed = true;
 	}

 	// Cleanup. Sourcing unspecified data causes undefined results (possibly program
 	// termination), so give the buffer defined contents before drawing from it again.
 	if (unmapFailed || m_bufferUnspecifiedAfterTest)
 		gl.bufferData(GL_ARRAY_BUFFER, bufferSize, &m_zeroData[0], m_bufferUsage);

 	// make sure the buffer is not in an upload queue when it is deleted
 	drawBufferRange(0, bufferSize);
 	BasicBufferCase<SampleType>::waitGLResults();

 	gl.deleteBuffers(1, &m_bufferID);
 	m_bufferID = 0;

 	return !unmapFailed;
 }

 // Analyzes the collected samples and sets the test result to the median transfer rate,
 // divided by 1024*1024 (MB / s). If the median is infinite, 0 is reported instead.
 template <typename SampleType>
 void ModifyAfterBasicCase<SampleType>::logAndSetTestResult (const std::vector<UploadSampleResult<SampleType> >& results)
 {
 	const UploadSampleAnalyzeResult	analysis		= analyzeSampleResults(m_testCtx.getLog(), results, false);

 	// An infinite rate means the sample times are invalid or the timer resolution is too
 	// low; the real value cannot be determined, so report 0.
 	const bool						rateUnknown		= (analysis.transferRateMedian == std::numeric_limits<float>::infinity());
 	const float						reportedRate	= (rateUnknown) ? (0.0f) : (analysis.transferRateMedian / 1024.0f / 1024.0f);

 	m_testCtx.setTestResult(QP_TEST_RESULT_PASS, de::floatToString(reportedRate, 2).c_str());
 }

 // Measures a BufferData() call issued after a draw that sources the target buffer.
 class ModifyAfterWithBufferDataCase : public ModifyAfterBasicCase<SingleOperationDuration>
 {
 public:

 	// Bits accepted in the constructor's flags argument
 	enum CaseFlags
 	{
 		FLAG_RESPECIFY_SIZE		= 0x1,	// the timed BufferData() uses a larger size than the original buffer
 		FLAG_UPLOAD_REPEATED	= 0x2,	// upload-draw pair is repeated NUM_REPEATS times before the timed upload
 	};

 					ModifyAfterWithBufferDataCase	(Context& context, const char* name, const char* desc, int bufferSizeMin, int bufferSizeMax, deUint32 usage, int flags);
 					~ModifyAfterWithBufferDataCase	(void);

 	void			init							(void);
 	void			deinit							(void);
 private:
 	// Times a single BufferData() call and stores the duration and written size in result.
 	void			testWithBufferSize				(UploadSampleResult<SingleOperationDuration>& result, int bufferSize);

 	enum
 	{
 		NUM_REPEATS = 2		// untimed upload-draw repetitions when FLAG_UPLOAD_REPEATED is set
 	};

 	const bool		m_respecifySize;			// FLAG_RESPECIFY_SIZE was given
 	const bool		m_repeatedUpload;			// FLAG_UPLOAD_REPEATED was given
 	const float		m_sizeDifferenceFactor;		// size multiplier used for the larger buffer
 };

 // Decodes the CaseFlags bits into members and fixes the size growth factor to 1.3.
 // The base class is told that the buffer is never left unspecified by the test.
 ModifyAfterWithBufferDataCase::ModifyAfterWithBufferDataCase (Context& context, const char* name, const char* desc, int bufferSizeMin, int bufferSizeMax, deUint32 usage, int flags)
 	: ModifyAfterBasicCase<SingleOperationDuration> (context, name, desc, bufferSizeMin, bufferSizeMax, usage, false)
 	, m_respecifySize								((flags & FLAG_RESPECIFY_SIZE) != 0)
 	, m_repeatedUpload								((flags & FLAG_UPLOAD_REPEATED) != 0)
 	, m_sizeDifferenceFactor						(1.3f)
 {
 	// the two flags are not expected to be combined
 	DE_ASSERT(!(m_repeatedUpload && m_respecifySize));
 }

 // Releases the case's resources by calling deinit().
 ModifyAfterWithBufferDataCase::~ModifyAfterWithBufferDataCase (void)
 {
 	deinit();
 }

 void ModifyAfterWithBufferDataCase::init (void)
 {
 	// Log the purpose of the test

 	if (m_repeatedUpload)
 		m_testCtx.getLog() << tcu::TestLog::Message << "Testing performance of BufferData() command after \"specify buffer contents - draw buffer\" command pair is repeated " << (int)NUM_REPEATS << " times." << tcu::TestLog::EndMessage;
 	else
 		m_testCtx.getLog() << tcu::TestLog::Message << "Testing performance of BufferData() command after a draw command that sources data from the target buffer." << tcu::TestLog::EndMessage;

 	m_testCtx.getLog()
 		<< tcu::TestLog::Message
 		<< ((m_respecifySize) ?
 			("Buffer size is increased and contents are modified with BufferData().\n") :
 			("Buffer contents are modified with BufferData().\n"))
 		<< tcu::TestLog::EndMessage;

 	// init parent
 	ModifyAfterBasicCase<SingleOperationDuration>::init();

 	// make sure our zeroBuffer is large enough
 	if (m_respecifySize)
 	{
 		const int largerBufferSize = deAlign32((int)((float)m_bufferSizeMax * m_sizeDifferenceFactor), 4*4);
 		m_zeroData.resize(largerBufferSize, 0x00);
 	}
 }

 // Nothing case-specific to release; forwards to the base class deinit.
 void ModifyAfterWithBufferDataCase::deinit (void)
 {
 	ModifyAfterBasicCase<SingleOperationDuration>::deinit();
 }

 // Times one BufferData() call on the currently bound GL_ARRAY_BUFFER. With repeated
 // upload enabled, NUM_REPEATS untimed upload-draw pairs are issued first. The uploaded
 // size is bufferSize, or bufferSize scaled by m_sizeDifferenceFactor (vec4 aligned) when
 // the size is re-specified. Duration and written size are stored in result.
 void ModifyAfterWithBufferDataCase::testWithBufferSize (UploadSampleResult<SingleOperationDuration>& result, int bufferSize)
 {
 	const glw::Functions&	gl			= m_context.getRenderContext().getFunctions();

 	// The drawn range is the same in every variant so that cases stay comparable
 	const int				drawStart	= deAlign32(bufferSize / 4, 4*4);
 	const int				drawEnd		= deAlign32(bufferSize * 3 / 4, 4*4);

 	int						uploadSize	= bufferSize;

 	if (m_respecifySize)
 		uploadSize = deAlign32((int)((float)bufferSize * m_sizeDifferenceFactor), 4*4);

 	// Untimed upload-draw stream
 	if (m_repeatedUpload)
 	{
 		for (int repeatsLeft = NUM_REPEATS; repeatsLeft > 0; --repeatsLeft)
 		{
 			gl.bufferData(GL_ARRAY_BUFFER, uploadSize, &m_zeroData[0], m_bufferUsage);
 			drawBufferRange(drawStart, drawEnd);
 		}
 	}

 	// Timed upload
 	const deUint64			startTime	= deGetMicroseconds();
 	gl.bufferData(GL_ARRAY_BUFFER, uploadSize, &m_zeroData[0], m_bufferUsage);
 	const deUint64			endTime		= deGetMicroseconds();

 	result.writtenSize					= uploadSize;
 	result.duration.totalDuration		= endTime - startTime;
 	result.duration.fitResponseDuration	= result.duration.totalDuration;
 }

 // Measures a BufferSubData() call issued after a draw that sources the target buffer.
 class ModifyAfterWithBufferSubDataCase : public ModifyAfterBasicCase<SingleOperationDuration>
 {
 public:

 	// Bits accepted in the constructor's flags argument
 	enum CaseFlags
 	{
 		FLAG_PARTIAL			= 0x1,	// only part of the buffer is updated
 		FLAG_UPLOAD_REPEATED	= 0x2,	// upload-draw pair is repeated NUM_REPEATS times before the timed upload
 	};

 					ModifyAfterWithBufferSubDataCase	(Context& context, const char* name, const char* desc, int bufferSizeMin, int bufferSizeMax, deUint32 usage, int flags);
 					~ModifyAfterWithBufferSubDataCase	(void);

 	void			init								(void);
 	void			deinit								(void);
 private:
 	// Times a single BufferSubData() call and stores the duration and written size in result.
 	void			testWithBufferSize					(UploadSampleResult<SingleOperationDuration>& result, int bufferSize);

 	enum
 	{
 		NUM_REPEATS = 2		// untimed upload-draw repetitions when FLAG_UPLOAD_REPEATED is set
 	};

 	const bool		m_partialUpload;	// FLAG_PARTIAL was given
 	const bool		m_repeatedUpload;	// FLAG_UPLOAD_REPEATED was given
 };

 // Decodes the CaseFlags bits into members. The base class is told that the buffer is
 // never left unspecified by the test.
 ModifyAfterWithBufferSubDataCase::ModifyAfterWithBufferSubDataCase (Context& context, const char* name, const char* desc, int bufferSizeMin, int bufferSizeMax, deUint32 usage, int flags)
 	: ModifyAfterBasicCase<SingleOperationDuration>	(context, name, desc, bufferSizeMin, bufferSizeMax, usage, false)
 	, m_partialUpload								((flags & FLAG_PARTIAL) != 0)
 	, m_repeatedUpload								((flags & FLAG_UPLOAD_REPEATED) != 0)
 {
 }

 // Releases the case's resources by calling deinit().
 ModifyAfterWithBufferSubDataCase::~ModifyAfterWithBufferSubDataCase (void)
 {
 	deinit();
 }

 void ModifyAfterWithBufferSubDataCase::init (void)
 {
 	// Log the purpose of the test

 	if (m_repeatedUpload)
 		m_testCtx.getLog() << tcu::TestLog::Message << "Testing performance of BufferSubData() command after \"specify buffer contents - draw buffer\" command pair is repeated " << (int)NUM_REPEATS << " times." << tcu::TestLog::EndMessage;
 	else
 		m_testCtx.getLog() << tcu::TestLog::Message << "Testing performance of BufferSubData() command after a draw command that sources data from the target buffer." << tcu::TestLog::EndMessage;

 	m_testCtx.getLog()
 		<< tcu::TestLog::Message
 		<< ((m_partialUpload) ?
 			("Half of the buffer contents are modified.\n") :
 			("Buffer contents are fully respecified.\n"))
 		<< tcu::TestLog::EndMessage;

 	ModifyAfterBasicCase<SingleOperationDuration>::init();
 }

 // Nothing case-specific to release; forwards to the base class deinit.
 void ModifyAfterWithBufferSubDataCase::deinit (void)
 {
 	ModifyAfterBasicCase<SingleOperationDuration>::deinit();
 }

 // Times one BufferSubData() call on the currently bound GL_ARRAY_BUFFER. A partial upload
 // writes bufferSize/2 bytes starting at bufferSize/4 (both vec4 aligned); otherwise the
 // whole buffer is written. With repeated upload enabled, NUM_REPEATS untimed upload-draw
 // pairs are issued first. Duration and written size are stored in result.
 void ModifyAfterWithBufferSubDataCase::testWithBufferSize (UploadSampleResult<SingleOperationDuration>& result, int bufferSize)
 {
 	const glw::Functions&	gl				= m_context.getRenderContext().getFunctions();

 	// The drawn range is the same in every variant so that cases stay comparable
 	const int				drawStart		= deAlign32(bufferSize / 4, 4*4);
 	const int				drawEnd			= deAlign32(bufferSize * 3 / 4, 4*4);

 	int						rawOffset		= 0;
 	int						rawSize			= bufferSize;

 	if (m_partialUpload)
 	{
 		rawOffset	= bufferSize / 4;
 		rawSize		= bufferSize / 2;
 	}

 	const int				uploadOffset	= deAlign32(rawOffset, 4*4);
 	const int				uploadSize		= deAlign32(rawSize, 4*4);

 	// Untimed upload-draw stream
 	if (m_repeatedUpload)
 	{
 		for (int repeatsLeft = NUM_REPEATS; repeatsLeft > 0; --repeatsLeft)
 		{
 			gl.bufferSubData(GL_ARRAY_BUFFER, uploadOffset, uploadSize, &m_zeroData[0]);
 			drawBufferRange(drawStart, drawEnd);
 		}
 	}

 	// Timed upload
 	const deUint64			startTime		= deGetMicroseconds();
 	gl.bufferSubData(GL_ARRAY_BUFFER, uploadOffset, uploadSize, &m_zeroData[0]);
 	const deUint64			endTime			= deGetMicroseconds();

 	result.writtenSize					= uploadSize;
 	result.duration.totalDuration		= endTime - startTime;
 	result.duration.fitResponseDuration	= result.duration.totalDuration;
 }

 // Measures a MapBufferRange() - write - UnmapBuffer() sequence issued after a draw that
 // sources the target buffer.
 class ModifyAfterWithMapBufferRangeCase : public ModifyAfterBasicCase<MapBufferRangeDurationNoAlloc>
 {
 public:

 	// Bits accepted in the constructor's flags argument
 	enum CaseFlags
 	{
 		FLAG_PARTIAL = 0x1,		// only part of the buffer is mapped
 	};

 					ModifyAfterWithMapBufferRangeCase	(Context& context, const char* name, const char* desc, int bufferSizeMin, int bufferSizeMax, deUint32 usage, int flags, deUint32 glMapFlags);
 					~ModifyAfterWithMapBufferRangeCase	(void);

 	void			init								(void);
 	void			deinit								(void);
 private:
 	// True when a partial mapping is combined with GL_MAP_INVALIDATE_BUFFER_BIT.
 	static bool		isBufferUnspecifiedAfterUpload		(int flags, deUint32 mapFlags);
 	// Times map, write and unmap separately and stores the durations and written size in result.
 	void			testWithBufferSize					(UploadSampleResult<MapBufferRangeDurationNoAlloc>& result, int bufferSize);

 	const bool		m_partialUpload;	// FLAG_PARTIAL was given
 	const deUint32	m_mapFlags;			// access bits passed to mapBufferRange()
 };

 // Stores the map access bits and the partial-upload flag. Whether the buffer is left
 // unspecified by the test is derived from the flags with isBufferUnspecifiedAfterUpload()
 // and handed to the base class.
 ModifyAfterWithMapBufferRangeCase::ModifyAfterWithMapBufferRangeCase (Context& context, const char* name, const char* desc, int bufferSizeMin, int bufferSizeMax, deUint32 usage, int flags, deUint32 glMapFlags)
 	: ModifyAfterBasicCase<MapBufferRangeDurationNoAlloc>	(context, name, desc, bufferSizeMin, bufferSizeMax, usage, isBufferUnspecifiedAfterUpload(flags, glMapFlags))
 	, m_partialUpload										((flags & FLAG_PARTIAL) != 0)
 	, m_mapFlags											(glMapFlags)
 {
 }

 // Releases the case's resources by calling deinit().
 ModifyAfterWithMapBufferRangeCase::~ModifyAfterWithMapBufferRangeCase (void)
 {
 	deinit();
 }

 // Logs what this case measures, including the mapped range and every map access bit that
 // is set in m_mapFlags, and then initializes the base case.
 void ModifyAfterWithMapBufferRangeCase::init (void)
 {
 	const char* const	rangeDesc	= (m_partialUpload) ?
 										("Half of the buffer is mapped.\n") :
 										("Whole buffer is mapped.\n");
 	std::string			mapBitList;

 	// One line per access bit, in a fixed order
 	if (m_mapFlags & GL_MAP_WRITE_BIT)
 		mapBitList += "\tGL_MAP_WRITE_BIT\n";
 	if (m_mapFlags & GL_MAP_READ_BIT)
 		mapBitList += "\tGL_MAP_READ_BIT\n";
 	if (m_mapFlags & GL_MAP_INVALIDATE_RANGE_BIT)
 		mapBitList += "\tGL_MAP_INVALIDATE_RANGE_BIT\n";
 	if (m_mapFlags & GL_MAP_INVALIDATE_BUFFER_BIT)
 		mapBitList += "\tGL_MAP_INVALIDATE_BUFFER_BIT\n";
 	if (m_mapFlags & GL_MAP_UNSYNCHRONIZED_BIT)
 		mapBitList += "\tGL_MAP_UNSYNCHRONIZED_BIT\n";
 	if (m_mapFlags & GL_MAP_FLUSH_EXPLICIT_BIT)
 		mapBitList += "\tGL_MAP_FLUSH_EXPLICIT_BIT\n";

 	// Purpose of the test
 	m_testCtx.getLog()
 		<< tcu::TestLog::Message
 		<< "Testing performance of MapBufferRange() command after a draw command that sources data from the target buffer.\n"
 		<< rangeDesc
 		<< "Map bits:\n"
 		<< mapBitList
 		<< tcu::TestLog::EndMessage;

 	ModifyAfterBasicCase<MapBufferRangeDurationNoAlloc>::init();
 }

 // Nothing case-specific to release; forwards to the base class deinit.
 void ModifyAfterWithMapBufferRangeCase::deinit (void)
 {
 	ModifyAfterBasicCase<MapBufferRangeDurationNoAlloc>::deinit();
 }

 // Returns true when only part of the buffer is mapped (FLAG_PARTIAL) while the mapping
 // also carries GL_MAP_INVALIDATE_BUFFER_BIT; false for every other combination.
 bool ModifyAfterWithMapBufferRangeCase::isBufferUnspecifiedAfterUpload (int flags, deUint32 mapFlags)
 {
 	const bool partialUpload		= (flags & FLAG_PARTIAL) != 0;
 	const bool invalidatesBuffer	= (mapFlags & GL_MAP_INVALIDATE_BUFFER_BIT) != 0;

 	return partialUpload && invalidatesBuffer;
 }

 // Maps a range of the currently bound GL_ARRAY_BUFFER with m_mapFlags, copies zeros into
 // it and unmaps it, timing each step. A partial upload maps bufferSize/2 bytes starting
 // at bufferSize/4 (both vec4 aligned); otherwise the whole buffer is mapped.
 // Throws tcu::TestError if mapping returns null and UnmapFailureError if unmapping does
 // not return GL_TRUE.
 void ModifyAfterWithMapBufferRangeCase::testWithBufferSize (UploadSampleResult<MapBufferRangeDurationNoAlloc>& result, int bufferSize)
 {
 	const glw::Functions&	gl			= m_context.getRenderContext().getFunctions();

 	int						rawOffset	= 0;
 	int						rawSize		= bufferSize;

 	if (m_partialUpload)
 	{
 		rawOffset	= bufferSize / 4;
 		rawSize		= bufferSize / 2;
 	}

 	const int				mapOffset	= deAlign32(rawOffset, 4*4);
 	const int				mapSize		= deAlign32(rawSize, 4*4);

 	// Map
 	const deUint64			mapStart	= deGetMicroseconds();
 	void* const				mapPtr		= gl.mapBufferRange(GL_ARRAY_BUFFER, mapOffset, mapSize, m_mapFlags);
 	const deUint64			mapEnd		= deGetMicroseconds();

 	if (!mapPtr)
 		throw tcu::TestError("mapBufferRange returned null");

 	result.duration.mapDuration = mapEnd - mapStart;

 	// Write; the copy is timed by medianTimeMemcpy()
 	result.duration.writeDuration = medianTimeMemcpy(mapPtr, &m_zeroData[0], mapSize);

 	// Unmap
 	const deUint64			unmapStart	= deGetMicroseconds();
 	const glw::GLboolean	unmapOk		= gl.unmapBuffer(GL_ARRAY_BUFFER);
 	const deUint64			unmapEnd	= deGetMicroseconds();

 	if (unmapOk != GL_TRUE)
 		throw UnmapFailureError();

 	result.duration.unmapDuration = unmapEnd - unmapStart;

 	// Totals
 	result.duration.totalDuration		= result.duration.mapDuration + result.duration.writeDuration + result.duration.unmapDuration;
 	result.duration.fitResponseDuration	= result.duration.totalDuration;
 	result.writtenSize					= mapSize;
 }

 // Measures a MapBufferRange() - write - FlushMappedBufferRange() - UnmapBuffer() sequence
 // issued after a draw that sources the target buffer.
 class ModifyAfterWithMapBufferFlushCase : public ModifyAfterBasicCase<MapBufferRangeFlushDurationNoAlloc>
 {
 public:

 	// Bits accepted in the constructor's flags argument
 	enum CaseFlags
 	{
 		FLAG_PARTIAL = 0x1,		// only part of the buffer is mapped
 	};

 					ModifyAfterWithMapBufferFlushCase	(Context& context, const char* name, const char* desc, int bufferSizeMin, int bufferSizeMax, deUint32 usage, int flags, deUint32 glMapFlags);
 					~ModifyAfterWithMapBufferFlushCase	(void);

 	void			init								(void);
 	void			deinit								(void);
 private:
 	// True when a partial mapping is combined with GL_MAP_INVALIDATE_BUFFER_BIT.
 	static bool		isBufferUnspecifiedAfterUpload		(int flags, deUint32 mapFlags);
 	// Times map, write, flush and unmap separately and stores the durations and written size in result.
 	void			testWithBufferSize					(UploadSampleResult<MapBufferRangeFlushDurationNoAlloc>& result, int bufferSize);

 	const bool		m_partialUpload;	// FLAG_PARTIAL was given
 	const deUint32	m_mapFlags;			// access bits passed to mapBufferRange()
 };

 // Stores the map access bits and the partial-upload flag. Whether the buffer is left
 // unspecified by the test is derived from the flags with isBufferUnspecifiedAfterUpload()
 // and handed to the base class.
 ModifyAfterWithMapBufferFlushCase::ModifyAfterWithMapBufferFlushCase (Context& context, const char* name, const char* desc, int bufferSizeMin, int bufferSizeMax, deUint32 usage, int flags, deUint32 glMapFlags)
 	: ModifyAfterBasicCase<MapBufferRangeFlushDurationNoAlloc>	(context, name, desc, bufferSizeMin, bufferSizeMax, usage, isBufferUnspecifiedAfterUpload(flags, glMapFlags))
 	, m_partialUpload											((flags & FLAG_PARTIAL) != 0)
 	, m_mapFlags												(glMapFlags)
 {
 }

 // Releases the case's resources by calling deinit().
 ModifyAfterWithMapBufferFlushCase::~ModifyAfterWithMapBufferFlushCase (void)
 {
 	deinit();
 }

 // Logs what this case measures, including the mapped range and every map access bit that
 // is set in m_mapFlags, and then initializes the base case.
 void ModifyAfterWithMapBufferFlushCase::init (void)
 {
 	const char* const	rangeDesc	= (m_partialUpload) ?
 										("Half of the buffer is mapped.\n") :
 										("Whole buffer is mapped.\n");
 	std::string			mapBitList;

 	// One line per access bit, in a fixed order
 	if (m_mapFlags & GL_MAP_WRITE_BIT)
 		mapBitList += "\tGL_MAP_WRITE_BIT\n";
 	if (m_mapFlags & GL_MAP_READ_BIT)
 		mapBitList += "\tGL_MAP_READ_BIT\n";
 	if (m_mapFlags & GL_MAP_INVALIDATE_RANGE_BIT)
 		mapBitList += "\tGL_MAP_INVALIDATE_RANGE_BIT\n";
 	if (m_mapFlags & GL_MAP_INVALIDATE_BUFFER_BIT)
 		mapBitList += "\tGL_MAP_INVALIDATE_BUFFER_BIT\n";
 	if (m_mapFlags & GL_MAP_UNSYNCHRONIZED_BIT)
 		mapBitList += "\tGL_MAP_UNSYNCHRONIZED_BIT\n";
 	if (m_mapFlags & GL_MAP_FLUSH_EXPLICIT_BIT)
 		mapBitList += "\tGL_MAP_FLUSH_EXPLICIT_BIT\n";

 	// Purpose of the test
 	m_testCtx.getLog()
 		<< tcu::TestLog::Message
 		<< "Testing performance of MapBufferRange() command after a draw command that sources data from the target buffer.\n"
 		<< rangeDesc
 		<< "Map bits:\n"
 		<< mapBitList
 		<< tcu::TestLog::EndMessage;

 	ModifyAfterBasicCase<MapBufferRangeFlushDurationNoAlloc>::init();
 }

 // Nothing case-specific to release; forwards to the base class deinit.
 void ModifyAfterWithMapBufferFlushCase::deinit (void)
 {
 	ModifyAfterBasicCase<MapBufferRangeFlushDurationNoAlloc>::deinit();
 }

 // Returns true when only part of the buffer is mapped (FLAG_PARTIAL) while the mapping
 // also carries GL_MAP_INVALIDATE_BUFFER_BIT; false for every other combination.
 bool ModifyAfterWithMapBufferFlushCase::isBufferUnspecifiedAfterUpload (int flags, deUint32 mapFlags)
 {
 	const bool partialUpload		= (flags & FLAG_PARTIAL) != 0;
 	const bool invalidatesBuffer	= (mapFlags & GL_MAP_INVALIDATE_BUFFER_BIT) != 0;

 	return partialUpload && invalidatesBuffer;
 }

 // Maps a range of the currently bound GL_ARRAY_BUFFER with m_mapFlags, copies zeros into
 // it, flushes the written bytes and unmaps, timing each step. A partial upload maps
 // bufferSize/2 bytes starting at bufferSize/4 (both vec4 aligned); otherwise the whole
 // buffer is mapped.
 // Throws tcu::TestError if mapping returns null and UnmapFailureError if unmapping does
 // not return GL_TRUE.
 void ModifyAfterWithMapBufferFlushCase::testWithBufferSize (UploadSampleResult<MapBufferRangeFlushDurationNoAlloc>& result, int bufferSize)
 {
 	const glw::Functions&	gl			= m_context.getRenderContext().getFunctions();

 	int						rawOffset	= 0;
 	int						rawSize		= bufferSize;

 	if (m_partialUpload)
 	{
 		rawOffset	= bufferSize / 4;
 		rawSize		= bufferSize / 2;
 	}

 	const int				mapOffset	= deAlign32(rawOffset, 4*4);
 	const int				mapSize		= deAlign32(rawSize, 4*4);

 	// Map
 	const deUint64			mapStart	= deGetMicroseconds();
 	void* const				mapPtr		= gl.mapBufferRange(GL_ARRAY_BUFFER, mapOffset, mapSize, m_mapFlags);
 	const deUint64			mapEnd		= deGetMicroseconds();

 	if (!mapPtr)
 		throw tcu::TestError("mapBufferRange returned null");

 	result.duration.mapDuration = mapEnd - mapStart;

 	// Write; the copy is timed by medianTimeMemcpy()
 	result.duration.writeDuration = medianTimeMemcpy(mapPtr, &m_zeroData[0], mapSize);

 	// Flush everything that was written, starting from offset 0 of the flush call
 	const deUint64			flushStart	= deGetMicroseconds();
 	gl.flushMappedBufferRange(GL_ARRAY_BUFFER, 0, mapSize);
 	const deUint64			flushEnd	= deGetMicroseconds();

 	result.duration.flushDuration = flushEnd - flushStart;

 	// Unmap
 	const deUint64			unmapStart	= deGetMicroseconds();
 	const glw::GLboolean	unmapOk		= gl.unmapBuffer(GL_ARRAY_BUFFER);
 	const deUint64			unmapEnd	= deGetMicroseconds();

 	if (unmapOk != GL_TRUE)
 		throw UnmapFailureError();

 	result.duration.unmapDuration = unmapEnd - unmapStart;

 	// Totals
 	result.duration.totalDuration		= result.duration.mapDuration + result.duration.writeDuration + result.duration.unmapDuration + result.duration.flushDuration;
 	result.duration.fitResponseDuration	= result.duration.totalDuration;
 	result.writtenSize					= mapSize;
 }

 // Draw call variant used by the render cases (RenderCase takes one in its constructor).
 enum DrawMethod
 {
 	DRAWMETHOD_DRAW_ARRAYS = 0,
 	DRAWMETHOD_DRAW_ELEMENTS,

 	DRAWMETHOD_LAST		// number of draw methods; valid values are below this
 };

 // Selects a buffer kind (vertex or index). NOTE(review): the users of this enum are
 // outside this section - presumably it names the buffer an upload goes to; confirm.
 enum TargetBuffer
 {
 	TARGETBUFFER_VERTEX = 0,
 	TARGETBUFFER_INDEX,

 	TARGETBUFFER_LAST	// number of entries
 };

 // Selects between a new and an existing buffer. NOTE(review): the users of this enum
 // are outside this section; confirm the exact meaning against them.
 enum BufferState
 {
 	BUFFERSTATE_NEW = 0,
 	BUFFERSTATE_EXISTING,

 	BUFFERSTATE_LAST	// number of entries
 };

 // Selects the API used to upload buffer data: BufferData, BufferSubData or MapBufferRange.
 enum UploadMethod
 {
 	UPLOADMETHOD_BUFFER_DATA = 0,
 	UPLOADMETHOD_BUFFER_SUB_DATA,
 	UPLOADMETHOD_MAP_BUFFER_RANGE,

 	UPLOADMETHOD_LAST	// number of entries
 };

 // Selects whether an unrelated buffer is involved and of which kind.
 // NOTE(review): the users of this enum are outside this section; confirm against them.
 enum UnrelatedBufferType
 {
 	UNRELATEDBUFFERTYPE_NONE = 0,
 	UNRELATEDBUFFERTYPE_VERTEX,

 	UNRELATEDBUFFERTYPE_LAST	// number of entries
 };

 // Selects between a full and a partial upload range.
 // NOTE(review): the users of this enum are outside this section; confirm against them.
 enum UploadRange
 {
 	UPLOADRANGE_FULL = 0,
 	UPLOADRANGE_PARTIAL,

 	UPLOADRANGE_LAST	// number of entries
 };

 // Dimensions of the layered grid that the render cases use as their workload.
 struct LayeredGridSpec
 {
 	int gridWidth;		// number of cells in X
 	int gridHeight;		// number of cells in Y
 	int gridLayers;		// number of layers stacked on each cell position
 };

 // Returns the number of vertices in the scene: 6 for every cell on every layer.
 static int getLayeredGridNumVertices (const LayeredGridSpec& scene)
 {
 	const int verticesPerCell	= 6;
 	const int numCells			= scene.gridWidth * scene.gridHeight * scene.gridLayers;

 	return numCells * verticesPerCell;
 }

 // Fills vertexData with interleaved (color, position) vec4 pairs for the layered grid:
 // 6 vertices, i.e. 12 vec4s, per cell and layer. Cells span [-1, 1] in X and Y, all
 // positions have z = 0 and w = 1, and the color alternates between green and yellow
 // with the parity of cellX + cellY + cellZ.
 static void generateLayeredGridVertexAttribData4C4V (std::vector<tcu::Vec4>& vertexData, const LayeredGridSpec& scene)
 {
 	const tcu::Vec4	green			(0.0f, 1.0f, 0.0f, 0.7f);
 	const tcu::Vec4	yellow			(1.0f, 1.0f, 0.0f, 0.8f);
 	const int		verticesPerCell	= 6;
 	const int		vec4sPerCell	= 12;	// color + position for each vertex

 	vertexData.resize(getLayeredGridNumVertices(scene) * 2);

 	for (int cellY = 0; cellY < scene.gridHeight; ++cellY)
 	for (int cellX = 0; cellX < scene.gridWidth; ++cellX)
 	for (int cellZ = 0; cellZ < scene.gridLayers; ++cellZ)
 	{
 		const bool		evenCell	= ((cellX + cellY + cellZ) % 2) == 0;
 		const tcu::Vec4	color		= (evenCell) ? (green) : (yellow);

 		const float		cellLeft	= (float(cellX  ) / (float)scene.gridWidth  - 0.5f) * 2.0f;
 		const float		cellRight	= (float(cellX+1) / (float)scene.gridWidth  - 0.5f) * 2.0f;
 		const float		cellTop		= (float(cellY+1) / (float)scene.gridHeight - 0.5f) * 2.0f;
 		const float		cellBottom	= (float(cellY  ) / (float)scene.gridHeight - 0.5f) * 2.0f;

 		// Corner order of the 6 vertices of one cell
 		const tcu::Vec4	positions[verticesPerCell] =
 		{
 			tcu::Vec4(cellLeft,  cellTop,    0.0f, 1.0f),
 			tcu::Vec4(cellLeft,  cellBottom, 0.0f, 1.0f),
 			tcu::Vec4(cellRight, cellBottom, 0.0f, 1.0f),
 			tcu::Vec4(cellLeft,  cellTop,    0.0f, 1.0f),
 			tcu::Vec4(cellRight, cellBottom, 0.0f, 1.0f),
 			tcu::Vec4(cellRight, cellTop,    0.0f, 1.0f),
 		};

 		// First vec4 slot of this cell: cells are laid out Y-major, then X, then layer
 		const int		cellBase	= (cellY * scene.gridWidth * scene.gridLayers + cellX * scene.gridLayers + cellZ) * vec4sPerCell;

 		for (int vertexNdx = 0; vertexNdx < verticesPerCell; ++vertexNdx)
 		{
 			vertexData[cellBase + vertexNdx * 2 + 0] = color;
 			vertexData[cellBase + vertexNdx * 2 + 1] = positions[vertexNdx];
 		}
 	}
 }

 // Fills indexData with the identity index sequence 0, 1, 2, ... for every vertex of the
 // layered grid.
 // NOTE(review): the vector is resized to twice the vertex count but only the first half
 // is written, so the second half stays zero. Confirm with the callers whether the extra
 // size is intentional before changing it; it affects the amount of index data uploaded.
 static void generateLayeredGridIndexData (std::vector<deUint32>& indexData, const LayeredGridSpec& scene)
 {
 	const int numIndices = scene.gridLayers * scene.gridHeight * scene.gridWidth * 6;

 	indexData.resize(getLayeredGridNumVertices(scene) * 2);

 	for (int indexNdx = 0; indexNdx < numIndices; ++indexNdx)
 		indexData[indexNdx] = indexNdx;
 }

 // Base class for the render performance cases. Owns the color rendering program and the
 // locations of its a_color and a_position attributes.
 class RenderPerformanceTestBase : public TestCase
 {
 public:
 							RenderPerformanceTestBase	(Context& context, const char* name, const char* description);
 							~RenderPerformanceTestBase	(void);

 protected:
 	void					init						(void);
 	void					deinit						(void);

 	// Reads back a RENDER_AREA_SIZE x RENDER_AREA_SIZE pixel area from the render context.
 	void					waitGLResults				(void) const;
 	// Enables and points both attributes at the bound interleaved buffer and binds the program.
 	void					setupVertexAttribs			(void) const;

 	enum
 	{
 		RENDER_AREA_SIZE = 128	// width and height of the viewport / readback area in pixels
 	};

 private:
 	glu::ShaderProgram*		m_renderProgram;	// created in init(), deleted in deinit()
 	int						m_colorLoc;			// location of a_color
 	int						m_positionLoc;		// location of a_position
 };

 // Registers the case as a performance test node. The program is not created here;
 // init() builds it and queries the attribute locations.
 RenderPerformanceTestBase::RenderPerformanceTestBase (Context& context, const char* name, const char* description)
 	: TestCase			(context, tcu::NODETYPE_PERFORMANCE, name, description)
 	, m_renderProgram	(DE_NULL)
 	, m_colorLoc		(0)
 	, m_positionLoc		(0)
 {
 }

 // Releases the render program by calling deinit().
 RenderPerformanceTestBase::~RenderPerformanceTestBase (void)
 {
 	deinit();
 }

 // Builds the color rendering program and looks up its a_color and a_position attribute
 // locations. Throws tcu::TestError if the program does not build (after logging it) or
 // if either attribute location is -1.
 void RenderPerformanceTestBase::init (void)
 {
 	const glw::Functions&	gl	= m_context.getRenderContext().getFunctions();

 	m_renderProgram = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::VertexSource(s_colorVertexShader) << glu::FragmentSource(s_colorFragmentShader));

 	if (!m_renderProgram->isOk())
 	{
 		// the program log goes to the test log to help diagnosing the failure
 		m_testCtx.getLog() << *m_renderProgram;
 		throw tcu::TestError("could not build program");
 	}

 	// Attribute locations
 	m_colorLoc		= gl.getAttribLocation(m_renderProgram->getProgram(), "a_color");
 	m_positionLoc	= gl.getAttribLocation(m_renderProgram->getProgram(), "a_position");

 	if (m_colorLoc == -1)
 		throw tcu::TestError("Location of attribute a_color was -1");

 	if (m_positionLoc == -1)
 		throw tcu::TestError("Location of attribute a_position was -1");
 }

 // Deletes the render program. The pointer is reset, so calling this again is harmless.
 void RenderPerformanceTestBase::deinit (void)
 {
 	delete m_renderProgram;
 	m_renderProgram = DE_NULL;
 }

 void RenderPerformanceTestBase::setupVertexAttribs (void) const
 {
 	const glw::Functions& gl = m_context.getRenderContext().getFunctions();

 	// buffers are bound

 	gl.enableVertexAttribArray(m_colorLoc);
 	gl.enableVertexAttribArray(m_positionLoc);

 	gl.vertexAttribPointer(m_colorLoc,    4, GL_FLOAT, GL_FALSE, (glw::GLsizei)(8 * sizeof(float)), glu::BufferOffsetAsPointer(0 * sizeof(tcu::Vec4)));
 	gl.vertexAttribPointer(m_positionLoc, 4, GL_FLOAT, GL_FALSE, (glw::GLsizei)(8 * sizeof(float)), glu::BufferOffsetAsPointer(1 * sizeof(tcu::Vec4)));

 	gl.useProgram(m_renderProgram->getProgram());

 	GLU_EXPECT_NO_ERROR(gl.getError(), "set up rendering");
 }

 void RenderPerformanceTestBase::waitGLResults (void) const
 {
 	tcu::Surface dummySurface(RENDER_AREA_SIZE, RENDER_AREA_SIZE);
 	glu::readPixels(m_context.getRenderContext(), 0, 0, dummySurface.getAccess());
 }

 // Base for render performance cases that run one sample per grid size. init() creates
 // the list of samples and a randomized order for them; iterate() runs one sample per
 // call through the subclass's runSample() and reports the result after the last one.
 template <typename SampleType>
 class RenderCase : public RenderPerformanceTestBase
 {
 public:
 									RenderCase						(Context& context, const char* name, const char* description, DrawMethod drawMethod);
 									~RenderCase						(void);

 protected:
 	void							init							(void);
 	void							deinit							(void);

 private:
 	// Runs the next sample; a sample that fails in unmap is retried on the next call.
 	IterateResult					iterate							(void);

 protected:
 	// One workload (scene) together with its measurement result
 	struct SampleResult
 	{
 		LayeredGridSpec					scene;
 		RenderSampleResult<SampleType>	result;
 	};

 	int								getMinWorkloadSize				(void) const;
 	int								getMaxWorkloadSize				(void) const;
 	int								getMinWorkloadDataSize			(void) const;
 	int								getMaxWorkloadDataSize			(void) const;
 	int								getVertexDataSize				(void) const;
 	int								getNumSamples					(void) const;
 	void							uploadScene						(const LayeredGridSpec& scene);

 	// Measurement implemented by each concrete case
 	virtual void					runSample						(SampleResult& sample) = 0;
 	virtual void					logAndSetTestResult				(const std::vector<SampleResult>& results);

 	void							mapResultsToRenderRateFormat	(std::vector<RenderSampleResult<SampleType> >& dst, const std::vector<SampleResult>& src) const;

 	const DrawMethod				m_drawMethod;

 private:
 	glw::GLuint						m_attributeBufferID;	// deleted in deinit() if non-zero
 	glw::GLuint						m_indexBufferID;		// deleted in deinit() if non-zero
 	int								m_iterationNdx;			// position in m_iterationOrder of the next sample to run
 	std::vector<int>				m_iterationOrder;		// randomized order of indices into m_results
 	std::vector<SampleResult>		m_results;				// one entry per grid size, filled in init()
 	int								m_numUnmapFailures;		// samples that have thrown UnmapFailureError so far
 };

 // Stores the draw method and zero-initializes the buffer ids and counters. The sample
 // list and iteration order are created later in init().
 template <typename SampleType>
 RenderCase<SampleType>::RenderCase (Context& context, const char* name, const char* description, DrawMethod drawMethod)
 	: RenderPerformanceTestBase	(context, name, description)
 	, m_drawMethod				(drawMethod)
 	, m_attributeBufferID		(0)
 	, m_indexBufferID			(0)
 	, m_iterationNdx			(0)
 	, m_numUnmapFailures		(0)
 {
 	// DRAWMETHOD_LAST is not a valid draw method
 	DE_ASSERT(drawMethod < DRAWMETHOD_LAST);
 }

 // Releases the case's GL objects by calling deinit().
 template <typename SampleType>
 RenderCase<SampleType>::~RenderCase (void)
 {
 	deinit();
 }

 // Initializes the base class, checks that the render target is at least
 // RENDER_AREA_SIZE x RENDER_AREA_SIZE (throws tcu::NotSupportedError otherwise), sets up
 // viewport and blending, creates one sample per grid size and randomizes the order in
 // which the samples are run.
 template <typename SampleType>
 void RenderCase<SampleType>::init (void)
 {
 	const glw::Functions&	gl				= m_context.getRenderContext().getFunctions();

 	RenderPerformanceTestBase::init();

 	// Requirements

 	const bool				targetTooSmall	= m_context.getRenderTarget().getWidth() < RENDER_AREA_SIZE ||
 											  m_context.getRenderTarget().getHeight() < RENDER_AREA_SIZE;

 	if (targetTooSmall)
 		throw tcu::NotSupportedError("Test case requires " + de::toString<int>(RENDER_AREA_SIZE) + "x" + de::toString<int>(RENDER_AREA_SIZE) + " render target");

 	// GL state

 	gl.viewport(0, 0, RENDER_AREA_SIZE, RENDER_AREA_SIZE);

 	// enable blending to prevent grid layers from being discarded
 	gl.blendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
 	gl.blendEquation(GL_FUNC_ADD);
 	gl.enable(GL_BLEND);

 	// Samples: one square grid of 5 layers for each size in the table

 	{
 		const int gridSizes[]	= { 20, 26, 32, 38, 44, 50, 56, 62, 68, 74, 80,  86,  92,  98,  104, 110, 116, 122, 128 };
 		const int numLayers		= 5;

 		for (int gridNdx = 0; gridNdx < DE_LENGTH_OF_ARRAY(gridSizes); ++gridNdx)
 		{
 			SampleResult sample = SampleResult();

 			sample.scene.gridHeight	= gridSizes[gridNdx];
 			sample.scene.gridWidth	= gridSizes[gridNdx];
 			sample.scene.gridLayers	= numLayers;

 			sample.result.numVertices = getLayeredGridNumVertices(sample.scene);

 			// the concrete test cases fill these in; start from dummy values
 			sample.result.renderDataSize	= -1;
 			sample.result.uploadedDataSize	= -1;
 			sample.result.unrelatedDataSize	= -1;

 			m_results.push_back(sample);
 		}
 	}

 	// Randomized run order over all samples
 	m_iterationOrder.resize(m_results.size());
 	generateTwoPassRandomIterationOrder(m_iterationOrder, (int)m_iterationOrder.size());
 }

 // Deinitializes the base class and deletes the attribute and index buffers
 // if they have been created. Buffer IDs are reset to 0, so calling this
 // again does not delete anything twice.
 template <typename SampleType>
 void RenderCase<SampleType>::deinit (void)
 {
 	RenderPerformanceTestBase::deinit();

 	// attribute buffer first, then index buffer
 	glw::GLuint* const ownedBuffers[] = { &m_attributeBufferID, &m_indexBufferID };

 	for (int bufferNdx = 0; bufferNdx < DE_LENGTH_OF_ARRAY(ownedBuffers); ++bufferNdx)
 	{
 		glw::GLuint& bufferID = *ownedBuffers[bufferNdx];

 		// 0 means the buffer was never generated (or was already released)
 		if (bufferID == 0)
 			continue;

 		m_context.getRenderContext().getFunctions().deleteBuffers(1, &bufferID);
 		bufferID = 0;
 	}
 }

 // Runs the next sample in the randomized order.
 //
 // Returns CONTINUE while samples remain and STOP once every sample has been
 // measured and the result has been logged. An UnmapFailureError from
 // runSample() is counted and does not advance the iteration; after more than
 // 3 such failures a tcu::TestError is thrown.
 template <typename SampleType>
 typename RenderCase<SampleType>::IterateResult RenderCase<SampleType>::iterate (void)
 {
 	const int		maxToleratedUnmapFailures	= 3;
 	SampleResult&	sample						= m_results[m_iterationOrder[m_iterationNdx]];

 	try
 	{
 		runSample(sample);

 		// only a successfully measured sample advances the iteration
 		++m_iterationNdx;
 	}
 	catch (const UnmapFailureError&)
 	{
 		// m_iterationNdx is left untouched, so the same sample is picked again next time
 		++m_numUnmapFailures;
 	}

 	if (m_numUnmapFailures > maxToleratedUnmapFailures)
 		throw tcu::TestError("Got too many unmap errors");

 	if (m_iterationNdx >= (int)m_iterationOrder.size())
 	{
 		logAndSetTestResult(m_results);
 		return STOP;
 	}

 	return CONTINUE;
 }

 // Returns the smallest vertex count among all samples.
 // m_results must not be empty (init() fills it).
 template <typename SampleType>
 int RenderCase<SampleType>::getMinWorkloadSize (void) const
 {
 	const int	numSamples	= (int)m_results.size();
 	int			smallest	= getLayeredGridNumVertices(m_results.front().scene);

 	for (int sampleNdx = 1; sampleNdx < numSamples; ++sampleNdx)
 	{
 		const int numVertices = getLayeredGridNumVertices(m_results[sampleNdx].scene);

 		smallest = de::min(smallest, numVertices);
 	}

 	return smallest;
 }

 // Returns the largest vertex count among all samples.
 // m_results must not be empty (init() fills it).
 template <typename SampleType>
 int RenderCase<SampleType>::getMaxWorkloadSize (void) const
 {
 	const int	numSamples	= (int)m_results.size();
 	int			largest		= getLayeredGridNumVertices(m_results.front().scene);

 	for (int sampleNdx = 1; sampleNdx < numSamples; ++sampleNdx)
 	{
 		const int numVertices = getLayeredGridNumVertices(m_results[sampleNdx].scene);

 		largest = de::max(largest, numVertices);
 	}

 	return largest;
 }

 // Returns the vertex data size, in bytes, of the smallest workload.
 template <typename SampleType>
 int RenderCase<SampleType>::getMinWorkloadDataSize (void) const
 {
 	const int bytesPerVertex = getVertexDataSize();

 	return getMinWorkloadSize() * bytesPerVertex;
 }

 // Returns the vertex data size, in bytes, of the largest workload.
 template <typename SampleType>
 int RenderCase<SampleType>::getMaxWorkloadDataSize (void) const
 {
 	const int bytesPerVertex = getVertexDataSize();

 	return getMaxWorkloadSize() * bytesPerVertex;
 }

 // Returns the size of one vertex in bytes: two 4-component float vectors.
 // NOTE(review): presumably one color and one position vector, going by the
 // name of generateLayeredGridVertexAttribData4C4V - confirm against it.
 template <typename SampleType>
 int RenderCase<SampleType>::getVertexDataSize (void) const
 {
 	const int floatsPerVertex = 2 * 4;

 	return floatsPerVertex * (int)sizeof(float);
 }

 // Returns the number of samples (workloads) this case measures.
 template <typename SampleType>
 int RenderCase<SampleType>::getNumSamples (void) const
 {
 	return static_cast<int>(m_results.size());
 }

 // Generates the data for the given scene and uploads it with bufferData.
 //
 // The vertex attribute buffer is always uploaded; the index buffer only when
 // drawing with drawElements. Buffer objects are generated on first use and
 // their IDs are kept in members, so later calls upload to the same objects.
 // The uploaded buffers are left bound.
 template <typename SampleType>
 void RenderCase<SampleType>::uploadScene (const LayeredGridSpec& scene)
 {
 	const glw::Functions& gl = m_context.getRenderContext().getFunctions();

 	// vertex attributes
 	{
 		std::vector<tcu::Vec4> attribs;
 		generateLayeredGridVertexAttribData4C4V(attribs, scene);

 		const int attribBytes = (int)(attribs.size() * sizeof(tcu::Vec4));

 		if (!m_attributeBufferID)
 			gl.genBuffers(1, &m_attributeBufferID);

 		gl.bindBuffer(GL_ARRAY_BUFFER, m_attributeBufferID);
 		gl.bufferData(GL_ARRAY_BUFFER, attribBytes, &attribs[0], GL_STATIC_DRAW);
 	}

 	// indices, only needed by drawElements
 	if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
 	{
 		std::vector<deUint32> indices;
 		generateLayeredGridIndexData(indices, scene);

 		const int indexBytes = (int)(indices.size() * sizeof(deUint32));

 		if (!m_indexBufferID)
 			gl.genBuffers(1, &m_indexBufferID);

 		gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, m_indexBufferID);
 		gl.bufferData(GL_ELEMENT_ARRAY_BUFFER, indexBytes, &indices[0], GL_STATIC_DRAW);
 	}

 	GLU_EXPECT_NO_ERROR(gl.getError(), "create buffers");
 }

 // Analyzes (and logs) the collected samples and reports the render rate as
 // the test result description. The result code is always PASS.
 template <typename SampleType>
 void RenderCase<SampleType>::logAndSetTestResult (const std::vector<SampleResult>& results)
 {
 	std::vector<RenderSampleResult<SampleType> > rateSamples;
 	mapResultsToRenderRateFormat(rateSamples, results);

 	const RenderSampleAnalyzeResult	analysis		= analyzeSampleResults(m_testCtx.getLog(), rateSamples);
 	const float						rawRate			= analysis.renderRateAtRange;

 	// an infinite rate means sample times are 1) invalid or 2) timer resolution too low
 	const bool						rateIsInfinite	= (rawRate == std::numeric_limits<float>::infinity());

 	// Report 0 for an unusable rate, otherwise scale the rate down by 1024 * 1024.
 	// NOTE(review): the case descriptions log the result unit as MiB / s, so
 	// renderRateAtRange is presumably in bytes / s - confirm in analyzeSampleResults.
 	const float						reportedRate	= (rateIsInfinite) ? (0.0f) : (rawRate / 1024.0f / 1024.0f);

 	m_testCtx.setTestResult(QP_TEST_RESULT_PASS, de::floatToString(reportedRate, 2).c_str());
 }

 // Copies the 'result' part of every sample in src to the element of dst with
 // the same index. dst is resized to match src.
 template <typename SampleType>
 void RenderCase<SampleType>::mapResultsToRenderRateFormat (std::vector<RenderSampleResult<SampleType> >& dst, const std::vector<SampleResult>& src) const
 {
 	const int numSamples = (int)src.size();

 	dst.resize(src.size());

 	for (int sampleNdx = 0; sampleNdx != numSamples; ++sampleNdx)
 	{
 		dst[sampleNdx] = src[sampleNdx].result;
 	}
 }

 // Reference case: times a draw call and the following readPixels when the
 // buffers have been uploaded and drawn from once before the measurement.
 class ReferenceRenderTimeCase : public RenderCase<RenderReadDuration>
 {
 public:
 			ReferenceRenderTimeCase		(Context& context, const char* name, const char* description, DrawMethod drawMethod);

 private:
 	// Runs the parent init and logs a description of the measurement.
 	void	init						(void);
 	// Measures a single workload and stores the timings in 'sample'.
 	void	runSample					(SampleResult& sample);
 };

 // Forwards all arguments to RenderCase; this case has no state of its own.
 ReferenceRenderTimeCase::ReferenceRenderTimeCase (Context& context, const char* name, const char* description, DrawMethod drawMethod)
 	: RenderCase<RenderReadDuration>	(context, name, description, drawMethod)
 {
 }

 void ReferenceRenderTimeCase::init (void)
 {
 	const char* const targetFunctionName = (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS) ? ("drawArrays") : ("drawElements");

 	// init parent
 	RenderCase<RenderReadDuration>::init();

 	// log
 	m_testCtx.getLog()
 		<< tcu::TestLog::Message
 		<< "Measuring the time used in " << targetFunctionName << " and readPixels call with different rendering workloads.\n"
 		<< getNumSamples() << " test samples. Sample order is randomized.\n"
 		<< "All samples at even positions (first = 0) are tested before samples at odd positions.\n"
 		<< "Generated workload is multiple viewport-covering grids with varying number of cells, each cell is two separate triangles.\n"
 		<< "Workload sizes are in the range ["
 			<< getMinWorkloadSize() << ",  "
 			<< getMaxWorkloadSize() << "] vertices (["
 			<< getHumanReadableByteSize(getMinWorkloadDataSize()) << ","
 			<< getHumanReadableByteSize(getMaxWorkloadDataSize()) << "] to be processed).\n"
 		<< "Test result is the approximated total processing rate in MiB / s.\n"
 		<< ((m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS) ? ("Note that index array size is not included in the processed size.\n") : (""))
 		<< "Note! Test result should only be used as a baseline reference result for buffer.data_upload.* test group results."
 		<< tcu::TestLog::EndMessage;
 }

 // Measures one reference sample.
 //
 // The scene is uploaded to buffers created for this call and drawn once
 // before any timing starts. The timed part is a second draw call followed by
 // a readPixels; the two are timed separately and their sum is stored as well.
 // Nothing is uploaded inside the timed part, so the uploaded and unrelated
 // data sizes are reported as 0.
 void ReferenceRenderTimeCase::runSample (SampleResult& sample)
 {
 	const glw::Functions&	gl				= m_context.getRenderContext().getFunctions();
 	tcu::Surface			resultSurface	(RENDER_AREA_SIZE, RENDER_AREA_SIZE);
 	const int				numVertices		= getLayeredGridNumVertices(sample.scene);
 	// NOTE(review): glu::Buffer presumably deletes its GL buffer when it goes out of scope - confirm
 	const glu::Buffer		arrayBuffer		(m_context.getRenderContext());
 	const glu::Buffer		indexBuffer		(m_context.getRenderContext());
 	std::vector<tcu::Vec4>	vertexData;
 	std::vector<deUint32>	indexData;
 	deUint64				startTime;
 	deUint64				endTime;

 	// generate and upload buffers

 	generateLayeredGridVertexAttribData4C4V(vertexData, sample.scene);
 	gl.bindBuffer(GL_ARRAY_BUFFER, *arrayBuffer);
 	gl.bufferData(GL_ARRAY_BUFFER, (int)(vertexData.size() * sizeof(tcu::Vec4)), &vertexData[0], GL_STATIC_DRAW);

 	// index data is only generated and uploaded for drawElements
 	if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
 	{
 		generateLayeredGridIndexData(indexData, sample.scene);
 		gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, *indexBuffer);
 		gl.bufferData(GL_ELEMENT_ARRAY_BUFFER, (int)(indexData.size() * sizeof(deUint32)), &indexData[0], GL_STATIC_DRAW);
 	}

 	setupVertexAttribs();

 	// make sure data is uploaded: draw from the buffers once, outside the timed section

 	if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS)
 		gl.drawArrays(GL_TRIANGLES, 0, numVertices);
 	else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
 		gl.drawElements(GL_TRIANGLES, numVertices, GL_UNSIGNED_INT, DE_NULL);
 	else
 		DE_ASSERT(false);
 	waitGLResults();

 	// clear the result of the warm-up draw before measuring
 	gl.clearColor(0.0f, 0.0f, 0.0f, 1.0f);
 	gl.clear(GL_COLOR_BUFFER_BIT);
 	waitGLResults();

 	tcu::warmupCPU();

 	// Measure both draw and associated readpixels
 	{
 		// renderDuration is the time spent inside the draw call itself
 		startTime = deGetMicroseconds();

 		if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS)
 			gl.drawArrays(GL_TRIANGLES, 0, numVertices);
 		else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
 			gl.drawElements(GL_TRIANGLES, numVertices, GL_UNSIGNED_INT, DE_NULL);
 		else
 			DE_ASSERT(false);

 		endTime = deGetMicroseconds();

 		sample.result.duration.renderDuration = endTime - startTime;
 	}

 	{
 		// readDuration is the time spent inside readPixels, issued right after the draw
 		startTime = deGetMicroseconds();
 		glu::readPixels(m_context.getRenderContext(), 0, 0, resultSurface.getAccess());
 		endTime = deGetMicroseconds();

 		sample.result.duration.readDuration = endTime - startTime;
 	}

 	// no upload happens in the timed section; total and fit response both equal render + read
 	sample.result.renderDataSize = getVertexDataSize() * sample.result.numVertices;
 	sample.result.uploadedDataSize = 0;
 	sample.result.unrelatedDataSize = 0;
 	sample.result.duration.renderReadDuration = sample.result.duration.renderDuration + sample.result.duration.readDuration;
 	sample.result.duration.totalDuration = sample.result.duration.renderDuration + sample.result.duration.readDuration;
 	sample.result.duration.fitResponseDuration = sample.result.duration.renderReadDuration;
 }

 // Times a draw call and the following readPixels when a buffer that is not
 // used by the draw is uploaded right before the draw call is issued.
 class UnrelatedUploadRenderTimeCase : public RenderCase<UnrelatedUploadRenderReadDuration>
 {
 public:
 									UnrelatedUploadRenderTimeCase	(Context& context, const char* name, const char* description, DrawMethod drawMethod, UploadMethod unrelatedUploadMethod);

 private:
 	// Runs the parent init and logs a description of the measurement.
 	void							init							(void);
 	// Measures a single workload and stores the timings in 'sample'.
 	void							runSample						(SampleResult& sample);

 	// GL upload function used for the unrelated buffer
 	const UploadMethod				m_unrelatedUploadMethod;
 };

 // Forwards the common arguments to RenderCase and remembers which upload
 // method is used for the unrelated buffer.
 UnrelatedUploadRenderTimeCase::UnrelatedUploadRenderTimeCase (Context& context, const char* name, const char* description, DrawMethod drawMethod, UploadMethod unrelatedUploadMethod)
 	: RenderCase<UnrelatedUploadRenderReadDuration>	(context, name, description, drawMethod)
 	, m_unrelatedUploadMethod						(unrelatedUploadMethod)
 {
 	// the member is a copy of the argument, so this validates the stored value
 	DE_ASSERT(unrelatedUploadMethod < UPLOADMETHOD_LAST);
 }

 // Initializes the parent case and logs a description of what is measured,
 // including the GL function used for the unrelated upload and the range of
 // workload and unrelated upload sizes.
 void UnrelatedUploadRenderTimeCase::init (void)
 {
 	const char* const	targetFunctionName	= (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS) ? ("drawArrays") : ("drawElements");
 	tcu::MessageBuilder	message				(&m_testCtx.getLog());

 	// Name of the GL function that runSample() uses for the unrelated upload.
 	// Each method must be matched with '==': comparing with '!=' reported
 	// "bufferSubData" for bufferData cases and "bufferData" for all others.
 	const char* const	uploadFunctionName	= (m_unrelatedUploadMethod == UPLOADMETHOD_BUFFER_DATA)			? ("bufferData")		:
 											  (m_unrelatedUploadMethod == UPLOADMETHOD_BUFFER_SUB_DATA)		? ("bufferSubData")		:
 											  (m_unrelatedUploadMethod == UPLOADMETHOD_MAP_BUFFER_RANGE)	? ("mapBufferRange")	:
 											  ((const char*)DE_NULL);

 	// init parent
 	RenderCase<UnrelatedUploadRenderReadDuration>::init();

 	// log

 	message
 		<< "Measuring the time used in " << targetFunctionName << " and readPixels call with different rendering workloads.\n"
 		<< "Uploading an unrelated buffer just before issuing the rendering command with "
 			<< uploadFunctionName
 			<< ".\n"
 		<< getNumSamples() << " test samples. Sample order is randomized.\n"
 		<< "All samples at even positions (first = 0) are tested before samples at odd positions.\n"
 		<< "Generated workload is multiple viewport-covering grids with varying number of cells, each cell is two separate triangles.\n"
 		<< "Workload sizes are in the range ["
 			<< getMinWorkloadSize() << ",  "
 			<< getMaxWorkloadSize() << "] vertices (["
 			<< getHumanReadableByteSize(getMinWorkloadDataSize()) << ","
 			<< getHumanReadableByteSize(getMaxWorkloadDataSize()) << "] to be processed).\n"
 		// the unrelated upload is a copy of the vertex data, hence the same size range
 		<< "Unrelated upload sizes are in the range ["
 			<< getHumanReadableByteSize(getMinWorkloadDataSize()) << ", "
 			<< getHumanReadableByteSize(getMaxWorkloadDataSize()) << "]\n"
 		<< "Test result is the approximated total processing rate in MiB / s.\n"
 		<< ((m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS) ? ("Note that index array size is not included in the processed size.\n") : (""))
 		<< "Note that the data size and the time used in the unrelated upload is not included in the results.\n"
 		<< "Note! Test result may not be useful as is but instead should be compared against the reference.* group and upload_and_draw.*_and_unrelated_upload group results.\n"
 		<< tcu::TestLog::EndMessage;
 }

 // Measures one sample with an unrelated upload in front of the draw call.
 //
 // The scene is uploaded to buffers created for this call and drawn once
 // before any timing starts. Then a copy of the vertex data is uploaded to a
 // third, otherwise unused buffer with the configured upload method; this
 // upload is not timed. The timed part is a draw call followed by a
 // readPixels, timed separately and also summed.
 //
 // Throws UnmapFailureError if unmapBuffer fails and tcu::Exception if
 // mapBufferRange returns NULL.
 void UnrelatedUploadRenderTimeCase::runSample (SampleResult& sample)
 {
 	const glw::Functions&	gl					= m_context.getRenderContext().getFunctions();
 	tcu::Surface			resultSurface		(RENDER_AREA_SIZE, RENDER_AREA_SIZE);
 	const int				numVertices			= getLayeredGridNumVertices(sample.scene);
 	// NOTE(review): glu::Buffer presumably deletes its GL buffer when it goes out of scope - confirm
 	const glu::Buffer		arrayBuffer			(m_context.getRenderContext());
 	const glu::Buffer		indexBuffer			(m_context.getRenderContext());
 	const glu::Buffer		unrelatedBuffer		(m_context.getRenderContext());
 	// -1 marks "not set"; every valid upload method below assigns it
 	int						unrelatedUploadSize	= -1;
 	int						renderUploadSize;
 	std::vector<tcu::Vec4>	vertexData;
 	std::vector<deUint32>	indexData;
 	deUint64				startTime;
 	deUint64				endTime;

 	// generate and upload buffers

 	generateLayeredGridVertexAttribData4C4V(vertexData, sample.scene);
 	renderUploadSize = (int)(vertexData.size() * sizeof(tcu::Vec4));

 	gl.bindBuffer(GL_ARRAY_BUFFER, *arrayBuffer);
 	gl.bufferData(GL_ARRAY_BUFFER, renderUploadSize, &vertexData[0], GL_STATIC_DRAW);

 	// index data is only generated and uploaded for drawElements
 	if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
 	{
 		generateLayeredGridIndexData(indexData, sample.scene);
 		gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, *indexBuffer);
 		gl.bufferData(GL_ELEMENT_ARRAY_BUFFER, (int)(indexData.size() * sizeof(deUint32)), &indexData[0], GL_STATIC_DRAW);
 	}

 	setupVertexAttribs();

 	// make sure data is uploaded: draw from the buffers once, outside the timed section

 	if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS)
 		gl.drawArrays(GL_TRIANGLES, 0, numVertices);
 	else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
 		gl.drawElements(GL_TRIANGLES, numVertices, GL_UNSIGNED_INT, DE_NULL);
 	else
 		DE_ASSERT(false);
 	waitGLResults();

 	// clear the result of the warm-up draw before measuring
 	gl.clearColor(0.0f, 0.0f, 0.0f, 1.0f);
 	gl.clear(GL_COLOR_BUFFER_BIT);
 	waitGLResults();

 	tcu::warmupCPU();

 	// Unrelated upload: every method uploads the full vertex data to unrelatedBuffer.
 	// This rebinds GL_ARRAY_BUFFER to unrelatedBuffer; the vertex attribs were set up above.
 	if (m_unrelatedUploadMethod == UPLOADMETHOD_BUFFER_DATA)
 	{
 		unrelatedUploadSize = (int)(vertexData.size() * sizeof(tcu::Vec4));

 		gl.bindBuffer(GL_ARRAY_BUFFER, *unrelatedBuffer);
 		gl.bufferData(GL_ARRAY_BUFFER, unrelatedUploadSize, &vertexData[0], GL_STATIC_DRAW);
 	}
 	else if (m_unrelatedUploadMethod == UPLOADMETHOD_BUFFER_SUB_DATA)
 	{
 		unrelatedUploadSize = (int)(vertexData.size() * sizeof(tcu::Vec4));

 		gl.bindBuffer(GL_ARRAY_BUFFER, *unrelatedBuffer);
 		// allocate storage without data first, then fill it with bufferSubData
 		gl.bufferData(GL_ARRAY_BUFFER, unrelatedUploadSize, DE_NULL, GL_STATIC_DRAW);
 		gl.bufferSubData(GL_ARRAY_BUFFER, 0, unrelatedUploadSize, &vertexData[0]);
 	}
 	else if (m_unrelatedUploadMethod == UPLOADMETHOD_MAP_BUFFER_RANGE)
 	{
 		void*			mapPtr;
 		glw::GLboolean	unmapSuccessful;

 		unrelatedUploadSize = (int)(vertexData.size() * sizeof(tcu::Vec4));

 		gl.bindBuffer(GL_ARRAY_BUFFER, *unrelatedBuffer);
 		// allocate storage without data first, then fill it through a mapping
 		gl.bufferData(GL_ARRAY_BUFFER, unrelatedUploadSize, DE_NULL, GL_STATIC_DRAW);

 		mapPtr = gl.mapBufferRange(GL_ARRAY_BUFFER, 0, unrelatedUploadSize, GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT | GL_MAP_UNSYNCHRONIZED_BIT);
 		if (!mapPtr)
 			throw tcu::Exception("MapBufferRange returned NULL");

 		deMemcpy(mapPtr, &vertexData[0], unrelatedUploadSize);

 		// if unmapping fails, just try again later
 		// (RenderCase::iterate catches UnmapFailureError and does not advance to the next sample)
 		unmapSuccessful = gl.unmapBuffer(GL_ARRAY_BUFFER);
 		if (!unmapSuccessful)
 			throw UnmapFailureError();
 	}
 	else
 		DE_ASSERT(false);

 	DE_ASSERT(unrelatedUploadSize != -1);

 	// Measure both draw and associated readpixels
 	{
 		// renderDuration is the time spent inside the draw call itself
 		startTime = deGetMicroseconds();

 		if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS)
 			gl.drawArrays(GL_TRIANGLES, 0, numVertices);
 		else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
 			gl.drawElements(GL_TRIANGLES, numVertices, GL_UNSIGNED_INT, DE_NULL);
 		else
 			DE_ASSERT(false);

 		endTime = deGetMicroseconds();

 		sample.result.duration.renderDuration = endTime - startTime;
 	}

 	{
 		// readDuration is the time spent inside readPixels, issued right after the draw
 		startTime = deGetMicroseconds();
 		glu::readPixels(m_context.getRenderContext(), 0, 0, resultSurface.getAccess());
 		endTime = deGetMicroseconds();

 		sample.result.duration.readDuration = endTime - startTime;
 	}

 	// the unrelated upload was not timed, so total and fit response both equal render + read
 	sample.result.renderDataSize = getVertexDataSize() * sample.result.numVertices;
 	sample.result.uploadedDataSize = renderUploadSize;
 	sample.result.unrelatedDataSize = unrelatedUploadSize;
 	sample.result.duration.renderReadDuration = sample.result.duration.renderDuration + sample.result.duration.readDuration;
 	sample.result.duration.totalDuration = sample.result.duration.renderDuration + sample.result.duration.readDuration;
 	sample.result.duration.fitResponseDuration = sample.result.duration.renderReadDuration;
 }

 // Reference case: measures the time used by a single readPixels call and
 // reports the median over a fixed number of samples.
 class ReferenceReadPixelsTimeCase : public TestCase
 {
 public:
 					ReferenceReadPixelsTimeCase		(Context& context, const char* name, const char* description);

 private:
 	// Logs a description of the measurement.
 	void			init							(void);
 	// Takes one timing sample per call.
 	IterateResult	iterate							(void);
 	// Logs all samples and sets the median as the test result.
 	void			logAndSetTestResult				(void);

 	enum
 	{
 		// width and height of the surface that is read back
 		RENDER_AREA_SIZE = 128
 	};

 	// m_numSamples must stay declared before m_samples: the constructor sizes
 	// m_samples with it and members are initialized in declaration order
 	const int			m_numSamples;
 	// index of the next sample to take
 	int					m_sampleNdx;
 	// measured readPixels times in microseconds
 	std::vector<int>	m_samples;
 };

 // Creates a performance test node that takes 20 samples; the sample storage
 // is sized up front from m_numSamples.
 ReferenceReadPixelsTimeCase::ReferenceReadPixelsTimeCase (Context& context, const char* name, const char* description)
 	: TestCase		(context, tcu::NODETYPE_PERFORMANCE, name, description)
 	, m_numSamples	(20)
 	, m_sampleNdx	(0)
 	, m_samples		(m_numSamples)
 {
 }

 void ReferenceReadPixelsTimeCase::init (void)
 {
 	m_testCtx.getLog()
 		<< tcu::TestLog::Message
 		<< "Measuring the time used in a single readPixels call with " << m_numSamples << " test samples.\n"
 		<< "Test result is the median of the samples in microseconds.\n"
 		<< "Note! Test result should only be used as a baseline reference result for buffer.data_upload.* test group results."
 		<< tcu::TestLog::EndMessage;
 }

 // Takes one readPixels timing sample per call.
 //
 // Returns CONTINUE until m_numSamples samples have been taken; after the
 // last one the results are logged and STOP is returned.
 ReferenceReadPixelsTimeCase::IterateResult ReferenceReadPixelsTimeCase::iterate (void)
 {
 	const glw::Functions&	gl				= m_context.getRenderContext().getFunctions();
 	tcu::Surface			resultSurface	(RENDER_AREA_SIZE, RENDER_AREA_SIZE);
 	deUint64				startTime;
 	deUint64				endTime;

 	// yield, warm up the CPU and yield again before measuring
 	deYield();
 	tcu::warmupCPU();
 	deYield();

 	// "Render" something and wait for it
 	// (the blue channel of the clear color changes with the sample index)
 	gl.clearColor(0.0f, 1.0f, float(m_sampleNdx) / float(m_numSamples), 1.0f);
 	gl.clear(GL_COLOR_BUFFER_BIT);

 	// wait for results
 	glu::readPixels(m_context.getRenderContext(), 0, 0, resultSurface.getAccess());

 	// measure time used in readPixels
 	// (a second read of the same area, done after the clear has been read back once)
 	startTime = deGetMicroseconds();
 	glu::readPixels(m_context.getRenderContext(), 0, 0, resultSurface.getAccess());
 	endTime = deGetMicroseconds();

 	m_samples[m_sampleNdx] = (int)(endTime - startTime);

 	if (++m_sampleNdx < m_numSamples)
 		return CONTINUE;

 	logAndSetTestResult();
 	return STOP;
 }

 void ReferenceReadPixelsTimeCase::logAndSetTestResult (void)
 {
 	// Log sample list
 	{
 		m_testCtx.getLog()
 			<< tcu::TestLog::SampleList("Samples", "Samples")
 			<< tcu::TestLog::SampleInfo
 			<< tcu::TestLog::ValueInfo("ReadTime", "ReadPixels time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
 			<< tcu::TestLog::EndSampleInfo;

 		for (int sampleNdx = 0; sampleNdx < (int)m_samples.size(); ++sampleNdx)
 			m_testCtx.getLog()
 				<< tcu::TestLog::Sample
 				<< m_samples[sampleNdx]
 				<< tcu::TestLog::EndSample;

 		m_testCtx.getLog() << tcu::TestLog::EndSampleList;
 	}

 	// Log median
 	{
 		float median;
 		float limit60Low;
 		float limit60Up;

 		std::sort(m_samples.begin(), m_samples.end());
 		median		= linearSample(m_samples, 0.5f);
 		limit60Low	= linearSample(m_samples, 0.2f);
 		limit60Up	= linearSample(m_samples, 0.8f);

 		m_testCtx.getLog()
 			<< tcu::TestLog::Float("Median", "Median", "us", QP_KEY_TAG_TIME, median)
 			<< tcu::TestLog::Message
 			<< "60 % of samples within range:\n"
 			<< tcu::TestLog::EndMessage
 			<< tcu::TestLog::Float("Low60Range", "Lower", "us", QP_KEY_TAG_TIME, limit60Low)
 			<< tcu::TestLog::Float("High60Range", "Upper", "us", QP_KEY_TAG_TIME, limit60Up);

 		m_testCtx.setTestResult(QP_TEST_RESULT_PASS, de::floatToString(median, 2).c_str());
 	}
 }

 // Times a draw call and the following readPixels when the vertex or index
 // buffer sourced by the draw is uploaded just before the draw is issued.
 // The constructor arguments select the target buffer, upload method, buffer
 // state, upload range and whether an unrelated buffer is uploaded as well.
 template <typename SampleType>
 class GenericUploadRenderTimeCase : public RenderCase<SampleType>
 {
 public:
 	typedef typename RenderCase<SampleType>::SampleResult SampleResult;

 							GenericUploadRenderTimeCase	(Context&				context,
 														 const char*			name,
 														 const char*			description,
 														 DrawMethod				method,
 														 TargetBuffer			targetBuffer,
 														 UploadMethod			uploadMethod,
 														 BufferState			bufferState,
 														 UploadRange			uploadRange,
 														 UnrelatedBufferType	unrelatedBufferType);

 private:
 	// Runs the parent init and logs a description of the measurement.
 	void						init					(void);
 	// Measures a single workload and stores the timings in 'sample'.
 	void						runSample				(SampleResult& sample);

 	// the base class depends on SampleType, so its members are made visible explicitly
 	using RenderCase<SampleType>::RENDER_AREA_SIZE;

 	// NOTE: the constructor's initializer list follows this declaration order
 	const TargetBuffer			m_targetBuffer;			// buffer that is uploaded before the draw (vertex or index)
 	const BufferState			m_bufferState;			// new buffer or one that has already been used in rendering
 	const UploadMethod			m_uploadMethod;			// bufferData, bufferSubData or mapBufferRange
 	const UnrelatedBufferType	m_unrelatedBufferType;	// whether an unrelated vertex buffer is uploaded too
 	const UploadRange			m_uploadRange;			// full or partial upload

 	using RenderCase<SampleType>::m_context;
 	using RenderCase<SampleType>::m_testCtx;
 	using RenderCase<SampleType>::m_drawMethod;
 };

 // Forwards the common arguments to RenderCase and stores the upload
 // configuration. In debug builds every enum argument is range-checked.
 template <typename SampleType>
 GenericUploadRenderTimeCase<SampleType>::GenericUploadRenderTimeCase (Context&				context,
 																	  const char*			name,
 																	  const char*			description,
 																	  DrawMethod			method,
 																	  TargetBuffer			targetBuffer,
 																	  UploadMethod			uploadMethod,
 																	  BufferState			bufferState,
 																	  UploadRange			uploadRange,
 																	  UnrelatedBufferType	unrelatedBufferType)
 	: RenderCase<SampleType>	(context, name, description, method)
 	, m_targetBuffer			(targetBuffer)
 	, m_bufferState				(bufferState)
 	, m_uploadMethod			(uploadMethod)
 	, m_unrelatedBufferType		(unrelatedBufferType)
 	, m_uploadRange				(uploadRange)
 {
 	// the members are copies of the arguments, so checking the arguments validates the stored values
 	DE_ASSERT(targetBuffer < TARGETBUFFER_LAST);
 	DE_ASSERT(bufferState < BUFFERSTATE_LAST);
 	DE_ASSERT(uploadMethod < UPLOADMETHOD_LAST);
 	DE_ASSERT(unrelatedBufferType < UNRELATEDBUFFERTYPE_LAST);
 	DE_ASSERT(uploadRange < UPLOADRANGE_LAST);
 }

 // Initializes the parent case and logs a description of what is measured:
 // which buffer is uploaded and how, the workload and upload size ranges and,
 // when an unrelated buffer is uploaded, the unrelated upload size range.
 template <typename SampleType>
 void GenericUploadRenderTimeCase<SampleType>::init (void)
 {
 	// init parent
 	RenderCase<SampleType>::init();

 	// log
 	{
 		const char* const	targetFunctionName		= (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS) ? ("drawArrays") : ("drawElements");
 		// an index is one deUint32, a vertex is two vec4 attributes
 		const int			perVertexSize			= (m_targetBuffer == TARGETBUFFER_INDEX) ? ((int)sizeof(deUint32)) : ((int)sizeof(tcu::Vec4[2]));
 		const int			fullMinUploadSize		= RenderCase<SampleType>::getMinWorkloadSize() * perVertexSize;
 		const int			fullMaxUploadSize		= RenderCase<SampleType>::getMaxWorkloadSize() * perVertexSize;
 		// a partial upload covers half of the buffer, aligned up to 4 bytes
 		const int			minUploadSize			= (m_uploadRange == UPLOADRANGE_FULL) ? (fullMinUploadSize) : (deAlign32(fullMinUploadSize/2, 4));
 		const int			maxUploadSize			= (m_uploadRange == UPLOADRANGE_FULL) ? (fullMaxUploadSize) : (deAlign32(fullMaxUploadSize/2, 4));
 		// the unrelated upload is always a full copy of the vertex data
 		const int			minUnrelatedUploadSize	= RenderCase<SampleType>::getMinWorkloadSize() * (int)sizeof(tcu::Vec4[2]);
 		const int			maxUnrelatedUploadSize	= RenderCase<SampleType>::getMaxWorkloadSize() * (int)sizeof(tcu::Vec4[2]);

 		m_testCtx.getLog()
 			<< tcu::TestLog::Message
 			<< "Measuring the time used in " << targetFunctionName << " and readPixels call with different rendering workloads.\n"
 			<< "The "
 				<< ((m_targetBuffer == TARGETBUFFER_INDEX) ? ("index") : ("vertex attrib"))
 				<< " buffer "
 				<< ((m_bufferState == BUFFERSTATE_NEW) ? ("") : ("contents "))
 				<< "sourced by the rendering command "
 				<< ((m_bufferState == BUFFERSTATE_NEW)		? ("is uploaded ") :
 					(m_uploadRange == UPLOADRANGE_FULL)		? ("are specified ") :
 					(m_uploadRange == UPLOADRANGE_PARTIAL)	? ("are updated (partial upload) ") :
 					((const char*)DE_NULL))
 				<< "just before issuing the rendering command.\n"
 			<< ((m_bufferState == BUFFERSTATE_EXISTING) ? ("The buffer has been used in rendering.\n") : ("The buffer is generated just before uploading.\n"))
 			<< "Buffer "
 				<< ((m_bufferState == BUFFERSTATE_NEW)		? ("is uploaded") :
 					(m_uploadRange == UPLOADRANGE_FULL)		? ("contents are specified") :
 					(m_uploadRange == UPLOADRANGE_PARTIAL)	? ("contents are partially updated") :
 					((const char*)DE_NULL))
 				<< " with "
 				<< ((m_uploadMethod == UPLOADMETHOD_BUFFER_DATA) ? ("bufferData") : (m_uploadMethod == UPLOADMETHOD_BUFFER_SUB_DATA) ? ("bufferSubData") : ("mapBufferRange"))
 				<< " command. Usage of the target buffer is DYNAMIC_DRAW.\n"
 			<< ((m_uploadMethod == UPLOADMETHOD_MAP_BUFFER_RANGE) ? ("Mapping buffer with bits MAP_WRITE_BIT | MAP_INVALIDATE_RANGE_BIT | MAP_INVALIDATE_BUFFER_BIT | MAP_UNSYNCHRONIZED_BIT\n") : (""))
 			<< ((m_unrelatedBufferType == UNRELATEDBUFFERTYPE_VERTEX) ? ("Uploading an unrelated buffer just before issuing the rendering command with bufferData.\n") : (""))
 			<< RenderCase<SampleType>::getNumSamples() << " test samples. Sample order is randomized.\n"
 			<< "All samples at even positions (first = 0) are tested before samples at odd positions.\n"
 			<< "Generated workload is multiple viewport-covering grids with varying number of cells, each cell is two separate triangles.\n"
 			<< "Workload sizes are in the range ["
 				<< RenderCase<SampleType>::getMinWorkloadSize() << ",  "
 				<< RenderCase<SampleType>::getMaxWorkloadSize() << "] vertices "
 				<< "(["
 				<< getHumanReadableByteSize(RenderCase<SampleType>::getMinWorkloadDataSize()) << ","
 				<< getHumanReadableByteSize(RenderCase<SampleType>::getMaxWorkloadDataSize()) << "] to be processed).\n"
 			<< "Upload sizes are in the range ["
 				<< getHumanReadableByteSize(minUploadSize) << ","
 				<< getHumanReadableByteSize(maxUploadSize) << "].\n"
 			// The unrelated upload only happens when an unrelated vertex buffer is
 			// requested, independent of the draw method, so that is what gates this line.
 			<< ((m_unrelatedBufferType == UNRELATEDBUFFERTYPE_VERTEX) ?
 				("Unrelated upload sizes are in the range [" + getHumanReadableByteSize(minUnrelatedUploadSize) + ", " + getHumanReadableByteSize(maxUnrelatedUploadSize) + "]\n") :
 				(""))
 			<< "Test result is the approximated processing rate in MiB / s.\n"
 			<< "Note that while upload time is measured, the time used is not included in the results.\n"
 			<< ((m_unrelatedBufferType == UNRELATEDBUFFERTYPE_VERTEX) ? ("Note that the data size and the time used in the unrelated upload is not included in the results.\n") : (""))
 			<< ((m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS) ? ("Note that index array size is not included in the processed size.\n") : (""))
 			<< "Note! Test result may not be useful as is but instead should be compared against the reference.* group and other upload_and_draw.* group results.\n"
 			<< tcu::TestLog::EndMessage;
 	}
 }

 template <typename SampleType>
 void GenericUploadRenderTimeCase<SampleType>::runSample (SampleResult& sample)
 {
 	const glw::Functions&	gl					= m_context.getRenderContext().getFunctions();
 	const glu::Buffer		arrayBuffer			(m_context.getRenderContext());
 	const glu::Buffer		indexBuffer			(m_context.getRenderContext());
 	const glu::Buffer		unrelatedBuffer		(m_context.getRenderContext());
 	const int				numVertices			= getLayeredGridNumVertices(sample.scene);
 	tcu::Surface			resultSurface		(RENDER_AREA_SIZE, RENDER_AREA_SIZE);
 	deUint64				startTime;
 	deUint64				endTime;
 	std::vector<tcu::Vec4>	vertexData;
 	std::vector<deUint32>	indexData;

 	// create data

 	generateLayeredGridVertexAttribData4C4V(vertexData, sample.scene);
 	if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
 		generateLayeredGridIndexData(indexData, sample.scene);

 	gl.bindBuffer(GL_ARRAY_BUFFER, *arrayBuffer);
 	gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, *indexBuffer);
 	RenderCase<SampleType>::setupVertexAttribs();

 	// target should be an exisiting buffer? Draw from it once to make sure it exists on the gpu

 	if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS && m_bufferState == BUFFERSTATE_EXISTING)
 	{
 		gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(vertexData.size() * sizeof(tcu::Vec4)), &vertexData[0], GL_DYNAMIC_DRAW);
 		gl.drawArrays(GL_TRIANGLES, 0, numVertices);
 	}
 	else if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS && m_bufferState == BUFFERSTATE_NEW)
 	{
 		// do not touch the vertex buffer
 	}
 	else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS && m_bufferState == BUFFERSTATE_EXISTING)
 	{
 		// hint that the target buffer will be modified soon
 		const glw::GLenum vertexDataUsage	= (m_targetBuffer == TARGETBUFFER_VERTEX) ? (GL_DYNAMIC_DRAW) : (GL_STATIC_DRAW);
 		const glw::GLenum indexDataUsage	= (m_targetBuffer == TARGETBUFFER_INDEX) ? (GL_DYNAMIC_DRAW) : (GL_STATIC_DRAW);

 		gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(vertexData.size() * sizeof(tcu::Vec4)), &vertexData[0], vertexDataUsage);
 		gl.bufferData(GL_ELEMENT_ARRAY_BUFFER, (glw::GLsizeiptr)(indexData.size() * sizeof(deUint32)), &indexData[0], indexDataUsage);
 		gl.drawElements(GL_TRIANGLES, numVertices, GL_UNSIGNED_INT, DE_NULL);
 	}
 	else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS && m_bufferState == BUFFERSTATE_NEW)
 	{
 		if (m_targetBuffer == TARGETBUFFER_VERTEX)
 		{
 			// make the index buffer present on the gpu
 			// use another vertex buffer to keep original buffer in unused state
 			const glu::Buffer vertexCopyBuffer(m_context.getRenderContext());

 			gl.bindBuffer(GL_ARRAY_BUFFER, *vertexCopyBuffer);
 			RenderCase<SampleType>::setupVertexAttribs();

 			gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(vertexData.size() * sizeof(tcu::Vec4)), &vertexData[0], GL_STATIC_DRAW);
 			gl.bufferData(GL_ELEMENT_ARRAY_BUFFER, (glw::GLsizeiptr)(indexData.size() * sizeof(deUint32)), &indexData[0], GL_STATIC_DRAW);
 			gl.drawElements(GL_TRIANGLES, numVertices, GL_UNSIGNED_INT, DE_NULL);

 			// restore original state
 			gl.bindBuffer(GL_ARRAY_BUFFER, *arrayBuffer);
 			RenderCase<SampleType>::setupVertexAttribs();
 		}
 		else if (m_targetBuffer == TARGETBUFFER_INDEX)
 		{
 			// make the vertex buffer present on the gpu
 			gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(vertexData.size() * sizeof(tcu::Vec4)), &vertexData[0], GL_STATIC_DRAW);
 			gl.drawArrays(GL_TRIANGLES, 0, numVertices);
 		}
 		else
 			DE_ASSERT(false);
 	}
 	else
 		DE_ASSERT(false);

 	RenderCase<SampleType>::waitGLResults();
 	GLU_EXPECT_NO_ERROR(gl.getError(), "post buffer prepare");

 	gl.clearColor(0.0f, 0.0f, 0.0f, 1.0f);
 	gl.clear(GL_COLOR_BUFFER_BIT);
 	RenderCase<SampleType>::waitGLResults();

 	tcu::warmupCPU();

 	// upload

 	{
 		glw::GLenum		target;
 		glw::GLsizeiptr	size;
 		glw::GLintptr	offset = 0;
 		const void*		source;

 		if (m_targetBuffer == TARGETBUFFER_VERTEX && m_uploadRange == UPLOADRANGE_FULL)
 		{
 			target	= GL_ARRAY_BUFFER;
 			size	= (glw::GLsizeiptr)(vertexData.size() * sizeof(tcu::Vec4));
 			source	= &vertexData[0];
 		}
 		else if (m_targetBuffer == TARGETBUFFER_INDEX && m_uploadRange == UPLOADRANGE_FULL)
 		{
 			target	= GL_ELEMENT_ARRAY_BUFFER;
 			size	= (glw::GLsizeiptr)(indexData.size() * sizeof(deUint32));
 			source	= &indexData[0];
 		}
 		else if (m_targetBuffer == TARGETBUFFER_VERTEX && m_uploadRange == UPLOADRANGE_PARTIAL)
 		{
 			DE_ASSERT(m_bufferState == BUFFERSTATE_EXISTING);

 			target	= GL_ARRAY_BUFFER;
 			size	= (glw::GLsizeiptr)deAlign32((int)(vertexData.size() * sizeof(tcu::Vec4)) / 2, 4);
 			offset	= (glw::GLintptr)deAlign32((int)size / 2, 4);
 			source	= (const deUint8*)&vertexData[0] + offset;
 		}
 		else if (m_targetBuffer == TARGETBUFFER_INDEX && m_uploadRange == UPLOADRANGE_PARTIAL)
 		{
 			DE_ASSERT(m_bufferState == BUFFERSTATE_EXISTING);

 			// upload to 25% - 75% range
 			target	= GL_ELEMENT_ARRAY_BUFFER;
 			size	= (glw::GLsizeiptr)deAlign32((deInt32)(indexData.size() * sizeof(deUint32)) / 2, 4);
 			offset	= (glw::GLintptr)deAlign32((int)size / 2, 4);
 			source	= (const deUint8*)&indexData[0] + offset;
 		}
 		else
 		{
 			DE_ASSERT(false);
 			return;
 		}

 		startTime = deGetMicroseconds();

 		if (m_uploadMethod == UPLOADMETHOD_BUFFER_DATA)
 			gl.bufferData(target, size, source, GL_DYNAMIC_DRAW);
 		else if (m_uploadMethod == UPLOADMETHOD_BUFFER_SUB_DATA)
 		{
 			// create buffer storage
 			if (m_bufferState == BUFFERSTATE_NEW)
 				gl.bufferData(target, size, DE_NULL, GL_DYNAMIC_DRAW);
 			gl.bufferSubData(target, offset, size, source);
 		}
 		else if (m_uploadMethod == UPLOADMETHOD_MAP_BUFFER_RANGE)
 		{
 			void*			mapPtr;
 			glw::GLboolean	unmapSuccessful;

 			// create buffer storage
 			if (m_bufferState == BUFFERSTATE_NEW)
 				gl.bufferData(target, size, DE_NULL, GL_DYNAMIC_DRAW);

 			mapPtr = gl.mapBufferRange(target, offset, size, GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT | GL_MAP_UNSYNCHRONIZED_BIT);
 			if (!mapPtr)
 				throw tcu::Exception("MapBufferRange returned NULL");

 			deMemcpy(mapPtr, source, (int)size);

 			// if unmapping fails, just try again later
 			unmapSuccessful = gl.unmapBuffer(target);
 			if (!unmapSuccessful)
 				throw UnmapFailureError();
 		}
 		else
 			DE_ASSERT(false);

 		endTime = deGetMicroseconds();

 		sample.result.uploadedDataSize = (int)size;
 		sample.result.duration.uploadDuration = endTime - startTime;
 	}

 	// unrelated
 	if (m_unrelatedBufferType == UNRELATEDBUFFERTYPE_VERTEX)
 	{
 		const int unrelatedUploadSize = (int)(vertexData.size() * sizeof(tcu::Vec4));

 		gl.bindBuffer(GL_ARRAY_BUFFER, *unrelatedBuffer);
 		gl.bufferData(GL_ARRAY_BUFFER, unrelatedUploadSize, &vertexData[0], GL_STATIC_DRAW);
 		// Attribute pointers are not modified, no need to restore state

 		sample.result.unrelatedDataSize = unrelatedUploadSize;
 	}

 	// draw
 	{
 		startTime = deGetMicroseconds();

 		if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS)
 			gl.drawArrays(GL_TRIANGLES, 0, numVertices);
 		else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
 			gl.drawElements(GL_TRIANGLES, numVertices, GL_UNSIGNED_INT, DE_NULL);
 		else
 			DE_ASSERT(false);

 		endTime = deGetMicroseconds();

 		sample.result.duration.renderDuration = endTime - startTime;
 	}

 	// read
 	{
 		startTime = deGetMicroseconds();
 		glu::readPixels(m_context.getRenderContext(), 0, 0, resultSurface.getAccess());
 		endTime = deGetMicroseconds();

 		sample.result.duration.readDuration = endTime - startTime;
 	}

 	// set results

 	sample.result.renderDataSize = RenderCase<SampleType>::getVertexDataSize() * sample.result.numVertices;

 	sample.result.duration.renderReadDuration = sample.result.duration.renderDuration + sample.result.duration.readDuration;
 	sample.result.duration.totalDuration = sample.result.duration.uploadDuration + sample.result.duration.renderDuration + sample.result.duration.readDuration;
 	sample.result.duration.fitResponseDuration = sample.result.duration.renderReadDuration;
 }

 // Render-time case that draws from a buffer, uploads new buffer contents and then
 // draws again. init() logs the scenario and runSample() performs the timed sequence:
 // first draw, upload, second draw, readPixels.
 class BufferInUseRenderTimeCase : public RenderCase<RenderUploadRenderReadDuration>
 {
 public:
 	// Selects the access flags runSample() passes to mapBufferRange().
 	enum MapFlags
 	{
 		MAPFLAG_NONE = 0,			//!< logged by init() without any flags text; runSample() has no GL flag bits for this value
 		MAPFLAG_INVALIDATE_BUFFER,	//!< GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT
 		MAPFLAG_INVALIDATE_RANGE,	//!< GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT

 		MAPFLAG_LAST
 	};
 	// Selects which buffer object receives the timed upload.
 	enum UploadBufferTarget
 	{
 		UPLOADBUFFERTARGET_DIFFERENT_BUFFER = 0,	//!< reference case: upload goes to a separate buffer, not the one used by the first draw
 		UPLOADBUFFERTARGET_SAME_BUFFER,				//!< upload goes to the buffer the first draw was sourced from

 		UPLOADBUFFERTARGET_LAST
 	};
 								BufferInUseRenderTimeCase	(Context&			context,
 															 const char*		name,
 															 const char*		description,
 															 DrawMethod			method,
 															 MapFlags			mapFlags,
 															 TargetBuffer		targetBuffer,
 															 UploadMethod		uploadMethod,
 															 UploadRange		uploadRange,
 															 UploadBufferTarget	uploadTarget);

 private:
 	// Calls the base class init() and writes the test description to the log.
 	void						init						(void);
 	// Executes one timed draw / upload / draw / read sequence and fills sample.result.
 	void						runSample					(SampleResult& sample);

 	const TargetBuffer			m_targetBuffer;			//!< buffer type (vertex or index) that is uploaded
 	const UploadMethod			m_uploadMethod;			//!< bufferData, bufferSubData or mapBufferRange
 	const UploadRange			m_uploadRange;			//!< full respecification or partial modification
 	const MapFlags				m_mapFlags;				//!< access flags used with mapBufferRange
 	const UploadBufferTarget	m_uploadBufferTarget;	//!< same buffer as the first draw, or a different one (reference case)
 };

 // Stores the case configuration. The draw method is forwarded to the RenderCase base,
 // the remaining selectors are kept in const members. Every selector is expected to be
 // below its *_LAST sentinel.
 BufferInUseRenderTimeCase::BufferInUseRenderTimeCase (Context& context, const char* name, const char* description,
 													  DrawMethod method, MapFlags mapFlags, TargetBuffer targetBuffer,
 													  UploadMethod uploadMethod, UploadRange uploadRange,
 													  UploadBufferTarget uploadTarget)
 	: RenderCase<RenderUploadRenderReadDuration>(context, name, description, method)
 	, m_targetBuffer		(targetBuffer)
 	, m_uploadMethod		(uploadMethod)
 	, m_uploadRange			(uploadRange)
 	, m_mapFlags			(mapFlags)
 	, m_uploadBufferTarget	(uploadTarget)
 {
 	// Range checks for the enum arguments (debug builds only).
 	DE_ASSERT(m_targetBuffer		< TARGETBUFFER_LAST);
 	DE_ASSERT(m_uploadMethod		< UPLOADMETHOD_LAST);
 	DE_ASSERT(m_uploadRange			< UPLOADRANGE_LAST);
 	DE_ASSERT(m_mapFlags			< MAPFLAG_LAST);
 	DE_ASSERT(m_uploadBufferTarget	< UPLOADBUFFERTARGET_LAST);
 }

 void BufferInUseRenderTimeCase::init (void)
 {
 	RenderCase<RenderUploadRenderReadDuration>::init();

 	// log
 	{
 		const char* const	targetFunctionName		= (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS) ? ("drawArrays") : ("drawElements");
 		const char* const	uploadFunctionName		= (m_uploadMethod == UPLOADMETHOD_BUFFER_DATA) ? ("bufferData") : (m_uploadMethod == UPLOADMETHOD_BUFFER_SUB_DATA) ? ("bufferSubData") : ("mapBufferRange");
 		const bool			isReferenceCase			= (m_uploadBufferTarget == UPLOADBUFFERTARGET_DIFFERENT_BUFFER);
 		tcu::MessageBuilder	message					(&m_testCtx.getLog());

 		message	<< "Measuring the time used in " << targetFunctionName << " call, a buffer upload, "
 				<< targetFunctionName << " call using the uploaded buffer and readPixels call with different upload sizes.\n";

 		if (isReferenceCase)
 			message << "Rendering:\n"
 					<< "    before test: create and use buffers B and C\n"
 					<< "    first draw: render using buffer B\n"
 					<< ((m_uploadRange == UPLOADRANGE_FULL)		? ("    upload: respecify buffer C contents\n")	:
 						(m_uploadRange == UPLOADRANGE_PARTIAL)	? ("    upload: modify buffer C contents\n")	:
 						((const char*)DE_NULL))
 					<< "    second draw: render using buffer C\n"
 					<< "    read: readPixels\n";
 		else
 			message << "Rendering:\n"
 					<< "    before test: create and use buffer B\n"
 					<< "    first draw: render using buffer B\n"
 					<< ((m_uploadRange == UPLOADRANGE_FULL)		? ("    upload: respecify buffer B contents\n")	:
 						(m_uploadRange == UPLOADRANGE_PARTIAL)	? ("    upload: modify buffer B contents\n")	:
 						((const char*)DE_NULL))
 					<< "    second draw: render using buffer B\n"
 					<< "    read: readPixels\n";

 		message	<< "Uploading using " << uploadFunctionName
 					<< ((m_mapFlags == MAPFLAG_INVALIDATE_RANGE)	? (", flags = MAP_WRITE_BIT | MAP_INVALIDATE_RANGE_BIT")	:
 						(m_mapFlags == MAPFLAG_INVALIDATE_BUFFER)	? (", flags = MAP_WRITE_BIT | MAP_INVALIDATE_BUFFER_BIT")	:
 						(m_mapFlags == MAPFLAG_NONE)				? ("")														:
 						((const char*)DE_NULL))
 					<< "\n"
 				<< getNumSamples() << " test samples. Sample order is randomized.\n"
 				<< "All samples at even positions (first = 0) are tested before samples at odd positions.\n"
 				<< "Workload sizes are in the range ["
 					<< getMinWorkloadSize() << ",  "
 					<< getMaxWorkloadSize() << "] vertices "
 					<< "(["
 					<< getHumanReadableByteSize(getMinWorkloadDataSize()) << ","
 					<< getHumanReadableByteSize(getMaxWorkloadDataSize()) << "] to be processed).\n"
 				<< "Test result is the approximated processing rate in MiB / s of the second draw call and the readPixels call.\n";

 		if (isReferenceCase)
 			message	<< "Note! Test result should only be used as a baseline reference result for buffer.render_after_upload.draw_modify_draw test group results.";
 		else
 			message	<< "Note! Test result may not be useful as is but instead should be compared against the buffer.render_after_upload.reference.draw_upload_draw group results.\n";

 		message << tcu::TestLog::EndMessage;
 	}
 }

 // Runs one measurement for sample.scene:
 //   1. generates the vertex (and, for indexed draws, index) data and draws once so that
 //      the buffers have been used before the measurement starts,
 //   2. in the reference case also fills and draws from an alternative buffer,
 //   3. times the first draw, the upload, the second draw and readPixels separately,
 //   4. stores the durations and data sizes in sample.result.
 // Throws tcu::Exception if mapBufferRange returns NULL and UnmapFailureError if
 // unmapBuffer reports failure.
 void BufferInUseRenderTimeCase::runSample (SampleResult& sample)
 {
 	const glw::Functions&	gl						= m_context.getRenderContext().getFunctions();
 	const glu::Buffer		arrayBuffer				(m_context.getRenderContext());
 	const glu::Buffer		indexBuffer				(m_context.getRenderContext());
 	const glu::Buffer		alternativeUploadBuffer	(m_context.getRenderContext());
 	const int				numVertices				= getLayeredGridNumVertices(sample.scene);
 	tcu::Surface			resultSurface			(RENDER_AREA_SIZE, RENDER_AREA_SIZE);
 	deUint64				startTime;
 	deUint64				endTime;
 	std::vector<tcu::Vec4>	vertexData;
 	std::vector<deUint32>	indexData;

 	// create data

 	generateLayeredGridVertexAttribData4C4V(vertexData, sample.scene);
 	// index data is only generated for indexed draws
 	if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
 		generateLayeredGridIndexData(indexData, sample.scene);

 	// make buffers used

 	gl.bindBuffer(GL_ARRAY_BUFFER, *arrayBuffer);
 	gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, *indexBuffer);
 	setupVertexAttribs();

 	if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS)
 	{
 		gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(vertexData.size() * sizeof(tcu::Vec4)), &vertexData[0], GL_STREAM_DRAW);
 		gl.drawArrays(GL_TRIANGLES, 0, numVertices);
 	}
 	else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
 	{
 		gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(vertexData.size() * sizeof(tcu::Vec4)), &vertexData[0], GL_STREAM_DRAW);
 		gl.bufferData(GL_ELEMENT_ARRAY_BUFFER, (glw::GLsizeiptr)(indexData.size() * sizeof(deUint32)), &indexData[0], GL_STREAM_DRAW);
 		gl.drawElements(GL_TRIANGLES, numVertices, GL_UNSIGNED_INT, DE_NULL);
 	}
 	else
 		DE_ASSERT(false);

 	// another pair of buffers for reference case
 	if (m_uploadBufferTarget == UPLOADBUFFERTARGET_DIFFERENT_BUFFER)
 	{
 		if (m_targetBuffer == TARGETBUFFER_VERTEX)
 		{
 			gl.bindBuffer(GL_ARRAY_BUFFER, *alternativeUploadBuffer);
 			gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(vertexData.size() * sizeof(tcu::Vec4)), &vertexData[0], GL_STREAM_DRAW);

 			// point the attributes at the alternative buffer before drawing from it
 			setupVertexAttribs();
 			gl.drawArrays(GL_TRIANGLES, 0, numVertices);
 		}
 		else if (m_targetBuffer == TARGETBUFFER_INDEX)
 		{
 			// NOTE(review): indexData is only filled for DRAWMETHOD_DRAW_ELEMENTS; presumably index
 			// target cases always use indexed draws -- confirm at the case creation site.
 			gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, *alternativeUploadBuffer);
 			gl.bufferData(GL_ELEMENT_ARRAY_BUFFER, (glw::GLsizeiptr)(indexData.size() * sizeof(deUint32)), &indexData[0], GL_STREAM_DRAW);
 			gl.drawElements(GL_TRIANGLES, numVertices, GL_UNSIGNED_INT, DE_NULL);
 		}
 		else
 			DE_ASSERT(false);

 		// restore state
 		gl.bindBuffer(GL_ARRAY_BUFFER, *arrayBuffer);
 		gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, *indexBuffer);
 		setupVertexAttribs();
 	}

 	// finish the preparation work before any timing starts
 	waitGLResults();
 	GLU_EXPECT_NO_ERROR(gl.getError(), "post buffer prepare");

 	gl.clearColor(0.0f, 0.0f, 0.0f, 1.0f);
 	gl.clear(GL_COLOR_BUFFER_BIT);
 	waitGLResults();

 	tcu::warmupCPU();

 	// first draw
 	{
 		startTime = deGetMicroseconds();

 		if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS)
 			gl.drawArrays(GL_TRIANGLES, 0, numVertices);
 		else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
 			gl.drawElements(GL_TRIANGLES, numVertices, GL_UNSIGNED_INT, DE_NULL);
 		else
 			DE_ASSERT(false);

 		endTime = deGetMicroseconds();

 		sample.result.duration.firstRenderDuration = endTime - startTime;
 	}

 	// upload
 	{
 		glw::GLenum		target;
 		glw::GLsizeiptr	size;
 		glw::GLintptr	offset = 0;
 		const void*		source;

 		// select the bind point, byte range and source pointer of the upload
 		if (m_targetBuffer == TARGETBUFFER_VERTEX && m_uploadRange == UPLOADRANGE_FULL)
 		{
 			target	= GL_ARRAY_BUFFER;
 			size	= (glw::GLsizeiptr)(vertexData.size() * sizeof(tcu::Vec4));
 			source	= &vertexData[0];
 		}
 		else if (m_targetBuffer == TARGETBUFFER_INDEX && m_uploadRange == UPLOADRANGE_FULL)
 		{
 			target	= GL_ELEMENT_ARRAY_BUFFER;
 			size	= (glw::GLsizeiptr)(indexData.size() * sizeof(deUint32));
 			source	= &indexData[0];
 		}
 		else if (m_targetBuffer == TARGETBUFFER_VERTEX && m_uploadRange == UPLOADRANGE_PARTIAL)
 		{
 			// same range arithmetic as in the index buffer case below: half of the data,
 			// starting at half of that size
 			target	= GL_ARRAY_BUFFER;
 			size	= (glw::GLsizeiptr)deAlign32((int)(vertexData.size() * sizeof(tcu::Vec4)) / 2, 4);
 			offset	= (glw::GLintptr)deAlign32((int)size / 2, 4);
 			source	= (const deUint8*)&vertexData[0] + offset;
 		}
 		else if (m_targetBuffer == TARGETBUFFER_INDEX && m_uploadRange == UPLOADRANGE_PARTIAL)
 		{
 			// upload to 25% - 75% range
 			target	= GL_ELEMENT_ARRAY_BUFFER;
 			size	= (glw::GLsizeiptr)deAlign32((deInt32)(indexData.size() * sizeof(deUint32)) / 2, 4);
 			offset	= (glw::GLintptr)deAlign32((int)size / 2, 4);
 			source	= (const deUint8*)&indexData[0] + offset;
 		}
 		else
 		{
 			DE_ASSERT(false);
 			return;
 		}

 		// reference case? don't modify the buffer in use
 		if (m_uploadBufferTarget == UPLOADBUFFERTARGET_DIFFERENT_BUFFER)
 			gl.bindBuffer(target, *alternativeUploadBuffer);

 		// timed region: only the upload call(s) are between the two clock reads
 		startTime = deGetMicroseconds();

 		if (m_uploadMethod == UPLOADMETHOD_BUFFER_DATA)
 			gl.bufferData(target, size, source, GL_STREAM_DRAW);
 		else if (m_uploadMethod == UPLOADMETHOD_BUFFER_SUB_DATA)
 			gl.bufferSubData(target, offset, size, source);
 		else if (m_uploadMethod == UPLOADMETHOD_MAP_BUFFER_RANGE)
 		{
 			// Translate the case's MapFlags into GL access bits. Any other value, including
 			// MAPFLAG_NONE, yields -1.
 			// NOTE(review): presumably MAPFLAG_NONE is never combined with mapBufferRange
 			// uploads -- confirm at the case creation site.
 			const int		mapFlags	= (m_mapFlags == MAPFLAG_INVALIDATE_BUFFER)	? (GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT)	:
 										  (m_mapFlags == MAPFLAG_INVALIDATE_RANGE)	? (GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT)	:
 										  (-1);
 			void*			mapPtr;
 			glw::GLboolean	unmapSuccessful;

 			mapPtr = gl.mapBufferRange(target, offset, size, mapFlags);
 			if (!mapPtr)
 				throw tcu::Exception("MapBufferRange returned NULL");

 			deMemcpy(mapPtr, source, (int)size);

 			// if unmapping fails, just try again later
 			unmapSuccessful = gl.unmapBuffer(target);
 			if (!unmapSuccessful)
 				throw UnmapFailureError();
 		}
 		else
 			DE_ASSERT(false);

 		endTime = deGetMicroseconds();

 		sample.result.uploadedDataSize = (int)size;
 		sample.result.duration.uploadDuration = endTime - startTime;
 	}

 	// second draw
 	{
 		// Source vertex data from alternative buffer in reference case
 		if (m_uploadBufferTarget == UPLOADBUFFERTARGET_DIFFERENT_BUFFER && m_targetBuffer == TARGETBUFFER_VERTEX)
 			setupVertexAttribs();

 		startTime = deGetMicroseconds();

 		if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS)
 			gl.drawArrays(GL_TRIANGLES, 0, numVertices);
 		else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
 			gl.drawElements(GL_TRIANGLES, numVertices, GL_UNSIGNED_INT, DE_NULL);
 		else
 			DE_ASSERT(false);

 		endTime = deGetMicroseconds();

 		sample.result.duration.secondRenderDuration = endTime - startTime;
 	}

 	// read
 	{
 		startTime = deGetMicroseconds();
 		glu::readPixels(m_context.getRenderContext(), 0, 0, resultSurface.getAccess());
 		endTime = deGetMicroseconds();

 		sample.result.duration.readDuration = endTime - startTime;
 	}

 	// set results

 	sample.result.renderDataSize = getVertexDataSize() * sample.result.numVertices;

 	// the fitted response is the second draw plus the read; the total also includes the first draw and the upload
 	sample.result.duration.renderReadDuration	= sample.result.duration.secondRenderDuration + sample.result.duration.readDuration;
 	sample.result.duration.totalDuration		= sample.result.duration.firstRenderDuration +
 												  sample.result.duration.uploadDuration +
 												  sample.result.duration.secondRenderDuration +
 												  sample.result.duration.readDuration;
 	sample.result.duration.fitResponseDuration	= sample.result.duration.renderReadDuration;
 }

 // Test case that uploads a buffer, waits a number of frames and then draws from the
 // buffer, measuring the upload, draw and readPixels times. init() logs that the test
 // result is the number of frames (swaps) required for the render time to stabilize.
 class UploadWaitDrawCase : public RenderPerformanceTestBase
 {
 public:
 	// Per-sample input and bookkeeping.
 	struct Sample
 	{
 		int			numFrames;			//!< frame index (counted from the upload frame) at which the uploaded buffer is drawn
 		deUint64	uploadCallEndTime;	//!< deGetMicroseconds() value taken right after the upload call(s) returned
 	};
 	// Per-sample measurements; durations are differences of deGetMicroseconds() values.
 	struct Result
 	{
 		deUint64	uploadDuration;		//!< time spent in the upload call(s)
 		deUint64	renderDuration;		//!< time spent in the draw call
 		deUint64	readDuration;		//!< time spent in readPixels
 		deUint64	renderReadDuration;	//!< renderDuration + readDuration

 		deUint64	timeBeforeUse;		//!< time between the end of the upload and the start of the draw
 	};

 							UploadWaitDrawCase				(Context&		context,
 															 const char*	name,
 															 const char*	description,
 															 DrawMethod		drawMethod,
 															 TargetBuffer	targetBuffer,
 															 UploadMethod	uploadMethod,
 															 BufferState	bufferState);
 							~UploadWaitDrawCase				(void);

 private:
 	void					init							(void);
 	void					deinit							(void);
 	IterateResult			iterate							(void);

 	void					uploadBuffer					(Sample& sample, Result& result);
 	void					drawFromBuffer					(Sample& sample, Result& result);
 	void					reuseAndDeleteBuffer			(void);
 	void					logAndSetTestResult				(void);
 	void					logSamples						(void);
 	void					drawMisc						(void);
 	int						findStabilizationSample			(deUint64 Result::*target, const char* description);
 	bool					checkSampleTemporalStability	(deUint64 Result::*target, const char* description);

 	const DrawMethod		m_drawMethod;
 	const TargetBuffer		m_targetBuffer;
 	const UploadMethod		m_uploadMethod;
 	const BufferState		m_bufferState;

 	const int				m_numSamplesPerSwap;	//!< number of samples taken for each frame count
 	const int				m_numMaxSwaps;			//!< largest tested frame count; samples cover [0, m_numMaxSwaps]

 	int						m_frameNdx;				//!< frame counter within the current sample, reset when moving to the next sample
 	int						m_sampleNdx;			//!< position in m_iterationOrder of the sample being measured
 	int						m_numVertices;			//!< vertex count of the test scene, set in init()

 	std::vector<tcu::Vec4>	m_vertexData;
 	std::vector<deUint32>	m_indexData;
 	std::vector<Sample>		m_samples;
 	std::vector<Result>		m_results;				//!< indexed like m_samples
 	std::vector<int>		m_iterationOrder;		//!< order in which the entries of m_samples are executed

 	deUint32				m_vertexBuffer;			//!< GL buffer name, 0 when not allocated
 	deUint32				m_indexBuffer;			//!< GL buffer name, 0 when not allocated
 	deUint32				m_miscBuffer;			//!< GL buffer name of the "misc draw buffer" created in init(); presumably sourced by drawMisc()
 	int						m_numMiscVertices;		//!< vertex count of the misc scene, set in init()
 };

 // Stores the case configuration and puts all run-time state into its "not initialized"
 // form: no GL buffers (name 0), vertex counts of -1 and both counters at zero.
 UploadWaitDrawCase::UploadWaitDrawCase (Context& context, const char* name, const char* description,
 										DrawMethod drawMethod, TargetBuffer targetBuffer,
 										UploadMethod uploadMethod, BufferState bufferState)
 	: RenderPerformanceTestBase(context, name, description)
 	// configuration
 	, m_drawMethod			(drawMethod)
 	, m_targetBuffer		(targetBuffer)
 	, m_uploadMethod		(uploadMethod)
 	, m_bufferState			(bufferState)
 	// sampling plan: 10 samples for each frame count in [0, 4]
 	, m_numSamplesPerSwap	(10)
 	, m_numMaxSwaps			(4)
 	// iteration state
 	, m_frameNdx			(0)
 	, m_sampleNdx			(0)
 	, m_numVertices			(-1)
 	// GL objects, created later
 	, m_vertexBuffer		(0)
 	, m_indexBuffer			(0)
 	, m_miscBuffer			(0)
 	, m_numMiscVertices		(-1)
 {
 }

 // Releases whatever deinit() would release; deinit() zeroes the buffer names it deletes.
 UploadWaitDrawCase::~UploadWaitDrawCase (void)
 {
 	UploadWaitDrawCase::deinit();
 }

 // Prepares the test: checks the render target size, sets viewport and blending state,
 // generates the scene data, creates and uses the buffers required by the selected
 // buffer state, creates the misc draw buffer, builds the randomized sample order and
 // logs the test description.
 // Throws tcu::NotSupportedError if the render target is smaller than RENDER_AREA_SIZE.
 void UploadWaitDrawCase::init (void)
 {
 	const glw::Functions&	gl						= m_context.getRenderContext().getFunctions();
 	const int				vertexAttribSize		= (int)sizeof(tcu::Vec4) * 2; // color4, position4
 	const int				vertexIndexSize			= (int)sizeof(deUint32);
 	// bytes uploaded per vertex for the selected target buffer; only used for the logged upload size
 	const int				vertexUploadDataSize	= (m_targetBuffer == TARGETBUFFER_VERTEX) ? (vertexAttribSize) : (vertexIndexSize);

 	RenderPerformanceTestBase::init();

 	// requirements

 	if (m_context.getRenderTarget().getWidth() < RENDER_AREA_SIZE ||
 		m_context.getRenderTarget().getHeight() < RENDER_AREA_SIZE)
 		throw tcu::NotSupportedError("Test case requires " + de::toString<int>(RENDER_AREA_SIZE) + "x" + de::toString<int>(RENDER_AREA_SIZE) + " render target");

 	// gl state

 	gl.viewport(0, 0, RENDER_AREA_SIZE, RENDER_AREA_SIZE);

 	// enable blending to prevent grid layers from being discarded

 	gl.blendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
 	gl.blendEquation(GL_FUNC_ADD);
 	gl.enable(GL_BLEND);

 	// scene

 	{
 		LayeredGridSpec scene;

 		// create ~8MB workload with similar characteristics as in the other test
 		// => makes comparison to other results more straightforward
 		scene.gridWidth = 93;
 		scene.gridHeight = 93;
 		scene.gridLayers = 5;

 		generateLayeredGridVertexAttribData4C4V(m_vertexData, scene);
 		generateLayeredGridIndexData(m_indexData, scene);
 		m_numVertices = getLayeredGridNumVertices(scene);
 	}

 	// buffers

 	// BUFFERSTATE_NEW: the uploaded buffer is generated in uploadBuffer(), so only the
 	// other (static) buffer of an indexed draw is created here. Nothing is created for
 	// DRAWMETHOD_DRAW_ARRAYS.
 	if (m_bufferState == BUFFERSTATE_NEW)
 	{
 		if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
 		{
 			// reads from two buffers, prepare the static buffer

 			if (m_targetBuffer == TARGETBUFFER_VERTEX)
 			{
 				// index buffer is static, use another vertex buffer to keep original buffer in unused state
 				const glu::Buffer vertexCopyBuffer(m_context.getRenderContext());

 				gl.genBuffers(1, &m_indexBuffer);
 				gl.bindBuffer(GL_ARRAY_BUFFER, *vertexCopyBuffer);
 				gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, m_indexBuffer);
 				gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(m_vertexData.size() * sizeof(tcu::Vec4)), &m_vertexData[0], GL_STATIC_DRAW);
 				gl.bufferData(GL_ELEMENT_ARRAY_BUFFER, (glw::GLsizeiptr)(m_indexData.size() * sizeof(deUint32)), &m_indexData[0], GL_STATIC_DRAW);

 				setupVertexAttribs();
 				gl.drawElements(GL_TRIANGLES, m_numVertices, GL_UNSIGNED_INT, DE_NULL);
 			}
 			else if (m_targetBuffer == TARGETBUFFER_INDEX)
 			{
 				// vertex buffer is static
 				gl.genBuffers(1, &m_vertexBuffer);
 				gl.bindBuffer(GL_ARRAY_BUFFER, m_vertexBuffer);
 				gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(m_vertexData.size() * sizeof(tcu::Vec4)), &m_vertexData[0], GL_STATIC_DRAW);

 				setupVertexAttribs();
 				gl.drawArrays(GL_TRIANGLES, 0, m_numVertices);
 			}
 			else
 				DE_ASSERT(false);
 		}
 	}
 	// BUFFERSTATE_EXISTING: all buffers are created, filled and drawn from once here.
 	else if (m_bufferState == BUFFERSTATE_EXISTING)
 	{
 		// NOTE(review): both branches of each conditional are GL_STATIC_DRAW, so the usage
 		// does not currently depend on the target buffer.
 		const glw::GLenum vertexUsage	= (m_targetBuffer == TARGETBUFFER_VERTEX) ? (GL_STATIC_DRAW) : (GL_STATIC_DRAW);
 		const glw::GLenum indexUsage	= (m_targetBuffer == TARGETBUFFER_INDEX) ? (GL_STATIC_DRAW) : (GL_STATIC_DRAW);

 		gl.genBuffers(1, &m_vertexBuffer);
 		gl.bindBuffer(GL_ARRAY_BUFFER, m_vertexBuffer);
 		gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(m_vertexData.size() * sizeof(tcu::Vec4)), &m_vertexData[0], vertexUsage);

 		if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
 		{
 			gl.genBuffers(1, &m_indexBuffer);
 			gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, m_indexBuffer);
 			gl.bufferData(GL_ELEMENT_ARRAY_BUFFER, (glw::GLsizeiptr)(m_indexData.size() * sizeof(deUint32)), &m_indexData[0], indexUsage);
 		}

 		setupVertexAttribs();

 		if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS)
 			gl.drawArrays(GL_TRIANGLES, 0, m_numVertices);
 		else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
 			gl.drawElements(GL_TRIANGLES, m_numVertices, GL_UNSIGNED_INT, DE_NULL);
 		else
 			DE_ASSERT(false);
 	}
 	else
 		DE_ASSERT(false);

 	// misc draw buffer
 	{
 		std::vector<tcu::Vec4>	vertexData;
 		LayeredGridSpec			scene;

 		// create ~1.5MB workload with similar characteristics
 		scene.gridWidth = 40;
 		scene.gridHeight = 40;
 		scene.gridLayers = 5;

 		generateLayeredGridVertexAttribData4C4V(vertexData, scene);

 		gl.genBuffers(1, &m_miscBuffer);
 		gl.bindBuffer(GL_ARRAY_BUFFER, m_miscBuffer);
 		gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(sizeof(tcu::Vec4) * vertexData.size()), &vertexData[0], GL_STATIC_DRAW);

 		m_numMiscVertices = getLayeredGridNumVertices(scene);
 	}

 	// iterations
 	{
 		// m_numSamplesPerSwap samples for every frame count in [0, m_numMaxSwaps]
 		m_samples.resize((m_numMaxSwaps+1) * m_numSamplesPerSwap);
 		m_results.resize((m_numMaxSwaps+1) * m_numSamplesPerSwap);

 		for (int numSwaps = 0; numSwaps <= m_numMaxSwaps; ++numSwaps)
 		for (int sampleNdx = 0; sampleNdx < m_numSamplesPerSwap; ++sampleNdx)
 		{
 			const int index = numSwaps*m_numSamplesPerSwap + sampleNdx;

 			m_samples[index].numFrames = numSwaps;
 		}

 		m_iterationOrder.resize(m_samples.size());
 		generateTwoPassRandomIterationOrder(m_iterationOrder, (int)m_samples.size());
 	}

 	// log
 	m_testCtx.getLog()
 		<< tcu::TestLog::Message
 		<< "Measuring time used in " << ((m_drawMethod == DRAWMETHOD_DRAW_ARRAYS) ? ("drawArrays") : ("drawElements")) << " and readPixels call.\n"
 		<< "Drawing using a buffer that has been uploaded N frames ago. Testing with N within range [0, " << m_numMaxSwaps << "].\n"
 		<< "Uploaded buffer is a " << ((m_targetBuffer == TARGETBUFFER_VERTEX) ? ("vertex attribute") : ("index")) << " buffer.\n"
 		<< "Uploading using "
 			<< ((m_uploadMethod == UPLOADMETHOD_BUFFER_DATA)		? ("bufferData")																							:
 				(m_uploadMethod == UPLOADMETHOD_BUFFER_SUB_DATA)	? ("bufferSubData")																							:
 				(m_uploadMethod == UPLOADMETHOD_MAP_BUFFER_RANGE)	? ("mapBufferRange, flags = GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT | GL_MAP_UNSYNCHRONIZED_BIT")	:
 				((const char*)DE_NULL))
 			<< "\n"
 		<< "Upload size is " << getHumanReadableByteSize(m_numVertices * vertexUploadDataSize) << ".\n"
 		<< ((m_bufferState == BUFFERSTATE_EXISTING) ? ("All test samples use the same buffer object.\n") : (""))
 		<< "Test result is the number of frames (swaps) required for the render time to stabilize.\n"
 		<< "Assuming combined time used in the draw call and readPixels call is stabilizes to a constant value.\n"
 		<< tcu::TestLog::EndMessage;
 }

 void UploadWaitDrawCase::deinit (void)
 {
 	RenderPerformanceTestBase::deinit();

 	if (m_vertexBuffer)
 	{
 		m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_vertexBuffer);
 		m_vertexBuffer = 0;
 	}
 	if (m_indexBuffer)
 	{
 		m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_indexBuffer);
 		m_indexBuffer = 0;
 	}
 	if (m_miscBuffer)
 	{
 		m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_miscBuffer);
 		m_miscBuffer = 0;
 	}
 }

 // Renders one frame of the current sample. Frame 0 uploads the buffer, the frame
 // matching the sample's numFrames draws from it, and every other frame draws from the
 // misc buffer. After a fixed number of extra frames the next sample is started.
 // Returns CONTINUE until all samples are done, then logs the result and returns STOP.
 UploadWaitDrawCase::IterateResult UploadWaitDrawCase::iterate (void)
 {
 	const glw::Functions&	gl					= m_context.getRenderContext().getFunctions();
 	const int				numCooldownFrames	= 5; // draw misc between test samples
 	const int				curFrame			= m_frameNdx++;
 	const int				sampleSlot			= m_iterationOrder[m_sampleNdx];
 	Sample&					curSample			= m_samples[sampleSlot];
 	Result&					curResult			= m_results[sampleSlot];
 	const int				useFrame			= curSample.numFrames;
 	const bool				isUseFrame			= (curFrame == useFrame);

 	// Simulate work for about 8ms
 	busyWait(8000);

 	// Frames that do not draw from the uploaded buffer draw similar content from another buffer
 	if (!isUseFrame)
 		drawMisc();

 	// The first frame of a sample uploads the buffer and starts the clock
 	if (curFrame == 0)
 		uploadBuffer(curSample, curResult);

 	// \note: checked separately from the upload, useFrame can be 0
 	if (isUseFrame)
 	{
 		// draw using the uploaded buffer
 		drawFromBuffer(curSample, curResult);

 		// re-use buffer for something else to make sure test iterations do not affect each other
 		if (m_bufferState == BUFFERSTATE_NEW)
 			reuseAndDeleteBuffer();
 	}
 	else if (curFrame == useFrame + numCooldownFrames)
 	{
 		// sample finished, continue from the next one
 		++m_sampleNdx;
 		m_frameNdx = 0;
 	}

 	GLU_EXPECT_NO_ERROR(gl.getError(), "post-iterate");

 	if (m_sampleNdx >= (int)m_samples.size())
 	{
 		logAndSetTestResult();
 		return STOP;
 	}

 	return CONTINUE;
 }

 // Uploads the full vertex or index data to the target buffer using the selected upload
 // method and measures the time spent in the upload call(s).
 // With BUFFERSTATE_NEW the buffer object is generated here, and its storage is
 // allocated before the timed region for the bufferSubData and mapBufferRange methods.
 // Writes the end timestamp to sample.uploadCallEndTime and the duration to
 // result.uploadDuration. Throws tcu::Exception if mapBufferRange returns NULL and
 // UnmapFailureError if unmapBuffer reports failure.
 void UploadWaitDrawCase::uploadBuffer (Sample& sample, Result& result)
 {
 	const glw::Functions&	gl			= m_context.getRenderContext().getFunctions();
 	const bool				isNewBuffer	= (m_bufferState == BUFFERSTATE_NEW);
 	deUint32*				bufferName	= DE_NULL;
 	glw::GLenum				bindPoint;
 	glw::GLsizeiptr			numBytes;
 	const void*				srcData;

 	// data source and destination

 	switch (m_targetBuffer)
 	{
 		case TARGETBUFFER_VERTEX:
 			bufferName	= &m_vertexBuffer;
 			bindPoint	= GL_ARRAY_BUFFER;
 			numBytes	= (glw::GLsizeiptr)(m_vertexData.size() * sizeof(tcu::Vec4));
 			srcData		= &m_vertexData[0];
 			break;

 		case TARGETBUFFER_INDEX:
 			bufferName	= &m_indexBuffer;
 			bindPoint	= GL_ELEMENT_ARRAY_BUFFER;
 			numBytes	= (glw::GLsizeiptr)(m_indexData.size() * sizeof(deUint32));
 			srcData		= &m_indexData[0];
 			break;

 		default:
 			DE_ASSERT(false);
 			return;
 	}

 	// the target buffer must exist exactly when an existing buffer is being reused
 	DE_ASSERT((*bufferName == 0) == isNewBuffer);

 	// create and/or bind the buffer

 	if (isNewBuffer)
 	{
 		gl.genBuffers(1, bufferName);
 		gl.bindBuffer(bindPoint, *bufferName);

 		// these methods write into existing storage, allocate it outside the timed region
 		if (m_uploadMethod == UPLOADMETHOD_BUFFER_SUB_DATA ||
 			m_uploadMethod == UPLOADMETHOD_MAP_BUFFER_RANGE)
 			gl.bufferData(bindPoint, numBytes, DE_NULL, GL_STATIC_DRAW);
 	}
 	else if (m_bufferState == BUFFERSTATE_EXISTING)
 		gl.bindBuffer(bindPoint, *bufferName);
 	else
 		DE_ASSERT(false);

 	// timed upload

 	const deUint64 uploadStart = deGetMicroseconds();

 	switch (m_uploadMethod)
 	{
 		case UPLOADMETHOD_BUFFER_DATA:
 			gl.bufferData(bindPoint, numBytes, srcData, GL_STATIC_DRAW);
 			break;

 		case UPLOADMETHOD_BUFFER_SUB_DATA:
 			gl.bufferSubData(bindPoint, 0, numBytes, srcData);
 			break;

 		case UPLOADMETHOD_MAP_BUFFER_RANGE:
 		{
 			void* const mappedPtr = gl.mapBufferRange(bindPoint, 0, numBytes, GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT | GL_MAP_UNSYNCHRONIZED_BIT);

 			if (!mappedPtr)
 				throw tcu::Exception("MapBufferRange returned NULL");

 			deMemcpy(mappedPtr, srcData, (int)numBytes);

 			// if unmapping fails, just try again later
 			if (!gl.unmapBuffer(bindPoint))
 				throw UnmapFailureError();

 			break;
 		}

 		default:
 			DE_ASSERT(false);
 	}

 	const deUint64 uploadEnd = deGetMicroseconds();

 	sample.uploadCallEndTime	= uploadEnd;
 	result.uploadDuration		= uploadEnd - uploadStart;
 }

 // Draws the test scene from the uploaded buffer(s) and reads the result back, timing
 // the draw call and the readPixels call separately.
 // Fills result.timeBeforeUse (time since sample.uploadCallEndTime), result.renderDuration,
 // result.readDuration and result.renderReadDuration.
 void UploadWaitDrawCase::drawFromBuffer (Sample& sample, Result& result)
 {
 	const glw::Functions&	gl				= m_context.getRenderContext().getFunctions();
 	tcu::Surface			resultSurface	(RENDER_AREA_SIZE, RENDER_AREA_SIZE);

 	// a vertex buffer is always needed, an index buffer exactly when drawing indexed
 	DE_ASSERT(m_vertexBuffer != 0);
 	switch (m_drawMethod)
 	{
 		case DRAWMETHOD_DRAW_ARRAYS:
 			DE_ASSERT(m_indexBuffer == 0);
 			break;

 		case DRAWMETHOD_DRAW_ELEMENTS:
 			DE_ASSERT(m_indexBuffer != 0);
 			break;

 		default:
 			DE_ASSERT(false);
 	}

 	// bind sources

 	gl.bindBuffer(GL_ARRAY_BUFFER, m_vertexBuffer);
 	if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
 		gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, m_indexBuffer);

 	setupVertexAttribs();

 	// microseconds passed since return from upload call
 	result.timeBeforeUse = deGetMicroseconds() - sample.uploadCallEndTime;

 	// draw
 	{
 		const deUint64 drawStart = deGetMicroseconds();

 		if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS)
 			gl.drawArrays(GL_TRIANGLES, 0, m_numVertices);
 		else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
 			gl.drawElements(GL_TRIANGLES, m_numVertices, GL_UNSIGNED_INT, DE_NULL);
 		else
 			DE_ASSERT(false);

 		const deUint64 drawEnd = deGetMicroseconds();

 		result.renderDuration = drawEnd - drawStart;
 	}

 	// read
 	{
 		const deUint64 readStart = deGetMicroseconds();
 		glu::readPixels(m_context.getRenderContext(), 0, 0, resultSurface.getAccess());
 		const deUint64 readEnd = deGetMicroseconds();

 		result.readDuration = readEnd - readStart;
 	}

 	result.renderReadDuration = result.renderDuration + result.readDuration;
 }

 // Respecifies the uploaded buffer with a tiny data set, draws once from it, deletes the
 // buffer and waits for the GL to finish. The buffer is expected to be still bound to
 // its target; the member holding its name is reset to 0.
 void UploadWaitDrawCase::reuseAndDeleteBuffer (void)
 {
 	const glw::Functions& gl = m_context.getRenderContext().getFunctions();

 	switch (m_targetBuffer)
 	{
 		case TARGETBUFFER_INDEX:
 		{
 			// respecify and delete index buffer
 			static const deUint32 indices[3] = {1, 3, 8};

 			DE_ASSERT(m_indexBuffer != 0);

 			gl.bufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof(indices), indices, GL_STATIC_DRAW);
 			gl.drawElements(GL_TRIANGLES, 3, GL_UNSIGNED_INT, DE_NULL);

 			gl.deleteBuffers(1, &m_indexBuffer);
 			m_indexBuffer = 0;
 			break;
 		}

 		case TARGETBUFFER_VERTEX:
 		{
 			// respecify and delete vertex buffer, data is interleaved color4, position4
 			static const tcu::Vec4 coloredTriangle[6] =
 			{
 				tcu::Vec4(1.0f, 0.0f, 0.0f, 1.0f), tcu::Vec4(-0.4f, -0.4f, 0.0f, 1.0f),
 				tcu::Vec4(1.0f, 0.0f, 0.0f, 1.0f), tcu::Vec4(-0.2f,  0.4f, 0.0f, 1.0f),
 				tcu::Vec4(1.0f, 0.0f, 0.0f, 1.0f), tcu::Vec4( 0.8f, -0.1f, 0.0f, 1.0f),
 			};

 			DE_ASSERT(m_vertexBuffer != 0);

 			gl.bufferData(GL_ARRAY_BUFFER, sizeof(coloredTriangle), coloredTriangle, GL_STATIC_DRAW);
 			gl.drawArrays(GL_TRIANGLES, 0, 3);

 			gl.deleteBuffers(1, &m_vertexBuffer);
 			m_vertexBuffer = 0;
 			break;
 		}

 		default:
 			// other targets own nothing to recycle
 			break;
 	}

 	waitGLResults();
 }

 void UploadWaitDrawCase::logAndSetTestResult (void)
 {
 	int		uploadStabilization;
 	int		renderReadStabilization;
 	int		renderStabilization;
 	int		readStabilization;
 	bool	temporallyStable;

 	{
 		const tcu::ScopedLogSection section(m_testCtx.getLog(), "Samples", "Result samples");
 		logSamples();
 	}

 	{
 		const tcu::ScopedLogSection section(m_testCtx.getLog(), "Stabilization", "Sample stability");

 		// log stabilization points
 		renderReadStabilization	= findStabilizationSample(&Result::renderReadDuration, "Combined draw and read");
 		uploadStabilization		= findStabilizationSample(&Result::uploadDuration, "Upload time");
 		renderStabilization		= findStabilizationSample(&Result::renderDuration, "Draw call time");
 		readStabilization		= findStabilizationSample(&Result::readDuration, "ReadPixels time");

 		temporallyStable		= true;
 		temporallyStable		&= checkSampleTemporalStability(&Result::renderReadDuration, "Combined draw and read");
 		temporallyStable		&= checkSampleTemporalStability(&Result::uploadDuration, "Upload time");
 		temporallyStable		&= checkSampleTemporalStability(&Result::renderDuration, "Draw call time");
 		temporallyStable		&= checkSampleTemporalStability(&Result::readDuration, "ReadPixels time");
 	}

 	{
 		const tcu::ScopedLogSection section(m_testCtx.getLog(), "Results", "Results");

 		// Check result sanily
 		if (uploadStabilization != 0)
 			m_testCtx.getLog() << tcu::TestLog::Message << "Warning! Upload times are not stable, test result may not be accurate." << tcu::TestLog::EndMessage;
 		if (!temporallyStable)
 			m_testCtx.getLog() << tcu::TestLog::Message << "Warning! Time samples do not seem to be temporally stable, sample times seem to drift to one direction during test execution." << tcu::TestLog::EndMessage;

 		// render & read
 		if (renderReadStabilization == -1)
 			m_testCtx.getLog() << tcu::TestLog::Message << "Combined time used in draw call and ReadPixels did not stabilize." << tcu::TestLog::EndMessage;
 		else
 			m_testCtx.getLog() << tcu::TestLog::Integer("RenderReadStabilizationPoint", "Combined draw call and ReadPixels call time stabilization time", "frames", QP_KEY_TAG_TIME, renderReadStabilization);

 		// draw call
 		if (renderStabilization == -1)
 			m_testCtx.getLog() << tcu::TestLog::Message << "Time used in draw call did not stabilize." << tcu::TestLog::EndMessage;
 		else
 			m_testCtx.getLog() << tcu::TestLog::Integer("DrawCallStabilizationPoint", "Draw call time stabilization time", "frames", QP_KEY_TAG_TIME, renderStabilization);

 		// readpixels
 		if (readStabilization == -1)
 			m_testCtx.getLog() << tcu::TestLog::Message << "Time used in ReadPixels did not stabilize." << tcu::TestLog::EndMessage;
 		else
 			m_testCtx.getLog() << tcu::TestLog::Integer("ReadPixelsStabilizationPoint", "ReadPixels call time stabilization time", "frames", QP_KEY_TAG_TIME, readStabilization);

 		// Report renderReadStabilization
 		if (renderReadStabilization != -1)
 			m_testCtx.setTestResult(QP_TEST_RESULT_PASS, de::toString(renderReadStabilization).c_str());
 		else
 			m_testCtx.setTestResult(QP_TEST_RESULT_PASS, de::toString(m_numMaxSwaps).c_str()); // don't report -1
 	}
 }

 void UploadWaitDrawCase::logSamples (void)
 {
 	// Inverse m_iterationOrder

 	std::vector<int> runOrder(m_iterationOrder.size());
 	for (int ndx = 0; ndx < (int)m_iterationOrder.size(); ++ndx)
 		runOrder[m_iterationOrder[ndx]] = ndx;

 	// Log samples

 	m_testCtx.getLog()
 		<< tcu::TestLog::SampleList("Samples", "Samples")
 		<< tcu::TestLog::SampleInfo
 		<< tcu::TestLog::ValueInfo("NumSwaps",		"SwapBuffers before use",			"",		QP_SAMPLE_VALUE_TAG_PREDICTOR)
 		<< tcu::TestLog::ValueInfo("Delay",			"Time before use",					"us",	QP_SAMPLE_VALUE_TAG_PREDICTOR)
 		<< tcu::TestLog::ValueInfo("RunOrder",		"Sample run order",					"",		QP_SAMPLE_VALUE_TAG_PREDICTOR)
 		<< tcu::TestLog::ValueInfo("DrawReadTime",	"Draw call and ReadPixels time",	"us",	QP_SAMPLE_VALUE_TAG_RESPONSE)
 		<< tcu::TestLog::ValueInfo("TotalTime",		"Total time",						"us",	QP_SAMPLE_VALUE_TAG_RESPONSE)
 		<< tcu::TestLog::ValueInfo("Upload time",	"Upload time",						"us",	QP_SAMPLE_VALUE_TAG_RESPONSE)
 		<< tcu::TestLog::ValueInfo("DrawCallTime",	"Draw call time",					"us",	QP_SAMPLE_VALUE_TAG_RESPONSE)
 		<< tcu::TestLog::ValueInfo("ReadTime",		"ReadPixels time",					"us",	QP_SAMPLE_VALUE_TAG_RESPONSE)
 		<< tcu::TestLog::EndSampleInfo;

 	for (int sampleNdx = 0; sampleNdx < (int)m_samples.size(); ++sampleNdx)
 		m_testCtx.getLog()
 			<< tcu::TestLog::Sample
 			<< m_samples[sampleNdx].numFrames
 			<< (int)m_results[sampleNdx].timeBeforeUse
 			<< runOrder[sampleNdx]
 			<< (int)m_results[sampleNdx].renderReadDuration
 			<< (int)(m_results[sampleNdx].renderReadDuration + m_results[sampleNdx].uploadDuration)
 			<< (int)m_results[sampleNdx].uploadDuration
 			<< (int)m_results[sampleNdx].renderDuration
 			<< (int)m_results[sampleNdx].readDuration
 			<< tcu::TestLog::EndSample;

 	m_testCtx.getLog() << tcu::TestLog::EndSampleList;
 }

 void UploadWaitDrawCase::drawMisc (void)
 {
 	const glw::Functions& gl = m_context.getRenderContext().getFunctions();

 	gl.bindBuffer(GL_ARRAY_BUFFER, m_miscBuffer);
 	setupVertexAttribs();
 	gl.drawArrays(GL_TRIANGLES, 0, m_numMiscVertices);
 }

 // Outcome of comparing two sets of observations with distributionCompare().
 struct DistributionCompareResult
 {
 	bool	equal;					//!< true when the absolute z-score is at most 1.96
 	float	standardDeviations;		//!< z-score of the test statistic, i.e. its distance from the expected value in standard deviations
 };

 template <typename Comparer>
 static float sumOfRanks (const std::vector<deUint64>& testSamples, const std::vector<deUint64>& allSamples, const Comparer& comparer)
 {
 	float sum = 0;

 	for (int sampleNdx = 0; sampleNdx < (int)testSamples.size(); ++sampleNdx)
 	{
 		const deUint64	testSample		= testSamples[sampleNdx];
 		const int		lowerIndex		= (int)(std::lower_bound(allSamples.begin(), allSamples.end(), testSample, comparer) - allSamples.begin());
 		const int		upperIndex		= (int)(std::upper_bound(allSamples.begin(), allSamples.end(), testSample, comparer) - allSamples.begin());
 		const int		lowerRank		= lowerIndex + 1;	// convert zero-indexed to rank
 		const int		upperRank		= upperIndex;		// convert zero-indexed to rank, upperIndex is last equal + 1
 		const float		rankMidpoint	= (float)(lowerRank + upperRank) / 2.0f;

 		sum += rankMidpoint;
 	}

 	return sum;
 }

 // Compares two sets of observations with the Mann-Whitney U test.
 //
 // The observations are pooled and sorted with operator<, while the ranks of set A within the
 // pool are looked up using comparer. U is converted to a z-score using the normal approximation.
 // The sets are reported as equal when the absolute z-score is at most 1.96.
 template <typename Comparer>
 static DistributionCompareResult distributionCompare (const std::vector<deUint64>& orderedObservationsA, const std::vector<deUint64>& orderedObservationsB, const Comparer& comparer)
 {
 	const int				n1			= (int)orderedObservationsA.size();
 	const int				n2			= (int)orderedObservationsB.size();
 	const int				numPairs	= n1 * n2;
 	std::vector<deUint64>	pooled		(orderedObservationsA);

 	// pool = A followed by B, then sorted
 	pooled.insert(pooled.end(), orderedObservationsB.begin(), orderedObservationsB.end());
 	std::sort(pooled.begin(), pooled.end());

 	// U statistic from the rank sum of A
 	const float rankSumA	= sumOfRanks(orderedObservationsA, pooled, comparer);
 	const float uA			= (float)(numPairs + n1*(n1 + 1)/2) - rankSumA;
 	const float uB			= (float)numPairs - uA;
 	const float uMin		= de::min(uA, uB);

 	// \note: sample sizes might not be large enough to expect normal distribution but we do it anyway
 	const float expectedU	= (float)numPairs / 2.0f;
 	const float deviationU	= deFloatSqrt((float)(numPairs*(n1+n2+1)) / 12.0f);
 	const float zScore		= (uMin - expectedU) / deviationU;

 	DistributionCompareResult comparison;

 	comparison.standardDeviations	= zScore;
 	comparison.equal				= (de::abs(zScore) <= 1.96f); // accept within 95% confidence interval

 	return comparison;
 }

 // Less-than predicate with tolerance: values whose difference is within the absolute
 // threshold, or within the relative threshold of either value, are not ordered with
 // respect to each other.
 template <typename T>
 struct ThresholdComparer
 {
 	float	relativeThreshold;	//!< tolerance as a fraction of either compared value
 	T		absoluteThreshold;	//!< tolerance in the units of the compared values

 	// Returns true only if a is less than b by more than both tolerances.
 	bool operator() (const T& a, const T& b) const
 	{
 		const float	lhs				= (float)a;
 		const float	rhs				= (float)b;
 		const float	distance		= de::abs(lhs - rhs);

 		const bool	withinAbsolute	= distance <= (float)absoluteThreshold;
 		const bool	withinRelative	= distance <= lhs*relativeThreshold ||
 									  distance <= rhs*relativeThreshold;

 		// close enough to be treated as equivalent
 		if (withinAbsolute || withinRelative)
 			return false;

 		// clearly different, plain comparison decides
 		return a < b;
 	}
 };

 // Finds the smallest swap count from which on the values of the given Result member have the
 // same distribution as the values of every larger swap count.
 //
 // \param target		Result member to analyze.
 // \param description	Human readable name of the member, used as prefix in log messages.
 // \return 0 if all swap counts have the same distribution, -1 if not even the two largest
 //		   swap counts agree, otherwise the first swap count of the stable tail.
 int UploadWaitDrawCase::findStabilizationSample (deUint64 UploadWaitDrawCase::Result::*target, const char* description)
 {
 	tcu::TestLog&						log						= m_testCtx.getLog();
 	ThresholdComparer<deUint64>			tolerance;
 	std::vector<std::vector<deUint64> >	observationsBySwapCount	(m_numMaxSwaps+1);

 	tolerance.relativeThreshold = 0.15f;	// 15%
 	tolerance.absoluteThreshold = 100;		// (us), assumed sampling precision

 	// collect the observations of each swap count and sort them

 	for (int swapCount = 0; swapCount <= m_numMaxSwaps; ++swapCount)
 	{
 		std::vector<deUint64>&	bucket		= observationsBySwapCount[swapCount];
 		int						numStored	= 0;

 		bucket.resize(m_numSamplesPerSwap);

 		for (int sampleNdx = 0; sampleNdx < (int)m_samples.size(); ++sampleNdx)
 		{
 			if (m_samples[sampleNdx].numFrames != swapCount)
 				continue;

 			bucket[numStored] = m_results[sampleNdx].*target;
 			++numStored;
 		}

 		// every swap count is expected to have exactly m_numSamplesPerSwap samples
 		DE_ASSERT(numStored == m_numSamplesPerSwap);

 		std::sort(bucket.begin(), bucket.end());
 	}

 	// walk from the second largest swap count downwards until a distribution differs from a later one

 	for (int candidate = m_numMaxSwaps-1; candidate != -1; --candidate)
 	{
 		const std::vector<deUint64>& candidateObservations = observationsBySwapCount[candidate];

 		// candidate belongs to the stable tail only if it matches all following distributions
 		for (int later = candidate+1; later <= m_numMaxSwaps; ++later)
 		{
 			const std::vector<deUint64>&	laterObservations	= observationsBySwapCount[later];
 			const DistributionCompareResult	comparison			= distributionCompare(candidateObservations, laterObservations, tolerance);

 			if (comparison.equal)
 				continue;

 			// Stable section ends here

 			if (candidate == m_numMaxSwaps-1)
 			{
 				// Last two samples are not equal? Samples never stabilized
 				log	<< tcu::TestLog::Message
 					<< description << ": Samples with swap count " << candidate << " and " << later << " do not seem to have the same distribution:\n"
 					<< "\tDifference in standard deviations: " << comparison.standardDeviations << "\n"
 					<< "\tSwap count " << candidate << " median: " << linearSample(candidateObservations, 0.5f) << "\n"
 					<< "\tSwap count " << later << " median: " << linearSample(laterObservations, 0.5f) << "\n"
 					<< tcu::TestLog::EndMessage;

 				return -1;
 			}

 			log	<< tcu::TestLog::Message
 				<< description << ": Samples with swap count " << candidate << " and " << later << " do not seem to have the same distribution:\n"
 				<< "\tSamples with swap count " << candidate << " are not part of the tail of stable results.\n"
 				<< "\tDifference in standard deviations: " << comparison.standardDeviations << "\n"
 				<< "\tSwap count " << candidate << " median: " << linearSample(candidateObservations, 0.5f) << "\n"
 				<< "\tSwap count " << later << " median: " << linearSample(laterObservations, 0.5f) << "\n"
 				<< tcu::TestLog::EndMessage;

 			// stable tail starts right after the candidate
 			return candidate+1;
 		}
 	}

 	// all distributions equal

 	log	<< tcu::TestLog::Message
 		<< description << ": All samples seem to have the same distribution"
 		<< tcu::TestLog::EndMessage;

 	return 0;
 }

 // Checks whether the values of the given Result member correlate with the order in which the
 // samples were taken, by fitting a line to (execution position, value) points.
 //
 // \param target		Result member to analyze.
 // \param description	Human readable name of the member, used as prefix in the log message.
 // \return false (after logging a message) if the fitted change over the whole run exceeds
 //		   25% of the fitted offset, true otherwise.
 bool UploadWaitDrawCase::checkSampleTemporalStability (deUint64 UploadWaitDrawCase::Result::*target, const char* description)
 {
 	const int				numDataPoints	= (int)m_iterationOrder.size();
 	std::vector<tcu::Vec2>	dataPoints		(m_iterationOrder.size());

 	// x = position in the iteration order, y = measured value of the sample run at that position
 	for (int orderNdx = 0; orderNdx < numDataPoints; ++orderNdx)
 	{
 		tcu::Vec2& point = dataPoints[m_iterationOrder[orderNdx]];

 		point.x() = (float)orderNdx;
 		point.y() = (float)(m_results[m_iterationOrder[orderNdx]].*target);
 	}

 	// \note Second argument is presumably the confidence level used by the regression.
 	const LineParametersWithConfidence lineFit = theilSenSiegelLinearRegression(dataPoints, 0.6f);

 	// Difference of more than 25% of the offset along the whole sample range
 	const bool drifts = de::abs(lineFit.coefficient) * (float)numDataPoints > de::abs(lineFit.offset) * 0.25f;

 	if (!drifts)
 		return true;

 	m_testCtx.getLog()
 		<< tcu::TestLog::Message
 		<< description << ": Correlation with data point observation order and result time. Results are not temporally stable, observations are not independent.\n"
 		<< "\tCoefficient: " << lineFit.coefficient << " (us / observation)\n"
 		<< tcu::TestLog::EndMessage;

 	return false;
 }

 } // anonymous

 // Creates the "data_upload" test group. The group is empty after construction; its child
 // groups and cases are added in init().
 BufferDataUploadTests::BufferDataUploadTests (Context& context)
 	: TestCaseGroup(context, "data_upload", "Buffer data upload performance tests")
 {
 }

 // Nothing to release here; this class declares no cleanup of its own.
 BufferDataUploadTests::~BufferDataUploadTests (void)
 {
 }

 void BufferDataUploadTests::init (void)
 {
 	static const struct BufferUsage
 	{
 		const char* name;
 		deUint32	usage;
 		bool		primaryUsage;
 	} bufferUsages[] =
 	{
 		{ "stream_draw",	GL_STREAM_DRAW,		true	},
 		{ "stream_read",	GL_STREAM_READ,		false	},
 		{ "stream_copy",	GL_STREAM_COPY,		false	},
 		{ "static_draw",	GL_STATIC_DRAW,		true	},
 		{ "static_read",	GL_STATIC_READ,		false	},
 		{ "static_copy",	GL_STATIC_COPY,		false	},
 		{ "dynamic_draw",	GL_DYNAMIC_DRAW,	true	},
 		{ "dynamic_read",	GL_DYNAMIC_READ,	false	},
 		{ "dynamic_copy",	GL_DYNAMIC_COPY,	false	},
 	};

 	tcu::TestCaseGroup* const referenceGroup			= new tcu::TestCaseGroup(m_testCtx, "reference",			"Reference functions");
 	tcu::TestCaseGroup* const functionCallGroup			= new tcu::TestCaseGroup(m_testCtx, "function_call",		"Function call timing");
 	tcu::TestCaseGroup* const modifyAfterUseGroup		= new tcu::TestCaseGroup(m_testCtx, "modify_after_use",		"Function call time after buffer has been used");
 	tcu::TestCaseGroup* const renderAfterUploadGroup	= new tcu::TestCaseGroup(m_testCtx, "render_after_upload",	"Function call time of draw commands after buffer has been modified");

 	addChild(referenceGroup);
 	addChild(functionCallGroup);
 	addChild(modifyAfterUseGroup);
 	addChild(renderAfterUploadGroup);

 	// .reference
 	{
 		static const struct BufferSizeRange
 		{
 			const char* name;
 			int			minBufferSize;
 			int			maxBufferSize;
 			int			numSamples;
 			bool		largeBuffersCase;
 		} sizeRanges[] =
 		{
 			{ "small_buffers", 0,		1 << 18,	64,		false	}, // !< 0kB - 256kB
 			{ "large_buffers", 1 << 18,	1 << 24,	32,		true	}, // !< 256kB - 16MB
 		};

 		for (int bufferSizeRangeNdx = 0; bufferSizeRangeNdx < DE_LENGTH_OF_ARRAY(sizeRanges); ++bufferSizeRangeNdx)
 		{
 			referenceGroup->addChild(new ReferenceMemcpyCase(m_context,
 															 std::string("memcpy_").append(sizeRanges[bufferSizeRangeNdx].name).c_str(),
 															 "Test memcpy performance",
 															 sizeRanges[bufferSizeRangeNdx].minBufferSize,
 															 sizeRanges[bufferSizeRangeNdx].maxBufferSize,
 															 sizeRanges[bufferSizeRangeNdx].numSamples,
 															 sizeRanges[bufferSizeRangeNdx].largeBuffersCase));
 		}
 	}

 	// .function_call
 	{
 		const int minBufferSize		= 0;		// !< 0kiB
 		const int maxBufferSize		= 1 << 24;	// !< 16MiB
 		const int numDataSamples	= 25;
 		const int numMapSamples		= 25;

 		tcu::TestCaseGroup* const bufferDataMethodGroup		= new tcu::TestCaseGroup(m_testCtx, "buffer_data", "Use glBufferData");
 		tcu::TestCaseGroup* const bufferSubDataMethodGroup	= new tcu::TestCaseGroup(m_testCtx, "buffer_sub_data", "Use glBufferSubData");
 		tcu::TestCaseGroup* const mapBufferRangeMethodGroup	= new tcu::TestCaseGroup(m_testCtx, "map_buffer_range", "Use glMapBufferRange");

 		functionCallGroup->addChild(bufferDataMethodGroup);
 		functionCallGroup->addChild(bufferSubDataMethodGroup);
 		functionCallGroup->addChild(mapBufferRangeMethodGroup);

 		// .buffer_data
 		{
 			static const struct TargetCase
 			{
 				tcu::TestCaseGroup*				group;
 				BufferDataUploadCase::CaseType	caseType;
 				bool							allUsages;
 			} targetCases[] =
 			{
 				{ new tcu::TestCaseGroup(m_testCtx, "new_buffer",				"Target new buffer"),							BufferDataUploadCase::CASE_NEW_BUFFER,			true	},
 				{ new tcu::TestCaseGroup(m_testCtx, "unspecified_buffer",		"Target new unspecified buffer"),				BufferDataUploadCase::CASE_UNSPECIFIED_BUFFER,	true	},
 				{ new tcu::TestCaseGroup(m_testCtx, "specified_buffer",			"Target new specified buffer"),					BufferDataUploadCase::CASE_SPECIFIED_BUFFER,	true	},
 				{ new tcu::TestCaseGroup(m_testCtx, "used_buffer",				"Target buffer that was used in draw"),			BufferDataUploadCase::CASE_USED_BUFFER,			true	},
 				{ new tcu::TestCaseGroup(m_testCtx, "larger_used_buffer",		"Target larger buffer that was used in draw"),	BufferDataUploadCase::CASE_USED_LARGER_BUFFER,	false	},
 			};

 			for (int targetNdx = 0; targetNdx < DE_LENGTH_OF_ARRAY(targetCases); ++targetNdx)
 			{
 				bufferDataMethodGroup->addChild(targetCases[targetNdx].group);

 				for (int usageNdx = 0; usageNdx < DE_LENGTH_OF_ARRAY(bufferUsages); ++usageNdx)
 					if (bufferUsages[usageNdx].primaryUsage || targetCases[targetNdx].allUsages)
 						targetCases[targetNdx].group->addChild(new BufferDataUploadCase(m_context,
 																						std::string("usage_").append(bufferUsages[usageNdx].name).c_str(),
 																						std::string("Test with usage = ").append(bufferUsages[usageNdx].name).c_str(),
 																						minBufferSize,
 																						maxBufferSize,
 																						numDataSamples,
 																						bufferUsages[usageNdx].usage,
 																						targetCases[targetNdx].caseType));
 			}
 		}

 		// .buffer_sub_data
 		{
 			static const struct FlagCase
 			{
 				tcu::TestCaseGroup*					group;
 				BufferSubDataUploadCase::CaseType	parentCase;
 				bool								allUsages;
 				int									flags;
 			} flagCases[] =
 			{
 				{ new tcu::TestCaseGroup(m_testCtx, "used_buffer_full_upload",					    ""),															BufferSubDataUploadCase::CASE_USED_BUFFER,	true,	BufferSubDataUploadCase::FLAG_FULL_UPLOAD															},
 				{ new tcu::TestCaseGroup(m_testCtx, "used_buffer_invalidate_before_full_upload",    "Clear buffer with bufferData(...,NULL) before sub data call"),	BufferSubDataUploadCase::CASE_USED_BUFFER,	false,	BufferSubDataUploadCase::FLAG_FULL_UPLOAD    | BufferSubDataUploadCase::FLAG_INVALIDATE_BEFORE_USE	},
 				{ new tcu::TestCaseGroup(m_testCtx, "used_buffer_partial_upload",                   ""),															BufferSubDataUploadCase::CASE_USED_BUFFER,	true,	BufferSubDataUploadCase::FLAG_PARTIAL_UPLOAD														},
 				{ new tcu::TestCaseGroup(m_testCtx, "used_buffer_invalidate_before_partial_upload", "Clear buffer with bufferData(...,NULL) before sub data call"),	BufferSubDataUploadCase::CASE_USED_BUFFER,	false,	BufferSubDataUploadCase::FLAG_PARTIAL_UPLOAD | BufferSubDataUploadCase::FLAG_INVALIDATE_BEFORE_USE	},
 			};

 			for (int flagNdx = 0; flagNdx < DE_LENGTH_OF_ARRAY(flagCases); ++flagNdx)
 			{
 				bufferSubDataMethodGroup->addChild(flagCases[flagNdx].group);

 				for (int usageNdx = 0; usageNdx < DE_LENGTH_OF_ARRAY(bufferUsages); ++usageNdx)
 					if (bufferUsages[usageNdx].primaryUsage || flagCases[flagNdx].allUsages)
 							flagCases[flagNdx].group->addChild(new BufferSubDataUploadCase(m_context,
 																						   std::string("usage_").append(bufferUsages[usageNdx].name).c_str(),
 																						   std::string("Test with usage = ").append(bufferUsages[usageNdx].name).c_str(),
 																						   minBufferSize,
 																						   maxBufferSize,
 																						   numDataSamples,
 																						   bufferUsages[usageNdx].usage,
 																						   flagCases[flagNdx].parentCase,
 																						   flagCases[flagNdx].flags));
 			}
 		}

 		// .map_buffer_range
 		{
 			static const struct FlagCase
 			{
 				const char*	name;
 				bool		usefulForUnusedBuffers;
 				bool		allUsages;
 				int			glFlags;
 				int			caseFlags;
 			} flagCases[] =
 			{
 				{ "flag_write_full",										true,	true,	GL_MAP_WRITE_BIT,																0																				},
 				{ "flag_write_partial",										true,	true,	GL_MAP_WRITE_BIT,																MapBufferRangeCase::FLAG_PARTIAL												},
 				{ "flag_read_write_full",									true,	true,	GL_MAP_WRITE_BIT | GL_MAP_READ_BIT,												0																				},
 				{ "flag_read_write_partial",								true,	true,	GL_MAP_WRITE_BIT | GL_MAP_READ_BIT,												MapBufferRangeCase::FLAG_PARTIAL												},
 				{ "flag_invalidate_range_full",								true,	false,	GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT,									0																				},
 				{ "flag_invalidate_range_partial",							true,	false,	GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT,									MapBufferRangeCase::FLAG_PARTIAL												},
 				{ "flag_invalidate_buffer_full",							true,	false,	GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT,								0																				},
 				{ "flag_invalidate_buffer_partial",							true,	false,	GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT,								MapBufferRangeCase::FLAG_PARTIAL												},
 				{ "flag_write_full_manual_invalidate_buffer",				false,	false,	GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT,									MapBufferRangeCase::FLAG_MANUAL_INVALIDATION									},
 				{ "flag_write_partial_manual_invalidate_buffer",			false,	false,	GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT,									MapBufferRangeCase::FLAG_PARTIAL | MapBufferRangeCase::FLAG_MANUAL_INVALIDATION	},
 				{ "flag_unsynchronized_full",								true,	false,	GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT,									0																				},
 				{ "flag_unsynchronized_partial",							true,	false,	GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT,									MapBufferRangeCase::FLAG_PARTIAL												},
 				{ "flag_unsynchronized_and_invalidate_buffer_full",			true,	false,	GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT | GL_MAP_INVALIDATE_BUFFER_BIT,	0																				},
 				{ "flag_unsynchronized_and_invalidate_buffer_partial",		true,	false,	GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT | GL_MAP_INVALIDATE_BUFFER_BIT,	MapBufferRangeCase::FLAG_PARTIAL												},
 			};
 			static const struct FlushCases
 			{
 				const char*	name;
 				int			glFlags;
 				int			caseFlags;
 			} flushCases[] =
 			{
 				{ "flag_flush_explicit_map_full",					GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT,	0												},
 				{ "flag_flush_explicit_map_partial",				GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT,	MapBufferRangeFlushCase::FLAG_PARTIAL			},
 				{ "flag_flush_explicit_map_full_flush_in_parts",	GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT,	MapBufferRangeFlushCase::FLAG_FLUSH_IN_PARTS	},
 				{ "flag_flush_explicit_map_full_flush_partial",		GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT,	MapBufferRangeFlushCase::FLAG_FLUSH_PARTIAL		},
 			};
 			static const struct MapTestGroup
 			{
 				int					flags;
 				bool				unusedBufferCase;
 				tcu::TestCaseGroup* group;
 			} groups[] =
 			{
 				{ MapBufferRangeCase::FLAG_USE_UNUSED_UNSPECIFIED_BUFFER,	true,	new tcu::TestCaseGroup(m_testCtx, "new_unspecified_buffer", "Test with unused, unspecified buffers"),				},
 				{ MapBufferRangeCase::FLAG_USE_UNUSED_SPECIFIED_BUFFER,		true,	new tcu::TestCaseGroup(m_testCtx, "new_specified_buffer", "Test with unused, specified buffers"),					},
 				{ 0,														false,	new tcu::TestCaseGroup(m_testCtx, "used_buffer", "Test with used (data has been sourced from a buffer) buffers")	},
 			};

 			// we OR same flags to both range and flushRange cases, make sure it is legal
 			DE_STATIC_ASSERT((int)MapBufferRangeCase::FLAG_USE_UNUSED_SPECIFIED_BUFFER == (int)MapBufferRangeFlushCase::FLAG_USE_UNUSED_SPECIFIED_BUFFER);
 			DE_STATIC_ASSERT((int)MapBufferRangeCase::FLAG_USE_UNUSED_UNSPECIFIED_BUFFER == (int)MapBufferRangeFlushCase::FLAG_USE_UNUSED_UNSPECIFIED_BUFFER);

 			for (int groupNdx = 0; groupNdx < DE_LENGTH_OF_ARRAY(groups); ++groupNdx)
 			{
 				tcu::TestCaseGroup* const bufferTypeGroup = groups[groupNdx].group;

 				mapBufferRangeMethodGroup->addChild(bufferTypeGroup);

 				for (int caseNdx = 0; caseNdx < DE_LENGTH_OF_ARRAY(flagCases); ++caseNdx)
 				{
 					if (groups[groupNdx].unusedBufferCase && !flagCases[caseNdx].usefulForUnusedBuffers)
 						continue;

 					tcu::TestCaseGroup* const bufferUsageGroup = new tcu::TestCaseGroup(m_testCtx, flagCases[caseNdx].name, "");
 					bufferTypeGroup->addChild(bufferUsageGroup);

 					for (int usageNdx = 0; usageNdx < DE_LENGTH_OF_ARRAY(bufferUsages); ++usageNdx)
 						if (bufferUsages[usageNdx].primaryUsage || flagCases[caseNdx].allUsages)
 							bufferUsageGroup->addChild(new MapBufferRangeCase(m_context,
 																			  bufferUsages[usageNdx].name,
 																			  std::string("Test with usage = ").append(bufferUsages[usageNdx].name).c_str(),
 																			  minBufferSize,
 																			  maxBufferSize,
 																			  numMapSamples,
 																			  bufferUsages[usageNdx].usage,
 																			  flagCases[caseNdx].glFlags,
 																			  flagCases[caseNdx].caseFlags | groups[groupNdx].flags));
 				}

 				for (int caseNdx = 0; caseNdx < DE_LENGTH_OF_ARRAY(flushCases); ++caseNdx)
 				{
 					tcu::TestCaseGroup* const bufferUsageGroup = new tcu::TestCaseGroup(m_testCtx, flushCases[caseNdx].name, "");
 					bufferTypeGroup->addChild(bufferUsageGroup);

 					for (int usageNdx = 0; usageNdx < DE_LENGTH_OF_ARRAY(bufferUsages); ++usageNdx)
 						if (bufferUsages[usageNdx].primaryUsage)
 							bufferUsageGroup->addChild(new MapBufferRangeFlushCase(m_context,
 																				   bufferUsages[usageNdx].name,
 																				   std::string("Test with usage = ").append(bufferUsages[usageNdx].name).c_str(),
 																				   minBufferSize,
 																				   maxBufferSize,
 																				   numMapSamples,
 																				   bufferUsages[usageNdx].usage,
 																				   flushCases[caseNdx].glFlags,
 																				   flushCases[caseNdx].caseFlags | groups[groupNdx].flags));
 				}
 			}
 		}
 	}

 	// .modify_after_use
 	{
 		const int minBufferSize	= 0;		// !< 0kiB
 		const int maxBufferSize	= 1 << 24;	// !< 16MiB

 		static const struct Usage
 		{
 			const char* name;
 			const char* description;
 			deUint32	usage;
 		} usages[] =
 		{
 			{ "static_draw",	"Test with GL_STATIC_DRAW",		GL_STATIC_DRAW	},
 			{ "dynamic_draw",	"Test with GL_DYNAMIC_DRAW",	GL_DYNAMIC_DRAW	},
 			{ "stream_draw",	"Test with GL_STREAM_DRAW",		GL_STREAM_DRAW },

 		};

 		for (int usageNdx = 0; usageNdx < DE_LENGTH_OF_ARRAY(usages); ++usageNdx)
 		{
 			tcu::TestCaseGroup* const usageGroup = new tcu::TestCaseGroup(m_testCtx, usages[usageNdx].name, usages[usageNdx].description);
 			modifyAfterUseGroup->addChild(usageGroup);

 			usageGroup->addChild(new ModifyAfterWithBufferDataCase		(m_context, "buffer_data",							"Respecify buffer contents after use",					minBufferSize, maxBufferSize, usages[usageNdx].usage, 0));
 			usageGroup->addChild(new ModifyAfterWithBufferDataCase		(m_context, "buffer_data_different_size",			"Respecify buffer contents and size after use",			minBufferSize, maxBufferSize, usages[usageNdx].usage, ModifyAfterWithBufferDataCase::FLAG_RESPECIFY_SIZE));
 			usageGroup->addChild(new ModifyAfterWithBufferDataCase		(m_context, "buffer_data_repeated",					"Respecify buffer contents after upload and use",		minBufferSize, maxBufferSize, usages[usageNdx].usage, ModifyAfterWithBufferDataCase::FLAG_UPLOAD_REPEATED));

 			usageGroup->addChild(new ModifyAfterWithBufferSubDataCase	(m_context, "buffer_sub_data_full",					"Respecify buffer contents after use",					minBufferSize, maxBufferSize, usages[usageNdx].usage, 0));
 			usageGroup->addChild(new ModifyAfterWithBufferSubDataCase	(m_context, "buffer_sub_data_partial",				"Respecify buffer contents partially use",				minBufferSize, maxBufferSize, usages[usageNdx].usage, ModifyAfterWithBufferSubDataCase::FLAG_PARTIAL));
 			usageGroup->addChild(new ModifyAfterWithBufferSubDataCase	(m_context, "buffer_sub_data_full_repeated",		"Respecify buffer contents after upload and use",		minBufferSize, maxBufferSize, usages[usageNdx].usage, ModifyAfterWithBufferSubDataCase::FLAG_UPLOAD_REPEATED));
 			usageGroup->addChild(new ModifyAfterWithBufferSubDataCase	(m_context, "buffer_sub_data_partial_repeated",		"Respecify buffer contents partially upload and use",	minBufferSize, maxBufferSize, usages[usageNdx].usage, ModifyAfterWithBufferSubDataCase::FLAG_UPLOAD_REPEATED | ModifyAfterWithBufferSubDataCase::FLAG_PARTIAL));

 			usageGroup->addChild(new ModifyAfterWithMapBufferRangeCase	(m_context, "map_flag_write_full",					"Respecify buffer contents after use",					minBufferSize, maxBufferSize, usages[usageNdx].usage, 0,												GL_MAP_WRITE_BIT));
 			usageGroup->addChild(new ModifyAfterWithMapBufferRangeCase	(m_context, "map_flag_write_partial",				"Respecify buffer contents partially after use",		minBufferSize, maxBufferSize, usages[usageNdx].usage, ModifyAfterWithMapBufferRangeCase::FLAG_PARTIAL,	GL_MAP_WRITE_BIT));
 			usageGroup->addChild(new ModifyAfterWithMapBufferRangeCase	(m_context, "map_flag_read_write_full",				"Respecify buffer contents after use",					minBufferSize, maxBufferSize, usages[usageNdx].usage, 0,												GL_MAP_READ_BIT | GL_MAP_WRITE_BIT));
 			usageGroup->addChild(new ModifyAfterWithMapBufferRangeCase	(m_context, "map_flag_read_write_partial",			"Respecify buffer contents partially after use",		minBufferSize, maxBufferSize, usages[usageNdx].usage, ModifyAfterWithMapBufferRangeCase::FLAG_PARTIAL,	GL_MAP_READ_BIT | GL_MAP_WRITE_BIT));
 			usageGroup->addChild(new ModifyAfterWithMapBufferRangeCase	(m_context, "map_flag_invalidate_range_full",		"Respecify buffer contents after use",					minBufferSize, maxBufferSize, usages[usageNdx].usage, 0,												GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT));
 			usageGroup->addChild(new ModifyAfterWithMapBufferRangeCase	(m_context, "map_flag_invalidate_range_partial",	"Respecify buffer contents partially after use",		minBufferSize, maxBufferSize, usages[usageNdx].usage, ModifyAfterWithMapBufferRangeCase::FLAG_PARTIAL,	GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT));
 			usageGroup->addChild(new ModifyAfterWithMapBufferRangeCase	(m_context, "map_flag_invalidate_buffer_full",		"Respecify buffer contents after use",					minBufferSize, maxBufferSize, usages[usageNdx].usage, 0,												GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT));
 			usageGroup->addChild(new ModifyAfterWithMapBufferRangeCase	(m_context, "map_flag_invalidate_buffer_partial",	"Respecify buffer contents partially after use",		minBufferSize, maxBufferSize, usages[usageNdx].usage, ModifyAfterWithMapBufferRangeCase::FLAG_PARTIAL,	GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT));
 			usageGroup->addChild(new ModifyAfterWithMapBufferRangeCase	(m_context, "map_flag_unsynchronized_full",			"Respecify buffer contents after use",					minBufferSize, maxBufferSize, usages[usageNdx].usage, 0,												GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT));
 			usageGroup->addChild(new ModifyAfterWithMapBufferRangeCase	(m_context, "map_flag_unsynchronized_partial",		"Respecify buffer contents partially after use",		minBufferSize, maxBufferSize, usages[usageNdx].usage, ModifyAfterWithMapBufferRangeCase::FLAG_PARTIAL,	GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT));

 			usageGroup->addChild(new ModifyAfterWithMapBufferFlushCase	(m_context, "map_flag_flush_explicit_full",			"Respecify buffer contents after use",					minBufferSize, maxBufferSize, usages[usageNdx].usage, 0,												GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT));
 			usageGroup->addChild(new ModifyAfterWithMapBufferFlushCase	(m_context, "map_flag_flush_explicit_partial",		"Respecify buffer contents partially after use",		minBufferSize, maxBufferSize, usages[usageNdx].usage, ModifyAfterWithMapBufferFlushCase::FLAG_PARTIAL,	GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT));
 		}
 	}

 	// .render_after_upload
 	{
 		// .reference
 		// Builds the "reference" sub-tree ("Baseline results") and attaches it to
 		// renderAfterUploadGroup. It contains two child groups: "draw" and
 		// "draw_upload_draw".
 		{
 			tcu::TestCaseGroup* const renderReferenceGroup = new tcu::TestCaseGroup(m_testCtx, "reference", "Baseline results");
 			renderAfterUploadGroup->addChild(renderReferenceGroup);

 			// .draw
 			// Three fixed cases: one readPixels timing case and one render timing
 			// case per draw method (drawArrays, drawElements).
 			{
 				tcu::TestCaseGroup* const drawGroup = new tcu::TestCaseGroup(m_testCtx, "draw", "Time usage of functions with non-modified buffers");
 				renderReferenceGroup->addChild(drawGroup);

 				// Time consumed by readPixels
 				drawGroup->addChild(new ReferenceReadPixelsTimeCase	(m_context, "read_pixels",		"Measure time consumed by readPixels() function call"));

 				// Time consumed by rendering
 				drawGroup->addChild(new ReferenceRenderTimeCase		(m_context, "draw_arrays",		"Measure time consumed by drawArrays() function call",		DRAWMETHOD_DRAW_ARRAYS));
 				drawGroup->addChild(new ReferenceRenderTimeCase		(m_context, "draw_elements",	"Measure time consumed by drawElements() function call",	DRAWMETHOD_DRAW_ELEMENTS));
 			}

 			// .draw_upload_draw
 			// Table-driven: one BufferInUseRenderTimeCase is created for every
 			// (upload target, upload method) pair that passes the partial-upload
 			// filter below. Every case here is constructed with
 			// UPLOADBUFFERTARGET_DIFFERENT_BUFFER.
 			{
 				// Upload targets: which draw call is used, which buffer kind receives
 				// the upload, and whether the upload range is partial.
 				static const struct
 				{
 					const char*		name;
 					const char*		description;
 					DrawMethod		drawMethod;
 					TargetBuffer	targetBuffer;
 					bool			partial;
 				} uploadTargets[] =
 				{
 					{
 						"draw_arrays_upload_vertices",
 						"Measure time consumed by drawArrays, vertex attribute upload, another drawArrays, and readPixels function calls.",
 						DRAWMETHOD_DRAW_ARRAYS,
 						TARGETBUFFER_VERTEX,
 						false
 					},
 					{
 						"draw_arrays_upload_vertices_partial",
 						"Measure time consumed by drawArrays, partial vertex attribute upload, another drawArrays, and readPixels function calls.",
 						DRAWMETHOD_DRAW_ARRAYS,
 						TARGETBUFFER_VERTEX,
 						true
 					},
 					{
 						"draw_elements_upload_vertices",
 						"Measure time consumed by drawElements, vertex attribute upload, another drawElements, and readPixels function calls.",
 						DRAWMETHOD_DRAW_ELEMENTS,
 						TARGETBUFFER_VERTEX,
 						false
 					},
 					{
 						"draw_elements_upload_indices",
 						"Measure time consumed by drawElements, index upload, another drawElements, and readPixels function calls.",
 						DRAWMETHOD_DRAW_ELEMENTS,
 						TARGETBUFFER_INDEX,
 						false
 					},
 					{
 						"draw_elements_upload_indices_partial",
 						"Measure time consumed by drawElements, partial index upload, another drawElements, and readPixels function calls.",
 						DRAWMETHOD_DRAW_ELEMENTS,
 						TARGETBUFFER_INDEX,
 						true
 					},
 				};
 				// Upload methods: the name suffix, the upload method enum, the map
 				// flags handed to the case, and whether the method may be combined
 				// with a partial upload target. The `description` field is not read
 				// by the loop below.
 				static const struct
 				{
 					const char*							name;
 					const char*							description;
 					UploadMethod						uploadMethod;
 					BufferInUseRenderTimeCase::MapFlags	mapFlags;
 					bool								supportsPartialUpload;
 				} uploadMethods[] =
 				{
 					{ "buffer_data",						"bufferData",		UPLOADMETHOD_BUFFER_DATA,		BufferInUseRenderTimeCase::MAPFLAG_NONE,				false	},
 					{ "buffer_sub_data",					"bufferSubData",	UPLOADMETHOD_BUFFER_SUB_DATA,	BufferInUseRenderTimeCase::MAPFLAG_NONE,				true	},
 					{ "map_buffer_range_invalidate_range",	"mapBufferRange",	UPLOADMETHOD_MAP_BUFFER_RANGE,	BufferInUseRenderTimeCase::MAPFLAG_INVALIDATE_RANGE,	true	},
 					{ "map_buffer_range_invalidate_buffer",	"mapBufferRange",	UPLOADMETHOD_MAP_BUFFER_RANGE,	BufferInUseRenderTimeCase::MAPFLAG_INVALIDATE_BUFFER,	false	},
 				};

 				tcu::TestCaseGroup* const drawUploadDrawGroup = new tcu::TestCaseGroup(m_testCtx, "draw_upload_draw", "Time usage of functions draw, upload and another draw");
 				renderReferenceGroup->addChild(drawUploadDrawGroup);

 				// Iterate targets (outer) x methods (inner); the case name is
 				// "<target name>_with_<method name>".
 				for (int uploadTargetNdx = 0; uploadTargetNdx < DE_LENGTH_OF_ARRAY(uploadTargets); ++uploadTargetNdx)
 				for (int uploadMethodNdx = 0; uploadMethodNdx < DE_LENGTH_OF_ARRAY(uploadMethods); ++uploadMethodNdx)
 				{
 					const std::string name = std::string() + uploadTargets[uploadTargetNdx].name + "_with_" + uploadMethods[uploadMethodNdx].name;

 					// Skip partial targets for methods flagged as not supporting partial upload
 					if (uploadTargets[uploadTargetNdx].partial && !uploadMethods[uploadMethodNdx].supportsPartialUpload)
 						continue;

 					// The target's `partial` flag selects UPLOADRANGE_PARTIAL or UPLOADRANGE_FULL
 					drawUploadDrawGroup->addChild(new BufferInUseRenderTimeCase(m_context,
 																				name.c_str(),
 																				uploadTargets[uploadTargetNdx].description,
 																				uploadTargets[uploadTargetNdx].drawMethod,
 																				uploadMethods[uploadMethodNdx].mapFlags,
 																				uploadTargets[uploadTargetNdx].targetBuffer,
 																				uploadMethods[uploadMethodNdx].uploadMethod,
 																				(uploadTargets[uploadTargetNdx].partial) ? (UPLOADRANGE_PARTIAL) : (UPLOADRANGE_FULL),
 																				BufferInUseRenderTimeCase::UPLOADBUFFERTARGET_DIFFERENT_BUFFER));
 				}
 			}
 		}

 		// .upload_unrelated_and_draw
 		// Builds the "upload_unrelated_and_draw" group and attaches it to
 		// renderAfterUploadGroup. One UnrelatedUploadRenderTimeCase is added for
 		// every (draw method, upload method) pair; no pair is filtered out.
 		{
 			// Draw methods: name prefix, human-readable call name used in the
 			// generated description, and the draw method enum.
 			static const struct
 			{
 				const char*		name;
 				const char*		description;
 				DrawMethod		drawMethod;
 			} drawMethods[] =
 			{
 				{ "draw_arrays",	"drawArrays",	DRAWMETHOD_DRAW_ARRAYS		},
 				{ "draw_elements",	"drawElements",	DRAWMETHOD_DRAW_ELEMENTS	},
 			};

 			// Upload methods: name suffix and the upload method enum.
 			static const struct
 			{
 				const char*		name;
 				UploadMethod	uploadMethod;
 			} uploadMethods[] =
 			{
 				{ "buffer_data",		UPLOADMETHOD_BUFFER_DATA		},
 				{ "buffer_sub_data",	UPLOADMETHOD_BUFFER_SUB_DATA	},
 				{ "map_buffer_range",	UPLOADMETHOD_MAP_BUFFER_RANGE	},
 			};

 			tcu::TestCaseGroup* const uploadUnrelatedGroup = new tcu::TestCaseGroup(m_testCtx, "upload_unrelated_and_draw", "Time usage of functions after an unrelated upload");
 			renderAfterUploadGroup->addChild(uploadUnrelatedGroup);

 			// Iterate draw methods (outer) x upload methods (inner)
 			for (int drawMethodNdx = 0; drawMethodNdx < DE_LENGTH_OF_ARRAY(drawMethods); ++drawMethodNdx)
 			for (int uploadMethodNdx = 0; uploadMethodNdx < DE_LENGTH_OF_ARRAY(uploadMethods); ++uploadMethodNdx)
 			{
 				// Case name: "<draw name>_upload_unrelated_with_<upload name>";
 				// the description is composed from the draw method's description.
 				const std::string name = std::string() + drawMethods[drawMethodNdx].name + "_upload_unrelated_with_" + uploadMethods[uploadMethodNdx].name;
 				const std::string desc = std::string() + "Measure time consumed by " + drawMethods[drawMethodNdx].description + " function call after an unrelated upload";

 				// Time consumed by rendering command after an unrelated upload

 				uploadUnrelatedGroup->addChild(new UnrelatedUploadRenderTimeCase(m_context, name.c_str(), desc.c_str(), drawMethods[drawMethodNdx].drawMethod, uploadMethods[uploadMethodNdx].uploadMethod));
 			}
 		}

 		// .upload_and_draw
 		// Builds the "upload_and_draw" group and attaches it to
 		// renderAfterUploadGroup. For each buffer configuration a child group is
 		// created and filled with GenericUploadRenderTimeCase instances, one per
 		// (upload target, upload method) pair that passes the partial-upload
 		// filters below.
 		{
 			// Buffer configurations: child group name/description, the buffer state
 			// and unrelated-buffer type passed to each case, and whether partial
 			// upload targets are allowed under this configuration.
 			static const struct
 			{
 				const char*			name;
 				const char*			description;
 				BufferState			bufferState;
 				UnrelatedBufferType	unrelatedBuffer;
 				bool				supportsPartialUpload;
 			} bufferConfigs[] =
 			{
 				{ "used_buffer",						"Upload to an used buffer",											BUFFERSTATE_EXISTING,	UNRELATEDBUFFERTYPE_NONE,	true	},
 				{ "new_buffer",							"Upload to a new buffer",											BUFFERSTATE_NEW,		UNRELATEDBUFFERTYPE_NONE,	false	},
 				{ "used_buffer_and_unrelated_upload",	"Upload to an used buffer and an unrelated buffer and then draw",	BUFFERSTATE_EXISTING,	UNRELATEDBUFFERTYPE_VERTEX,	true	},
 				{ "new_buffer_and_unrelated_upload",	"Upload to a new buffer and an unrelated buffer and then draw",		BUFFERSTATE_NEW,		UNRELATEDBUFFERTYPE_VERTEX,	false	},
 			};

 			tcu::TestCaseGroup* const uploadAndDrawGroup = new tcu::TestCaseGroup(m_testCtx, "upload_and_draw", "Time usage of rendering functions with modified buffers");
 			renderAfterUploadGroup->addChild(uploadAndDrawGroup);

 			// .used_buffer
 			// .new_buffer
 			// .used_buffer_and_unrelated_upload
 			// .new_buffer_and_unrelated_upload
 			for (int stateNdx = 0; stateNdx < DE_LENGTH_OF_ARRAY(bufferConfigs); ++stateNdx)
 			{
 				// Upload targets: which draw call is used, which buffer kind receives
 				// the upload, and whether the upload range is partial.
 				static const struct
 				{
 					const char*		name;
 					const char*		description;
 					DrawMethod		drawMethod;
 					TargetBuffer	targetBuffer;
 					bool			partial;
 				} uploadTargets[] =
 				{
 					{
 						"draw_arrays_upload_vertices",
 						"Measure time consumed by vertex attribute upload, drawArrays, and readPixels function calls",
 						DRAWMETHOD_DRAW_ARRAYS,
 						TARGETBUFFER_VERTEX,
 						false
 					},
 					{
 						"draw_arrays_upload_vertices_partial",
 						"Measure time consumed by partial vertex attribute upload, drawArrays, and readPixels function calls",
 						DRAWMETHOD_DRAW_ARRAYS,
 						TARGETBUFFER_VERTEX,
 						true
 					},
 					{
 						"draw_elements_upload_vertices",
 						"Measure time consumed by vertex attribute upload, drawElements, and readPixels function calls",
 						DRAWMETHOD_DRAW_ELEMENTS,
 						TARGETBUFFER_VERTEX,
 						false
 					},
 					{
 						"draw_elements_upload_indices",
 						"Measure time consumed by index upload, drawElements, and readPixels function calls",
 						DRAWMETHOD_DRAW_ELEMENTS,
 						TARGETBUFFER_INDEX,
 						false
 					},
 					{
 						"draw_elements_upload_indices_partial",
 						"Measure time consumed by partial index upload, drawElements, and readPixels function calls",
 						DRAWMETHOD_DRAW_ELEMENTS,
 						TARGETBUFFER_INDEX,
 						true
 					},
 				};
 				// Upload methods: name suffix, upload method enum, and whether the
 				// method may be combined with a partial upload target. The
 				// `description` field is not read by the loop below.
 				static const struct
 				{
 					const char*		name;
 					const char*		description;
 					UploadMethod	uploadMethod;
 					bool			supportsPartialUpload;
 				} uploadMethods[] =
 				{
 					{ "buffer_data",		"bufferData",		UPLOADMETHOD_BUFFER_DATA,		false	},
 					{ "buffer_sub_data",	"bufferSubData",	UPLOADMETHOD_BUFFER_SUB_DATA,	true	},
 					{ "map_buffer_range",	"mapBufferRange",	UPLOADMETHOD_MAP_BUFFER_RANGE,	true	},
 				};

 				// One child group per buffer configuration
 				tcu::TestCaseGroup* const group = new tcu::TestCaseGroup(m_testCtx, bufferConfigs[stateNdx].name, bufferConfigs[stateNdx].description);
 				uploadAndDrawGroup->addChild(group);

 				// Iterate targets (outer) x methods (inner); the case name is
 				// "<target name>_with_<method name>".
 				for (int uploadTargetNdx = 0; uploadTargetNdx < DE_LENGTH_OF_ARRAY(uploadTargets); ++uploadTargetNdx)
 				for (int uploadMethodNdx = 0; uploadMethodNdx < DE_LENGTH_OF_ARRAY(uploadMethods); ++uploadMethodNdx)
 				{
 					const std::string name = std::string() + uploadTargets[uploadTargetNdx].name + "_with_" + uploadMethods[uploadMethodNdx].name;

 					// A partial target is skipped when either the upload method or
 					// the buffer configuration is flagged as not supporting it.
 					if (uploadTargets[uploadTargetNdx].partial && !uploadMethods[uploadMethodNdx].supportsPartialUpload)
 						continue;
 					if (uploadTargets[uploadTargetNdx].partial && !bufferConfigs[stateNdx].supportsPartialUpload)
 						continue;

 					// Don't log unrelated buffer information to samples if there is no such buffer
 					// The two branches pass identical constructor arguments and differ
 					// only in the sample type used to instantiate the case template.

 					if (bufferConfigs[stateNdx].unrelatedBuffer == UNRELATEDBUFFERTYPE_NONE)
 					{
 						typedef UploadRenderReadDuration				SampleType;
 						typedef GenericUploadRenderTimeCase<SampleType>	TestType;

 						group->addChild(new TestType(m_context,
 													 name.c_str(),
 													 uploadTargets[uploadTargetNdx].description,
 													 uploadTargets[uploadTargetNdx].drawMethod,
 													 uploadTargets[uploadTargetNdx].targetBuffer,
 													 uploadMethods[uploadMethodNdx].uploadMethod,
 													 bufferConfigs[stateNdx].bufferState,
 													 (uploadTargets[uploadTargetNdx].partial) ? (UPLOADRANGE_PARTIAL) : (UPLOADRANGE_FULL),
 													 bufferConfigs[stateNdx].unrelatedBuffer));
 					}
 					else
 					{
 						typedef UploadRenderReadDurationWithUnrelatedUploadSize	SampleType;
 						typedef GenericUploadRenderTimeCase<SampleType>			TestType;

 						group->addChild(new TestType(m_context,
 													 name.c_str(),
 													 uploadTargets[uploadTargetNdx].description,
 													 uploadTargets[uploadTargetNdx].drawMethod,
 													 uploadTargets[uploadTargetNdx].targetBuffer,
 													 uploadMethods[uploadMethodNdx].uploadMethod,
 													 bufferConfigs[stateNdx].bufferState,
 													 (uploadTargets[uploadTargetNdx].partial) ? (UPLOADRANGE_PARTIAL) : (UPLOADRANGE_FULL),
 													 bufferConfigs[stateNdx].unrelatedBuffer));
 					}
 				}
 			}
 		}

 		// .draw_modify_draw
 		// Builds the "draw_modify_draw" group and attaches it to
 		// renderAfterUploadGroup. One BufferInUseRenderTimeCase is created for
 		// every (upload target, upload method) pair that passes the
 		// partial-upload filter below. Every case here is constructed with
 		// UPLOADBUFFERTARGET_SAME_BUFFER.
 		{
 			// Upload targets: which draw call is used, which buffer kind receives
 			// the upload, and whether the upload range is partial.
 			static const struct
 			{
 				const char*		name;
 				const char*		description;
 				DrawMethod		drawMethod;
 				TargetBuffer	targetBuffer;
 				bool			partial;
 			} uploadTargets[] =
 			{
 				{
 					"draw_arrays_upload_vertices",
 					"Measure time consumed by drawArrays, vertex attribute upload, another drawArrays, and readPixels function calls.",
 					DRAWMETHOD_DRAW_ARRAYS,
 					TARGETBUFFER_VERTEX,
 					false
 				},
 				{
 					"draw_arrays_upload_vertices_partial",
 					"Measure time consumed by drawArrays, partial vertex attribute upload, another drawArrays, and readPixels function calls.",
 					DRAWMETHOD_DRAW_ARRAYS,
 					TARGETBUFFER_VERTEX,
 					true
 				},
 				{
 					"draw_elements_upload_vertices",
 					"Measure time consumed by drawElements, vertex attribute upload, another drawElements, and readPixels function calls.",
 					DRAWMETHOD_DRAW_ELEMENTS,
 					TARGETBUFFER_VERTEX,
 					false
 				},
 				{
 					"draw_elements_upload_indices",
 					"Measure time consumed by drawElements, index upload, another drawElements, and readPixels function calls.",
 					DRAWMETHOD_DRAW_ELEMENTS,
 					TARGETBUFFER_INDEX,
 					false
 				},
 				{
 					"draw_elements_upload_indices_partial",
 					"Measure time consumed by drawElements, partial index upload, another drawElements, and readPixels function calls.",
 					DRAWMETHOD_DRAW_ELEMENTS,
 					TARGETBUFFER_INDEX,
 					true
 				},
 			};
 			// Upload methods: the name suffix, the upload method enum, the map
 			// flags handed to the case, and whether the method may be combined
 			// with a partial upload target. The `description` field is not read
 			// by the loop below.
 			static const struct
 			{
 				const char*							name;
 				const char*							description;
 				UploadMethod						uploadMethod;
 				BufferInUseRenderTimeCase::MapFlags	mapFlags;
 				bool								supportsPartialUpload;
 			} uploadMethods[] =
 			{
 				{ "buffer_data",						"bufferData",		UPLOADMETHOD_BUFFER_DATA,		BufferInUseRenderTimeCase::MAPFLAG_NONE,				false	},
 				{ "buffer_sub_data",					"bufferSubData",	UPLOADMETHOD_BUFFER_SUB_DATA,	BufferInUseRenderTimeCase::MAPFLAG_NONE,				true	},
 				{ "map_buffer_range_invalidate_range",	"mapBufferRange",	UPLOADMETHOD_MAP_BUFFER_RANGE,	BufferInUseRenderTimeCase::MAPFLAG_INVALIDATE_RANGE,	true	},
 				{ "map_buffer_range_invalidate_buffer",	"mapBufferRange",	UPLOADMETHOD_MAP_BUFFER_RANGE,	BufferInUseRenderTimeCase::MAPFLAG_INVALIDATE_BUFFER,	false	},
 			};

 			tcu::TestCaseGroup* const drawModifyDrawGroup = new tcu::TestCaseGroup(m_testCtx, "draw_modify_draw", "Time used in rendering functions with modified buffers while original buffer is still in use");
 			renderAfterUploadGroup->addChild(drawModifyDrawGroup);

 			// Iterate targets (outer) x methods (inner); the case name is
 			// "<target name>_with_<method name>".
 			for (int uploadTargetNdx = 0; uploadTargetNdx < DE_LENGTH_OF_ARRAY(uploadTargets); ++uploadTargetNdx)
 			for (int uploadMethodNdx = 0; uploadMethodNdx < DE_LENGTH_OF_ARRAY(uploadMethods); ++uploadMethodNdx)
 			{
 				const std::string name = std::string() + uploadTargets[uploadTargetNdx].name + "_with_" + uploadMethods[uploadMethodNdx].name;

 				// Skip partial targets for methods flagged as not supporting partial upload
 				if (uploadTargets[uploadTargetNdx].partial && !uploadMethods[uploadMethodNdx].supportsPartialUpload)
 					continue;

 				// The target's `partial` flag selects UPLOADRANGE_PARTIAL or UPLOADRANGE_FULL
 				drawModifyDrawGroup->addChild(new BufferInUseRenderTimeCase(m_context,
 																			name.c_str(),
 																			uploadTargets[uploadTargetNdx].description,
 																			uploadTargets[uploadTargetNdx].drawMethod,
 																			uploadMethods[uploadMethodNdx].mapFlags,
 																			uploadTargets[uploadTargetNdx].targetBuffer,
 																			uploadMethods[uploadMethodNdx].uploadMethod,
 																			(uploadTargets[uploadTargetNdx].partial) ? (UPLOADRANGE_PARTIAL) : (UPLOADRANGE_FULL),
 																			BufferInUseRenderTimeCase::UPLOADBUFFERTARGET_SAME_BUFFER));
 			}
 		}

 		// .upload_wait_draw
 		// Builds the "upload_wait_draw" group and attaches it to
 		// renderAfterUploadGroup. For each buffer state a child group is created,
 		// and one UploadWaitDrawCase is added to it for every
 		// (upload target, upload method) pair; no pair is filtered out.
 		{
 			// Buffer states: child group name/description and the buffer state
 			// passed to each case.
 			static const struct
 			{
 				const char*	name;
 				const char*	description;
 				BufferState	bufferState;
 			} bufferStates[] =
 			{
 				{ "new_buffer",		"Uploading to just generated name",	BUFFERSTATE_NEW			},
 				{ "used_buffer",	"Uploading to a used buffer",		BUFFERSTATE_EXISTING	},
 			};
 			// Upload targets: name prefix, case description, draw method and the
 			// buffer kind that receives the upload.
 			static const struct
 			{
 				const char*		name;
 				const char*		description;
 				DrawMethod		drawMethod;
 				TargetBuffer	targetBuffer;
 			} uploadTargets[] =
 			{
 				{ "draw_arrays_vertices",	"Upload vertex data, draw with drawArrays",		DRAWMETHOD_DRAW_ARRAYS,		TARGETBUFFER_VERTEX	},
 				{ "draw_elements_vertices",	"Upload vertex data, draw with drawElements",	DRAWMETHOD_DRAW_ELEMENTS,	TARGETBUFFER_VERTEX	},
 				{ "draw_elements_indices",	"Upload index data, draw with drawElements",	DRAWMETHOD_DRAW_ELEMENTS,	TARGETBUFFER_INDEX	},
 			};
 			// Upload methods: name suffix and upload method enum. The
 			// `description` field is not read by the loop below.
 			static const struct
 			{
 				const char*		name;
 				const char*		description;
 				UploadMethod	uploadMethod;
 			} uploadMethods[] =
 			{
 				{ "buffer_data",		"bufferData",		UPLOADMETHOD_BUFFER_DATA		},
 				{ "buffer_sub_data",	"bufferSubData",	UPLOADMETHOD_BUFFER_SUB_DATA	},
 				{ "map_buffer_range",	"mapBufferRange",	UPLOADMETHOD_MAP_BUFFER_RANGE	},
 			};

 			// NOTE(review): the local is called uploadSwapDrawGroup while the group
 			// it holds is named "upload_wait_draw".
 			tcu::TestCaseGroup* const uploadSwapDrawGroup = new tcu::TestCaseGroup(m_testCtx, "upload_wait_draw", "Time used in rendering functions after a buffer upload N frames ago");
 			renderAfterUploadGroup->addChild(uploadSwapDrawGroup);

 			for (int bufferStateNdx = 0; bufferStateNdx < DE_LENGTH_OF_ARRAY(bufferStates); ++bufferStateNdx)
 			{
 				// One child group per buffer state
 				tcu::TestCaseGroup* const bufferGroup = new tcu::TestCaseGroup(m_testCtx, bufferStates[bufferStateNdx].name, bufferStates[bufferStateNdx].description);
 				uploadSwapDrawGroup->addChild(bufferGroup);

 				// Iterate targets (outer) x methods (inner); the case name is
 				// "<target name>_with_<method name>".
 				for (int uploadTargetNdx = 0; uploadTargetNdx < DE_LENGTH_OF_ARRAY(uploadTargets); ++uploadTargetNdx)
 				for (int uploadMethodNdx = 0; uploadMethodNdx < DE_LENGTH_OF_ARRAY(uploadMethods); ++uploadMethodNdx)
 				{
 					const std::string name = std::string() + uploadTargets[uploadTargetNdx].name + "_with_" + uploadMethods[uploadMethodNdx].name;

 					bufferGroup->addChild(new UploadWaitDrawCase(m_context,
 																 name.c_str(),
 																 uploadTargets[uploadTargetNdx].description,
 																 uploadTargets[uploadTargetNdx].drawMethod,
 																 uploadTargets[uploadTargetNdx].targetBuffer,
 																 uploadMethods[uploadMethodNdx].uploadMethod,
 																 bufferStates[bufferStateNdx].bufferState));
 				}
 			}
 		}
 	}
 }

 } // Performance
 } // gles3
 } // deqp