external/vulkancts/modules/vulkan/spirv_assembly/vktSpvAsm64bitCompareTests.cpp - third_party/vulkan-cts - Git at Google

 /*------------------------------------------------------------------------
  * Vulkan Conformance Tests
  * ------------------------
  *
  * Copyright (c) 2019 Valve Corporation.
  * Copyright (c) 2019 The Khronos Group Inc.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
  *      http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  *
  *//*!
  * \file
  * \brief 64-bit data type comparison operations.
  *//*--------------------------------------------------------------------*/

 #include "vktSpvAsm64bitCompareTests.hpp"
 #include "vktTestGroupUtil.hpp"
 #include "vkDefs.hpp"
 #include "vktTestCase.hpp"
 #include "vkQueryUtil.hpp"
 #include "vkMemUtil.hpp"
 #include "vkRefUtil.hpp"
 #include "vkBuilderUtil.hpp"
 #include "vkPrograms.hpp"
 #include "vkCmdUtil.hpp"

 #include "tcuStringTemplate.hpp"

 #include <string>
 #include <vector>
 #include <utility>
 #include <cmath>
 #include <sstream>
 #include <memory>
 #include <limits>

 namespace vkt
 {
 namespace SpirVAssembly
 {
 namespace
 {

 template <typename T>
 class CompareOperation
 {
 public:
 	virtual std::string		spirvName	()					const = 0;
 	virtual bool			run			(T left, T right)	const = 0;
 };

 // Helper intermediate class to be able to implement Ordered and Unordered floating point operations in a simpler way.
 class DoubleCompareOperation: public CompareOperation<double>
 {
 public:
 	struct BasicImplementation
 	{
 		virtual std::string	nameSuffix	()							const = 0;
 		virtual bool		run			(double left, double right)	const = 0; // No NaNs here.
 	};

 	virtual std::string		spirvName	() const
 	{
 		return "OpF" + std::string(m_ordered ? "Ord" : "Unord") + m_impl.nameSuffix();
 	}

 	virtual bool			run			(double left, double right)	const
 	{
 		if (nanInvolved(left, right))
 			return !m_ordered; // Ordered operations return false when NaN is involved.
 		return m_impl.run(left, right);
 	}

 	DoubleCompareOperation(bool ordered, const BasicImplementation& impl)
 		: m_ordered(ordered), m_impl(impl)
 		{}

 private:
 	bool nanInvolved(double left, double right) const
 	{
 		return std::isnan(left) || std::isnan(right);
 	}

 	const bool					m_ordered;
 	const BasicImplementation&	m_impl;
 };

 #define GEN_DOUBLE_BASIC_IMPL(NAME, OPERATION)																	\
 	struct NAME##DoubleBasicImplClass : public DoubleCompareOperation::BasicImplementation						\
 	{																											\
 		virtual std::string	nameSuffix	()							const	{ return #NAME; }					\
 		virtual bool		run			(double left, double right)	const	{ return left OPERATION right; }	\
 	};																											\
 	NAME##DoubleBasicImplClass NAME##DoubleBasicImplInstance;

 GEN_DOUBLE_BASIC_IMPL(Equal,			==	)
 GEN_DOUBLE_BASIC_IMPL(NotEqual,			!=	)
 GEN_DOUBLE_BASIC_IMPL(LessThan,			<	)
 GEN_DOUBLE_BASIC_IMPL(GreaterThan,		>	)
 GEN_DOUBLE_BASIC_IMPL(LessThanEqual,	<=	)
 GEN_DOUBLE_BASIC_IMPL(GreaterThanEqual,	>=	)

 #define GEN_FORDERED_OP(NAME)	DoubleCompareOperation FOrdered##NAME##Op(true, NAME##DoubleBasicImplInstance)
 #define GEN_FUNORDERED_OP(NAME)	DoubleCompareOperation FUnordered##NAME##Op(false, NAME##DoubleBasicImplInstance)
 #define GEN_FBOTH_OP(NAME)		GEN_FORDERED_OP(NAME); GEN_FUNORDERED_OP(NAME);

 GEN_FBOTH_OP(Equal)
 GEN_FBOTH_OP(NotEqual)
 GEN_FBOTH_OP(LessThan)
 GEN_FBOTH_OP(GreaterThan)
 GEN_FBOTH_OP(LessThanEqual)
 GEN_FBOTH_OP(GreaterThanEqual)

 template <typename IntClass>
 class IntCompareOperation: public CompareOperation<IntClass>
 {
 public:
 	struct Implementation
 	{
 		virtual std::string	typeChar	()								const = 0;
 		virtual std::string	opName		()								const = 0;
 		virtual bool		run			(IntClass left, IntClass right)	const = 0;
 	};

 	virtual std::string		spirvName	()								const
 	{
 		return "Op" + m_impl.typeChar() + m_impl.opName();
 	}

 	virtual bool			run			(IntClass left, IntClass right)	const
 	{
 		return m_impl.run(left, right);
 	}

 	IntCompareOperation(const Implementation& impl)
 		: m_impl(impl)
 		{}

 private:
 	const Implementation& m_impl;
 };

 #define GEN_INT_IMPL(INTTYPE, TYPECHAR, OPNAME, OPERATOR)															\
 	struct INTTYPE##OPNAME##IntImplClass : public IntCompareOperation<INTTYPE>::Implementation						\
 	{																												\
 		virtual std::string	typeChar	()								const	{ return #TYPECHAR;	}				\
 		virtual std::string	opName		()								const	{ return #OPNAME;	}				\
 		virtual bool		run			(INTTYPE left, INTTYPE right)	const	{ return left OPERATOR right; }		\
 	};																												\
 	INTTYPE##OPNAME##IntImplClass INTTYPE##OPNAME##IntImplInstance;

 #define GEN_ALL_INT_TYPE_IMPL(INTTYPE, TYPECHAR)				\
 	GEN_INT_IMPL(INTTYPE, I,		Equal,				==	)	\
 	GEN_INT_IMPL(INTTYPE, I,		NotEqual,			!=	)	\
 	GEN_INT_IMPL(INTTYPE, TYPECHAR,	GreaterThan,		>	)	\
 	GEN_INT_IMPL(INTTYPE, TYPECHAR,	GreaterThanEqual,	>=	)	\
 	GEN_INT_IMPL(INTTYPE, TYPECHAR,	LessThan,			<	)	\
 	GEN_INT_IMPL(INTTYPE, TYPECHAR,	LessThanEqual,		<=	)

 GEN_ALL_INT_TYPE_IMPL(deInt64,	S)
 GEN_ALL_INT_TYPE_IMPL(deUint64,	U)

 #define GEN_INT_OP(INTTYPE, OPNAME)																			\
 	struct INTTYPE##OPNAME##OpClass: public IntCompareOperation<INTTYPE>									\
 	{																										\
 		INTTYPE##OPNAME##OpClass () : IntCompareOperation<INTTYPE>(INTTYPE##OPNAME##IntImplInstance) {}		\
 	};																										\
 	INTTYPE##OPNAME##OpClass INTTYPE##OPNAME##Op;

 #define GEN_ALL_INT_OPS(INTTYPE)				\
 	GEN_INT_OP(INTTYPE, Equal				)	\
 	GEN_INT_OP(INTTYPE, NotEqual			)	\
 	GEN_INT_OP(INTTYPE, GreaterThan			)	\
 	GEN_INT_OP(INTTYPE, GreaterThanEqual	)	\
 	GEN_INT_OP(INTTYPE, LessThan			)	\
 	GEN_INT_OP(INTTYPE, LessThanEqual		)

 GEN_ALL_INT_OPS(deInt64)
 GEN_ALL_INT_OPS(deUint64)

 enum DataType {
 	DATA_TYPE_SINGLE = 0,
 	DATA_TYPE_VECTOR,
 	DATA_TYPE_MAX_ENUM,
 };

 template <class T>
 using OperandsVector = std::vector<std::pair<T, T>>;

 template <class T>
 struct TestParameters
 {
 	DataType					dataType;
 	const CompareOperation<T>&	operation;
 	vk::VkShaderStageFlagBits	stage;
 	const OperandsVector<T>&	operands;
 };

 // Shader template for the compute stage using single scalars.
 // Generated from the following GLSL shader, replacing some bits by template parameters.
 #if 0
 #version 430

 // Left operands, right operands and results.
 layout(binding = 0) buffer Input1  { double values[];	} input1;
 layout(binding = 1) buffer Input2  { double values[];	} input2;
 layout(binding = 2) buffer Output1 { int values[];		} output1;

 void main()
 {
         for (int i = 0; i < 20; i++) {
                 output1.values[i] = int(input1.values[i] == input2.values[i]);
         }
 }
 #endif
 const tcu::StringTemplate CompShaderSingle(R"(
                         OpCapability Shader
                         ${OPCAPABILITY}
                    %1 = OpExtInstImport "GLSL.std.450"
                         OpMemoryModel Logical GLSL450
                         OpEntryPoint GLCompute %main "main"
                         OpExecutionMode %main LocalSize 1 1 1
                         OpName %main "main"
                         OpName %i "i"
                         OpName %Output1 "Output1"
                         OpMemberName %Output1 0 "values"
                         OpName %output1 "output1"
                         OpName %Input1 "Input1"
                         OpMemberName %Input1 0 "values"
                         OpName %input1 "input1"
                         OpName %Input2 "Input2"
                         OpMemberName %Input2 0 "values"
                         OpName %input2 "input2"
                         OpDecorate %_runtimearr_int ArrayStride 4
                         OpMemberDecorate %Output1 0 Offset 0
                         OpDecorate %Output1 BufferBlock
                         OpDecorate %output1 DescriptorSet 0
                         OpDecorate %output1 Binding 2
                         OpDecorate %_runtimearr_tinput ArrayStride 8
                         OpMemberDecorate %Input1 0 Offset 0
                         OpDecorate %Input1 BufferBlock
                         OpDecorate %input1 DescriptorSet 0
                         OpDecorate %input1 Binding 0
                         OpDecorate %_runtimearr_tinput_0 ArrayStride 8
                         OpMemberDecorate %Input2 0 Offset 0
                         OpDecorate %Input2 BufferBlock
                         OpDecorate %input2 DescriptorSet 0
                         OpDecorate %input2 Binding 1
                 %void = OpTypeVoid
                    %3 = OpTypeFunction %void
                  %int = OpTypeInt 32 1
    %_ptr_Function_int = OpTypePointer Function %int
                %int_0 = OpConstant %int 0
               %niters = OpConstant %int ${ITERS}
                 %bool = OpTypeBool
      %_runtimearr_int = OpTypeRuntimeArray %int
              %Output1 = OpTypeStruct %_runtimearr_int
 %_ptr_Uniform_Output1 = OpTypePointer Uniform %Output1
              %output1 = OpVariable %_ptr_Uniform_Output1 Uniform
               %tinput = ${OPTYPE}
   %_runtimearr_tinput = OpTypeRuntimeArray %tinput
               %Input1 = OpTypeStruct %_runtimearr_tinput
  %_ptr_Uniform_Input1 = OpTypePointer Uniform %Input1
               %input1 = OpVariable %_ptr_Uniform_Input1 Uniform
  %_ptr_Uniform_tinput = OpTypePointer Uniform %tinput
 %_runtimearr_tinput_0 = OpTypeRuntimeArray %tinput
               %Input2 = OpTypeStruct %_runtimearr_tinput_0
  %_ptr_Uniform_Input2 = OpTypePointer Uniform %Input2
               %input2 = OpVariable %_ptr_Uniform_Input2 Uniform
                %int_1 = OpConstant %int 1
     %_ptr_Uniform_int = OpTypePointer Uniform %int
                 %main = OpFunction %void None %3
                    %5 = OpLabel
                    %i = OpVariable %_ptr_Function_int Function
                         OpStore %i %int_0
                         OpBranch %10
                   %10 = OpLabel
                         OpLoopMerge %12 %13 None
                         OpBranch %14
                   %14 = OpLabel
                   %15 = OpLoad %int %i
                   %18 = OpSLessThan %bool %15 %niters
                         OpBranchConditional %18 %11 %12
                   %11 = OpLabel
                   %23 = OpLoad %int %i
                   %29 = OpLoad %int %i
                   %31 = OpAccessChain %_ptr_Uniform_tinput %input1 %int_0 %29
                   %32 = OpLoad %tinput %31
                   %37 = OpLoad %int %i
                   %38 = OpAccessChain %_ptr_Uniform_tinput %input2 %int_0 %37
                   %39 = OpLoad %tinput %38
                   %40 = ${OPNAME} %bool %32 %39
                   %42 = OpSelect %int %40 %int_1 %int_0
                   %44 = OpAccessChain %_ptr_Uniform_int %output1 %int_0 %23
                         OpStore %44 %42
                         OpBranch %13
                   %13 = OpLabel
                   %45 = OpLoad %int %i
                   %46 = OpIAdd %int %45 %int_1
                         OpStore %i %46
                         OpBranch %10
                   %12 = OpLabel
                         OpReturn
                         OpFunctionEnd
 )");

 // Shader template for the compute stage using vectors.
 // Generated from the following GLSL shader, replacing some bits by template parameters.
 // Note the number of iterations needs to be divided by 4 as the shader will consume 4 doubles at a time.
 #if 0
 #version 430

 // Left operands, right operands and results.
 layout(binding = 0) buffer Input1  { dvec4 values[];	} input1;
 layout(binding = 1) buffer Input2  { dvec4 values[];	} input2;
 layout(binding = 2) buffer Output1 { ivec4 values[];	} output1;

 void main()
 {
         for (int i = 0; i < 5; i++) {
                 output1.values[i] = ivec4(equal(input1.values[i], input2.values[i]));
         }
 }
 #endif
 const tcu::StringTemplate CompShaderVector(R"(
                           OpCapability Shader
                           ${OPCAPABILITY}
                      %1 = OpExtInstImport "GLSL.std.450"
                           OpMemoryModel Logical GLSL450
                           OpEntryPoint GLCompute %main "main"
                           OpExecutionMode %main LocalSize 1 1 1
                           OpName %main "main"
                           OpName %i "i"
                           OpName %Output1 "Output1"
                           OpMemberName %Output1 0 "values"
                           OpName %output1 "output1"
                           OpName %Input1 "Input1"
                           OpMemberName %Input1 0 "values"
                           OpName %input1 "input1"
                           OpName %Input2 "Input2"
                           OpMemberName %Input2 0 "values"
                           OpName %input2 "input2"
                           OpDecorate %_runtimearr_v4int ArrayStride 16
                           OpMemberDecorate %Output1 0 Offset 0
                           OpDecorate %Output1 BufferBlock
                           OpDecorate %output1 DescriptorSet 0
                           OpDecorate %output1 Binding 2
                           OpDecorate %_runtimearr_v4tinput ArrayStride 32
                           OpMemberDecorate %Input1 0 Offset 0
                           OpDecorate %Input1 BufferBlock
                           OpDecorate %input1 DescriptorSet 0
                           OpDecorate %input1 Binding 0
                           OpDecorate %_runtimearr_v4tinput_0 ArrayStride 32
                           OpMemberDecorate %Input2 0 Offset 0
                           OpDecorate %Input2 BufferBlock
                           OpDecorate %input2 DescriptorSet 0
                           OpDecorate %input2 Binding 1
                   %void = OpTypeVoid
                      %3 = OpTypeFunction %void
                    %int = OpTypeInt 32 1
      %_ptr_Function_int = OpTypePointer Function %int
                  %int_0 = OpConstant %int 0
                 %niters = OpConstant %int ${ITERS}
                   %bool = OpTypeBool
                  %v4int = OpTypeVector %int 4
      %_runtimearr_v4int = OpTypeRuntimeArray %v4int
                %Output1 = OpTypeStruct %_runtimearr_v4int
   %_ptr_Uniform_Output1 = OpTypePointer Uniform %Output1
                %output1 = OpVariable %_ptr_Uniform_Output1 Uniform
                 %tinput = ${OPTYPE}
               %v4tinput = OpTypeVector %tinput 4
   %_runtimearr_v4tinput = OpTypeRuntimeArray %v4tinput
                 %Input1 = OpTypeStruct %_runtimearr_v4tinput
    %_ptr_Uniform_Input1 = OpTypePointer Uniform %Input1
                 %input1 = OpVariable %_ptr_Uniform_Input1 Uniform
  %_ptr_Uniform_v4tinput = OpTypePointer Uniform %v4tinput
 %_runtimearr_v4tinput_0 = OpTypeRuntimeArray %v4tinput
                 %Input2 = OpTypeStruct %_runtimearr_v4tinput_0
    %_ptr_Uniform_Input2 = OpTypePointer Uniform %Input2
                 %input2 = OpVariable %_ptr_Uniform_Input2 Uniform
                 %v4bool = OpTypeVector %bool 4
                  %int_1 = OpConstant %int 1
                     %45 = OpConstantComposite %v4int %int_0 %int_0 %int_0 %int_0
                     %46 = OpConstantComposite %v4int %int_1 %int_1 %int_1 %int_1
     %_ptr_Uniform_v4int = OpTypePointer Uniform %v4int
                   %main = OpFunction %void None %3
                      %5 = OpLabel
                      %i = OpVariable %_ptr_Function_int Function
                           OpStore %i %int_0
                           OpBranch %10
                     %10 = OpLabel
                           OpLoopMerge %12 %13 None
                           OpBranch %14
                     %14 = OpLabel
                     %15 = OpLoad %int %i
                     %18 = OpSLessThan %bool %15 %niters
                           OpBranchConditional %18 %11 %12
                     %11 = OpLabel
                     %24 = OpLoad %int %i
                     %31 = OpLoad %int %i
                     %33 = OpAccessChain %_ptr_Uniform_v4tinput %input1 %int_0 %31
                     %34 = OpLoad %v4tinput %33
                     %39 = OpLoad %int %i
                     %40 = OpAccessChain %_ptr_Uniform_v4tinput %input2 %int_0 %39
                     %41 = OpLoad %v4tinput %40
                     %43 = ${OPNAME} %v4bool %34 %41
                     %47 = OpSelect %v4int %43 %46 %45
                     %49 = OpAccessChain %_ptr_Uniform_v4int %output1 %int_0 %24
                           OpStore %49 %47
                           OpBranch %13
                     %13 = OpLabel
                     %50 = OpLoad %int %i
                     %51 = OpIAdd %int %50 %int_1
                           OpStore %i %51
                           OpBranch %10
                     %12 = OpLabel
                           OpReturn
                           OpFunctionEnd
 )");

 // Shader template for the vertex stage using single scalars.
 // Generated from the following GLSL shader, replacing some bits by template parameters.
 #if 0
 #version 430

 // Left operands, right operands and results.
 layout(binding = 0) buffer Input1  { double values[];	} input1;
 layout(binding = 1) buffer Input2  { double values[];	} input2;
 layout(binding = 2) buffer Output1 { int values[];		} output1;

 void main()
 {
       gl_PointSize = 1;
       gl_Position = vec4(0.0, 0.0, 0.0, 1.0);

       for (int i = 0; i < 20; i++) {
               output1.values[i] = int(input1.values[i] == input2.values[i]);
       }
 }
 #endif
 const tcu::StringTemplate VertShaderSingle(R"(
                             OpCapability Shader
                             ${OPCAPABILITY}
                        %1 = OpExtInstImport "GLSL.std.450"
                             OpMemoryModel Logical GLSL450
                             OpEntryPoint Vertex %main "main" %_
                             OpName %main "main"
                             OpName %gl_PerVertex "gl_PerVertex"
                             OpMemberName %gl_PerVertex 0 "gl_Position"
                             OpMemberName %gl_PerVertex 1 "gl_PointSize"
                             OpMemberName %gl_PerVertex 2 "gl_ClipDistance"
                             OpName %_ ""
                             OpName %i "i"
                             OpName %Output1 "Output1"
                             OpMemberName %Output1 0 "values"
                             OpName %output1 "output1"
                             OpName %Input1 "Input1"
                             OpMemberName %Input1 0 "values"
                             OpName %input1 "input1"
                             OpName %Input2 "Input2"
                             OpMemberName %Input2 0 "values"
                             OpName %input2 "input2"
                             OpMemberDecorate %gl_PerVertex 0 BuiltIn Position
                             OpMemberDecorate %gl_PerVertex 1 BuiltIn PointSize
                             OpMemberDecorate %gl_PerVertex 2 BuiltIn ClipDistance
                             OpDecorate %gl_PerVertex Block
                             OpDecorate %_runtimearr_int ArrayStride 4
                             OpMemberDecorate %Output1 0 Offset 0
                             OpDecorate %Output1 BufferBlock
                             OpDecorate %output1 DescriptorSet 0
                             OpDecorate %output1 Binding 2
                             OpDecorate %_runtimearr_tinput ArrayStride 8
                             OpMemberDecorate %Input1 0 Offset 0
                             OpDecorate %Input1 BufferBlock
                             OpDecorate %input1 DescriptorSet 0
                             OpDecorate %input1 Binding 0
                             OpDecorate %_runtimearr_tinput_0 ArrayStride 8
                             OpMemberDecorate %Input2 0 Offset 0
                             OpDecorate %Input2 BufferBlock
                             OpDecorate %input2 DescriptorSet 0
                             OpDecorate %input2 Binding 1
                     %void = OpTypeVoid
                        %3 = OpTypeFunction %void
                    %float = OpTypeFloat 32
                  %v4float = OpTypeVector %float 4
                     %uint = OpTypeInt 32 0
                   %uint_1 = OpConstant %uint 1
        %_arr_float_uint_1 = OpTypeArray %float %uint_1
             %gl_PerVertex = OpTypeStruct %v4float %float %_arr_float_uint_1
 %_ptr_Output_gl_PerVertex = OpTypePointer Output %gl_PerVertex
                        %_ = OpVariable %_ptr_Output_gl_PerVertex Output
                      %int = OpTypeInt 32 1
                    %int_1 = OpConstant %int 1
                  %float_1 = OpConstant %float 1
        %_ptr_Output_float = OpTypePointer Output %float
                    %int_0 = OpConstant %int 0
                  %float_0 = OpConstant %float 0
                       %21 = OpConstantComposite %v4float %float_0 %float_0 %float_0 %float_1
      %_ptr_Output_v4float = OpTypePointer Output %v4float
        %_ptr_Function_int = OpTypePointer Function %int
                   %niters = OpConstant %int ${ITERS}
                     %bool = OpTypeBool
          %_runtimearr_int = OpTypeRuntimeArray %int
                  %Output1 = OpTypeStruct %_runtimearr_int
     %_ptr_Uniform_Output1 = OpTypePointer Uniform %Output1
                  %output1 = OpVariable %_ptr_Uniform_Output1 Uniform
                   %tinput = ${OPTYPE}
       %_runtimearr_tinput = OpTypeRuntimeArray %tinput
                   %Input1 = OpTypeStruct %_runtimearr_tinput
      %_ptr_Uniform_Input1 = OpTypePointer Uniform %Input1
                   %input1 = OpVariable %_ptr_Uniform_Input1 Uniform
      %_ptr_Uniform_tinput = OpTypePointer Uniform %tinput
     %_runtimearr_tinput_0 = OpTypeRuntimeArray %tinput
                   %Input2 = OpTypeStruct %_runtimearr_tinput_0
      %_ptr_Uniform_Input2 = OpTypePointer Uniform %Input2
                   %input2 = OpVariable %_ptr_Uniform_Input2 Uniform
         %_ptr_Uniform_int = OpTypePointer Uniform %int
                     %main = OpFunction %void None %3
                        %5 = OpLabel
                        %i = OpVariable %_ptr_Function_int Function
                       %18 = OpAccessChain %_ptr_Output_float %_ %int_1
                             OpStore %18 %float_1
                       %23 = OpAccessChain %_ptr_Output_v4float %_ %int_0
                             OpStore %23 %21
                             OpStore %i %int_0
                             OpBranch %26
                       %26 = OpLabel
                             OpLoopMerge %28 %29 None
                             OpBranch %30
                       %30 = OpLabel
                       %31 = OpLoad %int %i
                       %34 = OpSLessThan %bool %31 %niters
                             OpBranchConditional %34 %27 %28
                       %27 = OpLabel
                       %39 = OpLoad %int %i
                       %45 = OpLoad %int %i
                       %47 = OpAccessChain %_ptr_Uniform_tinput %input1 %int_0 %45
                       %48 = OpLoad %tinput %47
                       %53 = OpLoad %int %i
                       %54 = OpAccessChain %_ptr_Uniform_tinput %input2 %int_0 %53
                       %55 = OpLoad %tinput %54
                       %56 = ${OPNAME} %bool %48 %55
                       %57 = OpSelect %int %56 %int_1 %int_0
                       %59 = OpAccessChain %_ptr_Uniform_int %output1 %int_0 %39
                             OpStore %59 %57
                             OpBranch %29
                       %29 = OpLabel
                       %60 = OpLoad %int %i
                       %61 = OpIAdd %int %60 %int_1
                             OpStore %i %61
                             OpBranch %26
                       %28 = OpLabel
                             OpReturn
                             OpFunctionEnd
 )");

 // Shader template for the vertex stage using vectors.
 // Generated from the following GLSL shader, replacing some bits by template parameters.
 // Note the number of iterations needs to be divided by 4 as the shader will consume 4 doubles at a time.
 #if 0
 #version 430

 // Left operands, right operands and results.
 layout(binding = 0) buffer Input1  { dvec4 values[]; } input1;
 layout(binding = 1) buffer Input2  { dvec4 values[]; } input2;
 layout(binding = 2) buffer Output1 { ivec4 values[]; } output1;

 void main()
 {
       gl_PointSize = 1;
       gl_Position = vec4(0.0, 0.0, 0.0, 1.0);

       for (int i = 0; i < 5; i++) {
               output1.values[i] = ivec4(equal(input1.values[i], input2.values[i]));
       }
 }
 #endif
 const tcu::StringTemplate VertShaderVector(R"(
                             OpCapability Shader
                             ${OPCAPABILITY}
                        %1 = OpExtInstImport "GLSL.std.450"
                             OpMemoryModel Logical GLSL450
                             OpEntryPoint Vertex %main "main" %_
                             OpName %main "main"
                             OpName %gl_PerVertex "gl_PerVertex"
                             OpMemberName %gl_PerVertex 0 "gl_Position"
                             OpMemberName %gl_PerVertex 1 "gl_PointSize"
                             OpMemberName %gl_PerVertex 2 "gl_ClipDistance"
                             OpName %_ ""
                             OpName %i "i"
                             OpName %Output1 "Output1"
                             OpMemberName %Output1 0 "values"
                             OpName %output1 "output1"
                             OpName %Input1 "Input1"
                             OpMemberName %Input1 0 "values"
                             OpName %input1 "input1"
                             OpName %Input2 "Input2"
                             OpMemberName %Input2 0 "values"
                             OpName %input2 "input2"
                             OpMemberDecorate %gl_PerVertex 0 BuiltIn Position
                             OpMemberDecorate %gl_PerVertex 1 BuiltIn PointSize
                             OpMemberDecorate %gl_PerVertex 2 BuiltIn ClipDistance
                             OpDecorate %gl_PerVertex Block
                             OpDecorate %_runtimearr_v4int ArrayStride 16
                             OpMemberDecorate %Output1 0 Offset 0
                             OpDecorate %Output1 BufferBlock
                             OpDecorate %output1 DescriptorSet 0
                             OpDecorate %output1 Binding 2
                             OpDecorate %_runtimearr_v4tinput ArrayStride 32
                             OpMemberDecorate %Input1 0 Offset 0
                             OpDecorate %Input1 BufferBlock
                             OpDecorate %input1 DescriptorSet 0
                             OpDecorate %input1 Binding 0
                             OpDecorate %_runtimearr_v4tinput_0 ArrayStride 32
                             OpMemberDecorate %Input2 0 Offset 0
                             OpDecorate %Input2 BufferBlock
                             OpDecorate %input2 DescriptorSet 0
                             OpDecorate %input2 Binding 1
                     %void = OpTypeVoid
                        %3 = OpTypeFunction %void
                    %float = OpTypeFloat 32
                  %v4float = OpTypeVector %float 4
                     %uint = OpTypeInt 32 0
                   %uint_1 = OpConstant %uint 1
        %_arr_float_uint_1 = OpTypeArray %float %uint_1
             %gl_PerVertex = OpTypeStruct %v4float %float %_arr_float_uint_1
 %_ptr_Output_gl_PerVertex = OpTypePointer Output %gl_PerVertex
                        %_ = OpVariable %_ptr_Output_gl_PerVertex Output
                      %int = OpTypeInt 32 1
                    %int_1 = OpConstant %int 1
                  %float_1 = OpConstant %float 1
        %_ptr_Output_float = OpTypePointer Output %float
                    %int_0 = OpConstant %int 0
                  %float_0 = OpConstant %float 0
                       %21 = OpConstantComposite %v4float %float_0 %float_0 %float_0 %float_1
      %_ptr_Output_v4float = OpTypePointer Output %v4float
        %_ptr_Function_int = OpTypePointer Function %int
                   %niters = OpConstant %int ${ITERS}
                     %bool = OpTypeBool
                    %v4int = OpTypeVector %int 4
        %_runtimearr_v4int = OpTypeRuntimeArray %v4int
                  %Output1 = OpTypeStruct %_runtimearr_v4int
     %_ptr_Uniform_Output1 = OpTypePointer Uniform %Output1
                  %output1 = OpVariable %_ptr_Uniform_Output1 Uniform
                   %tinput = ${OPTYPE}
                 %v4tinput = OpTypeVector %tinput 4
     %_runtimearr_v4tinput = OpTypeRuntimeArray %v4tinput
                   %Input1 = OpTypeStruct %_runtimearr_v4tinput
      %_ptr_Uniform_Input1 = OpTypePointer Uniform %Input1
                   %input1 = OpVariable %_ptr_Uniform_Input1 Uniform
    %_ptr_Uniform_v4tinput = OpTypePointer Uniform %v4tinput
   %_runtimearr_v4tinput_0 = OpTypeRuntimeArray %v4tinput
                   %Input2 = OpTypeStruct %_runtimearr_v4tinput_0
      %_ptr_Uniform_Input2 = OpTypePointer Uniform %Input2
                   %input2 = OpVariable %_ptr_Uniform_Input2 Uniform
                   %v4bool = OpTypeVector %bool 4
                       %60 = OpConstantComposite %v4int %int_0 %int_0 %int_0 %int_0
                       %61 = OpConstantComposite %v4int %int_1 %int_1 %int_1 %int_1
       %_ptr_Uniform_v4int = OpTypePointer Uniform %v4int
                     %main = OpFunction %void None %3
                        %5 = OpLabel
                        %i = OpVariable %_ptr_Function_int Function
                       %18 = OpAccessChain %_ptr_Output_float %_ %int_1
                             OpStore %18 %float_1
                       %23 = OpAccessChain %_ptr_Output_v4float %_ %int_0
                             OpStore %23 %21
                             OpStore %i %int_0
                             OpBranch %26
                       %26 = OpLabel
                             OpLoopMerge %28 %29 None
                             OpBranch %30
                       %30 = OpLabel
                       %31 = OpLoad %int %i
                       %34 = OpSLessThan %bool %31 %niters
                             OpBranchConditional %34 %27 %28
                       %27 = OpLabel
                       %40 = OpLoad %int %i
                       %47 = OpLoad %int %i
                       %49 = OpAccessChain %_ptr_Uniform_v4tinput %input1 %int_0 %47
                       %50 = OpLoad %v4tinput %49
                       %55 = OpLoad %int %i
                       %56 = OpAccessChain %_ptr_Uniform_v4tinput %input2 %int_0 %55
                       %57 = OpLoad %v4tinput %56
                       %59 = ${OPNAME} %v4bool %50 %57
                       %62 = OpSelect %v4int %59 %61 %60
                       %64 = OpAccessChain %_ptr_Uniform_v4int %output1 %int_0 %40
                             OpStore %64 %62
                             OpBranch %29
                       %29 = OpLabel
                       %65 = OpLoad %int %i
                       %66 = OpIAdd %int %65 %int_1
                             OpStore %i %66
                             OpBranch %26
                       %28 = OpLabel
                             OpReturn
                             OpFunctionEnd
 )");

 // GLSL passthrough vertex shader to test the fragment shader.
 const std::string VertShaderPassThrough = R"(
 #version 430

 layout(location = 0) out vec4 out_color;

 void main()
 {
 		gl_PointSize	= 1;
 		gl_Position		= vec4(0.0, 0.0, 0.0, 1.0);
 		out_color		= vec4(0.0, 0.0, 0.0, 1.0);
 }
 )";

 // Shader template for the fragment stage using single scalars.
 // Generated from the following GLSL shader, replacing some bits by template parameters.
 #if 0
 #version 430

 // Left operands, right operands and results.
 layout(binding = 0) buffer Input1  { double values[];	} input1;
 layout(binding = 1) buffer Input2  { double values[];	} input2;
 layout(binding = 2) buffer Output1 { int values[];		} output1;

 void main()
 {
       for (int i = 0; i < 20; i++) {
               output1.values[i] = int(input1.values[i] == input2.values[i]);
       }
 }
 #endif
 const tcu::StringTemplate FragShaderSingle(R"(
                         OpCapability Shader
                         ${OPCAPABILITY}
                    %1 = OpExtInstImport "GLSL.std.450"
                         OpMemoryModel Logical GLSL450
                         OpEntryPoint Fragment %main "main"
                         OpExecutionMode %main OriginUpperLeft
                         OpSource GLSL 430
                         OpName %main "main"
                         OpName %i "i"
                         OpName %Output1 "Output1"
                         OpMemberName %Output1 0 "values"
                         OpName %output1 "output1"
                         OpName %Input1 "Input1"
                         OpMemberName %Input1 0 "values"
                         OpName %input1 "input1"
                         OpName %Input2 "Input2"
                         OpMemberName %Input2 0 "values"
                         OpName %input2 "input2"
                         OpDecorate %_runtimearr_int ArrayStride 4
                         OpMemberDecorate %Output1 0 Offset 0
                         OpDecorate %Output1 BufferBlock
                         OpDecorate %output1 DescriptorSet 0
                         OpDecorate %output1 Binding 2
                         OpDecorate %_runtimearr_tinput ArrayStride 8
                         OpMemberDecorate %Input1 0 Offset 0
                         OpDecorate %Input1 BufferBlock
                         OpDecorate %input1 DescriptorSet 0
                         OpDecorate %input1 Binding 0
                         OpDecorate %_runtimearr_tinput_0 ArrayStride 8
                         OpMemberDecorate %Input2 0 Offset 0
                         OpDecorate %Input2 BufferBlock
                         OpDecorate %input2 DescriptorSet 0
                         OpDecorate %input2 Binding 1
                 %void = OpTypeVoid
                    %3 = OpTypeFunction %void
                  %int = OpTypeInt 32 1
    %_ptr_Function_int = OpTypePointer Function %int
                %int_0 = OpConstant %int 0
               %niters = OpConstant %int ${ITERS}
                 %bool = OpTypeBool
      %_runtimearr_int = OpTypeRuntimeArray %int
              %Output1 = OpTypeStruct %_runtimearr_int
 %_ptr_Uniform_Output1 = OpTypePointer Uniform %Output1
              %output1 = OpVariable %_ptr_Uniform_Output1 Uniform
               %tinput = ${OPTYPE}
   %_runtimearr_tinput = OpTypeRuntimeArray %tinput
               %Input1 = OpTypeStruct %_runtimearr_tinput
  %_ptr_Uniform_Input1 = OpTypePointer Uniform %Input1
               %input1 = OpVariable %_ptr_Uniform_Input1 Uniform
  %_ptr_Uniform_tinput = OpTypePointer Uniform %tinput
 %_runtimearr_tinput_0 = OpTypeRuntimeArray %tinput
               %Input2 = OpTypeStruct %_runtimearr_tinput_0
  %_ptr_Uniform_Input2 = OpTypePointer Uniform %Input2
               %input2 = OpVariable %_ptr_Uniform_Input2 Uniform
                %int_1 = OpConstant %int 1
     %_ptr_Uniform_int = OpTypePointer Uniform %int
                 %main = OpFunction %void None %3
                    %5 = OpLabel
                    %i = OpVariable %_ptr_Function_int Function
                         OpStore %i %int_0
                         OpBranch %10
                   %10 = OpLabel
                         OpLoopMerge %12 %13 None
                         OpBranch %14
                   %14 = OpLabel
                   %15 = OpLoad %int %i
                   %18 = OpSLessThan %bool %15 %niters
                         OpBranchConditional %18 %11 %12
                   %11 = OpLabel
                   %23 = OpLoad %int %i
                   %29 = OpLoad %int %i
                   %31 = OpAccessChain %_ptr_Uniform_tinput %input1 %int_0 %29
                   %32 = OpLoad %tinput %31
                   %37 = OpLoad %int %i
                   %38 = OpAccessChain %_ptr_Uniform_tinput %input2 %int_0 %37
                   %39 = OpLoad %tinput %38
                   %40 = ${OPNAME} %bool %32 %39
                   %42 = OpSelect %int %40 %int_1 %int_0
                   %44 = OpAccessChain %_ptr_Uniform_int %output1 %int_0 %23
                         OpStore %44 %42
                         OpBranch %13
                   %13 = OpLabel
                   %45 = OpLoad %int %i
                   %46 = OpIAdd %int %45 %int_1
                         OpStore %i %46
                         OpBranch %10
                   %12 = OpLabel
                         OpReturn
                         OpFunctionEnd
 )");

 // Shader template for the fragment stage using vectors.
 // Generated from the following GLSL shader, replacing some bits by template parameters.
 // Note the number of iterations needs to be divided by 4 as the shader will consume 4 doubles at a time.
 #if 0
 #version 430

 // Left operands, right operands and results.
 layout(binding = 0) buffer Input1  { dvec4 values[]; } input1;
 layout(binding = 1) buffer Input2  { dvec4 values[]; } input2;
 layout(binding = 2) buffer Output1 { ivec4 values[]; } output1;

 void main()
 {
       for (int i = 0; i < 5; i++) {
               output1.values[i] = ivec4(equal(input1.values[i], input2.values[i]));
       }
 }
 #endif
 const tcu::StringTemplate FragShaderVector(R"(
                           OpCapability Shader
                           ${OPCAPABILITY}
                      %1 = OpExtInstImport "GLSL.std.450"
                           OpMemoryModel Logical GLSL450
                           OpEntryPoint Fragment %main "main"
                           OpExecutionMode %main OriginUpperLeft
                           OpName %main "main"
                           OpName %i "i"
                           OpName %Output1 "Output1"
                           OpMemberName %Output1 0 "values"
                           OpName %output1 "output1"
                           OpName %Input1 "Input1"
                           OpMemberName %Input1 0 "values"
                           OpName %input1 "input1"
                           OpName %Input2 "Input2"
                           OpMemberName %Input2 0 "values"
                           OpName %input2 "input2"
                           OpDecorate %_runtimearr_v4int ArrayStride 16
                           OpMemberDecorate %Output1 0 Offset 0
                           OpDecorate %Output1 BufferBlock
                           OpDecorate %output1 DescriptorSet 0
                           OpDecorate %output1 Binding 2
                           OpDecorate %_runtimearr_v4tinput ArrayStride 32
                           OpMemberDecorate %Input1 0 Offset 0
                           OpDecorate %Input1 BufferBlock
                           OpDecorate %input1 DescriptorSet 0
                           OpDecorate %input1 Binding 0
                           OpDecorate %_runtimearr_v4tinput_0 ArrayStride 32
                           OpMemberDecorate %Input2 0 Offset 0
                           OpDecorate %Input2 BufferBlock
                           OpDecorate %input2 DescriptorSet 0
                           OpDecorate %input2 Binding 1
                   %void = OpTypeVoid
                      %3 = OpTypeFunction %void
                    %int = OpTypeInt 32 1
      %_ptr_Function_int = OpTypePointer Function %int
                  %int_0 = OpConstant %int 0
                 %niters = OpConstant %int ${ITERS}
                   %bool = OpTypeBool
                  %v4int = OpTypeVector %int 4
      %_runtimearr_v4int = OpTypeRuntimeArray %v4int
                %Output1 = OpTypeStruct %_runtimearr_v4int
   %_ptr_Uniform_Output1 = OpTypePointer Uniform %Output1
                %output1 = OpVariable %_ptr_Uniform_Output1 Uniform
                 %tinput = ${OPTYPE}
               %v4tinput = OpTypeVector %tinput 4
   %_runtimearr_v4tinput = OpTypeRuntimeArray %v4tinput
                 %Input1 = OpTypeStruct %_runtimearr_v4tinput
    %_ptr_Uniform_Input1 = OpTypePointer Uniform %Input1
                 %input1 = OpVariable %_ptr_Uniform_Input1 Uniform
  %_ptr_Uniform_v4tinput = OpTypePointer Uniform %v4tinput
 %_runtimearr_v4tinput_0 = OpTypeRuntimeArray %v4tinput
                 %Input2 = OpTypeStruct %_runtimearr_v4tinput_0
    %_ptr_Uniform_Input2 = OpTypePointer Uniform %Input2
                 %input2 = OpVariable %_ptr_Uniform_Input2 Uniform
                 %v4bool = OpTypeVector %bool 4
                  %int_1 = OpConstant %int 1
                     %45 = OpConstantComposite %v4int %int_0 %int_0 %int_0 %int_0
                     %46 = OpConstantComposite %v4int %int_1 %int_1 %int_1 %int_1
     %_ptr_Uniform_v4int = OpTypePointer Uniform %v4int
                   %main = OpFunction %void None %3
                      %5 = OpLabel
                      %i = OpVariable %_ptr_Function_int Function
                           OpStore %i %int_0
                           OpBranch %10
                     %10 = OpLabel
                           OpLoopMerge %12 %13 None
                           OpBranch %14
                     %14 = OpLabel
                     %15 = OpLoad %int %i
                     %18 = OpSLessThan %bool %15 %niters
                           OpBranchConditional %18 %11 %12
                     %11 = OpLabel
                     %24 = OpLoad %int %i
                     %31 = OpLoad %int %i
                     %33 = OpAccessChain %_ptr_Uniform_v4tinput %input1 %int_0 %31
                     %34 = OpLoad %v4tinput %33
                     %39 = OpLoad %int %i
                     %40 = OpAccessChain %_ptr_Uniform_v4tinput %input2 %int_0 %39
                     %41 = OpLoad %v4tinput %40
                     %43 = ${OPNAME} %v4bool %34 %41
                     %47 = OpSelect %v4int %43 %46 %45
                     %49 = OpAccessChain %_ptr_Uniform_v4int %output1 %int_0 %24
                           OpStore %49 %47
                           OpBranch %13
                     %13 = OpLabel
                     %50 = OpLoad %int %i
                     %51 = OpIAdd %int %50 %int_1
                           OpStore %i %51
                           OpBranch %10
                     %12 = OpLabel
                           OpReturn
                           OpFunctionEnd
 )");

 struct SpirvTemplateManager
 {
 	static const tcu::StringTemplate& getTemplate (DataType type, vk::VkShaderStageFlagBits stage)
 	{
 		DE_ASSERT(type == DATA_TYPE_SINGLE || type == DATA_TYPE_VECTOR);
 		DE_ASSERT(	stage == vk::VK_SHADER_STAGE_COMPUTE_BIT		||
 					stage == vk::VK_SHADER_STAGE_VERTEX_BIT			||
 					stage == vk::VK_SHADER_STAGE_FRAGMENT_BIT		);

 		if (type == DATA_TYPE_SINGLE)
 		{
 			if (stage == vk::VK_SHADER_STAGE_COMPUTE_BIT)	return CompShaderSingle;
 			if (stage == vk::VK_SHADER_STAGE_VERTEX_BIT)	return VertShaderSingle;
 			else											return FragShaderSingle;
 		}
 		else
 		{
 			if (stage == vk::VK_SHADER_STAGE_COMPUTE_BIT)	return CompShaderVector;
 			if (stage == vk::VK_SHADER_STAGE_VERTEX_BIT)	return VertShaderVector;
 			else											return FragShaderVector;
 		}
 	}

 	// Specialized below for different types.
 	template <class T>
 	static std::string getOpCapability();

 	// Same.
 	template <class T>
 	static std::string getOpType();
 };

 template <> std::string SpirvTemplateManager::getOpCapability<double>()		{ return "OpCapability Float64";	}
 template <> std::string SpirvTemplateManager::getOpCapability<deInt64>()	{ return "OpCapability Int64";		}
 template <> std::string SpirvTemplateManager::getOpCapability<deUint64>()	{ return "OpCapability Int64";		}

 template <> std::string SpirvTemplateManager::getOpType<double>()	{ return "OpTypeFloat 64";	}
 template <> std::string SpirvTemplateManager::getOpType<deInt64>()	{ return "OpTypeInt 64 1";	}
 template <> std::string SpirvTemplateManager::getOpType<deUint64>()	{ return "OpTypeInt 64 0";	}

 struct BufferWithMemory
 {
 	vk::Move<vk::VkBuffer>		buffer;
 	de::MovePtr<vk::Allocation>	allocation;

 	BufferWithMemory ()
 		: buffer(), allocation()
 	{}

 	BufferWithMemory (BufferWithMemory&& other)
 		: buffer(other.buffer), allocation(other.allocation)
 	{}

 	BufferWithMemory& operator= (BufferWithMemory&& other)
 	{
 		buffer		= other.buffer;
 		allocation	= other.allocation;
 		return *this;
 	}
 };

 // Create storage buffer, bind memory to it and return both things.
 BufferWithMemory createStorageBuffer(const vk::DeviceInterface&	vkdi,
 									 const vk::VkDevice			device,
 									 vk::Allocator&				allocator,
 									 size_t						numBytes)
 {
 	const vk::VkBufferCreateInfo bufferCreateInfo =
 	{
 		vk::VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,	// sType
 		DE_NULL,									// pNext
 		0u,											// flags
 		numBytes,									// size
 		vk::VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,		// usage
 		vk::VK_SHARING_MODE_EXCLUSIVE,				// sharingMode
 		0u,											// queueFamilyCount
 		DE_NULL,									// pQueueFamilyIndices
 	};

 	BufferWithMemory bufmem;

 	bufmem.buffer								= vk::createBuffer(vkdi, device, &bufferCreateInfo);
 	const vk::VkMemoryRequirements requirements = getBufferMemoryRequirements(vkdi, device, *bufmem.buffer);
 	bufmem.allocation							= allocator.allocate(requirements, vk::MemoryRequirement::HostVisible);

 	VK_CHECK(vkdi.bindBufferMemory(device, *bufmem.buffer, bufmem.allocation->getMemory(), bufmem.allocation->getOffset()));

 	return bufmem;
 }

 vk::Move<vk::VkShaderModule> createShaderModule (const vk::DeviceInterface&	deviceInterface,
 												 vk::VkDevice				device,
 												 const vk::ProgramBinary&	binary)
 {
 	DE_ASSERT(binary.getFormat() == vk::PROGRAM_FORMAT_SPIRV);

 	const struct vk::VkShaderModuleCreateInfo shaderModuleInfo =
 	{
 		vk::VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
 		DE_NULL,
 		0,
 		static_cast<deUintptr>(binary.getSize()),
 		reinterpret_cast<const deUint32*>(binary.getBinary()),
 	};

 	return createShaderModule(deviceInterface, device, &shaderModuleInfo);
 }

 // Make sure the length of the following vectors is a multiple of 4. This will make sure operands can be reused for vectorized tests.
 const OperandsVector<double>	DOUBLE_OPERANDS		=
 {
 	{	-8.0,	-5.0	},
 	{	-5.0,	-8.0	},
 	{	-5.0,	-5.0	},
 	{	-5.0,	 0.0	},
 	{	 0.0,	-5.0	},
 	{	 5.0,	 0.0	},
 	{	 0.0,	 5.0	},
 	{	 0.0,	 0.0	},
 	{	-5.0,	 5.0	},
 	{	 5.0,	-5.0	},
 	{	 5.0,	 8.0	},
 	{	 8.0,	 5.0	},
 	{	 5.0,	 5.0	},
 	{	-6.0,	-5.0	},
 	{	 6.0,	 5.0	},
 	{	 0.0,	 1.0	},
 #if 0
 	{	 1.0,	 0.0	},
 	{	 0.0,	 NAN	},
 	{	 NAN,	 0.0	},
 	{	 NAN,	 NAN	},
 #endif
 };

 const OperandsVector<deInt64>	INT64_OPERANDS	=
 {
 	{	-8,		-5		},
 	{	-5,		-8		},
 	{	-5,		-5		},
 	{	-5,		 0		},
 	{	 0,		-5		},
 	{	 5,		 0		},
 	{	 0,		 5		},
 	{	 0,		 0		},
 	{	-5,		 5		},
 	{	 5,		-5		},
 	{	 5,		 8		},
 	{	 8,		 5		},
 	{	 5,		 5		},
 	{	-6,		-5		},
 	{	 6,		 5		},
 	{	 0,		 1		},
 };

 constexpr auto					MAX_DEUINT64	= std::numeric_limits<deUint64>::max();
 const OperandsVector<deUint64>	UINT64_OPERANDS	=
 {
 	{	0,					0					},
 	{	1,					0					},
 	{	0,					1					},
 	{	1,					1					},
 	{	5,					8					},
 	{	8,					5					},
 	{	5,					5					},
 	{	0,					MAX_DEUINT64		},
 	{	MAX_DEUINT64,		0					},
 	{	MAX_DEUINT64 - 1,	MAX_DEUINT64		},
 	{	MAX_DEUINT64,		MAX_DEUINT64 - 1	},
 	{	MAX_DEUINT64,		MAX_DEUINT64		},
 };

 template <class T>
 class T64bitCompareTestInstance : public TestInstance
 {
 public:
 							T64bitCompareTestInstance	(Context& ctx, const TestParameters<T>& params);
 	tcu::TestStatus			iterate						(void);

 private:
 	const TestParameters<T>	m_params;
 	const size_t			m_numOperations;
 	const size_t			m_inputBufferSize;
 	const size_t			m_outputBufferSize;
 };

 template <class T>
 T64bitCompareTestInstance<T>::T64bitCompareTestInstance (Context& ctx, const TestParameters<T>& params)
 	: TestInstance(ctx)
 	, m_params(params)
 	, m_numOperations(m_params.operands.size())
 	, m_inputBufferSize(m_numOperations * sizeof(T))
 	, m_outputBufferSize(m_numOperations * sizeof(int))
 {
 }

 template <class T>
 tcu::TestStatus T64bitCompareTestInstance<T>::iterate (void)
 {
 	DE_ASSERT(m_params.stage == vk::VK_SHADER_STAGE_COMPUTE_BIT		||
 			  m_params.stage == vk::VK_SHADER_STAGE_VERTEX_BIT		||
 			  m_params.stage == vk::VK_SHADER_STAGE_FRAGMENT_BIT	);

 	auto&			vkdi		= m_context.getDeviceInterface();
 	auto			device		= m_context.getDevice();
 	auto&			allocator	= m_context.getDefaultAllocator();

 	// Create storage buffers (left operands, right operands and results buffer).
 	BufferWithMemory input1		= createStorageBuffer(vkdi, device, allocator, m_inputBufferSize);
 	BufferWithMemory input2		= createStorageBuffer(vkdi, device, allocator, m_inputBufferSize);
 	BufferWithMemory output1	= createStorageBuffer(vkdi, device, allocator, m_outputBufferSize);

 	// Create an array of buffers.
 	std::vector<vk::VkBuffer> buffers;
 	buffers.push_back(input1.buffer.get());
 	buffers.push_back(input2.buffer.get());
 	buffers.push_back(output1.buffer.get());

 	// Create descriptor set layout.
 	std::vector<vk::VkDescriptorSetLayoutBinding> bindings;
 	for (size_t i = 0; i < buffers.size(); ++i)
 	{
 		vk::VkDescriptorSetLayoutBinding binding =
 		{
 			static_cast<deUint32>(i),								// uint32_t              binding;
 			vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,					// VkDescriptorType      descriptorType;
 			1u,														// uint32_t              descriptorCount;
 			static_cast<vk::VkShaderStageFlags>(m_params.stage),	// VkShaderStageFlags    stageFlags;
 			DE_NULL													// const VkSampler*      pImmutableSamplers;
 		};
 		bindings.push_back(binding);
 	}

 	const vk::VkDescriptorSetLayoutCreateInfo layoutCreateInfo =
 	{
 		vk::VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,	// VkStructureType                        sType;
 		DE_NULL,													// const void*                            pNext;
 		0,															// VkDescriptorSetLayoutCreateFlags       flags;
 		static_cast<deUint32>(bindings.size()),						// uint32_t                               bindingCount;
 		bindings.data()												// const VkDescriptorSetLayoutBinding*    pBindings;
 	};
 	auto descriptorSetLayout = vk::createDescriptorSetLayout(vkdi, device, &layoutCreateInfo);

 	// Create descriptor set.
 	vk::DescriptorPoolBuilder poolBuilder;
 	poolBuilder.addType(bindings[0].descriptorType, static_cast<deUint32>(bindings.size()));
 	auto descriptorPool = poolBuilder.build(vkdi, device, vk::VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);

 	const vk::VkDescriptorSetAllocateInfo allocateInfo =
 	{
 		vk::VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,	// VkStructureType                 sType;
 		DE_NULL,											// const void*                     pNext;
 		*descriptorPool,									// VkDescriptorPool                descriptorPool;
 		1u,													// uint32_t                        descriptorSetCount;
 		&descriptorSetLayout.get()							// const VkDescriptorSetLayout*    pSetLayouts;
 	};
 	auto descriptorSet = vk::allocateDescriptorSet(vkdi, device, &allocateInfo);

 	// Update descriptor set.
 	std::vector<vk::VkDescriptorBufferInfo>	descriptorBufferInfos;
 	std::vector<vk::VkWriteDescriptorSet>	descriptorWrites;

 	descriptorBufferInfos.reserve(buffers.size());
 	descriptorWrites.reserve(buffers.size());

 	for (size_t i = 0; i < buffers.size(); ++i)
 	{
 		vk::VkDescriptorBufferInfo bufferInfo =
 		{
 			buffers[i],		// VkBuffer        buffer;
 			0u,				// VkDeviceSize    offset;
 			VK_WHOLE_SIZE,	// VkDeviceSize    range;
 		};
 		descriptorBufferInfos.push_back(bufferInfo);
 	}

 	for (size_t i = 0; i < buffers.size(); ++i)
 	{
 		vk::VkWriteDescriptorSet write =
 		{
 			vk::VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,	// VkStructureType                  sType;
 			DE_NULL,									// const void*                      pNext;
 			*descriptorSet,								// VkDescriptorSet                  dstSet;
 			static_cast<deUint32>(i),					// uint32_t                         dstBinding;
 			0u,											// uint32_t                         dstArrayElement;
 			1u,											// uint32_t                         descriptorCount;
 			bindings[i].descriptorType,					// VkDescriptorType                 descriptorType;
 			DE_NULL,									// const VkDescriptorImageInfo*     pImageInfo;
 			&descriptorBufferInfos[i],					// const VkDescriptorBufferInfo*    pBufferInfo;
 			DE_NULL,									// const VkBufferView*              pTexelBufferView;
 		};
 		descriptorWrites.push_back(write);
 	}
 	vkdi.updateDescriptorSets(device, static_cast<deUint32>(descriptorWrites.size()), descriptorWrites.data(), 0u, DE_NULL);

 	// Fill storage buffers with data. Note: VkPhysicalDeviceLimits.minMemoryMapAlignment guarantees this cast is safe.
 	T*		input1Ptr	= reinterpret_cast<T*>		(input1.allocation->getHostPtr());
 	T*		input2Ptr	= reinterpret_cast<T*>		(input2.allocation->getHostPtr());
 	int*	output1Ptr	= reinterpret_cast<int*>	(output1.allocation->getHostPtr());

 	for (size_t i = 0; i < m_numOperations; ++i)
 	{
 		input1Ptr[i] = m_params.operands[i].first;
 		input2Ptr[i] = m_params.operands[i].second;
 		output1Ptr[i] = -9;
 	}

 	// Flush memory.
 	vk::flushAlloc(vkdi, device, *input1.allocation);
 	vk::flushAlloc(vkdi, device, *input2.allocation);
 	vk::flushAlloc(vkdi, device, *output1.allocation);

 	// Prepare barriers in advance so data is visible to the shaders and the host.
 	std::vector<vk::VkBufferMemoryBarrier> hostToDevBarriers;
 	std::vector<vk::VkBufferMemoryBarrier> devToHostBarriers;
 	for (size_t i = 0; i < buffers.size(); ++i)
 	{
 		const vk::VkBufferMemoryBarrier hostDev =
 		{
 			vk::VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,						// VkStructureType	sType;
 			DE_NULL,															// const void*		pNext;
 			vk::VK_ACCESS_HOST_WRITE_BIT,										// VkAccessFlags	srcAccessMask;
 			(vk::VK_ACCESS_SHADER_READ_BIT | vk::VK_ACCESS_SHADER_WRITE_BIT),	// VkAccessFlags	dstAccessMask;
 			VK_QUEUE_FAMILY_IGNORED,											// deUint32			srcQueueFamilyIndex;
 			VK_QUEUE_FAMILY_IGNORED,											// deUint32			dstQueueFamilyIndex;
 			buffers[i],															// VkBuffer			buffer;
 			0u,																	// VkDeviceSize		offset;
 			VK_WHOLE_SIZE,														// VkDeviceSize		size;
 		};
 		hostToDevBarriers.push_back(hostDev);

 		const vk::VkBufferMemoryBarrier devHost =
 		{
 			vk::VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,						// VkStructureType	sType;
 			DE_NULL,															// const void*		pNext;
 			vk::VK_ACCESS_SHADER_WRITE_BIT,										// VkAccessFlags	srcAccessMask;
 			vk::VK_ACCESS_HOST_READ_BIT,										// VkAccessFlags	dstAccessMask;
 			VK_QUEUE_FAMILY_IGNORED,											// deUint32			srcQueueFamilyIndex;
 			VK_QUEUE_FAMILY_IGNORED,											// deUint32			dstQueueFamilyIndex;
 			buffers[i],															// VkBuffer			buffer;
 			0u,																	// VkDeviceSize		offset;
 			VK_WHOLE_SIZE,														// VkDeviceSize		size;
 		};
 		devToHostBarriers.push_back(devHost);
 	}

 	// Create command pool and command buffer.
 	auto queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();

 	const vk::VkCommandPoolCreateInfo cmdPoolCreateInfo =
 	{
 		vk::VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,	// VkStructureType				sType;
 		DE_NULL,										// const void*					pNext;
 		vk::VK_COMMAND_POOL_CREATE_TRANSIENT_BIT,		// VkCommandPoolCreateFlags		flags;
 		queueFamilyIndex,								// deUint32						queueFamilyIndex;
 	};
 	auto cmdPool = vk::createCommandPool(vkdi, device, &cmdPoolCreateInfo);

 	const vk::VkCommandBufferAllocateInfo cmdBufferAllocateInfo =
 	{
 		vk::VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,	// VkStructureType			sType;
 		DE_NULL,											// const void*				pNext;
 		*cmdPool,											// VkCommandPool			commandPool;
 		vk::VK_COMMAND_BUFFER_LEVEL_PRIMARY,				// VkCommandBufferLevel		level;
 		1u,													// deUint32					commandBufferCount;
 	};
 	auto cmdBuffer = vk::allocateCommandBuffer(vkdi, device, &cmdBufferAllocateInfo);

 	// Create pipeline layout.
 	const vk::VkPipelineLayoutCreateInfo pipelineLayoutCreateInfo =
 	{
 		vk::VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,	// VkStructureType					sType;
 		DE_NULL,											// const void*						pNext;
 		0,													// VkPipelineLayoutCreateFlags		flags;
 		1u,													// deUint32							setLayoutCount;
 		&descriptorSetLayout.get(),							// const VkDescriptorSetLayout*		pSetLayouts;
 		0u,													// deUint32							pushConstantRangeCount;
 		DE_NULL,											// const VkPushConstantRange*		pPushConstantRanges;
 	};
 	auto pipelineLayout = vk::createPipelineLayout(vkdi, device, &pipelineLayoutCreateInfo);

 	if (m_params.stage == vk::VK_SHADER_STAGE_COMPUTE_BIT)
 	{
 		// Create compute pipeline.
 		auto compShaderModule = createShaderModule(vkdi, device, m_context.getBinaryCollection().get("comp"));

 		const vk::VkComputePipelineCreateInfo computeCreateInfo =
 		{
 			vk::VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,	// VkStructureType                    sType;
 			DE_NULL,											// const void*                        pNext;
 			0,													// VkPipelineCreateFlags              flags;
 			{													// VkPipelineShaderStageCreateInfo    stage;
 				vk::VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,	// VkStructureType                     sType;
 				DE_NULL,													// const void*                         pNext;
 				0,															// VkPipelineShaderStageCreateFlags    flags;
 				vk::VK_SHADER_STAGE_COMPUTE_BIT,							// VkShaderStageFlagBits               stage;
 				*compShaderModule,											// VkShaderModule                      module;
 				"main",														// const char*                         pName;
 				DE_NULL,													// const VkSpecializationInfo*         pSpecializationInfo;
 			},
 			*pipelineLayout,									// VkPipelineLayout                   layout;
 			DE_NULL,											// VkPipeline                         basePipelineHandle;
 			0,													// int32_t                            basePipelineIndex;
 		};
 		auto computePipeline = vk::createComputePipeline(vkdi, device, DE_NULL, &computeCreateInfo);

 		// Run the shader.
 		vk::beginCommandBuffer(vkdi, *cmdBuffer);
 			vkdi.cmdBindPipeline(*cmdBuffer, vk::VK_PIPELINE_BIND_POINT_COMPUTE, *computePipeline);
 			vkdi.cmdBindDescriptorSets(*cmdBuffer, vk::VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0, 1u, &descriptorSet.get(), 0u, DE_NULL);
 			vkdi.cmdPipelineBarrier(*cmdBuffer, vk::VK_PIPELINE_STAGE_HOST_BIT, vk::VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0u, DE_NULL, static_cast<deUint32>(hostToDevBarriers.size()), hostToDevBarriers.data(), 0u, DE_NULL);
 			vkdi.cmdDispatch(*cmdBuffer, 1u, 1u, 1u);
 			vkdi.cmdPipelineBarrier(*cmdBuffer, vk::VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, vk::VK_PIPELINE_STAGE_HOST_BIT, 0, 0u, DE_NULL, static_cast<deUint32>(devToHostBarriers.size()), devToHostBarriers.data(), 0u, DE_NULL);
 		vk::endCommandBuffer(vkdi, *cmdBuffer);
 		vk::submitCommandsAndWait(vkdi, device, m_context.getUniversalQueue(), *cmdBuffer);
 	}
 	else if (m_params.stage == vk::VK_SHADER_STAGE_VERTEX_BIT	||
 			 m_params.stage == vk::VK_SHADER_STAGE_FRAGMENT_BIT	)
 	{
 		const bool isFrag = (m_params.stage == vk::VK_SHADER_STAGE_FRAGMENT_BIT);

 		// Create graphics pipeline.
 		auto												vertShaderModule = createShaderModule(vkdi, device, m_context.getBinaryCollection().get("vert"));
 		vk::Move<vk::VkShaderModule>						fragShaderModule;
 		std::vector<vk::VkPipelineShaderStageCreateInfo>	shaderStages;

 		const vk::VkPipelineShaderStageCreateInfo vertexStage =
 		{
 			vk::VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,	// VkStructureType						sType;
 			DE_NULL,													// const void*							pNext;
 			0,															// VkPipelineShaderStageCreateFlags		flags;
 			vk::VK_SHADER_STAGE_VERTEX_BIT,								// VkShaderStageFlagBits				stage;
 			*vertShaderModule,											// VkShaderModule						module;
 			"main",														// const char*							pName;
 			DE_NULL,													// const VkSpecializationInfo*			pSpecializationInfo;
 		};
 		shaderStages.push_back(vertexStage);

 		if (isFrag)
 		{
 			fragShaderModule = createShaderModule(vkdi, device, m_context.getBinaryCollection().get("frag"));

 			const vk::VkPipelineShaderStageCreateInfo fragmentStage =
 			{
 				vk::VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,	// VkStructureType						sType;
 				DE_NULL,													// const void*							pNext;
 				0,															// VkPipelineShaderStageCreateFlags		flags;
 				vk::VK_SHADER_STAGE_FRAGMENT_BIT,							// VkShaderStageFlagBits				stage;
 				*fragShaderModule,											// VkShaderModule						module;
 				"main",														// const char*							pName;
 				DE_NULL,													// const VkSpecializationInfo*			pSpecializationInfo;
 			};
 			shaderStages.push_back(fragmentStage);
 		}

         const vk::VkPipelineVertexInputStateCreateInfo vertexInputInfo =
 		{
 			vk::VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,	// VkStructureType								sType;
 			DE_NULL,														// const void*									pNext;
 			0,																// VkPipelineVertexInputStateCreateFlags		flags;
 			0u,																// deUint32										vertexBindingDescriptionCount;
 			DE_NULL,														// const VkVertexInputBindingDescription*		pVertexBindingDescriptions;
 			0u,																// deUint32										vertexAttributeDescriptionCount;
 			DE_NULL,														// const VkVertexInputAttributeDescription*		pVertexAttributeDescriptions;
 		};

         const vk::VkPipelineInputAssemblyStateCreateInfo inputAssembly =
 		{
 			vk::VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,	// VkStructureType							sType;
 			DE_NULL,															// const void*								pNext;
 			0u,																	// VkPipelineInputAssemblyStateCreateFlags	flags;
 			vk::VK_PRIMITIVE_TOPOLOGY_POINT_LIST,								// VkPrimitiveTopology						topology;
 			VK_FALSE,															// VkBool32									primitiveRestartEnable;
 		};

 		const vk::VkPipelineRasterizationStateCreateInfo rasterizationState =
 		{
 			vk::VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,	// VkStructureType							sType;
 			DE_NULL,														// const void*								pNext;
 			0,																// VkPipelineRasterizationStateCreateFlags	flags;
 			VK_FALSE,														// VkBool32									depthClampEnable;
 			(isFrag ? VK_FALSE : VK_TRUE),									// VkBool32									rasterizerDiscardEnable;
 			vk::VK_POLYGON_MODE_FILL,										// VkPolygonMode							polygonMode;
 			vk::VK_CULL_MODE_NONE,											// VkCullModeFlags							cullMode;
 			vk::VK_FRONT_FACE_COUNTER_CLOCKWISE,							// VkFrontFace								frontFace;
 			VK_FALSE,														// VkBool32									depthBiasEnable;
 			0.0f,															// float									depthBiasConstantFactor;
 			0.0f,															// float									depthBiasClamp;
 			0.0f,															// float									depthBiasSlopeFactor;
 			1.0f,															// float									lineWidth;
 		};

 		const vk::VkSubpassDescription subpassDescription =
 		{
 			0,										// VkSubpassDescriptionFlags		flags;
 			vk::VK_PIPELINE_BIND_POINT_GRAPHICS,	// VkPipelineBindPoint				pipelineBindPoint;
 			0u,										// deUint32							inputAttachmentCount;
 			DE_NULL,								// const VkAttachmentReference*		pInputAttachments;
 			0u,										// deUint32							colorAttachmentCount;
 			DE_NULL,								// const VkAttachmentReference*		pColorAttachments;
 			DE_NULL,								// const VkAttachmentReference*		pResolveAttachments;
 			DE_NULL,								// const VkAttachmentReference*		pDepthStencilAttachment;
 			0u,										// deUint32							preserveAttachmentCount;
 			0u,										// const deUint32*					pPreserveAttachments;
 		};

 		const vk::VkRenderPassCreateInfo renderPassCreateInfo =
 		{
 			vk::VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,	// VkStructureType					sType;
 			DE_NULL,										// const void*						pNext;
 			0,												// VkRenderPassCreateFlags			flags;
 			0u,												// deUint32							attachmentCount;
 			DE_NULL,										// const VkAttachmentDescription*	pAttachments;
 			1u,												// deUint32							subpassCount;
 			&subpassDescription,							// const VkSubpassDescription*		pSubpasses;
 			0u,												// deUint32							dependencyCount;
 			DE_NULL,										// const VkSubpassDependency*		pDependencies;
 		};
 		auto renderPass = vk::createRenderPass(vkdi, device, &renderPassCreateInfo);

 		std::unique_ptr<vk::VkPipelineMultisampleStateCreateInfo> multisampleState;
 		if (isFrag)
 		{
 			multisampleState.reset(new vk::VkPipelineMultisampleStateCreateInfo);
 			*multisampleState =
 			{
 				vk::VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,	// VkStructureType							sType;
 				DE_NULL,														// const void*								pNext;
 				0,																// VkPipelineMultisampleStateCreateFlags	flags;
 				vk::VK_SAMPLE_COUNT_1_BIT,										// VkSampleCountFlagBits					rasterizationSamples;
 				VK_FALSE,														// VkBool32									sampleShadingEnable;
 				0.0f,															// float									minSampleShading;
 				DE_NULL,														// const VkSampleMask*						pSampleMask;
 				VK_FALSE,														// VkBool32									alphaToCoverageEnable;
 				VK_FALSE,														// VkBool32									alphaToOneEnable;
 			};
 		}

 		const vk::VkViewport viewport =
 		{
 			0.0f,	// float	x;
 			0.0f,	// float	y;
 			1.0f,	// float	width;
 			1.0f,	// float	height;
 			0.0f,	// float	minDepth;
 			1.0f,	// float	maxDepth;
 		};

 		const vk::VkRect2D renderArea = { { 0u, 0u }, { 1u, 1u } };

 		std::unique_ptr<vk::VkPipelineViewportStateCreateInfo> viewportState;

 		if (isFrag)
 		{
 			viewportState.reset(new vk::VkPipelineViewportStateCreateInfo);
 			*viewportState =
 			{
 				vk::VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,	// VkStructureType						sType;
 				DE_NULL,													// const void*							pNext;
 				0,															// VkPipelineViewportStateCreateFlags	flags;
 				1u,															// deUint32							viewportCount;
 				&viewport,													// const VkViewport*					pViewports;
 				1u,															// deUint32							scissorCount;
 				&renderArea,												// const VkRect2D*						pScissors;
 			};
 		}

 		const vk::VkGraphicsPipelineCreateInfo graphicsCreateInfo =
 		{
 			vk::VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,	// VkStructureType									sType;
 			DE_NULL,												// const void*										pNext;
 			0,														// VkPipelineCreateFlags							flags;
 			static_cast<deUint32>(shaderStages.size()),				// deUint32											stageCount;
 			shaderStages.data(),									// const VkPipelineShaderStageCreateInfo*			pStages;
 			&vertexInputInfo,										// const VkPipelineVertexInputStateCreateInfo*		pVertexInputState;
 			&inputAssembly,											// const VkPipelineInputAssemblyStateCreateInfo*	pInputAssemblyState;
 			DE_NULL,												// const VkPipelineTessellationStateCreateInfo*		pTessellationState;
 			viewportState.get(),									// const VkPipelineViewportStateCreateInfo*			pViewportState;
 			&rasterizationState,									// const VkPipelineRasterizationStateCreateInfo*	pRasterizationState;
 			multisampleState.get(),									// const VkPipelineMultisampleStateCreateInfo*		pMultisampleState;
 			DE_NULL,												// const VkPipelineDepthStencilStateCreateInfo*		pDepthStencilState;
 			DE_NULL,												// const VkPipelineColorBlendStateCreateInfo*		pColorBlendState;
 			DE_NULL,												// const VkPipelineDynamicStateCreateInfo*			pDynamicState;
 			*pipelineLayout,										// VkPipelineLayout									layout;
 			*renderPass,											// VkRenderPass										renderPass;
 			0u,														// deUint32											subpass;
 			DE_NULL,												// VkPipeline										basePipelineHandle;
 			0u,														// deInt32											basePipelineIndex;
 		};
 		auto graphicsPipeline = vk::createGraphicsPipeline(vkdi, device, DE_NULL, &graphicsCreateInfo);

 		const vk::VkFramebufferCreateInfo frameBufferCreateInfo =
 		{
 			vk::VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,	// VkStructureType				sType;
 			DE_NULL,										// const void*					pNext;
 			0,												// VkFramebufferCreateFlags		flags;
 			*renderPass,									// VkRenderPass					renderPass;
 			0u,												// deUint32						attachmentCount;
 			DE_NULL,										// const VkImageView*			pAttachments;
 			1u,												// deUint32						width;
 			1u,												// deUint32						height;
 			1u,												// deUint32						layers;
 		};
 		auto frameBuffer = vk::createFramebuffer(vkdi, device, &frameBufferCreateInfo);

 		const vk::VkRenderPassBeginInfo renderPassBeginInfo =
 		{
 			vk::VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,	// VkStructureType		sType;
 			DE_NULL,										// const void*			pNext;
 			*renderPass,									// VkRenderPass			renderPass;
 			*frameBuffer,									// VkFramebuffer		framebuffer;
 			renderArea,										// VkRect2D				renderArea;
 			0u,												// deUint32				clearValueCount;
 			DE_NULL,										// const VkClearValue*	pClearValues;
 		};

 		// Run the shader.
 		vk::VkPipelineStageFlags pipelineStage = (isFrag ? vk::VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT : vk::VK_PIPELINE_STAGE_VERTEX_SHADER_BIT);

 		vk::beginCommandBuffer(vkdi, *cmdBuffer);
 			vkdi.cmdPipelineBarrier(*cmdBuffer, vk::VK_PIPELINE_STAGE_HOST_BIT, pipelineStage, 0, 0u, DE_NULL, static_cast<deUint32>(hostToDevBarriers.size()), hostToDevBarriers.data(), 0u, DE_NULL);
 			vkdi.cmdBeginRenderPass(*cmdBuffer, &renderPassBeginInfo, vk::VK_SUBPASS_CONTENTS_INLINE);
 				vkdi.cmdBindPipeline(*cmdBuffer, vk::VK_PIPELINE_BIND_POINT_GRAPHICS, *graphicsPipeline);
 				vkdi.cmdBindDescriptorSets(*cmdBuffer, vk::VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0, 1u, &descriptorSet.get(), 0u, DE_NULL);
 				vkdi.cmdDraw(*cmdBuffer, 1u, 1u, 0u, 0u);
 			vkdi.cmdEndRenderPass(*cmdBuffer);
 			vkdi.cmdPipelineBarrier(*cmdBuffer, pipelineStage, vk::VK_PIPELINE_STAGE_HOST_BIT, 0, 0u, DE_NULL, static_cast<deUint32>(devToHostBarriers.size()), devToHostBarriers.data(), 0u, DE_NULL);
 		vk::endCommandBuffer(vkdi, *cmdBuffer);
 		vk::submitCommandsAndWait(vkdi, device, m_context.getUniversalQueue(), *cmdBuffer);
 	}

 	// Invalidate allocations.
 	vk::invalidateAlloc(vkdi, device, *input1.allocation);
 	vk::invalidateAlloc(vkdi, device, *input2.allocation);
 	vk::invalidateAlloc(vkdi, device, *output1.allocation);

 	// Read and verify results.
 	std::vector<int> results(m_numOperations);
 	deMemcpy(results.data(), output1.allocation->getHostPtr(), m_outputBufferSize);
 	for (size_t i = 0; i < m_numOperations; ++i)
 	{
 		int expected = static_cast<int>(m_params.operation.run(m_params.operands[i].first, m_params.operands[i].second));
 		if (results[i] != expected)
 		{
 			std::ostringstream msg;
 			msg << "Invalid result found in position " << i << ": expected " << expected << " and found " << results[i];
 			return tcu::TestStatus::fail(msg.str());
 		}
 	}

 	return tcu::TestStatus::pass("Pass");
 }

 template <class T>
 class T64bitCompareTest : public TestCase
 {
 public:
 							T64bitCompareTest	(tcu::TestContext& testCtx, const std::string& name, const std::string& description, const TestParameters<T>& params);
 	virtual void			checkSupport		(Context& context) const;
 	virtual void			initPrograms		(vk::SourceCollections& programCollection) const;
 	virtual TestInstance*	createInstance		(Context& ctx) const;

 private:
 	const TestParameters<T>	m_params;
 };

 template <class T>
 T64bitCompareTest<T>::T64bitCompareTest (tcu::TestContext& testCtx, const std::string& name, const std::string& description, const TestParameters<T>& params)
 	: TestCase(testCtx, name, description), m_params(params)
 {
 	// This is needed so that the same operands can be used for single-element comparisons or for vectorized comparisons (which use *vec4 types).
 	DE_ASSERT(m_params.operands.size() % 4 == 0);
 }

 // This template checks the needed type support features in shaders for type T.
 // Specializations are provided below.
 template <class T>
 void checkTypeSupport(const vk::VkPhysicalDeviceFeatures& features);

 template <>
 void checkTypeSupport<double>(const vk::VkPhysicalDeviceFeatures& features)
 {
 	if (!features.shaderFloat64)
 		TCU_THROW(NotSupportedError, "64-bit floats not supported in shaders");
 }

 void check64bitIntegers(const vk::VkPhysicalDeviceFeatures& features)
 {
 	if (!features.shaderInt64)
 		TCU_THROW(NotSupportedError, "64-bit integer types not supported in shaders");
 }

 template <>
 void checkTypeSupport<deInt64>(const vk::VkPhysicalDeviceFeatures& features)
 {
 	check64bitIntegers(features);
 }

 template <>
 void checkTypeSupport<deUint64>(const vk::VkPhysicalDeviceFeatures& features)
 {
 	check64bitIntegers(features);
 }

 template <class T>
 void T64bitCompareTest<T>::checkSupport (Context& context) const
 {
 	auto&	vki				= context.getInstanceInterface();
 	auto	physicalDevice	= context.getPhysicalDevice();
 	auto	features		= vk::getPhysicalDeviceFeatures(vki, physicalDevice);

 	checkTypeSupport<T>(features);

 	switch (m_params.stage)
 	{
 	case vk::VK_SHADER_STAGE_COMPUTE_BIT:
 		break;
 	case vk::VK_SHADER_STAGE_VERTEX_BIT:
 		if (!features.vertexPipelineStoresAndAtomics)
 			TCU_THROW(NotSupportedError, "Vertex shader does not support stores");
 		break;
 	case vk::VK_SHADER_STAGE_FRAGMENT_BIT:
 		if (!features.fragmentStoresAndAtomics)
 			TCU_THROW(NotSupportedError, "Fragment shader does not support stores");
 		break;
 	default:
 		DE_ASSERT(DE_NULL == "Invalid shader stage specified");
 	}
 }

 template <class T>
 void T64bitCompareTest<T>::initPrograms (vk::SourceCollections& programCollection) const
 {
 	DE_ASSERT(m_params.stage == vk::VK_SHADER_STAGE_COMPUTE_BIT		||
 			  m_params.stage == vk::VK_SHADER_STAGE_VERTEX_BIT		||
 			  m_params.stage == vk::VK_SHADER_STAGE_FRAGMENT_BIT	);

 	std::map<std::string, std::string> replacements;
 	replacements["ITERS"]			= de::toString((m_params.dataType == DATA_TYPE_SINGLE) ? m_params.operands.size() : m_params.operands.size() / 4);
 	replacements["OPNAME"]			= m_params.operation.spirvName();
 	replacements["OPCAPABILITY"]	= SpirvTemplateManager::getOpCapability<T>();
 	replacements["OPTYPE"]			= SpirvTemplateManager::getOpType<T>();

 	static const std::map<vk::VkShaderStageFlagBits, std::string>	sourceNames			=
 	{
 		std::make_pair( vk::VK_SHADER_STAGE_COMPUTE_BIT,	"comp"	),
 		std::make_pair( vk::VK_SHADER_STAGE_VERTEX_BIT,		"vert"	),
 		std::make_pair( vk::VK_SHADER_STAGE_FRAGMENT_BIT,	"frag"	),
 	};

 	// Add the proper template under the proper name.
 	programCollection.spirvAsmSources.add(sourceNames.find(m_params.stage)->second) << SpirvTemplateManager::getTemplate(m_params.dataType, m_params.stage).specialize(replacements);

 	// Add the passthrough vertex shader needed for the fragment shader.
 	if (m_params.stage == vk::VK_SHADER_STAGE_FRAGMENT_BIT)
 		programCollection.glslSources.add("vert") << glu::VertexSource(VertShaderPassThrough);
 }

 template <class T>
 TestInstance* T64bitCompareTest<T>::createInstance (Context& ctx) const
 {
 	return new T64bitCompareTestInstance<T>(ctx, m_params);
 }

 const std::map<DataType, std::string> dataTypeName =
 {
 	std::make_pair(DATA_TYPE_SINGLE, "single"),
 	std::make_pair(DATA_TYPE_VECTOR, "vector"),
 };

 using StageName = std::map<vk::VkShaderStageFlagBits, std::string>;

 void createDoubleCompareTestsInGroup (tcu::TestCaseGroup* tests, const StageName* stageNames)
 {
 	static const std::vector<const CompareOperation<double>*> operationList =
 	{
 		// Ordered operations.
 		&FOrderedEqualOp,
 		&FOrderedNotEqualOp,
 		&FOrderedLessThanOp,
 		&FOrderedLessThanEqualOp,
 		&FOrderedGreaterThanOp,
 		&FOrderedGreaterThanEqualOp,
 		// Unordered operations.
 		&FUnorderedEqualOp,
 		&FUnorderedNotEqualOp,
 		&FUnorderedLessThanOp,
 		&FUnorderedLessThanEqualOp,
 		&FUnorderedGreaterThanOp,
 		&FUnorderedGreaterThanEqualOp,
 	};

 	for (const auto&	stageNamePair	: *stageNames)
 	for (const auto&	typeNamePair	: dataTypeName)
 	for (const auto		opPtr			: operationList)
 	{
 		TestParameters<double>	params		= { typeNamePair.first, *opPtr, stageNamePair.first, DOUBLE_OPERANDS };
 		std::string				testName	= stageNamePair.second + "_" + de::toLower(opPtr->spirvName()) + "_" + typeNamePair.second;
 		tests->addChild(new T64bitCompareTest<double>(tests->getTestContext(), testName, "", params));
 	}
 }

 void createInt64CompareTestsInGroup (tcu::TestCaseGroup* tests, const StageName* stageNames)
 {
 	static const std::vector<const CompareOperation<deInt64>*> operationList =
 	{
 		&deInt64EqualOp,
 		&deInt64NotEqualOp,
 		&deInt64LessThanOp,
 		&deInt64LessThanEqualOp,
 		&deInt64GreaterThanOp,
 		&deInt64GreaterThanEqualOp,
 	};

 	for (const auto&	stageNamePair	: *stageNames)
 	for (const auto&	typeNamePair	: dataTypeName)
 	for (const auto		opPtr			: operationList)
 	{
 		TestParameters<deInt64>	params		= { typeNamePair.first, *opPtr, stageNamePair.first, INT64_OPERANDS };
 		std::string				testName	= stageNamePair.second + "_" + de::toLower(opPtr->spirvName()) + "_" + typeNamePair.second;
 		tests->addChild(new T64bitCompareTest<deInt64>(tests->getTestContext(), testName, "", params));
 	}
 }

 void createUint64CompareTestsInGroup (tcu::TestCaseGroup* tests, const StageName* stageNames)
 {
 	static const std::vector<const CompareOperation<deUint64>*> operationList =
 	{
 		&deUint64EqualOp,
 		&deUint64NotEqualOp,
 		&deUint64LessThanOp,
 		&deUint64LessThanEqualOp,
 		&deUint64GreaterThanOp,
 		&deUint64GreaterThanEqualOp,
 	};

 	for (const auto&	stageNamePair	: *stageNames)
 	for (const auto&	typeNamePair	: dataTypeName)
 	for (const auto		opPtr			: operationList)
 	{
 		TestParameters<deUint64>	params		= { typeNamePair.first, *opPtr, stageNamePair.first, UINT64_OPERANDS };
 		std::string					testName	= stageNamePair.second + "_" + de::toLower(opPtr->spirvName()) + "_" + typeNamePair.second;
 		tests->addChild(new T64bitCompareTest<deUint64>(tests->getTestContext(), testName, "", params));
 	}
 }

 struct TestMgr
 {
 	typedef void (*CreationFunctionPtr)(tcu::TestCaseGroup*, const StageName*);

 	static const char* getParentGroupName () { return "64bit_compare"; }
 	static const char* getParentGroupDesc () { return "64-bit type comparison operations"; }

 	template <class T>
 	static std::string getGroupName ();

 	template <class T>
 	static std::string getGroupDesc ();

 	template <class T>
 	static CreationFunctionPtr getCreationFunction ();
 };

 template <> std::string TestMgr::getGroupName<double>()		{ return "double";	}
 template <> std::string TestMgr::getGroupName<deInt64>()	{ return "int64";	}
 template <> std::string TestMgr::getGroupName<deUint64>()	{ return "uint64";	}

 template <> std::string TestMgr::getGroupDesc<double>()		{ return "64-bit floating point tests";		}
 template <> std::string TestMgr::getGroupDesc<deInt64>()	{ return "64-bit signed integer tests";		}
 template <> std::string TestMgr::getGroupDesc<deUint64>()	{ return "64-bit unsigned integer tests";	}

 template <> TestMgr::CreationFunctionPtr TestMgr::getCreationFunction<double> ()	{ return createDoubleCompareTestsInGroup;	}
 template <> TestMgr::CreationFunctionPtr TestMgr::getCreationFunction<deInt64> ()	{ return createInt64CompareTestsInGroup;	}
 template <> TestMgr::CreationFunctionPtr TestMgr::getCreationFunction<deUint64> ()	{ return createUint64CompareTestsInGroup;	}

 } // anonymous

 tcu::TestCaseGroup* create64bitCompareGraphicsGroup (tcu::TestContext& testCtx)
 {
 	static const StageName graphicStages =
 	{
 		std::make_pair(vk::VK_SHADER_STAGE_VERTEX_BIT,		"vert"),
 		std::make_pair(vk::VK_SHADER_STAGE_FRAGMENT_BIT,	"frag"),
 	};

 	tcu::TestCaseGroup* newGroup = new tcu::TestCaseGroup(testCtx, TestMgr::getParentGroupName(), TestMgr::getParentGroupDesc());
 	newGroup->addChild(createTestGroup(testCtx, TestMgr::getGroupName<double>(),	TestMgr::getGroupDesc<double>(),	TestMgr::getCreationFunction<double>(),		&graphicStages));
 	newGroup->addChild(createTestGroup(testCtx, TestMgr::getGroupName<deInt64>(),	TestMgr::getGroupDesc<deInt64>(),	TestMgr::getCreationFunction<deInt64>(),	&graphicStages));
 	newGroup->addChild(createTestGroup(testCtx, TestMgr::getGroupName<deUint64>(),	TestMgr::getGroupDesc<deUint64>(),	TestMgr::getCreationFunction<deUint64>(),	&graphicStages));
 	return newGroup;
 }

 tcu::TestCaseGroup* create64bitCompareComputeGroup (tcu::TestContext& testCtx)
 {
 	static const StageName computeStages =
 	{
 		std::make_pair(vk::VK_SHADER_STAGE_COMPUTE_BIT,		"comp"),
 	};

 	tcu::TestCaseGroup* newGroup = new tcu::TestCaseGroup(testCtx, TestMgr::getParentGroupName(), TestMgr::getParentGroupDesc());
 	newGroup->addChild(createTestGroup(testCtx, TestMgr::getGroupName<double>(),	TestMgr::getGroupDesc<double>(),	TestMgr::getCreationFunction<double>(),		&computeStages));
 	newGroup->addChild(createTestGroup(testCtx, TestMgr::getGroupName<deInt64>(),	TestMgr::getGroupDesc<deInt64>(),	TestMgr::getCreationFunction<deInt64>(),	&computeStages));
 	newGroup->addChild(createTestGroup(testCtx, TestMgr::getGroupName<deUint64>(),	TestMgr::getGroupDesc<deUint64>(),	TestMgr::getCreationFunction<deUint64>(),	&computeStages));
 	return newGroup;
 }

 } // SpirVAssembly
 } // vkt