Added conformance tests for version 1.3 of the spec
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 6a6caac..29518e1 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,4 +1,4 @@
-# 
+#
 
 # Copyright (c) 2012-2017 The Khronos Group Inc.
 #
@@ -51,6 +51,35 @@
 option( OPENVX_USE_IX ON )
 option( OPENVX_USE_NN ON )
 option( OPENVX_USE_NN_16 ON )
+option( OPENVX_USE_PIPELINING OFF )
+option( OPENVX_USE_STREAMING OFF )
+option( OPENVX_USE_USER_DATA_OBJECT OFF )
+option( OPENVX_USE_U1 ON )
+option( OPENVX_CONFORMANCE_VISION ON )
+option( OPENVX_CONFORMANCE_NEURAL_NETWORKS ON )
+option( OPENVX_CONFORMANCE_NNEF_IMPORT ON )
+option( OPENVX_USE_ENHANCED_VISION ON )
+
+
+message( "-- Selected OpenVX Feature Sets:")
+if (OPENVX_CONFORMANCE_VISION)
+    add_definitions( -DOPENVX_CONFORMANCE_VISION )
+    message( "-- 'Vision' conformance feature set")
+endif (OPENVX_CONFORMANCE_VISION)
+if (OPENVX_CONFORMANCE_NEURAL_NETWORKS)
+    add_definitions( -DOPENVX_CONFORMANCE_NEURAL_NETWORKS -DOPENVX_USE_NN )
+    message( "-- 'Neural Networks' conformance feature set")
+endif (OPENVX_CONFORMANCE_NEURAL_NETWORKS)
+if (OPENVX_CONFORMANCE_NNEF_IMPORT)
+    add_definitions( -DOPENVX_CONFORMANCE_NNEF_IMPORT )
+    message( "-- 'NNEF Import' conformance feature set")
+endif (OPENVX_CONFORMANCE_NNEF_IMPORT)
+if (OPENVX_USE_ENHANCED_VISION)
+    add_definitions( -DOPENVX_USE_ENHANCED_VISION )
+    message( "-- 'Enhanced Vision' feature set")
+endif (OPENVX_USE_ENHANCED_VISION)
+message( "--" )
+
 
 message( "-- Selected OpenVX Extensions:")
 if (OPENVX_USE_IX)
@@ -58,13 +87,29 @@
     message( "--    Offical Import-Export extension")
 endif (OPENVX_USE_IX)
 if (OPENVX_USE_NN)
-    add_definitions( -DOPENVX_USE_NN )
+    add_definitions( -DOPENVX_USE_NN -DOPENVX_CONFORMANCE_NEURAL_NETWORKS )
     message( "--    Offical NeuralNet extension")
 endif (OPENVX_USE_NN)
 if (OPENVX_USE_NN_16)
     add_definitions( -DOPENVX_USE_NN_16 )
     message( "--    Offical NeuralNet 16 bits extension")
 endif (OPENVX_USE_NN_16)
+if (OPENVX_USE_PIPELINING)
+    add_definitions( -DOPENVX_USE_PIPELINING )
+    message( "--    Offical Pipelining extension")
+endif (OPENVX_USE_PIPELINING)
+if (OPENVX_USE_STREAMING)
+    add_definitions( -DOPENVX_USE_STREAMING )
+    message( "--    Offical Streaming extension")
+endif (OPENVX_USE_STREAMING)
+if (OPENVX_USE_USER_DATA_OBJECT)
+    add_definitions( -DOPENVX_USE_USER_DATA_OBJECT )
+    message( "--    Offical User Data Object extension")
+endif (OPENVX_USE_USER_DATA_OBJECT)
+if (OPENVX_USE_U1)
+    add_definitions( -DOPENVX_USE_U1 )
+    message( "--    Offical binary (1 bit) feature set")
+endif (OPENVX_USE_U1)
 message( "--" )
 
 set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
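
For reference, a minimal sketch of how the new feature-set and extension switches might be toggled at configure time, in addition to the OPENVX_INCLUDES/OPENVX_LIBRARIES settings shown in the README below (values here are illustrative, not part of this change):

    cmake \
        -DOPENVX_CONFORMANCE_VISION=ON \
        -DOPENVX_CONFORMANCE_NEURAL_NETWORKS=ON \
        -DOPENVX_CONFORMANCE_NNEF_IMPORT=ON \
        -DOPENVX_USE_ENHANCED_VISION=ON \
        -DOPENVX_USE_PIPELINING=ON \
        -DOPENVX_USE_STREAMING=ON \
        -DOPENVX_USE_USER_DATA_OBJECT=ON \
        -DOPENVX_USE_U1=ON \
        ../conformance_tests/

Each option that is ON adds the matching -D compile definition listed above, so feature-set-specific test sources are compiled in or out accordingly.
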
diff --git a/README b/README
index 63d110b..c89cc9d 100644
--- a/README
+++ b/README
@@ -13,6 +13,8 @@
 
 * OPENVX_USE_NN      - Compile neural network test case or not.
 
+* OPENVX_USE_U1      - Compile binary image test cases or not.
+
 * OPENVX_LIBRARIES   - semicolon separated list of shared/static libraries to link
                        with. If there are link order dependencies between your
                        libraries, then the libraries have to be specified in
@@ -41,8 +43,8 @@
     cmake --build .
 
 
-Use the following commands to build the test suite with import-export and
-neural network KHR extensions activated:
+Use the following commands to build the test suite with import-export, neural
+network and binary image KHR extensions activated:
 
     export OPENVX_DIR=<path to prebuilt OpenVX>
     cd <build dir>
@@ -51,6 +53,7 @@
         -DOPENVX_LIBRARIES=$OPENVX_DIR/lib/libopenvx.so\;$OPENVX_DIR/lib/libvxu.so\;pthread\;dl\;m\;rt \
         -DOPENVX_USE_IX=ON \
         -DOPENVX_USE_NN=ON \
+        -DOPENVX_USE_U1=ON \
         ../conformance_tests/
     cmake --build .
 
diff --git a/openvx_cts_version.inc b/openvx_cts_version.inc
index 9eeb118..ced3a22 100644
--- a/openvx_cts_version.inc
+++ b/openvx_cts_version.inc
@@ -1,2 +1,2 @@
 // <Specification version>-<package release date:YYYYMMDD>
-#define VERSION_STR "1.2"
+#define VERSION_STR "1.3"
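
The test sources now guard on the conformance feature-set definitions rather than on the extension macro alone (see the graph_alexnet.c change below). A minimal sketch of the pattern, as a hypothetical test snippet rather than code from this change:

    /* Compiled only when the Neural Networks conformance feature set is
       selected, i.e. when CMake adds -DOPENVX_CONFORMANCE_NEURAL_NETWORKS. */
    #ifdef OPENVX_CONFORMANCE_NEURAL_NETWORKS
    #include <VX/vx_khr_nn.h>
    /* ... neural-network graph factory and test cases ... */
    #endif
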
diff --git a/test_conformance/Networks/src/graph_alexnet.c b/test_conformance/Networks/src/graph_alexnet.c
index 870a0c8..92eb3ff 100644
--- a/test_conformance/Networks/src/graph_alexnet.c
+++ b/test_conformance/Networks/src/graph_alexnet.c
@@ -1,13 +1,14 @@
 /** @file graph.h
- *  @brief 
+ *  @brief
  *  This file contains the implementation of the generated graph factory function
  */
 
-#ifdef OPENVX_USE_NN
+#ifdef OPENVX_CONFORMANCE_NEURAL_NETWORKS
 
 #include <stdio.h>
 #include <stdlib.h>
 #include <VX/vx_khr_nn.h>
+#include <VX/vx_compatibility.h>  // for vxNormalizeLayer
 #include "graph.h"
 
 
@@ -171,19 +172,12 @@
 
 static vx_status Graph(vx_context context, vx_graph graph, ObjectRefContainerType* pObjectContainer, char* filteredNodeList[], size_t filteredNodeCount, vx_tensor org_khronos_nn_extension_convolution_layer_0_p0, vx_tensor org_khronos_nn_extension_convolution_layer_0_p1, vx_tensor org_khronos_nn_extension_convolution_layer_0_p2, vx_scalar org_khronos_nn_extension_convolution_layer_0_p3, vx_scalar org_khronos_nn_extension_convolution_layer_0_p4, vx_scalar org_khronos_nn_extension_convolution_layer_0_p5, vx_scalar org_khronos_nn_extension_convolution_layer_0_p6, vx_scalar org_khronos_nn_extension_convolution_layer_0_p7, vx_tensor org_khronos_nn_extension_convolution_layer_0_p8)
 {
-    vx_status status = VX_SUCCESS;    
+    vx_status status = VX_SUCCESS;
 
     //
     // Kernel Declarations
     //
 
-    vx_kernel org_khronos_nn_extension_convolution_layer_Kernel;
-    vx_kernel org_khronos_nn_extension_activation_layer_Kernel;
-    vx_kernel org_khronos_nn_extension_normalization_layer_Kernel;
-    vx_kernel org_khronos_nn_extension_pooling_layer_Kernel;
-    vx_kernel org_khronos_nn_extension_fully_connected_layer_Kernel;
-    vx_kernel com_cnn_helpers_scalemddata_Kernel;
-    vx_kernel org_khronos_nn_extension_softmax_layer_Kernel;
 
     //
     // Node Declarations
@@ -212,7 +206,6 @@
     vx_node org_khronos_nn_extension_fully_connected_layer_1;
     vx_node org_khronos_nn_extension_activation_layer_6;
     vx_node org_khronos_nn_extension_fully_connected_layer_2;
-    vx_node com_cnn_helpers_scalemddata_0;
     vx_node org_khronos_nn_extension_softmax_layer_0;
 
     //
@@ -557,7 +550,7 @@
     // Primitive Assignments
     //
     outputAllocators_MergeTensor_2_p0 = vxCreateTensor(context, 4, outputAllocators_MergeTensor_2_p0Dimensions ,VX_TYPE_INT16, 8 );
-    
+
     status = vxGetStatus((vx_reference)outputAllocators_MergeTensor_2_p0);
     if(status != VX_SUCCESS)
     {
@@ -567,7 +560,7 @@
     AddVXObject(pObjectContainer, (vx_reference)outputAllocators_MergeTensor_2_p0, VX_TYPE_TENSOR, "relu5_0");
 
     outputAllocators_MergeTensor_1_p0 = vxCreateTensor(context, 4, outputAllocators_MergeTensor_1_p0Dimensions ,VX_TYPE_INT16, 8 );
-    
+
     status = vxGetStatus((vx_reference)outputAllocators_MergeTensor_1_p0);
     if(status != VX_SUCCESS)
     {
@@ -577,7 +570,7 @@
     AddVXObject(pObjectContainer, (vx_reference)outputAllocators_MergeTensor_1_p0, VX_TYPE_TENSOR, "relu4_0");
 
     outputAllocators_MergeTensor_0_p0 = vxCreateTensor(context, 4, outputAllocators_MergeTensor_0_p0Dimensions ,VX_TYPE_INT16, 8 );
-    
+
     status = vxGetStatus((vx_reference)outputAllocators_MergeTensor_0_p0);
     if(status != VX_SUCCESS)
     {
@@ -586,7 +579,7 @@
     }
     AddVXObject(pObjectContainer, (vx_reference)outputAllocators_MergeTensor_0_p0, VX_TYPE_TENSOR, "relu2_0");
 
-    org_khronos_nn_extension_activation_layer_0_p1 = vxCreateScalar(context, VX_TYPE_ENUM, (void*)&org_khronos_nn_extension_activation_layer_0_scalar_p1);    
+    org_khronos_nn_extension_activation_layer_0_p1 = vxCreateScalar(context, VX_TYPE_ENUM, (void*)&org_khronos_nn_extension_activation_layer_0_scalar_p1);
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_activation_layer_0_p1);
     if(status != VX_SUCCESS)
     {
@@ -595,7 +588,7 @@
     }
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_activation_layer_0_p1, VX_TYPE_SCALAR, "relu1_1");
 
-    org_khronos_nn_extension_activation_layer_0_p2 = vxCreateScalar(context, VX_TYPE_FLOAT32, (void*)&org_khronos_nn_extension_activation_layer_0_scalar_p2);    
+    org_khronos_nn_extension_activation_layer_0_p2 = vxCreateScalar(context, VX_TYPE_FLOAT32, (void*)&org_khronos_nn_extension_activation_layer_0_scalar_p2);
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_activation_layer_0_p2);
     if(status != VX_SUCCESS)
     {
@@ -604,7 +597,7 @@
     }
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_activation_layer_0_p2, VX_TYPE_SCALAR, "relu1_2");
 
-    org_khronos_nn_extension_activation_layer_0_p3 = vxCreateScalar(context, VX_TYPE_FLOAT32, (void*)&org_khronos_nn_extension_activation_layer_0_scalar_p3);    
+    org_khronos_nn_extension_activation_layer_0_p3 = vxCreateScalar(context, VX_TYPE_FLOAT32, (void*)&org_khronos_nn_extension_activation_layer_0_scalar_p3);
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_activation_layer_0_p3);
     if(status != VX_SUCCESS)
     {
@@ -614,7 +607,7 @@
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_activation_layer_0_p3, VX_TYPE_SCALAR, "relu1_2");
 
     org_khronos_nn_extension_activation_layer_0_p4 = vxCreateTensor(context, 4, org_khronos_nn_extension_activation_layer_0_p4Dimensions ,VX_TYPE_INT16, 8 );
-    
+
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_activation_layer_0_p4);
     if(status != VX_SUCCESS)
     {
@@ -623,7 +616,7 @@
     }
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_activation_layer_0_p4, VX_TYPE_TENSOR, "relu1_4");
 
-    org_khronos_nn_extension_normalization_layer_0_p1 = vxCreateScalar(context, VX_TYPE_ENUM, (void*)&org_khronos_nn_extension_normalization_layer_0_scalar_p1);    
+    org_khronos_nn_extension_normalization_layer_0_p1 = vxCreateScalar(context, VX_TYPE_ENUM, (void*)&org_khronos_nn_extension_normalization_layer_0_scalar_p1);
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_normalization_layer_0_p1);
     if(status != VX_SUCCESS)
     {
@@ -632,7 +625,7 @@
     }
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_normalization_layer_0_p1, VX_TYPE_SCALAR, "norm1_1");
 
-    org_khronos_nn_extension_normalization_layer_0_p2 = vxCreateScalar(context, VX_TYPE_SIZE, (void*)&org_khronos_nn_extension_normalization_layer_0_scalar_p2);    
+    org_khronos_nn_extension_normalization_layer_0_p2 = vxCreateScalar(context, VX_TYPE_SIZE, (void*)&org_khronos_nn_extension_normalization_layer_0_scalar_p2);
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_normalization_layer_0_p2);
     if(status != VX_SUCCESS)
     {
@@ -641,7 +634,7 @@
     }
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_normalization_layer_0_p2, VX_TYPE_SCALAR, "norm1_2");
 
-    org_khronos_nn_extension_normalization_layer_0_p3 = vxCreateScalar(context, VX_TYPE_FLOAT32, (void*)&org_khronos_nn_extension_normalization_layer_0_scalar_p3);    
+    org_khronos_nn_extension_normalization_layer_0_p3 = vxCreateScalar(context, VX_TYPE_FLOAT32, (void*)&org_khronos_nn_extension_normalization_layer_0_scalar_p3);
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_normalization_layer_0_p3);
     if(status != VX_SUCCESS)
     {
@@ -650,7 +643,7 @@
     }
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_normalization_layer_0_p3, VX_TYPE_SCALAR, "norm1_3");
 
-    org_khronos_nn_extension_normalization_layer_0_p4 = vxCreateScalar(context, VX_TYPE_FLOAT32, (void*)&org_khronos_nn_extension_normalization_layer_0_scalar_p4);    
+    org_khronos_nn_extension_normalization_layer_0_p4 = vxCreateScalar(context, VX_TYPE_FLOAT32, (void*)&org_khronos_nn_extension_normalization_layer_0_scalar_p4);
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_normalization_layer_0_p4);
     if(status != VX_SUCCESS)
     {
@@ -660,7 +653,7 @@
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_normalization_layer_0_p4, VX_TYPE_SCALAR, "norm1_4");
 
     org_khronos_nn_extension_normalization_layer_0_p5 = vxCreateTensor(context, 4, org_khronos_nn_extension_normalization_layer_0_p5Dimensions ,VX_TYPE_INT16, 8 );
-    
+
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_normalization_layer_0_p5);
     if(status != VX_SUCCESS)
     {
@@ -669,7 +662,7 @@
     }
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_normalization_layer_0_p5, VX_TYPE_TENSOR, "norm1_5");
 
-    org_khronos_nn_extension_pooling_layer_0_p1 = vxCreateScalar(context, VX_TYPE_ENUM, (void*)&org_khronos_nn_extension_pooling_layer_0_scalar_p1);    
+    org_khronos_nn_extension_pooling_layer_0_p1 = vxCreateScalar(context, VX_TYPE_ENUM, (void*)&org_khronos_nn_extension_pooling_layer_0_scalar_p1);
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_pooling_layer_0_p1);
     if(status != VX_SUCCESS)
     {
@@ -678,7 +671,7 @@
     }
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_pooling_layer_0_p1, VX_TYPE_SCALAR, "pool1_1");
 
-    org_khronos_nn_extension_pooling_layer_0_p2 = vxCreateScalar(context, VX_TYPE_SIZE, (void*)&org_khronos_nn_extension_pooling_layer_0_scalar_p2);    
+    org_khronos_nn_extension_pooling_layer_0_p2 = vxCreateScalar(context, VX_TYPE_SIZE, (void*)&org_khronos_nn_extension_pooling_layer_0_scalar_p2);
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_pooling_layer_0_p2);
     if(status != VX_SUCCESS)
     {
@@ -687,7 +680,7 @@
     }
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_pooling_layer_0_p2, VX_TYPE_SCALAR, "pool1_2");
 
-    org_khronos_nn_extension_pooling_layer_0_p3 = vxCreateScalar(context, VX_TYPE_SIZE, (void*)&org_khronos_nn_extension_pooling_layer_0_scalar_p3);    
+    org_khronos_nn_extension_pooling_layer_0_p3 = vxCreateScalar(context, VX_TYPE_SIZE, (void*)&org_khronos_nn_extension_pooling_layer_0_scalar_p3);
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_pooling_layer_0_p3);
     if(status != VX_SUCCESS)
     {
@@ -696,7 +689,7 @@
     }
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_pooling_layer_0_p3, VX_TYPE_SCALAR, "pool1_3");
 
-    org_khronos_nn_extension_pooling_layer_0_p4 = vxCreateScalar(context, VX_TYPE_SIZE, (void*)&org_khronos_nn_extension_pooling_layer_0_scalar_p4);    
+    org_khronos_nn_extension_pooling_layer_0_p4 = vxCreateScalar(context, VX_TYPE_SIZE, (void*)&org_khronos_nn_extension_pooling_layer_0_scalar_p4);
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_pooling_layer_0_p4);
     if(status != VX_SUCCESS)
     {
@@ -705,7 +698,7 @@
     }
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_pooling_layer_0_p4, VX_TYPE_SCALAR, "pool1_4");
 
-    org_khronos_nn_extension_pooling_layer_0_p5 = vxCreateScalar(context, VX_TYPE_SIZE, (void*)&org_khronos_nn_extension_pooling_layer_0_scalar_p5);    
+    org_khronos_nn_extension_pooling_layer_0_p5 = vxCreateScalar(context, VX_TYPE_SIZE, (void*)&org_khronos_nn_extension_pooling_layer_0_scalar_p5);
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_pooling_layer_0_p5);
     if(status != VX_SUCCESS)
     {
@@ -714,7 +707,7 @@
     }
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_pooling_layer_0_p5, VX_TYPE_SCALAR, "pool1_5");
 
-    org_khronos_nn_extension_pooling_layer_0_p6 = vxCreateScalar(context, VX_TYPE_ENUM, (void*)&org_khronos_nn_extension_pooling_layer_0_scalar_p6);    
+    org_khronos_nn_extension_pooling_layer_0_p6 = vxCreateScalar(context, VX_TYPE_ENUM, (void*)&org_khronos_nn_extension_pooling_layer_0_scalar_p6);
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_pooling_layer_0_p6);
     if(status != VX_SUCCESS)
     {
@@ -724,7 +717,7 @@
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_pooling_layer_0_p6, VX_TYPE_SCALAR, "pool1_6");
 
     org_khronos_nn_extension_pooling_layer_0_p7 = vxCreateTensor(context, 4, org_khronos_nn_extension_pooling_layer_0_p7Dimensions ,VX_TYPE_INT16, 8 );
-    
+
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_pooling_layer_0_p7);
     if(status != VX_SUCCESS)
     {
@@ -733,7 +726,7 @@
     }
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_pooling_layer_0_p7, VX_TYPE_TENSOR, "pool1_7");
 
-    outputAllocators_SplitTensor_0_p1 = vxCreateTensorFromView(org_khronos_nn_extension_pooling_layer_0_p7, 4, org_khronos_nn_extension_pooling_layer_0_p7_view1_view_start, org_khronos_nn_extension_pooling_layer_0_p7_view1_view_end);    
+    outputAllocators_SplitTensor_0_p1 = vxCreateTensorFromView(org_khronos_nn_extension_pooling_layer_0_p7, 4, org_khronos_nn_extension_pooling_layer_0_p7_view1_view_start, org_khronos_nn_extension_pooling_layer_0_p7_view1_view_end);
     status = vxGetStatus((vx_reference)outputAllocators_SplitTensor_0_p1);
     if(status != VX_SUCCESS)
     {
@@ -742,7 +735,7 @@
     }
     AddVXObject(pObjectContainer, (vx_reference)outputAllocators_SplitTensor_0_p1, VX_TYPE_TENSOR, "conv2_0_0");
 
-    outputAllocators_SplitTensor_0_p2 = vxCreateTensorFromView(org_khronos_nn_extension_pooling_layer_0_p7, 4, org_khronos_nn_extension_pooling_layer_0_p7_view2_view_start, org_khronos_nn_extension_pooling_layer_0_p7_view2_view_end);    
+    outputAllocators_SplitTensor_0_p2 = vxCreateTensorFromView(org_khronos_nn_extension_pooling_layer_0_p7, 4, org_khronos_nn_extension_pooling_layer_0_p7_view2_view_start, org_khronos_nn_extension_pooling_layer_0_p7_view2_view_end);
     status = vxGetStatus((vx_reference)outputAllocators_SplitTensor_0_p2);
     if(status != VX_SUCCESS)
     {
@@ -752,7 +745,7 @@
     AddVXObject(pObjectContainer, (vx_reference)outputAllocators_SplitTensor_0_p2, VX_TYPE_TENSOR, "conv2_1_0");
 
     org_khronos_nn_extension_convolution_layer_2_p1 = vxCreateTensor(context, 4, org_khronos_nn_extension_convolution_layer_2_p1Dimensions ,VX_TYPE_INT16, 8 );
-    
+
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_convolution_layer_2_p1);
     if(status != VX_SUCCESS)
     {
@@ -762,7 +755,7 @@
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_convolution_layer_2_p1, VX_TYPE_TENSOR, "conv2_0_weights");
 
     org_khronos_nn_extension_convolution_layer_2_p2 = vxCreateTensor(context, 1, org_khronos_nn_extension_convolution_layer_2_p2Dimensions ,VX_TYPE_INT16, 8 );
-    
+
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_convolution_layer_2_p2);
     if(status != VX_SUCCESS)
     {
@@ -771,7 +764,7 @@
     }
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_convolution_layer_2_p2, VX_TYPE_TENSOR, "conv2_0_bias");
 
-    org_khronos_nn_extension_convolution_layer_2_p3 = vxCreateScalar(context, VX_TYPE_SIZE, (void*)&org_khronos_nn_extension_convolution_layer_2_scalar_p3);    
+    org_khronos_nn_extension_convolution_layer_2_p3 = vxCreateScalar(context, VX_TYPE_SIZE, (void*)&org_khronos_nn_extension_convolution_layer_2_scalar_p3);
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_convolution_layer_2_p3);
     if(status != VX_SUCCESS)
     {
@@ -780,7 +773,7 @@
     }
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_convolution_layer_2_p3, VX_TYPE_SCALAR, "conv2_0_3");
 
-    org_khronos_nn_extension_convolution_layer_2_p4 = vxCreateScalar(context, VX_TYPE_SIZE, (void*)&org_khronos_nn_extension_convolution_layer_2_scalar_p4);    
+    org_khronos_nn_extension_convolution_layer_2_p4 = vxCreateScalar(context, VX_TYPE_SIZE, (void*)&org_khronos_nn_extension_convolution_layer_2_scalar_p4);
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_convolution_layer_2_p4);
     if(status != VX_SUCCESS)
     {
@@ -789,7 +782,7 @@
     }
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_convolution_layer_2_p4, VX_TYPE_SCALAR, "conv2_0_4");
 
-    org_khronos_nn_extension_convolution_layer_2_p5 = vxCreateScalar(context, VX_TYPE_ENUM, (void*)&org_khronos_nn_extension_convolution_layer_2_scalar_p5);    
+    org_khronos_nn_extension_convolution_layer_2_p5 = vxCreateScalar(context, VX_TYPE_ENUM, (void*)&org_khronos_nn_extension_convolution_layer_2_scalar_p5);
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_convolution_layer_2_p5);
     if(status != VX_SUCCESS)
     {
@@ -798,7 +791,7 @@
     }
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_convolution_layer_2_p5, VX_TYPE_SCALAR, "conv2_0_5");
 
-    org_khronos_nn_extension_convolution_layer_2_p6 = vxCreateScalar(context, VX_TYPE_ENUM, (void*)&org_khronos_nn_extension_convolution_layer_2_scalar_p6);    
+    org_khronos_nn_extension_convolution_layer_2_p6 = vxCreateScalar(context, VX_TYPE_ENUM, (void*)&org_khronos_nn_extension_convolution_layer_2_scalar_p6);
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_convolution_layer_2_p6);
     if(status != VX_SUCCESS)
     {
@@ -807,7 +800,7 @@
     }
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_convolution_layer_2_p6, VX_TYPE_SCALAR, "conv2_0_6");
 
-    org_khronos_nn_extension_convolution_layer_2_p7 = vxCreateScalar(context, VX_TYPE_ENUM, (void*)&org_khronos_nn_extension_convolution_layer_2_scalar_p7);    
+    org_khronos_nn_extension_convolution_layer_2_p7 = vxCreateScalar(context, VX_TYPE_ENUM, (void*)&org_khronos_nn_extension_convolution_layer_2_scalar_p7);
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_convolution_layer_2_p7);
     if(status != VX_SUCCESS)
     {
@@ -816,7 +809,7 @@
     }
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_convolution_layer_2_p7, VX_TYPE_SCALAR, "conv2_0_7");
 
-    org_khronos_nn_extension_convolution_layer_2_p8 = vxCreateTensorFromView(outputAllocators_MergeTensor_0_p0, 4, org_khronos_nn_extension_convolution_layer_2_p8_view_view_start, org_khronos_nn_extension_convolution_layer_2_p8_view_view_end);    
+    org_khronos_nn_extension_convolution_layer_2_p8 = vxCreateTensorFromView(outputAllocators_MergeTensor_0_p0, 4, org_khronos_nn_extension_convolution_layer_2_p8_view_view_start, org_khronos_nn_extension_convolution_layer_2_p8_view_view_end);
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_convolution_layer_2_p8);
     if(status != VX_SUCCESS)
     {
@@ -826,7 +819,7 @@
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_convolution_layer_2_p8, VX_TYPE_TENSOR, "conv2_0_8");
 
     org_khronos_nn_extension_convolution_layer_1_p1 = vxCreateTensor(context, 4, org_khronos_nn_extension_convolution_layer_1_p1Dimensions ,VX_TYPE_INT16, 8 );
-    
+
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_convolution_layer_1_p1);
     if(status != VX_SUCCESS)
     {
@@ -836,7 +829,7 @@
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_convolution_layer_1_p1, VX_TYPE_TENSOR, "conv2_1_weights");
 
     org_khronos_nn_extension_convolution_layer_1_p2 = vxCreateTensor(context, 1, org_khronos_nn_extension_convolution_layer_1_p2Dimensions ,VX_TYPE_INT16, 8 );
-    
+
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_convolution_layer_1_p2);
     if(status != VX_SUCCESS)
     {
@@ -845,7 +838,7 @@
     }
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_convolution_layer_1_p2, VX_TYPE_TENSOR, "conv2_1_bias");
 
-    org_khronos_nn_extension_convolution_layer_1_p3 = vxCreateScalar(context, VX_TYPE_SIZE, (void*)&org_khronos_nn_extension_convolution_layer_1_scalar_p3);    
+    org_khronos_nn_extension_convolution_layer_1_p3 = vxCreateScalar(context, VX_TYPE_SIZE, (void*)&org_khronos_nn_extension_convolution_layer_1_scalar_p3);
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_convolution_layer_1_p3);
     if(status != VX_SUCCESS)
     {
@@ -854,7 +847,7 @@
     }
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_convolution_layer_1_p3, VX_TYPE_SCALAR, "conv2_1_3");
 
-    org_khronos_nn_extension_convolution_layer_1_p4 = vxCreateScalar(context, VX_TYPE_SIZE, (void*)&org_khronos_nn_extension_convolution_layer_1_scalar_p4);    
+    org_khronos_nn_extension_convolution_layer_1_p4 = vxCreateScalar(context, VX_TYPE_SIZE, (void*)&org_khronos_nn_extension_convolution_layer_1_scalar_p4);
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_convolution_layer_1_p4);
     if(status != VX_SUCCESS)
     {
@@ -863,7 +856,7 @@
     }
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_convolution_layer_1_p4, VX_TYPE_SCALAR, "conv2_1_4");
 
-    org_khronos_nn_extension_convolution_layer_1_p5 = vxCreateScalar(context, VX_TYPE_ENUM, (void*)&org_khronos_nn_extension_convolution_layer_1_scalar_p5);    
+    org_khronos_nn_extension_convolution_layer_1_p5 = vxCreateScalar(context, VX_TYPE_ENUM, (void*)&org_khronos_nn_extension_convolution_layer_1_scalar_p5);
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_convolution_layer_1_p5);
     if(status != VX_SUCCESS)
     {
@@ -872,7 +865,7 @@
     }
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_convolution_layer_1_p5, VX_TYPE_SCALAR, "conv2_1_5");
 
-    org_khronos_nn_extension_convolution_layer_1_p6 = vxCreateScalar(context, VX_TYPE_ENUM, (void*)&org_khronos_nn_extension_convolution_layer_1_scalar_p6);    
+    org_khronos_nn_extension_convolution_layer_1_p6 = vxCreateScalar(context, VX_TYPE_ENUM, (void*)&org_khronos_nn_extension_convolution_layer_1_scalar_p6);
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_convolution_layer_1_p6);
     if(status != VX_SUCCESS)
     {
@@ -881,7 +874,7 @@
     }
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_convolution_layer_1_p6, VX_TYPE_SCALAR, "conv2_1_6");
 
-    org_khronos_nn_extension_convolution_layer_1_p7 = vxCreateScalar(context, VX_TYPE_ENUM, (void*)&org_khronos_nn_extension_convolution_layer_1_scalar_p7);    
+    org_khronos_nn_extension_convolution_layer_1_p7 = vxCreateScalar(context, VX_TYPE_ENUM, (void*)&org_khronos_nn_extension_convolution_layer_1_scalar_p7);
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_convolution_layer_1_p7);
     if(status != VX_SUCCESS)
     {
@@ -890,7 +883,7 @@
     }
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_convolution_layer_1_p7, VX_TYPE_SCALAR, "conv2_1_7");
 
-    org_khronos_nn_extension_convolution_layer_1_p8 = vxCreateTensorFromView(outputAllocators_MergeTensor_0_p0, 4, org_khronos_nn_extension_convolution_layer_1_p8_view_view_start, org_khronos_nn_extension_convolution_layer_1_p8_view_view_end);    
+    org_khronos_nn_extension_convolution_layer_1_p8 = vxCreateTensorFromView(outputAllocators_MergeTensor_0_p0, 4, org_khronos_nn_extension_convolution_layer_1_p8_view_view_start, org_khronos_nn_extension_convolution_layer_1_p8_view_view_end);
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_convolution_layer_1_p8);
     if(status != VX_SUCCESS)
     {
@@ -899,7 +892,7 @@
     }
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_convolution_layer_1_p8, VX_TYPE_TENSOR, "conv2_1_8");
 
-    org_khronos_nn_extension_activation_layer_1_p1 = vxCreateScalar(context, VX_TYPE_ENUM, (void*)&org_khronos_nn_extension_activation_layer_1_scalar_p1);    
+    org_khronos_nn_extension_activation_layer_1_p1 = vxCreateScalar(context, VX_TYPE_ENUM, (void*)&org_khronos_nn_extension_activation_layer_1_scalar_p1);
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_activation_layer_1_p1);
     if(status != VX_SUCCESS)
     {
@@ -908,7 +901,7 @@
     }
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_activation_layer_1_p1, VX_TYPE_SCALAR, "relu2_1");
 
-    org_khronos_nn_extension_activation_layer_1_p2 = vxCreateScalar(context, VX_TYPE_FLOAT32, (void*)&org_khronos_nn_extension_activation_layer_1_scalar_p2);    
+    org_khronos_nn_extension_activation_layer_1_p2 = vxCreateScalar(context, VX_TYPE_FLOAT32, (void*)&org_khronos_nn_extension_activation_layer_1_scalar_p2);
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_activation_layer_1_p2);
     if(status != VX_SUCCESS)
     {
@@ -917,7 +910,7 @@
     }
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_activation_layer_1_p2, VX_TYPE_SCALAR, "relu2_2");
 
-    org_khronos_nn_extension_activation_layer_1_p3 = vxCreateScalar(context, VX_TYPE_FLOAT32, (void*)&org_khronos_nn_extension_activation_layer_1_scalar_p3);    
+    org_khronos_nn_extension_activation_layer_1_p3 = vxCreateScalar(context, VX_TYPE_FLOAT32, (void*)&org_khronos_nn_extension_activation_layer_1_scalar_p3);
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_activation_layer_1_p3);
     if(status != VX_SUCCESS)
     {
@@ -927,7 +920,7 @@
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_activation_layer_1_p3, VX_TYPE_SCALAR, "relu2_2");
 
     org_khronos_nn_extension_activation_layer_1_p4 = vxCreateTensor(context, 4, org_khronos_nn_extension_activation_layer_1_p4Dimensions ,VX_TYPE_INT16, 8 );
-    
+
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_activation_layer_1_p4);
     if(status != VX_SUCCESS)
     {
@@ -936,7 +929,7 @@
     }
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_activation_layer_1_p4, VX_TYPE_TENSOR, "relu2_4");
 
-    org_khronos_nn_extension_normalization_layer_1_p1 = vxCreateScalar(context, VX_TYPE_ENUM, (void*)&org_khronos_nn_extension_normalization_layer_1_scalar_p1);    
+    org_khronos_nn_extension_normalization_layer_1_p1 = vxCreateScalar(context, VX_TYPE_ENUM, (void*)&org_khronos_nn_extension_normalization_layer_1_scalar_p1);
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_normalization_layer_1_p1);
     if(status != VX_SUCCESS)
     {
@@ -945,7 +938,7 @@
     }
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_normalization_layer_1_p1, VX_TYPE_SCALAR, "norm2_1");
 
-    org_khronos_nn_extension_normalization_layer_1_p2 = vxCreateScalar(context, VX_TYPE_SIZE, (void*)&org_khronos_nn_extension_normalization_layer_1_scalar_p2);    
+    org_khronos_nn_extension_normalization_layer_1_p2 = vxCreateScalar(context, VX_TYPE_SIZE, (void*)&org_khronos_nn_extension_normalization_layer_1_scalar_p2);
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_normalization_layer_1_p2);
     if(status != VX_SUCCESS)
     {
@@ -954,7 +947,7 @@
     }
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_normalization_layer_1_p2, VX_TYPE_SCALAR, "norm2_2");
 
-    org_khronos_nn_extension_normalization_layer_1_p3 = vxCreateScalar(context, VX_TYPE_FLOAT32, (void*)&org_khronos_nn_extension_normalization_layer_1_scalar_p3);    
+    org_khronos_nn_extension_normalization_layer_1_p3 = vxCreateScalar(context, VX_TYPE_FLOAT32, (void*)&org_khronos_nn_extension_normalization_layer_1_scalar_p3);
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_normalization_layer_1_p3);
     if(status != VX_SUCCESS)
     {
@@ -963,7 +956,7 @@
     }
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_normalization_layer_1_p3, VX_TYPE_SCALAR, "norm2_3");
 
-    org_khronos_nn_extension_normalization_layer_1_p4 = vxCreateScalar(context, VX_TYPE_FLOAT32, (void*)&org_khronos_nn_extension_normalization_layer_1_scalar_p4);    
+    org_khronos_nn_extension_normalization_layer_1_p4 = vxCreateScalar(context, VX_TYPE_FLOAT32, (void*)&org_khronos_nn_extension_normalization_layer_1_scalar_p4);
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_normalization_layer_1_p4);
     if(status != VX_SUCCESS)
     {
@@ -973,7 +966,7 @@
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_normalization_layer_1_p4, VX_TYPE_SCALAR, "norm2_4");
 
     org_khronos_nn_extension_normalization_layer_1_p5 = vxCreateTensor(context, 4, org_khronos_nn_extension_normalization_layer_1_p5Dimensions ,VX_TYPE_INT16, 8 );
-    
+
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_normalization_layer_1_p5);
     if(status != VX_SUCCESS)
     {
@@ -982,7 +975,7 @@
     }
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_normalization_layer_1_p5, VX_TYPE_TENSOR, "norm2_5");
 
-    org_khronos_nn_extension_pooling_layer_1_p1 = vxCreateScalar(context, VX_TYPE_ENUM, (void*)&org_khronos_nn_extension_pooling_layer_1_scalar_p1);    
+    org_khronos_nn_extension_pooling_layer_1_p1 = vxCreateScalar(context, VX_TYPE_ENUM, (void*)&org_khronos_nn_extension_pooling_layer_1_scalar_p1);
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_pooling_layer_1_p1);
     if(status != VX_SUCCESS)
     {
@@ -991,7 +984,7 @@
     }
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_pooling_layer_1_p1, VX_TYPE_SCALAR, "pool2_1");
 
-    org_khronos_nn_extension_pooling_layer_1_p2 = vxCreateScalar(context, VX_TYPE_SIZE, (void*)&org_khronos_nn_extension_pooling_layer_1_scalar_p2);    
+    org_khronos_nn_extension_pooling_layer_1_p2 = vxCreateScalar(context, VX_TYPE_SIZE, (void*)&org_khronos_nn_extension_pooling_layer_1_scalar_p2);
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_pooling_layer_1_p2);
     if(status != VX_SUCCESS)
     {
@@ -1000,7 +993,7 @@
     }
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_pooling_layer_1_p2, VX_TYPE_SCALAR, "pool2_2");
 
-    org_khronos_nn_extension_pooling_layer_1_p3 = vxCreateScalar(context, VX_TYPE_SIZE, (void*)&org_khronos_nn_extension_pooling_layer_1_scalar_p3);    
+    org_khronos_nn_extension_pooling_layer_1_p3 = vxCreateScalar(context, VX_TYPE_SIZE, (void*)&org_khronos_nn_extension_pooling_layer_1_scalar_p3);
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_pooling_layer_1_p3);
     if(status != VX_SUCCESS)
     {
@@ -1009,7 +1002,7 @@
     }
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_pooling_layer_1_p3, VX_TYPE_SCALAR, "pool2_3");
 
-    org_khronos_nn_extension_pooling_layer_1_p4 = vxCreateScalar(context, VX_TYPE_SIZE, (void*)&org_khronos_nn_extension_pooling_layer_1_scalar_p4);    
+    org_khronos_nn_extension_pooling_layer_1_p4 = vxCreateScalar(context, VX_TYPE_SIZE, (void*)&org_khronos_nn_extension_pooling_layer_1_scalar_p4);
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_pooling_layer_1_p4);
     if(status != VX_SUCCESS)
     {
@@ -1018,7 +1011,7 @@
     }
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_pooling_layer_1_p4, VX_TYPE_SCALAR, "pool2_4");
 
-    org_khronos_nn_extension_pooling_layer_1_p5 = vxCreateScalar(context, VX_TYPE_SIZE, (void*)&org_khronos_nn_extension_pooling_layer_1_scalar_p5);    
+    org_khronos_nn_extension_pooling_layer_1_p5 = vxCreateScalar(context, VX_TYPE_SIZE, (void*)&org_khronos_nn_extension_pooling_layer_1_scalar_p5);
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_pooling_layer_1_p5);
     if(status != VX_SUCCESS)
     {
@@ -1027,7 +1020,7 @@
     }
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_pooling_layer_1_p5, VX_TYPE_SCALAR, "pool2_5");
 
-    org_khronos_nn_extension_pooling_layer_1_p6 = vxCreateScalar(context, VX_TYPE_ENUM, (void*)&org_khronos_nn_extension_pooling_layer_1_scalar_p6);    
+    org_khronos_nn_extension_pooling_layer_1_p6 = vxCreateScalar(context, VX_TYPE_ENUM, (void*)&org_khronos_nn_extension_pooling_layer_1_scalar_p6);
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_pooling_layer_1_p6);
     if(status != VX_SUCCESS)
     {
@@ -1037,7 +1030,7 @@
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_pooling_layer_1_p6, VX_TYPE_SCALAR, "pool2_6");
 
     org_khronos_nn_extension_pooling_layer_1_p7 = vxCreateTensor(context, 4, org_khronos_nn_extension_pooling_layer_1_p7Dimensions ,VX_TYPE_INT16, 8 );
-    
+
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_pooling_layer_1_p7);
     if(status != VX_SUCCESS)
     {
@@ -1047,7 +1040,7 @@
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_pooling_layer_1_p7, VX_TYPE_TENSOR, "pool2_7");
 
     org_khronos_nn_extension_convolution_layer_3_p1 = vxCreateTensor(context, 4, org_khronos_nn_extension_convolution_layer_3_p1Dimensions ,VX_TYPE_INT16, 8 );
-    
+
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_convolution_layer_3_p1);
     if(status != VX_SUCCESS)
     {
@@ -1057,7 +1050,7 @@
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_convolution_layer_3_p1, VX_TYPE_TENSOR, "conv3_weights");
 
     org_khronos_nn_extension_convolution_layer_3_p2 = vxCreateTensor(context, 1, org_khronos_nn_extension_convolution_layer_3_p2Dimensions ,VX_TYPE_INT16, 8 );
-    
+
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_convolution_layer_3_p2);
     if(status != VX_SUCCESS)
     {
@@ -1066,7 +1059,7 @@
     }
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_convolution_layer_3_p2, VX_TYPE_TENSOR, "conv3_bias");
 
-    org_khronos_nn_extension_convolution_layer_3_p3 = vxCreateScalar(context, VX_TYPE_SIZE, (void*)&org_khronos_nn_extension_convolution_layer_3_scalar_p3);    
+    org_khronos_nn_extension_convolution_layer_3_p3 = vxCreateScalar(context, VX_TYPE_SIZE, (void*)&org_khronos_nn_extension_convolution_layer_3_scalar_p3);
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_convolution_layer_3_p3);
     if(status != VX_SUCCESS)
     {
@@ -1075,7 +1068,7 @@
     }
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_convolution_layer_3_p3, VX_TYPE_SCALAR, "conv3_3");
 
-    org_khronos_nn_extension_convolution_layer_3_p4 = vxCreateScalar(context, VX_TYPE_SIZE, (void*)&org_khronos_nn_extension_convolution_layer_3_scalar_p4);    
+    org_khronos_nn_extension_convolution_layer_3_p4 = vxCreateScalar(context, VX_TYPE_SIZE, (void*)&org_khronos_nn_extension_convolution_layer_3_scalar_p4);
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_convolution_layer_3_p4);
     if(status != VX_SUCCESS)
     {
@@ -1084,7 +1077,7 @@
     }
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_convolution_layer_3_p4, VX_TYPE_SCALAR, "conv3_4");
 
-    org_khronos_nn_extension_convolution_layer_3_p5 = vxCreateScalar(context, VX_TYPE_ENUM, (void*)&org_khronos_nn_extension_convolution_layer_3_scalar_p5);    
+    org_khronos_nn_extension_convolution_layer_3_p5 = vxCreateScalar(context, VX_TYPE_ENUM, (void*)&org_khronos_nn_extension_convolution_layer_3_scalar_p5);
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_convolution_layer_3_p5);
     if(status != VX_SUCCESS)
     {
@@ -1093,7 +1086,7 @@
     }
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_convolution_layer_3_p5, VX_TYPE_SCALAR, "conv3_5");
 
-    org_khronos_nn_extension_convolution_layer_3_p6 = vxCreateScalar(context, VX_TYPE_ENUM, (void*)&org_khronos_nn_extension_convolution_layer_3_scalar_p6);    
+    org_khronos_nn_extension_convolution_layer_3_p6 = vxCreateScalar(context, VX_TYPE_ENUM, (void*)&org_khronos_nn_extension_convolution_layer_3_scalar_p6);
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_convolution_layer_3_p6);
     if(status != VX_SUCCESS)
     {
@@ -1102,7 +1095,7 @@
     }
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_convolution_layer_3_p6, VX_TYPE_SCALAR, "conv3_6");
 
-    org_khronos_nn_extension_convolution_layer_3_p7 = vxCreateScalar(context, VX_TYPE_ENUM, (void*)&org_khronos_nn_extension_convolution_layer_3_scalar_p7);    
+    org_khronos_nn_extension_convolution_layer_3_p7 = vxCreateScalar(context, VX_TYPE_ENUM, (void*)&org_khronos_nn_extension_convolution_layer_3_scalar_p7);
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_convolution_layer_3_p7);
     if(status != VX_SUCCESS)
     {
@@ -1112,7 +1105,7 @@
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_convolution_layer_3_p7, VX_TYPE_SCALAR, "conv3_7");
 
     org_khronos_nn_extension_convolution_layer_3_p8 = vxCreateTensor(context, 4, org_khronos_nn_extension_convolution_layer_3_p8Dimensions ,VX_TYPE_INT16, 8 );
-    
+
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_convolution_layer_3_p8);
     if(status != VX_SUCCESS)
     {
@@ -1121,7 +1114,7 @@
     }
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_convolution_layer_3_p8, VX_TYPE_TENSOR, "conv3_8");
 
-    org_khronos_nn_extension_activation_layer_2_p1 = vxCreateScalar(context, VX_TYPE_ENUM, (void*)&org_khronos_nn_extension_activation_layer_2_scalar_p1);    
+    org_khronos_nn_extension_activation_layer_2_p1 = vxCreateScalar(context, VX_TYPE_ENUM, (void*)&org_khronos_nn_extension_activation_layer_2_scalar_p1);
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_activation_layer_2_p1);
     if(status != VX_SUCCESS)
     {
@@ -1130,7 +1123,7 @@
     }
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_activation_layer_2_p1, VX_TYPE_SCALAR, "relu3_1");
 
-    org_khronos_nn_extension_activation_layer_2_p2 = vxCreateScalar(context, VX_TYPE_FLOAT32, (void*)&org_khronos_nn_extension_activation_layer_2_scalar_p2);    
+    org_khronos_nn_extension_activation_layer_2_p2 = vxCreateScalar(context, VX_TYPE_FLOAT32, (void*)&org_khronos_nn_extension_activation_layer_2_scalar_p2);
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_activation_layer_2_p2);
     if(status != VX_SUCCESS)
     {
@@ -1139,7 +1132,7 @@
     }
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_activation_layer_2_p2, VX_TYPE_SCALAR, "relu3_2");
 
-    org_khronos_nn_extension_activation_layer_2_p3 = vxCreateScalar(context, VX_TYPE_FLOAT32, (void*)&org_khronos_nn_extension_activation_layer_2_scalar_p3);    
+    org_khronos_nn_extension_activation_layer_2_p3 = vxCreateScalar(context, VX_TYPE_FLOAT32, (void*)&org_khronos_nn_extension_activation_layer_2_scalar_p3);
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_activation_layer_2_p3);
     if(status != VX_SUCCESS)
     {
@@ -1149,7 +1142,7 @@
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_activation_layer_2_p3, VX_TYPE_SCALAR, "relu3_2");
 
     org_khronos_nn_extension_activation_layer_2_p4 = vxCreateTensor(context, 4, org_khronos_nn_extension_activation_layer_2_p4Dimensions ,VX_TYPE_INT16, 8 );
-    
+
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_activation_layer_2_p4);
     if(status != VX_SUCCESS)
     {
@@ -1158,7 +1151,7 @@
     }
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_activation_layer_2_p4, VX_TYPE_TENSOR, "relu3_4");
 
-    outputAllocators_SplitTensor_1_p1 = vxCreateTensorFromView(org_khronos_nn_extension_activation_layer_2_p4, 4, org_khronos_nn_extension_activation_layer_2_p4_view1_view_start, org_khronos_nn_extension_activation_layer_2_p4_view1_view_end);    
+    outputAllocators_SplitTensor_1_p1 = vxCreateTensorFromView(org_khronos_nn_extension_activation_layer_2_p4, 4, org_khronos_nn_extension_activation_layer_2_p4_view1_view_start, org_khronos_nn_extension_activation_layer_2_p4_view1_view_end);
     status = vxGetStatus((vx_reference)outputAllocators_SplitTensor_1_p1);
     if(status != VX_SUCCESS)
     {
@@ -1167,7 +1160,7 @@
     }
     AddVXObject(pObjectContainer, (vx_reference)outputAllocators_SplitTensor_1_p1, VX_TYPE_TENSOR, "conv4_0_0");
 
-    outputAllocators_SplitTensor_1_p2 = vxCreateTensorFromView(org_khronos_nn_extension_activation_layer_2_p4, 4, org_khronos_nn_extension_activation_layer_2_p4_view2_view_start, org_khronos_nn_extension_activation_layer_2_p4_view2_view_end);    
+    outputAllocators_SplitTensor_1_p2 = vxCreateTensorFromView(org_khronos_nn_extension_activation_layer_2_p4, 4, org_khronos_nn_extension_activation_layer_2_p4_view2_view_start, org_khronos_nn_extension_activation_layer_2_p4_view2_view_end);
     status = vxGetStatus((vx_reference)outputAllocators_SplitTensor_1_p2);
     if(status != VX_SUCCESS)
     {
@@ -1177,7 +1170,7 @@
     AddVXObject(pObjectContainer, (vx_reference)outputAllocators_SplitTensor_1_p2, VX_TYPE_TENSOR, "conv4_1_0");
 
     org_khronos_nn_extension_convolution_layer_5_p1 = vxCreateTensor(context, 4, org_khronos_nn_extension_convolution_layer_5_p1Dimensions ,VX_TYPE_INT16, 8 );
-    
+
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_convolution_layer_5_p1);
     if(status != VX_SUCCESS)
     {
@@ -1187,7 +1180,7 @@
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_convolution_layer_5_p1, VX_TYPE_TENSOR, "conv4_0_weights");
 
     org_khronos_nn_extension_convolution_layer_5_p2 = vxCreateTensor(context, 1, org_khronos_nn_extension_convolution_layer_5_p2Dimensions ,VX_TYPE_INT16, 8 );
-    
+
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_convolution_layer_5_p2);
     if(status != VX_SUCCESS)
     {
@@ -1196,7 +1189,7 @@
     }
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_convolution_layer_5_p2, VX_TYPE_TENSOR, "conv4_0_bias");
 
-    org_khronos_nn_extension_convolution_layer_5_p3 = vxCreateScalar(context, VX_TYPE_SIZE, (void*)&org_khronos_nn_extension_convolution_layer_5_scalar_p3);    
+    org_khronos_nn_extension_convolution_layer_5_p3 = vxCreateScalar(context, VX_TYPE_SIZE, (void*)&org_khronos_nn_extension_convolution_layer_5_scalar_p3);
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_convolution_layer_5_p3);
     if(status != VX_SUCCESS)
     {
@@ -1205,7 +1198,7 @@
     }
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_convolution_layer_5_p3, VX_TYPE_SCALAR, "conv4_0_3");
 
-    org_khronos_nn_extension_convolution_layer_5_p4 = vxCreateScalar(context, VX_TYPE_SIZE, (void*)&org_khronos_nn_extension_convolution_layer_5_scalar_p4);    
+    org_khronos_nn_extension_convolution_layer_5_p4 = vxCreateScalar(context, VX_TYPE_SIZE, (void*)&org_khronos_nn_extension_convolution_layer_5_scalar_p4);
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_convolution_layer_5_p4);
     if(status != VX_SUCCESS)
     {
@@ -1214,7 +1207,7 @@
     }
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_convolution_layer_5_p4, VX_TYPE_SCALAR, "conv4_0_4");
 
-    org_khronos_nn_extension_convolution_layer_5_p5 = vxCreateScalar(context, VX_TYPE_ENUM, (void*)&org_khronos_nn_extension_convolution_layer_5_scalar_p5);    
+    org_khronos_nn_extension_convolution_layer_5_p5 = vxCreateScalar(context, VX_TYPE_ENUM, (void*)&org_khronos_nn_extension_convolution_layer_5_scalar_p5);
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_convolution_layer_5_p5);
     if(status != VX_SUCCESS)
     {
@@ -1223,7 +1216,7 @@
     }
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_convolution_layer_5_p5, VX_TYPE_SCALAR, "conv4_0_5");
 
-    org_khronos_nn_extension_convolution_layer_5_p6 = vxCreateScalar(context, VX_TYPE_ENUM, (void*)&org_khronos_nn_extension_convolution_layer_5_scalar_p6);    
+    org_khronos_nn_extension_convolution_layer_5_p6 = vxCreateScalar(context, VX_TYPE_ENUM, (void*)&org_khronos_nn_extension_convolution_layer_5_scalar_p6);
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_convolution_layer_5_p6);
     if(status != VX_SUCCESS)
     {
@@ -1232,7 +1225,7 @@
     }
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_convolution_layer_5_p6, VX_TYPE_SCALAR, "conv4_0_6");
 
-    org_khronos_nn_extension_convolution_layer_5_p7 = vxCreateScalar(context, VX_TYPE_ENUM, (void*)&org_khronos_nn_extension_convolution_layer_5_scalar_p7);    
+    org_khronos_nn_extension_convolution_layer_5_p7 = vxCreateScalar(context, VX_TYPE_ENUM, (void*)&org_khronos_nn_extension_convolution_layer_5_scalar_p7);
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_convolution_layer_5_p7);
     if(status != VX_SUCCESS)
     {
@@ -1241,7 +1234,7 @@
     }
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_convolution_layer_5_p7, VX_TYPE_SCALAR, "conv4_0_7");
 
-    org_khronos_nn_extension_convolution_layer_5_p8 = vxCreateTensorFromView(outputAllocators_MergeTensor_1_p0, 4, org_khronos_nn_extension_convolution_layer_5_p8_view_view_start, org_khronos_nn_extension_convolution_layer_5_p8_view_view_end);    
+    org_khronos_nn_extension_convolution_layer_5_p8 = vxCreateTensorFromView(outputAllocators_MergeTensor_1_p0, 4, org_khronos_nn_extension_convolution_layer_5_p8_view_view_start, org_khronos_nn_extension_convolution_layer_5_p8_view_view_end);
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_convolution_layer_5_p8);
     if(status != VX_SUCCESS)
     {
@@ -1251,7 +1244,7 @@
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_convolution_layer_5_p8, VX_TYPE_TENSOR, "conv4_0_8");
 
     org_khronos_nn_extension_convolution_layer_4_p1 = vxCreateTensor(context, 4, org_khronos_nn_extension_convolution_layer_4_p1Dimensions ,VX_TYPE_INT16, 8 );
-    
+
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_convolution_layer_4_p1);
     if(status != VX_SUCCESS)
     {
@@ -1261,7 +1254,7 @@
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_convolution_layer_4_p1, VX_TYPE_TENSOR, "conv4_1_weights");
 
     org_khronos_nn_extension_convolution_layer_4_p2 = vxCreateTensor(context, 1, org_khronos_nn_extension_convolution_layer_4_p2Dimensions ,VX_TYPE_INT16, 8 );
-    
+
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_convolution_layer_4_p2);
     if(status != VX_SUCCESS)
     {
@@ -1270,7 +1263,7 @@
     }
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_convolution_layer_4_p2, VX_TYPE_TENSOR, "conv4_1_bias");
 
-    org_khronos_nn_extension_convolution_layer_4_p3 = vxCreateScalar(context, VX_TYPE_SIZE, (void*)&org_khronos_nn_extension_convolution_layer_4_scalar_p3);    
+    org_khronos_nn_extension_convolution_layer_4_p3 = vxCreateScalar(context, VX_TYPE_SIZE, (void*)&org_khronos_nn_extension_convolution_layer_4_scalar_p3);
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_convolution_layer_4_p3);
     if(status != VX_SUCCESS)
     {
@@ -1279,7 +1272,7 @@
     }
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_convolution_layer_4_p3, VX_TYPE_SCALAR, "conv4_1_3");
 
-    org_khronos_nn_extension_convolution_layer_4_p4 = vxCreateScalar(context, VX_TYPE_SIZE, (void*)&org_khronos_nn_extension_convolution_layer_4_scalar_p4);    
+    org_khronos_nn_extension_convolution_layer_4_p4 = vxCreateScalar(context, VX_TYPE_SIZE, (void*)&org_khronos_nn_extension_convolution_layer_4_scalar_p4);
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_convolution_layer_4_p4);
     if(status != VX_SUCCESS)
     {
@@ -1288,7 +1281,7 @@
     }
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_convolution_layer_4_p4, VX_TYPE_SCALAR, "conv4_1_4");
 
-    org_khronos_nn_extension_convolution_layer_4_p5 = vxCreateScalar(context, VX_TYPE_ENUM, (void*)&org_khronos_nn_extension_convolution_layer_4_scalar_p5);    
+    org_khronos_nn_extension_convolution_layer_4_p5 = vxCreateScalar(context, VX_TYPE_ENUM, (void*)&org_khronos_nn_extension_convolution_layer_4_scalar_p5);
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_convolution_layer_4_p5);
     if(status != VX_SUCCESS)
     {
@@ -1297,7 +1290,7 @@
     }
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_convolution_layer_4_p5, VX_TYPE_SCALAR, "conv4_1_5");
 
-    org_khronos_nn_extension_convolution_layer_4_p6 = vxCreateScalar(context, VX_TYPE_ENUM, (void*)&org_khronos_nn_extension_convolution_layer_4_scalar_p6);    
+    org_khronos_nn_extension_convolution_layer_4_p6 = vxCreateScalar(context, VX_TYPE_ENUM, (void*)&org_khronos_nn_extension_convolution_layer_4_scalar_p6);
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_convolution_layer_4_p6);
     if(status != VX_SUCCESS)
     {
@@ -1306,7 +1299,7 @@
     }
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_convolution_layer_4_p6, VX_TYPE_SCALAR, "conv4_1_6");
 
-    org_khronos_nn_extension_convolution_layer_4_p7 = vxCreateScalar(context, VX_TYPE_ENUM, (void*)&org_khronos_nn_extension_convolution_layer_4_scalar_p7);    
+    org_khronos_nn_extension_convolution_layer_4_p7 = vxCreateScalar(context, VX_TYPE_ENUM, (void*)&org_khronos_nn_extension_convolution_layer_4_scalar_p7);
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_convolution_layer_4_p7);
     if(status != VX_SUCCESS)
     {
@@ -1315,7 +1308,7 @@
     }
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_convolution_layer_4_p7, VX_TYPE_SCALAR, "conv4_1_7");
 
-    org_khronos_nn_extension_convolution_layer_4_p8 = vxCreateTensorFromView(outputAllocators_MergeTensor_1_p0, 4, org_khronos_nn_extension_convolution_layer_4_p8_view_view_start, org_khronos_nn_extension_convolution_layer_4_p8_view_view_end);    
+    org_khronos_nn_extension_convolution_layer_4_p8 = vxCreateTensorFromView(outputAllocators_MergeTensor_1_p0, 4, org_khronos_nn_extension_convolution_layer_4_p8_view_view_start, org_khronos_nn_extension_convolution_layer_4_p8_view_view_end);
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_convolution_layer_4_p8);
     if(status != VX_SUCCESS)
     {
@@ -1324,7 +1317,7 @@
     }
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_convolution_layer_4_p8, VX_TYPE_TENSOR, "conv4_1_8");
 
-    org_khronos_nn_extension_activation_layer_3_p1 = vxCreateScalar(context, VX_TYPE_ENUM, (void*)&org_khronos_nn_extension_activation_layer_3_scalar_p1);    
+    org_khronos_nn_extension_activation_layer_3_p1 = vxCreateScalar(context, VX_TYPE_ENUM, (void*)&org_khronos_nn_extension_activation_layer_3_scalar_p1);
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_activation_layer_3_p1);
     if(status != VX_SUCCESS)
     {
@@ -1333,7 +1326,7 @@
     }
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_activation_layer_3_p1, VX_TYPE_SCALAR, "relu4_1");
 
-    org_khronos_nn_extension_activation_layer_3_p2 = vxCreateScalar(context, VX_TYPE_FLOAT32, (void*)&org_khronos_nn_extension_activation_layer_3_scalar_p2);    
+    org_khronos_nn_extension_activation_layer_3_p2 = vxCreateScalar(context, VX_TYPE_FLOAT32, (void*)&org_khronos_nn_extension_activation_layer_3_scalar_p2);
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_activation_layer_3_p2);
     if(status != VX_SUCCESS)
     {
@@ -1342,7 +1335,7 @@
     }
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_activation_layer_3_p2, VX_TYPE_SCALAR, "relu4_2");
 
-    org_khronos_nn_extension_activation_layer_3_p3 = vxCreateScalar(context, VX_TYPE_FLOAT32, (void*)&org_khronos_nn_extension_activation_layer_3_scalar_p3);    
+    org_khronos_nn_extension_activation_layer_3_p3 = vxCreateScalar(context, VX_TYPE_FLOAT32, (void*)&org_khronos_nn_extension_activation_layer_3_scalar_p3);
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_activation_layer_3_p3);
     if(status != VX_SUCCESS)
     {
@@ -1352,7 +1345,7 @@
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_activation_layer_3_p3, VX_TYPE_SCALAR, "relu4_2");
 
     org_khronos_nn_extension_activation_layer_3_p4 = vxCreateTensor(context, 4, org_khronos_nn_extension_activation_layer_3_p4Dimensions ,VX_TYPE_INT16, 8 );
-    
+
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_activation_layer_3_p4);
     if(status != VX_SUCCESS)
     {
@@ -1361,7 +1354,7 @@
     }
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_activation_layer_3_p4, VX_TYPE_TENSOR, "relu4_4");
 
-    outputAllocators_SplitTensor_2_p1 = vxCreateTensorFromView(org_khronos_nn_extension_activation_layer_3_p4, 4, org_khronos_nn_extension_activation_layer_3_p4_view1_view_start, org_khronos_nn_extension_activation_layer_3_p4_view1_view_end);    
+    outputAllocators_SplitTensor_2_p1 = vxCreateTensorFromView(org_khronos_nn_extension_activation_layer_3_p4, 4, org_khronos_nn_extension_activation_layer_3_p4_view1_view_start, org_khronos_nn_extension_activation_layer_3_p4_view1_view_end);
     status = vxGetStatus((vx_reference)outputAllocators_SplitTensor_2_p1);
     if(status != VX_SUCCESS)
     {
@@ -1370,7 +1363,7 @@
     }
     AddVXObject(pObjectContainer, (vx_reference)outputAllocators_SplitTensor_2_p1, VX_TYPE_TENSOR, "conv5_0_0");
 
-    outputAllocators_SplitTensor_2_p2 = vxCreateTensorFromView(org_khronos_nn_extension_activation_layer_3_p4, 4, org_khronos_nn_extension_activation_layer_3_p4_view2_view_start, org_khronos_nn_extension_activation_layer_3_p4_view2_view_end);    
+    outputAllocators_SplitTensor_2_p2 = vxCreateTensorFromView(org_khronos_nn_extension_activation_layer_3_p4, 4, org_khronos_nn_extension_activation_layer_3_p4_view2_view_start, org_khronos_nn_extension_activation_layer_3_p4_view2_view_end);
     status = vxGetStatus((vx_reference)outputAllocators_SplitTensor_2_p2);
     if(status != VX_SUCCESS)
     {
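
The SplitTensor/MergeTensor helpers above do not allocate new storage: vxCreateTensorFromView returns a sub-tensor of an existing parent, bounded by per-dimension start and end coordinate arrays, so the two convolution branches read from and write into slices of one shared tensor. A sketch of carving two channel-wise views out of a parent tensor (bounds are illustrative and parent is a hypothetical vx_tensor):

    /* Two channel-wise views over one parent tensor. */
    vx_size view0_start[4] = {  0,  0,   0, 0 };
    vx_size view0_end[4]   = { 27, 27, 128, 1 };
    vx_size view1_start[4] = {  0,  0, 128, 0 };
    vx_size view1_end[4]   = { 27, 27, 256, 1 };
    vx_tensor branch0 = vxCreateTensorFromView(parent, 4, view0_start, view0_end);
    vx_tensor branch1 = vxCreateTensorFromView(parent, 4, view1_start, view1_end);
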
@@ -1380,7 +1373,7 @@
     AddVXObject(pObjectContainer, (vx_reference)outputAllocators_SplitTensor_2_p2, VX_TYPE_TENSOR, "conv5_1_0");
 
     org_khronos_nn_extension_convolution_layer_7_p1 = vxCreateTensor(context, 4, org_khronos_nn_extension_convolution_layer_7_p1Dimensions ,VX_TYPE_INT16, 8 );
-    
+
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_convolution_layer_7_p1);
     if(status != VX_SUCCESS)
     {
@@ -1390,7 +1383,7 @@
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_convolution_layer_7_p1, VX_TYPE_TENSOR, "conv5_0_weights");
 
     org_khronos_nn_extension_convolution_layer_7_p2 = vxCreateTensor(context, 1, org_khronos_nn_extension_convolution_layer_7_p2Dimensions ,VX_TYPE_INT16, 8 );
-    
+
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_convolution_layer_7_p2);
     if(status != VX_SUCCESS)
     {
@@ -1399,7 +1392,7 @@
     }
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_convolution_layer_7_p2, VX_TYPE_TENSOR, "conv5_0_bias");
 
-    org_khronos_nn_extension_convolution_layer_7_p3 = vxCreateScalar(context, VX_TYPE_SIZE, (void*)&org_khronos_nn_extension_convolution_layer_7_scalar_p3);    
+    org_khronos_nn_extension_convolution_layer_7_p3 = vxCreateScalar(context, VX_TYPE_SIZE, (void*)&org_khronos_nn_extension_convolution_layer_7_scalar_p3);
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_convolution_layer_7_p3);
     if(status != VX_SUCCESS)
     {
@@ -1408,7 +1401,7 @@
     }
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_convolution_layer_7_p3, VX_TYPE_SCALAR, "conv5_0_3");
 
-    org_khronos_nn_extension_convolution_layer_7_p4 = vxCreateScalar(context, VX_TYPE_SIZE, (void*)&org_khronos_nn_extension_convolution_layer_7_scalar_p4);    
+    org_khronos_nn_extension_convolution_layer_7_p4 = vxCreateScalar(context, VX_TYPE_SIZE, (void*)&org_khronos_nn_extension_convolution_layer_7_scalar_p4);
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_convolution_layer_7_p4);
     if(status != VX_SUCCESS)
     {
@@ -1417,7 +1410,7 @@
     }
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_convolution_layer_7_p4, VX_TYPE_SCALAR, "conv5_0_4");
 
-    org_khronos_nn_extension_convolution_layer_7_p5 = vxCreateScalar(context, VX_TYPE_ENUM, (void*)&org_khronos_nn_extension_convolution_layer_7_scalar_p5);    
+    org_khronos_nn_extension_convolution_layer_7_p5 = vxCreateScalar(context, VX_TYPE_ENUM, (void*)&org_khronos_nn_extension_convolution_layer_7_scalar_p5);
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_convolution_layer_7_p5);
     if(status != VX_SUCCESS)
     {
@@ -1426,7 +1419,7 @@
     }
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_convolution_layer_7_p5, VX_TYPE_SCALAR, "conv5_0_5");
 
-    org_khronos_nn_extension_convolution_layer_7_p6 = vxCreateScalar(context, VX_TYPE_ENUM, (void*)&org_khronos_nn_extension_convolution_layer_7_scalar_p6);    
+    org_khronos_nn_extension_convolution_layer_7_p6 = vxCreateScalar(context, VX_TYPE_ENUM, (void*)&org_khronos_nn_extension_convolution_layer_7_scalar_p6);
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_convolution_layer_7_p6);
     if(status != VX_SUCCESS)
     {
@@ -1435,7 +1428,7 @@
     }
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_convolution_layer_7_p6, VX_TYPE_SCALAR, "conv5_0_6");
 
-    org_khronos_nn_extension_convolution_layer_7_p7 = vxCreateScalar(context, VX_TYPE_ENUM, (void*)&org_khronos_nn_extension_convolution_layer_7_scalar_p7);    
+    org_khronos_nn_extension_convolution_layer_7_p7 = vxCreateScalar(context, VX_TYPE_ENUM, (void*)&org_khronos_nn_extension_convolution_layer_7_scalar_p7);
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_convolution_layer_7_p7);
     if(status != VX_SUCCESS)
     {
@@ -1444,7 +1437,7 @@
     }
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_convolution_layer_7_p7, VX_TYPE_SCALAR, "conv5_0_7");
 
-    org_khronos_nn_extension_convolution_layer_7_p8 = vxCreateTensorFromView(outputAllocators_MergeTensor_2_p0, 4, org_khronos_nn_extension_convolution_layer_7_p8_view_view_start, org_khronos_nn_extension_convolution_layer_7_p8_view_view_end);    
+    org_khronos_nn_extension_convolution_layer_7_p8 = vxCreateTensorFromView(outputAllocators_MergeTensor_2_p0, 4, org_khronos_nn_extension_convolution_layer_7_p8_view_view_start, org_khronos_nn_extension_convolution_layer_7_p8_view_view_end);
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_convolution_layer_7_p8);
     if(status != VX_SUCCESS)
     {
@@ -1454,7 +1447,7 @@
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_convolution_layer_7_p8, VX_TYPE_TENSOR, "conv5_0_8");
 
     org_khronos_nn_extension_convolution_layer_6_p1 = vxCreateTensor(context, 4, org_khronos_nn_extension_convolution_layer_6_p1Dimensions ,VX_TYPE_INT16, 8 );
-    
+
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_convolution_layer_6_p1);
     if(status != VX_SUCCESS)
     {
@@ -1464,7 +1457,7 @@
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_convolution_layer_6_p1, VX_TYPE_TENSOR, "conv5_1_weights");
 
     org_khronos_nn_extension_convolution_layer_6_p2 = vxCreateTensor(context, 1, org_khronos_nn_extension_convolution_layer_6_p2Dimensions ,VX_TYPE_INT16, 8 );
-    
+
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_convolution_layer_6_p2);
     if(status != VX_SUCCESS)
     {
@@ -1473,7 +1466,7 @@
     }
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_convolution_layer_6_p2, VX_TYPE_TENSOR, "conv5_1_bias");
 
-    org_khronos_nn_extension_convolution_layer_6_p3 = vxCreateScalar(context, VX_TYPE_SIZE, (void*)&org_khronos_nn_extension_convolution_layer_6_scalar_p3);    
+    org_khronos_nn_extension_convolution_layer_6_p3 = vxCreateScalar(context, VX_TYPE_SIZE, (void*)&org_khronos_nn_extension_convolution_layer_6_scalar_p3);
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_convolution_layer_6_p3);
     if(status != VX_SUCCESS)
     {
@@ -1482,7 +1475,7 @@
     }
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_convolution_layer_6_p3, VX_TYPE_SCALAR, "conv5_1_3");
 
-    org_khronos_nn_extension_convolution_layer_6_p4 = vxCreateScalar(context, VX_TYPE_SIZE, (void*)&org_khronos_nn_extension_convolution_layer_6_scalar_p4);    
+    org_khronos_nn_extension_convolution_layer_6_p4 = vxCreateScalar(context, VX_TYPE_SIZE, (void*)&org_khronos_nn_extension_convolution_layer_6_scalar_p4);
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_convolution_layer_6_p4);
     if(status != VX_SUCCESS)
     {
@@ -1491,7 +1484,7 @@
     }
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_convolution_layer_6_p4, VX_TYPE_SCALAR, "conv5_1_4");
 
-    org_khronos_nn_extension_convolution_layer_6_p5 = vxCreateScalar(context, VX_TYPE_ENUM, (void*)&org_khronos_nn_extension_convolution_layer_6_scalar_p5);    
+    org_khronos_nn_extension_convolution_layer_6_p5 = vxCreateScalar(context, VX_TYPE_ENUM, (void*)&org_khronos_nn_extension_convolution_layer_6_scalar_p5);
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_convolution_layer_6_p5);
     if(status != VX_SUCCESS)
     {
@@ -1500,7 +1493,7 @@
     }
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_convolution_layer_6_p5, VX_TYPE_SCALAR, "conv5_1_5");
 
-    org_khronos_nn_extension_convolution_layer_6_p6 = vxCreateScalar(context, VX_TYPE_ENUM, (void*)&org_khronos_nn_extension_convolution_layer_6_scalar_p6);    
+    org_khronos_nn_extension_convolution_layer_6_p6 = vxCreateScalar(context, VX_TYPE_ENUM, (void*)&org_khronos_nn_extension_convolution_layer_6_scalar_p6);
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_convolution_layer_6_p6);
     if(status != VX_SUCCESS)
     {
@@ -1509,7 +1502,7 @@
     }
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_convolution_layer_6_p6, VX_TYPE_SCALAR, "conv5_1_6");
 
-    org_khronos_nn_extension_convolution_layer_6_p7 = vxCreateScalar(context, VX_TYPE_ENUM, (void*)&org_khronos_nn_extension_convolution_layer_6_scalar_p7);    
+    org_khronos_nn_extension_convolution_layer_6_p7 = vxCreateScalar(context, VX_TYPE_ENUM, (void*)&org_khronos_nn_extension_convolution_layer_6_scalar_p7);
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_convolution_layer_6_p7);
     if(status != VX_SUCCESS)
     {
@@ -1518,7 +1511,7 @@
     }
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_convolution_layer_6_p7, VX_TYPE_SCALAR, "conv5_1_7");
 
-    org_khronos_nn_extension_convolution_layer_6_p8 = vxCreateTensorFromView(outputAllocators_MergeTensor_2_p0, 4, org_khronos_nn_extension_convolution_layer_6_p8_view_view_start, org_khronos_nn_extension_convolution_layer_6_p8_view_view_end);    
+    org_khronos_nn_extension_convolution_layer_6_p8 = vxCreateTensorFromView(outputAllocators_MergeTensor_2_p0, 4, org_khronos_nn_extension_convolution_layer_6_p8_view_view_start, org_khronos_nn_extension_convolution_layer_6_p8_view_view_end);
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_convolution_layer_6_p8);
     if(status != VX_SUCCESS)
     {
@@ -1527,7 +1520,7 @@
     }
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_convolution_layer_6_p8, VX_TYPE_TENSOR, "conv5_1_8");
 
-    org_khronos_nn_extension_activation_layer_4_p1 = vxCreateScalar(context, VX_TYPE_ENUM, (void*)&org_khronos_nn_extension_activation_layer_4_scalar_p1);    
+    org_khronos_nn_extension_activation_layer_4_p1 = vxCreateScalar(context, VX_TYPE_ENUM, (void*)&org_khronos_nn_extension_activation_layer_4_scalar_p1);
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_activation_layer_4_p1);
     if(status != VX_SUCCESS)
     {
@@ -1536,7 +1529,7 @@
     }
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_activation_layer_4_p1, VX_TYPE_SCALAR, "relu5_1");
 
-    org_khronos_nn_extension_activation_layer_4_p2 = vxCreateScalar(context, VX_TYPE_FLOAT32, (void*)&org_khronos_nn_extension_activation_layer_4_scalar_p2);    
+    org_khronos_nn_extension_activation_layer_4_p2 = vxCreateScalar(context, VX_TYPE_FLOAT32, (void*)&org_khronos_nn_extension_activation_layer_4_scalar_p2);
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_activation_layer_4_p2);
     if(status != VX_SUCCESS)
     {
@@ -1545,7 +1538,7 @@
     }
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_activation_layer_4_p2, VX_TYPE_SCALAR, "relu5_2");
 
-    org_khronos_nn_extension_activation_layer_4_p3 = vxCreateScalar(context, VX_TYPE_FLOAT32, (void*)&org_khronos_nn_extension_activation_layer_4_scalar_p3);    
+    org_khronos_nn_extension_activation_layer_4_p3 = vxCreateScalar(context, VX_TYPE_FLOAT32, (void*)&org_khronos_nn_extension_activation_layer_4_scalar_p3);
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_activation_layer_4_p3);
     if(status != VX_SUCCESS)
     {
@@ -1555,7 +1548,7 @@
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_activation_layer_4_p3, VX_TYPE_SCALAR, "relu5_2");
 
     org_khronos_nn_extension_activation_layer_4_p4 = vxCreateTensor(context, 4, org_khronos_nn_extension_activation_layer_4_p4Dimensions ,VX_TYPE_INT16, 8 );
-    
+
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_activation_layer_4_p4);
     if(status != VX_SUCCESS)
     {
@@ -1564,7 +1557,7 @@
     }
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_activation_layer_4_p4, VX_TYPE_TENSOR, "relu5_4");
 
-    org_khronos_nn_extension_pooling_layer_2_p1 = vxCreateScalar(context, VX_TYPE_ENUM, (void*)&org_khronos_nn_extension_pooling_layer_2_scalar_p1);    
+    org_khronos_nn_extension_pooling_layer_2_p1 = vxCreateScalar(context, VX_TYPE_ENUM, (void*)&org_khronos_nn_extension_pooling_layer_2_scalar_p1);
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_pooling_layer_2_p1);
     if(status != VX_SUCCESS)
     {
@@ -1573,7 +1566,7 @@
     }
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_pooling_layer_2_p1, VX_TYPE_SCALAR, "pool5_1");
 
-    org_khronos_nn_extension_pooling_layer_2_p2 = vxCreateScalar(context, VX_TYPE_SIZE, (void*)&org_khronos_nn_extension_pooling_layer_2_scalar_p2);    
+    org_khronos_nn_extension_pooling_layer_2_p2 = vxCreateScalar(context, VX_TYPE_SIZE, (void*)&org_khronos_nn_extension_pooling_layer_2_scalar_p2);
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_pooling_layer_2_p2);
     if(status != VX_SUCCESS)
     {
@@ -1582,7 +1575,7 @@
     }
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_pooling_layer_2_p2, VX_TYPE_SCALAR, "pool5_2");
 
-    org_khronos_nn_extension_pooling_layer_2_p3 = vxCreateScalar(context, VX_TYPE_SIZE, (void*)&org_khronos_nn_extension_pooling_layer_2_scalar_p3);    
+    org_khronos_nn_extension_pooling_layer_2_p3 = vxCreateScalar(context, VX_TYPE_SIZE, (void*)&org_khronos_nn_extension_pooling_layer_2_scalar_p3);
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_pooling_layer_2_p3);
     if(status != VX_SUCCESS)
     {
@@ -1591,7 +1584,7 @@
     }
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_pooling_layer_2_p3, VX_TYPE_SCALAR, "pool5_3");
 
-    org_khronos_nn_extension_pooling_layer_2_p4 = vxCreateScalar(context, VX_TYPE_SIZE, (void*)&org_khronos_nn_extension_pooling_layer_2_scalar_p4);    
+    org_khronos_nn_extension_pooling_layer_2_p4 = vxCreateScalar(context, VX_TYPE_SIZE, (void*)&org_khronos_nn_extension_pooling_layer_2_scalar_p4);
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_pooling_layer_2_p4);
     if(status != VX_SUCCESS)
     {
@@ -1600,7 +1593,7 @@
     }
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_pooling_layer_2_p4, VX_TYPE_SCALAR, "pool5_4");
 
-    org_khronos_nn_extension_pooling_layer_2_p5 = vxCreateScalar(context, VX_TYPE_SIZE, (void*)&org_khronos_nn_extension_pooling_layer_2_scalar_p5);    
+    org_khronos_nn_extension_pooling_layer_2_p5 = vxCreateScalar(context, VX_TYPE_SIZE, (void*)&org_khronos_nn_extension_pooling_layer_2_scalar_p5);
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_pooling_layer_2_p5);
     if(status != VX_SUCCESS)
     {
@@ -1609,7 +1602,7 @@
     }
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_pooling_layer_2_p5, VX_TYPE_SCALAR, "pool5_5");
 
-    org_khronos_nn_extension_pooling_layer_2_p6 = vxCreateScalar(context, VX_TYPE_ENUM, (void*)&org_khronos_nn_extension_pooling_layer_2_scalar_p6);    
+    org_khronos_nn_extension_pooling_layer_2_p6 = vxCreateScalar(context, VX_TYPE_ENUM, (void*)&org_khronos_nn_extension_pooling_layer_2_scalar_p6);
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_pooling_layer_2_p6);
     if(status != VX_SUCCESS)
     {
@@ -1619,7 +1612,7 @@
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_pooling_layer_2_p6, VX_TYPE_SCALAR, "pool5_6");
 
     org_khronos_nn_extension_pooling_layer_2_p7 = vxCreateTensor(context, 4, org_khronos_nn_extension_pooling_layer_2_p7Dimensions ,VX_TYPE_INT16, 8 );
-    
+
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_pooling_layer_2_p7);
     if(status != VX_SUCCESS)
     {
@@ -1629,7 +1622,7 @@
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_pooling_layer_2_p7, VX_TYPE_TENSOR, "pool5_7");
 
     org_khronos_nn_extension_fully_connected_layer_0_p1 = vxCreateTensor(context, 4, org_khronos_nn_extension_fully_connected_layer_0_p1Dimensions ,VX_TYPE_INT16, 8 );
-    
+
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_fully_connected_layer_0_p1);
     if(status != VX_SUCCESS)
     {
@@ -1639,7 +1632,7 @@
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_fully_connected_layer_0_p1, VX_TYPE_TENSOR, "fc6_weights");
 
     org_khronos_nn_extension_fully_connected_layer_0_p2 = vxCreateTensor(context, 1, org_khronos_nn_extension_fully_connected_layer_0_p2Dimensions ,VX_TYPE_INT16, 8 );
-    
+
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_fully_connected_layer_0_p2);
     if(status != VX_SUCCESS)
     {
@@ -1648,7 +1641,7 @@
     }
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_fully_connected_layer_0_p2, VX_TYPE_TENSOR, "fc6_bias");
 
-    org_khronos_nn_extension_fully_connected_layer_0_p3 = vxCreateScalar(context, VX_TYPE_ENUM, (void*)&org_khronos_nn_extension_fully_connected_layer_0_scalar_p3);    
+    org_khronos_nn_extension_fully_connected_layer_0_p3 = vxCreateScalar(context, VX_TYPE_ENUM, (void*)&org_khronos_nn_extension_fully_connected_layer_0_scalar_p3);
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_fully_connected_layer_0_p3);
     if(status != VX_SUCCESS)
     {
@@ -1657,7 +1650,7 @@
     }
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_fully_connected_layer_0_p3, VX_TYPE_SCALAR, "fc6_3");
 
-    org_khronos_nn_extension_fully_connected_layer_0_p4 = vxCreateScalar(context, VX_TYPE_ENUM, (void*)&org_khronos_nn_extension_fully_connected_layer_0_scalar_p4);    
+    org_khronos_nn_extension_fully_connected_layer_0_p4 = vxCreateScalar(context, VX_TYPE_ENUM, (void*)&org_khronos_nn_extension_fully_connected_layer_0_scalar_p4);
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_fully_connected_layer_0_p4);
     if(status != VX_SUCCESS)
     {
@@ -1667,7 +1660,7 @@
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_fully_connected_layer_0_p4, VX_TYPE_SCALAR, "fc6_4");
 
     org_khronos_nn_extension_fully_connected_layer_0_p5 = vxCreateTensor(context, 2, org_khronos_nn_extension_fully_connected_layer_0_p5Dimensions ,VX_TYPE_INT16, 8 );
-    
+
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_fully_connected_layer_0_p5);
     if(status != VX_SUCCESS)
     {
@@ -1676,7 +1669,7 @@
     }
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_fully_connected_layer_0_p5, VX_TYPE_TENSOR, "fc6_5");
 
-    org_khronos_nn_extension_activation_layer_5_p1 = vxCreateScalar(context, VX_TYPE_ENUM, (void*)&org_khronos_nn_extension_activation_layer_5_scalar_p1);    
+    org_khronos_nn_extension_activation_layer_5_p1 = vxCreateScalar(context, VX_TYPE_ENUM, (void*)&org_khronos_nn_extension_activation_layer_5_scalar_p1);
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_activation_layer_5_p1);
     if(status != VX_SUCCESS)
     {
@@ -1685,7 +1678,7 @@
     }
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_activation_layer_5_p1, VX_TYPE_SCALAR, "relu6_1");
 
-    org_khronos_nn_extension_activation_layer_5_p2 = vxCreateScalar(context, VX_TYPE_FLOAT32, (void*)&org_khronos_nn_extension_activation_layer_5_scalar_p2);    
+    org_khronos_nn_extension_activation_layer_5_p2 = vxCreateScalar(context, VX_TYPE_FLOAT32, (void*)&org_khronos_nn_extension_activation_layer_5_scalar_p2);
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_activation_layer_5_p2);
     if(status != VX_SUCCESS)
     {
@@ -1694,7 +1687,7 @@
     }
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_activation_layer_5_p2, VX_TYPE_SCALAR, "relu6_2");
 
-    org_khronos_nn_extension_activation_layer_5_p3 = vxCreateScalar(context, VX_TYPE_FLOAT32, (void*)&org_khronos_nn_extension_activation_layer_5_scalar_p3);    
+    org_khronos_nn_extension_activation_layer_5_p3 = vxCreateScalar(context, VX_TYPE_FLOAT32, (void*)&org_khronos_nn_extension_activation_layer_5_scalar_p3);
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_activation_layer_5_p3);
     if(status != VX_SUCCESS)
     {
@@ -1704,7 +1697,7 @@
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_activation_layer_5_p3, VX_TYPE_SCALAR, "relu6_2");
 
     org_khronos_nn_extension_activation_layer_5_p4 = vxCreateTensor(context, 2, org_khronos_nn_extension_activation_layer_5_p4Dimensions ,VX_TYPE_INT16, 8 );
-    
+
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_activation_layer_5_p4);
     if(status != VX_SUCCESS)
     {
@@ -1714,7 +1707,7 @@
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_activation_layer_5_p4, VX_TYPE_TENSOR, "relu6_4");
 
     org_khronos_nn_extension_fully_connected_layer_1_p1 = vxCreateTensor(context, 2, org_khronos_nn_extension_fully_connected_layer_1_p1Dimensions ,VX_TYPE_INT16, 8 );
-    
+
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_fully_connected_layer_1_p1);
     if(status != VX_SUCCESS)
     {
@@ -1724,7 +1717,7 @@
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_fully_connected_layer_1_p1, VX_TYPE_TENSOR, "fc7_weights");
 
     org_khronos_nn_extension_fully_connected_layer_1_p2 = vxCreateTensor(context, 1, org_khronos_nn_extension_fully_connected_layer_1_p2Dimensions ,VX_TYPE_INT16, 8 );
-    
+
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_fully_connected_layer_1_p2);
     if(status != VX_SUCCESS)
     {
@@ -1733,7 +1726,7 @@
     }
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_fully_connected_layer_1_p2, VX_TYPE_TENSOR, "fc7_bias");
 
-    org_khronos_nn_extension_fully_connected_layer_1_p3 = vxCreateScalar(context, VX_TYPE_ENUM, (void*)&org_khronos_nn_extension_fully_connected_layer_1_scalar_p3);    
+    org_khronos_nn_extension_fully_connected_layer_1_p3 = vxCreateScalar(context, VX_TYPE_ENUM, (void*)&org_khronos_nn_extension_fully_connected_layer_1_scalar_p3);
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_fully_connected_layer_1_p3);
     if(status != VX_SUCCESS)
     {
@@ -1742,7 +1735,7 @@
     }
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_fully_connected_layer_1_p3, VX_TYPE_SCALAR, "fc7_3");
 
-    org_khronos_nn_extension_fully_connected_layer_1_p4 = vxCreateScalar(context, VX_TYPE_ENUM, (void*)&org_khronos_nn_extension_fully_connected_layer_1_scalar_p4);    
+    org_khronos_nn_extension_fully_connected_layer_1_p4 = vxCreateScalar(context, VX_TYPE_ENUM, (void*)&org_khronos_nn_extension_fully_connected_layer_1_scalar_p4);
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_fully_connected_layer_1_p4);
     if(status != VX_SUCCESS)
     {
@@ -1752,7 +1745,7 @@
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_fully_connected_layer_1_p4, VX_TYPE_SCALAR, "fc7_4");
 
     org_khronos_nn_extension_fully_connected_layer_1_p5 = vxCreateTensor(context, 2, org_khronos_nn_extension_fully_connected_layer_1_p5Dimensions ,VX_TYPE_INT16, 8 );
-    
+
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_fully_connected_layer_1_p5);
     if(status != VX_SUCCESS)
     {
@@ -1761,7 +1754,7 @@
     }
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_fully_connected_layer_1_p5, VX_TYPE_TENSOR, "fc7_5");
 
-    org_khronos_nn_extension_activation_layer_6_p1 = vxCreateScalar(context, VX_TYPE_ENUM, (void*)&org_khronos_nn_extension_activation_layer_6_scalar_p1);    
+    org_khronos_nn_extension_activation_layer_6_p1 = vxCreateScalar(context, VX_TYPE_ENUM, (void*)&org_khronos_nn_extension_activation_layer_6_scalar_p1);
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_activation_layer_6_p1);
     if(status != VX_SUCCESS)
     {
@@ -1770,7 +1763,7 @@
     }
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_activation_layer_6_p1, VX_TYPE_SCALAR, "relu7_1");
 
-    org_khronos_nn_extension_activation_layer_6_p2 = vxCreateScalar(context, VX_TYPE_FLOAT32, (void*)&org_khronos_nn_extension_activation_layer_6_scalar_p2);    
+    org_khronos_nn_extension_activation_layer_6_p2 = vxCreateScalar(context, VX_TYPE_FLOAT32, (void*)&org_khronos_nn_extension_activation_layer_6_scalar_p2);
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_activation_layer_6_p2);
     if(status != VX_SUCCESS)
     {
@@ -1779,7 +1772,7 @@
     }
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_activation_layer_6_p2, VX_TYPE_SCALAR, "relu7_2");
 
-    org_khronos_nn_extension_activation_layer_6_p3 = vxCreateScalar(context, VX_TYPE_FLOAT32, (void*)&org_khronos_nn_extension_activation_layer_6_scalar_p3);    
+    org_khronos_nn_extension_activation_layer_6_p3 = vxCreateScalar(context, VX_TYPE_FLOAT32, (void*)&org_khronos_nn_extension_activation_layer_6_scalar_p3);
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_activation_layer_6_p3);
     if(status != VX_SUCCESS)
     {
@@ -1789,7 +1782,7 @@
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_activation_layer_6_p3, VX_TYPE_SCALAR, "relu7_2");
 
     org_khronos_nn_extension_activation_layer_6_p4 = vxCreateTensor(context, 2, org_khronos_nn_extension_activation_layer_6_p4Dimensions ,VX_TYPE_INT16, 8 );
-    
+
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_activation_layer_6_p4);
     if(status != VX_SUCCESS)
     {
@@ -1799,7 +1792,7 @@
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_activation_layer_6_p4, VX_TYPE_TENSOR, "relu7_4");
 
     org_khronos_nn_extension_fully_connected_layer_2_p1 = vxCreateTensor(context, 2, org_khronos_nn_extension_fully_connected_layer_2_p1Dimensions ,VX_TYPE_INT16, 8 );
-    
+
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_fully_connected_layer_2_p1);
     if(status != VX_SUCCESS)
     {
@@ -1809,7 +1802,7 @@
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_fully_connected_layer_2_p1, VX_TYPE_TENSOR, "fc8_weights");
 
     org_khronos_nn_extension_fully_connected_layer_2_p2 = vxCreateTensor(context, 1, org_khronos_nn_extension_fully_connected_layer_2_p2Dimensions ,VX_TYPE_INT16, 8 );
-    
+
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_fully_connected_layer_2_p2);
     if(status != VX_SUCCESS)
     {
@@ -1818,7 +1811,7 @@
     }
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_fully_connected_layer_2_p2, VX_TYPE_TENSOR, "fc8_bias");
 
-    org_khronos_nn_extension_fully_connected_layer_2_p3 = vxCreateScalar(context, VX_TYPE_ENUM, (void*)&org_khronos_nn_extension_fully_connected_layer_2_scalar_p3);    
+    org_khronos_nn_extension_fully_connected_layer_2_p3 = vxCreateScalar(context, VX_TYPE_ENUM, (void*)&org_khronos_nn_extension_fully_connected_layer_2_scalar_p3);
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_fully_connected_layer_2_p3);
     if(status != VX_SUCCESS)
     {
@@ -1827,7 +1820,7 @@
     }
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_fully_connected_layer_2_p3, VX_TYPE_SCALAR, "fc8_3");
 
-    org_khronos_nn_extension_fully_connected_layer_2_p4 = vxCreateScalar(context, VX_TYPE_ENUM, (void*)&org_khronos_nn_extension_fully_connected_layer_2_scalar_p4);    
+    org_khronos_nn_extension_fully_connected_layer_2_p4 = vxCreateScalar(context, VX_TYPE_ENUM, (void*)&org_khronos_nn_extension_fully_connected_layer_2_scalar_p4);
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_fully_connected_layer_2_p4);
     if(status != VX_SUCCESS)
     {
@@ -1837,7 +1830,7 @@
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_fully_connected_layer_2_p4, VX_TYPE_SCALAR, "fc8_4");
 
     org_khronos_nn_extension_fully_connected_layer_2_p5 = vxCreateTensor(context, 2, org_khronos_nn_extension_fully_connected_layer_2_p5Dimensions ,VX_TYPE_INT16, 8 );
-    
+
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_fully_connected_layer_2_p5);
     if(status != VX_SUCCESS)
     {
@@ -1856,7 +1849,7 @@
     AddVXObject(pObjectContainer, (vx_reference)com_cnn_helpers_scalemddata_0_p1, VX_TYPE_SCALAR, "Power0_1");
 
     com_cnn_helpers_scalemddata_0_p2 = vxCreateTensor(context, 2, com_cnn_helpers_scalemddata_0_p2Dimensions ,VX_TYPE_INT16, 8 );
-    
+
     status = vxGetStatus((vx_reference)com_cnn_helpers_scalemddata_0_p2);
     if(status != VX_SUCCESS)
     {
@@ -1866,7 +1859,7 @@
     AddVXObject(pObjectContainer, (vx_reference)com_cnn_helpers_scalemddata_0_p2, VX_TYPE_TENSOR, "Power0_2");
 
     org_khronos_nn_extension_softmax_layer_0_p1 = vxCreateTensor(context, 2, org_khronos_nn_extension_softmax_layer_0_p1Dimensions ,VX_TYPE_INT16, 8 );
-    
+
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_softmax_layer_0_p1);
     if(status != VX_SUCCESS)
     {
@@ -1909,7 +1902,7 @@
 	//status = CreateNode(graph, org_khronos_nn_extension_normalization_layer_Kernel, pObjectContainer, "org_khronos_nn_extension_normalization_layer_0", filteredNodeList, filteredNodeCount, &org_khronos_nn_extension_normalization_layer_0);
 	//if(status != VX_SUCCESS)
 	//    return status;
-	org_khronos_nn_extension_normalization_layer_0 = vxNormalizationLayer(graph, org_khronos_nn_extension_activation_layer_0_p4, norm_params.type, norm_params.normalization_size, norm_params.alpha, norm_params.beta,
+	org_khronos_nn_extension_normalization_layer_0 = vxLocalResponseNormalizationLayer(graph, org_khronos_nn_extension_activation_layer_0_p4, norm_params.type, norm_params.normalization_size, norm_params.alpha, norm_params.beta, 1.0f,
 		org_khronos_nn_extension_normalization_layer_0_p5);
 	status = vxGetStatus((vx_reference)org_khronos_nn_extension_normalization_layer_0);
 	if (status != VX_SUCCESS)
@@ -1974,8 +1967,8 @@
 	//status = CreateNode(graph, org_khronos_nn_extension_normalization_layer_Kernel, pObjectContainer, "org_khronos_nn_extension_normalization_layer_1", filteredNodeList, filteredNodeCount, &org_khronos_nn_extension_normalization_layer_1);
 	//if(status != VX_SUCCESS)
 	//    return status;
-	org_khronos_nn_extension_normalization_layer_1 = vxNormalizationLayer(graph, org_khronos_nn_extension_activation_layer_1_p4, norm_params.type, norm_params.normalization_size, norm_params.alpha,
-		norm_params.beta, org_khronos_nn_extension_normalization_layer_1_p5);
+	org_khronos_nn_extension_normalization_layer_1 = vxLocalResponseNormalizationLayer(graph, org_khronos_nn_extension_activation_layer_1_p4, norm_params.type, norm_params.normalization_size, norm_params.alpha,
+		norm_params.beta, 1.0f, org_khronos_nn_extension_normalization_layer_1_p5);
 	if (status != VX_SUCCESS)
 	{
 		WriteLog("ERROR: failed to create node org_khronos_nn_extension_normalization_layer_1\n");
@@ -2211,10 +2204,10 @@
 		return status;
 	}
 	AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_softmax_layer_0, VX_TYPE_NODE, "org_khronos_nn_extension_softmax_layer_0");
-        
- 
+
+
 
     return status;
 }
 
-#endif
+#endif // OPENVX_CONFORMANCE_NEURAL_NETWORKS
diff --git a/test_conformance/Networks/src/graph_googlenet.c b/test_conformance/Networks/src/graph_googlenet.c
index 8781968..1c9cb86 100644
--- a/test_conformance/Networks/src/graph_googlenet.c
+++ b/test_conformance/Networks/src/graph_googlenet.c
@@ -1,8 +1,9 @@
 /** @file graph.h
- *  @brief 
+ *  @brief
  *  This file contains the implementation of the generated graph factory function
  */
 
+#ifdef OPENVX_CONFORMANCE_NEURAL_NETWORKS
 #ifdef OPENVX_USE_NN
 
 #include <stdio.h>
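
The GoogleNet factory receives the same compile-time guard as the file above: the whole translation unit is now wrapped in OPENVX_CONFORMANCE_NEURAL_NETWORKS in addition to the existing OPENVX_USE_NN check, so it drops out of the build when the Neural Networks feature set is disabled. A sketch of the resulting nesting (the comment stands in for the generated factory code):

    #ifdef OPENVX_CONFORMANCE_NEURAL_NETWORKS
    #ifdef OPENVX_USE_NN

    /* ... generated graph factory implementation ... */

    #endif /* OPENVX_USE_NN */
    #endif /* OPENVX_CONFORMANCE_NEURAL_NETWORKS */
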
@@ -123,13 +124,12 @@
 
 static vx_status Graph(vx_context context, vx_graph graph, ObjectRefContainerType* pObjectContainer, char* filteredNodeList[], size_t filteredNodeCount, vx_tensor org_khronos_nn_extension_convolution_layer_0_p0, vx_tensor org_khronos_nn_extension_convolution_layer_0_p1, vx_tensor org_khronos_nn_extension_convolution_layer_0_p2, vx_nn_convolution_params_t org_khronos_nn_extension_convolution_layer_0_p3, vx_tensor org_khronos_nn_extension_convolution_layer_0_p8)
 {
-    vx_status status = VX_SUCCESS;    
+    vx_status status = VX_SUCCESS;
 
     //
     // Kernel Declarations
     //
 
-    vx_kernel org_khronos_nn_extension_convolution_layer_Kernel;
     vx_kernel org_khronos_nn_extension_activation_layer_Kernel;
     vx_kernel org_khronos_nn_extension_pooling_layer_Kernel;
     vx_kernel org_khronos_nn_extension_normalization_layer_Kernel;
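
The deleted line drops the convolution kernel handle; the commented-out CreateNode/AssignNodeParameter blocks for the convolution layers elsewhere in this factory suggest those nodes are created directly through the node-creation API instead. A hedged sketch of such a direct call, assuming the vx_khr_nn vxConvolutionLayer signature and placeholder tensor names:

    /* Direct node creation instead of vxCreateGenericNode + parameter assignment. */
    vx_node conv_node = vxConvolutionLayer(graph,
                                           input_tensor,
                                           weights_tensor,
                                           biases_tensor,
                                           &convolution_params,        /* vx_nn_convolution_params_t */
                                           sizeof(convolution_params),
                                           output_tensor);
    status = vxGetStatus((vx_reference)conv_node);
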
@@ -3176,7 +3176,7 @@
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_convolution_layer_5_p7, VX_TYPE_SCALAR, "inception_3a_5x5_7");
 
     org_khronos_nn_extension_convolution_layer_5_p8 = vxCreateTensor(context, 4, org_khronos_nn_extension_convolution_layer_5_p8Dimensions ,VX_TYPE_INT16, 8 );
-    
+
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_convolution_layer_5_p8);
     if(status != VX_SUCCESS)
     {
@@ -3379,7 +3379,7 @@
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_convolution_layer_12_p1, VX_TYPE_TENSOR, "inception_3b_3x3_reduce_weights");
 
     org_khronos_nn_extension_convolution_layer_12_p2 = vxCreateTensor(context, 1, org_khronos_nn_extension_convolution_layer_12_p2Dimensions ,VX_TYPE_INT16, 8 );
-    
+
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_convolution_layer_12_p2);
     if(status != VX_SUCCESS)
     {
@@ -3454,7 +3454,7 @@
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_convolution_layer_10_p1, VX_TYPE_TENSOR, "inception_3b_5x5_reduce_weights");
 
     org_khronos_nn_extension_convolution_layer_10_p2 = vxCreateTensor(context, 1, org_khronos_nn_extension_convolution_layer_10_p2Dimensions ,VX_TYPE_INT16, 8 );
-    
+
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_convolution_layer_10_p2);
     if(status != VX_SUCCESS)
     {
@@ -3509,7 +3509,7 @@
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_convolution_layer_10_p7, VX_TYPE_SCALAR, "inception_3b_5x5_reduce_7");
 
     org_khronos_nn_extension_convolution_layer_10_p8 = vxCreateTensor(context, 4, org_khronos_nn_extension_convolution_layer_10_p8Dimensions ,VX_TYPE_INT16, 8 );
-    
+
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_convolution_layer_10_p8);
     if(status != VX_SUCCESS)
     {
@@ -3646,7 +3646,7 @@
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_activation_layer_12_p3, VX_TYPE_SCALAR, "inception_3b_relu_3x3_reduce_2");
 
     org_khronos_nn_extension_activation_layer_12_p4 = vxCreateTensor(context, 4, org_khronos_nn_extension_activation_layer_12_p4Dimensions ,VX_TYPE_INT16, 8 );
-    
+
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_activation_layer_12_p4);
     if(status != VX_SUCCESS)
     {
@@ -5639,7 +5639,7 @@
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_convolution_layer_30_p1, VX_TYPE_TENSOR, "inception_4c_3x3_reduce_weights");
 
     org_khronos_nn_extension_convolution_layer_30_p2 = vxCreateTensor(context, 1, org_khronos_nn_extension_convolution_layer_30_p2Dimensions ,VX_TYPE_INT16, 8 );
-    
+
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_convolution_layer_30_p2);
     if(status != VX_SUCCESS)
     {
@@ -5714,7 +5714,7 @@
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_convolution_layer_28_p1, VX_TYPE_TENSOR, "inception_4c_5x5_reduce_weights");
 
     org_khronos_nn_extension_convolution_layer_28_p2 = vxCreateTensor(context, 1, org_khronos_nn_extension_convolution_layer_28_p2Dimensions ,VX_TYPE_INT16, 8 );
-    
+
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_convolution_layer_28_p2);
     if(status != VX_SUCCESS)
     {
@@ -6371,7 +6371,7 @@
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_convolution_layer_36_p1, VX_TYPE_TENSOR, "inception_4d_3x3_reduce_weights");
 
     org_khronos_nn_extension_convolution_layer_36_p2 = vxCreateTensor(context, 1, org_khronos_nn_extension_convolution_layer_36_p2Dimensions ,VX_TYPE_INT16, 8 );
-    
+
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_convolution_layer_36_p2);
     if(status != VX_SUCCESS)
     {
@@ -6446,7 +6446,7 @@
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_convolution_layer_34_p1, VX_TYPE_TENSOR, "inception_4d_5x5_reduce_weights");
 
     org_khronos_nn_extension_convolution_layer_34_p2 = vxCreateTensor(context, 1, org_khronos_nn_extension_convolution_layer_34_p2Dimensions ,VX_TYPE_INT16, 8 );
-    
+
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_convolution_layer_34_p2);
     if(status != VX_SUCCESS)
     {
@@ -7103,7 +7103,7 @@
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_convolution_layer_42_p1, VX_TYPE_TENSOR, "inception_4e_3x3_reduce_weights");
 
     org_khronos_nn_extension_convolution_layer_42_p2 = vxCreateTensor(context, 1, org_khronos_nn_extension_convolution_layer_42_p2Dimensions ,VX_TYPE_INT16, 8 );
-    
+
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_convolution_layer_42_p2);
     if(status != VX_SUCCESS)
     {
@@ -7178,7 +7178,7 @@
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_convolution_layer_40_p1, VX_TYPE_TENSOR, "inception_4e_5x5_reduce_weights");
 
     org_khronos_nn_extension_convolution_layer_40_p2 = vxCreateTensor(context, 1, org_khronos_nn_extension_convolution_layer_40_p2Dimensions ,VX_TYPE_INT16, 8 );
-    
+
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_convolution_layer_40_p2);
     if(status != VX_SUCCESS)
     {
@@ -9380,7 +9380,7 @@
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_nn_extension_fully_connected_layer_0_p4, VX_TYPE_SCALAR, "loss3_classifier_4");
 
     org_khronos_nn_extension_fully_connected_layer_0_p5 = vxCreateTensor(context, 2, org_khronos_nn_extension_fully_connected_layer_0_p5Dimensions ,VX_TYPE_INT16, 8 );
-    
+
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_fully_connected_layer_0_p5);
     if(status != VX_SUCCESS)
     {
@@ -9437,7 +9437,7 @@
     AddVXObject(pObjectContainer, (vx_reference)org_khronos_openvx_tensor_multiply_0_p5, VX_TYPE_TENSOR, "Power0_5");
 
     org_khronos_nn_extension_softmax_layer_0_p1 = vxCreateTensor(context, 2, org_khronos_nn_extension_softmax_layer_0_p1Dimensions ,VX_TYPE_INT16, 8 );
-    
+
     status = vxGetStatus((vx_reference)org_khronos_nn_extension_softmax_layer_0_p1);
     if(status != VX_SUCCESS)
     {
@@ -10535,83 +10535,83 @@
 //    status = AssignNodeParameter(org_khronos_nn_extension_convolution_layer_0, "org_khronos_nn_extension_convolution_layer_0", 8, (vx_reference)org_khronos_nn_extension_convolution_layer_0_p8);
 //    if(status != VX_SUCCESS)
 //        return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_0, "org_khronos_nn_extension_activation_layer_0", 0, (vx_reference)org_khronos_nn_extension_convolution_layer_0_p8);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_0, "org_khronos_nn_extension_activation_layer_0", 1, (vx_reference)org_khronos_nn_extension_activation_layer_0_p1);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_0, "org_khronos_nn_extension_activation_layer_0", 2, (vx_reference)org_khronos_nn_extension_activation_layer_0_p2);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_0, "org_khronos_nn_extension_activation_layer_0", 3, (vx_reference)org_khronos_nn_extension_activation_layer_0_p3);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_0, "org_khronos_nn_extension_activation_layer_0", 4, (vx_reference)org_khronos_nn_extension_activation_layer_0_p4);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_pooling_layer_0, "org_khronos_nn_extension_pooling_layer_0", 0, (vx_reference)org_khronos_nn_extension_activation_layer_0_p4);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_pooling_layer_0, "org_khronos_nn_extension_pooling_layer_0", 1, (vx_reference)org_khronos_nn_extension_pooling_layer_0_p1);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_pooling_layer_0, "org_khronos_nn_extension_pooling_layer_0", 2, (vx_reference)org_khronos_nn_extension_pooling_layer_0_p2);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_pooling_layer_0, "org_khronos_nn_extension_pooling_layer_0", 3, (vx_reference)org_khronos_nn_extension_pooling_layer_0_p3);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_pooling_layer_0, "org_khronos_nn_extension_pooling_layer_0", 4, (vx_reference)org_khronos_nn_extension_pooling_layer_0_p4);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_pooling_layer_0, "org_khronos_nn_extension_pooling_layer_0", 5, (vx_reference)org_khronos_nn_extension_pooling_layer_0_p5);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_pooling_layer_0, "org_khronos_nn_extension_pooling_layer_0", 6, (vx_reference)org_khronos_nn_extension_pooling_layer_0_p6);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_pooling_layer_0, "org_khronos_nn_extension_pooling_layer_0", 7, (vx_reference)org_khronos_nn_extension_pooling_layer_0_p7);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_normalization_layer_0, "org_khronos_nn_extension_normalization_layer_0", 0, (vx_reference)org_khronos_nn_extension_pooling_layer_0_p7);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_normalization_layer_0, "org_khronos_nn_extension_normalization_layer_0", 1, (vx_reference)org_khronos_nn_extension_normalization_layer_0_p1);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_normalization_layer_0, "org_khronos_nn_extension_normalization_layer_0", 2, (vx_reference)org_khronos_nn_extension_normalization_layer_0_p2);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_normalization_layer_0, "org_khronos_nn_extension_normalization_layer_0", 3, (vx_reference)org_khronos_nn_extension_normalization_layer_0_p3);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_normalization_layer_0, "org_khronos_nn_extension_normalization_layer_0", 4, (vx_reference)org_khronos_nn_extension_normalization_layer_0_p4);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_normalization_layer_0, "org_khronos_nn_extension_normalization_layer_0", 5, (vx_reference)org_khronos_nn_extension_normalization_layer_0_p5);
     if(status != VX_SUCCESS)
         return status;
-        
+
 //    status = AssignNodeParameter(org_khronos_nn_extension_convolution_layer_1, "org_khronos_nn_extension_convolution_layer_1", 0, (vx_reference)org_khronos_nn_extension_normalization_layer_0_p5);
 //    if(status != VX_SUCCESS)
 //        return status;
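
The long hunks above and below are again whitespace-only changes inside chains of AssignNodeParameter calls. AssignNodeParameter is a helper of this test suite rather than an OpenVX entry point; a minimal sketch of what such a helper could look like on top of the core vxSetParameterByIndex call (an assumption, the real implementation lives elsewhere in the suite):

    /* Hypothetical sketch: bind one reference to a node parameter slot. */
    static vx_status AssignNodeParameter(vx_node node, const char* nodeName,
                                         vx_uint32 index, vx_reference value)
    {
        vx_status status = vxSetParameterByIndex(node, index, value);
        (void)nodeName; /* the real helper presumably uses this for logging */
        if (status != VX_SUCCESS)
            WriteLog("ERROR: failed to assign node parameter\n");
        return status;
    }
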
@@ -10647,27 +10647,27 @@
 //    status = AssignNodeParameter(org_khronos_nn_extension_convolution_layer_1, "org_khronos_nn_extension_convolution_layer_1", 8, (vx_reference)org_khronos_nn_extension_convolution_layer_1_p8);
 //    if(status != VX_SUCCESS)
 //        return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_1, "org_khronos_nn_extension_activation_layer_1", 0, (vx_reference)org_khronos_nn_extension_convolution_layer_1_p8);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_1, "org_khronos_nn_extension_activation_layer_1", 1, (vx_reference)org_khronos_nn_extension_activation_layer_1_p1);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_1, "org_khronos_nn_extension_activation_layer_1", 2, (vx_reference)org_khronos_nn_extension_activation_layer_1_p2);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_1, "org_khronos_nn_extension_activation_layer_1", 3, (vx_reference)org_khronos_nn_extension_activation_layer_1_p3);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_1, "org_khronos_nn_extension_activation_layer_1", 4, (vx_reference)org_khronos_nn_extension_activation_layer_1_p4);
     if(status != VX_SUCCESS)
         return status;
-        
+
 //    status = AssignNodeParameter(org_khronos_nn_extension_convolution_layer_2, "org_khronos_nn_extension_convolution_layer_2", 0, (vx_reference)org_khronos_nn_extension_activation_layer_1_p4);
 //    if(status != VX_SUCCESS)
 //        return status;
@@ -10703,83 +10703,83 @@
 //    status = AssignNodeParameter(org_khronos_nn_extension_convolution_layer_2, "org_khronos_nn_extension_convolution_layer_2", 8, (vx_reference)org_khronos_nn_extension_convolution_layer_2_p8);
 //    if(status != VX_SUCCESS)
 //        return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_2, "org_khronos_nn_extension_activation_layer_2", 0, (vx_reference)org_khronos_nn_extension_convolution_layer_2_p8);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_2, "org_khronos_nn_extension_activation_layer_2", 1, (vx_reference)org_khronos_nn_extension_activation_layer_2_p1);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_2, "org_khronos_nn_extension_activation_layer_2", 2, (vx_reference)org_khronos_nn_extension_activation_layer_2_p2);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_2, "org_khronos_nn_extension_activation_layer_2", 3, (vx_reference)org_khronos_nn_extension_activation_layer_2_p3);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_2, "org_khronos_nn_extension_activation_layer_2", 4, (vx_reference)org_khronos_nn_extension_activation_layer_2_p4);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_normalization_layer_1, "org_khronos_nn_extension_normalization_layer_1", 0, (vx_reference)org_khronos_nn_extension_activation_layer_2_p4);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_normalization_layer_1, "org_khronos_nn_extension_normalization_layer_1", 1, (vx_reference)org_khronos_nn_extension_normalization_layer_1_p1);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_normalization_layer_1, "org_khronos_nn_extension_normalization_layer_1", 2, (vx_reference)org_khronos_nn_extension_normalization_layer_1_p2);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_normalization_layer_1, "org_khronos_nn_extension_normalization_layer_1", 3, (vx_reference)org_khronos_nn_extension_normalization_layer_1_p3);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_normalization_layer_1, "org_khronos_nn_extension_normalization_layer_1", 4, (vx_reference)org_khronos_nn_extension_normalization_layer_1_p4);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_normalization_layer_1, "org_khronos_nn_extension_normalization_layer_1", 5, (vx_reference)org_khronos_nn_extension_normalization_layer_1_p5);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_pooling_layer_1, "org_khronos_nn_extension_pooling_layer_1", 0, (vx_reference)org_khronos_nn_extension_normalization_layer_1_p5);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_pooling_layer_1, "org_khronos_nn_extension_pooling_layer_1", 1, (vx_reference)org_khronos_nn_extension_pooling_layer_1_p1);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_pooling_layer_1, "org_khronos_nn_extension_pooling_layer_1", 2, (vx_reference)org_khronos_nn_extension_pooling_layer_1_p2);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_pooling_layer_1, "org_khronos_nn_extension_pooling_layer_1", 3, (vx_reference)org_khronos_nn_extension_pooling_layer_1_p3);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_pooling_layer_1, "org_khronos_nn_extension_pooling_layer_1", 4, (vx_reference)org_khronos_nn_extension_pooling_layer_1_p4);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_pooling_layer_1, "org_khronos_nn_extension_pooling_layer_1", 5, (vx_reference)org_khronos_nn_extension_pooling_layer_1_p5);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_pooling_layer_1, "org_khronos_nn_extension_pooling_layer_1", 6, (vx_reference)org_khronos_nn_extension_pooling_layer_1_p6);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_pooling_layer_1, "org_khronos_nn_extension_pooling_layer_1", 7, (vx_reference)org_khronos_nn_extension_pooling_layer_1_p7);
     if(status != VX_SUCCESS)
         return status;
-        
+
 //    status = AssignNodeParameter(org_khronos_nn_extension_convolution_layer_8, "org_khronos_nn_extension_convolution_layer_8", 0, (vx_reference)org_khronos_nn_extension_pooling_layer_1_p7);
 //    if(status != VX_SUCCESS)
 //        return status;
@@ -10887,99 +10887,99 @@
 //    status = AssignNodeParameter(org_khronos_nn_extension_convolution_layer_4, "org_khronos_nn_extension_convolution_layer_4", 8, (vx_reference)org_khronos_nn_extension_convolution_layer_4_p8);
 //    if(status != VX_SUCCESS)
 //        return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_pooling_layer_2, "org_khronos_nn_extension_pooling_layer_2", 0, (vx_reference)org_khronos_nn_extension_pooling_layer_1_p7);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_pooling_layer_2, "org_khronos_nn_extension_pooling_layer_2", 1, (vx_reference)org_khronos_nn_extension_pooling_layer_2_p1);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_pooling_layer_2, "org_khronos_nn_extension_pooling_layer_2", 2, (vx_reference)org_khronos_nn_extension_pooling_layer_2_p2);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_pooling_layer_2, "org_khronos_nn_extension_pooling_layer_2", 3, (vx_reference)org_khronos_nn_extension_pooling_layer_2_p3);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_pooling_layer_2, "org_khronos_nn_extension_pooling_layer_2", 4, (vx_reference)org_khronos_nn_extension_pooling_layer_2_p4);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_pooling_layer_2, "org_khronos_nn_extension_pooling_layer_2", 5, (vx_reference)org_khronos_nn_extension_pooling_layer_2_p5);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_pooling_layer_2, "org_khronos_nn_extension_pooling_layer_2", 6, (vx_reference)org_khronos_nn_extension_pooling_layer_2_p6);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_pooling_layer_2, "org_khronos_nn_extension_pooling_layer_2", 7, (vx_reference)org_khronos_nn_extension_pooling_layer_2_p7);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_8, "org_khronos_nn_extension_activation_layer_8", 0, (vx_reference)org_khronos_nn_extension_convolution_layer_8_p8);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_8, "org_khronos_nn_extension_activation_layer_8", 1, (vx_reference)org_khronos_nn_extension_activation_layer_8_p1);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_8, "org_khronos_nn_extension_activation_layer_8", 2, (vx_reference)org_khronos_nn_extension_activation_layer_8_p2);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_8, "org_khronos_nn_extension_activation_layer_8", 3, (vx_reference)org_khronos_nn_extension_activation_layer_8_p3);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_8, "org_khronos_nn_extension_activation_layer_8", 4, (vx_reference)org_khronos_nn_extension_activation_layer_8_p4);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_6, "org_khronos_nn_extension_activation_layer_6", 0, (vx_reference)org_khronos_nn_extension_convolution_layer_6_p8);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_6, "org_khronos_nn_extension_activation_layer_6", 1, (vx_reference)org_khronos_nn_extension_activation_layer_6_p1);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_6, "org_khronos_nn_extension_activation_layer_6", 2, (vx_reference)org_khronos_nn_extension_activation_layer_6_p2);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_6, "org_khronos_nn_extension_activation_layer_6", 3, (vx_reference)org_khronos_nn_extension_activation_layer_6_p3);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_6, "org_khronos_nn_extension_activation_layer_6", 4, (vx_reference)org_khronos_nn_extension_activation_layer_6_p4);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_4, "org_khronos_nn_extension_activation_layer_4", 0, (vx_reference)org_khronos_nn_extension_convolution_layer_4_p8);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_4, "org_khronos_nn_extension_activation_layer_4", 1, (vx_reference)org_khronos_nn_extension_activation_layer_4_p1);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_4, "org_khronos_nn_extension_activation_layer_4", 2, (vx_reference)org_khronos_nn_extension_activation_layer_4_p2);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_4, "org_khronos_nn_extension_activation_layer_4", 3, (vx_reference)org_khronos_nn_extension_activation_layer_4_p3);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_4, "org_khronos_nn_extension_activation_layer_4", 4, (vx_reference)org_khronos_nn_extension_activation_layer_4_p4);
     if(status != VX_SUCCESS)
         return status;
-        
+
 //    status = AssignNodeParameter(org_khronos_nn_extension_convolution_layer_3, "org_khronos_nn_extension_convolution_layer_3", 0, (vx_reference)org_khronos_nn_extension_pooling_layer_2_p7);
 //    if(status != VX_SUCCESS)
 //        return status;
@@ -11087,67 +11087,67 @@
 //    status = AssignNodeParameter(org_khronos_nn_extension_convolution_layer_5, "org_khronos_nn_extension_convolution_layer_5", 8, (vx_reference)org_khronos_nn_extension_convolution_layer_5_p8);
 //    if(status != VX_SUCCESS)
 //        return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_3, "org_khronos_nn_extension_activation_layer_3", 0, (vx_reference)org_khronos_nn_extension_convolution_layer_3_p8);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_3, "org_khronos_nn_extension_activation_layer_3", 1, (vx_reference)org_khronos_nn_extension_activation_layer_3_p1);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_3, "org_khronos_nn_extension_activation_layer_3", 2, (vx_reference)org_khronos_nn_extension_activation_layer_3_p2);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_3, "org_khronos_nn_extension_activation_layer_3", 3, (vx_reference)org_khronos_nn_extension_activation_layer_3_p3);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_3, "org_khronos_nn_extension_activation_layer_3", 4, (vx_reference)org_khronos_nn_extension_activation_layer_3_p4);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_7, "org_khronos_nn_extension_activation_layer_7", 0, (vx_reference)org_khronos_nn_extension_convolution_layer_7_p8);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_7, "org_khronos_nn_extension_activation_layer_7", 1, (vx_reference)org_khronos_nn_extension_activation_layer_7_p1);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_7, "org_khronos_nn_extension_activation_layer_7", 2, (vx_reference)org_khronos_nn_extension_activation_layer_7_p2);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_7, "org_khronos_nn_extension_activation_layer_7", 3, (vx_reference)org_khronos_nn_extension_activation_layer_7_p3);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_7, "org_khronos_nn_extension_activation_layer_7", 4, (vx_reference)org_khronos_nn_extension_activation_layer_7_p4);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_5, "org_khronos_nn_extension_activation_layer_5", 0, (vx_reference)org_khronos_nn_extension_convolution_layer_5_p8);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_5, "org_khronos_nn_extension_activation_layer_5", 1, (vx_reference)org_khronos_nn_extension_activation_layer_5_p1);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_5, "org_khronos_nn_extension_activation_layer_5", 2, (vx_reference)org_khronos_nn_extension_activation_layer_5_p2);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_5, "org_khronos_nn_extension_activation_layer_5", 3, (vx_reference)org_khronos_nn_extension_activation_layer_5_p3);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_5, "org_khronos_nn_extension_activation_layer_5", 4, (vx_reference)org_khronos_nn_extension_activation_layer_5_p4);
     if(status != VX_SUCCESS)
         return status;
-        
+
 //    status = AssignNodeParameter(org_khronos_nn_extension_convolution_layer_14, "org_khronos_nn_extension_convolution_layer_14", 0, (vx_reference)outputAllocators_MergeTensor_0_p0);
 //    if(status != VX_SUCCESS)
 //        return status;
@@ -11255,99 +11255,99 @@
 //    status = AssignNodeParameter(org_khronos_nn_extension_convolution_layer_10, "org_khronos_nn_extension_convolution_layer_10", 8, (vx_reference)org_khronos_nn_extension_convolution_layer_10_p8);
 //    if(status != VX_SUCCESS)
 //        return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_pooling_layer_3, "org_khronos_nn_extension_pooling_layer_3", 0, (vx_reference)outputAllocators_MergeTensor_0_p0);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_pooling_layer_3, "org_khronos_nn_extension_pooling_layer_3", 1, (vx_reference)org_khronos_nn_extension_pooling_layer_3_p1);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_pooling_layer_3, "org_khronos_nn_extension_pooling_layer_3", 2, (vx_reference)org_khronos_nn_extension_pooling_layer_3_p2);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_pooling_layer_3, "org_khronos_nn_extension_pooling_layer_3", 3, (vx_reference)org_khronos_nn_extension_pooling_layer_3_p3);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_pooling_layer_3, "org_khronos_nn_extension_pooling_layer_3", 4, (vx_reference)org_khronos_nn_extension_pooling_layer_3_p4);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_pooling_layer_3, "org_khronos_nn_extension_pooling_layer_3", 5, (vx_reference)org_khronos_nn_extension_pooling_layer_3_p5);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_pooling_layer_3, "org_khronos_nn_extension_pooling_layer_3", 6, (vx_reference)org_khronos_nn_extension_pooling_layer_3_p6);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_pooling_layer_3, "org_khronos_nn_extension_pooling_layer_3", 7, (vx_reference)org_khronos_nn_extension_pooling_layer_3_p7);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_14, "org_khronos_nn_extension_activation_layer_14", 0, (vx_reference)org_khronos_nn_extension_convolution_layer_14_p8);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_14, "org_khronos_nn_extension_activation_layer_14", 1, (vx_reference)org_khronos_nn_extension_activation_layer_14_p1);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_14, "org_khronos_nn_extension_activation_layer_14", 2, (vx_reference)org_khronos_nn_extension_activation_layer_14_p2);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_14, "org_khronos_nn_extension_activation_layer_14", 3, (vx_reference)org_khronos_nn_extension_activation_layer_14_p3);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_14, "org_khronos_nn_extension_activation_layer_14", 4, (vx_reference)org_khronos_nn_extension_activation_layer_14_p4);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_12, "org_khronos_nn_extension_activation_layer_12", 0, (vx_reference)org_khronos_nn_extension_convolution_layer_12_p8);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_12, "org_khronos_nn_extension_activation_layer_12", 1, (vx_reference)org_khronos_nn_extension_activation_layer_12_p1);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_12, "org_khronos_nn_extension_activation_layer_12", 2, (vx_reference)org_khronos_nn_extension_activation_layer_12_p2);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_12, "org_khronos_nn_extension_activation_layer_12", 3, (vx_reference)org_khronos_nn_extension_activation_layer_12_p3);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_12, "org_khronos_nn_extension_activation_layer_12", 4, (vx_reference)org_khronos_nn_extension_activation_layer_12_p4);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_10, "org_khronos_nn_extension_activation_layer_10", 0, (vx_reference)org_khronos_nn_extension_convolution_layer_10_p8);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_10, "org_khronos_nn_extension_activation_layer_10", 1, (vx_reference)org_khronos_nn_extension_activation_layer_10_p1);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_10, "org_khronos_nn_extension_activation_layer_10", 2, (vx_reference)org_khronos_nn_extension_activation_layer_10_p2);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_10, "org_khronos_nn_extension_activation_layer_10", 3, (vx_reference)org_khronos_nn_extension_activation_layer_10_p3);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_10, "org_khronos_nn_extension_activation_layer_10", 4, (vx_reference)org_khronos_nn_extension_activation_layer_10_p4);
     if(status != VX_SUCCESS)
         return status;
-        
+
 //    status = AssignNodeParameter(org_khronos_nn_extension_convolution_layer_9, "org_khronos_nn_extension_convolution_layer_9", 0, (vx_reference)org_khronos_nn_extension_pooling_layer_3_p7);
 //    if(status != VX_SUCCESS)
 //        return status;
@@ -11459,95 +11459,95 @@
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_9, "org_khronos_nn_extension_activation_layer_9", 0, (vx_reference)org_khronos_nn_extension_convolution_layer_9_p8);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_9, "org_khronos_nn_extension_activation_layer_9", 1, (vx_reference)org_khronos_nn_extension_activation_layer_9_p1);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_9, "org_khronos_nn_extension_activation_layer_9", 2, (vx_reference)org_khronos_nn_extension_activation_layer_9_p2);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_9, "org_khronos_nn_extension_activation_layer_9", 3, (vx_reference)org_khronos_nn_extension_activation_layer_9_p3);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_9, "org_khronos_nn_extension_activation_layer_9", 4, (vx_reference)org_khronos_nn_extension_activation_layer_9_p4);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_13, "org_khronos_nn_extension_activation_layer_13", 0, (vx_reference)org_khronos_nn_extension_convolution_layer_13_p8);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_13, "org_khronos_nn_extension_activation_layer_13", 1, (vx_reference)org_khronos_nn_extension_activation_layer_13_p1);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_13, "org_khronos_nn_extension_activation_layer_13", 2, (vx_reference)org_khronos_nn_extension_activation_layer_13_p2);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_13, "org_khronos_nn_extension_activation_layer_13", 3, (vx_reference)org_khronos_nn_extension_activation_layer_13_p3);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_13, "org_khronos_nn_extension_activation_layer_13", 4, (vx_reference)org_khronos_nn_extension_activation_layer_13_p4);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_11, "org_khronos_nn_extension_activation_layer_11", 0, (vx_reference)org_khronos_nn_extension_convolution_layer_11_p8);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_11, "org_khronos_nn_extension_activation_layer_11", 1, (vx_reference)org_khronos_nn_extension_activation_layer_11_p1);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_11, "org_khronos_nn_extension_activation_layer_11", 2, (vx_reference)org_khronos_nn_extension_activation_layer_11_p2);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_11, "org_khronos_nn_extension_activation_layer_11", 3, (vx_reference)org_khronos_nn_extension_activation_layer_11_p3);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_11, "org_khronos_nn_extension_activation_layer_11", 4, (vx_reference)org_khronos_nn_extension_activation_layer_11_p4);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_pooling_layer_4, "org_khronos_nn_extension_pooling_layer_4", 0, (vx_reference)outputAllocators_MergeTensor_1_p0);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_pooling_layer_4, "org_khronos_nn_extension_pooling_layer_4", 1, (vx_reference)org_khronos_nn_extension_pooling_layer_4_p1);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_pooling_layer_4, "org_khronos_nn_extension_pooling_layer_4", 2, (vx_reference)org_khronos_nn_extension_pooling_layer_4_p2);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_pooling_layer_4, "org_khronos_nn_extension_pooling_layer_4", 3, (vx_reference)org_khronos_nn_extension_pooling_layer_4_p3);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_pooling_layer_4, "org_khronos_nn_extension_pooling_layer_4", 4, (vx_reference)org_khronos_nn_extension_pooling_layer_4_p4);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_pooling_layer_4, "org_khronos_nn_extension_pooling_layer_4", 5, (vx_reference)org_khronos_nn_extension_pooling_layer_4_p5);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_pooling_layer_4, "org_khronos_nn_extension_pooling_layer_4", 6, (vx_reference)org_khronos_nn_extension_pooling_layer_4_p6);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_pooling_layer_4, "org_khronos_nn_extension_pooling_layer_4", 7, (vx_reference)org_khronos_nn_extension_pooling_layer_4_p7);
     if(status != VX_SUCCESS)
         return status;
-        
+
 //    status = AssignNodeParameter(org_khronos_nn_extension_convolution_layer_20, "org_khronos_nn_extension_convolution_layer_20", 0, (vx_reference)org_khronos_nn_extension_pooling_layer_4_p7);
 //    if(status != VX_SUCCESS)
 //        return status;
@@ -11655,99 +11655,99 @@
 //    status = AssignNodeParameter(org_khronos_nn_extension_convolution_layer_16, "org_khronos_nn_extension_convolution_layer_16", 8, (vx_reference)org_khronos_nn_extension_convolution_layer_16_p8);
 //    if(status != VX_SUCCESS)
 //        return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_pooling_layer_5, "org_khronos_nn_extension_pooling_layer_5", 0, (vx_reference)org_khronos_nn_extension_pooling_layer_4_p7);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_pooling_layer_5, "org_khronos_nn_extension_pooling_layer_5", 1, (vx_reference)org_khronos_nn_extension_pooling_layer_5_p1);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_pooling_layer_5, "org_khronos_nn_extension_pooling_layer_5", 2, (vx_reference)org_khronos_nn_extension_pooling_layer_5_p2);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_pooling_layer_5, "org_khronos_nn_extension_pooling_layer_5", 3, (vx_reference)org_khronos_nn_extension_pooling_layer_5_p3);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_pooling_layer_5, "org_khronos_nn_extension_pooling_layer_5", 4, (vx_reference)org_khronos_nn_extension_pooling_layer_5_p4);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_pooling_layer_5, "org_khronos_nn_extension_pooling_layer_5", 5, (vx_reference)org_khronos_nn_extension_pooling_layer_5_p5);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_pooling_layer_5, "org_khronos_nn_extension_pooling_layer_5", 6, (vx_reference)org_khronos_nn_extension_pooling_layer_5_p6);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_pooling_layer_5, "org_khronos_nn_extension_pooling_layer_5", 7, (vx_reference)org_khronos_nn_extension_pooling_layer_5_p7);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_20, "org_khronos_nn_extension_activation_layer_20", 0, (vx_reference)org_khronos_nn_extension_convolution_layer_20_p8);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_20, "org_khronos_nn_extension_activation_layer_20", 1, (vx_reference)org_khronos_nn_extension_activation_layer_20_p1);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_20, "org_khronos_nn_extension_activation_layer_20", 2, (vx_reference)org_khronos_nn_extension_activation_layer_20_p2);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_20, "org_khronos_nn_extension_activation_layer_20", 3, (vx_reference)org_khronos_nn_extension_activation_layer_20_p3);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_20, "org_khronos_nn_extension_activation_layer_20", 4, (vx_reference)org_khronos_nn_extension_activation_layer_20_p4);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_18, "org_khronos_nn_extension_activation_layer_18", 0, (vx_reference)org_khronos_nn_extension_convolution_layer_18_p8);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_18, "org_khronos_nn_extension_activation_layer_18", 1, (vx_reference)org_khronos_nn_extension_activation_layer_18_p1);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_18, "org_khronos_nn_extension_activation_layer_18", 2, (vx_reference)org_khronos_nn_extension_activation_layer_18_p2);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_18, "org_khronos_nn_extension_activation_layer_18", 3, (vx_reference)org_khronos_nn_extension_activation_layer_18_p3);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_18, "org_khronos_nn_extension_activation_layer_18", 4, (vx_reference)org_khronos_nn_extension_activation_layer_18_p4);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_16, "org_khronos_nn_extension_activation_layer_16", 0, (vx_reference)org_khronos_nn_extension_convolution_layer_16_p8);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_16, "org_khronos_nn_extension_activation_layer_16", 1, (vx_reference)org_khronos_nn_extension_activation_layer_16_p1);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_16, "org_khronos_nn_extension_activation_layer_16", 2, (vx_reference)org_khronos_nn_extension_activation_layer_16_p2);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_16, "org_khronos_nn_extension_activation_layer_16", 3, (vx_reference)org_khronos_nn_extension_activation_layer_16_p3);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_16, "org_khronos_nn_extension_activation_layer_16", 4, (vx_reference)org_khronos_nn_extension_activation_layer_16_p4);
     if(status != VX_SUCCESS)
         return status;
-        
+
 //    status = AssignNodeParameter(org_khronos_nn_extension_convolution_layer_15, "org_khronos_nn_extension_convolution_layer_15", 0, (vx_reference)org_khronos_nn_extension_pooling_layer_5_p7);
 //    if(status != VX_SUCCESS)
 //        return status;
@@ -11855,67 +11855,67 @@
 //    status = AssignNodeParameter(org_khronos_nn_extension_convolution_layer_17, "org_khronos_nn_extension_convolution_layer_17", 8, (vx_reference)org_khronos_nn_extension_convolution_layer_17_p8);
 //    if(status != VX_SUCCESS)
 //        return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_15, "org_khronos_nn_extension_activation_layer_15", 0, (vx_reference)org_khronos_nn_extension_convolution_layer_15_p8);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_15, "org_khronos_nn_extension_activation_layer_15", 1, (vx_reference)org_khronos_nn_extension_activation_layer_15_p1);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_15, "org_khronos_nn_extension_activation_layer_15", 2, (vx_reference)org_khronos_nn_extension_activation_layer_15_p2);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_15, "org_khronos_nn_extension_activation_layer_15", 3, (vx_reference)org_khronos_nn_extension_activation_layer_15_p3);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_15, "org_khronos_nn_extension_activation_layer_15", 4, (vx_reference)org_khronos_nn_extension_activation_layer_15_p4);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_19, "org_khronos_nn_extension_activation_layer_19", 0, (vx_reference)org_khronos_nn_extension_convolution_layer_19_p8);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_19, "org_khronos_nn_extension_activation_layer_19", 1, (vx_reference)org_khronos_nn_extension_activation_layer_19_p1);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_19, "org_khronos_nn_extension_activation_layer_19", 2, (vx_reference)org_khronos_nn_extension_activation_layer_19_p2);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_19, "org_khronos_nn_extension_activation_layer_19", 3, (vx_reference)org_khronos_nn_extension_activation_layer_19_p3);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_19, "org_khronos_nn_extension_activation_layer_19", 4, (vx_reference)org_khronos_nn_extension_activation_layer_19_p4);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_17, "org_khronos_nn_extension_activation_layer_17", 0, (vx_reference)org_khronos_nn_extension_convolution_layer_17_p8);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_17, "org_khronos_nn_extension_activation_layer_17", 1, (vx_reference)org_khronos_nn_extension_activation_layer_17_p1);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_17, "org_khronos_nn_extension_activation_layer_17", 2, (vx_reference)org_khronos_nn_extension_activation_layer_17_p2);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_17, "org_khronos_nn_extension_activation_layer_17", 3, (vx_reference)org_khronos_nn_extension_activation_layer_17_p3);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_17, "org_khronos_nn_extension_activation_layer_17", 4, (vx_reference)org_khronos_nn_extension_activation_layer_17_p4);
     if(status != VX_SUCCESS)
         return status;
-        
+
 //    status = AssignNodeParameter(org_khronos_nn_extension_convolution_layer_26, "org_khronos_nn_extension_convolution_layer_26", 0, (vx_reference)outputAllocators_MergeTensor_2_p0);
 //    if(status != VX_SUCCESS)
 //        return status;
@@ -12023,99 +12023,99 @@
 //    status = AssignNodeParameter(org_khronos_nn_extension_convolution_layer_22, "org_khronos_nn_extension_convolution_layer_22", 8, (vx_reference)org_khronos_nn_extension_convolution_layer_22_p8);
 //    if(status != VX_SUCCESS)
 //        return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_pooling_layer_6, "org_khronos_nn_extension_pooling_layer_6", 0, (vx_reference)outputAllocators_MergeTensor_2_p0);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_pooling_layer_6, "org_khronos_nn_extension_pooling_layer_6", 1, (vx_reference)org_khronos_nn_extension_pooling_layer_6_p1);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_pooling_layer_6, "org_khronos_nn_extension_pooling_layer_6", 2, (vx_reference)org_khronos_nn_extension_pooling_layer_6_p2);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_pooling_layer_6, "org_khronos_nn_extension_pooling_layer_6", 3, (vx_reference)org_khronos_nn_extension_pooling_layer_6_p3);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_pooling_layer_6, "org_khronos_nn_extension_pooling_layer_6", 4, (vx_reference)org_khronos_nn_extension_pooling_layer_6_p4);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_pooling_layer_6, "org_khronos_nn_extension_pooling_layer_6", 5, (vx_reference)org_khronos_nn_extension_pooling_layer_6_p5);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_pooling_layer_6, "org_khronos_nn_extension_pooling_layer_6", 6, (vx_reference)org_khronos_nn_extension_pooling_layer_6_p6);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_pooling_layer_6, "org_khronos_nn_extension_pooling_layer_6", 7, (vx_reference)org_khronos_nn_extension_pooling_layer_6_p7);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_26, "org_khronos_nn_extension_activation_layer_26", 0, (vx_reference)org_khronos_nn_extension_convolution_layer_26_p8);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_26, "org_khronos_nn_extension_activation_layer_26", 1, (vx_reference)org_khronos_nn_extension_activation_layer_26_p1);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_26, "org_khronos_nn_extension_activation_layer_26", 2, (vx_reference)org_khronos_nn_extension_activation_layer_26_p2);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_26, "org_khronos_nn_extension_activation_layer_26", 3, (vx_reference)org_khronos_nn_extension_activation_layer_26_p3);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_26, "org_khronos_nn_extension_activation_layer_26", 4, (vx_reference)org_khronos_nn_extension_activation_layer_26_p4);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_24, "org_khronos_nn_extension_activation_layer_24", 0, (vx_reference)org_khronos_nn_extension_convolution_layer_24_p8);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_24, "org_khronos_nn_extension_activation_layer_24", 1, (vx_reference)org_khronos_nn_extension_activation_layer_24_p1);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_24, "org_khronos_nn_extension_activation_layer_24", 2, (vx_reference)org_khronos_nn_extension_activation_layer_24_p2);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_24, "org_khronos_nn_extension_activation_layer_24", 3, (vx_reference)org_khronos_nn_extension_activation_layer_24_p3);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_24, "org_khronos_nn_extension_activation_layer_24", 4, (vx_reference)org_khronos_nn_extension_activation_layer_24_p4);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_22, "org_khronos_nn_extension_activation_layer_22", 0, (vx_reference)org_khronos_nn_extension_convolution_layer_22_p8);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_22, "org_khronos_nn_extension_activation_layer_22", 1, (vx_reference)org_khronos_nn_extension_activation_layer_22_p1);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_22, "org_khronos_nn_extension_activation_layer_22", 2, (vx_reference)org_khronos_nn_extension_activation_layer_22_p2);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_22, "org_khronos_nn_extension_activation_layer_22", 3, (vx_reference)org_khronos_nn_extension_activation_layer_22_p3);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_22, "org_khronos_nn_extension_activation_layer_22", 4, (vx_reference)org_khronos_nn_extension_activation_layer_22_p4);
     if(status != VX_SUCCESS)
         return status;
-        
+
 //    status = AssignNodeParameter(org_khronos_nn_extension_convolution_layer_21, "org_khronos_nn_extension_convolution_layer_21", 0, (vx_reference)org_khronos_nn_extension_pooling_layer_6_p7);
 //    if(status != VX_SUCCESS)
 //        return status;
@@ -12223,67 +12223,67 @@
 //    status = AssignNodeParameter(org_khronos_nn_extension_convolution_layer_23, "org_khronos_nn_extension_convolution_layer_23", 8, (vx_reference)org_khronos_nn_extension_convolution_layer_23_p8);
 //    if(status != VX_SUCCESS)
 //        return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_21, "org_khronos_nn_extension_activation_layer_21", 0, (vx_reference)org_khronos_nn_extension_convolution_layer_21_p8);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_21, "org_khronos_nn_extension_activation_layer_21", 1, (vx_reference)org_khronos_nn_extension_activation_layer_21_p1);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_21, "org_khronos_nn_extension_activation_layer_21", 2, (vx_reference)org_khronos_nn_extension_activation_layer_21_p2);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_21, "org_khronos_nn_extension_activation_layer_21", 3, (vx_reference)org_khronos_nn_extension_activation_layer_21_p3);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_21, "org_khronos_nn_extension_activation_layer_21", 4, (vx_reference)org_khronos_nn_extension_activation_layer_21_p4);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_25, "org_khronos_nn_extension_activation_layer_25", 0, (vx_reference)org_khronos_nn_extension_convolution_layer_25_p8);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_25, "org_khronos_nn_extension_activation_layer_25", 1, (vx_reference)org_khronos_nn_extension_activation_layer_25_p1);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_25, "org_khronos_nn_extension_activation_layer_25", 2, (vx_reference)org_khronos_nn_extension_activation_layer_25_p2);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_25, "org_khronos_nn_extension_activation_layer_25", 3, (vx_reference)org_khronos_nn_extension_activation_layer_25_p3);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_25, "org_khronos_nn_extension_activation_layer_25", 4, (vx_reference)org_khronos_nn_extension_activation_layer_25_p4);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_23, "org_khronos_nn_extension_activation_layer_23", 0, (vx_reference)org_khronos_nn_extension_convolution_layer_23_p8);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_23, "org_khronos_nn_extension_activation_layer_23", 1, (vx_reference)org_khronos_nn_extension_activation_layer_23_p1);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_23, "org_khronos_nn_extension_activation_layer_23", 2, (vx_reference)org_khronos_nn_extension_activation_layer_23_p2);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_23, "org_khronos_nn_extension_activation_layer_23", 3, (vx_reference)org_khronos_nn_extension_activation_layer_23_p3);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_23, "org_khronos_nn_extension_activation_layer_23", 4, (vx_reference)org_khronos_nn_extension_activation_layer_23_p4);
     if(status != VX_SUCCESS)
         return status;
-        
+
 //    status = AssignNodeParameter(org_khronos_nn_extension_convolution_layer_32, "org_khronos_nn_extension_convolution_layer_32", 0, (vx_reference)outputAllocators_MergeTensor_3_p0);
 //    if(status != VX_SUCCESS)
 //        return status;
@@ -12391,99 +12391,99 @@
 //    status = AssignNodeParameter(org_khronos_nn_extension_convolution_layer_28, "org_khronos_nn_extension_convolution_layer_28", 8, (vx_reference)org_khronos_nn_extension_convolution_layer_28_p8);
 //    if(status != VX_SUCCESS)
 //        return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_pooling_layer_7, "org_khronos_nn_extension_pooling_layer_7", 0, (vx_reference)outputAllocators_MergeTensor_3_p0);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_pooling_layer_7, "org_khronos_nn_extension_pooling_layer_7", 1, (vx_reference)org_khronos_nn_extension_pooling_layer_7_p1);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_pooling_layer_7, "org_khronos_nn_extension_pooling_layer_7", 2, (vx_reference)org_khronos_nn_extension_pooling_layer_7_p2);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_pooling_layer_7, "org_khronos_nn_extension_pooling_layer_7", 3, (vx_reference)org_khronos_nn_extension_pooling_layer_7_p3);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_pooling_layer_7, "org_khronos_nn_extension_pooling_layer_7", 4, (vx_reference)org_khronos_nn_extension_pooling_layer_7_p4);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_pooling_layer_7, "org_khronos_nn_extension_pooling_layer_7", 5, (vx_reference)org_khronos_nn_extension_pooling_layer_7_p5);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_pooling_layer_7, "org_khronos_nn_extension_pooling_layer_7", 6, (vx_reference)org_khronos_nn_extension_pooling_layer_7_p6);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_pooling_layer_7, "org_khronos_nn_extension_pooling_layer_7", 7, (vx_reference)org_khronos_nn_extension_pooling_layer_7_p7);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_32, "org_khronos_nn_extension_activation_layer_32", 0, (vx_reference)org_khronos_nn_extension_convolution_layer_32_p8);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_32, "org_khronos_nn_extension_activation_layer_32", 1, (vx_reference)org_khronos_nn_extension_activation_layer_32_p1);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_32, "org_khronos_nn_extension_activation_layer_32", 2, (vx_reference)org_khronos_nn_extension_activation_layer_32_p2);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_32, "org_khronos_nn_extension_activation_layer_32", 3, (vx_reference)org_khronos_nn_extension_activation_layer_32_p3);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_32, "org_khronos_nn_extension_activation_layer_32", 4, (vx_reference)org_khronos_nn_extension_activation_layer_32_p4);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_30, "org_khronos_nn_extension_activation_layer_30", 0, (vx_reference)org_khronos_nn_extension_convolution_layer_30_p8);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_30, "org_khronos_nn_extension_activation_layer_30", 1, (vx_reference)org_khronos_nn_extension_activation_layer_30_p1);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_30, "org_khronos_nn_extension_activation_layer_30", 2, (vx_reference)org_khronos_nn_extension_activation_layer_30_p2);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_30, "org_khronos_nn_extension_activation_layer_30", 3, (vx_reference)org_khronos_nn_extension_activation_layer_30_p3);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_30, "org_khronos_nn_extension_activation_layer_30", 4, (vx_reference)org_khronos_nn_extension_activation_layer_30_p4);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_28, "org_khronos_nn_extension_activation_layer_28", 0, (vx_reference)org_khronos_nn_extension_convolution_layer_28_p8);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_28, "org_khronos_nn_extension_activation_layer_28", 1, (vx_reference)org_khronos_nn_extension_activation_layer_28_p1);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_28, "org_khronos_nn_extension_activation_layer_28", 2, (vx_reference)org_khronos_nn_extension_activation_layer_28_p2);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_28, "org_khronos_nn_extension_activation_layer_28", 3, (vx_reference)org_khronos_nn_extension_activation_layer_28_p3);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_28, "org_khronos_nn_extension_activation_layer_28", 4, (vx_reference)org_khronos_nn_extension_activation_layer_28_p4);
     if(status != VX_SUCCESS)
         return status;
-        
+
 //    status = AssignNodeParameter(org_khronos_nn_extension_convolution_layer_27, "org_khronos_nn_extension_convolution_layer_27", 0, (vx_reference)org_khronos_nn_extension_pooling_layer_7_p7);
 //    if(status != VX_SUCCESS)
 //        return status;
@@ -12591,67 +12591,67 @@
 //    status = AssignNodeParameter(org_khronos_nn_extension_convolution_layer_29, "org_khronos_nn_extension_convolution_layer_29", 8, (vx_reference)org_khronos_nn_extension_convolution_layer_29_p8);
 //    if(status != VX_SUCCESS)
 //        return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_27, "org_khronos_nn_extension_activation_layer_27", 0, (vx_reference)org_khronos_nn_extension_convolution_layer_27_p8);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_27, "org_khronos_nn_extension_activation_layer_27", 1, (vx_reference)org_khronos_nn_extension_activation_layer_27_p1);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_27, "org_khronos_nn_extension_activation_layer_27", 2, (vx_reference)org_khronos_nn_extension_activation_layer_27_p2);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_27, "org_khronos_nn_extension_activation_layer_27", 3, (vx_reference)org_khronos_nn_extension_activation_layer_27_p3);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_27, "org_khronos_nn_extension_activation_layer_27", 4, (vx_reference)org_khronos_nn_extension_activation_layer_27_p4);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_31, "org_khronos_nn_extension_activation_layer_31", 0, (vx_reference)org_khronos_nn_extension_convolution_layer_31_p8);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_31, "org_khronos_nn_extension_activation_layer_31", 1, (vx_reference)org_khronos_nn_extension_activation_layer_31_p1);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_31, "org_khronos_nn_extension_activation_layer_31", 2, (vx_reference)org_khronos_nn_extension_activation_layer_31_p2);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_31, "org_khronos_nn_extension_activation_layer_31", 3, (vx_reference)org_khronos_nn_extension_activation_layer_31_p3);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_31, "org_khronos_nn_extension_activation_layer_31", 4, (vx_reference)org_khronos_nn_extension_activation_layer_31_p4);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_29, "org_khronos_nn_extension_activation_layer_29", 0, (vx_reference)org_khronos_nn_extension_convolution_layer_29_p8);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_29, "org_khronos_nn_extension_activation_layer_29", 1, (vx_reference)org_khronos_nn_extension_activation_layer_29_p1);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_29, "org_khronos_nn_extension_activation_layer_29", 2, (vx_reference)org_khronos_nn_extension_activation_layer_29_p2);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_29, "org_khronos_nn_extension_activation_layer_29", 3, (vx_reference)org_khronos_nn_extension_activation_layer_29_p3);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_29, "org_khronos_nn_extension_activation_layer_29", 4, (vx_reference)org_khronos_nn_extension_activation_layer_29_p4);
     if(status != VX_SUCCESS)
         return status;
-        
+
 //    status = AssignNodeParameter(org_khronos_nn_extension_convolution_layer_38, "org_khronos_nn_extension_convolution_layer_38", 0, (vx_reference)outputAllocators_MergeTensor_4_p0);
 //    if(status != VX_SUCCESS)
 //        return status;
@@ -12759,99 +12759,99 @@
 //    status = AssignNodeParameter(org_khronos_nn_extension_convolution_layer_34, "org_khronos_nn_extension_convolution_layer_34", 8, (vx_reference)org_khronos_nn_extension_convolution_layer_34_p8);
 //    if(status != VX_SUCCESS)
 //        return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_pooling_layer_8, "org_khronos_nn_extension_pooling_layer_8", 0, (vx_reference)outputAllocators_MergeTensor_4_p0);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_pooling_layer_8, "org_khronos_nn_extension_pooling_layer_8", 1, (vx_reference)org_khronos_nn_extension_pooling_layer_8_p1);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_pooling_layer_8, "org_khronos_nn_extension_pooling_layer_8", 2, (vx_reference)org_khronos_nn_extension_pooling_layer_8_p2);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_pooling_layer_8, "org_khronos_nn_extension_pooling_layer_8", 3, (vx_reference)org_khronos_nn_extension_pooling_layer_8_p3);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_pooling_layer_8, "org_khronos_nn_extension_pooling_layer_8", 4, (vx_reference)org_khronos_nn_extension_pooling_layer_8_p4);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_pooling_layer_8, "org_khronos_nn_extension_pooling_layer_8", 5, (vx_reference)org_khronos_nn_extension_pooling_layer_8_p5);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_pooling_layer_8, "org_khronos_nn_extension_pooling_layer_8", 6, (vx_reference)org_khronos_nn_extension_pooling_layer_8_p6);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_pooling_layer_8, "org_khronos_nn_extension_pooling_layer_8", 7, (vx_reference)org_khronos_nn_extension_pooling_layer_8_p7);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_38, "org_khronos_nn_extension_activation_layer_38", 0, (vx_reference)org_khronos_nn_extension_convolution_layer_38_p8);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_38, "org_khronos_nn_extension_activation_layer_38", 1, (vx_reference)org_khronos_nn_extension_activation_layer_38_p1);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_38, "org_khronos_nn_extension_activation_layer_38", 2, (vx_reference)org_khronos_nn_extension_activation_layer_38_p2);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_38, "org_khronos_nn_extension_activation_layer_38", 3, (vx_reference)org_khronos_nn_extension_activation_layer_38_p3);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_38, "org_khronos_nn_extension_activation_layer_38", 4, (vx_reference)org_khronos_nn_extension_activation_layer_38_p4);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_36, "org_khronos_nn_extension_activation_layer_36", 0, (vx_reference)org_khronos_nn_extension_convolution_layer_36_p8);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_36, "org_khronos_nn_extension_activation_layer_36", 1, (vx_reference)org_khronos_nn_extension_activation_layer_36_p1);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_36, "org_khronos_nn_extension_activation_layer_36", 2, (vx_reference)org_khronos_nn_extension_activation_layer_36_p2);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_36, "org_khronos_nn_extension_activation_layer_36", 3, (vx_reference)org_khronos_nn_extension_activation_layer_36_p3);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_36, "org_khronos_nn_extension_activation_layer_36", 4, (vx_reference)org_khronos_nn_extension_activation_layer_36_p4);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_34, "org_khronos_nn_extension_activation_layer_34", 0, (vx_reference)org_khronos_nn_extension_convolution_layer_34_p8);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_34, "org_khronos_nn_extension_activation_layer_34", 1, (vx_reference)org_khronos_nn_extension_activation_layer_34_p1);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_34, "org_khronos_nn_extension_activation_layer_34", 2, (vx_reference)org_khronos_nn_extension_activation_layer_34_p2);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_34, "org_khronos_nn_extension_activation_layer_34", 3, (vx_reference)org_khronos_nn_extension_activation_layer_34_p3);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_34, "org_khronos_nn_extension_activation_layer_34", 4, (vx_reference)org_khronos_nn_extension_activation_layer_34_p4);
     if(status != VX_SUCCESS)
         return status;
-        
+
 //    status = AssignNodeParameter(org_khronos_nn_extension_convolution_layer_33, "org_khronos_nn_extension_convolution_layer_33", 0, (vx_reference)org_khronos_nn_extension_pooling_layer_8_p7);
 //    if(status != VX_SUCCESS)
 //        return status;
@@ -12963,63 +12963,63 @@
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_33, "org_khronos_nn_extension_activation_layer_33", 0, (vx_reference)org_khronos_nn_extension_convolution_layer_33_p8);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_33, "org_khronos_nn_extension_activation_layer_33", 1, (vx_reference)org_khronos_nn_extension_activation_layer_33_p1);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_33, "org_khronos_nn_extension_activation_layer_33", 2, (vx_reference)org_khronos_nn_extension_activation_layer_33_p2);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_33, "org_khronos_nn_extension_activation_layer_33", 3, (vx_reference)org_khronos_nn_extension_activation_layer_33_p3);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_33, "org_khronos_nn_extension_activation_layer_33", 4, (vx_reference)org_khronos_nn_extension_activation_layer_33_p4);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_37, "org_khronos_nn_extension_activation_layer_37", 0, (vx_reference)org_khronos_nn_extension_convolution_layer_37_p8);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_37, "org_khronos_nn_extension_activation_layer_37", 1, (vx_reference)org_khronos_nn_extension_activation_layer_37_p1);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_37, "org_khronos_nn_extension_activation_layer_37", 2, (vx_reference)org_khronos_nn_extension_activation_layer_37_p2);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_37, "org_khronos_nn_extension_activation_layer_37", 3, (vx_reference)org_khronos_nn_extension_activation_layer_37_p3);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_37, "org_khronos_nn_extension_activation_layer_37", 4, (vx_reference)org_khronos_nn_extension_activation_layer_37_p4);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_35, "org_khronos_nn_extension_activation_layer_35", 0, (vx_reference)org_khronos_nn_extension_convolution_layer_35_p8);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_35, "org_khronos_nn_extension_activation_layer_35", 1, (vx_reference)org_khronos_nn_extension_activation_layer_35_p1);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_35, "org_khronos_nn_extension_activation_layer_35", 2, (vx_reference)org_khronos_nn_extension_activation_layer_35_p2);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_35, "org_khronos_nn_extension_activation_layer_35", 3, (vx_reference)org_khronos_nn_extension_activation_layer_35_p3);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_35, "org_khronos_nn_extension_activation_layer_35", 4, (vx_reference)org_khronos_nn_extension_activation_layer_35_p4);
     if(status != VX_SUCCESS)
         return status;
-        
+
 //    status = AssignNodeParameter(org_khronos_nn_extension_convolution_layer_44, "org_khronos_nn_extension_convolution_layer_44", 0, (vx_reference)outputAllocators_MergeTensor_5_p0);
 //    if(status != VX_SUCCESS)
 //        return status;
@@ -13131,95 +13131,95 @@
     status = AssignNodeParameter(org_khronos_nn_extension_pooling_layer_9, "org_khronos_nn_extension_pooling_layer_9", 0, (vx_reference)outputAllocators_MergeTensor_5_p0);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_pooling_layer_9, "org_khronos_nn_extension_pooling_layer_9", 1, (vx_reference)org_khronos_nn_extension_pooling_layer_9_p1);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_pooling_layer_9, "org_khronos_nn_extension_pooling_layer_9", 2, (vx_reference)org_khronos_nn_extension_pooling_layer_9_p2);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_pooling_layer_9, "org_khronos_nn_extension_pooling_layer_9", 3, (vx_reference)org_khronos_nn_extension_pooling_layer_9_p3);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_pooling_layer_9, "org_khronos_nn_extension_pooling_layer_9", 4, (vx_reference)org_khronos_nn_extension_pooling_layer_9_p4);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_pooling_layer_9, "org_khronos_nn_extension_pooling_layer_9", 5, (vx_reference)org_khronos_nn_extension_pooling_layer_9_p5);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_pooling_layer_9, "org_khronos_nn_extension_pooling_layer_9", 6, (vx_reference)org_khronos_nn_extension_pooling_layer_9_p6);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_pooling_layer_9, "org_khronos_nn_extension_pooling_layer_9", 7, (vx_reference)org_khronos_nn_extension_pooling_layer_9_p7);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_44, "org_khronos_nn_extension_activation_layer_44", 0, (vx_reference)org_khronos_nn_extension_convolution_layer_44_p8);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_44, "org_khronos_nn_extension_activation_layer_44", 1, (vx_reference)org_khronos_nn_extension_activation_layer_44_p1);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_44, "org_khronos_nn_extension_activation_layer_44", 2, (vx_reference)org_khronos_nn_extension_activation_layer_44_p2);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_44, "org_khronos_nn_extension_activation_layer_44", 3, (vx_reference)org_khronos_nn_extension_activation_layer_44_p3);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_44, "org_khronos_nn_extension_activation_layer_44", 4, (vx_reference)org_khronos_nn_extension_activation_layer_44_p4);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_42, "org_khronos_nn_extension_activation_layer_42", 0, (vx_reference)org_khronos_nn_extension_convolution_layer_42_p8);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_42, "org_khronos_nn_extension_activation_layer_42", 1, (vx_reference)org_khronos_nn_extension_activation_layer_42_p1);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_42, "org_khronos_nn_extension_activation_layer_42", 2, (vx_reference)org_khronos_nn_extension_activation_layer_42_p2);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_42, "org_khronos_nn_extension_activation_layer_42", 3, (vx_reference)org_khronos_nn_extension_activation_layer_42_p3);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_42, "org_khronos_nn_extension_activation_layer_42", 4, (vx_reference)org_khronos_nn_extension_activation_layer_42_p4);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_40, "org_khronos_nn_extension_activation_layer_40", 0, (vx_reference)org_khronos_nn_extension_convolution_layer_40_p8);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_40, "org_khronos_nn_extension_activation_layer_40", 1, (vx_reference)org_khronos_nn_extension_activation_layer_40_p1);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_40, "org_khronos_nn_extension_activation_layer_40", 2, (vx_reference)org_khronos_nn_extension_activation_layer_40_p2);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_40, "org_khronos_nn_extension_activation_layer_40", 3, (vx_reference)org_khronos_nn_extension_activation_layer_40_p3);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_40, "org_khronos_nn_extension_activation_layer_40", 4, (vx_reference)org_khronos_nn_extension_activation_layer_40_p4);
     if(status != VX_SUCCESS)
         return status;
-        
+
 //    status = AssignNodeParameter(org_khronos_nn_extension_convolution_layer_39, "org_khronos_nn_extension_convolution_layer_39", 0, (vx_reference)org_khronos_nn_extension_pooling_layer_9_p7);
 //    if(status != VX_SUCCESS)
 //        return status;
@@ -13331,95 +13331,95 @@
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_39, "org_khronos_nn_extension_activation_layer_39", 0, (vx_reference)org_khronos_nn_extension_convolution_layer_39_p8);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_39, "org_khronos_nn_extension_activation_layer_39", 1, (vx_reference)org_khronos_nn_extension_activation_layer_39_p1);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_39, "org_khronos_nn_extension_activation_layer_39", 2, (vx_reference)org_khronos_nn_extension_activation_layer_39_p2);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_39, "org_khronos_nn_extension_activation_layer_39", 3, (vx_reference)org_khronos_nn_extension_activation_layer_39_p3);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_39, "org_khronos_nn_extension_activation_layer_39", 4, (vx_reference)org_khronos_nn_extension_activation_layer_39_p4);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_43, "org_khronos_nn_extension_activation_layer_43", 0, (vx_reference)org_khronos_nn_extension_convolution_layer_43_p8);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_43, "org_khronos_nn_extension_activation_layer_43", 1, (vx_reference)org_khronos_nn_extension_activation_layer_43_p1);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_43, "org_khronos_nn_extension_activation_layer_43", 2, (vx_reference)org_khronos_nn_extension_activation_layer_43_p2);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_43, "org_khronos_nn_extension_activation_layer_43", 3, (vx_reference)org_khronos_nn_extension_activation_layer_43_p3);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_43, "org_khronos_nn_extension_activation_layer_43", 4, (vx_reference)org_khronos_nn_extension_activation_layer_43_p4);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_41, "org_khronos_nn_extension_activation_layer_41", 0, (vx_reference)org_khronos_nn_extension_convolution_layer_41_p8);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_41, "org_khronos_nn_extension_activation_layer_41", 1, (vx_reference)org_khronos_nn_extension_activation_layer_41_p1);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_41, "org_khronos_nn_extension_activation_layer_41", 2, (vx_reference)org_khronos_nn_extension_activation_layer_41_p2);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_41, "org_khronos_nn_extension_activation_layer_41", 3, (vx_reference)org_khronos_nn_extension_activation_layer_41_p3);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_41, "org_khronos_nn_extension_activation_layer_41", 4, (vx_reference)org_khronos_nn_extension_activation_layer_41_p4);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_pooling_layer_10, "org_khronos_nn_extension_pooling_layer_10", 0, (vx_reference)outputAllocators_MergeTensor_6_p0);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_pooling_layer_10, "org_khronos_nn_extension_pooling_layer_10", 1, (vx_reference)org_khronos_nn_extension_pooling_layer_10_p1);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_pooling_layer_10, "org_khronos_nn_extension_pooling_layer_10", 2, (vx_reference)org_khronos_nn_extension_pooling_layer_10_p2);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_pooling_layer_10, "org_khronos_nn_extension_pooling_layer_10", 3, (vx_reference)org_khronos_nn_extension_pooling_layer_10_p3);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_pooling_layer_10, "org_khronos_nn_extension_pooling_layer_10", 4, (vx_reference)org_khronos_nn_extension_pooling_layer_10_p4);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_pooling_layer_10, "org_khronos_nn_extension_pooling_layer_10", 5, (vx_reference)org_khronos_nn_extension_pooling_layer_10_p5);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_pooling_layer_10, "org_khronos_nn_extension_pooling_layer_10", 6, (vx_reference)org_khronos_nn_extension_pooling_layer_10_p6);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_pooling_layer_10, "org_khronos_nn_extension_pooling_layer_10", 7, (vx_reference)org_khronos_nn_extension_pooling_layer_10_p7);
     if(status != VX_SUCCESS)
         return status;
-        
+
 //    status = AssignNodeParameter(org_khronos_nn_extension_convolution_layer_50, "org_khronos_nn_extension_convolution_layer_50", 0, (vx_reference)org_khronos_nn_extension_pooling_layer_10_p7);
 //    if(status != VX_SUCCESS)
 //        return status;
@@ -13531,95 +13531,95 @@
     status = AssignNodeParameter(org_khronos_nn_extension_pooling_layer_11, "org_khronos_nn_extension_pooling_layer_11", 0, (vx_reference)org_khronos_nn_extension_pooling_layer_10_p7);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_pooling_layer_11, "org_khronos_nn_extension_pooling_layer_11", 1, (vx_reference)org_khronos_nn_extension_pooling_layer_11_p1);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_pooling_layer_11, "org_khronos_nn_extension_pooling_layer_11", 2, (vx_reference)org_khronos_nn_extension_pooling_layer_11_p2);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_pooling_layer_11, "org_khronos_nn_extension_pooling_layer_11", 3, (vx_reference)org_khronos_nn_extension_pooling_layer_11_p3);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_pooling_layer_11, "org_khronos_nn_extension_pooling_layer_11", 4, (vx_reference)org_khronos_nn_extension_pooling_layer_11_p4);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_pooling_layer_11, "org_khronos_nn_extension_pooling_layer_11", 5, (vx_reference)org_khronos_nn_extension_pooling_layer_11_p5);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_pooling_layer_11, "org_khronos_nn_extension_pooling_layer_11", 6, (vx_reference)org_khronos_nn_extension_pooling_layer_11_p6);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_pooling_layer_11, "org_khronos_nn_extension_pooling_layer_11", 7, (vx_reference)org_khronos_nn_extension_pooling_layer_11_p7);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_50, "org_khronos_nn_extension_activation_layer_50", 0, (vx_reference)org_khronos_nn_extension_convolution_layer_50_p8);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_50, "org_khronos_nn_extension_activation_layer_50", 1, (vx_reference)org_khronos_nn_extension_activation_layer_50_p1);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_50, "org_khronos_nn_extension_activation_layer_50", 2, (vx_reference)org_khronos_nn_extension_activation_layer_50_p2);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_50, "org_khronos_nn_extension_activation_layer_50", 3, (vx_reference)org_khronos_nn_extension_activation_layer_50_p3);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_50, "org_khronos_nn_extension_activation_layer_50", 4, (vx_reference)org_khronos_nn_extension_activation_layer_50_p4);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_48, "org_khronos_nn_extension_activation_layer_48", 0, (vx_reference)org_khronos_nn_extension_convolution_layer_48_p8);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_48, "org_khronos_nn_extension_activation_layer_48", 1, (vx_reference)org_khronos_nn_extension_activation_layer_48_p1);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_48, "org_khronos_nn_extension_activation_layer_48", 2, (vx_reference)org_khronos_nn_extension_activation_layer_48_p2);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_48, "org_khronos_nn_extension_activation_layer_48", 3, (vx_reference)org_khronos_nn_extension_activation_layer_48_p3);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_48, "org_khronos_nn_extension_activation_layer_48", 4, (vx_reference)org_khronos_nn_extension_activation_layer_48_p4);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_46, "org_khronos_nn_extension_activation_layer_46", 0, (vx_reference)org_khronos_nn_extension_convolution_layer_46_p8);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_46, "org_khronos_nn_extension_activation_layer_46", 1, (vx_reference)org_khronos_nn_extension_activation_layer_46_p1);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_46, "org_khronos_nn_extension_activation_layer_46", 2, (vx_reference)org_khronos_nn_extension_activation_layer_46_p2);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_46, "org_khronos_nn_extension_activation_layer_46", 3, (vx_reference)org_khronos_nn_extension_activation_layer_46_p3);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_46, "org_khronos_nn_extension_activation_layer_46", 4, (vx_reference)org_khronos_nn_extension_activation_layer_46_p4);
     if(status != VX_SUCCESS)
         return status;
-        
+
 //    status = AssignNodeParameter(org_khronos_nn_extension_convolution_layer_45, "org_khronos_nn_extension_convolution_layer_45", 0, (vx_reference)org_khronos_nn_extension_pooling_layer_11_p7);
 //    if(status != VX_SUCCESS)
 //        return status;
@@ -13727,67 +13727,67 @@
 //    status = AssignNodeParameter(org_khronos_nn_extension_convolution_layer_47, "org_khronos_nn_extension_convolution_layer_47", 8, (vx_reference)org_khronos_nn_extension_convolution_layer_47_p8);
 //    if(status != VX_SUCCESS)
 //        return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_45, "org_khronos_nn_extension_activation_layer_45", 0, (vx_reference)org_khronos_nn_extension_convolution_layer_45_p8);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_45, "org_khronos_nn_extension_activation_layer_45", 1, (vx_reference)org_khronos_nn_extension_activation_layer_45_p1);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_45, "org_khronos_nn_extension_activation_layer_45", 2, (vx_reference)org_khronos_nn_extension_activation_layer_45_p2);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_45, "org_khronos_nn_extension_activation_layer_45", 3, (vx_reference)org_khronos_nn_extension_activation_layer_45_p3);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_45, "org_khronos_nn_extension_activation_layer_45", 4, (vx_reference)org_khronos_nn_extension_activation_layer_45_p4);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_49, "org_khronos_nn_extension_activation_layer_49", 0, (vx_reference)org_khronos_nn_extension_convolution_layer_49_p8);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_49, "org_khronos_nn_extension_activation_layer_49", 1, (vx_reference)org_khronos_nn_extension_activation_layer_49_p1);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_49, "org_khronos_nn_extension_activation_layer_49", 2, (vx_reference)org_khronos_nn_extension_activation_layer_49_p2);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_49, "org_khronos_nn_extension_activation_layer_49", 3, (vx_reference)org_khronos_nn_extension_activation_layer_49_p3);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_49, "org_khronos_nn_extension_activation_layer_49", 4, (vx_reference)org_khronos_nn_extension_activation_layer_49_p4);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_47, "org_khronos_nn_extension_activation_layer_47", 0, (vx_reference)org_khronos_nn_extension_convolution_layer_47_p8);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_47, "org_khronos_nn_extension_activation_layer_47", 1, (vx_reference)org_khronos_nn_extension_activation_layer_47_p1);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_47, "org_khronos_nn_extension_activation_layer_47", 2, (vx_reference)org_khronos_nn_extension_activation_layer_47_p2);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_47, "org_khronos_nn_extension_activation_layer_47", 3, (vx_reference)org_khronos_nn_extension_activation_layer_47_p3);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_47, "org_khronos_nn_extension_activation_layer_47", 4, (vx_reference)org_khronos_nn_extension_activation_layer_47_p4);
     if(status != VX_SUCCESS)
         return status;
-        
+
 //    status = AssignNodeParameter(org_khronos_nn_extension_convolution_layer_56, "org_khronos_nn_extension_convolution_layer_56", 0, (vx_reference)outputAllocators_MergeTensor_7_p0);
 //    if(status != VX_SUCCESS)
 //        return status;
@@ -13895,99 +13895,99 @@
 //    status = AssignNodeParameter(org_khronos_nn_extension_convolution_layer_52, "org_khronos_nn_extension_convolution_layer_52", 8, (vx_reference)org_khronos_nn_extension_convolution_layer_52_p8);
 //    if(status != VX_SUCCESS)
 //        return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_pooling_layer_12, "org_khronos_nn_extension_pooling_layer_12", 0, (vx_reference)outputAllocators_MergeTensor_7_p0);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_pooling_layer_12, "org_khronos_nn_extension_pooling_layer_12", 1, (vx_reference)org_khronos_nn_extension_pooling_layer_12_p1);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_pooling_layer_12, "org_khronos_nn_extension_pooling_layer_12", 2, (vx_reference)org_khronos_nn_extension_pooling_layer_12_p2);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_pooling_layer_12, "org_khronos_nn_extension_pooling_layer_12", 3, (vx_reference)org_khronos_nn_extension_pooling_layer_12_p3);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_pooling_layer_12, "org_khronos_nn_extension_pooling_layer_12", 4, (vx_reference)org_khronos_nn_extension_pooling_layer_12_p4);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_pooling_layer_12, "org_khronos_nn_extension_pooling_layer_12", 5, (vx_reference)org_khronos_nn_extension_pooling_layer_12_p5);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_pooling_layer_12, "org_khronos_nn_extension_pooling_layer_12", 6, (vx_reference)org_khronos_nn_extension_pooling_layer_12_p6);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_pooling_layer_12, "org_khronos_nn_extension_pooling_layer_12", 7, (vx_reference)org_khronos_nn_extension_pooling_layer_12_p7);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_56, "org_khronos_nn_extension_activation_layer_56", 0, (vx_reference)org_khronos_nn_extension_convolution_layer_56_p8);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_56, "org_khronos_nn_extension_activation_layer_56", 1, (vx_reference)org_khronos_nn_extension_activation_layer_56_p1);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_56, "org_khronos_nn_extension_activation_layer_56", 2, (vx_reference)org_khronos_nn_extension_activation_layer_56_p2);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_56, "org_khronos_nn_extension_activation_layer_56", 3, (vx_reference)org_khronos_nn_extension_activation_layer_56_p3);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_56, "org_khronos_nn_extension_activation_layer_56", 4, (vx_reference)org_khronos_nn_extension_activation_layer_56_p4);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_54, "org_khronos_nn_extension_activation_layer_54", 0, (vx_reference)org_khronos_nn_extension_convolution_layer_54_p8);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_54, "org_khronos_nn_extension_activation_layer_54", 1, (vx_reference)org_khronos_nn_extension_activation_layer_54_p1);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_54, "org_khronos_nn_extension_activation_layer_54", 2, (vx_reference)org_khronos_nn_extension_activation_layer_54_p2);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_54, "org_khronos_nn_extension_activation_layer_54", 3, (vx_reference)org_khronos_nn_extension_activation_layer_54_p3);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_54, "org_khronos_nn_extension_activation_layer_54", 4, (vx_reference)org_khronos_nn_extension_activation_layer_54_p4);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_52, "org_khronos_nn_extension_activation_layer_52", 0, (vx_reference)org_khronos_nn_extension_convolution_layer_52_p8);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_52, "org_khronos_nn_extension_activation_layer_52", 1, (vx_reference)org_khronos_nn_extension_activation_layer_52_p1);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_52, "org_khronos_nn_extension_activation_layer_52", 2, (vx_reference)org_khronos_nn_extension_activation_layer_52_p2);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_52, "org_khronos_nn_extension_activation_layer_52", 3, (vx_reference)org_khronos_nn_extension_activation_layer_52_p3);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_52, "org_khronos_nn_extension_activation_layer_52", 4, (vx_reference)org_khronos_nn_extension_activation_layer_52_p4);
     if(status != VX_SUCCESS)
         return status;
-        
+
 //    status = AssignNodeParameter(org_khronos_nn_extension_convolution_layer_51, "org_khronos_nn_extension_convolution_layer_51", 0, (vx_reference)org_khronos_nn_extension_pooling_layer_12_p7);
 //    if(status != VX_SUCCESS)
 //        return status;
@@ -14099,153 +14099,154 @@
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_51, "org_khronos_nn_extension_activation_layer_51", 0, (vx_reference)org_khronos_nn_extension_convolution_layer_51_p8);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_51, "org_khronos_nn_extension_activation_layer_51", 1, (vx_reference)org_khronos_nn_extension_activation_layer_51_p1);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_51, "org_khronos_nn_extension_activation_layer_51", 2, (vx_reference)org_khronos_nn_extension_activation_layer_51_p2);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_51, "org_khronos_nn_extension_activation_layer_51", 3, (vx_reference)org_khronos_nn_extension_activation_layer_51_p3);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_51, "org_khronos_nn_extension_activation_layer_51", 4, (vx_reference)org_khronos_nn_extension_activation_layer_51_p4);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_55, "org_khronos_nn_extension_activation_layer_55", 0, (vx_reference)org_khronos_nn_extension_convolution_layer_55_p8);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_55, "org_khronos_nn_extension_activation_layer_55", 1, (vx_reference)org_khronos_nn_extension_activation_layer_55_p1);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_55, "org_khronos_nn_extension_activation_layer_55", 2, (vx_reference)org_khronos_nn_extension_activation_layer_55_p2);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_55, "org_khronos_nn_extension_activation_layer_55", 3, (vx_reference)org_khronos_nn_extension_activation_layer_55_p3);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_55, "org_khronos_nn_extension_activation_layer_55", 4, (vx_reference)org_khronos_nn_extension_activation_layer_55_p4);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_53, "org_khronos_nn_extension_activation_layer_53", 0, (vx_reference)org_khronos_nn_extension_convolution_layer_53_p8);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_53, "org_khronos_nn_extension_activation_layer_53", 1, (vx_reference)org_khronos_nn_extension_activation_layer_53_p1);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_53, "org_khronos_nn_extension_activation_layer_53", 2, (vx_reference)org_khronos_nn_extension_activation_layer_53_p2);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_53, "org_khronos_nn_extension_activation_layer_53", 3, (vx_reference)org_khronos_nn_extension_activation_layer_53_p3);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_activation_layer_53, "org_khronos_nn_extension_activation_layer_53", 4, (vx_reference)org_khronos_nn_extension_activation_layer_53_p4);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_pooling_layer_13, "org_khronos_nn_extension_pooling_layer_13", 0, (vx_reference)outputAllocators_MergeTensor_8_p0);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_pooling_layer_13, "org_khronos_nn_extension_pooling_layer_13", 1, (vx_reference)org_khronos_nn_extension_pooling_layer_13_p1);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_pooling_layer_13, "org_khronos_nn_extension_pooling_layer_13", 2, (vx_reference)org_khronos_nn_extension_pooling_layer_13_p2);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_pooling_layer_13, "org_khronos_nn_extension_pooling_layer_13", 3, (vx_reference)org_khronos_nn_extension_pooling_layer_13_p3);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_pooling_layer_13, "org_khronos_nn_extension_pooling_layer_13", 4, (vx_reference)org_khronos_nn_extension_pooling_layer_13_p4);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_pooling_layer_13, "org_khronos_nn_extension_pooling_layer_13", 5, (vx_reference)org_khronos_nn_extension_pooling_layer_13_p5);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_pooling_layer_13, "org_khronos_nn_extension_pooling_layer_13", 6, (vx_reference)org_khronos_nn_extension_pooling_layer_13_p6);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_pooling_layer_13, "org_khronos_nn_extension_pooling_layer_13", 7, (vx_reference)org_khronos_nn_extension_pooling_layer_13_p7);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_fully_connected_layer_0, "org_khronos_nn_extension_fully_connected_layer_0", 0, (vx_reference)org_khronos_nn_extension_pooling_layer_13_p7);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_fully_connected_layer_0, "org_khronos_nn_extension_fully_connected_layer_0", 1, (vx_reference)org_khronos_nn_extension_fully_connected_layer_0_p1);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_fully_connected_layer_0, "org_khronos_nn_extension_fully_connected_layer_0", 2, (vx_reference)org_khronos_nn_extension_fully_connected_layer_0_p2);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_fully_connected_layer_0, "org_khronos_nn_extension_fully_connected_layer_0", 3, (vx_reference)org_khronos_nn_extension_fully_connected_layer_0_p3);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_fully_connected_layer_0, "org_khronos_nn_extension_fully_connected_layer_0", 4, (vx_reference)org_khronos_nn_extension_fully_connected_layer_0_p4);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_fully_connected_layer_0, "org_khronos_nn_extension_fully_connected_layer_0", 5, (vx_reference)org_khronos_nn_extension_fully_connected_layer_0_p5);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_openvx_tensor_multiply_0, "org_khronos_openvx_tensor_multiply_0", 0, (vx_reference)org_khronos_nn_extension_fully_connected_layer_0_p5);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_openvx_tensor_multiply_0, "org_khronos_openvx_tensor_multiply_0", 1, (vx_reference)org_khronos_openvx_tensor_multiply_0_p1);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_openvx_tensor_multiply_0, "org_khronos_openvx_tensor_multiply_0", 2, (vx_reference)org_khronos_openvx_tensor_multiply_0_p2);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_openvx_tensor_multiply_0, "org_khronos_openvx_tensor_multiply_0", 3, (vx_reference)org_khronos_openvx_tensor_multiply_0_p3);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_openvx_tensor_multiply_0, "org_khronos_openvx_tensor_multiply_0", 4, (vx_reference)org_khronos_openvx_tensor_multiply_0_p4);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_openvx_tensor_multiply_0, "org_khronos_openvx_tensor_multiply_0", 5, (vx_reference)org_khronos_openvx_tensor_multiply_0_p5);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_softmax_layer_0, "org_khronos_nn_extension_softmax_layer_0", 0, (vx_reference)org_khronos_openvx_tensor_multiply_0_p5);
     if(status != VX_SUCCESS)
         return status;
-        
+
     status = AssignNodeParameter(org_khronos_nn_extension_softmax_layer_0, "org_khronos_nn_extension_softmax_layer_0", 1, (vx_reference)org_khronos_nn_extension_softmax_layer_0_p1);
     if(status != VX_SUCCESS)
         return status;
-        
- 
+
+
 
     return status;
 }
 #endif
+#endif // OPENVX_CONFORMANCE_NEURAL_NETWORKS
diff --git a/test_conformance/Networks/src/graph_process.c b/test_conformance/Networks/src/graph_process.c
index e05580b..37d98fc 100644
--- a/test_conformance/Networks/src/graph_process.c
+++ b/test_conformance/Networks/src/graph_process.c
@@ -1,5 +1,5 @@
 /** @file graph_process.c
- *  @brief 
+ *  @brief
  *  This file contains the implementation of the graph inputs/outputs processing functions
  */
 
@@ -127,7 +127,7 @@
 
 vx_status preprocess(vx_tensor input, const char * fName)
 {
-    //1. Load image 
+    //1. Load image
     //2. Normalize the image pixels the same way you used in the training process (i.e, mean substraction, scaling, etc)
     //3. Scale each pixel with the scale factor ModelOptimizer reported
     //4. Convert each pixel to the required precision (Q78, FP16, etc)
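
The preprocessing steps listed above (load, normalize, scale, convert) map onto a small helper. A minimal sketch, assuming a mean-subtract-and-scale normalization and a Q7.8 target precision (8 fractional bits, matching Q78ToFloat further down); the mean and scale values are placeholders, not values mandated by the suite:

    #include <stddef.h>
    #include <stdint.h>

    /* Hypothetical helper: normalize float pixels and convert them to Q7.8.
     * 'mean' and 'scale' stand in for whatever the training pipeline used;
     * saturation is omitted for brevity. */
    static void normalizeToQ78(const float* pixels, size_t count,
                               float mean, float scale, int16_t* out)
    {
        for (size_t i = 0; i < count; ++i)
        {
            float v = (pixels[i] - mean) * scale;  /* mean subtraction + scaling */
            out[i] = (int16_t)(v * 256.0f);        /* Q7.8 keeps 8 fractional bits */
        }
    }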
@@ -160,12 +160,11 @@
 
 vx_status postprocess(vx_tensor output, /*OUT*/ int* detected_class)
 {
-    //1. Find top-N probabilities indices in the output tensor. 
-    //2. Probabilities must be converted back to floating point number in order to be interpreted as percentages 
+    //1. Find the top-N probability indices in the output tensor.
+    //2. Probabilities must be converted back to floating point numbers in order to be interpreted as percentages
 
     //getProbabilitiesFromMDData(...)
     vx_int16 mem[1000] = {0};
-    float prob[1000] = {0};
 
     const vx_size view_start[2] = { 0, 0 };
     const vx_size view_end[2] = { 1000, 1 };
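
For the post-processing steps above, a minimal top-1 sketch, assuming the output tensor has already been copied into a Q7.8 int16 buffer such as mem above; dividing by 256 mirrors Q78ToFloat further down:

    #include <stddef.h>
    #include <stdint.h>

    /* Hypothetical helper: return the index of the largest Q7.8 probability
     * and optionally report its value as a plain float. */
    static int argmaxQ78(const int16_t* probs, size_t count, float* topProb)
    {
        size_t best = 0;
        for (size_t i = 1; i < count; ++i)
        {
            if (probs[i] > probs[best])
                best = i;
        }
        if (topProb)
            *topProb = (float)probs[best] / 256.0f;
        return (int)best;
    }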
@@ -207,18 +206,18 @@
     vx_status status = vxQueryTensor(tensor, VX_TENSOR_NUMBER_OF_DIMS, &dims_num, sizeof(dims_num));
 
     vx_size *dims = (vx_size*)malloc(dims_num * sizeof(vx_size));
-    if (!dims) 
+    if (!dims)
     {
-        fclose(f); 
-        return VX_ERROR_NO_MEMORY; 
+        fclose(f);
+        return VX_ERROR_NO_MEMORY;
     }
 
     status = vxQueryTensor(tensor, VX_TENSOR_DIMS, dims, sizeof(vx_size) * dims_num);
-    if (status != VX_SUCCESS) 
+    if (status != VX_SUCCESS)
     {
-        fclose(f); 
+        fclose(f);
         free(dims);
-        return status; 
+        return status;
     }
 
     vx_size count = 1;
@@ -270,6 +269,6 @@
     }
 
     fclose(f);
-    
+
     return VX_SUCCESS;
 }
diff --git a/test_conformance/Networks/src/precisionConverter.c b/test_conformance/Networks/src/precisionConverter.c
index 702c2de..9dfeaff 100644
--- a/test_conformance/Networks/src/precisionConverter.c
+++ b/test_conformance/Networks/src/precisionConverter.c
@@ -11,7 +11,8 @@
 //small helper function to represent uint32_t value as float32
 float asfloat(uint32_t v)
 {
-    return *(float*)&v;
+    uintptr_t value = (uintptr_t)(void *)&v;
+    return *(float*)value;
 }
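
Even with the pointer round-trip above, the load still reads a uint32_t through a float*; on C99 and later, copying the bit pattern with memcpy is the usual strictly conforming alternative. A minimal sketch, not part of the patch:

    #include <stdint.h>
    #include <string.h>

    /* Alternative formulation: copy the bits instead of dereferencing a
     * type-punned pointer; compilers typically optimize the memcpy away. */
    static float asfloat_memcpy(uint32_t v)
    {
        float f;
        memcpy(&f, &v, sizeof f);
        return f;
    }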
 
 
@@ -85,7 +86,7 @@
     return v.u | s;
 }
 
-/** @brief Converts FP16 to FP32 
+/** @brief Converts FP16 to FP32
 *  @param x - value in FP16 format
 *  @return value in FP32 format
 **************************************************************/
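
For reference, IEEE 754 half precision packs one sign bit, 5 exponent bits (bias 15) and 10 mantissa bits, so widening a normal-range value to FP32 only needs a re-bias and a shift. A simplified sketch that deliberately ignores subnormals, infinities and NaNs, shown only to illustrate the layout (the converter in this file remains the authoritative version):

    #include <stdint.h>

    /* Normal-range FP16 -> FP32 only: re-bias the exponent (15 -> 127) and
     * widen the mantissa from 10 to 23 bits; asfloat() is the helper above. */
    static float fp16ToFp32Normal(uint16_t h)
    {
        uint32_t sign = (uint32_t)(h >> 15) & 0x1u;
        uint32_t exp  = (uint32_t)(h >> 10) & 0x1Fu;
        uint32_t mant = (uint32_t)h & 0x3FFu;
        uint32_t bits = (sign << 31) | ((exp - 15u + 127u) << 23) | (mant << 13);
        return asfloat(bits);
    }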
@@ -135,7 +136,7 @@
     return asfloat(u);
 }
 
-/** @brief Converts S16 (signed int16) to a float 
+/** @brief Converts S16 (signed int16) to a float
 *  @param s16Pixel - A pointer to a value in S16 format.
 *  @return float value
 ***************************************************************/
@@ -145,7 +146,7 @@
     return (float)value;
 }
 
-/** @brief Converts Q78 to a float 
+/** @brief Converts Q78 to a float
 *  @param q78Pixel - A pointer to a value in Q78 format.
 *  @return float value
 **************************************************************/
@@ -155,7 +156,7 @@
     return ((float)value) / 256.0;
 }
 
-/** @brief Converts FP16 to a float 
+/** @brief Converts FP16 to a float
 *  @param fp16Pixel - A pointer to a value in FP16 format.
 *  @return float value
 **************************************************************/
diff --git a/test_conformance/Networks/src/utilities.c b/test_conformance/Networks/src/utilities.c
index 7341513..65f0113 100644
--- a/test_conformance/Networks/src/utilities.c
+++ b/test_conformance/Networks/src/utilities.c
@@ -18,6 +18,7 @@
 #endif
     if (df == VX_TYPE_FLOAT32)
 		return FP32ToFloat;
+    return NULL;
 }
 
 //Local function that returns a pointer to a function that converts float to the image format
@@ -31,6 +32,7 @@
 #endif
     if (df == VX_TYPE_FLOAT32)
         return floatToFP32;
+    return NULL;
 }
 
 /** @brief Loads image from a file and converts it to float.
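
Since the selector functions above can now return NULL for an unsupported data type, call sites are expected to guard the returned pointer before using it. A hypothetical guard (the error message is illustrative, not taken from the suite):

    void (*convertFromFloat)(float, char*) = convertFromFloatFunc(dt);
    if (convertFromFloat == NULL)
    {
        WriteLog("Unsupported tensor element type\n");
        return VX_FAILURE;
    }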
@@ -50,7 +52,7 @@
         WriteLog("Failed to load image from file %s", fileName);
         return NULL;
     }
-    
+
     size_t imageSize = (*width) * (*height) * (*channels);
     float* imageF = (float*) malloc((*width) * (*height) * (*channels) * sizeof(float));
     for (size_t i = 0; i < imageSize; ++i)
@@ -157,7 +159,7 @@
 	    WriteLog("Trying to load image with %d channels. Currently only images with 1 or 3 channels are supported.\n", channels);
 		return VX_FAILURE;
 	}
-	
+
 	vx_size dims_num = 0;
 	vx_size dimensionsArray[VX_MAX_TENSOR_DIMS_CT] = { 0 };
     vx_status status = vxQueryTensor(mddata, VX_TENSOR_NUMBER_OF_DIMS, &dims_num, sizeof(dims_num));
@@ -173,7 +175,7 @@
         WriteLog("MDData has less than 3 dimensions. It cannot store an image\n");
         return VX_FAILURE;
     }
-	
+
 	if (width != dimensionsArray[1] || height != dimensionsArray[0] || channels != dimensionsArray[2])
 	{
 	    WriteLog("Image size %dx%dx%d does not suit MDData size %dx%dx%d\n", width, height, channels, dimensionsArray[1], dimensionsArray[0], dimensionsArray[1]);
@@ -192,7 +194,7 @@
 	const vx_size viewStart[VX_MAX_TENSOR_DIMS_CT] = { 0 };
     mddataBasePtr = malloc(width*height*3*sizeof(vx_int16));
     if (!mddataBasePtr) { WriteLog("ERROR: malloc failed..."); return VX_FAILURE; }
-    
+
 	size_t channelsOrderFix = channels == 1 ? 0 : 2;
     void(*convertFromFloat)(float, char*) = convertFromFloatFunc(dt);
     for (size_t h = 0; h < dimensionsArray[0]; ++h)
@@ -235,12 +237,12 @@
         WriteLog("ERROR: cannot open classification file %s\n", fileFullPath);
         return NULL;
     }
-    
+
     //Get file size
     fseek(fp, 0, SEEK_END); // seek to end of file
     size_t fileSize = ftell(fp); // get current file pointer
     fseek(fp, 0, SEEK_SET); // seek back to beginning of file
-    
+
     char* fileContent = (char*)malloc(fileSize + 1);
     if (fread(fileContent, 1, fileSize, fp) != fileSize)
     {
@@ -250,7 +252,7 @@
         return NULL;
     }
     fclose(fp);
-    
+
     size_t classCount = 0;
     for (size_t i = 0; i < fileSize; ++i)
     {
@@ -265,7 +267,7 @@
         fileContent[fileSize] = '\0';
         classCount++;
     }
-    
+
     char** classArray = (char**)malloc(classCount * sizeof(char*));
     size_t classIndex = 0;
     for (size_t i = 0; i < fileSize + 1; ++i)
@@ -313,7 +315,7 @@
 	    WriteLog("Error in moveHighestProbToTheBegin, received NULL pointer\n");
 		return;
 	}
-	
+
     for (size_t sortIndex = 0; sortIndex < sortNum; ++sortIndex)
     {
         size_t highProbIndex = sortIndex;
diff --git a/test_conformance/shared_functions.h b/test_conformance/shared_functions.h
index f827249..f849d06 100644
--- a/test_conformance/shared_functions.h
+++ b/test_conformance/shared_functions.h
@@ -1,4 +1,4 @@
-/* 
+/*
 
  * Copyright (c) 2012-2017 The Khronos Group Inc.
  *
diff --git a/test_conformance/test_accumulate.c b/test_conformance/test_accumulate.c
deleted file mode 100644
index 164dc87..0000000
--- a/test_conformance/test_accumulate.c
+++ /dev/null
@@ -1,203 +0,0 @@
-/* 
-
- * Copyright (c) 2012-2017 The Khronos Group Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "test_engine/test.h"
-#include <VX/vx.h>
-#include <VX/vxu.h>
-
-
-TESTCASE(Accumulate, CT_VXContext, ct_setup_vx_context, 0)
-
-
-TEST(Accumulate, testNodeCreation)
-{
-    vx_context context = context_->vx_context_;
-    vx_image input = 0, accum = 0;
-    vx_graph graph = 0;
-    vx_node node = 0;
-
-    ASSERT_VX_OBJECT(input = vxCreateImage(context, 128, 128, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
-
-    ASSERT_VX_OBJECT(accum = vxCreateImage(context, 128, 128, VX_DF_IMAGE_S16), VX_TYPE_IMAGE);
-
-    graph = vxCreateGraph(context);
-    ASSERT_VX_OBJECT(graph, VX_TYPE_GRAPH);
-
-    ASSERT_VX_OBJECT(node = vxAccumulateImageNode(graph, input, accum), VX_TYPE_NODE);
-
-    VX_CALL(vxVerifyGraph(graph));
-
-    VX_CALL(vxReleaseNode(&node));
-    VX_CALL(vxReleaseGraph(&graph));
-    VX_CALL(vxReleaseImage(&accum));
-    VX_CALL(vxReleaseImage(&input));
-
-    ASSERT(node == 0);
-    ASSERT(graph == 0);
-    ASSERT(accum == 0);
-    ASSERT(input == 0);
-}
-
-
-static CT_Image accumulate_generate_random_8u(int width, int height)
-{
-    CT_Image image;
-
-    ASSERT_NO_FAILURE_(return 0,
-            image = ct_allocate_ct_image_random(width, height, VX_DF_IMAGE_U8, &CT()->seed_, 0, 256));
-
-    return image;
-}
-
-
-static CT_Image accumulate_generate_random_16s(int width, int height)
-{
-    CT_Image image;
-
-    ASSERT_NO_FAILURE_(return 0,
-            image = ct_allocate_ct_image_random(width, height, VX_DF_IMAGE_S16, &CT()->seed_, -32768, 32768));
-
-    return image;
-}
-
-
-static void accumulate_reference(CT_Image input, CT_Image accum)
-{
-    CT_FILL_IMAGE_16S(return, accum,
-            {
-                uint8_t* input_data = CT_IMAGE_DATA_PTR_8U(input, x, y);
-                int32_t res32 = ((int32_t)(*dst_data)) + ((int32_t)(*input_data));
-                int16_t res = CT_SATURATE_S16(res32);
-                *dst_data = res;
-            });
-}
-
-
-static void accumulate_check(CT_Image input, CT_Image accum_src, CT_Image accum_dst)
-{
-    CT_Image accum_ref = NULL;
-
-    ASSERT(input && accum_src && accum_dst);
-
-    ASSERT_NO_FAILURE(accum_ref = ct_image_create_clone(accum_src));
-
-    ASSERT_NO_FAILURE(accumulate_reference(input, accum_ref));
-
-    EXPECT_EQ_CTIMAGE(accum_ref, accum_dst);
-#if 0
-    if (CT_HasFailure())
-    {
-        printf("=== Input ===\n");
-        ct_dump_image_info(input);
-        printf("=== Accum source ===\n");
-        ct_dump_image_info(accum_src);
-        printf("=== Accum RESULT ===\n");
-        ct_dump_image_info(accum_dst);
-        printf("=== EXPECTED RESULT ===\n");
-        ct_dump_image_info(accum_ref);
-    }
-#endif
-}
-
-typedef struct {
-    const char* testName;
-    int dummy_;
-    int width, height;
-} Arg;
-
-
-#define PARAMETERS \
-    CT_GENERATE_PARAMETERS("random", ADD_SIZE_SMALL_SET, ARG, 0)
-
-TEST_WITH_ARG(Accumulate, testGraphProcessing, Arg,
-    PARAMETERS
-)
-{
-    vx_context context = context_->vx_context_;
-    vx_image input_image = 0, accum_image = 0;
-    vx_graph graph = 0;
-    vx_node node = 0;
-
-    CT_Image input = NULL, accum_src = NULL, accum_dst = NULL;
-
-    ASSERT_NO_FAILURE(input = accumulate_generate_random_8u(arg_->width, arg_->height));
-
-    ASSERT_NO_FAILURE(accum_src = accumulate_generate_random_16s(arg_->width, arg_->height));
-
-    ASSERT_VX_OBJECT(input_image = ct_image_to_vx_image(input, context), VX_TYPE_IMAGE);
-
-    ASSERT_VX_OBJECT(accum_image = ct_image_to_vx_image(accum_src, context), VX_TYPE_IMAGE);
-
-    ASSERT_VX_OBJECT(graph = vxCreateGraph(context), VX_TYPE_GRAPH);
-
-    ASSERT_VX_OBJECT(node = vxAccumulateImageNode(graph, input_image, accum_image), VX_TYPE_NODE);
-
-    VX_CALL(vxVerifyGraph(graph));
-    VX_CALL(vxProcessGraph(graph));
-
-    ASSERT_NO_FAILURE(accum_dst = ct_image_from_vx_image(accum_image));
-
-    ASSERT_NO_FAILURE(accumulate_check(input, accum_src, accum_dst));
-
-    VX_CALL(vxReleaseNode(&node));
-    VX_CALL(vxReleaseGraph(&graph));
-
-    ASSERT(node == 0);
-    ASSERT(graph == 0);
-
-    VX_CALL(vxReleaseImage(&accum_image));
-    VX_CALL(vxReleaseImage(&input_image));
-
-    ASSERT(accum_image == 0);
-    ASSERT(input_image == 0);
-}
-
-TEST_WITH_ARG(Accumulate, testImmediateProcessing, Arg,
-    PARAMETERS
-)
-{
-    vx_context context = context_->vx_context_;
-    vx_image input_image = 0, accum_image = 0;
-
-    CT_Image input = NULL, accum_src = NULL, accum_dst = NULL;
-
-    ASSERT_NO_FAILURE(input = accumulate_generate_random_8u(arg_->width, arg_->height));
-
-    ASSERT_NO_FAILURE(accum_src = accumulate_generate_random_16s(arg_->width, arg_->height));
-
-    ASSERT_VX_OBJECT(input_image = ct_image_to_vx_image(input, context), VX_TYPE_IMAGE);
-
-    ASSERT_VX_OBJECT(accum_image = ct_image_to_vx_image(accum_src, context), VX_TYPE_IMAGE);
-
-    VX_CALL(vxuAccumulateImage(context, input_image, accum_image));
-
-    ASSERT_NO_FAILURE(accum_dst = ct_image_from_vx_image(accum_image));
-
-    ASSERT_NO_FAILURE(accumulate_check(input, accum_src, accum_dst));
-
-    VX_CALL(vxReleaseImage(&accum_image));
-    VX_CALL(vxReleaseImage(&input_image));
-
-    ASSERT(accum_image == 0);
-    ASSERT(input_image == 0);
-}
-
-TESTCASE_TESTS(Accumulate,
-        testNodeCreation,
-        testGraphProcessing,
-        testImmediateProcessing
-)
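
For reference, the Accumulate test file removed above checked a per-pixel saturating S16 accumulation: accum = saturate_s16(accum + input). A minimal sketch of that arithmetic in plain C (the helper names below are illustrative stand-ins for the CT_FILL_IMAGE_16S / CT_SATURATE_S16 macros, not CTS API):

    #include <stdint.h>
    #include <stddef.h>

    /* Clamp a 32-bit intermediate into the signed 16-bit range. */
    static int16_t saturate_s16(int32_t v)
    {
        if (v > INT16_MAX) return INT16_MAX;
        if (v < INT16_MIN) return INT16_MIN;
        return (int16_t)v;
    }

    /* accum[i] = saturate_s16(accum[i] + input[i]) over a flat U8 plane. */
    static void accumulate_ref(const uint8_t *input, int16_t *accum, size_t count)
    {
        for (size_t i = 0; i < count; ++i)
            accum[i] = saturate_s16((int32_t)accum[i] + (int32_t)input[i]);
    }
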
diff --git a/test_conformance/test_accumulatesquare.c b/test_conformance/test_accumulatesquare.c
deleted file mode 100644
index 6f4fa69..0000000
--- a/test_conformance/test_accumulatesquare.c
+++ /dev/null
@@ -1,219 +0,0 @@
-/* 
-
- * Copyright (c) 2012-2017 The Khronos Group Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "test_engine/test.h"
-#include <VX/vx.h>
-#include <VX/vxu.h>
-
-
-TESTCASE(AccumulateSquare, CT_VXContext, ct_setup_vx_context, 0)
-
-
-TEST(AccumulateSquare, testNodeCreation)
-{
-    vx_context context = context_->vx_context_;
-    vx_image input = 0, accum = 0;
-    vx_uint32 shift = 8;
-    vx_scalar shift_scalar = 0;
-    vx_graph graph = 0;
-    vx_node node = 0;
-
-    ASSERT_VX_OBJECT(input = vxCreateImage(context, 128, 128, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
-
-    ASSERT_VX_OBJECT(accum = vxCreateImage(context, 128, 128, VX_DF_IMAGE_S16), VX_TYPE_IMAGE);
-
-    ASSERT_VX_OBJECT(shift_scalar = vxCreateScalar(context, VX_TYPE_UINT32, &shift), VX_TYPE_SCALAR);
-
-    graph = vxCreateGraph(context);
-    ASSERT_VX_OBJECT(graph, VX_TYPE_GRAPH);
-
-    ASSERT_VX_OBJECT(node = vxAccumulateSquareImageNode(graph, input, shift_scalar, accum), VX_TYPE_NODE);
-
-    VX_CALL(vxVerifyGraph(graph));
-
-    VX_CALL(vxReleaseNode(&node));
-    VX_CALL(vxReleaseGraph(&graph));
-    VX_CALL(vxReleaseImage(&accum));
-    VX_CALL(vxReleaseImage(&input));
-    VX_CALL(vxReleaseScalar(&shift_scalar));
-
-    ASSERT(node == 0);
-    ASSERT(graph == 0);
-    ASSERT(accum == 0);
-    ASSERT(input == 0);
-}
-
-
-static CT_Image accumulate_square_generate_random_8u(int width, int height)
-{
-    CT_Image image;
-
-    ASSERT_NO_FAILURE_(return 0,
-            image = ct_allocate_ct_image_random(width, height, VX_DF_IMAGE_U8, &CT()->seed_, 0, 256));
-
-    return image;
-}
-
-
-static CT_Image accumulate_square_generate_random_16s_non_negative(int width, int height)
-{
-    CT_Image image;
-
-    ASSERT_NO_FAILURE_(return 0,
-            image = ct_allocate_ct_image_random(width, height, VX_DF_IMAGE_S16, &CT()->seed_, 0, 32768));
-
-    return image;
-}
-
-
-static void accumulate_square_reference(CT_Image input, vx_uint32 shift, CT_Image accum)
-{
-    CT_FILL_IMAGE_16S(return, accum,
-            {
-                uint8_t* input_data = CT_IMAGE_DATA_PTR_8U(input, x, y);
-                int32_t res32 = ((int32_t)(*dst_data)) + ((((int32_t)(*input_data))*((int32_t)(*input_data))) >> shift);
-                int16_t res = CT_SATURATE_S16(res32);
-                *dst_data = res;
-            });
-}
-
-
-static void accumulate_square_check(CT_Image input, vx_uint32 shift, CT_Image accum_src, CT_Image accum_dst)
-{
-    CT_Image accum_ref = NULL;
-
-    ASSERT(input && accum_src && accum_dst);
-
-    ASSERT_NO_FAILURE(accum_ref = ct_image_create_clone(accum_src));
-
-    ASSERT_NO_FAILURE(accumulate_square_reference(input, shift, accum_ref));
-
-    EXPECT_EQ_CTIMAGE(accum_ref, accum_dst);
-#if 0
-    if (CT_HasFailure())
-    {
-        printf("=== Input ===\n");
-        ct_dump_image_info(input);
-        printf("=== Accum source ===\n");
-        ct_dump_image_info(accum_src);
-        printf("=== Accum RESULT ===\n");
-        ct_dump_image_info(accum_dst);
-        printf("=== EXPECTED RESULT ===\n");
-        ct_dump_image_info(accum_ref);
-    }
-#endif
-}
-
-typedef struct {
-    const char* testName;
-    vx_uint32 shift;
-    int width, height;
-} Arg;
-
-
-#define PARAMETERS \
-    CT_GENERATE_PARAMETERS("random/shift0", ADD_SIZE_SMALL_SET, ARG, 0), \
-    CT_GENERATE_PARAMETERS("random/shift1", ADD_SIZE_SMALL_SET, ARG, 1), \
-    CT_GENERATE_PARAMETERS("random/shift8", ADD_SIZE_SMALL_SET, ARG, 8), \
-    CT_GENERATE_PARAMETERS("random/shift15", ADD_SIZE_SMALL_SET, ARG, 15)
-
-TEST_WITH_ARG(AccumulateSquare, testGraphProcessing, Arg,
-    PARAMETERS
-)
-{
-    vx_context context = context_->vx_context_;
-    vx_image input_image = 0, accum_image = 0;
-    vx_scalar shift_scalar = 0;
-    vx_graph graph = 0;
-    vx_node node = 0;
-
-    CT_Image input = NULL, accum_src = NULL, accum_dst = NULL;
-
-    ASSERT_VX_OBJECT(shift_scalar = vxCreateScalar(context, VX_TYPE_UINT32, &arg_->shift), VX_TYPE_SCALAR);
-
-    ASSERT_NO_FAILURE(input = accumulate_square_generate_random_8u(arg_->width, arg_->height));
-
-    ASSERT_NO_FAILURE(accum_src = accumulate_square_generate_random_16s_non_negative(arg_->width, arg_->height));
-
-    ASSERT_VX_OBJECT(input_image = ct_image_to_vx_image(input, context), VX_TYPE_IMAGE);
-
-    ASSERT_VX_OBJECT(accum_image = ct_image_to_vx_image(accum_src, context), VX_TYPE_IMAGE);
-
-    ASSERT_VX_OBJECT(graph = vxCreateGraph(context), VX_TYPE_GRAPH);
-
-    ASSERT_VX_OBJECT(node = vxAccumulateSquareImageNode(graph, input_image, shift_scalar, accum_image), VX_TYPE_NODE);
-
-    VX_CALL(vxVerifyGraph(graph));
-    VX_CALL(vxProcessGraph(graph));
-
-    ASSERT_NO_FAILURE(accum_dst = ct_image_from_vx_image(accum_image));
-
-    ASSERT_NO_FAILURE(accumulate_square_check(input, arg_->shift, accum_src, accum_dst));
-
-    VX_CALL(vxReleaseNode(&node));
-    VX_CALL(vxReleaseGraph(&graph));
-
-    ASSERT(node == 0);
-    ASSERT(graph == 0);
-
-    VX_CALL(vxReleaseImage(&accum_image));
-    VX_CALL(vxReleaseImage(&input_image));
-    VX_CALL(vxReleaseScalar(&shift_scalar));
-
-    ASSERT(accum_image == 0);
-    ASSERT(input_image == 0);
-}
-
-TEST_WITH_ARG(AccumulateSquare, testImmediateProcessing, Arg,
-    PARAMETERS
-)
-{
-    vx_context context = context_->vx_context_;
-    vx_image input_image = 0, accum_image = 0;
-    vx_scalar shift_scalar = 0;
-
-    CT_Image input = NULL, accum_src = NULL, accum_dst = NULL;
-
-    ASSERT_VX_OBJECT(shift_scalar = vxCreateScalar(context, VX_TYPE_UINT32, &arg_->shift), VX_TYPE_SCALAR);
-
-    ASSERT_NO_FAILURE(input = accumulate_square_generate_random_8u(arg_->width, arg_->height));
-
-    ASSERT_NO_FAILURE(accum_src = accumulate_square_generate_random_16s_non_negative(arg_->width, arg_->height));
-
-    ASSERT_VX_OBJECT(input_image = ct_image_to_vx_image(input, context), VX_TYPE_IMAGE);
-
-    ASSERT_VX_OBJECT(accum_image = ct_image_to_vx_image(accum_src, context), VX_TYPE_IMAGE);
-
-    VX_CALL(vxuAccumulateSquareImage(context, input_image, shift_scalar, accum_image));
-
-    ASSERT_NO_FAILURE(accum_dst = ct_image_from_vx_image(accum_image));
-
-    ASSERT_NO_FAILURE(accumulate_square_check(input, arg_->shift, accum_src, accum_dst));
-
-    VX_CALL(vxReleaseImage(&accum_image));
-    VX_CALL(vxReleaseImage(&input_image));
-    VX_CALL(vxReleaseScalar(&shift_scalar));
-
-    ASSERT(accum_image == 0);
-    ASSERT(input_image == 0);
-}
-
-TESTCASE_TESTS(AccumulateSquare,
-        testNodeCreation,
-        testGraphProcessing,
-        testImmediateProcessing
-)
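
Likewise, the AccumulateSquare reference removed above adds the squared input, shifted right by the scalar shift parameter, into the S16 accumulator: accum = saturate_s16(accum + ((input * input) >> shift)). A minimal sketch under the same assumptions (plain C types, illustrative helper name):

    #include <stdint.h>
    #include <stddef.h>

    /* accum[i] = saturate_s16(accum[i] + ((input[i] * input[i]) >> shift)). */
    static void accumulate_square_ref(const uint8_t *input, int16_t *accum,
                                      size_t count, uint32_t shift)
    {
        for (size_t i = 0; i < count; ++i)
        {
            int32_t sq  = ((int32_t)input[i] * (int32_t)input[i]) >> shift;
            int32_t sum = (int32_t)accum[i] + sq;
            accum[i] = (int16_t)(sum > INT16_MAX ? INT16_MAX :
                                 (sum < INT16_MIN ? INT16_MIN : sum));
        }
    }
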
diff --git a/test_conformance/test_accumulateweighted.c b/test_conformance/test_accumulateweighted.c
deleted file mode 100644
index 59b7c02..0000000
--- a/test_conformance/test_accumulateweighted.c
+++ /dev/null
@@ -1,211 +0,0 @@
-/* 
-
- * Copyright (c) 2012-2017 The Khronos Group Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "test_engine/test.h"
-#include <VX/vx.h>
-#include <VX/vxu.h>
-
-
-TESTCASE(AccumulateWeighted, CT_VXContext, ct_setup_vx_context, 0)
-
-
-TEST(AccumulateWeighted, testNodeCreation)
-{
-    vx_context context = context_->vx_context_;
-    vx_image input = 0, accum = 0;
-    vx_float32 alpha = 0.5f;
-    vx_scalar alpha_scalar = 0;
-    vx_graph graph = 0;
-    vx_node node = 0;
-
-    ASSERT_VX_OBJECT(input = vxCreateImage(context, 128, 128, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
-
-    ASSERT_VX_OBJECT(accum = vxCreateImage(context, 128, 128, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
-
-    ASSERT_VX_OBJECT(alpha_scalar = vxCreateScalar(context, VX_TYPE_FLOAT32, &alpha), VX_TYPE_SCALAR);
-
-    graph = vxCreateGraph(context);
-    ASSERT_VX_OBJECT(graph, VX_TYPE_GRAPH);
-
-    ASSERT_VX_OBJECT(node = vxAccumulateWeightedImageNode(graph, input, alpha_scalar, accum), VX_TYPE_NODE);
-
-    VX_CALL(vxVerifyGraph(graph));
-
-    VX_CALL(vxReleaseNode(&node));
-    VX_CALL(vxReleaseGraph(&graph));
-    VX_CALL(vxReleaseImage(&accum));
-    VX_CALL(vxReleaseImage(&input));
-    VX_CALL(vxReleaseScalar(&alpha_scalar));
-
-    ASSERT(node == 0);
-    ASSERT(graph == 0);
-    ASSERT(accum == 0);
-    ASSERT(input == 0);
-}
-
-
-static CT_Image accumulate_weighted_generate_random_8u(int width, int height)
-{
-    CT_Image image;
-
-    ASSERT_NO_FAILURE_(return 0,
-            image = ct_allocate_ct_image_random(width, height, VX_DF_IMAGE_U8, &CT()->seed_, 0, 256));
-
-    return image;
-}
-
-
-static void accumulate_weighted_reference(CT_Image input, vx_float32 alpha, CT_Image accum)
-{
-    CT_FILL_IMAGE_8U(return, accum,
-            {
-                uint8_t* input_data = CT_IMAGE_DATA_PTR_8U(input, x, y);
-                vx_float32 res = (1 - alpha) * ((vx_float32)(int32_t)(*dst_data)) + (alpha) * ((vx_float32)(int32_t)(*input_data));
-                uint8_t res8 = CT_SATURATE_U8(res);
-                *dst_data = res8;
-            });
-}
-
-
-static void accumulate_weighted_check(CT_Image input, vx_float32 alpha, CT_Image accum_src, CT_Image accum_dst)
-{
-    CT_Image accum_ref = NULL;
-
-    ASSERT(input && accum_src && accum_dst);
-
-    ASSERT_NO_FAILURE(accum_ref = ct_image_create_clone(accum_src));
-
-    ASSERT_NO_FAILURE(accumulate_weighted_reference(input, alpha, accum_ref));
-
-    EXPECT_CTIMAGE_NEAR(accum_ref, accum_dst, 1);
-#if 0
-    if (CT_HasFailure())
-    {
-        printf("=== Input ===\n");
-        ct_dump_image_info(input);
-        printf("=== Accum source ===\n");
-        ct_dump_image_info(accum_src);
-        printf("=== Accum RESULT ===\n");
-        ct_dump_image_info(accum_dst);
-        printf("=== EXPECTED RESULT ===\n");
-        ct_dump_image_info(accum_ref);
-    }
-#endif
-}
-
-typedef struct {
-    const char* testName;
-    vx_float32 alpha;
-    int width, height;
-} Arg;
-
-
-#define PARAMETERS \
-    CT_GENERATE_PARAMETERS("random/alpha0.5f", ADD_SIZE_SMALL_SET, ARG, 0.5f), \
-    CT_GENERATE_PARAMETERS("random/alpha0.0f", ADD_SIZE_SMALL_SET, ARG, 0.0f), \
-    CT_GENERATE_PARAMETERS("random/alpha1.0f", ADD_SIZE_SMALL_SET, ARG, 1.0f), \
-    CT_GENERATE_PARAMETERS("random/alpha0.25f", ADD_SIZE_SMALL_SET, ARG, 0.25f), \
-    CT_GENERATE_PARAMETERS("random/alpha0.95f", ADD_SIZE_SMALL_SET, ARG, 0.95f), \
-    CT_GENERATE_PARAMETERS("random/alpha0.999f", ADD_SIZE_SMALL_SET, ARG, 0.999f), \
-    CT_GENERATE_PARAMETERS("random/alpha0.001f", ADD_SIZE_SMALL_SET, ARG, 0.001f)
-
-TEST_WITH_ARG(AccumulateWeighted, testGraphProcessing, Arg,
-    PARAMETERS
-)
-{
-    vx_context context = context_->vx_context_;
-    vx_image input_image = 0, accum_image = 0;
-    vx_scalar alpha_scalar = 0;
-    vx_graph graph = 0;
-    vx_node node = 0;
-
-    CT_Image input = NULL, accum_src = NULL, accum_dst = NULL;
-
-    ASSERT_VX_OBJECT(alpha_scalar = vxCreateScalar(context, VX_TYPE_FLOAT32, &arg_->alpha), VX_TYPE_SCALAR);
-
-    ASSERT_NO_FAILURE(input = accumulate_weighted_generate_random_8u(arg_->width, arg_->height));
-
-    ASSERT_NO_FAILURE(accum_src = accumulate_weighted_generate_random_8u(arg_->width, arg_->height));
-
-    ASSERT_VX_OBJECT(input_image = ct_image_to_vx_image(input, context), VX_TYPE_IMAGE);
-
-    ASSERT_VX_OBJECT(accum_image = ct_image_to_vx_image(accum_src, context), VX_TYPE_IMAGE);
-
-    ASSERT_VX_OBJECT(graph = vxCreateGraph(context), VX_TYPE_GRAPH);
-
-    ASSERT_VX_OBJECT(node = vxAccumulateWeightedImageNode(graph, input_image, alpha_scalar, accum_image), VX_TYPE_NODE);
-
-    VX_CALL(vxVerifyGraph(graph));
-    VX_CALL(vxProcessGraph(graph));
-
-    ASSERT_NO_FAILURE(accum_dst = ct_image_from_vx_image(accum_image));
-
-    ASSERT_NO_FAILURE(accumulate_weighted_check(input, arg_->alpha, accum_src, accum_dst));
-
-    VX_CALL(vxReleaseNode(&node));
-    VX_CALL(vxReleaseGraph(&graph));
-
-    ASSERT(node == 0);
-    ASSERT(graph == 0);
-
-    VX_CALL(vxReleaseImage(&accum_image));
-    VX_CALL(vxReleaseImage(&input_image));
-    VX_CALL(vxReleaseScalar(&alpha_scalar));
-
-    ASSERT(accum_image == 0);
-    ASSERT(input_image == 0);
-}
-
-TEST_WITH_ARG(AccumulateWeighted, testImmediateProcessing, Arg,
-    PARAMETERS
-)
-{
-    vx_context context = context_->vx_context_;
-    vx_image input_image = 0, accum_image = 0;
-    vx_scalar alpha_scalar = 0;
-
-    CT_Image input = NULL, accum_src = NULL, accum_dst = NULL;
-
-    ASSERT_VX_OBJECT(alpha_scalar = vxCreateScalar(context, VX_TYPE_FLOAT32, &arg_->alpha), VX_TYPE_SCALAR);
-
-    ASSERT_NO_FAILURE(input = accumulate_weighted_generate_random_8u(arg_->width, arg_->height));
-
-    ASSERT_NO_FAILURE(accum_src = accumulate_weighted_generate_random_8u(arg_->width, arg_->height));
-
-    ASSERT_VX_OBJECT(input_image = ct_image_to_vx_image(input, context), VX_TYPE_IMAGE);
-
-    ASSERT_VX_OBJECT(accum_image = ct_image_to_vx_image(accum_src, context), VX_TYPE_IMAGE);
-
-    VX_CALL(vxuAccumulateWeightedImage(context, input_image, alpha_scalar, accum_image));
-
-    ASSERT_NO_FAILURE(accum_dst = ct_image_from_vx_image(accum_image));
-
-    ASSERT_NO_FAILURE(accumulate_weighted_check(input, arg_->alpha, accum_src, accum_dst));
-
-    VX_CALL(vxReleaseImage(&accum_image));
-    VX_CALL(vxReleaseImage(&input_image));
-    VX_CALL(vxReleaseScalar(&alpha_scalar));
-
-    ASSERT(accum_image == 0);
-    ASSERT(input_image == 0);
-}
-
-TESTCASE_TESTS(AccumulateWeighted,
-        testNodeCreation,
-        testGraphProcessing,
-        testImmediateProcessing
-)
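
The AccumulateWeighted reference removed above blends the U8 accumulator toward the input using the alpha scalar: accum = saturate_u8((1 - alpha) * accum + alpha * input), and the check allows a tolerance of 1 (EXPECT_CTIMAGE_NEAR) for rounding differences. A minimal sketch, again with plain C types and an illustrative helper name:

    #include <stdint.h>
    #include <stddef.h>

    /* accum[i] = clamp_u8((1 - alpha) * accum[i] + alpha * input[i]). */
    static void accumulate_weighted_ref(const uint8_t *input, uint8_t *accum,
                                        size_t count, float alpha)
    {
        for (size_t i = 0; i < count; ++i)
        {
            float res = (1.0f - alpha) * (float)accum[i] + alpha * (float)input[i];
            accum[i] = (uint8_t)(res < 0.0f ? 0.0f :
                                 (res > 255.0f ? 255.0f : res));
        }
    }
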
diff --git a/test_conformance/test_addsub.c b/test_conformance/test_addsub.c
index dff1bd0..27e4bf3 100644
--- a/test_conformance/test_addsub.c
+++ b/test_conformance/test_addsub.c
@@ -1,4 +1,4 @@
-/* 
+/*
 
  * Copyright (c) 2012-2017 The Khronos Group Inc.
  *
@@ -15,6 +15,8 @@
  * limitations under the License.
  */
 
+#if defined OPENVX_USE_ENHANCED_VISION || OPENVX_CONFORMANCE_VISION
+
 #include "test_engine/test.h"
 
 #include <VX/vx.h>
@@ -675,3 +677,5 @@
 
 TESTCASE_TESTS(vxuAddSub, DISABLED_testNegativeFormat, DISABLED_testNegativeSizes,                testOverflowModes, testFuzzy)
 TESTCASE_TESTS(vxAddSub,  DISABLED_testNegativeFormat, DISABLED_testNegativeSizes, testInference, testOverflowModes, testFuzzy)
+
+#endif //OPENVX_USE_ENHANCED_VISION || OPENVX_CONFORMANCE_VISION
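
The guard added to test_addsub.c above is the pattern applied throughout the patch: each test source is wrapped in a feature-set conditional so it only compiles when the matching CMake option has defined the macro (add_definitions() defines it to 1 when the option is ON). A sketch of the pattern as used here:

    /* Compile these tests only when the Vision conformance feature set
     * or the Enhanced Vision feature set is enabled at configure time. */
    #if defined OPENVX_USE_ENHANCED_VISION || OPENVX_CONFORMANCE_VISION

    /* ... test cases belonging to the feature set ... */

    #endif /* OPENVX_USE_ENHANCED_VISION || OPENVX_CONFORMANCE_VISION */
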
diff --git a/test_conformance/test_array.c b/test_conformance/test_array.c
index d0ca84d..27b7af0 100644
--- a/test_conformance/test_array.c
+++ b/test_conformance/test_array.c
@@ -15,6 +15,8 @@
  * limitations under the License.
  */
 
+#if defined OPENVX_USE_ENHANCED_VISION || OPENVX_CONFORMANCE_VISION
+
 #include <math.h>
 #include <VX/vx.h>
 #include <VX/vxu.h>
@@ -872,3 +874,5 @@
     test_vxCopyArrayRangeRead,
     test_vxCopyArrayRangeWrite,
     test_vxMapArrayRangeWrite)
+
+#endif //OPENVX_USE_ENHANCED_VISION || OPENVX_CONFORMANCE_VISION
diff --git a/test_conformance/test_bilateralfilter.c b/test_conformance/test_bilateralfilter.c
index 28f0597..ddfac10 100644
--- a/test_conformance/test_bilateralfilter.c
+++ b/test_conformance/test_bilateralfilter.c
@@ -14,14 +14,18 @@
  * limitations under the License.
  */
 
+#ifdef OPENVX_USE_ENHANCED_VISION
+
 #include "test_engine/test.h"
 
 #include <VX/vx_types.h>
 #include <VX/vx_khr_nn.h>
+#include <VX/vxu.h>
 
 #include <assert.h>
 #include <limits.h>
 #include <math.h>
+#include <float.h>
 #include <stdbool.h>
 #include <stdint.h>
 #include <stdio.h>
@@ -51,23 +55,6 @@
     TT_U8
 };
 
-static size_t ownGetFlatByteOffset(
-        size_t index,
-        vx_size dim_num,
-        const vx_size * in_dims,
-        const vx_size * in_strides)
-{
-    size_t res = 0;
-
-    for (vx_size d = 0; d < dim_num; ++d)
-    {
-        res += in_strides[d] * (index % in_dims[d]);
-        index /= in_dims[d];
-    }
-
-    return res;
-}
-
 static void ownUnpackFormat(
         enum TestTensorDF fmt,
         /*OUT*/ vx_enum * data_type,
@@ -112,116 +99,519 @@
     }
 }
 
+#define  COLOR_WEIGHT_SIZE_PER_CHANNEL      256
+
+static void getMinMax(const void* src, const vx_size* src_strides, const vx_size* dims, vx_size num_of_dims,
+                      vx_int16 *max_value, vx_int16 *min_value)
+{
+    vx_int16 maxVal = INT16_MIN;
+    vx_int16 minVal = INT16_MAX;
+    if (num_of_dims == 2)
+    {
+        for (vx_uint32 y = 0; y < dims[1]; y++)
+        {
+            for (vx_uint32 x = 0; x < dims[0]; x++)
+            {
+                vx_uint32 offset = y * src_strides[1] + x * src_strides[0];
+                vx_int16 val = *(vx_int16 *)((vx_int8 *)src + offset);
+                if (val > maxVal)
+                {
+                    maxVal = val;
+                }
+                if (val < minVal)
+                {
+                    minVal = val;
+                }
+            }
+        }
+        *max_value = maxVal;
+        *min_value = minVal;
+    }
+    else if (num_of_dims == 3)
+    {
+        for (vx_uint32 y = 0; y < dims[2]; y++)
+        {
+            for (vx_uint32 x = 0; x < dims[1]; x++)
+            {
+                for (vx_uint32 z = 0; z < dims[0]; z++)
+                {
+                    vx_uint32 offset = y * src_strides[2] + x * src_strides[1] + z * src_strides[0];
+                    vx_int16 val = *(vx_int16 *)((vx_int8 *)src + offset);
+                    if (val > maxVal)
+                    {
+                        maxVal = val;
+                    }
+                    if (val < minVal)
+                    {
+                        minVal = val;
+                    }
+                }
+            }
+        }
+        *max_value = maxVal;
+        *min_value = minVal;
+    }
+}
+
+static void releaseRes(void *pData)
+{
+    if (NULL != pData)
+    {
+        ct_free_mem(pData);
+    }
+    return;
+}
+
+static vx_status calcColorWeight(vx_uint8 cn, vx_float64 gauss_color_coeff, vx_float32 **color_weight)
+{
+    vx_float32 *tmp_weight = (vx_float32 *)ct_alloc_mem(cn * COLOR_WEIGHT_SIZE_PER_CHANNEL * sizeof(vx_float32));
+    if (NULL == tmp_weight)
+    {
+        return VX_ERROR_NO_MEMORY;
+    }
+
+    for (vx_int32 i = 0; i < (cn * COLOR_WEIGHT_SIZE_PER_CHANNEL); i++)
+    {
+        tmp_weight[i] = (vx_float32)exp(i * i * gauss_color_coeff);
+    }
+
+    *color_weight = tmp_weight;
+
+    return VX_SUCCESS;
+}
+
+static vx_status calcSpaceWeight(vx_int32 diameter, vx_float64 gauss_space_coeff, vx_float32 **space_weight)
+{
+    vx_int32 radius = diameter / 2;
+    vx_float32 *tmp_weight = (vx_float32 *)ct_alloc_mem(diameter * diameter * sizeof(vx_float32));
+    if (NULL == tmp_weight)
+    {
+        return VX_ERROR_NO_MEMORY;
+    }
+
+    for (vx_int32 i = -radius; i <= radius; i++)
+    {
+        vx_int32 j = -radius;
+        for (; j <= radius; j++)
+        {
+            vx_float64 r = sqrt((vx_float64)i * i + (vx_float64)j * j);
+            if (r > radius)
+            {
+                continue;
+            }
+            tmp_weight[(i + radius) * diameter + (j + radius)] = (vx_float32)exp(r * r * gauss_space_coeff);
+        }
+    }
+
+    *space_weight = tmp_weight;
+
+    return VX_SUCCESS;
+}
+
+
 static void ownCheckBilateralFilterResult(
         const void * in_ptr, const vx_size * in_dims, const vx_size * in_strides,
         enum TestTensorDF fmt,
         vx_size dim_num,
-        vx_size out_count,
         int   diameter,
         float sigmaSpace,
-        float sigmaValues,
-        void * out_ptr, const vx_size * out_dims, const vx_size * out_strides)
+        float sigmaColor,
+        void * out_ptr, const vx_size * out_dims, const vx_size * out_strides,
+        vx_border_t border)
 {
+    vx_status status = VX_SUCCESS;
     vx_float32 tolerance = 0.0;
     vx_float32 total_num = 0.0;
     vx_float32 equal_num = 0.0;
-    vx_int32 radius = diameter/2;
-    vx_float32 color_weight_8[256];
-    vx_float32 color_weight_16[256*256];
-    vx_float32 sum = 0, wsum = 0, w = 0;
-    vx_float32 gauss_color_coeff = -0.5/(sigmaValues*sigmaValues);
-    vx_float32 gauss_space_coeff = -0.5/(sigmaSpace*sigmaSpace);
+    vx_int32 y = 0, x = 0;
+    vx_int32 low_x, low_y, high_x, high_y;
+    vx_int32 radius_y, radius_x;
+    vx_float32 scale_index = 0;
+    vx_int32 radius = diameter / 2;
+    vx_enum border_mode = border.mode;
+    vx_int16 out = 0, ref = 0;
 
-    vx_float32 *space_weight = (vx_float32*)malloc((radius * 2 + 1) * sizeof(vx_float32));
+    vx_float32 *color_weight = NULL;
+    vx_float32 *space_weight = NULL;
+    vx_uint8 cn = dim_num == 2 ? 1 : 3;
 
-    for(vx_int32 i = 0; i < 256; i++)
+    vx_float64 gauss_color_coeff = -0.5/(sigmaColor*sigmaColor);
+    vx_float64 gauss_space_coeff = -0.5/(sigmaSpace*sigmaSpace);
+
+    if (border.mode == VX_BORDER_UNDEFINED)
     {
-        color_weight_8[i] = (vx_float32)exp(i*i*gauss_color_coeff);
+        low_x = radius;
+        high_x = (in_dims[dim_num - 2] >= radius) ? in_dims[dim_num - 2] - radius : 0;
+        low_y = radius;
+        high_y = (in_dims[dim_num - 1] >= radius) ? in_dims[dim_num - 1] - radius : 0;
+    }
+    else
+    {
+        low_x = 0;
+        high_x = in_dims[dim_num - 2];
+        low_y = 0;
+        high_y = in_dims[dim_num - 1];
     }
 
-    for(vx_int32 i = 0; i < 256*256; i++)
+    if (fmt == TT_Q78)
     {
-        color_weight_16[i] = (vx_float32)exp(i*i*gauss_color_coeff);
-    }
-
-    for(vx_int32 i = -radius; i <= radius; i++ )
-    {
-        space_weight[i+radius] = (vx_float32)exp(i*i*gauss_space_coeff);
-    }
-
-    for (size_t index = radius; index < out_count-radius; ++index)
-    {
-        const size_t in_byte_offset = ownGetFlatByteOffset(index, dim_num, in_dims, in_strides);
-        const size_t out_byte_offset = ownGetFlatByteOffset(index, dim_num, out_dims, out_strides);
-
-        const char * in_b_ptr = (char*)in_ptr + in_byte_offset;
-        const char * out_b_ptr = (char*)out_ptr + out_byte_offset;
-
-        switch (fmt)
+        vx_int16 minVal = -1;
+        vx_int16 maxVal = 1;
+        getMinMax(in_ptr, in_strides, in_dims, dim_num, &maxVal, &minVal);
+        if ((vx_float32)(abs(maxVal - minVal)) < FLT_EPSILON)
         {
-            case TT_Q78:
+            if (dim_num == 2)
             {
-                const vx_int16 in = *(vx_int16*)in_b_ptr;
-                const vx_int16 out = *(vx_int16*)out_b_ptr;
-                int16_t ref;
-
-                sum = 0, wsum = 0;
-
-                for(vx_int32 j = -radius; j <= radius; j++)
+                for (y = low_y; y < high_y; y++)
                 {
-                    vx_size nei_byte_offset = ownGetFlatByteOffset(index + j, dim_num, in_dims, in_strides);
-                    const char *nei_b_ptr = (char*)in_ptr + nei_byte_offset;
-                    const vx_int16 nei = *(vx_int16*)nei_b_ptr;
-                    w = space_weight[j+radius]*color_weight_16[abs(nei - in)];
-                    sum += nei*w;
-                    wsum += w;
+                    for (x = low_x; x < high_x; x++)
+                    {
+                        out = *((vx_int16 *)((vx_uint8 *)out_ptr + y * in_strides[1] + x * in_strides[0]));
+                        ref = *((vx_int16 *)((vx_uint8 *)in_ptr + y * in_strides[1] + x * in_strides[0]));
+                        if (out == ref)
+                        {
+                            equal_num += 1;
+                        }
+                        total_num += 1;
+                    }
                 }
-                ref = (vx_int16)round(sum/wsum);
-                
-                total_num += 1; 
+                tolerance = (equal_num / total_num);
+                ASSERT(tolerance >= MIN_TOLERANCE);
+                return;
+            }
+            else if (dim_num == 3)
+            {
+                for (y = low_y; y < high_y; y++)
+                {
+                    for (x = low_x; x < high_x; x++)
+                    {
+                        out = *(vx_int16 *)((vx_uint8 *)out_ptr + y * out_strides[2] + x * out_strides[1] + 0 * out_strides[0]);
+                        ref = *(vx_int16 *)((vx_uint8 *)in_ptr + y * in_strides[2] + x * in_strides[1] + 0 * in_strides[0]);
+                        if (out == ref)
+                        {
+                            equal_num += 1;
+                        }
+                        out = *(vx_int16 *)((vx_uint8 *)out_ptr + y * out_strides[2] + x * out_strides[1] + 1 * out_strides[0]);
+                        ref = *(vx_int16 *)((vx_uint8 *)in_ptr + y * in_strides[2] + x * in_strides[1] + 1 * in_strides[0]);
+                        if (out == ref)
+                        {
+                            equal_num += 1;
+                        }
+                        out = *(vx_int16 *)((vx_uint8 *)out_ptr + y * out_strides[2] + x * out_strides[1] + 2 * out_strides[0]);
+                        ref = *(vx_int16 *)((vx_uint8 *)in_ptr + y * in_strides[2] + x * in_strides[1] + 2 * in_strides[0]);
+                        if (out == ref)
+                        {
+                            equal_num += 1;
+                        }
+                        total_num += 3;
+                    }
+                }
+                tolerance = (equal_num / total_num);
+                ASSERT(tolerance >= MIN_TOLERANCE);
+                return;
+            }
+
+            ASSERT(tolerance >= MIN_TOLERANCE);
+            return;
+        }
+
+        //calculation color weight
+        vx_int32 kExpNumBinsPerChannel = 1 << 12;
+        vx_float32 lastExpVal = 1.f;
+        vx_float32 len;
+        vx_int32 kExpNumBins;
+        len = (vx_float32)(maxVal - minVal) * cn;
+        kExpNumBins = kExpNumBinsPerChannel * cn;
+        color_weight = (vx_float32 *)ct_alloc_mem((kExpNumBins + 2) * sizeof(vx_float32));
+        if (NULL == color_weight)
+        {
+            ASSERT(tolerance >= MIN_TOLERANCE);
+            return;
+        }
+        scale_index = kExpNumBins / len;
+        for (vx_uint32 i = 0; i < (kExpNumBins + 2); i++)
+        {
+            if (lastExpVal > 0.f)
+            {
+                vx_float64 val = i / scale_index;
+                color_weight[i] = (vx_float32)exp(val * val * gauss_color_coeff);
+                lastExpVal = color_weight[i];
+            }
+            else
+            {
+                color_weight[i] = 0.f;
+            }
+        }
+    }
+    else if (fmt == TT_U8)
+    {
+        (void)calcColorWeight(cn, gauss_color_coeff, &color_weight);
+    }
+    status = calcSpaceWeight(diameter, gauss_space_coeff, &space_weight);
+    if (status != VX_SUCCESS)
+    {
+        releaseRes(color_weight);
+        releaseRes(space_weight);
+    }
+
+    if (dim_num == 2)
+    {
+        for (y = low_y; y < high_y; y++)
+        {
+            for (x = low_x; x < high_x; x++)
+            {
+                vx_int16 value = 0;
+                if (fmt == TT_U8)
+                {
+                    out = *((vx_uint8 *)out_ptr + y * out_strides[1] + x * out_strides[0]);
+                    value = *((vx_uint8 *)in_ptr + y * in_strides[1] + x * in_strides[0]);
+                }
+                else if (fmt == TT_Q78)
+                {
+                    out = *((vx_int16 *)((vx_uint8 *)out_ptr + y * out_strides[1] + x * out_strides[0]));
+                    value = *((vx_int16 *)((vx_uint8 *)in_ptr + y * in_strides[1] + x * in_strides[0]));
+                }
+
+                vx_float32 sum = 0, wsum = 0;
+                //kernel filter
+                for (radius_y = -radius; radius_y <= radius; radius_y++)
+                {
+                    for (radius_x = -radius; radius_x <= radius; radius_x++)
+                    {
+                        vx_float64 r = sqrt((vx_float64)radius_y * radius_y + (vx_float64)radius_x * radius_x);
+                        if (r > radius)
+                        {
+                            continue;
+                        }
+                        vx_int32 neighbor_x = x + radius_x;
+                        vx_int32 neighbor_y = y + radius_y;
+                        vx_int16 neighborVal = 0;
+                        if (border_mode == VX_BORDER_REPLICATE)
+                        {
+                            vx_int32 tmpx = neighbor_x < 0 ? 0 : (neighbor_x >((vx_int32)in_dims[0] - 1) ? ((vx_int32)in_dims[0] - 1) : neighbor_x);
+                            vx_int32 tmpy = neighbor_y < 0 ? 0 : (neighbor_y >((vx_int32)in_dims[1] - 1) ? ((vx_int32)in_dims[1] - 1) : neighbor_y);
+                            if (fmt == TT_U8)
+                            {
+                                neighborVal = *((vx_uint8 *)in_ptr + tmpy * in_strides[1] + tmpx * in_strides[0]);
+                            }
+                            else if (fmt == TT_Q78)
+                            {
+                                neighborVal = *((vx_int16 *)((vx_uint8 *)in_ptr + tmpy * in_strides[1] + tmpx * in_strides[0]));
+                            }
+                        }
+                        else if (border_mode == VX_BORDER_CONSTANT)
+                        {
+                            vx_int32 tmpx = neighbor_x < 0 ? 0 : (neighbor_x >((vx_int32)in_dims[0] - 1) ? ((vx_int32)in_dims[0] - 1) : neighbor_x);
+                            vx_int32 tmpy = neighbor_y < 0 ? 0 : (neighbor_y >((vx_int32)in_dims[1] - 1) ? ((vx_int32)in_dims[1] - 1) : neighbor_y);
+                            if (neighbor_x < 0 || neighbor_y < 0)
+                            {
+                                if (fmt == TT_U8)
+                                {
+                                    neighborVal = border.constant_value.U8;
+                                }
+                                else if (fmt == TT_Q78)
+                                {
+                                    neighborVal = border.constant_value.S16;
+                                }
+                            }
+                            else
+                            {
+                                if (fmt == TT_U8)
+                                {
+                                    neighborVal = *((vx_uint8 *)in_ptr + tmpy * in_strides[1] + tmpx * in_strides[0]);
+                                }
+                                else if (fmt == TT_Q78)
+                                {
+                                    neighborVal = *((vx_int16 *)((vx_uint8 *)in_ptr + tmpy * in_strides[1] + tmpx * in_strides[0]));
+                                }
+                            }
+                        }
+
+                        vx_float32 w = 0;
+                        if (fmt == TT_U8)
+                        {
+                            w = space_weight[(radius_y + radius) * diameter + (radius_x + radius)] *
+                                       color_weight[abs(neighborVal - value)];
+                        }
+                        else if (fmt == TT_Q78)
+                        {
+                            vx_float32 alpha = abs(neighborVal - value) * scale_index;
+                            vx_int32 idx = (vx_int32)floorf(alpha);
+                            alpha -= idx;
+                            w = space_weight[(radius_y + radius) * diameter + (radius_x + radius)] *
+                                (color_weight[idx] + alpha * (color_weight[idx + 1] - color_weight[idx]));
+                        }
+                        sum += neighborVal * w;
+                        wsum += w;
+                    }
+                }
+
+                if (fmt == TT_U8)
+                {
+                    ref = (vx_uint8)roundf(sum / wsum);
+                }
+                else if (fmt == TT_Q78)
+                {
+                    ref = (vx_int16)roundf(sum / wsum);
+                }
+
+                total_num += 1;
 
                 if (ref == out)
                 {
-                     equal_num += 1;
-                } 
-            }
-            break;
-            case TT_U8:
-            {
-                const vx_uint8 in = *(vx_uint8*)in_b_ptr;
-                const vx_uint8 out = *(vx_uint8*)out_b_ptr;
-                uint8_t ref;
-
-                sum = 0, wsum = 0;
-
-                for(vx_int32 j = -radius; j <= radius; j++)
-                {
-                    vx_size nei_byte_offset = ownGetFlatByteOffset(index + j, dim_num, in_dims, in_strides);
-                    const char *nei_b_ptr = (char*)in_ptr + nei_byte_offset;
-                    const vx_uint8 nei = *(vx_uint8*)nei_b_ptr;
-                    w = space_weight[j+radius]*color_weight_8[abs(nei - in)];
-                    sum += nei*w;
-                    wsum += w;
+                    equal_num += 1;
                 }
-                ref = (vx_uint8)round(sum/wsum);
-
-                total_num += 1; 
-
-                if (ref == out)
-                {
-                     equal_num += 1;
-                } 
             }
-            break;
-            default: assert(0);
+        }
+    }
+    else if (dim_num == 3)
+    {
+        for (y = low_y; y < high_y; y++)
+        {
+            for (x = low_x; x < high_x; x++)
+            {
+                vx_int16 b0 = 0, g0 = 0, r0 = 0;
+                vx_int16 outb = 0, outg = 0, outr = 0;
+                if (fmt == TT_U8)
+                {
+                    outb = *((vx_uint8 *)out_ptr + y * out_strides[2] + x * out_strides[1] + 0 * out_strides[0]);
+                    outg = *((vx_uint8 *)out_ptr + y * out_strides[2] + x * out_strides[1] + 1 * out_strides[0]);
+                    outr = *((vx_uint8 *)out_ptr + y * out_strides[2] + x * out_strides[1] + 2 * out_strides[0]);
+                    b0 = *((vx_uint8 *)in_ptr + y * in_strides[2] + x * in_strides[1] + 0 * in_strides[0]);
+                    g0 = *((vx_uint8 *)in_ptr + y * in_strides[2] + x * in_strides[1] + 1 * in_strides[0]);
+                    r0 = *((vx_uint8 *)in_ptr + y * in_strides[2] + x * in_strides[1] + 2 * in_strides[0]);
+                }
+                else if (fmt == TT_Q78)
+                {
+                    outb = *((vx_int16 *)((vx_uint8 *)out_ptr + y * out_strides[2] + x * out_strides[1] + 0 * out_strides[0]));
+                    outg = *((vx_int16 *)((vx_uint8 *)out_ptr + y * out_strides[2] + x * out_strides[1] + 1 * out_strides[0]));
+                    outr = *((vx_int16 *)((vx_uint8 *)out_ptr + y * out_strides[2] + x * out_strides[1] + 2 * out_strides[0]));
+                    b0 = *((vx_int16 *)((vx_uint8 *)in_ptr + y * in_strides[2] + x * in_strides[1] + 0 * in_strides[0]));
+                    g0 = *((vx_int16 *)((vx_uint8 *)in_ptr + y * in_strides[2] + x * in_strides[1] + 1 * in_strides[0]));
+                    r0 = *((vx_int16 *)((vx_uint8 *)in_ptr + y * in_strides[2] + x * in_strides[1] + 2 * in_strides[0]));
+                }
+                vx_float32 sum_b = 0, sum_g = 0, sum_r = 0, wsum = 0;
+                //kernel filter
+                for (radius_y = -radius; radius_y <= radius; radius_y++)
+                {
+                    for (radius_x = -radius; radius_x <= radius; radius_x++)
+                    {
+                        vx_float64 dist = sqrt((vx_float64)radius_y * radius_y + (vx_float64)radius_x * radius_x);
+                        if (dist > radius)
+                        {
+                            continue;
+                        }
+                        vx_int32 neighbor_x = x + radius_x;
+                        vx_int32 neighbor_y = y + radius_y;
+                        vx_int16 b = 0, g = 0, r = 0;
+                        if (border_mode == VX_BORDER_REPLICATE)
+                        {
+                            vx_int32 tmpx = neighbor_x < 0 ? 0 : (neighbor_x >((vx_int32)in_dims[1] - 1) ? ((vx_int32)in_dims[1] - 1) : neighbor_x);
+                            vx_int32 tmpy = neighbor_y < 0 ? 0 : (neighbor_y >((vx_int32)in_dims[2] - 1) ? ((vx_int32)in_dims[2] - 1) : neighbor_y);
+                            if (fmt == TT_U8)
+                            {
+                                b = *((vx_uint8 *)in_ptr + tmpy * in_strides[2] + tmpx * in_strides[1] + 0 * in_strides[0]);
+                                g = *((vx_uint8 *)in_ptr + tmpy * in_strides[2] + tmpx * in_strides[1] + 1 * in_strides[0]);
+                                r = *((vx_uint8 *)in_ptr + tmpy * in_strides[2] + tmpx * in_strides[1] + 2 * in_strides[0]);
+                            }
+                            else if (fmt == TT_Q78)
+                            {
+                                b = *((vx_int16 *)((vx_uint8 *)in_ptr + tmpy * in_strides[2] + tmpx * in_strides[1] + 0 * in_strides[0]));
+                                g = *((vx_int16 *)((vx_uint8 *)in_ptr + tmpy * in_strides[2] + tmpx * in_strides[1] + 1 * in_strides[0]));
+                                r = *((vx_int16 *)((vx_uint8 *)in_ptr + tmpy * in_strides[2] + tmpx * in_strides[1] + 2 * in_strides[0]));
+                            }
+                        }
+                        else if (border_mode == VX_BORDER_CONSTANT)
+                        {
+                            vx_int32 tmpx = neighbor_x < 0 ? 0 : (neighbor_x >((vx_int32)in_dims[1] - 1) ? ((vx_int32)in_dims[1] - 1) : neighbor_x);
+                            vx_int32 tmpy = neighbor_y < 0 ? 0 : (neighbor_y >((vx_int32)in_dims[2] - 1) ? ((vx_int32)in_dims[2] - 1) : neighbor_y);
+                            if (neighbor_x < 0 || neighbor_y < 0)
+                            {
+                                if (fmt == TT_U8)
+                                {
+                                    b = g = r = border.constant_value.U8;
+                                }
+                                else if (fmt == TT_Q78)
+                                {
+                                    b = g = r  = border.constant_value.S16;
+                                }
+                            }
+                            else
+                            {
+                                if (fmt == TT_U8)
+                                {
+                                    b = *((vx_uint8 *)in_ptr + tmpy * in_strides[2] + tmpx * in_strides[1] + 0 * in_strides[0]);
+                                    g = *((vx_uint8 *)in_ptr + tmpy * in_strides[2] + tmpx * in_strides[1] + 1 * in_strides[0]);
+                                    r = *((vx_uint8 *)in_ptr + tmpy * in_strides[2] + tmpx * in_strides[1] + 2 * in_strides[0]);
+                                }
+                                else if (fmt == TT_Q78)
+                                {
+                                    b = *((vx_int16 *)((vx_uint8 *)in_ptr + tmpy * in_strides[2] + tmpx * in_strides[1] + 0 * in_strides[0]));
+                                    g = *((vx_int16 *)((vx_uint8 *)in_ptr + tmpy * in_strides[2] + tmpx * in_strides[1] + 1 * in_strides[0]));
+                                    r = *((vx_int16 *)((vx_uint8 *)in_ptr + tmpy * in_strides[2] + tmpx * in_strides[1] + 2 * in_strides[0]));
+                                }
+                            }
+                        }
+
+                        vx_float32 w = 0;
+                        if (fmt == TT_U8)
+                        {
+                            w = space_weight[(radius_y + radius) * diameter + (radius_x + radius)] *
+                                       color_weight[abs(b - b0) + abs(g - g0) + abs(r - r0)];
+                        }
+                        else if (fmt == TT_Q78)
+                        {
+                            vx_float32 alpha = (abs(b- b0) + abs(g - g0) + abs(r - r0)) * scale_index;
+                            vx_int32 idx = (vx_int32)floorf(alpha);
+                            alpha -= idx;
+                            w = space_weight[(radius_y + radius) * diameter + (radius_x + radius)] *
+                                (color_weight[idx] + alpha * (color_weight[idx + 1] - color_weight[idx]));
+                        }
+                        sum_b += b * w;
+                        sum_g += g * w;
+                        sum_r += r * w;
+                        wsum += w;
+                    }
+                }
+
+                vx_int16 refb = 0, refg = 0, refr = 0;
+                if (fmt == TT_U8)
+                {
+                    refb = (vx_uint8)roundf(sum_b / wsum);
+                    refg = (vx_uint8)roundf(sum_g / wsum);
+                    refr = (vx_uint8)roundf(sum_r / wsum);
+                }
+                else if (fmt == TT_Q78)
+                {
+                    refb = (vx_int16)roundf(sum_b / wsum);
+                    refg = (vx_int16)roundf(sum_g / wsum);
+                    refr = (vx_int16)roundf(sum_r / wsum);
+                }
+
+                total_num += 3;
+
+                if (refb == outb)
+                {
+                    equal_num += 1;
+                }
+                if (refg == outg)
+                {
+                    equal_num += 1;
+                }
+                if (refr == outr)
+                {
+                    equal_num += 1;
+                }
+            }
         }
     }
 
     tolerance = (equal_num / total_num);
 
-    free(space_weight);
-
     ASSERT(tolerance >= MIN_TOLERANCE);
+
+    releaseRes(color_weight);
+    releaseRes(space_weight);
 }
 
 /****************************************************************************
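
The rewritten bilateral check above forms the classic bilateral weight explicitly: a spatial Gaussian over the kernel offset and a range Gaussian over the intensity difference, i.e. w = exp(-(dx^2 + dy^2) / (2 * sigmaSpace^2)) * exp(-diff^2 / (2 * sigmaColor^2)), with neighbors outside the circular footprint skipped. A minimal sketch of one weight for the single-channel U8 case, using the same coefficients as calcSpaceWeight()/calcColorWeight() (the function name is illustrative, not CTS API):

    #include <math.h>
    #include <stdlib.h>

    /* Weight of one neighbor at offset (dy, dx) relative to the center pixel. */
    static float bilateral_weight(int dy, int dx, int center_val, int neighbor_val,
                                  int diameter, float sigmaSpace, float sigmaColor)
    {
        const int    radius      = diameter / 2;
        const double space_coeff = -0.5 / ((double)sigmaSpace * sigmaSpace);
        const double color_coeff = -0.5 / ((double)sigmaColor * sigmaColor);
        const double dist        = sqrt((double)dy * dy + (double)dx * dx);

        if (dist > radius)   /* outside the circular footprint: not accumulated */
            return 0.0f;

        const int diff = abs(neighbor_val - center_val);
        return (float)(exp(dist * dist * space_coeff) *
                       exp((double)diff * diff * color_coeff));
    }
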
@@ -232,151 +622,294 @@
 
 TESTCASE(BilateralFilter, CT_VXContext, ct_setup_vx_context, 0)
 
-typedef struct
+static void* bilateral_generate_random(int width, int height, int cn, enum TestTensorDF tensor_fmt)
 {
-    const char * name;
-    enum TestTensorDF src_fmt;
-    enum TestTensorDF dst_fmt;
-    int   diameter;
-    float sigmaSpace;
-    float sigmaValues;
-} test_bilateral_filter_op_arg;
-
-TEST_WITH_ARG(BilateralFilter, testBilateralFilterOp, test_bilateral_filter_op_arg,
-        ARG("BILATERAL_FILTER_Q78", TT_Q78, TT_Q78, 5, 1, 1),
-        ARG("BILATERAL_FILTER_U8", TT_U8, TT_U8, 5, 1, 1),
-)
-{
-    const vx_context context = context_->vx_context_;
-    const enum TestTensorDF src_fmt = arg_->src_fmt;
-    const enum TestTensorDF dst_fmt = arg_->dst_fmt;
-    assert(src_fmt == TT_Q78 || src_fmt == TT_U8);
-    assert(dst_fmt == TT_Q78 || dst_fmt == TT_U8);
-
-    const int diameter = arg_->diameter;
-    const float sigmaSpace = arg_->sigmaSpace;
-    const float sigmaValues = arg_->sigmaValues;
-
-    vx_size max_dims = 0;
-    {
-        VX_CALL(vxQueryContext(context, VX_CONTEXT_MAX_TENSOR_DIMS, &max_dims, sizeof(max_dims)));
-        ASSERT(max_dims > 3);
-        if(!DEBUG_TEST_TENSOR_BEYOND_FOUR_DIMS) max_dims = 4; else max_dims = MIN(max_dims, MAX_TENSOR_DIMS);
-    }
-
+    vx_enum data_type = VX_TYPE_UINT8;
+    vx_uint8 fixed_point_position = 0;
+    vx_size sizeof_data_type = sizeof(vx_uint8);
+    size_t count = 0;
     uint64_t rng;
     {
         uint64_t * seed = &CT()->seed_;
-        ASSERT(!!seed);
+        //ASSERT(!!seed);
         CT_RNG_INIT(rng, *seed);
     }
 
-    vx_enum src_data_type;
-    vx_enum dst_data_type;
-    vx_uint8 src_fixed_point_position;
-    vx_uint8 dst_fixed_point_position;
-    vx_size src_sizeof_data_type;
-    vx_size dst_sizeof_data_type;
+    ownUnpackFormat(tensor_fmt, &data_type, &fixed_point_position, &sizeof_data_type);
+
+    count = width * height * cn;
+    void *data = ct_alloc_mem(count * sizeof_data_type);
+    if (data != NULL)
+    {
+        ownFillRandData(tensor_fmt, &rng, count, data);
+    }
+
+    return data;
+}
+
+typedef struct {
+    const char* testName;
+    void* (*generator)(int width, int height, int cn, enum TestTensorDF tensor_fmt);
+    const char* fileName;
+    vx_border_t border;
+    int width, height;
+    int cn;
+    int diameter;
+    float sigmaSpace;
+    float sigmaColor;
+    enum TestTensorDF tensor_fmt;
+} bilateral_arg;
+
+#define BILATERAL_FILTER_BORDERS(testArgName, nextmacro, ...) \
+    CT_EXPAND(nextmacro(testArgName "/VX_BORDER_REPLICATE", __VA_ARGS__, { VX_BORDER_REPLICATE, {{ 0 }} })), \
+    CT_EXPAND(nextmacro(testArgName "/VX_BORDER_CONSTANT=0", __VA_ARGS__, { VX_BORDER_CONSTANT, {{ 0 }} })), \
+    CT_EXPAND(nextmacro(testArgName "/VX_BORDER_CONSTANT=1", __VA_ARGS__, { VX_BORDER_CONSTANT, {{ 1 }} })), \
+    CT_EXPAND(nextmacro(testArgName "/VX_BORDER_CONSTANT=127", __VA_ARGS__, { VX_BORDER_CONSTANT, {{ 127 }} })), \
+    CT_EXPAND(nextmacro(testArgName "/VX_BORDER_CONSTANT=255", __VA_ARGS__, { VX_BORDER_CONSTANT, {{ 255 }} }))
+
+#define BILATERAL_CHANNEL(testArgName, nextmacro, ...) \
+    CT_EXPAND(nextmacro(testArgName "/channel=1", __VA_ARGS__, 1)), \
+    CT_EXPAND(nextmacro(testArgName "/channel=3", __VA_ARGS__, 3))
+
+#define BILATERAL_DIAMETER(testArgName, nextmacro, ...) \
+    CT_EXPAND(nextmacro(testArgName "/diameter=5", __VA_ARGS__, 5)), \
+    CT_EXPAND(nextmacro(testArgName "/diameter=7", __VA_ARGS__, 7)), \
+    CT_EXPAND(nextmacro(testArgName "/diameter=9", __VA_ARGS__, 9)) \
+
+#define BILATERAL_SPACE_WEIGHT(testArgName, nextmacro, ...) \
+    CT_EXPAND(nextmacro(testArgName "/sigmaSpace=10", __VA_ARGS__, 10))
+
+#define BILATERAL_COLOR_WEIGHT(testArgName, nextmacro, ...) \
+    CT_EXPAND(nextmacro(testArgName "/sigmaColor=5", __VA_ARGS__, 5))
+
+#define BILATERAL_FORMAT(testArgName, nextmacro, ...) \
+    CT_EXPAND(nextmacro(testArgName "/TT_U8", __VA_ARGS__, TT_U8)), \
+    CT_EXPAND(nextmacro(testArgName "/TT_Q78", __VA_ARGS__, TT_Q78))
+
+#define BILATERAL_PARAMETERS \
+    CT_GENERATE_PARAMETERS("random", BILATERAL_FILTER_BORDERS, ADD_SIZE_SMALL_SET, BILATERAL_CHANNEL, BILATERAL_DIAMETER, BILATERAL_SPACE_WEIGHT, BILATERAL_COLOR_WEIGHT, BILATERAL_FORMAT, ARG, bilateral_generate_random, NULL)
+
+TEST_WITH_ARG(BilateralFilter, testGraphProcessing, bilateral_arg,
+        BILATERAL_PARAMETERS
+)
+{
+    vx_context context = context_->vx_context_;
+    const enum TestTensorDF src_fmt = arg_->tensor_fmt;
+    const enum TestTensorDF dst_fmt = arg_->tensor_fmt;
+    assert(src_fmt == TT_Q78 || src_fmt == TT_U8);
+    assert(dst_fmt == TT_Q78 || dst_fmt == TT_U8);
+    const vx_border_t border = arg_->border;
+    const int diameter = arg_->diameter;
+    const float sigmaSpace = arg_->sigmaSpace;
+    const float sigmaColor = arg_->sigmaColor;
+    const int cn = arg_->cn;
+    const int width = arg_->width;
+    const int height = arg_->height;
+    vx_size num_of_dims = 2;
+
+    vx_enum src_data_type = 0;
+    vx_enum dst_data_type = 0;
+    vx_uint8 src_fixed_point_position = 0;
+    vx_uint8 dst_fixed_point_position= 0;
+    vx_size src_sizeof_data_type = 1;
+    vx_size dst_sizeof_data_type = 1;
     ownUnpackFormat(src_fmt, &src_data_type, &src_fixed_point_position, &src_sizeof_data_type);
     ownUnpackFormat(dst_fmt, &dst_data_type, &dst_fixed_point_position, &dst_sizeof_data_type);
 
-    size_t * const tensor_dims = malloc(sizeof(*tensor_dims) * max_dims);
-    size_t * const src_tensor_strides = malloc(sizeof(*src_tensor_strides) * max_dims);
-    size_t * const dst_tensor_strides = malloc(sizeof(*dst_tensor_strides) * max_dims);
-    ASSERT(tensor_dims && src_tensor_strides && dst_tensor_strides);
-
-    // The input data a vx_tensor. maximum 3 dimension and minimum 2.
-    for (vx_size dims = 2; dims <= max_dims; ++dims)
-    for (int iter = 0; iter < TEST_TENSOR_NUM_ITERATIONS; ++iter)
+    if (cn == 3)
     {
-        if (DEBUG_TEST_TENSOR_ENABLE_PRINTF)
-        {
-            printf("dims #: %zu,\titer #: %d\n", dims, iter);
-            fflush(stdout);
-        }
-
-        for (vx_size i = 0; i < dims; ++i)
-        {
-            tensor_dims[i] = (size_t)CT_RNG_NEXT_INT(rng, TEST_TENSOR_MIN_DIM_SZ, TEST_TENSOR_MAX_DIM_SZ+1);
-
-            src_tensor_strides[i] = i ? src_tensor_strides[i-1] * tensor_dims[i-1] : src_sizeof_data_type;
-            dst_tensor_strides[i] = i ? dst_tensor_strides[i-1] * tensor_dims[i-1] : dst_sizeof_data_type;
-        }
-
-        vx_tensor src_tensor = vxCreateTensor(context, dims, tensor_dims, src_data_type, src_fixed_point_position);
-        vx_tensor dst_tensor = vxCreateTensor(context, dims, tensor_dims, dst_data_type, dst_fixed_point_position);
-        ASSERT_VX_OBJECT(src_tensor, VX_TYPE_TENSOR);
-        ASSERT_VX_OBJECT(dst_tensor, VX_TYPE_TENSOR);
-
-        const size_t src_tensor_bytes = tensor_dims[dims-1] * src_tensor_strides[dims-1];
-        const size_t dst_tensor_bytes = tensor_dims[dims-1] * dst_tensor_strides[dims-1];
-        const size_t count = src_tensor_bytes / src_sizeof_data_type;
-
-        if (DEBUG_TEST_TENSOR_ENABLE_PRINTF)
-        {
-            printf("\tconfig: {\n");
-            printf("\t          tensor_dims: { "); for (size_t i = 0; i < dims; ++i) { printf("%zu, ", tensor_dims[i]); } printf(" }, \n");
-            printf("\t        }\n");
-        }
-
-        void * const src_data = malloc(src_tensor_bytes);
-        void * const dst_data = malloc(dst_tensor_bytes);
-        ASSERT(src_data && dst_data);
-
-        {
-            ownFillRandData(src_fmt, &rng, count, src_data);
-
-            vx_size view_start[MAX_TENSOR_DIMS] = { 0 };
-            VX_CALL(vxCopyTensorPatch(src_tensor, dims, view_start, tensor_dims, src_tensor_strides, src_data, VX_WRITE_ONLY, VX_MEMORY_TYPE_HOST));
-        }
-
-        {
-            vx_graph graph = vxCreateGraph(context);
-            ASSERT_VX_OBJECT(graph, VX_TYPE_GRAPH);
-
-            vx_node node = vxBilateralFilterNode(graph, src_tensor, diameter,  sigmaSpace, sigmaValues, dst_tensor);
-
-            ASSERT_VX_OBJECT(node, VX_TYPE_NODE);
-            VX_CALL(vxReleaseNode(&node));
-            EXPECT_EQ_PTR(NULL, node);
-
-            VX_CALL(vxVerifyGraph(graph));
-            VX_CALL(vxProcessGraph(graph));
-
-            VX_CALL(vxReleaseGraph(&graph));
-            EXPECT_EQ_PTR(NULL, graph);
-        }
-
-        // Verify the reuslts
-        {
-            const size_t view_start[MAX_TENSOR_DIMS] = { 0 };
-            VX_CALL(vxCopyTensorPatch(dst_tensor, dims, view_start, tensor_dims, dst_tensor_strides, dst_data, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
-
-            ownCheckBilateralFilterResult(
-                    src_data, tensor_dims, src_tensor_strides,
-                    dst_fmt,
-                    dims,
-                    count,
-                    diameter,
-                    sigmaSpace,
-                    sigmaValues,
-                    dst_data, tensor_dims, dst_tensor_strides);
-        }
-
-        VX_CALL(vxReleaseTensor(&src_tensor));
-        VX_CALL(vxReleaseTensor(&dst_tensor));
-        EXPECT_EQ_PTR(NULL, src_tensor);
-        EXPECT_EQ_PTR(NULL, dst_tensor);
-
-        free(src_data);
-        free(dst_data);
+        num_of_dims = 3;
     }
 
-    free(tensor_dims);
-    free(src_tensor_strides);
-    free(dst_tensor_strides);
+    size_t * const tensor_dims = ct_alloc_mem(sizeof(*tensor_dims) * num_of_dims);
+    size_t * const src_tensor_strides = ct_alloc_mem(sizeof(*src_tensor_strides) * num_of_dims);
+    size_t * const dst_tensor_strides = ct_alloc_mem(sizeof(*dst_tensor_strides) * num_of_dims);
+    ASSERT(tensor_dims && src_tensor_strides && dst_tensor_strides);
+
+    if (num_of_dims == 3)
+    {
+        tensor_dims[0] = 3;
+        tensor_dims[1] = width;
+        tensor_dims[2] = height;
+
+        src_tensor_strides[0] = src_sizeof_data_type;
+        src_tensor_strides[1] = tensor_dims[0] * src_tensor_strides[0];
+        src_tensor_strides[2] = tensor_dims[1] * src_tensor_strides[1];
+
+        dst_tensor_strides[0] = src_tensor_strides[0];
+        dst_tensor_strides[1] = src_tensor_strides[1];
+        dst_tensor_strides[2] = src_tensor_strides[2];
+    }
+    else
+    {
+        tensor_dims[0] = width;
+        tensor_dims[1] = height;
+
+        src_tensor_strides[0] = src_sizeof_data_type;
+        src_tensor_strides[1] = tensor_dims[0] * src_tensor_strides[0];
+
+        dst_tensor_strides[0] = src_tensor_strides[0];
+        dst_tensor_strides[1] = src_tensor_strides[1];
+    }
+
+    const size_t dst_tensor_bytes = tensor_dims[num_of_dims-1] * dst_tensor_strides[num_of_dims-1];
+
+    vx_tensor src_tensor = vxCreateTensor(context, num_of_dims, tensor_dims, src_data_type, src_fixed_point_position);
+    vx_tensor dst_tensor = vxCreateTensor(context, num_of_dims, tensor_dims, dst_data_type, dst_fixed_point_position);
+
+    void * const dst_data = ct_alloc_mem(dst_tensor_bytes);
+    vx_size *view_start = (vx_size *)ct_alloc_mem(num_of_dims * sizeof(vx_size));
+    memset(view_start, 0, num_of_dims * sizeof(vx_size));
+
+    void *src_data = NULL;
+    src_data = arg_->generator(arg_->width, arg_->height, arg_->cn, arg_->tensor_fmt);
+    VX_CALL(vxCopyTensorPatch(src_tensor, num_of_dims, view_start, tensor_dims, src_tensor_strides, src_data, VX_WRITE_ONLY, VX_MEMORY_TYPE_HOST));
+    vx_graph graph = vxCreateGraph(context);
+    ASSERT_VX_OBJECT(graph, VX_TYPE_GRAPH);
+
+    vx_node node = vxBilateralFilterNode(graph, src_tensor, diameter,  sigmaSpace, sigmaColor, dst_tensor);
+
+    ASSERT_VX_OBJECT(node, VX_TYPE_NODE);
+    VX_CALL(vxSetNodeAttribute(node, VX_NODE_BORDER, &border, sizeof(border)));
+    VX_CALL(vxReleaseNode(&node));
+    EXPECT_EQ_PTR(NULL, node);
+
+    VX_CALL(vxVerifyGraph(graph));
+    VX_CALL(vxProcessGraph(graph));
+
+    VX_CALL(vxReleaseGraph(&graph));
+    EXPECT_EQ_PTR(NULL, graph);
+
+    VX_CALL(vxCopyTensorPatch(dst_tensor, num_of_dims, view_start, tensor_dims, dst_tensor_strides, dst_data, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+
+    ownCheckBilateralFilterResult(
+                    src_data, tensor_dims, src_tensor_strides,
+                    dst_fmt,
+                    num_of_dims,
+                    diameter,
+                    sigmaSpace,
+                    sigmaColor,
+                    dst_data, tensor_dims, dst_tensor_strides,
+                    border);
+
+    VX_CALL(vxReleaseTensor(&src_tensor));
+    VX_CALL(vxReleaseTensor(&dst_tensor));
+    EXPECT_EQ_PTR(NULL, src_tensor);
+    EXPECT_EQ_PTR(NULL, dst_tensor);
+
+    ct_free_mem(src_data);
+    ct_free_mem(dst_data);
+    ct_free_mem(view_start);
+    ct_free_mem(tensor_dims);
+    ct_free_mem(src_tensor_strides);
+    ct_free_mem(dst_tensor_strides);
 }
 
+TEST_WITH_ARG(BilateralFilter, testImmediateProcessing, bilateral_arg,
+        BILATERAL_PARAMETERS
+)
+{
+    vx_context context = context_->vx_context_;
+    const enum TestTensorDF src_fmt = arg_->tensor_fmt;
+    const enum TestTensorDF dst_fmt = arg_->tensor_fmt;
+    assert(src_fmt == TT_Q78 || src_fmt == TT_U8);
+    assert(dst_fmt == TT_Q78 || dst_fmt == TT_U8);
+    const vx_border_t border = arg_->border;
+    const int diameter = arg_->diameter;
+    const float sigmaSpace = arg_->sigmaSpace;
+    const float sigmaColor = arg_->sigmaColor;
+    const int cn = arg_->cn;
+    const int width = arg_->width;
+    const int height = arg_->height;
+    vx_size num_of_dims = 2;
+
+    vx_enum src_data_type = 0;
+    vx_enum dst_data_type = 0;
+    vx_uint8 src_fixed_point_position = 0;
+    vx_uint8 dst_fixed_point_position = 0;
+    vx_size src_sizeof_data_type = 1;
+    vx_size dst_sizeof_data_type = 1;
+    ownUnpackFormat(src_fmt, &src_data_type, &src_fixed_point_position, &src_sizeof_data_type);
+    ownUnpackFormat(dst_fmt, &dst_data_type, &dst_fixed_point_position, &dst_sizeof_data_type);
+
+    if (cn == 3)
+    {
+        num_of_dims = 3;
+    }
+
+    size_t * const tensor_dims = ct_alloc_mem(sizeof(*tensor_dims) * num_of_dims);
+    size_t * const src_tensor_strides = ct_alloc_mem(sizeof(*src_tensor_strides) * num_of_dims);
+    size_t * const dst_tensor_strides = ct_alloc_mem(sizeof(*dst_tensor_strides) * num_of_dims);
+    ASSERT(tensor_dims && src_tensor_strides && dst_tensor_strides);
+
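+    // for RGB input the tensor is channel-interleaved: dim 0 = channels, dim 1 = width, dim 2 = height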
+    if (num_of_dims == 3)
+    {
+        tensor_dims[0] = 3;
+        tensor_dims[1] = width;
+        tensor_dims[2] = height;
+
+        src_tensor_strides[0] = src_sizeof_data_type;
+        src_tensor_strides[1] = tensor_dims[0] * src_tensor_strides[0];
+        src_tensor_strides[2] = tensor_dims[1] * src_tensor_strides[1];
+
+        dst_tensor_strides[0] = src_tensor_strides[0];
+        dst_tensor_strides[1] = src_tensor_strides[1];
+        dst_tensor_strides[2] = src_tensor_strides[2];
+    }
+    else
+    {
+        tensor_dims[0] = width;
+        tensor_dims[1] = height;
+
+        src_tensor_strides[0] = src_sizeof_data_type;
+        src_tensor_strides[1] = tensor_dims[0] * src_tensor_strides[0];
+
+        dst_tensor_strides[0] = src_tensor_strides[0];
+        dst_tensor_strides[1] = src_tensor_strides[1];
+    }
+
+    const size_t dst_tensor_bytes = tensor_dims[num_of_dims-1] * dst_tensor_strides[num_of_dims-1];
+
+    vx_tensor src_tensor = vxCreateTensor(context, num_of_dims, tensor_dims, src_data_type, src_fixed_point_position);
+    vx_tensor dst_tensor = vxCreateTensor(context, num_of_dims, tensor_dims, dst_data_type, dst_fixed_point_position);
+
+    void * const dst_data = ct_alloc_mem(dst_tensor_bytes);
+    vx_size *view_start = (vx_size *)ct_alloc_mem(num_of_dims * sizeof(vx_size));
+    memset(view_start, 0, num_of_dims * sizeof(vx_size));
+
+    void *src_data = NULL;
+    src_data = arg_->generator(arg_->width, arg_->height, arg_->cn, arg_->tensor_fmt);
+    VX_CALL(vxCopyTensorPatch(src_tensor, num_of_dims, view_start, tensor_dims, src_tensor_strides, src_data, VX_WRITE_ONLY, VX_MEMORY_TYPE_HOST));
+
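+    // immediate-mode functions take the border mode from the context, so set it before calling vxuBilateralFilter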
+    VX_CALL(vxSetContextAttribute(context, VX_CONTEXT_IMMEDIATE_BORDER, &border, sizeof(border)));
+    VX_CALL(vxuBilateralFilter(context, src_tensor, diameter, sigmaSpace, sigmaColor, dst_tensor));
+
+    VX_CALL(vxCopyTensorPatch(dst_tensor, num_of_dims, view_start, tensor_dims, dst_tensor_strides, dst_data, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+
+    ownCheckBilateralFilterResult(
+                    src_data, tensor_dims, src_tensor_strides,
+                    dst_fmt,
+                    num_of_dims,
+                    diameter,
+                    sigmaSpace,
+                    sigmaColor,
+                    dst_data, tensor_dims, dst_tensor_strides,
+                    border);
+
+    VX_CALL(vxReleaseTensor(&src_tensor));
+    VX_CALL(vxReleaseTensor(&dst_tensor));
+    EXPECT_EQ_PTR(NULL, src_tensor);
+    EXPECT_EQ_PTR(NULL, dst_tensor);
+
+    ct_free_mem(src_data);
+    ct_free_mem(dst_data);
+    ct_free_mem(view_start);
+    ct_free_mem(tensor_dims);
+    ct_free_mem(src_tensor_strides);
+    ct_free_mem(dst_tensor_strides);
+}
+
+
 
 TEST(BilateralFilter, testNodeCreation)
 {
@@ -412,9 +945,9 @@
     ownUnpackFormat(src_fmt, &src_data_type, &src_fixed_point_position, &src_sizeof_data_type);
     ownUnpackFormat(dst_fmt, &dst_data_type, &dst_fixed_point_position, &dst_sizeof_data_type);
 
-    size_t * const tensor_dims = malloc(sizeof(*tensor_dims) * max_dims);
-    size_t * const src_tensor_strides = malloc(sizeof(*src_tensor_strides) * max_dims);
-    size_t * const dst_tensor_strides = malloc(sizeof(*dst_tensor_strides) * max_dims);
+    size_t * const tensor_dims = ct_alloc_mem(sizeof(*tensor_dims) * max_dims);
+    size_t * const src_tensor_strides = ct_alloc_mem(sizeof(*src_tensor_strides) * max_dims);
+    size_t * const dst_tensor_strides = ct_alloc_mem(sizeof(*dst_tensor_strides) * max_dims);
     ASSERT(tensor_dims && src_tensor_strides && dst_tensor_strides);
 
-    // The input data a vx_tensor. maximum 3 dimension and minimum 2.
+    // The input data is a vx_tensor with a maximum of 3 dimensions and a minimum of 2.
@@ -451,8 +984,8 @@
             printf("\t        }\n");
         }
 
-        void * const src_data = malloc(src_tensor_bytes);
-        void * const dst_data = malloc(dst_tensor_bytes);
+        void * const src_data = ct_alloc_mem(src_tensor_bytes);
+        void * const dst_data = ct_alloc_mem(dst_tensor_bytes);
         ASSERT(src_data && dst_data);
 
         {
@@ -483,16 +1016,19 @@
         EXPECT_EQ_PTR(NULL, src_tensor);
         EXPECT_EQ_PTR(NULL, dst_tensor);
 
-        free(src_data);
-        free(dst_data);
+        ct_free_mem(src_data);
+        ct_free_mem(dst_data);
     }
 
-    free(tensor_dims);
-    free(src_tensor_strides);
-    free(dst_tensor_strides);
+    ct_free_mem(tensor_dims);
+    ct_free_mem(src_tensor_strides);
+    ct_free_mem(dst_tensor_strides);
 }
 
 TESTCASE_TESTS(BilateralFilter,
     testNodeCreation,
-    testBilateralFilterOp
+    testGraphProcessing,
+    testImmediateProcessing
 );
+
+#endif //OPENVX_USE_ENHANCED_VISION
diff --git a/test_conformance/test_binop16s.c b/test_conformance/test_binop16s.c
index d3ab072..61e3979 100644
--- a/test_conformance/test_binop16s.c
+++ b/test_conformance/test_binop16s.c
@@ -1,4 +1,4 @@
-/* 
+/*
 
  * Copyright (c) 2012-2017 The Khronos Group Inc.
  *
@@ -15,6 +15,7 @@
  * limitations under the License.
  */
 
+#if defined OPENVX_USE_ENHANCED_VISION || OPENVX_CONFORMANCE_VISION
 
 #include "test_engine/test.h"
 
@@ -28,8 +29,8 @@
     uint32_t i, j;
 
     ASSERT(src0 && src1 && dst);
-    ASSERT(src0->width = src1->width && src0->width == dst->width);
-    ASSERT(src0->height = src1->height && src0->height == dst->height);
+    ASSERT(src0->width == src1->width && src0->width == dst->width);
+    ASSERT(src0->height == src1->height && src0->height == dst->height);
     ASSERT(src0->format == src1->format && src1->format == VX_DF_IMAGE_S16);
     ASSERT(dst->format == VX_DF_IMAGE_S16 || dst->format == VX_DF_IMAGE_U16);
 
@@ -298,3 +299,5 @@
 
 TESTCASE_TESTS(vxuBinOp16s, DISABLED_testNegativeSizes,                testFuzzy)
 TESTCASE_TESTS(vxBinOp16s,  DISABLED_testNegativeSizes, testInference, testFuzzy)
+
+#endif //OPENVX_USE_ENHANCED_VISION || OPENVX_CONFORMANCE_VISION
diff --git a/test_conformance/test_binop1u.c b/test_conformance/test_binop1u.c
new file mode 100644
index 0000000..9e47386
--- /dev/null
+++ b/test_conformance/test_binop1u.c
@@ -0,0 +1,343 @@
+/*
+
+ * Copyright (c) 2012-2017 The Khronos Group Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#if defined OPENVX_USE_ENHANCED_VISION || OPENVX_CONFORMANCE_VISION
+#ifdef OPENVX_USE_U1
+
+#include "test_engine/test.h"
+
+#include <VX/vx.h>
+#include <VX/vxu.h>
+
+//#define CT_EXECUTE_ASYNC
+
+static void referenceAnd(CT_Image src0, CT_Image src1, CT_Image dst)
+{
+    uint32_t i, j;
+
+    ASSERT(src0 && src1 && dst);
+    ASSERT(src0->width == src1->width && src0->width == dst->width);
+    ASSERT(src0->height == src1->height && src0->height == dst->height);
+    ASSERT(src0->format == dst->format && src1->format == dst->format && dst->format == VX_DF_IMAGE_U1);
+
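+    // U1 images pack 8 pixels per byte; the ROI x-offset determines the bit position inside the first byte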
+    for (i = 0; i < dst->height; ++i)
+        for (j = 0; j < dst->width; ++j)
+        {
+            uint32_t xShftd = j + src0->roi.x % 8;
+            uint8_t  mask   = 1 << (xShftd % 8);
+            uint8_t  byte_val = src0->data.y[i * ct_stride_bytes(src0) + xShftd / 8] &
+                                src1->data.y[i * ct_stride_bytes(src1) + xShftd / 8];
+            dst->data.y[i * ct_stride_bytes(dst) + xShftd / 8] =
+                (dst->data.y[i * ct_stride_bytes(dst) + xShftd / 8] & ~mask) | (byte_val & mask);
+        }
+}
+
+static void referenceOr(CT_Image src0, CT_Image src1, CT_Image dst)
+{
+    uint32_t i, j;
+
+    ASSERT(src0 && src1 && dst);
+    ASSERT(src0->width == src1->width && src0->width == dst->width);
+    ASSERT(src0->height == src1->height && src0->height == dst->height);
+    ASSERT(src0->format == dst->format && src1->format == dst->format && dst->format == VX_DF_IMAGE_U1);
+
+    for (i = 0; i < dst->height; ++i)
+        for (j = 0; j < dst->width; ++j)
+        {
+            uint32_t xShftd = j + src0->roi.x % 8;
+            uint8_t  mask   = 1 << (xShftd % 8);
+            uint8_t  byte_val = src0->data.y[i * ct_stride_bytes(src0) + xShftd / 8] |
+                                src1->data.y[i * ct_stride_bytes(src1) + xShftd / 8];
+            dst->data.y[i * ct_stride_bytes(dst) + xShftd / 8] =
+                (dst->data.y[i * ct_stride_bytes(dst) + xShftd / 8] & ~mask) | (byte_val & mask);
+        }
+}
+
+static void referenceXor(CT_Image src0, CT_Image src1, CT_Image dst)
+{
+    uint32_t i, j;
+
+    ASSERT(src0 && src1 && dst);
+    ASSERT(src0->width == src1->width && src0->width == dst->width);
+    ASSERT(src0->height == src1->height && src0->height == dst->height);
+    ASSERT(src0->format == dst->format && src1->format == dst->format && dst->format == VX_DF_IMAGE_U1);
+
+    for (i = 0; i < dst->height; ++i)
+        for (j = 0; j < dst->width; ++j)
+        {
+            uint32_t xShftd = j + src0->roi.x % 8;
+            uint8_t  mask   = 1 << (xShftd % 8);
+            uint8_t  byte_val = src0->data.y[i * ct_stride_bytes(src0) + xShftd / 8] ^
+                                src1->data.y[i * ct_stride_bytes(src1) + xShftd / 8];
+            dst->data.y[i * ct_stride_bytes(dst) + xShftd / 8] =
+                (dst->data.y[i * ct_stride_bytes(dst) + xShftd / 8] & ~mask) | (byte_val & mask);
+        }
+}
+
+typedef vx_status (VX_API_CALL *vxuBinopFunction)(vx_context, vx_image, vx_image, vx_image);
+typedef vx_node   (VX_API_CALL *vxBinopFunction) (vx_graph, vx_image, vx_image, vx_image);
+typedef void      (*referenceFunction)(CT_Image, CT_Image, CT_Image);
+
+TESTCASE(vxuBinOp1u, CT_VXContext, ct_setup_vx_context, 0)
+TESTCASE(vxBinOp1u,  CT_VXContext, ct_setup_vx_context, 0)
+
+typedef struct {
+    const char* name;
+    vxuBinopFunction  vxuFunc;
+    vxBinopFunction   vxFunc;
+    referenceFunction referenceFunc;
+} func_arg;
+
+#define FUNC_ARG(func) ARG("_U1_/" #func, vxu##func, vx##func##Node, reference##func)
+
+TEST_WITH_ARG(vxuBinOp1u, testNegativeSizes, func_arg, FUNC_ARG(And), FUNC_ARG(Or), FUNC_ARG(Xor))
+{
+    vx_image src1_32x32, src1_64x64, src2_32x32, src2_32x64, dst32x32, dst88x16;
+    vx_context context = context_->vx_context_;
+
+    ASSERT_VX_OBJECT(src1_32x32 = vxCreateImage(context, 32, 32, VX_DF_IMAGE_U1), VX_TYPE_IMAGE);
+    ASSERT_VX_OBJECT(src1_64x64 = vxCreateImage(context, 64, 64, VX_DF_IMAGE_U1), VX_TYPE_IMAGE);
+    ASSERT_VX_OBJECT(src2_32x32 = vxCreateImage(context, 32, 32, VX_DF_IMAGE_U1), VX_TYPE_IMAGE);
+    ASSERT_VX_OBJECT(src2_32x64 = vxCreateImage(context, 32, 64, VX_DF_IMAGE_U1), VX_TYPE_IMAGE);
+
+    ASSERT_VX_OBJECT(dst32x32 = vxCreateImage(context, 32, 32, VX_DF_IMAGE_U1), VX_TYPE_IMAGE);
+    ASSERT_VX_OBJECT(dst88x16 = vxCreateImage(context, 88, 16, VX_DF_IMAGE_U1), VX_TYPE_IMAGE);
+
+    // initialize to guarantee that images are allocated
+    ASSERT_NO_FAILURE(ct_fill_image_random(src1_32x32, &CT()->seed_));
+    ASSERT_NO_FAILURE(ct_fill_image_random(src1_64x64, &CT()->seed_));
+    ASSERT_NO_FAILURE(ct_fill_image_random(src2_32x32, &CT()->seed_));
+    ASSERT_NO_FAILURE(ct_fill_image_random(src2_32x64, &CT()->seed_));
+
+    EXPECT_NE_VX_STATUS(VX_SUCCESS, arg_->vxuFunc(context, src1_32x32, src2_32x32, dst88x16));
+    EXPECT_NE_VX_STATUS(VX_SUCCESS, arg_->vxuFunc(context, src1_32x32, src2_32x64, dst32x32));
+    EXPECT_NE_VX_STATUS(VX_SUCCESS, arg_->vxuFunc(context, src1_64x64, src2_32x32, dst32x32));
+    EXPECT_NE_VX_STATUS(VX_SUCCESS, arg_->vxuFunc(context, src1_64x64, src2_32x64, dst32x32));
+
+    VX_CALL(vxReleaseImage(&src1_32x32));
+    VX_CALL(vxReleaseImage(&src2_32x32));
+    VX_CALL(vxReleaseImage(&src1_64x64));
+    VX_CALL(vxReleaseImage(&src2_32x64));
+    VX_CALL(vxReleaseImage(&dst32x32));
+    VX_CALL(vxReleaseImage(&dst88x16));
+}
+
+TEST_WITH_ARG(vxBinOp1u, testNegativeSizes, func_arg, FUNC_ARG(And), FUNC_ARG(Or), FUNC_ARG(Xor))
+{
+    vx_image src1_32x32, src1_64x64, src2_32x32, src2_32x64, dst32x32, dst88x16;
+    vx_graph graph1, graph2, graph3, graph4;
+    vx_context context = context_->vx_context_;
+
+    ASSERT_VX_OBJECT(src1_32x32 = vxCreateImage(context, 32, 32, VX_DF_IMAGE_U1), VX_TYPE_IMAGE);
+    ASSERT_VX_OBJECT(src1_64x64 = vxCreateImage(context, 64, 64, VX_DF_IMAGE_U1), VX_TYPE_IMAGE);
+    ASSERT_VX_OBJECT(src2_32x32 = vxCreateImage(context, 32, 32, VX_DF_IMAGE_U1), VX_TYPE_IMAGE);
+    ASSERT_VX_OBJECT(src2_32x64 = vxCreateImage(context, 32, 64, VX_DF_IMAGE_U1), VX_TYPE_IMAGE);
+
+    ASSERT_VX_OBJECT(dst32x32 = vxCreateImage(context, 32, 32, VX_DF_IMAGE_U1), VX_TYPE_IMAGE);
+    ASSERT_VX_OBJECT(dst88x16 = vxCreateImage(context, 88, 16, VX_DF_IMAGE_U1), VX_TYPE_IMAGE);
+
+    ASSERT_VX_OBJECT(graph1 = vxCreateGraph(context), VX_TYPE_GRAPH);
+    ASSERT_VX_OBJECT(arg_->vxFunc(graph1, src1_32x32, src2_32x32, dst88x16), VX_TYPE_NODE);
+    EXPECT_NE_VX_STATUS(VX_SUCCESS, vxVerifyGraph(graph1));
+
+    ASSERT_VX_OBJECT(graph2 = vxCreateGraph(context), VX_TYPE_GRAPH);
+    ASSERT_VX_OBJECT(arg_->vxFunc(graph2, src1_32x32, src2_32x64, dst32x32), VX_TYPE_NODE);
+    EXPECT_NE_VX_STATUS(VX_SUCCESS, vxVerifyGraph(graph2));
+
+    ASSERT_VX_OBJECT(graph3 = vxCreateGraph(context), VX_TYPE_GRAPH);
+    ASSERT_VX_OBJECT(arg_->vxFunc(graph3, src1_64x64, src2_32x32, dst32x32), VX_TYPE_NODE);
+    EXPECT_NE_VX_STATUS(VX_SUCCESS, vxVerifyGraph(graph3));
+
+    ASSERT_VX_OBJECT(graph4 = vxCreateGraph(context), VX_TYPE_GRAPH);
+    ASSERT_VX_OBJECT(arg_->vxFunc(graph4, src1_64x64, src2_32x64, dst32x32), VX_TYPE_NODE);
+    EXPECT_NE_VX_STATUS(VX_SUCCESS, vxVerifyGraph(graph4));
+
+    VX_CALL(vxReleaseImage(&src1_32x32));
+    VX_CALL(vxReleaseImage(&src2_32x32));
+    VX_CALL(vxReleaseImage(&src1_64x64));
+    VX_CALL(vxReleaseImage(&src2_32x64));
+    VX_CALL(vxReleaseImage(&dst32x32));
+    VX_CALL(vxReleaseImage(&dst88x16));
+    VX_CALL(vxReleaseGraph(&graph1));
+    VX_CALL(vxReleaseGraph(&graph2));
+    VX_CALL(vxReleaseGraph(&graph3));
+    VX_CALL(vxReleaseGraph(&graph4));
+}
+
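+// node callback: checks that the meta-data (size and format) of the virtual dst image was inferred from the U1 inputs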
+static vx_image inference_image;
+static vx_action VX_CALLBACK inference_image_test(vx_node node)
+{
+    vx_uint32 width  = 0;
+    vx_uint32 height = 0;
+    vx_df_image format = 0;
+
+    EXPECT_EQ_VX_STATUS(VX_SUCCESS, vxQueryImage(inference_image, VX_IMAGE_WIDTH,   &width,   sizeof(width)));
+    EXPECT_EQ_VX_STATUS(VX_SUCCESS, vxQueryImage(inference_image, VX_IMAGE_HEIGHT,  &height,  sizeof(height)));
+    EXPECT_EQ_VX_STATUS(VX_SUCCESS, vxQueryImage(inference_image, VX_IMAGE_FORMAT,  &format,  sizeof(format)));
+
+    EXPECT_EQ_INT(640, width);
+    EXPECT_EQ_INT(480, height);
+    EXPECT_EQ_INT(VX_DF_IMAGE_U1, format);
+
+    return VX_ACTION_CONTINUE;
+}
+
+TEST_WITH_ARG(vxBinOp1u, testInference, func_arg, FUNC_ARG(And), FUNC_ARG(Or), FUNC_ARG(Xor))
+{
+    vx_image src1, src2, dst, srcU8, dstU8, gr;
+    vx_scalar sshift;
+    vx_int32 sval = 0;
+    vx_graph graph;
+    vx_node n, cn1, cn2, tmp;
+    vx_context context = context_->vx_context_;
+
+    ASSERT_VX_OBJECT(graph = vxCreateGraph(context), VX_TYPE_GRAPH);
+    ASSERT_VX_OBJECT(src1  = vxCreateImage(context, 640, 480, VX_DF_IMAGE_U1), VX_TYPE_IMAGE);
+    ASSERT_VX_OBJECT(src2  = vxCreateImage(context, 640, 480, VX_DF_IMAGE_U1), VX_TYPE_IMAGE);
+    ASSERT_VX_OBJECT(dst   = vxCreateVirtualImage(graph, 0, 0, VX_DF_IMAGE_VIRT), VX_TYPE_IMAGE);
+    ASSERT_VX_OBJECT(n     = arg_->vxFunc(graph, src1, src2, dst), VX_TYPE_NODE);
+
+    // ground the virtual dst image with extra consumer nodes (vxAddNode doesn't support U1 images, so convert to U8 first)
+    ASSERT_VX_OBJECT(srcU8 = vxCreateImage(context, 640, 480, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
+    ASSERT_VX_OBJECT(dstU8 = vxCreateImage(context, 640, 480, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
+    ASSERT_VX_OBJECT(gr    = vxCreateImage(context, 640, 480, VX_DF_IMAGE_S16), VX_TYPE_IMAGE);
+    ASSERT_VX_OBJECT(sshift = vxCreateScalar(context, VX_TYPE_INT32, &sval), VX_TYPE_SCALAR);
+    ASSERT_VX_OBJECT(cn1   = vxConvertDepthNode(graph, src2, srcU8, VX_CONVERT_POLICY_SATURATE, sshift), VX_TYPE_NODE);
+    ASSERT_VX_OBJECT(cn2   = vxConvertDepthNode(graph, dst,  dstU8, VX_CONVERT_POLICY_SATURATE, sshift), VX_TYPE_NODE);
+    ASSERT_VX_OBJECT(tmp   = vxAddNode(graph, dstU8, srcU8, VX_CONVERT_POLICY_WRAP, gr), VX_TYPE_NODE);
+
+    // test
+    inference_image = dst;
+    EXPECT_EQ_VX_STATUS(VX_SUCCESS, vxAssignNodeCallback(n, inference_image_test));
+    ASSERT_EQ_VX_STATUS(VX_SUCCESS, vxProcessGraph(graph));
+
+    VX_CALL(vxReleaseNode(&n));
+    VX_CALL(vxReleaseNode(&cn1));
+    VX_CALL(vxReleaseNode(&cn2));
+    VX_CALL(vxReleaseNode(&tmp));
+    VX_CALL(vxReleaseScalar(&sshift));
+    VX_CALL(vxReleaseImage(&src1));
+    VX_CALL(vxReleaseImage(&src2));
+    VX_CALL(vxReleaseImage(&dst));
+    VX_CALL(vxReleaseImage(&srcU8));
+    VX_CALL(vxReleaseImage(&dstU8));
+    VX_CALL(vxReleaseImage(&gr));
+    VX_CALL(vxReleaseGraph(&graph));
+}
+
+typedef struct {
+    const char* name;
+    uint32_t width;
+    uint32_t height;
+    vxuBinopFunction  vxuFunc;
+    vxBinopFunction   vxFunc;
+    referenceFunction referenceFunc;
+} fuzzy_arg;
+
+#define FUZZY_ARG(func,w,h) ARG("_U1_/" #func ": " #w "x" #h, w, h, vxu##func, vx##func##Node, reference##func)
+
+#define BINOP_SIZE_ARGS(func)       \
+    FUZZY_ARG(func, 640, 480),      \
+    ARG_EXTENDED_BEGIN(),           \
+    FUZZY_ARG(func, 1, 1),          \
+    FUZZY_ARG(func, 15, 17),        \
+    FUZZY_ARG(func, 32, 32),        \
+    FUZZY_ARG(func, 1231, 1234),    \
+    FUZZY_ARG(func, 1280, 720),     \
+    FUZZY_ARG(func, 1920, 1080),    \
+    ARG_EXTENDED_END()
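+// sizes between ARG_EXTENDED_BEGIN/END are presumably only run when extended argument sets are enabled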
+
+TEST_WITH_ARG(vxuBinOp1u, testFuzzy, fuzzy_arg, BINOP_SIZE_ARGS(And), BINOP_SIZE_ARGS(Or), BINOP_SIZE_ARGS(Xor))
+{
+    vx_image src1, src2, dst;
+    CT_Image ref1, ref2, refdst, vxdst;
+    vx_context context = context_->vx_context_;
+
+    ASSERT_VX_OBJECT(src1 = vxCreateImage(context, arg_->width, arg_->height, VX_DF_IMAGE_U1), VX_TYPE_IMAGE);
+    ASSERT_VX_OBJECT(src2 = vxCreateImage(context, arg_->width, arg_->height, VX_DF_IMAGE_U1), VX_TYPE_IMAGE);
+    ASSERT_VX_OBJECT(dst  = vxCreateImage(context, arg_->width, arg_->height, VX_DF_IMAGE_U1), VX_TYPE_IMAGE);
+
+    ASSERT_NO_FAILURE(ct_fill_image_random(src1, &CT()->seed_));
+    ASSERT_NO_FAILURE(ct_fill_image_random(src2, &CT()->seed_));
+
+    ASSERT_EQ_VX_STATUS(VX_SUCCESS, arg_->vxuFunc(context, src1, src2, dst));
+
+    ref1  = ct_image_from_vx_image(src1);
+    ref2  = ct_image_from_vx_image(src2);
+    vxdst = ct_image_from_vx_image(dst);
+    refdst = ct_allocate_image(arg_->width, arg_->height, VX_DF_IMAGE_U1);
+
+    arg_->referenceFunc(ref1, ref2, refdst);
+
+    ASSERT_EQ_CTIMAGE(refdst, vxdst);
+
+    // checked release of the vx images
+    VX_CALL(vxReleaseImage(&dst));
+    VX_CALL(vxReleaseImage(&src1));
+    VX_CALL(vxReleaseImage(&src2));
+    EXPECT_EQ_PTR(NULL, dst);
+    EXPECT_EQ_PTR(NULL, src1);
+    EXPECT_EQ_PTR(NULL, src2);
+}
+
+TEST_WITH_ARG(vxBinOp1u, testFuzzy, fuzzy_arg, BINOP_SIZE_ARGS(And), BINOP_SIZE_ARGS(Or), BINOP_SIZE_ARGS(Xor))
+{
+    vx_image src1, src2, dst;
+    vx_graph graph;
+    CT_Image ref1, ref2, refdst, vxdst;
+    vx_context context = context_->vx_context_;
+
+    ASSERT_VX_OBJECT(graph = vxCreateGraph(context), VX_TYPE_GRAPH);
+    ASSERT_VX_OBJECT(dst   = vxCreateImage(context, arg_->width, arg_->height, VX_DF_IMAGE_U1), VX_TYPE_IMAGE);
+
+    ASSERT_VX_OBJECT(src1 = vxCreateImage(context, arg_->width, arg_->height, VX_DF_IMAGE_U1), VX_TYPE_IMAGE);
+    ASSERT_VX_OBJECT(src2 = vxCreateImage(context, arg_->width, arg_->height, VX_DF_IMAGE_U1), VX_TYPE_IMAGE);
+
+    ASSERT_NO_FAILURE(ct_fill_image_random(src1, &CT()->seed_));
+    ASSERT_NO_FAILURE(ct_fill_image_random(src2, &CT()->seed_));
+
+    // build one-node graph
+    ASSERT_VX_OBJECT(arg_->vxFunc(graph, src1, src2, dst), VX_TYPE_NODE);
+
+    // run graph
+#ifdef CT_EXECUTE_ASYNC
+    ASSERT_EQ_VX_STATUS(VX_SUCCESS, vxScheduleGraph(graph));
+    ASSERT_EQ_VX_STATUS(VX_SUCCESS, vxWaitGraph(graph));
+#else
+    ASSERT_EQ_VX_STATUS(VX_SUCCESS, vxProcessGraph(graph));
+#endif
+
+    ref1  = ct_image_from_vx_image(src1);
+    ref2  = ct_image_from_vx_image(src2);
+    vxdst = ct_image_from_vx_image(dst);
+    refdst = ct_allocate_image(arg_->width, arg_->height, VX_DF_IMAGE_U1);
+
+    arg_->referenceFunc(ref1, ref2, refdst);
+
+    ASSERT_EQ_CTIMAGE(refdst, vxdst);
+
+    VX_CALL(vxReleaseImage(&src1));
+    VX_CALL(vxReleaseImage(&src2));
+    VX_CALL(vxReleaseImage(&dst));
+    VX_CALL(vxReleaseGraph(&graph));
+}
+
+TESTCASE_TESTS(vxuBinOp1u, DISABLED_testNegativeSizes,                testFuzzy)
+TESTCASE_TESTS(vxBinOp1u,  DISABLED_testNegativeSizes, testInference, testFuzzy)
+
+#endif //OPENVX_USE_U1
+#endif //OPENVX_USE_ENHANCED_VISION || OPENVX_CONFORMANCE_VISION
diff --git a/test_conformance/test_binop8u.c b/test_conformance/test_binop8u.c
index 9eb71dd..69add61 100644
--- a/test_conformance/test_binop8u.c
+++ b/test_conformance/test_binop8u.c
@@ -1,4 +1,4 @@
-/* 
+/*
 
  * Copyright (c) 2012-2017 The Khronos Group Inc.
  *
@@ -15,6 +15,8 @@
  * limitations under the License.
  */
 
+#if defined OPENVX_USE_ENHANCED_VISION || OPENVX_CONFORMANCE_VISION
+
 #include "test_engine/test.h"
 
 #include <VX/vx.h>
@@ -27,8 +29,8 @@
     uint32_t i, j;
 
     ASSERT(src0 && src1 && dst);
-    ASSERT(src0->width = src1->width && src0->width == dst->width);
-    ASSERT(src0->height = src1->height && src0->height == dst->height);
+    ASSERT(src0->width == src1->width && src0->width == dst->width);
+    ASSERT(src0->height == src1->height && src0->height == dst->height);
     ASSERT(src0->format == dst->format && src1->format == dst->format && dst->format == VX_DF_IMAGE_U8);
 
     for (i = 0; i < dst->height; ++i)
@@ -44,8 +46,8 @@
     uint32_t i, j;
 
     ASSERT(src0 && src1 && dst);
-    ASSERT(src0->width = src1->width && src0->width == dst->width);
-    ASSERT(src0->height = src1->height && src0->height == dst->height);
+    ASSERT(src0->width == src1->width && src0->width == dst->width);
+    ASSERT(src0->height == src1->height && src0->height == dst->height);
     ASSERT(src0->format == dst->format && src1->format == dst->format && dst->format == VX_DF_IMAGE_U8);
 
     for (i = 0; i < dst->height; ++i)
@@ -58,8 +60,8 @@
     uint32_t i, j;
 
     ASSERT(src0 && src1 && dst);
-    ASSERT(src0->width = src1->width && src0->width == dst->width);
-    ASSERT(src0->height = src1->height && src0->height == dst->height);
+    ASSERT(src0->width == src1->width && src0->width == dst->width);
+    ASSERT(src0->height == src1->height && src0->height == dst->height);
     ASSERT(src0->format == dst->format && src1->format == dst->format && dst->format == VX_DF_IMAGE_U8);
 
     for (i = 0; i < dst->height; ++i)
@@ -72,8 +74,8 @@
     uint32_t i, j;
 
     ASSERT(src0 && src1 && dst);
-    ASSERT(src0->width = src1->width && src0->width == dst->width);
-    ASSERT(src0->height = src1->height && src0->height == dst->height);
+    ASSERT(src0->width == src1->width && src0->width == dst->width);
+    ASSERT(src0->height == src1->height && src0->height == dst->height);
     ASSERT(src0->format == dst->format && src1->format == dst->format && dst->format == VX_DF_IMAGE_U8);
 
     for (i = 0; i < dst->height; ++i)
@@ -320,3 +322,5 @@
 
 TESTCASE_TESTS(vxuBinOp8u, DISABLED_testNegativeSizes,                testFuzzy)
 TESTCASE_TESTS(vxBinOp8u,  DISABLED_testNegativeSizes, testInference, testFuzzy)
+
+#endif //OPENVX_USE_ENHANCED_VISION || OPENVX_CONFORMANCE_VISION
diff --git a/test_conformance/test_box3x3.c b/test_conformance/test_box3x3.c
index 3c23046..c33582e 100644
--- a/test_conformance/test_box3x3.c
+++ b/test_conformance/test_box3x3.c
@@ -1,4 +1,4 @@
-/* 
+/*
 
  * Copyright (c) 2012-2017 The Khronos Group Inc.
  *
@@ -15,6 +15,8 @@
  * limitations under the License.
  */
 
+#if defined OPENVX_USE_ENHANCED_VISION || OPENVX_CONFORMANCE_VISION
+
 #include "test_engine/test.h"
 #include <VX/vx.h>
 #include <VX/vxu.h>
@@ -347,3 +349,5 @@
 }
 
 TESTCASE_TESTS(Box3x3, testNodeCreation, testGraphProcessing, testImmediateProcessing)
+
+#endif //OPENVX_USE_ENHANCED_VISION || OPENVX_CONFORMANCE_VISION
diff --git a/test_conformance/test_canny.c b/test_conformance/test_canny.c
index dbdec3f..5f77ade 100644
--- a/test_conformance/test_canny.c
+++ b/test_conformance/test_canny.c
@@ -1,4 +1,4 @@
-/* 
+/*
 
  * Copyright (c) 2012-2017 The Khronos Group Inc.
  *
@@ -15,17 +15,21 @@
  * limitations under the License.
  */
 
+#if defined OPENVX_USE_ENHANCED_VISION || OPENVX_CONFORMANCE_VISION
+
 #include "test_engine/test.h"
 
 #include <stdint.h>
 #include <VX/vx.h>
 #include <VX/vxu.h>
 
-#define USE_OPENCV_GENERATED_REFERENCE
+// #define USE_OPENCV_GENERATED_REFERENCE
+#ifndef USE_OPENCV_GENERATED_REFERENCE
+    #include <string.h>
+#endif
 #define CANNY_ACCEPTANCE_THRESHOLD 0.95
 //#define EXECUTE_ASYNC
 
-
 #define CREF_EDGE 2
 #define CREF_LINK 1
 #define CREF_NONE 0
@@ -93,7 +97,7 @@
     ASSERT(src && dst);
     ASSERT(src->width == dst->width);
     ASSERT(src->height == dst->height);
-    ASSERT(src->format == dst->format && src->format == VX_DF_IMAGE_U8);
+    ASSERT(src->format == VX_DF_IMAGE_U8 && (dst->format == VX_DF_IMAGE_U8 || dst->format == VX_DF_IMAGE_U1));
 
     ASSERT(low_thresh <= high_thresh);
     ASSERT(low_thresh >= 0);
@@ -101,18 +105,24 @@
     ASSERT(norm == VX_NORM_L2 || norm == VX_NORM_L1);
     ASSERT(src->width >= gsz && src->height >= gsz);
 
+    CT_Image tmp;
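+    // compute the reference on a U8 scratch image; convert to U1 only at the end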
+    if (dst->format == VX_DF_IMAGE_U1)
+        tmp = ct_allocate_image(dst->width, dst->height, VX_DF_IMAGE_U8);
+    else
+        tmp = dst;
+
-    // zero border pixels
+    // set undefined border pixels to 255
     for (j = 0; j < bsz; ++j)
-        for (i = 0; i < dst->width; ++i)
-            dst->data.y[j * dst->stride + i] = dst->data.y[(dst->height - 1 - j) * dst->stride + i] = 255;
-    for (j = bsz; j < dst->height - bsz; ++j)
+        for (i = 0; i < tmp->width; ++i)
+            tmp->data.y[j * tmp->stride + i] = tmp->data.y[(tmp->height - 1 - j) * tmp->stride + i] = 255;
+    for (j = bsz; j < tmp->height - bsz; ++j)
         for (i = 0; i < bsz; ++i)
-            dst->data.y[j * dst->stride + i] = dst->data.y[j * dst->stride + dst->width - 1 - i] = 255;
+            tmp->data.y[j * tmp->stride + i] = tmp->data.y[j * tmp->stride + tmp->width - 1 - i] = 255;
 
     // threshold + nms
-    for (j = bsz; j < dst->height - bsz; ++j)
+    for (j = bsz; j < tmp->height - bsz; ++j)
     {
-        for (i = bsz; i < dst->width - bsz; ++i)
+        for (i = bsz; i < tmp->width - bsz; ++i)
         {
             int32_t dx, dy, e = CREF_NONE;
             uint64_t m1, m2;
@@ -144,21 +154,26 @@
                     e = (m > hi ? CREF_EDGE : CREF_LINK);
             }
 
-            dst->data.y[j * src->stride + i] = e;
+            tmp->data.y[j * tmp->stride + i] = e;
         }
     }
 
     // trace edges
-    for (j = bsz; j < dst->height - bsz; ++j)
-        for (i = bsz; i < dst->width - bsz; ++i)
-            if(dst->data.y[j * dst->stride + i] == CREF_EDGE)
-                follow_edge(dst, i, j);
+    for (j = bsz; j < tmp->height - bsz; ++j)
+        for (i = bsz; i < tmp->width - bsz; ++i)
+            if(tmp->data.y[j * tmp->stride + i] == CREF_EDGE)
+                follow_edge(tmp, i, j);
 
     // clear non-edges
-    for (j = bsz; j < dst->height - bsz; ++j)
-        for (i = bsz; i < dst->width - bsz; ++i)
-            if(dst->data.y[j * dst->stride + i] < 255)
-                dst->data.y[j * dst->stride + i] = 0;
+    for (j = bsz; j < tmp->height - bsz; ++j)
+        for (i = bsz; i < tmp->width - bsz; ++i)
+            if(tmp->data.y[j * tmp->stride + i] < 255)
+                tmp->data.y[j * tmp->stride + i] = 0;
+
+    if (dst->format == VX_DF_IMAGE_U1)
+    {
+        U8_ct_image_to_U1_ct_image(tmp, dst);
+    }
 }
 #endif
 
@@ -170,18 +185,42 @@
     ASSERT_(return 0, src && dst && dist && total_edge_pixels);
     ASSERT_(return 0, src->width == dst->width && src->width == dist->width);
     ASSERT_(return 0, src->height == dst->height && src->height == dist->height);
-    ASSERT_(return 0, src->format == dst->format && src->format == dist->format && src->format == VX_DF_IMAGE_U8);
+    ASSERT_(return 0, dist->format == VX_DF_IMAGE_U8 &&
+                      (src->format == VX_DF_IMAGE_U8 || src->format == VX_DF_IMAGE_U1) &&
+                      (dst->format == VX_DF_IMAGE_U8 || dst->format == VX_DF_IMAGE_U1));
 
     // fill borders with 1 (or 0 for edges)
     for (i = 0; i < dst->width; ++i)
     {
-        dist->data.y[i] = src->data.y[i] == 0 ? 1 : 0;
-        dist->data.y[(dist->height - 1) * dist->stride + i] = src->data.y[(dist->height - 1) * src->stride + i] == 0 ? 1 : 0;
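+        // U1 sources are bit-packed: test the bit at (x + roi.x) within its byte rather than a whole byte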
+        if (src->format == VX_DF_IMAGE_U1)
+        {
+            uint32_t xShftd = i + src->roi.x % 8;
+            dist->data.y[i] =
+                (src->data.y[xShftd / 8] & (1 << (xShftd % 8))) == 0 ? 1 : 0;
+            dist->data.y[(dist->height - 1) * dist->stride + i] =
+                (src->data.y[(dist->height - 1) * ct_stride_bytes(src) + xShftd / 8] & (1 << (xShftd % 8))) == 0 ? 1 : 0;
+        }
+        else
+        {
+            dist->data.y[i] = src->data.y[i] == 0 ? 1 : 0;
+            dist->data.y[(dist->height - 1) * dist->stride + i] = src->data.y[(dist->height - 1) * src->stride + i] == 0 ? 1 : 0;
+        }
     }
     for (j = 1; j < dst->height - 1; ++j)
     {
-        dist->data.y[j * dist->stride] = src->data.y[j * src->stride] == 0 ? 1 : 0;
-        dist->data.y[j * dist->stride + dist->width - 1] = src->data.y[j * src->stride + dist->width - 1] == 0 ? 1 : 0;
+        if (src->format == VX_DF_IMAGE_U1)
+        {
+            uint32_t xShftd = src->roi.x % 8;
+            dist->data.y[j * dist->stride] =
+                (src->data.y[j * ct_stride_bytes(src) + xShftd / 8] & (1 << (xShftd % 8))) == 0 ? 1 : 0;
+            dist->data.y[j * dist->stride + dist->width - 1] =
+                (src->data.y[j * ct_stride_bytes(src) + (xShftd + dist->width - 1) / 8] & (1 << ((xShftd + dist->width - 1) % 8))) == 0 ? 1 : 0;
+        }
+        else
+        {
+            dist->data.y[j * dist->stride] = src->data.y[j * src->stride] == 0 ? 1 : 0;
+            dist->data.y[j * dist->stride + dist->width - 1] = src->data.y[j * src->stride + dist->width - 1] == 0 ? 1 : 0;
+        }
     }
 
     // minimalistic variant of disttransform:
@@ -192,14 +231,21 @@
     {
         for (i = 1; i < src->width-1; ++i)
         {
-            if (src->data.y[j * src->stride + i] != 0)
+            uint32_t xShftd = i + src->roi.x % 8;    // Shift for U1 images to account for ROI
+            if ( src->format == VX_DF_IMAGE_U1
+                 ? (src->data.y[j * ct_stride_bytes(src) + xShftd / 8] & (1 << (xShftd % 8))) != 0
+                 :  src->data.y[j * src->stride + i] != 0
+               )
                 dist->data.y[j * dist->stride + i] = 0;
             else
             {
                 int has_edge = 0;
                 for (k = 0; k < sizeof(offsets)/sizeof(offsets[0]); ++k)
                 {
-                    if (src->data.y[(j + offsets[k][1]) * src->stride + i + offsets[k][0]] != 0)
+                    if ( src->format == VX_DF_IMAGE_U1
+                         ? (src->data.y[(j + offsets[k][1]) * ct_stride_bytes(src) + (xShftd + offsets[k][0]) / 8] & (1 << ((xShftd + offsets[k][0]) % 8))) != 0
+                         : src->data.y[(j + offsets[k][1]) * src->stride +  i + offsets[k][0]] != 0
+                       )
                     {
                         has_edge = 1;
                         break;
@@ -211,13 +257,15 @@
         }
     }
 
-    // count pixels where disttransform(src) < 2 and dst != 0
+    // count: pixels where disttransform(src) < 2 and dst != 0
     total = count = 0;
     for (j = 0; j < dst->height; ++j)
     {
         for (i = 0; i < dst->width; ++i)
         {
-            if (dst->data.y[j * dst->stride + i] != 0)
+            uint32_t xShftd = i + dst->roi.x % 8;
+            if ( dst->format == VX_DF_IMAGE_U1 ? (dst->data.y[j * ct_stride_bytes(dst) + xShftd / 8] & (1 << (xShftd % 8))) != 0
+                                               :  dst->data.y[j * dst->stride + i] != 0 )
             {
                 total += 1;
                 count += (dist->data.y[j * dist->stride + i] < 2) ? 1 : 0;
@@ -304,17 +352,25 @@
 }
 */
 
-static CT_Image get_reference_result(const char* src_name, CT_Image src, int32_t low_thresh, int32_t high_thresh, uint32_t gsz, vx_enum norm)
+static CT_Image get_reference_result(const char* src_name, CT_Image src, int32_t low_thresh, int32_t high_thresh, uint32_t gsz, vx_enum norm, vx_df_image out_format)
 {
 #ifdef USE_OPENCV_GENERATED_REFERENCE
+    CT_Image tmp_dst, dst;
     char buff[1024];
     sprintf(buff, "canny_%ux%u_%d_%d_%s_%s", gsz, gsz, low_thresh, high_thresh, norm == VX_NORM_L1 ? "L1" : "L2", src_name);
     // printf("reading: %s\n", buff);
-    return ct_read_image(buff, 1);
+
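+    // stored reference images are U8; convert to U1 when a binary output format is requested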
+    tmp_dst = ct_read_image(buff, 1);
+    if ( out_format == VX_DF_IMAGE_U1 && (dst = ct_allocate_image(tmp_dst->width, tmp_dst->height, VX_DF_IMAGE_U1)) )
+        U8_ct_image_to_U1_ct_image(tmp_dst, dst);
+    else
+        dst = tmp_dst;
+
+    return dst;
 #else
     CT_Image dst;
     ASSERT_(return 0, src);
-    if (dst = ct_allocate_image(src->width, src->height, VX_DF_IMAGE_U8))
+    if ( (dst = ct_allocate_image(src->width, src->height, out_format)) )
         reference_canny(src, dst, low_thresh, high_thresh, gsz, norm);
     return dst;
 #endif
@@ -330,45 +386,69 @@
     vx_enum norm_type;
     int32_t low_thresh;
     int32_t high_thresh;
+    vx_df_image out_format;
 } canny_arg;
 
-#define BIT_EXACT_ARG(grad, thresh) ARG(#grad "x" #grad " thresh=" #thresh, "lena_gray.bmp", grad, VX_NORM_L1, thresh, thresh)
-#define DIS_BIT_EXACT_ARG(grad, thresh) ARG("DISABLED_" #grad "x" #grad " thresh=" #thresh, "lena_gray.bmp", grad, VX_NORM_L1, thresh, thresh)
+#define BIT_EXACT_ARG_U8(grad, thresh) ARG(#grad "x" #grad " thresh=" #thresh " output=VX_DF_IMAGE_U8", "lena_gray.bmp", grad, VX_NORM_L1, thresh, thresh, VX_DF_IMAGE_U8)
+#define BIT_EXACT_ARG_U1(grad, thresh) ARG("_U1_/" #grad "x" #grad " thresh=" #thresh " output=VX_DF_IMAGE_U1", "lena_gray.bmp", grad, VX_NORM_L1, thresh, thresh, VX_DF_IMAGE_U1)
 
-TEST_WITH_ARG(vxuCanny, BitExactL1, canny_arg, BIT_EXACT_ARG(3, 120), BIT_EXACT_ARG(5, 100), /* DIS_BIT_EXACT_ARG(7, 80 do not enable this argument) */)
+TEST_WITH_ARG(vxuCanny, BitExactL1, canny_arg,
+    BIT_EXACT_ARG_U8(3, 120),
+    BIT_EXACT_ARG_U8(5, 100),
+    BIT_EXACT_ARG_U1(3, 120),
+    BIT_EXACT_ARG_U1(5, 100)
+    )
 {
     vx_image src, dst;
     vx_threshold hyst;
     CT_Image lena, vxdst, refdst;
     vx_int32 low_thresh  = arg_->low_thresh;
     vx_int32 high_thresh = arg_->high_thresh;
-    vx_int32 false_val = 0;
-    vx_int32 true_val = 255;
     vx_border_t border = { VX_BORDER_UNDEFINED, {{ 0 }} };
     vx_int32 border_width = arg_->grad_size/2 + 1;
+    vx_df_image output_format = arg_->out_format;
     vx_context context = context_->vx_context_;
+    vx_df_image input_format = VX_DF_IMAGE_U8;
+    vx_pixel_value_t low_pixel;
+    vx_pixel_value_t high_pixel;
+    memset(&low_pixel, 0, sizeof(low_pixel));
+    memset(&high_pixel, 0, sizeof(high_pixel));
+    low_pixel.U8 = low_thresh;
+    high_pixel.U8 = high_thresh;
+
+    ASSERT((output_format == VX_DF_IMAGE_U8) || (output_format == VX_DF_IMAGE_U1));
 
     ASSERT_EQ_VX_STATUS(VX_SUCCESS, vxSetContextAttribute(context, VX_CONTEXT_IMMEDIATE_BORDER, &border, sizeof(border)));
 
     ASSERT_NO_FAILURE(lena = get_source_image(arg_->filename));
     ASSERT_NO_FAILURE(src = ct_image_to_vx_image(lena, context));
-    ASSERT_VX_OBJECT(dst = vxCreateImage(context, lena->width, lena->height, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
+    ASSERT_VX_OBJECT(dst = vxCreateImage(context, lena->width, lena->height, output_format), VX_TYPE_IMAGE);
 
-    ASSERT_VX_OBJECT(hyst = vxCreateThreshold(context, VX_THRESHOLD_TYPE_RANGE, VX_TYPE_UINT8), VX_TYPE_THRESHOLD);
-    ASSERT_EQ_VX_STATUS(VX_SUCCESS, vxSetThresholdAttribute(hyst, VX_THRESHOLD_THRESHOLD_LOWER, &low_thresh,  sizeof(low_thresh)));
-    ASSERT_EQ_VX_STATUS(VX_SUCCESS, vxSetThresholdAttribute(hyst, VX_THRESHOLD_THRESHOLD_UPPER, &high_thresh, sizeof(high_thresh)));
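+    // create the threshold from the image formats and copy the range, replacing the older per-attribute setters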
+    ASSERT_VX_OBJECT(hyst = vxCreateThresholdForImage(context, VX_THRESHOLD_TYPE_RANGE, input_format, output_format), VX_TYPE_THRESHOLD);
+    VX_CALL(vxCopyThresholdRange(hyst, &low_pixel, &high_pixel, VX_WRITE_ONLY, VX_MEMORY_TYPE_HOST));
-    /* explicitly set FALSE_VALUE and TRUE_VALUE for hyst parameter */
+    /* FALSE_VALUE and TRUE_VALUE of the hyst parameter keep the defaults set at threshold creation */
-    ASSERT_EQ_VX_STATUS(VX_SUCCESS, vxSetThresholdAttribute(hyst, VX_THRESHOLD_FALSE_VALUE, &false_val, sizeof(false_val)));
-    ASSERT_EQ_VX_STATUS(VX_SUCCESS, vxSetThresholdAttribute(hyst, VX_THRESHOLD_TRUE_VALUE, &true_val, sizeof(true_val)));
 
     ASSERT_EQ_VX_STATUS(VX_SUCCESS, vxuCannyEdgeDetector(context, src, hyst, arg_->grad_size, arg_->norm_type, dst));
 
     ASSERT_NO_FAILURE(vxdst = ct_image_from_vx_image(dst));
-    ASSERT_NO_FAILURE(refdst = get_reference_result(arg_->filename, lena, low_thresh, high_thresh, arg_->grad_size, arg_->norm_type));
+    ASSERT_NO_FAILURE(refdst = get_reference_result(arg_->filename, lena, low_thresh, high_thresh, arg_->grad_size,
+                                                    arg_->norm_type, output_format));
 
     ASSERT_NO_FAILURE(ct_adjust_roi(vxdst,  border_width, border_width, border_width, border_width));
     ASSERT_NO_FAILURE(ct_adjust_roi(refdst, border_width, border_width, border_width, border_width));
 
+#if 0
+    printf("=== SRC ===\n");
+    ct_dump_image_info(lena);
+    printf("=== VX ===\n");
+    ct_dump_image_info(vxdst);
+    printf("=== REF ===\n");
+    ct_dump_image_info(refdst);
+    ct_write_image("canny_src.bmp",  lena);
+    ct_write_image("canny_res_vx.bmp", vxdst);
+    ct_write_image("canny_res_ref.bmp",  refdst);
+#endif
+
     ASSERT_EQ_CTIMAGE(refdst, vxdst);
 
     ASSERT_EQ_VX_STATUS(VX_SUCCESS, vxReleaseThreshold(&hyst));
@@ -376,7 +456,12 @@
     ASSERT_EQ_VX_STATUS(VX_SUCCESS, vxReleaseImage(&dst));
 }
 
-TEST_WITH_ARG(vxCanny, BitExactL1, canny_arg, BIT_EXACT_ARG(3, 120), BIT_EXACT_ARG(5, 100), /* DIS_BIT_EXACT_ARG(7, 80 do not enable this argument) */)
+TEST_WITH_ARG(vxCanny, BitExactL1, canny_arg,
+    BIT_EXACT_ARG_U8(3, 120),
+    BIT_EXACT_ARG_U8(5, 100),
+    BIT_EXACT_ARG_U1(3, 120),
+    BIT_EXACT_ARG_U1(5, 100)
+    )
 {
     vx_image src, dst;
     vx_graph graph;
@@ -385,22 +470,27 @@
     CT_Image lena, vxdst, refdst;
     vx_int32 low_thresh  = arg_->low_thresh;
     vx_int32 high_thresh = arg_->high_thresh;
-    vx_int32 false_val = 0;
-    vx_int32 true_val = 255;
     vx_border_t border = { VX_BORDER_UNDEFINED, {{ 0 }} };
     vx_int32 border_width = arg_->grad_size/2 + 1;
+    vx_df_image output_format = arg_->out_format;
     vx_context context = context_->vx_context_;
+    vx_df_image input_format = VX_DF_IMAGE_U8;
+    vx_pixel_value_t low_pixel;
+    vx_pixel_value_t high_pixel;
+    memset(&low_pixel, 0, sizeof(low_pixel));
+    memset(&high_pixel, 0, sizeof(high_pixel));
+    low_pixel.U8 = low_thresh;
+    high_pixel.U8 = high_thresh;
+
+    ASSERT((output_format == VX_DF_IMAGE_U8) || (output_format == VX_DF_IMAGE_U1));
 
     ASSERT_NO_FAILURE(lena = get_source_image(arg_->filename));
     ASSERT_NO_FAILURE(src = ct_image_to_vx_image(lena, context));
-    ASSERT_VX_OBJECT(dst = vxCreateImage(context, lena->width, lena->height, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
+    ASSERT_VX_OBJECT(dst = vxCreateImage(context, lena->width, lena->height, output_format), VX_TYPE_IMAGE);
 
-    ASSERT_VX_OBJECT(hyst = vxCreateThreshold(context, VX_THRESHOLD_TYPE_RANGE, VX_TYPE_UINT8), VX_TYPE_THRESHOLD);
-    ASSERT_EQ_VX_STATUS(VX_SUCCESS, vxSetThresholdAttribute(hyst, VX_THRESHOLD_THRESHOLD_LOWER, &low_thresh,  sizeof(low_thresh)));
-    ASSERT_EQ_VX_STATUS(VX_SUCCESS, vxSetThresholdAttribute(hyst, VX_THRESHOLD_THRESHOLD_UPPER, &high_thresh, sizeof(high_thresh)));
+    ASSERT_VX_OBJECT(hyst = vxCreateThresholdForImage(context, VX_THRESHOLD_TYPE_RANGE, input_format, output_format), VX_TYPE_THRESHOLD);
+    VX_CALL(vxCopyThresholdRange(hyst, &low_pixel, &high_pixel, VX_WRITE_ONLY, VX_MEMORY_TYPE_HOST));
-    /* explicitly set FALSE_VALUE and TRUE_VALUE for hyst parameter */
+    /* FALSE_VALUE and TRUE_VALUE of the hyst parameter keep the defaults set at threshold creation */
-    ASSERT_EQ_VX_STATUS(VX_SUCCESS, vxSetThresholdAttribute(hyst, VX_THRESHOLD_FALSE_VALUE, &false_val, sizeof(false_val)));
-    ASSERT_EQ_VX_STATUS(VX_SUCCESS, vxSetThresholdAttribute(hyst, VX_THRESHOLD_TRUE_VALUE, &true_val, sizeof(true_val)));
 
     ASSERT_VX_OBJECT(graph = vxCreateGraph(context), VX_TYPE_GRAPH);
     ASSERT_VX_OBJECT(node = vxCannyEdgeDetectorNode(graph, src, hyst, arg_->grad_size, arg_->norm_type, dst), VX_TYPE_NODE);
@@ -416,7 +506,8 @@
 #endif
 
     ASSERT_NO_FAILURE(vxdst = ct_image_from_vx_image(dst));
-    ASSERT_NO_FAILURE(refdst = get_reference_result(arg_->filename, lena, low_thresh, high_thresh, arg_->grad_size, arg_->norm_type));
+    ASSERT_NO_FAILURE(refdst = get_reference_result(arg_->filename, lena, low_thresh, high_thresh, arg_->grad_size,
+                                                    arg_->norm_type, output_format));
 
     ASSERT_NO_FAILURE(ct_adjust_roi(vxdst,  border_width, border_width, border_width, border_width));
     ASSERT_NO_FAILURE(ct_adjust_roi(refdst, border_width, border_width, border_width, border_width));
@@ -424,6 +515,18 @@
     ASSERT_EQ_VX_STATUS(VX_SUCCESS, vxReleaseNode(&node));
     ASSERT_EQ_VX_STATUS(VX_SUCCESS, vxReleaseGraph(&graph));
 
+#if 0
+    printf("=== SRC ===\n");
+    ct_dump_image_info(lena);
+    printf("=== VX ===\n");
+    ct_dump_image_info(vxdst);
+    printf("=== REF ===\n");
+    ct_dump_image_info(refdst);
+    ct_write_image("canny_src.bmp",  lena);
+    ct_write_image("canny_res_vx.bmp", vxdst);
+    ct_write_image("canny_res_ref.bmp",  refdst);
+#endif
+
     ASSERT_EQ_CTIMAGE(refdst, vxdst);
 
     ASSERT_EQ_VX_STATUS(VX_SUCCESS, vxReleaseThreshold(&hyst));
@@ -431,41 +534,71 @@
     ASSERT_EQ_VX_STATUS(VX_SUCCESS, vxReleaseImage(&dst));
 }
 
-#define CANNY_ARG(grad, norm, lo, hi, file) ARG(#file "/" #norm " " #grad "x" #grad " thresh=(" #lo ", " #hi ")", #file ".bmp", grad, VX_NORM_##norm, lo, hi)
-#define DISABLED_CANNY_ARG(grad, norm, lo, hi, file) ARG("DISABLED_" #file "/" #norm " " #grad "x" #grad " thresh=(" #lo ", " #hi ")", #file ".bmp", grad, VX_NORM_##norm, lo, hi)
+#define CANNY_ARG_U8(grad, norm, lo, hi, file) ARG(#file "/" #norm " " #grad "x" #grad " thresh=(" #lo ", " #hi ") output=VX_DF_IMAGE_U8", #file ".bmp", grad, VX_NORM_##norm, lo, hi, VX_DF_IMAGE_U8)
+#define CANNY_ARG_U1(grad, norm, lo, hi, file) ARG("_U1_/" #file "/" #norm " " #grad "x" #grad " thresh=(" #lo ", " #hi ") output=VX_DF_IMAGE_U1", #file ".bmp", grad, VX_NORM_##norm, lo, hi, VX_DF_IMAGE_U1)
 
 TEST_WITH_ARG(vxuCanny, Lena, canny_arg,
-    CANNY_ARG(3, L1, 100, 120, lena_gray),
-    CANNY_ARG(3, L2, 100, 120, lena_gray),
-    CANNY_ARG(3, L1, 90,  130, lena_gray),
-    CANNY_ARG(3, L2, 90,  130, lena_gray),
-    CANNY_ARG(3, L1, 70,  71 , lena_gray),
-    CANNY_ARG(3, L2, 70,  71 , lena_gray),
-    CANNY_ARG(3, L1, 150, 220, lena_gray),
-    CANNY_ARG(3, L2, 150, 220, lena_gray),
-    CANNY_ARG(5, L1, 100, 120, lena_gray),
-    CANNY_ARG(5, L2, 100, 120, lena_gray),
-    CANNY_ARG(7, L1, 100, 120, lena_gray),
-    CANNY_ARG(7, L2, 100, 120, lena_gray),
+    CANNY_ARG_U8(3, L1, 100, 120, lena_gray),
+    CANNY_ARG_U8(3, L2, 100, 120, lena_gray),
+    CANNY_ARG_U8(3, L1, 90,  130, lena_gray),
+    CANNY_ARG_U8(3, L2, 90,  130, lena_gray),
+    CANNY_ARG_U8(3, L1, 70,  71 , lena_gray),
+    CANNY_ARG_U8(3, L2, 70,  71 , lena_gray),
+    CANNY_ARG_U8(3, L1, 150, 220, lena_gray),
+    CANNY_ARG_U8(3, L2, 150, 220, lena_gray),
+    CANNY_ARG_U8(5, L1, 100, 120, lena_gray),
+    CANNY_ARG_U8(5, L2, 100, 120, lena_gray),
+    CANNY_ARG_U8(7, L1, 100, 120, lena_gray),
+    CANNY_ARG_U8(7, L2, 100, 120, lena_gray),
 
-    CANNY_ARG(5, L1, 1200, 1440, lena_gray),
-    CANNY_ARG(5, L2, 1200, 1440, lena_gray),
-    CANNY_ARG(7, L1, 16000, 19200, lena_gray),
-    CANNY_ARG(7, L2, 16000, 19200, lena_gray),
+    CANNY_ARG_U8(5, L1, 1200, 1440, lena_gray),
+    CANNY_ARG_U8(5, L2, 1200, 1440, lena_gray),
+    CANNY_ARG_U8(7, L1, 16000, 19200, lena_gray),
+    CANNY_ARG_U8(7, L2, 16000, 19200, lena_gray),
 
-    CANNY_ARG(3, L1, 100, 120, blurred_lena_gray),
-    CANNY_ARG(3, L2, 100, 120, blurred_lena_gray),
-    CANNY_ARG(3, L1, 90,  125, blurred_lena_gray),
-    CANNY_ARG(3, L2, 90,  130, blurred_lena_gray),
-    CANNY_ARG(3, L1, 70,  71 , blurred_lena_gray),
-    CANNY_ARG(3, L2, 70,  71 , blurred_lena_gray),
-    CANNY_ARG(3, L1, 150, 220, blurred_lena_gray),
-    CANNY_ARG(3, L2, 150, 220, blurred_lena_gray),
-    CANNY_ARG(5, L1, 100, 120, blurred_lena_gray),
-    CANNY_ARG(5, L2, 100, 120, blurred_lena_gray),
-    CANNY_ARG(7, L1, 100, 120, blurred_lena_gray),
-    CANNY_ARG(7, L2, 100, 120, blurred_lena_gray),
+    CANNY_ARG_U8(3, L1, 100, 120, blurred_lena_gray),
+    CANNY_ARG_U8(3, L2, 100, 120, blurred_lena_gray),
+    CANNY_ARG_U8(3, L1, 90,  125, blurred_lena_gray),
+    CANNY_ARG_U8(3, L2, 90,  130, blurred_lena_gray),
+    CANNY_ARG_U8(3, L1, 70,  71 , blurred_lena_gray),
+    CANNY_ARG_U8(3, L2, 70,  71 , blurred_lena_gray),
+    CANNY_ARG_U8(3, L1, 150, 220, blurred_lena_gray),
+    CANNY_ARG_U8(3, L2, 150, 220, blurred_lena_gray),
+    CANNY_ARG_U8(5, L1, 100, 120, blurred_lena_gray),
+    CANNY_ARG_U8(5, L2, 100, 120, blurred_lena_gray),
+    CANNY_ARG_U8(7, L1, 100, 120, blurred_lena_gray),
+    CANNY_ARG_U8(7, L2, 100, 120, blurred_lena_gray),
 
+    CANNY_ARG_U1(3, L1, 100, 120, lena_gray),
+    CANNY_ARG_U1(3, L2, 100, 120, lena_gray),
+    CANNY_ARG_U1(3, L1, 90,  130, lena_gray),
+    CANNY_ARG_U1(3, L2, 90,  130, lena_gray),
+    CANNY_ARG_U1(3, L1, 70,  71 , lena_gray),
+    CANNY_ARG_U1(3, L2, 70,  71 , lena_gray),
+    CANNY_ARG_U1(3, L1, 150, 220, lena_gray),
+    CANNY_ARG_U1(3, L2, 150, 220, lena_gray),
+    CANNY_ARG_U1(5, L1, 100, 120, lena_gray),
+    CANNY_ARG_U1(5, L2, 100, 120, lena_gray),
+    CANNY_ARG_U1(7, L1, 100, 120, lena_gray),
+    CANNY_ARG_U1(7, L2, 100, 120, lena_gray),
+
+    CANNY_ARG_U1(5, L1, 1200, 1440, lena_gray),
+    CANNY_ARG_U1(5, L2, 1200, 1440, lena_gray),
+    CANNY_ARG_U1(7, L1, 16000, 19200, lena_gray),
+    CANNY_ARG_U1(7, L2, 16000, 19200, lena_gray),
+
+    CANNY_ARG_U1(3, L1, 100, 120, blurred_lena_gray),
+    CANNY_ARG_U1(3, L2, 100, 120, blurred_lena_gray),
+    CANNY_ARG_U1(3, L1, 90,  125, blurred_lena_gray),
+    CANNY_ARG_U1(3, L2, 90,  130, blurred_lena_gray),
+    CANNY_ARG_U1(3, L1, 70,  71 , blurred_lena_gray),
+    CANNY_ARG_U1(3, L2, 70,  71 , blurred_lena_gray),
+    CANNY_ARG_U1(3, L1, 150, 220, blurred_lena_gray),
+    CANNY_ARG_U1(3, L2, 150, 220, blurred_lena_gray),
+    CANNY_ARG_U1(5, L1, 100, 120, blurred_lena_gray),
+    CANNY_ARG_U1(5, L2, 100, 120, blurred_lena_gray),
+    CANNY_ARG_U1(7, L1, 100, 120, blurred_lena_gray),
+    CANNY_ARG_U1(7, L2, 100, 120, blurred_lena_gray),
 )
 {
     uint32_t total, count;
@@ -476,27 +609,41 @@
     vx_int32 high_thresh = arg_->high_thresh;
     vx_border_t border = { VX_BORDER_UNDEFINED, {{ 0 }} };
     vx_int32 border_width = arg_->grad_size/2 + 1;
+    vx_df_image output_format = arg_->out_format;
     vx_context context = context_->vx_context_;
-    vx_enum thresh_data_type = VX_TYPE_UINT8;
+    vx_df_image input_format = VX_DF_IMAGE_U8;
+    vx_pixel_value_t low_pixel;
+    vx_pixel_value_t high_pixel;
+    memset(&low_pixel, 0, sizeof(low_pixel));
+    memset(&high_pixel, 0, sizeof(high_pixel));
+    low_pixel.U8 = low_thresh;
+    high_pixel.U8 = high_thresh;
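+    // thresholds above the U8 range require an S16 input format for the threshold object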
     if (low_thresh > 255)
-        thresh_data_type = VX_TYPE_INT16;
+    {
+        input_format = VX_DF_IMAGE_S16;
+        low_pixel.S16 = low_thresh;
+        high_pixel.S16 = high_thresh;
+    }
+
+    ASSERT((output_format == VX_DF_IMAGE_U8) || (output_format == VX_DF_IMAGE_U1));
 
     ASSERT_EQ_VX_STATUS(VX_SUCCESS, vxSetContextAttribute(context, VX_CONTEXT_IMMEDIATE_BORDER, &border, sizeof(border)));
 
     ASSERT_NO_FAILURE(lena = get_source_image(arg_->filename));
     ASSERT_NO_FAILURE(src = ct_image_to_vx_image(lena, context));
-    ASSERT_VX_OBJECT(dst = vxCreateImage(context, lena->width, lena->height, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
+    ASSERT_VX_OBJECT(dst = vxCreateImage(context, lena->width, lena->height, output_format), VX_TYPE_IMAGE);
 
-    ASSERT_VX_OBJECT(hyst = vxCreateThreshold(context, VX_THRESHOLD_TYPE_RANGE, thresh_data_type), VX_TYPE_THRESHOLD);
-    ASSERT_EQ_VX_STATUS(VX_SUCCESS, vxSetThresholdAttribute(hyst, VX_THRESHOLD_THRESHOLD_LOWER, &low_thresh,  sizeof(low_thresh)));
-    ASSERT_EQ_VX_STATUS(VX_SUCCESS, vxSetThresholdAttribute(hyst, VX_THRESHOLD_THRESHOLD_UPPER, &high_thresh, sizeof(high_thresh)));
+    ASSERT_VX_OBJECT(hyst = vxCreateThresholdForImage(context, VX_THRESHOLD_TYPE_RANGE, input_format, output_format), VX_TYPE_THRESHOLD);
+    VX_CALL(vxCopyThresholdRange(hyst, &low_pixel, &high_pixel, VX_WRITE_ONLY, VX_MEMORY_TYPE_HOST));
-    /* FALSE_VALUE and TRUE_VALUE of hyst parameter are set to their default values (0, 255) by vxCreateThreshold */
-    /* test reference data are computed with assumption that FALSE_VALUE and TRUE_VALUE set to 0 and 255 */
+    /* FALSE_VALUE and TRUE_VALUE of the hyst parameter are left at the defaults chosen by vxCreateThresholdForImage */
+    /* the test reference data are computed assuming FALSE_VALUE and TRUE_VALUE of 0 and 255 */
+
     ASSERT_EQ_VX_STATUS(VX_SUCCESS, vxuCannyEdgeDetector(context, src, hyst, arg_->grad_size, arg_->norm_type, dst));
 
     ASSERT_NO_FAILURE(vxdst = ct_image_from_vx_image(dst));
+    ASSERT_NO_FAILURE(refdst = get_reference_result(arg_->filename, lena, low_thresh, high_thresh, arg_->grad_size,
+                                                    arg_->norm_type, output_format));
 
-    ASSERT_NO_FAILURE(refdst = get_reference_result(arg_->filename, lena, low_thresh, high_thresh, arg_->grad_size, arg_->norm_type));
     ASSERT_NO_FAILURE(ct_adjust_roi(vxdst,  border_width, border_width, border_width, border_width));
     ASSERT_NO_FAILURE(ct_adjust_roi(refdst, border_width, border_width, border_width, border_width));
 
@@ -505,7 +652,7 @@
     // disttransform(x,y) < tolerance for all (x,y) such that output(x,y) = 255,
     // where disttransform is the distance transform image with Euclidean distance
     // of the reference(x,y) (canny edge ground truth). This condition should be
-    // satisfied by 98% of output edge pixels, tolerance = 2.
+    // satisfied by 95% of output edge pixels, tolerance = 2.
     ASSERT_NO_FAILURE(count = disttransform2_metric(refdst, vxdst, dist, &total));
 
     if (count < CANNY_ACCEPTANCE_THRESHOLD * total)
@@ -520,7 +667,7 @@
     // And the inverse: disttransform(x,y) < tolerance for all (x,y) such that
     // reference(x,y) = 255, where disttransform is the distance transform image
     // with Euclidean distance of the output(x,y) (canny edge ground truth). This
-    // condition should be satisfied by 98% of reference edge pixels, tolerance = 2.
+    // condition should be satisfied by 95% of reference edge pixels, tolerance = 2.
     ASSERT_NO_FAILURE(count = disttransform2_metric(vxdst, refdst, dist, &total));
 
     if (count < CANNY_ACCEPTANCE_THRESHOLD * total)
@@ -538,36 +685,67 @@
 }
 
 TEST_WITH_ARG(vxCanny, Lena, canny_arg,
-    CANNY_ARG(3, L1, 100, 120, lena_gray),
-    CANNY_ARG(3, L2, 100, 120, lena_gray),
-    CANNY_ARG(3, L1, 90,  130, lena_gray),
-    CANNY_ARG(3, L2, 90,  130, lena_gray),
-    CANNY_ARG(3, L1, 70,  71 , lena_gray),
-    CANNY_ARG(3, L2, 70,  71 , lena_gray),
-    CANNY_ARG(3, L1, 150, 220, lena_gray),
-    CANNY_ARG(3, L2, 150, 220, lena_gray),
-    CANNY_ARG(5, L1, 100, 120, lena_gray),
-    CANNY_ARG(5, L2, 100, 120, lena_gray),
-    CANNY_ARG(7, L1, 100, 120, lena_gray),
-    CANNY_ARG(7, L2, 100, 120, lena_gray),
+    CANNY_ARG_U8(3, L1, 100, 120, lena_gray),
+    CANNY_ARG_U8(3, L2, 100, 120, lena_gray),
+    CANNY_ARG_U8(3, L1, 90,  130, lena_gray),
+    CANNY_ARG_U8(3, L2, 90,  130, lena_gray),
+    CANNY_ARG_U8(3, L1, 70,  71 , lena_gray),
+    CANNY_ARG_U8(3, L2, 70,  71 , lena_gray),
+    CANNY_ARG_U8(3, L1, 150, 220, lena_gray),
+    CANNY_ARG_U8(3, L2, 150, 220, lena_gray),
+    CANNY_ARG_U8(5, L1, 100, 120, lena_gray),
+    CANNY_ARG_U8(5, L2, 100, 120, lena_gray),
+    CANNY_ARG_U8(7, L1, 100, 120, lena_gray),
+    CANNY_ARG_U8(7, L2, 100, 120, lena_gray),
 
-    CANNY_ARG(5, L1, 1200, 1440, lena_gray),
-    CANNY_ARG(5, L2, 1200, 1440, lena_gray),
-    CANNY_ARG(7, L1, 16000, 19200, lena_gray),
-    CANNY_ARG(7, L2, 16000, 19200, lena_gray),
+    CANNY_ARG_U8(5, L1, 1200, 1440, lena_gray),
+    CANNY_ARG_U8(5, L2, 1200, 1440, lena_gray),
+    CANNY_ARG_U8(7, L1, 16000, 19200, lena_gray),
+    CANNY_ARG_U8(7, L2, 16000, 19200, lena_gray),
 
-    CANNY_ARG(3, L1, 100, 120, blurred_lena_gray),
-    CANNY_ARG(3, L2, 100, 120, blurred_lena_gray),
-    CANNY_ARG(3, L1, 90,  125, blurred_lena_gray),
-    CANNY_ARG(3, L2, 90,  130, blurred_lena_gray),
-    CANNY_ARG(3, L1, 70,  71 , blurred_lena_gray),
-    CANNY_ARG(3, L2, 70,  71 , blurred_lena_gray),
-    CANNY_ARG(3, L1, 150, 220, blurred_lena_gray),
-    CANNY_ARG(3, L2, 150, 220, blurred_lena_gray),
-    CANNY_ARG(5, L1, 100, 120, blurred_lena_gray),
-    CANNY_ARG(5, L2, 100, 120, blurred_lena_gray),
-    CANNY_ARG(7, L1, 100, 120, blurred_lena_gray),
-    CANNY_ARG(7, L2, 100, 120, blurred_lena_gray),
+    CANNY_ARG_U8(3, L1, 100, 120, blurred_lena_gray),
+    CANNY_ARG_U8(3, L2, 100, 120, blurred_lena_gray),
+    CANNY_ARG_U8(3, L1, 90,  125, blurred_lena_gray),
+    CANNY_ARG_U8(3, L2, 90,  130, blurred_lena_gray),
+    CANNY_ARG_U8(3, L1, 70,  71 , blurred_lena_gray),
+    CANNY_ARG_U8(3, L2, 70,  71 , blurred_lena_gray),
+    CANNY_ARG_U8(3, L1, 150, 220, blurred_lena_gray),
+    CANNY_ARG_U8(3, L2, 150, 220, blurred_lena_gray),
+    CANNY_ARG_U8(5, L1, 100, 120, blurred_lena_gray),
+    CANNY_ARG_U8(5, L2, 100, 120, blurred_lena_gray),
+    CANNY_ARG_U8(7, L1, 100, 120, blurred_lena_gray),
+    CANNY_ARG_U8(7, L2, 100, 120, blurred_lena_gray),
+
+    CANNY_ARG_U1(3, L1, 100, 120, lena_gray),
+    CANNY_ARG_U1(3, L2, 100, 120, lena_gray),
+    CANNY_ARG_U1(3, L1, 90,  130, lena_gray),
+    CANNY_ARG_U1(3, L2, 90,  130, lena_gray),
+    CANNY_ARG_U1(3, L1, 70,  71 , lena_gray),
+    CANNY_ARG_U1(3, L2, 70,  71 , lena_gray),
+    CANNY_ARG_U1(3, L1, 150, 220, lena_gray),
+    CANNY_ARG_U1(3, L2, 150, 220, lena_gray),
+    CANNY_ARG_U1(5, L1, 100, 120, lena_gray),
+    CANNY_ARG_U1(5, L2, 100, 120, lena_gray),
+    CANNY_ARG_U1(7, L1, 100, 120, lena_gray),
+    CANNY_ARG_U1(7, L2, 100, 120, lena_gray),
+
+    CANNY_ARG_U1(5, L1, 1200, 1440, lena_gray),
+    CANNY_ARG_U1(5, L2, 1200, 1440, lena_gray),
+    CANNY_ARG_U1(7, L1, 16000, 19200, lena_gray),
+    CANNY_ARG_U1(7, L2, 16000, 19200, lena_gray),
+
+    CANNY_ARG_U1(3, L1, 100, 120, blurred_lena_gray),
+    CANNY_ARG_U1(3, L2, 100, 120, blurred_lena_gray),
+    CANNY_ARG_U1(3, L1, 90,  125, blurred_lena_gray),
+    CANNY_ARG_U1(3, L2, 90,  130, blurred_lena_gray),
+    CANNY_ARG_U1(3, L1, 70,  71 , blurred_lena_gray),
+    CANNY_ARG_U1(3, L2, 70,  71 , blurred_lena_gray),
+    CANNY_ARG_U1(3, L1, 150, 220, blurred_lena_gray),
+    CANNY_ARG_U1(3, L2, 150, 220, blurred_lena_gray),
+    CANNY_ARG_U1(5, L1, 100, 120, blurred_lena_gray),
+    CANNY_ARG_U1(5, L2, 100, 120, blurred_lena_gray),
+    CANNY_ARG_U1(7, L1, 100, 120, blurred_lena_gray),
+    CANNY_ARG_U1(7, L2, 100, 120, blurred_lena_gray),
 
 )
 {
@@ -581,18 +759,30 @@
     vx_int32 high_thresh = arg_->high_thresh;
     vx_border_t border = { VX_BORDER_UNDEFINED, {{ 0 }} };
     vx_int32 border_width = arg_->grad_size/2 + 1;
+    vx_df_image output_format = arg_->out_format;
     vx_context context = context_->vx_context_;
-    vx_enum thresh_data_type = VX_TYPE_UINT8;
+    vx_df_image input_format = VX_DF_IMAGE_U8;
+    vx_pixel_value_t low_pixel;
+    vx_pixel_value_t high_pixel;
+    memset(&low_pixel, 0, sizeof(low_pixel));
+    memset(&high_pixel, 0, sizeof(high_pixel));
+    low_pixel.U8 = low_thresh;
+    high_pixel.U8 = high_thresh;
     if (low_thresh > 255)
-        thresh_data_type = VX_TYPE_INT16;
+    {
+        input_format = VX_DF_IMAGE_S16;
+        low_pixel.S16 = low_thresh;
+        high_pixel.S16 = high_thresh;
+    }
+
+    ASSERT((output_format == VX_DF_IMAGE_U8) || (output_format == VX_DF_IMAGE_U1));
 
     ASSERT_NO_FAILURE(lena = get_source_image(arg_->filename));
     ASSERT_NO_FAILURE(src = ct_image_to_vx_image(lena, context));
-    ASSERT_VX_OBJECT(dst = vxCreateImage(context, lena->width, lena->height, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
+    ASSERT_VX_OBJECT(dst = vxCreateImage(context, lena->width, lena->height, output_format), VX_TYPE_IMAGE);
 
-    ASSERT_VX_OBJECT(hyst = vxCreateThreshold(context, VX_THRESHOLD_TYPE_RANGE, thresh_data_type), VX_TYPE_THRESHOLD);
-    ASSERT_EQ_VX_STATUS(VX_SUCCESS, vxSetThresholdAttribute(hyst, VX_THRESHOLD_THRESHOLD_LOWER, &low_thresh,  sizeof(low_thresh)));
-    ASSERT_EQ_VX_STATUS(VX_SUCCESS, vxSetThresholdAttribute(hyst, VX_THRESHOLD_THRESHOLD_UPPER, &high_thresh, sizeof(high_thresh)));
+    ASSERT_VX_OBJECT(hyst = vxCreateThresholdForImage(context, VX_THRESHOLD_TYPE_RANGE, input_format, output_format), VX_TYPE_THRESHOLD);
+    VX_CALL(vxCopyThresholdRange(hyst, &low_pixel, &high_pixel, VX_WRITE_ONLY, VX_MEMORY_TYPE_HOST));
     /* FALSE_VALUE and TRUE_VALUE of hyst parameter are set to their default values (0, 255) by vxCreateThreshold */
     /* test reference data are computed with assumption that FALSE_VALUE and TRUE_VALUE set to 0 and 255 */
 
@@ -612,7 +802,8 @@
     ASSERT_EQ_VX_STATUS(VX_SUCCESS, vxReleaseGraph(&graph));
 
     ASSERT_NO_FAILURE(vxdst = ct_image_from_vx_image(dst));
-    ASSERT_NO_FAILURE(refdst = get_reference_result(arg_->filename, lena, low_thresh, high_thresh, arg_->grad_size, arg_->norm_type));
+    ASSERT_NO_FAILURE(refdst = get_reference_result(arg_->filename, lena, low_thresh, high_thresh, arg_->grad_size,
+                                                    arg_->norm_type, output_format));
 
     ASSERT_NO_FAILURE(ct_adjust_roi(vxdst,  border_width, border_width, border_width, border_width));
     ASSERT_NO_FAILURE(ct_adjust_roi(refdst, border_width, border_width, border_width, border_width));
@@ -622,8 +813,9 @@
     // disttransform(x,y) < tolerance for all (x,y) such that output(x,y) = 255,
     // where disttransform is the distance transform image with Euclidean distance
     // of the reference(x,y) (canny edge ground truth). This condition should be
-    // satisfied by 98% of output edge pixels, tolerance = 2.
+    // satisfied by 95% of output edge pixels, tolerance = 2.
     ASSERT_NO_FAILURE(count = disttransform2_metric(refdst, vxdst, dist, &total));
+
     if (count < CANNY_ACCEPTANCE_THRESHOLD * total)
     {
         CT_RecordFailureAtFormat("disttransform(reference) < 2 only for %u of %u pixels of output edges which is %.2f%% < %.2f%%", __FUNCTION__, __FILE__, __LINE__,
@@ -636,7 +828,7 @@
     // And the inverse: disttransform(x,y) < tolerance for all (x,y) such that
     // reference(x,y) = 255, where disttransform is the distance transform image
     // with Euclidean distance of the output(x,y) (canny edge ground truth). This
-    // condition should be satisfied by 98% of reference edge pixels, tolerance = 2.
+    // condition should be satisfied by 95% of reference edge pixels, tolerance = 2.
     ASSERT_NO_FAILURE(count = disttransform2_metric(vxdst, refdst, dist, &total));
     if (count < CANNY_ACCEPTANCE_THRESHOLD * total)
     {
@@ -654,3 +846,5 @@
 
 TESTCASE_TESTS(vxuCanny, DISABLED_BitExactL1, Lena)
 TESTCASE_TESTS(vxCanny,  DISABLED_BitExactL1, Lena)
+
+#endif //OPENVX_USE_ENHANCED_VISION || OPENVX_CONFORMANCE_VISION
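
The Canny hunks above migrate threshold creation to the newer object API: vxCreateThreshold plus two vxSetThresholdAttribute calls become vxCreateThresholdForImage plus a single vxCopyThresholdRange. A minimal sketch of that pattern for the U8-input/U8-output case (the helper name is illustrative, not part of the test suite):

    #include <string.h>
    #include <VX/vx.h>

    /* Sketch: create a range threshold for U8 input producing a U8 mask,
     * then upload the lower/upper bounds from host memory in one call. */
    static vx_threshold make_hyst_threshold(vx_context context, vx_int32 lo, vx_int32 hi)
    {
        vx_pixel_value_t lower, upper;
        memset(&lower, 0, sizeof(lower));
        memset(&upper, 0, sizeof(upper));
        lower.U8 = (vx_uint8)lo;
        upper.U8 = (vx_uint8)hi;

        vx_threshold hyst = vxCreateThresholdForImage(context, VX_THRESHOLD_TYPE_RANGE,
                                                       VX_DF_IMAGE_U8, VX_DF_IMAGE_U8);
        /* Replaces the former VX_THRESHOLD_THRESHOLD_LOWER/UPPER attribute writes. */
        vxCopyThresholdRange(hyst, &lower, &upper, VX_WRITE_ONLY, VX_MEMORY_TYPE_HOST);
        return hyst;
    }
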
diff --git a/test_conformance/test_channelcombine.c b/test_conformance/test_channelcombine.c
index 0171585..8e9ce53 100644
--- a/test_conformance/test_channelcombine.c
+++ b/test_conformance/test_channelcombine.c
@@ -1,4 +1,4 @@
-/* 
+/*
 
  * Copyright (c) 2012-2017 The Khronos Group Inc.
  *
@@ -15,6 +15,8 @@
  * limitations under the License.
  */
 
+#if defined OPENVX_USE_ENHANCED_VISION || OPENVX_CONFORMANCE_VISION
+
 #include "test_engine/test.h"
 #include <VX/vx.h>
 #include <VX/vxu.h>
@@ -307,3 +309,5 @@
         testGraphProcessing,
         testImmediateProcessing
 )
+
+#endif //OPENVX_USE_ENHANCED_VISION || OPENVX_CONFORMANCE_VISION
diff --git a/test_conformance/test_channelextract.c b/test_conformance/test_channelextract.c
index ad7629c..4226f03 100644
--- a/test_conformance/test_channelextract.c
+++ b/test_conformance/test_channelextract.c
@@ -1,4 +1,4 @@
-/* 
+/*
 
  * Copyright (c) 2012-2017 The Khronos Group Inc.
  *
@@ -15,6 +15,8 @@
  * limitations under the License.
  */
 
+#if defined OPENVX_USE_ENHANCED_VISION || OPENVX_CONFORMANCE_VISION
+
 #include "test_engine/test.h"
 #include <VX/vx.h>
 #include <VX/vxu.h>
@@ -257,3 +259,5 @@
         testGraphProcessing,
         testImmediateProcessing
 )
+
+#endif //OPENVX_USE_ENHANCED_VISION || OPENVX_CONFORMANCE_VISION
diff --git a/test_conformance/test_controlflow.c b/test_conformance/test_controlflow.c
index c3523c7..4153fa1 100644
--- a/test_conformance/test_controlflow.c
+++ b/test_conformance/test_controlflow.c
@@ -15,6 +15,8 @@
  * limitations under the License.
  */
 
+#ifdef OPENVX_USE_ENHANCED_VISION
+
 #include <math.h>
 #include <float.h>
 #include <VX/vx.h>
@@ -80,7 +82,7 @@
         vx_size stride = img_width*2;
         vx_size stride_y = sizeof(vx_coordinates2df_t) * (stride);
         vx_size size = stride * img_height*2;
-        vx_coordinates2df_t* ptr_w = malloc(sizeof(vx_coordinates2df_t) * size);
+        vx_coordinates2df_t* ptr_w = ct_alloc_mem(sizeof(vx_coordinates2df_t) * size);
         for (vx_size i = 0; i < img_height*2; i++)
         {
             for (vx_size j = 0; j < img_width*2; j++)
@@ -93,7 +95,7 @@
         vxCopyRemapPatch(remap, &rect, stride_y, ptr_w, VX_TYPE_COORDINATES2DF, VX_WRITE_ONLY, VX_MEMORY_TYPE_HOST);
 
         ref = (vx_reference)remap;
-        free(ptr_w);
+        ct_free_mem(ptr_w);
         break;
     }
     case VX_TYPE_LUT:
@@ -108,13 +110,13 @@
         vxReleaseScalar(&scalar);
         break;
     case VX_TYPE_TENSOR:
-        dims = malloc(tensor_dims_num * sizeof(vx_size));
+        dims = ct_alloc_mem(tensor_dims_num * sizeof(vx_size));
         for(vx_size i = 0; i < tensor_dims_num; i++)
         {
             dims[i] = tensor_dims_length;
         }
         ref = (vx_reference)vxCreateTensor(context, tensor_dims_num, dims, VX_TYPE_UINT8, 0);
-        free(dims);
+        ct_free_mem(dims);
         break;
     default:
         break;
@@ -482,62 +484,62 @@
     switch (result_type)
     {
     case VX_TYPE_CHAR:
-        VX_CALL(vxReadScalarValue(o, &o_value.chr));
+        VX_CALL(vxCopyScalar(o, (void *)&o_value.chr, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
         ASSERT_EQ_INT(o_check.chr, o_value.chr);
         break;
 
     case VX_TYPE_INT8:
-        VX_CALL(vxReadScalarValue(o, &o_value.s08));
+        VX_CALL(vxCopyScalar(o, (void *)&o_value.s08, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
         ASSERT_EQ_INT(o_check.s08, o_value.s08);
         break;
 
     case VX_TYPE_UINT8:
-        VX_CALL(vxReadScalarValue(o, &o_value.u08));
+        VX_CALL(vxCopyScalar(o, (void *)&o_value.u08, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
         ASSERT_EQ_INT(o_check.u08, o_value.u08);
         break;
 
     case VX_TYPE_INT16:
-        VX_CALL(vxReadScalarValue(o, &o_value.s16));
+        VX_CALL(vxCopyScalar(o, (void *)&o_value.s16, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
         ASSERT_EQ_INT(o_check.s16, o_value.s16);
         break;
 
     case VX_TYPE_UINT16:
-        VX_CALL(vxReadScalarValue(o, &o_value.u16));
+        VX_CALL(vxCopyScalar(o, (void *)&o_value.u16, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
         ASSERT_EQ_INT(o_check.u16, o_value.u16);
         break;
 
     case VX_TYPE_INT32:
-        VX_CALL(vxReadScalarValue(o, &o_value.s32));
+        VX_CALL(vxCopyScalar(o, (void *)&o_value.s32, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
         ASSERT_EQ_INT(o_check.s32, o_value.s32);
         break;
 
     case VX_TYPE_UINT32:
-        VX_CALL(vxReadScalarValue(o, &o_value.u32));
+        VX_CALL(vxCopyScalar(o, (void *)&o_value.u32, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
         ASSERT_EQ_INT(o_check.u32, o_value.u32);
         break;
 
     case VX_TYPE_INT64:
-        VX_CALL(vxReadScalarValue(o, &o_value.s64));
+        VX_CALL(vxCopyScalar(o, (void *)&o_value.s64, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
         ASSERT_EQ_INT(o_check.s64, o_value.s64);
         break;
 
     case VX_TYPE_UINT64:
-        VX_CALL(vxReadScalarValue(o, &o_value.u64));
+        VX_CALL(vxCopyScalar(o, (void *)&o_value.u64, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
         ASSERT_EQ_INT(o_check.u64, o_value.u64);
         break;
 
     case VX_TYPE_FLOAT32:
-        VX_CALL(vxReadScalarValue(o, &o_value.f32));
+        VX_CALL(vxCopyScalar(o, (void *)&o_value.f32, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
         ASSERT(fabs(o_check.f32 - o_value.f32) < 0.000001f);
         break;
 
     case VX_TYPE_SIZE:
-        VX_CALL(vxReadScalarValue(o, &o_value.size));
+        VX_CALL(vxCopyScalar(o, (void *)&o_value.size, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
         ASSERT_EQ_INT(o_check.size, o_value.size);
         break;
 
     case VX_TYPE_BOOL:
-        VX_CALL(vxReadScalarValue(o, &o_value.boolean));
+        VX_CALL(vxCopyScalar(o, (void *)&o_value.boolean, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
         ASSERT_EQ_INT(o_check.boolean, o_value.boolean);
         break;
 
@@ -560,3 +562,5 @@
 }
 
 TESTCASE_TESTS(ControlFlow, testSelectNode, testScalarOperationNode)
+
+#endif //OPENVX_USE_ENHANCED_VISION
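
The scalar checks above switch from the removed vxReadScalarValue helper to the generic copy entry point. A minimal sketch of reading one scalar back to the host, assuming a valid VX_TYPE_INT32 scalar (the wrapper name is illustrative):

    #include <VX/vx.h>

    /* Sketch: VX_READ_ONLY copies the scalar's value into *out on the host. */
    static vx_status read_int32_scalar(vx_scalar scalar, vx_int32 *out)
    {
        return vxCopyScalar(scalar, (void *)out, VX_READ_ONLY, VX_MEMORY_TYPE_HOST);
    }
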
diff --git a/test_conformance/test_convertcolor.c b/test_conformance/test_convertcolor.c
index d647aa4..1e7a219 100644
--- a/test_conformance/test_convertcolor.c
+++ b/test_conformance/test_convertcolor.c
@@ -1,4 +1,4 @@
-/* 
+/*
 
  * Copyright (c) 2012-2017 The Khronos Group Inc.
  *
@@ -15,6 +15,8 @@
  * limitations under the License.
  */
 
+#if defined OPENVX_USE_ENHANCED_VISION || OPENVX_CONFORMANCE_VISION
+
 #include "test_engine/test.h"
 
 #include <VX/vx.h>
@@ -572,3 +574,5 @@
 }
 
 TESTCASE_TESTS(ColorConvert, testOnRandomAndNatural)
+
+#endif //OPENVX_USE_ENHANCED_VISION || OPENVX_CONFORMANCE_VISION
diff --git a/test_conformance/test_convertdepth.c b/test_conformance/test_convertdepth.c
index 6454b7d..7d180de 100644
--- a/test_conformance/test_convertdepth.c
+++ b/test_conformance/test_convertdepth.c
@@ -15,6 +15,8 @@
  * limitations under the License.
  */
 
+#if defined OPENVX_USE_ENHANCED_VISION || OPENVX_CONFORMANCE_VISION
+
 #include <VX/vx.h>
 #include <VX/vxu.h>
 
@@ -33,18 +35,60 @@
     ASSERT(src && dst);
     ASSERT(src->width == dst->width);
     ASSERT(src->height == dst->height);
-    ASSERT((src->format == VX_DF_IMAGE_U8 && dst->format == VX_DF_IMAGE_S16) || (src->format == VX_DF_IMAGE_S16 && dst->format == VX_DF_IMAGE_U8));
+    ASSERT((src->format == VX_DF_IMAGE_U1  && dst->format == VX_DF_IMAGE_U8)  ||
+           (src->format == VX_DF_IMAGE_U1  && dst->format == VX_DF_IMAGE_S16) ||
+           (src->format == VX_DF_IMAGE_U8  && dst->format == VX_DF_IMAGE_U1)  ||
+           (src->format == VX_DF_IMAGE_U8  && dst->format == VX_DF_IMAGE_S16) ||
+           (src->format == VX_DF_IMAGE_S16 && dst->format == VX_DF_IMAGE_U1)  ||
+           (src->format == VX_DF_IMAGE_S16 && dst->format == VX_DF_IMAGE_U8));
     ASSERT(policy == VX_CONVERT_POLICY_WRAP || policy == VX_CONVERT_POLICY_SATURATE);
 
     if (shift > 16) shift = 16;
     if (shift < -16) shift = -16;
 
-    if (src->format == VX_DF_IMAGE_U8)
+    if (src->format == VX_DF_IMAGE_U1)
     {
-        // according to spec the policy is ignored
-        // if (policy == VX_CONVERT_POLICY_WRAP)
+        // Up-conversion from U1 ignores the conversion policy and shift
+        if (dst->format == VX_DF_IMAGE_U8)
         {
-            // up-conversion + wrap
+            for (i = 0; i < dst->height; ++i)
+                for (j = 0; j < dst->width; ++j)
+                {
+                    uint32_t xShftd = j + src->roi.x % 8;    // U1 ROI offset
+                    uint8_t  pixel  = (src->data.y[i * ct_stride_bytes(src) + xShftd / 8] &
+                                       (1 << (xShftd % 8))) != 0 ? 255 : 0;
+                    dst->data.y[i * dst->stride + j] = pixel;
+                }
+        }
+        else    // dst->format == VX_DF_IMAGE_S16
+        {
+            for (i = 0; i < dst->height; ++i)
+                for (j = 0; j < dst->width; ++j)
+                {
+                    uint32_t xShftd = j + src->roi.x % 8;    // U1 ROI offset
+                    int16_t  pixel  = (src->data.y[i * ct_stride_bytes(src) + xShftd / 8] &
+                                       (1 << (xShftd % 8))) != 0 ? -1 : 0;
+                    dst->data.s16[i * dst->stride + j] = pixel;
+                }
+        }
+    }
+    else if (src->format == VX_DF_IMAGE_U8)
+    {
+        if (dst->format == VX_DF_IMAGE_U1)
+        {
+            // Down-conversion to U1 ignores the conversion policy and shift
+            for (i = 0; i < dst->height; ++i)
+                for (j = 0; j < dst->width; ++j)
+                {
+                    uint32_t xShftd = j + src->roi.x % 8;        // U1 ROI offset
+                    uint8_t  pixel  = (src->data.y[i * src->stride + j] != 0) ? 1 << (xShftd % 8) : 0;
+                    dst->data.y[i * ct_stride_bytes(dst) + xShftd / 8] =
+                        (dst->data.y[i * ct_stride_bytes(dst) + xShftd / 8] & ~(1 << (xShftd % 8))) | pixel;
+                }
+        }
+        else    // dst->format == VX_DF_IMAGE_S16
+        {
+            // Up-conversion from U8 to S16 ignores the conversion policy
             if (shift < 0)
             {
                 for (i = 0; i < dst->height; ++i)
@@ -58,30 +102,24 @@
                         dst->data.s16[i * dst->stride + j] = ((unsigned)src->data.y[i * src->stride + j]) << shift;
             }
         }
-        // else if (VX_CONVERT_POLICY_SATURATE)
-        // {
-        //     // up-conversion + saturate
-        //     if (shift < 0)
-        //     {
-        //         for (i = 0; i < dst->height; ++i)
-        //             for (j = 0; j < dst->width; ++j)
-        //                 dst->data.s16[i * dst->stride + j] = ((unsigned)src->data.y[i * src->stride + j]) >> (-shift);
-        //     }
-        //     else
-        //     {
-        //         for (i = 0; i < dst->height; ++i)
-        //             for (j = 0; j < dst->width; ++j)
-        //             {
-        //                 unsigned v = ((unsigned)src->data.y[i * src->stride + j]) << shift;
-        //                 if (v > 32767) v = 32767;
-        //                 dst->data.s16[i * dst->stride + j] = v;
-        //             }
-        //     }
-        // }
     }
+    // src->format == VX_DF_IMAGE_S16
+    else if (dst->format == VX_DF_IMAGE_U1)
+    {
+        // Down-conversion to U1 ignores the conversion policy and shift
+        for (i = 0; i < dst->height; ++i)
+            for (j = 0; j < dst->width; ++j)
+            {
+                uint32_t xShftd = j + src->roi.x % 8;            // U1 ROI offset
+                uint8_t  pixel  = src->data.s16[i * src->stride + j] != 0 ? 1 << (xShftd % 8) : 0;
+                dst->data.y[i * ct_stride_bytes(dst) + xShftd / 8] =
+                    (dst->data.y[i * ct_stride_bytes(dst) + xShftd / 8] & ~(1 << (xShftd % 8))) | pixel;
+            }
+    }
+    // dst->format == VX_DF_IMAGE_U8
     else if (policy == VX_CONVERT_POLICY_WRAP)
     {
-        // down-conversion + wrap
+        // Down-conversion (S16 to U8) + wrap
         if (shift < 0)
         {
             for (i = 0; i < dst->height; ++i)
@@ -95,9 +133,9 @@
                     dst->data.y[i * dst->stride + j] = src->data.s16[i * src->stride + j] >> shift;
         }
     }
-    else if (policy == VX_CONVERT_POLICY_SATURATE)
+    else // policy == VX_CONVERT_POLICY_SATURATE
     {
-        // down-conversion + saturate
+        // Down-conversion (S16 to U8) + saturate
         if (shift < 0)
         {
             for (i = 0; i < dst->height; ++i)
@@ -123,15 +161,26 @@
     }
 }
 
-static void fillSquence(CT_Image dst, uint32_t seq_init)
+static void fillSequence(CT_Image dst, uint32_t seq_init)
 {
     uint32_t i, j;
     uint32_t val = seq_init;
 
     ASSERT(dst);
-    ASSERT(dst->format == VX_DF_IMAGE_U8 || dst->format == VX_DF_IMAGE_S16);
+    ASSERT(dst->format == VX_DF_IMAGE_U1 || dst->format == VX_DF_IMAGE_U8 || dst->format == VX_DF_IMAGE_S16);
 
-    if (dst->format == VX_DF_IMAGE_U8)
+    if (dst->format == VX_DF_IMAGE_U1)
+    {
+        for (i = 0; i < dst->height; ++i)
+            for (j = 0; j < dst->width; ++j)
+            {
+                uint32_t xShftd = j + dst->roi.x % 8;            // U1 ROI offset
+                uint8_t  pixel  = (++val % 2) << (xShftd % 8);
+                dst->data.y[i * ct_stride_bytes(dst) + xShftd / 8] =
+                    (dst->data.y[i * ct_stride_bytes(dst) + xShftd / 8] & ~(1 << (xShftd % 8))) | pixel;
+            }
+    }
+    else if (dst->format == VX_DF_IMAGE_U8)
     {
         for (i = 0; i < dst->height; ++i)
             for (j = 0; j < dst->width; ++j)
@@ -148,41 +197,87 @@
 TESTCASE(vxuConvertDepth, CT_VXContext, ct_setup_vx_context, 0)
 TESTCASE(vxConvertDepth,  CT_VXContext, ct_setup_vx_context, 0)
 
-
 TEST(vxuConvertDepth, NegativeSizes)
 {
-    vx_image img16x88, img88x16, img16x16;
+    vx_image img88x88, img88x40, img40x40;
     vx_int32 shift_zero = 0;
     vx_int32 shift_one = 1;
     vx_context context = context_->vx_context_;
 
-    ASSERT_VX_OBJECT(img16x88 = vxCreateImage(context, 16, 88, VX_DF_IMAGE_U8),  VX_TYPE_IMAGE);
-    ASSERT_VX_OBJECT(img88x16 = vxCreateImage(context, 88, 16, VX_DF_IMAGE_S16), VX_TYPE_IMAGE);
-    ASSERT_VX_OBJECT(img16x16 = vxCreateImage(context, 16, 16, VX_DF_IMAGE_U8),  VX_TYPE_IMAGE);
+    ASSERT_VX_OBJECT(img88x88 = vxCreateImage(context, 88, 88, VX_DF_IMAGE_U8),  VX_TYPE_IMAGE);
+    ASSERT_VX_OBJECT(img88x40 = vxCreateImage(context, 88, 40, VX_DF_IMAGE_S16), VX_TYPE_IMAGE);
+    ASSERT_VX_OBJECT(img40x40 = vxCreateImage(context, 40, 40, VX_DF_IMAGE_U8),  VX_TYPE_IMAGE);
 
     // initialize to guarantee that images are allocated
-    ASSERT_NO_FAILURE(ct_fill_image_random(img16x88, &CT()->seed_));
-    ASSERT_NO_FAILURE(ct_fill_image_random(img88x16, &CT()->seed_));
+    ASSERT_NO_FAILURE(ct_fill_image_random(img88x88, &CT()->seed_));
+    ASSERT_NO_FAILURE(ct_fill_image_random(img88x40, &CT()->seed_));
+    ASSERT_NO_FAILURE(ct_fill_image_random(img40x40, &CT()->seed_));
+
+    EXPECT_NE_VX_STATUS(VX_SUCCESS, vxuConvertDepth(context, img88x88, img88x40, VX_CONVERT_POLICY_SATURATE, shift_zero));
+    EXPECT_NE_VX_STATUS(VX_SUCCESS, vxuConvertDepth(context, img88x88, img88x40, VX_CONVERT_POLICY_WRAP,     shift_zero));
+    EXPECT_NE_VX_STATUS(VX_SUCCESS, vxuConvertDepth(context, img88x88, img88x40, VX_CONVERT_POLICY_SATURATE, shift_one));
+    EXPECT_NE_VX_STATUS(VX_SUCCESS, vxuConvertDepth(context, img88x88, img88x40, VX_CONVERT_POLICY_WRAP,     shift_one));
+
+    EXPECT_NE_VX_STATUS(VX_SUCCESS, vxuConvertDepth(context, img88x40, img40x40, VX_CONVERT_POLICY_SATURATE, shift_zero));
+    EXPECT_NE_VX_STATUS(VX_SUCCESS, vxuConvertDepth(context, img88x40, img40x40, VX_CONVERT_POLICY_WRAP,     shift_zero));
+    EXPECT_NE_VX_STATUS(VX_SUCCESS, vxuConvertDepth(context, img88x40, img40x40, VX_CONVERT_POLICY_SATURATE, shift_one));
+    EXPECT_NE_VX_STATUS(VX_SUCCESS, vxuConvertDepth(context, img88x40, img40x40, VX_CONVERT_POLICY_WRAP,     shift_one));
+
+    VX_CALL(vxReleaseImage(&img88x88));
+    VX_CALL(vxReleaseImage(&img88x40));
+    VX_CALL(vxReleaseImage(&img40x40));
+}
+
+TEST(vxuConvertDepth, NegativeSizes_U1_)
+{
+    vx_image img88x88, img88x40, img40x40, img16x40, img16x16;
+    vx_int32 shift_zero = 0;
+    vx_int32 shift_one = 1;
+    vx_context context = context_->vx_context_;
+
+    ASSERT_VX_OBJECT(img88x88 = vxCreateImage(context, 88, 88, VX_DF_IMAGE_U1),  VX_TYPE_IMAGE);
+    ASSERT_VX_OBJECT(img88x40 = vxCreateImage(context, 88, 40, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
+    ASSERT_VX_OBJECT(img40x40 = vxCreateImage(context, 40, 40, VX_DF_IMAGE_U1),  VX_TYPE_IMAGE);
+    ASSERT_VX_OBJECT(img16x40 = vxCreateImage(context, 16, 40, VX_DF_IMAGE_S16), VX_TYPE_IMAGE);
+    ASSERT_VX_OBJECT(img16x16 = vxCreateImage(context, 16, 16, VX_DF_IMAGE_U1),  VX_TYPE_IMAGE);
+
+    // initialize to guarantee that images are allocated
+    ASSERT_NO_FAILURE(ct_fill_image_random(img88x88, &CT()->seed_));
+    ASSERT_NO_FAILURE(ct_fill_image_random(img88x40, &CT()->seed_));
+    ASSERT_NO_FAILURE(ct_fill_image_random(img40x40, &CT()->seed_));
+    ASSERT_NO_FAILURE(ct_fill_image_random(img16x40, &CT()->seed_));
     ASSERT_NO_FAILURE(ct_fill_image_random(img16x16, &CT()->seed_));
 
-    EXPECT_NE_VX_STATUS(VX_SUCCESS, vxuConvertDepth(context, img16x88, img88x16, VX_CONVERT_POLICY_SATURATE, shift_zero));
-    EXPECT_NE_VX_STATUS(VX_SUCCESS, vxuConvertDepth(context, img16x88, img88x16, VX_CONVERT_POLICY_WRAP, shift_zero));
-    EXPECT_NE_VX_STATUS(VX_SUCCESS, vxuConvertDepth(context, img16x88, img88x16, VX_CONVERT_POLICY_SATURATE, shift_one));
-    EXPECT_NE_VX_STATUS(VX_SUCCESS, vxuConvertDepth(context, img16x88, img88x16, VX_CONVERT_POLICY_WRAP, shift_one));
+    EXPECT_NE_VX_STATUS(VX_SUCCESS, vxuConvertDepth(context, img88x88, img88x40, VX_CONVERT_POLICY_SATURATE, shift_zero));
+    EXPECT_NE_VX_STATUS(VX_SUCCESS, vxuConvertDepth(context, img88x88, img88x40, VX_CONVERT_POLICY_WRAP,     shift_zero));
+    EXPECT_NE_VX_STATUS(VX_SUCCESS, vxuConvertDepth(context, img88x88, img88x40, VX_CONVERT_POLICY_SATURATE, shift_one));
+    EXPECT_NE_VX_STATUS(VX_SUCCESS, vxuConvertDepth(context, img88x88, img88x40, VX_CONVERT_POLICY_WRAP,     shift_one));
 
-    EXPECT_NE_VX_STATUS(VX_SUCCESS, vxuConvertDepth(context, img88x16, img16x16, VX_CONVERT_POLICY_SATURATE, shift_zero));
-    EXPECT_NE_VX_STATUS(VX_SUCCESS, vxuConvertDepth(context, img88x16, img16x16, VX_CONVERT_POLICY_WRAP, shift_zero));
-    EXPECT_NE_VX_STATUS(VX_SUCCESS, vxuConvertDepth(context, img88x16, img16x16, VX_CONVERT_POLICY_SATURATE, shift_one));
-    EXPECT_NE_VX_STATUS(VX_SUCCESS, vxuConvertDepth(context, img88x16, img16x16, VX_CONVERT_POLICY_WRAP, shift_one));
+    EXPECT_NE_VX_STATUS(VX_SUCCESS, vxuConvertDepth(context, img88x40, img40x40, VX_CONVERT_POLICY_SATURATE, shift_zero));
+    EXPECT_NE_VX_STATUS(VX_SUCCESS, vxuConvertDepth(context, img88x40, img40x40, VX_CONVERT_POLICY_WRAP,     shift_zero));
+    EXPECT_NE_VX_STATUS(VX_SUCCESS, vxuConvertDepth(context, img88x40, img40x40, VX_CONVERT_POLICY_SATURATE, shift_one));
+    EXPECT_NE_VX_STATUS(VX_SUCCESS, vxuConvertDepth(context, img88x40, img40x40, VX_CONVERT_POLICY_WRAP,     shift_one));
 
-    VX_CALL(vxReleaseImage(&img16x88));
-    VX_CALL(vxReleaseImage(&img88x16));
+    EXPECT_NE_VX_STATUS(VX_SUCCESS, vxuConvertDepth(context, img40x40, img16x40, VX_CONVERT_POLICY_SATURATE, shift_zero));
+    EXPECT_NE_VX_STATUS(VX_SUCCESS, vxuConvertDepth(context, img40x40, img16x40, VX_CONVERT_POLICY_WRAP,     shift_zero));
+    EXPECT_NE_VX_STATUS(VX_SUCCESS, vxuConvertDepth(context, img40x40, img16x40, VX_CONVERT_POLICY_SATURATE, shift_one));
+    EXPECT_NE_VX_STATUS(VX_SUCCESS, vxuConvertDepth(context, img40x40, img16x40, VX_CONVERT_POLICY_WRAP,     shift_one));
+
+    EXPECT_NE_VX_STATUS(VX_SUCCESS, vxuConvertDepth(context, img16x40, img16x16, VX_CONVERT_POLICY_SATURATE, shift_zero));
+    EXPECT_NE_VX_STATUS(VX_SUCCESS, vxuConvertDepth(context, img16x40, img16x16, VX_CONVERT_POLICY_WRAP,     shift_zero));
+    EXPECT_NE_VX_STATUS(VX_SUCCESS, vxuConvertDepth(context, img16x40, img16x16, VX_CONVERT_POLICY_SATURATE, shift_one));
+    EXPECT_NE_VX_STATUS(VX_SUCCESS, vxuConvertDepth(context, img16x40, img16x16, VX_CONVERT_POLICY_WRAP,     shift_one));
+
+    VX_CALL(vxReleaseImage(&img88x88));
+    VX_CALL(vxReleaseImage(&img88x40));
+    VX_CALL(vxReleaseImage(&img40x40));
+    VX_CALL(vxReleaseImage(&img16x40));
     VX_CALL(vxReleaseImage(&img16x16));
 }
 
 TEST(vxConvertDepth, NegativeSizes)
 {
-    vx_image img16x88, img88x16, img16x16;
+    vx_image img88x88, img88x40, img40x40;
     vx_graph graph;
     vx_node node;
     vx_scalar shift;
@@ -190,36 +285,108 @@
     vx_context context = context_->vx_context_;
 
     ASSERT_VX_OBJECT(shift = vxCreateScalar(context, VX_TYPE_INT32, &sh), VX_TYPE_SCALAR);
-    ASSERT_VX_OBJECT(img16x88 = vxCreateImage(context, 16, 88, VX_DF_IMAGE_U8),  VX_TYPE_IMAGE);
-    ASSERT_VX_OBJECT(img88x16 = vxCreateImage(context, 88, 16, VX_DF_IMAGE_S16), VX_TYPE_IMAGE);
-    ASSERT_VX_OBJECT(img16x16 = vxCreateImage(context, 16, 16, VX_DF_IMAGE_U8),  VX_TYPE_IMAGE);
+    ASSERT_VX_OBJECT(img88x88 = vxCreateImage(context, 88, 88, VX_DF_IMAGE_U8),  VX_TYPE_IMAGE);
+    ASSERT_VX_OBJECT(img88x40 = vxCreateImage(context, 88, 40, VX_DF_IMAGE_S16), VX_TYPE_IMAGE);
+    ASSERT_VX_OBJECT(img40x40 = vxCreateImage(context, 40, 40, VX_DF_IMAGE_U8),  VX_TYPE_IMAGE);
 
+    /* U8 -> S16 */
     ASSERT_VX_OBJECT(graph = vxCreateGraph(context), VX_TYPE_GRAPH);
-    ASSERT_VX_OBJECT(node = vxConvertDepthNode(graph, img16x88, img88x16, VX_CONVERT_POLICY_SATURATE, shift), VX_TYPE_NODE);
+    ASSERT_VX_OBJECT(node = vxConvertDepthNode(graph, img88x88, img88x40, VX_CONVERT_POLICY_SATURATE, shift), VX_TYPE_NODE);
+    EXPECT_NE_VX_STATUS(VX_SUCCESS, vxVerifyGraph(graph));
+    VX_CALL(vxReleaseNode(&node));
+    VX_CALL(vxReleaseGraph(&graph));
+    ASSERT_VX_OBJECT(graph = vxCreateGraph(context), VX_TYPE_GRAPH);
+    ASSERT_VX_OBJECT(node = vxConvertDepthNode(graph, img88x88, img88x40, VX_CONVERT_POLICY_WRAP, shift), VX_TYPE_NODE);
     EXPECT_NE_VX_STATUS(VX_SUCCESS, vxVerifyGraph(graph));
     VX_CALL(vxReleaseNode(&node));
     VX_CALL(vxReleaseGraph(&graph));
 
+    /* S16 -> U8 */
     ASSERT_VX_OBJECT(graph = vxCreateGraph(context), VX_TYPE_GRAPH);
-    ASSERT_VX_OBJECT(node = vxConvertDepthNode(graph, img16x88, img88x16, VX_CONVERT_POLICY_WRAP, shift), VX_TYPE_NODE);
+    ASSERT_VX_OBJECT(node = vxConvertDepthNode(graph, img88x40, img40x40, VX_CONVERT_POLICY_SATURATE, shift), VX_TYPE_NODE);
+    EXPECT_NE_VX_STATUS(VX_SUCCESS, vxVerifyGraph(graph));
+    VX_CALL(vxReleaseNode(&node));
+    VX_CALL(vxReleaseGraph(&graph));
+    ASSERT_VX_OBJECT(graph = vxCreateGraph(context), VX_TYPE_GRAPH);
+    ASSERT_VX_OBJECT(node = vxConvertDepthNode(graph, img88x40, img40x40, VX_CONVERT_POLICY_WRAP, shift), VX_TYPE_NODE);
     EXPECT_NE_VX_STATUS(VX_SUCCESS, vxVerifyGraph(graph));
     VX_CALL(vxReleaseNode(&node));
     VX_CALL(vxReleaseGraph(&graph));
 
+    VX_CALL(vxReleaseImage(&img88x88));
+    VX_CALL(vxReleaseImage(&img88x40));
+    VX_CALL(vxReleaseImage(&img40x40));
+    VX_CALL(vxReleaseScalar(&shift));
+}
+
+TEST(vxConvertDepth, NegativeSizes_U1_)
+{
+    vx_image img88x88, img88x40, img40x40, img16x40, img16x16;
+    vx_graph graph;
+    vx_node node;
+    vx_scalar shift;
+    vx_int32 sh = 1;
+    vx_context context = context_->vx_context_;
+
+    ASSERT_VX_OBJECT(shift = vxCreateScalar(context, VX_TYPE_INT32, &sh), VX_TYPE_SCALAR);
+    ASSERT_VX_OBJECT(img88x88 = vxCreateImage(context, 88, 88, VX_DF_IMAGE_U1),  VX_TYPE_IMAGE);
+    ASSERT_VX_OBJECT(img88x40 = vxCreateImage(context, 88, 40, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
+    ASSERT_VX_OBJECT(img40x40 = vxCreateImage(context, 40, 40, VX_DF_IMAGE_U1),  VX_TYPE_IMAGE);
+    ASSERT_VX_OBJECT(img16x40 = vxCreateImage(context, 16, 40, VX_DF_IMAGE_S16), VX_TYPE_IMAGE);
+    ASSERT_VX_OBJECT(img16x16 = vxCreateImage(context, 16, 16, VX_DF_IMAGE_U1),  VX_TYPE_IMAGE);
+
+    /* U1 -> U8 */
     ASSERT_VX_OBJECT(graph = vxCreateGraph(context), VX_TYPE_GRAPH);
-    ASSERT_VX_OBJECT(node = vxConvertDepthNode(graph, img88x16, img16x16, VX_CONVERT_POLICY_SATURATE, shift), VX_TYPE_NODE);
+    ASSERT_VX_OBJECT(node = vxConvertDepthNode(graph, img88x88, img88x40, VX_CONVERT_POLICY_SATURATE, shift), VX_TYPE_NODE);
+    EXPECT_NE_VX_STATUS(VX_SUCCESS, vxVerifyGraph(graph));
+    VX_CALL(vxReleaseNode(&node));
+    VX_CALL(vxReleaseGraph(&graph));
+    ASSERT_VX_OBJECT(graph = vxCreateGraph(context), VX_TYPE_GRAPH);
+    ASSERT_VX_OBJECT(node = vxConvertDepthNode(graph, img88x88, img88x40, VX_CONVERT_POLICY_WRAP, shift), VX_TYPE_NODE);
     EXPECT_NE_VX_STATUS(VX_SUCCESS, vxVerifyGraph(graph));
     VX_CALL(vxReleaseNode(&node));
     VX_CALL(vxReleaseGraph(&graph));
 
+    /* U8 -> U1 */
     ASSERT_VX_OBJECT(graph = vxCreateGraph(context), VX_TYPE_GRAPH);
-    ASSERT_VX_OBJECT(node = vxConvertDepthNode(graph, img88x16, img16x16, VX_CONVERT_POLICY_WRAP, shift), VX_TYPE_NODE);
+    ASSERT_VX_OBJECT(node = vxConvertDepthNode(graph, img88x40, img40x40, VX_CONVERT_POLICY_SATURATE, shift), VX_TYPE_NODE);
+    EXPECT_NE_VX_STATUS(VX_SUCCESS, vxVerifyGraph(graph));
+    VX_CALL(vxReleaseNode(&node));
+    VX_CALL(vxReleaseGraph(&graph));
+    ASSERT_VX_OBJECT(graph = vxCreateGraph(context), VX_TYPE_GRAPH);
+    ASSERT_VX_OBJECT(node = vxConvertDepthNode(graph, img88x40, img40x40, VX_CONVERT_POLICY_WRAP, shift), VX_TYPE_NODE);
     EXPECT_NE_VX_STATUS(VX_SUCCESS, vxVerifyGraph(graph));
     VX_CALL(vxReleaseNode(&node));
     VX_CALL(vxReleaseGraph(&graph));
 
-    VX_CALL(vxReleaseImage(&img16x88));
-    VX_CALL(vxReleaseImage(&img88x16));
+    /* U1 -> S16 */
+    ASSERT_VX_OBJECT(graph = vxCreateGraph(context), VX_TYPE_GRAPH);
+    ASSERT_VX_OBJECT(node = vxConvertDepthNode(graph, img40x40, img16x40, VX_CONVERT_POLICY_SATURATE, shift), VX_TYPE_NODE);
+    EXPECT_NE_VX_STATUS(VX_SUCCESS, vxVerifyGraph(graph));
+    VX_CALL(vxReleaseNode(&node));
+    VX_CALL(vxReleaseGraph(&graph));
+    ASSERT_VX_OBJECT(graph = vxCreateGraph(context), VX_TYPE_GRAPH);
+    ASSERT_VX_OBJECT(node = vxConvertDepthNode(graph, img40x40, img16x40, VX_CONVERT_POLICY_WRAP, shift), VX_TYPE_NODE);
+    EXPECT_NE_VX_STATUS(VX_SUCCESS, vxVerifyGraph(graph));
+    VX_CALL(vxReleaseNode(&node));
+    VX_CALL(vxReleaseGraph(&graph));
+
+    /* S16 -> U1 */
+    ASSERT_VX_OBJECT(graph = vxCreateGraph(context), VX_TYPE_GRAPH);
+    ASSERT_VX_OBJECT(node = vxConvertDepthNode(graph, img16x40, img16x16, VX_CONVERT_POLICY_SATURATE, shift), VX_TYPE_NODE);
+    EXPECT_NE_VX_STATUS(VX_SUCCESS, vxVerifyGraph(graph));
+    VX_CALL(vxReleaseNode(&node));
+    VX_CALL(vxReleaseGraph(&graph));
+    ASSERT_VX_OBJECT(graph = vxCreateGraph(context), VX_TYPE_GRAPH);
+    ASSERT_VX_OBJECT(node = vxConvertDepthNode(graph, img16x40, img16x16, VX_CONVERT_POLICY_WRAP, shift), VX_TYPE_NODE);
+    EXPECT_NE_VX_STATUS(VX_SUCCESS, vxVerifyGraph(graph));
+    VX_CALL(vxReleaseNode(&node));
+    VX_CALL(vxReleaseGraph(&graph));
+
+    VX_CALL(vxReleaseImage(&img88x88));
+    VX_CALL(vxReleaseImage(&img88x40));
+    VX_CALL(vxReleaseImage(&img40x40));
+    VX_CALL(vxReleaseImage(&img16x40));
     VX_CALL(vxReleaseImage(&img16x16));
     VX_CALL(vxReleaseScalar(&shift));
 }
@@ -233,7 +400,10 @@
     vx_enum policy;
 } cvt_depth_arg;
 
-#define CVT_ARG(w,h,from,to,p) ARG(#p"/"#w"x"#h" "#from"->"#to, w, h, VX_DF_IMAGE_##from, VX_DF_IMAGE_##to, VX_CONVERT_POLICY_##p)
+#define CVT_ARG(w,h,from,to,p)    \
+    ARG(        #p "/" #w "x" #h " " #from "->" #to, w, h, VX_DF_IMAGE_##from, VX_DF_IMAGE_##to, VX_CONVERT_POLICY_##p)
+#define CVT_ARG_U1(w,h,from,to,p) \
+    ARG("_U1_/" #p "/" #w "x" #h " " #from "->" #to, w, h, VX_DF_IMAGE_##from, VX_DF_IMAGE_##to, VX_CONVERT_POLICY_##p)
 
 #define PREPEND_SIZE(macro, ...)                \
     CT_EXPAND(macro(1, 1, __VA_ARGS__)),        \
@@ -246,40 +416,48 @@
     CT_EXPAND(macro(1280, 720, __VA_ARGS__)),
     CT_EXPAND(macro(1920, 1080, __VA_ARGS__))*/
 
-#define CVT_ARGS                                \
-    PREPEND_SIZE(CVT_ARG, U8, S16, SATURATE),   \
-    PREPEND_SIZE(CVT_ARG, U8, S16, WRAP),       \
-    PREPEND_SIZE(CVT_ARG, S16, U8, SATURATE),   \
-    PREPEND_SIZE(CVT_ARG, S16, U8, WRAP)
+#define CVT_ARGS                                    \
+    PREPEND_SIZE(CVT_ARG,     U8, S16, SATURATE),   \
+    PREPEND_SIZE(CVT_ARG,     U8, S16, WRAP),       \
+    PREPEND_SIZE(CVT_ARG,    S16,  U8, SATURATE),   \
+    PREPEND_SIZE(CVT_ARG,    S16,  U8, WRAP),       \
+    PREPEND_SIZE(CVT_ARG_U1,  U1,  U8, SATURATE),   \
+    PREPEND_SIZE(CVT_ARG_U1,  U1,  U8, WRAP),       \
+    PREPEND_SIZE(CVT_ARG_U1,  U8,  U1, SATURATE),   \
+    PREPEND_SIZE(CVT_ARG_U1,  U8,  U1, WRAP),       \
+    PREPEND_SIZE(CVT_ARG_U1,  U1, S16, SATURATE),   \
+    PREPEND_SIZE(CVT_ARG_U1,  U1, S16, WRAP),       \
+    PREPEND_SIZE(CVT_ARG_U1, S16,  U1, SATURATE),   \
+    PREPEND_SIZE(CVT_ARG_U1, S16,  U1, WRAP)
 
 TEST_WITH_ARG(vxuConvertDepth, BitExact, cvt_depth_arg, CVT_ARGS)
 {
     vx_image src, dst;
-    CT_Image ref_src, refdst, vxdst;
+    CT_Image ref_src, ref_dst, vx_dst;
     vx_int32 shift_val;
     vx_context context = context_->vx_context_;
 
     ASSERT_NO_FAILURE({
         ref_src = ct_allocate_image(arg_->width, arg_->height, arg_->format_from);
-        fillSquence(ref_src, (uint32_t)CT()->seed_);
+        fillSequence(ref_src, (uint32_t)CT()->seed_);
         src = ct_image_to_vx_image(ref_src, context);
     });
 
     ASSERT_VX_OBJECT(dst = vxCreateImage(context, arg_->width, arg_->height, arg_->format_to), VX_TYPE_IMAGE);
 
-    refdst = ct_allocate_image(arg_->width, arg_->height, arg_->format_to);
-    vxdst = ct_allocate_image(arg_->width, arg_->height, arg_->format_to);
+    ref_dst = ct_allocate_image(arg_->width, arg_->height, arg_->format_to);
+    vx_dst = ct_allocate_image(arg_->width, arg_->height, arg_->format_to);
     for (shift_val = VALID_SHIFT_MIN; shift_val <= VALID_SHIFT_MAX; ++shift_val)
     {
         ct_update_progress(shift_val - VALID_SHIFT_MIN, VALID_SHIFT_MAX - VALID_SHIFT_MIN + 1);
         EXPECT_EQ_VX_STATUS(VX_SUCCESS, vxuConvertDepth(context, src, dst, arg_->policy, shift_val));
 
         ASSERT_NO_FAILURE({
-            ct_image_copyfrom_vx_image(vxdst, dst);
-            referenceConvertDepth(ref_src, refdst, shift_val, arg_->policy);
+            ct_image_copyfrom_vx_image(vx_dst, dst);
+            referenceConvertDepth(ref_src, ref_dst, shift_val, arg_->policy);
         });
 
-        EXPECT_EQ_CTIMAGE(refdst, vxdst);
+        EXPECT_EQ_CTIMAGE(ref_dst, vx_dst);
         if (CT_HasFailure())
         {
             printf("Shift value is %d\n", shift_val);
@@ -297,7 +475,7 @@
 TEST_WITH_ARG(vxConvertDepth, BitExact, cvt_depth_arg, CVT_ARGS)
 {
     vx_image src, dst;
-    CT_Image ref_src, refdst, vxdst;
+    CT_Image ref_src, ref_dst, vx_dst;
     vx_graph graph;
     vx_node node;
     vx_scalar scalar_shift;
@@ -307,7 +485,7 @@
 
     ASSERT_NO_FAILURE({
         ref_src = ct_allocate_image(arg_->width, arg_->height, arg_->format_from);
-        fillSquence(ref_src, (uint32_t)CT()->seed_);
+        fillSequence(ref_src, (uint32_t)CT()->seed_);
         src = ct_image_to_vx_image(ref_src, context);
     });
 
@@ -316,8 +494,8 @@
     ASSERT_VX_OBJECT(graph = vxCreateGraph(context), VX_TYPE_GRAPH);
     ASSERT_VX_OBJECT(node = vxConvertDepthNode(graph, src, dst, arg_->policy, scalar_shift), VX_TYPE_NODE);
 
-    refdst = ct_allocate_image(arg_->width, arg_->height, arg_->format_to);
-    vxdst = ct_allocate_image(arg_->width, arg_->height, arg_->format_to);
+    ref_dst = ct_allocate_image(arg_->width, arg_->height, arg_->format_to);
+    vx_dst = ct_allocate_image(arg_->width, arg_->height, arg_->format_to);
     for (shift = VALID_SHIFT_MIN; shift <= VALID_SHIFT_MAX; ++shift)
     {
         ct_update_progress(shift - VALID_SHIFT_MIN, VALID_SHIFT_MAX - VALID_SHIFT_MIN + 1);
@@ -332,11 +510,11 @@
 #endif
 
         ASSERT_NO_FAILURE({
-            ct_image_copyfrom_vx_image(vxdst, dst);
-            referenceConvertDepth(ref_src, refdst, shift, arg_->policy);
+            ct_image_copyfrom_vx_image(vx_dst, dst);
+            referenceConvertDepth(ref_src, ref_dst, shift, arg_->policy);
         });
 
-        EXPECT_EQ_CTIMAGE(refdst, vxdst);
+        EXPECT_EQ_CTIMAGE(ref_dst, vx_dst);
         if (CT_HasFailure())
         {
             printf("Shift value is %d\n", shift);
@@ -351,5 +529,7 @@
     VX_CALL(vxReleaseGraph(&graph));
 }
 
-TESTCASE_TESTS(vxuConvertDepth, DISABLED_NegativeSizes, BitExact)
-TESTCASE_TESTS(vxConvertDepth,  DISABLED_NegativeSizes, BitExact)
+TESTCASE_TESTS(vxuConvertDepth, DISABLED_NegativeSizes, DISABLED_NegativeSizes_U1_, BitExact)
+TESTCASE_TESTS(vxConvertDepth,  DISABLED_NegativeSizes, DISABLED_NegativeSizes_U1_, BitExact)
+
+#endif //OPENVX_USE_ENHANCED_VISION || OPENVX_CONFORMANCE_VISION
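
The U1 reference code above addresses pixels as single bits, offset by the image's ROI start modulo 8. A minimal sketch of that addressing on a packed row buffer, assuming nothing about the CT_Image layout beyond what the hunks show (both helpers are illustrative, not framework functions):

    #include <stdint.h>

    /* Sketch: read one U1 pixel at column x; roi_x is the ROI origin in the
     * parent image, so the bit index is shifted by roi_x % 8 as in the tests. */
    static int u1_get(const uint8_t *row, uint32_t x, uint32_t roi_x)
    {
        uint32_t xShftd = x + roi_x % 8;
        return (row[xShftd / 8] & (1u << (xShftd % 8))) != 0;
    }

    /* Sketch: write one U1 pixel without disturbing the other bits in the byte. */
    static void u1_set(uint8_t *row, uint32_t x, uint32_t roi_x, int value)
    {
        uint32_t xShftd = x + roi_x % 8;
        uint8_t  mask   = (uint8_t)(1u << (xShftd % 8));
        row[xShftd / 8] = (uint8_t)((row[xShftd / 8] & ~mask) | (value ? mask : 0));
    }
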
diff --git a/test_conformance/test_convolution.c b/test_conformance/test_convolution.c
index ca347f5..5c9efbb 100644
--- a/test_conformance/test_convolution.c
+++ b/test_conformance/test_convolution.c
@@ -15,6 +15,8 @@
  * limitations under the License.
  */
 
+#if defined OPENVX_USE_ENHANCED_VISION || OPENVX_CONFORMANCE_VISION
+
 #include "test_engine/test.h"
 #include <VX/vx.h>
 #include <VX/vxu.h>
@@ -119,3 +121,5 @@
 }
 
 TESTCASE_TESTS(Convolution, test_vxCreateConvolution, test_vxCopyConvolution, test_vxQueryConvolution, test_vxCreateVirtualConvolution)
+
+#endif //OPENVX_USE_ENHANCED_VISION || OPENVX_CONFORMANCE_VISION
diff --git a/test_conformance/test_convolve.c b/test_conformance/test_convolve.c
index aa2b827..4a579e2 100644
--- a/test_conformance/test_convolve.c
+++ b/test_conformance/test_convolve.c
@@ -15,6 +15,8 @@
  * limitations under the License.
  */
 
+#if defined OPENVX_USE_ENHANCED_VISION || OPENVX_CONFORMANCE_VISION
+
 #include "test_engine/test.h"
 #include <VX/vx.h>
 #include <VX/vxu.h>
@@ -368,3 +370,5 @@
 }
 
 TESTCASE_TESTS(Convolve, testNodeCreation, testGraphProcessing, testImmediateProcessing)
+
+#endif //OPENVX_USE_ENHANCED_VISION || OPENVX_CONFORMANCE_VISION
diff --git a/test_conformance/test_copy.c b/test_conformance/test_copy.c
index 79d762b..3abe952 100644
--- a/test_conformance/test_copy.c
+++ b/test_conformance/test_copy.c
@@ -15,6 +15,8 @@
  * limitations under the License.
  */
 
+#ifdef OPENVX_USE_ENHANCED_VISION
+
 #include <VX/vx.h>
 #include <VX/vxu.h>
 
@@ -92,13 +94,13 @@
             vxReleaseReference((vx_reference*)&scalar_exemplar);
             break;
         case VX_TYPE_TENSOR:
-            dims = malloc(TENSOR_DIMS_NUM * sizeof(vx_size));
+            dims = ct_alloc_mem(TENSOR_DIMS_NUM * sizeof(vx_size));
             for(vx_size i = 0; i < TENSOR_DIMS_NUM; i++)
             {
                 dims[i] = TENSOR_DIMS_LENGTH;
             }
             exemplar = (vx_reference)vxCreateTensor(context, TENSOR_DIMS_NUM, dims, obj_item_type, 0);
-            free(dims);
+            ct_free_mem(dims);
             break;
         default:
             break;
@@ -263,7 +265,7 @@
                 vx_size stride = IMAGE_SIZE_X*2;
                 vx_size stride_y = sizeof(vx_coordinates2df_t) * (stride);
                 vx_size size = stride * IMAGE_SIZE_Y*2;
-                vx_coordinates2df_t* ptr_w = malloc(sizeof(vx_coordinates2df_t) * size);
+                vx_coordinates2df_t* ptr_w = ct_alloc_mem(sizeof(vx_coordinates2df_t) * size);
 
                 for (vx_size i = 0; i < IMAGE_SIZE_Y*2; i++)
                 {
@@ -276,7 +278,7 @@
                 }
 
                 VX_CALL(vxCopyRemapPatch(input_remap, &rect, stride_y, ptr_w, VX_TYPE_COORDINATES2DF, VX_WRITE_ONLY, VX_MEMORY_TYPE_HOST));
-                free(ptr_w);
+                ct_free_mem(ptr_w);
                 break;
             }
         case VX_TYPE_THRESHOLD:
@@ -292,7 +294,7 @@
                 vx_tensor input_tensor = (vx_tensor)input;
                 vx_size start[TENSOR_DIMS_NUM] = { 0 };
                 vx_size strides[TENSOR_DIMS_NUM]= { 0 };
-                vx_size * dims = malloc(TENSOR_DIMS_NUM * sizeof(vx_size));
+                vx_size * dims = ct_alloc_mem(TENSOR_DIMS_NUM * sizeof(vx_size));
                 for(vx_size i = 0; i < TENSOR_DIMS_NUM; i++)
                 {
                     dims[i] = TENSOR_DIMS_LENGTH;
@@ -300,7 +302,7 @@
                     strides[i] = i ? strides[i - 1] * dims[i - 1] : sizeof(vx_uint8);
                 }
                 const vx_size bytes = dims[TENSOR_DIMS_NUM - 1] * strides[TENSOR_DIMS_NUM - 1];
-                void * data = malloc(bytes);
+                void * data = ct_alloc_mem(bytes);
                 vx_uint8* u8_data = (vx_uint8*)data;
                 for(vx_size i = 0; i < bytes; i++)
                 {
@@ -308,8 +310,8 @@
                 }
 
                 VX_CALL(vxCopyTensorPatch(input_tensor, TENSOR_DIMS_NUM, start, dims, strides, data, VX_WRITE_ONLY, VX_MEMORY_TYPE_HOST));
-                free(dims);
-                free(data);
+                ct_free_mem(dims);
+                ct_free_mem(data);
                 break;
             }
         default:
@@ -484,7 +486,7 @@
                 vx_tensor output_tensor = (vx_tensor)output;
                 vx_size start[TENSOR_DIMS_NUM] = { 0 };
                 vx_size strides[TENSOR_DIMS_NUM]= { 0 };
-                vx_size * dims = malloc(TENSOR_DIMS_NUM * sizeof(vx_size));
+                vx_size * dims = ct_alloc_mem(TENSOR_DIMS_NUM * sizeof(vx_size));
                 for(vx_size i = 0; i < TENSOR_DIMS_NUM; i++)
                 {
                     dims[i] = TENSOR_DIMS_LENGTH;
@@ -492,15 +494,15 @@
                     strides[i] = i ? strides[i - 1] * dims[i - 1] : sizeof(vx_uint8);
                 }
                 const vx_size bytes = dims[TENSOR_DIMS_NUM - 1] * strides[TENSOR_DIMS_NUM - 1];
-                void * data = malloc(bytes);
+                void * data = ct_alloc_mem(bytes);
                 VX_CALL(vxCopyTensorPatch(output_tensor, TENSOR_DIMS_NUM, start, dims, strides, data, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
                 vx_uint8* u8_data = (vx_uint8*)data;
                 for(vx_size i = 0; i < bytes; i++)
                 {
                     ASSERT(u8_data[i] == 2);
                 }
-                free(dims);
-                free(data);
+                ct_free_mem(dims);
+                ct_free_mem(data);
                 break;
             }
         default:
@@ -640,7 +642,7 @@
                 vx_size stride = IMAGE_SIZE_X*2;
                 vx_size stride_y = sizeof(vx_coordinates2df_t) * (stride);
                 vx_size size = stride * IMAGE_SIZE_Y*2;
-                vx_coordinates2df_t* ptr_w = malloc(sizeof(vx_coordinates2df_t) * size);
+                vx_coordinates2df_t* ptr_w = ct_alloc_mem(sizeof(vx_coordinates2df_t) * size);
 
                 for (vx_size i = 0; i < IMAGE_SIZE_Y*2; i++)
                 {
@@ -653,7 +655,7 @@
                 }
 
                 VX_CALL(vxCopyRemapPatch(input_remap, &rect, stride_y, ptr_w, VX_TYPE_COORDINATES2DF, VX_WRITE_ONLY, VX_MEMORY_TYPE_HOST));
-                free(ptr_w);
+                ct_free_mem(ptr_w);
                 break;
             }
         case VX_TYPE_TENSOR:
@@ -661,7 +663,7 @@
                 vx_tensor input_tensor = (vx_tensor)input;
                 vx_size start[TENSOR_DIMS_NUM] = { 0 };
                 vx_size strides[TENSOR_DIMS_NUM]= { 0 };
-                vx_size * dims = malloc(TENSOR_DIMS_NUM * sizeof(vx_size));
+                vx_size * dims = ct_alloc_mem(TENSOR_DIMS_NUM * sizeof(vx_size));
                 for(vx_size i = 0; i < TENSOR_DIMS_NUM; i++)
                 {
                     dims[i] = TENSOR_DIMS_LENGTH;
@@ -669,7 +671,7 @@
                     strides[i] = i ? strides[i - 1] * dims[i - 1] : sizeof(vx_uint8);
                 }
                 const vx_size bytes = dims[TENSOR_DIMS_NUM - 1] * strides[TENSOR_DIMS_NUM - 1];
-                void * data = malloc(bytes);
+                void * data = ct_alloc_mem(bytes);
                 vx_uint8* u8_data = (vx_uint8*)data;
                 for(vx_size i = 0; i < bytes; i++)
                 {
@@ -677,8 +679,8 @@
                 }
 
                 VX_CALL(vxCopyTensorPatch(input_tensor, TENSOR_DIMS_NUM, start, dims, strides, data, VX_WRITE_ONLY, VX_MEMORY_TYPE_HOST));
-                free(dims);
-                free(data);
+                ct_free_mem(dims);
+                ct_free_mem(data);
                 break;
             }
         default:
@@ -693,7 +695,6 @@
         case VX_TYPE_IMAGE:
             {
             vx_image output_image = (vx_image)output;
-            int i;
             void *p = NULL;
             vx_map_id output_map_id;
             vx_rectangle_t rect;
@@ -849,7 +850,7 @@
                 vx_tensor output_tensor = (vx_tensor)output;
                 vx_size start[TENSOR_DIMS_NUM] = { 0 };
                 vx_size strides[TENSOR_DIMS_NUM]= { 0 };
-                vx_size * dims = malloc(TENSOR_DIMS_NUM * sizeof(vx_size));
+                vx_size * dims = ct_alloc_mem(TENSOR_DIMS_NUM * sizeof(vx_size));
                 for(vx_size i = 0; i < TENSOR_DIMS_NUM; i++)
                 {
                     dims[i] = TENSOR_DIMS_LENGTH;
@@ -857,15 +858,15 @@
                     strides[i] = i ? strides[i - 1] * dims[i - 1] : sizeof(vx_uint8);
                 }
                 const vx_size bytes = dims[TENSOR_DIMS_NUM - 1] * strides[TENSOR_DIMS_NUM - 1];
-                void * data = malloc(bytes);
+                void * data = ct_alloc_mem(bytes);
                 VX_CALL(vxCopyTensorPatch(output_tensor, TENSOR_DIMS_NUM, start, dims, strides, data, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
                 vx_uint8* u8_data = (vx_uint8*)data;
                 for(vx_size i = 0; i < bytes; i++)
                 {
                     ASSERT(u8_data[i] == 2);
                 }
-                free(dims);
-                free(data);
+                ct_free_mem(dims);
+                ct_free_mem(data);
                 break;
             }
         default:
@@ -882,13 +883,4 @@
     testImmediateProcessing
 )
 
-
-
-
-
-
-
-
-
-
-
+#endif //OPENVX_USE_ENHANCED_VISION
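
The tensor branches above build dense strides before calling vxCopyTensorPatch. A minimal sketch of that computation for a vx_uint8 tensor, with illustrative dimension macros (the test defines its own TENSOR_DIMS_NUM and TENSOR_DIMS_LENGTH):

    #include <VX/vx.h>

    #define DIMS_NUM    4    /* illustrative value */
    #define DIMS_LENGTH 20   /* illustrative value */

    /* Sketch: strides[0] is the element size; each further stride is the
     * previous stride times the previous dimension, giving a dense layout. */
    static vx_size fill_dense_strides(vx_size dims[DIMS_NUM], vx_size strides[DIMS_NUM])
    {
        for (vx_size i = 0; i < DIMS_NUM; i++)
        {
            dims[i]    = DIMS_LENGTH;
            strides[i] = i ? strides[i - 1] * dims[i - 1] : sizeof(vx_uint8);
        }
        return dims[DIMS_NUM - 1] * strides[DIMS_NUM - 1];   /* total bytes, as in the tests */
    }
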
diff --git a/test_conformance/test_dilate3x3.c b/test_conformance/test_dilate3x3.c
index 0f16ff2..6fd1aa5 100644
--- a/test_conformance/test_dilate3x3.c
+++ b/test_conformance/test_dilate3x3.c
@@ -15,14 +15,14 @@
  * limitations under the License.
  */
 
+#if defined OPENVX_USE_ENHANCED_VISION || OPENVX_CONFORMANCE_VISION
+
 #include "test_engine/test.h"
 #include <VX/vx.h>
 #include <VX/vxu.h>
 
-
 TESTCASE(Dilate3x3, CT_VXContext, ct_setup_vx_context, 0)
 
-
 TEST(Dilate3x3, testNodeCreation)
 {
     vx_context context = context_->vx_context_;
@@ -49,33 +49,62 @@
     ASSERT(src_image == 0);
 }
 
-
-// VX_DF_IMAGE_8U Boolean image
-static CT_Image dilate3x3_generate_random(const char* fileName, int width, int height)
+static CT_Image dilate3x3_generate_random(const char* fileName, int width, int height, vx_df_image format)
 {
     CT_Image image;
 
-    ASSERT_NO_FAILURE_(return 0,
-            image = ct_allocate_ct_image_random(width, height, VX_DF_IMAGE_U8, &CT()->seed_, 0, 2));
+    ASSERT_(return 0, format == VX_DF_IMAGE_U1 || format == VX_DF_IMAGE_U8);
 
-    // convert 0/1 values to 0/255
-    CT_FILL_IMAGE_8U(return 0, image,
-            *dst_data = (*dst_data) ? 255 : 0);
+    ASSERT_NO_FAILURE_(return 0, image = ct_allocate_ct_image_random(width, height, format, &CT()->seed_, 0, 2));
+
+    if (format == VX_DF_IMAGE_U8)
+    {
+        // convert 0/1 values to 0/255
+        CT_FILL_IMAGE_8U(return 0, image, *dst_data = (*dst_data) ? 255 : 0);
+    }
 
     return image;
 }
 
-static CT_Image dilate3x3_read_image(const char* fileName, int width, int height)
+static CT_Image dilate3x3_read_image(const char* fileName, int width, int height, vx_df_image format)
 {
-    CT_Image image = NULL;
+    CT_Image image_load = NULL, image_ret = NULL;
     ASSERT_(return 0, width == 0 && height == 0);
-    image = ct_read_image(fileName, 1);
-    ASSERT_(return 0, image);
-    ASSERT_(return 0, image->format == VX_DF_IMAGE_U8);
-    return image;
+    ASSERT_(return 0, format == VX_DF_IMAGE_U1 || format == VX_DF_IMAGE_U8);
+
+    image_load = ct_read_image(fileName, 1);
+    ASSERT_(return 0, image_load);
+    ASSERT_(return 0, image_load->format == VX_DF_IMAGE_U8);
+
+    if (format == VX_DF_IMAGE_U1)
+    {
+        ASSERT_NO_FAILURE_(return 0, threshold_U8_ct_image(image_load, 127));   // Threshold to make the U1 image less trivial
+        ASSERT_NO_FAILURE_(return 0, image_ret = ct_allocate_image(image_load->width, image_load->height, VX_DF_IMAGE_U1));
+        ASSERT_NO_FAILURE_(return 0, U8_ct_image_to_U1_ct_image(image_load, image_ret));
+    }
+    else
+        image_ret = image_load;
+
+    ASSERT_(return 0, image_ret);
+    ASSERT_(return 0, image_ret->format == format);
+
+    return image_ret;
 }
 
-static int32_t dilate_get(int32_t *values)
+static int32_t dilate_get_U1(int32_t values[9][2])
+{
+    int i;
+    int32_t v_i;
+    int32_t v = (values[0][0] & (1 << (values[0][1] % 8))) >> (values[0][1] % 8);
+    for (i = 1; i < 9; i++)
+    {
+        v_i = (values[i][0] & (1 << (values[i][1] % 8))) >> (values[i][1] % 8);
+        v = (v < v_i) ? v_i : v;
+    }
+    return v;
+}
+
+static int32_t dilate_get_U8(int32_t *values)
 {
     int i;
     int32_t v = values[0];
@@ -86,54 +115,109 @@
 
 static uint8_t dilate3x3_calculate(CT_Image src, uint32_t x, uint32_t y)
 {
-    int32_t values[9] = {
-        (int32_t)*CT_IMAGE_DATA_PTR_8U(src, x + 0, y + 0),
-        (int32_t)*CT_IMAGE_DATA_PTR_8U(src, x - 1, y + 0),
-        (int32_t)*CT_IMAGE_DATA_PTR_8U(src, x + 1, y + 0),
-        (int32_t)*CT_IMAGE_DATA_PTR_8U(src, x + 0, y - 1),
-        (int32_t)*CT_IMAGE_DATA_PTR_8U(src, x - 1, y - 1),
-        (int32_t)*CT_IMAGE_DATA_PTR_8U(src, x + 1, y - 1),
-        (int32_t)*CT_IMAGE_DATA_PTR_8U(src, x + 0, y + 1),
-        (int32_t)*CT_IMAGE_DATA_PTR_8U(src, x - 1, y + 1),
-        (int32_t)*CT_IMAGE_DATA_PTR_8U(src, x + 1, y + 1)
-    };
-    return (uint8_t)dilate_get(values);
+    if (src->format == VX_DF_IMAGE_U1)
+    {
+        int32_t values[9][2] = {
+            {(int32_t)*CT_IMAGE_DATA_PTR_1U(src, x + 0, y + 0), (int32_t)x + 0},
+            {(int32_t)*CT_IMAGE_DATA_PTR_1U(src, x - 1, y + 0), (int32_t)x - 1},
+            {(int32_t)*CT_IMAGE_DATA_PTR_1U(src, x + 1, y + 0), (int32_t)x + 1},
+            {(int32_t)*CT_IMAGE_DATA_PTR_1U(src, x + 0, y - 1), (int32_t)x + 0},
+            {(int32_t)*CT_IMAGE_DATA_PTR_1U(src, x - 1, y - 1), (int32_t)x - 1},
+            {(int32_t)*CT_IMAGE_DATA_PTR_1U(src, x + 1, y - 1), (int32_t)x + 1},
+            {(int32_t)*CT_IMAGE_DATA_PTR_1U(src, x + 0, y + 1), (int32_t)x + 0},
+            {(int32_t)*CT_IMAGE_DATA_PTR_1U(src, x - 1, y + 1), (int32_t)x - 1},
+            {(int32_t)*CT_IMAGE_DATA_PTR_1U(src, x + 1, y + 1), (int32_t)x + 1}
+        };
+        return (uint8_t)dilate_get_U1(values);
+    }
+    else
+    {
+        int32_t values[9] = {
+            (int32_t)*CT_IMAGE_DATA_PTR_8U(src, x + 0, y + 0),
+            (int32_t)*CT_IMAGE_DATA_PTR_8U(src, x - 1, y + 0),
+            (int32_t)*CT_IMAGE_DATA_PTR_8U(src, x + 1, y + 0),
+            (int32_t)*CT_IMAGE_DATA_PTR_8U(src, x + 0, y - 1),
+            (int32_t)*CT_IMAGE_DATA_PTR_8U(src, x - 1, y - 1),
+            (int32_t)*CT_IMAGE_DATA_PTR_8U(src, x + 1, y - 1),
+            (int32_t)*CT_IMAGE_DATA_PTR_8U(src, x + 0, y + 1),
+            (int32_t)*CT_IMAGE_DATA_PTR_8U(src, x - 1, y + 1),
+            (int32_t)*CT_IMAGE_DATA_PTR_8U(src, x + 1, y + 1)
+        };
+        return (uint8_t)dilate_get_U8(values);
+    }
 }
 
 static uint8_t dilate3x3_calculate_replicate(CT_Image src, uint32_t x_, uint32_t y_)
 {
     int32_t x = (int)x_;
     int32_t y = (int)y_;
-    int32_t values[9] = {
-        (int32_t)CT_IMAGE_DATA_REPLICATE_8U(src, x + 0, y + 0),
-        (int32_t)CT_IMAGE_DATA_REPLICATE_8U(src, x - 1, y + 0),
-        (int32_t)CT_IMAGE_DATA_REPLICATE_8U(src, x + 1, y + 0),
-        (int32_t)CT_IMAGE_DATA_REPLICATE_8U(src, x + 0, y - 1),
-        (int32_t)CT_IMAGE_DATA_REPLICATE_8U(src, x - 1, y - 1),
-        (int32_t)CT_IMAGE_DATA_REPLICATE_8U(src, x + 1, y - 1),
-        (int32_t)CT_IMAGE_DATA_REPLICATE_8U(src, x + 0, y + 1),
-        (int32_t)CT_IMAGE_DATA_REPLICATE_8U(src, x - 1, y + 1),
-        (int32_t)CT_IMAGE_DATA_REPLICATE_8U(src, x + 1, y + 1)
-    };
-    return (uint8_t)dilate_get(values);
+    if (src->format == VX_DF_IMAGE_U1)
+    {
+        int32_t values[9] = {
+            (int32_t)CT_IMAGE_DATA_REPLICATE_1U(src, x + 0, y + 0),
+            (int32_t)CT_IMAGE_DATA_REPLICATE_1U(src, x - 1, y + 0),
+            (int32_t)CT_IMAGE_DATA_REPLICATE_1U(src, x + 1, y + 0),
+            (int32_t)CT_IMAGE_DATA_REPLICATE_1U(src, x + 0, y - 1),
+            (int32_t)CT_IMAGE_DATA_REPLICATE_1U(src, x - 1, y - 1),
+            (int32_t)CT_IMAGE_DATA_REPLICATE_1U(src, x + 1, y - 1),
+            (int32_t)CT_IMAGE_DATA_REPLICATE_1U(src, x + 0, y + 1),
+            (int32_t)CT_IMAGE_DATA_REPLICATE_1U(src, x - 1, y + 1),
+            (int32_t)CT_IMAGE_DATA_REPLICATE_1U(src, x + 1, y + 1)
+        };
+        return (uint8_t)dilate_get_U8(values);
+    }
+    else
+    {
+        int32_t values[9] = {
+            (int32_t)CT_IMAGE_DATA_REPLICATE_8U(src, x + 0, y + 0),
+            (int32_t)CT_IMAGE_DATA_REPLICATE_8U(src, x - 1, y + 0),
+            (int32_t)CT_IMAGE_DATA_REPLICATE_8U(src, x + 1, y + 0),
+            (int32_t)CT_IMAGE_DATA_REPLICATE_8U(src, x + 0, y - 1),
+            (int32_t)CT_IMAGE_DATA_REPLICATE_8U(src, x - 1, y - 1),
+            (int32_t)CT_IMAGE_DATA_REPLICATE_8U(src, x + 1, y - 1),
+            (int32_t)CT_IMAGE_DATA_REPLICATE_8U(src, x + 0, y + 1),
+            (int32_t)CT_IMAGE_DATA_REPLICATE_8U(src, x - 1, y + 1),
+            (int32_t)CT_IMAGE_DATA_REPLICATE_8U(src, x + 1, y + 1)
+        };
+        return (uint8_t)dilate_get_U8(values);
+    }
 }
 
 static uint8_t dilate3x3_calculate_constant(CT_Image src, uint32_t x_, uint32_t y_, vx_uint32 constant_value)
 {
     int32_t x = (int)x_;
     int32_t y = (int)y_;
-    int32_t values[9] = {
-        (int32_t)CT_IMAGE_DATA_CONSTANT_8U(src, x + 0, y + 0, constant_value),
-        (int32_t)CT_IMAGE_DATA_CONSTANT_8U(src, x - 1, y + 0, constant_value),
-        (int32_t)CT_IMAGE_DATA_CONSTANT_8U(src, x + 1, y + 0, constant_value),
-        (int32_t)CT_IMAGE_DATA_CONSTANT_8U(src, x + 0, y - 1, constant_value),
-        (int32_t)CT_IMAGE_DATA_CONSTANT_8U(src, x - 1, y - 1, constant_value),
-        (int32_t)CT_IMAGE_DATA_CONSTANT_8U(src, x + 1, y - 1, constant_value),
-        (int32_t)CT_IMAGE_DATA_CONSTANT_8U(src, x + 0, y + 1, constant_value),
-        (int32_t)CT_IMAGE_DATA_CONSTANT_8U(src, x - 1, y + 1, constant_value),
-        (int32_t)CT_IMAGE_DATA_CONSTANT_8U(src, x + 1, y + 1, constant_value)
-    };
-    return (uint8_t)dilate_get(values);
+    if (src->format == VX_DF_IMAGE_U1)
+    {
+        vx_bool const_val_bool = (constant_value == 0) ? vx_false_e : vx_true_e;
+        int32_t values[9] = {
+            (int32_t)CT_IMAGE_DATA_CONSTANT_1U(src, x + 0, y + 0, const_val_bool),
+            (int32_t)CT_IMAGE_DATA_CONSTANT_1U(src, x - 1, y + 0, const_val_bool),
+            (int32_t)CT_IMAGE_DATA_CONSTANT_1U(src, x + 1, y + 0, const_val_bool),
+            (int32_t)CT_IMAGE_DATA_CONSTANT_1U(src, x + 0, y - 1, const_val_bool),
+            (int32_t)CT_IMAGE_DATA_CONSTANT_1U(src, x - 1, y - 1, const_val_bool),
+            (int32_t)CT_IMAGE_DATA_CONSTANT_1U(src, x + 1, y - 1, const_val_bool),
+            (int32_t)CT_IMAGE_DATA_CONSTANT_1U(src, x + 0, y + 1, const_val_bool),
+            (int32_t)CT_IMAGE_DATA_CONSTANT_1U(src, x - 1, y + 1, const_val_bool),
+            (int32_t)CT_IMAGE_DATA_CONSTANT_1U(src, x + 1, y + 1, const_val_bool)
+        };
+        return (uint8_t)dilate_get_U8(values);
+    }
+    else
+    {
+        int32_t values[9] = {
+            (int32_t)CT_IMAGE_DATA_CONSTANT_8U(src, x + 0, y + 0, constant_value),
+            (int32_t)CT_IMAGE_DATA_CONSTANT_8U(src, x - 1, y + 0, constant_value),
+            (int32_t)CT_IMAGE_DATA_CONSTANT_8U(src, x + 1, y + 0, constant_value),
+            (int32_t)CT_IMAGE_DATA_CONSTANT_8U(src, x + 0, y - 1, constant_value),
+            (int32_t)CT_IMAGE_DATA_CONSTANT_8U(src, x - 1, y - 1, constant_value),
+            (int32_t)CT_IMAGE_DATA_CONSTANT_8U(src, x + 1, y - 1, constant_value),
+            (int32_t)CT_IMAGE_DATA_CONSTANT_8U(src, x + 0, y + 1, constant_value),
+            (int32_t)CT_IMAGE_DATA_CONSTANT_8U(src, x - 1, y + 1, constant_value),
+            (int32_t)CT_IMAGE_DATA_CONSTANT_8U(src, x + 1, y + 1, constant_value)
+        };
+        return (uint8_t)dilate_get_U8(values);
+    }
 }
 
 
@@ -141,35 +225,72 @@
 {
     CT_Image dst;
 
-    CT_ASSERT_(return NULL, src->format == VX_DF_IMAGE_U8);
+    CT_ASSERT_(return NULL, src->format == VX_DF_IMAGE_U1 || src->format == VX_DF_IMAGE_U8);
 
     dst = ct_allocate_image(src->width, src->height, src->format);
 
     if (border.mode == VX_BORDER_UNDEFINED)
     {
-        CT_FILL_IMAGE_8U(return 0, dst,
-                if (x >= 1 && y >= 1 && x < src->width - 1 && y < src->height - 1)
-                {
-                    uint8_t res = dilate3x3_calculate(src, x, y);
-                    *dst_data = res;
-                });
+        if (src->format == VX_DF_IMAGE_U1)
+        {
+            CT_FILL_IMAGE_1U(return 0, dst,
+                    if (x >= 1 && y >= 1 && x < src->width - 1 && y < src->height - 1)
+                    {
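+                        /* Shift x by the ROI's sub-byte start offset so the packed U1 source bits are addressed correctly. */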
+                        uint32_t xShftdSrc = x + src->roi.x % 8;
+                        uint8_t res = dilate3x3_calculate(src, xShftdSrc, y);
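+                        /* Write res into the single destination bit at 'offset' (the bit position supplied by the CT_FILL_IMAGE_1U macro), leaving the other packed bits untouched. */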
+                        *dst_data = (*dst_data & ~(1 << offset)) | (res << offset);
+                    });
+        }
+        else
+        {
+            CT_FILL_IMAGE_8U(return 0, dst,
+                    if (x >= 1 && y >= 1 && x < src->width - 1 && y < src->height - 1)
+                    {
+                        uint8_t res = dilate3x3_calculate(src, x, y);
+                        *dst_data = res;
+                    });
+        }
     }
     else if (border.mode == VX_BORDER_REPLICATE)
     {
-        CT_FILL_IMAGE_8U(return 0, dst,
-                {
-                    uint8_t res = dilate3x3_calculate_replicate(src, x, y);
-                    *dst_data = res;
-                });
+        if (src->format == VX_DF_IMAGE_U1)
+        {
+            CT_FILL_IMAGE_1U(return 0, dst,
+                    {
+                        uint32_t xShftdSrc = x + src->roi.x % 8;
+                        uint8_t res = dilate3x3_calculate_replicate(src, xShftdSrc, y);
+                        *dst_data = (*dst_data & ~(1 << offset)) | (res << offset);
+                    });
+        }
+        else
+        {
+            CT_FILL_IMAGE_8U(return 0, dst,
+                    {
+                        uint8_t res = dilate3x3_calculate_replicate(src, x, y);
+                        *dst_data = res;
+                    });
+        }
     }
     else if (border.mode == VX_BORDER_CONSTANT)
     {
         vx_uint32 constant_value = border.constant_value.U32;
-        CT_FILL_IMAGE_8U(return 0, dst,
-                {
-                    uint8_t res = dilate3x3_calculate_constant(src, x, y, constant_value);
-                    *dst_data = res;
-                });
+        if (src->format == VX_DF_IMAGE_U1)
+        {
+            CT_FILL_IMAGE_1U(return 0, dst,
+                    {
+                        uint32_t xShftdSrc = x + src->roi.x % 8;
+                        uint8_t res = dilate3x3_calculate_constant(src, xShftdSrc, y, constant_value);
+                        *dst_data = (*dst_data & ~(1 << offset)) | (res << offset);
+                    });
+        }
+        else
+        {
+            CT_FILL_IMAGE_8U(return 0, dst,
+                    {
+                        uint8_t res = dilate3x3_calculate_constant(src, x, y, constant_value);
+                        *dst_data = res;
+                    });
+        }
     }
     else
     {
@@ -211,15 +332,18 @@
 
 typedef struct {
     const char* testName;
-    CT_Image (*generator)(const char* fileName, int width, int height);
+    CT_Image (*generator)(const char* fileName, int width, int height, vx_df_image format);
     const char* fileName;
     vx_border_t border;
     int width, height;
+    vx_df_image format;
 } Arg;
 
 #define PARAMETERS \
-    CT_GENERATE_PARAMETERS("randomInput", ADD_VX_BORDERS_REQUIRE_UNDEFINED_ONLY, ADD_SIZE_SMALL_SET, ARG, dilate3x3_generate_random, NULL), \
-    CT_GENERATE_PARAMETERS("lena", ADD_VX_BORDERS_REQUIRE_UNDEFINED_ONLY, ADD_SIZE_NONE, ARG, dilate3x3_read_image, "lena.bmp")
+    CT_GENERATE_PARAMETERS("randomInput", ADD_VX_BORDERS_REQUIRE_UNDEFINED_ONLY, ADD_SIZE_SMALL_SET, ADD_TYPE_U8, ARG, dilate3x3_generate_random, NULL), \
+    CT_GENERATE_PARAMETERS("lena", ADD_VX_BORDERS_REQUIRE_UNDEFINED_ONLY, ADD_SIZE_NONE, ADD_TYPE_U8, ARG, dilate3x3_read_image, "lena.bmp"), \
+    CT_GENERATE_PARAMETERS("_U1_/randomInput", ADD_VX_BORDERS_U1_REQUIRE_UNDEFINED_ONLY, ADD_SIZE_SMALL_SET, ADD_TYPE_U1, ARG, dilate3x3_generate_random, NULL), \
+    CT_GENERATE_PARAMETERS("_U1_/lena", ADD_VX_BORDERS_U1_REQUIRE_UNDEFINED_ONLY, ADD_SIZE_NONE, ADD_TYPE_U1, ARG, dilate3x3_read_image, "lena.bmp")
 
 TEST_WITH_ARG(Dilate3x3, testGraphProcessing, Arg,
     PARAMETERS
@@ -233,7 +357,7 @@
     CT_Image src = NULL, dst = NULL;
     vx_border_t border = arg_->border;
 
-    ASSERT_NO_FAILURE(src = arg_->generator(arg_->fileName, arg_->width, arg_->height));
+    ASSERT_NO_FAILURE(src = arg_->generator(arg_->fileName, arg_->width, arg_->height, arg_->format));
 
     ASSERT_VX_OBJECT(src_image = ct_image_to_vx_image(src, context), VX_TYPE_IMAGE);
 
@@ -275,7 +399,7 @@
     CT_Image src = NULL, dst = NULL;
     vx_border_t border = arg_->border;
 
-    ASSERT_NO_FAILURE(src = arg_->generator(arg_->fileName, arg_->width, arg_->height));
+    ASSERT_NO_FAILURE(src = arg_->generator(arg_->fileName, arg_->width, arg_->height, arg_->format));
 
     ASSERT_VX_OBJECT(src_image = ct_image_to_vx_image(src, context), VX_TYPE_IMAGE);
 
@@ -296,4 +420,57 @@
     ASSERT(src_image == 0);
 }
 
-TESTCASE_TESTS(Dilate3x3, testNodeCreation, testGraphProcessing, testImmediateProcessing)
+typedef struct {
+    const char* testName;
+    CT_Image (*generator)(const char* fileName, int width, int height, vx_df_image format);
+    const char* fileName;
+    vx_border_t border;
+    int width, height;
+    vx_df_image format;
+    vx_rectangle_t regionShift;
+} ValidRegionTest_Arg;
+
+#define REGION_PARAMETERS \
+    CT_GENERATE_PARAMETERS("lena", ADD_VX_BORDERS_REQUIRE_UNDEFINED_ONLY, ADD_SIZE_NONE, ADD_TYPE_U8, ADD_VALID_REGION_SHRINKS, ARG, dilate3x3_read_image, "lena.bmp"), \
+    CT_GENERATE_PARAMETERS("_U1_/lena", ADD_VX_BORDERS_U1_REQUIRE_UNDEFINED_ONLY, ADD_SIZE_NONE, ADD_TYPE_U1, ADD_VALID_REGION_SHRINKS, ARG, dilate3x3_read_image, "lena.bmp")
+
+TEST_WITH_ARG(Dilate3x3, testWithValidRegion, ValidRegionTest_Arg,
+    REGION_PARAMETERS
+)
+{
+    vx_context context = context_->vx_context_;
+    vx_image src_image = 0, dst_image = 0;
+
+    CT_Image src = NULL, dst = NULL;
+    vx_border_t border = arg_->border;
+    vx_rectangle_t rect = {0, 0, 0, 0}, rect_shft = arg_->regionShift;
+
+    ASSERT_NO_FAILURE(src = arg_->generator(arg_->fileName, arg_->width, arg_->height, arg_->format));
+
+    ASSERT_VX_OBJECT(src_image = ct_image_to_vx_image(src, context), VX_TYPE_IMAGE);
+    ASSERT_VX_OBJECT(dst_image = ct_create_similar_image(src_image), VX_TYPE_IMAGE);
+
+    ASSERT_NO_FAILURE(vxGetValidRegionImage(src_image, &rect));
+    ALTERRECTANGLE(rect, rect_shft.start_x, rect_shft.start_y, rect_shft.end_x, rect_shft.end_y);
+    ASSERT_NO_FAILURE(vxSetImageValidRectangle(src_image, &rect));
+
+    VX_CALL(vxSetContextAttribute(context, VX_CONTEXT_IMMEDIATE_BORDER, &border, sizeof(border)));
+
+    VX_CALL(vxuDilate3x3(context, src_image, dst_image));
+
+    ASSERT_NO_FAILURE(dst = ct_image_from_vx_image(dst_image));
+    ASSERT_NO_FAILURE(ct_adjust_roi(dst, rect_shft.start_x, rect_shft.start_y, -rect_shft.end_x, -rect_shft.end_y));
+
+    ASSERT_NO_FAILURE(ct_adjust_roi(src, rect_shft.start_x, rect_shft.start_y, -rect_shft.end_x, -rect_shft.end_y));
+    ASSERT_NO_FAILURE(dilate3x3_check(src, dst, border));
+
+    VX_CALL(vxReleaseImage(&dst_image));
+    VX_CALL(vxReleaseImage(&src_image));
+
+    ASSERT(dst_image == 0);
+    ASSERT(src_image == 0);
+}
+
+TESTCASE_TESTS(Dilate3x3, testNodeCreation, testGraphProcessing, testImmediateProcessing, testWithValidRegion)
+
+#endif //OPENVX_USE_ENHANCED_VISION || OPENVX_CONFORMANCE_VISION
diff --git a/test_conformance/test_distribution.c b/test_conformance/test_distribution.c
index dad1c33..2742a77 100644
--- a/test_conformance/test_distribution.c
+++ b/test_conformance/test_distribution.c
@@ -15,6 +15,8 @@
  * limitations under the License.
  */
 
+#if defined OPENVX_USE_ENHANCED_VISION || OPENVX_CONFORMANCE_VISION
+
 #include <string.h>
 #include <VX/vx.h>
 #include <VX/vxu.h>
@@ -73,7 +75,7 @@
             CT_FAIL("check for query distribution attribute VX_DISTRIBUTION_BINS failed\n");
 
         VX_CALL(vxQueryDistribution(dist1, VX_DISTRIBUTION_WINDOW, &attr_window, sizeof(attr_window)));
-        /*Tthe attribute is specified as valid only when the range is a multiple of nbins, 
+        /* The attribute is specified as valid only when the range is a multiple of nbins,
         * in other cases, its value shouldn't be checked */
         if (((range % nbins) == 0) && (attr_window != reference_window(range, nbins)))
             CT_FAIL("check for query distribution attribute VX_DISTRIBUTION_WINDOW failed\n");
@@ -85,3 +87,5 @@
 }
 
 TESTCASE_TESTS(Distribution, testvxCreateVirtualDistribution)
+
+#endif //OPENVX_USE_ENHANCED_VISION || OPENVX_CONFORMANCE_VISION
diff --git a/test_conformance/test_eqhist.c b/test_conformance/test_eqhist.c
index 3724072..4612d0b 100644
--- a/test_conformance/test_eqhist.c
+++ b/test_conformance/test_eqhist.c
@@ -1,4 +1,4 @@
-/* 
+/*
 
  * Copyright (c) 2012-2017 The Khronos Group Inc.
  *
@@ -15,6 +15,8 @@
  * limitations under the License.
  */
 
+#if defined OPENVX_USE_ENHANCED_VISION || OPENVX_CONFORMANCE_VISION
+
 #include "test_engine/test.h"
 
 #include <VX/vx.h>
@@ -175,3 +177,6 @@
 }
 
 TESTCASE_TESTS(EqualizeHistogram, testOnRandom)
+
+#endif //OPENVX_USE_ENHANCED_VISION || OPENVX_CONFORMANCE_VISION
+
diff --git a/test_conformance/test_erode3x3.c b/test_conformance/test_erode3x3.c
index 1468e82..999429a 100644
--- a/test_conformance/test_erode3x3.c
+++ b/test_conformance/test_erode3x3.c
@@ -15,14 +15,14 @@
  * limitations under the License.
  */
 
+#if defined OPENVX_USE_ENHANCED_VISION || OPENVX_CONFORMANCE_VISION
+
 #include "test_engine/test.h"
 #include <VX/vx.h>
 #include <VX/vxu.h>
 
-
 TESTCASE(Erode3x3, CT_VXContext, ct_setup_vx_context, 0)
 
-
 TEST(Erode3x3, testNodeCreation)
 {
     vx_context context = context_->vx_context_;
@@ -49,33 +49,62 @@
     ASSERT(src_image == 0);
 }
 
-
-// VX_DF_IMAGE_8U Boolean image
-static CT_Image erode3x3_generate_random(const char* fileName, int width, int height)
+static CT_Image erode3x3_generate_random(const char* fileName, int width, int height, vx_df_image format)
 {
     CT_Image image;
 
-    ASSERT_NO_FAILURE_(return 0,
-            image = ct_allocate_ct_image_random(width, height, VX_DF_IMAGE_U8, &CT()->seed_, 0, 2));
+    ASSERT_(return 0, format == VX_DF_IMAGE_U1 || format == VX_DF_IMAGE_U8);
 
-    // convert 0/1 values to 0/255
-    CT_FILL_IMAGE_8U(return 0, image,
-            *dst_data = (*dst_data) ? 255 : 0);
+    ASSERT_NO_FAILURE_(return 0, image = ct_allocate_ct_image_random(width, height, format, &CT()->seed_, 0, 2));
+
+    if (format == VX_DF_IMAGE_U8)
+    {
+        // convert 0/1 values to 0/255
+        CT_FILL_IMAGE_8U(return 0, image, *dst_data = (*dst_data) ? 255 : 0);
+    }
 
     return image;
 }
 
-static CT_Image erode3x3_read_image(const char* fileName, int width, int height)
+static CT_Image erode3x3_read_image(const char* fileName, int width, int height, vx_df_image format)
 {
-    CT_Image image = NULL;
+    CT_Image image_load = NULL, image_ret = NULL;
     ASSERT_(return 0, width == 0 && height == 0);
-    image = ct_read_image(fileName, 1);
-    ASSERT_(return 0, image);
-    ASSERT_(return 0, image->format == VX_DF_IMAGE_U8);
-    return image;
+    ASSERT_(return 0, format == VX_DF_IMAGE_U1 || format == VX_DF_IMAGE_U8);
+
+    image_load = ct_read_image(fileName, 1);
+    ASSERT_(return 0, image_load);
+    ASSERT_(return 0, image_load->format == VX_DF_IMAGE_U8);
+
+    if (format == VX_DF_IMAGE_U1)
+    {
+        ASSERT_NO_FAILURE_(return 0, threshold_U8_ct_image(image_load, 127));   // Threshold to make the U1 image less trivial
+        ASSERT_NO_FAILURE_(return 0, image_ret = ct_allocate_image(image_load->width, image_load->height, VX_DF_IMAGE_U1));
+        ASSERT_NO_FAILURE_(return 0, U8_ct_image_to_U1_ct_image(image_load, image_ret));
+    }
+    else
+        image_ret = image_load;
+
+    ASSERT_(return 0, image_ret);
+    ASSERT_(return 0, image_ret->format == format);
+
+    return image_ret;
 }
 
-static int32_t erode_get(int32_t *values)
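+/* Each of the 9 entries pairs the byte read from the packed U1 row with the pixel's x-coordinate;
+ * the bit at x % 8 is extracted from each byte and the minimum (logical AND) over the 3x3 neighbourhood is returned. */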
+static int32_t erode_get_U1(int32_t values[9][2])
+{
+    int i;
+    int32_t v_i;
+    int32_t v = (values[0][0] & (1 << (values[0][1] % 8))) >> (values[0][1] % 8);
+    for (i = 1; i < 9; i++)
+    {
+        v_i = (values[i][0] & (1 << (values[i][1] % 8))) >> (values[i][1] % 8);
+        v = (v > v_i) ? v_i : v;
+    }
+    return v;
+}
+
+static int32_t erode_get_U8(int32_t *values)
 {
     int i;
     int32_t v = values[0];
@@ -86,54 +115,109 @@
 
 static uint8_t erode3x3_calculate(CT_Image src, uint32_t x, uint32_t y)
 {
-    int32_t values[9] = {
-        (int32_t)*CT_IMAGE_DATA_PTR_8U(src, x + 0, y + 0),
-        (int32_t)*CT_IMAGE_DATA_PTR_8U(src, x - 1, y + 0),
-        (int32_t)*CT_IMAGE_DATA_PTR_8U(src, x + 1, y + 0),
-        (int32_t)*CT_IMAGE_DATA_PTR_8U(src, x + 0, y - 1),
-        (int32_t)*CT_IMAGE_DATA_PTR_8U(src, x - 1, y - 1),
-        (int32_t)*CT_IMAGE_DATA_PTR_8U(src, x + 1, y - 1),
-        (int32_t)*CT_IMAGE_DATA_PTR_8U(src, x + 0, y + 1),
-        (int32_t)*CT_IMAGE_DATA_PTR_8U(src, x - 1, y + 1),
-        (int32_t)*CT_IMAGE_DATA_PTR_8U(src, x + 1, y + 1)
-    };
-    return (uint8_t)erode_get(values);
+    if (src->format == VX_DF_IMAGE_U1)
+    {
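+        /* Pair each neighbour's packed source byte with its x-coordinate so erode_get_U1 can pick out the correct bit. */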
+        int32_t values[9][2] = {
+            {(int32_t)*CT_IMAGE_DATA_PTR_1U(src, x + 0, y + 0), (int32_t)x + 0},
+            {(int32_t)*CT_IMAGE_DATA_PTR_1U(src, x - 1, y + 0), (int32_t)x - 1},
+            {(int32_t)*CT_IMAGE_DATA_PTR_1U(src, x + 1, y + 0), (int32_t)x + 1},
+            {(int32_t)*CT_IMAGE_DATA_PTR_1U(src, x + 0, y - 1), (int32_t)x + 0},
+            {(int32_t)*CT_IMAGE_DATA_PTR_1U(src, x - 1, y - 1), (int32_t)x - 1},
+            {(int32_t)*CT_IMAGE_DATA_PTR_1U(src, x + 1, y - 1), (int32_t)x + 1},
+            {(int32_t)*CT_IMAGE_DATA_PTR_1U(src, x + 0, y + 1), (int32_t)x + 0},
+            {(int32_t)*CT_IMAGE_DATA_PTR_1U(src, x - 1, y + 1), (int32_t)x - 1},
+            {(int32_t)*CT_IMAGE_DATA_PTR_1U(src, x + 1, y + 1), (int32_t)x + 1}
+        };
+        return (uint8_t)erode_get_U1(values);
+    }
+    else
+    {
+        int32_t values[9] = {
+            (int32_t)*CT_IMAGE_DATA_PTR_8U(src, x + 0, y + 0),
+            (int32_t)*CT_IMAGE_DATA_PTR_8U(src, x - 1, y + 0),
+            (int32_t)*CT_IMAGE_DATA_PTR_8U(src, x + 1, y + 0),
+            (int32_t)*CT_IMAGE_DATA_PTR_8U(src, x + 0, y - 1),
+            (int32_t)*CT_IMAGE_DATA_PTR_8U(src, x - 1, y - 1),
+            (int32_t)*CT_IMAGE_DATA_PTR_8U(src, x + 1, y - 1),
+            (int32_t)*CT_IMAGE_DATA_PTR_8U(src, x + 0, y + 1),
+            (int32_t)*CT_IMAGE_DATA_PTR_8U(src, x - 1, y + 1),
+            (int32_t)*CT_IMAGE_DATA_PTR_8U(src, x + 1, y + 1)
+        };
+        return (uint8_t)erode_get_U8(values);
+    }
 }
 
 static uint8_t erode3x3_calculate_replicate(CT_Image src, uint32_t x_, uint32_t y_)
 {
     int32_t x = (int)x_;
     int32_t y = (int)y_;
-    int32_t values[9] = {
-        (int32_t)CT_IMAGE_DATA_REPLICATE_8U(src, x + 0, y + 0),
-        (int32_t)CT_IMAGE_DATA_REPLICATE_8U(src, x - 1, y + 0),
-        (int32_t)CT_IMAGE_DATA_REPLICATE_8U(src, x + 1, y + 0),
-        (int32_t)CT_IMAGE_DATA_REPLICATE_8U(src, x + 0, y - 1),
-        (int32_t)CT_IMAGE_DATA_REPLICATE_8U(src, x - 1, y - 1),
-        (int32_t)CT_IMAGE_DATA_REPLICATE_8U(src, x + 1, y - 1),
-        (int32_t)CT_IMAGE_DATA_REPLICATE_8U(src, x + 0, y + 1),
-        (int32_t)CT_IMAGE_DATA_REPLICATE_8U(src, x - 1, y + 1),
-        (int32_t)CT_IMAGE_DATA_REPLICATE_8U(src, x + 1, y + 1)
-    };
-    return (uint8_t)erode_get(values);
+    if (src->format == VX_DF_IMAGE_U1)
+    {
+        int32_t values[9] = {
+            (int32_t)CT_IMAGE_DATA_REPLICATE_1U(src, x + 0, y + 0),
+            (int32_t)CT_IMAGE_DATA_REPLICATE_1U(src, x - 1, y + 0),
+            (int32_t)CT_IMAGE_DATA_REPLICATE_1U(src, x + 1, y + 0),
+            (int32_t)CT_IMAGE_DATA_REPLICATE_1U(src, x + 0, y - 1),
+            (int32_t)CT_IMAGE_DATA_REPLICATE_1U(src, x - 1, y - 1),
+            (int32_t)CT_IMAGE_DATA_REPLICATE_1U(src, x + 1, y - 1),
+            (int32_t)CT_IMAGE_DATA_REPLICATE_1U(src, x + 0, y + 1),
+            (int32_t)CT_IMAGE_DATA_REPLICATE_1U(src, x - 1, y + 1),
+            (int32_t)CT_IMAGE_DATA_REPLICATE_1U(src, x + 1, y + 1)
+        };
+        return (uint8_t)erode_get_U8(values);
+    }
+    else
+    {
+        int32_t values[9] = {
+            (int32_t)CT_IMAGE_DATA_REPLICATE_8U(src, x + 0, y + 0),
+            (int32_t)CT_IMAGE_DATA_REPLICATE_8U(src, x - 1, y + 0),
+            (int32_t)CT_IMAGE_DATA_REPLICATE_8U(src, x + 1, y + 0),
+            (int32_t)CT_IMAGE_DATA_REPLICATE_8U(src, x + 0, y - 1),
+            (int32_t)CT_IMAGE_DATA_REPLICATE_8U(src, x - 1, y - 1),
+            (int32_t)CT_IMAGE_DATA_REPLICATE_8U(src, x + 1, y - 1),
+            (int32_t)CT_IMAGE_DATA_REPLICATE_8U(src, x + 0, y + 1),
+            (int32_t)CT_IMAGE_DATA_REPLICATE_8U(src, x - 1, y + 1),
+            (int32_t)CT_IMAGE_DATA_REPLICATE_8U(src, x + 1, y + 1)
+        };
+        return (uint8_t)erode_get_U8(values);
+    }
 }
 
 static uint8_t erode3x3_calculate_constant(CT_Image src, uint32_t x_, uint32_t y_, vx_uint32 constant_value)
 {
     int32_t x = (int)x_;
     int32_t y = (int)y_;
-    int32_t values[9] = {
-        (int32_t)CT_IMAGE_DATA_CONSTANT_8U(src, x + 0, y + 0, constant_value),
-        (int32_t)CT_IMAGE_DATA_CONSTANT_8U(src, x - 1, y + 0, constant_value),
-        (int32_t)CT_IMAGE_DATA_CONSTANT_8U(src, x + 1, y + 0, constant_value),
-        (int32_t)CT_IMAGE_DATA_CONSTANT_8U(src, x + 0, y - 1, constant_value),
-        (int32_t)CT_IMAGE_DATA_CONSTANT_8U(src, x - 1, y - 1, constant_value),
-        (int32_t)CT_IMAGE_DATA_CONSTANT_8U(src, x + 1, y - 1, constant_value),
-        (int32_t)CT_IMAGE_DATA_CONSTANT_8U(src, x + 0, y + 1, constant_value),
-        (int32_t)CT_IMAGE_DATA_CONSTANT_8U(src, x - 1, y + 1, constant_value),
-        (int32_t)CT_IMAGE_DATA_CONSTANT_8U(src, x + 1, y + 1, constant_value)
-    };
-    return (uint8_t)erode_get(values);
+    if (src->format == VX_DF_IMAGE_U1)
+    {
+        vx_bool const_val_bool = (constant_value == 0) ? vx_false_e : vx_true_e;
+        int32_t values[9] = {
+            (int32_t)CT_IMAGE_DATA_CONSTANT_1U(src, x + 0, y + 0, const_val_bool),
+            (int32_t)CT_IMAGE_DATA_CONSTANT_1U(src, x - 1, y + 0, const_val_bool),
+            (int32_t)CT_IMAGE_DATA_CONSTANT_1U(src, x + 1, y + 0, const_val_bool),
+            (int32_t)CT_IMAGE_DATA_CONSTANT_1U(src, x + 0, y - 1, const_val_bool),
+            (int32_t)CT_IMAGE_DATA_CONSTANT_1U(src, x - 1, y - 1, const_val_bool),
+            (int32_t)CT_IMAGE_DATA_CONSTANT_1U(src, x + 1, y - 1, const_val_bool),
+            (int32_t)CT_IMAGE_DATA_CONSTANT_1U(src, x + 0, y + 1, const_val_bool),
+            (int32_t)CT_IMAGE_DATA_CONSTANT_1U(src, x - 1, y + 1, const_val_bool),
+            (int32_t)CT_IMAGE_DATA_CONSTANT_1U(src, x + 1, y + 1, const_val_bool)
+        };
+        return (uint8_t)erode_get_U8(values);
+    }
+    else
+    {
+        int32_t values[9] = {
+            (int32_t)CT_IMAGE_DATA_CONSTANT_8U(src, x + 0, y + 0, constant_value),
+            (int32_t)CT_IMAGE_DATA_CONSTANT_8U(src, x - 1, y + 0, constant_value),
+            (int32_t)CT_IMAGE_DATA_CONSTANT_8U(src, x + 1, y + 0, constant_value),
+            (int32_t)CT_IMAGE_DATA_CONSTANT_8U(src, x + 0, y - 1, constant_value),
+            (int32_t)CT_IMAGE_DATA_CONSTANT_8U(src, x - 1, y - 1, constant_value),
+            (int32_t)CT_IMAGE_DATA_CONSTANT_8U(src, x + 1, y - 1, constant_value),
+            (int32_t)CT_IMAGE_DATA_CONSTANT_8U(src, x + 0, y + 1, constant_value),
+            (int32_t)CT_IMAGE_DATA_CONSTANT_8U(src, x - 1, y + 1, constant_value),
+            (int32_t)CT_IMAGE_DATA_CONSTANT_8U(src, x + 1, y + 1, constant_value)
+        };
+        return (uint8_t)erode_get_U8(values);
+    }
 }
 
 
@@ -141,35 +225,72 @@
 {
     CT_Image dst;
 
-    CT_ASSERT_(return NULL, src->format == VX_DF_IMAGE_U8);
+    CT_ASSERT_(return NULL, src->format == VX_DF_IMAGE_U1 || src->format == VX_DF_IMAGE_U8);
 
     dst = ct_allocate_image(src->width, src->height, src->format);
 
     if (border.mode == VX_BORDER_UNDEFINED)
     {
-        CT_FILL_IMAGE_8U(return 0, dst,
-                if (x >= 1 && y >= 1 && x < src->width - 1 && y < src->height - 1)
-                {
-                    uint8_t res = erode3x3_calculate(src, x, y);
-                    *dst_data = res;
-                });
+        if (src->format == VX_DF_IMAGE_U1)
+        {
+            CT_FILL_IMAGE_1U(return 0, dst,
+                    if (x >= 1 && y >= 1 && x < src->width - 1 && y < src->height - 1)
+                    {
+                        vx_uint32 xShftdSrc = x + src->roi.x % 8;
+                        uint8_t res = erode3x3_calculate(src, xShftdSrc, y);
+                        *dst_data = (*dst_data & ~(1 << offset)) | (res << offset);
+                    });
+        }
+        else
+        {
+            CT_FILL_IMAGE_8U(return 0, dst,
+                    if (x >= 1 && y >= 1 && x < src->width - 1 && y < src->height - 1)
+                    {
+                        uint8_t res = erode3x3_calculate(src, x, y);
+                        *dst_data = res;
+                    });
+        }
     }
     else if (border.mode == VX_BORDER_REPLICATE)
     {
-        CT_FILL_IMAGE_8U(return 0, dst,
-                {
-                    uint8_t res = erode3x3_calculate_replicate(src, x, y);
-                    *dst_data = res;
-                });
+        if (src->format == VX_DF_IMAGE_U1)
+        {
+            CT_FILL_IMAGE_1U(return 0, dst,
+                    {
+                        vx_uint32 xShftdSrc = x + src->roi.x % 8;
+                        uint8_t res = erode3x3_calculate_replicate(src, xShftdSrc, y);
+                        *dst_data = (*dst_data & ~(1 << offset)) | (res << offset);
+                    });
+        }
+        else
+        {
+            CT_FILL_IMAGE_8U(return 0, dst,
+                    {
+                        uint8_t res = erode3x3_calculate_replicate(src, x, y);
+                        *dst_data = res;
+                    });
+        }
     }
     else if (border.mode == VX_BORDER_CONSTANT)
     {
         vx_uint32 constant_value = border.constant_value.U32;
-        CT_FILL_IMAGE_8U(return 0, dst,
-                {
-                    uint8_t res = erode3x3_calculate_constant(src, x, y, constant_value);
-                    *dst_data = res;
-                });
+        if (src->format == VX_DF_IMAGE_U1)
+        {
+            CT_FILL_IMAGE_1U(return 0, dst,
+                    {
+                        vx_uint32 xShftdSrc = x + src->roi.x % 8;
+                        uint8_t res = erode3x3_calculate_constant(src, xShftdSrc, y, constant_value);
+                        *dst_data = (*dst_data & ~(1 << offset)) | (res << offset);
+                    });
+        }
+        else
+        {
+            CT_FILL_IMAGE_8U(return 0, dst,
+                    {
+                        uint8_t res = erode3x3_calculate_constant(src, x, y, constant_value);
+                        *dst_data = res;
+                    });
+        }
     }
     else
     {
@@ -211,15 +332,18 @@
 
 typedef struct {
     const char* testName;
-    CT_Image (*generator)(const char* fileName, int width, int height);
+    CT_Image (*generator)(const char* fileName, int width, int height, vx_df_image format);
     const char* fileName;
     vx_border_t border;
     int width, height;
+    vx_df_image format;
 } Arg;
 
 #define PARAMETERS \
-    CT_GENERATE_PARAMETERS("randomInput", ADD_VX_BORDERS_REQUIRE_UNDEFINED_ONLY, ADD_SIZE_SMALL_SET, ARG, erode3x3_generate_random, NULL), \
-    CT_GENERATE_PARAMETERS("lena", ADD_VX_BORDERS_REQUIRE_UNDEFINED_ONLY, ADD_SIZE_NONE, ARG, erode3x3_read_image, "lena.bmp")
+    CT_GENERATE_PARAMETERS("randomInput", ADD_VX_BORDERS_REQUIRE_UNDEFINED_ONLY, ADD_SIZE_SMALL_SET, ADD_TYPE_U8, ARG, erode3x3_generate_random, NULL), \
+    CT_GENERATE_PARAMETERS("lena", ADD_VX_BORDERS_REQUIRE_UNDEFINED_ONLY, ADD_SIZE_NONE, ADD_TYPE_U8, ARG, erode3x3_read_image, "lena.bmp"), \
+    CT_GENERATE_PARAMETERS("_U1_/randomInput", ADD_VX_BORDERS_U1_REQUIRE_UNDEFINED_ONLY, ADD_SIZE_SMALL_SET, ADD_TYPE_U1, ARG, erode3x3_generate_random, NULL), \
+    CT_GENERATE_PARAMETERS("_U1_/lena", ADD_VX_BORDERS_U1_REQUIRE_UNDEFINED_ONLY, ADD_SIZE_NONE, ADD_TYPE_U1, ARG, erode3x3_read_image, "lena.bmp")
 
 TEST_WITH_ARG(Erode3x3, testGraphProcessing, Arg,
     PARAMETERS
@@ -233,7 +357,7 @@
     CT_Image src = NULL, dst = NULL;
     vx_border_t border = arg_->border;
 
-    ASSERT_NO_FAILURE(src = arg_->generator(arg_->fileName, arg_->width, arg_->height));
+    ASSERT_NO_FAILURE(src = arg_->generator(arg_->fileName, arg_->width, arg_->height, arg_->format));
 
     ASSERT_VX_OBJECT(src_image = ct_image_to_vx_image(src, context), VX_TYPE_IMAGE);
 
@@ -275,7 +399,7 @@
     CT_Image src = NULL, dst = NULL;
     vx_border_t border = arg_->border;
 
-    ASSERT_NO_FAILURE(src = arg_->generator(arg_->fileName, arg_->width, arg_->height));
+    ASSERT_NO_FAILURE(src = arg_->generator(arg_->fileName, arg_->width, arg_->height, arg_->format));
 
     ASSERT_VX_OBJECT(src_image = ct_image_to_vx_image(src, context), VX_TYPE_IMAGE);
 
@@ -296,4 +420,60 @@
     ASSERT(src_image == 0);
 }
 
-TESTCASE_TESTS(Erode3x3, testNodeCreation, testGraphProcessing, testImmediateProcessing)
+typedef struct {
+    const char* testName;
+    CT_Image (*generator)(const char* fileName, int width, int height, vx_df_image format);
+    const char* fileName;
+    vx_border_t border;
+    int width, height;
+    vx_df_image format;
+    vx_rectangle_t regionShift;
+} ValidRegionTest_Arg;
+
+#ifdef PARAMETERS
+#undef PARAMETERS
+#endif
+#define PARAMETERS \
+    CT_GENERATE_PARAMETERS("lena", ADD_VX_BORDERS_REQUIRE_UNDEFINED_ONLY, ADD_SIZE_NONE, ADD_TYPE_U8, ADD_VALID_REGION_SHRINKS, ARG, erode3x3_read_image, "lena.bmp"), \
+    CT_GENERATE_PARAMETERS("_U1_/lena", ADD_VX_BORDERS_U1_REQUIRE_UNDEFINED_ONLY, ADD_SIZE_NONE, ADD_TYPE_U1, ADD_VALID_REGION_SHRINKS, ARG, erode3x3_read_image, "lena.bmp")
+
+TEST_WITH_ARG(Erode3x3, testWithValidRegion, ValidRegionTest_Arg,
+    PARAMETERS
+)
+{
+    vx_context context = context_->vx_context_;
+    vx_image src_image = 0, dst_image = 0;
+
+    CT_Image src = NULL, dst = NULL;
+    vx_border_t border = arg_->border;
+    vx_rectangle_t rect = {0, 0, 0, 0}, rect_shft = arg_->regionShift;
+
+    ASSERT_NO_FAILURE(src = arg_->generator(arg_->fileName, arg_->width, arg_->height, arg_->format));
+
+    ASSERT_VX_OBJECT(src_image = ct_image_to_vx_image(src, context), VX_TYPE_IMAGE);
+    ASSERT_VX_OBJECT(dst_image = ct_create_similar_image(src_image), VX_TYPE_IMAGE);
+
+    ASSERT_NO_FAILURE(vxGetValidRegionImage(src_image, &rect));
+    ALTERRECTANGLE(rect, rect_shft.start_x, rect_shft.start_y, rect_shft.end_x, rect_shft.end_y);
+    ASSERT_NO_FAILURE(vxSetImageValidRectangle(src_image, &rect));
+
+    VX_CALL(vxSetContextAttribute(context, VX_CONTEXT_IMMEDIATE_BORDER, &border, sizeof(border)));
+
+    VX_CALL(vxuErode3x3(context, src_image, dst_image));
+
+    ASSERT_NO_FAILURE(dst = ct_image_from_vx_image(dst_image));
+    ASSERT_NO_FAILURE(ct_adjust_roi(dst, rect_shft.start_x, rect_shft.start_y, -rect_shft.end_x, -rect_shft.end_y));
+
+    ASSERT_NO_FAILURE(ct_adjust_roi(src, rect_shft.start_x, rect_shft.start_y, -rect_shft.end_x, -rect_shft.end_y));
+    ASSERT_NO_FAILURE(erode3x3_check(src, dst, border));
+
+    VX_CALL(vxReleaseImage(&dst_image));
+    VX_CALL(vxReleaseImage(&src_image));
+
+    ASSERT(dst_image == 0);
+    ASSERT(src_image == 0);
+}
+
+TESTCASE_TESTS(Erode3x3, testNodeCreation, testGraphProcessing, testImmediateProcessing, testWithValidRegion)
+
+#endif //OPENVX_USE_ENHANCED_VISION || OPENVX_CONFORMANCE_VISION
diff --git a/test_conformance/test_export_import_extension.c b/test_conformance/test_export_import_extension.c
index 692088e..7436e75 100644
--- a/test_conformance/test_export_import_extension.c
+++ b/test_conformance/test_export_import_extension.c
@@ -111,7 +111,7 @@
     vx_distribution distribution = vxCreateDistribution(context, 32, 0, 255);
     reference_list[2] = (vx_reference ) distribution;
 
-    vx_threshold threshold = vxCreateThreshold(context, VX_THRESHOLD_TYPE_BINARY, VX_TYPE_UINT8);
+    vx_threshold threshold = vxCreateThresholdForImage(context, VX_THRESHOLD_TYPE_BINARY, VX_DF_IMAGE_U8, VX_DF_IMAGE_U8);
     reference_list[3] = (vx_reference ) threshold;
 
     vx_matrix matrix = vxCreateMatrix(context, VX_TYPE_UINT8, 3, 5);
@@ -166,7 +166,7 @@
     reference_list[12] = (vx_reference ) graph;
 
     ASSERT_EQ_VX_STATUS(VX_SUCCESS, vxExportObjectsToMemory( context, 13, &reference_list[0], &uses_list[0], &blob_ptr, &blob_bytes));
-    void * export_blob = malloc(blob_bytes);
+    void * export_blob = ct_alloc_mem(blob_bytes);
     memcpy(export_blob, blob_ptr, blob_bytes);
 
     VX_CALL(vxReleaseImage(&image));
@@ -197,7 +197,7 @@
     import_list[1] = (vx_reference ) lut1;
     vx_distribution distribution1 = vxCreateDistribution(context1, 32, 0, 255);
     import_list[2] = (vx_reference ) distribution1;
-    vx_threshold threshold1 = vxCreateThreshold(context1, VX_THRESHOLD_TYPE_BINARY, VX_TYPE_UINT8);
+    vx_threshold threshold1 = vxCreateThresholdForImage(context1, VX_THRESHOLD_TYPE_BINARY, VX_DF_IMAGE_U8, VX_DF_IMAGE_U8);
     import_list[3] = (vx_reference ) threshold1;
     vx_matrix matrix1 = vxCreateMatrix(context1, VX_TYPE_UINT8, 3, 5);
     import_list[4] = (vx_reference ) matrix1;
@@ -263,7 +263,7 @@
     vxReleaseReference(&name_reference);
 
     ASSERT_EQ_VX_STATUS(VX_SUCCESS,vxReleaseImport(&import));
-    free(export_blob);
+    ct_free_mem(export_blob);
 }
 
 TESTCASE_TESTS(ExtensionObject, testExtensionObject)
diff --git a/test_conformance/test_fast.c b/test_conformance/test_fast.c
index f0fb064..5ed8f59 100644
--- a/test_conformance/test_fast.c
+++ b/test_conformance/test_fast.c
@@ -1,4 +1,4 @@
-/* 
+/*
 
  * Copyright (c) 2012-2017 The Khronos Group Inc.
  *
@@ -15,6 +15,8 @@
  * limitations under the License.
  */
 
+#if defined OPENVX_USE_ENHANCED_VISION || OPENVX_CONFORMANCE_VISION
+
 #include "test_engine/test.h"
 
 #include <VX/vx.h>
@@ -301,3 +303,5 @@
 }
 
 TESTCASE_TESTS(FastCorners, testOnNaturalImages)
+
+#endif //OPENVX_USE_ENHANCED_VISION || OPENVX_CONFORMANCE_VISION
diff --git a/test_conformance/test_gaussian3x3.c b/test_conformance/test_gaussian3x3.c
index ffe3ebe..04fc269 100644
--- a/test_conformance/test_gaussian3x3.c
+++ b/test_conformance/test_gaussian3x3.c
@@ -15,6 +15,8 @@
  * limitations under the License.
  */
 
+#if defined OPENVX_USE_ENHANCED_VISION || OPENVX_CONFORMANCE_VISION
+
 #include "test_engine/test.h"
 #include <VX/vx.h>
 #include <VX/vxu.h>
@@ -298,3 +300,5 @@
 }
 
 TESTCASE_TESTS(Gaussian3x3, testNodeCreation, testGraphProcessing, testImmediateProcessing)
+
+#endif //OPENVX_USE_ENHANCED_VISION || OPENVX_CONFORMANCE_VISION
diff --git a/test_conformance/test_gaussianpyramid.c b/test_conformance/test_gaussianpyramid.c
index d0eb3a4..dce0aa0 100644
--- a/test_conformance/test_gaussianpyramid.c
+++ b/test_conformance/test_gaussianpyramid.c
@@ -1,4 +1,4 @@
-/* 
+/*
 
  * Copyright (c) 2012-2017 The Khronos Group Inc.
  *
@@ -15,6 +15,8 @@
  * limitations under the License.
  */
 
+#if defined OPENVX_USE_ENHANCED_VISION || OPENVX_CONFORMANCE_VISION
+
 #include "test_engine/test.h"
 #include <VX/vx.h>
 #include <VX/vxu.h>
@@ -654,5 +656,4 @@
         testReference
 )
 
-
-
+#endif //OPENVX_USE_ENHANCED_VISION || OPENVX_CONFORMANCE_VISION
diff --git a/test_conformance/test_graph.c b/test_conformance/test_graph.c
index 10f812b..188bd2c 100644
--- a/test_conformance/test_graph.c
+++ b/test_conformance/test_graph.c
@@ -18,7 +18,12 @@
 #include "test_engine/test.h"
 #include <VX/vx.h>
 #include <VX/vxu.h>
+#include <math.h>
+#include <string.h>
 
+#if defined OPENVX_USE_ENHANCED_VISION || OPENVX_CONFORMANCE_VISION
+
+/* *****************testGraph tests (vision) *******************************/
 TESTCASE(Graph, CT_VXContext, ct_setup_vx_context, 0)
 
 TEST(Graph, testTwoNodes)
@@ -847,9 +852,6 @@
     ARG("org.khronos.openvx.gaussian_3x3",          VX_KERNEL_GAUSSIAN_3x3),
     ARG("org.khronos.openvx.custom_convolution",    VX_KERNEL_CUSTOM_CONVOLUTION),
     ARG("org.khronos.openvx.gaussian_pyramid",      VX_KERNEL_GAUSSIAN_PYRAMID),
-    ARG("org.khronos.openvx.accumulate",            VX_KERNEL_ACCUMULATE),
-    ARG("org.khronos.openvx.accumulate_weighted",   VX_KERNEL_ACCUMULATE_WEIGHTED),
-    ARG("org.khronos.openvx.accumulate_square",     VX_KERNEL_ACCUMULATE_SQUARE),
     ARG("org.khronos.openvx.minmaxloc",             VX_KERNEL_MINMAXLOC),
     ARG("org.khronos.openvx.convertdepth",          VX_KERNEL_CONVERTDEPTH),
     ARG("org.khronos.openvx.canny_edge_detector",   VX_KERNEL_CANNY_EDGE_DETECTOR),
@@ -867,8 +869,10 @@
     ARG("org.khronos.openvx.optical_flow_pyr_lk",   VX_KERNEL_OPTICAL_FLOW_PYR_LK),
     ARG("org.khronos.openvx.remap",                 VX_KERNEL_REMAP),
     ARG("org.khronos.openvx.halfscale_gaussian",    VX_KERNEL_HALFSCALE_GAUSSIAN),
-    ARG("org.khronos.openvx.min",                   VX_KERNEL_MIN),
-    ARG("org.khronos.openvx.max",                   VX_KERNEL_MAX),
+    ARG("org.khronos.openvx.weightedaverage",       VX_KERNEL_WEIGHTED_AVERAGE),
+    ARG("org.khronos.openvx.non_linear_filter",     VX_KERNEL_NON_LINEAR_FILTER),
+    ARG("org.khronos.openvx.laplacian_pyramid",     VX_KERNEL_LAPLACIAN_PYRAMID),
+    ARG("org.khronos.openvx.laplacian_reconstruct", VX_KERNEL_LAPLACIAN_RECONSTRUCT),
     )
 {
     vx_context context = context_->vx_context_;
@@ -895,34 +899,6 @@
     VX_CALL(vxReleaseKernel(&kernel));
 }
 
-TEST(Graph, testAllocateUserKernelId)
-{
-    vx_context context = context_->vx_context_;
-    vx_enum   kernel_id = 0;
-
-    ASSERT_EQ_VX_STATUS(vxAllocateUserKernelId(NULL, &kernel_id), VX_ERROR_INVALID_REFERENCE);
-    ASSERT_NE_VX_STATUS(vxAllocateUserKernelId(context, NULL), VX_SUCCESS);
-
-    VX_CALL(vxAllocateUserKernelId(context, &kernel_id));
-
-    ASSERT(kernel_id >= VX_KERNEL_BASE(VX_ID_USER, 0));
-    ASSERT(kernel_id < (VX_KERNEL_BASE(VX_ID_USER, 0) + 4096));
-}
-
-TEST(Graph, testAllocateUserKernelLibraryId)
-{
-    vx_context context = context_->vx_context_;
-    vx_enum   library_id = 0;
-
-    ASSERT_EQ_VX_STATUS(vxAllocateUserKernelLibraryId(NULL, &library_id), VX_ERROR_INVALID_REFERENCE);
-    ASSERT_NE_VX_STATUS(vxAllocateUserKernelLibraryId(context, NULL), VX_SUCCESS);
-
-    VX_CALL(vxAllocateUserKernelLibraryId(context, &library_id));
-
-    ASSERT(library_id >= 1);
-    ASSERT(library_id <= 255);
-}
-
 void test_case_1(vx_context context, vx_uint32 width, vx_uint32 height)
 {
     vx_image src1 = 0;
@@ -1649,6 +1625,7 @@
     vx_size ref_levels = 0;
     vx_size tst_levels = 0;
     vx_enum type = VX_TYPE_INVALID;
+    int roi_adj = 0;
 
     VX_CALL(vxQueryReference(ref, VX_REFERENCE_TYPE, &type, sizeof(type)));
 
@@ -1679,16 +1656,15 @@
             {
                 if (i > 0)
                 {
-                    if (VX_SCALE_PYRAMID_ORB == scale)
+                    int next_roi_adj = ceil((double)scale*(2+roi_adj));
+
+                    if (next_roi_adj != roi_adj)
                     {
-                        ct_adjust_roi(img1, 2, 2, 2, 2);
-                        ct_adjust_roi(img2, 2, 2, 2, 2);
+                        roi_adj = roi_adj + ceil((double)(scale) * 2);
                     }
-                    else if (VX_SCALE_PYRAMID_HALF == scale)
-                    {
-                        ct_adjust_roi(img1, 1, 1, 1, 1);
-                        ct_adjust_roi(img2, 1, 1, 1, 1);
-                    }
+
+                    ct_adjust_roi(img1, roi_adj, roi_adj, roi_adj, roi_adj);
+                    ct_adjust_roi(img2, roi_adj, roi_adj, roi_adj, roi_adj);
                 }
             }
 
@@ -1858,12 +1834,17 @@
         vx_int32 val1 = 16;
         vx_int32 val2 = 32;
         vx_node node = 0;
+        vx_pixel_value_t low_pixel;
+        vx_pixel_value_t high_pixel;
+        memset(&low_pixel, 0, sizeof(low_pixel));
+        memset(&high_pixel, 0, sizeof(high_pixel));
+        low_pixel.U8 = val1;
+        high_pixel.U8 = val2;
 
         ASSERT_VX_OBJECT(src = vxCreateImage(context, 320, 240, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
         ASSERT_VX_OBJECT(dst = vxCreateImage(context, 320, 240, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
-        ASSERT_VX_OBJECT(threshold = vxCreateThreshold(context, VX_THRESHOLD_TYPE_RANGE, VX_TYPE_UINT8), VX_TYPE_THRESHOLD);
-        VX_CALL(vxSetThresholdAttribute(threshold, VX_THRESHOLD_THRESHOLD_LOWER, &val1, sizeof(val1)));
-        VX_CALL(vxSetThresholdAttribute(threshold, VX_THRESHOLD_THRESHOLD_UPPER, &val2, sizeof(val2)));
+        ASSERT_VX_OBJECT(threshold = vxCreateThresholdForImage(context, VX_THRESHOLD_TYPE_RANGE, VX_DF_IMAGE_U8, VX_DF_IMAGE_U8), VX_TYPE_THRESHOLD);
+        VX_CALL(vxCopyThresholdRange(threshold, &low_pixel, &high_pixel, VX_WRITE_ONLY, VX_MEMORY_TYPE_HOST));
         ASSERT_VX_OBJECT(graph = vxCreateGraph(context), VX_TYPE_GRAPH);
         ASSERT_VX_OBJECT(node = vxCannyEdgeDetectorNode(graph, src, threshold, 3, VX_NORM_L1, dst), VX_TYPE_NODE);
 
@@ -2336,6 +2317,1779 @@
     ASSERT(dst_image == 0); ASSERT(interm_image == 0); ASSERT(src_image == 0);
 }
 
+TEST(Graph, testvxIsGraphVerified)
+{
+    vx_context context = context_->vx_context_;
+    vx_image src_image = 0, interm_image = 0, dst_image = 0;
+    vx_graph graph = 0;
+    vx_node node1 = 0;
+    vx_bool is_verified = vx_false_e;
+
+    ASSERT_VX_OBJECT(src_image = vxCreateImage(context, 128, 128, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
+    ASSERT_VX_OBJECT(interm_image = vxCreateImage(context, 128, 128, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
+    ASSERT_VX_OBJECT(dst_image = vxCreateImage(context, 128, 128, VX_DF_IMAGE_U32), VX_TYPE_IMAGE);
+
+    ASSERT_VX_OBJECT(graph = vxCreateGraph(context), VX_TYPE_GRAPH);
+    ASSERT_VX_OBJECT(node1 = vxBox3x3Node(graph, src_image, interm_image), VX_TYPE_NODE);
+    VX_CALL(vxVerifyGraph(graph));
+    is_verified = vxIsGraphVerified(graph);
+    ASSERT_EQ_INT(is_verified, vx_true_e);
+
+    VX_CALL(vxReleaseNode(&node1));
+    VX_CALL(vxReleaseGraph(&graph));
+    VX_CALL(vxReleaseImage(&dst_image));
+    VX_CALL(vxReleaseImage(&interm_image));
+    VX_CALL(vxReleaseImage(&src_image));
+
+    ASSERT(node1 == 0);
+    ASSERT(graph == 0);
+    ASSERT(dst_image == 0);
+    ASSERT(interm_image == 0);
+    ASSERT(src_image == 0);
+}
+
+TEST(Graph, testvxProcessGraph)
+{
+    vx_context context = context_->vx_context_;
+    vx_image src_image = 0, interm_image = 0, dst_image = 0;
+    vx_graph graph = 0;
+    vx_node node1 = 0;
+
+    VX_CALL(vxDirective((vx_reference)context, VX_DIRECTIVE_ENABLE_PERFORMANCE));
+
+    ASSERT_VX_OBJECT(src_image = vxCreateImage(context, 128, 128, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
+    ASSERT_VX_OBJECT(interm_image = vxCreateImage(context, 128, 128, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
+    ASSERT_VX_OBJECT(dst_image = vxCreateImage(context, 128, 128, VX_DF_IMAGE_U32), VX_TYPE_IMAGE);
+
+    ASSERT_VX_OBJECT(graph = vxCreateGraph(context), VX_TYPE_GRAPH);
+    ASSERT_VX_OBJECT(node1 = vxBox3x3Node(graph, src_image, interm_image), VX_TYPE_NODE);
+
+    VX_CALL(vxVerifyGraph(graph));
+    ASSERT_EQ_INT(VX_SUCCESS, vxProcessGraph(graph));
+
+    VX_CALL(vxReleaseNode(&node1));
+    VX_CALL(vxReleaseGraph(&graph));
+    VX_CALL(vxReleaseImage(&dst_image));
+    VX_CALL(vxReleaseImage(&interm_image));
+    VX_CALL(vxReleaseImage(&src_image));
+
+    ASSERT(node1 == 0);
+    ASSERT(graph == 0);
+    ASSERT(dst_image == 0);
+    ASSERT(interm_image == 0);
+    ASSERT(src_image == 0);
+}
+
+TEST(Graph, testvxWaitGraph)
+{
+    vx_context context = context_->vx_context_;
+    vx_image src_image = 0, interm_image = 0, dst_image = 0;
+    vx_graph graph = 0;
+    vx_node node1 = 0, node2 = 0;
+    vx_border_t border = { VX_BORDER_UNDEFINED };
+
+    ASSERT_VX_OBJECT(src_image = vxCreateImage(context, 128, 128, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
+    ASSERT_VX_OBJECT(interm_image = vxCreateImage(context, 128, 128, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
+    ASSERT_VX_OBJECT(dst_image = vxCreateImage(context, 128, 128, VX_DF_IMAGE_U32), VX_TYPE_IMAGE);
+
+    ASSERT_VX_OBJECT(graph = vxCreateGraph(context), VX_TYPE_GRAPH);
+    ASSERT_VX_OBJECT(node1 = vxBox3x3Node(graph, src_image, interm_image), VX_TYPE_NODE);
+    VX_CALL(vxSetNodeAttribute(node1, VX_NODE_BORDER, &border, sizeof(border)));
+    ASSERT_VX_OBJECT(node2 = vxIntegralImageNode(graph, interm_image, dst_image), VX_TYPE_NODE);
+
+    VX_CALL(vxVerifyGraph(graph));
+    VX_CALL(vxScheduleGraph(graph));
+    VX_CALL(vxWaitGraph(graph));
+
+    VX_CALL(vxReleaseNode(&node1));
+    VX_CALL(vxReleaseNode(&node2));
+    VX_CALL(vxReleaseGraph(&graph));
+    VX_CALL(vxReleaseImage(&dst_image));
+    VX_CALL(vxReleaseImage(&interm_image));
+    VX_CALL(vxReleaseImage(&src_image));
+
+    ASSERT(node1 == 0); ASSERT(node2 == 0);
+    ASSERT(graph == 0);
+    ASSERT(dst_image == 0); ASSERT(interm_image == 0); ASSERT(src_image == 0);
+}
+
+TEST(Graph, testvxVerifyGraph)
+{
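+    /* Build a series of deliberately invalid graphs (multiple writers, a cycle, a mismatched output format,
+     * bad scalar parameters) and check that vxVerifyGraph reports the expected error for each,
+     * then finish by verifying and processing a valid graph. */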
+    vx_context context = context_->vx_context_;
+    vx_image src1_image = 0, src2_image = 0, interm_image = 0, dst_image = 0, dst2_image = 0;
+    vx_graph graph = 0;
+    vx_node node1 = 0, node2 = 0;
+    vx_scalar scale = 0;
+    vx_scalar scale2 = 0;
+    float alpha = 2.0f;
+    int val = 1;
+    vx_status status;
+
+    ASSERT_VX_OBJECT(src1_image = vxCreateImage(context, 128, 128, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
+    ASSERT_VX_OBJECT(src2_image = vxCreateImage(context, 128, 128, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
+    ASSERT_VX_OBJECT(interm_image = vxCreateImage(context, 128, 128, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
+    ASSERT_VX_OBJECT(dst_image = vxCreateImage(context, 128, 128, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
+    ASSERT_VX_OBJECT(dst2_image = vxCreateImage(context, 128, 128, VX_DF_IMAGE_U32), VX_TYPE_IMAGE);
+    ASSERT_VX_OBJECT(scale = vxCreateScalar(context, VX_TYPE_FLOAT32, &alpha), VX_TYPE_SCALAR);
+    ASSERT_VX_OBJECT(scale2 = vxCreateScalar(context, VX_TYPE_INT32, &val), VX_TYPE_SCALAR);
+    ASSERT_VX_OBJECT(graph = vxCreateGraph(context), VX_TYPE_GRAPH);
+
+    ASSERT_VX_OBJECT(node1 = vxBox3x3Node(graph, src1_image, dst_image), VX_TYPE_NODE);
+    ASSERT_VX_OBJECT(node2 = vxMedian3x3Node(graph, src2_image, dst_image), VX_TYPE_NODE);
+    status = vxVerifyGraph(graph);
+    ASSERT_EQ_INT(VX_ERROR_MULTIPLE_WRITERS, status);
+
+    VX_CALL(vxRemoveNode(&node2));
+    ASSERT(node2 == 0);
+    VX_CALL(vxRemoveNode(&node1));
+    ASSERT(node1 == 0);
+    ASSERT_VX_OBJECT(node1 = vxBox3x3Node(graph, src1_image, dst_image), VX_TYPE_NODE);
+    ASSERT_VX_OBJECT(node2 = vxMedian3x3Node(graph, dst_image, src1_image), VX_TYPE_NODE);
+    status = vxVerifyGraph(graph);
+    ASSERT_EQ_INT(VX_ERROR_INVALID_GRAPH, status);
+
+    VX_CALL(vxRemoveNode(&node2));
+    ASSERT(node2 == 0);
+    VX_CALL(vxRemoveNode(&node1));
+    ASSERT(node1 == 0);
+    ASSERT_VX_OBJECT(node1 = vxBox3x3Node(graph, src1_image, src2_image), VX_TYPE_NODE);
+    ASSERT_VX_OBJECT(node2 = vxIntegralImageNode(graph, src2_image, interm_image), VX_TYPE_NODE);
+    status = vxVerifyGraph(graph);
+    ASSERT_EQ_INT(VX_ERROR_INVALID_FORMAT, status);
+
+    VX_CALL(vxRemoveNode(&node2));
+    ASSERT(node2 == 0);
+    VX_CALL(vxRemoveNode(&node1));
+    ASSERT(node1 == 0);
+    ASSERT_VX_OBJECT(node1 = vxWeightedAverageNode(graph, src1_image, scale, src2_image, dst_image), VX_TYPE_NODE);
+    status = vxVerifyGraph(graph);
+    ASSERT_EQ_INT(VX_ERROR_INVALID_VALUE, status);
+
+    VX_CALL(vxRemoveNode(&node1));
+    ASSERT(node1 == 0);
+    ASSERT_VX_OBJECT(node1 = vxWeightedAverageNode(graph, src1_image, scale2, src2_image, dst_image), VX_TYPE_NODE);
+    status = vxVerifyGraph(graph);
+    ASSERT_EQ_INT(VX_ERROR_INVALID_TYPE, status);
+
+    VX_CALL(vxRemoveNode(&node1));
+    ASSERT(node1 == 0);
+    ASSERT_VX_OBJECT(node1 = vxBox3x3Node(graph, src1_image, interm_image), VX_TYPE_NODE);
+    ASSERT_VX_OBJECT(node2 = vxIntegralImageNode(graph, interm_image, dst2_image), VX_TYPE_NODE);
+    VX_CALL(vxVerifyGraph(graph));
+    VX_CALL(vxProcessGraph(graph));
+
+    VX_CALL(vxReleaseNode(&node1));
+    VX_CALL(vxReleaseNode(&node2));
+    VX_CALL(vxReleaseGraph(&graph));
+    VX_CALL(vxReleaseImage(&dst_image));
+    VX_CALL(vxReleaseImage(&dst2_image));
+    VX_CALL(vxReleaseImage(&src1_image));
+    VX_CALL(vxReleaseImage(&src2_image));
+    VX_CALL(vxReleaseImage(&interm_image));
+    VX_CALL(vxReleaseScalar(&scale));
+    VX_CALL(vxReleaseScalar(&scale2));
+
+    ASSERT(node1 == 0); ASSERT(node2 == 0);
+    ASSERT(graph == 0);
+    ASSERT(scale == 0); ASSERT(scale2 == 0);
+    ASSERT(dst_image == 0); ASSERT(dst2_image == 0); ASSERT(src1_image == 0); ASSERT(src2_image == 0); ASSERT(interm_image == 0);
+}
+
+TEST(Graph, testvxQueryNode)
+{
+    vx_context context = context_->vx_context_;
+    vx_image src_image = 0, dst_image = 0;
+    vx_graph graph = 0;
+    vx_node node1 = 0;
+    vx_perf_t perf;
+    vx_status node_status = VX_SUCCESS;
+    vx_status status = VX_SUCCESS;
+    vx_border_t border = { VX_BORDER_UNDEFINED };
+    vx_border_t border_test = { 0 };
+
+    VX_CALL(vxDirective((vx_reference)context, VX_DIRECTIVE_ENABLE_PERFORMANCE));
+
+    ASSERT_VX_OBJECT(src_image = vxCreateImage(context, 128, 128, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
+    ASSERT_VX_OBJECT(dst_image = vxCreateImage(context, 128, 128, VX_DF_IMAGE_U32), VX_TYPE_IMAGE);
+
+    ASSERT_VX_OBJECT(graph = vxCreateGraph(context), VX_TYPE_GRAPH);
+    ASSERT_VX_OBJECT(node1 = vxIntegralImageNode(graph, src_image, dst_image), VX_TYPE_NODE);
+    VX_CALL(vxSetNodeAttribute(node1, VX_NODE_BORDER, &border, sizeof(border)));
+
+    VX_CALL(vxVerifyGraph(graph));
+    VX_CALL(vxProcessGraph(graph));
+
+    status = vxQueryNode(node1, VX_NODE_PERFORMANCE, &perf, sizeof(perf));
+    ASSERT_EQ_INT(VX_SUCCESS, status);
+    status = vxQueryNode(node1, VX_NODE_STATUS, &node_status, sizeof(node_status));
+    ASSERT_EQ_INT(VX_SUCCESS, node_status);
+    ASSERT_EQ_INT(VX_SUCCESS, status);
+    status = vxQueryNode(node1, VX_NODE_BORDER, &node_status, sizeof(node_status));
+    ASSERT_EQ_INT(VX_ERROR_INVALID_PARAMETERS, status);
+    status = vxQueryNode(node1, VX_NODE_BORDER, &border_test, sizeof(border_test));
+    ASSERT_EQ_INT(VX_SUCCESS, status);
+    ASSERT_EQ_INT(VX_BORDER_UNDEFINED, border_test.mode);
+
+    ASSERT(perf.num == 1);
+    ASSERT(perf.beg > 0);
+    ASSERT(perf.min > 0);
+
+    VX_CALL(vxReleaseNode(&node1));
+    VX_CALL(vxReleaseGraph(&graph));
+    VX_CALL(vxReleaseImage(&dst_image));
+    VX_CALL(vxReleaseImage(&src_image));
+
+    ASSERT(node1 == 0);
+    ASSERT(graph == 0);
+    ASSERT(dst_image == 0);
+    ASSERT(src_image == 0);
+}
+
+TEST(Graph, testvxReleaseNode)
+{
+    vx_context context = context_->vx_context_;
+    vx_image src_image = 0, dst_image = 0;
+    vx_graph graph = 0;
+    vx_node node1 = 0;
+    vx_status status = VX_SUCCESS;
+
+    ASSERT_VX_OBJECT(src_image = vxCreateImage(context, 128, 128, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
+    ASSERT_VX_OBJECT(dst_image = vxCreateImage(context, 128, 128, VX_DF_IMAGE_U32), VX_TYPE_IMAGE);
+
+    ASSERT_VX_OBJECT(graph = vxCreateGraph(context), VX_TYPE_GRAPH);
+    ASSERT_VX_OBJECT(node1 = vxIntegralImageNode(graph, src_image, dst_image), VX_TYPE_NODE);
+
+    VX_CALL(vxVerifyGraph(graph));
+    VX_CALL(vxProcessGraph(graph));
+
+    status = vxReleaseNode(&node1);
+    ASSERT_EQ_INT(VX_SUCCESS, status);
+
+    VX_CALL(vxReleaseGraph(&graph));
+    VX_CALL(vxReleaseImage(&dst_image));
+    VX_CALL(vxReleaseImage(&src_image));
+
+    ASSERT(node1 == 0);
+    ASSERT(graph == 0);
+    ASSERT(dst_image == 0);
+    ASSERT(src_image == 0);
+}
+
+TEST(Graph, testvxSetNodeAttribute)
+{
+    vx_context context = context_->vx_context_;
+    vx_image src_image = 0, interm_image = 0, dst_image = 0;
+    vx_graph graph = 0;
+    vx_node node1 = 0, node2 = 0;
+    vx_border_t border = { VX_BORDER_UNDEFINED };
+    vx_border_t border_test = { 0 };
+    vx_status status = VX_SUCCESS;
+
+    ASSERT_VX_OBJECT(src_image = vxCreateImage(context, 128, 128, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
+    ASSERT_VX_OBJECT(interm_image = vxCreateImage(context, 128, 128, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
+    ASSERT_VX_OBJECT(dst_image = vxCreateImage(context, 128, 128, VX_DF_IMAGE_U32), VX_TYPE_IMAGE);
+
+    ASSERT_VX_OBJECT(graph = vxCreateGraph(context), VX_TYPE_GRAPH);
+    ASSERT_VX_OBJECT(node1 = vxBox3x3Node(graph, src_image, interm_image), VX_TYPE_NODE);
+    ASSERT_VX_OBJECT(node2 = vxIntegralImageNode(graph, interm_image, dst_image), VX_TYPE_NODE);
+    status = vxSetNodeAttribute(node1, VX_NODE_BORDER, &border, sizeof(border));
+    ASSERT_EQ_INT(VX_SUCCESS, status);
+    status = vxQueryNode(node1, VX_NODE_BORDER, &border_test, sizeof(vx_border_t));
+    ASSERT_EQ_INT(VX_BORDER_UNDEFINED, border_test.mode);
+
+    VX_CALL(vxReleaseNode(&node1));
+    VX_CALL(vxReleaseNode(&node2));
+    VX_CALL(vxReleaseGraph(&graph));
+    VX_CALL(vxReleaseImage(&dst_image));
+    VX_CALL(vxReleaseImage(&interm_image));
+    VX_CALL(vxReleaseImage(&src_image));
+
+    ASSERT(node1 == 0); ASSERT(node2 == 0);
+    ASSERT(graph == 0);
+    ASSERT(dst_image == 0); ASSERT(interm_image == 0); ASSERT(src_image == 0);
+}
+
+#endif //OPENVX_USE_ENHANCED_VISION || OPENVX_CONFORMANCE_VISION
+
+/* *****************testGraphBase tests*******************************/
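+/* Basic graph/node API tests that do not create any vision kernel nodes, so they are kept outside the feature-set guard above. */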
+TESTCASE(GraphBase, CT_VXContext, ct_setup_vx_context, 0)
+
+TEST(GraphBase, testAllocateUserKernelId)
+{
+    vx_context context = context_->vx_context_;
+    vx_enum   kernel_id = 0;
+
+    ASSERT_EQ_VX_STATUS(vxAllocateUserKernelId(NULL, &kernel_id), VX_ERROR_INVALID_REFERENCE);
+    ASSERT_NE_VX_STATUS(vxAllocateUserKernelId(context, NULL), VX_SUCCESS);
+
+    VX_CALL(vxAllocateUserKernelId(context, &kernel_id));
+
+    ASSERT(kernel_id >= VX_KERNEL_BASE(VX_ID_USER, 0));
+    ASSERT(kernel_id < (VX_KERNEL_BASE(VX_ID_USER, 0) + 4096));
+}
+
+TEST(GraphBase, testAllocateUserKernelLibraryId)
+{
+    vx_context context = context_->vx_context_;
+    vx_enum   library_id = 0;
+
+    ASSERT_EQ_VX_STATUS(vxAllocateUserKernelLibraryId(NULL, &library_id), VX_ERROR_INVALID_REFERENCE);
+    ASSERT_NE_VX_STATUS(vxAllocateUserKernelLibraryId(context, NULL), VX_SUCCESS);
+
+    VX_CALL(vxAllocateUserKernelLibraryId(context, &library_id));
+
+    ASSERT(library_id >= 1);
+    ASSERT(library_id <= 255);
+}
+
+TEST(GraphBase, testGetUserStructNameByEnum)
+{
+    vx_context context = context_->vx_context_;
+    vx_char* type_name = "VX_TYPE_CHAR";
+    vx_enum user_struct_type = vxRegisterUserStructWithName(context, VX_MAX_REFERENCE_NAME, type_name);
+    vx_char out_name[VX_MAX_REFERENCE_NAME];
+
+    ASSERT_EQ_VX_STATUS(vxGetUserStructNameByEnum(NULL, user_struct_type, type_name, VX_MAX_REFERENCE_NAME), VX_ERROR_INVALID_PARAMETERS);
+    ASSERT_EQ_VX_STATUS(vxGetUserStructNameByEnum(context, VX_TYPE_ERROR, type_name, VX_MAX_REFERENCE_NAME), VX_FAILURE);
+    ASSERT_EQ_VX_STATUS(vxGetUserStructNameByEnum(context, user_struct_type, type_name, 0), VX_ERROR_NO_MEMORY);
+    ASSERT_EQ_VX_STATUS(vxGetUserStructNameByEnum(context, user_struct_type, out_name, VX_MAX_REFERENCE_NAME), VX_SUCCESS);
+    int ret = strncmp(type_name, out_name, strlen(type_name));
+    ASSERT_EQ_VX_STATUS(ret, 0);
+}
+
+TEST(GraphBase, testGetUserStructEnumByName)
+{
+    vx_context context = context_->vx_context_;
+    vx_char* type_name = "VX_TYPE_CHAR";
+    vx_enum user_struct_type = vxRegisterUserStructWithName(context, VX_MAX_REFERENCE_NAME, type_name);
+    vx_enum out_type;
+
+    ASSERT_EQ_VX_STATUS(vxGetUserStructEnumByName(context, NULL, &user_struct_type), VX_FAILURE);
+    ASSERT_EQ_VX_STATUS(vxGetUserStructEnumByName(context, type_name, &out_type), VX_SUCCESS);
+    ASSERT_EQ_VX_STATUS(out_type, user_struct_type);
+}
+
+TEST(GraphBase, testRegisterUserStructWithName)
+{
+    vx_context context = context_->vx_context_;
+    vx_char* type_name = "VX_TYPE_CHAR";
+
+    ASSERT_EQ_VX_STATUS(vxRegisterUserStructWithName(NULL, VX_MAX_REFERENCE_NAME, type_name), VX_TYPE_INVALID);
+    ASSERT_EQ_VX_STATUS(vxRegisterUserStructWithName(context, 0, type_name), VX_TYPE_INVALID);
+    ASSERT_NE_VX_STATUS(vxRegisterUserStructWithName(context, VX_MAX_REFERENCE_NAME, type_name), VX_TYPE_INVALID);
+}
+
+TEST(GraphBase, testvxCreateGraph)
+{
+    vx_context context = context_->vx_context_;
+    vx_context context_test = 0;
+    vx_graph graph = NULL;
+
+    graph = vxCreateGraph(context_test);
+    ASSERT_EQ_PTR(graph, NULL);
+    graph = vxCreateGraph(context);
+    ASSERT_VX_OBJECT(graph, VX_TYPE_GRAPH);
+    VX_CALL(vxReleaseGraph(&graph));
+}
+
+TEST(GraphBase, testvxIsGraphVerifiedBase)
+{
+    vx_context context = context_->vx_context_;
+    vx_context context_test = 0;
+    vx_graph graph = NULL;
+    vx_bool is_verified = vx_false_e;
+
+    graph = vxCreateGraph(context_test);
+    is_verified = vxIsGraphVerified(graph);
+    ASSERT_EQ_INT(is_verified, vx_false_e);
+    graph = vxCreateGraph(context);
+    is_verified = vxIsGraphVerified(graph);
+    ASSERT_EQ_INT(is_verified, vx_false_e);
+    vx_status status = vxVerifyGraph(graph);
+    if (status == VX_SUCCESS)
+    {
+        is_verified = vxIsGraphVerified(graph);
+        ASSERT_EQ_INT(is_verified, vx_true_e);
+    }
+    if (status != VX_SUCCESS)
+    {
+        is_verified = vxIsGraphVerified(graph);
+        ASSERT_EQ_INT(is_verified, vx_false_e);
+    }
+    VX_CALL(vxReleaseGraph(&graph));
+}
+
+TEST(GraphBase, testvxProcessGraphBase)
+{
+    vx_context context = context_->vx_context_;
+    vx_context context_test = 0;
+    vx_graph graph = NULL;
+    vx_status status = VX_SUCCESS;
+
+    graph = vxCreateGraph(context_test);
+    status = vxProcessGraph(graph);
+    ASSERT_EQ_INT(VX_ERROR_INVALID_REFERENCE, status);
+    graph = vxCreateGraph(context);
+    status = vxProcessGraph(graph);
+    ASSERT_NE_VX_STATUS(VX_SUCCESS, status);
+
+    VX_CALL(vxReleaseGraph(&graph));
+}
+
+TEST(GraphBase, testvxQueryGraph)
+{
+    vx_context context = context_->vx_context_;
+    vx_context context_test = 0;
+    vx_graph graph = NULL;
+    char * test = (char*)ct_alloc_mem(sizeof(vx_perf_t));
+    vx_status status = VX_SUCCESS;
+
+    graph = vxCreateGraph(context_test);
+    status = vxQueryGraph(graph, VX_GRAPH_PERFORMANCE, test, sizeof(test));
+    ASSERT_EQ_INT(VX_ERROR_INVALID_REFERENCE, status);
+    graph = vxCreateGraph(context);
+    status = vxQueryGraph(graph, VX_GRAPH_PERFORMANCE, test, 0);
+    ASSERT_EQ_INT(VX_ERROR_INVALID_PARAMETERS, status);
+    VX_CALL(vxQueryGraph(graph, VX_GRAPH_PERFORMANCE, test, sizeof(vx_perf_t)));
+
+    status = vxQueryGraph(graph, VX_GRAPH_STATE, test, 0);
+    ASSERT_EQ_INT(VX_ERROR_INVALID_PARAMETERS, status);
+    VX_CALL(vxQueryGraph(graph, VX_GRAPH_STATE, test, sizeof(vx_enum)));
+
+    status = vxQueryGraph(graph, VX_GRAPH_NUMNODES, test, 0);
+    ASSERT_EQ_INT(VX_ERROR_INVALID_PARAMETERS, status);
+    VX_CALL(vxQueryGraph(graph, VX_GRAPH_NUMNODES, test, sizeof(vx_uint32)));
+
+    status = vxQueryGraph(graph, VX_GRAPH_NUMPARAMETERS, test, 0);
+    ASSERT_EQ_INT(VX_ERROR_INVALID_PARAMETERS, status);
+    VX_CALL(vxQueryGraph(graph, VX_GRAPH_NUMPARAMETERS, test, sizeof(vx_uint32)));
+
+    status = vxQueryGraph(graph, VX_GRAPH_STATE_ABANDONED, test, 0);
+    ASSERT_EQ_INT(VX_ERROR_NOT_SUPPORTED, status);
+
+    VX_CALL(vxReleaseGraph(&graph));
+    ct_free_mem(test);
+}
+
+TEST(GraphBase, testvxWaitGraphBase)
+{
+    vx_context context = context_->vx_context_;
+    vx_context context_test = 0;
+    vx_graph graph = NULL;
+    vx_status status = VX_SUCCESS;
+
+    graph = vxCreateGraph(context_test);
+    status = vxWaitGraph(graph);
+    ASSERT_EQ_INT(VX_ERROR_INVALID_REFERENCE, status);
+    graph = vxCreateGraph(context);
+    status = vxWaitGraph(graph);
+    ASSERT_EQ_INT(VX_FAILURE, status);
+
+    VX_CALL(vxReleaseGraph(&graph));
+}
+
+TEST(GraphBase, testvxVerifyGraphBase)
+{
+    vx_context context = context_->vx_context_;
+    vx_graph graph = NULL;
+    vx_status status = VX_SUCCESS;
+
+    graph = vxCreateGraph(context);
+    status = vxVerifyGraph(graph);
+    ASSERT_EQ_INT(VX_ERROR_INVALID_GRAPH, status);
+
+    VX_CALL(vxReleaseGraph(&graph));
+}
+
+TEST(GraphBase, testvxScheduleGraph)
+{
+    vx_context context = context_->vx_context_;
+    vx_graph graph = NULL;
+    vx_status status = VX_SUCCESS;
+
+    status = vxScheduleGraph(graph);
+    ASSERT_EQ_INT(VX_ERROR_INVALID_REFERENCE, status);
+
+    graph = vxCreateGraph(context);
+    status = vxScheduleGraph(graph);
+    ASSERT_EQ_INT(VX_SUCCESS, status);
+
+    VX_CALL(vxReleaseGraph(&graph));
+}
+
+TEST(GraphBase, testvxReleaseGraph)
+{
+    vx_context context = context_->vx_context_;
+    vx_graph graph = NULL;
+    vx_status status = VX_SUCCESS;
+
+    status = vxReleaseGraph(&graph);
+    ASSERT_EQ_INT(VX_ERROR_INVALID_REFERENCE, status);
+
+    graph = vxCreateGraph(context);
+    status = vxReleaseGraph(&graph);
+    ASSERT_EQ_INT(VX_SUCCESS, status);
+}
+
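+/* Node queries with a NULL node handle: every attribute below, including a
+ * deliberately bogus attribute enum, must report VX_ERROR_INVALID_REFERENCE. */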
+TEST(GraphBase, testvxQueryNodeBase)
+{
+    vx_node node = 0;
+    vx_size size = 0;
+    vx_perf_t perf = { 0 };
+    void* ptr = NULL;
+    vx_border_t border = { 0 };
+    vx_bool flag = vx_true_e;
+    vx_uint32 numParams = 0;
+    vx_status status = VX_SUCCESS;
+
+    status = vxQueryNode(node, VX_NODE_PERFORMANCE, &perf, sizeof(vx_perf_t));
+    ASSERT_EQ_INT(VX_ERROR_INVALID_REFERENCE, status);
+    status = vxQueryNode(node, VX_NODE_STATUS, &status, sizeof(vx_status));
+    ASSERT_EQ_INT(VX_ERROR_INVALID_REFERENCE, status);
+    status = vxQueryNode(node, VX_NODE_LOCAL_DATA_SIZE, &size, sizeof(size));
+    ASSERT_EQ_INT(VX_ERROR_INVALID_REFERENCE, status);
+    status = vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &ptr, sizeof(ptr));
+    ASSERT_EQ_INT(VX_ERROR_INVALID_REFERENCE, status);
+    status = vxQueryNode(node, VX_NODE_BORDER, &border, sizeof(vx_border_t));
+    ASSERT_EQ_INT(VX_ERROR_INVALID_REFERENCE, status);
+    status = vxQueryNode(node, VX_NODE_PARAMETERS, &numParams, sizeof(numParams));
+    ASSERT_EQ_INT(VX_ERROR_INVALID_REFERENCE, status);
+    status = vxQueryNode(node, VX_NODE_IS_REPLICATED, &flag, sizeof(flag));
+    ASSERT_EQ_INT(VX_ERROR_INVALID_REFERENCE, status);
+    status = vxQueryNode(node, VX_NODE_REPLICATE_FLAGS, &size, sizeof(size));
+    ASSERT_EQ_INT(VX_ERROR_INVALID_REFERENCE, status);
+    status = vxQueryNode(node, VX_NODE_VALID_RECT_RESET, &flag, sizeof(flag));
+    ASSERT_EQ_INT(VX_ERROR_INVALID_REFERENCE, status);
+    status = vxQueryNode(node, VX_ERROR_NOT_SUPPORTED, &size, sizeof(size));
+    ASSERT_EQ_INT(VX_ERROR_INVALID_REFERENCE, status);
+}
+
+TEST(GraphBase, testvxReleaseNodeBase)
+{
+    vx_node node = 0;
+    vx_status status = VX_SUCCESS;
+    status = vxReleaseNode(&node);
+    ASSERT_EQ_VX_STATUS(VX_ERROR_INVALID_REFERENCE, status);
+}
+
+TEST(GraphBase, testvxRemoveNodeBase)
+{
+    vx_node node = 0;
+    vx_status status = VX_SUCCESS;
+    status = vxRemoveNode(&node);
+    ASSERT_EQ_VX_STATUS(VX_ERROR_INVALID_REFERENCE, status);
+}
+
+TEST(GraphBase, testvxReplicateNodeBase)
+{
+    vx_context context = context_->vx_context_;
+    vx_node node = 0;
+    vx_graph graph = NULL;
+    vx_status status = VX_SUCCESS;
+    vx_bool replicate[] = { vx_true_e, vx_true_e, vx_false_e, vx_true_e };
+
+    status = vxReplicateNode(graph, node, replicate, 4);
+    ASSERT_EQ_INT(VX_ERROR_INVALID_REFERENCE, status);
+    graph = vxCreateGraph(context);
+    ASSERT_VX_OBJECT(graph, VX_TYPE_GRAPH);
+    status = vxReplicateNode(graph, node, replicate, 4);
+    ASSERT_EQ_INT(VX_ERROR_INVALID_REFERENCE, status);
+
+    VX_CALL(vxReleaseGraph(&graph));
+}
+
+TEST(GraphBase, testvxSetNodeAttributeBase)
+{
+    vx_node node1 = 0;
+    vx_border_t border = { VX_BORDER_UNDEFINED };
+    vx_status status = VX_SUCCESS;
+
+    status = vxSetNodeAttribute(node1, VX_NODE_BORDER, &border, sizeof(border));
+    ASSERT_EQ_INT(VX_ERROR_INVALID_REFERENCE, status);
+
+    ASSERT(node1 == 0);
+}
+
+#ifdef OPENVX_USE_ENHANCED_VISION
+
+/* *****************testKernelName_enhanced tests*******************************/
+TESTCASE(GraphEnhanced, CT_VXContext, ct_setup_vx_context, 0)
+TEST_WITH_ARG(GraphEnhanced, testKernelName_enhanced, kernel_name_arg,
+    ARG("org.khronos.openvx.min",                        VX_KERNEL_MIN),
+    ARG("org.khronos.openvx.max",                        VX_KERNEL_MAX),
+    ARG("org.khronos.openvx.match_template",             VX_KERNEL_MATCH_TEMPLATE),
+    ARG("org.khronos.openvx.lbp",                        VX_KERNEL_LBP),
+    ARG("org.khronos.openvx.hough_lines_probabilistic",  VX_KERNEL_HOUGH_LINES_P),
+    ARG("org.khronos.openvx.tensor_multiply",            VX_KERNEL_TENSOR_MULTIPLY),
+    ARG("org.khronos.openvx.tensor_add",                 VX_KERNEL_TENSOR_ADD),
+    ARG("org.khronos.openvx.tensor_subtract",            VX_KERNEL_TENSOR_SUBTRACT),
+    ARG("org.khronos.openvx.tensor_table_lookup",        VX_KERNEL_TENSOR_TABLE_LOOKUP),
+    ARG("org.khronos.openvx.tensor_transpose",           VX_KERNEL_TENSOR_TRANSPOSE),
+    ARG("org.khronos.openvx.tensor_convert_depth",       VX_KERNEL_TENSOR_CONVERT_DEPTH),
+    ARG("org.khronos.openvx.tensor_matrix_multiply ",    VX_KERNEL_TENSOR_MATRIX_MULTIPLY),
+    ARG("org.khronos.openvx.copy_node",                  VX_KERNEL_COPY),
+    ARG("org.khronos.openvx.nonmaxsuppression",          VX_KERNEL_NON_MAX_SUPPRESSION),
+    ARG("org.khronos.openvx.scalar_operation",           VX_KERNEL_SCALAR_OPERATION),
+    ARG("org.khronos.openvx.hogfeatures",                VX_KERNEL_HOG_FEATURES),
+    ARG("org.khronos.openvx.hog_cells",                  VX_KERNEL_HOG_CELLS),
+    ARG("org.khronos.openvx.bilateral_filter ",          VX_KERNEL_BILATERAL_FILTER),
+    ARG("org.khronos.openvx.select",                     VX_KERNEL_SELECT),
+    )
+{
+    vx_context context = context_->vx_context_;
+    vx_kernel kernel = 0;
+    vx_enum   kernel_id = 0;
+
+    EXPECT_VX_OBJECT(kernel = vxGetKernelByName(context, arg_->name), VX_TYPE_KERNEL);
+
+    if (CT_HasFailure())
+    {
+        vx_char name[VX_MAX_KERNEL_NAME] = { 0 };
+
+        ASSERT_VX_OBJECT(kernel = vxGetKernelByEnum(context, arg_->kernel_id), VX_TYPE_KERNEL);
+        VX_CALL(vxQueryKernel(kernel, VX_KERNEL_NAME, &name, sizeof(name)));
+        printf("\tExpected kernel name is: %s\n", arg_->name);
+        printf("\tActual kernel name is:   %-*s\n", VX_MAX_KERNEL_NAME, name);
+    }
+    else
+    {
+        VX_CALL(vxQueryKernel(kernel, VX_KERNEL_ENUM, &kernel_id, sizeof(kernel_id)));
+        EXPECT_EQ_INT(arg_->kernel_id, kernel_id);
+    }
+
+    VX_CALL(vxReleaseKernel(&kernel));
+}
+
+#endif //OPENVX_USE_ENHANCED_VISION
+
+#if defined(OPENVX_CONFORMANCE_NEURAL_NETWORKS) || defined(OPENVX_CONFORMANCE_NNEF_IMPORT)
+
+/* *****************UserKernelsOfNNAndNNEF tests*******************************/
+TESTCASE(UserKernelsOfNNAndNNEF, CT_VXContext, ct_setup_vx_context, 0)
+
+#define VX_USER_KERNEL_CONFORMANCE_NAME  "org.khronos.openvx.test.user.kernel.tensor"
+#define VX_MAX_TENSOR_DIMENSIONS 6
+#define Q78_FIXED_POINT_POSITION 8
+#define MAX_DIMS_TEST1   4
+#define TEST_TENSOR_MIN_DIM_SZ   1
+#define TEST_TENSOR_MAX_DIM_SZ   20
+
+
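+/* Validation callback for the user tensor kernel: only checks the expected
+ * parameter count and leaves the output meta-format untouched. */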
+static vx_status VX_CALLBACK userKernelValidate1(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[])
+{
+    ASSERT_(return VX_FAILURE, num == 2);
+
+    return VX_SUCCESS;
+}
+
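+/* Validation callback that copies the input tensor attributes (dims, data
+ * type, fixed-point position) into the output meta-format one by one. */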
+static vx_status VX_CALLBACK userKernelValidate2(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[])
+{
+    vx_size num_of_dims = 0;
+    vx_size dims[VX_MAX_TENSOR_DIMENSIONS] = {0};
+    vx_enum data_type = 0;
+    vx_int8 fixed_point_pos = 0;
+    vx_tensor input = 0;
+
+    ASSERT_(return VX_FAILURE, num == 2);
+
+    input = (vx_tensor)parameters[0];
+
+    VX_CALL_(return VX_FAILURE, vxQueryTensor(input, VX_TENSOR_NUMBER_OF_DIMS, &num_of_dims, sizeof(num_of_dims)));
+    VX_CALL_(return VX_FAILURE, vxQueryTensor(input, VX_TENSOR_DIMS, dims, sizeof(vx_size) * num_of_dims));
+    VX_CALL_(return VX_FAILURE, vxQueryTensor(input, VX_TENSOR_DATA_TYPE, &data_type, sizeof(data_type)));
+    VX_CALL_(return VX_FAILURE, vxQueryTensor(input, VX_TENSOR_FIXED_POINT_POSITION, &fixed_point_pos, sizeof(fixed_point_pos)));
+
+    VX_CALL_(return VX_FAILURE, vxSetMetaFormatAttribute(metas[1], VX_TENSOR_NUMBER_OF_DIMS, &num_of_dims, sizeof(num_of_dims)));
+    VX_CALL_(return VX_FAILURE, vxSetMetaFormatAttribute(metas[1], VX_TENSOR_DIMS, dims, sizeof(vx_size) * num_of_dims));
+    VX_CALL_(return VX_FAILURE, vxSetMetaFormatAttribute(metas[1], VX_TENSOR_DATA_TYPE, &data_type, sizeof(data_type)));
+    VX_CALL_(return VX_FAILURE, vxSetMetaFormatAttribute(metas[1], VX_TENSOR_FIXED_POINT_POSITION, &fixed_point_pos, sizeof(fixed_point_pos)));
+
+    return VX_SUCCESS;
+}
+
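+/* Validation callback that initializes the output meta-format directly from
+ * the input tensor reference via vxSetMetaFormatFromReference. */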
+static vx_status VX_CALLBACK userKernelValidate3(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[])
+{
+    ASSERT_(return VX_FAILURE, num == 2);
+
+    VX_CALL_(return VX_FAILURE, vxSetMetaFormatFromReference(metas[1], parameters[0]));
+
+    return VX_SUCCESS;
+}
+
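+/* Validation callback that sets the output meta-format from the input
+ * reference and then queries it back to confirm the attributes match. */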
+static vx_status VX_CALLBACK userKernelValidate4(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[])
+{
+    vx_tensor input;
+    vx_size expect_num_of_dims = 0;
+    vx_size expect_dims[VX_MAX_TENSOR_DIMENSIONS] = {0};
+    vx_enum expect_data_type = 0;
+    vx_int8 expect_fixed_point_pos = 0;
+    vx_size actual_num_of_dims = 1;
+    vx_size actual_dims[VX_MAX_TENSOR_DIMENSIONS] = {1};
+    vx_enum actual_data_type = 1;
+    vx_int8 actual_fixed_point_pos = 1;
+
+    input = (vx_tensor)parameters[0];
+
+    ASSERT_(return VX_FAILURE, num == 2);
+
+    VX_CALL_(return VX_FAILURE, vxSetMetaFormatFromReference(metas[1], parameters[0]));
+
+    VX_CALL_(return VX_FAILURE, vxQueryTensor(input, VX_TENSOR_NUMBER_OF_DIMS, &actual_num_of_dims, sizeof(actual_num_of_dims)));
+    VX_CALL_(return VX_FAILURE, vxQueryTensor(input, VX_TENSOR_DIMS, actual_dims, sizeof(vx_size) * actual_num_of_dims));
+    VX_CALL_(return VX_FAILURE, vxQueryTensor(input, VX_TENSOR_DATA_TYPE, &actual_data_type, sizeof(actual_data_type)));
+    VX_CALL_(return VX_FAILURE, vxQueryTensor(input, VX_TENSOR_FIXED_POINT_POSITION, &actual_fixed_point_pos, sizeof(actual_fixed_point_pos)));
+
+    VX_CALL_(return VX_FAILURE, vxQueryMetaFormatAttribute(metas[1], VX_TENSOR_NUMBER_OF_DIMS, &expect_num_of_dims, sizeof(expect_num_of_dims)));
+    VX_CALL_(return VX_FAILURE, vxQueryMetaFormatAttribute(metas[1], VX_TENSOR_DIMS, expect_dims, sizeof(vx_size) * expect_num_of_dims));
+    VX_CALL_(return VX_FAILURE, vxQueryMetaFormatAttribute(metas[1], VX_TENSOR_DATA_TYPE, &expect_data_type, sizeof(expect_data_type)));
+    VX_CALL_(return VX_FAILURE, vxQueryMetaFormatAttribute(metas[1], VX_TENSOR_FIXED_POINT_POSITION, &expect_fixed_point_pos, sizeof(expect_fixed_point_pos)));
+
+    EXPECT_EQ_INT(expect_num_of_dims, actual_num_of_dims);
+    EXPECT_EQ_INT(expect_data_type, actual_data_type);
+    EXPECT_EQ_INT(expect_fixed_point_pos, actual_fixed_point_pos);
+    for (vx_uint8 i = 0; i < actual_num_of_dims; i++)
+    {
+        EXPECT_EQ_INT(expect_dims[i], actual_dims[i]);
+    }
+
+    return VX_SUCCESS;
+}
+
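+/* Kernel function that only checks the parameter count and types; the tensor
+ * contents are not processed. */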
+static vx_status VX_CALLBACK userKernelProc1(vx_node node, const vx_reference *parameters, vx_uint32 num)
+{
+    vx_tensor input, output;
+    ASSERT_(return VX_FAILURE, num == 2);
+    ASSERT_VX_OBJECT_(return VX_FAILURE, input  = (vx_tensor)parameters[0], VX_TYPE_TENSOR);
+    ASSERT_VX_OBJECT_(return VX_FAILURE, output = (vx_tensor)parameters[1], VX_TYPE_TENSOR);
+
+    return VX_SUCCESS;
+}
+
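+/* Kernel function that verifies the framework created the output tensor with
+ * the same attributes as the input tensor. */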
+static vx_status VX_CALLBACK userKernelProc2(vx_node node, const vx_reference *parameters, vx_uint32 num)
+{
+    vx_tensor input, output;
+    vx_size expect_num_of_dims = 0;
+    vx_size expect_dims[VX_MAX_TENSOR_DIMENSIONS] = {0};
+    vx_enum expect_data_type = 0;
+    vx_int8 expect_fixed_point_pos = 0;
+    vx_size actual_num_of_dims = 1;
+    vx_size actual_dims[VX_MAX_TENSOR_DIMENSIONS] = {1};
+    vx_enum actual_data_type = 1;
+    vx_int8 actual_fixed_point_pos = 1;
+
+    ASSERT_(return VX_FAILURE, num == 2);
+    ASSERT_VX_OBJECT_(return VX_FAILURE, input  = (vx_tensor)parameters[0], VX_TYPE_TENSOR);
+    ASSERT_VX_OBJECT_(return VX_FAILURE, output = (vx_tensor)parameters[1], VX_TYPE_TENSOR);
+
+    VX_CALL_(return VX_FAILURE, vxQueryTensor(input, VX_TENSOR_NUMBER_OF_DIMS, &actual_num_of_dims, sizeof(actual_num_of_dims)));
+    VX_CALL_(return VX_FAILURE, vxQueryTensor(input, VX_TENSOR_DIMS, actual_dims, sizeof(vx_size) * actual_num_of_dims));
+    VX_CALL_(return VX_FAILURE, vxQueryTensor(input, VX_TENSOR_DATA_TYPE, &actual_data_type, sizeof(actual_data_type)));
+    VX_CALL_(return VX_FAILURE, vxQueryTensor(input, VX_TENSOR_FIXED_POINT_POSITION, &actual_fixed_point_pos, sizeof(actual_fixed_point_pos)));
+
+    VX_CALL_(return VX_FAILURE, vxQueryTensor(output, VX_TENSOR_NUMBER_OF_DIMS, &expect_num_of_dims, sizeof(expect_num_of_dims)));
+    VX_CALL_(return VX_FAILURE, vxQueryTensor(output, VX_TENSOR_DIMS, expect_dims, sizeof(vx_size) * expect_num_of_dims));
+    VX_CALL_(return VX_FAILURE, vxQueryTensor(output, VX_TENSOR_DATA_TYPE, &expect_data_type, sizeof(expect_data_type)));
+    VX_CALL_(return VX_FAILURE, vxQueryTensor(output, VX_TENSOR_FIXED_POINT_POSITION, &expect_fixed_point_pos, sizeof(expect_fixed_point_pos)));
+
+    EXPECT_EQ_INT(expect_num_of_dims, actual_num_of_dims);
+    EXPECT_EQ_INT(expect_data_type, actual_data_type);
+    EXPECT_EQ_INT(expect_fixed_point_pos, actual_fixed_point_pos);
+    for (vx_uint8 i = 0; i < actual_num_of_dims; i++)
+    {
+        EXPECT_EQ_INT(expect_dims[i], actual_dims[i]);
+    }
+
+    return VX_SUCCESS;
+}
+
+
+TEST(UserKernelsOfNNAndNNEF, testvxAddUserKernel)
+{
+    vx_context context = context_->vx_context_;
+    vx_kernel kernel = 0;
+    vx_enum kernel_enum = 0u;
+    vx_uint32 numParams = 2;
+
+    VX_CALL(vxAllocateUserKernelId(context, &kernel_enum));
+    ASSERT_VX_OBJECT(kernel = vxAddUserKernel(
+            context,
+            VX_USER_KERNEL_CONFORMANCE_NAME,
+            kernel_enum,
+            userKernelProc1,
+            numParams,
+            userKernelValidate1,
+            NULL,
+            NULL), VX_TYPE_KERNEL);
+
+    VX_CALL(vxAddParameterToKernel(kernel, 0, VX_INPUT,  VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+    VX_CALL(vxAddParameterToKernel(kernel, 1, VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+    VX_CALL(vxFinalizeKernel(kernel));
+
+    VX_CALL(vxReleaseKernel(&kernel));
+    ASSERT(kernel == 0);
+
+    ASSERT_VX_OBJECT(kernel = vxGetKernelByName(context, VX_USER_KERNEL_CONFORMANCE_NAME), VX_TYPE_KERNEL);
+    VX_CALL(vxRemoveKernel(kernel));
+}
+
+TEST(UserKernelsOfNNAndNNEF, testvxRemoveKernel)
+{
+    vx_context context = context_->vx_context_;
+    vx_kernel kernel = 0;
+    vx_enum kernel_enum = 0u;
+    vx_uint32 numParams = 2;
+
+    VX_CALL(vxAllocateUserKernelId(context, &kernel_enum));
+    ASSERT_VX_OBJECT(kernel = vxAddUserKernel(
+            context,
+            VX_USER_KERNEL_CONFORMANCE_NAME,
+            kernel_enum,
+            userKernelProc1,
+            numParams,
+            userKernelValidate1,
+            NULL,
+            NULL), VX_TYPE_KERNEL);
+
+    VX_CALL(vxAddParameterToKernel(kernel, 0, VX_INPUT,  VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+    VX_CALL(vxAddParameterToKernel(kernel, 1, VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+
+    EXPECT_EQ_VX_STATUS(VX_SUCCESS, vxRemoveKernel(kernel));
+}
+
+TEST(UserKernelsOfNNAndNNEF, testvxFinalizeKernel)
+{
+    vx_context context = context_->vx_context_;
+    vx_kernel kernel = 0;
+    vx_enum kernel_enum = 0u;
+    vx_uint32 numParams = 2;
+
+    VX_CALL(vxAllocateUserKernelId(context, &kernel_enum));
+    ASSERT_VX_OBJECT(kernel = vxAddUserKernel(
+            context,
+            VX_USER_KERNEL_CONFORMANCE_NAME,
+            kernel_enum,
+            userKernelProc1,
+            numParams,
+            userKernelValidate1,
+            NULL,
+            NULL), VX_TYPE_KERNEL);
+
+    VX_CALL(vxAddParameterToKernel(kernel, 0, VX_INPUT,  VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+    VX_CALL(vxAddParameterToKernel(kernel, 1, VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+    EXPECT_EQ_VX_STATUS(VX_SUCCESS, vxFinalizeKernel(kernel));
+
+    VX_CALL(vxRemoveKernel(kernel));
+}
+
+TEST(UserKernelsOfNNAndNNEF, testvxAddParameterToKernel)
+{
+    vx_context context = context_->vx_context_;
+    vx_graph graph = 0;
+    vx_node node = 0;
+    vx_kernel kernel = 0;
+    vx_enum kernel_enum = 0u;
+    vx_tensor in_tensor = 0;
+    vx_tensor out_tensor = 0;
+    vx_uint32 numParams = 2;
+    vx_enum data_type = VX_TYPE_INT16;
+    vx_int8 fixed_point_position = Q78_FIXED_POINT_POSITION;
+    vx_size max_dims = MAX_DIMS_TEST1;
+
+    uint64_t rng;
+    {
+        uint64_t * seed = &CT()->seed_;
+        ASSERT(!!seed);
+        CT_RNG_INIT(rng, *seed);
+    }
+
+    VX_CALL(vxAllocateUserKernelId(context, &kernel_enum));
+    ASSERT_VX_OBJECT(kernel = vxAddUserKernel(
+            context,
+            VX_USER_KERNEL_CONFORMANCE_NAME,
+            kernel_enum,
+            userKernelProc1,
+            numParams,
+            userKernelValidate2,
+            NULL,
+            NULL), VX_TYPE_KERNEL);
+
+    VX_CALL(vxAddParameterToKernel(kernel, 0, VX_INPUT,  VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+    VX_CALL(vxAddParameterToKernel(kernel, 1, VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+    VX_CALL(vxFinalizeKernel(kernel));
+
+    ASSERT_VX_OBJECT(graph = vxCreateGraph(context), VX_TYPE_GRAPH);
+
+    size_t * const tensor_dims = ct_alloc_mem(sizeof(*tensor_dims) * max_dims);
+    ASSERT(tensor_dims);
+
+    for (vx_size i = 0; i < max_dims; ++i)
+    {
+        tensor_dims[i] = (size_t)CT_RNG_NEXT_INT(rng, TEST_TENSOR_MIN_DIM_SZ, TEST_TENSOR_MAX_DIM_SZ+1);
+    }
+
+    in_tensor = vxCreateTensor(context, max_dims, tensor_dims, data_type, fixed_point_position);
+    out_tensor = vxCreateTensor(context, max_dims, tensor_dims, data_type, fixed_point_position);
+    ASSERT_VX_OBJECT(in_tensor, VX_TYPE_TENSOR);
+    ASSERT_VX_OBJECT(out_tensor, VX_TYPE_TENSOR);
+
+    ASSERT_VX_OBJECT(node = vxCreateGenericNode(graph, kernel), VX_TYPE_NODE);
+    VX_CALL(vxSetParameterByIndex(node, 0, (vx_reference)in_tensor));
+    VX_CALL(vxSetParameterByIndex(node, 1, (vx_reference)out_tensor));
+
+    VX_CALL(vxReleaseNode(&node));
+    ASSERT(node == 0);
+
+    VX_CALL(vxVerifyGraph(graph));
+    VX_CALL(vxProcessGraph(graph));
+
+    VX_CALL(vxReleaseKernel(&kernel));
+    ASSERT(kernel == 0);
+
+    VX_CALL(vxReleaseGraph(&graph));
+    EXPECT_EQ_PTR(0, graph);
+
+    ASSERT_VX_OBJECT(kernel = vxGetKernelByName(context, VX_USER_KERNEL_CONFORMANCE_NAME), VX_TYPE_KERNEL);
+    VX_CALL(vxRemoveKernel(kernel));
+
+    VX_CALL(vxReleaseTensor(&in_tensor));
+    EXPECT_EQ_PTR(NULL, in_tensor);
+
+    VX_CALL(vxReleaseTensor(&out_tensor));
+    EXPECT_EQ_PTR(NULL, out_tensor);
+
+    ct_free_mem(tensor_dims);
+}
+
+TEST(UserKernelsOfNNAndNNEF, testvxSetKernelAttribute)
+{
+    vx_context context = context_->vx_context_;
+    vx_graph graph = 0;
+    vx_node node = 0;
+    vx_kernel kernel = 0;
+    vx_enum kernel_enum = 0u;
+    vx_size actual_datasize = 256;
+    vx_size expect_datasize = 0;
+    vx_uint32 numParams = 2;
+
+    VX_CALL(vxAllocateUserKernelId(context, &kernel_enum));
+    ASSERT_VX_OBJECT(kernel = vxAddUserKernel(
+            context,
+            VX_USER_KERNEL_CONFORMANCE_NAME,
+            kernel_enum,
+            userKernelProc1,
+            numParams,
+            userKernelValidate2,
+            NULL,
+            NULL), VX_TYPE_KERNEL);
+
+    VX_CALL(vxAddParameterToKernel(kernel, 0, VX_INPUT,  VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+    VX_CALL(vxAddParameterToKernel(kernel, 1, VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+    VX_CALL(vxSetKernelAttribute(kernel, VX_KERNEL_LOCAL_DATA_SIZE, &actual_datasize, sizeof(actual_datasize)));
+    VX_CALL(vxFinalizeKernel(kernel));
+
+    VX_CALL(vxQueryKernel(kernel, VX_KERNEL_LOCAL_DATA_SIZE, &expect_datasize, sizeof(expect_datasize)));
+    EXPECT_EQ_INT(expect_datasize, actual_datasize);
+
+    ASSERT_VX_OBJECT(graph = vxCreateGraph(context), VX_TYPE_GRAPH);
+    ASSERT_VX_OBJECT(node = vxCreateGenericNode(graph, kernel), VX_TYPE_NODE);
+
+    expect_datasize = 0;
+    VX_CALL(vxQueryNode(node, VX_NODE_LOCAL_DATA_SIZE, &expect_datasize, sizeof(expect_datasize)));
+    EXPECT_EQ_INT(expect_datasize, actual_datasize);
+
+    VX_CALL(vxReleaseNode(&node));
+    ASSERT(node == 0);
+
+    VX_CALL(vxReleaseKernel(&kernel));
+    ASSERT(kernel == 0);
+
+    VX_CALL(vxReleaseGraph(&graph));
+    EXPECT_EQ_PTR(0, graph);
+
+    ASSERT_VX_OBJECT(kernel = vxGetKernelByName(context, VX_USER_KERNEL_CONFORMANCE_NAME), VX_TYPE_KERNEL);
+    VX_CALL(vxRemoveKernel(kernel));
+}
+
+/* *****************MetaFormatOfNNAndNNEF tests*******************************/
+TESTCASE(MetaFormatOfNNAndNNEF, CT_VXContext, ct_setup_vx_context, 0)
+
+TEST(MetaFormatOfNNAndNNEF, testvxSetMetaFormatAttribute)
+{
+    vx_context context = context_->vx_context_;
+    vx_kernel kernel = 0;
+    vx_enum kernel_enum = 0u;
+    vx_uint32 numParams = 2;
+    vx_graph graph = 0;
+    vx_node node = 0;
+    vx_tensor in_tensor = 0;
+    vx_tensor out_tensor = 0;
+    vx_enum data_type = VX_TYPE_INT16;
+    vx_int8 fixed_point_position = Q78_FIXED_POINT_POSITION;
+    vx_size max_dims = MAX_DIMS_TEST1;
+
+    uint64_t rng;
+    {
+        uint64_t * seed = &CT()->seed_;
+        ASSERT(!!seed);
+        CT_RNG_INIT(rng, *seed);
+    }
+
+    VX_CALL(vxAllocateUserKernelId(context, &kernel_enum));
+    ASSERT_VX_OBJECT(kernel = vxAddUserKernel(
+            context,
+            VX_USER_KERNEL_CONFORMANCE_NAME,
+            kernel_enum,
+            userKernelProc2,
+            numParams,
+            userKernelValidate2,
+            NULL,
+            NULL), VX_TYPE_KERNEL);
+
+    VX_CALL(vxAddParameterToKernel(kernel, 0, VX_INPUT,  VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+    VX_CALL(vxAddParameterToKernel(kernel, 1, VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+    VX_CALL(vxFinalizeKernel(kernel));
+
+    ASSERT_VX_OBJECT(graph = vxCreateGraph(context), VX_TYPE_GRAPH);
+
+    size_t * const tensor_dims = ct_alloc_mem(sizeof(*tensor_dims) * max_dims);
+    ASSERT(tensor_dims);
+
+    for (vx_size i = 0; i < max_dims; ++i)
+    {
+        tensor_dims[i] = (size_t)CT_RNG_NEXT_INT(rng, TEST_TENSOR_MIN_DIM_SZ, TEST_TENSOR_MAX_DIM_SZ+1);
+    }
+
+    in_tensor = vxCreateTensor(context, max_dims, tensor_dims, data_type, fixed_point_position);
+    out_tensor = vxCreateTensor(context, max_dims, tensor_dims, data_type, fixed_point_position);
+    ASSERT_VX_OBJECT(in_tensor, VX_TYPE_TENSOR);
+    ASSERT_VX_OBJECT(out_tensor, VX_TYPE_TENSOR);
+
+    ASSERT_VX_OBJECT(node = vxCreateGenericNode(graph, kernel), VX_TYPE_NODE);
+    VX_CALL(vxSetParameterByIndex(node, 0, (vx_reference)in_tensor));
+    VX_CALL(vxSetParameterByIndex(node, 1, (vx_reference)out_tensor));
+
+    VX_CALL(vxReleaseNode(&node));
+    ASSERT(node == 0);
+
+    VX_CALL(vxVerifyGraph(graph));
+    VX_CALL(vxProcessGraph(graph));
+
+    VX_CALL(vxReleaseKernel(&kernel));
+    ASSERT(kernel == 0);
+
+    VX_CALL(vxReleaseGraph(&graph));
+    EXPECT_EQ_PTR(0, graph);
+
+    ASSERT_VX_OBJECT(kernel = vxGetKernelByName(context, VX_USER_KERNEL_CONFORMANCE_NAME), VX_TYPE_KERNEL);
+    VX_CALL(vxRemoveKernel(kernel));
+
+    VX_CALL(vxReleaseTensor(&in_tensor));
+    EXPECT_EQ_PTR(NULL, in_tensor);
+
+    VX_CALL(vxReleaseTensor(&out_tensor));
+    EXPECT_EQ_PTR(NULL, out_tensor);
+
+    ct_free_mem(tensor_dims);
+}
+
+TEST(MetaFormatOfNNAndNNEF, testvxSetMetaFormatFromReference)
+{
+    vx_context context = context_->vx_context_;
+    vx_kernel kernel = 0;
+    vx_enum kernel_enum = 0u;
+    vx_uint32 numParams = 2;
+    vx_graph graph = 0;
+    vx_node node = 0;
+    vx_tensor in_tensor = 0;
+    vx_tensor out_tensor = 0;
+    vx_enum data_type = VX_TYPE_INT16;
+    vx_int8 fixed_point_position = Q78_FIXED_POINT_POSITION;
+    vx_size max_dims = MAX_DIMS_TEST1;
+
+    uint64_t rng;
+    {
+        uint64_t * seed = &CT()->seed_;
+        ASSERT(!!seed);
+        CT_RNG_INIT(rng, *seed);
+    }
+
+    VX_CALL(vxAllocateUserKernelId(context, &kernel_enum));
+    ASSERT_VX_OBJECT(kernel = vxAddUserKernel(
+            context,
+            VX_USER_KERNEL_CONFORMANCE_NAME,
+            kernel_enum,
+            userKernelProc2,
+            numParams,
+            userKernelValidate3,
+            NULL,
+            NULL), VX_TYPE_KERNEL);
+
+    VX_CALL(vxAddParameterToKernel(kernel, 0, VX_INPUT,  VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+    VX_CALL(vxAddParameterToKernel(kernel, 1, VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+    VX_CALL(vxFinalizeKernel(kernel));
+
+    ASSERT_VX_OBJECT(graph = vxCreateGraph(context), VX_TYPE_GRAPH);
+
+    size_t * const tensor_dims = ct_alloc_mem(sizeof(*tensor_dims) * max_dims);
+    ASSERT(tensor_dims);
+
+    for (vx_size i = 0; i < max_dims; ++i)
+    {
+        tensor_dims[i] = (size_t)CT_RNG_NEXT_INT(rng, TEST_TENSOR_MIN_DIM_SZ, TEST_TENSOR_MAX_DIM_SZ+1);
+    }
+
+    in_tensor = vxCreateTensor(context, max_dims, tensor_dims, data_type, fixed_point_position);
+    out_tensor = vxCreateTensor(context, max_dims, tensor_dims, data_type, fixed_point_position);
+    ASSERT_VX_OBJECT(in_tensor, VX_TYPE_TENSOR);
+    ASSERT_VX_OBJECT(out_tensor, VX_TYPE_TENSOR);
+
+    ASSERT_VX_OBJECT(node = vxCreateGenericNode(graph, kernel), VX_TYPE_NODE);
+    VX_CALL(vxSetParameterByIndex(node, 0, (vx_reference)in_tensor));
+    VX_CALL(vxSetParameterByIndex(node, 1, (vx_reference)out_tensor));
+
+    VX_CALL(vxReleaseNode(&node));
+    ASSERT(node == 0);
+
+    VX_CALL(vxVerifyGraph(graph));
+    VX_CALL(vxProcessGraph(graph));
+
+    VX_CALL(vxReleaseKernel(&kernel));
+    ASSERT(kernel == 0);
+
+    VX_CALL(vxReleaseGraph(&graph));
+    EXPECT_EQ_PTR(0, graph);
+
+    ASSERT_VX_OBJECT(kernel = vxGetKernelByName(context, VX_USER_KERNEL_CONFORMANCE_NAME), VX_TYPE_KERNEL);
+    VX_CALL(vxRemoveKernel(kernel));
+
+    VX_CALL(vxReleaseTensor(&in_tensor));
+    EXPECT_EQ_PTR(NULL, in_tensor);
+
+    VX_CALL(vxReleaseTensor(&out_tensor));
+    EXPECT_EQ_PTR(NULL, out_tensor);
+
+    ct_free_mem(tensor_dims);
+}
+
+TEST(MetaFormatOfNNAndNNEF, testvxQueryMetaFormatAttribute)
+{
+    vx_context context = context_->vx_context_;
+    vx_kernel kernel = 0;
+    vx_enum kernel_enum = 0u;
+    vx_uint32 numParams = 2;
+    vx_graph graph = 0;
+    vx_node node = 0;
+    vx_tensor in_tensor = 0;
+    vx_tensor out_tensor = 0;
+    vx_enum data_type = VX_TYPE_INT16;
+    vx_int8 fixed_point_position = Q78_FIXED_POINT_POSITION;
+    vx_size max_dims = MAX_DIMS_TEST1;
+
+    uint64_t rng;
+    {
+        uint64_t * seed = &CT()->seed_;
+        ASSERT(!!seed);
+        CT_RNG_INIT(rng, *seed);
+    }
+
+    VX_CALL(vxAllocateUserKernelId(context, &kernel_enum));
+    ASSERT_VX_OBJECT(kernel = vxAddUserKernel(
+            context,
+            VX_USER_KERNEL_CONFORMANCE_NAME,
+            kernel_enum,
+            userKernelProc2,
+            numParams,
+            userKernelValidate4,
+            NULL,
+            NULL), VX_TYPE_KERNEL);
+
+    VX_CALL(vxAddParameterToKernel(kernel, 0, VX_INPUT,  VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+    VX_CALL(vxAddParameterToKernel(kernel, 1, VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+    VX_CALL(vxFinalizeKernel(kernel));
+
+    ASSERT_VX_OBJECT(graph = vxCreateGraph(context), VX_TYPE_GRAPH);
+
+    size_t * const tensor_dims = ct_alloc_mem(sizeof(*tensor_dims) * max_dims);
+    ASSERT(tensor_dims);
+
+    for (vx_size i = 0; i < max_dims; ++i)
+    {
+        tensor_dims[i] = (size_t)CT_RNG_NEXT_INT(rng, TEST_TENSOR_MIN_DIM_SZ, TEST_TENSOR_MAX_DIM_SZ+1);
+    }
+
+    in_tensor = vxCreateTensor(context, max_dims, tensor_dims, data_type, fixed_point_position);
+    out_tensor = vxCreateTensor(context, max_dims, tensor_dims, data_type, fixed_point_position);
+    ASSERT_VX_OBJECT(in_tensor, VX_TYPE_TENSOR);
+    ASSERT_VX_OBJECT(out_tensor, VX_TYPE_TENSOR);
+
+    ASSERT_VX_OBJECT(node = vxCreateGenericNode(graph, kernel), VX_TYPE_NODE);
+    VX_CALL(vxSetParameterByIndex(node, 0, (vx_reference)in_tensor));
+    VX_CALL(vxSetParameterByIndex(node, 1, (vx_reference)out_tensor));
+
+    VX_CALL(vxReleaseNode(&node));
+    ASSERT(node == 0);
+
+    VX_CALL(vxVerifyGraph(graph));
+    VX_CALL(vxProcessGraph(graph));
+
+    VX_CALL(vxReleaseKernel(&kernel));
+    ASSERT(kernel == 0);
+
+    VX_CALL(vxReleaseGraph(&graph));
+    EXPECT_EQ_PTR(0, graph);
+
+    ASSERT_VX_OBJECT(kernel = vxGetKernelByName(context, VX_USER_KERNEL_CONFORMANCE_NAME), VX_TYPE_KERNEL);
+    VX_CALL(vxRemoveKernel(kernel));
+
+    VX_CALL(vxReleaseTensor(&in_tensor));
+    EXPECT_EQ_PTR(NULL, in_tensor);
+
+    VX_CALL(vxReleaseTensor(&out_tensor));
+    EXPECT_EQ_PTR(NULL, out_tensor);
+
+    ct_free_mem(tensor_dims);
+}
+
+/* *****************VxKernelOfNNAndNNEF tests*******************************/
+TESTCASE(VxKernelOfNNAndNNEF, CT_VXContext, ct_setup_vx_context, 0)
+
+TEST(VxKernelOfNNAndNNEF, testvxGetKernelByEnum)
+{
+    vx_context context = context_->vx_context_;
+    vx_kernel kernel = 0;
+    vx_enum kernel_enum = 0u;
+    vx_uint32 numParams = 2;
+
+    VX_CALL(vxAllocateUserKernelId(context, &kernel_enum));
+    ASSERT_VX_OBJECT(kernel = vxAddUserKernel(
+        context,
+        VX_USER_KERNEL_CONFORMANCE_NAME,
+        kernel_enum,
+        userKernelProc1,
+        numParams,
+        userKernelValidate1,
+        NULL,
+        NULL), VX_TYPE_KERNEL);
+
+    VX_CALL(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+    VX_CALL(vxAddParameterToKernel(kernel, 1, VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+    VX_CALL(vxFinalizeKernel(kernel));
+
+    VX_CALL(vxReleaseKernel(&kernel));
+    ASSERT(kernel == 0);
+
+    ASSERT_VX_OBJECT(kernel = vxGetKernelByEnum(context, kernel_enum), VX_TYPE_KERNEL);
+    VX_CALL(vxRemoveKernel(kernel));
+}
+
+TEST(VxKernelOfNNAndNNEF, testvxGetKernelByName)
+{
+    vx_context context = context_->vx_context_;
+    vx_kernel kernel = 0;
+    vx_enum kernel_enum = 0u;
+    vx_uint32 numParams = 2;
+
+    VX_CALL(vxAllocateUserKernelId(context, &kernel_enum));
+    ASSERT_VX_OBJECT(kernel = vxAddUserKernel(
+        context,
+        VX_USER_KERNEL_CONFORMANCE_NAME,
+        kernel_enum,
+        userKernelProc1,
+        numParams,
+        userKernelValidate1,
+        NULL,
+        NULL), VX_TYPE_KERNEL);
+
+    VX_CALL(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+    VX_CALL(vxAddParameterToKernel(kernel, 1, VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+    VX_CALL(vxFinalizeKernel(kernel));
+
+    VX_CALL(vxReleaseKernel(&kernel));
+    ASSERT(kernel == 0);
+
+    ASSERT_VX_OBJECT(kernel = vxGetKernelByName(context, VX_USER_KERNEL_CONFORMANCE_NAME), VX_TYPE_KERNEL);
+    VX_CALL(vxRemoveKernel(kernel));
+}
+
+TEST(VxKernelOfNNAndNNEF, testvxQueryKernel)
+{
+    vx_context context = context_->vx_context_;
+    vx_kernel kernel = 0;
+    vx_enum kernel_enum = 0u;
+    vx_enum test_kernel_enum = 0u;
+    vx_uint32 numParams = 2;
+    vx_uint32 para_num = 0;
+
+    VX_CALL(vxAllocateUserKernelId(context, &kernel_enum));
+    ASSERT_VX_OBJECT(kernel = vxAddUserKernel(
+        context,
+        VX_USER_KERNEL_CONFORMANCE_NAME,
+        kernel_enum,
+        userKernelProc1,
+        numParams,
+        userKernelValidate1,
+        NULL,
+        NULL), VX_TYPE_KERNEL);
+
+    VX_CALL(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+    VX_CALL(vxAddParameterToKernel(kernel, 1, VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+    VX_CALL(vxFinalizeKernel(kernel));
+
+    VX_CALL(vxReleaseKernel(&kernel));
+    ASSERT(kernel == 0);
+
+    ASSERT_VX_OBJECT(kernel = vxGetKernelByName(context, VX_USER_KERNEL_CONFORMANCE_NAME), VX_TYPE_KERNEL);
+    VX_CALL(vxQueryKernel(kernel, VX_KERNEL_ENUM, &test_kernel_enum, sizeof(test_kernel_enum)));
+    EXPECT_EQ_INT(kernel_enum, test_kernel_enum);
+    VX_CALL(vxQueryKernel(kernel, VX_KERNEL_PARAMETERS, &para_num, sizeof(vx_uint32)));
+    EXPECT_EQ_INT(2, para_num);
+
+    VX_CALL(vxRemoveKernel(kernel));
+}
+
+TEST(VxKernelOfNNAndNNEF, testvxReleaseKernel)
+{
+    vx_context context = context_->vx_context_;
+    vx_kernel kernel = 0;
+    vx_enum kernel_enum = 0u;
+    vx_uint32 numParams = 2;
+
+    VX_CALL(vxAllocateUserKernelId(context, &kernel_enum));
+    ASSERT_VX_OBJECT(kernel = vxAddUserKernel(
+        context,
+        VX_USER_KERNEL_CONFORMANCE_NAME,
+        kernel_enum,
+        userKernelProc1,
+        numParams,
+        userKernelValidate1,
+        NULL,
+        NULL), VX_TYPE_KERNEL);
+
+    VX_CALL(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+    VX_CALL(vxAddParameterToKernel(kernel, 1, VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+    VX_CALL(vxFinalizeKernel(kernel));
+
+    VX_CALL(vxReleaseKernel(&kernel));
+    ASSERT(kernel == 0);
+    ASSERT_VX_OBJECT(kernel = vxGetKernelByName(context, VX_USER_KERNEL_CONFORMANCE_NAME), VX_TYPE_KERNEL);
+    VX_CALL(vxRemoveKernel(kernel));
+}
+
+
+/* *****************VxParameterOfNNAndNNEF tests*******************************/
+TESTCASE(VxParameterOfNNAndNNEF, CT_VXContext, ct_setup_vx_context, 0)
+
+TEST(VxParameterOfNNAndNNEF, test_vxGetKernelParameterByIndex)
+{
+    vx_context context = context_->vx_context_;
+    vx_uint32 ref_count0 = 0;
+    vx_uint32 ref_count1 = 0;
+    vx_reference ref = 0;
+    vx_kernel kernel = 0;
+    vx_parameter parameter = 0;
+    vx_enum kernel_enum = 0u;
+    vx_uint32 numParams = 2;
+
+    VX_CALL(vxAllocateUserKernelId(context, &kernel_enum));
+    ASSERT_VX_OBJECT(kernel = vxAddUserKernel(
+        context,
+        VX_USER_KERNEL_CONFORMANCE_NAME,
+        kernel_enum,
+        userKernelProc1,
+        numParams,
+        userKernelValidate1,
+        NULL,
+        NULL), VX_TYPE_KERNEL);
+
+    VX_CALL(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+    VX_CALL(vxAddParameterToKernel(kernel, 1, VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+    VX_CALL(vxFinalizeKernel(kernel));
+
+    VX_CALL(vxReleaseKernel(&kernel));
+    ASSERT(kernel == 0);
+
+    EXPECT_VX_OBJECT(kernel = vxGetKernelByEnum(context, kernel_enum), VX_TYPE_KERNEL);
+    EXPECT_VX_OBJECT(parameter = vxGetKernelParameterByIndex(kernel, 0), VX_TYPE_PARAMETER);
+    ref = (vx_reference)parameter;
+    ASSERT_EQ_VX_STATUS(vxQueryReference(ref, VX_REFERENCE_COUNT, (void*)&ref_count0, sizeof(ref_count0)), VX_SUCCESS);
+    VX_CALL(vxRetainReference(ref));
+    ASSERT_EQ_VX_STATUS(vxQueryReference(ref, VX_REFERENCE_COUNT, (void*)&ref_count1, sizeof(ref_count1)), VX_SUCCESS);
+    ASSERT_EQ_INT(ref_count1 - ref_count0, 1);
+    VX_CALL(vxReleaseReference(&ref));
+    ref = (vx_reference)parameter;
+    ref_count1 = 0;
+    ASSERT_EQ_VX_STATUS(vxQueryReference(ref, VX_REFERENCE_COUNT, (void*)&ref_count1, sizeof(ref_count1)), VX_SUCCESS);
+    ASSERT_EQ_INT(ref_count1 - ref_count0, 0);
+
+    VX_CALL(vxRemoveKernel(kernel));
+    VX_CALL(vxReleaseParameter(&parameter));
+}
+
+TEST(VxParameterOfNNAndNNEF, test_vxQueryParameter)
+{
+    vx_context context = context_->vx_context_;
+    vx_kernel kernel = 0;
+    vx_parameter parameter = 0;
+    vx_enum kernel_enum = 0u;
+    vx_uint32 numParams = 2;
+    vx_uint32 index = 256;
+
+    VX_CALL(vxAllocateUserKernelId(context, &kernel_enum));
+    ASSERT_VX_OBJECT(kernel = vxAddUserKernel(
+        context,
+        VX_USER_KERNEL_CONFORMANCE_NAME,
+        kernel_enum,
+        userKernelProc1,
+        numParams,
+        userKernelValidate1,
+        NULL,
+        NULL), VX_TYPE_KERNEL);
+
+    VX_CALL(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+    VX_CALL(vxAddParameterToKernel(kernel, 1, VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+    VX_CALL(vxFinalizeKernel(kernel));
+
+    VX_CALL(vxReleaseKernel(&kernel));
+    ASSERT(kernel == 0);
+
+    EXPECT_VX_OBJECT(kernel = vxGetKernelByEnum(context, kernel_enum), VX_TYPE_KERNEL);
+    EXPECT_VX_OBJECT(parameter = vxGetKernelParameterByIndex(kernel, 0), VX_TYPE_PARAMETER);
+    EXPECT_EQ_INT(VX_SUCCESS, vxQueryParameter(parameter, VX_PARAMETER_INDEX, &index, sizeof(vx_uint32)));
+    ASSERT(index == 0);
+    VX_CALL(vxReleaseParameter(&parameter));
+    VX_CALL(vxRemoveKernel(kernel));
+}
+
+TEST(VxParameterOfNNAndNNEF, test_vxSetParameterByIndex)
+{
+    vx_context context = context_->vx_context_;
+    vx_graph graph = 0;
+    vx_node node = 0;
+    vx_kernel kernel = 0;
+    vx_enum kernel_enum = 0u;
+    vx_tensor in_tensor = 0;
+    vx_tensor out_tensor = 0;
+    vx_uint32 numParams = 2;
+    vx_enum data_type = VX_TYPE_INT16;
+    vx_int8 fixed_point_position = Q78_FIXED_POINT_POSITION;
+    vx_size max_dims = MAX_DIMS_TEST1;
+
+    uint64_t rng;
+    {
+        uint64_t * seed = &CT()->seed_;
+        ASSERT(!!seed);
+        CT_RNG_INIT(rng, *seed);
+    }
+
+    VX_CALL(vxAllocateUserKernelId(context, &kernel_enum));
+    ASSERT_VX_OBJECT(kernel = vxAddUserKernel(
+        context,
+        VX_USER_KERNEL_CONFORMANCE_NAME,
+        kernel_enum,
+        userKernelProc1,
+        numParams,
+        userKernelValidate2,
+        NULL,
+        NULL), VX_TYPE_KERNEL);
+
+    VX_CALL(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+    VX_CALL(vxAddParameterToKernel(kernel, 1, VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+    VX_CALL(vxFinalizeKernel(kernel));
+
+    ASSERT_VX_OBJECT(graph = vxCreateGraph(context), VX_TYPE_GRAPH);
+
+    size_t * const tensor_dims = ct_alloc_mem(sizeof(*tensor_dims) * max_dims);
+    ASSERT(tensor_dims);
+
+    for (vx_size i = 0; i < max_dims; ++i)
+    {
+        tensor_dims[i] = (size_t)CT_RNG_NEXT_INT(rng, TEST_TENSOR_MIN_DIM_SZ, TEST_TENSOR_MAX_DIM_SZ + 1);
+    }
+
+    in_tensor = vxCreateTensor(context, max_dims, tensor_dims, data_type, fixed_point_position);
+    out_tensor = vxCreateTensor(context, max_dims, tensor_dims, data_type, fixed_point_position);
+    ASSERT_VX_OBJECT(in_tensor, VX_TYPE_TENSOR);
+    ASSERT_VX_OBJECT(out_tensor, VX_TYPE_TENSOR);
+
+    ASSERT_VX_OBJECT(node = vxCreateGenericNode(graph, kernel), VX_TYPE_NODE);
+    VX_CALL(vxSetParameterByIndex(node, 0, (vx_reference)in_tensor));
+    VX_CALL(vxSetParameterByIndex(node, 1, (vx_reference)out_tensor));
+
+    VX_CALL(vxReleaseNode(&node));
+    ASSERT(node == 0);
+
+    VX_CALL(vxVerifyGraph(graph));
+    VX_CALL(vxProcessGraph(graph));
+
+    VX_CALL(vxReleaseKernel(&kernel));
+    ASSERT(kernel == 0);
+
+    VX_CALL(vxReleaseGraph(&graph));
+    EXPECT_EQ_PTR(0, graph);
+
+    ASSERT_VX_OBJECT(kernel = vxGetKernelByName(context, VX_USER_KERNEL_CONFORMANCE_NAME), VX_TYPE_KERNEL);
+    VX_CALL(vxRemoveKernel(kernel));
+
+    VX_CALL(vxReleaseTensor(&in_tensor));
+    EXPECT_EQ_PTR(NULL, in_tensor);
+
+    VX_CALL(vxReleaseTensor(&out_tensor));
+    EXPECT_EQ_PTR(NULL, out_tensor);
+
+    ct_free_mem(tensor_dims);
+}
+
+TEST(VxParameterOfNNAndNNEF, test_vxSetParameterByReference)
+{
+    vx_context context = context_->vx_context_;
+    vx_graph graph = 0;
+    vx_node node = 0;
+    vx_kernel kernel = 0;
+    vx_parameter parameter = 0;
+    vx_enum kernel_enum = 0u;
+    vx_tensor in_tensor = 0;
+    vx_tensor out_tensor = 0;
+    vx_uint32 numParams = 2;
+    vx_enum data_type = VX_TYPE_INT16;
+    vx_int8 fixed_point_position = Q78_FIXED_POINT_POSITION;
+    vx_size max_dims = MAX_DIMS_TEST1;
+
+    uint64_t rng;
+    {
+        uint64_t * seed = &CT()->seed_;
+        ASSERT(!!seed);
+        CT_RNG_INIT(rng, *seed);
+    }
+
+    VX_CALL(vxAllocateUserKernelId(context, &kernel_enum));
+    ASSERT_VX_OBJECT(kernel = vxAddUserKernel(
+        context,
+        VX_USER_KERNEL_CONFORMANCE_NAME,
+        kernel_enum,
+        userKernelProc1,
+        numParams,
+        userKernelValidate2,
+        NULL,
+        NULL), VX_TYPE_KERNEL);
+
+    VX_CALL(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+    VX_CALL(vxAddParameterToKernel(kernel, 1, VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+    VX_CALL(vxFinalizeKernel(kernel));
+
+    ASSERT_VX_OBJECT(graph = vxCreateGraph(context), VX_TYPE_GRAPH);
+
+    size_t * const tensor_dims = ct_alloc_mem(sizeof(*tensor_dims) * max_dims);
+    ASSERT(tensor_dims);
+
+    for (vx_size i = 0; i < max_dims; ++i)
+    {
+        tensor_dims[i] = (size_t)CT_RNG_NEXT_INT(rng, TEST_TENSOR_MIN_DIM_SZ, TEST_TENSOR_MAX_DIM_SZ + 1);
+    }
+
+    in_tensor = vxCreateTensor(context, max_dims, tensor_dims, data_type, fixed_point_position);
+    out_tensor = vxCreateTensor(context, max_dims, tensor_dims, data_type, fixed_point_position);
+    ASSERT_VX_OBJECT(in_tensor, VX_TYPE_TENSOR);
+    ASSERT_VX_OBJECT(out_tensor, VX_TYPE_TENSOR);
+
+    ASSERT_VX_OBJECT(node = vxCreateGenericNode(graph, kernel), VX_TYPE_NODE);
+    ASSERT_VX_OBJECT(parameter = vxGetParameterByIndex(node, 0), VX_TYPE_PARAMETER);
+    VX_CALL(vxSetParameterByReference(parameter, (vx_reference)in_tensor));
+
+    VX_CALL(vxReleaseNode(&node));
+    ASSERT(node == 0);
+
+    VX_CALL(vxReleaseKernel(&kernel));
+    ASSERT(kernel == 0);
+
+    VX_CALL(vxReleaseParameter(&parameter));
+    ASSERT(parameter == 0);
+
+    VX_CALL(vxReleaseGraph(&graph));
+    EXPECT_EQ_PTR(0, graph);
+
+    ASSERT_VX_OBJECT(kernel = vxGetKernelByName(context, VX_USER_KERNEL_CONFORMANCE_NAME), VX_TYPE_KERNEL);
+    VX_CALL(vxRemoveKernel(kernel));
+
+    VX_CALL(vxReleaseTensor(&in_tensor));
+    EXPECT_EQ_PTR(NULL, in_tensor);
+
+    VX_CALL(vxReleaseTensor(&out_tensor));
+    EXPECT_EQ_PTR(NULL, out_tensor);
+
+    ct_free_mem(tensor_dims);
+}
+
+TEST(VxParameterOfNNAndNNEF, test_vxGetParameterByIndex)
+{
+    vx_context context = context_->vx_context_;
+    vx_graph graph = 0;
+    vx_node node = 0;
+    vx_kernel kernel = 0;
+    vx_parameter parameter = 0;
+    vx_enum kernel_enum = 0u;
+    vx_tensor in_tensor = 0;
+    vx_tensor out_tensor = 0;
+    vx_tensor test_tensor = 0;
+    vx_uint32 numParams = 2;
+    vx_enum data_type = VX_TYPE_INT16;
+    vx_int8 fixed_point_position = Q78_FIXED_POINT_POSITION;
+    vx_size max_dims = MAX_DIMS_TEST1;
+
+    uint64_t rng;
+    {
+        uint64_t * seed = &CT()->seed_;
+        ASSERT(!!seed);
+        CT_RNG_INIT(rng, *seed);
+    }
+
+    VX_CALL(vxAllocateUserKernelId(context, &kernel_enum));
+    ASSERT_VX_OBJECT(kernel = vxAddUserKernel(
+        context,
+        VX_USER_KERNEL_CONFORMANCE_NAME,
+        kernel_enum,
+        userKernelProc1,
+        numParams,
+        userKernelValidate2,
+        NULL,
+        NULL), VX_TYPE_KERNEL);
+
+    VX_CALL(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+    VX_CALL(vxAddParameterToKernel(kernel, 1, VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+    VX_CALL(vxFinalizeKernel(kernel));
+
+    ASSERT_VX_OBJECT(graph = vxCreateGraph(context), VX_TYPE_GRAPH);
+
+    size_t * const tensor_dims = ct_alloc_mem(sizeof(*tensor_dims) * max_dims);
+    ASSERT(tensor_dims);
+
+    for (vx_size i = 0; i < max_dims; ++i)
+    {
+        tensor_dims[i] = (size_t)CT_RNG_NEXT_INT(rng, TEST_TENSOR_MIN_DIM_SZ, TEST_TENSOR_MAX_DIM_SZ + 1);
+    }
+
+    in_tensor = vxCreateTensor(context, max_dims, tensor_dims, data_type, fixed_point_position);
+    out_tensor = vxCreateTensor(context, max_dims, tensor_dims, data_type, fixed_point_position);
+    ASSERT_VX_OBJECT(in_tensor, VX_TYPE_TENSOR);
+    ASSERT_VX_OBJECT(out_tensor, VX_TYPE_TENSOR);
+
+    ASSERT_VX_OBJECT(node = vxCreateGenericNode(graph, kernel), VX_TYPE_NODE);
+    VX_CALL(vxSetParameterByIndex(node, 0, (vx_reference)in_tensor));
+    VX_CALL(vxSetParameterByIndex(node, 1, (vx_reference)out_tensor));
+
+    ASSERT_VX_OBJECT(parameter = vxGetParameterByIndex(node, 0), VX_TYPE_PARAMETER);
+    VX_CALL(vxQueryParameter(parameter, VX_PARAMETER_REF, &test_tensor, sizeof(test_tensor)));
+    ASSERT_VX_OBJECT(test_tensor, VX_TYPE_TENSOR);
+    VX_CALL(vxReleaseTensor(&test_tensor));
+
+    VX_CALL(vxReleaseNode(&node));
+    ASSERT(node == 0);
+
+    VX_CALL(vxReleaseKernel(&kernel));
+    ASSERT(kernel == 0);
+
+    VX_CALL(vxReleaseParameter(&parameter));
+    ASSERT(parameter == 0);
+
+    VX_CALL(vxReleaseGraph(&graph));
+    EXPECT_EQ_PTR(0, graph);
+
+    ASSERT_VX_OBJECT(kernel = vxGetKernelByName(context, VX_USER_KERNEL_CONFORMANCE_NAME), VX_TYPE_KERNEL);
+    VX_CALL(vxRemoveKernel(kernel));
+
+    VX_CALL(vxReleaseTensor(&in_tensor));
+    EXPECT_EQ_PTR(NULL, in_tensor);
+
+    VX_CALL(vxReleaseTensor(&out_tensor));
+    EXPECT_EQ_PTR(NULL, out_tensor);
+
+    ct_free_mem(tensor_dims);
+}
+
+TEST(VxParameterOfNNAndNNEF, test_vxReleaseParameter)
+{
+    vx_context context = context_->vx_context_;
+    vx_graph graph = 0;
+    vx_node node = 0;
+    vx_kernel kernel = 0;
+    vx_parameter parameter = 0;
+    vx_enum kernel_enum = 0u;
+    vx_tensor in_tensor = 0;
+    vx_tensor out_tensor = 0;
+    vx_uint32 numParams = 2;
+    vx_enum data_type = VX_TYPE_INT16;
+    vx_int8 fixed_point_position = Q78_FIXED_POINT_POSITION;
+    vx_size max_dims = MAX_DIMS_TEST1;
+
+    uint64_t rng;
+    {
+        uint64_t * seed = &CT()->seed_;
+        ASSERT(!!seed);
+        CT_RNG_INIT(rng, *seed);
+    }
+
+    VX_CALL(vxAllocateUserKernelId(context, &kernel_enum));
+    ASSERT_VX_OBJECT(kernel = vxAddUserKernel(
+        context,
+        VX_USER_KERNEL_CONFORMANCE_NAME,
+        kernel_enum,
+        userKernelProc1,
+        numParams,
+        userKernelValidate2,
+        NULL,
+        NULL), VX_TYPE_KERNEL);
+
+    VX_CALL(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+    VX_CALL(vxAddParameterToKernel(kernel, 1, VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+    VX_CALL(vxFinalizeKernel(kernel));
+
+    ASSERT_VX_OBJECT(graph = vxCreateGraph(context), VX_TYPE_GRAPH);
+
+    size_t * const tensor_dims = ct_alloc_mem(sizeof(*tensor_dims) * max_dims);
+    ASSERT(tensor_dims);
+
+    for (vx_size i = 0; i < max_dims; ++i)
+    {
+        tensor_dims[i] = (size_t)CT_RNG_NEXT_INT(rng, TEST_TENSOR_MIN_DIM_SZ, TEST_TENSOR_MAX_DIM_SZ + 1);
+    }
+
+    in_tensor = vxCreateTensor(context, max_dims, tensor_dims, data_type, fixed_point_position);
+    out_tensor = vxCreateTensor(context, max_dims, tensor_dims, data_type, fixed_point_position);
+    ASSERT_VX_OBJECT(in_tensor, VX_TYPE_TENSOR);
+    ASSERT_VX_OBJECT(out_tensor, VX_TYPE_TENSOR);
+
+    ASSERT_VX_OBJECT(node = vxCreateGenericNode(graph, kernel), VX_TYPE_NODE);
+    VX_CALL(vxSetParameterByIndex(node, 0, (vx_reference)in_tensor));
+    VX_CALL(vxSetParameterByIndex(node, 1, (vx_reference)out_tensor));
+
+    ASSERT_VX_OBJECT(parameter = vxGetParameterByIndex(node, 0), VX_TYPE_PARAMETER);
+
+    VX_CALL(vxReleaseNode(&node));
+    ASSERT(node == 0);
+
+    VX_CALL(vxReleaseKernel(&kernel));
+    ASSERT(kernel == 0);
+
+    VX_CALL(vxReleaseParameter(&parameter));
+    ASSERT(parameter == 0);
+
+    VX_CALL(vxReleaseGraph(&graph));
+    EXPECT_EQ_PTR(0, graph);
+
+    ASSERT_VX_OBJECT(kernel = vxGetKernelByName(context, VX_USER_KERNEL_CONFORMANCE_NAME), VX_TYPE_KERNEL);
+    VX_CALL(vxRemoveKernel(kernel));
+
+    VX_CALL(vxReleaseTensor(&in_tensor));
+    EXPECT_EQ_PTR(NULL, in_tensor);
+
+    VX_CALL(vxReleaseTensor(&out_tensor));
+    EXPECT_EQ_PTR(NULL, out_tensor);
+
+    ct_free_mem(tensor_dims);
+}
+
+#endif //OPENVX_CONFORMANCE_NEURAL_NETWORKS || OPENVX_CONFORMANCE_NNEF_IMPORT
+
+#if defined(OPENVX_USE_ENHANCED_VISION) || defined(OPENVX_CONFORMANCE_VISION)
 
 TESTCASE_TESTS(Graph,
         testTwoNodes,
@@ -2353,10 +4107,74 @@
         testNodePerformance,
         testGraphPerformance,
         testKernelName,
-        testAllocateUserKernelId,
-        testAllocateUserKernelLibraryId,
         testReplicateNode,
         testImageContainmentRelationship,
         testVerifyGraphLeak,
-        testGraphState
+        testGraphState,
+        testvxIsGraphVerified,
+        testvxProcessGraph,
+        testvxWaitGraph,
+        testvxVerifyGraph,
+        testvxQueryNode,
+        testvxReleaseNode,
+        testvxSetNodeAttribute
         )
+
+#endif
+
+TESTCASE_TESTS(GraphBase,
+        testAllocateUserKernelId,
+        testAllocateUserKernelLibraryId,
+        testRegisterUserStructWithName,
+        testGetUserStructNameByEnum,
+        testGetUserStructEnumByName,
+        testvxCreateGraph,
+        testvxIsGraphVerifiedBase,
+        testvxProcessGraphBase,
+        testvxQueryGraph,
+        testvxWaitGraphBase,
+        testvxVerifyGraphBase,
+        testvxScheduleGraph,
+        testvxReleaseGraph,
+        testvxQueryNodeBase,
+        testvxReleaseNodeBase,
+        testvxRemoveNodeBase,
+        testvxReplicateNodeBase,
+        testvxSetNodeAttributeBase
+        )
+
+#ifdef OPENVX_USE_ENHANCED_VISION
+
+TESTCASE_TESTS(GraphEnhanced, testKernelName_enhanced)
+
+#endif
+
+#if defined(OPENVX_CONFORMANCE_NEURAL_NETWORKS) || defined(OPENVX_CONFORMANCE_NNEF_IMPORT)
+
+TESTCASE_TESTS(UserKernelsOfNNAndNNEF,
+        testvxAddUserKernel,
+        testvxRemoveKernel,
+        testvxFinalizeKernel,
+        testvxAddParameterToKernel,
+        testvxSetKernelAttribute)
+
+TESTCASE_TESTS(MetaFormatOfNNAndNNEF,
+        testvxSetMetaFormatAttribute,
+        testvxSetMetaFormatFromReference,
+        testvxQueryMetaFormatAttribute)
+
+TESTCASE_TESTS(VxKernelOfNNAndNNEF,
+        testvxGetKernelByEnum,
+        testvxGetKernelByName,
+        testvxQueryKernel,
+        testvxReleaseKernel)
+
+TESTCASE_TESTS(VxParameterOfNNAndNNEF,
+        test_vxGetKernelParameterByIndex,
+        test_vxQueryParameter,
+        test_vxSetParameterByIndex,
+        test_vxSetParameterByReference,
+        test_vxGetParameterByIndex,
+        test_vxReleaseParameter)
+
+#endif
diff --git a/test_conformance/test_graph_callbacks.c b/test_conformance/test_graph_callbacks.c
index 351619f..8211d71 100644
--- a/test_conformance/test_graph_callbacks.c
+++ b/test_conformance/test_graph_callbacks.c
@@ -1,4 +1,4 @@
-/* 
+/*
 
  * Copyright (c) 2012-2017 The Khronos Group Inc.
  *
@@ -15,6 +15,8 @@
  * limitations under the License.
  */
 
+#if defined(OPENVX_USE_ENHANCED_VISION) || defined(OPENVX_CONFORMANCE_VISION)
+
 #include "test_engine/test.h"
 #include <VX/vx.h>
 #include <VX/vxu.h>
@@ -200,3 +202,5 @@
         testAbandon,
         testCallbackOrder
         )
+
+#endif //OPENVX_USE_ENHANCED_VISION || OPENVX_CONFORMANCE_VISION
diff --git a/test_conformance/test_graph_delay.c b/test_conformance/test_graph_delay.c
index aa319fb..ef2a1c1 100644
--- a/test_conformance/test_graph_delay.c
+++ b/test_conformance/test_graph_delay.c
@@ -15,7 +15,10 @@
  * limitations under the License.
  */
 
+#if defined(OPENVX_USE_ENHANCED_VISION) || defined(OPENVX_CONFORMANCE_VISION)
+
 #include "test_engine/test.h"
+#include "test_tensor_util.h"
 #include <VX/vx.h>
 #include <VX/vxu.h>
 
@@ -396,9 +399,419 @@
     ASSERT(delay == 0);
 }
 
+typedef struct
+{
+    const char* testName;
+    const char* p;
+    vx_enum item_type;
+} Obj_Array_Arg;
+
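+/* Creates an exemplar reference of the requested type with fixed attributes;
+ * returns NULL for unsupported types. */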
+static vx_reference own_create_exemplar(vx_context context, vx_enum item_type)
+{
+    vx_reference exemplar = NULL;
+
+    vx_uint8 value = 0;
+    vx_enum format = VX_DF_IMAGE_U8;
+    vx_uint32 obj_width = 128, obj_height = 128;
+    vx_enum obj_item_type = VX_TYPE_UINT8;
+    vx_size capacity = 100;
+    vx_size levels = 8;
+    vx_float32 scale = 0.5f;
+    vx_size bins = 36;
+    vx_int32 offset = 0;
+    vx_uint32 range = 360;
+    vx_enum thresh_type = VX_THRESHOLD_TYPE_BINARY;
+    vx_size lut_num_items = 100;
+    vx_size m = 5, n = 5;
+
+    switch (item_type)
+    {
+    case VX_TYPE_IMAGE:
+        exemplar = (vx_reference)vxCreateImage(context, obj_width, obj_height, format);
+        break;
+    case VX_TYPE_ARRAY:
+        exemplar = (vx_reference)vxCreateArray(context, obj_item_type, capacity);
+        break;
+    case VX_TYPE_PYRAMID:
+        exemplar = (vx_reference)vxCreatePyramid(context, levels, scale, obj_width, obj_height, format);
+        break;
+    case VX_TYPE_SCALAR:
+        exemplar = (vx_reference)vxCreateScalar(context, obj_item_type, &value);
+        break;
+    case VX_TYPE_MATRIX:
+        exemplar = (vx_reference)vxCreateMatrix(context, obj_item_type, m, n);
+        break;
+    case VX_TYPE_DISTRIBUTION:
+        exemplar = (vx_reference)vxCreateDistribution(context, bins, offset, range);
+        break;
+    case VX_TYPE_REMAP:
+        exemplar = (vx_reference)vxCreateRemap(context, obj_width, obj_height, obj_width, obj_height);
+        break;
+    case VX_TYPE_LUT:
+        exemplar = (vx_reference)vxCreateLUT(context, obj_item_type, lut_num_items);
+        break;
+    case VX_TYPE_THRESHOLD:
+        exemplar = (vx_reference)vxCreateThresholdForImage(context, thresh_type, format, format);
+        break;
+    default:
+        break;
+    }
+
+    return exemplar;
+}
+
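+/* Checks that 'item' has the same type and the same type-specific attributes
+ * as the reference 'ref' it was created from. */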
+static void own_check_meta(vx_reference item, vx_reference ref)
+{
+    vx_enum ref_type, item_type;
+
+    VX_CALL(vxQueryReference(ref, VX_REFERENCE_TYPE, &ref_type, sizeof(ref_type)));
+
+    VX_CALL(vxQueryReference(item, VX_REFERENCE_TYPE, &item_type, sizeof(item_type)));
+
+    ASSERT(item_type == ref_type);
+
+    switch (item_type)
+    {
+    case VX_TYPE_IMAGE:
+    {
+        vx_uint32 ref_width, item_width;
+        vx_uint32 ref_height, item_height;
+        vx_df_image ref_format, item_format;
+
+        VX_CALL(vxQueryImage((vx_image)ref, VX_IMAGE_WIDTH, &ref_width, sizeof(ref_width)));
+        VX_CALL(vxQueryImage((vx_image)ref, VX_IMAGE_HEIGHT, &ref_height, sizeof(ref_height)));
+        VX_CALL(vxQueryImage((vx_image)ref, VX_IMAGE_FORMAT, &ref_format, sizeof(ref_format)));
+
+        VX_CALL(vxQueryImage((vx_image)item, VX_IMAGE_WIDTH, &item_width, sizeof(item_width)));
+        VX_CALL(vxQueryImage((vx_image)item, VX_IMAGE_HEIGHT, &item_height, sizeof(item_height)));
+        VX_CALL(vxQueryImage((vx_image)item, VX_IMAGE_FORMAT, &item_format, sizeof(item_format)));
+
+        ASSERT(ref_width == item_width);
+        ASSERT(ref_height == item_height);
+        ASSERT(ref_format == item_format);
+    }   break;
+    case VX_TYPE_ARRAY:
+    {
+        vx_size ref_capacity, item_capacity;
+        vx_enum ref_itemtype, item_itemtype;
+
+        VX_CALL(vxQueryArray((vx_array)ref, VX_ARRAY_CAPACITY, &ref_capacity, sizeof(ref_capacity)));
+        VX_CALL(vxQueryArray((vx_array)ref, VX_ARRAY_ITEMTYPE, &ref_itemtype, sizeof(ref_itemtype)));
+
+        VX_CALL(vxQueryArray((vx_array)item, VX_ARRAY_CAPACITY, &item_capacity, sizeof(item_capacity)));
+        VX_CALL(vxQueryArray((vx_array)item, VX_ARRAY_ITEMTYPE, &item_itemtype, sizeof(item_itemtype)));
+
+        ASSERT(ref_capacity == item_capacity);
+        ASSERT(ref_itemtype == item_itemtype);
+    }   break;
+    case VX_TYPE_PYRAMID:
+    {
+        vx_uint32 ref_width, item_width;
+        vx_uint32 ref_height, item_height;
+        vx_df_image ref_format, item_format;
+        vx_size ref_levels, item_levels;
+        vx_float32 ref_scale, item_scale;
+
+        VX_CALL(vxQueryPyramid((vx_pyramid)ref, VX_PYRAMID_WIDTH, &ref_width, sizeof(ref_width)));
+        VX_CALL(vxQueryPyramid((vx_pyramid)ref, VX_PYRAMID_HEIGHT, &ref_height, sizeof(ref_height)));
+        VX_CALL(vxQueryPyramid((vx_pyramid)ref, VX_PYRAMID_FORMAT, &ref_format, sizeof(ref_format)));
+        VX_CALL(vxQueryPyramid((vx_pyramid)ref, VX_PYRAMID_LEVELS, &ref_levels, sizeof(ref_levels)));
+        VX_CALL(vxQueryPyramid((vx_pyramid)ref, VX_PYRAMID_SCALE, &ref_scale, sizeof(ref_scale)));
+
+        VX_CALL(vxQueryPyramid((vx_pyramid)item, VX_PYRAMID_WIDTH, &item_width, sizeof(item_width)));
+        VX_CALL(vxQueryPyramid((vx_pyramid)item, VX_PYRAMID_HEIGHT, &item_height, sizeof(item_height)));
+        VX_CALL(vxQueryPyramid((vx_pyramid)item, VX_PYRAMID_FORMAT, &item_format, sizeof(item_format)));
+        VX_CALL(vxQueryPyramid((vx_pyramid)item, VX_PYRAMID_LEVELS, &item_levels, sizeof(item_levels)));
+        VX_CALL(vxQueryPyramid((vx_pyramid)item, VX_PYRAMID_SCALE, &item_scale, sizeof(item_scale)));
+
+        ASSERT(ref_width == item_width);
+        ASSERT(ref_height == item_height);
+        ASSERT(ref_format == item_format);
+        ASSERT(ref_levels == item_levels);
+        ASSERT(ref_scale == item_scale);
+    }   break;
+    case VX_TYPE_SCALAR:
+    {
+        vx_enum ref_type, item_type;
+
+        VX_CALL(vxQueryScalar((vx_scalar)ref, VX_SCALAR_TYPE, &ref_type, sizeof(ref_type)));
+
+        VX_CALL(vxQueryScalar((vx_scalar)item, VX_SCALAR_TYPE, &item_type, sizeof(item_type)));
+
+        ASSERT(ref_type == item_type);
+    }   break;
+    case VX_TYPE_MATRIX:
+    {
+        vx_enum ref_type, item_type;
+        vx_size ref_rows, item_rows;
+        vx_size ref_cols, item_cols;
+
+        VX_CALL(vxQueryMatrix((vx_matrix)ref, VX_MATRIX_TYPE, &ref_type, sizeof(ref_type)));
+        VX_CALL(vxQueryMatrix((vx_matrix)ref, VX_MATRIX_ROWS, &ref_rows, sizeof(ref_rows)));
+        VX_CALL(vxQueryMatrix((vx_matrix)ref, VX_MATRIX_COLUMNS, &ref_cols, sizeof(ref_cols)));
+
+        VX_CALL(vxQueryMatrix((vx_matrix)item, VX_MATRIX_TYPE, &item_type, sizeof(item_type)));
+        VX_CALL(vxQueryMatrix((vx_matrix)item, VX_MATRIX_ROWS, &item_rows, sizeof(item_rows)));
+        VX_CALL(vxQueryMatrix((vx_matrix)item, VX_MATRIX_COLUMNS, &item_cols, sizeof(item_cols)));
+
+        ASSERT(ref_type == item_type);
+        ASSERT(ref_rows == item_rows);
+        ASSERT(ref_cols == item_cols);
+    }   break;
+    case VX_TYPE_DISTRIBUTION:
+    {
+        vx_size ref_bins, item_bins;
+        vx_int32 ref_offset, item_offset;
+        vx_uint32 ref_range, item_range;
+
+        VX_CALL(vxQueryDistribution((vx_distribution)ref, VX_DISTRIBUTION_BINS, &ref_bins, sizeof(ref_bins)));
+        VX_CALL(vxQueryDistribution((vx_distribution)ref, VX_DISTRIBUTION_OFFSET, &ref_offset, sizeof(ref_offset)));
+        VX_CALL(vxQueryDistribution((vx_distribution)ref, VX_DISTRIBUTION_RANGE, &ref_range, sizeof(ref_range)));
+
+        VX_CALL(vxQueryDistribution((vx_distribution)item, VX_DISTRIBUTION_BINS, &item_bins, sizeof(item_bins)));
+        VX_CALL(vxQueryDistribution((vx_distribution)item, VX_DISTRIBUTION_OFFSET, &item_offset, sizeof(item_offset)));
+        VX_CALL(vxQueryDistribution((vx_distribution)item, VX_DISTRIBUTION_RANGE, &item_range, sizeof(item_range)));
+
+        ASSERT(ref_bins == item_bins);
+        ASSERT(ref_offset == item_offset);
+        ASSERT(ref_range == item_range);
+    }   break;
+    case VX_TYPE_REMAP:
+    {
+        vx_uint32 ref_srcwidth, item_srcwidth;
+        vx_uint32 ref_srcheight, item_srcheight;
+        vx_uint32 ref_dstwidth, item_dstwidth;
+        vx_uint32 ref_dstheight, item_dstheight;
+
+        VX_CALL(vxQueryRemap((vx_remap)ref, VX_REMAP_SOURCE_WIDTH, &ref_srcwidth, sizeof(ref_srcwidth)));
+        VX_CALL(vxQueryRemap((vx_remap)ref, VX_REMAP_SOURCE_HEIGHT, &ref_srcheight, sizeof(ref_srcheight)));
+        VX_CALL(vxQueryRemap((vx_remap)ref, VX_REMAP_DESTINATION_WIDTH, &ref_dstwidth, sizeof(ref_dstwidth)));
+        VX_CALL(vxQueryRemap((vx_remap)ref, VX_REMAP_DESTINATION_HEIGHT, &ref_dstheight, sizeof(ref_dstheight)));
+
+        VX_CALL(vxQueryRemap((vx_remap)item, VX_REMAP_SOURCE_WIDTH, &item_srcwidth, sizeof(item_srcwidth)));
+        VX_CALL(vxQueryRemap((vx_remap)item, VX_REMAP_SOURCE_HEIGHT, &item_srcheight, sizeof(item_srcheight)));
+        VX_CALL(vxQueryRemap((vx_remap)item, VX_REMAP_DESTINATION_WIDTH, &item_dstwidth, sizeof(item_dstwidth)));
+        VX_CALL(vxQueryRemap((vx_remap)item, VX_REMAP_DESTINATION_HEIGHT, &item_dstheight, sizeof(item_dstheight)));
+
+        ASSERT(ref_srcwidth == item_srcwidth);
+        ASSERT(ref_srcheight == item_srcheight);
+        ASSERT(ref_dstwidth == item_dstwidth);
+        ASSERT(ref_dstheight == item_dstheight);
+    }   break;
+    case VX_TYPE_LUT:
+    {
+        vx_enum ref_type, item_type;
+        vx_size ref_count, item_count;
+
+        VX_CALL(vxQueryLUT((vx_lut)ref, VX_LUT_TYPE, &ref_type, sizeof(ref_type)));
+        VX_CALL(vxQueryLUT((vx_lut)ref, VX_LUT_COUNT, &ref_count, sizeof(ref_count)));
+
+        VX_CALL(vxQueryLUT((vx_lut)item, VX_LUT_TYPE, &item_type, sizeof(item_type)));
+        VX_CALL(vxQueryLUT((vx_lut)item, VX_LUT_COUNT, &item_count, sizeof(item_count)));
+
+        ASSERT(ref_type == item_type);
+        ASSERT(ref_count == item_count);
+    }   break;
+    case VX_TYPE_THRESHOLD:
+    {
+        vx_enum ref_type, item_type;
+
+        VX_CALL(vxQueryThreshold((vx_threshold)ref, VX_THRESHOLD_TYPE, &ref_type, sizeof(ref_type)));
+
+        VX_CALL(vxQueryThreshold((vx_threshold)item, VX_THRESHOLD_TYPE, &item_type, sizeof(item_type)));
+
+        ASSERT(ref_type == item_type);
+    }   break;
+    default:
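+        /* any other exemplar type is unexpected in this test: fail the comparison */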
+        ASSERT(0 == 1);
+    }
+}
+
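+/* Test parameter generation: expands into one named test configuration per exemplar
+ * type handled by own_check_meta() above. */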
+#define ADD_VX_OBJECT_ARRAY_TYPES(testArgName, nextmacro, ...) \
+    CT_EXPAND(nextmacro(testArgName "/VX_TYPE_IMAGE", __VA_ARGS__, VX_TYPE_IMAGE)), \
+    CT_EXPAND(nextmacro(testArgName "/VX_TYPE_ARRAY", __VA_ARGS__, VX_TYPE_ARRAY)), \
+    CT_EXPAND(nextmacro(testArgName "/VX_TYPE_PYRAMID", __VA_ARGS__, VX_TYPE_PYRAMID)), \
+    CT_EXPAND(nextmacro(testArgName "/VX_TYPE_SCALAR", __VA_ARGS__, VX_TYPE_SCALAR)), \
+    CT_EXPAND(nextmacro(testArgName "/VX_TYPE_MATRIX", __VA_ARGS__, VX_TYPE_MATRIX)), \
+    CT_EXPAND(nextmacro(testArgName "/VX_TYPE_DISTRIBUTION", __VA_ARGS__, VX_TYPE_DISTRIBUTION)), \
+    CT_EXPAND(nextmacro(testArgName "/VX_TYPE_REMAP", __VA_ARGS__, VX_TYPE_REMAP)), \
+    CT_EXPAND(nextmacro(testArgName "/VX_TYPE_LUT", __VA_ARGS__, VX_TYPE_LUT)), \
+    CT_EXPAND(nextmacro(testArgName "/VX_TYPE_THRESHOLD", __VA_ARGS__, VX_TYPE_THRESHOLD ))
+
+#define PARAMETERS \
+    CT_GENERATE_PARAMETERS("object_array", ADD_VX_OBJECT_ARRAY_TYPES, ARG, NULL)
+
+TEST_WITH_ARG(GraphDelay, testObjectArray, Obj_Array_Arg, PARAMETERS)
+{
+    vx_context context = context_->vx_context_;
+    vx_delay delay;
+    vx_reference exemplar = NULL;
+    vx_size num_items = 1;
+    vx_enum item_type = arg_->item_type;
+
+    vx_object_array object_array = 0;
+    vx_object_array delayobjarray;
+
+    vx_reference expect_item = NULL;
+    vx_enum expect_type = VX_TYPE_INVALID;
+    vx_size expect_num_items = 0;
+
+    vx_uint32 i;
+
+    ASSERT_VX_OBJECT(exemplar = own_create_exemplar(context, item_type), (enum vx_type_e)item_type);
+
+    /* 1. check if object array can be created with allowed types*/
+    ASSERT_VX_OBJECT(object_array = vxCreateObjectArray(context, exemplar, num_items), VX_TYPE_OBJECT_ARRAY);
+
+    /* 2. create delay with object array*/
+    ASSERT_VX_OBJECT(delay = vxCreateDelay(context, (vx_reference)object_array, 1), VX_TYPE_DELAY);
+
+    ASSERT_VX_OBJECT(delayobjarray = (vx_object_array)vxGetReferenceFromDelay(delay, 0), VX_TYPE_OBJECT_ARRAY);
+
+    VX_CALL(vxQueryObjectArray(delayobjarray, VX_OBJECT_ARRAY_ITEMTYPE, &expect_type, sizeof(expect_type)));
+    ASSERT_EQ_INT(item_type, expect_type);
+
+    VX_CALL(vxQueryObjectArray(delayobjarray, VX_OBJECT_ARRAY_NUMITEMS, &expect_num_items, sizeof(expect_num_items)));
+    ASSERT_EQ_INT(num_items, expect_num_items);
+
+    for (i = 0u; i < num_items; i++)
+    {
+        ASSERT_VX_OBJECT(expect_item = vxGetObjectArrayItem(delayobjarray, i), (enum vx_type_e)item_type);
+
+        ASSERT_NO_FAILURE(own_check_meta(expect_item, exemplar));
+
+        VX_CALL(vxReleaseReference(&expect_item));
+        ASSERT(expect_item == 0);
+    }
+
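+    /* an index equal to num_items is out of range and is expected to return an error reference */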
+    expect_item = vxGetObjectArrayItem(delayobjarray, (vx_uint32)num_items);
+    ASSERT_NE_VX_STATUS(VX_SUCCESS, vxGetStatus((vx_reference)expect_item));
+
+    VX_CALL(vxReleaseReference(&exemplar));
+    ASSERT(exemplar == 0);
+
+    VX_CALL(vxReleaseObjectArray(&object_array));
+    ASSERT(object_array == 0);
+
+    VX_CALL(vxReleaseDelay(&delay));
+    ASSERT(delay == 0);
+}
+
+#endif //OPENVX_USE_ENHANCED_VISION || OPENVX_CONFORMANCE_VISION
+
+#ifdef OPENVX_USE_ENHANCED_VISION
+
+/* *****************testTensor tests*******************************/
+TESTCASE(GraphDelayTensor, CT_VXContext, ct_setup_vx_context, 0)
+
+typedef struct
+{
+    const char *name;
+    enum TestTensorDF fmt;
+} test_tensor_op_arg;
+
+TEST_WITH_ARG(GraphDelayTensor, testTensor, test_tensor_op_arg,
+    ARG("Q78_DELAYTENSOR", TT_Q78),
+    ARG("U8_DELAYTENSOR", TT_U8),
+    ARG("S8_DELAYTENSOR", TT_S8))
+{
+    vx_context context = context_->vx_context_;
+    const enum TestTensorDF fmt = arg_->fmt;
+    vx_size max_dims = 0;
+    vx_enum data_type = 0;
+    vx_uint8 fixed_point_position = 0;
+    vx_size sizeof_data_type = 0;
+    vx_delay delay;
+    vx_status status;
+    vx_size delaytensor_dims;
+    vx_enum delaytensor_datatype;
+    vx_uint8 delaytensor_fppos;
+    vx_size delaytensor_in0dims[MAX_TENSOR_DIMS];
+
+    ownUnpackFormat(fmt, &data_type, &fixed_point_position, &sizeof_data_type);
+
+    VX_CALL(vxQueryContext(context, VX_CONTEXT_MAX_TENSOR_DIMS, &max_dims, sizeof(max_dims)));
+    ASSERT(max_dims > 3);
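+    /* the context must support more than 3 tensor dimensions; the tensor created below uses max_dims dimensions */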
+
+    size_t * const in0_dims = ct_alloc_mem(sizeof(*in0_dims) * max_dims);
+    ASSERT(in0_dims);
+
+    uint64_t rng;
+    {   // TODO: ownTestGetRNG() ?
+        uint64_t * seed = &CT()->seed_;
+        ASSERT(!!seed);
+        CT_RNG_INIT(rng, *seed);
+    }
+
+    for (vx_size i = 0; i < max_dims; ++i)
+    {
+        const size_t new_dim = (size_t)CT_RNG_NEXT_INT(rng, TEST_TENSOR_MIN_DIM_SZ, TEST_TENSOR_MAX_DIM_SZ + 1);
+
+        const int mask0 = !!CT_RNG_NEXT_INT(rng, 0, TEST_TENSOR_INVERSE_MASK_PROBABILITY);
+
+        // Note: Broadcasting is described as for each dim, either in0 and in1 have the same
+        // size or "1" for a broadcasted value. And the output is strictly determined by them
+        // so that the implementation is required to support
+        // { in0, in1, out } = { 1, 5, 5 } but not { in0, in1, out } = { 1, 1, 5 }
+        // even though the KHR sample implementation currently supports both.
+        in0_dims[i] = mask0 ? new_dim : 1;
+    }
+
+    vx_tensor tensor = vxCreateTensor(context, max_dims, in0_dims, data_type, fixed_point_position);
+    ASSERT_VX_OBJECT(tensor, VX_TYPE_TENSOR);
+
+    ASSERT_VX_OBJECT(delay = vxCreateDelay(context, (vx_reference)tensor, 1), VX_TYPE_DELAY);
+
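+    /* the tensor obtained from delay slot 0 is expected to carry the same meta-data as the exemplar;
+     * this is verified by the queries below */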
+    vx_tensor delaytensor;
+    ASSERT_VX_OBJECT(delaytensor = (vx_tensor)vxGetReferenceFromDelay(delay, 0), VX_TYPE_TENSOR);
+
+    status = vxQueryTensor(delaytensor, VX_TENSOR_NUMBER_OF_DIMS, (void *)&delaytensor_dims, sizeof(delaytensor_dims));
+    EXPECT_EQ_VX_STATUS(VX_SUCCESS, status);
+    EXPECT_EQ_INT(max_dims, delaytensor_dims);
+
+    status = vxQueryTensor(delaytensor, VX_TENSOR_DATA_TYPE, (void *)&delaytensor_datatype, sizeof(delaytensor_datatype));
+    EXPECT_EQ_VX_STATUS(VX_SUCCESS, status);
+    EXPECT_EQ_INT(data_type, delaytensor_datatype);
+
+    status = vxQueryTensor(delaytensor, VX_TENSOR_FIXED_POINT_POSITION, (void *)&delaytensor_fppos, sizeof(delaytensor_fppos));
+    EXPECT_EQ_VX_STATUS(VX_SUCCESS, status);
+    EXPECT_EQ_INT(fixed_point_position, delaytensor_fppos);
+
+    status = vxQueryTensor(delaytensor, VX_TENSOR_DIMS, (void *)&delaytensor_in0dims, sizeof(vx_size)*max_dims);
+    EXPECT_EQ_VX_STATUS(VX_SUCCESS, status);
+
+    for (vx_size i = 0; i < max_dims; i++)
+    {
+        EXPECT_EQ_INT(in0_dims[i], delaytensor_in0dims[i]);
+    }
+
+    VX_CALL(vxReleaseDelay(&delay));
+    VX_CALL(vxReleaseTensor(&tensor));
+
+    ASSERT(delay == 0);
+    ASSERT(tensor == 0);
+
+    ct_free_mem(in0_dims);
+}
+
+#endif //OPENVX_USE_ENHANCED_VISION
+
+#if defined(OPENVX_USE_ENHANCED_VISION) || defined(OPENVX_CONFORMANCE_VISION)
+
 TESTCASE_TESTS(
     GraphDelay,
     testSimple,
     testPyramid,
-    testRegisterAutoAging
+    testRegisterAutoAging,
+    testObjectArray
     )
+
+#endif
+
+#ifdef OPENVX_USE_ENHANCED_VISION
+
+TESTCASE_TESTS(
+    GraphDelayTensor,
+    testTensor
+    )
+#endif
diff --git a/test_conformance/test_graph_pipeline.c b/test_conformance/test_graph_pipeline.c
new file mode 100644
index 0000000..dc8b865
--- /dev/null
+++ b/test_conformance/test_graph_pipeline.c
@@ -0,0 +1,3537 @@
+/*
+ * Copyright (c) 2012-2018 The Khronos Group Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifdef OPENVX_USE_PIPELINING
+
+#include "test_engine/test.h"
+#include <VX/vx.h>
+#include <VX/vx_khr_pipelining.h>
+#include "math.h"
+#include <limits.h>
+
+TESTCASE(GraphPipeline,  CT_VXContext, ct_setup_vx_context, 0)
+
+#define MAX_NUM_BUF               (8u)
+#define MAX_IMAGE_PLANES          (3u)
+#define MAX_NUM_OBJ_ARR_ELEMENTS  (4u)
+#define GRAPH_MAX_DATA_REF_QUEUE  (16u)
+#define DATA_REF_Q_MAX_OBJECTS    (64u)
+
+#define GRAPH_CONSUMED_EVENT      (1u)
+#define NODE0_COMPLETED_EVENT     (2u)
+#define NODE1_COMPLETED_EVENT     (3u)
+#define GRAPH_COMPLETED_EVENT     (4u)
+
+typedef struct {
+    const char* testName;
+    int width, height;
+    int num_buf;
+    int loop_count;
+} Arg;
+
+#define ADD_BUF_1(testArgName, nextmacro, ...) \
+    CT_EXPAND(nextmacro(testArgName "/buf=1", __VA_ARGS__, 1))
+
+#define ADD_BUF_2(testArgName, nextmacro, ...) \
+    CT_EXPAND(nextmacro(testArgName "/buf=2", __VA_ARGS__, 2))
+
+#define ADD_BUF_3(testArgName, nextmacro, ...) \
+    CT_EXPAND(nextmacro(testArgName "/buf=3", __VA_ARGS__, 3))
+
+#define ADD_LOOP_0(testArgName, nextmacro, ...) \
+    CT_EXPAND(nextmacro(testArgName "/loop_count=0", __VA_ARGS__, 0))
+
+#define ADD_LOOP_1(testArgName, nextmacro, ...) \
+    CT_EXPAND(nextmacro(testArgName "/loop_count=1", __VA_ARGS__, 1))
+
+#define ADD_LOOP_10(testArgName, nextmacro, ...) \
+    CT_EXPAND(nextmacro(testArgName "/loop_count=10", __VA_ARGS__, 10))
+
+#define ADD_LOOP_1000(testArgName, nextmacro, ...) \
+    CT_EXPAND(nextmacro(testArgName "/loop_count=1000", __VA_ARGS__, 1000))
+
+#define ADD_LOOP_100000(testArgName, nextmacro, ...) \
+    CT_EXPAND(nextmacro(testArgName "/loop_count=100000", __VA_ARGS__, 100000))
+
+#define ADD_LOOP_1000000(testArgName, nextmacro, ...) \
+    CT_EXPAND(nextmacro(testArgName "/loop_count=1000000", __VA_ARGS__, 1000000))
+
+#define ADD_SIZE_2048x1024(testArgName, nextmacro, ...) \
+    CT_EXPAND(nextmacro(testArgName "/sz=2048x1024", __VA_ARGS__, 2048, 1024))
+
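+/* Test parameter generation: each row combines an image size, a buffer count and a loop count
+ * into one named pipelining test configuration. ADD_SIZE_64x64 is assumed to be provided by the
+ * shared test-engine headers. */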
+#define PARAMETERS \
+    CT_GENERATE_PARAMETERS("random", ADD_SIZE_64x64, ADD_BUF_3, ADD_LOOP_0, ARG), \
+    CT_GENERATE_PARAMETERS("random", ADD_SIZE_64x64, ADD_BUF_1, ADD_LOOP_0, ARG), \
+    CT_GENERATE_PARAMETERS("random", ADD_SIZE_64x64, ADD_BUF_3, ADD_LOOP_1, ARG), \
+    CT_GENERATE_PARAMETERS("random", ADD_SIZE_64x64, ADD_BUF_3, ADD_LOOP_1000, ARG), \
+    CT_GENERATE_PARAMETERS("random", ADD_SIZE_64x64, ADD_BUF_3, ADD_LOOP_1000, ARG), \
+    CT_GENERATE_PARAMETERS("random", ADD_SIZE_64x64, ADD_BUF_1, ADD_LOOP_1000, ARG), \
+    CT_GENERATE_PARAMETERS("random", ADD_SIZE_64x64, ADD_BUF_2, ADD_LOOP_1000, ARG), \
+    CT_GENERATE_PARAMETERS("random", ADD_SIZE_64x64, ADD_BUF_2, ADD_LOOP_100000, ARG), \
+    CT_GENERATE_PARAMETERS("random", ADD_SIZE_2048x1024, ADD_BUF_3, ADD_LOOP_1000, ARG), \
+
+
+/*
+ * Utility API used to add a graph parameter from a node, node parameter index
+ */
+static void add_graph_parameter_by_node_index(vx_graph graph, vx_node node, vx_uint32 node_parameter_index)
+{
+    vx_parameter parameter = vxGetParameterByIndex(node, node_parameter_index);
+
+    vxAddParameterToGraph(graph, parameter);
+    vxReleaseParameter(&parameter);
+}
+
+
+
+static void referenceNot(CT_Image src, CT_Image dst)
+{
+    uint32_t i, j;
+
+    ASSERT(src && dst);
+    ASSERT(src->width == dst->width);
+    ASSERT(src->height == dst->height);
+    ASSERT(src->format == dst->format && src->format == VX_DF_IMAGE_U8);
+
+    for (i = 0; i < dst->height; ++i)
+        for (j = 0; j < dst->width; ++j)
+            dst->data.y[i * dst->stride + j] = ~src->data.y[i * src->stride + j];
+}
+
+static void reference_mean_stddev(CT_Image src, vx_float32* _mean, vx_float32* _stddev)
+{
+    uint32_t x, y, width = src ? src->width : 0, height = src ? src->height : 0;
+    uint32_t npix, stride;
+    double sum = 0, sqsum = 0;
+    int format = src ? src->format : VX_DF_IMAGE_U8;
+
+    ASSERT(src);
+    ASSERT(src->width > 0 && src->height > 0);
+    npix = width*height;
+    stride = ct_stride_bytes(src);
+
+#define CASE_MEANSTDDEV(format, type, acctype) \
+    case format: \
+    { \
+        acctype s = 0, s2 = 0; \
+        for( y = 0; y < src->height; y++ ) \
+        { \
+            const type* ptr = (const type*)(src->data.y + stride*y); \
+            for( x = 0; x < src->width; x++ ) \
+            { \
+                type val = ptr[x]; \
+                s += val; \
+                s2 += (acctype)val*val; \
+            } \
+        } \
+        sum = (double)s; sqsum = (double)s2; \
+    } \
+    break
+
+    switch(format)
+    {
+    CASE_MEANSTDDEV(VX_DF_IMAGE_U8, uint8_t, uint64_t);
+    default:
+        FAIL("Unsupported image format: (%d)", src->format);
+    }
+
+    *_mean = (vx_float32)(sum/npix);
+    sqsum = sqsum/npix - (sum/npix)*(sum/npix);
+    *_stddev = (vx_float32)sqrt(CT_MAX(sqsum, 0.));
+}
+
+static void fillSequence(CT_Image dst, uint32_t seq_init)
+{
+    uint32_t i, j;
+    uint32_t val = seq_init;
+
+    ASSERT(dst);
+    ASSERT(dst->format == VX_DF_IMAGE_U8);
+
+    for (i = 0; i < dst->height; ++i)
+        for (j = 0; j < dst->width; ++j)
+            dst->data.y[i * dst->stride + j] = val;
+}
+
+#define TEST_USER_KERNEL_NAME          "test_graph_pipeline.user_kernel"
+#define TEST_USER_KERNEL_NUM_PARAMS     (4u)
+static vx_kernel test_user_kernel = NULL;
+
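+/* Validator for the test user kernel: every parameter is an optional VX_TYPE_UINT32 scalar;
+ * for each parameter that is supplied, the scalar type is propagated to its meta format. */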
+static vx_status test_user_kernel_validate(vx_node node,
+            const vx_reference parameters[ ],
+            vx_uint32 num,
+            vx_meta_format metas[])
+{
+    vx_status status = VX_SUCCESS;
+    vx_scalar scalar[TEST_USER_KERNEL_NUM_PARAMS];
+    vx_enum scalar_type[TEST_USER_KERNEL_NUM_PARAMS];
+    vx_uint32 i;
+
+    if (num != TEST_USER_KERNEL_NUM_PARAMS)
+    {
+        printf(" ERROR: Test user kernel: Number of parameters dont match !!!\n");
+        status = VX_ERROR_INVALID_PARAMETERS;
+    }
+
+    for (i = 0U; i < TEST_USER_KERNEL_NUM_PARAMS; i ++)
+    {
+        scalar[i] = (vx_scalar)parameters[i];
+
+        if(scalar[i] != NULL)
+        {
+            /* i.e. not an optional parameter */
+            status = vxQueryScalar(scalar[i],
+                VX_SCALAR_TYPE, &scalar_type[i],
+                sizeof(vx_enum));
+            if(status==VX_SUCCESS)
+            {
+                if(scalar_type[i] != VX_TYPE_UINT32)
+                {
+                    printf(" ERROR: Test user kernel: Scalar type MUST be VX_TYPE_UINT32 !!!\n");
+                    status = VX_ERROR_INVALID_PARAMETERS;
+                }
+                vxSetMetaFormatAttribute(metas[i], VX_SCALAR_TYPE, &scalar_type[i],
+                    sizeof(scalar_type[i]));
+            }
+            if(status!=VX_SUCCESS)
+            {
+                printf(" ERROR: Test user kernel: validate failed !!!\n");
+                break;
+            }
+        }
+    }
+
+    return status;
+}
+
+static vx_status test_user_kernel_run(vx_node node,
+            const vx_reference parameters[ ],
+            vx_uint32 num)
+{
+    vx_status status = VX_SUCCESS;
+    vx_scalar in1, in2, out1, out2;
+    vx_uint32 in1_value = 0, in2_value = 0;
+    vx_uint32 out1_value = 0, out2_value = 0;
+
+    /* Any of the parameters can be NULL since each parameter is marked
+     * as optional during kernel registration */
+    in1  = (vx_scalar)parameters[0];
+    in2  = (vx_scalar)parameters[1];
+    out1 = (vx_scalar)parameters[2];
+    out2 = (vx_scalar)parameters[3];
+
+    if(in1!=NULL)
+    {
+        vxCopyScalar(in1,
+            &in1_value,
+            VX_READ_ONLY,
+            VX_MEMORY_TYPE_HOST
+            );
+    }
+    if(in2!=NULL)
+    {
+        vxCopyScalar(in2,
+            &in2_value,
+            VX_READ_ONLY,
+            VX_MEMORY_TYPE_HOST
+            );
+    }
+
+    /* test computation:
+     * out1_value = in1_value + in2_value
+     * out2_value = out1_value * 2
+     * when the in1 reference is not specified (since it is optional), in1_value is taken to be 0
+     * when the in2 reference is not specified (since it is optional), in2_value is taken to be 0
+     */
+    out1_value = in1_value + in2_value;
+    out2_value = out1_value*2;
+
+    if(out1!=NULL)
+    {
+        vxCopyScalar(out1,
+            &out1_value,
+            VX_WRITE_ONLY,
+            VX_MEMORY_TYPE_HOST
+            );
+    }
+    if(out2!=NULL)
+    {
+        vxCopyScalar(out2,
+            &out2_value,
+            VX_WRITE_ONLY,
+            VX_MEMORY_TYPE_HOST
+            );
+    }
+
+    return status;
+}
+
+static vx_status test_user_kernel_register(vx_context context)
+{
+    vx_kernel kernel = NULL;
+    vx_status status;
+    uint32_t index;
+    vx_enum test_user_kernel_id = 0;
+
+    status = vxAllocateUserKernelId(context, &test_user_kernel_id);
+    if(status!=VX_SUCCESS)
+    {
+        printf(" ERROR: Test user kernel: vxAllocateUserKernelId failed (%d)!!!\n", status);
+    }
+    if(status==VX_SUCCESS)
+    {
+        kernel = vxAddUserKernel(
+                    context,
+                    TEST_USER_KERNEL_NAME,
+                    test_user_kernel_id,
+                    test_user_kernel_run,
+                    TEST_USER_KERNEL_NUM_PARAMS, /* number of parameter objects for this user function */
+                    test_user_kernel_validate,
+                    NULL,
+                    NULL);
+    }
+
+    status = vxGetStatus((vx_reference)kernel);
+    if ( status == VX_SUCCESS)
+    {
+        index = 0;
+
+        if ( status == VX_SUCCESS)
+        {
+            status = vxAddParameterToKernel(kernel,
+                index,
+                VX_INPUT,
+                VX_TYPE_SCALAR,
+                VX_PARAMETER_STATE_OPTIONAL
+                );
+            index++;
+        }
+        if ( status == VX_SUCCESS)
+        {
+            status = vxAddParameterToKernel(kernel,
+                index,
+                VX_INPUT,
+                VX_TYPE_SCALAR,
+                VX_PARAMETER_STATE_OPTIONAL
+                );
+            index++;
+        }
+        if ( status == VX_SUCCESS)
+        {
+            status = vxAddParameterToKernel(kernel,
+                index,
+                VX_OUTPUT,
+                VX_TYPE_SCALAR,
+                VX_PARAMETER_STATE_OPTIONAL
+                );
+            index++;
+        }
+        if ( status == VX_SUCCESS)
+        {
+            status = vxAddParameterToKernel(kernel,
+                index,
+                VX_OUTPUT,
+                VX_TYPE_SCALAR,
+                VX_PARAMETER_STATE_OPTIONAL
+                );
+            index++;
+        }
+        if ( status == VX_SUCCESS)
+        {
+            status = vxFinalizeKernel(kernel);
+        }
+        if( status != VX_SUCCESS)
+        {
+            printf(" ERROR: Test user kernel: vxAddParameterToKernel, vxFinalizeKernel failed (%d)!!!\n", status);
+            vxReleaseKernel(&kernel);
+            kernel = NULL;
+        }
+    }
+    else
+    {
+        kernel = NULL;
+        printf(" ERROR: Test user kernel: vxAddUserKernel failed (%d)!!!\n", status);
+    }
+    if(status==VX_SUCCESS)
+    {
+        test_user_kernel = kernel;
+    }
+
+    return status;
+}
+
+static vx_status test_user_kernel_unregister(vx_context context)
+{
+    vx_status status;
+
+    status = vxRemoveKernel(test_user_kernel);
+    test_user_kernel = NULL;
+
+    if(status!=VX_SUCCESS)
+    {
+        printf(" ERROR: Test user kernel: Unable to remove kernel (%d)!!!\n", status);
+    }
+
+    return status;
+}
+
+static vx_node test_user_kernel_node(vx_graph graph,
+            vx_scalar in1,
+            vx_scalar in2,
+            vx_scalar out1,
+            vx_scalar out2)
+{
+    vx_node node = 0;
+
+    vx_context context = vxGetContext((vx_reference)graph);
+
+    vx_kernel kernel = vxGetKernelByName(context, TEST_USER_KERNEL_NAME);
+
+    if(kernel!=NULL)
+    {
+        /* create a generic node from the user kernel and set its parameters individually;
+         * the kernel reference is released once the node has been created */
+        node = vxCreateGenericNode(graph, kernel);
+
+        if (NULL != in1)
+        {
+            vxSetParameterByIndex(node, 0, (vx_reference)in1);
+        }
+
+        if (NULL != in2)
+        {
+            vxSetParameterByIndex(node, 1, (vx_reference)in2);
+        }
+
+        if (NULL != out1)
+        {
+            vxSetParameterByIndex(node, 2, (vx_reference)out1);
+        }
+
+        if (NULL != out2)
+        {
+            vxSetParameterByIndex(node, 3, (vx_reference)out2);
+        }
+
+        vxReleaseKernel(&kernel);
+    }
+
+    return node;
+}
+
+/*
+ *  d0      n0     d2
+ *  IMG --  OR -- IMG (*)
+ *  (*)     |
+ *          d1 (single ref)
+ *
+ * (*) = queueing enabled
+ *
+ * This test case tests the following:
+ * - A data reference on which queueing is not enabled
+ * - No looping
+ * - fixed pipeline depth of 2
+ *
+ */
+TEST_WITH_ARG(GraphPipeline, testOneNode, Arg, PARAMETERS)
+{
+    vx_context context = context_->vx_context_;
+    vx_graph graph;
+    vx_image d0[MAX_NUM_BUF], d1, d2[MAX_NUM_BUF];
+    vx_node n0;
+
+    CT_Image ref_src[MAX_NUM_BUF], vxdst;
+    uint32_t width, height, seq_init, num_buf;
+    uint32_t buf_id, loop_id, loop_cnt;
+    vx_graph_parameter_queue_params_t graph_parameters_queue_params_list[2];
+
+    seq_init = 1;
+    width = arg_->width;
+    height = arg_->height;
+    loop_cnt = arg_->loop_count;
+    num_buf = 2;
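+    /* two buffers per enqueueable graph parameter, matching the fixed pipeline depth of 2 described above */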
+
+    ASSERT(num_buf <= MAX_NUM_BUF);
+
+    /* fill reference data */
+    for(buf_id=0; buf_id<num_buf; buf_id++)
+    {
+        ASSERT_NO_FAILURE({
+            ref_src[buf_id] = ct_allocate_image(width, height, VX_DF_IMAGE_U8);
+            fillSequence(ref_src[buf_id], (uint32_t)(seq_init+buf_id*10));
+        });
+    }
+
+    ASSERT_VX_OBJECT(graph = vxCreateGraph(context), VX_TYPE_GRAPH);
+
+    /* allocate Input and Output refs, multiple refs created to allow pipelining of graph */
+    for(buf_id=0; buf_id<num_buf; buf_id++)
+    {
+        ASSERT_VX_OBJECT(d0[buf_id]    = vxCreateImage(context, width, height, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
+        ASSERT_VX_OBJECT(d2[buf_id]    = vxCreateImage(context, width, height, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
+    }
+    /* create the other ref; it is not multi-buffered and the same ref is fed as a parameter to the graph */
+    ASSERT_VX_OBJECT(d1    = vxCreateImage(context, width, height, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
+
+    /* fill d1 with zeros so that the OR operation acts like a NOP */
+    {
+        vx_imagepatch_addressing_t addr;
+        vx_rectangle_t rect;
+        void *ptr;
+        vx_map_id map_id;
+
+        rect.start_x = rect.start_y = 0;
+        rect.end_x = width;
+        rect.end_y = height;
+
+        VX_CALL(vxMapImagePatch(d1, &rect, 0, &map_id, &addr, &ptr, VX_WRITE_ONLY, VX_MEMORY_TYPE_HOST, VX_NOGAP_X));
+
+        ct_memset(ptr, 0x0, addr.stride_y*addr.dim_y);
+
+        VX_CALL(vxUnmapImagePatch(d1, map_id));
+    }
+
+    /* create node; the input (index 0) and output (index 2) will be made graph parameters
+     * so that refs can be enqueued to and dequeued from them, enabling graph pipelining.
+     * d0[0] and d2[0] are used only for their meta-data.
+     * The actual input and output used for graph processing are the
+     * refs that are enqueued later
+     */
+    ASSERT_VX_OBJECT(n0    = vxOrNode(graph, d0[0], d1, d2[0]), VX_TYPE_NODE);
+
+    /* input @ node index 0, becomes graph parameter 0 */
+    add_graph_parameter_by_node_index(graph, n0, 0);
+    /* output @ node index 2, becomes graph parameter 1 */
+    add_graph_parameter_by_node_index(graph, n0, 2);
+
+    /* set graph schedule config such that graph parameter @ index 0 and 1 are enqueuable */
+    graph_parameters_queue_params_list[0].graph_parameter_index = 0;
+    graph_parameters_queue_params_list[0].refs_list_size = num_buf;
+    graph_parameters_queue_params_list[0].refs_list = (vx_reference*)&d0[0];
+
+    graph_parameters_queue_params_list[1].graph_parameter_index = 1;
+    graph_parameters_queue_params_list[1].refs_list_size = num_buf;
+    graph_parameters_queue_params_list[1].refs_list = (vx_reference*)&d2[0];
+
+    /* Auto schedule mode is used, so we don't need to call vxScheduleGraph;
+     * the graph gets scheduled automatically as refs are enqueued to it
+     */
+    VX_CALL(vxSetGraphScheduleConfig(graph,
+			VX_GRAPH_SCHEDULE_MODE_QUEUE_AUTO,
+			2,
+			graph_parameters_queue_params_list
+			));
+
+    VX_CALL(vxVerifyGraph(graph));
+
+    #if 1
+    /* fill reference data into input data reference */
+    for(buf_id=0; buf_id<num_buf; buf_id++)
+    {
+        ASSERT_NO_FAILURE(ct_image_copyto_vx_image(d0[buf_id], ref_src[buf_id]));
+    }
+
+    /* enqueue input and output references;
+     * they can be enqueued in any order, and all at once,
+     * but here they are enqueued one by one just as an example
+     */
+    for(buf_id=0; buf_id<num_buf; buf_id++)
+    {
+        VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 0, (vx_reference*)&d0[buf_id], 1));
+        VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 1, (vx_reference*)&d2[buf_id], 1));
+    }
+
+    buf_id = 0;
+
+    /* wait for graph instances to complete, compare output and recycle data buffers, schedule again */
+    for(loop_id=0; loop_id<(loop_cnt+num_buf); loop_id++)
+    {
+        vx_image out_img, in_img;
+        uint32_t num_refs;
+
+        /* Get output reference, waits until a reference is available */
+        VX_CALL(vxGraphParameterDequeueDoneRef(graph, 1, (vx_reference*)&out_img, 1, &num_refs));
+        /* Get consumed input reference, waits until a reference is available
+         */
+        VX_CALL(vxGraphParameterDequeueDoneRef(graph, 0, (vx_reference*)&in_img, 1, &num_refs));
+
+        /* when measuring performance, don't check the output since it affects the graph performance numbers
+         */
+
+        if(loop_cnt > 100)
+        {
+            ct_update_progress(loop_id, loop_cnt+num_buf);
+        }
+
+        ASSERT_NO_FAILURE({
+            vxdst = ct_image_from_vx_image(out_img);
+        });
+
+        /* compare output */
+        ASSERT_EQ_CTIMAGE(ref_src[buf_id], vxdst);
+
+        buf_id = (buf_id+1)%num_buf;
+
+        /* recycles dequeued input and output refs 'loop_cnt' times */
+        if(loop_id<loop_cnt)
+        {
+            /* input and output can be enqueued in any order */
+            VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 0, (vx_reference*)&in_img, 1));
+            VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 1, (vx_reference*)&out_img, 1));
+        }
+    }
+
+    /* ensure all graph processing is complete */
+    VX_CALL(vxWaitGraph(graph));
+
+    #endif
+
+    VX_CALL(vxReleaseNode(&n0));
+    for(buf_id=0; buf_id<num_buf; buf_id++)
+    {
+        VX_CALL(vxReleaseImage(&d0[buf_id]));
+        VX_CALL(vxReleaseImage(&d2[buf_id]));
+    }
+    VX_CALL(vxReleaseImage(&d1));
+    VX_CALL(vxReleaseGraph(&graph));
+}
+
+/*
+ *  d0     n0     d1     n1     d2
+ * IMG -- NOT -- IMG -- NOT -- IMG
+ *
+ * This test case tests the following:
+ * - Single input, single output nodes
+ * - Two nodes on two different targets
+ * - Number of buffers = pipeline depth
+ * - Virtual objects, no hints provided except for pipeline depth
+ * - fixed pipeline depth of 2
+ *
+ */
+TEST_WITH_ARG(GraphPipeline, testTwoNodesBasic, Arg, PARAMETERS)
+{
+    vx_context context = context_->vx_context_;
+    vx_graph graph;
+    vx_image d0[MAX_NUM_BUF], d1, d2[MAX_NUM_BUF];
+    vx_node n0, n1;
+    vx_graph_parameter_queue_params_t graph_parameters_queue_params_list[2];
+
+    CT_Image ref_src[MAX_NUM_BUF], vxdst;
+    uint32_t width, height, seq_init;
+    uint32_t buf_id, loop_id, loop_cnt, num_buf;
+
+    seq_init = 1;
+    width = arg_->width;
+    height = arg_->height;
+    loop_cnt = arg_->loop_count;
+    num_buf = 2;
+
+    ASSERT(num_buf <= MAX_NUM_BUF);
+
+    /* fill reference data */
+    for(buf_id=0; buf_id<num_buf; buf_id++)
+    {
+        ASSERT_NO_FAILURE({
+            ref_src[buf_id] = ct_allocate_image(width, height, VX_DF_IMAGE_U8);
+            fillSequence(ref_src[buf_id], (uint32_t)(seq_init+buf_id*10));
+        });
+    }
+
+    ASSERT_VX_OBJECT(graph = vxCreateGraph(context), VX_TYPE_GRAPH);
+
+    /* allocate Input and Output refs, multiple refs created to allow pipelining of graph */
+    for(buf_id=0; buf_id<num_buf; buf_id++)
+    {
+        ASSERT_VX_OBJECT(d0[buf_id]    = vxCreateImage(context, width, height, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
+        ASSERT_VX_OBJECT(d2[buf_id]    = vxCreateImage(context, width, height, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
+    }
+    ASSERT_VX_OBJECT(d1    = vxCreateVirtualImage(graph, width, height, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
+
+    ASSERT_VX_OBJECT(n0    = vxNotNode(graph, d0[0], d1), VX_TYPE_NODE);
+    ASSERT_VX_OBJECT(n1    = vxNotNode(graph, d1, d2[0]), VX_TYPE_NODE);
+
+    /* input @ n0 index 0, becomes graph parameter 0 */
+    add_graph_parameter_by_node_index(graph, n0, 0);
+    /* output @ n1 index 1, becomes graph parameter 1 */
+    add_graph_parameter_by_node_index(graph, n1, 1);
+
+    /* set graph schedule config such that graph parameter @ index 0 and 1 are enqueuable */
+    graph_parameters_queue_params_list[0].graph_parameter_index = 0;
+    graph_parameters_queue_params_list[0].refs_list_size = num_buf;
+    graph_parameters_queue_params_list[0].refs_list = (vx_reference*)&d0[0];
+
+    graph_parameters_queue_params_list[1].graph_parameter_index = 1;
+    graph_parameters_queue_params_list[1].refs_list_size = num_buf;
+    graph_parameters_queue_params_list[1].refs_list = (vx_reference*)&d2[0];
+
+    /* Auto schedule mode is used, so we don't need to call vxScheduleGraph;
+     * the graph gets scheduled automatically as refs are enqueued to it
+     */
+    VX_CALL(vxSetGraphScheduleConfig(graph,
+			VX_GRAPH_SCHEDULE_MODE_QUEUE_AUTO,
+			2,
+			graph_parameters_queue_params_list
+			));
+
+    VX_CALL(vxVerifyGraph(graph));
+
+    #if 1
+
+    /* fill reference data into input data reference */
+    for(buf_id=0; buf_id<num_buf; buf_id++)
+    {
+        ASSERT_NO_FAILURE(ct_image_copyto_vx_image(d0[buf_id], ref_src[buf_id]));
+    }
+
+    /* enqueue input and output references;
+     * they can be enqueued in any order, and all at once,
+     * but here they are enqueued one by one just as an example
+     */
+    for(buf_id=0; buf_id<num_buf; buf_id++)
+    {
+        VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 0, (vx_reference*)&d0[buf_id], 1));
+        VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 1, (vx_reference*)&d2[buf_id], 1));
+    }
+
+    buf_id = 0;
+
+    /* wait for graph instances to complete, compare output and recycle data buffers, schedule again */
+    for(loop_id=0; loop_id<(loop_cnt+num_buf); loop_id++)
+    {
+        vx_image out_img, in_img;
+        uint32_t num_refs;
+
+        /* Get output reference, waits until a reference is available */
+        VX_CALL(vxGraphParameterDequeueDoneRef(graph, 1, (vx_reference*)&out_img, 1, &num_refs));
+
+        /* Get consumed input reference, waits until a reference is available
+         */
+        VX_CALL(vxGraphParameterDequeueDoneRef(graph, 0, (vx_reference*)&in_img, 1, &num_refs));
+
+
+        if(loop_cnt > 100)
+        {
+            ct_update_progress(loop_id, loop_cnt+num_buf);
+        }
+
+        ASSERT_NO_FAILURE({
+            vxdst = ct_image_from_vx_image(out_img);
+        });
+
+        /* compare output */
+        ASSERT_EQ_CTIMAGE(ref_src[buf_id], vxdst);
+
+        buf_id = (buf_id+1)%num_buf;
+
+        /* recycles dequeued input and output refs 'loop_cnt' times */
+        if(loop_id<loop_cnt)
+        {
+            /* input and output can be enqueued in any order */
+            VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 0, (vx_reference*)&in_img, 1));
+            VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 1, (vx_reference*)&out_img, 1));
+        }
+    }
+
+    /* ensure all graph processing is complete */
+    VX_CALL(vxWaitGraph(graph));
+
+    #endif
+
+    VX_CALL(vxReleaseNode(&n0));
+    VX_CALL(vxReleaseNode(&n1));
+    for(buf_id=0; buf_id<num_buf; buf_id++)
+    {
+        VX_CALL(vxReleaseImage(&d0[buf_id]));
+        VX_CALL(vxReleaseImage(&d2[buf_id]));
+    }
+    VX_CALL(vxReleaseImage(&d1));
+    VX_CALL(vxReleaseGraph(&graph));
+}
+
+/*
+ *  d0     n0     d1     n1     d2
+ * IMG -- NOT -- IMG -- NOT -- IMG
+ *
+ * This test case tests the following:
+ * - Single input, single output nodes
+ * - Two nodes on two different targets
+ *
+ */
+TEST_WITH_ARG(GraphPipeline, testTwoNodes, Arg, PARAMETERS)
+{
+    vx_context context = context_->vx_context_;
+    vx_graph graph;
+    vx_image d0[MAX_NUM_BUF], d1[MAX_NUM_BUF], d2[MAX_NUM_BUF];
+    vx_node n0, n1;
+    vx_graph_parameter_queue_params_t graph_parameters_queue_params_list[3];
+
+    CT_Image ref_src[MAX_NUM_BUF], vxdst;
+    uint32_t width, height, seq_init, num_buf;
+    uint32_t buf_id, loop_id, loop_cnt;
+
+    seq_init = 1;
+    width = arg_->width;
+    height = arg_->height;
+    num_buf = arg_->num_buf;
+    loop_cnt = arg_->loop_count;
+
+    ASSERT(num_buf <= MAX_NUM_BUF);
+
+    /* fill reference data */
+    for(buf_id=0; buf_id<num_buf; buf_id++)
+    {
+        ASSERT_NO_FAILURE({
+            ref_src[buf_id] = ct_allocate_image(width, height, VX_DF_IMAGE_U8);
+            fillSequence(ref_src[buf_id], (uint32_t)(seq_init+buf_id*10));
+        });
+    }
+
+    ASSERT_VX_OBJECT(graph = vxCreateGraph(context), VX_TYPE_GRAPH);
+
+    /* allocate Input and Output refs, multiple refs created to allow pipelining of graph */
+    for(buf_id=0; buf_id<num_buf; buf_id++)
+    {
+        ASSERT_VX_OBJECT(d0[buf_id]    = vxCreateImage(context, width, height, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
+        ASSERT_VX_OBJECT(d1[buf_id]    = vxCreateImage(context, width, height, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
+        ASSERT_VX_OBJECT(d2[buf_id]    = vxCreateImage(context, width, height, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
+    }
+    ASSERT_VX_OBJECT(n0    = vxNotNode(graph, d0[0], d1[0]), VX_TYPE_NODE);
+    ASSERT_VX_OBJECT(n1    = vxNotNode(graph, d1[0], d2[0]), VX_TYPE_NODE);
+
+    /* input @ n0 index 0, becomes graph parameter 0 */
+    add_graph_parameter_by_node_index(graph, n0, 0);
+    /* intermediate output @ n0 index 1, becomes graph parameter 1 */
+    add_graph_parameter_by_node_index(graph, n0, 1);
+    /* output @ n1 index 1, becomes graph parameter 2 */
+    add_graph_parameter_by_node_index(graph, n1, 1);
+
+    /* set graph schedule config such that graph parameter @ index 0, 1, 2 are enqueuable */
+    graph_parameters_queue_params_list[0].graph_parameter_index = 0;
+    graph_parameters_queue_params_list[0].refs_list_size = num_buf;
+    graph_parameters_queue_params_list[0].refs_list = (vx_reference*)&d0[0];
+
+    graph_parameters_queue_params_list[1].graph_parameter_index = 1;
+    graph_parameters_queue_params_list[1].refs_list_size = num_buf;
+    graph_parameters_queue_params_list[1].refs_list = (vx_reference*)&d1[0];
+
+    graph_parameters_queue_params_list[2].graph_parameter_index = 2;
+    graph_parameters_queue_params_list[2].refs_list_size = num_buf;
+    graph_parameters_queue_params_list[2].refs_list = (vx_reference*)&d2[0];
+
+    /* Auto schedule mode is used, so we don't need to call vxScheduleGraph;
+     * the graph gets scheduled automatically as refs are enqueued to it
+     */
+    VX_CALL(vxSetGraphScheduleConfig(graph,
+			VX_GRAPH_SCHEDULE_MODE_QUEUE_AUTO,
+			3,
+			graph_parameters_queue_params_list
+			));
+
+    VX_CALL(vxVerifyGraph(graph));
+
+    #if 1
+    /* fill reference data into input data reference */
+    for(buf_id=0; buf_id<num_buf; buf_id++)
+    {
+        ASSERT_NO_FAILURE(ct_image_copyto_vx_image(d0[buf_id], ref_src[buf_id]));
+    }
+
+    /* enqueue input and output references;
+     * they can be enqueued in any order, and all at once,
+     * but here they are enqueued one by one just as an example
+     */
+    for(buf_id=0; buf_id<num_buf; buf_id++)
+    {
+        VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 1, (vx_reference*)&d1[buf_id], 1));
+        VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 2, (vx_reference*)&d2[buf_id], 1));
+        VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 0, (vx_reference*)&d0[buf_id], 1));
+    }
+
+    buf_id = 0;
+
+    /* wait for graph instances to complete, compare output and recycle data buffers, schedule again */
+    for(loop_id=0; loop_id<(loop_cnt+num_buf); loop_id++)
+    {
+        vx_image out_img, in_img, intermediate_img;
+        uint32_t num_refs;
+
+        /* Get output reference, waits until a reference is available */
+        VX_CALL(vxGraphParameterDequeueDoneRef(graph, 2, (vx_reference*)&out_img, 1, &num_refs));
+
+        /* Get consumed intermediate reference, waits until a reference is available
+         */
+        VX_CALL(vxGraphParameterDequeueDoneRef(graph, 1, (vx_reference*)&intermediate_img, 1, &num_refs));
+
+        /* Get consumed input reference, waits until a reference is available
+         */
+        VX_CALL(vxGraphParameterDequeueDoneRef(graph, 0, (vx_reference*)&in_img, 1, &num_refs));
+
+        /* when measuring performance, don't check the output since it affects the graph performance numbers
+         */
+
+        if(loop_cnt > 100)
+        {
+            ct_update_progress(loop_id, loop_cnt+num_buf);
+        }
+
+        ASSERT_NO_FAILURE({
+            vxdst = ct_image_from_vx_image(out_img);
+        });
+
+        /* compare output */
+        ASSERT_EQ_CTIMAGE(ref_src[buf_id], vxdst);
+
+        buf_id = (buf_id+1)%num_buf;
+
+        /* recycles dequeued input and output refs 'loop_cnt' times */
+        if(loop_id<loop_cnt)
+        {
+            /* input and output can be enqueued in any order */
+            VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 1, (vx_reference*)&intermediate_img, 1));
+            VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 2, (vx_reference*)&out_img, 1));
+            VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 0, (vx_reference*)&in_img, 1));
+        }
+    }
+
+    /* ensure all graph processing is complete */
+    VX_CALL(vxWaitGraph(graph));
+
+    #endif
+
+    VX_CALL(vxReleaseNode(&n0));
+    VX_CALL(vxReleaseNode(&n1));
+    for(buf_id=0; buf_id<num_buf; buf_id++)
+    {
+        VX_CALL(vxReleaseImage(&d0[buf_id]));
+        VX_CALL(vxReleaseImage(&d1[buf_id]));
+        VX_CALL(vxReleaseImage(&d2[buf_id]));
+    }
+    VX_CALL(vxReleaseGraph(&graph));
+}
+
+/*
+ *  d0     n0     d1     n1     d2    n2     d3     n3      d5
+ * IMG -- NOT -- IMG -- NOT -- IMG -- OR  -- IMG -- AND -- IMG
+ *                |                   |              |
+ *                +-------------------+             IMG
+ *                                                   d4
+ *
+ * This test case tests the following:
+ * - Same input going to multiple nodes
+ * - Outputs from multiple nodes going to a single node
+ * - Node taking input from another node as well as from user
+ *
+ */
+TEST_WITH_ARG(GraphPipeline, testFourNodes, Arg, PARAMETERS)
+{
+    vx_context context = context_->vx_context_;
+    vx_graph graph;
+    vx_image d0[MAX_NUM_BUF], d1, d2, d3, d4[MAX_NUM_BUF], d5[MAX_NUM_BUF];
+    vx_node  n0, n1, n2, n3;
+    vx_graph_parameter_queue_params_t graph_parameters_queue_params_list[3];
+
+    CT_Image ref_src[MAX_NUM_BUF], vxdst;
+    uint32_t width, height, seq_init, num_buf;
+    uint32_t buf_id, loop_id, loop_cnt;
+
+    seq_init = 1;
+    width = arg_->width;
+    height = arg_->height;
+    num_buf = arg_->num_buf;
+    loop_cnt = arg_->loop_count;
+
+    ASSERT(num_buf <= MAX_NUM_BUF);
+
+    /* fill reference data */
+    for(buf_id=0; buf_id<num_buf; buf_id++)
+    {
+        ASSERT_NO_FAILURE({
+            ref_src[buf_id] = ct_allocate_image(width, height, VX_DF_IMAGE_U8);
+            fillSequence(ref_src[buf_id], (uint32_t)(seq_init+buf_id*10));
+        });
+    }
+
+    ASSERT_VX_OBJECT(graph = vxCreateGraph(context), VX_TYPE_GRAPH);
+    /* allocate Input and Output refs, multiple refs created to allow pipelining of graph */
+    for(buf_id=0; buf_id<num_buf; buf_id++)
+    {
+        ASSERT_VX_OBJECT(d0[buf_id]    = vxCreateImage(context, width, height, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
+        ASSERT_VX_OBJECT(d4[buf_id]    = vxCreateImage(context, width, height, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
+        ASSERT_VX_OBJECT(d5[buf_id]    = vxCreateImage(context, width, height, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
+    }
+    ASSERT_VX_OBJECT(d1    = vxCreateVirtualImage(graph, width, height, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
+    ASSERT_VX_OBJECT(d2    = vxCreateVirtualImage(graph, width, height, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
+    ASSERT_VX_OBJECT(d3    = vxCreateVirtualImage(graph, width, height, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
+
+    ASSERT_VX_OBJECT(n0    = vxNotNode(graph, d0[0], d1), VX_TYPE_NODE);
+    ASSERT_VX_OBJECT(n1    = vxNotNode(graph, d1, d2), VX_TYPE_NODE);
+    ASSERT_VX_OBJECT(n2    = vxOrNode(graph, d1, d2, d3), VX_TYPE_NODE);
+    ASSERT_VX_OBJECT(n3    = vxAndNode(graph, d3, d4[0], d5[0]), VX_TYPE_NODE);
+
+    /* input @ n0 index 0, becomes graph parameter 0 */
+    add_graph_parameter_by_node_index(graph, n0, 0);
+    /* input @ n3 index 1, becomes graph parameter 1 */
+    add_graph_parameter_by_node_index(graph, n3, 1);
+    /* output @ n3 index 2, becomes graph parameter 2 */
+    add_graph_parameter_by_node_index(graph, n3, 2);
+
+    /* set graph schedule config such that graph parameter @ index 0, 1, 2 are enqueuable */
+    graph_parameters_queue_params_list[0].graph_parameter_index = 0;
+    graph_parameters_queue_params_list[0].refs_list_size = num_buf;
+    graph_parameters_queue_params_list[0].refs_list = (vx_reference*)&d0[0];
+
+    graph_parameters_queue_params_list[1].graph_parameter_index = 1;
+    graph_parameters_queue_params_list[1].refs_list_size = num_buf;
+    graph_parameters_queue_params_list[1].refs_list = (vx_reference*)&d4[0];
+
+    graph_parameters_queue_params_list[2].graph_parameter_index = 2;
+    graph_parameters_queue_params_list[2].refs_list_size = num_buf;
+    graph_parameters_queue_params_list[2].refs_list = (vx_reference*)&d5[0];
+
+    /* Auto schedule mode is used, so we don't need to call vxScheduleGraph;
+     * the graph gets scheduled automatically as refs are enqueued to it
+     */
+    VX_CALL(vxSetGraphScheduleConfig(graph,
+			VX_GRAPH_SCHEDULE_MODE_QUEUE_AUTO,
+			3,
+			graph_parameters_queue_params_list
+			));
+
+    VX_CALL(vxVerifyGraph(graph));
+
+    #if 1
+    /* fill reference data into input data reference */
+    for(buf_id=0; buf_id<num_buf; buf_id++)
+    {
+        ASSERT_NO_FAILURE(ct_image_copyto_vx_image(d0[buf_id], ref_src[buf_id]));
+        ASSERT_NO_FAILURE(ct_image_copyto_vx_image(d4[buf_id], ref_src[buf_id]));
+    }
+
+    /* enqueue input and output references;
+     * they can be enqueued in any order, and all at once,
+     * but here they are enqueued one by one just as an example
+     */
+    for(buf_id=0; buf_id<num_buf; buf_id++)
+    {
+        VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 0, (vx_reference*)&d0[buf_id], 1));
+        VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 1, (vx_reference*)&d4[buf_id], 1));
+        VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 2, (vx_reference*)&d5[buf_id], 1));
+    }
+
+    buf_id = 0;
+
+    /* wait for graph instances to complete, compare output and recycle data buffers, schedule again */
+    for(loop_id=0; loop_id<(loop_cnt+num_buf); loop_id++)
+    {
+        vx_image out_img, in1_img, in2_img;
+        uint32_t num_refs;
+
+        /* Get output reference, waits until a reference is available */
+        VX_CALL(vxGraphParameterDequeueDoneRef(graph, 2, (vx_reference*)&out_img, 1, &num_refs));
+
+        /* Get consumed input reference, waits until a reference is available
+         */
+        VX_CALL(vxGraphParameterDequeueDoneRef(graph, 1, (vx_reference*)&in2_img, 1, &num_refs));
+
+        /* Get consumed input reference, waits until a reference is available
+         */
+        VX_CALL(vxGraphParameterDequeueDoneRef(graph, 0, (vx_reference*)&in1_img, 1, &num_refs));
+
+        /* when measuring performance, don't check the output since it affects the graph performance numbers
+         */
+
+        if(loop_cnt > 100)
+        {
+            ct_update_progress(loop_id, loop_cnt+num_buf);
+        }
+
+        ASSERT_NO_FAILURE({
+            vxdst = ct_image_from_vx_image(out_img);
+        });
+
+        /* compare output */
+        ASSERT_EQ_CTIMAGE(ref_src[buf_id], vxdst);
+
+        buf_id = (buf_id+1)%num_buf;
+
+        /* recycles dequeued input and output refs 'loop_cnt' times */
+        if(loop_id<loop_cnt)
+        {
+            /* input and output can be enqueued in any order */
+            VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 0, (vx_reference*)&in1_img, 1));
+            VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 1, (vx_reference*)&in2_img, 1));
+            VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 2, (vx_reference*)&out_img, 1));
+        }
+    }
+
+    /* ensure all graph processing is complete */
+    VX_CALL(vxWaitGraph(graph));
+
+    #endif
+
+    VX_CALL(vxReleaseNode(&n0));
+    VX_CALL(vxReleaseNode(&n1));
+    VX_CALL(vxReleaseNode(&n2));
+    VX_CALL(vxReleaseNode(&n3));
+    for(buf_id=0; buf_id<num_buf; buf_id++)
+    {
+        VX_CALL(vxReleaseImage(&d0[buf_id]));
+        VX_CALL(vxReleaseImage(&d4[buf_id]));
+        VX_CALL(vxReleaseImage(&d5[buf_id]));
+    }
+    VX_CALL(vxReleaseImage(&d1));
+    VX_CALL(vxReleaseImage(&d2));
+    VX_CALL(vxReleaseImage(&d3));
+    VX_CALL(vxReleaseGraph(&graph));
+}
+
+/*
+ *  d0     n0     d1
+ * IMG -- NOT -- IMG -- NOT --, etc. for GRAPH_MAX_DATA_REF_QUEUE length
+ *
+ *
+ * This test case tests the following:
+ * - Tests the limit of GRAPH_MAX_DATA_REF_QUEUE
+ *
+ */
+TEST_WITH_ARG(GraphPipeline, testMaxDataRef, Arg, PARAMETERS)
+{
+    vx_context context = context_->vx_context_;
+    vx_graph graph;
+    vx_image d0[MAX_NUM_BUF], d_virt[GRAPH_MAX_DATA_REF_QUEUE], d1[MAX_NUM_BUF];
+    vx_node  n[GRAPH_MAX_DATA_REF_QUEUE+1];
+    vx_graph_parameter_queue_params_t graph_parameters_queue_params_list[2];
+
+    CT_Image ref_src[MAX_NUM_BUF];
+    uint32_t width, height, seq_init, num_buf;
+    uint32_t buf_id, loop_id, loop_cnt, i;
+
+    seq_init = 1;
+    width = arg_->width;
+    height = arg_->height;
+    num_buf = arg_->num_buf;
+    loop_cnt = arg_->loop_count;
+
+    ASSERT(num_buf <= MAX_NUM_BUF);
+
+    /* fill reference data */
+    for(buf_id=0; buf_id<num_buf; buf_id++)
+    {
+        ASSERT_NO_FAILURE({
+            ref_src[buf_id] = ct_allocate_image(width, height, VX_DF_IMAGE_U8);
+            fillSequence(ref_src[buf_id], (uint32_t)(seq_init+buf_id*10));
+        });
+    }
+
+    ASSERT_VX_OBJECT(graph = vxCreateGraph(context), VX_TYPE_GRAPH);
+    /* allocate Input and Output refs, multiple refs created to allow pipelining of graph */
+    for(buf_id=0; buf_id<num_buf; buf_id++)
+    {
+        ASSERT_VX_OBJECT(d0[buf_id]    = vxCreateImage(context, width, height, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
+        ASSERT_VX_OBJECT(d1[buf_id]    = vxCreateImage(context, width, height, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
+    }
+    for (i = 0; i < GRAPH_MAX_DATA_REF_QUEUE; i++)
+    {
+        ASSERT_VX_OBJECT(d_virt[i]    = vxCreateVirtualImage(graph, width, height, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
+    }
+
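+    /* chain NOT nodes through the virtual images: d0 -> d_virt[0] -> ... -> d_virt[GRAPH_MAX_DATA_REF_QUEUE-1] -> d1 */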
+    ASSERT_VX_OBJECT(n[0]    = vxNotNode(graph, d0[0], d_virt[0]), VX_TYPE_NODE);
+
+    for (i = 0; i < GRAPH_MAX_DATA_REF_QUEUE-1; i++)
+    {
+        ASSERT_VX_OBJECT(n[i+1]    = vxNotNode(graph, d_virt[i], d_virt[i+1]), VX_TYPE_NODE);
+    }
+    ASSERT_VX_OBJECT(n[GRAPH_MAX_DATA_REF_QUEUE]    = vxNotNode(graph, d_virt[GRAPH_MAX_DATA_REF_QUEUE-1], d1[0]), VX_TYPE_NODE);
+
+    /* input @ n[0] index 0, becomes graph parameter 0 */
+    add_graph_parameter_by_node_index(graph, n[0], 0);
+    /* output @ n[GRAPH_MAX_DATA_REF_QUEUE] index 1, becomes graph parameter 1 */
+    add_graph_parameter_by_node_index(graph, n[GRAPH_MAX_DATA_REF_QUEUE], 1);
+
+    /* set graph schedule config such that graph parameters @ index 0 and 1 are enqueuable */
+    graph_parameters_queue_params_list[0].graph_parameter_index = 0;
+    graph_parameters_queue_params_list[0].refs_list_size = num_buf;
+    graph_parameters_queue_params_list[0].refs_list = (vx_reference*)&d0[0];
+
+    graph_parameters_queue_params_list[1].graph_parameter_index = 1;
+    graph_parameters_queue_params_list[1].refs_list_size = num_buf;
+    graph_parameters_queue_params_list[1].refs_list = (vx_reference*)&d1[0];
+
+    /* Auto schedule mode is used, so we don't need to call vxScheduleGraph;
+     * the graph gets scheduled automatically as refs are enqueued to it
+     */
+    VX_CALL(vxSetGraphScheduleConfig(graph,
+			VX_GRAPH_SCHEDULE_MODE_QUEUE_AUTO,
+			2,
+			graph_parameters_queue_params_list
+			));
+
+    VX_CALL(vxVerifyGraph(graph));
+
+    #if 1
+    /* fill reference data into input data reference */
+    for(buf_id=0; buf_id<num_buf; buf_id++)
+    {
+        ASSERT_NO_FAILURE(ct_image_copyto_vx_image(d0[buf_id], ref_src[buf_id]));
+        ASSERT_NO_FAILURE(ct_image_copyto_vx_image(d1[buf_id], ref_src[buf_id]));
+    }
+
+    /* enqueue input and output references;
+     * they can be enqueued in any order, and all at once,
+     * but here they are enqueued one by one just as an example
+     */
+    for(buf_id=0; buf_id<num_buf; buf_id++)
+    {
+        VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 0, (vx_reference*)&d0[buf_id], 1));
+        VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 1, (vx_reference*)&d1[buf_id], 1));
+    }
+
+    buf_id = 0;
+
+    /* wait for graph instances to complete, compare output and recycle data buffers, schedule again */
+    for(loop_id=0; loop_id<(loop_cnt+num_buf); loop_id++)
+    {
+        vx_image out_img, in_img;
+        uint32_t num_refs;
+
+        /* Get output reference, waits until a reference is available
+         */
+        VX_CALL(vxGraphParameterDequeueDoneRef(graph, 1, (vx_reference*)&out_img, 1, &num_refs));
+
+        /* Get consumed input reference, waits until a reference is available
+         */
+        VX_CALL(vxGraphParameterDequeueDoneRef(graph, 0, (vx_reference*)&in_img, 1, &num_refs));
+
+        buf_id = (buf_id+1)%num_buf;
+
+        /* recycles dequeued input and output refs 'loop_cnt' times */
+        if(loop_id<loop_cnt)
+        {
+            /* input and output can be enqueued in any order */
+            VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 0, (vx_reference*)&in_img, 1));
+            VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 1, (vx_reference*)&out_img, 1));
+        }
+    }
+
+    /* ensure all graph processing is complete */
+    VX_CALL(vxWaitGraph(graph));
+
+    #endif
+
+    for (i = 0; i < GRAPH_MAX_DATA_REF_QUEUE+1; i++)
+    {
+        VX_CALL(vxReleaseNode(&n[i]));
+    }
+    for(buf_id=0; buf_id<num_buf; buf_id++)
+    {
+        VX_CALL(vxReleaseImage(&d0[buf_id]));
+        VX_CALL(vxReleaseImage(&d1[buf_id]));
+    }
+    for (i = 0; i < GRAPH_MAX_DATA_REF_QUEUE; i++)
+    {
+        VX_CALL(vxReleaseImage(&d_virt[i]));
+    }
+    VX_CALL(vxReleaseGraph(&graph));
+}
+
+/*
+ *  d0      n0      d2
+ *  IMG --  AND -- IMG
+ *          |
+ *          d1 (uniform image) filled with 0xFF
+ *
+ * This test case tests the following:
+ * - Uniform image as input
+ * - No looping
+ * - fixed pipeline depth of 2
+ *
+ */
+TEST_WITH_ARG(GraphPipeline, testUniformImage, Arg, PARAMETERS)
+{
+    vx_context context = context_->vx_context_;
+    vx_graph graph;
+    vx_image d0[MAX_NUM_BUF], d1, d2[MAX_NUM_BUF];
+    vx_node n0;
+    vx_pixel_value_t pixel_value;
+
+    CT_Image ref_src[MAX_NUM_BUF], vxdst;
+    uint32_t width, height, seq_init, num_buf;
+    uint32_t buf_id, loop_id, loop_cnt;
+    vx_graph_parameter_queue_params_t graph_parameters_queue_params_list[2];
+
+    seq_init = 1;
+    width = arg_->width;
+    height = arg_->height;
+    loop_cnt = arg_->loop_count;
+    num_buf = 2;
+
+    ASSERT(num_buf <= MAX_NUM_BUF);
+
+    /* fill reference data */
+    for(buf_id=0; buf_id<num_buf; buf_id++)
+    {
+        ASSERT_NO_FAILURE({
+            ref_src[buf_id] = ct_allocate_image(width, height, VX_DF_IMAGE_U8);
+            fillSequence(ref_src[buf_id], (uint32_t)(seq_init+buf_id*10));
+        });
+    }
+
+    ASSERT_VX_OBJECT(graph = vxCreateGraph(context), VX_TYPE_GRAPH);
+
+    /* allocate Input and Output refs, multiple refs created to allow pipelining of graph */
+    for(buf_id=0; buf_id<num_buf; buf_id++)
+    {
+        ASSERT_VX_OBJECT(d0[buf_id]    = vxCreateImage(context, width, height, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
+        ASSERT_VX_OBJECT(d2[buf_id]    = vxCreateImage(context, width, height, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
+    }
+    /* create other refs; these are not multiple refs and the same ref is fed as a parameter to the graph */
+    pixel_value.U8 = 0xFF;
+    ASSERT_VX_OBJECT(d1    = vxCreateUniformImage(context, width, height, VX_DF_IMAGE_U8, &pixel_value), VX_TYPE_IMAGE);
+
+
+    /* create node, input (index 0) and output (index 2) will be made graph parameters
+     * so that we can enqueue and dequeue refs to them and thus pipeline the graph.
+     * d0[0], d2[0] are used only for their meta data.
+     * The actual input and output used for graph processing will be the
+     * refs that are enqueued later.
+     *
+     * d1 is also made a graph parameter, however it won't be made enqueueable.
+     */
+    ASSERT_VX_OBJECT(n0    = vxAndNode(graph, d0[0], d1, d2[0]), VX_TYPE_NODE);
+
+    /* input @ node index 0, becomes graph parameter 0 */
+    add_graph_parameter_by_node_index(graph, n0, 0);
+    /* output @ node index 2, becomes graph parameter 1 */
+    add_graph_parameter_by_node_index(graph, n0, 2);
+    /* input @ node index 1, becomes graph parameter 2 */
+    add_graph_parameter_by_node_index(graph, n0, 1);
+
+    /* set graph schedule config such that graph parameter @ index 0 and 1 are enqueuable */
+    graph_parameters_queue_params_list[0].graph_parameter_index = 0;
+    graph_parameters_queue_params_list[0].refs_list_size = num_buf;
+    graph_parameters_queue_params_list[0].refs_list = (vx_reference*)&d0[0];
+
+    graph_parameters_queue_params_list[1].graph_parameter_index = 1;
+    graph_parameters_queue_params_list[1].refs_list_size = num_buf;
+    graph_parameters_queue_params_list[1].refs_list = (vx_reference*)&d2[0];
+
+    /* Schedule mode auto is used, so we don't need to call vxScheduleGraph.
+     * Graph gets scheduled automatically as refs are enqueued to it
+     */
+    VX_CALL(vxSetGraphScheduleConfig(graph,
+			VX_GRAPH_SCHEDULE_MODE_QUEUE_AUTO,
+			2,
+			graph_parameters_queue_params_list
+			));
+
+    /* This graph parameter @ index 2 is not enqueue-able */
+    VX_CALL(vxSetGraphParameterByIndex(graph, 2, (vx_reference)d1));
+
+    VX_CALL(vxVerifyGraph(graph));
+
+    #if 1
+    /* fill reference data into input data reference */
+    for(buf_id=0; buf_id<num_buf; buf_id++)
+    {
+        ASSERT_NO_FAILURE(ct_image_copyto_vx_image(d0[buf_id], ref_src[buf_id]));
+    }
+
+    /* enqueue input and output references,
+     * input and output can be enqueued in any order
+     * can be enqueued all together; here they are enqueued one by one just as an example
+     */
+    for(buf_id=0; buf_id<num_buf; buf_id++)
+    {
+        VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 0, (vx_reference*)&d0[buf_id], 1));
+        VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 1, (vx_reference*)&d2[buf_id], 1));
+    }
+
+    buf_id = 0;
+
+    /* wait for graph instances to complete, compare output and recycle data buffers, schedule again */
+    for(loop_id=0; loop_id<(loop_cnt+num_buf); loop_id++)
+    {
+        vx_image out_img, in_img;
+        uint32_t num_refs;
+
+        /* Get output reference, waits until a reference is available */
+        VX_CALL(vxGraphParameterDequeueDoneRef(graph, 1, (vx_reference*)&out_img, 1, &num_refs));
+
+        /* Get consumed input reference, waits until a reference is available
+         */
+        VX_CALL(vxGraphParameterDequeueDoneRef(graph, 0, (vx_reference*)&in_img, 1, &num_refs));
+
+        /* when measuring performance don't check output since it affects graph performance numbers
+         */
+
+        if(loop_cnt > 100)
+        {
+            ct_update_progress(loop_id, loop_cnt+num_buf);
+        }
+
+        ASSERT_NO_FAILURE({
+            vxdst = ct_image_from_vx_image(out_img);
+        });
+
+        /* compare output */
+        ASSERT_EQ_CTIMAGE(ref_src[buf_id], vxdst);
+
+        buf_id = (buf_id+1)%num_buf;
+
+        /* recycles dequeued input and output refs 'loop_cnt' times */
+        if(loop_id<loop_cnt)
+        {
+            /* input and output can be enqueued in any order */
+            VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 0, (vx_reference*)&in_img, 1));
+            VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 1, (vx_reference*)&out_img, 1));
+        }
+    }
+
+    /* ensure all graph processing is complete */
+    VX_CALL(vxWaitGraph(graph));
+
+    #endif
+
+    VX_CALL(vxReleaseNode(&n0));
+    for(buf_id=0; buf_id<num_buf; buf_id++)
+    {
+        VX_CALL(vxReleaseImage(&d0[buf_id]));
+        VX_CALL(vxReleaseImage(&d2[buf_id]));
+    }
+    VX_CALL(vxReleaseImage(&d1));
+    VX_CALL(vxReleaseGraph(&graph));
+}
+
+static inline uint32_t get_ref_src_index(uint32_t num_buf, uint32_t objarr_idx, uint32_t buf_id)
+{
+    return (objarr_idx*num_buf + buf_id);
+}
+
+static inline vx_object_array get_object_array_parent_of_image(vx_image out_img,
+                vx_object_array d2[], vx_image img[], vx_uint32 num_buf)
+{
+    vx_object_array objarr = NULL;
+    vx_uint32 buf_id;
+
+    for(buf_id=0; buf_id<num_buf; buf_id++)
+    {
+        if(out_img==img[buf_id])
+        {
+            objarr = d2[buf_id];
+            break;
+        }
+    }
+    return objarr;
+}
+
+
+/*
+ *  d0     n0     d1     n1     d2
+ * OBJ -- NOT -- OBJ -- NOT -- OBJ
+ * ARR           ARR           ARR
+ * replicate     replicate     replicate
+ *
+ * This test case tests the following:
+ * - Object array with replicate attribute set
+ *
+ */
+TEST_WITH_ARG(GraphPipeline, testReplicateImage, Arg, PARAMETERS)
+{
+    vx_context context = context_->vx_context_;
+    vx_graph graph;
+    vx_object_array d0[MAX_NUM_BUF], d1, d2[MAX_NUM_BUF];
+    vx_image img0[MAX_NUM_BUF], img1, img2[MAX_NUM_BUF];
+    vx_image img_exemplar;
+    vx_node n0, n1;
+    vx_bool replicate[2] = { vx_true_e, vx_true_e };
+    vx_graph_parameter_queue_params_t graph_parameters_queue_params_list[2];
+
+    CT_Image ref_src[MAX_NUM_BUF*MAX_NUM_OBJ_ARR_ELEMENTS], vxdst[MAX_NUM_OBJ_ARR_ELEMENTS];
+    uint32_t width, height, seq_init, num_buf;
+    uint32_t buf_id, loop_id, loop_cnt;
+    uint32_t idx, objarr_idx, objarr_elements;
+
+    seq_init = 1;
+    width = arg_->width;
+    height = arg_->height;
+    num_buf = arg_->num_buf;
+    loop_cnt = arg_->loop_count;
+    objarr_elements = 2;
+
+    ASSERT(num_buf <= MAX_NUM_BUF);
+    ASSERT(objarr_elements <= MAX_NUM_OBJ_ARR_ELEMENTS);
+
+    /* fill reference data */
+    for(objarr_idx=0;objarr_idx<objarr_elements;objarr_idx++)
+    {
+        for(buf_id=0; buf_id<num_buf; buf_id++)
+        {
+            idx = get_ref_src_index(num_buf, objarr_idx, buf_id);
+
+            ASSERT_NO_FAILURE({
+                ref_src[idx] = ct_allocate_image(width, height, VX_DF_IMAGE_U8);
+                fillSequence(ref_src[idx], (uint32_t)(seq_init+(idx)));
+            });
+        }
+    }
+
+    ASSERT_VX_OBJECT(graph = vxCreateGraph(context), VX_TYPE_GRAPH);
+
+    ASSERT_VX_OBJECT(img_exemplar    = vxCreateImage(context, width, height, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
+
+    /* allocate Input and Output refs, multiple refs created to allow pipelining of graph */
+    for(buf_id=0; buf_id<num_buf; buf_id++)
+    {
+        ASSERT_VX_OBJECT(d0[buf_id]    = vxCreateObjectArray(context, (vx_reference)img_exemplar, objarr_elements), VX_TYPE_OBJECT_ARRAY);
+        ASSERT_VX_OBJECT(d2[buf_id]    = vxCreateObjectArray(context, (vx_reference)img_exemplar, objarr_elements), VX_TYPE_OBJECT_ARRAY);
+    }
+    ASSERT_VX_OBJECT(d1    = vxCreateObjectArray(context, (vx_reference)img_exemplar, objarr_elements), VX_TYPE_OBJECT_ARRAY);
+
+    for(buf_id=0; buf_id<num_buf; buf_id++)
+    {
+        img0[buf_id] = (vx_image)vxGetObjectArrayItem(d0[buf_id], 0);
+        img2[buf_id] = (vx_image)vxGetObjectArrayItem(d2[buf_id], 0);
+    }
+    img1 = (vx_image)vxGetObjectArrayItem(d1, 0);
+
+    ASSERT_VX_OBJECT(n0    = vxNotNode(graph, img0[0], img1), VX_TYPE_NODE);
+    ASSERT_VX_OBJECT(n1    = vxNotNode(graph, img1, img2[0]), VX_TYPE_NODE);
+
+    VX_CALL(vxReplicateNode(graph, n0, replicate, 2));
+    VX_CALL(vxReplicateNode(graph, n1, replicate, 2));
+
+    /* input @ node0 index 0, becomes graph parameter 0 */
+    add_graph_parameter_by_node_index(graph, n0, 0);
+    /* output @ node1 index 1, becomes graph parameter 1 */
+    add_graph_parameter_by_node_index(graph, n1, 1);
+
+    /* set graph schedule config such that graph parameter @ index 0 and 1 are enqueuable */
+    graph_parameters_queue_params_list[0].graph_parameter_index = 0;
+    graph_parameters_queue_params_list[0].refs_list_size = num_buf;
+    graph_parameters_queue_params_list[0].refs_list = (vx_reference*)&img0[0];
+
+    graph_parameters_queue_params_list[1].graph_parameter_index = 1;
+    graph_parameters_queue_params_list[1].refs_list_size = num_buf;
+    graph_parameters_queue_params_list[1].refs_list = (vx_reference*)&img2[0];
+
+    /* Schedule mode auto is used, so we don't need to call vxScheduleGraph.
+     * Graph gets scheduled automatically as refs are enqueued to it
+     */
+    VX_CALL(vxSetGraphScheduleConfig(graph,
+			VX_GRAPH_SCHEDULE_MODE_QUEUE_AUTO,
+			2,
+			graph_parameters_queue_params_list
+			));
+
+    VX_CALL(vxVerifyGraph(graph));
+
+    #if 1
+    /* fill reference data into input data reference */
+    for(buf_id=0; buf_id<num_buf; buf_id++)
+    {
+        for(objarr_idx=0;objarr_idx<objarr_elements;objarr_idx++)
+        {
+            vx_image image;
+
+            idx = get_ref_src_index(num_buf, objarr_idx, buf_id);
+
+            image = (vx_image)vxGetObjectArrayItem(d0[buf_id], objarr_idx);
+
+            ASSERT_NO_FAILURE(ct_image_copyto_vx_image(image, ref_src[idx]));
+
+            VX_CALL(vxReleaseImage(&image));
+        }
+    }
+
+    /* enqueue input and output references,
+     * input and output can be enqueued in any order
+     * can be enqueued all together; here they are enqueued one by one just as an example
+     */
+    for(buf_id=0; buf_id<num_buf; buf_id++)
+    {
+        vx_image image;
+
+        image = (vx_image)vxGetObjectArrayItem(d0[buf_id], 0);
+        VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 0, (vx_reference*)&image, 1));
+        VX_CALL(vxReleaseImage(&image));
+
+        image = (vx_image)vxGetObjectArrayItem(d2[buf_id], 0);
+        VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 1, (vx_reference*)&image, 1));
+        VX_CALL(vxReleaseImage(&image));
+    }
+
+    buf_id = 0;
+
+    /* wait for graph instances to complete, compare output and recycle data buffers, schedule again */
+    for(loop_id=0; loop_id<(loop_cnt+num_buf); loop_id++)
+    {
+        vx_image out_img, in_img;
+        vx_object_array out_objarr;
+        uint32_t num_refs;
+
+        /* Get output reference, waits until a reference is available */
+        VX_CALL(vxGraphParameterDequeueDoneRef(graph, 1, (vx_reference*)&out_img, 1, &num_refs));
+
+        /* Get consumed input reference, waits until a reference is available
+         */
+        VX_CALL(vxGraphParameterDequeueDoneRef(graph, 0, (vx_reference*)&in_img, 1, &num_refs));
+
+        if(loop_cnt > 100)
+        {
+            ct_update_progress(loop_id, loop_cnt+num_buf);
+        }
+
+        out_objarr = get_object_array_parent_of_image(out_img, d2, img2, num_buf);
+
+        for(objarr_idx=0;objarr_idx<objarr_elements;objarr_idx++)
+        {
+            vx_image image;
+
+            image = (vx_image)vxGetObjectArrayItem(out_objarr, objarr_idx);
+
+            ASSERT_NO_FAILURE({
+                vxdst[objarr_idx] = ct_image_from_vx_image(image);
+            });
+
+            VX_CALL(vxReleaseImage(&image));
+        }
+
+        for(objarr_idx=0;objarr_idx<objarr_elements;objarr_idx++)
+        {
+            idx = get_ref_src_index(num_buf, objarr_idx, buf_id);
+
+            /* compare output */
+            /* NOT of NOT should give back original image */
+            ASSERT_EQ_CTIMAGE(ref_src[idx], vxdst[objarr_idx]);
+        }
+
+        buf_id = (buf_id+1)%num_buf;
+
+        /* recycles dequeued input and output refs 'loop_cnt' times */
+        if(loop_id<loop_cnt)
+        {
+            /* input and output can be enqueued in any order */
+            VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 0, (vx_reference*)&in_img, 1));
+            VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 1, (vx_reference*)&out_img, 1));
+        }
+    }
+
+    /* ensure all graph processing is complete */
+    VX_CALL(vxWaitGraph(graph));
+
+    #endif
+    VX_CALL(vxReleaseNode(&n0));
+    VX_CALL(vxReleaseNode(&n1));
+    VX_CALL(vxReleaseImage(&img_exemplar));
+    for(buf_id=0; buf_id<num_buf; buf_id++)
+    {
+        VX_CALL(vxReleaseImage(&img0[buf_id]));
+        VX_CALL(vxReleaseImage(&img2[buf_id]));
+    }
+    for(buf_id=0; buf_id<num_buf; buf_id++)
+    {
+        VX_CALL(vxReleaseObjectArray(&d0[buf_id]));
+        VX_CALL(vxReleaseObjectArray(&d2[buf_id]));
+    }
+    VX_CALL(vxReleaseImage(&img1));
+    VX_CALL(vxReleaseObjectArray(&d1));
+    VX_CALL(vxReleaseGraph(&graph));
+}
+
+/*
+ *
+ * IMAGE -> MeanStdDev -> MEAN (SCALAR)
+ *              |
+ *              +-------> STD_DEV (SCALAR)
+ *
+ * This test case tests the following:
+ * - Pipelining with scalars at the graph output
+ *
+ */
+TEST_WITH_ARG(GraphPipeline, testScalarOutput, Arg, PARAMETERS)
+{
+    vx_context context = context_->vx_context_;
+    vx_graph graph;
+    vx_image d0[MAX_NUM_BUF];
+    vx_scalar mean_s[MAX_NUM_BUF], stddev_s[MAX_NUM_BUF];
+    vx_node n0;
+    vx_float32 mean_out=0.0, stddev_out=0.0;
+    vx_float32 mean_tolerance=1e-4, stddev_tolerance=1e-4;
+    vx_float32 mean_diff, stddev_diff;
+    CT_Image ref_src[MAX_NUM_BUF];
+    vx_float32 mean_ref[MAX_NUM_BUF];
+    vx_float32 stddev_ref[MAX_NUM_BUF];
+    uint32_t width, height, num_buf;
+    uint32_t buf_id, loop_id, loop_cnt;
+    uint64_t rng;
+    int a = 0, b = 256;
+    vx_graph_parameter_queue_params_t graph_parameters_queue_params_list[3];
+
+    width = arg_->width;
+    height = arg_->height;
+    num_buf = arg_->num_buf;
+    loop_cnt = arg_->loop_count;
+
+    rng = CT()->seed_;
+    mean_tolerance *= b;
+    stddev_tolerance *= b;
+
+    ASSERT(num_buf <= MAX_NUM_BUF);
+
+    /* fill reference data */
+    for(buf_id=0; buf_id<num_buf; buf_id++)
+    {
+        ASSERT_NO_FAILURE({
+            ref_src[buf_id] = ct_allocate_ct_image_random(width, height, VX_DF_IMAGE_U8, &rng, a, b);
+        });
+        reference_mean_stddev(ref_src[buf_id], &mean_ref[buf_id], &stddev_ref[buf_id]);
+        //printf("Ref %d: mean=%5.3f, stddev=%5.3f\n", buf_id, (float)mean_ref[buf_id], (float)stddev_ref[buf_id]);
+    }
+
+    ASSERT_VX_OBJECT(graph = vxCreateGraph(context), VX_TYPE_GRAPH);
+
+    for(buf_id=0; buf_id<num_buf; buf_id++)
+    {
+        ASSERT_VX_OBJECT(d0[buf_id]       = vxCreateImage(context, width, height, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
+        ASSERT_VX_OBJECT(mean_s[buf_id]   = vxCreateScalar(context, VX_TYPE_FLOAT32, &mean_out), VX_TYPE_SCALAR);
+        ASSERT_VX_OBJECT(stddev_s[buf_id] = vxCreateScalar(context, VX_TYPE_FLOAT32, &stddev_out), VX_TYPE_SCALAR);
+    }
+    ASSERT_VX_OBJECT(n0    = vxMeanStdDevNode(graph, d0[0], mean_s[0], stddev_s[0]), VX_TYPE_NODE);
+
+    /* input @ node0 index 0, becomes graph parameter 0 */
+    add_graph_parameter_by_node_index(graph, n0, 0);
+    /* output @ node0 index 1, becomes graph parameter 1 */
+    add_graph_parameter_by_node_index(graph, n0, 1);
+    /* output @ node0 index 2, becomes graph parameter 2 */
+    add_graph_parameter_by_node_index(graph, n0, 2);
+
+    /* set graph schedule config such that graph parameters @ index 0, 1 and 2 are enqueuable */
+    graph_parameters_queue_params_list[0].graph_parameter_index = 0;
+    graph_parameters_queue_params_list[0].refs_list_size = num_buf;
+    graph_parameters_queue_params_list[0].refs_list = (vx_reference*)&d0[0];
+
+    graph_parameters_queue_params_list[1].graph_parameter_index = 1;
+    graph_parameters_queue_params_list[1].refs_list_size = num_buf;
+    graph_parameters_queue_params_list[1].refs_list = (vx_reference*)&mean_s[0];
+
+    graph_parameters_queue_params_list[2].graph_parameter_index = 2;
+    graph_parameters_queue_params_list[2].refs_list_size = num_buf;
+    graph_parameters_queue_params_list[2].refs_list = (vx_reference*)&stddev_s[0];
+
+    /* Schedule mode auto is used, so we don't need to call vxScheduleGraph.
+     * Graph gets scheduled automatically as refs are enqueued to it
+     */
+    VX_CALL(vxSetGraphScheduleConfig(graph,
+            VX_GRAPH_SCHEDULE_MODE_QUEUE_AUTO,
+            3,
+            graph_parameters_queue_params_list
+            ));
+
+    VX_CALL(vxVerifyGraph(graph));
+
+    #if 1
+    /* fill reference data into input data reference */
+    for(buf_id=0; buf_id<num_buf; buf_id++)
+    {
+        ASSERT_NO_FAILURE(ct_image_copyto_vx_image(d0[buf_id], ref_src[buf_id]));
+    }
+
+    /* enqueue input and output references,
+     * input and output can be enqueued in any order
+     * can be enqueued all together; here they are enqueued one by one just as an example
+     */
+    for(buf_id=0; buf_id<num_buf; buf_id++)
+    {
+        VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 0, (vx_reference*)&d0[buf_id], 1));
+        VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 1, (vx_reference*)&mean_s[buf_id], 1));
+        VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 2, (vx_reference*)&stddev_s[buf_id], 1));
+    }
+
+    buf_id = 0;
+
+    /* wait for graph instances to complete, compare output and recycle data buffers, schedule again */
+    for(loop_id=0; loop_id<(loop_cnt+num_buf); loop_id++)
+    {
+        vx_image in_img;
+        vx_scalar out_mean, out_stddev;
+        uint32_t num_refs;
+
+        /* Get output reference, waits until a reference is available */
+        VX_CALL(vxGraphParameterDequeueDoneRef(graph, 2, (vx_reference*)&out_stddev, 1, &num_refs));
+
+        /* Get output reference, waits until a reference is available */
+        VX_CALL(vxGraphParameterDequeueDoneRef(graph, 1, (vx_reference*)&out_mean, 1, &num_refs));
+
+        /* Get consumed input reference, waits until a reference is available
+         */
+        VX_CALL(vxGraphParameterDequeueDoneRef(graph, 0, (vx_reference*)&in_img, 1, &num_refs));
+
+        /* when measuring performance don't check output since it affects graph performance numbers
+         */
+
+        if(loop_cnt > 100)
+        {
+            ct_update_progress(loop_id, loop_cnt+num_buf);
+        }
+
+        VX_CALL(vxCopyScalar(out_mean, &mean_out, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+        VX_CALL(vxCopyScalar(out_stddev, &stddev_out, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+        /* compare output with reference */
+        mean_diff = fabs(mean_ref[buf_id] - mean_out);
+        stddev_diff = fabs(stddev_ref[buf_id] - stddev_out);
+
+        //printf("Out %d: mean=%5.3f, stddev=%5.3f\n", loop_id, (float)mean_out, (float)stddev_out);
+
+        #if 1
+        if( mean_diff > mean_tolerance ||
+            stddev_diff > stddev_tolerance )
+        {
+            CT_RecordFailureAtFormat("Test case %d. width=%d, height=%d,\n"
+                                     "\tExpected: mean=%.5g, stddev=%.5g\n"
+                                     "\tActual:   mean=%.5g (diff=%.5g %s %.5g), stddev=%.5f (diff=%.5g %s %.5g)\n",
+                                     __FUNCTION__, __FILE__, __LINE__,
+                                     loop_id, width, height,
+                                     mean_ref[buf_id], stddev_ref[buf_id],
+                                     mean_out, mean_diff, mean_diff > mean_tolerance ? ">" : "<=", mean_tolerance,
+                                     stddev_out, stddev_diff, stddev_diff > stddev_tolerance ? ">" : "<=", stddev_tolerance);
+        }
+        #endif
+
+        buf_id = (buf_id+1)%num_buf;
+
+        /* recycles dequeued input and output refs 'loop_cnt' times */
+        if(loop_id<loop_cnt)
+        {
+            /* input and output can be enqueued in any order */
+            VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 0, (vx_reference*)&in_img, 1));
+            VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 1, (vx_reference*)&out_mean, 1));
+            VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 2, (vx_reference*)&out_stddev, 1));
+        }
+    }
+
+    /* ensure all graph processing is complete */
+    VX_CALL(vxWaitGraph(graph));
+
+    #endif
+    VX_CALL(vxReleaseNode(&n0));
+    for(buf_id=0; buf_id<num_buf; buf_id++)
+    {
+        VX_CALL(vxReleaseImage(&d0[buf_id]));
+        VX_CALL(vxReleaseScalar(&mean_s[buf_id]));
+        VX_CALL(vxReleaseScalar(&stddev_s[buf_id]));
+    }
+    VX_CALL(vxReleaseGraph(&graph));
+}
+
+/*
+ *  d0     n0     d1     n1     d2
+ * IMG -- NOT -- IMG -- NOT -- IMG
+ *
+ * This test case tests the following:
+ * - Single input, single output nodes
+ * - Two nodes on two different targets
+ * - Events are used to enqueue/dequeue buffers
+ *
+ */
+TEST_WITH_ARG(GraphPipeline, testEventHandling, Arg, PARAMETERS)
+{
+    vx_context context = context_->vx_context_;
+    vx_graph graph;
+    vx_image d0[MAX_NUM_BUF], d1, d2[MAX_NUM_BUF];
+    vx_node n0, n1;
+    vx_graph_parameter_queue_params_t graph_parameters_queue_params_list[2];
+
+    CT_Image ref_src[MAX_NUM_BUF], vxdst;
+    uint32_t width, height, seq_init, num_buf, loop_cnt;
+    uint32_t buf_id, loop_id, in_q_cnt;
+
+    vx_bool done;
+    vx_event_t event;
+
+    seq_init = 1;
+    width = arg_->width;
+    height = arg_->height;
+    num_buf = arg_->num_buf;
+    loop_cnt = arg_->loop_count;
+
+    ASSERT(num_buf <= MAX_NUM_BUF);
+
+    /* fill reference data */
+    for(buf_id=0; buf_id<num_buf; buf_id++)
+    {
+        ASSERT_NO_FAILURE({
+            ref_src[buf_id] = ct_allocate_image(width, height, VX_DF_IMAGE_U8);
+            fillSequence(ref_src[buf_id], (uint32_t)(seq_init+buf_id*10));
+        });
+    }
+
+    VX_CALL(vxEnableEvents(context));
+
+    ASSERT_VX_OBJECT(graph = vxCreateGraph(context), VX_TYPE_GRAPH);
+    for(buf_id=0; buf_id<num_buf; buf_id++)
+    {
+        ASSERT_VX_OBJECT(d0[buf_id] = vxCreateImage(context, width, height, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
+        ASSERT_VX_OBJECT(d2[buf_id] = vxCreateImage(context, width, height, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
+    }
+    ASSERT_VX_OBJECT(d1    = vxCreateVirtualImage(graph, width, height, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
+
+    ASSERT_VX_OBJECT(n0    = vxNotNode(graph, d0[0], d1), VX_TYPE_NODE);
+    ASSERT_VX_OBJECT(n1    = vxNotNode(graph, d1, d2[0]), VX_TYPE_NODE);
+
+    /* input @ n0 index 0, becomes graph parameter 0 */
+    add_graph_parameter_by_node_index(graph, n0, 0);
+    /* output @ n1 index 1, becomes graph parameter 1 */
+    add_graph_parameter_by_node_index(graph, n1, 1);
+
+    /* set graph schedule config such that graph parameter @ index 0 and 1 are enqueuable */
+    graph_parameters_queue_params_list[0].graph_parameter_index = 0;
+    graph_parameters_queue_params_list[0].refs_list_size = num_buf;
+    graph_parameters_queue_params_list[0].refs_list = (vx_reference*)&d0[0];
+
+    graph_parameters_queue_params_list[1].graph_parameter_index = 1;
+    graph_parameters_queue_params_list[1].refs_list_size = num_buf;
+    graph_parameters_queue_params_list[1].refs_list = (vx_reference*)&d2[0];
+
+    /* Schedule mode auto is used, so we don't need to call vxScheduleGraph.
+     * Graph gets scheduled automatically as refs are enqueued to it
+     */
+    VX_CALL(vxSetGraphScheduleConfig(graph,
+            VX_GRAPH_SCHEDULE_MODE_QUEUE_AUTO,
+            2,
+            graph_parameters_queue_params_list
+            ));
+
+    VX_CALL(vxRegisterEvent((vx_reference)graph, VX_EVENT_GRAPH_PARAMETER_CONSUMED, 0, GRAPH_CONSUMED_EVENT));
+    VX_CALL(vxRegisterEvent((vx_reference)n0, VX_EVENT_NODE_COMPLETED, 0, NODE0_COMPLETED_EVENT));
+    VX_CALL(vxRegisterEvent((vx_reference)n1, VX_EVENT_NODE_COMPLETED, 0, NODE1_COMPLETED_EVENT));
+    VX_CALL(vxRegisterEvent((vx_reference)graph, VX_EVENT_GRAPH_COMPLETED, 0, GRAPH_COMPLETED_EVENT));
+
+    VX_CALL(vxVerifyGraph(graph));
+
+    #if 1
+    /* clear pending events */
+    while( vxWaitEvent(context, &event, vx_true_e) == VX_SUCCESS);
+
+    /* fill reference data into input data reference */
+    for(buf_id=0; buf_id<num_buf; buf_id++)
+    {
+        ASSERT_NO_FAILURE(ct_image_copyto_vx_image(d0[buf_id], ref_src[buf_id]));
+    }
+
+    /* enqueue input and output references,
+     * input and output can be enqueued in any order
+     * can be enqueued all together; here they are enqueued one by one just as an example
+     */
+    for(buf_id=0; buf_id<num_buf; buf_id++)
+    {
+        VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 0, (vx_reference*)&d0[buf_id], 1));
+        VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 1, (vx_reference*)&d2[buf_id], 1));
+    }
+
+    buf_id = 0;
+
+    done = vx_false_e;
+    loop_id = 0;
+    in_q_cnt = 0;
+    while(!done)
+    {
+        VX_CALL(vxWaitEvent(context, &event, vx_false_e));
+
+        if(event.app_value==GRAPH_CONSUMED_EVENT)
+        {
+            vx_image in_img;
+            uint32_t num_refs;
+
+            /* input should be free at this point */
+            /* recycle input buffer, input data is not changed in this test */
+            VX_CALL(vxGraphParameterDequeueDoneRef(graph, 0, (vx_reference*)&in_img, 1, &num_refs));
+            if(in_q_cnt<loop_cnt)
+            {
+                VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 0, (vx_reference*)&in_img, 1));
+            }
+            in_q_cnt++;
+        }
+        else
+        if(event.app_value==NODE1_COMPLETED_EVENT)
+        {
+            vx_image out_img;
+            uint32_t num_refs;
+
+            /* Get output reference, waits until a reference is available */
+            VX_CALL(vxGraphParameterDequeueDoneRef(graph, 1, (vx_reference*)&out_img, 1, &num_refs));
+
+            /* when measuring performance don't check output since it affects graph performance numbers
+             */
+
+            if(loop_cnt > 100)
+            {
+                ct_update_progress(loop_id, loop_cnt+num_buf);
+            }
+
+            ASSERT_NO_FAILURE({
+                vxdst = ct_image_from_vx_image(out_img);
+            });
+
+            /* compare output */
+            ASSERT_EQ_CTIMAGE(ref_src[buf_id], vxdst);
+
+            buf_id = (buf_id+1)%num_buf;
+
+            /* recycles dequeued input and output refs 'loop_cnt' times */
+            if(loop_id<loop_cnt)
+            {
+                /* input and output can be enqueued in any order */
+                VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 1, (vx_reference*)&out_img, 1));
+            }
+            else
+            {
+                /* send user event to exit, id is 0xDEADBEAF, and parameter is some known pattern 0x12345678 */
+                VX_CALL(vxSendUserEvent(context, 0xDEADBEAFu, (void*)0x12345678u));
+            }
+
+            loop_id++;
+        }
+        else
+        if((event.type==VX_EVENT_USER)
+            && (event.app_value == 0xDEADBEAFu)
+            && (event.event_info.user_event.user_event_parameter == (void*)0x12345678u)
+            )
+        {
+            done = vx_true_e;
+        }
+    }
+
+    VX_CALL(vxWaitGraph(graph));
+
+    /* handle last few buffers */
+    done = vx_false_e;
+    while(!done)
+    {
+        vx_image in_img, out_img;
+        vx_uint32 in_num_refs, out_num_refs;
+
+        /* recycle and access output data */
+        VX_CALL(vxGraphParameterCheckDoneRef(graph, 1, &out_num_refs));
+        if(out_num_refs>0)
+        {
+            VX_CALL(vxGraphParameterDequeueDoneRef(graph, 1, (vx_reference*)&out_img, 1, &out_num_refs));
+
+
+            ASSERT_NO_FAILURE({
+                vxdst = ct_image_from_vx_image(out_img);
+            });
+            /* compare output */
+            /* NOT of NOT should give back original image */
+            ASSERT_EQ_CTIMAGE(ref_src[buf_id], vxdst);
+        }
+        /* recycle and access input data */
+        VX_CALL(vxGraphParameterCheckDoneRef(graph, 0, &in_num_refs));
+        if(in_num_refs>0)
+        {
+            VX_CALL(vxGraphParameterDequeueDoneRef(graph, 0, (vx_reference*)&in_img, 1, &in_num_refs));
+        }
+        buf_id = (buf_id+1)%num_buf;
+
+        if(in_num_refs == 0 && out_num_refs == 0)
+        {
+            done = vx_true_e;
+        }
+    }
+
+    /* clear pending events */
+    while( vxWaitEvent(context, &event, vx_true_e) == VX_SUCCESS);
+
+    #endif
+    VX_CALL(vxReleaseNode(&n0));
+    VX_CALL(vxReleaseNode(&n1));
+    for(buf_id=0; buf_id<num_buf; buf_id++)
+    {
+        VX_CALL(vxReleaseImage(&d0[buf_id]));
+        VX_CALL(vxReleaseImage(&d2[buf_id]));
+    }
+    VX_CALL(vxReleaseImage(&d1));
+    VX_CALL(vxReleaseGraph(&graph));
+}
+
+/*
+ *
+ * This test case tests the following:
+ * - Disabling and re-enabling of events
+ *
+ */
+TEST(GraphPipeline, testEventHandlingDisableEvents)
+{
+    vx_context context = context_->vx_context_;
+    vx_event_t event;
+
+    VX_CALL(vxEnableEvents(context));
+
+    /* send one user event, this should be received */
+    VX_CALL(vxSendUserEvent(context, 0x1u, NULL));
+
+    /* disable events and send another event */
+    VX_CALL(vxDisableEvents(context));
+    /* this event should get dropped and the send-event API should return a failure */
+    ASSERT(vxSendUserEvent(context, 0x2u, NULL)!=VX_SUCCESS);
+
+    /* re-enable events and send another event */
+    VX_CALL(vxEnableEvents(context));
+    /* this event should be received */
+    VX_CALL(vxSendUserEvent(context, 0x3u, NULL));
+
+    /* wait for one event, this should be the first one */
+    VX_CALL(vxWaitEvent(context, &event, vx_true_e));
+    ASSERT(event.type==VX_EVENT_USER && event.app_value==0x1u);
+
+    /* wait for one more event, this should be the third one */
+    VX_CALL(vxWaitEvent(context, &event, vx_true_e));
+    ASSERT(event.type==VX_EVENT_USER && event.app_value==0x3u);
+
+    /* wait for one more event, there should be no more events */
+    ASSERT(vxWaitEvent(context, &event, vx_true_e) != VX_SUCCESS);
+}
+
+/*
+ *  d0           n0           d1            n1         d2
+ * SCALAR -- USER_KERNEL -- SCALAR -- USER_KERNEL -- SCALAR
+ *                            |            |
+ *                            + -----------+
+ *
+ * This test case tests the following:
+ * - User kernel nodes
+ * - Nodes with optional parameters
+ *
+ */
+TEST_WITH_ARG(GraphPipeline, testUserKernel, Arg, PARAMETERS)
+{
+    vx_context context = context_->vx_context_;
+    vx_graph graph;
+    vx_scalar d0[MAX_NUM_BUF], d1, d2[MAX_NUM_BUF];
+    vx_scalar in_scalar, out_scalar;
+    vx_node n0, n1;
+    vx_graph_parameter_queue_params_t graph_parameters_queue_params_list[2];
+    vx_uint32 in_value[MAX_NUM_BUF], ref_out_value[MAX_NUM_BUF];
+    vx_uint32 tmp_value = 0;
+
+    uint32_t num_buf;
+    uint32_t buf_id, loop_id, loop_cnt;
+
+    test_user_kernel_register(context);
+
+    num_buf = arg_->num_buf;
+    loop_cnt = arg_->loop_count;
+
+    ASSERT(num_buf <= MAX_NUM_BUF);
+
+    /* fill reference data */
+    for(buf_id=0; buf_id<num_buf; buf_id++)
+    {
+        in_value[buf_id] = 10*(buf_id+1);
+        ref_out_value[buf_id] = 2 * in_value[buf_id];
+    }
+
+    ASSERT_VX_OBJECT(graph = vxCreateGraph(context), VX_TYPE_GRAPH);
+
+    /* allocate Input and Output refs, multiple refs created to allow pipelining of graph */
+    for(buf_id=0; buf_id<num_buf; buf_id++)
+    {
+        ASSERT_VX_OBJECT(d0[buf_id]    = vxCreateScalar(context, VX_TYPE_UINT32, &tmp_value), VX_TYPE_SCALAR);
+        ASSERT_VX_OBJECT(d2[buf_id]    = vxCreateScalar(context, VX_TYPE_UINT32, &tmp_value), VX_TYPE_SCALAR);
+    }
+    ASSERT_VX_OBJECT(d1    = vxCreateScalar(context, VX_TYPE_UINT32, &tmp_value), VX_TYPE_SCALAR);
+
+    ASSERT_VX_OBJECT(n0    = test_user_kernel_node(graph, d0[0], NULL, d1, NULL), VX_TYPE_NODE);
+    ASSERT_VX_OBJECT(n1    = test_user_kernel_node(graph, d1, d1, d2[0], NULL), VX_TYPE_NODE);
+
+    /* input @ n0 index 0, becomes graph parameter 0 */
+    add_graph_parameter_by_node_index(graph, n0, 0);
+    /* output @ n1 index 2, becomes graph parameter 1 */
+    add_graph_parameter_by_node_index(graph, n1, 2);
+
+    /* set graph schedule config such that graph parameter @ index 0, 1 are enqueuable */
+    graph_parameters_queue_params_list[0].graph_parameter_index = 0;
+    graph_parameters_queue_params_list[0].refs_list_size = num_buf;
+    graph_parameters_queue_params_list[0].refs_list = (vx_reference*)&d0[0];
+
+    graph_parameters_queue_params_list[1].graph_parameter_index = 1;
+    graph_parameters_queue_params_list[1].refs_list_size = num_buf;
+    graph_parameters_queue_params_list[1].refs_list = (vx_reference*)&d2[0];
+
+    /* Schedule mode auto is used, so we don't need to call vxScheduleGraph.
+     * Graph gets scheduled automatically as refs are enqueued to it
+     */
+    VX_CALL(vxSetGraphScheduleConfig(graph,
+            VX_GRAPH_SCHEDULE_MODE_QUEUE_AUTO,
+            2,
+            graph_parameters_queue_params_list
+            ));
+
+    VX_CALL(vxVerifyGraph(graph));
+
+    #if 1
+    /* fill reference data into input data reference */
+    for(buf_id=0; buf_id<num_buf; buf_id++)
+    {
+        ASSERT_NO_FAILURE(
+            vxCopyScalar(d0[buf_id],
+            &in_value[buf_id],
+            VX_WRITE_ONLY,
+            VX_MEMORY_TYPE_HOST));
+    }
+
+    /* enqueue input and output references,
+     * input and output can be enqueued in any order
+     * can be enqueued all together; here they are enqueued one by one just as an example
+     */
+    for(buf_id=0; buf_id<num_buf; buf_id++)
+    {
+        VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 1, (vx_reference*)&d2[buf_id], 1));
+        VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 0, (vx_reference*)&d0[buf_id], 1));
+    }
+
+    buf_id = 0;
+
+    /* wait for graph instances to complete, compare output and recycle data buffers, schedule again */
+    for(loop_id=0; loop_id<(loop_cnt+num_buf); loop_id++)
+    {
+        uint32_t num_refs;
+
+        /* Get output reference, waits until a reference is available */
+        VX_CALL(vxGraphParameterDequeueDoneRef(graph, 1, (vx_reference*)&out_scalar, 1, &num_refs));
+
+        /* Get consumed input reference, waits until a reference is available
+         */
+        VX_CALL(vxGraphParameterDequeueDoneRef(graph, 0, (vx_reference*)&in_scalar, 1, &num_refs));
+
+        if(loop_cnt > 100)
+        {
+            ct_update_progress(loop_id, loop_cnt+num_buf);
+        }
+
+        VX_CALL(vxCopyScalar(out_scalar, &tmp_value, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+
+        /* compare output */
+        ASSERT_EQ_INT(tmp_value, ref_out_value[buf_id]);
+
+        /* clear value in output */
+        tmp_value = 0;
+        VX_CALL(vxCopyScalar(out_scalar, &tmp_value, VX_WRITE_ONLY, VX_MEMORY_TYPE_HOST));
+
+        buf_id = (buf_id+1)%num_buf;
+
+        /* recycles dequeued input and output refs 'loop_cnt' times */
+        if(loop_id<loop_cnt)
+        {
+            /* input and output can be enqueued in any order */
+            VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 1, (vx_reference*)&out_scalar, 1));
+            VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 0, (vx_reference*)&in_scalar, 1));
+        }
+    }
+
+    /* ensure all graph processing is complete */
+    VX_CALL(vxWaitGraph(graph));
+
+    #endif
+
+    VX_CALL(vxReleaseNode(&n0));
+    VX_CALL(vxReleaseNode(&n1));
+    for(buf_id=0; buf_id<num_buf; buf_id++)
+    {
+        VX_CALL(vxReleaseScalar(&d0[buf_id]));
+        VX_CALL(vxReleaseScalar(&d2[buf_id]));
+    }
+    VX_CALL(vxReleaseScalar(&d1));
+    VX_CALL(vxReleaseGraph(&graph));
+
+    test_user_kernel_unregister(context);
+}
+
+/*
+ *  d0     n0     d1     n1     d2
+ * IMG -- NOT -- IMG -- NOT -- IMG
+ *
+ * This test case tests the following:
+ * - Single input, single output nodes
+ * - Two nodes on two different targets
+ *
+ */
+TEST_WITH_ARG(GraphPipeline, testManualSchedule, Arg, PARAMETERS)
+{
+    vx_context context = context_->vx_context_;
+    vx_graph graph;
+    vx_image d0[MAX_NUM_BUF], d1, d2[MAX_NUM_BUF];
+    vx_node n0, n1;
+    vx_graph_parameter_queue_params_t graph_parameters_queue_params_list[3];
+
+    CT_Image ref_src[MAX_NUM_BUF], vxdst;
+    uint32_t width, height, seq_init, num_buf;
+    uint32_t buf_id, loop_id, loop_cnt;
+
+    seq_init = 1;
+    width = arg_->width;
+    height = arg_->height;
+    num_buf = arg_->num_buf;
+    loop_cnt = arg_->loop_count;
+
+    ASSERT(num_buf <= MAX_NUM_BUF);
+
+    /* fill reference data */
+    for(buf_id=0; buf_id<num_buf; buf_id++)
+    {
+        ASSERT_NO_FAILURE({
+            ref_src[buf_id] = ct_allocate_image(width, height, VX_DF_IMAGE_U8);
+            fillSequence(ref_src[buf_id], (uint32_t)(seq_init+buf_id*10));
+        });
+    }
+
+    ASSERT_VX_OBJECT(graph = vxCreateGraph(context), VX_TYPE_GRAPH);
+
+    /* allocate Input and Output refs, multiple refs created to allow pipelining of graph */
+    for(buf_id=0; buf_id<num_buf; buf_id++)
+    {
+        ASSERT_VX_OBJECT(d0[buf_id]    = vxCreateImage(context, width, height, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
+        ASSERT_VX_OBJECT(d2[buf_id]    = vxCreateImage(context, width, height, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
+    }
+    ASSERT_VX_OBJECT(d1    = vxCreateVirtualImage(graph, width, height, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
+
+    ASSERT_VX_OBJECT(n0    = vxNotNode(graph, d0[0], d1), VX_TYPE_NODE);
+    ASSERT_VX_OBJECT(n1    = vxNotNode(graph, d1, d2[0]), VX_TYPE_NODE);
+
+    /* input @ n0 index 0, becomes graph parameter 0 */
+    add_graph_parameter_by_node_index(graph, n0, 0);
+    /* output @ n1 index 1, becomes graph parameter 1 */
+    add_graph_parameter_by_node_index(graph, n1, 1);
+
+    /* set graph schedule config such that graph parameter @ index 0, 1 are enqueuable */
+    graph_parameters_queue_params_list[0].graph_parameter_index = 0;
+    graph_parameters_queue_params_list[0].refs_list_size = num_buf;
+    graph_parameters_queue_params_list[0].refs_list = (vx_reference*)&d0[0];
+
+    graph_parameters_queue_params_list[1].graph_parameter_index = 1;
+    graph_parameters_queue_params_list[1].refs_list_size = num_buf;
+    graph_parameters_queue_params_list[1].refs_list = (vx_reference*)&d2[0];
+
+    /* Schedule mode manual is used, here we need to call vxScheduleGraph
+     */
+    VX_CALL(vxSetGraphScheduleConfig(graph,
+            VX_GRAPH_SCHEDULE_MODE_QUEUE_MANUAL,
+            2,
+            graph_parameters_queue_params_list
+            ));
+
+    VX_CALL(vxVerifyGraph(graph));
+
+    #if 1
+    /* fill reference data into input data reference */
+    for(buf_id=0; buf_id<num_buf; buf_id++)
+    {
+        ASSERT_NO_FAILURE(ct_image_copyto_vx_image(d0[buf_id], ref_src[buf_id]));
+    }
+
+    loop_cnt = (loop_cnt + num_buf) / num_buf;
+
+    /* wait for graph instances to complete, compare output and recycle data buffers, schedule again */
+    for(loop_id=0; loop_id<(loop_cnt); loop_id++)
+    {
+        vx_image out_img[MAX_NUM_BUF], in_img[MAX_NUM_BUF];
+        uint32_t num_refs_in, num_refs_out;
+
+        /* enqueue input and output references,
+         * input and output can be enqueued in any order
+         * can be enqueued all together; here they are enqueued one by one just as an example
+         */
+        for(buf_id=0; buf_id<num_buf; buf_id++)
+        {
+            VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 1, (vx_reference*)&d2[buf_id], 1));
+            VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 0, (vx_reference*)&d0[buf_id], 1));
+        }
+
+        VX_CALL(vxScheduleGraph(graph));
+        VX_CALL(vxWaitGraph(graph));
+
+        /* Get output reference, waits until a reference is available */
+        VX_CALL(vxGraphParameterDequeueDoneRef(graph, 1, (vx_reference*)out_img, num_buf, &num_refs_out));
+
+        /* Get consumed input reference, waits until a reference is available
+         */
+        VX_CALL(vxGraphParameterDequeueDoneRef(graph, 0, (vx_reference*)in_img, num_buf, &num_refs_in));
+
+        ASSERT_EQ_INT(num_refs_in, num_buf);
+        ASSERT_EQ_INT(num_refs_out, num_buf);
+
+        if(loop_cnt > 100)
+        {
+            ct_update_progress(loop_id, loop_cnt);
+        }
+
+        for(buf_id=0; buf_id<num_buf; buf_id++)
+        {
+            ASSERT_NO_FAILURE({
+                vxdst = ct_image_from_vx_image(out_img[buf_id]);
+            });
+
+            /* compare output */
+            ASSERT_EQ_CTIMAGE(ref_src[buf_id], vxdst);
+        }
+    }
+
+    #endif
+
+    VX_CALL(vxReleaseNode(&n0));
+    VX_CALL(vxReleaseNode(&n1));
+    for(buf_id=0; buf_id<num_buf; buf_id++)
+    {
+        VX_CALL(vxReleaseImage(&d0[buf_id]));
+        VX_CALL(vxReleaseImage(&d2[buf_id]));
+    }
+    VX_CALL(vxReleaseImage(&d1));
+    VX_CALL(vxReleaseGraph(&graph));
+}
+
+/*
+ *  d0                        n0           d2
+ * SCALAR -- delay (0) --  USER_KERNEL -- SCALAR
+ *               |            |
+ *           delay (-1) ------+
+ *
+ *
+ * This test case tests the following:
+ * - Delay objects
+ *
+ */
+TEST_WITH_ARG(GraphPipeline, testDelay1, Arg, PARAMETERS)
+{
+    vx_context context = context_->vx_context_;
+    vx_graph graph;
+    vx_scalar d0[MAX_NUM_BUF], d2[MAX_NUM_BUF], exemplar;
+    vx_delay delay;
+    vx_scalar in_scalar, out_scalar;
+    vx_node n0;
+    vx_graph_parameter_queue_params_t graph_parameters_queue_params_list[2];
+    vx_uint32 in_value[MAX_NUM_BUF], ref_out_value[MAX_NUM_BUF];
+    vx_uint32 tmp_value = 0;
+
+    uint32_t num_buf;
+    uint32_t buf_id, loop_id, loop_cnt;
+
+    test_user_kernel_register(context);
+
+    num_buf = arg_->num_buf;
+    loop_cnt = arg_->loop_count;
+
+    /* since the delay has 2 slots, num_buf MUST be at least 2 */
+    if(num_buf < 2)
+    {
+        num_buf = 2;
+    }
+
+    ASSERT(num_buf <= MAX_NUM_BUF);
+
+    /* fill reference data */
+    for(buf_id=0; buf_id<num_buf; buf_id++)
+    {
+        in_value[buf_id] = 10*(buf_id+1);
+    }
+    for(buf_id=0; buf_id<num_buf; buf_id++)
+    {
+        ref_out_value[buf_id] = in_value[buf_id]
+                          + in_value[ (num_buf + buf_id-1)%num_buf ];
+    }
+
+    ASSERT_VX_OBJECT(graph = vxCreateGraph(context), VX_TYPE_GRAPH);
+
+    /* allocate Input and Output refs, multiple refs created to allow pipelining of graph */
+    for(buf_id=0; buf_id<num_buf; buf_id++)
+    {
+        ASSERT_VX_OBJECT(d2[buf_id]    = vxCreateScalar(context, VX_TYPE_UINT32, &tmp_value), VX_TYPE_SCALAR);
+    }
+
+    /* allocate input refs; delay slot 0 is d0[0], delay slot -1 is d0[num_buf-1],
+     * allocate other objects in between
+     */
+    ASSERT_VX_OBJECT(exemplar  = vxCreateScalar(context, VX_TYPE_UINT32, &tmp_value), VX_TYPE_SCALAR);
+    ASSERT_VX_OBJECT(delay     = vxCreateDelay(context, (vx_reference)exemplar, 2), VX_TYPE_DELAY);
+    d0[0] = (vx_scalar)vxGetReferenceFromDelay(delay, 0);
+    for(buf_id=1; buf_id<num_buf-1; buf_id++)
+    {
+        ASSERT_VX_OBJECT(d0[buf_id]    = vxCreateScalar(context, VX_TYPE_UINT32, &tmp_value), VX_TYPE_SCALAR);
+    }
+    d0[num_buf-1] = (vx_scalar)vxGetReferenceFromDelay(delay, -1);
+    VX_CALL(vxReleaseScalar(&exemplar));
+
+    ASSERT_VX_OBJECT(n0    = test_user_kernel_node(graph, d0[0], d0[num_buf-1], d2[0], NULL), VX_TYPE_NODE);
+
+    /* input @ n0 index 0, becomes graph parameter 0 */
+    add_graph_parameter_by_node_index(graph, n0, 0);
+    /* output @ n0 index 2, becomes graph parameter 1 */
+    add_graph_parameter_by_node_index(graph, n0, 2);
+
+    /* set graph schedule config such that graph parameter @ index 0, 1 are enqueuable */
+    graph_parameters_queue_params_list[0].graph_parameter_index = 0;
+    graph_parameters_queue_params_list[0].refs_list_size = num_buf;
+    graph_parameters_queue_params_list[0].refs_list = (vx_reference*)&d0[0];
+
+    graph_parameters_queue_params_list[1].graph_parameter_index = 1;
+    graph_parameters_queue_params_list[1].refs_list_size = num_buf;
+    graph_parameters_queue_params_list[1].refs_list = (vx_reference*)&d2[0];
+
+    /* Schedule mode auto is used, so we don't need to call vxScheduleGraph.
+     * Graph gets scheduled automatically as refs are enqueued to it
+     */
+    VX_CALL(vxSetGraphScheduleConfig(graph,
+            VX_GRAPH_SCHEDULE_MODE_QUEUE_AUTO,
+            2,
+            graph_parameters_queue_params_list
+            ));
+
+    /* always auto age delay in pipelined graph */
+    VX_CALL(vxRegisterAutoAging(graph, delay));
+
+    VX_CALL(vxVerifyGraph(graph));
+
+    #if 1
+    /* fill reference data into input data reference */
+    for(buf_id=0; buf_id<num_buf; buf_id++)
+    {
+        ASSERT_NO_FAILURE(
+            vxCopyScalar(d0[buf_id],
+            &in_value[buf_id],
+            VX_WRITE_ONLY,
+            VX_MEMORY_TYPE_HOST));
+    }
+
+    /* enqueue input and output references,
+     * input and output can be enqueued in any order
+     * can be enqueued all together; here they are enqueued one by one just as an example
+     */
+    for(buf_id=0; buf_id<num_buf; buf_id++)
+    {
+        VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 1, (vx_reference*)&d2[buf_id], 1));
+    }
+    /* the last buf is already set at delay slot -1, so don't enqueue that ref */
+    for(buf_id=0; buf_id<num_buf-1; buf_id++)
+    {
+        VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 0, (vx_reference*)&d0[buf_id], 1));
+    }
+
+    buf_id = 0;
+
+    /* wait for graph instances to complete, compare output and recycle data buffers, schedule again */
+    for(loop_id=0; loop_id<(loop_cnt+num_buf-1); loop_id++)
+    {
+        uint32_t num_refs;
+
+        /* Get output reference, waits until a reference is available */
+        VX_CALL(vxGraphParameterDequeueDoneRef(graph, 1, (vx_reference*)&out_scalar, 1, &num_refs));
+
+        /* Get consumed input reference, waits until a reference is available
+         */
+        VX_CALL(vxGraphParameterDequeueDoneRef(graph, 0, (vx_reference*)&in_scalar, 1, &num_refs));
+
+        VX_CALL(vxCopyScalar(out_scalar, &tmp_value, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+
+        /* compare output */
+        //printf(" %d: out = %d ref = %d\n", loop_id, tmp_value, ref_out_value[buf_id]);
+        ASSERT_EQ_INT(tmp_value, ref_out_value[buf_id]);
+
+        /* clear value in output */
+        tmp_value = 0;
+        VX_CALL(vxCopyScalar(out_scalar, &tmp_value, VX_WRITE_ONLY, VX_MEMORY_TYPE_HOST));
+
+        if(loop_cnt > 100)
+        {
+            ct_update_progress(loop_id, loop_cnt+num_buf);
+        }
+
+        buf_id = (buf_id+1)%num_buf;
+
+        /* recycles dequeued input and output refs 'loop_cnt' times */
+        if(loop_id<loop_cnt)
+        {
+            /* input and output can be enqueued in any order */
+            VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 1, (vx_reference*)&out_scalar, 1));
+            VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 0, (vx_reference*)&in_scalar, 1));
+        }
+    }
+
+    /* ensure all graph processing is complete */
+    VX_CALL(vxWaitGraph(graph));
+
+    #endif
+
+    VX_CALL(vxReleaseNode(&n0));
+    for(buf_id=1; buf_id<num_buf-1; buf_id++)
+    {
+        VX_CALL(vxReleaseScalar(&d0[buf_id]));
+    }
+    for(buf_id=0; buf_id<num_buf; buf_id++)
+    {
+        VX_CALL(vxReleaseScalar(&d2[buf_id]));
+    }
+    VX_CALL(vxReleaseDelay(&delay));
+    VX_CALL(vxReleaseGraph(&graph));
+
+    test_user_kernel_unregister(context);
+}
+
+/*
+ *  d0                        n0           d2
+ * SCALAR -- delay (0) --  USER_KERNEL -- SCALAR
+ *               |            |
+ *           delay (-1) ------+--- USER_KERNEL -- null
+ *               |                     n1
+ *               |                     |
+ *           delay (-2) ---------------+
+ *
+ * This test case tests the following:
+ * - Delay objects with 3 delay slots
+ * - Delay slot connected to two inputs
+ *
+ */
+TEST_WITH_ARG(GraphPipeline, testDelay2, Arg, PARAMETERS)
+{
+    vx_context context = context_->vx_context_;
+    vx_graph graph;
+    vx_scalar d0[MAX_NUM_BUF], d2[MAX_NUM_BUF], exemplar;
+    vx_delay delay;
+    vx_scalar in_scalar, out_scalar;
+    vx_node n0, n1;
+    vx_graph_parameter_queue_params_t graph_parameters_queue_params_list[2];
+    vx_uint32 in_value[MAX_NUM_BUF], ref_out_value[MAX_NUM_BUF];
+    vx_uint32 tmp_value = 0;
+
+    uint32_t num_buf;
+    uint32_t buf_id, loop_id, loop_cnt, k;
+
+    test_user_kernel_register(context);
+
+    num_buf = arg_->num_buf;
+    loop_cnt = arg_->loop_count;
+
+    /* since the delay has 3 slots, num_buf MUST be at least 3 */
+    if(num_buf < 3)
+    {
+        num_buf = 3;
+    }
+
+    ASSERT(num_buf <= MAX_NUM_BUF);
+
+    /* fill reference data */
+    for(buf_id=0; buf_id<num_buf; buf_id++)
+    {
+        in_value[buf_id] = 10*(buf_id+1);
+    }
+    k=0;
+    for(buf_id=0; buf_id<num_buf; buf_id++)
+    {
+        ref_out_value[buf_id] = in_value[k]
+                          + in_value[ (num_buf + k - 2)%num_buf ];
+        k = (k+2)%num_buf;
+    }
+
+    ASSERT_VX_OBJECT(graph = vxCreateGraph(context), VX_TYPE_GRAPH);
+
+    /* allocate Input and Output refs, multiple refs created to allow pipelining of graph */
+    for(buf_id=0; buf_id<num_buf; buf_id++)
+    {
+        ASSERT_VX_OBJECT(d2[buf_id]    = vxCreateScalar(context, VX_TYPE_UINT32, &tmp_value), VX_TYPE_SCALAR);
+    }
+
+    /* allocate input refs; delay slot 0 is d0[0], delay slot -1 is d0[num_buf-2],
+     * delay slot -2 is d0[num_buf-1]; allocate other objects in between
+     */
+    ASSERT_VX_OBJECT(exemplar  = vxCreateScalar(context, VX_TYPE_UINT32, &tmp_value), VX_TYPE_SCALAR);
+    ASSERT_VX_OBJECT(delay     = vxCreateDelay(context, (vx_reference)exemplar, 3), VX_TYPE_DELAY);
+    d0[0] = (vx_scalar)vxGetReferenceFromDelay(delay, 0);
+    for(buf_id=1; buf_id<num_buf-2; buf_id++)
+    {
+        ASSERT_VX_OBJECT(d0[buf_id]    = vxCreateScalar(context, VX_TYPE_UINT32, &tmp_value), VX_TYPE_SCALAR);
+    }
+    d0[num_buf-2] = (vx_scalar)vxGetReferenceFromDelay(delay, -1);
+    d0[num_buf-1] = (vx_scalar)vxGetReferenceFromDelay(delay, -2);
+    VX_CALL(vxReleaseScalar(&exemplar));
+
+    ASSERT_VX_OBJECT(n0    = test_user_kernel_node(graph, d0[0], d0[num_buf-2], d2[0], NULL), VX_TYPE_NODE);
+    ASSERT_VX_OBJECT(n1    = test_user_kernel_node(graph, d0[num_buf-1], d0[num_buf-2], NULL, NULL), VX_TYPE_NODE);
+
+    /* input @ n0 index 0, becomes graph parameter 0 */
+    add_graph_parameter_by_node_index(graph, n0, 0);
+    /* output @ n0 index 2, becomes graph parameter 1 */
+    add_graph_parameter_by_node_index(graph, n0, 2);
+
+    /* set graph schedule config such that graph parameter @ index 0, 1 are enqueuable */
+    graph_parameters_queue_params_list[0].graph_parameter_index = 0;
+    graph_parameters_queue_params_list[0].refs_list_size = num_buf;
+    graph_parameters_queue_params_list[0].refs_list = (vx_reference*)&d0[0];
+
+    graph_parameters_queue_params_list[1].graph_parameter_index = 1;
+    graph_parameters_queue_params_list[1].refs_list_size = num_buf;
+    graph_parameters_queue_params_list[1].refs_list = (vx_reference*)&d2[0];
+
+    /* Schedule mode auto is used, so we don't need to call vxScheduleGraph.
+     * Graph gets scheduled automatically as refs are enqueued to it
+     */
+    VX_CALL(vxSetGraphScheduleConfig(graph,
+            VX_GRAPH_SCHEDULE_MODE_QUEUE_AUTO,
+            2,
+            graph_parameters_queue_params_list
+            ));
+
+    /* always auto age delay in pipelined graph */
+    VX_CALL(vxRegisterAutoAging(graph, delay));
+
+    VX_CALL(vxVerifyGraph(graph));
+
+    #if 1
+    /* fill reference data into input data reference */
+    for(buf_id=0; buf_id<num_buf; buf_id++)
+    {
+        ASSERT_NO_FAILURE(
+            vxCopyScalar(d0[buf_id],
+            &in_value[buf_id],
+            VX_WRITE_ONLY,
+            VX_MEMORY_TYPE_HOST));
+    }
+
+    /* enqueue input and output references,
+     * input and output can be enqueued in any order
+     * can be enqueued all together; here they are enqueued one by one just as an example
+     */
+    for(buf_id=0; buf_id<num_buf; buf_id++)
+    {
+        VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 1, (vx_reference*)&d2[buf_id], 1));
+    }
+    /* the last two bufs are already set at delay slots -1 and -2, so don't enqueue those refs */
+    for(buf_id=0; buf_id<num_buf-2; buf_id++)
+    {
+        VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 0, (vx_reference*)&d0[buf_id], 1));
+    }
+
+    buf_id = 0;
+
+    /* wait for graph instances to complete, compare output and recycle data buffers, schedule again */
+    for(loop_id=0; loop_id<(loop_cnt+num_buf-2); loop_id++)
+    {
+        uint32_t num_refs;
+
+        /* Get output reference, waits until a reference is available */
+        VX_CALL(vxGraphParameterDequeueDoneRef(graph, 1, (vx_reference*)&out_scalar, 1, &num_refs));
+
+        /* Get consumed input reference, waits until a reference is available
+         */
+        VX_CALL(vxGraphParameterDequeueDoneRef(graph, 0, (vx_reference*)&in_scalar, 1, &num_refs));
+
+        VX_CALL(vxCopyScalar(out_scalar, &tmp_value, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+
+        /* compare output */
+        //printf(" %d: out = %d ref = %d\n", loop_id, tmp_value, ref_out_value[buf_id]);
+        ASSERT_EQ_INT(tmp_value, ref_out_value[buf_id]);
+
+        /* clear value in output */
+        tmp_value = 0;
+        VX_CALL(vxCopyScalar(out_scalar, &tmp_value, VX_WRITE_ONLY, VX_MEMORY_TYPE_HOST));
+
+        if(loop_cnt > 100)
+        {
+            ct_update_progress(loop_id, loop_cnt+num_buf);
+        }
+
+        buf_id = (buf_id+1)%num_buf;
+
+        /* recycles dequeued input and output refs 'loop_cnt' times */
+        if(loop_id<loop_cnt)
+        {
+            /* input and output can be enqueued in any order */
+            VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 1, (vx_reference*)&out_scalar, 1));
+            VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 0, (vx_reference*)&in_scalar, 1));
+        }
+    }
+
+    /* ensure all graph processing is complete */
+    VX_CALL(vxWaitGraph(graph));
+
+    #endif
+
+    VX_CALL(vxReleaseNode(&n0));
+    VX_CALL(vxReleaseNode(&n1));
+    for(buf_id=1; buf_id<num_buf-2; buf_id++)
+    {
+        VX_CALL(vxReleaseScalar(&d0[buf_id]));
+    }
+    for(buf_id=0; buf_id<num_buf; buf_id++)
+    {
+        VX_CALL(vxReleaseScalar(&d2[buf_id]));
+    }
+    VX_CALL(vxReleaseDelay(&delay));
+    VX_CALL(vxReleaseGraph(&graph));
+
+    test_user_kernel_unregister(context);
+}
+
+/*
+ *  d0           n0                           n1            d2
+ * SCALAR -- USER_KERNEL -- delay (-1) --  USER_KERNEL -- SCALAR
+ *                             |            |
+ *                         delay (-2) ------+--- USER_KERNEL -- null
+ *                             |                     n2
+ *                             |                     |
+ *                         delay (0)  ---------------+
+ *
+ * This test case tests the following:
+ * - Delay objects with 3 delay slots
+ * - Delay slot connected to two inputs
+ * - Delay intermediate to a graph, no graph parameter at any delay slot
+ * - node output to delay slot -1 (instead of typical slot 0)
+ * - multiple buffers at the output of n0, i.e. delay slot -1
+ *
+ */
+TEST_WITH_ARG(GraphPipeline, testDelay3, Arg, PARAMETERS)
+{
+    vx_context context = context_->vx_context_;
+    vx_graph graph;
+    vx_scalar d0[MAX_NUM_BUF], d2[MAX_NUM_BUF], exemplar;
+    vx_delay delay;
+    vx_scalar in_scalar, out_scalar;
+    vx_node n0, n1, n2;
+    vx_graph_parameter_queue_params_t graph_parameters_queue_params_list[2];
+    vx_uint32 in_value[MAX_NUM_BUF], ref_out_value[MAX_NUM_BUF];
+    vx_uint32 tmp_value = 0;
+
+    uint32_t num_buf;
+    uint32_t buf_id, loop_id, loop_cnt;
+
+    test_user_kernel_register(context);
+
+    num_buf = arg_->num_buf;
+    loop_cnt = arg_->loop_count;
+
+    /* since the delay has 3 slots, num_buf MUST be at least 3 */
+    if(num_buf < 3)
+    {
+        num_buf = 3;
+    }
+
+    ASSERT(num_buf <= MAX_NUM_BUF);
+
+    /* fill reference data */
+    for(buf_id=0; buf_id<num_buf; buf_id++)
+    {
+        in_value[buf_id] = 10*(buf_id+1);
+    }
+
+    {
+        uint32_t tmp_value[3];
+
+        tmp_value[0] = 0;
+        tmp_value[1] = in_value[num_buf-1];
+        tmp_value[2] = in_value[num_buf-2];
+
+        for(buf_id=0; buf_id<num_buf; buf_id++)
+        {
+            ref_out_value[buf_id] = in_value[buf_id]
+                              + tmp_value[ 1 ];
+
+            tmp_value[ 0 ] = tmp_value[ 2 ];
+            tmp_value[ 2 ] = tmp_value[ 1 ];
+            tmp_value[ 1 ] = in_value[buf_id];
+        }
+    }
+
+    ASSERT_VX_OBJECT(graph = vxCreateGraph(context), VX_TYPE_GRAPH);
+
+    /* allocate input and output refs; multiple refs are created to allow pipelining of the graph */
+    for(buf_id=0; buf_id<num_buf; buf_id++)
+    {
+        ASSERT_VX_OBJECT(d0[buf_id]    = vxCreateScalar(context, VX_TYPE_UINT32, &tmp_value), VX_TYPE_SCALAR);
+        ASSERT_VX_OBJECT(d2[buf_id]    = vxCreateScalar(context, VX_TYPE_UINT32, &tmp_value), VX_TYPE_SCALAR);
+    }
+
+    /* allocate delay
+     */
+    ASSERT_VX_OBJECT(exemplar  = vxCreateScalar(context, VX_TYPE_UINT32, &tmp_value), VX_TYPE_SCALAR);
+    ASSERT_VX_OBJECT(delay     = vxCreateDelay(context, (vx_reference)exemplar, 3), VX_TYPE_DELAY);
+    vxReleaseScalar(&exemplar);
+
+    ASSERT_VX_OBJECT(
+        n0 = test_user_kernel_node( graph,
+                d0[0], NULL,
+                (vx_scalar)vxGetReferenceFromDelay(delay, -1), NULL),
+                VX_TYPE_NODE);
+    ASSERT_VX_OBJECT(
+        n1 = test_user_kernel_node(graph,
+                (vx_scalar)vxGetReferenceFromDelay(delay, -1), (vx_scalar)vxGetReferenceFromDelay(delay, -2),
+                d2[0], NULL),
+                VX_TYPE_NODE);
+    ASSERT_VX_OBJECT(
+        n2 = test_user_kernel_node(graph,
+                (vx_scalar)vxGetReferenceFromDelay(delay, -2), (vx_scalar)vxGetReferenceFromDelay(delay, 0),
+                NULL, NULL),
+                VX_TYPE_NODE);
+
+    /* input @ n0 index 0, becomes graph parameter 0 */
+    add_graph_parameter_by_node_index(graph, n0, 0);
+    /* output @ n1 index 2, becomes graph parameter 1 */
+    add_graph_parameter_by_node_index(graph, n1, 2);
+
+    /* set graph schedule config such that graph parameter @ index 0, 1 are enqueuable */
+    graph_parameters_queue_params_list[0].graph_parameter_index = 0;
+    graph_parameters_queue_params_list[0].refs_list_size = num_buf;
+    graph_parameters_queue_params_list[0].refs_list = (vx_reference*)&d0[0];
+
+    graph_parameters_queue_params_list[1].graph_parameter_index = 1;
+    graph_parameters_queue_params_list[1].refs_list_size = num_buf;
+    graph_parameters_queue_params_list[1].refs_list = (vx_reference*)&d2[0];
+
+    /* Schedule mode auto is used, so we don't need to call vxScheduleGraph.
+     * The graph gets scheduled automatically as refs are enqueued to it.
+     */
+    VX_CALL(vxSetGraphScheduleConfig(graph,
+            VX_GRAPH_SCHEDULE_MODE_QUEUE_AUTO,
+            2,
+            graph_parameters_queue_params_list
+            ));
+
+    /* always auto-age the delay in a pipelined graph */
+    VX_CALL(vxRegisterAutoAging(graph, delay));
+
+    VX_CALL(vxVerifyGraph(graph));
+
+    #if 1
+    /* fill reference data into input data reference */
+    for(buf_id=0; buf_id<num_buf; buf_id++)
+    {
+        ASSERT_NO_FAILURE(
+            vxCopyScalar(d0[buf_id],
+            &in_value[buf_id],
+            VX_WRITE_ONLY,
+            VX_MEMORY_TYPE_HOST));
+    }
+    {
+        vx_scalar tmp_scalar;
+
+        tmp_scalar = (vx_scalar)vxGetReferenceFromDelay(delay, 0);
+        ASSERT_NO_FAILURE(
+            vxCopyScalar(tmp_scalar,
+            &in_value[num_buf-2],
+            VX_WRITE_ONLY,
+            VX_MEMORY_TYPE_HOST));
+
+        tmp_scalar = (vx_scalar)vxGetReferenceFromDelay(delay, -2);
+        ASSERT_NO_FAILURE(
+            vxCopyScalar(tmp_scalar,
+            &in_value[num_buf-1],
+            VX_WRITE_ONLY,
+            VX_MEMORY_TYPE_HOST));
+    }
+
+    /* Enqueue input and output references.
+     * Input and output can be enqueued in any order, and can all be enqueued together;
+     * here they are enqueued one by one just as an example.
+     */
+    for(buf_id=0; buf_id<num_buf; buf_id++)
+    {
+        VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 1, (vx_reference*)&d2[buf_id], 1));
+    }
+    for(buf_id=0; buf_id<num_buf; buf_id++)
+    {
+        VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 0, (vx_reference*)&d0[buf_id], 1));
+    }
+
+    buf_id = 0;
+
+    /* wait for graph instances to complete, compare output and recycle data buffers, schedule again */
+    for(loop_id=0; loop_id<(loop_cnt+num_buf); loop_id++)
+    {
+        uint32_t num_refs;
+
+        /* Get output reference, waits until a reference is available */
+        VX_CALL(vxGraphParameterDequeueDoneRef(graph, 1, (vx_reference*)&out_scalar, 1, &num_refs));
+
+        /* Get consumed input reference, waits until a reference is available
+         */
+        VX_CALL(vxGraphParameterDequeueDoneRef(graph, 0, (vx_reference*)&in_scalar, 1, &num_refs));
+
+        VX_CALL(vxCopyScalar(out_scalar, &tmp_value, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+
+        /* compare output */
+        //printf(" %d: out = %d ref = %d\n", loop_id, tmp_value, ref_out_value[buf_id]);
+        ASSERT_EQ_INT(tmp_value, ref_out_value[buf_id]);
+
+        if(loop_cnt > 100)
+        {
+            ct_update_progress(loop_id, loop_cnt+num_buf);
+        }
+
+        /* clear value in output */
+        tmp_value = 0;
+        VX_CALL(vxCopyScalar(out_scalar, &tmp_value, VX_WRITE_ONLY, VX_MEMORY_TYPE_HOST));
+
+        buf_id = (buf_id+1)%num_buf;
+
+        /* recycles dequeued input and output refs 'loop_cnt' times */
+        if(loop_id<loop_cnt)
+        {
+            /* input and output can be enqueued in any order */
+            VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 1, (vx_reference*)&out_scalar, 1));
+            VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 0, (vx_reference*)&in_scalar, 1));
+        }
+    }
+
+    /* ensure all graph processing is complete */
+    VX_CALL(vxWaitGraph(graph));
+
+    #endif
+
+    VX_CALL(vxReleaseNode(&n0));
+    VX_CALL(vxReleaseNode(&n1));
+    VX_CALL(vxReleaseNode(&n2));
+    for(buf_id=0; buf_id<num_buf; buf_id++)
+    {
+        VX_CALL(vxReleaseScalar(&d0[buf_id]));
+        VX_CALL(vxReleaseScalar(&d2[buf_id]));
+    }
+    VX_CALL(vxReleaseDelay(&delay));
+    VX_CALL(vxReleaseGraph(&graph));
+
+    test_user_kernel_unregister(context);
+}
+
+/*
+ *  d0           n0                           n1            d2
+ * SCALAR -- USER_KERNEL -- delay (0) --  USER_KERNEL -- SCALAR
+ *                             |            |
+ *                          delay (-1)      |
+ *                             |            |
+ *                          delay (-2)      |
+ *                             |            |
+ *                          delay (-3) -----+
+ *
+ * This test case tests the following:
+ * - Delay object with 4 delay slots
+ * - Delay intermediate to the graph, no graph parameter at any delay slot
+ * - Delay with slots not connected to any input - tests auto-aging at these slots
+ *
+ */
+TEST_WITH_ARG(GraphPipeline, testDelay4, Arg, PARAMETERS)
+{
+    vx_context context = context_->vx_context_;
+    vx_graph graph;
+    vx_scalar d0[MAX_NUM_BUF], d2[MAX_NUM_BUF], exemplar;
+    vx_delay delay;
+    vx_scalar in_scalar, out_scalar;
+    vx_node n0, n1;
+    vx_graph_parameter_queue_params_t graph_parameters_queue_params_list[2];
+    vx_uint32 in_value[MAX_NUM_BUF], ref_out_value;
+    vx_uint32 ref_delay_value[MAX_NUM_BUF];
+    vx_uint32 tmp_value = 0;
+
+    uint32_t num_buf;
+    uint32_t buf_id, loop_id, loop_cnt;
+
+    test_user_kernel_register(context);
+
+    num_buf = arg_->num_buf;
+    loop_cnt = arg_->loop_count;
+
+    /* since the delay has 4 slots, num_buf MUST be at least 4 */
+    if(num_buf < 4)
+    {
+        num_buf = 4;
+    }
+
+    ASSERT(num_buf <= MAX_NUM_BUF);
+
+    /* fill reference data */
+    for(buf_id=0; buf_id<num_buf; buf_id++)
+    {
+        in_value[buf_id] = 10*(buf_id+1);
+    }
+
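+    /* ref_delay_value[1..3] mirror the values copied into delay slots -1, -2 and -3 before the
+     * first graph execution (see the pre-load further below); the check loop rotates them the
+     * same way delay auto-aging rotates the slots. */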
+    ref_delay_value[0] = 0;
+    ref_delay_value[1] = in_value[num_buf-3];
+    ref_delay_value[2] = in_value[num_buf-2];
+    ref_delay_value[3] = in_value[num_buf-1];
+
+    ASSERT_VX_OBJECT(graph = vxCreateGraph(context), VX_TYPE_GRAPH);
+
+    /* allocate input and output refs; multiple refs are created to allow pipelining of the graph */
+    for(buf_id=0; buf_id<num_buf; buf_id++)
+    {
+        ASSERT_VX_OBJECT(d0[buf_id]    = vxCreateScalar(context, VX_TYPE_UINT32, &tmp_value), VX_TYPE_SCALAR);
+        ASSERT_VX_OBJECT(d2[buf_id]    = vxCreateScalar(context, VX_TYPE_UINT32, &tmp_value), VX_TYPE_SCALAR);
+    }
+
+    /* allocate delay
+     */
+    ASSERT_VX_OBJECT(exemplar  = vxCreateScalar(context, VX_TYPE_UINT32, &tmp_value), VX_TYPE_SCALAR);
+    ASSERT_VX_OBJECT(delay     = vxCreateDelay(context, (vx_reference)exemplar, 4), VX_TYPE_DELAY);
+    vxReleaseScalar(&exemplar);
+
+    ASSERT_VX_OBJECT(
+        n0 = test_user_kernel_node( graph,
+                d0[0], NULL,
+                (vx_scalar)vxGetReferenceFromDelay(delay, 0), NULL),
+                VX_TYPE_NODE);
+    ASSERT_VX_OBJECT(
+        n1 = test_user_kernel_node(graph,
+                (vx_scalar)vxGetReferenceFromDelay(delay, 0), (vx_scalar)vxGetReferenceFromDelay(delay, -3),
+                d2[0], NULL),
+                VX_TYPE_NODE);
+
+    /* input @ n0 index 0, becomes graph parameter 0 */
+    add_graph_parameter_by_node_index(graph, n0, 0);
+    /* output @ n1 index 2, becomes graph parameter 1 */
+    add_graph_parameter_by_node_index(graph, n1, 2);
+
+    /* set graph schedule config such that graph parameter @ index 0, 1 are enqueuable */
+    graph_parameters_queue_params_list[0].graph_parameter_index = 0;
+    graph_parameters_queue_params_list[0].refs_list_size = num_buf;
+    graph_parameters_queue_params_list[0].refs_list = (vx_reference*)&d0[0];
+
+    graph_parameters_queue_params_list[1].graph_parameter_index = 1;
+    graph_parameters_queue_params_list[1].refs_list_size = num_buf;
+    graph_parameters_queue_params_list[1].refs_list = (vx_reference*)&d2[0];
+
+    /* Schedule mode auto is used, so we don't need to call vxScheduleGraph.
+     * The graph gets scheduled automatically as refs are enqueued to it.
+     */
+    VX_CALL(vxSetGraphScheduleConfig(graph,
+            VX_GRAPH_SCHEDULE_MODE_QUEUE_AUTO,
+            2,
+            graph_parameters_queue_params_list
+            ));
+
+    /* always auto-age the delay in a pipelined graph */
+    VX_CALL(vxRegisterAutoAging(graph, delay));
+
+    VX_CALL(vxVerifyGraph(graph));
+
+    #if 1
+    /* fill reference data into input data reference */
+    for(buf_id=0; buf_id<num_buf; buf_id++)
+    {
+        ASSERT_NO_FAILURE(
+            vxCopyScalar(d0[buf_id],
+            &in_value[buf_id],
+            VX_WRITE_ONLY,
+            VX_MEMORY_TYPE_HOST));
+    }
+    {
+        vx_scalar tmp_scalar;
+
+        tmp_scalar = (vx_scalar)vxGetReferenceFromDelay(delay, -1);
+        ASSERT_NO_FAILURE(
+            vxCopyScalar(tmp_scalar,
+            &in_value[num_buf-3],
+            VX_WRITE_ONLY,
+            VX_MEMORY_TYPE_HOST));
+
+        tmp_scalar = (vx_scalar)vxGetReferenceFromDelay(delay, -2);
+        ASSERT_NO_FAILURE(
+            vxCopyScalar(tmp_scalar,
+            &in_value[num_buf-2],
+            VX_WRITE_ONLY,
+            VX_MEMORY_TYPE_HOST));
+
+        tmp_scalar = (vx_scalar)vxGetReferenceFromDelay(delay, -3);
+        ASSERT_NO_FAILURE(
+            vxCopyScalar(tmp_scalar,
+            &in_value[num_buf-1],
+            VX_WRITE_ONLY,
+            VX_MEMORY_TYPE_HOST));
+
+    }
+
+    /* Enqueue input and output references.
+     * Input and output can be enqueued in any order, and can all be enqueued together;
+     * here they are enqueued one by one just as an example.
+     */
+    for(buf_id=0; buf_id<num_buf; buf_id++)
+    {
+        VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 1, (vx_reference*)&d2[buf_id], 1));
+    }
+    for(buf_id=0; buf_id<num_buf; buf_id++)
+    {
+        VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 0, (vx_reference*)&d0[buf_id], 1));
+    }
+
+    buf_id = 0;
+
+    /* wait for graph instances to complete, compare output and recycle data buffers, schedule again */
+    for(loop_id=0; loop_id<(loop_cnt+num_buf); loop_id++)
+    {
+        uint32_t num_refs;
+
+        /* Get output reference, waits until a reference is available */
+        VX_CALL(vxGraphParameterDequeueDoneRef(graph, 1, (vx_reference*)&out_scalar, 1, &num_refs));
+
+        /* Get consumed input reference, waits until a reference is available
+         */
+        VX_CALL(vxGraphParameterDequeueDoneRef(graph, 0, (vx_reference*)&in_scalar, 1, &num_refs));
+
+        VX_CALL(vxCopyScalar(out_scalar, &tmp_value, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+
+        ref_out_value = in_value[buf_id] + ref_delay_value[ 3 ];
+
+        ref_delay_value[ 3 ] = ref_delay_value[ 2 ];
+        ref_delay_value[ 2 ] = ref_delay_value[ 1 ];
+        ref_delay_value[ 1 ] = in_value[buf_id];
+
+        /* compare output */
+        //printf(" %d: out = %d ref = %d\n", loop_id, tmp_value, ref_out_value);
+        ASSERT_EQ_INT(tmp_value, ref_out_value);
+
+        /* clear value in output */
+        tmp_value = 0;
+        VX_CALL(vxCopyScalar(out_scalar, &tmp_value, VX_WRITE_ONLY, VX_MEMORY_TYPE_HOST));
+
+        if(loop_cnt > 100)
+        {
+            ct_update_progress(loop_id, loop_cnt+num_buf);
+        }
+
+        buf_id = (buf_id+1)%num_buf;
+
+        /* recycles dequeued input and output refs 'loop_cnt' times */
+        if(loop_id<loop_cnt)
+        {
+            /* input and output can be enqueued in any order */
+            VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 1, (vx_reference*)&out_scalar, 1));
+            VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 0, (vx_reference*)&in_scalar, 1));
+        }
+    }
+
+    /* ensure all graph processing is complete */
+    VX_CALL(vxWaitGraph(graph));
+
+    #endif
+
+    VX_CALL(vxReleaseNode(&n0));
+    VX_CALL(vxReleaseNode(&n1));
+    for(buf_id=0; buf_id<num_buf; buf_id++)
+    {
+        VX_CALL(vxReleaseScalar(&d0[buf_id]));
+        VX_CALL(vxReleaseScalar(&d2[buf_id]));
+    }
+    VX_CALL(vxReleaseDelay(&delay));
+    VX_CALL(vxReleaseGraph(&graph));
+
+    test_user_kernel_unregister(context);
+}
+
+/*
+ *  d0     n0     d2     n1     d3     n2     d4
+ * IMG -- ADD -- IMG -- NOT -- IMG -- NOT -- IMG
+ *         |                    |
+ *         |                    |
+ *         +--------------------+
+ *
+ * This test case tests loop-carried dependency functional correctness
+ *
+ */
+TEST_WITH_ARG(GraphPipeline, testLoopCarriedDependency, Arg, PARAMETERS)
+{
+    vx_context context = context_->vx_context_;
+    vx_graph graph;
+    vx_delay delay;
+    vx_image delay_image;
+    vx_image d0[MAX_NUM_BUF], d2[MAX_NUM_BUF], d4[MAX_NUM_BUF];
+    vx_node n0, n1, n2;
+    vx_graph_parameter_queue_params_t graph_parameters_queue_params_list[3];
+
+    int i;
+    vx_graph graph_1 = 0;
+    vx_image images[4];
+    vx_node nodes[3];
+    vx_delay delay_1 = 0;
+    vx_image delay_image_0 = 0;
+    vx_image delay_image_1 = 0;
+    vx_image delay_image_0_nopipeline = 0;
+    vx_image delay_image_1_nopipeline = 0;
+    vx_imagepatch_addressing_t addr;
+    vx_uint8 *pdata = 0;
+    vx_rectangle_t rect = {0, 0, arg_->width, arg_->height};
+    vx_map_id map_id;
+
+    CT_Image ref_src[MAX_NUM_BUF], ref_src1[MAX_NUM_BUF], vxdst0, vxdst1;
+    uint32_t width, height, seq_init, num_buf;
+    uint32_t buf_id, loop_id, loop_cnt;
+
+    seq_init = 1;
+    width = arg_->width;
+    height = arg_->height;
+    num_buf = arg_->num_buf;
+    loop_cnt = arg_->loop_count;
+
+    ASSERT(num_buf <= MAX_NUM_BUF);
+
+    /* fill reference data */
+    for(buf_id=0; buf_id<num_buf; buf_id++)
+    {
+        ASSERT_NO_FAILURE({
+            ref_src[buf_id] = ct_allocate_image(width, height, VX_DF_IMAGE_U8);
+            fillSequence(ref_src[buf_id], (uint32_t)(seq_init+buf_id*10));
+        });
+        ASSERT_NO_FAILURE({
+            ref_src1[buf_id] = ct_allocate_image(width, height, VX_DF_IMAGE_U8);
+            fillSequence(ref_src1[buf_id], (uint32_t)(seq_init+buf_id*10));
+        });
+    }
+
+    /* Non-pipelining graph */
+    ASSERT_VX_OBJECT(graph_1 = vxCreateGraph(context), VX_TYPE_GRAPH);
+
+    ASSERT_VX_OBJECT(images[0] = vxCreateImage(context, width, height, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
+    ASSERT_VX_OBJECT(images[1] = vxCreateImage(context, width, height, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
+    ASSERT_VX_OBJECT(images[2] = vxCreateImage(context, width, height, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
+    ASSERT_VX_OBJECT(images[3] = vxCreateImage(context, width, height, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
+
+    ASSERT_NO_FAILURE(ct_image_copyto_vx_image(images[0], ref_src[0]));
+
+    ASSERT_VX_OBJECT(delay_1 = vxCreateDelay(context, (vx_reference)images[3], 2), VX_TYPE_DELAY);
+
+    ASSERT_VX_OBJECT(delay_image_0_nopipeline = (vx_image)vxGetReferenceFromDelay(delay_1, 0), VX_TYPE_IMAGE);
+    ASSERT_VX_OBJECT(delay_image_1_nopipeline = (vx_image)vxGetReferenceFromDelay(delay_1,-1), VX_TYPE_IMAGE);
+
+    /* Filling reference data */
+    pdata = NULL;
+    VX_CALL(vxMapImagePatch(delay_image_0_nopipeline, &rect, 0, &map_id, &addr, (void **)&pdata,
+                                                    VX_WRITE_ONLY, VX_MEMORY_TYPE_HOST, 0));
+    for (i = 0; i < width*height; i++)
+    {
+        *(pdata+i) = 1;
+    }
+    VX_CALL(vxUnmapImagePatch(delay_image_0_nopipeline, map_id));
+
+    pdata = NULL;
+    VX_CALL(vxMapImagePatch(delay_image_1_nopipeline, &rect, 0, &map_id, &addr, (void **)&pdata,
+                                                    VX_WRITE_ONLY, VX_MEMORY_TYPE_HOST, 0));
+    for (i = 0; i < width*height; i++)
+    {
+        *(pdata+i) = 1;
+    }
+    VX_CALL(vxUnmapImagePatch(delay_image_1_nopipeline, map_id));
+
+    ASSERT_VX_OBJECT(nodes[0] = vxAddNode(graph_1, images[0], (vx_image)vxGetReferenceFromDelay(delay_1, -1), VX_CONVERT_POLICY_WRAP, images[1]), VX_TYPE_NODE);
+    ASSERT_VX_OBJECT(nodes[1] = vxNotNode(graph_1, images[1], (vx_image)vxGetReferenceFromDelay(delay_1, 0)), VX_TYPE_NODE);
+    ASSERT_VX_OBJECT(nodes[2] = vxNotNode(graph_1, (vx_image)vxGetReferenceFromDelay(delay_1, 0), images[2]), VX_TYPE_NODE);
+
+    VX_CALL(vxRegisterAutoAging(graph_1, delay_1));
+    VX_CALL(vxVerifyGraph(graph_1));
+
+    ASSERT_VX_OBJECT(graph = vxCreateGraph(context), VX_TYPE_GRAPH);
+
+    /* allocate input and output refs; multiple refs are created to allow pipelining of the graph */
+    for(buf_id=0; buf_id<num_buf; buf_id++)
+    {
+        ASSERT_VX_OBJECT(d0[buf_id]    = vxCreateImage(context, width, height, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
+        ASSERT_VX_OBJECT(d2[buf_id]    = vxCreateImage(context, width, height, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
+        ASSERT_VX_OBJECT(d4[buf_id]    = vxCreateImage(context, width, height, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
+    }
+
+    ASSERT_VX_OBJECT(delay_image = vxCreateImage(context, width, height, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
+
+    ASSERT_VX_OBJECT(delay = vxCreateDelay(context, (vx_reference)delay_image, 2), VX_TYPE_DELAY);
+
+    ASSERT_VX_OBJECT(delay_image_0 = (vx_image)vxGetReferenceFromDelay(delay, 0), VX_TYPE_IMAGE);
+    ASSERT_VX_OBJECT(delay_image_1 = (vx_image)vxGetReferenceFromDelay(delay, -1), VX_TYPE_IMAGE);
+
+    /* Filling reference data */
+    pdata = NULL;
+    VX_CALL(vxMapImagePatch(delay_image_0, &rect, 0, &map_id, &addr, (void **)&pdata,
+                                                    VX_WRITE_ONLY, VX_MEMORY_TYPE_HOST, 0));
+    for (i = 0; i < width*height; i++)
+    {
+        *(pdata+i) = 1;
+    }
+    VX_CALL(vxUnmapImagePatch(delay_image_0, map_id));
+
+    pdata = NULL;
+    VX_CALL(vxMapImagePatch(delay_image_1, &rect, 0, &map_id, &addr, (void **)&pdata,
+                                                    VX_WRITE_ONLY, VX_MEMORY_TYPE_HOST, 0));
+    for (i = 0; i < width*height; i++)
+    {
+        *(pdata+i) = 1;
+    }
+    VX_CALL(vxUnmapImagePatch(delay_image_1, map_id));
+
+
+    ASSERT_VX_OBJECT(n0    = vxAddNode(graph, d0[0], (vx_image)vxGetReferenceFromDelay(delay, -1), VX_CONVERT_POLICY_WRAP, d2[0]), VX_TYPE_NODE);
+    ASSERT_VX_OBJECT(n1    = vxNotNode(graph, d2[0], (vx_image)vxGetReferenceFromDelay(delay, 0)), VX_TYPE_NODE);
+    ASSERT_VX_OBJECT(n2    = vxNotNode(graph, (vx_image)vxGetReferenceFromDelay(delay, 0), d4[0]), VX_TYPE_NODE);
+
+    /* input0 @ n0 index 0, becomes graph parameter 0 */
+    add_graph_parameter_by_node_index(graph, n0, 0);
+    /* output @ n0 index 3, becomes graph parameter 1 */
+    add_graph_parameter_by_node_index(graph, n0, 3);
+    /* output @ n2 index 1, becomes graph parameter 2 */
+    add_graph_parameter_by_node_index(graph, n2, 1);
+
+    /* set graph schedule config such that graph parameter @ index 0, 1, 2 are enqueuable */
+    graph_parameters_queue_params_list[0].graph_parameter_index = 0;
+    graph_parameters_queue_params_list[0].refs_list_size = num_buf;
+    graph_parameters_queue_params_list[0].refs_list = (vx_reference*)&d0[0];
+
+    graph_parameters_queue_params_list[1].graph_parameter_index = 1;
+    graph_parameters_queue_params_list[1].refs_list_size = num_buf;
+    graph_parameters_queue_params_list[1].refs_list = (vx_reference*)&d2[0];
+
+    graph_parameters_queue_params_list[2].graph_parameter_index = 2;
+    graph_parameters_queue_params_list[2].refs_list_size = num_buf;
+    graph_parameters_queue_params_list[2].refs_list = (vx_reference*)&d4[0];
+
+    /* Schedule mode auto is used, so we don't need to call vxScheduleGraph.
+     * The graph gets scheduled automatically as refs are enqueued to it.
+     */
+    VX_CALL(vxSetGraphScheduleConfig(graph,
+            VX_GRAPH_SCHEDULE_MODE_QUEUE_AUTO,
+            3,
+            graph_parameters_queue_params_list
+            ));
+
+    /* always auto-age the delay in a pipelined graph */
+    VX_CALL(vxRegisterAutoAging(graph, delay));
+
+    VX_CALL(vxVerifyGraph(graph));
+
+    #if 1
+    /* fill reference data into input data reference */
+    for(buf_id=0; buf_id<num_buf; buf_id++)
+    {
+        ASSERT_NO_FAILURE(ct_image_copyto_vx_image(d0[buf_id], ref_src[0]));
+    }
+
+    /* Enqueue input and output references.
+     * Input and output can be enqueued in any order, and can all be enqueued together;
+     * here they are enqueued one by one just as an example.
+     */
+    for(buf_id=0; buf_id<num_buf; buf_id++)
+    {
+        VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 1, (vx_reference*)&d2[buf_id], 1));
+        VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 0, (vx_reference*)&d0[buf_id], 1));
+        VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 2, (vx_reference*)&d4[buf_id], 1));
+    }
+
+    buf_id = 0;
+
+    /* wait for graph instances to complete, compare output and recycle data buffers, schedule again */
+    for(loop_id=0; loop_id<(loop_cnt+num_buf); loop_id++)
+    {
+        vx_image add_in0_img, add_out_img, not_out1_img;
+        uint32_t num_refs;
+
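+        /* graph_1 is the non-pipelined reference graph: it is executed synchronously once per
+         * iteration so that its output (images[2]) can be compared below against the output of
+         * the pipelined graph. */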
+        VX_CALL(vxProcessGraph(graph_1));
+
+        /* Get output reference, waits until a reference is available */
+        VX_CALL(vxGraphParameterDequeueDoneRef(graph, 2, (vx_reference*)&not_out1_img, 1, &num_refs));
+
+        /* Get output reference, waits until a reference is available */
+        VX_CALL(vxGraphParameterDequeueDoneRef(graph, 1, (vx_reference*)&add_out_img, 1, &num_refs));
+
+        /* Get consumed input reference, waits until a reference is available
+         */
+        VX_CALL(vxGraphParameterDequeueDoneRef(graph, 0, (vx_reference*)&add_in0_img, 1, &num_refs));
+
+        /* when measuring performance, don't check the output since it affects graph performance numbers
+         */
+
+        if(loop_cnt > 100)
+        {
+            ct_update_progress(loop_id, loop_cnt+num_buf);
+        }
+
+        ASSERT_NO_FAILURE({
+            vxdst0 = ct_image_from_vx_image(not_out1_img);
+        });
+
+        ASSERT_NO_FAILURE({
+            vxdst1 = ct_image_from_vx_image(images[2]);
+        });
+
+        ASSERT_EQ_CTIMAGE(vxdst1, vxdst0);
+
+        buf_id = (buf_id+1)%num_buf;
+
+        /* recycles dequeued input and output refs 'loop_cnt' times */
+
+        if(loop_id<loop_cnt)
+        {
+            /* input and output can be enqueued in any order */
+            VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 1, (vx_reference*)&add_out_img, 1));
+            VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 0, (vx_reference*)&add_in0_img, 1));
+            VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 2, (vx_reference*)&not_out1_img, 1));
+        }
+
+    }
+
+    /* ensure all graph processing is complete */
+    VX_CALL(vxWaitGraph(graph));
+
+    #endif
+
+    VX_CALL(vxReleaseNode(&n0));
+    VX_CALL(vxReleaseNode(&n1));
+    VX_CALL(vxReleaseNode(&n2));
+    for(buf_id=0; buf_id<num_buf; buf_id++)
+    {
+        VX_CALL(vxReleaseImage(&d0[buf_id]));
+        VX_CALL(vxReleaseImage(&d2[buf_id]));
+        VX_CALL(vxReleaseImage(&d4[buf_id]));
+    }
+    VX_CALL(vxReleaseImage(&delay_image));
+    VX_CALL(vxReleaseDelay(&delay));
+    VX_CALL(vxReleaseGraph(&graph));
+
+    for (i = 0; i < (sizeof(nodes)/sizeof(nodes[0])); i++)
+    {
+        VX_CALL(vxReleaseNode(&nodes[i]));
+    }
+
+    for (i = 0; i < (sizeof(images)/sizeof(images[0])); i++)
+    {
+        VX_CALL(vxReleaseImage(&images[i]));
+    }
+
+    VX_CALL(vxReleaseGraph(&graph_1));
+    VX_CALL(vxReleaseDelay(&delay_1));
+
+    ASSERT(graph_1 == 0);
+    ASSERT(delay_1 == 0);
+
+    CT_CollectGarbage(CT_GC_ALL);
+}
+
+TESTCASE_TESTS(GraphPipeline,
+    testOneNode,
+    testTwoNodesBasic,
+    testTwoNodes,
+    testFourNodes,
+    testMaxDataRef,
+    testUniformImage,
+    testScalarOutput,
+    testEventHandling,
+    testEventHandlingDisableEvents,
+    testReplicateImage,
+    testUserKernel,
+    testManualSchedule,
+    /*testDelay1,
+    testDelay2,
+    testDelay3,
+    testDelay4,*/
+    testLoopCarriedDependency
+    )
+
+#endif
diff --git a/test_conformance/test_graph_roi.c b/test_conformance/test_graph_roi.c
index 4ffb735..7afd0cf 100644
--- a/test_conformance/test_graph_roi.c
+++ b/test_conformance/test_graph_roi.c
@@ -1,4 +1,4 @@
-/* 
+/*
 
  * Copyright (c) 2012-2017 The Khronos Group Inc.
  *
@@ -15,6 +15,8 @@
  * limitations under the License.
  */
 
+#if defined OPENVX_USE_ENHANCED_VISION || OPENVX_CONFORMANCE_VISION
+
 #include "test_engine/test.h"
 #include <VX/vx.h>
 #include <VX/vxu.h>
@@ -151,3 +153,5 @@
         testSimple,
         testCallbackOrder
         )
+
+#endif //OPENVX_USE_ENHANCED_VISION || OPENVX_CONFORMANCE_VISION
diff --git a/test_conformance/test_graph_streaming.c b/test_conformance/test_graph_streaming.c
new file mode 100644
index 0000000..bc166d1
--- /dev/null
+++ b/test_conformance/test_graph_streaming.c
@@ -0,0 +1,665 @@
+/*
+ * Copyright (c) 2012-2019 The Khronos Group Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifdef OPENVX_USE_STREAMING
+
+#include "test_engine/test.h"
+
+#include <VX/vx.h>
+#include <VX/vxu.h>
+#include <VX/vx_khr_pipelining.h>
+
+#include "math.h"
+#include <limits.h>
+
+TESTCASE(GraphStreaming,  CT_VXContext, ct_setup_vx_context, 0)
+
+#define VX_KERNEL_CONFORMANCE_TEST_USER_SOURCE1 (VX_KERNEL_BASE(VX_ID_DEFAULT, 0) + 3)
+#define VX_KERNEL_CONFORMANCE_TEST_USER_SOURCE1_NAME "org.khronos.openvx.test.user_source_1"
+
+#define VX_KERNEL_CONFORMANCE_TEST_USER_SOURCE2 (VX_KERNEL_BASE(VX_ID_DEFAULT, 0) + 4)
+#define VX_KERNEL_CONFORMANCE_TEST_USER_SOURCE2_NAME "org.khronos.openvx.test.user_source_2"
+
+#define VX_KERNEL_CONFORMANCE_TEST_USER_SINK (VX_KERNEL_BASE(VX_ID_DEFAULT, 0) + 5)
+#define VX_KERNEL_CONFORMANCE_TEST_USER_SINK_NAME "org.khronos.openvx.test.user_sink"
+#define PIPEUP_NUM_BUFS 3
+
+typedef enum _own_source_params_e
+{
+    OWN_SOURCE_PARAM_OUTPUT = 0
+} own_source_params_e;
+
+typedef enum _own_sink_params_e
+{
+    OWN_SINK_PARAM_INPUT = 0
+} own_sink_params_e;
+
+static enum vx_type_e type = (enum vx_type_e)VX_TYPE_SCALAR;
+
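+/* Bookkeeping shared by the user source/sink kernels below:
+ * - global_value is the monotonically increasing counter produced by the source kernels,
+ * - copy_value[] remembers the counter values written into the outputs buffered by source 1
+ *   during its pipeup phase,
+ * - pipeup_frame counts how many pipeup-state invocations source 1 has seen,
+ * - golden_sink_value is the value the sink kernel expects to read on each execution. */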
+static vx_bool is_pipeup_entered = vx_false_e;
+static vx_bool is_steady_state_entered = vx_false_e;
+static uint8_t pipeup_frame = 0;
+static uint8_t global_value = 0;
+static uint8_t copy_value[PIPEUP_NUM_BUFS-1];
+static uint8_t golden_sink_value = 0;
+
+static vx_status VX_CALLBACK own_source1_Kernel(vx_node node, const vx_reference *parameters, vx_uint32 num)
+{
+    uint32_t state;
+    ASSERT_VX_OBJECT_(return VX_FAILURE, node, VX_TYPE_NODE);
+    uint8_t i;
+
+    vxQueryNode(node, VX_NODE_STATE, &state, sizeof(state));
+    EXPECT(parameters != NULL);
+    EXPECT(num == 1);
+    if (parameters != NULL && num == 1)
+    {
+        EXPECT_VX_OBJECT(parameters[0], type);
+
+        if (255 == global_value)
+        {
+            global_value = 0;
+        }
+        else
+        {
+            global_value++;
+        }
+
+        if (state == VX_NODE_STATE_STEADY)
+        {
+            is_steady_state_entered = vx_true_e;
+
+            vxCopyScalar((vx_scalar)parameters[0], &copy_value[0], VX_WRITE_ONLY, VX_MEMORY_TYPE_HOST);
+
+            for (i = 0; i < pipeup_frame-1; i++)
+            {
+                copy_value[i] = copy_value[i+1];
+            }
+
+            copy_value[pipeup_frame-1] = global_value;
+        }
+        else
+        {
+            is_pipeup_entered = vx_true_e;
+            copy_value[pipeup_frame] = global_value;
+            pipeup_frame++;
+        }
+    }
+
+    return VX_SUCCESS;
+}
+
+static vx_status VX_CALLBACK own_source2_Kernel(vx_node node, const vx_reference *parameters, vx_uint32 num)
+{
+    uint32_t state;
+    ASSERT_VX_OBJECT_(return VX_FAILURE, node, VX_TYPE_NODE);
+
+    vxQueryNode(node, VX_NODE_STATE, &state, sizeof(state));
+    EXPECT(parameters != NULL);
+    EXPECT(num == 1);
+    if (parameters != NULL && num == 1)
+    {
+        EXPECT_VX_OBJECT(parameters[0], type);
+
+        if (255 == global_value)
+        {
+            global_value = 0;
+        }
+        else
+        {
+            global_value++;
+        }
+
+        vxCopyScalar((vx_scalar)parameters[0], &global_value, VX_WRITE_ONLY, VX_MEMORY_TYPE_HOST);
+    }
+
+    return VX_SUCCESS;
+}
+
+static vx_status VX_CALLBACK own_sink_Kernel(vx_node node, const vx_reference *parameters, vx_uint32 num)
+{
+    uint8_t local_copy_value = 0;
+    ASSERT_VX_OBJECT_(return VX_FAILURE, node, VX_TYPE_NODE);
+
+    EXPECT(parameters != NULL);
+    EXPECT(num == 1);
+    if (parameters != NULL && num == 1)
+    {
+        EXPECT_VX_OBJECT(parameters[0], type);
+        vxCopyScalar((vx_scalar)parameters[0], &local_copy_value, VX_READ_ONLY, VX_MEMORY_TYPE_HOST);
+
+        if (255 == golden_sink_value)
+        {
+            golden_sink_value = 0;
+        }
+        else
+        {
+            golden_sink_value++;
+        }
+
+        EXPECT(local_copy_value == golden_sink_value);
+    }
+
+    return VX_SUCCESS;
+}
+
+static vx_status VX_CALLBACK own_Initialize(vx_node node, const vx_reference *parameters, vx_uint32 num)
+{
+    ASSERT_VX_OBJECT_(return VX_FAILURE, node, VX_TYPE_NODE);
+    EXPECT(parameters != NULL);
+    EXPECT(num == 1);
+    if (parameters != NULL && num == 1)
+    {
+        EXPECT_VX_OBJECT(parameters[0], type);
+    }
+    return VX_SUCCESS;
+}
+
+static vx_status VX_CALLBACK own_Deinitialize(vx_node node, const vx_reference *parameters, vx_uint32 num)
+{
+    EXPECT(node != 0);
+    EXPECT(parameters != NULL);
+    EXPECT(num == 1);
+    if (parameters != NULL && num == 1)
+    {
+        EXPECT_VX_OBJECT(parameters[0], type);
+    }
+
+    return VX_SUCCESS;
+}
+
+/* Source with buffering */
+static void own_register_source1_kernel(vx_context context)
+{
+    vx_kernel kernel = 0;
+    vx_uint32 num_bufs = PIPEUP_NUM_BUFS;
+
+    ASSERT_VX_OBJECT(kernel = vxAddUserKernel(
+        context,
+        VX_KERNEL_CONFORMANCE_TEST_USER_SOURCE1_NAME,
+        VX_KERNEL_CONFORMANCE_TEST_USER_SOURCE1,
+        own_source1_Kernel,
+        1,
+        NULL,
+        own_Initialize,
+        own_Deinitialize), VX_TYPE_KERNEL);
+
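+    /* Setting VX_KERNEL_PIPEUP_OUTPUT_DEPTH to PIPEUP_NUM_BUFS asks the framework to invoke this
+     * kernel in the VX_NODE_STATE_PIPEUP state for the first (PIPEUP_NUM_BUFS - 1) executions so
+     * it can pre-fill that many output buffers before steady-state processing begins. */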
+    VX_CALL(vxSetKernelAttribute(kernel, VX_KERNEL_PIPEUP_OUTPUT_DEPTH, &num_bufs, sizeof(num_bufs)));
+
+    VX_CALL(vxAddParameterToKernel(kernel, OWN_SOURCE_PARAM_OUTPUT, VX_OUTPUT, type, VX_PARAMETER_STATE_REQUIRED));
+    {
+        vx_parameter parameter = 0;
+        vx_enum direction = 0;
+        ASSERT_VX_OBJECT(parameter = vxGetKernelParameterByIndex(kernel, OWN_SOURCE_PARAM_OUTPUT), VX_TYPE_PARAMETER);
+        VX_CALL(vxQueryParameter(parameter, VX_PARAMETER_DIRECTION, &direction, sizeof(direction)));
+        ASSERT(direction == VX_OUTPUT);
+        VX_CALL(vxReleaseParameter(&parameter));
+    }
+    VX_CALL(vxFinalizeKernel(kernel));
+    VX_CALL(vxReleaseKernel(&kernel));
+}
+
+/* Source without buffering */
+static void own_register_source2_kernel(vx_context context)
+{
+    vx_kernel kernel = 0;
+
+    ASSERT_VX_OBJECT(kernel = vxAddUserKernel(
+        context,
+        VX_KERNEL_CONFORMANCE_TEST_USER_SOURCE2_NAME,
+        VX_KERNEL_CONFORMANCE_TEST_USER_SOURCE2,
+        own_source2_Kernel,
+        1,
+        NULL,
+        own_Initialize,
+        own_Deinitialize), VX_TYPE_KERNEL);
+
+    VX_CALL(vxAddParameterToKernel(kernel, OWN_SOURCE_PARAM_OUTPUT, VX_OUTPUT, type, VX_PARAMETER_STATE_REQUIRED));
+    {
+        vx_parameter parameter = 0;
+        vx_enum direction = 0;
+        ASSERT_VX_OBJECT(parameter = vxGetKernelParameterByIndex(kernel, OWN_SOURCE_PARAM_OUTPUT), VX_TYPE_PARAMETER);
+        VX_CALL(vxQueryParameter(parameter, VX_PARAMETER_DIRECTION, &direction, sizeof(direction)));
+        ASSERT(direction == VX_OUTPUT);
+        VX_CALL(vxReleaseParameter(&parameter));
+    }
+    VX_CALL(vxFinalizeKernel(kernel));
+    VX_CALL(vxReleaseKernel(&kernel));
+}
+
+static void own_register_sink_kernel(vx_context context)
+{
+    vx_kernel kernel = 0;
+
+    ASSERT_VX_OBJECT(kernel = vxAddUserKernel(
+        context,
+        VX_KERNEL_CONFORMANCE_TEST_USER_SINK_NAME,
+        VX_KERNEL_CONFORMANCE_TEST_USER_SINK,
+        own_sink_Kernel,
+        1,
+        NULL,
+        own_Initialize,
+        own_Deinitialize), VX_TYPE_KERNEL);
+
+    VX_CALL(vxAddParameterToKernel(kernel, OWN_SINK_PARAM_INPUT, VX_INPUT, type, VX_PARAMETER_STATE_REQUIRED));
+    {
+        vx_parameter parameter = 0;
+        vx_enum direction = 0;
+        ASSERT_VX_OBJECT(parameter = vxGetKernelParameterByIndex(kernel, OWN_SINK_PARAM_INPUT), VX_TYPE_PARAMETER);
+        VX_CALL(vxQueryParameter(parameter, VX_PARAMETER_DIRECTION, &direction, sizeof(direction)));
+        ASSERT(direction == VX_INPUT);
+        VX_CALL(vxReleaseParameter(&parameter));
+    }
+    VX_CALL(vxFinalizeKernel(kernel));
+    VX_CALL(vxReleaseKernel(&kernel));
+}
+
+typedef struct {
+    const char* name;
+    int stream_time;
+    int source;
+} Arg;
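+/* stream_time is the streaming duration in milliseconds used by the streaming tests;
+ * source selects the user source kernel: 1 = source with pipeup buffering, 2 = source without. */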
+
+#define STREAMING_PARAMETERS \
+    CT_GENERATE_PARAMETERS("streaming_with_buffering", ARG, 100, 1), \
+    CT_GENERATE_PARAMETERS("streaming_with_buffering", ARG, 1000, 1), \
+    CT_GENERATE_PARAMETERS("streaming_with_buffering", ARG, 10000, 1), \
+    CT_GENERATE_PARAMETERS("streaming_without_buffering", ARG, 100, 2), \
+    CT_GENERATE_PARAMETERS("streaming_without_buffering", ARG, 1000, 2), \
+    CT_GENERATE_PARAMETERS("streaming_without_buffering", ARG, 10000, 2), \
+
+/*
+ *     node       scalar
+ * USER_SOURCE -- SCALAR
+ *
+ * Test case of a graph with a single user source node outputting a scalar
+ * Two possible source kernels are used, one containing a pipeup loop and the other without pipeup
+ * In the case of a kernel with pipeup, a flag is checked to verify that the kernel entered the pipeup state
+ * The scalar sources act as counters that increment the output upon each graph execution
+ * Upon subsequent graph executions, the outputs are checked to confirm that they are incrementing
+ *
+ */
+TEST_WITH_ARG(GraphStreaming, testSourceUserKernel, Arg, STREAMING_PARAMETERS)
+{
+    vx_graph graph;
+    vx_context context = context_->vx_context_;
+    vx_kernel user_kernel = 0;
+    vx_node node = 0;
+    vx_uint8  scalar_val = 0;
+    vx_scalar scalar;
+    int i;
+
+    for (i = 0; i < (PIPEUP_NUM_BUFS-1); i++)
+    {
+        copy_value[i] = 0;
+    }
+    pipeup_frame = 0;
+    global_value = 0;
+
+    ASSERT_VX_OBJECT(scalar = vxCreateScalar(context, VX_TYPE_UINT8, &scalar_val), VX_TYPE_SCALAR);
+
+    if (arg_->source == 1)
+    {
+        ASSERT_NO_FAILURE(own_register_source1_kernel(context));
+    }
+    else
+    {
+        ASSERT_NO_FAILURE(own_register_source2_kernel(context));
+    }
+
+    ASSERT_VX_OBJECT(graph = vxCreateGraph(context), VX_TYPE_GRAPH);
+
+    if (arg_->source == 1)
+    {
+        ASSERT_VX_OBJECT(user_kernel = vxGetKernelByName(context, VX_KERNEL_CONFORMANCE_TEST_USER_SOURCE1_NAME), VX_TYPE_KERNEL);
+    }
+    else
+    {
+        ASSERT_VX_OBJECT(user_kernel = vxGetKernelByName(context, VX_KERNEL_CONFORMANCE_TEST_USER_SOURCE2_NAME), VX_TYPE_KERNEL);
+    }
+
+    ASSERT_VX_OBJECT(node = vxCreateGenericNode(graph, user_kernel), VX_TYPE_NODE);
+
+    VX_CALL(vxSetParameterByIndex(node, 0, (vx_reference)scalar));
+
+    is_pipeup_entered = vx_false_e;
+    is_steady_state_entered = vx_false_e;
+
+    VX_CALL(vxProcessGraph(graph));
+
+    VX_CALL(vxCopyScalar(scalar, &scalar_val, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+
+    ASSERT(scalar_val == 1);
+
+    VX_CALL(vxProcessGraph(graph));
+
+    VX_CALL(vxCopyScalar(scalar, &scalar_val, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+
+    ASSERT(scalar_val == 2);
+
+    VX_CALL(vxProcessGraph(graph));
+
+    VX_CALL(vxCopyScalar(scalar, &scalar_val, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+
+    ASSERT(scalar_val == 3);
+
+    if (arg_->source == 1)
+    {
+        ASSERT(is_pipeup_entered == vx_true_e);
+        ASSERT(is_steady_state_entered == vx_true_e);
+    }
+
+    VX_CALL(vxReleaseNode(&node));
+    VX_CALL(vxReleaseGraph(&graph));
+    /* user kernel should be removed only after all references to it are released */
+    /* Note: vxRemoveKernel doesn't zero the kernel ref */
+    VX_CALL(vxRemoveKernel(user_kernel));
+
+    VX_CALL(vxReleaseScalar(&scalar));
+
+    ASSERT(node == 0);
+    ASSERT(graph == 0);
+    ASSERT(scalar == 0);
+}
+
+/*
+ *     node1      scalar      node2
+ * USER_SOURCE -- SCALAR -- USER_SINK
+ *
+ * Test case of a graph with a user source node outputting a scalar connected to a user sink node
+ * Two possible source kernels are used, one containing a pipeup loop and the other without pipeup
+ * In the case of a kernel with pipeup, a flag is checked to verify that the kernel entered the pipeup state
+ * The scalar sources act as counters that increment the output upon each graph execution
+ * Error checking occurs in the sink node, as it expects the source node to increment the input to the node
+ *
+ */
+TEST_WITH_ARG(GraphStreaming, testSourceSinkUserKernel, Arg, STREAMING_PARAMETERS)
+{
+    vx_graph graph;
+    vx_context context = context_->vx_context_;
+    vx_kernel source_user_kernel = 0, sink_user_kernel = 0;
+    vx_node node1 = 0, node2 = 0;
+    vx_uint8  scalar_val = 0;
+    vx_scalar scalar;
+    int i;
+
+    for (i = 0; i < (PIPEUP_NUM_BUFS-1); i++)
+    {
+        copy_value[i] = 0;
+    }
+    pipeup_frame = 0;
+    global_value = 0;
+    golden_sink_value = 0;
+
+    ASSERT_VX_OBJECT(scalar = vxCreateScalar(context, VX_TYPE_UINT8, &scalar_val), VX_TYPE_SCALAR);
+
+    if (arg_->source == 1)
+    {
+        ASSERT_NO_FAILURE(own_register_source1_kernel(context));
+    }
+    else
+    {
+        ASSERT_NO_FAILURE(own_register_source2_kernel(context));
+    }
+
+    ASSERT_NO_FAILURE(own_register_sink_kernel(context));
+
+    ASSERT_VX_OBJECT(graph = vxCreateGraph(context), VX_TYPE_GRAPH);
+
+    if (arg_->source == 1)
+    {
+        ASSERT_VX_OBJECT(source_user_kernel = vxGetKernelByName(context, VX_KERNEL_CONFORMANCE_TEST_USER_SOURCE1_NAME), VX_TYPE_KERNEL);
+    }
+    else
+    {
+        ASSERT_VX_OBJECT(source_user_kernel = vxGetKernelByName(context, VX_KERNEL_CONFORMANCE_TEST_USER_SOURCE2_NAME), VX_TYPE_KERNEL);
+    }
+
+    ASSERT_VX_OBJECT(sink_user_kernel = vxGetKernelByName(context, VX_KERNEL_CONFORMANCE_TEST_USER_SINK_NAME), VX_TYPE_KERNEL);
+
+    ASSERT_VX_OBJECT(node1 = vxCreateGenericNode(graph, source_user_kernel), VX_TYPE_NODE);
+
+    ASSERT_VX_OBJECT(node2 = vxCreateGenericNode(graph, sink_user_kernel), VX_TYPE_NODE);
+
+    VX_CALL(vxSetParameterByIndex(node1, 0, (vx_reference)scalar));
+
+    VX_CALL(vxSetParameterByIndex(node2, 0, (vx_reference)scalar));
+
+    is_pipeup_entered = vx_false_e;
+    is_steady_state_entered = vx_false_e;
+
+    VX_CALL(vxProcessGraph(graph));
+
+    VX_CALL(vxProcessGraph(graph));
+
+    VX_CALL(vxProcessGraph(graph));
+
+    if (arg_->source == 1)
+    {
+        ASSERT(is_pipeup_entered == vx_true_e);
+        ASSERT(is_steady_state_entered == vx_true_e);
+    }
+
+    VX_CALL(vxReleaseNode(&node1));
+    VX_CALL(vxReleaseNode(&node2));
+    VX_CALL(vxReleaseGraph(&graph));
+    /* user kernel should be removed only after all references to it are released */
+    /* Note: vxRemoveKernel doesn't zero the kernel ref */
+    VX_CALL(vxRemoveKernel(source_user_kernel));
+    VX_CALL(vxRemoveKernel(sink_user_kernel));
+
+    VX_CALL(vxReleaseScalar(&scalar));
+
+    ASSERT(node1 == 0);
+    ASSERT(node2 == 0);
+    ASSERT(graph == 0);
+    ASSERT(scalar == 0);
+}
+
+/*
+ *     node       scalar
+ * USER_SOURCE -- SCALAR
+ *
+ * Test case of a graph with a single user source node outputting a scalar
+ * Two possible source kernels are used, one containing a pipeup loop and the other without pipeup
+ * In the case of a kernel with pipeup, a flag is checked to verify that the kernel entered the pipeup state
+ * The scalar sources act as counters that increment the output upon each graph execution
+ * No sink node is used here; for source 1, correctness is verified via the pipeup and steady-state flags
+ * Streaming is enabled on the graph. Streaming starts, then stops after a specified delay.
+ *
+ */
+TEST_WITH_ARG(GraphStreaming, testSourceUserKernelStreaming, Arg, STREAMING_PARAMETERS)
+{
+    vx_graph graph;
+    vx_context context = context_->vx_context_;
+    vx_kernel user_kernel = 0;
+    vx_node node = 0;
+    vx_uint8  scalar_val = 0;
+    vx_scalar scalar;
+    int i;
+
+    for (i = 0; i < (PIPEUP_NUM_BUFS-1); i++)
+    {
+        copy_value[i] = 0;
+    }
+
+    pipeup_frame = 0;
+    global_value = 0;
+
+    ASSERT_VX_OBJECT(scalar = vxCreateScalar(context, VX_TYPE_UINT8, &scalar_val), VX_TYPE_SCALAR);
+
+    if (arg_->source == 1)
+    {
+        ASSERT_NO_FAILURE(own_register_source1_kernel(context));
+    }
+    else
+    {
+        ASSERT_NO_FAILURE(own_register_source2_kernel(context));
+    }
+
+    ASSERT_VX_OBJECT(graph = vxCreateGraph(context), VX_TYPE_GRAPH);
+
+    if (arg_->source == 1)
+    {
+        ASSERT_VX_OBJECT(user_kernel = vxGetKernelByName(context, VX_KERNEL_CONFORMANCE_TEST_USER_SOURCE1_NAME), VX_TYPE_KERNEL);
+    }
+    else
+    {
+        ASSERT_VX_OBJECT(user_kernel = vxGetKernelByName(context, VX_KERNEL_CONFORMANCE_TEST_USER_SOURCE2_NAME), VX_TYPE_KERNEL);
+    }
+
+    ASSERT_VX_OBJECT(node = vxCreateGenericNode(graph, user_kernel), VX_TYPE_NODE);
+
+    VX_CALL(vxSetParameterByIndex(node, 0, (vx_reference)scalar));
+
+    ASSERT_EQ_VX_STATUS(VX_SUCCESS, vxEnableGraphStreaming(graph, node));
+
+    ASSERT_EQ_VX_STATUS(VX_SUCCESS, vxVerifyGraph(graph));
+
+    is_pipeup_entered = vx_false_e;
+    is_steady_state_entered = vx_false_e;
+
+    VX_CALL(vxStartGraphStreaming(graph));
+
+    ct_delay_ms(arg_->stream_time);
+
+    VX_CALL(vxStopGraphStreaming(graph));
+
+    if (arg_->source == 1)
+    {
+        ASSERT(is_pipeup_entered == vx_true_e);
+        ASSERT(is_steady_state_entered == vx_true_e);
+    }
+
+    VX_CALL(vxReleaseNode(&node));
+    VX_CALL(vxReleaseGraph(&graph));
+    /* user kernel should be removed only after all references to it are released */
+    /* Note: vxRemoveKernel doesn't zero the kernel ref */
+    VX_CALL(vxRemoveKernel(user_kernel));
+
+    VX_CALL(vxReleaseScalar(&scalar));
+
+    ASSERT(node == 0);
+    ASSERT(graph == 0);
+    ASSERT(scalar == 0);
+}
+
+/*
+ *     node1      scalar      node2
+ * USER_SOURCE -- SCALAR -- USER_SINK
+ *
+ * Test case of a graph with a user source node outputting a scalar connected to a user sink node
+ * Two possible source kernels are used, one containing a pipeup loop and the other without pipeup
+ * In the case of a kernel with pipeup, a flag is checked to verify that the kernel entered the pipeup state
+ * The scalar sources act as counters that increment the output upon each graph execution
+ * Error checking occurs in the sink node, as it expects the source node to increment the input to the node
+ * Streaming is enabled on the graph. Streaming starts, then stops after a specified delay.
+ *
+ */
+TEST_WITH_ARG(GraphStreaming, testSourceSinkUserKernelStreaming, Arg, STREAMING_PARAMETERS)
+{
+    vx_graph graph;
+    vx_context context = context_->vx_context_;
+    vx_kernel source_user_kernel = 0, sink_user_kernel = 0;
+    vx_node node1 = 0, node2 = 0;
+    vx_uint8  scalar_val = 0;
+    vx_scalar scalar;
+    int i;
+
+    for (i = 0; i < (PIPEUP_NUM_BUFS-1); i++)
+    {
+        copy_value[i] = 0;
+    }
+
+    pipeup_frame = 0;
+    global_value = 0;
+    golden_sink_value = 0;
+
+    ASSERT_VX_OBJECT(scalar = vxCreateScalar(context, VX_TYPE_UINT8, &scalar_val), VX_TYPE_SCALAR);
+
+    if (arg_->source == 1)
+    {
+        ASSERT_NO_FAILURE(own_register_source1_kernel(context));
+    }
+    else
+    {
+        ASSERT_NO_FAILURE(own_register_source2_kernel(context));
+    }
+
+    ASSERT_NO_FAILURE(own_register_sink_kernel(context));
+
+    ASSERT_VX_OBJECT(graph = vxCreateGraph(context), VX_TYPE_GRAPH);
+
+    if (arg_->source == 1)
+    {
+        ASSERT_VX_OBJECT(source_user_kernel = vxGetKernelByName(context, VX_KERNEL_CONFORMANCE_TEST_USER_SOURCE1_NAME), VX_TYPE_KERNEL);
+    }
+    else
+    {
+        ASSERT_VX_OBJECT(source_user_kernel = vxGetKernelByName(context, VX_KERNEL_CONFORMANCE_TEST_USER_SOURCE2_NAME), VX_TYPE_KERNEL);
+    }
+
+    ASSERT_VX_OBJECT(sink_user_kernel = vxGetKernelByName(context, VX_KERNEL_CONFORMANCE_TEST_USER_SINK_NAME), VX_TYPE_KERNEL);
+
+    ASSERT_VX_OBJECT(node1 = vxCreateGenericNode(graph, source_user_kernel), VX_TYPE_NODE);
+
+    ASSERT_VX_OBJECT(node2 = vxCreateGenericNode(graph, sink_user_kernel), VX_TYPE_NODE);
+
+    VX_CALL(vxSetParameterByIndex(node1, 0, (vx_reference)scalar));
+
+    VX_CALL(vxSetParameterByIndex(node2, 0, (vx_reference)scalar));
+
+    ASSERT_EQ_VX_STATUS(VX_SUCCESS, vxEnableGraphStreaming(graph, node1));
+
+    ASSERT_EQ_VX_STATUS(VX_SUCCESS, vxVerifyGraph(graph));
+
+    is_pipeup_entered = vx_false_e;
+    is_steady_state_entered = vx_false_e;
+
+    VX_CALL(vxStartGraphStreaming(graph));
+
+    ct_delay_ms(arg_->stream_time);
+
+    VX_CALL(vxStopGraphStreaming(graph));
+
+    if (arg_->source == 1)
+    {
+        ASSERT(is_pipeup_entered == vx_true_e);
+        ASSERT(is_steady_state_entered == vx_true_e);
+    }
+
+    VX_CALL(vxReleaseNode(&node1));
+    VX_CALL(vxReleaseNode(&node2));
+    VX_CALL(vxReleaseGraph(&graph));
+
+    VX_CALL(vxRemoveKernel(source_user_kernel));
+    VX_CALL(vxRemoveKernel(sink_user_kernel));
+
+    VX_CALL(vxReleaseScalar(&scalar));
+
+    ASSERT(node1 == 0);
+    ASSERT(node2 == 0);
+    ASSERT(graph == 0);
+    ASSERT(scalar == 0);
+}
+
+TESTCASE_TESTS(GraphStreaming,
+               testSourceUserKernel,
+               testSourceSinkUserKernel,
+               testSourceUserKernelStreaming,
+               testSourceSinkUserKernelStreaming)
+
+#endif
diff --git a/test_conformance/test_halfscalegaussian.c b/test_conformance/test_halfscalegaussian.c
index 1181b7e..27291c9 100644
--- a/test_conformance/test_halfscalegaussian.c
+++ b/test_conformance/test_halfscalegaussian.c
@@ -1,4 +1,4 @@
-/* 
+/*
 
  * Copyright (c) 2012-2017 The Khronos Group Inc.
  *
@@ -15,6 +15,8 @@
  * limitations under the License.
  */
 
+#if defined OPENVX_USE_ENHANCED_VISION || OPENVX_CONFORMANCE_VISION
+
 #include "test_engine/test.h"
 #include <VX/vx.h>
 #include <VX/vxu.h>
@@ -320,3 +322,5 @@
         testNodeCreation,
         testGraphProcessing,
         testImmediateProcessing)
+
+#endif //OPENVX_USE_ENHANCED_VISION || OPENVX_CONFORMANCE_VISION
diff --git a/test_conformance/test_harriscorners.c b/test_conformance/test_harriscorners.c
index 5ad0f83..d118e4f 100644
--- a/test_conformance/test_harriscorners.c
+++ b/test_conformance/test_harriscorners.c
@@ -1,4 +1,4 @@
-/* 
+/*
 
  * Copyright (c) 2012-2017 The Khronos Group Inc.
  *
@@ -15,6 +15,8 @@
  * limitations under the License.
  */
 
+#if defined OPENVX_USE_ENHANCED_VISION || OPENVX_CONFORMANCE_VISION
+
 #include "test_engine/test.h"
 #include <VX/vx.h>
 #include <VX/vxu.h>
@@ -417,3 +419,5 @@
         testGraphProcessing,
         testImmediateProcessing
 )
+
+#endif //OPENVX_USE_ENHANCED_VISION || OPENVX_CONFORMANCE_VISION
diff --git a/test_conformance/test_histogram.c b/test_conformance/test_histogram.c
index e2b0f68..9a352ec 100644
--- a/test_conformance/test_histogram.c
+++ b/test_conformance/test_histogram.c
@@ -1,4 +1,4 @@
-/* 
+/*
 
  * Copyright (c) 2012-2017 The Khronos Group Inc.
  *
@@ -15,6 +15,8 @@
  * limitations under the License.
  */
 
+#if defined OPENVX_USE_ENHANCED_VISION || OPENVX_CONFORMANCE_VISION
+
 #include "test_engine/test.h"
 
 #include <VX/vx.h>
@@ -228,8 +230,8 @@
                 CT_FAIL("check for query distribution attribute VX_DISTRIBUTION_BINS failed\n");
 
             VX_CALL(vxQueryDistribution(dist1, VX_DISTRIBUTION_WINDOW, &attr_window, sizeof(attr_window)));
-	    /*Tthe attribute is specified as valid only when the range is a multiple of nbins, 
-	     * in other cases, its value shouldn't be checked */
+        /* The attribute is specified as valid only when the range is a multiple of nbins;
+         * in other cases, its value shouldn't be checked */
             if (((range % nbins) == 0) && (attr_window != reference_window(range, nbins)))
                 CT_FAIL("check for query distribution attribute VX_DISTRIBUTION_WINDOW failed\n");
 
@@ -305,3 +307,5 @@
 }
 
 TESTCASE_TESTS(Histogram, testOnRandom)
+
+#endif //OPENVX_USE_ENHANCED_VISION || OPENVX_CONFORMANCE_VISION
diff --git a/test_conformance/test_hog.c b/test_conformance/test_hog.c
index 54676a8..542b05b 100644
--- a/test_conformance/test_hog.c
+++ b/test_conformance/test_hog.c
@@ -14,6 +14,8 @@
 * limitations under the License.
 */
 
+#ifdef OPENVX_USE_ENHANCED_VISION
+
 #include "test_engine/test.h"
 #include <math.h>
 #include <stdio.h>
@@ -48,7 +50,7 @@
 
     ASSERT_VX_OBJECT(input = vxCreateImage(context, src_width, src_height, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
     ASSERT_VX_OBJECT(magnitudes = vxCreateTensor(context, 2, mag_dims, VX_TYPE_INT16, 8), VX_TYPE_TENSOR);
-    ASSERT_VX_OBJECT(bins = vxCreateTensor(context, 3, bins_dims, VX_TYPE_INT8, 0), VX_TYPE_TENSOR);
+    ASSERT_VX_OBJECT(bins = vxCreateTensor(context, 3, bins_dims, VX_TYPE_INT16, 8), VX_TYPE_TENSOR);
 
     ASSERT_VX_OBJECT(graph = vxCreateGraph(context), VX_TYPE_GRAPH);
     ASSERT_VX_OBJECT(node = vxHOGCellsNode(graph, input, cell_width, cell_height, num_bins, magnitudes, bins), VX_TYPE_NODE);
@@ -86,19 +88,28 @@
     vx_float32 orientation;
     vx_float32 magnitude;
     vx_int8 bin;
-    
+
     width = img->width;
     height = img->height;
-    vx_int16* mag_ref = (vx_int16 *)malloc(height / cell_height * width / cell_width * sizeof(vx_int16));
-    vx_int8* bins_ref = (vx_int8 *)malloc(height / cell_height * width / cell_width * bins_num );
-    vx_int16* mag = (vx_int16 *)malloc(height / cell_height * width / cell_width *sizeof(vx_int16));
-    vx_int8* bins_p = (vx_int8 *)malloc(height / cell_height * width / cell_width * bins_num);
+    vx_int16* mag_ref = (vx_int16 *)ct_alloc_mem(height / cell_height * width / cell_width * sizeof(vx_int16));
+    vx_int16* bins_ref = (vx_int16 *)ct_alloc_mem(height / cell_height * width / cell_width * bins_num * sizeof(vx_int16));
+    vx_int16* mag = (vx_int16 *)ct_alloc_mem(height / cell_height * width / cell_width *sizeof(vx_int16));
+    vx_int16* bins_p = (vx_int16 *)ct_alloc_mem(height / cell_height * width / cell_width * bins_num * sizeof(vx_int16));
     memset(mag_ref, 0, height / cell_height * width / cell_width * sizeof(vx_int16));
-    memset(bins_ref, 0, height / cell_height * width / cell_width * bins_num);
+    memset(bins_ref, 0, height / cell_height * width / cell_width * bins_num * sizeof(vx_int16));
     float num_div_360 = (float)bins_num / 360.0f;
 
-    vx_size magnitudes_dim_num = 2, magnitudes_dims[6] = { width/cell_width, height/cell_height,0 }, magnitudes_strides[6] = { 2, 2 * width / cell_width };
-    vx_size bins_dim_num = 3, bins_dims[6] = { width / cell_width, height / cell_height, bins_num }, bins_strides[6] = { 1,  width / cell_width, height / cell_height * width / cell_width };
+    vx_size magnitudes_dim_num = 2, magnitudes_dims[6] = { width/cell_width, height/cell_height,0 }, magnitudes_strides[6] = { 0 };
+    vx_size bins_dim_num = 3, bins_dims[6] = { width / cell_width, height / cell_height, bins_num }, bins_strides[6] = { 0 };
+    magnitudes_strides[0] = 2;
+    bins_strides[0] = 2;
+    for (vx_size i = 1; i < magnitudes_dim_num; i++) {
+        magnitudes_strides[i] = magnitudes_dims[i - 1] * magnitudes_strides[i - 1];
+    }
+    for (vx_size i = 1; i < bins_dim_num; i++) {
+        bins_strides[i] = bins_dims[i - 1] * bins_strides[i - 1];
+    }
+
     const size_t view_start[6] = { 0 };
     vxCopyTensorPatch(magnitudes, magnitudes_dim_num, view_start, magnitudes_dims, magnitudes_strides, mag, VX_READ_ONLY, VX_MEMORY_TYPE_HOST);
     vxCopyTensorPatch(bins, bins_dim_num, view_start, bins_dims, bins_strides, bins_p, VX_READ_ONLY, VX_MEMORY_TYPE_HOST);
@@ -160,11 +171,11 @@
             }
         }
     }
-    
-    free(mag_ref);
-    free(mag);
-    free(bins_ref);
-    free(bins_p);
+
+    ct_free_mem(mag_ref);
+    ct_free_mem(mag);
+    ct_free_mem(bins_ref);
+    ct_free_mem(bins_p);
     return status;
 }
 
@@ -214,8 +225,8 @@
 
     ASSERT_VX_OBJECT(src_image = ct_image_to_vx_image(src, context), VX_TYPE_IMAGE);
     ASSERT_VX_OBJECT(magnitudes = vxCreateTensor(context, 2, mag_dims, VX_TYPE_INT16, 8), VX_TYPE_TENSOR);
-    ASSERT_VX_OBJECT(bins = vxCreateTensor(context, 3, bins_dims, VX_TYPE_INT8, 0), VX_TYPE_TENSOR);
-    
+    ASSERT_VX_OBJECT(bins = vxCreateTensor(context, 3, bins_dims, VX_TYPE_INT16, 8), VX_TYPE_TENSOR);
+
     ASSERT_VX_OBJECT(graph = vxCreateGraph(context), VX_TYPE_GRAPH);
     ASSERT_VX_OBJECT(node = vxHOGCellsNode(graph, src_image, cell_width, cell_height, bins_num, magnitudes, bins), VX_TYPE_NODE);
 
@@ -223,7 +234,7 @@
     VX_CALL(vxProcessGraph(graph));
 
     VX_CALL(status = hogcells_ref(src, cell_width, cell_height, bins_num, magnitudes, bins));
-    
+
     VX_CALL(vxReleaseNode(&node));
     VX_CALL(vxReleaseGraph(&graph));
     VX_CALL(vxReleaseImage(&src_image));
@@ -246,7 +257,7 @@
     vx_int32 cell_height = arg_->cell_height;
     vx_int32 bins_num = arg_->bins_num;
     CT_Image src = NULL;
-    vx_status status;
+    vx_status status = VX_SUCCESS;
 
     vx_uint32 src_width;
     vx_uint32 src_height;
@@ -262,10 +273,11 @@
 
     ASSERT_VX_OBJECT(src_image = ct_image_to_vx_image(src, context), VX_TYPE_IMAGE);
     ASSERT_VX_OBJECT(magnitudes = vxCreateTensor(context, 2, mag_dims, VX_TYPE_INT16, 8), VX_TYPE_TENSOR);
-    ASSERT_VX_OBJECT(bins = vxCreateTensor(context, 3, bins_dims, VX_TYPE_INT8, 0), VX_TYPE_TENSOR);
+    ASSERT_VX_OBJECT(bins = vxCreateTensor(context, 3, bins_dims, VX_TYPE_INT16, 8), VX_TYPE_TENSOR);
 
     VX_CALL(vxuHOGCells(context, src_image, cell_width, cell_width, bins_num, magnitudes, bins));
     ASSERT_NO_FAILURE(status = hogcells_ref(src, cell_width, cell_height, bins_num, magnitudes, bins));
+    EXPECT_EQ_VX_STATUS(VX_SUCCESS, status);
 
     VX_CALL(vxReleaseImage(&src_image));
     VX_CALL(vxReleaseTensor(&magnitudes));
@@ -301,17 +313,24 @@
     vx_node feature_node = 0;
     params.window_width = 64;
     params.window_height = 32;
+    params.window_stride = 64;
     params.block_width = 16;
     params.block_height = 16;
+    params.block_stride = 16;
     params.cell_width = 8;
     params.cell_height = 8;
     params.num_bins = 9;
+    const vx_size features_dims[3] = { (src_width - params.window_width) / params.window_stride + 1,
+        (src_height - params.window_height) / params.window_stride + 1,
+        ((params.window_width - params.block_width) / params.block_stride + 1) *
+        ((src_height - params.block_height) / params.block_stride + 1) *
+        ((params.block_width * params.block_height) / (params.cell_width * params.cell_height)) * num_bins };
     ASSERT_VX_OBJECT(input = vxCreateImage(context, src_width, src_height, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
     ASSERT_VX_OBJECT(magnitudes = vxCreateTensor(context, 2, mag_dims, VX_TYPE_INT16, 8), VX_TYPE_TENSOR);
-    ASSERT_VX_OBJECT(bins = vxCreateTensor(context, 3, bins_dims, VX_TYPE_INT8, 0), VX_TYPE_TENSOR);
+    ASSERT_VX_OBJECT(bins = vxCreateTensor(context, 3, bins_dims, VX_TYPE_INT16, 8), VX_TYPE_TENSOR);
     ASSERT_VX_OBJECT(graph = vxCreateGraph(context), VX_TYPE_GRAPH);
     ASSERT_VX_OBJECT(cell_node = vxHOGCellsNode(graph, input, cell_width, cell_height, num_bins, magnitudes, bins), VX_TYPE_NODE);
-    ASSERT_VX_OBJECT(features = vxCreateTensor(context, 3, bins_dims, VX_TYPE_INT16, 8), VX_TYPE_TENSOR);
+    ASSERT_VX_OBJECT(features = vxCreateTensor(context, 3, features_dims, VX_TYPE_INT16, 8), VX_TYPE_TENSOR);
 
     ASSERT_VX_OBJECT(feature_node = vxHOGFeaturesNode(graph, input, magnitudes, bins, &params, 1, features), VX_TYPE_NODE);
     VX_CALL(vxVerifyGraph(graph));
@@ -353,19 +372,18 @@
     vx_int32 num_windowsH = height / params.window_height;
     vx_int32 num_blockW = width / params.cell_width - 1;
     vx_int32 num_blockH = height / params.cell_height - 1;
-    vx_int32 num_block = num_blockW * num_blockH;
     vx_int32 n_cellsx = width / cell_width;
     vx_int32 cells_per_block_w = params.block_width / cell_width;
     vx_int32 cells_per_block_h = params.block_height / cell_height;
-   
-    vx_int16* mag_ref = (vx_int16 *)malloc(height / cell_height * width / cell_width * sizeof(vx_int16));
-    vx_int8* bins_ref = (vx_int8 *)malloc(height / cell_height * width / cell_width * bins_num );
-    vx_int16* features_ref = (vx_int16 *)malloc(num_windowsW * num_windowsH * params.window_width / params.block_stride * 
+
+    vx_int16* mag_ref = (vx_int16 *)ct_alloc_mem(height / cell_height * width / cell_width * sizeof(vx_int16));
+    vx_int16* bins_ref = (vx_int16 *)ct_alloc_mem(height / cell_height * width / cell_width * bins_num * sizeof(vx_int16));
+    vx_int16* features_ref = (vx_int16 *)ct_alloc_mem(num_windowsW * num_windowsH * params.window_width / params.block_stride *
                                                 params.window_height / params.block_stride *bins_num * sizeof(vx_int16));
-    vx_int16* features_p = (vx_int16 *)malloc(num_windowsW * num_windowsH * params.window_width / params.block_stride *
+    vx_int16* features_p = (vx_int16 *)ct_alloc_mem(num_windowsW * num_windowsH * params.window_width / params.block_stride *
                                               params.window_height / params.block_stride *bins_num * sizeof(vx_int16));
     memset(mag_ref, 0, height / cell_height * width / cell_width * sizeof(vx_int16));
-    memset(bins_ref, 0, height / cell_height * width / cell_width * bins_num);
+    memset(bins_ref, 0, height / cell_height * width / cell_width * bins_num * sizeof(vx_int16));
     memset(features_ref, 0, num_windowsW * num_windowsH * params.window_width / params.block_stride *
         params.window_height / params.block_stride *bins_num * sizeof(vx_int16));
 
@@ -451,11 +469,11 @@
             break;
         }
     }
-    
-    free(mag_ref);
-    free(bins_ref);
-    free(features_ref);
-    free(features_p);
+
+    ct_free_mem(mag_ref);
+    ct_free_mem(bins_ref);
+    ct_free_mem(features_ref);
+    ct_free_mem(features_p);
     return status;
 }
 
@@ -499,15 +517,18 @@
 
     const vx_size mag_dims[2] = { src_width / cell_width, src_height / cell_height };
     const vx_size bins_dims[3] = { src_width / cell_width, src_height / cell_height, bins_num };
-    const vx_size features_dims[3] = { src_width / arg_->hog_params.window_stride,  src_height / arg_->hog_params.window_stride, 
-                                       arg_->hog_params.window_width / arg_->hog_params.block_stride * arg_->hog_params.window_height / arg_->hog_params.block_stride *bins_num };
+    const vx_size features_dims[3] = { (src_width - arg_->hog_params.window_width) / arg_->hog_params.window_stride + 1,
+        (src_height - arg_->hog_params.window_height) / arg_->hog_params.window_stride + 1,
+        ((arg_->hog_params.window_width - arg_->hog_params.block_width) / arg_->hog_params.block_stride + 1) *
+        ((src_height - arg_->hog_params.block_height) / arg_->hog_params.block_stride + 1) *
+        ((arg_->hog_params.block_width * arg_->hog_params.block_height) / (arg_->hog_params.cell_width * arg_->hog_params.cell_height)) * bins_num };
     vx_tensor magnitudes;
     vx_tensor bins;
     vx_tensor features;
 
     ASSERT_VX_OBJECT(src_image = ct_image_to_vx_image(src, context), VX_TYPE_IMAGE);
     ASSERT_VX_OBJECT(magnitudes = vxCreateTensor(context, 2, mag_dims, VX_TYPE_INT16, 8), VX_TYPE_TENSOR);
-    ASSERT_VX_OBJECT(bins = vxCreateTensor(context, 3, bins_dims, VX_TYPE_INT8, 0), VX_TYPE_TENSOR);
+    ASSERT_VX_OBJECT(bins = vxCreateTensor(context, 3, bins_dims, VX_TYPE_INT16, 8), VX_TYPE_TENSOR);
     ASSERT_VX_OBJECT(features = vxCreateTensor(context, 3, features_dims, VX_TYPE_INT16, 8), VX_TYPE_TENSOR);
 
     ASSERT_VX_OBJECT(graph = vxCreateGraph(context), VX_TYPE_GRAPH);
@@ -517,7 +538,7 @@
     VX_CALL(vxProcessGraph(graph));
 
     VX_CALL(status = hogfeatures_ref(src, arg_->hog_params, features));
-    
+
     VX_CALL(vxReleaseNode(&cell_node));
     VX_CALL(vxReleaseNode(&feature_node));
     VX_CALL(vxReleaseGraph(&graph));
@@ -538,14 +559,11 @@
 {
     vx_context context = context_->vx_context_;
     vx_image src_image = 0;
-    vx_graph graph = 0;
-    vx_node cell_node = 0;
-    vx_node feature_node = 0;
     vx_int32 cell_width = arg_->hog_params.cell_width;
     vx_int32 cell_height = arg_->hog_params.cell_height;
     vx_int32 bins_num = arg_->hog_params.num_bins;
     CT_Image src = NULL;
-    vx_status status;
+    vx_status status = VX_SUCCESS;
 
     vx_uint32 src_width;
     vx_uint32 src_height;
@@ -556,20 +574,24 @@
 
     const vx_size mag_dims[2] = { src_width / cell_width, src_height / cell_height };
     const vx_size bins_dims[3] = { src_width / cell_width, src_height / cell_height, bins_num };
-    const vx_size features_dims[3] = { src_width / arg_->hog_params.window_stride,  src_height / arg_->hog_params.window_stride,
-        arg_->hog_params.window_width / arg_->hog_params.block_stride * arg_->hog_params.window_height / arg_->hog_params.block_stride *bins_num };
+    const vx_size features_dims[3] = { (src_width - arg_->hog_params.window_width) / arg_->hog_params.window_stride + 1,
+        (src_height - arg_->hog_params.window_height) / arg_->hog_params.window_stride + 1,
+        ((arg_->hog_params.window_width - arg_->hog_params.block_width) / arg_->hog_params.block_stride + 1) *
+        ((src_height - arg_->hog_params.block_height) / arg_->hog_params.block_stride + 1) *
+        ((arg_->hog_params.block_width * arg_->hog_params.block_height) / (arg_->hog_params.cell_width * arg_->hog_params.cell_height)) * bins_num };
     vx_tensor magnitudes;
     vx_tensor bins;
     vx_tensor features;
 
     ASSERT_VX_OBJECT(src_image = ct_image_to_vx_image(src, context), VX_TYPE_IMAGE);
     ASSERT_VX_OBJECT(magnitudes = vxCreateTensor(context, 2, mag_dims, VX_TYPE_INT16, 8), VX_TYPE_TENSOR);
-    ASSERT_VX_OBJECT(bins = vxCreateTensor(context, 3, bins_dims, VX_TYPE_INT8, 0), VX_TYPE_TENSOR);
+    ASSERT_VX_OBJECT(bins = vxCreateTensor(context, 3, bins_dims, VX_TYPE_INT16, 8), VX_TYPE_TENSOR);
     ASSERT_VX_OBJECT(features = vxCreateTensor(context, 3, features_dims, VX_TYPE_INT16, 8), VX_TYPE_TENSOR);
 
     VX_CALL(vxuHOGCells(context, src_image, cell_width, cell_width, bins_num, magnitudes, bins));
     VX_CALL(vxuHOGFeatures(context, src_image, magnitudes, bins, &arg_->hog_params, 1, features));
     ASSERT_NO_FAILURE(status = hogfeatures_ref(src, arg_->hog_params, features));
+    EXPECT_EQ_VX_STATUS(VX_SUCCESS, status);
 
     VX_CALL(vxReleaseImage(&src_image));
     VX_CALL(vxReleaseTensor(&magnitudes));
@@ -582,3 +604,5 @@
                testNodeCreation,
                testGraphProcessing,
                testImmediateProcessing)
+
+#endif //OPENVX_USE_ENHANCED_VISION
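
Note on the resized `features` tensor above: the new `features_dims` derives the output size from the detection-window and block geometry instead of reusing `bins_dims`. As a rough standalone illustration of that geometry (a sketch of the usual HOG layout, not the CTS code itself; the 640x480 input size and the `main` wrapper are assumptions for the example), the following counts how many windows fit in an image and how long one window's descriptor is:

    #include <stdio.h>

    int main(void)
    {
        /* Example numbers only; they mirror the parameters set in testNodeCreation. */
        int src_width = 640, src_height = 480;          /* hypothetical input size   */
        int window_width = 64, window_height = 32;      /* detection window          */
        int window_stride = 64;
        int block_width = 16, block_height = 16, block_stride = 16;
        int cell_width = 8, cell_height = 8, num_bins = 9;

        /* Sliding-window positions across the image (first two tensor dimensions). */
        int windows_x = (src_width  - window_width)  / window_stride + 1;
        int windows_y = (src_height - window_height) / window_stride + 1;

        /* Usual HOG descriptor length for one window (third tensor dimension). */
        int blocks_x = (window_width  - block_width)  / block_stride + 1;
        int blocks_y = (window_height - block_height) / block_stride + 1;
        int cells_per_block = (block_width * block_height) / (cell_width * cell_height);
        int descriptor_len = blocks_x * blocks_y * cells_per_block * num_bins;

        printf("windows: %d x %d, descriptor length per window: %d\n",
               windows_x, windows_y, descriptor_len);   /* prints: 10 x 8, 288 */
        return 0;
    }
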
diff --git a/test_conformance/test_houghlinesp.c b/test_conformance/test_houghlinesp.c
index 0d13bf0..3a3f663 100644
--- a/test_conformance/test_houghlinesp.c
+++ b/test_conformance/test_houghlinesp.c
@@ -15,6 +15,8 @@
  * limitations under the License.
 */
 
+#ifdef OPENVX_USE_ENHANCED_VISION
+
 #include "test_engine/test.h"
 
 #include <stdint.h>
@@ -36,7 +38,7 @@
     vx_array lines_array = 0;
     vx_scalar num_lines = 0;
     vx_uint32 numlines = 0;
-    vx_hough_lines_p_t param_hough_lines = {1, 180, 50, 50, 10, M_PI, 0};
+    vx_hough_lines_p_t param_hough_lines = {1, M_PI/180, 50, 50, 10, M_PI, 0};
 
     vx_graph graph = 0;
     vx_node node = 0;
@@ -50,7 +52,7 @@
 
     ASSERT_VX_OBJECT(graph = vxCreateGraph(context), VX_TYPE_GRAPH);
     ASSERT_VX_OBJECT(node = vxHoughLinesPNode(graph, input, &param_hough_lines, lines_array, num_lines), VX_TYPE_NODE);
-    
+
     VX_CALL(vxVerifyGraph(graph));
     VX_CALL(vxProcessGraph(graph));
 
@@ -67,14 +69,31 @@
     ASSERT(input == 0);
 }
 
-static CT_Image hough_lines_read_image(const char *fileName, int width, int height)
+static CT_Image hough_lines_read_image(const char *fileName, int width, int height, vx_df_image format)
 {
-    CT_Image image = NULL;
+    CT_Image image_load = NULL, image_ret = NULL;
     ASSERT_(return 0, width == 0 && height == 0);
-    image = ct_read_image(fileName, 1);
-    ASSERT_(return 0, image);
-    ASSERT_(return 0, image->format == VX_DF_IMAGE_U8);
-    return image;
+    ASSERT_(return 0, format == VX_DF_IMAGE_U1 || format == VX_DF_IMAGE_U8);
+
+    image_load = ct_read_image(fileName, 1);
+    ASSERT_(return 0, image_load);
+    ASSERT_(return 0, image_load->format == VX_DF_IMAGE_U8);
+
+    if (format == VX_DF_IMAGE_U1)
+    {
+        ASSERT_NO_FAILURE_(return 0, threshold_U8_ct_image(image_load, 127));   // Threshold to make the U1 image less trivial
+        ASSERT_NO_FAILURE_(return 0, image_ret = ct_allocate_image(image_load->width, image_load->height, VX_DF_IMAGE_U1));
+        ASSERT_NO_FAILURE_(return 0, U8_ct_image_to_U1_ct_image(image_load, image_ret));
+    }
+    else
+    {
+        image_ret = image_load;
+    }
+
+    ASSERT_(return 0, image_ret);
+    ASSERT_(return 0, image_ret->format == format);
+
+    return image_ret;
 }
 
 static vx_bool similar_lines(vx_line2d_t act, vx_line2d_t exp, vx_float32 eps)
@@ -115,6 +134,7 @@
         return vx_false_e;
     }
 }
+
 static vx_status countLine2dIntersection(const vx_line2d_t *expect_lines, const vx_line2d_t *actual_lines, vx_int32 exp_lines_num, vx_int32 actual_lines_num, vx_float32 eps)
 {
     vx_status status = VX_FAILURE;
@@ -169,14 +189,13 @@
     vxMapArrayRange(lines_array, 0, lines_array_length, &lines_array_map_id, &lines_array_stride, &lines_array_ptr, VX_READ_ONLY, VX_MEMORY_TYPE_HOST, VX_NOGAP_X);
     vx_line2d_t *lines_array_p = (vx_line2d_t *)lines_array_ptr;
     vx_line2d_t *exp_lines = 0;
-    ASSERT_(return VX_FAILURE, exp_lines = ct_alloc_mem(sizeof(vx_line2d_t) * MAX_NUM_EXP_LINES));
+    ASSERT_(return VX_FAILURE, exp_lines = (vx_line2d_t *)ct_alloc_mem(sizeof(vx_line2d_t) * MAX_NUM_EXP_LINES));
 
     vx_int32 id = 0;
-    char * pos = buf;
+    char * pos = (char *)buf;
     char * next = 0;
     while (pos && (next = strchr(pos, '\n')))
     {
-        
         vx_float32 x1, y1, x2, y2;
         vx_int32 line_id;
 
@@ -197,18 +216,25 @@
 
 typedef struct {
     const char* testName;
-    CT_Image(*generator)(const char* fileName, int width, int height);
+    CT_Image(*generator)(const char* fileName, int width, int height, vx_df_image format);
     const char* fileName;
     vx_hough_lines_p_t param_hough_lines;
     const char* result_filename;
+    vx_df_image format;
 } Arg;
 
 #define PARAMETERS \
-    ARG("case1_1_180_50_50_10_HoughLines", hough_lines_read_image, "hough_lines.bmp", {1, M_PI/180, 50, 50, 10, M_PI, 0}, "hough_lines_1_180_50_50_10.txt"), \
-    ARG("case1_1_170_40_40_10_HoughLines", hough_lines_read_image, "hough_lines.bmp", {1, M_PI/170, 40, 40, 10, M_PI, 0}, "hough_lines_1_170_40_40_10.txt"), \
-    ARG("case1_1_180_40_40_9_HoughLines", hough_lines_read_image, "hough_lines.bmp", {1, M_PI/180, 40, 40, 9, M_PI, 0}, "hough_lines_1_180_40_40_9.txt"), \
-    ARG("case1_2_180_50_50_9_HoughLines", hough_lines_read_image, "hough_lines.bmp", {2, M_PI/180, 50, 50, 9, M_PI, 0}, "hough_lines_2_180_50_50_9.txt"), \
-    ARG("case1_1_190_40_40_10_HoughLines", hough_lines_read_image, "hough_lines.bmp", {1, M_PI/190, 40, 40, 10, M_PI, 0}, "hough_lines_1_190_40_40_10.txt"), \
+    ARG("case1_1_180_50_50_10_HoughLines", hough_lines_read_image, "hough_lines.bmp", {1, M_PI/180, 50, 50, 10, M_PI, 0}, "hough_lines_1_180_50_50_10.txt", VX_DF_IMAGE_U8), \
+    ARG("case1_1_170_40_40_10_HoughLines", hough_lines_read_image, "hough_lines.bmp", {1, M_PI/170, 40, 40, 10, M_PI, 0}, "hough_lines_1_170_40_40_10.txt", VX_DF_IMAGE_U8), \
+    ARG("case1_1_180_40_40_9_HoughLines",  hough_lines_read_image, "hough_lines.bmp", {1, M_PI/180, 40, 40, 9,  M_PI, 0}, "hough_lines_1_180_40_40_9.txt",  VX_DF_IMAGE_U8), \
+    ARG("case1_2_180_50_50_9_HoughLines",  hough_lines_read_image, "hough_lines.bmp", {2, M_PI/180, 50, 50, 9,  M_PI, 0}, "hough_lines_2_180_50_50_9.txt",  VX_DF_IMAGE_U8), \
+    ARG("case1_1_190_40_40_10_HoughLines", hough_lines_read_image, "hough_lines.bmp", {1, M_PI/190, 40, 40, 10, M_PI, 0}, "hough_lines_1_190_40_40_10.txt", VX_DF_IMAGE_U8), \
+    \
+    ARG("_U1_/case1_1_180_50_50_10_HoughLines", hough_lines_read_image, "hough_lines.bmp", {1, M_PI/180, 50, 50, 10, M_PI, 0}, "hough_lines_1_180_50_50_10.txt", VX_DF_IMAGE_U1), \
+    ARG("_U1_/case1_1_170_40_40_10_HoughLines", hough_lines_read_image, "hough_lines.bmp", {1, M_PI/170, 40, 40, 10, M_PI, 0}, "hough_lines_1_170_40_40_10.txt", VX_DF_IMAGE_U1), \
+    ARG("_U1_/case1_1_180_40_40_9_HoughLines",  hough_lines_read_image, "hough_lines.bmp", {1, M_PI/180, 40, 40, 9,  M_PI, 0}, "hough_lines_1_180_40_40_9.txt",  VX_DF_IMAGE_U1), \
+    ARG("_U1_/case1_2_180_50_50_9_HoughLines",  hough_lines_read_image, "hough_lines.bmp", {2, M_PI/180, 50, 50, 9,  M_PI, 0}, "hough_lines_2_180_50_50_9.txt",  VX_DF_IMAGE_U1), \
+    ARG("_U1_/case1_1_190_40_40_10_HoughLines", hough_lines_read_image, "hough_lines.bmp", {1, M_PI/190, 40, 40, 10, M_PI, 0}, "hough_lines_1_190_40_40_10.txt", VX_DF_IMAGE_U1) \
 
 TEST_WITH_ARG(Houghlinesp, testGraphProcessing, Arg,
     PARAMETERS
@@ -228,8 +254,8 @@
     vx_uint32 src_width;
     vx_uint32 src_height;
 
-    ASSERT_NO_FAILURE(src = arg_->generator(arg_->fileName, 0, 0));
-    
+    ASSERT_NO_FAILURE(src = arg_->generator(arg_->fileName, 0, 0, arg_->format));
+
     src_width = src->width;
     src_height = src->height;
 
@@ -279,7 +305,7 @@
     vx_uint32 src_width;
     vx_uint32 src_height;
 
-    ASSERT_NO_FAILURE(src = arg_->generator(arg_->fileName, 0, 0));
+    ASSERT_NO_FAILURE(src = arg_->generator(arg_->fileName, 0, 0, arg_->format));
 
     src_width = src->width;
     src_height = src->height;
@@ -302,7 +328,80 @@
     ASSERT(src_image == 0);
 }
 
-TESTCASE_TESTS(Houghlinesp, 
-               testNodeCreation, 
-               testGraphProcessing, 
-               testImmediateProcessing)
+typedef struct {
+    const char* testName;
+    CT_Image(*generator)(const char* fileName, int width, int height, vx_df_image format);
+    const char* fileName;
+    vx_hough_lines_p_t param_hough_lines;
+    const char* result_filename;
+    vx_df_image format;
+    vx_rectangle_t region_shift;
+} ValidRegionTest_Arg;
+
+#define REGION_PARAMETERS \
+    ARG("case1_1_180_50_50_10_HoughLines_RegionShrink=1", hough_lines_read_image, "hough_lines.bmp", {1, M_PI/180, 50, 50, 10, M_PI, 0}, "hough_lines_1_180_50_50_10.txt", VX_DF_IMAGE_U8, {1, 1, -1, -1}), \
+    ARG("case1_2_180_50_50_9_HoughLines_RegionShrink=1",  hough_lines_read_image, "hough_lines.bmp", {2, M_PI/180, 50, 50, 9,  M_PI, 0}, "hough_lines_2_180_50_50_9.txt",  VX_DF_IMAGE_U8, {1, 1, -1, -1}), \
+    ARG("case1_1_180_50_50_10_HoughLines_RegionShrink=7", hough_lines_read_image, "hough_lines.bmp", {1, M_PI/180, 50, 50, 10, M_PI, 0}, "hough_lines_1_180_50_50_10.txt", VX_DF_IMAGE_U8, {7, 7, -7, -7}), \
+    ARG("case1_2_180_50_50_9_HoughLines_RegionShrink=7",  hough_lines_read_image, "hough_lines.bmp", {2, M_PI/180, 50, 50, 9,  M_PI, 0}, "hough_lines_2_180_50_50_9.txt",  VX_DF_IMAGE_U8, {7, 7, -7, -7}), \
+    \
+    ARG("_U1_/case1_1_180_50_50_10_HoughLines_RegionShrink=1", hough_lines_read_image, "hough_lines.bmp", {1, M_PI/180, 50, 50, 10, M_PI, 0}, "hough_lines_1_180_50_50_10.txt", VX_DF_IMAGE_U1, {1, 1, -1, -1}), \
+    ARG("_U1_/case1_2_180_50_50_9_HoughLines_RegionShrink=1",  hough_lines_read_image, "hough_lines.bmp", {2, M_PI/180, 50, 50, 9,  M_PI, 0}, "hough_lines_2_180_50_50_9.txt",  VX_DF_IMAGE_U1, {1, 1, -1, -1}), \
+    ARG("_U1_/case1_1_180_50_50_10_HoughLines_RegionShrink=7", hough_lines_read_image, "hough_lines.bmp", {1, M_PI/180, 50, 50, 10, M_PI, 0}, "hough_lines_1_180_50_50_10.txt", VX_DF_IMAGE_U1, {7, 7, -7, -7}), \
+    ARG("_U1_/case1_2_180_50_50_9_HoughLines_RegionShrink=7",  hough_lines_read_image, "hough_lines.bmp", {2, M_PI/180, 50, 50, 9,  M_PI, 0}, "hough_lines_2_180_50_50_9.txt",  VX_DF_IMAGE_U1, {7, 7, -7, -7}) \
+
+// For small valid-region shrinks (like the ones in these tests) the existing reference output line lists
+// still apply, because the objects in hough_lines.bmp are fairly centered in the image and lie far from its edges
+TEST_WITH_ARG(Houghlinesp, testWithValidRegion, ValidRegionTest_Arg,
+    REGION_PARAMETERS
+)
+{
+    vx_context context = context_->vx_context_;
+    vx_image src_image = 0;
+
+    CT_Image src = NULL;
+    vx_uint32 src_width;
+    vx_uint32 src_height;
+
+    vx_array lines_array = 0;
+    vx_scalar num_lines = 0;
+    vx_uint32 numlines = 0;
+
+    vx_hough_lines_p_t param_lines_p = arg_->param_hough_lines;
+    vx_status status;
+
+    vx_rectangle_t rect = {0, 0, 0, 0}, rect_shft = arg_->region_shift;
+
+    ASSERT_NO_FAILURE(src = arg_->generator(arg_->fileName, 0, 0, arg_->format));
+
+    src_width = src->width;
+    src_height = src->height;
+
+    ASSERT_VX_OBJECT(src_image = ct_image_to_vx_image(src, context), VX_TYPE_IMAGE);
+    ASSERT_VX_OBJECT(lines_array = vxCreateArray(context, VX_TYPE_LINE_2D, src_width * src_height * sizeof(vx_line2d_t)), VX_TYPE_ARRAY);
+    ASSERT_VX_OBJECT(num_lines = vxCreateScalar(context, VX_TYPE_SIZE, &numlines), VX_TYPE_SCALAR);
+
+    ASSERT_NO_FAILURE(vxGetValidRegionImage(src_image, &rect));
+    ALTERRECTANGLE(rect, rect_shft.start_x, rect_shft.start_y, rect_shft.end_x, rect_shft.end_y);
+    ASSERT_NO_FAILURE(vxSetImageValidRectangle(src_image, &rect));
+
+    VX_CALL(vxuHoughLinesP(context, src_image, &param_lines_p, lines_array, num_lines));
+
+    ASSERT_NO_FAILURE(status = houghlinesp_check(lines_array, num_lines, arg_->result_filename));
+    ASSERT(status == VX_SUCCESS);
+
+    VX_CALL(vxReleaseArray(&lines_array));
+    VX_CALL(vxReleaseScalar(&num_lines));
+    VX_CALL(vxReleaseImage(&src_image));
+
+    ASSERT(lines_array == 0);
+    ASSERT(num_lines == 0);
+    ASSERT(src_image == 0);
+}
+
+TESTCASE_TESTS(Houghlinesp,
+               testNodeCreation,
+               testGraphProcessing,
+               testImmediateProcessing,
+               testWithValidRegion)
+
+#endif //OPENVX_USE_ENHANCED_VISION
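
The U1 variants added above load the usual U8 bitmap, threshold it, and repack it into one-bit pixels through the test-engine helpers (`threshold_U8_ct_image`, `U8_ct_image_to_U1_ct_image`). As a minimal sketch of what such a repacking step does — assuming the VX_DF_IMAGE_U1 layout used elsewhere in these tests, with pixel x stored in bit (x % 8) of byte (x / 8) — and not the helpers' actual implementation, which operates on CT_Image objects:

    #include <stdint.h>
    #include <string.h>

    /* Threshold a U8 image and pack it into a U1 buffer, one bit per pixel,
     * least-significant bit first within each byte. Strides are in bytes.
     * Illustrative sketch only. */
    static void pack_u8_to_u1(const uint8_t *src, size_t src_stride,
                              uint8_t *dst, size_t dst_stride,
                              uint32_t width, uint32_t height, uint8_t threshold)
    {
        uint32_t x, y;
        for (y = 0; y < height; y++)
        {
            const uint8_t *src_row = src + y * src_stride;
            uint8_t *dst_row = dst + y * dst_stride;
            memset(dst_row, 0, (width + 7) / 8);        /* clear the packed row */
            for (x = 0; x < width; x++)
            {
                if (src_row[x] > threshold)
                    dst_row[x / 8] |= (uint8_t)(1u << (x % 8));  /* set pixel x */
            }
        }
    }
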
diff --git a/test_conformance/test_integral.c b/test_conformance/test_integral.c
index 3c587f4..179eb15 100644
--- a/test_conformance/test_integral.c
+++ b/test_conformance/test_integral.c
@@ -1,4 +1,4 @@
-/* 
+/*
 
  * Copyright (c) 2012-2017 The Khronos Group Inc.
  *
@@ -15,6 +15,8 @@
  * limitations under the License.
  */
 
+#if defined OPENVX_USE_ENHANCED_VISION || OPENVX_CONFORMANCE_VISION
+
 #include "test_engine/test.h"
 #include <VX/vx.h>
 #include <VX/vxu.h>
@@ -192,3 +194,5 @@
 }
 
 TESTCASE_TESTS(Integral, testNodeCreation, testGraphProcessing, testImmediateProcessing)
+
+#endif //OPENVX_USE_ENHANCED_VISION || OPENVX_CONFORMANCE_VISION
diff --git a/test_conformance/test_laplacianpyramid.c b/test_conformance/test_laplacianpyramid.c
index 1130214..a97e282 100644
--- a/test_conformance/test_laplacianpyramid.c
+++ b/test_conformance/test_laplacianpyramid.c
@@ -15,19 +15,19 @@
  * limitations under the License.
  */
 
+#if defined OPENVX_USE_ENHANCED_VISION || OPENVX_CONFORMANCE_VISION
+
 #include <math.h>
 #include <string.h>
 #include <VX/vx.h>
 #include <VX/vxu.h>
 
 #include "test_engine/test.h"
-#include "shared_functions.h"
 
 #define VX_GAUSSIAN_PYRAMID_TOLERANCE 1
 
 TESTCASE(LaplacianPyramid, CT_VXContext, ct_setup_vx_context, 0)
 
-
 TEST(LaplacianPyramid, testNodeCreation)
 {
     vx_context context = context_->vx_context_;
@@ -416,6 +416,9 @@
     vx_status status = VX_SUCCESS;
     vx_int32 low_x, low_y, high_x, high_y;
 
+    vx_map_id src_map_id;
+    vx_map_id dst_map_id;
+
     status |= vxQueryImage(src, VX_IMAGE_FORMAT, &src_format, sizeof(src_format));
     status |= vxQueryImage(dst, VX_IMAGE_FORMAT, &dst_format, sizeof(dst_format));
     status |= vxQueryConvolution(conv, VX_CONVOLUTION_COLUMNS, &conv_width, sizeof(conv_width));
@@ -425,8 +428,10 @@
     conv_radius_y = (vx_int32)conv_height / 2;
     status |= vxCopyConvolutionCoefficients(conv, conv_mat, VX_READ_ONLY, VX_MEMORY_TYPE_HOST);
     status |= vxGetValidRegionImage(src, &rect);
-    status |= vxAccessImagePatch(src, &rect, 0, &src_addr, &src_base, VX_READ_ONLY);
-    status |= vxAccessImagePatch(dst, &rect, 0, &dst_addr, &dst_base, VX_WRITE_ONLY);
+    status |= vxMapImagePatch(src, &rect, 0, &src_map_id, &src_addr, (void **)&src_base,
+                              VX_READ_ONLY, VX_MEMORY_TYPE_HOST, 0);
+    status |= vxMapImagePatch(dst, &rect, 0, &dst_map_id, &dst_addr, (void **)&dst_base,
+                              VX_WRITE_ONLY, VX_MEMORY_TYPE_HOST, 0);
 
     low_x = 0;
     high_x = src_addr.dim_x;
@@ -515,8 +520,8 @@
         }
     }
 
-    status |= vxCommitImagePatch(src, NULL, 0, &src_addr, src_base);
-    status |= vxCommitImagePatch(dst, &rect, 0, &dst_addr, dst_base);
+    status |= vxUnmapImagePatch(src, src_map_id);
+    status |= vxUnmapImagePatch(dst, dst_map_id);
 
     return status;
 }
@@ -636,7 +641,7 @@
     vx_convolution conv = 0;
 
     border.mode = VX_BORDER_REPLICATE;
-    
+
     VX_CALL(vxSetContextAttribute(context, VX_CONTEXT_IMMEDIATE_BORDER, &border, sizeof(border)));
 
     VX_CALL(vxQueryPyramid(laplacian, VX_PYRAMID_LEVELS, &levels, sizeof(levels)));
@@ -862,9 +867,9 @@
     }
 
     own_laplacian_pyramid_reference(context, border, src, ref_pyr, ref_dst);
-    
+
     border.mode = VX_BORDER_REPLICATE;
-    
+
     VX_CALL(vxSetContextAttribute(context, VX_CONTEXT_IMMEDIATE_BORDER, &border, sizeof(border)));
     VX_CALL(vxuLaplacianPyramid(context, src, tst_pyr, tst_dst));
 
@@ -1198,3 +1203,5 @@
     testGraphProcessing,
     testImmediateProcessing
 )
+
+#endif //OPENVX_USE_ENHANCED_VISION || OPENVX_CONFORMANCE_VISION
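
The reference filter in this file is also moved off the vxAccessImagePatch/vxCommitImagePatch pair and onto the map/unmap API, as the hunks above show. For reference, a minimal read-only use of that pattern on one plane of a U8 image might look like the sketch below; `sum_pixels` is a hypothetical helper, not part of the CTS, and error handling is reduced to status accumulation:

    #include <VX/vx.h>

    /* Sum all pixels in the valid region of a U8 image via map/unmap (sketch). */
    static vx_status sum_pixels(vx_image image, vx_uint64 *sum)
    {
        vx_rectangle_t rect;
        vx_imagepatch_addressing_t addr;
        vx_map_id map_id;
        void *base = NULL;
        vx_uint32 x, y;
        vx_status status = vxGetValidRegionImage(image, &rect);

        status |= vxMapImagePatch(image, &rect, 0, &map_id, &addr, &base,
                                  VX_READ_ONLY, VX_MEMORY_TYPE_HOST, 0);
        if (status != VX_SUCCESS)
            return status;

        *sum = 0;
        for (y = 0; y < addr.dim_y; y++)
        {
            for (x = 0; x < addr.dim_x; x++)
            {
                vx_uint8 *pixel = (vx_uint8 *)vxFormatImagePatchAddress2d(base, x, y, &addr);
                *sum += *pixel;
            }
        }

        /* Unmapping releases the mapping created by vxMapImagePatch above. */
        return vxUnmapImagePatch(image, map_id);
    }
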
diff --git a/test_conformance/test_lbp.c b/test_conformance/test_lbp.c
index fcf1f6a..62b6bc7 100644
--- a/test_conformance/test_lbp.c
+++ b/test_conformance/test_lbp.c
@@ -15,6 +15,8 @@
  * limitations under the License.
  */
 
+#ifdef OPENVX_USE_ENHANCED_VISION
+
 #include <math.h>
 #include <float.h>
 #include <string.h>
@@ -158,3 +160,5 @@
 }
 
 TESTCASE_TESTS(LBP, testNodeCreation, testGraphProcessing, testImmediateProcessing)
+
+#endif //OPENVX_USE_ENHANCED_VISION
diff --git a/test_conformance/test_logging.c b/test_conformance/test_logging.c
index 67a32b8..148d237 100644
--- a/test_conformance/test_logging.c
+++ b/test_conformance/test_logging.c
@@ -1,4 +1,4 @@
-/* 
+/*
 
  * Copyright (c) 2012-2017 The Khronos Group Inc.
  *
@@ -31,42 +31,42 @@
 
 TEST(Logging, Cummulative)
 {
-    vx_image image = 0;
+    vx_graph graph = 0;
     vx_context context = vxCreateContext();
 
     ASSERT_VX_OBJECT(context, VX_TYPE_CONTEXT);
     CT_RegisterForGarbageCollection(context, ct_destroy_vx_context, CT_GC_OBJECT);
-    ASSERT_VX_OBJECT(image = vxCreateImage(context, 128, 128, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
+    ASSERT_VX_OBJECT(graph = vxCreateGraph(context), VX_TYPE_GRAPH);
 
     // normal logging
     vxRegisterLogCallback(context, test_log_callback, vx_false_e);
     log_callback_is_called = vx_false_e;
-    vxAddLogEntry((vx_reference)image, VX_FAILURE, "hello world", 1, 2, 3);
+    vxAddLogEntry((vx_reference)graph, VX_FAILURE, "hello world", 1, 2, 3);
     ASSERT(log_callback_is_called);
 
     // clear callback
     vxRegisterLogCallback(context, NULL, vx_true_e);
     log_callback_is_called = vx_false_e;
-    vxAddLogEntry((vx_reference)image, VX_FAILURE, "hello world", 4, 5, 6);
+    vxAddLogEntry((vx_reference)graph, VX_FAILURE, "hello world", 4, 5, 6);
     ASSERT(!log_callback_is_called);
 
     // restore callback
     vxRegisterLogCallback(context, test_log_callback, vx_true_e);
 
     // disable logs for image
-    VX_CALL(vxDirective((vx_reference)image, VX_DIRECTIVE_DISABLE_LOGGING));
+    VX_CALL(vxDirective((vx_reference)graph, VX_DIRECTIVE_DISABLE_LOGGING));
     log_callback_is_called = vx_false_e;
-    vxAddLogEntry((vx_reference)image, VX_FAILURE, "hello world", 4, 5, 6);
+    vxAddLogEntry((vx_reference)graph, VX_FAILURE, "hello world", 4, 5, 6);
     ASSERT(!log_callback_is_called);
 
     // turn on logs once again
-    VX_CALL(vxDirective((vx_reference)image, VX_DIRECTIVE_ENABLE_LOGGING));
+    VX_CALL(vxDirective((vx_reference)graph, VX_DIRECTIVE_ENABLE_LOGGING));
     log_callback_is_called = vx_false_e;
-    vxAddLogEntry((vx_reference)image, VX_FAILURE, "%*s", VX_MAX_LOG_MESSAGE_LEN + 20, ""); // 20 symbols longer string than limit
+    vxAddLogEntry((vx_reference)graph, VX_FAILURE, "%*s", VX_MAX_LOG_MESSAGE_LEN + 20, ""); // 20 symbols longer string than limit
     ASSERT(log_callback_is_called);
 
-    VX_CALL(vxReleaseImage(&image));
-    ASSERT(image == 0);
+    VX_CALL(vxReleaseGraph(&graph));
+    ASSERT(graph == 0);
 }
 
 
diff --git a/test_conformance/test_lut.c b/test_conformance/test_lut.c
index 364d5d4..9965c3b 100644
--- a/test_conformance/test_lut.c
+++ b/test_conformance/test_lut.c
@@ -15,6 +15,8 @@
  * limitations under the License.
  */
 
+#if defined OPENVX_USE_ENHANCED_VISION || OPENVX_CONFORMANCE_VISION
+
 #include "test_engine/test.h"
 #include <VX/vx.h>
 #include <VX/vxu.h>
@@ -637,3 +639,5 @@
                testImmediateProcessing,
                test_vxCopyLUT,
                test_vxMapLUTWrite)
+
+#endif //OPENVX_USE_ENHANCED_VISION || OPENVX_CONFORMANCE_VISION
diff --git a/test_conformance/test_magnitude.c b/test_conformance/test_magnitude.c
index 2dfaace..5d9c7f1 100644
--- a/test_conformance/test_magnitude.c
+++ b/test_conformance/test_magnitude.c
@@ -1,4 +1,4 @@
-/* 
+/*
 
  * Copyright (c) 2012-2017 The Khronos Group Inc.
  *
@@ -15,6 +15,8 @@
  * limitations under the License.
  */
 
+#if defined OPENVX_USE_ENHANCED_VISION || OPENVX_CONFORMANCE_VISION
+
 #include "test_engine/test.h"
 
 #include <VX/vx.h>
@@ -209,3 +211,5 @@
 }
 
 TESTCASE_TESTS(Magnitude, testOnRandom)
+
+#endif //OPENVX_USE_ENHANCED_VISION || OPENVX_CONFORMANCE_VISION
diff --git a/test_conformance/test_main.h b/test_conformance/test_main.h
index b9aaf88..1b0e8fa 100644
--- a/test_conformance/test_main.h
+++ b/test_conformance/test_main.h
@@ -15,6 +15,13 @@
  * limitations under the License.
  */
 
+/* Base Feature Set Tests (for all conformance feature sets) */
+TESTCASE(GraphBase)
+TESTCASE(Logging)
+TESTCASE(SmokeTestBase)
+TESTCASE(TargetBase)
+
+#if defined OPENVX_USE_ENHANCED_VISION || OPENVX_CONFORMANCE_VISION
 TESTCASE(Graph)
 TESTCASE(GraphCallback)
 TESTCASE(GraphDelay)
@@ -23,26 +30,34 @@
 TESTCASE(Array)
 TESTCASE(ObjectArray)
 TESTCASE(Image)
-
 TESTCASE(vxCreateImageFromChannel)
-
 TESTCASE(vxCopyImagePatch)
 TESTCASE(vxMapImagePatch)
+TESTCASE(Distribution)
 
+TESTCASE(vxCopyRemapPatch)
+TESTCASE(vxMapRemapPatch)
+
+TESTCASE(UserNode)
+TESTCASE(SmokeTest)
+TESTCASE(Target)
+TESTCASE(Convolution)
+TESTCASE(Matrix)
 TESTCASE(vxuConvertDepth)
 TESTCASE(vxConvertDepth)
-
 TESTCASE(ChannelCombine)
 TESTCASE(ChannelExtract)
-
 TESTCASE(ColorConvert)
-TESTCASE(Distribution)
 TESTCASE(vxuAddSub)
 TESTCASE(vxAddSub)
-
 TESTCASE(vxuNot)
 TESTCASE(vxNot)
 
+#ifdef OPENVX_USE_U1
+TESTCASE(vxuBinOp1u)
+TESTCASE(vxBinOp1u)
+#endif
+
 TESTCASE(vxuBinOp8u)
 TESTCASE(vxBinOp8u)
 
@@ -51,96 +66,99 @@
 
 TESTCASE(vxuMultiply)
 TESTCASE(vxMultiply)
-
 TESTCASE(Histogram)
 TESTCASE(EqualizeHistogram)
 TESTCASE(MeanStdDev)
 TESTCASE(MinMaxLoc)
-TESTCASE(Min)
-TESTCASE(Max)
-TESTCASE(Threshold)
 
+TESTCASE(WeightedAverage)
+TESTCASE(Threshold)
 TESTCASE(Box3x3)
 TESTCASE(Convolve)
 TESTCASE(Dilate3x3)
 TESTCASE(Erode3x3)
+
 TESTCASE(Gaussian3x3)
 TESTCASE(Median3x3)
 TESTCASE(Sobel3x3)
 TESTCASE(NonLinearFilter)
-
-TESTCASE(Accumulate)
-TESTCASE(AccumulateSquare)
-TESTCASE(AccumulateWeighted)
-
 TESTCASE(Integral)
-TESTCASE(LUT)
 
 TESTCASE(Magnitude)
 TESTCASE(Phase)
-
 TESTCASE(FastCorners)
 TESTCASE(HarrisCorners)
-
 TESTCASE(Scale)
 TESTCASE(WarpAffine)
 TESTCASE(WarpPerspective)
 TESTCASE(Remap)
-TESTCASE(vxCopyRemapPatch)
-TESTCASE(vxMapRemapPatch)
-
+TESTCASE(Scalar)
 
 TESTCASE(GaussianPyramid)
 TESTCASE(HalfScaleGaussian)
-
 TESTCASE(LaplacianPyramid)
 TESTCASE(LaplacianReconstruct)
-
 TESTCASE(vxuCanny)
 TESTCASE(vxCanny)
 TESTCASE(OptFlowPyrLK)
+TESTCASE(LUT)
+#endif
 
-TESTCASE(UserNode)
-
-TESTCASE(Logging)
-TESTCASE(SmokeTest)
-
-TESTCASE(Scalar)
-
-TESTCASE(Target)
-
-TESTCASE(Convolution)
-TESTCASE(Matrix)
-
+#ifdef OPENVX_USE_ENHANCED_VISION
+TESTCASE(GraphEnhanced)
+TESTCASE(GraphDelayTensor)
+TESTCASE(Min)
+TESTCASE(Max)
 TESTCASE(Nonmaxsuppression)
-
 TESTCASE(TensorOp)
-
 TESTCASE(LBP)
-
 TESTCASE(BilateralFilter)
-
 TESTCASE(MatchTemplate)
-
 TESTCASE(Houghlinesp)
-
 TESTCASE(Copy)
-
 TESTCASE(HogCells)
 TESTCASE(HogFeatures)
-
 TESTCASE(ControlFlow)
+TESTCASE(TensorEnhanced)
+#endif
+
+#if defined OPENVX_USE_ENHANCED_VISION || OPENVX_CONFORMANCE_NEURAL_NETWORKS || OPENVX_CONFORMANCE_NNEF_IMPORT
+TESTCASE(Tensor)
+#endif
+
+#if defined OPENVX_CONFORMANCE_NEURAL_NETWORKS || OPENVX_CONFORMANCE_NNEF_IMPORT
+TESTCASE(VxKernelOfNNAndNNEF)
+TESTCASE(VxParameterOfNNAndNNEF)
+TESTCASE(UserKernelsOfNNAndNNEF)
+TESTCASE(MetaFormatOfNNAndNNEF)
+#endif
 
 #ifdef OPENVX_USE_IX
 TESTCASE(ExtensionObject)
 #endif
 
+#ifdef OPENVX_CONFORMANCE_NEURAL_NETWORKS
 #ifdef OPENVX_USE_NN
 TESTCASE(TensorNN)
 #endif
-
 #ifdef OPENVX_USE_NN_16
 TESTCASE(TensorNetworks)
 #endif
+#endif
 
+#ifdef OPENVX_CONFORMANCE_NNEF_IMPORT
+TESTCASE(TensorNNEFImport)
+#endif
+
+#ifdef OPENVX_USE_PIPELINING
+TESTCASE(GraphPipeline)
+#endif
+
+#ifdef OPENVX_USE_STREAMING
+TESTCASE(GraphStreaming)
+#endif
+
+#ifdef OPENVX_USE_USER_DATA_OBJECT
+TESTCASE(UserDataObject)
+#endif
 
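With this reorganization, test_main.h only registers a TESTCASE when the owning feature set or extension is enabled, and each test source carries the matching preprocessor guard (as in the per-file edits elsewhere in this patch). A skeleton of such a guarded test file, using the same macros — `Example` and its test are hypothetical, not cases from the CTS:

    #if defined OPENVX_USE_ENHANCED_VISION || OPENVX_CONFORMANCE_VISION

    #include "test_engine/test.h"
    #include <VX/vx.h>

    TESTCASE(Example, CT_VXContext, ct_setup_vx_context, 0)

    TEST(Example, testContextIsValid)
    {
        vx_context context = context_->vx_context_;
        ASSERT_VX_OBJECT(context, VX_TYPE_CONTEXT);
    }

    TESTCASE_TESTS(Example, testContextIsValid)

    #endif /* OPENVX_USE_ENHANCED_VISION || OPENVX_CONFORMANCE_VISION */

The corresponding TESTCASE(Example) entry in test_main.h would then sit inside the same #if block, so registration and implementation are compiled out together.
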
diff --git a/test_conformance/test_matchtemplate.c b/test_conformance/test_matchtemplate.c
index 9f35040..fd7aa4a 100644
--- a/test_conformance/test_matchtemplate.c
+++ b/test_conformance/test_matchtemplate.c
@@ -15,6 +15,8 @@
  * limitations under the License.
  */
 
+#ifdef OPENVX_USE_ENHANCED_VISION
+
 #include "test_engine/test.h"
 #include <VX/vx.h>
 #include <VX/vxu.h>
@@ -219,3 +221,5 @@
     ASSERT(vx_result_image == 0);}
 
 TESTCASE_TESTS(MatchTemplate, testNodeCreation, testGraphProcessing, testImmediateProcessing)
+
+#endif //OPENVX_USE_ENHANCED_VISION
diff --git a/test_conformance/test_matrix.c b/test_conformance/test_matrix.c
index ade4a82..f29d39e 100644
--- a/test_conformance/test_matrix.c
+++ b/test_conformance/test_matrix.c
@@ -15,6 +15,8 @@
  * limitations under the License.
  */
 
+#if defined OPENVX_USE_ENHANCED_VISION || OPENVX_CONFORMANCE_VISION
+
 #include "test_engine/test.h"
 #include <VX/vx.h>
 #include <VX/vxu.h>
@@ -208,3 +210,6 @@
 
 
 TESTCASE_TESTS(Matrix, test_vxCreateMatrix, test_vxCreateVirtualMatrix, test_vxCreateMatrixFromPatternAndOrigin, test_vxCopyMatrix, test_vxQueryMatrix)
+
+#endif //OPENVX_USE_ENHANCED_VISION || OPENVX_CONFORMANCE_VISION
+
diff --git a/test_conformance/test_max.c b/test_conformance/test_max.c
index 8c1318e..d409d9a 100644
--- a/test_conformance/test_max.c
+++ b/test_conformance/test_max.c
@@ -15,6 +15,8 @@
  * limitations under the License.
  */
 
+#ifdef OPENVX_USE_ENHANCED_VISION
+
 #include <string.h>
 #include <VX/vx.h>
 #include <VX/vxu.h>
@@ -180,3 +182,5 @@
 
 }
 TESTCASE_TESTS(Max, testvxMax)
+
+#endif //OPENVX_USE_ENHANCED_VISION
diff --git a/test_conformance/test_meanstddev.c b/test_conformance/test_meanstddev.c
index bbf2c2b..3a25bc4 100644
--- a/test_conformance/test_meanstddev.c
+++ b/test_conformance/test_meanstddev.c
@@ -1,4 +1,4 @@
-/* 
+/*
 
  * Copyright (c) 2012-2017 The Khronos Group Inc.
  *
@@ -15,6 +15,8 @@
  * limitations under the License.
  */
 
+#if defined OPENVX_USE_ENHANCED_VISION || OPENVX_CONFORMANCE_VISION
+
 #include <math.h>
 #include <stdlib.h>
 #include <limits.h>
@@ -26,47 +28,59 @@
 
 static void reference_mean_stddev(CT_Image src, vx_float32* _mean, vx_float32* _stddev)
 {
-    uint32_t x, y, width = src ? src->width : 0, height = src ? src->height : 0;
-    uint32_t npix, stride;
-    double sum = 0, sqsum = 0;
-    int format = src ? src->format : VX_DF_IMAGE_U8;
-
     ASSERT(src);
     ASSERT(src->width > 0 && src->height > 0);
-    npix = width*height;
+
+    uint32_t x, y, width = src->width, height = src->height, shift_x_u1;
+    uint32_t npix, stride;
+    double sum = 0, sqsum = 0;
+    int format = src->format;
+
+    npix = width * height;
     stride = ct_stride_bytes(src);
+    shift_x_u1 = src->roi.x % 8;    // Bit shift for U1 images with ROIs
 
 #define CASE_MEANSTDDEV(format, type, acctype) \
     case format: \
-    { \
-        acctype s = 0, s2 = 0; \
-        for( y = 0; y < src->height; y++ ) \
         { \
-            const type* ptr = (const type*)(src->data.y + stride*y); \
-            for( x = 0; x < src->width; x++ ) \
+            acctype s = 0, s2 = 0; \
+            for ( y = 0; y < height; y++ ) \
             { \
-                type val = ptr[x]; \
-                s += val; \
-                s2 += (acctype)val*val; \
+                const type* ptr = (const type*)(src->data.y + stride * y); \
+                for ( x = 0; x < width; x++ ) \
+                { \
+                    type val; \
+                    if (format == VX_DF_IMAGE_U1) \
+                    { \
+                        uint32_t x_shiftd = x + shift_x_u1; \
+                        val = (ptr[x_shiftd / 8] & (1 << (x_shiftd % 8))) >> (x_shiftd % 8); \
+                    } \
+                    else \
+                    { \
+                        val =  ptr[x]; \
+                    } \
+                    s += val; \
+                    s2 += (acctype)val * val; \
+                } \
             } \
+            sum = (double)s; \
+            sqsum = (double)s2; \
         } \
-        sum = (double)s; sqsum = (double)s2; \
-    } \
-    break
+        break
 
-    switch(format)
+    switch (format)
     {
+    CASE_MEANSTDDEV(VX_DF_IMAGE_U1, uint8_t, uint64_t);
     CASE_MEANSTDDEV(VX_DF_IMAGE_U8, uint8_t, uint64_t);
     default:
         FAIL("Unsupported image format: (%d)", &src->format);
     }
 
-    *_mean = (vx_float32)(sum/npix);
-    sqsum = sqsum/npix - (sum/npix)*(sum/npix);
+    *_mean = (vx_float32)(sum / npix);
+    sqsum = (sqsum / npix) - (sum / npix) * (sum / npix);
     *_stddev = (vx_float32)sqrt(CT_MAX(sqsum, 0.));
 }
 
-
 TESTCASE(MeanStdDev, CT_VXContext, ct_setup_vx_context, 0)
 
 typedef struct {
@@ -75,13 +89,14 @@
     vx_df_image format;
 } format_arg;
 
-
-#define MEANSTDDEV_TEST_CASE(imm, tp) \
-    {#imm "/" #tp, CT_##imm##_MODE, VX_DF_IMAGE_##tp}
+#define MEANSTDDEV_TEST_CASE_U8(imm)    {        #imm, CT_##imm##_MODE, VX_DF_IMAGE_U8}
+#define MEANSTDDEV_TEST_CASE_U1(imm)    {"_U1_/" #imm, CT_##imm##_MODE, VX_DF_IMAGE_U1}
 
 TEST_WITH_ARG(MeanStdDev, testOnRandom, format_arg,
-              MEANSTDDEV_TEST_CASE(Immediate, U8),
-              MEANSTDDEV_TEST_CASE(Graph, U8),
+              MEANSTDDEV_TEST_CASE_U8(Immediate),
+              MEANSTDDEV_TEST_CASE_U8(Graph),
+              MEANSTDDEV_TEST_CASE_U1(Immediate),
+              MEANSTDDEV_TEST_CASE_U1(Graph),
               )
 {
     double mean_tolerance = 1e-4;
@@ -97,7 +112,7 @@
     int iter, niters = 100;
     uint64_t rng;
     vx_float32 mean0 = 0.f, stddev0 = 0.f, mean = 0.f, stddev = 0.f;
-    int a = 0, b = 256;
+    int a = 0, b = (format == VX_DF_IMAGE_U1) ? 2 : 256;
 
     rng = CT()->seed_;
     mean_tolerance *= b;
@@ -110,9 +125,9 @@
 
     for( iter = 0; iter < niters; iter++ )
     {
+        double mean_diff, stddev_diff;
         int width = ct_roundf(ct_log_rng(&rng, 0, 10));
         int height = ct_roundf(ct_log_rng(&rng, 0, 10));
-        double mean_diff, stddev_diff;
         width = CT_MAX(width, 1);
         height = CT_MAX(height, 1);
 
@@ -122,6 +137,11 @@
             height = CT_MIN((height + 7) & -8, 480);
         }
 
+        if (format == VX_DF_IMAGE_U1)
+        {
+            width = ((width + 7) / 8) * 8;      // Width must be multiple of 8 for U1 images
+        }
+
         ct_update_progress(iter, niters);
 
         src0 = ct_allocate_ct_image_random(width, height, format, &rng, a, b);
@@ -175,4 +195,106 @@
     VX_CALL(vxReleaseScalar(&stddev_s));
 }
 
-TESTCASE_TESTS(MeanStdDev, testOnRandom)
+typedef struct {
+    const char* name;
+    int mode;
+    vx_df_image format;
+    vx_rectangle_t region_shift;
+} format_region_arg;
+
+
+#define MEANSTDDEV_REGION_TEST_CASE_U8(imm, shrink) \
+    {        #imm "/RegionShrink=" #shrink, CT_##imm##_MODE, VX_DF_IMAGE_U8, {shrink, shrink, -shrink, -shrink}}
+#define MEANSTDDEV_REGION_TEST_CASE_U1(imm, shrink) \
+    {"_U1_/" #imm "/RegionShrink=" #shrink, CT_##imm##_MODE, VX_DF_IMAGE_U1, {shrink, shrink, -shrink, -shrink}}
+
+TEST_WITH_ARG(MeanStdDev, testOnRandomWithValidRegion, format_region_arg,
+              MEANSTDDEV_REGION_TEST_CASE_U8(Immediate, 1),
+              MEANSTDDEV_REGION_TEST_CASE_U8(Immediate, 7),
+              MEANSTDDEV_REGION_TEST_CASE_U1(Immediate, 1),
+              MEANSTDDEV_REGION_TEST_CASE_U1(Immediate, 7),
+              )
+{
+    double mean_tolerance = 1e-4;
+    double stddev_tolerance = 1e-4;
+    int format = arg_->format;
+    vx_image src;
+    CT_Image src0;
+    vx_scalar mean_s, stddev_s;
+    vx_context context = context_->vx_context_;
+    int iter, niters = 100;
+    uint64_t rng;
+    vx_float32 mean0 = 0.f, stddev0 = 0.f, mean = 0.f, stddev = 0.f;
+    int a = 0, b = (format == VX_DF_IMAGE_U1) ? 2 : 256;
+    vx_rectangle_t rect = {0, 0, 0, 0}, rect_shft = arg_->region_shift;
+
+    rng = CT()->seed_;
+    mean_tolerance *= b;
+    stddev_tolerance *= b;
+
+    mean_s = vxCreateScalar(context, VX_TYPE_FLOAT32, &mean);
+    ASSERT_VX_OBJECT(mean_s, VX_TYPE_SCALAR);
+    stddev_s = vxCreateScalar(context, VX_TYPE_FLOAT32, &stddev);
+    ASSERT_VX_OBJECT(stddev_s, VX_TYPE_SCALAR);
+
+    for( iter = 0; iter < niters; iter++ )
+    {
+        double mean_diff, stddev_diff;
+        int width  = ct_roundf(ct_log_rng(&rng, 0, 10));
+        int height = ct_roundf(ct_log_rng(&rng, 0, 10));
+        width  = CT_MAX(width, 15);             // Max region shrink is 7 on each side -> minimum size is 15
+        height = CT_MAX(height, 15);
+
+        if( !ct_check_any_size() )
+        {
+            width  = CT_MIN((width + 7) & -8, 640);
+            height = CT_MIN((height + 7) & -8, 480);
+        }
+
+        if (format == VX_DF_IMAGE_U1)
+        {
+            width = ((width + 7) / 8) * 8;      // Width must be multiple of 8 for U1 images
+        }
+
+        ct_update_progress(iter, niters);
+
+        ASSERT_NO_FAILURE(src0 = ct_allocate_ct_image_random(width, height, format, &rng, a, b));
+        ASSERT_VX_OBJECT(src = ct_image_to_vx_image(src0, context), VX_TYPE_IMAGE);
+
+        ASSERT_NO_FAILURE(ct_adjust_roi(src0, rect_shft.start_x, rect_shft.start_y, -rect_shft.end_x, -rect_shft.end_y));
+        reference_mean_stddev(src0, &mean0, &stddev0);
+
+        ASSERT_NO_FAILURE(vxGetValidRegionImage(src, &rect));
+        ALTERRECTANGLE(rect, rect_shft.start_x, rect_shft.start_y, rect_shft.end_x, rect_shft.end_y);
+        ASSERT_NO_FAILURE(vxSetImageValidRectangle(src, &rect));
+
+        ASSERT_EQ_VX_STATUS(VX_SUCCESS, vxuMeanStdDev(context, src, &mean, &stddev));
+
+        mean_diff = fabs(mean - mean0);
+        stddev_diff = fabs(stddev - stddev0);
+
+        if( mean_diff > mean_tolerance ||
+            stddev_diff > stddev_tolerance )
+        {
+            CT_RecordFailureAtFormat("Test case %d. width=%d, height=%d,\n"
+                                     "\tExpected: mean=%.5g, stddev=%.5g\n"
+                                     "\tActual:   mean=%.5g (diff=%.5g %s %.5g), stddev=%.5f (diff=%.5g %s %.5g)\n",
+                                     __FUNCTION__, __FILE__, __LINE__,
+                                     iter, width, height,
+                                     mean0, stddev0,
+                                     mean, mean_diff, mean_diff > mean_tolerance ? ">" : "<=", mean_tolerance,
+                                     stddev, stddev_diff, stddev_diff > stddev_tolerance ? ">" : "<=", stddev_tolerance);
+            break;
+        }
+
+        VX_CALL(vxReleaseImage(&src));
+        CT_CollectGarbage(CT_GC_IMAGE);
+    }
+
+    VX_CALL(vxReleaseScalar(&mean_s));
+    VX_CALL(vxReleaseScalar(&stddev_s));
+}
+
+TESTCASE_TESTS(MeanStdDev, testOnRandom, testOnRandomWithValidRegion)
+
+#endif //OPENVX_USE_ENHANCED_VISION || OPENVX_CONFORMANCE_VISION
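
The rewritten reference above extracts one-bit pixel values before accumulating and then applies mean = S/N and stddev = sqrt(S2/N - (S/N)^2). A compact standalone sketch of the same computation over a packed U1 buffer (illustrative only; the CTS reference additionally handles the ROI bit shift for U1 sub-images, and the function name is an assumption):

    #include <math.h>
    #include <stddef.h>
    #include <stdint.h>

    /* Mean and standard deviation of a packed U1 image: one bit per pixel,
     * least-significant bit first within each byte. Stride is in bytes. */
    static void mean_stddev_u1(const uint8_t *data, uint32_t stride,
                               uint32_t width, uint32_t height,
                               float *mean, float *stddev)
    {
        uint64_t s = 0, s2 = 0;
        uint32_t x, y;
        double npix = (double)width * (double)height;
        double m, var;

        for (y = 0; y < height; y++)
        {
            const uint8_t *row = data + (size_t)y * stride;
            for (x = 0; x < width; x++)
            {
                uint8_t val = (row[x / 8] >> (x % 8)) & 1;  /* pixel x of this row */
                s  += val;
                s2 += (uint64_t)val * val;                  /* val*val == val for 0/1 data */
            }
        }
        m   = (double)s / npix;
        var = (double)s2 / npix - m * m;
        *mean   = (float)m;
        *stddev = (float)sqrt(var > 0.0 ? var : 0.0);
    }
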
diff --git a/test_conformance/test_median3x3.c b/test_conformance/test_median3x3.c
index 4b577c9..f4ada07 100644
--- a/test_conformance/test_median3x3.c
+++ b/test_conformance/test_median3x3.c
@@ -15,14 +15,14 @@
  * limitations under the License.
  */
 
+#if defined OPENVX_USE_ENHANCED_VISION || OPENVX_CONFORMANCE_VISION
+
 #include "test_engine/test.h"
 #include <VX/vx.h>
 #include <VX/vxu.h>
 
-
 TESTCASE(Median3x3, CT_VXContext, ct_setup_vx_context, 0)
 
-
 TEST(Median3x3, testNodeCreation)
 {
     vx_context context = context_->vx_context_;
@@ -49,27 +49,47 @@
     ASSERT(src_image == 0);
 }
 
-
 // Generate input to cover these requirements:
 // There should be an image with randomly generated pixel intensities.
-static CT_Image median3x3_generate_random(const char* fileName, int width, int height)
+static CT_Image median3x3_generate_random(const char* fileName, int width, int height, vx_df_image format)
 {
     CT_Image image;
 
-    ASSERT_NO_FAILURE_(return 0,
-            image = ct_allocate_ct_image_random(width, height, VX_DF_IMAGE_U8, &CT()->seed_, 0, 256));
+    ASSERT_(return 0, format == VX_DF_IMAGE_U1 || format == VX_DF_IMAGE_U8);
+
+    if (format == VX_DF_IMAGE_U1)
+        ASSERT_NO_FAILURE_(return 0, image = ct_allocate_ct_image_random(width, height, format, &CT()->seed_, 0, 2));
+    else
+        ASSERT_NO_FAILURE_(return 0, image = ct_allocate_ct_image_random(width, height, format, &CT()->seed_, 0, 256));
 
     return image;
 }
 
-static CT_Image median3x3_read_image(const char* fileName, int width, int height)
+static CT_Image median3x3_read_image(const char* fileName, int width, int height, vx_df_image format)
 {
-    CT_Image image = NULL;
+    CT_Image image_load = NULL, image_ret = NULL;
     ASSERT_(return 0, width == 0 && height == 0);
-    image = ct_read_image(fileName, 1);
-    ASSERT_(return 0, image);
-    ASSERT_(return 0, image->format == VX_DF_IMAGE_U8);
-    return image;
+    ASSERT_(return 0, format == VX_DF_IMAGE_U1 || format == VX_DF_IMAGE_U8);
+
+    image_load = ct_read_image(fileName, 1);
+    ASSERT_(return 0, image_load);
+    ASSERT_(return 0, image_load->format == VX_DF_IMAGE_U8);
+
+    if (format == VX_DF_IMAGE_U1)
+    {
+        ASSERT_NO_FAILURE_(return 0, threshold_U8_ct_image(image_load, 127));   // Threshold to make the U1 image less trivial
+        ASSERT_NO_FAILURE_(return 0, image_ret = ct_allocate_image(image_load->width, image_load->height, VX_DF_IMAGE_U1));
+        ASSERT_NO_FAILURE_(return 0, U8_ct_image_to_U1_ct_image(image_load, image_ret));
+    }
+    else
+    {
+        image_ret = image_load;
+    }
+
+    ASSERT_(return 0, image_ret);
+    ASSERT_(return 0, image_ret->format == format);
+
+    return image_ret;
 }
 
 static int compare_for_median_get(const void * a, const void * b)
@@ -77,7 +97,18 @@
     return *(int*)a - *(int*)b;
 }
 
-static int32_t median_get(int32_t *values)
+static int32_t median_get_U1(int32_t values[9][2])
+{
+    int i;
+    int32_t v_acc = 0;
+    for (i = 0; i < 9; i++)     // Find median value by counting number of pixels == 1 and checking if sum > 4
+    {
+        v_acc += (values[i][0] & (1 << (values[i][1] % 8))) >> (values[i][1] % 8);
+    }
+    return (v_acc > 4) ? 1 : 0;
+}
+
+static int32_t median_get_U8(int32_t *values)
 {
     qsort(values, 9, sizeof(values[0]), compare_for_median_get);
     return values[4];
@@ -85,54 +116,109 @@
 
 static uint8_t median3x3_calculate(CT_Image src, uint32_t x, uint32_t y)
 {
-    int32_t values[9] = {
-        (int32_t)*CT_IMAGE_DATA_PTR_8U(src, x + 0, y + 0),
-        (int32_t)*CT_IMAGE_DATA_PTR_8U(src, x - 1, y + 0),
-        (int32_t)*CT_IMAGE_DATA_PTR_8U(src, x + 1, y + 0),
-        (int32_t)*CT_IMAGE_DATA_PTR_8U(src, x + 0, y - 1),
-        (int32_t)*CT_IMAGE_DATA_PTR_8U(src, x - 1, y - 1),
-        (int32_t)*CT_IMAGE_DATA_PTR_8U(src, x + 1, y - 1),
-        (int32_t)*CT_IMAGE_DATA_PTR_8U(src, x + 0, y + 1),
-        (int32_t)*CT_IMAGE_DATA_PTR_8U(src, x - 1, y + 1),
-        (int32_t)*CT_IMAGE_DATA_PTR_8U(src, x + 1, y + 1)
-    };
-    return (uint8_t)median_get(values);
+    if (src->format == VX_DF_IMAGE_U1)
+    {
+        int32_t values[9][2] = {
+            {(int32_t)*CT_IMAGE_DATA_PTR_1U(src, x + 0, y + 0), (int32_t)x + 0},
+            {(int32_t)*CT_IMAGE_DATA_PTR_1U(src, x - 1, y + 0), (int32_t)x - 1},
+            {(int32_t)*CT_IMAGE_DATA_PTR_1U(src, x + 1, y + 0), (int32_t)x + 1},
+            {(int32_t)*CT_IMAGE_DATA_PTR_1U(src, x + 0, y - 1), (int32_t)x + 0},
+            {(int32_t)*CT_IMAGE_DATA_PTR_1U(src, x - 1, y - 1), (int32_t)x - 1},
+            {(int32_t)*CT_IMAGE_DATA_PTR_1U(src, x + 1, y - 1), (int32_t)x + 1},
+            {(int32_t)*CT_IMAGE_DATA_PTR_1U(src, x + 0, y + 1), (int32_t)x + 0},
+            {(int32_t)*CT_IMAGE_DATA_PTR_1U(src, x - 1, y + 1), (int32_t)x - 1},
+            {(int32_t)*CT_IMAGE_DATA_PTR_1U(src, x + 1, y + 1), (int32_t)x + 1}
+        };
+        return (uint8_t)median_get_U1(values);
+    }
+    else
+    {
+        int32_t values[9] = {
+            (int32_t)*CT_IMAGE_DATA_PTR_8U(src, x + 0, y + 0),
+            (int32_t)*CT_IMAGE_DATA_PTR_8U(src, x - 1, y + 0),
+            (int32_t)*CT_IMAGE_DATA_PTR_8U(src, x + 1, y + 0),
+            (int32_t)*CT_IMAGE_DATA_PTR_8U(src, x + 0, y - 1),
+            (int32_t)*CT_IMAGE_DATA_PTR_8U(src, x - 1, y - 1),
+            (int32_t)*CT_IMAGE_DATA_PTR_8U(src, x + 1, y - 1),
+            (int32_t)*CT_IMAGE_DATA_PTR_8U(src, x + 0, y + 1),
+            (int32_t)*CT_IMAGE_DATA_PTR_8U(src, x - 1, y + 1),
+            (int32_t)*CT_IMAGE_DATA_PTR_8U(src, x + 1, y + 1)
+        };
+        return (uint8_t)median_get_U8(values);
+    }
 }
 
 static uint8_t median3x3_calculate_replicate(CT_Image src, uint32_t x_, uint32_t y_)
 {
     int32_t x = (int)x_;
     int32_t y = (int)y_;
-    int32_t values[9] = {
-        (int32_t)CT_IMAGE_DATA_REPLICATE_8U(src, x + 0, y + 0),
-        (int32_t)CT_IMAGE_DATA_REPLICATE_8U(src, x - 1, y + 0),
-        (int32_t)CT_IMAGE_DATA_REPLICATE_8U(src, x + 1, y + 0),
-        (int32_t)CT_IMAGE_DATA_REPLICATE_8U(src, x + 0, y - 1),
-        (int32_t)CT_IMAGE_DATA_REPLICATE_8U(src, x - 1, y - 1),
-        (int32_t)CT_IMAGE_DATA_REPLICATE_8U(src, x + 1, y - 1),
-        (int32_t)CT_IMAGE_DATA_REPLICATE_8U(src, x + 0, y + 1),
-        (int32_t)CT_IMAGE_DATA_REPLICATE_8U(src, x - 1, y + 1),
-        (int32_t)CT_IMAGE_DATA_REPLICATE_8U(src, x + 1, y + 1)
-    };
-    return (uint8_t)median_get(values);
+    if (src->format == VX_DF_IMAGE_U1)
+    {
+        int32_t values[9] = {
+            (int32_t)CT_IMAGE_DATA_REPLICATE_1U(src, x + 0, y + 0),
+            (int32_t)CT_IMAGE_DATA_REPLICATE_1U(src, x - 1, y + 0),
+            (int32_t)CT_IMAGE_DATA_REPLICATE_1U(src, x + 1, y + 0),
+            (int32_t)CT_IMAGE_DATA_REPLICATE_1U(src, x + 0, y - 1),
+            (int32_t)CT_IMAGE_DATA_REPLICATE_1U(src, x - 1, y - 1),
+            (int32_t)CT_IMAGE_DATA_REPLICATE_1U(src, x + 1, y - 1),
+            (int32_t)CT_IMAGE_DATA_REPLICATE_1U(src, x + 0, y + 1),
+            (int32_t)CT_IMAGE_DATA_REPLICATE_1U(src, x - 1, y + 1),
+            (int32_t)CT_IMAGE_DATA_REPLICATE_1U(src, x + 1, y + 1)
+        };
+        return (uint8_t)median_get_U8(values);
+    }
+    else
+    {
+        int32_t values[9] = {
+            (int32_t)CT_IMAGE_DATA_REPLICATE_8U(src, x + 0, y + 0),
+            (int32_t)CT_IMAGE_DATA_REPLICATE_8U(src, x - 1, y + 0),
+            (int32_t)CT_IMAGE_DATA_REPLICATE_8U(src, x + 1, y + 0),
+            (int32_t)CT_IMAGE_DATA_REPLICATE_8U(src, x + 0, y - 1),
+            (int32_t)CT_IMAGE_DATA_REPLICATE_8U(src, x - 1, y - 1),
+            (int32_t)CT_IMAGE_DATA_REPLICATE_8U(src, x + 1, y - 1),
+            (int32_t)CT_IMAGE_DATA_REPLICATE_8U(src, x + 0, y + 1),
+            (int32_t)CT_IMAGE_DATA_REPLICATE_8U(src, x - 1, y + 1),
+            (int32_t)CT_IMAGE_DATA_REPLICATE_8U(src, x + 1, y + 1)
+        };
+        return (uint8_t)median_get_U8(values);
+    }
 }
 
 static uint8_t median3x3_calculate_constant(CT_Image src, uint32_t x_, uint32_t y_, vx_uint32 constant_value)
 {
     int32_t x = (int)x_;
     int32_t y = (int)y_;
-    int32_t values[9] = {
-        (int32_t)CT_IMAGE_DATA_CONSTANT_8U(src, x + 0, y + 0, constant_value),
-        (int32_t)CT_IMAGE_DATA_CONSTANT_8U(src, x - 1, y + 0, constant_value),
-        (int32_t)CT_IMAGE_DATA_CONSTANT_8U(src, x + 1, y + 0, constant_value),
-        (int32_t)CT_IMAGE_DATA_CONSTANT_8U(src, x + 0, y - 1, constant_value),
-        (int32_t)CT_IMAGE_DATA_CONSTANT_8U(src, x - 1, y - 1, constant_value),
-        (int32_t)CT_IMAGE_DATA_CONSTANT_8U(src, x + 1, y - 1, constant_value),
-        (int32_t)CT_IMAGE_DATA_CONSTANT_8U(src, x + 0, y + 1, constant_value),
-        (int32_t)CT_IMAGE_DATA_CONSTANT_8U(src, x - 1, y + 1, constant_value),
-        (int32_t)CT_IMAGE_DATA_CONSTANT_8U(src, x + 1, y + 1, constant_value)
-    };
-    return (uint8_t)median_get(values);
+    if (src->format == VX_DF_IMAGE_U1)
+    {
+        vx_bool const_val_bool = (constant_value == 0) ? vx_false_e : vx_true_e;
+        int32_t values[9] = {
+            (int32_t)CT_IMAGE_DATA_CONSTANT_1U(src, x + 0, y + 0, const_val_bool),
+            (int32_t)CT_IMAGE_DATA_CONSTANT_1U(src, x - 1, y + 0, const_val_bool),
+            (int32_t)CT_IMAGE_DATA_CONSTANT_1U(src, x + 1, y + 0, const_val_bool),
+            (int32_t)CT_IMAGE_DATA_CONSTANT_1U(src, x + 0, y - 1, const_val_bool),
+            (int32_t)CT_IMAGE_DATA_CONSTANT_1U(src, x - 1, y - 1, const_val_bool),
+            (int32_t)CT_IMAGE_DATA_CONSTANT_1U(src, x + 1, y - 1, const_val_bool),
+            (int32_t)CT_IMAGE_DATA_CONSTANT_1U(src, x + 0, y + 1, const_val_bool),
+            (int32_t)CT_IMAGE_DATA_CONSTANT_1U(src, x - 1, y + 1, const_val_bool),
+            (int32_t)CT_IMAGE_DATA_CONSTANT_1U(src, x + 1, y + 1, const_val_bool)
+        };
+        return (uint8_t)median_get_U8(values);
+    }
+    else
+    {
+        int32_t values[9] = {
+            (int32_t)CT_IMAGE_DATA_CONSTANT_8U(src, x + 0, y + 0, constant_value),
+            (int32_t)CT_IMAGE_DATA_CONSTANT_8U(src, x - 1, y + 0, constant_value),
+            (int32_t)CT_IMAGE_DATA_CONSTANT_8U(src, x + 1, y + 0, constant_value),
+            (int32_t)CT_IMAGE_DATA_CONSTANT_8U(src, x + 0, y - 1, constant_value),
+            (int32_t)CT_IMAGE_DATA_CONSTANT_8U(src, x - 1, y - 1, constant_value),
+            (int32_t)CT_IMAGE_DATA_CONSTANT_8U(src, x + 1, y - 1, constant_value),
+            (int32_t)CT_IMAGE_DATA_CONSTANT_8U(src, x + 0, y + 1, constant_value),
+            (int32_t)CT_IMAGE_DATA_CONSTANT_8U(src, x - 1, y + 1, constant_value),
+            (int32_t)CT_IMAGE_DATA_CONSTANT_8U(src, x + 1, y + 1, constant_value)
+        };
+        return (uint8_t)median_get_U8(values);
+    }
 }
 
 
@@ -140,35 +226,72 @@
 {
     CT_Image dst;
 
-    CT_ASSERT_(return NULL, src->format == VX_DF_IMAGE_U8);
+    CT_ASSERT_(return NULL, src->format == VX_DF_IMAGE_U1 || src->format == VX_DF_IMAGE_U8);
 
     dst = ct_allocate_image(src->width, src->height, src->format);
 
     if (border.mode == VX_BORDER_UNDEFINED)
     {
-        CT_FILL_IMAGE_8U(return 0, dst,
-                if (x >= 1 && y >= 1 && x < src->width - 1 && y < src->height - 1)
-                {
-                    uint8_t res = median3x3_calculate(src, x, y);
-                    *dst_data = res;
-                });
+        if (src->format == VX_DF_IMAGE_U1)
+        {
+            CT_FILL_IMAGE_1U(return 0, dst,
+                    if (x >= 1 && y >= 1 && x < src->width - 1 && y < src->height - 1)
+                    {
+                        uint32_t xShftdSrc = x + src->roi.x % 8;
+                        uint8_t res = median3x3_calculate(src, xShftdSrc, y);
+                        *dst_data = (*dst_data & ~(1 << offset)) | (res << offset);
+                    });
+        }
+        else
+        {
+            CT_FILL_IMAGE_8U(return 0, dst,
+                    if (x >= 1 && y >= 1 && x < src->width - 1 && y < src->height - 1)
+                    {
+                        uint8_t res = median3x3_calculate(src, x, y);
+                        *dst_data = res;
+                    });
+        }
     }
     else if (border.mode == VX_BORDER_REPLICATE)
     {
-        CT_FILL_IMAGE_8U(return 0, dst,
-                {
-                    uint8_t res = median3x3_calculate_replicate(src, x, y);
-                    *dst_data = res;
-                });
+        if (src->format == VX_DF_IMAGE_U1)
+        {
+            CT_FILL_IMAGE_1U(return 0, dst,
+                    {
+                        uint32_t xShftdSrc = x + src->roi.x % 8;
+                        uint8_t res = median3x3_calculate_replicate(src, xShftdSrc, y);
+                        *dst_data = (*dst_data & ~(1 << offset)) | (res << offset);
+                    });
+        }
+        else
+        {
+            CT_FILL_IMAGE_8U(return 0, dst,
+                    {
+                        uint8_t res = median3x3_calculate_replicate(src, x, y);
+                        *dst_data = res;
+                    });
+        }
     }
     else if (border.mode == VX_BORDER_CONSTANT)
     {
         vx_uint32 constant_value = border.constant_value.U32;
-        CT_FILL_IMAGE_8U(return 0, dst,
-                {
-                    uint8_t res = median3x3_calculate_constant(src, x, y, constant_value);
-                    *dst_data = res;
-                });
+        if (src->format == VX_DF_IMAGE_U1)
+        {
+            CT_FILL_IMAGE_1U(return 0, dst,
+                    {
+                        uint32_t xShftdSrc = x + src->roi.x % 8;
+                        uint8_t res = median3x3_calculate_constant(src, xShftdSrc, y, constant_value);
+                        *dst_data = (*dst_data & ~(1 << offset)) | (res << offset);
+                    });
+        }
+        else
+        {
+            CT_FILL_IMAGE_8U(return 0, dst,
+                    {
+                        uint8_t res = median3x3_calculate_constant(src, x, y, constant_value);
+                        *dst_data = res;
+                    });
+        }
     }
     else
     {
@@ -210,15 +333,18 @@
 
 typedef struct {
     const char* testName;
-    CT_Image (*generator)(const char* fileName, int width, int height);
+    CT_Image (*generator)(const char* fileName, int width, int height, vx_df_image format);
     const char* fileName;
     vx_border_t border;
     int width, height;
+    vx_df_image format;
 } Filter_Arg;
 
 #define MEDIAN_PARAMETERS \
-    CT_GENERATE_PARAMETERS("randomInput", ADD_VX_BORDERS_REQUIRE_UNDEFINED_ONLY, ADD_SIZE_SMALL_SET, ARG, median3x3_generate_random, NULL), \
-    CT_GENERATE_PARAMETERS("lena", ADD_VX_BORDERS_REQUIRE_UNDEFINED_ONLY, ADD_SIZE_NONE, ARG, median3x3_read_image, "lena.bmp")
+    CT_GENERATE_PARAMETERS("randomInput", ADD_VX_BORDERS_REQUIRE_UNDEFINED_ONLY, ADD_SIZE_SMALL_SET, ADD_TYPE_U8, ARG, median3x3_generate_random, NULL), \
+    CT_GENERATE_PARAMETERS("lena", ADD_VX_BORDERS_REQUIRE_UNDEFINED_ONLY, ADD_SIZE_NONE, ADD_TYPE_U8, ARG, median3x3_read_image, "lena.bmp"), \
+    CT_GENERATE_PARAMETERS("_U1_/randomInput", ADD_VX_BORDERS_U1_REQUIRE_UNDEFINED_ONLY, ADD_SIZE_SMALL_SET, ADD_TYPE_U1, ARG, median3x3_generate_random, NULL), \
+    CT_GENERATE_PARAMETERS("_U1_/lena", ADD_VX_BORDERS_U1_REQUIRE_UNDEFINED_ONLY, ADD_SIZE_NONE, ADD_TYPE_U1, ARG, median3x3_read_image, "lena.bmp")
 
 TEST_WITH_ARG(Median3x3, testGraphProcessing, Filter_Arg,
     MEDIAN_PARAMETERS
@@ -232,7 +358,7 @@
     CT_Image src = NULL, dst = NULL;
     vx_border_t border = arg_->border;
 
-    ASSERT_NO_FAILURE(src = arg_->generator(arg_->fileName, arg_->width, arg_->height));
+    ASSERT_NO_FAILURE(src = arg_->generator(arg_->fileName, arg_->width, arg_->height, arg_->format));
 
     ASSERT_VX_OBJECT(src_image = ct_image_to_vx_image(src, context), VX_TYPE_IMAGE);
 
@@ -274,7 +400,7 @@
     CT_Image src = NULL, dst = NULL;
     vx_border_t border = arg_->border;
 
-    ASSERT_NO_FAILURE(src = arg_->generator(arg_->fileName, arg_->width, arg_->height));
+    ASSERT_NO_FAILURE(src = arg_->generator(arg_->fileName, arg_->width, arg_->height, arg_->format));
 
     ASSERT_VX_OBJECT(src_image = ct_image_to_vx_image(src, context), VX_TYPE_IMAGE);
 
@@ -295,4 +421,60 @@
     ASSERT(src_image == 0);
 }
 
-TESTCASE_TESTS(Median3x3, testNodeCreation, testGraphProcessing, testImmediateProcessing)
+typedef struct {
+    const char* testName;
+    CT_Image (*generator)(const char* fileName, int width, int height, vx_df_image format);
+    const char* fileName;
+    vx_border_t border;
+    int width, height;
+    vx_df_image format;
+    vx_rectangle_t regionShift;
+} ValidRegionTest_Arg;
+
+#ifdef MEDIAN_PARAMETERS
+#undef MEDIAN_PARAMETERS
+#endif
+#define MEDIAN_PARAMETERS \
+    CT_GENERATE_PARAMETERS("lena", ADD_VX_BORDERS_REQUIRE_UNDEFINED_ONLY, ADD_SIZE_NONE, ADD_TYPE_U8, ADD_VALID_REGION_SHRINKS, ARG, median3x3_read_image, "lena.bmp"), \
+    CT_GENERATE_PARAMETERS("_U1_/lena", ADD_VX_BORDERS_U1_REQUIRE_UNDEFINED_ONLY, ADD_SIZE_NONE, ADD_TYPE_U1, ADD_VALID_REGION_SHRINKS, ARG, median3x3_read_image, "lena.bmp")
+
+TEST_WITH_ARG(Median3x3, testWithValidRegion, ValidRegionTest_Arg,
+    MEDIAN_PARAMETERS
+)
+{
+    vx_context context = context_->vx_context_;
+    vx_image src_image = 0, dst_image = 0;
+
+    CT_Image src = NULL, dst = NULL;
+    vx_border_t border = arg_->border;
+    vx_rectangle_t rect = {0, 0, 0, 0}, rect_shft = arg_->regionShift;
+
+    ASSERT_NO_FAILURE(src = arg_->generator(arg_->fileName, arg_->width, arg_->height, arg_->format));
+
+    ASSERT_VX_OBJECT(src_image = ct_image_to_vx_image(src, context), VX_TYPE_IMAGE);
+    ASSERT_VX_OBJECT(dst_image = ct_create_similar_image(src_image), VX_TYPE_IMAGE);
+
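+    // Shrink the source image's valid region by the configured offsets before running the filter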
+    ASSERT_NO_FAILURE(vxGetValidRegionImage(src_image, &rect));
+    ALTERRECTANGLE(rect, rect_shft.start_x, rect_shft.start_y, rect_shft.end_x, rect_shft.end_y);
+    ASSERT_NO_FAILURE(vxSetImageValidRectangle(src_image, &rect));
+
+    VX_CALL(vxSetContextAttribute(context, VX_CONTEXT_IMMEDIATE_BORDER, &border, sizeof(border)));
+
+    VX_CALL(vxuMedian3x3(context, src_image, dst_image));
+
+    ASSERT_NO_FAILURE(dst = ct_image_from_vx_image(dst_image));
+    ASSERT_NO_FAILURE(ct_adjust_roi(dst, rect_shft.start_x, rect_shft.start_y, -rect_shft.end_x, -rect_shft.end_y));
+
+    ASSERT_NO_FAILURE(ct_adjust_roi(src, rect_shft.start_x, rect_shft.start_y, -rect_shft.end_x, -rect_shft.end_y));
+    ASSERT_NO_FAILURE(median3x3_check(src, dst, border));
+
+    VX_CALL(vxReleaseImage(&dst_image));
+    VX_CALL(vxReleaseImage(&src_image));
+
+    ASSERT(dst_image == 0);
+    ASSERT(src_image == 0);
+}
+
+TESTCASE_TESTS(Median3x3, testNodeCreation, testGraphProcessing, testImmediateProcessing, testWithValidRegion)
+
+#endif //OPENVX_USE_ENHANCED_VISION || OPENVX_CONFORMANCE_VISION
diff --git a/test_conformance/test_min.c b/test_conformance/test_min.c
index 024a5f9..082297e 100644
--- a/test_conformance/test_min.c
+++ b/test_conformance/test_min.c
@@ -15,6 +15,8 @@
  * limitations under the License.
  */
 
+#ifdef OPENVX_USE_ENHANCED_VISION
+
 #include <string.h>
 #include <VX/vx.h>
 #include <VX/vxu.h>
@@ -182,3 +184,5 @@
 
 }
 TESTCASE_TESTS(Min, testvxMin)
+
+#endif //OPENVX_USE_ENHANCED_VISION
diff --git a/test_conformance/test_minmaxloc.c b/test_conformance/test_minmaxloc.c
index 08be9d9..2f43f27 100644
--- a/test_conformance/test_minmaxloc.c
+++ b/test_conformance/test_minmaxloc.c
@@ -1,4 +1,4 @@
-/* 
+/*
 
  * Copyright (c) 2012-2017 The Khronos Group Inc.
  *
@@ -15,6 +15,8 @@
  * limitations under the License.
  */
 
+#if defined OPENVX_USE_ENHANCED_VISION || OPENVX_CONFORMANCE_VISION
+
 #include "test_engine/test.h"
 
 #include <VX/vx.h>
@@ -333,3 +335,6 @@
 }
 
 TESTCASE_TESTS(MinMaxLoc, testOnRandom)
+
+#endif //OPENVX_USE_ENHANCED_VISION || OPENVX_CONFORMANCE_VISION
+
diff --git a/test_conformance/test_multiply.c b/test_conformance/test_multiply.c
index 16f74bc..c050a49 100644
--- a/test_conformance/test_multiply.c
+++ b/test_conformance/test_multiply.c
@@ -1,4 +1,4 @@
-/* 
+/*
 
  * Copyright (c) 2012-2017 The Khronos Group Inc.
  *
@@ -15,6 +15,8 @@
  * limitations under the License.
  */
 
+#if defined OPENVX_USE_ENHANCED_VISION || OPENVX_CONFORMANCE_VISION
+
 #include "test_engine/test.h"
 
 #include <VX/vx.h>
@@ -504,3 +506,5 @@
 
 TESTCASE_TESTS(vxuMultiply, DISABLED_testNegativeFormat, DISABLED_testNegativeSizes,                testFuzzy)
 TESTCASE_TESTS(vxMultiply,  DISABLED_testNegativeFormat, DISABLED_testNegativeSizes, testInference, testFuzzy)
+
+#endif //OPENVX_USE_ENHANCED_VISION || OPENVX_CONFORMANCE_VISION
diff --git a/test_conformance/test_nnef_import.c b/test_conformance/test_nnef_import.c
new file mode 100644
index 0000000..eb27dd7
--- /dev/null
+++ b/test_conformance/test_nnef_import.c
@@ -0,0 +1,57 @@
+/*
+
+* Copyright (c) 2017-2017 The Khronos Group Inc.
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*    http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+#ifdef OPENVX_CONFORMANCE_NNEF_IMPORT
+
+#include <VX/vx_khr_import_kernel.h>
+
+#include "test_engine/test.h"
+
+typedef struct {
+    const char* name;
+    char  *type;
+    char  *url;
+} nnef_import_arg;
+
+#define NNEF_IMPORT_PARAMETERS \
+    ARG("importkernel", "vx_kernel", "./kernel.img")
+
+
+TESTCASE(TensorNNEFImport, CT_VXContext, ct_setup_vx_context, 0)
+
+TEST_WITH_ARG(TensorNNEFImport, testNNEFImport, nnef_import_arg,
+    NNEF_IMPORT_PARAMETERS)
+{
+    vx_context context = context_->vx_context_;
+    vx_char *type = arg_->type;
+    vx_char *url = arg_->url;
+    vx_kernel kernel = NULL;
+    vx_status status = VX_SUCCESS;
+
+    kernel = vxImportKernelFromURL(context, type, url);
+
+    status = vxGetStatus((vx_reference)kernel);
+
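+    // Only release the kernel if the import returned a valid reference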
+    if(VX_SUCCESS == status)
+    {
+        VX_CALL(vxReleaseKernel(&kernel));
+        ASSERT(kernel == 0);
+    }
+}
+
+TESTCASE_TESTS(TensorNNEFImport, testNNEFImport)
+#endif
diff --git a/test_conformance/test_nonlinearfilter.c b/test_conformance/test_nonlinearfilter.c
index d5ad0cc..8243710 100644
--- a/test_conformance/test_nonlinearfilter.c
+++ b/test_conformance/test_nonlinearfilter.c
@@ -15,15 +15,14 @@
  * limitations under the License.
  */
 
+#if defined OPENVX_USE_ENHANCED_VISION || OPENVX_CONFORMANCE_VISION
+
 #include <VX/vx.h>
 #include <VX/vxu.h>
-
 #include "test_engine/test.h"
-#include "shared_functions.h"
 
 TESTCASE(NonLinearFilter, CT_VXContext, ct_setup_vx_context, 0)
 
-
 #define MASK_SIZE_MAX (5)
 
 #ifndef MIN
@@ -63,12 +62,16 @@
     ASSERT(src_image == 0);
 }
 
-static CT_Image generate_random(const char* fileName, int width, int height)
+static CT_Image generate_random(const char* fileName, int width, int height, vx_df_image format)
 {
     CT_Image image;
 
-    ASSERT_NO_FAILURE_(return 0,
-        image = ct_allocate_ct_image_random(width, height, VX_DF_IMAGE_U8, &CT()->seed_, 0, 256));
+    ASSERT_(return 0, format == VX_DF_IMAGE_U1 || format == VX_DF_IMAGE_U8);
+
+    if (format == VX_DF_IMAGE_U1)
+        ASSERT_NO_FAILURE_(return 0, image = ct_allocate_ct_image_random(width, height, format, &CT()->seed_, 0, 2));
+    else
+        ASSERT_NO_FAILURE_(return 0, image = ct_allocate_ct_image_random(width, height, format, &CT()->seed_, 0, 256));
 
     return image;
 }
@@ -85,11 +88,13 @@
         return -1;
 }
 
-static void filter_calculate(vx_enum function, CT_Image src, vx_coordinates2d_t* origin, vx_int32 cols, vx_int32 rows, vx_uint8* mask, vx_border_t* border, int32_t x, int32_t y, uint8_t *data)
+static uint8_t filter_calculate(vx_enum function, CT_Image src, vx_coordinates2d_t* origin, vx_int32 cols, vx_int32 rows, vx_uint8* mask, vx_border_t* border, int32_t x, int32_t y, uint32_t shift_x_u1)
 {
     vx_uint8 values[MASK_SIZE_MAX * MASK_SIZE_MAX];
+    vx_uint8 res_val = 0;
 
     vx_int32 i, j, ci, cj, m = 0, v = 0;
+    vx_int32 x_start = (vx_int32)shift_x_u1;    // Bit-shift offset for U1 images, always 0 for other image formats
     vx_int32 cx = origin->x;
     vx_int32 cy = origin->y;
 
@@ -99,10 +104,17 @@
         {
             if (mask[m])
             {
-                ci = MAX(0, MIN(i, (vx_int32)src->width - 1));
+                ci = MAX(x_start, MIN(i, (vx_int32)src->width - 1 + x_start));
                 cj = MAX(0, MIN(j, (vx_int32)src->height - 1));
 
-                values[v++] = (border->mode == VX_BORDER_CONSTANT && (i != ci || j != cj)) ? border->constant_value.U8 : *CT_IMAGE_DATA_PTR_8U(src, ci, cj);
+                if (src->format == VX_DF_IMAGE_U1)
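+                    // U1: take the constant border bit, or extract bit (ci % 8) from the packed source byte and shift it down to 0/1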
+                    values[v++] = (border->mode == VX_BORDER_CONSTANT && (i != ci || j != cj))
+                                    ?  border->constant_value.U1 ? 1 : 0
+                                    : (*CT_IMAGE_DATA_PTR_1U(src, ci, cj) & (1 << (ci % 8))) >> (ci % 8);
+                else
+                    values[v++] = (border->mode == VX_BORDER_CONSTANT && (i != ci || j != cj))
+                                    ? border->constant_value.U8
+                                    : *CT_IMAGE_DATA_PTR_8U(src, ci, cj);
             }
         }
     }
@@ -111,20 +123,23 @@
 
     switch (function)
     {
-    case VX_NONLINEAR_FILTER_MIN: *data = values[0]; break; /* minimal value */
-    case VX_NONLINEAR_FILTER_MAX: *data = values[v - 1]; break; /* maximum value */
-    case VX_NONLINEAR_FILTER_MEDIAN: *data = values[v / 2]; break; /* pick the middle value */
+    case VX_NONLINEAR_FILTER_MIN:    res_val = values[0];     break; /* minimal value */
+    case VX_NONLINEAR_FILTER_MAX:    res_val = values[v - 1]; break; /* maximum value */
+    case VX_NONLINEAR_FILTER_MEDIAN: res_val = values[v / 2]; break; /* pick the middle value */
     }
+
+    return res_val;
 }
 
 void filter_create_reference_image(vx_enum function, CT_Image src, vx_coordinates2d_t* origin, vx_size cols, vx_size rows, vx_uint8* mask, CT_Image* pdst, vx_border_t* border)
 {
     CT_Image dst = NULL;
 
-    CT_ASSERT(src->format == VX_DF_IMAGE_U8);
+    CT_ASSERT(src->format == VX_DF_IMAGE_U1 || src->format == VX_DF_IMAGE_U8);
 
-    dst = ct_allocate_image(src->width, src->height, VX_DF_IMAGE_U8);
+    dst = ct_allocate_image(src->width, src->height, src->format);
 
+    vx_uint32 shift_x_u1 = (src->format == VX_DF_IMAGE_U1) ? src->roi.x % 8 : 0;
     if (border->mode == VX_BORDER_UNDEFINED)
     {
         vx_uint32 left = origin->x;
@@ -132,16 +147,41 @@
         vx_uint32 right = (vx_uint32)(cols - origin->x - 1);
         vx_uint32 bottom = (vx_uint32)(rows - origin->y - 1);
 
-        CT_FILL_IMAGE_8U(return, dst,
-            if (x >= left && y >= top && x < src->width - right && y < src->height - bottom)
-                filter_calculate(function, src, origin, (vx_int32)cols, (vx_int32)rows, mask, border, x, y, dst_data);
-        );
+        if (src->format == VX_DF_IMAGE_U1)
+        {
+            CT_FILL_IMAGE_1U(return, dst,
+                if (x >= left && y >= top && x < src->width - right && y < src->height - bottom)
+                {
+                    uint32_t xShftdSrc = x + shift_x_u1;
+                    uint8_t res = filter_calculate(function, src, origin, (vx_int32)cols, (vx_int32)rows, mask, border, xShftdSrc, y, shift_x_u1);
+                    *dst_data = (*dst_data & ~(1 << offset)) | (res << offset);
+                });
+        }
+        else
+        {
+            CT_FILL_IMAGE_8U(return, dst,
+                if (x >= left && y >= top && x < src->width - right && y < src->height - bottom)
+                    *dst_data = filter_calculate(function, src, origin, (vx_int32)cols, (vx_int32)rows, mask, border, x, y, 0);
+            );
+        }
     }
     else
     {
-        CT_FILL_IMAGE_8U(return, dst,
-            filter_calculate(function, src, origin, (vx_int32)cols, (vx_int32)rows, mask, border, x, y, dst_data);
-        );
+        if (src->format == VX_DF_IMAGE_U1)
+        {
+            CT_FILL_IMAGE_1U(return, dst,
+                {
+                    uint32_t xShftdSrc = x + shift_x_u1;
+                    uint8_t res = filter_calculate(function, src, origin, (vx_int32)cols, (vx_int32)rows, mask, border, xShftdSrc, y, shift_x_u1);
+                    *dst_data = (*dst_data & ~(1 << offset)) | (res << offset);
+                });
+        }
+        else
+        {
+            CT_FILL_IMAGE_8U(return, dst,
+                *dst_data = filter_calculate(function, src, origin, (vx_int32)cols, (vx_int32)rows, mask, border, x, y, 0);
+            );
+        }
     }
 
     *pdst = dst;
@@ -211,7 +251,7 @@
 
     EXPECT_EQ_CTIMAGE(dst_ref, dst);
 
-#if 0
+#if 1
     if (CT_HasFailure())
     {
         printf("=== SRC ===\n");
@@ -220,6 +260,9 @@
         ct_dump_image_info(dst);
         printf("=== EXPECTED ===\n");
         ct_dump_image_info(dst_ref);
+        ct_write_image("nlf_src.bmp",  src);
+        ct_write_image("nlf_calc.bmp", dst);
+        ct_write_image("nlf_ref.bmp",  dst_ref);
     }
 #endif
 }
@@ -227,13 +270,14 @@
 
 typedef struct {
     const char* testName;
-    CT_Image(*generator)(const char* fileName, int width, int height);
+    CT_Image(*generator)(const char* fileName, int width, int height, vx_df_image format);
     const char* fileName;
     vx_size mask_size;
     vx_enum function;
     vx_enum pattern;
     vx_border_t border;
     int width, height;
+    vx_df_image format;
 } Filter_Arg;
 
 
@@ -252,8 +296,10 @@
     CT_EXPAND(nextmacro(testArgName "/VX_PATTERN_CROSS", __VA_ARGS__, VX_PATTERN_CROSS))
 
 #define FILTER_PARAMETERS \
-    CT_GENERATE_PARAMETERS("randomInput/mask=3x3", ADD_FUNCTIONS, ADD_PATTERNS_BOX_CROSS, ADD_VX_BORDERS_REQUIRE_UNDEFINED_ONLY, ADD_SIZE_SMALL_SET, ARG, generate_random, NULL, 3), \
-    CT_GENERATE_PARAMETERS("randomInput/mask=5x5", ADD_FUNCTIONS, ADD_PATTERNS_BOX_CROSS_DISK, ADD_VX_BORDERS_REQUIRE_UNDEFINED_ONLY, ADD_SIZE_SMALL_SET, ARG, generate_random, NULL, 5)
+    CT_GENERATE_PARAMETERS("randomInput/mask=3x3", ADD_FUNCTIONS, ADD_PATTERNS_BOX_CROSS, ADD_VX_BORDERS_REQUIRE_UNDEFINED_ONLY, ADD_SIZE_SMALL_SET, ADD_TYPE_U8, ARG, generate_random, NULL, 3), \
+    CT_GENERATE_PARAMETERS("randomInput/mask=5x5", ADD_FUNCTIONS, ADD_PATTERNS_BOX_CROSS_DISK, ADD_VX_BORDERS_REQUIRE_UNDEFINED_ONLY, ADD_SIZE_SMALL_SET, ADD_TYPE_U8, ARG, generate_random, NULL, 5), \
+    CT_GENERATE_PARAMETERS("_U1_/randomInput/mask=3x3", ADD_FUNCTIONS, ADD_PATTERNS_BOX_CROSS, ADD_VX_BORDERS_U1_REQUIRE_UNDEFINED_ONLY, ADD_SIZE_SMALL_SET, ADD_TYPE_U1, ARG, generate_random, NULL, 3), \
+    CT_GENERATE_PARAMETERS("_U1_/randomInput/mask=5x5", ADD_FUNCTIONS, ADD_PATTERNS_BOX_CROSS_DISK, ADD_VX_BORDERS_U1_REQUIRE_UNDEFINED_ONLY, ADD_SIZE_SMALL_SET, ADD_TYPE_U1, ARG, generate_random, NULL, 5)
 
 
 TEST_WITH_ARG(NonLinearFilter, testGraphProcessing, Filter_Arg,
@@ -270,7 +316,7 @@
     CT_Image src = NULL, dst = NULL;
     vx_border_t border = arg_->border;
 
-    ASSERT_NO_FAILURE(src = arg_->generator(arg_->fileName, arg_->width, arg_->height));
+    ASSERT_NO_FAILURE(src = arg_->generator(arg_->fileName, arg_->width, arg_->height, arg_->format));
 
     ASSERT_VX_OBJECT(src_image = ct_image_to_vx_image(src, context), VX_TYPE_IMAGE);
 
@@ -324,7 +370,7 @@
     CT_Image src = NULL, dst = NULL;
     vx_border_t border = arg_->border;
 
-    ASSERT_NO_FAILURE(src = arg_->generator(arg_->fileName, arg_->width, arg_->height));
+    ASSERT_NO_FAILURE(src = arg_->generator(arg_->fileName, arg_->width, arg_->height, arg_->format));
 
     ASSERT_VX_OBJECT(src_image = ct_image_to_vx_image(src, context), VX_TYPE_IMAGE);
 
@@ -353,7 +399,7 @@
     ASSERT(src_image == 0);
 }
 
-TEST_WITH_ARG(NonLinearFilter, testGraphProcessingWithNondefaultOrginMatrix, Filter_Arg,
+TEST_WITH_ARG(NonLinearFilter, testGraphProcessingWithNondefaultOriginMatrix, Filter_Arg,
     FILTER_PARAMETERS
     )
 {
@@ -367,7 +413,7 @@
     CT_Image src = NULL, dst = NULL;
     vx_border_t border = arg_->border;
 
-    ASSERT_NO_FAILURE(src = arg_->generator(arg_->fileName, arg_->width, arg_->height));
+    ASSERT_NO_FAILURE(src = arg_->generator(arg_->fileName, arg_->width, arg_->height, arg_->format));
 
     ASSERT_VX_OBJECT(src_image = ct_image_to_vx_image(src, context), VX_TYPE_IMAGE);
 
@@ -409,4 +455,75 @@
     ASSERT(dst_image == 0);
     ASSERT(src_image == 0);
 }
-TESTCASE_TESTS(NonLinearFilter, testNodeCreation, testGraphProcessing, testImmediateProcessing, testGraphProcessingWithNondefaultOrginMatrix)
+
+typedef struct {
+    const char* testName;
+    CT_Image (*generator)(const char* fileName, int width, int height, vx_df_image format);
+    const char* fileName;
+    vx_size mask_size;
+    vx_enum function;
+    vx_enum pattern;
+    vx_border_t border;
+    int width, height;
+    vx_df_image format;
+    vx_rectangle_t regionShift;
+} ValidRegionTest_Arg;
+
+#ifdef FILTER_PARAMETERS
+#undef FILTER_PARAMETERS
+#endif
+#define FILTER_PARAMETERS \
+    CT_GENERATE_PARAMETERS("randomInput/mask=3x3", ADD_FUNCTIONS, ADD_PATTERNS_BOX_CROSS, ADD_VX_BORDERS_REQUIRE_UNDEFINED_ONLY, ADD_SIZE_256x256, ADD_TYPE_U8, ADD_VALID_REGION_SHRINKS, ARG, generate_random, NULL, 3), \
+    CT_GENERATE_PARAMETERS("randomInput/mask=5x5", ADD_FUNCTIONS, ADD_PATTERNS_BOX_CROSS, ADD_VX_BORDERS_REQUIRE_UNDEFINED_ONLY, ADD_SIZE_256x256, ADD_TYPE_U8, ADD_VALID_REGION_SHRINKS, ARG, generate_random, NULL, 5), \
+    CT_GENERATE_PARAMETERS("_U1_/randomInput/mask=3x3", ADD_FUNCTIONS, ADD_PATTERNS_BOX_CROSS, ADD_VX_BORDERS_U1_REQUIRE_UNDEFINED_ONLY, ADD_SIZE_256x256, ADD_TYPE_U1, ADD_VALID_REGION_SHRINKS, ARG, generate_random, NULL, 3), \
+    CT_GENERATE_PARAMETERS("_U1_/randomInput/mask=5x5", ADD_FUNCTIONS, ADD_PATTERNS_BOX_CROSS, ADD_VX_BORDERS_U1_REQUIRE_UNDEFINED_ONLY, ADD_SIZE_256x256, ADD_TYPE_U1, ADD_VALID_REGION_SHRINKS, ARG, generate_random, NULL, 5)
+
+TEST_WITH_ARG(NonLinearFilter, testWithValidRegion, ValidRegionTest_Arg,
+    FILTER_PARAMETERS
+)
+{
+    vx_context context = context_->vx_context_;
+    vx_image src_image = 0, dst_image = 0;
+    vx_matrix mask = 0;
+    vx_enum pattern = 0;
+
+    CT_Image src = NULL, dst = NULL;
+    vx_border_t border = arg_->border;
+    vx_rectangle_t rect = {0, 0, 0, 0}, rect_shft = arg_->regionShift;
+
+    ASSERT_NO_FAILURE(src = arg_->generator(arg_->fileName, arg_->width, arg_->height, arg_->format));
+
+    ASSERT_VX_OBJECT(src_image = ct_image_to_vx_image(src, context), VX_TYPE_IMAGE);
+    ASSERT_VX_OBJECT(dst_image = ct_create_similar_image(src_image), VX_TYPE_IMAGE);
+
+    ASSERT_NO_FAILURE(vxGetValidRegionImage(src_image, &rect));
+    ALTERRECTANGLE(rect, rect_shft.start_x, rect_shft.start_y, rect_shft.end_x, rect_shft.end_y);
+    ASSERT_NO_FAILURE(vxSetImageValidRectangle(src_image, &rect));
+
+    ASSERT_VX_OBJECT(mask = vxCreateMatrixFromPattern(context, arg_->pattern, arg_->mask_size, arg_->mask_size),
+                     VX_TYPE_MATRIX);
+    VX_CALL(vxQueryMatrix(mask, VX_MATRIX_PATTERN, &pattern, sizeof(pattern)));
+    ASSERT_EQ_INT(arg_->pattern, pattern);
+
+    VX_CALL(vxSetContextAttribute(context, VX_CONTEXT_IMMEDIATE_BORDER, &border, sizeof(border)));
+
+    VX_CALL(vxuNonLinearFilter(context, arg_->function, src_image, mask, dst_image));
+
+    ASSERT_NO_FAILURE(dst = ct_image_from_vx_image(dst_image));
+    ASSERT_NO_FAILURE(ct_adjust_roi(dst, rect_shft.start_x, rect_shft.start_y, -rect_shft.end_x, -rect_shft.end_y));
+
+    ASSERT_NO_FAILURE(ct_adjust_roi(src, rect_shft.start_x, rect_shft.start_y, -rect_shft.end_x, -rect_shft.end_y));
+    ASSERT_NO_FAILURE(filter_check(arg_->function, src, mask, dst, &border));
+
+    VX_CALL(vxReleaseMatrix(&mask));
+    VX_CALL(vxReleaseImage(&dst_image));
+    VX_CALL(vxReleaseImage(&src_image));
+
+    ASSERT(mask == 0);
+    ASSERT(dst_image == 0);
+    ASSERT(src_image == 0);
+}
+
+TESTCASE_TESTS(NonLinearFilter, testNodeCreation, testGraphProcessing, testImmediateProcessing, testGraphProcessingWithNondefaultOriginMatrix, testWithValidRegion)
+
+#endif //OPENVX_USE_ENHANCED_VISION || OPENVX_CONFORMANCE_VISION
diff --git a/test_conformance/test_nonmaxsuppression.c b/test_conformance/test_nonmaxsuppression.c
index 6d4d26f..91fed90 100644
--- a/test_conformance/test_nonmaxsuppression.c
+++ b/test_conformance/test_nonmaxsuppression.c
@@ -15,6 +15,7 @@
  * limitations under the License.
 */
 
+#ifdef OPENVX_USE_ENHANCED_VISION
 
 #include "test_engine/test.h"
 
@@ -31,7 +32,7 @@
     vx_image mask = 0;
     vx_image output = 0;
     vx_uint32 src_width, src_height;
-    
+
     vx_int32 wsize = 3;
 
     vx_graph graph = 0;
@@ -72,6 +73,7 @@
 
     return image;
 }
+
 static CT_Image nonmaxsuppression_read_image(const char* fileName, int width, int height)
 {
     CT_Image image = NULL;
@@ -81,48 +83,65 @@
     ASSERT_(return 0, image->format == VX_DF_IMAGE_U8);
     return image;
 }
+
 static CT_Image nonmax_golden(vx_image input, vx_image mask, vx_int32 wsize)
 {
     vx_status status = VX_FAILURE;
-    vx_int32 height, width;
+    vx_int32 rect_start_x, rect_start_y, rect_width, rect_height;
+    vx_uint32 full_width, full_height;
     vx_uint8 mask_data = 0;
+    vx_df_image format = 0, mask_format = 0;
 
-    void *src_base = NULL;
-    void *mask_base = NULL;
-    void *dst_base = NULL;
+    status  = vxQueryImage(input, VX_IMAGE_WIDTH,  &full_width,  sizeof(full_width));
+    status |= vxQueryImage(input, VX_IMAGE_HEIGHT, &full_height, sizeof(full_height));
+    status |= vxQueryImage(input, VX_IMAGE_FORMAT, &format, sizeof(format));
 
     vx_imagepatch_addressing_t src_addr = VX_IMAGEPATCH_ADDR_INIT;
     vx_imagepatch_addressing_t mask_addr = VX_IMAGEPATCH_ADDR_INIT;
-    vx_rectangle_t src_rect, mask_rect;
+    vx_rectangle_t src_rect, mask_rect, full_rect = {0, 0, full_width, full_height};
     vx_map_id src_map_id = 0;
     vx_map_id mask_map_id = 0;
 
-    status = vxGetValidRegionImage(input, &src_rect);
-    status |= vxMapImagePatch(input, &src_rect, 0, &src_map_id, &src_addr, &src_base, VX_READ_ONLY, VX_MEMORY_TYPE_HOST, 0);
+    void *src_base  = NULL;
+    void *mask_base = NULL;
+    void *dst_base  = NULL;
+
+    status |= vxGetValidRegionImage(input, &src_rect);
+    status |= vxMapImagePatch(input, &full_rect, 0, &src_map_id, &src_addr, &src_base, VX_READ_ONLY, VX_MEMORY_TYPE_HOST, 0);
 
     if (mask != NULL)
     {
         status |= vxGetValidRegionImage(mask, &mask_rect);
-        status |= vxMapImagePatch(mask, &mask_rect, 0, &mask_map_id, &mask_addr, (void **)&mask_base, VX_READ_AND_WRITE, VX_MEMORY_TYPE_HOST, 0);
+        status |= vxMapImagePatch(mask, &full_rect, 0, &mask_map_id, &mask_addr, (void **)&mask_base, VX_READ_AND_WRITE, VX_MEMORY_TYPE_HOST, 0);
+        status |= vxQueryImage(mask, VX_IMAGE_FORMAT, &mask_format, sizeof(mask_format));
+        if ((mask_rect.start_x > src_rect.start_x) || (mask_rect.start_y > src_rect.start_y) ||
+            (mask_rect.end_x   < src_rect.end_x)   || (mask_rect.end_y   < src_rect.end_y))
+        {
+            status |= vxUnmapImagePatch(mask,  mask_map_id);
+            status |= vxUnmapImagePatch(input, src_map_id);
+            FAIL_(return NULL, "The mask's valid region does not cover the entire valid region of the input image.");
+        }
     }
-    vx_df_image format = 0;
-    status |= vxQueryImage(input, VX_IMAGE_FORMAT, &format, sizeof(format));
 
-    width = src_addr.dim_x;
-    height = src_addr.dim_y;
+    rect_start_x = src_rect.start_x;
+    rect_start_y = src_rect.start_y;
+    rect_width   = src_rect.end_x - src_rect.start_x;
+    rect_height  = src_rect.end_y - src_rect.start_y;
 
-    CT_Image output = ct_allocate_image(width, height, format);
+    CT_Image output = ct_allocate_image(full_width, full_height, format);
     dst_base = ct_image_get_plane_base(output, 0);
     vx_int32 border = wsize / 2;
 
-    for (vx_int32 x = border; x < (width - border); x++)
+    for (vx_int32 x = rect_start_x + border; x < (rect_start_x + rect_width - border); x++)
     {
-        for (vx_int32 y = border; y < (height - border); y++)
+        for (vx_int32 y = rect_start_y + border; y < (rect_start_y + rect_height - border); y++)
         {
             vx_uint8 *_mask;
             if (mask != NULL)
             {
                 _mask = (vx_uint8 *)vxFormatImagePatchAddress2d(mask_base, x, y, &mask_addr);
+                if (mask_format == VX_DF_IMAGE_U1)
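+                    // U1 mask: if bit (x % 8) is set keep the (non-zero) packed byte, otherwise substitute the zero mask_data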
+                    _mask = (*_mask & (1 << (x % 8))) != 0 ? _mask : &mask_data;
             }
             else
             {
@@ -143,18 +162,20 @@
                     for (vx_int32 j = -border; j <= border; j++)
                     {
                         void *neighbor = vxFormatImagePatchAddress2d(src_base, x + i, y + j, &src_addr);
-			if (mask != NULL)
-			{
-				_mask = (vx_uint8 *)vxFormatImagePatchAddress2d(mask_base, x + i, y + j, &mask_addr);
-			}
-			else
-			{
-				_mask = &mask_data;
-			}
+                        if (mask != NULL)
+                        {
+                            _mask = (vx_uint8 *)vxFormatImagePatchAddress2d(mask_base, x + i, y + j, &mask_addr);
+                            if (mask_format == VX_DF_IMAGE_U1)
+                                _mask = (*_mask & (1 << ((x + i) % 8))) != 0 ? _mask : &mask_data;
+                        }
+                        else
+                        {
+                            _mask = &mask_data;
+                        }
                         vx_int32 neighbor_val = *(vx_int16 *)neighbor;
                         if ((*_mask == 0)
-			   && (((j < 0 || (j == 0 && i <= 0)) && (src_val < neighbor_val))
-			      || ((j > 0 || (j == 0 && i > 0)) && (src_val <= neighbor_val))))
+                            && ( ((j < 0 || (j == 0 && i <= 0)) && (src_val < neighbor_val))
+                                 || ((j > 0 || (j == 0 && i > 0)) && (src_val <= neighbor_val)) ))
                         {
                             flag = 0;
                             break;
@@ -165,6 +186,7 @@
                         break;
                     }
                 }
+
                 if (flag)
                 {
                     *(vx_int16 *)dest = (vx_int16)src_val;
@@ -184,29 +206,37 @@
 
     return output;
 }
+
 typedef struct {
     const char* testName;
     CT_Image(*generator)(const char* fileName, int width, int height);
     const char* fileName;
     vx_int32 wsize;
     vx_bool _mask;
+    vx_df_image maskFormat;
     vx_df_image format;
     const char* result_filename;
 } Arg;
 
 #define PARAMETERS \
-    ARG("case_1_nomask_u8_nms", nonmaxsuppression_read_image, "blurred_lena_gray.bmp", 1, vx_false_e, VX_DF_IMAGE_U8, "nms_1_nomask.bmp"), \
-    ARG("case_3_nomask_u8_nms", nonmaxsuppression_read_image, "blurred_lena_gray.bmp", 3, vx_false_e, VX_DF_IMAGE_U8, "nms_3_nomask.bmp"), \
-    ARG("case_5_nomask_u8_nms", nonmaxsuppression_read_image, "blurred_lena_gray.bmp", 5, vx_false_e, VX_DF_IMAGE_U8, "nms_5_nomask.bmp"), \
-    ARG("case_1_mask_u8_nms", nonmaxsuppression_read_image, "blurred_lena_gray.bmp", 1, vx_true_e, VX_DF_IMAGE_U8, "nms_1_mask.bmp"), \
-    ARG("case_3_mask_u8_nms", nonmaxsuppression_read_image, "blurred_lena_gray.bmp", 3, vx_true_e, VX_DF_IMAGE_U8, "nms_3_mask.bmp"), \
-    ARG("case_5_mask_u8_nms", nonmaxsuppression_read_image, "blurred_lena_gray.bmp", 5, vx_true_e, VX_DF_IMAGE_U8, "nms_5_mask.bmp"), \
-    ARG("case_1_nomask_s16_nms", nonmaxsuppression_generate_random, NULL, 1, vx_false_e, VX_DF_IMAGE_S16, NULL), \
-    ARG("case_3_nomask_s16_nms", nonmaxsuppression_generate_random, NULL, 3, vx_false_e, VX_DF_IMAGE_S16, NULL), \
-    ARG("case_5_nomask_s16_nms", nonmaxsuppression_generate_random, NULL, 5, vx_false_e, VX_DF_IMAGE_S16, NULL), \
-    ARG("case_1_mask_s16_nms", nonmaxsuppression_generate_random, NULL, 1, vx_true_e, VX_DF_IMAGE_S16, NULL), \
-    ARG("case_3_mask_s16_nms", nonmaxsuppression_generate_random, NULL, 3, vx_true_e, VX_DF_IMAGE_S16, NULL), \
-    ARG("case_5_mask_s16_nms", nonmaxsuppression_generate_random, NULL, 5, vx_true_e, VX_DF_IMAGE_S16, NULL), \
+    ARG("case_1_U8_nomask", nonmaxsuppression_read_image, "blurred_lena_gray.bmp", 1, vx_false_e, VX_DF_IMAGE_U8, VX_DF_IMAGE_U8, "nms_1_nomask.bmp"), \
+    ARG("case_3_U8_nomask", nonmaxsuppression_read_image, "blurred_lena_gray.bmp", 3, vx_false_e, VX_DF_IMAGE_U8, VX_DF_IMAGE_U8, "nms_3_nomask.bmp"), \
+    ARG("case_5_U8_nomask", nonmaxsuppression_read_image, "blurred_lena_gray.bmp", 5, vx_false_e, VX_DF_IMAGE_U8, VX_DF_IMAGE_U8, "nms_5_nomask.bmp"), \
+    ARG("case_1_U8_mask",   nonmaxsuppression_read_image, "blurred_lena_gray.bmp", 1, vx_true_e,  VX_DF_IMAGE_U8, VX_DF_IMAGE_U8, "nms_1_mask.bmp"), \
+    ARG("case_3_U8_mask",   nonmaxsuppression_read_image, "blurred_lena_gray.bmp", 3, vx_true_e,  VX_DF_IMAGE_U8, VX_DF_IMAGE_U8, "nms_3_mask.bmp"), \
+    ARG("case_5_U8_mask",   nonmaxsuppression_read_image, "blurred_lena_gray.bmp", 5, vx_true_e,  VX_DF_IMAGE_U8, VX_DF_IMAGE_U8, "nms_5_mask.bmp"), \
+    ARG("case_1_S16_nomask", nonmaxsuppression_generate_random, NULL, 1, vx_false_e, VX_DF_IMAGE_U8, VX_DF_IMAGE_S16, NULL), \
+    ARG("case_3_S16_nomask", nonmaxsuppression_generate_random, NULL, 3, vx_false_e, VX_DF_IMAGE_U8, VX_DF_IMAGE_S16, NULL), \
+    ARG("case_5_S16_nomask", nonmaxsuppression_generate_random, NULL, 5, vx_false_e, VX_DF_IMAGE_U8, VX_DF_IMAGE_S16, NULL), \
+    ARG("case_1_S16_mask",   nonmaxsuppression_generate_random, NULL, 1, vx_true_e,  VX_DF_IMAGE_U8, VX_DF_IMAGE_S16, NULL), \
+    ARG("case_3_S16_mask",   nonmaxsuppression_generate_random, NULL, 3, vx_true_e,  VX_DF_IMAGE_U8, VX_DF_IMAGE_S16, NULL), \
+    ARG("case_5_S16_mask",   nonmaxsuppression_generate_random, NULL, 5, vx_true_e,  VX_DF_IMAGE_U8, VX_DF_IMAGE_S16, NULL), \
+    ARG("_U1_/case_1_U8_mask",  nonmaxsuppression_read_image, "blurred_lena_gray.bmp", 1, vx_true_e, VX_DF_IMAGE_U1, VX_DF_IMAGE_U8, "nms_1_mask.bmp"), \
+    ARG("_U1_/case_3_U8_mask",  nonmaxsuppression_read_image, "blurred_lena_gray.bmp", 3, vx_true_e, VX_DF_IMAGE_U1, VX_DF_IMAGE_U8, "nms_3_mask.bmp"), \
+    ARG("_U1_/case_5_U8_mask",  nonmaxsuppression_read_image, "blurred_lena_gray.bmp", 5, vx_true_e, VX_DF_IMAGE_U1, VX_DF_IMAGE_U8, "nms_5_mask.bmp"), \
+    ARG("_U1_/case_1_S16_mask", nonmaxsuppression_generate_random, NULL, 1, vx_true_e, VX_DF_IMAGE_U1, VX_DF_IMAGE_S16, NULL), \
+    ARG("_U1_/case_3_S16_mask", nonmaxsuppression_generate_random, NULL, 3, vx_true_e, VX_DF_IMAGE_U1, VX_DF_IMAGE_S16, NULL), \
+    ARG("_U1_/case_5_S16_mask", nonmaxsuppression_generate_random, NULL, 5, vx_true_e, VX_DF_IMAGE_U1, VX_DF_IMAGE_S16, NULL)
 
 TEST_WITH_ARG(Nonmaxsuppression, testGraphProcessing, Arg,
     PARAMETERS
@@ -248,23 +278,25 @@
 
     if (arg_->_mask)
     {
-        ASSERT_VX_OBJECT(mask = vxCreateImage(context, src_width, src_height, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
+        ASSERT_VX_OBJECT(mask = vxCreateImage(context, src_width, src_height, arg_->maskFormat), VX_TYPE_IMAGE);
         status = vxGetValidRegionImage(mask, &mask_rect);
         status |= vxMapImagePatch(mask, &mask_rect, 0, &mask_map_id, &mask_addr, (void **)&mask_base, VX_READ_AND_WRITE, VX_MEMORY_TYPE_HOST, 0);
         for (vx_uint32 i = 0; i < src_width; i++)
         {
             for (vx_uint32 j = 0; j < src_height; j++)
             {
-                void *src = vxFormatImagePatchAddress2d(mask_base, i, j, &mask_addr);
+                vx_uint8 val;
+                void *mask_ptr = vxFormatImagePatchAddress2d(mask_base, i, j, &mask_addr);
                 if (i % 2 == 0 && j % 2 == 0)
                 {
-                    *(vx_uint8 *)src = 1;
+                    val = 1;
                 }
                 else
                 {
-                    *(vx_uint8 *)src = 0;
+                    val = 0;
                 }
-               
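+                // U1 masks are bit-packed: write 'val' into bit (i % 8) of the byte; U8 masks store the value directly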
+                *(vx_uint8 *)mask_ptr = (arg_->maskFormat == VX_DF_IMAGE_U1)
+                                        ? (*(vx_uint8 *)mask_ptr & ~(1 << (i % 8))) | (val << (i % 8)) : val;
             }
         }
         status |= vxUnmapImagePatch(mask, mask_map_id);
@@ -310,7 +342,6 @@
 )
 {
     vx_context context = context_->vx_context_;
-    vx_graph graph = 0;
 
     vx_image input = 0;
     vx_image mask = 0;
@@ -344,22 +375,25 @@
 
     if (arg_->_mask)
     {
-        ASSERT_VX_OBJECT(mask = vxCreateImage(context, src_width, src_height, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
+        ASSERT_VX_OBJECT(mask = vxCreateImage(context, src_width, src_height, arg_->maskFormat), VX_TYPE_IMAGE);
         status = vxGetValidRegionImage(mask, &mask_rect);
         status |= vxMapImagePatch(mask, &mask_rect, 0, &mask_map_id, &mask_addr, (void **)&mask_base, VX_READ_AND_WRITE, VX_MEMORY_TYPE_HOST, 0);
         for (vx_uint32 i = 0; i < src_width; i++)
         {
             for (vx_uint32 j = 0; j < src_height; j++)
             {
-                void *src = vxFormatImagePatchAddress2d(mask_base, i, j, &mask_addr);
+                vx_uint8 val;
+                void *mask_ptr = vxFormatImagePatchAddress2d(mask_base, i, j, &mask_addr);
                 if (i % 2 == 0 && j % 2 == 0)
                 {
-                    *(vx_uint8 *)src = 1;
+                    val = 1;
                 }
                 else
                 {
-                    *(vx_uint8 *)src = 0;
+                    val = 0;
                 }
+                *(vx_uint8 *)mask_ptr = (arg_->maskFormat == VX_DF_IMAGE_U1)
+                                        ? (*(vx_uint8 *)mask_ptr & ~(1 << (i % 8))) | (val << (i % 8)) : val;
             }
         }
         status |= vxUnmapImagePatch(mask, mask_map_id);
@@ -392,10 +426,133 @@
     ASSERT(output == 0);
     ASSERT(mask == 0);
     ASSERT(input == 0);
-    ASSERT(graph == 0);
+}
+
+typedef struct {
+    const char* testName;
+    CT_Image(*generator)(const char* fileName, int width, int height);
+    const char* fileName;
+    vx_int32 wsize;
+    vx_bool _mask;
+    vx_df_image maskFormat;
+    vx_df_image format;
+    const char* result_filename;
+    vx_rectangle_t region_shift;
+} ValidRegionTest_Arg;
+
+#define REGION_PARAMETERS \
+    ARG("case_1_U8_nomask_region_shrink=1", nonmaxsuppression_read_image, "blurred_lena_gray.bmp", 1, vx_false_e, VX_DF_IMAGE_U8, VX_DF_IMAGE_U8, "nms_1_nomask.bmp", {1, 1, -1, -1}), \
+    ARG("case_1_U8_nomask_region_shrink=7", nonmaxsuppression_read_image, "blurred_lena_gray.bmp", 1, vx_false_e, VX_DF_IMAGE_U8, VX_DF_IMAGE_U8, "nms_1_nomask.bmp", {7, 7, -7, -7}), \
+    ARG("case_3_U8_mask_region_shrink=1",   nonmaxsuppression_read_image, "blurred_lena_gray.bmp", 3, vx_true_e,  VX_DF_IMAGE_U8, VX_DF_IMAGE_U8, "nms_3_mask.bmp",   {1, 1, -1, -1}), \
+    ARG("case_3_U8_mask_region_shrink=7",   nonmaxsuppression_read_image, "blurred_lena_gray.bmp", 3, vx_true_e,  VX_DF_IMAGE_U8, VX_DF_IMAGE_U8, "nms_3_mask.bmp",   {7, 7, -7, -7}), \
+    ARG("case_1_S16_nomask_region_shrink=1", nonmaxsuppression_generate_random, NULL, 1, vx_false_e, VX_DF_IMAGE_U8, VX_DF_IMAGE_S16, NULL, {1, 1, -1, -1}), \
+    ARG("case_1_S16_nomask_region_shrink=7", nonmaxsuppression_generate_random, NULL, 1, vx_false_e, VX_DF_IMAGE_U8, VX_DF_IMAGE_S16, NULL, {7, 7, -7, -7}), \
+    ARG("case_3_S16_mask_region_shrink=1",   nonmaxsuppression_generate_random, NULL, 3, vx_true_e,  VX_DF_IMAGE_U8, VX_DF_IMAGE_S16, NULL, {1, 1, -1, -1}), \
+    ARG("case_3_S16_mask_region_shrink=7",   nonmaxsuppression_generate_random, NULL, 3, vx_true_e,  VX_DF_IMAGE_U8, VX_DF_IMAGE_S16, NULL, {7, 7, -7, -7}), \
+    ARG("_U1_/case_3_U8_mask_region_shrink=1",  nonmaxsuppression_read_image, "blurred_lena_gray.bmp", 3, vx_true_e,  VX_DF_IMAGE_U1, VX_DF_IMAGE_U8, "nms_3_mask.bmp",   {1, 1, -1, -1}), \
+    ARG("_U1_/case_3_U8_mask_region_shrink=7",  nonmaxsuppression_read_image, "blurred_lena_gray.bmp", 3, vx_true_e,  VX_DF_IMAGE_U1, VX_DF_IMAGE_U8, "nms_3_mask.bmp",   {7, 7, -7, -7}), \
+    ARG("_U1_/case_3_S16_mask_region_shrink=1", nonmaxsuppression_generate_random, NULL, 3, vx_true_e,  VX_DF_IMAGE_U1, VX_DF_IMAGE_S16, NULL, {1, 1, -1, -1}), \
+    ARG("_U1_/case_3_S16_mask_region_shrink=7", nonmaxsuppression_generate_random, NULL, 3, vx_true_e,  VX_DF_IMAGE_U1, VX_DF_IMAGE_S16, NULL, {7, 7, -7, -7})
+
+TEST_WITH_ARG(Nonmaxsuppression, testWithValidRegion, ValidRegionTest_Arg,
+    REGION_PARAMETERS
+)
+{
+    vx_context context = context_->vx_context_;
+
+    vx_image input = 0, mask = 0, output = 0;
+    vx_uint32 src_width, src_height;
+
+    vx_int32 wsize = arg_->wsize;
+    vx_int32 border = wsize / 2;
+    CT_Image ct_input = NULL, ct_output = NULL, golden_image = NULL;
+
+    vx_status status;
+
+    void *mask_base = NULL;
+    vx_imagepatch_addressing_t mask_addr = VX_IMAGEPATCH_ADDR_INIT;
+    vx_rectangle_t mask_rect, src_rect, rect_shft = arg_->region_shift;
+    vx_map_id mask_map_id = 0;
+
+    if (arg_->format == VX_DF_IMAGE_U8)
+    {
+        ASSERT_NO_FAILURE(ct_input = arg_->generator(arg_->fileName, 0, 0));
+    }
+    else    // format == VX_DF_IMAGE_S16
+    {
+        ASSERT_NO_FAILURE(ct_input = arg_->generator(arg_->fileName, 640, 480));
+    }
+    src_width  = ct_input->width;
+    src_height = ct_input->height;
+
+    ASSERT_VX_OBJECT(input = ct_image_to_vx_image(ct_input, context), VX_TYPE_IMAGE);
+    ASSERT_VX_OBJECT(output = vxCreateImage(context, src_width, src_height, arg_->format), VX_TYPE_IMAGE);
+
+    if (arg_->_mask)
+    {
+        ASSERT_VX_OBJECT(mask = vxCreateImage(context, src_width, src_height, arg_->maskFormat), VX_TYPE_IMAGE);
+        status = vxGetValidRegionImage(mask, &mask_rect);
+        status |= vxMapImagePatch(mask, &mask_rect, 0, &mask_map_id, &mask_addr, (void **)&mask_base, VX_READ_AND_WRITE, VX_MEMORY_TYPE_HOST, 0);
+        for (vx_uint32 i = 0; i < src_width; i++)
+        {
+            for (vx_uint32 j = 0; j < src_height; j++)
+            {
+                vx_uint8 val;
+                void *mask_ptr = vxFormatImagePatchAddress2d(mask_base, i, j, &mask_addr);
+                if (i % 2 == 0 && j % 2 == 0)
+                {
+                    val = 1;
+                }
+                else
+                {
+                    val = 0;
+                }
+                *(vx_uint8 *)mask_ptr = (arg_->maskFormat == VX_DF_IMAGE_U1)
+                                        ? (*(vx_uint8 *)mask_ptr & ~(1 << (i % 8))) | (val << (i % 8)) : val;
+            }
+        }
+        status |= vxUnmapImagePatch(mask, mask_map_id);
+    }
+
+    ASSERT_NO_FAILURE(vxGetValidRegionImage(input, &src_rect));
+    ALTERRECTANGLE(src_rect, rect_shft.start_x, rect_shft.start_y, rect_shft.end_x, rect_shft.end_y);
+    ASSERT_NO_FAILURE(vxSetImageValidRectangle(input, &src_rect));
+
+    VX_CALL(vxuNonMaxSuppression(context, input, mask, wsize, output));
+
+    ASSERT_NO_FAILURE(ct_output = ct_image_from_vx_image(output));
+
+    if (arg_->format == VX_DF_IMAGE_U8)
+    {
+        golden_image = arg_->generator(arg_->result_filename, 0, 0);
+    }
+    else
+    {
+        golden_image = nonmax_golden(input, mask, wsize);
+    }
+
+    ct_adjust_roi(ct_output,    rect_shft.start_x, rect_shft.start_y, -rect_shft.end_x, -rect_shft.end_y);
+    ct_adjust_roi(golden_image, rect_shft.start_x, rect_shft.start_y, -rect_shft.end_x, -rect_shft.end_y);
+    ct_adjust_roi(ct_output,    border, border, border, border);
+    ct_adjust_roi(golden_image, border, border, border, border);
+    EXPECT_EQ_CTIMAGE(golden_image, ct_output);
+
+    VX_CALL(vxReleaseImage(&input));
+    if (arg_->_mask)
+    {
+        VX_CALL(vxReleaseImage(&mask));
+    }
+    VX_CALL(vxReleaseImage(&output));
+
+    ASSERT(output == 0);
+    ASSERT(mask == 0);
+    ASSERT(input == 0);
 }
 
 TESTCASE_TESTS(Nonmaxsuppression,
                testNodeCreation,
                testGraphProcessing,
-               testImmediateProcessing)
+               testImmediateProcessing,
+               testWithValidRegion)
+
+#endif //OPENVX_USE_ENHANCED_VISION
diff --git a/test_conformance/test_not.c b/test_conformance/test_not.c
index 5ef9366..d89ad56 100644
--- a/test_conformance/test_not.c
+++ b/test_conformance/test_not.c
@@ -1,4 +1,4 @@
-/* 
+/*
 
  * Copyright (c) 2012-2017 The Khronos Group Inc.
  *
@@ -15,6 +15,8 @@
  * limitations under the License.
  */
 
+#if defined OPENVX_USE_ENHANCED_VISION || OPENVX_CONFORMANCE_VISION
+
 #include "test_engine/test.h"
 
 #include <VX/vx.h>
@@ -29,65 +31,124 @@
     ASSERT(src && dst);
     ASSERT(src->width == dst->width);
     ASSERT(src->height == dst->height);
-    ASSERT(src->format == dst->format && src->format == VX_DF_IMAGE_U8);
+    ASSERT(src->format == dst->format && (src->format == VX_DF_IMAGE_U1 || src->format == VX_DF_IMAGE_U8));
 
     for (i = 0; i < dst->height; ++i)
         for (j = 0; j < dst->width; ++j)
-            dst->data.y[i * dst->stride + j] = ~src->data.y[i * src->stride + j];
+            if (src->format == VX_DF_IMAGE_U1)
+            {
+                uint32_t xShftd = j + src->roi.x % 8;         // Offset needed for U1 ROI
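+                // Invert the packed source byte, keep only this pixel's bit and merge it into the destination byte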
+                uint8_t  pixel  = ~src->data.y[i * ct_stride_bytes(src) + xShftd / 8] & (1 << xShftd % 8);
+                dst->data.y[i * ct_stride_bytes(dst) + xShftd / 8] =
+                    (dst->data.y[i * ct_stride_bytes(dst) + xShftd / 8] & ~(1 << xShftd % 8)) | pixel;
+            }
+            else
+            {
+                dst->data.y[i * dst->stride + j] = ~src->data.y[i * src->stride + j];
+            }
 }
 
-static void fillSquence(CT_Image dst, uint32_t seq_init)
+static void fillSequence(CT_Image dst, uint32_t seq_init)
 {
     uint32_t i, j;
     uint32_t val = seq_init;
 
     ASSERT(dst);
-    ASSERT(dst->format == VX_DF_IMAGE_U8);
+    ASSERT(dst->format == VX_DF_IMAGE_U1 || dst->format == VX_DF_IMAGE_U8);
 
     for (i = 0; i < dst->height; ++i)
+    {
         for (j = 0; j < dst->width; ++j)
-            dst->data.y[i * dst->stride + j] = ++val;
+        {
+            if (dst->format == VX_DF_IMAGE_U1)
+            {
+                uint32_t xShftd = j + dst->roi.x % 8;         // Offset needed for U1 ROI
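+                // Alternate 0/1 values and position the bit at this pixel's slot within the packed byte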
+                uint8_t  pixel  = (++val % 2) << (xShftd % 8);
+                dst->data.y[i * ct_stride_bytes(dst) + xShftd / 8] =
+                    (dst->data.y[i * ct_stride_bytes(dst) + xShftd / 8] & ~(1 << (xShftd % 8))) | pixel;
+            }
+            else
+            {
+                dst->data.y[i * dst->stride + j] = ++val;
+            }
+        }
+    }
 }
 
 TESTCASE(vxuNot, CT_VXContext, ct_setup_vx_context, 0)
 TESTCASE(vxNot,  CT_VXContext, ct_setup_vx_context, 0)
 
-
 TEST(vxuNot, testNegativeSizes)
 {
-    vx_image src16x88, dst88x16;
+    vx_image src16x88u8, dst88x16u8;
     vx_context context = context_->vx_context_;
 
-    ASSERT_VX_OBJECT(src16x88 = vxCreateImage(context, 16, 88, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
-    ASSERT_VX_OBJECT(dst88x16 = vxCreateImage(context, 88, 16, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
+    ASSERT_VX_OBJECT(src16x88u8 = vxCreateImage(context, 16, 88, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
+    ASSERT_VX_OBJECT(dst88x16u8 = vxCreateImage(context, 88, 16, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
+
+    // initialize to guarantee that the image is allocated
+    ASSERT_NO_FAILURE(ct_fill_image_random(src16x88u8, &CT()->seed_));
+
+    EXPECT_NE_VX_STATUS(VX_SUCCESS, vxuNot(context, src16x88u8, dst88x16u8));
+
+    VX_CALL(vxReleaseImage(&src16x88u8));
+    VX_CALL(vxReleaseImage(&dst88x16u8));
+}
+
+TEST(vxuNot, testNegativeSizes_U1_)
+{
+    vx_image src16x88u1, dst88x16u1;
+    vx_context context = context_->vx_context_;
+
+    ASSERT_VX_OBJECT(src16x88u1 = vxCreateImage(context, 16, 88, VX_DF_IMAGE_U1), VX_TYPE_IMAGE);
+    ASSERT_VX_OBJECT(dst88x16u1 = vxCreateImage(context, 88, 16, VX_DF_IMAGE_U1), VX_TYPE_IMAGE);
 
     // initialize to guarantee that images are allocated
-    ASSERT_NO_FAILURE(ct_fill_image_random(src16x88, &CT()->seed_));
+    ASSERT_NO_FAILURE(ct_fill_image_random(src16x88u1, &CT()->seed_));
 
-    EXPECT_NE_VX_STATUS(VX_SUCCESS, vxuNot(context, src16x88, dst88x16));
+    EXPECT_NE_VX_STATUS(VX_SUCCESS, vxuNot(context, src16x88u1, dst88x16u1));
 
-    VX_CALL(vxReleaseImage(&src16x88));
-    VX_CALL(vxReleaseImage(&dst88x16));
+    VX_CALL(vxReleaseImage(&src16x88u1));
+    VX_CALL(vxReleaseImage(&dst88x16u1));
 }
 
 TEST(vxNot, testNegativeSizes)
 {
-    vx_image src16x88, dst88x16;
+    vx_image src16x88u8, dst88x16u8;
     vx_graph graph;
     vx_context context = context_->vx_context_;
 
-    ASSERT_VX_OBJECT(src16x88 = vxCreateImage(context, 16, 88, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
-    ASSERT_VX_OBJECT(dst88x16 = vxCreateImage(context, 88, 16, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
+    ASSERT_VX_OBJECT(src16x88u8 = vxCreateImage(context, 16, 88, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
+    ASSERT_VX_OBJECT(dst88x16u8 = vxCreateImage(context, 88, 16, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
 
     ASSERT_VX_OBJECT(graph = vxCreateGraph(context), VX_TYPE_GRAPH);
-    ASSERT_VX_OBJECT(vxNotNode(graph, src16x88, dst88x16), VX_TYPE_NODE);
+    ASSERT_VX_OBJECT(vxNotNode(graph, src16x88u8, dst88x16u8), VX_TYPE_NODE);
     EXPECT_NE_VX_STATUS(VX_SUCCESS, vxVerifyGraph(graph));
 
-    VX_CALL(vxReleaseImage(&src16x88));
-    VX_CALL(vxReleaseImage(&dst88x16));
+    VX_CALL(vxReleaseImage(&src16x88u8));
+    VX_CALL(vxReleaseImage(&dst88x16u8));
     VX_CALL(vxReleaseGraph(&graph));
 }
 
+TEST(vxNot, testNegativeSizes_U1_)
+{
+    vx_image src16x88u1, dst88x16u1;
+    vx_graph graph;
+    vx_context context = context_->vx_context_;
+
+    ASSERT_VX_OBJECT(src16x88u1 = vxCreateImage(context, 16, 88, VX_DF_IMAGE_U1), VX_TYPE_IMAGE);
+    ASSERT_VX_OBJECT(dst88x16u1 = vxCreateImage(context, 88, 16, VX_DF_IMAGE_U1), VX_TYPE_IMAGE);
+
+    ASSERT_VX_OBJECT(graph = vxCreateGraph(context), VX_TYPE_GRAPH);
+    ASSERT_VX_OBJECT(vxNotNode(graph, src16x88u1, dst88x16u1), VX_TYPE_NODE);
+    EXPECT_NE_VX_STATUS(VX_SUCCESS, vxVerifyGraph(graph));
+
+    VX_CALL(vxReleaseImage(&src16x88u1));
+    VX_CALL(vxReleaseImage(&dst88x16u1));
+    VX_CALL(vxReleaseGraph(&graph));
+}
+
+static vx_df_image target_format;
 static vx_image inference_image;
 static vx_action VX_CALLBACK inference_image_test(vx_node node)
 {
@@ -101,7 +162,7 @@
 
     EXPECT_EQ_INT(640, width);
     EXPECT_EQ_INT(480, height);
-    EXPECT_EQ_INT(VX_DF_IMAGE_U8, format);
+    EXPECT_EQ_INT(target_format, format);
 
     return VX_ACTION_CONTINUE;
 }
@@ -123,6 +184,7 @@
     ASSERT_VX_OBJECT(tmp   = vxAddNode(graph, dst, src, VX_CONVERT_POLICY_WRAP, gr), VX_TYPE_NODE);
 
     // test
+    target_format = VX_DF_IMAGE_U8;
     inference_image = dst;
     EXPECT_EQ_VX_STATUS(VX_SUCCESS, vxAssignNodeCallback(n, inference_image_test));
     ASSERT_EQ_VX_STATUS(VX_SUCCESS, vxProcessGraph(graph));
@@ -135,48 +197,99 @@
     VX_CALL(vxReleaseGraph(&graph));
 }
 
+TEST(vxNot, testInference_U1_)
+{
+    vx_image src, dst, src2, dst2, gr;
+    vx_scalar sshift;
+    vx_int32 sval = 0;
+    vx_graph graph;
+    vx_node n, cn1, cn2, tmp;
+    vx_context context = context_->vx_context_;
+
+    ASSERT_VX_OBJECT(graph = vxCreateGraph(context), VX_TYPE_GRAPH);
+    ASSERT_VX_OBJECT(src   = vxCreateImage(context, 640, 480, VX_DF_IMAGE_U1), VX_TYPE_IMAGE);
+    ASSERT_VX_OBJECT(dst   = vxCreateVirtualImage(graph, 0, 0, VX_DF_IMAGE_VIRT), VX_TYPE_IMAGE);
+    ASSERT_VX_OBJECT(n     = vxNotNode(graph, src, dst), VX_TYPE_NODE);
+
+    // grounding (convert U1 images to U8 since vxAddNode doesn't support U1 images)
+    ASSERT_VX_OBJECT(src2  = vxCreateImage(context, 640, 480, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
+    ASSERT_VX_OBJECT(dst2  = vxCreateImage(context, 640, 480, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
+    ASSERT_VX_OBJECT(gr    = vxCreateImage(context, 640, 480, VX_DF_IMAGE_S16), VX_TYPE_IMAGE);
+    ASSERT_VX_OBJECT(sshift = vxCreateScalar(context, VX_TYPE_INT32, &sval), VX_TYPE_SCALAR);
+    ASSERT_VX_OBJECT(cn1   = vxConvertDepthNode(graph, src, src2, VX_CONVERT_POLICY_SATURATE, sshift), VX_TYPE_NODE);
+    ASSERT_VX_OBJECT(cn2   = vxConvertDepthNode(graph, dst, dst2, VX_CONVERT_POLICY_SATURATE, sshift), VX_TYPE_NODE);
+    ASSERT_VX_OBJECT(tmp   = vxAddNode(graph, dst2, src2, VX_CONVERT_POLICY_WRAP, gr), VX_TYPE_NODE);
+
+    // test
+    target_format = VX_DF_IMAGE_U1;
+    inference_image = dst;
+    EXPECT_EQ_VX_STATUS(VX_SUCCESS, vxAssignNodeCallback(n, inference_image_test));
+    ASSERT_EQ_VX_STATUS(VX_SUCCESS, vxProcessGraph(graph));
+
+    VX_CALL(vxReleaseNode(&n));
+    VX_CALL(vxReleaseNode(&cn1));
+    VX_CALL(vxReleaseNode(&cn2));
+    VX_CALL(vxReleaseNode(&tmp));
+    VX_CALL(vxReleaseScalar(&sshift));
+    VX_CALL(vxReleaseImage(&src));
+    VX_CALL(vxReleaseImage(&dst));
+    VX_CALL(vxReleaseImage(&src2));
+    VX_CALL(vxReleaseImage(&dst2));
+    VX_CALL(vxReleaseImage(&gr));
+    VX_CALL(vxReleaseGraph(&graph));
+}
+
 typedef struct {
     const char* name;
     uint32_t width;
     uint32_t height;
+    vx_df_image format;
 } size_arg;
 
-#define SIZE_ARG(w,h) ARG(#w "x" #h, w, h)
+#define SIZE_ARG_U8(w,h) ARG(#w "x" #h, w, h, VX_DF_IMAGE_U8)
+#define SIZE_ARG_U1(w,h) ARG("_U1_/" #w "x" #h, w, h, VX_DF_IMAGE_U1)
 
-#define NOT_SIZE_ARGS       \
-    SIZE_ARG(640, 480),     \
-    ARG_EXTENDED_BEGIN(),   \
-    SIZE_ARG(1, 1),         \
-    SIZE_ARG(15, 17),       \
-    SIZE_ARG(32, 32),       \
-    SIZE_ARG(1231, 1234),   \
-    SIZE_ARG(1280, 720),    \
-    SIZE_ARG(1920, 1080),   \
+#define NOT_SIZE_ARGS           \
+    SIZE_ARG_U8(640, 480),      \
+    SIZE_ARG_U1(640, 480),      \
+    ARG_EXTENDED_BEGIN(),       \
+    SIZE_ARG_U8(1, 1),          \
+    SIZE_ARG_U8(15, 17),        \
+    SIZE_ARG_U8(32, 32),        \
+    SIZE_ARG_U8(1231, 1234),    \
+    SIZE_ARG_U8(1280, 720),     \
+    SIZE_ARG_U8(1920, 1080),    \
+    SIZE_ARG_U1(1, 1),          \
+    SIZE_ARG_U1(15, 17),        \
+    SIZE_ARG_U1(32, 32),        \
+    SIZE_ARG_U1(1231, 1234),    \
+    SIZE_ARG_U1(1280, 720),     \
+    SIZE_ARG_U1(1920, 1080),    \
     ARG_EXTENDED_END()
 
 TEST_WITH_ARG(vxuNot, testSizes, size_arg, NOT_SIZE_ARGS)
 {
     vx_image src, dst;
-    CT_Image ref_src, refdst, vxdst;
+    CT_Image ref_src, ref_dst, vx_dst;
     vx_context context = context_->vx_context_;
 
     ASSERT_NO_FAILURE({
-        ref_src = ct_allocate_image(arg_->width, arg_->height, VX_DF_IMAGE_U8);
-        fillSquence(ref_src, (uint32_t)CT()->seed_);
+        ref_src = ct_allocate_image(arg_->width, arg_->height, arg_->format);
+        fillSequence(ref_src, (uint32_t)CT()->seed_);
         src = ct_image_to_vx_image(ref_src, context);
     });
 
-    ASSERT_VX_OBJECT(dst = vxCreateImage(context, arg_->width, arg_->height, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
+    ASSERT_VX_OBJECT(dst = vxCreateImage(context, arg_->width, arg_->height, arg_->format), VX_TYPE_IMAGE);
 
     ASSERT_EQ_VX_STATUS(VX_SUCCESS, vxuNot(context, src, dst));
 
     ASSERT_NO_FAILURE({
-        vxdst = ct_image_from_vx_image(dst);
-        refdst = ct_allocate_image(arg_->width, arg_->height, VX_DF_IMAGE_U8);
-        referenceNot(ref_src, refdst);
+        vx_dst = ct_image_from_vx_image(dst);
+        ref_dst = ct_allocate_image(arg_->width, arg_->height, arg_->format);
+        referenceNot(ref_src, ref_dst);
     });
 
-    ASSERT_EQ_CTIMAGE(refdst, vxdst);
+    ASSERT_EQ_CTIMAGE(ref_dst, vx_dst);
 
     // checked release vx images
     VX_CALL(vxReleaseImage(&dst));
@@ -188,19 +301,19 @@
 TEST_WITH_ARG(vxNot, testSizes, size_arg, NOT_SIZE_ARGS)
 {
     vx_image src, dst;
-    CT_Image ref_src, refdst, vxdst;
+    CT_Image ref_src, ref_dst, vx_dst;
     vx_graph graph;
     vx_context context = context_->vx_context_;
 
     ASSERT_NO_FAILURE({
-        ref_src = ct_allocate_image(arg_->width, arg_->height, VX_DF_IMAGE_U8);
-        fillSquence(ref_src, (uint32_t)CT()->seed_);
+        ref_src = ct_allocate_image(arg_->width, arg_->height, arg_->format);
+        fillSequence(ref_src, (uint32_t)CT()->seed_);
         src = ct_image_to_vx_image(ref_src, context);
     });
 
     // build one-node graph
     ASSERT_VX_OBJECT(graph = vxCreateGraph(context), VX_TYPE_GRAPH);
-    ASSERT_VX_OBJECT(dst   = vxCreateImage(context, arg_->width, arg_->height, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
+    ASSERT_VX_OBJECT(dst   = vxCreateImage(context, arg_->width, arg_->height, arg_->format), VX_TYPE_IMAGE);
     ASSERT_VX_OBJECT(vxNotNode(graph, src, dst), VX_TYPE_NODE);
 
     // run graph
@@ -212,19 +325,26 @@
 #endif
 
     ASSERT_NO_FAILURE({
-        vxdst = ct_image_from_vx_image(dst);
-        refdst = ct_allocate_image(arg_->width, arg_->height, VX_DF_IMAGE_U8);
-        referenceNot(ref_src, refdst);
+        vx_dst  = ct_image_from_vx_image(dst);
+        ref_dst = ct_allocate_image(arg_->width, arg_->height, arg_->format);
+        referenceNot(ref_src, ref_dst);
     });
 
-    ASSERT_EQ_CTIMAGE(refdst, vxdst);
+    ASSERT_EQ_CTIMAGE(ref_dst, vx_dst);
 
     VX_CALL(vxReleaseImage(&src));
     VX_CALL(vxReleaseImage(&dst));
     VX_CALL(vxReleaseGraph(&graph));
 }
 
+TESTCASE_TESTS(vxuNot, DISABLED_testNegativeSizes,
+                       DISABLED_testNegativeSizes_U1_,
+                       testSizes)
 
-TESTCASE_TESTS(vxuNot, DISABLED_testNegativeSizes,                testSizes)
-TESTCASE_TESTS(vxNot,  DISABLED_testNegativeSizes, testInference, testSizes)
+TESTCASE_TESTS(vxNot,  DISABLED_testNegativeSizes,
+                       DISABLED_testNegativeSizes_U1_,
+                       testInference,
+                       testInference_U1_,
+                       testSizes)
 
+#endif //OPENVX_USE_ENHANCED_VISION || OPENVX_CONFORMANCE_VISION
diff --git a/test_conformance/test_object_array.c b/test_conformance/test_object_array.c
index 84e787e..9cf8c02 100644
--- a/test_conformance/test_object_array.c
+++ b/test_conformance/test_object_array.c
@@ -15,6 +15,8 @@
  * limitations under the License.
  */
 
+#if defined OPENVX_USE_ENHANCED_VISION || OPENVX_CONFORMANCE_VISION
+
 #include <VX/vx.h>
 #include <VX/vxu.h>
 
@@ -75,7 +77,7 @@
             exemplar = (vx_reference)vxCreateLUT(context, obj_item_type, lut_num_items);
             break;
         case VX_TYPE_THRESHOLD:
-            exemplar = (vx_reference)vxCreateThreshold(context, thresh_type, obj_item_type);
+            exemplar = (vx_reference)vxCreateThresholdForImage(context, thresh_type, format, format);
             break;
         default:
             break;
@@ -318,6 +320,75 @@
     ASSERT(object_array == 0);
 }
 
+#define ADD_VX_VIRTUAL_OBJECT_ARRAY_TYPES(testArgName, nextmacro, ...) \
+    CT_EXPAND(nextmacro(testArgName "/VX_TYPE_IMAGE", __VA_ARGS__, VX_TYPE_IMAGE)), \
+    CT_EXPAND(nextmacro(testArgName "/VX_TYPE_ARRAY", __VA_ARGS__, VX_TYPE_ARRAY)), \
+    CT_EXPAND(nextmacro(testArgName "/VX_TYPE_PYRAMID", __VA_ARGS__, VX_TYPE_PYRAMID))
+
+#define VIRTUAL_OBJECT_ARRAY_PARAMETERS \
+    CT_GENERATE_PARAMETERS("object_array", ADD_VX_VIRTUAL_OBJECT_ARRAY_TYPES, ARG, NULL)
+
+
+TEST_WITH_ARG(ObjectArray, test_vxCreateVirtualObjectArray, Obj_Array_Arg,
+              VIRTUAL_OBJECT_ARRAY_PARAMETERS)
+{
+    vx_context context = context_->vx_context_;
+
+    vx_reference exemplar = NULL;
+    vx_size num_items = OBJECT_ARRAY_NUM_ITEMS;
+    vx_enum item_type = arg_->item_type;
+
+    vx_graph graph = 0;
+    vx_object_array object_array = 0;
+
+    vx_reference actual_item = NULL;
+    vx_enum actual_type = VX_TYPE_INVALID;
+    vx_size actual_num_items = 0;
+
+    vx_uint32 i;
+
+    ASSERT_VX_OBJECT(exemplar = own_create_exemplar(context, item_type), (enum vx_type_e)item_type);
+    ASSERT_VX_OBJECT(graph = vxCreateGraph(context), VX_TYPE_GRAPH);
+
+    /* 1. check if object array can be created with allowed types*/
+    ASSERT_VX_OBJECT(object_array = vxCreateVirtualObjectArray(graph, exemplar, num_items), VX_TYPE_OBJECT_ARRAY);
+
+    /* 2. check if object array's actual item_type corresponds to requested item_type */
+    VX_CALL(vxQueryObjectArray(object_array, VX_OBJECT_ARRAY_ITEMTYPE, &actual_type, sizeof(actual_type)));
+    ASSERT_EQ_INT(item_type, actual_type);
+
+    /* 3. check if object array's actual item_size corresponds to requested item_type size */
+    VX_CALL(vxQueryObjectArray(object_array, VX_OBJECT_ARRAY_NUMITEMS, &actual_num_items, sizeof(actual_num_items)));
+    ASSERT_EQ_INT(num_items, actual_num_items);
+
+    /* 4. check meta formats of objects in object array */
+    for (i = 0u; i < num_items; i++)
+    {
+        ASSERT_VX_OBJECT(actual_item = vxGetObjectArrayItem(object_array, i), (enum vx_type_e)item_type);
+
+        ASSERT_NO_FAILURE(own_check_meta(actual_item, exemplar));
+
+        VX_CALL(vxReleaseReference(&actual_item));
+        ASSERT(actual_item == 0);
+    }
+
+    /* 5. check that we can't get item out of object array's range */
+    actual_item = vxGetObjectArrayItem(object_array, (vx_uint32)num_items);
+    ASSERT_NE_VX_STATUS(VX_SUCCESS, vxGetStatus((vx_reference)actual_item));
+
+    VX_CALL(vxReleaseReference(&exemplar));
+    ASSERT(exemplar == 0);
+
+    VX_CALL(vxReleaseObjectArray(&object_array));
+    ASSERT(object_array == 0);
+
+    VX_CALL(vxReleaseGraph(&graph));
+    ASSERT(graph == 0);
+}
+
 TESTCASE_TESTS(
     ObjectArray,
-    test_vxCreateObjectArray)
+    test_vxCreateObjectArray,
+    test_vxCreateVirtualObjectArray)
+
+#endif //OPENVX_USE_ENHANCED_VISION || OPENVX_CONFORMANCE_VISION
diff --git a/test_conformance/test_optflowpyrlk.c b/test_conformance/test_optflowpyrlk.c
index ad0790d..9cc50aa 100644
--- a/test_conformance/test_optflowpyrlk.c
+++ b/test_conformance/test_optflowpyrlk.c
@@ -1,4 +1,4 @@
-/* 
+/*
 
  * Copyright (c) 2012-2017 The Khronos Group Inc.
  *
@@ -15,6 +15,8 @@
  * limitations under the License.
  */
 
+#if defined OPENVX_USE_ENHANCED_VISION || OPENVX_CONFORMANCE_VISION
+
 #include "test_engine/test.h"
 #include <VX/vx.h>
 #include <VX/vxu.h>
@@ -469,3 +471,5 @@
         testGraphProcessing,
         testImmediateProcessing
         )
+
+#endif //OPENVX_USE_ENHANCED_VISION || OPENVX_CONFORMANCE_VISION
diff --git a/test_conformance/test_phase.c b/test_conformance/test_phase.c
index ae0e212..ff5f77d 100644
--- a/test_conformance/test_phase.c
+++ b/test_conformance/test_phase.c
@@ -1,4 +1,4 @@
-/* 
+/*
 
  * Copyright (c) 2012-2017 The Khronos Group Inc.
  *
@@ -15,6 +15,8 @@
  * limitations under the License.
  */
 
+#if defined OPENVX_USE_ENHANCED_VISION || OPENVX_CONFORMANCE_VISION
+
 #include "test_engine/test.h"
 
 #include <VX/vx.h>
@@ -213,3 +215,5 @@
 }
 
 TESTCASE_TESTS(Phase, testOnRandom)
+
+#endif //OPENVX_USE_ENHANCED_VISION || OPENVX_CONFORMANCE_VISION
diff --git a/test_conformance/test_remap.c b/test_conformance/test_remap.c
index b28752d..fcf6e9c 100644
--- a/test_conformance/test_remap.c
+++ b/test_conformance/test_remap.c
@@ -14,6 +14,8 @@
  * limitations under the License.
  */
 
+#if defined OPENVX_USE_ENHANCED_VISION || OPENVX_CONFORMANCE_VISION
+
 #include "test_engine/test.h"
 #include <VX/vx.h>
 #include <VX/vxu.h>
@@ -243,7 +245,7 @@
             }
         }
 
-        vxCopyRemapPatch(map, &rect, stride_y, ptr_w, VX_TYPE_COORDINATES2DF, VX_WRITE_ONLY, VX_MEMORY_TYPE_HOST);    
+        vxCopyRemapPatch(map, &rect, stride_y, ptr_w, VX_TYPE_COORDINATES2DF, VX_WRITE_ONLY, VX_MEMORY_TYPE_HOST);
         ct_free_mem(ptr_w);
     }
 
@@ -636,7 +638,7 @@
     {
         expected_status = VX_SUCCESS;
     }
-        
+
     if (status == VX_SUCCESS)
     {
         ASSERT_NO_FAILURE(output = ct_image_from_vx_image(output_image));
@@ -855,3 +857,4 @@
 TESTCASE_TESTS(vxCopyRemapPatch, testCopyRandomReamp)
 TESTCASE_TESTS(vxMapRemapPatch, testMapRandomRemap)
 
+#endif //OPENVX_USE_ENHANCED_VISION || OPENVX_CONFORMANCE_VISION
diff --git a/test_conformance/test_scalar.c b/test_conformance/test_scalar.c
index eb9944c..5c92d19 100644
--- a/test_conformance/test_scalar.c
+++ b/test_conformance/test_scalar.c
@@ -15,7 +15,10 @@
  * limitations under the License.
  */
 
+#if defined OPENVX_USE_ENHANCED_VISION || OPENVX_CONFORMANCE_VISION
+
 #include <math.h>
+#include <string.h>
 #include <float.h>
 #include <VX/vx.h>
 
@@ -42,6 +45,18 @@
     vx_bool     boolean;
     vx_uint8    data[8];
 
+    /* support type of scalar with size */
+    vx_rectangle_t rect;
+    vx_keypoint_t  key_point;
+    vx_coordinates2d_t coord2d;
+    vx_coordinates3d_t coord3d;
+#ifdef OPENVX_USE_ENHANCED_VISION
+    vx_coordinates2df_t coord2df;
+    vx_hog_t  hog;
+    vx_hough_lines_p_t houghlines;
+    vx_line2d_t line2d;
+    vx_tensor_matrix_multiply_params_t matrix;
+#endif
 } scalar_val;
 
 typedef struct
@@ -70,12 +85,268 @@
     case VX_TYPE_DF_IMAGE: val->fcc = variant == vx_true_e ? VX_DF_IMAGE_RGB : VX_DF_IMAGE_U8; break;
     case VX_TYPE_BOOL:     val->boolean = variant == vx_true_e ? vx_true_e : vx_false_e; break;
 
+    /* support type of scalar with size */
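+    /* For the struct-typed scalars below, only the vx_true_e variant fills in values; vx_false_e leaves the caller's buffer untouched */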
+    case VX_TYPE_RECTANGLE:
+        if (variant == vx_true_e)
+        {
+            val->rect.start_x = 0;
+            val->rect.start_y = 0;
+            val->rect.end_x = 1280;
+            val->rect.end_y = 720;
+        }
+        break;
+    case VX_TYPE_KEYPOINT:
+        if (variant == vx_true_e)
+        {
+            val->key_point.x = 128;
+            val->key_point.y = 256;
+            val->key_point.strength = 20.0f;
+            val->key_point.scale = 0.8f;
+            val->key_point.orientation = 0.3f;
+            val->key_point.tracking_status = 1;
+            val->key_point.error = 0;
+        }
+        break;
+    case VX_TYPE_COORDINATES2D:
+        if (variant == vx_true_e)
+        {
+            val->coord2d.x = 10;
+            val->coord2d.y = 9;
+        }
+        break;
+    case VX_TYPE_COORDINATES3D:
+        if (variant == vx_true_e)
+        {
+            val->coord3d.x = 16;
+            val->coord3d.y = 31;
+            val->coord3d.z = 22;
+        }
+        break;
+#ifdef OPENVX_USE_ENHANCED_VISION
+    case VX_TYPE_COORDINATES2DF:
+        if (variant == vx_true_e)
+        {
+            val->coord2df.x = 2.7f;
+            val->coord2df.y = 3.5f;
+        }
+        break;
+    case VX_TYPE_HOG_PARAMS:
+        if (variant == vx_true_e)
+        {
+            val->hog.cell_width = 16;
+            val->hog.cell_height = 16;
+            val->hog.block_width = 128;
+            val->hog.block_height = 128;
+            val->hog.block_stride = 1280;
+            val->hog.num_bins = 5;
+            val->hog.window_width = 1280;
+            val->hog.window_height = 720;
+            val->hog.window_stride = 1280;
+            val->hog.threshold = 0.2f;
+        }
+        break;
+    case VX_TYPE_HOUGH_LINES_PARAMS:
+        if (variant == vx_true_e)
+        {
+            val->houghlines.rho = 0.8f;
+            val->houghlines.theta = 0.5f;
+            val->houghlines.threshold = 235;
+            val->houghlines.line_length = 8;
+            val->houghlines.line_gap = 3;
+            val->houghlines.theta_max = 1.2f;
+            val->houghlines.theta_min = 0.1f;
+        }
+        break;
+    case VX_TYPE_LINE_2D:
+        if (variant == vx_true_e)
+        {
+            val->line2d.start_x = 2.3f;
+            val->line2d.start_y = 1.5f;
+            val->line2d.end_x = 1279.9f;
+            val->line2d.end_y = 718.8f;
+        }
+        break;
+    case VX_TYPE_TENSOR_MATRIX_MULTIPLY_PARAMS:
+        if (variant == vx_true_e)
+        {
+            val->matrix.transpose_input1 = vx_true_e;
+            val->matrix.transpose_input2 = vx_false_e;
+            val->matrix.transpose_input3 = vx_true_e;
+        }
+        break;
+#endif
     default:
         FAIL("Unsupported type: (%.4s)", &type);
     }
     return;
 }
 
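+/* Map a scalar type enum to the payload size passed to vxCreateScalarWithSize / vxCopyScalarWithSize */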
+static vx_size ownGetSizeByType(vx_enum type)
+{
+    vx_size size = 0;
+    switch (type)
+    {
+    case VX_TYPE_CHAR:     size = sizeof(vx_char); break;
+    case VX_TYPE_INT8:     size = sizeof(vx_int8); break;
+    case VX_TYPE_UINT8:    size = sizeof(vx_uint8); break;
+    case VX_TYPE_INT16:    size = sizeof(vx_int16); break;
+    case VX_TYPE_UINT16:   size = sizeof(vx_uint16); break;
+    case VX_TYPE_INT32:    size = sizeof(vx_int32); break;
+    case VX_TYPE_UINT32:   size = sizeof(vx_uint32); break;
+    case VX_TYPE_INT64:    size = sizeof(vx_int64); break;
+    case VX_TYPE_UINT64:   size = sizeof(vx_uint64); break;
+    case VX_TYPE_FLOAT32:  size = sizeof(vx_float32); break;
+    case VX_TYPE_FLOAT64:  size = sizeof(vx_float64); break;
+    case VX_TYPE_ENUM:     size = sizeof(vx_int32); break;
+    case VX_TYPE_SIZE:     size = sizeof(vx_size); break;
+    case VX_TYPE_DF_IMAGE: size = sizeof(vx_df_image); break;
+    case VX_TYPE_BOOL:     size = sizeof(vx_bool); break;
+
+    /* support type of scalar with size */
+    case VX_TYPE_RECTANGLE:
+        size = sizeof(vx_rectangle_t);
+        break;
+    case VX_TYPE_KEYPOINT:
+        size = sizeof(vx_keypoint_t);
+        break;
+    case VX_TYPE_COORDINATES2D:
+        size = sizeof(vx_coordinates2d_t);
+        break;
+    case VX_TYPE_COORDINATES3D:
+        size = sizeof(vx_coordinates3d_t);
+        break;
+#ifdef OPENVX_USE_ENHANCED_VISION
+    case VX_TYPE_COORDINATES2DF:
+        size = sizeof(vx_coordinates2df_t);
+        break;
+    case VX_TYPE_HOG_PARAMS:
+        size = sizeof(vx_hog_t);
+        break;
+    case VX_TYPE_HOUGH_LINES_PARAMS:
+        size = sizeof(vx_hough_lines_p_t);
+        break;
+    case VX_TYPE_LINE_2D:
+        size = sizeof(vx_line2d_t);
+        break;
+    case VX_TYPE_TENSOR_MATRIX_MULTIPLY_PARAMS:
+        size = sizeof(vx_tensor_matrix_multiply_params_t);
+        break;
+#endif
+    default:
+        CT_RecordFailureAtFormat("Unsupported type: (%.4s)", __FUNCTION__, __FILE__, __LINE__, &type);
+        break;
+    }
+    return size;
+}
+
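+/* Compare two scalar payloads of the given type: numeric fields directly, struct types byte-for-byte with memcmp */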
+static void ownCheckScalarVal(vx_enum type, scalar_val *actual_val, scalar_val *expect_val)
+{
+    switch (type)
+    {
+    case VX_TYPE_CHAR:
+        ASSERT_EQ_INT(actual_val->chr, expect_val->chr);
+        break;
+
+    case VX_TYPE_INT8:
+        ASSERT_EQ_INT(actual_val->s08, expect_val->s08);
+        break;
+
+    case VX_TYPE_UINT8:
+        ASSERT_EQ_INT(actual_val->u08, expect_val->u08);
+        break;
+
+    case VX_TYPE_INT16:
+        ASSERT_EQ_INT(actual_val->s16, expect_val->s16);
+        break;
+
+    case VX_TYPE_UINT16:
+        ASSERT_EQ_INT(actual_val->u16, expect_val->u16);
+        break;
+
+    case VX_TYPE_INT32:
+        ASSERT_EQ_INT(actual_val->s32, expect_val->s32);
+        break;
+
+    case VX_TYPE_UINT32:
+        ASSERT_EQ_INT(actual_val->u32, expect_val->u32);
+        break;
+
+    case VX_TYPE_INT64:
+        ASSERT_EQ_INT(actual_val->s64, expect_val->s64);
+        break;
+
+    case VX_TYPE_UINT64:
+        ASSERT_EQ_INT(actual_val->u64, expect_val->u64);
+        break;
+
+    case VX_TYPE_FLOAT32:
+        ASSERT(fabs(actual_val->f32 - expect_val->f32) < 0.000001f);
+        break;
+
+    case VX_TYPE_FLOAT64:
+        ASSERT(fabs(actual_val->f64 - expect_val->f64) < 0.000001f);
+        break;
+
+    case VX_TYPE_DF_IMAGE:
+        ASSERT_EQ_INT(actual_val->fcc, expect_val->fcc);
+        break;
+
+    case VX_TYPE_ENUM:
+        ASSERT_EQ_INT(actual_val->enm, expect_val->enm);
+        break;
+
+    case VX_TYPE_SIZE:
+        ASSERT_EQ_INT(actual_val->size, expect_val->size);
+        break;
+
+    case VX_TYPE_BOOL:
+        ASSERT_EQ_INT(actual_val->boolean, expect_val->boolean);
+        break;
+
+    case VX_TYPE_RECTANGLE:
+        ASSERT_EQ_INT(memcmp(&actual_val->rect, &expect_val->rect, sizeof(actual_val->rect)), 0);
+        break;
+
+    case VX_TYPE_KEYPOINT:
+        ASSERT_EQ_INT(memcmp(&actual_val->key_point, &expect_val->key_point, sizeof(actual_val->key_point)), 0);
+        break;
+
+    case VX_TYPE_COORDINATES2D:
+        ASSERT_EQ_INT(memcmp(&actual_val->coord2d, &expect_val->coord2d, sizeof(actual_val->coord2d)), 0);
+        break;
+
+    case VX_TYPE_COORDINATES3D:
+        ASSERT_EQ_INT(memcmp(&actual_val->coord3d, &expect_val->coord3d, sizeof(actual_val->coord3d)), 0);
+        break;
+
+#ifdef OPENVX_USE_ENHANCED_VISION
+    case VX_TYPE_COORDINATES2DF:
+        ASSERT_EQ_INT(memcmp(&actual_val->coord2df, &expect_val->coord2df, sizeof(actual_val->coord2df)), 0);
+        break;
+
+    case VX_TYPE_HOG_PARAMS:
+        ASSERT_EQ_INT(memcmp(&actual_val->hog, &expect_val->hog, sizeof(actual_val->hog)), 0);
+        break;
+
+    case VX_TYPE_HOUGH_LINES_PARAMS:
+        ASSERT_EQ_INT(memcmp(&actual_val->houghlines, &expect_val->houghlines, sizeof(actual_val->houghlines)), 0);
+        break;
+
+    case VX_TYPE_LINE_2D:
+        ASSERT_EQ_INT(memcmp(&actual_val->line2d, &expect_val->line2d, sizeof(actual_val->line2d)), 0);
+        break;
+
+    case VX_TYPE_TENSOR_MATRIX_MULTIPLY_PARAMS:
+        ASSERT_EQ_INT(memcmp(&actual_val->matrix, &expect_val->matrix, sizeof(actual_val->matrix)), 0);
+        break;
+#endif
+    default:
+        FAIL("Unsupported type: (%.4s)", &type);
+        break;
+    }
+    return;
+}
+
 TEST_WITH_ARG(Scalar, testCreateScalar, format_arg,
     ARG_ENUM(VX_TYPE_CHAR),
     ARG_ENUM(VX_TYPE_INT8),
@@ -327,8 +598,166 @@
     return;
 } /* testCopyScalar() */
 
+TEST_WITH_ARG(Scalar, testCreateScalarWithSize, format_arg,
+    ARG_ENUM(VX_TYPE_CHAR),
+    ARG_ENUM(VX_TYPE_INT8),
+    ARG_ENUM(VX_TYPE_UINT8),
+    ARG_ENUM(VX_TYPE_INT16),
+    ARG_ENUM(VX_TYPE_UINT16),
+    ARG_ENUM(VX_TYPE_INT32),
+    ARG_ENUM(VX_TYPE_UINT32),
+    ARG_ENUM(VX_TYPE_INT64),
+    ARG_ENUM(VX_TYPE_UINT64),
+    ARG_ENUM(VX_TYPE_FLOAT32),
+    ARG_ENUM(VX_TYPE_FLOAT64),
+    ARG_ENUM(VX_TYPE_ENUM),
+    ARG_ENUM(VX_TYPE_SIZE),
+    ARG_ENUM(VX_TYPE_DF_IMAGE),
+    ARG_ENUM(VX_TYPE_BOOL),
+    ARG_ENUM(VX_TYPE_RECTANGLE),
+    ARG_ENUM(VX_TYPE_KEYPOINT),
+    ARG_ENUM(VX_TYPE_COORDINATES2D),
+    ARG_ENUM(VX_TYPE_COORDINATES3D),
+#ifdef OPENVX_USE_ENHANCED_VISION
+    ARG_ENUM(VX_TYPE_COORDINATES2DF),
+    ARG_ENUM(VX_TYPE_HOG_PARAMS),
+    ARG_ENUM(VX_TYPE_HOUGH_LINES_PARAMS),
+    ARG_ENUM(VX_TYPE_LINE_2D),
+    ARG_ENUM(VX_TYPE_TENSOR_MATRIX_MULTIPLY_PARAMS)
+#endif
+    )
+{
+    vx_context context = context_->vx_context_;
+    vx_scalar  scalar = 0;
+    vx_enum    ref_type = arg_->data_type;
+    scalar_val ref;
+    vx_size    ref_size = ownGetSizeByType(ref_type);
+
+    own_init_scalar_value(ref_type, &ref, vx_true_e);
+
+    ASSERT_VX_OBJECT(scalar = vxCreateScalarWithSize(context, ref_type, &ref, ref_size), VX_TYPE_SCALAR);
+
+    VX_CALL(vxReleaseScalar(&scalar));
+
+    ASSERT(scalar == 0);
+
+    return;
+}
+
+TEST_WITH_ARG(Scalar, testCopyScalarWithSize, format_arg,
+    ARG_ENUM(VX_TYPE_CHAR),
+    ARG_ENUM(VX_TYPE_INT8),
+    ARG_ENUM(VX_TYPE_UINT8),
+    ARG_ENUM(VX_TYPE_INT16),
+    ARG_ENUM(VX_TYPE_UINT16),
+    ARG_ENUM(VX_TYPE_INT32),
+    ARG_ENUM(VX_TYPE_UINT32),
+    ARG_ENUM(VX_TYPE_INT64),
+    ARG_ENUM(VX_TYPE_UINT64),
+    ARG_ENUM(VX_TYPE_FLOAT32),
+    ARG_ENUM(VX_TYPE_FLOAT64),
+    ARG_ENUM(VX_TYPE_ENUM),
+    ARG_ENUM(VX_TYPE_SIZE),
+    ARG_ENUM(VX_TYPE_DF_IMAGE),
+    ARG_ENUM(VX_TYPE_BOOL),
+    ARG_ENUM(VX_TYPE_RECTANGLE),
+    ARG_ENUM(VX_TYPE_KEYPOINT),
+    ARG_ENUM(VX_TYPE_COORDINATES2D),
+    ARG_ENUM(VX_TYPE_COORDINATES3D),
+#ifdef OPENVX_USE_ENHANCED_VISION
+    ARG_ENUM(VX_TYPE_COORDINATES2DF),
+    ARG_ENUM(VX_TYPE_HOG_PARAMS),
+    ARG_ENUM(VX_TYPE_HOUGH_LINES_PARAMS),
+    ARG_ENUM(VX_TYPE_LINE_2D),
+    ARG_ENUM(VX_TYPE_TENSOR_MATRIX_MULTIPLY_PARAMS)
+#endif
+    )
+{
+    vx_context context = context_->vx_context_;
+    vx_scalar  scalar = 0;
+    vx_enum    ref_type = arg_->data_type;
+    scalar_val ref;
+    vx_size    ref_size = ownGetSizeByType(ref_type);
+    scalar_val expect_ref;
+    scalar_val actual_ref;
+
+    own_init_scalar_value(ref_type, &ref, vx_true_e);
+
+    ASSERT_VX_OBJECT(scalar = vxCreateScalarWithSize(context, ref_type, &ref, ref_size), VX_TYPE_SCALAR);
+
+    /* Read back the value the scalar was created with and check it matches the reference */
+    VX_CALL(vxCopyScalarWithSize(scalar, ref_size, &expect_ref, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+    ownCheckScalarVal(ref_type, &ref, &expect_ref);
+
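+    /* Overwrite the scalar with the vx_false_e variant of the value, then read it back and check the round trip */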
+    own_init_scalar_value(ref_type, &actual_ref, vx_false_e);
+    VX_CALL(vxCopyScalarWithSize(scalar, ref_size, &actual_ref, VX_WRITE_ONLY, VX_MEMORY_TYPE_HOST));
+    VX_CALL(vxCopyScalarWithSize(scalar, ref_size, &expect_ref, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+    ownCheckScalarVal(ref_type, &actual_ref, &expect_ref);
+
+    VX_CALL(vxReleaseScalar(&scalar));
+
+    ASSERT(scalar == 0);
+
+    return;
+}
+
+TEST_WITH_ARG(Scalar, testCreateVirtualScalar, format_arg,
+    ARG_ENUM(VX_TYPE_CHAR),
+    ARG_ENUM(VX_TYPE_INT8),
+    ARG_ENUM(VX_TYPE_UINT8),
+    ARG_ENUM(VX_TYPE_INT16),
+    ARG_ENUM(VX_TYPE_UINT16),
+    ARG_ENUM(VX_TYPE_INT32),
+    ARG_ENUM(VX_TYPE_UINT32),
+    ARG_ENUM(VX_TYPE_INT64),
+    ARG_ENUM(VX_TYPE_UINT64),
+    ARG_ENUM(VX_TYPE_FLOAT32),
+    ARG_ENUM(VX_TYPE_FLOAT64),
+    ARG_ENUM(VX_TYPE_ENUM),
+    ARG_ENUM(VX_TYPE_SIZE),
+    ARG_ENUM(VX_TYPE_DF_IMAGE),
+    ARG_ENUM(VX_TYPE_BOOL),
+    ARG_ENUM(VX_TYPE_RECTANGLE),
+    ARG_ENUM(VX_TYPE_KEYPOINT),
+    ARG_ENUM(VX_TYPE_COORDINATES2D),
+    ARG_ENUM(VX_TYPE_COORDINATES3D),
+#ifdef OPENVX_USE_ENHANCED_VISION
+    ARG_ENUM(VX_TYPE_COORDINATES2DF),
+    ARG_ENUM(VX_TYPE_HOG_PARAMS),
+    ARG_ENUM(VX_TYPE_HOUGH_LINES_PARAMS),
+    ARG_ENUM(VX_TYPE_LINE_2D),
+    ARG_ENUM(VX_TYPE_TENSOR_MATRIX_MULTIPLY_PARAMS)
+#endif
+    )
+{
+    vx_context context = context_->vx_context_;
+    vx_scalar  scalar = 0;
+    vx_enum    ref_type = arg_->data_type;
+    vx_graph graph = 0;
+    vx_enum    expect_type = VX_TYPE_INVALID;
+
+    ASSERT_VX_OBJECT(graph = vxCreateGraph(context), VX_TYPE_GRAPH);
+    ASSERT_VX_OBJECT(scalar = vxCreateVirtualScalar(graph, ref_type), VX_TYPE_SCALAR);
+
+    VX_CALL(vxQueryScalar(scalar, VX_SCALAR_TYPE, &expect_type, sizeof(vx_enum)));
+    EXPECT_EQ_INT(expect_type, ref_type);
+
+    VX_CALL(vxReleaseScalar(&scalar));
+    ASSERT(scalar == 0);
+
+    VX_CALL(vxReleaseGraph(&graph));
+    ASSERT(graph == 0);
+
+    return;
+}
+
 TESTCASE_TESTS(Scalar,
     testCreateScalar,
     testQueryScalar,
-    testCopyScalar
+    testCopyScalar,
+    testCreateScalarWithSize,
+    testCopyScalarWithSize,
+    testCreateVirtualScalar
     )
+
+#endif //OPENVX_USE_ENHANCED_VISION || OPENVX_CONFORMANCE_VISION
diff --git a/test_conformance/test_scale.c b/test_conformance/test_scale.c
index 9b4d225..d5a9369 100644
--- a/test_conformance/test_scale.c
+++ b/test_conformance/test_scale.c
@@ -15,11 +15,15 @@
  * limitations under the License.
  */
 
+#if defined OPENVX_USE_ENHANCED_VISION || OPENVX_CONFORMANCE_VISION
+
 #include "test_engine/test.h"
 #include <VX/vx.h>
 #include <VX/vxu.h>
 
-#include <math.h> // floorf
+#include <math.h>   // floorf
+
+#define MAX(a, b) ((a) < (b) ? (b) : (a))
 
 TESTCASE(Scale, CT_VXContext, ct_setup_vx_context, 0)
 
@@ -48,18 +52,21 @@
     ASSERT(src_image == 0);
 }
 
-
-static CT_Image scale_generate_random(const char* fileName, int width, int height)
+static CT_Image scale_generate_random(const char* fileName, int width, int height, vx_df_image format)
 {
     CT_Image image;
 
-    ASSERT_NO_FAILURE_(return 0,
-            image = ct_allocate_ct_image_random(width, height, VX_DF_IMAGE_U8, &CT()->seed_, 0, 256));
+    ASSERT_(return 0, format == VX_DF_IMAGE_U1 || format == VX_DF_IMAGE_U8);
+
+    if (format == VX_DF_IMAGE_U1)
+        ASSERT_NO_FAILURE_(return 0, image = ct_allocate_ct_image_random(width, height, format, &CT()->seed_, 0, 2));
+    else
+        ASSERT_NO_FAILURE_(return 0, image = ct_allocate_ct_image_random(width, height, format, &CT()->seed_, 0, 256));
 
     return image;
 }
 
-static CT_Image _scale_generate_simple_gradient(int width, int height, int step_x, int step_y, int offset)
+static CT_Image _scale_generate_simple_gradient(int width, int height, int step_x, int step_y, int offset, vx_df_image format)
 {
     CT_Image image = NULL;
     uint32_t x, y;
@@ -67,61 +74,121 @@
     ASSERT_(return 0, step_x > 0);
     ASSERT_(return 0, step_y > 0);
 
-    ASSERT_NO_FAILURE_(return 0,
-            image = ct_allocate_image(width, height, VX_DF_IMAGE_U8));
+    ASSERT_NO_FAILURE_(return 0, image = ct_allocate_image(width, height, format));
 
     for (y = 0; y < image->height; y++)
     {
         for (x = 0; x < image->width; x++)
         {
-            uint8_t* ptr = CT_IMAGE_DATA_PTR_8U(image, x, y);
-            int v = offset + (y / step_y) + (x / step_x);
-            *ptr = (uint8_t)v;
+            uint8_t* ptr;
+            int v;
+            if (format == VX_DF_IMAGE_U1)
+            {
+                ptr = CT_IMAGE_DATA_PTR_1U(image, x, y);
+                v = MAX((offset + (y / step_y) + (x / step_x)) % 2, 0);
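+                // U1 images pack eight pixels per byte (LSB first): clear bit (x % 8) and write v into it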
+                *ptr = (*ptr & ~(1 << (x % 8))) | (uint8_t)v << (x % 8);
+            }
+            else
+            {
+                ptr = CT_IMAGE_DATA_PTR_8U(image, x, y);
+                v = offset + (y / step_y) + (x / step_x);
+                *ptr = (uint8_t)v;
+            }
         }
     }
 
     return image;
 }
 
-static CT_Image scale_generate_gradient_2x2(const char* fileName, int width, int height)
+static CT_Image scale_generate_gradient_2x2(const char* fileName, int width, int height, vx_df_image format)
 {
-    return _scale_generate_simple_gradient(width, height, 2, 2, 0);
+    return _scale_generate_simple_gradient(width, height, 2, 2, 0, format);
 }
 
-static CT_Image scale_generate_gradient_16x16(const char* fileName, int width, int height)
+static CT_Image scale_generate_gradient_16x16(const char* fileName, int width, int height, vx_df_image format)
 {
-    return _scale_generate_simple_gradient(width, height, 16, 16, 32);
+    return _scale_generate_simple_gradient(width, height, 16, 16, 32, format);
 }
 
-static CT_Image scale_generate_pattern3x3(const char* fileName, int width, int height)
+static CT_Image scale_generate_pattern3x3(const char* fileName, int width, int height, vx_df_image format)
 {
     CT_Image image = NULL;
     uint32_t x, y;
 
-    ASSERT_NO_FAILURE_(return 0,
-            image = ct_allocate_image(width, height, VX_DF_IMAGE_U8));
+    ASSERT_NO_FAILURE_(return 0, image = ct_allocate_image(width, height, format));
 
     for (y = 0; y < image->height; y++)
     {
         for (x = 0; x < image->width; x++)
         {
-            uint8_t* ptr = CT_IMAGE_DATA_PTR_8U(image, x, y);
-            int v = ((y % 3) == 1 && (x % 3) == 1) ? 0 : 255;
-            *ptr = (uint8_t)v;
+            uint8_t* ptr;
+            int v;
+            if (format == VX_DF_IMAGE_U1)
+            {
+                ptr = CT_IMAGE_DATA_PTR_1U(image, x, y);
+                v = ((y % 3) == 1 && (x % 3) == 1) ? 0 : 1;
+                *ptr = (*ptr & ~(1 << (x % 8))) | (uint8_t)v << (x % 8);
+            }
+            else
+            {
+                ptr = CT_IMAGE_DATA_PTR_8U(image, x, y);
+                v = ((y % 3) == 1 && (x % 3) == 1) ? 0 : 255;
+                *ptr = (uint8_t)v;
+            }
         }
     }
 
     return image;
 }
 
-static CT_Image scale_read_image(const char* fileName, int width, int height)
+static CT_Image scale_read_image(const char* fileName, int width, int height, vx_df_image format)
 {
-    CT_Image image = NULL;
+    CT_Image image_load = NULL, image_ret = NULL;
     ASSERT_(return 0, width == 0 && height == 0);
-    image = ct_read_image(fileName, 1);
-    ASSERT_(return 0, image);
-    ASSERT_(return 0, image->format == VX_DF_IMAGE_U8);
-    return image;
+    ASSERT_(return 0, format == VX_DF_IMAGE_U1 || format == VX_DF_IMAGE_U8);
+
+    image_load = ct_read_image(fileName, 1);
+    ASSERT_(return 0, image_load);
+    ASSERT_(return 0, image_load->format == VX_DF_IMAGE_U8);
+
+    if (format == VX_DF_IMAGE_U1)
+    {
+        ASSERT_NO_FAILURE_(return 0, threshold_U8_ct_image(image_load, 127));   // Threshold to make the U1 image less trivial
+        ASSERT_NO_FAILURE_(return 0, image_ret = ct_allocate_image(image_load->width, image_load->height, VX_DF_IMAGE_U1));
+        ASSERT_NO_FAILURE_(return 0, U8_ct_image_to_U1_ct_image(image_load, image_ret));
+    }
+    else
+    {
+        image_ret = image_load;
+    }
+
+    ASSERT_(return 0, image_ret);
+    ASSERT_(return 0, image_ret->format == format);
+
+    return image_ret;
+}
+
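+/* Fetch one U1 pixel with border handling; x is expected to include the ROI's intra-byte bit offset (roi.x % 8), as set up by the callers */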
+static vx_int32 ct_image_get_pixel_1u(CT_Image img, int x, int y, vx_border_t border)
+{
+    int border_x_start = img->roi.x % 8;     // Bit-shift offset from ROI
+    if (border.mode == VX_BORDER_UNDEFINED)
+    {
+        if (x < border_x_start || x >= (int)img->width + border_x_start || y < 0 || y >= (int)img->height)
+            return -1; //border
+        return (*CT_IMAGE_DATA_PTR_1U(img, x, y) & (1 << (x % 8))) >> (x % 8);
+    }
+    else if (border.mode == VX_BORDER_REPLICATE)
+    {
+        return CT_IMAGE_DATA_REPLICATE_1U(img, x, y);
+    }
+    else if (border.mode == VX_BORDER_CONSTANT)
+    {
+        return CT_IMAGE_DATA_CONSTANT_1U(img, x, y, border.constant_value.U1);
+    }
+    else
+    {
+        CT_FAIL_(return -1, "Invalid border type");
+    }
 }
 
 static vx_int32 ct_image_get_pixel_8u(CT_Image img, int x, int y, vx_border_t border)
@@ -148,10 +215,23 @@
 
 static int scale_check_pixel(CT_Image src, CT_Image dst, int x, int y, vx_enum interpolation, vx_border_t border)
 {
-    vx_uint8 res = *CT_IMAGE_DATA_PTR_8U(dst, x, y);
-    vx_float32 x_src = (((vx_float32)x + 0.5f) * (vx_float32)src->width / (vx_float32)dst->width) - 0.5f;
-    vx_float32 y_src = (((vx_float32)y + 0.5f) * (vx_float32)src->height / (vx_float32)dst->height) - 0.5f;
-    int x_min = (int)floorf(x_src), y_min = (int)floorf(y_src);
+    ASSERT_(return 0, src->format == dst->format && (src->format == VX_DF_IMAGE_U1 || src->format == VX_DF_IMAGE_U8));
+    vx_df_image format = src->format;
+    vx_uint8 res;
+    if (format == VX_DF_IMAGE_U1)
+        res = (*CT_IMAGE_DATA_PTR_1U(dst, x, y) & (1 << (x % 8))) >> (x % 8);
+    else
+        res =  *CT_IMAGE_DATA_PTR_8U(dst, x, y);
+
+    int x_dst = x + dst->roi.x - (format == VX_DF_IMAGE_U1 ? dst->roi.x % 8 : 0);   // ROI-independent coordinates
+    int y_dst = y + dst->roi.y;
+    vx_float32 x_src = (((vx_float32)x_dst + 0.5f) * (vx_float32)src->roi.width  / (vx_float32)dst->roi.width)  - 0.5f;
+    vx_float32 y_src = (((vx_float32)y_dst + 0.5f) * (vx_float32)src->roi.height / (vx_float32)dst->roi.height) - 0.5f;
+    x_src = x_src - src->roi.x + (format == VX_DF_IMAGE_U1 ? src->roi.x % 8 : 0);   // ROI-dependent coordinates
+    y_src = y_src - src->roi.y;
+    int x_min = (int)floorf(x_src);
+    int y_min = (int)floorf(y_src);
+
     if (interpolation == VX_INTERPOLATION_NEAREST_NEIGHBOR)
     {
         int sx, sy;
@@ -160,21 +240,36 @@
             for (sx = -1; sx <= 1; sx++)
             {
                 vx_int32 candidate = 0;
-                ASSERT_NO_FAILURE_(return 0, candidate = ct_image_get_pixel_8u(src, x_min + sx, y_min + sy, border));
+                if (format == VX_DF_IMAGE_U1)
+                    ASSERT_NO_FAILURE_(return 0, candidate = ct_image_get_pixel_1u(src, x_min + sx, y_min + sy, border));
+                else
+                    ASSERT_NO_FAILURE_(return 0, candidate = ct_image_get_pixel_8u(src, x_min + sx, y_min + sy, border));
+
                 if (candidate == -1 || candidate == res)
                     return 1;
             }
         }
         CT_FAIL_(return 0, "Check failed for pixel (%d, %d): %d", x, y, (int)res);
     }
-    if (interpolation == VX_INTERPOLATION_BILINEAR)
+    else if (interpolation == VX_INTERPOLATION_BILINEAR)
     {
         vx_float32 s = x_src - x_min;
         vx_float32 t = y_src - y_min;
-        vx_int32 p00 = ct_image_get_pixel_8u(src, x_min + 0, y_min + 0, border);
-        vx_int32 p01 = ct_image_get_pixel_8u(src, x_min + 0, y_min + 1, border);
-        vx_int32 p10 = ct_image_get_pixel_8u(src, x_min + 1, y_min + 0, border);
-        vx_int32 p11 = ct_image_get_pixel_8u(src, x_min + 1, y_min + 1, border);
+        vx_int32 p00, p01, p10, p11;
+        if (format == VX_DF_IMAGE_U1)
+        {
+            p00 = ct_image_get_pixel_1u(src, x_min + 0, y_min + 0, border);
+            p01 = ct_image_get_pixel_1u(src, x_min + 0, y_min + 1, border);
+            p10 = ct_image_get_pixel_1u(src, x_min + 1, y_min + 0, border);
+            p11 = ct_image_get_pixel_1u(src, x_min + 1, y_min + 1, border);
+        }
+        else
+        {
+            p00 = ct_image_get_pixel_8u(src, x_min + 0, y_min + 0, border);
+            p01 = ct_image_get_pixel_8u(src, x_min + 0, y_min + 1, border);
+            p10 = ct_image_get_pixel_8u(src, x_min + 1, y_min + 0, border);
+            p11 = ct_image_get_pixel_8u(src, x_min + 1, y_min + 1, border);
+        }
         vx_float32 ref_float;
         vx_int32 ref;
 
@@ -210,15 +305,17 @@
         // Take the nearest integer to avoid problems with casts in case of float rounding errors
         // (e.g: 30.999999 should give 31, not 30)
         ref = (vx_int32)(ref_float + 0.5f);
+        if (format == VX_DF_IMAGE_U1)
+            ref = (ref > 1) ? 1 : (ref < 0) ? 0 : ref;
 
-        // A difference of 1 is allowed
-        if (abs(res - ref) <= 1) {
+        // A difference of 1 is allowed (for U1 only a difference of 0 is allowed, otherwise the test is trivial)
+        if (abs(res - ref) <= (format == VX_DF_IMAGE_U1 ? 0 : 1)) {
             return 1;
         }
 
         return 0; // don't generate failure, we will check num failed pixels later
     }
-    if (interpolation == VX_INTERPOLATION_AREA)
+    else if (interpolation == VX_INTERPOLATION_AREA)
     {
         vx_int32 v_min = 256, v_max = -1;
         int sx, sy;
@@ -228,7 +325,11 @@
             for (sx = -2; sx <= 2; sx++)
             {
                 vx_int32 candidate = 0;
-                ASSERT_NO_FAILURE_(return 0, candidate = ct_image_get_pixel_8u(src, x_min + sx, y_min + sy, border));
+                if (format == VX_DF_IMAGE_U1)
+                    ASSERT_NO_FAILURE_(return 0, candidate = ct_image_get_pixel_1u(src, x_min + sx, y_min + sy, border));
+                else
+                    ASSERT_NO_FAILURE_(return 0, candidate = ct_image_get_pixel_8u(src, x_min + sx, y_min + sy, border));
+
                 if (candidate == -1)
                     return 1;
                 if (v_min > candidate)
@@ -241,14 +342,29 @@
         }
         CT_FAIL_(return 0, "Check failed for pixel (%d, %d): %d", x, y, (int)res);
     }
-    CT_FAIL_(return 0, "NOT IMPLEMENTED");
+    else
+    {
+        CT_FAIL_(return 0, "NOT IMPLEMENTED");
+    }
 }
 
 static int scale_check_pixel_exact(CT_Image src, CT_Image dst, int x, int y, vx_enum interpolation, vx_border_t border)
 {
-    vx_uint8 res = *CT_IMAGE_DATA_PTR_8U(dst, x, y);
-    vx_float32 x_src = (((vx_float32)x + 0.5f) * (vx_float32)src->width / (vx_float32)dst->width) - 0.5f;
-    vx_float32 y_src = (((vx_float32)y + 0.5f) * (vx_float32)src->height / (vx_float32)dst->height) - 0.5f;
+    ASSERT_(return 0, src->format == dst->format && (src->format == VX_DF_IMAGE_U1 || src->format == VX_DF_IMAGE_U8));
+    vx_df_image format = src->format;
+    vx_uint8 res;
+    if (format == VX_DF_IMAGE_U1)
+        res = (*CT_IMAGE_DATA_PTR_1U(dst, x, y) & (1 << (x % 8))) >> (x % 8);
+    else
+        res =  *CT_IMAGE_DATA_PTR_8U(dst, x, y);
+
+    int x_dst = x + dst->roi.x - (format == VX_DF_IMAGE_U1 ? dst->roi.x % 8 : 0);   // ROI-independent coordinates
+    int y_dst = y + dst->roi.y;
+    vx_float32 x_src = (((vx_float32)x_dst + 0.5f) * (vx_float32)src->roi.width  / (vx_float32)dst->roi.width)  - 0.5f;
+    vx_float32 y_src = (((vx_float32)y_dst + 0.5f) * (vx_float32)src->roi.height / (vx_float32)dst->roi.height) - 0.5f;
+    x_src = x_src - src->roi.x + (format == VX_DF_IMAGE_U1 ? src->roi.x % 8 : 0);   // ROI-dependent coordinates
+    y_src = y_src - src->roi.y;
+
     vx_float32 x_minf = floorf(x_src);
     vx_float32 y_minf = floorf(y_src);
     int x_min = (vx_int32)x_minf;
@@ -259,21 +375,37 @@
         x_ref++;
     if (y_src - y_minf >= 0.5f)
         y_ref++;
+
     if (interpolation == VX_INTERPOLATION_NEAREST_NEIGHBOR)
     {
-        vx_int32 ref = ct_image_get_pixel_8u(src, x_ref, y_ref, border);
+        vx_int32 ref;
+        if (format == VX_DF_IMAGE_U1)
+            ref = ct_image_get_pixel_1u(src, x_ref, y_ref, border);
+        else
+            ref = ct_image_get_pixel_8u(src, x_ref, y_ref, border);
         if (ref == -1 || ref == res)
             return 1;
         CT_FAIL_(return 0, "Check failed for pixel (%d, %d): %d (expected %d)", x, y, (int)res, (int)ref);
     }
-    if (interpolation == VX_INTERPOLATION_BILINEAR)
+    else if (interpolation == VX_INTERPOLATION_BILINEAR)
     {
         vx_float32 s = x_src - x_minf;
         vx_float32 t = y_src - y_minf;
-        vx_int32 p00 = ct_image_get_pixel_8u(src, x_min + 0, y_min + 0, border);
-        vx_int32 p01 = ct_image_get_pixel_8u(src, x_min + 0, y_min + 1, border);
-        vx_int32 p10 = ct_image_get_pixel_8u(src, x_min + 1, y_min + 0, border);
-        vx_int32 p11 = ct_image_get_pixel_8u(src, x_min + 1, y_min + 1, border);
+        vx_int32 p00, p01, p10, p11;
+        if (format == VX_DF_IMAGE_U1)
+        {
+            p00 = ct_image_get_pixel_1u(src, x_min + 0, y_min + 0, border);
+            p01 = ct_image_get_pixel_1u(src, x_min + 0, y_min + 1, border);
+            p10 = ct_image_get_pixel_1u(src, x_min + 1, y_min + 0, border);
+            p11 = ct_image_get_pixel_1u(src, x_min + 1, y_min + 1, border);
+        }
+        else
+        {
+            p00 = ct_image_get_pixel_8u(src, x_min + 0, y_min + 0, border);
+            p01 = ct_image_get_pixel_8u(src, x_min + 0, y_min + 1, border);
+            p10 = ct_image_get_pixel_8u(src, x_min + 1, y_min + 0, border);
+            p11 = ct_image_get_pixel_8u(src, x_min + 1, y_min + 1, border);
+        }
         vx_float32 ref_float;
         vx_int32 ref;
 
@@ -309,6 +441,8 @@
         // Take the nearest integer to avoid problems with casts in case of float rounding errors
         // (e.g: 30.999999 should give 31, not 30)
         ref = (vx_int32)(ref_float + 0.5f);
+        if (format == VX_DF_IMAGE_U1)
+            ref = (ref > 1) ? 1 : (ref < 0) ? 0 : ref;
 
         // The result must be exact
         if (ref == res) {
@@ -317,39 +451,64 @@
 
         CT_FAIL_(return 0, "Check failed for pixel (%d, %d): %d (expected %d)", x, y, (int)res, (int)ref);
     }
-    if (interpolation == VX_INTERPOLATION_AREA)
+    else if (interpolation == VX_INTERPOLATION_AREA)
     {
         vx_int32 ref;
         ASSERT_(return 0, dst->width % src->width == 0 && dst->height % src->height == 0);
-        ref = ct_image_get_pixel_8u(src, x_ref, y_ref, border);
+        if (format == VX_DF_IMAGE_U1)
+            ref = ct_image_get_pixel_1u(src, x_ref, y_ref, border);
+        else
+            ref = ct_image_get_pixel_8u(src, x_ref, y_ref, border);
+
         if (ref == -1)
             return 1;
         if (ref == res)
             return 1;
         CT_FAIL_(return 0, "Check failed for pixel (%d, %d): %d (expected %d)", x, y, (int)res, (int)ref);
     }
-    CT_FAIL_(return 0, "NOT IMPLEMENTED");
+    else
+    {
+        CT_FAIL_(return 0, "NOT IMPLEMENTED");
+    }
 }
 
 static void scale_validate(CT_Image src, CT_Image dst, vx_enum interpolation, vx_border_t border, int exact)
 {
     int num_failed = 0;
+    ASSERT(src->format == dst->format && (src->format == VX_DF_IMAGE_U1 || src->format == VX_DF_IMAGE_U8));
     if (src->width == dst->width && src->height == dst->height) // special case for scale=1.0
     {
         ASSERT_EQ_CTIMAGE(src, dst);
         return;
     }
-    CT_FILL_IMAGE_8U(, dst,
-            {
-                int check;
-                if (exact == 0)
-                    ASSERT_NO_FAILURE(check = scale_check_pixel(src, dst, x, y, interpolation, border));
-                else
-                    ASSERT_NO_FAILURE(check = scale_check_pixel_exact(src, dst, x, y, interpolation, border));
-                if (check == 0) {
-                    num_failed++;
-                }
-            });
+    if (src->format == VX_DF_IMAGE_U1)
+    {
+        CT_FILL_IMAGE_1U(, dst,
+                {
+                    int check;
+                    if (exact == 0)
+                        ASSERT_NO_FAILURE(check = scale_check_pixel(src, dst, xShftd, y, interpolation, border));
+                    else
+                        ASSERT_NO_FAILURE(check = scale_check_pixel_exact(src, dst, xShftd, y, interpolation, border));
+                    if (check == 0) {
+                        num_failed++;
+                    }
+                });
+    }
+    else
+    {
+        CT_FILL_IMAGE_8U(, dst,
+                {
+                    int check;
+                    if (exact == 0)
+                        ASSERT_NO_FAILURE(check = scale_check_pixel(src, dst, x, y, interpolation, border));
+                    else
+                        ASSERT_NO_FAILURE(check = scale_check_pixel_exact(src, dst, x, y, interpolation, border));
+                    if (check == 0) {
+                        num_failed++;
+                    }
+                });
+    }
     if (interpolation == VX_INTERPOLATION_BILINEAR)
     {
         int total = dst->width * dst->height;
@@ -379,11 +538,12 @@
     const char* testName;
     int dummy;
     vx_enum interpolation;
-    CT_Image (*generator)(const char* fileName, int width, int height);
+    CT_Image (*generator)(const char* fileName, int width, int height, vx_df_image format);
     const char* fileName;
     void (*dst_size_generator)(int width, int height, int* dst_width, int* dst_height);
     int exact_result;
     int width, height;
+    vx_df_image format;
     vx_border_t border;
 } Arg;
 
@@ -454,7 +614,10 @@
 
 #define SCALE_TEST(interpolation, inputDataGenerator, inputDataFile, scale, exact, nextmacro, ...) \
     CT_EXPAND(nextmacro(STR_##interpolation "/" inputDataFile "/" #scale, __VA_ARGS__, \
-            interpolation, inputDataGenerator, inputDataFile, dst_size_generator_ ## scale, exact))
+            interpolation, inputDataGenerator, inputDataFile, dst_size_generator_##scale, exact))
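+/* Same as SCALE_TEST but prefixes the generated test name with "_U1_" so the binary (U1) image cases can be told apart in the test list */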
+#define SCALE_TEST_U1(interpolation, inputDataGenerator, inputDataFile, scale, exact, nextmacro, ...) \
+    CT_EXPAND(nextmacro("_U1_/" STR_##interpolation "/" inputDataFile "/" #scale, __VA_ARGS__, \
+            interpolation, inputDataGenerator, inputDataFile, dst_size_generator_##scale, exact))
 
 #define ADD_DST_SIZE_NN(testArgName, nextmacro, ...) \
     CT_EXPAND(nextmacro(testArgName "/1_1", __VA_ARGS__, dst_size_generator_1_1)), \
@@ -483,44 +646,84 @@
 
 #define PARAMETERS \
     /* 1:1 scale */ \
-    SCALE_TEST(VX_INTERPOLATION_NEAREST_NEIGHBOR, scale_generate_random, "random", 1_1, 1, ADD_SIZE_SMALL_SET, ADD_VX_BORDERS, ARG, 0), \
-    SCALE_TEST(VX_INTERPOLATION_BILINEAR,         scale_generate_random, "random", 1_1, 1, ADD_SIZE_SMALL_SET, ADD_VX_BORDERS, ARG, 0), \
-    SCALE_TEST(VX_INTERPOLATION_AREA,             scale_generate_random, "random", 1_1, 1, ADD_SIZE_SMALL_SET, ADD_VX_BORDERS, ARG, 0), \
+    SCALE_TEST(VX_INTERPOLATION_NEAREST_NEIGHBOR, scale_generate_random, "random", 1_1, 1, ADD_SIZE_SMALL_SET, ADD_TYPE_U8, ADD_VX_BORDERS, ARG, 0), \
+    SCALE_TEST(VX_INTERPOLATION_BILINEAR,         scale_generate_random, "random", 1_1, 1, ADD_SIZE_SMALL_SET, ADD_TYPE_U8, ADD_VX_BORDERS, ARG, 0), \
+    SCALE_TEST(VX_INTERPOLATION_AREA,             scale_generate_random, "random", 1_1, 1, ADD_SIZE_SMALL_SET, ADD_TYPE_U8, ADD_VX_BORDERS, ARG, 0), \
     /* NN upscale with integer factor */ \
-    SCALE_TEST(VX_INTERPOLATION_NEAREST_NEIGHBOR, scale_generate_random, "random", 1_2, 1, ADD_SIZE_SMALL_SET, ADD_VX_BORDERS, ARG, 0), \
-    SCALE_TEST(VX_INTERPOLATION_NEAREST_NEIGHBOR, scale_generate_random, "random", 1_3, 1, ADD_SIZE_SMALL_SET, ADD_VX_BORDERS, ARG, 0), \
-    SCALE_TEST(VX_INTERPOLATION_NEAREST_NEIGHBOR, scale_read_image, "lena.bmp", 1_2, 1, ADD_SIZE_NONE, ADD_VX_BORDERS, ARG, 0), \
+    SCALE_TEST(VX_INTERPOLATION_NEAREST_NEIGHBOR, scale_generate_random, "random", 1_2, 1, ADD_SIZE_SMALL_SET, ADD_TYPE_U8, ADD_VX_BORDERS, ARG, 0), \
+    SCALE_TEST(VX_INTERPOLATION_NEAREST_NEIGHBOR, scale_generate_random, "random", 1_3, 1, ADD_SIZE_SMALL_SET, ADD_TYPE_U8, ADD_VX_BORDERS, ARG, 0), \
+    SCALE_TEST(VX_INTERPOLATION_NEAREST_NEIGHBOR, scale_read_image,    "lena.bmp", 1_2, 1, ADD_SIZE_NONE,      ADD_TYPE_U8, ADD_VX_BORDERS, ARG, 0), \
     /* NN downscale with odd integer factor */\
-    SCALE_TEST(VX_INTERPOLATION_NEAREST_NEIGHBOR, scale_generate_random, "random", 3_1, 1, ADD_SIZE_96x96, ADD_VX_BORDERS, ARG, 0), \
-    SCALE_TEST(VX_INTERPOLATION_NEAREST_NEIGHBOR, scale_generate_random, "random", 5_1, 1, ADD_SIZE_100x100, ADD_VX_BORDERS, ARG, 0), \
-    SCALE_TEST(VX_INTERPOLATION_NEAREST_NEIGHBOR, scale_generate_pattern3x3, "pattern3x3", 3_1, 1, ADD_SIZE_96x96, ADD_VX_BORDERS, ARG, 0), \
-    SCALE_TEST(VX_INTERPOLATION_NEAREST_NEIGHBOR, scale_read_image, "lena.bmp", 3_1, 0, ADD_SIZE_NONE, ADD_VX_BORDERS, ARG, 0), \
+    SCALE_TEST(VX_INTERPOLATION_NEAREST_NEIGHBOR, scale_generate_random, "random", 3_1, 1, ADD_SIZE_96x96,     ADD_TYPE_U8, ADD_VX_BORDERS, ARG, 0), \
+    SCALE_TEST(VX_INTERPOLATION_NEAREST_NEIGHBOR, scale_generate_random, "random", 5_1, 1, ADD_SIZE_100x100,   ADD_TYPE_U8, ADD_VX_BORDERS, ARG, 0), \
+    SCALE_TEST(VX_INTERPOLATION_NEAREST_NEIGHBOR, scale_generate_pattern3x3, "pattern3x3", 3_1, 1, ADD_SIZE_96x96, ADD_TYPE_U8, ADD_VX_BORDERS, ARG, 0), \
+    SCALE_TEST(VX_INTERPOLATION_NEAREST_NEIGHBOR, scale_read_image,    "lena.bmp", 3_1, 0, ADD_SIZE_NONE,      ADD_TYPE_U8, ADD_VX_BORDERS, ARG, 0), \
     /* other NN downscales */ \
-    SCALE_TEST(VX_INTERPOLATION_NEAREST_NEIGHBOR, scale_generate_random, "random", 2_1, 0, ADD_SIZE_SMALL_SET, ADD_VX_BORDERS, ARG, 0), \
-    SCALE_TEST(VX_INTERPOLATION_NEAREST_NEIGHBOR, scale_generate_random, "random", 4_1, 0, ADD_SIZE_SMALL_SET, ADD_VX_BORDERS, ARG, 0), \
-    SCALE_TEST(VX_INTERPOLATION_NEAREST_NEIGHBOR, scale_generate_random, "random", SCALE_PYRAMID_ORB, 0, ADD_SIZE_SMALL_SET, ADD_VX_BORDERS, ARG, 0), \
+    SCALE_TEST(VX_INTERPOLATION_NEAREST_NEIGHBOR, scale_generate_random, "random", 2_1, 0, ADD_SIZE_SMALL_SET, ADD_TYPE_U8, ADD_VX_BORDERS, ARG, 0), \
+    SCALE_TEST(VX_INTERPOLATION_NEAREST_NEIGHBOR, scale_generate_random, "random", 4_1, 0, ADD_SIZE_SMALL_SET, ADD_TYPE_U8, ADD_VX_BORDERS, ARG, 0), \
+    SCALE_TEST(VX_INTERPOLATION_NEAREST_NEIGHBOR, scale_generate_random, "random", SCALE_PYRAMID_ORB, 0, ADD_SIZE_SMALL_SET, ADD_TYPE_U8, ADD_VX_BORDERS, ARG, 0), \
     /* BILINEAR upscale with integer factor */ \
-    SCALE_TEST(VX_INTERPOLATION_BILINEAR,         scale_generate_random, "random", 1_2, 0, ADD_SIZE_SMALL_SET, ADD_VX_BORDERS, ARG, 0), \
-    SCALE_TEST(VX_INTERPOLATION_BILINEAR,         scale_generate_random, "random", 1_3, 0, ADD_SIZE_SMALL_SET, ADD_VX_BORDERS, ARG, 0), \
+    SCALE_TEST(VX_INTERPOLATION_BILINEAR,         scale_generate_random, "random", 1_2, 0, ADD_SIZE_SMALL_SET, ADD_TYPE_U8, ADD_VX_BORDERS, ARG, 0), \
+    SCALE_TEST(VX_INTERPOLATION_BILINEAR,         scale_generate_random, "random", 1_3, 0, ADD_SIZE_SMALL_SET, ADD_TYPE_U8, ADD_VX_BORDERS, ARG, 0), \
     /* BILINEAR downscales */ \
-    SCALE_TEST(VX_INTERPOLATION_BILINEAR,         scale_generate_random, "random", 2_1, 0, ADD_SIZE_SMALL_SET, ADD_VX_BORDERS, ARG, 0), \
-    SCALE_TEST(VX_INTERPOLATION_BILINEAR,         scale_generate_random, "random", 3_1, 0, ADD_SIZE_SMALL_SET, ADD_VX_BORDERS, ARG, 0), \
-    SCALE_TEST(VX_INTERPOLATION_BILINEAR,         scale_generate_random, "random", 4_1, 0, ADD_SIZE_SMALL_SET, ADD_VX_BORDERS, ARG, 0), \
-    SCALE_TEST(VX_INTERPOLATION_BILINEAR,         scale_generate_random, "random", 5_1, 0, ADD_SIZE_SMALL_SET, ADD_VX_BORDERS, ARG, 0), \
-    SCALE_TEST(VX_INTERPOLATION_BILINEAR,         scale_generate_random, "random", SCALE_PYRAMID_ORB, 0, ADD_SIZE_SMALL_SET, ADD_VX_BORDERS, ARG, 0), \
+    SCALE_TEST(VX_INTERPOLATION_BILINEAR,         scale_generate_random, "random", 2_1, 0, ADD_SIZE_SMALL_SET, ADD_TYPE_U8, ADD_VX_BORDERS, ARG, 0), \
+    SCALE_TEST(VX_INTERPOLATION_BILINEAR,         scale_generate_random, "random", 3_1, 0, ADD_SIZE_SMALL_SET, ADD_TYPE_U8, ADD_VX_BORDERS, ARG, 0), \
+    SCALE_TEST(VX_INTERPOLATION_BILINEAR,         scale_generate_random, "random", 4_1, 0, ADD_SIZE_SMALL_SET, ADD_TYPE_U8, ADD_VX_BORDERS, ARG, 0), \
+    SCALE_TEST(VX_INTERPOLATION_BILINEAR,         scale_generate_random, "random", 5_1, 0, ADD_SIZE_SMALL_SET, ADD_TYPE_U8, ADD_VX_BORDERS, ARG, 0), \
+    SCALE_TEST(VX_INTERPOLATION_BILINEAR,         scale_generate_random, "random", SCALE_PYRAMID_ORB, 0, ADD_SIZE_SMALL_SET, ADD_TYPE_U8, ADD_VX_BORDERS, ARG, 0), \
     /* AREA tests */ \
-    SCALE_TEST(VX_INTERPOLATION_AREA,             scale_generate_gradient_16x16, "gradient16x16", 4_1, 0, ADD_SIZE_SMALL_SET, ADD_VX_BORDERS, ARG, 0), \
-    SCALE_TEST(VX_INTERPOLATION_AREA,             scale_read_image, "lena.bmp", 4_1, 0, ADD_SIZE_NONE, ADD_VX_BORDERS, ARG, 0), \
+    SCALE_TEST(VX_INTERPOLATION_AREA,             scale_generate_gradient_16x16, "gradient16x16", 4_1, 0, ADD_SIZE_SMALL_SET, ADD_TYPE_U8, ADD_VX_BORDERS, ARG, 0), \
+    SCALE_TEST(VX_INTERPOLATION_AREA,             scale_read_image,    "lena.bmp", 4_1, 0, ADD_SIZE_NONE,      ADD_TYPE_U8, ADD_VX_BORDERS, ARG, 0), \
     /* AREA upscale */ \
-    SCALE_TEST(VX_INTERPOLATION_AREA,             scale_generate_random, "random", 1_2, 0, ADD_SIZE_SMALL_SET, ADD_VX_BORDERS, ARG, 0), \
-    SCALE_TEST(VX_INTERPOLATION_AREA,             scale_generate_random, "random", 1_3, 0, ADD_SIZE_SMALL_SET, ADD_VX_BORDERS, ARG, 0), \
+    SCALE_TEST(VX_INTERPOLATION_AREA,             scale_generate_random, "random", 1_2, 0, ADD_SIZE_SMALL_SET, ADD_TYPE_U8, ADD_VX_BORDERS, ARG, 0), \
+    SCALE_TEST(VX_INTERPOLATION_AREA,             scale_generate_random, "random", 1_3, 0, ADD_SIZE_SMALL_SET, ADD_TYPE_U8, ADD_VX_BORDERS, ARG, 0), \
     /* other */ \
-    SCALE_TEST(VX_INTERPOLATION_NEAREST_NEIGHBOR, scale_generate_random, "random", SCALE_NEAR_UP, 0, ADD_SIZE_SMALL_SET, ADD_VX_BORDERS, ARG, 0), \
-    SCALE_TEST(VX_INTERPOLATION_BILINEAR,         scale_generate_random, "random", SCALE_NEAR_UP, 0, ADD_SIZE_SMALL_SET, ADD_VX_BORDERS, ARG, 0), \
-    SCALE_TEST(VX_INTERPOLATION_AREA,             scale_generate_random, "random", SCALE_NEAR_UP, 0, ADD_SIZE_SMALL_SET, ADD_VX_BORDERS, ARG, 0), \
-    SCALE_TEST(VX_INTERPOLATION_NEAREST_NEIGHBOR, scale_generate_random, "random", SCALE_NEAR_DOWN, 0, ADD_SIZE_SMALL_SET, ADD_VX_BORDERS, ARG, 0), \
-    SCALE_TEST(VX_INTERPOLATION_BILINEAR,         scale_generate_random, "random", SCALE_NEAR_DOWN, 0, ADD_SIZE_SMALL_SET, ADD_VX_BORDERS, ARG, 0), \
-    SCALE_TEST(VX_INTERPOLATION_AREA,             scale_generate_random, "random", SCALE_NEAR_DOWN, 0, ADD_SIZE_SMALL_SET, ADD_VX_BORDERS, ARG, 0), \
+    SCALE_TEST(VX_INTERPOLATION_NEAREST_NEIGHBOR, scale_generate_random, "random", SCALE_NEAR_UP,   0, ADD_SIZE_SMALL_SET, ADD_TYPE_U8, ADD_VX_BORDERS, ARG, 0), \
+    SCALE_TEST(VX_INTERPOLATION_BILINEAR,         scale_generate_random, "random", SCALE_NEAR_UP,   0, ADD_SIZE_SMALL_SET, ADD_TYPE_U8, ADD_VX_BORDERS, ARG, 0), \
+    SCALE_TEST(VX_INTERPOLATION_AREA,             scale_generate_random, "random", SCALE_NEAR_UP,   0, ADD_SIZE_SMALL_SET, ADD_TYPE_U8, ADD_VX_BORDERS, ARG, 0), \
+    SCALE_TEST(VX_INTERPOLATION_NEAREST_NEIGHBOR, scale_generate_random, "random", SCALE_NEAR_DOWN, 0, ADD_SIZE_SMALL_SET, ADD_TYPE_U8, ADD_VX_BORDERS, ARG, 0), \
+    SCALE_TEST(VX_INTERPOLATION_BILINEAR,         scale_generate_random, "random", SCALE_NEAR_DOWN, 0, ADD_SIZE_SMALL_SET, ADD_TYPE_U8, ADD_VX_BORDERS, ARG, 0), \
+    SCALE_TEST(VX_INTERPOLATION_AREA,             scale_generate_random, "random", SCALE_NEAR_DOWN, 0, ADD_SIZE_SMALL_SET, ADD_TYPE_U8, ADD_VX_BORDERS, ARG, 0), \
+    \
+    /* U1: 1:1 scale */ \
+    SCALE_TEST_U1(VX_INTERPOLATION_NEAREST_NEIGHBOR, scale_generate_random, "random", 1_1, 1, ADD_SIZE_SMALL_SET, ADD_TYPE_U1, ADD_VX_BORDERS_U1, ARG, 0), \
+    SCALE_TEST_U1(VX_INTERPOLATION_BILINEAR,         scale_generate_random, "random", 1_1, 1, ADD_SIZE_SMALL_SET, ADD_TYPE_U1, ADD_VX_BORDERS_U1, ARG, 0), \
+    SCALE_TEST_U1(VX_INTERPOLATION_AREA,             scale_generate_random, "random", 1_1, 1, ADD_SIZE_SMALL_SET, ADD_TYPE_U1, ADD_VX_BORDERS_U1, ARG, 0), \
+    /* U1: NN upscale with integer factor */ \
+    SCALE_TEST_U1(VX_INTERPOLATION_NEAREST_NEIGHBOR, scale_generate_random, "random", 1_2, 1, ADD_SIZE_SMALL_SET, ADD_TYPE_U1, ADD_VX_BORDERS_U1, ARG, 0), \
+    SCALE_TEST_U1(VX_INTERPOLATION_NEAREST_NEIGHBOR, scale_generate_random, "random", 1_3, 1, ADD_SIZE_SMALL_SET, ADD_TYPE_U1, ADD_VX_BORDERS_U1, ARG, 0), \
+    SCALE_TEST_U1(VX_INTERPOLATION_NEAREST_NEIGHBOR, scale_read_image,    "lena.bmp", 1_2, 1, ADD_SIZE_NONE,      ADD_TYPE_U1, ADD_VX_BORDERS_U1, ARG, 0), \
+    /* U1: NN downscale with odd integer factor */\
+    SCALE_TEST_U1(VX_INTERPOLATION_NEAREST_NEIGHBOR, scale_generate_random, "random", 3_1, 1, ADD_SIZE_96x96,     ADD_TYPE_U1, ADD_VX_BORDERS_U1, ARG, 0), \
+    SCALE_TEST_U1(VX_INTERPOLATION_NEAREST_NEIGHBOR, scale_generate_random, "random", 5_1, 1, ADD_SIZE_100x100,   ADD_TYPE_U1, ADD_VX_BORDERS_U1, ARG, 0), \
+    SCALE_TEST_U1(VX_INTERPOLATION_NEAREST_NEIGHBOR, scale_generate_pattern3x3, "pattern3x3", 3_1, 1, ADD_SIZE_96x96, ADD_TYPE_U1, ADD_VX_BORDERS_U1, ARG, 0), \
+    SCALE_TEST_U1(VX_INTERPOLATION_NEAREST_NEIGHBOR, scale_read_image,    "lena.bmp", 3_1, 0, ADD_SIZE_NONE,      ADD_TYPE_U1, ADD_VX_BORDERS_U1, ARG, 0), \
+    /* U1: other NN downscales */ \
+    SCALE_TEST_U1(VX_INTERPOLATION_NEAREST_NEIGHBOR, scale_generate_random, "random", 2_1, 0, ADD_SIZE_SMALL_SET, ADD_TYPE_U1, ADD_VX_BORDERS_U1, ARG, 0), \
+    SCALE_TEST_U1(VX_INTERPOLATION_NEAREST_NEIGHBOR, scale_generate_random, "random", 4_1, 0, ADD_SIZE_SMALL_SET, ADD_TYPE_U1, ADD_VX_BORDERS_U1, ARG, 0), \
+    SCALE_TEST_U1(VX_INTERPOLATION_NEAREST_NEIGHBOR, scale_generate_random, "random", SCALE_PYRAMID_ORB, 0, ADD_SIZE_SMALL_SET, ADD_TYPE_U1, ADD_VX_BORDERS_U1, ARG, 0), \
+    /* U1: BILINEAR upscale with integer factor */ \
+    SCALE_TEST_U1(VX_INTERPOLATION_BILINEAR,         scale_generate_random, "random", 1_2, 0, ADD_SIZE_SMALL_SET, ADD_TYPE_U1, ADD_VX_BORDERS_U1, ARG, 0), \
+    SCALE_TEST_U1(VX_INTERPOLATION_BILINEAR,         scale_generate_random, "random", 1_3, 0, ADD_SIZE_SMALL_SET, ADD_TYPE_U1, ADD_VX_BORDERS_U1, ARG, 0), \
+    /* U1: BILINEAR downscales */ \
+    SCALE_TEST_U1(VX_INTERPOLATION_BILINEAR,         scale_generate_random, "random", 2_1, 0, ADD_SIZE_SMALL_SET, ADD_TYPE_U1, ADD_VX_BORDERS_U1, ARG, 0), \
+    SCALE_TEST_U1(VX_INTERPOLATION_BILINEAR,         scale_generate_random, "random", 3_1, 0, ADD_SIZE_SMALL_SET, ADD_TYPE_U1, ADD_VX_BORDERS_U1, ARG, 0), \
+    SCALE_TEST_U1(VX_INTERPOLATION_BILINEAR,         scale_generate_random, "random", 4_1, 0, ADD_SIZE_SMALL_SET, ADD_TYPE_U1, ADD_VX_BORDERS_U1, ARG, 0), \
+    SCALE_TEST_U1(VX_INTERPOLATION_BILINEAR,         scale_generate_random, "random", 5_1, 0, ADD_SIZE_SMALL_SET, ADD_TYPE_U1, ADD_VX_BORDERS_U1, ARG, 0), \
+    SCALE_TEST_U1(VX_INTERPOLATION_BILINEAR,         scale_generate_random, "random", SCALE_PYRAMID_ORB, 0, ADD_SIZE_SMALL_SET, ADD_TYPE_U1, ADD_VX_BORDERS_U1, ARG, 0), \
+    /* U1: AREA tests */ \
+    SCALE_TEST_U1(VX_INTERPOLATION_AREA,             scale_generate_gradient_16x16, "gradient16x16", 4_1, 0, ADD_SIZE_SMALL_SET, ADD_TYPE_U1, ADD_VX_BORDERS_U1, ARG, 0), \
+    SCALE_TEST_U1(VX_INTERPOLATION_AREA,             scale_read_image,    "lena.bmp", 4_1, 0, ADD_SIZE_NONE,      ADD_TYPE_U1, ADD_VX_BORDERS_U1, ARG, 0), \
+    /* U1: AREA upscale */ \
+    SCALE_TEST_U1(VX_INTERPOLATION_AREA,             scale_generate_random, "random", 1_2, 0, ADD_SIZE_SMALL_SET, ADD_TYPE_U1, ADD_VX_BORDERS_U1, ARG, 0), \
+    SCALE_TEST_U1(VX_INTERPOLATION_AREA,             scale_generate_random, "random", 1_3, 0, ADD_SIZE_SMALL_SET, ADD_TYPE_U1, ADD_VX_BORDERS_U1, ARG, 0), \
+    /* U1: other */ \
+    SCALE_TEST_U1(VX_INTERPOLATION_NEAREST_NEIGHBOR, scale_generate_random, "random", SCALE_NEAR_UP,   0, ADD_SIZE_SMALL_SET, ADD_TYPE_U1, ADD_VX_BORDERS_U1, ARG, 0), \
+    SCALE_TEST_U1(VX_INTERPOLATION_BILINEAR,         scale_generate_random, "random", SCALE_NEAR_UP,   0, ADD_SIZE_SMALL_SET, ADD_TYPE_U1, ADD_VX_BORDERS_U1, ARG, 0), \
+    SCALE_TEST_U1(VX_INTERPOLATION_AREA,             scale_generate_random, "random", SCALE_NEAR_UP,   0, ADD_SIZE_SMALL_SET, ADD_TYPE_U1, ADD_VX_BORDERS_U1, ARG, 0), \
+    SCALE_TEST_U1(VX_INTERPOLATION_NEAREST_NEIGHBOR, scale_generate_random, "random", SCALE_NEAR_DOWN, 0, ADD_SIZE_SMALL_SET, ADD_TYPE_U1, ADD_VX_BORDERS_U1, ARG, 0), \
+    SCALE_TEST_U1(VX_INTERPOLATION_BILINEAR,         scale_generate_random, "random", SCALE_NEAR_DOWN, 0, ADD_SIZE_SMALL_SET, ADD_TYPE_U1, ADD_VX_BORDERS_U1, ARG, 0), \
+    SCALE_TEST_U1(VX_INTERPOLATION_AREA,             scale_generate_random, "random", SCALE_NEAR_DOWN, 0, ADD_SIZE_SMALL_SET, ADD_TYPE_U1, ADD_VX_BORDERS_U1, ARG, 0)
 
 TEST_WITH_ARG(Scale, testGraphProcessing, Arg,
     PARAMETERS
@@ -534,12 +737,12 @@
 
     CT_Image src = NULL, dst = NULL;
 
-    ASSERT_NO_FAILURE(src = arg_->generator(arg_->fileName, arg_->width, arg_->height));
+    ASSERT_NO_FAILURE(src = arg_->generator(arg_->fileName, arg_->width, arg_->height, arg_->format));
     ASSERT_VX_OBJECT(src_image = ct_image_to_vx_image(src, context), VX_TYPE_IMAGE);
 
     ASSERT_NO_FAILURE(arg_->dst_size_generator(src->width, src->height, &dst_width, &dst_height));
 
-    ASSERT_VX_OBJECT(dst_image = vxCreateImage(context, dst_width, dst_height, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
+    ASSERT_VX_OBJECT(dst_image = vxCreateImage(context, dst_width, dst_height, arg_->format), VX_TYPE_IMAGE);
 
     ASSERT_VX_OBJECT(graph = vxCreateGraph(context), VX_TYPE_GRAPH);
 
@@ -577,12 +780,12 @@
 
     CT_Image src = NULL, dst = NULL;
 
-    ASSERT_NO_FAILURE(src = arg_->generator(arg_->fileName, arg_->width, arg_->height));
+    ASSERT_NO_FAILURE(src = arg_->generator(arg_->fileName, arg_->width, arg_->height, arg_->format));
     ASSERT_VX_OBJECT(src_image = ct_image_to_vx_image(src, context), VX_TYPE_IMAGE);
 
     ASSERT_NO_FAILURE(arg_->dst_size_generator(src->width, src->height, &dst_width, &dst_height));
 
-    ASSERT_VX_OBJECT(dst_image = vxCreateImage(context, dst_width, dst_height, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
+    ASSERT_VX_OBJECT(dst_image = vxCreateImage(context, dst_width, dst_height, arg_->format), VX_TYPE_IMAGE);
 
     VX_CALL(vxSetContextAttribute(context, VX_CONTEXT_IMMEDIATE_BORDER, &arg_->border, sizeof(arg_->border)));
 
@@ -599,4 +802,95 @@
     ASSERT(src_image == 0);
 }
 
-TESTCASE_TESTS(Scale, testNodeCreation, testGraphProcessing, testImmediateProcessing)
+typedef struct {
+    const char* testName;
+    int dummy;
+    vx_enum interpolation;
+    CT_Image (*generator)(const char* fileName, int width, int height, vx_df_image format);
+    const char* fileName;
+    void (*dst_size_generator)(int width, int height, int* dst_width, int* dst_height);
+    int exact_result;
+    int width, height;
+    vx_df_image format;
+    vx_border_t border;
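+    /* Presumably the per-side offsets used by ADD_VALID_REGION_SHRINKS to shrink the images' valid regions */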
+    vx_rectangle_t regionShift;
+} ValidRegionTest_Arg;
+
+#define REGION_PARAMETERS \
+    /* NN scaling */ \
+    SCALE_TEST(VX_INTERPOLATION_NEAREST_NEIGHBOR, scale_read_image, "lena.bmp", 1_1, 1, ADD_SIZE_NONE, ADD_TYPE_U8, ADD_VX_BORDERS_REQUIRE_UNDEFINED_ONLY, ADD_VALID_REGION_SHRINKS, ARG, 0), \
+    SCALE_TEST(VX_INTERPOLATION_NEAREST_NEIGHBOR, scale_read_image, "lena.bmp", 1_2, 1, ADD_SIZE_NONE, ADD_TYPE_U8, ADD_VX_BORDERS_REQUIRE_UNDEFINED_ONLY, ADD_VALID_REGION_SHRINKS, ARG, 0), \
+    SCALE_TEST(VX_INTERPOLATION_NEAREST_NEIGHBOR, scale_read_image, "lena.bmp", 1_3, 1, ADD_SIZE_NONE, ADD_TYPE_U8, ADD_VX_BORDERS_REQUIRE_UNDEFINED_ONLY, ADD_VALID_REGION_SHRINKS, ARG, 0), \
+    SCALE_TEST(VX_INTERPOLATION_NEAREST_NEIGHBOR, scale_read_image, "lena.bmp", 2_1, 0, ADD_SIZE_NONE, ADD_TYPE_U8, ADD_VX_BORDERS_REQUIRE_UNDEFINED_ONLY, ADD_VALID_REGION_SHRINKS, ARG, 0), \
+    SCALE_TEST(VX_INTERPOLATION_NEAREST_NEIGHBOR, scale_read_image, "lena.bmp", 4_1, 0, ADD_SIZE_NONE, ADD_TYPE_U8, ADD_VX_BORDERS_REQUIRE_UNDEFINED_ONLY, ADD_VALID_REGION_SHRINKS, ARG, 0), \
+    /* Bilinear scaling */ \
+    SCALE_TEST(VX_INTERPOLATION_BILINEAR, scale_read_image, "lena.bmp", 1_1, 0, ADD_SIZE_NONE, ADD_TYPE_U8, ADD_VX_BORDERS_REQUIRE_UNDEFINED_ONLY, ADD_VALID_REGION_SHRINKS, ARG, 0), \
+    SCALE_TEST(VX_INTERPOLATION_BILINEAR, scale_read_image, "lena.bmp", 1_2, 0, ADD_SIZE_NONE, ADD_TYPE_U8, ADD_VX_BORDERS_REQUIRE_UNDEFINED_ONLY, ADD_VALID_REGION_SHRINKS, ARG, 0), \
+    SCALE_TEST(VX_INTERPOLATION_BILINEAR, scale_read_image, "lena.bmp", 1_3, 0, ADD_SIZE_NONE, ADD_TYPE_U8, ADD_VX_BORDERS_REQUIRE_UNDEFINED_ONLY, ADD_VALID_REGION_SHRINKS, ARG, 0), \
+    SCALE_TEST(VX_INTERPOLATION_BILINEAR, scale_read_image, "lena.bmp", 2_1, 0, ADD_SIZE_NONE, ADD_TYPE_U8, ADD_VX_BORDERS_REQUIRE_UNDEFINED_ONLY, ADD_VALID_REGION_SHRINKS, ARG, 0), \
+    SCALE_TEST(VX_INTERPOLATION_BILINEAR, scale_read_image, "lena.bmp", 4_1, 0, ADD_SIZE_NONE, ADD_TYPE_U8, ADD_VX_BORDERS_REQUIRE_UNDEFINED_ONLY, ADD_VALID_REGION_SHRINKS, ARG, 0), \
+    /* Area scaling */ \
+    SCALE_TEST(VX_INTERPOLATION_AREA, scale_read_image, "lena.bmp", 1_1, 0, ADD_SIZE_NONE, ADD_TYPE_U8, ADD_VX_BORDERS_REQUIRE_UNDEFINED_ONLY, ADD_VALID_REGION_SHRINKS, ARG, 0), \
+    SCALE_TEST(VX_INTERPOLATION_AREA, scale_read_image, "lena.bmp", 1_2, 0, ADD_SIZE_NONE, ADD_TYPE_U8, ADD_VX_BORDERS_REQUIRE_UNDEFINED_ONLY, ADD_VALID_REGION_SHRINKS, ARG, 0), \
+    SCALE_TEST(VX_INTERPOLATION_AREA, scale_read_image, "lena.bmp", 1_3, 0, ADD_SIZE_NONE, ADD_TYPE_U8, ADD_VX_BORDERS_REQUIRE_UNDEFINED_ONLY, ADD_VALID_REGION_SHRINKS, ARG, 0), \
+    SCALE_TEST(VX_INTERPOLATION_AREA, scale_read_image, "lena.bmp", 2_1, 0, ADD_SIZE_NONE, ADD_TYPE_U8, ADD_VX_BORDERS_REQUIRE_UNDEFINED_ONLY, ADD_VALID_REGION_SHRINKS, ARG, 0), \
+    SCALE_TEST(VX_INTERPOLATION_AREA, scale_read_image, "lena.bmp", 4_1, 0, ADD_SIZE_NONE, ADD_TYPE_U8, ADD_VX_BORDERS_REQUIRE_UNDEFINED_ONLY, ADD_VALID_REGION_SHRINKS, ARG, 0), \
+    /* U1: NN scaling */ \
+    SCALE_TEST_U1(VX_INTERPOLATION_NEAREST_NEIGHBOR, scale_read_image, "lena.bmp", 1_1, 1, ADD_SIZE_NONE, ADD_TYPE_U1, ADD_VX_BORDERS_U1_REQUIRE_UNDEFINED_ONLY, ADD_VALID_REGION_SHRINKS, ARG, 0), \
+    SCALE_TEST_U1(VX_INTERPOLATION_NEAREST_NEIGHBOR, scale_read_image, "lena.bmp", 1_2, 1, ADD_SIZE_NONE, ADD_TYPE_U1, ADD_VX_BORDERS_U1_REQUIRE_UNDEFINED_ONLY, ADD_VALID_REGION_SHRINKS, ARG, 0), \
+    SCALE_TEST_U1(VX_INTERPOLATION_NEAREST_NEIGHBOR, scale_read_image, "lena.bmp", 1_3, 1, ADD_SIZE_NONE, ADD_TYPE_U1, ADD_VX_BORDERS_U1_REQUIRE_UNDEFINED_ONLY, ADD_VALID_REGION_SHRINKS, ARG, 0), \
+    SCALE_TEST_U1(VX_INTERPOLATION_NEAREST_NEIGHBOR, scale_read_image, "lena.bmp", 2_1, 0, ADD_SIZE_NONE, ADD_TYPE_U1, ADD_VX_BORDERS_U1_REQUIRE_UNDEFINED_ONLY, ADD_VALID_REGION_SHRINKS, ARG, 0), \
+    SCALE_TEST_U1(VX_INTERPOLATION_NEAREST_NEIGHBOR, scale_read_image, "lena.bmp", 4_1, 0, ADD_SIZE_NONE, ADD_TYPE_U1, ADD_VX_BORDERS_U1_REQUIRE_UNDEFINED_ONLY, ADD_VALID_REGION_SHRINKS, ARG, 0), \
+    /* U1: Bilinear scaling */ \
+    SCALE_TEST_U1(VX_INTERPOLATION_BILINEAR, scale_read_image, "lena.bmp", 1_1, 0, ADD_SIZE_NONE, ADD_TYPE_U1, ADD_VX_BORDERS_U1_REQUIRE_UNDEFINED_ONLY, ADD_VALID_REGION_SHRINKS, ARG, 0), \
+    SCALE_TEST_U1(VX_INTERPOLATION_BILINEAR, scale_read_image, "lena.bmp", 1_2, 0, ADD_SIZE_NONE, ADD_TYPE_U1, ADD_VX_BORDERS_U1_REQUIRE_UNDEFINED_ONLY, ADD_VALID_REGION_SHRINKS, ARG, 0), \
+    SCALE_TEST_U1(VX_INTERPOLATION_BILINEAR, scale_read_image, "lena.bmp", 1_3, 0, ADD_SIZE_NONE, ADD_TYPE_U1, ADD_VX_BORDERS_U1_REQUIRE_UNDEFINED_ONLY, ADD_VALID_REGION_SHRINKS, ARG, 0), \
+    SCALE_TEST_U1(VX_INTERPOLATION_BILINEAR, scale_read_image, "lena.bmp", 2_1, 0, ADD_SIZE_NONE, ADD_TYPE_U1, ADD_VX_BORDERS_U1_REQUIRE_UNDEFINED_ONLY, ADD_VALID_REGION_SHRINKS, ARG, 0), \
+    SCALE_TEST_U1(VX_INTERPOLATION_BILINEAR, scale_read_image, "lena.bmp", 4_1, 0, ADD_SIZE_NONE, ADD_TYPE_U1, ADD_VX_BORDERS_U1_REQUIRE_UNDEFINED_ONLY, ADD_VALID_REGION_SHRINKS, ARG, 0), \
+    /* U1: Area scaling */ \
+    SCALE_TEST_U1(VX_INTERPOLATION_AREA, scale_read_image, "lena.bmp", 1_1, 0, ADD_SIZE_NONE, ADD_TYPE_U1, ADD_VX_BORDERS_U1_REQUIRE_UNDEFINED_ONLY, ADD_VALID_REGION_SHRINKS, ARG, 0), \
+    SCALE_TEST_U1(VX_INTERPOLATION_AREA, scale_read_image, "lena.bmp", 1_2, 0, ADD_SIZE_NONE, ADD_TYPE_U1, ADD_VX_BORDERS_U1_REQUIRE_UNDEFINED_ONLY, ADD_VALID_REGION_SHRINKS, ARG, 0), \
+    SCALE_TEST_U1(VX_INTERPOLATION_AREA, scale_read_image, "lena.bmp", 1_3, 0, ADD_SIZE_NONE, ADD_TYPE_U1, ADD_VX_BORDERS_U1_REQUIRE_UNDEFINED_ONLY, ADD_VALID_REGION_SHRINKS, ARG, 0), \
+    SCALE_TEST_U1(VX_INTERPOLATION_AREA, scale_read_image, "lena.bmp", 2_1, 0, ADD_SIZE_NONE, ADD_TYPE_U1, ADD_VX_BORDERS_U1_REQUIRE_UNDEFINED_ONLY, ADD_VALID_REGION_SHRINKS, ARG, 0), \
+    SCALE_TEST_U1(VX_INTERPOLATION_AREA, scale_read_image, "lena.bmp", 4_1, 0, ADD_SIZE_NONE, ADD_TYPE_U1, ADD_VX_BORDERS_U1_REQUIRE_UNDEFINED_ONLY, ADD_VALID_REGION_SHRINKS, ARG, 0)
+
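+/* Scale an image whose valid rectangle has been shrunk and compare the output
+ * against a reference computed from the reduced ROI only. */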
+TEST_WITH_ARG(Scale, testWithValidRegion, ValidRegionTest_Arg,
+    REGION_PARAMETERS
+)
+{
+    vx_context context = context_->vx_context_;
+    int dst_width = 0, dst_height = 0;
+    vx_image src_image = 0, dst_image = 0;
+
+    CT_Image src = NULL, dst = NULL;
+    vx_rectangle_t rect = {0, 0, 0, 0}, rect_shft = arg_->regionShift;
+
+    ASSERT_NO_FAILURE(src = arg_->generator(arg_->fileName, arg_->width, arg_->height, arg_->format));
+    ASSERT_VX_OBJECT(src_image = ct_image_to_vx_image(src, context), VX_TYPE_IMAGE);
+
+    ASSERT_NO_FAILURE(arg_->dst_size_generator(src->width, src->height, &dst_width, &dst_height));
+    ASSERT_VX_OBJECT(dst_image = vxCreateImage(context, dst_width, dst_height, arg_->format), VX_TYPE_IMAGE);
+
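+    /* Shrink the source valid rectangle by the configured offsets */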
+    ASSERT_NO_FAILURE(vxGetValidRegionImage(src_image, &rect));
+    ALTERRECTANGLE(rect, rect_shft.start_x, rect_shft.start_y, rect_shft.end_x, rect_shft.end_y);
+    ASSERT_NO_FAILURE(vxSetImageValidRectangle(src_image, &rect));
+
+    VX_CALL(vxSetContextAttribute(context, VX_CONTEXT_IMMEDIATE_BORDER, &arg_->border, sizeof(arg_->border)));
+
+    VX_CALL(vxuScaleImage(context, src_image, dst_image, arg_->interpolation));
+
+    ASSERT_NO_FAILURE(dst = ct_image_from_vx_image(dst_image));
+
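+    /* Restrict the reference input to the shrunken valid region before checking the scaled output */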
+    ASSERT_NO_FAILURE(ct_adjust_roi(src, rect_shft.start_x, rect_shft.start_y, -rect_shft.end_x, -rect_shft.end_y));
+    ASSERT_NO_FAILURE(scale_check(src, dst, arg_->interpolation, arg_->border, arg_->exact_result));
+
+    VX_CALL(vxReleaseImage(&dst_image));
+    VX_CALL(vxReleaseImage(&src_image));
+
+    ASSERT(dst_image == 0);
+    ASSERT(src_image == 0);
+}
+
+TESTCASE_TESTS(Scale, testNodeCreation, testGraphProcessing, testImmediateProcessing, testWithValidRegion)
+
+#endif //OPENVX_USE_ENHANCED_VISION || OPENVX_CONFORMANCE_VISION
diff --git a/test_conformance/test_smoke.c b/test_conformance/test_smoke.c
index 313ae5d..fd85dd2 100644
--- a/test_conformance/test_smoke.c
+++ b/test_conformance/test_smoke.c
@@ -1,4 +1,4 @@
-/* 
+/*
 
  * Copyright (c) 2012-2017 The Khronos Group Inc.
  *
@@ -20,6 +20,8 @@
 #include <VX/vxu.h>
 #include <string.h>
 
+#if defined OPENVX_USE_ENHANCED_VISION || OPENVX_CONFORMANCE_VISION
+
 TESTCASE(SmokeTest, CT_VXContext, ct_setup_vx_context, 0)
 
 typedef struct _mystruct {
@@ -80,38 +82,6 @@
     vx_reference ref = 0;
 
     {
-        /* test context reference */
-        ref = (vx_reference)context;
-        ASSERT_EQ_VX_STATUS(vxQueryReference(ref, VX_REFERENCE_COUNT, (void*)&ref_count0, sizeof(ref_count0)), VX_SUCCESS);
-        VX_CALL(vxRetainReference(ref));
-        ASSERT_EQ_VX_STATUS(vxQueryReference(ref, VX_REFERENCE_COUNT, (void*)&ref_count1, sizeof(ref_count1)), VX_SUCCESS);
-        ASSERT_EQ_INT(ref_count1 - ref_count0, 1);
-        VX_CALL(vxReleaseReference(&ref));
-        ref = (vx_reference)context;
-        ref_count1 = 0;
-        ASSERT_EQ_VX_STATUS(vxQueryReference(ref, VX_REFERENCE_COUNT, (void*)&ref_count1, sizeof(ref_count1)), VX_SUCCESS);
-        ASSERT_EQ_INT(ref_count1 - ref_count0, 0);
-    }
-
-    {
-        /* test graph reference */
-        vx_graph graph = 0;
-        EXPECT_VX_OBJECT(graph = vxCreateGraph(context), VX_TYPE_GRAPH);
-        ref = (vx_reference)graph;
-        ASSERT_EQ_VX_STATUS(vxQueryReference(ref, VX_REFERENCE_COUNT, (void*)&ref_count0, sizeof(ref_count0)), VX_SUCCESS);
-        VX_CALL(vxRetainReference(ref));
-        ASSERT_EQ_VX_STATUS(vxQueryReference(ref, VX_REFERENCE_COUNT, (void*)&ref_count1, sizeof(ref_count1)), VX_SUCCESS);
-        ASSERT_EQ_INT(ref_count1 - ref_count0, 1);
-        VX_CALL(vxReleaseReference(&ref));
-        ref = (vx_reference)graph;
-        ref_count1 = 0;
-        ASSERT_EQ_VX_STATUS(vxQueryReference(ref, VX_REFERENCE_COUNT, (void*)&ref_count1, sizeof(ref_count1)), VX_SUCCESS);
-        ASSERT_EQ_INT(ref_count1 - ref_count0, 0);
-
-        VX_CALL(vxReleaseGraph(&graph));
-    }
-
-    {
         /* test node reference */
         vx_graph graph = 0;
         vx_node node = 0;
@@ -232,24 +202,6 @@
     }
 
     {
-        /* test graph reference */
-        vx_graph graph = 0;
-        EXPECT_VX_OBJECT(graph = vxCreateGraph(context), VX_TYPE_GRAPH);
-        ref = (vx_reference)graph;
-        ASSERT_EQ_VX_STATUS(vxQueryReference(ref, VX_REFERENCE_COUNT, (void*)&ref_count0, sizeof(ref_count0)), VX_SUCCESS);
-        VX_CALL(vxRetainReference(ref));
-        ASSERT_EQ_VX_STATUS(vxQueryReference(ref, VX_REFERENCE_COUNT, (void*)&ref_count1, sizeof(ref_count1)), VX_SUCCESS);
-        ASSERT_EQ_INT(ref_count1 - ref_count0, 1);
-        VX_CALL(vxReleaseReference(&ref));
-        ref = (vx_reference)graph;
-        ref_count1 = 0;
-        ASSERT_EQ_VX_STATUS(vxQueryReference(ref, VX_REFERENCE_COUNT, (void*)&ref_count1, sizeof(ref_count1)), VX_SUCCESS);
-        ASSERT_EQ_INT(ref_count1 - ref_count0, 0);
-
-        VX_CALL(vxReleaseGraph(&graph));
-    }
-
-    {
         /* test matrix reference */
         vx_matrix matrix = 0;
         EXPECT_VX_OBJECT(matrix = vxCreateMatrix(context, VX_TYPE_FLOAT32, 32, 32), VX_TYPE_MATRIX);
@@ -325,7 +277,7 @@
     {
         /* test threshold reference */
         vx_threshold threshold = 0;
-        EXPECT_VX_OBJECT(threshold = vxCreateThreshold(context, VX_THRESHOLD_TYPE_BINARY, VX_TYPE_UINT8), VX_TYPE_THRESHOLD);
+        EXPECT_VX_OBJECT(threshold = vxCreateThresholdForImage(context, VX_THRESHOLD_TYPE_BINARY, VX_DF_IMAGE_U8, VX_DF_IMAGE_U8), VX_TYPE_THRESHOLD);
         ref = (vx_reference)threshold;
         ASSERT_EQ_VX_STATUS(vxQueryReference(ref, VX_REFERENCE_COUNT, (void*)&ref_count0, sizeof(ref_count0)), VX_SUCCESS);
         VX_CALL(vxRetainReference(ref));
@@ -406,122 +358,50 @@
 TEST(SmokeTest, test_vxRetainReference)
 {
     vx_image image = 0;
-    vx_graph graph = 0;
-    vx_reference image_ref = 0, graph_ref = 0;
-    vx_uint32 image_count = 0, graph_count = 0;
+    vx_reference image_ref = 0;
+    vx_uint32 image_count = 0;
     vx_context context = context_->vx_context_;
     vx_uint32 num_refs1 = 0, num_refs2 = 0, num_refs3 = 0, num_refs4 = 0;
 
     ASSERT_EQ_VX_STATUS(vxQueryContext(context, VX_CONTEXT_REFERENCES, (void*)&num_refs1, sizeof(num_refs1)), VX_SUCCESS);
 
     ASSERT_VX_OBJECT(image = vxCreateImage(context, 128, 128, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
-    ASSERT_VX_OBJECT(graph = vxCreateGraph(context), VX_TYPE_GRAPH);
 
     ASSERT_EQ_VX_STATUS(vxQueryContext(context, VX_CONTEXT_REFERENCES, (void*)&num_refs2, sizeof(num_refs2)), VX_SUCCESS);
-    ASSERT_EQ_INT(num_refs2, num_refs1+2);
+    ASSERT_EQ_INT(num_refs2, num_refs1+1);
 
     image_ref = (vx_reference)image;
-    graph_ref = (vx_reference)graph;
     ASSERT_EQ_VX_STATUS(vxQueryReference(image_ref, VX_REFERENCE_COUNT, (void*)&image_count, sizeof(image_count)), VX_SUCCESS);
-    ASSERT_EQ_VX_STATUS(vxQueryReference(graph_ref, VX_REFERENCE_COUNT, (void*)&graph_count, sizeof(graph_count)), VX_SUCCESS);
     ASSERT_EQ_INT(image_count, 1);
-    ASSERT_EQ_INT(graph_count, 1);
 
     image_ref = (vx_reference)image;
-    graph_ref = (vx_reference)graph;
     VX_CALL(vxRetainReference(image_ref));
-    VX_CALL(vxRetainReference(graph_ref));
 
     image_ref = (vx_reference)image;
-    graph_ref = (vx_reference)graph;
     ASSERT_EQ_VX_STATUS(vxQueryReference(image_ref, VX_REFERENCE_COUNT, (void*)&image_count, sizeof(image_count)), VX_SUCCESS);
-    ASSERT_EQ_VX_STATUS(vxQueryReference(graph_ref, VX_REFERENCE_COUNT, (void*)&graph_count, sizeof(graph_count)), VX_SUCCESS);
     ASSERT_EQ_INT(image_count, 2);
-    ASSERT_EQ_INT(graph_count, 2);
 
     image_ref = (vx_reference)image;
-    graph_ref = (vx_reference)graph;
     VX_CALL(vxReleaseReference(&image_ref));
-    VX_CALL(vxReleaseReference(&graph_ref));
 
     ASSERT_EQ_PTR(0, image_ref);
-    ASSERT_EQ_PTR(0, graph_ref);
 
     image_ref = (vx_reference)image;
-    graph_ref = (vx_reference)graph;
     ASSERT_EQ_VX_STATUS(vxQueryReference(image_ref, VX_REFERENCE_COUNT, (void*)&image_count, sizeof(image_count)), VX_SUCCESS);
-    ASSERT_EQ_VX_STATUS(vxQueryReference(graph_ref, VX_REFERENCE_COUNT, (void*)&graph_count, sizeof(graph_count)), VX_SUCCESS);
     ASSERT_EQ_INT(image_count, 1);
-    ASSERT_EQ_INT(graph_count, 1);
 
     ASSERT_EQ_VX_STATUS(vxQueryContext(context, VX_CONTEXT_REFERENCES, (void*)&num_refs3, sizeof(num_refs3)), VX_SUCCESS);
-    ASSERT_EQ_INT(num_refs3, num_refs1+2);
+    ASSERT_EQ_INT(num_refs3, num_refs1+1);
 
     image_ref = (vx_reference)image;
-    graph_ref = (vx_reference)graph;
     VX_CALL(vxReleaseReference(&image_ref));
-    VX_CALL(vxReleaseReference(&graph_ref));
 
     ASSERT_EQ_PTR(0, image_ref);
-    ASSERT_EQ_PTR(0, graph_ref);
 
     ASSERT_EQ_VX_STATUS(vxQueryContext(context, VX_CONTEXT_REFERENCES, (void*)&num_refs4, sizeof(num_refs4)), VX_SUCCESS);
     ASSERT_EQ_INT(num_refs4, num_refs1);
 }
 
-TEST(SmokeTest, test_vxUnloadKernels)
-{
-    vx_context context = context_->vx_context_;
-    vx_kernel kernel = NULL;
-    vx_int32 num_modules1;
-    vx_int32 num_modules2;
-    vx_int32 num_unique_kernels1;
-    vx_int32 num_unique_kernels2;
-
-    ASSERT_EQ_VX_STATUS(vxQueryContext(context, VX_CONTEXT_MODULES, (void*)&num_modules1, sizeof(num_modules1)), VX_SUCCESS);
-    ASSERT_EQ_VX_STATUS(vxQueryContext(context, VX_CONTEXT_UNIQUE_KERNELS, (void*)&num_unique_kernels1, sizeof(num_unique_kernels1)), VX_SUCCESS);
-    ASSERT(num_modules1 >= 0u);
-    ASSERT(num_unique_kernels1 > 0u);
-
-    kernel = vxGetKernelByName(context, "org.khronos.test.testmodule");
-    ASSERT_NE_VX_STATUS(VX_SUCCESS, vxGetStatus((vx_reference)kernel));
-
-    VX_CALL(vxLoadKernels(context, "test-testmodule"));
-    ASSERT_VX_OBJECT(kernel = vxGetKernelByName(context, "org.khronos.test.testmodule"), VX_TYPE_KERNEL);
-    VX_CALL(vxReleaseKernel(&kernel));
-
-    ASSERT_EQ_VX_STATUS(vxQueryContext(context, VX_CONTEXT_MODULES, (void*)&num_modules2, sizeof(num_modules1)), VX_SUCCESS);
-    ASSERT_EQ_VX_STATUS(vxQueryContext(context, VX_CONTEXT_UNIQUE_KERNELS, (void*)&num_unique_kernels2, sizeof(num_unique_kernels2)), VX_SUCCESS);
-    ASSERT(num_modules2 > num_modules1);
-    ASSERT(num_unique_kernels2 > num_unique_kernels1);
-
-    VX_CALL(vxUnloadKernels(context, "test-testmodule"));
-
-    kernel = vxGetKernelByName(context, "org.khronos.test.testmodule");
-    ASSERT_NE_VX_STATUS(VX_SUCCESS, vxGetStatus((vx_reference)kernel));
-
-    ASSERT_EQ_VX_STATUS(vxQueryContext(context, VX_CONTEXT_MODULES, (void*)&num_modules2, sizeof(num_modules1)), VX_SUCCESS);
-    ASSERT_EQ_VX_STATUS(vxQueryContext(context, VX_CONTEXT_UNIQUE_KERNELS, (void*)&num_unique_kernels2, sizeof(num_unique_kernels2)), VX_SUCCESS);
-    ASSERT(num_modules2 == num_modules1);
-    ASSERT(num_unique_kernels2 == num_unique_kernels1);
-}
-
-TEST(SmokeTest, test_vxSetReferenceName)
-{
-    vx_context context = context_->vx_context_;
-
-    vx_image image = vxCreateImage(context, 128, 128, VX_DF_IMAGE_U8);
-    const char* image_name = "Image";
-    char* actual_name = NULL;
-
-    VX_CALL(vxSetReferenceName((vx_reference)image, image_name));
-    VX_CALL(vxQueryReference((vx_reference)image, VX_REFERENCE_NAME, &actual_name, sizeof(actual_name)));
-
-    ASSERT(0 == strcmp(image_name, actual_name));
-
-    VX_CALL(vxReleaseImage(&image));
-}
-
 TEST(SmokeTest, test_vxSetParameterByIndex)
 {
     vx_context context = context_->vx_context_;
@@ -576,12 +456,389 @@
     VX_CALL(vxReleaseGraph(&graph));
 }
 
+TEST(SmokeTest, test_vxSetParameterByReference)
+{
+    vx_context context = context_->vx_context_;
+    vx_image src_image = 0, dst_image = 0;
+    vx_graph graph = 0;
+    vx_kernel kernel = 0;
+    vx_node node = 0;
+    vx_uint32 num_params = 0;
+    vx_parameter parameter = 0;
+    vx_image p_image = 0;
+
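+    /* Bind the Box3x3 input by index, then attach the output image through vxSetParameterByReference and verify both bindings */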
+    ASSERT_VX_OBJECT(src_image = vxCreateImage(context, 128, 128, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
+    ASSERT_VX_OBJECT(dst_image = vxCreateImage(context, 128, 128, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
+    ASSERT_VX_OBJECT(graph = vxCreateGraph(context), VX_TYPE_GRAPH);
+    ASSERT_VX_OBJECT(kernel = vxGetKernelByEnum(context, VX_KERNEL_BOX_3x3), VX_TYPE_KERNEL);
+    VX_CALL(vxQueryKernel(kernel, VX_KERNEL_PARAMETERS, &num_params, sizeof(num_params)));
+    ASSERT_EQ_INT(2, num_params);
+    ASSERT_VX_OBJECT(node = vxCreateGenericNode(graph, kernel), VX_TYPE_NODE);
+    VX_CALL(vxSetParameterByIndex(node, 0, (vx_reference)src_image));
+
+    ASSERT_VX_OBJECT(parameter = vxGetParameterByIndex(node, 0), VX_TYPE_PARAMETER);
+    VX_CALL(vxQueryParameter(parameter, VX_PARAMETER_REF, &p_image, sizeof(p_image)));
+    ASSERT(p_image == src_image);
+    VX_CALL(vxReleaseImage(&p_image));
+    VX_CALL(vxReleaseParameter(&parameter));
+
+    ASSERT_VX_OBJECT(parameter = vxGetParameterByIndex(node, 1), VX_TYPE_PARAMETER);
+    VX_CALL(vxQueryParameter(parameter, VX_PARAMETER_REF, &p_image, sizeof(p_image)));
+    ASSERT(p_image != dst_image);
+    VX_CALL(vxSetParameterByReference(parameter, (vx_reference)dst_image));
+    VX_CALL(vxQueryParameter(parameter, VX_PARAMETER_REF, &p_image, sizeof(p_image)));
+    ASSERT(p_image == dst_image);
+
+    VX_CALL(vxReleaseImage(&p_image));
+    VX_CALL(vxReleaseParameter(&parameter));
+    VX_CALL(vxVerifyGraph(graph));
+    VX_CALL(vxProcessGraph(graph));
+    VX_CALL(vxReleaseNode(&node));
+    VX_CALL(vxReleaseKernel(&kernel));
+    VX_CALL(vxReleaseGraph(&graph));
+    VX_CALL(vxReleaseImage(&dst_image));
+    VX_CALL(vxReleaseImage(&src_image));
+
+    ASSERT(node == 0);
+    ASSERT(kernel == 0);
+    ASSERT(graph == 0);
+    ASSERT(dst_image == 0);
+    ASSERT(src_image == 0);
+}
+
+TEST(SmokeTest, test_vxGetParameterByIndex)
+{
+    vx_context context = context_->vx_context_;
+    vx_image src_image = 0;
+    vx_graph graph = 0;
+    vx_kernel kernel = 0;
+    vx_node node = 0;
+    vx_uint32 num_params = 0;
+    vx_parameter parameter = 0;
+    vx_image p_image = 0;
+
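+    /* Bind the node input by index and check that vxGetParameterByIndex returns the same image */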
+    ASSERT_VX_OBJECT(src_image = vxCreateImage(context, 128, 128, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
+    ASSERT_VX_OBJECT(graph = vxCreateGraph(context), VX_TYPE_GRAPH);
+    ASSERT_VX_OBJECT(kernel = vxGetKernelByEnum(context, VX_KERNEL_BOX_3x3), VX_TYPE_KERNEL);
+    VX_CALL(vxQueryKernel(kernel, VX_KERNEL_PARAMETERS, &num_params, sizeof(num_params)));
+    ASSERT_EQ_INT(2, num_params);
+    ASSERT_VX_OBJECT(node = vxCreateGenericNode(graph, kernel), VX_TYPE_NODE);
+    VX_CALL(vxSetParameterByIndex(node, 0, (vx_reference)src_image));
+
+    ASSERT_VX_OBJECT(parameter = vxGetParameterByIndex(node, 0), VX_TYPE_PARAMETER);
+    VX_CALL(vxQueryParameter(parameter, VX_PARAMETER_REF, &p_image, sizeof(p_image)));
+    ASSERT(p_image == src_image);
+    VX_CALL(vxReleaseImage(&p_image));
+    VX_CALL(vxReleaseParameter(&parameter));
+
+    VX_CALL(vxReleaseNode(&node));
+    VX_CALL(vxReleaseKernel(&kernel));
+    VX_CALL(vxReleaseGraph(&graph));
+    VX_CALL(vxReleaseImage(&src_image));
+
+    ASSERT(node == 0);
+    ASSERT(kernel == 0);
+    ASSERT(graph == 0);
+    ASSERT(src_image == 0);
+}
+
+#endif //OPENVX_USE_ENHANCED_VISION || OPENVX_CONFORMANCE_VISION
+
+
+TESTCASE(SmokeTestBase, CT_VXContext, ct_setup_vx_context, 0)
+
+TEST(SmokeTestBase, test_vxReleaseReferenceBase)
+{
+    vx_context context = context_->vx_context_;
+    vx_uint32 ref_count0 = 0;
+    vx_uint32 ref_count1 = 0;
+    vx_reference ref = 0;
+
+    {
+        /* test context reference */
+        ref = (vx_reference)context;
+        ASSERT_EQ_VX_STATUS(vxQueryReference(ref, VX_REFERENCE_COUNT, (void*)&ref_count0, sizeof(ref_count0)), VX_SUCCESS);
+        VX_CALL(vxRetainReference(ref));
+        ASSERT_EQ_VX_STATUS(vxQueryReference(ref, VX_REFERENCE_COUNT, (void*)&ref_count1, sizeof(ref_count1)), VX_SUCCESS);
+        ASSERT_EQ_INT(ref_count1 - ref_count0, 1);
+        VX_CALL(vxReleaseReference(&ref));
+        ref = (vx_reference)context;
+        ref_count1 = 0;
+        ASSERT_EQ_VX_STATUS(vxQueryReference(ref, VX_REFERENCE_COUNT, (void*)&ref_count1, sizeof(ref_count1)), VX_SUCCESS);
+        ASSERT_EQ_INT(ref_count1 - ref_count0, 0);
+    }
+
+    {
+        /* test graph reference */
+        vx_graph graph = 0;
+        EXPECT_VX_OBJECT(graph = vxCreateGraph(context), VX_TYPE_GRAPH);
+        ref = (vx_reference)graph;
+        ASSERT_EQ_VX_STATUS(vxQueryReference(ref, VX_REFERENCE_COUNT, (void*)&ref_count0, sizeof(ref_count0)), VX_SUCCESS);
+        VX_CALL(vxRetainReference(ref));
+        ASSERT_EQ_VX_STATUS(vxQueryReference(ref, VX_REFERENCE_COUNT, (void*)&ref_count1, sizeof(ref_count1)), VX_SUCCESS);
+        ASSERT_EQ_INT(ref_count1 - ref_count0, 1);
+        VX_CALL(vxReleaseReference(&ref));
+        ref = (vx_reference)graph;
+        ref_count1 = 0;
+        ASSERT_EQ_VX_STATUS(vxQueryReference(ref, VX_REFERENCE_COUNT, (void*)&ref_count1, sizeof(ref_count1)), VX_SUCCESS);
+        ASSERT_EQ_INT(ref_count1 - ref_count0, 0);
+
+        VX_CALL(vxReleaseGraph(&graph));
+    }
+
+    {
+        /* test graph reference */
+        vx_graph graph = 0;
+        EXPECT_VX_OBJECT(graph = vxCreateGraph(context), VX_TYPE_GRAPH);
+        ref = (vx_reference)graph;
+        ASSERT_EQ_VX_STATUS(vxQueryReference(ref, VX_REFERENCE_COUNT, (void*)&ref_count0, sizeof(ref_count0)), VX_SUCCESS);
+        VX_CALL(vxRetainReference(ref));
+        ASSERT_EQ_VX_STATUS(vxQueryReference(ref, VX_REFERENCE_COUNT, (void*)&ref_count1, sizeof(ref_count1)), VX_SUCCESS);
+        ASSERT_EQ_INT(ref_count1 - ref_count0, 1);
+        VX_CALL(vxReleaseReference(&ref));
+        ref = (vx_reference)graph;
+        ref_count1 = 0;
+        ASSERT_EQ_VX_STATUS(vxQueryReference(ref, VX_REFERENCE_COUNT, (void*)&ref_count1, sizeof(ref_count1)), VX_SUCCESS);
+        ASSERT_EQ_INT(ref_count1 - ref_count0, 0);
+
+        VX_CALL(vxReleaseGraph(&graph));
+    }
+
+    {
+        /* test kernel reference */
+        vx_kernel kernel = 0;
+        VX_CALL(vxLoadKernels(context, "test-testmodule"));
+        EXPECT_VX_OBJECT(kernel = vxGetKernelByName(context, "org.khronos.test.testmodule"), VX_TYPE_KERNEL);
+        ref = (vx_reference)kernel;
+        ASSERT_EQ_VX_STATUS(vxQueryReference(ref, VX_REFERENCE_COUNT, (void*)&ref_count0, sizeof(ref_count0)), VX_SUCCESS);
+        VX_CALL(vxRetainReference(ref));
+        ASSERT_EQ_VX_STATUS(vxQueryReference(ref, VX_REFERENCE_COUNT, (void*)&ref_count1, sizeof(ref_count1)), VX_SUCCESS);
+        ASSERT_EQ_INT(ref_count1 - ref_count0, 1);
+        VX_CALL(vxReleaseReference(&ref));
+        ref = (vx_reference)kernel;
+        ref_count1 = 0;
+        ASSERT_EQ_VX_STATUS(vxQueryReference(ref, VX_REFERENCE_COUNT, (void*)&ref_count1, sizeof(ref_count1)), VX_SUCCESS);
+        ASSERT_EQ_INT(ref_count1 - ref_count0, 0);
+
+        VX_CALL(vxReleaseKernel(&kernel));
+        VX_CALL(vxUnloadKernels(context, "test-testmodule"));
+    }
+
+    {
+        /* test parameter reference */
+        vx_kernel kernel = 0;
+        vx_parameter parameter = 0;
+        VX_CALL(vxLoadKernels(context, "test-testmodule"));
+        EXPECT_VX_OBJECT(kernel = vxGetKernelByName(context, "org.khronos.test.testmodule"), VX_TYPE_KERNEL);
+        EXPECT_VX_OBJECT(parameter = vxGetKernelParameterByIndex(kernel, 0), VX_TYPE_PARAMETER);
+        ref = (vx_reference)parameter;
+        ASSERT_EQ_VX_STATUS(vxQueryReference(ref, VX_REFERENCE_COUNT, (void*)&ref_count0, sizeof(ref_count0)), VX_SUCCESS);
+        VX_CALL(vxRetainReference(ref));
+        ASSERT_EQ_VX_STATUS(vxQueryReference(ref, VX_REFERENCE_COUNT, (void*)&ref_count1, sizeof(ref_count1)), VX_SUCCESS);
+        ASSERT_EQ_INT(ref_count1 - ref_count0, 1);
+        VX_CALL(vxReleaseReference(&ref));
+        ref = (vx_reference)parameter;
+        ref_count1 = 0;
+        ASSERT_EQ_VX_STATUS(vxQueryReference(ref, VX_REFERENCE_COUNT, (void*)&ref_count1, sizeof(ref_count1)), VX_SUCCESS);
+        ASSERT_EQ_INT(ref_count1 - ref_count0, 0);
+
+        VX_CALL(vxReleaseKernel(&kernel));
+        VX_CALL(vxReleaseParameter(&parameter));
+        VX_CALL(vxUnloadKernels(context, "test-testmodule"));
+    }
+
+}
+
+TEST(SmokeTestBase, test_vxLoadKernels)
+{
+    vx_context context = context_->vx_context_;
+    vx_kernel kernel = 0;
+
+    VX_CALL(vxLoadKernels(context, "test-testmodule"));
+    EXPECT_VX_OBJECT(kernel = vxGetKernelByName(context, "org.khronos.test.testmodule"), VX_TYPE_KERNEL);
+    VX_CALL(vxReleaseKernel(&kernel));
+    VX_CALL(vxUnloadKernels(context, "test-testmodule"));
+}
+
+
+TEST(SmokeTestBase, test_vxUnloadKernels)
+{
+    vx_context context = context_->vx_context_;
+    vx_kernel kernel = NULL;
+    vx_int32 num_modules1;
+    vx_int32 num_modules2;
+    vx_int32 num_unique_kernels1;
+    vx_int32 num_unique_kernels2;
+
+    ASSERT_EQ_VX_STATUS(vxQueryContext(context, VX_CONTEXT_MODULES, (void*)&num_modules1, sizeof(num_modules1)), VX_SUCCESS);
+    ASSERT_EQ_VX_STATUS(vxQueryContext(context, VX_CONTEXT_UNIQUE_KERNELS, (void*)&num_unique_kernels1, sizeof(num_unique_kernels1)), VX_SUCCESS);
+    ASSERT(num_modules1 >= 0u);
+    ASSERT(num_unique_kernels1 > 0u);
+
+    kernel = vxGetKernelByName(context, "org.khronos.test.testmodule");
+    ASSERT_NE_VX_STATUS(VX_SUCCESS, vxGetStatus((vx_reference)kernel));
+
+    VX_CALL(vxLoadKernels(context, "test-testmodule"));
+    ASSERT_VX_OBJECT(kernel = vxGetKernelByName(context, "org.khronos.test.testmodule"), VX_TYPE_KERNEL);
+    VX_CALL(vxReleaseKernel(&kernel));
+
+    ASSERT_EQ_VX_STATUS(vxQueryContext(context, VX_CONTEXT_MODULES, (void*)&num_modules2, sizeof(num_modules1)), VX_SUCCESS);
+    ASSERT_EQ_VX_STATUS(vxQueryContext(context, VX_CONTEXT_UNIQUE_KERNELS, (void*)&num_unique_kernels2, sizeof(num_unique_kernels2)), VX_SUCCESS);
+    ASSERT(num_modules2 > num_modules1);
+    ASSERT(num_unique_kernels2 > num_unique_kernels1);
+
+    VX_CALL(vxUnloadKernels(context, "test-testmodule"));
+
+    kernel = vxGetKernelByName(context, "org.khronos.test.testmodule");
+    ASSERT_NE_VX_STATUS(VX_SUCCESS, vxGetStatus((vx_reference)kernel));
+
+    ASSERT_EQ_VX_STATUS(vxQueryContext(context, VX_CONTEXT_MODULES, (void*)&num_modules2, sizeof(num_modules1)), VX_SUCCESS);
+    ASSERT_EQ_VX_STATUS(vxQueryContext(context, VX_CONTEXT_UNIQUE_KERNELS, (void*)&num_unique_kernels2, sizeof(num_unique_kernels2)), VX_SUCCESS);
+    ASSERT(num_modules2 == num_modules1);
+    ASSERT(num_unique_kernels2 == num_unique_kernels1);
+}
+
+TEST(SmokeTestBase, test_vxSetReferenceName)
+{
+    vx_context context = context_->vx_context_;
+
+    vx_graph graph = vxCreateGraph(context);
+    const char* graph_name = "Graph";
+    char* actual_name = NULL;
+
+    VX_CALL(vxSetReferenceName((vx_reference)graph, graph_name));
+    VX_CALL(vxQueryReference((vx_reference)graph, VX_REFERENCE_NAME, &actual_name, sizeof(actual_name)));
+
+    ASSERT(0 == strcmp(graph_name, actual_name));
+
+    VX_CALL(vxReleaseGraph(&graph));
+}
+
+TEST(SmokeTestBase, test_vxGetStatus)
+{
+    vx_context context = context_->vx_context_;
+    vx_kernel kernel = NULL;
+    vx_status status = VX_SUCCESS;
+
+    status = vxGetStatus((vx_reference)kernel);
+    ASSERT_EQ_INT(VX_ERROR_NO_RESOURCES, status);
+
+    kernel = vxGetKernelByName(context, "org.khronos.test.testmodule");
+    status = vxGetStatus((vx_reference)kernel);
+    ASSERT_NE_VX_STATUS(VX_SUCCESS, status);
+
+    kernel = vxGetKernelByName(context, "org.khronos.openvx.color_convert");
+    status = vxGetStatus((vx_reference)kernel);
+    ASSERT_EQ_INT(VX_SUCCESS, status);
+}
+
+TEST(SmokeTestBase, test_vxGetContext)
+{
+    vx_context context = context_->vx_context_;
+    vx_kernel kernel = NULL;
+    vx_context context_test = 0;
+
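+    /* A NULL reference yields a NULL context; a valid kernel yields its owning context */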
+    context_test = vxGetContext((vx_reference)kernel);
+    EXPECT_EQ_PTR(context_test, NULL);
+
+    kernel = vxGetKernelByName(context, "org.khronos.openvx.color_convert");
+    context_test = vxGetContext((vx_reference)kernel);
+    ASSERT_VX_OBJECT(context_test, VX_TYPE_CONTEXT);
+}
+
+TEST(SmokeTestBase, test_vxQueryReference)
+{
+    vx_context context = context_->vx_context_;
+    char* actual_name = NULL;
+    vx_status status = VX_SUCCESS;
+    vx_graph graph = 0;
+
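+    /* A NULL reference, undersized buffers, a NULL output pointer and an unknown attribute must all be rejected */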
+    status = vxQueryReference((vx_reference)graph, VX_REFERENCE_NAME, &actual_name, sizeof(actual_name));
+    ASSERT_EQ_INT(VX_ERROR_INVALID_REFERENCE, status);
+
+    graph = vxCreateGraph(context);
+    status = vxQueryReference((vx_reference)graph, VX_REFERENCE_COUNT, &actual_name, 3);
+    ASSERT_EQ_INT(VX_ERROR_INVALID_PARAMETERS, status);
+    status = vxQueryReference((vx_reference)graph, VX_REFERENCE_TYPE, &actual_name, 1);
+    ASSERT_EQ_INT(VX_ERROR_INVALID_PARAMETERS, status);
+    status = vxQueryReference((vx_reference)graph, VX_REFERENCE_NAME, NULL, 1);
+    ASSERT_EQ_INT(VX_ERROR_INVALID_PARAMETERS, status);
+    status = vxQueryReference((vx_reference)graph, VX_TYPE_REFERENCE, &actual_name, sizeof(actual_name));
+    ASSERT_EQ_INT(VX_ERROR_NOT_SUPPORTED, status);
+
+    VX_CALL(vxReleaseGraph(&graph));
+
+}
+
+TEST(SmokeTestBase, test_vxRetainReferenceBase)
+{
+    vx_graph graph = 0;
+    vx_reference graph_ref = 0;
+    vx_uint32 graph_count = 0;
+    vx_context context = context_->vx_context_;
+    vx_uint32 num_refs1 = 0, num_refs2 = 0, num_refs3 = 0, num_refs4 = 0;
+
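+    /* Track VX_CONTEXT_REFERENCES and the graph's VX_REFERENCE_COUNT across retain/release */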
+    ASSERT_EQ_VX_STATUS(vxQueryContext(context, VX_CONTEXT_REFERENCES, (void*)&num_refs1, sizeof(num_refs1)), VX_SUCCESS);
+
+    ASSERT_VX_OBJECT(graph = vxCreateGraph(context), VX_TYPE_GRAPH);
+
+    ASSERT_EQ_VX_STATUS(vxQueryContext(context, VX_CONTEXT_REFERENCES, (void*)&num_refs2, sizeof(num_refs2)), VX_SUCCESS);
+    ASSERT_EQ_INT(num_refs2, num_refs1 + 1);
+
+    graph_ref = (vx_reference)graph;
+    ASSERT_EQ_VX_STATUS(vxQueryReference(graph_ref, VX_REFERENCE_COUNT, (void*)&graph_count, sizeof(graph_count)), VX_SUCCESS);
+    ASSERT_EQ_INT(graph_count, 1);
+
+    graph_ref = (vx_reference)graph;
+    VX_CALL(vxRetainReference(graph_ref));
+
+    graph_ref = (vx_reference)graph;
+    ASSERT_EQ_VX_STATUS(vxQueryReference(graph_ref, VX_REFERENCE_COUNT, (void*)&graph_count, sizeof(graph_count)), VX_SUCCESS);
+    ASSERT_EQ_INT(graph_count, 2);
+
+    graph_ref = (vx_reference)graph;
+    VX_CALL(vxReleaseReference(&graph_ref));
+
+    ASSERT_EQ_PTR(0, graph_ref);
+
+    graph_ref = (vx_reference)graph;
+    ASSERT_EQ_VX_STATUS(vxQueryReference(graph_ref, VX_REFERENCE_COUNT, (void*)&graph_count, sizeof(graph_count)), VX_SUCCESS);
+    ASSERT_EQ_INT(graph_count, 1);
+
+    ASSERT_EQ_VX_STATUS(vxQueryContext(context, VX_CONTEXT_REFERENCES, (void*)&num_refs3, sizeof(num_refs3)), VX_SUCCESS);
+    ASSERT_EQ_INT(num_refs3, num_refs1 + 1);
+
+    graph_ref = (vx_reference)graph;
+    VX_CALL(vxReleaseReference(&graph_ref));
+
+    ASSERT_EQ_PTR(0, graph_ref);
+
+    ASSERT_EQ_VX_STATUS(vxQueryContext(context, VX_CONTEXT_REFERENCES, (void*)&num_refs4, sizeof(num_refs4)), VX_SUCCESS);
+    ASSERT_EQ_INT(num_refs4, num_refs1);
+}
+
+#if defined OPENVX_USE_ENHANCED_VISION || OPENVX_CONFORMANCE_VISION
+
 TESTCASE_TESTS(SmokeTest,
         test_vxRegisterUserStruct,
         test_vxHint,
         test_vxReleaseReference,
         test_vxRetainReference,
+        test_vxSetParameterByIndex,
+        test_vxSetParameterByReference,
+        test_vxGetParameterByIndex
+        )
+
+#endif
+
+TESTCASE_TESTS(SmokeTestBase,
+        test_vxReleaseReferenceBase,
+        test_vxLoadKernels,
         test_vxUnloadKernels,
         test_vxSetReferenceName,
-        test_vxSetParameterByIndex
+        test_vxGetStatus,
+        test_vxGetContext,
+        test_vxQueryReference,
+        test_vxRetainReferenceBase
         )
+
diff --git a/test_conformance/test_sobel3x3.c b/test_conformance/test_sobel3x3.c
index 767406c..a3f94c5 100644
--- a/test_conformance/test_sobel3x3.c
+++ b/test_conformance/test_sobel3x3.c
@@ -15,6 +15,8 @@
  * limitations under the License.
  */
 
+#if defined OPENVX_USE_ENHANCED_VISION || OPENVX_CONFORMANCE_VISION
+
 #include "test_engine/test.h"
 #include <VX/vx.h>
 #include <VX/vxu.h>
@@ -335,3 +337,5 @@
 }
 
 TESTCASE_TESTS(Sobel3x3, testNodeCreation, testGraphProcessing, testImmediateProcessing)
+
+#endif //OPENVX_USE_ENHANCED_VISION || OPENVX_CONFORMANCE_VISION
diff --git a/test_conformance/test_target.c b/test_conformance/test_target.c
index 6b08863..791161c 100644
--- a/test_conformance/test_target.c
+++ b/test_conformance/test_target.c
@@ -1,4 +1,4 @@
-/* 
+/*
 
  * Copyright (c) 2012-2017 The Khronos Group Inc.
  *
@@ -21,10 +21,6 @@
 #include <VX/vxu.h>
 #include <VX/vx_kernels.h>
 
-
-TESTCASE(Target, CT_VXContext, ct_setup_vx_context, 0)
-
-
 typedef struct
 {
     const char* testName;
@@ -47,6 +43,12 @@
 #define SET_IMM_MODE_TARGET_PARAMETERS \
     CT_GENERATE_PARAMETERS("target", ADD_SET_TARGET_PARAMETERS, ARG, NULL)
 
+
+
+#if defined OPENVX_USE_ENHANCED_VISION || OPENVX_CONFORMANCE_VISION
+
+TESTCASE(Target, CT_VXContext, ct_setup_vx_context, 0)
+
 TEST_WITH_ARG(Target, testvxSetNodeTarget, SetTarget_Arg, SET_NODE_TARGET_PARAMETERS)
 {
     vx_context context = context_->vx_context_;
@@ -103,8 +105,188 @@
     return;
 }
 
+#endif
+
+TESTCASE(TargetBase, CT_VXContext, ct_setup_vx_context, 0)
+
+TEST(TargetBase, testvxCreateContext)
+{
+    vx_context context = vxCreateContext();
+    ASSERT_VX_OBJECT(context, VX_TYPE_CONTEXT);
+    vxReleaseContext(&context);
+}
+
+TEST(TargetBase, testvxQueryContext)
+{
+    vx_context context = context_->vx_context_;
+    vx_status status = VX_SUCCESS;
+    vx_uint32 num_refs1 = 0;
+    char * test = (char*)ct_alloc_mem(VX_MAX_IMPLEMENTATION_NAME);
+
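+    /* First check the invalid-context path, then query each attribute with a valid size and with a bad size or NULL pointer */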
+    vx_context context_test = NULL;
+    status = vxQueryContext(context_test, VX_CONTEXT_REFERENCES, (void*)&num_refs1, sizeof(num_refs1));
+    ASSERT_EQ_INT(VX_ERROR_INVALID_REFERENCE, status);
+
+    status = vxQueryContext(context, VX_CONTEXT_VENDOR_ID, test, sizeof(vx_uint16));
+    ASSERT_EQ_INT(VX_SUCCESS, status);
+    status = vxQueryContext(context, VX_CONTEXT_VENDOR_ID, test, 1);
+    ASSERT_EQ_INT(VX_ERROR_INVALID_PARAMETERS, status);
+
+    status = vxQueryContext(context, VX_CONTEXT_VERSION, test, sizeof(vx_uint16));
+    ASSERT_EQ_INT(VX_SUCCESS, status);
+    status = vxQueryContext(context, VX_CONTEXT_VERSION, test, 1);
+    ASSERT_EQ_INT(VX_ERROR_INVALID_PARAMETERS, status);
+
+    status = vxQueryContext(context, VX_CONTEXT_MODULES, (void*)&num_refs1, sizeof(num_refs1));
+    ASSERT_EQ_INT(VX_SUCCESS, status);
+    status = vxQueryContext(context, VX_CONTEXT_MODULES, (void*)&num_refs1, 1);
+    ASSERT_EQ_INT(VX_ERROR_INVALID_PARAMETERS, status);
+
+    status = vxQueryContext(context, VX_CONTEXT_REFERENCES, (void*)&num_refs1, sizeof(num_refs1));
+    ASSERT_EQ_INT(VX_SUCCESS, status);
+    status = vxQueryContext(context, VX_CONTEXT_REFERENCES, (void*)&num_refs1, 1);
+    ASSERT_EQ_INT(VX_ERROR_INVALID_PARAMETERS, status);
+
+    status = vxQueryContext(context, VX_CONTEXT_IMPLEMENTATION, (void*)test, VX_MAX_IMPLEMENTATION_NAME);
+    ASSERT_EQ_INT(VX_SUCCESS, status);
+    status = vxQueryContext(context, VX_CONTEXT_IMPLEMENTATION, (void*)test, VX_MAX_IMPLEMENTATION_NAME + 1);
+    ASSERT_EQ_INT(VX_ERROR_INVALID_PARAMETERS, status);
+
+    status = vxQueryContext(context, VX_CONTEXT_EXTENSIONS_SIZE, test, sizeof(vx_size));
+    ASSERT_EQ_INT(VX_SUCCESS, status);
+    status = vxQueryContext(context, VX_CONTEXT_EXTENSIONS_SIZE, test, 1);
+    ASSERT_EQ_INT(VX_ERROR_INVALID_PARAMETERS, status);
+
+    status = vxQueryContext(context, VX_CONTEXT_EXTENSIONS, test, 2);
+    ASSERT_EQ_INT(VX_SUCCESS, status);
+    status = vxQueryContext(context, VX_CONTEXT_EXTENSIONS, NULL, 2);
+    ASSERT_EQ_INT(VX_ERROR_INVALID_PARAMETERS, status);
+
+    status = vxQueryContext(context, VX_CONTEXT_CONVOLUTION_MAX_DIMENSION, test, sizeof(vx_size));
+    ASSERT_EQ_INT(VX_SUCCESS, status);
+    status = vxQueryContext(context, VX_CONTEXT_CONVOLUTION_MAX_DIMENSION, test, 1);
+    ASSERT_EQ_INT(VX_ERROR_INVALID_PARAMETERS, status);
+
+    status = vxQueryContext(context, VX_CONTEXT_NONLINEAR_MAX_DIMENSION, test, sizeof(vx_size));
+    ASSERT_EQ_INT(VX_SUCCESS, status);
+    status = vxQueryContext(context, VX_CONTEXT_NONLINEAR_MAX_DIMENSION, test, 1);
+    ASSERT_EQ_INT(VX_ERROR_INVALID_PARAMETERS, status);
+
+    status = vxQueryContext(context, VX_CONTEXT_OPTICAL_FLOW_MAX_WINDOW_DIMENSION, test, sizeof(vx_size));
+    ASSERT_EQ_INT(VX_SUCCESS, status);
+    status = vxQueryContext(context, VX_CONTEXT_OPTICAL_FLOW_MAX_WINDOW_DIMENSION, test, 1);
+    ASSERT_EQ_INT(VX_ERROR_INVALID_PARAMETERS, status);
+
+    status = vxQueryContext(context, VX_CONTEXT_IMMEDIATE_BORDER, test, sizeof(vx_border_t));
+    ASSERT_EQ_INT(VX_SUCCESS, status);
+    status = vxQueryContext(context, VX_CONTEXT_IMMEDIATE_BORDER, NULL, 1);
+    ASSERT_EQ_INT(VX_ERROR_INVALID_PARAMETERS, status);
+
+    status = vxQueryContext(context, VX_CONTEXT_IMMEDIATE_BORDER_POLICY, test, sizeof(vx_enum));
+    ASSERT_EQ_INT(VX_SUCCESS, status);
+    status = vxQueryContext(context, VX_CONTEXT_IMMEDIATE_BORDER_POLICY, NULL, 1);
+    ASSERT_EQ_INT(VX_ERROR_INVALID_PARAMETERS, status);
+
+    status = vxQueryContext(context, VX_CONTEXT_UNIQUE_KERNELS, (void*)&num_refs1, sizeof(num_refs1));
+    ASSERT_EQ_INT(VX_SUCCESS, status);
+    status = vxQueryContext(context, VX_CONTEXT_UNIQUE_KERNELS, NULL, 1);
+    ASSERT_EQ_INT(VX_ERROR_INVALID_PARAMETERS, status);
+
+    status = vxQueryContext(context, VX_CONTEXT_MAX_TENSOR_DIMS, test, sizeof(vx_size));
+    ASSERT_EQ_INT(VX_SUCCESS, status);
+    status = vxQueryContext(context, VX_CONTEXT_MAX_TENSOR_DIMS, NULL, 1);
+    ASSERT_EQ_INT(VX_ERROR_INVALID_PARAMETERS, status);
+
+    status = vxQueryContext(context, VX_CONTEXT_UNIQUE_KERNEL_TABLE, NULL, 1);
+    ASSERT_EQ_INT(VX_ERROR_INVALID_PARAMETERS, status);
+    status = vxQueryContext(context, VX_ERROR_INVALID_TYPE, (void*)&num_refs1, sizeof(num_refs1));
+    ASSERT_EQ_INT(VX_ERROR_NOT_SUPPORTED, status);
+
+    ct_free_mem(test);
+}
+
+TEST(TargetBase, testvxReleaseContext)
+{
+    vx_status status = VX_SUCCESS;
+    vx_context context_test = 0;
+
+    status = vxReleaseContext(&context_test);
+    ASSERT_EQ_INT(VX_ERROR_INVALID_REFERENCE, status);
+
+    context_test = vxCreateContext();
+    ASSERT_VX_OBJECT(context_test, VX_TYPE_CONTEXT);
+    vx_border_t ptr;
+    ptr.mode = VX_BORDER_CONSTANT;
+    status = vxSetContextAttribute(context_test, VX_CONTEXT_IMMEDIATE_BORDER, &ptr, sizeof(vx_border_t));
+    ASSERT_EQ_INT(VX_SUCCESS, status);
+    status = vxReleaseContext(&context_test);
+    ASSERT_EQ_INT(VX_SUCCESS, status);
+}
+
+TEST(TargetBase, testvxSetContextAttribute)
+{
+    vx_context context = context_->vx_context_;
+    vx_status status = VX_SUCCESS;
+
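+    /* Cover the invalid-reference, invalid-parameters, unsupported-attribute, invalid-value and success paths */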
+    vx_context context_test = 0;
+    status = vxSetContextAttribute(context_test, VX_CONTEXT_IMMEDIATE_BORDER, NULL, 0);
+    ASSERT_EQ_INT(VX_ERROR_INVALID_REFERENCE, status);
+
+    status = vxSetContextAttribute(context, VX_CONTEXT_IMMEDIATE_BORDER, NULL, sizeof(int));
+    ASSERT_EQ_INT(VX_ERROR_INVALID_PARAMETERS, status);
+
+    status = vxSetContextAttribute(context, VX_CONTEXT_EXTENSIONS, NULL, sizeof(vx_border_t));
+    ASSERT_EQ_INT(VX_ERROR_NOT_SUPPORTED, status);
+
+    vx_border_t ptr = { 0 };
+    ptr.mode = VX_BORDER_POLICY_DEFAULT_TO_UNDEFINED;
+    status = vxSetContextAttribute(context, VX_CONTEXT_IMMEDIATE_BORDER, &ptr, sizeof(vx_border_t));
+    ASSERT_EQ_INT(VX_ERROR_INVALID_VALUE, status);
+
+    ptr.mode = VX_BORDER_CONSTANT;
+    status = vxSetContextAttribute(context, VX_CONTEXT_IMMEDIATE_BORDER, &ptr, sizeof(vx_border_t));
+    ASSERT_EQ_INT(VX_SUCCESS, status);
+}
+
+TEST_WITH_ARG(TargetBase, testvxSetImmediateModeTargetBase, SetTarget_Arg, SET_IMM_MODE_TARGET_PARAMETERS)
+{
+    vx_context context = context_->vx_context_;
+    vx_status status = VX_SUCCESS;
+    const char * string = "test";
+
+    vx_context context_test = 0;
+    status = vxSetImmediateModeTarget(context_test, arg_->target_enum, arg_->target_string);
+    ASSERT_EQ_INT(VX_ERROR_INVALID_REFERENCE, status);
+
+    status = vxSetImmediateModeTarget(context, arg_->target_enum, arg_->target_string);
+    ASSERT_EQ_INT(VX_SUCCESS, status);
+    status = vxSetImmediateModeTarget(context, VX_TARGET_STRING, string);
+    ASSERT_EQ_INT(VX_ERROR_NOT_SUPPORTED, status);
+}
+
+TEST(TargetBase, testvxSetNodeTargetBase)
+{
+    vx_node node = 0;
+    vx_status status = VX_SUCCESS;
+
+    status = vxSetNodeTarget(node, VX_TARGET_ANY, "any");
+    ASSERT_EQ_INT(VX_ERROR_INVALID_REFERENCE, status);
+}
+
+#if defined OPENVX_USE_ENHANCED_VISION || OPENVX_CONFORMANCE_VISION
 
 TESTCASE_TESTS(Target,
         testvxSetNodeTarget,
         testvxSetImmediateModeTarget
         )
+
+#endif
+
+TESTCASE_TESTS(TargetBase,
+        testvxCreateContext,
+        testvxQueryContext,
+        testvxReleaseContext,
+        testvxSetContextAttribute,
+        testvxSetImmediateModeTargetBase,
+        testvxSetNodeTargetBase
+        )
diff --git a/test_conformance/test_tensor_networks.c b/test_conformance/test_tensor_networks.c
index 775f285..bbdaf00 100644
--- a/test_conformance/test_tensor_networks.c
+++ b/test_conformance/test_tensor_networks.c
@@ -14,6 +14,7 @@
  * limitations under the License.
  */
 
+#ifdef OPENVX_CONFORMANCE_NEURAL_NETWORKS
 #ifdef OPENVX_USE_NN_16
 
 #include "test_tensor_util.h"
@@ -220,7 +221,7 @@
     graph = vxCreateGraph(context);
     status |= vxGetStatus((vx_reference)graph);
     if(status == VX_SUCCESS)
-    {        
+    {
         /*
          * List of nodes to define a graph partition to create (use for debug purposes)
          * Note: 1) If the list is empty, the entire graph will be created
@@ -319,13 +320,13 @@
     if(graph)
     {
         // Release OpenVX graph
-        status = vxReleaseGraph(&graph);        
+        status = vxReleaseGraph(&graph);
         if(status != VX_SUCCESS)
         {
             WriteLog("ERROR: failed to release graph (vx_status=%s)\n", getVxStatusDesc(status));
         }
     }
-    
+
     VX_CALL(status);
     if (correct_detections < min_correct_alexnet)
     {
@@ -362,3 +363,4 @@
 //    FCN
 )
 #endif
+#endif //OPENVX_CONFORMANCE_NEURAL_NETWORKS
\ No newline at end of file
diff --git a/test_conformance/test_tensor_nn.c b/test_conformance/test_tensor_nn.c
index fd0fb6a..29e4e3c 100644
--- a/test_conformance/test_tensor_nn.c
+++ b/test_conformance/test_tensor_nn.c
@@ -14,6 +14,7 @@
  * limitations under the License.
  */
 
+#ifdef OPENVX_CONFORMANCE_NEURAL_NETWORKS
 #ifdef OPENVX_USE_NN
 
 #include "test_tensor_util.h"
@@ -138,7 +139,7 @@
 
             sum = ownLoadValueAsRawInt(fmt, (char *)bias_ptr + bias_byte_offset);
         }
-        
+
         const size_t xx = x * stride_x;
         const size_t yy = y * stride_y;
 
@@ -282,9 +283,9 @@
         CT_RNG_INIT(rng, *seed);
     }
 
-    vx_enum data_type;
-    vx_uint8 fixed_point_position;
-    vx_size sizeof_data_type;
+    vx_enum data_type = 0;
+    vx_uint8 fixed_point_position = 0;
+    vx_size sizeof_data_type = 0;
     ownUnpackFormat(arg_->fmt, &data_type, &fixed_point_position, &sizeof_data_type);
 
     const size_t inout_dim_num = 3 + arg_->batching_dim;
@@ -391,11 +392,11 @@
         const size_t weight_count = weight_bytes / sizeof_data_type;
         const size_t bias_count = bias_bytes / sizeof_data_type;
 
-        void * const in = malloc(in_bytes);
-        void * const weight = malloc(weight_bytes);
-        void * const bias = bias_dim_num ? malloc(bias_bytes) : NULL;
-        void * const out = malloc(out_bytes);
-        void * const refs = malloc(out_bytes);
+        void * const in = ct_alloc_mem(in_bytes);
+        void * const weight = ct_alloc_mem(weight_bytes);
+        void * const bias = bias_dim_num ? ct_alloc_mem(bias_bytes) : NULL;
+        void * const out = ct_alloc_mem(out_bytes);
+        void * const refs = ct_alloc_mem(out_bytes);
         ASSERT(in && weight && (!bias_count || bias) && out && refs);
 
         {
@@ -489,11 +490,11 @@
         EXPECT_EQ_PTR(NULL, bias_tensor);
         EXPECT_EQ_PTR(NULL, out_tensor);
 
-        free(in);
-        free(weight);
-        free(bias);
-        free(out);
-        free(refs);
+        ct_free_mem(in);
+        ct_free_mem(weight);
+        ct_free_mem(bias);
+        ct_free_mem(out);
+        ct_free_mem(refs);
     }
 }
 
@@ -696,9 +697,9 @@
         CT_RNG_INIT(rng, *seed);
     }
 
-    vx_enum data_type;
-    vx_uint8 fixed_point_position;
-    vx_size sizeof_data_type;
+    vx_enum data_type = 0;
+    vx_uint8 fixed_point_position = 0;
+    vx_size sizeof_data_type = 0;
     ownUnpackFormat(arg_->fmt, &data_type, &fixed_point_position, &sizeof_data_type);
 
     const size_t in_dim_num = arg_->core_dim + arg_->batch_dim;
@@ -719,7 +720,7 @@
         vx_size bias_dims[1];
         vx_size out_dims[4];
         {
-            for (size_t i = 0; i < in_dim_num; ++i) 
+            for (size_t i = 0; i < in_dim_num; ++i)
             {
                 in_dims[i] = (size_t)CT_RNG_NEXT_INT(rng, TEST_TENSOR_MIN_DIM_SZ, TEST_TENSOR_MAX_DIM_SZ+1);
             }
@@ -744,7 +745,7 @@
                 weight_dims[0] = in_dims[0];
                 weight_dims[1] = in_dims[1];
                 weight_dims[2] = in_dims[2];
-            } 
+            }
 
             if (bias_dim_num) bias_dims[0] = out_dims[0];
         }
@@ -780,11 +781,11 @@
         const size_t weight_count = bias_bytes / sizeof_data_type;
         const size_t bias_count = bias_bytes / sizeof_data_type;
 
-        void * const in = malloc(in_bytes);
-        void * const weight = malloc(weight_bytes);
-        void * const bias = bias_dim_num ? malloc(bias_bytes) : NULL;
-        void * const out = malloc(out_bytes);
-        void * const refs = malloc(out_bytes);
+        void * const in = ct_alloc_mem(in_bytes);
+        void * const weight = ct_alloc_mem(weight_bytes);
+        void * const bias = bias_dim_num ? ct_alloc_mem(bias_bytes) : NULL;
+        void * const out = ct_alloc_mem(out_bytes);
+        void * const refs = ct_alloc_mem(out_bytes);
         ASSERT(in && weight && (!bias_dim_num || bias) && out && refs);
 
         vx_tensor in_tensor = vxCreateTensor(context_->vx_context_, in_dim_num, in_dims, data_type, fixed_point_position);
@@ -892,11 +893,11 @@
         EXPECT_EQ_PTR(NULL, bias_tensor);
         EXPECT_EQ_PTR(NULL, out_tensor);
 
-        free(in);
-        free(weight);
-        free(bias);
-        free(out);
-        free(refs);
+        ct_free_mem(in);
+        ct_free_mem(weight);
+        ct_free_mem(bias);
+        ct_free_mem(out);
+        ct_free_mem(refs);
     }
 }
 
@@ -1056,9 +1057,9 @@
         CT_RNG_INIT(rng, *seed);
     }
 
-    vx_enum data_type;
-    vx_uint8 fixed_point_position;
-    vx_size sizeof_data_type;
+    vx_enum data_type = 0;
+    vx_uint8 fixed_point_position = 0;
+    vx_size sizeof_data_type = 0;
     ownUnpackFormat(arg_->fmt, &data_type, &fixed_point_position, &sizeof_data_type);
 
     const size_t dim_num = 3 + arg_->batching_dim;
@@ -1111,9 +1112,9 @@
 
         const size_t in_count = in_bytes / sizeof_data_type;
 
-        void * const in = malloc(in_bytes);
-        void * const out = malloc(out_bytes);
-        void * const refs = malloc(out_bytes);
+        void * const in = ct_alloc_mem(in_bytes);
+        void * const out = ct_alloc_mem(out_bytes);
+        void * const refs = ct_alloc_mem(out_bytes);
         ASSERT(in && out && refs);
 
         vx_tensor in_tensor = vxCreateTensor(context_->vx_context_, dim_num, in_dims, data_type, fixed_point_position);
@@ -1124,7 +1125,7 @@
         {
             // No real need to do ownFillSmallRandData here because of the
             // guaranteed 32bit accum and our data counts being small.
-            ownFillRandData(arg_->fmt, &rng, in_count, in); 
+            ownFillRandData(arg_->fmt, &rng, in_count, in);
 
             const vx_size view_start[4] = { 0 };
             VX_CALL(vxCopyTensorPatch(in_tensor, dim_num, view_start, in_dims, in_strides, in, VX_WRITE_ONLY, VX_MEMORY_TYPE_HOST));
@@ -1195,9 +1196,9 @@
         EXPECT_EQ_PTR(NULL, in_tensor);
         EXPECT_EQ_PTR(NULL, out_tensor);
 
-        free(in);
-        free(out);
-        free(refs);
+        ct_free_mem(in);
+        ct_free_mem(out);
+        ct_free_mem(refs);
     }
 }
 
@@ -1215,7 +1216,7 @@
 {
 //TODO: @Tomer, should we allow extra batch dims beyond 4? conv and pool have up to 3 of them! if not we can just discard this define and its usage
 #define SOFTMAX_ALLOW_EXTRA_DIMS
-    
+
 #ifdef SOFTMAX_ALLOW_EXTRA_DIMS
     assert(input.dim_num >= 1 && input.dim_num <= 4);
 #else
@@ -1234,7 +1235,7 @@
 
     size_t key_sz = 0;
     size_t key_in_stride = 0;
-    
+
 #ifdef SOFTMAX_ALLOW_EXTRA_DIMS
     size_t batch_sz[5] = { 1, 1, 1, 1, 1 };
     size_t batch_in_strides[5] = { 0 };
@@ -1392,7 +1393,7 @@
 
             max_val = MAX(max_val, in_val);
         }
-        
+
         // Note: It may be beneficial to cache the exponents
         for (size_t i = 0; i < key_sz; ++i)
         {
@@ -1446,7 +1447,7 @@
     {   // TODO: ownTestGetMaxDims() ?
         vx_size max_dims = 0;
         VX_CALL(vxQueryContext(context_->vx_context_, VX_CONTEXT_MAX_TENSOR_DIMS, &max_dims, sizeof(max_dims)));
-        ASSERT(max_dims >= arg_->dim_num); 
+        ASSERT(max_dims >= arg_->dim_num);
     }
 
     uint64_t rng;
@@ -1456,9 +1457,9 @@
         CT_RNG_INIT(rng, *seed);
     }
 
-    vx_enum data_type;
-    vx_uint8 fixed_point_position;
-    vx_size sizeof_data_type;
+    vx_enum data_type = 0;
+    vx_uint8 fixed_point_position = 0;
+    vx_size sizeof_data_type = 0;
     ownUnpackFormat(arg_->fmt, &data_type, &fixed_point_position, &sizeof_data_type);
 
     for (int iter = 0; iter < TEST_TENSOR_NUM_ITERATIONS; ++iter)
@@ -1488,9 +1489,9 @@
         const size_t bytes = dims[arg_->dim_num-1] * strides[arg_->dim_num-1];
         const size_t count = bytes / sizeof_data_type;
 
-        void * const in = malloc(bytes);
-        void * const out = malloc(bytes);
-        void * const refs = malloc(bytes);
+        void * const in = ct_alloc_mem(bytes);
+        void * const out = ct_alloc_mem(bytes);
+        void * const refs = ct_alloc_mem(bytes);
         ASSERT(in && out && refs);
 
         vx_tensor in_tensor = vxCreateTensor(context_->vx_context_, arg_->dim_num, dims, data_type, fixed_point_position);
@@ -1501,7 +1502,7 @@
         {
             // No real need to do ownFillSmallRandData here because of the
             // guaranteed 32bit accum and our data counts being small.
-            ownFillRandData(arg_->fmt, &rng, count, in); 
+            ownFillRandData(arg_->fmt, &rng, count, in);
 
             const vx_size view_start[4] = { 0 };
             VX_CALL(vxCopyTensorPatch(in_tensor, arg_->dim_num, view_start, dims, strides, in, VX_WRITE_ONLY, VX_MEMORY_TYPE_HOST));
@@ -1558,9 +1559,9 @@
         EXPECT_EQ_PTR(NULL, in_tensor);
         EXPECT_EQ_PTR(NULL, out_tensor);
 
-        free(in);
-        free(out);
-        free(refs);
+        ct_free_mem(in);
+        ct_free_mem(out);
+        ct_free_mem(refs);
     }
 }
 
@@ -1609,7 +1610,7 @@
             func == VX_NN_ACTIVATION_SQUARE ||
             func == VX_NN_ACTIVATION_SQRT ||
             func == VX_NN_ACTIVATION_LINEAR);
-    
+
     assert (input.dim_num == output.dim_num);
     assert (input.dim_num > 0 && input.dim_num <= 4);
 
@@ -1746,7 +1747,7 @@
     {   // TODO: ownTestGetMaxDims() ?
         vx_size max_dims = 0;
         VX_CALL(vxQueryContext(context_->vx_context_, VX_CONTEXT_MAX_TENSOR_DIMS, &max_dims, sizeof(max_dims)));
-        ASSERT(max_dims >= arg_->dim_num); 
+        ASSERT(max_dims >= arg_->dim_num);
     }
 
     uint64_t rng;
@@ -1756,9 +1757,9 @@
         CT_RNG_INIT(rng, *seed);
     }
 
-    vx_enum data_type;
-    vx_uint8 fixed_point_position;
-    vx_size sizeof_data_type;
+    vx_enum data_type = 0;
+    vx_uint8 fixed_point_position = 0;
+    vx_size sizeof_data_type = 0;
     ownUnpackFormat(arg_->fmt, &data_type, &fixed_point_position, &sizeof_data_type);
 
     for (int iter = 0; iter < TEST_TENSOR_NUM_ITERATIONS; ++iter)
@@ -1788,9 +1789,9 @@
         const size_t bytes = dims[arg_->dim_num-1] * strides[arg_->dim_num-1];
         const size_t count = bytes / sizeof_data_type;
 
-        void * const in = malloc(bytes);
-        void * const out = malloc(bytes);
-        void * const refs = malloc(bytes);
+        void * const in = ct_alloc_mem(bytes);
+        void * const out = ct_alloc_mem(bytes);
+        void * const refs = ct_alloc_mem(bytes);
         ASSERT(in && out && refs);
 
         vx_tensor in_tensor = vxCreateTensor(context_->vx_context_, arg_->dim_num, dims, data_type, fixed_point_position);
@@ -1799,7 +1800,7 @@
         ASSERT_VX_OBJECT(out_tensor, VX_TYPE_TENSOR);
 
         {
-            ownFillRandData(arg_->fmt, &rng, count, in); 
+            ownFillRandData(arg_->fmt, &rng, count, in);
 
             const vx_size view_start[4] = { 0 };
             VX_CALL(vxCopyTensorPatch(in_tensor, arg_->dim_num, view_start, dims, strides, in, VX_WRITE_ONLY, VX_MEMORY_TYPE_HOST));
@@ -1857,9 +1858,9 @@
         EXPECT_EQ_PTR(NULL, in_tensor);
         EXPECT_EQ_PTR(NULL, out_tensor);
 
-        free(in);
-        free(out);
-        free(refs);
+        ct_free_mem(in);
+        ct_free_mem(out);
+        ct_free_mem(refs);
     }
 }
 
@@ -2011,23 +2012,23 @@
         CT_RNG_INIT(rng, *seed);
     }
 
-    vx_enum data_type;
-    vx_uint8 fixed_point_position;
-    vx_size sizeof_data_type;
+    vx_enum data_type = 0;
+    vx_uint8 fixed_point_position = 0;
+    vx_size sizeof_data_type = 0;
     ownUnpackFormat(arg_->fmt, &data_type, &fixed_point_position, &sizeof_data_type);
 
     const size_t data_dim_num = arg_->with_batching ? 4 : 3;
     const size_t rois_dim_num = arg_->with_batching ? 3 : 2;
     const size_t out_dim_num = arg_->with_batching ? 5 : 4;
 
-    size_t * const data_dims = malloc(sizeof(*data_dims) * data_dim_num);
-    size_t * const rois_dims = malloc(sizeof(*rois_dims) * rois_dim_num);
-    size_t * const out_dims = malloc(sizeof(*out_dims) * out_dim_num);
+    size_t * const data_dims = ct_alloc_mem(sizeof(*data_dims) * data_dim_num);
+    size_t * const rois_dims = ct_alloc_mem(sizeof(*rois_dims) * rois_dim_num);
+    size_t * const out_dims = ct_alloc_mem(sizeof(*out_dims) * out_dim_num);
     ASSERT(data_dims && rois_dims && out_dims);
-    
-    size_t * const data_strides = malloc(sizeof(*data_strides) * data_dim_num);
-    size_t * const rois_strides = malloc(sizeof(*rois_strides) * rois_dim_num);
-    size_t * const out_strides = malloc(sizeof(*out_strides) * out_dim_num);
+
+    size_t * const data_strides = ct_alloc_mem(sizeof(*data_strides) * data_dim_num);
+    size_t * const rois_strides = ct_alloc_mem(sizeof(*rois_strides) * rois_dim_num);
+    size_t * const out_strides = ct_alloc_mem(sizeof(*out_strides) * out_dim_num);
     ASSERT(data_strides && rois_strides && out_strides);
 
     for (int iter = 0; iter < TEST_TENSOR_NUM_ITERATIONS; ++iter)
@@ -2088,10 +2089,10 @@
 
         const size_t data_count = data_bytes / sizeof_data_type;
 
-        void * const data = malloc(data_bytes);
-        void * const rois = malloc(rois_bytes);
-        void * const out = malloc(out_bytes);
-        void * const refs = malloc(out_bytes);
+        void * const data = ct_alloc_mem(data_bytes);
+        void * const rois = ct_alloc_mem(rois_bytes);
+        void * const out = ct_alloc_mem(out_bytes);
+        void * const refs = ct_alloc_mem(out_bytes);
         ASSERT(data && rois && out && refs);
 
         {
@@ -2188,19 +2189,19 @@
         EXPECT_EQ_PTR(NULL, rois_tensor);
         EXPECT_EQ_PTR(NULL, out_tensor);
 
-        free(data);
-        free(rois);
-        free(out);
-        free(refs);
+        ct_free_mem(data);
+        ct_free_mem(rois);
+        ct_free_mem(out);
+        ct_free_mem(refs);
     }
 
-    free(data_dims);
-    free(rois_dims);
-    free(out_dims);
+    ct_free_mem(data_dims);
+    ct_free_mem(rois_dims);
+    ct_free_mem(out_dims);
 
-    free(data_strides);
-    free(rois_strides);
-    free(out_strides);
+    ct_free_mem(data_strides);
+    ct_free_mem(rois_strides);
+    ct_free_mem(out_strides);
 }
 
 
@@ -2333,7 +2334,7 @@
 
             sum = ownLoadValueAsRawInt(fmt, (char *)bias_ptr + bias_byte_offset);
         }
-        
+
         for (size_t ifm = 0; ifm < input_c; ++ifm)
         {
             for (size_t w_y = 0; w_y < weight_h; ++w_y)
@@ -2461,9 +2462,9 @@
         CT_RNG_INIT(rng, *seed);
     }
 
-    vx_enum data_type;
-    vx_uint8 fixed_point_position;
-    vx_size sizeof_data_type;
+    vx_enum data_type = 0;
+    vx_uint8 fixed_point_position = 0;
+    vx_size sizeof_data_type = 0;
     ownUnpackFormat(arg_->fmt, &data_type, &fixed_point_position, &sizeof_data_type);
 
     const size_t inout_dim_num = 3 + arg_->batching_dim;
@@ -2568,11 +2569,11 @@
         const size_t weight_count = weight_bytes / sizeof_data_type;
         const size_t bias_count = bias_bytes / sizeof_data_type;
 
-        void * const in = malloc(in_bytes);
-        void * const weight = malloc(weight_bytes);
-        void * const bias = bias_dim_num ? malloc(bias_bytes) : NULL;
-        void * const out = malloc(out_bytes);
-        void * const refs = malloc(out_bytes);
+        void * const in = ct_alloc_mem(in_bytes);
+        void * const weight = ct_alloc_mem(weight_bytes);
+        void * const bias = bias_dim_num ? ct_alloc_mem(bias_bytes) : NULL;
+        void * const out = ct_alloc_mem(out_bytes);
+        void * const refs = ct_alloc_mem(out_bytes);
         ASSERT(in && weight && (!bias_count || bias) && out && refs);
 
         {
@@ -2667,11 +2668,11 @@
         EXPECT_EQ_PTR(NULL, bias_tensor);
         EXPECT_EQ_PTR(NULL, out_tensor);
 
-        free(in);
-        free(weight);
-        free(bias);
-        free(out);
-        free(refs);
+        ct_free_mem(in);
+        ct_free_mem(weight);
+        ct_free_mem(bias);
+        ct_free_mem(out);
+        ct_free_mem(refs);
     }
 }
 
@@ -2687,3 +2688,4 @@
     testDeconvolutionLayer
 )
 #endif
+#endif//OPENVX_CONFORMANCE_NEURAL_NETWORKS
diff --git a/test_conformance/test_tensor_op.c b/test_conformance/test_tensor_op.c
index 832f31d..62f9e79 100644
--- a/test_conformance/test_tensor_op.c
+++ b/test_conformance/test_tensor_op.c
@@ -14,6 +14,10 @@
  * limitations under the License.
  */
 
+#ifdef OPENVX_USE_ENHANCED_VISION
+
+#include <VX/vx.h>
+#include <VX/vxu.h>
 #include "test_tensor_util.h"
 
 
@@ -66,7 +70,7 @@
                 const vx_int16 in0 = *(vx_int16*)in0_b_ptr;
                 const vx_int16 in1 = *(vx_int16*)in1_b_ptr;
                 const vx_int16 out = *(vx_int16*)out_b_ptr;
-                int16_t ref;
+                int16_t ref = 0;
 
                 switch (op)
                 {
@@ -85,7 +89,7 @@
                 case TT_MUL:
                     {
                         double tmp = in0 * in1 * q78_scale;
-                        tmp = to_ne ? nearbyint(tmp) : trunc(tmp); 
+                        tmp = to_ne ? nearbyint(tmp) : trunc(tmp);
                         ref = wrap ? trunc_to_int16(tmp) : CLAMP(tmp, INT16_MIN, INT16_MAX);
                     }
                     break;
@@ -118,7 +122,7 @@
                 const vx_uint8 in0 = *(vx_uint8*)in0_b_ptr;
                 const vx_uint8 in1 = *(vx_uint8*)in1_b_ptr;
                 const vx_uint8 out = *(vx_uint8*)out_b_ptr;
-                uint8_t ref;
+                uint8_t ref = 0;
 
                 switch (op)
                 {
@@ -137,7 +141,7 @@
                 case TT_MUL:
                     {
                         double tmp = in0 * in1 * scale;
-                        tmp = to_ne ? nearbyint(tmp) : trunc(tmp); 
+                        tmp = to_ne ? nearbyint(tmp) : trunc(tmp);
                         ref = wrap ? tmp : CLAMP(tmp, 0, UINT8_MAX);
                     }
                     break;
@@ -154,7 +158,7 @@
                 const vx_int8 in0 = *(vx_int8*)in0_b_ptr;
                 const vx_int8 in1 = *(vx_int8*)in1_b_ptr;
                 const vx_int8 out = *(vx_int8*)out_b_ptr;
-                int8_t ref;
+                int8_t ref = 0;
 
                 switch (op)
                 {
@@ -173,7 +177,7 @@
                 case TT_MUL:
                     {
                         double tmp = in0 * in1 * scale;
-                        tmp = to_ne ? nearbyint(tmp) : trunc(tmp); 
+                        tmp = to_ne ? nearbyint(tmp) : trunc(tmp);
                         ref = wrap ? trunc_to_int8(tmp) : CLAMP(tmp, INT8_MIN, INT8_MAX);
                     }
                     break;
@@ -228,7 +232,7 @@
     TT_ELEMENTWISE_OP0(U8)      \
     TT_ELEMENTWISE_OP0(S8)
 
-TEST_WITH_ARG(TensorOp, testTensorElementwiseOp, test_tensor_elementwise_op_arg,
+TEST_WITH_ARG(TensorOp, testvxTensorElementwiseOp, test_tensor_elementwise_op_arg,
         TT_ELEMENTWISE_OP_ALL()
 )
 {
@@ -245,7 +249,7 @@
     assert(op == TT_ADD || op == TT_SUB || op == TT_MUL);
     assert(overflow_policy == VX_CONVERT_POLICY_WRAP || overflow_policy == VX_CONVERT_POLICY_SATURATE);
     assert(rounding_policy == VX_ROUND_POLICY_TO_ZERO || rounding_policy == VX_ROUND_POLICY_TO_NEAREST_EVEN);
-    
+
     // Only MUL supports rounding_policy and scale, we chose not to allow anything but default values for other ops
     assert(TT_MUL || (rounding_policy == VX_ROUND_POLICY_TO_ZERO && scale == 1.f));
 
@@ -263,19 +267,19 @@
         CT_RNG_INIT(rng, *seed);
     }
 
-    vx_enum data_type;
-    vx_uint8 fixed_point_position;
-    vx_size sizeof_data_type;
+    vx_enum data_type = 0;
+    vx_uint8 fixed_point_position = 0;
+    vx_size sizeof_data_type = 0;
     ownUnpackFormat(fmt, &data_type, &fixed_point_position, &sizeof_data_type);
 
-    size_t * const in0_dims = malloc(sizeof(*in0_dims) * max_dims);
-    size_t * const in1_dims = malloc(sizeof(*in1_dims) * max_dims);
-    size_t * const out_dims = malloc(sizeof(*out_dims) * max_dims);
+    size_t * const in0_dims = ct_alloc_mem(sizeof(*in0_dims) * max_dims);
+    size_t * const in1_dims = ct_alloc_mem(sizeof(*in1_dims) * max_dims);
+    size_t * const out_dims = ct_alloc_mem(sizeof(*out_dims) * max_dims);
     ASSERT(in0_dims && in1_dims && out_dims);
 
-    size_t * const in0_strides = malloc(sizeof(*in0_strides) * max_dims);
-    size_t * const in1_strides = malloc(sizeof(*in1_strides) * max_dims);
-    size_t * const out_strides = malloc(sizeof(*out_strides) * max_dims);
+    size_t * const in0_strides = ct_alloc_mem(sizeof(*in0_strides) * max_dims);
+    size_t * const in1_strides = ct_alloc_mem(sizeof(*in1_strides) * max_dims);
+    size_t * const out_strides = ct_alloc_mem(sizeof(*out_strides) * max_dims);
     ASSERT(in0_strides && in1_strides && out_strides);
 
     // The test strategy is a simple one: For each of the 1..max_dims supported
@@ -354,9 +358,9 @@
 
         // Second step is to allocate the input and output data locations and populate the inputs.
 
-        void * const in0_data = malloc(in0_bytes);
-        void * const in1_data = malloc(in1_bytes);
-        void * const out_data = malloc(out_bytes);
+        void * const in0_data = ct_alloc_mem(in0_bytes);
+        void * const in1_data = ct_alloc_mem(in1_bytes);
+        void * const out_data = ct_alloc_mem(out_bytes);
         ASSERT(in0_data && in1_data && out_data);
 
         {
@@ -435,20 +439,227 @@
         EXPECT_EQ_PTR(NULL, in1_tensor);
         EXPECT_EQ_PTR(NULL, out_tensor);
 
-        free(in0_data);
-        free(in1_data);
-        free(out_data);
+        ct_free_mem(in0_data);
+        ct_free_mem(in1_data);
+        ct_free_mem(out_data);
     }
 
-    free(in0_dims);
-    free(in1_dims);
-    free(out_dims);
+    ct_free_mem(in0_dims);
+    ct_free_mem(in1_dims);
+    ct_free_mem(out_dims);
 
-    free(in0_strides);
-    free(in1_strides);
-    free(out_strides);
+    ct_free_mem(in0_strides);
+    ct_free_mem(in1_strides);
+    ct_free_mem(out_strides);
 }
 
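+// Immediate-mode (vxu) counterpart of the elementwise-op test above: the same random
+// dim/broadcast strategy, but each op is invoked directly through vxuTensorAdd,
+// vxuTensorSubtract or vxuTensorMultiply instead of building and running a graph.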
+TEST_WITH_ARG(TensorOp, testvxuTensorElementwiseOp, test_tensor_elementwise_op_arg,
+        TT_ELEMENTWISE_OP_ALL()
+)
+{
+    const vx_context context = context_->vx_context_;
+
+    const enum TestTensorDF fmt = arg_->fmt;
+    const enum TestTensorOp op = arg_->op;
+
+    const enum vx_convert_policy_e overflow_policy = arg_->convert_policy;
+    const enum vx_round_policy_e rounding_policy = arg_->rounding_policy;
+    const vx_float32 scale = arg_->scale;
+
+    assert(fmt == TT_Q78 || fmt == TT_U8 || fmt == TT_S8);
+    assert(op == TT_ADD || op == TT_SUB || op == TT_MUL);
+    assert(overflow_policy == VX_CONVERT_POLICY_WRAP || overflow_policy == VX_CONVERT_POLICY_SATURATE);
+    assert(rounding_policy == VX_ROUND_POLICY_TO_ZERO || rounding_policy == VX_ROUND_POLICY_TO_NEAREST_EVEN);
+
+    // Only MUL supports rounding_policy and scale; we chose not to allow anything but default values for other ops
+    assert(op == TT_MUL || (rounding_policy == VX_ROUND_POLICY_TO_ZERO && scale == 1.f));
+
+    vx_size max_dims = 0;
+    {   // TODO: ownTestGetMaxDims() ?
+        VX_CALL(vxQueryContext(context, VX_CONTEXT_MAX_TENSOR_DIMS, &max_dims, sizeof(max_dims)));
+        ASSERT(max_dims > 3);
+        if(!DEBUG_TEST_TENSOR_BEYOND_FOUR_DIMS) max_dims = 4; else max_dims = MIN(max_dims, MAX_TENSOR_DIMS);
+    }
+
+    uint64_t rng;
+    {   // TODO: ownTestGetRNG() ?
+        uint64_t * seed = &CT()->seed_;
+        ASSERT(!!seed);
+        CT_RNG_INIT(rng, *seed);
+    }
+
+    vx_enum data_type = 0;
+    vx_uint8 fixed_point_position = 0;
+    vx_size sizeof_data_type = 0;
+    ownUnpackFormat(fmt, &data_type, &fixed_point_position, &sizeof_data_type);
+
+    size_t * const in0_dims = ct_alloc_mem(sizeof(*in0_dims) * max_dims);
+    size_t * const in1_dims = ct_alloc_mem(sizeof(*in1_dims) * max_dims);
+    size_t * const out_dims = ct_alloc_mem(sizeof(*out_dims) * max_dims);
+    ASSERT(in0_dims && in1_dims && out_dims);
+
+    size_t * const in0_strides = ct_alloc_mem(sizeof(*in0_strides) * max_dims);
+    size_t * const in1_strides = ct_alloc_mem(sizeof(*in1_strides) * max_dims);
+    size_t * const out_strides = ct_alloc_mem(sizeof(*out_strides) * max_dims);
+    ASSERT(in0_strides && in1_strides && out_strides);
+
+    // The test strategy is a simple one: For each of the 1..max_dims supported
+    // we test a TEST_TENSOR_NUM_ITERATIONS of random dim and broadcast
+    // configurations. This approach may have issues if the implementation
+    // supports a lot of dimensions since their random size being up to
+    // TEST_TENSOR_MAX_DIM_SZ, could result in a huge memory requirement.
+    // However from previous experience we expect this to typically be 4-6 dims.
+    // The other issue is that we do not test huge dimensions.
+    // Further limitations include the lack of virtual/view inputs and outputs, as
+    // well as the lack of modified-stride testing etc.
+
+    // Note that iter is the inner loop, so that if two implementations support
+    // D1 and D2 dims resp. the same (pseudo-random) values would be used when
+    // testing the common min(D1, D2) dimensions.
+    for (vx_size dims = 1; dims <= max_dims; ++dims)
+    for (int iter = 0; iter < TEST_TENSOR_NUM_ITERATIONS; ++iter)
+    {
+        if (DEBUG_TEST_TENSOR_ENABLE_PRINTF)
+        {
+            printf("dims #: %zu,\titer #: %d\n", dims, iter);
+            fflush(stdout);
+        }
+
+        // First step is to get some random dim sizes, calc the strides and create the tensors.
+
+        for (vx_size i = 0; i < dims; ++i)
+        {
+            const size_t new_dim = (size_t)CT_RNG_NEXT_INT(rng, TEST_TENSOR_MIN_DIM_SZ, TEST_TENSOR_MAX_DIM_SZ+1);
+
+            const int mask0 = !!CT_RNG_NEXT_INT(rng, 0, TEST_TENSOR_INVERSE_MASK_PROBABILITY);
+            const int mask1 = !!CT_RNG_NEXT_INT(rng, 0, TEST_TENSOR_INVERSE_MASK_PROBABILITY);
+
+            // Note: Broadcasting is defined per dim: in0 and in1 either have the same size,
+            // or one of them has size "1" for a broadcast value. The output is strictly
+            // determined by them, so that the implementation is required to support
+            // { in0, in1, out } = { 1, 5, 5 } but not { in0, in1, out } = { 1, 1, 5 }
+            // even though the KHR sample implementation currently supports both.
+            in0_dims[i] = mask0 ? new_dim : 1;
+            in1_dims[i] = mask1 ? new_dim : 1;
+            out_dims[i] = mask0 || mask1 ? new_dim : 1;
+
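+            // Dense packing: dim 0 stride is the element size; each higher dim's stride is the previous stride times that dim's size.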
+            in0_strides[i] = i ? in0_strides[i - 1] * in0_dims[i - 1] : sizeof_data_type;
+            in1_strides[i] = i ? in1_strides[i - 1] * in1_dims[i - 1] : sizeof_data_type;
+            out_strides[i] = i ? out_strides[i - 1] * out_dims[i - 1] : sizeof_data_type;
+        }
+
+        vx_tensor in0_tensor = vxCreateTensor(context, dims, in0_dims, data_type, fixed_point_position);
+        vx_tensor in1_tensor = vxCreateTensor(context, dims, in1_dims, data_type, fixed_point_position);
+        vx_tensor out_tensor = vxCreateTensor(context, dims, out_dims, data_type, fixed_point_position);
+        ASSERT_VX_OBJECT(in0_tensor, VX_TYPE_TENSOR);
+        ASSERT_VX_OBJECT(in1_tensor, VX_TYPE_TENSOR);
+        ASSERT_VX_OBJECT(out_tensor, VX_TYPE_TENSOR);
+
+        const size_t in0_bytes = in0_dims[dims - 1] * in0_strides[dims - 1];
+        const size_t in1_bytes = in1_dims[dims - 1] * in1_strides[dims - 1];
+        const size_t out_bytes = out_dims[dims - 1] * out_strides[dims - 1];
+
+        const size_t in0_count = in0_bytes / sizeof_data_type;
+        const size_t in1_count = in1_bytes / sizeof_data_type;
+        const size_t out_count = out_bytes / sizeof_data_type;
+
+        if (DEBUG_TEST_TENSOR_ENABLE_PRINTF)
+        {
+            printf("\tconfig: {\n");
+            printf("\t          dim_num: %zu,\n", dims);
+            printf("\t          in0 : { dims: { "); for (size_t i = 0; i < dims; ++i) { printf("%zu, ", in0_dims[i]); } printf(" }, count: %zu, bytes: %zu },\n", in0_count, in0_bytes);
+            printf("\t          in1 : { dims: { "); for (size_t i = 0; i < dims; ++i) { printf("%zu, ", in1_dims[i]); } printf(" }, count: %zu, bytes: %zu },\n", in1_count, in1_bytes);
+            printf("\t          out : { dims: { "); for (size_t i = 0; i < dims; ++i) { printf("%zu, ", out_dims[i]); } printf(" }, count: %zu, bytes: %zu },\n", out_count, out_bytes);
+            printf("\t        }\n");
+        }
+
+        //TODO: This is pretty wasteful as it's repeating a lot of work per iteration:
+        //      Both in the repeated malloc + free and inefficient data population
+        //      which discards much of the random data, only using a part of it.
+
+        // Second step is to allocate the input and output data locations and populate the inputs.
+
+        void * const in0_data = ct_alloc_mem(in0_bytes);
+        void * const in1_data = ct_alloc_mem(in1_bytes);
+        void * const out_data = ct_alloc_mem(out_bytes);
+        ASSERT(in0_data && in1_data && out_data);
+
+        {
+            ownFillRandData(fmt, &rng, in0_count, in0_data);
+            ownFillRandData(fmt, &rng, in1_count, in1_data);
+
+            vx_size view_start[MAX_TENSOR_DIMS] = { 0 };
+            VX_CALL(vxCopyTensorPatch(in0_tensor, dims, view_start, in0_dims, in0_strides, in0_data, VX_WRITE_ONLY, VX_MEMORY_TYPE_HOST));
+            VX_CALL(vxCopyTensorPatch(in1_tensor, dims, view_start, in1_dims, in1_strides, in1_data, VX_WRITE_ONLY, VX_MEMORY_TYPE_HOST));
+        }
+
+        // Third step is running the op directly through the vxu immediate-mode API.
+        {
+            switch (op)
+            {
+            case TT_ADD:
+                VX_CALL(vxuTensorAdd(context, in0_tensor, in1_tensor, overflow_policy, out_tensor));
+                break;
+            case TT_SUB:
+                VX_CALL(vxuTensorSubtract(context, in0_tensor, in1_tensor, overflow_policy, out_tensor));
+                break;
+            case TT_MUL:
+            {
+                vx_scalar scalar = vxCreateScalar(context, VX_TYPE_FLOAT32, &scale);
+                ASSERT_VX_OBJECT(scalar, VX_TYPE_SCALAR);
+
+                VX_CALL(vxuTensorMultiply(context, in0_tensor, in1_tensor, scalar, overflow_policy, rounding_policy, out_tensor));
+
+                VX_CALL(vxReleaseScalar(&scalar));
+                EXPECT_EQ_PTR(NULL, scalar);
+                break;
+            }
+            default:
+                ASSERT(0);
+                // Not implemented;
+            }
+        }
+
+        // Verify the results
+        {
+            const size_t view_start[MAX_TENSOR_DIMS] = { 0 };
+            VX_CALL(vxCopyTensorPatch(out_tensor, dims, view_start, out_dims, out_strides, out_data, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+
+            ownCheckAddSubMulResult(
+                    in0_data, in0_dims, in0_strides,
+                    in1_data, in1_dims, in1_strides,
+                    fmt,
+                    op,
+                    dims,
+                    out_count,
+                    overflow_policy == VX_CONVERT_POLICY_WRAP,
+                    rounding_policy == VX_ROUND_POLICY_TO_NEAREST_EVEN,
+                    scale,
+                    out_data, out_dims, out_strides);
+        }
+
+        VX_CALL(vxReleaseTensor(&in0_tensor));
+        VX_CALL(vxReleaseTensor(&in1_tensor));
+        VX_CALL(vxReleaseTensor(&out_tensor));
+        EXPECT_EQ_PTR(NULL, in0_tensor);
+        EXPECT_EQ_PTR(NULL, in1_tensor);
+        EXPECT_EQ_PTR(NULL, out_tensor);
+
+        ct_free_mem(in0_data);
+        ct_free_mem(in1_data);
+        ct_free_mem(out_data);
+    }
+
+    ct_free_mem(in0_dims);
+    ct_free_mem(in1_dims);
+    ct_free_mem(out_dims);
+
+    ct_free_mem(in0_strides);
+    ct_free_mem(in1_strides);
+    ct_free_mem(out_strides);
+}
+
+
 /****************************************************************************
  *                                                                          *
  *                              LUT Test                                    *
@@ -511,7 +722,7 @@
     enum TestTensorDF fmt;
 } test_tensor_lut_op_arg;
 
-TEST_WITH_ARG(TensorOp, testTensorTableLookupOp, test_tensor_lut_op_arg,
+TEST_WITH_ARG(TensorOp, testvxTensorTableLookup, test_tensor_lut_op_arg,
         ARG("Q78_TABLELOOKUP", TT_Q78),
         ARG("U8_TABLELOOKUP", TT_U8),
 //        ARG("S8_TABLELOOKUP", TT_S8),
@@ -536,19 +747,19 @@
         CT_RNG_INIT(rng, *seed);
     }
 
-    vx_enum data_type;
-    vx_uint8 fixed_point_position;
-    vx_size sizeof_data_type;
+    vx_enum data_type = 0;
+    vx_uint8 fixed_point_position = 0;
+    vx_size sizeof_data_type = 0;
     ownUnpackFormat(fmt, &data_type, &fixed_point_position, &sizeof_data_type);
 
-    vx_size lut_max_count;
-    vx_enum lut_data_type;
+    vx_size lut_max_count = 0;
+    vx_enum lut_data_type = 0;
     ownUnpackFormatForLUT(fmt, &lut_max_count, &lut_data_type);
 
-    size_t * const tensor_dims = malloc(sizeof(*tensor_dims) * max_dims);
-    size_t * const tensor_strides = malloc(sizeof(*tensor_strides) * max_dims);
+    size_t * const tensor_dims = ct_alloc_mem(sizeof(*tensor_dims) * max_dims);
+    size_t * const tensor_strides = ct_alloc_mem(sizeof(*tensor_strides) * max_dims);
     ASSERT(tensor_dims && tensor_strides);
-    
+
     // The strategy is a simple one: For each of the 1..max_dims supported,
     // we test a TEST_TENSOR_NUM_ITERATIONS of random tensor and LUT configs.
     // While LUT should be (Not verified) sufficiently tested by the Non NN
@@ -601,9 +812,9 @@
             printf("\t        }\n");
         }
 
-        void * const src_data = malloc(tensor_bytes);
-        void * const dst_data = malloc(tensor_bytes);
-        void * const lut_data = malloc(sizeof_data_type * lut_count);
+        void * const src_data = ct_alloc_mem(tensor_bytes);
+        void * const dst_data = ct_alloc_mem(tensor_bytes);
+        void * const lut_data = ct_alloc_mem(sizeof_data_type * lut_count);
         ASSERT(src_data && dst_data && lut_data);
 
         {   //TODO: ownTestInitTensors(..) ?
@@ -648,7 +859,7 @@
             VX_CALL(vxReleaseGraph(&graph));
             EXPECT_EQ_PTR(NULL, graph);
         }
-        
+
         // Verify the reuslts
         {
             const size_t view_start[MAX_TENSOR_DIMS] = { 0 };
@@ -742,14 +953,217 @@
         EXPECT_EQ_PTR(NULL, dst_tensor);
         EXPECT_EQ_PTR(NULL, lut);
 
-        free(src_data);
-        free(dst_data);
+        ct_free_mem(src_data);
+        ct_free_mem(dst_data);
     }
 
-    free(tensor_dims);
-    free(tensor_strides);
+    ct_free_mem(tensor_dims);
+    ct_free_mem(tensor_strides);
 }
 
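+// Immediate-mode (vxu) counterpart of the table-lookup test above, calling
+// vxuTensorTableLookup directly instead of building and running a graph.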
+TEST_WITH_ARG(TensorOp, testvxuTensorTableLookup, test_tensor_lut_op_arg,
+        ARG("Q78_TABLELOOKUP", TT_Q78),
+        ARG("U8_TABLELOOKUP", TT_U8)
+)
+{
+    const vx_context context = context_->vx_context_;
+
+    const enum TestTensorDF fmt = arg_->fmt;
+    assert(fmt == TT_Q78 || fmt == TT_U8);
+
+    vx_size max_dims = 0;
+    {   // TODO: ownTestGetMaxDims() ?
+        VX_CALL(vxQueryContext(context, VX_CONTEXT_MAX_TENSOR_DIMS, &max_dims, sizeof(max_dims)));
+        ASSERT(max_dims > 3);
+        if(!DEBUG_TEST_TENSOR_BEYOND_FOUR_DIMS) max_dims = 4; else max_dims = MIN(max_dims, MAX_TENSOR_DIMS);
+    }
+
+    uint64_t rng;
+    {   // TODO: ownTestGetRNG() ?
+        uint64_t * seed = &CT()->seed_;
+        ASSERT(!!seed);
+        CT_RNG_INIT(rng, *seed);
+    }
+
+    vx_enum data_type = 0;
+    vx_uint8 fixed_point_position = 0;
+    vx_size sizeof_data_type = 0;
+    ownUnpackFormat(fmt, &data_type, &fixed_point_position, &sizeof_data_type);
+
+    vx_size lut_max_count = 0;
+    vx_enum lut_data_type = 0;
+    ownUnpackFormatForLUT(fmt, &lut_max_count, &lut_data_type);
+
+    size_t * const tensor_dims = ct_alloc_mem(sizeof(*tensor_dims) * max_dims);
+    size_t * const tensor_strides = ct_alloc_mem(sizeof(*tensor_strides) * max_dims);
+    ASSERT(tensor_dims && tensor_strides);
+
+    // The strategy is a simple one: For each of the 1..max_dims supported,
+    // we test a TEST_TENSOR_NUM_ITERATIONS of random tensor and LUT configs.
+    // While LUT should be (not verified) sufficiently tested by the non-NN
+    // tests, we prefer to use random LUT dims and data each iteration anyway,
+    // since the whole test shouldn't take long and this can be used as a
+    // standalone conformance part for our own tests.
+    //TODO: @Tomer, should we rather use a single (per fmt) randomly populated
+    //      LUT, instead, anyway? Or is this acceptable?
+
+    // Note that iter is the inner loop, so that if two implementations support
+    // D1 and D2 dims resp. the same (pseudo-random) values would be used when
+    // testing the common min(D1, D2) dimensions.
+    for (vx_size dims = 1; dims <= max_dims; ++dims)
+    for (int iter = 0; iter < TEST_TENSOR_NUM_ITERATIONS; ++iter)
+    {
+        if (DEBUG_TEST_TENSOR_ENABLE_PRINTF)
+        {
+            printf("dims #: %zu,\titer #: %d\n", dims, iter);
+            fflush(stdout);
+        }
+
+        // First step is to get some random dim sizes, calc the strides and create the tensors.
+
+        for (vx_size i = 0; i < dims; ++i)
+        {
+            tensor_dims[i] = (size_t)CT_RNG_NEXT_INT(rng, TEST_TENSOR_MIN_DIM_SZ, TEST_TENSOR_MAX_DIM_SZ+1);
+
+            tensor_strides[i] = i ? tensor_strides[i-1] * tensor_dims[i-1] : sizeof_data_type;
+        }
+
+        vx_tensor src_tensor = vxCreateTensor(context, dims, tensor_dims, data_type, fixed_point_position);
+        vx_tensor dst_tensor = vxCreateTensor(context, dims, tensor_dims, data_type, fixed_point_position);
+        ASSERT_VX_OBJECT(src_tensor, VX_TYPE_TENSOR);
+        ASSERT_VX_OBJECT(dst_tensor, VX_TYPE_TENSOR);
+
+        const size_t tensor_bytes = tensor_dims[dims-1] * tensor_strides[dims-1];
+        const size_t tensor_count = tensor_bytes / sizeof_data_type;
+
+        const vx_size lut_count = (vx_size)CT_RNG_NEXT_INT(rng, 1, lut_max_count+1);
+        const vx_uint32 lut_offset = (lut_data_type == VX_TYPE_INT16) ? (vx_uint32)(lut_count / 2) : 0;
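+        // For INT16 (Q78) LUTs the lookup index is biased by half the table size so that
+        // negative input values map into the table; U8 LUTs use no offset.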
+
+        vx_lut lut = vxCreateLUT(context, lut_data_type, lut_count);
+        ASSERT_VX_OBJECT(lut, VX_TYPE_LUT);
+
+        if (DEBUG_TEST_TENSOR_ENABLE_PRINTF)
+        {
+            printf("\tconfig: {\n");
+            printf("\t          tensor_dims: { "); for (size_t i = 0; i < dims; ++i) { printf("%zu, ", tensor_dims[i]); } printf(" }, \n");
+            printf("\t          LUT_count: %zu,", lut_count);
+            printf("\t        }\n");
+        }
+
+        void * const src_data = ct_alloc_mem(tensor_bytes);
+        void * const dst_data = ct_alloc_mem(tensor_bytes);
+        void * const lut_data = ct_alloc_mem(sizeof_data_type * lut_count);
+        ASSERT(src_data && dst_data && lut_data);
+
+        {   //TODO: ownTestInitTensors(..) ?
+            ownFillRandDataForLUT(fmt, &rng, tensor_count, lut_count, lut_offset, src_data);
+
+            vx_size view_start[MAX_TENSOR_DIMS] = { 0 };
+            VX_CALL(vxCopyTensorPatch(src_tensor, dims, view_start, tensor_dims, tensor_strides, src_data, VX_WRITE_ONLY, VX_MEMORY_TYPE_HOST));
+        }
+
+        {
+            for (size_t i = 0; i < lut_count; ++i)
+            {
+                switch (fmt)
+                {
+                case TT_Q78:
+                    ((vx_int16*)lut_data)[i] = (vx_int16)(CT_RNG_NEXT_INT(rng, INT16_MIN, INT16_MAX + 1));
+                    break;
+                case TT_U8:
+                    ((vx_uint8*)lut_data)[i] = (vx_uint8)(CT_RNG_NEXT_INT(rng, 0, UINT8_MAX + 1));
+                    break;
+                default: assert(0);
+                }
+            }
+
+            VX_CALL(vxCopyLUT(lut, lut_data, VX_WRITE_ONLY, VX_MEMORY_TYPE_HOST));
+        }
+
+        // Third step is running the lookup directly through the vxu immediate-mode API.
+        {
+            VX_CALL(vxuTensorTableLookup(context, src_tensor, lut, dst_tensor));
+        }
+
+        // Verify the results
+        {
+            const size_t view_start[MAX_TENSOR_DIMS] = { 0 };
+            VX_CALL(vxCopyTensorPatch(dst_tensor, dims, view_start, tensor_dims, tensor_strides, dst_data, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+
+            for (size_t index = 0; index < tensor_count; ++index)
+            {
+                const size_t tensor_byte_offset = ownGetFlatByteOffset(index, dims, tensor_dims, tensor_strides);
+
+                switch(fmt)
+                {
+                case TT_Q78:
+                {
+                    const vx_int16 res = *(vx_int16*)((char*)dst_data + tensor_byte_offset);
+                    const vx_int16 val = *(vx_int16*)((char*)src_data + tensor_byte_offset);
+                    const int16_t ref = *((vx_int16*)lut_data + (size_t)((int32_t)lut_offset + (int32_t)val));
+
+                    if (res != ref)
+                    {
+                        printf("DIFF!!!\t\t{ src[%zu] : %f (raw: %d), LUT[%d + %u]: %f (raw: %d), res[%zu]: %f (raw: %d) }\n",
+                            tensor_byte_offset / sizeof(vx_int16), val / 256.f, val,
+                            val, lut_offset, ref / 256.f, ref,
+                            tensor_byte_offset / sizeof(vx_int16), res / 256.f, res);
+                    }
+                    if (!DEBUG_TEST_TENSOR_CONTINUE_AFTER_ERROR)
+                    {
+                        ASSERT_EQ_INT(res, ref);
+                    }
+                    else
+                    {
+                        EXPECT_EQ_INT(res, ref);
+                    }
+                }
+                break;
+                case TT_U8:
+                {
+                    const vx_uint8 res = *(vx_uint8*)((char*)dst_data + tensor_byte_offset);
+                    const vx_uint8 val = *(vx_uint8*)((char*)src_data + tensor_byte_offset);
+                    const uint8_t ref = *((vx_uint8*)lut_data + (size_t)((int32_t)lut_offset + (int32_t)val));
+
+                    if (res != ref)
+                    {
+                        printf("DIFF!!!\t\t{ src[%zu] : %d, LUT[%d + %u]: %d, res[%zu]: %d }\n",
+                            tensor_byte_offset / sizeof(vx_uint8), val,
+                            val, lut_offset, ref,
+                            tensor_byte_offset / sizeof(vx_uint8), res);
+                    }
+                    if (!DEBUG_TEST_TENSOR_CONTINUE_AFTER_ERROR)
+                    {
+                        ASSERT_EQ_INT(res, ref);
+                    }
+                    else
+                    {
+                        EXPECT_EQ_INT(res, ref);
+                    }
+                }
+                break;
+                default: assert(0);
+                }
+            }
+        }
+
+        VX_CALL(vxReleaseTensor(&src_tensor));
+        VX_CALL(vxReleaseTensor(&dst_tensor));
+        VX_CALL(vxReleaseLUT(&lut));
+
+        EXPECT_EQ_PTR(NULL, src_tensor);
+        EXPECT_EQ_PTR(NULL, dst_tensor);
+        EXPECT_EQ_PTR(NULL, lut);
+
+        ct_free_mem(src_data);
+        ct_free_mem(dst_data);
+    }
+
+    ct_free_mem(tensor_dims);
+    ct_free_mem(tensor_strides);
+}
+
+
 /****************************************************************************
  *                                                                          *
  *                          Test vxTensorTransposeNode                      *
@@ -763,7 +1177,7 @@
     enum TestTensorDF fmt;
 } test_tensor_transpose_op_arg;
 
-TEST_WITH_ARG(TensorOp, testTensorTransposeOp, test_tensor_transpose_op_arg,
+TEST_WITH_ARG(TensorOp, testvxTensorTranspose, test_tensor_transpose_op_arg,
         ARG("Q78_TRANSPOSE", TT_Q78),
         ARG("U8_TRANSPOSE", TT_U8),
         ARG("S8_TRANSPOSE", TT_S8),
@@ -788,25 +1202,25 @@
         CT_RNG_INIT(rng, *seed);
     }
 
-    vx_enum data_type;
-    vx_uint8 fixed_point_position;
-    vx_size sizeof_data_type;
+    vx_enum data_type = 0;
+    vx_uint8 fixed_point_position = 0;
+    vx_size sizeof_data_type = 0;
     ownUnpackFormat(fmt, &data_type, &fixed_point_position, &sizeof_data_type);
 
-    size_t * const src_dims = malloc(sizeof(*src_dims) * max_dims);
-    size_t * const dst_dims = malloc(sizeof(*dst_dims) * max_dims);
+    size_t * const src_dims = ct_alloc_mem(sizeof(*src_dims) * max_dims);
+    size_t * const dst_dims = ct_alloc_mem(sizeof(*dst_dims) * max_dims);
     ASSERT(src_dims && dst_dims);
 
     //TODO: fix the following comment after its settlted :)
     // The way we implement the transposed query is simply swapping 2 of the
     // relevant dims strides in swizzled strides compared to original ones
-    size_t * const src_strides = malloc(sizeof(*src_strides) * max_dims);
-    size_t * const dst_strides = malloc(sizeof(*dst_strides) * max_dims);
-    size_t * const ref_strides = malloc(sizeof(*ref_strides) * max_dims);
+    size_t * const src_strides = ct_alloc_mem(sizeof(*src_strides) * max_dims);
+    size_t * const dst_strides = ct_alloc_mem(sizeof(*dst_strides) * max_dims);
+    size_t * const ref_strides = ct_alloc_mem(sizeof(*ref_strides) * max_dims);
     ASSERT(src_strides && dst_strides && ref_strides);
 
     //TODO: @Tomer, should swapping a dim with itself be acceptable?
-    
+
     // The strategy is a simple one: For each of the 1..max_dims supported,
     // we test all n^2 possible 2 dim combos for transposition.
     // We choose to do so since $sum_{n=1}^{max_dims} n ^2 ~ O(n^3)# which
@@ -821,7 +1235,7 @@
     // sequential values since the S8/U8 types would force a short repeating
     // pattern making it slightly harder to debug and possibly missing
     // perfectly aligned tranpose cases, however unlikely... :)
-    
+
     // Note that iter is the inner loop, so that if two implementations support
     // D1 and D2 dims resp. the same (pseudo-random) values would be used when
     // testing the common min(D1, D2) dimensions.
@@ -876,15 +1290,15 @@
             printf("            tensor_transpose_dims: { %zu, %zu }\n", transpose_dim0, transpose_dim1);
             printf("\t        }\n");
         }
-        
+
         //TODO: This is pretty wasteful as it's repeating a lot of work per iteration:
         //      Both in the repeated malloc + free and inefficient data population
         //      which discards much of the random data, only using a part of it.
 
         // Second step is to allocate the input and output data locations and populate the inputs.
 
-        void * const src_data = malloc(bytes);
-        void * const dst_data = malloc(bytes);
+        void * const src_data = ct_alloc_mem(bytes);
+        void * const dst_data = ct_alloc_mem(bytes);
         ASSERT(src_data && dst_data);
 
         {   //TODO: ownTestInitTensors(..) ?
@@ -989,7 +1403,7 @@
                 }
                 break;
                 default: assert(0);
-                } 
+                }
             }
         }
 
@@ -998,16 +1412,251 @@
         EXPECT_EQ_PTR(NULL, src_tensor);
         EXPECT_EQ_PTR(NULL, dst_tensor);
 
-        free(src_data);
-        free(dst_data);
+        ct_free_mem(src_data);
+        ct_free_mem(dst_data);
     }
 
-    free(src_dims);
-    free(dst_dims);
+    ct_free_mem(src_dims);
+    ct_free_mem(dst_dims);
 
-    free(src_strides);
-    free(dst_strides);
-    free(ref_strides);
+    ct_free_mem(src_strides);
+    ct_free_mem(dst_strides);
+    ct_free_mem(ref_strides);
+}
+
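+// Immediate-mode (vxu) counterpart of the transpose test above, calling
+// vxuTensorTranspose directly instead of building and running a graph.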
+TEST_WITH_ARG(TensorOp, testvxuTensorTranspose, test_tensor_transpose_op_arg,
+        ARG("Q78_TRANSPOSE", TT_Q78),
+        ARG("U8_TRANSPOSE", TT_U8),
+        ARG("S8_TRANSPOSE", TT_S8),
+)
+{
+    const vx_context context = context_->vx_context_;
+
+    const enum TestTensorDF fmt = arg_->fmt;
+    assert(fmt == TT_Q78 || fmt == TT_U8 || fmt == TT_S8);
+
+    vx_size max_dims = 0;
+    {   // TODO: ownTestGetMaxDims() ?
+        VX_CALL(vxQueryContext(context, VX_CONTEXT_MAX_TENSOR_DIMS, &max_dims, sizeof(max_dims)));
+        ASSERT(max_dims > 3);
+        if(!DEBUG_TEST_TENSOR_BEYOND_FOUR_DIMS) max_dims = 4; else max_dims = MIN(max_dims, MAX_TENSOR_DIMS);
+    }
+
+    uint64_t rng;
+    {   // TODO: ownTestGetRNG() ?
+        uint64_t * seed = &CT()->seed_;
+        ASSERT(!!seed);
+        CT_RNG_INIT(rng, *seed);
+    }
+
+    vx_enum data_type = 0;
+    vx_uint8 fixed_point_position = 0;
+    vx_size sizeof_data_type = 0;
+    ownUnpackFormat(fmt, &data_type, &fixed_point_position, &sizeof_data_type);
+
+    size_t * const src_dims = ct_alloc_mem(sizeof(*src_dims) * max_dims);
+    size_t * const dst_dims = ct_alloc_mem(sizeof(*dst_dims) * max_dims);
+    ASSERT(src_dims && dst_dims);
+
+    //TODO: fix the following comment after it's settled :)
+    // The way we implement the transposed reference is simply by swapping the strides
+    // of the 2 relevant dims in the swizzled strides compared to the original ones
+    size_t * const src_strides = ct_alloc_mem(sizeof(*src_strides) * max_dims);
+    size_t * const dst_strides = ct_alloc_mem(sizeof(*dst_strides) * max_dims);
+    size_t * const ref_strides = ct_alloc_mem(sizeof(*ref_strides) * max_dims);
+    ASSERT(src_strides && dst_strides && ref_strides);
+
+    //TODO: @Tomer, should swapping a dim with itself be acceptable?
+
+    // The strategy is a simple one: For each of the 1..max_dims supported,
+    // we test all n^2 possible 2 dim combos for transposition.
+    // We choose to do so since $\sum_{n=1}^{max_dims} n^2 = O(max_dims^3)$, which
+    // isn't much for any practical number of supported dimensions.
+    // An alternative method could be similar to the one used in the
+    // Elementwise Op tests, where we ran TEST_TENSOR_NUM_ITERATIONS iters
+    // with random 2 dim choice. But for practical values of max_dims (~6?)
+    // it's hardly any different.
+    //TODO: @Tomer, do you prefer the random approach?
+    //
+    // Note that we still chose to use pseudo-random data rather than
+    // sequential values, since the S8/U8 types would force a short repeating
+    // pattern, making it slightly harder to debug and possibly missing
+    // perfectly aligned transpose cases, however unlikely... :)
+
+    // Note that iter is the inner loop, so that if two implementations support
+    // D1 and D2 dims resp. the same (pseudo-random) values would be used when
+    // testing the common min(D1, D2) dimensions.
+    //TODO: If a single dim cannot be "transposed with itself" (copy/NOP for virt), start from 2
+    for (vx_size dims = 1; dims <= max_dims; ++dims)
+    for (vx_size transpose_dim0 = 0; transpose_dim0 < dims; ++transpose_dim0)
+    for (vx_size transpose_dim1 = 1; transpose_dim1 < dims; ++transpose_dim1)
+    {
+        if (DEBUG_TEST_TENSOR_ENABLE_PRINTF)
+        {
+            printf("dims: %zu, transpose_dim0: %zu, transpose_dim1: %zu\n", dims, transpose_dim0, transpose_dim1);
+            fflush(stdout);
+        }
+
+        // First step is to get some random dim sizes, calc the strides and create the tensors.
+
+        {
+            for (vx_size i = 0; i < dims; ++i)
+            {
+                src_dims[i] = (size_t)CT_RNG_NEXT_INT(rng, TEST_TENSOR_MIN_DIM_SZ, TEST_TENSOR_MAX_DIM_SZ+1);
+                dst_dims[i] = src_dims[i];
+
+                src_strides[i] = i ? src_strides[i - 1] * src_dims[i - 1] : sizeof_data_type;
+                ref_strides[i] = src_strides[i];
+            }
+
+            dst_dims[transpose_dim1] = src_dims[transpose_dim0];
+            dst_dims[transpose_dim0] = src_dims[transpose_dim1];
+            ref_strides[transpose_dim1] = src_strides[transpose_dim0];
+            ref_strides[transpose_dim0] = src_strides[transpose_dim1];
+
+            for (vx_size i = 0; i < dims; ++i)
+            {
+                dst_strides[i] = i ? dst_strides[i - 1] * dst_dims[i - 1] : sizeof_data_type;
+            }
+        }
+
+        vx_tensor src_tensor = vxCreateTensor(context, dims, src_dims, data_type, fixed_point_position);
+        vx_tensor dst_tensor = vxCreateTensor(context, dims, dst_dims, data_type, fixed_point_position);
+        ASSERT_VX_OBJECT(src_tensor, VX_TYPE_TENSOR);
+        ASSERT_VX_OBJECT(dst_tensor, VX_TYPE_TENSOR);
+
+        const size_t bytes = src_dims[dims - 1] * src_strides[dims - 1];
+        const size_t count = bytes / sizeof_data_type;
+
+        if (DEBUG_TEST_TENSOR_ENABLE_PRINTF)
+        {
+            printf("\tconfig: {\n");
+            printf("\t          src_dims: { "); for (size_t i = 0; i < dims; ++i) { printf("%zu, ", src_dims[i]); } printf(" },\n");
+            printf("\t          dst_dims: { "); for (size_t i = 0; i < dims; ++i) { printf("%zu, ", dst_dims[i]); } printf(" },\n");
+            printf("            count: %zu, bytes: %zu,\n", count, bytes);
+            printf("            tensor_transpose_dims: { %zu, %zu }\n", transpose_dim0, transpose_dim1);
+            printf("\t        }\n");
+        }
+
+        //TODO: This is pretty wasteful as it's repeating a lot of work per iteration:
+        //      Both in the repeated malloc + free and inefficient data population
+        //      which discards much of the random data, only using a part of it.
+
+        // Second step is to allocate the input and output data locations and populate the inputs.
+
+        void * const src_data = ct_alloc_mem(bytes);
+        void * const dst_data = ct_alloc_mem(bytes);
+        ASSERT(src_data && dst_data);
+
+        {   //TODO: ownTestInitTensors(..) ?
+            ownFillRandData(fmt, &rng, count, src_data);
+
+            vx_size view_start[MAX_TENSOR_DIMS] = { 0 };
+            VX_CALL(vxCopyTensorPatch(src_tensor, dims, view_start, src_dims, src_strides, src_data, VX_WRITE_ONLY, VX_MEMORY_TYPE_HOST));
+        }
+
+        // Third step is running the transpose directly through the vxu immediate-mode API.
+        {
+            VX_CALL(vxuTensorTranspose(context, src_tensor, dst_tensor, transpose_dim0, transpose_dim1));
+        }
+
+        // Verify the results
+        {
+            const size_t view_start[MAX_TENSOR_DIMS] = { 0 };
+            VX_CALL(vxCopyTensorPatch(dst_tensor, dims, view_start, dst_dims, dst_strides, dst_data, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+
+            for (size_t index = 0; index < count; ++index)
+            {
+                const size_t res_byte_offset = ownGetFlatByteOffset(index, dims, dst_dims, dst_strides);
+                const size_t ref_byte_offset = ownGetFlatByteOffset(index, dims, dst_dims, ref_strides);
+
+                //TODO: can unify the following to avoid the copy pasta...
+
+                switch(fmt)
+                {
+                case TT_Q78:
+                {
+                    const vx_int16 res = *(vx_int16*)((char*)dst_data + res_byte_offset);
+                    const vx_int16 ref = *(vx_int16*)((char*)src_data + ref_byte_offset);
+
+                    if (res != ref)
+                    {
+                        printf("DIFF!!!\t\t{ src[%zu]: %f (raw: %d), dst[%zu]: %f (raw: %d) }\n",
+                            ref_byte_offset / sizeof(vx_int16), ref / 256.f, ref,
+                            res_byte_offset / sizeof(vx_int16), res / 256.f, res);
+                    }
+                    if (!DEBUG_TEST_TENSOR_CONTINUE_AFTER_ERROR)
+                    {
+                        ASSERT_EQ_INT(res, ref);
+                    }
+                    else
+                    {
+                        EXPECT_EQ_INT(res, ref);
+                    }
+                }
+                break;
+                case TT_U8:
+                {
+                    const vx_uint8 res = *(vx_uint8*)((char*)dst_data + res_byte_offset);
+                    const vx_uint8 ref = *(vx_uint8*)((char*)src_data + ref_byte_offset);
+
+                    if (res != ref)
+                    {
+                        printf("DIFF!!!\t\t{ src[%zu]: %d, dst[%zu]: %d }\n",
+                            ref_byte_offset / sizeof(vx_uint8), ref,
+                            res_byte_offset / sizeof(vx_uint8), res);
+                    }
+                    if (!DEBUG_TEST_TENSOR_CONTINUE_AFTER_ERROR)
+                    {
+                        ASSERT_EQ_INT(res, ref);
+                    }
+                    else
+                    {
+                        EXPECT_EQ_INT(res, ref);
+                    }
+                }
+                break;
+                case TT_S8:
+                {
+                    const vx_int8 res = *(vx_int8*)((char*)dst_data + res_byte_offset);
+                    const vx_int8 ref = *(vx_int8*)((char*)src_data + ref_byte_offset);
+
+                    if (res != ref)
+                    {
+                        printf("DIFF!!!\t\t{ src[%zu]: %d, dst[%zu]: %d }\n",
+                            ref_byte_offset / sizeof(vx_int8), ref,
+                            res_byte_offset / sizeof(vx_int8), res);
+                    }
+                    if (!DEBUG_TEST_TENSOR_CONTINUE_AFTER_ERROR)
+                    {
+                        ASSERT_EQ_INT(res, ref);
+                    }
+                    else
+                    {
+                        EXPECT_EQ_INT(res, ref);
+                    }
+                }
+                break;
+                default: assert(0);
+                }
+            }
+        }
+
+        VX_CALL(vxReleaseTensor(&src_tensor));
+        VX_CALL(vxReleaseTensor(&dst_tensor));
+        EXPECT_EQ_PTR(NULL, src_tensor);
+        EXPECT_EQ_PTR(NULL, dst_tensor);
+
+        ct_free_mem(src_data);
+        ct_free_mem(dst_data);
+    }
+
+    ct_free_mem(src_dims);
+    ct_free_mem(dst_dims);
+
+    ct_free_mem(src_strides);
+    ct_free_mem(dst_strides);
+    ct_free_mem(ref_strides);
 }
 
 /****************************************************************************
@@ -1029,7 +1678,7 @@
 } test_tensor_convert_depth_op_arg;
 
 //TODO: what kind of configs do we want to test? It doesn't have to be full width conversions
-TEST_WITH_ARG(TensorOp, testTensorConvertDepthOp, test_tensor_convert_depth_op_arg,
+TEST_WITH_ARG(TensorOp, testvxTensorConvertDepth, test_tensor_convert_depth_op_arg,
         ARG("DEPTH_CONVERT_SAT_Q78_TO_Q78_FULL", VX_CONVERT_POLICY_SATURATE, TT_Q78, TT_Q78, 0.f, 1.f),
         ARG("DEPTH_CONVERT_SAT_Q78_TO_U8_FULL", VX_CONVERT_POLICY_SATURATE, TT_Q78, TT_U8, 128.f, 1.f),
         ARG("DEPTH_CONVERT_SAT_Q78_TO_S8_FULL", VX_CONVERT_POLICY_SATURATE, TT_Q78, TT_S8, 0.f, 1.f),
@@ -1078,18 +1727,18 @@
         CT_RNG_INIT(rng, *seed);
     }
 
-    vx_enum src_data_type;
-    vx_enum dst_data_type;
-    vx_uint8 src_fixed_point_position;
-    vx_uint8 dst_fixed_point_position;
-    vx_size src_sizeof_data_type;
-    vx_size dst_sizeof_data_type;
+    vx_enum src_data_type = 0;
+    vx_enum dst_data_type = 0;
+    vx_uint8 src_fixed_point_position = 0;
+    vx_uint8 dst_fixed_point_position = 0;
+    vx_size src_sizeof_data_type = 0;
+    vx_size dst_sizeof_data_type = 0;
     ownUnpackFormat(src_fmt, &src_data_type, &src_fixed_point_position, &src_sizeof_data_type);
     ownUnpackFormat(dst_fmt, &dst_data_type, &dst_fixed_point_position, &dst_sizeof_data_type);
 
-    size_t * const tensor_dims = malloc(sizeof(*tensor_dims) * max_dims);
-    size_t * const src_tensor_strides = malloc(sizeof(*src_tensor_strides) * max_dims);
-    size_t * const dst_tensor_strides = malloc(sizeof(*dst_tensor_strides) * max_dims);
+    size_t * const tensor_dims = ct_alloc_mem(sizeof(*tensor_dims) * max_dims);
+    size_t * const src_tensor_strides = ct_alloc_mem(sizeof(*src_tensor_strides) * max_dims);
+    size_t * const dst_tensor_strides = ct_alloc_mem(sizeof(*dst_tensor_strides) * max_dims);
     ASSERT(tensor_dims && src_tensor_strides && dst_tensor_strides);
 
     //TODO: what's the testing strategy here? missing desc.
@@ -1132,8 +1781,8 @@
             printf("\t        }\n");
         }
 
-        void * const src_data = malloc(src_tensor_bytes);
-        void * const dst_data = malloc(dst_tensor_bytes);
+        void * const src_data = ct_alloc_mem(src_tensor_bytes);
+        void * const dst_data = ct_alloc_mem(dst_tensor_bytes);
         ASSERT(src_data && dst_data);
 
         {   //TODO: ownTestInitTensors(..) ?
@@ -1183,7 +1832,7 @@
                 const size_t src_tensor_byte_offset = ownGetFlatByteOffset(index, dims, tensor_dims, src_tensor_strides);
                 const size_t dst_tensor_byte_offset = ownGetFlatByteOffset(index, dims, tensor_dims, dst_tensor_strides);
 
-                float tmp;
+                float tmp = .0f;
 
                 switch(src_fmt)
                 {
@@ -1241,13 +1890,222 @@
         EXPECT_EQ_PTR(NULL, src_tensor);
         EXPECT_EQ_PTR(NULL, dst_tensor);
 
-        free(src_data);
-        free(dst_data);
+        ct_free_mem(src_data);
+        ct_free_mem(dst_data);
     }
 
-    free(tensor_dims);
-    free(src_tensor_strides);
-    free(dst_tensor_strides);
+    ct_free_mem(tensor_dims);
+    ct_free_mem(src_tensor_strides);
+    ct_free_mem(dst_tensor_strides);
+}
+
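+// Immediate-mode (vxu) counterpart of the depth-convert test above, calling
+// vxuTensorConvertDepth directly instead of building and running a graph.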
+TEST_WITH_ARG(TensorOp, testvxuTensorConvertDepth, test_tensor_convert_depth_op_arg,
+        ARG("DEPTH_CONVERT_SAT_Q78_TO_Q78_FULL", VX_CONVERT_POLICY_SATURATE, TT_Q78, TT_Q78, 0.f, 1.f),
+        ARG("DEPTH_CONVERT_SAT_Q78_TO_U8_FULL", VX_CONVERT_POLICY_SATURATE, TT_Q78, TT_U8, 128.f, 1.f),
+        ARG("DEPTH_CONVERT_SAT_Q78_TO_S8_FULL", VX_CONVERT_POLICY_SATURATE, TT_Q78, TT_S8, 0.f, 1.f),
+        ARG("DEPTH_CONVERT_SAT_U8_TO_Q78_FULL", VX_CONVERT_POLICY_SATURATE, TT_U8, TT_Q78, -128.f, 1.f),
+        ARG("DEPTH_CONVERT_SAT_U8_TO_U8_FULL", VX_CONVERT_POLICY_SATURATE, TT_U8, TT_U8, 0.f, 1.f),
+        ARG("DEPTH_CONVERT_SAT_U8_TO_S8_FULL", VX_CONVERT_POLICY_SATURATE, TT_U8, TT_S8, -128.f, 1.f),
+        ARG("DEPTH_CONVERT_SAT_S8_TO_Q78_FULL", VX_CONVERT_POLICY_SATURATE, TT_S8, TT_Q78, 0.f, 1.f),
+        ARG("DEPTH_CONVERT_SAT_S8_TO_U8_FULL", VX_CONVERT_POLICY_SATURATE, TT_S8, TT_U8, 128.f, 1.f),
+        ARG("DEPTH_CONVERT_SAT_S8_TO_S8_FULL", VX_CONVERT_POLICY_SATURATE, TT_S8, TT_S8, 0.f, 1.f),
+
+        ARG("DEPTH_CONVERT_WRAP_Q78_TO_Q78_FULL", VX_CONVERT_POLICY_WRAP, TT_Q78, TT_Q78, 0.f, 1.f),
+        ARG("DEPTH_CONVERT_WRAP_Q78_TO_U8_FULL", VX_CONVERT_POLICY_WRAP, TT_Q78, TT_U8, 128.f, 1.f),
+        ARG("DEPTH_CONVERT_WRAP_Q78_TO_S8_FULL", VX_CONVERT_POLICY_WRAP, TT_Q78, TT_S8, 0.f, 1.f),
+        ARG("DEPTH_CONVERT_WRAP_U8_TO_Q78_FULL", VX_CONVERT_POLICY_WRAP, TT_U8, TT_Q78, -128.f, 1.f),
+        ARG("DEPTH_CONVERT_WRAP_U8_TO_U8_FULL", VX_CONVERT_POLICY_WRAP, TT_U8, TT_U8, 0.f, 1.f),
+        ARG("DEPTH_CONVERT_WRAP_U8_TO_S8_FULL", VX_CONVERT_POLICY_WRAP, TT_U8, TT_S8, -128.f, 1.f),
+        ARG("DEPTH_CONVERT_WRAP_S8_TO_Q78_FULL", VX_CONVERT_POLICY_WRAP, TT_S8, TT_Q78, 0.f, 1.f),
+        ARG("DEPTH_CONVERT_WRAP_S8_TO_U8_FULL", VX_CONVERT_POLICY_WRAP, TT_S8, TT_U8, 128.f, 1.f),
+        ARG("DEPTH_CONVERT_WRAP_S8_TO_S8_FULL", VX_CONVERT_POLICY_WRAP, TT_S8, TT_S8, 0.f, 1.f),
+)
+{
+    const vx_context context = context_->vx_context_;
+
+    const enum TestTensorDF src_fmt = arg_->src_fmt;
+    const enum TestTensorDF dst_fmt = arg_->dst_fmt;
+    assert(src_fmt == TT_Q78 || src_fmt == TT_U8 || src_fmt == TT_S8);
+    assert(dst_fmt == TT_Q78 || dst_fmt == TT_U8 || dst_fmt == TT_S8);
+
+    const enum vx_convert_policy_e policy = arg_->policy;
+    assert(policy == VX_CONVERT_POLICY_SATURATE || policy == VX_CONVERT_POLICY_WRAP);
+
+    const float offset = arg_->offset;
+    const float norm = arg_->norm;
+
+    vx_size max_dims = 0;
+    {   // TODO: ownTestGetMaxDims() ?
+        VX_CALL(vxQueryContext(context, VX_CONTEXT_MAX_TENSOR_DIMS, &max_dims, sizeof(max_dims)));
+        ASSERT(max_dims > 3);
+        if(!DEBUG_TEST_TENSOR_BEYOND_FOUR_DIMS) max_dims = 4; else max_dims = MIN(max_dims, MAX_TENSOR_DIMS);
+    }
+
+    uint64_t rng;
+    {   // TODO: ownTestGetRNG() ?
+        uint64_t * seed = &CT()->seed_;
+        ASSERT(!!seed);
+        CT_RNG_INIT(rng, *seed);
+    }
+
+    vx_enum src_data_type = 0;
+    vx_enum dst_data_type = 0;
+    vx_uint8 src_fixed_point_position = 0;
+    vx_uint8 dst_fixed_point_position = 0;
+    vx_size src_sizeof_data_type = 0;
+    vx_size dst_sizeof_data_type = 0;
+    ownUnpackFormat(src_fmt, &src_data_type, &src_fixed_point_position, &src_sizeof_data_type);
+    ownUnpackFormat(dst_fmt, &dst_data_type, &dst_fixed_point_position, &dst_sizeof_data_type);
+
+    size_t * const tensor_dims = ct_alloc_mem(sizeof(*tensor_dims) * max_dims);
+    size_t * const src_tensor_strides = ct_alloc_mem(sizeof(*src_tensor_strides) * max_dims);
+    size_t * const dst_tensor_strides = ct_alloc_mem(sizeof(*dst_tensor_strides) * max_dims);
+    ASSERT(tensor_dims && src_tensor_strides && dst_tensor_strides);
+
+    //TODO: what's the testing strategy here? missing desc.
+
+    // Note that iter is the inner loop, so that if two implementations support
+    // D1 and D2 dims resp. the same (pseudo-random) values would be used when
+    // testing the common min(D1, D2) dimensions.
+    for (vx_size dims = 1; dims <= max_dims; ++dims)
+    for (int iter = 0; iter < TEST_TENSOR_NUM_ITERATIONS; ++iter)
+    {
+        if (DEBUG_TEST_TENSOR_ENABLE_PRINTF)
+        {
+            printf("dims #: %zu,\titer #: %d\n", dims, iter);
+            fflush(stdout);
+        }
+
+        // First step is to get some random dim sizes, calc the strides and create the tensors.
+
+        for (vx_size i = 0; i < dims; ++i)
+        {
+            tensor_dims[i] = (size_t)CT_RNG_NEXT_INT(rng, TEST_TENSOR_MIN_DIM_SZ, TEST_TENSOR_MAX_DIM_SZ+1);
+
+            src_tensor_strides[i] = i ? src_tensor_strides[i-1] * tensor_dims[i-1] : src_sizeof_data_type;
+            dst_tensor_strides[i] = i ? dst_tensor_strides[i-1] * tensor_dims[i-1] : dst_sizeof_data_type;
+        }
+
+        vx_tensor src_tensor = vxCreateTensor(context, dims, tensor_dims, src_data_type, src_fixed_point_position);
+        vx_tensor dst_tensor = vxCreateTensor(context, dims, tensor_dims, dst_data_type, dst_fixed_point_position);
+        ASSERT_VX_OBJECT(src_tensor, VX_TYPE_TENSOR);
+        ASSERT_VX_OBJECT(dst_tensor, VX_TYPE_TENSOR);
+
+        const size_t src_tensor_bytes = tensor_dims[dims-1] * src_tensor_strides[dims-1];
+        const size_t dst_tensor_bytes = tensor_dims[dims-1] * dst_tensor_strides[dims-1];
+        const size_t count = src_tensor_bytes / src_sizeof_data_type;
+
+        if (DEBUG_TEST_TENSOR_ENABLE_PRINTF)
+        {
+            printf("\tconfig: {\n");
+            printf("\t          tensor_dims: { "); for (size_t i = 0; i < dims; ++i) { printf("%zu, ", tensor_dims[i]); } printf(" }, \n");
+            printf("\t        }\n");
+        }
+
+        void * const src_data = ct_alloc_mem(src_tensor_bytes);
+        void * const dst_data = ct_alloc_mem(dst_tensor_bytes);
+        ASSERT(src_data && dst_data);
+
+        {   //TODO: ownTestInitTensors(..) ?
+            ownFillRandData(src_fmt, &rng, count, src_data);
+
+            vx_size view_start[MAX_TENSOR_DIMS] = { 0 };
+            VX_CALL(vxCopyTensorPatch(src_tensor, dims, view_start, tensor_dims, src_tensor_strides, src_data, VX_WRITE_ONLY, VX_MEMORY_TYPE_HOST));
+        }
+
+        // Third step is creating the scale/offset scalars and running the conversion through the vxu immediate-mode API.
+        {
+            vx_scalar norm_sc = vxCreateScalar(context, VX_TYPE_FLOAT32, &norm);
+            vx_scalar offset_sc = vxCreateScalar(context, VX_TYPE_FLOAT32, &offset);
+            ASSERT_VX_OBJECT(norm_sc, VX_TYPE_SCALAR);
+            ASSERT_VX_OBJECT(offset_sc, VX_TYPE_SCALAR);
+
+            VX_CALL(vxuTensorConvertDepth(context, src_tensor, policy, norm_sc, offset_sc, dst_tensor));
+
+            VX_CALL(vxReleaseScalar(&norm_sc));
+            VX_CALL(vxReleaseScalar(&offset_sc));
+            EXPECT_EQ_PTR(NULL, norm_sc);
+            EXPECT_EQ_PTR(NULL, offset_sc);
+        }
+
+        // Verify the results
+        {
+            const size_t view_start[MAX_TENSOR_DIMS] = { 0 };
+            VX_CALL(vxCopyTensorPatch(dst_tensor, dims, view_start, tensor_dims, dst_tensor_strides, dst_data, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+
+            const float scale = 1.f / norm;
+            const bool wrap = policy == VX_CONVERT_POLICY_WRAP;
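+            // Reference conversion: ref = (src - offset) / norm, then cast back with wrap or saturate per the policy.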
+
+            for (size_t index = 0; index < count; ++index)
+            {
+                const size_t src_tensor_byte_offset = ownGetFlatByteOffset(index, dims, tensor_dims, src_tensor_strides);
+                const size_t dst_tensor_byte_offset = ownGetFlatByteOffset(index, dims, tensor_dims, dst_tensor_strides);
+
+                float tmp = .0f;
+
+                switch(src_fmt)
+                {
+                case TT_Q78:
+                    tmp = *(vx_int16*)((char*)src_data + src_tensor_byte_offset);
+                    tmp /= Q78_SCALE;
+                    break;
+                case TT_U8:
+                    tmp = *(vx_uint8*)((char*)src_data + src_tensor_byte_offset);
+                    break;
+                case TT_S8:
+                    tmp = *(vx_int8*)((char*)src_data + src_tensor_byte_offset);
+                    break;
+                default: assert(0);
+                }
+
+                tmp = (tmp - offset) * scale;
+
+                //TODO: missing allowed eps
+                //TODO: missing diff printf
+                switch(dst_fmt)
+                {
+                case TT_Q78:
+                    {
+                        tmp *= Q78_SCALE;
+                        vx_int16 ref = wrap ? (vx_int16)tmp : CLAMP(tmp, INT16_MIN, INT16_MAX); //TODO: cast issue?
+                        vx_int16 res = *(vx_int16*)((char*)dst_data + dst_tensor_byte_offset);
+                        if (res != ref) printf("DIFF!!!\n");
+                        if (!DEBUG_TEST_TENSOR_CONTINUE_AFTER_ERROR) ASSERT_EQ_INT(res, ref); else EXPECT_EQ_INT(res, ref);
+                    }
+                    break;
+                case TT_U8:
+                    {
+                        vx_uint8 ref = wrap ? (vx_uint8)tmp : CLAMP(tmp, 0, UINT8_MAX);  // CLAMP not really needed
+                        vx_uint8 res = *(vx_uint8*)((char*)dst_data + dst_tensor_byte_offset);
+                        if (res != ref) printf("DIFF!!!\n");
+                        if (!DEBUG_TEST_TENSOR_CONTINUE_AFTER_ERROR) ASSERT_EQ_INT(res, ref); else EXPECT_EQ_INT(res, ref);
+                    }
+                    break;
+                case TT_S8:
+                    {
+                        vx_int8 ref = wrap ? (vx_int8)tmp : (vx_int8)CLAMP(tmp, INT8_MIN, INT8_MAX); //TODO: cast issue?
+                        vx_int8 res = *(vx_int8*)((char*)dst_data + dst_tensor_byte_offset);
+                        if (res != ref) printf("DIFF!!!\n");
+                        if (!DEBUG_TEST_TENSOR_CONTINUE_AFTER_ERROR) ASSERT_EQ_INT(res, ref); else EXPECT_EQ_INT(res, ref);
+                    }
+                    break;
+                default: assert(0);
+                }
+            }
+        }
+
+        VX_CALL(vxReleaseTensor(&src_tensor));
+        VX_CALL(vxReleaseTensor(&dst_tensor));
+        EXPECT_EQ_PTR(NULL, src_tensor);
+        EXPECT_EQ_PTR(NULL, dst_tensor);
+
+        ct_free_mem(src_data);
+        ct_free_mem(dst_data);
+    }
+
+    ct_free_mem(tensor_dims);
+    ct_free_mem(src_tensor_strides);
+    ct_free_mem(dst_tensor_strides);
 }
 
 
@@ -1385,7 +2243,7 @@
     TT_TENSOR_MAD_0(U8)     \
     TT_TENSOR_MAD_0(S8)
 
-TEST_WITH_ARG(TensorOp, testTensorMatrixMultiplyOp, test_tensor_matrix_multiply_op_arg,
+TEST_WITH_ARG(TensorOp, testvxTensorMatrixMultiply, test_tensor_matrix_multiply_op_arg,
         TT_TENSOR_MAD_ALL()
 )
 {
@@ -1403,9 +2261,9 @@
         CT_RNG_INIT(rng, *seed);
     }
 
-    vx_enum data_type;
-    vx_uint8 fixed_point_position;
-    vx_size sizeof_data_type;
+    vx_enum data_type = 0;
+    vx_uint8 fixed_point_position = 0;
+    vx_size sizeof_data_type = 0;
     ownUnpackFormat(arg_->fmt, &data_type, &fixed_point_position, &sizeof_data_type);
 
     for (int iter = 0; iter < TEST_TENSOR_NUM_ITERATIONS; ++iter)
@@ -1443,11 +2301,11 @@
         const vx_size c_strides[2] = { sizeof_data_type, sizeof_data_type * c_dims[0] };
         const vx_size out_strides[2] = { sizeof_data_type, sizeof_data_type * out_dims[0] };
 
-        void * a_data = malloc(m * n * sizeof_data_type);
-        void * b_data = malloc(n * k * sizeof_data_type);
-        void * c_data = arg_->c_present ? malloc(m * k * sizeof_data_type) : NULL;
-        void * out_data = malloc(m * k * sizeof_data_type);
-        void * ref_data = malloc(m * k * sizeof_data_type);
+        void * a_data = ct_alloc_mem(m * n * sizeof_data_type);
+        void * b_data = ct_alloc_mem(n * k * sizeof_data_type);
+        void * c_data = arg_->c_present ? ct_alloc_mem(m * k * sizeof_data_type) : NULL;
+        void * out_data = ct_alloc_mem(m * k * sizeof_data_type);
+        void * ref_data = ct_alloc_mem(m * k * sizeof_data_type);
         ASSERT(a_data && b_data && (!arg_->c_present || c_data) && out_data && ref_data);
 
         // Since we check the sum of products here, and te accumulator is only
@@ -1537,20 +2395,176 @@
         EXPECT_EQ_PTR(NULL, c_tensor);
         EXPECT_EQ_PTR(NULL, out_tensor);
 
-        free(a_data);
-        free(b_data);
-        free(c_data);
-        free(out_data);
+        ct_free_mem(a_data);
+        ct_free_mem(b_data);
+        ct_free_mem(c_data);
+        ct_free_mem(out_data);
     }
 }
 
+TEST_WITH_ARG(TensorOp, testvxuTensorMatrixMultiply, test_tensor_matrix_multiply_op_arg,
+        TT_TENSOR_MAD_ALL()
+)
+{
+    const vx_context context = context_->vx_context_;
+    vx_size max_dims = 0;
+    {   // TODO: ownTestGetMaxDims() ?
+        VX_CALL(vxQueryContext(context_->vx_context_, VX_CONTEXT_MAX_TENSOR_DIMS, &max_dims, sizeof(max_dims)));
+        ASSERT(max_dims > 3);
+        if(!DEBUG_TEST_TENSOR_BEYOND_FOUR_DIMS) max_dims = 4; else max_dims = MIN(max_dims, MAX_TENSOR_DIMS);
+    }
+
+    uint64_t rng;
+    {   // TODO: ownTestGetRNG() ?
+        uint64_t * seed = &CT()->seed_;
+        ASSERT(!!seed);
+        CT_RNG_INIT(rng, *seed);
+    }
+
+    vx_enum data_type = 0;
+    vx_uint8 fixed_point_position = 0;
+    vx_size sizeof_data_type = 0;
+    ownUnpackFormat(arg_->fmt, &data_type, &fixed_point_position, &sizeof_data_type);
+
+    for (int iter = 0; iter < TEST_TENSOR_NUM_ITERATIONS; ++iter)
+    {
+        if (DEBUG_TEST_TENSOR_ENABLE_PRINTF)
+        {
+            printf("iter #: %d\n", iter); fflush(stdout);
+        }
+
+        const vx_size m = (vx_size)CT_RNG_NEXT_INT(rng, TEST_TENSOR_MIN_DIM_SZ, TEST_TENSOR_MAX_DIM_SZ+1);
+        const vx_size n = (vx_size)CT_RNG_NEXT_INT(rng, TEST_TENSOR_MIN_DIM_SZ, TEST_TENSOR_MAX_DIM_SZ+1);
+        const vx_size k = (vx_size)CT_RNG_NEXT_INT(rng, TEST_TENSOR_MIN_DIM_SZ, TEST_TENSOR_MAX_DIM_SZ+1);
+
+        // Note that unlike common GEMM, here we do not update an existing c but
+        // output to a different tensor!
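+        // In other words (assuming the standard definition of the kernel), the result is
+        // out = op(a) x op(b) [+ op(c)], where op() is an optional transpose selected by
+        // the a/b/c_transposed flags in vx_tensor_matrix_multiply_params_t.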
+
+        const vx_size a_dims[2] = { arg_->a_transposed ? m : n, arg_->a_transposed ? n : m };
+        const vx_size b_dims[2] = { arg_->b_transposed ? n : k, arg_->b_transposed ? k : n };
+        const vx_size c_dims[2] = { arg_->c_transposed ? m : k, arg_->c_transposed ? k : m };
+        const vx_size out_dims[2] = { k, m };
+
+        if (DEBUG_TEST_TENSOR_ENABLE_PRINTF)
+        {
+            printf("\tconfig: {\n");
+            printf("\t          a_dims: { %zu, %zu },\n", a_dims[0], a_dims[1]);
+            printf("\t          b_dims: { %zu, %zu },\n", b_dims[0], b_dims[1]);
+            if (arg_->c_present)
+                printf("\t          c_dims: { %zu, %zu },\n", c_dims[0], c_dims[1]);
+            printf("\t          out_dims: { %zu, %zu },\n", out_dims[0], out_dims[1]);
+            printf("\t        }\n");
+        }
+
+        const vx_size a_strides[2] = { sizeof_data_type, sizeof_data_type * a_dims[0] };
+        const vx_size b_strides[2] = { sizeof_data_type, sizeof_data_type * b_dims[0] };
+        const vx_size c_strides[2] = { sizeof_data_type, sizeof_data_type * c_dims[0] };
+        const vx_size out_strides[2] = { sizeof_data_type, sizeof_data_type * out_dims[0] };
+
+        void * a_data = ct_alloc_mem(m * n * sizeof_data_type);
+        void * b_data = ct_alloc_mem(n * k * sizeof_data_type);
+        void * c_data = arg_->c_present ? ct_alloc_mem(m * k * sizeof_data_type) : NULL;
+        void * out_data = ct_alloc_mem(m * k * sizeof_data_type);
+        void * ref_data = ct_alloc_mem(m * k * sizeof_data_type);
+        ASSERT(a_data && b_data && (!arg_->c_present || c_data) && out_data && ref_data);
+
+        // Since we check the sum of products here, and the accumulator is only
+        // supposed to be 32 bits, we need smaller values so that the intermediate
+        // results don't exceed it.
+        ownFillSmallRandData(arg_->fmt, &rng, m * n, a_dims[0] + 1, a_data);
+        ownFillSmallRandData(arg_->fmt, &rng, n * k, a_dims[0] + 1, b_data);
+        if (arg_->c_present) { ownFillSmallRandData(arg_->fmt, &rng, m * k, a_dims[0] + 1, c_data); }
+
+        vx_tensor a_tensor = vxCreateTensor(context_->vx_context_, 2, a_dims, data_type, fixed_point_position);
+        vx_tensor b_tensor = vxCreateTensor(context_->vx_context_, 2, b_dims, data_type, fixed_point_position);
+        vx_tensor c_tensor = arg_->c_present ? vxCreateTensor(context_->vx_context_, 2, c_dims, data_type, fixed_point_position) : NULL;
+        vx_tensor out_tensor = vxCreateTensor(context_->vx_context_, 2, out_dims, data_type, fixed_point_position);
+
+        ASSERT_VX_OBJECT(a_tensor, VX_TYPE_TENSOR);
+        ASSERT_VX_OBJECT(b_tensor, VX_TYPE_TENSOR);
+        if (arg_->c_present)
+        {
+            ASSERT_VX_OBJECT(c_tensor, VX_TYPE_TENSOR);
+        }
+        ASSERT_VX_OBJECT(out_tensor, VX_TYPE_TENSOR);
+
+        vx_size view_start[2] = { 0, 0 };
+        VX_CALL(vxCopyTensorPatch(a_tensor, 2, view_start, a_dims, a_strides, a_data, VX_WRITE_ONLY, VX_MEMORY_TYPE_HOST));
+        VX_CALL(vxCopyTensorPatch(b_tensor, 2, view_start, b_dims, b_strides, b_data, VX_WRITE_ONLY, VX_MEMORY_TYPE_HOST));
+        if (arg_->c_present)
+        {
+            VX_CALL(vxCopyTensorPatch(c_tensor, 2, view_start, c_dims, c_strides, c_data, VX_WRITE_ONLY, VX_MEMORY_TYPE_HOST));
+        }
+
+        // Create, run vxuTensorMatrixMultiply
+        {
+            vx_tensor_matrix_multiply_params_t params = { arg_->a_transposed, arg_->b_transposed, arg_->c_transposed };
+            VX_CALL(vxuTensorMatrixMultiply(context, a_tensor, b_tensor, c_tensor, &params, out_tensor));
+        }
+
+        {
+            ownTensorMatrixMultiply(
+                    arg_->fmt,
+                    a_data, a_dims, a_strides, arg_->a_transposed,
+                    b_data, b_dims, b_strides, arg_->b_transposed,
+                    c_data, c_dims, c_strides, arg_->c_transposed,
+                    ref_data, out_dims, out_strides);
+
+            VX_CALL(vxCopyTensorPatch(out_tensor, 2, view_start, out_dims, out_strides, out_data, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+
+            size_t first_diff_index;
+            size_t first_diff_byte_offset0;
+            size_t first_diff_byte_offset1;
+            if (!ownExpectIdenticalData(
+                        arg_->fmt,
+                        out_data, out_dims, 2, out_strides,
+                        ref_data, out_dims, 2, out_strides,
+                        8, //(arg_->fmt == TT_Q78 ? 1 : 0),
+                        &first_diff_index,
+                        &first_diff_byte_offset0,
+                        &first_diff_byte_offset1))
+            {
+                printf("DIFF! { idx: %zu, out: ", first_diff_index);
+                ownPrettyPrintVal(arg_->fmt, (char*)out_data + first_diff_byte_offset0);
+                printf(", ref: ");
+                ownPrettyPrintVal(arg_->fmt, (char*)ref_data + first_diff_byte_offset1);
+                printf(" }\n");
+
+                if (!DEBUG_TEST_TENSOR_CONTINUE_AFTER_ERROR) ASSERT(0);
+            }
+        }
+
+        VX_CALL(vxReleaseTensor(&a_tensor));
+        VX_CALL(vxReleaseTensor(&b_tensor));
+        if (arg_->c_present) VX_CALL(vxReleaseTensor(&c_tensor));
+        VX_CALL(vxReleaseTensor(&out_tensor));
+        EXPECT_EQ_PTR(NULL, a_tensor);
+        EXPECT_EQ_PTR(NULL, b_tensor);
+        EXPECT_EQ_PTR(NULL, c_tensor);
+        EXPECT_EQ_PTR(NULL, out_tensor);
+
+        ct_free_mem(a_data);
+        ct_free_mem(b_data);
+        ct_free_mem(c_data);
+        ct_free_mem(out_data);
+        ct_free_mem(ref_data);
+    }
+}
+
+
 TESTCASE_TESTS(TensorOp,
     /* vx_nodes.h function tests */
-    testTensorElementwiseOp,
-    testTensorTableLookupOp,
-    testTensorTransposeOp,
-    testTensorConvertDepthOp,
-    testTensorMatrixMultiplyOp
+    testvxTensorElementwiseOp,
+    testvxuTensorElementwiseOp,
+    testvxTensorTableLookup,
+    testvxuTensorTableLookup,
+    testvxTensorTranspose,
+    testvxuTensorTranspose,
+    testvxTensorConvertDepth,
+    testvxuTensorConvertDepth,
+    testvxTensorMatrixMultiply,
+    testvxuTensorMatrixMultiply
     /* minigraph tests */
     /*, testTensorOpSanity*/
 )
+
+#endif //OPENVX_USE_ENHANCED_VISION
diff --git a/test_conformance/test_threshold.c b/test_conformance/test_threshold.c
index 778c815..fa0a629 100644
--- a/test_conformance/test_threshold.c
+++ b/test_conformance/test_threshold.c
@@ -1,4 +1,4 @@
-/* 
+/*
 
  * Copyright (c) 2012-2017 The Khronos Group Inc.
  *
@@ -15,6 +15,8 @@
  * limitations under the License.
  */
 
+#if defined OPENVX_USE_ENHANCED_VISION || OPENVX_CONFORMANCE_VISION
+
 #include "test_engine/test.h"
 
 #include <VX/vx.h>
@@ -28,7 +30,8 @@
     uint32_t x, y, width, height, srcstride, dststride;
 
     ASSERT(src && dst);
-    ASSERT((src->format == VX_DF_IMAGE_U8 || src->format == VX_DF_IMAGE_S16) && dst->format == VX_DF_IMAGE_U8);
+    ASSERT((src->format == VX_DF_IMAGE_U8 || src->format == VX_DF_IMAGE_S16) &&
+           (dst->format == VX_DF_IMAGE_U1 || dst->format == VX_DF_IMAGE_U8));
     ASSERT(src->width > 0 && src->height > 0 &&
            src->width == dst->width && src->height == dst->height);
     width = src->width;
@@ -45,16 +48,42 @@
             if( ttype == VX_THRESHOLD_TYPE_BINARY )
             {
                 for( x = 0; x < width; x++ )
-                    dstptr[x] = srcptr[x] > ta ? true_val : false_val;
+                {
+                    uint8_t dst_value = srcptr[x] > ta ? true_val : false_val;
+                    if (dst->format == VX_DF_IMAGE_U1)
+                    {
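+                        // U1 pixels are packed eight per byte, LSB first; xShftd adds the
+                        // ROI x-offset modulo 8 so the bit position matches the parent
+                        // image (e.g. x = 10 with roi.x % 8 == 3 lands in byte 1, bit 5).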
+                        uint32_t xShftd = x + dst->roi.x % 8;
+                        uint8_t  offset = xShftd % 8;
+                        dst_value = dst_value > 1 ? 1 : dst_value;
+                        dstptr[xShftd / 8] = (dstptr[xShftd / 8] & ~(1 << offset)) | (dst_value << offset);
+                    }
+                    else
+                    {
+                        dstptr[x] = dst_value;
+                    }
+                }
             }
-            else
+            else    // VX_THRESHOLD_TYPE_RANGE
             {
                 for( x = 0; x < width; x++ )
-                    dstptr[x] = srcptr[x] < ta || srcptr[x] > tb ? false_val : true_val;
+                {
+                    uint8_t dst_value = srcptr[x] < ta || srcptr[x] > tb ? false_val : true_val;
+                    if(dst->format == VX_DF_IMAGE_U1)
+                    {
+                        uint32_t xShftd = x + dst->roi.x % 8;
+                        uint8_t  offset = xShftd % 8;
+                        dst_value = dst_value > 1 ? 1 : dst_value;
+                        dstptr[xShftd / 8] = (dstptr[xShftd / 8] & ~(1 << offset)) | (dst_value << offset);
+                    }
+                    else
+                    {
+                        dstptr[x] = dst_value;
+                    }
+                }
             }
         }
     }
-    else
+    else    // src->format == VX_DF_IMAGE_S16
     {
         for( y = 0; y < height; y++ )
         {
@@ -68,52 +97,127 @@
                     dst_value = srcptr[x] > ta ? true_val : false_val;
 
                     dst_value = (dst_value < 0 ? 0 : dst_value);
-                    dst_value = (dst_value > UINT8_MAX ? UINT8_MAX : dst_value);
-                    dstptr[x] = (uint8_t)dst_value;
+                    if(dst->format == VX_DF_IMAGE_U1)
+                    {
+                        uint32_t xShftd = x + dst->roi.x % 8;
+                        uint8_t  offset = xShftd % 8;
+                        dst_value = (dst_value > 1 ? 1 : dst_value);
+                        dstptr[xShftd / 8] = (dstptr[xShftd / 8] & ~(1 << offset)) | ((uint8_t)dst_value << offset);
+                    }
+                    else
+                    {
+                        dst_value = (dst_value > UINT8_MAX ? UINT8_MAX : dst_value);
+                        dstptr[x] = (uint8_t)dst_value;
+                    }
                 }
             }
-            else
+            else    // VX_THRESHOLD_TYPE_RANGE
             {
                 for( x = 0; x < width; x++ )
                 {
                     dst_value = srcptr[x] < ta || srcptr[x] > tb ? false_val : true_val;
 
                     dst_value = (dst_value < 0 ? 0 : dst_value);
-                    dst_value = (dst_value > UINT8_MAX ? UINT8_MAX : dst_value);
-                    dstptr[x] = (uint8_t)dst_value;
+                    if(dst->format == VX_DF_IMAGE_U1)
+                    {
+                        uint32_t xShftd = x + dst->roi.x % 8;
+                        uint8_t  offset = xShftd % 8;
+                        dst_value = (dst_value > 1 ? 1 : dst_value);
+                        dstptr[xShftd / 8] = (dstptr[xShftd / 8] & ~(1 << offset)) | ((uint8_t)dst_value << offset);
+                    }
+                    else
+                    {
+                        dst_value = (dst_value > UINT8_MAX ? UINT8_MAX : dst_value);
+                        dstptr[x] = (uint8_t)dst_value;
+                    }
                 }
             }
         }
     }
 }
 
-
 TESTCASE(Threshold, CT_VXContext, ct_setup_vx_context, 0)
 
+#define CT_THRESHOLD_TRUE_VALUE  255
+#define CT_THRESHOLD_FALSE_VALUE 0
+
+typedef struct {
+    const char* name;
+    vx_enum src_type;
+    vx_enum dst_type;
+} threshold_create_arg;
+
+#define THRESHOLD_CREATE(src_type, dst_type)    {        #src_type "/" #dst_type, VX_DF_IMAGE_##src_type,  VX_DF_IMAGE_##dst_type}
+#define THRESHOLD_CREATE_U1(src_type, dst_type) {"_U1_/" #src_type "/" #dst_type, VX_DF_IMAGE_##src_type,  VX_DF_IMAGE_##dst_type}
+
+TEST_WITH_ARG(Threshold, testThresholdCreation, threshold_create_arg,
+              THRESHOLD_CREATE(U8, U8),
+              THRESHOLD_CREATE(S16, U8),
+              THRESHOLD_CREATE_U1(U8, U1),
+              )
+{
+    vx_context context = context_->vx_context_;
+    vx_enum thresh_type = VX_THRESHOLD_TYPE_BINARY;
+
+    vx_threshold threshold;
+    ASSERT_VX_OBJECT(threshold = vxCreateThresholdForImage(context, thresh_type, arg_->src_type, arg_->dst_type), VX_TYPE_THRESHOLD);
+
+    vx_df_image input_type, output_type;
+    vxQueryThreshold(threshold,  VX_THRESHOLD_INPUT_FORMAT, &input_type, sizeof(vx_df_image));
+    vxQueryThreshold(threshold,  VX_THRESHOLD_OUTPUT_FORMAT, &output_type, sizeof(vx_df_image));
+    ASSERT_EQ_INT(arg_->src_type, input_type);
+    ASSERT_EQ_INT(arg_->dst_type, output_type);
+
+    ASSERT_EQ_VX_STATUS(VX_SUCCESS, vxReleaseThreshold(&threshold));
+}
+
+TEST_WITH_ARG(Threshold, testVirtualThresholdCreation, threshold_create_arg,
+              THRESHOLD_CREATE(U8, U8),
+              THRESHOLD_CREATE(S16, U8),
+              THRESHOLD_CREATE_U1(U8, U1),
+              )
+{
+    vx_context context = context_->vx_context_;
+    vx_enum thresh_type = VX_THRESHOLD_TYPE_BINARY;
+    vx_graph graph = vxCreateGraph(context);
+
+    vx_threshold threshold;
+    ASSERT_VX_OBJECT(threshold = vxCreateVirtualThresholdForImage(graph, thresh_type, arg_->src_type, arg_->dst_type), VX_TYPE_THRESHOLD);
+    ASSERT_EQ_VX_STATUS(VX_SUCCESS, vxReleaseThreshold(&threshold));
+
+    ASSERT_EQ_VX_STATUS(VX_SUCCESS, vxReleaseGraph(&graph));
+}
+
 typedef struct {
     const char* name;
     int mode;
     vx_enum ttype;
-    int format;
+    int in_format;
+    int out_format;
 } format_arg;
 
-#define THRESHOLD_CASE(imm, ttype, format) { #imm "/" #ttype "/" #format "/U8", CT_##imm##_MODE, VX_THRESHOLD_TYPE_##ttype, VX_DF_IMAGE_##format}
-
-#define CT_THRESHOLD_TRUE_VALUE  255
-#define CT_THRESHOLD_FALSE_VALUE 0
+#define THRESHOLD_CASE(imm, ttype, in_format, out_format)    {        #imm "/" #ttype "/" #in_format "/" #out_format, \
+    CT_##imm##_MODE, VX_THRESHOLD_TYPE_##ttype, VX_DF_IMAGE_##in_format, VX_DF_IMAGE_##out_format}
+#define THRESHOLD_CASE_U1(imm, ttype, in_format, out_format) {"_U1_/" #imm "/" #ttype "/" #in_format "/" #out_format, \
+    CT_##imm##_MODE, VX_THRESHOLD_TYPE_##ttype, VX_DF_IMAGE_##in_format, VX_DF_IMAGE_##out_format}
 
 TEST_WITH_ARG(Threshold, testOnRandom, format_arg,
-              THRESHOLD_CASE(Immediate, BINARY, U8),
-              THRESHOLD_CASE(Immediate, BINARY, S16),
-              THRESHOLD_CASE(Immediate, RANGE, U8),
-              THRESHOLD_CASE(Immediate, RANGE, S16),
-              THRESHOLD_CASE(Graph, BINARY, U8),
-              THRESHOLD_CASE(Graph, BINARY, S16),
-              THRESHOLD_CASE(Graph, RANGE, U8),
-              THRESHOLD_CASE(Graph, RANGE, S16),
+              THRESHOLD_CASE(Immediate, BINARY,  U8, U8),
+              THRESHOLD_CASE(Immediate, BINARY, S16, U8),
+              THRESHOLD_CASE(Immediate, RANGE,   U8, U8),
+              THRESHOLD_CASE(Immediate, RANGE,  S16, U8),
+              THRESHOLD_CASE(Graph, BINARY,  U8, U8),
+              THRESHOLD_CASE(Graph, BINARY, S16, U8),
+              THRESHOLD_CASE(Graph, RANGE,   U8, U8),
+              THRESHOLD_CASE(Graph, RANGE,  S16, U8),
+              THRESHOLD_CASE_U1(Immediate, BINARY, U8, U1),
+              THRESHOLD_CASE_U1(Immediate, RANGE,  U8, U1),
+              THRESHOLD_CASE_U1(Graph, BINARY, U8, U1),
+              THRESHOLD_CASE_U1(Graph, RANGE,  U8, U1),
               )
 {
-    int format = arg_->format;
+    int in_format = arg_->in_format;
+    int out_format = arg_->out_format;
     int ttype = arg_->ttype;
     int mode = arg_->mode;
     vx_image src, dst;
@@ -139,10 +243,15 @@
 
         if( ct_check_any_size() )
         {
-            width = ct_roundf(ct_log_rng(&rng, 0, 10));
+            width  = ct_roundf(ct_log_rng(&rng, 0, 10));
             height = ct_roundf(ct_log_rng(&rng, 0, 10));
-            width = CT_MAX(width, 1);
+            width  = CT_MAX(width, 1);
             height = CT_MAX(height, 1);
+
+            if (in_format == VX_DF_IMAGE_U1 || out_format == VX_DF_IMAGE_U1)
+            {
+                width = ((width + 7) / 8) * 8;      // Width must be multiple of 8 for U1 images
+            }
         }
         else
         {
@@ -152,19 +261,19 @@
 
         ct_update_progress(iter, niters);
 
-        ASSERT_NO_FAILURE(src0 = ct_allocate_ct_image_random(width, height, format, &rng, a, b));
+        ASSERT_NO_FAILURE(src0 = ct_allocate_ct_image_random(width, height, in_format, &rng, a, b));
         if( iter % 20 == 0 )
         {
             uint8_t val = (uint8_t)CT_RNG_NEXT_INT(rng, a, b);
             ct_memset(src0->data.y, val, ct_stride_bytes(src0)*src0->height);
         }
-        ASSERT_NO_FAILURE(dst0 = ct_allocate_image(width, height, VX_DF_IMAGE_U8));
+        ASSERT_NO_FAILURE(dst0 = ct_allocate_image(width, height, out_format));
         ASSERT_NO_FAILURE(reference_threshold(src0, dst0, ttype, ta, tb, true_val, false_val));
 
         src = ct_image_to_vx_image(src0, context);
-        dst = vxCreateImage(context, width, height, VX_DF_IMAGE_U8);
+        dst = vxCreateImage(context, width, height, out_format);
         ASSERT_VX_OBJECT(dst, VX_TYPE_IMAGE);
-        vxt = vxCreateThresholdForImage(context,  ttype, format, VX_DF_IMAGE_U8);
+        vxt = vxCreateThresholdForImage(context, ttype, in_format, out_format);
         if( ttype == VX_THRESHOLD_TYPE_BINARY )
         {
               vx_pixel_value_t pa;
@@ -224,53 +333,131 @@
     }
 }
 
-
 typedef struct {
     const char* name;
-    vx_enum src_type;
-    vx_enum dst_type;
-} threshold_create_arg;
+    vx_enum ttype;
+    int in_format;
+    int out_format;
+    vx_uint32 shrink_sz;
+} ValidRegionTest_Arg;
 
-#define THRESHOLD_CREATE(src_type, dst_type) {#src_type "/" #dst_type, VX_DF_IMAGE_##src_type,  VX_DF_IMAGE_##dst_type}
+#define THRESHOLD_REGION_CASE(ttype, in_format, out_format, shrink_sz) { #ttype "/" #in_format "/" #out_format, VX_THRESHOLD_TYPE_##ttype, VX_DF_IMAGE_##in_format, VX_DF_IMAGE_##out_format, shrink_sz}
+#define THRESHOLD_REGION_CASE_U1(ttype, in_format, out_format, shrink_sz) {"_U1_/" #ttype "/" #in_format "/" #out_format, VX_THRESHOLD_TYPE_##ttype, VX_DF_IMAGE_##in_format, VX_DF_IMAGE_##out_format, shrink_sz}
 
-TEST_WITH_ARG(Threshold, testThresholdCreation, threshold_create_arg,
-              THRESHOLD_CREATE(U8, U8),
-              THRESHOLD_CREATE(S16, U8),
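+// testWithValidRegion shrinks the input's valid rectangle by shrink_sz pixels on every
+// side and checks that the thresholded output still matches the reference inside that
+// region (the reference image and the output ROI are adjusted by the same amount).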
+TEST_WITH_ARG(Threshold, testWithValidRegion, ValidRegionTest_Arg,
+              THRESHOLD_REGION_CASE(BINARY,  U8, U8, 1),
+              THRESHOLD_REGION_CASE(BINARY, S16, U8, 1),
+              THRESHOLD_REGION_CASE(RANGE,   U8, U8, 1),
+              THRESHOLD_REGION_CASE(RANGE,  S16, U8, 1),
+              THRESHOLD_REGION_CASE(BINARY,  U8, U8, 7),
+              THRESHOLD_REGION_CASE(BINARY, S16, U8, 7),
+              THRESHOLD_REGION_CASE(RANGE,   U8, U8, 7),
+              THRESHOLD_REGION_CASE(RANGE,  S16, U8, 7),
+              THRESHOLD_REGION_CASE_U1(BINARY, U8, U1, 1),
+              THRESHOLD_REGION_CASE_U1(RANGE,  U8, U1, 1),
+              THRESHOLD_REGION_CASE_U1(BINARY, U8, U1, 7),
+              THRESHOLD_REGION_CASE_U1(RANGE,  U8, U1, 7),
               )
 {
+    int in_format = arg_->in_format;
+    int out_format = arg_->out_format;
+    int ttype = arg_->ttype;
+    vx_image src, dst;
+    vx_threshold vxt;
+    CT_Image src0, dst0, dst1;
     vx_context context = context_->vx_context_;
-    vx_enum thresh_type = VX_THRESHOLD_TYPE_BINARY;
+    int iter, niters = 100;
+    uint64_t rng;
+    int a = 0, b = 256;
+    int true_val = CT_THRESHOLD_TRUE_VALUE;
+    int false_val = CT_THRESHOLD_FALSE_VALUE;
+    vx_uint32 region_shrink = arg_->shrink_sz;
+    vx_rectangle_t rect;
 
-    vx_threshold threshold;
-    ASSERT_VX_OBJECT(threshold = vxCreateThresholdForImage(context, thresh_type, arg_->src_type, arg_->dst_type), VX_TYPE_THRESHOLD);
+    rng = CT()->seed_;
 
-    vx_df_image input_type, output_type;
-    vxQueryThreshold(threshold,  VX_THRESHOLD_INPUT_FORMAT, &input_type, sizeof(vx_df_image));
-    vxQueryThreshold(threshold,  VX_THRESHOLD_OUTPUT_FORMAT, &output_type, sizeof(vx_df_image));
-    ASSERT_EQ_INT(arg_->src_type, input_type);
-    ASSERT_EQ_INT(arg_->dst_type, output_type);
+    for( iter = 0; iter < niters; iter++ )
+    {
+        int width, height;
 
-    ASSERT_EQ_VX_STATUS(VX_SUCCESS, vxReleaseThreshold(&threshold));
-}
+        uint8_t _ta = CT_RNG_NEXT_INT(rng, 0, 256), _tb = CT_RNG_NEXT_INT(rng, 0, 256);
+        vx_int32 ta = CT_MIN(_ta, _tb), tb = CT_MAX(_ta, _tb);
 
-TEST_WITH_ARG(Threshold, testVirtualThresholdCreation, threshold_create_arg,
-              THRESHOLD_CREATE(U8, U8),
-              THRESHOLD_CREATE(S16, U8),
-              )
-{
-    vx_context context = context_->vx_context_;
-    vx_enum thresh_type = VX_THRESHOLD_TYPE_BINARY;
-    vx_graph graph = vxCreateGraph(context);
+        if( ct_check_any_size() )
+        {
+            width  = ct_roundf(ct_log_rng(&rng, 0, 10));
+            height = ct_roundf(ct_log_rng(&rng, 0, 10));
+            width  = CT_MAX(width, 15);             // Max region shrink is 7 on each side -> minimum size is 15
+            height = CT_MAX(height, 15);
 
-    vx_threshold threshold;
-    ASSERT_VX_OBJECT(threshold = vxCreateVirtualThresholdForImage(graph, thresh_type, arg_->src_type, arg_->dst_type), VX_TYPE_THRESHOLD);
-    ASSERT_EQ_VX_STATUS(VX_SUCCESS, vxReleaseThreshold(&threshold));
+            if (in_format == VX_DF_IMAGE_U1 || out_format == VX_DF_IMAGE_U1)
+            {
+                width = ((width + 7) / 8) * 8;      // Width must be multiple of 8 for U1 images
+            }
+        }
+        else
+        {
+            width = 640;
+            height = 480;
+        }
 
-    ASSERT_EQ_VX_STATUS(VX_SUCCESS, vxReleaseGraph(&graph));
+        ct_update_progress(iter, niters);
+
+        ASSERT_NO_FAILURE(src0 = ct_allocate_ct_image_random(width, height, in_format, &rng, a, b));
+        if( iter % 20 == 0 )
+        {
+            uint8_t val = (uint8_t)CT_RNG_NEXT_INT(rng, a, b);
+            ct_memset(src0->data.y, val, ct_stride_bytes(src0)*src0->height);
+        }
+
+        ASSERT_VX_OBJECT(src = ct_image_to_vx_image(src0, context), VX_TYPE_IMAGE);
+        ASSERT_VX_OBJECT(dst = vxCreateImage(context, width, height, out_format), VX_TYPE_IMAGE);
+        vxt = vxCreateThresholdForImage(context, ttype, in_format, out_format);
+        if( ttype == VX_THRESHOLD_TYPE_BINARY )
+        {
+            vx_pixel_value_t pa;
+            pa.S32 = ta;
+            ASSERT_EQ_VX_STATUS(VX_SUCCESS, vxCopyThresholdValue(vxt, &pa, VX_WRITE_ONLY, VX_MEMORY_TYPE_HOST));
+        }
+        else
+        {
+            vx_pixel_value_t pa, pb;
+            pa.S32 = ta;
+            pb.S32 = tb;
+            ASSERT_EQ_VX_STATUS(VX_SUCCESS, vxCopyThresholdRange(vxt, &pa, &pb, VX_WRITE_ONLY, VX_MEMORY_TYPE_HOST));
+        }
+
+        vx_pixel_value_t ptrue, pfalse;
+        ptrue.S32 = true_val;
+        pfalse.S32 = false_val;
+        ASSERT_EQ_VX_STATUS(VX_SUCCESS, vxCopyThresholdOutput(vxt, &ptrue, &pfalse, VX_WRITE_ONLY, VX_MEMORY_TYPE_HOST));
+
+        ASSERT_NO_FAILURE(vxGetValidRegionImage(src, &rect));
+        ALTERRECTANGLE(rect, region_shrink, region_shrink, -region_shrink, -region_shrink);
+        ASSERT_NO_FAILURE(vxSetImageValidRectangle(src, &rect));
+
+        ASSERT_EQ_VX_STATUS(VX_SUCCESS, vxuThreshold(context, src, vxt, dst));
+
+        ASSERT_NO_FAILURE(ct_adjust_roi(src0, region_shrink, region_shrink, region_shrink, region_shrink));
+        ASSERT_NO_FAILURE(dst0 = ct_allocate_image(src0->width, src0->height, out_format));
+        ASSERT_NO_FAILURE(reference_threshold(src0, dst0, ttype, ta, tb, true_val, false_val));
+
+        ASSERT_NO_FAILURE(dst1 = ct_image_from_vx_image(dst));
+        ASSERT_NO_FAILURE(ct_adjust_roi(dst1, region_shrink, region_shrink, region_shrink, region_shrink));
+
+        ASSERT_CTIMAGE_NEAR(dst0, dst1, 0);
+        VX_CALL(vxReleaseImage(&src));
+        VX_CALL(vxReleaseImage(&dst));
+        VX_CALL(vxReleaseThreshold(&vxt));
+        CT_CollectGarbage(CT_GC_IMAGE);
+    }
 }
 
 TESTCASE_TESTS(Threshold,
                testThresholdCreation,
                testVirtualThresholdCreation,
-               testOnRandom
+               testOnRandom,
+               testWithValidRegion
                )
+
+#endif //OPENVX_USE_ENHANCED_VISION || OPENVX_CONFORMANCE_VISION
diff --git a/test_conformance/test_user_data_object.c b/test_conformance/test_user_data_object.c
new file mode 100644
index 0000000..5797cdd
--- /dev/null
+++ b/test_conformance/test_user_data_object.c
@@ -0,0 +1,817 @@
+/*
+
+ * Copyright (c) 2012-2018 The Khronos Group Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifdef OPENVX_USE_USER_DATA_OBJECT
+
+#include "test_engine/test.h"
+#include <VX/vx.h>
+#include <VX/vxu.h>
+#include <VX/vx_khr_user_data_object.h>
+
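+/* Test-local kernel IDs: VX_KERNEL_BASE(VX_ID_DEFAULT, 0) places them in the default
+ * vendor range, away from the standard Khronos kernel enums. */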
+#define VX_KERNEL_CONFORMANCE_TEST_OWN_BAD (VX_KERNEL_BASE(VX_ID_DEFAULT, 0) + 0)
+#define VX_KERNEL_CONFORMANCE_TEST_OWN_BAD_NAME "org.khronos.openvx.test.own_bad"
+
+#define VX_KERNEL_CONFORMANCE_TEST_OWN (VX_KERNEL_BASE(VX_ID_DEFAULT, 0) + 1)
+#define VX_KERNEL_CONFORMANCE_TEST_OWN_NAME "org.khronos.openvx.test.own"
+
+#define VX_KERNEL_CONFORMANCE_TEST_OWN_USER (VX_KERNEL_BASE(VX_ID_DEFAULT, 0) + 2)
+#define VX_KERNEL_CONFORMANCE_TEST_OWN_USER_NAME "org.khronos.openvx.test.own_user"
+
+TESTCASE(UserDataObject, CT_VXContext, ct_setup_vx_context, 0)
+
+typedef enum _own_params_e
+{
+    OWN_PARAM_INPUT = 0,
+    OWN_PARAM_OUTPUT,
+} own_params_e;
+
+static enum vx_type_e type = VX_TYPE_INVALID;
+static enum vx_type_e objarray_itemtype = VX_TYPE_INVALID;
+
+static vx_size local_size = 0;
+static vx_bool is_kernel_alloc = vx_false_e;
+static vx_size local_size_auto_alloc = 0;
+static vx_size local_size_kernel_alloc = 0;
+
+static vx_status set_local_size_status_init = VX_SUCCESS;
+static vx_status set_local_ptr_status_init = VX_SUCCESS;
+
+static vx_status query_local_size_status_deinit = VX_SUCCESS;
+static vx_status query_local_ptr_status_deinit = VX_SUCCESS;
+static vx_status set_local_size_status_deinit = VX_SUCCESS;
+static vx_status set_local_ptr_status_deinit = VX_SUCCESS;
+
+static const vx_char user_data_object_name[] = "wb_t";
+
+typedef struct
+{
+    vx_int32 mode;
+    vx_int32 gain[4];
+    vx_int32 offset[4];
+} wb_t;
+
+static vx_bool is_validator_called = vx_false_e;
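+// The two validators below differ only in how they propagate output meta-data:
+// own_ValidatorMetaFromRef copies it from the input reference via
+// vxSetMetaFormatFromReference, while own_ValidatorMetaFromAttr sets the individual
+// attributes with vxSetMetaFormatAttribute.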
+static vx_status VX_CALLBACK own_ValidatorMetaFromRef(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[])
+{
+    is_validator_called = vx_true_e;
+    ASSERT_VX_OBJECT_(return VX_FAILURE, node, VX_TYPE_NODE);
+
+    vx_reference input = parameters[OWN_PARAM_INPUT];
+    ASSERT_VX_OBJECT_(return VX_FAILURE, input, type);
+    vx_reference output = parameters[OWN_PARAM_OUTPUT];
+    ASSERT_VX_OBJECT_(return VX_FAILURE, output, type);
+
+    vx_meta_format meta = metas[OWN_PARAM_OUTPUT];
+
+    vx_enum in_ref_type = VX_TYPE_INVALID;
+    VX_CALL_(return VX_ERROR_INVALID_PARAMETERS, vxQueryReference(input, VX_REFERENCE_TYPE, &in_ref_type, sizeof(vx_enum)));
+    vx_enum out_ref_type = VX_TYPE_INVALID;
+    VX_CALL_(return VX_ERROR_INVALID_PARAMETERS, vxQueryReference(output, VX_REFERENCE_TYPE, &out_ref_type, sizeof(vx_enum)));
+
+    if (in_ref_type == out_ref_type)
+    {
+        vx_enum item_type = (type == VX_TYPE_OBJECT_ARRAY) ? objarray_itemtype : VX_TYPE_UINT8;
+        vx_size capacity = 20;
+
+        vx_enum actual_item_type = VX_TYPE_INVALID;
+        vx_size actual_capacity = 0;
+        switch (type)
+        {
+        case VX_TYPE_OBJECT_ARRAY:
+            VX_CALL_(return VX_FAILURE, vxQueryObjectArray((vx_object_array)input, VX_OBJECT_ARRAY_ITEMTYPE, &actual_item_type, sizeof(vx_enum)));
+            VX_CALL_(return VX_FAILURE, vxQueryObjectArray((vx_object_array)input, VX_OBJECT_ARRAY_NUMITEMS, &actual_capacity, sizeof(vx_size)));
+
+            if (actual_item_type == item_type && actual_capacity == capacity)
+            {
+                VX_CALL_(return VX_FAILURE, vxSetMetaFormatFromReference(meta, input));
+            }
+            else
+            {
+                return VX_ERROR_INVALID_PARAMETERS;
+            }
+            break;
+        case VX_TYPE_USER_DATA_OBJECT:
+            {
+                char actual_name[VX_MAX_REFERENCE_NAME];
+                vx_size actual_size;
+
+                VX_CALL_(return VX_FAILURE, vxQueryUserDataObject((vx_user_data_object)input, VX_USER_DATA_OBJECT_NAME, &actual_name, sizeof(actual_name)));
+                VX_CALL_(return VX_FAILURE, vxQueryUserDataObject((vx_user_data_object)input, VX_USER_DATA_OBJECT_SIZE, &actual_size, sizeof(vx_size)));
+
+                if ((strcmp(user_data_object_name, actual_name) == 0) && (actual_size == sizeof(wb_t)))
+                {
+                    VX_CALL_(return VX_FAILURE, vxSetMetaFormatFromReference(meta, input));
+                }
+                else
+                {
+                    return VX_ERROR_INVALID_PARAMETERS;
+                }
+            }
+            break;
+        default:
+            return VX_ERROR_INVALID_PARAMETERS;
+            break;
+        }
+
+    }
+
+    return VX_SUCCESS;
+}
+
+static vx_status VX_CALLBACK own_ValidatorMetaFromAttr(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[])
+{
+    is_validator_called = vx_true_e;
+    ASSERT_VX_OBJECT_(return VX_FAILURE, node, VX_TYPE_NODE);
+
+    vx_reference input = parameters[OWN_PARAM_INPUT];
+
+    vx_meta_format meta = metas[OWN_PARAM_OUTPUT];
+
+    vx_enum item_type = (type == VX_TYPE_OBJECT_ARRAY) ? objarray_itemtype : VX_TYPE_UINT8;
+    vx_size capacity = 20;
+
+    vx_enum actual_item_type = VX_TYPE_INVALID;
+    vx_size actual_capacity = 0;
+    switch (type)
+    {
+
+    case VX_TYPE_OBJECT_ARRAY:
+        VX_CALL_(return VX_FAILURE, vxQueryObjectArray((vx_object_array)input, VX_OBJECT_ARRAY_ITEMTYPE, &actual_item_type, sizeof(vx_enum)));
+        VX_CALL_(return VX_FAILURE, vxQueryObjectArray((vx_object_array)input, VX_OBJECT_ARRAY_NUMITEMS, &actual_capacity, sizeof(vx_size)));
+
+        if (actual_item_type == item_type && actual_capacity == capacity)
+        {
+            VX_CALL_(return VX_FAILURE, vxSetMetaFormatAttribute(meta, VX_OBJECT_ARRAY_ITEMTYPE, &item_type, sizeof(vx_enum)));
+            VX_CALL_(return VX_FAILURE, vxSetMetaFormatAttribute(meta, VX_OBJECT_ARRAY_NUMITEMS, &capacity, sizeof(vx_size)));
+        }
+        else
+        {
+            return VX_ERROR_INVALID_PARAMETERS;
+        }
+        break;
+    case VX_TYPE_USER_DATA_OBJECT:
+        {
+            vx_size actual_size;
+            vx_size user_data_size = sizeof(wb_t);
+            char actual_name[VX_MAX_REFERENCE_NAME];
+
+            VX_CALL_(return VX_FAILURE, vxQueryUserDataObject((vx_user_data_object)input, VX_USER_DATA_OBJECT_NAME, &actual_name, sizeof(actual_name)));
+            VX_CALL_(return VX_FAILURE, vxQueryUserDataObject((vx_user_data_object)input, VX_USER_DATA_OBJECT_SIZE, &actual_size, sizeof(vx_size)));
+
+            if ((strcmp(user_data_object_name, actual_name) == 0) && (actual_size == sizeof(wb_t)))
+            {
+                VX_CALL_(return VX_FAILURE, vxSetMetaFormatAttribute(meta, VX_USER_DATA_OBJECT_NAME, &user_data_object_name, sizeof(user_data_object_name)));
+                VX_CALL_(return VX_FAILURE, vxSetMetaFormatAttribute(meta, VX_USER_DATA_OBJECT_SIZE, &user_data_size, sizeof(vx_size)));
+            }
+            else
+            {
+                return VX_ERROR_INVALID_PARAMETERS;
+            }
+        }
+        break;
+    default:
+        return VX_ERROR_INVALID_PARAMETERS;
+        break;
+    }
+
+    return VX_SUCCESS;
+}
+
+static vx_bool is_kernel_called = vx_false_e;
+static vx_status VX_CALLBACK own_Kernel(vx_node node, const vx_reference *parameters, vx_uint32 num)
+{
+    is_kernel_called = vx_true_e;
+    ASSERT_VX_OBJECT_(return VX_FAILURE, node, VX_TYPE_NODE);
+    EXPECT(parameters != NULL);
+    EXPECT(num == 2);
+    if (parameters != NULL && num == 2)
+    {
+        EXPECT_VX_OBJECT(parameters[0], type);
+        EXPECT_VX_OBJECT(parameters[1], type);
+    }
+
+    return VX_SUCCESS;
+}
+
+static vx_bool is_initialize_called = vx_false_e;
+static vx_status VX_CALLBACK own_Initialize(vx_node node, const vx_reference *parameters, vx_uint32 num)
+{
+    vx_size size = 0;
+    void* ptr = NULL;
+    is_initialize_called = vx_true_e;
+    ASSERT_VX_OBJECT_(return VX_FAILURE, node, VX_TYPE_NODE);
+    EXPECT(parameters != NULL);
+    EXPECT(num == 2);
+    if (parameters != NULL && num == 2)
+    {
+        EXPECT_VX_OBJECT(parameters[0], type);
+        EXPECT_VX_OBJECT(parameters[1], type);
+    }
+    if (local_size_kernel_alloc > 0)
+    {
+        size = local_size_kernel_alloc;
+        ptr = ct_calloc(1, local_size_kernel_alloc);
+    }
+    set_local_size_status_init = vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_SIZE, &size, sizeof(size));
+    set_local_ptr_status_init = vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &ptr, sizeof(ptr));
+    return VX_SUCCESS;
+}
+
+static vx_bool is_deinitialize_called = vx_false_e;
+static vx_status VX_CALLBACK own_Deinitialize(vx_node node, const vx_reference *parameters, vx_uint32 num)
+{
+    vx_size size = 0;
+    void* ptr = NULL;
+    is_deinitialize_called = vx_true_e;
+    EXPECT(node != 0);
+    EXPECT(parameters != NULL);
+    EXPECT(num == 2);
+    if (parameters != NULL && num == 2)
+    {
+        EXPECT_VX_OBJECT(parameters[0], type);
+        EXPECT_VX_OBJECT(parameters[1], type);
+    }
+    query_local_size_status_deinit = vxQueryNode(node, VX_NODE_LOCAL_DATA_SIZE, &size, sizeof(size));
+    query_local_ptr_status_deinit = vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &ptr, sizeof(ptr));
+    if (local_size_kernel_alloc > 0)
+    {
+        size = 0;
+        if (ptr != NULL)
+        {
+            ct_free_mem(ptr);
+            ptr = NULL;
+        }
+    }
+    set_local_size_status_deinit = vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_SIZE, &size, sizeof(size));
+    set_local_ptr_status_deinit = vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &ptr, sizeof(ptr));
+    return VX_SUCCESS;
+}
+
+static void own_register_kernel(vx_context context, vx_bool is_meta_from_ref)
+{
+    vx_kernel kernel = 0;
+    vx_size size = local_size_auto_alloc;
+
+    if (is_meta_from_ref)
+    {
+        ASSERT_VX_OBJECT(kernel = vxAddUserKernel(
+            context,
+            VX_KERNEL_CONFORMANCE_TEST_OWN_USER_NAME,
+            VX_KERNEL_CONFORMANCE_TEST_OWN_USER,
+            own_Kernel,
+            2,
+            own_ValidatorMetaFromRef,
+            own_Initialize,
+            own_Deinitialize), VX_TYPE_KERNEL);
+    }
+    else
+    {
+        ASSERT_VX_OBJECT(kernel = vxAddUserKernel(
+            context,
+            VX_KERNEL_CONFORMANCE_TEST_OWN_USER_NAME,
+            VX_KERNEL_CONFORMANCE_TEST_OWN_USER,
+            own_Kernel,
+            2,
+            own_ValidatorMetaFromAttr,
+            own_Initialize,
+            own_Deinitialize), VX_TYPE_KERNEL);
+    }
+
+    VX_CALL(vxAddParameterToKernel(kernel, OWN_PARAM_INPUT, VX_INPUT, type, VX_PARAMETER_STATE_REQUIRED));
+    {
+        vx_parameter parameter = 0;
+        vx_enum direction = 0;
+        ASSERT_VX_OBJECT(parameter = vxGetKernelParameterByIndex(kernel, OWN_PARAM_INPUT), VX_TYPE_PARAMETER);
+        VX_CALL(vxQueryParameter(parameter, VX_PARAMETER_DIRECTION, &direction, sizeof(direction)));
+        ASSERT(direction == VX_INPUT);
+        VX_CALL(vxReleaseParameter(&parameter));
+    }
+    VX_CALL(vxAddParameterToKernel(kernel, OWN_PARAM_OUTPUT, VX_OUTPUT, type, VX_PARAMETER_STATE_REQUIRED));
+    {
+        vx_parameter parameter = 0;
+        vx_enum direction = 0;
+        ASSERT_VX_OBJECT(parameter = vxGetKernelParameterByIndex(kernel, OWN_PARAM_OUTPUT), VX_TYPE_PARAMETER);
+        VX_CALL(vxQueryParameter(parameter, VX_PARAMETER_DIRECTION, &direction, sizeof(direction)));
+        ASSERT(direction == VX_OUTPUT);
+        VX_CALL(vxReleaseParameter(&parameter));
+    }
+    VX_CALL(vxSetKernelAttribute(kernel, VX_KERNEL_LOCAL_DATA_SIZE, &size, sizeof(size)));
+    VX_CALL(vxFinalizeKernel(kernel));
+    VX_CALL(vxReleaseKernel(&kernel));
+}
+
+typedef struct {
+    const char* name;
+    vx_enum type;
+    vx_bool is_meta_from_ref;
+    vx_size local_size;
+    vx_bool is_kernel_alloc;
+} type_arg;
+
+#define ADD_TYPE(testArgName, nextmacro, ...) \
+    CT_EXPAND(nextmacro(testArgName "USER_DATA_OBJECT", __VA_ARGS__, VX_TYPE_USER_DATA_OBJECT)) \
+
+#define ADD_FROM_FLAG(testArgName, nextmacro, ...) \
+    CT_EXPAND(nextmacro(testArgName "_FROM_REF", __VA_ARGS__, vx_true_e)), \
+    CT_EXPAND(nextmacro(testArgName "_FROM_ATTR", __VA_ARGS__, vx_false_e))
+
+#define ADD_LOCAL_SIZE_AND_ALLOC(testArgName, nextmacro, ...) \
+    CT_EXPAND(nextmacro(testArgName "/LOCAL_SIZE=0", __VA_ARGS__, 0, vx_false_e)), \
+    CT_EXPAND(nextmacro(testArgName "/LOCAL_SIZE=10/ALLOC=AUTO", __VA_ARGS__, 10, vx_false_e)), \
+    CT_EXPAND(nextmacro(testArgName "/LOCAL_SIZE=10/ALLOC=KERNEL", __VA_ARGS__, 10, vx_true_e))
+
+#define USERKERNEL_PARAMETERS \
+    CT_GENERATE_PARAMETERS("", ADD_TYPE, ADD_FROM_FLAG, ADD_LOCAL_SIZE_AND_ALLOC, ARG)
+
+TEST_WITH_ARG(UserDataObject, testUserKernel, type_arg, USERKERNEL_PARAMETERS)
+{
+    vx_context context = context_->vx_context_;
+    vx_reference src = 0, dst = 0;
+    vx_graph graph = 0;
+    vx_kernel user_kernel = 0;
+    vx_node node = 0;
+    vx_bool is_meta_from_ref = arg_->is_meta_from_ref;
+
+    int phase = 0;
+
+    type = (enum vx_type_e)arg_->type;
+    local_size = arg_->local_size;
+    is_kernel_alloc = arg_->is_kernel_alloc;
+
+    if (is_kernel_alloc == vx_false_e)
+    {
+        local_size_auto_alloc = local_size;
+        local_size_kernel_alloc = 0;
+    }
+    else
+    {
+        local_size_auto_alloc = 0;
+        local_size_kernel_alloc = local_size;
+    }
+
+    is_validator_called = vx_false_e;
+    is_kernel_called = vx_false_e;
+    is_initialize_called = vx_false_e;
+    is_deinitialize_called = vx_false_e;
+
+    switch (type)
+    {
+
+    case VX_TYPE_USER_DATA_OBJECT:
+        {
+            ASSERT_VX_OBJECT(src = (vx_reference)vxCreateUserDataObject(context, (const vx_char*)&user_data_object_name, sizeof(wb_t), NULL), type);
+            ASSERT_VX_OBJECT(dst = (vx_reference)vxCreateUserDataObject(context, (const vx_char*)&user_data_object_name, sizeof(wb_t), NULL), type);
+        }
+        break;
+
+    default:
+        break;
+    }
+
+    ASSERT_NO_FAILURE(own_register_kernel(context, is_meta_from_ref));
+
+    ASSERT_VX_OBJECT(graph = vxCreateGraph(context), VX_TYPE_GRAPH);
+
+    ASSERT_VX_OBJECT(user_kernel = vxGetKernelByName(context, VX_KERNEL_CONFORMANCE_TEST_OWN_USER_NAME), VX_TYPE_KERNEL);
+    ASSERT_VX_OBJECT(node = vxCreateGenericNode(graph, user_kernel), VX_TYPE_NODE);
+
+    VX_CALL(vxSetParameterByIndex(node, 0, (vx_reference)src));
+    VX_CALL(vxSetParameterByIndex(node, 1, (vx_reference)dst));
+
+    // graph verification, first-time, and re-verify
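+    // Phase 0 is the first vxVerifyGraph (initialize only); phase 1 re-verifies the graph,
+    // which must deinitialize the previous node-local data before initializing it again.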
+
+    for (phase = 0; phase < 2; ++phase)
+    {
+        vx_size size = 0;
+        void* ptr = NULL;
+
+        is_initialize_called = vx_false_e;
+        is_deinitialize_called = vx_false_e;
+        is_validator_called = vx_false_e;
+        set_local_size_status_init = VX_FAILURE;
+        set_local_ptr_status_init = VX_FAILURE;
+        query_local_size_status_deinit = VX_FAILURE;
+        query_local_ptr_status_deinit = VX_FAILURE;
+        set_local_size_status_deinit = VX_FAILURE;
+        set_local_ptr_status_deinit = VX_FAILURE;
+
+        VX_CALL(vxVerifyGraph(graph));
+
+        ASSERT(is_initialize_called == vx_true_e);
+        if (phase == 0)
+            ASSERT(is_deinitialize_called == vx_false_e);
+        else
+            ASSERT(is_deinitialize_called == vx_true_e);
+        ASSERT(is_validator_called == vx_true_e);
+
+        VX_CALL(vxQueryNode(node, VX_NODE_LOCAL_DATA_SIZE, &size, sizeof(size)));
+        VX_CALL(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &ptr, sizeof(ptr)));
+
+        ASSERT(VX_SUCCESS != vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_SIZE, &size, sizeof(size)));
+        ASSERT(VX_SUCCESS != vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &ptr, sizeof(ptr)));
+
+        ASSERT(size == local_size);
+        if (size > 0)
+            ASSERT(ptr != NULL);
+        else
+            ASSERT(ptr == NULL);
+
+        if (local_size_auto_alloc == 0) // change allowed
+        {
+            ASSERT(set_local_size_status_init == VX_SUCCESS);
+            ASSERT(set_local_ptr_status_init == VX_SUCCESS);
+            if (is_deinitialize_called)
+            {
+                ASSERT(set_local_size_status_deinit == VX_SUCCESS);
+                ASSERT(set_local_ptr_status_deinit == VX_SUCCESS);
+            }
+        }
+        else // change is not allowed: an error must be generated
+        {
+            ASSERT(set_local_size_status_init != VX_SUCCESS);
+            ASSERT(set_local_ptr_status_init != VX_SUCCESS);
+            if (is_deinitialize_called)
+            {
+                ASSERT(set_local_size_status_deinit != VX_SUCCESS);
+                ASSERT(set_local_ptr_status_deinit != VX_SUCCESS);
+            }
+        }
+    }
+
+    // execute graph
+
+    is_initialize_called = vx_false_e;
+    is_deinitialize_called = vx_false_e;
+    is_validator_called = vx_false_e;
+    is_kernel_called = vx_false_e;
+
+    VX_CALL(vxProcessGraph(graph));
+
+    ASSERT(is_initialize_called == vx_false_e);
+    ASSERT(is_deinitialize_called == vx_false_e);
+    ASSERT(is_validator_called == vx_false_e);
+    ASSERT(is_kernel_called == vx_true_e);
+
+    // finalization
+
+    VX_CALL(vxReleaseNode(&node));
+    VX_CALL(vxReleaseGraph(&graph));
+    /* user kernel should be removed only after all references to it released */
+    /* Note: vxRemoveKernel doesn't zero the kernel ref */
+    VX_CALL(vxRemoveKernel(user_kernel));
+
+    VX_CALL(vxReleaseReference(&dst));
+    VX_CALL(vxReleaseReference(&src));
+
+    ASSERT(node == 0);
+    ASSERT(graph == 0);
+    ASSERT(dst == 0);
+    ASSERT(src == 0);
+}
+
+TEST_WITH_ARG(UserDataObject, testUserKernelObjectArray, type_arg,
+    ARG("USER_DATA_OBJECT_FROM_REF", VX_TYPE_USER_DATA_OBJECT, vx_true_e),
+    ARG("USER_DATA_OBJECT_FROM_ATTR",VX_TYPE_USER_DATA_OBJECT, vx_false_e)
+)
+{
+    vx_context context = context_->vx_context_;
+    vx_reference exemplar = 0, src = 0, dst = 0;
+    vx_graph graph = 0;
+    vx_kernel user_kernel = 0;
+    vx_node node = 0;
+    vx_bool is_meta_from_ref = arg_->is_meta_from_ref;
+    objarray_itemtype = (enum vx_type_e)arg_->type;
+    type = VX_TYPE_OBJECT_ARRAY;
+
+    is_validator_called = vx_false_e;
+    is_kernel_called = vx_false_e;
+    is_initialize_called = vx_false_e;
+    is_deinitialize_called = vx_false_e;
+
+    vx_size capacity = 20;
+
+    switch (objarray_itemtype)
+    {
+    case VX_TYPE_USER_DATA_OBJECT:
+        ASSERT_VX_OBJECT(exemplar = (vx_reference)vxCreateUserDataObject(context, (const vx_char*)&user_data_object_name, sizeof(wb_t), NULL), objarray_itemtype);
+        break;
+    default:
+        break;
+    }
+
+    ASSERT_VX_OBJECT(src = (vx_reference)vxCreateObjectArray(context, exemplar, capacity), VX_TYPE_OBJECT_ARRAY);
+    ASSERT_VX_OBJECT(dst = (vx_reference)vxCreateObjectArray(context, exemplar, capacity), VX_TYPE_OBJECT_ARRAY);
+
+    ASSERT_NO_FAILURE(own_register_kernel(context, is_meta_from_ref));
+
+    ASSERT_VX_OBJECT(graph = vxCreateGraph(context), VX_TYPE_GRAPH);
+
+    ASSERT_VX_OBJECT(user_kernel = vxGetKernelByName(context, VX_KERNEL_CONFORMANCE_TEST_OWN_USER_NAME), VX_TYPE_KERNEL);
+    ASSERT_VX_OBJECT(node = vxCreateGenericNode(graph, user_kernel), VX_TYPE_NODE);
+
+    VX_CALL(vxSetParameterByIndex(node, 0, (vx_reference)src));
+    VX_CALL(vxSetParameterByIndex(node, 1, (vx_reference)dst));
+
+    VX_CALL(vxVerifyGraph(graph));
+    VX_CALL(vxProcessGraph(graph));
+
+    VX_CALL(vxReleaseNode(&node));
+    VX_CALL(vxReleaseGraph(&graph));
+    /* user kernel should be removed only after all references to it released */
+    /* Note: vxRemoveKernel doesn't zero the kernel ref */
+    VX_CALL(vxRemoveKernel(user_kernel));
+
+    VX_CALL(vxReleaseReference(&dst));
+    VX_CALL(vxReleaseReference(&src));
+    VX_CALL(vxReleaseReference(&exemplar));
+
+    ASSERT(node == 0);
+    ASSERT(graph == 0);
+    ASSERT(dst == 0);
+    ASSERT(src == 0);
+    ASSERT(exemplar == 0);
+
+    ASSERT(is_validator_called == vx_true_e);
+    ASSERT(is_kernel_called == vx_true_e);
+    ASSERT(is_initialize_called == vx_true_e);
+    ASSERT(is_deinitialize_called == vx_true_e);
+}
+
+TEST(UserDataObject, testRemoveKernel)
+{
+    vx_context context = context_->vx_context_;
+    vx_kernel kernel = 0;
+
+    EXPECT_VX_OBJECT(kernel = vxGetKernelByEnum(context, VX_KERNEL_ADD), VX_TYPE_KERNEL);
+    // Only kernels added through vxAddUserKernel can be removed
+    ASSERT_EQ_VX_STATUS(VX_ERROR_INVALID_PARAMETERS, vxRemoveKernel(kernel));
+    VX_CALL(vxReleaseKernel(&kernel));
+
+    ASSERT_VX_OBJECT(kernel = vxAddUserKernel(
+            context,
+            VX_KERNEL_CONFORMANCE_TEST_OWN_BAD_NAME,
+            VX_KERNEL_CONFORMANCE_TEST_OWN_BAD,
+            own_Kernel,
+            2,
+            own_ValidatorMetaFromRef,
+            own_Initialize,
+            own_Deinitialize), VX_TYPE_KERNEL);
+
+    VX_CALL(vxRemoveKernel(kernel));
+}
+
+TEST(UserDataObject, testOutDelay)
+{
+    vx_context context = context_->vx_context_;
+    vx_kernel kernel = 0;
+
+    ASSERT_VX_OBJECT(kernel = vxAddUserKernel(
+        context,
+        VX_KERNEL_CONFORMANCE_TEST_OWN_BAD_NAME,
+        VX_KERNEL_CONFORMANCE_TEST_OWN_BAD,
+        own_Kernel,
+        2,
+        own_ValidatorMetaFromRef,
+        own_Initialize,
+        own_Deinitialize), VX_TYPE_KERNEL);
+
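+    /* VX_TYPE_DELAY as an output kernel parameter is expected to be rejected. */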
+    ASSERT_NE_VX_STATUS(vxAddParameterToKernel(kernel, 0, VX_OUTPUT, VX_TYPE_DELAY, VX_PARAMETER_STATE_REQUIRED), VX_SUCCESS);
+
+    VX_CALL(vxRemoveKernel(kernel));
+}
+
+
+
+TEST(UserDataObject, test_vxCreateUserDataObject)
+{
+    vx_context context = context_->vx_context_;
+    char actual_name[VX_MAX_REFERENCE_NAME];
+    vx_size actual_size = 0;
+    vx_user_data_object user_data_object = 0;
+
+    /* 1. check if user data object can be created with empty type_name and not initialized */
+    ASSERT_VX_OBJECT(user_data_object = vxCreateUserDataObject(context, NULL, sizeof(wb_t), NULL), VX_TYPE_USER_DATA_OBJECT);
+
+    /* 2. check if user data object actual name is a string with a null termination */
+    VX_CALL(vxQueryUserDataObject(user_data_object, VX_USER_DATA_OBJECT_NAME, &actual_name, sizeof(actual_name)));
+    ASSERT(strncmp("", actual_name, VX_REFERENCE_NAME) == 0);
+
+    /* 3. check if user data object actual size corresponds to requested size */
+    VX_CALL(vxQueryUserDataObject(user_data_object, VX_USER_DATA_OBJECT_SIZE, &actual_size, sizeof(vx_size)));
+    ASSERT_EQ_INT(sizeof(wb_t), actual_size);
+
+    /* 4. Initialize empty user data object after creation */
+    {
+        wb_t *p = NULL;
+        vx_map_id map_id;
+        vx_int32 i;
+
+        /* Initialize data using WRITE ONLY MAP */
+        VX_CALL(vxMapUserDataObject(user_data_object, 0, sizeof(wb_t), &map_id, (void **)&p, VX_WRITE_ONLY, VX_MEMORY_TYPE_HOST, 0));
+        ASSERT(p != NULL);
+        p->mode = 2;
+        for (i = 0; i < 4; i++)
+        {
+            p->gain[i] = i;
+            p->offset[i] = i+4;
+        }
+        VX_CALL(vxUnmapUserDataObject(user_data_object, map_id));
+
+    /* 5. check data in user data object */
+
+        VX_CALL(vxMapUserDataObject(user_data_object, 0, sizeof(wb_t), &map_id, (void **)&p, VX_READ_ONLY, VX_MEMORY_TYPE_HOST, 0));
+        ASSERT(2 == p->mode);
+        for (i = 0; i < 4; i++)
+        {
+            ASSERT(p->gain[i] == i);
+            ASSERT(p->offset[i] == i+4);
+        }
+        VX_CALL(vxUnmapUserDataObject(user_data_object, map_id));
+    }
+
+    VX_CALL(vxReleaseUserDataObject(&user_data_object));
+    ASSERT(user_data_object == 0);
+}
+
+TEST(UserDataObject, test_vxCopyUserDataObjectWrite)
+{
+    vx_context context = context_->vx_context_;
+    wb_t localUserDataObjectInit;
+    wb_t localUserDataObject;
+    vx_user_data_object user_data_object;
+    int i;
+
+    /* Initialization */
+    localUserDataObjectInit.mode = 0;
+    localUserDataObject.mode = 1;
+
+    for (i = 0; i < 4; i++)
+    {
+        localUserDataObjectInit.gain[i] = 0;
+        localUserDataObjectInit.offset[i] = 0;
+
+        localUserDataObject.gain[i] = i;
+        localUserDataObject.offset[i] = i+4;
+    }
+
+    ASSERT_VX_OBJECT(user_data_object = vxCreateUserDataObject(context, user_data_object_name, sizeof(wb_t), &localUserDataObjectInit), VX_TYPE_USER_DATA_OBJECT);
+
+    /* Write, COPY gains */
+    {
+        vx_size local_offset = offsetof(wb_t, gain);
+        vx_size local_bytes = sizeof(vx_int32)*4;
+        vx_int32 *p = &localUserDataObject.gain[0];
+        VX_CALL(vxCopyUserDataObject(user_data_object, local_offset, local_bytes, (void *)p, VX_WRITE_ONLY, VX_MEMORY_TYPE_HOST));
+    }
+
+    /* Check (MAP) */
+    {
+        vx_int32 *p = NULL;
+        vx_size local_offset = offsetof(wb_t, gain);
+        vx_size local_bytes = sizeof(vx_int32)*4;
+        vx_map_id map_id;
+        VX_CALL(vxMapUserDataObject(user_data_object, local_offset, local_bytes, &map_id, (void **)&p, VX_READ_ONLY, VX_MEMORY_TYPE_HOST, VX_NOGAP_X));
+
+        ASSERT(p != NULL);
+        for (i = 0; i<4; i++)
+        {
+            ASSERT(p[i] == i);
+        }
+
+        VX_CALL(vxUnmapUserDataObject(user_data_object, map_id));
+    }
+
+    VX_CALL(vxReleaseUserDataObject(&user_data_object));
+    ASSERT(user_data_object == 0);
+}
+
+TEST(UserDataObject, test_vxCopyUserDataObjectRead)
+{
+    vx_context context = context_->vx_context_;
+    wb_t localUserDataObjectInit;
+    wb_t localUserDataObject;
+    vx_user_data_object user_data_object;
+    int i;
+
+    /* Initialization */
+    localUserDataObjectInit.mode = 1;
+    localUserDataObject.mode = 0;
+
+    for (i = 0; i < 4; i++)
+    {
+        localUserDataObjectInit.gain[i] = i;
+        localUserDataObjectInit.offset[i] = i+4;
+
+        localUserDataObject.gain[i] = 0;
+        localUserDataObject.offset[i] = 0;
+    }
+
+    ASSERT_VX_OBJECT(user_data_object = vxCreateUserDataObject(context, user_data_object_name, sizeof(wb_t), &localUserDataObjectInit), VX_TYPE_USER_DATA_OBJECT);
+
+    /* READ, COPY offsets */
+    {
+        vx_size local_offset = offsetof(wb_t, offset);
+        vx_size local_bytes = sizeof(vx_int32)*4;
+        vx_int32 *p = &localUserDataObject.offset[0];
+        VX_CALL(vxCopyUserDataObject(user_data_object, local_offset, local_bytes, (void *)p, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+    }
+    /* Check */
+    for (i = 0; i < 4; i++)
+    {
+        ASSERT(localUserDataObject.offset[i] == i+4);
+    }
+
+    VX_CALL(vxReleaseUserDataObject(&user_data_object));
+    ASSERT(user_data_object == 0);
+}
+
+TEST(UserDataObject, test_vxMapUserDataObjectWrite)
+{
+    vx_context context = context_->vx_context_;
+    wb_t localUserDataObjectInit;
+    vx_user_data_object user_data_object;
+    int i;
+
+    /* Initialization */
+    localUserDataObjectInit.mode = 1;
+
+    for (i = 0; i < 4; i++)
+    {
+        localUserDataObjectInit.gain[i] = i+0x10000000;
+        localUserDataObjectInit.offset[i] = i+0x10000004;
+    }
+
+    ASSERT_VX_OBJECT(user_data_object = vxCreateUserDataObject(context, user_data_object_name, sizeof(wb_t), NULL), VX_TYPE_USER_DATA_OBJECT);
+
+    {
+        wb_t *p = NULL;
+        vx_map_id map_id;
+
+        /* Map, WRITE_ONLY mode */
+        VX_CALL(vxMapUserDataObject(user_data_object, 0, sizeof(wb_t), &map_id, (void **)&p, VX_WRITE_ONLY, VX_MEMORY_TYPE_HOST, 0));
+        ASSERT(p != NULL);
+        memcpy(p, &localUserDataObjectInit, sizeof(wb_t));
+        VX_CALL(vxUnmapUserDataObject(user_data_object, map_id));
+
+        /* Map, READ_AND_WRITE mode */
+
+        VX_CALL(vxMapUserDataObject(user_data_object, 0, sizeof(wb_t), &map_id, (void **)&p, VX_READ_AND_WRITE, VX_MEMORY_TYPE_HOST, 0));
+        ASSERT(p != NULL);
+        /* Check */
+        ASSERT(localUserDataObjectInit.mode == p->mode);
+        for (i = 0; i < 4; i++)
+        {
+            ASSERT(localUserDataObjectInit.gain[i] == p->gain[i]);
+            ASSERT(localUserDataObjectInit.offset[i] == p->offset[i]);
+        }
+
+        /* Write into user data object */
+        p->mode = 2;
+        for (i = 0; i < 4; i++)
+        {
+            p->gain[i] = i;
+            p->offset[i] = i+4;
+        }
+        VX_CALL(vxUnmapUserDataObject(user_data_object, map_id));
+
+        /* Check */
+        VX_CALL(vxMapUserDataObject(user_data_object, 0, sizeof(wb_t), &map_id, (void **)&p, VX_READ_ONLY, VX_MEMORY_TYPE_HOST, 0));
+        ASSERT(2 == p->mode);
+        for (i = 0; i < 4; i++)
+        {
+            ASSERT(p->gain[i] == i);
+            ASSERT(p->offset[i] == i+4);
+        }
+        VX_CALL(vxUnmapUserDataObject(user_data_object, map_id));
+    }
+
+    VX_CALL(vxReleaseUserDataObject(&user_data_object));
+    ASSERT(user_data_object == 0);
+}
+
+TESTCASE_TESTS(UserDataObject,
+        test_vxCreateUserDataObject,
+        test_vxCopyUserDataObjectRead,
+        test_vxCopyUserDataObjectWrite,
+        test_vxMapUserDataObjectWrite,
+        testUserKernel,
+        testUserKernelObjectArray,
+        testRemoveKernel,
+        testOutDelay
+        )
+
+#endif
diff --git a/test_conformance/test_usernode.c b/test_conformance/test_usernode.c
index 20c8897..138c387 100644
--- a/test_conformance/test_usernode.c
+++ b/test_conformance/test_usernode.c
@@ -15,6 +15,8 @@
  * limitations under the License.
  */
 
+#if defined OPENVX_USE_ENHANCED_VISION || defined OPENVX_CONFORMANCE_VISION
+
 #include "test_engine/test.h"
 #include <VX/vx.h>
 #include <VX/vxu.h>
@@ -734,6 +736,14 @@
 
     case VX_TYPE_REMAP:
         {
+            vx_coordinates2df_t *data = (vx_coordinates2df_t*)ct_alloc_mem(dst_width * dst_height * sizeof(vx_coordinates2df_t));
+            vx_size user_stride_y = dst_width * sizeof(vx_coordinates2df_t);
+            vx_rectangle_t rect;
+            rect.start_x = 0;
+            rect.start_y = 0;
+            rect.end_x = dst_width;
+            rect.end_y = dst_height;
+
             ASSERT_VX_OBJECT(src = (vx_reference)vxCreateRemap(context, src_width, src_height, dst_width, dst_height), type);
             ASSERT_VX_OBJECT(dst = (vx_reference)vxCreateRemap(context, src_width, src_height, dst_width, dst_height), type);
 
@@ -741,9 +751,14 @@
             {
                 for (j = 0; j < dst_height; j++)
                 {
-                    VX_CALL(vxSetRemapPoint((vx_remap)src, i, j, (vx_float32)((i + j) % src_width), (vx_float32)((i * j) % src_height)));
+                    data[j * dst_width + i].x = (vx_float32)((i + j) % src_width);
+                    data[j * dst_width + i].y = (vx_float32)((i * j) % src_height);
                 }
             }
+
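+            /* write all remap points with one patch copy rather than per-point vxSetRemapPoint calls */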
+            VX_CALL(vxCopyRemapPatch((vx_remap)src, &rect, user_stride_y, (void *)data,
+                                      VX_TYPE_COORDINATES2DF, VX_WRITE_ONLY, VX_MEMORY_TYPE_HOST));
+            ct_free_mem(data);
         }
         break;
 
@@ -767,10 +782,12 @@
 
     case VX_TYPE_THRESHOLD:
         {
-            ASSERT_VX_OBJECT(src = (vx_reference)vxCreateThreshold(context, thresh_type, item_type), type);
-            ASSERT_VX_OBJECT(dst = (vx_reference)vxCreateThreshold(context, thresh_type, item_type), type);
+            vx_pixel_value_t pixel_value = {0};
+            pixel_value.U8 = thresh_val;
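+            /* thresholds are created with the image-format-based API; the value is set via vxCopyThresholdValue below */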
+            ASSERT_VX_OBJECT(src = (vx_reference)vxCreateThresholdForImage(context, thresh_type, format, format), type);
+            ASSERT_VX_OBJECT(dst = (vx_reference)vxCreateThresholdForImage(context, thresh_type, format, format), type);
 
-            VX_CALL(vxSetThresholdAttribute((vx_threshold)src, VX_THRESHOLD_THRESHOLD_VALUE, (void *)&thresh_val, sizeof(thresh_val)));
+            VX_CALL(vxCopyThresholdValue((vx_threshold)src, &pixel_value, VX_WRITE_ONLY, VX_MEMORY_TYPE_HOST));
         }
         break;
 
@@ -969,7 +986,7 @@
         ASSERT_VX_OBJECT(exemplar = (vx_reference)vxCreateLUT(context, item_type, num_items), objarray_itemtype);
         break;
     case VX_TYPE_THRESHOLD:
-        ASSERT_VX_OBJECT(exemplar = (vx_reference)vxCreateThreshold(context, thresh_type, item_type), objarray_itemtype);
+        ASSERT_VX_OBJECT(exemplar = (vx_reference)vxCreateThresholdForImage(context, thresh_type, format, format), objarray_itemtype);
         break;
     default:
         break;
@@ -1062,3 +1079,5 @@
         testRemoveKernel,
         testOutDelay
         )
+
+#endif //OPENVX_USE_ENHANCED_VISION || OPENVX_CONFORMANCE_VISION
diff --git a/test_conformance/test_vximage.c b/test_conformance/test_vximage.c
index 6257c54..d82efff 100644
--- a/test_conformance/test_vximage.c
+++ b/test_conformance/test_vximage.c
@@ -15,35 +15,113 @@
  * limitations under the License.
  */
 
+#if defined OPENVX_USE_ENHANCED_VISION || defined OPENVX_CONFORMANCE_VISION
+
 #include <string.h>
 #include <VX/vx.h>
 #include <VX/vxu.h>
 
 #include "test_engine/test.h"
 
+/* ***************************************************************************
+//  local auxiliary function declarations (only those that must be declared before first use)
+*/
+static void mem_free(void**ptr);
+
+static CT_Image own_generate_rand_image(const char* fileName, int width, int height, vx_df_image format);
+
+static vx_uint32 own_plane_subsampling_x(vx_df_image format, vx_uint32 plane);
+
+static vx_uint32 own_plane_subsampling_y(vx_df_image format, vx_uint32 plane);
+
+static void own_allocate_image_ptrs(vx_df_image format, int width, int height, vx_uint32* nplanes, void* ptrs[],
+    vx_imagepatch_addressing_t addr[], vx_pixel_value_t* val);
+
+
+/* ***************************************************************************
+//  Image tests
+*/
 TESTCASE(Image, CT_VXContext, ct_setup_vx_context, 0)
 
 typedef struct
 {
     const char* name;
     vx_df_image format;
-} format_arg;
+} ImageFormat_Arg;
 
-TEST_WITH_ARG(Image, testRngImageCreation, format_arg,
-    ARG_ENUM(VX_DF_IMAGE_U8),
-    ARG_ENUM(VX_DF_IMAGE_U16),
-    ARG_ENUM(VX_DF_IMAGE_S16),
-    ARG_ENUM(VX_DF_IMAGE_U32),
-    ARG_ENUM(VX_DF_IMAGE_S32),
-    ARG_ENUM(VX_DF_IMAGE_RGB),
-    ARG_ENUM(VX_DF_IMAGE_RGBX),
-    ARG_ENUM(VX_DF_IMAGE_NV12),
-    ARG_ENUM(VX_DF_IMAGE_NV21),
-    ARG_ENUM(VX_DF_IMAGE_UYVY),
-    ARG_ENUM(VX_DF_IMAGE_YUYV),
-    ARG_ENUM(VX_DF_IMAGE_IYUV),
-    ARG_ENUM(VX_DF_IMAGE_YUV4),
+typedef struct
+{
+    const char* name;
+    int width;
+    int height;
+    vx_df_image format;
+} ImageDims_Arg;
+
+typedef struct
+{
+    const char* testName;
+    CT_Image(*generator)(const char* fileName, int width, int height, vx_df_image format);
+    const char* fileName;
+    int width;
+    int height;
+    vx_df_image format;
+    vx_bool have_roi;
+} ImageGenerator_Arg;
+
+#define VX_PLANE_MAX (4)
+
+#define IMAGE_FORMAT_PARAMETERS_BASELINE \
+    ARG_ENUM(VX_DF_IMAGE_U8), \
+    ARG_ENUM(VX_DF_IMAGE_U16), \
+    ARG_ENUM(VX_DF_IMAGE_S16), \
+    ARG_ENUM(VX_DF_IMAGE_U32), \
+    ARG_ENUM(VX_DF_IMAGE_S32), \
+    ARG_ENUM(VX_DF_IMAGE_RGB), \
+    ARG_ENUM(VX_DF_IMAGE_RGBX), \
+    ARG_ENUM(VX_DF_IMAGE_NV12), \
+    ARG_ENUM(VX_DF_IMAGE_NV21), \
+    ARG_ENUM(VX_DF_IMAGE_UYVY), \
+    ARG_ENUM(VX_DF_IMAGE_YUYV), \
+    ARG_ENUM(VX_DF_IMAGE_IYUV), \
+    ARG_ENUM(VX_DF_IMAGE_YUV4)
+
+#define ADD_IMAGE_FORMATS(testArgName, nextmacro, ...) \
+    CT_EXPAND(nextmacro(testArgName "/VX_DF_IMAGE_U8", __VA_ARGS__, VX_DF_IMAGE_U8)), \
+    CT_EXPAND(nextmacro(testArgName "/VX_DF_IMAGE_U16", __VA_ARGS__, VX_DF_IMAGE_U16)), \
+    CT_EXPAND(nextmacro(testArgName "/VX_DF_IMAGE_S16", __VA_ARGS__, VX_DF_IMAGE_S16)), \
+    CT_EXPAND(nextmacro(testArgName "/VX_DF_IMAGE_U32", __VA_ARGS__, VX_DF_IMAGE_U32)), \
+    CT_EXPAND(nextmacro(testArgName "/VX_DF_IMAGE_S32", __VA_ARGS__, VX_DF_IMAGE_S32)), \
+    CT_EXPAND(nextmacro(testArgName "/VX_DF_IMAGE_RGB", __VA_ARGS__, VX_DF_IMAGE_RGB)), \
+    CT_EXPAND(nextmacro(testArgName "/VX_DF_IMAGE_RGBX", __VA_ARGS__, VX_DF_IMAGE_RGBX)), \
+    CT_EXPAND(nextmacro(testArgName "/VX_DF_IMAGE_UYVY", __VA_ARGS__, VX_DF_IMAGE_UYVY)), \
+    CT_EXPAND(nextmacro(testArgName "/VX_DF_IMAGE_YUYV", __VA_ARGS__, VX_DF_IMAGE_YUYV)), \
+    CT_EXPAND(nextmacro(testArgName "/VX_DF_IMAGE_NV12", __VA_ARGS__, VX_DF_IMAGE_NV12)), \
+    CT_EXPAND(nextmacro(testArgName "/VX_DF_IMAGE_NV21", __VA_ARGS__, VX_DF_IMAGE_NV21)), \
+    CT_EXPAND(nextmacro(testArgName "/VX_DF_IMAGE_YUV4", __VA_ARGS__, VX_DF_IMAGE_YUV4)), \
+    CT_EXPAND(nextmacro(testArgName "/VX_DF_IMAGE_IYUV", __VA_ARGS__, VX_DF_IMAGE_IYUV))
+
+#define ADD_IMAGE_FORMAT_U1(testArgName, nextmacro, ...) \
+    CT_EXPAND(nextmacro(testArgName "/VX_DF_IMAGE_U1", __VA_ARGS__, VX_DF_IMAGE_U1))
+
+#define ADD_IMAGE_ROI(testArgName, nextmacro, ...) \
+    CT_EXPAND(nextmacro(testArgName "/ROI=true", __VA_ARGS__, vx_true_e)), \
+    CT_EXPAND(nextmacro(testArgName "/ROI=false", __VA_ARGS__, vx_false_e))
+
+#define NO_IMAGE_ROI(testArgName, nextmacro, ...) \
+    CT_EXPAND(nextmacro(testArgName "", __VA_ARGS__, vx_false_e))
+
+#define TEST_IMAGE_RANDOM_IMAGE_PARAMETERS \
+    CT_GENERATE_PARAMETERS(     "rand", ADD_SIZE_SMALL_SET, ADD_IMAGE_FORMATS,   NO_IMAGE_ROI, ARG, own_generate_rand_image, NULL), \
+    CT_GENERATE_PARAMETERS("_U1_/rand", ADD_SIZE_SMALL_SET, ADD_IMAGE_FORMAT_U1, NO_IMAGE_ROI, ARG, own_generate_rand_image, NULL)
+
+#define TEST_IMAGE_RANDOM_IMAGE_WITH_ROI_PARAMETERS \
+    CT_GENERATE_PARAMETERS(     "rand", ADD_SIZE_SMALL_SET, ADD_IMAGE_FORMATS,   ADD_IMAGE_ROI, ARG, own_generate_rand_image, NULL), \
+    CT_GENERATE_PARAMETERS("_U1_/rand", ADD_SIZE_SMALL_SET, ADD_IMAGE_FORMAT_U1, ADD_IMAGE_ROI, ARG, own_generate_rand_image, NULL)
+
+TEST_WITH_ARG(Image, testRngImageCreation, ImageFormat_Arg,
+    IMAGE_FORMAT_PARAMETERS_BASELINE,
     ARG_ENUM(VX_DF_IMAGE_VIRT),
+    ARG("_U1_/VX_DF_IMAGE_U1", VX_DF_IMAGE_U1),
 )
 {
     vx_context  context = context_->vx_context_;
@@ -51,7 +129,7 @@
     vx_image    clone   = 0;
     vx_df_image format  = arg_->format;
 
-    image = vxCreateImage(context, 4, 4, format);
+    image = vxCreateImage(context, 8, 8, format);
 
     if (format == VX_DF_IMAGE_VIRT)
     {
@@ -59,8 +137,6 @@
         PASS();
     }
 
-    // VX_CALL(ct_dump_vx_image_info(image));
-
     ASSERT_VX_OBJECT(image, VX_TYPE_IMAGE);
 
     ct_fill_image_random(image, &CT()->seed_);
@@ -73,23 +149,33 @@
 
     ASSERT(image == 0);
     ASSERT(clone == 0);
-}
+} /* testRngImageCreation() */
 
-TEST_WITH_ARG(Image, testVirtualImageCreation, format_arg,
-    ARG_ENUM(VX_DF_IMAGE_U8),
-    ARG_ENUM(VX_DF_IMAGE_U16),
-    ARG_ENUM(VX_DF_IMAGE_S16),
-    ARG_ENUM(VX_DF_IMAGE_U32),
-    ARG_ENUM(VX_DF_IMAGE_S32),
-    ARG_ENUM(VX_DF_IMAGE_RGB),
-    ARG_ENUM(VX_DF_IMAGE_RGBX),
-    ARG_ENUM(VX_DF_IMAGE_NV12),
-    ARG_ENUM(VX_DF_IMAGE_NV21),
-    ARG_ENUM(VX_DF_IMAGE_UYVY),
-    ARG_ENUM(VX_DF_IMAGE_YUYV),
-    ARG_ENUM(VX_DF_IMAGE_IYUV),
-    ARG_ENUM(VX_DF_IMAGE_YUV4),
+/*
+// Creation and destruction of U1 images should be supported even without the U1 conformance profile
+*/
+TEST(Image, testImageCreation_U1)
+{
+    // Test vxCreateImage()
+    vx_context context = context_->vx_context_;
+    vx_image image = 0;
+    vx_uint32 width = 16;
+    vx_uint32 height = 16;
+    vx_df_image format = VX_DF_IMAGE_U1;
+
+    image = vxCreateImage(context, width, height, format);
+
+    ASSERT_VX_OBJECT(image, VX_TYPE_IMAGE);
+
+    VX_CALL(vxReleaseImage(&image));
+
+    ASSERT(image == 0);
+} /* testImageCreation_U1() */
+
+TEST_WITH_ARG(Image, testVirtualImageCreation, ImageFormat_Arg,
+    IMAGE_FORMAT_PARAMETERS_BASELINE,
     ARG_ENUM(VX_DF_IMAGE_VIRT),
+    ARG_ENUM(VX_DF_IMAGE_U1),
 )
 {
     vx_context context = context_->vx_context_;
@@ -113,16 +199,9 @@
     ASSERT(image == 0);
     ASSERT(clone == 0);
     ASSERT(graph == 0);
-}
+} /* testVirtualImageCreation() */
 
-typedef struct {
-    const char* name;
-    int width;
-    int height;
-    vx_df_image format;
-} dims_arg;
-
-TEST_WITH_ARG(Image, testVirtualImageCreationDims, dims_arg,
+TEST_WITH_ARG(Image, testVirtualImageCreationDims, ImageDims_Arg,
     ARG("0_0_REAL", 0, 0, VX_DF_IMAGE_U8),
     ARG("DISABLED_0_4_REAL", 0, 4, VX_DF_IMAGE_U8),
     ARG("DISABLED_4_0_REAL", 4, 0, VX_DF_IMAGE_U8),
@@ -131,7 +210,7 @@
     ARG("DISABLED_0_4_VIRT", 0, 4, VX_DF_IMAGE_VIRT),
     ARG("DISABLED_4_0_VIRT", 4, 0, VX_DF_IMAGE_VIRT),
     ARG("4_4_VIRT", 4, 4, VX_DF_IMAGE_VIRT),
-    )
+)
 {
     vx_context context = context_->vx_context_;
     vx_image   image   = 0;
@@ -153,197 +232,12 @@
     ASSERT(image == 0);
     ASSERT(clone == 0);
     ASSERT(graph == 0);
-}
+} /* testVirtualImageCreationDims() */
 
-
-TEST_WITH_ARG(Image, testConvert_CT_Image, format_arg,
-    ARG_ENUM(VX_DF_IMAGE_U8),
-    ARG_ENUM(VX_DF_IMAGE_U16),
-    ARG_ENUM(VX_DF_IMAGE_S16),
-    ARG_ENUM(VX_DF_IMAGE_U32),
-    ARG_ENUM(VX_DF_IMAGE_S32),
-    ARG_ENUM(VX_DF_IMAGE_RGB),
-    ARG_ENUM(VX_DF_IMAGE_RGBX),
-    ARG_ENUM(VX_DF_IMAGE_NV12),
-    ARG_ENUM(VX_DF_IMAGE_NV21),
-    ARG_ENUM(VX_DF_IMAGE_UYVY),
-    ARG_ENUM(VX_DF_IMAGE_YUYV),
-    ARG_ENUM(VX_DF_IMAGE_IYUV),
-    ARG_ENUM(VX_DF_IMAGE_YUV4),
+TEST_WITH_ARG(Image, testCreateImageFromHandle, ImageGenerator_Arg,
+    TEST_IMAGE_RANDOM_IMAGE_PARAMETERS
 )
 {
-    vx_context context = context_->vx_context_;
-    vx_image   image   = 0,
-               image2  = 0;
-    CT_Image   ctimg   = 0,
-               ctimg2  = 0;
-
-    image = vxCreateImage(context, 16, 16, arg_->format);
-    ASSERT_VX_OBJECT(image, VX_TYPE_IMAGE);
-
-    ASSERT_NO_FAILURE(ct_fill_image_random(image, &CT()->seed_));
-
-    ASSERT_NO_FAILURE(ctimg = ct_image_from_vx_image(image));
-
-    ASSERT_NO_FAILURE(image2 = ct_image_to_vx_image(ctimg, context));
-
-    ASSERT_NO_FAILURE(ctimg2 = ct_image_from_vx_image(image2));
-
-    ASSERT_EQ_CTIMAGE(ctimg, ctimg2);
-
-    VX_CALL(vxReleaseImage(&image));
-    VX_CALL(vxReleaseImage(&image2));
-
-    ASSERT(image == 0);
-    ASSERT(image2 == 0);
-}
-
-
-/* ***************************************************************************
-//  local auxiliary functions
-*/
-
-/*
-// Generate input random pixel values
-*/
-static CT_Image own_generate_rand_image(const char* fileName, int width, int height, vx_df_image format)
-{
-    CT_Image image;
-
-    ASSERT_NO_FAILURE_(return 0,
-        image = ct_allocate_ct_image_random(width, height, format, &CT()->seed_, 0, 256));
-
-    return image;
-} /* own_generate_rand_image() */
-
-
-static
-vx_uint32 own_plane_subsampling_x(vx_df_image format, vx_uint32 plane)
-{
-    int subsampling_x = 0;
-
-    switch (format)
-    {
-    case VX_DF_IMAGE_IYUV:
-    case VX_DF_IMAGE_NV12:
-    case VX_DF_IMAGE_NV21:
-    case VX_DF_IMAGE_YUYV:
-    case VX_DF_IMAGE_UYVY:
-        subsampling_x = (0 == plane) ? 1 : 2;
-        break;
-
-    default:
-        subsampling_x = 1;
-        break;
-    }
-
-    return subsampling_x;
-}
-
-static
-vx_uint32 own_plane_subsampling_y(vx_df_image format, vx_uint32 plane)
-{
-    int subsampling_y = 0;
-
-    switch (format)
-    {
-    case VX_DF_IMAGE_IYUV:
-    case VX_DF_IMAGE_NV12:
-    case VX_DF_IMAGE_NV21:
-        subsampling_y = (0 == plane) ? 1 : 2;
-        break;
-
-    default:
-        subsampling_y = 1;
-        break;
-    }
-
-    return subsampling_y;
-}
-
-static
-vx_uint32 own_elem_size(vx_df_image format, vx_uint32 plane)
-{
-    int channel_step_x = 0;
-
-    switch (format)
-    {
-    case VX_DF_IMAGE_U8:
-        channel_step_x = 1;
-        break;
-
-    case VX_DF_IMAGE_U16:
-    case VX_DF_IMAGE_S16:
-        channel_step_x = 2;
-        break;
-
-    case VX_DF_IMAGE_U32:
-    case VX_DF_IMAGE_S32:
-    case VX_DF_IMAGE_RGBX:
-        channel_step_x = 4;
-        break;
-
-    case VX_DF_IMAGE_RGB:
-        channel_step_x = 3;
-        break;
-
-    case VX_DF_IMAGE_YUYV:
-    case VX_DF_IMAGE_UYVY:
-        channel_step_x = 2;
-        break;
-
-    case VX_DF_IMAGE_IYUV:
-    case VX_DF_IMAGE_YUV4:
-        channel_step_x = 1;
-        break;
-
-    case VX_DF_IMAGE_NV12:
-    case VX_DF_IMAGE_NV21:
-        channel_step_x = (0 == plane) ? 1 : 2;
-        break;
-
-    default:
-        channel_step_x = 0;
-    }
-
-    return channel_step_x;
-}
-
-typedef struct
-{
-    const char*      testName;
-    CT_Image(*generator)(const char* fileName, int width, int height, vx_df_image format);
-    const char*      fileName;
-    int              width;
-    int              height;
-    vx_df_image      format;
-
-} CreateImageFromHandle_Arg;
-
-
-#define VX_PLANE_MAX (4)
-
-#undef ADD_IMAGE_FORMAT
-#define ADD_IMAGE_FORMAT(testArgName, nextmacro, ...) \
-    CT_EXPAND(nextmacro(testArgName "/VX_DF_IMAGE_U8", __VA_ARGS__, VX_DF_IMAGE_U8)), \
-    CT_EXPAND(nextmacro(testArgName "/VX_DF_IMAGE_U16", __VA_ARGS__, VX_DF_IMAGE_U16)), \
-    CT_EXPAND(nextmacro(testArgName "/VX_DF_IMAGE_S16", __VA_ARGS__, VX_DF_IMAGE_S16)), \
-    CT_EXPAND(nextmacro(testArgName "/VX_DF_IMAGE_U32", __VA_ARGS__, VX_DF_IMAGE_U32)), \
-    CT_EXPAND(nextmacro(testArgName "/VX_DF_IMAGE_S32", __VA_ARGS__, VX_DF_IMAGE_S32)), \
-    CT_EXPAND(nextmacro(testArgName "/VX_DF_IMAGE_RGB", __VA_ARGS__, VX_DF_IMAGE_RGB)), \
-    CT_EXPAND(nextmacro(testArgName "/VX_DF_IMAGE_RGBX", __VA_ARGS__, VX_DF_IMAGE_RGBX)), \
-    CT_EXPAND(nextmacro(testArgName "/VX_DF_IMAGE_UYVY", __VA_ARGS__, VX_DF_IMAGE_UYVY)), \
-    CT_EXPAND(nextmacro(testArgName "/VX_DF_IMAGE_YUYV", __VA_ARGS__, VX_DF_IMAGE_YUYV)), \
-    CT_EXPAND(nextmacro(testArgName "/VX_DF_IMAGE_NV12", __VA_ARGS__, VX_DF_IMAGE_NV12)), \
-    CT_EXPAND(nextmacro(testArgName "/VX_DF_IMAGE_NV21", __VA_ARGS__, VX_DF_IMAGE_NV21)), \
-    CT_EXPAND(nextmacro(testArgName "/VX_DF_IMAGE_YUV4", __VA_ARGS__, VX_DF_IMAGE_YUV4)), \
-    CT_EXPAND(nextmacro(testArgName "/VX_DF_IMAGE_IYUV", __VA_ARGS__, VX_DF_IMAGE_IYUV))
-
-#define CREATE_IMAGE_FROM_HANDLE_PARAMETERS \
-    CT_GENERATE_PARAMETERS("rand", ADD_SIZE_SMALL_SET, ADD_IMAGE_FORMAT, ARG, own_generate_rand_image, NULL)
-
-TEST_WITH_ARG(Image, testCreateImageFromHandle, CreateImageFromHandle_Arg, CREATE_IMAGE_FROM_HANDLE_PARAMETERS)
-{
     vx_uint32 n;
     vx_uint32 nplanes;
     vx_context context = context_->vx_context_;
@@ -365,6 +259,7 @@
 
     switch (arg_->format)
     {
+    case VX_DF_IMAGE_U1:
     case VX_DF_IMAGE_U8:
     case VX_DF_IMAGE_U16:
     case VX_DF_IMAGE_S16:
@@ -406,6 +301,8 @@
         addr[n].dim_y    = src->height / ct_image_get_channel_subsampling_y(src, channel[n]);
         addr[n].stride_x = ct_image_get_channel_step_x(src, channel[n]);
         addr[n].stride_y = ct_image_get_channel_step_y(src, channel[n]);
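+        /* U1 pixels are sub-byte, so the per-pixel step is given in bits via stride_x_bits */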
+        if (arg_->format == VX_DF_IMAGE_U1)
+            addr[n].stride_x_bits = 1;
 
         ptrs[n] = ct_image_get_plane_base(src, n);
     }
@@ -421,323 +318,9 @@
     ASSERT(image == 0);
 } /* testCreateImageFromHandle() */
 
-
-static uint32_t own_stride_bytes(vx_df_image format, int step)
-{
-    uint32_t factor = 0;
-
-    switch (format)
-    {
-    case VX_DF_IMAGE_U8:
-    case VX_DF_IMAGE_NV21:
-    case VX_DF_IMAGE_NV12:
-    case VX_DF_IMAGE_YUV4:
-    case VX_DF_IMAGE_IYUV:
-        factor = 1;
-        break;
-
-    case VX_DF_IMAGE_U16:
-    case VX_DF_IMAGE_S16:
-    case VX_DF_IMAGE_YUYV:
-    case VX_DF_IMAGE_UYVY:
-        factor = 2;
-        break;
-
-    case VX_DF_IMAGE_U32:
-    case VX_DF_IMAGE_S32:
-    case VX_DF_IMAGE_RGBX:
-        factor = 4;
-        break;
-
-    case VX_DF_IMAGE_RGB:
-        factor = 3;
-        break;
-
-    default:
-        ASSERT_(return 0, 0);
-    }
-
-    return step*factor;
-}
-
-
-static int own_get_channel_step_x(vx_df_image format, vx_enum channel)
-{
-    switch (format)
-    {
-    case VX_DF_IMAGE_U8:
-        return 1;
-
-    case VX_DF_IMAGE_U16:
-    case VX_DF_IMAGE_S16:
-        return 2;
-
-    case VX_DF_IMAGE_U32:
-    case VX_DF_IMAGE_S32:
-    case VX_DF_IMAGE_RGBX:
-        return 4;
-
-    case VX_DF_IMAGE_RGB:
-        return 3;
-
-    case VX_DF_IMAGE_YUYV:
-    case VX_DF_IMAGE_UYVY:
-        if (channel == VX_CHANNEL_Y)
-            return 2;
-        return 4;
-
-    case VX_DF_IMAGE_IYUV:
-    case VX_DF_IMAGE_YUV4:
-        return 1;
-
-    case VX_DF_IMAGE_NV12:
-    case VX_DF_IMAGE_NV21:
-        if (channel == VX_CHANNEL_Y)
-            return 1;
-        return 2;
-
-    default:
-        ASSERT_(return 0, 0);
-    }
-
-    return 0;
-}
-
-
-static int own_get_channel_step_y(vx_df_image format, vx_enum channel, int step)
-{
-    switch (format)
-    {
-    case VX_DF_IMAGE_U8:
-        return step;
-
-    case VX_DF_IMAGE_U16:
-    case VX_DF_IMAGE_S16:
-        return step * 2;
-
-    case VX_DF_IMAGE_U32:
-    case VX_DF_IMAGE_S32:
-    case VX_DF_IMAGE_RGBX:
-        return step * 4;
-
-    case VX_DF_IMAGE_RGB:
-        return step * 3;
-
-    case VX_DF_IMAGE_YUYV:
-    case VX_DF_IMAGE_UYVY:
-        return step * 2;
-
-    case VX_DF_IMAGE_IYUV:
-        return (channel == VX_CHANNEL_Y) ? step : step / 2;
-
-    case VX_DF_IMAGE_YUV4:
-    case VX_DF_IMAGE_NV12:
-    case VX_DF_IMAGE_NV21:
-        return step;
-
-    default:
-        ASSERT_(return 0, 0);
-    }
-
-    return 0;
-}
-
-
-static int own_get_channel_subsampling_x(vx_df_image format, vx_enum channel)
-{
-    if (channel == VX_CHANNEL_Y)
-        return 1;
-
-    switch (format)
-    {
-    case VX_DF_IMAGE_IYUV:
-    case VX_DF_IMAGE_NV12:
-    case VX_DF_IMAGE_NV21:
-    case VX_DF_IMAGE_YUYV:
-    case VX_DF_IMAGE_UYVY:
-        return 2;
-    }
-
-    return 1;
-}
-
-
-int own_get_channel_subsampling_y(vx_df_image format, vx_enum channel)
-{
-    if (channel == VX_CHANNEL_Y)
-        return 1;
-
-    switch (format)
-    {
-    case VX_DF_IMAGE_IYUV:
-    case VX_DF_IMAGE_NV12:
-    case VX_DF_IMAGE_NV21:
-        return 2;
-
-    case VX_DF_IMAGE_YUYV:
-    case VX_DF_IMAGE_UYVY:
-        return 1;
-    }
-
-    return 1;
-}
-
-
-static unsigned int own_image_bits_per_pixel(vx_df_image format, unsigned int p)
-{
-    switch (format)
-    {
-    case VX_DF_IMAGE_U8:
-        return 8 * 1;
-
-    case VX_DF_IMAGE_U16:
-    case VX_DF_IMAGE_S16:
-    case VX_DF_IMAGE_UYVY:
-    case VX_DF_IMAGE_YUYV:
-        return 8 * 2;
-
-    case VX_DF_IMAGE_U32:
-    case VX_DF_IMAGE_S32:
-    case VX_DF_IMAGE_RGBX:
-        return 8 * 4;
-
-    case VX_DF_IMAGE_RGB:
-    case VX_DF_IMAGE_YUV4:
-        return 8 * 3;
-
-    case VX_DF_IMAGE_IYUV:
-        return 8 * 3 / 2;
-
-    case VX_DF_IMAGE_NV12:
-    case VX_DF_IMAGE_NV21:
-        if (p == 0)
-            return 8 * 1;
-        else
-            return 8 * 2;
-
-    default:
-        CT_RecordFailure();
-        return 0;
-    };
-}
-
-static size_t own_plane_size(uint32_t width, uint32_t height, unsigned int p, vx_df_image format)
-{
-    return (size_t)(width * height * own_image_bits_per_pixel(format, p) / 8);
-}
-
-
-/*
-// Allocates image plane pointers from user controlled memory according to format, width, height params
-// and initialize with some value
-*/
-static void own_allocate_image_ptrs(
-    vx_df_image format, int width, int height,
-    vx_uint32* nplanes, void* ptrs[], vx_imagepatch_addressing_t addr[],
-    vx_pixel_value_t* val)
-{
-    unsigned int p;
-    int channel[VX_PLANE_MAX] = { 0, 0, 0, 0 };
-
-    switch (format)
-    {
-    case VX_DF_IMAGE_U8:
-    case VX_DF_IMAGE_U16:
-    case VX_DF_IMAGE_S16:
-    case VX_DF_IMAGE_U32:
-    case VX_DF_IMAGE_S32:
-        channel[0] = VX_CHANNEL_0;
-        break;
-
-    case VX_DF_IMAGE_RGB:
-    case VX_DF_IMAGE_RGBX:
-        channel[0] = VX_CHANNEL_R;
-        channel[1] = VX_CHANNEL_G;
-        channel[2] = VX_CHANNEL_B;
-        channel[3] = VX_CHANNEL_A;
-        break;
-
-    case VX_DF_IMAGE_UYVY:
-    case VX_DF_IMAGE_YUYV:
-    case VX_DF_IMAGE_NV12:
-    case VX_DF_IMAGE_NV21:
-    case VX_DF_IMAGE_YUV4:
-    case VX_DF_IMAGE_IYUV:
-        channel[0] = VX_CHANNEL_Y;
-        channel[1] = VX_CHANNEL_U;
-        channel[2] = VX_CHANNEL_V;
-        break;
-
-    default:
-        ASSERT(0);
-    }
-
-    ASSERT_NO_FAILURE(*nplanes = ct_get_num_planes(format));
-
-    for (p = 0; p < *nplanes; p++)
-    {
-        size_t plane_size = 0;
-
-        vx_uint32 subsampling_x = own_get_channel_subsampling_x(format, channel[p]);
-        vx_uint32 subsampling_y = own_get_channel_subsampling_y(format, channel[p]);
-
-        addr[p].dim_x    = width  / subsampling_x;
-        addr[p].dim_y    = height / subsampling_y;
-        addr[p].stride_x = own_get_channel_step_x(format, channel[p]);
-        addr[p].stride_y = own_get_channel_step_y(format, channel[p], width);
-
-        plane_size = addr[p].stride_y * addr[p].dim_y;
-
-        if (plane_size != 0)
-        {
-            ptrs[p] = ct_alloc_mem(plane_size);
-            /* init memory */
-            ct_memset(ptrs[p], val->reserved[p], plane_size);
-        }
-    }
-
-    return;
-}
-
-
-typedef struct
-{
-    const char*      testName;
-    CT_Image(*generator)(const char* fileName, int width, int height, vx_df_image format);
-    const char*      fileName;
-    int              width;
-    int              height;
-    vx_df_image      format;
-    vx_bool          have_roi;
-
-} SwapImageHandle_Arg;
-
-
-#define VX_PLANE_MAX (4)
-
-#define ADD_IMAGE_FORMAT(testArgName, nextmacro, ...) \
-    CT_EXPAND(nextmacro(testArgName "/VX_DF_IMAGE_U8", __VA_ARGS__, VX_DF_IMAGE_U8)), \
-    CT_EXPAND(nextmacro(testArgName "/VX_DF_IMAGE_U16", __VA_ARGS__, VX_DF_IMAGE_U16)), \
-    CT_EXPAND(nextmacro(testArgName "/VX_DF_IMAGE_S16", __VA_ARGS__, VX_DF_IMAGE_S16)), \
-    CT_EXPAND(nextmacro(testArgName "/VX_DF_IMAGE_U32", __VA_ARGS__, VX_DF_IMAGE_U32)), \
-    CT_EXPAND(nextmacro(testArgName "/VX_DF_IMAGE_S32", __VA_ARGS__, VX_DF_IMAGE_S32)), \
-    CT_EXPAND(nextmacro(testArgName "/VX_DF_IMAGE_RGB", __VA_ARGS__, VX_DF_IMAGE_RGB)), \
-    CT_EXPAND(nextmacro(testArgName "/VX_DF_IMAGE_RGBX", __VA_ARGS__, VX_DF_IMAGE_RGBX)), \
-    CT_EXPAND(nextmacro(testArgName "/VX_DF_IMAGE_UYVY", __VA_ARGS__, VX_DF_IMAGE_UYVY)), \
-    CT_EXPAND(nextmacro(testArgName "/VX_DF_IMAGE_YUYV", __VA_ARGS__, VX_DF_IMAGE_YUYV)), \
-    CT_EXPAND(nextmacro(testArgName "/VX_DF_IMAGE_NV12", __VA_ARGS__, VX_DF_IMAGE_NV12)), \
-    CT_EXPAND(nextmacro(testArgName "/VX_DF_IMAGE_NV21", __VA_ARGS__, VX_DF_IMAGE_NV21)), \
-    CT_EXPAND(nextmacro(testArgName "/VX_DF_IMAGE_YUV4", __VA_ARGS__, VX_DF_IMAGE_YUV4)), \
-    CT_EXPAND(nextmacro(testArgName "/VX_DF_IMAGE_IYUV", __VA_ARGS__, VX_DF_IMAGE_IYUV))
-
-#define ADD_IMAGE_ROI(testArgName, nextmacro, ...) \
-    CT_EXPAND(nextmacro(testArgName "/ROI=true", __VA_ARGS__, vx_true_e)), \
-    CT_EXPAND(nextmacro(testArgName "/ROI=false", __VA_ARGS__, vx_false_e))
-
-#define SWAP_IMAGE_HANDLE_PARAMETERS \
-    CT_GENERATE_PARAMETERS("rand", ADD_SIZE_SMALL_SET, ADD_IMAGE_FORMAT, ADD_IMAGE_ROI, ARG, own_generate_rand_image, NULL)
-
-TEST_WITH_ARG(Image, testSwapImageHandle, SwapImageHandle_Arg, SWAP_IMAGE_HANDLE_PARAMETERS)
+TEST_WITH_ARG(Image, testSwapImageHandle, ImageGenerator_Arg,
+    TEST_IMAGE_RANDOM_IMAGE_WITH_ROI_PARAMETERS
+)
 {
     vx_uint32 n;
     vx_context context = context_->vx_context_;
@@ -805,10 +388,11 @@
 
         vx_rectangle_t roi1_rect =
         {
-            arg_->width / 2,
-            arg_->height / 2,
-            arg_->width,
-            arg_->height
+            /* U1 subimages must start on a byte boundary */
+            (vx_uint32)(arg_->format == VX_DF_IMAGE_U1 ? ((arg_->width / 2 + 7) / 8) * 8 : arg_->width / 2),
+            (vx_uint32)arg_->height / 2,
+            (vx_uint32)arg_->width,
+            (vx_uint32)arg_->height
         };
 
         vx_rectangle_t roi2_rect;
@@ -819,7 +403,7 @@
         VX_CALL(vxQueryImage(roi1, VX_IMAGE_WIDTH, &roi1_width, sizeof(vx_uint32)));
         VX_CALL(vxQueryImage(roi1, VX_IMAGE_HEIGHT, &roi1_height, sizeof(vx_uint32)));
 
-        roi2_rect.start_x = roi1_width / 2;
+        roi2_rect.start_x = (arg_->format == VX_DF_IMAGE_U1) ? ((roi1_width / 2 + 7) / 8) * 8 : roi1_width / 2;
         roi2_rect.start_y = roi1_height / 2;
         roi2_rect.end_x   = roi1_width;
         roi2_rect.end_y   = roi1_height;
@@ -827,7 +411,7 @@
         /* second level subimage */
         ASSERT_VX_OBJECT(roi2 = vxCreateImageFromROI(roi1, &roi2_rect), VX_TYPE_IMAGE);
 
-        /* try to get back ROI pointers */        
+        /* try to get back ROI pointers */
         ASSERT_NE_VX_STATUS(VX_SUCCESS, vxSwapImageHandle(roi2, NULL, prev_ptrs, nplanes1));
 
         /* try to replace and get back ROI pointers */
@@ -852,7 +436,7 @@
             {
                 for (j = 0; j < addr.dim_x; j += addr.step_x)
                 {
-                    unsigned char* p = vxFormatImagePatchAddress2d(plane_ptr, j, i, &addr);
+                    unsigned char* p = (unsigned char*)vxFormatImagePatchAddress2d(plane_ptr, j, i, &addr);
                     if (p[0] != val1.reserved[n])
                         CT_FAIL("ROI content mismath at [x=%d, y=%d]: expected %d, actual %d", j, i, val1, p[0]);
                 }
@@ -882,7 +466,7 @@
             {
                 for (j = 0; j < addr.dim_x; j += addr.step_x)
                 {
-                    unsigned char* p = vxFormatImagePatchAddress2d(plane_ptr, j, i, &addr);
+                    unsigned char* p = (unsigned char*)vxFormatImagePatchAddress2d(plane_ptr, j, i, &addr);
                     if (p[0] != val2.reserved[n])
                         CT_FAIL("ROI content mismath at [x=%d, y=%d]: expected %d, actual %d", j, i, val2, p[0]);
                 }
@@ -909,7 +493,7 @@
             {
                 for (j = 0; j < addr.dim_x; j += addr.step_x)
                 {
-                    unsigned char* p = vxFormatImagePatchAddress2d(plane_ptr, j, i, &addr);
+                    unsigned char* p = (unsigned char*)vxFormatImagePatchAddress2d(plane_ptr, j, i, &addr);
                     *p = val3.reserved[n];
                 }
             }
@@ -922,7 +506,7 @@
         /* check that the reclaimed host memory contains the correct data */
         for (n = 0; n < nplanes2; n++)
         {
-            vx_uint8* plane_ptr = prev_ptrs[n];
+            vx_uint8* plane_ptr = (vx_uint8*)prev_ptrs[n];
             vx_uint32 i;
             vx_uint32 j;
             vx_uint32 subsampling_x = own_plane_subsampling_x(arg_->format, n);
@@ -936,7 +520,8 @@
             {
                 for (j = 0; j < addr2[n].dim_x; j++)
                 {
-                    unsigned int k = i * addr2[n].stride_y + j * addr2[n].stride_x;
+                    unsigned int k = i * addr2[n].stride_y;
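+                    /* a zero stride_x marks a U1 plane; derive the byte offset from the bit stride instead */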
+                    k += (addr2[n].stride_x == 0) ? (j * addr2[n].stride_x_bits) / 8 : j * addr2[n].stride_x;
 
                     unsigned char p = plane_ptr[k];
 
@@ -1083,40 +668,9 @@
     ASSERT(image2 == 0);
 } /* testSwapImageHandle() */
 
-
-typedef struct
-{
-    const char*      testName;
-    CT_Image(*generator)(const char* fileName, int width, int height, vx_df_image format);
-    const char*      fileName;
-    int              width;
-    int              height;
-    vx_df_image      format;
-
-} FormatImagePatchAddress1d_Arg;
-
-
-#define VX_PLANE_MAX (4)
-
-#define ADD_IMAGE_FORMAT(testArgName, nextmacro, ...) \
-    CT_EXPAND(nextmacro(testArgName "/VX_DF_IMAGE_U8", __VA_ARGS__, VX_DF_IMAGE_U8)), \
-    CT_EXPAND(nextmacro(testArgName "/VX_DF_IMAGE_U16", __VA_ARGS__, VX_DF_IMAGE_U16)), \
-    CT_EXPAND(nextmacro(testArgName "/VX_DF_IMAGE_S16", __VA_ARGS__, VX_DF_IMAGE_S16)), \
-    CT_EXPAND(nextmacro(testArgName "/VX_DF_IMAGE_U32", __VA_ARGS__, VX_DF_IMAGE_U32)), \
-    CT_EXPAND(nextmacro(testArgName "/VX_DF_IMAGE_S32", __VA_ARGS__, VX_DF_IMAGE_S32)), \
-    CT_EXPAND(nextmacro(testArgName "/VX_DF_IMAGE_RGB", __VA_ARGS__, VX_DF_IMAGE_RGB)), \
-    CT_EXPAND(nextmacro(testArgName "/VX_DF_IMAGE_RGBX", __VA_ARGS__, VX_DF_IMAGE_RGBX)), \
-    CT_EXPAND(nextmacro(testArgName "/VX_DF_IMAGE_UYVY", __VA_ARGS__, VX_DF_IMAGE_UYVY)), \
-    CT_EXPAND(nextmacro(testArgName "/VX_DF_IMAGE_YUYV", __VA_ARGS__, VX_DF_IMAGE_YUYV)), \
-    CT_EXPAND(nextmacro(testArgName "/VX_DF_IMAGE_NV12", __VA_ARGS__, VX_DF_IMAGE_NV12)), \
-    CT_EXPAND(nextmacro(testArgName "/VX_DF_IMAGE_NV21", __VA_ARGS__, VX_DF_IMAGE_NV21)), \
-    CT_EXPAND(nextmacro(testArgName "/VX_DF_IMAGE_YUV4", __VA_ARGS__, VX_DF_IMAGE_YUV4)), \
-    CT_EXPAND(nextmacro(testArgName "/VX_DF_IMAGE_IYUV", __VA_ARGS__, VX_DF_IMAGE_IYUV))
-
-#define FORMAT_IMAGE_PATCH_ADDRESS_1D_PARAMETERS \
-    CT_GENERATE_PARAMETERS("rand", ADD_SIZE_SMALL_SET, ADD_IMAGE_FORMAT, ARG, own_generate_rand_image, NULL)
-
-TEST_WITH_ARG(Image, testFormatImagePatchAddress1d, FormatImagePatchAddress1d_Arg, FORMAT_IMAGE_PATCH_ADDRESS_1D_PARAMETERS)
+TEST_WITH_ARG(Image, testFormatImagePatchAddress1d, ImageGenerator_Arg,
+    TEST_IMAGE_RANDOM_IMAGE_PARAMETERS
+)
 {
     vx_uint8* p1;
     vx_uint8* p2;
@@ -1175,10 +729,16 @@
         /* use linear addressing function */
         for (i = 0; i < addr1[n].dim_x*addr1[n].dim_y; i += addr1[n].step_x)
         {
-            p1 = vxFormatImagePatchAddress1d(ptrs1[n], i, &addr1[n]);
-            p2 = vxFormatImagePatchAddress1d(ptrs2[n], i, &addr2[n]);
+            p1 = (vx_uint8*)vxFormatImagePatchAddress1d(ptrs1[n], i, &addr1[n]);
+            p2 = (vx_uint8*)vxFormatImagePatchAddress1d(ptrs2[n], i, &addr2[n]);
             for (j = 0; j < addr1[n].stride_x; j++)
                 p2[j] = p1[j];
+            if (addr1[n].stride_x == 0 && addr1[n].stride_x_bits == 1)  // VX_DF_IMAGE_U1 image
+            {
+                vx_uint8 x = i % addr1[n].dim_x;
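+                /* copy only this pixel's bit, leaving the other bits of the destination byte untouched */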
+                p2[0] = (p2[0] & ~(1 << (x % 8))) |
+                        (p1[0] &  (1 << (x % 8)));
+            }
         }
 
         VX_CALL(vxUnmapImagePatch(image1, map_id1));
@@ -1196,20 +756,40 @@
     ASSERT(image2 == 0);
 } /* testFormatImagePatchAddress1d() */
 
-TEST_WITH_ARG(Image, testvxSetImagePixelValues, format_arg,
-    ARG_ENUM(VX_DF_IMAGE_U8),
-    ARG_ENUM(VX_DF_IMAGE_U16),
-    ARG_ENUM(VX_DF_IMAGE_S16),
-    ARG_ENUM(VX_DF_IMAGE_U32),
-    ARG_ENUM(VX_DF_IMAGE_S32),
-    ARG_ENUM(VX_DF_IMAGE_RGB),
-    ARG_ENUM(VX_DF_IMAGE_RGBX),
-    ARG_ENUM(VX_DF_IMAGE_NV12),
-    ARG_ENUM(VX_DF_IMAGE_NV21),
-    ARG_ENUM(VX_DF_IMAGE_UYVY),
-    ARG_ENUM(VX_DF_IMAGE_YUYV),
-    ARG_ENUM(VX_DF_IMAGE_IYUV),
-    ARG_ENUM(VX_DF_IMAGE_YUV4),
+TEST_WITH_ARG(Image, testConvert_CT_Image, ImageFormat_Arg,
+    IMAGE_FORMAT_PARAMETERS_BASELINE,
+    ARG("_U1_/VX_DF_IMAGE_U1", VX_DF_IMAGE_U1),
+)
+{
+    vx_context context = context_->vx_context_;
+    vx_image   image   = 0,
+               image2  = 0;
+    CT_Image   ctimg   = 0,
+               ctimg2  = 0;
+
+    image = vxCreateImage(context, 16, 16, arg_->format);
+    ASSERT_VX_OBJECT(image, VX_TYPE_IMAGE);
+
+    ASSERT_NO_FAILURE(ct_fill_image_random(image, &CT()->seed_));
+
+    ASSERT_NO_FAILURE(ctimg = ct_image_from_vx_image(image));
+
+    ASSERT_NO_FAILURE(image2 = ct_image_to_vx_image(ctimg, context));
+
+    ASSERT_NO_FAILURE(ctimg2 = ct_image_from_vx_image(image2));
+
+    ASSERT_EQ_CTIMAGE(ctimg, ctimg2);
+
+    VX_CALL(vxReleaseImage(&image));
+    VX_CALL(vxReleaseImage(&image2));
+
+    ASSERT(image == 0);
+    ASSERT(image2 == 0);
+} /* testConvert_CT_Image() */
+
+TEST_WITH_ARG(Image, testvxSetImagePixelValues, ImageFormat_Arg,
+    IMAGE_FORMAT_PARAMETERS_BASELINE,
+    ARG("_U1_/VX_DF_IMAGE_U1", VX_DF_IMAGE_U1),
 )
 {
     vx_context  context = context_->vx_context_;
@@ -1228,16 +808,19 @@
     vals.reserved[1] = 0x22;
     vals.reserved[2] = 0x33;
     vals.reserved[3] = 0x44;
-	
+
     vx_status status = vxSetImagePixelValues(image, &vals);
     ASSERT_EQ_VX_STATUS(VX_SUCCESS, status);
 
     ASSERT_NO_FAILURE(ctimg = ct_image_from_vx_image(image));
 
     ASSERT_NO_FAILURE(refimg = ct_allocate_image(640, 480, arg_->format));
-	
+
     switch (arg_->format)
     {
+        case VX_DF_IMAGE_U1:
+            ct_memset(refimg->data.y, (vals.U1 ? 0xFF : 0x00), ((640 + 7) / 8) * 480);    // Set 8 pixels at a time
+            break;
         case VX_DF_IMAGE_U8:
             ct_memset(refimg->data.y, vals.U8, 640*480);
             break;
@@ -1324,22 +907,11 @@
 
     VX_CALL(vxReleaseImage(&image));
     ASSERT(image == 0);
-}
+} /* testvxSetImagePixelValues() */
 
-TEST_WITH_ARG(Image, testUniformImage, format_arg,
-    ARG_ENUM(VX_DF_IMAGE_U8),
-    ARG_ENUM(VX_DF_IMAGE_U16),
-    ARG_ENUM(VX_DF_IMAGE_S16),
-    ARG_ENUM(VX_DF_IMAGE_U32),
-    ARG_ENUM(VX_DF_IMAGE_S32),
-    ARG_ENUM(VX_DF_IMAGE_RGB),
-    ARG_ENUM(VX_DF_IMAGE_RGBX),
-    ARG_ENUM(VX_DF_IMAGE_NV12),
-    ARG_ENUM(VX_DF_IMAGE_NV21),
-    ARG_ENUM(VX_DF_IMAGE_UYVY),
-    ARG_ENUM(VX_DF_IMAGE_YUYV),
-    ARG_ENUM(VX_DF_IMAGE_IYUV),
-    ARG_ENUM(VX_DF_IMAGE_YUV4),
+TEST_WITH_ARG(Image, testUniformImage, ImageFormat_Arg,
+    IMAGE_FORMAT_PARAMETERS_BASELINE,
+    ARG("_U1_/VX_DF_IMAGE_U1", VX_DF_IMAGE_U1),
 )
 {
     vx_context context = context_->vx_context_;
@@ -1362,6 +934,9 @@
 
     switch (arg_->format)
     {
+        case VX_DF_IMAGE_U1:
+            ct_memset(refimg->data.y, (vals.U1 ? 0xFF : 0x00), ((640 + 7) / 8) * 480);    // Set 8 pixels at a time
+            break;
         case VX_DF_IMAGE_U8:
             ct_memset(refimg->data.y, vals.U8, 640*480);
             break;
@@ -1450,58 +1025,6 @@
     ASSERT(image == 0);
 } /* testUniformImage() */
 
-static void mem_free(void**ptr)
-{
-    ct_free_mem(*ptr);
-    *ptr = 0;
-}
-
-TEST(Image, testComputeImagePatchSize)
-{
-    vx_context context = context_->vx_context_;
-    vx_image   image   = 0;
-    vx_pixel_value_t val = {{ 0xAB }};
-    vx_size memsz;
-    vx_size count_pixels = 0;
-    vx_uint32 i;
-    vx_uint32 j;
-    vx_uint8* buffer;
-    vx_uint8* buffer0;
-    vx_rectangle_t rect             = { 0, 0, 640, 480 };
-    vx_imagepatch_addressing_t addr = { 640, 480, 1, 640 };
-
-    ASSERT_VX_OBJECT(image = vxCreateUniformImage(context, 640, 480, VX_DF_IMAGE_U8, &val), VX_TYPE_IMAGE);
-
-    memsz = vxComputeImagePatchSize(image, &rect, 0);
-    ASSERT(memsz >= 640*480);
-
-    ASSERT(buffer = ct_alloc_mem(memsz));
-    CT_RegisterForGarbageCollection(buffer, mem_free, CT_GC_OBJECT);
-    buffer0 = buffer;
-
-    // copy image data to our buffer
-    VX_CALL(vxCopyImagePatch(image, &rect, 0, &addr, buffer, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
-    ASSERT_EQ_PTR(buffer0, buffer);
-
-    for (i = 0; i < 480; ++i)
-    {
-        for (j = 0; j < 640; ++j)
-        {
-            vx_uint8* ptr = buffer + i * addr.stride_y + j;
-
-            // no out-of-bound access
-            ASSERT(ptr >= buffer && (vx_size)(ptr - buffer) < memsz);
-
-            count_pixels += (*ptr == val.U8);
-        }
-    }
-
-    ASSERT_EQ_INT(640*480, count_pixels);
-
-    VX_CALL(vxReleaseImage(&image));
-    ASSERT(image == 0);
-} /* testComputeImagePatchSize() */
-
 #define IMAGE_SIZE_X 320
 #define IMAGE_SIZE_Y 200
 #define PATCH_SIZE_X 33
@@ -1512,8 +1035,8 @@
 TEST(Image, testAccessCopyWrite)
 {
     vx_context context = context_->vx_context_;
-    vx_uint8 *localPatchDense = ct_alloc_mem(PATCH_SIZE_X*PATCH_SIZE_Y*sizeof(vx_uint8));
-    vx_uint8 *localPatchSparse = ct_alloc_mem(PATCH_SIZE_X*PATCH_SIZE_Y*3*3*sizeof(vx_uint8));
+    vx_uint8 *localPatchDense  = (vx_uint8*)ct_alloc_mem(PATCH_SIZE_X*PATCH_SIZE_Y*sizeof(vx_uint8));
+    vx_uint8 *localPatchSparse = (vx_uint8*)ct_alloc_mem(PATCH_SIZE_X*PATCH_SIZE_Y*3*3*sizeof(vx_uint8));
     vx_image image;
     int x, y;
     vx_map_id map_id;
@@ -1626,8 +1149,8 @@
 TEST(Image, testAccessCopyRead)
 {
     vx_context context = context_->vx_context_;
-    vx_uint8 *localPatchDense = ct_alloc_mem(PATCH_SIZE_X*PATCH_SIZE_Y*sizeof(vx_uint8));
-    vx_uint8 *localPatchSparse = ct_alloc_mem(PATCH_SIZE_X*PATCH_SIZE_Y*3*3*sizeof(vx_uint8));
+    vx_uint8 *localPatchDense  = (vx_uint8*)ct_alloc_mem(PATCH_SIZE_X*PATCH_SIZE_Y*sizeof(vx_uint8));
+    vx_uint8 *localPatchSparse = (vx_uint8*)ct_alloc_mem(PATCH_SIZE_X*PATCH_SIZE_Y*3*3*sizeof(vx_uint8));
     vx_image image;
     int x, y;
     vx_map_id map_id;
@@ -1723,49 +1246,60 @@
 TEST(Image, testAccessCopyWriteUniformImage)
 {
     vx_context context = context_->vx_context_;
-    vx_image   image   = 0;
+    vx_image image = 0;
+    vx_image roi_image = 0;
     vx_uint32 width = 320;
     vx_uint32 height = 240;
+    vx_uint32 roi_width = 128;
+    vx_uint32 roi_height = 128;
     vx_map_id map_id;
 
     vx_pixel_value_t vals = {{0xFF}};
-    ASSERT_VX_OBJECT(image = vxCreateUniformImage(context, width, height, VX_DF_IMAGE_U8, &vals), VX_TYPE_IMAGE);
-    vx_rectangle_t rect = {0, 0, 320, 240};
-    vx_imagepatch_addressing_t addr;
+    vx_rectangle_t rect = {0, 0, width, height};
+    vx_rectangle_t roi_rect = {0, 0, roi_width, roi_height};
+    vx_imagepatch_addressing_t addr = VX_IMAGEPATCH_ADDR_INIT;
+    vx_imagepatch_addressing_t roi_addr = VX_IMAGEPATCH_ADDR_INIT;
+    roi_addr.dim_x = roi_width;
+    roi_addr.dim_y = roi_height;
+    roi_addr.stride_x = 1;
+    roi_addr.stride_y = roi_width;
+
     vx_uint8 *internal_data = NULL;
-    //can get read-access
+    vx_uint8 *external_data = (vx_uint8 *)ct_alloc_mem(roi_width * roi_height * sizeof(vx_uint8));
+
+    ASSERT_VX_OBJECT(image = vxCreateUniformImage(context, width, height, VX_DF_IMAGE_U8, &vals), VX_TYPE_IMAGE);
+    ASSERT_VX_OBJECT(roi_image = vxCreateImageFromROI(image, &roi_rect), VX_TYPE_IMAGE);
+
+    // Can get read-access, cannot get write-access
     vx_status status = vxMapImagePatch(image, &rect, 0, &map_id, &addr, (void **)&internal_data, VX_READ_ONLY, VX_MEMORY_TYPE_HOST, VX_NOGAP_X);
     ASSERT_EQ_VX_STATUS(VX_SUCCESS, status);
     status = vxUnmapImagePatch(image, map_id);
     ASSERT_EQ_VX_STATUS(VX_SUCCESS, status);
-    //can not get write-access
-    vx_uint32 roi_width = 128;
-    vx_uint32 roi_height = 128;
-    vx_rectangle_t roi_rect = {0, 0, roi_width, roi_height};
-    vx_uint8 *external_data = (vx_uint8 *)ct_alloc_mem(roi_width * roi_height * sizeof(vx_uint8));
-
-    //Write is not be allowed for uniformimage
-    status = vxCopyImagePatch(image, &roi_rect, 0, &addr, (void *)external_data, VX_WRITE_ONLY, VX_MEMORY_TYPE_HOST);
-    ASSERT_NE_VX_STATUS(VX_SUCCESS, status);
-
-    //ok to read
-    status = vxCopyImagePatch(image, &roi_rect, 0, &addr, (void *)external_data, VX_READ_ONLY, VX_MEMORY_TYPE_HOST);
-    ASSERT_NE_VX_STATUS(VX_SUCCESS, status);
-
-    //test ROI image(from uniform image), behaviour must be equal to uniform image
-    vx_image roi_image = 0;
     internal_data = NULL;
+    status = vxMapImagePatch(image, &rect, 0, &map_id, &addr, (void **)&internal_data, VX_WRITE_ONLY, VX_MEMORY_TYPE_HOST, VX_NOGAP_X);
+    ASSERT_NE_VX_STATUS(VX_SUCCESS, status);
 
-    ASSERT_VX_OBJECT(roi_image = vxCreateImageFromROI(image, &roi_rect), VX_TYPE_IMAGE);
+    // Reading from the image is allowed, writing to the image is not allowed
+    status = vxCopyImagePatch(image, &roi_rect, 0, &roi_addr, (void *)external_data, VX_READ_ONLY, VX_MEMORY_TYPE_HOST);
+    ASSERT_EQ_VX_STATUS(VX_SUCCESS, status);
+    status = vxCopyImagePatch(image, &roi_rect, 0, &roi_addr, (void *)external_data, VX_WRITE_ONLY, VX_MEMORY_TYPE_HOST);
+    ASSERT_NE_VX_STATUS(VX_SUCCESS, status);
+
+    // Test ROI image(from uniform image), behaviour must be equal to uniform image
+    // Can get read-access, cannot get write-access
+    internal_data = NULL;
     status = vxMapImagePatch(roi_image, &roi_rect, 0, &map_id, &addr, (void **)&internal_data, VX_READ_ONLY, VX_MEMORY_TYPE_HOST, VX_NOGAP_X);
     ASSERT_EQ_VX_STATUS(VX_SUCCESS, status);
     status = vxUnmapImagePatch(roi_image, map_id);
     ASSERT_EQ_VX_STATUS(VX_SUCCESS, status);
-
-    status = vxCopyImagePatch(roi_image, &roi_rect, 0, &addr, (void *)external_data, VX_WRITE_ONLY, VX_MEMORY_TYPE_HOST);
+    internal_data = NULL;
+    status = vxMapImagePatch(roi_image, &roi_rect, 0, &map_id, &addr, (void **)&internal_data, VX_WRITE_ONLY, VX_MEMORY_TYPE_HOST, VX_NOGAP_X);
     ASSERT_NE_VX_STATUS(VX_SUCCESS, status);
 
-    status = vxCopyImagePatch(roi_image, &roi_rect, 0, &addr, (void *)external_data, VX_READ_ONLY, VX_MEMORY_TYPE_HOST);
+    // Reading from the image is allowed, writing to the image is not allowed
+    status = vxCopyImagePatch(roi_image, &roi_rect, 0, &roi_addr, (void *)external_data, VX_READ_ONLY, VX_MEMORY_TYPE_HOST);
+    ASSERT_EQ_VX_STATUS(VX_SUCCESS, status);
+    status = vxCopyImagePatch(roi_image, &roi_rect, 0, &roi_addr, (void *)external_data, VX_WRITE_ONLY, VX_MEMORY_TYPE_HOST);
     ASSERT_NE_VX_STATUS(VX_SUCCESS, status);
 
     EXPECT_EQ_VX_STATUS(VX_SUCCESS, vxReleaseImage(&image));
@@ -1823,6 +1357,419 @@
 } /* testQueryImage() */
 
 
+/* ***************************************************************************
+//  local auxiliary functions
+*/
+
+/*
+// Generate input random pixel values
+*/
+static CT_Image own_generate_rand_image(const char* fileName, int width, int height, vx_df_image format)
+{
+    CT_Image image;
+
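+    /* U1 pixels only take the values 0 and 1, so restrict the random range accordingly */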
+    if (format == VX_DF_IMAGE_U1)
+        ASSERT_NO_FAILURE_(return 0, image = ct_allocate_ct_image_random(width, height, format, &CT()->seed_, 0, 2));
+    else
+        ASSERT_NO_FAILURE_(return 0, image = ct_allocate_ct_image_random(width, height, format, &CT()->seed_, 0, 256));
+
+    return image;
+} /* own_generate_rand_image() */
+
+static vx_uint32 own_plane_subsampling_x(vx_df_image format, vx_uint32 plane)
+{
+    int subsampling_x = 0;
+
+    switch (format)
+    {
+    case VX_DF_IMAGE_IYUV:
+    case VX_DF_IMAGE_NV12:
+    case VX_DF_IMAGE_NV21:
+    case VX_DF_IMAGE_YUYV:
+    case VX_DF_IMAGE_UYVY:
+        subsampling_x = (0 == plane) ? 1 : 2;
+        break;
+
+    default:
+        subsampling_x = 1;
+        break;
+    }
+
+    return subsampling_x;
+} /* own_plane_subsampling_x() */
+
+static vx_uint32 own_plane_subsampling_y(vx_df_image format, vx_uint32 plane)
+{
+    int subsampling_y = 0;
+
+    switch (format)
+    {
+    case VX_DF_IMAGE_IYUV:
+    case VX_DF_IMAGE_NV12:
+    case VX_DF_IMAGE_NV21:
+        subsampling_y = (0 == plane) ? 1 : 2;
+        break;
+
+    default:
+        subsampling_y = 1;
+        break;
+    }
+
+    return subsampling_y;
+} /* own_plane_subsampling_y() */
+
+static vx_uint32 own_elem_size(vx_df_image format, vx_uint32 plane)
+{
+    int channel_step_x = 0;
+
+    switch (format)
+    {
+    case VX_DF_IMAGE_U1:
+        channel_step_x = 0;
+        break;
+
+    case VX_DF_IMAGE_U8:
+        channel_step_x = 1;
+        break;
+
+    case VX_DF_IMAGE_U16:
+    case VX_DF_IMAGE_S16:
+        channel_step_x = 2;
+        break;
+
+    case VX_DF_IMAGE_U32:
+    case VX_DF_IMAGE_S32:
+    case VX_DF_IMAGE_RGBX:
+        channel_step_x = 4;
+        break;
+
+    case VX_DF_IMAGE_RGB:
+        channel_step_x = 3;
+        break;
+
+    case VX_DF_IMAGE_YUYV:
+    case VX_DF_IMAGE_UYVY:
+        channel_step_x = 2;
+        break;
+
+    case VX_DF_IMAGE_IYUV:
+    case VX_DF_IMAGE_YUV4:
+        channel_step_x = 1;
+        break;
+
+    case VX_DF_IMAGE_NV12:
+    case VX_DF_IMAGE_NV21:
+        channel_step_x = (0 == plane) ? 1 : 2;
+        break;
+
+    default:
+        channel_step_x = 0;
+    }
+
+    return channel_step_x;
+} /* own_elem_size() */
+
+static uint32_t own_stride_bytes(vx_df_image format, int step)
+{
+    uint32_t factor = 0;
+
+    switch (format)
+    {
+    case VX_DF_IMAGE_U1:
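+        /* one bit per pixel: round each row up to a whole number of bytes */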
+        return (step + 7) / 8;
+
+    case VX_DF_IMAGE_U8:
+    case VX_DF_IMAGE_NV21:
+    case VX_DF_IMAGE_NV12:
+    case VX_DF_IMAGE_YUV4:
+    case VX_DF_IMAGE_IYUV:
+        factor = 1;
+        break;
+
+    case VX_DF_IMAGE_U16:
+    case VX_DF_IMAGE_S16:
+    case VX_DF_IMAGE_YUYV:
+    case VX_DF_IMAGE_UYVY:
+        factor = 2;
+        break;
+
+    case VX_DF_IMAGE_U32:
+    case VX_DF_IMAGE_S32:
+    case VX_DF_IMAGE_RGBX:
+        factor = 4;
+        break;
+
+    case VX_DF_IMAGE_RGB:
+        factor = 3;
+        break;
+
+    default:
+        ASSERT_(return 0, 0);
+    }
+
+    return step*factor;
+} /* own_stride_bytes() */
+
+static int own_get_channel_step_x(vx_df_image format, vx_enum channel)
+{
+    switch (format)
+    {
+    case VX_DF_IMAGE_U1:
+        return 0;
+
+    case VX_DF_IMAGE_U8:
+        return 1;
+
+    case VX_DF_IMAGE_U16:
+    case VX_DF_IMAGE_S16:
+        return 2;
+
+    case VX_DF_IMAGE_U32:
+    case VX_DF_IMAGE_S32:
+    case VX_DF_IMAGE_RGBX:
+        return 4;
+
+    case VX_DF_IMAGE_RGB:
+        return 3;
+
+    case VX_DF_IMAGE_YUYV:
+    case VX_DF_IMAGE_UYVY:
+        if (channel == VX_CHANNEL_Y)
+            return 2;
+        return 4;
+
+    case VX_DF_IMAGE_IYUV:
+    case VX_DF_IMAGE_YUV4:
+        return 1;
+
+    case VX_DF_IMAGE_NV12:
+    case VX_DF_IMAGE_NV21:
+        if (channel == VX_CHANNEL_Y)
+            return 1;
+        return 2;
+
+    default:
+        ASSERT_(return 0, 0);
+    }
+
+    return 0;
+} /* own_get_channel_step_x() */
+
+static int own_get_channel_step_y(vx_df_image format, vx_enum channel, int step)
+{
+    switch (format)
+    {
+    case VX_DF_IMAGE_U1:
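+        /* U1 rows are packed eight pixels per byte */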
+        return (step + 7) / 8;
+
+    case VX_DF_IMAGE_U8:
+        return step;
+
+    case VX_DF_IMAGE_U16:
+    case VX_DF_IMAGE_S16:
+        return step * 2;
+
+    case VX_DF_IMAGE_U32:
+    case VX_DF_IMAGE_S32:
+    case VX_DF_IMAGE_RGBX:
+        return step * 4;
+
+    case VX_DF_IMAGE_RGB:
+        return step * 3;
+
+    case VX_DF_IMAGE_YUYV:
+    case VX_DF_IMAGE_UYVY:
+        return step * 2;
+
+    case VX_DF_IMAGE_IYUV:
+        return (channel == VX_CHANNEL_Y) ? step : step / 2;
+
+    case VX_DF_IMAGE_YUV4:
+    case VX_DF_IMAGE_NV12:
+    case VX_DF_IMAGE_NV21:
+        return step;
+
+    default:
+        ASSERT_(return 0, 0);
+    }
+
+    return 0;
+} /* own_get_channel_step_y() */
+
+static int own_get_channel_subsampling_x(vx_df_image format, vx_enum channel)
+{
+    if (channel == VX_CHANNEL_Y)
+        return 1;
+
+    switch (format)
+    {
+    case VX_DF_IMAGE_IYUV:
+    case VX_DF_IMAGE_NV12:
+    case VX_DF_IMAGE_NV21:
+    case VX_DF_IMAGE_YUYV:
+    case VX_DF_IMAGE_UYVY:
+        return 2;
+    }
+
+    return 1;
+} /* own_get_channel_subsampling_x() */
+
+int own_get_channel_subsampling_y(vx_df_image format, vx_enum channel)
+{
+    if (channel == VX_CHANNEL_Y)
+        return 1;
+
+    switch (format)
+    {
+    case VX_DF_IMAGE_IYUV:
+    case VX_DF_IMAGE_NV12:
+    case VX_DF_IMAGE_NV21:
+        return 2;
+
+    case VX_DF_IMAGE_YUYV:
+    case VX_DF_IMAGE_UYVY:
+        return 1;
+    }
+
+    return 1;
+} /* own_get_channel_subsampling_y() */
+
+static unsigned int own_image_bits_per_pixel(vx_df_image format, unsigned int p)
+{
+    switch (format)
+    {
+    case VX_DF_IMAGE_U1:
+        return 1 * 1;
+
+    case VX_DF_IMAGE_U8:
+        return 8 * 1;
+
+    case VX_DF_IMAGE_U16:
+    case VX_DF_IMAGE_S16:
+    case VX_DF_IMAGE_UYVY:
+    case VX_DF_IMAGE_YUYV:
+        return 8 * 2;
+
+    case VX_DF_IMAGE_U32:
+    case VX_DF_IMAGE_S32:
+    case VX_DF_IMAGE_RGBX:
+        return 8 * 4;
+
+    case VX_DF_IMAGE_RGB:
+    case VX_DF_IMAGE_YUV4:
+        return 8 * 3;
+
+    case VX_DF_IMAGE_IYUV:
+        return 8 * 3 / 2;
+
+    case VX_DF_IMAGE_NV12:
+    case VX_DF_IMAGE_NV21:
+        if (p == 0)
+            return 8 * 1;
+        else
+            return 8 * 2;
+
+    default:
+        CT_RecordFailure();
+        return 0;
+    };
+} /* own_image_bits_per_pixel() */
+
+static size_t own_plane_size(uint32_t width, uint32_t height, unsigned int p, vx_df_image format)
+{
+    if (format == VX_DF_IMAGE_U1)
+    {
+        /* round rows up to full bytes */
+        size_t rowSize = (size_t)(width * own_image_bits_per_pixel(format, p) + 7) / 8;
+        return (size_t)(rowSize * height);
+    }
+    else
+    {
+        return (size_t)(width * height * own_image_bits_per_pixel(format, p) / 8);
+    }
+} /* own_plane_size() */
+
+/*
+// Allocates image plane pointers from user controlled memory according to format, width, height params
+// and initialize with some value
+*/
+static void own_allocate_image_ptrs(
+    vx_df_image format, int width, int height,
+    vx_uint32* nplanes, void* ptrs[], vx_imagepatch_addressing_t addr[],
+    vx_pixel_value_t* val)
+{
+    unsigned int p;
+    int channel[VX_PLANE_MAX] = { 0, 0, 0, 0 };
+
+    switch (format)
+    {
+    case VX_DF_IMAGE_U1:
+    case VX_DF_IMAGE_U8:
+    case VX_DF_IMAGE_U16:
+    case VX_DF_IMAGE_S16:
+    case VX_DF_IMAGE_U32:
+    case VX_DF_IMAGE_S32:
+        channel[0] = VX_CHANNEL_0;
+        break;
+
+    case VX_DF_IMAGE_RGB:
+    case VX_DF_IMAGE_RGBX:
+        channel[0] = VX_CHANNEL_R;
+        channel[1] = VX_CHANNEL_G;
+        channel[2] = VX_CHANNEL_B;
+        channel[3] = VX_CHANNEL_A;
+        break;
+
+    case VX_DF_IMAGE_UYVY:
+    case VX_DF_IMAGE_YUYV:
+    case VX_DF_IMAGE_NV12:
+    case VX_DF_IMAGE_NV21:
+    case VX_DF_IMAGE_YUV4:
+    case VX_DF_IMAGE_IYUV:
+        channel[0] = VX_CHANNEL_Y;
+        channel[1] = VX_CHANNEL_U;
+        channel[2] = VX_CHANNEL_V;
+        break;
+
+    default:
+        ASSERT(0);
+    }
+
+    ASSERT_NO_FAILURE(*nplanes = ct_get_num_planes(format));
+
+    for (p = 0; p < *nplanes; p++)
+    {
+        size_t plane_size = 0;
+
+        vx_uint32 subsampling_x = own_get_channel_subsampling_x(format, channel[p]);
+        vx_uint32 subsampling_y = own_get_channel_subsampling_y(format, channel[p]);
+
+        addr[p].dim_x    = width  / subsampling_x;
+        addr[p].dim_y    = height / subsampling_y;
+        addr[p].stride_x = own_get_channel_step_x(format, channel[p]);
+        addr[p].stride_y = own_get_channel_step_y(format, channel[p], width);
+        if (format == VX_DF_IMAGE_U1)
+            addr[p].stride_x_bits = 1;
+
+        plane_size = addr[p].stride_y * addr[p].dim_y;
+
+        if (plane_size != 0)
+        {
+            ptrs[p] = ct_alloc_mem(plane_size);
+            /* init memory */
+            ct_memset(ptrs[p], val->reserved[p], plane_size);
+        }
+    }
+
+    return;
+} /* own_allocate_image_ptrs() */
+
+static void mem_free(void**ptr)
+{
+    ct_free_mem(*ptr);
+    *ptr = 0;
+} /* mem_free() */
+
 /*
 // Check image patch data in user memory against constant pixel value
 // Note:
@@ -1839,6 +1786,15 @@
         {
             switch (format)
             {
+            case VX_DF_IMAGE_U1:
+            {
+                vx_uint8 offset = x % 8;
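+                /* isolate bit (x % 8) of the byte holding pixel x and compare it with the expected 0/1 value */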
+                vx_uint8* tst = (vx_uint8*)((vx_uint8*)ptr + y * addr->stride_y + (x * addr->stride_x_bits) / 8);
+                vx_uint8  ref = ref_val->U1 ? 1 : 0;
+                ASSERT_EQ_INT(ref, (tst[0] & (1 << offset)) >> offset );
+            }
+            break;
+
             case VX_DF_IMAGE_U8:
             {
                 vx_uint8* tst = (vx_uint8*)((vx_uint8*)ptr + y * addr->stride_y + x * addr->stride_x);
@@ -1981,6 +1937,18 @@
 {
     switch (format)
     {
+    case VX_DF_IMAGE_U1:
+    {
+        ref_addr[0].dim_x   = ref->width + ref->roi.x % 8;
+        ref_addr[0].dim_y   = ref->height;
+        ref_addr[0].stride_x = 0;
+        ref_addr[0].stride_y = (ref->stride + 7) / 8;
+        ref_addr[0].stride_x_bits = 1;
+
+        ref_ptrs[0] = ref->data.y;
+    }
+    break;
+
     case VX_DF_IMAGE_U8:
     {
         ref_addr[0].dim_x   = ref->width;
@@ -2165,12 +2133,25 @@
     vx_uint32 y;
     vx_uint32 elem_size = own_elem_size(format, plane);
 
+    vx_uint32 xROIOffset = (format == VX_DF_IMAGE_U1) ? ref->roi.x % 8 : 0;     // Offset needed for U1 ROI
     for (y = 0; y < tst_addr->dim_y; y++)
     {
-        for (x = 0; x < tst_addr->dim_x; x++)
+        for (x = xROIOffset; x < tst_addr->dim_x + xROIOffset; x++)
         {
             switch (format)
             {
+            case VX_DF_IMAGE_U1:
+            {
+                vx_uint8  offset  = x % 8;
+                vx_uint8* tst_ptr = (vx_uint8*)((vx_uint8*)ptr + y * tst_addr->stride_y +
+                                                (x * tst_addr->stride_x_bits) / 8);
+                vx_uint8* ref_ptr = (vx_uint8*)((vx_uint8*)ref->data.y + y * ct_stride_bytes(ref) +
+                                                (x * ct_image_bits_per_pixel(VX_DF_IMAGE_U1)) / 8);
+                ASSERT_EQ_INT((ref_ptr[0] & (1 << offset)) >> offset,
+                              (tst_ptr[0] & (1 << offset)) >> offset);
+            }
+            break;
+
             case VX_DF_IMAGE_U8:
             {
                 vx_uint8* tst_ptr = (vx_uint8*)((vx_uint8*)ptr + y * tst_addr->stride_y + x * tst_addr->stride_x);
@@ -2309,6 +2290,17 @@
         {
             switch (format)
             {
+            case VX_DF_IMAGE_U1:
+            {
+                vx_uint8  offset  = x % 8;
+                vx_uint8* ref_ptr = (vx_uint8*)((vx_uint8*)p_ct_base + y * ct_stride_bytes(ctimg) +
+                                                (x * ct_image_bits_per_pixel(VX_DF_IMAGE_U1)) / 8);
+                vx_uint8* tst_ptr = (vx_uint8*)vxFormatImagePatchAddress2d(p_vx_base, x, y, vx_addr);
+                ASSERT_EQ_INT((ref_ptr[0] & (1 << offset)) >> offset,
+                              (tst_ptr[0] & (1 << offset)) >> offset);
+            }
+            break;
+
             case VX_DF_IMAGE_U8:
             {
                 vx_uint8* ref_ptr = (vx_uint8*)((vx_uint8*)p_ct_base + y * ctimg->stride * ct_elem_size + x * ct_elem_size);
@@ -2436,6 +2428,319 @@
 
 
 /* ***************************************************************************
+//  vxCreateImageFromChannel tests
+*/
+TESTCASE(vxCreateImageFromChannel, CT_VXContext, ct_setup_vx_context, 0)
+
+typedef struct
+{
+    const char* testName;
+    CT_Image(*generator)(const char* fileName, int width, int height, vx_df_image format);
+    const char* fileName;
+    int width;
+    int height;
+    vx_df_image format;
+    vx_enum channel;
+} CreateImageFromChannel_Arg;
+
+#define ADD_IMAGE_FORMAT_444(testArgName, nextmacro, ...) \
+    CT_EXPAND(nextmacro(testArgName "/VX_DF_IMAGE_YUV4", __VA_ARGS__, VX_DF_IMAGE_YUV4))
+
+#define ADD_IMAGE_FORMAT_420(testArgName, nextmacro, ...) \
+    CT_EXPAND(nextmacro(testArgName "/VX_DF_IMAGE_IYUV", __VA_ARGS__, VX_DF_IMAGE_IYUV)), \
+    CT_EXPAND(nextmacro(testArgName "/VX_DF_IMAGE_NV12", __VA_ARGS__, VX_DF_IMAGE_NV12)), \
+    CT_EXPAND(nextmacro(testArgName "/VX_DF_IMAGE_NV21", __VA_ARGS__, VX_DF_IMAGE_NV21))
+
+#define ADD_IMAGE_CHANNEL_YUV(testArgName, nextmacro, ...) \
+    CT_EXPAND(nextmacro(testArgName "/VX_CHANNEL_Y", __VA_ARGS__, VX_CHANNEL_Y)), \
+    CT_EXPAND(nextmacro(testArgName "/VX_CHANNEL_U", __VA_ARGS__, VX_CHANNEL_U)), \
+    CT_EXPAND(nextmacro(testArgName "/VX_CHANNEL_V", __VA_ARGS__, VX_CHANNEL_V))
+
+#define ADD_IMAGE_CHANNEL_Y(testArgName, nextmacro, ...) \
+    CT_EXPAND(nextmacro(testArgName "/VX_CHANNEL_Y", __VA_ARGS__, VX_CHANNEL_Y))
+
+#define CREATE_IMAGE_FROM_CHANNEL_UNIFORM_IMAGE_PARAMETERS \
+    CT_GENERATE_PARAMETERS("uniform", ADD_SIZE_SMALL_SET, ADD_IMAGE_FORMAT_444, ADD_IMAGE_CHANNEL_YUV, ARG, NULL, NULL), \
+    CT_GENERATE_PARAMETERS("uniform", ADD_SIZE_SMALL_SET, ADD_IMAGE_FORMAT_420, ADD_IMAGE_CHANNEL_Y,   ARG, NULL, NULL)
+
+#define CREATE_IMAGE_FROM_CHANNEL_RANDOM_IMAGE_PARAMETERS \
+    CT_GENERATE_PARAMETERS("rand", ADD_SIZE_SMALL_SET, ADD_IMAGE_FORMAT_444, ADD_IMAGE_CHANNEL_YUV, ARG, own_generate_rand_image, NULL), \
+    CT_GENERATE_PARAMETERS("rand", ADD_SIZE_SMALL_SET, ADD_IMAGE_FORMAT_420, ADD_IMAGE_CHANNEL_Y,   ARG, own_generate_rand_image, NULL)
+
+TEST_WITH_ARG(vxCreateImageFromChannel, testChannelFromUniformImage, CreateImageFromChannel_Arg,
+    CREATE_IMAGE_FROM_CHANNEL_UNIFORM_IMAGE_PARAMETERS
+)
+{
+    vx_context context = context_->vx_context_;
+    vx_image src = 0;
+    vx_image ref = 0;
+    vx_image tst = 0;
+    vx_uint32 width  = arg_->width;
+    vx_uint32 height = arg_->height;
+    vx_pixel_value_t pixel_value;
+
+    pixel_value.YUV[0] = 0x55;
+    pixel_value.YUV[1] = 0xAA;
+    pixel_value.YUV[2] = 0x33;
+
+    EXPECT_VX_OBJECT(src = vxCreateUniformImage(context, arg_->width, arg_->height, arg_->format, &pixel_value), VX_TYPE_IMAGE);
+
+    if (VX_CHANNEL_Y != arg_->channel && VX_DF_IMAGE_IYUV == arg_->format)
+    {
+        width  /= 2;
+        height /= 2;
+    }
+
+    EXPECT_VX_OBJECT(ref = vxCreateImage(context, width, height, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
+    VX_CALL(vxuChannelExtract(context, src, arg_->channel, ref));
+
+    EXPECT_VX_OBJECT(tst = vxCreateImageFromChannel(src, arg_->channel), VX_TYPE_IMAGE);
+
+    {
+        CT_Image image_ref = ct_image_from_vx_image(ref);
+        CT_Image image_tst = ct_image_from_vx_image(tst);
+
+        EXPECT_EQ_CTIMAGE(image_ref, image_tst);
+    }
+
+    VX_CALL(vxReleaseImage(&ref));
+    VX_CALL(vxReleaseImage(&tst));
+    VX_CALL(vxReleaseImage(&src));
+} /* testChannelFromUniformImage() */
+
+TEST_WITH_ARG(vxCreateImageFromChannel, testChannelFromRandomImage, CreateImageFromChannel_Arg,
+    CREATE_IMAGE_FROM_CHANNEL_RANDOM_IMAGE_PARAMETERS
+)
+{
+    vx_context context = context_->vx_context_;
+    vx_image src = 0;
+    vx_image ref = 0;
+    vx_image tst = 0;
+    vx_uint32 width  = arg_->width;
+    vx_uint32 height = arg_->height;
+    CT_Image image = NULL;
+
+    ASSERT_NO_FAILURE(image = arg_->generator(arg_->fileName, arg_->width, arg_->height, arg_->format));
+
+    EXPECT_VX_OBJECT(src = ct_image_to_vx_image(image, context), VX_TYPE_IMAGE);
+
+    if (VX_CHANNEL_Y != arg_->channel && VX_DF_IMAGE_IYUV == arg_->format)
+    {
+        width  /= 2;
+        height /= 2;
+    }
+
+    EXPECT_VX_OBJECT(ref = vxCreateImage(context, width, height, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
+    VX_CALL(vxuChannelExtract(context, src, arg_->channel, ref));
+
+    EXPECT_VX_OBJECT(tst = vxCreateImageFromChannel(src, arg_->channel), VX_TYPE_IMAGE);
+
+    {
+        /* 1. check that the image created from the channel equals the channel extracted from the original image */
+        CT_Image image_ref = ct_image_from_vx_image(ref);
+        CT_Image image_tst = ct_image_from_vx_image(tst);
+
+        EXPECT_EQ_CTIMAGE(image_ref, image_tst);
+    }
+
+    {
+        /* 2. check that modifying the image created from the channel is reflected in the corresponding channel of the original image */
+        vx_uint32 i;
+        vx_uint32 j;
+        vx_uint32 p = (VX_CHANNEL_Y == arg_->channel ? 0 : (VX_CHANNEL_U == arg_->channel ? 1 : 2));
+        vx_rectangle_t rect = { 1, 1, 6, 6 };
+        vx_imagepatch_addressing_t addr =
+        {
+            rect.end_x - rect.start_x,
+            rect.end_y - rect.start_y,
+            1,
+            rect.end_x - rect.start_x
+        };
+
+        vx_size sz = 0;
+        void* ptr = 0;
+        vx_map_id tst_map_id;
+        vx_imagepatch_addressing_t map_addr;
+        void *tst_base = NULL;
+        vx_size numPixels;
+
+        VX_CALL(vxMapImagePatch(tst, &rect, 0, &tst_map_id, &map_addr, &tst_base, VX_READ_ONLY, VX_MEMORY_TYPE_HOST, 0));
+        numPixels = ((rect.end_x-rect.start_x) * VX_SCALE_UNITY/map_addr.scale_x) *
+                     ((rect.end_y-rect.start_y) * VX_SCALE_UNITY/map_addr.scale_y);
+        sz = numPixels * map_addr.stride_x;
+        VX_CALL(vxUnmapImagePatch(tst, tst_map_id));
+
+        ptr = ct_alloc_mem(sz);
+
+        /* fill image patch with some values */
+        for (i = 0; i < addr.dim_y; i++)
+        {
+            vx_uint8* p = (vx_uint8*)ptr + i * addr.stride_x;
+            for (j = 0; j < addr.dim_x; j++)
+            {
+                p[j] = (vx_uint8)(i + j);
+            }
+        }
+
+        /* copy patch to channel image */
+        vxCopyImagePatch(tst, &rect, 0, &addr, ptr, VX_WRITE_ONLY, VX_MEMORY_TYPE_HOST);
+
+        /* clean patch memory */
+        ct_memset(ptr, 0, sz);
+
+        /* get channel patch from original image */
+        vxCopyImagePatch(src, &rect, p, &addr, ptr, VX_READ_ONLY, VX_MEMORY_TYPE_HOST);
+
+        /* check that the channel changes have been reflected in the original image */
+        for (i = 0; i < addr.dim_y; i++)
+        {
+            vx_uint8* p = (vx_uint8*)ptr + i * addr.stride_x;
+            for (j = 0; j < addr.dim_x; j++)
+            {
+                EXPECT_EQ_INT((vx_uint8)(i + j), p[j]);
+            }
+        }
+
+        ct_free_mem(ptr);
+    }
+
+    {
+        /* 3. check that modifying the channel of the original image is reflected in the image created from that channel */
+        vx_uint32 i;
+        vx_uint32 j;
+        vx_uint32 p = (VX_CHANNEL_Y == arg_->channel ? 0 : (VX_CHANNEL_U == arg_->channel ? 1 : 2));
+        vx_rectangle_t rect = { 1, 1, 6, 6 };
+        vx_imagepatch_addressing_t addr =
+        {
+            rect.end_x - rect.start_x,
+            rect.end_y - rect.start_y,
+            1,
+            rect.end_x - rect.start_x
+        };
+
+        vx_size sz = 0;
+        void* ptr = 0;
+        vx_map_id src_map_id;
+        vx_imagepatch_addressing_t map_addr;
+        void *src_base = NULL;
+        vx_size numPixels;
+
+        VX_CALL(vxMapImagePatch(src, &rect, p, &src_map_id, &map_addr, &src_base, VX_READ_ONLY, VX_MEMORY_TYPE_HOST, 0));
+        numPixels = ((rect.end_x-rect.start_x) * VX_SCALE_UNITY/map_addr.scale_x) *
+                     ((rect.end_y-rect.start_y) * VX_SCALE_UNITY/map_addr.scale_y);
+        sz = numPixels * map_addr.stride_x;
+        VX_CALL(vxUnmapImagePatch(src, src_map_id));
+
+        ptr = ct_alloc_mem(sz);
+
+        /* fill image patch with some values */
+        for (i = 0; i < addr.dim_y; i++)
+        {
+            vx_uint8* p = (vx_uint8*)ptr + i * addr.stride_x;
+            for (j = 0; j < addr.dim_x; j++)
+            {
+                p[j] = (vx_uint8)(i + j);
+            }
+        }
+
+        /* copy patch to channel of original image */
+        vxCopyImagePatch(src, &rect, p, &addr, ptr, VX_WRITE_ONLY, VX_MEMORY_TYPE_HOST);
+
+        /* clean patch memory */
+        ct_memset(ptr, 0, sz);
+
+        /* get patch from image created from channel */
+        vxCopyImagePatch(tst, &rect, 0, &addr, ptr, VX_READ_ONLY, VX_MEMORY_TYPE_HOST);
+
+        /* check that changes to the channel of the original image have been reflected in the channel image */
+        for (i = 0; i < addr.dim_y; i++)
+        {
+            vx_uint8* p = (vx_uint8*)ptr + i * addr.stride_x;
+            for (j = 0; j < addr.dim_x; j++)
+            {
+                EXPECT_EQ_INT((vx_uint8)(i + j), p[j]);
+            }
+        }
+
+        ct_free_mem(ptr);
+    }
+
+    VX_CALL(vxReleaseImage(&ref));
+    VX_CALL(vxReleaseImage(&tst));
+    VX_CALL(vxReleaseImage(&src));
+} /* testChannelFromRandomImage() */
+
+TEST_WITH_ARG(vxCreateImageFromChannel, testChannelFromHandle, CreateImageFromChannel_Arg,
+    CREATE_IMAGE_FROM_CHANNEL_RANDOM_IMAGE_PARAMETERS
+)
+{
+    vx_context context = context_->vx_context_;
+    vx_image src = 0;
+    vx_image ref = 0;
+    vx_image tst = 0;
+
+    vx_uint32 width  = arg_->width;
+    vx_uint32 height = arg_->height;
+
+    CT_Image image = NULL;
+
+    ASSERT_NO_FAILURE(image = arg_->generator(arg_->fileName, arg_->width, arg_->height, arg_->format));
+
+    {
+        vx_uint32 n;
+        vx_uint32 nplanes;
+
+        vx_enum channel[VX_PLANE_MAX] = { VX_CHANNEL_Y, VX_CHANNEL_U, VX_CHANNEL_V, 0 };
+
+        vx_imagepatch_addressing_t addr[VX_PLANE_MAX] =
+        {
+            VX_IMAGEPATCH_ADDR_INIT,
+            VX_IMAGEPATCH_ADDR_INIT,
+            VX_IMAGEPATCH_ADDR_INIT,
+            VX_IMAGEPATCH_ADDR_INIT
+        };
+        void* ptrs[VX_PLANE_MAX] = { 0, 0, 0, 0 };
+
+        ASSERT_NO_FAILURE(nplanes = ct_get_num_planes(arg_->format));
+
+        for (n = 0; n < nplanes; n++)
+        {
+            addr[n].dim_x    = image->width  / ct_image_get_channel_subsampling_x(image, channel[n]);
+            addr[n].dim_y    = image->height / ct_image_get_channel_subsampling_y(image, channel[n]);
+            addr[n].stride_x = ct_image_get_channel_step_x(image, channel[n]);
+            addr[n].stride_y = ct_image_get_channel_step_y(image, channel[n]);
+
+            ptrs[n] = ct_image_get_plane_base(image, n);
+        }
+
+        EXPECT_VX_OBJECT(src = vxCreateImageFromHandle(context, arg_->format, addr, ptrs, VX_MEMORY_TYPE_HOST), VX_TYPE_IMAGE);
+    }
+
+    if (VX_CHANNEL_Y != arg_->channel && VX_DF_IMAGE_IYUV == arg_->format)
+    {
+        width  /= 2;
+        height /= 2;
+    }
+
+    EXPECT_VX_OBJECT(ref = vxCreateImage(context, width, height, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
+    VX_CALL(vxuChannelExtract(context, src, arg_->channel, ref));
+
+    EXPECT_VX_OBJECT(tst = vxCreateImageFromChannel(src, arg_->channel), VX_TYPE_IMAGE);
+
+    {
+        CT_Image image_ref = ct_image_from_vx_image(ref);
+        CT_Image image_tst = ct_image_from_vx_image(tst);
+
+        EXPECT_EQ_CTIMAGE(image_ref, image_tst);
+    }
+
+    VX_CALL(vxReleaseImage(&ref));
+    VX_CALL(vxReleaseImage(&tst));
+    VX_CALL(vxReleaseImage(&src));
+} /* testChannelFromHandle() */
+
+
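+/*
+// Minimal sketch (hypothetical helper, not called by the tests): user-memory
+// addressing for a single-plane U1 patch, as set up inline in the
+// vxCopyImagePatch/vxMapImagePatch tests below. U1 uses stride_x_bits instead
+// of stride_x, and each row is padded to a whole number of bytes.
+*/
+static void own_init_u1_user_addr(vx_imagepatch_addressing_t* addr,
+                                  vx_uint32 dim_x, vx_uint32 dim_y)
+{
+    addr->dim_x         = dim_x;
+    addr->dim_y         = dim_y;
+    addr->stride_x      = 0;                              /* not used for U1 */
+    addr->stride_x_bits = 1;                              /* one bit per pixel */
+    addr->stride_y      = (dim_x * addr->stride_x_bits + 7) / 8; /* bytes per row */
+}
+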
+/* ***************************************************************************
 //  vxCopyImagePatch tests
 */
 TESTCASE(vxCopyImagePatch, CT_VXContext, ct_setup_vx_context, 0)
@@ -2443,23 +2748,29 @@
 typedef struct
 {
     const char* testName;
+    CT_Image(*generator)(const char* fileName, int width, int height, vx_df_image format);
     const char* fileName;
     int width;
     int height;
     vx_df_image format;
+} CopyImagePatch_Arg;
 
-} ReadUniformImage_Arg;
+#define COPY_IMAGE_PATCH_UNIFORM_IMAGE_PARAMETERS \
+    CT_GENERATE_PARAMETERS("uniform",      ADD_SIZE_SMALL_SET, ADD_IMAGE_FORMATS,   ARG, NULL, NULL), \
+    CT_GENERATE_PARAMETERS("_U1_/uniform", ADD_SIZE_SMALL_SET, ADD_IMAGE_FORMAT_U1, ARG, NULL, NULL)
 
-
-#define PARAMETERS \
-    CT_GENERATE_PARAMETERS("uniform", ADD_SIZE_SMALL_SET, ADD_IMAGE_FORMAT, ARG, NULL)
+#define COPY_IMAGE_PATCH_RANDOM_IMAGE_PARAMETERS \
+    CT_GENERATE_PARAMETERS("random",      ADD_SIZE_SMALL_SET, ADD_IMAGE_FORMATS,   ARG, own_generate_rand_image, NULL), \
+    CT_GENERATE_PARAMETERS("_U1_/random", ADD_SIZE_SMALL_SET, ADD_IMAGE_FORMAT_U1, ARG, own_generate_rand_image, NULL)
 
 /*
 // test vxCopyImagePatch in READ_ONLY mode from uniform image,
 // independently of vxCopyImagePatch in write mode
 // or vxAccessImagePatch/vxCommitImagePatch functions
 */
-TEST_WITH_ARG(vxCopyImagePatch, testReadUniformImage, ReadUniformImage_Arg, PARAMETERS)
+TEST_WITH_ARG(vxCopyImagePatch, testReadUniformImage, CopyImagePatch_Arg,
+    COPY_IMAGE_PATCH_UNIFORM_IMAGE_PARAMETERS
+)
 {
     vx_context context = context_->vx_context_;
 
@@ -2484,8 +2795,24 @@
         vx_imagepatch_addressing_t addr = VX_IMAGEPATCH_ADDR_INIT;
         void*   ptr = 0;
         vx_size sz  = 0;
+        vx_map_id image_map_id;
+        vx_imagepatch_addressing_t map_addr;
+        void *image_base = NULL;
+        vx_size numPixels;
 
-        sz = vxComputeImagePatchSize(image, &rect, (vx_uint32)plane);
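+        /* derive the scratch buffer size from the mapped addressing (instead of
+         * vxComputeImagePatchSize): U1 planes report stride_x == 0 and expose
+         * stride_x_bits, so the byte size is computed from the bit stride */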
+        VX_CALL(vxMapImagePatch(image, &rect, (vx_uint32)plane, &image_map_id, &map_addr, &image_base, VX_READ_ONLY, VX_MEMORY_TYPE_HOST, 0));
+        numPixels = ((rect.end_x-rect.start_x) * VX_SCALE_UNITY/map_addr.scale_x) *
+                     ((rect.end_y-rect.start_y) * VX_SCALE_UNITY/map_addr.scale_y);
+        if (map_addr.stride_x == 0 && map_addr.stride_x_bits != 0)
+        {
+            sz = numPixels * (map_addr.stride_x_bits *
+                ((rect.end_x-rect.start_x) * VX_SCALE_UNITY/map_addr.scale_x) / 8);
+        }
+        else
+        {
+            sz = numPixels * map_addr.stride_x;
+        }
+        VX_CALL(vxUnmapImagePatch(image, image_map_id));
 
         ptr = ct_alloc_mem(sz);
         ASSERT(NULL != ptr);
@@ -2493,7 +2820,12 @@
         addr.dim_x    = arg_->width  / own_plane_subsampling_x(arg_->format, plane);
         addr.dim_y    = arg_->height / own_plane_subsampling_y(arg_->format, plane);
         addr.stride_x = own_elem_size(arg_->format, plane);
-        addr.stride_y = addr.dim_x * addr.stride_x;
+        if (arg_->format == VX_DF_IMAGE_U1)
+        {
+            addr.stride_x_bits = 1;
+            addr.stride_y = (addr.dim_x * addr.stride_x_bits + 7) / 8;
+        }
+        else
+        {
+            addr.stride_y = addr.dim_x * addr.stride_x;
+        }
 
         /* read image patch */
         VX_CALL(vxCopyImagePatch(image, &rect, plane, &addr, ptr, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
@@ -2510,24 +2842,9 @@
     return;
 } /* testReadUniformImage() */
 
-typedef struct
-{
-    const char* testName;
-    CT_Image(*generator)(const char* fileName, int width, int height, vx_df_image format);
-    const char* fileName;
-    int width;
-    int height;
-    vx_df_image format;
-
-} ReadRandomImage_Arg;
-
-#ifdef PARAMETERS
-#undef PARAMETERS
-#endif
-#define PARAMETERS \
-    CT_GENERATE_PARAMETERS("random", ADD_SIZE_SMALL_SET, ADD_IMAGE_FORMAT, ARG, own_generate_rand_image, NULL)
-
-TEST_WITH_ARG(vxCopyImagePatch, testReadRandomImage, ReadRandomImage_Arg, PARAMETERS)
+TEST_WITH_ARG(vxCopyImagePatch, testReadRandomImage, CopyImagePatch_Arg,
+    COPY_IMAGE_PATCH_RANDOM_IMAGE_PARAMETERS
+)
 {
     vx_context context = context_->vx_context_;
 
@@ -2560,7 +2877,24 @@
         vx_size sz = 0;
         vx_uint32 elem_size = own_elem_size(arg_->format, plane);
 
-        sz = vxComputeImagePatchSize(image, &rect, plane);
+        vx_map_id image_map_id;
+        vx_imagepatch_addressing_t map_addr;
+        void *image_base = NULL;
+        vx_size numPixels;
+
+        VX_CALL(vxMapImagePatch(image, &rect, (vx_uint32)plane, &image_map_id, &map_addr, &image_base, VX_READ_ONLY, VX_MEMORY_TYPE_HOST, 0));
+        numPixels = ((rect.end_x-rect.start_x) * VX_SCALE_UNITY/map_addr.scale_x) *
+                     ((rect.end_y-rect.start_y) * VX_SCALE_UNITY/map_addr.scale_y);
+        if (map_addr.stride_x == 0 && map_addr.stride_x_bits != 0)
+        {
+            sz = numPixels * (map_addr.stride_x_bits *
+                ((rect.end_x-rect.start_x) * VX_SCALE_UNITY/map_addr.scale_x) / 8);
+        }
+        else
+        {
+            sz = numPixels * map_addr.stride_x;
+        }
+        VX_CALL(vxUnmapImagePatch(image, image_map_id));
 
         ptr = ct_alloc_mem(sz);
         ASSERT(NULL != ptr);
@@ -2568,7 +2902,12 @@
         tst_addr.dim_x    = arg_->width  / own_plane_subsampling_x(arg_->format, plane);
         tst_addr.dim_y    = arg_->height / own_plane_subsampling_y(arg_->format, plane);
         tst_addr.stride_x = elem_size;
-        tst_addr.stride_y = tst_addr.dim_x * tst_addr.stride_x;
+        if (arg_->format == VX_DF_IMAGE_U1)
+        {
+            tst_addr.stride_x_bits = 1;
+            tst_addr.stride_y = (tst_addr.dim_x * tst_addr.stride_x_bits + 7) / 8;
+        }
+        else
+        {
+            tst_addr.stride_y = tst_addr.dim_x * tst_addr.stride_x;
+        }
 
         VX_CALL(vxCopyImagePatch(image, &rect, plane, &tst_addr, ptr, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
 
@@ -2584,25 +2923,9 @@
     return;
 } /* testReadRandomImage() */
 
-
-typedef struct
-{
-    const char* testName;
-    CT_Image(*generator)(const char* fileName, int width, int height, vx_df_image format);
-    const char* fileName;
-    int width;
-    int height;
-    vx_df_image format;
-
-} WriteRandomImage_Arg;
-
-#ifdef PARAMETERS
-#undef PARAMETERS
-#endif
-#define PARAMETERS \
-    CT_GENERATE_PARAMETERS("random", ADD_SIZE_SMALL_SET, ADD_IMAGE_FORMAT, ARG, own_generate_rand_image, NULL)
-
-TEST_WITH_ARG(vxCopyImagePatch, testWriteRandomImage, WriteRandomImage_Arg, PARAMETERS)
+TEST_WITH_ARG(vxCopyImagePatch, testWriteRandomImage, CopyImagePatch_Arg,
+    COPY_IMAGE_PATCH_RANDOM_IMAGE_PARAMETERS
+)
 {
     vx_context context = context_->vx_context_;
 
@@ -2643,7 +2966,24 @@
         vx_size sz = 0;
         vx_uint32 elem_size = own_elem_size(arg_->format, plane);
 
-        sz = vxComputeImagePatchSize(image, &rect, plane);
+        vx_map_id image_map_id;
+        vx_imagepatch_addressing_t map_addr;
+        void *image_base = NULL;
+        vx_size numPixels;
+
+        VX_CALL(vxMapImagePatch(image, &rect, (vx_uint32)plane, &image_map_id, &map_addr, &image_base, VX_READ_ONLY, VX_MEMORY_TYPE_HOST, 0));
+        numPixels = ((rect.end_x-rect.start_x) * VX_SCALE_UNITY/map_addr.scale_x) *
+                     ((rect.end_y-rect.start_y) * VX_SCALE_UNITY/map_addr.scale_y);
+        if (map_addr.stride_x == 0 && map_addr.stride_x_bits != 0)
+        {
+            sz = numPixels * (map_addr.stride_x_bits *
+                ((rect.end_x-rect.start_x) * VX_SCALE_UNITY/map_addr.scale_x) / 8);
+        }
+        else
+        {
+            sz = numPixels * map_addr.stride_x;
+        }
+        VX_CALL(vxUnmapImagePatch(image, image_map_id));
 
         ptr = ct_alloc_mem(sz);
         ASSERT(NULL != ptr);
@@ -2651,7 +2991,12 @@
         tst_addr.dim_x    = arg_->width  / own_plane_subsampling_x(arg_->format, plane);
         tst_addr.dim_y    = arg_->height / own_plane_subsampling_y(arg_->format, plane);
         tst_addr.stride_x = elem_size;
-        tst_addr.stride_y = tst_addr.dim_x * tst_addr.stride_x;
+        if (arg_->format == VX_DF_IMAGE_U1)
+        {
+            tst_addr.stride_x_bits = 1;
+            tst_addr.stride_y = (tst_addr.dim_x * tst_addr.stride_x_bits + 7) / 8;
+        }
+        else
+        {
+            tst_addr.stride_y = tst_addr.dim_x * tst_addr.stride_x;
+        }
 
         VX_CALL(vxCopyImagePatch(image, &rect, plane, &tst_addr, ptr, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
 
@@ -2676,24 +3021,29 @@
 typedef struct
 {
     const char* testName;
+    CT_Image(*generator)(const char* fileName, int width, int height, vx_df_image format);
     const char* fileName;
     int width;
     int height;
     vx_df_image format;
+} MapImagePatch_Arg;
 
-} MapReadUniformImage_Arg;
+#define MAP_IMAGE_PATCH_UNIFORM_IMAGE_PARAMETERS \
+    CT_GENERATE_PARAMETERS("uniform",      ADD_SIZE_SMALL_SET, ADD_IMAGE_FORMATS,   ARG, NULL, NULL), \
+    CT_GENERATE_PARAMETERS("_U1_/uniform", ADD_SIZE_SMALL_SET, ADD_IMAGE_FORMAT_U1, ARG, NULL, NULL)
 
-
-#undef PARAMETERS
-#define PARAMETERS \
-    CT_GENERATE_PARAMETERS("uniform", ADD_SIZE_SMALL_SET, ADD_IMAGE_FORMAT, ARG, NULL)
+#define MAP_IMAGE_PATCH_RANDOM_IMAGE_PARAMETERS \
+    CT_GENERATE_PARAMETERS("random",      ADD_SIZE_SMALL_SET, ADD_IMAGE_FORMATS,   ARG, own_generate_rand_image, NULL), \
+    CT_GENERATE_PARAMETERS("_U1_/random", ADD_SIZE_SMALL_SET, ADD_IMAGE_FORMAT_U1, ARG, own_generate_rand_image, NULL)
 
 /*
 // test vxMapImagePatch in READ_ONLY mode from uniform image,
 // independently of vxMapImagePatch/vxCopyImagePatch in write mode
 // or vxAccessImagePatch/vxCommitImagePatch functions
 */
-TEST_WITH_ARG(vxMapImagePatch, testMapReadUniformImage, MapReadUniformImage_Arg, PARAMETERS)
+TEST_WITH_ARG(vxMapImagePatch, testMapReadUniformImage, MapImagePatch_Arg,
+    MAP_IMAGE_PATCH_UNIFORM_IMAGE_PARAMETERS
+)
 {
     vx_context context = context_->vx_context_;
 
@@ -2729,6 +3079,13 @@
             {
                 switch (arg_->format)
                 {
+                case VX_DF_IMAGE_U1:
+                {
+                    vx_uint8* tst = (vx_uint8*)vxFormatImagePatchAddress2d(ptr, x, y, &addr);
+                    ASSERT_EQ_INT(ref_val.U1 ? 1 : 0, (tst[0] & (1 << (x % 8))) >> (x % 8));
+                }
+                break;
+
                 case VX_DF_IMAGE_U8:
                 {
                     vx_uint8* tst = (vx_uint8*)vxFormatImagePatchAddress2d(ptr, x, y, &addr);
@@ -2865,24 +3222,9 @@
     return;
 } /* testMapReadUniformImage() */
 
-typedef struct
-{
-    const char* testName;
-    CT_Image(*generator)(const char* fileName, int width, int height, vx_df_image format);
-    const char* fileName;
-    int width;
-    int height;
-    vx_df_image format;
-
-} MapReadRandomImage_Arg;
-
-#ifdef PARAMETERS
-#undef PARAMETERS
-#endif
-#define PARAMETERS \
-    CT_GENERATE_PARAMETERS("random", ADD_SIZE_SMALL_SET, ADD_IMAGE_FORMAT, ARG, own_generate_rand_image, NULL)
-
-TEST_WITH_ARG(vxMapImagePatch, testMapReadRandomImage, MapReadRandomImage_Arg, PARAMETERS)
+TEST_WITH_ARG(vxMapImagePatch, testMapReadRandomImage, MapImagePatch_Arg,
+    MAP_IMAGE_PATCH_RANDOM_IMAGE_PARAMETERS
+)
 {
     vx_context context = context_->vx_context_;
 
@@ -2929,24 +3271,9 @@
     return;
 } /* testMapReadRandomImage() */
 
-typedef struct
-{
-    const char* testName;
-    CT_Image(*generator)(const char* fileName, int width, int height, vx_df_image format);
-    const char* fileName;
-    int width;
-    int height;
-    vx_df_image format;
-
-} MapReadWriteRandomImage_Arg;
-
-#ifdef PARAMETERS
-#undef PARAMETERS
-#endif
-#define PARAMETERS \
-    CT_GENERATE_PARAMETERS("random", ADD_SIZE_SMALL_SET, ADD_IMAGE_FORMAT, ARG, own_generate_rand_image, NULL)
-
-TEST_WITH_ARG(vxMapImagePatch, testMapReadWriteRandomImage, MapReadWriteRandomImage_Arg, PARAMETERS)
+TEST_WITH_ARG(vxMapImagePatch, testMapReadWriteRandomImage, MapImagePatch_Arg,
+    MAP_IMAGE_PATCH_RANDOM_IMAGE_PARAMETERS
+)
 {
     vx_context context = context_->vx_context_;
 
@@ -3000,6 +3327,15 @@
             {
                 switch (arg_->format)
                 {
+                case VX_DF_IMAGE_U1:
+                {
+                    vx_uint8 offset = x % 8;
+                    vx_uint8* tst_ptr = (vx_uint8*)vxFormatImagePatchAddress2d(ptr, x, y, &tst_addr);
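+                    /* invert only the pixel bit at 'offset'; the other bits of the byte are preserved */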
+                    tst_ptr[0] = ( tst_ptr[0] & ~(1 << offset)) |
+                                 (~tst_ptr[0] &  (1 << offset));
+                }
+                break;
+
                 case VX_DF_IMAGE_U8:
                 {
                     vx_uint8* tst_ptr = (vx_uint8*)vxFormatImagePatchAddress2d(ptr, x, y, &tst_addr);
@@ -3130,6 +3466,17 @@
             {
                 switch (arg_->format)
                 {
+                case VX_DF_IMAGE_U1:
+                {
+                    vx_uint8  offset  = x % 8;
+                    vx_uint8* ref_ptr = (vx_uint8*)((vx_uint8*)ref->data.y + y * ct_stride_bytes(ref) +
+                                                    (x * ct_image_bits_per_pixel(VX_DF_IMAGE_U1)) / 8);
+                    vx_uint8* tst_ptr = (vx_uint8*)vxFormatImagePatchAddress2d(ptr, x, y, &tst_addr);
+                    ASSERT_EQ_INT(( ref_ptr[0] & (1 << offset)) >> offset,
+                                  (~tst_ptr[0] & (1 << offset)) >> offset);
+                }
+                break;
+
                 case VX_DF_IMAGE_U8:
                 {
                     vx_uint8* ref_ptr = (vx_uint8*)((vx_uint8*)ref->data.y + y * ref->stride * elem_size + x * elem_size);
@@ -3261,24 +3608,9 @@
     return;
 } /* testMapReadWriteRandomImage() */
 
-typedef struct
-{
-    const char* testName;
-    CT_Image(*generator)(const char* fileName, int width, int height, vx_df_image format);
-    const char* fileName;
-    int width;
-    int height;
-    vx_df_image format;
-
-} MapWriteRandomImage_Arg;
-
-#ifdef PARAMETERS
-#undef PARAMETERS
-#endif
-#define PARAMETERS \
-    CT_GENERATE_PARAMETERS("random", ADD_SIZE_SMALL_SET, ADD_IMAGE_FORMAT, ARG, own_generate_rand_image, NULL)
-
-TEST_WITH_ARG(vxMapImagePatch, testMapWriteRandomImage, MapWriteRandomImage_Arg, PARAMETERS)
+TEST_WITH_ARG(vxMapImagePatch, testMapWriteRandomImage, MapImagePatch_Arg,
+    MAP_IMAGE_PATCH_RANDOM_IMAGE_PARAMETERS
+)
 {
     vx_context context = context_->vx_context_;
 
@@ -3327,6 +3659,17 @@
             {
                 switch (arg_->format)
                 {
+                case VX_DF_IMAGE_U1:
+                {
+                    vx_uint8  offset  = x % 8;
+                    vx_uint8* ref_ptr = (vx_uint8*)((vx_uint8*)ref->data.y + y * ct_stride_bytes(ref) +
+                                                    (x * ct_image_bits_per_pixel(VX_DF_IMAGE_U1)) / 8);
+                    vx_uint8* tst_ptr = (vx_uint8*)vxFormatImagePatchAddress2d(ptr, x, y, &tst_addr);
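+                    /* write the inverted reference bit into bit 'offset' of the mapped byte,
+                     * leaving the neighbouring pixels untouched */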
+                    tst_ptr[0] = (( tst_ptr[0] & ~(1 << offset)) |
+                                  (~ref_ptr[0] &  (1 << offset)));
+                }
+                break;
+
                 case VX_DF_IMAGE_U8:
                 {
                     vx_uint8* ref_ptr = (vx_uint8*)((vx_uint8*)ref->data.y + y * ref->stride * elem_size + x * elem_size);
@@ -3471,6 +3814,17 @@
             {
                 switch (arg_->format)
                 {
+                case VX_DF_IMAGE_U1:
+                {
+                    vx_uint8  offset  = x % 8;
+                    vx_uint8* ref_ptr = (vx_uint8*)((vx_uint8*)ref->data.y + y * ct_stride_bytes(ref) +
+                                                    (x * ct_image_bits_per_pixel(VX_DF_IMAGE_U1)) / 8);
+                    vx_uint8* tst_ptr = (vx_uint8*)vxFormatImagePatchAddress2d(ptr, x, y, &tst_addr);
+                    ASSERT_EQ_INT(( ref_ptr[0] & (1 << offset)) >> offset,
+                                  (~tst_ptr[0] & (1 << offset)) >> offset);
+                }
+                break;
+
                 case VX_DF_IMAGE_U8:
                 {
                     vx_uint8* ref_ptr = (vx_uint8*)((vx_uint8*)ref->data.y + y * ref->stride * elem_size + x * elem_size);
@@ -3602,310 +3956,9 @@
     return;
 } /* testMapWriteRandomImage() */
 
-
-/* ***************************************************************************
-//  vxCreateImageFromChannel tests
-*/
-TESTCASE(vxCreateImageFromChannel, CT_VXContext, ct_setup_vx_context, 0)
-
-typedef struct
-{
-    const char*      testName;
-    CT_Image(*generator)(const char* fileName, int width, int height, vx_df_image format);
-    const char*      fileName;
-    int              width;
-    int              height;
-    vx_df_image      format;
-    vx_enum          channel;
-
-} CreateImageFromChannel_Arg;
-
-
-#define ADD_IMAGE_FORMAT_444(testArgName, nextmacro, ...) \
-    CT_EXPAND(nextmacro(testArgName "/VX_DF_IMAGE_YUV4", __VA_ARGS__, VX_DF_IMAGE_YUV4))
-
-#define ADD_IMAGE_FORMAT_420(testArgName, nextmacro, ...) \
-    CT_EXPAND(nextmacro(testArgName "/VX_DF_IMAGE_IYUV", __VA_ARGS__, VX_DF_IMAGE_IYUV)), \
-    CT_EXPAND(nextmacro(testArgName "/VX_DF_IMAGE_NV12", __VA_ARGS__, VX_DF_IMAGE_NV12)), \
-    CT_EXPAND(nextmacro(testArgName "/VX_DF_IMAGE_NV21", __VA_ARGS__, VX_DF_IMAGE_NV21))
-
-#define ADD_IMAGE_CHANNEL_YUV(testArgName, nextmacro, ...) \
-    CT_EXPAND(nextmacro(testArgName "/VX_CHANNEL_Y", __VA_ARGS__, VX_CHANNEL_Y)), \
-    CT_EXPAND(nextmacro(testArgName "/VX_CHANNEL_U", __VA_ARGS__, VX_CHANNEL_U)), \
-    CT_EXPAND(nextmacro(testArgName "/VX_CHANNEL_V", __VA_ARGS__, VX_CHANNEL_V))
-
-#define ADD_IMAGE_CHANNEL_Y(testArgName, nextmacro, ...) \
-    CT_EXPAND(nextmacro(testArgName "/VX_CHANNEL_Y", __VA_ARGS__, VX_CHANNEL_Y))
-
-#undef PARAMETERS
-#define PARAMETERS \
-    CT_GENERATE_PARAMETERS("uniform", ADD_SIZE_SMALL_SET, ADD_IMAGE_FORMAT_444, ADD_IMAGE_CHANNEL_YUV, ARG, NULL, NULL), \
-    CT_GENERATE_PARAMETERS("uniform", ADD_SIZE_SMALL_SET, ADD_IMAGE_FORMAT_420, ADD_IMAGE_CHANNEL_Y, ARG, NULL, NULL)
-
-
-TEST_WITH_ARG(vxCreateImageFromChannel, testChannelFromUniformImage, CreateImageFromChannel_Arg, PARAMETERS)
-{
-    vx_context context = context_->vx_context_;
-    vx_image src = 0;
-    vx_image ref = 0;
-    vx_image tst = 0;
-    vx_uint32 width  = arg_->width;
-    vx_uint32 height = arg_->height;
-    vx_pixel_value_t pixel_value;
-
-    pixel_value.YUV[0] = 0x55;
-    pixel_value.YUV[1] = 0xAA;
-    pixel_value.YUV[2] = 0x33;
-
-    EXPECT_VX_OBJECT(src = vxCreateUniformImage(context, arg_->width, arg_->height, arg_->format, &pixel_value), VX_TYPE_IMAGE);
-
-    if (VX_CHANNEL_Y != arg_->channel && VX_DF_IMAGE_IYUV == arg_->format)
-    {
-        width  /= 2;
-        height /= 2;
-    }
-
-    EXPECT_VX_OBJECT(ref = vxCreateImage(context, width, height, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
-    VX_CALL(vxuChannelExtract(context, src, arg_->channel, ref));
-
-    EXPECT_VX_OBJECT(tst = vxCreateImageFromChannel(src, arg_->channel), VX_TYPE_IMAGE);
-
-    {
-        CT_Image image_ref = ct_image_from_vx_image(ref);
-        CT_Image image_tst = ct_image_from_vx_image(tst);
-
-        EXPECT_EQ_CTIMAGE(image_ref, image_tst);
-    }
-
-    VX_CALL(vxReleaseImage(&ref));
-    VX_CALL(vxReleaseImage(&tst));
-    VX_CALL(vxReleaseImage(&src));
-}
-
-#undef PARAMETERS
-#define PARAMETERS \
-    CT_GENERATE_PARAMETERS("rand", ADD_SIZE_SMALL_SET, ADD_IMAGE_FORMAT_444, ADD_IMAGE_CHANNEL_YUV, ARG, own_generate_rand_image, NULL), \
-    CT_GENERATE_PARAMETERS("rand", ADD_SIZE_SMALL_SET, ADD_IMAGE_FORMAT_420, ADD_IMAGE_CHANNEL_Y, ARG, own_generate_rand_image, NULL)
-
-TEST_WITH_ARG(vxCreateImageFromChannel, testChannelFromRandomImage, CreateImageFromChannel_Arg, PARAMETERS)
-{
-    vx_context context = context_->vx_context_;
-    vx_image src = 0;
-    vx_image ref = 0;
-    vx_image tst = 0;
-    vx_uint32 width  = arg_->width;
-    vx_uint32 height = arg_->height;
-    CT_Image image = NULL;
-
-    ASSERT_NO_FAILURE(image = arg_->generator(arg_->fileName, arg_->width, arg_->height, arg_->format));
-
-    EXPECT_VX_OBJECT(src = ct_image_to_vx_image(image, context), VX_TYPE_IMAGE);
-
-    if (VX_CHANNEL_Y != arg_->channel && VX_DF_IMAGE_IYUV == arg_->format)
-    {
-        width  /= 2;
-        height /= 2;
-    }
-
-    EXPECT_VX_OBJECT(ref = vxCreateImage(context, width, height, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
-    VX_CALL(vxuChannelExtract(context, src, arg_->channel, ref));
-
-    EXPECT_VX_OBJECT(tst = vxCreateImageFromChannel(src, arg_->channel), VX_TYPE_IMAGE);
-
-    {
-        /* 1. check if image created from channel is equal to channel extracted from image */
-        CT_Image image_ref = ct_image_from_vx_image(ref);
-        CT_Image image_tst = ct_image_from_vx_image(tst);
-
-        EXPECT_EQ_CTIMAGE(image_ref, image_tst);
-    }
-
-    {
-        /* 2. check if modification of image created from channel reflected into channel of original image */
-        vx_uint32 i;
-        vx_uint32 j;
-        vx_uint32 p = (VX_CHANNEL_Y == arg_->channel ? 0 : (VX_CHANNEL_U == arg_->channel ? 1 : 2));
-        vx_rectangle_t rect = { 1, 1, 6, 6 };
-        vx_imagepatch_addressing_t addr =
-        {
-            rect.end_x - rect.start_x,
-            rect.end_y - rect.start_y,
-            1,
-            rect.end_x - rect.start_x
-        };
-
-        vx_size sz = 0;
-        void* ptr = 0;
-
-        sz = vxComputeImagePatchSize(tst, &rect, 0);
-
-        ptr = ct_alloc_mem(sz);
-
-        /* fill image patch with some values */
-        for (i = 0; i < addr.dim_y; i++)
-        {
-            vx_uint8* p = (vx_uint8*)ptr + i * addr.stride_x;
-            for (j = 0; j < addr.dim_x; j++)
-            {
-                p[j] = (vx_uint8)(i + j);
-            }
-        }
-
-        /* copy patch to channel image */
-        vxCopyImagePatch(tst, &rect, 0, &addr, ptr, VX_WRITE_ONLY, VX_MEMORY_TYPE_HOST);
-
-        /* clean patch memory */
-        ct_memset(ptr, 0, sz);
-
-        /* get channel patch from original image */
-        vxCopyImagePatch(src, &rect, p, &addr, ptr, VX_READ_ONLY, VX_MEMORY_TYPE_HOST);
-
-        /* check channel changes has been reflected into original image */
-        for (i = 0; i < addr.dim_y; i++)
-        {
-            vx_uint8* p = (vx_uint8*)ptr + i * addr.stride_x;
-            for (j = 0; j < addr.dim_x; j++)
-            {
-                EXPECT_EQ_INT((vx_uint8)(i + j), p[j]);
-            }
-        }
-
-        ct_free_mem(ptr);
-    }
-
-    {
-        /* 3. check if modification of channel in original image reflected into image created from channel */
-        vx_uint32 i;
-        vx_uint32 j;
-        vx_uint32 p = (VX_CHANNEL_Y == arg_->channel ? 0 : (VX_CHANNEL_U == arg_->channel ? 1 : 2));
-        vx_rectangle_t rect = { 1, 1, 6, 6 };
-        vx_imagepatch_addressing_t addr =
-        {
-            rect.end_x - rect.start_x,
-            rect.end_y - rect.start_y,
-            1,
-            rect.end_x - rect.start_x
-        };
-
-        vx_size sz = 0;
-        void* ptr = 0;
-
-        sz = vxComputeImagePatchSize(src, &rect, p);
-
-        ptr = ct_alloc_mem(sz);
-
-        /* fill image patch with some values */
-        for (i = 0; i < addr.dim_y; i++)
-        {
-            vx_uint8* p = (vx_uint8*)ptr + i * addr.stride_x;
-            for (j = 0; j < addr.dim_x; j++)
-            {
-                p[j] = (vx_uint8)(i + j);
-            }
-        }
-
-        /* copy patch to channel of original image */
-        vxCopyImagePatch(src, &rect, p, &addr, ptr, VX_WRITE_ONLY, VX_MEMORY_TYPE_HOST);
-
-        /* clean patch memory */
-        ct_memset(ptr, 0, sz);
-
-        /* get patch from image created from channel */
-        vxCopyImagePatch(tst, &rect, 0, &addr, ptr, VX_READ_ONLY, VX_MEMORY_TYPE_HOST);
-
-        /* check changes of channel in original image has been reflected into channel image */
-        for (i = 0; i < addr.dim_y; i++)
-        {
-            vx_uint8* p = (vx_uint8*)ptr + i * addr.stride_x;
-            for (j = 0; j < addr.dim_x; j++)
-            {
-                EXPECT_EQ_INT((vx_uint8)(i + j), p[j]);
-            }
-        }
-
-        ct_free_mem(ptr);
-    }
-
-    VX_CALL(vxReleaseImage(&ref));
-    VX_CALL(vxReleaseImage(&tst));
-    VX_CALL(vxReleaseImage(&src));
-}
-
-#undef PARAMETERS
-#define PARAMETERS \
-    CT_GENERATE_PARAMETERS("rand", ADD_SIZE_SMALL_SET, ADD_IMAGE_FORMAT_444, ADD_IMAGE_CHANNEL_YUV, ARG, own_generate_rand_image, NULL), \
-    CT_GENERATE_PARAMETERS("rand", ADD_SIZE_SMALL_SET, ADD_IMAGE_FORMAT_420, ADD_IMAGE_CHANNEL_Y, ARG, own_generate_rand_image, NULL)
-
-TEST_WITH_ARG(vxCreateImageFromChannel, testChannelFromHandle, CreateImageFromChannel_Arg, PARAMETERS)
-{
-    vx_context context = context_->vx_context_;
-    vx_image src = 0;
-    vx_image ref = 0;
-    vx_image tst = 0;
-
-    vx_uint32 width  = arg_->width;
-    vx_uint32 height = arg_->height;
-
-    CT_Image image = NULL;
-
-    ASSERT_NO_FAILURE(image = arg_->generator(arg_->fileName, arg_->width, arg_->height, arg_->format));
-
-    {
-        vx_uint32 n;
-        vx_uint32 nplanes;
-
-        vx_enum channel[VX_PLANE_MAX] = { VX_CHANNEL_Y, VX_CHANNEL_U, VX_CHANNEL_V, 0 };
-
-        vx_imagepatch_addressing_t addr[VX_PLANE_MAX] =
-        {
-            VX_IMAGEPATCH_ADDR_INIT,
-            VX_IMAGEPATCH_ADDR_INIT,
-            VX_IMAGEPATCH_ADDR_INIT,
-            VX_IMAGEPATCH_ADDR_INIT
-        };
-        void* ptrs[VX_PLANE_MAX] = { 0, 0, 0, 0 };
-
-        ASSERT_NO_FAILURE(nplanes = ct_get_num_planes(arg_->format));
-
-        for (n = 0; n < nplanes; n++)
-        {
-            addr[n].dim_x    = image->width  / ct_image_get_channel_subsampling_x(image, channel[n]);
-            addr[n].dim_y    = image->height / ct_image_get_channel_subsampling_y(image, channel[n]);
-            addr[n].stride_x = ct_image_get_channel_step_x(image, channel[n]);
-            addr[n].stride_y = ct_image_get_channel_step_y(image, channel[n]);
-
-            ptrs[n] = ct_image_get_plane_base(image, n);
-        }
-
-        EXPECT_VX_OBJECT(src = vxCreateImageFromHandle(context, arg_->format, addr, ptrs, VX_MEMORY_TYPE_HOST), VX_TYPE_IMAGE);
-    }
-
-    if (VX_CHANNEL_Y != arg_->channel && VX_DF_IMAGE_IYUV == arg_->format)
-    {
-        width  /= 2;
-        height /= 2;
-    }
-
-    EXPECT_VX_OBJECT(ref = vxCreateImage(context, width, height, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
-    VX_CALL(vxuChannelExtract(context, src, arg_->channel, ref));
-
-    EXPECT_VX_OBJECT(tst = vxCreateImageFromChannel(src, arg_->channel), VX_TYPE_IMAGE);
-
-    {
-        CT_Image image_ref = ct_image_from_vx_image(ref);
-        CT_Image image_tst = ct_image_from_vx_image(tst);
-
-        EXPECT_EQ_CTIMAGE(image_ref, image_tst);
-    }
-
-    VX_CALL(vxReleaseImage(&ref));
-    VX_CALL(vxReleaseImage(&tst));
-    VX_CALL(vxReleaseImage(&src));
-}
-
-
 TESTCASE_TESTS(Image,
     testRngImageCreation,
+    testImageCreation_U1,
     testVirtualImageCreation,
     testVirtualImageCreationDims,
     testCreateImageFromHandle,
@@ -3914,26 +3967,29 @@
     testConvert_CT_Image,
     testvxSetImagePixelValues,
     testUniformImage,
-    testComputeImagePatchSize,
     DISABLED_testAccessCopyWrite,
     DISABLED_testAccessCopyRead,
     DISABLED_testAccessCopyWriteUniformImage,
     testQueryImage
-    )
+)
+
+TESTCASE_TESTS(vxCreateImageFromChannel,
+    testChannelFromUniformImage,
+    testChannelFromRandomImage,
+    testChannelFromHandle
+)
 
 TESTCASE_TESTS(vxCopyImagePatch,
     testReadUniformImage,
     testReadRandomImage,
-    testWriteRandomImage)
+    testWriteRandomImage
+)
 
 TESTCASE_TESTS(vxMapImagePatch,
     testMapReadUniformImage,
     testMapReadRandomImage,
     testMapReadWriteRandomImage,
-    testMapWriteRandomImage)
+    testMapWriteRandomImage
+)
 
-TESTCASE_TESTS(vxCreateImageFromChannel,
-    testChannelFromUniformImage,
-    testChannelFromRandomImage,
-    testChannelFromHandle)
-
+#endif //OPENVX_USE_ENHANCED_VISION || OPENVX_CONFORMANCE_VISION
diff --git a/test_conformance/test_vxtensor.c b/test_conformance/test_vxtensor.c
new file mode 100644
index 0000000..ab657d9
--- /dev/null
+++ b/test_conformance/test_vxtensor.c
@@ -0,0 +1,665 @@
+/*
+
+* Copyright (c) 2012-2017 The Khronos Group Inc.
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*    http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+#if defined(OPENVX_USE_ENHANCED_VISION) || defined(OPENVX_CONFORMANCE_NEURAL_NETWORKS) || defined(OPENVX_CONFORMANCE_NNEF_IMPORT)
+
+#include <string.h>
+#include <VX/vx.h>
+#include <VX/vxu.h>
+
+#include "test_engine/test.h"
+#include "test_tensor_util.h"
+
+/* ***************************************************************************
+//  Tensor tests
+*/
+TESTCASE(Tensor, CT_VXContext, ct_setup_vx_context, 0)
+
+typedef struct
+{
+    const char * name;
+
+    enum TestTensorDF fmt;
+} test_tensor_arg;
+
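+/*
+// Minimal sketch (hypothetical helper, not called by the tests): the tensor
+// tests below repeatedly build packed strides in which dimension 0 is the
+// densest,
+//     strides[0] = element size,  strides[i] = strides[i - 1] * dims[i - 1].
+*/
+static void own_packed_strides(vx_size num_dims, const vx_size* dims,
+                               vx_size elem_size, vx_size* strides)
+{
+    for (vx_size i = 0; i < num_dims; ++i)
+    {
+        strides[i] = i ? strides[i - 1] * dims[i - 1] : elem_size;
+    }
+}
+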
+TEST_WITH_ARG(Tensor, testvxCreateTensorFromHandle, test_tensor_arg,
+    ARG("Q78_vxCreateTensorFromHandle", TT_Q78),
+    ARG("U8_vxCreateTensorFromHandle", TT_U8),
+    ARG("S8_vxCreateTensorFromHandle", TT_S8),
+    )
+{
+    const vx_context context = context_->vx_context_;
+
+    const enum TestTensorDF fmt = arg_->fmt;
+    assert(fmt == TT_Q78 || fmt == TT_U8 || fmt == TT_S8);
+
+    vx_size max_dims = 0;
+    {   // TODO: ownTestGetMaxDims() ?
+        VX_CALL(vxQueryContext(context, VX_CONTEXT_MAX_TENSOR_DIMS, &max_dims, sizeof(max_dims)));
+        ASSERT(max_dims > 3);
+        if (!DEBUG_TEST_TENSOR_BEYOND_FOUR_DIMS) max_dims = 4; else max_dims = MIN(max_dims, MAX_TENSOR_DIMS);
+    }
+
+    uint64_t rng;
+    {   // TODO: ownTestGetRNG() ?
+        uint64_t * seed = &CT()->seed_;
+        ASSERT(!!seed);
+        CT_RNG_INIT(rng, *seed);
+    }
+
+    vx_enum data_type = 0;
+    vx_uint8 fixed_point_position = 0;
+    vx_size sizeof_data_type = 0;
+    ownUnpackFormat(fmt, &data_type, &fixed_point_position, &sizeof_data_type);
+
+    size_t * const tensor_dims = ct_alloc_mem(sizeof(*tensor_dims) * max_dims);
+    size_t * const tensor_strides = ct_alloc_mem(sizeof(*tensor_strides) * max_dims);
+    ASSERT(tensor_dims && tensor_strides);
+
+    void * ptr = NULL;
+
+    for (vx_size dims = 1; dims <= max_dims; ++dims)
+    {
+        for (int iter = 0; iter < TEST_TENSOR_NUM_ITERATIONS; ++iter)
+        {
+            for (vx_size i = 0; i < dims; ++i)
+            {
+                tensor_dims[i] = (size_t)CT_RNG_NEXT_INT(rng, TEST_TENSOR_MIN_DIM_SZ, TEST_TENSOR_MAX_DIM_SZ + 1);
+
+                tensor_strides[i] = i ? tensor_strides[i - 1] * tensor_dims[i - 1] : sizeof_data_type;
+            }
+
+            vx_tensor src_tensor = vxCreateTensor(context, dims, tensor_dims, data_type, fixed_point_position);
+            ASSERT_VX_OBJECT(src_tensor, VX_TYPE_TENSOR);
+            vx_tensor dst_tensor = vxCreateTensorFromHandle(context, dims, tensor_dims, data_type, fixed_point_position,
+                tensor_strides, ptr, VX_MEMORY_TYPE_HOST);
+            ASSERT_VX_OBJECT(dst_tensor, VX_TYPE_TENSOR);
+
+            //check
+            vx_size src_check_size, dst_check_size;
+            void *src_check_ptr = ct_alloc_mem(sizeof(*tensor_dims) * max_dims);
+            void *dst_check_ptr = ct_alloc_mem(sizeof(*tensor_dims) * max_dims);
+
+            //VX_TENSOR_NUMBER_OF_DIMS
+            src_check_size = dst_check_size = sizeof(vx_size);
+            vxQueryTensor(src_tensor, VX_TENSOR_NUMBER_OF_DIMS, src_check_ptr, src_check_size);
+            vxQueryTensor(dst_tensor, VX_TENSOR_NUMBER_OF_DIMS, dst_check_ptr, dst_check_size);
+            EXPECT_EQ_INT((*(vx_size *)src_check_ptr), (*(vx_size *)dst_check_ptr));
+
+            //VX_TENSOR_DIMS
+            src_check_size = dst_check_size = sizeof(vx_size) * dims;
+            vxQueryTensor(src_tensor, VX_TENSOR_DIMS, src_check_ptr, src_check_size);
+            vxQueryTensor(dst_tensor, VX_TENSOR_DIMS, dst_check_ptr, dst_check_size);
+            EXPECT_EQ_INT((*(vx_size *)src_check_ptr), (*(vx_size *)dst_check_ptr));
+
+            //VX_TENSOR_DATA_TYPE
+            src_check_size = dst_check_size = sizeof(vx_enum);
+            vxQueryTensor(src_tensor, VX_TENSOR_DATA_TYPE, src_check_ptr, src_check_size);
+            vxQueryTensor(dst_tensor, VX_TENSOR_DATA_TYPE, dst_check_ptr, dst_check_size);
+            EXPECT_EQ_INT((*(vx_enum *)src_check_ptr), (*(vx_enum *)dst_check_ptr));
+
+            //VX_TENSOR_FIXED_POINT_POSITION
+            src_check_size = dst_check_size = sizeof(vx_int8);
+            vxQueryTensor(src_tensor, VX_TENSOR_FIXED_POINT_POSITION, src_check_ptr, src_check_size);
+            vxQueryTensor(dst_tensor, VX_TENSOR_FIXED_POINT_POSITION, dst_check_ptr, dst_check_size);
+            EXPECT_EQ_INT((*(vx_int8 *)src_check_ptr), (*(vx_int8 *)dst_check_ptr));
+
+            ct_free_mem(src_check_ptr);
+            ct_free_mem(dst_check_ptr);
+
+            VX_CALL(vxReleaseTensor(&src_tensor));
+            VX_CALL(vxReleaseTensor(&dst_tensor));
+
+            EXPECT_EQ_PTR(NULL, src_tensor);
+            EXPECT_EQ_PTR(NULL, dst_tensor);
+        }
+    }
+
+    ct_free_mem(tensor_dims);
+    ct_free_mem(tensor_strides);
+}
+
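+/*
+// Illustrative sketch (hypothetical helper, not called by the tests): the
+// verification loops below turn a linear element index into a byte offset by
+// peeling off one dimension at a time with its stride.
+*/
+static size_t own_elem_byte_offset(size_t index, vx_size num_dims,
+                                   const size_t* dims, const size_t* strides)
+{
+    size_t offset = 0;
+    for (vx_size i = 0; i < num_dims; ++i)
+    {
+        offset += strides[i] * (index % dims[i]);
+        index  /= dims[i];
+    }
+    return offset;
+}
+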
+TEST_WITH_ARG(Tensor, testvxSwapTensorHandle, test_tensor_arg,
+    ARG("Q78_vxSwapTensorHandle", TT_Q78),
+    ARG("U8_vxSwapTensorHandle", TT_U8),
+    ARG("S8_vxSwapTensorHandle", TT_S8),
+    )
+{
+    const vx_context context = context_->vx_context_;
+
+    vx_status ret;
+
+    const enum TestTensorDF fmt = arg_->fmt;
+    assert(fmt == TT_Q78 || fmt == TT_U8 || fmt == TT_S8);
+
+    vx_size max_dims = 0;
+    {
+        VX_CALL(vxQueryContext(context, VX_CONTEXT_MAX_TENSOR_DIMS, &max_dims, sizeof(max_dims)));
+        ASSERT(max_dims > 3);
+        if (!DEBUG_TEST_TENSOR_BEYOND_FOUR_DIMS) max_dims = 4; else max_dims = MIN(max_dims, MAX_TENSOR_DIMS);
+    }
+
+    uint64_t rng;
+    {
+        uint64_t * seed = &CT()->seed_;
+        ASSERT(!!seed);
+        CT_RNG_INIT(rng, *seed);
+    }
+
+    uint64_t rng2;
+    {
+        uint64_t * seed = &CT()->seed_;
+        ASSERT(!!seed);
+        CT_RNG_INIT(rng2, *seed);
+    }
+
+    vx_enum data_type = 0;
+    vx_uint8 fixed_point_position = 0;
+    vx_size sizeof_data_type = 0;
+    ownUnpackFormat(fmt, &data_type, &fixed_point_position, &sizeof_data_type);
+
+    size_t * const in0_dims = ct_alloc_mem(sizeof(*in0_dims) * max_dims);
+    ASSERT(in0_dims);
+
+    size_t * const in0_strides = ct_alloc_mem(sizeof(*in0_strides) * max_dims);
+    ASSERT(in0_strides);
+
+    for (vx_size i = 0; i < max_dims; ++i)
+    {
+        in0_dims[i] = CLAMP(rng, TEST_TENSOR_MIN_DIM_SZ, TEST_TENSOR_MAX_DIM_SZ / 2);
+        in0_strides[i] = i ? in0_strides[i - 1] * in0_dims[i - 1] : sizeof_data_type;
+    }
+
+    vx_tensor in0_tensor = vxCreateTensor(context, max_dims, in0_dims, data_type, fixed_point_position);
+    vx_tensor in1_tensor = vxCreateTensor(context, max_dims, in0_dims, data_type, fixed_point_position);
+    ASSERT_VX_OBJECT(in0_tensor, VX_TYPE_TENSOR);
+    ASSERT_VX_OBJECT(in1_tensor, VX_TYPE_TENSOR);
+
+    size_t in0_bytes = 1;
+    for (vx_size i = 0; i < max_dims; ++i)
+    {
+        in0_bytes *= in0_dims[i];
+    }
+    size_t malloc_bytes = in0_bytes * sizeof_data_type;
+
+    void * in0_data = ct_alloc_mem(malloc_bytes);
+    void * in1_data = ct_alloc_mem(malloc_bytes);
+    void * out0_data = ct_alloc_mem(malloc_bytes);
+    void * out1_data = ct_alloc_mem(malloc_bytes);
+    ASSERT(in0_data && in1_data && out0_data && out1_data);
+
+    {
+        ownFillRandData(fmt, &rng, in0_bytes, in0_data);
+        ownFillRandData(fmt, &rng2, in0_bytes, in1_data);
+
+        vx_size view_start[MAX_TENSOR_DIMS] = { 0 };
+        ret = vxCopyTensorPatch(in0_tensor, max_dims, view_start, in0_dims, in0_strides, in0_data, VX_WRITE_ONLY, VX_MEMORY_TYPE_HOST);
+        EXPECT_EQ_VX_STATUS(VX_SUCCESS, ret);
+        ret = vxCopyTensorPatch(in1_tensor, max_dims, view_start, in0_dims, in0_strides, in1_data, VX_WRITE_ONLY, VX_MEMORY_TYPE_HOST);
+        EXPECT_EQ_VX_STATUS(VX_SUCCESS, ret);
+    }
+
+    // Swap in0_tensor's data handle with in1_data; prev_ptrs receives the previous handle
+    void* prev_ptrs[6] = { 0, 0, 0, 0, 0, 0 };
+    ret = vxSwapTensorHandle(in0_tensor, in1_data, prev_ptrs);
+    EXPECT_EQ_VX_STATUS(VX_SUCCESS, ret);
+
+    {
+        const size_t view_start[MAX_TENSOR_DIMS] = { 0 };
+        VX_CALL(vxCopyTensorPatch(in0_tensor, max_dims, view_start, in0_dims, in0_strides, out0_data, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+        VX_CALL(vxCopyTensorPatch(in1_tensor, max_dims, view_start, in0_dims, in0_strides, out1_data, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+
+        // Verify the results for new_ptr
+        for (size_t index = 0; index < in0_bytes; ++index)
+        {
+            size_t out_byte_offset = 0;
+            vx_size index_leftover = index;
+            int divisor = 1;
+            for (vx_size i = 0; i < max_dims; i++)
+            {
+                divisor = (vx_uint32)(in0_dims[i]);
+                vx_size curr_dim_index = index_leftover%divisor;
+                out_byte_offset += in0_strides[i] * (curr_dim_index);
+                index_leftover = index_leftover / divisor;
+            }
+
+            const char * out_b_ptr = (char*)out0_data + out_byte_offset;
+            const char * ref_b_ptr = (char*)out1_data + out_byte_offset;
+
+            switch (fmt)
+            {
+            case TT_Q78:
+                {
+                    const vx_int16 out = *(vx_int16*)out_b_ptr;
+                    int16_t ref = *(vx_int16*)ref_b_ptr;
+                    EXPECT_EQ_INT(ref, out);
+                    break;
+                }
+            case TT_U8:
+                {
+                    const vx_uint8 out = *(vx_uint8*)out_b_ptr;
+                    const uint8_t ref = *(vx_uint8*)ref_b_ptr;
+                    EXPECT_EQ_INT(ref, out);
+                    break;
+                }
+            case TT_S8:
+                {
+                    const vx_int8 out = *(vx_int8*)out_b_ptr;
+                    const vx_int8 ref = *(vx_int8*)ref_b_ptr;
+                    EXPECT_EQ_INT(ref, out);
+                    break;
+                }
+            default: assert(0);
+            }
+        }
+
+        // Verify the result for prev_ptr
+        vx_tensor tmp = vxCreateTensorFromHandle(context, max_dims, in0_dims, data_type, fixed_point_position, in0_strides, *prev_ptrs, VX_MEMORY_TYPE_HOST);
+        VX_CALL(vxCopyTensorPatch(tmp, max_dims, view_start, in0_dims, in0_strides, out1_data, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+        for (size_t index = 0; index < in0_bytes; ++index)
+        {
+            size_t out_byte_offset = 0;
+            vx_size index_leftover = index;
+            int divisor = 1;
+            for (vx_size i = 0; i < max_dims; i++)
+            {
+                divisor = (vx_uint32)(in0_dims[i]);
+                vx_size curr_dim_index = index_leftover%divisor;
+                out_byte_offset += in0_strides[i] * (curr_dim_index);
+                index_leftover = index_leftover / divisor;
+            }
+            const char * out_b_ptr = (char*)out1_data + out_byte_offset;
+            const char * ref_b_ptr = (char*)in0_data + out_byte_offset;
+
+            switch (fmt)
+            {
+            case TT_Q78:
+            {
+                const vx_int16 out = *(vx_int16*)out_b_ptr;
+                int16_t ref = *(vx_int16*)ref_b_ptr;
+                EXPECT_EQ_INT(ref, out);
+                break;
+            }
+            case TT_U8:
+            {
+                const vx_uint8 out = *(vx_uint8*)out_b_ptr;
+                const uint8_t ref = *(vx_uint8*)ref_b_ptr;
+                EXPECT_EQ_INT(ref, out);
+                break;
+            }
+            case TT_S8:
+            {
+                const vx_int8 out = *(vx_int8*)out_b_ptr;
+                const vx_int8 ref = *(vx_int8*)ref_b_ptr;
+                EXPECT_EQ_INT(ref, out);
+                break;
+            }
+            default: assert(0);
+            }
+        }
+
+        VX_CALL(vxReleaseTensor(&tmp));
+        EXPECT_EQ_PTR(NULL, tmp);
+    }
+
+    VX_CALL(vxReleaseTensor(&in0_tensor));
+    VX_CALL(vxReleaseTensor(&in1_tensor));
+    EXPECT_EQ_PTR(NULL, in0_tensor);
+    EXPECT_EQ_PTR(NULL, in1_tensor);
+
+    ct_free_mem(in0_data);
+    // No need to free in1_data, as it is freed by vxReleaseTensor(&in0_tensor)
+    ct_free_mem(out0_data);
+    ct_free_mem(out1_data);
+
+    ct_free_mem(in0_dims);
+    ct_free_mem(in0_strides);
+}
+
+TEST_WITH_ARG(Tensor, testMapandUnMapTensorPatch, test_tensor_arg,
+    ARG("Q78_testMapandUnMapTensorPatch", TT_Q78),
+    ARG("U8_testMapandUnMapTensorPatch", TT_U8),
+    ARG("S8_testMapandUnMapTensorPatch", TT_S8),
+    )
+{
+    const vx_context context = context_->vx_context_;
+
+    vx_status ret;
+
+    const enum TestTensorDF fmt = arg_->fmt;
+    assert(fmt == TT_Q78 || fmt == TT_U8 || fmt == TT_S8);
+
+    vx_size max_dims = 0;
+    {
+        VX_CALL(vxQueryContext(context, VX_CONTEXT_MAX_TENSOR_DIMS, &max_dims, sizeof(max_dims)));
+        ASSERT(max_dims > 3);
+        if (!DEBUG_TEST_TENSOR_BEYOND_FOUR_DIMS) max_dims = 4; else max_dims = MIN(max_dims, MAX_TENSOR_DIMS);
+    }
+
+    uint64_t rng;
+    {
+        uint64_t * seed = &CT()->seed_;
+        ASSERT(!!seed);
+        CT_RNG_INIT(rng, *seed);
+    }
+
+    vx_enum data_type = 0;
+    vx_uint8 fixed_point_position = 0;
+    vx_size sizeof_data_type = 0;
+    ownUnpackFormat(fmt, &data_type, &fixed_point_position, &sizeof_data_type);
+
+    size_t * const in0_dims = ct_alloc_mem(sizeof(*in0_dims) * max_dims);
+    ASSERT(in0_dims);
+
+    size_t * const in0_strides = ct_alloc_mem(sizeof(*in0_strides) * max_dims);
+    ASSERT(in0_strides);
+
+    for (vx_size i = 0; i < max_dims; ++i)
+    {
+        in0_dims[i] = (size_t)CT_RNG_NEXT_INT(rng, TEST_TENSOR_MIN_DIM_SZ, TEST_TENSOR_MAX_DIM_SZ + 1);
+        in0_strides[i] = i ? in0_strides[i - 1] * in0_dims[i - 1] : sizeof_data_type;
+    }
+
+    vx_tensor in0_tensor = vxCreateTensor(context, max_dims, in0_dims, data_type, fixed_point_position);
+    ASSERT_VX_OBJECT(in0_tensor, VX_TYPE_TENSOR);
+
+    size_t in0_bytes = 1;
+    for (vx_size i = 0; i < max_dims; ++i)
+    {
+        in0_bytes *= in0_dims[i];
+    }
+    size_t malloc_bytes = in0_bytes * sizeof_data_type;
+
+    void * in0_data = ct_alloc_mem(malloc_bytes);
+    ASSERT(in0_data);
+    // Initialize in0_data with random values and copy it into in0_tensor
+    ownFillRandData(fmt, &rng, in0_bytes, in0_data);
+    vx_size view_start[MAX_TENSOR_DIMS] = { 0 };
+    ret = vxCopyTensorPatch(in0_tensor, max_dims, view_start, in0_dims, in0_strides, in0_data, VX_WRITE_ONLY, VX_MEMORY_TYPE_HOST);
+    EXPECT_EQ_VX_STATUS(VX_SUCCESS, ret);
+
+    vx_map_id map_id;
+    void* plane_ptr = 0;
+
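+    /* map the whole tensor read-only: the implementation returns the patch base in
+     * plane_ptr and writes the strides of the mapped memory back into in0_strides */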
+    ret = vxMapTensorPatch(in0_tensor, max_dims, view_start, in0_dims,
+        &map_id, in0_strides, &plane_ptr, VX_READ_ONLY, VX_MEMORY_TYPE_HOST);
+    EXPECT_EQ_VX_STATUS(VX_SUCCESS, ret);
+
+    for (size_t index = 0; index < in0_bytes; ++index)
+    {
+        const char * out_b_ptr = (char*)plane_ptr + index;
+        const char * ref_b_ptr = (char*)in0_data + index;
+
+        switch (fmt)
+        {
+        case TT_Q78:
+        {
+            const vx_int16 out = *(vx_int16*)out_b_ptr;
+            int16_t ref = *(vx_int16*)ref_b_ptr;
+            EXPECT_EQ_INT(ref, out);
+            break;
+        }
+        case TT_U8:
+        {
+            const vx_uint8 out = *(vx_uint8*)out_b_ptr;
+            const uint8_t ref = *(vx_uint8*)ref_b_ptr;
+            EXPECT_EQ_INT(ref, out);
+            break;
+        }
+        case TT_S8:
+        {
+            const vx_int8 out = *(vx_int8*)out_b_ptr;
+            const vx_int8 ref = *(vx_int8*)ref_b_ptr;
+            EXPECT_EQ_INT(ref, out);
+            break;
+        }
+        default: assert(0);
+        }
+    }
+
+    ret = vxUnmapTensorPatch(in0_tensor, map_id);
+    EXPECT_EQ_VX_STATUS(VX_SUCCESS, ret);
+
+    VX_CALL(vxReleaseTensor(&in0_tensor));
+    EXPECT_EQ_PTR(NULL, in0_tensor);
+
+    ct_free_mem(in0_data);
+    ct_free_mem(in0_dims);
+    ct_free_mem(in0_strides);
+}
+
+TEST_WITH_ARG(Tensor, testvxCreateVirtualTensor, test_tensor_arg,
+    ARG("Q78_vxCreateVirtualTensor", TT_Q78),
+    ARG("U8_vxCreateVirtualTensor", TT_U8),
+    ARG("S8_vxCreateVirtualTensor", TT_S8),
+    )
+{
+    const vx_context context = context_->vx_context_;
+
+    const enum TestTensorDF fmt = arg_->fmt;
+    assert(fmt == TT_Q78 || fmt == TT_U8 || fmt == TT_S8);
+
+    vx_size max_dims = 0;
+    {   // TODO: ownTestGetMaxDims() ?
+        VX_CALL(vxQueryContext(context, VX_CONTEXT_MAX_TENSOR_DIMS, &max_dims, sizeof(max_dims)));
+        ASSERT(max_dims > 3);
+        if (!DEBUG_TEST_TENSOR_BEYOND_FOUR_DIMS) max_dims = 4; else max_dims = MIN(max_dims, MAX_TENSOR_DIMS);
+    }
+
+    uint64_t rng;
+    {   // TODO: ownTestGetRNG() ?
+        uint64_t * seed = &CT()->seed_;
+        ASSERT(!!seed);
+        CT_RNG_INIT(rng, *seed);
+    }
+
+    vx_enum data_type = 0;
+    vx_uint8 fixed_point_position = 0;
+    vx_size sizeof_data_type = 0;
+    ownUnpackFormat(fmt, &data_type, &fixed_point_position, &sizeof_data_type);
+
+    size_t * const tensor_dims = ct_alloc_mem(sizeof(*tensor_dims) * max_dims);
+    ASSERT(tensor_dims);
+
+    for (vx_size dims = 1; dims <= max_dims; ++dims)
+    {
+        for (int iter = 0; iter < TEST_TENSOR_NUM_ITERATIONS; ++iter)
+        {
+            for (vx_size i = 0; i < dims; ++i)
+            {
+                tensor_dims[i] = (size_t)CT_RNG_NEXT_INT(rng, TEST_TENSOR_MIN_DIM_SZ, TEST_TENSOR_MAX_DIM_SZ + 1);
+            }
+
+            vx_graph graph = vxCreateGraph(context);
+            ASSERT_VX_OBJECT(graph, VX_TYPE_GRAPH);
+            vx_tensor src_tensor = vxCreateVirtualTensor(graph, dims, tensor_dims, data_type, fixed_point_position);
+            ASSERT_VX_OBJECT(src_tensor, VX_TYPE_TENSOR);
+
+            //check
+            vx_size src_check_size = 0;
+            vx_size expect_num_of_dims = 0;
+            vx_enum expect_data_type = 0;
+            vx_int8 expect_fixed_point_position = 0;
+            size_t *src_check_ptr = ct_alloc_mem(sizeof(size_t) * max_dims);
+
+            //VX_TENSOR_NUMBER_OF_DIMS
+            src_check_size = sizeof(vx_size);
+            vxQueryTensor(src_tensor, VX_TENSOR_NUMBER_OF_DIMS, (void *)(&(expect_num_of_dims)), src_check_size);
+            EXPECT_EQ_INT(expect_num_of_dims, dims);
+
+            //VX_TENSOR_DIMS
+            src_check_size = sizeof(vx_size) * dims;
+            vxQueryTensor(src_tensor, VX_TENSOR_DIMS, (void *)src_check_ptr, src_check_size);
+            for (int tmpIdx = 0; tmpIdx < dims; tmpIdx++)
+            {
+                EXPECT_EQ_INT(src_check_ptr[tmpIdx], tensor_dims[tmpIdx]);
+            }
+
+            //VX_TENSOR_DATA_TYPE
+            src_check_size = sizeof(vx_enum);
+            vxQueryTensor(src_tensor, VX_TENSOR_DATA_TYPE, &expect_data_type, sizeof(vx_enum));
+            EXPECT_EQ_INT(expect_data_type, data_type);
+
+            //VX_TENSOR_FIXED_POINT_POSITION
+            src_check_size = sizeof(vx_int8);
+            vxQueryTensor(src_tensor, VX_TENSOR_FIXED_POINT_POSITION, &expect_fixed_point_position, src_check_size);
+            EXPECT_EQ_INT(expect_fixed_point_position, fixed_point_position);
+
+            ct_free_mem(src_check_ptr);
+
+            VX_CALL(vxReleaseTensor(&src_tensor));
+            VX_CALL(vxReleaseGraph(&graph));
+
+            EXPECT_EQ_PTR(NULL, src_tensor);
+            EXPECT_EQ_PTR(NULL, graph);
+        }
+    }
+
+    ct_free_mem(tensor_dims);
+}
+#endif
+
+#ifdef OPENVX_USE_ENHANCED_VISION
+/* ***************************************************************************
+ Test enhanced tensor interface:
+         vxCreateImageObjectArrayFromTensor
+*****************************************************************************/
+TESTCASE(TensorEnhanced, CT_VXContext, ct_setup_vx_context, 0)
+
+typedef struct
+{
+    const char * name;
+    int width, height;
+    enum TestTensorDF fmt;
+} test_create_image_objectarray_tensor_arg;
+
+#define TENSOR_FORMAT(testArgName, nextmacro, ...) \
+    CT_EXPAND(nextmacro(testArgName "/fmt=Q78", __VA_ARGS__, TT_Q78)), \
+    CT_EXPAND(nextmacro(testArgName "/fmt=U8", __VA_ARGS__, TT_U8))
+
+
+#define CREATE_IMAGE_OBJECTARRAY_FROM_TENSOR_PARAMETERS \
+    CT_GENERATE_PARAMETERS("Adjacent2D", ADD_SIZE_SMALL_SET, TENSOR_FORMAT, ARG)
+
+TEST_WITH_ARG(TensorEnhanced, testvxCreateImageObjectArrayFromTensor,
+              test_create_image_objectarray_tensor_arg,
+              CREATE_IMAGE_OBJECTARRAY_FROM_TENSOR_PARAMETERS
+    )
+{
+    const vx_context context = context_->vx_context_;
+    const enum TestTensorDF fmt = arg_->fmt;
+    assert(fmt == TT_Q78 || fmt == TT_U8 );
+    vx_rectangle_t rect;
+    vx_size array_size;
+    vx_size stride;
+    vx_df_image image_format = VX_DF_IMAGE_U8;
+    vx_object_array objImgs = 0;
+    vx_size view_start[3] = {0};
+
+    vx_size max_dims = 3;
+    vx_size dim1 = arg_->height;
+    vx_size dim0 = arg_->width;
+
+    rect.start_x = 0;
+    rect.end_x = arg_->width;
+    rect.start_y = 0;
+    rect.end_y = arg_->height;
+
+    uint64_t rng;
+    {
+        uint64_t * seed = &CT()->seed_;
+        ASSERT(!!seed);
+        CT_RNG_INIT(rng, *seed);
+    }
+
+    vx_enum data_type = 0;
+    vx_uint8 fixed_point_position = 0;
+    vx_size sizeof_data_type = 0;
+    ownUnpackFormat(fmt, &data_type, &fixed_point_position, &sizeof_data_type);
+
+    size_t * const in0_dims = ct_alloc_mem(sizeof(*in0_dims) * max_dims);
+    ASSERT(in0_dims);
+
+    size_t * const in0_strides = ct_alloc_mem(sizeof(*in0_strides) * max_dims);
+    ASSERT(in0_strides);
+
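+    // dims[0]/dims[1] hold the image width/height; dims[2] is a random number of 2D slices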
+    in0_dims[2] = (size_t)CT_RNG_NEXT_INT(rng, TEST_TENSOR_MIN_DIM_SZ, TEST_TENSOR_MAX_DIM_SZ + 1);
+    in0_dims[0] = dim0;
+    in0_dims[1] = dim1;
+    for (vx_size i = 0; i < max_dims; ++i)
+    {
+        in0_strides[i] = i ? in0_strides[i - 1] * in0_dims[i - 1] : sizeof_data_type;
+    }
+
+    vx_tensor in0_tensor = vxCreateTensor(context, max_dims, in0_dims, data_type, fixed_point_position);
+    ASSERT_VX_OBJECT(in0_tensor, VX_TYPE_TENSOR);
+
+    size_t in0_bytes = 1;
+    for (vx_size i = 0; i < max_dims; ++i)
+    {
+        in0_bytes *= in0_dims[i];
+    }
+    size_t malloc_bytes = in0_bytes * sizeof_data_type;
+
+    void * in0_data = ct_alloc_mem(malloc_bytes);
+    ASSERT(in0_data);
+    //init in0_data
+    ownFillRandData(fmt, &rng, in0_bytes, in0_data);
+    VX_CALL(vxCopyTensorPatch(in0_tensor, max_dims, view_start, in0_dims, in0_strides, in0_data, VX_WRITE_ONLY, VX_MEMORY_TYPE_HOST));
+
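+    // Each object-array element is one width x height slice of the tensor, so the array
+    // length is dims[2] and the spacing between images is the dim-2 byte stride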
+    array_size = in0_dims[2];
+    stride = in0_strides[2];
+    if (fmt == TT_Q78)
+    {
+        image_format = VX_DF_IMAGE_S16;
+    }
+    ASSERT_VX_OBJECT(objImgs = vxCreateImageObjectArrayFromTensor(in0_tensor, &rect, array_size, stride, image_format),
+        VX_TYPE_OBJECT_ARRAY);
+
+    //check result
+    vx_size expect_itemnums = 0;
+    VX_CALL(vxQueryObjectArray(objImgs, VX_OBJECT_ARRAY_NUMITEMS, (void *)&expect_itemnums, sizeof(expect_itemnums)));
+    EXPECT_EQ_INT(expect_itemnums, array_size);
+
+    VX_CALL(vxReleaseObjectArray(&objImgs));
+    EXPECT_EQ_PTR(NULL, objImgs);
+
+    VX_CALL(vxReleaseTensor(&in0_tensor));
+    EXPECT_EQ_PTR(NULL, in0_tensor);
+
+    ct_free_mem(in0_data);
+    ct_free_mem(in0_dims);
+    ct_free_mem(in0_strides);
+}
+
+#endif
+
+#if defined(OPENVX_USE_ENHANCED_VISION) || defined(OPENVX_CONFORMANCE_NEURAL_NETWORKS) || defined(OPENVX_CONFORMANCE_NNEF_IMPORT)
+
+TESTCASE_TESTS(Tensor,
+    testvxCreateTensorFromHandle,
+    testvxSwapTensorHandle,
+    testMapandUnMapTensorPatch,
+    testvxCreateVirtualTensor)
+
+#endif
+
+#ifdef OPENVX_USE_ENHANCED_VISION
+
+TESTCASE_TESTS(TensorEnhanced, testvxCreateImageObjectArrayFromTensor)
+
+#endif
diff --git a/test_conformance/test_warpaffine.c b/test_conformance/test_warpaffine.c
index 700597d..071b35c 100644
--- a/test_conformance/test_warpaffine.c
+++ b/test_conformance/test_warpaffine.c
@@ -1,4 +1,4 @@
-/* 
+/*
 
  * Copyright (c) 2012-2017 The Khronos Group Inc.
  *
@@ -15,6 +15,8 @@
  * limitations under the License.
  */
 
+#if defined(OPENVX_USE_ENHANCED_VISION) || defined(OPENVX_CONFORMANCE_VISION)
+
 #include <math.h>
 #include <float.h>
 #include <string.h>
@@ -99,28 +101,47 @@
 #define VX_NN_AREA_SIZE         1.5
 #define VX_BILINEAR_TOLERANCE   1
 
-static CT_Image warp_affine_read_image_8u(const char* fileName, int width, int height)
+static CT_Image warp_affine_read_image(const char* fileName, int width, int height, vx_df_image format)
 {
-    CT_Image image = NULL;
+    CT_Image image_load = NULL, image_ret = NULL;
+    ASSERT_(return 0, format == VX_DF_IMAGE_U1 || format == VX_DF_IMAGE_U8);
 
-    image = ct_read_image(fileName, 1);
-    ASSERT_(return 0, image);
-    ASSERT_(return 0, image->format == VX_DF_IMAGE_U8);
+    image_load = ct_read_image(fileName, 1);
+    ASSERT_(return 0, image_load);
+    ASSERT_(return 0, image_load->format == VX_DF_IMAGE_U8);
 
-    return image;
+    if (format == VX_DF_IMAGE_U1)
+    {
+        ASSERT_NO_FAILURE_(return 0, threshold_U8_ct_image(image_load, 127));   // Threshold to make the U1 image less trivial
+        ASSERT_NO_FAILURE_(return 0, image_ret = ct_allocate_image(image_load->width, image_load->height, VX_DF_IMAGE_U1));
+        ASSERT_NO_FAILURE_(return 0, U8_ct_image_to_U1_ct_image(image_load, image_ret));
+    }
+    else    // format == VX_DF_IMAGE_U8
+    {
+        image_ret = image_load;
+    }
+
+    ASSERT_(return 0, image_ret);
+    ASSERT_(return 0, image_ret->format == format);
+
+    return image_ret;
 }
 
-static CT_Image warp_affine_generate_random(const char* fileName, int width, int height)
+static CT_Image warp_affine_generate_random(const char* fileName, int width, int height, vx_df_image format)
 {
     CT_Image image;
+    ASSERT_(return 0, format == VX_DF_IMAGE_U1 || format == VX_DF_IMAGE_U8);
 
-    ASSERT_NO_FAILURE_(return 0,
-            image = ct_allocate_ct_image_random(width, height, VX_DF_IMAGE_U8, &CT()->seed_, 0, 256));
+    if (format == VX_DF_IMAGE_U1)
+        ASSERT_NO_FAILURE_(return 0, image = ct_allocate_ct_image_random(width, height, format, &CT()->seed_, 0, 2));
+    else    // format == VX_DF_IMAGE_U8
+        ASSERT_NO_FAILURE_(return 0, image = ct_allocate_ct_image_random(width, height, format, &CT()->seed_, 0, 256));
 
     return image;
 }
 
 #define RND_FLT(low, high)      (vx_float32)CT_RNG_NEXT_REAL(CT()->seed_, low, high);
+
 static void warp_affine_generate_matrix(vx_float32* m, int src_width, int src_height, int dst_width, int dst_height, int type)
 {
     vx_float32 mat[3][2];
@@ -212,16 +233,29 @@
     return matrix;
 }
 
-
 static int warp_affine_check_pixel(CT_Image input, CT_Image output, int x, int y, vx_enum interp_type, vx_border_t border, vx_float32 *m)
 {
     vx_float64 x0, y0, xlower, ylower, s, t;
-    vx_int32 xi, yi;
+    vx_int32 xo, yo, xi, yi, roi_xi, roi_yi, xiShft;
     int candidate;
-    vx_uint8 res = *CT_IMAGE_DATA_PTR_8U(output, x, y);
+    vx_df_image format = input->format;
 
-    x0 = (vx_float64)m[2 * 0 + 0] * (vx_float64)x + (vx_float64)m[2 * 1 + 0] * (vx_float64)y + (vx_float64)m[2 * 2 + 0];
-    y0 = (vx_float64)m[2 * 0 + 1] * (vx_float64)x + (vx_float64)m[2 * 1 + 1] * (vx_float64)y + (vx_float64)m[2 * 2 + 1];
+    xo = x + output->roi.x - (format == VX_DF_IMAGE_U1 ? output->roi.x % 8 : 0);    // ROI-independent coordinates
+    yo = y + output->roi.y;
+    roi_xi = input->roi.x;
+    roi_yi = input->roi.y;
+    xiShft = (format == VX_DF_IMAGE_U1) ? input->roi.x % 8 : 0; // Bit-shift used for U1 input image
+
+    vx_uint8 res;
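+    // U1 images pack eight pixels per byte, so extract the single bit for pixel x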
+    if (format == VX_DF_IMAGE_U1)
+        res = (*CT_IMAGE_DATA_PTR_1U(output, x, y) & (1 << (x % 8))) >> (x % 8);
+    else
+        res =  *CT_IMAGE_DATA_PTR_8U(output, x, y);
+
+    x0 = (vx_float64)m[2 * 0 + 0] * (vx_float64)xo + (vx_float64)m[2 * 1 + 0] * (vx_float64)yo + (vx_float64)m[2 * 2 + 0];
+    y0 = (vx_float64)m[2 * 0 + 1] * (vx_float64)xo + (vx_float64)m[2 * 1 + 1] * (vx_float64)yo + (vx_float64)m[2 * 2 + 1];
+    x0 = x0 - (vx_float64)roi_xi + xiShft;       // Switch to ROI-respecting coordinates
+    y0 = y0 - (vx_float64)roi_yi;
 
     if (VX_INTERPOLATION_NEAREST_NEIGHBOR == interp_type)
     {
@@ -229,13 +263,20 @@
         {
             for (xi = (vx_int32)ceil(x0 - VX_NN_AREA_SIZE); (vx_float64)xi <= x0 + VX_NN_AREA_SIZE; xi++)
             {
-                if (0 <= xi && 0 <= yi && xi < (vx_int32)input->width && yi < (vx_int32)input->height)
+                if (xi >= xiShft                          && yi >= 0 &&
+                    xi <  (vx_int32)input->width + xiShft && yi <  (vx_int32)input->height)
                 {
-                    candidate = *CT_IMAGE_DATA_PTR_8U(input, xi, yi);
+                    if (format == VX_DF_IMAGE_U1)
+                        candidate = (*CT_IMAGE_DATA_PTR_1U(input, xi, yi) & (1 << (xi % 8))) >> (xi % 8);
+                    else
+                        candidate =  *CT_IMAGE_DATA_PTR_8U(input, xi, yi);
                 }
                 else if (VX_BORDER_CONSTANT == border.mode)
                 {
-                    candidate = border.constant_value.U8;
+                    if (format == VX_DF_IMAGE_U1)
+                        candidate = border.constant_value.U1 ? 1 : 0;
+                    else
+                        candidate = border.constant_value.U8;
                 }
                 else
                 {
@@ -245,7 +286,7 @@
                     return 0;
             }
         }
-        CT_FAIL_(return 1, "Check failed for pixel (%d, %d): %d", x, y, (int)res);
+        CT_FAIL_(return 1, "Check failed for pixel (%d, %d): %d", xo, yo, (int)res);
     }
     else if (VX_INTERPOLATION_BILINEAR == interp_type)
     {
@@ -261,24 +302,61 @@
         candidate = -1;
         if (VX_BORDER_UNDEFINED == border.mode)
         {
-            if (0 <= xi && 0 <= yi && xi < (vx_int32)input->width - 1 && yi < (vx_int32)input->height - 1)
+            if (xi >= xiShft                             && yi >= 0 &&
+                xi < (vx_int32)input->width - 1 + xiShft && yi < (vx_int32)input->height - 1)
             {
-                candidate = (int)((1. - s) * (1. - t) * (vx_float64) *CT_IMAGE_DATA_PTR_8U(input, xi    , yi    ) +
-                                        s  * (1. - t) * (vx_float64) *CT_IMAGE_DATA_PTR_8U(input, xi + 1, yi    ) +
-                                  (1. - s) *       t  * (vx_float64) *CT_IMAGE_DATA_PTR_8U(input, xi    , yi + 1) +
-                                        s  *       t  * (vx_float64) *CT_IMAGE_DATA_PTR_8U(input, xi + 1, yi + 1));
+                if (format == VX_DF_IMAGE_U1)
+                {
+                    vx_uint8 p00 = (*CT_IMAGE_DATA_PTR_1U(input, xi    , yi    ) & (1 <<  xi      % 8)) >> ( xi      % 8);
+                    vx_uint8 p10 = (*CT_IMAGE_DATA_PTR_1U(input, xi + 1, yi    ) & (1 << (xi + 1) % 8)) >> ((xi + 1) % 8);
+                    vx_uint8 p01 = (*CT_IMAGE_DATA_PTR_1U(input, xi    , yi + 1) & (1 <<  xi      % 8)) >> ( xi      % 8);
+                    vx_uint8 p11 = (*CT_IMAGE_DATA_PTR_1U(input, xi + 1, yi + 1) & (1 << (xi + 1) % 8)) >> ((xi + 1) % 8);
+                    candidate = (int)((1. - s) * (1. - t) * (vx_float64) p00 +
+                                            s  * (1. - t) * (vx_float64) p10 +
+                                      (1. - s) *       t  * (vx_float64) p01 +
+                                            s  *       t  * (vx_float64) p11 + 0.5); // Arithmetic rounding instead of truncation
+                    candidate = (candidate > 1) ? 1 : (candidate < 0) ? 0 : candidate;
+                }
+                else
+                {
+                    candidate = (int)((1. - s) * (1. - t) * (vx_float64) *CT_IMAGE_DATA_PTR_8U(input, xi    , yi    ) +
+                                            s  * (1. - t) * (vx_float64) *CT_IMAGE_DATA_PTR_8U(input, xi + 1, yi    ) +
+                                      (1. - s) *       t  * (vx_float64) *CT_IMAGE_DATA_PTR_8U(input, xi    , yi + 1) +
+                                            s  *       t  * (vx_float64) *CT_IMAGE_DATA_PTR_8U(input, xi + 1, yi + 1));
+                }
             }
         }
         else if (VX_BORDER_CONSTANT == border.mode)
         {
-            candidate = (int)((1. - s) * (1. - t) * (vx_float32)CT_IMAGE_DATA_CONSTANT_8U(input, xi    , yi    , border.constant_value.U8) +
-                                    s  * (1. - t) * (vx_float32)CT_IMAGE_DATA_CONSTANT_8U(input, xi + 1, yi    , border.constant_value.U8) +
-                              (1. - s) *       t  * (vx_float32)CT_IMAGE_DATA_CONSTANT_8U(input, xi    , yi + 1, border.constant_value.U8) +
-                                    s  *       t  * (vx_float32)CT_IMAGE_DATA_CONSTANT_8U(input, xi + 1, yi + 1, border.constant_value.U8));
+            if (format == VX_DF_IMAGE_U1)
+            {
+                vx_uint8 p00 = CT_IMAGE_DATA_CONSTANT_1U(input, xi    , yi    , border.constant_value.U1);
+                vx_uint8 p10 = CT_IMAGE_DATA_CONSTANT_1U(input, xi + 1, yi    , border.constant_value.U1);
+                vx_uint8 p01 = CT_IMAGE_DATA_CONSTANT_1U(input, xi    , yi + 1, border.constant_value.U1);
+                vx_uint8 p11 = CT_IMAGE_DATA_CONSTANT_1U(input, xi + 1, yi + 1, border.constant_value.U1);
+                candidate = (int)((1. - s) * (1. - t) * (vx_float32)p00 +
+                                        s  * (1. - t) * (vx_float32)p10 +
+                                  (1. - s) *       t  * (vx_float32)p01 +
+                                        s  *       t  * (vx_float32)p11 + 0.5);
+                candidate = (candidate > 1) ? 1 : (candidate < 0) ? 0 : candidate;
+            }
+            else
+            {
+                vx_uint8 p00 = CT_IMAGE_DATA_CONSTANT_8U(input, xi    , yi    , border.constant_value.U8);
+                vx_uint8 p10 = CT_IMAGE_DATA_CONSTANT_8U(input, xi + 1, yi    , border.constant_value.U8);
+                vx_uint8 p01 = CT_IMAGE_DATA_CONSTANT_8U(input, xi    , yi + 1, border.constant_value.U8);
+                vx_uint8 p11 = CT_IMAGE_DATA_CONSTANT_8U(input, xi + 1, yi + 1, border.constant_value.U8);
+                candidate = (int)((1. - s) * (1. - t) * (vx_float32)p00 +
+                                        s  * (1. - t) * (vx_float32)p10 +
+                                  (1. - s) *       t  * (vx_float32)p01 +
+                                        s  *       t  * (vx_float32)p11);
+            }
         }
-        if (candidate == -1 || (abs(candidate - res) <= VX_BILINEAR_TOLERANCE))
+        // A tolerance of 1 would make tests on U1 images trivial
+        if ( candidate == -1 || (abs(candidate - res) <= ((format == VX_DF_IMAGE_U1) ? 0 : VX_BILINEAR_TOLERANCE)) )
             return 0;
-        return 1;
+        else
+            return 1;
     }
     CT_FAIL_(return 1, "Interpolation type undefined");
 }
@@ -287,10 +365,20 @@
 {
     vx_uint32 err_count = 0;
 
-    CT_FILL_IMAGE_8U(, output,
-            {
-                ASSERT_NO_FAILURE(err_count += warp_affine_check_pixel(input, output, x, y, interp_type, border, m));
-            });
+    if (input->format == VX_DF_IMAGE_U1)
+    {
+        CT_FILL_IMAGE_1U(, output,
+                {
+                    ASSERT_NO_FAILURE(err_count += warp_affine_check_pixel(input, output, xShftd, y, interp_type, border, m));
+                });
+    }
+    else
+    {
+        CT_FILL_IMAGE_8U(, output,
+                {
+                    ASSERT_NO_FAILURE(err_count += warp_affine_check_pixel(input, output, x, y, interp_type, border, m));
+                });
+    }
     if (10 * err_count > output->width * output->height)
         CT_FAIL_(return, "Check failed for %d pixels", err_count);
 }
@@ -304,6 +392,12 @@
     ASSERT( (border.mode == VX_BORDER_UNDEFINED) ||
             (border.mode == VX_BORDER_CONSTANT));
 
+    ASSERT( (input->format == output->format) &&
+            (input->format == VX_DF_IMAGE_U1 || input->format == VX_DF_IMAGE_U8));
+
+    ASSERT( ((input->width  == output->width)  || (input->roi.width  == output->roi.width)) &&
+            ((input->height == output->height) || (input->roi.height == output->roi.height)));
+
     warp_affine_validate(input, output, interp_type, border, m);
     if (CT_HasFailure())
     {
@@ -319,13 +413,13 @@
 
 typedef struct {
     const char* testName;
-    CT_Image(*generator)(const char* fileName, int width, int height);
+    CT_Image(*generator)(const char* fileName, int width, int height, vx_df_image format);
     const char* fileName;
-    int src_width, src_height;
     int width, height;
     vx_border_t border;
     vx_enum interp_type;
     int matrix_type;
+    vx_df_image format;
 } Arg;
 
 #define ADD_VX_BORDERS_WARP_AFFINE(testArgName, nextmacro, ...) \
@@ -335,6 +429,11 @@
     CT_EXPAND(nextmacro(testArgName "/VX_BORDER_CONSTANT=127", __VA_ARGS__, { VX_BORDER_CONSTANT, {{ 127 }} })), \
     CT_EXPAND(nextmacro(testArgName "/VX_BORDER_CONSTANT=255", __VA_ARGS__, { VX_BORDER_CONSTANT, {{ 255 }} }))
 
+#define ADD_VX_BORDERS_WARP_AFFINE_MINIMAL(testArgName, nextmacro, ...) \
+    CT_EXPAND(nextmacro(testArgName "/VX_BORDER_UNDEFINED", __VA_ARGS__, { VX_BORDER_UNDEFINED, {{ 0 }} })), \
+    CT_EXPAND(nextmacro(testArgName "/VX_BORDER_CONSTANT=0", __VA_ARGS__, { VX_BORDER_CONSTANT, {{ 0 }} })), \
+    CT_EXPAND(nextmacro(testArgName "/VX_BORDER_CONSTANT=255", __VA_ARGS__, { VX_BORDER_CONSTANT, {{ 255 }} }))
+
 #define ADD_VX_INTERP_TYPE_WARP_AFFINE(testArgName, nextmacro, ...) \
     CT_EXPAND(nextmacro(testArgName "/VX_INTERPOLATION_NEAREST_NEIGHBOR", __VA_ARGS__, VX_INTERPOLATION_NEAREST_NEIGHBOR)), \
     CT_EXPAND(nextmacro(testArgName "/VX_INTERPOLATION_BILINEAR", __VA_ARGS__, VX_INTERPOLATION_BILINEAR ))
@@ -349,10 +448,16 @@
     CT_EXPAND(nextmacro(testArgName "/VX_MATRIX_SCALE_ROTATE", __VA_ARGS__, VX_MATRIX_SCALE_ROTATE)), \
     CT_EXPAND(nextmacro(testArgName "/VX_MATRIX_RANDOM", __VA_ARGS__,       VX_MATRIX_RANDOM))
 
+#define ADD_VX_MATRIX_PARAM_WARP_AFFINE_MINIMAL(testArgName, nextmacro, ...) \
+    CT_EXPAND(nextmacro(testArgName "/VX_MATRIX_IDENT", __VA_ARGS__,        VX_MATRIX_IDENT)), \
+    CT_EXPAND(nextmacro(testArgName "/VX_MATRIX_SCALE_ROTATE", __VA_ARGS__, VX_MATRIX_SCALE_ROTATE)), \
+    CT_EXPAND(nextmacro(testArgName "/VX_MATRIX_RANDOM", __VA_ARGS__,       VX_MATRIX_RANDOM))
 
 #define PARAMETERS \
-    CT_GENERATE_PARAMETERS("random", ADD_SIZE_SMALL_SET, ADD_VX_BORDERS_WARP_AFFINE, ADD_VX_INTERPOLATION_TYPE_NEAREST_NEIGHBOR, ADD_VX_MATRIX_PARAM_WARP_AFFINE, ARG, warp_affine_generate_random, NULL, 128, 128), \
-    CT_GENERATE_PARAMETERS("lena", ADD_SIZE_SMALL_SET, ADD_VX_BORDERS_WARP_AFFINE, ADD_VX_INTERP_TYPE_WARP_AFFINE, ADD_VX_MATRIX_PARAM_WARP_AFFINE, ARG, warp_affine_read_image_8u, "lena.bmp", 0, 0)
+    CT_GENERATE_PARAMETERS("random", ADD_SIZE_SMALL_SET, ADD_VX_BORDERS_WARP_AFFINE, ADD_VX_INTERPOLATION_TYPE_NEAREST_NEIGHBOR, ADD_VX_MATRIX_PARAM_WARP_AFFINE, ADD_TYPE_U8, ARG, warp_affine_generate_random, NULL), \
+    CT_GENERATE_PARAMETERS("lena", ADD_SIZE_NONE, ADD_VX_BORDERS_WARP_AFFINE, ADD_VX_INTERP_TYPE_WARP_AFFINE, ADD_VX_MATRIX_PARAM_WARP_AFFINE, ADD_TYPE_U8, ARG, warp_affine_read_image, "lena.bmp"), \
+    CT_GENERATE_PARAMETERS("_U1_/random", ADD_SIZE_SMALL_SET, ADD_VX_BORDERS_WARP_AFFINE_MINIMAL, ADD_VX_INTERPOLATION_TYPE_NEAREST_NEIGHBOR, ADD_VX_MATRIX_PARAM_WARP_AFFINE, ADD_TYPE_U1, ARG, warp_affine_generate_random, NULL), \
+    CT_GENERATE_PARAMETERS("_U1_/lena", ADD_SIZE_NONE, ADD_VX_BORDERS_WARP_AFFINE_MINIMAL, ADD_VX_INTERP_TYPE_WARP_AFFINE, ADD_VX_MATRIX_PARAM_WARP_AFFINE, ADD_TYPE_U1, ARG, warp_affine_read_image, "lena.bmp")
 
 TEST_WITH_ARG(WarpAffine, testGraphProcessing, Arg,
     PARAMETERS
@@ -369,12 +474,12 @@
 
     vx_border_t border = arg_->border;
 
-    ASSERT_NO_FAILURE(input = arg_->generator(arg_->fileName, arg_->src_width, arg_->src_height));
-    ASSERT_NO_FAILURE(output = ct_allocate_image(arg_->width, arg_->height, VX_DF_IMAGE_U8));
+    ASSERT_NO_FAILURE(input = arg_->generator(arg_->fileName, arg_->width, arg_->height, arg_->format));
+    ASSERT_NO_FAILURE(output = ct_allocate_image(input->width, input->height, input->format));
 
     ASSERT_VX_OBJECT(input_image = ct_image_to_vx_image(input, context), VX_TYPE_IMAGE);
     ASSERT_VX_OBJECT(output_image = ct_image_to_vx_image(output, context), VX_TYPE_IMAGE);
-    ASSERT_NO_FAILURE(warp_affine_generate_matrix(m, input->width, input->height, arg_->width, arg_->height, arg_->matrix_type));
+    ASSERT_NO_FAILURE(warp_affine_generate_matrix(m, input->width, input->height, input->width/2, input->height/2, arg_->matrix_type));
     ASSERT_VX_OBJECT(matrix = warp_affine_create_matrix(context, m), VX_TYPE_MATRIX);
 
     ASSERT_VX_OBJECT(graph = vxCreateGraph(context), VX_TYPE_GRAPH);
@@ -415,12 +520,12 @@
 
     vx_border_t border = arg_->border;
 
-    ASSERT_NO_FAILURE(input = arg_->generator(arg_->fileName, arg_->src_width, arg_->src_height));
-    ASSERT_NO_FAILURE(output = ct_allocate_image(arg_->width, arg_->height, VX_DF_IMAGE_U8));
+    ASSERT_NO_FAILURE(input = arg_->generator(arg_->fileName, arg_->width, arg_->height, arg_->format));
+    ASSERT_NO_FAILURE(output = ct_allocate_image(input->width, input->height, input->format));
 
     ASSERT_VX_OBJECT(input_image = ct_image_to_vx_image(input, context), VX_TYPE_IMAGE);
     ASSERT_VX_OBJECT(output_image = ct_image_to_vx_image(output, context), VX_TYPE_IMAGE);
-    ASSERT_NO_FAILURE(warp_affine_generate_matrix(m, input->width, input->height, arg_->width, arg_->height, arg_->matrix_type));
+    ASSERT_NO_FAILURE(warp_affine_generate_matrix(m, input->width, input->height, input->width/2, input->height/2, arg_->matrix_type));
     ASSERT_VX_OBJECT(matrix = warp_affine_create_matrix(context, m), VX_TYPE_MATRIX);
 
     VX_CALL(vxSetContextAttribute(context, VX_CONTEXT_IMMEDIATE_BORDER, &border, sizeof(border)));
@@ -440,8 +545,71 @@
     ASSERT(input_image == 0);
 }
 
+typedef struct {
+    const char* testName;
+    CT_Image (*generator)(const char* fileName, int width, int height, vx_df_image format);
+    const char* fileName;
+    int width, height;
+    vx_border_t border;
+    vx_enum interp_type;
+    int matrix_type;
+    vx_df_image format;
+    vx_rectangle_t region_shift;
+} ValidRegionTest_Arg;
+
+#define REGION_PARAMETERS \
+    CT_GENERATE_PARAMETERS("lena", ADD_SIZE_256x256, ADD_VX_BORDERS_WARP_AFFINE_MINIMAL, ADD_VX_INTERP_TYPE_WARP_AFFINE, ADD_VX_MATRIX_PARAM_WARP_AFFINE_MINIMAL, ADD_TYPE_U8, ADD_VALID_REGION_SHRINKS, ARG, warp_affine_read_image, "lena.bmp"), \
+    CT_GENERATE_PARAMETERS("_U1_/lena", ADD_SIZE_256x256, ADD_VX_BORDERS_WARP_AFFINE_MINIMAL, ADD_VX_INTERP_TYPE_WARP_AFFINE, ADD_VX_MATRIX_PARAM_WARP_AFFINE_MINIMAL, ADD_TYPE_U1, ADD_VALID_REGION_SHRINKS, ARG, warp_affine_read_image, "lena.bmp")
+
+TEST_WITH_ARG(WarpAffine, testWithValidRegion, ValidRegionTest_Arg,
+    REGION_PARAMETERS
+)
+{
+    vx_context context = context_->vx_context_;
+    vx_image input_image = 0, output_image = 0;
+    vx_matrix matrix = 0;
+    vx_float32 m[6];
+
+    CT_Image input = NULL, output = NULL;
+
+    vx_border_t border = arg_->border;
+    vx_rectangle_t rect = {0, 0, 0, 0}, rect_shft = arg_->region_shift;
+
+    ASSERT_NO_FAILURE(input  = arg_->generator(arg_->fileName, arg_->width, arg_->height, arg_->format));
+    ASSERT_NO_FAILURE(output = ct_allocate_image(input->width, input->height, input->format));
+
+    ASSERT_VX_OBJECT(input_image  = ct_image_to_vx_image(input,  context), VX_TYPE_IMAGE);
+    ASSERT_VX_OBJECT(output_image = ct_image_to_vx_image(output, context), VX_TYPE_IMAGE);
+    ASSERT_NO_FAILURE(warp_affine_generate_matrix(m, input->width, input->height, input->width/2, input->height/2, arg_->matrix_type));
+    ASSERT_VX_OBJECT(matrix = warp_affine_create_matrix(context, m), VX_TYPE_MATRIX);
+
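+    // Shrink the input's valid region by the requested offsets, then shrink the reference
+    // image's ROI by the same amounts so only the adjusted region is checked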
+    ASSERT_NO_FAILURE(vxGetValidRegionImage(input_image, &rect));
+    ALTERRECTANGLE(rect, rect_shft.start_x, rect_shft.start_y, rect_shft.end_x, rect_shft.end_y);
+    ASSERT_NO_FAILURE(vxSetImageValidRectangle(input_image, &rect));
+
+    VX_CALL(vxSetContextAttribute(context, VX_CONTEXT_IMMEDIATE_BORDER, &border, sizeof(border)));
+
+    VX_CALL(vxuWarpAffine(context, input_image, matrix, arg_->interp_type, output_image));
+
+    ASSERT_NO_FAILURE(output = ct_image_from_vx_image(output_image));
+
+    ASSERT_NO_FAILURE(ct_adjust_roi(input, rect_shft.start_x, rect_shft.start_y, -rect_shft.end_x, -rect_shft.end_y));
+    ASSERT_NO_FAILURE(warp_affine_check(input, output, arg_->interp_type, border, m));
+
+    VX_CALL(vxReleaseMatrix(&matrix));
+    VX_CALL(vxReleaseImage(&output_image));
+    VX_CALL(vxReleaseImage(&input_image));
+
+    ASSERT(matrix == 0);
+    ASSERT(output_image == 0);
+    ASSERT(input_image == 0);
+}
+
 TESTCASE_TESTS(WarpAffine,
         testNodeCreation,
         testGraphProcessing,
-        testImmediateProcessing
+        testImmediateProcessing,
+        testWithValidRegion
 )
+
+#endif //OPENVX_USE_ENHANCED_VISION || OPENVX_CONFORMANCE_VISION
diff --git a/test_conformance/test_warpperspective.c b/test_conformance/test_warpperspective.c
index 9ee0aed..149f222 100644
--- a/test_conformance/test_warpperspective.c
+++ b/test_conformance/test_warpperspective.c
@@ -1,4 +1,4 @@
-/* 
+/*
 
  * Copyright (c) 2012-2017 The Khronos Group Inc.
  *
@@ -15,6 +15,8 @@
  * limitations under the License.
  */
 
+#if defined(OPENVX_USE_ENHANCED_VISION) || defined(OPENVX_CONFORMANCE_VISION)
+
 #include <math.h>
 #include <float.h>
 #include <string.h>
@@ -473,3 +475,5 @@
         testGraphProcessing,
         testImmediateProcessing
 )
+
+#endif //OPENVX_USE_ENHANCED_VISION || OPENVX_CONFORMANCE_VISION
diff --git a/test_conformance/test_weighted_average.c b/test_conformance/test_weighted_average.c
new file mode 100644
index 0000000..5ce7c50
--- /dev/null
+++ b/test_conformance/test_weighted_average.c
@@ -0,0 +1,223 @@
+/*
+
+* Copyright (c) 2017-2017 The Khronos Group Inc.
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*    http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+#if defined(OPENVX_USE_ENHANCED_VISION) || defined(OPENVX_CONFORMANCE_VISION)
+
+#include <string.h>
+#include <VX/vx.h>
+#include <VX/vxu.h>
+
+#include "test_engine/test.h"
+
+#ifdef _MSC_VER
+#define ONE_255 (1.0f/255)
+#else
+#define ONE_255 0x1.010102p-8f
+#endif
+#define ONE_2_0 1.0f
+#define ONE_2_1 (1.0f/(1<<1))
+#define ONE_2_2 (1.0f/(1<<2))
+#define ONE_2_3 (1.0f/(1<<3))
+#define ONE_2_4 (1.0f/(1<<4))
+#define ONE_2_5 (1.0f/(1<<5))
+#define ONE_2_6 (1.0f/(1<<6))
+#define ONE_2_7 (1.0f/(1<<7))
+#define ONE_2_8 (1.0f/(1<<8))
+#define ONE_2_9 (1.0f/(1<<9))
+#define ONE_2_10 (1.0f/(1<<10))
+#define ONE_2_11 (1.0f/(1<<11))
+#define ONE_2_12 (1.0f/(1<<12))
+#define ONE_2_13 (1.0f/(1<<13))
+#define ONE_2_14 (1.0f/(1<<14))
+#define ONE_2_15 (1.0f/(1<<15))
+
+#define ONE_255_STR "(1/255)"
+#define ONE_2_0_STR "(1/2^0)"
+#define ONE_2_1_STR "(1/2^1)"
+#define ONE_2_2_STR "(1/2^2)"
+#define ONE_2_3_STR "(1/2^3)"
+#define ONE_2_4_STR "(1/2^4)"
+#define ONE_2_5_STR "(1/2^5)"
+#define ONE_2_6_STR "(1/2^6)"
+#define ONE_2_7_STR "(1/2^7)"
+#define ONE_2_8_STR "(1/2^8)"
+#define ONE_2_9_STR "(1/2^9)"
+#define ONE_2_10_STR "(1/2^10)"
+#define ONE_2_11_STR "(1/2^11)"
+#define ONE_2_12_STR "(1/2^12)"
+#define ONE_2_13_STR "(1/2^13)"
+#define ONE_2_14_STR "(1/2^14)"
+#define ONE_2_15_STR "(1/2^15)"
+
+static void referenceWeightedAverage(CT_Image src0, CT_Image src1, vx_float32 scale, CT_Image dst)
+{
+    uint32_t i, j;
+    ASSERT(src0 && src1 && dst);
+    ASSERT(src0->width == src1->width  && src0->width == dst->width);
+    ASSERT(src0->height == src1->height && src0->height == dst->height);
+
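+    // Reference result per pixel: dst = scale * src0 + (1 - scale) * src1, truncated to vx_uint8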
+#define WEIGHTED_AVERAGE_LOOP(s0, s1, r)                                                                        \
+    do{                                                                                                         \
+        for (i = 0; i < dst->height; ++i)                                                                       \
+            for (j = 0; j < dst->width; ++j)                                                                    \
+            {                                                                                                   \
+                vx_int32 val0 = (vx_uint8)src0->data.s0[i * src0->stride + j];                                  \
+                vx_int32 val1 = (vx_uint8)src1->data.s1[i * src1->stride + j];                                  \
+                vx_int32 res0 = (vx_int32)((1 - scale) * (vx_float32)(val1) + scale * (vx_float32)(val0));     \
+                dst->data.r[i * dst->stride + j] = (vx_uint8)res0;                                              \
+            }                                                                                                   \
+    }while(0)
+
+    if (src0->format == VX_DF_IMAGE_U8 && src1->format == VX_DF_IMAGE_U8 && dst->format == VX_DF_IMAGE_U8)
+        WEIGHTED_AVERAGE_LOOP(y, y, y);
+    else
+        FAIL("Unsupported combination of argument formats: %.4s + %.4s = %.4s", &src0->format, &src1->format, &dst->format);
+
+#undef WEIGHTED_AVERAGE_LOOP
+}
+
+typedef struct {
+    const char* name;
+    vx_df_image format;
+    int width, height;
+    vx_float32 scale;
+} fuzzy_arg;
+
+#define FUZZY_ARG(owp, w, h, scale)                  \
+    ARG(#owp "/" #w "x" #h " " scale##_STR "=" ,     \
+        VX_DF_IMAGE_##owp, w, h, scale)
+
+#define APPEND_SCALE(macro, ...)                       \
+    CT_EXPAND(macro(__VA_ARGS__, ONE_255)),            \
+    CT_EXPAND(macro(__VA_ARGS__, ONE_2_0)),            \
+    CT_EXPAND(macro(__VA_ARGS__, ONE_2_1)),            \
+    CT_EXPAND(macro(__VA_ARGS__, ONE_2_2)),            \
+    CT_EXPAND(macro(__VA_ARGS__, ONE_2_3)),            \
+    CT_EXPAND(macro(__VA_ARGS__, ONE_2_4)),            \
+    CT_EXPAND(macro(__VA_ARGS__, ONE_2_5)),            \
+    CT_EXPAND(macro(__VA_ARGS__, ONE_2_6)),            \
+    CT_EXPAND(macro(__VA_ARGS__, ONE_2_7)),            \
+    CT_EXPAND(macro(__VA_ARGS__, ONE_2_8)),            \
+    CT_EXPAND(macro(__VA_ARGS__, ONE_2_9)),            \
+    CT_EXPAND(macro(__VA_ARGS__, ONE_2_10)),           \
+    CT_EXPAND(macro(__VA_ARGS__, ONE_2_11)),           \
+    CT_EXPAND(macro(__VA_ARGS__, ONE_2_12)),           \
+    CT_EXPAND(macro(__VA_ARGS__, ONE_2_13)),           \
+    CT_EXPAND(macro(__VA_ARGS__, ONE_2_14)),           \
+    CT_EXPAND(macro(__VA_ARGS__, ONE_2_15))
+
+#define WEIGHTED_AVERAGE_TEST_CASE(owp)         \
+    APPEND_SCALE(FUZZY_ARG, owp, 640, 480),     \
+    APPEND_SCALE(FUZZY_ARG, owp, 15, 15),       \
+    APPEND_SCALE(FUZZY_ARG, owp, 320, 240)
+
+TESTCASE(WeightedAverage, CT_VXContext, ct_setup_vx_context, 0)
+
+TEST_WITH_ARG(WeightedAverage, testvxWeightedAverage, fuzzy_arg,
+    WEIGHTED_AVERAGE_TEST_CASE(U8))
+{
+    int format = arg_->format;
+    vx_scalar scale = 0;
+    vx_image src_in0;
+    vx_image src_in1;
+    vx_image out;
+    CT_Image ref1, ref2, vxout, refdst;
+    vx_graph graph = 0;
+    vx_node node = 0;
+    vx_context context = context_->vx_context_;
+
+    ASSERT_VX_OBJECT(graph = vxCreateGraph(context), VX_TYPE_GRAPH);
+    ASSERT_VX_OBJECT(out = vxCreateImage(context, arg_->width, arg_->height, format), VX_TYPE_IMAGE);
+    ASSERT_VX_OBJECT(scale = vxCreateScalar(context, VX_TYPE_FLOAT32, &arg_->scale), VX_TYPE_SCALAR);
+
+    ASSERT_VX_OBJECT(src_in0 = vxCreateImage(context, arg_->width, arg_->height, format), VX_TYPE_IMAGE);
+    ASSERT_VX_OBJECT(src_in1 = vxCreateImage(context, arg_->width, arg_->height, format), VX_TYPE_IMAGE);
+
+    ASSERT_NO_FAILURE(ct_fill_image_random(src_in0, &CT()->seed_));
+    ASSERT_NO_FAILURE(ct_fill_image_random(src_in1, &CT()->seed_));
+
+    ASSERT_VX_OBJECT(node = vxWeightedAverageNode(graph, src_in0, scale, src_in1, out), VX_TYPE_NODE);
+    ASSERT_EQ_VX_STATUS(VX_SUCCESS, vxProcessGraph(graph));
+
+    ref1 = ct_image_from_vx_image(src_in0);
+    ref2 = ct_image_from_vx_image(src_in1);
+    vxout = ct_image_from_vx_image(out);
+    refdst = ct_allocate_image(arg_->width, arg_->height, format);
+
+    referenceWeightedAverage(ref1, ref2, arg_->scale, refdst);
+
+    EXPECT_EQ_CTIMAGE(refdst, vxout);
+
+    if (node)
+        VX_CALL(vxReleaseNode(&node));
+    if (graph)
+        VX_CALL(vxReleaseGraph(&graph));
+    ASSERT(node == 0 && graph == 0);
+    VX_CALL(vxReleaseImage(&src_in0));
+    VX_CALL(vxReleaseImage(&src_in1));
+    VX_CALL(vxReleaseScalar(&scale));
+    VX_CALL(vxReleaseImage(&out));
+}
+
+TEST_WITH_ARG(WeightedAverage, testvxuWeightedAverage, fuzzy_arg,
+    WEIGHTED_AVERAGE_TEST_CASE(U8))
+{
+    int format = arg_->format;
+    vx_scalar scale = 0;
+    vx_image src_in0;
+    vx_image src_in1;
+    vx_image out;
+    CT_Image ref1, ref2, vxout, refdst;
+    vx_graph graph = 0;
+    vx_node node = 0;
+    vx_context context = context_->vx_context_;
+
+    ASSERT_VX_OBJECT(graph = vxCreateGraph(context), VX_TYPE_GRAPH);
+    ASSERT_VX_OBJECT(out = vxCreateImage(context, arg_->width, arg_->height, format), VX_TYPE_IMAGE);
+    ASSERT_VX_OBJECT(scale = vxCreateScalar(context, VX_TYPE_FLOAT32, &arg_->scale), VX_TYPE_SCALAR);
+
+    ASSERT_VX_OBJECT(src_in0 = vxCreateImage(context, arg_->width, arg_->height, format), VX_TYPE_IMAGE);
+    ASSERT_VX_OBJECT(src_in1 = vxCreateImage(context, arg_->width, arg_->height, format), VX_TYPE_IMAGE);
+
+    ASSERT_NO_FAILURE(ct_fill_image_random(src_in0, &CT()->seed_));
+    ASSERT_NO_FAILURE(ct_fill_image_random(src_in1, &CT()->seed_));
+
+    ASSERT_EQ_VX_STATUS(VX_SUCCESS, vxuWeightedAverage(context, src_in0, scale, src_in1, out));
+
+    ref1 = ct_image_from_vx_image(src_in0);
+    ref2 = ct_image_from_vx_image(src_in1);
+    vxout = ct_image_from_vx_image(out);
+    refdst = ct_allocate_image(arg_->width, arg_->height, format);
+
+    referenceWeightedAverage(ref1, ref2, arg_->scale, refdst);
+
+    EXPECT_EQ_CTIMAGE(refdst, vxout);
+
+    if (node)
+        VX_CALL(vxReleaseNode(&node));
+    if (graph)
+        VX_CALL(vxReleaseGraph(&graph));
+    ASSERT(node == 0 && graph == 0);
+    VX_CALL(vxReleaseImage(&src_in0));
+    VX_CALL(vxReleaseImage(&src_in1));
+    VX_CALL(vxReleaseScalar(&scale));
+    VX_CALL(vxReleaseImage(&out));
+}
+
+TESTCASE_TESTS(WeightedAverage, testvxWeightedAverage, testvxuWeightedAverage)
+
+#endif //OPENVX_USE_ENHANCED_VISION || OPENVX_CONFORMANCE_VISION
diff --git a/test_engine/test_bmp.c b/test_engine/test_bmp.c
index c641cdc..15f5d6b 100644
--- a/test_engine/test_bmp.c
+++ b/test_engine/test_bmp.c
@@ -555,9 +555,19 @@
 {
     if( image )
     {
-        int channels = ct_channels(image->format);
-        return writeBMP(filename, image->data.y, (int)ct_stride_bytes(image),
-                        (int)image->width, (int)image->height, channels);
+        CT_Image wrt_image;
+        if (image->format == VX_DF_IMAGE_U1)
+        {
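+            // Convert bit-packed U1 images to U8 so they can be written out as regular 8-bit data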
+            wrt_image = ct_allocate_image(image->width, image->height, VX_DF_IMAGE_U8);
+            U1_ct_image_to_U8_ct_image(image, wrt_image);
+        }
+        else
+        {
+            wrt_image = image;
+        }
+        int channels = ct_channels(wrt_image->format);
+        return writeBMP(filename, wrt_image->data.y, (int)ct_stride_bytes(wrt_image),
+                        (int)wrt_image->width, (int)wrt_image->height, channels);
     }
     return -1;
 }
diff --git a/test_engine/test_engine.c b/test_engine/test_engine.c
index 007ffc4..c9538c3 100644
--- a/test_engine/test_engine.c
+++ b/test_engine/test_engine.c
@@ -1,4 +1,4 @@
-/* 
+/*
 
  * Copyright (c) 2012-2017 The Khronos Group Inc.
  *
@@ -337,6 +337,14 @@
         result = 0;
     }
 
+#ifndef OPENVX_USE_U1
+    // Filter out tests whose names include the tag "_U1_" if the test suite isn't configured to test U1 conformance
+    if (result && strstr(test_name, "_U1_") != NULL)
+    {
+        result = 0;
+    }
+#endif
+
     return result;
 }
 
@@ -518,6 +526,62 @@
     int total_openvx_passed_ix_tests = 0;
     int total_openvx_failed_ix_tests = 0;
 #endif
+#ifdef OPENVX_USE_PIPELINING
+    int total_openvx_pipe_tests = 0;
+    int total_openvx_passed_pipe_tests = 0;
+    int total_openvx_failed_pipe_tests = 0;
+#endif
+#ifdef OPENVX_USE_STREAMING
+    int total_openvx_stream_tests = 0;
+    int total_openvx_passed_stream_tests = 0;
+    int total_openvx_failed_stream_tests = 0;
+#endif
+#ifdef OPENVX_USE_USER_DATA_OBJECT
+    int total_openvx_udo_tests = 0;
+    int total_openvx_passed_udo_tests = 0;
+    int total_openvx_failed_udo_tests = 0;
+#endif
+#ifdef OPENVX_USE_U1
+    int total_openvx_u1_tests = 0;
+    int total_openvx_passed_u1_tests = 0;
+    int total_openvx_failed_u1_tests = 0;
+#endif
+#ifdef OPENVX_CONFORMANCE_VISION
+    int total_openvx_vision_tests = 0;
+    int total_openvx_passed_vision_tests = 0;
+    int total_openvx_failed_vision_tests = 0;
+#define vision_test_num 51
+    int conformance_vision_status = 0;
+    const char* vision_test[vision_test_num] = {"vxuConvertDepth", "vxConvertDepth", "ChannelCombine", "ChannelExtract", "ColorConvert",
+                                   "vxuAddSub", "vxAddSub", "vxuNot", "vxNot", "vxuBinOp1u",
+                                   "vxBinOp1u", "vxuBinOp8u", "vxBinOp8u", "vxuBinOp16s", "vxBinOp16s",
+                                   "vxuMultiply", "vxMultiply", "Histogram", "EqualizeHistogram", "MeanStdDev",
+                                   "MinMaxLoc", "WeightedAverage", "Threshold", "Box3x3", "Convolve",
+                                   "Dilate3x3", "Erode3x3", "Gaussian3x3", "Median3x3", "Sobel3x3",
+                                   "NonLinearFilter", "Integral", "Magnitude", "Phase", "FastCorners",
+                                   "HarrisCorners", "Scale", "WarpAffine", "WarpPerspective", "Remap",
+                                   "GaussianPyramid", "HalfScaleGaussian", "LaplacianPyramid", "LaplacianReconstruct", "vxuCanny",
+                                   "vxCanny", "OptFlowPyrLK", "LUT", "Accumulate", "AccumulateSquare", "AccumulateWeighted"};
+#endif
+#ifdef OPENVX_CONFORMANCE_NEURAL_NETWORKS
+    int total_openvx_neural_networks_tests = 0;
+    int total_openvx_passed_neural_networks_tests = 0;
+    int total_openvx_failed_neural_networks_tests = 0;
+#endif
+#ifdef OPENVX_CONFORMANCE_NNEF_IMPORT
+    int total_openvx_nnef_tests = 0;
+    int total_openvx_passed_nnef_tests = 0;
+    int total_openvx_failed_nnef_tests = 0;
+#endif
+#ifdef OPENVX_USE_ENHANCED_VISION
+    int total_openvx_use_enhanced_vision_tests = 0;
+    int total_openvx_passed_use_enhanced_vision_tests = 0;
+    int total_openvx_failed_use_enhanced_vision_tests = 0;
+#define enhance_vision_num  13
+    int conformance_enhanced_vision_status = 0;
+    const char* enhanced_vision_test[enhance_vision_num] = {"Min", "Max", "Nonmaxsuppression", "TensorOp", "LBP", "BilateralFilter",
+    "MatchTemplate", "Houghlinesp", "Copy", "HogCells", "HogFeatures", "ControlFlow", "Scalar"};
+#endif
     //====================================================
 
     int use_global_context = 0;
@@ -671,6 +735,7 @@
 
     for (testcase = g_firstTestCase; testcase; testcase = testcase->next_)
     {
+        int test_ran = 0;
         int run_tests = 0;
         int extended_flag = 0;
         struct CT_TestEntry* test = testcase->tests_;
@@ -678,16 +743,48 @@
 #ifdef CT_TEST_TIME
         int64_t timestart_testCase = CT_getTickCount();
 #endif
+#ifdef OPENVX_USE_U1
+        // Counter for the number of U1 tests in the given testcase
+        int run_tests_u1 = 0;
+#endif
 
         for(; test; test = test->next_)
         {
             if (!test->args_)
-                run_tests += run_test(testcase, test, 0, run_tests, &extended_flag);
+            {
+                test_ran = run_test(testcase, test, 0, run_tests, &extended_flag);
+                run_tests += test_ran;
+#ifdef OPENVX_USE_U1
+                // Look for the "_U1_" tag in the full test name
+                char test_name_full[1024];
+                void *parg = get_test_params(test, 0);
+                get_test_name(test_name_full, sizeof(test_name_full), testcase, test, parg, 0);
+
+                if (test_ran && strstr(test_name_full, "_U1_") != NULL) {
+                    total_openvx_u1_tests++;
+                    run_tests_u1++;
+                }
+#endif
+            }
+
             else
             {
                 int narg = 0;
                 for (; narg < test->args_count_; narg++)
-                    run_tests += run_test(testcase, test, narg, run_tests, &extended_flag);
+                {
+                    test_ran = run_test(testcase, test, narg, run_tests, &extended_flag);
+                    run_tests += test_ran;
+#ifdef OPENVX_USE_U1
+                    char test_name_full[1024];
+                    void *parg = get_test_params(test, narg);
+                    get_test_name(test_name_full, sizeof(test_name_full), testcase, test, parg, narg);
+
+                    if (test_ran && strstr(test_name_full, "_U1_") != NULL) {
+                        total_openvx_u1_tests++;
+                        run_tests_u1++;
+                    }
+#endif
+                }
             }
         }
 
@@ -708,17 +805,74 @@
             if (strcmp("TensorNN", testcase->name_) == 0) {
                 total_openvx_nn_tests += run_tests;
             }
-            else
 #endif
 #ifdef OPENVX_USE_IX
             if (strcmp("ExtensionObject", testcase->name_) == 0) {
                     total_openvx_ix_tests += run_tests;
             }
-            else
 #endif
-            {
-                    total_openvx_core_tests += run_tests;
+#ifdef OPENVX_USE_PIPELINING
+            if (strcmp("GraphPipeline", testcase->name_) == 0) {
+                    total_openvx_pipe_tests += run_tests;
             }
+#endif
+#ifdef OPENVX_USE_STREAMING
+            if (strcmp("GraphStreaming", testcase->name_) == 0) {
+                    total_openvx_stream_tests += run_tests;
+            }
+#endif
+#ifdef OPENVX_USE_USER_DATA_OBJECT
+            if (strcmp("UserDataObject", testcase->name_) == 0) {
+                    total_openvx_udo_tests += run_tests;
+            }
+#endif
+#ifdef OPENVX_CONFORMANCE_VISION
+            conformance_vision_status = 0;    // Reset so a match from a previous testcase doesn't carry over
+            for (int i = 0; i < vision_test_num; i++) {
+                if (strcmp(vision_test[i], testcase->name_) == 0) {
+                    conformance_vision_status = 1;
+                    i = vision_test_num;
+                }
+            }
+            if (conformance_vision_status == 1) {
+                total_openvx_vision_tests += run_tests;
+            }
+#endif
+#ifdef OPENVX_CONFORMANCE_NEURAL_NETWORKS
+#ifdef OPENVX_USE_NN
+            if (strcmp("TensorNN", testcase->name_) == 0) {
+                total_openvx_neural_networks_tests += run_tests;
+            }
+#endif
+#ifdef OPENVX_USE_NN_16
+            if (strcmp("TensorNetworks", testcase->name_) == 0) {
+                total_openvx_neural_networks_tests += run_tests;
+            }
+#endif
+#endif
+#ifdef OPENVX_CONFORMANCE_NNEF_IMPORT
+            if (strcmp("TensorNNEFImport", testcase->name_) == 0) {
+                total_openvx_nnef_tests += run_tests;
+            }
+#endif
+#ifdef OPENVX_USE_ENHANCED_VISION
+            conformance_enhanced_vision_status = 0;    // Reset so a match from a previous testcase doesn't carry over
+            for (int i = 0; i < enhance_vision_num; i++) {
+                if (strcmp(enhanced_vision_test[i], testcase->name_) == 0) {
+                    conformance_enhanced_vision_status = 1;
+                    i = enhance_vision_num;
+                }
+            }
+            if (conformance_enhanced_vision_status == 1) {
+                total_openvx_use_enhanced_vision_tests += run_tests;
+            }
+#endif
+            else
+            {
+#ifdef OPENVX_USE_U1
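+                // U1 tests are counted in their own total, so exclude them from the core count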
+                total_openvx_core_tests -= run_tests_u1;
+#endif
+                total_openvx_core_tests += run_tests;
+            }
+
             //====================================================
 
             total_run_tests += run_tests;
@@ -750,6 +904,24 @@
 
 
                 //================ OpenVX Specific ===================
+#ifdef OPENVX_CONFORMANCE_VISION
+                for (int i = 0; i < vision_test_num; i++) {
+                    conformance_vision_status = 0;
+                    if (strncmp(vision_test[i], test_name, strlen(vision_test[i])) == 0) {
+                        conformance_vision_status = 1;
+                        i = vision_test_num;
+                    }
+                }
+#endif
+#ifdef OPENVX_USE_ENHANCED_VISION
+                for (int i = 0; i < enhance_vision_num; i++) {
+                    conformance_enhanced_vision_status = 0;
+                    if (strncmp(enhanced_vision_test[i], test_name, strlen(enhanced_vision_test[i])) == 0) {
+                        conformance_enhanced_vision_status = 1;
+                        i = enhance_vision_num;
+                    }
+                }
+#endif
 #ifdef OPENVX_USE_NN
                 if (strncmp("TensorNN", test_name, sizeof("TensorNN") - 1) == 0) {
                     total_openvx_failed_nn_tests ++;
@@ -762,6 +934,62 @@
                 }
                 else
 #endif
+#ifdef OPENVX_USE_PIPELINING
+                if (strncmp("GraphPipeline", test_name, sizeof("GraphPipeline") - 1) == 0) {
+                    total_openvx_failed_pipe_tests ++;
+                }
+                else
+#endif
+#ifdef OPENVX_USE_STREAMING
+                if (strncmp("GraphStreaming", test_name, sizeof("GraphStreaming") - 1) == 0) {
+                    total_openvx_failed_stream_tests ++;
+                }
+                else
+#endif
+#ifdef OPENVX_USE_USER_DATA_OBJECT
+                if (strncmp("UserDataObject", test_name, sizeof("UserDataObject") - 1) == 0) {
+                    total_openvx_failed_udo_tests ++;
+                }
+                else
+#endif
+#ifdef OPENVX_CONFORMANCE_VISION
+                if (conformance_vision_status == 1) {
+                    total_openvx_failed_vision_tests ++;
+                }
+                else
+#endif
+#ifdef OPENVX_CONFORMANCE_NEURAL_NETWORKS
+#ifdef OPENVX_USE_NN
+                if (strncmp("TensorNN", test_name, sizeof("TensorNN") - 1) == 0) {
+                    total_openvx_failed_neural_networks_tests ++;
+                }
+                else
+#endif
+#ifdef OPENVX_USE_NN_16
+                if (strncmp("TensorNetworks", test_name, sizeof("TensorNetworks") - 1) == 0) {
+                    total_openvx_failed_neural_networks_tests ++;
+                }
+                else
+#endif
+#endif
+#ifdef OPENVX_CONFORMANCE_NNEF_IMPORT
+                if (strncmp("TensorNNEFImport", test_name, sizeof("TensorNNEFImport") - 1) == 0) {
+                    total_openvx_failed_nnef_tests ++;
+                }
+                else
+#endif
+#ifdef OPENVX_USE_ENHANCED_VISION
+                if (conformance_enhanced_vision_status == 1) {
+                    total_openvx_failed_use_enhanced_vision_tests++;
+                }
+                else
+#endif
+#ifdef OPENVX_USE_U1
+                if (strstr(test_name, "_U1_") != NULL) {
+                    total_openvx_failed_u1_tests ++;
+                }
+                else
+#endif
                 {
                     total_openvx_failed_core_tests ++;
                 }
@@ -801,6 +1029,62 @@
                (total_openvx_failed_ix_tests==0?"PASSED":"FAILED")
                );
 #endif
+#ifdef OPENVX_USE_PIPELINING
+        total_openvx_passed_pipe_tests = total_openvx_pipe_tests - total_openvx_failed_pipe_tests;
+        printf("To be conformant to the Pipelining extension, %d required test(s) must pass. %d tests passed, %d tests failed. %s.\n",
+               total_openvx_pipe_tests, total_openvx_passed_pipe_tests, total_openvx_failed_pipe_tests,
+               (total_openvx_failed_pipe_tests==0?"PASSED":"FAILED")
+               );
+#endif
+#ifdef OPENVX_USE_STREAMING
+        total_openvx_passed_stream_tests = total_openvx_stream_tests - total_openvx_failed_stream_tests;
+        printf("To be conformant to the Streaming extension, %d required test(s) must pass. %d tests passed, %d tests failed. %s.\n",
+               total_openvx_stream_tests, total_openvx_passed_stream_tests, total_openvx_failed_stream_tests,
+               (total_openvx_failed_stream_tests==0?"PASSED":"FAILED")
+               );
+#endif
+#ifdef OPENVX_USE_USER_DATA_OBJECT
+        total_openvx_passed_udo_tests = total_openvx_udo_tests - total_openvx_failed_udo_tests;
+        printf("To be conformant to the User Data Object extension, %d required test(s) must pass. %d tests passed, %d tests failed. %s.\n",
+               total_openvx_udo_tests, total_openvx_passed_udo_tests, total_openvx_failed_udo_tests,
+               (total_openvx_failed_udo_tests==0?"PASSED":"FAILED")
+               );
+#endif
+#ifdef OPENVX_USE_U1
+        total_openvx_passed_u1_tests = total_openvx_u1_tests - total_openvx_failed_u1_tests;
+        printf("To be conformant to the U1 conformance profile, %d required test(s) must pass. %d tests passed, %d tests failed. %s.\n",
+               total_openvx_u1_tests, total_openvx_passed_u1_tests, total_openvx_failed_u1_tests,
+               (total_openvx_failed_u1_tests==0?"PASSED":"FAILED")
+               );
+#endif
+#ifdef OPENVX_CONFORMANCE_VISION
+        total_openvx_passed_vision_tests = total_openvx_vision_tests - total_openvx_failed_vision_tests;
+        printf("To be conformant to the Vision conformance profile, %d required test(s) must pass. %d tests passed, %d tests failed. %s.\n",
+            total_openvx_vision_tests, total_openvx_passed_vision_tests, total_openvx_failed_vision_tests,
+            (total_openvx_failed_vision_tests==0?"PASSED":"FAILED")
+            );
+#endif
+#ifdef OPENVX_CONFORMANCE_NEURAL_NETWORKS
+        total_openvx_passed_neural_networks_tests = total_openvx_neural_networks_tests - total_openvx_failed_neural_networks_tests;
+        printf("To be conformant to the Neural Networks conformance profile, %d required test(s) must pass. %d tests passed, %d tests failed. %s.\n",
+            total_openvx_neural_networks_tests, total_openvx_passed_neural_networks_tests, total_openvx_failed_neural_networks_tests,
+            (total_openvx_failed_neural_networks_tests==0?"PASSED":"FAILED")
+            );
+#endif
+#ifdef OPENVX_CONFORMANCE_NNEF_IMPORT
+        total_openvx_passed_nnef_tests = total_openvx_nnef_tests - total_openvx_failed_nnef_tests;
+        printf("To be conformant to the Vision NNEF conformance profile, %d required test(s) must pass. %d tests passed, %d tests failed. %s.\n",
+            total_openvx_nnef_tests, total_openvx_passed_nnef_tests, total_openvx_failed_nnef_tests,
+            (total_openvx_failed_nnef_tests==0?"PASSED":"FAILED")
+            );
+#endif
+#ifdef OPENVX_USE_ENHANCED_VISION
+        total_openvx_passed_use_enhanced_vision_tests = total_openvx_use_enhanced_vision_tests - total_openvx_failed_use_enhanced_vision_tests;
+        printf("To be conformant to the enhanced vision conformance profile, %d required test(s) must pass. %d tests passed, %d tests failed. %s.\n",
+            total_openvx_use_enhanced_vision_tests, total_openvx_passed_use_enhanced_vision_tests, total_openvx_failed_use_enhanced_vision_tests,
+            (total_openvx_failed_use_enhanced_vision_tests == 0 ? "PASSED" : "FAILED")
+            );
+#endif
 
         printf("Note: The %d disabled tests are optional and are not considered for conformance.\n",
                g_context.internal_->g_num_disabled_tests_);
diff --git a/test_engine/test_image.c b/test_engine/test_image.c
index 7f4eb93..5d06f97 100644
--- a/test_engine/test_image.c
+++ b/test_engine/test_image.c
@@ -27,6 +27,8 @@
 {
     switch(format)
     {
+        case VX_DF_IMAGE_U1:
+            return 1 * 1;
         case VX_DF_IMAGE_U8:
             return 8 * 1;
         case VX_DF_IMAGE_U16:
@@ -55,6 +57,7 @@
 {
     switch(format)
     {
+        case VX_DF_IMAGE_U1:
         case VX_DF_IMAGE_U8:
         case VX_DF_IMAGE_U16:
         case VX_DF_IMAGE_S16:
@@ -76,6 +79,8 @@
     uint32_t factor = 0;
     switch(image->format)
     {
+        case VX_DF_IMAGE_U1:
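+            /* For U1 the stride is counted in pixels (bits); round up to whole bytes */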
+            return (uint32_t)(image->stride + 7) / 8;
         case VX_DF_IMAGE_U8:
         case VX_DF_IMAGE_NV21:
         case VX_DF_IMAGE_NV12:
@@ -107,7 +112,16 @@
 
 static size_t ct_image_data_size(uint32_t width, uint32_t height, vx_df_image format)
 {
-    return (size_t)width * height * ct_image_bits_per_pixel(format) / 8;
+    if (format == VX_DF_IMAGE_U1)
+    {
+        /* round up to full bytes */
+        size_t rowSize = (size_t)(width * ct_image_bits_per_pixel(format) + 7) / 8;
+        return (size_t)rowSize * height;
+    }
+    else
+    {
+        return (size_t)width * height * ct_image_bits_per_pixel(format) / 8;
+    }
 }
 
 #ifdef DEBUG_CT_IMAGE
@@ -225,7 +239,8 @@
             break;
     };
 
-    image = ct_allocate_image_hdr_impl(width, height, width, format, 1);
+    uint32_t stride = (format == VX_DF_IMAGE_U1) ? ((width + 7) / 8) * 8 : width;   // U1 y-stride is multiple of 8
+    image = ct_allocate_image_hdr_impl(width, height, stride, format, 1);
 
 #ifdef DEBUG_CT_IMAGE
     printf("ALLOCATED: "); ct_print_image(image);
@@ -247,6 +262,9 @@
 
     switch(img->format) // not sure if roi for multi-plane formats is needed at all
     {
+        case VX_DF_IMAGE_U1:
+            CT_ASSERT_(return 0, (img->roi.x + roi.x) % 8 == 0);   // U1 subimage must start on byte boundary in parent image
+            break;
         case VX_DF_IMAGE_UYVY:
         case VX_DF_IMAGE_YUYV:
             CT_ASSERT_(return 0, roi.width % 2 == 0 && roi.x % 2 == 0); // width must be even
@@ -273,7 +291,18 @@
         ct_image_addref(img);
         image->data_begin_ = img->data_begin_;
         image->refcount_   = img->refcount_;
+
         image->data.y      = img->data.y + (img->stride * roi.y + roi.x) * bpp / 8;
+
+        if (img->format == VX_DF_IMAGE_U1)
+        {
+            int xShft = ct_div_floor(img->roi.x % 8 + roi.x, 8);
+            img->data.y = img->data.y + ct_stride_bytes(img) * roi.y + xShft;
+        }
+        else
+        {
+            img->data.y = img->data.y + (img->stride * roi.y + roi.x) * bpp / 8;
+        }
     }
 
 #ifdef DEBUG_CT_IMAGE
@@ -319,7 +348,15 @@
     }
     else
     {
-        img->data.y = img->data.y + (img->stride * top + left) * ct_image_bits_per_pixel(img->format) / 8;
+        if (img->format == VX_DF_IMAGE_U1)
+        {
+            int xShft = ct_div_floor(img->roi.x % 8 + left, 8);
+            img->data.y = img->data.y + ct_stride_bytes(img) * top + xShft;
+        }
+        else
+        {
+            img->data.y = img->data.y + (img->stride * top + left) * ct_image_bits_per_pixel(img->format) / 8;
+        }
         img->roi.x  = (uint32_t)new_x;
         img->roi.y  = (uint32_t)new_y;
         img->width  = (uint32_t)new_width;
@@ -405,6 +442,10 @@
 
     switch (format)
     {
+    case VX_DF_IMAGE_U1:
+        channel_step_x = 0;
+        break;
+
     case VX_DF_IMAGE_U8:
         channel_step_x = 1;
         break;
@@ -536,6 +577,18 @@
             {
                 switch (ct_format)
                 {
+                case VX_DF_IMAGE_U1:
+                {
+                    vx_uint8* ct_ptr = (vx_uint8*)((vx_uint8*)p_ct_base + y * ct_stride_bytes(ctimg) + x / 8);
+                    vx_uint8* vx_ptr = (vx_uint8*)vxFormatImagePatchAddress2d(p_vx_base, x, y, &addr);
+                    vx_uint8 mask = 1 << (x % 8);
+                    if (COPY_CT_IMAGE_TO_VX_IMAGE == dir)
+                        vx_ptr[0] = (vx_ptr[0] & ~mask) | (ct_ptr[0] & mask);
+                    else
+                        ct_ptr[0] = (ct_ptr[0] & ~mask) | (vx_ptr[0] & mask);
+                }
+                break;
+
                 case VX_DF_IMAGE_U8:
                 {
                     vx_uint8* ct_ptr = (vx_uint8*)((vx_uint8*)p_ct_base + y * ctimg->stride * ct_elem_size + x * ct_elem_size);
@@ -741,6 +794,72 @@
     return (COPY_CT_IMAGE_TO_VX_IMAGE == dir ? (void*)vximg : (void*)ctimg);
 }
 
+// Down conversion according to the spec (i.e. valueU1 = valueU8 != 0 ? 1 : 0)
+void U8_ct_image_to_U1_ct_image(CT_Image img_in, CT_Image img_out)
+{
+    ASSERT(img_in);
+    ASSERT(img_out);
+    ASSERT(img_in->format == VX_DF_IMAGE_U8);
+    ASSERT(img_out->format == VX_DF_IMAGE_U1);
+    ASSERT( (img_in->width == img_out->width) && (img_in->height == img_out->height) );
+
+    uint8_t pixel;
+    uint32_t x, y, xShftd;
+    uint8_t* in_base_ptr  = ct_image_get_plane_base(img_in, 0);
+    uint8_t* out_base_ptr = ct_image_get_plane_base(img_out, 0);
+    for (y = 0; y < img_in->height; ++y)
+    {
+        for (x = 0; x < img_in->width; ++x)
+        {
+            xShftd = x + img_out->roi.x % 8;     // U1 ROI respecting offset
+            pixel  = ((in_base_ptr[y * img_in->stride + x] != 0) ? 1 : 0) << (xShftd % 8);
+            out_base_ptr[y * ct_stride_bytes(img_out) + xShftd / 8] =
+                (out_base_ptr[y * ct_stride_bytes(img_out) + xShftd / 8] & ~(1 << (xShftd % 8))) | pixel;
+        }
+    }
+}
+
+// Up conversion according to the spec (i.e. valueU8 = valueU1 != 0 ? 255 : 0)
+void U1_ct_image_to_U8_ct_image(CT_Image img_in, CT_Image img_out)
+{
+    ASSERT(img_in);
+    ASSERT(img_out);
+    ASSERT(img_in->format == VX_DF_IMAGE_U1);
+    ASSERT(img_out->format == VX_DF_IMAGE_U8);
+    ASSERT( (img_in->width == img_out->width) && (img_in->height == img_out->height) );
+
+    uint8_t pixel;
+    uint32_t x, y, xShftd;
+    uint8_t* in_base_ptr  = ct_image_get_plane_base(img_in, 0);
+    uint8_t* out_base_ptr = ct_image_get_plane_base(img_out, 0);
+    for (y = 0; y < img_in->height; ++y)
+    {
+        for (x = 0; x < img_in->width; ++x)
+        {
+            xShftd = x + img_in->roi.x % 8;      // U1 ROI respecting offset
+            pixel  = in_base_ptr[y * ct_stride_bytes(img_in) + xShftd / 8] & (1 << (xShftd % 8));
+            out_base_ptr[y * img_out->stride + x] = (pixel != 0) ? 255 : 0;
+        }
+    }
+}
+
+// Threshold a U8 CT_Image with the input uint8_t threshold (new_val = old_val > thresh ? 255 : 0)
+void threshold_U8_ct_image(CT_Image img, uint8_t thresh)
+{
+    ASSERT(img);
+    ASSERT(img->width > 0 && img->height > 0);
+    ASSERT(img->format == VX_DF_IMAGE_U8);
+
+    uint32_t x, y;
+    for (y = 0; y < img->height; y++)
+    {
+        for (x = 0; x < img->width; x++)
+        {
+            img->data.y[y * img->stride + x] = (img->data.y[y * img->stride + x] > thresh) ? 255 : 0;
+        }
+    }
+}
+
 /*
     wrap_half_modulo: specifies if the smallest value follows the biggest (e.g. 255 + 1 == 0 or not)
     0            = default - half of data type range
@@ -799,7 +918,39 @@
             }                                                                                   \
     }
 
-    if (expected->format == VX_DF_IMAGE_U8)
+#define FIND_MAX_DIFF_SIMPLE_U1(eptr, aptr)                                                     \
+    {                                                                                           \
+        uint32_t xStartE, xStartA, xE, xA, maskE, maskA, diff;                                  \
+        xStartE = expected->roi.x % 8;                                                          \
+        xStartA =   actual->roi.x % 8;                                                          \
+        for (i = 0; i < expected->height; ++i)                                                  \
+            for (j = 0; j < expected->width; ++j)                                               \
+            {                                                                                   \
+                xE = j + xStartE;                                                               \
+                xA = j + xStartA;                                                               \
+                maskE = 1u << (xE % 8);                                                         \
+                maskA = 1u << (xA % 8);                                                         \
+                diff = ((eptr[i * ct_stride_bytes(expected) + xE / 8] & maskE) >> (xE % 8) !=   \
+                        (aptr[i * ct_stride_bytes(actual)   + xA / 8] & maskA) >> (xA % 8)) ? 1u : 0u; \
+               if (diff > max_diff)                                                             \
+               {                                                                                \
+                   max_diff = diff;                                                             \
+                   max_y = i;                                                                   \
+                   max_x = j;                                                                   \
+                   vale = (eptr[i * ct_stride_bytes(expected) + xE / 8] & maskE) >> (xE % 8);   \
+                   vala = (aptr[i * ct_stride_bytes(actual)   + xA / 8] & maskA) >> (xA % 8);   \
+               }                                                                                \
+               if (diff > threshold)                                                            \
+                   ++diff_pixels;                                                               \
+            }                                                                                   \
+    }                                                                                           \
+
+    if (expected->format == VX_DF_IMAGE_U1)
+    {
+        // Wrap_half_modulo is not used for U1 image comparisons
+        FIND_MAX_DIFF_SIMPLE_U1(expected->data.y, actual->data.y)
+    }
+    else if (expected->format == VX_DF_IMAGE_U8)
     {
         if (!wrap_half_modulo) wrap_half_modulo = 1u << 7;
         FIND_MAX_DIFF_SIMPLE(expected->data.y, actual->data.y)
@@ -829,7 +980,8 @@
         // skip check
     }
 
-    if (expected->format == VX_DF_IMAGE_U8 || expected->format == VX_DF_IMAGE_U16 || expected->format == VX_DF_IMAGE_U32)
+    if (expected->format == VX_DF_IMAGE_U1 || expected->format == VX_DF_IMAGE_U8 ||
+        expected->format == VX_DF_IMAGE_U16 || expected->format == VX_DF_IMAGE_U32)
     {
         if (max_diff > threshold)
         {
@@ -950,6 +1102,40 @@
     return 1;
 }
 
+uint8_t* ct_image_data_ptr_1u(CT_Image image, uint32_t x, uint32_t y)
+{
+    uint8_t* ptr = &image->data.y[y * ct_stride_bytes(image) + x / 8];
+    return ptr;
+}
+
+uint8_t ct_image_data_replicate_1u(CT_Image image, int32_t x, int32_t y)
+{
+    uint8_t offset, byte, pxl_val;
+    int32_t border_x_start = image->roi.x % 8;
+
+    EXPECT(image->width > 0 && image->height > 0);
+    if (x < border_x_start) x = border_x_start;     // Handle ROI byte-shift offset
+    if (x >= (int)image->width + border_x_start) x = image->width - 1 + border_x_start;
+    if (y < 0) y = 0;
+    if (y >= (int)image->height) y = image->height - 1;
+    offset = x % 8;
+    byte = image->data.y[y * ct_stride_bytes(image) + x / 8];
+    pxl_val = (byte & (1u << offset)) >> offset;
+    return pxl_val;
+}
+
+uint8_t ct_image_data_constant_1u(CT_Image image, int32_t x, int32_t y, vx_bool constant_value)
+{
+    uint8_t offset, byte, pxl_val;
+    int32_t border_x_start = image->roi.x % 8;
+
+    if (x < border_x_start || x >= (int)image->width + border_x_start || y < 0 || y >= (int)image->height)
+        return (uint8_t)(constant_value ? 1 : 0);
+    offset = x % 8;
+    byte = image->data.y[y * ct_stride_bytes(image) + x / 8];
+    pxl_val = (byte & (1u << offset)) >> offset;
+    return pxl_val;
+}
 
 uint8_t* ct_image_data_ptr_8u(CT_Image image, uint32_t x, uint32_t y)
 {
@@ -1005,13 +1191,27 @@
             image, (int)sizeof(image->format), (char*)&image->format,
             image->width, image->height);
 
-    if (image->format == VX_DF_IMAGE_U8)
+    if (image->format == VX_DF_IMAGE_U1)
     {
         for (y = 0; y < max_y; ++y)
         {
             for (x = 0; x < max_x; ++x)
             {
-                uint8_t* ptr = ct_image_data_ptr_8u(image, x, y);
+                int xShftd = x + image->roi.x % 8;
+                uint8_t offset = xShftd % 8;
+                uint8_t* ptr = CT_IMAGE_DATA_PTR_1U(image, xShftd, y);
+                printf("%1d ", (int)( (*ptr & (1u << offset)) >> offset ));
+            }
+            printf("%s", strend);
+        }
+    }
+    else if (image->format == VX_DF_IMAGE_U8)
+    {
+        for (y = 0; y < max_y; ++y)
+        {
+            for (x = 0; x < max_x; ++x)
+            {
+                uint8_t* ptr = CT_IMAGE_DATA_PTR_8U(image, x, y);
                 printf("%3d ", (int)*ptr);
             }
             printf("%s", strend);
@@ -1110,30 +1310,35 @@
         format = VX_DF_IMAGE_U8;
     }
 
-    ASSERT( format == VX_DF_IMAGE_U8 ||
+    ASSERT( format == VX_DF_IMAGE_U1  || format == VX_DF_IMAGE_U8  ||
             format == VX_DF_IMAGE_U16 || format == VX_DF_IMAGE_S16 ||
-            format == VX_DF_IMAGE_U32 || format == VX_DF_IMAGE_S32);
+            format == VX_DF_IMAGE_U32 || format == VX_DF_IMAGE_S32 );
 
 #undef CASE_FILL_RNG
-#define CASE_FILL_RNG(format, type, cast_macro) \
-    case format: \
-    { \
-    uint8_t* ptr = image->data.y; \
-    for( p = 0; p < nplanes; p++ ) \
-    for( y = 0; y < height[p]; y++, ptr += stride[p] ) \
-    { \
-        type* tptr = (type*)ptr; \
-        for( x = 0; x < width[p]; x++ ) \
-        { \
-            int val = CT_RNG_NEXT_INT(*seed, a, b); \
-            tptr[x] = cast_macro(val); \
-        } \
-    } \
-    } \
+#define CASE_FILL_RNG(format, type, cast_macro)                                                 \
+    case format:                                                                                \
+    {                                                                                           \
+        uint8_t* ptr = image->data.y;                                                           \
+        for( p = 0; p < nplanes; p++ )                                                          \
+            for( y = 0; y < height[p]; y++, ptr += stride[p] )                                  \
+            {                                                                                   \
+                type* tptr = (type*)ptr;                                                        \
+                for( x = 0; x < width[p]; x++ )                                                 \
+                {                                                                               \
+                    uint32_t x_adr = format != VX_DF_IMAGE_U1 ? x : (x / 8);                    \
+                    int   val = CT_RNG_NEXT_INT(*seed, a, b);                                   \
+                    type tval = cast_macro(val);                                                \
+                    tval = format != VX_DF_IMAGE_U1 ? tval :                                    \
+                           (tptr[x_adr] & ~(1 << (x % 8))) | (tval << (x % 8)); /* Set U1 bit */ \
+                    tptr[x_adr] = tval;                                                         \
+                }                                                                               \
+            }                                                                                   \
+    }                                                                                           \
     break
 
     switch( format )
     {
+        CASE_FILL_RNG(VX_DF_IMAGE_U1, uint8_t, CT_CAST_U1);
         CASE_FILL_RNG(VX_DF_IMAGE_U8, uint8_t, CT_CAST_U8);
         CASE_FILL_RNG(VX_DF_IMAGE_U16, uint16_t, CT_CAST_U16);
         CASE_FILL_RNG(VX_DF_IMAGE_S16, int16_t, CT_CAST_S16);
@@ -1160,6 +1365,7 @@
 
     switch (format)
     {
+    case VX_DF_IMAGE_U1:
     case VX_DF_IMAGE_U8:
     case VX_DF_IMAGE_U16:
     case VX_DF_IMAGE_S16:
@@ -1189,6 +1395,7 @@
 {
     switch (format)
     {
+        case VX_DF_IMAGE_U1:
         case VX_DF_IMAGE_U8:
         case VX_DF_IMAGE_U16:
         case VX_DF_IMAGE_S16:
@@ -1217,6 +1424,8 @@
 
     switch (format)
     {
+        case VX_DF_IMAGE_U1:
+            return 0;
         case VX_DF_IMAGE_U8:
             return 1;
         case VX_DF_IMAGE_U16:
@@ -1253,6 +1462,8 @@
 
     switch (format)
     {
+        case VX_DF_IMAGE_U1:
+            return (image->stride + 7) / 8;
         case VX_DF_IMAGE_U8:
             return image->stride;
         case VX_DF_IMAGE_U16:
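Across the test_image.c changes above, U1 pixels are packed LSB-first: pixel x of a row lives in byte x / 8 at bit position x % 8, row strides are rounded up to whole bytes, and an ROI contributes an extra roi.x % 8 bit offset inside the first byte. A minimal sketch of that addressing convention (the accessors below are illustrative, not part of the test engine):

    #include <stdint.h>

    /* Illustrative U1 accessors, assuming the LSB-first packing used above. */
    static inline uint8_t u1_get(const uint8_t *row, uint32_t x)
    {
        return (uint8_t)((row[x / 8] >> (x % 8)) & 1u);
    }

    static inline void u1_set(uint8_t *row, uint32_t x, uint8_t value)
    {
        uint8_t mask = (uint8_t)(1u << (x % 8));
        row[x / 8] = (uint8_t)((row[x / 8] & ~mask) | ((value & 1u) << (x % 8)));
    }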
diff --git a/test_engine/test_image.h b/test_engine/test_image.h
index f00d161..a62d13b 100644
--- a/test_engine/test_image.h
+++ b/test_engine/test_image.h
@@ -1,4 +1,4 @@
-/* 
+/*
 
  * Copyright (c) 2012-2017 The Khronos Group Inc.
  *
@@ -79,6 +79,15 @@
 CT_Image ct_get_image_roi_(CT_Image img, uint32_t x_start, uint32_t y_start, uint32_t width, uint32_t height);
 void ct_adjust_roi(CT_Image img, int left, int top, int right, int bottom);
 
+uint8_t* ct_image_data_ptr_1u(CT_Image image, uint32_t x, uint32_t y);
+#define CT_IMAGE_DATA_PTR_1U(image, x, y) ct_image_data_ptr_1u(image, x, y)
+
+uint8_t ct_image_data_replicate_1u(CT_Image image, int32_t x, int32_t y);
+#define CT_IMAGE_DATA_REPLICATE_1U(image, x, y) ct_image_data_replicate_1u(image, x, y)
+
+uint8_t ct_image_data_constant_1u(CT_Image image, int32_t x, int32_t y, vx_bool constant_value);
+#define CT_IMAGE_DATA_CONSTANT_1U(image, x, y, constant_value) ct_image_data_constant_1u(image, x, y, constant_value)
+
 #if 1
 #define CT_IMAGE_DATA_PTR_8U(image, x, y_) &(image)->data.y[(y_) * (image)->stride + (x)]
 #else
@@ -100,6 +109,23 @@
 #define CT_IMAGE_DATA_PTR_RGBX(image, x, y) &(image)->data.rgbx[(y) * (image)->stride + (x)]
 
 
+#define CT_FILL_IMAGE_1U(ret_error, image, op) \
+    ASSERT_(ret_error, image != NULL); \
+    ASSERT_(ret_error, image->format == VX_DF_IMAGE_U1); \
+    ASSERT_(ret_error, image->width > 0); \
+    ASSERT_(ret_error, image->height > 0); \
+    { \
+        uint32_t x, y; \
+        for (y = 0; y < image->height; y++) { \
+            for (x = 0; x < image->width; x++) { \
+                uint32_t xShftd = x + image->roi.x % 8; /* x respecting start of ROI in first byte */ \
+                uint8_t  offset = xShftd % 8; (void)offset; \
+                uint8_t* dst_data = CT_IMAGE_DATA_PTR_1U(image, xShftd, y); (void)dst_data; \
+                op; \
+            } \
+        } \
+    }
+
 #define CT_FILL_IMAGE_8U(ret_error, image, op) \
     ASSERT_(ret_error, image != NULL); \
     ASSERT_(ret_error, image->format == VX_DF_IMAGE_U8); \
@@ -115,7 +141,6 @@
         } \
     }
 
-
 #define CT_FILL_IMAGE_16S(ret_error, image, op) \
     ASSERT_(ret_error, image != NULL); \
     ASSERT_(ret_error, image->format == VX_DF_IMAGE_S16); \
@@ -161,6 +186,12 @@
 #define ct_image_copyto_vx_image(vximg, ctimg) ct_image_copy_impl(ctimg, vximg, COPY_CT_IMAGE_TO_VX_IMAGE, __FUNCTION__, __FILE__, __LINE__)
 #define ct_image_copyfrom_vx_image(ctimg, vximg) ct_image_copy_impl(ctimg, vximg, COPY_VX_IMAGE_TO_CT_IMAGE, __FUNCTION__, __FILE__, __LINE__)
 
+void U8_ct_image_to_U1_ct_image(CT_Image img_in, CT_Image img_out);
+
+void U1_ct_image_to_U8_ct_image(CT_Image img_in, CT_Image img_out);
+
+void threshold_U8_ct_image(CT_Image img, uint8_t thresh);
+
 #define EXPECT_EQ_CTIMAGE(expected, actual) ct_assert_eq_ctimage_impl(expected, actual, 0, (uint32_t)-1, #expected, #actual, __FUNCTION__, __FILE__, __LINE__)
 #define ASSERT_EQ_CTIMAGE(expected, actual)                                                                                     \
     do { if (ct_assert_eq_ctimage_impl(expected, actual, 0, (uint32_t)-1, #expected, #actual, __FUNCTION__, __FILE__, __LINE__))\
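As a usage sketch, the conversion helpers declared above can build a U1 reference from U8 test data; the images below are assumed to be pre-allocated CT_Images of matching size (allocation and the node under test are omitted):

    /* Assumes src_u8 (VX_DF_IMAGE_U8) and ref_u1 (VX_DF_IMAGE_U1) already exist. */
    threshold_U8_ct_image(src_u8, 127);          /* binarize in place: >127 -> 255, else 0 */
    U8_ct_image_to_U1_ct_image(src_u8, ref_u1);  /* pack down to one bit per pixel         */
    /* ... run the U1 node under test, then compare against the reference: */
    /* ASSERT_EQ_CTIMAGE(ref_u1, dst_u1); */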
diff --git a/test_engine/test_utils.c b/test_engine/test_utils.c
index d302728..bc78815 100644
--- a/test_engine/test_utils.c
+++ b/test_engine/test_utils.c
@@ -17,6 +17,13 @@
 
 #include <math.h>
 #include <string.h>
+
+#if defined (_WIN32)
+#include <windows.h>
+#else
+#include <unistd.h>
+#endif
+
 #include "test.h"
 
 // As for OpenVX 1.0 both of following defines result in udefined behavior:
@@ -265,7 +272,15 @@
     }
 
         status = VX_SUCCESS;
-        if (format == VX_DF_IMAGE_U8) // 1 plane of 8-bit data
+        if (format == VX_DF_IMAGE_U1)      // 1 plane of 1-bit data
+            SET_PIXELS(vx_uint8,
+            {
+                vx_uint8 offset = x % 8;
+                vx_uint8 mask = 1 << offset;
+                vx_uint8 rng_val = (vx_uint8)CT_RNG_NEXT_INT(rng, 0, 2) << offset;
+                data[0] = (data[0] & ~mask) | rng_val;
+            })
+        else if (format == VX_DF_IMAGE_U8) // 1 plane of 8-bit data
             SET_PIXELS(vx_uint8,
             {
                 data[0] = (vx_uint8)CT_RNG_NEXT(rng);
@@ -519,6 +534,7 @@
             vx_uint32 elem_sz = 0;
             switch(format)
             {
+                case VX_DF_IMAGE_U1:
                 case VX_DF_IMAGE_U8:
                 case VX_DF_IMAGE_YUV4:
                 case VX_DF_IMAGE_IYUV:
@@ -584,14 +600,37 @@
 
                 for (x = 0; x < w; x++)
                 {
-                    int i_src = x * addr_src.stride_x * addr_src.step_x * addr_src.scale_x / VX_SCALE_UNITY;
-                    int i_dst = x * addr_dst.stride_x * addr_dst.step_x * addr_dst.scale_x / VX_SCALE_UNITY;
+                    int i_src, i_dst, src_offset = 0, dst_offset = 0;
+                    if (format == VX_DF_IMAGE_U1)
+                    {
+                        int x_bits_src = x * addr_src.stride_x_bits * addr_src.step_x * addr_src.scale_x / VX_SCALE_UNITY;
+                        int x_bits_dst = x * addr_dst.stride_x_bits * addr_dst.step_x * addr_dst.scale_x / VX_SCALE_UNITY;
+                        i_src = x_bits_src / 8;
+                        i_dst = x_bits_dst / 8;
+                        src_offset = x_bits_src % 8;
+                        dst_offset = x_bits_dst % 8;
+                    }
+                    else
+                    {
+                        i_src = x * addr_src.stride_x * addr_src.step_x * addr_src.scale_x / VX_SCALE_UNITY;
+                        i_dst = x * addr_dst.stride_x * addr_dst.step_x * addr_dst.scale_x / VX_SCALE_UNITY;
+                    }
 
                     vx_uint8 *psrc = (vx_uint8*)(((vx_uint8 *)base_ptr_src) + j_src + i_src);
                     vx_uint8 *pdst = (vx_uint8*)(((vx_uint8 *)base_ptr_dst) + j_dst + i_dst);
 
                     for(k = 0; k < elem_sz; ++k)
-                        pdst[k] = psrc[k];
+                    {
+                        if (format == VX_DF_IMAGE_U1)
+                        {
+                            vx_uint8 src_val = (psrc[k] & (1 << src_offset)) >> src_offset;
+                            pdst[k] = (pdst[k] & ~(1 << dst_offset)) | (src_val << dst_offset);  // Set target pixel
+                        }
+                        else
+                        {
+                            pdst[k] = psrc[k];
+                        }
+                    }
                 }
             }
 
@@ -782,10 +821,19 @@
     return (uint32_t)(v);
 }
 
+// Integer division with rounding towards minus infinity
+int ct_div_floor(int x, int y) {
+    int q = x / y;
+    int r = x % y;
+    if ( (r != 0) && ((r < 0) != (y < 0)) )
+        --q;
+    return q;
+}
+
 uint8_t ct_clamp_8u(int32_t v)
 {
     if (v >= 255)
-        return v;
+        return 255;
     if (v <= 0)
         return 0;
     return (uint8_t)v;
@@ -1000,13 +1048,22 @@
     *pContext = NULL;
 }
 
-char *ct_get_test_file_path()
+const char *ct_get_test_file_path()
 {
-    char *env = getenv("VX_TEST_DATA_PATH");
+    const char *env = getenv("VX_TEST_DATA_PATH");
     if (env == NULL)
     {
         /* Look in the current directory */
-        env = ".";
+        return ".";
     }
     return env;
 }
+
+void ct_delay_ms(uint32_t ms)
+{
+#if defined (_WIN32)
+    Sleep(ms);
+#else
+    usleep(ms * 1000);
+#endif
+}
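ct_div_floor only differs from C's truncating division when the operands have opposite signs, which is exactly the case for the negative bit offsets produced by U1 ROI adjustments; for example:

    ct_div_floor( 7, 8);   /* ->  0, same as  7 / 8        */
    ct_div_floor(-1, 8);   /* -> -1, whereas -1 / 8 ==  0  */
    ct_div_floor(-9, 8);   /* -> -2, whereas -9 / 8 == -1  */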
diff --git a/test_engine/test_utils.h b/test_engine/test_utils.h
index d7dbf8f..21abd1b 100644
--- a/test_engine/test_utils.h
+++ b/test_engine/test_utils.h
@@ -1,4 +1,4 @@
-/* 
+/*
 
  * Copyright (c) 2012-2017 The Khronos Group Inc.
  *
@@ -19,7 +19,6 @@
 #define __VX_CT_TEST_UTILS_H__
 
 #include <VX/vx.h>
-#include <VX/vx_compatibility.h>
 
 #define MAXPATHLENGTH           (512u)
 
@@ -145,6 +144,12 @@
     CT_EXPAND(nextmacro(testArgName "/VX_BORDER_CONSTANT=127", __VA_ARGS__, { VX_BORDER_CONSTANT, {{ 127 }} })), \
     CT_EXPAND(nextmacro(testArgName "/VX_BORDER_CONSTANT=255", __VA_ARGS__, { VX_BORDER_CONSTANT, {{ 255 }} }))
 
+#define ADD_VX_BORDERS_U1(testArgName, nextmacro, ...) \
+    CT_EXPAND(nextmacro(testArgName "/VX_BORDER_UNDEFINED", __VA_ARGS__, { VX_BORDER_UNDEFINED, {{ 0 }} })), \
+    CT_EXPAND(nextmacro(testArgName "/VX_BORDER_REPLICATE", __VA_ARGS__, { VX_BORDER_REPLICATE, {{ 0 }} })), \
+    CT_EXPAND(nextmacro(testArgName "/VX_BORDER_CONSTANT=0", __VA_ARGS__, { VX_BORDER_CONSTANT, {{ 0 }} })), \
+    CT_EXPAND(nextmacro(testArgName "/VX_BORDER_CONSTANT=1", __VA_ARGS__, { VX_BORDER_CONSTANT, {{ 1 }} }))
+
 #define ADD_VX_BORDERS_REQUIRE_UNDEFINED_ONLY(testArgName, nextmacro, ...) \
     CT_EXPAND(nextmacro(testArgName "/VX_BORDER_UNDEFINED", __VA_ARGS__, { VX_BORDER_UNDEFINED, {{ 0 }} })), \
     CT_EXPAND(nextmacro(testArgName "/DISABLED_VX_BORDER_REPLICATE", __VA_ARGS__, { VX_BORDER_REPLICATE, {{ 0 }} })), \
@@ -153,6 +158,23 @@
     CT_EXPAND(nextmacro(testArgName "/DISABLED_VX_BORDER_CONSTANT=127", __VA_ARGS__, { VX_BORDER_CONSTANT, {{ 127 }} })), \
     CT_EXPAND(nextmacro(testArgName "/DISABLED_VX_BORDER_CONSTANT=255", __VA_ARGS__, { VX_BORDER_CONSTANT, {{ 255 }} }))
 
+#define ADD_VX_BORDERS_U1_REQUIRE_UNDEFINED_ONLY(testArgName, nextmacro, ...) \
+    CT_EXPAND(nextmacro(testArgName "/VX_BORDER_UNDEFINED", __VA_ARGS__, { VX_BORDER_UNDEFINED, {{ 0 }} })), \
+    CT_EXPAND(nextmacro(testArgName "/DISABLED_VX_BORDER_REPLICATE", __VA_ARGS__, { VX_BORDER_REPLICATE, {{ 0 }} })), \
+    CT_EXPAND(nextmacro(testArgName "/DISABLED_VX_BORDER_CONSTANT=0", __VA_ARGS__, { VX_BORDER_CONSTANT, {{ 0 }} })), \
+    CT_EXPAND(nextmacro(testArgName "/DISABLED_VX_BORDER_CONSTANT=1", __VA_ARGS__, { VX_BORDER_CONSTANT, {{ 1 }} }))
+
+#define ADD_TYPE_U1(testArgName, nextmacro, ...) \
+    CT_EXPAND(nextmacro(testArgName "/VX_DF_IMAGE_U1", __VA_ARGS__, VX_DF_IMAGE_U1))
+
+#define ADD_TYPE_U8(testArgName, nextmacro, ...) \
+    CT_EXPAND(nextmacro(testArgName "/VX_DF_IMAGE_U8", __VA_ARGS__, VX_DF_IMAGE_U8))
+
+#define ADD_VALID_REGION_SHRINKS(testArgName, nextmacro, ...) \
+    CT_EXPAND(nextmacro(testArgName "/REGION_SHRINK=1", __VA_ARGS__, {1, 1, -1, -1})), \
+    CT_EXPAND(nextmacro(testArgName "/REGION_SHRINK=7", __VA_ARGS__, {7, 7, -7, -7})), \
+    CT_EXPAND(nextmacro(testArgName "/REGION_SHRINK=odd", __VA_ARGS__, {1, 2, -3, -4}))
+
 void ct_fill_image_random_impl(vx_image image, uint64_t* seed, const char* func, const char* file, const int line);
 #define ct_fill_image_random(image, seed) ct_fill_image_random_impl(image, seed, __FUNCTION__, __FILE__, __LINE__)
 
@@ -171,23 +193,34 @@
     vx_image (*generator)(vx_context context, const char* fileName);
 };
 
+#define ALTERRECTANGLE(rectangle, dsx, dsy, dex, dey) \
+{ \
+    rectangle.start_x += dsx; \
+    rectangle.start_y += dsy; \
+    rectangle.end_x   += dex; \
+    rectangle.end_y   += dey; \
+} \
 
 vx_status ct_dump_vx_image_info(vx_image image);
 
 uint32_t ct_floor_u32_no_overflow(float v);
 
+int ct_div_floor(int x, int y);
+
 #define CT_RNG_INIT(rng, seed) ((rng) = (seed) ? (seed) : (uint64_t)(int64_t)(-1))
 #define CT_RNG_NEXT(rng) ((rng) = ((uint64_t)(uint32_t)(rng)*4164903690U + ((rng) >> 32)))
 #define CT_RNG_NEXT_INT(rng, a, b) (int)((uint32_t)CT_RNG_NEXT(rng) % ((b) - (a)) + (a))
 #define CT_RNG_NEXT_BOOL(rng)      CT_RNG_NEXT_INT(rng, 0, 2)
 #define CT_RNG_NEXT_REAL(rng, a, b) ((uint32_t)CT_RNG_NEXT(rng)*(2.3283064365386963e-10*((b) - (a))) + (a))
 
+#define CT_CAST_U1(x)  (uint8_t)((x) < 0 ? 0 : (x) > 1 ? 1 : (x))
 #define CT_CAST_U8(x)  (uint8_t)((x) < 0 ? 0 : (x) > 255 ? 255 : (x))
 #define CT_CAST_U16(x) (uint16_t)((x) < 0 ? 0 : (x) > 65535 ? 65535 : (x))
 #define CT_CAST_S16(x) (int16_t)((x) < -32768 ? -32768 : (x) > 32767 ? 32767 : (x))
 #define CT_CAST_U32(x) (uint32_t)((x) < 0 ? 0 : (x))
 #define CT_CAST_S32(x) (int32_t)(x)
 
+#define CT_SATURATE_U1(x) CT_CAST_U1(x)
 #define CT_SATURATE_U8(x) CT_CAST_U8(x)
 #define CT_SATURATE_U16(x) CT_CAST_U16(x)
 #define CT_SATURATE_S16(x) CT_CAST_S16(x)
@@ -215,12 +248,14 @@
 
 void ct_destroy_vx_context(void **pContext);
 
-char *ct_get_test_file_path();
+const char *ct_get_test_file_path();
 
 void *ct_alloc_mem(size_t size);
 
 void ct_free_mem(void *ptr);
 
+void ct_delay_ms(uint32_t ms);
+
 void ct_memset(void *ptr, vx_uint8 c, size_t);
 void *ct_calloc(size_t nmemb, size_t size);
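CT_CAST_U1 clamps to the binary range in the same way CT_CAST_U8 clamps to [0, 255], so the random-fill and saturation paths can treat U1 like any other unsigned format; for example:

    CT_CAST_U1(-3);   /* -> 0 */
    CT_CAST_U1( 0);   /* -> 0 */
    CT_CAST_U1( 1);   /* -> 1 */
    CT_CAST_U1( 5);   /* -> 1 */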