/*
* Copyright (c) 2012-2018 The Khronos Group Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifdef OPENVX_USE_PIPELINING
#include "test_engine/test.h"
#include <VX/vx.h>
#include <VX/vx_khr_pipelining.h>
#include "math.h"
#include <limits.h>
TESTCASE(GraphPipeline, CT_VXContext, ct_setup_vx_context, 0)
#define MAX_NUM_BUF (8u)
#define MAX_IMAGE_PLANES (3u)
#define MAX_NUM_OBJ_ARR_ELEMENTS (4u)
#define GRAPH_MAX_DATA_REF_QUEUE (16u)
#define DATA_REF_Q_MAX_OBJECTS (64u)
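/* application-defined event ids; these are passed to vxRegisterEvent() and reported back
* in vx_event_t::app_value by vxWaitEvent() in the event handling test below */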
#define GRAPH_CONSUMED_EVENT (1u)
#define NODE0_COMPLETED_EVENT (2u)
#define NODE1_COMPLETED_EVENT (3u)
#define GRAPH_COMPLETED_EVENT (4u)
typedef struct {
const char* testName;
int width, height;
int num_buf;
int loop_count;
} Arg;
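/* the ADD_* macros below expand into test parameter variants (number of buffers,
* loop count, image size), combined via CT_GENERATE_PARAMETERS in PARAMETERS */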
#define ADD_BUF_1(testArgName, nextmacro, ...) \
CT_EXPAND(nextmacro(testArgName "/buf=1", __VA_ARGS__, 1))
#define ADD_BUF_2(testArgName, nextmacro, ...) \
CT_EXPAND(nextmacro(testArgName "/buf=2", __VA_ARGS__, 2))
#define ADD_BUF_3(testArgName, nextmacro, ...) \
CT_EXPAND(nextmacro(testArgName "/buf=3", __VA_ARGS__, 3))
#define ADD_LOOP_0(testArgName, nextmacro, ...) \
CT_EXPAND(nextmacro(testArgName "/loop_count=0", __VA_ARGS__, 0))
#define ADD_LOOP_1(testArgName, nextmacro, ...) \
CT_EXPAND(nextmacro(testArgName "/loop_count=1", __VA_ARGS__, 1))
#define ADD_LOOP_10(testArgName, nextmacro, ...) \
CT_EXPAND(nextmacro(testArgName "/loop_count=10", __VA_ARGS__, 10))
#define ADD_LOOP_1000(testArgName, nextmacro, ...) \
CT_EXPAND(nextmacro(testArgName "/loop_count=1000", __VA_ARGS__, 1000))
#define ADD_LOOP_100000(testArgName, nextmacro, ...) \
CT_EXPAND(nextmacro(testArgName "/loop_count=100000", __VA_ARGS__, 100000))
#define ADD_LOOP_1000000(testArgName, nextmacro, ...) \
CT_EXPAND(nextmacro(testArgName "/loop_count=1000000", __VA_ARGS__, 1000000))
#define ADD_SIZE_2048x1024(testArgName, nextmacro, ...) \
CT_EXPAND(nextmacro(testArgName "/sz=2048x1024", __VA_ARGS__, 2048, 1024))
#define PARAMETERS \
CT_GENERATE_PARAMETERS("random", ADD_SIZE_64x64, ADD_BUF_3, ADD_LOOP_0, ARG), \
CT_GENERATE_PARAMETERS("random", ADD_SIZE_64x64, ADD_BUF_1, ADD_LOOP_0, ARG), \
CT_GENERATE_PARAMETERS("random", ADD_SIZE_64x64, ADD_BUF_3, ADD_LOOP_1, ARG), \
CT_GENERATE_PARAMETERS("random", ADD_SIZE_64x64, ADD_BUF_3, ADD_LOOP_1000, ARG), \
CT_GENERATE_PARAMETERS("random", ADD_SIZE_64x64, ADD_BUF_3, ADD_LOOP_1000, ARG), \
CT_GENERATE_PARAMETERS("random", ADD_SIZE_64x64, ADD_BUF_1, ADD_LOOP_1000, ARG), \
CT_GENERATE_PARAMETERS("random", ADD_SIZE_64x64, ADD_BUF_2, ADD_LOOP_1000, ARG), \
CT_GENERATE_PARAMETERS("random", ADD_SIZE_64x64, ADD_BUF_2, ADD_LOOP_100000, ARG), \
CT_GENERATE_PARAMETERS("random", ADD_SIZE_2048x1024, ADD_BUF_3, ADD_LOOP_1000, ARG), \
/*
* Utility API used to add a graph parameter from a node, node parameter index
*/
static void add_graph_parameter_by_node_index(vx_graph graph, vx_node node, vx_uint32 node_parameter_index)
{
vx_parameter parameter = vxGetParameterByIndex(node, node_parameter_index);
vxAddParameterToGraph(graph, parameter);
vxReleaseParameter(&parameter);
}
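/*
* Common pipelining pattern exercised by the tests below:
* 1. make selected node parameters graph parameters (add_graph_parameter_by_node_index)
* 2. mark them enqueueable via vxSetGraphScheduleConfig(VX_GRAPH_SCHEDULE_MODE_QUEUE_AUTO)
* 3. enqueue input/output refs with vxGraphParameterEnqueueReadyRef (graph is scheduled automatically)
* 4. dequeue completed refs with vxGraphParameterDequeueDoneRef, check results, re-enqueue
* 5. vxWaitGraph to drain all pending graph executions
*/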
static void referenceNot(CT_Image src, CT_Image dst)
{
uint32_t i, j;
ASSERT(src && dst);
ASSERT(src->width == dst->width);
ASSERT(src->height == dst->height);
ASSERT(src->format == dst->format && src->format == VX_DF_IMAGE_U8);
for (i = 0; i < dst->height; ++i)
for (j = 0; j < dst->width; ++j)
dst->data.y[i * dst->stride + j] = ~src->data.y[i * src->stride + j];
}
static void reference_mean_stddev(CT_Image src, vx_float32* _mean, vx_float32* _stddev)
{
uint32_t x, y, width = src ? src->width : 0, height = src ? src->height : 0;
uint32_t npix, stride;
double sum = 0, sqsum = 0;
int format = src ? src->format : VX_DF_IMAGE_U8;
ASSERT(src);
ASSERT(src->width > 0 && src->height > 0);
npix = width*height;
stride = ct_stride_bytes(src);
#define CASE_MEANSTDDEV(format, type, acctype) \
case format: \
{ \
acctype s = 0, s2 = 0; \
for( y = 0; y < src->height; y++ ) \
{ \
const type* ptr = (const type*)(src->data.y + stride*y); \
for( x = 0; x < src->width; x++ ) \
{ \
type val = ptr[x]; \
s += val; \
s2 += (acctype)val*val; \
} \
} \
sum = (double)s; sqsum = (double)s2; \
} \
break
switch(format)
{
CASE_MEANSTDDEV(VX_DF_IMAGE_U8, uint8_t, uint64_t);
default:
FAIL("Unsupported image format: (%d)", &src->format);
}
*_mean = (vx_float32)(sum/npix);
sqsum = sqsum/npix - (sum/npix)*(sum/npix);
*_stddev = (vx_float32)sqrt(CT_MAX(sqsum, 0.));
}
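/* fill every pixel of a U8 image with the constant value 'seq_init' */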
static void fillSequence(CT_Image dst, uint32_t seq_init)
{
uint32_t i, j;
uint32_t val = seq_init;
ASSERT(dst);
ASSERT(dst->format == VX_DF_IMAGE_U8);
for (i = 0; i < dst->height; ++i)
for (j = 0; j < dst->width; ++j)
dst->data.y[i * dst->stride + j] = val;
}
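/* test user kernel with four optional VX_TYPE_UINT32 scalar parameters:
* out1 = in1 + in2, out2 = out1 * 2; a missing optional input is treated as 0 */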
#define TEST_USER_KERNEL_NAME "test_graph_pipeline.user_kernel"
#define TEST_USER_KERNEL_NUM_PARAMS (4u)
static vx_kernel test_user_kernel = NULL;
static vx_status test_user_kernel_validate(vx_node node,
const vx_reference parameters[ ],
vx_uint32 num,
vx_meta_format metas[])
{
vx_status status = VX_SUCCESS;
vx_scalar scalar[TEST_USER_KERNEL_NUM_PARAMS];
vx_enum scalar_type[TEST_USER_KERNEL_NUM_PARAMS];
vx_uint32 i;
if (num != TEST_USER_KERNEL_NUM_PARAMS)
{
printf(" ERROR: Test user kernel: Number of parameters dont match !!!\n");
status = VX_ERROR_INVALID_PARAMETERS;
}
for (i = 0U; i < TEST_USER_KERNEL_NUM_PARAMS; i ++)
{
scalar[i] = (vx_scalar)parameters[i];
if(scalar[i] != NULL)
{
/* i.e. not an optional parameter */
status = vxQueryScalar(scalar[i],
VX_SCALAR_TYPE, &scalar_type[i],
sizeof(vx_enum));
if(status==VX_SUCCESS)
{
if(scalar_type[i] != VX_TYPE_UINT32)
{
printf(" ERROR: Test user kernel: Scalar type MUST be VX_TYPE_UINT32 !!!\n");
status = VX_ERROR_INVALID_PARAMETERS;
}
vxSetMetaFormatAttribute(metas[i], VX_SCALAR_TYPE, &scalar_type[i],
sizeof(scalar_type[i]));
}
if(status!=VX_SUCCESS)
{
printf(" ERROR: Test user kernel: validate failed !!!\n");
break;
}
}
}
return status;
}
static vx_status test_user_kernel_run(vx_node node,
const vx_reference parameters[ ],
vx_uint32 num)
{
vx_status status = VX_SUCCESS;
vx_scalar in1, in2, out1, out2;
vx_uint32 in1_value = 0, in2_value = 0;
vx_uint32 out1_value = 0, out2_value = 0;
/* Any of the parameters can be NULL since each parameter is marked
* as optional during kernel registration */
in1 = (vx_scalar)parameters[0];
in2 = (vx_scalar)parameters[1];
out1 = (vx_scalar)parameters[2];
out2 = (vx_scalar)parameters[3];
if(in1!=NULL)
{
vxCopyScalar(in1,
&in1_value,
VX_READ_ONLY,
VX_MEMORY_TYPE_HOST
);
}
if(in2!=NULL)
{
vxCopyScalar(in2,
&in2_value,
VX_READ_ONLY,
VX_MEMORY_TYPE_HOST
);
}
/* just for test
* out1_value = in1_value + in2_value
* out2_value = out1_value * 2
* when the in1 reference is not specified (since it's optional), in1_value is considered to be 0
* when the in2 reference is not specified (since it's optional), in2_value is considered to be 0
*/
out1_value = in1_value + in2_value;
out2_value = out1_value*2;
if(out1!=NULL)
{
vxCopyScalar(out1,
&out1_value,
VX_WRITE_ONLY,
VX_MEMORY_TYPE_HOST
);
}
if(out2!=NULL)
{
vxCopyScalar(out2,
&out2_value,
VX_WRITE_ONLY,
VX_MEMORY_TYPE_HOST
);
}
return status;
}
static vx_status test_user_kernel_register(vx_context context)
{
vx_kernel kernel = NULL;
vx_status status;
uint32_t index;
vx_enum test_user_kernel_id = 0;
status = vxAllocateUserKernelId(context, &test_user_kernel_id);
if(status!=VX_SUCCESS)
{
printf(" ERROR: Test user kernel: vxAllocateUserKernelId failed (%d)!!!\n", status);
}
if(status==VX_SUCCESS)
{
kernel = vxAddUserKernel(
context,
TEST_USER_KERNEL_NAME,
test_user_kernel_id,
test_user_kernel_run,
TEST_USER_KERNEL_NUM_PARAMS, /* number of parameter objects for this user function */
test_user_kernel_validate,
NULL,
NULL);
}
status = vxGetStatus((vx_reference)kernel);
if ( status == VX_SUCCESS)
{
index = 0;
if ( status == VX_SUCCESS)
{
status = vxAddParameterToKernel(kernel,
index,
VX_INPUT,
VX_TYPE_SCALAR,
VX_PARAMETER_STATE_OPTIONAL
);
index++;
}
if ( status == VX_SUCCESS)
{
status = vxAddParameterToKernel(kernel,
index,
VX_INPUT,
VX_TYPE_SCALAR,
VX_PARAMETER_STATE_OPTIONAL
);
index++;
}
if ( status == VX_SUCCESS)
{
status = vxAddParameterToKernel(kernel,
index,
VX_OUTPUT,
VX_TYPE_SCALAR,
VX_PARAMETER_STATE_OPTIONAL
);
index++;
}
if ( status == VX_SUCCESS)
{
status = vxAddParameterToKernel(kernel,
index,
VX_OUTPUT,
VX_TYPE_SCALAR,
VX_PARAMETER_STATE_OPTIONAL
);
index++;
}
if ( status == VX_SUCCESS)
{
status = vxFinalizeKernel(kernel);
}
if( status != VX_SUCCESS)
{
printf(" ERROR: Test user kernel: vxAddParameterToKernel, vxFinalizeKernel failed (%d)!!!\n", status);
vxReleaseKernel(&kernel);
kernel = NULL;
}
}
else
{
kernel = NULL;
printf(" ERROR: Test user kernel: vxAddUserKernel failed (%d)!!!\n", status);
}
if(status==VX_SUCCESS)
{
test_user_kernel = kernel;
}
return status;
}
static vx_status test_user_kernel_unregister(vx_context context)
{
vx_status status;
status = vxRemoveKernel(test_user_kernel);
test_user_kernel = NULL;
if(status!=VX_SUCCESS)
{
printf(" ERROR: Test user kernel: Unable to remove kernel (%d)!!!\n", status);
}
return status;
}
static vx_node test_user_kernel_node(vx_graph graph,
vx_scalar in1,
vx_scalar in2,
vx_scalar out1,
vx_scalar out2)
{
vx_node node = 0;
vx_context context = vxGetContext((vx_reference)graph);
vx_kernel kernel = vxGetKernelByName(context, TEST_USER_KERNEL_NAME);
if(kernel!=NULL)
{
/* the kernel reference is released after the node and its parameters are set up below */
node = vxCreateGenericNode(graph, kernel);
if (NULL != in1)
{
vxSetParameterByIndex(node, 0, (vx_reference)in1);
}
if (NULL != in2)
{
vxSetParameterByIndex(node, 1, (vx_reference)in2);
}
if (NULL != out1)
{
vxSetParameterByIndex(node, 2, (vx_reference)out1);
}
if (NULL != out2)
{
vxSetParameterByIndex(node, 3, (vx_reference)out2);
}
vxReleaseKernel(&kernel);
}
return node;
}
/*
* d0 n0 d2
* IMG -- OR -- IMG (*)
* (*) |
* d1 (single ref)
*
* (*) = queueing enabled
*
* This test case tests the following:
* - A data reference on which queueing is not enabled
* - No looping
* - fixed pipeline depth of 2
*
*/
TEST_WITH_ARG(GraphPipeline, testOneNode, Arg, PARAMETERS)
{
vx_context context = context_->vx_context_;
vx_graph graph;
vx_image d0[MAX_NUM_BUF], d1, d2[MAX_NUM_BUF];
vx_node n0;
CT_Image ref_src[MAX_NUM_BUF], vxdst;
uint32_t width, height, seq_init, num_buf;
uint32_t buf_id, loop_id, loop_cnt;
vx_graph_parameter_queue_params_t graph_parameters_queue_params_list[2];
seq_init = 1;
width = arg_->width;
height = arg_->height;
loop_cnt = arg_->loop_count;
num_buf = 2;
ASSERT(num_buf <= MAX_NUM_BUF);
/* fill reference data */
for(buf_id=0; buf_id<num_buf; buf_id++)
{
ASSERT_NO_FAILURE({
ref_src[buf_id] = ct_allocate_image(width, height, VX_DF_IMAGE_U8);
fillSequence(ref_src[buf_id], (uint32_t)(seq_init+buf_id*10));
});
}
ASSERT_VX_OBJECT(graph = vxCreateGraph(context), VX_TYPE_GRAPH);
/* allocate Input and Output refs, multiple refs created to allow pipelining of graph */
for(buf_id=0; buf_id<num_buf; buf_id++)
{
ASSERT_VX_OBJECT(d0[buf_id] = vxCreateImage(context, width, height, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
ASSERT_VX_OBJECT(d2[buf_id] = vxCreateImage(context, width, height, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
}
/* create the other ref; it is not replicated and the same ref is fed as a parameter to the graph */
ASSERT_VX_OBJECT(d1 = vxCreateImage(context, width, height, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
/* fill d1 with zeros so that the OR operation acts like a NOP */
{
vx_imagepatch_addressing_t addr;
vx_rectangle_t rect;
void *ptr;
vx_map_id map_id;
rect.start_x = rect.start_y = 0;
rect.end_x = width;
rect.end_y = height;
VX_CALL(vxMapImagePatch(d1, &rect, 0, &map_id, &addr, &ptr, VX_WRITE_ONLY, VX_MEMORY_TYPE_HOST, VX_NOGAP_X));
ct_memset(ptr, 0x0, addr.stride_y*addr.dim_y);
VX_CALL(vxUnmapImagePatch(d1, map_id));
}
/* create node; input (index 0) and output (index 2) will be made graph parameters
* so that we can enqueue and dequeue refs to them and thus pipeline the graph.
* d0[0], d2[0] are used only for their meta data.
* The actual input and output used for graph processing will be the
* refs that are enqueued later
*/
ASSERT_VX_OBJECT(n0 = vxOrNode(graph, d0[0], d1, d2[0]), VX_TYPE_NODE);
/* input @ node index 0, becomes graph parameter 0 */
add_graph_parameter_by_node_index(graph, n0, 0);
/* output @ node index 2, becomes graph parameter 1 */
add_graph_parameter_by_node_index(graph, n0, 2);
/* set graph schedule config such that graph parameter @ index 0 and 1 are enqueuable */
graph_parameters_queue_params_list[0].graph_parameter_index = 0;
graph_parameters_queue_params_list[0].refs_list_size = num_buf;
graph_parameters_queue_params_list[0].refs_list = (vx_reference*)&d0[0];
graph_parameters_queue_params_list[1].graph_parameter_index = 1;
graph_parameters_queue_params_list[1].refs_list_size = num_buf;
graph_parameters_queue_params_list[1].refs_list = (vx_reference*)&d2[0];
/* Schedule mode auto is used, so we don't need to call vxScheduleGraph;
* Graph gets scheduled automatically as refs are enqueued to it
*/
VX_CALL(vxSetGraphScheduleConfig(graph,
VX_GRAPH_SCHEDULE_MODE_QUEUE_AUTO,
2,
graph_parameters_queue_params_list
));
VX_CALL(vxVerifyGraph(graph));
#if 1
/* fill reference data into input data reference */
for(buf_id=0; buf_id<num_buf; buf_id++)
{
ASSERT_NO_FAILURE(ct_image_copyto_vx_image(d0[buf_id], ref_src[buf_id]));
}
/* enqueue input and output references,
* input and output can be enqueued in any order
* and can be enqueued all together; here they are enqueued one by one just as an example
*/
for(buf_id=0; buf_id<num_buf; buf_id++)
{
VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 0, (vx_reference*)&d0[buf_id], 1));
VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 1, (vx_reference*)&d2[buf_id], 1));
}
buf_id = 0;
/* wait for graph instances to complete, compare output and recycle data buffers, schedule again */
for(loop_id=0; loop_id<(loop_cnt+num_buf); loop_id++)
{
vx_image out_img, in_img;
uint32_t num_refs;
/* Get output reference, waits until a reference is available */
VX_CALL(vxGraphParameterDequeueDoneRef(graph, 1, (vx_reference*)&out_img, 1, &num_refs));
/* Get consumed input reference, waits until a reference is available
*/
VX_CALL(vxGraphParameterDequeueDoneRef(graph, 0, (vx_reference*)&in_img, 1, &num_refs));
/* when measuring performance don't check the output since it affects graph performance numbers
*/
if(loop_cnt > 100)
{
ct_update_progress(loop_id, loop_cnt+num_buf);
}
ASSERT_NO_FAILURE({
vxdst = ct_image_from_vx_image(out_img);
});
/* compare output */
ASSERT_EQ_CTIMAGE(ref_src[buf_id], vxdst);
buf_id = (buf_id+1)%num_buf;
/* recycles dequeued input and output refs 'loop_cnt' times */
if(loop_id<loop_cnt)
{
/* input and output can be enqueued in any order */
VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 0, (vx_reference*)&in_img, 1));
VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 1, (vx_reference*)&out_img, 1));
}
}
/* ensure all graph processing is complete */
VX_CALL(vxWaitGraph(graph));
#endif
VX_CALL(vxReleaseNode(&n0));
for(buf_id=0; buf_id<num_buf; buf_id++)
{
VX_CALL(vxReleaseImage(&d0[buf_id]));
VX_CALL(vxReleaseImage(&d2[buf_id]));
}
VX_CALL(vxReleaseImage(&d1));
VX_CALL(vxReleaseGraph(&graph));
}
/*
* d0 n0 d1 n1 d2
* IMG -- NOT -- IMG -- NOT -- IMG
*
* This test case tests the following:
* - Single input, single output nodes
* - Two nodes on two different targets
* - Number of buffers = pipeline depth
* - Virtual objects, no hints provided except for pipeline depth
* - fixed pipeline depth of 2
*
*/
TEST_WITH_ARG(GraphPipeline, testTwoNodesBasic, Arg, PARAMETERS)
{
vx_context context = context_->vx_context_;
vx_graph graph;
vx_image d0[MAX_NUM_BUF], d1, d2[MAX_NUM_BUF];
vx_node n0, n1;
vx_graph_parameter_queue_params_t graph_parameters_queue_params_list[2];
CT_Image ref_src[MAX_NUM_BUF], vxdst;
uint32_t width, height, seq_init;
uint32_t buf_id, loop_id, loop_cnt, num_buf;
seq_init = 1;
width = arg_->width;
height = arg_->height;
loop_cnt = arg_->loop_count;
num_buf = 2;
ASSERT(num_buf <= MAX_NUM_BUF);
/* fill reference data */
for(buf_id=0; buf_id<num_buf; buf_id++)
{
ASSERT_NO_FAILURE({
ref_src[buf_id] = ct_allocate_image(width, height, VX_DF_IMAGE_U8);
fillSequence(ref_src[buf_id], (uint32_t)(seq_init+buf_id*10));
});
}
ASSERT_VX_OBJECT(graph = vxCreateGraph(context), VX_TYPE_GRAPH);
/* allocate Input and Output refs, multiple refs created to allow pipelining of graph */
for(buf_id=0; buf_id<num_buf; buf_id++)
{
ASSERT_VX_OBJECT(d0[buf_id] = vxCreateImage(context, width, height, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
ASSERT_VX_OBJECT(d2[buf_id] = vxCreateImage(context, width, height, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
}
ASSERT_VX_OBJECT(d1 = vxCreateVirtualImage(graph, width, height, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
ASSERT_VX_OBJECT(n0 = vxNotNode(graph, d0[0], d1), VX_TYPE_NODE);
ASSERT_VX_OBJECT(n1 = vxNotNode(graph, d1, d2[0]), VX_TYPE_NODE);
/* input @ n0 index 0, becomes graph parameter 0 */
add_graph_parameter_by_node_index(graph, n0, 0);
/* output @ n1 index 1, becomes graph parameter 1 */
add_graph_parameter_by_node_index(graph, n1, 1);
/* set graph schedule config such that graph parameter @ index 0 and 1 are enqueuable */
graph_parameters_queue_params_list[0].graph_parameter_index = 0;
graph_parameters_queue_params_list[0].refs_list_size = num_buf;
graph_parameters_queue_params_list[0].refs_list = (vx_reference*)&d0[0];
graph_parameters_queue_params_list[1].graph_parameter_index = 1;
graph_parameters_queue_params_list[1].refs_list_size = num_buf;
graph_parameters_queue_params_list[1].refs_list = (vx_reference*)&d2[0];
/* Schedule mode auto is used, so we don't need to call vxScheduleGraph;
* Graph gets scheduled automatically as refs are enqueued to it
*/
VX_CALL(vxSetGraphScheduleConfig(graph,
VX_GRAPH_SCHEDULE_MODE_QUEUE_AUTO,
2,
graph_parameters_queue_params_list
));
VX_CALL(vxVerifyGraph(graph));
#if 1
/* fill reference data into input data reference */
for(buf_id=0; buf_id<num_buf; buf_id++)
{
ASSERT_NO_FAILURE(ct_image_copyto_vx_image(d0[buf_id], ref_src[buf_id]));
}
/* enqueue input and output references,
* input and output can be enqueued in any order
* and can be enqueued all together; here they are enqueued one by one just as an example
*/
for(buf_id=0; buf_id<num_buf; buf_id++)
{
VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 0, (vx_reference*)&d0[buf_id], 1));
VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 1, (vx_reference*)&d2[buf_id], 1));
}
buf_id = 0;
/* wait for graph instances to complete, compare output and recycle data buffers, schedule again */
for(loop_id=0; loop_id<(loop_cnt+num_buf); loop_id++)
{
vx_image out_img, in_img;
uint32_t num_refs;
/* Get output reference, waits until a reference is available */
VX_CALL(vxGraphParameterDequeueDoneRef(graph, 1, (vx_reference*)&out_img, 1, &num_refs));
/* Get consumed input reference, waits until a reference is available
*/
VX_CALL(vxGraphParameterDequeueDoneRef(graph, 0, (vx_reference*)&in_img, 1, &num_refs));
if(loop_cnt > 100)
{
ct_update_progress(loop_id, loop_cnt+num_buf);
}
ASSERT_NO_FAILURE({
vxdst = ct_image_from_vx_image(out_img);
});
/* compare output */
ASSERT_EQ_CTIMAGE(ref_src[buf_id], vxdst);
buf_id = (buf_id+1)%num_buf;
/* recycles dequeued input and output refs 'loop_cnt' times */
if(loop_id<loop_cnt)
{
/* input and output can be enqueued in any order */
VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 0, (vx_reference*)&in_img, 1));
VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 1, (vx_reference*)&out_img, 1));
}
}
/* ensure all graph processing is complete */
VX_CALL(vxWaitGraph(graph));
#endif
VX_CALL(vxReleaseNode(&n0));
VX_CALL(vxReleaseNode(&n1));
for(buf_id=0; buf_id<num_buf; buf_id++)
{
VX_CALL(vxReleaseImage(&d0[buf_id]));
VX_CALL(vxReleaseImage(&d2[buf_id]));
}
VX_CALL(vxReleaseImage(&d1));
VX_CALL(vxReleaseGraph(&graph));
}
/*
* d0 n0 d1 n1 d2
* IMG -- NOT -- IMG -- NOT -- IMG
*
* This test case tests the following:
* - Single input, single output nodes
* - Two nodes on two different targets
*
*/
TEST_WITH_ARG(GraphPipeline, testTwoNodes, Arg, PARAMETERS)
{
vx_context context = context_->vx_context_;
vx_graph graph;
vx_image d0[MAX_NUM_BUF], d1[MAX_NUM_BUF], d2[MAX_NUM_BUF];
vx_node n0, n1;
vx_graph_parameter_queue_params_t graph_parameters_queue_params_list[3];
CT_Image ref_src[MAX_NUM_BUF], vxdst;
uint32_t width, height, seq_init, num_buf;
uint32_t buf_id, loop_id, loop_cnt;
seq_init = 1;
width = arg_->width;
height = arg_->height;
num_buf = arg_->num_buf;
loop_cnt = arg_->loop_count;
ASSERT(num_buf <= MAX_NUM_BUF);
/* fill reference data */
for(buf_id=0; buf_id<num_buf; buf_id++)
{
ASSERT_NO_FAILURE({
ref_src[buf_id] = ct_allocate_image(width, height, VX_DF_IMAGE_U8);
fillSequence(ref_src[buf_id], (uint32_t)(seq_init+buf_id*10));
});
}
ASSERT_VX_OBJECT(graph = vxCreateGraph(context), VX_TYPE_GRAPH);
/* allocate Input and Output refs, multiple refs created to allow pipelining of graph */
for(buf_id=0; buf_id<num_buf; buf_id++)
{
ASSERT_VX_OBJECT(d0[buf_id] = vxCreateImage(context, width, height, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
ASSERT_VX_OBJECT(d1[buf_id] = vxCreateImage(context, width, height, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
ASSERT_VX_OBJECT(d2[buf_id] = vxCreateImage(context, width, height, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
}
ASSERT_VX_OBJECT(n0 = vxNotNode(graph, d0[0], d1[0]), VX_TYPE_NODE);
ASSERT_VX_OBJECT(n1 = vxNotNode(graph, d1[0], d2[0]), VX_TYPE_NODE);
/* input @ n0 index 0, becomes graph parameter 0 */
add_graph_parameter_by_node_index(graph, n0, 0);
/* intermediate output @ n0 index 1, becomes graph parameter 1 */
add_graph_parameter_by_node_index(graph, n0, 1);
/* output @ n1 index 1, becomes graph parameter 2 */
add_graph_parameter_by_node_index(graph, n1, 1);
/* set graph schedule config such that graph parameter @ index 0, 1, 2 are enqueuable */
graph_parameters_queue_params_list[0].graph_parameter_index = 0;
graph_parameters_queue_params_list[0].refs_list_size = num_buf;
graph_parameters_queue_params_list[0].refs_list = (vx_reference*)&d0[0];
graph_parameters_queue_params_list[1].graph_parameter_index = 1;
graph_parameters_queue_params_list[1].refs_list_size = num_buf;
graph_parameters_queue_params_list[1].refs_list = (vx_reference*)&d1[0];
graph_parameters_queue_params_list[2].graph_parameter_index = 2;
graph_parameters_queue_params_list[2].refs_list_size = num_buf;
graph_parameters_queue_params_list[2].refs_list = (vx_reference*)&d2[0];
/* Schedule mode auto is used, so we don't need to call vxScheduleGraph;
* Graph gets scheduled automatically as refs are enqueued to it
*/
VX_CALL(vxSetGraphScheduleConfig(graph,
VX_GRAPH_SCHEDULE_MODE_QUEUE_AUTO,
3,
graph_parameters_queue_params_list
));
VX_CALL(vxVerifyGraph(graph));
#if 1
/* fill reference data into input data reference */
for(buf_id=0; buf_id<num_buf; buf_id++)
{
ASSERT_NO_FAILURE(ct_image_copyto_vx_image(d0[buf_id], ref_src[buf_id]));
}
/* enqueue input and output references,
* input and output can be enqueued in any order
* and can be enqueued all together; here they are enqueued one by one just as an example
*/
for(buf_id=0; buf_id<num_buf; buf_id++)
{
VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 1, (vx_reference*)&d1[buf_id], 1));
VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 2, (vx_reference*)&d2[buf_id], 1));
VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 0, (vx_reference*)&d0[buf_id], 1));
}
buf_id = 0;
/* wait for graph instances to complete, compare output and recycle data buffers, schedule again */
for(loop_id=0; loop_id<(loop_cnt+num_buf); loop_id++)
{
vx_image out_img, in_img, intermediate_img;
uint32_t num_refs;
/* Get output reference, waits until a reference is available */
VX_CALL(vxGraphParameterDequeueDoneRef(graph, 2, (vx_reference*)&out_img, 1, &num_refs));
/* Get consumed intermediate reference, waits until a reference is available
*/
VX_CALL(vxGraphParameterDequeueDoneRef(graph, 1, (vx_reference*)&intermediate_img, 1, &num_refs));
/* Get consumed input reference, waits until a reference is available
*/
VX_CALL(vxGraphParameterDequeueDoneRef(graph, 0, (vx_reference*)&in_img, 1, &num_refs));
/* when measuring performance don't check the output since it affects graph performance numbers
*/
if(loop_cnt > 100)
{
ct_update_progress(loop_id, loop_cnt+num_buf);
}
ASSERT_NO_FAILURE({
vxdst = ct_image_from_vx_image(out_img);
});
/* compare output */
ASSERT_EQ_CTIMAGE(ref_src[buf_id], vxdst);
buf_id = (buf_id+1)%num_buf;
/* recycles dequeued input and output refs 'loop_cnt' times */
if(loop_id<loop_cnt)
{
/* input and output can be enqueued in any order */
VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 1, (vx_reference*)&intermediate_img, 1));
VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 2, (vx_reference*)&out_img, 1));
VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 0, (vx_reference*)&in_img, 1));
}
}
/* ensure all graph processing is complete */
VX_CALL(vxWaitGraph(graph));
#endif
VX_CALL(vxReleaseNode(&n0));
VX_CALL(vxReleaseNode(&n1));
for(buf_id=0; buf_id<num_buf; buf_id++)
{
VX_CALL(vxReleaseImage(&d0[buf_id]));
VX_CALL(vxReleaseImage(&d1[buf_id]));
VX_CALL(vxReleaseImage(&d2[buf_id]));
}
VX_CALL(vxReleaseGraph(&graph));
}
/*
* d0 n0 d1 n1 d2 n2 d3 n3 d5
* IMG -- NOT -- IMG -- NOT -- IMG -- OR -- IMG -- AND -- IMG
* | | |
* +-------------------+ IMG
* d4
*
* This test case tests the following:
* - Same input going to multiple nodes
* - Outputs from multiple nodes going to a single node
* - Node taking input from another node as well as from user
*
*/
TEST_WITH_ARG(GraphPipeline, testFourNodes, Arg, PARAMETERS)
{
vx_context context = context_->vx_context_;
vx_graph graph;
vx_image d0[MAX_NUM_BUF], d1, d2, d3, d4[MAX_NUM_BUF], d5[MAX_NUM_BUF];
vx_node n0, n1, n2, n3;
vx_graph_parameter_queue_params_t graph_parameters_queue_params_list[3];
CT_Image ref_src[MAX_NUM_BUF], vxdst;
uint32_t width, height, seq_init, num_buf;
uint32_t buf_id, loop_id, loop_cnt;
seq_init = 1;
width = arg_->width;
height = arg_->height;
num_buf = arg_->num_buf;
loop_cnt = arg_->loop_count;
ASSERT(num_buf <= MAX_NUM_BUF);
/* fill reference data */
for(buf_id=0; buf_id<num_buf; buf_id++)
{
ASSERT_NO_FAILURE({
ref_src[buf_id] = ct_allocate_image(width, height, VX_DF_IMAGE_U8);
fillSequence(ref_src[buf_id], (uint32_t)(seq_init+buf_id*10));
});
}
ASSERT_VX_OBJECT(graph = vxCreateGraph(context), VX_TYPE_GRAPH);
/* allocate Input and Output refs, multiple refs created to allow pipelining of graph */
for(buf_id=0; buf_id<num_buf; buf_id++)
{
ASSERT_VX_OBJECT(d0[buf_id] = vxCreateImage(context, width, height, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
ASSERT_VX_OBJECT(d4[buf_id] = vxCreateImage(context, width, height, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
ASSERT_VX_OBJECT(d5[buf_id] = vxCreateImage(context, width, height, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
}
ASSERT_VX_OBJECT(d1 = vxCreateVirtualImage(graph, width, height, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
ASSERT_VX_OBJECT(d2 = vxCreateVirtualImage(graph, width, height, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
ASSERT_VX_OBJECT(d3 = vxCreateVirtualImage(graph, width, height, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
ASSERT_VX_OBJECT(n0 = vxNotNode(graph, d0[0], d1), VX_TYPE_NODE);
ASSERT_VX_OBJECT(n1 = vxNotNode(graph, d1, d2), VX_TYPE_NODE);
ASSERT_VX_OBJECT(n2 = vxOrNode(graph, d1, d2, d3), VX_TYPE_NODE);
ASSERT_VX_OBJECT(n3 = vxAndNode(graph, d3, d4[0], d5[0]), VX_TYPE_NODE);
/* input @ n0 index 0, becomes graph parameter 0 */
add_graph_parameter_by_node_index(graph, n0, 0);
/* input @ n3 index 1, becomes graph parameter 1 */
add_graph_parameter_by_node_index(graph, n3, 1);
/* output @ n3 index 2, becomes graph parameter 2 */
add_graph_parameter_by_node_index(graph, n3, 2);
/* set graph schedule config such that graph parameter @ index 0, 1, 2 are enqueuable */
graph_parameters_queue_params_list[0].graph_parameter_index = 0;
graph_parameters_queue_params_list[0].refs_list_size = num_buf;
graph_parameters_queue_params_list[0].refs_list = (vx_reference*)&d0[0];
graph_parameters_queue_params_list[1].graph_parameter_index = 1;
graph_parameters_queue_params_list[1].refs_list_size = num_buf;
graph_parameters_queue_params_list[1].refs_list = (vx_reference*)&d4[0];
graph_parameters_queue_params_list[2].graph_parameter_index = 2;
graph_parameters_queue_params_list[2].refs_list_size = num_buf;
graph_parameters_queue_params_list[2].refs_list = (vx_reference*)&d5[0];
/* Schedule mode auto is used, so we don't need to call vxScheduleGraph;
* Graph gets scheduled automatically as refs are enqueued to it
*/
VX_CALL(vxSetGraphScheduleConfig(graph,
VX_GRAPH_SCHEDULE_MODE_QUEUE_AUTO,
3,
graph_parameters_queue_params_list
));
VX_CALL(vxVerifyGraph(graph));
#if 1
/* fill reference data into input data reference */
for(buf_id=0; buf_id<num_buf; buf_id++)
{
ASSERT_NO_FAILURE(ct_image_copyto_vx_image(d0[buf_id], ref_src[buf_id]));
ASSERT_NO_FAILURE(ct_image_copyto_vx_image(d4[buf_id], ref_src[buf_id]));
}
/* enqueue input and output references,
* input and output can be enqueued in any order
* and can be enqueued all together; here they are enqueued one by one just as an example
*/
for(buf_id=0; buf_id<num_buf; buf_id++)
{
VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 0, (vx_reference*)&d0[buf_id], 1));
VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 1, (vx_reference*)&d4[buf_id], 1));
VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 2, (vx_reference*)&d5[buf_id], 1));
}
buf_id = 0;
/* wait for graph instances to complete, compare output and recycle data buffers, schedule again */
for(loop_id=0; loop_id<(loop_cnt+num_buf); loop_id++)
{
vx_image out_img, in1_img, in2_img;
uint32_t num_refs;
/* Get output reference, waits until a reference is available */
VX_CALL(vxGraphParameterDequeueDoneRef(graph, 2, (vx_reference*)&out_img, 1, &num_refs));
/* Get consumed input reference, waits until a reference is available
*/
VX_CALL(vxGraphParameterDequeueDoneRef(graph, 1, (vx_reference*)&in2_img, 1, &num_refs));
/* Get consumed input reference, waits until a reference is available
*/
VX_CALL(vxGraphParameterDequeueDoneRef(graph, 0, (vx_reference*)&in1_img, 1, &num_refs));
/* when measuring performance don't check the output since it affects graph performance numbers
*/
if(loop_cnt > 100)
{
ct_update_progress(loop_id, loop_cnt+num_buf);
}
ASSERT_NO_FAILURE({
vxdst = ct_image_from_vx_image(out_img);
});
/* compare output */
ASSERT_EQ_CTIMAGE(ref_src[buf_id], vxdst);
buf_id = (buf_id+1)%num_buf;
/* recycles dequeued input and output refs 'loop_cnt' times */
if(loop_id<loop_cnt)
{
/* input and output can be enqueued in any order */
VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 0, (vx_reference*)&in1_img, 1));
VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 1, (vx_reference*)&in2_img, 1));
VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 2, (vx_reference*)&out_img, 1));
}
}
/* ensure all graph processing is complete */
VX_CALL(vxWaitGraph(graph));
#endif
VX_CALL(vxReleaseNode(&n0));
VX_CALL(vxReleaseNode(&n1));
VX_CALL(vxReleaseNode(&n2));
VX_CALL(vxReleaseNode(&n3));
for(buf_id=0; buf_id<num_buf; buf_id++)
{
VX_CALL(vxReleaseImage(&d0[buf_id]));
VX_CALL(vxReleaseImage(&d4[buf_id]));
VX_CALL(vxReleaseImage(&d5[buf_id]));
}
VX_CALL(vxReleaseImage(&d1));
VX_CALL(vxReleaseImage(&d2));
VX_CALL(vxReleaseImage(&d3));
VX_CALL(vxReleaseGraph(&graph));
}
/*
* d0 n0 d1
* IMG -- NOT -- IMG -- NOT --, etc. for GRAPH_MAX_DATA_REF_QUEUE length
*
*
* This test case tests the following:
* - Tests the limit of GRAPH_MAX_DATA_REF_QUEUE
*
*/
TEST_WITH_ARG(GraphPipeline, testMaxDataRef, Arg, PARAMETERS)
{
vx_context context = context_->vx_context_;
vx_graph graph;
vx_image d0[MAX_NUM_BUF], d_virt[GRAPH_MAX_DATA_REF_QUEUE], d1[MAX_NUM_BUF];
vx_node n[GRAPH_MAX_DATA_REF_QUEUE+1];
vx_graph_parameter_queue_params_t graph_parameters_queue_params_list[2];
CT_Image ref_src[MAX_NUM_BUF];
uint32_t width, height, seq_init, num_buf;
uint32_t buf_id, loop_id, loop_cnt, i;
seq_init = 1;
width = arg_->width;
height = arg_->height;
num_buf = arg_->num_buf;
loop_cnt = arg_->loop_count;
ASSERT(num_buf <= MAX_NUM_BUF);
/* fill reference data */
for(buf_id=0; buf_id<num_buf; buf_id++)
{
ASSERT_NO_FAILURE({
ref_src[buf_id] = ct_allocate_image(width, height, VX_DF_IMAGE_U8);
fillSequence(ref_src[buf_id], (uint32_t)(seq_init+buf_id*10));
});
}
ASSERT_VX_OBJECT(graph = vxCreateGraph(context), VX_TYPE_GRAPH);
/* allocate Input and Output refs, multiple refs created to allow pipelining of graph */
for(buf_id=0; buf_id<num_buf; buf_id++)
{
ASSERT_VX_OBJECT(d0[buf_id] = vxCreateImage(context, width, height, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
ASSERT_VX_OBJECT(d1[buf_id] = vxCreateImage(context, width, height, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
}
for (i = 0; i < GRAPH_MAX_DATA_REF_QUEUE; i++)
{
ASSERT_VX_OBJECT(d_virt[i] = vxCreateVirtualImage(graph, width, height, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
}
ASSERT_VX_OBJECT(n[0] = vxNotNode(graph, d0[0], d_virt[0]), VX_TYPE_NODE);
for (i = 0; i < GRAPH_MAX_DATA_REF_QUEUE-1; i++)
{
ASSERT_VX_OBJECT(n[i+1] = vxNotNode(graph, d_virt[i], d_virt[i+1]), VX_TYPE_NODE);
}
ASSERT_VX_OBJECT(n[GRAPH_MAX_DATA_REF_QUEUE] = vxNotNode(graph, d_virt[GRAPH_MAX_DATA_REF_QUEUE-1], d1[0]), VX_TYPE_NODE);
/* input @ n0 index 0, becomes graph parameter 0 */
add_graph_parameter_by_node_index(graph, n[0], 0);
/* output @ n[GRAPH_MAX_DATA_REF_QUEUE] index 1, becomes graph parameter 1 */
add_graph_parameter_by_node_index(graph, n[GRAPH_MAX_DATA_REF_QUEUE], 1);
/* set graph schedule config such that graph parameter @ index 0 and 1 are enqueuable */
graph_parameters_queue_params_list[0].graph_parameter_index = 0;
graph_parameters_queue_params_list[0].refs_list_size = num_buf;
graph_parameters_queue_params_list[0].refs_list = (vx_reference*)&d0[0];
graph_parameters_queue_params_list[1].graph_parameter_index = 1;
graph_parameters_queue_params_list[1].refs_list_size = num_buf;
graph_parameters_queue_params_list[1].refs_list = (vx_reference*)&d1[0];
/* Schedule mode auto is used, so we don't need to call vxScheduleGraph;
* Graph gets scheduled automatically as refs are enqueued to it
*/
VX_CALL(vxSetGraphScheduleConfig(graph,
VX_GRAPH_SCHEDULE_MODE_QUEUE_AUTO,
2,
graph_parameters_queue_params_list
));
VX_CALL(vxVerifyGraph(graph));
#if 1
/* fill reference data into input data reference */
for(buf_id=0; buf_id<num_buf; buf_id++)
{
ASSERT_NO_FAILURE(ct_image_copyto_vx_image(d0[buf_id], ref_src[buf_id]));
ASSERT_NO_FAILURE(ct_image_copyto_vx_image(d1[buf_id], ref_src[buf_id]));
}
/* enqueue input and output references,
* input and output can be enqueued in any order
* and can be enqueued all together; here they are enqueued one by one just as an example
*/
for(buf_id=0; buf_id<num_buf; buf_id++)
{
VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 0, (vx_reference*)&d0[buf_id], 1));
VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 1, (vx_reference*)&d1[buf_id], 1));
}
buf_id = 0;
/* wait for graph instances to complete, compare output and recycle data buffers, schedule again */
for(loop_id=0; loop_id<(loop_cnt+num_buf); loop_id++)
{
vx_image out_img, in_img;
uint32_t num_refs;
/* Get output reference, waits until a reference is available
*/
VX_CALL(vxGraphParameterDequeueDoneRef(graph, 1, (vx_reference*)&out_img, 1, &num_refs));
/* Get consumed input reference, waits until a reference is available
*/
VX_CALL(vxGraphParameterDequeueDoneRef(graph, 0, (vx_reference*)&in_img, 1, &num_refs));
buf_id = (buf_id+1)%num_buf;
/* recycles dequeued input and output refs 'loop_cnt' times */
if(loop_id<loop_cnt)
{
/* input and output can be enqueued in any order */
VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 0, (vx_reference*)&in_img, 1));
VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 1, (vx_reference*)&out_img, 1));
}
}
/* ensure all graph processing is complete */
VX_CALL(vxWaitGraph(graph));
#endif
for (i = 0; i < GRAPH_MAX_DATA_REF_QUEUE+1; i++)
{
VX_CALL(vxReleaseNode(&n[i]));
}
for(buf_id=0; buf_id<num_buf; buf_id++)
{
VX_CALL(vxReleaseImage(&d0[buf_id]));
VX_CALL(vxReleaseImage(&d1[buf_id]));
}
for (i = 0; i < GRAPH_MAX_DATA_REF_QUEUE; i++)
{
VX_CALL(vxReleaseImage(&d_virt[i]));
}
VX_CALL(vxReleaseGraph(&graph));
}
/*
* d0 n0 d2
* IMG -- AND -- IMG
* |
* d1 (uniform image) filled with 0xFF
*
* This test case tests the following:
* - Uniform image as input
* - No looping
* - fixed pipeline depth of 2
*
*/
TEST_WITH_ARG(GraphPipeline, testUniformImage, Arg, PARAMETERS)
{
vx_context context = context_->vx_context_;
vx_graph graph;
vx_image d0[MAX_NUM_BUF], d1, d2[MAX_NUM_BUF];
vx_node n0;
vx_pixel_value_t pixel_value;
CT_Image ref_src[MAX_NUM_BUF], vxdst;
uint32_t width, height, seq_init, num_buf;
uint32_t buf_id, loop_id, loop_cnt;
vx_graph_parameter_queue_params_t graph_parameters_queue_params_list[2];
seq_init = 1;
width = arg_->width;
height = arg_->height;
loop_cnt = arg_->loop_count;
num_buf = 2;
ASSERT(num_buf <= MAX_NUM_BUF);
/* fill reference data */
for(buf_id=0; buf_id<num_buf; buf_id++)
{
ASSERT_NO_FAILURE({
ref_src[buf_id] = ct_allocate_image(width, height, VX_DF_IMAGE_U8);
fillSequence(ref_src[buf_id], (uint32_t)(seq_init+buf_id*10));
});
}
ASSERT_VX_OBJECT(graph = vxCreateGraph(context), VX_TYPE_GRAPH);
/* allocate Input and Output refs, multiple refs created to allow pipelining of graph */
for(buf_id=0; buf_id<num_buf; buf_id++)
{
ASSERT_VX_OBJECT(d0[buf_id] = vxCreateImage(context, width, height, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
ASSERT_VX_OBJECT(d2[buf_id] = vxCreateImage(context, width, height, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
}
/* create the other ref; it is not replicated and the same ref is fed as a parameter to the graph */
pixel_value.U8 = 0xFF;
ASSERT_VX_OBJECT(d1 = vxCreateUniformImage(context, width, height, VX_DF_IMAGE_U8, &pixel_value), VX_TYPE_IMAGE);
/* create node; input (index 0) and output (index 2) will be made graph parameters
* so that we can enqueue and dequeue refs to them and thus pipeline the graph.
* d0[0], d2[0] are used only for their meta data.
* The actual input and output used for graph processing will be the
* refs that are enqueued later.
*
* d1 is also made a graph parameter, however it is not made enqueueable.
*/
ASSERT_VX_OBJECT(n0 = vxAndNode(graph, d0[0], d1, d2[0]), VX_TYPE_NODE);
/* input @ node index 0, becomes graph parameter 0 */
add_graph_parameter_by_node_index(graph, n0, 0);
/* output @ node index 2, becomes graph parameter 1 */
add_graph_parameter_by_node_index(graph, n0, 2);
/* input @ node index 1, becomes graph parameter 2 */
add_graph_parameter_by_node_index(graph, n0, 1);
/* set graph schedule config such that graph parameter @ index 0 and 1 are enqueuable */
graph_parameters_queue_params_list[0].graph_parameter_index = 0;
graph_parameters_queue_params_list[0].refs_list_size = num_buf;
graph_parameters_queue_params_list[0].refs_list = (vx_reference*)&d0[0];
graph_parameters_queue_params_list[1].graph_parameter_index = 1;
graph_parameters_queue_params_list[1].refs_list_size = num_buf;
graph_parameters_queue_params_list[1].refs_list = (vx_reference*)&d2[0];
/* Schedule mode auto is used, so we don't need to call vxScheduleGraph;
* Graph gets scheduled automatically as refs are enqueued to it
*/
VX_CALL(vxSetGraphScheduleConfig(graph,
VX_GRAPH_SCHEDULE_MODE_QUEUE_AUTO,
2,
graph_parameters_queue_params_list
));
/* This graph parameter @ index 2 is not enqueue-able */
VX_CALL(vxSetGraphParameterByIndex(graph, 2, (vx_reference)d1));
VX_CALL(vxVerifyGraph(graph));
#if 1
/* fill reference data into input data reference */
for(buf_id=0; buf_id<num_buf; buf_id++)
{
ASSERT_NO_FAILURE(ct_image_copyto_vx_image(d0[buf_id], ref_src[buf_id]));
}
/* enqueue input and output references,
* input and output can be enqueued in any order
* and can be enqueued all together; here they are enqueued one by one just as an example
*/
for(buf_id=0; buf_id<num_buf; buf_id++)
{
VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 0, (vx_reference*)&d0[buf_id], 1));
VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 1, (vx_reference*)&d2[buf_id], 1));
}
buf_id = 0;
/* wait for graph instances to complete, compare output and recycle data buffers, schedule again */
for(loop_id=0; loop_id<(loop_cnt+num_buf); loop_id++)
{
vx_image out_img, in_img;
uint32_t num_refs;
/* Get output reference, waits until a reference is available */
VX_CALL(vxGraphParameterDequeueDoneRef(graph, 1, (vx_reference*)&out_img, 1, &num_refs));
/* Get consumed input reference, waits until a reference is available
*/
VX_CALL(vxGraphParameterDequeueDoneRef(graph, 0, (vx_reference*)&in_img, 1, &num_refs));
/* when measuring performance don't check the output since it affects graph performance numbers
*/
if(loop_cnt > 100)
{
ct_update_progress(loop_id, loop_cnt+num_buf);
}
ASSERT_NO_FAILURE({
vxdst = ct_image_from_vx_image(out_img);
});
/* compare output */
ASSERT_EQ_CTIMAGE(ref_src[buf_id], vxdst);
buf_id = (buf_id+1)%num_buf;
/* recycles dequeued input and output refs 'loop_cnt' times */
if(loop_id<loop_cnt)
{
/* input and output can be enqueued in any order */
VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 0, (vx_reference*)&in_img, 1));
VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 1, (vx_reference*)&out_img, 1));
}
}
/* ensure all graph processing is complete */
VX_CALL(vxWaitGraph(graph));
#endif
VX_CALL(vxReleaseNode(&n0));
for(buf_id=0; buf_id<num_buf; buf_id++)
{
VX_CALL(vxReleaseImage(&d0[buf_id]));
VX_CALL(vxReleaseImage(&d2[buf_id]));
}
VX_CALL(vxReleaseImage(&d1));
VX_CALL(vxReleaseGraph(&graph));
}
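/* helpers for testReplicateImage: map an (object array element, pipeline buffer) pair to its
* reference image index, and find the parent object array of a dequeued element image */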
static inline uint32_t get_ref_src_index(uint32_t num_buf, uint32_t objarr_idx, uint32_t buf_id)
{
return (objarr_idx*num_buf + buf_id);
}
static inline vx_object_array get_object_array_parent_of_image(vx_image out_img,
vx_object_array d2[], vx_image img[], vx_uint32 num_buf)
{
vx_object_array objarr = NULL;
vx_uint32 buf_id;
for(buf_id=0; buf_id<num_buf; buf_id++)
{
if(out_img==img[buf_id])
{
objarr = d2[buf_id];
break;
}
}
return objarr;
}
/*
* d0 n0 d1 n1 d2
* OBJ -- NOT -- OBJ -- NOT -- OBJ
* ARR ARR ARR
* replicate replicate replicate
*
* This test case tests the following:
* - Object array with the replicate attribute set
*
*/
TEST_WITH_ARG(GraphPipeline, testReplicateImage, Arg, PARAMETERS)
{
vx_context context = context_->vx_context_;
vx_graph graph;
vx_object_array d0[MAX_NUM_BUF], d1, d2[MAX_NUM_BUF];
vx_image img0[MAX_NUM_BUF], img1, img2[MAX_NUM_BUF];
vx_image img_exemplar;
vx_node n0, n1;
vx_bool replicate[2] = { vx_true_e, vx_true_e };
vx_graph_parameter_queue_params_t graph_parameters_queue_params_list[2];
CT_Image ref_src[MAX_NUM_BUF*MAX_NUM_OBJ_ARR_ELEMENTS], vxdst[MAX_NUM_OBJ_ARR_ELEMENTS];
uint32_t width, height, seq_init, num_buf;
uint32_t buf_id, loop_id, loop_cnt;
uint32_t idx, objarr_idx, objarr_elements;
seq_init = 1;
width = arg_->width;
height = arg_->height;
num_buf = arg_->num_buf;
loop_cnt = arg_->loop_count;
objarr_elements = 2;
ASSERT(num_buf <= MAX_NUM_BUF);
ASSERT(objarr_elements <= MAX_NUM_OBJ_ARR_ELEMENTS);
/* fill reference data */
for(objarr_idx=0;objarr_idx<objarr_elements;objarr_idx++)
{
for(buf_id=0; buf_id<num_buf; buf_id++)
{
idx = get_ref_src_index(num_buf, objarr_idx, buf_id);
ASSERT_NO_FAILURE({
ref_src[idx] = ct_allocate_image(width, height, VX_DF_IMAGE_U8);
fillSequence(ref_src[idx], (uint32_t)(seq_init+(idx)));
});
}
}
ASSERT_VX_OBJECT(graph = vxCreateGraph(context), VX_TYPE_GRAPH);
ASSERT_VX_OBJECT(img_exemplar = vxCreateImage(context, width, height, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
/* allocate Input and Output refs, multiple refs created to allow pipelining of graph */
for(buf_id=0; buf_id<num_buf; buf_id++)
{
ASSERT_VX_OBJECT(d0[buf_id] = vxCreateObjectArray(context, (vx_reference)img_exemplar, objarr_elements), VX_TYPE_OBJECT_ARRAY);
ASSERT_VX_OBJECT(d2[buf_id] = vxCreateObjectArray(context, (vx_reference)img_exemplar, objarr_elements), VX_TYPE_OBJECT_ARRAY);
}
ASSERT_VX_OBJECT(d1 = vxCreateObjectArray(context, (vx_reference)img_exemplar, objarr_elements), VX_TYPE_OBJECT_ARRAY);
for(buf_id=0; buf_id<num_buf; buf_id++)
{
img0[buf_id] = (vx_image)vxGetObjectArrayItem(d0[buf_id], 0);
img2[buf_id] = (vx_image)vxGetObjectArrayItem(d2[buf_id], 0);
}
img1 = (vx_image)vxGetObjectArrayItem(d1, 0);
ASSERT_VX_OBJECT(n0 = vxNotNode(graph, img0[0], img1), VX_TYPE_NODE);
ASSERT_VX_OBJECT(n1 = vxNotNode(graph, img1, img2[0]), VX_TYPE_NODE);
VX_CALL(vxReplicateNode(graph, n0, replicate, 2));
VX_CALL(vxReplicateNode(graph, n1, replicate, 2));
/* input @ node0 index 0, becomes graph parameter 0 */
add_graph_parameter_by_node_index(graph, n0, 0);
/* output @ node1 index 1, becomes graph parameter 1 */
add_graph_parameter_by_node_index(graph, n1, 1);
/* set graph schedule config such that graph parameter @ index 0 and 1 are enqueuable */
graph_parameters_queue_params_list[0].graph_parameter_index = 0;
graph_parameters_queue_params_list[0].refs_list_size = num_buf;
graph_parameters_queue_params_list[0].refs_list = (vx_reference*)&img0[0];
graph_parameters_queue_params_list[1].graph_parameter_index = 1;
graph_parameters_queue_params_list[1].refs_list_size = num_buf;
graph_parameters_queue_params_list[1].refs_list = (vx_reference*)&img2[0];
/* Schedule mode auto is used, so we don't need to call vxScheduleGraph;
* Graph gets scheduled automatically as refs are enqueued to it
*/
VX_CALL(vxSetGraphScheduleConfig(graph,
VX_GRAPH_SCHEDULE_MODE_QUEUE_AUTO,
2,
graph_parameters_queue_params_list
));
VX_CALL(vxVerifyGraph(graph));
#if 1
/* fill reference data into input data reference */
for(buf_id=0; buf_id<num_buf; buf_id++)
{
for(objarr_idx=0;objarr_idx<objarr_elements;objarr_idx++)
{
vx_image image;
idx = get_ref_src_index(num_buf, objarr_idx, buf_id);
image = (vx_image)vxGetObjectArrayItem(d0[buf_id], objarr_idx);
ASSERT_NO_FAILURE(ct_image_copyto_vx_image(image, ref_src[idx]));
VX_CALL(vxReleaseImage(&image));
}
}
/* enqueue input and output references,
* input and output can be enqueued in any order
* and can be enqueued all together; here they are enqueued one by one just as an example
*/
for(buf_id=0; buf_id<num_buf; buf_id++)
{
vx_image image;
image = (vx_image)vxGetObjectArrayItem(d0[buf_id], 0);
VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 0, (vx_reference*)&image, 1));
vxReleaseImage(&image);
image = (vx_image)vxGetObjectArrayItem(d2[buf_id], 0);
VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 1, (vx_reference*)&image, 1));
vxReleaseImage(&image);
}
buf_id = 0;
/* wait for graph instances to complete, compare output and recycle data buffers, schedule again */
for(loop_id=0; loop_id<(loop_cnt+num_buf); loop_id++)
{
vx_image out_img, in_img;
vx_object_array out_objarr;
uint32_t num_refs;
/* Get output reference, waits until a reference is available */
VX_CALL(vxGraphParameterDequeueDoneRef(graph, 1, (vx_reference*)&out_img, 1, &num_refs));
/* Get consumed input reference, waits until a reference is available
*/
VX_CALL(vxGraphParameterDequeueDoneRef(graph, 0, (vx_reference*)&in_img, 1, &num_refs));
if(loop_cnt > 100)
{
ct_update_progress(loop_id, loop_cnt+num_buf);
}
out_objarr = get_object_array_parent_of_image(out_img, d2, img2, num_buf);
for(objarr_idx=0;objarr_idx<objarr_elements;objarr_idx++)
{
vx_image image;
image = (vx_image)vxGetObjectArrayItem(out_objarr, objarr_idx);
ASSERT_NO_FAILURE({
vxdst[objarr_idx] = ct_image_from_vx_image(image);
});
VX_CALL(vxReleaseImage(&image));
}
for(objarr_idx=0;objarr_idx<objarr_elements;objarr_idx++)
{
idx = get_ref_src_index(num_buf, objarr_idx, buf_id);
/* compare output */
/* NOT of NOT should give back original image */
ASSERT_EQ_CTIMAGE(ref_src[idx], vxdst[objarr_idx]);
}
buf_id = (buf_id+1)%num_buf;
/* recycles dequeued input and output refs 'loop_cnt' times */
if(loop_id<loop_cnt)
{
/* input and output can be enqueued in any order */
VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 0, (vx_reference*)&in_img, 1));
VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 1, (vx_reference*)&out_img, 1));
}
}
/* ensure all graph processing is complete */
VX_CALL(vxWaitGraph(graph));
#endif
VX_CALL(vxReleaseNode(&n0));
VX_CALL(vxReleaseNode(&n1));
VX_CALL(vxReleaseImage(&img_exemplar));
for(buf_id=0; buf_id<num_buf; buf_id++)
{
VX_CALL(vxReleaseImage(&img0[buf_id]));
VX_CALL(vxReleaseImage(&img2[buf_id]));
}
for(buf_id=0; buf_id<num_buf; buf_id++)
{
VX_CALL(vxReleaseObjectArray(&d0[buf_id]));
VX_CALL(vxReleaseObjectArray(&d2[buf_id]));
}
VX_CALL(vxReleaseImage(&img1));
VX_CALL(vxReleaseObjectArray(&d1));
VX_CALL(vxReleaseGraph(&graph));
}
/*
*
* IMAGE -> MeanStdDev -> MEAN (SCALAR)
* |
* +-------> STD_DEV (SCALAR)
*
* This test case tests the following:
* - Scalar with pipeline with scalar at output
*
*/
TEST_WITH_ARG(GraphPipeline, testScalarOutput, Arg, PARAMETERS)
{
vx_context context = context_->vx_context_;
vx_graph graph;
vx_image d0[MAX_NUM_BUF];
vx_scalar mean_s[MAX_NUM_BUF], stddev_s[MAX_NUM_BUF];
vx_node n0;
vx_float32 mean_out=0.0, stddev_out=0.0;
vx_float32 mean_tolerance=1e-4, stddev_tolerance=1e-4;
vx_float32 mean_diff, stddev_diff;
CT_Image ref_src[MAX_NUM_BUF];
vx_float32 mean_ref[MAX_NUM_BUF];
vx_float32 stddev_ref[MAX_NUM_BUF];
uint32_t width, height, num_buf;
uint32_t buf_id, loop_id, loop_cnt;
uint64_t rng;
int a = 0, b = 256;
vx_graph_parameter_queue_params_t graph_parameters_queue_params_list[3];
width = arg_->width;
height = arg_->height;
num_buf = arg_->num_buf;
loop_cnt = arg_->loop_count;
rng = CT()->seed_;
mean_tolerance *= b;
stddev_tolerance *= b;
ASSERT(num_buf <= MAX_NUM_BUF);
/* fill reference data */
for(buf_id=0; buf_id<num_buf; buf_id++)
{
ASSERT_NO_FAILURE({
ref_src[buf_id] = ct_allocate_ct_image_random(width, height, VX_DF_IMAGE_U8, &rng, a, b);
});
reference_mean_stddev(ref_src[buf_id], &mean_ref[buf_id], &stddev_ref[buf_id]);
//printf("Ref %d: mean=%5.3f, stddev=%5.3f\n", buf_id, (float)mean_ref[buf_id], (float)stddev_ref[buf_id]);
}
ASSERT_VX_OBJECT(graph = vxCreateGraph(context), VX_TYPE_GRAPH);
for(buf_id=0; buf_id<num_buf; buf_id++)
{
ASSERT_VX_OBJECT(d0[buf_id] = vxCreateImage(context, width, height, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
ASSERT_VX_OBJECT(mean_s[buf_id] = vxCreateScalar(context, VX_TYPE_FLOAT32, &mean_out), VX_TYPE_SCALAR);
ASSERT_VX_OBJECT(stddev_s[buf_id] = vxCreateScalar(context, VX_TYPE_FLOAT32, &stddev_out), VX_TYPE_SCALAR);
}
ASSERT_VX_OBJECT(n0 = vxMeanStdDevNode(graph, d0[0], mean_s[0], stddev_s[0]), VX_TYPE_NODE);
/* input @ node0 index 0, becomes graph parameter 0 */
add_graph_parameter_by_node_index(graph, n0, 0);
/* output @ node0 index 1, becomes graph parameter 1 */
add_graph_parameter_by_node_index(graph, n0, 1);
/* output @ node0 index 2, becomes graph parameter 2 */
add_graph_parameter_by_node_index(graph, n0, 2);
/* set graph schedule config such that graph parameter @ index 0 and 1 and 2 are enqueuable */
graph_parameters_queue_params_list[0].graph_parameter_index = 0;
graph_parameters_queue_params_list[0].refs_list_size = num_buf;
graph_parameters_queue_params_list[0].refs_list = (vx_reference*)&d0[0];
graph_parameters_queue_params_list[1].graph_parameter_index = 1;
graph_parameters_queue_params_list[1].refs_list_size = num_buf;
graph_parameters_queue_params_list[1].refs_list = (vx_reference*)&mean_s[0];
graph_parameters_queue_params_list[2].graph_parameter_index = 2;
graph_parameters_queue_params_list[2].refs_list_size = num_buf;
graph_parameters_queue_params_list[2].refs_list = (vx_reference*)&stddev_s[0];
/* Schedule mode auto is used, so we don't need to call vxScheduleGraph;
* Graph gets scheduled automatically as refs are enqueued to it
*/
VX_CALL(vxSetGraphScheduleConfig(graph,
VX_GRAPH_SCHEDULE_MODE_QUEUE_AUTO,
3,
graph_parameters_queue_params_list
));
VX_CALL(vxVerifyGraph(graph));
#if 1
/* fill reference data into input data reference */
for(buf_id=0; buf_id<num_buf; buf_id++)
{
ASSERT_NO_FAILURE(ct_image_copyto_vx_image(d0[buf_id], ref_src[buf_id]));
}
/* enqueue input and output references,
* input and output can be enqueued in any order
* and can be enqueued all together; here they are enqueued one by one just as an example
*/
for(buf_id=0; buf_id<num_buf; buf_id++)
{
VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 0, (vx_reference*)&d0[buf_id], 1));
VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 1, (vx_reference*)&mean_s[buf_id], 1));
VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 2, (vx_reference*)&stddev_s[buf_id], 1));
}
buf_id = 0;
/* wait for graph instances to complete, compare output and recycle data buffers, schedule again */
for(loop_id=0; loop_id<(loop_cnt+num_buf); loop_id++)
{
vx_image in_img;
vx_scalar out_mean, out_stddev;
uint32_t num_refs;
/* Get output reference, waits until a reference is available */
VX_CALL(vxGraphParameterDequeueDoneRef(graph, 2, (vx_reference*)&out_stddev, 1, &num_refs));
/* Get output reference, waits until a reference is available */
VX_CALL(vxGraphParameterDequeueDoneRef(graph, 1, (vx_reference*)&out_mean, 1, &num_refs));
/* Get consumed input reference, waits until a reference is available
*/
VX_CALL(vxGraphParameterDequeueDoneRef(graph, 0, (vx_reference*)&in_img, 1, &num_refs));
/* when measuring performance don't check the output since it affects graph performance numbers
*/
if(loop_cnt > 100)
{
ct_update_progress(loop_id, loop_cnt+num_buf);
}
VX_CALL(vxCopyScalar(out_mean, &mean_out, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
VX_CALL(vxCopyScalar(out_stddev, &stddev_out, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
/* compare output with reference */
mean_diff = fabs(mean_ref[buf_id] - mean_out);
stddev_diff = fabs(stddev_ref[buf_id] - stddev_out);
//printf("Out %d: mean=%5.3f, stddev=%5.3f\n", loop_id, (float)mean_out, (float)stddev_out);
#if 1
if( mean_diff > mean_tolerance ||
stddev_diff > stddev_tolerance )
{
CT_RecordFailureAtFormat("Test case %d. width=%d, height=%d,\n"
"\tExpected: mean=%.5g, stddev=%.5g\n"
"\tActual: mean=%.5g (diff=%.5g %s %.5g), stddev=%.5f (diff=%.5g %s %.5g)\n",
__FUNCTION__, __FILE__, __LINE__,
loop_id, width, height,
mean_ref[buf_id], stddev_ref[buf_id],
mean_out, mean_diff, mean_diff > mean_tolerance ? ">" : "<=", mean_tolerance,
stddev_out, stddev_diff, stddev_diff > stddev_tolerance ? ">" : "<=", stddev_tolerance);
}
#endif
buf_id = (buf_id+1)%num_buf;
/* recycles dequeued input and output refs 'loop_cnt' times */
if(loop_id<loop_cnt)
{
/* input and output can be enqueued in any order */
VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 0, (vx_reference*)&in_img, 1));
VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 1, (vx_reference*)&out_mean, 1));
VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 2, (vx_reference*)&out_stddev, 1));
}
}
/* ensure all graph processing is complete */
VX_CALL(vxWaitGraph(graph));
#endif
VX_CALL(vxReleaseNode(&n0));
for(buf_id=0; buf_id<num_buf; buf_id++)
{
VX_CALL(vxReleaseImage(&d0[buf_id]));
VX_CALL(vxReleaseScalar(&mean_s[buf_id]));
VX_CALL(vxReleaseScalar(&stddev_s[buf_id]));
}
VX_CALL(vxReleaseGraph(&graph));
}
/*
* d0 n0 d1 n1 d2
* IMG -- NOT -- IMG -- NOT -- IMG
*
* This test case tests the following:
* - Single input, single output nodes
* - Two nodes on two different targets
* - Events are used to enqueue/dequeue buffers
*
*/
TEST_WITH_ARG(GraphPipeline, testEventHandling, Arg, PARAMETERS)
{
vx_context context = context_->vx_context_;
vx_graph graph;
vx_image d0[MAX_NUM_BUF], d1, d2[MAX_NUM_BUF];
vx_node n0, n1;
vx_graph_parameter_queue_params_t graph_parameters_queue_params_list[2];
CT_Image ref_src[MAX_NUM_BUF], vxdst;
uint32_t width, height, seq_init, num_buf, loop_cnt;
uint32_t buf_id, loop_id, in_q_cnt;
vx_bool done;
vx_event_t event;
seq_init = 1;
width = arg_->width;
height = arg_->height;
num_buf = arg_->num_buf;
loop_cnt = arg_->loop_count;
ASSERT(num_buf <= MAX_NUM_BUF);
/* fill reference data */
for(buf_id=0; buf_id<num_buf; buf_id++)
{
ASSERT_NO_FAILURE({
ref_src[buf_id] = ct_allocate_image(width, height, VX_DF_IMAGE_U8);
fillSequence(ref_src[buf_id], (uint32_t)(seq_init+buf_id*10));
});
}
VX_CALL(vxEnableEvents(context));
ASSERT_VX_OBJECT(graph = vxCreateGraph(context), VX_TYPE_GRAPH);
for(buf_id=0; buf_id<num_buf; buf_id++)
{
ASSERT_VX_OBJECT(d0[buf_id] = vxCreateImage(context, width, height, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
ASSERT_VX_OBJECT(d2[buf_id] = vxCreateImage(context, width, height, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
}
ASSERT_VX_OBJECT(d1 = vxCreateVirtualImage(graph, width, height, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
ASSERT_VX_OBJECT(n0 = vxNotNode(graph, d0[0], d1), VX_TYPE_NODE);
ASSERT_VX_OBJECT(n1 = vxNotNode(graph, d1, d2[0]), VX_TYPE_NODE);
/* input @ n0 index 0, becomes graph parameter 0 */
add_graph_parameter_by_node_index(graph, n0, 0);
/* output @ n1 index 1, becomes graph parameter 1 */
add_graph_parameter_by_node_index(graph, n1, 1);
/* set graph schedule config such that graph parameter @ index 0 and 1 are enqueuable */
graph_parameters_queue_params_list[0].graph_parameter_index = 0;
graph_parameters_queue_params_list[0].refs_list_size = num_buf;
graph_parameters_queue_params_list[0].refs_list = (vx_reference*)&d0[0];
graph_parameters_queue_params_list[1].graph_parameter_index = 1;
graph_parameters_queue_params_list[1].refs_list_size = num_buf;
graph_parameters_queue_params_list[1].refs_list = (vx_reference*)&d2[0];
/* Schedule mode auto is used, so we don't need to call vxScheduleGraph.
* The graph gets scheduled automatically as refs are enqueued to it.
*/
VX_CALL(vxSetGraphScheduleConfig(graph,
VX_GRAPH_SCHEDULE_MODE_QUEUE_AUTO,
2,
graph_parameters_queue_params_list
));
VX_CALL(vxRegisterEvent((vx_reference)graph, VX_EVENT_GRAPH_PARAMETER_CONSUMED, 0, GRAPH_CONSUMED_EVENT));
VX_CALL(vxRegisterEvent((vx_reference)n0, VX_EVENT_NODE_COMPLETED, 0, NODE0_COMPLETED_EVENT));
VX_CALL(vxRegisterEvent((vx_reference)n1, VX_EVENT_NODE_COMPLETED, 0, NODE1_COMPLETED_EVENT));
VX_CALL(vxRegisterEvent((vx_reference)graph, VX_EVENT_GRAPH_COMPLETED, 0, GRAPH_COMPLETED_EVENT));
VX_CALL(vxVerifyGraph(graph));
#if 1
/* clear pending events */
while( vxWaitEvent(context, &event, vx_true_e) == VX_SUCCESS);
/* fill reference data into input data reference */
for(buf_id=0; buf_id<num_buf; buf_id++)
{
ASSERT_NO_FAILURE(ct_image_copyto_vx_image(d0[buf_id], ref_src[buf_id]));
}
/* Enqueue input and output references.
* Inputs and outputs can be enqueued in any order and can also be enqueued
* all together; here they are enqueued one by one just as an example.
*/
for(buf_id=0; buf_id<num_buf; buf_id++)
{
VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 0, (vx_reference*)&d0[buf_id], 1));
VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 1, (vx_reference*)&d2[buf_id], 1));
}
buf_id = 0;
done = vx_false_e;
loop_id = 0;
in_q_cnt = 0;
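/* Event-driven processing loop: wait for events and act on each one.
 * - GRAPH_CONSUMED_EVENT: the input of a graph execution is free; dequeue and re-enqueue it
 * - NODE1_COMPLETED_EVENT: an output is ready; dequeue, check and re-enqueue it
 * - user event (0xDEADBEAF): sent by this test itself once enough outputs are processed; exit the loop
 */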
while(!done)
{
VX_CALL(vxWaitEvent(context, &event, vx_false_e));
if(event.app_value==GRAPH_CONSUMED_EVENT)
{
vx_image in_img;
uint32_t num_refs;
/* input should be free at this point */
/* recycle input buffer, input data is not changed in this test */
VX_CALL(vxGraphParameterDequeueDoneRef(graph, 0, (vx_reference*)&in_img, 1, &num_refs));
if(in_q_cnt<loop_cnt)
{
VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 0, (vx_reference*)&in_img, 1));
}
in_q_cnt++;
}
else
if(event.app_value==NODE1_COMPLETED_EVENT)
{
vx_image out_img;
uint32_t num_refs;
/* Get output reference, waits until a reference is available */
VX_CALL(vxGraphParameterDequeueDoneRef(graph, 1, (vx_reference*)&out_img, 1, &num_refs));
/* when measuring performance don't check the output since it affects the graph performance numbers
*/
if(loop_cnt > 100)
{
ct_update_progress(loop_id, loop_cnt+num_buf);
}
ASSERT_NO_FAILURE({
vxdst = ct_image_from_vx_image(out_img);
});
/* compare output */
ASSERT_EQ_CTIMAGE(ref_src[buf_id], vxdst);
buf_id = (buf_id+1)%num_buf;
/* recycles dequeued input and output refs 'loop_cnt' times */
if(loop_id<loop_cnt)
{
/* input and output can be enqueued in any order */
VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 1, (vx_reference*)&out_img, 1));
}
else
{
/* send a user event to signal exit; app_value is 0xDEADBEAF and the parameter is a known pattern 0x12345678 */
VX_CALL(vxSendUserEvent(context, 0xDEADBEAFu, (void*)0x12345678u));
}
loop_id++;
}
else
if((event.type==VX_EVENT_USER)
&& (event.app_value == 0xDEADBEAFu)
&& (event.event_info.user_event.user_event_parameter == (void*)0x12345678u)
)
{
done = vx_true_e;
}
}
VX_CALL(vxWaitGraph(graph));
/* handle last few buffers */
done = vx_false_e;
while(!done)
{
vx_image in_img, out_img;
vx_uint32 in_num_refs, out_num_refs;
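/* vxGraphParameterCheckDoneRef is non-blocking: it only reports how many
 * references can currently be dequeued without waiting
 */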
/* recycle and access output data */
VX_CALL(vxGraphParameterCheckDoneRef(graph, 1, &out_num_refs));
if(out_num_refs>0)
{
VX_CALL(vxGraphParameterDequeueDoneRef(graph, 1, (vx_reference*)&out_img, 1, &out_num_refs));
ASSERT_NO_FAILURE({
vxdst = ct_image_from_vx_image(out_img);
});
/* compare output */
/* NOT of NOT should give back original image */
ASSERT_EQ_CTIMAGE(ref_src[buf_id], vxdst);
}
/* recycle and access input data */
VX_CALL(vxGraphParameterCheckDoneRef(graph, 0, &in_num_refs));
if(in_num_refs>0)
{
VX_CALL(vxGraphParameterDequeueDoneRef(graph, 0, (vx_reference*)&in_img, 1, &in_num_refs));
}
buf_id = (buf_id+1)%num_buf;
if(in_num_refs == 0 && out_num_refs == 0)
{
done = vx_true_e;
}
}
/* clear pending events */
while( vxWaitEvent(context, &event, vx_true_e) == VX_SUCCESS);
#endif
VX_CALL(vxReleaseNode(&n0));
VX_CALL(vxReleaseNode(&n1));
for(buf_id=0; buf_id<num_buf; buf_id++)
{
VX_CALL(vxReleaseImage(&d0[buf_id]));
VX_CALL(vxReleaseImage(&d2[buf_id]));
}
VX_CALL(vxReleaseImage(&d1));
VX_CALL(vxReleaseGraph(&graph));
}
/*
*
* This test case tests the following
* - Disabling and re-enabling of events
*
*/
TEST(GraphPipeline, testEventHandlingDisableEvents)
{
vx_context context = context_->vx_context_;
vx_event_t event;
VX_CALL(vxEnableEvents(context));
/* send one user event, this should be received */
VX_CALL(vxSendUserEvent(context, 0x1u, NULL));
/* disable events and send another event */
VX_CALL(vxDisableEvents(context));
/* this event should get dropped and send event API should return failure */
ASSERT(vxSendUserEvent(context, 0x2u, NULL)!=VX_SUCCESS);
/* re-enable events and send another event */
VX_CALL(vxEnableEvents(context));
/* this event should get received */
VX_CALL(vxSendUserEvent(context, 0x3u, NULL));
/* wait for one event, this should be the first one */
VX_CALL(vxWaitEvent(context, &event, vx_true_e));
ASSERT(event.type==VX_EVENT_USER && event.app_value==0x1u);
/* wait for one more event, this should be the third one */
VX_CALL(vxWaitEvent(context, &event, vx_true_e));
ASSERT(event.type==VX_EVENT_USER && event.app_value==0x3u);
/* wait for one more event, there should be no more events */
ASSERT(vxWaitEvent(context, &event, vx_true_e) != VX_SUCCESS);
}
/*
* d0 n0 d1 n1 d2
* SCALAR -- USER_KERNEL -- SCALAR -- USER_KERNEL -- SCALAR
* | |
* + -----------+
*
* This test case tests the following
* - User kernel nodes
* - Nodes with optional parameters
*
*/
TEST_WITH_ARG(GraphPipeline, testUserKernel, Arg, PARAMETERS)
{
vx_context context = context_->vx_context_;
vx_graph graph;
vx_scalar d0[MAX_NUM_BUF], d1, d2[MAX_NUM_BUF];
vx_scalar in_scalar, out_scalar;
vx_node n0, n1;
vx_graph_parameter_queue_params_t graph_parameters_queue_params_list[2];
vx_uint32 in_value[MAX_NUM_BUF], ref_out_value[MAX_NUM_BUF];
vx_uint32 tmp_value = 0;
uint32_t num_buf;
uint32_t buf_id, loop_id, loop_cnt;
test_user_kernel_register(context);
num_buf = arg_->num_buf;
loop_cnt = arg_->loop_count;
ASSERT(num_buf <= MAX_NUM_BUF);
/* fill reference data */
for(buf_id=0; buf_id<num_buf; buf_id++)
{
in_value[buf_id] = 10*(buf_id+1);
ref_out_value[buf_id] = 2 * in_value[buf_id];
}
ASSERT_VX_OBJECT(graph = vxCreateGraph(context), VX_TYPE_GRAPH);
/* allocate Input and Output refs, multiple refs created to allow pipelining of graph */
for(buf_id=0; buf_id<num_buf; buf_id++)
{
ASSERT_VX_OBJECT(d0[buf_id] = vxCreateScalar(context, VX_TYPE_UINT32, &tmp_value), VX_TYPE_SCALAR);
ASSERT_VX_OBJECT(d2[buf_id] = vxCreateScalar(context, VX_TYPE_UINT32, &tmp_value), VX_TYPE_SCALAR);
}
ASSERT_VX_OBJECT(d1 = vxCreateScalar(context, VX_TYPE_UINT32, &tmp_value), VX_TYPE_SCALAR);
ASSERT_VX_OBJECT(n0 = test_user_kernel_node(graph, d0[0], NULL, d1, NULL), VX_TYPE_NODE);
ASSERT_VX_OBJECT(n1 = test_user_kernel_node(graph, d1, d1, d2[0], NULL), VX_TYPE_NODE);
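/* n0 exercises an optional input left as NULL; n1 takes d1 on both inputs.
 * The user kernel adds its (up to two) scalar inputs, so the final output is
 * expected to be 2 * input, as implied by ref_out_value[] computed above.
 */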
/* input @ n0 index 0, becomes graph parameter 0 */
add_graph_parameter_by_node_index(graph, n0, 0);
/* output @ n1 index 2, becomes graph parameter 1 */
add_graph_parameter_by_node_index(graph, n1, 2);
/* set graph schedule config such that graph parameter @ index 0, 1 are enqueuable */
graph_parameters_queue_params_list[0].graph_parameter_index = 0;
graph_parameters_queue_params_list[0].refs_list_size = num_buf;
graph_parameters_queue_params_list[0].refs_list = (vx_reference*)&d0[0];
graph_parameters_queue_params_list[1].graph_parameter_index = 1;
graph_parameters_queue_params_list[1].refs_list_size = num_buf;
graph_parameters_queue_params_list[1].refs_list = (vx_reference*)&d2[0];
/* Schedule mode auto is used, so we don't need to call vxScheduleGraph.
* The graph gets scheduled automatically as refs are enqueued to it.
*/
VX_CALL(vxSetGraphScheduleConfig(graph,
VX_GRAPH_SCHEDULE_MODE_QUEUE_AUTO,
2,
graph_parameters_queue_params_list
));
VX_CALL(vxVerifyGraph(graph));
#if 1
/* fill reference data into input data reference */
for(buf_id=0; buf_id<num_buf; buf_id++)
{
ASSERT_NO_FAILURE(
vxCopyScalar(d0[buf_id],
&in_value[buf_id],
VX_WRITE_ONLY,
VX_MEMORY_TYPE_HOST));
}
/* Enqueue input and output references.
* Inputs and outputs can be enqueued in any order and can also be enqueued
* all together; here they are enqueued one by one just as an example.
*/
for(buf_id=0; buf_id<num_buf; buf_id++)
{
VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 1, (vx_reference*)&d2[buf_id], 1));
VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 0, (vx_reference*)&d0[buf_id], 1));
}
buf_id = 0;
/* wait for graph instances to complete, compare output and recycle data buffers, schedule again */
for(loop_id=0; loop_id<(loop_cnt+num_buf); loop_id++)
{
uint32_t num_refs;
/* Get output reference, waits until a reference is available */
VX_CALL(vxGraphParameterDequeueDoneRef(graph, 1, (vx_reference*)&out_scalar, 1, &num_refs));
/* Get consumed input reference, waits until a reference is available
*/
VX_CALL(vxGraphParameterDequeueDoneRef(graph, 0, (vx_reference*)&in_scalar, 1, &num_refs));
if(loop_cnt > 100)
{
ct_update_progress(loop_id, loop_cnt+num_buf);
}
VX_CALL(vxCopyScalar(out_scalar, &tmp_value, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
/* compare output */
ASSERT_EQ_INT(tmp_value, ref_out_value[buf_id]);
/* clear value in output */
tmp_value = 0;
VX_CALL(vxCopyScalar(out_scalar, &tmp_value, VX_WRITE_ONLY, VX_MEMORY_TYPE_HOST));
buf_id = (buf_id+1)%num_buf;
/* recycles dequeued input and output refs 'loop_cnt' times */
if(loop_id<loop_cnt)
{
/* input and output can be enqueued in any order */
VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 1, (vx_reference*)&out_scalar, 1));
VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 0, (vx_reference*)&in_scalar, 1));
}
}
/* ensure all graph processing is complete */
VX_CALL(vxWaitGraph(graph));
#endif
VX_CALL(vxReleaseNode(&n0));
VX_CALL(vxReleaseNode(&n1));
for(buf_id=0; buf_id<num_buf; buf_id++)
{
VX_CALL(vxReleaseScalar(&d0[buf_id]));
VX_CALL(vxReleaseScalar(&d2[buf_id]));
}
VX_CALL(vxReleaseScalar(&d1));
VX_CALL(vxReleaseGraph(&graph));
test_user_kernel_unregister(context);
}
/*
* d0 n0 d1 n1 d2
* IMG -- NOT -- IMG -- NOT -- IMG
*
* This test case tests the following
* - Single input, single output nodes
* - Two nodes on two different targets
*
*/
TEST_WITH_ARG(GraphPipeline, testManualSchedule, Arg, PARAMETERS)
{
vx_context context = context_->vx_context_;
vx_graph graph;
vx_image d0[MAX_NUM_BUF], d1, d2[MAX_NUM_BUF];
vx_node n0, n1;
vx_graph_parameter_queue_params_t graph_parameters_queue_params_list[3];
CT_Image ref_src[MAX_NUM_BUF], vxdst;
uint32_t width, height, seq_init, num_buf;
uint32_t buf_id, loop_id, loop_cnt;
seq_init = 1;
width = arg_->width;
height = arg_->height;
num_buf = arg_->num_buf;
loop_cnt = arg_->loop_count;
ASSERT(num_buf <= MAX_NUM_BUF);
/* fill reference data */
for(buf_id=0; buf_id<num_buf; buf_id++)
{
ASSERT_NO_FAILURE({
ref_src[buf_id] = ct_allocate_image(width, height, VX_DF_IMAGE_U8);
fillSequence(ref_src[buf_id], (uint32_t)(seq_init+buf_id*10));
});
}
ASSERT_VX_OBJECT(graph = vxCreateGraph(context), VX_TYPE_GRAPH);
/* allocate Input and Output refs, multiple refs created to allow pipelining of graph */
for(buf_id=0; buf_id<num_buf; buf_id++)
{
ASSERT_VX_OBJECT(d0[buf_id] = vxCreateImage(context, width, height, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
ASSERT_VX_OBJECT(d2[buf_id] = vxCreateImage(context, width, height, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
}
ASSERT_VX_OBJECT(d1 = vxCreateVirtualImage(graph, width, height, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
ASSERT_VX_OBJECT(n0 = vxNotNode(graph, d0[0], d1), VX_TYPE_NODE);
ASSERT_VX_OBJECT(n1 = vxNotNode(graph, d1, d2[0]), VX_TYPE_NODE);
/* input @ n0 index 0, becomes graph parameter 0 */
add_graph_parameter_by_node_index(graph, n0, 0);
/* output @ n1 index 1, becomes graph parameter 1 */
add_graph_parameter_by_node_index(graph, n1, 1);
/* set graph schedule config such that graph parameter @ index 0, 1 are enqueuable */
graph_parameters_queue_params_list[0].graph_parameter_index = 0;
graph_parameters_queue_params_list[0].refs_list_size = num_buf;
graph_parameters_queue_params_list[0].refs_list = (vx_reference*)&d0[0];
graph_parameters_queue_params_list[1].graph_parameter_index = 1;
graph_parameters_queue_params_list[1].refs_list_size = num_buf;
graph_parameters_queue_params_list[1].refs_list = (vx_reference*)&d2[0];
/* Schedule mode manual is used, so here we need to call vxScheduleGraph explicitly
*/
VX_CALL(vxSetGraphScheduleConfig(graph,
VX_GRAPH_SCHEDULE_MODE_QUEUE_MANUAL,
2,
graph_parameters_queue_params_list
));
VX_CALL(vxVerifyGraph(graph));
#if 1
/* fill reference data into input data reference */
for(buf_id=0; buf_id<num_buf; buf_id++)
{
ASSERT_NO_FAILURE(ct_image_copyto_vx_image(d0[buf_id], ref_src[buf_id]));
}
loop_cnt = (loop_cnt + num_buf) / num_buf;
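/* In manual schedule mode each vxScheduleGraph call below executes 'num_buf' graph
 * instances (one per enqueued set of refs), so scale down the loop count accordingly
 */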
/* wait for graph instances to complete, compare output and recycle data buffers, schedule again */
for(loop_id=0; loop_id<(loop_cnt); loop_id++)
{
vx_image out_img[MAX_NUM_BUF], in_img[MAX_NUM_BUF];
uint32_t num_refs_in, num_refs_out;
/* Enqueue input and output references.
* Inputs and outputs can be enqueued in any order and can also be enqueued
* all together; here they are enqueued one by one just as an example.
*/
for(buf_id=0; buf_id<num_buf; buf_id++)
{
VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 1, (vx_reference*)&d2[buf_id], 1));
VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 0, (vx_reference*)&d0[buf_id], 1));
}
VX_CALL(vxScheduleGraph(graph));
VX_CALL(vxWaitGraph(graph));
/* Get output reference, waits until a reference is available */
VX_CALL(vxGraphParameterDequeueDoneRef(graph, 1, (vx_reference*)out_img, num_buf, &num_refs_out));
/* Get consumed input references, waits until references are available
*/
VX_CALL(vxGraphParameterDequeueDoneRef(graph, 0, (vx_reference*)in_img, num_buf, &num_refs_in));
ASSERT_EQ_INT(num_refs_out, num_buf);
ASSERT_EQ_INT(num_refs_in, num_buf);
if(loop_cnt > 100)
{
ct_update_progress(loop_id, loop_cnt);
}
for(buf_id=0; buf_id<num_buf; buf_id++)
{
ASSERT_NO_FAILURE({
vxdst = ct_image_from_vx_image(out_img[buf_id]);
});
/* compare output */
ASSERT_EQ_CTIMAGE(ref_src[buf_id], vxdst);
}
}
#endif
VX_CALL(vxReleaseNode(&n0));
VX_CALL(vxReleaseNode(&n1));
for(buf_id=0; buf_id<num_buf; buf_id++)
{
VX_CALL(vxReleaseImage(&d0[buf_id]));
VX_CALL(vxReleaseImage(&d2[buf_id]));
}
VX_CALL(vxReleaseImage(&d1));
VX_CALL(vxReleaseGraph(&graph));
}
/*
* d0 n0 d2
* SCALAR -- delay (0) -- USER_KERNEL -- SCALAR
* | |
* delay (-1) ------+
*
*
* This test case tests the following
* - Delay objects
*
*/
TEST_WITH_ARG(GraphPipeline, testDelay1, Arg, PARAMETERS)
{
vx_context context = context_->vx_context_;
vx_graph graph;
vx_scalar d0[MAX_NUM_BUF], d2[MAX_NUM_BUF], exemplar;
vx_delay delay;
vx_scalar in_scalar, out_scalar;
vx_node n0;
vx_graph_parameter_queue_params_t graph_parameters_queue_params_list[2];
vx_uint32 in_value[MAX_NUM_BUF], ref_out_value[MAX_NUM_BUF];
vx_uint32 tmp_value = 0;
uint32_t num_buf;
uint32_t buf_id, loop_id, loop_cnt;
test_user_kernel_register(context);
num_buf = arg_->num_buf;
loop_cnt = arg_->loop_count;
/* since the delay has 2 slots, num_buf MUST be at least 2 */
if(num_buf < 2)
{
num_buf = 2;
}
ASSERT(num_buf <= MAX_NUM_BUF);
/* fill reference data */
for(buf_id=0; buf_id<num_buf; buf_id++)
{
in_value[buf_id] = 10*(buf_id+1);
}
for(buf_id=0; buf_id<num_buf; buf_id++)
{
ref_out_value[buf_id] = in_value[buf_id]
+ in_value[ (num_buf + buf_id-1)%num_buf ];
}
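/* Expected output: current input plus the input from the previous graph execution,
 * since n0 reads delay slots 0 and -1 and the delay is auto-aged every execution
 */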
ASSERT_VX_OBJECT(graph = vxCreateGraph(context), VX_TYPE_GRAPH);
/* allocate Input and Output refs, multiple refs created to allow pipelining of graph */
for(buf_id=0; buf_id<num_buf; buf_id++)
{
ASSERT_VX_OBJECT(d2[buf_id] = vxCreateScalar(context, VX_TYPE_UINT32, &tmp_value), VX_TYPE_SCALAR);
}
/* allocate input refs: delay slot 0 is d0[0], delay slot -1 is d0[num_buf-1],
* allocate other objects in between
*/
ASSERT_VX_OBJECT(exemplar = vxCreateScalar(context, VX_TYPE_UINT32, &tmp_value), VX_TYPE_SCALAR);
ASSERT_VX_OBJECT(delay = vxCreateDelay(context, (vx_reference)exemplar, 2), VX_TYPE_DELAY);
d0[0] = (vx_scalar)vxGetReferenceFromDelay(delay, 0);
for(buf_id=1; buf_id<num_buf-1; buf_id++)
{
ASSERT_VX_OBJECT(d0[buf_id] = vxCreateScalar(context, VX_TYPE_UINT32, &tmp_value), VX_TYPE_SCALAR);
}
d0[num_buf-1] = (vx_scalar)vxGetReferenceFromDelay(delay, -1);
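/* references obtained via vxGetReferenceFromDelay are owned by the delay object,
 * so this test does not release them individually; only the exemplar is released
 */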
VX_CALL(vxReleaseScalar(&exemplar));
ASSERT_VX_OBJECT(n0 = test_user_kernel_node(graph, d0[0], d0[num_buf-1], d2[0], NULL), VX_TYPE_NODE);
/* input @ n0 index 0, becomes graph parameter 0 */
add_graph_parameter_by_node_index(graph, n0, 0);
/* output @ n0 index 2, becomes graph parameter 1 */
add_graph_parameter_by_node_index(graph, n0, 2);
/* set graph schedule config such that graph parameter @ index 0, 1 are enqueuable */
graph_parameters_queue_params_list[0].graph_parameter_index = 0;
graph_parameters_queue_params_list[0].refs_list_size = num_buf;
graph_parameters_queue_params_list[0].refs_list = (vx_reference*)&d0[0];
graph_parameters_queue_params_list[1].graph_parameter_index = 1;
graph_parameters_queue_params_list[1].refs_list_size = num_buf;
graph_parameters_queue_params_list[1].refs_list = (vx_reference*)&d2[0];
/* Schedule mode auto is used, so we don't need to call vxScheduleGraph.
* The graph gets scheduled automatically as refs are enqueued to it.
*/
VX_CALL(vxSetGraphScheduleConfig(graph,
VX_GRAPH_SCHEDULE_MODE_QUEUE_AUTO,
2,
graph_parameters_queue_params_list
));
/* always auto age delay in pipelined graph */
VX_CALL(vxRegisterAutoAging(graph, delay));
VX_CALL(vxVerifyGraph(graph));
#if 1
/* fill reference data into input data reference */
for(buf_id=0; buf_id<num_buf; buf_id++)
{
ASSERT_NO_FAILURE(
vxCopyScalar(d0[buf_id],
&in_value[buf_id],
VX_WRITE_ONLY,
VX_MEMORY_TYPE_HOST));
}
/* Enqueue input and output references.
* Inputs and outputs can be enqueued in any order and can also be enqueued
* all together; here they are enqueued one by one just as an example.
*/
for(buf_id=0; buf_id<num_buf; buf_id++)
{
VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 1, (vx_reference*)&d2[buf_id], 1));
}
/* the last buf is already set at delay slot -1 so don't enqueue that ref */
for(buf_id=0; buf_id<num_buf-1; buf_id++)
{
VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 0, (vx_reference*)&d0[buf_id], 1));
}
buf_id = 0;
/* wait for graph instances to complete, compare output and recycle data buffers, schedule again */
for(loop_id=0; loop_id<(loop_cnt+num_buf-1); loop_id++)
{
uint32_t num_refs;
/* Get output reference, waits until a reference is available */
VX_CALL(vxGraphParameterDequeueDoneRef(graph, 1, (vx_reference*)&out_scalar, 1, &num_refs));
/* Get consumed input reference, waits until a reference is available
*/
VX_CALL(vxGraphParameterDequeueDoneRef(graph, 0, (vx_reference*)&in_scalar, 1, &num_refs));
VX_CALL(vxCopyScalar(out_scalar, &tmp_value, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
/* compare output */
//printf(" %d: out = %d ref = %d\n", loop_id, tmp_value, ref_out_value[buf_id]);
ASSERT_EQ_INT(tmp_value, ref_out_value[buf_id]);
/* clear value in output */
tmp_value = 0;
VX_CALL(vxCopyScalar(out_scalar, &tmp_value, VX_WRITE_ONLY, VX_MEMORY_TYPE_HOST));
if(loop_cnt > 100)
{
ct_update_progress(loop_id, loop_cnt+num_buf);
}
buf_id = (buf_id+1)%num_buf;
/* recycles dequeued input and output refs 'loop_cnt' times */
if(loop_id<loop_cnt)
{
/* input and output can be enqueued in any order */
VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 1, (vx_reference*)&out_scalar, 1));
VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 0, (vx_reference*)&in_scalar, 1));
}
}
/* ensure all graph processing is complete */
VX_CALL(vxWaitGraph(graph));
#endif
VX_CALL(vxReleaseNode(&n0));
for(buf_id=1; buf_id<num_buf-1; buf_id++)
{
VX_CALL(vxReleaseScalar(&d0[buf_id]));
}
for(buf_id=0; buf_id<num_buf; buf_id++)
{
VX_CALL(vxReleaseScalar(&d2[buf_id]));
}
VX_CALL(vxReleaseDelay(&delay));
VX_CALL(vxReleaseGraph(&graph));
test_user_kernel_unregister(context);
}
/*
* d0 n0 d2
* SCALAR -- delay (0) -- USER_KERNEL -- SCALAR
* | |
* delay (-1) ------+--- USER_KERNEL -- null
* | n1
* | |
* delay (-2) ---------------+
*
* This test case tests the following
* - Delay objects with 3 delay slots
* - Delay slot connected to two inputs
*
*/
TEST_WITH_ARG(GraphPipeline, testDelay2, Arg, PARAMETERS)
{
vx_context context = context_->vx_context_;
vx_graph graph;
vx_scalar d0[MAX_NUM_BUF], d2[MAX_NUM_BUF], exemplar;
vx_delay delay;
vx_scalar in_scalar, out_scalar;
vx_node n0, n1;
vx_graph_parameter_queue_params_t graph_parameters_queue_params_list[2];
vx_uint32 in_value[MAX_NUM_BUF], ref_out_value[MAX_NUM_BUF];
vx_uint32 tmp_value = 0;
uint32_t num_buf;
uint32_t buf_id, loop_id, loop_cnt, k;
test_user_kernel_register(context);
num_buf = arg_->num_buf;
loop_cnt = arg_->loop_count;
/* since the delay has 3 slots, num_buf MUST be at least 3 */
if(num_buf < 3)
{
num_buf = 3;
}
ASSERT(num_buf <= MAX_NUM_BUF);
/* fill reference data */
for(buf_id=0; buf_id<num_buf; buf_id++)
{
in_value[buf_id] = 10*(buf_id+1);
}
k=0;
for(buf_id=0; buf_id<num_buf; buf_id++)
{
ref_out_value[buf_id] = in_value[k]
+ in_value[ (num_buf + k - 2)%num_buf ];
k = (k+2)%num_buf;
}
ASSERT_VX_OBJECT(graph = vxCreateGraph(context), VX_TYPE_GRAPH);
/* allocate Input and Output refs, multiple refs created to allow pipelining of graph */
for(buf_id=0; buf_id<num_buf; buf_id++)
{
ASSERT_VX_OBJECT(d2[buf_id] = vxCreateScalar(context, VX_TYPE_UINT32, &tmp_value), VX_TYPE_SCALAR);
}
/* allocate input refs: delay slot 0 is d0[0], delay slot -1 is d0[num_buf-2],
* delay slot -2 is d0[num_buf-1], allocate other objects in between
*/
ASSERT_VX_OBJECT(exemplar = vxCreateScalar(context, VX_TYPE_UINT32, &tmp_value), VX_TYPE_SCALAR);
ASSERT_VX_OBJECT(delay = vxCreateDelay(context, (vx_reference)exemplar, 3), VX_TYPE_DELAY);
d0[0] = (vx_scalar)vxGetReferenceFromDelay(delay, 0);
for(buf_id=1; buf_id<num_buf-2; buf_id++)
{
ASSERT_VX_OBJECT(d0[buf_id] = vxCreateScalar(context, VX_TYPE_UINT32, &tmp_value), VX_TYPE_SCALAR);
}
d0[num_buf-2] = (vx_scalar)vxGetReferenceFromDelay(delay, -1);
d0[num_buf-1] = (vx_scalar)vxGetReferenceFromDelay(delay, -2);
VX_CALL(vxReleaseScalar(&exemplar));
ASSERT_VX_OBJECT(n0 = test_user_kernel_node(graph, d0[0], d0[num_buf-2], d2[0], NULL), VX_TYPE_NODE);
ASSERT_VX_OBJECT(n1 = test_user_kernel_node(graph, d0[num_buf-1], d0[num_buf-2], NULL, NULL), VX_TYPE_NODE);
/* input @ n0 index 0, becomes graph parameter 0 */
add_graph_parameter_by_node_index(graph, n0, 0);
/* output @ n0 index 2, becomes graph parameter 1 */
add_graph_parameter_by_node_index(graph, n0, 2);
/* set graph schedule config such that graph parameter @ index 0, 1 are enqueuable */
graph_parameters_queue_params_list[0].graph_parameter_index = 0;
graph_parameters_queue_params_list[0].refs_list_size = num_buf;
graph_parameters_queue_params_list[0].refs_list = (vx_reference*)&d0[0];
graph_parameters_queue_params_list[1].graph_parameter_index = 1;
graph_parameters_queue_params_list[1].refs_list_size = num_buf;
graph_parameters_queue_params_list[1].refs_list = (vx_reference*)&d2[0];
/* Schedule mode auto is used, so we don't need to call vxScheduleGraph.
* The graph gets scheduled automatically as refs are enqueued to it.
*/
VX_CALL(vxSetGraphScheduleConfig(graph,
VX_GRAPH_SCHEDULE_MODE_QUEUE_AUTO,
2,
graph_parameters_queue_params_list
));
/* always auto age delay in pipelined graph */
VX_CALL(vxRegisterAutoAging(graph, delay));
VX_CALL(vxVerifyGraph(graph));
#if 1
/* fill reference data into input data reference */
for(buf_id=0; buf_id<num_buf; buf_id++)
{
ASSERT_NO_FAILURE(
vxCopyScalar(d0[buf_id],
&in_value[buf_id],
VX_WRITE_ONLY,
VX_MEMORY_TYPE_HOST));
}
/* Enqueue input and output references.
* Inputs and outputs can be enqueued in any order and can also be enqueued
* all together; here they are enqueued one by one just as an example.
*/
for(buf_id=0; buf_id<num_buf; buf_id++)
{
VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 1, (vx_reference*)&d2[buf_id], 1));
}
/* the last two bufs are already set at delay slots -1 and -2 so don't enqueue those refs */
for(buf_id=0; buf_id<num_buf-2; buf_id++)
{
VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 0, (vx_reference*)&d0[buf_id], 1));
}
buf_id = 0;
/* wait for graph instances to complete, compare output and recycle data buffers, schedule again */
for(loop_id=0; loop_id<(loop_cnt+num_buf-2); loop_id++)
{
uint32_t num_refs;
/* Get output reference, waits until a reference is available */
VX_CALL(vxGraphParameterDequeueDoneRef(graph, 1, (vx_reference*)&out_scalar, 1, &num_refs));
/* Get consumed input reference, waits until a reference is available
*/
VX_CALL(vxGraphParameterDequeueDoneRef(graph, 0, (vx_reference*)&in_scalar, 1, &num_refs));
VX_CALL(vxCopyScalar(out_scalar, &tmp_value, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
/* compare output */
//printf(" %d: out = %d ref = %d\n", loop_id, tmp_value, ref_out_value[buf_id]);
ASSERT_EQ_INT(tmp_value, ref_out_value[buf_id]);
/* clear value in output */
tmp_value = 0;
VX_CALL(vxCopyScalar(out_scalar, &tmp_value, VX_WRITE_ONLY, VX_MEMORY_TYPE_HOST));
if(loop_cnt > 100)
{
ct_update_progress(loop_id, loop_cnt+num_buf);
}
buf_id = (buf_id+1)%num_buf;
/* recycles dequeued input and output refs 'loop_cnt' times */
if(loop_id<loop_cnt)
{
/* input and output can be enqueued in any order */
VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 1, (vx_reference*)&out_scalar, 1));
VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 0, (vx_reference*)&in_scalar, 1));
}
}
/* ensure all graph processing is complete */
VX_CALL(vxWaitGraph(graph));
#endif
VX_CALL(vxReleaseNode(&n0));
VX_CALL(vxReleaseNode(&n1));
for(buf_id=1; buf_id<num_buf-2; buf_id++)
{
VX_CALL(vxReleaseScalar(&d0[buf_id]));
}
for(buf_id=0; buf_id<num_buf; buf_id++)
{
VX_CALL(vxReleaseScalar(&d2[buf_id]));
}
VX_CALL(vxReleaseDelay(&delay));
VX_CALL(vxReleaseGraph(&graph));
test_user_kernel_unregister(context);
}
/*
* d0 n0 n1 d2
* SCALAR -- USER_KERNEL -- delay (-1) -- USER_KERNEL -- SCALAR
* | |
* delay (-2) ------+--- USER_KERNEL -- null
* | n2
* | |
* delay (0) ---------------+
*
* This test case tests the following
* - Delay objects with 3 delay slots
* - Delay slot connected to two inputs
* - Delay intermediate to a graph, no graph parameter at any delay slot
* - Node output to delay slot -1 (instead of the typical slot 0)
* - Multiple buffers at the output of n0, i.e. delay slot -1
*
*/
TEST_WITH_ARG(GraphPipeline, testDelay3, Arg, PARAMETERS)
{
vx_context context = context_->vx_context_;
vx_graph graph;
vx_scalar d0[MAX_NUM_BUF], d2[MAX_NUM_BUF], exemplar;
vx_delay delay;
vx_scalar in_scalar, out_scalar;
vx_node n0, n1, n2;
vx_graph_parameter_queue_params_t graph_parameters_queue_params_list[2];
vx_uint32 in_value[MAX_NUM_BUF], ref_out_value[MAX_NUM_BUF];
vx_uint32 tmp_value = 0;
uint32_t num_buf;
uint32_t buf_id, loop_id, loop_cnt;
test_user_kernel_register(context);
num_buf = arg_->num_buf;
loop_cnt = arg_->loop_count;
/* since the delay has 3 slots, num_buf MUST be at least 3 */
if(num_buf < 3)
{
num_buf = 3;
}
ASSERT(num_buf <= MAX_NUM_BUF);
/* fill reference data */
for(buf_id=0; buf_id<num_buf; buf_id++)
{
in_value[buf_id] = 10*(buf_id+1);
}
{
uint32_t tmp_value[3];
tmp_value[0] = 0;
tmp_value[1] = in_value[num_buf-1];
tmp_value[2] = in_value[num_buf-2];
for(buf_id=0; buf_id<num_buf; buf_id++)
{
ref_out_value[buf_id] = in_value[buf_id]
+ tmp_value[ 1 ];
tmp_value[ 0 ] = tmp_value[ 2 ];
tmp_value[ 2 ] = tmp_value[ 1 ];
tmp_value[ 1 ] = in_value[buf_id];
}
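/* Expected output: current input plus the input from the previous execution
 * (n1 adds n0's output at slot -1 to slot -2, which after aging holds the
 * previous n0 output); the preloaded slot -2 value set below stands in for
 * the "previous input" of the first execution
 */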
}
ASSERT_VX_OBJECT(graph = vxCreateGraph(context), VX_TYPE_GRAPH);
/* allocate Input and Output refs, multiple refs created to allow pipelining of graph */
for(buf_id=0; buf_id<num_buf; buf_id++)
{
ASSERT_VX_OBJECT(d0[buf_id] = vxCreateScalar(context, VX_TYPE_UINT32, &tmp_value), VX_TYPE_SCALAR);
ASSERT_VX_OBJECT(d2[buf_id] = vxCreateScalar(context, VX_TYPE_UINT32, &tmp_value), VX_TYPE_SCALAR);
}
/* allocate delay
*/
ASSERT_VX_OBJECT(exemplar = vxCreateScalar(context, VX_TYPE_UINT32, &tmp_value), VX_TYPE_SCALAR);
ASSERT_VX_OBJECT(delay = vxCreateDelay(context, (vx_reference)exemplar, 3), VX_TYPE_DELAY);
VX_CALL(vxReleaseScalar(&exemplar));
ASSERT_VX_OBJECT(
n0 = test_user_kernel_node( graph,
d0[0], NULL,
(vx_scalar)vxGetReferenceFromDelay(delay, -1), NULL),
VX_TYPE_NODE);
ASSERT_VX_OBJECT(
n1 = test_user_kernel_node(graph,
(vx_scalar)vxGetReferenceFromDelay(delay, -1), (vx_scalar)vxGetReferenceFromDelay(delay, -2),
d2[0], NULL),
VX_TYPE_NODE);
ASSERT_VX_OBJECT(
n2 = test_user_kernel_node(graph,
(vx_scalar)vxGetReferenceFromDelay(delay, -2), (vx_scalar)vxGetReferenceFromDelay(delay, 0),
NULL, NULL),
VX_TYPE_NODE);
/* input @ n0 index 0, becomes graph parameter 0 */
add_graph_parameter_by_node_index(graph, n0, 0);
/* output @ n1 index 2, becomes graph parameter 1 */
add_graph_parameter_by_node_index(graph, n1, 2);
/* set graph schedule config such that graph parameter @ index 0, 1 are enqueuable */
graph_parameters_queue_params_list[0].graph_parameter_index = 0;
graph_parameters_queue_params_list[0].refs_list_size = num_buf;
graph_parameters_queue_params_list[0].refs_list = (vx_reference*)&d0[0];
graph_parameters_queue_params_list[1].graph_parameter_index = 1;
graph_parameters_queue_params_list[1].refs_list_size = num_buf;
graph_parameters_queue_params_list[1].refs_list = (vx_reference*)&d2[0];
/* Schedule mode auto is used, so we don't need to call vxScheduleGraph.
* The graph gets scheduled automatically as refs are enqueued to it.
*/
VX_CALL(vxSetGraphScheduleConfig(graph,
VX_GRAPH_SCHEDULE_MODE_QUEUE_AUTO,
2,
graph_parameters_queue_params_list
));
/* always auto age delay in pipelined graph */
VX_CALL(vxRegisterAutoAging(graph, delay));
VX_CALL(vxVerifyGraph(graph));
#if 1
/* fill reference data into input data reference */
for(buf_id=0; buf_id<num_buf; buf_id++)
{
ASSERT_NO_FAILURE(
vxCopyScalar(d0[buf_id],
&in_value[buf_id],
VX_WRITE_ONLY,
VX_MEMORY_TYPE_HOST));
}
{
vx_scalar tmp_scalar;
tmp_scalar = (vx_scalar)vxGetReferenceFromDelay(delay, 0);
ASSERT_NO_FAILURE(
vxCopyScalar(tmp_scalar,
&in_value[num_buf-2],
VX_WRITE_ONLY,
VX_MEMORY_TYPE_HOST));
tmp_scalar = (vx_scalar)vxGetReferenceFromDelay(delay, -2);
ASSERT_NO_FAILURE(
vxCopyScalar(tmp_scalar,
&in_value[num_buf-1],
VX_WRITE_ONLY,
VX_MEMORY_TYPE_HOST));
}
/* Enqueue input and output references.
* Inputs and outputs can be enqueued in any order and can also be enqueued
* all together; here they are enqueued one by one just as an example.
*/
for(buf_id=0; buf_id<num_buf; buf_id++)
{
VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 1, (vx_reference*)&d2[buf_id], 1));
}
for(buf_id=0; buf_id<num_buf; buf_id++)
{
VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 0, (vx_reference*)&d0[buf_id], 1));
}
buf_id = 0;
/* wait for graph instances to complete, compare output and recycle data buffers, schedule again */
for(loop_id=0; loop_id<(loop_cnt+num_buf); loop_id++)
{
uint32_t num_refs;
/* Get output reference, waits until a reference is available */
VX_CALL(vxGraphParameterDequeueDoneRef(graph, 1, (vx_reference*)&out_scalar, 1, &num_refs));
/* Get consumed input reference, waits until a reference is available
*/
VX_CALL(vxGraphParameterDequeueDoneRef(graph, 0, (vx_reference*)&in_scalar, 1, &num_refs));
VX_CALL(vxCopyScalar(out_scalar, &tmp_value, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
/* compare output */
//printf(" %d: out = %d ref = %d\n", loop_id, tmp_value, ref_out_value[buf_id]);
ASSERT_EQ_INT(tmp_value, ref_out_value[buf_id]);
if(loop_cnt > 100)
{
ct_update_progress(loop_id, loop_cnt+num_buf);
}
/* clear value in output */
tmp_value = 0;
VX_CALL(vxCopyScalar(out_scalar, &tmp_value, VX_WRITE_ONLY, VX_MEMORY_TYPE_HOST));
buf_id = (buf_id+1)%num_buf;
/* recycles dequeued input and output refs 'loop_cnt' times */
if(loop_id<loop_cnt)
{
/* input and output can be enqueued in any order */
VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 1, (vx_reference*)&out_scalar, 1));
VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 0, (vx_reference*)&in_scalar, 1));
}
}
/* ensure all graph processing is complete */
VX_CALL(vxWaitGraph(graph));
#endif
VX_CALL(vxReleaseNode(&n0));
VX_CALL(vxReleaseNode(&n1));
VX_CALL(vxReleaseNode(&n2));
for(buf_id=0; buf_id<num_buf; buf_id++)
{
VX_CALL(vxReleaseScalar(&d0[buf_id]));
VX_CALL(vxReleaseScalar(&d2[buf_id]));
}
VX_CALL(vxReleaseDelay(&delay));
VX_CALL(vxReleaseGraph(&graph));
test_user_kernel_unregister(context);
}
/*
* d0 n0 n1 d2
* SCALAR -- USER_KERNEL -- delay (0) -- USER_KERNEL -- SCALAR
* | |
* delay (-1) |
* | |
* delay (-2) |
* | |
* delay (-3) -----+
*
* This test case tests the following
* - Delay objects with 4 delay slots
* - Delay intermediate to a graph, no graph parameter at any delay slot
* - Delay with slots not connected to any input - tests auto-aging at these slots
*
*/
TEST_WITH_ARG(GraphPipeline, testDelay4, Arg, PARAMETERS)
{
vx_context context = context_->vx_context_;
vx_graph graph;
vx_scalar d0[MAX_NUM_BUF], d2[MAX_NUM_BUF], exemplar;
vx_delay delay;
vx_scalar in_scalar, out_scalar;
vx_node n0, n1;
vx_graph_parameter_queue_params_t graph_parameters_queue_params_list[2];
vx_uint32 in_value[MAX_NUM_BUF], ref_out_value;
vx_uint32 ref_delay_value[MAX_NUM_BUF];
vx_uint32 tmp_value = 0;
uint32_t num_buf;
uint32_t buf_id, loop_id, loop_cnt;
test_user_kernel_register(context);
num_buf = arg_->num_buf;
loop_cnt = arg_->loop_count;
/* since the delay has 4 slots, num_buf MUST be at least 4 */
if(num_buf < 4)
{
num_buf = 4;
}
ASSERT(num_buf <= MAX_NUM_BUF);
/* fill reference data */
for(buf_id=0; buf_id<num_buf; buf_id++)
{
in_value[buf_id] = 10*(buf_id+1);
}
ref_delay_value[0] = 0;
ref_delay_value[1] = in_value[num_buf-3];
ref_delay_value[2] = in_value[num_buf-2];
ref_delay_value[3] = in_value[num_buf-1];
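/* ref_delay_value[1..3] mirror the preloaded delay slots -1..-3 (filled below);
 * the expected output of each execution is the current input plus the value that
 * is three executions old, i.e. what n1 reads from delay slot -3
 */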
ASSERT_VX_OBJECT(graph = vxCreateGraph(context), VX_TYPE_GRAPH);
/* allocate Input and Output refs, multiple refs created to allow pipelining of graph */
for(buf_id=0; buf_id<num_buf; buf_id++)
{
ASSERT_VX_OBJECT(d0[buf_id] = vxCreateScalar(context, VX_TYPE_UINT32, &tmp_value), VX_TYPE_SCALAR);
ASSERT_VX_OBJECT(d2[buf_id] = vxCreateScalar(context, VX_TYPE_UINT32, &tmp_value), VX_TYPE_SCALAR);
}
/* allocate delay
*/
ASSERT_VX_OBJECT(exemplar = vxCreateScalar(context, VX_TYPE_UINT32, &tmp_value), VX_TYPE_SCALAR);
ASSERT_VX_OBJECT(delay = vxCreateDelay(context, (vx_reference)exemplar, 4), VX_TYPE_DELAY);
VX_CALL(vxReleaseScalar(&exemplar));
ASSERT_VX_OBJECT(
n0 = test_user_kernel_node( graph,
d0[0], NULL,
(vx_scalar)vxGetReferenceFromDelay(delay, 0), NULL),
VX_TYPE_NODE);
ASSERT_VX_OBJECT(
n1 = test_user_kernel_node(graph,
(vx_scalar)vxGetReferenceFromDelay(delay, 0), (vx_scalar)vxGetReferenceFromDelay(delay, -3),
d2[0], NULL),
VX_TYPE_NODE);
/* input @ n0 index 0, becomes graph parameter 0 */
add_graph_parameter_by_node_index(graph, n0, 0);
/* output @ n1 index 2, becomes graph parameter 1 */
add_graph_parameter_by_node_index(graph, n1, 2);
/* set graph schedule config such that graph parameter @ index 0, 1 are enqueuable */
graph_parameters_queue_params_list[0].graph_parameter_index = 0;
graph_parameters_queue_params_list[0].refs_list_size = num_buf;
graph_parameters_queue_params_list[0].refs_list = (vx_reference*)&d0[0];
graph_parameters_queue_params_list[1].graph_parameter_index = 1;
graph_parameters_queue_params_list[1].refs_list_size = num_buf;
graph_parameters_queue_params_list[1].refs_list = (vx_reference*)&d2[0];
/* Schedule mode auto is used, so we don't need to call vxScheduleGraph.
* The graph gets scheduled automatically as refs are enqueued to it.
*/
VX_CALL(vxSetGraphScheduleConfig(graph,
VX_GRAPH_SCHEDULE_MODE_QUEUE_AUTO,
2,
graph_parameters_queue_params_list
));
/* always auto age delay in pipelined graph */
VX_CALL(vxRegisterAutoAging(graph, delay));
VX_CALL(vxVerifyGraph(graph));
#if 1
/* fill reference data into input data reference */
for(buf_id=0; buf_id<num_buf; buf_id++)
{
ASSERT_NO_FAILURE(
vxCopyScalar(d0[buf_id],
&in_value[buf_id],
VX_WRITE_ONLY,
VX_MEMORY_TYPE_HOST));
}
{
vx_scalar tmp_scalar;
tmp_scalar = (vx_scalar)vxGetReferenceFromDelay(delay, -1);
ASSERT_NO_FAILURE(
vxCopyScalar(tmp_scalar,
&in_value[num_buf-3],
VX_WRITE_ONLY,
VX_MEMORY_TYPE_HOST));
tmp_scalar = (vx_scalar)vxGetReferenceFromDelay(delay, -2);
ASSERT_NO_FAILURE(
vxCopyScalar(tmp_scalar,
&in_value[num_buf-2],
VX_WRITE_ONLY,
VX_MEMORY_TYPE_HOST));
tmp_scalar = (vx_scalar)vxGetReferenceFromDelay(delay, -3);
ASSERT_NO_FAILURE(
vxCopyScalar(tmp_scalar,
&in_value[num_buf-1],
VX_WRITE_ONLY,
VX_MEMORY_TYPE_HOST));
}
/* Enqueue input and output references.
* Inputs and outputs can be enqueued in any order and can also be enqueued
* all together; here they are enqueued one by one just as an example.
*/
for(buf_id=0; buf_id<num_buf; buf_id++)
{
VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 1, (vx_reference*)&d2[buf_id], 1));
}
for(buf_id=0; buf_id<num_buf; buf_id++)
{
VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 0, (vx_reference*)&d0[buf_id], 1));
}
buf_id = 0;
/* wait for graph instances to complete, compare output and recycle data buffers, schedule again */
for(loop_id=0; loop_id<(loop_cnt+num_buf); loop_id++)
{
uint32_t num_refs;
/* Get output reference, waits until a reference is available */
VX_CALL(vxGraphParameterDequeueDoneRef(graph, 1, (vx_reference*)&out_scalar, 1, &num_refs));
/* Get consumed input reference, waits until a reference is available
*/
VX_CALL(vxGraphParameterDequeueDoneRef(graph, 0, (vx_reference*)&in_scalar, 1, &num_refs));
VX_CALL(vxCopyScalar(out_scalar, &tmp_value, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
ref_out_value = in_value[buf_id] + ref_delay_value[ 3 ];
ref_delay_value[ 3 ] = ref_delay_value[ 2 ];
ref_delay_value[ 2 ] = ref_delay_value[ 1 ];
ref_delay_value[ 1 ] = in_value[buf_id];
/* compare output */
//printf(" %d: out = %d ref = %d\n", loop_id, tmp_value, ref_out_value);
ASSERT_EQ_INT(tmp_value, ref_out_value);
/* clear value in output */
tmp_value = 0;
VX_CALL(vxCopyScalar(out_scalar, &tmp_value, VX_WRITE_ONLY, VX_MEMORY_TYPE_HOST));
if(loop_cnt > 100)
{
ct_update_progress(loop_id, loop_cnt+num_buf);
}
buf_id = (buf_id+1)%num_buf;
/* recycles dequeued input and output refs 'loop_cnt' times */
if(loop_id<loop_cnt)
{
/* input and output can be enqueued in any order */
VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 1, (vx_reference*)&out_scalar, 1));
VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 0, (vx_reference*)&in_scalar, 1));
}
}
/* ensure all graph processing is complete */
VX_CALL(vxWaitGraph(graph));
#endif
VX_CALL(vxReleaseNode(&n0));
VX_CALL(vxReleaseNode(&n1));
for(buf_id=0; buf_id<num_buf; buf_id++)
{
VX_CALL(vxReleaseScalar(&d0[buf_id]));
VX_CALL(vxReleaseScalar(&d2[buf_id]));
}
VX_CALL(vxReleaseDelay(&delay));
VX_CALL(vxReleaseGraph(&graph));
test_user_kernel_unregister(context);
}
/*
* d0 n0 d2 n1 d3 n2 d4
* IMG -- ADD -- IMG -- NOT -- IMG -- NOT -- IMG
* | |
* | |
* +--------------------+
*
* This test case tests loop-carried dependency functional correctness
*
*/
TEST_WITH_ARG(GraphPipeline, testLoopCarriedDependency, Arg, PARAMETERS)
{
vx_context context = context_->vx_context_;
vx_graph graph;
vx_delay delay;
vx_image delay_image;
vx_image d0[MAX_NUM_BUF], d2[MAX_NUM_BUF], d4[MAX_NUM_BUF];
vx_node n0, n1, n2;
vx_graph_parameter_queue_params_t graph_parameters_queue_params_list[3];
int i;
vx_graph graph_1 = 0;
vx_image images[4];
vx_node nodes[3];
vx_delay delay_1 = 0;
vx_image delay_image_0 = 0;
vx_image delay_image_1 = 0;
vx_image delay_image_0_nopipeline = 0;
vx_image delay_image_1_nopipeline = 0;
vx_imagepatch_addressing_t addr;
vx_uint8 *pdata = 0;
vx_rectangle_t rect = {0, 0, arg_->width, arg_->height};
vx_map_id map_id;
CT_Image ref_src[MAX_NUM_BUF], ref_src1[MAX_NUM_BUF], vxdst0, vxdst1;
uint32_t width, height, seq_init, num_buf;
uint32_t buf_id, loop_id, loop_cnt;
seq_init = 1;
width = arg_->width;
height = arg_->height;
num_buf = arg_->num_buf;
loop_cnt = arg_->loop_count;
ASSERT(num_buf <= MAX_NUM_BUF);
/* fill reference data */
for(buf_id=0; buf_id<num_buf; buf_id++)
{
ASSERT_NO_FAILURE({
ref_src[buf_id] = ct_allocate_image(width, height, VX_DF_IMAGE_U8);
fillSequence(ref_src[buf_id], (uint32_t)(seq_init+buf_id*10));
});
ASSERT_NO_FAILURE({
ref_src1[buf_id] = ct_allocate_image(width, height, VX_DF_IMAGE_U8);
fillSequence(ref_src1[buf_id], (uint32_t)(seq_init+buf_id*10));
});
}
/* Non-pipelining graph */
ASSERT_VX_OBJECT(graph_1 = vxCreateGraph(context), VX_TYPE_GRAPH);
ASSERT_VX_OBJECT(images[0] = vxCreateImage(context, width, height, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
ASSERT_VX_OBJECT(images[1] = vxCreateImage(context, width, height, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
ASSERT_VX_OBJECT(images[2] = vxCreateImage(context, width, height, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
ASSERT_VX_OBJECT(images[3] = vxCreateImage(context, width, height, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
ASSERT_NO_FAILURE(ct_image_copyto_vx_image(images[0], ref_src[0]));
ASSERT_VX_OBJECT(delay_1 = vxCreateDelay(context, (vx_reference)images[3], 2), VX_TYPE_DELAY);
ASSERT_VX_OBJECT(delay_image_0_nopipeline = (vx_image)vxGetReferenceFromDelay(delay_1, 0), VX_TYPE_IMAGE);
ASSERT_VX_OBJECT(delay_image_1_nopipeline = (vx_image)vxGetReferenceFromDelay(delay_1,-1), VX_TYPE_IMAGE);
/* Filling reference data */
pdata = NULL;
VX_CALL(vxMapImagePatch(delay_image_0_nopipeline, &rect, 0, &map_id, &addr, (void **)&pdata,
VX_WRITE_ONLY, VX_MEMORY_TYPE_HOST, 0));
for (i = 0; i < width*height; i++)
{
*(pdata+i) = 1;
}
VX_CALL(vxUnmapImagePatch(delay_image_0_nopipeline, map_id));
pdata = NULL;
VX_CALL(vxMapImagePatch(delay_image_1_nopipeline, &rect, 0, &map_id, &addr, (void **)&pdata,
VX_WRITE_ONLY, VX_MEMORY_TYPE_HOST, 0));
for (i = 0; i < width*height; i++)
{
*(pdata+i) = 1;
}
VX_CALL(vxUnmapImagePatch(delay_image_1_nopipeline, map_id));
ASSERT_VX_OBJECT(nodes[0] = vxAddNode(graph_1, images[0], (vx_image)vxGetReferenceFromDelay(delay_1, -1), VX_CONVERT_POLICY_WRAP, images[1]), VX_TYPE_NODE);
ASSERT_VX_OBJECT(nodes[1] = vxNotNode(graph_1, images[1], (vx_image)vxGetReferenceFromDelay(delay_1, 0)), VX_TYPE_NODE);
ASSERT_VX_OBJECT(nodes[2] = vxNotNode(graph_1, (vx_image)vxGetReferenceFromDelay(delay_1, 0), images[2]), VX_TYPE_NODE);
VX_CALL(vxRegisterAutoAging(graph_1, delay_1));
VX_CALL(vxVerifyGraph(graph_1));
ASSERT_VX_OBJECT(graph = vxCreateGraph(context), VX_TYPE_GRAPH);
/* allocate Input and Output refs, multiple refs created to allow pipelining of graph */
for(buf_id=0; buf_id<num_buf; buf_id++)
{
ASSERT_VX_OBJECT(d0[buf_id] = vxCreateImage(context, width, height, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
ASSERT_VX_OBJECT(d2[buf_id] = vxCreateImage(context, width, height, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
ASSERT_VX_OBJECT(d4[buf_id] = vxCreateImage(context, width, height, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
}
ASSERT_VX_OBJECT(delay_image = vxCreateImage(context, width, height, VX_DF_IMAGE_U8), VX_TYPE_IMAGE);
ASSERT_VX_OBJECT(delay = vxCreateDelay(context, (vx_reference)delay_image, 2), VX_TYPE_DELAY);
ASSERT_VX_OBJECT(delay_image_0 = (vx_image)vxGetReferenceFromDelay(delay, 0), VX_TYPE_IMAGE);
ASSERT_VX_OBJECT(delay_image_1 = (vx_image)vxGetReferenceFromDelay(delay, -1), VX_TYPE_IMAGE);
/* Filling reference data */
pdata = NULL;
VX_CALL(vxMapImagePatch(delay_image_0, &rect, 0, &map_id, &addr, (void **)&pdata,
VX_WRITE_ONLY, VX_MEMORY_TYPE_HOST, 0));
for (i = 0; i < width*height; i++)
{
*(pdata+i) = 1;
}
VX_CALL(vxUnmapImagePatch(delay_image_0, map_id));
pdata = NULL;
VX_CALL(vxMapImagePatch(delay_image_1, &rect, 0, &map_id, &addr, (void **)&pdata,
VX_WRITE_ONLY, VX_MEMORY_TYPE_HOST, 0));
for (i = 0; i < width*height; i++)
{
*(pdata+i) = 1;
}
VX_CALL(vxUnmapImagePatch(delay_image_1, map_id));
ASSERT_VX_OBJECT(n0 = vxAddNode(graph, d0[0], (vx_image)vxGetReferenceFromDelay(delay, -1), VX_CONVERT_POLICY_WRAP, d2[0]), VX_TYPE_NODE);
ASSERT_VX_OBJECT(n1 = vxNotNode(graph, d2[0], (vx_image)vxGetReferenceFromDelay(delay, 0)), VX_TYPE_NODE);
ASSERT_VX_OBJECT(n2 = vxNotNode(graph, (vx_image)vxGetReferenceFromDelay(delay, 0), d4[0]), VX_TYPE_NODE);
/* input0 @ n0 index 0, becomes graph parameter 0 */
add_graph_parameter_by_node_index(graph, n0, 0);
/* output @ n0 index 3, becomes graph parameter 1 */
add_graph_parameter_by_node_index(graph, n0, 3);
/* output @ n2 index 1, becomes graph parameter 2 */
add_graph_parameter_by_node_index(graph, n2, 1);
/* set graph schedule config such that graph parameter @ index 0, 1, 2 are enqueuable */
graph_parameters_queue_params_list[0].graph_parameter_index = 0;
graph_parameters_queue_params_list[0].refs_list_size = num_buf;
graph_parameters_queue_params_list[0].refs_list = (vx_reference*)&d0[0];
graph_parameters_queue_params_list[1].graph_parameter_index = 1;
graph_parameters_queue_params_list[1].refs_list_size = num_buf;
graph_parameters_queue_params_list[1].refs_list = (vx_reference*)&d2[0];
graph_parameters_queue_params_list[2].graph_parameter_index = 2;
graph_parameters_queue_params_list[2].refs_list_size = num_buf;
graph_parameters_queue_params_list[2].refs_list = (vx_reference*)&d4[0];
/* Schedule mode auto is used, so we don't need to call vxScheduleGraph.
* The graph gets scheduled automatically as refs are enqueued to it.
*/
VX_CALL(vxSetGraphScheduleConfig(graph,
VX_GRAPH_SCHEDULE_MODE_QUEUE_AUTO,
3,
graph_parameters_queue_params_list
));
/* always auto age delay in pipelined graph */
VX_CALL(vxRegisterAutoAging(graph, delay));
VX_CALL(vxVerifyGraph(graph));
#if 1
/* fill reference data into input data reference */
for(buf_id=0; buf_id<num_buf; buf_id++)
{
ASSERT_NO_FAILURE(ct_image_copyto_vx_image(d0[buf_id], ref_src[0]));
}
/* Enqueue input and output references.
* Inputs and outputs can be enqueued in any order and can also be enqueued
* all together; here they are enqueued one by one just as an example.
*/
for(buf_id=0; buf_id<num_buf; buf_id++)
{
VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 1, (vx_reference*)&d2[buf_id], 1));
VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 0, (vx_reference*)&d0[buf_id], 1));
VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 2, (vx_reference*)&d4[buf_id], 1));
}
buf_id = 0;
/* wait for graph instances to complete, compare output and recycle data buffers, schedule again */
for(loop_id=0; loop_id<(loop_cnt+num_buf); loop_id++)
{
vx_image add_in0_img, add_out_img, not_out1_img;
uint32_t num_refs;
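/* run the non-pipelined reference graph once per pipelined execution; its output
 * (images[2]) is compared against the pipelined graph output below
 */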
VX_CALL(vxProcessGraph(graph_1));
/* Get output reference, waits until a reference is available */
VX_CALL(vxGraphParameterDequeueDoneRef(graph, 2, (vx_reference*)&not_out1_img, 1, &num_refs));
/* Get output reference, waits until a reference is available */
VX_CALL(vxGraphParameterDequeueDoneRef(graph, 1, (vx_reference*)&add_out_img, 1, &num_refs));
/* Get consumed input reference, waits until a reference is available
*/
VX_CALL(vxGraphParameterDequeueDoneRef(graph, 0, (vx_reference*)&add_in0_img, 1, &num_refs));
/* when measuring performance don't check the output since it affects the graph performance numbers
*/
if(loop_cnt > 100)
{
ct_update_progress(loop_id, loop_cnt+num_buf);
}
ASSERT_NO_FAILURE({
vxdst0 = ct_image_from_vx_image(not_out1_img);
});
ASSERT_NO_FAILURE({
vxdst1 = ct_image_from_vx_image(images[2]);
});
ASSERT_EQ_CTIMAGE(vxdst1, vxdst0);
buf_id = (buf_id+1)%num_buf;
/* recycles dequeued input and output refs 'loop_cnt' times */
if(loop_id<loop_cnt)
{
/* input and output can be enqueued in any order */
VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 1, (vx_reference*)&add_out_img, 1));
VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 0, (vx_reference*)&add_in0_img, 1));
VX_CALL(vxGraphParameterEnqueueReadyRef(graph, 2, (vx_reference*)&not_out1_img, 1));
}
}
/* ensure all graph processing is complete */
VX_CALL(vxWaitGraph(graph));
#endif
VX_CALL(vxReleaseNode(&n0));
VX_CALL(vxReleaseNode(&n1));
VX_CALL(vxReleaseNode(&n2));
for(buf_id=0; buf_id<num_buf; buf_id++)
{
VX_CALL(vxReleaseImage(&d0[buf_id]));
VX_CALL(vxReleaseImage(&d2[buf_id]));
VX_CALL(vxReleaseImage(&d4[buf_id]));
}
VX_CALL(vxReleaseImage(&delay_image));
VX_CALL(vxReleaseDelay(&delay));
VX_CALL(vxReleaseGraph(&graph));
for (i = 0; i < (sizeof(nodes)/sizeof(nodes[0])); i++)
{
VX_CALL(vxReleaseNode(&nodes[i]));
}
for (i = 0; i < (sizeof(images)/sizeof(images[0])); i++)
{
VX_CALL(vxReleaseImage(&images[i]));
}
VX_CALL(vxReleaseGraph(&graph_1));
VX_CALL(vxReleaseDelay(&delay_1));
ASSERT(graph_1 == 0);
ASSERT(delay_1 == 0);
CT_CollectGarbage(CT_GC_ALL);
}
TESTCASE_TESTS(GraphPipeline,
testOneNode,
testTwoNodesBasic,
testTwoNodes,
testFourNodes,
testMaxDataRef,
testUniformImage,
testScalarOutput,
testEventHandling,
testEventHandlingDisableEvents,
testReplicateImage,
testUserKernel,
testManualSchedule,
/*testDelay1,
testDelay2,
testDelay3,
testDelay4,*/
testLoopCarriedDependency
)
#endif