/* blob: 53b6cae15f45c4468a58c18e163ee1af1ab6639a [file] [log] [blame] */
#include <stdio.h>
#include <stdlib.h>
/*
 * Element-wise vector addition: r[k] = a[k] + b[k] for k in [0, n).
 *
 * The loop is annotated for OpenACC offload. The `present` clause states
 * that r, a and b are already available on the device, so the caller is
 * expected to invoke this from inside a data region that maps them.
 * Without OpenACC support the pragma is ignored and the loop runs on the
 * host.
 *
 * r: output vector, at least n elements
 * a: first input vector, at least n elements
 * b: second input vector, at least n elements
 * n: number of elements to process; nothing is written when n <= 0
 */
void vecaddgpu(float* r, float* a, float* b, int n)
{
#pragma acc kernels loop present(r, a, b)
    for (int idx = 0; idx < n; idx++) {
        r[idx] = a[idx] + b[idx];
    }
}
/*
 * Test driver for vecaddgpu.
 *
 * Fills two input vectors, adds them through vecaddgpu inside an OpenACC
 * data region, recomputes the sum on the host, and compares the two
 * results element by element.
 *
 * Returns EXIT_SUCCESS when every element matches, and EXIT_FAILURE when
 * at least one element differs or a buffer could not be allocated.
 */
int main(void)
{
    int n = 100000; /* vector length */
    float* a;       /* input vector 1 */
    float* b;       /* input vector 2 */
    float* r;       /* output vector */
    float* e;       /* expected output values */
    int i, errs;

    a = malloc((size_t)n * sizeof *a);
    b = malloc((size_t)n * sizeof *b);
    r = malloc((size_t)n * sizeof *r);
    e = malloc((size_t)n * sizeof *e);
    if (a == NULL || b == NULL || r == NULL || e == NULL) {
        /* Bail out before any buffer is touched; free(NULL) is a no-op. */
        fprintf(stderr, "vecadd: out of memory\n");
        free(a);
        free(b);
        free(r);
        free(e);
        return EXIT_FAILURE;
    }

    for (i = 0; i < n; ++i) {
        a[i] = (float)(i + 1);
        b[i] = (float)(1000 * i);
    }

    /* compute on the GPU */
#pragma acc data copyin(a [0:n], b [0:n]) copyout(r [0:n])
    {
        vecaddgpu(r, a, b, n);
    }

    /* compute on the host to compare */
    for (i = 0; i < n; ++i) {
        e[i] = a[i] + b[i];
    }

    /* compare results */
    errs = 0;
    for (i = 0; i < n; ++i) {
        if (r[i] != e[i]) {
            ++errs;
        }
    }

    free(a);
    free(b);
    free(r);
    free(e);

    /*
     * Do not return the raw mismatch count: only the low 8 bits of the
     * exit status reach the parent process, so a count that is a multiple
     * of 256 would be reported as success.
     */
    return errs == 0 ? EXIT_SUCCESS : EXIT_FAILURE;
}