测试用例

C++
#define CL_TARGET_OPENCL_VERSION 210
#include <stdio.h>
#include <stdlib.h>
#include <CL/cl.h>
#include <math.h>
/*
 * Explicit prototype for clCreateProgramWithIL. The only use in this file is
 * a commented-out call in main().
 * NOTE(review): <CL/cl.h> presumably already declares this function when
 * CL_TARGET_OPENCL_VERSION is 210 -- confirm whether this redeclaration is
 * still needed.
 */
extern cl_program clCreateProgramWithIL(cl_context context,
                                        const void *il,
                                        size_t length,
                                        cl_int *errcode_ret);

/*
 * Disabled (#if 0) inline OpenCL C source for an element-wise add kernel
 * named "add_kernel". main() reads its kernel source from a file and creates
 * a kernel named "foo", so nothing in this file uses this string.
 *
 * NOTE(review): before re-enabling, confirm these two suspected problems:
 *   - `result` is declared `__global const int *`, yet the kernel assigns to
 *     result[gid];
 *   - the newline escape inside the kernel's printf format is only escaped
 *     once, so the generated kernel source would contain a raw line break in
 *     the middle of a string literal.
 */
#if 0
const char* src = "__kernel void add_kernel(__global const int *a,\n " 
                  "        __global const int *b,\n " 
                  "        __global const int *result)\n" 
                  "{\n"
                  " int gid = get_global_id(0);\n"
                  "     int lid = get_local_id(0);\n"
                  " result[gid] = a[gid] + b[gid];\n"
                  "     printf(\"gid=%d, lid=%d\n\", gid,lid);\n"
                  //" printf(\"result[%d]=%d, a[%d]=%d, b[%d]=%d\", gid, result[gid],gid, a[gid],gid,b[gid]);\n"
                  "}\n";

#endif
/*
 * Read an entire file into a freshly allocated, NUL-terminated buffer.
 *
 * filename: path of the file to read.
 * length:   receives the number of bytes read (terminator excluded); set to 0
 *           on failure. May be NULL when the caller does not need the size.
 *
 * Returns the buffer, which the caller owns and must free(), or NULL when the
 * file cannot be opened, sized or read, or when allocation fails.
 */
void* getFileContent(const char *filename, size_t *length)
{
    char *text = NULL;
    FILE *pf;
    long file_size;
    size_t bytes_read;

    if (length != NULL) {
        *length = 0;
    }
    if (filename == NULL) {
        return NULL;
    }

    /* Binary mode keeps the size from ftell equal to what fread delivers. */
    pf = fopen(filename, "rb");
    if (pf == NULL) {
        return NULL;
    }

    if (fseek(pf, 0, SEEK_END) != 0) {
        goto done;
    }
    file_size = ftell(pf);
    if (file_size < 0) {
        goto done;
    }
    rewind(pf);

    text = (char*)malloc((size_t)file_size + 1);
    if (text == NULL) {
        goto done;
    }

    bytes_read = fread(text, sizeof(char), (size_t)file_size, pf);
    if (bytes_read != (size_t)file_size && ferror(pf)) {
        free(text);
        text = NULL;
        goto done;
    }

    /* Terminate after the bytes actually read, not the size we expected. */
    text[bytes_read] = '\0';
    if (length != NULL) {
        *length = bytes_read;
    }

done:
    fclose(pf);
    return text;
}

int main(int argc, char** argv)
{
    cl_context context = 0;
    cl_command_queue cmd = 0;
    cl_program program = 0;
    cl_device_id* device = NULL;
    cl_kernel kernel = 0;
    cl_mem mem[3] = {0};
    cl_platform_id* platform = NULL;
    cl_uint num_platforms;
    cl_uint numDevices;
    cl_event Execute_event;
    cl_event Read_event;
    //cl_ulong time_start, time_end;
    double time_total;
    char* spirv_kernel = "add_opt/add.cl";
    //time_start = 0;
    //time_end = 0;
    size_t size = 100;

    int err;

    if(argc > 1)
        spirv_kernel = argv[1];
    err = clGetPlatformIDs(5, NULL, &num_platforms);
    platform = (cl_platform_id*)malloc(num_platforms*sizeof(cl_platform_id));
    err = clGetPlatformIDs(num_platforms, platform, NULL);
    if(err != CL_SUCCESS)
    {
        printf("err: get platform  err");
        return -1;
    }

    err = clGetDeviceIDs(*platform, CL_DEVICE_TYPE_GPU, 5, NULL, &numDevices);
    device = (cl_device_id*)malloc(numDevices*sizeof(cl_device_id));

    err = clGetDeviceIDs(*platform, CL_DEVICE_TYPE_GPU, numDevices, device, NULL);
    if(err != CL_SUCCESS)
    {
        printf("err: get device err");
        return -1;
    }

    context = clCreateContext(NULL, 1, device, NULL, NULL, NULL);
    cmd = clCreateCommandQueue(context, *device, CL_QUEUE_PROFILING_ENABLE, NULL);
        size_t length = 0;
    void* buffer = getFileContent(spirv_kernel, &length);
    //    program = clCreateProgramWithIL(context, buffer, length, NULL);
    program = clCreateProgramWithSource(context, 1, &buffer, NULL, NULL);
    err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
    if(err != CL_SUCCESS)
    {
        printf("err: build program err");
        return -1;
    } 
#if 1
    kernel = clCreateKernel(program, "foo", NULL);

    int a[size], b[size],c[size], result[size], Value[size];
    for(int i = 0; i < size; i++)
    {
        a[i] = i;
        b[i] = 2*i;
        c[i]=100;
    }

    mem[0] = clCreateBuffer(context, CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR, sizeof(int)*size, a, NULL);
    mem[1] = clCreateBuffer(context, CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR, sizeof(int)*size, b, NULL);
    mem[2] = clCreateBuffer(context, CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR,sizeof(int)*size, c, NULL);
//        clEnqueueWriteBuffer(cmd, mem[0], CL_TRUE, 0, sizeof(int)*size, a, 0, NULL, NULL);
 ////       clEnqueueWriteBuffer(cmd, mem[1], CL_TRUE, 0, sizeof(int)*size, b, 0, NULL, NULL);
    clSetKernelArg(kernel, 0 , sizeof(cl_mem), &mem[0]);
    clSetKernelArg(kernel, 1 , sizeof(cl_mem), &mem[1]);
    clSetKernelArg(kernel, 2 , sizeof(cl_mem), &mem[2]);

    size_t globalworksize[1] = {size};
    size_t localworksize[1] = {1};
    err = clEnqueueNDRangeKernel(cmd, kernel, 1, NULL, globalworksize, localworksize, 0, NULL, &Execute_event);
    if(err != CL_SUCCESS)
    {
        printf("err: execute kernel err\n");
        return -1;
    }
    clFinish(cmd);

//  clGetEventProfilingInfo(Execute_event, CL_PROFILING_COMMAND_START, sizeof(time_start), &time_start, NULL);
//  clGetEventProfilingInfo(Execute_event, CL_PROFILING_COMMAND_END, sizeof(time_end), &time_end, NULL);
//  time_total = (double)1.0e-9 * (time_end - time_start);

    err = clEnqueueReadBuffer(cmd, mem[2], CL_TRUE, 0, size*sizeof(int), result, 0, NULL, NULL);
#if 1
    for(int i = 0; i < size; i++)
    {
        if(i==0)printf("\n");
        printf("result[%d] is %d\n", i,result[i]);
    }
//  printf("start time is %lu\n end time is %lu\nExecute command total time: %10.6f s\n", time_start, time_end, time_total);
#endif
#endif
    return 0;
}