Example
C++
#include <stdio.h>
#include <stdlib.h>
#include <CL/opencl.h>
// OpenCL C source for the device kernel `vector_add` (vector addition).
// main hands this string to clCreateProgramWithSource and builds it at
// run time.
//
// Each work-item takes its index from get_global_id(0) and stores
// A[i] + B[i] into C[i]. The `i < n` test makes any work-item whose index
// is not below n do nothing.
const char *kernelSource =
"__kernel void vector_add(__global const float *A, \n"
" __global const float *B, \n"
" __global float *C, \n"
" const unsigned int n) \n"
"{ \n"
" int i = get_global_id(0); \n"
" if (i < n) { \n"
" C[i] = A[i] + B[i]; \n"
" } \n"
"} \n";
// Number of float elements in each vector; main also uses it as the
// global work size when launching the kernel.
#define VECTOR_SIZE 1024
/*
 * Host program: adds two VECTOR_SIZE-element float vectors on an OpenCL
 * device and compares the result with a sum computed on the host.
 *
 * Sequence: allocate and fill host vectors, pick the first platform and its
 * default device, create context / queue / buffers, upload the inputs,
 * build kernelSource, launch vector_add, read the output back, verify it.
 *
 * Returns 0 when the run completes (verification mismatches are reported
 * on stdout but do not change the exit code) and -1 when a host allocation
 * or any checked OpenCL call fails. Every exit path goes through the
 * `cleanup` label, so host memory and OpenCL objects are always released.
 */
int main(void) {
    /* Everything the cleanup section touches is declared and initialised
     * up front, so no `goto cleanup` ever jumps over an initialiser (this
     * also keeps the file valid when compiled as C++). */
    int exit_code = -1;
    int errors = 0;
    float *h_A = NULL;
    float *h_B = NULL;
    float *h_C = NULL;
    cl_platform_id platform_id = NULL;
    cl_device_id device_id = NULL;
    cl_context context = NULL;
    cl_command_queue command_queue = NULL;
    cl_mem d_A = NULL;
    cl_mem d_B = NULL;
    cl_mem d_C = NULL;
    cl_program program = NULL;
    cl_kernel kernel = NULL;
    cl_uint ret_num_devices = 0;
    cl_uint ret_num_platforms = 0;
    cl_int ret = CL_SUCCESS;
    char info[128];
    const size_t buffer_bytes = VECTOR_SIZE * sizeof(float);
    /* The kernel's `unsigned int n` is 32 bits wide; cl_uint is the
     * matching host type. */
    cl_uint vector_size = VECTOR_SIZE;
    size_t global_work_size = VECTOR_SIZE;
    size_t local_work_size = 64; /* work-group size */

    /* Host-side data. */
    h_A = (float *)malloc(buffer_bytes);
    h_B = (float *)malloc(buffer_bytes);
    h_C = (float *)malloc(buffer_bytes);
    if (h_A == NULL || h_B == NULL || h_C == NULL) {
        printf("错误: 主机内存分配失败!\n");
        goto cleanup;
    }

    /* Initialise the input vectors. */
    printf("初始化向量数据...\n");
    for (int i = 0; i < VECTOR_SIZE; i++) {
        h_A[i] = (float)i;
        h_B[i] = (float)(i * 2);
    }

    /* 1. Get a platform. */
    ret = clGetPlatformIDs(1, &platform_id, &ret_num_platforms);
    if (ret != CL_SUCCESS) {
        printf("错误: 获取平台失败! 错误代码: %d\n", ret);
        goto cleanup;
    }
    printf("找到 %u 个 OpenCL 平台\n", ret_num_platforms);

    /* Platform name and version; a failed query prints a placeholder
     * instead of whatever happens to be in the buffer. */
    ret = clGetPlatformInfo(platform_id, CL_PLATFORM_NAME, sizeof(info), info, NULL);
    printf("平台名称: %s\n", ret == CL_SUCCESS ? info : "(未知)");
    ret = clGetPlatformInfo(platform_id, CL_PLATFORM_VERSION, sizeof(info), info, NULL);
    printf("平台版本: %s\n", ret == CL_SUCCESS ? info : "(未知)");

    /* 2. Get a device. */
    ret = clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_DEFAULT, 1, &device_id, &ret_num_devices);
    if (ret != CL_SUCCESS) {
        printf("错误: 获取设备失败! 错误代码: %d\n", ret);
        goto cleanup;
    }
    printf("找到 %u 个 OpenCL 设备\n", ret_num_devices);

    /* Device name. */
    ret = clGetDeviceInfo(device_id, CL_DEVICE_NAME, sizeof(info), info, NULL);
    printf("设备名称: %s\n", ret == CL_SUCCESS ? info : "(未知)");

    /* 3. Create the context. */
    context = clCreateContext(NULL, 1, &device_id, NULL, NULL, &ret);
    if (ret != CL_SUCCESS) {
        printf("错误: 创建上下文失败! 错误代码: %d\n", ret);
        goto cleanup;
    }
    printf("OpenCL 上下文创建成功\n");

    /* 4. Create the command queue. */
    command_queue = clCreateCommandQueueWithProperties(context, device_id, NULL, &ret);
    if (ret != CL_SUCCESS) {
        /* Fall back to the legacy entry point (OpenCL 1.x). */
        command_queue = clCreateCommandQueue(context, device_id, 0, &ret);
    }
    if (ret != CL_SUCCESS) {
        printf("错误: 创建命令队列失败! 错误代码: %d\n", ret);
        goto cleanup;
    }
    printf("命令队列创建成功\n");

    /* 5. Create the device buffers. */
    d_A = clCreateBuffer(context, CL_MEM_READ_ONLY, buffer_bytes, NULL, &ret);
    if (ret != CL_SUCCESS) {
        printf("错误: 创建缓冲区 A 失败! 错误代码: %d\n", ret);
        goto cleanup;
    }
    d_B = clCreateBuffer(context, CL_MEM_READ_ONLY, buffer_bytes, NULL, &ret);
    if (ret != CL_SUCCESS) {
        printf("错误: 创建缓冲区 B 失败! 错误代码: %d\n", ret);
        goto cleanup;
    }
    d_C = clCreateBuffer(context, CL_MEM_WRITE_ONLY, buffer_bytes, NULL, &ret);
    if (ret != CL_SUCCESS) {
        printf("错误: 创建缓冲区 C 失败! 错误代码: %d\n", ret);
        goto cleanup;
    }
    printf("设备缓冲区创建成功\n");

    /* 6. Upload the inputs (blocking writes). The calls are checked one
     * after the other so the reported code is the one OpenCL returned,
     * not a bitwise mix of two codes. */
    ret = clEnqueueWriteBuffer(command_queue, d_A, CL_TRUE, 0,
                               buffer_bytes, h_A, 0, NULL, NULL);
    if (ret == CL_SUCCESS) {
        ret = clEnqueueWriteBuffer(command_queue, d_B, CL_TRUE, 0,
                                   buffer_bytes, h_B, 0, NULL, NULL);
    }
    if (ret != CL_SUCCESS) {
        printf("错误: 数据传输到设备失败! 错误代码: %d\n", ret);
        goto cleanup;
    }
    printf("数据已传输到设备\n");

    /* 7. Create the program object. */
    program = clCreateProgramWithSource(context, 1, &kernelSource, NULL, &ret);
    if (ret != CL_SUCCESS) {
        printf("错误: 创建程序对象失败! 错误代码: %d\n", ret);
        goto cleanup;
    }

    /* 8. Build the program. */
    ret = clBuildProgram(program, 1, &device_id, NULL, NULL, NULL);
    if (ret != CL_SUCCESS) {
        size_t log_size = 0;

        printf("错误: 编译程序失败! 错误代码: %d\n", ret);
        /* Fetch the build log; it is printed only if both the size query
         * and the allocation succeed. */
        if (clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG,
                                  0, NULL, &log_size) == CL_SUCCESS &&
            log_size > 0) {
            char *log = (char *)malloc(log_size + 1);
            if (log != NULL) {
                if (clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG,
                                          log_size, log, NULL) == CL_SUCCESS) {
                    log[log_size] = '\0'; /* defensive terminator */
                    printf("编译日志:\n%s\n", log);
                }
                free(log);
            }
        }
        goto cleanup;
    }
    printf("内核程序编译成功\n");

    /* 9. Create the kernel object. */
    kernel = clCreateKernel(program, "vector_add", &ret);
    if (ret != CL_SUCCESS) {
        printf("错误: 创建内核失败! 错误代码: %d\n", ret);
        goto cleanup;
    }
    printf("内核对象创建成功\n");

    /* 10. Set the kernel arguments, stopping at the first failure. */
    ret = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&d_A);
    if (ret == CL_SUCCESS) {
        ret = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&d_B);
    }
    if (ret == CL_SUCCESS) {
        ret = clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&d_C);
    }
    if (ret == CL_SUCCESS) {
        ret = clSetKernelArg(kernel, 3, sizeof(cl_uint), (void *)&vector_size);
    }
    if (ret != CL_SUCCESS) {
        printf("错误: 设置内核参数失败! 错误代码: %d\n", ret);
        goto cleanup;
    }
    printf("内核参数设置成功\n");

    /* 11. Launch the kernel. */
    printf("执行内核计算...\n");
    ret = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL,
                                 &global_work_size, &local_work_size, 0, NULL, NULL);
    if (ret != CL_SUCCESS) {
        printf("错误: 执行内核失败! 错误代码: %d\n", ret);
        goto cleanup;
    }

    /* 12. Read the result back (blocking read). */
    ret = clEnqueueReadBuffer(command_queue, d_C, CL_TRUE, 0,
                              buffer_bytes, h_C, 0, NULL, NULL);
    if (ret != CL_SUCCESS) {
        printf("错误: 读取结果失败! 错误代码: %d\n", ret);
        goto cleanup;
    }
    printf("结果已从设备读取\n");

    /* 13. Verify against the host-side sum. */
    printf("\n验证结果...\n");
    for (int i = 0; i < VECTOR_SIZE; i++) {
        float expected = h_A[i] + h_B[i];
        if (h_C[i] != expected) {
            if (errors < 10) { /* print only the first 10 mismatches */
                printf("错误 [%d]: 期望 %.2f, 实际 %.2f\n", i, expected, h_C[i]);
            }
            errors++;
        }
    }
    if (errors == 0) {
        printf("✓ 验证成功! 所有 %d 个元素计算正确\n", VECTOR_SIZE);
        printf("\n示例结果 (前10个元素):\n");
        for (int i = 0; i < 10; i++) {
            printf(" %.2f + %.2f = %.2f\n", h_A[i], h_B[i], h_C[i]);
        }
    } else {
        printf("✗ 验证失败! 发现 %d 个错误\n", errors);
    }

    exit_code = 0;

cleanup:
    /* 14. Release resources. Handles that were never created are still
     * NULL and are skipped. */
    if (command_queue != NULL) {
        clFlush(command_queue);
        clFinish(command_queue);
    }
    if (kernel != NULL) {
        clReleaseKernel(kernel);
    }
    if (program != NULL) {
        clReleaseProgram(program);
    }
    if (d_A != NULL) {
        clReleaseMemObject(d_A);
    }
    if (d_B != NULL) {
        clReleaseMemObject(d_B);
    }
    if (d_C != NULL) {
        clReleaseMemObject(d_C);
    }
    if (command_queue != NULL) {
        clReleaseCommandQueue(command_queue);
    }
    if (context != NULL) {
        clReleaseContext(context);
    }
    free(h_A);
    free(h_B);
    free(h_C);

    if (exit_code == 0) {
        printf("\n程序执行完毕!\n");
    }
    return exit_code;
}