#include "opencl_mesh_kit.hpp" // TODO : print streamsdk::getOpenCLErrorCodeStr(res) #define CL_RETURN_VAL_IF_FAIL(val, expr) do { \ cl_int res=(expr); \ if ( res != CL_SUCCESS ) { \ std::cerr << "file " << __FILE__ << ": line " << __LINE__ << " (" << __PRETTY_FUNCTION__ \ << "): '" << "expr" << "' failed (return code : " << res << ")" << std::endl; \ return val; \ } \ } while(0) cl_int OpenCLMeshKit::initCL(intptr_t gl_display, intptr_t gl_context, intptr_t gl_vbo, size_t meshWidth, size_t meshHeight, size_t groupSize) { cl_uint id, numPlatforms; cl_int res; char pbuf[100]; cl_platform_id *platforms, platform; bool usableDeviceFound=false; this->meshWidth = meshWidth; this->meshHeight = meshHeight; this->groupSize = groupSize; this->gl_vbo = gl_vbo; // Get platform count CL_RETURN_VAL_IF_FAIL(10, clGetPlatformIDs(0, NULL, &numPlatforms) ); std::cout << "Detected " << numPlatforms << " platform(s)" << std::endl; if ( ! ( numPlatforms > 0 ) ) return 2; // Allocate room for all platform IDs platforms = new cl_platform_id[numPlatforms]; // Get platform IDs CL_RETURN_VAL_IF_FAIL(11, clGetPlatformIDs(numPlatforms, platforms, &numPlatforms) ); // Enumerate platforms and grab informations for(id=0;id<numPlatforms;id++) { platform=platforms[id]; CL_RETURN_VAL_IF_FAIL(12, clGetPlatformInfo(platform, CL_PLATFORM_VENDOR, sizeof(pbuf), pbuf, NULL) ); std::cout << "Platform " << id << " : " << pbuf << std::endl; // Dynamically get the function pointer for clGetGLConetextInfoKHR clGetGLContextInfoKHR_fn clGetGLContextInfoKHR_proc = (clGetGLContextInfoKHR_fn) clGetExtensionFunctionAddressForPlatform(platform, "clGetGLContextInfoKHR"); if (!clGetGLContextInfoKHR_proc) { std::cerr << "clGetExtensionFunctionAddressForPlatform(platform, clGetGLContextInfoKHR) failed" << std::endl; continue; } // Try to get the device corresponding to the GL context/display on this platform cl_context_properties cpsGL[] = { CL_CONTEXT_PLATFORM, (cl_context_properties)platform, CL_GLX_DISPLAY_KHR, gl_display, CL_GL_CONTEXT_KHR, gl_context, 0 }; std::cout << "cl_context_properties cpsGL :" << std::endl; std::cout << "\tCL_CONTEXT_PLATFORM :" << (void *)cpsGL[1] << std::endl; std::cout << "\tCL_GLX_DISPLAY_KHR :" << (void *)cpsGL[3] << std::endl; std::cout << "\tCL_GL_CONTEXT_KHR :" << (void *)cpsGL[5] << std::endl; size_t deviceSize=0; // get deviceSize (should be 1*sizeof(cl_device_id) with CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR) res=clGetGLContextInfoKHR_proc(cpsGL,CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR,0,NULL,&deviceSize); if ( res!=CL_SUCCESS || deviceSize!=1*sizeof(cl_device_id)) { std::cerr << "clGetGLContextInfoKHR_proc(cpsGL,CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR,0,...) failed" << std::endl; std::cerr << " (return code : " << res << ")" << std::endl; continue; } cl_dev=0; res=clGetGLContextInfoKHR_proc(cpsGL,CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR,deviceSize,&cl_dev,NULL); if ( res!=CL_SUCCESS || cl_dev==0 ) { std::cerr << "clGetGLContextInfoKHR_proc(cpsGL,CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR," << deviceSize << ",...) failed" << std::endl; std::cerr << " (return code : " << res << ")" << std::endl; continue; } std::cout << "cl_device :" << (void *)cl_dev << std::endl; cl_ctx = clCreateContext(cpsGL,1,&cl_dev,0,0,&res); if ( res!=CL_SUCCESS ) { std::cerr << "clCreateContext() failed" << std::endl; std::cerr << " (return code : " << res << ")" << std::endl; continue; } cl_cq = clCreateCommandQueue(cl_ctx,cl_dev,0,&res); if ( res!=CL_SUCCESS ) { std::cerr << "clCreateCommandQueue() failed" << std::endl; std::cerr << " (return code : " << res << ")" << std::endl; continue; } usableDeviceFound=true; break; } if (! usableDeviceFound) { std::cerr << "No OpenCL device has been successfully initialized" << std::endl; return 13; } cl_vbo = clCreateFromGLBuffer(cl_ctx, CL_MEM_WRITE_ONLY, gl_vbo, &res); if ( res!=CL_SUCCESS ) { std::cerr << "clCreateFromGLBuffer(..., gl_vbo, &res) failed" << std::endl; return 14; } std::cout << "OpenCL initialization done." << std::endl; return 0; } cl_int OpenCLMeshKit::compileKernels(std::list<std::string> names, const char source[], size_t sourceLen) { cl_int res=0; const char *p_source=source; cl_program program = clCreateProgramWithSource(cl_ctx, 1, &p_source, &sourceLen,&res); if ( res!=CL_SUCCESS ) { std::cerr << "Failed to clCreateProgramWithSource(<source of zero_z kernel>)" << std::endl; return 21; } res = clBuildProgram(program, 1, &cl_dev, "", NULL, NULL); if ( res!=CL_SUCCESS ) { std::cerr << "Failed to clBuildProgram()" << std::endl; return 22; } for (std::list<std::string>::iterator ii = names.begin(); ii != names.end(); ++ii) { std::string kName = (*ii); char *kNameZTS = new char[kName.length()+1]; std::strcpy(kNameZTS, kName.c_str()); cl_kernel kernel = clCreateKernel(program,kNameZTS,&res); delete [] kNameZTS; if ( res!=CL_SUCCESS ) { std::cerr << "Failed to clCreateKernel(program,\"" << kName << "\",&res);" << std::endl; return 23; } kernels[kName]=kernel; } return 0; } cl_int OpenCLMeshKit::execKernel(std::string kernelName, float karg_time) { //cl_int res; cl_event eventND[1]; size_t globalWorkSize[2], localWorkSize[2]; cl_kernel kernel; //struct timespec before, after; //clock_gettime(CLOCK_MONOTONIC_RAW, &before); std::map<std::string,cl_kernel>::iterator ii=this->kernels.find(kernelName); if ( ii==this->kernels.end() ) { std::cerr << "execKernel(\"" << kernelName << "\", " << karg_time \ << ") failed : no kernel found with this name" << std::endl; return -1; } kernel=this->kernels[kernelName]; // Set local and global work group sizes globalWorkSize[0]=this->meshWidth; globalWorkSize[1]=this->meshHeight; localWorkSize[0]=this->groupSize; localWorkSize[1]=1; CL_RETURN_VAL_IF_FAIL(1, clEnqueueAcquireGLObjects(this->cl_cq, 1, &(this->cl_vbo), 0, 0, NULL) ); clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&(this->cl_vbo)); // float4 *pos clSetKernelArg(kernel, 1, sizeof(cl_uint), (void *)&(this->meshWidth)); clSetKernelArg(kernel, 2, sizeof(cl_uint), (void *)&(this->meshHeight)); clSetKernelArg(kernel, 3, sizeof(float), (void *)&karg_time); // Execute kernel on given device CL_RETURN_VAL_IF_FAIL(2, clEnqueueNDRangeKernel(this->cl_cq, kernel, 2, NULL, globalWorkSize, localWorkSize, 0, NULL, eventND) ); //TODO : return values checking CL_RETURN_VAL_IF_FAIL(3, clFlush(this->cl_cq) ); // (CPU) Wait until GPU kernel execution end CL_RETURN_VAL_IF_FAIL(4, clWaitForEvents(1,eventND) ); //XXX: SimpleGL utilise une attente active, pourquoi ? CL_RETURN_VAL_IF_FAIL(5, clReleaseEvent(eventND[0]) ); CL_RETURN_VAL_IF_FAIL(6, clEnqueueReleaseGLObjects(this->cl_cq, 1, &(this->cl_vbo), 0, 0, 0) ); CL_RETURN_VAL_IF_FAIL(7, clFinish(this->cl_cq) ); //clock_gettime(CLOCK_MONOTONIC_RAW, &after); //TODO : remove this debug hint //std::cout << "kernel exec time : " << after.tv_nsec - before.tv_nsec << std::endl; return CL_SUCCESS; } void OpenCLMeshKit::releaseKernels() { for (std::map<std::string,cl_kernel>::iterator ii = kernels.begin(); ii != kernels.end(); ++ii ) { clReleaseKernel((*ii).second); } kernels.clear(); } cl_int OpenCLMeshKit::resetVBO() { cl_int res; std::map<std::string, cl_kernel> user_kernels=kernels; std::list<std::string> n; n.push_back("zero_z"); res = compileKernels(n, kernel_src_zero_z, sizeof(kernel_src_zero_z)); if(res==0) res = execKernel("zero_z", 0.0f); releaseKernels(); kernels=user_kernels; return res; } size_t OpenCLMeshKit::getMeshWidth() { return this->meshWidth; } size_t OpenCLMeshKit::getMeshHeight() { return this->meshHeight; } size_t OpenCLMeshKit::getMeshItemCount() { return this->meshWidth * this->meshHeight; } size_t OpenCLMeshKit::getGroupSize() { return this->groupSize; } intptr_t OpenCLMeshKit::getGLVBO() { return this->gl_vbo; } void OpenCLMeshKit::setGroupSize(size_t groupSize) { this->groupSize=groupSize; } OpenCLMeshKit::~OpenCLMeshKit() { }