#include "opencl_mesh_kit.hpp"

// TODO : print streamsdk::getOpenCLErrorCodeStr(res)
#define CL_RETURN_VAL_IF_FAIL(val, expr) do { \
	cl_int res=(expr); \
	if ( res != CL_SUCCESS ) {	\
		std::cerr << "file " << __FILE__ << ": line " << __LINE__ << " (" << __PRETTY_FUNCTION__ \
			<< "): '" << "expr" << "' failed (return code : " << res << ")" << std::endl; \
		return val; \
	} \
} while(0)

cl_int OpenCLMeshKit::initCL(intptr_t gl_display, intptr_t gl_context, intptr_t gl_vbo, size_t meshWidth, size_t meshHeight, size_t groupSize) {
	cl_uint id, numPlatforms;
	cl_int res;
	char pbuf[100];
	cl_platform_id *platforms, platform;
	bool usableDeviceFound=false;

	this->meshWidth = meshWidth;
	this->meshHeight = meshHeight;
	this->groupSize = groupSize;
	this->gl_vbo = gl_vbo;

	// Get platform count
	CL_RETURN_VAL_IF_FAIL(10,
		clGetPlatformIDs(0, NULL, &numPlatforms)
	);
	
	std::cout << "Detected " << numPlatforms << " platform(s)" << std::endl;
	if ( ! ( numPlatforms > 0 ) ) return 2;

	// Allocate room for all platform IDs
	platforms = new cl_platform_id[numPlatforms];

	// Get platform IDs
	CL_RETURN_VAL_IF_FAIL(11,
		clGetPlatformIDs(numPlatforms, platforms, &numPlatforms)
	);

	// Enumerate platforms and grab informations
	for(id=0;id<numPlatforms;id++) {
		platform=platforms[id];

		CL_RETURN_VAL_IF_FAIL(12,
			clGetPlatformInfo(platform, CL_PLATFORM_VENDOR, sizeof(pbuf), pbuf, NULL)
		);
		std::cout << "Platform " << id << " : " << pbuf << std::endl;

		// Dynamically get the function pointer for clGetGLConetextInfoKHR
		clGetGLContextInfoKHR_fn clGetGLContextInfoKHR_proc = (clGetGLContextInfoKHR_fn) clGetExtensionFunctionAddressForPlatform(platform, "clGetGLContextInfoKHR");
		if (!clGetGLContextInfoKHR_proc) {
			std::cerr << "clGetExtensionFunctionAddressForPlatform(platform, clGetGLContextInfoKHR) failed" << std::endl;
			continue;
		}

		// Try to get the device corresponding to the GL context/display on this platform
		cl_context_properties cpsGL[] = {
			CL_CONTEXT_PLATFORM, (cl_context_properties)platform,
			CL_GLX_DISPLAY_KHR, gl_display,
			CL_GL_CONTEXT_KHR, gl_context,
			0
		};

		std::cout << "cl_context_properties cpsGL :" << std::endl;
		std::cout << "\tCL_CONTEXT_PLATFORM :" << (void *)cpsGL[1] << std::endl;
		std::cout << "\tCL_GLX_DISPLAY_KHR :"  << (void *)cpsGL[3] << std::endl;
		std::cout << "\tCL_GL_CONTEXT_KHR :"   << (void *)cpsGL[5] << std::endl;

    size_t deviceSize=0;
    // get deviceSize (should be 1*sizeof(cl_device_id) with CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR)
    res=clGetGLContextInfoKHR_proc(cpsGL,CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR,0,NULL,&deviceSize);
    if ( res!=CL_SUCCESS || deviceSize!=1*sizeof(cl_device_id)) {
      std::cerr << "clGetGLContextInfoKHR_proc(cpsGL,CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR,0,...) failed" << std::endl;
			std::cerr << " (return code : " << res << ")" << std::endl;
      continue;
    }

		cl_dev=0;
		res=clGetGLContextInfoKHR_proc(cpsGL,CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR,deviceSize,&cl_dev,NULL);
		if ( res!=CL_SUCCESS || cl_dev==0 ) {
			std::cerr << "clGetGLContextInfoKHR_proc(cpsGL,CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR," << deviceSize << ",...) failed" << std::endl;
			std::cerr << " (return code : " << res << ")" << std::endl;
			continue;
		}

		std::cout << "cl_device :" << (void *)cl_dev << std::endl;

		cl_ctx = clCreateContext(cpsGL,1,&cl_dev,0,0,&res);
		if ( res!=CL_SUCCESS ) {
			std::cerr << "clCreateContext() failed" << std::endl; 
			std::cerr << " (return code : " << res << ")" << std::endl;
			continue;
		}

		cl_cq = clCreateCommandQueue(cl_ctx,cl_dev,0,&res);
		if ( res!=CL_SUCCESS ) {
			std::cerr << "clCreateCommandQueue() failed" << std::endl; 
			std::cerr << " (return code : " << res << ")" << std::endl;
			continue;
		}

		usableDeviceFound=true;
		break;
	}

	if (! usableDeviceFound) {
		std::cerr << "No OpenCL device has been successfully initialized" << std::endl;
		return 13;
	}

	cl_vbo = clCreateFromGLBuffer(cl_ctx, CL_MEM_WRITE_ONLY, gl_vbo, &res);
  if ( res!=CL_SUCCESS ) {
		std::cerr << "clCreateFromGLBuffer(..., gl_vbo, &res) failed" << std::endl;
		return 14;
	}

	std::cout << "OpenCL initialization done." << std::endl;
	return 0;
	
}

cl_int OpenCLMeshKit::compileKernels(std::list<std::string> names, const char source[], size_t sourceLen) {
	cl_int res=0;

	const char *p_source=source;

	cl_program program = clCreateProgramWithSource(cl_ctx, 1, &p_source, &sourceLen,&res);
	if ( res!=CL_SUCCESS ) {
		std::cerr << "Failed to clCreateProgramWithSource(<source of zero_z kernel>)" << std::endl;
		return 21;
	}

	res = clBuildProgram(program, 1, &cl_dev, "", NULL, NULL);
  if ( res!=CL_SUCCESS ) {
    std::cerr << "Failed to clBuildProgram()"  << std::endl;
		return 22;
	}

	for (std::list<std::string>::iterator ii = names.begin(); ii != names.end(); ++ii) {
		std::string kName = (*ii);
		char *kNameZTS = new char[kName.length()+1];
		std::strcpy(kNameZTS, kName.c_str());

		cl_kernel kernel = clCreateKernel(program,kNameZTS,&res);
		delete [] kNameZTS;

		if ( res!=CL_SUCCESS ) {
			std::cerr << "Failed to clCreateKernel(program,\"" << kName << "\",&res);" << std::endl;
			return 23;
		}
		kernels[kName]=kernel;
	}

	return 0;
}

cl_int OpenCLMeshKit::execKernel(std::string kernelName, float karg_time) {

	//cl_int res;
	cl_event  eventND[1];
	size_t globalWorkSize[2], localWorkSize[2];
	cl_kernel kernel;
	//struct timespec before, after;

	//clock_gettime(CLOCK_MONOTONIC_RAW, &before);

	std::map<std::string,cl_kernel>::iterator ii=this->kernels.find(kernelName);
	if ( ii==this->kernels.end() ) {
		std::cerr << "execKernel(\"" << kernelName << "\", " << karg_time \
			<< ") failed : no kernel found with this name" << std::endl;
		return -1;
	}

	kernel=this->kernels[kernelName];

	// Set local and global work group sizes
	globalWorkSize[0]=this->meshWidth;
	globalWorkSize[1]=this->meshHeight;
	localWorkSize[0]=this->groupSize;
	localWorkSize[1]=1;

	CL_RETURN_VAL_IF_FAIL(1,
		clEnqueueAcquireGLObjects(this->cl_cq, 1, &(this->cl_vbo), 0, 0, NULL)
	);

	clSetKernelArg(kernel, 0, sizeof(cl_mem),  (void *)&(this->cl_vbo)); // float4 *pos
	clSetKernelArg(kernel, 1, sizeof(cl_uint), (void *)&(this->meshWidth));
	clSetKernelArg(kernel, 2, sizeof(cl_uint), (void *)&(this->meshHeight));
	clSetKernelArg(kernel, 3, sizeof(float),   (void *)&karg_time);

	// Execute kernel on given device
	CL_RETURN_VAL_IF_FAIL(2,
		clEnqueueNDRangeKernel(this->cl_cq, kernel, 2, NULL, globalWorkSize, localWorkSize, 0, NULL, eventND)
	);

	//TODO : return values checking
	CL_RETURN_VAL_IF_FAIL(3,
		clFlush(this->cl_cq)
	);

	// (CPU) Wait until GPU kernel execution end
	CL_RETURN_VAL_IF_FAIL(4, clWaitForEvents(1,eventND) ); //XXX: SimpleGL utilise une attente active, pourquoi ?
	CL_RETURN_VAL_IF_FAIL(5, clReleaseEvent(eventND[0]) );
	CL_RETURN_VAL_IF_FAIL(6, 
		clEnqueueReleaseGLObjects(this->cl_cq, 1, &(this->cl_vbo), 0, 0, 0)
	);
	CL_RETURN_VAL_IF_FAIL(7, clFinish(this->cl_cq) );

	//clock_gettime(CLOCK_MONOTONIC_RAW, &after);
	//TODO : remove this debug hint
	//std::cout << "kernel exec time : " << after.tv_nsec - before.tv_nsec << std::endl;

	return CL_SUCCESS;
}

void OpenCLMeshKit::releaseKernels() {
	for (std::map<std::string,cl_kernel>::iterator ii = kernels.begin(); ii != kernels.end(); ++ii ) {
		clReleaseKernel((*ii).second);
	}
	kernels.clear();
}

cl_int OpenCLMeshKit::resetVBO() {
	cl_int res;
	std::map<std::string, cl_kernel> user_kernels=kernels;

	std::list<std::string> n; n.push_back("zero_z");
	res = compileKernels(n, kernel_src_zero_z, sizeof(kernel_src_zero_z));

	if(res==0) res = execKernel("zero_z", 0.0f);	
	
	releaseKernels();
	kernels=user_kernels;

	return res;
}

size_t OpenCLMeshKit::getMeshWidth() { return this->meshWidth; }
size_t OpenCLMeshKit::getMeshHeight() { return this->meshHeight; }
size_t OpenCLMeshKit::getMeshItemCount() { return this->meshWidth * this->meshHeight; }
size_t OpenCLMeshKit::getGroupSize() { return this->groupSize; }
intptr_t OpenCLMeshKit::getGLVBO() { return this->gl_vbo; }

void OpenCLMeshKit::setGroupSize(size_t groupSize) { 	this->groupSize=groupSize; }

OpenCLMeshKit::~OpenCLMeshKit() { }