#include <stdlib.h>

#include <string.h> /* For strlen (to be removed ?) */

#include <gtk/gtk.h>
#include <gtk/gtkgl.h>

/*#include <GL/gl.h>*/
#include <GL/glew.h> // For VBO OpenGL extensions
#include <GL/glu.h>

#include <CL/opencl.h>

#include <gdk/x11/gdkglx.h> // X11 specific

typedef struct {
	GLuint gl_vbo;
	size_t mesh_width, mesh_height;
} on_expose_data_t;

gboolean gl_area_on_draw (GtkObject* area, GdkEventExpose* event, gpointer void_data) {
	if (void_data == NULL) {
		fputs("gl_area_on_draw() : NULL void_data arg\n", stderr);
		return FALSE;
	}

	GdkGLDrawable* drawable = gtk_widget_get_gl_drawable (GTK_WIDGET (area));
	GdkGLContext* context = gtk_widget_get_gl_context (GTK_WIDGET (area));

	gboolean status = gdk_gl_drawable_gl_begin (drawable, context);

	if (status == FALSE) {
		return FALSE;
	}

	on_expose_data_t *data = (on_expose_data_t *) void_data;

	GtkAllocation allocation;
	gtk_widget_get_allocation (GTK_WIDGET (area), &allocation);
	GLdouble viewport_width = (GLdouble) allocation.width;
	GLdouble viewport_height = (GLdouble) allocation.height;

	// Z negative forward oriented.

	GLdouble aspect =  viewport_width / viewport_height;
	GLdouble fovy   =  35.0; // The one which looks to most natural.
	GLdouble zNear  =   2.0; // Enough for a moderately sized sample model.
	GLdouble zFar   =  -2.0; // Idem.

	// Z positive forward oriented.

	GLdouble projection_dx =  0.0;
	GLdouble projection_dy =  0.0;
	GLdouble projection_dz = -2.0;

	// Reset picture.
	glViewport (0, 0, (GLint) viewport_width, (GLint) viewport_height);
	glClear (GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);

	// Model defined in default coordinates.
	glMatrixMode (GL_MODELVIEW);
	glLoadIdentity ();

	// Projection using perspective and a few units backward.
	glMatrixMode (GL_PROJECTION);
	glLoadIdentity ();
	gluPerspective (fovy, aspect, zNear, zFar);
	glTranslated (projection_dx, projection_dy, projection_dz);

	if (data->gl_vbo) {
		glBindBuffer(GL_ARRAY_BUFFER, data->gl_vbo);
		glVertexPointer(4, GL_FLOAT, 0, (GLvoid *) 0);
		glEnableClientState(GL_VERTEX_ARRAY);
		glDrawArrays(GL_POINTS, 0, data->mesh_width * data->mesh_height);
		glDisableClientState(GL_COLOR_ARRAY);
		glBindBuffer(GL_ARRAY_BUFFER, 0);
	}

	gdk_gl_drawable_swap_buffers (drawable);

	gdk_gl_drawable_gl_end (drawable);

	return TRUE; // Do not propagate the event.
}

cl_int execKernel(cl_context cl_ctx, cl_command_queue commandQueue, cl_kernel kernel, size_t mesh_width, size_t mesh_height, size_t group_size, cl_mem cl_vbo, float time) {

	cl_int res;
  cl_event  eventND[1];

	// Set local and global work group sizes
	size_t globalWorkSize[2], localWorkSize[2];
	globalWorkSize[0]=mesh_width;
	globalWorkSize[1]=mesh_height;
	localWorkSize[0]=group_size;
	localWorkSize[1]=1;

  res=clEnqueueAcquireGLObjects(commandQueue, 1, &cl_vbo, 0, 0, NULL);

	res=clSetKernelArg(kernel, 0, sizeof(cl_mem), &cl_vbo); // float4 *pos
	res=clSetKernelArg(kernel, 1, sizeof(cl_uint), (void *)&mesh_width);
	res=clSetKernelArg(kernel, 2, sizeof(cl_uint), (void *)&mesh_height);
	res=clSetKernelArg(kernel, 3, sizeof(float), &time);

  // Execute kernel on given device
  res=clEnqueueNDRangeKernel(commandQueue, kernel, 2, NULL, globalWorkSize, localWorkSize, 0, NULL, eventND);
	if ( res != CL_SUCCESS ) {
		fputs("Failed to clEnqueueNDRangeKernel()\n", stderr);
		return 1;
	}

  res=clFlush(commandQueue);
	res=clWaitForEvents(1,eventND); //XXX: SimpleGL utilie une attente active, pourquoi ?
	res=clReleaseEvent(eventND[0]);
  res=clEnqueueReleaseGLObjects(commandQueue, 1, &cl_vbo, 0, 0, 0);
  res=clFinish(commandQueue);

	return CL_SUCCESS;
}

int main(int argc, char *argv[]) {
	GtkWidget* main_win;
	GtkVBox* main_box;
	GtkDrawingArea* gl_area;

	GdkGLConfig* gl_config;
	GdkGLContext* gl_context;
	GdkGLDrawable* drawable;
	GLXContext glx_context; // X11 specific
	Display *glx_display; // X11 specific
	gboolean res;

	GLuint gl_vbo;
	on_expose_data_t on_expose_data;
	on_expose_data.gl_vbo = 0;


	gtk_init(&argc, &argv);
	if (gdk_gl_init_check(&argc, &argv) == FALSE ) {
		fputs ("Failed to initialize GDKGLExt.\n", stderr);
		return EXIT_FAILURE;
	}

	main_win = gtk_window_new(GTK_WINDOW_TOPLEVEL);
	gtk_window_set_default_size(GTK_WINDOW(main_win), 400, 300);
	gtk_window_set_title(GTK_WINDOW(main_win), "GTK OpenCL Proof of Concept");
	gtk_signal_connect(GTK_OBJECT(main_win), "destroy", G_CALLBACK(gtk_main_quit), NULL);

	main_box = GTK_VBOX(gtk_vbox_new(FALSE, 0));
	gtk_container_add(GTK_CONTAINER(main_win), GTK_WIDGET(main_box));

	gl_area = GTK_DRAWING_AREA(gtk_drawing_area_new());
	gl_config = gdk_gl_config_new_by_mode(GDK_GL_MODE_RGBA | GDK_GL_MODE_DOUBLE | GDK_GL_MODE_DEPTH);
	res = gtk_widget_set_gl_capability(GTK_WIDGET(gl_area), gl_config,
			NULL, // GdkGLContext *share_list
			TRUE, // gboolean direct
			GDK_GL_RGBA_TYPE); //int render_type
	if ( !res ) {
		fputs ("Failed to set_gl_capability(gl_area)\n", stderr);
		return EXIT_FAILURE;
	}
	gtk_signal_connect(GTK_OBJECT(gl_area), "expose-event", GTK_SIGNAL_FUNC(gl_area_on_draw), &on_expose_data);
	gtk_box_pack_start(GTK_BOX(main_box), GTK_WIDGET(gl_area), TRUE, TRUE, 0);

	gtk_widget_show_all(main_win);

	// gl_area needs to be realized to do that (so, it is after gtk_widget_show_all(main_win) )
	gl_context=gtk_widget_get_gl_context(GTK_WIDGET(gl_area));
	if ( !gl_context) {
		fputs ("Failed to get_gl_context(gl_area)\n", stderr);
		return EXIT_FAILURE;
	}
	drawable = gtk_widget_get_gl_drawable(GTK_WIDGET(gl_area));
	res = gdk_gl_drawable_gl_begin(drawable, gl_context);

	GLenum err = glewInit();
	if (GLEW_OK != err) {
  	fprintf(stderr, "glewInit() failure : %s\n", glewGetErrorString(err));
		return EXIT_FAILURE;
	}

	if ( ! glewIsSupported("GL_ARB_vertex_buffer_object") ) {
		fputs ("OpenGL extension GL_ARB_vertex_buffer_object is mandatory for this application\n", stderr);
		return EXIT_FAILURE;
	}


	/* BEGIN : X11 specific */
	glx_context=glXGetCurrentContext();
	if ( !glx_context ) {
		fputs ("Failed to glXGetCurrentContext()", stderr);
		return EXIT_FAILURE;
	}
	glx_display=glXGetCurrentDisplay();
	if ( !glx_display ) {
		fputs ("Failed to glXGetCurrentDisplay()", stderr);
		return EXIT_FAILURE;
	}

	printf("glx_current_context==%p\n", (void *)glx_context);
	printf("glx_current_display==%p\n", (void *)glx_display);
	
	cl_context_properties cpsGL[] = {
		// CL_CONTEXT_PLATFORM value to be filled later
		CL_CONTEXT_PLATFORM, (cl_context_properties) NULL,
		CL_GLX_DISPLAY_KHR, (intptr_t) glx_display,
		CL_GL_CONTEXT_KHR, (intptr_t) glx_context, 0
	};
	/* END : X11 specific */
	gdk_gl_drawable_gl_end (drawable);

	cl_uint i, plat_count;
	cl_platform_id *plat_ids, plat_id;
	char pbuf[100];
	size_t deviceSize;
	cl_device_id device;
	cl_context cl_ctx;
	cl_command_queue commandQueue;

	typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetGLContextInfoKHR_fn)(
			const cl_context_properties *properties,
			cl_gl_context_info param_name,
			size_t param_value_size,
			void *param_value,
			size_t *param_value_size_ret);

	clGetGLContextInfoKHR_fn clGetGLContextInfoKHR_proc;

	clGetPlatformIDs(0, NULL, &plat_count); // Get platform count
	if ( ! ( plat_count > 0 ) ) {
		fputs ("Failed to find an OpenCL platform\n", stderr);
		return EXIT_FAILURE;
	}
	plat_ids=malloc(sizeof(cl_platform_id *) * plat_count);
	clGetPlatformIDs(plat_count, plat_ids, &plat_count); // Get platform IDs

        for(i=0;i<plat_count;i++) {
		plat_id=plat_ids[i];

		clGetPlatformInfo(plat_id, CL_PLATFORM_VENDOR, sizeof(pbuf), pbuf, NULL);
		printf("Plaform %i (id %p) : VENDOR : '%s'\n", i, (void *) plat_id, pbuf);

		// CL_CONTEXT_PLATFORM value filled now
		cpsGL[1]=(cl_context_properties) plat_id;

		// TODO : use the clGetGLContextInfoKHR() normally when available
		clGetGLContextInfoKHR_proc = (clGetGLContextInfoKHR_fn) clGetExtensionFunctionAddressForPlatform(plat_id, "clGetGLContextInfoKHR");
		 if (!clGetGLContextInfoKHR_proc) {
			 fputs ("Failed to query proc address of clGetGLContextInfoKHR for this platform\n", stderr);
			 continue;
		 }

		deviceSize=0;
		// get deviceSize (should be 1*sizeof(cl_device_id) with CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR)
		res=clGetGLContextInfoKHR_proc(cpsGL,CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR,0,NULL,&deviceSize);
		if ( res!=CL_SUCCESS || deviceSize!=1*sizeof(cl_device_id)) {
			fputs ("Failed to get CL_CURRENT_DEVICE_FOR_GL_CONTEXT deviceSize\n", stderr);
			continue;
		}

		device=0;
		res=clGetGLContextInfoKHR_proc(cpsGL,CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR,deviceSize,&device,NULL);
		if ( res!=CL_SUCCESS || device==0 ) {
			fputs ("Failed to get CL_CURRENT_DEVICE_FOR_GL_CONTEXT device\n", stderr);
			continue;
		}

		//TODO : implement selection if multiple devices are usable (instead of taking the first one)
		printf("cl_device==%p\n", (void *)device);

		cl_ctx = clCreateContext(cpsGL,1,&device,0,0,&res);
		if ( res!=CL_SUCCESS ) {
			fputs ("Failed to clCreateContext()\n", stderr);
			continue;
		}

		commandQueue = clCreateCommandQueue(cl_ctx,device,0,&res);
		if ( res!=CL_SUCCESS ) {
			fputs ("Failed to clCreateCommandQueue()\n", stderr);
			continue;
		}

		break;
	}

	unsigned int mesh_width=32, mesh_height=32, group_size=2;

	//fprintf(stderr, "sizeof(cl_float4)==%ld\n",  sizeof(cl_float4));
	GLsizeiptr gl_vbo_data_size = mesh_width * mesh_height * sizeof(cl_float4);

/*
	float gl_vertex_default_pos[16] = { // to be removed
		 0.5, -0.5, 0.0, 1.0,
		 0.5,  0.5, 0.0, 1.0,
		-0.5,  0.5, 0.0, 1.0,
		-0.5, -0.5, 0.0, 1.0
	};
*/
	res = gdk_gl_drawable_gl_begin(drawable, gl_context);
	glGenBuffers(1, &gl_vbo);
	glBindBuffer(GL_ARRAY_BUFFER, gl_vbo);
	glBufferData(GL_ARRAY_BUFFER, gl_vbo_data_size, NULL, GL_STREAM_DRAW); // GL_DYNAMIC_DRAW vu dans SimpleGL de ATI

	on_expose_data.gl_vbo = gl_vbo;
	on_expose_data.mesh_width = mesh_width;
	on_expose_data.mesh_height = mesh_height;

	// to be removed : default values
	//glBufferSubData(GL_ARRAY_BUFFER,0, 16* sizeof(cl_float4), gl_vertex_default_pos);
	

	
	cl_mem cl_vbo = clCreateFromGLBuffer(cl_ctx, CL_MEM_WRITE_ONLY, gl_vbo, &res);
	if ( res!=CL_SUCCESS ) {
		fputs ("Failed to clCreateFromGLBuffer()\n", stderr);
		return EXIT_FAILURE;
	}

	glBindBuffer(GL_ARRAY_BUFFER, 0); // Unbind buffer (no more current buffer)
	gdk_gl_drawable_gl_end (drawable);

	
	const char *source="\
__kernel void zero_z(__global float4 *pos, unsigned int width, unsigned int height, float time) { \
	unsigned int x = get_global_id(0); \
	unsigned int y = get_global_id(1); \
  /* calculate uv coordinates of the mesh point [0.0;1.0] */ \
  float u = x / (float) width; \
  float v = y / (float) height; \
	/* calculate centered coordinates [-0.5;0.5] */\
	float u2 = (u*2-1)/2;\
	float v2 = (v*2-1)/2;\
	/* We only use normalized quaterinons here */ \
	float w = 1.0f; \
	/* Calculate the desirated value of the mesh point */ \
	float z = 0.0f; \
	/* Write output vertex (centered) */\
	pos[y*width+x] = (float4)(u2, v2, z, w); \
}\
";
	size_t sourceLen=strlen(source);

	cl_program program = clCreateProgramWithSource(cl_ctx, 1, &source, &sourceLen,&res);
	if ( res!=CL_SUCCESS ) {
		fputs("Failed to clCreateProgramWithSource()\n", stderr);
		return EXIT_FAILURE;
	}

	res = clBuildProgram(program, 1, &device, "", NULL, NULL);
	if ( res!=CL_SUCCESS ) {
		fputs("Failed to clBuildProgram()\n", stderr);

		if(res == CL_BUILD_PROGRAM_FAILURE) {
			char *buildLog = NULL;
			size_t buildLogSize = 0;
			clGetProgramBuildInfo(program,device,CL_PROGRAM_BUILD_LOG,buildLogSize,buildLog,&buildLogSize);
			buildLog = (char*)malloc(buildLogSize);
			memset(buildLog, 0, buildLogSize);
			clGetProgramBuildInfo(program,device,CL_PROGRAM_BUILD_LOG,buildLogSize,buildLog,NULL);

			fputs("\n                   BUILD LOG\n", stderr);
			fputs("************************************************\n", stderr);
			fputs(buildLog, stderr);
			free(buildLog);
			fputs("\n************************************************\n", stderr);
			return EXIT_FAILURE;
		}
	}

	cl_kernel kernel = clCreateKernel(program,"zero_z",&res);
	execKernel(cl_ctx, commandQueue, kernel, mesh_width, mesh_height, group_size, cl_vbo, 0.0f);
	clReleaseKernel(kernel);


	gtk_main();
	return EXIT_SUCCESS;
}