diff --git a/src/common/darktable.c b/src/common/darktable.c index 85dc9cbbbb09..a24a7d8d99de 100644 --- a/src/common/darktable.c +++ b/src/common/darktable.c @@ -513,6 +513,20 @@ void dt_dump_pipe_pfm( dt_dump_pfm_file(pipe, data, width, height, bpp, mod, "[dt_dump_pipe_pfm]", input, !input, TRUE); } +static int32_t _detect_opencl_job_run(dt_job_t *job) +{ + /* darktable.opencl was already allocated by dt_init() before this job was queued; allocating it again here would leak that block and replace the pointer while dt_init() continues */ + dt_opencl_init(darktable.opencl, GPOINTER_TO_INT(dt_control_job_get_params(job)), TRUE); + return 0; +} + +static dt_job_t *_detect_opencl_job_create(gboolean exclude_opencl) +{ + dt_job_t *job = dt_control_job_create(&_detect_opencl_job_run, "detect opencl devices"); + if(!job) return NULL; + dt_control_job_set_params(job, GINT_TO_POINTER(exclude_opencl), NULL); + return job; +} int dt_init(int argc, char *argv[], const gboolean init_gui, const gboolean load_data, lua_State *L) { @@ -1354,10 +1368,11 @@ int dt_init(int argc, char *argv[], const gboolean init_gui, const gboolean load #endif darktable.opencl = (dt_opencl_t *)calloc(1, sizeof(dt_opencl_t)); - dt_opencl_init(darktable.opencl, exclude_opencl, print_statistics); -#ifdef HAVE_OPENCL - dt_opencl_update_settings(); -#endif + if(init_gui) + dt_control_add_job(darktable.control, DT_JOB_QUEUE_SYSTEM_BG, + _detect_opencl_job_create(exclude_opencl)); + else + dt_opencl_init(darktable.opencl, exclude_opencl, print_statistics); darktable.points = (dt_points_t *)calloc(1, sizeof(dt_points_t)); dt_points_init(darktable.points, dt_get_num_threads()); diff --git a/src/common/opencl.c b/src/common/opencl.c index d90104946368..5fe3dea874c5 100644 --- a/src/common/opencl.c +++ b/src/common/opencl.c @@ -64,6 +64,7 @@ static float _opencl_benchmark_cpu(const size_t width, static gboolean _opencl_load_program(const int dev, const int prog, + const char *programname, const char *filename, const char *binname, const char *cachedir, @@ -1074,7 +1075,7 @@ static gboolean _opencl_device_init(dt_opencl_t 
*cl, "[dt_opencl_device_init] testing program `%s' ..\n", programname); int loaded_cached; char md5sum[33]; - if(_opencl_load_program(dev, prog, filename, binname, cachedir, + if(_opencl_load_program(dev, prog, programname, filename, binname, cachedir, md5sum, includemd5, &loaded_cached) && _opencl_build_program(dev, prog, binname, cachedir, md5sum, loaded_cached)) { @@ -1543,7 +1544,7 @@ void dt_opencl_init( free(locale); } - return; + dt_opencl_update_settings(); } void dt_opencl_cleanup(dt_opencl_t *cl) @@ -2297,6 +2298,7 @@ void dt_opencl_md5sum(const char **files, char **md5sums) static gboolean _opencl_load_program( const int dev, const int prog, + const char *programname, const char *filename, const char *binname, const char *cachedir, @@ -2463,6 +2465,8 @@ static gboolean _opencl_load_program( "[opencl_load_program] could not load cached binary program," " trying to compile source\n"); + dt_control_log(_("building OpenCL program %s for %s"), + programname, cl->dev[dev].fullname); cl->dev[dev].program[prog] = (cl->dlocl->symbols->dt_clCreateProgramWithSource)( cl->dev[dev].context, 1, (const char **)&file, &filesize, &err); free(file); @@ -2660,51 +2664,56 @@ static gboolean _opencl_build_program(const int dev, int dt_opencl_create_kernel(const int prog, const char *name) { dt_opencl_t *cl = darktable.opencl; - if(!cl->inited) return -1; - if(prog < 0 || prog >= DT_OPENCL_MAX_PROGRAMS) return -1; + + static int k = 0; + /* reject a full table before storing: writing slot k first would run past the end of name_saved/program_saved */ + if(k >= DT_OPENCL_MAX_KERNELS) + { + dt_print(DT_DEBUG_OPENCL, + "[opencl_create_kernel] too many kernels! 
can't create kernel `%s'\n", + name); + return -1; + } + cl->name_saved[k] = name; + cl->program_saved[k] = prog; + return k++; +} + + +static gboolean _check_kernel(const int dev, const int kernel) +{ + dt_opencl_t *cl = darktable.opencl; + + if(!cl->inited || dev < 0) return FALSE; + if(kernel < 0 || kernel >= DT_OPENCL_MAX_KERNELS) return FALSE; + + if(cl->dev[dev].kernel_used[kernel]) return TRUE; + + const int prog = cl->program_saved[kernel]; + if(prog < 0 || prog >= DT_OPENCL_MAX_PROGRAMS) return FALSE; dt_pthread_mutex_lock(&cl->lock); - int k = 0; - for(int dev = 0; dev < cl->num_devs; dev++) + + cl_int err; + if(!cl->dev[dev].kernel_used[kernel] + && cl->name_saved[kernel]) { - cl_int err; - for(; k < DT_OPENCL_MAX_KERNELS; k++) - if(!cl->dev[dev].kernel_used[k]) - { - cl->dev[dev].kernel_used[k] = 1; - cl->dev[dev].kernel[k] = - (cl->dlocl->symbols->dt_clCreateKernel) - (cl->dev[dev].program[prog], name, &err); - if(err != CL_SUCCESS) - { - dt_print(DT_DEBUG_OPENCL, - "[opencl_create_kernel] could not create kernel `%s'! (%s)\n", - name, cl_errstr(err)); - cl->dev[dev].kernel_used[k] = 0; - goto error; - } - else - break; - } - if(k < DT_OPENCL_MAX_KERNELS) - { - dt_print(DT_DEBUG_OPENCL | DT_DEBUG_VERBOSE, - "[opencl_create_kernel] successfully loaded kernel `%s' (%d)" - " for device %d\n", - name, k, dev); - } - else + cl->dev[dev].kernel_used[kernel] = 1; + cl->dev[dev].kernel[kernel] = + (cl->dlocl->symbols->dt_clCreateKernel) + (cl->dev[dev].program[prog], cl->name_saved[kernel], &err); + if(err != CL_SUCCESS) { dt_print(DT_DEBUG_OPENCL, - "[opencl_create_kernel] too many kernels! can't create kernel `%s'\n", - name); - goto error; + "[opencl_create_kernel] could not create kernel `%s'! 
(%s)\n", + cl->name_saved[kernel], cl_errstr(err)); + cl->dev[dev].kernel_used[kernel] = 0; + cl->name_saved[kernel] = NULL; // don't try again + dt_pthread_mutex_unlock(&cl->lock); + return FALSE; } } dt_pthread_mutex_unlock(&cl->lock); - return k; -error: - dt_pthread_mutex_unlock(&cl->lock); - return -1; + return TRUE; } void dt_opencl_free_kernel(const int kernel) @@ -2763,10 +2772,9 @@ int dt_opencl_get_kernel_work_group_size( const int kernel, size_t *kernelworkgroupsize) { - dt_opencl_t *cl = darktable.opencl; - if(!cl->inited || dev < 0) return -1; - if(kernel < 0 || kernel >= DT_OPENCL_MAX_KERNELS) return -1; + if(!_check_kernel(dev, kernel)) return -1; + dt_opencl_t *cl = darktable.opencl; return (cl->dlocl->symbols->dt_clGetKernelWorkGroupInfo)(cl->dev[dev].kernel[kernel], cl->dev[dev].devid, CL_KERNEL_WORK_GROUP_SIZE, @@ -2781,9 +2789,9 @@ int dt_opencl_set_kernel_arg( const size_t size, const void *arg) { + if(!_check_kernel(dev, kernel)) return -1; + dt_opencl_t *cl = darktable.opencl; - if(!cl->inited || dev < 0) return -1; - if(kernel < 0 || kernel >= DT_OPENCL_MAX_KERNELS) return -1; return (cl->dlocl->symbols->dt_clSetKernelArg) (cl->dev[dev].kernel[kernel], num, size, arg); } diff --git a/src/common/opencl.h b/src/common/opencl.h index 5879311e907b..44db407a88ee 100644 --- a/src/common/opencl.h +++ b/src/common/opencl.h @@ -268,6 +268,10 @@ typedef struct dt_opencl_t // global kernels for guided filter. 
struct dt_guided_filter_cl_global_t *guided_filter; + + // saved kernel info for deferred initialisation + int program_saved[DT_OPENCL_MAX_KERNELS]; + const char *name_saved[DT_OPENCL_MAX_KERNELS]; } dt_opencl_t; /** description of memory requirements of local buffer diff --git a/src/common/variables.c b/src/common/variables.c index 8cfd1ae0cfe8..7696760865ed 100644 --- a/src/common/variables.c +++ b/src/common/variables.c @@ -699,7 +699,7 @@ static char *_get_base_value(dt_variables_params_t *params, char **variable) else if(_has_prefix(variable, "OPENCL.ACTIVATED") || _has_prefix(variable, "OPENCL_ACTIVATED")) { - if(dt_opencl_is_enabled()) + if(dt_opencl_running()) result = g_strdup(_("yes")); else result = g_strdup(_("no")); diff --git a/src/develop/imageop.c b/src/develop/imageop.c index 58a86d9cf93b..c41258c0cecd 100644 --- a/src/develop/imageop.c +++ b/src/develop/imageop.c @@ -337,12 +337,6 @@ int dt_iop_load_module_so(void *m, const char *libname, const char *module_name) if(!module->modify_roi_in) module->modify_roi_in = _iop_modify_roi_in; if(!module->modify_roi_out) module->modify_roi_out = _iop_modify_roi_out; - #ifdef HAVE_OPENCL - if(!module->process_tiling_cl) - module->process_tiling_cl = darktable.opencl->inited ? default_process_tiling_cl : NULL; - if(!darktable.opencl->inited) module->process_cl = NULL; - #endif // HAVE_OPENCL - module->process_plain = module->process; module->process = default_process; diff --git a/src/iop/iop_api.h b/src/iop/iop_api.h index 0a13743a806f..92f665ca8e17 100644 --- a/src/iop/iop_api.h +++ b/src/iop/iop_api.h @@ -86,28 +86,28 @@ DEFAULT(int, operation_tags_filter, void); /** what do the iop want as an input? */ DEFAULT(void, input_format, struct dt_iop_module_t *self, struct dt_dev_pixelpipe_t *pipe, - struct dt_dev_pixelpipe_iop_t *piece, struct dt_iop_buffer_dsc_t *dsc); + struct dt_dev_pixelpipe_iop_t *piece, struct dt_iop_buffer_dsc_t *dsc); /** what will it output? 
*/ DEFAULT(void, output_format, struct dt_iop_module_t *self, struct dt_dev_pixelpipe_t *pipe, - struct dt_dev_pixelpipe_iop_t *piece, struct dt_iop_buffer_dsc_t *dsc); + struct dt_dev_pixelpipe_iop_t *piece, struct dt_iop_buffer_dsc_t *dsc); /** what default colorspace this iop use? */ REQUIRED(dt_iop_colorspace_type_t, default_colorspace, struct dt_iop_module_t *self, struct dt_dev_pixelpipe_t *pipe, - struct dt_dev_pixelpipe_iop_t *piece); + struct dt_dev_pixelpipe_iop_t *piece); /** what input colorspace it expects? */ DEFAULT(dt_iop_colorspace_type_t, input_colorspace, struct dt_iop_module_t *self, struct dt_dev_pixelpipe_t *pipe, - struct dt_dev_pixelpipe_iop_t *piece); + struct dt_dev_pixelpipe_iop_t *piece); /** what will it output? */ DEFAULT(dt_iop_colorspace_type_t, output_colorspace, struct dt_iop_module_t *self, struct dt_dev_pixelpipe_t *pipe, - struct dt_dev_pixelpipe_iop_t *piece); + struct dt_dev_pixelpipe_iop_t *piece); /** what colorspace the blend module operates with? */ DEFAULT(dt_iop_colorspace_type_t, blend_colorspace, struct dt_iop_module_t *self, struct dt_dev_pixelpipe_t *pipe, - struct dt_dev_pixelpipe_iop_t *piece); + struct dt_dev_pixelpipe_iop_t *piece); /** report back info for tiling: memory usage and overlap. Memory usage: factor * input_size + overhead */ DEFAULT(void, tiling_callback, struct dt_iop_module_t *self, struct dt_dev_pixelpipe_iop_t *piece, - const struct dt_iop_roi_t *roi_in, const struct dt_iop_roi_t *roi_out, - struct dt_develop_tiling_t *tiling); + const struct dt_iop_roi_t *roi_in, const struct dt_iop_roi_t *roi_out, + struct dt_develop_tiling_t *tiling); /** callback methods for gui. */ /** synch gtk interface with gui params, if necessary. */ @@ -146,12 +146,12 @@ DEFAULT(void, cleanup, struct dt_iop_module_t *self); /** this inits the piece of the pipe, allocing piece->data as necessary. 
*/ DEFAULT(void, init_pipe, struct dt_iop_module_t *self, struct dt_dev_pixelpipe_t *pipe, - struct dt_dev_pixelpipe_iop_t *piece); + struct dt_dev_pixelpipe_iop_t *piece); /** this resets the params to factory defaults. used at the beginning of each history synch. */ /** this commits (a mutex will be locked to synch pipe/gui) the given history params to the pixelpipe piece. */ DEFAULT(void, commit_params, struct dt_iop_module_t *self, dt_iop_params_t *params, struct dt_dev_pixelpipe_t *pipe, - struct dt_dev_pixelpipe_iop_t *piece); + struct dt_dev_pixelpipe_iop_t *piece); /** this is the chance to update default parameters, after the full raw is loaded. */ OPTIONAL(void, reload_defaults, struct dt_iop_module_t *self); /** called after the image has changed in darkroom */ @@ -159,7 +159,7 @@ OPTIONAL(void, change_image, struct dt_iop_module_t *self); /** this destroys all resources needed by the piece of the pixelpipe. */ DEFAULT(void, cleanup_pipe, struct dt_iop_module_t *self, struct dt_dev_pixelpipe_t *pipe, - struct dt_dev_pixelpipe_iop_t *piece); + struct dt_dev_pixelpipe_iop_t *piece); OPTIONAL(void, modify_roi_in, struct dt_iop_module_t *self, struct dt_dev_pixelpipe_iop_t *piece, const struct dt_iop_roi_t *roi_out, struct dt_iop_roi_t *roi_in); OPTIONAL(void, modify_roi_out, struct dt_iop_module_t *self, struct dt_dev_pixelpipe_iop_t *piece, @@ -185,8 +185,8 @@ REQUIRED(void, process, struct dt_iop_module_t *self, struct dt_dev_pixelpipe_io const struct dt_iop_roi_t *const roi_out); /** a tiling variant of process(). */ DEFAULT(void, process_tiling, struct dt_iop_module_t *self, struct dt_dev_pixelpipe_iop_t *piece, const void *const i, - void *const o, const struct dt_iop_roi_t *const roi_in, - const struct dt_iop_roi_t *const roi_out, const int bpp); + void *const o, const struct dt_iop_roi_t *const roi_in, + const struct dt_iop_roi_t *const roi_out, const int bpp); #ifdef HAVE_OPENCL /** the opencl equivalent of process(). 
*/ @@ -194,9 +194,9 @@ OPTIONAL(int, process_cl, struct dt_iop_module_t *self, struct dt_dev_pixelpipe_ cl_mem dev_out, const struct dt_iop_roi_t *const roi_in, const struct dt_iop_roi_t *const roi_out); /** a tiling variant of process_cl(). */ -OPTIONAL(int, process_tiling_cl, struct dt_iop_module_t *self, struct dt_dev_pixelpipe_iop_t *piece, const void *const i, - void *const o, const struct dt_iop_roi_t *const roi_in, - const struct dt_iop_roi_t *const roi_out, const int bpp); +DEFAULT(int, process_tiling_cl, struct dt_iop_module_t *self, struct dt_dev_pixelpipe_iop_t *piece, const void *const i, + void *const o, const struct dt_iop_roi_t *const roi_in, + const struct dt_iop_roi_t *const roi_out, const int bpp); #endif /** this functions are used for distort iop @@ -204,10 +204,10 @@ OPTIONAL(int, process_tiling_cl, struct dt_iop_module_t *self, struct dt_dev_pix * size is 2*points_count */ /** points before the iop is applied => point after processed */ DEFAULT(int, distort_transform, struct dt_iop_module_t *self, struct dt_dev_pixelpipe_iop_t *piece, float *points, - size_t points_count); + size_t points_count); /** reverse points after the iop is applied => point before process */ DEFAULT(int, distort_backtransform, struct dt_iop_module_t *self, struct dt_dev_pixelpipe_iop_t *piece, float *points, - size_t points_count); + size_t points_count); OPTIONAL(void, distort_mask, struct dt_iop_module_t *self, struct dt_dev_pixelpipe_iop_t *piece, const float *const in, float *const out, const struct dt_iop_roi_t *const roi_in, const struct dt_iop_roi_t *const roi_out);