Skip to content

Commit

Permalink
Merge pull request #17372 from jenshannoschwalm/ce_opencl
Browse files Browse the repository at this point in the history
Color equalizer OpenCL implementation
  • Loading branch information
TurboGit authored Aug 27, 2024
2 parents 80f8355 + 8468c73 commit 28f3414
Show file tree
Hide file tree
Showing 12 changed files with 1,279 additions and 81 deletions.
2 changes: 1 addition & 1 deletion data/kernels/basic.cl
Original file line number Diff line number Diff line change
Expand Up @@ -971,7 +971,7 @@ interpolate_bilinear(read_only image2d_t in,
write_only image2d_t out,
const int width_out,
const int height_out,
const int RGBa)
const int ch) // works with 1-4 channels
{
const int x = get_global_id(0);
const int y = get_global_id(1);
Expand Down
515 changes: 515 additions & 0 deletions data/kernels/colorequal.cl

Large diffs are not rendered by default.

47 changes: 47 additions & 0 deletions data/kernels/colorspace.h
Original file line number Diff line number Diff line change
Expand Up @@ -944,3 +944,50 @@ static inline float4 dt_UCS_HCB_to_JCH(const float4 HCB)
JCH.x = HCB.z / (native_powr(HCB.y, 1.33654221029386f) + 1.f);
return JCH;
}

/* Convert a darktable UCS HSB pixel to XYZ.
 *
 * The conversion is a plain composition of three existing helpers:
 *   HSB -> JCH (dt_UCS_HSB_to_JCH), JCH -> xyY (dt_UCS_JCH_to_xyY), xyY -> XYZ (dt_xyY_to_XYZ).
 *
 * HSB : input pixel, handed unchanged to dt_UCS_HSB_to_JCH.
 * L_w : forwarded unchanged to dt_UCS_JCH_to_xyY
 *       (presumably the white luminance -- confirm against that helper).
 * Returns whatever dt_xyY_to_XYZ produces for the intermediate xyY value.
 */
static inline float4 dt_UCS_HSB_to_XYZ(const float4 HSB, const float L_w)
{
  return dt_xyY_to_XYZ(dt_UCS_JCH_to_xyY(dt_UCS_HSB_to_JCH(HSB), L_w));
}

/* Build a JCH vector from L*, the white L and the (U*', V*') pair.
 *
 * L_star        : lightness value, used both in the J ratio and in the chroma power term.
 * L_white       : divisor applied to both the first and the second component.
 * UV_star_prime : only .x and .y are read.
 *
 * Returns a float4 laid out as
 *   .x = L_star / L_white
 *   .y = 15.932993652962535 * L_star^0.6523997524738018 * M2^0.6007557017508491 / L_white
 *   .z = atan2(UV_star_prime.y, UV_star_prime.x)
 *   .w = 0
 * where M2 is the squared norm of (UV_star_prime.x, UV_star_prime.y).
 */
static inline float4 dt_UCS_LUV_to_JCH(const float L_star, const float L_white, const float4 UV_star_prime)
{
  const float u = UV_star_prime.x;
  const float v = UV_star_prime.y;

  // squared colorfulness: the power below is applied to M^2 directly, so no sqrt is taken
  const float M2 = u * u + v * v;

  const float lightness = L_star / L_white;
  const float chroma = 15.932993652962535f * native_powr(L_star, 0.6523997524738018f) * native_powr(M2, 0.6007557017508491f) / L_white;
  const float hue = atan2(v, u);

  const float4 JCH = { lightness, chroma, hue, 0.0f };
  return JCH;
}

#define LUT_ELEM 360 // gamut LUT number of elements: resolution of 1°
/* Read the gamut LUT at hue angle x with linear interpolation between
 * the two neighbouring entries.
 *
 * gamut_lut : LUT of LUT_ELEM floats, indexed by hue.
 * x         : hue angle in radians; expected within [-pi ; pi], which is
 *             what atan2 yields. The range is not enforced here, only the
 *             resulting indices are wrapped.
 *
 * Returns the interpolated LUT value.
 */
static inline float lookup_gamut(global const float *gamut_lut, const float x)
{
  // map the angle to a fractional LUT position: -pi -> 0, +pi -> LUT_ELEM - 1
  const float pos = (LUT_ELEM - 1) * (x + M_PI_F) / (2.f * M_PI_F);

  // integer positions bracketing pos
  const float pos_lo = floor(pos);
  const float pos_hi = ceil(pos);

  // turn them into indices, wrapping around the hue ring when they fall outside the LUT
  int lo = (int)pos_lo;
  lo = (lo < 0) ? (LUT_ELEM - 1) : ((lo > LUT_ELEM - 1) ? 0 : lo);

  int hi = (int)pos_hi;
  hi = (hi < 0) ? (LUT_ELEM - 1) : ((hi > LUT_ELEM - 1) ? 0 : hi);

  const float base = gamut_lut[lo];

  // when both indices coincide there is nothing to interpolate and the
  // upper entry is not fetched at all
  float delta = 0.0f;
  if(lo != hi)
    delta = (pos - pos_lo) * (gamut_lut[hi] - base);

  return base + delta;
}

32 changes: 0 additions & 32 deletions data/kernels/extended.cl
Original file line number Diff line number Diff line change
Expand Up @@ -747,38 +747,6 @@ static inline float4 opacity_masks(const float x,
return output;
}


#define LUT_ELEM 360 // gamut LUT number of elements: resolution of 1°

/* Read the gamut LUT at hue angle x with linear interpolation between
 * the two neighbouring entries.
 *
 * gamut_lut : LUT of LUT_ELEM floats, indexed by hue.
 * x         : hue angle in radians; expected within [-pi ; pi], which is
 *             what atan2 yields. The range is not enforced here, only the
 *             resulting indices are wrapped.
 *
 * Returns the interpolated LUT value.
 */
static inline float lookup_gamut(global const float *gamut_lut, const float x)
{
  // map the angle to a fractional LUT position: -pi -> 0, +pi -> LUT_ELEM - 1
  const float pos = (LUT_ELEM - 1) * (x + M_PI_F) / (2.f * M_PI_F);

  // integer positions bracketing pos
  const float pos_lo = floor(pos);
  const float pos_hi = ceil(pos);

  // turn them into indices, wrapping around the hue ring when they fall outside the LUT
  int lo = (int)pos_lo;
  lo = (lo < 0) ? (LUT_ELEM - 1) : ((lo > LUT_ELEM - 1) ? 0 : lo);

  int hi = (int)pos_hi;
  hi = (hi < 0) ? (LUT_ELEM - 1) : ((hi > LUT_ELEM - 1) ? 0 : hi);

  const float base = gamut_lut[lo];

  // when both indices coincide there is nothing to interpolate and the
  // upper entry is not fetched at all
  float delta = 0.0f;
  if(lo != hi)
    delta = (pos - pos_lo) * (gamut_lut[hi] - base);

  return base + delta;
}


typedef enum dt_iop_colorbalancrgb_saturation_t
{
DT_COLORBALANCE_SATURATION_JZAZBZ = 0, // $DESCRIPTION: "JzAzBz (2021)"
Expand Down
126 changes: 102 additions & 24 deletions data/kernels/gaussian.cl
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@
is needed to have read-write access to some buffers which openCL does not offer for image object. */


kernel void
gaussian_transpose_4c(global float4 *in, global float4 *out, unsigned int width, unsigned int height,
kernel void
gaussian_transpose_4c(global float4 *in, global float4 *out, unsigned int width, unsigned int height,
unsigned int blocksize, local float4 *buffer)
{
unsigned int x = get_global_id(0);
Expand All @@ -49,9 +49,34 @@ gaussian_transpose_4c(global float4 *in, global float4 *out, unsigned int width,
}
}

/* Transpose a width x height buffer of float2 pixels into a height x width
 * buffer, staging each tile in local memory.
 *
 * in        : source buffer, row-major with row length `width`.
 * out       : destination buffer, row-major with row length `height`.
 * width     : number of columns of `in` (rows of `out`).
 * height    : number of rows of `in` (columns of `out`).
 * blocksize : tile edge used to compute the transposed group origin.
 * buffer    : local scratch space, addressed with a row stride of
 *             blocksize + 1 (assumes the work-group is blocksize x blocksize,
 *             i.e. at least blocksize * (blocksize + 1) elements -- confirm
 *             against the host-side launch code).
 *
 * Every work-item must reach the barrier, hence the bounds checks wrap only
 * the loads/stores instead of returning early.
 */
kernel void
gaussian_transpose_2c(global float2 *in, global float2 *out, unsigned int width, unsigned int height,
                      unsigned int blocksize, local float2 *buffer)
{
  unsigned int x = get_global_id(0);
  unsigned int y = get_global_id(1);

  // stage one input pixel per work-item into the local tile
  if((x < width) && (y < height))
  {
    const unsigned int iindex = mad24(y, width, x);
    buffer[mad24(get_local_id(1), blocksize + 1, get_local_id(0))] = in[iindex];
  }

  // the tile must be completely written before anyone reads it transposed
  barrier(CLK_LOCAL_MEM_FENCE);

  // output coordinates: group ids are swapped, local ids keep their role
  x = mad24(get_group_id(1), blocksize, get_local_id(0));
  y = mad24(get_group_id(0), blocksize, get_local_id(1));

  if((x < height) && (y < width))
  {
    const unsigned int oindex = mad24(y, height, x);
    // read the tile with swapped local ids to perform the actual transpose
    out[oindex] = buffer[mad24(get_local_id(0), blocksize + 1, get_local_id(1))];
  }
}


kernel void
gaussian_transpose_1c(global float *in, global float *out, unsigned int width, unsigned int height,
unsigned int blocksize, local float *buffer)
{
unsigned int x = get_global_id(0);
Expand All @@ -76,7 +101,7 @@ gaussian_transpose_1c(global float *in, global float *out, unsigned int width, u
}


kernel void
kernel void
gaussian_column_4c(global float4 *in, global float4 *out, unsigned int width, unsigned int height,
const float a0, const float a1, const float a2, const float a3, const float b1, const float b2,
const float coefp, const float coefn, const float4 Labmax, const float4 Labmin)
Expand All @@ -100,7 +125,6 @@ gaussian_column_4c(global float4 *in, global float4 *out, unsigned int width, un
yb = xp * coefp;
yp = yb;


for(int y=0; y<height; y++)
{
const int idx = mad24((unsigned int)y, width, x);
Expand All @@ -113,7 +137,6 @@ gaussian_column_4c(global float4 *in, global float4 *out, unsigned int width, un
yp = yc;

out[idx] = yc;

}

// backward filter
Expand All @@ -122,25 +145,83 @@ gaussian_column_4c(global float4 *in, global float4 *out, unsigned int width, un
yn = xn * coefn;
ya = yn;


for(int y=height-1; y>-1; y--)
{
const int idx = mad24((unsigned int)y, width, x);

xc = clamp(in[idx], Labmin, Labmax);
yc = (a2 * xn) + (a3 * xa) - (b1 * yn) - (b2 * ya);

xa = xn;
xn = xc;
ya = yn;
xa = xn;
xn = xc;
ya = yn;
yn = yc;

out[idx] += yc;
}
}

/* Two-channel (float2) column pass of the recursive gaussian filter.
 *
 * Each work-item handles one image column x and runs two sequential sweeps
 * over it: a forward sweep (top to bottom) that writes its result to `out`,
 * and a backward sweep (bottom to top) that adds its result onto `out`.
 *
 * in, out       : buffers addressed as y * width + x.
 * width, height : dimensions of the buffers in pixels.
 * a0, a1        : weights of the current and previous input in the forward sweep.
 * a2, a3        : weights of the two following inputs in the backward sweep.
 * b1, b2        : weights of the two most recent outputs, used by both sweeps.
 * coefp, coefn  : factors applied to the first / last input sample to seed
 *                 the output history of the forward / backward sweep.
 * Labmax, Labmin: per-channel bounds; every value read from `in` is clamped
 *                 to [Labmin, Labmax] before use.
 *
 * NOTE(review): row 0 and row height-1 are read without checking height,
 * so this presumably relies on callers passing height >= 1 -- confirm.
 */
kernel void
gaussian_column_2c(global float2 *in, global float2 *out, unsigned int width, unsigned int height,
const float a0, const float a1, const float a2, const float a3, const float b1, const float b2,
const float coefp, const float coefn, const float2 Labmax, const float2 Labmin)
{
// column handled by this work-item
const unsigned int x = get_global_id(0);

// work-items past the last column have nothing to do
if(x >= width) return;

// filter state: x* are inputs, y* are outputs
//   forward sweep : xp = previous input, yp = previous output, yb = output before that
//   backward sweep: xn = input one row below, xa = input two rows below,
//                   yn / ya = the matching outputs
//   xc / yc       : current input / output in either sweep
float2 xp = (float2)0.0f;
float2 yb = (float2)0.0f;
float2 yp = (float2)0.0f;
float2 xc = (float2)0.0f;
float2 yc = (float2)0.0f;
float2 xn = (float2)0.0f;
float2 xa = (float2)0.0f;
float2 yn = (float2)0.0f;
float2 ya = (float2)0.0f;

// forward filter: seed the history from the first row of this column
xp = clamp(in[x], Labmin, Labmax); // 0*width+x
yb = xp * coefp;
yp = yb;

for(int y=0; y<height; y++)
{
const int idx = mad24((unsigned int)y, width, x);

xc = clamp(in[idx], Labmin, Labmax);
yc = (a0 * xc) + (a1 * xp) - (b1 * yp) - (b2 * yb);

// shift the history by one row; yb must take yp before yp is overwritten
xp = xc;
yb = yp;
yp = yc;

// the forward sweep overwrites the output
out[idx] = yc;
}

// backward filter: seed the history from the last row of this column
xn = clamp(in[mad24(height - 1, width, x)], Labmin, Labmax);
xa = xn;
yn = xn * coefn;
ya = yn;

for(int y=height-1; y>-1; y--)
{
const int idx = mad24((unsigned int)y, width, x);

xc = clamp(in[idx], Labmin, Labmax);
// only already visited rows (xn, xa) contribute here, not the current input
yc = (a2 * xn) + (a3 * xa) - (b1 * yn) - (b2 * ya);

// shift the history by one row; xa / ya must be updated before xn / yn
xa = xn;
xn = xc;
ya = yn;
yn = yc;

// the backward sweep is accumulated onto the forward result
out[idx] += yc;
}
}

kernel void
kernel void
gaussian_column_1c(global float *in, global float *out, unsigned int width, unsigned int height,
const float a0, const float a1, const float a2, const float a3, const float b1, const float b2,
const float coefp, const float coefn, const float Labmax, const float Labmin)
Expand All @@ -164,7 +245,6 @@ gaussian_column_1c(global float *in, global float *out, unsigned int width, unsi
yb = xp * coefp;
yp = yb;


for(int y=0; y<height; y++)
{
const int idx = mad24((unsigned int)y, width, x);
Expand All @@ -177,7 +257,6 @@ gaussian_column_1c(global float *in, global float *out, unsigned int width, unsi
yp = yc;

out[idx] = yc;

}

// backward filter
Expand All @@ -194,13 +273,12 @@ gaussian_column_1c(global float *in, global float *out, unsigned int width, unsi
xc = clamp(in[idx], Labmin, Labmax);
yc = (a2 * xn) + (a3 * xa) - (b1 * yn) - (b2 * ya);

xa = xn;
xn = xc;
ya = yn;
xa = xn;
xn = xc;
ya = yn;
yn = yc;

out[idx] += yc;

}
}

Expand All @@ -225,8 +303,8 @@ lookup_unbounded(read_only image2d_t lut, const float x, global float *a)
}


kernel void
lowpass_mix(read_only image2d_t in, write_only image2d_t out, unsigned int width, unsigned int height, const float saturation,
kernel void
lowpass_mix(read_only image2d_t in, write_only image2d_t out, unsigned int width, unsigned int height, const float saturation,
read_only image2d_t ctable, global float *ca, read_only image2d_t ltable, global float *la, const int unbound)
{
const unsigned int x = get_global_id(0);
Expand Down Expand Up @@ -307,11 +385,11 @@ overlay(const float4 in_a, const float4 in_b, const float opacity, const float t
#define UNBOUND_HIGHLIGHTS_A (UNBOUND_A << 3) /* 16 */
#define UNBOUND_HIGHLIGHTS_B (UNBOUND_B << 3) /* 32 */

kernel void
shadows_highlights_mix(read_only image2d_t in, read_only image2d_t mask, write_only image2d_t out,
unsigned int width, unsigned int height,
kernel void
shadows_highlights_mix(read_only image2d_t in, read_only image2d_t mask, write_only image2d_t out,
unsigned int width, unsigned int height,
const float shadows, const float highlights, const float compress,
const float shadows_ccorrect, const float highlights_ccorrect,
const float shadows_ccorrect, const float highlights_ccorrect,
const unsigned int flags, const int unbound_mask, const float low_approximation,
const float whitepoint)
{
Expand Down
1 change: 1 addition & 0 deletions data/kernels/programs.conf
Original file line number Diff line number Diff line change
Expand Up @@ -37,3 +37,4 @@ diffuse.cl 33
blurs.cl 34
bspline.cl 35
sigmoid.cl 36
colorequal.cl 37
16 changes: 13 additions & 3 deletions src/common/gaussian.c
Original file line number Diff line number Diff line change
Expand Up @@ -473,6 +473,8 @@ dt_gaussian_cl_global_t *dt_gaussian_init_cl_global()
const int program = 6; // gaussian.cl, from programs.conf
g->kernel_gaussian_column_1c = dt_opencl_create_kernel(program, "gaussian_column_1c");
g->kernel_gaussian_transpose_1c = dt_opencl_create_kernel(program, "gaussian_transpose_1c");
g->kernel_gaussian_column_2c = dt_opencl_create_kernel(program, "gaussian_column_2c");
g->kernel_gaussian_transpose_2c = dt_opencl_create_kernel(program, "gaussian_transpose_2c");
g->kernel_gaussian_column_4c = dt_opencl_create_kernel(program, "gaussian_column_4c");
g->kernel_gaussian_transpose_4c = dt_opencl_create_kernel(program, "gaussian_transpose_4c");
return g;
Expand Down Expand Up @@ -501,9 +503,9 @@ dt_gaussian_cl_t *dt_gaussian_init_cl(const int devid,
const float sigma, // gaussian sigma
const int order) // order of gaussian blur
{
assert(channels == 1 || channels == 4);
assert(channels == 1 || channels == 2 || channels == 4);

if(!(channels == 1 || channels == 4)) return NULL;
if(!(channels == 1 || channels == 2 || channels == 4)) return NULL;

dt_gaussian_cl_t *g = (dt_gaussian_cl_t *)malloc(sizeof(dt_gaussian_cl_t));
if(!g) return NULL;
Expand All @@ -529,7 +531,8 @@ dt_gaussian_cl_t *dt_gaussian_init_cl(const int devid,
}

int kernel_gaussian_transpose = (channels == 1) ? g->global->kernel_gaussian_transpose_1c
: g->global->kernel_gaussian_transpose_4c;
: ((channels == 2) ? g->global->kernel_gaussian_transpose_2c
: g->global->kernel_gaussian_transpose_4c);
int blocksize;

dt_opencl_local_buffer_t locopt
Expand Down Expand Up @@ -602,6 +605,11 @@ cl_int dt_gaussian_blur_cl(dt_gaussian_cl_t *g, cl_mem dev_in, cl_mem dev_out)
kernel_gaussian_column = g->global->kernel_gaussian_column_1c;
kernel_gaussian_transpose = g->global->kernel_gaussian_transpose_1c;
}
else if(channels == 2)
{
kernel_gaussian_column = g->global->kernel_gaussian_column_2c;
kernel_gaussian_transpose = g->global->kernel_gaussian_transpose_2c;
}
else if(channels == 4)
{
kernel_gaussian_column = g->global->kernel_gaussian_column_4c;
Expand Down Expand Up @@ -689,6 +697,8 @@ void dt_gaussian_free_cl_global(dt_gaussian_cl_global_t *g)
// destroy kernels
dt_opencl_free_kernel(g->kernel_gaussian_column_1c);
dt_opencl_free_kernel(g->kernel_gaussian_transpose_1c);
dt_opencl_free_kernel(g->kernel_gaussian_column_2c);
dt_opencl_free_kernel(g->kernel_gaussian_transpose_2c);
dt_opencl_free_kernel(g->kernel_gaussian_column_4c);
dt_opencl_free_kernel(g->kernel_gaussian_transpose_4c);
free(g);
Expand Down
1 change: 1 addition & 0 deletions src/common/gaussian.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ void dt_gaussian_free(dt_gaussian_t *g);
/* Handles of the OpenCL kernels of the gaussian blur, one column/transpose
 * pair per supported channel count (4, 2 and 1). They are filled from
 * dt_opencl_create_kernel() and released with dt_opencl_free_kernel(). */
typedef struct dt_gaussian_cl_global_t
{
  /* 4 channels */
  int kernel_gaussian_column_4c;
  int kernel_gaussian_transpose_4c;
  /* 2 channels */
  int kernel_gaussian_column_2c;
  int kernel_gaussian_transpose_2c;
  /* 1 channel */
  int kernel_gaussian_column_1c;
  int kernel_gaussian_transpose_1c;
} dt_gaussian_cl_global_t;

Expand Down
Loading

0 comments on commit 28f3414

Please sign in to comment.