diff --git a/video/out/filter_kernels.c b/video/out/filter_kernels.c
index 1b8c70ba1ae33..5955bc09d1939 100644
--- a/video/out/filter_kernels.c
+++ b/video/out/filter_kernels.c
@@ -266,6 +266,22 @@ static double jinc(kernel *k, double x)
     return 2.0 * j1(pix) / pix;
 }
 
+// Sphinx kernel 3*(sin(t) - t*cos(t))/t^3 with t = pi*x/blur, the 3D
+// counterpart of sinc (1D) and jinc (2D). Evaluates to 1.0 at x = 0.
+static double sphinx(kernel *k, double x)
+{
+    if (fabs(x) < 1e-8)
+        return 1.0;
+    double pix = M_PI * x / k->params[0]; // blur factor
+    // sin(pix) - pix*cos(pix) cancels catastrophically for small pix, so
+    // switch to the Taylor expansion there (truncation error < 1e-16).
+    if (fabs(pix) < 1e-2) {
+        double pix2 = pix * pix;
+        return 1.0 - pix2 / 10.0 + pix2 * pix2 / 280.0;
+    }
+    return 3.0 * (sin(pix) - pix*cos(pix)) / (pix * pix * pix);
+}
+
 static double lanczos(kernel *k, double x)
 {
     double radius = k->size / 2;
@@ -298,6 +314,17 @@ static double ewa_lanczos(kernel *k, double x)
     return jinc(k, x) * jinc(k, x * jinc_zero / radius);
 }
 
+// Sphinx-windowed sphinx: the window is sphinx itself, stretched so that its
+// first zero falls exactly on the kernel radius. Returns 0 outside the radius.
+static double swa_lanczos(kernel *k, double x)
+{
+    double radius = k->radius;
+    if (fabs(x) >= radius)
+        return 0.0;
+    double sphinx_zero = 1.4302966531242027; // First zero, as computed by WA
+    return sphinx(k, x) * sphinx(k, x * sphinx_zero / radius);
+}
+
 static double ewa_hanning(kernel *k, double x)
 {
     double radius = k->radius;
@@ -340,6 +367,8 @@ const struct filter_kernel mp_filter_kernels[] = {
     {"ewa_lanczos", -1, ewa_lanczos, .params = {1.0, NAN}, .polar = true},
     {"ewa_hanning", -1, ewa_hanning, .params = {1.0, NAN}, .polar = true},
     {"ewa_ginseng", -1, ewa_ginseng, .params = {1.0, NAN}, .polar = true},
+    {"swa_lanczos", -1, swa_lanczos, .params = {1.0, NAN},
+                        .polar = true, .sphinx = true},
     // Radius is based on the true jinc radius, slightly sharpened as per
     // calculations by Nicolas Robidoux. 
Source: Imagemagick's magick/resize.c
     {"ewa_lanczossharp", 3.2383154841662362, ewa_lanczos,
diff --git a/video/out/filter_kernels.h b/video/out/filter_kernels.h
index b2e07863fd5dc..162760aca358f 100644
--- a/video/out/filter_kernels.h
+++ b/video/out/filter_kernels.h
@@ -30,6 +30,9 @@ struct filter_kernel {
     float params[2];
     // Whether or not the filter uses polar coordinates
     bool polar;
+    // Whether or not the filter is based on a sphinx kernel and requires
+    // 3D interpolation
+    bool sphinx;
     // The following values are set by mp_init_filter() at runtime.
     int size; // number of coefficients (may depend on radius)
     double inv_scale; // scale factor (<1.0 is upscale, >1.0 downscale)
diff --git a/video/out/gl_video.c b/video/out/gl_video.c
index 756aae3e441c2..38c7d749da2d6 100644
--- a/video/out/gl_video.c
+++ b/video/out/gl_video.c
@@ -1113,6 +1113,63 @@ static void pass_sample_polar(struct gl_video *p, struct scaler *scaler)
     GLSLF("}\n");
 }
 
+static void pass_sample_sphinx(struct gl_video *p, struct scaler *scaler)
+{ // Builds the shader snippet that sums LUT-weighted taps over x, y and texture index z
+    double radius = scaler->kernel->radius;
+    int bound = (int)ceil(radius); // taps run from 1-bound to bound on every axis
+    bool use_ar = scaler->antiring > 0;
+    GLSL(vec4 color = vec4(0.0);)
+    GLSLF("{\n");
+    GLSL(vec2 pt = vec2(1.0) / sample_size;)
+    GLSL(vec3 coord = vec3(fract(sample_pos * sample_size - vec2(0.5)), fcoord);) // xy: subpixel offset, z: fcoord
+    GLSL(vec2 base = sample_pos - coord.xy * pt;)
+    GLSL(vec4 c;)
+    GLSLF("float w, d, wsum = 0.0;\n");
+    if (use_ar) {
+        GLSL(vec4 lo = vec4(1.0);)
+        GLSL(vec4 hi = vec4(0.0);)
+    }
+    gl_sc_uniform_sampler(p->sc, "lut", scaler->gl_target,
+                          TEXUNIT_SCALERS + scaler->index);
+    GLSLF("// scaler samples\n");
+    for (int z = 1-bound; z <= bound; z++) {
+        for (int y = 1-bound; y <= bound; y++) {
+            for (int x = 1-bound; x <= bound; x++) {
+                // Since we can't know the subpixel position in advance, assume
+                // a worst case scenario
+                int yy = y > 0 ? y-1 : y;
+                int xx = x > 0 ? x-1 : x;
+                int zz = z > 0 ? z-1 : z;
+                double dmax = sqrt(xx*xx + yy*yy + zz*zz); // lower bound on this tap's distance
+                // Skip samples definitely outside the radius
+                if (dmax >= radius)
+                    continue;
+                GLSLF("d = length(vec3(%d, %d, %d) - coord)/%f;\n",
+                      x, y, z, radius); // d is the distance as a fraction of the radius
+                // Check for samples that might be skippable
+                if (dmax >= radius - 1)
+                    GLSLF("if (d < 1.0) {\n");
+                GLSL(w = texture1D(lut, d).r;)
+                GLSL(wsum += w;)
+                GLSLF("c = texture(texture%d, base + pt * vec2(%d, %d));\n",
+                      z + bound - 1, x, y); // z selects the texture: indices 0 .. 2*bound-1
+                GLSL(color += vec4(w) * c;)
+                if (use_ar && x >= 0 && y >= 0 && z >= 0
+                    && x <= 1 && y <= 1 && z <= 0) { // NOTE(review): z is limited to 0 here - confirm intended
+                    GLSL(lo = min(lo, c);)
+                    GLSL(hi = max(hi, c);)
+                }
+                if (dmax >= radius -1)
+                    GLSLF("}\n"); // closes the "if (d < 1.0)" opened above
+            }
+        }
+    }
+    GLSL(color = color / vec4(wsum);) // normalize by the accumulated weight
+    if (use_ar)
+        GLSLF("color = mix(color, clamp(color, lo, hi), %f);\n", scaler->antiring);
+    GLSLF("}\n");
+}
+
 static void bicubic_calcweights(struct gl_video *p, const char *t, const char *s)
 {
     // Explanation of how bicubic scaling with only 4 texel fetches is done:
@@ -1415,11 +1472,12 @@ static void pass_convert_yuv(struct gl_video *p)
     }
 }
 
-static void get_scale_factors(struct gl_video *p, double xy[2])
+static void get_scale_factors(struct gl_video *p, struct mp_rect dst_rect,
+                              double xy[2]) // out: dst_rect/src_rect size ratio for x and y
 {
-    xy[0] = (p->dst_rect.x1 - p->dst_rect.x0) /
+    xy[0] = (dst_rect.x1 - dst_rect.x0) /
             (double)(p->src_rect.x1 - p->src_rect.x0);
-    xy[1] = (p->dst_rect.y1 - p->dst_rect.y0) /
+    xy[1] = (dst_rect.y1 - dst_rect.y0) /
             (double)(p->src_rect.y1 - p->src_rect.y0);
 }
 
@@ -1443,11 +1501,11 @@ static void pass_linearize(struct gl_video *p)
 }
 
 // Takes care of the main scaling and pre/post-conversions
-static void pass_scale_main(struct gl_video *p)
+static void pass_scale_main(struct gl_video *p, struct mp_rect dst_rect)
 {
     // Figure out the main scaler.
double xy[2]; - get_scale_factors(p, xy); + get_scale_factors(p, dst_rect, xy); bool downscaling = xy[0] < 1.0 || xy[1] < 1.0; bool upscaling = !downscaling && (xy[0] > 1.0 || xy[1] > 1.0); double scale_factor = 1.0; @@ -1501,8 +1559,8 @@ static void pass_scale_main(struct gl_video *p) struct gl_transform transform = {{{sx,0.0}, {0.0,sy}}, {ox,oy}}; int xc = 0, yc = 1, - vp_w = p->dst_rect.x1 - p->dst_rect.x0, - vp_h = p->dst_rect.y1 - p->dst_rect.y0; + vp_w = dst_rect.x1 - dst_rect.x0, + vp_h = dst_rect.y1 - dst_rect.y0; if ((p->image_params.rotate % 180) == 90) { MPSWAP(float, transform.m[0][xc], transform.m[0][yc]); @@ -1749,17 +1807,17 @@ static void pass_draw_osd(struct gl_video *p, int draw_flags, double pts, // The main rendering function, takes care of everything up to and including // upscaling -static void pass_render_frame(struct gl_video *p) +static void pass_render_frame(struct gl_video *p, struct mp_rect dst_rect) { p->use_indirect = false; // set to true as needed by pass_* pass_read_video(p); pass_convert_yuv(p); - pass_scale_main(p); + pass_scale_main(p, dst_rect); if (p->opts.blend_subs) { // Recreate the real video size from the src/dst rects - int vp_w = p->dst_rect.x1 - p->dst_rect.x0, - vp_h = p->dst_rect.y1 - p->dst_rect.y0; + int vp_w = dst_rect.x1 - dst_rect.x0, + vp_h = dst_rect.y1 - dst_rect.y0; struct mp_osd_res rect = { .w = vp_w, .h = vp_h, .ml = -p->src_rect.x0, .mr = p->src_rect.x1 - p->image_w, @@ -1768,7 +1826,7 @@ static void pass_render_frame(struct gl_video *p) }; // Adjust margins for scale double scale[2]; - get_scale_factors(p, scale); + get_scale_factors(p, dst_rect, scale); rect.ml *= scale[0]; rect.mr *= scale[0]; rect.mt *= scale[1]; rect.mb *= scale[1]; finish_pass_fbo(p, &p->blend_subs_fbo, vp_w, vp_h, 0, @@ -1792,16 +1850,22 @@ static void pass_draw_to_screen(struct gl_video *p, int fbo) static void gl_video_interpolate_frame(struct gl_video *p, int fbo, struct frame_timing *t) { - int vp_w = p->dst_rect.x1 - 
p->dst_rect.x0, - vp_h = p->dst_rect.y1 - p->dst_rect.y0, - fuzz = FBOTEX_FUZZY_W | FBOTEX_FUZZY_H; + // Initialize the interpolation filter + struct scaler *tscale = &p->scalers[2]; + reinit_scaler(p, 2, p->opts.scalers[2], 1, tscale_sizes); + bool oversample = strcmp(tscale->name, "oversample") == 0; + bool sphinx = tscale->kernel && tscale->kernel->sphinx; + struct mp_rect dst_rect = sphinx ? p->src_rect : p->dst_rect; + int vp_w = dst_rect.x1 - dst_rect.x0, + vp_h = dst_rect.y1 - dst_rect.y0, + fuzz = sphinx ? 0 : FBOTEX_FUZZY_W | FBOTEX_FUZZY_H; double new_pts = p->image.mpi->pts; // First of all, figure out if we have a frame availble at all, and draw // it manually + reset the queue if not if (p->surfaces[p->surface_now].pts < 0) { - pass_render_frame(p); + pass_render_frame(p, dst_rect); finish_pass_fbo(p, &p->surfaces[p->surface_now].fbotex, vp_w, vp_h, 0, fuzz); p->surfaces[p->surface_now].pts = new_pts; @@ -1812,18 +1876,17 @@ static void gl_video_interpolate_frame(struct gl_video *p, int fbo, // look like this: _ A [B] C D _ // A is surface_bse, B is surface_now, C is surface_nxt and D is // surface_end. 
-    struct scaler *tscale = &p->scalers[2];
-    reinit_scaler(p, 2, p->opts.scalers[2], 1, tscale_sizes);
-    bool oversample = strcmp(tscale->name, "oversample") == 0;
     int size;
     if (oversample) {
         size = 2;
+    } else if (sphinx) {
+        size = 2 * (int)ceil(tscale->kernel->radius); // pass_sample_sphinx reads 2*ceil(radius) textures
     } else {
         assert(tscale->kernel && !tscale->kernel->polar);
         size = ceil(tscale->kernel->size);
-        assert(size <= TEXUNIT_VIDEO_NUM);
     }
     int radius = size/2;
+    assert(size <= TEXUNIT_VIDEO_NUM);
 
     int surface_now = p->surface_now;
     int surface_nxt = fbosurface_wrap(surface_now + 1);
@@ -1835,7 +1898,7 @@ static void gl_video_interpolate_frame(struct gl_video *p, int fbo,
     int surface_dst = fbosurface_wrap(p->surface_idx+1);
     if (surface_dst != surface_bse && p->surfaces[p->surface_idx].pts < new_pts) {
         MP_STATS(p, "new-pts");
-        pass_render_frame(p);
+        pass_render_frame(p, dst_rect);
         finish_pass_fbo(p, &p->surfaces[surface_dst].fbotex,
                         vp_w, vp_h, 0, fuzz);
         p->surfaces[surface_dst].pts = new_pts;
@@ -1875,8 +1938,15 @@ static void gl_video_interpolate_frame(struct gl_video *p, int fbo,
     if (!t || !valid) {
         // surface_now is guaranteed to be valid, so we can safely use it.
pass_load_fbotex(p, &p->surfaces[surface_now].fbotex, 0, vp_w, vp_h); - GLSL(vec4 color = texture(texture0, texcoord0);) p->is_interpolated = false; + // In sphinx mode, the interpolation code should be upscaling, so we + // still have to do that manually + if (sphinx && false) { + // FIXME: resample properly here + //pass_sample(p, 0, 0, p->opts.scaler[0], f, + } else { + GLSL(vec4 color = texture(texture0, texcoord0);) + } } else { double fscale = pts_nxt - pts_now, mix; if (oversample) { @@ -1892,7 +1962,13 @@ static void gl_video_interpolate_frame(struct gl_video *p, int fbo, } else { mix = (next_vsync - pts_now) / fscale; gl_sc_uniform_f(p->sc, "fcoord", mix); - pass_sample_separated_gen(p, tscale, 0, 0); + if (sphinx) { + GLSL(#define sample_pos texcoord0) + GLSL(#define sample_size texture_size0) + pass_sample_sphinx(p, tscale); + } else { + pass_sample_separated_gen(p, tscale, 0, 0); + } } for (int i = 0; i < size; i++) { pass_load_fbotex(p, &p->surfaces[fbosurface_wrap(surface_bse+i)].fbotex, @@ -1940,7 +2016,7 @@ void gl_video_render_frame(struct gl_video *p, int fbo, struct frame_timing *t) gl_video_interpolate_frame(p, fbo, t); } else { // Skip interpolation if there's nothing to be done - pass_render_frame(p); + pass_render_frame(p, p->dst_rect); pass_draw_to_screen(p, fbo); } @@ -2457,7 +2533,9 @@ static const char *handle_scaler_opt(const char *name, bool tscale) { if (name && name[0]) { const struct filter_kernel *kernel = mp_find_filter_kernel(name); - if (kernel && (!tscale || !kernel->polar)) + if (kernel && ((!tscale && !kernel->sphinx) || + (tscale && !kernel->polar) || + (tscale && kernel->sphinx))) return kernel->name; for (const char *const *filter = tscale ? 
fixed_tscale_filters @@ -2534,7 +2612,9 @@ static int validate_scaler_opt(struct mp_log *log, const m_option_t *opt, mp_info(log, " %s\n", *filter); } for (int n = 0; mp_filter_kernels[n].name; n++) { - if (!tscale || !mp_filter_kernels[n].polar) + if ((!tscale && !mp_filter_kernels[n].sphinx) + || (tscale && !mp_filter_kernels[n].polar) + || (tscale && mp_filter_kernels[n].sphinx)) mp_info(log, " %s\n", mp_filter_kernels[n].name); } if (s[0])