diff --git a/README.md b/README.md
index 2c565231..01874207 100644
--- a/README.md
+++ b/README.md
@@ -164,6 +164,11 @@ identifiers (so they can be freely merged together).
 - `renderer.h`: A high-level renderer which combines the shader primitives
   and dispatch mechanism into a fully-fledged rendering pipeline that takes
   raw texture data and transforms it into the desired output image.
+- `utils/frame_queue.h`: A high-level frame queuing abstraction. This API
+  can be used to interface with a decoder (or other source of frames), and
+  takes care of translating timestamped frames into a virtual stream of
+  presentation events suitable for use with `renderer.h`, including any extra
+  context required for frame interpolation (`pl_frame_mix`).
 - `utils/upload.h`: A high-level helper for uploading generic data in some
   user-described format to a plane texture suitable for use with `renderer.h`.
   These helpers essentially take care of picking/mapping a good image format
diff --git a/meson.build b/meson.build
index f28b0a2c..231dcafd 100644
--- a/meson.build
+++ b/meson.build
@@ -2,7 +2,7 @@ project('libplacebo', ['c', 'cpp'],
     license: 'LGPL2.1+',
     default_options: ['c_std=c99', 'cpp_std=c++11', 'warning_level=2'],
     meson_version: '>=0.51',
-    version: '3.117.0',
+    version: '3.118.0',
 )
 
 # Version number
diff --git a/src/common.h b/src/common.h
index 19433560..f97a9a83 100644
--- a/src/common.h
+++ b/src/common.h
@@ -69,6 +69,7 @@
 #include "include/libplacebo/shaders/lut.h"
 #include "include/libplacebo/shaders/sampling.h"
 #include "include/libplacebo/swapchain.h"
+#include "include/libplacebo/utils/frame_queue.h"
 #include "include/libplacebo/utils/upload.h"
 
 #ifdef PL_HAVE_LCMS
diff --git a/src/include/libplacebo/renderer.h b/src/include/libplacebo/renderer.h
index f2a3accc..fd50e7e8 100644
--- a/src/include/libplacebo/renderer.h
+++ b/src/include/libplacebo/renderer.h
@@ -91,13 +91,15 @@ struct pl_render_params {
     // Configures the algorithm used for frame mixing (when using
     // `pl_render_image_mix`). Ignored otherwise. As a special requirement,
     // this must be a filter config with `polar` set to false, since it's only
-    // used for 1D mixing and thus only 1D filters are compatible. If left as
-    // NULL, then libplacebo will use a built-in, inexpensive frame mixing
-    // algorithm.
+    // used for 1D mixing and thus only 1D filters are compatible.
     //
-    // It's worth pointing out that this built-in frame mixing can often be
-    // better than any of the available filter configurations. So it's not a
-    // bad idea to leave this as NULL. In fact, that's the recommended default.
+    // As a special case, if `frame_mixer->kernel` is NULL, then libplacebo
+    // will use a built-in, inexpensive and relatively unobtrusive oversampling
+    // frame mixing algorithm. (See `pl_oversample_frame_mixer`)
+    //
+    // If set to NULL, frame mixing is disabled, in which case
+    // `pl_render_image_mix` behaves as `pl_render_image`, also completely
+    // bypassing the mixing cache.
     const struct pl_filter_config *frame_mixer;
 
     // Configures the settings used to deband source textures. Leaving this as
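The new semantics give `frame_mixer` three distinct states. Roughly (a sketch, not part of the patch; `params` is a hypothetical `pl_render_params` instance):

    struct pl_render_params params = pl_render_default_params;

    // 1. Use the built-in oversampling algorithm (the new default):
    params.frame_mixer = &pl_oversample_frame_mixer; // kernel == NULL

    // 2. Use a real (non-polar) 1D filter kernel for smoother, costlier
    //    mixing, e.g. one of the existing filter configs:
    params.frame_mixer = &pl_filter_mitchell;

    // 3. Disable frame mixing entirely; `pl_render_image_mix` then behaves
    //    like `pl_render_image` and bypasses the mixing cache:
    params.frame_mixer = NULL;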
@@ -252,6 +254,10 @@ extern const struct pl_render_params pl_render_default_params;
 // and where maximum image quality is desired.
 extern const struct pl_render_params pl_render_high_quality_params;
 
+// Special filter config for the built-in oversampling frame mixing algorithm.
+// This is equivalent to (struct pl_filter_config) {0}.
+extern const struct pl_filter_config pl_oversample_frame_mixer;
+
 #define PL_MAX_PLANES 4
 
 // High level description of a single slice of an image. This basically
@@ -518,7 +524,10 @@ struct pl_frame_mix {
 
     // A list of the frames themselves. The frames can have different
     // colorspaces, configurations of planes, or even sizes.
-    const struct pl_frame *frames;
+    //
+    // Note: This is a list of pointers, to avoid users having to copy
+    // around `pl_frame` structs when re-organizing this array.
+    const struct pl_frame **frames;
 
     // A list of unique signatures, one for each frame. These are used to
     // identify frames across calls to this function, so it's crucial that they
@@ -556,7 +565,7 @@ struct pl_frame_mix {
     float vsync_duration;
 
     // Explanation of the frame mixing radius: The algorithm chosen in
-    // `pl_render_params.frame_mixing` has a canonical radius equal to
+    // `pl_render_params.frame_mixer` has a canonical radius equal to
     // `pl_filter_config.kernel->radius`. This means that the frame mixing
     // algorithm will (only) need to consult all of the frames that have a
     // distance within the interval [-radius, radius]. As such, the user should
@@ -568,6 +577,14 @@ struct pl_frame_mix {
     // "next" frames.
 };
 
+// Helper function to calculate the frame mixing radius.
+static inline float pl_frame_mix_radius(const struct pl_render_params *params)
+{
+    return (params->frame_mixer && params->frame_mixer->kernel)
+        ? params->frame_mixer->kernel->radius
+        : 0.0;
+}
+
 // Render a mixture of images to the target using the given parameters. This
 // functions much like a generalization of `pl_render_image`, for when the API
 // user has more control over the frame queue / vsync loop, and can provide a
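Since `frames` is now an array of pointers, callers re-order cheap pointers instead of copying whole `pl_frame` structs. A minimal sketch of constructing a mix by hand (illustrative only; `frame_a`/`frame_b` stand in for real frames, and the exact timestamp semantics are as documented in the struct above):

    struct pl_frame frame_a, frame_b; // hypothetical, already set up
    const struct pl_frame *frames[2] = { &frame_a, &frame_b };
    uint64_t signatures[2] = { 41, 42 };      // stable per-frame identities
    float timestamps[2]    = { -0.5f, 0.5f }; // relative to the target instant

    struct pl_frame_mix mix = {
        .num_frames = 2,
        .frames     = frames,
        .signatures = signatures,
        .timestamps = timestamps,
        .vsync_duration = 24.0 / 60.0, // e.g. 24 fps content at 60 Hz
    };

    // The configured mixer determines how many frames must be provided:
    struct pl_render_params render_params = pl_render_default_params;
    float radius = pl_frame_mix_radius(&render_params);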
diff --git a/src/include/libplacebo/utils/frame_queue.h b/src/include/libplacebo/utils/frame_queue.h
new file mode 100644
index 00000000..846fec25
--- /dev/null
+++ b/src/include/libplacebo/utils/frame_queue.h
@@ -0,0 +1,149 @@
+/*
+ * This file is part of libplacebo.
+ *
+ * libplacebo is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * libplacebo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <libplacebo/renderer.h>
+
+#ifndef LIBPLACEBO_FRAME_QUEUE_H
+#define LIBPLACEBO_FRAME_QUEUE_H
+
+// This file contains an abstraction layer for automatically turning a
+// conceptual stream of (frame, pts) pairs, as emitted by a decoder or filter
+// graph, into a `pl_frame_mix` suitable for `pl_render_image_mix`.
+//
+// This API ensures that minimal work is performed (e.g. only mapping frames
+// that are actually required), while also satisfying the requirements
+// of any configured frame mixer.
+
+enum pl_queue_status {
+    QUEUE_OK,       // success
+    QUEUE_EOF,      // no more frames are available
+    QUEUE_MORE,     // more frames needed, but not (yet) available
+    QUEUE_ERR = -1, // some unknown error occurred while retrieving frames
+};
+
+struct pl_source_frame {
+    // The frame's presentation timestamp, in seconds relative to the first
+    // frame. These must be monotonically increasing for subsequent frames.
+    // To implement a discontinuous jump, users must explicitly reset the
+    // frame queue with `pl_queue_reset` and restart from PTS 0.0.
+    float pts;
+
+    // Abstract frame data itself. To allow mapping frames only when they're
+    // actually needed, frames use a lazy representation. The provided
+    // callbacks will be invoked to interface with it.
+    void *frame_data;
+
+    // This will be called to map the frame to the GPU, only if needed.
+    //
+    // `tex` is a pointer to an array of 4 texture objects (or NULL), which
+    // *may* serve as backing storage for the texture being mapped. These are
+    // intended to be recreated by `map`, e.g. using `pl_tex_recreate` or
+    // `pl_upload_plane` as appropriate. They will be managed internally by
+    // `pl_queue` and destroyed at some unspecified future point in time.
+    //
+    // Note: If `map` fails, it will not be retried, nor will `discard` be run.
+    // The user should clean up state in this case.
+    bool (*map)(const struct pl_gpu *gpu, const struct pl_tex **tex,
+                const struct pl_source_frame *src, struct pl_frame *out_frame);
+
+    // If present, this will be called on frames that are done being used by
+    // `pl_queue`. This may be useful to e.g. unmap textures backed by external
+    // APIs such as hardware decoders. (Optional)
+    void (*unmap)(const struct pl_gpu *gpu, struct pl_frame *frame,
+                  const struct pl_source_frame *src);
+
+    // This function will be called for frames that are deemed unnecessary
+    // (e.g. never became visible) and should instead be cleanly freed.
+    // (Optional)
+    void (*discard)(const struct pl_source_frame *src);
+};
+
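To make the mapping contract concrete, here is a rough sketch of a `map` callback for packed RGB data, built on the existing `pl_upload_plane` helper from `utils/upload.h`. The `my_image` type and its fields are hypothetical stand-ins for a real decoder's output:

    struct my_image { // hypothetical decoder output
        float pts;
        int width, height;
        const uint8_t *pixels; // tightly packed RGB8
    };

    static bool my_map(const struct pl_gpu *gpu, const struct pl_tex **tex,
                       const struct pl_source_frame *src,
                       struct pl_frame *out_frame)
    {
        struct my_image *img = src->frame_data;
        *out_frame = (struct pl_frame) {
            .num_planes = 1,
            .repr  = pl_color_repr_rgb,
            .color = pl_color_space_srgb,
        };

        // Reuse `tex[0]` as backing storage, as permitted by the contract
        return pl_upload_plane(gpu, &out_frame->planes[0], &tex[0],
            &(struct pl_plane_data) {
                .type           = PL_FMT_UNORM,
                .width          = img->width,
                .height         = img->height,
                .pixel_stride   = 3, // bytes per packed RGB8 pixel
                .component_size = {8, 8, 8},
                .component_map  = {0, 1, 2}, // R, G, B
                .pixels         = img->pixels,
            });
    }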
+// Create a new, empty frame queue.
+//
+// It's highly recommended to fully render a single frame with `pts == 0.0`,
+// and flush the GPU pipeline with `pl_gpu_finish`, prior to starting the timed
+// playback loop.
+struct pl_queue *pl_queue_create(const struct pl_gpu *gpu);
+void pl_queue_destroy(struct pl_queue **queue);
+
+// Explicitly clear the queue. This is essentially equivalent to destroying
+// and recreating the queue, but preserves any internal memory allocations.
+void pl_queue_reset(struct pl_queue *queue);
+
+// Explicitly push a frame. This is an alternative way to feed the frame queue
+// with incoming frames, the other method being the asynchronous callback
+// specified as `pl_queue_params.get_frame`. Both methods may be used
+// simultaneously, although providing `get_frame` is recommended since it
+// avoids the risk of the queue underrunning.
+//
+// When no more frames are available, call this function with `frame == NULL`
+// to indicate EOF and begin draining the frame queue.
+void pl_queue_push(struct pl_queue *queue, const struct pl_source_frame *frame);
+
+struct pl_queue_params {
+    // The PTS of the frame that will be rendered. This should be set to the
+    // timestamp (in seconds) of the next vsync, relative to the initial frame.
+    //
+    // These must be monotonically increasing. To implement a discontinuous
+    // jump, users must explicitly reset the frame queue with `pl_queue_reset`
+    // and restart from PTS 0.0.
+    float pts;
+
+    // The radius of the configured mixer. This should be set to the value
+    // as returned by `pl_frame_mix_radius`.
+    float radius;
+
+    // The estimated duration of a vsync, in seconds. This will only be used as
+    // a hint, the true value will be estimated by comparing `pts` timestamps
+    // between calls to `pl_queue_update`. (Optional)
+    float vsync_duration;
+
+    // The estimated duration of a frame, in seconds. This will only be used as
+    // an initial hint, the true value will be estimated by comparing `pts`
+    // timestamps between source frames. (Optional)
+    float frame_duration;
+
+    // This callback will be used to pull new frames from the decoder. It may
+    // block if needed. The user is responsible for setting appropriate time
+    // limits and/or for returning and interpreting QUEUE_MORE sensibly.
+    //
+    // Providing this callback is entirely optional. Users can instead choose
+    // to manually feed the frame queue with new frames using `pl_queue_push`.
+    enum pl_queue_status (*get_frame)(struct pl_source_frame *out_frame,
+                                      const struct pl_queue_params *params);
+    void *priv;
+};
+
+// Advance the frame queue's internal state to the target timestamp. Any frames
+// which are no longer needed (i.e. too far in the past) are automatically
+// unmapped and evicted. Any future frames which are needed to fill the queue
+// must either have been pushed in advance, or will be requested using the
+// provided `get_frame` callback.
+//
+// This function may fail with QUEUE_MORE, in which case the user must
+// ensure more frames are available and then re-run this function with
+// the same parameters.
+//
+// The resulting mix of frames in `out_mix` will represent the neighbourhood of
+// the target timestamp, and can be passed to `pl_render_image_mix` as-is.
+//
+// Note: `out_mix` will only remain valid until the next call to `pl_queue_*`.
+enum pl_queue_status pl_queue_update(struct pl_queue *queue,
+                                     struct pl_frame_mix *out_mix,
+                                     const struct pl_queue_params *params);
+
+#endif // LIBPLACEBO_FRAME_QUEUE_H
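The two feeding styles might then look roughly as follows (a sketch under stated assumptions: `my_decoder`, the `my_decoder_*` helpers, and `my_map`/`my_discard` from the earlier sketch are all hypothetical):

    static void my_discard(const struct pl_source_frame *src)
    {
        my_image_free(src->frame_data); // hypothetical cleanup
    }

    // Pull style: the queue requests frames on demand via `get_frame`
    static enum pl_queue_status my_get_frame(struct pl_source_frame *out_frame,
                                             const struct pl_queue_params *params)
    {
        struct my_decoder *dec = params->priv; // hypothetical decoder handle
        if (my_decoder_is_eof(dec))
            return QUEUE_EOF;

        struct my_image *img = my_decoder_get(dec);
        if (!img)
            return QUEUE_MORE; // nothing decoded yet, try again later

        *out_frame = (struct pl_source_frame) {
            .pts        = img->pts,
            .frame_data = img,
            .map        = my_map,
            .discard    = my_discard,
        };
        return QUEUE_OK;
    }

    // Push style: feed decoded frames eagerly, then signal EOF to drain.
    // (The struct is copied internally, so a compound literal is fine.)
    static void feed_queue(struct pl_queue *queue, struct my_decoder *dec)
    {
        struct my_image *img;
        while ((img = my_decoder_get(dec))) {
            pl_queue_push(queue, &(struct pl_source_frame) {
                .pts        = img->pts,
                .frame_data = img,
                .map        = my_map,
                .discard    = my_discard,
            });
        }
        pl_queue_push(queue, NULL); // EOF: begin draining the queue
    }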
diff --git a/src/meson.build b/src/meson.build
index e214d7c8..34c384b5 100644
--- a/src/meson.build
+++ b/src/meson.build
@@ -165,6 +165,7 @@ headers = [
     'swapchain.h',
     'utils/dav1d.h',
     'utils/dav1d_internal.h',
+    'utils/frame_queue.h',
     'utils/libav.h',
     'utils/libav_internal.h',
     'utils/upload.h',
@@ -192,6 +193,7 @@ sources = [
     'shaders/sampling.c',
     'spirv.c',
     'swapchain.c',
+    'utils/frame_queue.c',
     'utils/upload.c',
 ]
 
diff --git a/src/pl_alloc.h b/src/pl_alloc.h
index bfb6b478..b0c52aa9 100644
--- a/src/pl_alloc.h
+++ b/src/pl_alloc.h
@@ -145,14 +145,18 @@ void pl_ref_deref(struct pl_ref **ref);
         }                                                       \
     } while (0)
 
-#define PL_ARRAY_REMOVE_AT(arr, idx)                            \
+#define PL_ARRAY_REMOVE_RANGE(arr, idx, count)                  \
     do {                                                        \
         size_t _idx = (idx);                                    \
-        assert(_idx < (arr).num);                               \
-        memmove(&(arr).elem[_idx], &(arr).elem[_idx + 1],       \
-                (--(arr).num - _idx) * sizeof((arr).elem[0]));  \
+        size_t _count = (count);                                \
+        assert(_idx + _count <= (arr).num);                     \
+        memmove(&(arr).elem[_idx], &(arr).elem[_idx + _count],  \
+                ((arr).num - _idx - _count) * sizeof((arr).elem[0])); \
+        (arr).num -= _count;                                    \
     } while (0)
 
+#define PL_ARRAY_REMOVE_AT(arr, idx) PL_ARRAY_REMOVE_RANGE(arr, idx, 1)
+
 #define PL_ARRAY_INSERT_AT(parent, arr, idx, ...)               \
     do {                                                        \
         size_t _idx = (idx);                                    \
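For illustration, the semantics of the generalized macro (hypothetical array contents; `arr` is any `PL_ARRAY(...)` instance):

    // Given arr.num == 5 with elements {A, B, C, D, E}:
    PL_ARRAY_REMOVE_RANGE(arr, 1, 3); // removes B, C, D -> {A, E}, arr.num == 2

    // The single-element form is now just the count == 1 special case:
    PL_ARRAY_REMOVE_AT(arr, 0);       // removes A -> {E}, arr.num == 1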
diff --git a/src/renderer.c b/src/renderer.c
index 08cc9154..e58f96ca 100644
--- a/src/renderer.c
+++ b/src/renderer.c
@@ -70,6 +70,7 @@ struct pl_renderer {
 
     // Frame cache (for frame mixing / interpolation)
     PL_ARRAY(struct cached_frame) frames;
+    PL_ARRAY(const struct pl_tex *) frame_fbos;
 };
 
 enum {
@@ -174,6 +175,8 @@ void pl_renderer_destroy(struct pl_renderer **p_rr)
         pl_tex_destroy(rr->gpu, &rr->fbos.elem[i]);
     for (int i = 0; i < rr->frames.num; i++)
         pl_tex_destroy(rr->gpu, &rr->frames.elem[i].tex);
+    for (int i = 0; i < rr->frame_fbos.num; i++)
+        pl_tex_destroy(rr->gpu, &rr->frame_fbos.elem[i]);
 
     // Free all shader resource objects
     pl_shader_obj_destroy(&rr->peak_detect_state);
@@ -219,7 +222,7 @@ void pl_renderer_flush_cache(struct pl_renderer *rr)
 const struct pl_render_params pl_render_default_params = {
     .upscaler           = &pl_filter_spline36,
     .downscaler         = &pl_filter_mitchell,
-    .frame_mixer        = NULL,
+    .frame_mixer        = &pl_oversample_frame_mixer,
 
     .sigmoid_params     = &pl_sigmoid_default_params,
     .peak_detect_params = &pl_peak_detect_default_params,
@@ -230,7 +233,7 @@ const struct pl_render_params pl_render_default_params = {
 const struct pl_render_params pl_render_high_quality_params = {
     .upscaler           = &pl_filter_ewa_lanczos,
     .downscaler         = &pl_filter_mitchell,
-    .frame_mixer        = NULL,
+    .frame_mixer        = &pl_oversample_frame_mixer,
 
     .deband_params      = &pl_deband_default_params,
     .sigmoid_params     = &pl_sigmoid_default_params,
@@ -239,6 +242,8 @@ const struct pl_render_params pl_render_high_quality_params = {
     .dither_params      = &pl_dither_default_params,
 };
 
+const struct pl_filter_config pl_oversample_frame_mixer = {0};
+
 #define FBOFMT(n) (params->disable_fbos ? NULL : rr->fbofmt[n])
 
 // Represents a "in-flight" image, which is either a shader that's in the
@@ -2245,7 +2250,7 @@ bool pl_render_image_mix(struct pl_renderer *rr, const struct pl_frame_mix *imag
 
     struct pass_state pass = {
         .rr = rr,
-        .image = images->frames[0],
+        .image = *images->frames[0],
         .target = *ptarget,
     };
 
@@ -2253,33 +2258,17 @@ bool pl_render_image_mix(struct pl_renderer *rr, const struct pl_frame_mix *imag
     // currently visible on an idealized zero-order-hold display.
     for (int i = 1; i < images->num_frames; i++) {
         if (images->timestamps[i] <= 0.0)
-            pass.image = images->frames[i];
+            pass.image = *images->frames[i];
     }
 
-    if (rr->disable_mixing || !FBOFMT(4))
+    if (!params->frame_mixer || rr->disable_mixing || !FBOFMT(4))
         goto fallback;
 
     if (!pass_infer_state(&pass, false))
         return false;
 
-    // Round the output rect and clip it to the framebuffer dimensions. This
-    // will determine the size of the intermediate crop that we actually care
-    // about rendering. Note that we necessarily drop sub-pixel offsets in the
-    // target, because these may change from frame to frame - compensating for
-    // them in the src_rect will result in misalignment.
-    struct pl_frame *target = &pass.target;
-    const struct pl_tex *dst_ref = target->planes[pass.dst_ref].texture;
-    target->crop = (struct pl_rect2df) {
-        .x0 = roundf(PL_MAX(target->crop.x0, 0.0)),
-        .y0 = roundf(PL_MAX(target->crop.y0, 0.0)),
-        .x1 = roundf(PL_MIN(target->crop.x1, dst_ref->params.w)),
-        .y1 = roundf(PL_MIN(target->crop.y1, dst_ref->params.h)),
-    };
-
-    int out_w = fabs(pl_rect_w(target->crop)),
-        out_h = fabs(pl_rect_h(target->crop));
-    out_w = PL_DEF(out_w, dst_ref->params.w);
-    out_h = PL_DEF(out_h, dst_ref->params.h);
+    int out_w = abs(pl_rect_w(pass.dst_rect)),
+        out_h = abs(pl_rect_h(pass.dst_rect));
 
     // The color space to mix the frames in. We arbitrarily choose to use the
     // "current" frame's color space, but converted to RGB.
@@ -2310,7 +2299,7 @@ bool pl_render_image_mix(struct pl_renderer *rr, const struct pl_frame_mix *imag
                  (unsigned long long) sig, pts);
 
         float weight;
-        if (params->frame_mixer) {
+        if (params->frame_mixer->kernel) {
             float radius = params->frame_mixer->kernel->radius;
 
             if (fabs(pts) >= radius) {
@@ -2367,8 +2356,8 @@ bool pl_render_image_mix(struct pl_renderer *rr, const struct pl_frame_mix *imag
             f = &rr->frames.elem[rr->frames.num++];
             *f = (struct cached_frame) {
                 .signature = sig,
-                .color = images->frames[i].color,
-                .profile = images->frames[i].profile,
+                .color = images->frames[i]->color,
+                .profile = images->frames[i]->profile,
             };
         }
 
@@ -2384,6 +2373,10 @@ bool pl_render_image_mix(struct pl_renderer *rr, const struct pl_frame_mix *imag
             // If we can't reuse the entry, we need to render to this
             // texture first
             PL_TRACE(rr, " -> Cached texture missing or invalid.. (re)creating");
+            if (!f->tex) {
+                if (PL_ARRAY_POP(rr->frame_fbos, &f->tex))
+                    pl_tex_invalidate(rr->gpu, f->tex);
+            }
             bool ok = pl_tex_recreate(rr->gpu, &f->tex, &(struct pl_tex_params) {
                 .w = out_w,
                 .h = out_h,
@@ -2404,7 +2397,7 @@ bool pl_render_image_mix(struct pl_renderer *rr, const struct pl_frame_mix *imag
             // in their native colorspaces. Preserving the original colorspace
             // avoids precision loss due to unnecessary color space roundtrips.
             // We also explicitly clear the ICC profile, see below for why.
-            struct pl_frame image = images->frames[i];
+            struct pl_frame image = *images->frames[i];
             image.profile = (struct pl_icc_profile) {0};
 
             struct pl_frame inter_target = {
@@ -2437,7 +2430,7 @@ bool pl_render_image_mix(struct pl_renderer *rr, const struct pl_frame_mix *imag
         if (rr->frames.elem[i].evict) {
             PL_TRACE(rr, "Evicting frame with signature %llx from cache",
                      (unsigned long long) rr->frames.elem[i].signature);
-            pl_tex_destroy(rr->gpu, &rr->frames.elem[i].tex);
+            PL_ARRAY_APPEND(rr, rr->frame_fbos, rr->frames.elem[i].tex);
             PL_ARRAY_REMOVE_AT(rr->frames, i);
             continue;
         } else {
diff --git a/src/tests/gpu_tests.h b/src/tests/gpu_tests.h
index 83d9d15e..aaf87cef 100644
--- a/src/tests/gpu_tests.h
+++ b/src/tests/gpu_tests.h
@@ -1041,12 +1041,13 @@ static void pl_render_tests(const struct pl_gpu *gpu)
 
     // Attempt frame mixing
 #define NUM_MIX_FRAMES 10
     struct pl_frame frames[NUM_MIX_FRAMES];
+    const struct pl_frame *pframes[NUM_MIX_FRAMES];
     uint64_t signatures[NUM_MIX_FRAMES];
     float timestamps[NUM_MIX_FRAMES];
     struct pl_frame_mix mix = {
         .num_frames = NUM_MIX_FRAMES,
-        .frames = frames,
+        .frames = pframes,
         .signatures = signatures,
         .timestamps = timestamps,
         .vsync_duration = 24.0 / 60.0,
@@ -1063,6 +1064,7 @@ static void pl_render_tests(const struct pl_gpu *gpu)
             },
         };
 
+        pframes[i] = &frames[i];
         signatures[i] = i;
         timestamps[i] = i;
     }
diff --git a/src/utils/frame_queue.c b/src/utils/frame_queue.c
new file mode 100644
index 00000000..8d8b526a
--- /dev/null
+++ b/src/utils/frame_queue.c
@@ -0,0 +1,560 @@
+/*
+ * This file is part of libplacebo.
+ *
+ * libplacebo is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * libplacebo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <math.h>
+
+#include "common.h"
+#include "context.h"
+
+struct cache_entry {
+    const struct pl_tex *tex[4];
+};
+
+struct entry {
+    struct cache_entry cache;
+    struct pl_source_frame src;
+    struct pl_frame frame;
+    uint64_t signature;
+    bool mapped;
+    bool ok;
+};
+
+// Hard limits for vsync timing validity
+#define MIN_FPS 10
+#define MAX_FPS 200
+
+// Limits for FPS estimation state
+#define MAX_SAMPLES 32
+#define MIN_SAMPLES 8
+
+struct pool {
+    float samples[MAX_SAMPLES];
+    float estimate;
+    float sum;
+    int idx;
+    int num;
+    int total;
+};
+
+struct pl_queue {
+    const struct pl_gpu *gpu;
+    struct pl_context *ctx;
+
+    // Frame queue and state
+    PL_ARRAY(struct entry) queue;
+    uint64_t signature;
+    bool eof;
+
+    // Average vsync/frame fps estimation state
+    struct pool vps, fps;
+    float reported_vps;
+    float reported_fps;
+    float prev_pts;
+
+    // Storage for temporary arrays
+    PL_ARRAY(uint64_t) tmp_sig;
+    PL_ARRAY(float) tmp_ts;
+    PL_ARRAY(const struct pl_frame *) tmp_frame;
+
+    // Queue of GPU objects to reuse
+    PL_ARRAY(struct cache_entry) cache;
+};
+
+struct pl_queue *pl_queue_create(const struct pl_gpu *gpu)
+{
+    struct pl_queue *p = pl_alloc_ptr(NULL, p);
+    *p = (struct pl_queue) {
+        .gpu = gpu,
+        .ctx = gpu->ctx,
+    };
+
+    return p;
+}
+
+static inline void unmap_frame(struct pl_queue *p, struct entry *entry)
+{
+    if (!entry->mapped && entry->src.discard) {
+        PL_TRACE(p, "Discarding unused frame with PTS %f", entry->src.pts);
+        entry->src.discard(&entry->src);
+    }
+
+    if (entry->mapped && entry->ok && entry->src.unmap) {
+        PL_TRACE(p, "Unmapping frame with PTS %f", entry->src.pts);
+        entry->src.unmap(p->gpu, &entry->frame, &entry->src);
+    }
+}
+
+void pl_queue_destroy(struct pl_queue **queue)
+{
+    struct pl_queue *p = *queue;
+    if (!p)
+        return;
+
+    for (int n = 0; n < p->queue.num; n++) {
+        struct entry *entry = &p->queue.elem[n];
+        unmap_frame(p, entry);
+        for (int i = 0; i < PL_ARRAY_SIZE(entry->cache.tex); i++)
+            pl_tex_destroy(p->gpu, &entry->cache.tex[i]);
+    }
+
+    for (int n = 0; n < p->cache.num; n++) {
+        for (int i = 0; i < PL_ARRAY_SIZE(p->cache.elem[n].tex); i++)
+            pl_tex_destroy(p->gpu, &p->cache.elem[n].tex[i]);
+    }
+
+    pl_free(p);
+    *queue = NULL;
+}
+
+static inline void cull_entry(struct pl_queue *p, struct entry *entry)
+{
+    unmap_frame(p, entry);
+
+    // Recycle non-empty texture cache entries
+    static const struct cache_entry null_cache = {0};
+    if (memcmp(&entry->cache, &null_cache, sizeof(null_cache)) != 0) {
+        for (int i = 0; i < PL_ARRAY_SIZE(entry->cache.tex); i++) {
+            if (entry->cache.tex[i])
+                pl_tex_invalidate(p->gpu, entry->cache.tex[i]);
+        }
+        PL_ARRAY_APPEND(p, p->cache, entry->cache);
+    }
+}
+
+void pl_queue_reset(struct pl_queue *p)
+{
+    for (int i = 0; i < p->queue.num; i++)
+        cull_entry(p, &p->queue.elem[i]);
+
+    *p = (struct pl_queue) {
+        .gpu = p->gpu,
+        .ctx = p->ctx,
+
+        // Explicitly preserve allocations
+        .queue.elem = p->queue.elem,
+        .tmp_sig.elem = p->tmp_sig.elem,
+        .tmp_ts.elem = p->tmp_ts.elem,
+        .tmp_frame.elem = p->tmp_frame.elem,
+
+        // Reuse GPU object cache entirely
+        .cache = p->cache,
+    };
+}
+
+static inline float delta(float old, float new)
+{
+    return fabs((new - old) / PL_MIN(new, old));
+}
+
+static inline void update_estimate(struct pool *pool, float cur)
+{
+    if (pool->num) {
+        static const float max_delta = 0.3;
+        if (delta(pool->sum / pool->num, cur) > max_delta) {
+            pool->sum = 0.0;
+            pool->num = pool->idx = 0;
+        }
+    }
+
+    if (pool->num++ == MAX_SAMPLES) {
+        pool->sum -= pool->samples[pool->idx];
+        pool->num--;
+    }
+
+    pool->sum += pool->samples[pool->idx] = cur;
+    pool->idx = (pool->idx + 1) % MAX_SAMPLES;
+    pool->total++;
+
+    if (pool->total < MIN_SAMPLES || pool->num >= MIN_SAMPLES)
+        pool->estimate = pool->sum / pool->num;
+}
+
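To make the outlier handling concrete, here is a toy, self-contained illustration of the check above (not part of the patch):

    #include <math.h>
    #include <stdio.h>

    int main(void)
    {
        float avg = 1.0f / 24.0f; // steady 24 fps -> ~0.0417 s per delta
        float cur = 0.5f;         // one discontinuous outlier sample
        float dev = fabsf((cur - avg) / fminf(cur, avg));
        printf("relative deviation: %.1f (threshold 0.3)\n", dev); // ~11.0
        // Since dev > max_delta (0.3), the sample window is flushed and
        // estimation restarts, instead of letting one outlier skew the
        // sliding average.
        return 0;
    }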
+void pl_queue_push(struct pl_queue *p, const struct pl_source_frame *src)
+{
+    if (!src) {
+        PL_TRACE(p, "Received EOF, draining frame queue...");
+        p->eof = true;
+        return;
+    }
+
+    PL_TRACE(p, "Received new frame with PTS %f", src->pts);
+
+    struct cache_entry cache = {0};
+    PL_ARRAY_POP(p->cache, &cache);
+    PL_ARRAY_APPEND(p, p->queue, (struct entry) {
+        .signature = p->signature++,
+        .cache = cache,
+        .src = *src,
+    });
+
+    if (p->queue.num > 1) {
+        float prev_pts = p->queue.elem[p->queue.num - 2].src.pts;
+        float delta = src->pts - prev_pts;
+        if ((p->fps.estimate && delta > 10.0 * p->fps.estimate) || delta < 0.0) {
+            // Ignore very large discontinuities or backwards jumps in PTS
+            PL_TRACE(p, "Discontinuous source PTS jump %f -> %f, ignoring...",
+                     prev_pts, src->pts);
+        } else {
+            update_estimate(&p->fps, delta);
+        }
+    }
+}
+
+static void report_estimates(struct pl_queue *p)
+{
+    if (p->fps.total >= MIN_SAMPLES && p->vps.total >= MIN_SAMPLES) {
+        if (p->reported_fps && p->reported_vps) {
+            // Only re-report the estimates if they've changed considerably
+            // from the previously reported values
+            static const float report_delta = 0.3;
+            float delta_fps = delta(p->reported_fps, p->fps.estimate);
+            float delta_vps = delta(p->reported_vps, p->vps.estimate);
+            if (delta_fps < report_delta && delta_vps < report_delta)
+                return;
+        }
+
+        PL_INFO(p, "Estimated source FPS: %.3f, display FPS: %.3f",
+                1.0 / p->fps.estimate, 1.0 / p->vps.estimate);
+
+        p->reported_fps = p->fps.estimate;
+        p->reported_vps = p->vps.estimate;
+    }
+}
+
+static enum pl_queue_status get_frame(struct pl_queue *p,
+                                      const struct pl_queue_params *params)
+{
+    if (p->eof)
+        return QUEUE_EOF;
+
+    if (!params->get_frame)
+        return QUEUE_MORE;
+
+    struct pl_source_frame src;
+    enum pl_queue_status ret;
+    switch ((ret = params->get_frame(&src, params))) {
+    case QUEUE_OK:
+        pl_queue_push(p, &src);
+        break;
+    case QUEUE_EOF:
+        pl_queue_push(p, NULL);
+        break;
+    default: break;
+    }
+
+    return ret;
+}
+
+static bool map_frame(struct pl_queue *p, struct entry *entry)
+{
+    if (!entry->mapped) {
+        PL_TRACE(p, "Mapping frame with PTS %f", entry->src.pts);
+        entry->mapped = true;
+        entry->ok = entry->src.map(p->gpu, entry->cache.tex,
+                                   &entry->src, &entry->frame);
+    }
+
+    return entry->ok;
+}
+
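The queue-advancement logic that follows implements zero-order-hold (ZOH) semantics: the most recent frame at or before the target PTS stays "visible" until superseded. A worked example (illustrative numbers, assuming a hypothetical 25 fps source):

    // Hypothetical queue state, frames at PTS {0.00, 0.04, 0.08, 0.12}:
    //
    //   advance(p, 0.05, params); // target PTS between 0.04 and 0.08
    //
    // culls the frame at 0.00 (it can never become visible again), keeps
    // 0.04 at index 0 (the ZOH frame currently visible) and 0.08 at index 1
    // (the next frame); 0.12 simply remains queued for later.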
+// Advance the queue as needed to make sure idx 0 is the last frame before
+// `pts`, and idx 1 is the first frame after `pts` (unless this is the last).
+//
+// Returns QUEUE_OK only if idx 0 is still legal under ZOH semantics.
+static enum pl_queue_status advance(struct pl_queue *p, float pts,
+                                    const struct pl_queue_params *params)
+{
+    // Cull all frames except the last frame before `pts`
+    int culled = 0;
+    for (int i = 1; i < p->queue.num; i++) {
+        if (p->queue.elem[i].src.pts <= pts) {
+            cull_entry(p, &p->queue.elem[i - 1]);
+            culled++;
+        }
+    }
+    PL_ARRAY_REMOVE_RANGE(p->queue, 0, culled);
+
+    // Keep adding new frames until we find one in the future, or EOF
+    while (p->queue.num < 2) {
+        enum pl_queue_status ret;
+        switch ((ret = get_frame(p, params))) {
+        case QUEUE_ERR:
+        case QUEUE_MORE:
+            return ret;
+        case QUEUE_EOF:
+            if (!p->queue.num)
+                return ret;
+            goto done;
+        case QUEUE_OK:
+            if (p->queue.num > 1 && p->queue.elem[1].src.pts <= pts) {
+                cull_entry(p, &p->queue.elem[0]);
+                PL_ARRAY_REMOVE_AT(p->queue, 0);
+            }
+            continue;
+        }
+    }
+
+done:
+    if (p->eof && p->queue.num == 1 && p->fps.estimate) {
+        // Last frame is held for an extra `p->fps.estimate` duration,
+        // afterwards this function just returns EOF.
+        //
+        // Note that if `p->fps.estimate` is not available, then we're
+        // displaying a source that only has a single frame, in which case we
+        // most likely just want to repeat it forever. (Not a perfect
+        // heuristic, but good enough in practice)
+        if (p->queue.elem[0].src.pts + p->fps.estimate < pts) {
+            cull_entry(p, &p->queue.elem[0]);
+            p->queue.num = 0;
+            return QUEUE_EOF;
+        }
+    }
+
+    pl_assert(p->queue.num);
+    return QUEUE_OK;
+}
+
+// Present a single frame as appropriate for `pts`
+static enum pl_queue_status nearest(struct pl_queue *p, struct pl_frame_mix *mix,
+                                    const struct pl_queue_params *params)
+{
+    enum pl_queue_status ret;
+    if ((ret = advance(p, params->pts, params)))
+        return ret;
+
+    struct entry *entry = &p->queue.elem[0];
+    if (!map_frame(p, entry))
+        return QUEUE_ERR;
+
+    // Return a mix containing only this single frame
+    p->tmp_sig.num = p->tmp_ts.num = p->tmp_frame.num = 0;
+    PL_ARRAY_APPEND(p, p->tmp_sig, entry->signature);
+    PL_ARRAY_APPEND(p, p->tmp_frame, &entry->frame);
+    PL_ARRAY_APPEND(p, p->tmp_ts, 0.0);
+    *mix = (struct pl_frame_mix) {
+        .num_frames = 1,
+        .frames = p->tmp_frame.elem,
+        .signatures = p->tmp_sig.elem,
+        .timestamps = p->tmp_ts.elem,
+        .vsync_duration = 1.0,
+    };
+
+    PL_TRACE(p, "Showing single frame with PTS %f for target PTS %f",
+             entry->src.pts, params->pts);
+
+    report_estimates(p);
+    return QUEUE_OK;
+}
+
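For instance (illustrative numbers), with one queued frame at PTS 12.30 and a target PTS of 12.34, the result of `nearest` degenerates to:

    // Resulting pl_frame_mix (illustrative):
    //   .num_frames     = 1
    //   .timestamps     = { 0.0 } // frame is treated as exactly "current"
    //   .vsync_duration = 1.0     // placeholder; no mixing takes place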
+// Special case of `interpolate` for radius = 0, in which case we need exactly
+// the previous frame and the following frame
+static enum pl_queue_status oversample(struct pl_queue *p, struct pl_frame_mix *mix,
+                                       const struct pl_queue_params *params)
+{
+    enum pl_queue_status ret;
+    if ((ret = advance(p, params->pts, params)))
+        return ret;
+
+    // Can't oversample with only a single frame, fall back to ZOH semantics
+    if (p->queue.num < 2)
+        return nearest(p, mix, params);
+
+    struct entry *entries[2] = { &p->queue.elem[0], &p->queue.elem[1] };
+    pl_assert(entries[0]->src.pts <= params->pts);
+    pl_assert(entries[1]->src.pts >= params->pts);
+
+    // Return a mix containing both of these frames
+    p->tmp_sig.num = p->tmp_ts.num = p->tmp_frame.num = 0;
+    for (int i = 0; i < 2; i++) {
+        if (!map_frame(p, entries[i]))
+            return QUEUE_ERR;
+
+        float ts = (entries[i]->src.pts - params->pts) / p->fps.estimate;
+        PL_ARRAY_APPEND(p, p->tmp_sig, entries[i]->signature);
+        PL_ARRAY_APPEND(p, p->tmp_frame, &entries[i]->frame);
+        PL_ARRAY_APPEND(p, p->tmp_ts, ts);
+    }
+
+    *mix = (struct pl_frame_mix) {
+        .num_frames = 2,
+        .frames = p->tmp_frame.elem,
+        .signatures = p->tmp_sig.elem,
+        .timestamps = p->tmp_ts.elem,
+        .vsync_duration = p->vps.estimate / p->fps.estimate,
+    };
+
+    PL_TRACE(p, "Oversampling 2 frames for target PTS %f:", params->pts);
+    for (int i = 0; i < mix->num_frames; i++)
+        PL_TRACE(p, "    id %"PRIu64" ts %f", mix->signatures[i], mix->timestamps[i]);
+
+    report_estimates(p);
+    return QUEUE_OK;
+}
+
+// Present a mixture of frames, relative to the vsync ratio
+static enum pl_queue_status interpolate(struct pl_queue *p,
+                                        struct pl_frame_mix *mix,
+                                        const struct pl_queue_params *params)
+{
+    // No FPS estimate available, possibly the source contains only a single
+    // frame, or this is the first frame to be rendered. Fall back to ZOH
+    // semantics.
+    if (!p->fps.estimate)
+        return nearest(p, mix, params);
+
+    // No radius information, special case in which we only need the previous
+    // and next frames.
+    if (!params->radius)
+        return oversample(p, mix, params);
+
+    float min_pts = params->pts - params->radius * p->fps.estimate,
+          max_pts = params->pts + params->radius * p->fps.estimate;
+
+    enum pl_queue_status ret;
+    if ((ret = advance(p, min_pts, params)))
+        return ret;
+
+    // Keep adding new frames until we've covered the range we care about
+    pl_assert(p->queue.num);
+    while (p->queue.elem[p->queue.num - 1].src.pts < max_pts) {
+        switch ((ret = get_frame(p, params))) {
+        case QUEUE_ERR:
+        case QUEUE_MORE:
+            return ret;
+        case QUEUE_EOF:
+            goto done;
+        case QUEUE_OK:
+            continue;
+        }
+    }
+
+done: ;
+
+    // Construct a mix object representing the current queue state, starting at
+    // the last frame before `min_pts` to make sure there's a fallback frame
+    // available for ZOH semantics.
+    p->tmp_sig.num = p->tmp_ts.num = p->tmp_frame.num = 0;
+    for (int i = 0; i < p->queue.num; i++) {
+        struct entry *entry = &p->queue.elem[i];
+        if (entry->src.pts > max_pts)
+            break;
+        if (!map_frame(p, entry))
+            return QUEUE_ERR;
+
+        float ts = (entry->src.pts - params->pts) / p->fps.estimate;
+        PL_ARRAY_APPEND(p, p->tmp_sig, entry->signature);
+        PL_ARRAY_APPEND(p, p->tmp_frame, &entry->frame);
+        PL_ARRAY_APPEND(p, p->tmp_ts, ts);
+    }
+
+    *mix = (struct pl_frame_mix) {
+        .num_frames = p->tmp_frame.num,
+        .frames = p->tmp_frame.elem,
+        .signatures = p->tmp_sig.elem,
+        .timestamps = p->tmp_ts.elem,
+        .vsync_duration = p->vps.estimate / p->fps.estimate,
+    };
+
+    pl_assert(mix->num_frames);
+    PL_TRACE(p, "Showing mix of %d frames for target PTS %f:",
+             mix->num_frames, params->pts);
+    for (int i = 0; i < mix->num_frames; i++)
+        PL_TRACE(p, "    id %"PRIu64" ts %f", mix->signatures[i], mix->timestamps[i]);
+
+    report_estimates(p);
+    return QUEUE_OK;
+}
+
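Concretely (illustrative numbers): with a mixer radius of 2.0 and an estimated frame duration of 1/24 s, a target PTS of 10.0 produces the following window and normalized timestamps:

    // radius = 2.0, fps.estimate = 1/24 s, target pts = 10.0:
    //
    //   min_pts = 10.0 - 2/24 ~ 9.917,  max_pts = 10.0 + 2/24 ~ 10.083
    //
    // Every queued frame with PTS inside this window, plus one fallback frame
    // before min_pts, ends up in the mix, e.g.:
    //
    //   source PTS:  9.90    9.94    9.98    10.02   10.06
    //   mix ts:     -2.40   -1.44   -0.48    0.48    1.44   // (pts-10.0)*24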
+static bool prefill(struct pl_queue *p, const struct pl_queue_params *params)
+{
+    int min_frames = 2 * ceilf(params->radius);
+    min_frames = PL_MAX(min_frames, 2);
+
+    while (p->queue.num < min_frames) {
+        switch (get_frame(p, params)) {
+        case QUEUE_ERR:
+            return false;
+        case QUEUE_EOF:
+        case QUEUE_MORE:
+            return true;
+        case QUEUE_OK:
+            continue;
+        }
+    }
+
+    // In the most likely case, the first few frames will all be required. So
+    // force-map them all to initialize GPU state on initial rendering. This is
+    // better than the alternative of missing the cache later, when timing is
+    // more relevant.
+    for (int i = 0; i < min_frames; i++) {
+        if (!map_frame(p, &p->queue.elem[i]))
+            return false;
+    }
+
+    return true;
+}
+
+enum pl_queue_status pl_queue_update(struct pl_queue *p,
+                                     struct pl_frame_mix *out_mix,
+                                     const struct pl_queue_params *params)
+{
+    p->fps.estimate = PL_DEF(p->fps.estimate, params->frame_duration);
+    p->vps.estimate = PL_DEF(p->vps.estimate, params->vsync_duration);
+
+    float delta = params->pts - p->prev_pts;
+    if (delta < 0.0) {
+
+        PL_ERR(p, "Requested PTS %f is lower than the previously rendered "
+               "PTS %f. This is not supported, PTS must be monotonically "
+               "increasing! Please use `pl_queue_reset` to reset the frame "
+               "queue on discontinuous PTS jumps.", params->pts, p->prev_pts);
+        return QUEUE_ERR;
+
+    } else if (delta > 1.0) {
+
+        // A jump of more than a second is probably the result of a
+        // discontinuous jump after a suspend. To prevent this from exploding
+        // the FPS estimate, treat this as a new frame.
+        PL_TRACE(p, "Discontinuous target PTS jump %f -> %f, ignoring...",
+                 p->prev_pts, params->pts);
+
+    } else if (delta > 0) {
+
+        update_estimate(&p->vps, params->pts - p->prev_pts);
+
+    }
+
+    p->prev_pts = params->pts;
+
+    // As a special case, prefill the queue if this is the first frame
+    if (!params->pts && !p->queue.num) {
+        if (!prefill(p, params))
+            return QUEUE_ERR;
+    }
+
+    // Ignore unrealistically high or low FPS, common near start of playback
+    static const float max_vsync = 1.0 / MIN_FPS;
+    static const float min_vsync = 1.0 / MAX_FPS;
+    if (p->vps.estimate > min_vsync && p->vps.estimate < max_vsync) {
+        // We know the vsync duration, so construct an interpolation mix
+        return interpolate(p, out_mix, params);
+    } else {
+        // We don't know the vsync duration (yet), so just point-sample the
+        // nearest (zero-order-hold) frame
+        return nearest(p, out_mix, params);
+    }
+}
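Putting the pieces together, a timed playback loop built on the new API might look roughly like the following. This is a sketch, not part of the patch: `my_decoder`, `next_vsync_pts`, `my_get_frame` (a callback like the one sketched earlier), and the surrounding `renderer`/`target` objects are all assumed to exist in the application:

    #include <libplacebo/renderer.h>
    #include <libplacebo/utils/frame_queue.h>

    // Hypothetical application glue, not part of libplacebo:
    struct my_decoder;
    float next_vsync_pts(void); // seconds, relative to the first frame
    enum pl_queue_status my_get_frame(struct pl_source_frame *out_frame,
                                      const struct pl_queue_params *params);

    void play(const struct pl_gpu *gpu, struct pl_renderer *renderer,
              const struct pl_frame *target, struct my_decoder *dec)
    {
        struct pl_render_params render_params = pl_render_default_params;
        struct pl_queue *queue = pl_queue_create(gpu);
        bool done = false;

        while (!done) {
            struct pl_frame_mix mix;
            enum pl_queue_status status = pl_queue_update(queue, &mix,
                &(struct pl_queue_params) {
                    .pts       = next_vsync_pts(),
                    .radius    = pl_frame_mix_radius(&render_params),
                    .get_frame = my_get_frame,
                    .priv      = dec,
                });

            switch (status) {
            case QUEUE_OK:
                pl_render_image_mix(renderer, &mix, target, &render_params);
                // ... submit / present via the swapchain here ...
                break;
            case QUEUE_MORE:
                continue; // under-run: wait for more frames, then retry
            case QUEUE_EOF:
            case QUEUE_ERR:
                done = true; // drained (or failed), stop playback
                break;
            }
        }

        pl_queue_destroy(&queue);
    }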