From 0e5944078199500bb17648131871723501d0fa22 Mon Sep 17 00:00:00 2001 From: Niklas Haas Date: Mon, 22 Mar 2021 22:35:10 +0100 Subject: [PATCH] utils/frame_queue: add new interpolation helper The API for `pl_frame_mix` is currently a bit abstract and high-level. Most clients will probably not care to reimplement the massive amounts of logic that go into constructing a proper frame queue. Enter `pl_queue`, the solution for all your frame queuing needs! This will interface with the decoder via your choice of a push or pull API, and translate all decoded frames into an abstract internal timeline, `pl_frame_mix` slices of which can be probed for arbitrary timestamps. As an extra bonus, this also contains code for things like FPS estimation and VPS (vsyncs per second) estimation, somewhat mirroring the logic in e.g. mpv's interpolation/display-sync code. It's worth pointing out that this is a rather severe API break for `pl_frame_mix`, but I'm pretty sure this API has absolutely zero users, given the massive amount of effort required to use it before this commit. So I didn't bother with backwards compatibility, aside from an API bump as usualy. --- README.md | 5 + meson.build | 2 +- src/common.h | 1 + src/include/libplacebo/renderer.h | 33 +- src/include/libplacebo/utils/frame_queue.h | 149 ++++++ src/meson.build | 2 + src/pl_alloc.h | 12 +- src/renderer.c | 49 +- src/tests/gpu_tests.h | 4 +- src/utils/frame_queue.c | 560 +++++++++++++++++++++ 10 files changed, 775 insertions(+), 42 deletions(-) create mode 100644 src/include/libplacebo/utils/frame_queue.h create mode 100644 src/utils/frame_queue.c diff --git a/README.md b/README.md index 2c565231..01874207 100644 --- a/README.md +++ b/README.md @@ -164,6 +164,11 @@ identifiers (so they can be freely merged together). 
- `renderer.h`: A high-level renderer which combines the shader primitives and dispatch mechanism into a fully-fledged rendering pipeline that takes raw texture data and transforms it into the desired output image. +- `utils/frame_queue.h`: A high-level frame queuing abstraction. This API + can be used to interface with a decoder (or other source of frames), and + takes care of translating timestamped frames into a virtual stream of + presentation events suitable for use with `renderer.h`, including any extra + context required for frame interpolation (`pl_frame_mix`). - `utils/upload.h`: A high-level helper for uploading generic data in some user-described format to a plane texture suitable for use with `renderer.h`. These helpers essentially take care of picking/mapping a good image format diff --git a/meson.build b/meson.build index f28b0a2c..231dcafd 100644 --- a/meson.build +++ b/meson.build @@ -2,7 +2,7 @@ project('libplacebo', ['c', 'cpp'], license: 'LGPL2.1+', default_options: ['c_std=c99', 'cpp_std=c++11', 'warning_level=2'], meson_version: '>=0.51', - version: '3.117.0', + version: '3.118.0', ) # Version number diff --git a/src/common.h b/src/common.h index 19433560..f97a9a83 100644 --- a/src/common.h +++ b/src/common.h @@ -69,6 +69,7 @@ #include "include/libplacebo/shaders/lut.h" #include "include/libplacebo/shaders/sampling.h" #include "include/libplacebo/swapchain.h" +#include "include/libplacebo/utils/frame_queue.h" #include "include/libplacebo/utils/upload.h" #ifdef PL_HAVE_LCMS diff --git a/src/include/libplacebo/renderer.h b/src/include/libplacebo/renderer.h index f2a3accc..fd50e7e8 100644 --- a/src/include/libplacebo/renderer.h +++ b/src/include/libplacebo/renderer.h @@ -91,13 +91,15 @@ struct pl_render_params { // Configures the algorithm used for frame mixing (when using // `pl_render_image_mix`). Ignored otherwise. 
As a special requirement, // this must be a filter config with `polar` set to false, since it's only - // used for 1D mixing and thus only 1D filters are compatible. If left as - // NULL, then libplacebo will use a built-in, inexpensive frame mixing - // algorithm. + // used for 1D mixing and thus only 1D filters are compatible. // - // It's worth pointing out that this built-in frame mixing can often be - // better than any of the available filter configurations. So it's not a - // bad idea to leave this as NULL. In fact, that's the recommended default. + // As a special case, if `frame_mixer->kernel` is NULL, then libplacebo + // will use a built-in, inexpensive and relatively unobtrusive oversampling + // frame mixing algorithm. (See `pl_oversample_frame_mixer`) + // + // If set to NULL, frame mixing is disabled, in which case + // `pl_render_image_mix` behaves as `pl_render_image`, also completely + // bypassing the mixing cache. const struct pl_filter_config *frame_mixer; // Configures the settings used to deband source textures. Leaving this as @@ -252,6 +254,10 @@ extern const struct pl_render_params pl_render_default_params; // and where maximum image quality is desired. extern const struct pl_render_params pl_render_high_quality_params; +// Special filter config for the built-in oversampling frame mixing algorithm. +// This is equivalent to (struct pl_filter_config) {0}. +extern const struct pl_filter_config pl_oversample_frame_mixer; + #define PL_MAX_PLANES 4 // High level description of a single slice of an image. This basically @@ -518,7 +524,10 @@ struct pl_frame_mix { // A list of the frames themselves. The frames can have different // colorspaces, configurations of planes, or even sizes. - const struct pl_frame *frames; + // + // Note: This is a list of pointers, to avoid users having to copy + // around `pl_frame` structs when re-organizing this array. + const struct pl_frame **frames; // A list of unique signatures, one for each frame. 
These are used to // identify frames across calls to this function, so it's crucial that they @@ -556,7 +565,7 @@ struct pl_frame_mix { float vsync_duration; // Explanation of the frame mixing radius: The algorithm chosen in - // `pl_render_params.frame_mixing` has a canonical radius equal to + // `pl_render_params.frame_mixer` has a canonical radius equal to // `pl_filter_config.kernel->radius`. This means that the frame mixing // algorithm will (only) need to consult all of the frames that have a // distance within the interval [-radius, radius]. As such, the user should @@ -568,6 +577,14 @@ struct pl_frame_mix { // "next" frames. }; +// Helper function to calculate the frame mixing radius. +static inline float pl_frame_mix_radius(const struct pl_render_params *params) +{ + return (params->frame_mixer && params->frame_mixer->kernel) + ? params->frame_mixer->kernel->radius + : 0.0; +} + // Render a mixture of images to the target using the given parameters. This // functions much like a generalization of `pl_render_image`, for when the API // user has more control over the frame queue / vsync loop, and can provide a diff --git a/src/include/libplacebo/utils/frame_queue.h b/src/include/libplacebo/utils/frame_queue.h new file mode 100644 index 00000000..846fec25 --- /dev/null +++ b/src/include/libplacebo/utils/frame_queue.h @@ -0,0 +1,149 @@ +/* + * This file is part of libplacebo. + * + * libplacebo is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * libplacebo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <libplacebo/renderer.h> + +#ifndef LIBPLACEBO_FRAME_QUEUE_H +#define LIBPLACEBO_FRAME_QUEUE_H + +// This file contains an abstraction layer for automatically turning a +// conceptual stream of (frame, pts) pairs, as emitted by a decoder or filter +// graph, into a `pl_frame_mix` suitable for `pl_render_image_mix`. +// +// This API ensures that minimal work is performed (e.g. only mapping frames +// that are actually required), while also satisfying the requirements +// of any configured frame mixer. + +enum pl_queue_status { + QUEUE_OK, // success + QUEUE_EOF, // no more frames are available + QUEUE_MORE, // more frames needed, but not (yet) available + QUEUE_ERR = -1, // some unknown error occurred while retrieving frames +}; + +struct pl_source_frame { + // The frame's presentation timestamp, in seconds relative to the first + // frame. These must be monotonically increasing for subsequent frames. + // To implement a discontinuous jump, users must explicitly reset the + // frame queue with `pl_queue_reset` and restart from PTS 0.0. + float pts; + + // Abstract frame data itself. To allow mapping frames only when they're + // actually needed, frames use a lazy representation. The provided + // callbacks will be invoked to interface with it. + void *frame_data; + + // This will be called to map the frame to the GPU, only if needed. + // + // `tex` is a pointer to an array of 4 texture objects (or NULL), which + // *may* serve as backing storage for the texture being mapped. These are + // intended to be recreated by `map`, e.g. using `pl_tex_recreate` or + // `pl_upload_plane` as appropriate. They will be managed internally by + // `pl_queue` and destroyed at some unspecified future point in time. + // + // Note: If `map` fails, it will not be retried, nor will `discard` be run. + // The user should clean up state in this case. 
+ bool (*map)(const struct pl_gpu *gpu, const struct pl_tex **tex, + const struct pl_source_frame *src, struct pl_frame *out_frame); + + // If present, this will be called on frames that are done being used by + // `pl_queue`. This may be useful to e.g. unmap textures backed by external + // APIs such as hardware decoders. (Optional) + void (*unmap)(const struct pl_gpu *gpu, struct pl_frame *frame, + const struct pl_source_frame *src); + + // This function will be called for frames that are deemed unnecessary + // (e.g. never became visible) and should instead be cleanly freed. + // (Optional) + void (*discard)(const struct pl_source_frame *src); +}; + +// Create a new, empty frame queue. +// +// It's highly recommended to fully render a single frame with `pts == 0.0`, +// and flush the GPU pipeline with `pl_gpu_finish`, prior to starting the timed +// playback loop. +struct pl_queue *pl_queue_create(const struct pl_gpu *gpu); +void pl_queue_destroy(struct pl_queue **queue); + +// Explicitly clear the queue. This is essentially equivalent to destroying +// and recreating the queue, but preserves any internal memory allocations. +void pl_queue_reset(struct pl_queue *queue); + +// Explicitly push a frame. This is an alternative way to feed the frame queue +// with incoming frames, the other method being the asynchronous callback +// specified as `pl_queue_params.get_frame`. Both methods may be used +// simultaneously, although providing `get_frame` is recommended since it +// avoids the risk of the queue underrunning. +// +// When no more frames are available, call this function with `frame == NULL` +// to indicate EOF and begin draining the frame queue. +void pl_queue_push(struct pl_queue *queue, const struct pl_source_frame *frame); + +struct pl_queue_params { + // The PTS of the frame that will be rendered. This should be set to the + // timestamp (in seconds) of the next vsync, relative to the initial frame. + // + // These must be monotonically increasing. 
To implement a discontinuous + // jump, users must explicitly reset the frame queue with `pl_queue_reset` + // and restart from PTS 0.0. + float pts; + + // The radius of the configured mixer. This should be set to the value + // as returned by `pl_frame_mix_radius`. + float radius; + + // The estimated duration of a vsync, in seconds. This will only be used as + // a hint, the true value will be estimated by comparing `pts` timestamps + // between calls to `pl_queue_update`. (Optional) + float vsync_duration; + + // The estimated duration of a frame, in seconds. This will only be used as + // an initial hint, the true value will be estimated by comparing `pts` + // timestamps between source frames. (Optional) + float frame_duration; + + // This callback will be used to pull new frames from the decoder. It may + // block if needed. The user is responsible for setting appropriate time + // limits and/or returning and interpreting QUEUE_MORE as sensible. + // + // Providing this callback is entirely optional. Users can instead choose + // to manually feed the frame queue with new frames using `pl_queue_push`. + enum pl_queue_status (*get_frame)(struct pl_source_frame *out_frame, + const struct pl_queue_params *params); + void *priv; +}; + +// Advance the frame queue's internal state to the target timestamp. Any frames +// which are no longer needed (i.e. too far in the past) are automatically +// unmapped and evicted. Any future frames which are needed to fill the queue +// must either have been pushed in advance, or will be requested using the +// provided `get_frame` callback. +// +// This function may fail with QUEUE_MORE, in which case the user must +// ensure more frames are available and then re-run this function with +// the same parameters. +// +// The resulting mix of frames in `out_mix` will represent the neighbourhood of +// the target timestamp, and can be passed to `pl_render_image_mix` as-is. 
+// +// Note: `out_mix` will only remain valid until the next call to `pl_queue_*`. +enum pl_queue_status pl_queue_update(struct pl_queue *queue, + struct pl_frame_mix *out_mix, + const struct pl_queue_params *params); + +#endif // LIBPLACEBO_FRAME_QUEUE_H diff --git a/src/meson.build b/src/meson.build index e214d7c8..34c384b5 100644 --- a/src/meson.build +++ b/src/meson.build @@ -165,6 +165,7 @@ headers = [ 'swapchain.h', 'utils/dav1d.h', 'utils/dav1d_internal.h', + 'utils/frame_queue.h', 'utils/libav.h', 'utils/libav_internal.h', 'utils/upload.h', @@ -192,6 +193,7 @@ sources = [ 'shaders/sampling.c', 'spirv.c', 'swapchain.c', + 'utils/frame_queue.c', 'utils/upload.c', ] diff --git a/src/pl_alloc.h b/src/pl_alloc.h index bfb6b478..b0c52aa9 100644 --- a/src/pl_alloc.h +++ b/src/pl_alloc.h @@ -145,14 +145,18 @@ void pl_ref_deref(struct pl_ref **ref); } \ } while (0) -#define PL_ARRAY_REMOVE_AT(arr, idx) \ +#define PL_ARRAY_REMOVE_RANGE(arr, idx, count) \ do { \ size_t _idx = (idx); \ - assert(_idx < (arr).num); \ - memmove(&(arr).elem[_idx], &(arr).elem[_idx + 1], \ - (--(arr).num - _idx) * sizeof((arr).elem[0])); \ + size_t _count = (count); \ + assert(_idx + _count <= (arr).num); \ + memmove(&(arr).elem[_idx], &(arr).elem[_idx + _count], \ + ((arr).num - _idx - _count) * sizeof((arr).elem[0])); \ + (arr).num -= _count; \ } while (0) +#define PL_ARRAY_REMOVE_AT(arr, idx) PL_ARRAY_REMOVE_RANGE(arr, idx, 1) + #define PL_ARRAY_INSERT_AT(parent, arr, idx, ...) 
\ do { \ size_t _idx = (idx); \ diff --git a/src/renderer.c b/src/renderer.c index 08cc9154..e58f96ca 100644 --- a/src/renderer.c +++ b/src/renderer.c @@ -70,6 +70,7 @@ struct pl_renderer { // Frame cache (for frame mixing / interpolation) PL_ARRAY(struct cached_frame) frames; + PL_ARRAY(const struct pl_tex *) frame_fbos; }; enum { @@ -174,6 +175,8 @@ void pl_renderer_destroy(struct pl_renderer **p_rr) pl_tex_destroy(rr->gpu, &rr->fbos.elem[i]); for (int i = 0; i < rr->frames.num; i++) pl_tex_destroy(rr->gpu, &rr->frames.elem[i].tex); + for (int i = 0; i < rr->frame_fbos.num; i++) + pl_tex_destroy(rr->gpu, &rr->frame_fbos.elem[i]); // Free all shader resource objects pl_shader_obj_destroy(&rr->peak_detect_state); @@ -219,7 +222,7 @@ void pl_renderer_flush_cache(struct pl_renderer *rr) const struct pl_render_params pl_render_default_params = { .upscaler = &pl_filter_spline36, .downscaler = &pl_filter_mitchell, - .frame_mixer = NULL, + .frame_mixer = &pl_oversample_frame_mixer, .sigmoid_params = &pl_sigmoid_default_params, .peak_detect_params = &pl_peak_detect_default_params, @@ -230,7 +233,7 @@ const struct pl_render_params pl_render_default_params = { const struct pl_render_params pl_render_high_quality_params = { .upscaler = &pl_filter_ewa_lanczos, .downscaler = &pl_filter_mitchell, - .frame_mixer = NULL, + .frame_mixer = &pl_oversample_frame_mixer, .deband_params = &pl_deband_default_params, .sigmoid_params = &pl_sigmoid_default_params, @@ -239,6 +242,8 @@ const struct pl_render_params pl_render_high_quality_params = { .dither_params = &pl_dither_default_params, }; +const struct pl_filter_config pl_oversample_frame_mixer = {0}; + #define FBOFMT(n) (params->disable_fbos ? 
NULL : rr->fbofmt[n]) // Represents a "in-flight" image, which is either a shader that's in the @@ -2245,7 +2250,7 @@ bool pl_render_image_mix(struct pl_renderer *rr, const struct pl_frame_mix *imag struct pass_state pass = { .rr = rr, - .image = images->frames[0], + .image = *images->frames[0], .target = *ptarget, }; @@ -2253,33 +2258,17 @@ bool pl_render_image_mix(struct pl_renderer *rr, const struct pl_frame_mix *imag // currently visible on an idealized zero-order-hold display. for (int i = 1; i < images->num_frames; i++) { if (images->timestamps[i] <= 0.0) - pass.image = images->frames[i]; + pass.image = *images->frames[i]; } - if (rr->disable_mixing || !FBOFMT(4)) + if (!params->frame_mixer || rr->disable_mixing || !FBOFMT(4)) goto fallback; if (!pass_infer_state(&pass, false)) return false; - // Round the output rect and clip it to the framebuffer dimensions. This - // will determine the size of the intermediate crop that we actually care - // about rendering. Note that we necessarily drop sub-pixel offsets in the - // target, because these may change from frame to frame - compensating for - // them in the src_rect will result in misalignment. - struct pl_frame *target = &pass.target; - const struct pl_tex *dst_ref = target->planes[pass.dst_ref].texture; - target->crop = (struct pl_rect2df) { - .x0 = roundf(PL_MAX(target->crop.x0, 0.0)), - .y0 = roundf(PL_MAX(target->crop.y0, 0.0)), - .x1 = roundf(PL_MIN(target->crop.x1, dst_ref->params.w)), - .y1 = roundf(PL_MIN(target->crop.y1, dst_ref->params.h)), - }; - - int out_w = fabs(pl_rect_w(target->crop)), - out_h = fabs(pl_rect_h(target->crop)); - out_w = PL_DEF(out_w, dst_ref->params.w); - out_h = PL_DEF(out_h, dst_ref->params.h); + int out_w = abs(pl_rect_w(pass.dst_rect)), + out_h = abs(pl_rect_h(pass.dst_rect)); // The color space to mix the frames in. We arbitrarily choose to use the // "current" frame's color space, but converted to RGB. 
@@ -2310,7 +2299,7 @@ bool pl_render_image_mix(struct pl_renderer *rr, const struct pl_frame_mix *imag (unsigned long long) sig, pts); float weight; - if (params->frame_mixer) { + if (params->frame_mixer->kernel) { float radius = params->frame_mixer->kernel->radius; if (fabs(pts) >= radius) { @@ -2367,8 +2356,8 @@ bool pl_render_image_mix(struct pl_renderer *rr, const struct pl_frame_mix *imag f = &rr->frames.elem[rr->frames.num++]; *f = (struct cached_frame) { .signature = sig, - .color = images->frames[i].color, - .profile = images->frames[i].profile, + .color = images->frames[i]->color, + .profile = images->frames[i]->profile, }; } @@ -2384,6 +2373,10 @@ bool pl_render_image_mix(struct pl_renderer *rr, const struct pl_frame_mix *imag // If we can't reuse the entry, we need to render to this // texture first PL_TRACE(rr, " -> Cached texture missing or invalid.. (re)creating"); + if (!f->tex) { + if (PL_ARRAY_POP(rr->frame_fbos, &f->tex)) + pl_tex_invalidate(rr->gpu, f->tex); + } bool ok = pl_tex_recreate(rr->gpu, &f->tex, &(struct pl_tex_params) { .w = out_w, .h = out_h, @@ -2404,7 +2397,7 @@ bool pl_render_image_mix(struct pl_renderer *rr, const struct pl_frame_mix *imag // in their native colorspaces. Preserving the original colorspace // avoids precision loss due to unnecessary color space roundtrips. // We also explicitly clear the ICC profile, see below for why. 
- struct pl_frame image = images->frames[i]; + struct pl_frame image = *images->frames[i]; image.profile = (struct pl_icc_profile) {0}; struct pl_frame inter_target = { @@ -2437,7 +2430,7 @@ bool pl_render_image_mix(struct pl_renderer *rr, const struct pl_frame_mix *imag if (rr->frames.elem[i].evict) { PL_TRACE(rr, "Evicting frame with signature %llx from cache", (unsigned long long) rr->frames.elem[i].signature); - pl_tex_destroy(rr->gpu, &rr->frames.elem[i].tex); + PL_ARRAY_APPEND(rr, rr->frame_fbos, rr->frames.elem[i].tex); PL_ARRAY_REMOVE_AT(rr->frames, i); continue; } else { diff --git a/src/tests/gpu_tests.h b/src/tests/gpu_tests.h index 83d9d15e..aaf87cef 100644 --- a/src/tests/gpu_tests.h +++ b/src/tests/gpu_tests.h @@ -1041,12 +1041,13 @@ static void pl_render_tests(const struct pl_gpu *gpu) // Attempt frame mixing #define NUM_MIX_FRAMES 10 struct pl_frame frames[NUM_MIX_FRAMES]; + const struct pl_frame *pframes[NUM_MIX_FRAMES]; uint64_t signatures[NUM_MIX_FRAMES]; float timestamps[NUM_MIX_FRAMES]; struct pl_frame_mix mix = { .num_frames = NUM_MIX_FRAMES, - .frames = frames, + .frames = pframes, .signatures = signatures, .timestamps = timestamps, .vsync_duration = 24.0 / 60.0, @@ -1063,6 +1064,7 @@ static void pl_render_tests(const struct pl_gpu *gpu) }, }; + pframes[i] = &frames[i]; signatures[i] = i; timestamps[i] = i; } diff --git a/src/utils/frame_queue.c b/src/utils/frame_queue.c new file mode 100644 index 00000000..8d8b526a --- /dev/null +++ b/src/utils/frame_queue.c @@ -0,0 +1,560 @@ +/* + * This file is part of libplacebo. + * + * libplacebo is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * libplacebo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with libplacebo. If not, see . + */ + +#include + +#include "common.h" +#include "context.h" + +struct cache_entry { + const struct pl_tex *tex[4]; +}; + +struct entry { + struct cache_entry cache; + struct pl_source_frame src; + struct pl_frame frame; + uint64_t signature; + bool mapped; + bool ok; +}; + +// Hard limits for vsync timing validity +#define MIN_FPS 10 +#define MAX_FPS 200 + +// Limits for FPS estimation state +#define MAX_SAMPLES 32 +#define MIN_SAMPLES 8 + +struct pool { + float samples[MAX_SAMPLES]; + float estimate; + float sum; + int idx; + int num; + int total; +}; + +struct pl_queue { + const struct pl_gpu *gpu; + struct pl_context *ctx; + + // Frame queue and state + PL_ARRAY(struct entry) queue; + uint64_t signature; + bool eof; + + // Average vsync/frame fps estimation state + struct pool vps, fps; + float reported_vps; + float reported_fps; + float prev_pts; + + // Storage for temporary arrays + PL_ARRAY(uint64_t) tmp_sig; + PL_ARRAY(float) tmp_ts; + PL_ARRAY(const struct pl_frame *) tmp_frame; + + // Queue of GPU objects to reuse + PL_ARRAY(struct cache_entry) cache; +}; + +struct pl_queue *pl_queue_create(const struct pl_gpu *gpu) +{ + struct pl_queue *p = pl_alloc_ptr(NULL, p); + *p = (struct pl_queue) { + .gpu = gpu, + .ctx = gpu->ctx, + }; + + return p; +} + +static inline void unmap_frame(struct pl_queue *p, struct entry *entry) +{ + if (!entry->mapped && entry->src.discard) { + PL_TRACE(p, "Discarding unused frame with PTS %f", entry->src.pts); + entry->src.discard(&entry->src); + } + + if (entry->mapped && entry->ok && entry->src.unmap) { + PL_TRACE(p, "Unmapping frame with PTS %f", 
entry->src.pts); + entry->src.unmap(p->gpu, &entry->frame, &entry->src); + } +} + + +void pl_queue_destroy(struct pl_queue **queue) +{ + struct pl_queue *p = *queue; + if (!p) + return; + + for (int n = 0; n < p->queue.num; n++) { + struct entry *entry = &p->queue.elem[n]; + unmap_frame(p, entry); + for (int i = 0; i < PL_ARRAY_SIZE(entry->cache.tex); i++) + pl_tex_destroy(p->gpu, &entry->cache.tex[i]); + } + + for (int n = 0; n < p->cache.num; n++) { + for (int i = 0; i < PL_ARRAY_SIZE(p->cache.elem[n].tex); i++) + pl_tex_destroy(p->gpu, &p->cache.elem[n].tex[i]); + } + + pl_free(p); + *queue = NULL; +} + +static inline void cull_entry(struct pl_queue *p, struct entry *entry) +{ + unmap_frame(p, entry); + + // Recycle non-empty texture cache entries + static const struct cache_entry null_cache = {0}; + if (memcmp(&entry->cache, &null_cache, sizeof(null_cache)) != 0) { + for (int i = 0; i < PL_ARRAY_SIZE(entry->cache.tex); i++) { + if (entry->cache.tex[i]) + pl_tex_invalidate(p->gpu, entry->cache.tex[i]); + } + PL_ARRAY_APPEND(p, p->cache, entry->cache); + } +} + +void pl_queue_reset(struct pl_queue *p) +{ + for (int i = 0; i < p->queue.num; i++) + cull_entry(p, &p->queue.elem[i]); + + *p = (struct pl_queue) { + .gpu = p->gpu, + .ctx = p->ctx, + + // Explicitly preserve allocations + .queue.elem = p->queue.elem, + .tmp_sig.elem = p->tmp_sig.elem, + .tmp_ts.elem = p->tmp_ts.elem, + .tmp_frame.elem = p->tmp_frame.elem, + + // Reuse GPU object cache entirely + .cache = p->cache, + }; +} + +static inline float delta(float old, float new) +{ + return fabs((new - old) / PL_MIN(new, old)); +} + +static inline void update_estimate(struct pool *pool, float cur) +{ + if (pool->num) { + static const float max_delta = 0.3; + if (delta(pool->sum / pool->num, cur) > max_delta) { + pool->sum = 0.0; + pool->num = pool->idx = 0; + } + } + + if (pool->num++ == MAX_SAMPLES) { + pool->sum -= pool->samples[pool->idx]; + pool->num--; + } + + pool->sum += pool->samples[pool->idx] = cur; 
+ pool->idx = (pool->idx + 1) % MAX_SAMPLES; + pool->total++; + + if (pool->total < MIN_SAMPLES || pool->num >= MIN_SAMPLES) + pool->estimate = pool->sum / pool->num; +} + +void pl_queue_push(struct pl_queue *p, const struct pl_source_frame *src) +{ + if (!src) { + PL_TRACE(p, "Received EOF, draining frame queue..."); + p->eof = true; + return; + } + + PL_TRACE(p, "Received new frame with PTS %f", src->pts); + + struct cache_entry cache = {0}; + PL_ARRAY_POP(p->cache, &cache); + PL_ARRAY_APPEND(p, p->queue, (struct entry) { + .signature = p->signature++, + .cache = cache, + .src = *src, + }); + + if (p->queue.num > 1) { + float prev_pts = p->queue.elem[p->queue.num - 2].src.pts; + float delta = src->pts - prev_pts; + if ((p->fps.estimate && delta > 10.0 * p->fps.estimate) || delta < 0.0) { + // Ignore very large discontinuities or backwards jumps in PTS + PL_TRACE(p, "Discontinuous source PTS jump %f -> %f, ignoring...", + prev_pts, src->pts); + } else { + update_estimate(&p->fps, delta); + } + } +} + +static void report_estimates(struct pl_queue *p) +{ + if (p->fps.total >= MIN_SAMPLES && p->vps.total >= MIN_SAMPLES) { + if (p->reported_fps && p->reported_vps) { + // Only re-report the estimates if they've changed considerably from + // the previously reported values + static const float report_delta = 0.3; + float delta_fps = delta(p->reported_fps, p->fps.estimate); + float delta_vps = delta(p->reported_vps, p->vps.estimate); + if (delta_fps < report_delta && delta_vps < report_delta) + return; + } + + PL_INFO(p, "Estimated source FPS: %.3f, display FPS: %.3f", + 1.0 / p->fps.estimate, 1.0 / p->vps.estimate); + + p->reported_fps = p->fps.estimate; + p->reported_vps = p->vps.estimate; + } +} + +static enum pl_queue_status get_frame(struct pl_queue *p, + const struct pl_queue_params *params) +{ + if (p->eof) + return QUEUE_EOF; + + if (!params->get_frame) + return QUEUE_MORE; + + struct pl_source_frame src; + enum pl_queue_status ret; + switch ((ret = 
params->get_frame(&src, params))) { + case QUEUE_OK: + pl_queue_push(p, &src); + break; + case QUEUE_EOF: + pl_queue_push(p, NULL); + break; + default: break; + } + + return ret; +} + +static bool map_frame(struct pl_queue *p, struct entry *entry) +{ + if (!entry->mapped) { + PL_TRACE(p, "Mapping frame with PTS %f", entry->src.pts); + entry->mapped = true; + entry->ok = entry->src.map(p->gpu, entry->cache.tex, + &entry->src, &entry->frame); + } + + return entry->ok; +} + +// Advance the queue as needed to make sure idx 0 is the last frame before +// `pts`, and idx 1 is the first frame after `pts` (unless this is the last). +// +// Returns QUEUE_OK only if idx 0 is still legal under ZOH semantics. +static enum pl_queue_status advance(struct pl_queue *p, float pts, + const struct pl_queue_params *params) +{ + // Cull all frames except the last frame before `pts` + int culled = 0; + for (int i = 1; i < p->queue.num; i++) { + if (p->queue.elem[i].src.pts <= pts) { + cull_entry(p, &p->queue.elem[i - 1]); + culled++; + } + } + PL_ARRAY_REMOVE_RANGE(p->queue, 0, culled); + + // Keep adding new frames until we find one in the future, or EOF + while (p->queue.num < 2) { + enum pl_queue_status ret; + switch ((ret = get_frame(p, params))) { + case QUEUE_ERR: + case QUEUE_MORE: + return ret; + case QUEUE_EOF: + if (!p->queue.num) + return ret; + goto done; + case QUEUE_OK: + if (p->queue.num > 1 && p->queue.elem[1].src.pts <= pts) { + cull_entry(p, &p->queue.elem[0]); + PL_ARRAY_REMOVE_AT(p->queue, 0); + } + continue; + } + } + +done: + if (p->eof && p->queue.num == 1 && p->fps.estimate) { + // Last frame is held for an extra `p->fps.estimate` duration, + // afterwards this function just returns EOF. + // + // Note that if `p->fps.estimate` is not available, then we're + // displaying a source that only has a single frame, in which case we + // most likely just want to repeat it forever. 
(Not a perfect + // heuristic, but w/e) + if (p->queue.elem[0].src.pts + p->fps.estimate < pts) { + cull_entry(p, &p->queue.elem[0]); + p->queue.num = 0; + return QUEUE_EOF; + } + } + + pl_assert(p->queue.num); + return QUEUE_OK; +} + +// Present a single frame as appropriate for `pts` +static enum pl_queue_status nearest(struct pl_queue *p, struct pl_frame_mix *mix, + const struct pl_queue_params *params) +{ + enum pl_queue_status ret; + if ((ret = advance(p, params->pts, params))) + return ret; + + struct entry *entry = &p->queue.elem[0]; + if (!map_frame(p, entry)) + return QUEUE_ERR; + + // Return a mix containing only this single frame + p->tmp_sig.num = p->tmp_ts.num = p->tmp_frame.num = 0; + PL_ARRAY_APPEND(p, p->tmp_sig, entry->signature); + PL_ARRAY_APPEND(p, p->tmp_frame, &entry->frame); + PL_ARRAY_APPEND(p, p->tmp_ts, 0.0); + *mix = (struct pl_frame_mix) { + .num_frames = 1, + .frames = p->tmp_frame.elem, + .signatures = p->tmp_sig.elem, + .timestamps = p->tmp_ts.elem, + .vsync_duration = 1.0, + }; + + PL_TRACE(p, "Showing single frame with PTS %f for target PTS %f", + entry->src.pts, params->pts); + + report_estimates(p); + return QUEUE_OK; +} + +// Special case of `interpolate` for radius = 0, in which case we need exactly +// the previous frame and the following frame +static enum pl_queue_status oversample(struct pl_queue *p, struct pl_frame_mix *mix, + const struct pl_queue_params *params) +{ + enum pl_queue_status ret; + if ((ret = advance(p, params->pts, params))) + return ret; + + // Can't oversample with only a single frame, fall back to ZOH semantics + if (p->queue.num < 2) + return nearest(p, mix, params); + + struct entry *entries[2] = { &p->queue.elem[0], &p->queue.elem[1] }; + pl_assert(entries[0]->src.pts <= params->pts); + pl_assert(entries[1]->src.pts >= params->pts); + + // Returning a mix containing both of these two frames + p->tmp_sig.num = p->tmp_ts.num = p->tmp_frame.num = 0; + for (int i = 0; i < 2; i++) { + if (!map_frame(p, 
entries[i])) + return QUEUE_ERR; + + float ts = (entries[i]->src.pts - params->pts) / p->fps.estimate; + PL_ARRAY_APPEND(p, p->tmp_sig, entries[i]->signature); + PL_ARRAY_APPEND(p, p->tmp_frame, &entries[i]->frame); + PL_ARRAY_APPEND(p, p->tmp_ts, ts); + } + + *mix = (struct pl_frame_mix) { + .num_frames = 2, + .frames = p->tmp_frame.elem, + .signatures = p->tmp_sig.elem, + .timestamps = p->tmp_ts.elem, + .vsync_duration = p->vps.estimate / p->fps.estimate, + }; + + PL_TRACE(p, "Oversampling 2 frames for target PTS %f:", params->pts); + for (int i = 0; i < mix->num_frames; i++) + PL_TRACE(p, " id %"PRIu64" ts %f", mix->signatures[i], mix->timestamps[i]); + + report_estimates(p); + return QUEUE_OK; +} + +// Present a mixture of frames, relative to the vsync ratio +static enum pl_queue_status interpolate(struct pl_queue *p, + struct pl_frame_mix *mix, + const struct pl_queue_params *params) +{ + // No FPS estimate available, possibly source contains only a single frame, + // or this is the first frame to be rendered. Fall back to ZOH semantics. + if (!p->fps.estimate) + return nearest(p, mix, params); + + // No radius information, special case in which we only need the previous + // and next frames. 
+ if (!params->radius) + return oversample(p, mix, params); + + float min_pts = params->pts - params->radius * p->fps.estimate, + max_pts = params->pts + params->radius * p->fps.estimate; + + enum pl_queue_status ret; + if ((ret = advance(p, min_pts, params))) + return ret; + + // Keep adding new frames until we've covered the range we care about + pl_assert(p->queue.num); + while (p->queue.elem[p->queue.num - 1].src.pts < max_pts) { + switch ((ret = get_frame(p, params))) { + case QUEUE_ERR: + case QUEUE_MORE: + return ret; + case QUEUE_EOF: + goto done; + case QUEUE_OK: + continue; + } + } + +done: ; + + // Construct a mix object representing the current queue state, starting at + // the last frame before `min_pts` to make sure there's a fallback frame + // available for ZOH semantics. + p->tmp_sig.num = p->tmp_ts.num = p->tmp_frame.num = 0; + for (int i = 0; i < p->queue.num; i++) { + struct entry *entry = &p->queue.elem[i]; + if (entry->src.pts > max_pts) + break; + if (!map_frame(p, entry)) + return QUEUE_ERR; + + float ts = (entry->src.pts - params->pts) / p->fps.estimate; + PL_ARRAY_APPEND(p, p->tmp_sig, entry->signature); + PL_ARRAY_APPEND(p, p->tmp_frame, &entry->frame); + PL_ARRAY_APPEND(p, p->tmp_ts, ts); + } + + *mix = (struct pl_frame_mix) { + .num_frames = p->tmp_frame.num, + .frames = p->tmp_frame.elem, + .signatures = p->tmp_sig.elem, + .timestamps = p->tmp_ts.elem, + .vsync_duration = p->vps.estimate / p->fps.estimate, + }; + + pl_assert(mix->num_frames); + PL_TRACE(p, "Showing mix of %d frames for target PTS %f:", + mix->num_frames, params->pts); + for (int i = 0; i < mix->num_frames; i++) + PL_TRACE(p, " id %"PRIu64" ts %f", mix->signatures[i], mix->timestamps[i]); + + report_estimates(p); + return QUEUE_OK; +} + +static bool prefill(struct pl_queue *p, const struct pl_queue_params *params) +{ + int min_frames = 2 * ceilf(params->radius); + min_frames = PL_MAX(min_frames, 2); + + while (p->queue.num < min_frames) { + switch (get_frame(p, params)) 
{ + case QUEUE_ERR: + return false; + case QUEUE_EOF: + case QUEUE_MORE: + return true; + case QUEUE_OK: + continue; + } + } + + // In the most likely case, the first few frames will all be required. So + // force-map them all to initialize GPU state on initial rendering. This is + // better than the alternative of missing the cache later, when timing is + // more relevant. + for (int i = 0; i < min_frames; i++) { + if (!map_frame(p, &p->queue.elem[i])) + return false; + } + + return true; +} + +enum pl_queue_status pl_queue_update(struct pl_queue *p, + struct pl_frame_mix *out_mix, + const struct pl_queue_params *params) +{ + p->fps.estimate = PL_DEF(p->fps.estimate, params->frame_duration); + p->vps.estimate = PL_DEF(p->vps.estimate, params->vsync_duration); + + float delta = params->pts - p->prev_pts; + if (delta < 0.0) { + + PL_ERR(p, "Requested PTS %f is lower than the previously rendered " + "PTS %f. This is not supported, PTS must be monotonically " + "increasing! Please use `pl_queue_reset` to reset the frame " + "queue on discontinuous PTS jumps.", params->pts, p->prev_pts); + return QUEUE_ERR; + + } else if (delta > 1.0) { + + // A jump of more than a second is probably the result of a + // discontinuous jump after a suspend. To prevent this from exploding + // the FPS estimate, treat this as a new frame. 
+ PL_TRACE(p, "Discontinuous target PTS jump %f -> %f, ignoring...", + p->prev_pts, params->pts); + + } else if (delta > 0) { + + update_estimate(&p->vps, params->pts - p->prev_pts); + + } + + p->prev_pts = params->pts; + + // As a special case, prefill the queue if this is the first frame + if (!params->pts && !p->queue.num) { + if (!prefill(p, params)) + return QUEUE_ERR; + } + + // Ignore unrealistically high or low FPS, common near start of playback + static const float max_vsync = 1.0 / MIN_FPS; + static const float min_vsync = 1.0 / MAX_FPS; + if (p->vps.estimate > min_vsync && p->vps.estimate < max_vsync) { + // We know the vsync duration, so construct an interpolation mix + return interpolate(p, out_mix, params); + } else { + // We don't know the vsync duration (yet), so just point-sample the + // nearest (zero-order-hold) frame + return nearest(p, out_mix, params); + } +}