audio: redo video-sync=display-adrop

This mode drops or repeats audio data to adapt to video speed, instead of resampling it or such. It was added to deal with SPDIF. The implementation was part of fill_audio_out_buffers() - the entire function is something whose complexity exploded in my face, and which I want to clean up, and this is hopefully a first step. Put it in a filter, and mess with the shitty glue code. It's all sort of roundabout and illogical, but that can be rectified later. The important part is that it works much like the resample or scaletempo filters. For PCM audio, this does not work on samples anymore. This makes it much worse. But for PCM you can use saner mechanisms that sound better. Also, something about PTS tracking is wrong. But not wasting more time on this.
2020-05-23 04:04:46 +02:00 · 2020-05-23 04:04:46 +02:00 · ab4e0c42fb
parent 43a67970b6
commit ab4e0c42fb
12 changed files with 192 additions and 56 deletions
--- a/DOCS/man/af.rst
+++ b/DOCS/man/af.rst
@ -220,3 +220,10 @@ Available filters are:
        broken filters. In practice, these broken filters will either cause slow
        A/V desync over time (with some files), or break playback completely if
        you seek or start playback from the middle of a file.
+
+``drop``
+    This filter drops or repeats audio frames to adapt to playback speed. It
+    always operates on full audio frames, because it was made to handle SPDIF
+    (compressed audio passthrough). This is used automatically if the
+    ``--video-sync=display-adrop`` option is used. Do not use this filter (or
+    the given option); they are extremely low quality.
--- a/DOCS/man/options.rst
+++ b/DOCS/man/options.rst
@ -6403,7 +6403,8 @@ Miscellaneous
                        video. See ``--video-sync-adrop-size``. This mode will
                        cause severe audio artifacts if the real monitor
                        refresh rate is too different from the reported or
-                        forced rate.
+                        forced rate. Since mpv 0.33.0, this acts on entire audio
+                        frames, instead of single samples.
    :display-desync:    Sync video to display, and let audio play on its own.
    :desync:            Sync video according to system clock, and let audio play
                        on its own.
--- a/audio/filter/af_drop.c
+++ b/audio/filter/af_drop.c
@ -0,0 +1,114 @@
+#include "audio/aframe.h"
+#include "audio/format.h"
+#include "common/common.h"
+#include "filters/f_autoconvert.h"
+#include "filters/filter_internal.h"
+#include "filters/user_filters.h"
+
+struct priv {
+    double speed; // factor passed to mp_aframe_mul_speed(); set by SET_SPEED
+    double diff; // output minus input duration so far; >0: too much output
+    struct mp_aframe *last; // ref to the last frame written, for repeating
+};
+
+static void process(struct mp_filter *f)
+{
+    struct priv *p = f->priv;
+
+    if (!mp_pin_in_needs_data(f->ppins[1])) // ppins[1] is written below
+        return;
+
+    struct mp_frame frame = {0};
+
+    double last_dur = p->last ? mp_aframe_duration(p->last) : 0;
+    if (p->last && p->diff < 0 && -p->diff > last_dur / 2) { // behind by > 1/2 frame
+        MP_VERBOSE(f, "repeat\n");
+        frame = MAKE_FRAME(MP_FRAME_AUDIO, p->last);
+        p->last = NULL; // now held by frame; keeps talloc_free() below off it
+    } else {
+        frame = mp_pin_out_read(f->ppins[0]); // may be non-audio (e.g. EOF)
+
+        if (frame.type == MP_FRAME_AUDIO) {
+            last_dur = mp_aframe_duration(frame.data);
+            p->diff -= last_dur; // account for the input duration consumed
+            if (p->diff > last_dur / 2) { // ahead by > 1/2 frame
+                MP_VERBOSE(f, "drop\n");
+                mp_frame_unref(&frame); // NOTE(review): assumes this clears frame.type - confirm
+                mp_filter_internal_mark_progress(f);
+            }
+        }
+    }
+
+    if (frame.type == MP_FRAME_AUDIO) {
+        struct mp_aframe *fr = frame.data;
+        talloc_free(p->last);
+        p->last = mp_aframe_new_ref(fr); // keep a ref for a possible repeat
+        mp_aframe_mul_speed(fr, p->speed);
+        p->diff += mp_aframe_duration(fr); // duration as reported after mul_speed
+        mp_aframe_set_pts(p->last, mp_aframe_end_pts(fr)); // a repeat starts where fr ends
+    } else if (frame.type == MP_FRAME_EOF) {
+        TA_FREEP(&p->last); // nothing is kept for repeating past EOF
+    }
+    mp_pin_in_write(f->ppins[1], frame); // NOTE(review): frame may be empty here - confirm this is tolerated
+}
+
+static bool command(struct mp_filter *f, struct mp_filter_command *cmd)
+{
+    struct priv *p = f->priv;
+
+    switch (cmd->type) {
+    case MP_FILTER_COMMAND_SET_SPEED:
+        p->speed = cmd->speed; // used by process() for subsequent frames
+        return true; // command was handled here
+    }
+
+    return false; // every other command type is left unhandled
+}
+
+static void reset(struct mp_filter *f)
+{
+    struct priv *p = f->priv;
+
+    TA_FREEP(&p->last); // forget the frame kept for repeating
+    p->diff = 0; // clear the accumulated duration difference
+}
+
+static void destroy(struct mp_filter *f)
+{
+    reset(f); // releases p->last; p->speed is left as is
+}
+
+static const struct mp_filter_info af_drop_filter = {
+    .name = "drop",
+    .priv_size = sizeof(struct priv), // f->priv is accessed as struct priv
+    .process = process,
+    .command = command, // handles MP_FILTER_COMMAND_SET_SPEED only
+    .reset = reset,
+    .destroy = destroy, // destroy() just calls reset()
+};
+
+static struct mp_filter *af_drop_create(struct mp_filter *parent, void *options)
+{
+    struct mp_filter *f = mp_filter_create(parent, &af_drop_filter);
+    if (!f) {
+        talloc_free(options); // creation failed: release options before bailing out
+        return NULL;
+    }
+
+    mp_filter_add_pin(f, MP_PIN_IN, "in"); // presumably f->ppins[0], read by process()
+    mp_filter_add_pin(f, MP_PIN_OUT, "out"); // presumably f->ppins[1], written by process()
+
+    struct priv *p = f->priv;
+    p->speed = 1.0; // stays 1.0 until a SET_SPEED command changes it
+
+    return f; // NOTE(review): options is neither used nor freed on this path - confirm ownership
+}
+
+const struct mp_user_filter_entry af_drop = {
+    .desc = {
+        .description = "Change audio speed by dropping/repeating frames",
+        .name = "drop", // filter name as listed in DOCS/man/af.rst
+        .priv_size = sizeof(struct priv), // NOTE(review): presumably sizes the options passed to create - confirm
+    },
+    .create = af_drop_create,
+};
--- a/filters/f_auto_filters.c
+++ b/filters/f_auto_filters.c
@ -1,5 +1,7 @@
 #include <math.h>

+#include "audio/aframe.h"
+#include "audio/format.h"
 #include "common/common.h"
 #include "common/msg.h"
 #include "options/m_config.h"
@ -295,7 +297,8 @@ struct mp_filter *mp_autorotate_create(struct mp_filter *parent)

 struct aspeed_priv {
    struct mp_subfilter sub;
-    double cur_speed;
+    double cur_speed, cur_speed_drop;
+    int current_filter;
 };

 static void aspeed_process(struct mp_filter *f)
@ -305,26 +308,48 @@ static void aspeed_process(struct mp_filter *f)
    if (!mp_subfilter_read(&p->sub))
        return;

-    if (fabs(p->cur_speed - 1.0) < 1e-8) {
+    if (!p->sub.filter)
+        p->current_filter = 0;
+
+    double speed = p->cur_speed * p->cur_speed_drop;
+
+    int req_filter = 0;
+    if (fabs(speed - 1.0) >= 1e-8) {
+        req_filter = p->cur_speed_drop == 1.0 ? 1 : 2;
+        if (p->sub.frame.type == MP_FRAME_AUDIO &&
+            !af_fmt_is_pcm(mp_aframe_get_format(p->sub.frame.data)))
+            req_filter = 2;
+    }
+
+    if (req_filter != p->current_filter) {
        if (p->sub.filter)
-            MP_VERBOSE(f, "removing scaletempo\n");
+            MP_VERBOSE(f, "removing audio speed filter\n");
        if (!mp_subfilter_drain_destroy(&p->sub))
            return;
-    } else if (!p->sub.filter) {
-        MP_VERBOSE(f, "adding scaletempo\n");
-        p->sub.filter =
-            mp_create_user_filter(f, MP_OUTPUT_CHAIN_AUDIO, "scaletempo", NULL);
-        if (!p->sub.filter) {
-            MP_ERR(f, "could not create scaletempo filter\n");
-            mp_subfilter_continue(&p->sub);
-            return;
+
+        if (req_filter) {
+            if (req_filter == 1) {
+                MP_VERBOSE(f, "adding scaletempo\n");
+                p->sub.filter = mp_create_user_filter(f, MP_OUTPUT_CHAIN_AUDIO,
+                                                      "scaletempo", NULL);
+            } else if (req_filter == 2) {
+                MP_VERBOSE(f, "adding drop\n");
+                p->sub.filter = mp_create_user_filter(f, MP_OUTPUT_CHAIN_AUDIO,
+                                                      "drop", NULL);
+            }
+            if (!p->sub.filter) {
+                MP_ERR(f, "could not create filter\n");
+                mp_subfilter_continue(&p->sub);
+                return;
+            }
+            p->current_filter = req_filter;
        }
    }

    if (p->sub.filter) {
        struct mp_filter_command cmd = {
            .type = MP_FILTER_COMMAND_SET_SPEED,
-            .speed = p->cur_speed,
+            .speed = speed,
        };
        mp_filter_command(p->sub.filter, &cmd);
    }
@ -341,6 +366,11 @@ static bool aspeed_command(struct mp_filter *f, struct mp_filter_command *cmd)
        return true;
    }

+    if (cmd->type == MP_FILTER_COMMAND_SET_SPEED_DROP) {
+        p->cur_speed_drop = cmd->speed;
+        return true;
+    }
+
    if (cmd->type == MP_FILTER_COMMAND_IS_ACTIVE) {
        cmd->is_active = !!p->sub.filter;
        return true;
@ -381,6 +411,7 @@ struct mp_filter *mp_autoaspeed_create(struct mp_filter *parent)

    struct aspeed_priv *p = f->priv;
    p->cur_speed = 1.0;
+    p->cur_speed_drop = 1.0;

    p->sub.in = mp_filter_add_pin(f, MP_PIN_IN, "in");
    p->sub.out = mp_filter_add_pin(f, MP_PIN_OUT, "out");
--- a/filters/f_output_chain.c
+++ b/filters/f_output_chain.c
@ -454,13 +454,12 @@ bool mp_output_chain_command(struct mp_output_chain *c, const char *target,
 // supports it, reset *speed, then keep setting the speed on the other filters.
 // The purpose of this is to make sure only 1 filter changes speed.
 static void set_speed_any(struct mp_user_filter **filters, int num_filters,
-                          bool resample, double *speed)
+                          int command, double *speed)
 {
    for (int n = num_filters - 1; n >= 0; n--) {
        assert(*speed);
        struct mp_filter_command cmd = {
-            .type = resample ? MP_FILTER_COMMAND_SET_SPEED_RESAMPLE
-                             : MP_FILTER_COMMAND_SET_SPEED,
+            .type = command,
            .speed = *speed,
        };
        if (mp_filter_command(filters[n]->f, &cmd))
@ -469,17 +468,24 @@ static void set_speed_any(struct mp_user_filter **filters, int num_filters,
 }

 void mp_output_chain_set_audio_speed(struct mp_output_chain *c,
-                                     double speed, double resample)
+                                     double speed, double resample, double drop)
 {
    struct chain *p = c->f->priv;

    // We always resample with the final libavresample instance.
-    set_speed_any(p->post_filters, p->num_post_filters, true, &resample);
+    set_speed_any(p->post_filters, p->num_post_filters,
+                  MP_FILTER_COMMAND_SET_SPEED_RESAMPLE, &resample);

    // If users have filters like "scaletempo" insert anywhere, use that,
    // otherwise use the builtin ones.
-    set_speed_any(p->user_filters, p->num_user_filters, false, &speed);
-    set_speed_any(p->post_filters, p->num_post_filters, false, &speed);
+    set_speed_any(p->user_filters, p->num_user_filters,
+                  MP_FILTER_COMMAND_SET_SPEED, &speed);
+    set_speed_any(p->post_filters, p->num_post_filters,
+                  MP_FILTER_COMMAND_SET_SPEED, &speed);
+    set_speed_any(p->user_filters, p->num_user_filters,
+                  MP_FILTER_COMMAND_SET_SPEED_DROP, &drop);
+    set_speed_any(p->post_filters, p->num_post_filters,
+                  MP_FILTER_COMMAND_SET_SPEED_DROP, &drop);
 }

 double mp_output_get_measured_total_delay(struct mp_output_chain *c)
--- a/filters/f_output_chain.h
+++ b/filters/f_output_chain.h
@ -77,7 +77,7 @@ bool mp_output_chain_update_filters(struct mp_output_chain *p,

 // Desired audio speed, with resample being strict resampling.
 void mp_output_chain_set_audio_speed(struct mp_output_chain *p,
-                                     double speed, double resample);
+                                     double speed, double resample, double drop);

 // Total delay incurred by the filter chain, as measured by the recent filtered
 // frames. The intention is that this sums the measured delays for each filter,
--- a/filters/filter.h
+++ b/filters/filter.h
@ -364,6 +364,7 @@ enum mp_filter_command_type {
    MP_FILTER_COMMAND_GET_META,
    MP_FILTER_COMMAND_SET_SPEED,
    MP_FILTER_COMMAND_SET_SPEED_RESAMPLE,
+    MP_FILTER_COMMAND_SET_SPEED_DROP,
    MP_FILTER_COMMAND_IS_ACTIVE,
 };

--- a/filters/user_filters.c
+++ b/filters/user_filters.c
@ -39,6 +39,7 @@ const struct mp_user_filter_entry *af_list[] = {
    &af_rubberband,
 #endif
    &af_lavcac3enc,
+    &af_drop,
 };

 static bool get_af_desc(struct m_obj_desc *dst, int index)
--- a/filters/user_filters.h
+++ b/filters/user_filters.h
@ -24,6 +24,7 @@ extern const struct mp_user_filter_entry af_scaletempo;
 extern const struct mp_user_filter_entry af_format;
 extern const struct mp_user_filter_entry af_rubberband;
 extern const struct mp_user_filter_entry af_lavcac3enc;
+extern const struct mp_user_filter_entry af_drop;

 extern const struct mp_user_filter_entry vf_lavfi;
 extern const struct mp_user_filter_entry vf_lavfi_bridge;
--- a/player/audio.c
+++ b/player/audio.c
@ -54,13 +54,19 @@ static void update_speed_filters(struct MPContext *mpctx)

    double speed = mpctx->opts->playback_speed;
    double resample = mpctx->speed_factor_a;
+    double drop = 1.0;

    if (!mpctx->opts->pitch_correction) {
        resample *= speed;
        speed = 1.0;
    }

-    mp_output_chain_set_audio_speed(ao_c->filter, speed, resample);
+    if (mpctx->display_sync_active && mpctx->opts->video_sync == VS_DISP_ADROP) {
+        drop *= speed * resample;
+        resample = speed = 1.0;
+    }
+
+    mp_output_chain_set_audio_speed(ao_c->filter, speed, resample, drop);
 }

 static int recreate_audio_filters(struct MPContext *mpctx)
@ -878,24 +884,6 @@ void fill_audio_out_buffers(struct MPContext *mpctx)
        playsize = MPMAX(1, playsize + skip); // silence will be prepended
    }

-    int skip_duplicate = 0; // >0: skip, <0: duplicate
-    double drop_limit =
-        (opts->sync_max_audio_change + opts->sync_max_video_change) / 100;
-    if (mpctx->display_sync_active && opts->video_sync == VS_DISP_ADROP &&
-        fabs(mpctx->last_av_difference) >= opts->sync_audio_drop_size &&
-        mpctx->audio_drop_throttle < drop_limit &&
-        mpctx->audio_status == STATUS_PLAYING)
-    {
-        int samples = ceil(opts->sync_audio_drop_size * play_samplerate);
-        samples = (samples + align / 2) / align * align;
-
-        skip_duplicate = mpctx->last_av_difference >= 0 ? -samples : samples;
-
-        playsize = MPMAX(playsize, samples);
-
-        mpctx->audio_drop_throttle += 1 - drop_limit - samples / play_samplerate;
-    }
-
    playsize = playsize / align * align;

    int status = mpctx->audio_status >= STATUS_DRAINING ? AD_EOF : AD_OK;
@ -940,21 +928,6 @@ void fill_audio_out_buffers(struct MPContext *mpctx)
        end_sync = true;
    }

-    if (skip_duplicate) {
-        int max = mp_audio_buffer_samples(ao_c->ao_buffer);
-        if (abs(skip_duplicate) > max)
-            skip_duplicate = skip_duplicate >= 0 ? max : -max;
-        mpctx->last_av_difference += skip_duplicate / play_samplerate;
-        if (skip_duplicate >= 0) {
-            mp_audio_buffer_skip(ao_c->ao_buffer, skip_duplicate);
-            MP_STATS(mpctx, "drop-audio");
-        } else {
-            mp_audio_buffer_duplicate(ao_c->ao_buffer, -skip_duplicate);
-            MP_STATS(mpctx, "duplicate-audio");
-        }
-        MP_VERBOSE(mpctx, "audio skip_duplicate=%d\n", skip_duplicate);
-    }
-
    if (mpctx->audio_status == STATUS_SYNCING) {
        if (end_sync)
            mpctx->audio_status = STATUS_FILLING;
--- a/player/video.c
+++ b/player/video.c
@ -898,7 +898,7 @@ static void handle_display_sync_frame(struct MPContext *mpctx,
    mpctx->past_frames[0].num_vsyncs = num_vsyncs;
    mpctx->past_frames[0].av_diff = mpctx->last_av_difference;

-    if (resample) {
+    if (resample || mode == VS_DISP_ADROP) {
        adjust_audio_resample_speed(mpctx, vsync);
    } else {
        mpctx->speed_factor_a = 1.0;
--- a/wscript_build.py
+++ b/wscript_build.py
@ -235,6 +235,7 @@ def build(ctx):
        ( "audio/chmap_sel.c" ),
        ( "audio/decode/ad_lavc.c" ),
        ( "audio/decode/ad_spdif.c" ),
+        ( "audio/filter/af_drop.c" ),
        ( "audio/filter/af_format.c" ),
        ( "audio/filter/af_lavcac3enc.c" ),
        ( "audio/filter/af_rubberband.c",        "rubberband" ),