diff --git a/Makefile b/Makefile index b714e15b..6180f50a 100644 --- a/Makefile +++ b/Makefile @@ -22,6 +22,7 @@ SRCS = common/mc.c common/predict.c common/pixel.c common/macroblock.c \ common/mvpred.c common/bitstream.c \ encoder/analyse.c encoder/me.c encoder/ratecontrol.c \ encoder/set.c encoder/macroblock.c encoder/cabac.c \ + encoder/speed.c \ encoder/cavlc.c encoder/encoder.c encoder/lookahead.c SRCCLI = x264.c input/input.c input/timecode.c input/raw.c input/y4m.c \ diff --git a/common/common.c b/common/common.c index 2d72bc77..4ad6db22 100644 --- a/common/common.c +++ b/common/common.c @@ -150,6 +150,12 @@ void x264_param_default( x264_param_t *param ) param->rc.i_zones = 0; param->rc.b_mb_tree = 1; + // speedcontrol + param->sc.f_speed = 0; + param->sc.i_buffer_size = 12; + param->sc.f_buffer_init = 0.75; + param->sc.max_preset = SC_PRESETS; + /* Log */ param->pf_log = x264_log_default; param->p_log_private = NULL; @@ -1145,6 +1151,14 @@ int x264_param_parse( x264_param_t *p, const char *name, const char *value ) p->rc.f_complexity_blur = atof(value); OPT("zones") p->rc.psz_zones = strdup(value); + OPT("speed") + p->sc.f_speed = atof(value); + OPT("speed-bufsize") + p->sc.i_buffer_size = atoi(value); + OPT("speed-init") + p->sc.f_buffer_init = atof(value); + OPT("speed-alt-timer") + p->sc.b_alt_timer = atobool(value); OPT("crop-rect") b_error |= sscanf( value, "%u,%u,%u,%u", &p->crop_rect.i_left, &p->crop_rect.i_top, &p->crop_rect.i_right, &p->crop_rect.i_bottom ) != 4; @@ -1474,6 +1488,8 @@ char *x264_param2string( x264_param_t *p, int b_res ) s += sprintf( s, "bitdepth=%d ", BIT_DEPTH ); } + // FIXME speedcontrol stuff + if( p->b_opencl ) s += sprintf( s, "opencl=%d ", p->b_opencl ); s += sprintf( s, "cabac=%d", p->b_cabac ); diff --git a/common/common.h b/common/common.h index f9b93a0b..b0583905 100644 --- a/common/common.h +++ b/common/common.h @@ -118,6 +118,7 @@ do {\ #define LOG2_16(x) (31 - x264_clz((x)|1)) +#define SC_PRESETS 13 
/**************************************************************************** * Includes ****************************************************************************/ @@ -514,6 +515,7 @@ typedef struct x264_lookahead_t } x264_lookahead_t; typedef struct x264_ratecontrol_t x264_ratecontrol_t; +typedef struct x264_speedcontrol_t x264_speedcontrol_t; typedef struct x264_left_table_t { @@ -969,6 +971,7 @@ struct x264_t /* rate control encoding only */ x264_ratecontrol_t *rc; + x264_speedcontrol_t *sc; /* stats */ struct diff --git a/common/osdep.c b/common/osdep.c index e97aaeda..1c291063 100644 --- a/common/osdep.c +++ b/common/osdep.c @@ -47,10 +47,14 @@ extern int ptw32_processInitialized; int64_t x264_mdate( void ) { -#if SYS_WINDOWS +#ifdef __MINGW32__ struct timeb tb; ftime( &tb ); return ((int64_t)tb.time * 1000 + (int64_t)tb.millitm) * 1000; +#elif SYS_LINUX + struct timespec ts_current; + clock_gettime( CLOCK_MONOTONIC, &ts_current ); + return (int64_t)ts_current.tv_sec * 1000000 + (int64_t)ts_current.tv_nsec / 1000; #else struct timeval tv_date; gettimeofday( &tv_date, NULL ); diff --git a/common/quant.c b/common/quant.c index b16895dc..883ef158 100644 --- a/common/quant.c +++ b/common/quant.c @@ -730,7 +730,7 @@ void x264_quant_init( x264_t *h, int cpu, x264_quant_function_t *pf ) { pf->quant_4x4 = x264_quant_4x4_avx2; pf->quant_4x4_dc = x264_quant_4x4_dc_avx2; - pf->quant_8x8 = x264_quant_8x8_avx2; + //pf->quant_8x8 = x264_quant_8x8_avx2; pf->quant_4x4x4 = x264_quant_4x4x4_avx2; pf->dequant_4x4 = x264_dequant_4x4_avx2; pf->dequant_8x8 = x264_dequant_8x8_avx2; diff --git a/common/x86/mc-a.asm b/common/x86/mc-a.asm index 12eef14d..1a579457 100644 --- a/common/x86/mc-a.asm +++ b/common/x86/mc-a.asm @@ -1276,6 +1276,10 @@ AVG16_CACHELINE_LOOP_SSSE3 j, k %endrep %endif ; !HIGH_BIT_DEPTH +;============================================================================= +; hpel_filter_c +;============================================================================= + 
;============================================================================= ; pixel copy ;============================================================================= diff --git a/configure b/configure index 3bda05af..e8959d6d 100755 --- a/configure +++ b/configure @@ -499,6 +499,7 @@ case $host_os in SYS="LINUX" define HAVE_MALLOC_H libm="-lm" + LDFLAGS="$LDFLAGS -lrt" ;; gnu*) SYS="HURD" diff --git a/encoder/encoder.c b/encoder/encoder.c index 3e89e52e..503213bb 100644 --- a/encoder/encoder.c +++ b/encoder/encoder.c @@ -1328,10 +1328,15 @@ static int x264_validate_parameters( x264_t *h, int b_open ) h->param.rc.f_qblur = 0; if( h->param.rc.f_complexity_blur < 0 ) h->param.rc.f_complexity_blur = 0; + if( h->param.sc.i_buffer_size < 0 || h->param.sc.f_speed <= 0 ) + h->param.sc.i_buffer_size = 0; h->param.i_sps_id &= 31; - h->param.i_nal_hrd = x264_clip3( h->param.i_nal_hrd, X264_NAL_HRD_NONE, X264_NAL_HRD_CBR ); + if( PARAM_INTERLACED ) + h->param.b_pic_struct = 1; + + h->param.i_nal_hrd = x264_clip3( h->param.i_nal_hrd, X264_NAL_HRD_NONE, X264_NAL_HRD_FAKE_CBR ); if( h->param.i_nal_hrd && !h->param.rc.i_vbv_buffer_size ) { @@ -1339,13 +1344,15 @@ static int x264_validate_parameters( x264_t *h, int b_open ) h->param.i_nal_hrd = X264_NAL_HRD_NONE; } - if( h->param.i_nal_hrd == X264_NAL_HRD_CBR && - (h->param.rc.i_bitrate != h->param.rc.i_vbv_max_bitrate || !h->param.rc.i_vbv_max_bitrate) ) + if( (h->param.i_nal_hrd == X264_NAL_HRD_CBR || h->param.i_nal_hrd == X264_NAL_HRD_FAKE_CBR) && + (h->param.rc.i_bitrate != h->param.rc.i_vbv_max_bitrate || !h->param.rc.i_vbv_max_bitrate) ) { x264_log( h, X264_LOG_WARNING, "CBR HRD requires constant bitrate\n" ); - h->param.i_nal_hrd = X264_NAL_HRD_VBR; + h->param.i_nal_hrd = h->param.i_nal_hrd == X264_NAL_HRD_CBR ? 
X264_NAL_HRD_VBR : X264_NAL_HRD_FAKE_CBR; } + h->param.sc.max_preset = x264_clip3( h->param.sc.max_preset, 1, SC_PRESETS ); + if( h->param.i_nal_hrd == X264_NAL_HRD_CBR ) h->param.rc.b_filler = 1; @@ -1580,6 +1587,7 @@ x264_t *x264_encoder_open( x264_param_t *param ) x264_set_aspect_ratio( h, &h->param, 1 ); x264_sps_init( h->sps, h->param.i_sps_id, &h->param ); + h->param.i_profile = h->sps->i_profile_idc; x264_pps_init( h, h->pps, h->param.i_sps_id, &h->param, h->sps ); x264_validate_levels( h, 1 ); @@ -1680,6 +1688,10 @@ x264_t *x264_encoder_open( x264_param_t *param ) mbcmp_init( h ); chroma_dsp_init( h ); + if( h->param.sc.i_buffer_size ) + x264_speedcontrol_new( h ); + + p = buf + sprintf( buf, "using cpu capabilities:" ); for( int i = 0; x264_cpu_names[i].flags; i++ ) { @@ -2674,7 +2686,7 @@ static inline void x264_slice_init( x264_t *h, int i_nal_type, int i_global_qp ) if( h->sps->i_poc_type == 0 ) { h->sh.i_poc = h->fdec->i_poc; - if( PARAM_INTERLACED ) + if( PARAM_INTERLACED && h->pps->b_pic_order ) { h->sh.i_delta_poc_bottom = h->param.b_tff ? 
1 : -1; h->sh.i_poc += h->sh.i_delta_poc_bottom == -1; @@ -3853,7 +3865,7 @@ int x264_encoder_encode( x264_t *h, } /* when frame threading is used, buffering period sei is written in x264_encoder_frame_end */ - if( h->i_thread_frames == 1 && h->sps->vui.b_nal_hrd_parameters_present ) + if( h->i_thread_frames == 1 && h->param.i_nal_hrd ) { x264_hrd_fullness( h ); x264_nal_start( h, NAL_SEI, NAL_PRIORITY_DISPOSABLE ); @@ -3946,6 +3958,10 @@ int x264_encoder_encode( x264_t *h, overhead += h->out.nal[h->out.i_nal-1].i_payload + SEI_OVERHEAD; } + /* Init the speed control */ + if( h->param.sc.i_buffer_size ) + x264_speedcontrol_frame( h ); + if( h->fenc->b_keyframe && h->param.b_intra_refresh ) h->i_cpb_delay_pir_offset_next = h->fenc->i_cpb_delay; @@ -4061,7 +4077,7 @@ static int x264_encoder_frame_end( x264_t *h, x264_t *thread_current, x264_emms(); /* generate buffering period sei and insert it into place */ - if( h->i_thread_frames > 1 && h->fenc->b_keyframe && h->sps->vui.b_nal_hrd_parameters_present ) + if( h->i_thread_frames > 1 && h->fenc->b_keyframe && h->param.i_nal_hrd ) { x264_hrd_fullness( h ); x264_nal_start( h, NAL_SEI, NAL_PRIORITY_DISPOSABLE ); @@ -4085,7 +4101,6 @@ static int x264_encoder_frame_end( x264_t *h, x264_t *thread_current, /* Set output picture properties */ pic_out->i_type = h->fenc->i_type; - pic_out->b_keyframe = h->fenc->b_keyframe; pic_out->i_pic_struct = h->fenc->i_pic_struct; @@ -4162,6 +4177,9 @@ static int x264_encoder_frame_end( x264_t *h, x264_t *thread_current, } } + if( h->fenc->b_keyframe && h->param.b_intra_refresh ) + h->i_cpb_delay_pir_offset = h->fenc->i_cpb_delay; + /* End bitstream, set output */ *pi_nal = h->out.i_nal; *pp_nal = h->out.nal; @@ -4170,6 +4188,9 @@ static int x264_encoder_frame_end( x264_t *h, x264_t *thread_current, x264_noise_reduction_update( h ); + if( h->param.sc.i_buffer_size ) + x264_speedcontrol_frame_end( h ); + /* ---------------------- Compute/Print statistics --------------------- */ 
x264_thread_sync_stat( h, h->thread[0] ); @@ -4622,6 +4643,7 @@ void x264_encoder_close ( x264_t *h ) /* rc */ x264_ratecontrol_delete( h ); + x264_speedcontrol_delete( h ); /* param */ if( h->param.rc.psz_stat_out ) diff --git a/encoder/ratecontrol.c b/encoder/ratecontrol.c index 01943850..747b78a4 100644 --- a/encoder/ratecontrol.c +++ b/encoder/ratecontrol.c @@ -171,8 +171,12 @@ struct x264_ratecontrol_t /* hrd stuff */ int initial_cpb_removal_delay; int initial_cpb_removal_delay_offset; - double nrt_first_access_unit; /* nominal removal time */ - double previous_cpb_final_arrival_time; + int64_t nrt_first_access_unit; /* nominal removal time */ + + /* Integer and fractional part of the cpb arrival time */ + int64_t previous_cpb_final_arrival_time_int; + int64_t previous_cpb_final_arrival_time_frac; + uint64_t hrd_multiply_denom; }; @@ -667,46 +671,47 @@ void x264_ratecontrol_init_reconfigurable( x264_t *h, int b_init ) if( h->param.i_nal_hrd && b_init ) { h->sps->vui.hrd.i_cpb_cnt = 1; - h->sps->vui.hrd.b_cbr_hrd = h->param.i_nal_hrd == X264_NAL_HRD_CBR; + h->sps->vui.hrd.b_cbr_hrd = h->param.i_nal_hrd == X264_NAL_HRD_CBR || h->param.i_nal_hrd == X264_NAL_HRD_FAKE_CBR; h->sps->vui.hrd.i_time_offset_length = 0; #define BR_SHIFT 6 #define CPB_SHIFT 4 - // normalize HRD size and rate to the value / scale notation - h->sps->vui.hrd.i_bit_rate_scale = x264_clip3( x264_ctz( vbv_max_bitrate ) - BR_SHIFT, 0, 15 ); - h->sps->vui.hrd.i_bit_rate_value = vbv_max_bitrate >> ( h->sps->vui.hrd.i_bit_rate_scale + BR_SHIFT ); - h->sps->vui.hrd.i_bit_rate_unscaled = h->sps->vui.hrd.i_bit_rate_value << ( h->sps->vui.hrd.i_bit_rate_scale + BR_SHIFT ); - h->sps->vui.hrd.i_cpb_size_scale = x264_clip3( x264_ctz( vbv_buffer_size ) - CPB_SHIFT, 0, 15 ); - h->sps->vui.hrd.i_cpb_size_value = vbv_buffer_size >> ( h->sps->vui.hrd.i_cpb_size_scale + CPB_SHIFT ); - h->sps->vui.hrd.i_cpb_size_unscaled = h->sps->vui.hrd.i_cpb_size_value << ( h->sps->vui.hrd.i_cpb_size_scale + CPB_SHIFT ); + 
if( h->param.i_nal_hrd == X264_NAL_HRD_VBR || h->param.i_nal_hrd == X264_NAL_HRD_CBR ) + { + // normalize HRD size and rate to the value / scale notation + h->sps->vui.hrd.i_bit_rate_scale = x264_clip3( x264_ctz( vbv_max_bitrate ) - BR_SHIFT, 0, 15 ); + h->sps->vui.hrd.i_bit_rate_value = vbv_max_bitrate >> ( h->sps->vui.hrd.i_bit_rate_scale + BR_SHIFT ); + h->sps->vui.hrd.i_bit_rate_unscaled = h->sps->vui.hrd.i_bit_rate_value << ( h->sps->vui.hrd.i_bit_rate_scale + BR_SHIFT ); + h->sps->vui.hrd.i_cpb_size_scale = x264_clip3( x264_ctz( vbv_buffer_size ) - CPB_SHIFT, 0, 15 ); + h->sps->vui.hrd.i_cpb_size_value = vbv_buffer_size >> ( h->sps->vui.hrd.i_cpb_size_scale + CPB_SHIFT ); + h->sps->vui.hrd.i_cpb_size_unscaled = h->sps->vui.hrd.i_cpb_size_value << ( h->sps->vui.hrd.i_cpb_size_scale + CPB_SHIFT ); - #undef CPB_SHIFT - #undef BR_SHIFT + #undef CPB_SHIFT + #undef BR_SHIFT - // arbitrary - #define MAX_DURATION 0.5 + // arbitrary + #define MAX_DURATION 0.5 - int max_cpb_output_delay = X264_MIN( h->param.i_keyint_max * MAX_DURATION * h->sps->vui.i_time_scale / h->sps->vui.i_num_units_in_tick, INT_MAX ); - int max_dpb_output_delay = h->sps->vui.i_max_dec_frame_buffering * MAX_DURATION * h->sps->vui.i_time_scale / h->sps->vui.i_num_units_in_tick; - int max_delay = (int)(90000.0 * (double)h->sps->vui.hrd.i_cpb_size_unscaled / h->sps->vui.hrd.i_bit_rate_unscaled + 0.5); + int max_cpb_output_delay = X264_MIN( h->param.i_keyint_max * MAX_DURATION * h->sps->vui.i_time_scale / h->sps->vui.i_num_units_in_tick, INT_MAX ); + int max_dpb_output_delay = h->sps->vui.i_max_dec_frame_buffering * MAX_DURATION * h->sps->vui.i_time_scale / h->sps->vui.i_num_units_in_tick; + int max_delay = (int)(90000.0 * (double)h->sps->vui.hrd.i_cpb_size_unscaled / h->sps->vui.hrd.i_bit_rate_unscaled + 0.5); - h->sps->vui.hrd.i_initial_cpb_removal_delay_length = 2 + x264_clip3( 32 - x264_clz( max_delay ), 4, 22 ); - h->sps->vui.hrd.i_cpb_removal_delay_length = x264_clip3( 32 - x264_clz( 
max_cpb_output_delay ), 4, 31 ); - h->sps->vui.hrd.i_dpb_output_delay_length = x264_clip3( 32 - x264_clz( max_dpb_output_delay ), 4, 31 ); + h->sps->vui.hrd.i_initial_cpb_removal_delay_length = 2 + x264_clip3( 32 - x264_clz( max_delay ), 4, 22 ); + h->sps->vui.hrd.i_cpb_removal_delay_length = x264_clip3( 32 - x264_clz( max_cpb_output_delay ), 4, 31 ); + h->sps->vui.hrd.i_dpb_output_delay_length = x264_clip3( 32 - x264_clz( max_dpb_output_delay ), 4, 31 ); - #undef MAX_DURATION + #undef MAX_DURATION - vbv_buffer_size = h->sps->vui.hrd.i_cpb_size_unscaled; - vbv_max_bitrate = h->sps->vui.hrd.i_bit_rate_unscaled; + vbv_buffer_size = h->sps->vui.hrd.i_cpb_size_unscaled; + vbv_max_bitrate = h->sps->vui.hrd.i_bit_rate_unscaled; + } } else if( h->param.i_nal_hrd && !b_init ) { - x264_log( h, X264_LOG_WARNING, "VBV parameters cannot be changed when NAL HRD is in use\n" ); + //x264_log( h, X264_LOG_WARNING, "VBV parameters cannot be changed when NAL HRD is in use\n" ); return; } - h->sps->vui.hrd.i_bit_rate_unscaled = vbv_max_bitrate; - h->sps->vui.hrd.i_cpb_size_unscaled = vbv_buffer_size; if( rc->b_vbv_min_rate ) rc->bitrate = (double)h->param.rc.i_bitrate * kilobit_size; @@ -714,6 +719,12 @@ void x264_ratecontrol_init_reconfigurable( x264_t *h, int b_init ) rc->vbv_max_rate = vbv_max_bitrate; rc->buffer_size = vbv_buffer_size; rc->single_frame_vbv = rc->buffer_rate * 1.1 > rc->buffer_size; + if( rc->single_frame_vbv && h->param.i_nal_hrd > X264_NAL_HRD_CBR ) + rc->buffer_size = rc->buffer_rate; + + h->sps->vui.hrd.i_bit_rate_unscaled = rc->vbv_max_rate; + h->sps->vui.hrd.i_cpb_size_unscaled = rc->buffer_size; + rc->cbr_decay = 1.0 - rc->buffer_rate / rc->buffer_size * 0.5 * X264_MAX(0, 1.5 - rc->buffer_rate * rc->fps / rc->bitrate); if( h->param.rc.i_rc_method == X264_RC_CRF && h->param.rc.f_rf_constant_max ) @@ -782,11 +793,11 @@ int x264_ratecontrol_new( x264_t *h ) if( h->param.i_nal_hrd ) { uint64_t denom = (uint64_t)h->sps->vui.hrd.i_bit_rate_unscaled * 
h->sps->vui.i_time_scale; - uint64_t num = 180000; + uint64_t num = 54000000; x264_reduce_fraction64( &num, &denom ); - rc->hrd_multiply_denom = 180000 / num; + rc->hrd_multiply_denom = 54000000 / num; - double bits_required = log2( 180000 / rc->hrd_multiply_denom ) + double bits_required = log2( 54000000 / rc->hrd_multiply_denom ) + log2( h->sps->vui.i_time_scale ) + log2( h->sps->vui.hrd.i_cpb_size_unscaled ); if( bits_required >= 63 ) @@ -1914,42 +1925,60 @@ int x264_ratecontrol_end( x264_t *h, int bits, int *filler ) *filler = update_vbv( h, bits ); rc->filler_bits_sum += *filler * 8; - if( h->sps->vui.b_nal_hrd_parameters_present ) + if( h->param.i_nal_hrd ) { + uint64_t denom = (uint64_t)h->sps->vui.hrd.i_bit_rate_unscaled * h->sps->vui.i_time_scale / rc->hrd_multiply_denom; + uint64_t multiply_factor = 54000000 / rc->hrd_multiply_denom; + if( h->fenc->i_frame == 0 ) { // access unit initialises the HRD - h->fenc->hrd_timing.cpb_initial_arrival_time = 0; + h->fenc->hrd_timing.cpb_initial_arrival_time = h->fenc->hrd_timing.safe_cpb_initial_arrival_time = 0; rc->initial_cpb_removal_delay = h->initial_cpb_removal_delay; rc->initial_cpb_removal_delay_offset = h->initial_cpb_removal_delay_offset; - h->fenc->hrd_timing.cpb_removal_time = rc->nrt_first_access_unit = (double)rc->initial_cpb_removal_delay / 90000; + h->fenc->hrd_timing.cpb_removal_time = rc->nrt_first_access_unit = (int64_t)h->initial_cpb_removal_delay * 300; } else { - h->fenc->hrd_timing.cpb_removal_time = rc->nrt_first_access_unit + (double)(h->fenc->i_cpb_delay - h->i_cpb_delay_pir_offset) * - h->sps->vui.i_num_units_in_tick / h->sps->vui.i_time_scale; + h->fenc->hrd_timing.cpb_removal_time = rc->nrt_first_access_unit + (int64_t)(h->fenc->i_cpb_delay - h->i_cpb_delay_pir_offset) * + 27000000LL * h->sps->vui.i_num_units_in_tick / h->sps->vui.i_time_scale; - double cpb_earliest_arrival_time = h->fenc->hrd_timing.cpb_removal_time - (double)rc->initial_cpb_removal_delay / 90000; if( 
h->fenc->b_keyframe ) { - rc->nrt_first_access_unit = h->fenc->hrd_timing.cpb_removal_time; - rc->initial_cpb_removal_delay = h->initial_cpb_removal_delay; - rc->initial_cpb_removal_delay_offset = h->initial_cpb_removal_delay_offset; + rc->nrt_first_access_unit = h->fenc->hrd_timing.cpb_removal_time; + rc->initial_cpb_removal_delay = h->initial_cpb_removal_delay; + rc->initial_cpb_removal_delay_offset = h->initial_cpb_removal_delay_offset; } - else - cpb_earliest_arrival_time -= (double)rc->initial_cpb_removal_delay_offset / 90000; + + int64_t cpb_earliest_arrival_time = h->fenc->hrd_timing.cpb_removal_time - rc->initial_cpb_removal_delay * 300; + + if( !h->fenc->b_keyframe ) + cpb_earliest_arrival_time -= rc->initial_cpb_removal_delay_offset * 300; + + /* Compare the arrival times using the 27MHz clock which should be acceptable */ + int64_t previous_cpb_arrival_time = rc->previous_cpb_final_arrival_time_int + (multiply_factor * rc->previous_cpb_final_arrival_time_frac + denom) + / (2*denom); if( h->sps->vui.hrd.b_cbr_hrd ) - h->fenc->hrd_timing.cpb_initial_arrival_time = rc->previous_cpb_final_arrival_time; + h->fenc->hrd_timing.cpb_initial_arrival_time = previous_cpb_arrival_time; else - h->fenc->hrd_timing.cpb_initial_arrival_time = X264_MAX( rc->previous_cpb_final_arrival_time, cpb_earliest_arrival_time ); + h->fenc->hrd_timing.cpb_initial_arrival_time = X264_MAX( previous_cpb_arrival_time, cpb_earliest_arrival_time ); + + /* With single frame vbv it's safe to allow the frame to arrive as early as possible */ + h->fenc->hrd_timing.safe_cpb_initial_arrival_time = rc->single_frame_vbv ? cpb_earliest_arrival_time : h->fenc->hrd_timing.cpb_initial_arrival_time; } + int filler_bits = *filler ? 
X264_MAX( (FILLER_OVERHEAD - h->param.b_annexb), *filler )*8 : 0; - // Equation C-6 - h->fenc->hrd_timing.cpb_final_arrival_time = rc->previous_cpb_final_arrival_time = h->fenc->hrd_timing.cpb_initial_arrival_time + - (double)(bits + filler_bits) / h->sps->vui.hrd.i_bit_rate_unscaled; + uint64_t frame_size = (uint64_t)(bits + filler_bits) * h->sps->vui.i_time_scale; /* widen before multiplying, as the denom and cpb_size products do, so a large frame cannot wrap a narrower intermediate product */ + uint64_t integer = (multiply_factor * frame_size) / (2*denom); + + rc->previous_cpb_final_arrival_time_int = h->fenc->hrd_timing.cpb_initial_arrival_time + integer; + rc->previous_cpb_final_arrival_time_frac = frame_size - ((integer * (2*denom)) / multiply_factor); + + h->fenc->hrd_timing.cpb_final_arrival_time = rc->previous_cpb_final_arrival_time_int + (multiply_factor * rc->previous_cpb_final_arrival_time_frac + denom) + / (2*denom); - h->fenc->hrd_timing.dpb_output_time = (double)h->fenc->i_dpb_output_delay * h->sps->vui.i_num_units_in_tick / h->sps->vui.i_time_scale + + h->fenc->hrd_timing.dpb_output_time = (int64_t)h->fenc->i_dpb_output_delay * 27000000LL * h->sps->vui.i_num_units_in_tick / h->sps->vui.i_time_scale + h->fenc->hrd_timing.cpb_removal_time; } @@ -2154,7 +2183,7 @@ void x264_hrd_fullness( x264_t *h ) uint64_t denom = (uint64_t)h->sps->vui.hrd.i_bit_rate_unscaled * h->sps->vui.i_time_scale / rct->hrd_multiply_denom; uint64_t cpb_state = rct->buffer_fill_final; uint64_t cpb_size = (uint64_t)h->sps->vui.hrd.i_cpb_size_unscaled * h->sps->vui.i_time_scale; - uint64_t multiply_factor = 180000 / rct->hrd_multiply_denom; + uint64_t multiply_factor = 54000000 / rct->hrd_multiply_denom; if( rct->buffer_fill_final < 0 || rct->buffer_fill_final > cpb_size ) { @@ -2162,8 +2191,8 @@ void x264_hrd_fullness( x264_t *h ) rct->buffer_fill_final < 0 ? 
"underflow" : "overflow", (float)rct->buffer_fill_final/denom, (float)cpb_size/denom ); } - h->initial_cpb_removal_delay = (multiply_factor * cpb_state + denom) / (2*denom); - h->initial_cpb_removal_delay_offset = (multiply_factor * cpb_size + denom) / (2*denom) - h->initial_cpb_removal_delay; + h->initial_cpb_removal_delay = (multiply_factor * cpb_state + denom) / (600*denom); + h->initial_cpb_removal_delay_offset = (multiply_factor * cpb_size + denom) / (600*denom) - h->initial_cpb_removal_delay; } // provisionally update VBV according to the planned size of all frames currently in progress @@ -2728,8 +2757,9 @@ void x264_thread_sync_ratecontrol( x264_t *cur, x264_t *prev, x264_t *next ) COPY(bframe_bits); COPY(initial_cpb_removal_delay); COPY(initial_cpb_removal_delay_offset); + COPY(previous_cpb_final_arrival_time_int); + COPY(previous_cpb_final_arrival_time_frac); COPY(nrt_first_access_unit); - COPY(previous_cpb_final_arrival_time); #undef COPY } //FIXME row_preds[] (not strictly necessary, but would improve prediction) diff --git a/encoder/ratecontrol.h b/encoder/ratecontrol.h index 9321e726..70640c42 100644 --- a/encoder/ratecontrol.h +++ b/encoder/ratecontrol.h @@ -64,5 +64,12 @@ int x264_weighted_reference_duplicate( x264_t *h, int i_ref, const x264_weight_t void x264_threads_distribute_ratecontrol( x264_t *h ); void x264_threads_merge_ratecontrol( x264_t *h ); void x264_hrd_fullness( x264_t *h ); + +// speedcontrol +void x264_speedcontrol_new( x264_t *h ); +void x264_speedcontrol_delete( x264_t *h ); +void x264_speedcontrol_frame( x264_t *h ); +void x264_speedcontrol_frame_end( x264_t *h ); + #endif diff --git a/encoder/set.c b/encoder/set.c index a807becb..cb81bcb3 100644 --- a/encoder/set.c +++ b/encoder/set.c @@ -281,7 +281,7 @@ void x264_sps_init( x264_sps_t *sps, int i_id, x264_param_t *param ) } sps->vui.b_vcl_hrd_parameters_present = 0; // we don't support VCL HRD - sps->vui.b_nal_hrd_parameters_present = !!param->i_nal_hrd; + 
sps->vui.b_nal_hrd_parameters_present = param->i_nal_hrd == X264_NAL_HRD_VBR || param->i_nal_hrd == X264_NAL_HRD_CBR; sps->vui.b_pic_struct_present = param->b_pic_struct; // NOTE: HRD related parts of the SPS are initialised in x264_ratecontrol_init_reconfigurable diff --git a/encoder/speed.c b/encoder/speed.c new file mode 100644 index 00000000..dcc389ab --- /dev/null +++ b/encoder/speed.c @@ -0,0 +1,269 @@ +#include <stdio.h> +#include <string.h> +#include <math.h> /* NOTE(review): the three system header names were stripped from this copy; restored from usage (memset, powf) - confirm against the original patch */ +#include "common/common.h" +#include "common/cpu.h" + +struct x264_speedcontrol_t +{ + // all times are in usec + int64_t timestamp; // when was speedcontrol last invoked + int64_t cpu_time; // time spent encoding the previous frame + int64_t buffer_size; // assumed application-side buffer of frames to be streamed, + int64_t buffer_fill; // where full = we don't have to hurry + int64_t compensation_period; // how quickly we try to return to the target buffer fullness + float fps, spf; + int preset; // which setting was used in the previous frame + int prev_frame; + float cplx_num; // rolling average of estimated spf for preset #0 + float cplx_den; + float cplx_decay; + float dither; + + int first; + int buffer_complete; + + struct + { + int64_t min_buffer, max_buffer; + double avg_preset; + int den; + } stat; +}; + +void x264_speedcontrol_new( x264_t *h ) +{ + x264_speedcontrol_t *sc = h->sc = x264_malloc( sizeof(x264_speedcontrol_t) ); + x264_emms(); + memset( sc, 0, sizeof(x264_speedcontrol_t) ); + + if( h->param.sc.f_speed <= 0 ) + h->param.sc.f_speed = 1; + sc->fps = (float)h->param.i_fps_num / h->param.i_fps_den; /* force a floating-point divide so fractional rates such as 30000/1001 are not truncated (and rates below 1 do not become 0) */ + sc->spf = 1e6 / sc->fps; + h->param.sc.i_buffer_size = X264_MAX( 3, h->param.sc.i_buffer_size ); + sc->buffer_size = h->param.sc.i_buffer_size * 1e6 / sc->fps; + sc->buffer_fill = sc->buffer_size * h->param.sc.f_buffer_init; + sc->buffer_fill = x264_clip3( sc->buffer_fill, sc->spf, sc->buffer_size ); + sc->compensation_period = sc->buffer_size/4; + sc->timestamp = x264_mdate(); + sc->preset = -1; + sc->prev_frame = 0; + 
sc->cplx_num = 3e3; //FIXME estimate initial complexity + sc->cplx_den = .1; + sc->cplx_decay = 1 - 1./h->param.sc.i_buffer_size; + sc->stat.min_buffer = sc->buffer_size; + sc->stat.max_buffer = 0; + sc->first = 1; + sc->buffer_complete = 0; +} + +void x264_speedcontrol_delete( x264_t *h ) +{ + x264_speedcontrol_t *sc = h->sc; + if( !sc ) + return; + x264_log( h, X264_LOG_INFO, "speedcontrol: avg preset=%.3f buffer min=%.3f max=%.3f\n", + sc->stat.avg_preset / sc->stat.den, + (float)sc->stat.min_buffer / sc->buffer_size, + (float)sc->stat.max_buffer / sc->buffer_size ); +// x264_log( h, X264_LOG_INFO, "speedcontrol: avg cplx=%.5f\n", sc->cplx_num / sc->cplx_den ); + x264_free( sc ); +} + +static int dither( x264_speedcontrol_t *sc, float f ) +{ + int i = f; + if( f < 0 ) + i--; + sc->dither += f - i; + if( sc->dither >= 1. ) + { + sc->dither--; + i++; + } + return i; +} + +typedef struct +{ + float time; // relative encoding time, compared to the other presets + int subme; + int me; + int refs; + int mix; + int trellis; + int partitions; + int chromame; + float psy_rd; + float psy_trellis; +} sc_preset_t; + +static const sc_preset_t presets[SC_PRESETS] = +{ +#define I4 X264_ANALYSE_I4x4 +#define I8 X264_ANALYSE_I8x8 +#define P8 X264_ANALYSE_PSUB16x16 +#define B8 X264_ANALYSE_BSUB16x16 +/*0*/ { .time=1.000, .subme=1, .me=X264_ME_DIA, .refs=1, .mix=0, .chromame=0, .trellis=0, .partitions=0, .psy_rd=0 }, +/*1*/ { .time=1.009, .subme=1, .me=X264_ME_DIA, .refs=1, .mix=0, .chromame=0, .trellis=0, .partitions=I8|I4, .psy_rd=0 }, +/*2*/ { .time=1.843, .subme=3, .me=X264_ME_HEX, .refs=1, .mix=0, .chromame=0, .trellis=0, .partitions=I8|I4, .psy_rd=0 }, +/*3*/ { .time=1.984, .subme=5, .me=X264_ME_HEX, .refs=1, .mix=0, .chromame=0, .trellis=0, .partitions=I8|I4, .psy_rd=1.0 }, +/*4*/ { .time=2.289, .subme=6, .me=X264_ME_HEX, .refs=1, .mix=0, .chromame=0, .trellis=0, .partitions=I8|I4, .psy_rd=1.0 }, +/*5*/ { .time=3.113, .subme=6, .me=X264_ME_HEX, .refs=1, .mix=0, .chromame=0, 
.trellis=1, .partitions=I8|I4, .psy_rd=1.0 }, +/*6*/ { .time=3.400, .subme=6, .me=X264_ME_HEX, .refs=2, .mix=0, .chromame=0, .trellis=1, .partitions=I8|I4, .psy_rd=1.0 }, +/*7*/ { .time=3.755, .subme=7, .me=X264_ME_HEX, .refs=2, .mix=0, .chromame=0, .trellis=1, .partitions=I8|I4, .psy_rd=1.0 }, +/*8*/ { .time=4.592, .subme=7, .me=X264_ME_HEX, .refs=2, .mix=0, .chromame=0, .trellis=1, .partitions=I8|I4|P8|B8, .psy_rd=1.0 }, +/*9*/ { .time=4.730, .subme=7, .me=X264_ME_HEX, .refs=3, .mix=0, .chromame=0, .trellis=1, .partitions=I8|I4|P8|B8, .psy_rd=1.0 }, +/*10*/ { .time=5.453, .subme=8, .me=X264_ME_HEX, .refs=3, .mix=0, .chromame=0, .trellis=1, .partitions=I8|I4|P8|B8, .psy_rd=1.0 }, +/*11*/ { .time=8.277, .subme=8, .me=X264_ME_UMH, .refs=3, .mix=1, .chromame=1, .trellis=1, .partitions=I8|I4|P8|B8, .psy_rd=1.0 }, +/*12*/ { .time=8.410, .subme=8, .me=X264_ME_UMH, .refs=4, .mix=1, .chromame=1, .trellis=1, .partitions=I8|I4|P8|B8, .psy_rd=1.0 } +}; + +static void apply_preset( x264_t *h, int preset ) +{ + x264_speedcontrol_t *sc = h->sc; + preset = x264_clip3( preset, 0, h->param.sc.max_preset-1 ); + if( 0 ) + { + const sc_preset_t *s = &presets[preset]; + x264_param_t p = h->param; + + p.i_frame_reference = s->refs; + p.analyse.inter = s->partitions; + p.analyse.i_subpel_refine = s->subme; + p.analyse.i_me_method = s->me; + p.analyse.i_trellis = s->trellis; + p.analyse.b_mixed_references = s->mix; + p.analyse.b_chroma_me = s->chromame; + p.analyse.f_psy_rd = s->psy_rd; + p.analyse.f_psy_trellis = s->psy_trellis; + x264_encoder_reconfig( h, &p ); + sc->preset = preset; + x264_log( h, X264_LOG_DEBUG, "Applying speedcontrol preset %d.\n", preset ); + } +} + +void x264_speedcontrol_frame_end( x264_t *h ) +{ + x264_speedcontrol_t *sc = h->sc; + if( h->param.sc.b_alt_timer ) + sc->cpu_time = x264_mdate() - sc->timestamp; +} + +void x264_speedcontrol_frame( x264_t *h ) +{ + x264_speedcontrol_t *sc = h->sc; + int64_t t, delta_t, delta_buffer; + int delta_f; + + x264_emms(); + + 
// update buffer state after encoding and outputting the previous frame(s) + if( sc->first ) + { + t = sc->timestamp = x264_mdate(); + sc->first = 0; + } + else + t = x264_mdate(); + + delta_f = h->i_frame - sc->prev_frame; + delta_t = t - sc->timestamp; + delta_buffer = delta_f * sc->spf / h->param.sc.f_speed - delta_t; + if( !sc->buffer_complete ) + sc->buffer_fill += delta_buffer; + sc->prev_frame = h->i_frame; + sc->timestamp = t; + + // update the time predictor + if( delta_f && sc->preset >= 0 ) /* sc->preset stays -1 until apply_preset() records one; skip the update rather than index presets[] out of bounds */ + { + int cpu_time = h->param.sc.b_alt_timer ? sc->cpu_time : delta_t; + float decay = powf( sc->cplx_decay, delta_f ); + sc->cplx_num *= decay; + sc->cplx_den *= decay; + sc->cplx_num += cpu_time / presets[sc->preset].time; + sc->cplx_den += delta_f; + + sc->stat.avg_preset += sc->preset * delta_f; + sc->stat.den += delta_f; + } + sc->stat.min_buffer = X264_MIN( sc->buffer_fill, sc->stat.min_buffer ); + sc->stat.max_buffer = X264_MAX( sc->buffer_fill, sc->stat.max_buffer ); + + if( sc->buffer_fill > sc->buffer_size ) // oops, cpu was idle + { + // not really an error, but we'll warn for debugging purposes + static int64_t idle_t = 0, print_interval = 0; + idle_t += sc->buffer_fill - sc->buffer_size; + if( t - print_interval > 1e6 ) + { + x264_log( h, X264_LOG_DEBUG, "speedcontrol idle (%.6f sec)\n", idle_t/1e6 ); + print_interval = t; + idle_t = 0; + } + sc->buffer_fill = sc->buffer_size; + } + else if( sc->buffer_fill < 0 && delta_buffer < 0 ) // oops, we're late + { + // don't clip fullness to 0; we'll hope the real buffer was bigger than + // specified, and maybe we can catch up. if the application had to drop + // frames, then it should override the buffer fullness (FIXME implement this). 
+ x264_log( h, X264_LOG_WARNING, "speedcontrol underflow (%.6f sec)\n", sc->buffer_fill/1e6 ); + } + + { + // pick the preset that should return the buffer to 3/4-full within a time + // specified by compensation_period + float target = sc->spf / h->param.sc.f_speed + * (sc->buffer_fill + sc->compensation_period) + / (sc->buffer_size*3/4 + sc->compensation_period); + float cplx = sc->cplx_num / sc->cplx_den; + float set, t0, t1; + float filled = (float) sc->buffer_fill / sc->buffer_size; + int i; + t0 = presets[0].time * cplx; + for( i=1;; i++ ) + { + t1 = presets[i].time * cplx; + if( t1 >= target || i >= h->param.sc.max_preset-1 ) /* >= so that max_preset == 1 still terminates instead of walking past presets[] */ + break; + t0 = t1; + } + // linear interpolation between states + set = i-1 + (target - t0) / (t1 - t0); + // Even if our time estimations in the SC_PRESETS array are off + // this will push us towards our target fullness + set += (20 * (filled-0.75)); + set = x264_clip3f( set, 0 , h->param.sc.max_preset-1 ); + apply_preset( h, dither( sc, set ) ); + + // FIXME + if (h->param.i_log_level >= X264_LOG_DEBUG) + { + static float cpu, wall, tgt, den; + float decay = 1-1/100.; + cpu = cpu*decay + sc->cpu_time; + wall = wall*decay + delta_t; + tgt = tgt*decay + target; + den = den*decay + 1; + x264_log( h, X264_LOG_DEBUG, "speed: %.2f %d[%.5f] (t/c/w: %6.0f/%6.0f/%6.0f = %.4f) fps=%.2f\r", + set, sc->preset, (float)sc->buffer_fill / sc->buffer_size, + tgt/den, cpu/den, wall/den, cpu/wall, 1e6*den/wall ); + } + } + +} + +void x264_speedcontrol_sync( x264_t *h, float f_buffer_fill, int i_buffer_size, int buffer_complete ) +{ + x264_speedcontrol_t *sc = h->sc; + if( !h->param.sc.i_buffer_size ) + return; + if( i_buffer_size ) + h->param.sc.i_buffer_size = X264_MAX( 3, i_buffer_size ); /* take the caller's new size (minimum 3 frames); a zero argument keeps the current size */ + sc->buffer_size = h->param.sc.i_buffer_size * 1e6 / sc->fps; + sc->buffer_fill = sc->buffer_size * f_buffer_fill; + sc->buffer_complete = !!buffer_complete; +} diff --git a/x264.c b/x264.c index 5fe7eaef..e1670a53 100644 --- a/x264.c +++ b/x264.c @@ 
-756,6 +756,14 @@ static void help( x264_param_t *defaults, int longhelp ) " K= depending on open-gop setting\n" " QPs are restricted by qpmin/qpmax.\n" ); H1( "\n" ); + + H1( "Speedcontrol:\n" ); + H1( "\n" ); + H1( " --speed Automatically adjust other options to achieve this\n" ); + H1( " fraction of realtime.\n" ); + H1( " --speed-bufsize Averaging period for speed. (in frames) [%d]\n", defaults->sc.i_buffer_size ); + H1( "\n" ); + H1( "Analysis:\n" ); H1( "\n" ); H1( " -A, --partitions Partitions to consider [\"p8x8,b8x8,i8x8,i4x4\"]\n" @@ -1096,6 +1104,8 @@ static struct option long_options[] = { "cplxblur", required_argument, NULL, 0 }, { "zones", required_argument, NULL, 0 }, { "qpfile", required_argument, NULL, OPT_QPFILE }, + { "speed", required_argument, NULL, 0 }, + { "speed-bufsize", required_argument, NULL, 0 }, { "threads", required_argument, NULL, 0 }, { "lookahead-threads", required_argument, NULL, 0 }, { "sliced-threads", no_argument, NULL, 0 }, diff --git a/x264.h b/x264.h index 8b10b7a1..b71c7805 100644 --- a/x264.h +++ b/x264.h @@ -213,7 +213,7 @@ static const char * const x264_colorprim_names[] = { "", "bt709", "undef", "", " static const char * const x264_transfer_names[] = { "", "bt709", "undef", "", "bt470m", "bt470bg", "smpte170m", "smpte240m", "linear", "log100", "log316", "iec61966-2-4", "bt1361e", "iec61966-2-1", "bt2020-10", "bt2020-12", 0 }; static const char * const x264_colmatrix_names[] = { "GBR", "bt709", "undef", "", "fcc", "bt470bg", "smpte170m", "smpte240m", "YCgCo", "bt2020nc", "bt2020c", 0 }; -static const char * const x264_nal_hrd_names[] = { "none", "vbr", "cbr", 0 }; +static const char * const x264_nal_hrd_names[] = { "none", "vbr", "cbr", "fakevbr", "fakecbr", 0 }; /* Colorspace type */ #define X264_CSP_MASK 0x00ff /* */ @@ -260,6 +260,8 @@ static const char * const x264_nal_hrd_names[] = { "none", "vbr", "cbr", 0 }; #define X264_NAL_HRD_NONE 0 #define X264_NAL_HRD_VBR 1 #define X264_NAL_HRD_CBR 2 +#define 
X264_NAL_HRD_FAKE_VBR 3 +#define X264_NAL_HRD_FAKE_CBR 4 /* Intra DC Precision */ #define X264_INTRA_DC_8_BIT 0 @@ -313,6 +315,7 @@ typedef struct x264_param_t int i_csp; /* CSP of encoded bitstream */ int i_level_idc; int i_frame_total; /* number of frames to encode if known, else 0 */ + int i_profile; /* Output Only */ int b_mpeg2; /* encode MPEG-2 instead of H.264 */ @@ -491,6 +494,16 @@ typedef struct x264_param_t MPEG-2: Use extra_sei to write appropriate user_data instead */ int i_frame_packing; + /* Speed control parameters */ + struct + { + float f_speed; /* ratio from realtime */ + int i_buffer_size; /* number of frames */ + float f_buffer_init; /* fraction of size */ + int b_alt_timer; /* use a different method of measuring encode time */ + int max_preset; /* maximum number of speedcontrol presets to use */ + } sc; + /* Muxing parameters */ int b_aud; /* generate access unit delimiters */ int b_repeat_headers; /* put SPS/PPS before each keyframe */ @@ -760,11 +773,11 @@ enum pic_struct_e typedef struct { - double cpb_initial_arrival_time; - double cpb_final_arrival_time; - double cpb_removal_time; - - double dpb_output_time; + int64_t cpb_initial_arrival_time; + int64_t safe_cpb_initial_arrival_time; + int64_t cpb_final_arrival_time; + int64_t cpb_removal_time; + int64_t dpb_output_time; } x264_hrd_t; /* Arbitrary user SEI: @@ -1022,4 +1035,9 @@ void x264_encoder_intra_refresh( x264_t * ); * Returns 0 on success, negative on failure. */ int x264_encoder_invalidate_reference( x264_t *, int64_t pts ); +/* x264_speedcontrol_sync: + * override speedcontrol's internal clock */ +void x264_speedcontrol_sync( x264_t *, float f_buffer_fill, int i_buffer_size, int buffer_complete ); + + #endif