-
Notifications
You must be signed in to change notification settings - Fork 53
/
minih264e.h
11714 lines (10624 loc) · 401 KB
/
minih264e.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#ifndef MINIH264_H
#define MINIH264_H
/*
https://github.com/lieff/minih264
To the extent possible under law, the author(s) have dedicated all copyright and related and neighboring rights to this software to the public domain worldwide.
This software is distributed without any warranty.
See <http://creativecommons.org/publicdomain/zero/1.0/>.
*/
#ifdef __cplusplus
extern "C" {
#endif
// Build switch: when non-zero, the SVC fields (num_layers,
// inter_layer_pred_flag) are compiled into the create parameters.
// May be pre-defined by the application; defaults to 1.
#ifndef H264E_SVC_API
# define H264E_SVC_API 1
#endif
// Build switch: when non-zero, the multi-thread fields (max_threads, token,
// run_func_in_thread) are compiled into the create parameters.
// May be pre-defined by the application; defaults to 4.
#ifndef H264E_MAX_THREADS
# define H264E_MAX_THREADS 4
#endif
/**
* API return error codes
*
* H264E_STATUS_SUCCESS is zero; every failure code is non-zero.
* These are the H264E_STATUS_* values referred to by the return-value
* documentation of H264E_sizeof(), H264E_init() and H264E_encode().
*/
#define H264E_STATUS_SUCCESS 0
#define H264E_STATUS_BAD_ARGUMENT 1
#define H264E_STATUS_BAD_PARAMETER 2
#define H264E_STATUS_BAD_FRAME_TYPE 3
#define H264E_STATUS_SIZE_NOT_MULTIPLE_16 4
#define H264E_STATUS_SIZE_NOT_MULTIPLE_2 5
#define H264E_STATUS_BAD_LUMA_ALIGN 6
#define H264E_STATUS_BAD_LUMA_STRIDE 7
#define H264E_STATUS_BAD_CHROMA_ALIGN 8
#define H264E_STATUS_BAD_CHROMA_STRIDE 9
/**
* Frame type definitions
* - Sequence must start with key (IDR) frame.
* - P (Predicted) frames are most efficiently coded
* - Droppable frames may be safely removed from the bitstream, and are used
* for frame rate scalability
* - Golden and Recovery frames are used for error recovery. These
* frames use a "long-term reference" for prediction, and
* can be decoded if the P frame sequence is interrupted.
* They act similarly to a key frame, but are coded more efficiently.
*
* Type Refers to Saved as long-term Saved as short-term
* ---------------------------------------------------------------
* Key (IDR) : N/A Yes Yes |
* Golden : long-term Yes Yes |
* Recovery : long-term No Yes |
* P : short-term No Yes |
* Droppable : short-term No No |
* |
* Example sequence: K P P G D P R D K |
* long-term reference 1K 1K 1K 4G 4G 4G 4G 4G 9K |
* / \ / \ / |
* coded frame 1K 2P 3P 4G 5D 6P 7R 8D 9K |
* \ / \ / \ \ / / \ \ / \ |
* short-term reference 1K 2P 3P 4G 4G 6P 7R 7R 9K |
*
*/
// Values accepted by H264E_run_param_t::frame_type
#define H264E_FRAME_TYPE_DEFAULT 0 // Frame type set according to GOP size
#define H264E_FRAME_TYPE_KEY 6 // Random access point: SPS+PPS+Intra frame
#define H264E_FRAME_TYPE_I 5 // Intra frame: updates long & short-term reference
#define H264E_FRAME_TYPE_GOLDEN 4 // Use and update long-term reference
#define H264E_FRAME_TYPE_RECOVERY 3 // Use long-term reference, updates short-term reference
#define H264E_FRAME_TYPE_P 2 // Use and update short-term reference
#define H264E_FRAME_TYPE_DROPPABLE 1 // Use short-term reference, don't update anything
#define H264E_FRAME_TYPE_CUSTOM 99 // Application specifies reference frame
/**
* Speed preset index.
* NOTE(review): presumably the value range of H264E_run_param_t::encode_speed
* (documented there as "0 means best quality") - confirm.
* Currently used values are 0, 1, 8 and 9
*/
#define H264E_SPEED_SLOWEST 0 // All coding tools enabled, including denoise filter
#define H264E_SPEED_BALANCED 5 // Half-way between the slowest and the fastest preset
#define H264E_SPEED_FASTEST 10 // Minimum tools enabled
/**
* Creation parameters
*
* Passed to H264E_sizeof() and H264E_init(); describes the encoding session.
*/
typedef struct H264E_create_param_tag
{
// Frame width: must be multiple of 16
int width;
// Frame height: must be multiple of 16
int height;
// GOP size == key frame period
// If 0: no key frames generated except 1st frame (infinite GOP)
// If 1: Only intra-frames produced
int gop;
// Video Buffer Verifier (VBV) size
// NOTE(review): the comment used to say "bits" while the field name says
// "bytes" - confirm the unit against the implementation.
// If 0: VBV model would be disabled
// Note that this value also defines the H.264 Level.
int vbv_size_bytes;
// If set: transparent frames produced on VBV overflow
// If not set: VBV overflow ignored, produce bitrate bigger than specified
int vbv_overflow_empty_frame_flag;
// If set: keep minimum bitrate using stuffing, prevent VBV underflow
// If not set: ignore VBV underflow, produce bitrate smaller than specified
int vbv_underflow_stuffing_flag;
// If set: control bitrate at macroblock-level (better bitrate precision)
// If not set: control bitrate at frame-level (better quality)
int fine_rate_control_flag;
// If set: don't change input, but allocate additional frame buffer
// If not set: use input as a scratch
int const_input_flag;
// If 0: golden, recovery, and custom frames are disabled
// If >0: Specifies the number of persistent frame buffers used
int max_long_term_reference_frames;
// NOTE(review): undocumented; presumably a run-time switch for the NEON
// code path - confirm against the implementation.
int enableNEON;
// If set: enable temporal noise suppression
int temporal_denoise_flag;
// NOTE(review): undocumented; presumably the SPS identifier written to the
// bitstream - confirm against the implementation.
int sps_id;
#if H264E_SVC_API
// SVC extension
// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
// Number of SVC layers:
// 1 = AVC
// 2 = SVC with 2-layers of spatial scalability
int num_layers;
// If set, SVC extension layer will use predictors from base layer
// (sometimes can slightly increase efficiency)
int inter_layer_pred_flag;
#endif
#if H264E_MAX_THREADS
// Multi-thread extension
// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
// Maximum threads, supported by the callback
int max_threads;
// Opaque token, passed to callback
void *token;
// Application-supplied callback function.
// This callback runs given jobs, by calling provided job_func(), passing
// job_data[i] to each one.
//
// The h264e_thread_pool_run() can be used here, example:
//
// int max_threads = 4;
// void *thread_pool = h264e_thread_pool_init(max_threads);
//
// H264E_create_param_t par;
// par.max_threads = max_threads;
// par.token = thread_pool;
// par.run_func_in_thread = h264e_thread_pool_run;
//
// The reason to use double callbacks is to avoid mixing portable and
// system-dependent code, and to avoid close() function in the encoder API.
//
void (*run_func_in_thread)(void *token, void (*job_func)(void*), void *job_data[], int njobs);
#endif
} H264E_create_param_t;
/**
* Run-time parameters
*
* Passed to H264E_encode() with every frame.
*/
typedef struct H264E_run_param_tag
{
// Variable, indicating speed/quality tradeoff
// 0 means best quality
int encode_speed;
// Frame type override: one of H264E_FRAME_TYPE_* values
// if 0: GOP pattern defined by create_param::gop value
int frame_type;
// Used only if frame_type == H264E_FRAME_TYPE_CUSTOM
// Reference long-term frame index [1..max_long_term_reference_frames]
// 0 = use previous frame (short-term)
// -1 = IDR frame, kill all long-term frames
int long_term_idx_use;
// Used only if frame_type == H264E_FRAME_TYPE_CUSTOM
// Store decoded frame in long-term buffer with given index in the
// range [1..max_long_term_reference_frames]
// 0 = save to short-term buffer
// -1 = Don't save frame (droppable)
int long_term_idx_update;
// Target frame size. Typically = bitrate/framerate
int desired_frame_bytes;
// Minimum quantizer value, 10 indicates good quality
// range: [10; qp_max]
int qp_min;
// Maximum quantizer value, 51 indicates very bad quality
// range: [qp_min; 51]
int qp_max;
// Desired NALU size. A NALU is produced as soon as its size exceeds this value
// if 0: frame would be coded with a single NALU
int desired_nalu_bytes;
// Optional NALU notification callback, called by the encoder
// as soon as NALU encoding is complete.
void (*nalu_callback)(
const unsigned char *nalu_data, // Coded NALU data, w/o start code
int sizeof_nalu_data, // Size of NALU data
void *token // optional transparent token
);
// token to pass to NALU callback
void *nalu_callback_token;
} H264E_run_param_t;
/**
* Planar YUV420 descriptor
*
* Used for the input frame given to H264E_encode().
*/
typedef struct H264E_io_yuv_tag
{
// Pointers to 3 pixel planes of YUV image
unsigned char *yuv[3];
// Stride for each image plane (one entry per plane in yuv[])
int stride[3];
} H264E_io_yuv_t;
// Opaque encoder object ("persistent RAM"); the structure is defined only in
// the implementation section of this file.
typedef struct H264E_persist_tag H264E_persist_t;
// Opaque scratch memory object; the structure is defined only in the
// implementation section of this file.
typedef struct H264E_scratch_tag H264E_scratch_t;
/**
* Return persistent and scratch memory requirements
* for given encoding options.
*
* The application allocates both buffers itself, using the reported sizes
* (see the example below).
*
* Return value:
* -zero in case of success
* -error code (H264E_STATUS_*), if fails
*
* example:
*
* int sizeof_persist, sizeof_scratch, error;
* H264E_persist_t * enc;
* H264E_scratch_t * scratch;
*
* error = H264E_sizeof(param, &sizeof_persist, &sizeof_scratch);
* if (!error)
* {
* enc = malloc(sizeof_persist);
* scratch = malloc(sizeof_scratch);
* error = H264E_init(enc, param);
* }
*/
int H264E_sizeof(
const H264E_create_param_t *param, ///< Encoder creation parameters
int *sizeof_persist, ///< [OUT] Size of persistent RAM
int *sizeof_scratch ///< [OUT] Size of scratch RAM
);
/**
* Initialize encoding session
*
* The encoder object is caller-allocated memory of the size reported by
* H264E_sizeof() for the same creation parameters.
*
* Return value:
* -zero in case of success
* -error code (H264E_STATUS_*), if fails
*/
int H264E_init(
H264E_persist_t *enc, ///< Encoder object
const H264E_create_param_t *param ///< Encoder creation parameters
);
/**
* Encode single video frame
*
* Output buffer is in the scratch RAM: *coded_data points into the memory
* passed as `scratch`, it is not a separate allocation.
* NOTE(review): the output is presumably overwritten by the next call that
* uses the same scratch memory - confirm before keeping the pointer.
*
* Return value:
* -zero in case of success
* -error code (H264E_STATUS_*), if fails
*/
int H264E_encode(
H264E_persist_t *enc, ///< Encoder object
H264E_scratch_t *scratch, ///< Scratch memory
const H264E_run_param_t *run_param, ///< run-time parameters
H264E_io_yuv_t *frame, ///< Input video frame
unsigned char **coded_data, ///< [OUT] Pointer to coded data
int *sizeof_coded_data ///< [OUT] Size of coded data
);
/**
* This is a "hack" function to set internal rate-control state
* Note that the encoder allows the application to completely override
* rate-control, so this function should be used only by lazy coders, who just
* want to change the VBV size without implementing custom rate-control.
*
* Note that the H.264 level is defined by the VBV size on initialization.
*/
void H264E_set_vbv_state(
H264E_persist_t *enc, ///< Encoder object
int vbv_size_bytes, ///< New VBV size
int vbv_fullness_bytes ///< New VBV fullness, -1 = no change
);
#ifdef __cplusplus
}
#endif
#endif //MINIH264_H
#if defined(MINIH264_IMPLEMENTATION) && !defined(MINIH264_IMPLEMENTATION_GUARD)
#define MINIH264_IMPLEMENTATION_GUARD
#include <assert.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
/************************************************************************/
/* Build configuration */
/************************************************************************/
// Both macros may be pre-defined by the application before including this file.
#ifndef H264E_ENABLE_DENOISE
#define H264E_ENABLE_DENOISE 1 // Built-in noise suppressor
#endif
#ifndef MAX_LONG_TERM_FRAMES
#define MAX_LONG_TERM_FRAMES 8 // Max long-term frames count
#endif
// 64-bit x86 and ARM targets: force the SIMD-only build.
#if !defined(MINIH264_ONLY_SIMD) && (defined(_M_X64) || defined(_M_ARM64) || defined(__x86_64__) || defined(__aarch64__))
/* x64 always have SSE2, arm64 always have neon, no need for generic code */
#define MINIH264_ONLY_SIMD
#endif /* SIMD checks... */
// Select the SIMD flavour: SSE2 on x86 (MSVC, or GCC/clang with __SSE2__),
// NEON on ARM. With neither available, MINIH264_ONLY_SIMD is a build error.
#if (defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64))) || ((defined(__i386__) || defined(__x86_64__)) && defined(__SSE2__))
#define H264E_ENABLE_SSE2 1
#if defined(_MSC_VER)
#include <intrin.h>
#else
#include <emmintrin.h>
#endif
#elif defined(__ARM_NEON) || defined(__aarch64__)
#define H264E_ENABLE_NEON 1
#include <arm_neon.h>
#else
#ifdef MINIH264_ONLY_SIMD
#error MINIH264_ONLY_SIMD used, but SSE/NEON not enabled
#endif
#endif
// The plain C code path is built unless a SIMD-only build was requested.
#ifndef MINIH264_ONLY_SIMD
#define H264E_ENABLE_PLAIN_C 1
#endif
// Number of enabled code paths. H264E_ENABLE_* macros that were not defined
// above evaluate to 0 when this is used in an #if expression.
#define H264E_CONFIGS_COUNT ((H264E_ENABLE_SSE2) + (H264E_ENABLE_PLAIN_C) + (H264E_ENABLE_NEON))
// Byte-order detection: afterwards (__BYTE_ORDER == __BIG_ENDIAN) can be
// tested by the preprocessor.
// ARMCC, Windows and Emscripten get two different constants, i.e. they are
// treated as not big-endian; the other platforms take the macros from their
// system headers.
#if defined(__ARMCC_VERSION) || defined(_WIN32) || defined(__EMSCRIPTEN__)
#define __BYTE_ORDER 0
#define __BIG_ENDIAN 1
#elif defined(__linux__) || defined(__CYGWIN__)
#include <endian.h>
#elif defined(__APPLE__)
#include <libkern/OSByteOrder.h>
#define __BYTE_ORDER BYTE_ORDER
#define __BIG_ENDIAN BIG_ENDIAN
#elif defined(__OpenBSD__) || defined(__NetBSD__) || defined(__FreeBSD__) || defined(__DragonFly__)
#include <sys/endian.h>
#else
#error platform not supported
#endif
// Workaround: on aarch64 clang, replace uintptr_t by unsigned long for the
// rest of this translation unit (affects every later use of the name).
#if defined(__aarch64__) && defined(__clang__)
// uintptr_t broken with aarch64 clang on ubuntu 18
#define uintptr_t unsigned long
#endif
#if defined(__arm__) && defined(__clang__)
#include <arm_acle.h>
#elif defined(__arm__) && defined(__GNUC__) && !defined(__ARMCC_VERSION)
// GCC (32-bit ARM) stand-in for the same-named intrinsic that the clang
// branch takes from <arm_acle.h>: issues the "usad8" instruction on
// val1, val2 and returns the destination register.
// Per the ARM architecture this is the unsigned sum of absolute differences
// of the four byte lanes.
static inline unsigned int __usad8(unsigned int val1, unsigned int val2)
{
unsigned int result;
__asm__ volatile ("usad8 %0, %1, %2\n\t"
: "=r" (result)
: "r" (val1), "r" (val2));
return result;
}
// GCC (32-bit ARM) stand-in for the same-named <arm_acle.h> intrinsic:
// issues the "usada8" instruction on val1, val2, val3 and returns the
// destination register.
// Per the ARM architecture this is the byte-wise sum of absolute differences
// of val1 and val2, accumulated onto val3.
static inline unsigned int __usada8(unsigned int val1, unsigned int val2, unsigned int val3)
{
unsigned int result;
__asm__ volatile ("usada8 %0, %1, %2, %3\n\t"
: "=r" (result)
: "r" (val1), "r" (val2), "r" (val3));
return result;
}
// GCC (32-bit ARM) stand-in for the same-named <arm_acle.h> intrinsic:
// issues the "sadd16" instruction on val1, val2 and returns the destination
// register.
// Per the ARM architecture this adds the two signed 16-bit halves independently.
static inline unsigned int __sadd16(unsigned int val1, unsigned int val2)
{
unsigned int result;
__asm__ volatile ("sadd16 %0, %1, %2\n\t"
: "=r" (result)
: "r" (val1), "r" (val2));
return result;
}
// GCC (32-bit ARM) stand-in for the same-named <arm_acle.h> intrinsic:
// issues the "ssub16" instruction on val1, val2 and returns the destination
// register.
// Per the ARM architecture this subtracts the two signed 16-bit halves
// independently.
static inline unsigned int __ssub16(unsigned int val1, unsigned int val2)
{
unsigned int result;
__asm__ volatile ("ssub16 %0, %1, %2\n\t"
: "=r" (result)
: "r" (val1), "r" (val2));
return result;
}
// GCC (32-bit ARM) stand-in for the same-named <arm_acle.h> intrinsic:
// issues the "clz" (count leading zeros) instruction on val1 and returns the
// destination register.
static inline unsigned int __clz(unsigned int val1)
{
unsigned int result;
__asm__ volatile ("clz %0, %1\n\t"
: "=r" (result)
: "r" (val1));
return result;
}
#endif
#ifdef __cplusplus
extern "C" {
#endif //__cplusplus
// h264e_restrict: no-alias pointer qualifier. Expands to __restrict on
// MSVC (version 1400 and later) and on 32-bit ARM, and to nothing elsewhere.
#if defined(_MSC_VER) && _MSC_VER >= 1400
# define h264e_restrict __restrict
#elif defined(__arm__)
# define h264e_restrict __restrict
#else
# define h264e_restrict
#endif
// Alignment specifiers. MSVC takes the attribute before the declaration
// (ALIGN), other compilers after it (ALIGN2); on each compiler the other
// macro expands to nothing, so a declaration can carry both.
#if defined(_MSC_VER)
# define ALIGN(n) __declspec(align(n))
# define ALIGN2(n)
#else
# define ALIGN(n)
# define ALIGN2(n) __attribute__((aligned(n)))
#endif
// int_u: an int declared with alignment 1 on GCC/clang, so that it may be
// accessed through pointers that are not int-aligned. Other compilers get a
// plain int.
#if __GNUC__ || __clang__
typedef int int_u __attribute__ ((__aligned__ (1)));
#else
typedef int int_u;
#endif
// Generic helpers; each is defined only if the name is not already taken.
// MAX, MIN and ABS evaluate their arguments more than once: do not pass
// expressions with side effects.
#ifndef MAX
# define MAX(x, y) ((x) > (y) ? (x) : (y))
#endif
#ifndef MIN
# define MIN(x, y) ((x) < (y) ? (x) : (y))
#endif
#ifndef ABS
# define ABS(x) ((x) >= 0 ? (x) : -(x))
#endif
// True when address p is a multiple of n; the mask test requires n to be a
// power of two.
#define IS_ALIGNED(p, n) (!((uintptr_t)(p) & (uintptr_t)((n) - 1)))
// bit-stream
// SWAP32(x): convert a 32-bit cache word to the byte order in which it is
// stored to the output buffer: identity when __BYTE_ORDER == __BIG_ENDIAN,
// byte reversal otherwise.
#if __BYTE_ORDER == __BIG_ENDIAN
# define SWAP32(x) ((uint32_t)(x))
#else
#ifdef _MSC_VER
# define SWAP32(x) _byteswap_ulong(x)
#elif defined(__GNUC__) || defined(__clang__)
# define SWAP32(x) __builtin_bswap32(x)
#else
// Portable fallback: every use of the argument is parenthesized and converted
// to uint32_t first, so expression arguments and signed operands are safe.
# define SWAP32(x) ((uint32_t)((((uint32_t)(x) >> 24) & 0xFF) | (((uint32_t)(x) >> 8) & 0xFF00) | (((uint32_t)(x) << 8) & 0xFF0000) | (((uint32_t)(x) & 0xFF) << 24)))
#endif
#endif
// BS_OPEN(bs): declare the locals `cache`, `shift` and `buf` as copies of the
// writer state in *bs. Must appear where declarations are allowed.
#define BS_OPEN(bs) uint32_t cache = (bs)->cache; int shift = (bs)->shift; uint32_t *buf = (bs)->buf;
// BS_CLOSE(bs): store the locals declared by BS_OPEN back into *bs.
#define BS_CLOSE(bs) (bs)->cache = cache; (bs)->shift = shift; (bs)->buf = buf;
// BS_PUT(n, val): append the n-bit value val to the locals opened by BS_OPEN.
// `shift` is the number of free bits left in `cache`; when val does not fit,
// the upper part completes the cache word, the word is stored through `buf`
// (byte order fixed by SWAP32) and the remaining bits start a new word.
// The macro expands to several statements: do not use it as the unbraced
// body of an if/else/loop.
// n and val are parenthesized, so expression arguments are taken as a whole.
// The shift amounts must stay below the width of val, which bounds n.
#define BS_PUT(n, val) \
    if ((shift -= (n)) < 0) \
    { \
        cache |= (val) >> -shift; \
        *buf++ = SWAP32(cache); \
        shift += 32; \
        cache = 0; \
    } \
    cache |= (uint32_t)(val) << shift;
// Quantizer-dequantizer modes
#define QDQ_MODE_INTRA_4 2 // intra 4x4
#define QDQ_MODE_INTER 8 // inter
#define QDQ_MODE_INTRA_16 (8 + 1) // intra 16x16
#define QDQ_MODE_CHROMA (4 + 1) // chroma
// Neighbor availability bit flags (TR = top-right, TL = top-left, L = left, T = top)
// put most frequently used bits to lsb, to use these as look-up tables
#define AVAIL_TR 8
#define AVAIL_TL 4
#define AVAIL_L 2
#define AVAIL_T 1
typedef uint8_t pix_t; // One 8-bit pixel sample
typedef uint32_t bs_item_t; // Storage unit of the output bitstream buffer
/**
* Output bitstream
*
* State of the bit writer; the BS_OPEN/BS_CLOSE macros copy shift, cache and
* buf to and from local variables.
*/
typedef struct
{
int shift; // bit position in the cache
uint32_t cache; // bit cache
bs_item_t *buf; // current position
bs_item_t *origin; // initial position
} bs_t;
/**
* Tuple for motion vector, or height/width representation
*
* The union lets both 16-bit components be read or written as one 32-bit
* word. Note that the packed member is a signed int32_t despite its name.
*/
typedef union
{
struct
{
int16_t x; // horizontal or width
int16_t y; // vertical or height
} s;
int32_t u32; // packed representation
} point_t;
/**
* Rectangle, described by two corner points
*/
typedef struct
{
point_t tl; // top-left corner
point_t br; // bottom-right corner
} rectangle_t;
/**
* Quantized/dequantized representation for 4x4 block
* (16 coefficients in each form)
*/
typedef struct
{
int16_t qv[16]; // quantized coefficient
int16_t dq[16]; // dequantized
} quant_t;
/**
* Scratch RAM, used only for current MB encoding
*
* This is the definition behind the opaque public H264E_scratch_t
* (same struct tag).
*/
typedef struct H264E_scratch_tag
{
pix_t mb_pix_inp[256]; // Input MB (cached)
pix_t mb_pix_store[4*256]; // Prediction variants
// Quantized/dequantized
int16_t dcy[16]; // Y DC
quant_t qy[16]; // Y 16x4x4 blocks
int16_t dcu[16]; // U DC: 4 used + align
quant_t qu[4]; // U 4x4x4 blocks
int16_t dcv[16]; // V DC: 4 used + align
quant_t qv[4]; // V 4x4x4 blocks
// Quantized DC:
int16_t quant_dc[16]; // Y
int16_t quant_dc_u[4]; // U
int16_t quant_dc_v[4]; // V
uint16_t nz_mask; // Bit flags for non-zero 4x4 blocks
} scratch_t;
/**
* Deblock filter frame context
*/
typedef struct
{
// Motion vectors for 4x4 MB internal sub-blocks, top and left border,
// 5x5 array without top-left cell (24 entries):
// T0 T1 T2 T3
// L0 i0 i1 i2 i3
// L1 ...
// ......
//
point_t df_mv[5*5 - 1]; // MV for current macroblock and neighbors
uint8_t *df_qp; // QP for current row of macroblocks
int8_t *mb_type; // Macroblock type for current row of macroblocks
uint32_t nzflag; // Bit flags for non-zero 4x4 blocks (left neighbors)
// Huffman and deblock uses different nnz...
uint8_t *df_nzflag; // Bit flags for non-zero 4x4 blocks (top neighbors), only 4 bits used
} deblock_filter_t;
/**
* Deblock filter parameters for current MB
*/
typedef struct
{
uint32_t strength32[4*2]; // Strength for 4 columns and 4 rows
uint8_t tc0[16*2]; // TC0 parameter for 4 columns and 4 rows
uint8_t alpha[2*2]; // alpha for border/internals
uint8_t beta[2*2]; // beta for border/internals
} deblock_params_t;
/**
* Persistent RAM
*
* Complete encoder state. This is the definition behind the opaque public
* H264E_persist_t (same struct tag).
*/
typedef struct H264E_persist_tag
{
H264E_create_param_t param; // Copy of create parameters
H264E_io_yuv_t inp; // Input picture
struct
{
int pic_init_qp; // Initial QP
} sps;
struct
{
int num; // Frame number
int nmbx; // Frame width, macroblocks
int nmby; // Frame height, macroblocks
int nmb; // Number of macroblocks in frame
int w; // Frame width, pixels
int h; // Frame height, pixels
rectangle_t mv_limit; // Frame MV limits = frame + border extension
rectangle_t mv_qpel_limit; // Reduced MV limits for qpel interpolation filter
int cropping_flag; // Cropping indicator
} frame;
struct
{
int type; // Current slice type (I/P)
int start_mb_num; // # of 1st MB in the current slice
} slice;
struct
{
int x; // MB x position (in MB's)
int y; // MB y position (in MB's)
int num; // MB number
int skip_run; // Skip run count
// according to table 7-13
// -1 = skip, 0 = P16x16, 1 = P16x8, 2=P8x16, 3 = P8x8, 5 = I4x4, >=6 = I16x16
int type; // MB type
struct
{
int pred_mode_luma; // Intra 16x16 prediction mode
} i16;
int8_t i4x4_mode[16]; // Intra 4x4 prediction modes
int cost; // Best coding cost
int avail; // Neighbor availability flags
point_t mvd[16]; // Delta-MV for each 4x4 sub-part
point_t mv[16]; // MV for each 4x4 sub-part
point_t mv_skip_pred; // Skip MV predictor
} mb;
H264E_io_yuv_t ref; // Current reference picture
H264E_io_yuv_t dec; // Reconstructed current macroblock
#if H264E_ENABLE_DENOISE
H264E_io_yuv_t denoise; // Noise suppression filter
#endif
unsigned char *lt_yuv[MAX_LONG_TERM_FRAMES][3]; // Long-term reference pictures
unsigned char lt_used[MAX_LONG_TERM_FRAMES]; // Long-term "used" flags
struct
{
int qp; // Current QP
int vbv_bits; // Current VBV fullness, bits
int qp_smooth; // Averaged QP
int dqp_smooth; // Adaptive QP adjustment, account for "compressibility"
int max_dqp; // Worst-case DQP, for long-term reference QP adjustment
int bit_budget; // Frame bit budget
int prev_qp; // Previous MB QP
int prev_err; // Accumulated coded size error
int stable_count; // Stable/not stable state machine
int vbv_target_level; // Desired VBV fullness after frame encode
// Quantizer data, passed to low-level functions
// layout:
// multiplier_quant0, multiplier_dequant0,
// multiplier_quant2, multiplier_dequant2,
// multiplier_quant1, multiplier_dequant1,
// rounding_factor_pos,
// zero_thr_inter
// zero_thr_inter2
// ... and same data for chroma
//uint16_t qdat[2][(6 + 4)];
// Element offsets inside one qdat[] row; they match the field order of the
// commented-out struct below (qdq at 0, rnd at 6, thr at 8, zero_thr at 10
// and 18, qfull at 26, dqfull at 34).
#define OFFS_RND_INTER 6
#define OFFS_RND_INTRA 7
#define OFFS_THR_INTER 8
#define OFFS_THR2_INTER 9
#define OFFS_THR_1_OFF 10
#define OFFS_THR_2_OFF 18
#define OFFS_QUANT_VECT 26
#define OFFS_DEQUANT_VECT 34
//struct
//{
// uint16_t qdq[6];
// uint16_t rnd[2]; // inter/intra
// uint16_t thr[2]; // thresholds
// uint16_t zero_thr[2][8];
// uint16_t qfull[8];
// uint16_t dqfull[8];
//} qdat[2];
// Two rows of 42 values each (the second row holds the chroma data, per the
// layout note above).
uint16_t qdat[2][6 + 2 + 2 + 8 + 8 + 8 + 8];
} rc;
deblock_filter_t df; // Deblock filter
// Speed/quality trade-off
struct
{
int disable_deblock; // Disable deblock filter flags
} speed;
int most_recent_ref_frame_idx; // Last updated long-term reference
// predictors contexts
point_t *mv_pred; // MV for left&top 4x4 blocks
uint8_t *nnz; // Number of non-zero coeffs per 4x4 block for left&top
int32_t *i4x4mode; // Intra 4x4 mode for left&top
pix_t *top_line; // left&top neighbor pixels
// output data
uint8_t *out; // Output data storage (pointer to scratch RAM!)
unsigned int out_pos; // Output byte position
bs_t bs[1]; // Output bitbuffer
scratch_t *scratch; // Pointer to scratch RAM
#if H264E_MAX_THREADS > 1
scratch_t *scratch_store[H264E_MAX_THREADS]; // Pointer to scratch RAM
int sizeof_scaratch; // NOTE(review): presumably the size of one scratch area ("scaratch" is a typo in the field name) - confirm
#endif
H264E_run_param_t run_param; // Copy of run-time parameters
// Consecutive IDR's must have different idr_pic_id,
// unless there are some P between them
uint8_t next_idr_pic_id;
pix_t *pbest; // Macroblock best predictor
pix_t *ptest; // Macroblock predictor under test
point_t mv_clusters[2]; // MV clusterization for prediction
// Flag to track short-term reference buffer, for MMCO 1 command
int short_term_used;
#if H264E_SVC_API
//svc ext
int current_layer; // NOTE(review): presumably the index of the SVC layer handled by this object - confirm
int adaptive_base_mode_flag;
void *enc_next; // NOTE(review): presumably the encoder object of the next SVC layer - confirm
#endif
} h264e_enc_t;
#ifdef __cplusplus
}
#endif //__cplusplus
/************************************************************************/
/* Constants */
/************************************************************************/
// Tunable constants can be adjusted by the "training" application
// ADJUSTABLE is the storage/qualifier prefix of those constants: by default
// they are `static const`; a build may pre-define the macro to something else.
#ifndef ADJUSTABLE
# define ADJUSTABLE static const
#endif
// Huffman encode tables
// Both macros pack one code word into a byte: the code bits `val` go to the
// high nibble, the length information to the low nibble.
// CODE8 stores `len` itself; CODE stores `len - 1`, which lets a length of 16
// fit into 4 bits (and makes the filler CODE(0, 0) evaluate to 255).
// Arguments are parenthesized so that expression arguments expand correctly.
#define CODE8(val, len) (uint8_t)(((val) << 4) + (len))
#define CODE(val, len) (uint8_t)(((val) << 4) + ((len) - 1))
// Code table with an embedded index: the first 15 entries are start offsets
// (into this same array) of the 7 sub-tables that follow; offsets 6..14 all
// select sub-table #6. Each sub-table entry is a CODE8(bits, length) byte.
// NOTE(review): judging by the name these are the CAVLC run_before codes,
// with the sub-table selected by the number of zeros left - confirm.
const uint8_t h264e_g_run_before[57] =
{
15, 17, 20, 24, 29, 35, 42, 42, 42, 42, 42, 42, 42, 42, 42,
/**** Table # 0 size 2 ****/
CODE8(1, 1), CODE8(0, 1),
/**** Table # 1 size 3 ****/
CODE8(1, 1), CODE8(1, 2), CODE8(0, 2),
/**** Table # 2 size 4 ****/
CODE8(3, 2), CODE8(2, 2), CODE8(1, 2), CODE8(0, 2),
/**** Table # 3 size 5 ****/
CODE8(3, 2), CODE8(2, 2), CODE8(1, 2), CODE8(1, 3), CODE8(0, 3),
/**** Table # 4 size 6 ****/
CODE8(3, 2), CODE8(2, 2), CODE8(3, 3), CODE8(2, 3), CODE8(1, 3), CODE8(0, 3),
/**** Table # 5 size 7 ****/
CODE8(3, 2), CODE8(0, 3), CODE8(1, 3), CODE8(3, 3), CODE8(2, 3), CODE8(5, 3), CODE8(4, 3),
/**** Table # 6 size 15 ****/
CODE8(7, 3), CODE8(6, 3), CODE8(5, 3), CODE8(4, 3), CODE8(3, 3), CODE8(2, 3), CODE8(1, 3), CODE8(1, 4),
CODE8(1, 5), CODE8(1, 6), CODE8(1, 7), CODE8(1, 8), CODE8(1, 9), CODE8(1, 10), CODE8(1, 11),
};
// Code table with an embedded index: the first 3 entries are start offsets
// (into this same array) of the 3 sub-tables that follow. Each sub-table
// entry is a CODE8(bits, length) byte.
// NOTE(review): judging by the name these are the CAVLC total_zeros codes for
// 2x2 chroma DC blocks - confirm.
const uint8_t h264e_g_total_zeros_cr_2x2[12] =
{
3, 7, 10,
/**** Table # 0 size 4 ****/
CODE8(1, 1), CODE8(1, 2), CODE8(1, 3), CODE8(0, 3),
/**** Table # 1 size 3 ****/
CODE8(1, 1), CODE8(1, 2), CODE8(0, 2),
/**** Table # 2 size 2 ****/
CODE8(1, 1), CODE8(0, 1),
};
// Code table with an embedded index: the first 15 entries are start offsets
// (into this same array) of the 15 sub-tables that follow, whose sizes shrink
// from 16 to 2 entries. Each sub-table entry is a CODE8(bits, length) byte.
// NOTE(review): judging by the name these are the CAVLC total_zeros codes for
// 4x4 blocks, with the sub-table selected by the coefficient count - confirm.
const uint8_t h264e_g_total_zeros[150] =
{
15, 31, 46, 60, 73, 85, 96, 106, 115, 123, 130, 136, 141, 145, 148,
/**** Table # 0 size 16 ****/
CODE8(1, 1), CODE8(3, 3), CODE8(2, 3), CODE8(3, 4), CODE8(2, 4), CODE8(3, 5), CODE8(2, 5), CODE8(3, 6),
CODE8(2, 6), CODE8(3, 7), CODE8(2, 7), CODE8(3, 8), CODE8(2, 8), CODE8(3, 9), CODE8(2, 9), CODE8(1, 9),
/**** Table # 1 size 15 ****/
CODE8(7, 3), CODE8(6, 3), CODE8(5, 3), CODE8(4, 3), CODE8(3, 3), CODE8(5, 4), CODE8(4, 4), CODE8(3, 4),
CODE8(2, 4), CODE8(3, 5), CODE8(2, 5), CODE8(3, 6), CODE8(2, 6), CODE8(1, 6), CODE8(0, 6),
/**** Table # 2 size 14 ****/
CODE8(5, 4), CODE8(7, 3), CODE8(6, 3), CODE8(5, 3), CODE8(4, 4), CODE8(3, 4), CODE8(4, 3), CODE8(3, 3),
CODE8(2, 4), CODE8(3, 5), CODE8(2, 5), CODE8(1, 6), CODE8(1, 5), CODE8(0, 6),
/**** Table # 3 size 13 ****/
CODE8(3, 5), CODE8(7, 3), CODE8(5, 4), CODE8(4, 4), CODE8(6, 3), CODE8(5, 3), CODE8(4, 3), CODE8(3, 4),
CODE8(3, 3), CODE8(2, 4), CODE8(2, 5), CODE8(1, 5), CODE8(0, 5),
/**** Table # 4 size 12 ****/
CODE8(5, 4), CODE8(4, 4), CODE8(3, 4), CODE8(7, 3), CODE8(6, 3), CODE8(5, 3), CODE8(4, 3), CODE8(3, 3),
CODE8(2, 4), CODE8(1, 5), CODE8(1, 4), CODE8(0, 5),
/**** Table # 5 size 11 ****/
CODE8(1, 6), CODE8(1, 5), CODE8(7, 3), CODE8(6, 3), CODE8(5, 3), CODE8(4, 3), CODE8(3, 3), CODE8(2, 3),
CODE8(1, 4), CODE8(1, 3), CODE8(0, 6),
/**** Table # 6 size 10 ****/
CODE8(1, 6), CODE8(1, 5), CODE8(5, 3), CODE8(4, 3), CODE8(3, 3), CODE8(3, 2), CODE8(2, 3), CODE8(1, 4),
CODE8(1, 3), CODE8(0, 6),
/**** Table # 7 size 9 ****/
CODE8(1, 6), CODE8(1, 4), CODE8(1, 5), CODE8(3, 3), CODE8(3, 2), CODE8(2, 2), CODE8(2, 3), CODE8(1, 3),
CODE8(0, 6),
/**** Table # 8 size 8 ****/
CODE8(1, 6), CODE8(0, 6), CODE8(1, 4), CODE8(3, 2), CODE8(2, 2), CODE8(1, 3), CODE8(1, 2), CODE8(1, 5),
/**** Table # 9 size 7 ****/
CODE8(1, 5), CODE8(0, 5), CODE8(1, 3), CODE8(3, 2), CODE8(2, 2), CODE8(1, 2), CODE8(1, 4),
/**** Table # 10 size 6 ****/
CODE8(0, 4), CODE8(1, 4), CODE8(1, 3), CODE8(2, 3), CODE8(1, 1), CODE8(3, 3),
/**** Table # 11 size 5 ****/
CODE8(0, 4), CODE8(1, 4), CODE8(1, 2), CODE8(1, 1), CODE8(1, 3),
/**** Table # 12 size 4 ****/
CODE8(0, 3), CODE8(1, 3), CODE8(1, 1), CODE8(1, 2),
/**** Table # 13 size 3 ****/
CODE8(0, 2), CODE8(1, 2), CODE8(1, 1),
/**** Table # 14 size 2 ****/
CODE8(0, 1), CODE8(1, 1),
};
// Code table with an embedded index: the first 18 entries are start offsets
// (into this same array, hence the "+ 18") of the 5 sub-tables that follow.
// Index 0-1 selects table #0, 2-3 table #1, 4-7 table #2, 8-16 table #3 and
// index 17 table #4.
// Tables #0, #1, #2 and #4 hold CODE(bits, length) bytes, which store
// length - 1 in the low nibble; CODE(0, 0) marks unused slots.
// Table #3 holds plain byte values instead of CODE() entries.
// NOTE(review): judging by the name these are the CAVLC coeff_token codes,
// with the leading index selected by the neighbor coefficient count - confirm.
const uint8_t h264e_g_coeff_token[277 + 18] =
{
17 + 18, 17 + 18,
82 + 18, 82 + 18,
147 + 18, 147 + 18, 147 + 18, 147 + 18,
212 + 18, 212 + 18, 212 + 18, 212 + 18, 212 + 18, 212 + 18, 212 + 18, 212 + 18, 212 + 18,
0 + 18,
/**** Table # 4 size 17 ****/ // offs: 0
CODE(1, 2), CODE(1, 1), CODE(1, 3), CODE(5, 6), CODE(7, 6), CODE(6, 6), CODE(2, 7), CODE(0, 7), CODE(4, 6),
CODE(3, 7), CODE(2, 8), CODE(0, 0), CODE(3, 6), CODE(3, 8), CODE(0, 0), CODE(0, 0), CODE(2, 6),
/**** Table # 0 size 65 ****/ // offs: 17
CODE( 1, 1), CODE( 1, 2), CODE( 1, 3), CODE( 3, 5), CODE( 5, 6), CODE( 4, 6), CODE( 5, 7), CODE( 3, 6),
CODE( 7, 8), CODE( 6, 8), CODE( 5, 8), CODE( 4, 7), CODE( 7, 9), CODE( 6, 9), CODE( 5, 9), CODE( 4, 8),
CODE( 7, 10), CODE( 6, 10), CODE( 5, 10), CODE( 4, 9), CODE( 7, 11), CODE( 6, 11), CODE( 5, 11), CODE( 4, 10),
CODE(15, 13), CODE(14, 13), CODE(13, 13), CODE( 4, 11), CODE(11, 13), CODE(10, 13), CODE( 9, 13), CODE(12, 13),
CODE( 8, 13), CODE(14, 14), CODE(13, 14), CODE(12, 14), CODE(15, 14), CODE(10, 14), CODE( 9, 14), CODE( 8, 14),
CODE(11, 14), CODE(14, 15), CODE(13, 15), CODE(12, 15), CODE(15, 15), CODE(10, 15), CODE( 9, 15), CODE( 8, 15),
CODE(11, 15), CODE( 1, 15), CODE(13, 16), CODE(12, 16), CODE(15, 16), CODE(14, 16), CODE( 9, 16), CODE( 8, 16),
CODE(11, 16), CODE(10, 16), CODE( 5, 16), CODE( 0, 0), CODE( 7, 16), CODE( 6, 16), CODE( 0, 0), CODE( 0, 0), CODE( 4, 16),
/**** Table # 1 size 65 ****/ // offs: 82
CODE( 3, 2), CODE( 2, 2), CODE( 3, 3), CODE( 5, 4), CODE(11, 6), CODE( 7, 5), CODE( 9, 6), CODE( 4, 4),
CODE( 7, 6), CODE(10, 6), CODE( 5, 6), CODE( 6, 5), CODE( 7, 7), CODE( 6, 6), CODE( 5, 7), CODE( 8, 6),
CODE( 7, 8), CODE( 6, 7), CODE( 5, 8), CODE( 4, 6), CODE( 4, 8), CODE( 6, 8), CODE( 5, 9), CODE( 4, 7),
CODE( 7, 9), CODE( 6, 9), CODE(13, 11), CODE( 4, 9), CODE(15, 11), CODE(14, 11), CODE( 9, 11), CODE(12, 11),
CODE(11, 11), CODE(10, 11), CODE(13, 12), CODE( 8, 11), CODE(15, 12), CODE(14, 12), CODE( 9, 12), CODE(12, 12),
CODE(11, 12), CODE(10, 12), CODE(13, 13), CODE(12, 13), CODE( 8, 12), CODE(14, 13), CODE( 9, 13), CODE( 8, 13),
CODE(15, 13), CODE(10, 13), CODE( 6, 13), CODE( 1, 13), CODE(11, 13), CODE(11, 14), CODE(10, 14), CODE( 4, 14),
CODE( 7, 13), CODE( 8, 14), CODE( 5, 14), CODE( 0, 0), CODE( 9, 14), CODE( 6, 14), CODE( 0, 0), CODE( 0, 0), CODE( 7, 14),
/**** Table # 2 size 65 ****/ // offs: 147
CODE(15, 4), CODE(14, 4), CODE(13, 4), CODE(12, 4), CODE(15, 6), CODE(15, 5), CODE(14, 5), CODE(11, 4),
CODE(11, 6), CODE(12, 5), CODE(11, 5), CODE(10, 4), CODE( 8, 6), CODE(10, 5), CODE( 9, 5), CODE( 9, 4),
CODE(15, 7), CODE( 8, 5), CODE(13, 6), CODE( 8, 4), CODE(11, 7), CODE(14, 6), CODE( 9, 6), CODE(13, 5),
CODE( 9, 7), CODE(10, 6), CODE(13, 7), CODE(12, 6), CODE( 8, 7), CODE(14, 7), CODE(10, 7), CODE(12, 7),
CODE(15, 8), CODE(14, 8), CODE(13, 8), CODE(12, 8), CODE(11, 8), CODE(10, 8), CODE( 9, 8), CODE( 8, 8),
CODE(15, 9), CODE(14, 9), CODE(13, 9), CODE(12, 9), CODE(11, 9), CODE(10, 9), CODE( 9, 9), CODE(10, 10),
CODE( 8, 9), CODE( 7, 9), CODE(11, 10), CODE( 6, 10), CODE(13, 10), CODE(12, 10), CODE( 7, 10), CODE( 2, 10),
CODE( 9, 10), CODE( 8, 10), CODE( 3, 10), CODE( 0, 0), CODE( 5, 10), CODE( 4, 10), CODE( 0, 0), CODE( 0, 0), CODE( 1, 10),
/**** Table # 3 size 65 ****/ // offs: 212
3, 1, 6, 11, 0, 5, 10, 15, 4, 9, 14, 19, 8, 13, 18, 23, 12, 17, 22, 27, 16, 21, 26, 31, 20, 25, 30, 35,
24, 29, 34, 39, 28, 33, 38, 43, 32, 37, 42, 47, 36, 41, 46, 51, 40, 45, 50, 55, 44, 49, 54, 59, 48, 53, 58, 63,
52, 57, 62, 0, 56, 61, 0, 0, 60
};
/*
Block scan order: position of each 4x4 block inside the macroblock.
The diagram is the 4x4 grid of blocks; the hex digit is the block's
number in scan order.
0 1 4 5
2 3 6 7
8 9 C D
A B E F
The table lists the diagram row by row. The permutation is its own
inverse, so it converts raster index to scan index and back.
*/
static const uint8_t decode_block_scan[16] = { 0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15 };
// Luma QP -> chroma QP mapping (per the name), one entry per QP value 0..51:
// the identity up to 29, then growing more slowly and saturating at 39.
static const uint8_t qpy2qpc[52] = { // todo: [0 - 9] not used
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
26, 27, 28, 29, 29, 30, 31, 32, 32, 33, 34, 34, 35,
35, 36, 36, 37, 37, 37, 38, 38, 38, 39, 39, 39, 39,
};
/**
* Rate-control LUT for intra/inter macroblocks: number of bits per macroblock for given QP
* Estimated experimentally
*
* Row 0 is for P (inter), row 1 for I (intra) macroblocks; 41 entries per row.
* NOTE(review): the column ruler suggests that the first entry corresponds to
* QP 10, i.e. the index is (QP - 10) - confirm against the users of the table.
*/
static const uint16_t bits_per_mb[2][42 - 1] =
{
// 10 20 30 40 50
{ 664, 597, 530, 484, 432, 384, 341, 297, 262, 235, 198, 173, 153, 131, 114, 102, 84, 74, 64, 54, 47, 42, 35, 31, 26, 22, 20, 17, 15, 13, 12, 10, 9, 9, 7, 7, 6, 5, 4, 1, 1}, // P
{1057, 975, 925, 868, 803, 740, 694, 630, 586, 547, 496, 457, 420, 378, 345, 318, 284, 258, 234, 210, 190, 178, 155, 141, 129, 115, 102, 95, 82, 75, 69, 60, 55, 51, 45, 41, 40, 35, 31, 28, 24} // I
};
/**
* Deblock filter constants:
* <alpha> <thr[1]> <thr[2]> <thr[3]> <beta>
*
* One row per index value 10..51 (42 rows); the row index is (value - 10),
* as the trailing comments on the first rows show. Rows 10..15 are all zero.
*/
static const uint8_t g_a_tc0_b[52 - 10][5] = {
{ 0, 0, 0, 0, 0}, // 10
{ 0, 0, 0, 0, 0}, // 11
{ 0, 0, 0, 0, 0}, // 12
{ 0, 0, 0, 0, 0}, // 13
{ 0, 0, 0, 0, 0}, // 14
{ 0, 0, 0, 0, 0}, // 15
{ 4, 0, 0, 0, 2},
{ 4, 0, 0, 1, 2},
{ 5, 0, 0, 1, 2},
{ 6, 0, 0, 1, 3},
{ 7, 0, 0, 1, 3},
{ 8, 0, 1, 1, 3},
{ 9, 0, 1, 1, 3},
{ 10, 1, 1, 1, 4},
{ 12, 1, 1, 1, 4},
{ 13, 1, 1, 1, 4},
{ 15, 1, 1, 1, 6},
{ 17, 1, 1, 2, 6},
{ 20, 1, 1, 2, 7},
{ 22, 1, 1, 2, 7},
{ 25, 1, 1, 2, 8},
{ 28, 1, 2, 3, 8},
{ 32, 1, 2, 3, 9},
{ 36, 2, 2, 3, 9},
{ 40, 2, 2, 4, 10},
{ 45, 2, 3, 4, 10},
{ 50, 2, 3, 4, 11},
{ 56, 3, 3, 5, 11},
{ 63, 3, 4, 6, 12},
{ 71, 3, 4, 6, 12},
{ 80, 4, 5, 7, 13},
{ 90, 4, 5, 8, 13},
{101, 4, 6, 9, 14},
{113, 5, 7, 10, 14},
{127, 6, 8, 11, 15},
{144, 6, 8, 13, 15},
{162, 7, 10, 14, 16},
{182, 8, 11, 16, 16},
{203, 9, 12, 18, 17},
{226, 10, 13, 20, 17},
{255, 11, 15, 23, 18},
{255, 13, 17, 25, 18},
};
/************************************************************************/
/* Adjustable encoder parameters. Initial MIN_QP values never used */
/************************************************************************/
// Tunable table of 52 values; the first 11 entries hold the same value.
// NOTE(review): presumably the quantizer rounding factor for inter blocks,
// indexed by QP (0..51) - confirm against the users of the table.
ADJUSTABLE uint16_t g_rnd_inter[] = {
11665, 11665, 11665, 11665, 11665, 11665, 11665, 11665, 11665, 11665,
11665, 12868, 14071, 15273, 16476,
17679, 17740, 17801, 17863, 17924,
17985, 17445, 16904, 16364, 15823,
15283, 15198, 15113, 15027, 14942,
14857, 15667, 16478, 17288, 18099,
18909, 19213, 19517, 19822, 20126,
20430, 16344, 12259, 8173, 4088,
4088, 4088, 4088, 4088, 4088,
4088, 4088,
};