diff --git a/3rdparty/loongarch/include/zconf.h b/3rdparty/loongarch/include/zconf.h new file mode 100644 index 0000000..fe81abf --- /dev/null +++ b/3rdparty/loongarch/include/zconf.h @@ -0,0 +1,551 @@ +/* zconf.h -- configuration of the zlib compression library + * Copyright (C) 1995-2016 Jean-loup Gailly, Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* @(#) $Id$ */ + +#ifndef ZCONF_H +#define ZCONF_H + +/* + * If you *really* need a unique prefix for all types and library functions, + * compile with -DZ_PREFIX. The "standard" zlib should be compiled without it. + * Even better than compiling with -DZ_PREFIX would be to use configure to set + * this permanently in zconf.h using "./configure --zprefix". + */ +#ifdef Z_PREFIX /* may be set to #if 1 by ./configure */ +# define Z_PREFIX_SET + +/* all linked symbols and init macros */ +# define _dist_code z__dist_code +# define _length_code z__length_code +# define _tr_align z__tr_align +# define _tr_flush_bits z__tr_flush_bits +# define _tr_flush_block z__tr_flush_block +# define _tr_init z__tr_init +# define _tr_stored_block z__tr_stored_block +# define _tr_tally z__tr_tally +# define adler32 z_adler32 +# define adler32_combine z_adler32_combine +# define adler32_combine64 z_adler32_combine64 +# define adler32_z z_adler32_z +# ifndef Z_SOLO +# define compress z_compress +# define compress2 z_compress2 +# define compressBound z_compressBound +# endif +# define crc32 z_crc32 +# define crc32_combine z_crc32_combine +# define crc32_combine64 z_crc32_combine64 +# define crc32_combine_gen z_crc32_combine_gen +# define crc32_combine_gen64 z_crc32_combine_gen64 +# define crc32_combine_op z_crc32_combine_op +# define crc32_z z_crc32_z +# define deflate z_deflate +# define deflateBound z_deflateBound +# define deflateCopy z_deflateCopy +# define deflateEnd z_deflateEnd +# define deflateGetDictionary z_deflateGetDictionary +# define deflateInit z_deflateInit +# define 
deflateInit2 z_deflateInit2 +# define deflateInit2_ z_deflateInit2_ +# define deflateInit_ z_deflateInit_ +# define deflateParams z_deflateParams +# define deflatePending z_deflatePending +# define deflatePrime z_deflatePrime +# define deflateReset z_deflateReset +# define deflateResetKeep z_deflateResetKeep +# define deflateSetDictionary z_deflateSetDictionary +# define deflateSetHeader z_deflateSetHeader +# define deflateTune z_deflateTune +# define deflate_copyright z_deflate_copyright +# define get_crc_table z_get_crc_table +# ifndef Z_SOLO +# define gz_error z_gz_error +# define gz_intmax z_gz_intmax +# define gz_strwinerror z_gz_strwinerror +# define gzbuffer z_gzbuffer +# define gzclearerr z_gzclearerr +# define gzclose z_gzclose +# define gzclose_r z_gzclose_r +# define gzclose_w z_gzclose_w +# define gzdirect z_gzdirect +# define gzdopen z_gzdopen +# define gzeof z_gzeof +# define gzerror z_gzerror +# define gzflush z_gzflush +# define gzfread z_gzfread +# define gzfwrite z_gzfwrite +# define gzgetc z_gzgetc +# define gzgetc_ z_gzgetc_ +# define gzgets z_gzgets +# define gzoffset z_gzoffset +# define gzoffset64 z_gzoffset64 +# define gzopen z_gzopen +# define gzopen64 z_gzopen64 +# ifdef _WIN32 +# define gzopen_w z_gzopen_w +# endif +# define gzprintf z_gzprintf +# define gzputc z_gzputc +# define gzputs z_gzputs +# define gzread z_gzread +# define gzrewind z_gzrewind +# define gzseek z_gzseek +# define gzseek64 z_gzseek64 +# define gzsetparams z_gzsetparams +# define gztell z_gztell +# define gztell64 z_gztell64 +# define gzungetc z_gzungetc +# define gzvprintf z_gzvprintf +# define gzwrite z_gzwrite +# endif +# define inflate z_inflate +# define inflateBack z_inflateBack +# define inflateBackEnd z_inflateBackEnd +# define inflateBackInit z_inflateBackInit +# define inflateBackInit_ z_inflateBackInit_ +# define inflateCodesUsed z_inflateCodesUsed +# define inflateCopy z_inflateCopy +# define inflateEnd z_inflateEnd +# define inflateGetDictionary 
z_inflateGetDictionary +# define inflateGetHeader z_inflateGetHeader +# define inflateInit z_inflateInit +# define inflateInit2 z_inflateInit2 +# define inflateInit2_ z_inflateInit2_ +# define inflateInit_ z_inflateInit_ +# define inflateMark z_inflateMark +# define inflatePrime z_inflatePrime +# define inflateReset z_inflateReset +# define inflateReset2 z_inflateReset2 +# define inflateResetKeep z_inflateResetKeep +# define inflateSetDictionary z_inflateSetDictionary +# define inflateSync z_inflateSync +# define inflateSyncPoint z_inflateSyncPoint +# define inflateUndermine z_inflateUndermine +# define inflateValidate z_inflateValidate +# define inflate_copyright z_inflate_copyright +# define inflate_fast z_inflate_fast +# define inflate_table z_inflate_table +# ifndef Z_SOLO +# define uncompress z_uncompress +# define uncompress2 z_uncompress2 +# endif +# define zError z_zError +# ifndef Z_SOLO +# define zcalloc z_zcalloc +# define zcfree z_zcfree +# endif +# define zlibCompileFlags z_zlibCompileFlags +# define zlibVersion z_zlibVersion + +/* all zlib typedefs in zlib.h and zconf.h */ +# define Byte z_Byte +# define Bytef z_Bytef +# define alloc_func z_alloc_func +# define charf z_charf +# define free_func z_free_func +# ifndef Z_SOLO +# define gzFile z_gzFile +# endif +# define gz_header z_gz_header +# define gz_headerp z_gz_headerp +# define in_func z_in_func +# define intf z_intf +# define out_func z_out_func +# define uInt z_uInt +# define uIntf z_uIntf +# define uLong z_uLong +# define uLongf z_uLongf +# define voidp z_voidp +# define voidpc z_voidpc +# define voidpf z_voidpf + +/* all zlib structs in zlib.h and zconf.h */ +# define gz_header_s z_gz_header_s +# define internal_state z_internal_state + +#endif + +#if defined(__MSDOS__) && !defined(MSDOS) +# define MSDOS +#endif +#if (defined(OS_2) || defined(__OS2__)) && !defined(OS2) +# define OS2 +#endif +#if defined(_WINDOWS) && !defined(WINDOWS) +# define WINDOWS +#endif +#if defined(_WIN32) || 
defined(_WIN32_WCE) || defined(__WIN32__) +# ifndef WIN32 +# define WIN32 +# endif +#endif +#if (defined(MSDOS) || defined(OS2) || defined(WINDOWS)) && !defined(WIN32) +# if !defined(__GNUC__) && !defined(__FLAT__) && !defined(__386__) +# ifndef SYS16BIT +# define SYS16BIT +# endif +# endif +#endif + +/* + * Compile with -DMAXSEG_64K if the alloc function cannot allocate more + * than 64k bytes at a time (needed on systems with 16-bit int). + */ +#ifdef SYS16BIT +# define MAXSEG_64K +#endif +#ifdef MSDOS +# define UNALIGNED_OK +#endif + +#ifdef __STDC_VERSION__ +# ifndef STDC +# define STDC +# endif +# if __STDC_VERSION__ >= 199901L +# ifndef STDC99 +# define STDC99 +# endif +# endif +#endif +#if !defined(STDC) && (defined(__STDC__) || defined(__cplusplus)) +# define STDC +#endif +#if !defined(STDC) && (defined(__GNUC__) || defined(__BORLANDC__)) +# define STDC +#endif +#if !defined(STDC) && (defined(MSDOS) || defined(WINDOWS) || defined(WIN32)) +# define STDC +#endif +#if !defined(STDC) && (defined(OS2) || defined(__HOS_AIX__)) +# define STDC +#endif + +#if defined(__OS400__) && !defined(STDC) /* iSeries (formerly AS/400). 
*/ +# define STDC +#endif + +#ifndef STDC +# ifndef const /* cannot use !defined(STDC) && !defined(const) on Mac */ +# define const /* note: need a more gentle solution here */ +# endif +#endif + +#if defined(ZLIB_CONST) && !defined(z_const) +# define z_const const +#else +# define z_const +#endif + +#ifdef Z_SOLO +# ifdef _WIN64 + typedef unsigned long long z_size_t; +# else + typedef unsigned long z_size_t; +# endif +#else +# define z_longlong long long +# if defined(NO_SIZE_T) + typedef unsigned NO_SIZE_T z_size_t; +# elif defined(STDC) +# include <stddef.h> + typedef size_t z_size_t; +# else + typedef unsigned long z_size_t; +# endif +# undef z_longlong +#endif + +/* Maximum value for memLevel in deflateInit2 */ +#ifndef MAX_MEM_LEVEL +# ifdef MAXSEG_64K +# define MAX_MEM_LEVEL 8 +# else +# define MAX_MEM_LEVEL 9 +# endif +#endif + +/* Maximum value for windowBits in deflateInit2 and inflateInit2. + * WARNING: reducing MAX_WBITS makes minigzip unable to extract .gz files + * created by gzip. (Files created by minigzip can still be extracted by + * gzip.) + */ +#ifndef MAX_WBITS +# define MAX_WBITS 15 /* 32K LZ77 window */ +#endif + +/* The memory requirements for deflate are (in bytes): + (1 << (windowBits+2)) + (1 << (memLevel+9)) + that is: 128K for windowBits=15 + 128K for memLevel = 8 (default values) + plus a few kilobytes for small objects. For example, if you want to reduce + the default memory requirements from 256K to 128K, compile with + make CFLAGS="-O -DMAX_WBITS=14 -DMAX_MEM_LEVEL=7" + Of course this will generally degrade compression (there's no free lunch). + + The memory requirements for inflate are (in bytes) 1 << windowBits + that is, 32K for windowBits=15 (default value) plus about 7 kilobytes + for small objects. 
+*/ + + /* Type declarations */ + +#ifndef OF /* function prototypes */ +# ifdef STDC +# define OF(args) args +# else +# define OF(args) () +# endif +#endif + +#ifndef Z_ARG /* function prototypes for stdarg */ +# if defined(STDC) || defined(Z_HAVE_STDARG_H) +# define Z_ARG(args) args +# else +# define Z_ARG(args) () +# endif +#endif + +/* The following definitions for FAR are needed only for MSDOS mixed + * model programming (small or medium model with some far allocations). + * This was tested only with MSC; for other MSDOS compilers you may have + * to define NO_MEMCPY in zutil.h. If you don't need the mixed model, + * just define FAR to be empty. + */ +#ifdef SYS16BIT +# if defined(M_I86SM) || defined(M_I86MM) + /* MSC small or medium model */ +# define SMALL_MEDIUM +# ifdef _MSC_VER +# define FAR _far +# else +# define FAR far +# endif +# endif +# if (defined(__SMALL__) || defined(__MEDIUM__)) + /* Turbo C small or medium model */ +# define SMALL_MEDIUM +# ifdef __BORLANDC__ +# define FAR _far +# else +# define FAR far +# endif +# endif +#endif + +#if defined(WINDOWS) || defined(WIN32) + /* If building or using zlib as a DLL, define ZLIB_DLL. + * This is not mandatory, but it offers a little performance increase. + */ +# ifdef ZLIB_DLL +# if defined(WIN32) && (!defined(__BORLANDC__) || (__BORLANDC__ >= 0x500)) +# ifdef ZLIB_INTERNAL +# define ZEXTERN extern __declspec(dllexport) +# else +# define ZEXTERN extern __declspec(dllimport) +# endif +# endif +# endif /* ZLIB_DLL */ + /* If building or using zlib with the WINAPI/WINAPIV calling convention, + * define ZLIB_WINAPI. + * Caution: the standard ZLIB1.DLL is NOT compiled using ZLIB_WINAPI. + */ +# ifdef ZLIB_WINAPI +# ifdef FAR +# undef FAR +# endif +# ifndef WIN32_LEAN_AND_MEAN +# define WIN32_LEAN_AND_MEAN +# endif +# include <windows.h> + /* No need for _export, use ZLIB.DEF instead. */ + /* For complete Windows compatibility, use WINAPI, not __stdcall. 
*/ +# define ZEXPORT WINAPI +# ifdef WIN32 +# define ZEXPORTVA WINAPIV +# else +# define ZEXPORTVA FAR CDECL +# endif +# endif +#endif + +#if defined (__BEOS__) +# ifdef ZLIB_DLL +# ifdef ZLIB_INTERNAL +# define ZEXPORT __declspec(dllexport) +# define ZEXPORTVA __declspec(dllexport) +# else +# define ZEXPORT __declspec(dllimport) +# define ZEXPORTVA __declspec(dllimport) +# endif +# endif +#endif + +#ifndef ZEXTERN +# define ZEXTERN extern +#endif +#ifndef ZEXPORT +# define ZEXPORT +#endif +#ifndef ZEXPORTVA +# define ZEXPORTVA +#endif + +#ifndef FAR +# define FAR +#endif + +#if !defined(__MACTYPES__) +typedef unsigned char Byte; /* 8 bits */ +#endif +typedef unsigned int uInt; /* 16 bits or more */ +typedef unsigned long uLong; /* 32 bits or more */ + +#ifdef SMALL_MEDIUM + /* Borland C/C++ and some old MSC versions ignore FAR inside typedef */ +# define Bytef Byte FAR +#else + typedef Byte FAR Bytef; +#endif +typedef char FAR charf; +typedef int FAR intf; +typedef uInt FAR uIntf; +typedef uLong FAR uLongf; + +#ifdef STDC + typedef void const *voidpc; + typedef void FAR *voidpf; + typedef void *voidp; +#else + typedef Byte const *voidpc; + typedef Byte FAR *voidpf; + typedef Byte *voidp; +#endif + +#if !defined(Z_U4) && !defined(Z_SOLO) && defined(STDC) +# include <limits.h> +# if (UINT_MAX == 0xffffffffUL) +# define Z_U4 unsigned +# elif (ULONG_MAX == 0xffffffffUL) +# define Z_U4 unsigned long +# elif (USHRT_MAX == 0xffffffffUL) +# define Z_U4 unsigned short +# endif +#endif + +#ifdef Z_U4 + typedef Z_U4 z_crc_t; +#else + typedef unsigned long z_crc_t; +#endif + +#if 1 /* was set to #if 1 by ./configure */ +# define Z_HAVE_UNISTD_H +#endif + +#if 1 /* was set to #if 1 by ./configure */ +# define Z_HAVE_STDARG_H +#endif + +#ifdef STDC +# ifndef Z_SOLO +# include <sys/types.h> /* for off_t */ +# endif +#endif + +#if defined(STDC) || defined(Z_HAVE_STDARG_H) +# ifndef Z_SOLO +# include <stdarg.h> /* for va_list */ +# endif +#endif + +#ifdef _WIN32 +# ifndef Z_SOLO +# include <stddef.h> /* for wchar_t */ +# endif 
+#endif + +/* a little trick to accommodate both "#define _LARGEFILE64_SOURCE" and + * "#define _LARGEFILE64_SOURCE 1" as requesting 64-bit operations, (even + * though the former does not conform to the LFS document), but considering + * both "#undef _LARGEFILE64_SOURCE" and "#define _LARGEFILE64_SOURCE 0" as + * equivalently requesting no 64-bit operations + */ +#if defined(_LARGEFILE64_SOURCE) && -_LARGEFILE64_SOURCE - -1 == 1 +# undef _LARGEFILE64_SOURCE +#endif + +#ifndef Z_HAVE_UNISTD_H +# ifdef __WATCOMC__ +# define Z_HAVE_UNISTD_H +# endif +#endif +#ifndef Z_HAVE_UNISTD_H +# if defined(_LARGEFILE64_SOURCE) && !defined(_WIN32) +# define Z_HAVE_UNISTD_H +# endif +#endif +#ifndef Z_SOLO +# if defined(Z_HAVE_UNISTD_H) +# include <unistd.h> /* for SEEK_*, off_t, and _LFS64_LARGEFILE */ +# ifdef VMS +# include <unixio.h> /* for off_t */ +# endif +# ifndef z_off_t +# define z_off_t off_t +# endif +# endif +#endif + +#if defined(_LFS64_LARGEFILE) && _LFS64_LARGEFILE-0 +# define Z_LFS64 +#endif + +#if defined(_LARGEFILE64_SOURCE) && defined(Z_LFS64) +# define Z_LARGE64 +#endif + +#if defined(_FILE_OFFSET_BITS) && _FILE_OFFSET_BITS-0 == 64 && defined(Z_LFS64) +# define Z_WANT64 +#endif + +#if !defined(SEEK_SET) && !defined(Z_SOLO) +# define SEEK_SET 0 /* Seek from beginning of file. */ +# define SEEK_CUR 1 /* Seek from current position. 
*/ +# define SEEK_END 2 /* Set file pointer to EOF plus "offset" */ +#endif + +#ifndef z_off_t +# define z_off_t long +#endif + +#if !defined(_WIN32) && defined(Z_LARGE64) +# define z_off64_t off64_t +#else +# if defined(_WIN32) && !defined(__GNUC__) +# define z_off64_t __int64 +# else +# define z_off64_t z_off_t +# endif +#endif + +/* MVS linker does not support external names larger than 8 bytes */ +#if defined(__MVS__) + #pragma map(deflateInit_,"DEIN") + #pragma map(deflateInit2_,"DEIN2") + #pragma map(deflateEnd,"DEEND") + #pragma map(deflateBound,"DEBND") + #pragma map(inflateInit_,"ININ") + #pragma map(inflateInit2_,"ININ2") + #pragma map(inflateEnd,"INEND") + #pragma map(inflateSync,"INSY") + #pragma map(inflateSetDictionary,"INSEDI") + #pragma map(compressBound,"CMBND") + #pragma map(inflate_table,"INTABL") + #pragma map(inflate_fast,"INFA") + #pragma map(inflate_copyright,"INCOPY") +#endif + +#endif /* ZCONF_H */ diff --git a/3rdparty/loongarch/include/zlib.h b/3rdparty/loongarch/include/zlib.h new file mode 100644 index 0000000..6b7244f --- /dev/null +++ b/3rdparty/loongarch/include/zlib.h @@ -0,0 +1,1938 @@ +/* zlib.h -- interface of the 'zlib' general purpose compression library + version 1.3, August 18th, 2023 + + Copyright (C) 1995-2023 Jean-loup Gailly and Mark Adler + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for any damages + arising from the use of this software. + + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it + freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + 2. 
Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + 3. This notice may not be removed or altered from any source distribution. + + Jean-loup Gailly Mark Adler + jloup@gzip.org madler@alumni.caltech.edu + + + The data format used by the zlib library is described by RFCs (Request for + Comments) 1950 to 1952 in the files http://tools.ietf.org/html/rfc1950 + (zlib format), rfc1951 (deflate format) and rfc1952 (gzip format). +*/ + +#ifndef ZLIB_H +#define ZLIB_H + +#include "zconf.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define ZLIB_VERSION "1.3" +#define ZLIB_VERNUM 0x1300 +#define ZLIB_VER_MAJOR 1 +#define ZLIB_VER_MINOR 3 +#define ZLIB_VER_REVISION 0 +#define ZLIB_VER_SUBREVISION 0 + +/* + The 'zlib' compression library provides in-memory compression and + decompression functions, including integrity checks of the uncompressed data. + This version of the library supports only one compression method (deflation) + but other algorithms will be added later and will have the same stream + interface. + + Compression can be done in a single step if the buffers are large enough, + or can be done by repeated calls of the compression function. In the latter + case, the application must provide more input and/or consume the output + (providing more output space) before each call. + + The compressed data format used by default by the in-memory functions is + the zlib format, which is a zlib wrapper documented in RFC 1950, wrapped + around a deflate stream, which is itself documented in RFC 1951. + + The library also supports reading and writing files in gzip (.gz) format + with an interface similar to that of stdio using the functions that start + with "gz". The gzip format is different from the zlib format. gzip is a + gzip wrapper, documented in RFC 1952, wrapped around a deflate stream. + + This library can optionally read and write gzip and raw deflate streams in + memory as well. 
+ + The zlib format was designed to be compact and fast for use in memory + and on communications channels. The gzip format was designed for single- + file compression on file systems, has a larger header than zlib to maintain + directory information, and uses a different, slower check method than zlib. + + The library does not install any signal handler. The decoder checks + the consistency of the compressed data, so the library should never crash + even in the case of corrupted input. +*/ + +typedef voidpf (*alloc_func)(voidpf opaque, uInt items, uInt size); +typedef void (*free_func)(voidpf opaque, voidpf address); + +struct internal_state; + +typedef struct z_stream_s { + z_const Bytef *next_in; /* next input byte */ + uInt avail_in; /* number of bytes available at next_in */ + uLong total_in; /* total number of input bytes read so far */ + + Bytef *next_out; /* next output byte will go here */ + uInt avail_out; /* remaining free space at next_out */ + uLong total_out; /* total number of bytes output so far */ + + z_const char *msg; /* last error message, NULL if no error */ + struct internal_state FAR *state; /* not visible by applications */ + + alloc_func zalloc; /* used to allocate the internal state */ + free_func zfree; /* used to free the internal state */ + voidpf opaque; /* private data object passed to zalloc and zfree */ + + int data_type; /* best guess about the data type: binary or text + for deflate, or the decoding state for inflate */ + uLong adler; /* Adler-32 or CRC-32 value of the uncompressed data */ + uLong reserved; /* reserved for future use */ +} z_stream; + +typedef z_stream FAR *z_streamp; + +/* + gzip header information passed to and from zlib routines. See RFC 1952 + for more details on the meanings of these fields. 
+*/ +typedef struct gz_header_s { + int text; /* true if compressed data believed to be text */ + uLong time; /* modification time */ + int xflags; /* extra flags (not used when writing a gzip file) */ + int os; /* operating system */ + Bytef *extra; /* pointer to extra field or Z_NULL if none */ + uInt extra_len; /* extra field length (valid if extra != Z_NULL) */ + uInt extra_max; /* space at extra (only when reading header) */ + Bytef *name; /* pointer to zero-terminated file name or Z_NULL */ + uInt name_max; /* space at name (only when reading header) */ + Bytef *comment; /* pointer to zero-terminated comment or Z_NULL */ + uInt comm_max; /* space at comment (only when reading header) */ + int hcrc; /* true if there was or will be a header crc */ + int done; /* true when done reading gzip header (not used + when writing a gzip file) */ +} gz_header; + +typedef gz_header FAR *gz_headerp; + +/* + The application must update next_in and avail_in when avail_in has dropped + to zero. It must update next_out and avail_out when avail_out has dropped + to zero. The application must initialize zalloc, zfree and opaque before + calling the init function. All other fields are set by the compression + library and must not be updated by the application. + + The opaque value provided by the application will be passed as the first + parameter for calls of zalloc and zfree. This can be useful for custom + memory management. The compression library attaches no meaning to the + opaque value. + + zalloc must return Z_NULL if there is not enough memory for the object. + If zlib is used in a multi-threaded application, zalloc and zfree must be + thread safe. In that case, zlib is thread-safe. When zalloc and zfree are + Z_NULL on entry to the initialization function, they are set to internal + routines that use the standard library functions malloc() and free(). 
+ + On 16-bit systems, the functions zalloc and zfree must be able to allocate + exactly 65536 bytes, but will not be required to allocate more than this if + the symbol MAXSEG_64K is defined (see zconf.h). WARNING: On MSDOS, pointers + returned by zalloc for objects of exactly 65536 bytes *must* have their + offset normalized to zero. The default allocation function provided by this + library ensures this (see zutil.c). To reduce memory requirements and avoid + any allocation of 64K objects, at the expense of compression ratio, compile + the library with -DMAX_WBITS=14 (see zconf.h). + + The fields total_in and total_out can be used for statistics or progress + reports. After compression, total_in holds the total size of the + uncompressed data and may be saved for use by the decompressor (particularly + if the decompressor wants to decompress everything in a single step). +*/ + + /* constants */ + +#define Z_NO_FLUSH 0 +#define Z_PARTIAL_FLUSH 1 +#define Z_SYNC_FLUSH 2 +#define Z_FULL_FLUSH 3 +#define Z_FINISH 4 +#define Z_BLOCK 5 +#define Z_TREES 6 +/* Allowed flush values; see deflate() and inflate() below for details */ + +#define Z_OK 0 +#define Z_STREAM_END 1 +#define Z_NEED_DICT 2 +#define Z_ERRNO (-1) +#define Z_STREAM_ERROR (-2) +#define Z_DATA_ERROR (-3) +#define Z_MEM_ERROR (-4) +#define Z_BUF_ERROR (-5) +#define Z_VERSION_ERROR (-6) +/* Return codes for the compression/decompression functions. Negative values + * are errors, positive values are used for special but normal events. 
+ */ + +#define Z_NO_COMPRESSION 0 +#define Z_BEST_SPEED 1 +#define Z_BEST_COMPRESSION 9 +#define Z_DEFAULT_COMPRESSION (-1) +/* compression levels */ + +#define Z_FILTERED 1 +#define Z_HUFFMAN_ONLY 2 +#define Z_RLE 3 +#define Z_FIXED 4 +#define Z_DEFAULT_STRATEGY 0 +/* compression strategy; see deflateInit2() below for details */ + +#define Z_BINARY 0 +#define Z_TEXT 1 +#define Z_ASCII Z_TEXT /* for compatibility with 1.2.2 and earlier */ +#define Z_UNKNOWN 2 +/* Possible values of the data_type field for deflate() */ + +#define Z_DEFLATED 8 +/* The deflate compression method (the only one supported in this version) */ + +#define Z_NULL 0 /* for initializing zalloc, zfree, opaque */ + +#define zlib_version zlibVersion() +/* for compatibility with versions < 1.0.2 */ + + + /* basic functions */ + +ZEXTERN const char * ZEXPORT zlibVersion(void); +/* The application can compare zlibVersion and ZLIB_VERSION for consistency. + If the first character differs, the library code actually used is not + compatible with the zlib.h header file used by the application. This check + is automatically made by deflateInit and inflateInit. + */ + +/* +ZEXTERN int ZEXPORT deflateInit(z_streamp strm, int level); + + Initializes the internal stream state for compression. The fields + zalloc, zfree and opaque must be initialized before by the caller. If + zalloc and zfree are set to Z_NULL, deflateInit updates them to use default + allocation functions. total_in, total_out, adler, and msg are initialized. + + The compression level must be Z_DEFAULT_COMPRESSION, or between 0 and 9: + 1 gives best speed, 9 gives best compression, 0 gives no compression at all + (the input data is simply copied a block at a time). Z_DEFAULT_COMPRESSION + requests a default compromise between speed and compression (currently + equivalent to level 6). 
+ + deflateInit returns Z_OK if success, Z_MEM_ERROR if there was not enough + memory, Z_STREAM_ERROR if level is not a valid compression level, or + Z_VERSION_ERROR if the zlib library version (zlib_version) is incompatible + with the version assumed by the caller (ZLIB_VERSION). msg is set to null + if there is no error message. deflateInit does not perform any compression: + this will be done by deflate(). +*/ + + +ZEXTERN int ZEXPORT deflate(z_streamp strm, int flush); +/* + deflate compresses as much data as possible, and stops when the input + buffer becomes empty or the output buffer becomes full. It may introduce + some output latency (reading input without producing any output) except when + forced to flush. + + The detailed semantics are as follows. deflate performs one or both of the + following actions: + + - Compress more input starting at next_in and update next_in and avail_in + accordingly. If not all input can be processed (because there is not + enough room in the output buffer), next_in and avail_in are updated and + processing will resume at this point for the next call of deflate(). + + - Generate more output starting at next_out and update next_out and avail_out + accordingly. This action is forced if the parameter flush is non zero. + Forcing flush frequently degrades the compression ratio, so this parameter + should be set only when necessary. Some output may be provided even if + flush is zero. + + Before the call of deflate(), the application should ensure that at least + one of the actions is possible, by providing more input and/or consuming more + output, and updating avail_in or avail_out accordingly; avail_out should + never be zero before the call. The application can consume the compressed + output when it wants, for example when the output buffer is full (avail_out + == 0), or after each call of deflate(). 
If deflate returns Z_OK and with + zero avail_out, it must be called again after making room in the output + buffer because there might be more output pending. See deflatePending(), + which can be used if desired to determine whether or not there is more output + in that case. + + Normally the parameter flush is set to Z_NO_FLUSH, which allows deflate to + decide how much data to accumulate before producing output, in order to + maximize compression. + + If the parameter flush is set to Z_SYNC_FLUSH, all pending output is + flushed to the output buffer and the output is aligned on a byte boundary, so + that the decompressor can get all input data available so far. (In + particular avail_in is zero after the call if enough output space has been + provided before the call.) Flushing may degrade compression for some + compression algorithms and so it should be used only when necessary. This + completes the current deflate block and follows it with an empty stored block + that is three bits plus filler bits to the next byte, followed by four bytes + (00 00 ff ff). + + If flush is set to Z_PARTIAL_FLUSH, all pending output is flushed to the + output buffer, but the output is not aligned to a byte boundary. All of the + input data so far will be available to the decompressor, as for Z_SYNC_FLUSH. + This completes the current deflate block and follows it with an empty fixed + codes block that is 10 bits long. This assures that enough bytes are output + in order for the decompressor to finish the block before the empty fixed + codes block. + + If flush is set to Z_BLOCK, a deflate block is completed and emitted, as + for Z_SYNC_FLUSH, but the output is not aligned on a byte boundary, and up to + seven bits of the current block are held to be written as the next byte after + the next deflate block is completed. In this case, the decompressor may not + be provided enough bits at this point in order to complete decompression of + the data provided so far to the compressor. 
It may need to wait for the next + block to be emitted. This is for advanced applications that need to control + the emission of deflate blocks. + + If flush is set to Z_FULL_FLUSH, all output is flushed as with + Z_SYNC_FLUSH, and the compression state is reset so that decompression can + restart from this point if previous compressed data has been damaged or if + random access is desired. Using Z_FULL_FLUSH too often can seriously degrade + compression. + + If deflate returns with avail_out == 0, this function must be called again + with the same value of the flush parameter and more output space (updated + avail_out), until the flush is complete (deflate returns with non-zero + avail_out). In the case of a Z_FULL_FLUSH or Z_SYNC_FLUSH, make sure that + avail_out is greater than six when the flush marker begins, in order to avoid + repeated flush markers upon calling deflate() again when avail_out == 0. + + If the parameter flush is set to Z_FINISH, pending input is processed, + pending output is flushed and deflate returns with Z_STREAM_END if there was + enough output space. If deflate returns with Z_OK or Z_BUF_ERROR, this + function must be called again with Z_FINISH and more output space (updated + avail_out) but no more input data, until it returns with Z_STREAM_END or an + error. After deflate has returned Z_STREAM_END, the only possible operations + on the stream are deflateReset or deflateEnd. + + Z_FINISH can be used in the first deflate call after deflateInit if all the + compression is to be done in a single step. In order to complete in one + call, avail_out must be at least the value returned by deflateBound (see + below). Then deflate is guaranteed to return Z_STREAM_END. If not enough + output space is provided, deflate will not return Z_STREAM_END, and it must + be called again as described above. + + deflate() sets strm->adler to the Adler-32 checksum of all input read + so far (that is, total_in bytes). 
If a gzip stream is being generated, then + strm->adler will be the CRC-32 checksum of the input read so far. (See + deflateInit2 below.) + + deflate() may update strm->data_type if it can make a good guess about + the input data type (Z_BINARY or Z_TEXT). If in doubt, the data is + considered binary. This field is only for information purposes and does not + affect the compression algorithm in any manner. + + deflate() returns Z_OK if some progress has been made (more input + processed or more output produced), Z_STREAM_END if all input has been + consumed and all output has been produced (only when flush is set to + Z_FINISH), Z_STREAM_ERROR if the stream state was inconsistent (for example + if next_in or next_out was Z_NULL or the state was inadvertently written over + by the application), or Z_BUF_ERROR if no progress is possible (for example + avail_in or avail_out was zero). Note that Z_BUF_ERROR is not fatal, and + deflate() can be called again with more input and more output space to + continue compressing. +*/ + + +ZEXTERN int ZEXPORT deflateEnd(z_streamp strm); +/* + All dynamically allocated data structures for this stream are freed. + This function discards any unprocessed input and does not flush any pending + output. + + deflateEnd returns Z_OK if success, Z_STREAM_ERROR if the + stream state was inconsistent, Z_DATA_ERROR if the stream was freed + prematurely (some input or output was discarded). In the error case, msg + may be set but then points to a static string (which must not be + deallocated). +*/ + + +/* +ZEXTERN int ZEXPORT inflateInit(z_streamp strm); + + Initializes the internal stream state for decompression. The fields + next_in, avail_in, zalloc, zfree and opaque must be initialized before by + the caller. In the current version of inflate, the provided input is not + read or consumed. The allocation of a sliding window will be deferred to + the first call of inflate (if the decompression does not complete on the + first call). 
If zalloc and zfree are set to Z_NULL, inflateInit updates + them to use default allocation functions. total_in, total_out, adler, and + msg are initialized. + + inflateInit returns Z_OK if success, Z_MEM_ERROR if there was not enough + memory, Z_VERSION_ERROR if the zlib library version is incompatible with the + version assumed by the caller, or Z_STREAM_ERROR if the parameters are + invalid, such as a null pointer to the structure. msg is set to null if + there is no error message. inflateInit does not perform any decompression. + Actual decompression will be done by inflate(). So next_in, and avail_in, + next_out, and avail_out are unused and unchanged. The current + implementation of inflateInit() does not process any header information -- + that is deferred until inflate() is called. +*/ + + +ZEXTERN int ZEXPORT inflate(z_streamp strm, int flush); +/* + inflate decompresses as much data as possible, and stops when the input + buffer becomes empty or the output buffer becomes full. It may introduce + some output latency (reading input without producing any output) except when + forced to flush. + + The detailed semantics are as follows. inflate performs one or both of the + following actions: + + - Decompress more input starting at next_in and update next_in and avail_in + accordingly. If not all input can be processed (because there is not + enough room in the output buffer), then next_in and avail_in are updated + accordingly, and processing will resume at this point for the next call of + inflate(). + + - Generate more output starting at next_out and update next_out and avail_out + accordingly. inflate() provides as much output as possible, until there is + no more input data or no more space in the output buffer (see below about + the flush parameter). 
+ + Before the call of inflate(), the application should ensure that at least + one of the actions is possible, by providing more input and/or consuming more + output, and updating the next_* and avail_* values accordingly. If the + caller of inflate() does not provide both available input and available + output space, it is possible that there will be no progress made. The + application can consume the uncompressed output when it wants, for example + when the output buffer is full (avail_out == 0), or after each call of + inflate(). If inflate returns Z_OK and with zero avail_out, it must be + called again after making room in the output buffer because there might be + more output pending. + + The flush parameter of inflate() can be Z_NO_FLUSH, Z_SYNC_FLUSH, Z_FINISH, + Z_BLOCK, or Z_TREES. Z_SYNC_FLUSH requests that inflate() flush as much + output as possible to the output buffer. Z_BLOCK requests that inflate() + stop if and when it gets to the next deflate block boundary. When decoding + the zlib or gzip format, this will cause inflate() to return immediately + after the header and before the first block. When doing a raw inflate, + inflate() will go ahead and process the first block, and will return when it + gets to the end of that block, or when it runs out of data. + + The Z_BLOCK option assists in appending to or combining deflate streams. + To assist in this, on return inflate() always sets strm->data_type to the + number of unused bits in the last byte taken from strm->next_in, plus 64 if + inflate() is currently decoding the last block in the deflate stream, plus + 128 if inflate() returned immediately after decoding an end-of-block code or + decoding the complete header up to just before the first byte of the deflate + stream. The end-of-block will not be indicated until all of the uncompressed + data from that block has been written to strm->next_out. 
The number of + unused bits may in general be greater than seven, except when bit 7 of + data_type is set, in which case the number of unused bits will be less than + eight. data_type is set as noted here every time inflate() returns for all + flush options, and so can be used to determine the amount of currently + consumed input in bits. + + The Z_TREES option behaves as Z_BLOCK does, but it also returns when the + end of each deflate block header is reached, before any actual data in that + block is decoded. This allows the caller to determine the length of the + deflate block header for later use in random access within a deflate block. + 256 is added to the value of strm->data_type when inflate() returns + immediately after reaching the end of the deflate block header. + + inflate() should normally be called until it returns Z_STREAM_END or an + error. However if all decompression is to be performed in a single step (a + single call of inflate), the parameter flush should be set to Z_FINISH. In + this case all pending input is processed and all pending output is flushed; + avail_out must be large enough to hold all of the uncompressed data for the + operation to complete. (The size of the uncompressed data may have been + saved by the compressor for this purpose.) The use of Z_FINISH is not + required to perform an inflation in one step. However it may be used to + inform inflate that a faster approach can be used for the single inflate() + call. Z_FINISH also informs inflate to not maintain a sliding window if the + stream completes, which reduces inflate's memory footprint. If the stream + does not complete, either because not all of the stream is provided or not + enough output space is provided, then a sliding window will be allocated and + inflate() can be called again to continue the operation as if Z_NO_FLUSH had + been used. 
+ + In this implementation, inflate() always flushes as much output as + possible to the output buffer, and always uses the faster approach on the + first call. So the effects of the flush parameter in this implementation are + on the return value of inflate() as noted below, when inflate() returns early + when Z_BLOCK or Z_TREES is used, and when inflate() avoids the allocation of + memory for a sliding window when Z_FINISH is used. + + If a preset dictionary is needed after this call (see inflateSetDictionary + below), inflate sets strm->adler to the Adler-32 checksum of the dictionary + chosen by the compressor and returns Z_NEED_DICT; otherwise it sets + strm->adler to the Adler-32 checksum of all output produced so far (that is, + total_out bytes) and returns Z_OK, Z_STREAM_END or an error code as described + below. At the end of the stream, inflate() checks that its computed Adler-32 + checksum is equal to that saved by the compressor and returns Z_STREAM_END + only if the checksum is correct. + + inflate() can decompress and check either zlib-wrapped or gzip-wrapped + deflate data. The header type is detected automatically, if requested when + initializing with inflateInit2(). Any information contained in the gzip + header is not retained unless inflateGetHeader() is used. When processing + gzip-wrapped deflate data, strm->adler is set to the CRC-32 of the output + produced so far. The CRC-32 is checked against the gzip trailer, as is the + uncompressed length, modulo 2^32.
+ + inflate() returns Z_OK if some progress has been made (more input processed + or more output produced), Z_STREAM_END if the end of the compressed data has + been reached and all uncompressed output has been produced, Z_NEED_DICT if a + preset dictionary is needed at this point, Z_DATA_ERROR if the input data was + corrupted (input stream not conforming to the zlib format or incorrect check + value, in which case strm->msg points to a string with a more specific + error), Z_STREAM_ERROR if the stream structure was inconsistent (for example + next_in or next_out was Z_NULL, or the state was inadvertently written over + by the application), Z_MEM_ERROR if there was not enough memory, Z_BUF_ERROR + if no progress was possible or if there was not enough room in the output + buffer when Z_FINISH is used. Note that Z_BUF_ERROR is not fatal, and + inflate() can be called again with more input and more output space to + continue decompressing. If Z_DATA_ERROR is returned, the application may + then call inflateSync() to look for a good compression block if a partial + recovery of the data is to be attempted. +*/ + + +ZEXTERN int ZEXPORT inflateEnd(z_streamp strm); +/* + All dynamically allocated data structures for this stream are freed. + This function discards any unprocessed input and does not flush any pending + output. + + inflateEnd returns Z_OK if success, or Z_STREAM_ERROR if the stream state + was inconsistent. +*/ + + + /* Advanced functions */ + +/* + The following functions are needed only in some special applications. +*/ + +/* +ZEXTERN int ZEXPORT deflateInit2(z_streamp strm, + int level, + int method, + int windowBits, + int memLevel, + int strategy); + + This is another version of deflateInit with more compression options. The + fields zalloc, zfree and opaque must be initialized before by the caller. + + The method parameter is the compression method. It must be Z_DEFLATED in + this version of the library. 
+ + The windowBits parameter is the base two logarithm of the window size + (the size of the history buffer). It should be in the range 8..15 for this + version of the library. Larger values of this parameter result in better + compression at the expense of memory usage. The default value is 15 if + deflateInit is used instead. + + For the current implementation of deflate(), a windowBits value of 8 (a + window size of 256 bytes) is not supported. As a result, a request for 8 + will result in 9 (a 512-byte window). In that case, providing 8 to + inflateInit2() will result in an error when the zlib header with 9 is + checked against the initialization of inflate(). The remedy is to not use 8 + with deflateInit2() with this initialization, or at least in that case use 9 + with inflateInit2(). + + windowBits can also be -8..-15 for raw deflate. In this case, -windowBits + determines the window size. deflate() will then generate raw deflate data + with no zlib header or trailer, and will not compute a check value. + + windowBits can also be greater than 15 for optional gzip encoding. Add + 16 to windowBits to write a simple gzip header and trailer around the + compressed data instead of a zlib wrapper. The gzip header will have no + file name, no extra data, no comment, no modification time (set to zero), no + header crc, and the operating system will be set to the appropriate value, + if the operating system was determined at compile time. If a gzip stream is + being written, strm->adler is a CRC-32 instead of an Adler-32. + + For raw deflate or gzip encoding, a request for a 256-byte window is + rejected as invalid, since only the zlib header provides a means of + transmitting the window size to the decompressor. + + The memLevel parameter specifies how much memory should be allocated + for the internal compression state. memLevel=1 uses minimum memory but is + slow and reduces compression ratio; memLevel=9 uses maximum memory for + optimal speed. 
The default value is 8. See zconf.h for total memory usage + as a function of windowBits and memLevel. + + The strategy parameter is used to tune the compression algorithm. Use the + value Z_DEFAULT_STRATEGY for normal data, Z_FILTERED for data produced by a + filter (or predictor), Z_HUFFMAN_ONLY to force Huffman encoding only (no + string match), or Z_RLE to limit match distances to one (run-length + encoding). Filtered data consists mostly of small values with a somewhat + random distribution. In this case, the compression algorithm is tuned to + compress them better. The effect of Z_FILTERED is to force more Huffman + coding and less string matching; it is somewhat intermediate between + Z_DEFAULT_STRATEGY and Z_HUFFMAN_ONLY. Z_RLE is designed to be almost as + fast as Z_HUFFMAN_ONLY, but give better compression for PNG image data. The + strategy parameter only affects the compression ratio but not the + correctness of the compressed output even if it is not set appropriately. + Z_FIXED prevents the use of dynamic Huffman codes, allowing for a simpler + decoder for special applications. + + deflateInit2 returns Z_OK if success, Z_MEM_ERROR if there was not enough + memory, Z_STREAM_ERROR if any parameter is invalid (such as an invalid + method), or Z_VERSION_ERROR if the zlib library version (zlib_version) is + incompatible with the version assumed by the caller (ZLIB_VERSION). msg is + set to null if there is no error message. deflateInit2 does not perform any + compression: this will be done by deflate(). +*/ + +ZEXTERN int ZEXPORT deflateSetDictionary(z_streamp strm, + const Bytef *dictionary, + uInt dictLength); +/* + Initializes the compression dictionary from the given byte sequence + without producing any compressed output. When using the zlib format, this + function must be called immediately after deflateInit, deflateInit2 or + deflateReset, and before any call of deflate. 
When doing raw deflate, this + function must be called either before any call of deflate, or immediately + after the completion of a deflate block, i.e. after all input has been + consumed and all output has been delivered when using any of the flush + options Z_BLOCK, Z_PARTIAL_FLUSH, Z_SYNC_FLUSH, or Z_FULL_FLUSH. The + compressor and decompressor must use exactly the same dictionary (see + inflateSetDictionary). + + The dictionary should consist of strings (byte sequences) that are likely + to be encountered later in the data to be compressed, with the most commonly + used strings preferably put towards the end of the dictionary. Using a + dictionary is most useful when the data to be compressed is short and can be + predicted with good accuracy; the data can then be compressed better than + with the default empty dictionary. + + Depending on the size of the compression data structures selected by + deflateInit or deflateInit2, a part of the dictionary may in effect be + discarded, for example if the dictionary is larger than the window size + provided in deflateInit or deflateInit2. Thus the strings most likely to be + useful should be put at the end of the dictionary, not at the front. In + addition, the current implementation of deflate will use at most the window + size minus 262 bytes of the provided dictionary. + + Upon return of this function, strm->adler is set to the Adler-32 value + of the dictionary; the decompressor may later use this value to determine + which dictionary has been used by the compressor. (The Adler-32 value + applies to the whole dictionary even if only a subset of the dictionary is + actually used by the compressor.) If a raw deflate was requested, then the + Adler-32 value is not computed and strm->adler is not set. + + deflateSetDictionary returns Z_OK if success, or Z_STREAM_ERROR if a + parameter is invalid (e.g. 
dictionary being Z_NULL) or the stream state is + inconsistent (for example if deflate has already been called for this stream + or if not at a block boundary for raw deflate). deflateSetDictionary does + not perform any compression: this will be done by deflate(). +*/ + +ZEXTERN int ZEXPORT deflateGetDictionary(z_streamp strm, + Bytef *dictionary, + uInt *dictLength); +/* + Returns the sliding dictionary being maintained by deflate. dictLength is + set to the number of bytes in the dictionary, and that many bytes are copied + to dictionary. dictionary must have enough space, where 32768 bytes is + always enough. If deflateGetDictionary() is called with dictionary equal to + Z_NULL, then only the dictionary length is returned, and nothing is copied. + Similarly, if dictLength is Z_NULL, then it is not set. + + deflateGetDictionary() may return a length less than the window size, even + when more than the window size in input has been provided. It may return up + to 258 bytes less in that case, due to how zlib's implementation of deflate + manages the sliding window and lookahead for matches, where matches can be + up to 258 bytes long. If the application needs the last window-size bytes of + input, then that would need to be saved by the application outside of zlib. + + deflateGetDictionary returns Z_OK on success, or Z_STREAM_ERROR if the + stream state is inconsistent. +*/ + +ZEXTERN int ZEXPORT deflateCopy(z_streamp dest, + z_streamp source); +/* + Sets the destination stream as a complete copy of the source stream. + + This function can be useful when several compression strategies will be + tried, for example when there are several ways of pre-processing the input + data with a filter. The streams that will be discarded should then be freed + by calling deflateEnd. Note that deflateCopy duplicates the internal + compression state which can be quite large, so this strategy is slow and can + consume lots of memory. 
+ + deflateCopy returns Z_OK if success, Z_MEM_ERROR if there was not + enough memory, Z_STREAM_ERROR if the source stream state was inconsistent + (such as zalloc being Z_NULL). msg is left unchanged in both source and + destination. +*/ + +ZEXTERN int ZEXPORT deflateReset(z_streamp strm); +/* + This function is equivalent to deflateEnd followed by deflateInit, but + does not free and reallocate the internal compression state. The stream + will leave the compression level and any other attributes that may have been + set unchanged. total_in, total_out, adler, and msg are initialized. + + deflateReset returns Z_OK if success, or Z_STREAM_ERROR if the source + stream state was inconsistent (such as zalloc or state being Z_NULL). +*/ + +ZEXTERN int ZEXPORT deflateParams(z_streamp strm, + int level, + int strategy); +/* + Dynamically update the compression level and compression strategy. The + interpretation of level and strategy is as in deflateInit2(). This can be + used to switch between compression and straight copy of the input data, or + to switch to a different kind of input data requiring a different strategy. + If the compression approach (which is a function of the level) or the + strategy is changed, and if there have been any deflate() calls since the + state was initialized or reset, then the input available so far is + compressed with the old level and strategy using deflate(strm, Z_BLOCK). + There are three approaches for the compression levels 0, 1..3, and 4..9 + respectively. The new level and strategy will take effect at the next call + of deflate(). + + If a deflate(strm, Z_BLOCK) is performed by deflateParams(), and it does + not have enough output space to complete, then the parameter change will not + take effect. In this case, deflateParams() can be called again with the + same parameters and more output space to try again. 
+ + In order to assure a change in the parameters on the first try, the + deflate stream should be flushed using deflate() with Z_BLOCK or other flush + request until strm.avail_out is not zero, before calling deflateParams(). + Then no more input data should be provided before the deflateParams() call. + If this is done, the old level and strategy will be applied to the data + compressed before deflateParams(), and the new level and strategy will be + applied to the data compressed after deflateParams(). + + deflateParams returns Z_OK on success, Z_STREAM_ERROR if the source stream + state was inconsistent or if a parameter was invalid, or Z_BUF_ERROR if + there was not enough output space to complete the compression of the + available input data before a change in the strategy or approach. Note that + in the case of a Z_BUF_ERROR, the parameters are not changed. A return + value of Z_BUF_ERROR is not fatal, in which case deflateParams() can be + retried with more output space. +*/ + +ZEXTERN int ZEXPORT deflateTune(z_streamp strm, + int good_length, + int max_lazy, + int nice_length, + int max_chain); +/* + Fine tune deflate's internal compression parameters. This should only be + used by someone who understands the algorithm used by zlib's deflate for + searching for the best matching string, and even then only by the most + fanatic optimizer trying to squeeze out the last compressed bit for their + specific input data. Read the deflate.c source code for the meaning of the + max_lazy, good_length, nice_length, and max_chain parameters. + + deflateTune() can be called after deflateInit() or deflateInit2(), and + returns Z_OK on success, or Z_STREAM_ERROR for an invalid deflate stream. + */ + +ZEXTERN uLong ZEXPORT deflateBound(z_streamp strm, + uLong sourceLen); +/* + deflateBound() returns an upper bound on the compressed size after + deflation of sourceLen bytes. It must be called after deflateInit() or + deflateInit2(), and after deflateSetHeader(), if used. 
This would be used + to allocate an output buffer for deflation in a single pass, and so would be + called before deflate(). If that first deflate() call is provided the + sourceLen input bytes, an output buffer allocated to the size returned by + deflateBound(), and the flush value Z_FINISH, then deflate() is guaranteed + to return Z_STREAM_END. Note that it is possible for the compressed size to + be larger than the value returned by deflateBound() if flush options other + than Z_FINISH or Z_NO_FLUSH are used. +*/ + +ZEXTERN int ZEXPORT deflatePending(z_streamp strm, + unsigned *pending, + int *bits); +/* + deflatePending() returns the number of bytes and bits of output that have + been generated, but not yet provided in the available output. The bytes not + provided would be due to the available output space having been consumed. + The number of bits of output not provided are between 0 and 7, where they + await more bits to join them in order to fill out a full byte. If pending + or bits are Z_NULL, then those values are not set. + + deflatePending returns Z_OK if success, or Z_STREAM_ERROR if the source + stream state was inconsistent. + */ + +ZEXTERN int ZEXPORT deflatePrime(z_streamp strm, + int bits, + int value); +/* + deflatePrime() inserts bits in the deflate output stream. The intent + is that this function is used to start off the deflate output with the bits + leftover from a previous deflate stream when appending to it. As such, this + function can only be used for raw deflate, and must be used before the first + deflate() call after a deflateInit2() or deflateReset(). bits must be less + than or equal to 16, and that many of the least significant bits of value + will be inserted in the output. + + deflatePrime returns Z_OK if success, Z_BUF_ERROR if there was not enough + room in the internal buffer to insert the bits, or Z_STREAM_ERROR if the + source stream state was inconsistent.
+*/ + +ZEXTERN int ZEXPORT deflateSetHeader(z_streamp strm, + gz_headerp head); +/* + deflateSetHeader() provides gzip header information for when a gzip + stream is requested by deflateInit2(). deflateSetHeader() may be called + after deflateInit2() or deflateReset() and before the first call of + deflate(). The text, time, os, extra field, name, and comment information + in the provided gz_header structure are written to the gzip header (xflag is + ignored -- the extra flags are set according to the compression level). The + caller must assure that, if not Z_NULL, name and comment are terminated with + a zero byte, and that if extra is not Z_NULL, that extra_len bytes are + available there. If hcrc is true, a gzip header crc is included. Note that + the current versions of the command-line version of gzip (up through version + 1.3.x) do not support header crc's, and will report that it is a "multi-part + gzip file" and give up. + + If deflateSetHeader is not used, the default gzip header has text false, + the time set to zero, and os set to the current operating system, with no + extra, name, or comment fields. The gzip header is returned to the default + state by deflateReset(). + + deflateSetHeader returns Z_OK if success, or Z_STREAM_ERROR if the source + stream state was inconsistent. +*/ + +/* +ZEXTERN int ZEXPORT inflateInit2(z_streamp strm, + int windowBits); + + This is another version of inflateInit with an extra parameter. The + fields next_in, avail_in, zalloc, zfree and opaque must be initialized + before by the caller. + + The windowBits parameter is the base two logarithm of the maximum window + size (the size of the history buffer). It should be in the range 8..15 for + this version of the library. The default value is 15 if inflateInit is used + instead. windowBits must be greater than or equal to the windowBits value + provided to deflateInit2() while compressing, or it must be equal to 15 if + deflateInit2() was not used. 
If a compressed stream with a larger window + size is given as input, inflate() will return with the error code + Z_DATA_ERROR instead of trying to allocate a larger window. + + windowBits can also be zero to request that inflate use the window size in + the zlib header of the compressed stream. + + windowBits can also be -8..-15 for raw inflate. In this case, -windowBits + determines the window size. inflate() will then process raw deflate data, + not looking for a zlib or gzip header, not generating a check value, and not + looking for any check values for comparison at the end of the stream. This + is for use with other formats that use the deflate compressed data format + such as zip. Those formats provide their own check values. If a custom + format is developed using the raw deflate format for compressed data, it is + recommended that a check value such as an Adler-32 or a CRC-32 be applied to + the uncompressed data as is done in the zlib, gzip, and zip formats. For + most applications, the zlib format should be used as is. Note that comments + above on the use in deflateInit2() apply to the magnitude of windowBits. + + windowBits can also be greater than 15 for optional gzip decoding. Add + 32 to windowBits to enable zlib and gzip decoding with automatic header + detection, or add 16 to decode only the gzip format (the zlib format will + return a Z_DATA_ERROR). If a gzip stream is being decoded, strm->adler is a + CRC-32 instead of an Adler-32. Unlike the gunzip utility and gzread() (see + below), inflate() will *not* automatically decode concatenated gzip members. + inflate() will return Z_STREAM_END at the end of the gzip member. The state + would need to be reset to continue decoding a subsequent gzip member. This + *must* be done if there is more data after a gzip member, in order for the + decompression to be compliant with the gzip standard (RFC 1952).
+ + inflateInit2 returns Z_OK if success, Z_MEM_ERROR if there was not enough + memory, Z_VERSION_ERROR if the zlib library version is incompatible with the + version assumed by the caller, or Z_STREAM_ERROR if the parameters are + invalid, such as a null pointer to the structure. msg is set to null if + there is no error message. inflateInit2 does not perform any decompression + apart from possibly reading the zlib header if present: actual decompression + will be done by inflate(). (So next_in and avail_in may be modified, but + next_out and avail_out are unused and unchanged.) The current implementation + of inflateInit2() does not process any header information -- that is + deferred until inflate() is called. +*/ + +ZEXTERN int ZEXPORT inflateSetDictionary(z_streamp strm, + const Bytef *dictionary, + uInt dictLength); +/* + Initializes the decompression dictionary from the given uncompressed byte + sequence. This function must be called immediately after a call of inflate, + if that call returned Z_NEED_DICT. The dictionary chosen by the compressor + can be determined from the Adler-32 value returned by that call of inflate. + The compressor and decompressor must use exactly the same dictionary (see + deflateSetDictionary). For raw inflate, this function can be called at any + time to set the dictionary. If the provided dictionary is smaller than the + window and there is already data in the window, then the provided dictionary + will amend what's there. The application must ensure that the dictionary + that was used for compression is provided. + + inflateSetDictionary returns Z_OK if success, Z_STREAM_ERROR if a + parameter is invalid (e.g. dictionary being Z_NULL) or the stream state is + inconsistent, Z_DATA_ERROR if the given dictionary doesn't match the + expected one (incorrect Adler-32 value). inflateSetDictionary does not + perform any decompression: this will be done by subsequent calls of + inflate().
+*/ + +ZEXTERN int ZEXPORT inflateGetDictionary(z_streamp strm, + Bytef *dictionary, + uInt *dictLength); +/* + Returns the sliding dictionary being maintained by inflate. dictLength is + set to the number of bytes in the dictionary, and that many bytes are copied + to dictionary. dictionary must have enough space, where 32768 bytes is + always enough. If inflateGetDictionary() is called with dictionary equal to + Z_NULL, then only the dictionary length is returned, and nothing is copied. + Similarly, if dictLength is Z_NULL, then it is not set. + + inflateGetDictionary returns Z_OK on success, or Z_STREAM_ERROR if the + stream state is inconsistent. +*/ + +ZEXTERN int ZEXPORT inflateSync(z_streamp strm); +/* + Skips invalid compressed data until a possible full flush point (see above + for the description of deflate with Z_FULL_FLUSH) can be found, or until all + available input is skipped. No output is provided. + + inflateSync searches for a 00 00 FF FF pattern in the compressed data. + All full flush points have this pattern, but not all occurrences of this + pattern are full flush points. + + inflateSync returns Z_OK if a possible full flush point has been found, + Z_BUF_ERROR if no more input was provided, Z_DATA_ERROR if no flush point + has been found, or Z_STREAM_ERROR if the stream structure was inconsistent. + In the success case, the application may save the current value of + total_in which indicates where valid compressed data was found. In the + error case, the application may repeatedly call inflateSync, providing more + input each time, until success or end of the input data. +*/ + +ZEXTERN int ZEXPORT inflateCopy(z_streamp dest, + z_streamp source); +/* + Sets the destination stream as a complete copy of the source stream. + + This function can be useful when randomly accessing a large stream.
The + first pass through the stream can periodically record the inflate state, + allowing restarting inflate at those points when randomly accessing the + stream. + + inflateCopy returns Z_OK if success, Z_MEM_ERROR if there was not + enough memory, Z_STREAM_ERROR if the source stream state was inconsistent + (such as zalloc being Z_NULL). msg is left unchanged in both source and + destination. +*/ + +ZEXTERN int ZEXPORT inflateReset(z_streamp strm); +/* + This function is equivalent to inflateEnd followed by inflateInit, + but does not free and reallocate the internal decompression state. The + stream will keep attributes that may have been set by inflateInit2. + total_in, total_out, adler, and msg are initialized. + + inflateReset returns Z_OK if success, or Z_STREAM_ERROR if the source + stream state was inconsistent (such as zalloc or state being Z_NULL). +*/ + +ZEXTERN int ZEXPORT inflateReset2(z_streamp strm, + int windowBits); +/* + This function is the same as inflateReset, but it also permits changing + the wrap and window size requests. The windowBits parameter is interpreted + the same as it is for inflateInit2. If the window size is changed, then the + memory allocated for the window is freed, and the window will be reallocated + by inflate() if needed. + + inflateReset2 returns Z_OK if success, or Z_STREAM_ERROR if the source + stream state was inconsistent (such as zalloc or state being Z_NULL), or if + the windowBits parameter is invalid. +*/ + +ZEXTERN int ZEXPORT inflatePrime(z_streamp strm, + int bits, + int value); +/* + This function inserts bits in the inflate input stream. The intent is + that this function is used to start inflating at a bit position in the + middle of a byte. The provided bits will be used before any bytes are used + from next_in. This function should only be used with raw inflate, and + should be used before the first inflate() call after inflateInit2() or + inflateReset(). 
bits must be less than or equal to 16, and that many of the + least significant bits of value will be inserted in the input. + + If bits is negative, then the input stream bit buffer is emptied. Then + inflatePrime() can be called again to put bits in the buffer. This is used + to clear out bits leftover after feeding inflate a block description prior + to feeding inflate codes. + + inflatePrime returns Z_OK if success, or Z_STREAM_ERROR if the source + stream state was inconsistent. +*/ + +ZEXTERN long ZEXPORT inflateMark(z_streamp strm); +/* + This function returns two values, one in the lower 16 bits of the return + value, and the other in the remaining upper bits, obtained by shifting the + return value down 16 bits. If the upper value is -1 and the lower value is + zero, then inflate() is currently decoding information outside of a block. + If the upper value is -1 and the lower value is non-zero, then inflate is in + the middle of a stored block, with the lower value equaling the number of + bytes from the input remaining to copy. If the upper value is not -1, then + it is the number of bits back from the current bit position in the input of + the code (literal or length/distance pair) currently being processed. In + that case the lower value is the number of bytes already emitted for that + code. + + A code is being processed if inflate is waiting for more input to complete + decoding of the code, or if it has completed decoding but is waiting for + more output space to write the literal or match data. + + inflateMark() is used to mark locations in the input data for random + access, which may be at bit positions, and to note those cases where the + output of a code may span boundaries of random access blocks. The current + location in the input stream can be determined from avail_in and data_type + as noted in the description for the Z_BLOCK flush parameter for inflate. 
+ + inflateMark returns the value noted above, or -65536 if the provided + source stream state was inconsistent. +*/ + +ZEXTERN int ZEXPORT inflateGetHeader(z_streamp strm, + gz_headerp head); +/* + inflateGetHeader() requests that gzip header information be stored in the + provided gz_header structure. inflateGetHeader() may be called after + inflateInit2() or inflateReset(), and before the first call of inflate(). + As inflate() processes the gzip stream, head->done is zero until the header + is completed, at which time head->done is set to one. If a zlib stream is + being decoded, then head->done is set to -1 to indicate that there will be + no gzip header information forthcoming. Note that Z_BLOCK or Z_TREES can be + used to force inflate() to return immediately after header processing is + complete and before any actual data is decompressed. + + The text, time, xflags, and os fields are filled in with the gzip header + contents. hcrc is set to true if there is a header CRC. (The header CRC + was valid if done is set to one.) If extra is not Z_NULL, then extra_max + contains the maximum number of bytes to write to extra. Once done is true, + extra_len contains the actual extra field length, and extra contains the + extra field, or that field truncated if extra_max is less than extra_len. + If name is not Z_NULL, then up to name_max characters are written there, + terminated with a zero unless the length is greater than name_max. If + comment is not Z_NULL, then up to comm_max characters are written there, + terminated with a zero unless the length is greater than comm_max. When any + of extra, name, or comment are not Z_NULL and the respective field is not + present in the header, then that field is set to Z_NULL to signal its + absence. This allows the use of deflateSetHeader() with the returned + structure to duplicate the header. 
However if those fields are set to + allocated memory, then the application will need to save those pointers + elsewhere so that they can be eventually freed. + + If inflateGetHeader is not used, then the header information is simply + discarded. The header is always checked for validity, including the header + CRC if present. inflateReset() will reset the process to discard the header + information. The application would need to call inflateGetHeader() again to + retrieve the header from the next gzip stream. + + inflateGetHeader returns Z_OK if success, or Z_STREAM_ERROR if the source + stream state was inconsistent. +*/ + +/* +ZEXTERN int ZEXPORT inflateBackInit(z_streamp strm, int windowBits, + unsigned char FAR *window); + + Initialize the internal stream state for decompression using inflateBack() + calls. The fields zalloc, zfree and opaque in strm must be initialized + before the call. If zalloc and zfree are Z_NULL, then the default library- + derived memory allocation routines are used. windowBits is the base two + logarithm of the window size, in the range 8..15. window is a caller + supplied buffer of that size. Except for special applications where it is + assured that deflate was used with small window sizes, windowBits must be 15 + and a 32K byte window must be supplied to be able to decompress general + deflate streams. + + See inflateBack() for the usage of these routines. + + inflateBackInit will return Z_OK on success, Z_STREAM_ERROR if any of + the parameters are invalid, Z_MEM_ERROR if the internal state could not be + allocated, or Z_VERSION_ERROR if the version of the library does not match + the version of the header file. 
+*/ + +typedef unsigned (*in_func)(void FAR *, + z_const unsigned char FAR * FAR *); +typedef int (*out_func)(void FAR *, unsigned char FAR *, unsigned); + +ZEXTERN int ZEXPORT inflateBack(z_streamp strm, + in_func in, void FAR *in_desc, + out_func out, void FAR *out_desc); +/* + inflateBack() does a raw inflate with a single call using a call-back + interface for input and output. This is potentially more efficient than + inflate() for file i/o applications, in that it avoids copying between the + output and the sliding window by simply making the window itself the output + buffer. inflate() can be faster on modern CPUs when used with large + buffers. inflateBack() trusts the application to not change the output + buffer passed by the output function, at least until inflateBack() returns. + + inflateBackInit() must be called first to allocate the internal state + and to initialize the state with the user-provided window buffer. + inflateBack() may then be used multiple times to inflate a complete, raw + deflate stream with each call. inflateBackEnd() is then called to free the + allocated state. + + A raw deflate stream is one with no zlib or gzip header or trailer. + This routine would normally be used in a utility that reads zip or gzip + files and writes out uncompressed files. The utility would decode the + header and process the trailer on its own, hence this routine expects only + the raw deflate stream to decompress. This is different from the default + behavior of inflate(), which expects a zlib header and trailer around the + deflate stream. + + inflateBack() uses two subroutines supplied by the caller that are then + called by inflateBack() for input and output. inflateBack() calls those + routines until it reads a complete deflate stream and writes out all of the + uncompressed data, or until it encounters an error. The function's + parameters and return types are defined above in the in_func and out_func + typedefs. 
inflateBack() will call in(in_desc, &buf) which should return the + number of bytes of provided input, and a pointer to that input in buf. If + there is no input available, in() must return zero -- buf is ignored in that + case -- and inflateBack() will return a buffer error. inflateBack() will + call out(out_desc, buf, len) to write the uncompressed data buf[0..len-1]. + out() should return zero on success, or non-zero on failure. If out() + returns non-zero, inflateBack() will return with an error. Neither in() nor + out() are permitted to change the contents of the window provided to + inflateBackInit(), which is also the buffer that out() uses to write from. + The length written by out() will be at most the window size. Any non-zero + amount of input may be provided by in(). + + For convenience, inflateBack() can be provided input on the first call by + setting strm->next_in and strm->avail_in. If that input is exhausted, then + in() will be called. Therefore strm->next_in must be initialized before + calling inflateBack(). If strm->next_in is Z_NULL, then in() will be called + immediately for input. If strm->next_in is not Z_NULL, then strm->avail_in + must also be initialized, and then if strm->avail_in is not zero, input will + initially be taken from strm->next_in[0 .. strm->avail_in - 1]. + + The in_desc and out_desc parameters of inflateBack() are passed as the + first parameter of in() and out() respectively when they are called. These + descriptors can be optionally used to pass any information that the caller- + supplied in() and out() functions need to do their job. + + On return, inflateBack() will set strm->next_in and strm->avail_in to + pass back any unused input that was provided by the last in() call.
The + return values of inflateBack() can be Z_STREAM_END on success, Z_BUF_ERROR + if in() or out() returned an error, Z_DATA_ERROR if there was a format error + in the deflate stream (in which case strm->msg is set to indicate the nature + of the error), or Z_STREAM_ERROR if the stream was not properly initialized. + In the case of Z_BUF_ERROR, an input or output error can be distinguished + using strm->next_in which will be Z_NULL only if in() returned an error. If + strm->next_in is not Z_NULL, then the Z_BUF_ERROR was due to out() returning + non-zero. (in() will always be called before out(), so strm->next_in is + assured to be defined if out() returns non-zero.) Note that inflateBack() + cannot return Z_OK. +*/ + +ZEXTERN int ZEXPORT inflateBackEnd(z_streamp strm); +/* + All memory allocated by inflateBackInit() is freed. + + inflateBackEnd() returns Z_OK on success, or Z_STREAM_ERROR if the stream + state was inconsistent. +*/ + +ZEXTERN uLong ZEXPORT zlibCompileFlags(void); +/* Return flags indicating compile-time options. 
+ + Type sizes, two bits each, 00 = 16 bits, 01 = 32, 10 = 64, 11 = other: + 1.0: size of uInt + 3.2: size of uLong + 5.4: size of voidpf (pointer) + 7.6: size of z_off_t + + Compiler, assembler, and debug options: + 8: ZLIB_DEBUG + 9: ASMV or ASMINF -- use ASM code + 10: ZLIB_WINAPI -- exported functions use the WINAPI calling convention + 11: 0 (reserved) + + One-time table building (smaller code, but not thread-safe if true): + 12: BUILDFIXED -- build static block decoding tables when needed + 13: DYNAMIC_CRC_TABLE -- build CRC calculation tables when needed + 14,15: 0 (reserved) + + Library content (indicates missing functionality): + 16: NO_GZCOMPRESS -- gz* functions cannot compress (to avoid linking + deflate code when not needed) + 17: NO_GZIP -- deflate can't write gzip streams, and inflate can't detect + and decode gzip streams (to avoid linking crc code) + 18-19: 0 (reserved) + + Operation variations (changes in library functionality): + 20: PKZIP_BUG_WORKAROUND -- slightly more permissive inflate + 21: FASTEST -- deflate algorithm with only one, lowest compression level + 22,23: 0 (reserved) + + The sprintf variant used by gzprintf (zero is best): + 24: 0 = vs*, 1 = s* -- 1 means limited to 20 arguments after the format + 25: 0 = *nprintf, 1 = *printf -- 1 means gzprintf() not secure! + 26: 0 = returns value, 1 = void -- 1 means inferred string length returned + + Remainder: + 27-31: 0 (reserved) + */ + +#ifndef Z_SOLO + + /* utility functions */ + +/* + The following utility functions are implemented on top of the basic + stream-oriented functions. To simplify the interface, some default options + are assumed (compression level and memory usage, standard memory allocation + functions). The source code of these utility functions can be modified if + you need special options. +*/ + +ZEXTERN int ZEXPORT compress(Bytef *dest, uLongf *destLen, + const Bytef *source, uLong sourceLen); +/* + Compresses the source buffer into the destination buffer. 
sourceLen is + the byte length of the source buffer. Upon entry, destLen is the total size + of the destination buffer, which must be at least the value returned by + compressBound(sourceLen). Upon exit, destLen is the actual size of the + compressed data. compress() is equivalent to compress2() with a level + parameter of Z_DEFAULT_COMPRESSION. + + compress returns Z_OK if success, Z_MEM_ERROR if there was not + enough memory, Z_BUF_ERROR if there was not enough room in the output + buffer. +*/ + +ZEXTERN int ZEXPORT compress2(Bytef *dest, uLongf *destLen, + const Bytef *source, uLong sourceLen, + int level); +/* + Compresses the source buffer into the destination buffer. The level + parameter has the same meaning as in deflateInit. sourceLen is the byte + length of the source buffer. Upon entry, destLen is the total size of the + destination buffer, which must be at least the value returned by + compressBound(sourceLen). Upon exit, destLen is the actual size of the + compressed data. + + compress2 returns Z_OK if success, Z_MEM_ERROR if there was not enough + memory, Z_BUF_ERROR if there was not enough room in the output buffer, + Z_STREAM_ERROR if the level parameter is invalid. +*/ + +ZEXTERN uLong ZEXPORT compressBound(uLong sourceLen); +/* + compressBound() returns an upper bound on the compressed size after + compress() or compress2() on sourceLen bytes. It would be used before a + compress() or compress2() call to allocate the destination buffer. +*/ + +ZEXTERN int ZEXPORT uncompress(Bytef *dest, uLongf *destLen, + const Bytef *source, uLong sourceLen); +/* + Decompresses the source buffer into the destination buffer. sourceLen is + the byte length of the source buffer. Upon entry, destLen is the total size + of the destination buffer, which must be large enough to hold the entire + uncompressed data. 
(The size of the uncompressed data must have been saved + previously by the compressor and transmitted to the decompressor by some + mechanism outside the scope of this compression library.) Upon exit, destLen + is the actual size of the uncompressed data. + + uncompress returns Z_OK if success, Z_MEM_ERROR if there was not + enough memory, Z_BUF_ERROR if there was not enough room in the output + buffer, or Z_DATA_ERROR if the input data was corrupted or incomplete. In + the case where there is not enough room, uncompress() will fill the output + buffer with the uncompressed data up to that point. +*/ + +ZEXTERN int ZEXPORT uncompress2(Bytef *dest, uLongf *destLen, + const Bytef *source, uLong *sourceLen); +/* + Same as uncompress, except that sourceLen is a pointer, where the + length of the source is *sourceLen. On return, *sourceLen is the number of + source bytes consumed. +*/ + + /* gzip file access functions */ + +/* + This library supports reading and writing files in gzip (.gz) format with + an interface similar to that of stdio, using the functions that start with + "gz". The gzip format is different from the zlib format. gzip is a gzip + wrapper, documented in RFC 1952, wrapped around a deflate stream. +*/ + +typedef struct gzFile_s *gzFile; /* semi-opaque gzip file descriptor */ + +/* +ZEXTERN gzFile ZEXPORT gzopen(const char *path, const char *mode); + + Open the gzip (.gz) file at path for reading and decompressing, or + compressing and writing. The mode parameter is as in fopen ("rb" or "wb") + but can also include a compression level ("wb9") or a strategy: 'f' for + filtered data as in "wb6f", 'h' for Huffman-only compression as in "wb1h", + 'R' for run-length encoding as in "wb1R", or 'F' for fixed code compression + as in "wb9F". (See the description of deflateInit2 for more information + about the strategy parameter.) 'T' will request transparent writing or + appending with no compression and not using the gzip format. 
+ + "a" can be used instead of "w" to request that the gzip stream that will + be written be appended to the file. "+" will result in an error, since + reading and writing to the same gzip file is not supported. The addition of + "x" when writing will create the file exclusively, which fails if the file + already exists. On systems that support it, the addition of "e" when + reading or writing will set the flag to close the file on an execve() call. + + These functions, as well as gzip, will read and decode a sequence of gzip + streams in a file. The append function of gzopen() can be used to create + such a file. (Also see gzflush() for another way to do this.) When + appending, gzopen does not test whether the file begins with a gzip stream, + nor does it look for the end of the gzip streams to begin appending. gzopen + will simply append a gzip stream to the existing file. + + gzopen can be used to read a file which is not in gzip format; in this + case gzread will directly read from the file without decompression. When + reading, this will be detected automatically by looking for the magic two- + byte gzip header. + + gzopen returns NULL if the file could not be opened, if there was + insufficient memory to allocate the gzFile state, or if an invalid mode was + specified (an 'r', 'w', or 'a' was not provided, or '+' was provided). + errno can be checked to determine if the reason gzopen failed was that the + file could not be opened. +*/ + +ZEXTERN gzFile ZEXPORT gzdopen(int fd, const char *mode); +/* + Associate a gzFile with the file descriptor fd. File descriptors are + obtained from calls like open, dup, creat, pipe or fileno (if the file has + been previously opened with fopen). The mode parameter is as in gzopen. + + The next call of gzclose on the returned gzFile will also close the file + descriptor fd, just like fclose(fdopen(fd, mode)) closes the file descriptor + fd. If you want to keep fd open, use fd = dup(fd_keep); gz = gzdopen(fd, + mode);. 
The duplicated descriptor should be saved to avoid a leak, since + gzdopen does not close fd if it fails. If you are using fileno() to get the + file descriptor from a FILE *, then you will have to use dup() to avoid + double-close()ing the file descriptor. Both gzclose() and fclose() will + close the associated file descriptor, so they need to have different file + descriptors. + + gzdopen returns NULL if there was insufficient memory to allocate the + gzFile state, if an invalid mode was specified (an 'r', 'w', or 'a' was not + provided, or '+' was provided), or if fd is -1. The file descriptor is not + used until the next gz* read, write, seek, or close operation, so gzdopen + will not detect if fd is invalid (unless fd is -1). +*/ + +ZEXTERN int ZEXPORT gzbuffer(gzFile file, unsigned size); +/* + Set the internal buffer size used by this library's functions for file to + size. The default buffer size is 8192 bytes. This function must be called + after gzopen() or gzdopen(), and before any other calls that read or write + the file. The buffer memory allocation is always deferred to the first read + or write. Three times that size in buffer space is allocated. A larger + buffer size of, for example, 64K or 128K bytes will noticeably increase the + speed of decompression (reading). + + The new buffer size also affects the maximum length for gzprintf(). + + gzbuffer() returns 0 on success, or -1 on failure, such as being called + too late. +*/ + +ZEXTERN int ZEXPORT gzsetparams(gzFile file, int level, int strategy); +/* + Dynamically update the compression level and strategy for file. See the + description of deflateInit2 for the meaning of these parameters. Previously + provided data is flushed before applying the parameter changes. + + gzsetparams returns Z_OK if success, Z_STREAM_ERROR if the file was not + opened for writing, Z_ERRNO if there is an error writing the flushed data, + or Z_MEM_ERROR if there is a memory allocation error. 
+*/ + +ZEXTERN int ZEXPORT gzread(gzFile file, voidp buf, unsigned len); +/* + Read and decompress up to len uncompressed bytes from file into buf. If + the input file is not in gzip format, gzread copies the given number of + bytes into the buffer directly from the file. + + After reaching the end of a gzip stream in the input, gzread will continue + to read, looking for another gzip stream. Any number of gzip streams may be + concatenated in the input file, and will all be decompressed by gzread(). + If something other than a gzip stream is encountered after a gzip stream, + that remaining trailing garbage is ignored (and no error is returned). + + gzread can be used to read a gzip file that is being concurrently written. + Upon reaching the end of the input, gzread will return with the available + data. If the error code returned by gzerror is Z_OK or Z_BUF_ERROR, then + gzclearerr can be used to clear the end of file indicator in order to permit + gzread to be tried again. Z_OK indicates that a gzip stream was completed + on the last gzread. Z_BUF_ERROR indicates that the input file ended in the + middle of a gzip stream. Note that gzread does not return -1 in the event + of an incomplete gzip stream. This error is deferred until gzclose(), which + will return Z_BUF_ERROR if the last gzread ended in the middle of a gzip + stream. Alternatively, gzerror can be used before gzclose to detect this + case. + + gzread returns the number of uncompressed bytes actually read, less than + len for end of file, or -1 for error. If len is too large to fit in an int, + then nothing is read, -1 is returned, and the error state is set to + Z_STREAM_ERROR. +*/ + +ZEXTERN z_size_t ZEXPORT gzfread(voidp buf, z_size_t size, z_size_t nitems, + gzFile file); +/* + Read and decompress up to nitems items of size size from file into buf, + otherwise operating as gzread() does. This duplicates the interface of + stdio's fread(), with size_t request and return types. 
If the library + defines size_t, then z_size_t is identical to size_t. If not, then z_size_t + is an unsigned integer type that can contain a pointer. + + gzfread() returns the number of full items read of size size, or zero if + the end of the file was reached and a full item could not be read, or if + there was an error. gzerror() must be consulted if zero is returned in + order to determine if there was an error. If the multiplication of size and + nitems overflows, i.e. the product does not fit in a z_size_t, then nothing + is read, zero is returned, and the error state is set to Z_STREAM_ERROR. + + In the event that the end of file is reached and only a partial item is + available at the end, i.e. the remaining uncompressed data length is not a + multiple of size, then the final partial item is nevertheless read into buf + and the end-of-file flag is set. The length of the partial item read is not + provided, but could be inferred from the result of gztell(). This behavior + is the same as the behavior of fread() implementations in common libraries, + but it prevents the direct use of gzfread() to read a concurrently written + file, resetting and retrying on end-of-file, when size is not 1. +*/ + +ZEXTERN int ZEXPORT gzwrite(gzFile file, voidpc buf, unsigned len); +/* + Compress and write the len uncompressed bytes at buf to file. gzwrite + returns the number of uncompressed bytes written or 0 in case of error. +*/ + +ZEXTERN z_size_t ZEXPORT gzfwrite(voidpc buf, z_size_t size, + z_size_t nitems, gzFile file); +/* + Compress and write nitems items of size size from buf to file, duplicating + the interface of stdio's fwrite(), with size_t request and return types. If + the library defines size_t, then z_size_t is identical to size_t. If not, + then z_size_t is an unsigned integer type that can contain a pointer. + + gzfwrite() returns the number of full items written of size size, or zero + if there was an error. 
If the multiplication of size and nitems overflows, + i.e. the product does not fit in a z_size_t, then nothing is written, zero + is returned, and the error state is set to Z_STREAM_ERROR. +*/ + +ZEXTERN int ZEXPORTVA gzprintf(gzFile file, const char *format, ...); +/* + Convert, format, compress, and write the arguments (...) to file under + control of the string format, as in fprintf. gzprintf returns the number of + uncompressed bytes actually written, or a negative zlib error code in case + of error. The number of uncompressed bytes written is limited to 8191, or + one less than the buffer size given to gzbuffer(). The caller should assure + that this limit is not exceeded. If it is exceeded, then gzprintf() will + return an error (0) with nothing written. In this case, there may also be a + buffer overflow with unpredictable consequences, which is possible only if + zlib was compiled with the insecure functions sprintf() or vsprintf(), + because the secure snprintf() or vsnprintf() functions were not available. + This can be determined using zlibCompileFlags(). +*/ + +ZEXTERN int ZEXPORT gzputs(gzFile file, const char *s); +/* + Compress and write the given null-terminated string s to file, excluding + the terminating null character. + + gzputs returns the number of characters written, or -1 in case of error. +*/ + +ZEXTERN char * ZEXPORT gzgets(gzFile file, char *buf, int len); +/* + Read and decompress bytes from file into buf, until len-1 characters are + read, or until a newline character is read and transferred to buf, or an + end-of-file condition is encountered. If any characters are read or if len + is one, the string is terminated with a null character. If no characters + are read due to an end-of-file or len is less than one, then the buffer is + left untouched. + + gzgets returns buf which is a null-terminated string, or it returns NULL + for end-of-file or in case of error. If there was an error, the contents at + buf are indeterminate. 
+*/ + +ZEXTERN int ZEXPORT gzputc(gzFile file, int c); +/* + Compress and write c, converted to an unsigned char, into file. gzputc + returns the value that was written, or -1 in case of error. +*/ + +ZEXTERN int ZEXPORT gzgetc(gzFile file); +/* + Read and decompress one byte from file. gzgetc returns this byte or -1 + in case of end of file or error. This is implemented as a macro for speed. + As such, it does not do all of the checking the other functions do. I.e. + it does not check to see if file is NULL, nor whether the structure file + points to has been clobbered or not. +*/ + +ZEXTERN int ZEXPORT gzungetc(int c, gzFile file); +/* + Push c back onto the stream for file to be read as the first character on + the next read. At least one character of push-back is always allowed. + gzungetc() returns the character pushed, or -1 on failure. gzungetc() will + fail if c is -1, and may fail if a character has been pushed but not read + yet. If gzungetc is used immediately after gzopen or gzdopen, at least the + output buffer size of pushed characters is allowed. (See gzbuffer above.) + The pushed character will be discarded if the stream is repositioned with + gzseek() or gzrewind(). +*/ + +ZEXTERN int ZEXPORT gzflush(gzFile file, int flush); +/* + Flush all pending output to file. The parameter flush is as in the + deflate() function. The return value is the zlib error number (see function + gzerror below). gzflush is only permitted when writing. + + If the flush parameter is Z_FINISH, the remaining data is written and the + gzip stream is completed in the output. If gzwrite() is called again, a new + gzip stream will be started in the output. gzread() is able to read such + concatenated gzip streams. + + gzflush should be called only when strictly necessary because it will + degrade compression if called too often. 
+*/ + +/* +ZEXTERN z_off_t ZEXPORT gzseek(gzFile file, + z_off_t offset, int whence); + + Set the starting position to offset relative to whence for the next gzread + or gzwrite on file. The offset represents a number of bytes in the + uncompressed data stream. The whence parameter is defined as in lseek(2); + the value SEEK_END is not supported. + + If the file is opened for reading, this function is emulated but can be + extremely slow. If the file is opened for writing, only forward seeks are + supported; gzseek then compresses a sequence of zeroes up to the new + starting position. + + gzseek returns the resulting offset location as measured in bytes from + the beginning of the uncompressed stream, or -1 in case of error, in + particular if the file is opened for writing and the new starting position + would be before the current position. +*/ + +ZEXTERN int ZEXPORT gzrewind(gzFile file); +/* + Rewind file. This function is supported only for reading. + + gzrewind(file) is equivalent to (int)gzseek(file, 0L, SEEK_SET). +*/ + +/* +ZEXTERN z_off_t ZEXPORT gztell(gzFile file); + + Return the starting position for the next gzread or gzwrite on file. + This position represents a number of bytes in the uncompressed data stream, + and is zero when starting, even if appending or reading a gzip stream from + the middle of a file using gzdopen(). + + gztell(file) is equivalent to gzseek(file, 0L, SEEK_CUR) +*/ + +/* +ZEXTERN z_off_t ZEXPORT gzoffset(gzFile file); + + Return the current compressed (actual) read or write offset of file. This + offset includes the count of bytes that precede the gzip stream, for example + when appending or when using gzdopen() for reading. When reading, the + offset does not include as yet unused buffered input. This information can + be used for a progress indicator. On error, gzoffset() returns -1. 
+*/ + +ZEXTERN int ZEXPORT gzeof(gzFile file); +/* + Return true (1) if the end-of-file indicator for file has been set while + reading, false (0) otherwise. Note that the end-of-file indicator is set + only if the read tried to go past the end of the input, but came up short. + Therefore, just like feof(), gzeof() may return false even if there is no + more data to read, in the event that the last read request was for the exact + number of bytes remaining in the input file. This will happen if the input + file size is an exact multiple of the buffer size. + + If gzeof() returns true, then the read functions will return no more data, + unless the end-of-file indicator is reset by gzclearerr() and the input file + has grown since the previous end of file was detected. +*/ + +ZEXTERN int ZEXPORT gzdirect(gzFile file); +/* + Return true (1) if file is being copied directly while reading, or false + (0) if file is a gzip stream being decompressed. + + If the input file is empty, gzdirect() will return true, since the input + does not contain a gzip stream. + + If gzdirect() is used immediately after gzopen() or gzdopen() it will + cause buffers to be allocated to allow reading the file to determine if it + is a gzip file. Therefore if gzbuffer() is used, it should be called before + gzdirect(). + + When writing, gzdirect() returns true (1) if transparent writing was + requested ("wT" for the gzopen() mode), or false (0) otherwise. (Note: + gzdirect() is not needed when writing. Transparent writing must be + explicitly requested, so the application already knows the answer. When + linking statically, using gzdirect() will include all of the zlib code for + gzip file reading and decompression, which may not be desired.) +*/ + +ZEXTERN int ZEXPORT gzclose(gzFile file); +/* + Flush all pending output for file, if necessary, close file and + deallocate the (de)compression state. 
Note that once file is closed, you + cannot call gzerror with file, since its structures have been deallocated. + gzclose must not be called more than once on the same file, just as free + must not be called more than once on the same allocation. + + gzclose will return Z_STREAM_ERROR if file is not valid, Z_ERRNO on a + file operation error, Z_MEM_ERROR if out of memory, Z_BUF_ERROR if the + last read ended in the middle of a gzip stream, or Z_OK on success. +*/ + +ZEXTERN int ZEXPORT gzclose_r(gzFile file); +ZEXTERN int ZEXPORT gzclose_w(gzFile file); +/* + Same as gzclose(), but gzclose_r() is only for use when reading, and + gzclose_w() is only for use when writing or appending. The advantage to + using these instead of gzclose() is that they avoid linking in zlib + compression or decompression code that is not used when only reading or only + writing respectively. If gzclose() is used, then both compression and + decompression code will be included in the application when linking to a static + zlib library. +*/ + +ZEXTERN const char * ZEXPORT gzerror(gzFile file, int *errnum); +/* + Return the error message for the last error which occurred on file. + errnum is set to zlib error number. If an error occurred in the file system + and not in the compression library, errnum is set to Z_ERRNO and the + application may consult errno to get the exact error code. + + The application must not modify the returned string. Future calls to + this function may invalidate the previously returned string. If file is + closed, then the string previously returned by gzerror will no longer be + available. + + gzerror() should be used to distinguish errors from end-of-file for those + functions above that do not distinguish those cases in their return values. +*/ + +ZEXTERN void ZEXPORT gzclearerr(gzFile file); +/* + Clear the error and end-of-file flags for file. This is analogous to the + clearerr() function in stdio.
This is useful for continuing to read a gzip + file that is being written concurrently. +*/ + +#endif /* !Z_SOLO */ + + /* checksum functions */ + +/* + These functions are not related to compression but are exported + anyway because they might be useful in applications using the compression + library. +*/ + +ZEXTERN uLong ZEXPORT adler32(uLong adler, const Bytef *buf, uInt len); +/* + Update a running Adler-32 checksum with the bytes buf[0..len-1] and + return the updated checksum. An Adler-32 value is in the range of a 32-bit + unsigned integer. If buf is Z_NULL, this function returns the required + initial value for the checksum. + + An Adler-32 checksum is almost as reliable as a CRC-32 but can be computed + much faster. + + Usage example: + + uLong adler = adler32(0L, Z_NULL, 0); + + while (read_buffer(buffer, length) != EOF) { + adler = adler32(adler, buffer, length); + } + if (adler != original_adler) error(); +*/ + +ZEXTERN uLong ZEXPORT adler32_z(uLong adler, const Bytef *buf, + z_size_t len); +/* + Same as adler32(), but with a size_t length. +*/ + +/* +ZEXTERN uLong ZEXPORT adler32_combine(uLong adler1, uLong adler2, + z_off_t len2); + + Combine two Adler-32 checksums into one. For two sequences of bytes, seq1 + and seq2 with lengths len1 and len2, Adler-32 checksums were calculated for + each, adler1 and adler2. adler32_combine() returns the Adler-32 checksum of + seq1 and seq2 concatenated, requiring only adler1, adler2, and len2. Note + that the z_off_t type (like off_t) is a signed integer. If len2 is + negative, the result has no meaning or utility. +*/ + +ZEXTERN uLong ZEXPORT crc32(uLong crc, const Bytef *buf, uInt len); +/* + Update a running CRC-32 with the bytes buf[0..len-1] and return the + updated CRC-32. A CRC-32 value is in the range of a 32-bit unsigned integer. + If buf is Z_NULL, this function returns the required initial value for the + crc. 
Pre- and post-conditioning (one's complement) is performed within this + function so it shouldn't be done by the application. + + Usage example: + + uLong crc = crc32(0L, Z_NULL, 0); + + while (read_buffer(buffer, length) != EOF) { + crc = crc32(crc, buffer, length); + } + if (crc != original_crc) error(); +*/ + +ZEXTERN uLong ZEXPORT crc32_z(uLong crc, const Bytef *buf, + z_size_t len); +/* + Same as crc32(), but with a size_t length. +*/ + +/* +ZEXTERN uLong ZEXPORT crc32_combine(uLong crc1, uLong crc2, z_off_t len2); + + Combine two CRC-32 check values into one. For two sequences of bytes, + seq1 and seq2 with lengths len1 and len2, CRC-32 check values were + calculated for each, crc1 and crc2. crc32_combine() returns the CRC-32 + check value of seq1 and seq2 concatenated, requiring only crc1, crc2, and + len2. +*/ + +/* +ZEXTERN uLong ZEXPORT crc32_combine_gen(z_off_t len2); + + Return the operator corresponding to length len2, to be used with + crc32_combine_op(). +*/ + +ZEXTERN uLong ZEXPORT crc32_combine_op(uLong crc1, uLong crc2, uLong op); +/* + Give the same result as crc32_combine(), using op in place of len2. op is + generated from len2 by crc32_combine_gen(). This will be faster than + crc32_combine() if the generated op is used more than once.
+*/ + + + /* various hacks, don't look :) */ + +/* deflateInit and inflateInit are macros to allow checking the zlib version + * and the compiler's view of z_stream: + */ +ZEXTERN int ZEXPORT deflateInit_(z_streamp strm, int level, + const char *version, int stream_size); +ZEXTERN int ZEXPORT inflateInit_(z_streamp strm, + const char *version, int stream_size); +ZEXTERN int ZEXPORT deflateInit2_(z_streamp strm, int level, int method, + int windowBits, int memLevel, + int strategy, const char *version, + int stream_size); +ZEXTERN int ZEXPORT inflateInit2_(z_streamp strm, int windowBits, + const char *version, int stream_size); +ZEXTERN int ZEXPORT inflateBackInit_(z_streamp strm, int windowBits, + unsigned char FAR *window, + const char *version, + int stream_size); +#ifdef Z_PREFIX_SET +# define z_deflateInit(strm, level) \ + deflateInit_((strm), (level), ZLIB_VERSION, (int)sizeof(z_stream)) +# define z_inflateInit(strm) \ + inflateInit_((strm), ZLIB_VERSION, (int)sizeof(z_stream)) +# define z_deflateInit2(strm, level, method, windowBits, memLevel, strategy) \ + deflateInit2_((strm),(level),(method),(windowBits),(memLevel),\ + (strategy), ZLIB_VERSION, (int)sizeof(z_stream)) +# define z_inflateInit2(strm, windowBits) \ + inflateInit2_((strm), (windowBits), ZLIB_VERSION, \ + (int)sizeof(z_stream)) +# define z_inflateBackInit(strm, windowBits, window) \ + inflateBackInit_((strm), (windowBits), (window), \ + ZLIB_VERSION, (int)sizeof(z_stream)) +#else +# define deflateInit(strm, level) \ + deflateInit_((strm), (level), ZLIB_VERSION, (int)sizeof(z_stream)) +# define inflateInit(strm) \ + inflateInit_((strm), ZLIB_VERSION, (int)sizeof(z_stream)) +# define deflateInit2(strm, level, method, windowBits, memLevel, strategy) \ + deflateInit2_((strm),(level),(method),(windowBits),(memLevel),\ + (strategy), ZLIB_VERSION, (int)sizeof(z_stream)) +# define inflateInit2(strm, windowBits) \ + inflateInit2_((strm), (windowBits), ZLIB_VERSION, \ + (int)sizeof(z_stream)) +# define 
inflateBackInit(strm, windowBits, window) \ + inflateBackInit_((strm), (windowBits), (window), \ + ZLIB_VERSION, (int)sizeof(z_stream)) +#endif + +#ifndef Z_SOLO + +/* gzgetc() macro and its supporting function and exposed data structure. Note + * that the real internal state is much larger than the exposed structure. + * This abbreviated structure exposes just enough for the gzgetc() macro. The + * user should not mess with these exposed elements, since their names or + * behavior could change in the future, perhaps even capriciously. They can + * only be used by the gzgetc() macro. You have been warned. + */ +struct gzFile_s { + unsigned have; + unsigned char *next; + z_off64_t pos; +}; +ZEXTERN int ZEXPORT gzgetc_(gzFile file); /* backward compatibility */ +#ifdef Z_PREFIX_SET +# undef z_gzgetc +# define z_gzgetc(g) \ + ((g)->have ? ((g)->have--, (g)->pos++, *((g)->next)++) : (gzgetc)(g)) +#else +# define gzgetc(g) \ + ((g)->have ? ((g)->have--, (g)->pos++, *((g)->next)++) : (gzgetc)(g)) +#endif + +/* provide 64-bit offset functions if _LARGEFILE64_SOURCE defined, and/or + * change the regular functions to 64 bits if _FILE_OFFSET_BITS is 64 (if + * both are true, the application gets the *64 functions, and the regular + * functions are changed to 64 bits) -- in case these are set on systems + * without large file support, _LFS64_LARGEFILE must also be true + */ +#ifdef Z_LARGE64 + ZEXTERN gzFile ZEXPORT gzopen64(const char *, const char *); + ZEXTERN z_off64_t ZEXPORT gzseek64(gzFile, z_off64_t, int); + ZEXTERN z_off64_t ZEXPORT gztell64(gzFile); + ZEXTERN z_off64_t ZEXPORT gzoffset64(gzFile); + ZEXTERN uLong ZEXPORT adler32_combine64(uLong, uLong, z_off64_t); + ZEXTERN uLong ZEXPORT crc32_combine64(uLong, uLong, z_off64_t); + ZEXTERN uLong ZEXPORT crc32_combine_gen64(z_off64_t); +#endif + +#if !defined(ZLIB_INTERNAL) && defined(Z_WANT64) +# ifdef Z_PREFIX_SET +# define z_gzopen z_gzopen64 +# define z_gzseek z_gzseek64 +# define z_gztell z_gztell64 +# define 
z_gzoffset z_gzoffset64 +# define z_adler32_combine z_adler32_combine64 +# define z_crc32_combine z_crc32_combine64 +# define z_crc32_combine_gen z_crc32_combine_gen64 +# else +# define gzopen gzopen64 +# define gzseek gzseek64 +# define gztell gztell64 +# define gzoffset gzoffset64 +# define adler32_combine adler32_combine64 +# define crc32_combine crc32_combine64 +# define crc32_combine_gen crc32_combine_gen64 +# endif +# ifndef Z_LARGE64 + ZEXTERN gzFile ZEXPORT gzopen64(const char *, const char *); + ZEXTERN z_off_t ZEXPORT gzseek64(gzFile, z_off_t, int); + ZEXTERN z_off_t ZEXPORT gztell64(gzFile); + ZEXTERN z_off_t ZEXPORT gzoffset64(gzFile); + ZEXTERN uLong ZEXPORT adler32_combine64(uLong, uLong, z_off_t); + ZEXTERN uLong ZEXPORT crc32_combine64(uLong, uLong, z_off_t); + ZEXTERN uLong ZEXPORT crc32_combine_gen64(z_off_t); +# endif +#else + ZEXTERN gzFile ZEXPORT gzopen(const char *, const char *); + ZEXTERN z_off_t ZEXPORT gzseek(gzFile, z_off_t, int); + ZEXTERN z_off_t ZEXPORT gztell(gzFile); + ZEXTERN z_off_t ZEXPORT gzoffset(gzFile); + ZEXTERN uLong ZEXPORT adler32_combine(uLong, uLong, z_off_t); + ZEXTERN uLong ZEXPORT crc32_combine(uLong, uLong, z_off_t); + ZEXTERN uLong ZEXPORT crc32_combine_gen(z_off_t); +#endif + +#else /* Z_SOLO */ + + ZEXTERN uLong ZEXPORT adler32_combine(uLong, uLong, z_off_t); + ZEXTERN uLong ZEXPORT crc32_combine(uLong, uLong, z_off_t); + ZEXTERN uLong ZEXPORT crc32_combine_gen(z_off_t); + +#endif /* !Z_SOLO */ + +/* undocumented functions */ +ZEXTERN const char * ZEXPORT zError(int); +ZEXTERN int ZEXPORT inflateSyncPoint(z_streamp); +ZEXTERN const z_crc_t FAR * ZEXPORT get_crc_table(void); +ZEXTERN int ZEXPORT inflateUndermine(z_streamp, int); +ZEXTERN int ZEXPORT inflateValidate(z_streamp, int); +ZEXTERN unsigned long ZEXPORT inflateCodesUsed(z_streamp); +ZEXTERN int ZEXPORT inflateResetKeep(z_streamp); +ZEXTERN int ZEXPORT deflateResetKeep(z_streamp); +#if defined(_WIN32) && !defined(Z_SOLO) +ZEXTERN gzFile ZEXPORT 
gzopen_w(const wchar_t *path, + const char *mode); +#endif +#if defined(STDC) || defined(Z_HAVE_STDARG_H) +# ifndef Z_SOLO +ZEXTERN int ZEXPORTVA gzvprintf(gzFile file, + const char *format, + va_list va); +# endif +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* ZLIB_H */ diff --git a/3rdparty/loongarch/lib/libz.so b/3rdparty/loongarch/lib/libz.so new file mode 100644 index 0000000..0c01e3d Binary files /dev/null and b/3rdparty/loongarch/lib/libz.so differ diff --git a/3rdparty/soc/include/zconf.h b/3rdparty/soc/include/zconf.h new file mode 100644 index 0000000..fe81abf --- /dev/null +++ b/3rdparty/soc/include/zconf.h @@ -0,0 +1,551 @@ +/* zconf.h -- configuration of the zlib compression library + * Copyright (C) 1995-2016 Jean-loup Gailly, Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* @(#) $Id$ */ + +#ifndef ZCONF_H +#define ZCONF_H + +/* + * If you *really* need a unique prefix for all types and library functions, + * compile with -DZ_PREFIX. The "standard" zlib should be compiled without it. + * Even better than compiling with -DZ_PREFIX would be to use configure to set + * this permanently in zconf.h using "./configure --zprefix". 
+ */ +#ifdef Z_PREFIX /* may be set to #if 1 by ./configure */ +# define Z_PREFIX_SET + +/* all linked symbols and init macros */ +# define _dist_code z__dist_code +# define _length_code z__length_code +# define _tr_align z__tr_align +# define _tr_flush_bits z__tr_flush_bits +# define _tr_flush_block z__tr_flush_block +# define _tr_init z__tr_init +# define _tr_stored_block z__tr_stored_block +# define _tr_tally z__tr_tally +# define adler32 z_adler32 +# define adler32_combine z_adler32_combine +# define adler32_combine64 z_adler32_combine64 +# define adler32_z z_adler32_z +# ifndef Z_SOLO +# define compress z_compress +# define compress2 z_compress2 +# define compressBound z_compressBound +# endif +# define crc32 z_crc32 +# define crc32_combine z_crc32_combine +# define crc32_combine64 z_crc32_combine64 +# define crc32_combine_gen z_crc32_combine_gen +# define crc32_combine_gen64 z_crc32_combine_gen64 +# define crc32_combine_op z_crc32_combine_op +# define crc32_z z_crc32_z +# define deflate z_deflate +# define deflateBound z_deflateBound +# define deflateCopy z_deflateCopy +# define deflateEnd z_deflateEnd +# define deflateGetDictionary z_deflateGetDictionary +# define deflateInit z_deflateInit +# define deflateInit2 z_deflateInit2 +# define deflateInit2_ z_deflateInit2_ +# define deflateInit_ z_deflateInit_ +# define deflateParams z_deflateParams +# define deflatePending z_deflatePending +# define deflatePrime z_deflatePrime +# define deflateReset z_deflateReset +# define deflateResetKeep z_deflateResetKeep +# define deflateSetDictionary z_deflateSetDictionary +# define deflateSetHeader z_deflateSetHeader +# define deflateTune z_deflateTune +# define deflate_copyright z_deflate_copyright +# define get_crc_table z_get_crc_table +# ifndef Z_SOLO +# define gz_error z_gz_error +# define gz_intmax z_gz_intmax +# define gz_strwinerror z_gz_strwinerror +# define gzbuffer z_gzbuffer +# define gzclearerr z_gzclearerr +# define gzclose z_gzclose +# define gzclose_r 
z_gzclose_r +# define gzclose_w z_gzclose_w +# define gzdirect z_gzdirect +# define gzdopen z_gzdopen +# define gzeof z_gzeof +# define gzerror z_gzerror +# define gzflush z_gzflush +# define gzfread z_gzfread +# define gzfwrite z_gzfwrite +# define gzgetc z_gzgetc +# define gzgetc_ z_gzgetc_ +# define gzgets z_gzgets +# define gzoffset z_gzoffset +# define gzoffset64 z_gzoffset64 +# define gzopen z_gzopen +# define gzopen64 z_gzopen64 +# ifdef _WIN32 +# define gzopen_w z_gzopen_w +# endif +# define gzprintf z_gzprintf +# define gzputc z_gzputc +# define gzputs z_gzputs +# define gzread z_gzread +# define gzrewind z_gzrewind +# define gzseek z_gzseek +# define gzseek64 z_gzseek64 +# define gzsetparams z_gzsetparams +# define gztell z_gztell +# define gztell64 z_gztell64 +# define gzungetc z_gzungetc +# define gzvprintf z_gzvprintf +# define gzwrite z_gzwrite +# endif +# define inflate z_inflate +# define inflateBack z_inflateBack +# define inflateBackEnd z_inflateBackEnd +# define inflateBackInit z_inflateBackInit +# define inflateBackInit_ z_inflateBackInit_ +# define inflateCodesUsed z_inflateCodesUsed +# define inflateCopy z_inflateCopy +# define inflateEnd z_inflateEnd +# define inflateGetDictionary z_inflateGetDictionary +# define inflateGetHeader z_inflateGetHeader +# define inflateInit z_inflateInit +# define inflateInit2 z_inflateInit2 +# define inflateInit2_ z_inflateInit2_ +# define inflateInit_ z_inflateInit_ +# define inflateMark z_inflateMark +# define inflatePrime z_inflatePrime +# define inflateReset z_inflateReset +# define inflateReset2 z_inflateReset2 +# define inflateResetKeep z_inflateResetKeep +# define inflateSetDictionary z_inflateSetDictionary +# define inflateSync z_inflateSync +# define inflateSyncPoint z_inflateSyncPoint +# define inflateUndermine z_inflateUndermine +# define inflateValidate z_inflateValidate +# define inflate_copyright z_inflate_copyright +# define inflate_fast z_inflate_fast +# define inflate_table z_inflate_table +# 
ifndef Z_SOLO +# define uncompress z_uncompress +# define uncompress2 z_uncompress2 +# endif +# define zError z_zError +# ifndef Z_SOLO +# define zcalloc z_zcalloc +# define zcfree z_zcfree +# endif +# define zlibCompileFlags z_zlibCompileFlags +# define zlibVersion z_zlibVersion + +/* all zlib typedefs in zlib.h and zconf.h */ +# define Byte z_Byte +# define Bytef z_Bytef +# define alloc_func z_alloc_func +# define charf z_charf +# define free_func z_free_func +# ifndef Z_SOLO +# define gzFile z_gzFile +# endif +# define gz_header z_gz_header +# define gz_headerp z_gz_headerp +# define in_func z_in_func +# define intf z_intf +# define out_func z_out_func +# define uInt z_uInt +# define uIntf z_uIntf +# define uLong z_uLong +# define uLongf z_uLongf +# define voidp z_voidp +# define voidpc z_voidpc +# define voidpf z_voidpf + +/* all zlib structs in zlib.h and zconf.h */ +# define gz_header_s z_gz_header_s +# define internal_state z_internal_state + +#endif + +#if defined(__MSDOS__) && !defined(MSDOS) +# define MSDOS +#endif +#if (defined(OS_2) || defined(__OS2__)) && !defined(OS2) +# define OS2 +#endif +#if defined(_WINDOWS) && !defined(WINDOWS) +# define WINDOWS +#endif +#if defined(_WIN32) || defined(_WIN32_WCE) || defined(__WIN32__) +# ifndef WIN32 +# define WIN32 +# endif +#endif +#if (defined(MSDOS) || defined(OS2) || defined(WINDOWS)) && !defined(WIN32) +# if !defined(__GNUC__) && !defined(__FLAT__) && !defined(__386__) +# ifndef SYS16BIT +# define SYS16BIT +# endif +# endif +#endif + +/* + * Compile with -DMAXSEG_64K if the alloc function cannot allocate more + * than 64k bytes at a time (needed on systems with 16-bit int). 
+ */ +#ifdef SYS16BIT +# define MAXSEG_64K +#endif +#ifdef MSDOS +# define UNALIGNED_OK +#endif + +#ifdef __STDC_VERSION__ +# ifndef STDC +# define STDC +# endif +# if __STDC_VERSION__ >= 199901L +# ifndef STDC99 +# define STDC99 +# endif +# endif +#endif +#if !defined(STDC) && (defined(__STDC__) || defined(__cplusplus)) +# define STDC +#endif +#if !defined(STDC) && (defined(__GNUC__) || defined(__BORLANDC__)) +# define STDC +#endif +#if !defined(STDC) && (defined(MSDOS) || defined(WINDOWS) || defined(WIN32)) +# define STDC +#endif +#if !defined(STDC) && (defined(OS2) || defined(__HOS_AIX__)) +# define STDC +#endif + +#if defined(__OS400__) && !defined(STDC) /* iSeries (formerly AS/400). */ +# define STDC +#endif + +#ifndef STDC +# ifndef const /* cannot use !defined(STDC) && !defined(const) on Mac */ +# define const /* note: need a more gentle solution here */ +# endif +#endif + +#if defined(ZLIB_CONST) && !defined(z_const) +# define z_const const +#else +# define z_const +#endif + +#ifdef Z_SOLO +# ifdef _WIN64 + typedef unsigned long long z_size_t; +# else + typedef unsigned long z_size_t; +# endif +#else +# define z_longlong long long +# if defined(NO_SIZE_T) + typedef unsigned NO_SIZE_T z_size_t; +# elif defined(STDC) +# include <stddef.h> + typedef size_t z_size_t; +# else + typedef unsigned long z_size_t; +# endif +# undef z_longlong +#endif + +/* Maximum value for memLevel in deflateInit2 */ +#ifndef MAX_MEM_LEVEL +# ifdef MAXSEG_64K +# define MAX_MEM_LEVEL 8 +# else +# define MAX_MEM_LEVEL 9 +# endif +#endif + +/* Maximum value for windowBits in deflateInit2 and inflateInit2. + * WARNING: reducing MAX_WBITS makes minigzip unable to extract .gz files + * created by gzip. (Files created by minigzip can still be extracted by + * gzip.)
+ */ +#ifndef MAX_WBITS +# define MAX_WBITS 15 /* 32K LZ77 window */ +#endif + +/* The memory requirements for deflate are (in bytes): + (1 << (windowBits+2)) + (1 << (memLevel+9)) + that is: 128K for windowBits=15 + 128K for memLevel = 8 (default values) + plus a few kilobytes for small objects. For example, if you want to reduce + the default memory requirements from 256K to 128K, compile with + make CFLAGS="-O -DMAX_WBITS=14 -DMAX_MEM_LEVEL=7" + Of course this will generally degrade compression (there's no free lunch). + + The memory requirements for inflate are (in bytes) 1 << windowBits + that is, 32K for windowBits=15 (default value) plus about 7 kilobytes + for small objects. +*/ + + /* Type declarations */ + +#ifndef OF /* function prototypes */ +# ifdef STDC +# define OF(args) args +# else +# define OF(args) () +# endif +#endif + +#ifndef Z_ARG /* function prototypes for stdarg */ +# if defined(STDC) || defined(Z_HAVE_STDARG_H) +# define Z_ARG(args) args +# else +# define Z_ARG(args) () +# endif +#endif + +/* The following definitions for FAR are needed only for MSDOS mixed + * model programming (small or medium model with some far allocations). + * This was tested only with MSC; for other MSDOS compilers you may have + * to define NO_MEMCPY in zutil.h. If you don't need the mixed model, + * just define FAR to be empty. + */ +#ifdef SYS16BIT +# if defined(M_I86SM) || defined(M_I86MM) + /* MSC small or medium model */ +# define SMALL_MEDIUM +# ifdef _MSC_VER +# define FAR _far +# else +# define FAR far +# endif +# endif +# if (defined(__SMALL__) || defined(__MEDIUM__)) + /* Turbo C small or medium model */ +# define SMALL_MEDIUM +# ifdef __BORLANDC__ +# define FAR _far +# else +# define FAR far +# endif +# endif +#endif + +#if defined(WINDOWS) || defined(WIN32) + /* If building or using zlib as a DLL, define ZLIB_DLL. + * This is not mandatory, but it offers a little performance increase. 
+ */ +# ifdef ZLIB_DLL +# if defined(WIN32) && (!defined(__BORLANDC__) || (__BORLANDC__ >= 0x500)) +# ifdef ZLIB_INTERNAL +# define ZEXTERN extern __declspec(dllexport) +# else +# define ZEXTERN extern __declspec(dllimport) +# endif +# endif +# endif /* ZLIB_DLL */ + /* If building or using zlib with the WINAPI/WINAPIV calling convention, + * define ZLIB_WINAPI. + * Caution: the standard ZLIB1.DLL is NOT compiled using ZLIB_WINAPI. + */ +# ifdef ZLIB_WINAPI +# ifdef FAR +# undef FAR +# endif +# ifndef WIN32_LEAN_AND_MEAN +# define WIN32_LEAN_AND_MEAN +# endif +# include <windows.h> + /* No need for _export, use ZLIB.DEF instead. */ + /* For complete Windows compatibility, use WINAPI, not __stdcall. */ +# define ZEXPORT WINAPI +# ifdef WIN32 +# define ZEXPORTVA WINAPIV +# else +# define ZEXPORTVA FAR CDECL +# endif +# endif +#endif + +#if defined (__BEOS__) +# ifdef ZLIB_DLL +# ifdef ZLIB_INTERNAL +# define ZEXPORT __declspec(dllexport) +# define ZEXPORTVA __declspec(dllexport) +# else +# define ZEXPORT __declspec(dllimport) +# define ZEXPORTVA __declspec(dllimport) +# endif +# endif +#endif + +#ifndef ZEXTERN +# define ZEXTERN extern +#endif +#ifndef ZEXPORT +# define ZEXPORT +#endif +#ifndef ZEXPORTVA +# define ZEXPORTVA +#endif + +#ifndef FAR +# define FAR +#endif + +#if !defined(__MACTYPES__) +typedef unsigned char Byte; /* 8 bits */ +#endif +typedef unsigned int uInt; /* 16 bits or more */ +typedef unsigned long uLong; /* 32 bits or more */ + +#ifdef SMALL_MEDIUM + /* Borland C/C++ and some old MSC versions ignore FAR inside typedef */ +# define Bytef Byte FAR +#else + typedef Byte FAR Bytef; +#endif +typedef char FAR charf; +typedef int FAR intf; +typedef uInt FAR uIntf; +typedef uLong FAR uLongf; + +#ifdef STDC + typedef void const *voidpc; + typedef void FAR *voidpf; + typedef void *voidp; +#else + typedef Byte const *voidpc; + typedef Byte FAR *voidpf; + typedef Byte *voidp; +#endif + +#if !defined(Z_U4) && !defined(Z_SOLO) && defined(STDC) +# include <limits.h> +# if (UINT_MAX
== 0xffffffffUL) +# define Z_U4 unsigned +# elif (ULONG_MAX == 0xffffffffUL) +# define Z_U4 unsigned long +# elif (USHRT_MAX == 0xffffffffUL) +# define Z_U4 unsigned short +# endif +#endif + +#ifdef Z_U4 + typedef Z_U4 z_crc_t; +#else + typedef unsigned long z_crc_t; +#endif + +#if 1 /* was set to #if 1 by ./configure */ +# define Z_HAVE_UNISTD_H +#endif + +#if 1 /* was set to #if 1 by ./configure */ +# define Z_HAVE_STDARG_H +#endif + +#ifdef STDC +# ifndef Z_SOLO +# include <sys/types.h> /* for off_t */ +# endif +#endif + +#if defined(STDC) || defined(Z_HAVE_STDARG_H) +# ifndef Z_SOLO +# include <stdarg.h> /* for va_list */ +# endif +#endif + +#ifdef _WIN32 +# ifndef Z_SOLO +# include <stddef.h> /* for wchar_t */ +# endif +#endif + +/* a little trick to accommodate both "#define _LARGEFILE64_SOURCE" and + * "#define _LARGEFILE64_SOURCE 1" as requesting 64-bit operations, (even + * though the former does not conform to the LFS document), but considering + * both "#undef _LARGEFILE64_SOURCE" and "#define _LARGEFILE64_SOURCE 0" as + * equivalently requesting no 64-bit operations + */ +#if defined(_LARGEFILE64_SOURCE) && -_LARGEFILE64_SOURCE - -1 == 1 +# undef _LARGEFILE64_SOURCE +#endif + +#ifndef Z_HAVE_UNISTD_H +# ifdef __WATCOMC__ +# define Z_HAVE_UNISTD_H +# endif +#endif +#ifndef Z_HAVE_UNISTD_H +# if defined(_LARGEFILE64_SOURCE) && !defined(_WIN32) +# define Z_HAVE_UNISTD_H +# endif +#endif +#ifndef Z_SOLO +# if defined(Z_HAVE_UNISTD_H) +# include <unistd.h> /* for SEEK_*, off_t, and _LFS64_LARGEFILE */ +# ifdef VMS +# include <unixio.h> /* for off_t */ +# endif +# ifndef z_off_t +# define z_off_t off_t +# endif +# endif +#endif + +#if defined(_LFS64_LARGEFILE) && _LFS64_LARGEFILE-0 +# define Z_LFS64 +#endif + +#if defined(_LARGEFILE64_SOURCE) && defined(Z_LFS64) +# define Z_LARGE64 +#endif + +#if defined(_FILE_OFFSET_BITS) && _FILE_OFFSET_BITS-0 == 64 && defined(Z_LFS64) +# define Z_WANT64 +#endif + +#if !defined(SEEK_SET) && !defined(Z_SOLO) +# define SEEK_SET 0 /* Seek from beginning of file.
*/ +# define SEEK_CUR 1 /* Seek from current position. */ +# define SEEK_END 2 /* Set file pointer to EOF plus "offset" */ +#endif + +#ifndef z_off_t +# define z_off_t long +#endif + +#if !defined(_WIN32) && defined(Z_LARGE64) +# define z_off64_t off64_t +#else +# if defined(_WIN32) && !defined(__GNUC__) +# define z_off64_t __int64 +# else +# define z_off64_t z_off_t +# endif +#endif + +/* MVS linker does not support external names larger than 8 bytes */ +#if defined(__MVS__) + #pragma map(deflateInit_,"DEIN") + #pragma map(deflateInit2_,"DEIN2") + #pragma map(deflateEnd,"DEEND") + #pragma map(deflateBound,"DEBND") + #pragma map(inflateInit_,"ININ") + #pragma map(inflateInit2_,"ININ2") + #pragma map(inflateEnd,"INEND") + #pragma map(inflateSync,"INSY") + #pragma map(inflateSetDictionary,"INSEDI") + #pragma map(compressBound,"CMBND") + #pragma map(inflate_table,"INTABL") + #pragma map(inflate_fast,"INFA") + #pragma map(inflate_copyright,"INCOPY") +#endif + +#endif /* ZCONF_H */ diff --git a/3rdparty/soc/include/zlib.h b/3rdparty/soc/include/zlib.h new file mode 100644 index 0000000..6b7244f --- /dev/null +++ b/3rdparty/soc/include/zlib.h @@ -0,0 +1,1938 @@ +/* zlib.h -- interface of the 'zlib' general purpose compression library + version 1.3, August 18th, 2023 + + Copyright (C) 1995-2023 Jean-loup Gailly and Mark Adler + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for any damages + arising from the use of this software. + + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it + freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + 2. 
Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + 3. This notice may not be removed or altered from any source distribution. + + Jean-loup Gailly Mark Adler + jloup@gzip.org madler@alumni.caltech.edu + + + The data format used by the zlib library is described by RFCs (Request for + Comments) 1950 to 1952 in the files http://tools.ietf.org/html/rfc1950 + (zlib format), rfc1951 (deflate format) and rfc1952 (gzip format). +*/ + +#ifndef ZLIB_H +#define ZLIB_H + +#include "zconf.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define ZLIB_VERSION "1.3" +#define ZLIB_VERNUM 0x1300 +#define ZLIB_VER_MAJOR 1 +#define ZLIB_VER_MINOR 3 +#define ZLIB_VER_REVISION 0 +#define ZLIB_VER_SUBREVISION 0 + +/* + The 'zlib' compression library provides in-memory compression and + decompression functions, including integrity checks of the uncompressed data. + This version of the library supports only one compression method (deflation) + but other algorithms will be added later and will have the same stream + interface. + + Compression can be done in a single step if the buffers are large enough, + or can be done by repeated calls of the compression function. In the latter + case, the application must provide more input and/or consume the output + (providing more output space) before each call. + + The compressed data format used by default by the in-memory functions is + the zlib format, which is a zlib wrapper documented in RFC 1950, wrapped + around a deflate stream, which is itself documented in RFC 1951. + + The library also supports reading and writing files in gzip (.gz) format + with an interface similar to that of stdio using the functions that start + with "gz". The gzip format is different from the zlib format. gzip is a + gzip wrapper, documented in RFC 1952, wrapped around a deflate stream. + + This library can optionally read and write gzip and raw deflate streams in + memory as well. 
+ + The zlib format was designed to be compact and fast for use in memory + and on communications channels. The gzip format was designed for single- + file compression on file systems, has a larger header than zlib to maintain + directory information, and uses a different, slower check method than zlib. + + The library does not install any signal handler. The decoder checks + the consistency of the compressed data, so the library should never crash + even in the case of corrupted input. +*/ + +typedef voidpf (*alloc_func)(voidpf opaque, uInt items, uInt size); +typedef void (*free_func)(voidpf opaque, voidpf address); + +struct internal_state; + +typedef struct z_stream_s { + z_const Bytef *next_in; /* next input byte */ + uInt avail_in; /* number of bytes available at next_in */ + uLong total_in; /* total number of input bytes read so far */ + + Bytef *next_out; /* next output byte will go here */ + uInt avail_out; /* remaining free space at next_out */ + uLong total_out; /* total number of bytes output so far */ + + z_const char *msg; /* last error message, NULL if no error */ + struct internal_state FAR *state; /* not visible by applications */ + + alloc_func zalloc; /* used to allocate the internal state */ + free_func zfree; /* used to free the internal state */ + voidpf opaque; /* private data object passed to zalloc and zfree */ + + int data_type; /* best guess about the data type: binary or text + for deflate, or the decoding state for inflate */ + uLong adler; /* Adler-32 or CRC-32 value of the uncompressed data */ + uLong reserved; /* reserved for future use */ +} z_stream; + +typedef z_stream FAR *z_streamp; + +/* + gzip header information passed to and from zlib routines. See RFC 1952 + for more details on the meanings of these fields. 
+*/ +typedef struct gz_header_s { + int text; /* true if compressed data believed to be text */ + uLong time; /* modification time */ + int xflags; /* extra flags (not used when writing a gzip file) */ + int os; /* operating system */ + Bytef *extra; /* pointer to extra field or Z_NULL if none */ + uInt extra_len; /* extra field length (valid if extra != Z_NULL) */ + uInt extra_max; /* space at extra (only when reading header) */ + Bytef *name; /* pointer to zero-terminated file name or Z_NULL */ + uInt name_max; /* space at name (only when reading header) */ + Bytef *comment; /* pointer to zero-terminated comment or Z_NULL */ + uInt comm_max; /* space at comment (only when reading header) */ + int hcrc; /* true if there was or will be a header crc */ + int done; /* true when done reading gzip header (not used + when writing a gzip file) */ +} gz_header; + +typedef gz_header FAR *gz_headerp; + +/* + The application must update next_in and avail_in when avail_in has dropped + to zero. It must update next_out and avail_out when avail_out has dropped + to zero. The application must initialize zalloc, zfree and opaque before + calling the init function. All other fields are set by the compression + library and must not be updated by the application. + + The opaque value provided by the application will be passed as the first + parameter for calls of zalloc and zfree. This can be useful for custom + memory management. The compression library attaches no meaning to the + opaque value. + + zalloc must return Z_NULL if there is not enough memory for the object. + If zlib is used in a multi-threaded application, zalloc and zfree must be + thread safe. In that case, zlib is thread-safe. When zalloc and zfree are + Z_NULL on entry to the initialization function, they are set to internal + routines that use the standard library functions malloc() and free(). 
+ + On 16-bit systems, the functions zalloc and zfree must be able to allocate + exactly 65536 bytes, but will not be required to allocate more than this if + the symbol MAXSEG_64K is defined (see zconf.h). WARNING: On MSDOS, pointers + returned by zalloc for objects of exactly 65536 bytes *must* have their + offset normalized to zero. The default allocation function provided by this + library ensures this (see zutil.c). To reduce memory requirements and avoid + any allocation of 64K objects, at the expense of compression ratio, compile + the library with -DMAX_WBITS=14 (see zconf.h). + + The fields total_in and total_out can be used for statistics or progress + reports. After compression, total_in holds the total size of the + uncompressed data and may be saved for use by the decompressor (particularly + if the decompressor wants to decompress everything in a single step). +*/ + + /* constants */ + +#define Z_NO_FLUSH 0 +#define Z_PARTIAL_FLUSH 1 +#define Z_SYNC_FLUSH 2 +#define Z_FULL_FLUSH 3 +#define Z_FINISH 4 +#define Z_BLOCK 5 +#define Z_TREES 6 +/* Allowed flush values; see deflate() and inflate() below for details */ + +#define Z_OK 0 +#define Z_STREAM_END 1 +#define Z_NEED_DICT 2 +#define Z_ERRNO (-1) +#define Z_STREAM_ERROR (-2) +#define Z_DATA_ERROR (-3) +#define Z_MEM_ERROR (-4) +#define Z_BUF_ERROR (-5) +#define Z_VERSION_ERROR (-6) +/* Return codes for the compression/decompression functions. Negative values + * are errors, positive values are used for special but normal events. 
+ */ + +#define Z_NO_COMPRESSION 0 +#define Z_BEST_SPEED 1 +#define Z_BEST_COMPRESSION 9 +#define Z_DEFAULT_COMPRESSION (-1) +/* compression levels */ + +#define Z_FILTERED 1 +#define Z_HUFFMAN_ONLY 2 +#define Z_RLE 3 +#define Z_FIXED 4 +#define Z_DEFAULT_STRATEGY 0 +/* compression strategy; see deflateInit2() below for details */ + +#define Z_BINARY 0 +#define Z_TEXT 1 +#define Z_ASCII Z_TEXT /* for compatibility with 1.2.2 and earlier */ +#define Z_UNKNOWN 2 +/* Possible values of the data_type field for deflate() */ + +#define Z_DEFLATED 8 +/* The deflate compression method (the only one supported in this version) */ + +#define Z_NULL 0 /* for initializing zalloc, zfree, opaque */ + +#define zlib_version zlibVersion() +/* for compatibility with versions < 1.0.2 */ + + + /* basic functions */ + +ZEXTERN const char * ZEXPORT zlibVersion(void); +/* The application can compare zlibVersion and ZLIB_VERSION for consistency. + If the first character differs, the library code actually used is not + compatible with the zlib.h header file used by the application. This check + is automatically made by deflateInit and inflateInit. + */ + +/* +ZEXTERN int ZEXPORT deflateInit(z_streamp strm, int level); + + Initializes the internal stream state for compression. The fields + zalloc, zfree and opaque must be initialized before by the caller. If + zalloc and zfree are set to Z_NULL, deflateInit updates them to use default + allocation functions. total_in, total_out, adler, and msg are initialized. + + The compression level must be Z_DEFAULT_COMPRESSION, or between 0 and 9: + 1 gives best speed, 9 gives best compression, 0 gives no compression at all + (the input data is simply copied a block at a time). Z_DEFAULT_COMPRESSION + requests a default compromise between speed and compression (currently + equivalent to level 6). 
+ + deflateInit returns Z_OK if success, Z_MEM_ERROR if there was not enough + memory, Z_STREAM_ERROR if level is not a valid compression level, or + Z_VERSION_ERROR if the zlib library version (zlib_version) is incompatible + with the version assumed by the caller (ZLIB_VERSION). msg is set to null + if there is no error message. deflateInit does not perform any compression: + this will be done by deflate(). +*/ + + +ZEXTERN int ZEXPORT deflate(z_streamp strm, int flush); +/* + deflate compresses as much data as possible, and stops when the input + buffer becomes empty or the output buffer becomes full. It may introduce + some output latency (reading input without producing any output) except when + forced to flush. + + The detailed semantics are as follows. deflate performs one or both of the + following actions: + + - Compress more input starting at next_in and update next_in and avail_in + accordingly. If not all input can be processed (because there is not + enough room in the output buffer), next_in and avail_in are updated and + processing will resume at this point for the next call of deflate(). + + - Generate more output starting at next_out and update next_out and avail_out + accordingly. This action is forced if the parameter flush is non zero. + Forcing flush frequently degrades the compression ratio, so this parameter + should be set only when necessary. Some output may be provided even if + flush is zero. + + Before the call of deflate(), the application should ensure that at least + one of the actions is possible, by providing more input and/or consuming more + output, and updating avail_in or avail_out accordingly; avail_out should + never be zero before the call. The application can consume the compressed + output when it wants, for example when the output buffer is full (avail_out + == 0), or after each call of deflate(). 
If deflate returns Z_OK and with + zero avail_out, it must be called again after making room in the output + buffer because there might be more output pending. See deflatePending(), + which can be used if desired to determine whether or not there is more output + in that case. + + Normally the parameter flush is set to Z_NO_FLUSH, which allows deflate to + decide how much data to accumulate before producing output, in order to + maximize compression. + + If the parameter flush is set to Z_SYNC_FLUSH, all pending output is + flushed to the output buffer and the output is aligned on a byte boundary, so + that the decompressor can get all input data available so far. (In + particular avail_in is zero after the call if enough output space has been + provided before the call.) Flushing may degrade compression for some + compression algorithms and so it should be used only when necessary. This + completes the current deflate block and follows it with an empty stored block + that is three bits plus filler bits to the next byte, followed by four bytes + (00 00 ff ff). + + If flush is set to Z_PARTIAL_FLUSH, all pending output is flushed to the + output buffer, but the output is not aligned to a byte boundary. All of the + input data so far will be available to the decompressor, as for Z_SYNC_FLUSH. + This completes the current deflate block and follows it with an empty fixed + codes block that is 10 bits long. This assures that enough bytes are output + in order for the decompressor to finish the block before the empty fixed + codes block. + + If flush is set to Z_BLOCK, a deflate block is completed and emitted, as + for Z_SYNC_FLUSH, but the output is not aligned on a byte boundary, and up to + seven bits of the current block are held to be written as the next byte after + the next deflate block is completed. In this case, the decompressor may not + be provided enough bits at this point in order to complete decompression of + the data provided so far to the compressor. 
It may need to wait for the next + block to be emitted. This is for advanced applications that need to control + the emission of deflate blocks. + + If flush is set to Z_FULL_FLUSH, all output is flushed as with + Z_SYNC_FLUSH, and the compression state is reset so that decompression can + restart from this point if previous compressed data has been damaged or if + random access is desired. Using Z_FULL_FLUSH too often can seriously degrade + compression. + + If deflate returns with avail_out == 0, this function must be called again + with the same value of the flush parameter and more output space (updated + avail_out), until the flush is complete (deflate returns with non-zero + avail_out). In the case of a Z_FULL_FLUSH or Z_SYNC_FLUSH, make sure that + avail_out is greater than six when the flush marker begins, in order to avoid + repeated flush markers upon calling deflate() again when avail_out == 0. + + If the parameter flush is set to Z_FINISH, pending input is processed, + pending output is flushed and deflate returns with Z_STREAM_END if there was + enough output space. If deflate returns with Z_OK or Z_BUF_ERROR, this + function must be called again with Z_FINISH and more output space (updated + avail_out) but no more input data, until it returns with Z_STREAM_END or an + error. After deflate has returned Z_STREAM_END, the only possible operations + on the stream are deflateReset or deflateEnd. + + Z_FINISH can be used in the first deflate call after deflateInit if all the + compression is to be done in a single step. In order to complete in one + call, avail_out must be at least the value returned by deflateBound (see + below). Then deflate is guaranteed to return Z_STREAM_END. If not enough + output space is provided, deflate will not return Z_STREAM_END, and it must + be called again as described above. + + deflate() sets strm->adler to the Adler-32 checksum of all input read + so far (that is, total_in bytes). 
If a gzip stream is being generated, then + strm->adler will be the CRC-32 checksum of the input read so far. (See + deflateInit2 below.) + + deflate() may update strm->data_type if it can make a good guess about + the input data type (Z_BINARY or Z_TEXT). If in doubt, the data is + considered binary. This field is only for information purposes and does not + affect the compression algorithm in any manner. + + deflate() returns Z_OK if some progress has been made (more input + processed or more output produced), Z_STREAM_END if all input has been + consumed and all output has been produced (only when flush is set to + Z_FINISH), Z_STREAM_ERROR if the stream state was inconsistent (for example + if next_in or next_out was Z_NULL or the state was inadvertently written over + by the application), or Z_BUF_ERROR if no progress is possible (for example + avail_in or avail_out was zero). Note that Z_BUF_ERROR is not fatal, and + deflate() can be called again with more input and more output space to + continue compressing. +*/ + + +ZEXTERN int ZEXPORT deflateEnd(z_streamp strm); +/* + All dynamically allocated data structures for this stream are freed. + This function discards any unprocessed input and does not flush any pending + output. + + deflateEnd returns Z_OK if success, Z_STREAM_ERROR if the + stream state was inconsistent, Z_DATA_ERROR if the stream was freed + prematurely (some input or output was discarded). In the error case, msg + may be set but then points to a static string (which must not be + deallocated). +*/ + + +/* +ZEXTERN int ZEXPORT inflateInit(z_streamp strm); + + Initializes the internal stream state for decompression. The fields + next_in, avail_in, zalloc, zfree and opaque must be initialized before by + the caller. In the current version of inflate, the provided input is not + read or consumed. The allocation of a sliding window will be deferred to + the first call of inflate (if the decompression does not complete on the + first call). 
If zalloc and zfree are set to Z_NULL, inflateInit updates + them to use default allocation functions. total_in, total_out, adler, and + msg are initialized. + + inflateInit returns Z_OK if success, Z_MEM_ERROR if there was not enough + memory, Z_VERSION_ERROR if the zlib library version is incompatible with the + version assumed by the caller, or Z_STREAM_ERROR if the parameters are + invalid, such as a null pointer to the structure. msg is set to null if + there is no error message. inflateInit does not perform any decompression. + Actual decompression will be done by inflate(). So next_in, avail_in, + next_out, and avail_out are unused and unchanged. The current + implementation of inflateInit() does not process any header information -- + that is deferred until inflate() is called. +*/ + + +ZEXTERN int ZEXPORT inflate(z_streamp strm, int flush); +/* + inflate decompresses as much data as possible, and stops when the input + buffer becomes empty or the output buffer becomes full. It may introduce + some output latency (reading input without producing any output) except when + forced to flush. + + The detailed semantics are as follows. inflate performs one or both of the + following actions: + + - Decompress more input starting at next_in and update next_in and avail_in + accordingly. If not all input can be processed (because there is not + enough room in the output buffer), then next_in and avail_in are updated + accordingly, and processing will resume at this point for the next call of + inflate(). + + - Generate more output starting at next_out and update next_out and avail_out + accordingly. inflate() provides as much output as possible, until there is + no more input data or no more space in the output buffer (see below about + the flush parameter).
+ + Before the call of inflate(), the application should ensure that at least + one of the actions is possible, by providing more input and/or consuming more + output, and updating the next_* and avail_* values accordingly. If the + caller of inflate() does not provide both available input and available + output space, it is possible that there will be no progress made. The + application can consume the uncompressed output when it wants, for example + when the output buffer is full (avail_out == 0), or after each call of + inflate(). If inflate returns Z_OK and with zero avail_out, it must be + called again after making room in the output buffer because there might be + more output pending. + + The flush parameter of inflate() can be Z_NO_FLUSH, Z_SYNC_FLUSH, Z_FINISH, + Z_BLOCK, or Z_TREES. Z_SYNC_FLUSH requests that inflate() flush as much + output as possible to the output buffer. Z_BLOCK requests that inflate() + stop if and when it gets to the next deflate block boundary. When decoding + the zlib or gzip format, this will cause inflate() to return immediately + after the header and before the first block. When doing a raw inflate, + inflate() will go ahead and process the first block, and will return when it + gets to the end of that block, or when it runs out of data. + + The Z_BLOCK option assists in appending to or combining deflate streams. + To assist in this, on return inflate() always sets strm->data_type to the + number of unused bits in the last byte taken from strm->next_in, plus 64 if + inflate() is currently decoding the last block in the deflate stream, plus + 128 if inflate() returned immediately after decoding an end-of-block code or + decoding the complete header up to just before the first byte of the deflate + stream. The end-of-block will not be indicated until all of the uncompressed + data from that block has been written to strm->next_out. 
The number of + unused bits may in general be greater than seven, except when bit 7 of + data_type is set, in which case the number of unused bits will be less than + eight. data_type is set as noted here every time inflate() returns for all + flush options, and so can be used to determine the amount of currently + consumed input in bits. + + The Z_TREES option behaves as Z_BLOCK does, but it also returns when the + end of each deflate block header is reached, before any actual data in that + block is decoded. This allows the caller to determine the length of the + deflate block header for later use in random access within a deflate block. + 256 is added to the value of strm->data_type when inflate() returns + immediately after reaching the end of the deflate block header. + + inflate() should normally be called until it returns Z_STREAM_END or an + error. However if all decompression is to be performed in a single step (a + single call of inflate), the parameter flush should be set to Z_FINISH. In + this case all pending input is processed and all pending output is flushed; + avail_out must be large enough to hold all of the uncompressed data for the + operation to complete. (The size of the uncompressed data may have been + saved by the compressor for this purpose.) The use of Z_FINISH is not + required to perform an inflation in one step. However it may be used to + inform inflate that a faster approach can be used for the single inflate() + call. Z_FINISH also informs inflate to not maintain a sliding window if the + stream completes, which reduces inflate's memory footprint. If the stream + does not complete, either because not all of the stream is provided or not + enough output space is provided, then a sliding window will be allocated and + inflate() can be called again to continue the operation as if Z_NO_FLUSH had + been used. 
+ + In this implementation, inflate() always flushes as much output as + possible to the output buffer, and always uses the faster approach on the + first call. So the effects of the flush parameter in this implementation are + on the return value of inflate() as noted below, when inflate() returns early + when Z_BLOCK or Z_TREES is used, and when inflate() avoids the allocation of + memory for a sliding window when Z_FINISH is used. + + If a preset dictionary is needed after this call (see inflateSetDictionary + below), inflate sets strm->adler to the Adler-32 checksum of the dictionary + chosen by the compressor and returns Z_NEED_DICT; otherwise it sets + strm->adler to the Adler-32 checksum of all output produced so far (that is, + total_out bytes) and returns Z_OK, Z_STREAM_END or an error code as described + below. At the end of the stream, inflate() checks that its computed Adler-32 + checksum is equal to that saved by the compressor and returns Z_STREAM_END + only if the checksum is correct. + + inflate() can decompress and check either zlib-wrapped or gzip-wrapped + deflate data. The header type is detected automatically, if requested when + initializing with inflateInit2(). Any information contained in the gzip + header is not retained unless inflateGetHeader() is used. When processing + gzip-wrapped deflate data, strm->adler is set to the CRC-32 of the output + produced so far. The CRC-32 is checked against the gzip trailer, as is the + uncompressed length, modulo 2^32.
+ + inflate() returns Z_OK if some progress has been made (more input processed + or more output produced), Z_STREAM_END if the end of the compressed data has + been reached and all uncompressed output has been produced, Z_NEED_DICT if a + preset dictionary is needed at this point, Z_DATA_ERROR if the input data was + corrupted (input stream not conforming to the zlib format or incorrect check + value, in which case strm->msg points to a string with a more specific + error), Z_STREAM_ERROR if the stream structure was inconsistent (for example + next_in or next_out was Z_NULL, or the state was inadvertently written over + by the application), Z_MEM_ERROR if there was not enough memory, Z_BUF_ERROR + if no progress was possible or if there was not enough room in the output + buffer when Z_FINISH is used. Note that Z_BUF_ERROR is not fatal, and + inflate() can be called again with more input and more output space to + continue decompressing. If Z_DATA_ERROR is returned, the application may + then call inflateSync() to look for a good compression block if a partial + recovery of the data is to be attempted. +*/ + + +ZEXTERN int ZEXPORT inflateEnd(z_streamp strm); +/* + All dynamically allocated data structures for this stream are freed. + This function discards any unprocessed input and does not flush any pending + output. + + inflateEnd returns Z_OK if success, or Z_STREAM_ERROR if the stream state + was inconsistent. +*/ + + + /* Advanced functions */ + +/* + The following functions are needed only in some special applications. +*/ + +/* +ZEXTERN int ZEXPORT deflateInit2(z_streamp strm, + int level, + int method, + int windowBits, + int memLevel, + int strategy); + + This is another version of deflateInit with more compression options. The + fields zalloc, zfree and opaque must be initialized before by the caller. + + The method parameter is the compression method. It must be Z_DEFLATED in + this version of the library. 
+ + The windowBits parameter is the base two logarithm of the window size + (the size of the history buffer). It should be in the range 8..15 for this + version of the library. Larger values of this parameter result in better + compression at the expense of memory usage. The default value is 15 if + deflateInit is used instead. + + For the current implementation of deflate(), a windowBits value of 8 (a + window size of 256 bytes) is not supported. As a result, a request for 8 + will result in 9 (a 512-byte window). In that case, providing 8 to + inflateInit2() will result in an error when the zlib header with 9 is + checked against the initialization of inflate(). The remedy is to not use 8 + with deflateInit2() with this initialization, or at least in that case use 9 + with inflateInit2(). + + windowBits can also be -8..-15 for raw deflate. In this case, -windowBits + determines the window size. deflate() will then generate raw deflate data + with no zlib header or trailer, and will not compute a check value. + + windowBits can also be greater than 15 for optional gzip encoding. Add + 16 to windowBits to write a simple gzip header and trailer around the + compressed data instead of a zlib wrapper. The gzip header will have no + file name, no extra data, no comment, no modification time (set to zero), no + header crc, and the operating system will be set to the appropriate value, + if the operating system was determined at compile time. If a gzip stream is + being written, strm->adler is a CRC-32 instead of an Adler-32. + + For raw deflate or gzip encoding, a request for a 256-byte window is + rejected as invalid, since only the zlib header provides a means of + transmitting the window size to the decompressor. + + The memLevel parameter specifies how much memory should be allocated + for the internal compression state. memLevel=1 uses minimum memory but is + slow and reduces compression ratio; memLevel=9 uses maximum memory for + optimal speed. 
The default value is 8. See zconf.h for total memory usage + as a function of windowBits and memLevel. + + The strategy parameter is used to tune the compression algorithm. Use the + value Z_DEFAULT_STRATEGY for normal data, Z_FILTERED for data produced by a + filter (or predictor), Z_HUFFMAN_ONLY to force Huffman encoding only (no + string match), or Z_RLE to limit match distances to one (run-length + encoding). Filtered data consists mostly of small values with a somewhat + random distribution. In this case, the compression algorithm is tuned to + compress them better. The effect of Z_FILTERED is to force more Huffman + coding and less string matching; it is somewhat intermediate between + Z_DEFAULT_STRATEGY and Z_HUFFMAN_ONLY. Z_RLE is designed to be almost as + fast as Z_HUFFMAN_ONLY, but give better compression for PNG image data. The + strategy parameter only affects the compression ratio but not the + correctness of the compressed output even if it is not set appropriately. + Z_FIXED prevents the use of dynamic Huffman codes, allowing for a simpler + decoder for special applications. + + deflateInit2 returns Z_OK if success, Z_MEM_ERROR if there was not enough + memory, Z_STREAM_ERROR if any parameter is invalid (such as an invalid + method), or Z_VERSION_ERROR if the zlib library version (zlib_version) is + incompatible with the version assumed by the caller (ZLIB_VERSION). msg is + set to null if there is no error message. deflateInit2 does not perform any + compression: this will be done by deflate(). +*/ + +ZEXTERN int ZEXPORT deflateSetDictionary(z_streamp strm, + const Bytef *dictionary, + uInt dictLength); +/* + Initializes the compression dictionary from the given byte sequence + without producing any compressed output. When using the zlib format, this + function must be called immediately after deflateInit, deflateInit2 or + deflateReset, and before any call of deflate. 
When doing raw deflate, this + function must be called either before any call of deflate, or immediately + after the completion of a deflate block, i.e. after all input has been + consumed and all output has been delivered when using any of the flush + options Z_BLOCK, Z_PARTIAL_FLUSH, Z_SYNC_FLUSH, or Z_FULL_FLUSH. The + compressor and decompressor must use exactly the same dictionary (see + inflateSetDictionary). + + The dictionary should consist of strings (byte sequences) that are likely + to be encountered later in the data to be compressed, with the most commonly + used strings preferably put towards the end of the dictionary. Using a + dictionary is most useful when the data to be compressed is short and can be + predicted with good accuracy; the data can then be compressed better than + with the default empty dictionary. + + Depending on the size of the compression data structures selected by + deflateInit or deflateInit2, a part of the dictionary may in effect be + discarded, for example if the dictionary is larger than the window size + provided in deflateInit or deflateInit2. Thus the strings most likely to be + useful should be put at the end of the dictionary, not at the front. In + addition, the current implementation of deflate will use at most the window + size minus 262 bytes of the provided dictionary. + + Upon return of this function, strm->adler is set to the Adler-32 value + of the dictionary; the decompressor may later use this value to determine + which dictionary has been used by the compressor. (The Adler-32 value + applies to the whole dictionary even if only a subset of the dictionary is + actually used by the compressor.) If a raw deflate was requested, then the + Adler-32 value is not computed and strm->adler is not set. + + deflateSetDictionary returns Z_OK if success, or Z_STREAM_ERROR if a + parameter is invalid (e.g. 
dictionary being Z_NULL) or the stream state is + inconsistent (for example if deflate has already been called for this stream + or if not at a block boundary for raw deflate). deflateSetDictionary does + not perform any compression: this will be done by deflate(). +*/ + +ZEXTERN int ZEXPORT deflateGetDictionary(z_streamp strm, + Bytef *dictionary, + uInt *dictLength); +/* + Returns the sliding dictionary being maintained by deflate. dictLength is + set to the number of bytes in the dictionary, and that many bytes are copied + to dictionary. dictionary must have enough space, where 32768 bytes is + always enough. If deflateGetDictionary() is called with dictionary equal to + Z_NULL, then only the dictionary length is returned, and nothing is copied. + Similarly, if dictLength is Z_NULL, then it is not set. + + deflateGetDictionary() may return a length less than the window size, even + when more than the window size in input has been provided. It may return up + to 258 bytes less in that case, due to how zlib's implementation of deflate + manages the sliding window and lookahead for matches, where matches can be + up to 258 bytes long. If the application needs the last window-size bytes of + input, then that would need to be saved by the application outside of zlib. + + deflateGetDictionary returns Z_OK on success, or Z_STREAM_ERROR if the + stream state is inconsistent. +*/ + +ZEXTERN int ZEXPORT deflateCopy(z_streamp dest, + z_streamp source); +/* + Sets the destination stream as a complete copy of the source stream. + + This function can be useful when several compression strategies will be + tried, for example when there are several ways of pre-processing the input + data with a filter. The streams that will be discarded should then be freed + by calling deflateEnd. Note that deflateCopy duplicates the internal + compression state which can be quite large, so this strategy is slow and can + consume lots of memory. 
+ + deflateCopy returns Z_OK if success, Z_MEM_ERROR if there was not + enough memory, Z_STREAM_ERROR if the source stream state was inconsistent + (such as zalloc being Z_NULL). msg is left unchanged in both source and + destination. +*/ + +ZEXTERN int ZEXPORT deflateReset(z_streamp strm); +/* + This function is equivalent to deflateEnd followed by deflateInit, but + does not free and reallocate the internal compression state. The stream + will leave the compression level and any other attributes that may have been + set unchanged. total_in, total_out, adler, and msg are initialized. + + deflateReset returns Z_OK if success, or Z_STREAM_ERROR if the source + stream state was inconsistent (such as zalloc or state being Z_NULL). +*/ + +ZEXTERN int ZEXPORT deflateParams(z_streamp strm, + int level, + int strategy); +/* + Dynamically update the compression level and compression strategy. The + interpretation of level and strategy is as in deflateInit2(). This can be + used to switch between compression and straight copy of the input data, or + to switch to a different kind of input data requiring a different strategy. + If the compression approach (which is a function of the level) or the + strategy is changed, and if there have been any deflate() calls since the + state was initialized or reset, then the input available so far is + compressed with the old level and strategy using deflate(strm, Z_BLOCK). + There are three approaches for the compression levels 0, 1..3, and 4..9 + respectively. The new level and strategy will take effect at the next call + of deflate(). + + If a deflate(strm, Z_BLOCK) is performed by deflateParams(), and it does + not have enough output space to complete, then the parameter change will not + take effect. In this case, deflateParams() can be called again with the + same parameters and more output space to try again. 
+ + In order to assure a change in the parameters on the first try, the + deflate stream should be flushed using deflate() with Z_BLOCK or other flush + request until strm.avail_out is not zero, before calling deflateParams(). + Then no more input data should be provided before the deflateParams() call. + If this is done, the old level and strategy will be applied to the data + compressed before deflateParams(), and the new level and strategy will be + applied to the data compressed after deflateParams(). + + deflateParams returns Z_OK on success, Z_STREAM_ERROR if the source stream + state was inconsistent or if a parameter was invalid, or Z_BUF_ERROR if + there was not enough output space to complete the compression of the + available input data before a change in the strategy or approach. Note that + in the case of a Z_BUF_ERROR, the parameters are not changed. A return + value of Z_BUF_ERROR is not fatal, in which case deflateParams() can be + retried with more output space. +*/ + +ZEXTERN int ZEXPORT deflateTune(z_streamp strm, + int good_length, + int max_lazy, + int nice_length, + int max_chain); +/* + Fine tune deflate's internal compression parameters. This should only be + used by someone who understands the algorithm used by zlib's deflate for + searching for the best matching string, and even then only by the most + fanatic optimizer trying to squeeze out the last compressed bit for their + specific input data. Read the deflate.c source code for the meaning of the + max_lazy, good_length, nice_length, and max_chain parameters. + + deflateTune() can be called after deflateInit() or deflateInit2(), and + returns Z_OK on success, or Z_STREAM_ERROR for an invalid deflate stream. + */ + +ZEXTERN uLong ZEXPORT deflateBound(z_streamp strm, + uLong sourceLen); +/* + deflateBound() returns an upper bound on the compressed size after + deflation of sourceLen bytes. It must be called after deflateInit() or + deflateInit2(), and after deflateSetHeader(), if used. 
This would be used + to allocate an output buffer for deflation in a single pass, and so would be + called before deflate(). If that first deflate() call is provided the + sourceLen input bytes, an output buffer allocated to the size returned by + deflateBound(), and the flush value Z_FINISH, then deflate() is guaranteed + to return Z_STREAM_END. Note that it is possible for the compressed size to + be larger than the value returned by deflateBound() if flush options other + than Z_FINISH or Z_NO_FLUSH are used. +*/ + +ZEXTERN int ZEXPORT deflatePending(z_streamp strm, + unsigned *pending, + int *bits); +/* + deflatePending() returns the number of bytes and bits of output that have + been generated, but not yet provided in the available output. The bytes not + provided would be due to the available output space having been consumed. + The number of bits of output not provided is between 0 and 7, where they + await more bits to join them in order to fill out a full byte. If pending + or bits are Z_NULL, then those values are not set. + + deflatePending returns Z_OK if success, or Z_STREAM_ERROR if the source + stream state was inconsistent. + */ + +ZEXTERN int ZEXPORT deflatePrime(z_streamp strm, + int bits, + int value); +/* + deflatePrime() inserts bits in the deflate output stream. The intent + is that this function is used to start off the deflate output with the bits + leftover from a previous deflate stream when appending to it. As such, this + function can only be used for raw deflate, and must be used before the first + deflate() call after a deflateInit2() or deflateReset(). bits must be less + than or equal to 16, and that many of the least significant bits of value + will be inserted in the output. + + deflatePrime returns Z_OK if success, Z_BUF_ERROR if there was not enough + room in the internal buffer to insert the bits, or Z_STREAM_ERROR if the + source stream state was inconsistent.
+*/ + +ZEXTERN int ZEXPORT deflateSetHeader(z_streamp strm, + gz_headerp head); +/* + deflateSetHeader() provides gzip header information for when a gzip + stream is requested by deflateInit2(). deflateSetHeader() may be called + after deflateInit2() or deflateReset() and before the first call of + deflate(). The text, time, os, extra field, name, and comment information + in the provided gz_header structure are written to the gzip header (xflag is + ignored -- the extra flags are set according to the compression level). The + caller must assure that, if not Z_NULL, name and comment are terminated with + a zero byte, and that if extra is not Z_NULL, that extra_len bytes are + available there. If hcrc is true, a gzip header crc is included. Note that + the current versions of the command-line version of gzip (up through version + 1.3.x) do not support header crc's, and will report that it is a "multi-part + gzip file" and give up. + + If deflateSetHeader is not used, the default gzip header has text false, + the time set to zero, and os set to the current operating system, with no + extra, name, or comment fields. The gzip header is returned to the default + state by deflateReset(). + + deflateSetHeader returns Z_OK if success, or Z_STREAM_ERROR if the source + stream state was inconsistent. +*/ + +/* +ZEXTERN int ZEXPORT inflateInit2(z_streamp strm, + int windowBits); + + This is another version of inflateInit with an extra parameter. The + fields next_in, avail_in, zalloc, zfree and opaque must be initialized + before by the caller. + + The windowBits parameter is the base two logarithm of the maximum window + size (the size of the history buffer). It should be in the range 8..15 for + this version of the library. The default value is 15 if inflateInit is used + instead. windowBits must be greater than or equal to the windowBits value + provided to deflateInit2() while compressing, or it must be equal to 15 if + deflateInit2() was not used. 
If a compressed stream with a larger window + size is given as input, inflate() will return with the error code + Z_DATA_ERROR instead of trying to allocate a larger window. + + windowBits can also be zero to request that inflate use the window size in + the zlib header of the compressed stream. + + windowBits can also be -8..-15 for raw inflate. In this case, -windowBits + determines the window size. inflate() will then process raw deflate data, + not looking for a zlib or gzip header, not generating a check value, and not + looking for any check values for comparison at the end of the stream. This + is for use with other formats that use the deflate compressed data format + such as zip. Those formats provide their own check values. If a custom + format is developed using the raw deflate format for compressed data, it is + recommended that a check value such as an Adler-32 or a CRC-32 be applied to + the uncompressed data as is done in the zlib, gzip, and zip formats. For + most applications, the zlib format should be used as is. Note that comments + above on the use in deflateInit2() apply to the magnitude of windowBits. + + windowBits can also be greater than 15 for optional gzip decoding. Add + 32 to windowBits to enable zlib and gzip decoding with automatic header + detection, or add 16 to decode only the gzip format (the zlib format will + return a Z_DATA_ERROR). If a gzip stream is being decoded, strm->adler is a + CRC-32 instead of an Adler-32. Unlike the gunzip utility and gzread() (see + below), inflate() will *not* automatically decode concatenated gzip members. + inflate() will return Z_STREAM_END at the end of the gzip member. The state + would need to be reset to continue decoding a subsequent gzip member. This + *must* be done if there is more data after a gzip member, in order for the + decompression to be compliant with the gzip standard (RFC 1952).
+ + inflateInit2 returns Z_OK if success, Z_MEM_ERROR if there was not enough + memory, Z_VERSION_ERROR if the zlib library version is incompatible with the + version assumed by the caller, or Z_STREAM_ERROR if the parameters are + invalid, such as a null pointer to the structure. msg is set to null if + there is no error message. inflateInit2 does not perform any decompression + apart from possibly reading the zlib header if present: actual decompression + will be done by inflate(). (So next_in and avail_in may be modified, but + next_out and avail_out are unused and unchanged.) The current implementation + of inflateInit2() does not process any header information -- that is + deferred until inflate() is called. +*/ + +ZEXTERN int ZEXPORT inflateSetDictionary(z_streamp strm, + const Bytef *dictionary, + uInt dictLength); +/* + Initializes the decompression dictionary from the given uncompressed byte + sequence. This function must be called immediately after a call of inflate, + if that call returned Z_NEED_DICT. The dictionary chosen by the compressor + can be determined from the Adler-32 value returned by that call of inflate. + The compressor and decompressor must use exactly the same dictionary (see + deflateSetDictionary). For raw inflate, this function can be called at any + time to set the dictionary. If the provided dictionary is smaller than the + window and there is already data in the window, then the provided dictionary + will amend what's there. The application must insure that the dictionary + that was used for compression is provided. + + inflateSetDictionary returns Z_OK if success, Z_STREAM_ERROR if a + parameter is invalid (e.g. dictionary being Z_NULL) or the stream state is + inconsistent, Z_DATA_ERROR if the given dictionary doesn't match the + expected one (incorrect Adler-32 value). inflateSetDictionary does not + perform any decompression: this will be done by subsequent calls of + inflate(). 
+*/ + +ZEXTERN int ZEXPORT inflateGetDictionary(z_streamp strm, + Bytef *dictionary, + uInt *dictLength); +/* + Returns the sliding dictionary being maintained by inflate. dictLength is + set to the number of bytes in the dictionary, and that many bytes are copied + to dictionary. dictionary must have enough space, where 32768 bytes is + always enough. If inflateGetDictionary() is called with dictionary equal to + Z_NULL, then only the dictionary length is returned, and nothing is copied. + Similarly, if dictLength is Z_NULL, then it is not set. + + inflateGetDictionary returns Z_OK on success, or Z_STREAM_ERROR if the + stream state is inconsistent. +*/ + +ZEXTERN int ZEXPORT inflateSync(z_streamp strm); +/* + Skips invalid compressed data until a possible full flush point (see above + for the description of deflate with Z_FULL_FLUSH) can be found, or until all + available input is skipped. No output is provided. + + inflateSync searches for a 00 00 FF FF pattern in the compressed data. + All full flush points have this pattern, but not all occurrences of this + pattern are full flush points. + + inflateSync returns Z_OK if a possible full flush point has been found, + Z_BUF_ERROR if no more input was provided, Z_DATA_ERROR if no flush point + has been found, or Z_STREAM_ERROR if the stream structure was inconsistent. + In the success case, the application may save the current value of + total_in which indicates where valid compressed data was found. In the + error case, the application may repeatedly call inflateSync, providing more + input each time, until success or end of the input data. +*/ + +ZEXTERN int ZEXPORT inflateCopy(z_streamp dest, + z_streamp source); +/* + Sets the destination stream as a complete copy of the source stream. + + This function can be useful when randomly accessing a large stream.
The + first pass through the stream can periodically record the inflate state, + allowing restarting inflate at those points when randomly accessing the + stream. + + inflateCopy returns Z_OK if success, Z_MEM_ERROR if there was not + enough memory, Z_STREAM_ERROR if the source stream state was inconsistent + (such as zalloc being Z_NULL). msg is left unchanged in both source and + destination. +*/ + +ZEXTERN int ZEXPORT inflateReset(z_streamp strm); +/* + This function is equivalent to inflateEnd followed by inflateInit, + but does not free and reallocate the internal decompression state. The + stream will keep attributes that may have been set by inflateInit2. + total_in, total_out, adler, and msg are initialized. + + inflateReset returns Z_OK if success, or Z_STREAM_ERROR if the source + stream state was inconsistent (such as zalloc or state being Z_NULL). +*/ + +ZEXTERN int ZEXPORT inflateReset2(z_streamp strm, + int windowBits); +/* + This function is the same as inflateReset, but it also permits changing + the wrap and window size requests. The windowBits parameter is interpreted + the same as it is for inflateInit2. If the window size is changed, then the + memory allocated for the window is freed, and the window will be reallocated + by inflate() if needed. + + inflateReset2 returns Z_OK if success, or Z_STREAM_ERROR if the source + stream state was inconsistent (such as zalloc or state being Z_NULL), or if + the windowBits parameter is invalid. +*/ + +ZEXTERN int ZEXPORT inflatePrime(z_streamp strm, + int bits, + int value); +/* + This function inserts bits in the inflate input stream. The intent is + that this function is used to start inflating at a bit position in the + middle of a byte. The provided bits will be used before any bytes are used + from next_in. This function should only be used with raw inflate, and + should be used before the first inflate() call after inflateInit2() or + inflateReset(). 
bits must be less than or equal to 16, and that many of the + least significant bits of value will be inserted in the input. + + If bits is negative, then the input stream bit buffer is emptied. Then + inflatePrime() can be called again to put bits in the buffer. This is used + to clear out bits leftover after feeding inflate a block description prior + to feeding inflate codes. + + inflatePrime returns Z_OK if success, or Z_STREAM_ERROR if the source + stream state was inconsistent. +*/ + +ZEXTERN long ZEXPORT inflateMark(z_streamp strm); +/* + This function returns two values, one in the lower 16 bits of the return + value, and the other in the remaining upper bits, obtained by shifting the + return value down 16 bits. If the upper value is -1 and the lower value is + zero, then inflate() is currently decoding information outside of a block. + If the upper value is -1 and the lower value is non-zero, then inflate is in + the middle of a stored block, with the lower value equaling the number of + bytes from the input remaining to copy. If the upper value is not -1, then + it is the number of bits back from the current bit position in the input of + the code (literal or length/distance pair) currently being processed. In + that case the lower value is the number of bytes already emitted for that + code. + + A code is being processed if inflate is waiting for more input to complete + decoding of the code, or if it has completed decoding but is waiting for + more output space to write the literal or match data. + + inflateMark() is used to mark locations in the input data for random + access, which may be at bit positions, and to note those cases where the + output of a code may span boundaries of random access blocks. The current + location in the input stream can be determined from avail_in and data_type + as noted in the description for the Z_BLOCK flush parameter for inflate. 
+ + inflateMark returns the value noted above, or -65536 if the provided + source stream state was inconsistent. +*/ + +ZEXTERN int ZEXPORT inflateGetHeader(z_streamp strm, + gz_headerp head); +/* + inflateGetHeader() requests that gzip header information be stored in the + provided gz_header structure. inflateGetHeader() may be called after + inflateInit2() or inflateReset(), and before the first call of inflate(). + As inflate() processes the gzip stream, head->done is zero until the header + is completed, at which time head->done is set to one. If a zlib stream is + being decoded, then head->done is set to -1 to indicate that there will be + no gzip header information forthcoming. Note that Z_BLOCK or Z_TREES can be + used to force inflate() to return immediately after header processing is + complete and before any actual data is decompressed. + + The text, time, xflags, and os fields are filled in with the gzip header + contents. hcrc is set to true if there is a header CRC. (The header CRC + was valid if done is set to one.) If extra is not Z_NULL, then extra_max + contains the maximum number of bytes to write to extra. Once done is true, + extra_len contains the actual extra field length, and extra contains the + extra field, or that field truncated if extra_max is less than extra_len. + If name is not Z_NULL, then up to name_max characters are written there, + terminated with a zero unless the length is greater than name_max. If + comment is not Z_NULL, then up to comm_max characters are written there, + terminated with a zero unless the length is greater than comm_max. When any + of extra, name, or comment are not Z_NULL and the respective field is not + present in the header, then that field is set to Z_NULL to signal its + absence. This allows the use of deflateSetHeader() with the returned + structure to duplicate the header. 
However if those fields are set to + allocated memory, then the application will need to save those pointers + elsewhere so that they can be eventually freed. + + If inflateGetHeader is not used, then the header information is simply + discarded. The header is always checked for validity, including the header + CRC if present. inflateReset() will reset the process to discard the header + information. The application would need to call inflateGetHeader() again to + retrieve the header from the next gzip stream. + + inflateGetHeader returns Z_OK if success, or Z_STREAM_ERROR if the source + stream state was inconsistent. +*/ + +/* +ZEXTERN int ZEXPORT inflateBackInit(z_streamp strm, int windowBits, + unsigned char FAR *window); + + Initialize the internal stream state for decompression using inflateBack() + calls. The fields zalloc, zfree and opaque in strm must be initialized + before the call. If zalloc and zfree are Z_NULL, then the default library- + derived memory allocation routines are used. windowBits is the base two + logarithm of the window size, in the range 8..15. window is a caller + supplied buffer of that size. Except for special applications where it is + assured that deflate was used with small window sizes, windowBits must be 15 + and a 32K byte window must be supplied to be able to decompress general + deflate streams. + + See inflateBack() for the usage of these routines. + + inflateBackInit will return Z_OK on success, Z_STREAM_ERROR if any of + the parameters are invalid, Z_MEM_ERROR if the internal state could not be + allocated, or Z_VERSION_ERROR if the version of the library does not match + the version of the header file. 
+*/ + +typedef unsigned (*in_func)(void FAR *, + z_const unsigned char FAR * FAR *); +typedef int (*out_func)(void FAR *, unsigned char FAR *, unsigned); + +ZEXTERN int ZEXPORT inflateBack(z_streamp strm, + in_func in, void FAR *in_desc, + out_func out, void FAR *out_desc); +/* + inflateBack() does a raw inflate with a single call using a call-back + interface for input and output. This is potentially more efficient than + inflate() for file i/o applications, in that it avoids copying between the + output and the sliding window by simply making the window itself the output + buffer. inflate() can be faster on modern CPUs when used with large + buffers. inflateBack() trusts the application to not change the output + buffer passed by the output function, at least until inflateBack() returns. + + inflateBackInit() must be called first to allocate the internal state + and to initialize the state with the user-provided window buffer. + inflateBack() may then be used multiple times to inflate a complete, raw + deflate stream with each call. inflateBackEnd() is then called to free the + allocated state. + + A raw deflate stream is one with no zlib or gzip header or trailer. + This routine would normally be used in a utility that reads zip or gzip + files and writes out uncompressed files. The utility would decode the + header and process the trailer on its own, hence this routine expects only + the raw deflate stream to decompress. This is different from the default + behavior of inflate(), which expects a zlib header and trailer around the + deflate stream. + + inflateBack() uses two subroutines supplied by the caller that are then + called by inflateBack() for input and output. inflateBack() calls those + routines until it reads a complete deflate stream and writes out all of the + uncompressed data, or until it encounters an error. The function's + parameters and return types are defined above in the in_func and out_func + typedefs. 
inflateBack() will call in(in_desc, &buf) which should return the + number of bytes of provided input, and a pointer to that input in buf. If + there is no input available, in() must return zero -- buf is ignored in that + case -- and inflateBack() will return a buffer error. inflateBack() will + call out(out_desc, buf, len) to write the uncompressed data buf[0..len-1]. + out() should return zero on success, or non-zero on failure. If out() + returns non-zero, inflateBack() will return with an error. Neither in() nor + out() are permitted to change the contents of the window provided to + inflateBackInit(), which is also the buffer that out() uses to write from. + The length written by out() will be at most the window size. Any non-zero + amount of input may be provided by in(). + + For convenience, inflateBack() can be provided input on the first call by + setting strm->next_in and strm->avail_in. If that input is exhausted, then + in() will be called. Therefore strm->next_in must be initialized before + calling inflateBack(). If strm->next_in is Z_NULL, then in() will be called + immediately for input. If strm->next_in is not Z_NULL, then strm->avail_in + must also be initialized, and then if strm->avail_in is not zero, input will + initially be taken from strm->next_in[0 .. strm->avail_in - 1]. + + The in_desc and out_desc parameters of inflateBack() are passed as the + first parameter of in() and out() respectively when they are called. These + descriptors can be optionally used to pass any information that the caller- + supplied in() and out() functions need to do their job. + + On return, inflateBack() will set strm->next_in and strm->avail_in to + pass back any unused input that was provided by the last in() call.
The + return values of inflateBack() can be Z_STREAM_END on success, Z_BUF_ERROR + if in() or out() returned an error, Z_DATA_ERROR if there was a format error + in the deflate stream (in which case strm->msg is set to indicate the nature + of the error), or Z_STREAM_ERROR if the stream was not properly initialized. + In the case of Z_BUF_ERROR, an input or output error can be distinguished + using strm->next_in which will be Z_NULL only if in() returned an error. If + strm->next_in is not Z_NULL, then the Z_BUF_ERROR was due to out() returning + non-zero. (in() will always be called before out(), so strm->next_in is + assured to be defined if out() returns non-zero.) Note that inflateBack() + cannot return Z_OK. +*/ + +ZEXTERN int ZEXPORT inflateBackEnd(z_streamp strm); +/* + All memory allocated by inflateBackInit() is freed. + + inflateBackEnd() returns Z_OK on success, or Z_STREAM_ERROR if the stream + state was inconsistent. +*/ + +ZEXTERN uLong ZEXPORT zlibCompileFlags(void); +/* Return flags indicating compile-time options. 
+ + Type sizes, two bits each, 00 = 16 bits, 01 = 32, 10 = 64, 11 = other: + 1.0: size of uInt + 3.2: size of uLong + 5.4: size of voidpf (pointer) + 7.6: size of z_off_t + + Compiler, assembler, and debug options: + 8: ZLIB_DEBUG + 9: ASMV or ASMINF -- use ASM code + 10: ZLIB_WINAPI -- exported functions use the WINAPI calling convention + 11: 0 (reserved) + + One-time table building (smaller code, but not thread-safe if true): + 12: BUILDFIXED -- build static block decoding tables when needed + 13: DYNAMIC_CRC_TABLE -- build CRC calculation tables when needed + 14,15: 0 (reserved) + + Library content (indicates missing functionality): + 16: NO_GZCOMPRESS -- gz* functions cannot compress (to avoid linking + deflate code when not needed) + 17: NO_GZIP -- deflate can't write gzip streams, and inflate can't detect + and decode gzip streams (to avoid linking crc code) + 18-19: 0 (reserved) + + Operation variations (changes in library functionality): + 20: PKZIP_BUG_WORKAROUND -- slightly more permissive inflate + 21: FASTEST -- deflate algorithm with only one, lowest compression level + 22,23: 0 (reserved) + + The sprintf variant used by gzprintf (zero is best): + 24: 0 = vs*, 1 = s* -- 1 means limited to 20 arguments after the format + 25: 0 = *nprintf, 1 = *printf -- 1 means gzprintf() not secure! + 26: 0 = returns value, 1 = void -- 1 means inferred string length returned + + Remainder: + 27-31: 0 (reserved) + */ + +#ifndef Z_SOLO + + /* utility functions */ + +/* + The following utility functions are implemented on top of the basic + stream-oriented functions. To simplify the interface, some default options + are assumed (compression level and memory usage, standard memory allocation + functions). The source code of these utility functions can be modified if + you need special options. +*/ + +ZEXTERN int ZEXPORT compress(Bytef *dest, uLongf *destLen, + const Bytef *source, uLong sourceLen); +/* + Compresses the source buffer into the destination buffer. 
sourceLen is + the byte length of the source buffer. Upon entry, destLen is the total size + of the destination buffer, which must be at least the value returned by + compressBound(sourceLen). Upon exit, destLen is the actual size of the + compressed data. compress() is equivalent to compress2() with a level + parameter of Z_DEFAULT_COMPRESSION. + + compress returns Z_OK if success, Z_MEM_ERROR if there was not + enough memory, Z_BUF_ERROR if there was not enough room in the output + buffer. +*/ + +ZEXTERN int ZEXPORT compress2(Bytef *dest, uLongf *destLen, + const Bytef *source, uLong sourceLen, + int level); +/* + Compresses the source buffer into the destination buffer. The level + parameter has the same meaning as in deflateInit. sourceLen is the byte + length of the source buffer. Upon entry, destLen is the total size of the + destination buffer, which must be at least the value returned by + compressBound(sourceLen). Upon exit, destLen is the actual size of the + compressed data. + + compress2 returns Z_OK if success, Z_MEM_ERROR if there was not enough + memory, Z_BUF_ERROR if there was not enough room in the output buffer, + Z_STREAM_ERROR if the level parameter is invalid. +*/ + +ZEXTERN uLong ZEXPORT compressBound(uLong sourceLen); +/* + compressBound() returns an upper bound on the compressed size after + compress() or compress2() on sourceLen bytes. It would be used before a + compress() or compress2() call to allocate the destination buffer. +*/ + +ZEXTERN int ZEXPORT uncompress(Bytef *dest, uLongf *destLen, + const Bytef *source, uLong sourceLen); +/* + Decompresses the source buffer into the destination buffer. sourceLen is + the byte length of the source buffer. Upon entry, destLen is the total size + of the destination buffer, which must be large enough to hold the entire + uncompressed data. 
(The size of the uncompressed data must have been saved + previously by the compressor and transmitted to the decompressor by some + mechanism outside the scope of this compression library.) Upon exit, destLen + is the actual size of the uncompressed data. + + uncompress returns Z_OK if success, Z_MEM_ERROR if there was not + enough memory, Z_BUF_ERROR if there was not enough room in the output + buffer, or Z_DATA_ERROR if the input data was corrupted or incomplete. In + the case where there is not enough room, uncompress() will fill the output + buffer with the uncompressed data up to that point. +*/ + +ZEXTERN int ZEXPORT uncompress2(Bytef *dest, uLongf *destLen, + const Bytef *source, uLong *sourceLen); +/* + Same as uncompress, except that sourceLen is a pointer, where the + length of the source is *sourceLen. On return, *sourceLen is the number of + source bytes consumed. +*/ + + /* gzip file access functions */ + +/* + This library supports reading and writing files in gzip (.gz) format with + an interface similar to that of stdio, using the functions that start with + "gz". The gzip format is different from the zlib format. gzip is a gzip + wrapper, documented in RFC 1952, wrapped around a deflate stream. +*/ + +typedef struct gzFile_s *gzFile; /* semi-opaque gzip file descriptor */ + +/* +ZEXTERN gzFile ZEXPORT gzopen(const char *path, const char *mode); + + Open the gzip (.gz) file at path for reading and decompressing, or + compressing and writing. The mode parameter is as in fopen ("rb" or "wb") + but can also include a compression level ("wb9") or a strategy: 'f' for + filtered data as in "wb6f", 'h' for Huffman-only compression as in "wb1h", + 'R' for run-length encoding as in "wb1R", or 'F' for fixed code compression + as in "wb9F". (See the description of deflateInit2 for more information + about the strategy parameter.) 'T' will request transparent writing or + appending with no compression and not using the gzip format. 
+ + "a" can be used instead of "w" to request that the gzip stream that will + be written be appended to the file. "+" will result in an error, since + reading and writing to the same gzip file is not supported. The addition of + "x" when writing will create the file exclusively, which fails if the file + already exists. On systems that support it, the addition of "e" when + reading or writing will set the flag to close the file on an execve() call. + + These functions, as well as gzip, will read and decode a sequence of gzip + streams in a file. The append function of gzopen() can be used to create + such a file. (Also see gzflush() for another way to do this.) When + appending, gzopen does not test whether the file begins with a gzip stream, + nor does it look for the end of the gzip streams to begin appending. gzopen + will simply append a gzip stream to the existing file. + + gzopen can be used to read a file which is not in gzip format; in this + case gzread will directly read from the file without decompression. When + reading, this will be detected automatically by looking for the magic two- + byte gzip header. + + gzopen returns NULL if the file could not be opened, if there was + insufficient memory to allocate the gzFile state, or if an invalid mode was + specified (an 'r', 'w', or 'a' was not provided, or '+' was provided). + errno can be checked to determine if the reason gzopen failed was that the + file could not be opened. +*/ + +ZEXTERN gzFile ZEXPORT gzdopen(int fd, const char *mode); +/* + Associate a gzFile with the file descriptor fd. File descriptors are + obtained from calls like open, dup, creat, pipe or fileno (if the file has + been previously opened with fopen). The mode parameter is as in gzopen. + + The next call of gzclose on the returned gzFile will also close the file + descriptor fd, just like fclose(fdopen(fd, mode)) closes the file descriptor + fd. If you want to keep fd open, use fd = dup(fd_keep); gz = gzdopen(fd, + mode);. 
The duplicated descriptor should be saved to avoid a leak, since + gzdopen does not close fd if it fails. If you are using fileno() to get the + file descriptor from a FILE *, then you will have to use dup() to avoid + double-close()ing the file descriptor. Both gzclose() and fclose() will + close the associated file descriptor, so they need to have different file + descriptors. + + gzdopen returns NULL if there was insufficient memory to allocate the + gzFile state, if an invalid mode was specified (an 'r', 'w', or 'a' was not + provided, or '+' was provided), or if fd is -1. The file descriptor is not + used until the next gz* read, write, seek, or close operation, so gzdopen + will not detect if fd is invalid (unless fd is -1). +*/ + +ZEXTERN int ZEXPORT gzbuffer(gzFile file, unsigned size); +/* + Set the internal buffer size used by this library's functions for file to + size. The default buffer size is 8192 bytes. This function must be called + after gzopen() or gzdopen(), and before any other calls that read or write + the file. The buffer memory allocation is always deferred to the first read + or write. Three times that size in buffer space is allocated. A larger + buffer size of, for example, 64K or 128K bytes will noticeably increase the + speed of decompression (reading). + + The new buffer size also affects the maximum length for gzprintf(). + + gzbuffer() returns 0 on success, or -1 on failure, such as being called + too late. +*/ + +ZEXTERN int ZEXPORT gzsetparams(gzFile file, int level, int strategy); +/* + Dynamically update the compression level and strategy for file. See the + description of deflateInit2 for the meaning of these parameters. Previously + provided data is flushed before applying the parameter changes. + + gzsetparams returns Z_OK if success, Z_STREAM_ERROR if the file was not + opened for writing, Z_ERRNO if there is an error writing the flushed data, + or Z_MEM_ERROR if there is a memory allocation error. 
+*/ + +ZEXTERN int ZEXPORT gzread(gzFile file, voidp buf, unsigned len); +/* + Read and decompress up to len uncompressed bytes from file into buf. If + the input file is not in gzip format, gzread copies the given number of + bytes into the buffer directly from the file. + + After reaching the end of a gzip stream in the input, gzread will continue + to read, looking for another gzip stream. Any number of gzip streams may be + concatenated in the input file, and will all be decompressed by gzread(). + If something other than a gzip stream is encountered after a gzip stream, + that remaining trailing garbage is ignored (and no error is returned). + + gzread can be used to read a gzip file that is being concurrently written. + Upon reaching the end of the input, gzread will return with the available + data. If the error code returned by gzerror is Z_OK or Z_BUF_ERROR, then + gzclearerr can be used to clear the end of file indicator in order to permit + gzread to be tried again. Z_OK indicates that a gzip stream was completed + on the last gzread. Z_BUF_ERROR indicates that the input file ended in the + middle of a gzip stream. Note that gzread does not return -1 in the event + of an incomplete gzip stream. This error is deferred until gzclose(), which + will return Z_BUF_ERROR if the last gzread ended in the middle of a gzip + stream. Alternatively, gzerror can be used before gzclose to detect this + case. + + gzread returns the number of uncompressed bytes actually read, less than + len for end of file, or -1 for error. If len is too large to fit in an int, + then nothing is read, -1 is returned, and the error state is set to + Z_STREAM_ERROR. +*/ + +ZEXTERN z_size_t ZEXPORT gzfread(voidp buf, z_size_t size, z_size_t nitems, + gzFile file); +/* + Read and decompress up to nitems items of size size from file into buf, + otherwise operating as gzread() does. This duplicates the interface of + stdio's fread(), with size_t request and return types. 
If the library + defines size_t, then z_size_t is identical to size_t. If not, then z_size_t + is an unsigned integer type that can contain a pointer. + + gzfread() returns the number of full items read of size size, or zero if + the end of the file was reached and a full item could not be read, or if + there was an error. gzerror() must be consulted if zero is returned in + order to determine if there was an error. If the multiplication of size and + nitems overflows, i.e. the product does not fit in a z_size_t, then nothing + is read, zero is returned, and the error state is set to Z_STREAM_ERROR. + + In the event that the end of file is reached and only a partial item is + available at the end, i.e. the remaining uncompressed data length is not a + multiple of size, then the final partial item is nevertheless read into buf + and the end-of-file flag is set. The length of the partial item read is not + provided, but could be inferred from the result of gztell(). This behavior + is the same as the behavior of fread() implementations in common libraries, + but it prevents the direct use of gzfread() to read a concurrently written + file, resetting and retrying on end-of-file, when size is not 1. +*/ + +ZEXTERN int ZEXPORT gzwrite(gzFile file, voidpc buf, unsigned len); +/* + Compress and write the len uncompressed bytes at buf to file. gzwrite + returns the number of uncompressed bytes written or 0 in case of error. +*/ + +ZEXTERN z_size_t ZEXPORT gzfwrite(voidpc buf, z_size_t size, + z_size_t nitems, gzFile file); +/* + Compress and write nitems items of size size from buf to file, duplicating + the interface of stdio's fwrite(), with size_t request and return types. If + the library defines size_t, then z_size_t is identical to size_t. If not, + then z_size_t is an unsigned integer type that can contain a pointer. + + gzfwrite() returns the number of full items written of size size, or zero + if there was an error. 
If the multiplication of size and nitems overflows, + i.e. the product does not fit in a z_size_t, then nothing is written, zero + is returned, and the error state is set to Z_STREAM_ERROR. +*/ + +ZEXTERN int ZEXPORTVA gzprintf(gzFile file, const char *format, ...); +/* + Convert, format, compress, and write the arguments (...) to file under + control of the string format, as in fprintf. gzprintf returns the number of + uncompressed bytes actually written, or a negative zlib error code in case + of error. The number of uncompressed bytes written is limited to 8191, or + one less than the buffer size given to gzbuffer(). The caller should assure + that this limit is not exceeded. If it is exceeded, then gzprintf() will + return an error (0) with nothing written. In this case, there may also be a + buffer overflow with unpredictable consequences, which is possible only if + zlib was compiled with the insecure functions sprintf() or vsprintf(), + because the secure snprintf() or vsnprintf() functions were not available. + This can be determined using zlibCompileFlags(). +*/ + +ZEXTERN int ZEXPORT gzputs(gzFile file, const char *s); +/* + Compress and write the given null-terminated string s to file, excluding + the terminating null character. + + gzputs returns the number of characters written, or -1 in case of error. +*/ + +ZEXTERN char * ZEXPORT gzgets(gzFile file, char *buf, int len); +/* + Read and decompress bytes from file into buf, until len-1 characters are + read, or until a newline character is read and transferred to buf, or an + end-of-file condition is encountered. If any characters are read or if len + is one, the string is terminated with a null character. If no characters + are read due to an end-of-file or len is less than one, then the buffer is + left untouched. + + gzgets returns buf which is a null-terminated string, or it returns NULL + for end-of-file or in case of error. If there was an error, the contents at + buf are indeterminate. 
+*/ + +ZEXTERN int ZEXPORT gzputc(gzFile file, int c); +/* + Compress and write c, converted to an unsigned char, into file. gzputc + returns the value that was written, or -1 in case of error. +*/ + +ZEXTERN int ZEXPORT gzgetc(gzFile file); +/* + Read and decompress one byte from file. gzgetc returns this byte or -1 + in case of end of file or error. This is implemented as a macro for speed. + As such, it does not do all of the checking the other functions do. I.e. + it does not check to see if file is NULL, nor whether the structure file + points to has been clobbered or not. +*/ + +ZEXTERN int ZEXPORT gzungetc(int c, gzFile file); +/* + Push c back onto the stream for file to be read as the first character on + the next read. At least one character of push-back is always allowed. + gzungetc() returns the character pushed, or -1 on failure. gzungetc() will + fail if c is -1, and may fail if a character has been pushed but not read + yet. If gzungetc is used immediately after gzopen or gzdopen, at least the + output buffer size of pushed characters is allowed. (See gzbuffer above.) + The pushed character will be discarded if the stream is repositioned with + gzseek() or gzrewind(). +*/ + +ZEXTERN int ZEXPORT gzflush(gzFile file, int flush); +/* + Flush all pending output to file. The parameter flush is as in the + deflate() function. The return value is the zlib error number (see function + gzerror below). gzflush is only permitted when writing. + + If the flush parameter is Z_FINISH, the remaining data is written and the + gzip stream is completed in the output. If gzwrite() is called again, a new + gzip stream will be started in the output. gzread() is able to read such + concatenated gzip streams. + + gzflush should be called only when strictly necessary because it will + degrade compression if called too often. 
+*/ + +/* +ZEXTERN z_off_t ZEXPORT gzseek(gzFile file, + z_off_t offset, int whence); + + Set the starting position to offset relative to whence for the next gzread + or gzwrite on file. The offset represents a number of bytes in the + uncompressed data stream. The whence parameter is defined as in lseek(2); + the value SEEK_END is not supported. + + If the file is opened for reading, this function is emulated but can be + extremely slow. If the file is opened for writing, only forward seeks are + supported; gzseek then compresses a sequence of zeroes up to the new + starting position. + + gzseek returns the resulting offset location as measured in bytes from + the beginning of the uncompressed stream, or -1 in case of error, in + particular if the file is opened for writing and the new starting position + would be before the current position. +*/ + +ZEXTERN int ZEXPORT gzrewind(gzFile file); +/* + Rewind file. This function is supported only for reading. + + gzrewind(file) is equivalent to (int)gzseek(file, 0L, SEEK_SET). +*/ + +/* +ZEXTERN z_off_t ZEXPORT gztell(gzFile file); + + Return the starting position for the next gzread or gzwrite on file. + This position represents a number of bytes in the uncompressed data stream, + and is zero when starting, even if appending or reading a gzip stream from + the middle of a file using gzdopen(). + + gztell(file) is equivalent to gzseek(file, 0L, SEEK_CUR) +*/ + +/* +ZEXTERN z_off_t ZEXPORT gzoffset(gzFile file); + + Return the current compressed (actual) read or write offset of file. This + offset includes the count of bytes that precede the gzip stream, for example + when appending or when using gzdopen() for reading. When reading, the + offset does not include as yet unused buffered input. This information can + be used for a progress indicator. On error, gzoffset() returns -1. 
+*/ + +ZEXTERN int ZEXPORT gzeof(gzFile file); +/* + Return true (1) if the end-of-file indicator for file has been set while + reading, false (0) otherwise. Note that the end-of-file indicator is set + only if the read tried to go past the end of the input, but came up short. + Therefore, just like feof(), gzeof() may return false even if there is no + more data to read, in the event that the last read request was for the exact + number of bytes remaining in the input file. This will happen if the input + file size is an exact multiple of the buffer size. + + If gzeof() returns true, then the read functions will return no more data, + unless the end-of-file indicator is reset by gzclearerr() and the input file + has grown since the previous end of file was detected. +*/ + +ZEXTERN int ZEXPORT gzdirect(gzFile file); +/* + Return true (1) if file is being copied directly while reading, or false + (0) if file is a gzip stream being decompressed. + + If the input file is empty, gzdirect() will return true, since the input + does not contain a gzip stream. + + If gzdirect() is used immediately after gzopen() or gzdopen() it will + cause buffers to be allocated to allow reading the file to determine if it + is a gzip file. Therefore if gzbuffer() is used, it should be called before + gzdirect(). + + When writing, gzdirect() returns true (1) if transparent writing was + requested ("wT" for the gzopen() mode), or false (0) otherwise. (Note: + gzdirect() is not needed when writing. Transparent writing must be + explicitly requested, so the application already knows the answer. When + linking statically, using gzdirect() will include all of the zlib code for + gzip file reading and decompression, which may not be desired.) +*/ + +ZEXTERN int ZEXPORT gzclose(gzFile file); +/* + Flush all pending output for file, if necessary, close file and + deallocate the (de)compression state. 
Note that once file is closed, you + cannot call gzerror with file, since its structures have been deallocated. + gzclose must not be called more than once on the same file, just as free + must not be called more than once on the same allocation. + + gzclose will return Z_STREAM_ERROR if file is not valid, Z_ERRNO on a + file operation error, Z_MEM_ERROR if out of memory, Z_BUF_ERROR if the + last read ended in the middle of a gzip stream, or Z_OK on success. +*/ + +ZEXTERN int ZEXPORT gzclose_r(gzFile file); +ZEXTERN int ZEXPORT gzclose_w(gzFile file); +/* + Same as gzclose(), but gzclose_r() is only for use when reading, and + gzclose_w() is only for use when writing or appending. The advantage to + using these instead of gzclose() is that they avoid linking in zlib + compression or decompression code that is not used when only reading or only + writing respectively. If gzclose() is used, then both compression and + decompression code will be included in the application when linking to a static + zlib library. +*/ + +ZEXTERN const char * ZEXPORT gzerror(gzFile file, int *errnum); +/* + Return the error message for the last error which occurred on file. + errnum is set to zlib error number. If an error occurred in the file system + and not in the compression library, errnum is set to Z_ERRNO and the + application may consult errno to get the exact error code. + + The application must not modify the returned string. Future calls to + this function may invalidate the previously returned string. If file is + closed, then the string previously returned by gzerror will no longer be + available. + + gzerror() should be used to distinguish errors from end-of-file for those + functions above that do not distinguish those cases in their return values. +*/ + +ZEXTERN void ZEXPORT gzclearerr(gzFile file); +/* + Clear the error and end-of-file flags for file. This is analogous to the + clearerr() function in stdio.
This is useful for continuing to read a gzip + file that is being written concurrently. +*/ + +#endif /* !Z_SOLO */ + + /* checksum functions */ + +/* + These functions are not related to compression but are exported + anyway because they might be useful in applications using the compression + library. +*/ + +ZEXTERN uLong ZEXPORT adler32(uLong adler, const Bytef *buf, uInt len); +/* + Update a running Adler-32 checksum with the bytes buf[0..len-1] and + return the updated checksum. An Adler-32 value is in the range of a 32-bit + unsigned integer. If buf is Z_NULL, this function returns the required + initial value for the checksum. + + An Adler-32 checksum is almost as reliable as a CRC-32 but can be computed + much faster. + + Usage example: + + uLong adler = adler32(0L, Z_NULL, 0); + + while (read_buffer(buffer, length) != EOF) { + adler = adler32(adler, buffer, length); + } + if (adler != original_adler) error(); +*/ + +ZEXTERN uLong ZEXPORT adler32_z(uLong adler, const Bytef *buf, + z_size_t len); +/* + Same as adler32(), but with a size_t length. +*/ + +/* +ZEXTERN uLong ZEXPORT adler32_combine(uLong adler1, uLong adler2, + z_off_t len2); + + Combine two Adler-32 checksums into one. For two sequences of bytes, seq1 + and seq2 with lengths len1 and len2, Adler-32 checksums were calculated for + each, adler1 and adler2. adler32_combine() returns the Adler-32 checksum of + seq1 and seq2 concatenated, requiring only adler1, adler2, and len2. Note + that the z_off_t type (like off_t) is a signed integer. If len2 is + negative, the result has no meaning or utility. +*/ + +ZEXTERN uLong ZEXPORT crc32(uLong crc, const Bytef *buf, uInt len); +/* + Update a running CRC-32 with the bytes buf[0..len-1] and return the + updated CRC-32. A CRC-32 value is in the range of a 32-bit unsigned integer. + If buf is Z_NULL, this function returns the required initial value for the + crc. 
Pre- and post-conditioning (one's complement) is performed within this + function so it shouldn't be done by the application. + + Usage example: + + uLong crc = crc32(0L, Z_NULL, 0); + + while (read_buffer(buffer, length) != EOF) { + crc = crc32(crc, buffer, length); + } + if (crc != original_crc) error(); +*/ + +ZEXTERN uLong ZEXPORT crc32_z(uLong crc, const Bytef *buf, + z_size_t len); +/* + Same as crc32(), but with a size_t length. +*/ + +/* +ZEXTERN uLong ZEXPORT crc32_combine(uLong crc1, uLong crc2, z_off_t len2); + + Combine two CRC-32 check values into one. For two sequences of bytes, + seq1 and seq2 with lengths len1 and len2, CRC-32 check values were + calculated for each, crc1 and crc2. crc32_combine() returns the CRC-32 + check value of seq1 and seq2 concatenated, requiring only crc1, crc2, and + len2. +*/ + +/* +ZEXTERN uLong ZEXPORT crc32_combine_gen(z_off_t len2); + + Return the operator corresponding to length len2, to be used with + crc32_combine_op(). +*/ + +ZEXTERN uLong ZEXPORT crc32_combine_op(uLong crc1, uLong crc2, uLong op); +/* + Give the same result as crc32_combine(), using op in place of len2. op is + generated from len2 by crc32_combine_gen(). This will be faster than + crc32_combine() if the generated op is used more than once.
+*/ + + + /* various hacks, don't look :) */ + +/* deflateInit and inflateInit are macros to allow checking the zlib version + * and the compiler's view of z_stream: + */ +ZEXTERN int ZEXPORT deflateInit_(z_streamp strm, int level, + const char *version, int stream_size); +ZEXTERN int ZEXPORT inflateInit_(z_streamp strm, + const char *version, int stream_size); +ZEXTERN int ZEXPORT deflateInit2_(z_streamp strm, int level, int method, + int windowBits, int memLevel, + int strategy, const char *version, + int stream_size); +ZEXTERN int ZEXPORT inflateInit2_(z_streamp strm, int windowBits, + const char *version, int stream_size); +ZEXTERN int ZEXPORT inflateBackInit_(z_streamp strm, int windowBits, + unsigned char FAR *window, + const char *version, + int stream_size); +#ifdef Z_PREFIX_SET +# define z_deflateInit(strm, level) \ + deflateInit_((strm), (level), ZLIB_VERSION, (int)sizeof(z_stream)) +# define z_inflateInit(strm) \ + inflateInit_((strm), ZLIB_VERSION, (int)sizeof(z_stream)) +# define z_deflateInit2(strm, level, method, windowBits, memLevel, strategy) \ + deflateInit2_((strm),(level),(method),(windowBits),(memLevel),\ + (strategy), ZLIB_VERSION, (int)sizeof(z_stream)) +# define z_inflateInit2(strm, windowBits) \ + inflateInit2_((strm), (windowBits), ZLIB_VERSION, \ + (int)sizeof(z_stream)) +# define z_inflateBackInit(strm, windowBits, window) \ + inflateBackInit_((strm), (windowBits), (window), \ + ZLIB_VERSION, (int)sizeof(z_stream)) +#else +# define deflateInit(strm, level) \ + deflateInit_((strm), (level), ZLIB_VERSION, (int)sizeof(z_stream)) +# define inflateInit(strm) \ + inflateInit_((strm), ZLIB_VERSION, (int)sizeof(z_stream)) +# define deflateInit2(strm, level, method, windowBits, memLevel, strategy) \ + deflateInit2_((strm),(level),(method),(windowBits),(memLevel),\ + (strategy), ZLIB_VERSION, (int)sizeof(z_stream)) +# define inflateInit2(strm, windowBits) \ + inflateInit2_((strm), (windowBits), ZLIB_VERSION, \ + (int)sizeof(z_stream)) +# define 
inflateBackInit(strm, windowBits, window) \ + inflateBackInit_((strm), (windowBits), (window), \ + ZLIB_VERSION, (int)sizeof(z_stream)) +#endif + +#ifndef Z_SOLO + +/* gzgetc() macro and its supporting function and exposed data structure. Note + * that the real internal state is much larger than the exposed structure. + * This abbreviated structure exposes just enough for the gzgetc() macro. The + * user should not mess with these exposed elements, since their names or + * behavior could change in the future, perhaps even capriciously. They can + * only be used by the gzgetc() macro. You have been warned. + */ +struct gzFile_s { + unsigned have; + unsigned char *next; + z_off64_t pos; +}; +ZEXTERN int ZEXPORT gzgetc_(gzFile file); /* backward compatibility */ +#ifdef Z_PREFIX_SET +# undef z_gzgetc +# define z_gzgetc(g) \ + ((g)->have ? ((g)->have--, (g)->pos++, *((g)->next)++) : (gzgetc)(g)) +#else +# define gzgetc(g) \ + ((g)->have ? ((g)->have--, (g)->pos++, *((g)->next)++) : (gzgetc)(g)) +#endif + +/* provide 64-bit offset functions if _LARGEFILE64_SOURCE defined, and/or + * change the regular functions to 64 bits if _FILE_OFFSET_BITS is 64 (if + * both are true, the application gets the *64 functions, and the regular + * functions are changed to 64 bits) -- in case these are set on systems + * without large file support, _LFS64_LARGEFILE must also be true + */ +#ifdef Z_LARGE64 + ZEXTERN gzFile ZEXPORT gzopen64(const char *, const char *); + ZEXTERN z_off64_t ZEXPORT gzseek64(gzFile, z_off64_t, int); + ZEXTERN z_off64_t ZEXPORT gztell64(gzFile); + ZEXTERN z_off64_t ZEXPORT gzoffset64(gzFile); + ZEXTERN uLong ZEXPORT adler32_combine64(uLong, uLong, z_off64_t); + ZEXTERN uLong ZEXPORT crc32_combine64(uLong, uLong, z_off64_t); + ZEXTERN uLong ZEXPORT crc32_combine_gen64(z_off64_t); +#endif + +#if !defined(ZLIB_INTERNAL) && defined(Z_WANT64) +# ifdef Z_PREFIX_SET +# define z_gzopen z_gzopen64 +# define z_gzseek z_gzseek64 +# define z_gztell z_gztell64 +# define 
z_gzoffset z_gzoffset64 +# define z_adler32_combine z_adler32_combine64 +# define z_crc32_combine z_crc32_combine64 +# define z_crc32_combine_gen z_crc32_combine_gen64 +# else +# define gzopen gzopen64 +# define gzseek gzseek64 +# define gztell gztell64 +# define gzoffset gzoffset64 +# define adler32_combine adler32_combine64 +# define crc32_combine crc32_combine64 +# define crc32_combine_gen crc32_combine_gen64 +# endif +# ifndef Z_LARGE64 + ZEXTERN gzFile ZEXPORT gzopen64(const char *, const char *); + ZEXTERN z_off_t ZEXPORT gzseek64(gzFile, z_off_t, int); + ZEXTERN z_off_t ZEXPORT gztell64(gzFile); + ZEXTERN z_off_t ZEXPORT gzoffset64(gzFile); + ZEXTERN uLong ZEXPORT adler32_combine64(uLong, uLong, z_off_t); + ZEXTERN uLong ZEXPORT crc32_combine64(uLong, uLong, z_off_t); + ZEXTERN uLong ZEXPORT crc32_combine_gen64(z_off_t); +# endif +#else + ZEXTERN gzFile ZEXPORT gzopen(const char *, const char *); + ZEXTERN z_off_t ZEXPORT gzseek(gzFile, z_off_t, int); + ZEXTERN z_off_t ZEXPORT gztell(gzFile); + ZEXTERN z_off_t ZEXPORT gzoffset(gzFile); + ZEXTERN uLong ZEXPORT adler32_combine(uLong, uLong, z_off_t); + ZEXTERN uLong ZEXPORT crc32_combine(uLong, uLong, z_off_t); + ZEXTERN uLong ZEXPORT crc32_combine_gen(z_off_t); +#endif + +#else /* Z_SOLO */ + + ZEXTERN uLong ZEXPORT adler32_combine(uLong, uLong, z_off_t); + ZEXTERN uLong ZEXPORT crc32_combine(uLong, uLong, z_off_t); + ZEXTERN uLong ZEXPORT crc32_combine_gen(z_off_t); + +#endif /* !Z_SOLO */ + +/* undocumented functions */ +ZEXTERN const char * ZEXPORT zError(int); +ZEXTERN int ZEXPORT inflateSyncPoint(z_streamp); +ZEXTERN const z_crc_t FAR * ZEXPORT get_crc_table(void); +ZEXTERN int ZEXPORT inflateUndermine(z_streamp, int); +ZEXTERN int ZEXPORT inflateValidate(z_streamp, int); +ZEXTERN unsigned long ZEXPORT inflateCodesUsed(z_streamp); +ZEXTERN int ZEXPORT inflateResetKeep(z_streamp); +ZEXTERN int ZEXPORT deflateResetKeep(z_streamp); +#if defined(_WIN32) && !defined(Z_SOLO) +ZEXTERN gzFile ZEXPORT 
gzopen_w(const wchar_t *path, + const char *mode); +#endif +#if defined(STDC) || defined(Z_HAVE_STDARG_H) +# ifndef Z_SOLO +ZEXTERN int ZEXPORTVA gzvprintf(gzFile file, + const char *format, + va_list va); +# endif +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* ZLIB_H */ diff --git a/3rdparty/soc/lib/libz.so b/3rdparty/soc/lib/libz.so new file mode 100755 index 0000000..334c093 Binary files /dev/null and b/3rdparty/soc/lib/libz.so differ diff --git a/CMakeLists.txt b/CMakeLists.txt index 4207629..875e554 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -2,7 +2,7 @@ cmake_minimum_required(VERSION 3.6) cmake_policy(SET CMP0048 NEW) project("libsophon" - VERSION 0.5.0) + VERSION 0.5.1) set(CMAKE_CXX_STANDARD 11) diff --git a/bm-smi/CMakeLists.txt b/bm-smi/CMakeLists.txt index b1eb843..54e3e7d 100644 --- a/bm-smi/CMakeLists.txt +++ b/bm-smi/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 2.8) +cmake_minimum_required(VERSION 3.6) cmake_policy(SET CMP0046 NEW) cmake_policy(SET CMP0048 NEW) diff --git a/bm-smi/src/bm_smi_cmdline.cpp b/bm-smi/src/bm_smi_cmdline.cpp index 3449d8b..f70cf87 100644 --- a/bm-smi/src/bm_smi_cmdline.cpp +++ b/bm-smi/src/bm_smi_cmdline.cpp @@ -22,10 +22,10 @@ DECLARE_bool(helpshort); bm_smi_cmdline::bm_smi_cmdline(int argc, char *argv[]) { #ifndef SOC_MODE /* get and validate flags*/ - gflags::SetUsageMessage("command line brew\n" + gflags::SetUsageMessage("command line prompt\n" "usage: bm-smi [--ecc=on/off] [--file=/xx/yy.txt] [--dev=0/1...]" "[--start_dev=x] [--last_dev=y] [--text_format]" - " [--lms=500] [--recovery] [-loop] [--led=on/off/blink]\n" + " [--lms=500] [--recovery] [--loop] [--led=on/off/blink]\n" "ecc:\n" " set ecc status, default is off\n" "file:\n" @@ -39,19 +39,19 @@ bm_smi_cmdline::bm_smi_cmdline(int argc, char *argv[]) { "lms:\n" " how many ms of the sample interval, default is 500.\n" "loop:\n" - " if -loop (default): smi sample device every lms ms.\n" - " if -noloop: smi sample device only once.\n" 
+ " if --loop (default): smi sample device every lms ms.\n" + " if --noloop: smi sample device only once.\n" "recovery:\n" " recovery dev from fault to active status.\n" "text_format:\n" " if true only display attr value from start_dev to last_dev.\n" "led:\n" - " pcie card LED status: on/off/blink.\n" + " PCIE card LED status: on/off/blink.\n" "\n" "New usage: bm-smi [--opmode=display/ecc/led/recovery...]" "[--opval=on/off/...] [--file=/xx/yy.txt]" "[--dev=0/1...] [--start_dev=x] [--last_dev=y] [--text_format]" - "[--lms=500] [-loop]\n" + "[--lms=500] [--loop]\n" "opmode(default null):\n" " choose different mode,example:display, ecc, led, recovery\n" " display: means open bm-smi window and check info, use like ./bm-smi\n" @@ -69,9 +69,9 @@ bm_smi_cmdline::bm_smi_cmdline(int argc, char *argv[]) { "other flags have same usage, Both usage can be used!\n"); #else - gflags::SetUsageMessage("command line brew\n" + gflags::SetUsageMessage("command line prompt\n" "usage: bm-smi [--opmode=display] [--file=/xx/yy.txt]" - " [--lms=500] [-loop]\n" + " [--lms=500] [--loop]\n" "opmode:\n" " SOC mode just only use display.\n" "file:\n" @@ -79,8 +79,8 @@ bm_smi_cmdline::bm_smi_cmdline(int argc, char *argv[]) { "lms:\n" " how many ms of the sample interval, default is 500.\n" "loop:\n" - " if -loop (default): smi sample device every lms ms.\n" - " if -noloop: smi sample device only once.\n"); + " if --loop (default): smi sample device every lms ms.\n" + " if --noloop: smi sample device only once.\n"); #endif diff --git a/bm-smi/src/bm_smi_display.cpp b/bm-smi/src/bm_smi_display.cpp index a57f24f..c715cbe 100755 --- a/bm-smi/src/bm_smi_display.cpp +++ b/bm-smi/src/bm_smi_display.cpp @@ -188,27 +188,15 @@ static void bm_smi_display_format(std::ofstream &file, bool save_file) { "----------------------------------------------+\n"); break; case 2: - if ((g_driver_version >> 24) == 0x6) { - snprintf( - line_str, - BUFFER_LEN, - "| SDK Version:%9s LTS Driver Version: " - "%1d.%1d.%1d LTS 
|\n", - bm_smi_version, - (g_driver_version >> 16) & 0xff, - (g_driver_version >> 8) & 0xff, - g_driver_version & 0xff); - } else { - snprintf( - line_str, - BUFFER_LEN, - "| SDK Version:%9s LTS Driver Version: " - "%1d.%1d.%1d |\n", - bm_smi_version, - (g_driver_version >> 16) & 0xff, - (g_driver_version >> 8) & 0xff, - g_driver_version & 0xff); - } + snprintf( + line_str, + BUFFER_LEN, + "| Lib Version:%9s Driver Version: " + "%1d.%1d.%1d |\n", + bm_smi_version, + g_driver_version >> 16, + (g_driver_version >> 8) & 0xff, + g_driver_version & 0xff); break; case 3: snprintf(line_str, @@ -329,7 +317,7 @@ static void bm_smi_tpuv_to_str(int dev_id, char *s) { } else if (g_attr[dev_id].vdd_tpu_volt < 0) { snprintf(s, 5, "%s", " F "); } else { - snprintf(s, 7, "%dmV", g_attr[dev_id].vdd_tpu_volt); + snprintf(s, 13, "%dmV", g_attr[dev_id].vdd_tpu_volt); } } @@ -379,7 +367,12 @@ static void bm_smi_sn_to_str(int dev_id, char *s) { /* convert board type to string*/ static void bm_smi_board_type_to_str(int dev_id, char *s) { - snprintf(s, 6, "%s", g_attr[dev_id].board_type); + int length = strlen(g_attr[dev_id].board_type); + if (g_attr[dev_id].chip_mode == 0 && length < 5){ + snprintf(s, 10, "%s%s", " ",g_attr[dev_id].board_type); + } else { + snprintf(s, 10, "%s", g_attr[dev_id].board_type); + } } /* @@ -538,7 +531,7 @@ static void bm_smi_display_attr(int dev_id, char chipt_s[5]; char boardp_s[5]; char tpup_s[6]; - char tpuv_s[7]; + char tpuv_s[13]; char c12v_s[7]; char ecc_s[4]; char cnum_s[5]; @@ -552,7 +545,7 @@ static void bm_smi_display_attr(int dev_id, char tpuc_s[6]; char fan_s[4]; char tpu_util_s[6]; - char board_type_s[7]; + char board_type_s[10]; bm_smi_chipid_to_str(dev_id, chip_id_s); bm_smi_card_index_to_str(dev_id, card_index_s); @@ -585,17 +578,30 @@ static void bm_smi_display_attr(int dev_id, switch (i) { case 0: { if (g_attr[dev_id].board_attr) { - snprintf(line_str, - BUFFER_LEN, - "|%2s %5s-%-5s %5s %17s |%2d %4s %4s", - card_index_s, - chip_id_s, - 
board_type_s, - mode_s, - sn_s, - dev_id, - boardt_s, - chipt_s); + if (g_attr[dev_id].chip_mode == 0){ + snprintf(line_str, + BUFFER_LEN, + "|%2s %-10s %5s %17s |%2d %4s %4s", + card_index_s, + board_type_s, + mode_s, + sn_s, + dev_id, + boardt_s, + chipt_s); + } else { + snprintf(line_str, + BUFFER_LEN, + "|%2s %5s-%-5s %5s %17s |%2d %4s %4s", + card_index_s, + chip_id_s, + board_type_s, + mode_s, + sn_s, + dev_id, + boardt_s, + chipt_s); + } str_length = snprintf(color_str, BUFFER_LEN, " "); snprintf(after_color_str, BUFFER_LEN, @@ -607,10 +613,9 @@ static void bm_smi_display_attr(int dev_id, tpu_util_s); snprintf(whole_str, BUFFER_LEN, - "|%2s %5s-%-5s %5s %17s |%2d %4s %4s %5s " + "|%2s %-10s %5s %17s |%2d %4s %4s %5s " " %5s %3s %3s %5s |\n", card_index_s, - chip_id_s, board_type_s, mode_s, sn_s, @@ -1008,6 +1013,19 @@ static void bm_smi_fetch_all(bm_handle_t handle, bm_smi_get_attr(bmctl_device, i); bm_smi_get_proc_gmem(bmctl_device, i); #endif + + char *sg_env; + + sg_env = getenv("SOPHONVM"); + if (sg_env != NULL && (*sg_env) == 'y') { + g_attr[i].card_index = i/3; + g_attr[i].board_power = (g_attr[i].board_power)/2; + g_attr[i].atx12v_curr = (g_attr[i].atx12v_curr)/2; + } else if (sg_env != NULL && !strncmp(sg_env, "SOPHONVM", 8)) { + g_attr[i].board_power = (g_attr[i].board_power)/2; + g_attr[i].atx12v_curr = (g_attr[i].atx12v_curr)/2; + } + if (dev_cnt == 1) { g_attr[i].board_endline = 1; g_attr[i].board_attr = 1; @@ -1074,7 +1092,7 @@ static void bm_smi_print_text_info(HANDLE bmctl_device, int start_dev, int last_ char boardt_s[5]; char chipt_s[5]; char tpup_s[6]; - char tpuv_s[7]; + char tpuv_s[13]; char ecc_s[4]; char cnum_s[5]; char busid_s[12]; @@ -1084,7 +1102,7 @@ static void bm_smi_print_text_info(HANDLE bmctl_device, int start_dev, int last_ char currclk_s[6]; char tpuc_s[6]; char tpu_util_s[6]; - char board_type_s[7]; + char board_type_s[10]; #ifdef __linux__ bm_smi_fetch_all(fd, last_dev - start_dev + 1, start_dev); @@ -1116,7 +1134,7 @@ 
static void bm_smi_print_text_info(HANDLE bmctl_device, int start_dev, int last_ bm_smi_tpu_util_to_str(i, tpu_util_s); bm_smi_board_type_to_str(i, board_type_s); - printf("%s-%s ", chip_id_s, board_type_s); + printf("%s ", board_type_s); printf("%s ", mode_s); printf("chip%d: %d ", i - start_dev, g_attr[i].dev_id); printf("%s ", busid_s); @@ -1210,6 +1228,13 @@ int bm_smi_display::validate_input_para() { dev_cnt = g_cmdline.m_last_dev - g_cmdline.m_start_dev + 1; } #endif + char *sg_env; + + sg_env = getenv("SOPHONVM"); + if (sg_env != NULL && strlen(sg_env) != 8 && !strncmp(sg_env, "SOPHONVM", 8)) { + start_dev = atoi(sg_env + 8) * 3; + dev_cnt = 3; + } /* check lms value */ if (g_cmdline.m_lms < 300) { diff --git a/bm-smi/src/bm_smi_display_memory_detail.cpp b/bm-smi/src/bm_smi_display_memory_detail.cpp index 1295466..6e74381 100644 --- a/bm-smi/src/bm_smi_display_memory_detail.cpp +++ b/bm-smi/src/bm_smi_display_memory_detail.cpp @@ -183,27 +183,15 @@ static void bm_smi_display_format(std::ofstream &file, bool save_file) { "----------------------------------------------+\n"); break; case 2: - if ((g_driver_version >> 24) == 0x6) { - snprintf( - line_str, - BUFFER_LEN, - "| SDK Version:%9s LTS Driver Version: " - "%1d.%1d.%1d LTS |\n", - bm_smi_version, - (g_driver_version >> 16) & 0xff, - (g_driver_version >> 8) & 0xff, - g_driver_version & 0xff); - } else { - snprintf( - line_str, - BUFFER_LEN, - "| SDK Version:%9s LTS Driver Version: " - "%1d.%1d.%1d |\n", - bm_smi_version, - (g_driver_version >> 16) & 0xff, - (g_driver_version >> 8) & 0xff, - g_driver_version & 0xff); - } + snprintf( + line_str, + BUFFER_LEN, + "| Lib Version:%9s Driver Version: " + "%1d.%1d.%1d |\n", + bm_smi_version, + g_driver_version >> 16, + (g_driver_version >> 8) & 0xff, + g_driver_version & 0xff); break; case 3: snprintf(line_str, @@ -226,7 +214,7 @@ static void bm_smi_display_format(std::ofstream &file, bool save_file) { case 6: snprintf(line_str, BUFFER_LEN, "| |" - " Heap0 Heap1 
Heap2 Vpu-Memory-Usage|\n"); + " Heap0 Heap1 Heap2 Vpu-Fw-Memory-Usage|\n"); break; case 7: snprintf(line_str, @@ -329,7 +317,7 @@ static void bm_smi_tpuv_to_str(int dev_id, char *s) { } else if (g_attr[dev_id].vdd_tpu_volt < 0) { snprintf(s, 5, "%s", " F "); } else { - snprintf(s, 7, "%dmV", g_attr[dev_id].vdd_tpu_volt); + snprintf(s, 13, "%dmV", g_attr[dev_id].vdd_tpu_volt); } } @@ -376,7 +364,12 @@ static void bm_smi_sn_to_str(int dev_id, char *s) { /* convert board type to string*/ static void bm_smi_board_type_to_str(int dev_id, char *s) { - snprintf(s, 6, "%s", g_attr[dev_id].board_type); + int length = strlen(g_attr[dev_id].board_type); + if (g_attr[dev_id].chip_mode == 0 && length < 5){ + snprintf(s, 10, "%s%s", " ",g_attr[dev_id].board_type); + } else { + snprintf(s, 10, "%s", g_attr[dev_id].board_type); + } } /* @@ -433,7 +426,7 @@ static void bm_smi_maxclk_to_str(int dev_id, char *s) { } else if (g_attr[dev_id].tpu_max_clock == ATTR_FAULT_VALUE) { snprintf(s, 5, "%s", " F "); } else { - snprintf(s, 6, "%dM", g_attr[dev_id].tpu_max_clock); + snprintf(s, 13, "%dM", g_attr[dev_id].tpu_max_clock); } } @@ -444,7 +437,7 @@ static void bm_smi_currclk_to_str(int dev_id, char *s) { } else if (g_attr[dev_id].tpu_current_clock == ATTR_FAULT_VALUE) { snprintf(s, 5, "%s", " F "); } else { - snprintf(s, 6, "%dM", g_attr[dev_id].tpu_current_clock); + snprintf(s, 13, "%dM", g_attr[dev_id].tpu_current_clock); } } @@ -536,7 +529,7 @@ static void bm_smi_display_attr(int dev_id, char chipt_s[5]; char boardp_s[5]; char tpup_s[6]; - char tpuv_s[7]; + char tpuv_s[13]; char c12v_s[7]; char ecc_s[4]; char cnum_s[5]; @@ -544,13 +537,13 @@ static void bm_smi_display_attr(int dev_id, char busid_s[12]; char mode_s[5]; char minclk_s[5]; - char maxclk_s[6]; - char currclk_s[6]; + char maxclk_s[13]; + char currclk_s[13]; char maxp_s[5]; char tpuc_s[6]; char fan_s[4]; char tpu_util_s[6]; - char board_type_s[7]; + char board_type_s[10]; bm_smi_chipid_to_str(dev_id, chip_id_s); 
bm_smi_card_index_to_str(dev_id, card_index_s); @@ -583,17 +576,30 @@ static void bm_smi_display_attr(int dev_id, switch (i) { case 0: { if (g_attr[dev_id].board_attr) { - snprintf(line_str, - BUFFER_LEN, - "|%2s %5s-%-5s %5s %17s |%2d %4s %4s", - card_index_s, - chip_id_s, - board_type_s, - mode_s, - sn_s, - dev_id, - boardt_s, - chipt_s); + if (g_attr[dev_id].chip_mode == 0){ + snprintf(line_str, + BUFFER_LEN, + "|%2s %-10s %5s %17s |%2d %4s %4s", + card_index_s, + board_type_s, + mode_s, + sn_s, + dev_id, + boardt_s, + chipt_s); + } else { + snprintf(line_str, + BUFFER_LEN, + "|%2s %5s-%-5s %5s %17s |%2d %4s %4s", + card_index_s, + chip_id_s, + board_type_s, + mode_s, + sn_s, + dev_id, + boardt_s, + chipt_s); + } str_length = snprintf(color_str, BUFFER_LEN, " "); snprintf(after_color_str, BUFFER_LEN, @@ -605,10 +611,9 @@ static void bm_smi_display_attr(int dev_id, tpu_util_s); snprintf(whole_str, BUFFER_LEN, - "|%2s %5s-%-5s %5s %17s |%2d %4s %4s %5s " + "|%2s %-10s %5s %17s |%2d %4s %4s %5s " " %5s %3s %3s %5s |\n", card_index_s, - chip_id_s, board_type_s, mode_s, sn_s, @@ -1031,6 +1036,19 @@ static void bm_smi_fetch_all(bm_handle_t handle, bm_smi_get_attr(bmctl_device, i); bm_smi_get_proc_gmem(bmctl_device, i); #endif + + char *sg_env; + + sg_env = getenv("SOPHONVM"); + if (sg_env != NULL && (*sg_env) == 'y') { + g_attr[i].card_index = i/3; + g_attr[i].board_power = (g_attr[i].board_power)/2; + g_attr[i].atx12v_curr = (g_attr[i].atx12v_curr)/2; + } else if (sg_env != NULL && !strncmp(sg_env, "SOPHONVM", 8)) { + g_attr[i].board_power = (g_attr[i].board_power)/2; + g_attr[i].atx12v_curr = (g_attr[i].atx12v_curr)/2; + } + if (dev_cnt == 1) { g_attr[i].board_endline = 1; g_attr[i].board_attr = 1; @@ -1100,17 +1118,17 @@ static void bm_smi_print_text_info(HANDLE bmctl_device, int start_dev, int last_ char boardt_s[5]; char chipt_s[5]; char tpup_s[6]; - char tpuv_s[7]; + char tpuv_s[13]; char ecc_s[4]; char cnum_s[5]; char busid_s[12]; char mode_s[5]; char 
minclk_s[5]; - char maxclk_s[5]; - char currclk_s[5]; + char maxclk_s[13]; + char currclk_s[13]; char tpuc_s[6]; char tpu_util_s[6]; - char board_type_s[7]; + char board_type_s[10]; #ifdef __linux__ bm_smi_fetch_all(fd, last_dev - start_dev + 1, start_dev); @@ -1142,7 +1160,7 @@ static void bm_smi_print_text_info(HANDLE bmctl_device, int start_dev, int last_ bm_smi_tpu_util_to_str(i, tpu_util_s); bm_smi_board_type_to_str(i, board_type_s); - printf("%s-%s ", chip_id_s, board_type_s); + printf("%s ", board_type_s); printf("%s ", mode_s); printf("chip%d: %d ", i - start_dev, g_attr[i].dev_id); printf("%s ", busid_s); @@ -1239,6 +1257,14 @@ int bm_smi_display_memory_detail::validate_input_para() { } #endif + char *sg_env; + + sg_env = getenv("SOPHONVM"); + if (sg_env != NULL && strlen(sg_env) != 8 && !strncmp(sg_env, "SOPHONVM", 8)) { + start_dev = atoi(sg_env + 8) * 3; + dev_cnt = 3; + } + /* check lms value */ if (g_cmdline.m_lms < 300) { printf("invalid lsm = %d, it is less than 300\n", g_cmdline.m_lms); diff --git a/bm-smi/src/bm_smi_display_util_detail.cpp b/bm-smi/src/bm_smi_display_util_detail.cpp index 53649ea..15a49bc 100644 --- a/bm-smi/src/bm_smi_display_util_detail.cpp +++ b/bm-smi/src/bm_smi_display_util_detail.cpp @@ -187,27 +187,15 @@ static void bm_smi_display_format(std::ofstream &file, bool save_file) { "----------------------------------------------+\n"); break; case 2: - if ((g_driver_version >> 24) == 0x6) { - snprintf( - line_str, - BUFFER_LEN, - "| SDK Version:%9s LTS Driver Version: " - "%1d.%1d.%1d LTS |\n", - bm_smi_version, - (g_driver_version >> 16) & 0xff, - (g_driver_version >> 8) & 0xff, - g_driver_version & 0xff); - } else { - snprintf( - line_str, - BUFFER_LEN, - "| SDK Version:%9s LTS Driver Version: " - "%1d.%1d.%1d |\n", - bm_smi_version, - (g_driver_version >> 16) & 0xff, - (g_driver_version >> 8) & 0xff, - g_driver_version & 0xff); - } + snprintf( + line_str, + BUFFER_LEN, + "| Lib Version:%9s Driver Version: " + "%1d.%1d.%1d |\n", 
+ bm_smi_version, + g_driver_version >> 16, + (g_driver_version >> 8) & 0xff, + g_driver_version & 0xff); break; case 3: snprintf(line_str, @@ -338,7 +326,7 @@ static void bm_smi_tpuv_to_str(int dev_id, char *s) { } else if (g_attr[dev_id].vdd_tpu_volt < 0) { snprintf(s, 5, "%s", " F "); } else { - snprintf(s, 7, "%dmV", g_attr[dev_id].vdd_tpu_volt); + snprintf(s, 13, "%dmV", g_attr[dev_id].vdd_tpu_volt); } } @@ -385,7 +373,12 @@ static void bm_smi_sn_to_str(int dev_id, char *s) { /* convert board type to string*/ static void bm_smi_board_type_to_str(int dev_id, char *s) { - snprintf(s, 6, "%s", g_attr[dev_id].board_type); + int length = strlen(g_attr[dev_id].board_type); + if (g_attr[dev_id].chip_mode == 0 && length < 5){ + snprintf(s, 10, "%s%s", " ",g_attr[dev_id].board_type); + } else { + snprintf(s, 10, "%s", g_attr[dev_id].board_type); + } } /* @@ -537,12 +530,12 @@ static void bm_smi_get_vpu_decode(int dev_id, char *s) { for (int i = 0; i < MAX_NUM_VPU_CORE_BM1686 - 1; i++) { vpu_decoder += g_attr[dev_id].vpu_instant_usage[i]; } - snprintf(s, 5, "%d%%", vpu_decoder / 2); + snprintf(s, 13, "%d%%", vpu_decoder / 2); } else { for (int i = 0; i < MAX_NUM_VPU_CORE - 1; i++) { vpu_decoder += g_attr[dev_id].vpu_instant_usage[i]; } - snprintf(s, 5, "%d%%", vpu_decoder / 4); + snprintf(s, 13, "%d%%", vpu_decoder / 4); } #endif @@ -579,7 +572,7 @@ static void bm_smi_get_jpu_core_instace(int dev_id, char *s) { for (int i = 0; i < MAX_NUM_JPU_CORE; i++) { jpu_decoder += g_attr[dev_id].jpu_core_usage[i]; } - snprintf(s, 5, "%d%%", jpu_decoder / MAX_NUM_JPU_CORE); + snprintf(s, 13, "%d%%", jpu_decoder / MAX_NUM_JPU_CORE); #endif #else snprintf(s, 5, " N/A "); @@ -605,7 +598,7 @@ static void bm_smi_display_attr(int dev_id, char chipt_s[5]; char boardp_s[5]; char tpup_s[6]; - char tpuv_s[7]; + char tpuv_s[13]; char c12v_s[7]; char ecc_s[4]; char cnum_s[5]; @@ -619,10 +612,10 @@ static void bm_smi_display_attr(int dev_id, char tpuc_s[6]; char fan_s[4]; char tpu_util_s[6]; - 
char board_type_s[7]; - char vpu_decode[5]; + char board_type_s[10]; + char vpu_decode[13]; char vpu_encode[5]; - char jpu_instance_core[5]; + char jpu_instance_core[13]; bm_smi_chipid_to_str(dev_id, chip_id_s); bm_smi_card_index_to_str(dev_id, card_index_s); @@ -658,17 +651,30 @@ static void bm_smi_display_attr(int dev_id, switch (i) { case 0: { if (g_attr[dev_id].board_attr) { - snprintf(line_str, - BUFFER_LEN, - "|%2s %5s-%-5s %5s %17s |%2d %4s %4s", - card_index_s, - chip_id_s, - board_type_s, - mode_s, - sn_s, - dev_id, - boardt_s, - chipt_s); + if (g_attr[dev_id].chip_mode == 0){ + snprintf(line_str, + BUFFER_LEN, + "|%2s %-10s %5s %17s |%2d %4s %4s", + card_index_s, + board_type_s, + mode_s, + sn_s, + dev_id, + boardt_s, + chipt_s); + } else { + snprintf(line_str, + BUFFER_LEN, + "|%2s %5s-%-5s %5s %17s |%2d %4s %4s", + card_index_s, + chip_id_s, + board_type_s, + mode_s, + sn_s, + dev_id, + boardt_s, + chipt_s); + } str_length = snprintf(color_str, BUFFER_LEN, " "); snprintf(after_color_str, BUFFER_LEN, @@ -680,10 +686,9 @@ static void bm_smi_display_attr(int dev_id, tpu_util_s); snprintf(whole_str, BUFFER_LEN, - "|%2s %5s-%-5s %5s %17s |%2d %4s %4s %5s " + "|%2s %-10s %5s %17s |%2d %4s %4s %5s " " %5s %3s %3s %5s |\n", card_index_s, - chip_id_s, board_type_s, mode_s, sn_s, @@ -1123,6 +1128,19 @@ static void bm_smi_fetch_all(bm_handle_t handle, bm_smi_get_attr(bmctl_device, i); bm_smi_get_proc_gmem(bmctl_device, i); #endif + + char *sg_env; + + sg_env = getenv("SOPHONVM"); + if (sg_env != NULL && (*sg_env) == 'y') { + g_attr[i].card_index = i/3; + g_attr[i].board_power = (g_attr[i].board_power)/2; + g_attr[i].atx12v_curr = (g_attr[i].atx12v_curr)/2; + } else if (sg_env != NULL && !strncmp(sg_env, "SOPHONVM", 8)) { + g_attr[i].board_power = (g_attr[i].board_power)/2; + g_attr[i].atx12v_curr = (g_attr[i].atx12v_curr)/2; + } + if (dev_cnt == 1) { g_attr[i].board_endline = 1; g_attr[i].board_attr = 1; @@ -1192,17 +1210,17 @@ static void 
bm_smi_print_text_info(HANDLE bmctl_device, int start_dev, int last_ char boardt_s[5]; char chipt_s[5]; char tpup_s[6]; - char tpuv_s[7]; + char tpuv_s[13]; char ecc_s[4]; char cnum_s[5]; char busid_s[12]; char mode_s[5]; - char minclk_s[5]; - char maxclk_s[5]; - char currclk_s[5]; + char minclk_s[6]; + char maxclk_s[6]; + char currclk_s[6]; char tpuc_s[6]; char tpu_util_s[6]; - char board_type_s[7]; + char board_type_s[10]; #ifdef __linux__ bm_smi_fetch_all(fd, last_dev - start_dev + 1, start_dev); @@ -1234,7 +1252,7 @@ static void bm_smi_print_text_info(HANDLE bmctl_device, int start_dev, int last_ bm_smi_tpu_util_to_str(i, tpu_util_s); bm_smi_board_type_to_str(i, board_type_s); - printf("%s-%s ", chip_id_s, board_type_s); + printf("%s ", board_type_s); printf("%s ", mode_s); printf("chip%d: %d ", i - start_dev, g_attr[i].dev_id); printf("%s ", busid_s); @@ -1331,6 +1349,14 @@ int bm_smi_display_util_detail::validate_input_para() { } #endif + char *sg_env; + + sg_env = getenv("SOPHONVM"); + if (sg_env != NULL && strlen(sg_env) != 8 && !strncmp(sg_env, "SOPHONVM", 8)) { + start_dev = atoi(sg_env + 8) * 3; + dev_cnt = 3; + } + /* check lms value */ if (g_cmdline.m_lms < 300) { printf("invalid lsm = %d, it is less than 300\n", g_cmdline.m_lms); diff --git a/bm-smi/src/bm_smi_recovery.cpp b/bm-smi/src/bm_smi_recovery.cpp index d64f5a0..7477fd1 100644 --- a/bm-smi/src/bm_smi_recovery.cpp +++ b/bm-smi/src/bm_smi_recovery.cpp @@ -19,6 +19,7 @@ int bm_smi_recovery::validate_input_para() { #else if ((g_cmdline.m_dev == 0xff) || ((g_cmdline.m_dev < 0) || (g_cmdline.m_dev >= dev_cnt))) { printf("error dev = %d\n", g_cmdline.m_dev); + printf("error No input dev parameter, eg. 
bm-smi --recovery --dev=0\n"); return -EINVAL; } else { start_dev = g_cmdline.m_dev; @@ -57,7 +58,9 @@ int bm_smi_recovery::run_opmode() { ",and some servers will restart.\n" "For more information, please refer to the documents provided.\n"); printf("Are you sure to perform recovery option?(yes/no)\n"); - scanf("%s", conf); + if (0 >= scanf("%s", conf)) + printf("no input!\n"); + if (strcmp(conf, "yes") == 0 || strcmp(conf, "Y") == 0 || strcmp(conf, "y") == 0 || strcmp(conf, "YES") == 0) { if (dev_cnt == 1) { ret = ioctl(fd, BMCTL_DEV_RECOVERY, start_dev); diff --git a/bmlib/CMakeLists.txt b/bmlib/CMakeLists.txt index 7a8e5aa..b46486a 100644 --- a/bmlib/CMakeLists.txt +++ b/bmlib/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 2.8) +cmake_minimum_required(VERSION 3.6) cmake_policy(SET CMP0046 NEW) cmake_policy(SET CMP0048 NEW) set(CMAKE_PROJECT_VERSION ${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}.${PROJECT_VERSION_PATCH}) @@ -29,6 +29,14 @@ set(SRCS src/bmlib_profile.cpp src/linux/bmlib_ioctl.cpp src/bmlib_md5.cpp + src/rbtree.c +) + +execute_process( + COMMAND bash -c "find \"$(git rev-parse --show-toplevel)\" -type f -name \"update_bmlib_version.sh\" -exec bash {} \\;" + RESULT_VARIABLE result + OUTPUT_VARIABLE output + ERROR_VARIABLE error_output ) file(MAKE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/debian-lib") @@ -68,6 +76,10 @@ if("${PLATFORM}" STREQUAL "cmodel") target_link_libraries(${TARGET_NAME} dl pthread) endif() +if("${PLATFORM}" STREQUAL "pcie_riscv64") + add_definitions(-DSMMU_MODE=1) +endif() + install(TARGETS ${TARGET_NAME} LIBRARY DESTINATION lib COMPONENT libsophon) @@ -89,4 +101,4 @@ install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/include/ add_subdirectory(tools) -endif() \ No newline at end of file +endif() diff --git a/bmlib/include/bmlib_runtime.h b/bmlib/include/bmlib_runtime.h index 4231edf..eac8897 100644 --- a/bmlib/include/bmlib_runtime.h +++ b/bmlib/include/bmlib_runtime.h @@ -160,6 +160,32 @@ typedef struct sg_mem_desc { 
typedef struct sg_mem_desc sg_device_mem_t; typedef struct sg_mem_desc sg_system_mem_t; + +typedef struct bm_mem_desc_u64 { + union { + struct { +#ifdef __linux__ + unsigned long device_addr; +#else + unsigned long long device_addr; +#endif + unsigned int reserved; + int dmabuf_fd; + } device; + + struct { + void *system_addr; + unsigned int reserved0; + int reserved1; + } system; + } u; + + bm_mem_flags_t flags; + unsigned long long size; +} bm_mem_desc_u64_t; + +typedef struct bm_mem_desc_u64 bm_device_mem_u64_t; +typedef struct bm_mem_desc_u64 bm_system_mem_u64_t; #endif struct bm_context; @@ -215,6 +241,19 @@ tpu_kernel_module_t tpu_kernel_load_module_file_key(bm_handle_t handle, const ch */ bm_status_t tpu_kernel_unload_module(bm_handle_t handle, tpu_kernel_module_t p_module); +/** + * @name tpu_kernel_unload_module_from_core + * @brief To unload dyn file + * @ingroup bmlib_runtime + * + * @param [in] handle The device handle + * @param [in] p_module dyn lib ptr + * @param [in] core_id core id + * @retval BM_SUCCESS Succeeds. + * Other code Fails. 
+ */ +bm_status_t tpu_kernel_unload_module_from_core(bm_handle_t handle, tpu_kernel_module_t p_module, int core_id); + /** * @name tpu_kernel_free_module * @brief To free p_module when not use @@ -239,6 +278,19 @@ bm_status_t tpu_kernel_free_module(bm_handle_t handle, tpu_kernel_module_t p_mod */ tpu_kernel_module_t tpu_kernel_load_module(bm_handle_t handle, const char *data, size_t length); +/** + * @name tpu_kernel_load_module_to_core + * @brief To load dyn module + * @ingroup bmlib_runtime + * + * @param [in] handle The device handle + * @param [in] data dyn module + * @param [in] length dyn module size + * @param [in] core_id core id + * @retval dyn lib ptr + */ +tpu_kernel_module_t tpu_kernel_load_module_to_core(bm_handle_t handle, const char *data, size_t length, int core_id); + /** * @name tpu_kernel_get_function * @brief To get function from lib @@ -251,6 +303,19 @@ tpu_kernel_module_t tpu_kernel_load_module(bm_handle_t handle, const char *data, */ tpu_kernel_function_t tpu_kernel_get_function(bm_handle_t handle, tpu_kernel_module_t module, const char *function); +/** + * @name tpu_kernel_get_function_from_core + * @brief To get function from lib + * @ingroup bmlib_runtime + * + * @param [in] handle The device handle + * @param [in] module dyn module + * @param [in] function function name + * @param [in] core_id core id + * @retval function id + */ +tpu_kernel_function_t tpu_kernel_get_function_from_core(bm_handle_t handle, tpu_kernel_module_t module, const char *function, int core_id); + /** * @name tpu_kernel_launch * @brief To launch function with sync @@ -265,6 +330,21 @@ tpu_kernel_function_t tpu_kernel_get_function(bm_handle_t handle, tpu_kernel_mod */ bm_status_t tpu_kernel_launch(bm_handle_t handle, tpu_kernel_function_t function, void *args, size_t size); +/** + * @name tpu_kernel_launch_from_core + * @brief To launch function with sync + * @ingroup bmlib_runtime + * + * @param [in] handle The device handle + * @param [in] function function id + * 
@param [in] args function args + * @param [in] size args size + * @param [in] core_id core id + * @retval BM_SUCCESS Succeeds. + * Other code Fails. + */ +bm_status_t tpu_kernel_launch_from_core(bm_handle_t handle, tpu_kernel_function_t function, void *args, size_t size, int core_id); + /** * @name tpu_kernel_launch_async * @brief To launch function with async @@ -279,6 +359,21 @@ bm_status_t tpu_kernel_launch(bm_handle_t handle, tpu_kernel_function_t function */ bm_status_t tpu_kernel_launch_async(bm_handle_t handle, tpu_kernel_function_t function, void *args, size_t size); +/** + * @name tpu_kernel_launch_async_from_core + * @brief To launch function with async + * @ingroup bmlib_runtime + * + * @param [in] handle The device handle + * @param [in] function function id + * @param [in] args function args + * @param [in] size args size + * @param [in] core_id core_id + * @retval BM_SUCCESS Succeeds. + * Other code Fails. + */ +bm_status_t tpu_kernel_launch_async_from_core(bm_handle_t handle, tpu_kernel_function_t function, void *args, size_t size, int core_id); + /** * @name tpu_kernel_sync * @brief To sync @@ -392,6 +487,17 @@ DECL_EXPORT bm_mem_type_t bm_mem_get_type(struct bm_mem_desc mem); */ DECL_EXPORT bm_mem_type_t sg_mem_get_type(struct sg_mem_desc mem); +/** + * @name bm_mem_get_type_u64 + * @brief To get a memory descriptor's type + * @ingroup bmlib_runtime + * + * @param [in] mem The memory descriptor queried + * @retval BM_MEM_TYPE_DEVICE Device global memory + * @retval BM_MEM_TYPE_SYSTEM Host user memory + */ +DECL_EXPORT bm_mem_type_t bm_mem_get_type_u64(struct bm_mem_desc_u64 mem); + /** * @name bm_mem_get_device_addr * @brief To get a device memory descriptor's address @@ -412,6 +518,16 @@ DECL_EXPORT unsigned long long bm_mem_get_device_addr(struct bm_mem_desc mem); */ DECL_EXPORT unsigned long long sg_mem_get_device_addr(struct sg_mem_desc mem); +/** + * @name bm_mem_get_device_addr_u64 + * @brief To get a device memory descriptor's address + * 
@ingroup bmlib_runtime + * + * @param [in] mem The device memory descriptor queried + * @retval unsigned long long The device memory address + */ +DECL_EXPORT unsigned long long bm_mem_get_device_addr_u64(struct bm_mem_desc_u64 mem); + /** * @name bm_mem_set_device_addr * @brief To set a device memory descriptor's address @@ -432,6 +548,16 @@ DECL_EXPORT void bm_mem_set_device_addr(struct bm_mem_desc* pmem, unsigned long */ DECL_EXPORT void sg_mem_set_device_addr(struct sg_mem_desc* pmem, unsigned long long addr); +/** + * @name bm_mem_set_device_addr_u64 + * @brief To set a device memory descriptor's address + * @ingroup bmlib_runtime + * + * @param [in] pmem The device memory descriptor pointer + * @param [in] addr The new device address of the device memory + */ +DECL_EXPORT void bm_mem_set_device_addr_u64(struct bm_mem_desc_u64* pmem, unsigned long long addr); + /** * @name bm_mem_get_device_size * @brief To get a device memory descriptor's size @@ -452,6 +578,16 @@ DECL_EXPORT unsigned int bm_mem_get_device_size(struct bm_mem_desc mem); */ DECL_EXPORT unsigned long long sg_mem_get_device_size(struct sg_mem_desc mem); +/** + * @name bm_mem_get_device_size_u64 + * @brief To get a device memory descriptor's size + * @ingroup bmlib_runtime + * + * @param [in] mem The device memory descriptor queried + * @retval unsigned long long The device memory's size in bytes + */ +DECL_EXPORT unsigned long long bm_mem_get_device_size_u64(struct bm_mem_desc_u64 mem); + /** * @name bm_mem_set_device_size * @brief To set a device memory descriptor's size @@ -472,6 +608,16 @@ DECL_EXPORT void bm_mem_set_device_size(struct bm_mem_desc* pmem, unsigned int s */ DECL_EXPORT void sg_mem_set_device_size(struct sg_mem_desc* pmem, unsigned long long size); +/** + * @name bm_mem_set_device_size_u64 + * @brief To set a device memory descriptor's size + * @ingroup bmlib_runtime + * + * @param [out] pmem The device memory descriptor pointer + * @param [in] size The new device memory size (in 
bytes) of the device memory + */ +DECL_EXPORT void bm_mem_set_device_size_u64(struct bm_mem_desc_u64* pmem, unsigned long long size); + /** * @name bm_set_device_mem * @brief To fill in a device memory descriptor with size and address @@ -496,6 +642,18 @@ DECL_EXPORT void bm_set_device_mem(bm_device_mem_t* pmem, unsigned int size, DECL_EXPORT void sg_set_device_mem(sg_device_mem_t* pmem, unsigned long long size, unsigned long long addr); +/** + * @name bm_set_device_mem_u64 + * @brief To fill in a device memory descriptor with size and address + * @ingroup bmlib_runtime + * + * @param [in] pmem The device memory descriptor pointer + * @param [in] size The device memory descriptor's size + * @param [in] addr The device memory descriptor's address + */ +DECL_EXPORT void bm_set_device_mem_u64(bm_device_mem_u64_t* pmem, unsigned long long size, + unsigned long long addr); + /** * @name bm_mem_from_device * @brief To create a device memory descriptor from address and size @@ -520,6 +678,18 @@ DECL_EXPORT bm_device_mem_t bm_mem_from_device(unsigned long long device_addr, DECL_EXPORT sg_device_mem_t sg_mem_from_device(unsigned long long device_addr, unsigned long long len); +/** + * @name bm_mem_from_device_u64 + * @brief To create a device memory descriptor from address and size + * @ingroup bmlib_runtime + * + * @param [in] device_addr The device memory address + * @param [in] len The device memory size + * @retval bm_device_mem_t The device memory descriptor created + */ +DECL_EXPORT bm_device_mem_u64_t bm_mem_from_device_u64(unsigned long long device_addr, + unsigned long long len); + /** * @name bm_mem_get_system_addr * @brief To get a system memory descriptor's address @@ -540,6 +710,16 @@ DECL_EXPORT void *bm_mem_get_system_addr(struct bm_mem_desc mem); */ DECL_EXPORT void *sg_mem_get_system_addr(struct sg_mem_desc mem); +/** + * @name bm_mem_get_system_addr_u64 + * @brief To get a system memory descriptor's address + * @ingroup bmlib_runtime + * + * @param [in] 
mem The system memory descriptor + * @retval void * The system memory descriptor's address + */ +DECL_EXPORT void *bm_mem_get_system_addr_u64(struct bm_mem_desc_u64 mem); + /** * @name bm_mem_set_system_addr * @brief To set a system memory descriptor's address @@ -560,6 +740,16 @@ DECL_EXPORT void bm_mem_set_system_addr(struct bm_mem_desc* pmem, void *addr); */ DECL_EXPORT void sg_mem_set_system_addr(struct sg_mem_desc* pmem, void *addr); +/** + * @name bm_mem_set_system_addr_u64 + * @brief To set a system memory descriptor's address + * @ingroup bmlib_runtime + * + * @param [in] pmem The system memory descriptor pointer + * @param [in] addr The system memory address + */ +DECL_EXPORT void bm_mem_set_system_addr_u64(struct bm_mem_desc_u64* pmem, void *addr); + /** * @name bm_mem_from_system * @brief To create a system memory descriptor with the given system address @@ -612,6 +802,22 @@ DECL_EXPORT bm_status_t sg_malloc_neuron_device(bm_handle_t handle, sg_device_me unsigned long long n, unsigned long long c, unsigned long long h, unsigned long long w); +/** + * @name bm_malloc_neuron_device_u64 + * @brief To malloc device memory according to a tensor shape + * (each neuron is 32 bits) + * @ingroup bmlib_runtime + * + * @param [in] handle The device handle + * @param [out] pmem The result device memory descriptor + * @param [in] n, c, h, w The shape of the input tensor + * @retval BM_SUCCESS Succeeds. + * Other code Fails. 
+ */ +DECL_EXPORT bm_status_t bm_malloc_neuron_device_u64(bm_handle_t handle, bm_device_mem_u64_t *pmem, + unsigned long long n, unsigned long long c, + unsigned long long h, unsigned long long w); + /** * @name bm_malloc_device_dword * @brief To malloc device memory in size of dword (32 bits) @@ -640,6 +846,20 @@ DECL_EXPORT bm_status_t bm_malloc_device_dword(bm_handle_t handle, bm_device_mem DECL_EXPORT bm_status_t sg_malloc_device_dword(bm_handle_t handle, sg_device_mem_t *pmem, unsigned long long count); +/** + * @name bm_malloc_device_dword_u64 + * @brief To malloc device memory in size of dword (32 bits) + * @ingroup bmlib_runtime + * + * @param [in] handle The device handle + * @param [out] pmem The result device memory descriptor + * @param [in] count The number of dwords(32bits) to allocate + * @retval BM_SUCCESS Succeeds. + * Other code Fails. + */ +DECL_EXPORT bm_status_t bm_malloc_device_dword_u64(bm_handle_t handle, bm_device_mem_u64_t *pmem, + unsigned long long count); + /** * @name bm_malloc_device_byte * @brief To malloc device memory in size of byte @@ -654,6 +874,20 @@ DECL_EXPORT bm_status_t sg_malloc_device_dword(bm_handle_t handle, sg_device_mem DECL_EXPORT bm_status_t bm_malloc_device_byte(bm_handle_t handle, bm_device_mem_t *pmem, unsigned int size); +/** + * @name bm_malloc_device_mem + * @brief To malloc device memory in size of byte and output paddr + * @ingroup bmlib_runtime + * + * @param [in] handle The device handle + * @param [out] paddr The result malloc device memory addr + * @param [in] heap_id The heap where to allocate 0/1/2 + * @param [in] size The number of bytes to allocate + * @retval paddr + */ +DECL_EXPORT bm_status_t bm_malloc_device_mem(bm_handle_t handle, unsigned long long *paddr, + int heap_id, unsigned long long size); + /** * @name sg_malloc_device_byte * @brief To malloc device memory in size of byte @@ -668,6 +902,20 @@ DECL_EXPORT bm_status_t bm_malloc_device_byte(bm_handle_t handle, bm_device_mem_ DECL_EXPORT 
bm_status_t sg_malloc_device_byte(bm_handle_t handle, sg_device_mem_t *pmem, unsigned long long size); +/** + * @name bm_malloc_device_byte_u64 + * @brief To malloc device memory in size of byte + * @ingroup bmlib_runtime + * + * @param [in] handle The device handle + * @param [out] pmem The result device memory descriptor + * @param [in] size The number of bytes to allocate + * @retval BM_SUCCESS Succeeds. + * Other code Fails. + */ +DECL_EXPORT bm_status_t bm_malloc_device_byte_u64(bm_handle_t handle, bm_device_mem_u64_t *pmem, + unsigned long long size); + /** * @name bm_malloc_device_byte_heap * @brief To malloc device memory in size of byte within the specified heap @@ -698,6 +946,21 @@ DECL_EXPORT bm_status_t bm_malloc_device_byte_heap(bm_handle_t handle, bm_device DECL_EXPORT bm_status_t sg_malloc_device_byte_heap(bm_handle_t handle, sg_device_mem_t *pmem, int heap_id, unsigned long long size); +/** + * @name bm_malloc_device_byte_heap_u64 + * @brief To malloc device memory in size of byte within the specified heap + * @ingroup bmlib_runtime + * + * @param [in] handle The device handle + * @param [out] pmem The result device memory descriptor + * @param [in] heap_id The heap where to allocate 0/1/2 + * @param [in] size The number of bytes to allocate + * @retval BM_SUCCESS Succeeds. + * Other code Fails. 
+ */ +DECL_EXPORT bm_status_t bm_malloc_device_byte_heap_u64(bm_handle_t handle, bm_device_mem_u64_t *pmem, + int heap_id, unsigned long long size); + /** * @name bm_malloc_device_byte_heap_mask * @brief To malloc device memory in size of byte within the specified heaps @@ -728,6 +991,31 @@ DECL_EXPORT bm_status_t bm_malloc_device_byte_heap_mask(bm_handle_t handle, bm_d DECL_EXPORT bm_status_t sg_malloc_device_byte_heap_mask(bm_handle_t handle, sg_device_mem_t *pmem, int heap_id_mask, unsigned long long size); +/** + * @name bm_malloc_device_byte_heap_mask_u64 + * @brief To malloc device memory in size of byte within the specified heaps + * @ingroup bmlib_runtime + * + * @param [in] handle The device handle + * @param [out] pmem The result device memory descriptor + * @param [in] heap_id_mask The mask which heaps allocate from. each bit indicate one heap + * @param [in] size The number of bytes to allocate + * @retval BM_SUCCESS Succeeds. + * Other code Fails. + */ +DECL_EXPORT bm_status_t bm_malloc_device_byte_heap_mask_u64(bm_handle_t handle, bm_device_mem_u64_t *pmem, + int heap_id_mask, unsigned long long size); + +/** + * @name bm_free_device_mem + * @brief To free device memory and input paddr + * @ingroup bmlib_runtime + * + * @param [in] handle The device handle + * @param [in] paddr The device memory addr to free + */ +DECL_EXPORT void bm_free_device_mem(bm_handle_t ctx, unsigned long long paddr); + /** * @name bm_free_device * @brief To free device memory @@ -748,6 +1036,16 @@ DECL_EXPORT void bm_free_device(bm_handle_t handle, bm_device_mem_t mem); */ DECL_EXPORT void sg_free_device(bm_handle_t handle, sg_device_mem_t mem); +/** + * @name bm_free_device_u64 + * @brief To free device memory + * @ingroup bmlib_runtime + * + * @param [in] handle The device handle + * @param [in] mem The device memory descriptor to free + */ +DECL_EXPORT void bm_free_device_u64(bm_handle_t handle, bm_device_mem_u64_t mem); + /** * @name bm_gmem_arm_reserved_request * @brief 
To obtain the address of global memory reserved for arm926 @@ -781,6 +1079,35 @@ DECL_EXPORT void bm_gmem_arm_reserved_release(bm_handle_t handle); */ DECL_EXPORT bm_status_t bm_memcpy_s2d(bm_handle_t handle, bm_device_mem_t dst, void *src); +/** + * @name bm_memcpy_s2d_gather + * @brief To copy data from system virtual memory to device memory + * @ingroup bmlib_runtime + * + * @param [in] handle The device handle + * @param [in] dst The destination memory (device memory descriptor ) + * @param [in] argc The number of system memory and len (system memory, a void* pointer) + * @param [in] ... void *src and unsigned long long len + * + * @retval BM_SUCCESS Succeeds. + * Other code Fails. + */ +DECL_EXPORT bm_status_t bm_memcpy_s2d_gather(bm_handle_t handle, bm_device_mem_t dst, int argc, ...); + +/** + * @name bm_memcpy_d2s_scatter + * @brief To copy data from device memory to system virtual memory + * @ingroup bmlib_runtime + * + * @param [in] handle The device handle + * @param [in] src The source memory (device memory descriptor ) + * @param [in] argc The number of system memory and len (system memory, a void* pointer) + * @param [in] ... void *dst and unsigned long long len + * + * @retval BM_SUCCESS Succeeds. + * Other code Fails. + */ +DECL_EXPORT bm_status_t bm_memcpy_d2s_scatter(bm_handle_t handle, bm_device_mem_t src, int argc, ...); /** * @name bm_memcpy_p2p * @brief To copy data from one chip to another chip @@ -810,6 +1137,20 @@ DECL_EXPORT bm_status_t bm_memcpy_p2p(bm_handle_t handle_src, bm_device_mem_t sr */ DECL_EXPORT bm_status_t sg_memcpy_s2d(bm_handle_t handle, sg_device_mem_t dst, void *src); +/** + * @name bm_memcpy_s2d_u64 + * @brief To copy data from system memory to device memory + * @ingroup bmlib_runtime + * + * @param [in] handle The device handle + * @param [in] dst The destination memory (device memory descriptor ) + * @param [in] src The source memory (system memory, a void* pointer) + * + * @retval BM_SUCCESS Succeeds. 
+ * Other code Fails. + */ +DECL_EXPORT bm_status_t bm_memcpy_s2d_u64(bm_handle_t handle, bm_device_mem_u64_t dst, void *src); + /** * @name bm_memcpy_s2d_partial_offset * @brief To copy specified bytes of data from system memory to device memory @@ -850,6 +1191,26 @@ DECL_EXPORT bm_status_t sg_memcpy_s2d_partial_offset(bm_handle_t handle, unsigned long long size, unsigned long long offset); +/** + * @name bm_memcpy_s2d_partial_offset_u64 + * @brief To copy specified bytes of data from system memory to device memory + * with an offset in device memory address. + * @ingroup bmlib_runtime + * + * @param [in] handle The device handle + * @param [in] dst The destination memory (device memory descriptor) + * @param [in] src The source memory (system memory, a void* pointer) + * @param [in] size The size of data to copy (in bytes) + * @param [in] offset The offset of the device memory address + * + * @retval BM_SUCCESS Succeeds. + * Other code Fails. + */ +DECL_EXPORT bm_status_t bm_memcpy_s2d_partial_offset_u64(bm_handle_t handle, + bm_device_mem_u64_t dst, void *src, + unsigned long long size, + unsigned long long offset); + /** * @name bm_memcpy_s2d_partial * @brief To copy specified bytes of data from system memory to device memory @@ -882,6 +1243,22 @@ DECL_EXPORT bm_status_t bm_memcpy_s2d_partial(bm_handle_t handle, bm_device_mem_ DECL_EXPORT bm_status_t sg_memcpy_s2d_partial(bm_handle_t handle, sg_device_mem_t dst, void *src, unsigned long long size); +/** + * @name bm_memcpy_s2d_partial_u64 + * @brief To copy specified bytes of data from system memory to device memory + * @ingroup bmlib_runtime + * + * @param [in] handle The device handle + * @param [in] dst The destination memory (device memory descriptor) + * @param [in] src The source memory (system memory, a void* pointer) + * @param [in] size The size of data to copy (in bytes) + * + * @retval BM_SUCCESS Succeeds. + * Other code Fails. 
+ */ +DECL_EXPORT bm_status_t bm_memcpy_s2d_partial_u64(bm_handle_t handle, bm_device_mem_u64_t dst, + void *src, unsigned long long size); + /** * @name bm_memcpy_d2s * @brief To copy data from device memory to system memory @@ -910,6 +1287,20 @@ DECL_EXPORT bm_status_t bm_memcpy_d2s(bm_handle_t handle, void *dst, bm_device_m */ DECL_EXPORT bm_status_t sg_memcpy_d2s(bm_handle_t handle, void *dst, sg_device_mem_t src); +/** + * @name bm_memcpy_d2s_u64 + * @brief To copy data from device memory to system memory + * @ingroup bmlib_runtime + * + * @param [in] handle The device handle + * @param [in] dst The destination memory (system memory, a void* pointer) + * @param [in] src The source memory (device memory descriptor) + * + * @retval BM_SUCCESS Succeeds. + * Other code Fails. + */ +DECL_EXPORT bm_status_t bm_memcpy_d2s_u64(bm_handle_t handle, void *dst, bm_device_mem_u64_t src); + /** * @name bm_memcpy_d2s_partial_offset * @brief To copy specified bytes of data from device memory to system memory @@ -948,6 +1339,25 @@ DECL_EXPORT bm_status_t sg_memcpy_d2s_partial_offset(bm_handle_t handle, void *d sg_device_mem_t src, unsigned long long size, unsigned long long offset); +/** + * @name bm_memcpy_d2s_partial_offset_u64 + * @brief To copy specified bytes of data from device memory to system memory + * with an offset in device memory address. + * @ingroup bmlib_runtime + * + * @param [in] handle The device handle + * @param [in] dst The destination memory (system memory, a void* pointer) + * @param [in] src The source memory (device memory descriptor) + * @param [in] size The size of data to copy (in bytes) + * @param [in] offset The offset of the device memory address + * + * @retval BM_SUCCESS Succeeds. + * Other code Fails. 
+ */ +DECL_EXPORT bm_status_t bm_memcpy_d2s_partial_offset_u64(bm_handle_t handle, void *dst, + bm_device_mem_u64_t src, unsigned long long size, + unsigned long long offset); + /** * @name bm_memcpy_d2s_partial * @brief To copy specified bytes of data from device memory to system memory @@ -980,6 +1390,22 @@ DECL_EXPORT bm_status_t bm_memcpy_d2s_partial(bm_handle_t handle, void *dst, DECL_EXPORT bm_status_t sg_memcpy_d2s_partial(bm_handle_t handle, void *dst, sg_device_mem_t src, unsigned long long size); +/** + * @name bm_memcpy_d2s_partial_u64 + * @brief To copy specified bytes of data from device memory to system memory + * @ingroup bmlib_runtime + * + * @param [in] handle The device handle + * @param [in] dst The destination memory (system memory, a void* pointer) + * @param [in] src The source memory (device memory descriptor) + * @param [in] size The size of data to copy (in bytes) + * + * @retval BM_SUCCESS Data transfer succeeds. + * Other code Data transfer fails. + */ +DECL_EXPORT bm_status_t bm_memcpy_d2s_partial_u64(bm_handle_t handle, void *dst, + bm_device_mem_u64_t src, unsigned long long size); + /** * @name bm_memcpy_d2d * @brief To copy specified dwords of data from one piece of device memory @@ -1215,6 +1641,22 @@ DECL_EXPORT bm_status_t bm_mem_mmap_device_mem(bm_handle_t handle, bm_device_mem DECL_EXPORT bm_status_t sg_mem_mmap_device_mem(bm_handle_t handle, sg_device_mem_t *dmem, unsigned long long *vmem); +/** + * @name bm_mem_mmap_device_mem_u64 + * @brief To map a piece of device memory to user space with cache enabled. + * (only valid in SoC mode; Not supported in PCIE mode). + * @ingroup bmlib_runtime + * + * @param [in] handle The device handle + * @param [in] dev_mem The device memory to map + * @param [out] vmem The virtual address of the mapped device memory + * + * @retval BM_SUCCESS Succeeds. + * Other code Fails. 
+ */ +DECL_EXPORT bm_status_t bm_mem_mmap_device_mem_u64(bm_handle_t handle, bm_device_mem_u64_t *dmem, + unsigned long long *vmem); + /*******************memory map functions *************************************/ /** * @name bm_mem_mmap_device_mem_no_cache @@ -1249,6 +1691,22 @@ DECL_EXPORT bm_status_t bm_mem_mmap_device_mem_no_cache(bm_handle_t handle, bm_d DECL_EXPORT bm_status_t sg_mem_mmap_device_mem_no_cache(bm_handle_t handle, sg_device_mem_t *dmem, unsigned long long *vmem); +/** + * @name bm_mem_mmap_device_mem_no_cache_u64 + * @brief To map a piece of device memory to user space with cache disabled. + * (only valid in SoC mode; Not supported in PCIE mode). + * @ingroup bmlib_runtime + * + * @param [in] handle The device handle + * @param [in] dev_mem The device memory to map + * @param [out] vmem The virtual address of the mapped device memory + * + * @retval BM_SUCCESS Succeeds. + * Other code Fails. + */ +DECL_EXPORT bm_status_t bm_mem_mmap_device_mem_no_cache_u64(bm_handle_t handle, bm_device_mem_u64_t *dmem, + unsigned long long *vmem); + /** * @name bm_mem_vir_to_phy * @brief To get device mem address through the mapped virtual address . @@ -1298,6 +1756,23 @@ DECL_EXPORT bm_status_t bm_mem_invalidate_device_mem(bm_handle_t handle, DECL_EXPORT bm_status_t sg_mem_invalidate_device_mem(bm_handle_t handle, sg_device_mem_t *dmem); +/** + * @name bm_mem_invalidate_device_mem_u64 + * @brief To invalidate a piece of mapped device memory to maintain + * cache coherence + * (only valid in SoC mode; Not supported in PCIE mode). + * @ingroup bmlib_runtime + * + * @param [in] handle The device handle + * @param [in] dmem The device memory to invalidate + * + * @retval BM_SUCCESS Succeeds. + * Other code Fails. 
+ */ + +DECL_EXPORT bm_status_t bm_mem_invalidate_device_mem_u64(bm_handle_t handle, + bm_device_mem_u64_t *dmem); + /** * @name bm_mem_invalidate_partial_device_mem * @brief To invalidate part of mapped device memory to maintain @@ -1338,6 +1813,26 @@ DECL_EXPORT bm_status_t sg_mem_invalidate_partial_device_mem(bm_handle_t handle, unsigned long long offset, unsigned long long len); +/** + * @name bm_mem_invalidate_partial_device_mem_u64 + * @brief To invalidate part of mapped device memory to maintain + * cache coherence + * (only valid in SoC mode; Not supported in PCIE mode). + * @ingroup bmlib_runtime + * + * @param [in] handle The device handle + * @param [in] dmem The device memory to invalidate + * @param [in] offset The offset of device memory address + * @param [in] len The length of memory to invalidate in bytes + * + * @retval BM_SUCCESS Succeeds. + * Other code Fails. + */ +DECL_EXPORT bm_status_t bm_mem_invalidate_partial_device_mem_u64(bm_handle_t handle, + bm_device_mem_u64_t *dmem, + unsigned long long offset, + unsigned long long len); + /** * @name bm_mem_flush_device_mem * @brief To flush a piece of mapped device memory to maintain @@ -1368,6 +1863,21 @@ DECL_EXPORT bm_status_t bm_mem_flush_device_mem(bm_handle_t handle, bm_device_me */ DECL_EXPORT bm_status_t sg_mem_flush_device_mem(bm_handle_t handle, sg_device_mem_t *dmem); +/** + * @name bm_mem_flush_device_mem_u64 + * @brief To flush a piece of mapped device memory to maintain + * cache coherence + * (only valid in SoC mode; Not supported in PCIE mode). + * @ingroup bmlib_runtime + * + * @param [in] handle The device handle + * @param [in] dmem The device memory to flush + * + * @retval BM_SUCCESS Succeeds. + * Other code Fails. 
+ */ +DECL_EXPORT bm_status_t bm_mem_flush_device_mem_u64(bm_handle_t handle, bm_device_mem_u64_t *dmem); + /** * @name bm_mem_flush_partial_device_mem * @brief To flush part of mapped device memory to maintain @@ -1408,6 +1918,26 @@ DECL_EXPORT bm_status_t sg_mem_flush_partial_device_mem(bm_handle_t handle, unsigned long long offset, unsigned long long len); +/** + * @name bm_mem_flush_partial_device_mem_u64 + * @brief To flush part of mapped device memory to maintain + * cache coherence + * (only valid in SoC mode; Not supported in PCIE mode). + * @ingroup bmlib_runtime + * + * @param [in] handle The device handle + * @param [in] dmem The device memory to flush + * @param [in] offset The offset of device memory address + * @param [in] len The length of memory to flush in bytes + * + * @retval BM_SUCCESS Succeeds. + * Other code Fails. + */ +DECL_EXPORT bm_status_t bm_mem_flush_partial_device_mem_u64(bm_handle_t handle, + bm_device_mem_u64_t *dmem, + unsigned long long offset, + unsigned long long len); + /** * @name bm_mem_unmap_device_mem * @brief To unmap a piece of mapped device memory @@ -1438,6 +1968,21 @@ DECL_EXPORT bm_status_t bm_mem_unmap_device_mem(bm_handle_t handle, void *vmem, */ DECL_EXPORT bm_status_t sg_mem_unmap_device_mem(bm_handle_t handle, void *vmem, unsigned long long size); +/** + * @name bm_mem_unmap_device_mem_u64 + * @brief To unmap a piece of mapped device memory + * (only valid in SoC mode; Not supported in PCIE mode). + * @ingroup bmlib_runtime + * + * @param [in] handle The device handle + * @param [in] vmem The virtual address of the mapped device memory + * @param [in] size The size of unmapped memory + * + * @retval BM_SUCCESS Succeeds. + * Other code Fails. 
+ */ +DECL_EXPORT bm_status_t bm_mem_unmap_device_mem_u64(bm_handle_t handle, void *vmem, unsigned long long size); + /*******************api(kernel) functions *************************************/ /** * @name bm_flush @@ -1485,6 +2030,31 @@ DECL_EXPORT bm_status_t bm_handle_sync(bm_handle_t handle); */ DECL_EXPORT bm_status_t bm_thread_sync(bm_handle_t handle); +/** + * @name bm_set_sync_timeout + * @brief To set sync timeout ms. + * @ingroup bmlib_runtime + * + * @param [in] handle The device handle + * @param [in] timeout Sync timeout + * @retval BM_SUCCESS Succeeds. + * Other code Fails. + */ +DECL_EXPORT bm_status_t bm_set_sync_timeout(bm_handle_t handle, int timeout); + +/** + * @name bm_thread_sync_from_core + * @brief To synchronize APIs of the current thread on the specified core. The thread will block + * until all the outstanding APIs of the current thread are finished. + * @ingroup bmlib_runtime + * + * @param [in] handle The device handle + * @param [in] core_id The device core id + * @retval BM_SUCCESS Succeeds. + * Other code Fails. 
+ */ +DECL_EXPORT bm_status_t bm_thread_sync_from_core(bm_handle_t handle, int core_id); + /*******************trace and profile releated functions **********************/ typedef struct bm_profile { #ifdef __linux__ @@ -1493,6 +2063,7 @@ typedef struct bm_profile { unsigned long cdma_out_time; unsigned long cdma_out_counter; unsigned long tpu_process_time; + unsigned long tpu1_process_time; unsigned long sent_api_counter; unsigned long completed_api_counter; #else @@ -1501,6 +2072,7 @@ typedef struct bm_profile { unsigned long long cdma_out_time; unsigned long long cdma_out_counter; unsigned long long tpu_process_time; + unsigned long long tpu1_process_time; unsigned long long sent_api_counter; unsigned long long completed_api_counter; #endif @@ -2068,6 +2640,19 @@ DECL_EXPORT bm_status_t bm_get_gmem_heap_id(bm_handle_t handle, bm_device_mem_t DECL_EXPORT bm_status_t sg_get_gmem_heap_id(bm_handle_t handle, sg_device_mem_t *pmem, unsigned int *heapid); +/** + * @name bm_get_gmem_heap_id_u64 + * @brief To get the heap id of allocated global memory + * @ingroup bmlib_runtime + * + * @param [in] handle The device handle + * @param [in] pmem The allocted global memory + * @param [out] heapid The result of get heap id + * @retval BM_SUCCESS Succeeds. + * Other code Fails. + */ +DECL_EXPORT bm_status_t bm_get_gmem_heap_id_u64(bm_handle_t handle, bm_device_mem_u64_t *pmem, unsigned int *heapid); + /** * @name bm_get_gmem_total_heap_num * @brief To get the total heap num of global memory @@ -2356,7 +2941,7 @@ DECL_EXPORT bm_status_t bm_get_tpu_power(bm_handle_t handle, float *tpu_power); * @ingroup device management api * * @param [in] handle The device handle - * @param [out] tpu_volt + * @param [out] The tpu current volt * @retval BM_SUCCESS Succeeds. * Other code Fails. 
*/ @@ -2423,6 +3008,40 @@ DECL_EXPORT bm_status_t bm_get_dynfreq_status(bm_handle_t handle, int *dynfreq_s */ DECL_EXPORT bm_status_t bm_change_dynfreq_status(bm_handle_t handle, int new_status); +/** + * @name bm_get_tpu_scalar_num + * @brief To get the core number of TPU scalar + * @ingroup bmlib_runtime + * + * @param [in] handle The device handle + * @param [out] core_num The core number of TPU scalar + * @retval BM_SUCCESS Succeeds. + * Other code Fails. + */ +DECL_EXPORT bm_status_t bm_get_tpu_scalar_num(bm_handle_t handle, unsigned int *core_num); + +#define bm_get_tpu_core_num bm_get_tpu_scalar_num + +typedef struct{ + int core_id; + tpu_kernel_function_t func_id; + void *param_data; + unsigned int param_size; +} tpu_launch_param_t; + +/** + * @name tpu_kernel_launch_async_multi_cores + * @brief To launch function with async for multi cores + * @ingroup bmlib_runtime + * + * @param [in] handle The device handle + * @param [in] param_list param_list + * @param [in] param_num param_num + * @retval BM_SUCCESS Succeeds. + * Other code Fails. 
+ */ +bm_status_t tpu_kernel_launch_async_multicores(bm_handle_t handle, tpu_launch_param_t *param_list, int param_num); + #if defined(__cplusplus) } #endif diff --git a/bmlib/src/a53lite_api.cpp b/bmlib/src/a53lite_api.cpp index 58a2c5d..eaa82b0 100644 --- a/bmlib/src/a53lite_api.cpp +++ b/bmlib/src/a53lite_api.cpp @@ -350,7 +350,9 @@ tpu_kernel_module_t tpu_kernel_load_module_file(bm_handle_t handle, const char * else tmp = (const char *)module_file; - if (strlen(tmp) > LIB_MAX_NAME_LEN - 1) + size_t len = strlen(tmp); + + if (len > LIB_MAX_NAME_LEN - 1) { bmlib_log(A53LITE_RUNTIME_LOG_TAG, BMLIB_LOG_ERROR, @@ -359,7 +361,7 @@ tpu_kernel_module_t tpu_kernel_load_module_file(bm_handle_t handle, const char * free(p_module); return NULL; } - strncpy((char *)api_load_lib.lib_name, tmp, strlen(tmp)); + strncpy((char *)api_load_lib.lib_name, tmp, len); api_load_lib.lib_addr = (void *)dev_mem.u.device.device_addr; api_load_lib.size = file_size; read_md5((unsigned char *)module_file, api_load_lib.md5); @@ -388,7 +390,7 @@ tpu_kernel_module_t tpu_kernel_load_module_file(bm_handle_t handle, const char * free(p_module); return nullptr; } - strncpy(p_module->lib_name, tmp, strlen(tmp)); + strncpy(p_module->lib_name, tmp, len); memcpy(p_module->md5, api_load_lib.md5, MD5SUM_LEN); bm_free_device(handle, dev_mem); @@ -438,7 +440,9 @@ tpu_kernel_module_t tpu_kernel_load_module_file_key(bm_handle_t handle, const ch else tmp = (const char *)module_file; - if (strlen(tmp) > LIB_MAX_NAME_LEN - 1) + size_t len = strlen(tmp); + + if (len > LIB_MAX_NAME_LEN - 1) { bmlib_log(A53LITE_RUNTIME_LOG_TAG, BMLIB_LOG_ERROR, @@ -450,7 +454,7 @@ tpu_kernel_module_t tpu_kernel_load_module_file_key(bm_handle_t handle, const ch if (loaded_lib.loaded == 1) { - strncpy(p_module->lib_name, tmp, strlen(tmp)); + strncpy(p_module->lib_name, tmp, len); memcpy(p_module->md5, loaded_lib.md5, MD5SUM_LEN); return p_module; } @@ -476,7 +480,7 @@ tpu_kernel_module_t tpu_kernel_load_module_file_key(bm_handle_t 
handle, const ch return nullptr; } - strncpy((char *)api_load_lib.lib_name, tmp, strlen(tmp)); + strncpy((char *)api_load_lib.lib_name, tmp, len); api_load_lib.lib_addr = (void *)dev_mem.u.device.device_addr; api_load_lib.size = file_size; calc_md5((unsigned char *)key, size, api_load_lib.md5); @@ -505,7 +509,7 @@ tpu_kernel_module_t tpu_kernel_load_module_file_key(bm_handle_t handle, const ch free(p_module); return nullptr; } - strncpy(p_module->lib_name, tmp, strlen(tmp)); + strncpy(p_module->lib_name, tmp, len); memcpy(p_module->md5, api_load_lib.md5, MD5SUM_LEN); bm_free_device(handle, dev_mem); @@ -785,3 +789,13 @@ bm_status_t tpu_kernel_free_module(bm_handle_t handle, tpu_kernel_module_t p_mod return BM_SUCCESS; #endif } + +bm_status_t tpu_kernel_launch_async_multicores(bm_handle_t handle, tpu_launch_param_t *param_list, int param_num) { + for(int i=0; isize = copy_size; - ret = bm_memcpy_s2d_poll(handle, *dev_mem_ptr, file_buffer); + if (handle->cdma_iommu_mode == BMLIB_USER_SETUP_IOMMU) { + ret = bm_smmu_s2d_poll(handle, *dev_mem_ptr, file_buffer); + } else { + ret = bm_memcpy_s2d_poll(handle, *dev_mem_ptr, file_buffer); + } if (ret != BM_SUCCESS) { bmlib_log(BMCPU_RUNTIME_LOG_TAG, BMLIB_LOG_ERROR, @@ -144,7 +148,11 @@ bm_status_t bm_load_file(bm_handle_t handle, free(file_buffer); return BM_ERR_FAILURE; } - ret = bm_memcpy_d2s_poll(handle, file_buffer_verify, *dev_mem_ptr, copy_size); + if (handle->cdma_iommu_mode == BMLIB_USER_SETUP_IOMMU) { + ret = bm_smmu_d2s_poll(handle, file_buffer_verify, *dev_mem_ptr, copy_size); + } else { + ret = bm_memcpy_d2s_poll(handle, file_buffer_verify, *dev_mem_ptr, copy_size); + } if (ret != BM_SUCCESS) { bmlib_log( BMCPU_RUNTIME_LOG_TAG, @@ -859,21 +867,6 @@ bm_status_t bmcpu_start_mix_cpu(bm_handle_t handle, "bmcpu is not enable in misc info, %d\n", misc_info.a53_enable); return BM_ERR_FAILURE; } - ret = bm_send_api_ext(handle, - BM_API_ID_START_CPU, - (const u8 *)&api_start_cpu, - sizeof(bm_api_start_cpu_t), - 
&api_handle); - if (ret != BM_SUCCESS) { - bmlib_log(BMCPU_RUNTIME_LOG_TAG, - BMLIB_LOG_ERROR, - "start cpu send api error, ret %d\n", - ret); - return BM_ERR_FAILURE; - } - ret = bm_query_api_data(handle, BM_API_ID_START_CPU, api_handle, &data, 3000); - if (ret == 0) - return BM_SUCCESS; bmcpu_set_arm9_fw_mode(handle, FW_MIX_MODE); dev_mem.u.device.device_addr = 0x10100000; @@ -889,7 +882,7 @@ bm_status_t bmcpu_start_mix_cpu(bm_handle_t handle, } dev_mem.u.device.device_addr = 0x310000000; dev_mem.flags.u.mem_type = BM_MEM_TYPE_DEVICE; - dev_mem.size = 0x10000000; + dev_mem.size = 0x18000000; ret = bm_load_file(handle, core_file, &dev_mem, NULL); if (ret != BM_SUCCESS) { bmlib_log(BMCPU_RUNTIME_LOG_TAG, @@ -898,6 +891,7 @@ bm_status_t bmcpu_start_mix_cpu(bm_handle_t handle, ret); return BM_ERR_FAILURE; } + if (0 != platform_ioctl(handle, BMDEV_TRIGGER_BMCPU, (void *)&delay)) { bmlib_log(BMCPU_RUNTIME_LOG_TAG, BMLIB_LOG_ERROR, @@ -906,24 +900,19 @@ bm_status_t bmcpu_start_mix_cpu(bm_handle_t handle, bmcpu_set_cpu_status(handle, BMCPU_FAULT); return BM_ERR_FAILURE; } - ret = bm_send_api_ext(handle, - BM_API_ID_START_CPU, - (const u8 *)&api_start_cpu, - sizeof(bm_api_start_cpu_t), - &api_handle); - if (ret != BM_SUCCESS) { - bmlib_log(BMCPU_RUNTIME_LOG_TAG, - BMLIB_LOG_ERROR, - "start cpu send api error, ret %d\n", - ret); - bmcpu_set_cpu_status(handle, BMCPU_FAULT); - return BM_ERR_FAILURE; - } - sleep(15); + + sleep(30); platform_ioctl(handle, BMDEV_GET_VETH_STATE, &data); if (((u32)data) == 0x66668888) { bmcpu_set_cpu_status(handle, BMCPU_RUNNING); + + ret = bmcpu_sync_time_mix(handle); + if (ret != BM_SUCCESS) { + printf("ERROR!!! 
sync cpu time error!\n"); + return BM_ERR_FAILURE; + } + return BM_SUCCESS; } @@ -979,6 +968,27 @@ bm_status_t bmcpu_sync_time(bm_handle_t handle) { } } +bm_status_t bmcpu_sync_time_mix(bm_handle_t handle) { + u64 api_handle; + u64 data; + int ret; + bm_api_set_time_t api_set_time; + struct timeval tv; + struct timezone tz; + + (void)gettimeofday(&tv, &tz); + api_set_time.tv_sec = tv.tv_sec + UTC_8; + api_set_time.tv_usec = tv.tv_usec; + api_set_time.tz_minuteswest = tz.tz_minuteswest; + api_set_time.tz_dsttime = tz.tz_dsttime; + + if(0 == platform_ioctl(handle, BMDEV_SYNC_TIME_MIX, &api_set_time)){ + return BM_SUCCESS; + } else { + return BM_ERR_FAILURE; + } +} + int bmcpu_open_process(bm_handle_t handle, unsigned int flags, int timeout) { int ret; u64 api_handle; diff --git a/bmlib/src/bmlib_device.cpp b/bmlib/src/bmlib_device.cpp old mode 100644 new mode 100755 index 660f290..0229429 --- a/bmlib/src/bmlib_device.cpp +++ b/bmlib/src/bmlib_device.cpp @@ -4,9 +4,6 @@ #include "bmlib_device.h" #include "api.h" #include -#ifdef BM_TV_GEN - #include "bm_tv_gen_util.h" -#endif #define BM_API_QUIT 0xffffffff @@ -38,7 +35,7 @@ unsigned long long get_global_mem_size() return 0x100000000; } } -void *cmodel_so_handle_; +void *cmodel_so_handle_ = NULL; void bm_device::cmodel_setup(void) { get_global_memaddr_ = (t_get_global_memaddr)dlsym(NULL, "get_global_memaddr"); @@ -49,17 +46,30 @@ void bm_device::cmodel_setup(void) cmodel_deinit_ = (t_cmodel_deinit)dlsym(NULL, "cmodel_deinit"); cmodel_get_share_memory_addr_ = (t_cmodel_get_share_memory_addr)dlsym(NULL, "cmodel_get_share_memory_addr"); cmodel_write_share_reg_ = (t_cmodel_write_share_reg)dlsym(NULL, "cmodel_write_share_reg"); + cmodel_write_share_memory_ = (t_cmodel_write_share_memory)dlsym(NULL, "cmodel_write_share_memory"); cmodel_read_share_reg_ = (t_cmodel_read_share_reg)dlsym(NULL, "cmodel_read_share_reg"); host_dma_copy_s2d_cmodel_ = (t_host_dma_copy_s2d_cmodel)dlsym(NULL, "host_dma_copy_s2d_cmodel"); 
host_dma_copy_d2s_cmodel_ = (t_host_dma_copy_d2s_cmodel)dlsym(NULL, "host_dma_copy_d2s_cmodel"); + + cmodel_share_reg_message_rp_ = (t_cmodel_share_reg_func)dlsym(NULL, "cmodel_share_reg_message_rp"); + cmodel_share_reg_message_wp_ = (t_cmodel_share_reg_func)dlsym(NULL, "cmodel_share_reg_message_wp"); + cmodel_share_reg_fw_status_ = (t_cmodel_share_reg_func)dlsym(NULL, "cmodel_share_reg_fw_status"); + cmodel_wait_share_reg_equal_ = (t_cmodel_wait_share_reg_equal)dlsym(NULL, "cmodel_wait_share_reg_equal"); + cmodel_api_poll_ = (t_cmodel_api_poll)dlsym(NULL, "api_poll"); - get_chip_id_ = (t_cmodel_get_chip_id)dlsym(NULL, "get_chip_id"); - if (cmodel_deinit_ == NULL) { + cmodel_api_signal_ = (t_cmodel_api_poll)dlsym(NULL, "api_signal"); + cmodel_api_signal_begin_ = (t_cmodel_api_poll)dlsym(NULL, "api_signal_begin"); + cmodel_get_chip_id_ = (t_cmodel_get_chip_id)dlsym(NULL, "get_chip_id"); + cmodel_get_total_nodechip_num_ = (t_cmodel_get_total_nodechip_num)dlsym(NULL, "get_total_nodechip_num"); + cmodel_get_gmem_start_addr_ = (t_cmodel_get_gmem_start_addr)dlsym(NULL, "cmodel_get_gmem_start_addr"); + cmodel_get_last_func_id = (t_cmodel_get_last_func_id)dlsym(NULL, "cmodel_get_last_func_id"); + + if (cmodel_nodechip_runtime_init_== NULL) { const char *path = getenv("TPUKERNEL_FIRMWARE_PATH"); cmodel_so_handle_ = dlopen(path ? 
path : "libcmodel.so", RTLD_LAZY); if(!cmodel_so_handle_) { - printf("not able to open libcmodel.so\n"); - return; + printf("not able to open libcmodel.so: %s\n", dlerror()); + exit(-1); } get_global_memaddr_ = (t_get_global_memaddr)dlsym(cmodel_so_handle_, "get_global_memaddr"); cmodel_init_ = (t_cmodel_init)dlsym(cmodel_so_handle_, "cmodel_init"); @@ -69,11 +79,23 @@ void bm_device::cmodel_setup(void) cmodel_deinit_ = (t_cmodel_deinit)dlsym(cmodel_so_handle_, "cmodel_deinit"); cmodel_get_share_memory_addr_ = (t_cmodel_get_share_memory_addr)dlsym(cmodel_so_handle_, "cmodel_get_share_memory_addr"); cmodel_write_share_reg_ = (t_cmodel_write_share_reg)dlsym(cmodel_so_handle_, "cmodel_write_share_reg"); + cmodel_write_share_memory_ = (t_cmodel_write_share_memory)dlsym(cmodel_so_handle_, "cmodel_write_share_memory"); cmodel_read_share_reg_ = (t_cmodel_read_share_reg)dlsym(cmodel_so_handle_, "cmodel_read_share_reg"); host_dma_copy_d2s_cmodel_ = (t_host_dma_copy_d2s_cmodel)dlsym(cmodel_so_handle_, "host_dma_copy_d2s_cmodel"); host_dma_copy_s2d_cmodel_ = (t_host_dma_copy_s2d_cmodel)dlsym(cmodel_so_handle_, "host_dma_copy_s2d_cmodel"); + + cmodel_share_reg_message_rp_ = (t_cmodel_share_reg_func)dlsym(cmodel_so_handle_, "cmodel_share_reg_message_rp"); + cmodel_share_reg_message_wp_ = (t_cmodel_share_reg_func)dlsym(cmodel_so_handle_, "cmodel_share_reg_message_wp"); + cmodel_share_reg_fw_status_ = (t_cmodel_share_reg_func)dlsym(cmodel_so_handle_, "cmodel_share_reg_fw_status"); + cmodel_wait_share_reg_equal_ = (t_cmodel_wait_share_reg_equal)dlsym(cmodel_so_handle_, "cmodel_wait_share_reg_equal"); + cmodel_api_poll_ = (t_cmodel_api_poll)dlsym(cmodel_so_handle_, "api_poll"); - get_chip_id_ = (t_cmodel_get_chip_id)dlsym(cmodel_so_handle_, "get_chip_id"); + cmodel_api_signal_ = (t_cmodel_api_poll)dlsym(cmodel_so_handle_, "api_signal"); + cmodel_api_signal_begin_ = (t_cmodel_api_poll)dlsym(cmodel_so_handle_, "api_signal_begin"); + cmodel_get_chip_id_ = 
(t_cmodel_get_chip_id)dlsym(cmodel_so_handle_, "get_chip_id"); + cmodel_get_total_nodechip_num_ = (t_cmodel_get_total_nodechip_num)dlsym(cmodel_so_handle_, "get_total_nodechip_num"); + cmodel_get_gmem_start_addr_ = (t_cmodel_get_gmem_start_addr)dlsym(cmodel_so_handle_, "cmodel_get_gmem_start_addr"); + cmodel_get_last_func_id = (t_cmodel_get_last_func_id)dlsym(cmodel_so_handle_, "cmodel_get_last_func_id"); } return; } @@ -84,83 +106,140 @@ bm_device::bm_device(int _dev_id) device_mem_pool(get_global_mem_size()), device_sync_last(0), device_sync_cpl(0) { - thread_api_table.clear(); - cmodel_setup(); + core_num = 1; + if(cmodel_get_total_nodechip_num_) core_num = cmodel_get_total_nodechip_num_(); + printf("begin to cmodel init...\n"); - if (cmodel_init_(dev_id, get_global_mem_size()) - != 0) { - printf("BM: cmodel_init failed\n"); - exit(-1); - } + for (int core_idx = 0; core_idx < core_num; ++core_idx) { + if (cmodel_init_(core_idx, get_global_mem_size()) != BM_SUCCESS) { + printf("BM: cmodel_init (core_idx=%d) failed\n", core_idx); + exit(-1); + } + } set_cur_nodechip_idx_(0); - chip_id = get_chip_id_(); - cmodel_nodechip_runtime_init_(dev_id); + chip_id = cmodel_get_chip_id_(); + for (int core_idx = 0; core_idx < core_num; ++core_idx) { + cmodel_nodechip_runtime_init_(core_idx); + } reserved_ddr_arm = 0x1000000; - share_reg_message_wp = 0; - share_reg_message_rp = 1; api_message_empty_slot_num = 2; sharemem_size_bit = 11; sharemem_mask = ((1 << sharemem_size_bit) - 1); - share_reg_fw_status = 9; + share_reg_message_wp = cmodel_share_reg_message_wp_(); + share_reg_message_rp = cmodel_share_reg_message_rp_(); + share_reg_fw_status = cmodel_share_reg_fw_status_(); last_ini_reg_val = 0x76125438; // ctx->device_mem_size = cmodel_get_global_mem_size(devid); - BM_CHECK_RET(bm_alloc_instr_reserved()); BM_CHECK_RET(bm_alloc_arm_reserved()); BM_CHECK_RET(bm_alloc_iommu_reserved()); - bm_wait_fwinit_done(); + auto env_sys_tvgen_multi_core = + 
std::getenv("BUILD_BMLIB_SYS_TVGEN_MULTI_CORE"); + if (env_sys_tvgen_multi_core) { + BM_CHECK_RET(bm_alloc_sys_tvgen_multi_core_reserved()); + } + for (int core_idx = 0; core_idx < core_num; ++core_idx) { + bm_wait_fwinit_done(core_idx); + } + + thread_api_tables = new std::map[core_num]; + pending_api_queues = new std::queue[core_num]; + + // init sync_last and sync_cpl + device_sync_last = new u32[core_num]; + device_sync_cpl = new std::atomic[core_num]; + for (int core_idx = 0; core_idx < core_num; ++core_idx) { + device_sync_last[core_idx] = 0; + device_sync_cpl[core_idx] = 0; + } - pthread_mutex_init(&api_lock, nullptr); + api_locks = new pthread_mutex_t[core_num]; + for (int core_idx = 0; core_idx < core_num; ++core_idx) { + pthread_mutex_init(&api_locks[core_idx], nullptr); + } pthread_mutex_init(&arm_reserved_lock, nullptr); // init msg poll thread - pthread_create(&msg_poll_thread, nullptr, bm_msg_done_poll, this); + msg_poll_threads = new pthread_t[core_num]; + msg_done_poll_params = new msg_done_poll_param_t[core_num]; + for (int core_idx = 0; core_idx < core_num; ++core_idx) { + msg_done_poll_params[core_idx].dev = this; + msg_done_poll_params[core_idx].core_idx = core_idx; + pthread_create(&msg_poll_threads[core_idx], nullptr, bm_msg_done_poll, (void*)&msg_done_poll_params[core_idx]); + } } bm_device::~bm_device() { - printf("destroy device %d\n", dev_id); - pthread_cancel(msg_poll_thread); - pthread_join(msg_poll_thread, nullptr); + //printf("destroy device %d\n", dev_id); + for (int core_idx = 0; core_idx < core_num; ++core_idx) { + pthread_cancel(msg_poll_threads[core_idx]); + pthread_join(msg_poll_threads[core_idx], nullptr); + } + delete[] msg_poll_threads; + delete[] msg_done_poll_params; bm_free_arm_reserved(); bm_free_instr_reserved(); bm_free_iommu_reserved(); + auto env_sys_tv_gen_multi_core = std::getenv("BUILD_BMLIB_SYS_TVGEN_MULTI_CORE"); + if (env_sys_tv_gen_multi_core) { + bm_free_sys_tvgen_multi_core_reserved(); + } + + for (int 
core_idx = 0; core_idx < core_num; ++core_idx) { + bm_send_quit_message(core_idx); + } + for (int core_idx = 0; core_idx < core_num; ++core_idx) { + pthread_mutex_destroy(&api_locks[core_idx]); + } + + delete[] api_locks; + delete[] thread_api_tables; + delete[] pending_api_queues; + delete[] device_sync_last; + delete[] device_sync_cpl; - bm_send_quit_message(); + for (int core_idx = 0; core_idx < core_num; ++core_idx) { + cmodel_nodechip_runtime_exit_(core_idx); + } + for (int core_idx = 0; core_idx < core_num; ++core_idx) { + cmodel_deinit_(core_idx); + } + printf("cmodel_deinit complete\n"); - cmodel_nodechip_runtime_exit_(dev_id); - cmodel_deinit_(dev_id); + if (cmodel_so_handle_) { + dlclose(cmodel_so_handle_); + } } -u64 bm_device::bm_device_alloc_mem(u32 size) { - return device_mem_pool.bm_mem_pool_alloc(size); +u64 bm_device::bm_device_alloc_mem(u64 size) { + return device_mem_pool.bm_mem_pool_alloc(size) + cmodel_get_gmem_start_addr_(); } void bm_device::bm_device_free_mem(u64 addr) { - device_mem_pool.bm_mem_pool_free(addr); + device_mem_pool.bm_mem_pool_free(addr - cmodel_get_gmem_start_addr_()); } -void bm_device::_write_share_mem(u32 offset, u32 data) { - u32 * write_addr = cmodel_get_share_memory_addr_(offset, dev_id); - *write_addr = data; +void bm_device::_write_share_mem(u32 offset, u32 data, int core_idx) { + cmodel_write_share_memory_(offset, data, core_idx); } -void bm_device::_write_share_reg(u32 idx, u32 data) { - cmodel_write_share_reg_(idx, data, dev_id); +void bm_device::_write_share_reg(u32 idx, u32 data, int core_idx) { + cmodel_write_share_reg_(idx, data, core_idx); } -u32 bm_device::_read_share_reg(u32 idx) { - return cmodel_read_share_reg_(idx, dev_id); +u32 bm_device::_read_share_reg(u32 idx, int core_idx) { + return cmodel_read_share_reg_(idx, core_idx); } -u32 bm_device::_poll_message_fifo_cnt() { +u32 bm_device::_poll_message_fifo_cnt(int core_idx) { u32 wp, rp; - wp = _read_share_reg(share_reg_message_wp); - rp = 
_read_share_reg(share_reg_message_rp); + wp = _read_share_reg(share_reg_message_wp, core_idx); + rp = _read_share_reg(share_reg_message_rp, core_idx); u32 wp_tog = wp >> sharemem_size_bit; u32 rp_tog = rp >> sharemem_size_bit; @@ -174,90 +253,109 @@ u32 bm_device::_poll_message_fifo_cnt() { } void bm_device::copy_message_to_sharemem(const u32 *src_msg_buf, - u32 *wp, u32 size, u32 api_id) { - u32 cur_wp = _read_share_reg(share_reg_message_wp); + u32 *wp, u32 size, u32 api_id, int core_idx) { + // read writing pointer from the share register + u32 cur_wp = _read_share_reg(share_reg_message_wp, core_idx); *wp = cur_wp; - _write_share_mem(cur_wp & sharemem_mask, api_id); + // copy api_id to buffer + _write_share_mem(cur_wp & sharemem_mask, api_id, core_idx); u32 next_wp = pointer_wrap_around(cur_wp, 1, sharemem_size_bit) & sharemem_mask; - _write_share_mem(next_wp & sharemem_mask, size); - if (api_id == BM_API_QUIT) - return; + // copy size to buffer + _write_share_mem(next_wp & sharemem_mask, size, core_idx); + + // copy sg_api_* structure date to buffer for (u32 idx = 0; idx < size; idx++) { next_wp = pointer_wrap_around(*wp, 2 + idx, sharemem_size_bit); - _write_share_mem(next_wp & sharemem_mask, src_msg_buf[idx]); + _write_share_mem(next_wp & sharemem_mask, src_msg_buf[idx], core_idx); } + + // write back writing pointer to the share register + next_wp = pointer_wrap_around(*wp, size + 2, sharemem_size_bit); + _write_share_reg(share_reg_message_wp, next_wp, core_idx); } -bm_status_t bm_device::bm_device_send_api(int api_id, const u8 *api, u32 size) { - pthread_mutex_lock(&api_lock); +bm_status_t bm_device::bm_device_send_api(int api_id, const u8 *api, u32 size, int core_idx) { + cmodel_wait_share_reg_equal_(share_reg_message_rp, + cmodel_read_share_reg_(share_reg_message_wp, core_idx), 32, 0, core_idx); + + pthread_mutex_lock(&api_locks[core_idx]); pthread_t thd_id = pthread_self(); // get thread api info - thread_api_info *thd_api_info = 
bm_get_thread_api_info(thd_id); + thread_api_info *thd_api_info = bm_get_thread_api_info(thd_id, core_idx); if (!thd_api_info) { - bm_add_thread_api_info(thd_id); - thd_api_info = bm_get_thread_api_info(thd_id); + bm_add_thread_api_info(thd_id, core_idx); + thd_api_info = bm_get_thread_api_info(thd_id, core_idx); } // update thread api last seq + pthread_mutex_lock(&thd_api_info->lock); thd_api_info->last_seq++; + pthread_mutex_unlock(&thd_api_info->lock); // add api queue entry into fifo - pending_api_queue.push({thd_id, thd_api_info->last_seq, 0}); + pending_api_queues[core_idx].push({thd_id, thd_api_info->last_seq, 0}); /* printf("SEND API: thread %lu --- seq_id %d\n", thd_id, thd_api_info->last_seq); */ u32 fifo_empty_number = api_message_empty_slot_num * (size/sizeof(u32) + 2); - while (_poll_message_fifo_cnt() <= fifo_empty_number) { + while (_poll_message_fifo_cnt(core_idx) <= fifo_empty_number) { } u32 wp; + if (cmodel_api_signal_) cmodel_api_signal_begin_(core_idx); copy_message_to_sharemem(reinterpret_cast < const u32 *>(api), &wp, - size/sizeof(u32), api_id); - u32 next_wp = pointer_wrap_around(wp, size/sizeof(u32) + 2, - sharemem_size_bit); - _write_share_reg(share_reg_message_wp, next_wp); + size/sizeof(u32), api_id, core_idx); - pthread_mutex_unlock(&api_lock); + if (cmodel_api_signal_) cmodel_api_signal_(core_idx); + + pthread_mutex_unlock(&api_locks[core_idx]); return BM_SUCCESS; } bm_status_t bm_device::bm_device_sync() { - pthread_mutex_lock(&api_lock); - u32 dev_sync_last = ++device_sync_last; - pending_api_queue.push({DEVICE_SYNC_MARKER, 0, dev_sync_last}); - pthread_mutex_unlock(&api_lock); + for (int core_idx =0; core_idx < core_num; ++core_idx) { + pthread_mutex_lock(&api_locks[core_idx]); + + u32 dev_sync_last = ++device_sync_last[core_idx]; + pending_api_queues[core_idx].push({DEVICE_SYNC_MARKER, 0, dev_sync_last}); + //printf("SYNC DEVICE API: device (core_idx=%d) last seq %d\n", core_idx, device_sync_last[core_idx]); + 
pthread_mutex_unlock(&api_locks[core_idx]); + } - printf("SYNC DEVICE API: device last seq %d\n", dev_sync_last); - while (dev_sync_last != device_sync_cpl) { + while (true) { + bool success = true; + for (int core_idx = 0; core_idx < core_num; ++core_idx) { + if (device_sync_last[core_idx] != device_sync_cpl[core_idx]) { + success = false; + break; + } + } + if (success) break; } - #ifdef BM_TV_GEN - bm_read32_wait_eq_tv(SHARE_REG_BASE_ADDR + share_reg_message_rp * 4, - cmodel_read_share_reg(share_reg_message_wp, dev_id), 32, 0, HOST_REG); - #endif + for (int core_idx = 0; core_idx < core_num; ++core_idx) { + cmodel_wait_share_reg_equal_(share_reg_message_rp, + cmodel_read_share_reg_(share_reg_message_wp, core_idx), 32, 0, core_idx); + } // while (_poll_message_fifo_cnt() != (1 << sharemem_size_bit)); return BM_SUCCESS; } -bm_status_t bm_device::bm_send_quit_message() { +bm_status_t bm_device::bm_send_quit_message(int core_idx) { printf("BMLIB Send Quit Message\n"); - bm_device_send_api((sglib_api_id_t)BM_API_QUIT, nullptr, 0); + bm_device_send_api((sglib_api_id_t)BM_API_QUIT, nullptr, 0, core_idx); sleep(1); return BM_SUCCESS; } -void bm_device::bm_wait_fwinit_done() { -#ifdef BM_TV_GEN - bm_read32_wait_eq_tv(SHARE_REG_BASE_ADDR + share_reg_fw_status * 4, - last_ini_reg_val, 32, 0, HOST_REG); -#endif - while (_read_share_reg(share_reg_fw_status) != last_ini_reg_val) { - } +void bm_device::bm_wait_fwinit_done(int core_idx) { + cmodel_wait_share_reg_equal_(share_reg_fw_status, last_ini_reg_val, 32, 0, core_idx); + while (_read_share_reg(share_reg_fw_status, core_idx) != last_ini_reg_val) { } } bm_status_t bm_device::bm_malloc_device_dword( @@ -265,7 +363,7 @@ bm_status_t bm_device::bm_malloc_device_dword( u32 size = cnt * FLOAT_SIZE; u64 addr = 0; - addr = device_mem_pool.bm_mem_pool_alloc(size); + addr = device_mem_pool.bm_mem_pool_alloc(size) + cmodel_get_gmem_start_addr_(); pmem->u.device.device_addr = addr; pmem->flags.u.mem_type = BM_MEM_TYPE_DEVICE; @@ -275,7 
+373,7 @@ bm_status_t bm_device::bm_malloc_device_dword( void bm_device::bm_free_device(bm_device_mem_t mem) { u64 addr = (u64)bm_mem_get_device_addr(mem); - device_mem_pool.bm_mem_pool_free(addr); + device_mem_pool.bm_mem_pool_free(addr - cmodel_get_gmem_start_addr_()); } bm_status_t bm_device::bm_alloc_arm_reserved() { @@ -309,22 +407,60 @@ void bm_device::bm_free_iommu_reserved() { bm_free_device(iommu_reserved_dev_mem); } +#define SYS_TVGEN_MULTI_CORE_RESERVED_SIZE (1024 * 1024 * (24 * 8 - 81)) +bm_status_t bm_device::bm_alloc_sys_tvgen_multi_core_reserved() { + BM_CHECK_RET(bm_malloc_device_dword(&sys_tvgen_multi_core_reserved_dev_mem, + SYS_TVGEN_MULTI_CORE_RESERVED_SIZE / sizeof(float))); + return BM_SUCCESS; +} +void bm_device::bm_free_sys_tvgen_multi_core_reserved() { + bm_free_device(sys_tvgen_multi_core_reserved_dev_mem); +} -bm_status_t bm_device::bm_device_memcpy_s2d(bm_device_mem_t dst, void *src) { +bm_status_t bm_device::bm_device_memcpy_s2d(bm_device_mem_t dst, void *src, int core_idx) { u32 size_total = bm_mem_get_size(dst); u64 dst_addr = bm_mem_get_device_addr(dst); - host_dma_copy_s2d_cmodel_(dst_addr, src, (u64)size_total, dev_id); + host_dma_copy_s2d_cmodel_(dst_addr, src, (u64)size_total, core_idx); return BM_SUCCESS; } -bm_status_t bm_device::bm_device_memcpy_d2s(void *dst, bm_device_mem_t src) { +bm_status_t bm_device::bm_device_memcpy_d2s(void *dst, bm_device_mem_t src, int core_idx) { u32 size_total = bm_mem_get_size(src); u64 src_addr = bm_mem_get_device_addr(src); - host_dma_copy_d2s_cmodel_(dst, src_addr, (u64)size_total, dev_id); + host_dma_copy_d2s_cmodel_(dst, src_addr, (u64)size_total, core_idx); return BM_SUCCESS; } +bm_status_t bm_device::bm_device_memcpy_s2d_u64(bm_device_mem_u64_t dst, void *src, int core_idx) { + u64 size_total = bm_mem_get_size_u64(dst); + u64 dst_addr = bm_mem_get_device_addr_u64(dst); + host_dma_copy_s2d_cmodel_(dst_addr, src, (u64)size_total, core_idx); + return BM_SUCCESS; +} + +bm_status_t 
bm_device::bm_device_memcpy_d2s_u64(void *dst, bm_device_mem_u64_t src, int core_idx) { + u64 size_total = bm_mem_get_size_u64(src); + u64 src_addr = bm_mem_get_device_addr_u64(src); + + host_dma_copy_d2s_cmodel_(dst, src_addr, (u64)size_total, core_idx); + return BM_SUCCESS; +} + +bm_status_t bm_device::sg_device_memcpy_s2d(sg_device_mem_t dst, void *src, int core_idx) { + u64 size_total = sg_mem_get_size(dst); + u64 dst_addr = sg_mem_get_device_addr(dst); + host_dma_copy_s2d_cmodel_(dst_addr, src, size_total, core_idx); + return BM_SUCCESS; +} + +bm_status_t bm_device::sg_device_memcpy_d2s(void *dst, sg_device_mem_t src, int core_idx) { + u64 size_total = sg_mem_get_size(src); + u64 src_addr = sg_mem_get_device_addr(src); + + host_dma_copy_d2s_cmodel_(dst, src_addr, size_total, core_idx); + return BM_SUCCESS; +} u64 bm_device::bm_device_arm_reserved_req() { pthread_mutex_lock(&arm_reserved_lock); return arm_reserved_dev_mem.u.device.device_addr; @@ -333,81 +469,87 @@ void bm_device::bm_device_arm_reserved_rel() { pthread_mutex_unlock(&arm_reserved_lock); } -extern int fun_id; -bm_status_t bm_device::bm_device_thread_sync() { - if(fun_id != 0) - return BM_SUCCESS; - // should add volatile, if not, thread_api_info will be - // optimized by c++ in nodebug mode - volatile thread_api_info *thd_api_info; - thd_api_info = bm_get_thread_api_info(pthread_self()); +bm_status_t bm_device::bm_device_thread_sync_from_core(int core_idx) { + thread_api_info *thd_api_info; + thd_api_info = bm_get_thread_api_info(pthread_self(), core_idx); if (!thd_api_info) { printf("Error: thread api info %lu is not found!\n", pthread_self()); ASSERT(0); return BM_ERR_FAILURE; } + pthread_mutex_lock(&thd_api_info->lock); while (thd_api_info->last_seq != thd_api_info->cpl_seq) { + pthread_cond_wait(&thd_api_info->cond, &thd_api_info->lock); } + pthread_mutex_unlock(&thd_api_info->lock); - #ifdef BM_TV_GEN - bm_read32_wait_eq_tv(SHARE_REG_BASE_ADDR + share_reg_message_rp * 4, - 
cmodel_read_share_reg(share_reg_message_wp, dev_id), 32, 0, HOST_REG); - #endif + cmodel_wait_share_reg_equal_(share_reg_message_rp, + cmodel_read_share_reg_(share_reg_message_wp, core_idx), 32, 0, core_idx); return BM_SUCCESS; } -thread_api_info *bm_device::bm_get_thread_api_info(pthread_t thd_id) { +thread_api_info *bm_device::bm_get_thread_api_info(pthread_t thd_id, int core_idx) { std::map < pthread_t, thread_api_info>::iterator it; - it = thread_api_table.find(thd_id); - if (it != thread_api_table.end()) + it = thread_api_tables[core_idx].find(thd_id); + if (it != thread_api_tables[core_idx].end()) return &it->second; else return nullptr; } -bm_status_t bm_device::bm_add_thread_api_info(pthread_t thd_id) { - thread_api_table.insert(std::pair < pthread_t, thread_api_info > (thd_id, +bm_status_t bm_device::bm_add_thread_api_info(pthread_t thd_id, int core_idx) { + thread_api_tables[core_idx].insert(std::pair(thd_id, {thd_id, 0, 0})); return BM_SUCCESS; } -bm_status_t bm_device::bm_remove_thread_api_info(pthread_t thd_id) { +bm_status_t bm_device::bm_remove_thread_api_info(pthread_t thd_id, int core_idx) { std::map < pthread_t, thread_api_info>::iterator it; - it = thread_api_table.find(thd_id); - if (it != thread_api_table.end()) - thread_api_table.erase(it); + it = thread_api_tables[core_idx].find(thd_id); + if (it != thread_api_tables[core_idx].end()) + thread_api_tables[core_idx].erase(it); return BM_SUCCESS; } void *bm_device::bm_msg_done_poll(void *arg) { - bm_device *bm_dev = reinterpret_cast < bm_device *>(arg); + msg_done_poll_param_t *_param = reinterpret_cast < msg_done_poll_param_t *>(arg); + bm_device *bm_dev = _param->dev; + int core_idx = _param->core_idx; while (1) { - while (!bm_dev->pending_api_queue.empty()) { - api_queue_entry api_front = bm_dev->pending_api_queue.front(); + while (!bm_dev->pending_api_queues[core_idx].empty()) { + api_queue_entry api_front = bm_dev->pending_api_queues[core_idx].front(); if (api_front.thd_id == 
DEVICE_SYNC_MARKER) { // device sync - bm_dev->device_sync_cpl = api_front.dev_seq; - pthread_mutex_lock(&bm_dev->api_lock); - bm_dev->pending_api_queue.pop(); - pthread_mutex_unlock(&bm_dev->api_lock); + bm_dev->device_sync_cpl[core_idx] = api_front.dev_seq; + pthread_mutex_lock(&bm_dev->api_locks[core_idx]); + bm_dev->pending_api_queues[core_idx].pop(); + pthread_mutex_unlock(&bm_dev->api_locks[core_idx]); pthread_yield(); } else { // msg api pending - bm_dev->cmodel_api_poll_(bm_dev->dev_id); + bm_dev->cmodel_api_poll_(core_idx); if (api_front.thd_id != 0) { - bm_dev->thread_api_table[api_front.thd_id].cpl_seq = - api_front.thd_seq; - pthread_mutex_lock(&bm_dev->api_lock); - bm_dev->pending_api_queue.pop(); - pthread_mutex_unlock(&bm_dev->api_lock); + thread_api_info *thd_api_info = bm_dev->bm_get_thread_api_info( + api_front.thd_id, core_idx); + pthread_mutex_lock(&thd_api_info->lock); + thd_api_info->cpl_seq = api_front.thd_seq; + pthread_mutex_unlock(&thd_api_info->lock); + pthread_cond_signal(&thd_api_info->cond); + + pthread_mutex_lock(&bm_dev->api_locks[core_idx]); + bm_dev->pending_api_queues[core_idx].pop(); + pthread_mutex_unlock(&bm_dev->api_locks[core_idx]); } else { ASSERT(0); } } } - +// busy waiting sleep 200ms, reduce cpu usage +#if defined(USING_CMODEL) && !defined(USING_MULTI_THREAD_ENGINE) + usleep(200000); +#endif pthread_testcancel(); } return nullptr; @@ -420,8 +562,11 @@ bm_device_manager::bm_device_manager(int _max_dev_cnt) bm_dev_list = new bm_device *[max_dev_cnt]; if (!bm_dev_list) return; - for (int i = 0; i < max_dev_cnt; i++) - bm_dev_list[i] = nullptr; + dev_user = new int [max_dev_cnt]; + for (int i = 0; i < max_dev_cnt; i++) { + bm_dev_list[i] = nullptr; + dev_user[i] = 0; + } } bm_device_manager::~bm_device_manager() { @@ -434,17 +579,20 @@ bm_device_manager::~bm_device_manager() { } delete []bm_dev_list; } + if (dev_user) { + delete [] dev_user; + } } bm_device_manager *bm_device_manager::get_dev_mgr() { 
pthread_mutex_lock(&init_lock); if (!bm_dev_mgr) - bm_dev_mgr = new bm_device_manager(MAX_NODECHIP_NUM); + bm_dev_mgr = new bm_device_manager(MAX_DEVICE_NUM); pthread_mutex_unlock(&init_lock); return bm_dev_mgr; } -bm_device *bm_device_manager::get_bm_device(int dev_id) { +bm_device * bm_device_manager::get_bm_device(int dev_id) { ASSERT(bm_dev_list); ASSERT(dev_id < max_dev_cnt); pthread_mutex_lock(&init_lock); @@ -452,16 +600,32 @@ bm_device *bm_device_manager::get_bm_device(int dev_id) { bm_dev_list[dev_id] = new bm_device(dev_id); dev_cnt++; } + dev_user[dev_id]++; pthread_mutex_unlock(&init_lock); return bm_dev_list[dev_id]; } void bm_device_manager::destroy_dev_mgr() { // std::cout << "bm_dev_mgr "< 0); + pthread_mutex_lock(&init_lock); + dev_user[dev_id]--; + if (dev_user[dev_id] == 0) { + delete bm_dev_list[dev_id]; + bm_dev_list[dev_id] = nullptr; + } + pthread_mutex_unlock(&init_lock); +} bm_device_manager *bm_device_manager::bm_dev_mgr = nullptr; pthread_mutex_t bm_device_manager::init_lock = PTHREAD_MUTEX_INITIALIZER; #endif diff --git a/bmlib/src/bmlib_device.h b/bmlib/src/bmlib_device.h index 73623f1..250316f 100644 --- a/bmlib/src/bmlib_device.h +++ b/bmlib/src/bmlib_device.h @@ -12,55 +12,80 @@ #include #ifdef __cplusplus -extern "C" { +extern "C" +{ #endif #define DEVICE_SYNC_MARKER 0xFFFF -#define MAX_NODECHIP_NUM 0x2 +#define MAX_DEVICE_NUM 0x2 -struct thread_api_info { + struct thread_api_info + { pthread_t thd_id; u32 last_seq; u32 cpl_seq; -}; + pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER; + pthread_cond_t cond = PTHREAD_COND_INITIALIZER; -struct api_queue_entry { + thread_api_info(pthread_t tid, u32 lsq, u32 csq) + : thd_id(tid), last_seq(lsq), cpl_seq(csq) + { + } + }; + + struct api_queue_entry + { pthread_t thd_id; u32 thd_seq; u32 dev_seq; -}; + }; -class bm_device { + class bm_device + { public: bm_device(int _dev_id); ~bm_device(); - int bm_device_id() { - return dev_id; + int bm_device_id() + { + return dev_id; } - bm_status_t 
bm_device_send_api(int api_id, const u8 *api, u32 size); + int bm_core_num() { return core_num; } + bm_status_t bm_device_send_api(int api_id, const u8 *api, u32 size, int core_idx = 0); bm_status_t bm_device_sync(); - bm_status_t bm_device_thread_sync(); - u64 bm_device_alloc_mem(u32 size); + bm_status_t bm_device_thread_sync_from_core(int core_idx); + u64 bm_device_alloc_mem(u64 size); void bm_device_free_mem(u64 addr); - bm_status_t bm_device_memcpy_s2d(bm_device_mem_t dst, void *src); - bm_status_t bm_device_memcpy_d2s(void *dst, bm_device_mem_t src); + bm_status_t bm_device_memcpy_s2d(bm_device_mem_t dst, void *src, int core_idx = 0); + bm_status_t bm_device_memcpy_d2s(void *dst, bm_device_mem_t src, int core_idx = 0); + bm_status_t bm_device_memcpy_s2d_u64(bm_device_mem_u64_t dst, void *src, int core_idx = 0); + bm_status_t bm_device_memcpy_d2s_u64(void *dst, bm_device_mem_u64_t src, int core_idx = 0); + bm_status_t sg_device_memcpy_s2d(sg_device_mem_t dst, void *src, int core_idx = 0); + bm_status_t sg_device_memcpy_d2s(void *dst, sg_device_mem_t src, int core_idx = 0); u64 bm_device_arm_reserved_req(); void bm_device_arm_reserved_rel(); void cmodel_setup(void); - typedef void* (*t_get_global_memaddr)(int); - typedef int (*t_cmodel_init)(int, unsigned long long); + typedef void *(*t_get_global_memaddr)(int); + typedef int (*t_cmodel_init)(int, unsigned long long); typedef void (*t_set_cur_nodechip_idx)(int); typedef void (*t_cmodel_nodechip_runtime_init)(int); typedef void (*t_cmodel_nodechip_runtime_exit)(int); typedef void (*t_cmodel_deinit)(int); - typedef u32* (*t_cmodel_get_share_memory_addr)(u32, int); + typedef u32 *(*t_cmodel_get_share_memory_addr)(u32, int); typedef void (*t_cmodel_write_share_reg)(u32, u32, int); - typedef u32 (*t_cmodel_read_share_reg)(u32, int); - typedef void (*t_host_dma_copy_s2d_cmodel)(u64, void *, u64, u32); - typedef void (*t_host_dma_copy_d2s_cmodel)(void *, u64, u64, u32); - typedef void (*t_cmodel_api_poll)(int); - 
typedef u32 (*t_cmodel_get_chip_id)(void); - + typedef void (*t_cmodel_write_share_memory)(u32, u32, int); + typedef u32 (*t_cmodel_read_share_reg)(u32, int); + typedef void (*t_host_dma_copy_s2d_cmodel)(u64, void *, u64, u32); + typedef void (*t_host_dma_copy_d2s_cmodel)(void *, u64, u64, u32); + typedef void (*t_cmodel_api_poll)(int); + typedef u32 (*t_cmodel_get_chip_id)(void); + typedef u32 (*t_cmodel_share_reg_func)(); + typedef void (*t_cmodel_wait_)(void *, u64, u64, u32); + typedef void (*t_cmodel_wait_share_reg_equal)(u32, int, int, int, int); + typedef int (*t_cmodel_get_total_nodechip_num)(void); + typedef u64 (*t_cmodel_get_gmem_start_addr)(); + typedef int (*t_cmodel_get_last_func_id)(int); + + t_cmodel_get_gmem_start_addr cmodel_get_gmem_start_addr_; t_get_global_memaddr get_global_memaddr_; t_cmodel_init cmodel_init_; t_set_cur_nodechip_idx set_cur_nodechip_idx_; @@ -69,11 +94,21 @@ class bm_device { t_cmodel_deinit cmodel_deinit_; t_cmodel_get_share_memory_addr cmodel_get_share_memory_addr_; t_cmodel_write_share_reg cmodel_write_share_reg_; + t_cmodel_write_share_memory cmodel_write_share_memory_; t_cmodel_read_share_reg cmodel_read_share_reg_; t_host_dma_copy_d2s_cmodel host_dma_copy_d2s_cmodel_; t_host_dma_copy_s2d_cmodel host_dma_copy_s2d_cmodel_; t_cmodel_api_poll cmodel_api_poll_; - t_cmodel_get_chip_id get_chip_id_; + t_cmodel_api_poll cmodel_api_signal_; + t_cmodel_api_poll cmodel_api_signal_begin_; + t_cmodel_get_chip_id cmodel_get_chip_id_; + + t_cmodel_share_reg_func cmodel_share_reg_message_rp_; + t_cmodel_share_reg_func cmodel_share_reg_message_wp_; + t_cmodel_share_reg_func cmodel_share_reg_fw_status_; + t_cmodel_wait_share_reg_equal cmodel_wait_share_reg_equal_; + t_cmodel_get_total_nodechip_num cmodel_get_total_nodechip_num_; + t_cmodel_get_last_func_id cmodel_get_last_func_id; u64 global_gmem_size; u32 share_reg_message_wp; @@ -88,16 +123,16 @@ class bm_device { private: /* add lock */ - void _write_share_mem(u32 offset, u32 data); - 
void _write_share_reg(u32 idx, u32 data); - u32 _read_share_reg(u32 idx); + void _write_share_mem(u32 offset, u32 data, int core_idx); + void _write_share_reg(u32 idx, u32 data, int core_idx); + u32 _read_share_reg(u32 idx, int core_idx); - u32 _poll_message_fifo_cnt(); + u32 _poll_message_fifo_cnt(int core_idx); void copy_message_to_sharemem(const u32 *src_msg_buf, - u32 *wp, u32 size, u32 api_id); + u32 *wp, u32 size, u32 api_id, int core_idx); - bm_status_t bm_send_quit_message(); - void bm_wait_fwinit_done(); + bm_status_t bm_send_quit_message(int core_idx); + void bm_wait_fwinit_done(int core_idx); bm_status_t bm_alloc_arm_reserved(); void bm_free_arm_reserved(); @@ -105,37 +140,49 @@ class bm_device { void bm_free_instr_reserved(); bm_status_t bm_alloc_iommu_reserved(); void bm_free_iommu_reserved(); + bm_status_t bm_alloc_sys_tvgen_multi_core_reserved(); + void bm_free_sys_tvgen_multi_core_reserved(); bm_status_t bm_malloc_device_dword(bm_device_mem_t *pmem, int cnt); void bm_free_device(bm_device_mem_t mem); bm_status_t bm_init_l2_sram(); - thread_api_info *bm_get_thread_api_info(pthread_t thd_id); - bm_status_t bm_add_thread_api_info(pthread_t thd_id); - bm_status_t bm_remove_thread_api_info(pthread_t thd_id); + thread_api_info *bm_get_thread_api_info(pthread_t thd_id, int core_idx); + bm_status_t bm_add_thread_api_info(pthread_t thd_id, int core_idx); + bm_status_t bm_remove_thread_api_info(pthread_t thd_id, int core_idx); static void *bm_msg_done_poll(void *arg); - int dev_id; + int dev_id; + int core_num; // u64 device_mem_size; - bm_mem_pool device_mem_pool; - bm_device_mem_t instr_reserved_mem; - bm_device_mem_t arm_reserved_dev_mem; - bm_device_mem_t iommu_reserved_dev_mem; + bm_mem_pool device_mem_pool; + bm_device_mem_t instr_reserved_mem; + bm_device_mem_t arm_reserved_dev_mem; + bm_device_mem_t iommu_reserved_dev_mem; + bm_device_mem_t sys_tvgen_multi_core_reserved_dev_mem; /* arm reserved memory lock */ - pthread_mutex_t arm_reserved_lock; - - 
pthread_mutex_t api_lock; - pthread_t msg_poll_thread; - - std::map < pthread_t, thread_api_info> thread_api_table; - std::queue pending_api_queue; - u32 device_sync_last; - std::atomic device_sync_cpl; -}; - -class bm_device_manager { + pthread_mutex_t arm_reserved_lock; + + struct msg_done_poll_param_t + { + bm_device *dev; + int core_idx; + }; + pthread_t *msg_poll_threads; + msg_done_poll_param_t *msg_done_poll_params; + + pthread_mutex_t *api_locks; + std::map *thread_api_tables; + std::queue *pending_api_queues; + volatile u32 *device_sync_last; + std::atomic *device_sync_cpl; + }; + + class bm_device_manager + { public: bm_device *get_bm_device(int dev_id); + void free_bm_device(int dev_id); static bm_device_manager *get_dev_mgr(); static void destroy_dev_mgr(); @@ -146,18 +193,22 @@ class bm_device_manager { int dev_cnt; int max_dev_cnt; bm_device **bm_dev_list; + int *dev_user; static bm_device_manager *bm_dev_mgr; static pthread_mutex_t init_lock; -}; + }; -class bm_device_manager_control { + class bm_device_manager_control + { public: - bm_device_manager_control() { + bm_device_manager_control() + { } - ~ bm_device_manager_control() { - bm_device_manager::destroy_dev_mgr(); + ~bm_device_manager_control() + { + bm_device_manager::destroy_dev_mgr(); } -}; + }; #ifdef __cplusplus } diff --git a/bmlib/src/bmlib_internal.h b/bmlib/src/bmlib_internal.h index e6f61eb..55c4c38 100644 --- a/bmlib/src/bmlib_internal.h +++ b/bmlib/src/bmlib_internal.h @@ -15,6 +15,8 @@ //#include "..\..\common\bm1684\include_win\common_win.h" #else #include "linux/bmlib_ioctl.h" +#include +#include "rbtree.h" #endif #ifdef USING_CMODEL #include "bmlib_device.h" @@ -122,6 +124,8 @@ typedef struct bm_context { bm_cdma_iommu_mode cdma_iommu_mode; #endif bmlib_profile_t *profile; + struct rb_root root; + pthread_mutex_t mem_mutex; } bm_context_t, *bm_handle_t; DECL_EXPORT bm_status_t bm_send_api( @@ -130,6 +134,13 @@ DECL_EXPORT bm_status_t bm_send_api( const u8 *api, u32 size); 
+DECL_EXPORT bm_status_t bm_send_api_to_core( + bm_handle_t handle, + int api_id, + const u8 *api, + u32 size, + int core_id); + DECL_EXPORT bm_status_t bm_send_api_ext( bm_handle_t handle, int api_id, @@ -248,6 +259,20 @@ bm_status_t bm_get_device_time_us(bm_handle_t handle, void bm_enable_iommu(bm_handle_t handle); void bm_disable_iommu(bm_handle_t handle); +enum bm_rw_op { + BM_READ = 0, + BM_WRITE = 1, + BM_MALLOC = 2, + BM_FREE = 3, +}; + +struct bm_rw { + enum bm_rw_op op; + u64 paddr; + u32 value; + void *vaddr; +}; + struct bm_reg { int reg_addr; int reg_value; @@ -270,6 +295,8 @@ struct bm_card { bm_status_t bm_get_reg(bm_handle_t handle, struct bm_reg *p_reg); bm_status_t bm_set_reg(bm_handle_t handle, struct bm_reg *p_reg); +bm_status_t bm_rw_mix(bm_handle_t handle, struct bm_rw *reg); +bm_status_t bm_rw_host(bm_handle_t handle, struct bm_rw *reg); typedef struct bm_fw_desc { unsigned int *itcm_fw; diff --git a/bmlib/src/bmlib_md5.cpp b/bmlib/src/bmlib_md5.cpp index 8b1beb1..8e9d5de 100644 --- a/bmlib/src/bmlib_md5.cpp +++ b/bmlib/src/bmlib_md5.cpp @@ -178,13 +178,15 @@ void read_md5(unsigned char *file_path, unsigned char *md5sum) struct stat fileStat; unsigned int u32FileSize; unsigned char *file_buffer; - int i = 0; + ssize_t ret; fd = open((const char *)file_path, O_RDONLY); fstat(fd, &fileStat); u32FileSize = fileStat.st_size; file_buffer = (unsigned char *)malloc(u32FileSize); - read(fd, file_buffer, u32FileSize); + ret = read(fd, file_buffer, u32FileSize); + if (ret == -1) + printf("%s read %s failed\n", __func__, file_path); MD5_CTX md5; MD5Init(&md5); diff --git a/bmlib/src/bmlib_memory.cpp b/bmlib/src/bmlib_memory.cpp index e1bbee0..5136311 100644 --- a/bmlib/src/bmlib_memory.cpp +++ b/bmlib/src/bmlib_memory.cpp @@ -32,6 +32,7 @@ #include "bm_tv_gen_util.h" #endif #endif +#include "rbtree.h" #define BMLIB_MEMORY_LOG_TAG "bmlib_memory" @@ -43,10 +44,59 @@ } #endif +static bm_status_t buffer_add(bm_handle_t handle, + struct bm_mem_paddr *buffer) 
+{ + struct rb_node **p = &(handle->root.rb_node); + struct rb_node *parent = NULL; + struct bm_mem_paddr *entry; + long long result; + + while (*p) { + entry = container_of(*p, struct bm_mem_paddr, node); + result = buffer->paddr - entry->paddr; + parent = *p; + if (result < 0) + p = &((*p)->rb_left); + else if (result > 0) + p = &((*p)->rb_right); + else + return BM_ERR_FAILURE; + } + + rb_link_node(&buffer->node, parent, p); + rb_insert_color(&buffer->node, &handle->root); + + return BM_SUCCESS; +} + +static struct bm_mem_paddr *buffer_search(bm_handle_t handle, + unsigned long long paddr) +{ + struct rb_node *node = handle->root.rb_node; + long long result; + + while (node) { + struct bm_mem_paddr *data = container_of(node, struct bm_mem_paddr, node); + + result = paddr - data->paddr; + if (result < 0) + node = node->rb_left; + else if (result > 0) + node = node->rb_right; + else + return data; + } + + return NULL; +} + u32 bm_mem_get_size(struct bm_mem_desc mem) { return mem.size; } u64 sg_mem_get_size(struct sg_mem_desc mem) { return mem.size; } +u64 bm_mem_get_size_u64(struct bm_mem_desc_u64 mem) { return mem.size; } + static u64 bm_get_neuron_size(int n, int c, int h, int w) { u64 tensor_dim = (u64)n * (u64)c * (u64)h * (u64)w * FLOAT_SIZE; if (tensor_dim >= 0x400000000ULL) @@ -65,6 +115,15 @@ static u64 sg_get_neuron_size(u64 n, u64 c, u64 h, u64 w) { return tensor_dim; } +static u64 bm_get_neuron_size_u64(u64 n, u64 c, u64 h, u64 w) { + u64 tensor_dim = n * c * h * w * FLOAT_SIZE; + if (tensor_dim >= 0x400000000ULL) + bmlib_log(BMLIB_MEMORY_LOG_TAG, BMLIB_LOG_WARNING, + "tensor_dim = 0x%llx is illegal %s: %s: %d\n", + tensor_dim, __FILE__, __func__, __LINE__); + return tensor_dim; +} + static u32 bm_get_coeff_size(int coeff_count) { return (coeff_count * FLOAT_SIZE); } @@ -73,6 +132,10 @@ static u64 sg_get_coeff_size(u64 coeff_count) { return (coeff_count * FLOAT_SIZE); } +static u64 bm_get_coeff_size_u64(u64 coeff_count) { + return (coeff_count * 
FLOAT_SIZE); +} + bm_mem_type_t bm_mem_get_type(struct bm_mem_desc mem) { return mem.flags.u.mem_type; } @@ -81,6 +144,10 @@ bm_mem_type_t sg_mem_get_type(struct sg_mem_desc mem) { return mem.flags.u.mem_type; } +bm_mem_type_t bm_mem_get_type_u64(struct bm_mem_desc_u64 mem) { + return mem.flags.u.mem_type; +} + bm_device_mem_t bm_mem_from_device(unsigned long long device_addr, unsigned int len) { bm_device_mem_t mem; @@ -101,6 +168,16 @@ sg_device_mem_t sg_mem_from_device(unsigned long long device_addr, return mem; } +bm_device_mem_u64_t bm_mem_from_device_u64(unsigned long long device_addr, + unsigned long long len) { + bm_device_mem_u64_t mem; + memset(&mem, 0x0, sizeof(bm_device_mem_u64_t)); + mem.u.device.device_addr = device_addr; + mem.flags.u.mem_type = BM_MEM_TYPE_DEVICE; + mem.size = len; + return mem; +} + u32 bm_mem_get_device_size(struct bm_mem_desc mem) { if (bm_mem_get_type(mem) != BM_MEM_TYPE_DEVICE) bmlib_log(BMLIB_MEMORY_LOG_TAG, BMLIB_LOG_WARNING, @@ -117,6 +194,14 @@ u64 sg_mem_get_device_size(struct sg_mem_desc mem) { return mem.size; } +u64 bm_mem_get_device_size_u64(struct bm_mem_desc_u64 mem) { + if (bm_mem_get_type_u64(mem) != BM_MEM_TYPE_DEVICE) + bmlib_log(BMLIB_MEMORY_LOG_TAG, BMLIB_LOG_WARNING, + "mem type is illegal %s: %s: %d\n", + __FILE__, __func__, __LINE__); + return mem.size; +} + void bm_mem_set_device_size(struct bm_mem_desc* pmem, unsigned int size) { if (size % sizeof(float) != 0) bmlib_log(BMLIB_MEMORY_LOG_TAG, BMLIB_LOG_WARNING, @@ -133,6 +218,14 @@ void sg_mem_set_device_size(struct sg_mem_desc* pmem, unsigned long long size) { pmem->size = size; } +void bm_mem_set_device_size_u64(struct bm_mem_desc_u64* pmem, unsigned long long size) { + if (size % sizeof(float) != 0) + bmlib_log(BMLIB_MEMORY_LOG_TAG, BMLIB_LOG_WARNING, + "size = 0x%x is illegal %s: %s: %d\n", size, + __FILE__, __func__, __LINE__); + pmem->size = size; +} + u64 bm_mem_get_device_addr(struct bm_mem_desc mem) { if (bm_mem_get_type(mem) != BM_MEM_TYPE_DEVICE) 
bmlib_log(BMLIB_MEMORY_LOG_TAG, BMLIB_LOG_WARNING, @@ -147,6 +240,13 @@ u64 sg_mem_get_device_addr(struct sg_mem_desc mem) { return mem.u.device.device_addr; } +u64 bm_mem_get_device_addr_u64(struct bm_mem_desc_u64 mem) { + if (bm_mem_get_type_u64(mem) != BM_MEM_TYPE_DEVICE) + bmlib_log(BMLIB_MEMORY_LOG_TAG, BMLIB_LOG_WARNING, + "mem type is illegal %s: %s: %d\n", __FILE__, __func__, __LINE__); + return mem.u.device.device_addr; +} + void bm_mem_set_device_addr(struct bm_mem_desc* pmem, unsigned long long addr) { if (addr % sizeof(float) != 0) bmlib_log(BMLIB_MEMORY_LOG_TAG, BMLIB_LOG_WARNING, @@ -165,6 +265,15 @@ void sg_mem_set_device_addr(struct sg_mem_desc* pmem, unsigned long long addr) { pmem->u.device.device_addr = addr; } +void bm_mem_set_device_addr_u64(struct bm_mem_desc_u64* pmem, unsigned long long addr) { + if (addr % sizeof(float) != 0) + bmlib_log(BMLIB_MEMORY_LOG_TAG, BMLIB_LOG_WARNING, + "addr = 0x%llx is illegal %s: %s: %d\n", addr, + __FILE__, __func__, __LINE__); + + pmem->u.device.device_addr = addr; +} + void bm_mem_set_system_addr(struct bm_mem_desc* pmem, void *addr) { if ((u64)addr % sizeof(float) != 0) bmlib_log(BMLIB_MEMORY_LOG_TAG, BMLIB_LOG_WARNING, @@ -183,6 +292,15 @@ void sg_mem_set_system_addr(struct sg_mem_desc* pmem, void *addr) { pmem->u.system.system_addr = addr; } +void bm_mem_set_system_addr_u64(struct bm_mem_desc_u64* pmem, void *addr) { + if ((u64)addr % sizeof(float) != 0) + bmlib_log(BMLIB_MEMORY_LOG_TAG, BMLIB_LOG_WARNING, + "addr = 0x%llx is illegal %s: %s: %d\n", (u64)addr, + __FILE__, __func__, __LINE__); + + pmem->u.system.system_addr = addr; +} + void *bm_mem_get_system_addr(struct bm_mem_desc mem) { if (bm_mem_get_type(mem) != BM_MEM_TYPE_SYSTEM) bmlib_log(BMLIB_MEMORY_LOG_TAG, BMLIB_LOG_WARNING, @@ -199,6 +317,14 @@ void *sg_mem_get_system_addr(struct sg_mem_desc mem) { return mem.u.system.system_addr; } +void *bm_mem_get_system_addr_u64(struct bm_mem_desc_u64 mem) { + if (bm_mem_get_type_u64(mem) != 
BM_MEM_TYPE_SYSTEM) + bmlib_log(BMLIB_MEMORY_LOG_TAG, BMLIB_LOG_WARNING, + "mem type is illegal %s: %s: %d\n", + __FILE__, __func__, __LINE__); + return mem.u.system.system_addr; +} + #ifndef USING_CMODEL bm_status_t bm_get_carveout_heap_id(bm_handle_t ctx) { #ifdef __linux__ @@ -350,7 +476,49 @@ static int sg_alloc_gmem(bm_handle_t ctx, sg_device_mem_t *pmem, int heap_id_mas ret = ioctl(ctx->ion_fd, ION_IOC_ALLOC, &alloc_data); if (ret == 0) { - ioctl(ctx->dev_fd, BMDEV_ALLOC_GMEM_ION, pmem); + ioctl(ctx->dev_fd, BMDEV_ALLOC_GMEM_ION_U64, pmem); + break; + } + } + } + } else +#endif + { + alloc_data.heap_id_mask = heap_id_mask; + ret = platform_ioctl(ctx, BMDEV_ALLOC_GMEM, &alloc_data); + pmem->flags.u.gmem_heapid = alloc_data.heap_id; + } + + if (ret) { + pmem->u.device.device_addr = BM_MEM_ADDR_NULL; + pmem->u.device.dmabuf_fd = -1; + return BM_ERR_FAILURE; + } + pmem->u.device.device_addr = alloc_data.paddr; + pmem->u.device.dmabuf_fd = alloc_data.fd; + bm_profile_record_mem_end(ctx, bm_mem_op_type_t::ALLOC, alloc_data.paddr, pmem->size); + return BM_SUCCESS; +} + +static int bm_alloc_gmem_u64(bm_handle_t ctx, bm_device_mem_u64_t *pmem, int heap_id_mask) { + int ret; + + struct ion_allocation_data alloc_data; + memset(&alloc_data, 0, sizeof(alloc_data)); + alloc_data.len = pmem->size; + alloc_data.flags = 0; + bm_profile_record_mem_begin(ctx); +#ifdef __linux__ + if (ctx->ion_fd) { + // try all heaps as heap_id_mask set + for (int i = 0; i < ctx->heap_cnt; i++) { + if (((heap_id_mask >> i) & 0x1) == 0x1) { + pmem->flags.u.gmem_heapid = ctx->carveout_heap_id[i]; + alloc_data.heap_id_mask = (1 << ctx->carveout_heap_id[i]); + ret = ioctl(ctx->ion_fd, ION_IOC_ALLOC, &alloc_data); + + if (ret == 0) { + ioctl(ctx->dev_fd, BMDEV_ALLOC_GMEM_ION_U64, pmem); break; } } @@ -405,7 +573,25 @@ static bm_status_t sg_free_gmem(bm_handle_t ctx, sg_device_mem_t *pmem) { } #endif bm_profile_record_mem_begin(ctx); - if (platform_ioctl(ctx, BMDEV_FREE_GMEM, pmem)) return 
BM_ERR_FAILURE; + if (platform_ioctl(ctx, BMDEV_FREE_GMEM_U64, pmem)) return BM_ERR_FAILURE; + bm_profile_record_mem_end(ctx, bm_mem_op_type_t::FREE, pmem->u.device.device_addr, pmem->size); + return BM_SUCCESS; +} + +static bm_status_t bm_free_gmem_u64(bm_handle_t ctx, bm_device_mem_u64_t *pmem) { + if (pmem->u.device.device_addr < 0x100000000 || pmem->u.device.device_addr > 0x500000000){ + bmlib_log(BMLIB_MEMORY_LOG_TAG, BMLIB_LOG_ERROR, "free gmem addr 0x%llx is invalide!\n",pmem->u.device.device_addr); + return BM_ERR_FAILURE; + } + #ifdef __linux__ + if (close(pmem->u.device.dmabuf_fd)) { + bmlib_log(BMLIB_MEMORY_LOG_TAG, BMLIB_LOG_ERROR, + "free gmem failed!\n"); + return BM_ERR_FAILURE; + } + #endif + bm_profile_record_mem_begin(ctx); + if (platform_ioctl(ctx, BMDEV_FREE_GMEM_U64, pmem)) return BM_ERR_FAILURE; bm_profile_record_mem_end(ctx, bm_mem_op_type_t::FREE, pmem->u.device.device_addr, pmem->size); return BM_SUCCESS; } @@ -474,6 +660,42 @@ static bm_status_t __alloc_sg_device_mem_raw(bm_handle_t ctx, #endif } +static bm_status_t __alloc_bm_device_mem_raw_u64(bm_handle_t ctx, + bm_device_mem_u64_t *pmem, + int heap_id_mask) { + if (ctx == nullptr) { + bmlib_log(BMLIB_MEMORY_LOG_TAG, BMLIB_LOG_ERROR, + "handle is nullptr %s: %s: %d\n", __FILE__, __func__, __LINE__); + return BM_ERR_FAILURE; + } +#ifdef USING_CMODEL + UNUSED(heap_id_mask); + u64 addr; + addr = ctx->bm_dev->bm_device_alloc_mem(pmem->size); + pmem->u.device.device_addr = addr; + if (addr == MEM_POOL_ADDR_INVALID) { + bmlib_log(BMLIB_MEMORY_LOG_TAG, BMLIB_LOG_ERROR, + "No memory in device mem\n"); + return BM_ERR_NOMEM; + } + return BM_SUCCESS; +#else + int ret = 0; + + ret = bm_alloc_gmem_u64(ctx, pmem, heap_id_mask); + if (ret) { + bmlib_log(BMLIB_MEMORY_LOG_TAG, BMLIB_LOG_ERROR, + "bm_alloc_gmem failed, dev_id = %d, size = 0x%x\n", + ctx->dev_id, pmem->size); + return BM_ERR_NOMEM; + } +#ifdef SOC_MODE + bm_mem_invalidate_device_mem_u64(ctx, pmem); +#endif + return BM_SUCCESS; +#endif +} 
+ bm_status_t bm_malloc_neuron_device(bm_handle_t handle, bm_device_mem_t *pmem, int n, int c, int h, int w) { u32 size = 0; @@ -537,6 +759,35 @@ bm_status_t sg_malloc_neuron_device(bm_handle_t handle, sg_device_mem_t *pmem, return BM_SUCCESS; } +bm_status_t bm_malloc_neuron_device_u64(bm_handle_t handle, bm_device_mem_u64_t *pmem, + u64 n, u64 c, u64 h, u64 w) { + u32 size = 0; + u64 size_tmp = 0ULL; + int any_heap_mask = 0; + any_heap_mask = (2 << (ION_MAX_HEAP_CNT - 1)) - 1; + + if (handle == nullptr || pmem == nullptr) { + bmlib_log(BMLIB_MEMORY_LOG_TAG, BMLIB_LOG_ERROR, + "handle = 0x%p, or pmem = 0x%p is nullptr %s: %s: %d\n", handle, + pmem, __FILE__, __func__, __LINE__); + return BM_ERR_DEVNOTREADY; + } + + size_tmp = bm_get_neuron_size_u64(n, c, h, w); + + size = (u32)size_tmp; + pmem->flags.u.mem_type = BM_MEM_TYPE_DEVICE; + pmem->size = size; + BM_CHECK_RET(__alloc_bm_device_mem_raw_u64(handle, pmem, any_heap_mask)); + +#ifdef MM_DEBUG + bmlib_log(BMLIB_MEMORY_LOG_TAG, BMLIB_LOG_INFO, + "%s, total_size %d, num = %d, addr = %lx\n", __func__, + size, n, pmem->u.device.device_addr); +#endif + return BM_SUCCESS; +} + bm_status_t bm_malloc_device_dword(bm_handle_t handle, bm_device_mem_t *pmem, int count) { int any_heap_mask = 0; @@ -588,6 +839,32 @@ bm_status_t sg_malloc_device_dword(bm_handle_t handle, sg_device_mem_t *pmem, return BM_SUCCESS; } +bm_status_t bm_malloc_device_dword_u64(bm_handle_t handle, bm_device_mem_u64_t *pmem, + u64 count) { + int any_heap_mask = 0; + any_heap_mask = (2 << (ION_MAX_HEAP_CNT - 1)) - 1; + + if (handle == nullptr || pmem == nullptr) { + bmlib_log(BMLIB_MEMORY_LOG_TAG, BMLIB_LOG_ERROR, + "handle = 0x%p, or pmem = 0x%p is nullptr %s: %s: %d\n", handle, + pmem, __FILE__, __func__, __LINE__); + return BM_ERR_DEVNOTREADY; + } + + u64 size = bm_get_coeff_size_u64(count); + + pmem->flags.u.mem_type = BM_MEM_TYPE_DEVICE; + pmem->size = size; + BM_CHECK_RET(__alloc_bm_device_mem_raw_u64(handle, pmem, any_heap_mask)); + +#ifdef 
MM_DEBUG + bmlib_log(BMLIB_MEMORY_LOG_TAG, BMLIB_LOG_INFO, + "%s, size %d, addr = 0x%lx\n", __func__, size, + pmem->u.device.device_addr); +#endif + return BM_SUCCESS; +} + bm_status_t bm_malloc_device_byte(bm_handle_t handle, bm_device_mem_t *pmem, unsigned int size) { int any_heap_mask = 0; @@ -614,6 +891,36 @@ bm_status_t bm_malloc_device_byte(bm_handle_t handle, bm_device_mem_t *pmem, return BM_SUCCESS; } +bm_status_t bm_malloc_device_mem(bm_handle_t handle, unsigned long long *paddr, + int heap_id, unsigned long long size) { + int ret; + bm_device_mem_u64_t *dev_buffer; + struct bm_mem_paddr *bm_mem; + + bm_mem = (struct bm_mem_paddr *)malloc(sizeof(struct bm_mem_paddr)); + dev_buffer = (bm_device_mem_u64_t *)malloc(sizeof(bm_device_mem_u64_t)); + + ret = bm_malloc_device_byte_heap_u64(handle, dev_buffer, heap_id, size); + if (ret != BM_SUCCESS) { + printf("malloc device memory size = %llu failed, ret = %d\n", size, ret); + return BM_ERR_DEVNOTREADY; + } + + *paddr = bm_mem_get_device_addr_u64(*dev_buffer); + bm_mem->paddr = *paddr; + bm_mem->dev_buffer = dev_buffer; + + pthread_mutex_lock(&handle->mem_mutex); + ret = buffer_add(handle, bm_mem); + pthread_mutex_unlock(&handle->mem_mutex); + if (ret != BM_SUCCESS) { + printf("malloc device memory size = %llu failed, ret = %d\n", size, ret); + return BM_ERR_DEVNOTREADY; + } + + return BM_SUCCESS; +} + bm_status_t sg_malloc_device_byte(bm_handle_t handle, sg_device_mem_t *pmem, unsigned long long size) { int any_heap_mask = 0; @@ -640,8 +947,11 @@ bm_status_t sg_malloc_device_byte(bm_handle_t handle, sg_device_mem_t *pmem, return BM_SUCCESS; } -bm_status_t bm_malloc_device_byte_heap(bm_handle_t handle, bm_device_mem_t *pmem, - int heap_id, unsigned int size) { +bm_status_t bm_malloc_device_byte_u64(bm_handle_t handle, bm_device_mem_u64_t *pmem, + unsigned long long size) { + int any_heap_mask = 0; + any_heap_mask = (2 << (ION_MAX_HEAP_CNT - 1)) - 1; + if (handle == nullptr || pmem == nullptr) { 
bmlib_log(BMLIB_MEMORY_LOG_TAG, BMLIB_LOG_ERROR, "handle = 0x%p, or pmem = 0x%p is nullptr %s: %s: %d\n", handle, @@ -658,12 +968,13 @@ bm_status_t bm_malloc_device_byte_heap(bm_handle_t handle, bm_device_mem_t *pmem } #endif pmem->size = size; - BM_CHECK_RET(__alloc_device_mem_raw(handle, pmem, 0x1 << heap_id)); + + BM_CHECK_RET(__alloc_bm_device_mem_raw_u64(handle, pmem, any_heap_mask)); return BM_SUCCESS; } -bm_status_t sg_malloc_device_byte_heap(bm_handle_t handle, sg_device_mem_t *pmem, - int heap_id, unsigned long long size) { +bm_status_t bm_malloc_device_byte_heap(bm_handle_t handle, bm_device_mem_t *pmem, + int heap_id, unsigned int size) { if (handle == nullptr || pmem == nullptr) { bmlib_log(BMLIB_MEMORY_LOG_TAG, BMLIB_LOG_ERROR, "handle = 0x%p, or pmem = 0x%p is nullptr %s: %s: %d\n", handle, @@ -680,16 +991,16 @@ bm_status_t sg_malloc_device_byte_heap(bm_handle_t handle, sg_device_mem_t *pmem } #endif pmem->size = size; - BM_CHECK_RET(__alloc_sg_device_mem_raw(handle, pmem, 0x1 << heap_id)); + BM_CHECK_RET(__alloc_device_mem_raw(handle, pmem, 0x1 << heap_id)); return BM_SUCCESS; } -bm_status_t bm_malloc_device_byte_heap_mask(bm_handle_t handle, bm_device_mem_t *pmem, - int heap_id_mask, unsigned int size) { - if (handle == nullptr || pmem == nullptr || heap_id_mask == 0) { +bm_status_t sg_malloc_device_byte_heap(bm_handle_t handle, sg_device_mem_t *pmem, + int heap_id, unsigned long long size) { + if (handle == nullptr || pmem == nullptr) { bmlib_log(BMLIB_MEMORY_LOG_TAG, BMLIB_LOG_ERROR, - "handle = 0x%p, or pmem = 0x%p is nullptr, or heap_id_mask = 0x%x, %s: %s: %d\n", handle, - pmem, heap_id_mask, __FILE__, __func__, __LINE__); + "handle = 0x%p, or pmem = 0x%p is nullptr %s: %s: %d\n", handle, + pmem, __FILE__, __func__, __LINE__); return BM_ERR_DEVNOTREADY; } @@ -702,11 +1013,55 @@ bm_status_t bm_malloc_device_byte_heap_mask(bm_handle_t handle, bm_device_mem_t } #endif pmem->size = size; - BM_CHECK_RET(__alloc_device_mem_raw(handle, pmem, 
heap_id_mask)); + BM_CHECK_RET(__alloc_sg_device_mem_raw(handle, pmem, 0x1 << heap_id)); return BM_SUCCESS; } -bm_status_t sg_malloc_device_byte_heap_mask(bm_handle_t handle, sg_device_mem_t *pmem, +bm_status_t bm_malloc_device_byte_heap_u64(bm_handle_t handle, bm_device_mem_u64_t *pmem, + int heap_id, unsigned long long size) { + if (handle == nullptr || pmem == nullptr) { + bmlib_log(BMLIB_MEMORY_LOG_TAG, BMLIB_LOG_ERROR, + "handle = 0x%p, or pmem = 0x%p is nullptr %s: %s: %d\n", handle, + pmem, __FILE__, __func__, __LINE__); + return BM_ERR_DEVNOTREADY; + } + + pmem->flags.u.mem_type = BM_MEM_TYPE_DEVICE; + +#ifndef USING_CMODEL + if (handle->misc_info.chipid == 0x1682) { + // keep 4byte aligned + size = ((size + FLOAT_SIZE - 1) / FLOAT_SIZE) * FLOAT_SIZE; + } +#endif + pmem->size = size; + BM_CHECK_RET(__alloc_bm_device_mem_raw_u64(handle, pmem, 0x1 << heap_id)); + return BM_SUCCESS; +} + +bm_status_t bm_malloc_device_byte_heap_mask(bm_handle_t handle, bm_device_mem_t *pmem, + int heap_id_mask, unsigned int size) { + if (handle == nullptr || pmem == nullptr || heap_id_mask == 0) { + bmlib_log(BMLIB_MEMORY_LOG_TAG, BMLIB_LOG_ERROR, + "handle = 0x%p, or pmem = 0x%p is nullptr, or heap_id_mask = 0x%x, %s: %s: %d\n", handle, + pmem, heap_id_mask, __FILE__, __func__, __LINE__); + return BM_ERR_DEVNOTREADY; + } + + pmem->flags.u.mem_type = BM_MEM_TYPE_DEVICE; + +#ifndef USING_CMODEL + if (handle->misc_info.chipid == 0x1682) { + // keep 4byte aligned + size = ((size + FLOAT_SIZE - 1) / FLOAT_SIZE) * FLOAT_SIZE; + } +#endif + pmem->size = size; + BM_CHECK_RET(__alloc_device_mem_raw(handle, pmem, heap_id_mask)); + return BM_SUCCESS; +} + +bm_status_t sg_malloc_device_byte_heap_mask(bm_handle_t handle, sg_device_mem_t *pmem, int heap_id_mask, unsigned long long size) { if (handle == nullptr || pmem == nullptr || heap_id_mask == 0) { bmlib_log(BMLIB_MEMORY_LOG_TAG, BMLIB_LOG_ERROR, @@ -728,6 +1083,28 @@ bm_status_t sg_malloc_device_byte_heap_mask(bm_handle_t handle, 
sg_device_mem_t return BM_SUCCESS; } +bm_status_t bm_malloc_device_byte_heap_mask_u64(bm_handle_t handle, bm_device_mem_u64_t *pmem, + int heap_id_mask, unsigned long long size) { + if (handle == nullptr || pmem == nullptr || heap_id_mask == 0) { + bmlib_log(BMLIB_MEMORY_LOG_TAG, BMLIB_LOG_ERROR, + "handle = 0x%p, or pmem = 0x%p is nullptr, or heap_id_mask = 0x%x, %s: %s: %d\n", handle, + pmem, heap_id_mask, __FILE__, __func__, __LINE__); + return BM_ERR_DEVNOTREADY; + } + + pmem->flags.u.mem_type = BM_MEM_TYPE_DEVICE; + +#ifndef USING_CMODEL + if (handle->misc_info.chipid == 0x1682) { + // keep 4byte aligned + size = ((size + FLOAT_SIZE - 1) / FLOAT_SIZE) * FLOAT_SIZE; + } +#endif + pmem->size = size; + BM_CHECK_RET(__alloc_bm_device_mem_raw_u64(handle, pmem, heap_id_mask)); + return BM_SUCCESS; +} + void bm_free_device(bm_handle_t ctx, bm_device_mem_t mem) { if (ctx == nullptr) { bmlib_log(BMLIB_MEMORY_LOG_TAG, BMLIB_LOG_ERROR, @@ -752,6 +1129,22 @@ void bm_free_device(bm_handle_t ctx, bm_device_mem_t mem) { #endif } +void bm_free_device_mem(bm_handle_t ctx, unsigned long long paddr) { + + struct bm_mem_paddr *bm_mem; + bm_device_mem_u64_t mem; + + pthread_mutex_lock(&ctx->mem_mutex); + bm_mem = buffer_search(ctx, paddr); + mem = *(bm_mem->dev_buffer); + rb_erase(&bm_mem->node, &ctx->root); + pthread_mutex_unlock(&ctx->mem_mutex); + + free(bm_mem->dev_buffer); + free(bm_mem); + bm_free_device_u64(ctx, mem); +} + void sg_free_device(bm_handle_t ctx, sg_device_mem_t mem) { if (ctx == nullptr) { bmlib_log(BMLIB_MEMORY_LOG_TAG, BMLIB_LOG_ERROR, @@ -776,6 +1169,30 @@ void sg_free_device(bm_handle_t ctx, sg_device_mem_t mem) { #endif } +void bm_free_device_u64(bm_handle_t ctx, bm_device_mem_u64_t mem) { + if (ctx == nullptr) { + bmlib_log(BMLIB_MEMORY_LOG_TAG, BMLIB_LOG_ERROR, + "handle is nullptr %s: %s: %d\n", __FILE__, __func__, __LINE__); + return; + } + if (bm_mem_get_type_u64(mem) != BM_MEM_TYPE_DEVICE) + bmlib_log(BMLIB_MEMORY_LOG_TAG, BMLIB_LOG_WARNING, + "mem 
type is illegal %s: %s: %d\n", __FILE__, __func__, __LINE__); + +#ifdef MM_DEBUG + bmlib_log(BMLIB_MEMORY_LOG_TAG, BMLIB_LOG_INFO, + "%s, type %d, size %d, addr = 0x%llx\n", __func__, + bm_mem_get_type_u64(mem), bm_mem_get_size_u64(mem), + bm_mem_get_device_addr_u64(mem)); +#endif + +#ifdef USING_CMODEL + ctx->bm_dev->bm_device_free_mem(bm_mem_get_device_addr_u64(mem)); +#else + bm_free_gmem_u64(ctx, &mem); +#endif +} + void bm_set_device_mem(bm_device_mem_t *pmem, unsigned int size, u64 addr) { pmem->u.device.device_addr = addr; pmem->flags.u.mem_type = BM_MEM_TYPE_DEVICE; @@ -788,6 +1205,12 @@ void sg_set_device_mem(sg_device_mem_t *pmem, unsigned long long size, u64 addr) pmem->size = size; } +void bm_set_device_mem_u64(bm_device_mem_u64_t *pmem, unsigned long long size, u64 addr) { + pmem->u.device.device_addr = addr; + pmem->flags.u.mem_type = BM_MEM_TYPE_DEVICE; + pmem->size = size; +} + static bool bm_device_mem_page_aligned(bm_device_mem_t mem) { u64 device_mem_addr = bm_mem_get_device_addr(mem); if ((device_mem_addr & (PAGE_SIZE - 1)) == 0) { @@ -806,6 +1229,15 @@ static bool sg_device_mem_page_aligned(sg_device_mem_t mem) { } } +static bool bm_device_mem_page_aligned_u64(bm_device_mem_u64_t mem) { + u64 device_mem_addr = bm_mem_get_device_addr_u64(mem); + if ((device_mem_addr & (PAGE_SIZE - 1)) == 0) { + return true; + } else { + return false; + } +} + static bool bm_device_mem_range_valid(bm_handle_t handle, bm_device_mem_t mem) { #ifdef USING_CMODEL UNUSED(handle); @@ -872,6 +1304,39 @@ static bool sg_device_mem_range_valid(bm_handle_t handle, sg_device_mem_t mem) { return true; } +static bool bm_device_mem_range_valid_u64(bm_handle_t handle, bm_device_mem_u64_t mem) { +#ifdef USING_CMODEL + UNUSED(handle); + UNUSED(mem); +#else + u64 saddr = bm_mem_get_device_addr_u64(mem); + u64 eaddr = bm_mem_get_size_u64(mem) + saddr; + + if (handle->misc_info.chipid == 0x1684 || handle->misc_info.chipid == 0x1686) { + if (((saddr >= 0x100000000 && saddr <= 
0x4ffffffff) || (saddr >= 0x0 && saddr <= 0x103fffff)) + && ((eaddr >= 0x100000000 && eaddr <= 0x500000000) || (eaddr >= 0x0 && eaddr <= 0x10400000))) { + return true; + } else { + bmlib_log(BMLIB_MEMORY_LOG_TAG, BMLIB_LOG_ERROR, + "%s saddr=0x%llx eaddr=0x%llx out of range\n", __func__, saddr, eaddr); + return false; + } + } + + if (handle->misc_info.chipid == 0x1682) { + if (saddr >= 0x100000000 && saddr <= 0x2ffffffff + && eaddr >= 0x100000000 && eaddr <= 0x300000000) { + return true; + } else { + bmlib_log(BMLIB_MEMORY_LOG_TAG, BMLIB_LOG_ERROR, + "%s saddr=0x%llx eaddr=0x%llx out of range\n", __func__, saddr, eaddr); + return false; + } + } +#endif + return true; +} + bm_status_t bm_get_gmem_heap_id(bm_handle_t handle, bm_device_mem_t *pmem, unsigned int *heapid) { unsigned int val = 0; @@ -940,6 +1405,40 @@ bm_status_t sg_get_gmem_heap_id(bm_handle_t handle, sg_device_mem_t *pmem, unsig return BM_SUCCESS; } +bm_status_t bm_get_gmem_heap_id_u64(bm_handle_t handle, bm_device_mem_u64_t *pmem, unsigned int *heapid) { + unsigned int val = 0; + + if (!handle || !pmem || !heapid) { + bmlib_log(BMLIB_MEMORY_LOG_TAG, BMLIB_LOG_ERROR, + "nullptr %s: %s: %d\n", __FILE__, __func__, __LINE__); + return BM_ERR_PARAM; + } + + if (bm_mem_get_type_u64(*pmem) != BM_MEM_TYPE_DEVICE) { + bmlib_log(BMLIB_MEMORY_LOG_TAG, BMLIB_LOG_ERROR, + "mem type is illegal %s: %s: %d\n", __FILE__, __func__, __LINE__); + return BM_ERR_FAILURE; + } + + if (!bm_device_mem_range_valid_u64(handle, *pmem)) { + bmlib_log(BMLIB_MEMORY_LOG_TAG, BMLIB_LOG_ERROR, + "mem range is illegal %s: %s: %d\n", __FILE__, __func__, __LINE__); + return BM_ERR_PARAM; + } + +#ifndef USING_CMODEL + val = pmem->flags.u.gmem_heapid; + if (val > ION_MAX_HEAP_CNT) { + bmlib_log(BMLIB_MEMORY_LOG_TAG, BMLIB_LOG_ERROR, + "heap id is illegal %s: %s: %d\n", __FILE__, __func__, __LINE__); + return BM_ERR_FAILURE; + } +#endif + + *heapid = val; + return BM_SUCCESS; +} + bm_status_t bm_get_gmem_total_heap_num(bm_handle_t handle, 
unsigned int *heap_num) { #ifdef USING_CMODEL UNUSED(handle); @@ -1090,6 +1589,48 @@ bm_status_t sg_mem_mmap_device_mem(bm_handle_t handle, sg_device_mem_t *dmem, return BM_SUCCESS; } +bm_status_t bm_mem_mmap_device_mem_u64(bm_handle_t handle, bm_device_mem_u64_t *dmem, + u64 *vmem) { +#ifndef USING_CMODEL + void *ret = 0; + + if (handle->misc_info.pcie_soc_mode == 0) { + bmlib_log(BMLIB_MEMORY_LOG_TAG, BMLIB_LOG_ERROR, + "bmlib not support mmap in pcie mode\n"); + return BM_ERR_FAILURE; + } +#ifdef __linux__ + if (!bm_device_mem_page_aligned_u64(*dmem)) { + bmlib_log(BMLIB_MEMORY_LOG_TAG, BMLIB_LOG_ERROR, + "bm_mem_mmap_device_mem device_mem_addr = 0x%llx is illegal\n", + bm_mem_get_device_addr_u64(*dmem)); + return BM_ERR_PARAM; + } + + if (!bm_device_mem_range_valid_u64(handle, *dmem)) { + return BM_ERR_PARAM; + } + + unsigned long long size = bm_mem_get_device_size_u64(*dmem); + unsigned long long aligned_size = (size + PAGE_SIZE - 1) & (~(PAGE_SIZE - 1)); + + ret = mmap(0, aligned_size, PROT_READ | PROT_WRITE, MAP_SHARED, + handle->dev_fd, bm_mem_get_device_addr_u64(*dmem)); + if (MAP_FAILED != ret) { + *vmem = (u64)ret; + return BM_SUCCESS; + } else { + return BM_ERR_FAILURE; + } + #endif +#else + //handle->bm_dev->get_global_memaddr_(handle->dev_id); + *vmem = (u64)((u8*)handle->bm_dev->get_global_memaddr_(handle->dev_id) + + bm_mem_get_device_addr_u64(*dmem) - handle->bm_dev->cmodel_get_gmem_start_addr_()); +#endif +return BM_SUCCESS; +} + bm_status_t bm_mem_mmap_device_mem_no_cache(bm_handle_t handle, bm_device_mem_t *dmem, u64 *vmem) { @@ -1188,6 +1729,54 @@ bm_status_t sg_mem_mmap_device_mem_no_cache(bm_handle_t handle, return BM_SUCCESS; } +bm_status_t bm_mem_mmap_device_mem_no_cache_u64(bm_handle_t handle, + bm_device_mem_u64_t *dmem, + u64 *vmem) { + +#ifndef USING_CMODEL + void *ret = 0; + + if (handle->misc_info.pcie_soc_mode == 0) { + bmlib_log(BMLIB_MEMORY_LOG_TAG, BMLIB_LOG_ERROR, + "bmlib not support mmap in pcie mode\n"); + return 
BM_ERR_FAILURE; + } +#ifdef __linux__ + if (!bm_device_mem_page_aligned_u64(*dmem)) { + bmlib_log(BMLIB_MEMORY_LOG_TAG, BMLIB_LOG_ERROR, + "bm_mem_mmap_device_mem device_mem_addr = 0x%llx is illegal\n", + bm_mem_get_device_addr_u64(*dmem)); + return BM_ERR_PARAM; + } + + if (!bm_device_mem_range_valid_u64(handle, *dmem)) { + return BM_ERR_PARAM; + } + + unsigned long long size = bm_mem_get_device_size_u64(*dmem); + unsigned long long aligned_size = (size + PAGE_SIZE - 1) & (~(PAGE_SIZE - 1)); + + /*0x1000000000 is used to set the flag + in driver bmdev_mmap function to open the mmap with no cache*/ + ret = mmap(0, aligned_size, PROT_READ | PROT_WRITE, MAP_SHARED, + handle->dev_fd, bm_mem_get_device_addr_u64(*dmem) | 0x1000000000); + + if (MAP_FAILED != ret) { + *vmem = (u64)ret; + return BM_SUCCESS; + } else { + return BM_ERR_FAILURE; + } + + #endif +#else + //handle->bm_dev->get_global_memaddr_(handle->dev_id); + *vmem = (u64)((u8*)handle->bm_dev->get_global_memaddr_(handle->dev_id) + + bm_mem_get_device_addr_u64(*dmem) - handle->bm_dev->cmodel_get_gmem_start_addr_()); +#endif +return BM_SUCCESS; +} + /* use his funtion to make cache of part of the device memory invalid */ @@ -1249,29 +1838,58 @@ bm_status_t sg_mem_invalidate_partial_device_mem(bm_handle_t handle, return BM_SUCCESS; } -bm_status_t bm_mem_vir_to_phy(bm_handle_t handle, unsigned long long vir_addr, unsigned long long *phy_addr) { +bm_status_t bm_mem_invalidate_partial_device_mem_u64(bm_handle_t handle, + bm_device_mem_u64_t *dmem, + u64 offset, u64 len) { #ifndef USING_CMODEL - struct bm_gmem_addr addr; - addr.vir_addr = vir_addr; - if (handle->misc_info.pcie_soc_mode == 0) { bmlib_log(BMLIB_MEMORY_LOG_TAG, BMLIB_LOG_ERROR, - "bmlib not support bm_mem_vir_to_phy in pcie mode\n"); + "bmlib not support invalidate parital mem in pcie mode\n"); return BM_ERR_FAILURE; } - if (0 != platform_ioctl(handle, BMDEV_GMEM_ADDR, &addr)) { - bmlib_log(BMLIB_MEMORY_LOG_TAG, BMLIB_LOG_ERROR, - "bm_mem_vir_to_phy 
fail vir add = 0x%llx\n", vir_addr); + if (!bm_device_mem_range_valid_u64(handle, *dmem)) { return BM_ERR_PARAM; - } - *phy_addr = addr.phy_addr; + u64 device_mem_addr = bm_mem_get_device_addr_u64(*dmem); + u64 para = (((device_mem_addr + offset)>>6) << 32) + len + + ((device_mem_addr + offset)&63); + bm_profile_record_mem_begin(handle); + if (0 != platform_ioctl(handle, BMDEV_INVALIDATE_GMEM, ¶)) return BM_ERR_FAILURE; + bm_profile_record_mem_end(handle, bm_mem_op_type_t::INVALIDATE, device_mem_addr+offset, len); #else UNUSED(handle); - UNUSED(vir_addr); - UNUSED(phy_addr); + UNUSED(dmem); + UNUSED(offset); + UNUSED(len); +#endif + return BM_SUCCESS; +} + +bm_status_t bm_mem_vir_to_phy(bm_handle_t handle, unsigned long long vir_addr, unsigned long long *phy_addr) { +#ifndef USING_CMODEL + struct bm_gmem_addr addr; + addr.vir_addr = vir_addr; + + if (handle->misc_info.pcie_soc_mode == 0) { + bmlib_log(BMLIB_MEMORY_LOG_TAG, BMLIB_LOG_ERROR, + "bmlib not support bm_mem_vir_to_phy in pcie mode\n"); + return BM_ERR_FAILURE; + } + + if (0 != platform_ioctl(handle, BMDEV_GMEM_ADDR, &addr)) { + bmlib_log(BMLIB_MEMORY_LOG_TAG, BMLIB_LOG_ERROR, + "bm_mem_vir_to_phy fail vir add = 0x%llx\n", vir_addr); + return BM_ERR_PARAM; + + } + + *phy_addr = addr.phy_addr; +#else + UNUSED(handle); + UNUSED(vir_addr); + UNUSED(phy_addr); #endif return BM_SUCCESS; } @@ -1305,6 +1923,19 @@ bm_status_t sg_mem_invalidate_device_mem(bm_handle_t handle, sg_mem_get_device_size(*dmem)); } +bm_status_t bm_mem_invalidate_device_mem_u64(bm_handle_t handle, + bm_device_mem_u64_t *dmem) { +#ifndef USING_CMODEL + if (handle->misc_info.pcie_soc_mode == 0) { + bmlib_log(BMLIB_MEMORY_LOG_TAG, BMLIB_LOG_ERROR, + "bmlib not support invalidate mem in pcie mode\n"); + return BM_ERR_FAILURE; + } +#endif + return bm_mem_invalidate_partial_device_mem_u64(handle, dmem, 0, + bm_mem_get_device_size_u64(*dmem)); +} + /* use his funtion to flush part of device mem data to real memory currently, the speed of 
mmecpy_s2d is not so slow, this function may not be @@ -1368,6 +1999,35 @@ bm_status_t sg_mem_flush_partial_device_mem(bm_handle_t handle, return BM_SUCCESS; } +bm_status_t bm_mem_flush_partial_device_mem_u64(bm_handle_t handle, + bm_device_mem_u64_t *dmem, + u64 offset, u64 len) { +#ifndef USING_CMODEL + if (handle->misc_info.pcie_soc_mode == 0) { + bmlib_log(BMLIB_MEMORY_LOG_TAG, BMLIB_LOG_ERROR, + "bmlib not support flush parital mem in pcie mode\n"); + return BM_ERR_FAILURE; + } + + if (!bm_device_mem_range_valid_u64(handle, *dmem)) { + return BM_ERR_PARAM; + } + + u64 device_mem_addr = bm_mem_get_device_addr_u64(*dmem); + u64 para = (((device_mem_addr + (u64)offset)>>6) << 32) + len + + ((device_mem_addr + offset)&63); + bm_profile_record_mem_begin(handle); + if (0 != platform_ioctl(handle, BMDEV_FLUSH_GMEM, ¶)) return BM_ERR_FAILURE; + bm_profile_record_mem_end(handle, bm_mem_op_type_t::FLUSH, device_mem_addr+offset, len); +#else + UNUSED(handle); + UNUSED(dmem); + UNUSED(offset); + UNUSED(len); +#endif + return BM_SUCCESS; +} + /* use his funtion to flush data to real memory currently, the speed of mmecpy_s2d is not so slow, this function may not be @@ -1397,6 +2057,18 @@ bm_status_t sg_mem_flush_device_mem(bm_handle_t handle, sg_device_mem_t *dmem) { sg_mem_get_device_size(*dmem)); } +bm_status_t bm_mem_flush_device_mem_u64(bm_handle_t handle, bm_device_mem_u64_t *dmem) { +#ifndef USING_CMODEL + if (handle->misc_info.pcie_soc_mode == 0) { + bmlib_log(BMLIB_MEMORY_LOG_TAG, BMLIB_LOG_ERROR, + "bmlib not support flush mem in pcie mode\n"); + return BM_ERR_FAILURE; + } +#endif + return bm_mem_flush_partial_device_mem_u64(handle, dmem, 0, + bm_mem_get_device_size_u64(*dmem)); +} + /* use this function to unmap device memory in user space we will unmap the page aligned size @@ -1439,6 +2111,25 @@ bm_status_t sg_mem_unmap_device_mem(bm_handle_t handle, void *vmem, u64 size) { return BM_SUCCESS; } +bm_status_t bm_mem_unmap_device_mem_u64(bm_handle_t handle, void 
*vmem, u64 size) { +#ifndef USING_CMODEL + if (handle->misc_info.pcie_soc_mode == 0) { + bmlib_log(BMLIB_MEMORY_LOG_TAG, BMLIB_LOG_ERROR, + "bmlib not support unmap in pcie mode\n"); + return BM_ERR_FAILURE; + } +#ifdef __linux__ + unsigned long long aligned_size = (size + PAGE_SIZE - 1) & (~(PAGE_SIZE - 1)); + (void)munmap(vmem, aligned_size); +#endif +#else + UNUSED(handle); + UNUSED(vmem); + UNUSED(size); +#endif + return BM_SUCCESS; +} + bm_status_t bm_memcpy_s2d(bm_handle_t handle, bm_device_mem_t dst, void *src) { #ifdef USING_CMODEL return handle->bm_dev->bm_device_memcpy_s2d(dst, src); @@ -1585,6 +2276,65 @@ bm_status_t sg_memcpy_s2d(bm_handle_t handle, sg_device_mem_t dst, void *src) { #endif } +bm_status_t bm_memcpy_s2d_u64(bm_handle_t handle, bm_device_mem_u64_t dst, void *src) { +#ifdef USING_CMODEL + return handle->bm_dev->bm_device_memcpy_s2d_u64(dst, src); +#else + u64 size; + int trans_size = 0x10000000;//256MB + int tran_over = 0; + + if (handle == nullptr) { + bmlib_log(BMLIB_MEMORY_LOG_TAG, BMLIB_LOG_ERROR, + "handle is nullptr %s: %s: %d\n", __FILE__, __func__, __LINE__); + return BM_ERR_DEVNOTREADY; + } + + if (!bm_device_mem_range_valid_u64(handle, dst)) { + return BM_ERR_PARAM; + } + + size = bm_mem_get_size_u64(dst); + + for(int i=0; tran_over == 0; i++) { + bm_memcpy_info_t bm_mem_s2d; +#ifdef __linux__ + #ifdef USING_INT_CDMA + bm_mem_s2d.intr = true; + #else + bm_mem_s2d.intr = false; + #endif + bm_mem_s2d.host_addr = (void *)((u64)src + i * trans_size); +#else + bm_mem_s2d.intr = 1; + bm_mem_s2d.host_addr = (u64)src + i * trans_size; +#endif + bm_mem_s2d.device_addr = bm_mem_get_device_addr_u64(dst) + i * trans_size; + if(size > trans_size) { + bm_mem_s2d.size = trans_size; + size -= trans_size; + } else { + bm_mem_s2d.size = size; + tran_over = 1; + } + + bm_mem_s2d.dir = HOST2CHIP; + bm_mem_s2d.src_device_addr = 0; + bm_mem_s2d.cdma_iommu_mode = handle->cdma_iommu_mode; + + union { void* ptr; u64 val; } ptr_to_u64; + ptr_to_u64.ptr = 
(void *)((u64)src + i * trans_size); + bm_profile_record_memcpy_begin(handle); + auto res = platform_ioctl(handle, BMDEV_MEMCPY, &bm_mem_s2d); + bm_profile_record_memcpy_end(handle, ptr_to_u64.val, bm_mem_s2d.device_addr, bm_mem_s2d.size, bm_mem_s2d.dir); + if (0 != res) + return BM_ERR_FAILURE; + } + + return BM_SUCCESS; +#endif +} + bm_status_t bm_memcpy_s2d_poll(bm_handle_t handle, bm_device_mem_t dst, void * src) { @@ -1701,6 +2451,138 @@ bm_status_t sg_memcpy_s2d_poll(bm_handle_t handle, #endif } +bm_status_t bm_memcpy_s2d_poll_u64(bm_handle_t handle, + bm_device_mem_u64_t dst, + void * src) { +#ifdef USING_CMODEL + return handle->bm_dev->bm_device_memcpy_s2d_u64(dst, src); +#else + u64 size; + int trans_size = 0x10000000;//256MB + int tran_over = 0; + + if (handle == nullptr) { + bmlib_log(BMLIB_MEMORY_LOG_TAG, + BMLIB_LOG_ERROR, + "handle is nullptr %s: %s: %d\n", + __FILE__, + __func__, + __LINE__); + return BM_ERR_DEVNOTREADY; + } + + if (!bm_device_mem_range_valid_u64(handle, dst)) { + return BM_ERR_PARAM; + } + + size = bm_mem_get_size_u64(dst); + bm_memcpy_info_t bm_mem_s2d; + + for(int i=0; tran_over == 0; i++) { +#ifdef __linux__ + bm_mem_s2d.intr = false; + bm_mem_s2d.host_addr = (void *)((u64)src + i * trans_size); +#else + bm_mem_s2d.intr = true; + bm_mem_s2d.host_addr = (u64)src + i * trans_size; +#endif + bm_mem_s2d.device_addr = bm_mem_get_device_addr_u64(dst) + i * trans_size; + if(size > trans_size) { + bm_mem_s2d.size = trans_size; + size -= trans_size; + } else { + bm_mem_s2d.size = size; + tran_over = 1; + } + bm_mem_s2d.dir = HOST2CHIP; + bm_mem_s2d.src_device_addr = 0; + bm_mem_s2d.cdma_iommu_mode = handle->cdma_iommu_mode; + + union { + void *ptr; + u64 val; + } ptr_to_u64; + ptr_to_u64.ptr = (void *)((u64)src + i * trans_size); + bm_profile_record_memcpy_begin(handle); + auto res = platform_ioctl(handle, BMDEV_MEMCPY, &bm_mem_s2d); + bm_profile_record_memcpy_end(handle, + ptr_to_u64.val, + bm_mem_s2d.device_addr, + bm_mem_s2d.size, + 
bm_mem_s2d.dir); + if (0 != res) + return BM_ERR_FAILURE; + } + return BM_SUCCESS; +#endif +} + +bm_status_t bm_smmu_s2d_poll(bm_handle_t handle, + bm_device_mem_t dst, + void * src) { +#ifdef USING_CMODEL + return BM_SUCCESS; +#else + u32 size; + int trans_size = 0x800000;//8MB + int tran_over = 0; + + if (handle == nullptr) { + bmlib_log(BMLIB_MEMORY_LOG_TAG, + BMLIB_LOG_ERROR, + "handle is nullptr %s: %s: %d\n", + __FILE__, + __func__, + __LINE__); + return BM_ERR_DEVNOTREADY; + } + + if (!bm_device_mem_range_valid(handle, dst)) { + return BM_ERR_PARAM; + } + + size = bm_mem_get_size(dst); + bm_memcpy_info_t bm_mem_s2d; + + for(int i=0; tran_over == 0; i++) { +#ifdef __linux__ + bm_mem_s2d.intr = false; + bm_mem_s2d.host_addr = (void *)((u64)src + i * trans_size); +#else + bm_mem_s2d.intr = true; + bm_mem_s2d.host_addr = (u64)src + i * trans_size; +#endif + bm_mem_s2d.device_addr = bm_mem_get_device_addr(dst) + i * trans_size; + if(size > trans_size) { + bm_mem_s2d.size = trans_size; + size -= trans_size; + } else { + bm_mem_s2d.size = size; + tran_over = 1; + } + bm_mem_s2d.dir = HOST2CHIP; + bm_mem_s2d.src_device_addr = 0; + bm_mem_s2d.cdma_iommu_mode = handle->cdma_iommu_mode; + + union { + void *ptr; + u64 val; + } ptr_to_u64; + ptr_to_u64.ptr = (void *)((u64)src + i * trans_size); + bm_profile_record_memcpy_begin(handle); + auto res = platform_ioctl(handle, BMDEV_MEMCPY, &bm_mem_s2d); + bm_profile_record_memcpy_end(handle, + ptr_to_u64.val, + bm_mem_s2d.device_addr, + bm_mem_s2d.size, + bm_mem_s2d.dir); + if (0 != res) + return BM_ERR_FAILURE; + } + return BM_SUCCESS; +#endif +} + bm_status_t bm_memcpy_s2d_partial_offset(bm_handle_t handle, bm_device_mem_t dst, void *src, unsigned int size, @@ -1747,6 +2629,29 @@ bm_status_t sg_memcpy_s2d_partial_offset(bm_handle_t handle, return sg_memcpy_s2d(handle, target_dev_mem, src); } +bm_status_t bm_memcpy_s2d_partial_offset_u64(bm_handle_t handle, + bm_device_mem_u64_t dst, void *src, + u64 size, + u64 offset) { + 
unsigned long long old_devmem_size = bm_mem_get_device_size_u64(dst); +#ifdef USING_CMODEL + ASSERT(old_devmem_size >= offset + size); +#else + if (old_devmem_size < offset + size) { + bmlib_log(BMLIB_MEMORY_LOG_TAG, BMLIB_LOG_ERROR, + "new device addr exceeds old device addr %s: %s: %d\n", + __FILE__, __func__, __LINE__); + return BM_ERR_PARAM; + } +#endif + u64 dev_mem_addr = bm_mem_get_device_addr_u64(dst); + + bm_device_mem_u64_t target_dev_mem = + bm_mem_from_device_u64(dev_mem_addr + offset, size); + + return bm_memcpy_s2d_u64(handle, target_dev_mem, src); +} + bm_status_t bm_memcpy_s2d_partial(bm_handle_t handle, bm_device_mem_t dst, void *src, unsigned int size) { return bm_memcpy_s2d_partial_offset(handle, dst, src, size, 0); @@ -1757,6 +2662,11 @@ bm_status_t sg_memcpy_s2d_partial(bm_handle_t handle, sg_device_mem_t dst, return sg_memcpy_s2d_partial_offset(handle, dst, src, size, 0); } +bm_status_t bm_memcpy_s2d_partial_u64(bm_handle_t handle, bm_device_mem_u64_t dst, + void *src, u64 size) { + return bm_memcpy_s2d_partial_offset_u64(handle, dst, src, size, 0); +} + bm_status_t bm_memcpy_d2s_normal(bm_handle_t handle, void *dst, bm_device_mem_t src) { #ifndef USING_CMODEL bm_memcpy_info_t bm_mem_d2s; @@ -1815,7 +2725,57 @@ bm_status_t sg_memcpy_d2s_normal(bm_handle_t handle, void *dst, sg_device_mem_t bm_mem_d2s.host_addr = (u64)dst + i*trans_size; #endif - bm_mem_d2s.device_addr = sg_mem_get_device_addr(src) + i * trans_size; + bm_mem_d2s.device_addr = sg_mem_get_device_addr(src) + i * trans_size; + if (size > trans_size) { + bm_mem_d2s.size = trans_size; + size -= trans_size; + } else { + bm_mem_d2s.size = size; + tran_over = 1; + } + + bm_mem_d2s.dir = CHIP2HOST; + bm_mem_d2s.src_device_addr = 0; + bm_mem_d2s.cdma_iommu_mode = handle->cdma_iommu_mode; + + union { void* ptr; u64 val; } ptr_to_u64; + ptr_to_u64.ptr = (void *)((u64)dst + i*trans_size); + bm_profile_record_memcpy_begin(handle); + auto res = platform_ioctl(handle, BMDEV_MEMCPY, 
&bm_mem_d2s); + bm_profile_record_memcpy_end(handle, bm_mem_d2s.device_addr, ptr_to_u64.val, bm_mem_d2s.size, bm_mem_d2s.dir); + if(0 != res) return BM_ERR_FAILURE; + } +#else + UNUSED(handle); + UNUSED(dst); + UNUSED(src); +#endif + return BM_SUCCESS; +} + +bm_status_t bm_memcpy_d2s_normal_u64(bm_handle_t handle, void *dst, bm_device_mem_u64_t src) { +#ifndef USING_CMODEL + bm_memcpy_info_t bm_mem_d2s; + u64 size; + int trans_size = 0x10000000;//256MB + int tran_over = 0; + + size = bm_mem_get_size_u64(src); + + for(int i=0; tran_over == 0; i++) { + #ifdef __linux__ +#ifdef USING_INT_CDMA + bm_mem_d2s.intr = true; +#else + bm_mem_d2s.intr = false; +#endif + bm_mem_d2s.host_addr = (void *)((u64)dst + i*trans_size); +#else + bm_mem_d2s.intr = 1; + bm_mem_d2s.host_addr = (u64)dst + i*trans_size; + #endif + + bm_mem_d2s.device_addr = bm_mem_get_device_addr_u64(src) + i * trans_size; if (size > trans_size) { bm_mem_d2s.size = trans_size; size -= trans_size; @@ -1911,6 +2871,40 @@ bm_status_t sg_memcpy_d2s_fast(bm_handle_t handle, void *dst, sg_device_mem_t sr return BM_SUCCESS; } +bm_status_t bm_memcpy_d2s_fast_u64(bm_handle_t handle, void *dst, bm_device_mem_u64_t src) { +#ifndef USING_CMODEL + u64 src_vaddr = 0; + bm_status_t ret; + if (handle->misc_info.pcie_soc_mode == 0) { + bmlib_log(BMLIB_MEMORY_LOG_TAG, BMLIB_LOG_ERROR, + "bmlib not support d2s fast in pcie mode\n"); + return BM_ERR_FAILURE; + } + ret = bm_mem_mmap_device_mem_u64(handle, &src, &src_vaddr); + if (ret != BM_SUCCESS) { + bmlib_log(BMLIB_MEMORY_LOG_TAG, BMLIB_LOG_ERROR, + "bmlib mmap in d2s fast failed\n"); + return BM_ERR_FAILURE; + } + + ret = bm_mem_invalidate_device_mem_u64(handle, &src); + if (ret != BM_SUCCESS) { + bmlib_log(BMLIB_MEMORY_LOG_TAG, BMLIB_LOG_ERROR, + "bmlib invalidate device mem in d2s fast failed\n"); + return BM_ERR_FAILURE; + } + + memcpy(dst, (void *)src_vaddr, bm_mem_get_device_size_u64(src)); + + bm_mem_unmap_device_mem_u64(handle, (void *)src_vaddr, 
bm_mem_get_device_size_u64(src)); +#else + UNUSED(handle); + UNUSED(dst); + UNUSED(src); +#endif + return BM_SUCCESS; +} + bm_status_t bm_memcpy_d2s(bm_handle_t handle, void *dst, bm_device_mem_t src) { #ifdef USING_CMODEL return handle->bm_dev->bm_device_memcpy_d2s(dst, src); @@ -1999,6 +2993,50 @@ bm_status_t sg_memcpy_d2s(bm_handle_t handle, void *dst, sg_device_mem_t src) { #endif } +bm_status_t bm_memcpy_d2s_u64(bm_handle_t handle, void *dst, bm_device_mem_u64_t src) { +#ifdef USING_CMODEL + return handle->bm_dev->bm_device_memcpy_d2s_u64(dst, src); +#else + bm_status_t ret; + u64 dev_addr; + u64 unaligned_size; + u64 aligned_addr; + if (handle == nullptr) { + bmlib_log(BMLIB_MEMORY_LOG_TAG, BMLIB_LOG_ERROR, + "handle is nullptr %s: %s: %d\n", __FILE__, __func__, + __LINE__); + return BM_ERR_DEVNOTREADY; + } + + if (!bm_device_mem_range_valid_u64(handle, src)) { + return BM_ERR_PARAM; + } + + if (handle->misc_info.pcie_soc_mode == 0) { + // PCIE mode + return bm_memcpy_d2s_normal_u64(handle, dst, src); + } else { + // SoC mode + if (bm_device_mem_page_aligned_u64(src)) { + return bm_memcpy_d2s_fast_u64(handle, dst, src); + } else if (bm_mem_get_device_size_u64(src) <= PAGE_SIZE) { + return bm_memcpy_d2s_normal_u64(handle, dst, src); + } else { + dev_addr = bm_mem_get_device_addr_u64(src); + unaligned_size = PAGE_SIZE - (dev_addr & (PAGE_SIZE - 1)); + aligned_addr = (dev_addr + PAGE_SIZE) & (~(PAGE_SIZE - 1)); + u64 aligned_size = bm_mem_get_device_size_u64(src) - unaligned_size; + ret = bm_memcpy_d2s_normal_u64(handle, dst, bm_mem_from_device_u64(dev_addr, unaligned_size)); + if (ret != BM_SUCCESS) { + return ret; + } + return bm_memcpy_d2s_fast_u64(handle, (void *)((u64)dst + unaligned_size), + bm_mem_from_device_u64(aligned_addr, aligned_size)); + } + } +#endif +} + bm_status_t bm_memcpy_d2s_partial_offset(bm_handle_t handle, void *dst, bm_device_mem_t src, unsigned int size, unsigned int offset) { @@ -2043,6 +3081,28 @@ bm_status_t 
sg_memcpy_d2s_partial_offset(bm_handle_t handle, void *dst, return sg_memcpy_d2s(handle, dst, target_dev_mem); } +bm_status_t bm_memcpy_d2s_partial_offset_u64(bm_handle_t handle, void *dst, + bm_device_mem_u64_t src, u64 size, + u64 offset) { + unsigned long long old_devmem_size = bm_mem_get_device_size_u64(src); +#ifdef USING_CMODEL + ASSERT(old_devmem_size >= offset + size); +#else + if (old_devmem_size < offset + size) { + bmlib_log(BMLIB_MEMORY_LOG_TAG, BMLIB_LOG_ERROR, + "new device addr exceeds old device addr %s: %s: %d\n", + __FILE__, __func__, __LINE__); + return BM_ERR_PARAM; + } +#endif + u64 dev_mem_addr = bm_mem_get_device_addr_u64(src); + + bm_device_mem_u64_t target_dev_mem = + bm_mem_from_device_u64(dev_mem_addr + offset, size); + + return bm_memcpy_d2s_u64(handle, dst, target_dev_mem); +} + bm_status_t bm_memcpy_d2s_partial(bm_handle_t handle, void *dst, bm_device_mem_t src, unsigned int size) { return bm_memcpy_d2s_partial_offset(handle, dst, src, size, 0); @@ -2053,6 +3113,11 @@ bm_status_t sg_memcpy_d2s_partial(bm_handle_t handle, void *dst, return sg_memcpy_d2s_partial_offset(handle, dst, src, size, 0); } +bm_status_t bm_memcpy_d2s_partial_u64(bm_handle_t handle, void *dst, + bm_device_mem_u64_t src, u64 size) { + return bm_memcpy_d2s_partial_offset_u64(handle, dst, src, size, 0); +} + bm_status_t bm_memcpy_d2s_poll(bm_handle_t handle, void * dst, bm_device_mem_t src, @@ -2211,6 +3276,176 @@ bm_status_t sg_memcpy_d2s_poll(bm_handle_t handle, #endif } +bm_status_t bm_memcpy_d2s_poll_u64(bm_handle_t handle, + void * dst, + bm_device_mem_u64_t src, + u64 size) { +#ifdef USING_CMODEL + (void)size; + return handle->bm_dev->bm_device_memcpy_d2s_u64(dst, src); +#else + + int trans_size = 0x10000000;//256MB + int tran_over = 0; + + unsigned long long old_devmem_size = bm_mem_get_device_size_u64(src); + if (old_devmem_size < size) { + bmlib_log(BMLIB_MEMORY_LOG_TAG, + BMLIB_LOG_ERROR, + "new device addr exceeds old device addr %s: %s: %d\n", + __FILE__, 
+ __func__, + __LINE__); + return BM_ERR_PARAM; + } + + if (handle == nullptr) { + bmlib_log(BMLIB_MEMORY_LOG_TAG, + BMLIB_LOG_ERROR, + "handle is nullptr %s: %s: %d\n", + __FILE__, + __func__, + __LINE__); + return BM_ERR_DEVNOTREADY; + } + + if (!bm_device_mem_range_valid_u64(handle, src)) { + return BM_ERR_PARAM; + } + + if (handle->misc_info.pcie_soc_mode == 0) { + // PCIE mode + + bm_memcpy_info_t bm_mem_d2s; + for(int i=0; tran_over == 0; i++) { +#ifdef __linux__ + bm_mem_d2s.intr = false; + bm_mem_d2s.host_addr = (void *)((u64)dst + i*trans_size); +#else + bm_mem_d2s.intr = true; + bm_mem_d2s.host_addr = (u64)dst + i*trans_size; +#endif + + bm_mem_d2s.device_addr = bm_mem_get_device_addr_u64(src) + i * trans_size; + if (size > trans_size) { + bm_mem_d2s.size = trans_size; + size -= trans_size; + } else { + bm_mem_d2s.size = size; + tran_over = 1; + } + + bm_mem_d2s.dir = CHIP2HOST; + bm_mem_d2s.src_device_addr = 0; + bm_mem_d2s.cdma_iommu_mode = handle->cdma_iommu_mode; + + union { + void *ptr; + u64 val; + } ptr_to_u64; + ptr_to_u64.ptr = (void *)((u64)dst + i*trans_size); + bm_profile_record_memcpy_begin(handle); + auto res = platform_ioctl(handle, BMDEV_MEMCPY, &bm_mem_d2s); + bm_profile_record_memcpy_end(handle, + bm_mem_d2s.device_addr, + ptr_to_u64.val, + bm_mem_d2s.size, + bm_mem_d2s.dir); + if (0 != res) + return BM_ERR_FAILURE; + } + return BM_SUCCESS; + } else { + return BM_ERR_FAILURE; + } + #endif +} + +bm_status_t bm_smmu_d2s_poll(bm_handle_t handle, + void * dst, + bm_device_mem_t src, + unsigned int size) { +#ifdef USING_CMODEL + (void)size; + return BM_SUCCESS; +#else + + int trans_size = 0x800000;//8MB + int tran_over = 0; + + unsigned int old_devmem_size = bm_mem_get_device_size(src); + if (old_devmem_size < size) { + bmlib_log(BMLIB_MEMORY_LOG_TAG, + BMLIB_LOG_ERROR, + "new device addr exceeds old device addr %s: %s: %d\n", + __FILE__, + __func__, + __LINE__); + return BM_ERR_PARAM; + } + + if (handle == nullptr) { + 
bmlib_log(BMLIB_MEMORY_LOG_TAG, + BMLIB_LOG_ERROR, + "handle is nullptr %s: %s: %d\n", + __FILE__, + __func__, + __LINE__); + return BM_ERR_DEVNOTREADY; + } + + if (!bm_device_mem_range_valid(handle, src)) { + return BM_ERR_PARAM; + } + + if (handle->misc_info.pcie_soc_mode == 0) { + // PCIE mode + + bm_memcpy_info_t bm_mem_d2s; + for(int i=0; tran_over == 0; i++) { +#ifdef __linux__ + bm_mem_d2s.intr = false; + bm_mem_d2s.host_addr = (void *)((u64)dst + i*trans_size); +#else + bm_mem_d2s.intr = true; + bm_mem_d2s.host_addr = (u64)dst + i*trans_size; +#endif + + bm_mem_d2s.device_addr = bm_mem_get_device_addr(src) + i * trans_size; + if (size > trans_size) { + bm_mem_d2s.size = trans_size; + size -= trans_size; + } else { + bm_mem_d2s.size = size; + tran_over = 1; + } + + bm_mem_d2s.dir = CHIP2HOST; + bm_mem_d2s.src_device_addr = 0; + bm_mem_d2s.cdma_iommu_mode = handle->cdma_iommu_mode; + + union { + void *ptr; + u64 val; + } ptr_to_u64; + ptr_to_u64.ptr = (void *)((u64)dst + i*trans_size); + bm_profile_record_memcpy_begin(handle); + auto res = platform_ioctl(handle, BMDEV_MEMCPY, &bm_mem_d2s); + bm_profile_record_memcpy_end(handle, + bm_mem_d2s.device_addr, + ptr_to_u64.val, + bm_mem_d2s.size, + bm_mem_d2s.dir); + if (0 != res) + return BM_ERR_FAILURE; + } + return BM_SUCCESS; + } else { + return BM_ERR_FAILURE; + } + #endif +} + bm_status_t bm_memset_device_ext(bm_handle_t handle, void* value, int mode, bm_device_mem_t mem) { bm_status_t ret = BM_SUCCESS; @@ -2696,3 +3931,65 @@ bm_status_t bm_mem_convert_system_to_device_coeff_byte( } return BM_SUCCESS; } + +bm_status_t bm_memcpy_s2d_gather(bm_handle_t handle, bm_device_mem_t dst, int argc, ...) 
+{ + bm_status_t ret; + va_list args; + void *vaddr; + u64 len; + u32 total = dst.size; + u64 sum = 0; + + va_start(args, argc); + for (int i = 0; i < argc; i+=2) { + vaddr = va_arg(args, void *); + len = va_arg(args, unsigned long long); + sum += len; + if (sum > total) { + bmlib_log(BMLIB_MEMORY_LOG_TAG, BMLIB_LOG_ERROR, + "%s sum: %u, total: %lu\n", __func__, sum, total); + } + dst.size = len; + ret = bm_memcpy_s2d(handle, dst, vaddr); + if (ret != BM_SUCCESS) { + bmlib_log(BMLIB_MEMORY_LOG_TAG, BMLIB_LOG_ERROR, + "%s failed, dst address: %lu, len: %llu\n", __func__, dst.u.device.device_addr, dst.size); + } + dst.u.device.device_addr += len; + } + va_end(args); + + return ret; +} + +bm_status_t bm_memcpy_d2s_scatter(bm_handle_t handle, bm_device_mem_t src, int argc, ...) +{ + bm_status_t ret; + va_list args; + void *vaddr; + u64 len; + u32 total = src.size; + u64 sum = 0; + + va_start(args, argc); + for (int i = 0; i < argc; i+=2) { + vaddr = va_arg(args, void *); + len = va_arg(args, unsigned long long); + sum += len; + if (sum > total) { + bmlib_log(BMLIB_MEMORY_LOG_TAG, BMLIB_LOG_ERROR, + "%s sum: %u, total: %lu\n", __func__, sum, total); + } + src.size = len; + ret = bm_memcpy_d2s(handle, vaddr, src); + if (ret != BM_SUCCESS) { + bmlib_log(BMLIB_MEMORY_LOG_TAG, BMLIB_LOG_ERROR, + "%s failed, src address: %lu, len: %llu\n", __func__, src.u.device.device_addr, src.size); + } + src.u.device.device_addr += len; + } + va_end(args); + + return ret; +} diff --git a/bmlib/src/bmlib_memory.h b/bmlib/src/bmlib_memory.h index a4c23f9..75555af 100644 --- a/bmlib/src/bmlib_memory.h +++ b/bmlib/src/bmlib_memory.h @@ -54,6 +54,12 @@ struct bm_gmem_addr { u64 phy_addr; }; +struct bm_mem_paddr { + struct rb_node node; + unsigned long long paddr; + bm_device_mem_u64_t *dev_buffer; +}; + bm_status_t bm_total_gmem(bm_handle_t ctx, u64* total); bm_status_t bm_avail_gmem(bm_handle_t ctx, u64* avail); bm_status_t bm_memcpy_d2s_poll(bm_handle_t handle, @@ -64,15 +70,30 @@ 
bm_status_t sg_memcpy_d2s_poll(bm_handle_t handle, void * dst, sg_device_mem_t src, unsigned long long size); +bm_status_t bm_memcpy_d2s_poll_u64(bm_handle_t handle, + void * dst, + bm_device_mem_u64_t src, + unsigned long long size); bm_status_t bm_memcpy_s2d_poll(bm_handle_t handle, bm_device_mem_t dst, void * src); bm_status_t sg_memcpy_s2d_poll(bm_handle_t handle, sg_device_mem_t dst, void * src); +bm_status_t bm_memcpy_s2d_poll_u64(bm_handle_t handle, + bm_device_mem_u64_t dst, + void * src); +bm_status_t bm_smmu_d2s_poll(bm_handle_t handle, + void * dst, + bm_device_mem_t src, + unsigned int size); +bm_status_t bm_smmu_s2d_poll(bm_handle_t handle, + bm_device_mem_t dst, + void * src); void *bm_mem_get_system_addr(struct bm_mem_desc mem); u32 bm_mem_get_size(struct bm_mem_desc mem); u64 sg_mem_get_size(struct sg_mem_desc mem); +u64 bm_mem_get_size_u64(struct bm_mem_desc_u64 mem); bm_status_t bm_mem_mmap_device_mem( bm_handle_t handle, bm_device_mem_t *dmem, @@ -108,6 +129,8 @@ bm_status_t bm_mem_unmap_device_mem( void * vmem, int size); bm_status_t bm_get_carveout_heap_id(bm_handle_t ctx); +extern void rb_insert_color(struct rb_node *, struct rb_root *); +extern void rb_erase(struct rb_node *, struct rb_root *); #ifdef __cplusplus } #endif diff --git a/bmlib/src/bmlib_profile.cpp b/bmlib/src/bmlib_profile.cpp index 9f792e6..756ffe1 100644 --- a/bmlib/src/bmlib_profile.cpp +++ b/bmlib/src/bmlib_profile.cpp @@ -83,6 +83,7 @@ static int bm_mkdir(const char *dirname, bool must_new) string dname = dirname; struct stat st; dname += "/"; + int cmd_ret; if (stat(dname.c_str(), &st) == -1) { #ifdef __linux__ ASSERT(mkdir(dname.c_str(), 0777) == 0); @@ -94,7 +95,11 @@ static int bm_mkdir(const char *dirname, bool must_new) if(must_new){ string cmd = "rm "; cmd += dname + "*"; - system(cmd.c_str()); + cmd_ret = system(cmd.c_str()); + if(cmd_ret == -1){ + PROFILE_ERROR("exec %s failed!\n", cmd.c_str()); + return -1; + } } return 0; } diff --git 
a/bmlib/src/bmlib_runtime.cpp b/bmlib/src/bmlib_runtime.cpp index 65851b1..3bddbcf 100644 --- a/bmlib/src/bmlib_runtime.cpp +++ b/bmlib/src/bmlib_runtime.cpp @@ -6,6 +6,7 @@ #include "bmlib_memory.h" #include "bmlib_utils.h" #include "bmlib_interface.h" +#include "bmlib_version.h" #ifdef _WIN32 #include @@ -28,6 +29,14 @@ #define BMLIB_RUNTIME_LOG_TAG "bmlib_runtime" static bmlib_api_dbg_callback api_debug_callback = NULL; +extern "C" { + //__attribute__((visibility("default"))) + DLLEXPORT const char* libbmlib_version() { + static const char* version_string = "libbmlib_version:1.0.0, branch:" BRANCH_NAME ", commit:" COMMIT_HASH ", compiled on " __DATE__ " at " __TIME__", "; + return version_string; + } +} + #ifdef _WIN32 // Define an Interface Guid so that apps can find the device and talk to it. // {84703EC3-9B1B-49D7-9AA6-0C42C6465681} @@ -282,6 +291,16 @@ bm_status_t bm_send_api(bm_handle_t handle, int api_id, const u8 *api, #endif } +bm_status_t bm_send_api_to_core( + bm_handle_t handle, + int api_id, + const u8 *api, + u32 size, + int core_id) { + (void)core_id; + return bm_send_api(handle, api_id, api, size); +} + bm_status_t bm_device_sync(bm_handle_t handle) { #ifdef USING_CMODEL return handle->bm_dev->bm_device_sync(); @@ -338,7 +357,7 @@ bm_status_t bm_thread_sync(bm_handle_t handle) { bm_profile_record_sync_begin(handle); bm_status_t status = BM_SUCCESS; #ifdef USING_CMODEL - status = handle->bm_dev->bm_device_thread_sync(); + status = handle->bm_dev->bm_device_sync(); #else if (handle == nullptr) { bmlib_log(BMLIB_RUNTIME_LOG_TAG, BMLIB_LOG_ERROR, @@ -355,14 +374,36 @@ bm_status_t bm_thread_sync(bm_handle_t handle) { return status; } +bm_status_t bm_thread_sync_from_core(bm_handle_t handle, int core_id) { + (void)core_id; + return bm_thread_sync(handle); +} + bm_status_t bm_sync_api(bm_handle_t handle) { return bm_thread_sync(handle); } +bm_status_t bm_set_sync_timeout(bm_handle_t handle, int timeout) { + bm_status_t status = BM_SUCCESS; +#ifndef 
USING_CMODEL + if (handle == nullptr) { + bmlib_log(BMLIB_RUNTIME_LOG_TAG, BMLIB_LOG_ERROR, + "handle is nullptr %s: %s: %d\n", + __FILE__, __func__, __LINE__); + status = BM_ERR_DEVNOTREADY; + } else if (0 == platform_ioctl(handle, BMDEV_SYNC_TIMEOUT_API, &timeout)) { + status = BM_SUCCESS; + } else { + status = BM_ERR_FAILURE; + } +#endif + return status; +} + bm_status_t bm_dev_getcount(int *count) { if (!count) return BM_ERR_PARAM; #ifdef USING_CMODEL - *count = MAX_NODECHIP_NUM; + *count = MAX_DEVICE_NUM; #else int fd; #ifdef __linux__ @@ -582,7 +623,14 @@ bm_status_t bm_dev_request(bm_handle_t *handle, int devid) { return BM_ERR_PARAM; } *handle = ctx; - bm_disable_iommu(*handle); + #ifdef SMMU_MODE + bm_enable_iommu(*handle); + #else + bm_disable_iommu(*handle); + #endif + + pthread_mutex_init(&(*handle)->mem_mutex, NULL); + (*handle)->root = RB_ROOT; #endif #else if (!GetDevicePath(ctx)) { @@ -648,6 +696,9 @@ void bm_dev_free(bm_handle_t handle) { __FILE__, __func__, __LINE__); return; } + + pthread_mutex_destroy(&handle->mem_mutex); + if (handle->profile){ bm_profile_deinit(handle); } @@ -971,6 +1022,22 @@ int bm_get_devid(bm_handle_t handle) { return handle->dev_id; } +bm_status_t bm_get_tpu_scalar_num(bm_handle_t handle, unsigned int *core_num) { +#ifdef USING_CMODEL + *core_num = handle->bm_dev->bm_core_num(); +#else + unsigned int chip_id = 0; + bm_status_t ret = BM_SUCCESS; + ret = bm_get_chipid(handle, &chip_id); + if (ret != BM_SUCCESS) + return ret; + + *core_num = 1; +#endif + + return BM_SUCCESS; +} + bm_status_t bm_get_boot_info(bm_handle_t handle, bm_boot_info *pboot_info) { #ifdef USING_CMODEL UNUSED(handle); @@ -1176,6 +1243,48 @@ bm_status_t bm_get_reg(bm_handle_t handle, struct bm_reg *reg) { #endif } +bm_status_t bm_rw_host(bm_handle_t handle, struct bm_rw *reg) { +#ifdef USING_CMODEL + UNUSED(handle); + UNUSED(reg); + + return BM_SUCCESS; +#else + if (handle == nullptr) { + bmlib_log(BMLIB_RUNTIME_LOG_TAG, BMLIB_LOG_ERROR, + "handle is 
nullptr %s: %s: %d\n", + __FILE__, __func__, __LINE__); + return BM_ERR_DEVNOTREADY; + } + + if (0 == platform_ioctl(handle, BMDEV_RW_HOST, reg)) + return BM_SUCCESS; + else + return BM_ERR_FAILURE; +#endif +} + +bm_status_t bm_rw_mix(bm_handle_t handle, struct bm_rw *reg) { +#ifdef USING_CMODEL + UNUSED(handle); + UNUSED(reg); + + return BM_SUCCESS; +#else + if (handle == nullptr) { + bmlib_log(BMLIB_RUNTIME_LOG_TAG, BMLIB_LOG_ERROR, + "handle is nullptr %s: %s: %d\n", + __FILE__, __func__, __LINE__); + return BM_ERR_DEVNOTREADY; + } + + if (0 == platform_ioctl(handle, BMDEV_RW_MIX, reg)) + return BM_SUCCESS; + else + return BM_ERR_FAILURE; +#endif +} + #ifdef __linux__ bm_status_t bm_get_last_api_process_time_us(bm_handle_t handle, unsigned long *time_us) { @@ -2008,6 +2117,7 @@ bm_status_t bm_get_tpu_volt(bm_handle_t handle, unsigned int *tpu_volt) { #endif } + bm_status_t bm_get_card_id(bm_handle_t handle, unsigned int *card_id) { #ifdef USING_CMODEL UNUSED(handle); @@ -2209,3 +2319,28 @@ bm_status_t bm_get_handle_fd(bm_handle_t handle,FD_ID id, int *fd){ #endif #endif } + +tpu_kernel_module_t tpu_kernel_load_module_to_core(bm_handle_t handle, const char *data, size_t length, int core_id) { + (void) core_id; + return tpu_kernel_load_module(handle, data, length); +} + +tpu_kernel_function_t tpu_kernel_get_function_from_core(bm_handle_t handle, tpu_kernel_module_t module, const char *function, int core_id) { + (void) core_id; + return tpu_kernel_get_function(handle, module, function); +} + +bm_status_t tpu_kernel_launch_from_core(bm_handle_t handle, tpu_kernel_function_t function, void *args, size_t size, int core_id) { + (void) core_id; + return tpu_kernel_launch(handle, function, args, size); +} + +bm_status_t tpu_kernel_launch_async_from_core(bm_handle_t handle, tpu_kernel_function_t function, void *args, size_t size, int core_id) { + (void) core_id; + return tpu_kernel_launch_async(handle, function, args, size); +} + +bm_status_t 
tpu_kernel_unload_module_from_core(bm_handle_t handle, tpu_kernel_module_t p_module, int core_id) { + (void) core_id; + return tpu_kernel_unload_module(handle, p_module); +} diff --git a/bmlib/src/bmlib_version.h b/bmlib/src/bmlib_version.h new file mode 100644 index 0000000..d6a2b0e --- /dev/null +++ b/bmlib/src/bmlib_version.h @@ -0,0 +1,12 @@ +#ifndef _BMLIB_VERSION_H_ +#define _BMLIB_VERSION_H_ + +#ifdef _MSC_VER +#define DLLEXPORT __declspec(dllexport) +#else +#define DLLEXPORT __attribute__((visibility("default"))) +#endif +#define COMMIT_HASH "0ec89f7ab393194111c9cdfa0c9492e9d58e0894" +#define BRANCH_NAME "main" + +#endif \ No newline at end of file diff --git a/bmlib/src/linux/bmlib_ioctl.h b/bmlib/src/linux/bmlib_ioctl.h index 30c210b..2a97a21 100644 --- a/bmlib/src/linux/bmlib_ioctl.h +++ b/bmlib/src/linux/bmlib_ioctl.h @@ -20,9 +20,12 @@ #define BMDEV_FLUSH_GMEM _IOWR('p', 0x18, unsigned long) #define BMDEV_ALLOC_GMEM_ION _IOW('p', 0x19, unsigned long) #define BMDEV_GMEM_ADDR _IOW('p', 0x1a, unsigned long) +#define BMDEV_ALLOC_GMEM_ION_U64 _IOW('p', 0x1b, unsigned long) +#define BMDEV_FREE_GMEM_U64 _IOW('p', 0x1c, unsigned long) #define BMDEV_SEND_API _IOW('p', 0x20, unsigned long) #define BMDEV_THREAD_SYNC_API _IOW('p', 0x21, unsigned long) +#define BMDEV_SYNC_TIMEOUT_API _IOW('p', 0x22, unsigned long) #define BMDEV_DEVICE_SYNC_API _IOW('p', 0x23, unsigned long) #define BMDEV_HANDLE_SYNC_API _IOW('p', 0x27, unsigned long) #define BMDEV_SEND_API_EXT _IOW('p', 0x28, unsigned long) @@ -43,6 +46,7 @@ #define BMDEV_SET_REG _IOWR('p', 0x3c, unsigned long) #define BMDEV_GET_REG _IOWR('p', 0x3d, unsigned long) #define BMDEV_GET_DEV_STAT _IOWR('p', 0x3e, unsigned long) +#define BMDEV_RW_MIX _IOWR('p', 0x3f, unsigned long) #define BMDEV_TRACE_ENABLE _IOW('p', 0x40, unsigned long) #define BMDEV_TRACE_DISABLE _IOW('p', 0x41, unsigned long) @@ -52,11 +56,16 @@ #define BMDEV_ENABLE_PERF_MONITOR _IOWR('p', 0x45, unsigned long) #define BMDEV_DISABLE_PERF_MONITOR 
_IOWR('p', 0x46, unsigned long) #define BMDEV_GET_DEVICE_TIME _IOWR('p', 0x47, unsigned long) +#define BMDEV_RW_HOST _IOWR('p', 0x48, unsigned long) #define BMDEV_SET_TPU_DIVIDER _IOWR('p', 0x50, unsigned long) #define BMDEV_SET_MODULE_RESET _IOWR('p', 0x51, unsigned long) #define BMDEV_SET_TPU_FREQ _IOWR('p', 0x52, unsigned long) #define BMDEV_GET_TPU_FREQ _IOWR('p', 0x53, unsigned long) +#define BMDEV_SET_TPU_VOLT _IOWR('p', 0x54, unsigned long) +#define BMDEV_SET_RDROP _IOWR('p', 0x55, unsigned long) +#define BMDEV_GET_RDROP _IOWR('p', 0x56, unsigned long) +#define BMDEV_SET_VDDC_VOLT _IOWR('p', 0x57, unsigned long) #define BMDEV_TRIGGER_VPP _IOWR('p', 0x60, unsigned long) #define BMDEV_TRIGGER_SPACC _IOWR('p', 0x61, unsigned long) @@ -93,6 +102,7 @@ #define BMDEV_COMM_SET_CARDID _IOWR('p', 0xAA, unsigned long) #define BMDEV_SET_IP _IOWR('p', 0xAC, unsigned long) #define BMDEV_SET_GATE _IOWR('p', 0xAD, unsigned long) +#define BMDEV_SYNC_TIME_MIX _IOWR('p', 0xAE, unsigned long) #define BMDEV_GET_TPUC _IOWR('p', 0x81, unsigned long) #define BMDEV_GET_MAXP _IOWR('p', 0x82, unsigned long) diff --git a/bmlib/src/rbtree.c b/bmlib/src/rbtree.c new file mode 100644 index 0000000..413d51e --- /dev/null +++ b/bmlib/src/rbtree.c @@ -0,0 +1,399 @@ +/* + Red Black Trees + (C) 1999 Andrea Arcangeli + (C) 2002 David Woodhouse + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + linux/lib/rbtree.c +*/ + +#include "rbtree.h" + +static void __rb_rotate_left(struct rb_node *node, struct rb_root *root) +{ + struct rb_node *right = node->rb_right; + struct rb_node *parent = rb_parent(node); + + if ((node->rb_right = right->rb_left)) + rb_set_parent(right->rb_left, node); + right->rb_left = node; + + rb_set_parent(right, parent); + + if (parent) + { + if (node == parent->rb_left) + parent->rb_left = right; + else + parent->rb_right = right; + } + else + root->rb_node = right; + rb_set_parent(node, right); +} + +static void __rb_rotate_right(struct rb_node *node, struct rb_root *root) +{ + struct rb_node *left = node->rb_left; + struct rb_node *parent = rb_parent(node); + + if ((node->rb_left = left->rb_right)) + rb_set_parent(left->rb_right, node); + left->rb_right = node; + + rb_set_parent(left, parent); + + if (parent) + { + if (node == parent->rb_right) + parent->rb_right = left; + else + parent->rb_left = left; + } + else + root->rb_node = left; + rb_set_parent(node, left); +} + +void rb_insert_color(struct rb_node *node, struct rb_root *root) +{ + struct rb_node *parent, *gparent; + + while ((parent = rb_parent(node)) && rb_is_red(parent)) + { + gparent = rb_parent(parent); + + if (parent == gparent->rb_left) + { + { + register struct rb_node *uncle = gparent->rb_right; + if (uncle && rb_is_red(uncle)) + { + rb_set_black(uncle); + rb_set_black(parent); + rb_set_red(gparent); + node = gparent; + continue; + } + } + + if (parent->rb_right == node) + { + register struct rb_node *tmp; + __rb_rotate_left(parent, root); + tmp = parent; + parent = node; + node = tmp; + } + + rb_set_black(parent); + rb_set_red(gparent); + __rb_rotate_right(gparent, root); + } + else + { + { + register struct rb_node *uncle = gparent->rb_left; + if (uncle && 
rb_is_red(uncle)) + { + rb_set_black(uncle); + rb_set_black(parent); + rb_set_red(gparent); + node = gparent; + continue; + } + } + + if (parent->rb_left == node) + { + register struct rb_node *tmp; + __rb_rotate_right(parent, root); + tmp = parent; + parent = node; + node = tmp; + } + + rb_set_black(parent); + rb_set_red(gparent); + __rb_rotate_left(gparent, root); + } + } + + rb_set_black(root->rb_node); +} +// EXPORT_SYMBOL(rb_insert_color); + +static void __rb_erase_color(struct rb_node *node, struct rb_node *parent, + struct rb_root *root) +{ + struct rb_node *other; + + while ((!node || rb_is_black(node)) && node != root->rb_node) + { + if (parent->rb_left == node) + { + other = parent->rb_right; + if (rb_is_red(other)) + { + rb_set_black(other); + rb_set_red(parent); + __rb_rotate_left(parent, root); + other = parent->rb_right; + } + if ((!other->rb_left || rb_is_black(other->rb_left)) && + (!other->rb_right || rb_is_black(other->rb_right))) + { + rb_set_red(other); + node = parent; + parent = rb_parent(node); + } + else + { + if (!other->rb_right || rb_is_black(other->rb_right)) + { + rb_set_black(other->rb_left); + rb_set_red(other); + __rb_rotate_right(other, root); + other = parent->rb_right; + } + rb_set_color(other, rb_color(parent)); + rb_set_black(parent); + rb_set_black(other->rb_right); + __rb_rotate_left(parent, root); + node = root->rb_node; + break; + } + } + else + { + other = parent->rb_left; + if (rb_is_red(other)) + { + rb_set_black(other); + rb_set_red(parent); + __rb_rotate_right(parent, root); + other = parent->rb_left; + } + if ((!other->rb_left || rb_is_black(other->rb_left)) && + (!other->rb_right || rb_is_black(other->rb_right))) + { + rb_set_red(other); + node = parent; + parent = rb_parent(node); + } + else + { + if (!other->rb_left || rb_is_black(other->rb_left)) + { + rb_set_black(other->rb_right); + rb_set_red(other); + __rb_rotate_left(other, root); + other = parent->rb_left; + } + rb_set_color(other, rb_color(parent)); + 
rb_set_black(parent); + rb_set_black(other->rb_left); + __rb_rotate_right(parent, root); + node = root->rb_node; + break; + } + } + } + if (node) + rb_set_black(node); +} + +void rb_erase(struct rb_node *node, struct rb_root *root) +{ + struct rb_node *child, *parent; + int color; + + if (!node->rb_left) + child = node->rb_right; + else if (!node->rb_right) + child = node->rb_left; + else + { + struct rb_node *old = node, *left; + + node = node->rb_right; + while ((left = node->rb_left) != NULL) + node = left; + + if (rb_parent(old)) + { + if (rb_parent(old)->rb_left == old) + rb_parent(old)->rb_left = node; + else + rb_parent(old)->rb_right = node; + } + else + root->rb_node = node; + + child = node->rb_right; + parent = rb_parent(node); + color = rb_color(node); + + if (parent == old) + { + parent = node; + } + else + { + if (child) + rb_set_parent(child, parent); + parent->rb_left = child; + + node->rb_right = old->rb_right; + rb_set_parent(old->rb_right, node); + } + + node->rb_parent_color = old->rb_parent_color; + node->rb_left = old->rb_left; + rb_set_parent(old->rb_left, node); + + goto color; + } + + parent = rb_parent(node); + color = rb_color(node); + + if (child) + rb_set_parent(child, parent); + if (parent) + { + if (parent->rb_left == node) + parent->rb_left = child; + else + parent->rb_right = child; + } + else + root->rb_node = child; + +color: + if (color == RB_BLACK) + __rb_erase_color(child, parent, root); +} +// EXPORT_SYMBOL(rb_erase); + +/* + * This function returns the first node (in sort order) of the tree. 
+ */ +struct rb_node *rb_first(const struct rb_root *root) +{ + struct rb_node *n; + + n = root->rb_node; + if (!n) + return NULL; + while (n->rb_left) + n = n->rb_left; + return n; +} +// EXPORT_SYMBOL(rb_first); + +struct rb_node *rb_last(const struct rb_root *root) +{ + struct rb_node *n; + + n = root->rb_node; + if (!n) + return NULL; + while (n->rb_right) + n = n->rb_right; + return n; +} +// EXPORT_SYMBOL(rb_last); + +struct rb_node *rb_next(const struct rb_node *node) +{ + struct rb_node *parent; + + if (rb_parent(node) == node) + return NULL; + + /* If we have a right-hand child, go down and then left as far + as we can. */ + if (node->rb_right) + { + node = node->rb_right; + while (node->rb_left) + node = node->rb_left; + return (struct rb_node *)node; + } + + /* No right-hand children. Everything down and left is + smaller than us, so any 'next' node must be in the general + direction of our parent. Go up the tree; any time the + ancestor is a right-hand child of its parent, keep going + up. First time it's a left-hand child of its parent, said + parent is our 'next' node. */ + while ((parent = rb_parent(node)) && node == parent->rb_right) + node = parent; + + return parent; +} +// EXPORT_SYMBOL(rb_next); + +struct rb_node *rb_prev(const struct rb_node *node) +{ + struct rb_node *parent; + + if (rb_parent(node) == node) + return NULL; + + /* If we have a left-hand child, go down and then right as far + as we can. */ + if (node->rb_left) + { + node = node->rb_left; + while (node->rb_right) + node = node->rb_right; + return (struct rb_node *)node; + } + + /* No left-hand children. 
Go up till we find an ancestor which + is a right-hand child of its parent */ + while ((parent = rb_parent(node)) && node == parent->rb_left) + node = parent; + + return parent; +} +// EXPORT_SYMBOL(rb_prev); + +void rb_replace_node(struct rb_node *victim, struct rb_node *new_root, + struct rb_root *root) +{ + struct rb_node *parent = rb_parent(victim); + + /* Set the surrounding nodes to point to the replacement */ + if (parent) + { + if (victim == parent->rb_left) + parent->rb_left = new_root; + else + parent->rb_right = new_root; + } + else + { + root->rb_node = new_root; + } + if (victim->rb_left) + rb_set_parent(victim->rb_left, new_root); + if (victim->rb_right) + rb_set_parent(victim->rb_right, new_root); + + /* Copy the pointers/colour from the victim to the replacement */ + *new_root = *victim; +} +// EXPORT_SYMBOL(rb_replace_node); diff --git a/bmlib/src/rbtree.h b/bmlib/src/rbtree.h new file mode 100644 index 0000000..b8a6bda --- /dev/null +++ b/bmlib/src/rbtree.h @@ -0,0 +1,149 @@ +/* + Red Black Trees + (C) 1999 Andrea Arcangeli + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + linux/include/linux/rbtree.h + To use rbtrees you'll have to implement your own insert and search cores. + This will avoid us to use callbacks and to drop drammatically performances. 
+ I know it's not the cleaner way, but in C (not in C++) to get + performances and genericity... + Some example of insert and search follows here. The search is a plain + normal search over an ordered tree. The insert instead must be implemented + int two steps: as first thing the code must insert the element in + order as a red leaf in the tree, then the support library function + rb_insert_color() must be called. Such function will do the + not trivial work to rebalance the rbtree if necessary. +----------------------------------------------------------------------- +static inline struct page * rb_search_page_cache(struct inode * inode, + unsigned long offset) +{ + struct rb_node * n = inode->i_rb_page_cache.rb_node; + struct page * page; + while (n) + { + page = rb_entry(n, struct page, rb_page_cache); + if (offset < page->offset) + n = n->rb_left; + else if (offset > page->offset) + n = n->rb_right; + else + return page; + } + return NULL; +} +static inline struct page * __rb_insert_page_cache(struct inode * inode, + unsigned long offset, + struct rb_node * node) +{ + struct rb_node ** p = &inode->i_rb_page_cache.rb_node; + struct rb_node * parent = NULL; + struct page * page; + while (*p) + { + parent = *p; + page = rb_entry(parent, struct page, rb_page_cache); + if (offset < page->offset) + p = &(*p)->rb_left; + else if (offset > page->offset) + p = &(*p)->rb_right; + else + return page; + } + rb_link_node(node, parent, p); + return NULL; +} +static inline struct page * rb_insert_page_cache(struct inode * inode, + unsigned long offset, + struct rb_node * node) +{ + struct page * ret; + if ((ret = __rb_insert_page_cache(inode, offset, node))) + goto out; + rb_insert_color(node, &inode->i_rb_page_cache); + out: + return ret; +} +----------------------------------------------------------------------- +*/ +#include + +#ifndef _LINUX_RBTREE_H +#define _LINUX_RBTREE_H + +#define container_of(ptr, type, member) ({ \ + const typeof( ((type *)0)->member ) *mptr = 
(ptr); \ + (type *)( (char *)mptr - offsetof(type,member) );}) + +struct rb_node +{ + unsigned long rb_parent_color; +#define RB_RED 0 +#define RB_BLACK 1 + struct rb_node *rb_right; + struct rb_node *rb_left; +} __attribute__((aligned(sizeof(long)))); + /* The alignment might seem pointless, but allegedly CRIS needs it */ + +struct rb_root +{ + struct rb_node *rb_node; +}; + + +#define rb_parent(r) ((struct rb_node *)((r)->rb_parent_color & ~3)) +#define rb_color(r) ((r)->rb_parent_color & 1) +#define rb_is_red(r) (!rb_color(r)) +#define rb_is_black(r) rb_color(r) +#define rb_set_red(r) do { (r)->rb_parent_color &= ~1; } while (0) +#define rb_set_black(r) do { (r)->rb_parent_color |= 1; } while (0) + +static inline void rb_set_parent(struct rb_node *rb, struct rb_node *p) +{ + rb->rb_parent_color = (rb->rb_parent_color & 3) | (unsigned long)p; +} +static inline void rb_set_color(struct rb_node *rb, int color) +{ + rb->rb_parent_color = (rb->rb_parent_color & ~1) | color; +} + +#define RB_ROOT (struct rb_root) { NULL, } +#define rb_entry(ptr, type, member) container_of(ptr, type, member) + +#define RB_EMPTY_ROOT(root) ((root)->rb_node == NULL) +#define RB_EMPTY_NODE(node) (rb_parent(node) == node) +#define RB_CLEAR_NODE(node) (rb_set_parent(node, node)) + +// extern void rb_insert_color(struct rb_node *, struct rb_root *); +// extern void rb_erase(struct rb_node *, struct rb_root *); + +/* Find logical next and previous nodes in a tree */ +extern struct rb_node *rb_next(const struct rb_node *); +extern struct rb_node *rb_prev(const struct rb_node *); +extern struct rb_node *rb_first(const struct rb_root *); +extern struct rb_node *rb_last(const struct rb_root *); + +/* Fast replacement of a single node without remove/rebalance/add/rebalance */ +extern void rb_replace_node(struct rb_node *victim, struct rb_node *new_root, + struct rb_root *root); + +static inline void rb_link_node(struct rb_node * node, struct rb_node * parent, + struct rb_node ** rb_link) +{ + 
node->rb_parent_color = (unsigned long )parent; + node->rb_left = node->rb_right = NULL; + + *rb_link = node; +} + +#endif /* _LINUX_RBTREE_H */ diff --git a/bmlib/tools/CMakeLists.txt b/bmlib/tools/CMakeLists.txt index 271c385..a6b2502 100644 --- a/bmlib/tools/CMakeLists.txt +++ b/bmlib/tools/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 2.8) +cmake_minimum_required(VERSION 3.6) cmake_policy(SET CMP0046 NEW) cmake_policy(SET CMP0048 NEW) @@ -35,7 +35,7 @@ else() foreach(src ${TOOLS_SRC_PATH}) get_filename_component(target ${src} NAME_WE) add_executable(${target} ${src}) - target_link_libraries(${target} gflags_nothreads.a pthread dl bmlib) + target_link_libraries(${target} gflags_nothreads.a bmlib pthread dl) install(TARGETS ${target} RUNTIME DESTINATION bin COMPONENT libsophon) diff --git a/bmlib/tools/a53lite/CMakeLists.txt b/bmlib/tools/a53lite/CMakeLists.txt index 4e7f020..91f9f06 100644 --- a/bmlib/tools/a53lite/CMakeLists.txt +++ b/bmlib/tools/a53lite/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 2.8) +cmake_minimum_required(VERSION 3.6) cmake_policy(SET CMP0048 NEW) set(TARGET_NAME "bmcpu") @@ -12,7 +12,7 @@ file(GLOB TEST_SRC_PATH ./*.cpp) foreach(src ${TEST_SRC_PATH}) get_filename_component(target ${src} NAME_WE) add_executable(${target} ${src}) - target_link_libraries(${target} pthread bmlib dl) + target_link_libraries(${target} bmlib pthread dl) install(TARGETS ${target} RUNTIME DESTINATION bin COMPONENT libsophon) diff --git a/bmlib/tools/a53lite/a53lite_memcpy_test.cpp b/bmlib/tools/a53lite/a53lite_memcpy_test.cpp index ae8a4ae..1ef7ece 100644 --- a/bmlib/tools/a53lite/a53lite_memcpy_test.cpp +++ b/bmlib/tools/a53lite/a53lite_memcpy_test.cpp @@ -88,7 +88,7 @@ int main(int argc, char *argv[]) ret = bm_malloc_device_dword(handle, &device_buffer, transfer_size / 4); if (ret != BM_SUCCESS) { - printf("malloc device memory size = %llu failed, ret = %d\n", transfer_size, ret); + printf("malloc device memory size = %u 
failed, ret = %d\n", transfer_size, ret); return -1; } diff --git a/bmlib/tools/bm_firmware_update.cpp b/bmlib/tools/bm_firmware_update.cpp index b21ae82..1c71050 100644 --- a/bmlib/tools/bm_firmware_update.cpp +++ b/bmlib/tools/bm_firmware_update.cpp @@ -56,6 +56,36 @@ SM5MS 12 SM5MA 13 */ +/** + * The board id of PCIe EP device + */ +#define BOARD_TYPE_EVB 0x0 +#define BOARD_TYPE_SA5 0x1 +#define BOARD_TYPE_SC5 0x2 +#define BOARD_TYPE_SE5 0x3 +#define BOARD_TYPE_SM5_P 0x4 +#define BOARD_TYPE_SM5_S 0x5 +#define BOARD_TYPE_SA6 0x6 +#define BOARD_TYPE_SC5_PLUS 0x7 +#define BOARD_TYPE_SC5_H 0x8 +#define BOARD_TYPE_SC5_PRO 0x9 +#define BOARD_TYPE_AIV01T 0x10 +#define BOARD_TYPE_AIV02T 0x11 +#define BOARD_TYPE_AIV03T 0x12 +#define BOARD_TYPE_AIV03T_24G 0x13 + +#define BOARD_TYPE_SM5M_P 0xb +#define BOARD_TYPE_BM1684X_EVB 0x20 +#define BOARD_TYPE_SC7_PRO 0x21 +#define BOARD_TYPE_SC7_PLUS 0x22 +#define BOARD_TYPE_SC7_FP150 0x23 +#define BOARD_TYPE_SM7_V0_0 0x30 +#define BOARD_TYPE_SM7_MP1_1 0x36 +#define BOARD_TYPE_CP24 0x40 +#define BOARD_TYPE_AIV01X 0x50 +#define BOARD_TYPE_AIV02X 0x51 +#define BOARD_TYPE_AIV03X 0x52 + #define EVB "EVB" #define SA5 "SA5" #define SC5 "SC5" @@ -74,11 +104,16 @@ SM5MA 13 #define SM5MS "SM5M" #define SM5MA "SM5M" #define BM1684X_EVB "BM1684X_EVB" -#define SC7P "SC7P" +#define SC7P "SC7-224T" #define SC7PLUS "SC7+" +#define SC7FP150 "SC7-FP150" #define CP24 "CP24" #define SM7M_V1_0 "SM7" +// EMA product NAME +#define AIV02X "AIV02X" +#define AIV03X "AIV03X" + DEFINE_int32(dev, 0, "device id"); DEFINE_string(file, "", "bin file with pathname"); DEFINE_string(target, "", "target of the bin file to program; a53/mcu"); @@ -111,10 +146,15 @@ struct { {"SC5PRO", {9, -1}, {SC5PRO, "Error"}}, {"SM5MINI", {10, -1}, {SM5ME, SM5MP, SM5MS, SM5MA, "Error"}}, {"BM1684X_EVB", {32, -1}, {EVB, "Error"}}, - {"SC7P", {33, -1}, {SC7P, "Error"}}, + {"SC7-224T", {33, -1}, {SC7P, "Error"}}, {"CP24/SM7_V0_0", {48, -1}, {CP24, SM7M_V1_0, "Error"}}, {"SC7+", {34, 
-1}, {SC7PLUS, "Error"}}, - {"SM7_MP1_1",{54,-1},{SM7M_V1_0, "Error"}} + {"SC7-FP150", {35, -1}, {SC7FP150, "Error"}}, + {"SM7_MP1_1",{54,-1},{SM7M_V1_0, "Error"}}, + + // EMA product + {"AIV02X", {81,-1}, {AIV02X, "Error"}}, + {"AIV03X", {82,-1}, {AIV03X, "Error"}}, }; @@ -592,7 +632,9 @@ bool is_valid_mcu(bm_handle_t handle, Bin_buffer *bin_buf) { int board_type = (int)((handle->misc_info.board_version & 0x0000FFFF) >> 8); /* check file size */ - if ((board_type == 33) || (board_type == 34) || (board_type == 64)) { + if ((board_type == BOARD_TYPE_SC7_PRO) || (board_type == BOARD_TYPE_SC7_PLUS) || + (board_type == BOARD_TYPE_SC7_FP150) || (board_type == BOARD_TYPE_CP24) || + (board_type ==BOARD_TYPE_AIV02X) || (board_type == BOARD_TYPE_AIV03X)) { if (size != FLASH_SIZE_SC7P) { printf("wrong upgrade file size %ld, it should %ld bytes\n", size, (unsigned long)FLASH_SIZE_SC7P); @@ -610,7 +652,9 @@ bool is_valid_mcu(bm_handle_t handle, Bin_buffer *bin_buf) { MD5_CTX md_ctx; MD5Init(&md_ctx); unsigned long md_size; - if ((board_type == 33) || (board_type == 34) || (board_type == 64)) { + if ((board_type == BOARD_TYPE_SC7_PRO) || (board_type == BOARD_TYPE_SC7_PLUS) || + (board_type == BOARD_TYPE_SC7_FP150) || (board_type == BOARD_TYPE_CP24) || + (board_type ==BOARD_TYPE_AIV02X) || (board_type == BOARD_TYPE_AIV03X)) { md_size = PROGRAM_LIMIT_SC7P; } else { md_size = PROGRAM_LIMIT; @@ -630,7 +674,9 @@ bool is_valid_mcu(bm_handle_t handle, Bin_buffer *bin_buf) { /* check application efie */ struct efie *app_efie; - if ((board_type == 33) || (board_type == 34) || (board_type == 64)) { + if ((board_type == BOARD_TYPE_SC7_PRO) || (board_type == BOARD_TYPE_SC7_PLUS) || + (board_type == BOARD_TYPE_SC7_FP150) || (board_type == BOARD_TYPE_CP24) || + (board_type ==BOARD_TYPE_AIV02X) || (board_type == BOARD_TYPE_AIV03X)) { app_efie = (struct efie *)(image + EFIT_START_SC7P); if (app_efie->offset + app_efie->length > PROGRAM_LIMIT_SC7P) { printf("wrong efie of app\n"); @@ -673,7 
+719,9 @@ bm_status_t bm1684_firmware_update_mcu_app(bm_handle_t handle, Bin_buffer *bin_b int board_type = (int)((handle->misc_info.board_version & 0x0000FFFF) >> 8); - if ((board_type == 33) || (board_type == 34) || (board_type == 64)) { + if ((board_type == BOARD_TYPE_SC7_PRO) || (board_type == BOARD_TYPE_SC7_PLUS) || + (board_type == BOARD_TYPE_SC7_FP150) || (board_type == BOARD_TYPE_CP24) || + (board_type ==BOARD_TYPE_AIV02X) || (board_type == BOARD_TYPE_AIV03X)) { efie_buf.buf = bin_buf->buf + EFIT_START_SC7P; efie_buf.size = sizeof(struct efie); efie_buf.target_addr = EFIT_START_SC7P; @@ -720,7 +768,9 @@ bm_status_t bm1684_firmware_update_mcu_app(bm_handle_t handle, Bin_buffer *bin_b } printf("program app succeeds.\n"); // calculate checksum - if((board_type != 33) && (board_type != 34) && (board_type != 64)) { + if ((board_type != BOARD_TYPE_SC7_PRO) && (board_type != BOARD_TYPE_SC7_PLUS) && + (board_type != BOARD_TYPE_SC7_FP150) && (board_type != BOARD_TYPE_CP24) && + (board_type !=BOARD_TYPE_AIV02X) && (board_type != BOARD_TYPE_AIV03X)) { memset(calc_cksum, 0x0, sizeof(calc_cksum)); chksum_buf.buf = calc_cksum; chksum_buf.size = app_efie->length; diff --git a/bmlib/tools/bmcpu/CMakeLists.txt b/bmlib/tools/bmcpu/CMakeLists.txt index 4e30dd8..7e8dc87 100644 --- a/bmlib/tools/bmcpu/CMakeLists.txt +++ b/bmlib/tools/bmcpu/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 2.8) +cmake_minimum_required(VERSION 3.6) cmake_policy(SET CMP0048 NEW) set(TARGET_NAME "bmcpu") @@ -19,7 +19,7 @@ else() foreach(src ${TEST_SRC_PATH}) get_filename_component(target ${src} NAME_WE) add_executable(${target} ${src}) - target_link_libraries(${target} pthread bmlib) + target_link_libraries(${target} bmlib pthread) install(TARGETS ${target} RUNTIME DESTINATION bin COMPONENT libsophon) diff --git a/bmlib/tools/bmcpu/src/test_pre_start_cpu.cpp b/bmlib/tools/bmcpu/src/test_pre_start_cpu.cpp index 743f9c6..f363499 100644 --- a/bmlib/tools/bmcpu/src/test_pre_start_cpu.cpp 
+++ b/bmlib/tools/bmcpu/src/test_pre_start_cpu.cpp @@ -13,11 +13,12 @@ void *bmcpu_pre_start(void *arg) { bm_handle_t handle; bm_status_t ret; int dev_id = *(int *)arg; - char* dev = "/dev/bm-sophon"; - char* kernel_path = "/opt/sophon/libsophon-current/data"; + const char* dev = "/dev/bm-sophon"; + const char* kernel_path = "/opt/sophon/libsophon-current/data"; char fip_path[100]; char ramdisk_path[100]; char dev_path[30]; + bm_cpu_status_t status; sprintf(dev_path, "%s%d", dev, dev_id); sprintf(fip_path, "%s%s", kernel_path, "/fip.bin"); @@ -35,6 +36,16 @@ void *bmcpu_pre_start(void *arg) { return (void *)BM_ERR_FAILURE; } + status = bmcpu_get_cpu_status(handle); + if (status != BMCPU_IDLE) { + printf("chip %d bmcpu status is not idle!\n", dev_id); + ret = bmcpu_reset_cpu(handle); + if (ret != BM_SUCCESS) { + printf("reset cpu failed!\r\n"); + return (void *)BM_ERR_FAILURE; + } + } + ret = bmcpu_start_cpu(handle, fip_path, ramdisk_path); if ((ret != BM_SUCCESS) && (ret != BM_NOT_SUPPORTED)) { printf("start cpu %d failed!\r\n", dev_id); diff --git a/bmlib/tools/bmcpu/src/test_pre_start_mix_cpu.cpp b/bmlib/tools/bmcpu/src/test_pre_start_mix_cpu.cpp index c4e9dd7..99d1901 100644 --- a/bmlib/tools/bmcpu/src/test_pre_start_mix_cpu.cpp +++ b/bmlib/tools/bmcpu/src/test_pre_start_mix_cpu.cpp @@ -7,19 +7,63 @@ #include #include #include +#include #ifdef __linux__ #define MAX_CHIP_NUM 256 -void *bmcpu_pre_start(void *arg) { +unsigned int ipstr2num(char* str) +{ + int i = 0; + int j = 0; + char new_str[3]; + int num = 0; + char ipstr[15]; + unsigned int result[4]; + unsigned int ip = 0; + + while(*str != '\0') + { + while((*str != '.') && (*str != '\0')) + { + new_str[i] = *str; + num = num * 10 + new_str[i] - '0'; + str += 1; + i += 1; + } + result[j] = num; + num = 0; + if (*str == '\0') + { + break; + } + else + { + str += 1; + i = 0; + j += 1; + } + } + + ip |= ((result[3]&0xff)<<24); + ip |= ((result[2]&0xff)<<16); + ip |= ((result[1]&0xff)<<8); + ip |= 
((result[0]&0xff)<<0); + + return htonl(ip); +} + +void *single_start_mixmode(int dev_id) { bm_handle_t handle; bm_status_t ret; - int dev_id = *(int *)arg; - char* dev = "/dev/bm-sophon"; - char* kernel_path = "/opt/sophon/libsophon-current/data"; + const char* dev = "/dev/bm-sophon"; + const char* kernel_path = "/opt/sophon/libsophon-current/data"; char fip_path[100]; char ramdisk_path[100]; char dev_path[30]; + char cmd[100]; + bm_veth_ip_t ip_mask; + bm_cpu_status_t status; sprintf(dev_path, "%s%d", dev, dev_id); sprintf(fip_path, "%s%s", kernel_path, "/fip.bin"); @@ -38,24 +82,80 @@ void *bmcpu_pre_start(void *arg) { } if (handle->misc_info.chipid == 0x1686) { + int cmd_ret; + status = bmcpu_get_cpu_status(handle); + if (status != BMCPU_IDLE) { + ret = bmcpu_reset_cpu(handle); + if (ret != BM_SUCCESS) { + printf("reset cpu failed!\r\n"); + return (void *)BM_ERR_FAILURE; + } + } + + ret = bm_setup_veth(handle); + if (ret != BM_SUCCESS) + { + bmlib_log(BMCPU_RUNTIME_LOG_TAG, + BMLIB_LOG_ERROR, + "setup virtual ethernet error, ret %d\n", + ret); + return (void *)BM_ERR_FAILURE; + } else { + printf("Setup veth%d success!\n", dev_id); + } + ret = bmcpu_start_mix_cpu(handle, fip_path, ramdisk_path); if ((ret != BM_SUCCESS) && (ret != BM_NOT_SUPPORTED)) { printf("start cpu %d failed!\r\n", dev_id); bm_dev_free(handle); return (void *)BM_ERR_FAILURE; } + + sprintf(cmd, "192.192.%d.2", dev_id); + ip_mask.ip = ipstr2num(cmd); + ip_mask.mask = 0xFFFFFF00; + ret = bm_set_ip(handle, ip_mask); + if (ret != BM_SUCCESS) + { + bmlib_log(BMCPU_RUNTIME_LOG_TAG, + BMLIB_LOG_ERROR, + "set ip error, ret %d\n", + ret); + return (void *)BM_ERR_FAILURE; + } else { + printf("set chip%d ip: %s\n", dev_id, cmd); + } + + sprintf(cmd, "sudo ifconfig veth%d 192.192.%d.3", dev_id, dev_id); + cmd_ret = system(cmd); + if (cmd_ret == -1) + printf("exec %s failed\n", cmd); } + bm_dev_free(handle); return (void *)BM_SUCCESS; } -int main(void) { +void *bmcpu_pre_start(void *arg) { + int dev_id = 
*(int *)arg; + + single_start_mixmode(dev_id); + + return (void *)BM_SUCCESS; +} + +int main(int argc, char *argv[]) { int dev_num; int i; int arg[MAX_CHIP_NUM]; pthread_t threads[MAX_CHIP_NUM]; int ret; + if (argc == 2) { + single_start_mixmode(atoi(argv[1])); + return 0; + } + if (BM_SUCCESS != bm_dev_getcount(&dev_num)) { printf("no sophon device found! when sophon device plugin in, sophon-rpc will run!\n"); return 0; diff --git a/bmlib/tools/bmcpu/src/test_pre_start_one.cpp b/bmlib/tools/bmcpu/src/test_pre_start_one.cpp index 4c08d54..9b08a89 100644 --- a/bmlib/tools/bmcpu/src/test_pre_start_one.cpp +++ b/bmlib/tools/bmcpu/src/test_pre_start_one.cpp @@ -12,8 +12,8 @@ bm_status_t bmcpu_pre_start(int dev_id) { bm_handle_t handle; bm_status_t ret; - char* dev = "/dev/bm-sophon"; - char* kernel_path = "/opt/sophon/libsophon-current/data"; + const char* dev = "/dev/bm-sophon"; + const char* kernel_path = "/opt/sophon/libsophon-current/data"; char fip_path[100]; char ramdisk_path[100]; char dev_path[30]; @@ -56,7 +56,7 @@ int main(int argc, char* argv[]) { int ret; if (argc != 2) { - printf("please input param just like: test_pre_start_one bm-sophon0\n"); + printf("please input param just like: test_pre_start_one 0\n"); return -1; } @@ -70,12 +70,10 @@ int main(int argc, char* argv[]) { return -1; } - rel_num = atoi(argv[1]); - printf("%s %d: input param is %s\n", __func__, __LINE__, argv[1]); rel_num = atoi(argv[1]); ret = bmcpu_pre_start(rel_num); if (ret != BM_SUCCESS) { - printf("bm-sophon%d start a53 failed!\n"); + printf("bm-sophon%d start a53 failed!\n", rel_num); return -1; } diff --git a/bmlib/tools/device_mem_mutex.cpp b/bmlib/tools/device_mem_mutex.cpp new file mode 100644 index 0000000..fd07ebc --- /dev/null +++ b/bmlib/tools/device_mem_mutex.cpp @@ -0,0 +1,103 @@ +#if defined(SOC_MODE) +#include +#include +#include +#include +#include +#include +#include "bmlib_runtime.h" +#include +#include + +int gnum = 0; + +static void *func_1(void *); +static void 
*func_2(void *); +pthread_mutex_t *p_mutex; + +int main(int argc, char *argv[]) +{ + bm_handle_t handle = NULL; + bm_status_t ret = BM_SUCCESS; + int ret_p; + bm_device_mem_t dev_buffer; + unsigned long long vaddr; + volatile int i = 0; + + pthread_t pt1 = 0; + pthread_t pt2 = 0; + + if (argc != 1) { + printf("run ./device_mem_mutex\n"); + return -1; + } + ret = bm_dev_request(&handle, 0); + if (ret != BM_SUCCESS || handle == NULL) { + printf("bm_dev_request failed, ret = %d\n", ret); + return -1; + } + + bm_malloc_device_byte(handle, &dev_buffer, sizeof(pthread_mutex_t)); + printf("mem paddr: 0x%llx\t", bm_mem_get_device_addr(dev_buffer)); + ret = bm_mem_mmap_device_mem(handle, &dev_buffer, &vaddr); + if (ret != BM_SUCCESS) { + printf("%s %d map failed!\n", __func__, __LINE__); + return ret; + } + + p_mutex = (pthread_mutex_t *)vaddr; + printf("vaddr: 0x%llx\n", p_mutex); + + pthread_mutex_init(p_mutex, NULL); + ret_p = pthread_create(&pt1, NULL, func_1, NULL); + if (ret_p != 0) + perror("pthread 1 create"); + ret_p = pthread_create(&pt2, NULL, func_2, NULL); + if (ret_p != 0) + perror("pthread 2 create"); + + pthread_join(pt1, NULL); + pthread_join(pt2, NULL); + pthread_mutex_destroy(p_mutex); + + bm_mem_unmap_device_mem(handle, p_mutex, sizeof(pthread_mutex_t)); + printf("test end\n"); + bm_dev_free(handle); + + return 0; +} + +static void *func_1(void *) +{ + for (int i = 0; i < 3; i++) + { + sleep(1); + printf("This is pthread 1\n"); + pthread_mutex_lock(p_mutex); + gnum++; + printf("thread 1 add 1 to num: %d\n", gnum); + pthread_mutex_unlock(p_mutex); + } +} + +static void *func_2(void *) +{ + for (int i = 0; i < 5; i++) + { + sleep(1); + printf("This is pthread 2\n"); + pthread_mutex_lock(p_mutex); + gnum++; + printf("thread 2 add 1 to num: %d\n", gnum); + pthread_mutex_unlock(p_mutex); + } +} +#else +#include +int main() +{ + printf("only support in soc mode!\n"); + + return 0; +} +#endif diff --git a/bmlib/tools/device_mem_seg.cpp 
b/bmlib/tools/device_mem_seg.cpp new file mode 100644 index 0000000..7259fcc --- /dev/null +++ b/bmlib/tools/device_mem_seg.cpp @@ -0,0 +1,93 @@ +#if defined(SOC_MODE) +#include +#include +#include +#include +#include +#include +#include "bmlib_runtime.h" +#include +#include +#include + +int gnum = 0; +pthread_mutex_t *p_mutex; +static void *func_1(void *); +static void *func_2(void *); +sem_t *p_sem; + +int main(int argc, char *argv[]) +{ + bm_handle_t handle = NULL; + bm_status_t ret = BM_SUCCESS; + int ret_p; + bm_device_mem_t dev_buffer; + unsigned long long vaddr; + + pthread_t pt1 = 0; + pthread_t pt2 = 0; + + if (argc != 1) { + printf("run ./device_mem_seg\n"); + return -1; + } + + ret = bm_dev_request(&handle, 0); + if (ret != BM_SUCCESS || handle == NULL) { + printf("bm_dev_request failed, ret = %d\n", ret); + return -1; + } + + bm_malloc_device_byte(handle, &dev_buffer, sizeof(sem_t)); + printf("mem paddr: 0x%llx\t", bm_mem_get_device_addr(dev_buffer)); + + ret = bm_mem_mmap_device_mem(handle, &dev_buffer, &vaddr); + if (ret != BM_SUCCESS) { + printf("%s %d map failed!\n", __func__, __LINE__); + return ret; + } + + p_sem = (sem_t *)vaddr; + printf("vaddr is 0x%llx\n", p_sem); + sem_init(p_sem, 0, 0); + ret_p = pthread_create(&pt1, NULL, func_1, NULL); + if (ret_p != 0) + perror("pthread 1 create"); + printf("create pthread 1\n"); + sleep(7); + ret_p = pthread_create(&pt2, NULL, func_2, NULL); + if (ret_p != 0) + perror("pthread 2 create"); + printf("create pthread 2\n"); + pthread_join(pt1, NULL); + pthread_join(pt2, NULL); + + printf("test end\n"); + sem_destroy(p_sem); + bm_dev_free(handle); + + return 0; +} + +static void *func_1(void *) +{ + printf("%s wait sem\n", __func__); + sem_wait(p_sem); + printf("thread 1 running\n"); +} + +static void *func_2(void *) +{ + printf("thread 2 running\n"); + sem_post(p_sem); + printf("thread 2 end\n"); +} +#else +#include +int main() +{ + printf("only support in soc mode!\n"); + + return 0; +} +#endif diff --git 
a/bmlib/tools/test_cdma_perf_smmu.cpp b/bmlib/tools/test_cdma_perf_smmu.cpp new file mode 100644 index 0000000..9f575b9 --- /dev/null +++ b/bmlib/tools/test_cdma_perf_smmu.cpp @@ -0,0 +1,562 @@ +#include +#include +#include +#include +#include +#include +#include "bmlib_runtime.h" +#include "bmlib_internal.h" +#include "string.h" +#ifdef __linux +#include +#include +#include +#else +#pragma comment(lib, "libbmlib-static.lib") +#endif + +int array_cmp_int( + unsigned char *p_exp, + unsigned char *p_got, + int len, + const char *info_label) +{ + int idx; + for (idx = 0; idx < len; idx++) { + if (p_exp[idx] != p_got[idx]) { + printf("%s error at index %d exp %x got %x\n", + info_label, idx, p_exp[idx], p_got[idx]); + return -1; + } + } + return 0; +} + +#ifdef __linux__ +void test_msleep(int n_ms) +{ + int i = 0; + for (i = 0; i < n_ms; i++) + usleep(1000); + +} + +void test_sleep(int n_ms) +{ + int loop = n_ms / 1000; + int res = n_ms % 1000; + int i = 0; + + test_msleep(res); + for (i = 0; i < loop; i ++) + test_msleep(1000); +} +#endif + +int test_cdma_ctoc_transfer(int chip_num, int transfer_size, unsigned long long src_device_addr, unsigned long long dst_device_addr) +{ + bool force_cdma_dst = 0; + bm_handle_t handle = NULL; + bm_status_t ret = BM_SUCCESS; + unsigned char *sys_send_buffer, *sys_recieve_buffer; + unsigned long long consume = 0; + struct timespec tp; + bm_trace_item_data trace_data; + + #ifdef __linux__ + clock_gettime(CLOCK_THREAD_CPUTIME_ID,&tp); + #else + clock_gettime(0, &tp); + #endif + srand(tp.tv_nsec); + + sys_send_buffer = (unsigned char *)malloc(transfer_size); + sys_recieve_buffer = (unsigned char *)malloc(transfer_size); + if (!sys_send_buffer || !sys_recieve_buffer) { + printf("malloc buffer for test failed\n"); + return -1; + } + + for (int i = 0; i < transfer_size; i++) { + sys_send_buffer[i] = rand()%0xff; + sys_recieve_buffer[i] = 0x0; + } + + ret = bm_dev_request(&handle, chip_num); + bm_enable_iommu(handle); + if (ret != 
BM_SUCCESS || handle == NULL) { + printf("bm_dev_request failed, ret = %d\n", ret); + return -1; + } + + ret = bm_memcpy_s2d(handle, + bm_mem_from_device(src_device_addr,transfer_size), + sys_send_buffer); + if (ret != BM_SUCCESS) { + if (sys_send_buffer) free(sys_send_buffer); + if (sys_recieve_buffer) free(sys_recieve_buffer); + printf("CDMA transfer from system to device failed, ret = %d\n", ret); + return -1; + } + + bm_trace_enable(handle); + ret = bm_memcpy_c2c(handle, handle, + bm_mem_from_device(src_device_addr, transfer_size), + bm_mem_from_device(dst_device_addr, transfer_size), + force_cdma_dst); + if (ret != BM_SUCCESS) { + if (sys_send_buffer) free(sys_send_buffer); + if (sys_recieve_buffer) free(sys_recieve_buffer); + printf("CDMA transfer from system to device failed, ret = %d\n", ret); + return -1; + } + + bm_trace_dump(handle, &trace_data); + consume = trace_data.end_time - trace_data.start_time; + bm_trace_disable(handle); + if (consume > 0) { + float bandwidth = (float)transfer_size / (1024.0*1024.0) / (consume / 1000000.0); + printf("D2D:Transfer size:0x%x byte. 
Cost time:%lld us, Write Bandwidth:%.2f MB/s\n", + transfer_size, + consume, + bandwidth); + } + + ret = bm_memcpy_d2s(handle, + sys_recieve_buffer, + bm_mem_from_device(dst_device_addr, transfer_size)); + if (ret != BM_SUCCESS) { + if (sys_send_buffer) free(sys_send_buffer); + if(sys_recieve_buffer) free(sys_recieve_buffer); + printf("CDMA transfer from device to device failed, ret = %d\n", ret); + return -1; + } + + if (array_cmp_int(sys_send_buffer, sys_recieve_buffer, transfer_size, "test_cdma_traversal")) { + if (sys_send_buffer) free(sys_send_buffer); + if (sys_recieve_buffer) free(sys_recieve_buffer); + printf("cdma traversal src device addr 0x%llx, dst device addr 0x%llx, size 0x%x failed\n", src_device_addr, dst_device_addr, transfer_size); + return -1; + } + + if (sys_send_buffer) free(sys_send_buffer); + if (sys_recieve_buffer) free(sys_recieve_buffer); + bm_disable_iommu(handle); + bm_dev_free(handle); + return 0; +} + +int test_cdma_stod_transfer(int chip_num, int transfer_size, unsigned long long dst_addr) +{ + bm_handle_t handle = NULL; + bm_status_t ret = BM_SUCCESS; + unsigned char *sys_send_buffer, *sys_recieve_buffer; + int cmp_ret = 0; + unsigned long consume_sys = 0; + unsigned long consume_real = 0; + unsigned long consume = 0; + struct timespec tp; + bm_device_mem_t dev_buffer; + bm_profile_t profile_start, profile_end; + struct timeval tv_start; + struct timeval tv_end; + struct timeval timediff; + #ifdef __linux__ + clock_gettime(CLOCK_THREAD_CPUTIME_ID,&tp); + #else + clock_gettime(0, &tp); + #endif + srand(tp.tv_nsec); + + if (transfer_size == 0x0) + transfer_size = 1024*1024*4; + + sys_send_buffer = (unsigned char *)malloc(transfer_size); + sys_recieve_buffer = (unsigned char *)malloc(transfer_size); + if (!sys_send_buffer || !sys_recieve_buffer) { + printf("malloc buffer for test failed\n"); + return -1; + } + + for (int i = 0; i < transfer_size; i++) { + sys_send_buffer[i] = rand()%0xff; + sys_recieve_buffer[i] = 0x0; + } + + ret = 
bm_dev_request(&handle, chip_num); + bm_enable_iommu(handle); + if (ret != BM_SUCCESS || handle == NULL) { + printf("bm_dev_request failed, ret = %d\n", ret); + if (sys_send_buffer) free(sys_send_buffer); + if (sys_recieve_buffer) free(sys_recieve_buffer); + return -1; + } + + if (dst_addr == 0x0) { + ret = bm_malloc_device_dword(handle, &dev_buffer, transfer_size/4); + if (ret != BM_SUCCESS) { + printf("malloc device memory size = %d failed, ret = %d\n", transfer_size, ret); + return -1; + } + } else { + dev_buffer = bm_mem_from_device(dst_addr, transfer_size); + } + for (int i = 0; i < 10; i++) { + bm_trace_enable(handle); + gettimeofday(&tv_start, NULL); + bm_get_profile(handle, &profile_start); + ret = bm_memcpy_s2d(handle, dev_buffer, sys_send_buffer); + if (ret != BM_SUCCESS) { + printf("CDMA transfer from system to device failed, ret = %d\n", ret); + return -1; + } + + gettimeofday(&tv_end, NULL); + timersub(&tv_end, &tv_start, &timediff); + consume = timediff.tv_sec * 1000000 + timediff.tv_usec; + consume_sys += consume; + + bm_get_profile(handle, &profile_end); + consume = profile_end.cdma_out_time - profile_start.cdma_out_time; + bm_trace_disable(handle); + + consume_real += consume; + } + consume = consume_sys / 10; + if (consume > 0) { + float bandwidth = (float)transfer_size / (1024.0*1024.0) / (consume / 1000000.0); + printf("S2D sys:Transfer size:0x%x byte. Cost time:%ld us, Write Bandwidth:%.2f MB/s\n", + transfer_size, + consume, + bandwidth); + } + consume = consume_real / 10; + if (consume > 0) { + float bandwidth = (float)transfer_size / (1024.0*1024.0) / (consume / 1000000.0); + printf("S2D real:Transfer size:0x%x byte. 
Cost time:%ld us, Write Bandwidth:%.2f MB/s\n", + transfer_size, + consume, + bandwidth); + } + + consume_sys = 0x0; + consume_real = 0x0; + + for (int i = 0; i < 10; i++) { + bm_trace_enable(handle); + gettimeofday(&tv_start, NULL); + bm_get_profile(handle, &profile_start); + ret = bm_memcpy_d2s(handle, sys_recieve_buffer, dev_buffer); + if (ret != BM_SUCCESS) { + printf("CDMA transfer from system to device failed, ret = %d\n", ret); + return -1; + } + + gettimeofday(&tv_end, NULL); + timersub(&tv_end, &tv_start, &timediff); + consume = timediff.tv_sec * 1000000 + timediff.tv_usec; + consume_sys += consume; + bm_get_profile(handle, &profile_end); + consume = profile_end.cdma_in_time - profile_start.cdma_in_time; + consume_real += consume; + bm_trace_disable(handle); + } + consume = consume_sys / 10; + if (consume > 0) { + float bandwidth = (float)transfer_size / (1024.0*1024.0) / (consume / 1000000.0); + printf("D2S sys:Transfer size:0x%x byte. Cost time:%ld us, Write Bandwidth:%.2f MB/s\n", + transfer_size, + consume, + bandwidth); + } + consume = consume_real / 10; + if (consume > 0) { + float bandwidth = (float)transfer_size / (1024.0*1024.0) / (consume / 1000000.0); + printf("D2S real:Transfer size:0x%x byte. Cost time:%ld us, Write Bandwidth:%.2f MB/s\n", + transfer_size, + consume, + bandwidth); + } + cmp_ret = array_cmp_int(sys_send_buffer, sys_recieve_buffer, transfer_size, "cdma test"); + printf("dev = %d, cdma transfer test %s.\n", chip_num, cmp_ret ? 
"Failed" : "Success"); + + if (sys_send_buffer) free(sys_send_buffer); + if (sys_recieve_buffer) free(sys_recieve_buffer); + if (dst_addr == 0x0) { + bm_free_device(handle, dev_buffer); + } + bm_disable_iommu(handle); + bm_dev_free(handle); + return cmp_ret; +} + +struct cdma_process_para +{ + int dev_id; + int size; + int launch_num; + int dir; +}; + +#ifdef __linux__ +void *test_cdma_thread(void *arg) { +#else +DWORD WINAPI test_cdma_thread(LPVOID arg) { +#endif + bm_handle_t handle; + bm_status_t ret = BM_SUCCESS; + struct cdma_process_para *ppara = (struct cdma_process_para *)arg; + unsigned char * sys_buffer; + bm_device_mem_t dev_buffer; + int i = 0x0; + + sys_buffer = (unsigned char*)malloc(ppara->size); + + ret = bm_dev_request(&handle, ppara->dev_id); + bm_enable_iommu(handle); + if (BM_SUCCESS != ret) { + printf("request dev %d failed, ret = %d\n", ppara->dev_id, ret); + return NULL; + } + + ret = bm_malloc_device_byte(handle, &dev_buffer, ppara->size); + if (ret != BM_SUCCESS) { + printf("malloc device memory size = %d failed, ret = %d\n", ppara->size, ret); + free(sys_buffer); + return NULL; + } + + for (i = 0; i < ppara->launch_num; i++) { + if (ppara->dir == 0x0) { + ret = bm_memcpy_s2d(handle, dev_buffer, sys_buffer); + if (ret != BM_SUCCESS) { + printf("CDMA transfer from system to device failed, ret = %d\n", ret); + } + } else { + ret = bm_memcpy_d2s(handle, sys_buffer, dev_buffer); + if (ret != BM_SUCCESS) { + printf("CDMA transfer from device to sys failed, ret = %d\n", ret); + } + } + + } + bm_disable_iommu(handle); + bm_dev_free(handle); + return NULL; +} + +#define THREAD_NUM 64 +// dir = 0, s2d; dir = 1, d2s +int test_cmda_perf_mutithread(int thread_num, int dir, int dev_id, int size, int launch_num) +{ + #ifdef __linux__ + pthread_t threads[THREAD_NUM]; + #else + DWORD dwThreadIdArray[THREAD_NUM]; + HANDLE hThreadArray[THREAD_NUM]; + #endif + + struct cdma_process_para para; + int i = 0x0; + int ret = 0x0; + unsigned long long total_size = 
size * thread_num *launch_num; + float sys_bandwidth = 0; + unsigned long long sys_trans_time_us = 0; + struct timeval tv_start; + struct timeval tv_end; + struct timeval timediff; + + if (thread_num > THREAD_NUM) { + printf("thread num = %d is too much\n", thread_num); + return -1; + } + para.dev_id = dev_id; + para.size = size; + para.launch_num = launch_num; + para.dir = dir; + + gettimeofday(&tv_start, NULL); + for (i = 0; i < thread_num; i++) { + #ifdef __linux__ + ret = pthread_create(&threads[i], NULL, test_cdma_thread, ¶); + if (ret < 0) { + printf("pthread_create %d error: error_code = %d\n", i, ret); + return -1; + } + #else + hThreadArray[i] = + CreateThread(NULL, // default security attributes + 0, // use default stack size + test_cdma_thread, // thread function name + ¶, // argument to thread function + 0, // use default creation flags + &dwThreadIdArray[i]); // returns the thread identifier + if (hThreadArray[i] == NULL) { + printf("creatthread %d and thread_id 0x%08lx failed\n", i, dwThreadIdArray[i]); + //ExitProcess(3); + return -1; + } + #endif + } + #ifdef __linux__ + for (i = 0; i < thread_num; i++) { + ret = pthread_join(threads[i], NULL); + if (ret < 0) { + printf("pthread_join %d error: error_code = %d\n", i, ret); + return -1; + } + } + #endif + #ifdef _WIN32 + for (i = 0; i < thread_num; i++) { + DWORD dwWaitResult = WaitForSingleObject(hThreadArray[i], INFINITE); + switch (dwWaitResult) { + case WAIT_OBJECT_0: + ret = 0; + break; + case WAIT_FAILED: + ret = -1; + break; + case WAIT_ABANDONED: + ret = -2; + break; + case WAIT_TIMEOUT: + ret = -3; + break; + default: + ret = 0; + break; + } + if (ret < 0) { + printf("WaitForSingleObject %d error: error_code = %d\n", i, ret); + return -1; + } + } + + for (i = 0; i < thread_num; i++) + CloseHandle(hThreadArray[i]); + #endif + + gettimeofday(&tv_end, NULL); + timersub(&tv_end, &tv_start, &timediff); + sys_trans_time_us = timediff.tv_sec * 1000000 + timediff.tv_usec; + + if (sys_trans_time_us > 
0) { + sys_bandwidth = (float)(total_size / (1024.0 * 1024.0)) / (sys_trans_time_us / 1000000.0); + } else { + return -1; + } + + if (dir == 0x0) { + printf ("cdma s2d test use %d thread bandwidth : %.2f MB/s\n", thread_num, sys_bandwidth); + } else { + printf ("cdma d2s test use %d thread bandwidth : %.2f MB/s\n", thread_num, sys_bandwidth); + } + + return 0; +} + +int main(int argc, char *argv[]) +{ + int chip_num = 0; + int transfer_size = 0; + unsigned long long src_addr = 0; + unsigned long long dst_addr = 0; + int loop_num = 0; + int interval = 0; + int ret = 0; + int count = 0; + int i = 0; + int j = 0; + if (argv[1]) { + if (strcmp("chip", argv[1])== 0) { + if (argc != 5) { + printf("invalid arg\n"); + printf("example test_cdma_perf chip chip_num size address \n"); + printf("like test_cdma_perf chip 0 0x400000 0x150000000 \n"); + return -1; + } + chip_num = atoi(argv[2]); + transfer_size = (int)strtol(argv[3], NULL, 16); + dst_addr = strtoll(argv[4], NULL, 16); + printf("test chip num = 0x%x, transfer_size = 0x%x, dst_addr = 0x%llx\n", + chip_num, + transfer_size, + dst_addr); + ret = test_cdma_stod_transfer(chip_num, transfer_size, dst_addr); + } else if (strcmp("ctoc", argv[1]) == 0) { + if (argc != 6) { + printf("invalid arg\n"); + printf("example test_cdma_perf ctoc chip_num size src_addr dst_addr \n"); + printf("like test_cdma_perf ctoc 0 0x400000 0x150000000 0x160000000\n"); + return -1; + } + chip_num = atoi(argv[2]); + transfer_size = (int)strtol(argv[3], NULL, 16); + + src_addr = strtoll(argv[4], NULL, 16); + dst_addr = strtoll(argv[5], NULL, 16); + printf("test chip num = 0x%x, transfer_size = 0x%x, src_addr = 0x%llx, dst_addr = 0x%llx\n", + chip_num, transfer_size, src_addr, dst_addr); + + ret = test_cdma_ctoc_transfer(chip_num, transfer_size, src_addr, dst_addr); + } else if (strcmp("stress", argv[1]) == 0) { + if (argc != 5) { + printf("invalid arg \n"); + printf("example test_cdma_perf stress dev_id loop_num interval_time_ms \n"); + 
printf("like test_cdma_perf stress 0 100 100\n"); + printf("if dev_id set 255, test all the device\n"); + return -1; + } + chip_num = atoi(argv[2]); + loop_num = atoi(argv[3]); + interval = atoi(argv[4]); + printf("pcie stress dev_id = %d, total = %d lopp , interval = %d ms start\n", + chip_num, loop_num, interval); + + for (j = 1; j <= loop_num; j++) { + if (chip_num == 255) { + bm_dev_getcount(&count); + + for (i = 0; i < count; i++) { + ret = test_cdma_stod_transfer(i, transfer_size, dst_addr); + if (ret != 0) { + printf("pcie stress loop_num = %d, total = %d fail\n", j, loop_num); + return -1; + } + } + } else { + ret = test_cdma_stod_transfer(chip_num, transfer_size, dst_addr); + if (ret != 0) { + printf("pcie stress loop_num = %d, total = %d fail\n", j, loop_num); + return -1; + } + } + #ifdef __linux__ + test_sleep(interval); + #else + Sleep(interval); + #endif + printf("pcie stress loop_num = %d, total = %d pass\n", j, loop_num); + + } + printf("pcie stress dev_id = %d, total = %d lopp, interval = %d ms all pass\n", + chip_num, loop_num, interval); + return 0; + } + } else { + bm_dev_getcount(&count); + for (i = 0; i < count; i++) { + ret = test_cdma_stod_transfer(i, transfer_size, dst_addr); + if (ret != 0) + break; + test_cmda_perf_mutithread(4, 0, i, (1024 * 1024 * 4), 20); + test_cmda_perf_mutithread(4, 1, i, (1024 * 1024 * 4), 20); + test_cmda_perf_mutithread(8, 0, i, (1024 * 1024 * 4), 20); + test_cmda_perf_mutithread(8, 1, i, (1024 * 1024 * 4), 20); + test_cmda_perf_mutithread(16, 0, i, (1024 * 1024 * 4), 20); + test_cmda_perf_mutithread(16, 1, i, (1024 * 1024 * 4), 20); + } + } + return ret; +} diff --git a/bmlib/tools/test_cdma_perf_u64.cpp b/bmlib/tools/test_cdma_perf_u64.cpp index f5ed42b..6b5b97c 100644 --- a/bmlib/tools/test_cdma_perf_u64.cpp +++ b/bmlib/tools/test_cdma_perf_u64.cpp @@ -33,7 +33,7 @@ int array_cmp_int( return 0; } -int sg_array_cmp_int( +int bm_array_cmp_int_u64( unsigned char *p_exp, unsigned char *p_got, u64 len, @@ -42,7 
+42,7 @@ int sg_array_cmp_int( u64 idx; for (idx = 0; idx < len; idx++) { if (p_exp[idx] != p_got[idx]) { - printf("%s error at index %d exp %x got %x\n", + printf("%s error at index %llu exp %x got %x\n", info_label, idx, p_exp[idx], p_got[idx]); return -1; } @@ -172,7 +172,7 @@ int test_cdma_stod_transfer(int chip_num, u64 transfer_size, unsigned long long unsigned long consume_real = 0; unsigned long consume = 0; struct timespec tp; - sg_device_mem_t dev_buffer; + bm_device_mem_u64_t dev_buffer; bm_profile_t profile_start, profile_end; struct timeval tv_start; struct timeval tv_end; @@ -208,24 +208,24 @@ int test_cdma_stod_transfer(int chip_num, u64 transfer_size, unsigned long long return -1; } if (dst_addr == 0x0) { - ret = sg_malloc_device_dword(handle, &dev_buffer, transfer_size/4); - // ret = sg_malloc_device_byte(handle, &dev_buffer, transfer_size); - // ret = sg_malloc_device_byte_heap(handle, &dev_buffer, 0, transfer_size); - // ret = sg_malloc_device_byte_heap_mask(handle, &dev_buffer, 0x1, transfer_size); + ret = bm_malloc_device_dword_u64(handle, &dev_buffer, transfer_size/4); + // ret = bm_malloc_device_byte_u64(handle, &dev_buffer, transfer_size); + // ret = bm_malloc_device_byte_heap_u64(handle, &dev_buffer, 0, transfer_size); + // ret = bm_malloc_device_byte_heap_mask_u64(handle, &dev_buffer, 0x1, transfer_size); if (ret != BM_SUCCESS) { - printf("malloc device memory size = %d failed, ret = %d\n", transfer_size, ret); + printf("malloc device memory size = %llu failed, ret = %d\n", transfer_size, ret); return -1; } } else { - dev_buffer = sg_mem_from_device(dst_addr, transfer_size); + dev_buffer = bm_mem_from_device_u64(dst_addr, transfer_size); } for (int i = 0; i < 10; i++) { bm_trace_enable(handle); gettimeofday(&tv_start, NULL); bm_get_profile(handle, &profile_start); - ret = sg_memcpy_s2d(handle, dev_buffer, sys_send_buffer); - // ret = sg_memcpy_s2d_poll(handle, dev_buffer, sys_send_buffer); + ret = bm_memcpy_s2d_u64(handle, dev_buffer, 
sys_send_buffer); + // ret = bm_memcpy_s2d_poll_u64(handle, dev_buffer, sys_send_buffer); if (ret != BM_SUCCESS) { printf("CDMA transfer from system to device failed, ret = %d\n", ret); return -1; @@ -266,8 +266,8 @@ int test_cdma_stod_transfer(int chip_num, u64 transfer_size, unsigned long long bm_trace_enable(handle); gettimeofday(&tv_start, NULL); bm_get_profile(handle, &profile_start); - ret = sg_memcpy_d2s(handle, sys_recieve_buffer, dev_buffer); - // ret = sg_memcpy_d2s_poll(handle, sys_recieve_buffer, dev_buffer, transfer_size); + ret = bm_memcpy_d2s_u64(handle, sys_recieve_buffer, dev_buffer); + // ret = bm_memcpy_d2s_poll_u64(handle, sys_recieve_buffer, dev_buffer, transfer_size); if (ret != BM_SUCCESS) { printf("CDMA transfer from system to device failed, ret = %d\n", ret); return -1; @@ -298,13 +298,13 @@ int test_cdma_stod_transfer(int chip_num, u64 transfer_size, unsigned long long consume, bandwidth); } - cmp_ret = sg_array_cmp_int(sys_send_buffer, sys_recieve_buffer, transfer_size, "cdma test"); + cmp_ret = bm_array_cmp_int_u64(sys_send_buffer, sys_recieve_buffer, transfer_size, "cdma test"); printf("dev = %d, cdma transfer test %s.\n", chip_num, cmp_ret ? 
"Failed" : "Success"); if (sys_send_buffer) free(sys_send_buffer); if (sys_recieve_buffer) free(sys_recieve_buffer); if (dst_addr == 0x0) { - sg_free_device(handle, dev_buffer); + bm_free_device_u64(handle, dev_buffer); } bm_dev_free(handle); return cmp_ret; @@ -327,7 +327,7 @@ DWORD WINAPI test_cdma_thread(LPVOID arg) { bm_status_t ret = BM_SUCCESS; struct cdma_process_para *ppara = (struct cdma_process_para *)arg; unsigned char * sys_buffer; - sg_device_mem_t dev_buffer; + bm_device_mem_u64_t dev_buffer; int i = 0x0; sys_buffer = (unsigned char*)malloc(ppara->size); @@ -338,21 +338,21 @@ DWORD WINAPI test_cdma_thread(LPVOID arg) { return NULL; } - ret = sg_malloc_device_byte(handle, &dev_buffer, ppara->size); + ret = bm_malloc_device_byte_u64(handle, &dev_buffer, ppara->size); if (ret != BM_SUCCESS) { - printf("malloc device memory size = %d failed, ret = %d\n", ppara->size, ret); + printf("malloc device memory size = %llu failed, ret = %d\n", ppara->size, ret); free(sys_buffer); return NULL; } for (i = 0; i < ppara->launch_num; i++) { if (ppara->dir == 0x0) { - ret = sg_memcpy_s2d(handle, dev_buffer, sys_buffer); + ret = bm_memcpy_s2d_u64(handle, dev_buffer, sys_buffer); if (ret != BM_SUCCESS) { printf("CDMA transfer from system to device failed, ret = %d\n", ret); } } else { - ret = sg_memcpy_d2s(handle, sys_buffer, dev_buffer); + ret = bm_memcpy_d2s_u64(handle, sys_buffer, dev_buffer); if (ret != BM_SUCCESS) { printf("CDMA transfer from device to sys failed, ret = %d\n", ret); } diff --git a/bmlib/tools/test_cdma_sg.cpp b/bmlib/tools/test_cdma_sg.cpp new file mode 100644 index 0000000..8c81362 --- /dev/null +++ b/bmlib/tools/test_cdma_sg.cpp @@ -0,0 +1,738 @@ +#include +#include +#include +#include +#include +#include +// #include "bmlib_runtime.h" +#include "bmlib_internal.h" +#include "string.h" +#ifdef __linux +#include +#include +#include +#else +#pragma comment(lib, "libbmlib-static.lib") +#endif + +int array_cmp_int( + unsigned char *p_exp, + unsigned 
char *p_got, + int len, + const char *info_label) +{ + int idx; + for (idx = 0; idx < len; idx++) { + if (p_exp[idx] != p_got[idx]) { + printf("%s error at index %d exp %x got %x\n", + info_label, idx, p_exp[idx], p_got[idx]); + return -1; + } + } + return 0; +} + +#ifdef __linux__ +void test_msleep(int n_ms) +{ + int i = 0; + for (i = 0; i < n_ms; i++) + usleep(1000); + +} + +void test_sleep(int n_ms) +{ + int loop = n_ms / 1000; + int res = n_ms % 1000; + int i = 0; + + test_msleep(res); + for (i = 0; i < loop; i ++) + test_msleep(1000); +} +#endif + +int test_cdma_ctoc_transfer(int chip_num, int transfer_size, unsigned long long src_device_addr, unsigned long long dst_device_addr) +{ + bool force_cdma_dst = 0; + bm_handle_t handle = NULL; + bm_status_t ret = BM_SUCCESS; + unsigned char *sys_send_buffer, *sys_recieve_buffer; + unsigned long long consume = 0; + struct timespec tp; + bm_trace_item_data trace_data; + + #ifdef __linux__ + clock_gettime(CLOCK_THREAD_CPUTIME_ID,&tp); + #else + clock_gettime(0, &tp); + #endif + srand(tp.tv_nsec); + + sys_send_buffer = (unsigned char *)malloc(transfer_size); + sys_recieve_buffer = (unsigned char *)malloc(transfer_size); + if (!sys_send_buffer || !sys_recieve_buffer) { + printf("malloc buffer for test failed\n"); + return -1; + } + + for (int i = 0; i < transfer_size; i++) { + sys_send_buffer[i] = rand()%0xff; + sys_recieve_buffer[i] = 0x0; + } + + ret = bm_dev_request(&handle, chip_num); + bm_enable_iommu(handle); + if (ret != BM_SUCCESS || handle == NULL) { + printf("bm_dev_request failed, ret = %d\n", ret); + return -1; + } + + ret = bm_memcpy_s2d(handle, + bm_mem_from_device(src_device_addr,transfer_size), + sys_send_buffer); + if (ret != BM_SUCCESS) { + if (sys_send_buffer) free(sys_send_buffer); + if (sys_recieve_buffer) free(sys_recieve_buffer); + printf("CDMA transfer from system to device failed, ret = %d\n", ret); + return -1; + } + + bm_trace_enable(handle); + ret = bm_memcpy_c2c(handle, handle, + 
bm_mem_from_device(src_device_addr, transfer_size), + bm_mem_from_device(dst_device_addr, transfer_size), + force_cdma_dst); + if (ret != BM_SUCCESS) { + if (sys_send_buffer) free(sys_send_buffer); + if (sys_recieve_buffer) free(sys_recieve_buffer); + printf("CDMA transfer from system to device failed, ret = %d\n", ret); + return -1; + } + + bm_trace_dump(handle, &trace_data); + consume = trace_data.end_time - trace_data.start_time; + bm_trace_disable(handle); + if (consume > 0) { + float bandwidth = (float)transfer_size / (1024.0*1024.0) / (consume / 1000000.0); + printf("D2D:Transfer size:0x%x byte. Cost time:%lld us, Write Bandwidth:%.2f MB/s\n", + transfer_size, + consume, + bandwidth); + } + + ret = bm_memcpy_d2s(handle, + sys_recieve_buffer, + bm_mem_from_device(dst_device_addr, transfer_size)); + if (ret != BM_SUCCESS) { + if (sys_send_buffer) free(sys_send_buffer); + if(sys_recieve_buffer) free(sys_recieve_buffer); + printf("CDMA transfer from device to device failed, ret = %d\n", ret); + return -1; + } + + if (array_cmp_int(sys_send_buffer, sys_recieve_buffer, transfer_size, "test_cdma_traversal")) { + if (sys_send_buffer) free(sys_send_buffer); + if (sys_recieve_buffer) free(sys_recieve_buffer); + printf("cdma traversal src device addr 0x%llx, dst device addr 0x%llx, size 0x%x failed\n", src_device_addr, dst_device_addr, transfer_size); + return -1; + } + + if (sys_send_buffer) free(sys_send_buffer); + if (sys_recieve_buffer) free(sys_recieve_buffer); + bm_disable_iommu(handle); + bm_dev_free(handle); + return 0; +} + +int test_cdma_stod_transfer(int chip_num, int transfer_size, unsigned long long dst_addr) +{ + bm_handle_t handle = NULL; + bm_status_t ret = BM_SUCCESS; + unsigned char *sys_send_buffer, *sys_recieve_buffer; + int cmp_ret = 0; + unsigned long consume_sys = 0; + unsigned long consume_real = 0; + unsigned long consume = 0; + struct timespec tp; + bm_device_mem_t dev_buffer; + bm_profile_t profile_start, profile_end; + struct timeval 
tv_start; + struct timeval tv_end; + struct timeval timediff; + #ifdef __linux__ + clock_gettime(CLOCK_THREAD_CPUTIME_ID,&tp); + #else + clock_gettime(0, &tp); + #endif + srand(tp.tv_nsec); + + if (transfer_size == 0x0) + transfer_size = 1024*1024*4; + + sys_send_buffer = (unsigned char *)malloc(transfer_size); + sys_recieve_buffer = (unsigned char *)malloc(transfer_size); + if (!sys_send_buffer || !sys_recieve_buffer) { + printf("malloc buffer for test failed\n"); + return -1; + } + + for (int i = 0; i < transfer_size; i++) { + sys_send_buffer[i] = rand()%0xff; + sys_recieve_buffer[i] = 0x0; + } + + ret = bm_dev_request(&handle, chip_num); + bm_enable_iommu(handle); + if (ret != BM_SUCCESS || handle == NULL) { + printf("bm_dev_request failed, ret = %d\n", ret); + if (sys_send_buffer) free(sys_send_buffer); + if (sys_recieve_buffer) free(sys_recieve_buffer); + return -1; + } + + if (dst_addr == 0x0) { + ret = bm_malloc_device_dword(handle, &dev_buffer, transfer_size/4); + if (ret != BM_SUCCESS) { + printf("malloc device memory size = %d failed, ret = %d\n", transfer_size, ret); + return -1; + } + } else { + dev_buffer = bm_mem_from_device(dst_addr, transfer_size); + } + for (int i = 0; i < 10; i++) { + bm_trace_enable(handle); + gettimeofday(&tv_start, NULL); + bm_get_profile(handle, &profile_start); + ret = bm_memcpy_s2d_gather(handle, dev_buffer, 2, (void *)sys_send_buffer, (unsigned long long)transfer_size); + if (ret != BM_SUCCESS) { + printf("CDMA transfer from system to device failed, ret = %d\n", ret); + return -1; + } + + gettimeofday(&tv_end, NULL); + timersub(&tv_end, &tv_start, &timediff); + consume = timediff.tv_sec * 1000000 + timediff.tv_usec; + consume_sys += consume; + + bm_get_profile(handle, &profile_end); + consume = profile_end.cdma_out_time - profile_start.cdma_out_time; + bm_trace_disable(handle); + + consume_real += consume; + } + consume = consume_sys / 10; + if (consume > 0) { + float bandwidth = (float)transfer_size / (1024.0*1024.0) / 
(consume / 1000000.0); + printf("S2D sys:Transfer size:0x%x byte. Cost time:%ld us, Write Bandwidth:%.2f MB/s\n", + transfer_size, + consume, + bandwidth); + } + consume = consume_real / 10; + if (consume > 0) { + float bandwidth = (float)transfer_size / (1024.0*1024.0) / (consume / 1000000.0); + printf("S2D real:Transfer size:0x%x byte. Cost time:%ld us, Write Bandwidth:%.2f MB/s\n", + transfer_size, + consume, + bandwidth); + } + + consume_sys = 0x0; + consume_real = 0x0; + + for (int i = 0; i < 10; i++) { + bm_trace_enable(handle); + gettimeofday(&tv_start, NULL); + bm_get_profile(handle, &profile_start); + ret = bm_memcpy_d2s_scatter(handle, dev_buffer, 2, (void *)sys_recieve_buffer, (u64)transfer_size); + if (ret != BM_SUCCESS) { + printf("CDMA transfer from system to device failed, ret = %d\n", ret); + return -1; + } + + gettimeofday(&tv_end, NULL); + timersub(&tv_end, &tv_start, &timediff); + consume = timediff.tv_sec * 1000000 + timediff.tv_usec; + consume_sys += consume; + bm_get_profile(handle, &profile_end); + consume = profile_end.cdma_in_time - profile_start.cdma_in_time; + consume_real += consume; + bm_trace_disable(handle); + } + consume = consume_sys / 10; + if (consume > 0) { + float bandwidth = (float)transfer_size / (1024.0*1024.0) / (consume / 1000000.0); + printf("D2S sys:Transfer size:0x%x byte. Cost time:%ld us, Write Bandwidth:%.2f MB/s\n", + transfer_size, + consume, + bandwidth); + } + consume = consume_real / 10; + if (consume > 0) { + float bandwidth = (float)transfer_size / (1024.0*1024.0) / (consume / 1000000.0); + printf("D2S real:Transfer size:0x%x byte. Cost time:%ld us, Write Bandwidth:%.2f MB/s\n", + transfer_size, + consume, + bandwidth); + } + cmp_ret = array_cmp_int(sys_send_buffer, sys_recieve_buffer, transfer_size, "cdma test"); + printf("dev = %d, cdma transfer test %s.\n", chip_num, cmp_ret ? 
"Failed" : "Success"); + + if (sys_send_buffer) free(sys_send_buffer); + if (sys_recieve_buffer) free(sys_recieve_buffer); + if (dst_addr == 0x0) { + bm_free_device(handle, dev_buffer); + } + bm_disable_iommu(handle); + bm_dev_free(handle); + return cmp_ret; +} + +struct cdma_process_para +{ + int dev_id; + int size; + int launch_num; + int dir; +}; + +#ifdef __linux__ +void *test_cdma_thread(void *arg) { +#else +DWORD WINAPI test_cdma_thread(LPVOID arg) { +#endif + bm_handle_t handle; + bm_status_t ret = BM_SUCCESS; + struct cdma_process_para *ppara = (struct cdma_process_para *)arg; + unsigned char * sys_buffer; +// unsigned char * sys_buf2; + bm_device_mem_t dev_buffer; + int i = 0x0; + + sys_buffer = (unsigned char*)malloc(ppara->size); +// sys_buf2 = (unsigned char *)malloc(ppara->size/2); + + ret = bm_dev_request(&handle, ppara->dev_id); + bm_enable_iommu(handle); + if (BM_SUCCESS != ret) { + printf("request dev %d failed, ret = %d\n", ppara->dev_id, ret); + return NULL; + } + + ret = bm_malloc_device_byte(handle, &dev_buffer, ppara->size); + if (ret != BM_SUCCESS) { + printf("malloc device memory size = %d failed, ret = %d\n", ppara->size, ret); + free(sys_buffer); + // free(sys_buf2); + return NULL; + } + + for (i = 0; i < ppara->launch_num; i++) { + if (ppara->dir == 0x0) { + ret = bm_memcpy_s2d_gather(handle, dev_buffer, 2, (void *)sys_buffer, (unsigned long long)ppara->size); + if (ret != BM_SUCCESS) { + printf("CDMA transfer from system to device failed, ret = %d\n", ret); + } + } else { + ret = bm_memcpy_d2s_scatter(handle, dev_buffer, 2, sys_buffer, ppara->size); + if (ret != BM_SUCCESS) { + printf("CDMA transfer from device to sys failed, ret = %d\n", ret); + } + } + + } + bm_disable_iommu(handle); + bm_dev_free(handle); + return NULL; +} + +#define THREAD_NUM 64 +// dir = 0, s2d; dir = 1, d2s +int test_cmda_perf_mutithread(int thread_num, int dir, int dev_id, int size, int launch_num) +{ + #ifdef __linux__ + pthread_t threads[THREAD_NUM]; + #else + 
DWORD dwThreadIdArray[THREAD_NUM]; + HANDLE hThreadArray[THREAD_NUM]; + #endif + + struct cdma_process_para para; + int i = 0x0; + int ret = 0x0; + unsigned long long total_size = size * thread_num *launch_num; + float sys_bandwidth = 0; + unsigned long long sys_trans_time_us = 0; + struct timeval tv_start; + struct timeval tv_end; + struct timeval timediff; + + if (thread_num > THREAD_NUM) { + printf("thread num = %d is too much\n", thread_num); + return -1; + } + para.dev_id = dev_id; + para.size = size; + para.launch_num = launch_num; + para.dir = dir; + + gettimeofday(&tv_start, NULL); + for (i = 0; i < thread_num; i++) { + #ifdef __linux__ + ret = pthread_create(&threads[i], NULL, test_cdma_thread, ¶); + if (ret < 0) { + printf("pthread_create %d error: error_code = %d\n", i, ret); + return -1; + } + #else + hThreadArray[i] = + CreateThread(NULL, // default security attributes + 0, // use default stack size + test_cdma_thread, // thread function name + ¶, // argument to thread function + 0, // use default creation flags + &dwThreadIdArray[i]); // returns the thread identifier + if (hThreadArray[i] == NULL) { + printf("creatthread %d and thread_id 0x%08lx failed\n", i, dwThreadIdArray[i]); + //ExitProcess(3); + return -1; + } + #endif + } + #ifdef __linux__ + for (i = 0; i < thread_num; i++) { + ret = pthread_join(threads[i], NULL); + if (ret < 0) { + printf("pthread_join %d error: error_code = %d\n", i, ret); + return -1; + } + } + #endif + #ifdef _WIN32 + for (i = 0; i < thread_num; i++) { + DWORD dwWaitResult = WaitForSingleObject(hThreadArray[i], INFINITE); + switch (dwWaitResult) { + case WAIT_OBJECT_0: + ret = 0; + break; + case WAIT_FAILED: + ret = -1; + break; + case WAIT_ABANDONED: + ret = -2; + break; + case WAIT_TIMEOUT: + ret = -3; + break; + default: + ret = 0; + break; + } + if (ret < 0) { + printf("WaitForSingleObject %d error: error_code = %d\n", i, ret); + return -1; + } + } + + for (i = 0; i < thread_num; i++) + CloseHandle(hThreadArray[i]); + 
#endif + + gettimeofday(&tv_end, NULL); + timersub(&tv_end, &tv_start, &timediff); + sys_trans_time_us = timediff.tv_sec * 1000000 + timediff.tv_usec; + + if (sys_trans_time_us > 0) { + sys_bandwidth = (float)(total_size / (1024.0 * 1024.0)) / (sys_trans_time_us / 1000000.0); + } else { + return -1; + } + + if (dir == 0x0) { + printf ("cdma s2d test use %d thread bandwidth : %.2f MB/s\n", thread_num, sys_bandwidth); + } else { + printf ("cdma d2s test use %d thread bandwidth : %.2f MB/s\n", thread_num, sys_bandwidth); + } + + return 0; +} + +int test_cdma_stod_transfer_sg(int chip_num, int transfer_size, unsigned long long dst_addr) +{ + bm_handle_t handle = NULL; + bm_status_t ret = BM_SUCCESS; + unsigned char *sys_send_buffer1, *sys_recieve_buffer1, *sys_send_buffer2, *sys_recieve_buffer2; + int cmp_ret = 0; + unsigned long consume_sys = 0; + unsigned long consume_real = 0; + unsigned long consume = 0; + struct timespec tp; + bm_device_mem_t dev_buffer; + bm_profile_t profile_start, profile_end; + struct timeval tv_start; + struct timeval tv_end; + struct timeval timediff; + #ifdef __linux__ + clock_gettime(CLOCK_THREAD_CPUTIME_ID,&tp); + #else + clock_gettime(0, &tp); + #endif + srand(tp.tv_nsec); + + if (transfer_size == 0x0) + transfer_size = 1024*1024*4; + + sys_send_buffer1 = (unsigned char *)malloc(transfer_size/2); + sys_recieve_buffer1 = (unsigned char *)malloc(transfer_size/2); + sys_send_buffer2 = (unsigned char *)malloc(transfer_size/2); + sys_recieve_buffer2 = (unsigned char *)malloc(transfer_size/2); + if (!sys_send_buffer1 || !sys_recieve_buffer1 || !sys_send_buffer2 || !sys_recieve_buffer2) { + printf("malloc buffer for test failed\n"); + return -1; + } + printf("%s sys_send_buffer1: %p\t sys_send_buffer2: %p\n", __func__, sys_send_buffer1, sys_send_buffer2); + printf("%s sys_recieve_buffer1: %p\t sys_recieve_buffer2: %p\n", __func__, sys_recieve_buffer1, sys_recieve_buffer2); + for (int i = 0; i < transfer_size/2; i++) { + sys_send_buffer1[i] = 
rand()%0xff; + sys_send_buffer2[i] = rand()%0xff; + sys_recieve_buffer1[i] = 0x0; + sys_recieve_buffer2[i] = 0x0; + } + + ret = bm_dev_request(&handle, chip_num); + bm_enable_iommu(handle); + if (ret != BM_SUCCESS || handle == NULL) { + printf("bm_dev_request failed, ret = %d\n", ret); + if (sys_send_buffer1) free(sys_send_buffer1); + if (sys_recieve_buffer1) free(sys_recieve_buffer1); + if (sys_send_buffer2) free(sys_send_buffer2); + if (sys_recieve_buffer2) free(sys_recieve_buffer2); + return -1; + } + + if (dst_addr == 0x0) { + ret = bm_malloc_device_dword(handle, &dev_buffer, transfer_size/4); + if (ret != BM_SUCCESS) { + printf("malloc device memory size = %d failed, ret = %d\n", transfer_size, ret); + return -1; + } + } else { + dev_buffer = bm_mem_from_device(dst_addr, transfer_size); + } + for (int i = 0; i < 10; i++) { + bm_trace_enable(handle); + gettimeofday(&tv_start, NULL); + bm_get_profile(handle, &profile_start); + ret = bm_memcpy_s2d_gather(handle, dev_buffer, 4, (void *)sys_send_buffer1, (unsigned long long)transfer_size/2, + (void *)sys_send_buffer2, (unsigned long long)transfer_size/2); + if (ret != BM_SUCCESS) { + printf("CDMA transfer from system to device failed, ret = %d\n", ret); + return -1; + } + + gettimeofday(&tv_end, NULL); + timersub(&tv_end, &tv_start, &timediff); + consume = timediff.tv_sec * 1000000 + timediff.tv_usec; + consume_sys += consume; + + bm_get_profile(handle, &profile_end); + consume = profile_end.cdma_out_time - profile_start.cdma_out_time; + bm_trace_disable(handle); + + consume_real += consume; + } + consume = consume_sys / 10; + if (consume > 0) { + float bandwidth = (float)transfer_size / (1024.0*1024.0) / (consume / 1000000.0); + printf("S2D sys:Transfer size:0x%x byte. 
Cost time:%ld us, Write Bandwidth:%.2f MB/s\n", + transfer_size, + consume, + bandwidth); + } + consume = consume_real / 10; + if (consume > 0) { + float bandwidth = (float)transfer_size / (1024.0*1024.0) / (consume / 1000000.0); + printf("S2D real:Transfer size:0x%x byte. Cost time:%ld us, Write Bandwidth:%.2f MB/s\n", + transfer_size, + consume, + bandwidth); + } + + consume_sys = 0x0; + consume_real = 0x0; + + for (int i = 0; i < 10; i++) { + bm_trace_enable(handle); + gettimeofday(&tv_start, NULL); + bm_get_profile(handle, &profile_start); + ret = bm_memcpy_d2s_scatter(handle, dev_buffer, 4, (void *)sys_recieve_buffer1, (u64)transfer_size/2, + (void *)sys_recieve_buffer2, (u64)transfer_size/2); + if (ret != BM_SUCCESS) { + printf("CDMA transfer from system to device failed, ret = %d\n", ret); + return -1; + } + + gettimeofday(&tv_end, NULL); + timersub(&tv_end, &tv_start, &timediff); + consume = timediff.tv_sec * 1000000 + timediff.tv_usec; + consume_sys += consume; + bm_get_profile(handle, &profile_end); + consume = profile_end.cdma_in_time - profile_start.cdma_in_time; + consume_real += consume; + bm_trace_disable(handle); + } + consume = consume_sys / 10; + if (consume > 0) { + float bandwidth = (float)transfer_size / (1024.0*1024.0) / (consume / 1000000.0); + printf("D2S sys:Transfer size:0x%x byte. Cost time:%ld us, Write Bandwidth:%.2f MB/s\n", + transfer_size, + consume, + bandwidth); + } + consume = consume_real / 10; + if (consume > 0) { + float bandwidth = (float)transfer_size / (1024.0*1024.0) / (consume / 1000000.0); + printf("D2S real:Transfer size:0x%x byte. Cost time:%ld us, Write Bandwidth:%.2f MB/s\n", + transfer_size, + consume, + bandwidth); + } + cmp_ret = array_cmp_int(sys_send_buffer1, sys_recieve_buffer1, transfer_size/2, "cdma test"); + printf("dev = %d, cdma transfer test1 %s.\n", chip_num, cmp_ret ? 
"Failed" : "Success"); + cmp_ret = array_cmp_int(sys_send_buffer2, sys_recieve_buffer2, transfer_size/2, "cdma test"); + printf("dev = %d, cdma transfer test2 %s.\n", chip_num, cmp_ret ? "Failed" : "Success"); + + if (sys_send_buffer1) free(sys_send_buffer1); + if (sys_recieve_buffer1) free(sys_recieve_buffer1); + if (sys_send_buffer2) free(sys_send_buffer2); + if (sys_recieve_buffer2) free(sys_recieve_buffer2); + if (dst_addr == 0x0) { + bm_free_device(handle, dev_buffer); + } + bm_disable_iommu(handle); + bm_dev_free(handle); + return cmp_ret; +} + +int main(int argc, char *argv[]) +{ + int chip_num = 0; + int transfer_size = 0; + unsigned long long src_addr = 0; + unsigned long long dst_addr = 0; + int loop_num = 0; + int interval = 0; + int ret = 0; + int count = 0; + int i = 0; + int j = 0; + + if (argv[1]) { + if (strcmp("chip", argv[1])== 0) { + if (argc != 5) { + printf("invalid arg\n"); + printf("example test_cdma_perf chip chip_num size address \n"); + printf("like test_cdma_perf chip 0 0x400000 0x150000000 \n"); + return -1; + } + chip_num = atoi(argv[2]); + transfer_size = (int)strtol(argv[3], NULL, 16); + dst_addr = strtoll(argv[4], NULL, 16); + printf("test chip num = 0x%x, transfer_size = 0x%x, dst_addr = 0x%llx\n", + chip_num, + transfer_size, + dst_addr); + ret = test_cdma_stod_transfer(chip_num, transfer_size, dst_addr); + } else if (strcmp("smmu", argv[1]) == 0) { + if (argc != 5) { + printf("invalid arg\n"); + printf("example test_cdma_perf chip chip_num size address \n"); + printf("like test_cdma_perf chip 0 0x400000 0x150000000 \n"); + return -1; + } + chip_num = atoi(argv[2]); + transfer_size = (int)strtol(argv[3], NULL, 16); + dst_addr = strtoll(argv[4], NULL, 16); + printf("test chip num = 0x%x, transfer_size = 0x%x, dst_addr = 0x%llx\n", + chip_num, + transfer_size, + dst_addr); + ret = test_cdma_stod_transfer_sg(chip_num, transfer_size, dst_addr); + } else if (strcmp("ctoc", argv[1]) == 0) { + if (argc != 6) { + printf("invalid arg\n"); + 
printf("example test_cdma_perf ctoc chip_num size src_addr dst_addr \n"); + printf("like test_cdma_perf ctoc 0 0x400000 0x150000000 0x160000000\n"); + return -1; + } + chip_num = atoi(argv[2]); + transfer_size = (int)strtol(argv[3], NULL, 16); + + src_addr = strtoll(argv[4], NULL, 16); + dst_addr = strtoll(argv[5], NULL, 16); + printf("test chip num = 0x%x, transfer_size = 0x%x, src_addr = 0x%llx, dst_addr = 0x%llx\n", + chip_num, transfer_size, src_addr, dst_addr); + + ret = test_cdma_ctoc_transfer(chip_num, transfer_size, src_addr, dst_addr); + } else if (strcmp("stress", argv[1]) == 0) { + if (argc != 5) { + printf("invalid arg \n"); + printf("example test_cdma_perf stress dev_id loop_num interval_time_ms \n"); + printf("like test_cdma_perf stress 0 100 100\n"); + printf("if dev_id set 255, test all the device\n"); + return -1; + } + chip_num = atoi(argv[2]); + loop_num = atoi(argv[3]); + interval = atoi(argv[4]); + printf("pcie stress dev_id = %d, total = %d lopp , interval = %d ms start\n", + chip_num, loop_num, interval); + + for (j = 1; j <= loop_num; j++) { + if (chip_num == 255) { + bm_dev_getcount(&count); + + for (i = 0; i < count; i++) { + ret = test_cdma_stod_transfer(i, transfer_size, dst_addr); + if (ret != 0) { + printf("pcie stress loop_num = %d, total = %d fail\n", j, loop_num); + return -1; + } + } + } else { + ret = test_cdma_stod_transfer(chip_num, transfer_size, dst_addr); + if (ret != 0) { + printf("pcie stress loop_num = %d, total = %d fail\n", j, loop_num); + return -1; + } + } + #ifdef __linux__ + test_sleep(interval); + #else + Sleep(interval); + #endif + printf("pcie stress loop_num = %d, total = %d pass\n", j, loop_num); + + } + printf("pcie stress dev_id = %d, total = %d lopp, interval = %d ms all pass\n", + chip_num, loop_num, interval); + return 0; + } + } else { + bm_dev_getcount(&count); + for (i = 0; i < count; i++) { + ret = test_cdma_stod_transfer(i, transfer_size, dst_addr); + if (ret != 0) + break; + 
test_cmda_perf_mutithread(4, 0, i, (1024 * 1024 * 4), 20); + test_cmda_perf_mutithread(4, 1, i, (1024 * 1024 * 4), 20); + test_cmda_perf_mutithread(8, 0, i, (1024 * 1024 * 4), 20); + test_cmda_perf_mutithread(8, 1, i, (1024 * 1024 * 4), 20); + test_cmda_perf_mutithread(16, 0, i, (1024 * 1024 * 4), 20); + test_cmda_perf_mutithread(16, 1, i, (1024 * 1024 * 4), 20); + } + } + return ret; +} diff --git a/bmlib/tools/test_compare_bandwidth.cpp b/bmlib/tools/test_compare_bandwidth.cpp new file mode 100644 index 0000000..454c0bf --- /dev/null +++ b/bmlib/tools/test_compare_bandwidth.cpp @@ -0,0 +1,757 @@ +#include +#include +#include +#include +#include +#include +#include "bmlib_runtime.h" +#include "bmlib_internal.h" +#include "string.h" +#include "bmlib_memory.h" +#ifdef __linux +#include +#include +#include +#include +#else +#pragma comment(lib, "libbmlib-static.lib") +#endif + +int array_cmp_int( + unsigned char *p_exp, + unsigned char *p_got, + int len, + const char *info_label) +{ + int idx; + for (idx = 0; idx < len; idx++) + { + if (p_exp[idx] != p_got[idx]) + { + printf("%s error at index %d exp %x got %x\n", + info_label, idx, p_exp[idx], p_got[idx]); + return -1; + } + } + return 0; +} + +#ifdef __linux__ +void test_msleep(int n_ms) +{ + int i = 0; + for (i = 0; i < n_ms; i++) + usleep(1000); +} + +void test_sleep(int n_ms) +{ + int loop = n_ms / 1000; + int res = n_ms % 1000; + int i = 0; + + test_msleep(res); + for (i = 0; i < loop; i++) + test_msleep(1000); +} +#endif + +static bool bm_device_mem_range_valid(bm_handle_t handle, bm_device_mem_t mem) +{ +#ifdef USING_CMODEL + UNUSED(handle); + UNUSED(mem); +#else + u64 saddr = bm_mem_get_device_addr(mem); + u64 eaddr = bm_mem_get_size(mem) + saddr; + + if (handle->misc_info.chipid == 0x1684 || handle->misc_info.chipid == 0x1686) + { + if (((saddr >= 0x100000000 && saddr <= 0x4ffffffff) || (saddr >= 0x0 && saddr <= 0x103fffff)) && ((eaddr >= 0x100000000 && eaddr <= 0x500000000) || (eaddr >= 0x0 && eaddr <= 
0x10400000))) + { + return true; + } + else + { + printf("%s saddr=0x%llx eaddr=0x%llx out of range\n", __func__, saddr, eaddr); + return false; + } + } + + if (handle->misc_info.chipid == 0x1682) + { + if (saddr >= 0x100000000 && saddr <= 0x2ffffffff && eaddr >= 0x100000000 && eaddr <= 0x300000000) + { + return true; + } + else + { + printf("%s saddr=0x%llx eaddr=0x%llx out of range\n", __func__, saddr, eaddr); + return false; + } + } +#endif + return true; +} + +bm_status_t bm_mem_mmap_device_mem_mix(bm_handle_t handle, bm_device_mem_t *dmem, + u64 *vmem) +{ +#ifndef USING_CMODEL + void *ret = 0; + u64 addr, size, addr_end; + u64 aligned_size, aligned_addr; + + if (handle->misc_info.pcie_soc_mode == 0) + { + printf("bmlib not support mmap in pcie mode\n"); + return BM_ERR_FAILURE; + } +#ifdef __linux__ + // if (!bm_device_mem_page_aligned(*dmem)) { + // bmlib_log(BMLIB_MEMORY_LOG_TAG, BMLIB_LOG_ERROR, + // "bm_mem_mmap_device_mem device_mem_addr = 0x%llx is illegal\n", + // bm_mem_get_device_addr(*dmem)); + // return BM_ERR_PARAM; + // } + + if (!bm_device_mem_range_valid(handle, *dmem)) + { + return BM_ERR_PARAM; + } + size = bm_mem_get_device_size(*dmem); + addr = bm_mem_get_device_addr(*dmem); + addr_end = addr + size; + + aligned_addr = bm_mem_get_device_addr(*dmem) & (~(PAGE_SIZE - 1)); + aligned_size = ((addr_end + (PAGE_SIZE - 1)) & (~(PAGE_SIZE - 1))) - aligned_addr; + + ret = mmap(0, aligned_size, PROT_READ | PROT_WRITE, MAP_SHARED, + handle->dev_fd, aligned_addr); + // printf("mmap, addr: 0x%llx, size: 0x%llx\n", (u64)ret); + if (MAP_FAILED != ret) + { + // ret = (((u64)ret) + (addr - aligned_addr)) + *vmem = (u64)ret + addr - aligned_addr; + // printf("PAGE_SIZE: 0x%llx, aligned_addr: 0x%llx, aligned_size: 0x%llx, mmap_vaddr: 0x%llx, final_vmem: 0x%llx\n", PAGE_SIZE, aligned_addr, aligned_size, (u64)ret, *vmem); + return BM_SUCCESS; + } + else + { + return BM_ERR_FAILURE; + } +#endif +#else +#define GLOBAL_MEM_START_ADDR 0x100000000 + // 
handle->bm_dev->get_global_memaddr_(handle->dev_id); + *vmem = (u64)((u8 *)handle->bm_dev->get_global_memaddr_(handle->dev_id) + + bm_mem_get_device_addr(*dmem) - GLOBAL_MEM_START_ADDR); +#endif + return BM_SUCCESS; +} + +bm_status_t bm_mem_unmap_device_mem_mix(bm_handle_t handle, void *vmem, int size) +{ +#ifndef USING_CMODEL + if (handle->misc_info.pcie_soc_mode == 0) + { + printf("bmlib not support unmap in pcie mode\n"); + return BM_ERR_FAILURE; + } +#ifdef __linux__ + // unsigned int aligned_size = (size + PAGE_SIZE - 1) & (~(PAGE_SIZE - 1)); + (void)munmap(vmem, size); + // printf("unmap, addr: 0x%llx, size: 0x%llx\n", (u64)vmem, size); +#endif +#else + UNUSED(handle); + UNUSED(vmem); + UNUSED(size); +#endif + return BM_SUCCESS; +} + +bm_status_t bm_memcpy_s2d_fast_mix(bm_handle_t handle, bm_device_mem_t dst, void *src) +{ +#ifndef USING_CMODEL + u64 dst_vaddr = 0; + bm_status_t ret; + u64 addr, size, addr_end; + u64 aligned_size, aligned_addr; + + if (handle->misc_info.pcie_soc_mode == 0) + { + printf("bmlib not support s2d fast in pcie mode\n"); + return BM_ERR_FAILURE; + } + ret = bm_mem_mmap_device_mem_mix(handle, &dst, &dst_vaddr); + if (ret != BM_SUCCESS) + { + printf("bmlib mmap in s2d fast failed\n"); + return BM_ERR_FAILURE; + } + + memcpy((void *)dst_vaddr, src, bm_mem_get_device_size(dst)); + + ret = bm_mem_flush_device_mem(handle, &dst); + if (ret != BM_SUCCESS) + { + printf("bmlib invalidate device mem in s2d fast failed\n"); + return BM_ERR_FAILURE; + } + + size = bm_mem_get_device_size(dst); + addr = bm_mem_get_device_addr(dst); + addr_end = addr + size; + + aligned_addr = addr & (~(PAGE_SIZE - 1)); + aligned_size = ((addr_end + (PAGE_SIZE - 1)) & (~(PAGE_SIZE - 1))) - aligned_addr; + + dst_vaddr = dst_vaddr - (addr - aligned_addr); + bm_mem_unmap_device_mem_mix(handle, (void *)dst_vaddr, aligned_size); +#else + UNUSED(handle); + UNUSED(dst); + UNUSED(src); +#endif + return BM_SUCCESS; +} + +bm_status_t bm_memcpy_d2s_fast_mix(bm_handle_t 
handle, void *dst, bm_device_mem_t src) +{ +#ifndef USING_CMODEL + u64 src_vaddr = 0; + bm_status_t ret; + u64 addr, size, addr_end; + u64 aligned_size, aligned_addr; + + if (handle->misc_info.pcie_soc_mode == 0) + { + printf("bmlib not support d2s fast in pcie mode\n"); + return BM_ERR_FAILURE; + } + ret = bm_mem_mmap_device_mem_mix(handle, &src, &src_vaddr); + if (ret != BM_SUCCESS) + { + printf("bmlib mmap in d2s fast failed\n"); + return BM_ERR_FAILURE; + } + + ret = bm_mem_invalidate_device_mem(handle, &src); + if (ret != BM_SUCCESS) + { + printf("bmlib invalidate device mem in d2s fast failed\n"); + return BM_ERR_FAILURE; + } + + memcpy(dst, (void *)src_vaddr, bm_mem_get_device_size(src)); + + size = bm_mem_get_device_size(src); + addr = bm_mem_get_device_addr(src); + addr_end = addr + size; + + aligned_addr = addr & (~(PAGE_SIZE - 1)); + aligned_size = ((addr_end + (PAGE_SIZE - 1)) & (~(PAGE_SIZE - 1))) - aligned_addr; + + src_vaddr = src_vaddr - (addr - aligned_addr); + + bm_mem_unmap_device_mem_mix(handle, (void *)src_vaddr, aligned_size); +#else + UNUSED(handle); + UNUSED(dst); + UNUSED(src); +#endif + return BM_SUCCESS; +} + +bm_status_t bm_memcpy_d2s_cdma(bm_handle_t handle, void *dst, bm_device_mem_t src) +{ +#ifndef USING_CMODEL + bm_memcpy_info_t bm_mem_d2s; + +#ifdef __linux__ +#ifdef USING_INT_CDMA + bm_mem_d2s.intr = true; +#else + bm_mem_d2s.intr = false; +#endif + bm_mem_d2s.host_addr = dst; +#else + bm_mem_d2s.intr = 1; + bm_mem_d2s.host_addr = (u64)dst; +#endif + + bm_mem_d2s.device_addr = bm_mem_get_device_addr(src); + bm_mem_d2s.size = bm_mem_get_size(src); + bm_mem_d2s.dir = CHIP2HOST; + bm_mem_d2s.src_device_addr = 0; + bm_mem_d2s.cdma_iommu_mode = handle->cdma_iommu_mode; + + union + { + void *ptr; + u64 val; + } ptr_to_u64; + ptr_to_u64.ptr = dst; + bm_profile_record_memcpy_begin(handle); + auto res = platform_ioctl(handle, BMDEV_MEMCPY, &bm_mem_d2s); + bm_profile_record_memcpy_end(handle, bm_mem_d2s.device_addr, ptr_to_u64.val, 
bm_mem_d2s.size, bm_mem_d2s.dir); + if (0 != res) + return BM_ERR_FAILURE; +#else + UNUSED(handle); + UNUSED(dst); + UNUSED(src); +#endif + return BM_SUCCESS; +} + +bm_status_t bm_memcpy_s2d_cdma(bm_handle_t handle, bm_device_mem_t dst, void *src) +{ +#ifdef USING_CMODEL + return handle->bm_dev->bm_device_memcpy_s2d(dst, src); +#else + if (handle == nullptr) + { + printf("handle is nullptr %s: %s: %d\n", __FILE__, __func__, __LINE__); + return BM_ERR_DEVNOTREADY; + } + + if (!bm_device_mem_range_valid(handle, dst)) + { + return BM_ERR_PARAM; + } + + bm_memcpy_info_t bm_mem_s2d; + +#ifdef USING_INT_CDMA + bm_mem_s2d.intr = true; +#else + bm_mem_s2d.intr = false; +#endif + bm_mem_s2d.host_addr = src; + + bm_mem_s2d.device_addr = bm_mem_get_device_addr(dst); + bm_mem_s2d.size = bm_mem_get_size(dst); + bm_mem_s2d.dir = HOST2CHIP; + bm_mem_s2d.src_device_addr = 0; + bm_mem_s2d.cdma_iommu_mode = handle->cdma_iommu_mode; + + union + { + void *ptr; + u64 val; + } ptr_to_u64; + ptr_to_u64.ptr = src; + bm_profile_record_memcpy_begin(handle); + auto res = platform_ioctl(handle, BMDEV_MEMCPY, &bm_mem_s2d); + bm_profile_record_memcpy_end(handle, ptr_to_u64.val, bm_mem_s2d.device_addr, bm_mem_s2d.size, bm_mem_s2d.dir); + return (0 != res) ? 
BM_ERR_FAILURE : BM_SUCCESS; +#endif +} + +int test_cdma_stod_transfer(int chip_num, int transfer_size, unsigned long long dst_addr, int use_cdma) +{ + bm_handle_t handle = NULL; + bm_status_t ret = BM_SUCCESS; + unsigned char *sys_send_buffer, *sys_recieve_buffer; + int cmp_ret = 0; + unsigned long consume_sys = 0; + unsigned long consume_real = 0; + unsigned long consume = 0; + struct timespec tp; + bm_device_mem_t dev_buffer; + bm_profile_t profile_start, profile_end; + struct timeval tv_start; + struct timeval tv_end; + struct timeval timediff; +#ifdef __linux__ + clock_gettime(CLOCK_THREAD_CPUTIME_ID, &tp); +#else + clock_gettime(0, &tp); +#endif + srand(tp.tv_nsec); + + if (transfer_size == 0x0) + transfer_size = 1024 * 1024 * 4; + + sys_send_buffer = (unsigned char *)malloc(transfer_size); + sys_recieve_buffer = (unsigned char *)malloc(transfer_size); + if (!sys_send_buffer || !sys_recieve_buffer) + { + printf("malloc buffer for test failed\n"); + return -1; + } + + for (int i = 0; i < transfer_size; i++) + { + sys_send_buffer[i] = rand() % 0xff; + sys_recieve_buffer[i] = 0x0; + } + + ret = bm_dev_request(&handle, chip_num); + if (ret != BM_SUCCESS || handle == NULL) + { + printf("bm_dev_request failed, ret = %d\n", ret); + if (sys_send_buffer) + free(sys_send_buffer); + if (sys_recieve_buffer) + free(sys_recieve_buffer); + return -1; + } + + if (dst_addr == 0x0) + { + ret = bm_malloc_device_dword(handle, &dev_buffer, transfer_size / 4); + if (ret != BM_SUCCESS) + { + printf("malloc device memory size = %d failed, ret = %d\n", transfer_size, ret); + return -1; + } + } + else + { + dev_buffer = bm_mem_from_device(dst_addr, transfer_size); + } + for (int i = 0; i < 10; i++) + { + bm_trace_enable(handle); + gettimeofday(&tv_start, NULL); + bm_get_profile(handle, &profile_start); + if (use_cdma == 1) + { + ret = bm_memcpy_s2d_cdma(handle, dev_buffer, sys_send_buffer); + if (ret != BM_SUCCESS) + { + printf("CDMA transfer from system to device failed, ret = %d\n", 
ret); + return -1; + } + } + else + { + ret = bm_memcpy_s2d_fast_mix(handle, dev_buffer, sys_send_buffer); + if (ret != BM_SUCCESS) + { + printf("memcpy transfer from system to device failed, ret = %d\n", ret); + return -1; + } + } + + gettimeofday(&tv_end, NULL); + timersub(&tv_end, &tv_start, &timediff); + consume = timediff.tv_sec * 1000000 + timediff.tv_usec; + consume_sys += consume; + + bm_get_profile(handle, &profile_end); + consume = profile_end.cdma_out_time - profile_start.cdma_out_time; + bm_trace_disable(handle); + + consume_real += consume; + } + consume = consume_sys / 10; + if (consume > 0) + { + float bandwidth = (float)transfer_size / (1024.0 * 1024.0) / (consume / 1000000.0); + printf("S2D sys:Transfer size:0x%x byte. Cost time:%ld us, Write Bandwidth:%.2f MB/s\n", + transfer_size, + consume, + bandwidth); + } + + if (use_cdma == 1) + { + consume = consume_real / 10; + if (consume > 0) + { + float bandwidth = (float)transfer_size / (1024.0 * 1024.0) / (consume / 1000000.0); + printf("S2D real:Transfer size:0x%x byte. 
Cost time:%ld us, Write Bandwidth:%.2f MB/s\n", + transfer_size, + consume, + bandwidth); + } + } + consume_sys = 0x0; + consume_real = 0x0; + + for (int i = 0; i < 10; i++) + { + bm_trace_enable(handle); + gettimeofday(&tv_start, NULL); + bm_get_profile(handle, &profile_start); + if (use_cdma == 1) + { + ret = bm_memcpy_d2s_cdma(handle, sys_recieve_buffer, dev_buffer); + if (ret != BM_SUCCESS) + { + printf("CDMA transfer from system to device failed, ret = %d\n", ret); + return -1; + } + } + else + { + ret = bm_memcpy_d2s_fast_mix(handle, sys_recieve_buffer, dev_buffer); + if (ret != BM_SUCCESS) + { + printf("CDMA transfer from system to device failed, ret = %d\n", ret); + return -1; + } + } + + gettimeofday(&tv_end, NULL); + timersub(&tv_end, &tv_start, &timediff); + consume = timediff.tv_sec * 1000000 + timediff.tv_usec; + consume_sys += consume; + bm_get_profile(handle, &profile_end); + consume = profile_end.cdma_in_time - profile_start.cdma_in_time; + consume_real += consume; + bm_trace_disable(handle); + } + consume = consume_sys / 10; + if (consume > 0) + { + float bandwidth = (float)transfer_size / (1024.0 * 1024.0) / (consume / 1000000.0); + printf("D2S sys:Transfer size:0x%x byte. Cost time:%ld us, Write Bandwidth:%.2f MB/s\n", + transfer_size, + consume, + bandwidth); + } + if (use_cdma == 1) + { + consume = consume_real / 10; + if (consume > 0) + { + float bandwidth = (float)transfer_size / (1024.0 * 1024.0) / (consume / 1000000.0); + printf("D2S real:Transfer size:0x%x byte. Cost time:%ld us, Write Bandwidth:%.2f MB/s\n", + transfer_size, + consume, + bandwidth); + } + } + cmp_ret = array_cmp_int(sys_send_buffer, sys_recieve_buffer, transfer_size, "cdma test"); + printf("dev = %d, transfer test %s.\n", chip_num, cmp_ret ? 
"Failed" : "Success"); + + if (sys_send_buffer) + free(sys_send_buffer); + if (sys_recieve_buffer) + free(sys_recieve_buffer); + if (dst_addr == 0x0) + { + bm_free_device(handle, dev_buffer); + } + bm_dev_free(handle); + return cmp_ret; +} + +struct cdma_process_para +{ + int dev_id; + int size; + int launch_num; + int dir; +}; + +#ifdef __linux__ +void *test_cdma_thread(void *arg) +{ +#else +DWORD WINAPI test_cdma_thread(LPVOID arg) +{ +#endif + bm_handle_t handle; + bm_status_t ret = BM_SUCCESS; + struct cdma_process_para *ppara = (struct cdma_process_para *)arg; + unsigned char *sys_buffer; + bm_device_mem_t dev_buffer; + int i = 0x0; + + sys_buffer = (unsigned char *)malloc(ppara->size); + + ret = bm_dev_request(&handle, ppara->dev_id); + if (BM_SUCCESS != ret) + { + printf("request dev %d failed, ret = %d\n", ppara->dev_id, ret); + return NULL; + } + + ret = bm_malloc_device_byte(handle, &dev_buffer, ppara->size); + if (ret != BM_SUCCESS) + { + printf("malloc device memory size = %d failed, ret = %d\n", ppara->size, ret); + free(sys_buffer); + return NULL; + } + + for (i = 0; i < ppara->launch_num; i++) + { + if (ppara->dir == 0x0) + { + ret = bm_memcpy_s2d(handle, dev_buffer, sys_buffer); + if (ret != BM_SUCCESS) + { + printf("CDMA transfer from system to device failed, ret = %d\n", ret); + } + } + else + { + ret = bm_memcpy_d2s(handle, sys_buffer, dev_buffer); + if (ret != BM_SUCCESS) + { + printf("CDMA transfer from device to sys failed, ret = %d\n", ret); + } + } + } + + bm_dev_free(handle); + return NULL; +} + +#define THREAD_NUM 64 +// dir = 0, s2d; dir = 1, d2s +int test_cmda_perf_mutithread(int thread_num, int dir, int dev_id, int size, int launch_num) +{ +#ifdef __linux__ + pthread_t threads[THREAD_NUM]; +#else + DWORD dwThreadIdArray[THREAD_NUM]; + HANDLE hThreadArray[THREAD_NUM]; +#endif + + struct cdma_process_para para; + int i = 0x0; + int ret = 0x0; + unsigned long long total_size = size * thread_num * launch_num; + float sys_bandwidth = 0; + 
unsigned long long sys_trans_time_us = 0; + struct timeval tv_start; + struct timeval tv_end; + struct timeval timediff; + + if (thread_num > THREAD_NUM) + { + printf("thread num = %d is too much\n", thread_num); + return -1; + } + para.dev_id = dev_id; + para.size = size; + para.launch_num = launch_num; + para.dir = dir; + + gettimeofday(&tv_start, NULL); + for (i = 0; i < thread_num; i++) + { +#ifdef __linux__ + ret = pthread_create(&threads[i], NULL, test_cdma_thread, ¶); + if (ret < 0) + { + printf("pthread_create %d error: error_code = %d\n", i, ret); + return -1; + } +#else + hThreadArray[i] = + CreateThread(NULL, // default security attributes + 0, // use default stack size + test_cdma_thread, // thread function name + ¶, // argument to thread function + 0, // use default creation flags + &dwThreadIdArray[i]); // returns the thread identifier + if (hThreadArray[i] == NULL) + { + printf("creatthread %d and thread_id 0x%08lx failed\n", i, dwThreadIdArray[i]); + // ExitProcess(3); + return -1; + } +#endif + } +#ifdef __linux__ + for (i = 0; i < thread_num; i++) + { + ret = pthread_join(threads[i], NULL); + if (ret < 0) + { + printf("pthread_join %d error: error_code = %d\n", i, ret); + return -1; + } + } +#endif +#ifdef _WIN32 + for (i = 0; i < thread_num; i++) + { + DWORD dwWaitResult = WaitForSingleObject(hThreadArray[i], INFINITE); + switch (dwWaitResult) + { + case WAIT_OBJECT_0: + ret = 0; + break; + case WAIT_FAILED: + ret = -1; + break; + case WAIT_ABANDONED: + ret = -2; + break; + case WAIT_TIMEOUT: + ret = -3; + break; + default: + ret = 0; + break; + } + if (ret < 0) + { + printf("WaitForSingleObject %d error: error_code = %d\n", i, ret); + return -1; + } + } + + for (i = 0; i < thread_num; i++) + CloseHandle(hThreadArray[i]); +#endif + + gettimeofday(&tv_end, NULL); + timersub(&tv_end, &tv_start, &timediff); + sys_trans_time_us = timediff.tv_sec * 1000000 + timediff.tv_usec; + + if (sys_trans_time_us > 0) + { + sys_bandwidth = (float)(total_size / 
(1024.0 * 1024.0)) / (sys_trans_time_us / 1000000.0); + } + else + { + return -1; + } + + if (dir == 0x0) + { + printf("cdma s2d test use %d thread bandwidth : %.2f MB/s\n", thread_num, sys_bandwidth); + } + else + { + printf("cdma d2s test use %d thread bandwidth : %.2f MB/s\n", thread_num, sys_bandwidth); + } + + return 0; +} + +int main(int argc, char *argv[]) +{ +#if defined(SOC_MODE) + int chip_num = 0; + int transfer_size = 0; + unsigned long long src_addr = 0; + unsigned long long dst_addr = 0; + int loop_num = 0; + int interval = 0; + int ret = 0; + int count = 0; + int i = 0; + int j = 0; + + printf("cdma test:\n"); + test_cdma_stod_transfer(0, transfer_size, dst_addr, 1); + + printf("memcpy test:\n"); + test_cdma_stod_transfer(0, transfer_size, dst_addr, 0); +#else + printf("This test case is only valid in SOC mode!\n"); +#endif + return 0; +} diff --git a/bmlib/tools/test_d2d_handle.cpp b/bmlib/tools/test_d2d_handle.cpp new file mode 100644 index 0000000..1c7d63d --- /dev/null +++ b/bmlib/tools/test_d2d_handle.cpp @@ -0,0 +1,350 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "bmlib_runtime.h" +#include "bmlib_internal.h" +#include "bmlib_memory.h" +#include "api.h" +#ifdef __linux +#include +#include +#include +#else +#pragma comment(lib, "libbmlib-static.lib") +#endif + +int array_cmp_int( + unsigned char *p_exp, + unsigned char *p_got, + int len, + const char *info_label) +{ + int idx; + for (idx = 0; idx < len; idx++) + { + if (p_exp[idx] != p_got[idx]) + { + printf("%s error at index %d exp %x got %x\n", + info_label, idx, p_exp[idx], p_got[idx]); + return -1; + } + } + return 0; +} + +static bool test_device_mem_range_valid(bm_handle_t handle, bm_device_mem_t mem) +{ +#ifdef USING_CMODEL + UNUSED(handle); + UNUSED(mem); +#else + u64 saddr = bm_mem_get_device_addr(mem); + u64 eaddr = bm_mem_get_size(mem) + saddr; + + if (handle->misc_info.chipid == 0x1684 || handle->misc_info.chipid == 0x1686) + 
{ + if (((saddr >= 0x100000000 && saddr <= 0x4ffffffff) || (saddr >= 0x0 && saddr <= 0x103fffff)) && ((eaddr >= 0x100000000 && eaddr <= 0x500000000) || (eaddr >= 0x0 && eaddr <= 0x10400000))) + { + return true; + } + else + { + bmlib_log("bmlib_memory", BMLIB_LOG_ERROR, + "%s saddr=0x%llx eaddr=0x%llx out of range\n", __func__, saddr, eaddr); + return false; + } + } + + if (handle->misc_info.chipid == 0x1682) + { + if (saddr >= 0x100000000 && saddr <= 0x2ffffffff && eaddr >= 0x100000000 && eaddr <= 0x300000000) + { + return true; + } + else + { + bmlib_log("bmlib_memory", BMLIB_LOG_ERROR, + "%s saddr=0x%llx eaddr=0x%llx out of range\n", __func__, saddr, eaddr); + return false; + } + } +#endif + return true; +} + +bm_status_t test_memcpy_d2d_byte(bm_handle_t handle, bm_device_mem_t dst, + size_t dst_offset, bm_device_mem_t src, + size_t src_offset, size_t size) +{ + bm_status_t ret = BM_SUCCESS; + tpu_kernel_module_t bm_module; + tpu_kernel_function_t f_id; + const char lib_path[80] = "/opt/sophon/libsophon-current/lib/tpu_module/libbm1684x_kernel_module.so"; + const char key[64] = "libbm1684x_kernel_module.so"; + int key_size = strlen(key); + bm_profile_t profile_start, profile_end; + unsigned long long time = 0; + struct timeval t1, t2; + + if (!test_device_mem_range_valid(handle, src)) + { + return BM_ERR_PARAM; + } + + if (!test_device_mem_range_valid(handle, dst)) + { + return BM_ERR_PARAM; + } + + bm_api_memcpy_byte_t api = {bm_mem_get_device_addr(src) + src_offset, + bm_mem_get_device_addr(dst) + dst_offset, size}; +#ifdef USING_CMODEL + if (fun_id != 0) + { +#else + if (handle->misc_info.chipid == 0x1686) + { + bm_module = tpu_kernel_load_module_file_key(handle, lib_path, key, key_size); + if (bm_module == NULL) + { + printf("bm_module is null!\n"); + return BM_ERR_FAILURE; + } +#endif + + f_id = tpu_kernel_get_function(handle, bm_module, "sg_api_memcpy_byte"); + gettimeofday(&t1, NULL); + ret = tpu_kernel_launch(handle, f_id, (void *)(&api), 
sizeof(bm_api_memcpy_byte_t)); + gettimeofday(&t2, NULL); + if (bm_module != NULL) + { + free(bm_module); + bm_module = NULL; + } + time = ((t2.tv_sec - t1.tv_sec) * 1000000 + t2.tv_usec - t1.tv_usec); + if (time > 0) + { + float bandwidth = (float)size / (1024.0 * 1024.0) / (time / 1000000.0); + printf("Src:%llx, Dst:%llx, Transfer size:0x%lx byte. Cost time:%lld us, Write Bandwidth:%.2f MB/s\n", + bm_mem_get_device_addr(src), + bm_mem_get_device_addr(dst), + size, + time, + bandwidth); + } + } + else + { + bm_trace_enable(handle); + bm_get_profile(handle, &profile_start); + ret = bm_send_api(handle, BM_API_ID_MEMCPY_BYTE, (u8 *)(&api), sizeof(api)); + ret = bm_sync_api(handle); + bm_get_profile(handle, &profile_end); + bm_trace_disable(handle); + } + return ret; +} + +struct gdma_process_para +{ + bm_handle_t handle; + int dev_id; +}; + +void *test_gdma_thread(void *arg) +{ + bm_status_t ret = BM_SUCCESS; + int i = 0; + int transfer_size = 1024 * 1024 * 64; + int rand_num = rand(); + unsigned long long time = 0; + struct gdma_process_para *ppara = (struct gdma_process_para *)arg; + bm_device_mem_t src_device_buffer; + bm_device_mem_t dst_device_buffer; + unsigned char *sys_send_buffer = (unsigned char *)malloc(transfer_size); + unsigned char *sys_receive_buffer = (unsigned char *)malloc(transfer_size); + bm_handle_t handle = NULL; + + if (ppara->handle == NULL) + { + // printf("multi handle\n"); + ret = bm_dev_request(&handle, ppara->dev_id); + if (BM_SUCCESS != ret) + { + printf("request dev %d failed, ret = %d\n", ppara->dev_id, ret); + return NULL; + } + } + else + { + handle = ppara->handle; + // printf("single handle\n"); + } + + if (!sys_send_buffer || !sys_receive_buffer) + { + printf("malloc buffer for test failed\n"); + return NULL; + } + + for (i = 0; i < transfer_size; i++) + *(sys_send_buffer + i) = i + rand_num; + + ret = bm_malloc_device_dword(handle, &src_device_buffer, transfer_size / 4); + if (ret != BM_SUCCESS) + { + printf("malloc device memory 
size = %d failed, ret = %d\n", transfer_size, ret); + return NULL; + } + + ret = bm_malloc_device_dword(handle, &dst_device_buffer, transfer_size / 4); + if (ret != BM_SUCCESS) + { + printf("malloc device memory size = %d failed, ret = %d\n", transfer_size, ret); + return NULL; + } + + ret = bm_memcpy_s2d(handle, + src_device_buffer, + sys_send_buffer); + if (ret != BM_SUCCESS) + { + if (sys_send_buffer) + free(sys_send_buffer); + if (sys_receive_buffer) + free(sys_receive_buffer); + printf("CDMA transfer from system to device failed, ret = %d\n", ret); + return NULL; + } + + ret = test_memcpy_d2d_byte(handle, + dst_device_buffer, 0, src_device_buffer, 0, + transfer_size); + + if (ret != BM_SUCCESS) + { + printf("GDMA transfer from system to system failed, ret = %d\n", ret); + return NULL; + } + + ret = bm_memcpy_d2s(handle, sys_receive_buffer, dst_device_buffer); + + if (ret != BM_SUCCESS) + { + if (sys_send_buffer) + free(sys_send_buffer); + if (sys_receive_buffer) + free(sys_receive_buffer); + printf("CDMA transfer from system to device failed, ret = %d\n", ret); + return NULL; + } + + if (array_cmp_int(sys_send_buffer, sys_receive_buffer, transfer_size, "test_gdma")) + { + if (sys_send_buffer) + free(sys_send_buffer); + if (sys_receive_buffer) + free(sys_receive_buffer); + printf("Src:%llx, Dst:%llx, Transfer size:0x%x byte fail\n", + bm_mem_get_device_addr(src_device_buffer), + bm_mem_get_device_addr(dst_device_buffer), + transfer_size); + return NULL; + } + + if (sys_send_buffer) + free(sys_send_buffer); + if (sys_receive_buffer) + free(sys_receive_buffer); + + if (ppara->handle == NULL) + { + bm_dev_free(handle); + } + + return NULL; +} + +int main(int argc, char *argv[]) +{ + bm_handle_t handle = NULL; + int ret = 0x0; + struct timespec tp; + int count = 0x0; + int chip_num = 0x0; + int transfer_size = 0; + unsigned long long src_addr = 0; + unsigned long long dst_addr = 0; + int i; + struct gdma_process_para para; + int thread_num = 5; + pthread_t 
threads[64]; + + clock_gettime(CLOCK_THREAD_CPUTIME_ID, &tp); + srand(tp.tv_nsec); + + ret = bm_dev_request(&handle, 0); + if (ret != BM_SUCCESS || handle == NULL) + { + printf("bm_dev_request failed, ret = %d\n", ret); + return -1; + } + + printf("multi handle\n"); + para.handle = NULL; + para.dev_id = 0; + + for (i = 0; i < thread_num; i++) + { + ret = pthread_create(&threads[i], NULL, test_gdma_thread, ¶); + if (ret < 0) + { + printf("pthread_create %d error: error_code = %d\n", i, ret); + return -1; + } + } + + for (i = 0; i < thread_num; i++) + { + ret = pthread_join(threads[i], NULL); + if (ret < 0) + { + printf("pthread_join %d error: error_code = %d\n", i, ret); + return -1; + } + } + + printf("single handle\n"); + para.handle = handle; + para.dev_id = 0; + + for (i = 0; i < thread_num; i++) + { + ret = pthread_create(&threads[i], NULL, test_gdma_thread, ¶); + if (ret < 0) + { + printf("pthread_create %d error: error_code = %d\n", i, ret); + return -1; + } + } + + for (i = 0; i < thread_num; i++) + { + ret = pthread_join(threads[i], NULL); + if (ret < 0) + { + printf("pthread_join %d error: error_code = %d\n", i, ret); + return -1; + } + } + + bm_dev_free(handle); + + return 0; +} diff --git a/bmlib/tools/test_malloc_paddr.cpp b/bmlib/tools/test_malloc_paddr.cpp new file mode 100644 index 0000000..dea5da6 --- /dev/null +++ b/bmlib/tools/test_malloc_paddr.cpp @@ -0,0 +1,420 @@ +#include +#include +#include +#include +#include +#include +#include "bmlib_runtime.h" +#include "bmlib_internal.h" +#include "string.h" +#ifdef __linux +#include +#include +#include +#else +#pragma comment(lib, "libbmlib-static.lib") +#endif + +int array_cmp_int( + unsigned char *p_exp, + unsigned char *p_got, + int len, + const char *info_label) +{ + int idx; + for (idx = 0; idx < len; idx++) + { + if (p_exp[idx] != p_got[idx]) + { + printf("%s error at index %d exp %x got %x\n", + info_label, idx, p_exp[idx], p_got[idx]); + return -1; + } + } + return 0; +} + +#ifdef __linux__ 
+void test_msleep(int n_ms) +{ + int i = 0; + for (i = 0; i < n_ms; i++) + usleep(1000); +} + +void test_sleep(int n_ms) +{ + int loop = n_ms / 1000; + int res = n_ms % 1000; + int i = 0; + + test_msleep(res); + for (i = 0; i < loop; i++) + test_msleep(1000); +} +#endif + +int test_cdma_stod_transfer(int chip_num, int transfer_size) +{ + bm_handle_t handle = NULL; + bm_status_t ret = BM_SUCCESS; + unsigned char *sys_send_buffer, *sys_recieve_buffer; + int cmp_ret = 0; + unsigned long consume_sys = 0; + unsigned long consume_real = 0; + unsigned long consume = 0; + struct timespec tp; + bm_device_mem_t dev_buffer; + bm_profile_t profile_start, profile_end; + struct timeval tv_start; + struct timeval tv_end; + struct timeval timediff; + u64 paddr; +#ifdef __linux__ + clock_gettime(CLOCK_THREAD_CPUTIME_ID, &tp); +#else + clock_gettime(0, &tp); +#endif + srand(tp.tv_nsec); + + if (transfer_size == 0x0) + transfer_size = 1024 * 1024 * 4; + + sys_send_buffer = (unsigned char *)malloc(transfer_size); + sys_recieve_buffer = (unsigned char *)malloc(transfer_size); + if (!sys_send_buffer || !sys_recieve_buffer) + { + printf("malloc buffer for test failed\n"); + return -1; + } + + for (int i = 0; i < transfer_size; i++) + { + sys_send_buffer[i] = rand() % 0xff; + sys_recieve_buffer[i] = 0x0; + } + + ret = bm_dev_request(&handle, chip_num); + if (ret != BM_SUCCESS || handle == NULL) + { + printf("bm_dev_request failed, ret = %d\n", ret); + if (sys_send_buffer) + free(sys_send_buffer); + if (sys_recieve_buffer) + free(sys_recieve_buffer); + return -1; + } + + bm_malloc_device_mem(handle, &paddr, 0, transfer_size); + dev_buffer = bm_mem_from_device(paddr, transfer_size); + + for (int i = 0; i < 10; i++) + { + bm_trace_enable(handle); + gettimeofday(&tv_start, NULL); + bm_get_profile(handle, &profile_start); + ret = bm_memcpy_s2d(handle, dev_buffer, sys_send_buffer); + if (ret != BM_SUCCESS) + { + printf("CDMA transfer from system to device failed, ret = %d\n", ret); + return -1; 
+ } + + gettimeofday(&tv_end, NULL); + timersub(&tv_end, &tv_start, &timediff); + consume = timediff.tv_sec * 1000000 + timediff.tv_usec; + consume_sys += consume; + + bm_get_profile(handle, &profile_end); + consume = profile_end.cdma_out_time - profile_start.cdma_out_time; + bm_trace_disable(handle); + + consume_real += consume; + } + consume = consume_sys / 10; + if (consume > 0) + { + float bandwidth = (float)transfer_size / (1024.0 * 1024.0) / (consume / 1000000.0); + printf("S2D sys:Transfer size:0x%x byte. Cost time:%ld us, Write Bandwidth:%.2f MB/s\n", + transfer_size, + consume, + bandwidth); + } + consume = consume_real / 10; + if (consume > 0) + { + float bandwidth = (float)transfer_size / (1024.0 * 1024.0) / (consume / 1000000.0); + printf("S2D real:Transfer size:0x%x byte. Cost time:%ld us, Write Bandwidth:%.2f MB/s\n", + transfer_size, + consume, + bandwidth); + } + + consume_sys = 0x0; + consume_real = 0x0; + + for (int i = 0; i < 10; i++) + { + bm_trace_enable(handle); + gettimeofday(&tv_start, NULL); + bm_get_profile(handle, &profile_start); + ret = bm_memcpy_d2s(handle, sys_recieve_buffer, dev_buffer); + if (ret != BM_SUCCESS) + { + printf("CDMA transfer from system to device failed, ret = %d\n", ret); + return -1; + } + + gettimeofday(&tv_end, NULL); + timersub(&tv_end, &tv_start, &timediff); + consume = timediff.tv_sec * 1000000 + timediff.tv_usec; + consume_sys += consume; + bm_get_profile(handle, &profile_end); + consume = profile_end.cdma_in_time - profile_start.cdma_in_time; + consume_real += consume; + bm_trace_disable(handle); + } + consume = consume_sys / 10; + if (consume > 0) + { + float bandwidth = (float)transfer_size / (1024.0 * 1024.0) / (consume / 1000000.0); + printf("D2S sys:Transfer size:0x%x byte. 
Cost time:%ld us, Write Bandwidth:%.2f MB/s\n", + transfer_size, + consume, + bandwidth); + } + consume = consume_real / 10; + if (consume > 0) + { + float bandwidth = (float)transfer_size / (1024.0 * 1024.0) / (consume / 1000000.0); + printf("D2S real:Transfer size:0x%x byte. Cost time:%ld us, Write Bandwidth:%.2f MB/s\n", + transfer_size, + consume, + bandwidth); + } + cmp_ret = array_cmp_int(sys_send_buffer, sys_recieve_buffer, transfer_size, "cdma test"); + printf("dev = %d, cdma transfer test %s.\n", chip_num, cmp_ret ? "Failed" : "Success"); + + if (sys_send_buffer) + free(sys_send_buffer); + if (sys_recieve_buffer) + free(sys_recieve_buffer); + + bm_free_device_mem(handle, paddr); + bm_dev_free(handle); + return cmp_ret; +} + +struct cdma_process_para +{ + int dev_id; + int size; + int launch_num; + int dir; +}; + +#ifdef __linux__ +void *test_cdma_thread(void *arg) +{ +#else +DWORD WINAPI test_cdma_thread(LPVOID arg) +{ +#endif + bm_handle_t handle; + bm_status_t ret = BM_SUCCESS; + struct cdma_process_para *ppara = (struct cdma_process_para *)arg; + unsigned char *sys_buffer; + bm_device_mem_t dev_buffer; + int i = 0x0; + u64 paddr; + + sys_buffer = (unsigned char *)malloc(ppara->size); + + ret = bm_dev_request(&handle, ppara->dev_id); + if (BM_SUCCESS != ret) + { + printf("request dev %d failed, ret = %d\n", ppara->dev_id, ret); + return NULL; + } + + bm_malloc_device_mem(handle, &paddr, 0, ppara->size); + dev_buffer = bm_mem_from_device(paddr, ppara->size); + + for (i = 0; i < ppara->launch_num; i++) + { + if (ppara->dir == 0x0) + { + ret = bm_memcpy_s2d(handle, dev_buffer, sys_buffer); + if (ret != BM_SUCCESS) + { + printf("CDMA transfer from system to device failed, ret = %d\n", ret); + } + } + else + { + ret = bm_memcpy_d2s(handle, sys_buffer, dev_buffer); + if (ret != BM_SUCCESS) + { + printf("CDMA transfer from device to sys failed, ret = %d\n", ret); + } + } + } + + bm_free_device_mem(handle, paddr); + bm_dev_free(handle); + return NULL; +} + 
+#define THREAD_NUM 64 +// dir = 0, s2d; dir = 1, d2s +int test_cmda_perf_mutithread(int thread_num, int dir, int dev_id, int size, int launch_num) +{ +#ifdef __linux__ + pthread_t threads[THREAD_NUM]; +#else + DWORD dwThreadIdArray[THREAD_NUM]; + HANDLE hThreadArray[THREAD_NUM]; +#endif + + struct cdma_process_para para; + int i = 0x0; + int ret = 0x0; + unsigned long long total_size = size * thread_num * launch_num; + float sys_bandwidth = 0; + unsigned long long sys_trans_time_us = 0; + struct timeval tv_start; + struct timeval tv_end; + struct timeval timediff; + + if (thread_num > THREAD_NUM) + { + printf("thread num = %d is too much\n", thread_num); + return -1; + } + para.dev_id = dev_id; + para.size = size; + para.launch_num = launch_num; + para.dir = dir; + + gettimeofday(&tv_start, NULL); + for (i = 0; i < thread_num; i++) + { +#ifdef __linux__ + ret = pthread_create(&threads[i], NULL, test_cdma_thread, ¶); + if (ret < 0) + { + printf("pthread_create %d error: error_code = %d\n", i, ret); + return -1; + } +#else + hThreadArray[i] = + CreateThread(NULL, // default security attributes + 0, // use default stack size + test_cdma_thread, // thread function name + ¶, // argument to thread function + 0, // use default creation flags + &dwThreadIdArray[i]); // returns the thread identifier + if (hThreadArray[i] == NULL) + { + printf("creatthread %d and thread_id 0x%08lx failed\n", i, dwThreadIdArray[i]); + // ExitProcess(3); + return -1; + } +#endif + } +#ifdef __linux__ + for (i = 0; i < thread_num; i++) + { + ret = pthread_join(threads[i], NULL); + if (ret < 0) + { + printf("pthread_join %d error: error_code = %d\n", i, ret); + return -1; + } + } +#endif +#ifdef _WIN32 + for (i = 0; i < thread_num; i++) + { + DWORD dwWaitResult = WaitForSingleObject(hThreadArray[i], INFINITE); + switch (dwWaitResult) + { + case WAIT_OBJECT_0: + ret = 0; + break; + case WAIT_FAILED: + ret = -1; + break; + case WAIT_ABANDONED: + ret = -2; + break; + case WAIT_TIMEOUT: + ret = -3; 
+ break; + default: + ret = 0; + break; + } + if (ret < 0) + { + printf("WaitForSingleObject %d error: error_code = %d\n", i, ret); + return -1; + } + } + + for (i = 0; i < thread_num; i++) + CloseHandle(hThreadArray[i]); +#endif + + gettimeofday(&tv_end, NULL); + timersub(&tv_end, &tv_start, &timediff); + sys_trans_time_us = timediff.tv_sec * 1000000 + timediff.tv_usec; + + if (sys_trans_time_us > 0) + { + sys_bandwidth = (float)(total_size / (1024.0 * 1024.0)) / (sys_trans_time_us / 1000000.0); + } + else + { + return -1; + } + + if (dir == 0x0) + { + printf("cdma s2d test use %d thread bandwidth : %.2f MB/s\n", thread_num, sys_bandwidth); + } + else + { + printf("cdma d2s test use %d thread bandwidth : %.2f MB/s\n", thread_num, sys_bandwidth); + } + + return 0; +} + +int main(int argc, char *argv[]) +{ + int chip_num = 0; + int transfer_size = 0; + int loop_num = 0; + int interval = 0; + int ret = 0; + int count = 0; + int i = 0; + int j = 0; + + bm_dev_getcount(&count); + for (i = 0; i < count; i++) + { + ret = test_cdma_stod_transfer(i, transfer_size); + if (ret != 0) + break; + test_cmda_perf_mutithread(4, 0, i, (1024 * 1024 * 4), 20); + test_cmda_perf_mutithread(4, 1, i, (1024 * 1024 * 4), 20); + test_cmda_perf_mutithread(8, 0, i, (1024 * 1024 * 4), 20); + test_cmda_perf_mutithread(8, 1, i, (1024 * 1024 * 4), 20); + test_cmda_perf_mutithread(16, 0, i, (1024 * 1024 * 4), 20); + test_cmda_perf_mutithread(16, 1, i, (1024 * 1024 * 4), 20); + } + + return ret; +} diff --git a/bmlib/tools/test_malloc_time.cpp b/bmlib/tools/test_malloc_time.cpp new file mode 100644 index 0000000..a6b4b9e --- /dev/null +++ b/bmlib/tools/test_malloc_time.cpp @@ -0,0 +1,94 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "bmlib_runtime.h" +#include "bmlib_internal.h" +#include "bmlib_memory.h" +#include "api.h" +#include +#include +#include + +int test_alloc_time(unsigned long long transfer_size, int test_num) +{ + bm_handle_t 
handle = NULL; + bm_status_t ret = BM_SUCCESS; + int i = 0; + struct timeval t1, t2, timediff; + unsigned long consume_alloc = 0; + unsigned long consume_free = 0; + u64 paddr[test_num]; + bm_device_mem_u64_t dev_buffer[test_num]; + + ret = bm_dev_request(&handle, 0); + if (ret != BM_SUCCESS || handle == NULL) { + printf("bm_dev_request failed, ret = %d\n", ret); + return -1; + } + + for (i=0; i +#include +#include +#include +#include +#include +#include +#include +#include +#include "bmlib_runtime.h" +#include "bmlib_internal.h" +#include "bmlib_memory.h" +#include "api.h" +#include +#include +#include + +#define INT_MAX 2147483647 + +int main(int argc, char *argv[]) +{ + bm_handle_t handle = NULL; + bm_status_t ret = BM_SUCCESS; + int transfer_size = 0x100; + int i = 0; + bm_device_mem_t dev_buffer; + int test_num = INT_MAX; + + ret = bm_dev_request(&handle, 0); + if (ret != BM_SUCCESS || handle == NULL) { + printf("bm_dev_request failed, ret = %d\n", ret); + return -1; + } + + for (i=0; i +#include +#include +#include +#include +#include "bmlib_runtime.h" +#include "bmlib_internal.h" +#include "string.h" + +#ifdef WIN32 +#include +#include +#else +#include +#include +#endif + +int main(int argc, char *argv[]) +{ +#ifndef SOC_MODE + int chip_num = 0; + struct bm_rw reg; + bm_handle_t handle = NULL; + bm_status_t ret = BM_SUCCESS; + enum bm_rw_op option; + + if (argv[1]) { + if (strcmp("r", argv[1]) == 0) + reg.op = BM_READ; + else if (strcmp("w", argv[1]) == 0) { + reg.op = BM_WRITE; + reg.value = strtol(argv[2], NULL, 16); + } else if (strcmp("m", argv[1]) == 0) + reg.op = BM_MALLOC; + else if (strcmp("f", argv[1]) == 0) + reg.op = BM_FREE; + else { + printf("invalid arg\n"); + return -1; + } + } + + ret = bm_dev_request(&handle, chip_num); + if (ret != BM_SUCCESS || handle == NULL) { + printf("bm_dev_request failed, ret = %d\n", ret); + return -1; + } + + option = reg.op; + ret = bm_rw_host(handle, ®); + if (ret != BM_SUCCESS) { + printf("bm_rw_mix failed, ret 
= %d\n", ret); + return -1; + } + + if (option == BM_READ) + printf("read addr = 0x%llx, value = 0x%x\n", reg.paddr, reg.value); + else if (option == BM_WRITE) + printf("write addr = 0x%llx, value = 0x%x\n", reg.paddr, reg.value); + else if (option == BM_MALLOC) + printf("malloc addr = 0x%llx\n", reg.paddr); + else if (option == BM_FREE) + printf("free addr = 0x%llx\n", reg.paddr); + + bm_dev_free(handle); + return ret; +#else + + printf("only support in pcie mode!\n"); + + return 0; +#endif +} diff --git a/bmlib/tools/test_rw_mix.cpp b/bmlib/tools/test_rw_mix.cpp new file mode 100644 index 0000000..c698894 --- /dev/null +++ b/bmlib/tools/test_rw_mix.cpp @@ -0,0 +1,75 @@ +#include +#include +#include +#include +#include +#include "bmlib_runtime.h" +#include "bmlib_internal.h" +#include "string.h" + +#ifdef WIN32 +#include +#include +#else +#include +#include +#endif + +#if defined(SOC_MODE) +int main(int argc, char *argv[]) +{ + int chip_num = 0; + struct bm_rw reg; + bm_handle_t handle = NULL; + bm_status_t ret = BM_SUCCESS; + + if (argv[1]) + { + if (strcmp("r", argv[1]) == 0) + { + reg.op = BM_READ; + reg.paddr = (u64)strtol(argv[2], NULL, 16); + } + else if (strcmp("w", argv[1]) == 0) + { + reg.op = BM_WRITE; + reg.paddr = (u64)strtol(argv[2], NULL, 16); + reg.value = strtol(argv[3], NULL, 16); + } + else + { + printf("invalid arg\n"); + return -1; + } + } + + ret = bm_dev_request(&handle, chip_num); + if (ret != BM_SUCCESS || handle == NULL) + { + printf("bm_dev_request failed, ret = %d\n", ret); + return -1; + } + + ret = bm_rw_mix(handle, ®); + if (ret != BM_SUCCESS) + { + printf("bm_rw_mix failed, ret = %d\n", ret); + return -1; + } + + if (reg.op == BM_READ) + printf("read addr = 0x%llx, value = 0x%x\n", reg.paddr, reg.value); + else if (reg.op == BM_WRITE) + printf("write addr = 0x%llx, value = 0x%x\n", reg.paddr, reg.value); + + bm_dev_free(handle); + return ret; +} +#else +int main() +{ + printf("only support in mix mode!\n"); + + return 0; +} +#endif \ 
No newline at end of file diff --git a/bmlib/tools/version.cpp b/bmlib/tools/version.cpp index fb76ab5..9aed32b 100644 --- a/bmlib/tools/version.cpp +++ b/bmlib/tools/version.cpp @@ -19,8 +19,6 @@ int main(int argc, char const *argv[]) bm_status_t ret = BM_SUCCESS; int chip_num = 0; boot_loader_version version; - int bl1_strlen, bl1_print = 1; - int i; ret = bm_dev_request(&handle, chip_num); if (ret != BM_SUCCESS || handle == NULL) { @@ -28,28 +26,16 @@ int main(int argc, char const *argv[]) return -1; } - version.bl1_version = (char *)malloc(BL1_VERSION_SIZE); version.bl2_version = (char *)malloc(BL2_VERSION_SIZE); version.bl31_version = (char *)malloc(BL31_VERSION_SIZE); version.uboot_version = (char *)malloc(UBOOT_VERSION_SIZE); bm_get_boot_loader_version(handle, &version); - bl1_strlen = strlen(version.bl1_version); - for (i = 0; i < bl1_strlen; i++) { - if (isprint(version.bl1_version[i]) == 0){ - bl1_print = 0; - break; - } - } - - if (bl1_print == 1) - printf("BL1 %s\n", version.bl1_version); printf("BL2 %s\n", version.bl2_version); printf("BL31 %s\n", version.bl31_version); printf("%s\n", version.uboot_version); - free(version.bl1_version); free(version.bl2_version); free(version.bl31_version); free(version.uboot_version); diff --git a/bmlib/update_bmlib_version.sh b/bmlib/update_bmlib_version.sh new file mode 100755 index 0000000..870dcaa --- /dev/null +++ b/bmlib/update_bmlib_version.sh @@ -0,0 +1,32 @@ +#!/bin/bash + +function update_bmlib_commit_and_branch() +{ + file_path=$(find "$(git rev-parse --show-toplevel)" -type f -name "bmlib_version.h" -print -quit) + + if [ -n "$file_path" ]; then + file_dir=$(dirname "$file_path") + pushd . 
> /dev/null + + cd "$file_dir" || exit + + if git rev-parse --git-dir > /dev/null 2>&1; then + commit_hash=$(git log -1 --pretty=format:"%H") + branch_name=$(git branch --contains HEAD | sed -n '/\* /s///p') + + sed -i "s|#define COMMIT_HASH .*|#define COMMIT_HASH \"$commit_hash\"|" "bmlib_version.h" + sed -i "s|#define BRANCH_NAME .*|#define BRANCH_NAME \"$branch_name\"|" "bmlib_version.h" + + echo "Commit hash $commit_hash has been written to $file_path" + echo "Branch name $branch_name has been written to $file_path" + else + echo "This directory is not a git repository." + fi + + popd > /dev/null + else + echo "bmlib_version.h not found." + fi +} + +update_bmlib_commit_and_branch \ No newline at end of file diff --git a/bmvid/.gitmodules b/bmvid/.gitmodules deleted file mode 100644 index 7b810ef..0000000 --- a/bmvid/.gitmodules +++ /dev/null @@ -1,9 +0,0 @@ -[submodule "3rdparty/3rdparty-tools/googletest"] - path = 3rdparty/3rdparty-tools/googletest - url = https://github.com/google/googletest.git -[submodule "provider/cnm/test/bmtest"] - path = provider/cnm/test/bmtest - url = ../bmtest.git -[submodule "bmcv/sg_lib_driver"] - path = bmcv/sg_lib_driver - url = ../sg_lib_driver.git diff --git a/bmvid/3rdparty/libbmcv/common/bm1686/include/bm_api_struct.h b/bmvid/3rdparty/libbmcv/common/bm1686/include/bm_api_struct.h index 4aa378c..9139254 100755 --- a/bmvid/3rdparty/libbmcv/common/bm1686/include/bm_api_struct.h +++ b/bmvid/3rdparty/libbmcv/common/bm1686/include/bm_api_struct.h @@ -18,7 +18,7 @@ typedef struct bm_api_absval_forward { int input_c; int input_h; int input_w; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_absval_forward_t; #else } bm_api_absval_forward_t; @@ -32,7 +32,7 @@ typedef struct bm_api_absval_backward { int input_c; int input_h; int input_w; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_absval_backward_t; #else } bm_api_absval_backward_t; @@ -47,7 +47,7 @@ typedef struct bm_api_accuracy_layer { int 
Tensor_Dim; int Tensor_N; int Top_K; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_accuracy_layer_t; #else } bm_api_accuracy_layer_t; @@ -68,7 +68,7 @@ typedef struct bm_api_batchnorm_forward_inference_parallel { int need_var; int need_calc; int if_relu; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_batchnorm_forward_inference_parallel_t; #else } bm_api_batchnorm_forward_inference_parallel_t; @@ -88,7 +88,7 @@ typedef struct bm_api_batchnorm_forward_train_parallel { int input_c; int input_h; int input_w; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_batchnorm_forward_train_parallel_t; #else } bm_api_batchnorm_forward_train_parallel_t; @@ -104,7 +104,7 @@ typedef struct bm_api_batchnorm_backward_parallel { int input_h; int input_w; int using_global_stats; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_batchnorm_backward_parallel_t; #else } bm_api_batchnorm_backward_parallel_t; @@ -124,7 +124,7 @@ typedef struct bm_api_bilinear_interpolation { int output_w; int filter_size_h; int filter_size_w; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_bilinear_interpolation_t; #else } bm_api_bilinear_interpolation_t; @@ -143,7 +143,7 @@ typedef struct bm_api_bilinear_interpolation_parallel { int output_w; int filter_size_h; int filter_size_w; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_bilinear_interpolation_parallel_t; #else } bm_api_bilinear_interpolation_parallel_t; @@ -297,7 +297,7 @@ typedef struct bm_api_bnll_forward { int input_c; int input_h; int input_w; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_bnll_forward_t; #else } bm_api_bnll_forward_t; @@ -312,7 +312,7 @@ typedef struct bm_api_bnll_backward { int input_c; int input_h; int input_w; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_bnll_backward_t; #else } bm_api_bnll_backward_t; @@ -326,7 +326,7 @@ typedef struct bm_api_coeff_update_SGD_parallel { float base_lr; float 
momentum; float weight_decay; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_coeff_update_SGD_parallel_t; #else } bm_api_coeff_update_SGD_parallel_t; @@ -346,7 +346,7 @@ typedef struct bm_api_contrastive_loss_forward { int Tensor_W; float margin; int legacy_version; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_contrastive_loss_forward_t; #else } bm_api_contrastive_loss_forward_t; @@ -368,7 +368,7 @@ typedef struct bm_api_contrastive_loss_backward { float margin; int legacy_version; int propagate_down_flag; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_contrastive_loss_backward_t; #else } bm_api_contrastive_loss_backward_t; @@ -409,7 +409,7 @@ typedef struct bm_api_conv_forward_parallel { int nsecs; int icsecs; int ocsecs; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_conv_forward_parallel_t; #else } bm_api_conv_forward_parallel_t; @@ -452,7 +452,7 @@ typedef struct bm_api_conv_forward_parallel_fix8b { int opd1_sign; int opd2_sign; int opd0_short_str; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_conv_forward_parallel_fix8b_t; #else } bm_api_conv_forward_parallel_fix8b_t; @@ -475,7 +475,7 @@ typedef struct bm_api_bnscale_forward_parallel_fix8b { // int hsecs; // int nsecs; int if_relu; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_bnscale_forward_parallel_fix8b_t; #else } bm_api_bnscale_forward_parallel_fix8b_t; @@ -560,7 +560,7 @@ typedef struct bm_api_conv_backward_bias_parallel { int stride_h; int stride_w; int result_add; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_conv_backward_bias_parallel_t; #else } bm_api_conv_backward_bias_parallel_t; @@ -583,7 +583,7 @@ typedef struct nodechip_winograd_bottom_diff_parallel { int kw; int pad_h; int pad_w; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) nodechip_winograd_bottom_diff_parallel_t; #else } nodechip_winograd_bottom_diff_parallel_t; @@ -610,7 +610,7 @@ typedef struct 
bm_api_conv_parallel_bank_conflict { int stride_w; int using_bias; int result_add; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_conv_parallel_bank_conflict_t; #else } bm_api_conv_parallel_bank_conflict_t; @@ -637,7 +637,7 @@ typedef struct bm_api_conv_parallel_power_evaluation { int stride_w; int using_bias; int result_add; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_conv_parallel_power_evaluation_t; #else } bm_api_conv_parallel_power_evaluation_t; @@ -661,7 +661,7 @@ typedef struct bm_api_depthwise_forward { int stride_h; int stride_w; int using_bias; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_depthwise_forward_t; #else } bm_api_depthwise_forward_t; @@ -692,7 +692,7 @@ typedef struct bm_api_depthwise_fix8b_forward_parallel { int opd2_sign; int res0_sign; int if_relu; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_depthwise_fix8b_forward_parallel_t; #else } bm_api_depthwise_fix8b_forward_parallel_t; @@ -782,7 +782,7 @@ typedef struct bm_api_dropout_forward { float dropout_ratio; int input_n; int input_dim; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_dropout_forward_t; #else } bm_api_dropout_forward_t; @@ -802,7 +802,7 @@ typedef struct bm_api_eltwise_forward { int need_mask; float mask_index[MAX_ELTWISELAYER_INPUT_NUM]; int if_relu; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_eltwise_forward_t; #else } bm_api_eltwise_forward_t; @@ -824,7 +824,7 @@ typedef struct bm_api_eltwise_fix8b_forward { int rshift_A; int rshift_B; int if_relu; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_eltwise_fix8b_forward_t; #else } bm_api_eltwise_fix8b_forward_t; @@ -844,7 +844,7 @@ typedef struct bm_api_eltwise_backward { int Tensor_C; int Tensor_H; int Tensor_W; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_eltwise_backward_t; #else } bm_api_eltwise_backward_t; @@ -859,7 +859,7 @@ typedef struct bm_api_elu_forward { int input_c; int 
input_h; int input_w; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_elu_forward_t; #else } bm_api_elu_forward_t; @@ -876,7 +876,7 @@ typedef struct bm_api_elu_backward { int input_c; int input_h; int input_w; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_elu_backward_t; #else } bm_api_elu_backward_t; @@ -892,7 +892,7 @@ typedef struct bm_api_euclidean_loss_forward { int Tensor_C; int Tensor_H; int Tensor_W; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_euclidean_loss_forward_t; #else } bm_api_euclidean_loss_forward_t; @@ -906,7 +906,7 @@ typedef struct bm_api_euclidean_loss_backward { int Tensor_C; int Tensor_H; int Tensor_W; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_euclidean_loss_backward_t; #else } bm_api_euclidean_loss_backward_t; @@ -922,7 +922,7 @@ typedef struct bm_api_exp_forward { int Tensor_C; int Tensor_H; int Tensor_W; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_exp_forward_t; #else } bm_api_exp_forward_t; @@ -939,7 +939,7 @@ typedef struct bm_api_exp_backward { int Tensor_C; int Tensor_H; int Tensor_W; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_exp_backward_t; #else } bm_api_exp_backward_t; @@ -962,7 +962,7 @@ typedef struct bm_api_fc_forward_parallel { int channel_shared; float shared_slope; int W_param; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_fc_forward_parallel_t; #else } bm_api_fc_forward_parallel_t; @@ -988,7 +988,7 @@ typedef struct bm_api_fc_fix8b_forward_parallel { int if_global_out_4N; float alpha; float beta; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_fc_fix8b_forward_parallel_t; #else } bm_api_fc_fix8b_forward_parallel_t; @@ -1001,7 +1001,7 @@ typedef struct bm_api_fc_weight_decompress { int index_size; int row; int col; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_fc_weight_decompress_t; #else } bm_api_fc_weight_decompress_t; @@ -1022,7 +1022,7 @@ typedef struct 
bm_api_fc_backward_parallel { int propagate_down_bottom; int using_bias; int W_param; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_fc_backward_parallel_t; #else } bm_api_fc_backward_parallel_t; @@ -1040,7 +1040,7 @@ typedef struct bm_api_fc_forward_parallel_bank_conflict { int have_bias; int using_relu; int W_param; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_fc_forward_parallel_bank_conflict_t; #else } bm_api_fc_forward_parallel_bank_conflict_t; @@ -1064,7 +1064,7 @@ typedef struct bm_api_fc_backward_sgd_parallel { float base_lr; float momentum; float weight_decay; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_fc_backward_sgd_parallel_t; #else } bm_api_fc_backward_sgd_parallel_t; @@ -1079,7 +1079,7 @@ typedef struct bm_api_filter_forward { int Tensor_C; int Tensor_H; int Tensor_W; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_filter_forward_t; #else } bm_api_filter_forward_t; @@ -1094,7 +1094,7 @@ typedef struct bm_api_filter_backward { int Tensor_C; int Tensor_H; int Tensor_W; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_filter_backward_t; #else } bm_api_filter_backward_t; @@ -1104,7 +1104,7 @@ typedef struct bm_api_fullnet { u64 bdc_cmd_offset; u64 gdma_cmd_offset; u64 cmd_num_offset; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_fullnet_t; #else } bm_api_fullnet_t; @@ -1118,7 +1118,7 @@ typedef struct bm_api_img_sum { int Tensor_C; int Tensor_H; int Tensor_W; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_img_sum_t; #else } bm_api_img_sum_t; @@ -1131,7 +1131,7 @@ typedef struct bm_api_active_forward { int input_dim; int active_type; void *param; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_active_forward_t; #else } bm_api_active_forward_t; @@ -1148,7 +1148,7 @@ typedef struct bm_api_active_forward_fix8b { int input_signed; int output_signed; void *param; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) 
bm_api_active_forward_fix8b_t; #else } bm_api_active_forward_fix8b_t; @@ -1164,7 +1164,7 @@ typedef struct bm_api_log_forward { int Tensor_C; int Tensor_H; int Tensor_W; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_log_forward_t; #else } bm_api_log_forward_t; @@ -1181,7 +1181,7 @@ typedef struct bm_api_log_backward { int Tensor_C; int Tensor_H; int Tensor_W; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_log_backward_t; #else } bm_api_log_backward_t; @@ -1198,7 +1198,7 @@ typedef struct bm_api_lrn_forward_parallel { int size; float beta; float k; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_lrn_forward_parallel_t; #else } bm_api_lrn_forward_parallel_t; @@ -1217,7 +1217,7 @@ typedef struct bm_api_lrn_backward_parallel { float alpha; float beta; float k; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_lrn_backward_parallel_t; #else } bm_api_lrn_backward_parallel_t; @@ -1237,7 +1237,7 @@ typedef struct bm_api_lrn_fix8b_forward_parallel { float k; float scale_in; float scale_out; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_lrn_fix8b_forward_parallel_t; #else } bm_api_lrn_fix8b_forward_parallel_t; @@ -1266,7 +1266,7 @@ typedef struct bm_api_lstm_forward { int expose_hidden; int method; int n_slice; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_lstm_forward_t; #else } bm_api_lstm_forward_t; @@ -1285,7 +1285,7 @@ typedef struct bm_api_lstm_unit_forward { int Tensor_C; int Tensor_H; int Tensor_W; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_lstm_unit_forward_t; #else } bm_api_lstm_unit_forward_t; @@ -1310,7 +1310,7 @@ typedef struct bm_api_lstm_unit_backward { int Tensor_C; int Tensor_H; int Tensor_W; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_lstm_unit_backward_t; #else } bm_api_lstm_unit_backward_t; @@ -1339,7 +1339,7 @@ typedef struct bm_api_nodechip_md_cmp { int is_top_1; int is_min; int is_md_cmp; -#ifndef WIN32 +#ifndef 
_WIN32 } __attribute__((packed)) bm_api_nodechip_md_cmp_t; #else } bm_api_nodechip_md_cmp_t; @@ -1354,7 +1354,7 @@ typedef struct bm_api_nodechip_float2int8 { int input_h; int input_w; int sign_unsign; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_nodechip_float2int8_t; #else } bm_api_nodechip_float2int8_t; @@ -1375,7 +1375,7 @@ typedef struct bm_api_md_linear { float B_const_val; float S_const_val; LINEAR_OP linear_op; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_md_linear_t; #else } bm_api_md_linear_t; @@ -1394,7 +1394,7 @@ typedef struct { int B_constant_flag; float A_constant; float B_constant; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_nodechip_1d_scalar_t; #else } bm_api_nodechip_1d_scalar_t; @@ -1417,7 +1417,7 @@ typedef struct bm_api_nodechip_md_scalar { float B_constant; int B_N_is_1; int B_index_is_1; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_nodechip_md_scalar_t; #else } bm_api_nodechip_md_scalar_t; @@ -1434,7 +1434,7 @@ typedef struct { int n; SFU_OP sfu_op; u64 table_start_addr; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_nodechip_md_sfu_t; #else } bm_api_nodechip_md_sfu_t; @@ -1448,7 +1448,7 @@ typedef struct { int n; SFU_OP sfu_op; u64 table_start_addr; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_nodechip_1d_sfu_t; #else } bm_api_nodechip_1d_sfu_t; @@ -1462,7 +1462,7 @@ typedef struct bm_api_md_sum { int input_h; int input_w; int result_add; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_md_sum_t; #else } bm_api_md_sum_t; @@ -1485,7 +1485,7 @@ typedef struct bm_api_md_ops { int B_is_constant; int A_const_val; int B_const_val; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_md_ops_t; #else } bm_api_md_ops_t; @@ -1504,7 +1504,7 @@ typedef struct bm_api_normalize_forward { float eps; float scale; int if_relu; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_normalize_forward_t; #else } 
bm_api_normalize_forward_t; @@ -1525,7 +1525,7 @@ typedef struct bm_api_normalize_fix8b_forward { int if_relu; int in_sign; int out_sign; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_normalize_fix8b_forward_t; #else } bm_api_normalize_fix8b_forward_t; @@ -1538,7 +1538,7 @@ typedef struct bm_api_permute_param { int input_c; int input_h; int input_w; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_permute_param_t; #else } bm_api_permute_param_t; @@ -1551,7 +1551,7 @@ typedef struct bm_api_permute_fix8b_param { int input_c; int input_h; int input_w; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_permute_fix8b_param_t; #else } bm_api_permute_fix8b_param_t; @@ -1578,7 +1578,7 @@ typedef struct bm_api_pooling_backward_parallel { int h_step; float Ratio; int result; /* For split result*/ -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_pooling_backward_parallel_t; #else } bm_api_pooling_backward_parallel_t; @@ -1606,7 +1606,7 @@ typedef struct bm_api_pooling_forward_parallel { int c_step; int h_step; float Ratio; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_pooling_forward_parallel_t; #else } bm_api_pooling_forward_parallel_t; @@ -1640,7 +1640,7 @@ typedef struct bm_api_pooling_fix8b_forward_parallel { int c_step; int h_step; float Ratio; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_pooling_fix8b_forward_parallel_t; #else } bm_api_pooling_fix8b_forward_parallel_t; @@ -1666,7 +1666,7 @@ typedef struct bm_api_pooling_train_index_forward_parallel { int h_step; float Ratio; int result; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_pooling_train_index_forward_parallel_t; #else } bm_api_pooling_train_index_forward_parallel_t; @@ -1693,7 +1693,7 @@ typedef struct bm_api_pooling_train_forward_parallel { int h_step; float Ratio; int result; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_pooling_train_forward_parallel_t; #else } 
bm_api_pooling_train_forward_parallel_t; @@ -1709,7 +1709,7 @@ typedef struct bm_api_adaptive_pooling_forward { int pooled_h; int pooled_w; int is_avg_pooling; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_adaptive_pooling_forward_t; #else } bm_api_adaptive_pooling_forward_t; @@ -1732,7 +1732,7 @@ typedef struct bm_api_adaptive_pooling_fix8b_forward { int opd2_sign; int res0_sign; int if_relu; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_adaptive_pooling_fix8b_forward_t; #else } bm_api_adaptive_pooling_fix8b_forward_t; @@ -1749,7 +1749,7 @@ typedef struct bm_api_upsample_forward_parallel { int c_step; int h_step; float Ratio; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_upsample_forward_parallel_t; #else } bm_api_upsample_forward_parallel_t; @@ -1772,7 +1772,7 @@ typedef struct bm_api_pooling_forward_parallel_bank_conflict { int c_step; int h_step; float Ratio; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_pooling_forward_parallel_bank_conflict_t; #else } bm_api_pooling_forward_parallel_bank_conflict_t; @@ -1788,7 +1788,7 @@ typedef struct bm_api_power_forward { int Tensor_C; int Tensor_H; int Tensor_W; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_power_forward_t; #else } bm_api_power_forward_t; @@ -1806,7 +1806,7 @@ typedef struct bm_api_power_backward { int Tensor_C; int Tensor_H; int Tensor_W; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_power_backward_t; #else } bm_api_power_backward_t; @@ -1823,7 +1823,7 @@ typedef struct bm_api_prelu_forward { int Tensor_C; int Tensor_H; int Tensor_W; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_prelu_forward_t; #else } bm_api_prelu_forward_t; @@ -1842,7 +1842,7 @@ typedef struct bm_api_prelu_backward { int Tensor_C; int Tensor_H; int Tensor_W; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_prelu_backward_t; #else } bm_api_prelu_backward_t; @@ -1857,7 +1857,7 @@ typedef struct 
bm_api_relu_forward_parallel { int Tensor_C; int Tensor_H; int Tensor_W; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_relu_forward_parallel_t; #else } bm_api_relu_forward_parallel_t; @@ -1873,7 +1873,7 @@ typedef struct bm_api_relu_backward_parallel { int Tensor_C; int Tensor_H; int Tensor_W; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_relu_backward_parallel_t; #else } bm_api_relu_backward_parallel_t; @@ -1892,7 +1892,7 @@ typedef struct bm_api_roi_pooling_forward { int pooled_w; int roi_num; float spatial_scale; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_roi_pooling_forward_t; #else } bm_api_roi_pooling_forward_t; @@ -1913,7 +1913,7 @@ typedef struct bm_api_psroipooling_forward { float spatial_scale; int input_sign; int output_sign; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_psroipooling_forward_t; #else } bm_api_psroipooling_forward_t; @@ -1940,7 +1940,7 @@ typedef struct bm_api_rpnproposal_forward_t { u64 arm_reserved_global_offset; int bottom_prec; // 0: FP32, 1: INT8, 2: UINT8 float scale_val; // only vaid if FIX8B -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_rpnproposal_forward_t; #else } bm_api_rpnproposal_forward_t; @@ -1959,7 +1959,7 @@ typedef struct bm_api_scale_forward { int shape_axis_num; int add_bias; int if_relu; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_scale_forward_t; #else } bm_api_scale_forward_t; @@ -1970,7 +1970,7 @@ typedef struct bm_api_sigmoid_forward_parallel { u64 top_global_offset; int input_n; // note this is total input_n int input_c; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_sigmoid_forward_parallel_t; #else } bm_api_sigmoid_forward_parallel_t; @@ -1985,7 +1985,7 @@ typedef struct bm_api_sigmoid_forward_parallel_fix8b { float output_scale; int input_signed; int output_signed; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_sigmoid_forward_parallel_fix8b_t; #else } 
bm_api_sigmoid_forward_parallel_fix8b_t; @@ -1997,7 +1997,7 @@ typedef struct bm_api_sigmoid_backward_parallel { u64 bottom_diff_global_offset; int input_n; // note this is total input_n int input_c; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_sigmoid_backward_parallel_t; #else } bm_api_sigmoid_backward_parallel_t; @@ -2013,7 +2013,7 @@ typedef struct bm_api_sigmoid_cross_entropy_loss_forward { int Tensor_C; int Tensor_H; int Tensor_W; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_sigmoid_cross_entropy_loss_forward_t; #else } bm_api_sigmoid_cross_entropy_loss_forward_t; @@ -2029,7 +2029,7 @@ typedef struct bm_api_sigmoid_cross_entropy_loss_backward { int Tensor_C; int Tensor_H; int Tensor_W; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_sigmoid_cross_entropy_loss_backward_t; #else } bm_api_sigmoid_cross_entropy_loss_backward_t; @@ -2041,7 +2041,7 @@ typedef struct bm_api_silence_backward { int Tensor_C; int Tensor_H; int Tensor_W; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_silence_backward_t; #else } bm_api_silence_backward_t; @@ -2058,7 +2058,7 @@ typedef struct bm_api_softmax_loss_forward_parallel { int Tensor_C; int Tensor_H; int Tensor_W; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_softmax_loss_forward_parallel_t; #else } bm_api_softmax_loss_forward_parallel_t; @@ -2075,7 +2075,7 @@ typedef struct bm_api_softmax_loss_backward_parallel { int Tensor_C; int Tensor_H; int Tensor_W; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_softmax_loss_backward_parallel_t; #else } bm_api_softmax_loss_backward_parallel_t; @@ -2092,7 +2092,7 @@ typedef struct bm_api_softmax_loss_bidirection_parallel { int Tensor_C; int Tensor_H; int Tensor_W; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_softmax_loss_bidirection_parallel_t; #else } bm_api_softmax_loss_forward_parallel_t; @@ -2109,7 +2109,7 @@ typedef struct bm_api_softmax_forward_parallel { float 
scale_val; int in_tensor_global_store_mode; u64 global_offset_1N_buf; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_softmax_forward_parallel_t; #else } bm_api_softmax_forward_parallel_t; @@ -2123,7 +2123,7 @@ typedef struct bm_api_softmax_backward_parallel { int Tensor_C; int Tensor_H; int Tensor_W; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_softmax_backward_parallel_t; #else } bm_api_softmax_backward_parallel_t; @@ -2136,7 +2136,7 @@ typedef struct bm_api_global_memcpy { int src_nstride; int dst_nstride; int count; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_global_memcpy_t; #else } bm_api_global_memcpy_t; @@ -2148,7 +2148,7 @@ typedef struct bm_api_crop { int offset[4]; int topshape[4]; int bottomshape[4]; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_crop_t; #else } bm_api_crop_t; @@ -2162,7 +2162,7 @@ typedef struct bm_api_concat { int st_by_concatway[MAX_CONCATLAYER_INPUT_NUM]; int bottom_size; int concat_axis; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_concat_t; #else } bm_api_concat_t; @@ -2180,7 +2180,7 @@ typedef struct bm_api_multiregion_forward_parallel { int coords; int nums; int Activate_parm[4]; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_multiregion_forward_parallel_t; #else } bm_api_multiregion_forward_parallel_t; @@ -2193,7 +2193,7 @@ typedef struct bm_api_split_backward { int Tensor_C; int Tensor_H; int Tensor_W; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_split_backward_t; #else } bm_api_split_backward_t; @@ -2204,7 +2204,7 @@ typedef struct bm_api_tanh_forward_parallel { u64 top_global_offset; int Tensor_N; int Tensor_C; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_tanh_forward_parallel_t; #else } bm_api_tanh_forward_parallel_t; @@ -2219,7 +2219,7 @@ typedef struct bm_api_tanh_forward_parallel_fix8b { float output_scale; int input_signed; int output_signed; -#ifndef WIN32 +#ifndef _WIN32 } 
__attribute__((packed)) bm_api_tanh_forward_parallel_fix8b_t; #else } bm_api_tanh_forward_parallel_fix8b_t; @@ -2229,7 +2229,7 @@ typedef struct { u64 bottom_global_offset; u64 top_global_offset; u64 length; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_sign_t; #else } bm_api_sign_t; @@ -2241,7 +2241,7 @@ typedef struct bm_api_tanh_backward_parallel { u64 bottom_diff_global_offset; int Tensor_N; int Tensor_C; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_tanh_backward_parallel_t; #else } bm_api_tanh_backward_parallel_t; @@ -2255,7 +2255,7 @@ typedef struct bm_api_threshold_forward { int input_c; int input_h; int input_w; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_threshold_forward_t; #else } bm_api_threshold_forward_t; @@ -2269,7 +2269,7 @@ typedef struct bm_api_bias_forward { int Tensor_C; int Tensor_H; int Tensor_W; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_bias_forward_t; #else } bm_api_bias_forward_t; @@ -2286,7 +2286,7 @@ typedef struct bm_api_bias_fix8b_forward { int rshift; int if_relu; int relu_limit; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_bias_fix8b_forward_t; #else } bm_api_bias_fix8b_forward_t; @@ -2301,7 +2301,7 @@ typedef struct bm_api_bias_backward { int Tensor_C; int Tensor_H; int Tensor_W; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_bias_backward_t; #else } bm_api_bias_backward_t; @@ -2321,7 +2321,7 @@ typedef struct bm_api_scale_backward { int scale_dim; int inner_dim; int scale_is_neuron; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_scale_backward_t; #else } bm_api_scale_backward_t; @@ -2349,7 +2349,7 @@ typedef struct bm_api_conv_correlation { int stride_h; int stride_w; int add_result; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_conv_correlation_t; #else } bm_api_conv_correlation_t; @@ -2363,7 +2363,7 @@ typedef struct bm_api_regularization_l1 { int h; int w; float local_decay; 
-#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_regularization_l1_t; #else } bm_api_regularization_l1_t; @@ -2377,7 +2377,7 @@ typedef struct bm_api_global_int2float { int h; int w; int sign_unsign; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_global_int2float_t; #else } bm_api_global_int2float_t; @@ -2400,7 +2400,7 @@ typedef struct bm_api_cv_warp { int blockIdx_x; int blockIdx_y; int type; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_cv_warp_t; #else } bm_api_cv_warp_t; @@ -2420,7 +2420,7 @@ typedef struct bm_api_cv_warp_bilinear { int blockIdx_x; int blockIdx_y; int src_stride; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_cv_warp_bilinear_t; #else } bm_api_cv_warp_bilinear_t; @@ -2440,7 +2440,7 @@ typedef struct bm_api_cv_filter { float delta; int is_packed; int out_type; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_cv_filter_t; #else } bm_api_cv_filter_t; @@ -2458,7 +2458,7 @@ typedef struct bm_api_cv_canny { float low_thresh; float high_thresh; int l2gradient; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_cv_canny_t; #else } bm_api_cv_canny_t; @@ -2477,7 +2477,7 @@ typedef struct bm_api_cv_add_weighted { float alpha; float beta; float gamma; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_cv_add_weighted_t; #else } bm_api_cv_add_weighted_t; @@ -2493,7 +2493,7 @@ typedef struct bm_api_cv_absdiff { int input1_str[3]; int input2_str[3]; int output_str[3]; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_cv_absdiff_t; #else } bm_api_cv_absdiff_t; @@ -2509,7 +2509,7 @@ typedef struct bm_api_cv_draw_line { int rval; int gval; int bval; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_cv_draw_line_t; #else } bm_bm_api_cv_draw_line_t; @@ -2526,7 +2526,7 @@ typedef struct bm_api_cv_threshold { int type; u32 thresh; u32 max_value; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_cv_threshold_t; #else } 
bm_api_cv_threshold_t; @@ -2544,7 +2544,7 @@ typedef struct bm_api_laplacian { unsigned int stride_o[3]; float delta; int is_packed; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_laplacian_t; #else } bm_api_laplacian_t; @@ -2559,7 +2559,7 @@ typedef struct bm_api_cv_axpy { int input_c; int input_h; int input_w; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_cv_axpy_t; #else } bm_api_cv_axpy_t; @@ -2583,7 +2583,7 @@ typedef struct bm_api_cv_fusion { int kw; int format; // int op; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_cv_fusion_t; #else } bm_api_cv_fusion_t; @@ -2599,7 +2599,7 @@ typedef struct { int input_format; int output_format; int type; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_tile_forward_t; #else } bm_api_tile_forward_t; @@ -2615,7 +2615,7 @@ typedef struct { int in_store_mode; int out_store_mode; int type; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_tile_fix8b_forward_t; #else } bm_api_tile_fix8b_forward_t; @@ -2630,7 +2630,7 @@ typedef struct { int type_len; int store_mode; u64 buffer_global_mem_addr; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_transpose_t; #else } bm_api_transpose_t; @@ -2645,7 +2645,7 @@ typedef struct { int in_store_mode; int out_store_mode; u64 buffer_global_mem_addr; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_transpose_fix8b_t; #else } bm_api_transpose_fix8b_t; @@ -2658,7 +2658,7 @@ typedef struct { int height; int width; int type_len; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_cv_transpose_t; #else } bm_api_cv_transpose_t; @@ -2672,7 +2672,7 @@ typedef struct { int input_h; int input_w; int sum_dim; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_sum_forward_t; #else } bm_api_tile_forward_t; @@ -2692,7 +2692,7 @@ typedef struct bm_api_width_align { int dst_c_stride; int dst_h_stride; int data_size; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) 
bm_api_cv_width_align_t; #else } bm_api_cv_width_align_t; @@ -2719,7 +2719,7 @@ typedef struct bm_api_cv_resize_st { int padding_b; int padding_g; int padding_r; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_cv_resize_t; #else } bm_api_cv_resize_t; @@ -2730,7 +2730,7 @@ typedef struct bm_api_yuv_resize_st { u64 output_para_addr; int data_type; int image_num; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_yuv_resize_t; #else } bm_api_yuv_resize_t; @@ -2763,7 +2763,7 @@ typedef struct bm_api_cv_yuv2rgb { int dst_store_mode; int dst_image_format; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_cv_yuv2rgb_t; #else } bm_api_cv_yuv2rgb_t; @@ -2779,7 +2779,7 @@ typedef struct bm_api_cv_yuv2hsv { unsigned int oh; unsigned int src_format; unsigned int dst_format; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_cv_yuv2hsv_t; #else } bm_api_cv_yuv2hsv_t; @@ -2815,7 +2815,7 @@ typedef struct bm_api_cv_storage_convert { int width; u32 op; u32 csc_type; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_cv_storage_convert_t; #else } bm_api_cv_storage_convert_t; @@ -2832,7 +2832,7 @@ typedef struct bm_api_compare { int B_is_constant; float A_constant; float B_constant; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_compare_t; #else } bm_api_compare_t; @@ -2852,7 +2852,7 @@ typedef struct bm_api_eltwise_binary_fix8b_forward { int sign_A; int sign_B; int if_relu; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_eltwise_binary_fix8b_forward_t; #else } bm_api_eltwise_binary_fix8b_forward_t; @@ -2873,7 +2873,7 @@ typedef struct bm_api_const_binary_fix8b_forward { int sign_B; int inversed; int if_relu; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_const_binary_fix8b_forward_t; #else } bm_api_const_binary_fix8b_forward_t; @@ -2894,7 +2894,7 @@ typedef struct bm_api_broadcast_binary_fix8b_forward { int sign_A; int sign_B; int if_relu; -#ifndef WIN32 +#ifndef 
_WIN32 } __attribute__((packed)) bm_api_broadcast_binary_fix8b_forward_t; #else } bm_api_broadcast_binary_fix8b_forward_t; @@ -2922,7 +2922,7 @@ typedef struct bm_api_binary { int B_is_constant; float B_value; int type; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_binary_t; #else } bm_api_binary_t; @@ -2939,7 +2939,7 @@ typedef struct bm_api_simple_binary { int B_is_constant; float B_value; int type; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_simple_binary_t; #else } bm_api_simple_binary_t; @@ -2952,7 +2952,7 @@ typedef struct { u64 length; int type; unsigned char param[UNARY_PARAM_MAX_LEN]; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_unary_t; #else } bm_api_unary_t; @@ -2969,7 +2969,7 @@ typedef struct bm_api_logical { int B_is_constant; unsigned int A_constant; unsigned int B_constant; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_logical_t; #else } bm_api_logical_t; @@ -2986,7 +2986,7 @@ typedef struct bm_api_ternary { int B_is_constant; float A_constant; float B_constant; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_ternary_t; #else } bm_api_ternary_t; @@ -3020,7 +3020,7 @@ typedef struct bm_api_select { float then_value; int else_is_constant; float else_value; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_select_t; #else } bm_api_select_t; @@ -3039,7 +3039,7 @@ typedef struct bm_api_simple_select { float then_value; int else_is_constant; float else_value; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_simple_select_t; #else } bm_api_simple_select_t; @@ -3051,7 +3051,7 @@ typedef struct { u64 length; int N; // sum(x^(2^N)) float multiplier; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_sum_x2n_t; #else } bm_api_sum_x2n_t; @@ -3075,7 +3075,7 @@ typedef struct { float base_threshold; int per_nms_topn; u64 output_proposal_addr; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_cv_gen_proposal_t; #else } 
bm_api_cv_gen_proposal_t; @@ -3093,7 +3093,7 @@ typedef struct bm_api_word2vec { int example_num; int dims; int vocab_size; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_word2vec_t; #else } bm_api_word2vec_t; @@ -3108,7 +3108,7 @@ typedef struct bm_api_ctcloss { int num_classes; int lprime_s; int seq_l; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_ctcloss_t; #else } bm_api_ctcloss_t; @@ -3119,7 +3119,7 @@ typedef struct { int proposal_size; float nms_threshold; u64 output_proposal_addr; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_cv_nms_t; #else } bm_api_cv_nms_t; @@ -3130,7 +3130,7 @@ typedef struct { u64 output_global_addr; u64 length; int convert_flag; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_f32_convert_t; #else } bm_api_f32_convert_t; @@ -3141,7 +3141,7 @@ typedef struct { u64 output_global_addr; u64 length; int cast_flag; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_cast_t; #else } bm_api_cast_t; @@ -3152,7 +3152,7 @@ typedef struct { u64 output_global_addr; u64 length; u32 op_code; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_triangle_t; #else } bm_api_triangle_t; @@ -3163,7 +3163,7 @@ typedef struct { u64 output_global_addr; u64 length; int what; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_f32_is_t; #else } bm_api_f32_is_t; @@ -3178,7 +3178,7 @@ typedef struct { int axis_list[FW_MAX_SHAPE_DIMS]; int axis_number; int method; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_reduce_t; #else } bm_api_reduce_t; @@ -3197,7 +3197,7 @@ typedef struct { int store_mode; float bottom_scale_val; float top_scale_val; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_reduce_fix8b_t; #else } bm_api_reduce_fix8b_t; @@ -3210,7 +3210,7 @@ typedef struct { int output_rows; int row_size; int segment_reduce_flag; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_segment_reduce_t; #else } 
bm_api_segment_reduce_t; @@ -3232,7 +3232,7 @@ typedef struct { int input_img_data_type; int output_img_data_type; u64 output_img_addr; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_cv_convert_to_t; #else } bm_api_cv_convert_to_t; @@ -3299,7 +3299,7 @@ typedef struct { u64 output_proposal_addr; u64 filter_output; u64 filter_output_shape_addr; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_cv_gen_proposal_and_nms_t; #else } bm_api_cv_gen_proposal_and_nms_t; @@ -3314,7 +3314,7 @@ typedef struct { u64 output_img_addr_2; u64 convert_to_attr_addr; int times; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_cv_convert_to_inter_t; #else } bm_api_cv_convert_to_inter_t; @@ -3329,7 +3329,7 @@ typedef struct { int location; int data_cnt; int sort_cnt; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_cv_sort_test_t; #else } bm_api_cv_sort_test_t; @@ -3345,7 +3345,7 @@ typedef struct { int order; int index_enable; int auto_index; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_cv_sort_t; #else } bm_api_cv_sort_t; @@ -3361,7 +3361,7 @@ typedef struct bm_api_pad { int pad[4][2]; float const_val; int type; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_pad_t; #else } bm_api_pad_t; @@ -3381,7 +3381,7 @@ typedef struct bm_api_pad_fix8b { int pad[4][2]; int const_val; int type; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_pad_fix8b_t; #else } bm_api_pad_fix8b_t; @@ -3397,7 +3397,7 @@ typedef struct bm_api_arg { int input_w; int axis; int method; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_arg_t; #else } bm_api_arg_t; @@ -3416,7 +3416,7 @@ typedef struct bm_api_arg_fix8b { int input_w; int axis; int method; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_arg_fix8b_t; #else } bm_api_arg_fix8b_t; @@ -3430,7 +3430,7 @@ typedef struct bm_api_shuffle_channel_forward { int input_h; int input_w; int group_num; -#ifndef WIN32 +#ifndef _WIN32 
} __attribute__((packed)) bm_api_shuffle_channel_forward_t; #else } bm_api_shuffle_channel_forward_t; @@ -3444,7 +3444,7 @@ typedef struct bm_api_shuffle_channel_fix8b_forward { int input_h; int input_w; int group_num; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_shuffle_channel_fix8b_forward_t; #else } bm_api_shuffle_channel_fix8b_forward_t; @@ -3462,7 +3462,7 @@ typedef struct bm_api_split_tf_fix8b { int split_size[MAX_SPLIT_TF_OUTPUT_NUM]; int split_num; u64 top_global_offset[MAX_SPLIT_TF_OUTPUT_NUM]; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_split_tf_fix8b_t; #else } bm_api_split_tf_fix8b_t; @@ -3477,7 +3477,7 @@ typedef struct bm_api_cv_feature_match_t { int batch_size; int feature_size; int db_size; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_cv_feature_match_t; #else } bm_api_cv_feature_match_t; @@ -3493,7 +3493,7 @@ typedef struct { int input_c; int input_h; int input_w; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_topk_t; #else } bm_api_topk_t; @@ -3513,7 +3513,7 @@ typedef struct { int per_batch_size_is_same; int batch_stride; int descending; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_cv_batch_topk_t; #else } bm_api_cv_batch_topk_t; @@ -3528,7 +3528,7 @@ typedef struct { int input_c; int input_h; int input_w; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_where_t; #else } bm_api_where_t; @@ -3542,7 +3542,7 @@ typedef struct { int input_c; int input_h; int input_w; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_cumsum_t; #else } bm_api_cumsum_t; @@ -3561,7 +3561,7 @@ typedef struct { int begin_index[FW_MAX_STRIDE_DIMS]; int end_index[FW_MAX_STRIDE_DIMS]; int stride[FW_MAX_STRIDE_DIMS]; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_stride_slice_t; #else } bm_api_stride_slice_t; @@ -3581,7 +3581,7 @@ typedef struct { int *begin_index; int *end_index; int *stride; -#ifndef WIN32 +#ifndef _WIN32 } 
__attribute__((packed)) bm_api_stride_slice_fix8b_t; #else } bm_api_stride_slice_fix8b_t; @@ -3597,7 +3597,7 @@ typedef struct { int *begin_index; int *end_index; int *stride; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_stride_slice_md_t; #else } bm_api_stride_slice_md_t; @@ -3615,7 +3615,7 @@ typedef struct bm_api_interp_forward_parallel { int output_h; int output_w; int platform_sp; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_interp_forward_parallel_t; #else } bm_api_interp_forward_parallel_t; @@ -3632,7 +3632,7 @@ typedef struct { int out_store_mode; int block_sizes[2]; // must have 2 elements int crop_sizes[4]; // must have 4 elements -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_batch2space_fix8b_forward_t; #else } bm_api_batch2space_fix8b_forward_t; @@ -3648,7 +3648,7 @@ typedef struct { int out_store_mode; int block_sizes[2]; // must have 2 elements int pad_sizes[4]; // must have 4 elements -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_space2batch_fix8b_forward_t; #else } bm_api_space2batch_fix8b_forward_t; @@ -3666,7 +3666,7 @@ typedef struct { int coords; int background; int softmax; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_yolo_t; #else } bm_api_yolo_t; @@ -3690,7 +3690,7 @@ typedef struct { float nms_threshold; float conf_threshold; float eta; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_ssd_detect_out_t; #else } bm_api_ssd_detect_out_t; @@ -3714,7 +3714,7 @@ typedef struct bm_api_cv_copy_to_st { int padding_g; int padding_r; int if_padding; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_cv_copy_to_t; #else } bm_api_cv_copy_to_t; @@ -3731,7 +3731,7 @@ typedef struct bm_api_cv_feature_match_fix8b_st { int db_size; int sort_cnt; int rshiftbits; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_cv_feature_match_fix8b_t; #else } bm_api_cv_feature_match_t; @@ -3744,7 +3744,7 @@ typedef struct { float sigma; u64 
overlap_output_addr; u64 weighting_res_output_addr; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_cv_soft_nms_t; #else } bm_api_cv_soft_nms_t; @@ -3757,7 +3757,7 @@ typedef struct { u32 H; u32 W; u32 is_inversed; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_dct_coeff_t; #else } bm_api_dct_coeff_t; @@ -3775,7 +3775,7 @@ typedef struct { u32 W; u32 coeff_ready; u32 is_inversed; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_dct_t; #else } bm_api_dct_t; @@ -3789,7 +3789,7 @@ typedef struct bm_api_cv_calc_hist_index { int len; int xdtype; float upper; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_cv_calc_hist_index_t; #else } bm_api_cv_calc_hist_index_t; @@ -3809,7 +3809,7 @@ typedef struct bm_api_cv_fft_1d { int forward; int inputIsReal; int trans; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_cv_fft_1d_t; #else } bm_api_cv_fft_1d_t; @@ -3824,7 +3824,7 @@ typedef struct bm_api_cv_cmulp { u64 YI; int batch; int len; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_cv_cmulp_t; #else } bm_api_cv_cmulp_t; @@ -3836,7 +3836,7 @@ typedef struct bm_api_cv_distance { int dim; float pnt[8]; int len; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_cv_distance_t; #else } bm_api_cv_distance_t; @@ -3848,7 +3848,7 @@ typedef struct bm_api_cv_min_max { u64 maxAddr; int len; int mode; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_cv_min_max_t; #else } bm_api_cv_min_max_t; @@ -3865,7 +3865,7 @@ typedef struct bm_api_cv_morph { int stride_o; int format; int op; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_cv_morph_t; #else } bm_api_cv_morph_t; @@ -3881,7 +3881,7 @@ typedef struct bm_api_cv_pyramid { int oh; int stride_i; int stride_o; -#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_cv_pyramid_t; #else } bm_api_cv_pyramid_t; @@ -3898,7 +3898,7 @@ typedef struct bm_api_cv_lkpyramid { int winW; int winH; int max_level; 
-#ifndef WIN32 +#ifndef _WIN32 } __attribute__((packed)) bm_api_cv_lkpyramid_t; #else } bm_api_cv_lkpyramid_t; diff --git a/bmvid/3rdparty/libbmcv/include/bmlib_runtime.h b/bmvid/3rdparty/libbmcv/include/bmlib_runtime.h index 4231edf..c0d43d1 100644 --- a/bmvid/3rdparty/libbmcv/include/bmlib_runtime.h +++ b/bmvid/3rdparty/libbmcv/include/bmlib_runtime.h @@ -160,6 +160,32 @@ typedef struct sg_mem_desc { typedef struct sg_mem_desc sg_device_mem_t; typedef struct sg_mem_desc sg_system_mem_t; + +typedef struct bm_mem_desc_u64 { + union { + struct { +#ifdef __linux__ + unsigned long device_addr; +#else + unsigned long long device_addr; +#endif + unsigned int reserved; + int dmabuf_fd; + } device; + + struct { + void *system_addr; + unsigned int reserved0; + int reserved1; + } system; + } u; + + bm_mem_flags_t flags; + unsigned long long size; +} bm_mem_desc_u64_t; + +typedef struct bm_mem_desc_u64 bm_device_mem_u64_t; +typedef struct bm_mem_desc_u64 bm_system_mem_u64_t; #endif struct bm_context; @@ -215,6 +241,19 @@ tpu_kernel_module_t tpu_kernel_load_module_file_key(bm_handle_t handle, const ch */ bm_status_t tpu_kernel_unload_module(bm_handle_t handle, tpu_kernel_module_t p_module); +/** + * @name tpu_kernel_unload_module_from_core + * @brief To unload dyn file + * @ingroup bmlib_runtime + * + * @param [in] handle The device handle + * @param [in] p_module dyn lib ptr + * @param [in] core_id core id + * @retval BM_SUCCESS Succeeds. + * Other code Fails. 
+ */ +bm_status_t tpu_kernel_unload_module_from_core(bm_handle_t handle, tpu_kernel_module_t p_module, int core_id); + /** * @name tpu_kernel_free_module * @brief To free p_module when not use @@ -239,6 +278,19 @@ bm_status_t tpu_kernel_free_module(bm_handle_t handle, tpu_kernel_module_t p_mod */ tpu_kernel_module_t tpu_kernel_load_module(bm_handle_t handle, const char *data, size_t length); +/** + * @name tpu_kernel_load_module_to_core + * @brief To load dyn module + * @ingroup bmlib_runtime + * + * @param [in] handle The device handle + * @param [in] data dyn module + * @param [in] length dyn module size + * @param [in] core_id core id + * @retval dyn lib ptr + */ +tpu_kernel_module_t tpu_kernel_load_module_to_core(bm_handle_t handle, const char *data, size_t length, int core_id); + /** * @name tpu_kernel_get_function * @brief To get function from lib @@ -251,6 +303,19 @@ tpu_kernel_module_t tpu_kernel_load_module(bm_handle_t handle, const char *data, */ tpu_kernel_function_t tpu_kernel_get_function(bm_handle_t handle, tpu_kernel_module_t module, const char *function); +/** + * @name tpu_kernel_get_function_from_core + * @brief To get function from lib + * @ingroup bmlib_runtime + * + * @param [in] handle The device handle + * @param [in] module dyn module + * @param [in] function function name + * @param [in] core_id core id + * @retval function id + */ +tpu_kernel_function_t tpu_kernel_get_function_from_core(bm_handle_t handle, tpu_kernel_module_t module, const char *function, int core_id); + /** * @name tpu_kernel_launch * @brief To launch function with sync @@ -265,6 +330,21 @@ tpu_kernel_function_t tpu_kernel_get_function(bm_handle_t handle, tpu_kernel_mod */ bm_status_t tpu_kernel_launch(bm_handle_t handle, tpu_kernel_function_t function, void *args, size_t size); +/** + * @name tpu_kernel_launch_from_core + * @brief To launch function with sync + * @ingroup bmlib_runtime + * + * @param [in] handle The device handle + * @param [in] function function id + *
@param [in] args function args + * @param [in] size args size + * @param [in] core_id core id + * @retval BM_SUCCESS Succeeds. + * Other code Fails. + */ +bm_status_t tpu_kernel_launch_from_core(bm_handle_t handle, tpu_kernel_function_t function, void *args, size_t size, int core_id); + /** * @name tpu_kernel_launch_async * @brief To launch function with async @@ -279,6 +359,21 @@ bm_status_t tpu_kernel_launch(bm_handle_t handle, tpu_kernel_function_t function */ bm_status_t tpu_kernel_launch_async(bm_handle_t handle, tpu_kernel_function_t function, void *args, size_t size); +/** + * @name tpu_kernel_launch_async_from_core + * @brief To launch function with async + * @ingroup bmlib_runtime + * + * @param [in] handle The device handle + * @param [in] function function id + * @param [in] args function args + * @param [in] size args size + * @param [in] core_id core id + * @retval BM_SUCCESS Succeeds. + * Other code Fails. + */ +bm_status_t tpu_kernel_launch_async_from_core(bm_handle_t handle, tpu_kernel_function_t function, void *args, size_t size, int core_id); + /** * @name tpu_kernel_sync * @brief To sync @@ -392,6 +487,17 @@ DECL_EXPORT bm_mem_type_t bm_mem_get_type(struct bm_mem_desc mem); */ DECL_EXPORT bm_mem_type_t sg_mem_get_type(struct sg_mem_desc mem); +/** + * @name bm_mem_get_type_u64 + * @brief To get a memory descriptor's type + * @ingroup bmlib_runtime + * + * @param [in] mem The memory descriptor queried + * @retval BM_MEM_TYPE_DEVICE Device global memory + * @retval BM_MEM_TYPE_SYSTEM Host user memory + */ +DECL_EXPORT bm_mem_type_t bm_mem_get_type_u64(struct bm_mem_desc_u64 mem); + /** * @name bm_mem_get_device_addr * @brief To get a device memory descriptor's address @@ -412,6 +518,16 @@ DECL_EXPORT unsigned long long bm_mem_get_device_addr(struct bm_mem_desc mem); */ DECL_EXPORT unsigned long long sg_mem_get_device_addr(struct sg_mem_desc mem); +/** + * @name bm_mem_get_device_addr_u64 + * @brief To get a device memory descriptor's address + *
@ingroup bmlib_runtime + * + * @param [in] mem The device memory descriptor queried + * @retval unsigned long long The device memory address + */ +DECL_EXPORT unsigned long long bm_mem_get_device_addr_u64(struct bm_mem_desc_u64 mem); + /** * @name bm_mem_set_device_addr * @brief To set a device memory descriptor's address @@ -432,6 +548,16 @@ DECL_EXPORT void bm_mem_set_device_addr(struct bm_mem_desc* pmem, unsigned long */ DECL_EXPORT void sg_mem_set_device_addr(struct sg_mem_desc* pmem, unsigned long long addr); +/** + * @name bm_mem_set_device_addr_u64 + * @brief To set a device memory descriptor's address + * @ingroup bmlib_runtime + * + * @param [in] pmem The device memory descriptor pointer + * @param [in] addr The new device address of the device memory + */ +DECL_EXPORT void bm_mem_set_device_addr_u64(struct bm_mem_desc_u64* pmem, unsigned long long addr); + /** * @name bm_mem_get_device_size * @brief To get a device memory descriptor's size @@ -452,6 +578,16 @@ DECL_EXPORT unsigned int bm_mem_get_device_size(struct bm_mem_desc mem); */ DECL_EXPORT unsigned long long sg_mem_get_device_size(struct sg_mem_desc mem); +/** + * @name bm_mem_get_device_size_u64 + * @brief To get a device memory descriptor's size + * @ingroup bmlib_runtime + * + * @param [in] mem The device memory descriptor queried + * @retval unsigned long long The device memory's size in bytes + */ +DECL_EXPORT unsigned long long bm_mem_get_device_size_u64(struct bm_mem_desc_u64 mem); + /** * @name bm_mem_set_device_size * @brief To set a device memory descriptor's size @@ -472,6 +608,16 @@ DECL_EXPORT void bm_mem_set_device_size(struct bm_mem_desc* pmem, unsigned int s */ DECL_EXPORT void sg_mem_set_device_size(struct sg_mem_desc* pmem, unsigned long long size); +/** + * @name bm_mem_set_device_size_u64 + * @brief To set a device memory descriptor's size + * @ingroup bmlib_runtime + * + * @param [out] pmem The device memory descriptor pointer + * @param [in] size The new device memory size (in
bytes) of the device memory + */ +DECL_EXPORT void bm_mem_set_device_size_u64(struct bm_mem_desc_u64* pmem, unsigned long long size); + /** * @name bm_set_device_mem * @brief To fill in a device memory descriptor with size and address @@ -496,6 +642,18 @@ DECL_EXPORT void bm_set_device_mem(bm_device_mem_t* pmem, unsigned int size, DECL_EXPORT void sg_set_device_mem(sg_device_mem_t* pmem, unsigned long long size, unsigned long long addr); +/** + * @name bm_set_device_mem_u64 + * @brief To fill in a device memory descriptor with size and address + * @ingroup bmlib_runtime + * + * @param [in] pmem The device memory descriptor pointer + * @param [in] size The device memory descriptor's size + * @param [in] addr The device memory descriptor's address + */ +DECL_EXPORT void bm_set_device_mem_u64(bm_device_mem_u64_t* pmem, unsigned long long size, + unsigned long long addr); + /** * @name bm_mem_from_device * @brief To create a device memory descriptor from address and size @@ -520,6 +678,18 @@ DECL_EXPORT bm_device_mem_t bm_mem_from_device(unsigned long long device_addr, DECL_EXPORT sg_device_mem_t sg_mem_from_device(unsigned long long device_addr, unsigned long long len); +/** + * @name bm_mem_from_device_u64 + * @brief To create a device memory descriptor from address and size + * @ingroup bmlib_runtime + * + * @param [in] device_addr The device memory address + * @param [in] len The device memory size + * @retval bm_device_mem_u64_t The device memory descriptor created + */ +DECL_EXPORT bm_device_mem_u64_t bm_mem_from_device_u64(unsigned long long device_addr, + unsigned long long len); + /** * @name bm_mem_get_system_addr * @brief To get a system memory descriptor's address @@ -540,6 +710,16 @@ DECL_EXPORT void *bm_mem_get_system_addr(struct bm_mem_desc mem); */ DECL_EXPORT void *sg_mem_get_system_addr(struct sg_mem_desc mem); +/** + * @name bm_mem_get_system_addr_u64 + * @brief To get a system memory descriptor's address + * @ingroup bmlib_runtime + * + * @param [in]
mem The system memory descriptor + * @retval void * The system memory descriptor's address + */ +DECL_EXPORT void *bm_mem_get_system_addr_u64(struct bm_mem_desc_u64 mem); + /** * @name bm_mem_set_system_addr * @brief To set a system memory descriptor's address @@ -560,6 +740,16 @@ DECL_EXPORT void bm_mem_set_system_addr(struct bm_mem_desc* pmem, void *addr); */ DECL_EXPORT void sg_mem_set_system_addr(struct sg_mem_desc* pmem, void *addr); +/** + * @name bm_mem_set_system_addr_u64 + * @brief To set a system memory descriptor's address + * @ingroup bmlib_runtime + * + * @param [in] pmem The system memory descriptor pointer + * @param [in] addr The system memory address + */ +DECL_EXPORT void bm_mem_set_system_addr_u64(struct bm_mem_desc_u64* pmem, void *addr); + /** * @name bm_mem_from_system * @brief To create a system memory descriptor with the given system address @@ -612,6 +802,22 @@ DECL_EXPORT bm_status_t sg_malloc_neuron_device(bm_handle_t handle, sg_device_me unsigned long long n, unsigned long long c, unsigned long long h, unsigned long long w); +/** + * @name bm_malloc_neuron_device_u64 + * @brief To malloc device memory according to a tensor shape + * (each neuron is 32 bits) + * @ingroup bmlib_runtime + * + * @param [in] handle The device handle + * @param [out] pmem The result device memory descriptor + * @param [in] n, c, h, w The shape of the input tensor + * @retval BM_SUCCESS Succeeds. + * Other code Fails.
+ */ +DECL_EXPORT bm_status_t bm_malloc_neuron_device_u64(bm_handle_t handle, bm_device_mem_u64_t *pmem, + unsigned long long n, unsigned long long c, + unsigned long long h, unsigned long long w); + /** * @name bm_malloc_device_dword * @brief To malloc device memory in size of dword (32 bits) @@ -640,6 +846,20 @@ DECL_EXPORT bm_status_t bm_malloc_device_dword(bm_handle_t handle, bm_device_mem DECL_EXPORT bm_status_t sg_malloc_device_dword(bm_handle_t handle, sg_device_mem_t *pmem, unsigned long long count); +/** + * @name bm_malloc_device_dword_u64 + * @brief To malloc device memory in size of dword (32 bits) + * @ingroup bmlib_runtime + * + * @param [in] handle The device handle + * @param [out] pmem The result device memory descriptor + * @param [in] count The number of dwords(32bits) to allocate + * @retval BM_SUCCESS Succeeds. + * Other code Fails. + */ +DECL_EXPORT bm_status_t bm_malloc_device_dword_u64(bm_handle_t handle, bm_device_mem_u64_t *pmem, + unsigned long long count); + /** * @name bm_malloc_device_byte * @brief To malloc device memory in size of byte @@ -654,6 +874,20 @@ DECL_EXPORT bm_status_t sg_malloc_device_dword(bm_handle_t handle, sg_device_mem DECL_EXPORT bm_status_t bm_malloc_device_byte(bm_handle_t handle, bm_device_mem_t *pmem, unsigned int size); +/** + * @name bm_malloc_device_mem + * @brief To malloc device memory in size of byte and output paddr + * @ingroup bmlib_runtime + * + * @param [in] handle The device handle + * @param [out] paddr The result malloc device memory addr + * @param [in] heap_id The heap where to allocate 0/1/2 + * @param [in] size The number of bytes to allocate + * @retval paddr + */ +DECL_EXPORT bm_status_t bm_malloc_device_mem(bm_handle_t handle, unsigned long long *paddr, + int heap_id, unsigned long long size); + /** * @name sg_malloc_device_byte * @brief To malloc device memory in size of byte @@ -668,6 +902,20 @@ DECL_EXPORT bm_status_t bm_malloc_device_byte(bm_handle_t handle, bm_device_mem_ DECL_EXPORT 
bm_status_t sg_malloc_device_byte(bm_handle_t handle, sg_device_mem_t *pmem, unsigned long long size); +/** + * @name bm_malloc_device_byte_u64 + * @brief To malloc device memory in size of byte + * @ingroup bmlib_runtime + * + * @param [in] handle The device handle + * @param [out] pmem The result device memory descriptor + * @param [in] size The number of bytes to allocate + * @retval BM_SUCCESS Succeeds. + * Other code Fails. + */ +DECL_EXPORT bm_status_t bm_malloc_device_byte_u64(bm_handle_t handle, bm_device_mem_u64_t *pmem, + unsigned long long size); + /** * @name bm_malloc_device_byte_heap * @brief To malloc device memory in size of byte within the specified heap @@ -698,6 +946,21 @@ DECL_EXPORT bm_status_t bm_malloc_device_byte_heap(bm_handle_t handle, bm_device DECL_EXPORT bm_status_t sg_malloc_device_byte_heap(bm_handle_t handle, sg_device_mem_t *pmem, int heap_id, unsigned long long size); +/** + * @name bm_malloc_device_byte_heap_u64 + * @brief To malloc device memory in size of byte within the specified heap + * @ingroup bmlib_runtime + * + * @param [in] handle The device handle + * @param [out] pmem The result device memory descriptor + * @param [in] heap_id The heap where to allocate 0/1/2 + * @param [in] size The number of bytes to allocate + * @retval BM_SUCCESS Succeeds. + * Other code Fails. 
+ */ +DECL_EXPORT bm_status_t bm_malloc_device_byte_heap_u64(bm_handle_t handle, bm_device_mem_u64_t *pmem, + int heap_id, unsigned long long size); + /** * @name bm_malloc_device_byte_heap_mask * @brief To malloc device memory in size of byte within the specified heaps @@ -728,6 +991,31 @@ DECL_EXPORT bm_status_t bm_malloc_device_byte_heap_mask(bm_handle_t handle, bm_d DECL_EXPORT bm_status_t sg_malloc_device_byte_heap_mask(bm_handle_t handle, sg_device_mem_t *pmem, int heap_id_mask, unsigned long long size); +/** + * @name bm_malloc_device_byte_heap_mask_u64 + * @brief To malloc device memory in size of byte within the specified heaps + * @ingroup bmlib_runtime + * + * @param [in] handle The device handle + * @param [out] pmem The result device memory descriptor + * @param [in] heap_id_mask The mask of heaps to allocate from. Each bit indicates one heap + * @param [in] size The number of bytes to allocate + * @retval BM_SUCCESS Succeeds. + * Other code Fails. + */ +DECL_EXPORT bm_status_t bm_malloc_device_byte_heap_mask_u64(bm_handle_t handle, bm_device_mem_u64_t *pmem, + int heap_id_mask, unsigned long long size); + +/** + * @name bm_free_device_mem + * @brief To free device memory and input paddr + * @ingroup bmlib_runtime + * + * @param [in] ctx The device handle + * @param [in] paddr The device memory addr to free + */ +DECL_EXPORT void bm_free_device_mem(bm_handle_t ctx, unsigned long long paddr); + /** * @name bm_free_device * @brief To free device memory @@ -748,6 +1036,16 @@ DECL_EXPORT void bm_free_device(bm_handle_t handle, bm_device_mem_t mem); */ DECL_EXPORT void sg_free_device(bm_handle_t handle, sg_device_mem_t mem); +/** + * @name bm_free_device_u64 + * @brief To free device memory + * @ingroup bmlib_runtime + * + * @param [in] handle The device handle + * @param [in] mem The device memory descriptor to free + */ +DECL_EXPORT void bm_free_device_u64(bm_handle_t handle, bm_device_mem_u64_t mem); + /** * @name bm_gmem_arm_reserved_request * @brief
To obtain the address of global memory reserved for arm926 @@ -781,6 +1079,35 @@ DECL_EXPORT void bm_gmem_arm_reserved_release(bm_handle_t handle); */ DECL_EXPORT bm_status_t bm_memcpy_s2d(bm_handle_t handle, bm_device_mem_t dst, void *src); +/** + * @name bm_memcpy_s2d_gather + * @brief To copy data from system virtual memory to device memory + * @ingroup bmlib_runtime + * + * @param [in] handle The device handle + * @param [in] dst The destination memory (device memory descriptor ) + * @param [in] argc The number of system memory and len (system memory, a void* pointer) + * @param [in] ... void *src and unsigned long long len + * + * @retval BM_SUCCESS Succeeds. + * Other code Fails. + */ +DECL_EXPORT bm_status_t bm_memcpy_s2d_gather(bm_handle_t handle, bm_device_mem_t dst, int argc, ...); + +/** + * @name bm_memcpy_d2s_scatter + * @brief To copy data from device memory to system virtual memory + * @ingroup bmlib_runtime + * + * @param [in] handle The device handle + * @param [in] src The source memory (device memory descriptor ) + * @param [in] argc The number of system memory and len (system memory, a void* pointer) + * @param [in] ... void *dst and unsigned long long len + * + * @retval BM_SUCCESS Succeeds. + * Other code Fails. + */ +DECL_EXPORT bm_status_t bm_memcpy_d2s_scatter(bm_handle_t handle, bm_device_mem_t src, int argc, ...); /** * @name bm_memcpy_p2p * @brief To copy data from one chip to another chip @@ -810,6 +1137,20 @@ DECL_EXPORT bm_status_t bm_memcpy_p2p(bm_handle_t handle_src, bm_device_mem_t sr */ DECL_EXPORT bm_status_t sg_memcpy_s2d(bm_handle_t handle, sg_device_mem_t dst, void *src); +/** + * @name bm_memcpy_s2d_u64 + * @brief To copy data from system memory to device memory + * @ingroup bmlib_runtime + * + * @param [in] handle The device handle + * @param [in] dst The destination memory (device memory descriptor ) + * @param [in] src The source memory (system memory, a void* pointer) + * + * @retval BM_SUCCESS Succeeds.
+ * Other code Fails. + */ +DECL_EXPORT bm_status_t bm_memcpy_s2d_u64(bm_handle_t handle, bm_device_mem_u64_t dst, void *src); + /** * @name bm_memcpy_s2d_partial_offset * @brief To copy specified bytes of data from system memory to device memory @@ -850,6 +1191,26 @@ DECL_EXPORT bm_status_t sg_memcpy_s2d_partial_offset(bm_handle_t handle, unsigned long long size, unsigned long long offset); +/** + * @name bm_memcpy_s2d_partial_offset_u64 + * @brief To copy specified bytes of data from system memory to device memory + * with an offset in device memory address. + * @ingroup bmlib_runtime + * + * @param [in] handle The device handle + * @param [in] dst The destination memory (device memory descriptor) + * @param [in] src The source memory (system memory, a void* pointer) + * @param [in] size The size of data to copy (in bytes) + * @param [in] offset The offset of the device memory address + * + * @retval BM_SUCCESS Succeeds. + * Other code Fails. + */ +DECL_EXPORT bm_status_t bm_memcpy_s2d_partial_offset_u64(bm_handle_t handle, + bm_device_mem_u64_t dst, void *src, + unsigned long long size, + unsigned long long offset); + /** * @name bm_memcpy_s2d_partial * @brief To copy specified bytes of data from system memory to device memory @@ -882,6 +1243,22 @@ DECL_EXPORT bm_status_t bm_memcpy_s2d_partial(bm_handle_t handle, bm_device_mem_ DECL_EXPORT bm_status_t sg_memcpy_s2d_partial(bm_handle_t handle, sg_device_mem_t dst, void *src, unsigned long long size); +/** + * @name bm_memcpy_s2d_partial_u64 + * @brief To copy specified bytes of data from system memory to device memory + * @ingroup bmlib_runtime + * + * @param [in] handle The device handle + * @param [in] dst The destination memory (device memory descriptor) + * @param [in] src The source memory (system memory, a void* pointer) + * @param [in] size The size of data to copy (in bytes) + * + * @retval BM_SUCCESS Succeeds. + * Other code Fails. 
+ */ +DECL_EXPORT bm_status_t bm_memcpy_s2d_partial_u64(bm_handle_t handle, bm_device_mem_u64_t dst, + void *src, unsigned long long size); + /** * @name bm_memcpy_d2s * @brief To copy data from device memory to system memory @@ -910,6 +1287,20 @@ DECL_EXPORT bm_status_t bm_memcpy_d2s(bm_handle_t handle, void *dst, bm_device_m */ DECL_EXPORT bm_status_t sg_memcpy_d2s(bm_handle_t handle, void *dst, sg_device_mem_t src); +/** + * @name bm_memcpy_d2s_u64 + * @brief To copy data from device memory to system memory + * @ingroup bmlib_runtime + * + * @param [in] handle The device handle + * @param [in] dst The destination memory (system memory, a void* pointer) + * @param [in] src The source memory (device memory descriptor) + * + * @retval BM_SUCCESS Succeeds. + * Other code Fails. + */ +DECL_EXPORT bm_status_t bm_memcpy_d2s_u64(bm_handle_t handle, void *dst, bm_device_mem_u64_t src); + /** * @name bm_memcpy_d2s_partial_offset * @brief To copy specified bytes of data from device memory to system memory @@ -948,6 +1339,25 @@ DECL_EXPORT bm_status_t sg_memcpy_d2s_partial_offset(bm_handle_t handle, void *d sg_device_mem_t src, unsigned long long size, unsigned long long offset); +/** + * @name bm_memcpy_d2s_partial_offset_u64 + * @brief To copy specified bytes of data from device memory to system memory + * with an offset in device memory address. + * @ingroup bmlib_runtime + * + * @param [in] handle The device handle + * @param [in] dst The destination memory (system memory, a void* pointer) + * @param [in] src The source memory (device memory descriptor) + * @param [in] size The size of data to copy (in bytes) + * @param [in] offset The offset of the device memory address + * + * @retval BM_SUCCESS Succeeds. + * Other code Fails. 
+ */ +DECL_EXPORT bm_status_t bm_memcpy_d2s_partial_offset_u64(bm_handle_t handle, void *dst, + bm_device_mem_u64_t src, unsigned long long size, + unsigned long long offset); + /** * @name bm_memcpy_d2s_partial * @brief To copy specified bytes of data from device memory to system memory @@ -980,6 +1390,22 @@ DECL_EXPORT bm_status_t bm_memcpy_d2s_partial(bm_handle_t handle, void *dst, DECL_EXPORT bm_status_t sg_memcpy_d2s_partial(bm_handle_t handle, void *dst, sg_device_mem_t src, unsigned long long size); +/** + * @name bm_memcpy_d2s_partial_u64 + * @brief To copy specified bytes of data from device memory to system memory + * @ingroup bmlib_runtime + * + * @param [in] handle The device handle + * @param [in] dst The destination memory (system memory, a void* pointer) + * @param [in] src The source memory (device memory descriptor) + * @param [in] size The size of data to copy (in bytes) + * + * @retval BM_SUCCESS Data transfer succeeds. + * Other code Data transfer fails. + */ +DECL_EXPORT bm_status_t bm_memcpy_d2s_partial_u64(bm_handle_t handle, void *dst, + bm_device_mem_u64_t src, unsigned long long size); + /** * @name bm_memcpy_d2d * @brief To copy specified dwords of data from one piece of device memory @@ -1215,6 +1641,22 @@ DECL_EXPORT bm_status_t bm_mem_mmap_device_mem(bm_handle_t handle, bm_device_mem DECL_EXPORT bm_status_t sg_mem_mmap_device_mem(bm_handle_t handle, sg_device_mem_t *dmem, unsigned long long *vmem); +/** + * @name bm_mem_mmap_device_mem_u64 + * @brief To map a piece of device memory to user space with cache enabled. + * (only valid in SoC mode; Not supported in PCIE mode). + * @ingroup bmlib_runtime + * + * @param [in] handle The device handle + * @param [in] dev_mem The device memory to map + * @param [out] vmem The virtual address of the mapped device memory + * + * @retval BM_SUCCESS Succeeds. + * Other code Fails. 
+ */ +DECL_EXPORT bm_status_t bm_mem_mmap_device_mem_u64(bm_handle_t handle, bm_device_mem_u64_t *dmem, + unsigned long long *vmem); + /*******************memory map functions *************************************/ /** * @name bm_mem_mmap_device_mem_no_cache @@ -1249,6 +1691,22 @@ DECL_EXPORT bm_status_t bm_mem_mmap_device_mem_no_cache(bm_handle_t handle, bm_d DECL_EXPORT bm_status_t sg_mem_mmap_device_mem_no_cache(bm_handle_t handle, sg_device_mem_t *dmem, unsigned long long *vmem); +/** + * @name bm_mem_mmap_device_mem_no_cache_u64 + * @brief To map a piece of device memory to user space with cache disabled. + * (only valid in SoC mode; Not supported in PCIE mode). + * @ingroup bmlib_runtime + * + * @param [in] handle The device handle + * @param [in] dev_mem The device memory to map + * @param [out] vmem The virtual address of the mapped device memory + * + * @retval BM_SUCCESS Succeeds. + * Other code Fails. + */ +DECL_EXPORT bm_status_t bm_mem_mmap_device_mem_no_cache_u64(bm_handle_t handle, bm_device_mem_u64_t *dmem, + unsigned long long *vmem); + /** * @name bm_mem_vir_to_phy * @brief To get device mem address through the mapped virtual address . @@ -1298,6 +1756,23 @@ DECL_EXPORT bm_status_t bm_mem_invalidate_device_mem(bm_handle_t handle, DECL_EXPORT bm_status_t sg_mem_invalidate_device_mem(bm_handle_t handle, sg_device_mem_t *dmem); +/** + * @name bm_mem_invalidate_device_mem_u64 + * @brief To invalidate a piece of mapped device memory to maintain + * cache coherence + * (only valid in SoC mode; Not supported in PCIE mode). + * @ingroup bmlib_runtime + * + * @param [in] handle The device handle + * @param [in] dmem The device memory to invalidate + * + * @retval BM_SUCCESS Succeeds. + * Other code Fails. 
+ */ + +DECL_EXPORT bm_status_t bm_mem_invalidate_device_mem_u64(bm_handle_t handle, + bm_device_mem_u64_t *dmem); + /** * @name bm_mem_invalidate_partial_device_mem * @brief To invalidate part of mapped device memory to maintain @@ -1338,6 +1813,26 @@ DECL_EXPORT bm_status_t sg_mem_invalidate_partial_device_mem(bm_handle_t handle, unsigned long long offset, unsigned long long len); +/** + * @name bm_mem_invalidate_partial_device_mem_u64 + * @brief To invalidate part of mapped device memory to maintain + * cache coherence + * (only valid in SoC mode; Not supported in PCIE mode). + * @ingroup bmlib_runtime + * + * @param [in] handle The device handle + * @param [in] dmem The device memory to invalidate + * @param [in] offset The offset of device memory address + * @param [in] len The length of memory to invalidate in bytes + * + * @retval BM_SUCCESS Succeeds. + * Other code Fails. + */ +DECL_EXPORT bm_status_t bm_mem_invalidate_partial_device_mem_u64(bm_handle_t handle, + bm_device_mem_u64_t *dmem, + unsigned long long offset, + unsigned long long len); + /** * @name bm_mem_flush_device_mem * @brief To flush a piece of mapped device memory to maintain @@ -1368,6 +1863,21 @@ DECL_EXPORT bm_status_t bm_mem_flush_device_mem(bm_handle_t handle, bm_device_me */ DECL_EXPORT bm_status_t sg_mem_flush_device_mem(bm_handle_t handle, sg_device_mem_t *dmem); +/** + * @name bm_mem_flush_device_mem_u64 + * @brief To flush a piece of mapped device memory to maintain + * cache coherence + * (only valid in SoC mode; Not supported in PCIE mode). + * @ingroup bmlib_runtime + * + * @param [in] handle The device handle + * @param [in] dmem The device memory to flush + * + * @retval BM_SUCCESS Succeeds. + * Other code Fails. 
+ */ +DECL_EXPORT bm_status_t bm_mem_flush_device_mem_u64(bm_handle_t handle, bm_device_mem_u64_t *dmem); + /** * @name bm_mem_flush_partial_device_mem * @brief To flush part of mapped device memory to maintain @@ -1408,6 +1918,26 @@ DECL_EXPORT bm_status_t sg_mem_flush_partial_device_mem(bm_handle_t handle, unsigned long long offset, unsigned long long len); +/** + * @name bm_mem_flush_partial_device_mem_u64 + * @brief To flush part of mapped device memory to maintain + * cache coherence + * (only valid in SoC mode; Not supported in PCIE mode). + * @ingroup bmlib_runtime + * + * @param [in] handle The device handle + * @param [in] dmem The device memory to flush + * @param [in] offset The offset of device memory address + * @param [in] len The length of memory to flush in bytes + * + * @retval BM_SUCCESS Succeeds. + * Other code Fails. + */ +DECL_EXPORT bm_status_t bm_mem_flush_partial_device_mem_u64(bm_handle_t handle, + bm_device_mem_u64_t *dmem, + unsigned long long offset, + unsigned long long len); + /** * @name bm_mem_unmap_device_mem * @brief To unmap a piece of mapped device memory @@ -1438,6 +1968,21 @@ DECL_EXPORT bm_status_t bm_mem_unmap_device_mem(bm_handle_t handle, void *vmem, */ DECL_EXPORT bm_status_t sg_mem_unmap_device_mem(bm_handle_t handle, void *vmem, unsigned long long size); +/** + * @name bm_mem_unmap_device_mem_u64 + * @brief To unmap a piece of mapped device memory + * (only valid in SoC mode; Not supported in PCIE mode). + * @ingroup bmlib_runtime + * + * @param [in] handle The device handle + * @param [in] vmem The virtual address of the mapped device memory + * @param [in] size The size of unmapped memory + * + * @retval BM_SUCCESS Succeeds. + * Other code Fails. 
+ */ +DECL_EXPORT bm_status_t bm_mem_unmap_device_mem_u64(bm_handle_t handle, void *vmem, unsigned long long size); + /*******************api(kernel) functions *************************************/ /** * @name bm_flush @@ -1485,6 +2030,31 @@ DECL_EXPORT bm_status_t bm_handle_sync(bm_handle_t handle); */ DECL_EXPORT bm_status_t bm_thread_sync(bm_handle_t handle); +/** + * @name bm_set_sync_timeout + * @brief To set sync timeout ms. + * @ingroup bmlib_runtime + * + * @param [in] handle The device handle + * @param [in] timeout Sync timeout + * @retval BM_SUCCESS Succeeds. + * Other code Fails. + */ +DECL_EXPORT bm_status_t bm_set_sync_timeout(bm_handle_t handle, int timeout); + +/** + * @name bm_thread_sync_from_core + * @brief To synchronize APIs of the current thread on the specified core. The thread will block + * until all the outstanding APIs of the current thread are finished. + * @ingroup bmlib_runtime + * + * @param [in] handle The device handle + * @param [in] core_id The device core id + * @retval BM_SUCCESS Succeeds. + * Other code Fails. 
+ */ +DECL_EXPORT bm_status_t bm_thread_sync_from_core(bm_handle_t handle, int core_id); + /*******************trace and profile releated functions **********************/ typedef struct bm_profile { #ifdef __linux__ @@ -1493,6 +2063,7 @@ typedef struct bm_profile { unsigned long cdma_out_time; unsigned long cdma_out_counter; unsigned long tpu_process_time; + unsigned long tpu1_process_time; unsigned long sent_api_counter; unsigned long completed_api_counter; #else @@ -1501,6 +2072,7 @@ typedef struct bm_profile { unsigned long long cdma_out_time; unsigned long long cdma_out_counter; unsigned long long tpu_process_time; + unsigned long long tpu1_process_time; unsigned long long sent_api_counter; unsigned long long completed_api_counter; #endif @@ -2068,6 +2640,19 @@ DECL_EXPORT bm_status_t bm_get_gmem_heap_id(bm_handle_t handle, bm_device_mem_t DECL_EXPORT bm_status_t sg_get_gmem_heap_id(bm_handle_t handle, sg_device_mem_t *pmem, unsigned int *heapid); +/** + * @name bm_get_gmem_heap_id_u64 + * @brief To get the heap id of allocated global memory + * @ingroup bmlib_runtime + * + * @param [in] handle The device handle + * @param [in] pmem The allocted global memory + * @param [out] heapid The result of get heap id + * @retval BM_SUCCESS Succeeds. + * Other code Fails. + */ +DECL_EXPORT bm_status_t bm_get_gmem_heap_id_u64(bm_handle_t handle, bm_device_mem_u64_t *pmem, unsigned int *heapid); + /** * @name bm_get_gmem_total_heap_num * @brief To get the total heap num of global memory @@ -2356,7 +2941,7 @@ DECL_EXPORT bm_status_t bm_get_tpu_power(bm_handle_t handle, float *tpu_power); * @ingroup device management api * * @param [in] handle The device handle - * @param [out] tpu_volt + * @param [out] The tpu current volt * @retval BM_SUCCESS Succeeds. * Other code Fails. 
*/ diff --git a/bmvid/3rdparty/libbmcv/lib/pcie/libbmlib.so.0 b/bmvid/3rdparty/libbmcv/lib/pcie/libbmlib.so.0 index 6f4b687..b07f068 100644 Binary files a/bmvid/3rdparty/libbmcv/lib/pcie/libbmlib.so.0 and b/bmvid/3rdparty/libbmcv/lib/pcie/libbmlib.so.0 differ diff --git a/bmvid/3rdparty/libbmcv/lib/pcie_arm64/libbmlib.so.0 b/bmvid/3rdparty/libbmcv/lib/pcie_arm64/libbmlib.so.0 index dae115d..e387678 100755 Binary files a/bmvid/3rdparty/libbmcv/lib/pcie_arm64/libbmlib.so.0 and b/bmvid/3rdparty/libbmcv/lib/pcie_arm64/libbmlib.so.0 differ diff --git a/bmvid/3rdparty/libbmcv/lib/pcie_loongarch64/libbmlib.so.0 b/bmvid/3rdparty/libbmcv/lib/pcie_loongarch64/libbmlib.so.0 index 18e42e7..22d65ab 100755 Binary files a/bmvid/3rdparty/libbmcv/lib/pcie_loongarch64/libbmlib.so.0 and b/bmvid/3rdparty/libbmcv/lib/pcie_loongarch64/libbmlib.so.0 differ diff --git a/bmvid/3rdparty/libbmcv/lib/pcie_riscv64/libbmlib.so b/bmvid/3rdparty/libbmcv/lib/pcie_riscv64/libbmlib.so deleted file mode 100644 index f89c67a..0000000 Binary files a/bmvid/3rdparty/libbmcv/lib/pcie_riscv64/libbmlib.so and /dev/null differ diff --git a/bmvid/3rdparty/libbmcv/lib/pcie_riscv64/libbmlib.so b/bmvid/3rdparty/libbmcv/lib/pcie_riscv64/libbmlib.so new file mode 120000 index 0000000..3018cb7 --- /dev/null +++ b/bmvid/3rdparty/libbmcv/lib/pcie_riscv64/libbmlib.so @@ -0,0 +1 @@ +libbmlib.so.0 \ No newline at end of file diff --git a/bmvid/3rdparty/libbmcv/lib/pcie_riscv64/libbmlib.so.0 b/bmvid/3rdparty/libbmcv/lib/pcie_riscv64/libbmlib.so.0 new file mode 100644 index 0000000..b4fee2c Binary files /dev/null and b/bmvid/3rdparty/libbmcv/lib/pcie_riscv64/libbmlib.so.0 differ diff --git a/bmvid/3rdparty/libbmcv/lib/pcie_sw64/libvpp_cmodel.so b/bmvid/3rdparty/libbmcv/lib/pcie_sw64/libvpp_cmodel.so new file mode 100755 index 0000000..a98fc2d Binary files /dev/null and b/bmvid/3rdparty/libbmcv/lib/pcie_sw64/libvpp_cmodel.so differ diff --git a/bmvid/3rdparty/libbmcv/lib/pcie_windows/libbmlib-static.lib 
b/bmvid/3rdparty/libbmcv/lib/pcie_windows/libbmlib-static.lib index e406e7c..abcf3d5 100644 Binary files a/bmvid/3rdparty/libbmcv/lib/pcie_windows/libbmlib-static.lib and b/bmvid/3rdparty/libbmcv/lib/pcie_windows/libbmlib-static.lib differ diff --git a/bmvid/3rdparty/libbmcv/lib/pcie_windows/libbmlib.dll b/bmvid/3rdparty/libbmcv/lib/pcie_windows/libbmlib.dll index e746a71..b9207d6 100644 Binary files a/bmvid/3rdparty/libbmcv/lib/pcie_windows/libbmlib.dll and b/bmvid/3rdparty/libbmcv/lib/pcie_windows/libbmlib.dll differ diff --git a/bmvid/3rdparty/libbmcv/lib/pcie_windows/libbmlib.lib b/bmvid/3rdparty/libbmcv/lib/pcie_windows/libbmlib.lib index 37f7555..276ea63 100644 Binary files a/bmvid/3rdparty/libbmcv/lib/pcie_windows/libbmlib.lib and b/bmvid/3rdparty/libbmcv/lib/pcie_windows/libbmlib.lib differ diff --git a/bmvid/3rdparty/libbmcv/lib/soc/libbmlib.so.0 b/bmvid/3rdparty/libbmcv/lib/soc/libbmlib.so.0 index 8fbc2d0..e387678 100644 Binary files a/bmvid/3rdparty/libbmcv/lib/soc/libbmlib.so.0 and b/bmvid/3rdparty/libbmcv/lib/soc/libbmlib.so.0 differ diff --git a/bmvid/3rdparty/tpu_kernel_module/libbm1684x_kernel_module.so b/bmvid/3rdparty/tpu_kernel_module/libbm1684x_kernel_module.so old mode 100755 new mode 100644 index 994e5cd..a987386 Binary files a/bmvid/3rdparty/tpu_kernel_module/libbm1684x_kernel_module.so and b/bmvid/3rdparty/tpu_kernel_module/libbm1684x_kernel_module.so differ diff --git a/bmvid/CMakeLists.txt b/bmvid/CMakeLists.txt index 3e3d387..9fe1cc4 100644 --- a/bmvid/CMakeLists.txt +++ b/bmvid/CMakeLists.txt @@ -5,6 +5,15 @@ endif() project(bmvid_win C CXX) +# update commit and branch +execute_process( + COMMAND bash -c "find \"$(git rev-parse --show-toplevel)\" -type f -name \"update_bmvid_version.sh\" -exec bash {} \\;" + RESULT_VARIABLE result + OUTPUT_VARIABLE output + ERROR_VARIABLE error_output +) +#end + if(CMAKE_HOST_WIN32) set(WINDOWS 1) elseif(CMAKE_HOST_APPLE) @@ -140,7 +149,7 @@ if (CHIP_NAME STREQUAL bm1684) if(${CHIP_NAME} STREQUAL 
"bm1684") ADD_TARGET_ION_LIB(ion_lib ${CHIP_NAME} ${PLATFORM} ${SUBTYPE} ${DEBUG} ${BM_MEDIA_ION} libsophon ${BMVID_OUT_PATH}) - ADD_TARGET_BMCV_LIB(bmcv_lib ${CHIP_NAME} ${PLATFORM} ${SUBTYPE} ${DEBUG} ${BM_MEDIA_ION} + ADD_TARGET_BMCV_LIB(bmcv_lib ${CHIP_NAME} ${PLATFORM} ${SUBTYPE} ${DEBUG} ${BM_MEDIA_ION} libsophon ${BMVID_OUT_PATH} ${BMVID_OUT_PATH} ${BMVID_OUT_PATH} ${BMVID_OUT_PATH}) add_dependencies(vpp_lib ion_lib) @@ -177,6 +186,7 @@ if (CHIP_NAME STREQUAL bm1684) elseif(WINDOWS) message(STATUS "entry WINDOW") + add_definitions(-DBM_PCIE_MODE=1) if(NOT CACHED_INSTALL_PATH) set(CMAKE_INSTALL_PREFIX ${CMAKE_CURRENT_SOURCE_DIR}/release) diff --git a/bmvid/allocator/ion/libbmion.so.0 b/bmvid/allocator/ion/libbmion.so.0 index 6913bcc..e0a7845 120000 --- a/bmvid/allocator/ion/libbmion.so.0 +++ b/bmvid/allocator/ion/libbmion.so.0 @@ -1 +1 @@ -libbmion.so.0.7.1 \ No newline at end of file +libbmion.so.0.10.0 \ No newline at end of file diff --git a/bmvid/allocator/ion/libbmion.so.0.7.1 b/bmvid/allocator/ion/libbmion.so.0.10.0 similarity index 100% rename from bmvid/allocator/ion/libbmion.so.0.7.1 rename to bmvid/allocator/ion/libbmion.so.0.10.0 diff --git a/bmvid/allocator/ion/libbmion.so.0.6.1 b/bmvid/allocator/ion/libbmion.so.0.6.1 new file mode 100755 index 0000000..590b921 Binary files /dev/null and b/bmvid/allocator/ion/libbmion.so.0.6.1 differ diff --git a/bmvid/bmcv/CMakeLists.txt b/bmvid/bmcv/CMakeLists.txt index d6969f2..c413a1f 100755 --- a/bmvid/bmcv/CMakeLists.txt +++ b/bmvid/bmcv/CMakeLists.txt @@ -104,7 +104,8 @@ if(WIN32) src/bm1684/bmcv_1684_vpp_internal.cpp src/bm1684x/bmcv_1684x_vpp_internal.cpp src/bmcv_api_bayer2rgb.cpp - src/bmcv_api_as_strided.cpp + src/bmcv_api_as_strided.cpp + src/bmcv_api_hist_balance.cpp # src/md5.cpp ) endif() diff --git a/bmvid/bmcv/doc/source/api/draw_rectangle.rst b/bmvid/bmcv/doc/source/api/draw_rectangle.rst index e416756..b739bd2 100644 --- a/bmvid/bmcv/doc/source/api/draw_rectangle.rst +++ 
b/bmvid/bmcv/doc/source/api/draw_rectangle.rst @@ -63,11 +63,11 @@ bmcv_image_draw_rectangle .. code-block:: c - typedef struct bmcv_rect { + typedef struct bmcv_rect { int start_x; int start_y; int crop_w; - int crop_h; + int crop_h; } bmcv_rect_t; @@ -110,8 +110,6 @@ bmcv_image_draw_rectangle 6. 如果line_width小于零,则返回失败。 -7. 所有输入矩形对象部分在image之外,则只会画出在image之内的线条,并返回成功。 - **代码示例** @@ -126,15 +124,15 @@ bmcv_image_draw_rectangle #include "stdlib.h" #include "string.h" #include - + int main(int argc, char *argv[]) { bm_handle_t handle; bm_dev_request(&handle, 0); - + int image_h = 1080; int image_w = 1920; bm_image src; - bm_image_create(handle, image_h, image_w, FORMAT_NV12, + bm_image_create(handle, image_h, image_w, FORMAT_NV12, DATA_TYPE_EXT_1N_BYTE, &src); std::shared_ptr y_ptr = std::make_shared( new u8[image_h * image_w]); diff --git a/bmvid/bmcv/doc/source/bm_image/bm_image_zeros.rst b/bmvid/bmcv/doc/source/bm_image/bm_image_zeros.rst new file mode 100644 index 0000000..38be17a --- /dev/null +++ b/bmvid/bmcv/doc/source/bm_image/bm_image_zeros.rst @@ -0,0 +1,26 @@ +bm_image_zeros +===================== + +该接口用于将申请的 bm_image 空间进行内存清零,避免内存中原本的数据造成影响。 + +**接口形式:** + + .. 
code-block:: c + + bm_status_t bm_image_zeros(bm_image image); + + +**输入参数说明:** + +* bm_image image + +输入参数。 bm_image 结构体。 + + + +**返回值说明:** + +* BM_SUCCESS: 成功 + +* 其他:失败 + diff --git a/bmvid/bmcv/doc/source/index.rst b/bmvid/bmcv/doc/source/index.rst index 721a4c7..929d7ca 100755 --- a/bmvid/bmcv/doc/source/index.rst +++ b/bmvid/bmcv/doc/source/index.rst @@ -78,6 +78,7 @@ bm_image 介绍 bm_image/bm_image_get_plane_num bm_image/bm_image_is_attached bm_image/bm_image_get_handle + bm_image/bm_image_zeros bm_image device memory 管理 diff --git a/bmvid/bmcv/include/bmcv_api_ext.h b/bmvid/bmcv/include/bmcv_api_ext.h index eea8a37..984a842 100755 --- a/bmvid/bmcv/include/bmcv_api_ext.h +++ b/bmvid/bmcv/include/bmcv_api_ext.h @@ -145,6 +145,7 @@ typedef enum bm_image_format_ext_ { FORMAT_HSV180_PACKED, FORMAT_HSV256_PACKED, FORMAT_BAYER, + FORMAT_BAYER_RG8, } bm_image_format_ext; typedef enum bmcv_resize_algorithm_ { @@ -1099,6 +1100,12 @@ DECL_EXPORT bm_status_t bmcv_calc_hist_with_weight(bm_handle_t handle, const int *histSizes, const float *ranges, int inputDtype); +DECL_EXPORT bm_status_t bmcv_hist_balance( + bm_handle_t handle, + bm_device_mem_t input, + bm_device_mem_t output, + int H, + int W); DECL_EXPORT bm_status_t bmcv_distance(bm_handle_t handle, bm_device_mem_t input, @@ -1215,6 +1222,11 @@ DECL_EXPORT bm_status_t bmcv_image_threshold( unsigned char max_value, bm_thresh_type_t type); +DECL_EXPORT bm_status_t bmcv_image_quantify( + bm_handle_t handle, + bm_image input, + bm_image output); + DECL_EXPORT bm_status_t bmcv_image_gaussian_blur( bm_handle_t handle, bm_image input, @@ -1532,6 +1544,16 @@ DECL_EXPORT bm_status_t bmcv_matrix_log( bm_handle_t handle, bm_image src, bm_image dst); + +DECL_EXPORT bm_status_t bmcv_image_rotate( + bm_handle_t handle, + bm_image input, + bm_image output, + int rotation_angle); + +DECL_EXPORT bm_status_t bm_image_zeros(bm_image image); + +DECL_EXPORT unsigned long long bmcv_calc_cbcr_addr(unsigned long long y_addr, unsigned int 
y_stride, unsigned int frame_height); #if defined(__cplusplus) } #endif diff --git a/bmvid/bmcv/include/bmcv_api_ext_c.h b/bmvid/bmcv/include/bmcv_api_ext_c.h index cd6495f..eadf511 100644 --- a/bmvid/bmcv/include/bmcv_api_ext_c.h +++ b/bmvid/bmcv/include/bmcv_api_ext_c.h @@ -145,6 +145,7 @@ typedef enum bm_image_format_ext_ { FORMAT_HSV180_PACKED, FORMAT_HSV256_PACKED, FORMAT_BAYER, + FORMAT_BAYER_RG8, } bm_image_format_ext; typedef enum bmcv_resize_algorithm_ { @@ -579,13 +580,15 @@ DECL_EXPORT bm_status_t bmcv_image_jpeg_enc( bm_image * src, void ** p_jpeg_data, size_t * out_size, - int quality_factor); + int quality_factor, + int bs_in_device); DECL_EXPORT bm_status_t bmcv_image_jpeg_dec( bm_handle_t handle, void ** p_jpeg_data, size_t * in_size, int image_num, - bm_image * dst); + bm_image * dst, + int bs_in_device); DECL_EXPORT bm_status_t bmcv_nms( bm_handle_t handle, @@ -1106,6 +1109,13 @@ DECL_EXPORT bm_status_t bmcv_calc_hist_with_weight(bm_handle_t handle, const float *ranges, int inputDtype); +DECL_EXPORT bm_status_t bmcv_hist_balance( + bm_handle_t handle, + bm_device_mem_t input, + bm_device_mem_t output, + int H, + int W); + DECL_EXPORT bm_status_t bmcv_distance(bm_handle_t handle, bm_device_mem_t input, bm_device_mem_t output, @@ -1221,6 +1231,11 @@ DECL_EXPORT bm_status_t bmcv_image_threshold( unsigned char max_value, bm_thresh_type_t type); +DECL_EXPORT bm_status_t bmcv_image_quantify( + bm_handle_t handle, + bm_image input, + bm_image output); + DECL_EXPORT bm_status_t bmcv_image_gaussian_blur( bm_handle_t handle, bm_image input, @@ -1538,6 +1553,11 @@ DECL_EXPORT bm_status_t bmcv_matrix_log( bm_handle_t handle, bm_image src, bm_image dst); + +DECL_EXPORT bm_status_t bm_image_zeros(bm_image image); +DECL_EXPORT unsigned long long bmcv_calc_cbcr_addr(unsigned long long y_addr, unsigned int y_stride, unsigned int frame_height); + + #if defined(__cplusplus) } #endif diff --git a/bmvid/bmcv/include/bmcv_internal.h b/bmvid/bmcv/include/bmcv_internal.h 
index ca1baef..e41d561 100644 --- a/bmvid/bmcv/include/bmcv_internal.h +++ b/bmvid/bmcv/include/bmcv_internal.h @@ -5,7 +5,7 @@ #include #ifndef USING_CMODEL -#include "bmjpuapi_jpeg.h" +#include "bm_jpeg_interface.h" #endif @@ -234,6 +234,15 @@ struct dynamic_load_param{ char param[0]; }; +bm_status_t bm_handle_check_1(bm_handle_t handle, + bm_image image1); +bm_status_t bm_handle_check_2(bm_handle_t handle, + bm_image image1, + bm_image image2); +bm_status_t bm_handle_check_3(bm_handle_t handle, + bm_image image1, + bm_image image2, + bm_image image3); int find_tpufirmaware_path(char fw_path[512], const char* path); bm_status_t bm_load_tpu_module(bm_handle_t handle, tpu_kernel_module_t *tpu_module); bm_status_t bm_kernel_main_launch(bm_handle_t handle, int api_id, void *param, size_t size); diff --git a/bmvid/bmcv/release/bmcv/include/bmcv_api_ext.h b/bmvid/bmcv/release/bmcv/include/bmcv_api_ext.h index eea8a37..984a842 100755 --- a/bmvid/bmcv/release/bmcv/include/bmcv_api_ext.h +++ b/bmvid/bmcv/release/bmcv/include/bmcv_api_ext.h @@ -145,6 +145,7 @@ typedef enum bm_image_format_ext_ { FORMAT_HSV180_PACKED, FORMAT_HSV256_PACKED, FORMAT_BAYER, + FORMAT_BAYER_RG8, } bm_image_format_ext; typedef enum bmcv_resize_algorithm_ { @@ -1099,6 +1100,12 @@ DECL_EXPORT bm_status_t bmcv_calc_hist_with_weight(bm_handle_t handle, const int *histSizes, const float *ranges, int inputDtype); +DECL_EXPORT bm_status_t bmcv_hist_balance( + bm_handle_t handle, + bm_device_mem_t input, + bm_device_mem_t output, + int H, + int W); DECL_EXPORT bm_status_t bmcv_distance(bm_handle_t handle, bm_device_mem_t input, @@ -1215,6 +1222,11 @@ DECL_EXPORT bm_status_t bmcv_image_threshold( unsigned char max_value, bm_thresh_type_t type); +DECL_EXPORT bm_status_t bmcv_image_quantify( + bm_handle_t handle, + bm_image input, + bm_image output); + DECL_EXPORT bm_status_t bmcv_image_gaussian_blur( bm_handle_t handle, bm_image input, @@ -1532,6 +1544,16 @@ DECL_EXPORT bm_status_t bmcv_matrix_log( 
bm_handle_t handle, bm_image src, bm_image dst); + +DECL_EXPORT bm_status_t bmcv_image_rotate( + bm_handle_t handle, + bm_image input, + bm_image output, + int rotation_angle); + +DECL_EXPORT bm_status_t bm_image_zeros(bm_image image); + +DECL_EXPORT unsigned long long bmcv_calc_cbcr_addr(unsigned long long y_addr, unsigned int y_stride, unsigned int frame_height); #if defined(__cplusplus) } #endif diff --git a/bmvid/bmcv/release/bmcv/include/bmcv_api_ext_c.h b/bmvid/bmcv/release/bmcv/include/bmcv_api_ext_c.h index cd6495f..eadf511 100644 --- a/bmvid/bmcv/release/bmcv/include/bmcv_api_ext_c.h +++ b/bmvid/bmcv/release/bmcv/include/bmcv_api_ext_c.h @@ -145,6 +145,7 @@ typedef enum bm_image_format_ext_ { FORMAT_HSV180_PACKED, FORMAT_HSV256_PACKED, FORMAT_BAYER, + FORMAT_BAYER_RG8, } bm_image_format_ext; typedef enum bmcv_resize_algorithm_ { @@ -579,13 +580,15 @@ DECL_EXPORT bm_status_t bmcv_image_jpeg_enc( bm_image * src, void ** p_jpeg_data, size_t * out_size, - int quality_factor); + int quality_factor, + int bs_in_device); DECL_EXPORT bm_status_t bmcv_image_jpeg_dec( bm_handle_t handle, void ** p_jpeg_data, size_t * in_size, int image_num, - bm_image * dst); + bm_image * dst, + int bs_in_device); DECL_EXPORT bm_status_t bmcv_nms( bm_handle_t handle, @@ -1106,6 +1109,13 @@ DECL_EXPORT bm_status_t bmcv_calc_hist_with_weight(bm_handle_t handle, const float *ranges, int inputDtype); +DECL_EXPORT bm_status_t bmcv_hist_balance( + bm_handle_t handle, + bm_device_mem_t input, + bm_device_mem_t output, + int H, + int W); + DECL_EXPORT bm_status_t bmcv_distance(bm_handle_t handle, bm_device_mem_t input, bm_device_mem_t output, @@ -1221,6 +1231,11 @@ DECL_EXPORT bm_status_t bmcv_image_threshold( unsigned char max_value, bm_thresh_type_t type); +DECL_EXPORT bm_status_t bmcv_image_quantify( + bm_handle_t handle, + bm_image input, + bm_image output); + DECL_EXPORT bm_status_t bmcv_image_gaussian_blur( bm_handle_t handle, bm_image input, @@ -1538,6 +1553,11 @@ DECL_EXPORT 
bm_status_t bmcv_matrix_log( bm_handle_t handle, bm_image src, bm_image dst); + +DECL_EXPORT bm_status_t bm_image_zeros(bm_image image); +DECL_EXPORT unsigned long long bmcv_calc_cbcr_addr(unsigned long long y_addr, unsigned int y_stride, unsigned int frame_height); + + #if defined(__cplusplus) } #endif diff --git a/bmvid/bmcv/src/bm1684x/bmcv_1684x_vpp_internal.cpp b/bmvid/bmcv/src/bm1684x/bmcv_1684x_vpp_internal.cpp index 4e1d903..f0b0ead 100755 --- a/bmvid/bmcv/src/bm1684x/bmcv_1684x_vpp_internal.cpp +++ b/bmvid/bmcv/src/bm1684x/bmcv_1684x_vpp_internal.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include "bmlib_interface.h" #include "bmcv_api_ext.h" @@ -116,17 +117,17 @@ static bm_status_t simple_check_bm1684x_input_param( if (handle == NULL) { bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_ERROR, "handle is nullptr"); - return BM_ERR_FAILURE; + return BM_ERR_DEVNOTREADY; } if (input_or_output == NULL) { bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_ERROR, "input or output is nullptr"); - return BM_ERR_FAILURE; + return BM_ERR_DATA; } if((frame_number > VPP1684X_MAX_CROP_NUM) || (frame_number <= 0)) { bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_ERROR, "input num should less than 512"); - return BM_NOT_SUPPORTED; + return BM_ERR_PARAM; } return BM_SUCCESS;; @@ -198,16 +199,16 @@ static bm_status_t check_bm1684x_bm_image_param( !bm_image_is_attached(output[frame_idx])) { bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_ERROR, - "not correctly create bm_image ,frame [%d] input or output not attache mem %s: %s: %d\n", - frame_idx,filename(__FILE__), __func__, __LINE__); - return BM_ERR_PARAM; + "not correctly create bm_image ,frame [%d] input or output not attache mem check_bm_image_param: %d\n", + frame_idx, __LINE__); + return BM_ERR_DATA; } if(input[frame_idx].data_type != DATA_TYPE_EXT_1N_BYTE) { bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_ERROR, - "vpp input only support DATA_TYPE_EXT_1N_BYTE,frame [%d], %s: %s: %d\n", - frame_idx, filename(__FILE__), __func__, __LINE__); - return BM_NOT_SUPPORTED; + "vpp 
input only support DATA_TYPE_EXT_1N_BYTE,frame [%d], check_bm_image_param: %d\n", + frame_idx, __LINE__); + return BM_ERR_DATA; } if((output[frame_idx].data_type != DATA_TYPE_EXT_FLOAT32) && @@ -217,27 +218,27 @@ static bm_status_t check_bm1684x_bm_image_param( (output[frame_idx].data_type != DATA_TYPE_EXT_BF16)) { bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_ERROR, - "bm1684x vpp[%d] output data type %d not support %s: %s: %d\n", - frame_idx ,output[frame_idx].data_type, filename(__FILE__), __func__, __LINE__); - return BM_NOT_SUPPORTED; + "vpp[%d] output data type %d not support check_bm_image_param: %d\n", + frame_idx ,output[frame_idx].data_type, __LINE__); + return BM_ERR_DATA; } if((bm_image_get_stride(input[frame_idx], stride) != BM_SUCCESS) || (bm_image_get_stride(output[frame_idx], stride) != BM_SUCCESS)) { bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_ERROR, - "not correctly create input bm_image , frame [%d],input or output get stride err %s: %s: %d\n", - frame_idx,filename(__FILE__), __func__, __LINE__); - return BM_ERR_FAILURE; + "not correctly create input bm_image , frame [%d],input or output get stride err check_bm_image_param: %d\n", + frame_idx, __LINE__); + return BM_ERR_DATA; } plane_num = bm_image_get_plane_num(input[frame_idx]); if(plane_num == 0 || bm_image_get_device_mem(input[frame_idx], device_mem) != BM_SUCCESS) { bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_ERROR, - "not correctly create input[%d] bm_image, get plane num or device mem err %s: %s: %d\n", - frame_idx, filename(__FILE__), __func__, __LINE__); - return BM_ERR_FAILURE; + "not correctly create input[%d] bm_image, get plane num or device mem err check_bm_image_param: %d\n", + frame_idx, __LINE__); + return BM_ERR_DATA; } #ifndef USING_CMODEL u64 device_addr = 0; @@ -248,9 +249,9 @@ static bm_status_t check_bm1684x_bm_image_param( if((device_addr > 0x4ffffffff) || (device_addr < 0x100000000)) { bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_ERROR, - "input[%d] device memory should between 0x100000000 and 0x4ffffffff %s: %s: 
%d\n", - frame_idx, filename(__FILE__), __func__, __LINE__); - return BM_ERR_FAILURE; + "input[%d] device memory should between 0x100000000 and 0x4ffffffff check_bm_image_param: %d\n", + frame_idx, __LINE__); + return BM_ERR_DATA; } } #endif @@ -258,9 +259,9 @@ static bm_status_t check_bm1684x_bm_image_param( if(plane_num == 0 || bm_image_get_device_mem(output[frame_idx], device_mem) != BM_SUCCESS) { bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_ERROR, - "not correctly create output[%d] bm_image, get plane num or device mem err %s: %s: %d\n", - frame_idx, filename(__FILE__), __func__, __LINE__); - return BM_ERR_FAILURE; + "not correctly create output[%d] bm_image, get plane num or device mem err check_bm_image_param: %d\n", + frame_idx, __LINE__); + return BM_ERR_DATA; } #ifndef USING_CMODEL for (i = 0; i < plane_num; i++) { @@ -268,9 +269,9 @@ static bm_status_t check_bm1684x_bm_image_param( if((device_addr > 0x4ffffffff) || (device_addr < 0x100000000)) { bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_ERROR, - "output[%d] device memory should between 0x100000000 and 0x4ffffffff %s: %s: %d\n", - frame_idx, filename(__FILE__), __func__, __LINE__); - return BM_ERR_FAILURE; + "output[%d] device memory should between 0x100000000 and 0x4ffffffff check_bm_image_param: %d\n", + frame_idx, __LINE__); + return BM_ERR_DATA; } } #endif @@ -297,11 +298,10 @@ static bm_status_t check_bm1684x_vpp_csctype( if((is_csc_yuv_or_rgb(input[0].image_format) != is_csc_yuv_or_rgb(input[idx].image_format)) || (is_csc_yuv_or_rgb(output[0].image_format) != is_csc_yuv_or_rgb(output[idx].image_format))) { - ret = BM_ERR_PARAM; + ret = BM_ERR_DATA; bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_ERROR, - "bm1684x vpp Input and output color space changes will cause hardware hang," - " %s: %s: %d\n", - filename(__FILE__), __func__, __LINE__); + "vpp Input and output color space changes will cause hardware hang," + " check_vpp_csctype: %d\n", __LINE__); break; } @@ -313,14 +313,14 @@ static bm_status_t check_bm1684x_vpp_csctype( case 
CSC_YCbCr2RGB_BT709: if(COLOR_SPACE_YUV != input_color_space) { - ret = BM_NOT_SUPPORTED; + ret = BM_ERR_PARAM; break; } if((COLOR_SPACE_RGB != output_color_space) && (COLOR_SPACE_HSV != output_color_space) && (COLOR_SPACE_RGBY != output_color_space)) { - ret = BM_NOT_SUPPORTED; + ret = BM_ERR_PARAM; break; } break; @@ -330,13 +330,13 @@ static bm_status_t check_bm1684x_vpp_csctype( case CSC_RGB2YPbPr_BT709: if(COLOR_SPACE_RGB != input_color_space) { - ret = BM_NOT_SUPPORTED; + ret = BM_ERR_PARAM; break; } if((COLOR_SPACE_YUV != output_color_space) && (COLOR_SPACE_RGBY != output_color_space)) { - ret = BM_NOT_SUPPORTED; + ret = BM_ERR_PARAM; break; } break; @@ -346,19 +346,19 @@ static bm_status_t check_bm1684x_vpp_csctype( case CSC_USER_DEFINED_MATRIX: if(NULL == matrix) { - ret = BM_NOT_SUPPORTED; + ret = BM_ERR_PARAM; bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_ERROR, - "bm1684x vpp csc_type param %d is CSC_USER_DEFINED_MATRIX ," - "matrix can not be null, %s: %s: %d\n", - csc_type, filename(__FILE__), __func__, __LINE__); + "vpp csc_type param %d is CSC_USER_DEFINED_MATRIX ," + "matrix can not be null, check_vpp_csctype: %d\n", + csc_type, __LINE__); return ret; } break; default: - ret = BM_NOT_SUPPORTED; + ret = BM_ERR_PARAM; bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_ERROR, - "bm1684x vpp csc_type param %d not support,%s: %s: %d\n", - csc_type, filename(__FILE__), __func__, __LINE__); + "vpp csc_type param %d not support,check_vpp_csctype: %d\n", + csc_type, __LINE__); return ret; } } @@ -386,10 +386,6 @@ static bm_status_t check_bm1684x_vpp_input_format( case FORMAT_NV61: case FORMAT_YUV444_PACKED: case FORMAT_YVU444_PACKED: - case FORMAT_YUV422_YUYV: - case FORMAT_YUV422_YVYU: - case FORMAT_YUV422_UYVY: - case FORMAT_YUV422_VYUY: case FORMAT_RGBP_SEPARATE: case FORMAT_BGRP_SEPARATE: case FORMAT_RGB_PLANAR: @@ -398,11 +394,23 @@ static bm_status_t check_bm1684x_vpp_input_format( case FORMAT_BGR_PACKED: case FORMAT_COMPRESSED: break; + case FORMAT_YUV422_YUYV: + case 
FORMAT_YUV422_YVYU: + case FORMAT_YUV422_UYVY: + case FORMAT_YUV422_VYUY: + if (input[frame_idx].width % 2 != 0) { + ret = BM_ERR_PARAM; + bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_ERROR, + "vpp input[%d] width %d is odd, which is not supported by format %d, check_vpp_input_format: %d\n", + frame_idx, input[frame_idx].width, input[frame_idx].image_format, __LINE__); + return ret; + } + break; default: - ret = BM_NOT_SUPPORTED; + ret = BM_ERR_DATA; bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_ERROR, - "1684x vpp input[%d] format %d not support %s: %s: %d\n", - frame_idx,input[frame_idx].image_format,filename(__FILE__), __func__, __LINE__); + "vpp input[%d] format %d not support, check_vpp_input_format: %d\n", + frame_idx,input[frame_idx].image_format, __LINE__); return ret; } } @@ -435,10 +443,10 @@ static bm_status_t check_bm1684x_vpp_output_format( case FORMAT_HSV256_PACKED: break; default: - ret = BM_NOT_SUPPORTED; + ret = BM_ERR_DATA; bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_ERROR, - "1684x vpp output[%d] format %d not support %s: %s: %d\n", - frame_idx,output[frame_idx].image_format,filename(__FILE__), __func__, __LINE__); + "vpp output[%d] format %d not support check_vpp_output_format: %d\n", + frame_idx,output[frame_idx].image_format, __LINE__); return ret; } } @@ -461,9 +469,8 @@ static bm_status_t check_bm1684x_vpp_image_param( if((frame_number > VPP1684X_MAX_CROP_NUM) || (frame_number <= 0)) { bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_ERROR, - " input num should less than 256 %s: %s: %d\n", - filename(__FILE__), __func__, __LINE__); - return BM_NOT_SUPPORTED; + "input num (%d) should less than 256, check_vpp_image_param: %d\n", frame_number, __LINE__); + return BM_ERR_PARAM; } for (frame_idx = 0; frame_idx < frame_number; frame_idx++) { @@ -489,8 +496,8 @@ static bm_status_t check_bm1684x_vpp_image_param( if((padding_attr[frame_idx].if_memset != 0) && (padding_attr[frame_idx].if_memset != 1)) { bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_ERROR, - "frame [%d], padding_attr if_memset wrong %s: %s: 
%d\n", - frame_idx, filename(__FILE__), __func__, __LINE__); + "frame [%d], padding_attr if_memset wrong check_vpp_image_param: %d\n", + frame_idx, __LINE__); return BM_ERR_PARAM; } if(padding_attr[frame_idx].if_memset == 1) @@ -500,9 +507,9 @@ static bm_status_t check_bm1684x_vpp_image_param( (output[frame_idx].height- padding_attr[frame_idx].dst_crop_h - padding_attr[frame_idx].dst_crop_sty > 255) ) { bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_ERROR, - "frame [%d], padding_attr x,y,w,h may be > 255 %s: %s: %d\n", - frame_idx, filename(__FILE__), __func__, __LINE__); - return BM_NOT_SUPPORTED; + "frame [%d], padding_attr x,y,w,h may be > 255 check_vpp_image_param: %d\n", + frame_idx, __LINE__); + return BM_ERR_PARAM; } dst_crop_rect.start_x = 0; dst_crop_rect.start_y = 0; @@ -526,8 +533,6 @@ static bm_status_t check_bm1684x_vpp_image_param( (src_crop_rect.crop_h > VPP1684X_MAX_H) || (src_crop_rect.crop_w < VPP1684X_MIN_W) || (src_crop_rect.crop_h < VPP1684X_MIN_H) || - (output[frame_idx].width > VPP1684X_MAX_W) || - (output[frame_idx].height > VPP1684X_MAX_H) || (output[frame_idx].width < VPP1684X_MIN_W) || (output[frame_idx].height < VPP1684X_MIN_H) || (dst_crop_rect.crop_w > VPP1684X_MAX_W) || @@ -536,17 +541,16 @@ static bm_status_t check_bm1684x_vpp_image_param( (dst_crop_rect.crop_h < VPP1684X_MIN_H) ) { bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_ERROR,\ - "bm1684x vpp frame_idx %d, width or height abnormal," + "vpp frame_idx %d, width or height abnormal," "input[frame_idx].width %d,input[frame_idx].height %d," "src_crop_rect[frame_idx].crop_w %d,src_crop_rect[frame_idx].crop_h %d," "output[frame_idx].width %d, output[frame_idx].height %d," "dst_crop_rect[frame_idx].crop_w %d, dst_crop_rect[frame_idx].crop_h %d," - "%s: %s: %d\n",\ + "check_vpp_image_param: %d\n",\ frame_idx,input[frame_idx].width,input[frame_idx].height,src_crop_rect.crop_w, src_crop_rect.crop_h, output[frame_idx].width, output[frame_idx].height, - dst_crop_rect.crop_w,dst_crop_rect.crop_h, - 
filename(__FILE__), __func__, __LINE__); - return BM_NOT_SUPPORTED; + dst_crop_rect.crop_w,dst_crop_rect.crop_h, __LINE__); + return BM_ERR_PARAM; } if((src_crop_rect.start_x < 0) || @@ -560,9 +564,16 @@ static bm_status_t check_bm1684x_vpp_image_param( ) { bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_ERROR, - "frame [%d], input or output crop is out of range %s: %s: %d\n", - frame_idx, filename(__FILE__), __func__, __LINE__); - return BM_NOT_SUPPORTED; + "frame [%d], input or output crop is out of range: src_crop_rect=(start_x: %d, " + "start_y: %d, crop_w: %d, crop_h: %d), input=(width: %d, height: %d), " + "dst_crop_rect=(start_x: %d, start_y: %d, crop_w: %d, crop_h: %d), " + "output=(width: %d, height: %d) check_vpp_image_param: %d\n", + frame_idx, src_crop_rect.start_x, src_crop_rect.start_y, src_crop_rect.crop_w, + src_crop_rect.crop_h, input[frame_idx].width, input[frame_idx].height, + dst_crop_rect.start_x, dst_crop_rect.start_y, dst_crop_rect.crop_w, + dst_crop_rect.crop_h, output[frame_idx].width, output[frame_idx].height, + __LINE__); + return BM_ERR_PARAM; } } @@ -581,13 +592,17 @@ static bm_status_t check_bm1684x_vpp_image_param( { if(border_param[0].rect_border_enable == 1) { - if((border_param[0].st_x > input[0].width) || - (border_param[0].st_y > input[0].height) || + if((border_param[0].st_x + border_param[0].width > input[0].width) || + (border_param[0].st_y + border_param[0].height > input[0].height) || (output[0].data_type != DATA_TYPE_EXT_1N_BYTE)) { bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_ERROR, - "bm1684x vpp draw rectangle param wrong,maybe st_x,st_y wrong or data_type not supported, %s: %s: %d\n", - filename(__FILE__), __func__, __LINE__); + "vpp draw rectangle param wrong: st_x=%d, st_y=%d, rect_width=%d, rect_height=%d, input_width=%d, input_height=%d, output_data_type=%d. 
check_vpp_image_param: %d\n", + border_param[0].st_x, border_param[0].st_y, + border_param[0].width, border_param[0].height, + input[0].width, input[0].height, + output[0].data_type, + __LINE__); return BM_ERR_PARAM; } } @@ -614,8 +629,8 @@ static bm_status_t check_bm1684x_vpp_param( if((input == NULL) || (output == NULL)) { bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_ERROR, - "input or output is nullptr , %s: %s: %d\n", filename(__FILE__), __func__, __LINE__); - return BM_ERR_PARAM; + "input or output is nullptr , check_vpp_param: %d\n", __LINE__); + return BM_ERR_DATA; } ret = check_bm1684x_bm_image_param(frame_number, input, output); @@ -633,17 +648,17 @@ static bm_status_t check_bm1684x_vpp_param( if((algorithm != BMCV_INTER_NEAREST) && (algorithm != BMCV_INTER_LINEAR)) { bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_ERROR, - "bm1684x vpp not support algorithm %d,%s: %s: %d\n", - algorithm, filename(__FILE__), __func__, __LINE__); - return BM_NOT_SUPPORTED; + "vpp not support algorithm %d, check_vpp_param: %d\n", + algorithm, __LINE__); + return BM_ERR_PARAM; } ret = check_bm1684x_vpp_csctype(frame_number, input,output,csc_type, matrix); if(ret != BM_SUCCESS) { bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_ERROR, - "bm1684x vpp csctype %d, %s: %s: %d\n", - csc_type, filename(__FILE__), __func__, __LINE__); + "vpp csctype %d, check_vpp_param: %d\n", + csc_type, __LINE__); return ret; } @@ -664,9 +679,8 @@ static bm_status_t check_bm1684x_vpp_continuity( if(input_or_output[i].image_private== NULL) { bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_ERROR, - "bm_image image_private cannot be empty, %s: %s: %d\n", - filename(__FILE__), __func__, __LINE__); - return BM_NOT_SUPPORTED; + "bm_image image_private cannot be empty, check_vpp_continuity: %d\n", __LINE__); + return BM_ERR_DATA; } } return BM_SUCCESS; @@ -687,8 +701,7 @@ static bm_status_t bm1684x_check_vpp_internal_param( (vpp_param[idx].post_padding_param.post_padding_enable == 1)) { bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_ERROR, - "bm1684x vpp postpadding left 
right top bottom all is 0 , %s: %s: %d\n", - filename(__FILE__), __func__, __LINE__); + "vpp postpadding left right top bottom all is 0 , check_vpp_internal_param: %d\n", __LINE__); return BM_ERR_PARAM; } if(((vpp_input[idx].cropW+ vpp_param[idx].padding_param.left + vpp_param[idx].padding_param.right) > VPP1684X_MAX_W) || @@ -697,8 +710,7 @@ static bm_status_t bm1684x_check_vpp_internal_param( ((vpp_output[idx].cropH - vpp_param[idx].post_padding_param.top - vpp_param[idx].post_padding_param.bottom) <= 0)) { bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_ERROR, - "bm1684x vpp after padding > 8192, or after postpadding < 0 , %s: %s: %d\n", - filename(__FILE__), __func__, __LINE__); + "vpp after padding > 8192, or after postpadding < 0 , check_vpp_internal_param: %d\n", __LINE__); return BM_ERR_PARAM; } scl_x = (float)(vpp_input[idx].cropW+ vpp_param[idx].padding_param.left + vpp_param[idx].padding_param.right) / (float)(vpp_output[idx].cropW - vpp_param[idx].post_padding_param.left - vpp_param[idx].post_padding_param.right); @@ -709,9 +721,8 @@ static bm_status_t bm1684x_check_vpp_internal_param( (scl_y > ((float)VPP1684X_MAX_SCALE_RATIO))) { bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_ERROR, - "bm1684x vpp not support: scaling ratio greater than 128,pay attention to postpadding, %s: %s: %d\n", - filename(__FILE__), __func__, __LINE__); - return BM_NOT_SUPPORTED; + "vpp not support: scaling ratio greater than 128,pay attention to postpadding, check_vpp_internal_param: %d\n", __LINE__); + return BM_ERR_PARAM; } if(vpp_param[idx].border_param.rect_border_enable == 1) @@ -737,8 +748,7 @@ static bm_status_t bm1684x_check_vpp_internal_param( (vpp_output[idx].wdma_form != DATA_TYPE_1N_BYTE)) { bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_ERROR, - "bm1684x vpp border_param wrong, %s: %s: %d\n", - filename(__FILE__), __func__, __LINE__); + "vpp border_param wrong, check_vpp_internal_param: %d\n", __LINE__); return BM_ERR_PARAM; } } @@ -746,23 +756,20 @@ static bm_status_t bm1684x_check_vpp_internal_param( 
((vpp_input[idx].format == IN_FBC) && (vpp_output[idx].format == OUT_HSV256))) { bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_ERROR, - "bm1684x vpp direct conversion from compressed format to HSV is not supported, %s: %s: %d\n", - filename(__FILE__), __func__, __LINE__); - return BM_ERR_PARAM; + "vpp direct conversion from compressed format to HSV is not supported, check_vpp_internal_param: %d\n", __LINE__); + return BM_ERR_DATA; } if((vpp_param[idx].csc_scale_order == 1) && (vpp_output[idx].format == OUT_RGBYP)) { bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_ERROR, - "bm1684x vpp rgbyp does not support CSC before sacle, %s: %s: %d\n", - filename(__FILE__), __func__, __LINE__); - return BM_ERR_PARAM; + "vpp rgbyp does not support CSC before sacle, check_vpp_internal_param: %d\n", __LINE__); + return BM_ERR_DATA; } if((vpp_input[idx].format == IN_FBC) && ((vpp_input[idx].cropW % 16 != 0) || (vpp_input[idx].cropH % 4 != 0) || (vpp_input[idx].axisX % 32 != 0) || (vpp_input[idx].axisY % 2 != 0))) { bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_ERROR, - "bm1684x vpp When compressing the format, cropw requires 16 alignment, croph requires 4 alignment, and start_x requires 32 alignment.start_y requires 2 alignment, %s: %s: %d\n", - filename(__FILE__), __func__, __LINE__); + "vpp When compressing the format, cropw requires 16 alignment, croph requires 4 alignment, and start_x requires 32 alignment.start_y requires 2 alignment, check_vpp_internal_param: %d\n", __LINE__); return BM_ERR_PARAM; } @@ -772,9 +779,8 @@ static bm_status_t bm1684x_check_vpp_internal_param( ((vpp_output[idx].format != OUT_RGBP) && (vpp_output[idx].format != OUT_YUV444P) && (vpp_output[idx].format != OUT_YUV400P))) { bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_ERROR, - "bm1684x vpp fp32,bf16,fp16 only supprot yuv444p, yonly and rgbp, %s: %s: %d\n", - filename(__FILE__), __func__, __LINE__); - return BM_ERR_PARAM; + "vpp fp32,bf16,fp16 only supprot yuv444p, yonly and rgbp, check_vpp_internal_param: %d\n", __LINE__); + return BM_ERR_DATA; } } 
return BM_SUCCESS; @@ -785,7 +791,7 @@ static void vpp1684x_dump(struct vpp_batch_n *batch) int idx = 0; descriptor *pdes = NULL; - bmlib_log("BMCV VPP DUMP", BMLIB_LOG_ERROR, " %s: %s: %d\n", __FILE__, __func__, __LINE__); + bmlib_log("BMCV VPP DUMP", BMLIB_LOG_ERROR, " vpp_dump: %d\n", __LINE__); for (idx = 0; idx < batch->num; idx++) { pdes = (batch->cmd + idx); bmlib_log("BMCV VPP DUMP", BMLIB_LOG_ERROR, " idx %d, batch->num %d\n",idx, batch->num); @@ -1010,9 +1016,8 @@ static bm_status_t input_format_match(bm_image *input,uint8* input_format) *input_format = IN_FBC; break; default: - ret = BM_NOT_SUPPORTED; - bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_ERROR, "1684x vpp input format not support %s: %s: %d\n", - __FILE__, __func__, __LINE__); + ret = BM_ERR_DATA; + bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_ERROR, "input format not support : %d\n", __LINE__); break; } @@ -1062,9 +1067,8 @@ static bm_status_t output_format_match(bm_image *output,uint8* output_format) *output_format = OUT_HSV256; break; default: - ret = BM_NOT_SUPPORTED; - bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_ERROR, "1684x vpp output format not support %s: %s: %d\n", - __FILE__, __func__, __LINE__); + ret = BM_ERR_DATA; + bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_ERROR, "vpp output format not support : %d\n", __LINE__); break; } @@ -1100,9 +1104,8 @@ static bm_status_t default_csc_type(bm1684x_vpp_mat *input, bm1684x_vpp_mat *out color_space_in = COLOR_IN_RGB; break; default: - ret = BM_NOT_SUPPORTED; - bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_ERROR, "1684x vpp input format not support %s: %s: %d\n", - __FILE__, __func__, __LINE__); + ret = BM_ERR_DATA; + bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_ERROR, "vpp input format not support : %d\n", __LINE__); break; } @@ -1132,9 +1135,8 @@ static bm_status_t default_csc_type(bm1684x_vpp_mat *input, bm1684x_vpp_mat *out break; default: - ret = BM_NOT_SUPPORTED; - bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_ERROR, "1684x vpp output format not support %s: %s: %d\n", - __FILE__, __func__, __LINE__); + ret = 
BM_ERR_DATA; + bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_ERROR, "vpp output format not support : %d\n", __LINE__); break; } @@ -1185,8 +1187,7 @@ static bm_status_t bm_image_to_1684x_vpp_input_mat( ret = input_format_match(input, &(mat->format)); if(BM_SUCCESS != ret ) { - bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_WARNING, "1684x vppinput not support this format %s: %s: %d\n", - filename(__FILE__), __func__, __LINE__); + bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_WARNING, "vppinput not support this format : %d\n", __LINE__); // return BM_NOT_SUPPORTED; } mat->frm_w = input->width; @@ -1279,8 +1280,7 @@ static bm_status_t bm_image_to_1684x_vpp_output_mat( ret = output_format_match(output,&(mat->format)); if(BM_SUCCESS != ret ) { - bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_WARNING, "1684x vppinput not support this format %s: %s: %d\n", - filename(__FILE__), __func__, __LINE__); + bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_WARNING, "vppinput not support this format bm_image_to_vpp_output_mat: %d\n", __LINE__); // return BM_NOT_SUPPORTED; } @@ -1348,9 +1348,8 @@ static bm_status_t bm_image_to_1684x_vpp_output_mat( mat->wdma_form = DATA_TYPE_FLOAT32; break; default: - bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_WARNING, "1684x vpp output data type wrong %s: %s: %d\n", - filename(__FILE__), __func__, __LINE__); - ret = BM_ERR_PARAM; + bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_WARNING, "vpp output data_type(%d) wrong bm_image_to_vpp_output_mat: %d\n", output->data_type, __LINE__); + ret = BM_ERR_DATA; break; } return ret; @@ -1374,9 +1373,9 @@ bm_status_t vpp_algorithm_config( vpp_param->resize_param.scl_init_y = 1; break; default: - bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_WARNING, "1684x vpp not support algorithm %d ,%s: %s: %d\n", - algorithm,filename(__FILE__), __func__, __LINE__); - ret = BM_NOT_SUPPORTED; + bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_WARNING, "vpp not support algorithm %d ,bm_image_to_vpp_output_mat: %d\n", + algorithm, __LINE__); + ret = BM_ERR_PARAM; break; } return ret; @@ -1474,8 +1473,7 @@ static bm_status_t 
bm1684x_vpp_misc( batch.cmd = new descriptor [batch.num * (sizeof(descriptor))]; if (batch.cmd == NULL) { ret = BM_ERR_NOMEM; - bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_ERROR, "vpp malloc failed %s: %s: %d\n", - __FILE__, __func__, __LINE__); + bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_ERROR, "vpp malloc failed vpp_misc: %d\n", __LINE__); return ret; } @@ -1700,7 +1698,6 @@ static bm_status_t bm1684x_vpp_misc( UNUSED(cmodel_flag); -#ifdef __linux__ #ifdef USING_CMODEL UNUSED(handle); UNUSED(vpp_dev_fd); @@ -1712,21 +1709,20 @@ static bm_status_t bm1684x_vpp_misc( { ret = bm_get_handle_fd(handle, VPP_FD, &vpp_dev_fd); if ((ret != 0 ) || (vpp_dev_fd < 0)){ - bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_ERROR, "invalid vpp fd %s: %s: %d\n", - __FILE__, __func__, __LINE__); + bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_ERROR, "invalid vpp fd vpp_misc: %d\n", __LINE__); delete[] batch.cmd; return ret; } if(0 != ioctl(vpp_dev_fd, VPP_UPDATE_BATCH_FD_PA, &batch)) { - printf("1684x vpp soc run failed\n"); + printf("vpp soc run failed\n"); vpp1684x_dump(&batch); - ret = BM_ERR_FAILURE; + ret = BM_ERR_TIMEOUT; } } else if(cmodel_flag == 1) { - bm1684x_vpp_cmodel(&batch, vpp_input, vpp_output,vpp_param); + bm1684x_vpp_cmodel(&batch, vpp_input, vpp_output, vpp_param); } #endif @@ -1737,17 +1733,18 @@ static bm_status_t bm1684x_vpp_misc( { if(0 != bm_trigger_vpp(handle, &batch)) { - printf("1684x vpp pcie run failed\n"); + printf("vpp pcie run failed\n"); +#ifndef _WIN32 vpp1684x_dump(&batch); - ret = BM_ERR_FAILURE; +#endif + ret = BM_ERR_TIMEOUT; } - } - else if(cmodel_flag == 1) - { - bm1684x_vpp_cmodel(&batch, vpp_input, vpp_output,vpp_param); + }else if(cmodel_flag == 1){ +#ifndef _WIN32 + bm1684x_vpp_cmodel(&batch, vpp_input, vpp_output, vpp_param); +#endif } -#endif #endif delete[] batch.cmd; @@ -1876,9 +1873,9 @@ static bm_status_t bm1684x_vpp_multi_parameter_processing( if (!bm_image_is_attached(output[i])) { if (bm_image_alloc_dev_mem(output[i], BMCV_HEAP_ANY) != BM_SUCCESS) { bmlib_log(BMCV_LOG_TAG, 
BMLIB_LOG_ERROR, \ - "output dev alloc fail %s: %s: %d\n", __FILE__, __func__, __LINE__); + "output dev alloc fail vpp_multi_parameter_processing: %d\n", __LINE__); free_dmem(); - return BM_ERR_FAILURE; + return BM_ERR_NOMEM; } } } @@ -1887,8 +1884,8 @@ static bm_status_t bm1684x_vpp_multi_parameter_processing( frame_number, input, output, input_crop_rect, padding_attr, algorithm, csc_type, matrix, border_param); if(ret != BM_SUCCESS) { - bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_ERROR, "bm1684x vpp error parameters found,%s: %s: %d\n", - filename(__FILE__), __func__, __LINE__); + bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_ERROR, "vpp error parameters found, vpp_multi_parameter_processing: %d\n", + __LINE__); return ret; } @@ -1905,9 +1902,11 @@ static bm_status_t check_bm1684x_convert_to_param(bm_image* input, for(int i=0; iheight, input->width, FORMAT_YUV420P, DATA_TYPE_EXT_1N_BYTE, input_temp); if(BM_SUCCESS != bm_image_alloc_dev_mem(input_temp[0])) { BMCV_ERR_LOG("bm_image_alloc_dev_mem error\n"); - return BM_ERR_FAILURE; + return BM_ERR_NOMEM; } ret = bm1684x_vpp_single_input_multi_output(handle, 1, input[0], input_temp, NULL, NULL, algorithm, CSC_MAX_ENUM, matrix); if(ret != BM_SUCCESS) { bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_ERROR, - "bm1684x_vpp_single_input_multi_output error , %s: %s: %d\n", filename(__FILE__), __func__, __LINE__); - return BM_ERR_FAILURE; + "vpp_single_input_multi_output error , vpp_compressed2yuv: %d\n", __LINE__); + return ret; } *input = *input_temp; } @@ -2189,7 +2188,7 @@ bm_status_t bm1684x_vpp_cvt_padding( if (padding_attr == NULL) { bmlib_log("VPP_PADDING", BMLIB_LOG_ERROR, "vpp padding info is nullptr"); - return BM_ERR_FAILURE; + return BM_ERR_PARAM; } int compressed_flag = 0; @@ -2252,11 +2251,17 @@ bm_status_t bm1684x_vpp_basic( return ret; } + int compressed_flag = 0; + bm_image input_temp; + ret = bm1684x_vpp_compressed2yuv(handle, in_img_num, input, &input_temp, matrix, algorithm, crop_rect, &compressed_flag); + if(ret != BM_SUCCESS) + goto 
failed; + if (crop_rect == NULL) { if (crop_num_vec != NULL) { bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_ERROR, \ - "crop_num_vec should be NULL err %s: %s: %d\n", __FILE__, __func__, __LINE__); - return BM_ERR_FAILURE; + "crop_num_vec should be NULL err vpp_basic: %d\n", __LINE__); + return BM_ERR_PARAM; } out_img_num = in_img_num; @@ -2265,8 +2270,8 @@ bm_status_t bm1684x_vpp_basic( } else { if (crop_num_vec == NULL) { bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_ERROR, \ - "crop_num_vec should not be NULL err %s: %s: %d\n", __FILE__, __func__, __LINE__); - return BM_ERR_FAILURE; + "crop_num_vec should not be NULL err vpp_basic: %d\n", __LINE__); + return BM_ERR_PARAM; } for (i = 0; i < in_img_num; i++) { @@ -2300,6 +2305,12 @@ bm_status_t bm1684x_vpp_basic( { delete [] input_inner; } +failed: + if(1 == compressed_flag) + { + bm_image_destroy(input_temp); + input_temp.image_private = NULL; + } return ret; } @@ -2325,7 +2336,7 @@ bm_status_t bm1684x_vpp_stitch( if (dst_crop_rect == NULL) { bmlib_log("VPP-STITCH", BMLIB_LOG_ERROR, "dst_crop_rect is nullptr"); - return BM_ERR_FAILURE; + return BM_ERR_PARAM; } @@ -2360,8 +2371,10 @@ bm_status_t bm1684x_vpp_draw_rectangle( { int i = 0; bm_status_t ret = BM_SUCCESS; + int draw_num = 0; /*check border param*/ + draw_num = (rect_num + 31) >> 5; border_t* border_param = new border_t [rect_num]; for(i = 0; i < rect_num; i++) { @@ -2383,8 +2396,14 @@ bm_status_t bm1684x_vpp_draw_rectangle( border_image[i] = image; } - ret = bm1684x_vpp_multi_parameter_processing( - handle, rect_num, border_image, border_image, NULL, NULL, BMCV_INTER_LINEAR, CSC_MAX_ENUM, NULL, NULL, border_param,NULL); + for(i = 0; i < draw_num; i++) { + int draw_num_current = 32; + if (i == draw_num - 1) { + draw_num_current = rect_num - 32 * i; + } + ret = bm1684x_vpp_multi_parameter_processing( + handle, draw_num_current, border_image + 32 * i, border_image + 32 * i, NULL, NULL, BMCV_INTER_LINEAR, CSC_MAX_ENUM, NULL, NULL, border_param + 32 * i,NULL); + } delete [] 
border_image; delete [] border_param; @@ -2435,9 +2454,8 @@ bm_status_t bm1684x_vpp_resize( if(resize_attr[0].stretch_fit!= resize_attr[input_idx].stretch_fit) { bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_ERROR, - "expected consistant input image stretch_fit %s: %s: %d\n", - filename(__FILE__), __func__, __LINE__); - return BM_NOT_SUPPORTED; + "expected consistant input image stretch_fit, vpp_resize: %d\n", __LINE__); + return BM_ERR_PARAM; } } @@ -2499,7 +2517,7 @@ bm_status_t bm1684x_vpp_resize( else { bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_ERROR, \ - "resize_attr.stretch_fit must be 0 or 1 %s: %s: %d\n", __FILE__, __func__, __LINE__); + "resize_attr.stretch_fit(%u) must be 0 or 1, vpp_resize: %d\n", resize_attr[0].stretch_fit, __LINE__); } output_idx++; } @@ -2541,8 +2559,8 @@ bm_status_t bm1684x_vpp_storage_convert( (output_[0].data_type == DATA_TYPE_EXT_4N_BYTE || output_[0].data_type == DATA_TYPE_EXT_4N_BYTE_SIGNED)) { bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_ERROR, - "1684x vpp not support 4N mode %s: %s: %d\n", __FILE__, __func__, __LINE__); - ret = BM_NOT_SUPPORTED; + "vpp not support 4N mode, vpp_storage_convert: %d\n", __LINE__); + ret = BM_ERR_DATA; goto done; } @@ -2597,9 +2615,9 @@ bm_status_t bm1684x_vpp_put_text( if(font_rects[i].start_x < 0 || font_rects[i].start_y < 0 || font_rects[i].start_x + font_rects[i].crop_w > image[i].width || \ font_rects[i].start_y + font_rects[i].crop_h > image[i].height){ bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_ERROR, - "rects out of range, idx = %d, stx = %d, sty = %d, crop_w = %d, crop_h = %d, %s: %s: %d\n", i, font_rects[i].start_x, font_rects[i].start_y, \ - font_rects[i].crop_w, font_rects[i].crop_h, __FILE__, __func__, __LINE__); - ret = BM_ERR_FAILURE; + "rects out of range, idx = %d, stx = %d, sty = %d, crop_w = %d, crop_h = %d, vpp_put_text: %d\n", i, font_rects[i].start_x, font_rects[i].start_y, \ + font_rects[i].crop_w, font_rects[i].crop_h, __LINE__); + ret = BM_ERR_PARAM; goto fail; } #ifdef USING_CMODEL @@ -2696,7 +2714,7 @@ void 
bm1684x_vpp_read_bin(bm_image src, const char *input_name) (void*)((char*)input_ptr + image_byte_size[0] + image_byte_size[1] + image_byte_size[2])}; - FILE *fp_src = fopen(input_name, "rb+"); + FILE *fp_src = fopen(input_name, "rb"); if (fread((void *)input_ptr, 1, byte_size, fp_src) < (unsigned int)byte_size){ printf("file size is less than %d required bytes\n", byte_size); @@ -2805,6 +2823,7 @@ bm_status_t bm1684x_vpp_fill_rectangle( unsigned char b) { bm_status_t ret = BM_SUCCESS; + bm_handle_check_2(handle, *input, *output); ret = simple_check_bm1684x_input_param(handle, input, input_num); if(ret != BM_SUCCESS) return ret; @@ -2853,6 +2872,7 @@ bm_status_t bm1684x_vpp_cmodel_csc_resize_convert_to( bmcv_convert_to_attr* convert_to_attr) { bm_status_t ret = BM_SUCCESS; + bm_handle_check_2(handle, *input, *output); ret = bm1684x_vpp_asic_and_cmodel( handle, frame_number, input, output, input_crop_rect, padding_attr, algorithm, csc_type, matrix, convert_to_attr, NULL, NULL, USE_CMODEL); return ret; @@ -2870,6 +2890,7 @@ bm_status_t bm1684x_vpp_cmodel_border( unsigned char b) { bm_status_t ret = BM_SUCCESS; + bm_handle_check_2(handle, *input, *output); border_t* border_param = new border_t [rect_num]; for(int i = 0; i < rect_num; i++){ border_param[i].rect_border_enable = 1; @@ -2925,7 +2946,7 @@ bm_status_t bm1684x_vpp_mosaic_special(bm_handle_t handle, ret = bm_image_alloc_dev_mem(masaic_pad[i]); if(ret != BM_SUCCESS){ bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_ERROR, - "bm_image alloc dev mem fail %s: %s: %d\n", __FILE__, __func__, __LINE__); + "bm_image alloc dev mem fail, vpp_mosaic_special: %d\n", __LINE__); goto fail2; } } @@ -2941,7 +2962,7 @@ bm_status_t bm1684x_vpp_mosaic_special(bm_handle_t handle, ret = bm_image_alloc_dev_mem(masaic_narrow[i]); if(ret != BM_SUCCESS){ bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_ERROR, - "bm_image alloc dev mem fail %s: %s: %d\n", __FILE__, __func__, __LINE__); + "bm_image alloc dev mem fail, vpp_mosaic_special: %d\n", __LINE__); goto 
fail1; } padding_enlarge[i].dst_crop_stx = mosaic_rect[i].start_x; @@ -3007,7 +3028,7 @@ bm_status_t bm1684x_vpp_mosaic_normal(bm_handle_t handle, ret = bm_image_alloc_dev_mem(masaic_narrow[i]); if(ret != BM_SUCCESS){ bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_ERROR, - "bm_image alloc dev mem fail %s: %s: %d\n", __FILE__, __func__, __LINE__); + "bm_image alloc dev mem fail, vpp_mosaic_normal: %d\n", __LINE__); goto fail; } } @@ -3093,16 +3114,16 @@ bm_status_t bm1684x_vpp_basic_v2( if (crop_rect == NULL) { if (crop_num_vec != NULL) { bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_ERROR, \ - "crop_num_vec should be NULL err %s: %s: %d\n", __FILE__, __func__, __LINE__); - return BM_ERR_FAILURE; + "crop_num_vec should be NULL err, vpp_basic_v2: %d\n", __LINE__); + return BM_ERR_PARAM; } out_crop_num = img_num; } else { if (crop_num_vec == NULL) { bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_ERROR, \ - "crop_num_vec should not be NULL err %s: %s: %d\n", __FILE__, __func__, __LINE__); - return BM_ERR_FAILURE; + "crop_num_vec should not be NULL err, vpp_basic_v2: %d\n", __LINE__); + return BM_ERR_PARAM; } for (i = 0; i < img_num; i++) { out_crop_num += crop_num_vec[i]; @@ -3111,31 +3132,25 @@ bm_status_t bm1684x_vpp_basic_v2( bmcv_convert_to_attr black_attr; - if(NULL != padding_attr) - { - for (i = 0; i < out_crop_num; i++) - { - padding_attr[i].padding_r = 0; - padding_attr[i].padding_g = 0; - padding_attr[i].padding_b = 0; - padding_attr[i].if_memset = 1; - if ((padding_attr[i].dst_crop_stx > 255) || (padding_attr[i].dst_crop_sty > 255) || - (output[i].width - padding_attr[i].dst_crop_w - padding_attr[i].dst_crop_stx > 255) || - (output[i].height- padding_attr[i].dst_crop_h - padding_attr[i].dst_crop_sty > 255) ) - { - black_attr.alpha_0 = 0; - black_attr.alpha_1 = 0; - black_attr.alpha_2 = 0; - black_attr.beta_0 = padding_attr[i].padding_r; - black_attr.beta_1 = padding_attr[i].padding_g; - black_attr.beta_2 = padding_attr[i].padding_b; - padding_attr[i].if_memset = 0; - ret = 
bm1684x_vpp_convert_to(handle, 1, black_attr, input, &output[i]); - if(ret != BM_SUCCESS) - { - bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_ERROR, - "bm1684x_vpp_convert_to error , %s: %s: %d\n", filename(__FILE__), __func__, __LINE__); - return BM_ERR_PARAM; + if(NULL != padding_attr) { + for (i = 0; i < out_crop_num; i++) { + if(padding_attr[i].if_memset == 1) { + if ((padding_attr[i].dst_crop_stx > 255) || (padding_attr[i].dst_crop_sty > 255) || + (output[i].width - padding_attr[i].dst_crop_w - padding_attr[i].dst_crop_stx > 255) || + (output[i].height- padding_attr[i].dst_crop_h - padding_attr[i].dst_crop_sty > 255) ) { + black_attr.alpha_0 = 0; + black_attr.alpha_1 = 0; + black_attr.alpha_2 = 0; + black_attr.beta_0 = padding_attr[i].padding_r; + black_attr.beta_1 = padding_attr[i].padding_g; + black_attr.beta_2 = padding_attr[i].padding_b; + padding_attr[i].if_memset = 0; + ret = bm1684x_vpp_convert_to(handle, 1, black_attr, input, &output[i]); + if(ret != BM_SUCCESS) { + bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_ERROR, + "vpp_convert_to error , vpp_basic_v2: %d\n", __LINE__); + return ret; + } } } } @@ -3171,4 +3186,13 @@ bm_status_t bm1684x_vpp_basic_v2( } delete [] convert_to_attr; return ret; -} \ No newline at end of file +} + +unsigned long long bmcv_calc_cbcr_addr(unsigned long long y_addr, unsigned int y_stride, unsigned int frame_height) +{ + u64 c_addr = 0, y_len=0; + + y_len = y_stride * ALIGN(frame_height,32); + c_addr = y_addr + y_len; + return c_addr; +} diff --git a/bmvid/bmcv/src/bmcv_api_absdiff.cpp b/bmvid/bmcv/src/bmcv_api_absdiff.cpp index 0030555..995ab49 100644 --- a/bmvid/bmcv/src/bmcv_api_absdiff.cpp +++ b/bmvid/bmcv/src/bmcv_api_absdiff.cpp @@ -4,6 +4,7 @@ #include "bmlib_runtime.h" #include #include +#include static bm_status_t bmcv_absdiff_check( bm_handle_t handle, @@ -12,7 +13,7 @@ static bm_status_t bmcv_absdiff_check( bm_image output) { if (handle == NULL) { bmlib_log("ABSDIFF", BMLIB_LOG_ERROR, "Can not get handle!\r\n"); - return BM_ERR_PARAM; + 
return BM_ERR_DEVNOTREADY; } bm_image_format_ext src1_format = input1.image_format; @@ -30,7 +31,7 @@ static bm_status_t bmcv_absdiff_check( if (src1_format != src2_format && src1_format != dst_format) { bmlib_log("ABSDIFF", BMLIB_LOG_ERROR, "input and output image format is NOT same"); - return BM_ERR_PARAM; + return BM_ERR_DATA; } if (src1_format != FORMAT_YUV420P && src1_format != FORMAT_YUV422P && @@ -48,18 +49,18 @@ static bm_status_t bmcv_absdiff_check( src1_format != FORMAT_RGBP_SEPARATE && src1_format != FORMAT_BGRP_SEPARATE) { bmlib_log("ABSDIFF", BMLIB_LOG_ERROR, "Not supported image format"); - return BM_NOT_SUPPORTED; + return BM_ERR_DATA; } if (src1_type != DATA_TYPE_EXT_1N_BYTE || src2_type != DATA_TYPE_EXT_1N_BYTE || dst_type != DATA_TYPE_EXT_1N_BYTE) { bmlib_log("ABSDIFF", BMLIB_LOG_ERROR, "Not supported image data type"); - return BM_NOT_SUPPORTED; + return BM_ERR_DATA; } if (src1_h != src2_h || src1_w != src2_w || src1_h != dst_h || src1_w != dst_w) { bmlib_log("ABSDIFF", BMLIB_LOG_ERROR, "inputs and output image size should be same"); - return BM_ERR_PARAM; + return BM_ERR_DATA; } return BM_SUCCESS; } @@ -70,6 +71,7 @@ bm_status_t bmcv_image_absdiff( bm_image input2, bm_image output) { bm_status_t ret = BM_SUCCESS; + bm_handle_check_3(handle, input1, input2, output); ret = bmcv_absdiff_check(handle, input1, input2, output); if (BM_SUCCESS != ret) { return ret; @@ -155,8 +157,9 @@ bm_status_t bmcv_image_absdiff( default: printf("BM_NOT_SUPPORTED!\n"); + ret = BM_ERR_NOFEATURE; break; } - return BM_SUCCESS; + return ret; } \ No newline at end of file diff --git a/bmvid/bmcv/src/bmcv_api_add_weighted.cpp b/bmvid/bmcv/src/bmcv_api_add_weighted.cpp index 8d593ed..02136b2 100755 --- a/bmvid/bmcv/src/bmcv_api_add_weighted.cpp +++ b/bmvid/bmcv/src/bmcv_api_add_weighted.cpp @@ -3,6 +3,7 @@ #include "bmcv_common_bm1684.h" #include #include +#include static bm_status_t bmcv_add_weighted_check( bm_handle_t handle, @@ -11,7 +12,7 @@ static bm_status_t 
bmcv_add_weighted_check( bm_image output) { if (handle == NULL) { bmlib_log("ADD_WEIGHTED", BMLIB_LOG_ERROR, "Can not get handle!\r\n"); - return BM_ERR_PARAM; + return BM_ERR_DEVNOTREADY; } bm_image_format_ext src1_format = input1.image_format; bm_image_data_format_ext src1_type = input1.data_type; @@ -28,7 +29,7 @@ static bm_status_t bmcv_add_weighted_check( if (src1_format != src2_format && src1_format != dst_format) { bmlib_log("ADD_WEIGHTED", BMLIB_LOG_ERROR, "input and output image format is NOT same"); - return BM_ERR_PARAM; + return BM_ERR_DATA; } if (src1_format != FORMAT_YUV420P && src1_format != FORMAT_YUV422P && @@ -46,18 +47,18 @@ static bm_status_t bmcv_add_weighted_check( src1_format != FORMAT_RGBP_SEPARATE && src1_format != FORMAT_BGRP_SEPARATE) { bmlib_log("ADD_WEIGHTED", BMLIB_LOG_ERROR, "Not supported image format"); - return BM_NOT_SUPPORTED; + return BM_ERR_DATA; } if (src1_type != DATA_TYPE_EXT_1N_BYTE || src2_type != DATA_TYPE_EXT_1N_BYTE || dst_type != DATA_TYPE_EXT_1N_BYTE) { bmlib_log("ADD_WEIGHTED", BMLIB_LOG_ERROR, "Not supported image data type"); - return BM_NOT_SUPPORTED; + return BM_ERR_DATA; } if (src1_h != src2_h || src1_w != src2_w || src1_h != dst_h || src1_w != dst_w) { bmlib_log("ADD_WEIGHTED", BMLIB_LOG_ERROR, "inputs and output image size should be same"); - return BM_ERR_PARAM; + return BM_ERR_DATA; } return BM_SUCCESS; } @@ -71,6 +72,7 @@ bm_status_t bmcv_image_add_weighted( float gamma, bm_image output) { bm_status_t ret = BM_SUCCESS; + bm_handle_check_3(handle, input1, input2, output); ret = bmcv_add_weighted_check(handle, input1, input2, output); if (BM_SUCCESS != ret) { return ret; @@ -158,8 +160,9 @@ bm_status_t bmcv_image_add_weighted( default: printf("BM_NOT_SUPPORTED!\n"); + ret = BM_ERR_NOFEATURE; break; } - return BM_SUCCESS; + return ret; } diff --git a/bmvid/bmcv/src/bmcv_api_as_strided.cpp b/bmvid/bmcv/src/bmcv_api_as_strided.cpp index 148a81a..5800972 100644 --- a/bmvid/bmcv/src/bmcv_api_as_strided.cpp +++ 
b/bmvid/bmcv/src/bmcv_api_as_strided.cpp @@ -15,7 +15,7 @@ bm_status_t bmcv_as_strided( bm_status_t ret = BM_SUCCESS; if (handle == NULL){ bmlib_log("AS_STRIDED", BMLIB_LOG_ERROR, "Can not get handle!\r\n"); - return BM_ERR_FAILURE; + return BM_ERR_DEVNOTREADY; } bm_api_cv_as_strided_t api; @@ -37,7 +37,7 @@ bm_status_t bmcv_as_strided( { case 0x1684: bmlib_log("AS_STRIDED", BMLIB_LOG_ERROR, "1684 not support!\r\n"); - return BM_NOT_SUPPORTED; + return BM_ERR_NOFEATURE; case BM1684X: ret = bm_tpu_kernel_launch(handle, "cv_as_strided", (u8 *)&api, sizeof(api)); @@ -47,7 +47,7 @@ bm_status_t bmcv_as_strided( break; default: - ret = BM_NOT_SUPPORTED; + ret = BM_ERR_NOFEATURE; break; } diff --git a/bmvid/bmcv/src/bmcv_api_axpy.cpp b/bmvid/bmcv/src/bmcv_api_axpy.cpp index 6136b5f..2437094 100755 --- a/bmvid/bmcv/src/bmcv_api_axpy.cpp +++ b/bmvid/bmcv/src/bmcv_api_axpy.cpp @@ -132,7 +132,7 @@ bm_status_t bmcv_image_axpy( break; default: - ret = BM_NOT_SUPPORTED; + ret = BM_ERR_NOFEATURE; break; } diff --git a/bmvid/bmcv/src/bmcv_api_base64.cpp b/bmvid/bmcv/src/bmcv_api_base64.cpp index 818e0d1..3c4514f 100644 --- a/bmvid/bmcv/src/bmcv_api_base64.cpp +++ b/bmvid/bmcv/src/bmcv_api_base64.cpp @@ -83,14 +83,14 @@ bm_status_t bmcv_base64_dec(bm_handle_t handle, bm_device_mem_t src, if (bm_mem_get_type(src) == BM_MEM_TYPE_DEVICE) { src_device = src; - src_device.u.device.device_addr + src_device.u.device.device_addr = src.u.device.device_addr + len[0] - 2; src_device.size = 2; if (BM_SUCCESS !=bm_memcpy_d2s(handle, (void *)check_buf, src_device)) { BMCV_ERR_LOG("bm_memcpy_d2s when check len error\r\n"); - return BM_ERR_FAILURE; + return BM_ERR_NOMEM; } for (i = 0; i < 2; i++) { if(check_buf[i] == '=') @@ -105,7 +105,7 @@ bm_status_t bmcv_base64_codec(bm_handle_t handle, bm_device_mem_t src, if (handle == NULL) { bmlib_log("BASE64", BMLIB_LOG_ERROR, "Can not get handle!\r\n"); - return BM_ERR_FAILURE; + return BM_ERR_DEVNOTREADY; } struct ce_base base; int fd; @@ -130,12 
+130,12 @@ bm_status_t bmcv_base64_codec(bm_handle_t handle, bm_device_mem_t src, ret = 0; if (fd < 0) { perror("open"); - return BM_ERR_FAILURE; + return BM_ERR_DEVNOTREADY; } if (len > MAX_LEN) { printf("base64 lenth should be less than 128M!\n"); - return BM_NOT_SUPPORTED; + return BM_ERR_PARAM; } if (bm_mem_get_type(src) == BM_MEM_TYPE_SYSTEM) @@ -145,9 +145,9 @@ bm_status_t bmcv_base64_codec(bm_handle_t handle, bm_device_mem_t src, if ((bm_mem_get_type(src) == BM_MEM_TYPE_DEVICE) && len > MAX_LOOP_LEN) { printf("len of device_mem should be less than 3M!\n"); - return BM_NOT_SUPPORTED; + return BM_ERR_PARAM; } - + while (len > 0) { if (len > MAX_LOOP_LEN) { loop_len = MAX_LOOP_LEN; @@ -163,7 +163,7 @@ bm_status_t bmcv_base64_codec(bm_handle_t handle, bm_device_mem_t src, loop_len)) { BMCV_ERR_LOG("bm_malloc_device_byte error\r\n"); - return BM_ERR_FAILURE; + return BM_ERR_NOMEM; } if(BM_SUCCESS != bm_memcpy_s2d(handle, @@ -173,7 +173,7 @@ bm_status_t bmcv_base64_codec(bm_handle_t handle, bm_device_mem_t src, if(bm_mem_get_type(src) == BM_MEM_TYPE_SYSTEM){ bm_free_device(handle, src_buf_device); - return BM_ERR_FAILURE; + return BM_ERR_NOMEM; } } @@ -188,7 +188,7 @@ bm_status_t bmcv_base64_codec(bm_handle_t handle, bm_device_mem_t src, if(bm_mem_get_type(src) == BM_MEM_TYPE_SYSTEM){ bm_free_device(handle, src_buf_device); } - return BM_ERR_FAILURE; + return BM_ERR_NOMEM; } /* system to device for destination device ? 
*/ @@ -207,7 +207,7 @@ bm_status_t bmcv_base64_codec(bm_handle_t handle, bm_device_mem_t src, }*/ } else { dst_buf_device = dst; - } + } #ifndef SOC_MODE base.src = (unsigned long long)bm_mem_get_device_addr(src_buf_device); @@ -232,7 +232,7 @@ bm_status_t bmcv_base64_codec(bm_handle_t handle, bm_device_mem_t src, #endif if (ret < 0) { printf("ioctl failed!\n"); - return BM_ERR_FAILURE; + return BM_ERR_DEVNOTREADY; } if (bm_mem_get_type(dst) == BM_MEM_TYPE_SYSTEM) { @@ -247,7 +247,7 @@ bm_status_t bmcv_base64_codec(bm_handle_t handle, bm_device_mem_t src, if (bm_mem_get_type(dst) == BM_MEM_TYPE_SYSTEM) { bm_free_device(handle, dst_buf_device); } - return BM_ERR_FAILURE; + return BM_ERR_NOMEM; } bm_free_device(handle, dst_buf_device); } diff --git a/bmvid/bmcv/src/bmcv_api_batch_topk.cpp b/bmvid/bmcv/src/bmcv_api_batch_topk.cpp index 22839a6..4bc9b24 100644 --- a/bmvid/bmcv/src/bmcv_api_batch_topk.cpp +++ b/bmvid/bmcv/src/bmcv_api_batch_topk.cpp @@ -171,7 +171,7 @@ bm_status_t bmcv_batch_topk( break; } default: - ret = BM_NOT_SUPPORTED; + ret = BM_ERR_NOFEATURE; break; } @@ -198,7 +198,7 @@ bm_status_t bmcv_batch_topk( bm_free_device(handle, *mem); } - return (ret == BM_SUCCESS) ? 
BM_SUCCESS : BM_ERR_FAILURE; + return ret; } diff --git a/bmvid/bmcv/src/bmcv_api_bayer2rgb.cpp b/bmvid/bmcv/src/bmcv_api_bayer2rgb.cpp index 7de38fe..3afd191 100644 --- a/bmvid/bmcv/src/bmcv_api_bayer2rgb.cpp +++ b/bmvid/bmcv/src/bmcv_api_bayer2rgb.cpp @@ -4,6 +4,7 @@ #include "bmcv_common_bm1684.h" #include "bmlib_runtime.h" #include +#include #define KERNEL_SIZE 3 * 3 * 3 * 4 * 64 @@ -14,38 +15,42 @@ static bm_status_t bmcv_bayer2rgb_check( bm_image output) { if (handle == NULL) { BMCV_ERR_LOG("bayer2rgb can not get handle!\n"); - return BM_ERR_FAILURE; + return BM_ERR_DEVNOTREADY; } bm_image_format_ext src_format = input.image_format; bm_image_format_ext dst_format = output.image_format; - bm_image_data_format_ext src_type = input.data_type; + bm_image_data_format_ext src_data_type = input.data_type; bm_image_data_format_ext dst_type = output.data_type; if (convd_kernel == nullptr) { BMCV_ERR_LOG("The convd_kernel is nullptr !\n"); - return BM_ERR_FAILURE; + return BM_ERR_PARAM; } - if (src_format != FORMAT_BAYER || dst_format != FORMAT_RGB_PLANAR) { - BMCV_ERR_LOG("src_img or dst_img format not support\n"); - return BM_NOT_SUPPORTED; + if (src_format != FORMAT_BAYER && src_format != FORMAT_BAYER_RG8) { + BMCV_ERR_LOG("src_img format not supported !\n"); + return BM_ERR_DATA; } - if (src_type != DATA_TYPE_EXT_1N_BYTE || dst_type != DATA_TYPE_EXT_1N_BYTE) { + if (dst_format != FORMAT_RGB_PLANAR) { + BMCV_ERR_LOG("dst_img format not supported !\n"); + return BM_ERR_DATA; + } + + if (src_data_type != DATA_TYPE_EXT_1N_BYTE || dst_type != DATA_TYPE_EXT_1N_BYTE) { BMCV_ERR_LOG("src_type or dst_type not support\n"); - return BM_NOT_SUPPORTED; + return BM_ERR_DATA; } if ((input.height != output.height) || (input.width != output.width)) { BMCV_ERR_LOG("The width and height of the input and output should be same !\n"); - return BM_ERR_FAILURE; + return BM_ERR_DATA; } if ((input.height % 2 != 0) || (input.width % 2 != 0)) { BMCV_ERR_LOG("The width and height of the image 
need to be divisible by 2 !\n"); - return BM_ERR_FAILURE; + return BM_ERR_DATA; } - return BM_SUCCESS; } @@ -55,9 +60,11 @@ bm_status_t bmcv_image_bayer2rgb( bm_image input, bm_image output) { bm_status_t ret = BM_SUCCESS; - if(BM_SUCCESS !=bmcv_bayer2rgb_check(handle, convd_kernel, input, output)) { + bm_handle_check_2(handle, input, output); + ret = bmcv_bayer2rgb_check(handle, convd_kernel, input, output); + if(BM_SUCCESS != ret) { BMCV_ERR_LOG("bayer2rgb_check error\r\n"); - return BM_ERR_FAILURE; + return ret; } if (!bm_image_is_attached(output)) { @@ -78,11 +85,17 @@ bm_status_t bmcv_image_bayer2rgb( sg_device_mem_st sys_addr_br; sg_device_mem_st convd_kernel_data; sg_device_mem_st sys_addr_temp_g; + sg_device_mem_st sys_addr_temp_b; bm_api_cv_bayer2rgb_t param; param.height = input.height; param.width = input.width; - param.dst_fmt = output.image_format; + param.dst_fmt = output.image_format; + if(input.image_format == FORMAT_BAYER) { + param.src_type = 0; + } else { + param.src_type = 1; + } param.input_addr = bm_mem_get_device_addr(input_mem); param.output_addr = bm_mem_get_device_addr(output_mem); @@ -102,7 +115,15 @@ bm_status_t bmcv_image_bayer2rgb( goto FREEMEM; } + ret = sg_malloc_device_mem(handle, &sys_addr_temp_b, input.width * input.height * 4); + if (BM_SUCCESS != ret) { + bm_image_destroy(output); + goto FREEMEM; + } + + param.sys_mem_addr_temp_g = bm_mem_get_device_addr(sys_addr_temp_g.bm_device_mem); + param.sys_mem_addr_temp_b = bm_mem_get_device_addr(sys_addr_temp_b.bm_device_mem); param.sys_mem_addr_temp_ul = bm_mem_get_device_addr(sys_addr_ul.bm_device_mem); param.sys_mem_addr_temp_br = bm_mem_get_device_addr(sys_addr_br.bm_device_mem); @@ -134,7 +155,7 @@ bm_status_t bmcv_image_bayer2rgb( { case 0x1684: BMCV_ERR_LOG("bm1684 not support\n"); - ret = BM_NOT_SUPPORTED; + ret = BM_ERR_NOFEATURE; bm_image_destroy(output); goto FREEMEM; break; @@ -150,6 +171,7 @@ bm_status_t bmcv_image_bayer2rgb( default: BMCV_ERR_LOG("BM_NOT_SUPPORTED!\n"); + 
ret = BM_ERR_NOFEATURE; break; } @@ -158,8 +180,9 @@ bm_status_t bmcv_image_bayer2rgb( sg_free_device_mem(handle, input_dev_buffer_padding_ul); sg_free_device_mem(handle, input_dev_buffer_padding_br); sg_free_device_mem(handle, sys_addr_temp_g); + sg_free_device_mem(handle, sys_addr_temp_b); sg_free_device_mem(handle, sys_addr_ul); sg_free_device_mem(handle, sys_addr_br); return ret; -} \ No newline at end of file +} diff --git a/bmvid/bmcv/src/bmcv_api_bitwise.cpp b/bmvid/bmcv/src/bmcv_api_bitwise.cpp index ec992be..556f86f 100644 --- a/bmvid/bmcv/src/bmcv_api_bitwise.cpp +++ b/bmvid/bmcv/src/bmcv_api_bitwise.cpp @@ -4,6 +4,7 @@ #include "bmcv_common_bm1684.h" #include #include +#include typedef enum bmcv_bitwise_op { AND = 7, @@ -174,6 +175,7 @@ bm_status_t bmcv_image_bitwise_and( bm_image input1, bm_image input2, bm_image output) { + bm_handle_check_3(handle, input1, input2, output); return bmcv_image_bitwise(handle, input1, input2, output, AND); } @@ -182,6 +184,7 @@ bm_status_t bmcv_image_bitwise_or( bm_image input1, bm_image input2, bm_image output) { + bm_handle_check_3(handle, input1, input2, output); return bmcv_image_bitwise(handle, input1, input2, output, OR); } @@ -190,5 +193,6 @@ bm_status_t bmcv_image_bitwise_xor( bm_image input1, bm_image input2, bm_image output) { + bm_handle_check_3(handle, input1, input2, output); return bmcv_image_bitwise(handle, input1, input2, output, XOR); } diff --git a/bmvid/bmcv/src/bmcv_api_canny.cpp b/bmvid/bmcv/src/bmcv_api_canny.cpp index 70e2242..76486ae 100644 --- a/bmvid/bmcv/src/bmcv_api_canny.cpp +++ b/bmvid/bmcv/src/bmcv_api_canny.cpp @@ -1,8 +1,10 @@ #include "bmcv_api_ext.h" #include "bmcv_common_bm1684.h" +#include "bmcv_internal.h" #include #include #include +#include #define IS_YUV(a) (a == FORMAT_NV12 || a == FORMAT_NV21 || a == FORMAT_NV16 || \ @@ -88,7 +90,7 @@ static bm_status_t bmcv_canny_check( int aperture_size) { if (handle == NULL) { bmlib_log("CANNY", BMLIB_LOG_ERROR, "Can not get handle!\r\n"); - 
return BM_ERR_PARAM; + return BM_ERR_DEVNOTREADY; } if (aperture_size != 3) { bmlib_log("CANNY", BMLIB_LOG_ERROR, "Only support the aperture size is 3!\n" ); @@ -106,29 +108,29 @@ static bm_status_t bmcv_canny_check( bm_image_get_stride(output, stride_o); if (image_sw + aperture_size - 1 >= 2048) { bmlib_log("CANNY", BMLIB_LOG_ERROR, "image width is too large!\r\n"); - return BM_NOT_SUPPORTED; + return BM_ERR_DATA; } if (!IS_YUV(src_format) && src_format != FORMAT_GRAY) { bmlib_log("CANNY", BMLIB_LOG_ERROR, "Not supported input image format\n"); - return BM_NOT_SUPPORTED; + return BM_ERR_DATA; } if (dst_format != FORMAT_GRAY) { bmlib_log("CANNY", BMLIB_LOG_ERROR, "Not supported output image format\n"); - return BM_NOT_SUPPORTED; + return BM_ERR_DATA; } if (src_type != DATA_TYPE_EXT_1N_BYTE || dst_type != DATA_TYPE_EXT_1N_BYTE) { bmlib_log("CANNY", BMLIB_LOG_ERROR, "Not supported image data type\n"); - return BM_NOT_SUPPORTED; + return BM_ERR_DATA; } if (image_sh != image_dh || image_sw != image_dw) { bmlib_log("CANNY", BMLIB_LOG_ERROR, "input and output image size should be same\n"); - return BM_NOT_SUPPORTED; + return BM_ERR_DATA; } if (output.width != stride_o[0]) { bmlib_log("CANNY", BMLIB_LOG_ERROR, "output image stride should be equal to width\n"); - return BM_NOT_SUPPORTED; + return BM_ERR_DATA; } return BM_SUCCESS; } @@ -310,7 +312,7 @@ bm_status_t bmcv_image_canny( unsigned int chipid = 0x1686; bm_status_t ret = BM_SUCCESS; - + bm_handle_check_2(handle, input, output); ret = bm_get_chipid(handle, &chipid); if (BM_SUCCESS != ret) return ret; @@ -323,12 +325,12 @@ bm_status_t bmcv_image_canny( break; case 0x1686: - printf("bm1684x not support\n"); - ret = BM_NOT_SUPPORTED; + printf("current card not support\n"); + ret = BM_ERR_NOFEATURE; break; default: - ret = BM_NOT_SUPPORTED; + ret = BM_ERR_NOFEATURE; break; } diff --git a/bmvid/bmcv/src/bmcv_api_convert_to.cpp b/bmvid/bmcv/src/bmcv_api_convert_to.cpp index cec2d15..d326886 100644 --- 
a/bmvid/bmcv/src/bmcv_api_convert_to.cpp +++ b/bmvid/bmcv/src/bmcv_api_convert_to.cpp @@ -138,7 +138,7 @@ bm_status_t bmcv_convert_to_internal(bm_handle_t handle, if (BM_SUCCESS != bm_image_tensor_alloc_dev_mem(output, BMCV_HEAP0_ID)) { - return BM_ERR_FAILURE; + return BM_ERR_NOMEM; } } bm_image_tensor_get_device_mem(output, &output_img_addr); @@ -178,26 +178,28 @@ bm_status_t bmcv_convert_to_internal(bm_handle_t handle, switch (chipid) { case 0x1684:{ + if (BM_SUCCESS != bm_send_api(handle, BM_API_ID_CV_CONVERT_TO, (uint8_t *)&arg, sizeof(arg))) { BMCV_ERR_LOG("convert_to send api error\r\n"); - return BM_ERR_FAILURE; + return BM_ERR_TIMEOUT; } if(BM_SUCCESS != bm_sync_api(handle)){ BMCV_ERR_LOG("convert_to sync api error\r\n"); - return BM_ERR_FAILURE; + return BM_ERR_TIMEOUT; } break; } case BM1684X:{ if(BM_SUCCESS != bm_tpu_kernel_launch(handle, "cv_convert_to", &arg, sizeof(arg))){ BMCV_ERR_LOG("convert_to sync api error\r\n"); - return BM_ERR_FAILURE; + return BM_ERR_TIMEOUT; } // tpu_kernel_launch_sync(handle, "cv_convert_to", &arg, sizeof(arg)); break; } default: printf("ChipID is NOT supported\n"); + return BM_ERR_NOFEATURE; break; } @@ -213,6 +215,7 @@ bm_status_t bmcv_convert_to_intergrated(bm_handle_t handle, bm_device_mem_t output_img_addr_2, bm_device_mem_t convert_to_attr_addr, int times) { + bm_status_t ret = BM_SUCCESS; bm_api_cv_convert_to_inter_t arg; bm_device_mem_t input_img_buf_device[MAX_INTERGRATED_NUM]; bm_device_mem_t output_img_buf_device[MAX_INTERGRATED_NUM]; @@ -238,7 +241,7 @@ bm_status_t bmcv_convert_to_intergrated(bm_handle_t handle, &convert_to_attr_buf_device, sizeof(bmcv_convert_to_attr_t) * times)) { BMCV_ERR_LOG("bm_malloc_device_byte error\r\n"); - + ret = BM_ERR_NOMEM; goto err0; } if (BM_SUCCESS != @@ -246,7 +249,7 @@ bm_status_t bmcv_convert_to_intergrated(bm_handle_t handle, convert_to_attr_buf_device, bm_mem_get_system_addr(convert_to_attr_addr))) { BMCV_ERR_LOG("bm_memcpy_s2d error\r\n"); - + ret = BM_ERR_NOMEM; goto 
err1; } } else { @@ -255,6 +258,7 @@ bm_status_t bmcv_convert_to_intergrated(bm_handle_t handle, "convert_to_attr must be sys memory:%s:%d\n", filename(__FILE__), __LINE__); + ret = BM_ERR_DATA; goto err0; } for (int idx = 0; idx < times; idx++) { @@ -294,7 +298,7 @@ bm_status_t bmcv_convert_to_intergrated(bm_handle_t handle, bm_free_device(handle, output_img_addr[free_idx]); } } - + ret = BM_ERR_NOMEM; goto err1; } if (BM_SUCCESS != @@ -314,7 +318,7 @@ bm_status_t bmcv_convert_to_intergrated(bm_handle_t handle, bm_free_device(handle, output_img_addr[free_idx]); } } - + ret = BM_ERR_NOMEM; goto err1; } } else { @@ -337,7 +341,7 @@ bm_status_t bmcv_convert_to_intergrated(bm_handle_t handle, bm_free_device(handle, output_img_addr[free_idx]); } } - + ret = BM_ERR_NOMEM; goto err1; } if (BM_SUCCESS != @@ -355,7 +359,7 @@ bm_status_t bmcv_convert_to_intergrated(bm_handle_t handle, bm_free_device(handle, output_img_addr[free_idx]); } } - + ret = BM_ERR_NOMEM; goto err1; } } else { @@ -373,10 +377,12 @@ bm_status_t bmcv_convert_to_intergrated(bm_handle_t handle, arg.times = times; if (BM_SUCCESS != bm_send_api(handle, BM_API_ID_CV_CONVERT_TO_INTERGRATED, (uint8_t *)&arg, sizeof(arg))) { BMCV_ERR_LOG("convert_to_intergrated send api error\r\n"); + ret = BM_ERR_TIMEOUT; goto err1; } if (BM_SUCCESS != bm_sync_api(handle)) { BMCV_ERR_LOG("convert_to_intergrated sync api error\r\n"); + ret = BM_ERR_TIMEOUT; goto err1; } for (int idx = 0; idx < times; idx++) { @@ -396,6 +402,7 @@ bm_status_t bmcv_convert_to_intergrated(bm_handle_t handle, bm_free_device(handle, input_img_buf_device[free_idx]); } } + ret = BM_ERR_NOMEM; goto err1; } bm_free_device(handle, output_img_buf_device[idx]); @@ -408,14 +415,14 @@ bm_status_t bmcv_convert_to_intergrated(bm_handle_t handle, bm_free_device(handle, convert_to_attr_buf_device); } - return BM_SUCCESS; + return ret; err1: if (bm_mem_get_type(convert_to_attr_addr) == BM_MEM_TYPE_SYSTEM) { bm_free_device(handle, convert_to_attr_buf_device); } 
err0: - return BM_ERR_FAILURE; + return ret; } static bm_status_t bm_convert_to_get_stride(bm_image input, int &w_stride) { @@ -446,7 +453,7 @@ static bm_status_t bmcv_convert_to_check(bm_handle_t handle, UNUSED(convert_to_attr); if (handle == NULL) { bmlib_log("CONVERT TO", BMLIB_LOG_ERROR, "Can not get handle!\r\n"); - return BM_ERR_FAILURE; + return BM_ERR_DEVNOTREADY; } if (image_num == 0) { // bmlib_log(BMCV_LOG_TAG, @@ -455,21 +462,21 @@ static bm_status_t bmcv_convert_to_check(bm_handle_t handle, // image_num); BMCV_ERR_LOG("input image num not support:%d\n", image_num); - return BM_NOT_SUPPORTED; + return BM_ERR_PARAM; } for (int i = 0; i < image_num; i++) { if ((input[i].width != output[i].width) || (input[i].height != output[i].height)) { BMCV_ERR_LOG("input size must be same to output\n"); - return BM_NOT_SUPPORTED; + return BM_ERR_DATA; } int width_stride = 0; bm_convert_to_get_stride(output[i], width_stride); if (output[i].width != width_stride) { BMCV_ERR_LOG("output width must be equal to stride\n"); - return BM_NOT_SUPPORTED; + return BM_ERR_DATA; } // this limit maybe too strict // if (0 != memcmp(handle, @@ -496,7 +503,7 @@ static bm_status_t bmcv_convert_to_check(bm_handle_t handle, (output[i].data_type != output[i + 1].data_type)) { BMCV_ERR_LOG("input attr must be same to output\n"); - return BM_NOT_SUPPORTED; + return BM_ERR_DATA; } } if (!(((input[0].data_type == DATA_TYPE_EXT_1N_BYTE) && @@ -517,7 +524,7 @@ static bm_status_t bmcv_convert_to_check(bm_handle_t handle, (output[0].data_type == DATA_TYPE_EXT_4N_BYTE)))) { BMCV_ERR_LOG("data type not support\n"); - return BM_NOT_SUPPORTED; + return BM_ERR_DATA; } if ((input[0].data_type == DATA_TYPE_EXT_FP16) || (output[0].data_type == DATA_TYPE_EXT_FP16)|| @@ -525,7 +532,7 @@ static bm_status_t bmcv_convert_to_check(bm_handle_t handle, (output[0].data_type == DATA_TYPE_EXT_BF16)){ BMCV_ERR_LOG("data type not support\n"); - return BM_NOT_SUPPORTED; + return BM_ERR_DATA; } if 
(((input[0].image_format != FORMAT_BGR_PLANAR) && @@ -534,7 +541,7 @@ static bm_status_t bmcv_convert_to_check(bm_handle_t handle, (input[0].image_format != output[0].image_format)) { BMCV_ERR_LOG("image format not support\n"); - return BM_NOT_SUPPORTED; + return BM_ERR_DATA; } return BM_SUCCESS; @@ -545,11 +552,12 @@ bm_status_t bmcv_image_convert_to_(bm_handle_t handle, bmcv_convert_to_attr convert_to_attr, bm_image * input, bm_image * output) { - if (BM_SUCCESS != bmcv_convert_to_check( - handle, input_num, convert_to_attr, input, output)) { + bm_status_t ret = bmcv_convert_to_check( + handle, input_num, convert_to_attr, input, output); + if (BM_SUCCESS != ret) { BMCV_ERR_LOG("bm_memcpy_d2s error\r\n"); - return BM_ERR_FAILURE; + return ret; } int in_concat_status = 0; int out_concat_status = 0; @@ -573,7 +581,7 @@ bm_status_t bmcv_image_convert_to_(bm_handle_t handle, bm_free_device(handle, dmem); } - return BM_ERR_FAILURE; + return BM_ERR_NOMEM; } } } @@ -624,7 +632,7 @@ bm_status_t bmcv_image_convert_to( unsigned int chipid = BM1684X; int loop = (input_num + 3) / 4; int i = 0; - + bm_handle_check_2(handle, input[0], output[0]); ret = bm_get_chipid(handle, &chipid); if (BM_SUCCESS != ret) return ret; @@ -644,7 +652,7 @@ bm_status_t bmcv_image_convert_to( bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_ERROR, "not support, %s: %s: %d\n", filename(__FILE__), __func__, __LINE__); - ret = BM_NOT_SUPPORTED; + ret = BM_ERR_PARAM; } else if(input->data_type == DATA_TYPE_EXT_FLOAT32 || input->data_type == DATA_TYPE_EXT_1N_BYTE_SIGNED){ for (i = 0; i < loop; i++) { int num = (i == loop - 1) ? 
(input_num - (loop - 1) * 4) : 4; @@ -657,7 +665,7 @@ bm_status_t bmcv_image_convert_to( break; } default: - ret = BM_NOT_SUPPORTED; + ret = BM_ERR_NOFEATURE; break; } diff --git a/bmvid/bmcv/src/bmcv_api_copy_to.cpp b/bmvid/bmcv/src/bmcv_api_copy_to.cpp index 0b72290..19bb357 100644 --- a/bmvid/bmcv/src/bmcv_api_copy_to.cpp +++ b/bmvid/bmcv/src/bmcv_api_copy_to.cpp @@ -36,7 +36,7 @@ bm_status_t bmcv_copy_to_check(bmcv_copy_to_atrr_t copy_to_attr, "[CopyTo] input data_type and image_format must be same to " "output!\r\n"); - return BM_NOT_SUPPORTED; + return BM_ERR_DATA; } // image format check if ((input.image_format != FORMAT_RGB_PLANAR) && @@ -46,7 +46,7 @@ bm_status_t bmcv_copy_to_check(bmcv_copy_to_atrr_t copy_to_attr, (input.image_format != FORMAT_GRAY)) { BMCV_ERR_LOG("[CopyTo] image format not support\r\n"); - return BM_NOT_SUPPORTED; + return BM_ERR_DATA; } if (((input.image_format == FORMAT_RGB_PACKED) || (input.image_format == FORMAT_BGR_PACKED)) && @@ -54,7 +54,7 @@ bm_status_t bmcv_copy_to_check(bmcv_copy_to_atrr_t copy_to_attr, (input.data_type == DATA_TYPE_EXT_4N_BYTE_SIGNED))) { BMCV_ERR_LOG("[CopyTo] 4n image should match planner\r\n"); - return BM_NOT_SUPPORTED; + return BM_ERR_DATA; } if ((input.data_type == DATA_TYPE_EXT_FP16) || (output.data_type == DATA_TYPE_EXT_FP16)|| @@ -62,7 +62,7 @@ bm_status_t bmcv_copy_to_check(bmcv_copy_to_atrr_t copy_to_attr, (output.data_type == DATA_TYPE_EXT_BF16)){ BMCV_ERR_LOG("data type not support\n"); - return BM_NOT_SUPPORTED; + return BM_ERR_DATA; } // shape check @@ -70,14 +70,14 @@ bm_status_t bmcv_copy_to_check(bmcv_copy_to_atrr_t copy_to_attr, BMCV_ERR_LOG( "[CopyTo] input.with should be less than or equal to output's\r\n"); - return BM_NOT_SUPPORTED; + return BM_ERR_DATA; } if (input.height > output.height) { BMCV_ERR_LOG( "[CopyTo] input.height should be less than or equal to " "output's\r\n"); - return BM_NOT_SUPPORTED; + return BM_ERR_DATA; } int in_image_stride[3] = {0}; bm_image_get_stride(input, 
in_image_stride); @@ -88,13 +88,13 @@ bm_status_t bmcv_copy_to_check(bmcv_copy_to_atrr_t copy_to_attr, out_image_stride[0]) { BMCV_ERR_LOG("[CopyTo] width exceeds range\r\n"); - return BM_NOT_SUPPORTED; + return BM_ERR_PARAM; } // compare by elems if ((copy_to_attr.start_y + input.height) > output.height) { BMCV_ERR_LOG("[CopyTo] height exceeds range\r\n"); - return BM_NOT_SUPPORTED; + return BM_ERR_PARAM; } return BM_SUCCESS; @@ -104,10 +104,11 @@ bm_status_t bmcv_image_copy_to_(bm_handle_t handle, bmcv_copy_to_atrr_t copy_to_attr, bm_image input, bm_image output) { + bm_status_t ret = BM_SUCCESS; if (handle == NULL) { BMCV_ERR_LOG("[CopyTo] Can not get handle!\r\n"); - return BM_ERR_FAILURE; + return BM_ERR_DEVNOTREADY; } int data_size = 1; int data_type = STORAGE_MODE_1N_INT8; @@ -115,7 +116,6 @@ bm_status_t bmcv_image_copy_to_(bm_handle_t handle, int channel = 3; int planner_or_packed = PLANNER; unsigned int chipid = BM1684X; - bm_status_t ret = BM_SUCCESS; ret = bm_get_chipid(handle, &chipid); if (BM_SUCCESS != ret){ @@ -183,17 +183,17 @@ bm_status_t bmcv_image_copy_to_(bm_handle_t handle, } int elem_byte_stride = (planner_or_packed == PACKED) ? 
(data_size * 3) : (data_size); - if (BM_SUCCESS != - bmcv_copy_to_check(copy_to_attr, input, output, elem_byte_stride)) { + ret = bmcv_copy_to_check(copy_to_attr, input, output, elem_byte_stride); + if (BM_SUCCESS != ret) { BMCV_ERR_LOG("[CopyTo] bmcv_copy_to_check error!\r\n"); - return BM_ERR_FAILURE; + return ret; } if (!bm_image_is_attached(output)) { if (BM_SUCCESS != bm_image_alloc_dev_mem(output, BMCV_HEAP_ANY)) { BMCV_ERR_LOG("[CopyTo] bm_image_alloc_dev_mem error!\r\n"); - return BM_ERR_FAILURE; + return BM_ERR_NOMEM; } } bm_device_mem_t in_dev_mem, out_dev_mem; @@ -232,11 +232,11 @@ bm_status_t bmcv_image_copy_to_(bm_handle_t handle, case 0x1684:{ if (BM_SUCCESS != bm_send_api(handle, BM_API_ID_CV_COPY_TO, (uint8_t *)&arg, sizeof(arg))) { BMCV_ERR_LOG("copy_to send api error\r\n"); - return BM_ERR_FAILURE; + return BM_ERR_TIMEOUT; } if (BM_SUCCESS != bm_sync_api(handle)) { BMCV_ERR_LOG("copy_to sync api error\r\n"); - return BM_ERR_FAILURE; + return BM_ERR_TIMEOUT; } break; } @@ -245,12 +245,12 @@ bm_status_t bmcv_image_copy_to_(bm_handle_t handle, // tpu_kernel_launch_sync(handle, "sg_cv_copy_to", &arg, sizeof(arg)); if(BM_SUCCESS != bm_tpu_kernel_launch(handle, "sg_cv_copy_to", &arg, sizeof(arg))){ BMCV_ERR_LOG("copy_to launch api error\r\n"); - return BM_ERR_FAILURE; + return BM_ERR_TIMEOUT; } break; default: - return BM_NOT_SUPPORTED; + return BM_ERR_NOFEATURE; break; } return BM_SUCCESS; @@ -264,7 +264,7 @@ bm_status_t bmcv_image_copy_to( { unsigned int chipid = BM1684X; bm_status_t ret = BM_SUCCESS; - + bm_handle_check_2(handle, input, output); ret = bm_get_chipid(handle, &chipid); if (BM_SUCCESS != ret) return ret; @@ -286,13 +286,13 @@ bm_status_t bmcv_image_copy_to( bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_ERROR, "not support, %s: %s: %d\n", filename(__FILE__), __func__, __LINE__); - ret = BM_NOT_SUPPORTED; + ret = BM_ERR_PARAM; } break; } default: - ret = BM_NOT_SUPPORTED; + ret = BM_ERR_NOFEATURE; break; } return ret; diff --git 
a/bmvid/bmcv/src/bmcv_api_crop.cpp b/bmvid/bmcv/src/bmcv_api_crop.cpp index a5245a0..be47fc6 100644 --- a/bmvid/bmcv/src/bmcv_api_crop.cpp +++ b/bmvid/bmcv/src/bmcv_api_crop.cpp @@ -1,4 +1,5 @@ #include +#include #include "bmcv_api.h" #include "bmcv_api_ext.h" #include "bmcv_internal.h" @@ -35,7 +36,7 @@ INLINE static bm_image_format_ext bmcv_get_image_format_from_sc3(bm_image_format case RGB: return FORMAT_RGB_PLANAR; break; - case BGR: + case BGR: return FORMAT_BGR_PLANAR; break; case RGB_PACKED: @@ -60,23 +61,23 @@ bm_status_t bmcv_crop_check(int crop_num, BMCV_ERR_LOG( "[Crop] input data_type and image_format must be same to " "output!\r\n"); - return BM_NOT_SUPPORTED; + return BM_ERR_DATA; } if (rects[i].start_x < 0 || rects[i].start_x > input.width || rects[i].start_y < 0 || rects[i].start_y > input.height) { BMCV_ERR_LOG("[Crop] %dth rect coordinate is illegal\r\n", i); - return BM_ERR_FAILURE; + return BM_ERR_PARAM; } if (rects[i].start_x + rects[i].crop_w > input.width || rects[i].start_y + rects[i].crop_h > input.height) { BMCV_ERR_LOG("[Crop] %dth crop box is out of input range\r\n", i); - return BM_ERR_FAILURE; + return BM_ERR_PARAM; } if (rects[i].crop_w != output[i].width || rects[i].crop_h != output[i].height) { BMCV_ERR_LOG( "[Crop] %dth output size should equal to crop size\r\n", i); - return BM_ERR_FAILURE; + return BM_ERR_PARAM; } } // image format check @@ -86,14 +87,14 @@ bm_status_t bmcv_crop_check(int crop_num, (input.image_format != FORMAT_BGR_PACKED) && (input.image_format != FORMAT_GRAY)) { BMCV_ERR_LOG("[Crop] image format only support RGB/BGR/GRAY\r\n"); - return BM_NOT_SUPPORTED; + return BM_ERR_DATA; } if ((input.data_type != DATA_TYPE_EXT_1N_BYTE_SIGNED) && (input.data_type != DATA_TYPE_EXT_1N_BYTE) && (input.data_type != DATA_TYPE_EXT_FLOAT32)) { BMCV_ERR_LOG( "[Crop] image data type only support 1N int8 and float32\r\n"); - return BM_NOT_SUPPORTED; + return BM_ERR_DATA; } return BM_SUCCESS; @@ -122,7 +123,7 @@ static bm_status_t 
bmcv_image_crop(bm_handle_t handle, break; default: BMCV_ERR_LOG("[Crop] not support this format!\r\n"); - return BM_ERR_FAILURE; + return BM_ERR_DATA; } switch (input.image_format) { case FORMAT_BGR_PLANAR: @@ -175,13 +176,13 @@ static bm_status_t bmcv_image_crop(bm_handle_t handle, } default: BMCV_ERR_LOG("[Crop] not support this format!\r\n"); - return BM_ERR_FAILURE; + return BM_ERR_DATA; } if (!bm_image_is_attached(output)) { if (BM_SUCCESS != bm_image_alloc_dev_mem(output, BMCV_HEAP_ANY)) { BMCV_ERR_LOG("[Crop] bm_image_alloc_dev_mem error!\r\n"); - return BM_ERR_FAILURE; + return BM_ERR_NOMEM; } } bm_device_mem_t in_dev_mem, out_dev_mem; @@ -212,11 +213,11 @@ static bm_status_t bmcv_image_crop(bm_handle_t handle, bm_send_api( handle, BM_API_ID_MEMCPY_TENSOR, (uint8_t*)&arg, sizeof(arg))) { BMCV_ERR_LOG("crop send api error\r\n"); - return BM_ERR_FAILURE; + return BM_ERR_TIMEOUT; } if (BM_SUCCESS != bm_sync_api(handle)) { BMCV_ERR_LOG("crop sync api error\r\n"); - return BM_ERR_FAILURE; + return BM_ERR_TIMEOUT; } return BM_SUCCESS; @@ -229,7 +230,7 @@ bm_status_t bm1684_bmcv_image_crop(bm_handle_t handle, bm_image* output) { if (handle == NULL) { BMCV_ERR_LOG("[Crop] Can not get handle!\r\n"); - return BM_ERR_FAILURE; + return BM_ERR_DEVNOTREADY; } bm_status_t ret = BM_SUCCESS; ret = bmcv_crop_check(crop_num, rects, input, output); @@ -265,7 +266,7 @@ bm_status_t bm1684_bmcv_image_crop(bm_handle_t handle, } delete[] output_alloc_flag; delete[] copy_info; - return BM_ERR_FAILURE; + return BM_ERR_NOMEM; } output_alloc_flag[i] = true; } @@ -292,7 +293,7 @@ bm_status_t bm1684_bmcv_image_crop(bm_handle_t handle, break; default: BMCV_ERR_LOG("[Crop] not support this format!\r\n"); - return BM_ERR_FAILURE; + return BM_ERR_DATA; } switch (input.image_format) { case FORMAT_BGR_PLANAR: @@ -353,7 +354,7 @@ bm_status_t bm1684_bmcv_image_crop(bm_handle_t handle, } default: BMCV_ERR_LOG("[Crop] not support this format!\r\n"); - return BM_ERR_FAILURE; + return BM_ERR_DATA; 
} bm_device_mem_t dev_mem; bm_image_get_device_mem(output[i], &dev_mem); @@ -377,7 +378,7 @@ bm_status_t bm1684_bmcv_image_crop(bm_handle_t handle, } delete[] output_alloc_flag; delete[] copy_info; - return BM_ERR_FAILURE; + return BM_ERR_NOMEM; } if (BM_SUCCESS != bm_memcpy_s2d(handle, info_mem, copy_info)) { BMCV_ERR_LOG("bm_memcpy_s2d error\r\n"); @@ -391,7 +392,7 @@ bm_status_t bm1684_bmcv_image_crop(bm_handle_t handle, bm_free_device(handle, info_mem); delete[] output_alloc_flag; delete[] copy_info; - return BM_ERR_FAILURE; + return BM_ERR_NOMEM; } bm_api_memcpy_tensors_t arg; @@ -404,14 +405,14 @@ bm_status_t bm1684_bmcv_image_crop(bm_handle_t handle, delete[] output_alloc_flag; delete[] copy_info; bm_free_device(handle, info_mem); - return BM_ERR_FAILURE; + return BM_ERR_TIMEOUT; } if (BM_SUCCESS != bm_sync_api(handle)) { BMCV_ERR_LOG("crop sync api error\r\n"); delete[] output_alloc_flag; delete[] copy_info; bm_free_device(handle, info_mem); - return BM_ERR_FAILURE; + return BM_ERR_TIMEOUT; } delete[] output_alloc_flag; delete[] copy_info; @@ -429,7 +430,7 @@ bm_status_t bmcv_image_crop( { unsigned int chipid = BM1684X; bm_status_t ret = BM_SUCCESS; - + bm_handle_check_2(handle, input, output[0]); ret = bm_get_chipid(handle, &chipid); if (BM_SUCCESS != ret) return ret; @@ -447,7 +448,7 @@ bm_status_t bmcv_image_crop( break; default: - ret = BM_NOT_SUPPORTED; + ret = BM_ERR_NOFEATURE; break; } @@ -471,24 +472,24 @@ bm_status_t bmcv_img_crop(bm_handle_t handle, if (handle == NULL) { bmlib_log("CROP", BMLIB_LOG_ERROR, "Can not get handle!\r\n"); - return BM_ERR_FAILURE; + return BM_ERR_DEVNOTREADY; } if (channels != 1 && channels != 3) { bmlib_log("CROP", BMLIB_LOG_ERROR, "channels should be 1 or 3!\r\n"); - return BM_NOT_SUPPORTED; + return BM_ERR_PARAM; } if (input.color_space != COLOR_RGB || output.color_space != COLOR_RGB) { bmlib_log("CROP", BMLIB_LOG_ERROR, "color_space of input and output bmcv_image should be " "COLOR_RGB!\r\n"); - return 
BM_NOT_SUPPORTED; + return BM_ERR_DATA; } if (input.image_format != BGR && input.image_format != RGB) { bmlib_log("CROP", BMLIB_LOG_ERROR, "image_format of input bmcv_image should be RGB or BGR!\r\n"); - return BM_NOT_SUPPORTED; + return BM_ERR_DATA; } if (input.data_format != DATA_TYPE_FLOAT && input.data_format != DATA_TYPE_BYTE) { @@ -496,7 +497,7 @@ bm_status_t bmcv_img_crop(bm_handle_t handle, BMLIB_LOG_ERROR, "data_format of input bmcv_image should be DATA_TYPE_FLOAT " "or DATA_TYPE_BYTE!\r\n"); - return BM_NOT_SUPPORTED; + return BM_ERR_DATA; } if (output.data_format != DATA_TYPE_FLOAT && output.data_format != DATA_TYPE_BYTE) { @@ -504,13 +505,13 @@ bm_status_t bmcv_img_crop(bm_handle_t handle, BMLIB_LOG_ERROR, "data_format of output bmcv_image should be DATA_TYPE_FLOAT " "or DATA_TYPE_BYTE!\r\n"); - return BM_NOT_SUPPORTED; + return BM_ERR_DATA; } if (output.stride[0] < output.image_width) { bmlib_log("CROP", BMLIB_LOG_ERROR, "stride of output should be greater than width!\r\n"); - return BM_NOT_SUPPORTED; + return BM_ERR_DATA; } int input_n = 1; diff --git a/bmvid/bmcv/src/bmcv_api_csc_convert_to.cpp b/bmvid/bmcv/src/bmcv_api_csc_convert_to.cpp index 380b423..5fd7e4d 100644 --- a/bmvid/bmcv/src/bmcv_api_csc_convert_to.cpp +++ b/bmvid/bmcv/src/bmcv_api_csc_convert_to.cpp @@ -20,25 +20,25 @@ static bm_status_t bm1684_image_csc_convert_to_check( if (handle == NULL) { bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_ERROR, "handle is nullptr"); - return BM_ERR_FAILURE; + return BM_ERR_DEVNOTREADY; } if (input == NULL || output == NULL) { bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_ERROR, "input or output is nullptr"); - return BM_ERR_FAILURE; + return BM_ERR_DATA; } for(int i = 0; i < img_num; i++){ if(input[i].data_type != DATA_TYPE_EXT_1N_BYTE) { bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_WARNING, "vpp only support DATA_TYPE_EXT_1N_BYTE %s: %s: %d\n", filename(__FILE__), __func__, __LINE__); - return BM_NOT_SUPPORTED; + return BM_ERR_DATA; } if(input[i].image_format != input[0].image_format) 
{ bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_WARNING, "input image list format must be the same %s: %s: %d\n", filename(__FILE__), __func__, __LINE__); - return BM_NOT_SUPPORTED; + return BM_ERR_DATA; } } for(int i = 0; i < crop_num; i++){ @@ -46,30 +46,30 @@ static bm_status_t bm1684_image_csc_convert_to_check( { bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_WARNING, "vpp only support DATA_TYPE_EXT_1N_BYTE %s: %s: %d\n", filename(__FILE__), __func__, __LINE__); - return BM_NOT_SUPPORTED; + return BM_ERR_DATA; } if(output[i].image_format < FORMAT_RGB_PLANAR || output[i].image_format > FORMAT_BGR_PLANAR) { bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_WARNING, "output image format not supposted %s: %s: %d\n", filename(__FILE__), __func__, __LINE__); - return BM_NOT_SUPPORTED; + return BM_ERR_DATA; } if(output[i].image_format != output[0].image_format) { bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_WARNING, "output image list format must be the same %s: %s: %d\n", filename(__FILE__), __func__, __LINE__); - return BM_NOT_SUPPORTED; + return BM_ERR_DATA; } } if (padding_attr == NULL){ bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_ERROR, \ "padding_attr should not be NULL err %s: %s: %d\n", __FILE__, __func__, __LINE__); - return BM_ERR_FAILURE; + return BM_ERR_PARAM; } if (convert_to_attr == NULL){ bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_ERROR, \ "convert_to_attr should not be NULL err %s: %s: %d\n", __FILE__, __func__, __LINE__); - return BM_ERR_FAILURE; + return BM_ERR_PARAM; } return ret; } @@ -91,6 +91,7 @@ static bm_status_t bm1684_image_csc_convert_to( bm_status_t ret = BM_SUCCESS; int* crop_num_vec = NULL; bmcv_rect_t *crop_rect = NULL; + bmcv_copy_to_atrr_t copy_to_attr; if(crop_rect_ == NULL && crop_num_vec_ == NULL){ crop_num = img_num; } else if (crop_rect_ != NULL && crop_num_vec_ != NULL){ @@ -102,7 +103,7 @@ static bm_status_t bm1684_image_csc_convert_to( } else { bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_ERROR, \ "crop_num_vec or crop_rect should not be NULL err %s: %s: %d\n", __FILE__, __func__, __LINE__); - return 
BM_ERR_FAILURE; + return BM_ERR_PARAM; } ret = bm1684_image_csc_convert_to_check(handle, img_num, crop_num, input, output, padding_attr, convert_to_attr); @@ -138,13 +139,17 @@ static bm_status_t bm1684_image_csc_convert_to( } } - csc_matrix_t black_matrix; - bmcv_copy_to_atrr_t copy_to_attr; - memset(&black_matrix, 0, sizeof(csc_matrix_t)); - ret = bm1684_vpp_csc_matrix_convert(handle, crop_num, input[0], output, CSC_USER_DEFINED_MATRIX, &black_matrix); - if(ret != BM_SUCCESS){ - bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_WARNING, "get black background failed %s: %s: %d\n", filename(__FILE__), __func__, __LINE__); - goto fail1; + if(padding_attr[0].if_memset == 1){ + csc_matrix_t black_matrix; + memset(&black_matrix, 0, sizeof(csc_matrix_t)); + black_matrix.csc_add0 = ((int)padding_attr[0].padding_r) << 10; + black_matrix.csc_add1 = ((int)padding_attr[0].padding_g) << 10; + black_matrix.csc_add2 = ((int)padding_attr[0].padding_b) << 10; + ret = bm1684_vpp_csc_matrix_convert(handle, crop_num, input[0], output, CSC_USER_DEFINED_MATRIX, &black_matrix); + if(ret != BM_SUCCESS){ + bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_WARNING, "get black background failed %s: %s: %d\n", filename(__FILE__), __func__, __LINE__); + goto fail1; + } } crop_idx = 0; for(int i = 0; i < img_num; i++){ @@ -200,6 +205,7 @@ bm_status_t bmcv_image_vpp_basic_v2( { bm_status_t ret = BM_SUCCESS; unsigned int chipid = BM1684X; + bm_handle_check_2(handle, *input, *output); ret = bm_get_chipid(handle, &chipid); if (BM_SUCCESS != ret) return ret; @@ -211,7 +217,7 @@ bm_status_t bmcv_image_vpp_basic_v2( break; default: - ret = BM_NOT_SUPPORTED; + ret = BM_ERR_NOFEATURE; break; } return ret; @@ -232,7 +238,7 @@ bm_status_t bmcv_image_csc_convert_to( { unsigned int chipid = BM1684X; bm_status_t ret = BM_SUCCESS; - + bm_handle_check_2(handle, *input, *output); ret = bm_get_chipid(handle, &chipid); if (BM_SUCCESS != ret) return ret; @@ -252,7 +258,7 @@ bm_status_t bmcv_image_csc_convert_to( break; default: - ret = 
BM_NOT_SUPPORTED; + ret = BM_ERR_NOFEATURE; break; } diff --git a/bmvid/bmcv/src/bmcv_api_cv_sort.cpp b/bmvid/bmcv/src/bmcv_api_cv_sort.cpp index e42b6bb..b069f16 100644 --- a/bmvid/bmcv/src/bmcv_api_cv_sort.cpp +++ b/bmvid/bmcv/src/bmcv_api_cv_sort.cpp @@ -15,6 +15,7 @@ bm_status_t bmcv_sort_test_bm1684(bm_handle_t handle, int location, int data_cnt, int sort_cnt) { + bm_status_t ret; bm_api_cv_sort_test_t arg; bm_device_mem_t src_index_buf_device; bm_device_mem_t src_data_buf_device; @@ -26,7 +27,7 @@ bm_status_t bmcv_sort_test_bm1684(bm_handle_t handle, &src_index_buf_device, sizeof(int) * data_cnt)) { BMCV_ERR_LOG("bm_malloc_device_byte error\r\n"); - + ret = BM_ERR_NOMEM; goto err0; } if (BM_SUCCESS != @@ -34,7 +35,7 @@ bm_status_t bmcv_sort_test_bm1684(bm_handle_t handle, src_index_buf_device, bm_mem_get_system_addr(src_index_addr))) { BMCV_ERR_LOG("bm_memcpy_s2d error\r\n"); - + ret = BM_ERR_NOMEM; goto err1; } } else { @@ -46,7 +47,7 @@ bm_status_t bmcv_sort_test_bm1684(bm_handle_t handle, &src_data_buf_device, sizeof(bm_sort_data_type_t) * data_cnt)) { BMCV_ERR_LOG("bm_malloc_device_byte error\r\n"); - + ret = BM_ERR_NOMEM; goto err1; } if (BM_SUCCESS != @@ -54,7 +55,7 @@ bm_status_t bmcv_sort_test_bm1684(bm_handle_t handle, src_data_buf_device, bm_mem_get_system_addr(src_data_addr))) { BMCV_ERR_LOG("bm_memcpy_s2d error\r\n"); - + ret = BM_ERR_NOMEM; goto err2; } } else { @@ -65,7 +66,7 @@ bm_status_t bmcv_sort_test_bm1684(bm_handle_t handle, &dst_index_buf_device, sizeof(int) * sort_cnt)) { BMCV_ERR_LOG("bm_malloc_device_byte error\r\n"); - + ret = BM_ERR_NOMEM; goto err2; } if (BM_SUCCESS != @@ -73,7 +74,7 @@ bm_status_t bmcv_sort_test_bm1684(bm_handle_t handle, dst_index_buf_device, bm_mem_get_system_addr(dst_index_addr))) { BMCV_ERR_LOG("bm_memcpy_s2d error\r\n"); - + ret = BM_ERR_NOMEM; goto err3; } } else { @@ -85,7 +86,7 @@ bm_status_t bmcv_sort_test_bm1684(bm_handle_t handle, &dst_data_buf_device, sizeof(bm_sort_data_type_t) * sort_cnt)) { 
BMCV_ERR_LOG("bm_malloc_device_byte error\r\n"); - + ret = BM_ERR_NOMEM; goto err3; } if (BM_SUCCESS != @@ -93,7 +94,7 @@ bm_status_t bmcv_sort_test_bm1684(bm_handle_t handle, dst_data_buf_device, bm_mem_get_system_addr(dst_data_addr))) { BMCV_ERR_LOG("bm_memcpy_s2d error\r\n"); - + ret = BM_ERR_NOMEM; goto err4; } } else { @@ -111,10 +112,12 @@ bm_status_t bmcv_sort_test_bm1684(bm_handle_t handle, if (BM_SUCCESS != bm_send_api(handle, BM_API_ID_CV_SROT_TEST, (uint8_t *)&arg, sizeof(arg))) { BMCV_ERR_LOG("sort_test send api error\r\n"); + ret = BM_ERR_TIMEOUT; goto err4; } if (BM_SUCCESS != bm_sync_api(handle)) { BMCV_ERR_LOG("sort_test sync api error\r\n"); + ret = BM_ERR_TIMEOUT; goto err4; } @@ -123,7 +126,7 @@ bm_status_t bmcv_sort_test_bm1684(bm_handle_t handle, bm_mem_get_system_addr(dst_data_addr), dst_data_buf_device)) { BMCV_ERR_LOG("bm_memcpy_d2s error\r\n"); - + ret = BM_ERR_NOMEM; goto err4; } bm_free_device(handle, dst_data_buf_device); @@ -133,7 +136,7 @@ bm_status_t bmcv_sort_test_bm1684(bm_handle_t handle, bm_mem_get_system_addr(dst_index_addr), dst_index_buf_device)) { BMCV_ERR_LOG("bm_memcpy_d2s error\r\n"); - + ret = BM_ERR_NOMEM; goto err3; } bm_free_device(handle, dst_index_buf_device); @@ -164,7 +167,7 @@ bm_status_t bmcv_sort_test_bm1684(bm_handle_t handle, bm_free_device(handle, src_index_buf_device); } err0: - return BM_ERR_FAILURE; + return ret; } bm_status_t bmcv_sort_test(bm_handle_t handle, @@ -199,12 +202,12 @@ bm_status_t bmcv_sort_test(bm_handle_t handle, break; case BM1684X: - printf("bm1684x not support\n"); - ret = BM_NOT_SUPPORTED; + printf("current card not support\n"); + ret = BM_ERR_NOFEATURE; break; default: - ret = BM_NOT_SUPPORTED; + ret = BM_ERR_NOFEATURE; break; } diff --git a/bmvid/bmcv/src/bmcv_api_dct.cpp b/bmvid/bmcv/src/bmcv_api_dct.cpp index 688f4b4..5d6fb72 100644 --- a/bmvid/bmcv/src/bmcv_api_dct.cpp +++ b/bmvid/bmcv/src/bmcv_api_dct.cpp @@ -1,7 +1,9 @@ #include #include #include +#include #include 
"bmcv_api_ext.h" +#include "bmcv_internal.h" #include "bmcv_common_bm1684.h" #include "device_mem_allocator.h" @@ -9,17 +11,17 @@ static bm_status_t check_image(bm_image input) { if (input.image_format != FORMAT_GRAY) { bmlib_log("DCT", BMLIB_LOG_ERROR, "input format only support gray!\r\n"); - return BM_NOT_SUPPORTED; + return BM_ERR_DATA; } if (input.data_type != DATA_TYPE_EXT_FLOAT32) { bmlib_log("DCT", BMLIB_LOG_ERROR, "input data type only support float32!\r\n"); - return BM_NOT_SUPPORTED; + return BM_ERR_DATA; } int stride[3]; bm_image_get_stride(input, stride); if ((u32)stride[0] != input.width * sizeof(float)) { bmlib_log("DCT", BMLIB_LOG_ERROR, "stride[0] should equal to input.width*sizeof(float)!\r\n"); - return BM_NOT_SUPPORTED; + return BM_ERR_DATA; } return BM_SUCCESS; } @@ -78,12 +80,12 @@ bm_status_t bmcv_dct_coeff( break; case 0x1686: - printf("bm1684x not support\n"); - ret = BM_NOT_SUPPORTED; + printf("current card not support\n"); + ret = BM_ERR_NOFEATURE; break; default: - ret = BM_NOT_SUPPORTED; + ret = BM_ERR_NOFEATURE; break; } @@ -190,12 +192,12 @@ static bm_status_t __bmcv_dct_inner( break; case 0x1686: - printf("bm1684x not support\n"); - ret = BM_NOT_SUPPORTED; + printf("current card not support\n"); + ret = BM_ERR_NOFEATURE; break; default: - ret = BM_NOT_SUPPORTED; + ret = BM_ERR_NOFEATURE; break; } @@ -209,6 +211,7 @@ bm_status_t bmcv_image_dct_with_coeff( bm_device_mem_t wcoeff, bm_image output ){ + bm_handle_check_2(handle, input, output); return __bmcv_dct_inner(handle, input, 0, 1, hcoeff, wcoeff, output); } @@ -219,5 +222,6 @@ bm_status_t bmcv_image_dct( bool is_inversed ){ bm_device_mem_t dummy_mem; + bm_handle_check_2(handle, input, output); return __bmcv_dct_inner(handle, input, is_inversed, 0, dummy_mem, dummy_mem, output); } diff --git a/bmvid/bmcv/src/bmcv_api_draw_lines.cpp b/bmvid/bmcv/src/bmcv_api_draw_lines.cpp index 40d52d2..713b2a7 100644 --- a/bmvid/bmcv/src/bmcv_api_draw_lines.cpp +++ 
b/bmvid/bmcv/src/bmcv_api_draw_lines.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #ifdef __linux__ #include @@ -202,7 +203,7 @@ static bm_status_t bmcv_draw_line_check( int thickness) { if (handle == NULL) { bmlib_log("DRAW_LINE", BMLIB_LOG_ERROR, "Can not get handle!\r\n"); - return BM_ERR_PARAM; + return BM_ERR_DEVNOTREADY; } if (thickness <= 0) { bmlib_log("DRAW_LINE", BMLIB_LOG_ERROR, "thickness should greater than 0!\r\n"); @@ -210,7 +211,7 @@ static bm_status_t bmcv_draw_line_check( } if (!IS_CS_YUV(image.image_format) && image.image_format != FORMAT_GRAY) { bmlib_log("DRAW_LINE", BMLIB_LOG_ERROR, "image format not supported %d !\r\n", image.image_format); - return BM_ERR_PARAM; + return BM_ERR_DATA; } return BM_SUCCESS; } @@ -223,6 +224,8 @@ bm_status_t bmcv_image_draw_lines( int line_num, bmcv_color_t color, int thickness) { + bm_status_t ret = BM_SUCCESS; + bm_handle_check_1(handle, image); if (BM_SUCCESS != bmcv_draw_line_check(handle, image, thickness)) { return BM_ERR_FAILURE; } @@ -279,14 +282,14 @@ bm_status_t bmcv_image_draw_lines( return ret; } u64 param_addr_mapped = get_mapped_addr(handle, ¶m_mem); - int ret = bmcpu_exec_function_ext(handle, + ret = (bm_status_t)bmcpu_exec_function_ext(handle, process_id, (char*)"bmcv_cpu_draw_line", (void*)¶m_addr_mapped, sizeof(void*), 1, timeout); - if (ret != 0) { + if (ret != BM_SUCCESS) { bmlib_log("DRAW_LINE", BMLIB_LOG_ERROR, "exec function failed! 
return %d\r\n", ret); return BM_ERR_FAILURE; } diff --git a/bmvid/bmcv/src/bmcv_api_fft.cpp b/bmvid/bmcv/src/bmcv_api_fft.cpp index b245ffa..3c1efe2 100644 --- a/bmvid/bmcv/src/bmcv_api_fft.cpp +++ b/bmvid/bmcv/src/bmcv_api_fft.cpp @@ -1,6 +1,7 @@ #include "bmcv_api_ext.h" #include "bmcv_common_bm1684.h" - +#include "bmcv_bm1684x.h" +#include "bmcv_internal.h" #include #include #include @@ -316,7 +317,7 @@ static bm_status_t bmcv_fft_1d_execute_bm1684(bm_handle_t handle, return ret; } -static bm_status_t bmcv_fft_1d_execute(bm_handle_t handle, +static bm_status_t bmcv_fft_1d_execute_bm1684X(bm_handle_t handle, bm_device_mem_t inputReal, bm_device_mem_t inputImag, bm_device_mem_t outputReal, @@ -324,13 +325,45 @@ static bm_status_t bmcv_fft_1d_execute(bm_handle_t handle, const void *plan, bool realInput, int trans) { - unsigned int chipid = 0x1686; bm_status_t ret = BM_SUCCESS; + sg_api_cv_fft_t api; + auto P = reinterpret_cast(plan); + api.XR = bm_mem_get_device_addr(inputReal); + api.XI = realInput ? 0 : bm_mem_get_device_addr(inputImag); + api.YR = bm_mem_get_device_addr(outputReal); + api.YI = bm_mem_get_device_addr(outputImag); + api.ER = bm_mem_get_device_addr(P->ER); + api.EI = bm_mem_get_device_addr(P->EI); + api.batch = P->batch; + api.len = P->L; + api.forward = P->forward ? 1 : 0; + api.realInput = realInput ? 
1 : 0; + api.trans = trans; + for (size_t i = 0; i < P->factors.size(); ++i) + api.factors[i] = P->factors[i]; + api.factorSize = static_cast(P->factors.size()); + ret = bm_tpu_kernel_launch(handle, "cv_fft", &api, sizeof(api)); + if (BM_SUCCESS != ret) { + bmlib_log("FFT", BMLIB_LOG_ERROR, "fft sync api error\n"); + return BM_ERR_FAILURE; + } + return ret; +} + +static bm_status_t bmcv_fft_1d_execute(bm_handle_t handle, + bm_device_mem_t inputReal, + bm_device_mem_t inputImag, + bm_device_mem_t outputReal, + bm_device_mem_t outputImag, + const void *plan, + bool realInput, + int trans) { + unsigned int chipid = 0x1686; + bm_status_t ret = BM_SUCCESS; ret = bm_get_chipid(handle, &chipid); if (BM_SUCCESS != ret) return ret; - switch(chipid) { @@ -345,9 +378,15 @@ static bm_status_t bmcv_fft_1d_execute(bm_handle_t handle, trans); break; - case 0x1686: - printf("bm1684x not support\n"); - ret = BM_NOT_SUPPORTED; + case 0x1686: + ret = bmcv_fft_1d_execute_bm1684X(handle, inputReal, + inputImag, + outputReal, + outputImag, + plan, + + realInput, + trans); break; default: diff --git a/bmvid/bmcv/src/bmcv_api_fill_rectangle.cpp b/bmvid/bmcv/src/bmcv_api_fill_rectangle.cpp index 46b89ea..e64fb19 100644 --- a/bmvid/bmcv/src/bmcv_api_fill_rectangle.cpp +++ b/bmvid/bmcv/src/bmcv_api_fill_rectangle.cpp @@ -1,4 +1,5 @@ #include +#include #include "bmcv_api.h" #include "bmcv_internal.h" #include "bmcv_common_bm1684.h" @@ -17,11 +18,11 @@ static bm_status_t bmcv_draw_solid_rectangle(bm_handle_t handle, if (BM_SUCCESS != bm_send_api(handle, BM_API_ID_MEMSET_BYTE, (uint8_t *)&api, sizeof(api))) { BMCV_ERR_LOG("fill rectangle send api error\r\n"); - return BM_ERR_FAILURE; + return BM_ERR_TIMEOUT; } if (BM_SUCCESS != bm_sync_api(handle)) { BMCV_ERR_LOG("fill rectangle sync api error\r\n"); - return BM_ERR_FAILURE; + return BM_ERR_TIMEOUT; } return BM_SUCCESS; } @@ -155,7 +156,7 @@ bm_status_t bmcv_image_fill_rectangle(bm_handle_t handle, } else { BMCV_ERR_LOG("error currently not 
support this format to fill rectangle\n"); - return BM_NOT_SUPPORTED; + return BM_ERR_DATA; } return BM_SUCCESS; @@ -226,20 +227,21 @@ bm_status_t bmcv_image_fill_rectangle(bm_handle_t handle, } if(!image.image_private) { BMCV_ERR_LOG("invalidate image, not created\n"); - return BM_ERR_PARAM; + return BM_ERR_DATA; } if(image.data_type != DATA_TYPE_EXT_1N_BYTE) { BMCV_ERR_LOG("invalidate image, data type should be DATA_TYPE_EXT_1N_BYTE\n"); - return BM_ERR_PARAM; + return BM_ERR_DATA; } if(!bm_image_is_attached(image)) { BMCV_ERR_LOG("invalidate image, please attach device memory\n"); - return BM_ERR_PARAM; + return BM_ERR_DATA; } if(image.height >= (1 << 16) || image.width >= (1 << 16)) { BMCV_ERR_LOG("Not support such big size image\n"); - return BM_NOT_SUPPORTED; + return BM_ERR_DATA; } + bm_handle_check_1(handle, image); bm_status_t ret = BM_SUCCESS; unsigned int chipid = BM1684X; @@ -255,9 +257,10 @@ bm_status_t bmcv_image_fill_rectangle(bm_handle_t handle, { for (int i = 0; i < rect_num; i++) { bmcv_rect_t rect = refine_rect(rects[i], image.height, image.width); - if (bmcv_image_fill_rectangle(handle, image, rect, r, g, b) != BM_SUCCESS) { + ret = bmcv_image_fill_rectangle(handle, image, rect, r, g, b); + if (ret != BM_SUCCESS) { BMCV_ERR_LOG("error call fill rectangle\n"); - return BM_ERR_FAILURE; + return ret; } } break; @@ -268,13 +271,13 @@ bm_status_t bmcv_image_fill_rectangle(bm_handle_t handle, calculate_yuv(r, g, b, fill_val, fill_val + 1, fill_val + 2); ret = bm1684x_fill_vpp_rectangle(handle, image, rect_num, rects, fill_val[0], fill_val[1], fill_val[2]); if(ret!=BM_SUCCESS){ - BMCV_ERR_LOG("error 1684x fill rectangle\n"); + BMCV_ERR_LOG("error call fill rectangle\n"); } break; } default: { - return BM_NOT_SUPPORTED; + return BM_ERR_NOFEATURE; } } return ret; diff --git a/bmvid/bmcv/src/bmcv_api_fusion.cpp b/bmvid/bmcv/src/bmcv_api_fusion.cpp index 4474eb8..c78c530 100755 --- a/bmvid/bmcv/src/bmcv_api_fusion.cpp +++ 
b/bmvid/bmcv/src/bmcv_api_fusion.cpp @@ -5,6 +5,7 @@ #include "pcie_cpu/bmcv_api_struct.h" #include #include +#include #include #include @@ -18,7 +19,7 @@ static bm_status_t bmcv_fusion_check( bm_thresh_type_t type) { if (handle == NULL) { bmlib_log("FUSION", BMLIB_LOG_ERROR, "Can not get handle!\r\n"); - return BM_ERR_PARAM; + return BM_ERR_DEVNOTREADY; } if (type >= BM_THRESH_TYPE_MAX) { bmlib_log("FUSION", BMLIB_LOG_ERROR, "Threshold type is error!\r\n"); @@ -29,23 +30,23 @@ static bm_status_t bmcv_fusion_check( input2.height != output.height || input2.width != output.width) { bmlib_log("FUSION", BMLIB_LOG_ERROR, "input and output image size should be same\n"); - return BM_NOT_SUPPORTED; + return BM_ERR_DATA; } if (input1.image_format != FORMAT_GRAY || input2.image_format != FORMAT_GRAY) { bmlib_log("FUSION", BMLIB_LOG_ERROR, "Not supported input image format, only GRAY supported!\n"); - return BM_NOT_SUPPORTED; + return BM_ERR_DATA; } if (input1.image_format != output.image_format || input2.image_format != output.image_format) { bmlib_log("FUSION", BMLIB_LOG_ERROR, "input and output image format should be same\n"); - return BM_NOT_SUPPORTED; + return BM_ERR_DATA; } if (input1.data_type != DATA_TYPE_EXT_1N_BYTE || input2.data_type != DATA_TYPE_EXT_1N_BYTE || output.data_type != DATA_TYPE_EXT_1N_BYTE) { bmlib_log("FUSION", BMLIB_LOG_ERROR, "Not supported image data type\n"); - return BM_NOT_SUPPORTED; + return BM_ERR_DATA; } return BM_SUCCESS; } @@ -92,7 +93,7 @@ bm_status_t bmcv_image_fusion_bm1684( if (BM_SUCCESS != ret) { bmlib_log("FUSION", BMLIB_LOG_ERROR, "kernel d2s failed!\r\n"); delete [] kernel; - return BM_ERR_FAILURE; + return BM_ERR_NOMEM; } bm_api_cv_fusion_t api; api.input1_addr = bm_mem_get_device_addr(input1_mem); @@ -139,7 +140,7 @@ bm_status_t bmcv_image_fusion( unsigned int chipid = BM1684X; bm_status_t ret = BM_SUCCESS; - + bm_handle_check_3(handle, input1, input2, output); ret = bm_get_chipid(handle, &chipid); if (BM_SUCCESS != ret) return 
ret; @@ -154,12 +155,12 @@ bm_status_t bmcv_image_fusion( break; case BM1684X: - printf("bm1684x not support\n"); - ret = BM_NOT_SUPPORTED; + printf("current card not support\n"); + ret = BM_ERR_NOFEATURE; break; default: - ret = BM_NOT_SUPPORTED; + ret = BM_ERR_NOFEATURE; break; } diff --git a/bmvid/bmcv/src/bmcv_api_gaussian_blur.cpp b/bmvid/bmcv/src/bmcv_api_gaussian_blur.cpp index 90d2fd2..099045c 100644 --- a/bmvid/bmcv/src/bmcv_api_gaussian_blur.cpp +++ b/bmvid/bmcv/src/bmcv_api_gaussian_blur.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include using namespace std; @@ -67,7 +68,7 @@ static void create_gaussian_kernel( } } -static bm_status_t bmcv_gaussian_blur_check( +static bm_status_t bmcv_gaussian_blur_check_bm1684( bm_handle_t handle, bm_image input, bm_image output, @@ -122,6 +123,55 @@ static bm_status_t bmcv_gaussian_blur_check( return BM_SUCCESS; } +static bm_status_t bmcv_gaussian_blur_check_bm1684x(bm_handle_t handle, bm_image input, bm_image output, + int kw, int kh) { + if (handle == NULL) { + bmlib_log("GAUSSIAN_BLUR", BMLIB_LOG_ERROR, "Can not get handle!\r\n"); + return BM_ERR_PARAM; + } + if (kw > 3 || kh > 3) { + bmlib_log("GAUSSIAN_BLUR", BMLIB_LOG_ERROR, "The kernel size only support 3 now!\n"); + return BM_ERR_PARAM; + } + bm_image_format_ext src_format = input.image_format; + bm_image_data_format_ext src_type = input.data_type; + bm_image_format_ext dst_format = output.image_format; + bm_image_data_format_ext dst_type = output.data_type; + int image_sh = input.height; + int image_sw = input.width; + int image_dh = output.height; + int image_dw = output.width; + + if (image_sw > 4096) { + bmlib_log("GAUSSIAN_BLUR", BMLIB_LOG_ERROR, "image width should not be greater than 4096!\r\n"); + return BM_NOT_SUPPORTED; + } + if (src_format != FORMAT_RGB_PLANAR && + src_format != FORMAT_BGR_PLANAR && + src_format != FORMAT_RGB_PACKED && + src_format != FORMAT_BGR_PACKED && + src_format != FORMAT_BGRP_SEPARATE && + src_format != 
FORMAT_RGBP_SEPARATE && + src_format != FORMAT_GRAY) { + bmlib_log("GAUSSIAN_BLUR", BMLIB_LOG_ERROR, "Not supported input image format!\n"); + return BM_NOT_SUPPORTED; + } + if (dst_format != src_format) { + bmlib_log("GAUSSIAN_BLUR", BMLIB_LOG_ERROR, "input and output image format should be same!\n"); + return BM_NOT_SUPPORTED; + } + if (src_type != DATA_TYPE_EXT_1N_BYTE || + dst_type != DATA_TYPE_EXT_1N_BYTE) { + bmlib_log("GAUSSIAN_BLUR", BMLIB_LOG_ERROR, "Not supported image data type\n"); + return BM_NOT_SUPPORTED; + } + if (image_sh != image_dh || image_sw != image_dw) { + bmlib_log("GAUSSIAN_BLUR", BMLIB_LOG_ERROR, "input and output image size should be same\n"); + return BM_NOT_SUPPORTED; + } + return BM_SUCCESS; +} + bm_status_t bmcv_image_gaussian_blur_bm1684( bm_handle_t handle, bm_image input, @@ -131,7 +181,7 @@ bm_status_t bmcv_image_gaussian_blur_bm1684( float sigmaX, float sigmaY) { bm_status_t ret = BM_SUCCESS; - ret = bmcv_gaussian_blur_check(handle, input, output, kw, kh); + ret = bmcv_gaussian_blur_check_bm1684(handle, input, output, kw, kh); if (BM_SUCCESS != ret) { return ret; } @@ -226,6 +276,92 @@ bm_status_t bmcv_image_gaussian_blur_bm1684( return BM_SUCCESS; } +bm_status_t bmcv_image_gaussian_blur_bm1684x( + bm_handle_t handle, + bm_image input, + bm_image output, + int kw, + int kh, + float sigmaX, + float sigmaY) { + bm_status_t ret = BM_SUCCESS; + ret = bmcv_gaussian_blur_check_bm1684x(handle, input, output, kw, kh); + if (BM_SUCCESS != ret) { + return ret; + } + float* kernel = new float [kw * kh]; + create_gaussian_kernel(kernel, kw, kh, sigmaX, sigmaY); + bm_device_mem_t kernel_mem; + ret = bm_malloc_device_byte(handle, &kernel_mem, kw * kh * sizeof(float)); + if (BM_SUCCESS != ret) { + delete [] kernel; + return ret; + } + ret = bm_memcpy_s2d(handle, kernel_mem, kernel); + if (BM_SUCCESS != ret) { + bm_free_device(handle, kernel_mem); + delete [] kernel; + return ret; + } + if (!bm_image_is_attached(output)) { + ret = 
bm_image_alloc_dev_mem(output, BMCV_HEAP_ANY); + if (ret != BM_SUCCESS) { + bm_free_device(handle, kernel_mem); + delete [] kernel; + return ret; + } + } + // construct and send api + int stride_i[3], stride_o[3]; + bm_image_get_stride(input, stride_i); + bm_image_get_stride(output, stride_o); + bm_device_mem_t input_mem[3]; + bm_image_get_device_mem(input, input_mem); + bm_device_mem_t output_mem[3]; + bm_image_get_device_mem(output, output_mem); + int channel = bm_image_get_plane_num(input); + bm_api_cv_filter_t api; + api.channel = channel; + api.kernel_addr = bm_mem_get_device_addr(kernel_mem); + api.kh = kh; + api.kw = kw; + api.delta = 0; + api.is_packed = (input.image_format == FORMAT_RGB_PACKED || + input.image_format == FORMAT_BGR_PACKED); + api.out_type = 0; // 0-uint8 1-uint16 + for (int i = 0; i < channel; i++) { + api.input_addr[i] = bm_mem_get_device_addr(input_mem[i]); + api.output_addr[i] = bm_mem_get_device_addr(output_mem[i]); + api.width[i] = input.image_private->memory_layout[i].W / (api.is_packed ? 
3 : 1); + api.height[i] = input.image_private->memory_layout[i].H; + api.stride_i[i] = stride_i[i]; + api.stride_o[i] = stride_o[i]; + } + if (input.image_format == FORMAT_RGB_PLANAR || + input.image_format == FORMAT_BGR_PLANAR) { + api.channel = 3; + api.stride_i[1] = api.stride_i[0]; + api.stride_i[2] = api.stride_i[0]; + api.stride_o[1] = api.stride_o[0]; + api.stride_o[2] = api.stride_o[0]; + api.width[1] = api.width[0]; + api.width[2] = api.width[0]; + api.height[1] = api.height[0]; + api.height[2] = api.height[0]; + api.input_addr[1] = api.input_addr[0] + api.height[0] * api.stride_i[0]; + api.input_addr[2] = api.input_addr[0] + api.height[0] * api.stride_i[0] * 2; + api.output_addr[1] = api.output_addr[0] + api.height[0] * api.stride_i[0]; + api.output_addr[2] = api.output_addr[0] + api.height[0] * api.stride_i[0] * 2; + } + ret = bm_tpu_kernel_launch(handle, "cv_gaussinan_blur_", (u8 *)&api, sizeof(api)); + if (BM_SUCCESS != ret) { + bmlib_log("gaussinan_blur", BMLIB_LOG_ERROR, "gaussinan_blur sync api error\n"); + } + bm_free_device(handle, kernel_mem); + delete [] kernel; + return ret; +} + bm_status_t bmcv_image_gaussian_blur( bm_handle_t handle, bm_image input, @@ -237,7 +373,7 @@ bm_status_t bmcv_image_gaussian_blur( unsigned int chipid = BM1684X; bm_status_t ret = BM_SUCCESS; - + bm_handle_check_2(handle, input, output); ret = bm_get_chipid(handle, &chipid); if (BM_SUCCESS != ret) return ret; @@ -250,8 +386,7 @@ bm_status_t bmcv_image_gaussian_blur( break; case BM1684X: - printf("bm1684x not support\n"); - ret = BM_NOT_SUPPORTED; + ret = bmcv_image_gaussian_blur_bm1684x(handle, input, output, kw, kh, sigmaX, sigmaY); break; default: diff --git a/bmvid/bmcv/src/bmcv_api_hist_balance.cpp b/bmvid/bmcv/src/bmcv_api_hist_balance.cpp new file mode 100644 index 0000000..6a654f6 --- /dev/null +++ b/bmvid/bmcv/src/bmcv_api_hist_balance.cpp @@ -0,0 +1,90 @@ +#include "bmcv_api_ext_c.h" +#include "bmcv_internal.h" +#include "bmcv_bm1684x.h" +#include +#include 
+#ifdef __linux__ +#include +#else +#include +#include "time.h" +#endif + +#define GRAY_SERIES 256 + +static float get_cdf_min(int32_t* cdf, int len) +{ + int i; + + for(i = 0; i < len; ++i) { + if (cdf[i] != 0) { + return (float)cdf[i]; + } + } + return 0.f; +} + +bm_status_t bmcv_hist_balance(bm_handle_t handle, bm_device_mem_t input, bm_device_mem_t output, + int H, int W) +{ + bm_status_t ret = BM_SUCCESS; + bm_api_cv_hist_balance_t1 api1; + bm_api_cv_hist_balance_t2 api2; + int imageSize = H * W; + bm_device_mem_t cdf; + int32_t* cdf_sys = NULL; + float cdf_min = 0.f; + int i; + + cdf_sys = (int32_t*)malloc(GRAY_SERIES * sizeof(int32_t)); + memset(cdf_sys, 0, GRAY_SERIES * sizeof(int32_t)); + + for (i = 0; i < GRAY_SERIES; ++i) { + cdf_sys[i] = i; + } + + ret = bm_malloc_device_byte(handle, &cdf, GRAY_SERIES * sizeof(int32_t)); + if (ret != BM_SUCCESS) { + printf("the bm_malloc_device_byte failed!\n"); + free(cdf_sys); + return ret; + } + + ret = bm_memcpy_s2d(handle, cdf, cdf_sys); + if (ret != BM_SUCCESS) { + printf("bm_memcpy_s2d failed!\n"); + goto exit; + } + + api1.Xaddr = bm_mem_get_device_addr(input); + api1.len = imageSize; + api1.cdf_addr = bm_mem_get_device_addr(cdf); + ret = bm_tpu_kernel_launch(handle, "cv_hist_balance1", (u8*)(&api1), sizeof(api1)); + if (ret != BM_SUCCESS) { + printf("bm_tpu_kernel_launch cv_hist_balance1 failed!\n"); + goto exit; + } + + ret = bm_memcpy_d2s(handle, cdf_sys, cdf); + if (ret != BM_SUCCESS) { + printf("bm_memcpy_s2d failed!\n"); + goto exit; + } + cdf_min = get_cdf_min(cdf_sys, GRAY_SERIES); + + api2.Xaddr = bm_mem_get_device_addr(input); + api2.len = imageSize; + api2.cdf_min = cdf_min; + api2.cdf_addr = bm_mem_get_device_addr(cdf); + api2.Yaddr = bm_mem_get_device_addr(output); + ret = bm_tpu_kernel_launch(handle, "cv_hist_balance2", (u8*)(&api2), sizeof(api2)); + if (ret != BM_SUCCESS) { + printf("bm_tpu_kernel_launch cv_hist_balance2 failed!\n"); + goto exit; + } + +exit: + free(cdf_sys); + 
bm_free_device(handle, cdf); + return ret; +} \ No newline at end of file diff --git a/bmvid/bmcv/src/bmcv_api_img_scale.cpp b/bmvid/bmcv/src/bmcv_api_img_scale.cpp index e826a3c..f2f3884 100644 --- a/bmvid/bmcv/src/bmcv_api_img_scale.cpp +++ b/bmvid/bmcv/src/bmcv_api_img_scale.cpp @@ -19,41 +19,41 @@ bmcv_img_scale_check(bm_handle_t handle, bmcv_image input, bmcv_image output) { if ((if_4n == 1) && (if_fp32 == 1)) { BMCV_ERR_LOG("Not support 4n and fp32\n"); - return BM_NOT_SUPPORTED; + return BM_ERR_DATA; } if (!handle) { BMCV_ERR_LOG("Can not get handle!\r\n"); - return BM_NOT_SUPPORTED; + return BM_ERR_DEVNOTREADY; } if ((input.color_space != COLOR_RGB) || (output.color_space != COLOR_RGB)) { BMCV_ERR_LOG("color_space not support\r\n"); - return BM_NOT_SUPPORTED; + return BM_ERR_DATA; } if ((input.image_format != BGR) && (input.image_format != RGB) && (input.image_format != BGR4N) && (input.image_format != RGB4N)) { BMCV_ERR_LOG("input image_format not support:%d\n", input.image_format); - return BM_NOT_SUPPORTED; + return BM_ERR_DATA; } if ((output.image_format != BGR) && (output.image_format != RGB) && (output.image_format != BGR4N) && (output.image_format != RGB4N)) { BMCV_ERR_LOG("output image_format not support:%d\n", output.image_format); - return BM_NOT_SUPPORTED; + return BM_ERR_DATA; } if ((input.data_format != DATA_TYPE_FLOAT) && (input.data_format != DATA_TYPE_BYTE)) { BMCV_ERR_LOG("input data format not support:%d\n", input.data_format); - return BM_NOT_SUPPORTED; + return BM_ERR_DATA; } if ((output.data_format != DATA_TYPE_FLOAT) && (output.data_format != DATA_TYPE_BYTE)) { BMCV_ERR_LOG("output data format not support:%d\n", output.data_format); - return BM_NOT_SUPPORTED; + return BM_ERR_DATA; } return BM_SUCCESS; @@ -79,10 +79,11 @@ bm_status_t bmcv_img_scale(bm_handle_t handle, float weight_r, float bias_r, bmcv_image output) { - if (BM_SUCCESS != bmcv_img_scale_check(handle, input, output)) { + bm_status_t ret = bmcv_img_scale_check(handle, 
input, output); + if (BM_SUCCESS != ret) { BMCV_ERR_LOG("bmcv_img_scale_check error\r\n"); - return BM_ERR_FAILURE; + return ret; } bmcv_resize_image resize_attr[4]; bmcv_resize_t resize_img_attr[4]; @@ -158,7 +159,7 @@ bm_status_t bmcv_img_scale(bm_handle_t handle, bm_image_destroy(output_image[i]); } - return BM_ERR_FAILURE; + return BM_ERR_NOMEM; } char *input_addr = (char *)bm_mem_get_system_addr(input.data[0]); int img_offset = input_image[0].width * input_image[0].height * c * @@ -183,7 +184,7 @@ bm_status_t bmcv_img_scale(bm_handle_t handle, bm_image_destroy(output_image[i]); } - return BM_ERR_FAILURE; + return BM_ERR_NOMEM; } } else { bm_image_attach_contiguous_mem(n, output_image, output.data[0]); @@ -229,7 +230,7 @@ bm_status_t bmcv_img_scale(bm_handle_t handle, bm_image_destroy(temp_input_image[i]); } - return BM_ERR_FAILURE; + return BM_ERR_NOMEM; } bmcv_image_convert_to( handle, n, convert_to_attr, input_image, temp_input_image); diff --git a/bmvid/bmcv/src/bmcv_api_jpeg_dec.cpp b/bmvid/bmcv/src/bmcv_api_jpeg_dec.cpp index 424b949..918f6ef 100644 --- a/bmvid/bmcv/src/bmcv_api_jpeg_dec.cpp +++ b/bmvid/bmcv/src/bmcv_api_jpeg_dec.cpp @@ -19,20 +19,21 @@ typedef struct bmcv_jpeg_decoder_struct { BmJpuJPEGDecoder *decoder_; } bmcv_jpeg_decoder_t; +#if 0 static int format_switch(BmJpuJPEGDecInfo info) { BmJpuColorFormat jpu_format = info.color_format; - int chroma_itlv = info.chroma_interleave; + BmJpuChromaFormat chroma_itlv = info.chroma_interleave; int bmcv_fmt = -1; switch(jpu_format) { case BM_JPU_COLOR_FORMAT_YUV420: - bmcv_fmt = (chroma_itlv == 0) ? FORMAT_YUV420P : - ((chroma_itlv == 1) ? FORMAT_NV12 : - ((chroma_itlv == 2) ? FORMAT_NV21 : -1)); + bmcv_fmt = (chroma_itlv == BM_JPU_CHROMA_FORMAT_CBCR_SEPARATED) ? FORMAT_YUV420P : + ((chroma_itlv == BM_JPU_CHROMA_FORMAT_CBCR_INTERLEAVE) ? FORMAT_NV12 : + ((chroma_itlv == BM_JPU_CHROMA_FORMAT_CRCB_INTERLEAVE) ? 
FORMAT_NV21 : -1)); break; case BM_JPU_COLOR_FORMAT_YUV422_HORIZONTAL: - bmcv_fmt = (chroma_itlv == 0) ? FORMAT_YUV422P : - ((chroma_itlv == 1) ? FORMAT_NV16 : - ((chroma_itlv == 2) ? FORMAT_NV61 : -1)); + bmcv_fmt = (chroma_itlv == BM_JPU_CHROMA_FORMAT_CBCR_SEPARATED) ? FORMAT_YUV422P : + ((chroma_itlv == BM_JPU_CHROMA_FORMAT_CBCR_INTERLEAVE) ? FORMAT_NV16 : + ((chroma_itlv == BM_JPU_CHROMA_FORMAT_CRCB_INTERLEAVE) ? FORMAT_NV61 : -1)); break; case BM_JPU_COLOR_FORMAT_YUV422_VERTICAL: bmcv_fmt = -1; @@ -48,6 +49,42 @@ static int format_switch(BmJpuJPEGDecInfo info) { } return bmcv_fmt; } +#endif + +/* Version 2: change to use image format */ +static int format_switch(BmJpuJPEGDecInfo info) { + BmJpuImageFormat jpu_format = info.image_format; + int bmcv_fmt = -1; + switch (jpu_format) { + case BM_JPU_IMAGE_FORMAT_YUV420P: + bmcv_fmt = FORMAT_YUV420P; + break; + case BM_JPU_IMAGE_FORMAT_NV12: + bmcv_fmt = FORMAT_NV12; + break; + case BM_JPU_IMAGE_FORMAT_NV21: + bmcv_fmt = FORMAT_NV21; + break; + case BM_JPU_IMAGE_FORMAT_YUV422P: + bmcv_fmt = FORMAT_YUV422P; + break; + case BM_JPU_IMAGE_FORMAT_NV16: + bmcv_fmt = FORMAT_NV16; + break; + case BM_JPU_IMAGE_FORMAT_NV61: + bmcv_fmt = FORMAT_NV61; + break; + case BM_JPU_IMAGE_FORMAT_YUV444P: + bmcv_fmt = FORMAT_YUV444P; + break; + case BM_JPU_IMAGE_FORMAT_GRAY: + bmcv_fmt = FORMAT_GRAY; + break; + default: + break; + } + return bmcv_fmt; +} static bm_status_t bmcv_jpeg_dec_check(bm_image* src, BmJpuJPEGDecInfo info) { @@ -56,14 +93,14 @@ static bm_status_t bmcv_jpeg_dec_check(bm_image* src, bmlib_log("JEPG-DEC", BMLIB_LOG_ERROR, "data type only support 1N_BYTE %s: %s: %d\n", filename(__FILE__), __func__, __LINE__); - return BM_NOT_SUPPORTED; + return BM_ERR_DATA; } if ((format_switch(info) == -1) || (format_switch(info) != src->image_format)) { bmlib_log("JEPG-DEC", BMLIB_LOG_ERROR, "bm_image format should be same with image %s: %s: %d\n", filename(__FILE__), __func__, __LINE__); - return BM_NOT_SUPPORTED; + return 
BM_ERR_DATA; } // only support yuv420p, yuv422p, yuv444p, nv12, nv16, gray. if (!((src->image_format == FORMAT_YUV420P) || @@ -75,7 +112,7 @@ static bm_status_t bmcv_jpeg_dec_check(bm_image* src, bmlib_log("JEPG-DEC", BMLIB_LOG_ERROR, "dst image format only support those format now:\n \ FORMAT_YUV420P/FORMAT_YUV444P/FORMAT_YUV422P/FORMAT_NV12/FORMAT_NV16/FORMAT_GRAY"); - return BM_NOT_SUPPORTED; + return BM_ERR_DATA; } // width and height should be same if (((unsigned int)src->width != info.actual_frame_width) && @@ -83,7 +120,7 @@ static bm_status_t bmcv_jpeg_dec_check(bm_image* src, bmlib_log("JEPG-DEC", BMLIB_LOG_ERROR, "bm_image width and height should be same with image %s: %s: %d\n", filename(__FILE__), __func__, __LINE__); - return BM_NOT_SUPPORTED; + return BM_ERR_DATA; } return BM_SUCCESS; @@ -137,15 +174,21 @@ static int bmcv_jpeg_decoder_create(bmcv_jpeg_decoder_t** p_jpeg_decoder, memset(dec, 0, sizeof(*dec)); /* Open the JPEG decoder */ memset(&open_params, 0, sizeof(BmJpuDecOpenParams)); - open_params.frame_width = 0; - open_params.frame_height = 0; - open_params.chroma_interleave = 0; + open_params.min_frame_width = 0; + open_params.min_frame_height = 0; + open_params.max_frame_width = 0; + open_params.max_frame_height = 0; + open_params.chroma_interleave = BM_JPU_CHROMA_FORMAT_CBCR_SEPARATED; /* avoid the false alarm that bs buffer is empty */ open_params.bs_buffer_size = (bs_size + 256); open_params.device_index = devid; if (dst->image_private != NULL && - ((dst->image_format == FORMAT_NV12) || dst->image_format == FORMAT_NV16)) { - open_params.chroma_interleave = 1; + ((dst->image_format == FORMAT_NV12) || (dst->image_format == FORMAT_NV16))) { + open_params.chroma_interleave = BM_JPU_CHROMA_FORMAT_CBCR_INTERLEAVE; + } + if (dst->image_private != NULL && + ((dst->image_format == FORMAT_NV21) || (dst->image_format == FORMAT_NV61))) { + open_params.chroma_interleave = BM_JPU_CHROMA_FORMAT_CRCB_INTERLEAVE; } ret = bm_jpu_dec_load(devid); @@ -424,7 
+467,6 @@ static int try_soft_decoding(bm_handle_t handle, int soft_decoding = 1; volatile int bmimage_created = 0; volatile int bmimage_allocated = 0; - cinfo.err = jpeg_std_error(&errorMgr.pub); errorMgr.pub.error_exit = error_exit; @@ -441,7 +483,6 @@ static int try_soft_decoding(bm_handle_t handle, jpeg_mem_src(&cinfo, (const unsigned char*)buf, size); jpeg_read_header(&cinfo, TRUE); - if (!determine_hw_decoding(&cinfo)){ unsigned char *p_raw_pic; int row_stride; @@ -580,8 +621,9 @@ bm_status_t bmcv_jpeg_dec_one_image(bm_handle_t handle, bm_image* dst) { BmJpuDecReturnCodes ret; BmJpuJPEGDecInfo info; + bm_status_t bmret; - ret = bm_jpu_jpeg_dec_decode(jpeg_dec->decoder_, (const unsigned char*)buf, in_size); + ret = bm_jpu_jpeg_dec_decode(jpeg_dec->decoder_, (const unsigned char*)buf, in_size, 0, 0); if (ret != BM_JPU_DEC_RETURN_CODE_OK) { bmlib_log("JPEG-DEC", BMLIB_LOG_ERROR, "jpeg decode failed!\r\n"); @@ -597,20 +639,18 @@ bm_status_t bmcv_jpeg_dec_one_image(bm_handle_t handle, "pixel Y/Cb/Cr stride: %u/%u/%u\n" "pixel Y/Cb/Cr size: %u/%u/%u\n" "pixel Y/Cb/Cr offset: %u/%u/%u\n" - "color format: %s\n" - "chroma interleave: %d\n", + "image format: %s\n", info.aligned_frame_width, info.aligned_frame_height, info.actual_frame_width, info.actual_frame_height, info.y_stride, info.cbcr_stride, info.cbcr_stride, info.y_size, info.cbcr_size, info.cbcr_size, info.y_offset, info.cb_offset, info.cr_offset, - bm_jpu_color_format_string(info.color_format), - info.chroma_interleave); + bm_jpu_image_format_string(info.image_format)); if (info.framebuffer == NULL) { bmlib_log("JPEG-DEC", BMLIB_LOG_ERROR, "could not decode this JPEG image : no framebuffer returned!\r\n"); - return BM_ERR_FAILURE; + return BM_ERR_PARAM; } bm_jpu_phys_addr_t phys_addr = bm_mem_get_device_addr(*(info.framebuffer->dma_buffer)); @@ -620,7 +660,7 @@ bm_status_t bmcv_jpeg_dec_one_image(bm_handle_t handle, if(BM_SUCCESS != bmcv_jpeg_dec_check(dst, info)) { BMCV_ERR_LOG("bmcv_jpeg_dec_check 
error\r\n"); - return BM_ERR_FAILURE; + return BM_ERR_DATA; } // if stride is not same, need to convert int jpu_stride[3] = {(int)info.y_stride, (int)info.cbcr_stride, (int)info.cbcr_stride}; @@ -635,10 +675,11 @@ bm_status_t bmcv_jpeg_dec_one_image(bm_handle_t handle, } else { int format = format_switch(info); int stride[3] = {(int)info.y_stride, (int)info.cbcr_stride, (int)info.cbcr_stride}; - if(BM_SUCCESS != bm_image_create(handle, info.actual_frame_height, info.actual_frame_width, - (bm_image_format_ext)format, DATA_TYPE_EXT_1N_BYTE, dst, stride)) { + bmret = bm_image_create(handle, info.actual_frame_height, info.actual_frame_width, + (bm_image_format_ext)format, DATA_TYPE_EXT_1N_BYTE, dst, stride); + if(BM_SUCCESS != bmret) { BMCV_ERR_LOG("bm_image_create error\r\n"); - return BM_ERR_FAILURE; + return bmret; } } // create device mem, and attach it to bm_image @@ -649,35 +690,39 @@ bm_status_t bmcv_jpeg_dec_one_image(bm_handle_t handle, dev_mem[1] = bm_mem_from_device(phys_addr + info.cb_offset, img_size[1]); dev_mem[2] = bm_mem_from_device(phys_addr + info.cr_offset, img_size[2]); if (!need_convert) { - if(BM_SUCCESS != bm_image_attach(*dst, dev_mem)) { + bmret = bm_image_attach(*dst, dev_mem); + if(BM_SUCCESS != bmret) { BMCV_ERR_LOG("bm_image_attach error\r\n"); - return BM_ERR_FAILURE; + return bmret; } } else { bm_image dst_tmp; int stride[3] = {(int)info.y_stride, (int)info.cbcr_stride, (int)info.cbcr_stride}; - if (BM_SUCCESS != bm_image_create(handle, info.actual_frame_height, info.actual_frame_width, - dst->image_format, DATA_TYPE_EXT_1N_BYTE, &dst_tmp, stride)) { + bmret = bm_image_create(handle, info.actual_frame_height, info.actual_frame_width, + dst->image_format, DATA_TYPE_EXT_1N_BYTE, &dst_tmp, stride); + if (BM_SUCCESS != bmret) { BMCV_ERR_LOG("bm_image_create error\r\n"); - return BM_ERR_FAILURE; + return bmret; } - if (BM_SUCCESS != bm_image_attach(dst_tmp, dev_mem)) { + bmret = bm_image_attach(dst_tmp, dev_mem); + if (BM_SUCCESS != bmret) { 
BMCV_ERR_LOG("bm_image_attach error\r\n"); bm_image_destroy(dst_tmp); - return BM_ERR_FAILURE; + return bmret; } if (!bm_image_is_attached(*dst)) { // not alloc in heap0, because it maybe use by VPP after this operation if (BM_SUCCESS != bm_image_alloc_dev_mem_heap_mask(*dst, 6)) { BMCV_ERR_LOG("bm_image_alloc_dev_mem error\r\n"); bm_image_destroy(dst_tmp); - return BM_ERR_FAILURE; + return BM_ERR_NOMEM; } } - if (BM_SUCCESS != bmcv_width_align(handle, dst_tmp, *dst)) { + bmret = bmcv_width_align(handle, dst_tmp, *dst); + if (BM_SUCCESS != bmret) { BMCV_ERR_LOG("bmcv_width_align error\r\n"); bm_image_destroy(dst_tmp); - return BM_ERR_FAILURE; + return bmret; } bm_image_destroy(dst_tmp); } @@ -690,7 +735,7 @@ bm_status_t bmcv_jpeg_dec_one_image(bm_handle_t handle, if (mapped_virtual_address==NULL) { fprintf(stderr, "bm_jpu_dma_buffer_map failed\n"); - return BM_ERR_FAILURE; + return BM_ERR_NOMEM; } char* filename = (char*)"test_dbg.dec"; @@ -711,18 +756,23 @@ bm_status_t bmcv_image_jpeg_dec(bm_handle_t handle, int image_num, bm_image* dst, int bs_in_device) { + if (dst != NULL) { + if (dst->image_private != NULL) { + bm_handle_check_1(handle, dst[0]); + } + } if (handle == NULL) { bmlib_log("JPEG-DEC", BMLIB_LOG_ERROR, "Can not get handle!\r\n"); - return BM_ERR_FAILURE; + return BM_ERR_DEVNOTREADY; } if (in_size == NULL || p_jpeg_data == NULL) { bmlib_log("JPEG-DEC", BMLIB_LOG_ERROR, "The pointer of data and size should not be NULL!\r\n"); - return BM_ERR_FAILURE; + return BM_ERR_PARAM; } for (int i = 0; i < image_num; i++) { if (p_jpeg_data[i] == NULL) { bmlib_log("JPEG-DEC", BMLIB_LOG_ERROR, "The pointer of data should not be NULL!\r\n"); - return BM_ERR_FAILURE; + return BM_ERR_DATA; } } @@ -734,13 +784,12 @@ bm_status_t bmcv_image_jpeg_dec(bm_handle_t handle, if(bs_in_device == 1 ) { bm_device_mem_t *mem = (bm_device_mem_t *)p_jpeg_data[i]; - uint8_t* tmp_data = (uint8_t *)malloc(*in_size + 64); + uint8_t* tmp_data = (uint8_t *)malloc(in_size[i]); 
if(bm_memcpy_d2s(handle, tmp_data, *mem) != BM_SUCCESS) { printf("d2s failed!!! p_jpeg_data_addr=%p\n",p_jpeg_data[i]); } - bm_free_device(handle, *((bm_device_mem_t *)p_jpeg_data[i])); free(p_jpeg_data[i]); p_jpeg_data[i] = tmp_data; @@ -767,10 +816,10 @@ bm_status_t bmcv_image_jpeg_dec(bm_handle_t handle, if (info.framebuffer) bm_jpu_jpeg_dec_frame_finished(jpeg_dec[i]->decoder_, info.framebuffer); else - return BM_ERR_FAILURE; + return BM_ERR_DATA; bmcv_jpeg_decoder_destroy(jpeg_dec[i]); } - return BM_ERR_FAILURE; + return ret; } } diff --git a/bmvid/bmcv/src/bmcv_api_jpeg_enc.cpp b/bmvid/bmcv/src/bmcv_api_jpeg_enc.cpp index 3d4c4fe..7069820 100644 --- a/bmvid/bmcv/src/bmcv_api_jpeg_enc.cpp +++ b/bmvid/bmcv/src/bmcv_api_jpeg_enc.cpp @@ -4,13 +4,17 @@ #ifndef USING_CMODEL #include "bmcv_api_ext.h" -#include "bmjpuapi.h" #include #include #include -#include "bmjpuapi_jpeg.h" #include "bmcv_internal.h" +#ifdef __linux__ +#include +#else +#include +#endif + typedef struct bmcv_jpeg_encoder_struct { BmJpuJPEGEncoder *encoder_; } bmcv_jpeg_encoder_t; @@ -27,28 +31,30 @@ static void* acquire_output_buffer(void *context, size_t size, void **acquired_h { bmcv_jpeg_buffer_t *p_encoded = (bmcv_jpeg_buffer_t *)context; - if (context == NULL || p_encoded->buffer == NULL) { -/* + if (p_encoded == NULL) { bmlib_log("JPEG-ENC", BMLIB_LOG_WARNING, "User NOT malloc memory for output bitstream, it will malloc memory automatically. 
"\ "But user need remember to free it.\n"); -*/ void *mem; mem = malloc(size); *acquired_handle = mem; return mem; + } else if (p_encoded->bs_in_device == 1) { + bmlib_log("JPEG-ENC", BMLIB_LOG_INFO, "acquire_output_buffer: bs_in_device = 1\n"); + bm_handle_t handle; + bm_dev_request(&handle, p_encoded->soc_idx); + bm_device_mem_t *bs_device_mem = (bm_device_mem_t *)malloc(sizeof(bm_device_mem_t)); + //only vpp + bm_malloc_device_byte_heap(handle, bs_device_mem, 1, size); + *acquired_handle = bs_device_mem; + bm_dev_free(handle); + return (void *)bs_device_mem; } else { - - if(p_encoded->bs_in_device == 1) - { - bm_handle_t handle; - bm_dev_request(&handle, p_encoded->soc_idx); - bm_device_mem_t *bs_device_mem = (bm_device_mem_t *)malloc(sizeof(bm_device_mem_t)); - //only vpp - bm_malloc_device_byte_heap(handle, bs_device_mem, 1,size); - *acquired_handle = bs_device_mem; - bm_dev_free(handle); - return (void *)bs_device_mem; + if (p_encoded->buffer == NULL) { + bmlib_log("JPEG-ENC", BMLIB_LOG_WARNING, + "encoded output buffer is null, allocate it.\n"); + p_encoded->buffer = malloc(size); + p_encoded->buffer_size = size; } if (p_encoded->buffer_size < size){ @@ -74,7 +80,7 @@ static bm_status_t bmcv_jpeg_enc_check(bm_image *src, int image_num, int quality bmlib_log("JPEG-ENC", BMLIB_LOG_ERROR, "bm_image is nullptr %s: %s: %d\n", filename(filename(__FILE__)), __func__, __LINE__); - return BM_NOT_SUPPORTED; + return BM_ERR_DATA; } if(image_num < 1 || image_num > 4){ @@ -88,7 +94,7 @@ static bm_status_t bmcv_jpeg_enc_check(bm_image *src, int image_num, int quality bmlib_log("JPEG-ENC", BMLIB_LOG_ERROR, "quality_factor(%d) should be between 0 and 100 %s: %s: %d\n", quality_factor, filename(filename(__FILE__)), __func__, __LINE__); - return BM_NOT_SUPPORTED; + return BM_ERR_PARAM; } for (int i = 0; i < image_num; i++) { @@ -96,14 +102,14 @@ static bm_status_t bmcv_jpeg_enc_check(bm_image *src, int image_num, int quality bmlib_log("JPEG-ENC", BMLIB_LOG_ERROR, "input image 
is not attached data %s: %s: %d\n", filename(filename(__FILE__)), __func__, __LINE__); - return BM_ERR_PARAM; + return BM_ERR_DATA; } if (src[i].data_type != DATA_TYPE_EXT_1N_BYTE) { bmlib_log("JPEG-ENC", BMLIB_LOG_ERROR, "data type only support 1N_BYTE %s: %s: %d\n", filename(filename(__FILE__)), __func__, __LINE__); - return BM_NOT_SUPPORTED; + return BM_ERR_DATA; } // only support yuv420p, nv12, nv21, yuv422p, nv16, nv61, yuv444p, gray. @@ -240,16 +246,23 @@ static bm_status_t bmcv_jpeg_enc_check(bm_image *src, int image_num, int quality printf(" ---> FORMAT_BAYER\n\n"); break; } + case FORMAT_BAYER_RG8: { + printf(" ---> FORMAT_BAYER_RG8\n\n"); + break; + } + default: + printf(" ---> UNKNOWN FORMAT\n\n"); + break; } - return BM_NOT_SUPPORTED; + return BM_ERR_DATA; } // width and height should >= 16 if ((src[i].width < 16) || src[i].height < 16) { bmlib_log("JPEG-ENC", BMLIB_LOG_ERROR, "width and height should not less than 16\n", filename(filename(__FILE__)), __func__, __LINE__); - return BM_NOT_SUPPORTED; + return BM_ERR_DATA; } } return BM_SUCCESS; @@ -333,7 +346,7 @@ static int bmcv_jpeg_encoder_create(bmcv_jpeg_encoder_t** p_jpeg_encoder, break; } int buffer_size = src->height * src->width * factor; - buffer_size = buffer_size > 16368 * 1024 ? 16368 * 1024 : buffer_size; + // buffer_size = buffer_size > 16368 * 1024 ? 
16368 * 1024 : buffer_size; ret = bm_jpu_enc_load(devid); if (BM_JPU_ENC_RETURN_CODE_OK != ret) { @@ -341,7 +354,7 @@ static int bmcv_jpeg_encoder_create(bmcv_jpeg_encoder_t** p_jpeg_encoder, goto End; } - ret = bm_jpu_jpeg_enc_open(&enc->encoder_, buffer_size, devid); + ret = bm_jpu_jpeg_enc_open(&enc->encoder_, 0, buffer_size, devid); if (BM_JPU_ENC_RETURN_CODE_OK != ret) { bmlib_log("JPEG-ENC", BMLIB_LOG_ERROR, "open jpeg encoder failed!\r\n"); goto End; @@ -383,13 +396,11 @@ bm_status_t bmcv_jpeg_enc_one_image(bmcv_jpeg_encoder_t *jpeg_enc, bm_image_get_format_info(src, &info); int width = src->width; int height = src->height; - BmJpuColorFormat out_pixformat; + BmJpuImageFormat image_format; int y_size = info.stride[0] * height; int c_size = 0; int total_size = 0; - int interleave = 0; - int packed_format = 0; int src_image_format = src->image_format; bmcv_jpeg_buffer_t encoded_buffer; @@ -398,62 +409,55 @@ bm_status_t bmcv_jpeg_enc_one_image(bmcv_jpeg_encoder_t *jpeg_enc, encoded_buffer.buffer_size = *out_size; encoded_buffer.opaque = NULL; encoded_buffer.soc_idx = 0; + encoded_buffer.bs_in_device = bs_in_device; if(bs_in_device == 1){ encoded_buffer.soc_idx = bm_get_devid(bm_image_get_handle(src)); - encoded_buffer.bs_in_device = bs_in_device; } switch (src_image_format) { case FORMAT_YUV420P: - out_pixformat = BM_JPU_COLOR_FORMAT_YUV420; + image_format = BM_JPU_IMAGE_FORMAT_YUV420P; c_size = info.stride[1] * ((height + 1) / 2); total_size = y_size + c_size * 2; - interleave = 0; break; case FORMAT_NV12: - out_pixformat = BM_JPU_COLOR_FORMAT_YUV420; + image_format = BM_JPU_IMAGE_FORMAT_NV12; c_size = info.stride[1] * ((height + 1) / 2); total_size = y_size + c_size; - interleave = 1; break; case FORMAT_NV21: - out_pixformat = BM_JPU_COLOR_FORMAT_YUV420; + image_format = BM_JPU_IMAGE_FORMAT_NV21; c_size = info.stride[1] * ((height + 1) / 2); total_size = y_size + c_size; - interleave = 2; break; case FORMAT_YUV422P: - out_pixformat = 
BM_JPU_COLOR_FORMAT_YUV422_HORIZONTAL; + image_format = BM_JPU_IMAGE_FORMAT_YUV422P; c_size = info.stride[1] * height; total_size = y_size + c_size * 2; - interleave = 0; break; case FORMAT_NV16: - out_pixformat = BM_JPU_COLOR_FORMAT_YUV422_HORIZONTAL; + image_format = BM_JPU_IMAGE_FORMAT_NV16; c_size = info.stride[1] * height; total_size = y_size + c_size; - interleave = 1; break; case FORMAT_NV61: - out_pixformat = BM_JPU_COLOR_FORMAT_YUV422_HORIZONTAL; + image_format = BM_JPU_IMAGE_FORMAT_NV61; c_size = info.stride[1] * height; total_size = y_size + c_size; - interleave = 2; break; case FORMAT_YUV444P: - out_pixformat = BM_JPU_COLOR_FORMAT_YUV444; + image_format = BM_JPU_IMAGE_FORMAT_YUV444P; c_size = info.stride[1] * height; total_size = y_size + c_size * 2; - interleave = 0; break; case FORMAT_GRAY: - out_pixformat = BM_JPU_COLOR_FORMAT_YUV400; + image_format = BM_JPU_IMAGE_FORMAT_GRAY; c_size = 0; total_size = y_size; break; default: - out_pixformat = BM_JPU_COLOR_FORMAT_YUV400; + image_format = BM_JPU_IMAGE_FORMAT_GRAY; c_size = 0; total_size = y_size; break; @@ -468,17 +472,15 @@ bm_status_t bmcv_jpeg_enc_one_image(bmcv_jpeg_encoder_t *jpeg_enc, for (int i = 0; i < info.plane_nb; i++) { dev_addr[i] = info.plane_data[i].u.device.device_addr; } - - wrapped_mem.u.device.device_addr = dev_addr[0]; - wrapped_mem.u.device.dmabuf_fd = 1; - wrapped_mem.size = total_size; + + wrapped_mem = bm_mem_from_device(dev_addr[0], total_size); framebuffer.y_stride = info.stride[0]; framebuffer.cbcr_stride = info.stride[1]; framebuffer.y_offset = 0; framebuffer.cb_offset = dev_addr[1] - dev_addr[0]; framebuffer.cr_offset = dev_addr[2] - dev_addr[0]; - + framebuffer.dma_buffer = &wrapped_mem; /* Set up the encoding parameters */ @@ -486,9 +488,7 @@ bm_status_t bmcv_jpeg_enc_one_image(bmcv_jpeg_encoder_t *jpeg_enc, enc_params.frame_width = width; enc_params.frame_height = height; enc_params.quality_factor = quality_factor; - enc_params.color_format = out_pixformat; - 
enc_params.packed_format = packed_format; - enc_params.chroma_interleave = interleave; + enc_params.image_format = image_format; enc_params.output_buffer_context = (void*)&encoded_buffer; enc_params.acquire_output_buffer = acquire_output_buffer; @@ -498,10 +498,11 @@ bm_status_t bmcv_jpeg_enc_one_image(bmcv_jpeg_encoder_t *jpeg_enc, /* Do the actual encoding */ ret = bm_jpu_jpeg_enc_encode(jpeg_enc->encoder_, - &framebuffer, - &enc_params, - p_jpeg_data, - out_size); + &framebuffer, + &enc_params, + p_jpeg_data, + out_size); + if (ret != BM_JPU_ENC_RETURN_CODE_OK) { bmlib_log("JPEG-ENC", BMLIB_LOG_ERROR, "jpeg encode failed!\r\n"); return BM_ERR_FAILURE; @@ -517,13 +518,16 @@ bm_status_t bmcv_image_jpeg_enc(bm_handle_t handle, size_t* out_size, int quality_factor, int bs_in_device) { - if(BM_SUCCESS != bmcv_jpeg_enc_check(src, image_num, quality_factor)) { - BMCV_ERR_LOG("bmcv_jpeg_enc_check error\r\n"); - return BM_ERR_FAILURE; + + bm_status_t ret; + ret = bmcv_jpeg_enc_check(src, image_num, quality_factor); + if(BM_SUCCESS != ret) { + return ret; } + bm_handle_check_1(handle, src[0]); if (handle == NULL) { bmlib_log("JPEG-ENC", BMLIB_LOG_ERROR, "Can not get handle!\r\n"); - return BM_ERR_FAILURE; + return BM_ERR_DEVNOTREADY; } int devid = bm_get_devid(handle); bmcv_jpeg_encoder_t *jpeg_enc = NULL; @@ -556,7 +560,7 @@ bm_status_t bmcv_image_jpeg_enc(bm_handle_t handle, free(out_size); } bmcv_jpeg_encoder_destroy(jpeg_enc); - return BM_ERR_FAILURE; + return ret; } ret = bmcv_width_align(handle, src[i], tmp); if (ret != BM_SUCCESS) { @@ -568,7 +572,7 @@ bm_status_t bmcv_image_jpeg_enc(bm_handle_t handle, } bm_image_destroy(tmp); bmcv_jpeg_encoder_destroy(jpeg_enc); - return BM_ERR_FAILURE; + return ret; } src_align = &tmp; } @@ -604,7 +608,7 @@ bm_status_t bmcv_image_jpeg_enc(bm_handle_t handle, free(out_size); } bmcv_jpeg_encoder_destroy(jpeg_enc); - return BM_ERR_FAILURE; + return ret; } } bmcv_jpeg_encoder_destroy(jpeg_enc); diff --git 
a/bmvid/bmcv/src/bmcv_api_laplacian.cpp b/bmvid/bmcv/src/bmcv_api_laplacian.cpp index 3409dae..1c8b884 100644 --- a/bmvid/bmcv/src/bmcv_api_laplacian.cpp +++ b/bmvid/bmcv/src/bmcv_api_laplacian.cpp @@ -2,6 +2,7 @@ #include "bmcv_internal.h" #include "bmcv_common_bm1684.h" #include +#include #include #include @@ -218,7 +219,7 @@ bm_status_t bmcv_image_laplacian( unsigned int chipid = 0x1686; bm_status_t ret = BM_SUCCESS; - + bm_handle_check_2(handle, input, output); ret = bm_get_chipid(handle, &chipid); if (BM_SUCCESS != ret) return ret; diff --git a/bmvid/bmcv/src/bmcv_api_lkpyramid.cpp b/bmvid/bmcv/src/bmcv_api_lkpyramid.cpp index 21ded96..47ac6d0 100644 --- a/bmvid/bmcv/src/bmcv_api_lkpyramid.cpp +++ b/bmvid/bmcv/src/bmcv_api_lkpyramid.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #ifdef __linux__ @@ -22,24 +23,24 @@ static bm_status_t bmcv_lkpyramid_check( bm_image next) { if (handle == NULL) { bmlib_log("LKPYRAMID", BMLIB_LOG_ERROR, "Can not get handle!\r\n"); - return BM_ERR_PARAM; + return BM_ERR_DEVNOTREADY; } if (prev.width != next.width || prev.height != next.height) { bmlib_log("LKPYRAMID", BMLIB_LOG_ERROR, "previous image and next image size should be same!\r\n"); - return BM_NOT_SUPPORTED; + return BM_ERR_DATA; } if (prev.width + 5 - 1 >= 2048) { bmlib_log("LKPYRAMID", BMLIB_LOG_ERROR, "image width is too large!\r\n"); - return BM_NOT_SUPPORTED; + return BM_ERR_DATA; } if (prev.image_format != FORMAT_GRAY || next.image_format != FORMAT_GRAY) { bmlib_log("LKPYRAMID", BMLIB_LOG_ERROR, "Image format only support GRAY\n"); - return BM_NOT_SUPPORTED; + return BM_ERR_DATA; } if (prev.data_type != DATA_TYPE_EXT_1N_BYTE || next.data_type != DATA_TYPE_EXT_1N_BYTE) { bmlib_log("LKPYRAMID", BMLIB_LOG_ERROR, "Not supported image data type\n"); - return BM_NOT_SUPPORTED; + return BM_ERR_DATA; } return BM_SUCCESS; } @@ -263,7 +264,7 @@ bm_status_t bmcv_image_lkpyramid_create_plan( P->maxLevel = maxLevel; if (maxLevel > 5) { bmlib_log("LKPYRAMID", 
BMLIB_LOG_ERROR, "Not supported maxLevel greater 5\n"); - return BM_NOT_SUPPORTED; + return BM_ERR_PARAM; } // in order to use same size kernel, gradient kernel pad 0 int ksize = 5 * 5 * 3; // pyramid-down + x-grad + y-grad @@ -528,7 +529,7 @@ bm_status_t bmcv_image_lkpyramid_execute( unsigned int chipid = BM1684X; bm_status_t ret = BM_SUCCESS; - + bm_handle_check_2(handle, prevImg, nextImg); ret = bm_get_chipid(handle, &chipid); if (BM_SUCCESS != ret) return ret; @@ -541,12 +542,12 @@ bm_status_t bmcv_image_lkpyramid_execute( break; case BM1684X: - printf("bm1684x not support\n"); - ret = BM_NOT_SUPPORTED; + printf("current card not support\n"); + ret = BM_ERR_NOFEATURE; break; default: - ret = BM_NOT_SUPPORTED; + ret = BM_ERR_NOFEATURE; break; } diff --git a/bmvid/bmcv/src/bmcv_api_morph.cpp b/bmvid/bmcv/src/bmcv_api_morph.cpp index d5ded7f..0997f32 100644 --- a/bmvid/bmcv/src/bmcv_api_morph.cpp +++ b/bmvid/bmcv/src/bmcv_api_morph.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include using namespace std; @@ -17,12 +18,12 @@ static bm_status_t bmcv_morph_check( bm_image output) { if (handle == NULL) { bmlib_log("MORPH", BMLIB_LOG_ERROR, "Can not get handle!\r\n"); - return BM_ERR_PARAM; + return BM_ERR_DEVNOTREADY; } if (input.height != output.height || input.width != output.width) { bmlib_log("MORPH", BMLIB_LOG_ERROR, "input and output image size should be same\n"); - return BM_NOT_SUPPORTED; + return BM_ERR_DATA; } if (input.image_format != FORMAT_BGR_PACKED && input.image_format != FORMAT_RGB_PACKED && @@ -30,16 +31,16 @@ static bm_status_t bmcv_morph_check( input.image_format != FORMAT_RGB_PLANAR && input.image_format != FORMAT_GRAY) { bmlib_log("MORPH", BMLIB_LOG_ERROR, "Not supported input image format\n"); - return BM_NOT_SUPPORTED; + return BM_ERR_DATA; } if (input.image_format != output.image_format) { bmlib_log("MORPH", BMLIB_LOG_ERROR, "input and output image format should be same\n"); - return BM_NOT_SUPPORTED; + return BM_ERR_DATA; } if 
(input.data_type != DATA_TYPE_EXT_1N_BYTE || output.data_type != DATA_TYPE_EXT_1N_BYTE) { bmlib_log("MORPH", BMLIB_LOG_ERROR, "Not supported image data type\n"); - return BM_NOT_SUPPORTED; + return BM_ERR_DATA; } return BM_SUCCESS; } @@ -308,7 +309,7 @@ bm_status_t bmcv_image_morph_bm1684( if (BM_SUCCESS != ret) { bmlib_log("MORPH", BMLIB_LOG_ERROR, "kernel d2s failed!\r\n"); delete [] kernel; - return BM_ERR_FAILURE; + return BM_ERR_NOMEM; } int not_zero_cnt = 0; for (int i = 0; i < kw * kh; i++) { @@ -456,12 +457,12 @@ bm_status_t bmcv_image_morph( break; case BM1684X: - printf("bm1684x not support\n"); - ret = BM_NOT_SUPPORTED; + printf("current card not support\n"); + ret = BM_ERR_NOFEATURE; break; default: - ret = BM_NOT_SUPPORTED; + ret = BM_ERR_NOFEATURE; break; } @@ -476,6 +477,7 @@ bm_status_t bmcv_image_erode( int kh, bm_device_mem_t kmem ) { + bm_handle_check_2(handle, src, dst); return bmcv_image_morph(handle, src, dst, kw, kh, kmem, 0); } @@ -487,5 +489,6 @@ bm_status_t bmcv_image_dilate( int kh, bm_device_mem_t kmem ) { + bm_handle_check_2(handle, src, dst); return bmcv_image_morph(handle, src, dst, kw, kh, kmem, 1); } \ No newline at end of file diff --git a/bmvid/bmcv/src/bmcv_api_put_text.cpp b/bmvid/bmcv/src/bmcv_api_put_text.cpp index 3c6557a..73fe11e 100644 --- a/bmvid/bmcv/src/bmcv_api_put_text.cpp +++ b/bmvid/bmcv/src/bmcv_api_put_text.cpp @@ -7,6 +7,7 @@ #include #endif #include +#include #include #include "bmcv_api_ext.h" #include "bmcv_internal.h" @@ -546,7 +547,7 @@ static bm_status_t bmcv_put_text_check( int thickness) { if (handle == NULL) { bmlib_log("DRAW_LINE", BMLIB_LOG_ERROR, "Can not get handle!\r\n"); - return BM_ERR_PARAM; + return BM_ERR_DEVNOTREADY; } if (thickness <= 0) { bmlib_log("DRAW_LINE", BMLIB_LOG_ERROR, "thickness should greater than 0!\r\n"); @@ -554,7 +555,7 @@ static bm_status_t bmcv_put_text_check( } if (!IS_CS_YUV(image.image_format) && image.image_format != FORMAT_GRAY) { bmlib_log("DRAW_LINE", BMLIB_LOG_ERROR, 
"image format not supported %d !\r\n", image.image_format); - return BM_ERR_PARAM; + return BM_ERR_DATA; } return BM_SUCCESS; } @@ -567,6 +568,8 @@ bm_status_t bmcv_image_put_text( bmcv_color_t color, float fontScale, int thickness) { + + bm_handle_check_1(handle, image); if (BM_SUCCESS != bmcv_put_text_check(handle, image, thickness)) { return BM_ERR_FAILURE; } diff --git a/bmvid/bmcv/src/bmcv_api_pyramid.cpp b/bmvid/bmcv/src/bmcv_api_pyramid.cpp index c604aea..ecdd4d7 100644 --- a/bmvid/bmcv/src/bmcv_api_pyramid.cpp +++ b/bmvid/bmcv/src/bmcv_api_pyramid.cpp @@ -4,6 +4,7 @@ #include #include #include +#include static bm_status_t bmcv_pyramid_check( @@ -48,6 +49,7 @@ bm_status_t bmcv_image_pyramid_down( bm_image input, bm_image output) { bm_status_t ret = BM_SUCCESS; + bm_handle_check_2(handle, input, output); ret = bmcv_pyramid_check(handle, input, output, true); if (BM_SUCCESS != ret) { return ret; diff --git a/bmvid/bmcv/src/bmcv_api_quantify.cpp b/bmvid/bmcv/src/bmcv_api_quantify.cpp new file mode 100644 index 0000000..a6c0f67 --- /dev/null +++ b/bmvid/bmcv/src/bmcv_api_quantify.cpp @@ -0,0 +1,113 @@ +#include "bmcv_api_ext.h" +#include "bmcv_internal.h" +#include "bmcv_common_bm1684.h" +#include + +static bm_status_t bmcv_quantify_check( + bm_handle_t handle, + bm_image input, + bm_image output) { + if (handle == NULL) { + bmlib_log("QUANTIFY", BMLIB_LOG_ERROR, "Can not get handle!\r\n"); + return BM_ERR_PARAM; + } + bm_image_format_ext src_format = input.image_format; + bm_image_data_format_ext src_type = input.data_type; + bm_image_format_ext dst_format = output.image_format; + bm_image_data_format_ext dst_type = output.data_type; + int src_h = input.height; + int src_w = input.width; + int dst_h = output.height; + int dst_w = output.width; + if (src_format != dst_format) { + bmlib_log("QUANTIFY", BMLIB_LOG_ERROR, "input and output image format is NOT same"); + return BM_ERR_PARAM; + } + if (src_format != FORMAT_RGB_PLANAR && + src_format != 
FORMAT_BGR_PLANAR) { + bmlib_log("QUANTIFY", BMLIB_LOG_ERROR, "Not supported image format"); + return BM_NOT_SUPPORTED; + } + if (src_type != DATA_TYPE_EXT_FLOAT32 || + dst_type != DATA_TYPE_EXT_1N_BYTE) { + bmlib_log("QUANTIFY", BMLIB_LOG_ERROR, "Not supported image data type"); + return BM_NOT_SUPPORTED; + } + if (src_h != dst_h || src_w != dst_w) { + bmlib_log("QUANTIFY", BMLIB_LOG_ERROR, "inputs and output image size should be same"); + return BM_ERR_PARAM; + } + return BM_SUCCESS; +} + +bm_status_t bmcv_image_quantify( + bm_handle_t handle, + bm_image input, + bm_image output) { + bm_status_t ret = BM_SUCCESS; + + bm_handle_check_2(handle, input, output); + ret = bmcv_quantify_check(handle, input, output); + if (BM_SUCCESS != ret) { + return ret; + } + bool output_alloc_flag = false; + if (!bm_image_is_attached(output)) { + ret = bm_image_alloc_dev_mem(output, BMCV_HEAP_ANY); + if (ret != BM_SUCCESS) { + return ret; + } + output_alloc_flag = true; + } + // construct and send api + int channel = bm_image_get_plane_num(input); + int input_str[3], output_str[3]; + bm_image_get_stride(input, input_str); + bm_image_get_stride(output, output_str); + + bm_device_mem_t input_mem[3]; + bm_image_get_device_mem(input, input_mem); + bm_device_mem_t output_mem[3]; + bm_image_get_device_mem(output, output_mem); + + bm_api_cv_quantify_t api; + api.channel = channel; + for (int i = 0; i < channel; i++) { + api.input_addr[i] = bm_mem_get_device_addr(input_mem[i]); + api.output_addr[i] = bm_mem_get_device_addr(output_mem[i]); + api.width[i] = input.image_private->memory_layout[i].W; + api.height[i] = input.image_private->memory_layout[i].H; + api.input_str[i] = input_str[i]; + api.output_str[i] = output_str[i]; + } + // rgb-planar format's channel is 1 + if (input.image_format == FORMAT_RGB_PLANAR || + input.image_format == FORMAT_BGR_PLANAR) { + api.height[0] *= 3; + } + + unsigned int chipid; + bm_get_chipid(handle, &chipid); + + switch (chipid) + { + case BM1684X: + ret = 
bm_tpu_kernel_launch(handle, "cv_quantify", (u8 *)&api, sizeof(api)); + if(BM_SUCCESS != ret){ + bmlib_log("QUANTIFY", BMLIB_LOG_ERROR, "quantify sync api error\n"); + if (output_alloc_flag) { + for (int i = 0; i < channel; i++) { + bm_free_device(handle, output_mem[i]); + } + } + return ret; + } + break; + + default: + printf("ChipID is NOT supported\n"); + break; + } + + return ret; +} diff --git a/bmvid/bmcv/src/bmcv_api_resize.cpp b/bmvid/bmcv/src/bmcv_api_resize.cpp index 91734b2..6150eca 100644 --- a/bmvid/bmcv/src/bmcv_api_resize.cpp +++ b/bmvid/bmcv/src/bmcv_api_resize.cpp @@ -192,7 +192,7 @@ static bm_status_t bmcv_resize_check( if (handle == NULL) { BMCV_ERR_LOG("[RESIZE] Can not get handle!\r\n"); - return BM_ERR_FAILURE; + return BM_ERR_DEVNOTREADY; } bm_image_format_ext src_format = input[0].image_format; bm_image_format_ext dst_format = output[0].image_format; @@ -201,7 +201,7 @@ static bm_status_t bmcv_resize_check( if (image_num < 0 || image_num > MAX_INPUT_NUM) { BMCV_ERR_LOG("[RESIZE]: image num not support:%d\n", image_num); - return BM_NOT_SUPPORTED; + return BM_ERR_PARAM; } int out_num = 0; for (int i = 0; i < image_num; i++) { @@ -212,22 +212,22 @@ static bm_status_t bmcv_resize_check( out_num = out_is_4N ? 
(out_num + 3) / 4 : out_num; if (out_num <= 0) { BMCV_ERR_LOG("[RESIZE]: output image number must bigger than 0\n"); - return BM_NOT_SUPPORTED; + return BM_ERR_PARAM; } for (int i = 0; i < image_num; i++) { if (!bm_image_is_attached(input[i])) { BMCV_ERR_LOG("[RESIZE]: input[%d] is not attached device memory\n", i); - return BM_NOT_SUPPORTED; + return BM_ERR_DATA; } if ((input[i].data_type != src_type) || (input[i].image_format != src_format)) { BMCV_ERR_LOG("[RESIZE]: all src image_format and data_type should be same\n"); - return BM_NOT_SUPPORTED; + return BM_ERR_DATA; } } for (int i = 1; i < out_num; i++) { if ((output[i].data_type != dst_type) || (output[i].image_format != dst_format)) { BMCV_ERR_LOG("[RESIZE]: all dst image_format and data_type should be same\n"); - return BM_NOT_SUPPORTED; + return BM_ERR_DATA; } } #if TPU_ONLY @@ -241,20 +241,20 @@ static bm_status_t bmcv_resize_check( src_format != FORMAT_RGB_PACKED && src_format != FORMAT_BGR_PACKED && src_format != FORMAT_GRAY) { BMCV_ERR_LOG("[RESIZE]: src format not support:%d\n", src_format); - return BM_NOT_SUPPORTED; + return BM_ERR_DATA; } if (dst_format != FORMAT_RGB_PLANAR && dst_format != FORMAT_BGR_PLANAR && dst_format != FORMAT_RGB_PACKED && dst_format != FORMAT_BGR_PACKED && dst_format != FORMAT_GRAY) { BMCV_ERR_LOG("[RESIZE]: dst format not support:%d\n", dst_format); - return BM_NOT_SUPPORTED; + return BM_ERR_DATA; } if (((input[0].data_type == DATA_TYPE_EXT_1N_BYTE) && (output[0].data_type == DATA_TYPE_EXT_FLOAT32)) || ((output[0].data_type == DATA_TYPE_EXT_1N_BYTE) && (input[0].data_type == DATA_TYPE_EXT_FLOAT32))) { BMCV_ERR_LOG("[RESIZE]: image data type not support\n"); - return BM_NOT_SUPPORTED; + return BM_ERR_DATA; } if ((input[0].data_type == DATA_TYPE_EXT_FP16) || @@ -263,7 +263,7 @@ static bm_status_t bmcv_resize_check( (output[0].data_type == DATA_TYPE_EXT_BF16)){ BMCV_ERR_LOG("data type not support\n"); - return BM_NOT_SUPPORTED; + return BM_ERR_DATA; } return BM_SUCCESS; @@ 
-300,7 +300,7 @@ bm_status_t bmcv_resize_internal(bm_handle_t handle, bm_malloc_device_byte(handle, &img_attr_dev, resize_img_attr_size)) { BMCV_ERR_LOG("bm_malloc_device_byte error\r\n"); - return BM_ERR_FAILURE; + return BM_ERR_NOMEM; } u64 resize_attr_start_addr = bm_mem_get_device_addr(img_attr_dev); u64 temp_addr = resize_attr_start_addr; @@ -317,7 +317,7 @@ bm_status_t bmcv_resize_internal(bm_handle_t handle, BMCV_ERR_LOG("bm_memcpy_s2d error\r\n"); bm_free_device(handle, img_attr_dev); - return BM_ERR_FAILURE; + return BM_ERR_NOMEM; } if (i != (image_num - 1)) { temp_addr = temp_addr + temp_size; @@ -331,7 +331,7 @@ bm_status_t bmcv_resize_internal(bm_handle_t handle, BMCV_ERR_LOG("bm_image_tensor_alloc_dev_mem error\r\n"); bm_free_device(handle, img_attr_dev); - return BM_ERR_FAILURE; + return BM_ERR_NOMEM; } } bm_device_mem_t input_image_dev, output_image_dev; @@ -349,7 +349,7 @@ bm_status_t bmcv_resize_internal(bm_handle_t handle, bm_free_device(handle, dmem); } - return BM_ERR_FAILURE; + return BM_ERR_NOMEM; } if (BM_SUCCESS != bm_memcpy_s2d(handle, roi_num_array_dev, roi_num_array)) { BMCV_ERR_LOG("bm_memcpy_s2d error\r\n"); @@ -388,13 +388,13 @@ bm_status_t bmcv_resize_internal(bm_handle_t handle, BMCV_ERR_LOG("resize send api error\r\n"); bm_free_device(handle, img_attr_dev); bm_free_device(handle, roi_num_array_dev); - return BM_ERR_FAILURE; + return BM_ERR_TIMEOUT; } if (BM_SUCCESS != bm_sync_api(handle)) { BMCV_ERR_LOG("resize sync api error\r\n"); bm_free_device(handle, img_attr_dev); bm_free_device(handle, roi_num_array_dev); - return BM_ERR_FAILURE; + return BM_ERR_TIMEOUT; } bm_free_device(handle, img_attr_dev); bm_free_device(handle, roi_num_array_dev); @@ -990,7 +990,7 @@ bm_status_t bmcv_query_idx_in_array(int query_idx, } bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_ERROR, "find nothing \r\n"); - return BM_ERR_FAILURE; + return BM_ERR_PARAM; } bm_status_t try_image_align(bm_handle_t handle, @@ -998,6 +998,7 @@ bm_status_t try_image_align(bm_handle_t 
handle, bm_image * src_images, bm_image * converted_images, int * image_align_flag) { + bm_status_t ret = BM_SUCCESS; #ifdef __linux__ bool image_alloc_flag[image_num]; #else @@ -1022,7 +1023,7 @@ bm_status_t try_image_align(bm_handle_t handle, if (bm_vpp_query_limitation(src_images[i].image_format, \ FORMAT_RGB_PACKED, limit) != BM_SUCCESS){ BMCV_ERR_LOG("format not support!\n"); - return BM_ERR_FAILURE; + return BM_ERR_DATA; } if (image_stride[idx]%limit.src_stride_align[idx] != 0){ @@ -1032,13 +1033,14 @@ bm_status_t try_image_align(bm_handle_t handle, } } if (1 == image_align_flag[i]) { - if (BM_SUCCESS != bm_image_create(handle, - src_images[i].height, - src_images[i].width, - src_images[i].image_format, - src_images[i].data_type, - &converted_images[i], - image_stride)) { + ret = bm_image_create(handle, + src_images[i].height, + src_images[i].width, + src_images[i].image_format, + src_images[i].data_type, + &converted_images[i], + image_stride); + if (BM_SUCCESS != ret) { BMCV_ERR_LOG("failed to create internal image\n"); for (int free_idx = 0; free_idx < i; free_idx++) { bm_image_destroy(converted_images[free_idx]); @@ -1050,7 +1052,7 @@ bm_status_t try_image_align(bm_handle_t handle, } } - return BM_ERR_FAILURE; + return ret; } if (bm_image_is_attached(src_images[i])) { // keep same heap location with before @@ -1076,7 +1078,7 @@ bm_status_t try_image_align(bm_handle_t handle, } } - return BM_ERR_FAILURE; + return BM_ERR_NOMEM; } image_alloc_flag[i] = true; } else { @@ -1084,23 +1086,24 @@ bm_status_t try_image_align(bm_handle_t handle, for (int free_idx = 0; free_idx <= i; free_idx++) { bm_image_destroy(converted_images[free_idx]); } - return BM_ERR_FAILURE; + return BM_ERR_DATA; } // bmcv_width_align(handle, src_images[i], converted_images[i]); } else { - if (BM_SUCCESS != bm_image_create(handle, - src_images[i].height, - src_images[i].width, - src_images[i].image_format, - src_images[i].data_type, - &converted_images[i], - image_stride)) { + ret = 
bm_image_create(handle, + src_images[i].height, + src_images[i].width, + src_images[i].image_format, + src_images[i].data_type, + &converted_images[i], + image_stride); + if (BM_SUCCESS != ret) { BMCV_ERR_LOG("failed to create internal image\n"); for (int free_idx = 0; free_idx < i; free_idx++) { bm_image_destroy(converted_images[free_idx]); } - return BM_ERR_FAILURE; + return ret; } if (bm_image_is_attached(src_images[i])) { bm_device_mem_t dmem[3]; @@ -1112,7 +1115,7 @@ bm_status_t try_image_align(bm_handle_t handle, bm_image_destroy(converted_images[free_idx]); } - return BM_ERR_FAILURE; + return BM_ERR_DATA; } } } @@ -1125,9 +1128,14 @@ bm_status_t do_image_align(bm_handle_t handle, bm_image * src_images, bm_image * converted_images, int * image_align_flag) { + bm_status_t ret = BM_ERR_FAILURE; for (int i = 0; i < image_num; i++) { if (1 == image_align_flag[i]) { - bmcv_width_align(handle, src_images[i], converted_images[i]); + ret = bmcv_width_align(handle, src_images[i], converted_images[i]); + if (BM_SUCCESS != ret) { + BMCV_ERR_LOG("bmcv_width_align error\n"); + return ret; + } } } @@ -1152,7 +1160,7 @@ bm_status_t image_restore_align(bm_handle_t handle, (!bm_image_is_attached(restored_images[i]))) { BMCV_ERR_LOG("image should be attached memory firstly\n"); - return BM_ERR_FAILURE; + return BM_ERR_DATA; } if (1 == image_align_flag[i]) { bmcv_width_align(handle, aligned_images[i], restored_images[i]); @@ -1218,7 +1226,7 @@ bm_status_t bmcv_image_resize_(bm_handle_t handle, } } - return BM_ERR_FAILURE; + return BM_ERR_NOMEM; } output_mem_alloc_flag[output_idx] = true; } @@ -1250,8 +1258,8 @@ bm_status_t bmcv_image_resize_(bm_handle_t handle, #endif bm_image tmp_input[32], tmp_output[32]; int in_align_flag[32] = {0}, out_align_flag[32] = {0}; - if (BM_SUCCESS != - try_image_align(handle, input_num, input, tmp_input, in_align_flag)) { + ret = try_image_align(handle, input_num, input, tmp_input, in_align_flag); + if (BM_SUCCESS != ret) { 
BMCV_ERR_LOG("try_image_align error\n"); for (int free_idx = 0; free_idx < output_num; free_idx++) { if (output_mem_alloc_flag[free_idx]) { @@ -1261,11 +1269,10 @@ bm_status_t bmcv_image_resize_(bm_handle_t handle, } } - return BM_ERR_FAILURE; + return ret; } - if (BM_SUCCESS != - try_image_align( - handle, output_num, output, tmp_output, out_align_flag)) { + ret = try_image_align(handle, output_num, output, tmp_output, out_align_flag); + if (BM_SUCCESS != ret) { BMCV_ERR_LOG("try_image_align error\n"); for (int free_idx = 0; free_idx < output_num; free_idx++) { if (output_mem_alloc_flag[free_idx]) { @@ -1275,12 +1282,12 @@ bm_status_t bmcv_image_resize_(bm_handle_t handle, } } - return BM_ERR_FAILURE; + return ret; } if(!if_use_vpp(tmp_input, tmp_output, input_num, output_num, resize_attr)&&(BMCV_INTER_NEAREST != resize_attr[0].interpolation)){ BMCV_ERR_LOG("[RESIZE]: vpp and tpu not support\r\n"); - return BM_NOT_SUPPORTED; + return BM_ERR_PARAM; } // if input is packed format, convert it to planar @@ -1367,8 +1374,9 @@ bm_status_t bmcv_image_resize_(bm_handle_t handle, } delete [] output_planar; } - if (BM_SUCCESS != image_restore_align( - handle, input_num, tmp_input, NULL, in_align_flag)) { + ret = image_restore_align( + handle, input_num, tmp_input, NULL, in_align_flag); + if (BM_SUCCESS != ret) { BMCV_ERR_LOG("image_restore_align error\n"); for (int free_idx = 0; free_idx < output_num; free_idx++) { if (output_mem_alloc_flag[free_idx]) { @@ -1383,11 +1391,11 @@ bm_status_t bmcv_image_resize_(bm_handle_t handle, } delete[] input_planar; } - return BM_ERR_FAILURE; + return ret; } - if (BM_SUCCESS != - image_restore_align( - handle, output_num, tmp_output, NULL, out_align_flag)) { + ret = image_restore_align( + handle, output_num, tmp_output, NULL, out_align_flag); + if (BM_SUCCESS != ret) { BMCV_ERR_LOG("image_restore_align error\n"); for (int free_idx = 0; free_idx < output_num; free_idx++) { if (output_mem_alloc_flag[free_idx]) { @@ -1403,7 +1411,7 @@ 
bm_status_t bmcv_image_resize_(bm_handle_t handle, delete[] input_planar; } - return BM_ERR_FAILURE; + return ret; } #else int resize_alg = VPP_SCALE_BILINEAR; @@ -1432,9 +1440,9 @@ bm_status_t bmcv_image_resize_(bm_handle_t handle, delete[] output_planar; } output_idx = 0; - if (BM_SUCCESS != - do_image_align( - handle, input_num, input_planar, tmp_input, in_align_flag)) { + ret = do_image_align( + handle, input_num, input_planar, tmp_input, in_align_flag); + if (BM_SUCCESS != ret) { BMCV_ERR_LOG("do_image_align error\n"); for (int free_idx = 0; free_idx < output_num; free_idx++) { if (output_mem_alloc_flag[free_idx]) { @@ -1450,7 +1458,7 @@ bm_status_t bmcv_image_resize_(bm_handle_t handle, delete[] input_planar; } - return BM_ERR_FAILURE; + return ret; } for (int input_idx = 0; input_idx < input_num; input_idx++) { for (int i = 0; i < resize_attr[input_idx].roi_num; i++) { @@ -1491,9 +1499,9 @@ bm_status_t bmcv_image_resize_(bm_handle_t handle, return BM_ERR_FAILURE; } - if (BM_SUCCESS != - image_restore_align( - handle, input_num, tmp_input, NULL, in_align_flag)) { + ret = image_restore_align( + handle, input_num, tmp_input, NULL, in_align_flag); + if (BM_SUCCESS != ret) { BMCV_ERR_LOG("image_restore_align error\n"); for (int free_idx = 0; free_idx < output_num; free_idx++) { if (output_mem_alloc_flag[free_idx]) { @@ -1509,11 +1517,11 @@ bm_status_t bmcv_image_resize_(bm_handle_t handle, delete[] input_planar; } - return BM_ERR_FAILURE; + return ret; } - if (BM_SUCCESS != - image_restore_align( - handle, output_num, tmp_output, output, out_align_flag)) { + ret = image_restore_align( + handle, output_num, tmp_output, output, out_align_flag); + if (BM_SUCCESS != ret) { BMCV_ERR_LOG("image_restore_align error\n"); for (int free_idx = 0; free_idx < output_num; free_idx++) { if (output_mem_alloc_flag[free_idx]) { @@ -1529,7 +1537,7 @@ bm_status_t bmcv_image_resize_(bm_handle_t handle, delete[] input_planar; } - return BM_ERR_FAILURE; + return ret; } } else { int 
in_concat_status = 0; @@ -1560,18 +1568,18 @@ bm_status_t bmcv_image_resize_(bm_handle_t handle, } else { if (if_4N_to_1N != 0) { BMCV_ERR_LOG("[RESIZE]: mem must be continuous when in 4n to 1n mode\r\n"); - return BM_ERR_FAILURE; + return BM_ERR_PARAM; } int tmp_idx = 0; for (int i = 0; i < input_num; i++) { concat_images_to_tensor(handle, 1, &input_planar[i], &in_tensor[i]); - if (BM_SUCCESS != - concat_images_to_tensor(handle, + ret = concat_images_to_tensor(handle, resize_attr[i].roi_num, &output[tmp_idx], - &out_tensor[i])) { + &out_tensor[i]); + if (BM_SUCCESS != ret) { BMCV_ERR_LOG("[RESIZE]: output images not continuous\r\n"); - return BM_ERR_FAILURE; + return ret; } bmcv_resize_internal( handle, 1, &resize_attr[i], in_tensor[i], out_tensor[i]); @@ -1587,9 +1595,9 @@ bm_status_t bmcv_image_resize_(bm_handle_t handle, } delete [] output_planar; } - if (BM_SUCCESS != - image_restore_align( - handle, input_num, tmp_input, NULL, in_align_flag)) { + ret = image_restore_align( + handle, input_num, tmp_input, NULL, in_align_flag); + if (BM_SUCCESS != ret) { BMCV_ERR_LOG("image_restore_align error\n"); for (int free_idx = 0; free_idx < output_num; free_idx++) { if (output_mem_alloc_flag[free_idx]) { @@ -1605,11 +1613,11 @@ bm_status_t bmcv_image_resize_(bm_handle_t handle, delete[] input_planar; } - return BM_ERR_FAILURE; + return ret; } - if (BM_SUCCESS != - image_restore_align( - handle, output_num, tmp_output, NULL, out_align_flag)) { + ret = image_restore_align( + handle, output_num, tmp_output, NULL, out_align_flag); + if (BM_SUCCESS != ret) { BMCV_ERR_LOG("image_restore_align error\n"); for (int free_idx = 0; free_idx < output_num; free_idx++) { if (output_mem_alloc_flag[free_idx]) { @@ -1625,7 +1633,7 @@ bm_status_t bmcv_image_resize_(bm_handle_t handle, delete[] input_planar; } - return BM_ERR_FAILURE; + return ret; } } @@ -1650,7 +1658,7 @@ bm_status_t bmcv_image_resize( { unsigned int chipid = BM1684X; bm_status_t ret = BM_SUCCESS; - + 
bm_handle_check_2(handle, input[0], output[0]); ret = bm_get_chipid(handle, &chipid); if (BM_SUCCESS != ret) return ret; @@ -1666,7 +1674,7 @@ bm_status_t bmcv_image_resize( break; default: - ret = BM_NOT_SUPPORTED; + ret = BM_ERR_NOFEATURE; break; } diff --git a/bmvid/bmcv/src/bmcv_api_sobel.cpp b/bmvid/bmcv/src/bmcv_api_sobel.cpp index da479f7..e41642e 100644 --- a/bmvid/bmcv/src/bmcv_api_sobel.cpp +++ b/bmvid/bmcv/src/bmcv_api_sobel.cpp @@ -1,8 +1,10 @@ #include "bmcv_api_ext.h" #include "bmcv_common_bm1684.h" +#include "bmcv_internal.h" #include #include #include +#include #define IS_YUV(a) (a == FORMAT_NV12 || a == FORMAT_NV21 || a == FORMAT_NV16 || \ @@ -90,7 +92,7 @@ static bm_status_t bmcv_sobel_check( int ksize) { if (handle == NULL) { bmlib_log("SOBEL", BMLIB_LOG_ERROR, "Can not get handle!\r\n"); - return BM_ERR_PARAM; + return BM_ERR_DEVNOTREADY; } if (ksize % 2 == 0 || ksize > 31) { bmlib_log("SOBEL", BMLIB_LOG_ERROR, "The kernel size must be odd and not greater than 31\n" ); @@ -110,31 +112,31 @@ static bm_status_t bmcv_sobel_check( int image_dw = output.width; if (image_sw + ksize - 1 >= 2048) { bmlib_log("SOBEL", BMLIB_LOG_ERROR, "image width is too large!\r\n"); - return BM_NOT_SUPPORTED; + return BM_ERR_PARAM; } if (ksize > 9) { bmlib_log("SOBEL", BMLIB_LOG_ERROR, "ksize is too large!\r\n"); - return BM_NOT_SUPPORTED; + return BM_ERR_PARAM; } if (!IS_YUV(src_format) && !IS_RGB(src_format) && src_format != FORMAT_GRAY) { bmlib_log("SOBEL", BMLIB_LOG_ERROR, "Not supported input image format\n"); - return BM_NOT_SUPPORTED; + return BM_ERR_DATA; } if ((IS_YUV(src_format) && dst_format != FORMAT_GRAY) && (dst_format != src_format)) { bmlib_log("SOBEL", BMLIB_LOG_ERROR, "Not supported output image format\n"); - return BM_NOT_SUPPORTED; + return BM_ERR_DATA; } if (src_type != DATA_TYPE_EXT_1N_BYTE || dst_type != DATA_TYPE_EXT_1N_BYTE) { bmlib_log("SOBEL", BMLIB_LOG_ERROR, "Not supported image data type\n"); - return BM_NOT_SUPPORTED; + return 
BM_ERR_DATA; } if (image_sh != image_dh || image_sw != image_dw) { bmlib_log("SOBEL", BMLIB_LOG_ERROR, "input and output image size should be same\n"); - return BM_NOT_SUPPORTED; + return BM_ERR_DATA; } return BM_SUCCESS; } @@ -263,7 +265,7 @@ bm_status_t bmcv_image_sobel( float delta) { unsigned int chipid = 0x1686; bm_status_t ret = BM_SUCCESS; - + bm_handle_check_2(handle, input, output); ret = bm_get_chipid(handle, &chipid); if (BM_SUCCESS != ret) return ret; @@ -276,12 +278,12 @@ bm_status_t bmcv_image_sobel( break; case 0x1686: - printf("bm1684x not support\n"); - ret = BM_NOT_SUPPORTED; + printf("current card not support\n"); + ret = BM_ERR_NOFEATURE; break; default: - ret = BM_NOT_SUPPORTED; + ret = BM_ERR_NOFEATURE; break; } diff --git a/bmvid/bmcv/src/bmcv_api_split.cpp b/bmvid/bmcv/src/bmcv_api_split.cpp index fa67e53..4377523 100644 --- a/bmvid/bmcv/src/bmcv_api_split.cpp +++ b/bmvid/bmcv/src/bmcv_api_split.cpp @@ -1,4 +1,5 @@ #include +#include #include "bmcv_api.h" #include "bmcv_api_ext.h" #include "bmcv_internal.h" @@ -55,6 +56,7 @@ bm_status_t bmcv_image_split(bm_handle_t handle, BMCV_ERR_LOG("[Split] Can not get handle!\r\n"); return BM_ERR_FAILURE; } + bm_handle_check_2(handle, input, output[0]); bm_status_t ret = BM_SUCCESS; ret = bmcv_split_check(input, output); if (BM_SUCCESS != ret) { diff --git a/bmvid/bmcv/src/bmcv_api_storage_convert.cpp b/bmvid/bmcv/src/bmcv_api_storage_convert.cpp index 26bcaba..eefa215 100644 --- a/bmvid/bmcv/src/bmcv_api_storage_convert.cpp +++ b/bmvid/bmcv/src/bmcv_api_storage_convert.cpp @@ -1,4 +1,5 @@ #include +#include #include #ifdef __linux__ #include @@ -41,14 +42,14 @@ static bm_status_t bmcv_convert_check( if (image_n<1 || image_n>4) { bmlib_log("BMCV", BMLIB_LOG_ERROR, "expected 1 <= image_n <= 4 %s: %s: %d\n", filename(__FILE__), __func__, __LINE__); - return BM_NOT_SUPPORTED; + return BM_ERR_PARAM; } int width = output[0].width; int height = output[0].height; if (width > 8192) { bmlib_log("BMCV", 
BMLIB_LOG_ERROR, "expected width <= 8192 %s: %s: %d\n", filename(__FILE__), __func__, __LINE__); - return BM_NOT_SUPPORTED; + return BM_ERR_DATA; } bm_image_format_ext expected_input_image_format, expected_output_image_format; bm_image_data_format_ext expected_input_data_format, expected_output_data_format; @@ -67,7 +68,7 @@ static bm_status_t bmcv_convert_check( (output[0].data_type == DATA_TYPE_EXT_BF16)){ BMCV_ERR_LOG("data type not support\n"); - return BM_NOT_SUPPORTED; + return BM_ERR_DATA; } if (!IS_4N(expected_input_data_format)) { @@ -83,7 +84,7 @@ static bm_status_t bmcv_convert_check( bmlib_log("BMCV", BMLIB_LOG_ERROR, "expected consistant input image format " "and size %s: %s: %d\n", filename(__FILE__), __func__, __LINE__); - return BM_NOT_SUPPORTED; + return BM_ERR_DATA; } } @@ -93,7 +94,7 @@ static bm_status_t bmcv_convert_check( || height != output[i].height) { bmlib_log("BMCV", BMLIB_LOG_ERROR, "expected consistant output image format and size %s: %s: %d\n", filename(__FILE__), __func__, __LINE__); - return BM_NOT_SUPPORTED; + return BM_ERR_DATA; } } @@ -102,7 +103,7 @@ static bm_status_t bmcv_convert_check( bmlib_log("BMCV", BMLIB_LOG_ERROR, "Not support input output image format " "FORMAT_COMPRESSED or output image format FORMAT_GRAY %s: %s: %d\n", filename(__FILE__), __func__, __LINE__); - return BM_NOT_SUPPORTED; + return BM_ERR_DATA; } switch(expected_input_image_format) @@ -125,7 +126,7 @@ static bm_status_t bmcv_convert_check( default: bmlib_log("BMCV", BMLIB_LOG_ERROR, "Not support input image format, %s: %s: %d\n", filename(__FILE__), __func__, __LINE__); - return BM_NOT_SUPPORTED; + return BM_ERR_DATA; } switch(expected_output_image_format) @@ -147,21 +148,21 @@ static bm_status_t bmcv_convert_check( default: bmlib_log("BMCV", BMLIB_LOG_ERROR, "Not support input image format, %s: %s: %d\n", filename(__FILE__), __func__, __LINE__); - return BM_NOT_SUPPORTED; + return BM_ERR_DATA; } if (expected_input_data_format == 
DATA_TYPE_EXT_1N_BYTE_SIGNED \ || expected_input_data_format == DATA_TYPE_EXT_4N_BYTE_SIGNED) { bmlib_log("BMCV", BMLIB_LOG_ERROR, "not expected signed data format %s: %s: %d\n", filename(__FILE__), __func__, __LINE__); - return BM_NOT_SUPPORTED; + return BM_ERR_DATA; } if (expected_output_data_format == DATA_TYPE_EXT_1N_BYTE_SIGNED \ || expected_output_data_format == DATA_TYPE_EXT_4N_BYTE_SIGNED) { bmlib_log("BMCV", BMLIB_LOG_ERROR, "not expected signed data format %s: %s: %d\n", filename(__FILE__), __func__, __LINE__); - return BM_NOT_SUPPORTED; + return BM_ERR_DATA; } bm_image_get_stride(input[0], stride); @@ -172,7 +173,7 @@ static bm_status_t bmcv_convert_check( if (stride[k] != stride_[k]) { bmlib_log("BMCV", BMLIB_LOG_ERROR, "all input should have same stride %s: %s: %d\n", filename(__FILE__), __func__, __LINE__); - return BM_NOT_SUPPORTED; + return BM_ERR_DATA; } } } @@ -185,7 +186,7 @@ static bm_status_t bmcv_convert_check( if (stride[k] != stride_[k]) { bmlib_log("BMCV", BMLIB_LOG_ERROR, "all output should have same stride %s: %s: %d\n", filename(__FILE__), __func__, __LINE__); - return BM_NOT_SUPPORTED; + return BM_ERR_DATA; } } } @@ -299,7 +300,7 @@ bm_status_t bmcv_image_storage_convert_( csc_type_t csc_type) { if (handle == NULL) { bmlib_log("STORAGE_CONVERT", BMLIB_LOG_ERROR, "Can not get handle!\r\n"); - return BM_ERR_FAILURE; + return BM_ERR_DEVNOTREADY; } bm_status_t ret = bmcv_convert_check(image_num, input_, output_); if (ret != BM_SUCCESS) @@ -316,7 +317,7 @@ bm_status_t bmcv_image_storage_convert_( if (!bm_image_is_attached(input_[i])) { bmlib_log("BMCV", BMLIB_LOG_ERROR, "input image not attach device memory %s: %s: %d\n", filename(__FILE__), __func__, __LINE__); - return BM_ERR_FAILURE; + return BM_ERR_DATA; } #ifndef USING_CMODEL @@ -352,7 +353,7 @@ bm_status_t bmcv_image_storage_convert_( bm_free_device(handle, dmem); } } - return BM_ERR_FAILURE; + return BM_ERR_DATA; } } output_alloc_flag[i] = true; @@ -460,7 +461,7 @@ bm_status_t 
bmcv_image_storage_convert_( delete [] in_planar; } delete [] out; - return BM_ERR_FAILURE; + return BM_ERR_NOMEM; } } } else { @@ -544,7 +545,7 @@ bm_status_t bmcv_image_storage_convert_( } delete [] in_planar; } - return BM_ERR_FAILURE; + return BM_ERR_NOMEM; } input_inner_alloc_flag[i] = true; for (int p = 0; p < plane_num; p++) { @@ -615,7 +616,7 @@ bm_status_t bmcv_image_storage_convert_( } delete [] in_planar; } - return BM_ERR_FAILURE; + return BM_ERR_NOMEM; } output_inner_alloc_flag[i] = true; } else { @@ -691,7 +692,7 @@ bm_status_t bmcv_image_storage_convert_( } delete [] in_planar; } - return BM_ERR_FAILURE; + return BM_ERR_NOMEM; } } for (int k = 0; k < expect_output_num; k++) { @@ -745,7 +746,7 @@ bm_status_t bmcv_image_storage_convert_( } delete [] in_planar; } - return BM_ERR_FAILURE; + return BM_ERR_TIMEOUT; } if (BM_SUCCESS != bm_sync_api(handle)) { BMCV_ERR_LOG("storage_convert sync api error\r\n"); @@ -755,7 +756,7 @@ bm_status_t bmcv_image_storage_convert_( } delete [] in_planar; } - return BM_ERR_FAILURE; + return BM_ERR_TIMEOUT; } inner_in = inner_out; } @@ -833,7 +834,7 @@ bm_status_t bmcv_image_storage_convert_with_csctype( break; default: - ret = BM_NOT_SUPPORTED; + ret = BM_ERR_NOFEATURE; break; } @@ -850,7 +851,7 @@ bm_status_t bmcv_image_storage_convert( bm_status_t ret = BM_SUCCESS; csc_type_t csc_type = CSC_MAX_ENUM; - + bm_handle_check_2(handle, input_[0], output_[0]); ret = bmcv_image_storage_convert_with_csctype(handle, image_num, input_, output_, csc_type); return ret; } diff --git a/bmvid/bmcv/src/bmcv_api_threshold.cpp b/bmvid/bmcv/src/bmcv_api_threshold.cpp index fc4dd29..817075a 100644 --- a/bmvid/bmcv/src/bmcv_api_threshold.cpp +++ b/bmvid/bmcv/src/bmcv_api_threshold.cpp @@ -3,6 +3,7 @@ #include "bmcv_common_bm1684.h" #include #include +#include static bm_status_t bmcv_threshold_check( bm_handle_t handle, @@ -67,6 +68,7 @@ bm_status_t bmcv_image_threshold( unsigned char max_value, bm_thresh_type_t type) { bm_status_t ret = 
BM_SUCCESS; + bm_handle_check_2(handle, input, output); ret = bmcv_threshold_check(handle, input, output, type); if (BM_SUCCESS != ret) { return ret; diff --git a/bmvid/bmcv/src/bmcv_api_transpose.cpp b/bmvid/bmcv/src/bmcv_api_transpose.cpp index 64f0ef4..efa3887 100644 --- a/bmvid/bmcv/src/bmcv_api_transpose.cpp +++ b/bmvid/bmcv/src/bmcv_api_transpose.cpp @@ -3,6 +3,7 @@ #include "bmcv_internal.h" #include "bmcv_common_bm1684.h" #include +#include #ifdef __linux__ #include #else @@ -15,6 +16,7 @@ bm_status_t bmcv_image_transpose(bm_handle_t handle, bm_image input, bm_image output) { + bm_handle_check_2(handle, input, output); if (handle == NULL) { bmlib_log("TRANSPOSE", BMLIB_LOG_ERROR, "Can not get handle!\r\n"); return BM_ERR_FAILURE; diff --git a/bmvid/bmcv/src/bmcv_api_warp_affine.cpp b/bmvid/bmcv/src/bmcv_api_warp_affine.cpp index 78f0f72..7305dd8 100644 --- a/bmvid/bmcv/src/bmcv_api_warp_affine.cpp +++ b/bmvid/bmcv/src/bmcv_api_warp_affine.cpp @@ -337,6 +337,7 @@ static bm_status_t per_image_deal(bm_handle_t handle, param.output_image_addr = bm_mem_get_device_addr(tensor_output); param.input_image_addr = bm_mem_get_device_addr(tensor_input); ret = bm_malloc_device_byte(handle, &tensor_temp, input.height * input.width * 2); + int index_size_temp = image_dw > image_dh ? 
ALIGN(image_dw, 64) : ALIGN(image_dh, 64); if (BM_SUCCESS != ret) { bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_ERROR, "bm_malloc error, %s: %s: %d\n", @@ -365,7 +366,8 @@ static bm_status_t per_image_deal(bm_handle_t handle, } bm_image_get_stride(input, &(param.src_w_stride)); - ret = bm_malloc_device_byte(handle, &tensor_S, image_dh * image_dw * image_c * 4); + ret = bm_malloc_device_byte(handle, &tensor_S, index_size_temp * index_size_temp * image_c * 4); + // ret = bm_malloc_device_byte(handle, &tensor_S, image_dh * image_dw * image_c * 4); if(BM_SUCCESS != ret) { bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_ERROR, "bm_malloc error, %s: %s: %d\n", @@ -384,7 +386,7 @@ static bm_status_t per_image_deal(bm_handle_t handle, bm_free_device(handle, tensor_temp); bm_free_device(handle, tensor_out_align); return ret; - } + } bm_free_device(handle, tensor_S); bm_free_device(handle, tensor_temp); @@ -423,7 +425,8 @@ static bm_status_t per_image_deal(bm_handle_t handle, param.input_image_addr = bm_mem_get_device_addr(tensor_input); std::vector internal_mem_v; - ret = bm_malloc_device_byte(handle, &tensor_temp_r, input.height * input.width * 2); + ret = bm_malloc_device_byte(handle, &tensor_temp_r, input.height * input.width * 4); + int index_size_temp = image_dw > image_dh ? 
ALIGN(image_dw, 64) : ALIGN(image_dh, 64); if (BM_SUCCESS != ret) { bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_ERROR, "bm_malloc error, %s: %s: %d\n", @@ -432,7 +435,7 @@ static bm_status_t per_image_deal(bm_handle_t handle, } internal_mem_v.push_back(&tensor_temp_r); - ret = bm_malloc_device_byte(handle, &tensor_temp_g, input.height * input.width * 2); + ret = bm_malloc_device_byte(handle, &tensor_temp_g, input.height * input.width * 4); if (BM_SUCCESS != ret) { bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_ERROR, "bm_malloc error, %s: %s: %d\n", @@ -441,7 +444,7 @@ static bm_status_t per_image_deal(bm_handle_t handle, } internal_mem_v.push_back(&tensor_temp_g); - ret = bm_malloc_device_byte(handle, &tensor_temp_b, input.height * input.width * 2); + ret = bm_malloc_device_byte(handle, &tensor_temp_b, input.height * input.width * 4); if (BM_SUCCESS != ret) { bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_ERROR, "bm_malloc error, %s: %s: %d\n", @@ -450,7 +453,7 @@ static bm_status_t per_image_deal(bm_handle_t handle, } internal_mem_v.push_back(&tensor_temp_b); - ret = bm_malloc_device_byte(handle, &tensor_out_align_a, output.height * output.width * 2); + ret = bm_malloc_device_byte(handle, &tensor_out_align_a, output.height * output.width * 4); if (BM_SUCCESS != ret) { bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_ERROR, "bm_malloc error, %s: %s: %d\n", @@ -459,7 +462,7 @@ static bm_status_t per_image_deal(bm_handle_t handle, } internal_mem_v.push_back(&tensor_out_align_a); - ret = bm_malloc_device_byte(handle, &tensor_out_align_b, output.height * output.width * 2); + ret = bm_malloc_device_byte(handle, &tensor_out_align_b, output.height * output.width * 4); if (BM_SUCCESS != ret) { bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_ERROR, "bm_malloc error, %s: %s: %d\n", @@ -486,7 +489,8 @@ static bm_status_t per_image_deal(bm_handle_t handle, param.m.m[i] = matrix.matrix->m[i]; } - ret = bm_malloc_device_byte(handle, &tensor_S, image_dh * image_dw * image_c * 4); + // ret = bm_malloc_device_byte(handle, &tensor_S, image_dh * 
image_dw * image_c * 4); + ret = bm_malloc_device_byte(handle, &tensor_S, index_size_temp * index_size_temp * image_c * 4); if(BM_SUCCESS != ret) { bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_ERROR, "bm_malloc error, %s: %s: %d\n", @@ -587,6 +591,10 @@ static bm_status_t per_image_deal(bm_handle_t handle, } for (int num = 0;num < image_num;num++){ + int stride = 0; + bm_image_get_stride(input[num], &stride); + input[num].width = stride; + if(use_bilinear){ bm_image_data_format_ext input_format = input[0].data_type; bm_image_data_format_ext output_format = output[0].data_type; @@ -605,6 +613,7 @@ static bm_status_t per_image_deal(bm_handle_t handle, return ret; } } + return BM_SUCCESS; } @@ -652,7 +661,7 @@ static bm_status_t bmcv_warp_check( filename(__FILE__), __func__, __LINE__); return BM_NOT_SUPPORTED; } - if (src_format != FORMAT_RGB_PLANAR && src_format != FORMAT_BGR_PLANAR) { + if (src_format != FORMAT_RGB_PLANAR && src_format != FORMAT_BGR_PLANAR && src_format != FORMAT_GRAY) { bmlib_log("BMCV", BMLIB_LOG_ERROR, "Not supported input image format %s: %s: %d\n", filename(__FILE__), __func__, __LINE__); return BM_NOT_SUPPORTED; @@ -732,6 +741,7 @@ bm_status_t bmcv_image_warp_affine_similar_to_opencv( bm_image *output, int use_bilinear) { + bm_handle_check_2(handle, input[0], output[0]); UNUSED(use_bilinear); float matrix_tem[3][3]; float matrix_tem_inv[2][3]; @@ -767,6 +777,7 @@ bm_status_t bmcv_image_warp_affine( bm_image *output, int use_bilinear) { + bm_handle_check_2(handle, input[0], output[0]); if(BM_SUCCESS !=bmcv_warp_check(handle, image_num, matrix, input, output)) { BMCV_ERR_LOG("bm_memcpy_s2d error\r\n"); return BM_ERR_FAILURE; @@ -802,5 +813,6 @@ bm_status_t bmcv_image_warp(bm_handle_t handle, bmcv_affine_image_matrix matrix[4], bm_image * input, bm_image * output) { + bm_handle_check_2(handle, input[0], output[0]); return bmcv_image_warp_affine(handle, image_num, matrix, input, output); } diff --git a/bmvid/bmcv/src/bmcv_api_warp_perspective.cpp 
b/bmvid/bmcv/src/bmcv_api_warp_perspective.cpp index 2b567a9..6f3a6f7 100755 --- a/bmvid/bmcv/src/bmcv_api_warp_perspective.cpp +++ b/bmvid/bmcv/src/bmcv_api_warp_perspective.cpp @@ -632,7 +632,9 @@ static bm_status_t per_image_deal_nearest(bm_handle_t handle, bm_device_mem_t tensor_input; bm_image_get_device_mem(input, &tensor_input); bm_image_get_device_mem(output, &tensor_output); - ret = bm_malloc_device_byte(handle, &tensor_S, image_dh * image_dw * image_c * 4); + // ret = bm_malloc_device_byte(handle, &tensor_S, image_dh * image_dw * image_c * 4); + int index_size_temp = image_dw > image_dh ? ALIGN(image_dw, 64) : ALIGN(image_dh, 64); + ret = bm_malloc_device_byte(handle, &tensor_S, index_size_temp * index_size_temp * image_c * 4); if(BM_SUCCESS != ret) { bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_ERROR, "bm_malloc error, %s: %s: %d\n", @@ -706,7 +708,9 @@ static bm_status_t per_image_deal_bilinear(bm_handle_t handle, bm_device_mem_t tensor_input; bm_image_get_device_mem(input, &tensor_input); bm_image_get_device_mem(output, &tensor_output); - ret = bm_malloc_device_byte(handle, &tensor_S, image_dh * image_dw * image_c * 4); + // ret = bm_malloc_device_byte(handle, &tensor_S, image_dh * image_dw * image_c * 4); + int index_size_temp = image_dw > image_dh ? 
ALIGN(image_dw, 64) : ALIGN(image_dh, 64); + ret = bm_malloc_device_byte(handle, &tensor_S, index_size_temp * index_size_temp * image_c * 4); if (BM_SUCCESS != ret) return ret; sg_api_cv_warp_perspective_1684x_t param; @@ -786,6 +790,10 @@ bm_status_t bmcv_image_warp_perspective_1684X( } for (int num = 0;num < image_num;num++) { + int stride = 0; + bm_image_get_stride(input[num], &stride); + input[num].width = stride; + if (!use_bilinear) { ret = per_image_deal_nearest(handle, image_dh, image_dw, input[num], output[num], tensor_output[num], matrix[num]); @@ -816,6 +824,7 @@ bm_status_t bmcv_image_warp_perspective_with_coordinate( int dh = output[0].height; int dw = output[0].width; int coord_sum = 0; + bm_handle_check_2(handle, input[0], output[0]); for (int i = 0; i < image_num; i++) { coord_sum += coord[i].coordinate_num; } @@ -858,6 +867,8 @@ bm_status_t bmcv_image_warp_perspective_similar_to_opencv( { float matrix_tem[3][3]; float matrix_tem_inv[3][3]; + + bm_handle_check_2(handle, input[0], output[0]); for (int i = 0; i < image_num; i++) { for(int matrix_no = 0; matrix_no < matrix[i].matrix_num; matrix_no++){ memset(matrix_tem, 0, sizeof(matrix_tem)); @@ -889,6 +900,7 @@ bm_status_t bmcv_image_warp_perspective( int use_bilinear) { unsigned int chipid; + bm_handle_check_2(handle, input[0], output[0]); bm_get_chipid(handle, &chipid); if (chipid == BM1684X){ return bmcv_image_warp_perspective_1684X( diff --git a/bmvid/bmcv/src/bmcv_api_yolov3_detect_out.cpp b/bmvid/bmcv/src/bmcv_api_yolov3_detect_out.cpp index 137effe..3d3225d 100644 --- a/bmvid/bmcv/src/bmcv_api_yolov3_detect_out.cpp +++ b/bmvid/bmcv/src/bmcv_api_yolov3_detect_out.cpp @@ -44,10 +44,10 @@ bm_status_t bmcv_nms_yolov3_bm1684( int keep_top_k, float bias[18], float anchor_scale[3], float mask[9], bm_device_mem_t output, int yolo_flag, int len_per_batch) { if (handle == NULL) - return BM_ERR_FAILURE; + return BM_ERR_DEVNOTREADY; if (input_num > 3) - return BM_ERR_FAILURE; + return BM_ERR_PARAM; 
bm_device_mem_t b_mem[3]; bm_device_mem_t top_mem; for (int i = 0; i < 3; ++i) { @@ -131,9 +131,9 @@ bm_status_t bmcv_nms_yolov7( bm_device_mem_t output, int yolo_flag, int len_per_batch, int scale, int *orig_image_shape, int model_h, int model_w) { if (handle == NULL) - return BM_ERR_FAILURE; + return BM_ERR_DEVNOTREADY; if (input_num > 3) - return BM_ERR_FAILURE; + return BM_ERR_PARAM; bm_device_mem_t b_mem[3]; bm_device_mem_t top_mem; @@ -206,10 +206,10 @@ bm_status_t bmcv_nms_yolov3_bm1684X( int keep_top_k, float bias[18], float anchor_scale[3], float mask[9], bm_device_mem_t output, int yolo_flag, int len_per_batch) { if (handle == NULL) - return BM_ERR_FAILURE; + return BM_ERR_DEVNOTREADY; if (input_num > 3) - return BM_ERR_FAILURE; + return BM_ERR_PARAM; bm_device_mem_t b_mem[3]; bm_device_mem_t top_mem; @@ -296,9 +296,9 @@ bm_status_t bmcv_nms_yolo( int keep_top_k, float bias[18], float anchor_scale[3], float mask[9], bm_device_mem_t output, int yolo_flag, int len_per_batch, void *ext) { if (handle == NULL) - return BM_ERR_FAILURE; + return BM_ERR_DEVNOTREADY; if (input_num > 3) - return BM_ERR_FAILURE; + return BM_ERR_PARAM; // UNUSED(ext); if (yolo_flag == 0 || yolo_flag == 1){ @@ -321,7 +321,7 @@ bm_status_t bmcv_nms_yolo( temp->scale, temp->orig_image_shape, temp->model_h, temp->model_w); }else{ printf("Not supported !\n"); - return BM_NOT_SUPPORTED; + return BM_ERR_PARAM; } } @@ -356,7 +356,7 @@ bm_status_t bmcv_nms_yolov3( break; default: - ret = BM_NOT_SUPPORTED; + ret = BM_ERR_NOFEATURE; break; } diff --git a/bmvid/bmcv/src/bmcv_api_yuv2hsv.cpp b/bmvid/bmcv/src/bmcv_api_yuv2hsv.cpp index 9fd0c7c..00edaf4 100644 --- a/bmvid/bmcv/src/bmcv_api_yuv2hsv.cpp +++ b/bmvid/bmcv/src/bmcv_api_yuv2hsv.cpp @@ -2,6 +2,7 @@ #include "bmcv_api_ext.h" #include "bmcv_internal.h" #include "bmcv_common_bm1684.h" +#include static bm_status_t bmcv_yuv2hsv_check( bm_handle_t handle, @@ -57,6 +58,7 @@ bm_status_t bmcv_image_yuv2hsv(bm_handle_t handle, } bm_status_t ret = 
BM_SUCCESS; + bm_handle_check_2(handle, input, output); ret = bmcv_yuv2hsv_check(handle, rect, input, output); if (BM_SUCCESS != ret) { return ret; diff --git a/bmvid/bmcv/src/bmcv_api_yuv2rgb.cpp b/bmvid/bmcv/src/bmcv_api_yuv2rgb.cpp index d91bc7f..0e54381 100644 --- a/bmvid/bmcv/src/bmcv_api_yuv2rgb.cpp +++ b/bmvid/bmcv/src/bmcv_api_yuv2rgb.cpp @@ -3,6 +3,7 @@ #include "bmcv_common_bm1684.h" #include "bmcv_internal.h" #include "bm1684x/bmcv_1684x_vpp_ext.h" +#include #ifndef USING_CMODEL #include "vpplib.h" @@ -21,20 +22,20 @@ static bm_status_t bmcv_yuv2bgr_check(int input_n, if (input_n < 1 || input_h < 1 || input_w < 1) { bmlib_log("BMCV", BMLIB_LOG_ERROR, "illegal image_num, image size %s: %s: %d\n", filename(__FILE__), __func__, __LINE__); - return BM_NOT_SUPPORTED; + return BM_ERR_PARAM; } if (input_w % 2 != 0 || input_h % 2 != 0) { bmlib_log("BMCV", BMLIB_LOG_ERROR, "input width and height should 2 aligned %s: %s: %d\n", filename(__FILE__), __func__, __LINE__); - return BM_NOT_SUPPORTED; + return BM_ERR_DATA; } if (src_data_format != DATA_TYPE_EXT_1N_BYTE) { bmlib_log("BMCV", BMLIB_LOG_ERROR, "input data size should be DATA_TYPE_EXT_1N_BYTE %s: %s: %d\n", filename(__FILE__), __func__, __LINE__); - return BM_NOT_SUPPORTED; + return BM_ERR_DATA; } @@ -44,7 +45,7 @@ static bm_status_t bmcv_yuv2bgr_check(int input_n, { bmlib_log("BMCV", BMLIB_LOG_ERROR, "not supported output data type %s: %s: %d\n", filename(__FILE__), __func__, __LINE__); - return BM_NOT_SUPPORTED; + return BM_ERR_DATA; } if ((dst_data_format == DATA_TYPE_EXT_FP16) || @@ -53,7 +54,7 @@ static bm_status_t bmcv_yuv2bgr_check(int input_n, (dst_data_format == DATA_TYPE_EXT_BF16)){ BMCV_ERR_LOG("data type not support\n"); - return BM_NOT_SUPPORTED; + return BM_ERR_DATA; } if (!(src_image_format == FORMAT_NV12 || src_image_format == FORMAT_NV21 || @@ -62,13 +63,13 @@ static bm_status_t bmcv_yuv2bgr_check(int input_n, { bmlib_log("BMCV", BMLIB_LOG_ERROR, "not supported input image format %s: %s: 
%d\n", filename(__FILE__), __func__, __LINE__); - return BM_NOT_SUPPORTED; + return BM_ERR_DATA; } if (!(dst_image_format == FORMAT_RGB_PLANAR || dst_image_format == FORMAT_BGR_PLANAR)) { bmlib_log("BMCV", BMLIB_LOG_ERROR, "not supported output image format %s: %s: %d\n", filename(__FILE__), __func__, __LINE__); - return BM_NOT_SUPPORTED; + return BM_ERR_DATA; } return BM_SUCCESS; } @@ -79,7 +80,7 @@ bm_status_t bmcv_image_yuv2bgr_ext_(bm_handle_t handle, bm_image * output) { if (handle == NULL) { bmlib_log("YUV2BGR", BMLIB_LOG_ERROR, "Can not get handle!\r\n"); - return BM_ERR_FAILURE; + return BM_ERR_DEVNOTREADY; } int width = input[0].width; int height = input[0].height; @@ -94,39 +95,39 @@ bm_status_t bmcv_image_yuv2bgr_ext_(bm_handle_t handle, if (!bm_image_is_attached(input[0])) { bmlib_log("YUV2RGB", BMLIB_LOG_ERROR, "input not attached data!\r\n"); - return BM_ERR_PARAM; + return BM_ERR_DATA; } for (int i = 1; i < image_num; i++) { if (width != input[i].width || height != input[i].height) { bmlib_log("YUV2RGB", BMLIB_LOG_ERROR, "inputs shape not same!\r\n"); - return BM_ERR_PARAM; + return BM_ERR_DATA; } if (src_data_format != input[i].data_type) { bmlib_log("YUV2RGB", BMLIB_LOG_ERROR, "inputs data type not same!\r\n"); - return BM_ERR_PARAM; + return BM_ERR_DATA; } if (src_image_format != input[i].image_format) { bmlib_log("YUV2RGB", BMLIB_LOG_ERROR, "inputs format not same!\r\n"); - return BM_ERR_PARAM; + return BM_ERR_DATA; } if (!bm_image_is_attached(input[i])) { bmlib_log("YUV2RGB", BMLIB_LOG_ERROR, "input %d not attached data!\r\n", i); - return BM_ERR_PARAM; + return BM_ERR_DATA; } } for (int i = 0; i < output_image_num; i++) { if (width != output[i].width || height != output[i].height) { bmlib_log("YUV2RGB", BMLIB_LOG_ERROR, "outputs shape not same!\r\n"); - return BM_ERR_PARAM; + return BM_ERR_DATA; } if (dst_data_format != output[i].data_type) { bmlib_log("YUV2RGB", BMLIB_LOG_ERROR, "outputs data type not same!\r\n"); - return BM_ERR_PARAM; + 
return BM_ERR_DATA; } if (dst_image_format != output[i].image_format) { bmlib_log("YUV2RGB", BMLIB_LOG_ERROR, "outputs format not same!\r\n"); - return BM_ERR_PARAM; + return BM_ERR_DATA; } } @@ -137,7 +138,7 @@ bm_status_t bmcv_image_yuv2bgr_ext_(bm_handle_t handle, src_data_format, dst_image_format, dst_data_format); - if (result == BM_NOT_SUPPORTED) { + if (result != BM_SUCCESS) { return result; } @@ -151,7 +152,7 @@ bm_status_t bmcv_image_yuv2bgr_ext_(bm_handle_t handle, { if (src_stride[k] != stride[k]) { bmlib_log("YUV2RGB", BMLIB_LOG_ERROR, "src stride not same!\r\n"); - return BM_ERR_PARAM; + return BM_ERR_DATA; } } } @@ -166,7 +167,7 @@ bm_status_t bmcv_image_yuv2bgr_ext_(bm_handle_t handle, { if (dst_stride[k] != stride[k]) { bmlib_log("YUV2RGB", BMLIB_LOG_ERROR, "dst stride not same!\r\n"); - return BM_ERR_PARAM; + return BM_ERR_DATA; } } } @@ -192,7 +193,7 @@ bm_status_t bmcv_image_yuv2bgr_ext_(bm_handle_t handle, bm_free_device(handle, dmem); } } - return BM_ERR_FAILURE; + return BM_ERR_NOMEM; } output_alloc_flag[i] = true; } @@ -258,7 +259,7 @@ bm_status_t bmcv_image_yuv2bgr_ext_(bm_handle_t handle, u64 output_dev_addr[4][1] = {0}; for (int i = 0; i < image_num; i++) { if(BM_SUCCESS !=bm_image_get_device_mem(input[i], tensor_input[i])) { - BMCV_ERR_LOG("bm_image_alloc_dev_mem error\r\n"); + BMCV_ERR_LOG("bm_image_get_device_mem error\r\n"); for (int free_idx = 0; free_idx < output_image_num; free_idx ++) { if (output_alloc_flag[free_idx]) { bm_device_mem_t dmem; @@ -267,7 +268,7 @@ bm_status_t bmcv_image_yuv2bgr_ext_(bm_handle_t handle, } } - return BM_ERR_FAILURE; + return BM_ERR_DATA; } for (int channel_idx = 0; channel_idx < bm_image_get_plane_num(input[i]); @@ -279,7 +280,7 @@ bm_status_t bmcv_image_yuv2bgr_ext_(bm_handle_t handle, for (int i = 0; i < output_image_num; i++) { if(BM_SUCCESS !=bm_image_get_device_mem(output[i], tensor_output[i])) { - BMCV_ERR_LOG("bm_image_alloc_dev_mem error\r\n"); + BMCV_ERR_LOG("bm_image_get_device_mem 
error\r\n"); for (int free_idx = 0; free_idx < output_image_num; free_idx ++) { if (output_alloc_flag[free_idx]) { bm_device_mem_t dmem; @@ -288,7 +289,7 @@ bm_status_t bmcv_image_yuv2bgr_ext_(bm_handle_t handle, } } - return BM_ERR_FAILURE; + return BM_ERR_DATA; } output_dev_addr[i][0] = bm_mem_get_device_addr(tensor_output[i][0]); } @@ -322,11 +323,11 @@ bm_status_t bmcv_image_yuv2bgr_ext_(bm_handle_t handle, if (BM_SUCCESS != bm_send_api(handle, BM_API_ID_CV_YUV2RGB, (uint8_t *)&api, sizeof(api))) { BMCV_ERR_LOG("yuv2rgb send api error\r\n"); - return BM_ERR_FAILURE; + return BM_ERR_TIMEOUT; } if (BM_SUCCESS != bm_sync_api(handle)) { BMCV_ERR_LOG("yuv2rgb sync api error\r\n"); - return BM_ERR_FAILURE; + return BM_ERR_TIMEOUT; } return BM_SUCCESS; @@ -355,7 +356,7 @@ bm_status_t bmcv_image_yuv2bgr_ext( { unsigned int chipid = BM1684X; bm_status_t ret = BM_SUCCESS; - + bm_handle_check_2(handle, input[0], output[0]); ret = bm_get_chipid(handle, &chipid); if (BM_SUCCESS != ret) return ret; @@ -373,7 +374,7 @@ bm_status_t bmcv_image_yuv2bgr_ext( break; default: - ret = BM_NOT_SUPPORTED; + ret = BM_ERR_NOFEATURE; break; } diff --git a/bmvid/bmcv/src/bmcv_api_yuv_resize.cpp b/bmvid/bmcv/src/bmcv_api_yuv_resize.cpp index c399f3f..2877a94 100644 --- a/bmvid/bmcv/src/bmcv_api_yuv_resize.cpp +++ b/bmvid/bmcv/src/bmcv_api_yuv_resize.cpp @@ -341,7 +341,7 @@ bm_status_t bmcv_image_yuv_resize( { unsigned int chipid = BM1684X; bm_status_t ret = BM_SUCCESS; - + bm_handle_check_2(handle, input[0], output[0]); ret = bm_get_chipid(handle, &chipid); if (BM_SUCCESS != ret) return ret; diff --git a/bmvid/bmcv/src/bmcv_bm1684x.h b/bmvid/bmcv/src/bmcv_bm1684x.h index 122f67e..af7e46c 100644 --- a/bmvid/bmcv/src/bmcv_bm1684x.h +++ b/bmvid/bmcv/src/bmcv_bm1684x.h @@ -184,6 +184,22 @@ typedef struct { int padding_r; } sg_api_crop_resize_t; +typedef struct { + u64 XR; + u64 XI; + u64 YR; + u64 YI; + u64 ER; + int EI; + int batch; + int len; + int forward; + int realInput; + int trans; + int 
factorSize; + int factors[10]; +} sg_api_cv_fft_t; + typedef struct bm_api_cv_feature_match_1684x { u64 input_data_global_addr; u64 db_data_global_addr; @@ -214,11 +230,13 @@ typedef struct bm_api_cv_bayer2rgb { int width; int height; int dst_fmt; + int src_type; u64 input_addr; u64 output_addr; u64 sys_mem_addr_temp_ul; u64 sys_mem_addr_temp_br; u64 sys_mem_addr_temp_g; + u64 sys_mem_addr_temp_b; u64 input_addr_padding_up_left; u64 input_addr_padding_bottom_right; u64 kernel_addr; @@ -254,6 +272,29 @@ typedef struct bm_matrix_log{ int d_dtype; }bm_matrix_log_t; +typedef struct bm_api_cv_hist_balance1 { + u64 Xaddr; + int len; + u64 cdf_addr; +} bm_api_cv_hist_balance_t1; + +typedef struct bm_api_cv_hist_balance2 { + u64 Xaddr; + int len; + float cdf_min; + u64 cdf_addr; + u64 Yaddr; +} bm_api_cv_hist_balance_t2; + +typedef struct sg_api_cv_rotate { + int channel; + int rotation_angle; + u64 input_addr[3]; + u64 output_addr[3]; + int width; + int height; +} sg_api_cv_rotate_t; + #pragma pack(pop) } diff --git a/bmvid/bmcv/src/bmcv_common_bm1684.h b/bmvid/bmcv/src/bmcv_common_bm1684.h index 8eb849f..e60a22b 100644 --- a/bmvid/bmcv/src/bmcv_common_bm1684.h +++ b/bmvid/bmcv/src/bmcv_common_bm1684.h @@ -627,6 +627,16 @@ typedef struct bm_api_cv_threshold { u32 max_value; } bm_api_cv_threshold_t; +typedef struct bm_api_cv_quantify { + int channel; + u64 input_addr[3]; + u64 output_addr[3]; + int width[3]; + int height[3]; + int input_str[3]; + int output_str[3]; +} bm_api_cv_quantify_t; + typedef struct bm_api_cv_pyramid { u64 input_addr; u64 kernel_addr; @@ -1006,6 +1016,7 @@ INLINE static int ceiling_func_shift(int numerator, int shift) { return (numerat #define BM_API_ID_CV_FUSION 533 #define BM_API_ID_CV_BITWISE 534 #define BM_API_ID_CV_BAYER2RGB 535 +#define BM_API_ID_CV_QUANTIFY 536 /******************bmlib*********************/ diff --git a/bmvid/bmcv/src/bmcv_draw_rectangle.cpp b/bmvid/bmcv/src/bmcv_draw_rectangle.cpp index 40c8b76..213cdbe 100644 --- 
a/bmvid/bmcv/src/bmcv_draw_rectangle.cpp +++ b/bmvid/bmcv/src/bmcv_draw_rectangle.cpp @@ -3,6 +3,7 @@ #include "bmcv_internal.h" #include "bmcv_common_bm1684.h" #include "bm1684x/bmcv_1684x_vpp_ext.h" +#include static void check_rectangle_parameter(int rect_num, bmcv_rect *rects, @@ -46,11 +47,11 @@ static bm_status_t bmcv_draw_solid_rectangle(bm_handle_t handle, if (BM_SUCCESS != bm_send_api(handle, BM_API_ID_MEMSET_BYTE, (uint8_t *)&api, sizeof(api))) { BMCV_ERR_LOG("draw rectangle send api error\r\n"); - return BM_ERR_FAILURE; + return BM_ERR_TIMEOUT; } if (BM_SUCCESS != bm_sync_api(handle)) { BMCV_ERR_LOG("draw rectangle sync api error\r\n"); - return BM_ERR_FAILURE; + return BM_ERR_TIMEOUT; } return BM_SUCCESS; } @@ -65,7 +66,7 @@ bm_status_t bmcv_image_draw_rectangle_(bm_handle_t handle, { if(image.data_type != DATA_TYPE_EXT_1N_BYTE) { - return BM_ERR_FAILURE; + return BM_ERR_DATA; } if(rect.crop_h <= 0 || rect.crop_w <= 0) return BM_SUCCESS; @@ -462,7 +463,7 @@ bm_status_t bmcv_image_draw_rectangle_(bm_handle_t handle, else { BMCV_ERR_LOG("error currently not support this format draw rectangle\n"); - return BM_NOT_SUPPORTED; + return BM_ERR_DATA; } return BM_SUCCESS; @@ -500,6 +501,8 @@ bm_status_t bmcv_image_draw_rectangle_(bm_handle_t handle, unsigned char g, unsigned char b) { + bm_status_t ret; + if(rect_num == 0) return BM_SUCCESS; if(rect_num < 0) @@ -510,23 +513,23 @@ bm_status_t bmcv_image_draw_rectangle_(bm_handle_t handle, if(!image.image_private) { BMCV_ERR_LOG("invalidate image, not created\n"); - return BM_ERR_PARAM; + return BM_ERR_DATA; } if(image.data_type != DATA_TYPE_EXT_1N_BYTE) { BMCV_ERR_LOG("invalidate image, data type should be DATA_TYPE_EXT_1N_BYTE\n"); - return BM_ERR_PARAM; + return BM_ERR_DATA; } if(!bm_image_is_attached(image)) { BMCV_ERR_LOG("invalidate image, please attach device memory\n"); - return BM_ERR_PARAM; + return BM_ERR_DATA; } if(image.height >= (1 << 16) || image.width >= (1 << 16)) { BMCV_ERR_LOG("Not support such 
big size image\n"); - return BM_NOT_SUPPORTED; + return BM_ERR_DATA; } check_rectangle_parameter(rect_num, rects, line_width); @@ -535,10 +538,10 @@ bm_status_t bmcv_image_draw_rectangle_(bm_handle_t handle, { bmcv_rect_t rect = refine_rect(rects[i], image.height, image.width, line_width); - if (bmcv_image_draw_rectangle_(handle, image, rect, line_width, r, g, b) != BM_SUCCESS) - { + ret = bmcv_image_draw_rectangle_(handle, image, rect, line_width, r, g, b); + if (ret != BM_SUCCESS) { BMCV_ERR_LOG("error call draw rectangle\n"); - return BM_ERR_FAILURE; + return ret; } } return BM_SUCCESS; @@ -556,7 +559,7 @@ bm_status_t bmcv_image_draw_rectangle( { unsigned int chipid = BM1684X; bm_status_t ret = BM_SUCCESS; - + bm_handle_check_1(handle, image); ret = bm_get_chipid(handle, &chipid); if (BM_SUCCESS != ret) return ret; @@ -576,7 +579,7 @@ bm_status_t bmcv_image_draw_rectangle( break; default: - ret = BM_NOT_SUPPORTED; + ret = BM_ERR_NOFEATURE; break; } diff --git a/bmvid/bmcv/src/bmcv_image_ext.cpp b/bmvid/bmcv/src/bmcv_image_ext.cpp index 726950b..213fe24 100644 --- a/bmvid/bmcv/src/bmcv_image_ext.cpp +++ b/bmvid/bmcv/src/bmcv_image_ext.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include "bmcv_api_ext.h" #include "bmcv_internal.h" #include "bmcv_api.h" @@ -186,6 +187,12 @@ static bm_status_t fill_image_private(bm_image *res, int *stride) { layout::plane_layout(1, 1, H, W, data_size); break; } + case FORMAT_BAYER_RG8: { + image_private->plane_num = 1; + image_private->memory_layout[0] = + layout::plane_layout(1, 1, H, W, data_size); + break; + } } if (!use_default_stride) { @@ -444,7 +451,7 @@ bm_status_t bm_image_format_check(int img_h, __FILE__, __func__, __LINE__); - return BM_NOT_SUPPORTED; + return BM_ERR_PARAM; } if (image_format <= FORMAT_NV24 && (data_type == DATA_TYPE_EXT_4N_BYTE || @@ -455,7 +462,7 @@ bm_status_t bm_image_format_check(int img_h, filename(__FILE__), __func__, __LINE__); - return BM_NOT_SUPPORTED; + return BM_ERR_PARAM; } if 
(FORMAT_COMPRESSED == image_format) { @@ -467,7 +474,7 @@ bm_status_t bm_image_format_check(int img_h, filename(__FILE__), __func__, __LINE__); - return BM_NOT_SUPPORTED; + return BM_ERR_PARAM; } } return BM_SUCCESS; @@ -492,7 +499,7 @@ bm_status_t bm_image_create(bm_handle_t handle, filename(__FILE__), __func__, __LINE__); - return BM_ERR_FAILURE; + return BM_ERR_NOMEM; } memset(res->image_private->data, 0, @@ -507,7 +514,7 @@ bm_status_t bm_image_create(bm_handle_t handle, __LINE__); delete res->image_private; res->image_private = nullptr; - return BM_NOT_SUPPORTED; + return BM_ERR_PARAM; } if (fill_image_private(res, stride) != BM_SUCCESS) { bmlib_log("BMCV", @@ -518,7 +525,7 @@ bm_status_t bm_image_create(bm_handle_t handle, __LINE__); delete res->image_private; res->image_private = nullptr; - return BM_NOT_SUPPORTED; + return BM_ERR_PARAM; } res->image_private->handle = handle; return BM_SUCCESS; @@ -540,7 +547,7 @@ bm_status_t bm_image_tensor_create(bm_handle_t handle, res->image_c = img_c; res->image.image_private = new bm_image_private; if (!res->image.image_private) - return BM_ERR_FAILURE; + return BM_ERR_NOMEM; fill_image_private_tensor(*res); res->image.image_private->handle = handle; @@ -557,7 +564,7 @@ bm_status_t bm_image_destroy(bm_image image) { filename(__FILE__), __func__, __LINE__); - return BM_ERR_FAILURE; + return BM_ERR_NOMEM; } #ifndef USING_CMODEL if (image.image_private->decoder != NULL) { @@ -594,13 +601,13 @@ bm_status_t bm_image_tensor_destroy(bm_image_tensor image_tensor) { bm_status_t bm_image_copy_host_to_device(bm_image image, void *buffers[]) { if (!image.image_private) - return BM_ERR_FAILURE; + return BM_ERR_DATA; if (image.image_format == FORMAT_COMPRESSED) { bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_ERROR, "compressed format only support attached device memory, not " "host pointer\n"); - return BM_NOT_SUPPORTED; + return BM_ERR_DATA; } // The image didn't attached to a buffer, malloc and own it. 
if (!image.image_private->attached) { @@ -611,7 +618,7 @@ bm_status_t bm_image_copy_host_to_device(bm_image image, void *buffers[]) { filename(__FILE__), __func__, __LINE__); - return BM_ERR_FAILURE; + return BM_ERR_NOMEM; } } @@ -623,7 +630,7 @@ bm_status_t bm_image_copy_host_to_device(bm_image image, void *buffers[]) { BMCV_ERR_LOG("bm_memcpy_s2d error, src addr %p, dst addr 0x%llx\n", host[i], bm_mem_get_device_addr(image.image_private->data[i])); - return BM_ERR_FAILURE; + return BM_ERR_NOMEM; } // host = host + image.image_private->plane_byte_size[i]; } @@ -637,9 +644,9 @@ bm_status_t bm_image_tensor_copy_to_device(bm_image_tensor image_tensor, bm_status_t bm_image_copy_device_to_host(bm_image image, void *buffers[]) { if (!image.image_private) - return BM_ERR_FAILURE; + return BM_ERR_DATA; if (!image.image_private->attached) { - return BM_ERR_FAILURE; + return BM_ERR_DATA; } unsigned char **host = (unsigned char **)buffers; for (int i = 0; i < image.image_private->plane_num; i++) { @@ -649,7 +656,7 @@ bm_status_t bm_image_copy_device_to_host(bm_image image, void *buffers[]) { BMCV_ERR_LOG("bm_memcpy_d2s error, src addr 0x%llx, dst addr %p\n", bm_mem_get_device_addr(image.image_private->data[i]), host[i]); - return BM_ERR_FAILURE; + return BM_ERR_NOMEM; } // host = host + image.image_private->plane_byte_size[i]; } @@ -668,7 +675,7 @@ bm_status_t bm_image_tensor_attach(bm_image_tensor image_tensor, bm_status_t bm_image_attach(bm_image image, bm_device_mem_t *device_memory) { if (!image.image_private) - return BM_ERR_FAILURE; + return BM_ERR_DATA; if (image.image_private->data_owned) { std::lock_guard lock(image.image_private->memory_lock); int total_size = 0; @@ -703,7 +710,7 @@ bm_status_t bm_image_tensor_detach(bm_image image_tensor) { bm_status_t bm_image_detach(bm_image image) { if (!image.image_private) - return BM_ERR_FAILURE; + return BM_ERR_DATA; if (image.image_private->data_owned == true) { std::lock_guard lock(image.image_private->memory_lock); int 
total_size = 0; @@ -729,7 +736,7 @@ bm_status_t bm_image_detach(bm_image image) { bm_status_t bm_image_get_byte_size(bm_image image, int *size) { if (!image.image_private) - return BM_ERR_FAILURE; + return BM_ERR_DATA; if (image.image_format == FORMAT_COMPRESSED && !image.image_private->attached) { bmlib_log("BMCV", @@ -739,7 +746,7 @@ bm_status_t bm_image_get_byte_size(bm_image image, int *size) { filename(__FILE__), __func__, __LINE__); - return BM_NOT_SUPPORTED; + return BM_ERR_DATA; } std::lock_guard lock(image.image_private->memory_lock); for (int i = 0; i < image.image_private->plane_num; i++) { @@ -750,7 +757,7 @@ bm_status_t bm_image_get_byte_size(bm_image image, int *size) { bm_status_t bm_image_alloc_dev_mem_heap_mask(bm_image image, int heap_mask) { if (!image.image_private) - return BM_ERR_FAILURE; + return BM_ERR_DATA; if (image.image_format == FORMAT_COMPRESSED) { bmlib_log("BMCV", BMLIB_LOG_ERROR, @@ -759,7 +766,7 @@ bm_status_t bm_image_alloc_dev_mem_heap_mask(bm_image image, int heap_mask) { filename(__FILE__), __func__, __LINE__); - return BM_NOT_SUPPORTED; + return BM_ERR_DATA; } if (image.image_private->data_owned == true) { return BM_SUCCESS; @@ -777,7 +784,7 @@ bm_status_t bm_image_alloc_dev_mem_heap_mask(bm_image image, int heap_mask) { image.image_private->handle, &dmem, heap_mask, total_size)) { BMCV_ERR_LOG("bm_malloc_device_byte_heap error\r\n"); - return BM_ERR_FAILURE; + return BM_ERR_NOMEM; } #ifdef __linux__ @@ -806,7 +813,7 @@ bm_status_t bm_image_alloc_dev_mem_heap_mask(bm_image image, int heap_mask) { bm_status_t bm_image_alloc_dev_mem(bm_image image, int heap_id) { if (!image.image_private) - return BM_ERR_FAILURE; + return BM_ERR_DATA; if (image.image_format == FORMAT_COMPRESSED) { bmlib_log("BMCV", BMLIB_LOG_ERROR, @@ -815,7 +822,7 @@ bm_status_t bm_image_alloc_dev_mem(bm_image image, int heap_id) { filename(__FILE__), __func__, __LINE__); - return BM_NOT_SUPPORTED; + return BM_ERR_DATA; } if (image.image_private->data_owned == 
true) { return BM_SUCCESS; @@ -834,14 +841,14 @@ bm_status_t bm_image_alloc_dev_mem(bm_image image, int heap_id) { image.image_private->handle, &dmem, heap_id, total_size)) { BMCV_ERR_LOG("bm_malloc_device_byte_heap error\r\n"); - return BM_ERR_FAILURE; + return BM_ERR_NOMEM; } } else { if (BM_SUCCESS != bm_malloc_device_byte( image.image_private->handle, &dmem, total_size)) { BMCV_ERR_LOG("bm_malloc_device_byte error\r\n"); - return BM_ERR_FAILURE; + return BM_ERR_NOMEM; } } @@ -892,7 +899,7 @@ bool bm_image_tensor_is_attached(bm_image_tensor image_tensor) { bm_status_t bm_image_get_device_mem(bm_image image, bm_device_mem_t *mem) { if (!image.image_private) - return BM_ERR_FAILURE; + return BM_ERR_DATA; for (int i = 0; i < image.image_private->plane_num; i++) { mem[i] = image.image_private->data[i]; } @@ -914,7 +921,7 @@ bm_status_t bm_image_get_format_info(bm_image * src, filename(__FILE__), __func__, __LINE__); - return BM_ERR_PARAM; + return BM_ERR_DATA; } if (src->image_format == FORMAT_COMPRESSED && !src->image_private->attached) { @@ -925,7 +932,7 @@ bm_status_t bm_image_get_format_info(bm_image * src, filename(__FILE__), __func__, __LINE__); - return BM_NOT_SUPPORTED; + return BM_ERR_DATA; } info->plane_nb = src->image_private->plane_num; for (int i = 0; i < info->plane_nb; ++i) { @@ -947,13 +954,13 @@ bm_status_t bm_image_tensor_get_device_mem(bm_image_tensor image_tensor, int bm_image_get_plane_num(bm_image image) { if (!image.image_private) - return BM_ERR_FAILURE; + return BM_ERR_DATA; return image.image_private->plane_num; } bm_status_t bm_image_get_stride(bm_image image, int *stride) { if (!image.image_private) - return BM_ERR_FAILURE; + return BM_ERR_DATA; for (int i = 0; i < image.image_private->plane_num; i++) { stride[i] = image.image_private->memory_layout[i].pitch_stride; @@ -963,7 +970,7 @@ bm_status_t bm_image_get_stride(bm_image image, int *stride) { bm_status_t bm_image_write_to_bmp(bm_image image, const char *filename) { if 
(!image.image_private || !image.image_private->attached) - return BM_ERR_FAILURE; + return BM_ERR_DATA; if (image.data_type == DATA_TYPE_EXT_4N_BYTE || image.data_type == DATA_TYPE_EXT_4N_BYTE_SIGNED) { bmlib_log(BMCV_LOG_TAG, @@ -972,7 +979,7 @@ bm_status_t bm_image_write_to_bmp(bm_image image, const char *filename) { filename(__FILE__), __func__, __LINE__); - return BM_NOT_SUPPORTED; + return BM_ERR_DATA; } int need_format_transform = (image.image_format != FORMAT_RGB_PACKED && image.image_format != FORMAT_GRAY) || @@ -1001,8 +1008,21 @@ bm_status_t bm_image_write_to_bmp(bm_image image, const char *filename) { } } int component = image.image_format == FORMAT_GRAY ? 1 : 3; + int stride[4] = {0}; + bm_image_get_stride(image_temp, stride); + void * buf_tmp = malloc(stride[0] * image_temp.height); void * buf = malloc(image_temp.width * image_temp.height * component); - bm_status_t ret = bm_image_copy_device_to_host(image_temp, &buf); + bm_status_t ret = bm_image_copy_device_to_host(image_temp, &buf_tmp); + + if (stride[0] > image_temp.width * component) { + for (int i = 0; i < image_temp.height; i++) { + memcpy((unsigned char *)buf + i * image_temp.width * component, (unsigned char *)buf_tmp + i * stride[0], image_temp.width * component); + } + } else { + memcpy((unsigned char *)buf, (unsigned char *)buf_tmp, stride[0] * image_temp.height); + } + free(buf_tmp); + if (ret != BM_SUCCESS) { free(buf); if (need_format_transform) { @@ -1045,6 +1065,7 @@ bm_status_t bm_image_tensor_init(bm_image_tensor *image_tensor) { bm_status_t bmcv_width_align(bm_handle_t handle, bm_image input, bm_image output) { + bm_handle_check_2(handle, input, output); if (input.image_format == FORMAT_COMPRESSED) { bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_INFO, @@ -1062,7 +1083,7 @@ bm_status_t bmcv_width_align(bm_handle_t handle, filename(__FILE__), __func__, __LINE__); - return BM_NOT_SUPPORTED; + return BM_ERR_DATA; } if (!bm_image_is_attached(input)) { @@ -1072,13 +1093,13 @@ bm_status_t 
bmcv_width_align(bm_handle_t handle, filename(__FILE__), __func__, __LINE__); - return BM_NOT_SUPPORTED; + return BM_ERR_DATA; } if (!bm_image_is_attached(output)) { if (BM_SUCCESS != bm_image_alloc_dev_mem(output, BMCV_HEAP_ANY)) { BMCV_ERR_LOG("bm_image_alloc_dev_mem error\r\n"); - return BM_ERR_FAILURE; + return BM_ERR_NOMEM; } } @@ -1103,19 +1124,19 @@ bm_status_t bm_image_alloc_contiguous_mem(int image_num, int heap_id) { if (0 == image_num) { BMCV_ERR_LOG("image_num can not be set as 0\n"); - return BM_ERR_FAILURE; + return BM_ERR_PARAM; } for (int i = 0; i < image_num - 1; i++) { for (int idx = 0; idx < images[i].image_private->plane_num; idx++) { if (images[i].image_private->memory_layout[idx].size != images[i + 1].image_private->memory_layout[idx].size) { BMCV_ERR_LOG("all image must have same size\n"); - return BM_ERR_FAILURE; + return BM_ERR_DATA; } } if (images[i].image_private->attached) { BMCV_ERR_LOG("image has been attached memory\n"); - return BM_ERR_FAILURE; + return BM_ERR_DATA; } } int single_image_sz = 0; @@ -1132,7 +1153,7 @@ bm_status_t bm_image_alloc_contiguous_mem(int image_num, total_image_size)) { BMCV_ERR_LOG("bm_malloc_device_byte_heap error\r\n"); - return BM_ERR_FAILURE; + return BM_ERR_NOMEM; } } else { if (BM_SUCCESS != bm_malloc_device_byte(images[0].image_private->handle, @@ -1140,7 +1161,7 @@ bm_status_t bm_image_alloc_contiguous_mem(int image_num, total_image_size)) { BMCV_ERR_LOG("bm_malloc_device_byte error\r\n"); - return BM_ERR_FAILURE; + return BM_ERR_NOMEM; } } int dmabuf_fd = dmem.u.device.dmabuf_fd; @@ -1171,19 +1192,19 @@ bm_status_t bm_image_alloc_contiguous_mem_heap_mask(int image_num, int heap_mask) { if (0 == image_num) { BMCV_ERR_LOG("image_num can not be set as 0\n"); - return BM_ERR_FAILURE; + return BM_ERR_PARAM; } for (int i = 0; i < image_num - 1; i++) { for (int idx = 0; idx < images[i].image_private->plane_num; idx++) { if (images[i].image_private->memory_layout[idx].size != images[i + 
1].image_private->memory_layout[idx].size) { BMCV_ERR_LOG("all image must have same size\n"); - return BM_ERR_FAILURE; + return BM_ERR_DATA; } } if (images[i].image_private->attached) { BMCV_ERR_LOG("image has been attached memory\n"); - return BM_ERR_FAILURE; + return BM_ERR_DATA; } } int single_image_sz = 0; @@ -1199,7 +1220,7 @@ bm_status_t bm_image_alloc_contiguous_mem_heap_mask(int image_num, total_image_size)) { BMCV_ERR_LOG("bm_malloc_device_byte_heap error\r\n"); - return BM_ERR_FAILURE; + return BM_ERR_NOMEM; } int dmabuf_fd = dmem.u.device.dmabuf_fd; #ifdef __linux__ @@ -1227,11 +1248,11 @@ bm_status_t bm_image_alloc_contiguous_mem_heap_mask(int image_num, bm_status_t bm_image_free_contiguous_mem(int image_num, bm_image *images) { if (0 == image_num) { BMCV_ERR_LOG("[FREE]image_num can not be set as 0\n"); - return BM_ERR_FAILURE; + return BM_ERR_PARAM; } for (int i = 0; i < image_num; i++) { if (images[i].image_private->data_owned) { - return BM_ERR_FAILURE; + return BM_ERR_DATA; } bm_image_detach(images[i]); } @@ -1258,14 +1279,14 @@ bm_status_t bm_image_attach_contiguous_mem(int image_num, bm_device_mem_t dmem) { if (0 == image_num) { BMCV_ERR_LOG("[ALLOC]image_num can not be set as 0\n"); - return BM_ERR_FAILURE; + return BM_ERR_PARAM; } for (int i = 0; i < image_num - 1; i++) { for (int idx = 0; idx < images[i].image_private->plane_num; idx++) { if (images[i].image_private->memory_layout[idx].size != images[i + 1].image_private->memory_layout[idx].size) { BMCV_ERR_LOG("all image must have same size\n"); - return BM_ERR_FAILURE; + return BM_ERR_DATA; } } } @@ -1298,7 +1319,7 @@ bm_status_t bm_image_dettach_contiguous_mem(int image_num, bm_image *images) { for (int i = 0; i < image_num; i++) { if (images[i].image_private->data_owned) { BMCV_ERR_LOG("image mem can not be free\n"); - return BM_ERR_FAILURE; + return BM_ERR_DATA; } bm_image_detach(images[i]); } @@ -1311,19 +1332,19 @@ bm_status_t bm_image_get_contiguous_device_mem(int image_num, 
bm_device_mem_t *mem) { if (0 == image_num) { BMCV_ERR_LOG("image_num can not be set as 0\n"); - return BM_ERR_FAILURE; + return BM_ERR_PARAM; } for (int i = 0; i < image_num - 1; i++) { for (int idx = 0; idx < images[i].image_private->plane_num; idx++) { if (images[i].image_private->memory_layout[idx].size != images[i + 1].image_private->memory_layout[idx].size) { BMCV_ERR_LOG("all image must have same size\n"); - return BM_ERR_FAILURE; + return BM_ERR_DATA; } } if (!(images[i].image_private->attached)) { BMCV_ERR_LOG("image has not been attached memory\n"); - return BM_ERR_FAILURE; + return BM_ERR_DATA; } } bm_device_mem_t dmem; @@ -1342,7 +1363,7 @@ bm_status_t bm_image_get_contiguous_device_mem(int image_num, if ((base_addr + i * single_image_sz) != tmp_addr) { BMCV_ERR_LOG("images should have continuous mem\r\n"); - return BM_ERR_FAILURE; + return BM_ERR_DATA; } } bm_device_mem_t out_dmem; @@ -1356,10 +1377,10 @@ bm_status_t bm_image_get_contiguous_device_mem(int image_num, bm_status_t bm_image_to_bmcv_image(bm_image *src, bmcv_image *dst) { if (!src->image_private) - return BM_ERR_FAILURE; + return BM_ERR_DATA; if (src->image_private->attached == false) { printf("please attach device memory first!\n"); - return BM_ERR_FAILURE; + return BM_ERR_DATA; } dst->color_space = color_space_convert(src->image_format); @@ -1439,14 +1460,14 @@ static bm_status_t map_addr(bm_handle_t handle, int process_id) { unsigned int heap_num; if (BM_SUCCESS != bm_get_gmem_total_heap_num(handle, &heap_num)) { bmlib_log("BMCV", BMLIB_LOG_ERROR, "get_heap num failed!\r\n"); - return BM_ERR_FAILURE; + return BM_ERR_DEVNOTREADY; } set_map_heap_size(dev_id, heap_num); for (unsigned int i = 0; i < heap_num; i++) { bm_heap_stat_byte_t heap_info; if (BM_SUCCESS != bm_get_gmem_heap_stat_byte_by_id(handle, &heap_info, i)) { bmlib_log("BMCV", BMLIB_LOG_ERROR, "get_heap info failed!\r\n"); - return BM_ERR_FAILURE; + return BM_ERR_DEVNOTREADY; } u64 vaddr = (u64)bmcpu_map_phys_addr( handle, @@ 
-1478,6 +1499,7 @@ u64 get_mapped_addr(bm_handle_t handle, bm_device_mem_t* mem) { bm_status_t bmcv_open_cpu_process(bm_handle_t handle) { #if !defined(USING_CMODEL) && !defined(SOC_MODE) + bm_status_t ret = BM_SUCCESS; int dev_id = bm_get_devid(handle); if (get_cpu_process_id(handle) != -1) return BM_SUCCESS; int timeout = -1; @@ -1485,11 +1507,11 @@ bm_status_t bmcv_open_cpu_process(bm_handle_t handle) { char* kernel_path = getenv("BMCV_CPU_KERNEL_PATH"); if (lib_path == NULL) { bmlib_log("BMCV", BMLIB_LOG_ERROR, "Please set environment variable: BMCV_CPU_LIB_PATH!\r\n"); - return BM_ERR_FAILURE; + return BM_ERR_PARAM; } if (kernel_path == NULL) { bmlib_log("BMCV", BMLIB_LOG_ERROR, "Please set environment variable: BMCV_CPU_KERNEL_PATH!\r\n"); - return BM_ERR_FAILURE; + return BM_ERR_PARAM; } if (BM_SUCCESS != bmcpu_start_cpu( handle, @@ -1517,9 +1539,10 @@ bm_status_t bmcv_open_cpu_process(bm_handle_t handle) { bmlib_log("BMCV", BMLIB_LOG_ERROR, "load library failed!\r\n"); return BM_ERR_FAILURE; } - if (BM_SUCCESS != map_addr(handle, cpu_id)) { + ret = map_addr(handle, cpu_id); + if (BM_SUCCESS != ret) { bmlib_log("BMCV", BMLIB_LOG_ERROR, "map addr failed!\r\n"); - return BM_ERR_FAILURE; + return ret; } increase_ref_cnt(dev_id); #else @@ -1547,13 +1570,13 @@ bm_status_t bmcv_close_cpu_process(bm_handle_t handle) { timeout); if (BM_SUCCESS != ret) { bmlib_log("BMCV", BMLIB_LOG_ERROR, "unmap cpu failed!\r\n"); - return BM_ERR_FAILURE; + return ret; } } ret = bmcpu_close_process(handle, cpu_id, timeout); if (BM_SUCCESS != ret) { bmlib_log("BMCV", BMLIB_LOG_WARNING, "close process failed!\r\n"); - return BM_ERR_FAILURE; + return ret; } set_cpu_process_id(dev_id, -1); #else diff --git a/bmvid/bmcv/src/bmcv_internal.cpp b/bmvid/bmcv/src/bmcv_internal.cpp index 681c839..6b5055e 100755 --- a/bmvid/bmcv/src/bmcv_internal.cpp +++ b/bmvid/bmcv/src/bmcv_internal.cpp @@ -1,16 +1,15 @@ #include - +#include #ifdef __linux__ #include #include -#endif -#ifdef _WIN32 +#else 
#include #include #include #include #endif - +#include #include "bmcv_internal.h" #include "bmcv_common_bm1684.h" #ifndef USING_CMODEL @@ -31,12 +30,20 @@ EXTERN_C IMAGE_DOS_HEADER __ImageBase; #endif #include + +#ifdef _MSC_VER +#define DLLEXPORT __declspec(dllexport) +#else +#define DLLEXPORT __attribute__((visibility("default"))) +#endif +#define COMMIT_HASH "d6cb8c9ee2d6e939b0e5e700507059274ecb6403" +#define BRANCH_NAME "(HEAD detached at origin/sophon-23.09-lts)" #define FIRMWARE_NAME "libbm1684x_kernel_module.so" bm_status_t sg_malloc_device_mem(bm_handle_t handle, sg_device_mem_st *pmem, unsigned int size) { if (BM_SUCCESS != bm_malloc_device_byte(handle, &(pmem->bm_device_mem), size)) { pmem->flag = 0; - return BM_ERR_DEVNOTREADY; + return BM_ERR_NOMEM; } pmem->flag = 1; return BM_SUCCESS; @@ -44,7 +51,7 @@ bm_status_t sg_malloc_device_mem(bm_handle_t handle, sg_device_mem_st *pmem, uns bm_status_t sg_image_alloc_dev_mem(bm_image image, int heap_id) { if (BM_SUCCESS != bm_image_alloc_dev_mem(image, heap_id)) { - return BM_ERR_DEVNOTREADY; + return BM_ERR_NOMEM; } return BM_SUCCESS; } @@ -214,6 +221,242 @@ static std::map, vpp_limitat }; +extern "C" { + //__attribute__((visibility("default"))) + DLLEXPORT const char* libbmcv_version() { + static const char* version_string = "libbmcv_version:1.0.0, branch:" BRANCH_NAME ", commit:" COMMIT_HASH ", compiled on " __DATE__ " at " __TIME__", "; + return version_string; + } +} + +bm_status_t bm_image_check(bm_image image) +{ + if (image.image_private == NULL) { + return BM_ERR_FAILURE; + } + if (image.image_private->handle == NULL) { + return BM_ERR_FAILURE; + } + return BM_SUCCESS; +} + +bm_status_t bm_image_zeros(bm_image image) +{ + //tpu memset + unsigned long long device_addr = 0; + if(bm_image_check(image) != BM_SUCCESS) { + bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_ERROR, + "please check image.image_private or image.image_private->handle %s: %s: %d\n", + filename(__FILE__), __func__, __LINE__); + return 
BM_ERR_FAILURE; + } + for (int i = 0; i < image.image_private->plane_num; i++) { + device_addr = bm_mem_get_device_addr(image.image_private->data[i]); + if((device_addr > 0x4ffffffff) || (device_addr < 0x100000000)) + { + bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_ERROR, + "device memory should between 0x100000000 and 0x4ffffffff %s: %s: %d\n", + filename(__FILE__), __func__, __LINE__); + return BM_ERR_FAILURE; + } + bm_memset_device(image.image_private->handle, 0, image.image_private->data[i]); + } + return BM_SUCCESS; +} + +int bm_image_zeros_soc(bm_image image) +{ + //memory connect + bm_device_mem_t dmem; + unsigned long long virt_addr = 0; + unsigned long long total_size = 0; + dmem = image.image_private->data[0]; + for (int i = 0; i < image.image_private->plane_num; i++) { + total_size += image.image_private->memory_layout[i].size; + } + bm_mem_flush_device_mem(image.image_private->handle, &dmem); + bm_mem_mmap_device_mem(image.image_private->handle, &dmem, &virt_addr); + memset((void*)(uintptr_t)virt_addr, 0, total_size); + bm_mem_unmap_device_mem(image.image_private->handle, (void *)&virt_addr, total_size); + bm_mem_flush_device_mem(image.image_private->handle, &dmem); + return 0; +} + +int bm_image_zero_cdma(bm_image image) +{ + int image_byte_size[4] = {0}; + unsigned char * image_malloc_ptr = nullptr; + for (int i = 0; i < image.image_private->plane_num; i++) { + image_byte_size[i] = image.image_private->memory_layout[i].size; + image_malloc_ptr = (unsigned char *)malloc(image_byte_size[i]); + memset(image_malloc_ptr, 0, image_byte_size[i]); + bm_memcpy_s2d(image.image_private->handle, image.image_private->data[i], image_malloc_ptr); + } + free(image_malloc_ptr); + return 0; +} + +bm_status_t bm_handle_check_1(bm_handle_t handle, + bm_image image1) +{ + int dev_id = bm_get_devid(handle); + if (dev_id < 0) { + bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_ERROR, + "Error, bm_get_devid failed for handle %s: %s: %d\n", + filename(__FILE__), __func__, __LINE__); + return 
BM_ERR_FAILURE; + } + if(bm_image_check(image1) != BM_SUCCESS) { + bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_ERROR, + "Error, image1 is not properly initialized %s: %s: %d\n", + filename(__FILE__), __func__, __LINE__); + return BM_ERR_FAILURE; + } + + int image1_id = bm_get_devid(image1.image_private->handle); + if (image1_id < 0) { + bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_ERROR, + "Error, bm_get_devid failed for image1 %s: %s: %d\n", + filename(__FILE__), __func__, __LINE__); + return BM_ERR_FAILURE; + } + if (dev_id != image1_id){ + bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_ERROR, + "Error, please check if the handle used for handle and bm_image are the same %s: %s: %d\n", + filename(__FILE__), __func__, __LINE__); + return BM_ERR_FAILURE; + } + return BM_SUCCESS; +} +bm_status_t bm_handle_check_2(bm_handle_t handle, + bm_image image1, + bm_image image2) +{ + int dev_id = bm_get_devid(handle); + if (dev_id < 0) { + bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_ERROR, + "Error, bm_get_devid failed for handle %s: %s: %d\n", + filename(__FILE__), __func__, __LINE__); + return BM_ERR_FAILURE; + } + + if(bm_image_check(image1) != BM_SUCCESS) { + bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_ERROR, + "Error, image1 is not properly initialized %s: %s: %d\n", + filename(__FILE__), __func__, __LINE__); + return BM_ERR_FAILURE; + } + int image1_id = bm_get_devid(image1.image_private->handle); + if (image1_id < 0) { + bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_ERROR, + "Error, bm_get_devid failed for image1 %s: %s: %d\n", + filename(__FILE__), __func__, __LINE__); + return BM_ERR_FAILURE; + } + if (dev_id != image1_id){ + bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_ERROR, + "Error, please check if the handle used for handle and bm_image are the same %s: %s: %d\n", + filename(__FILE__), __func__, __LINE__); + return BM_ERR_FAILURE; + } + + if(bm_image_check(image2) != BM_SUCCESS) { + bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_ERROR, + "Error, image2 is not properly initialized %s: %s: %d\n", + filename(__FILE__), __func__, __LINE__); + return 
BM_ERR_FAILURE; + } + int image2_id = bm_get_devid(image2.image_private->handle); + if (image2_id < 0) { + bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_ERROR, + "Error, bm_get_devid failed for image2 %s: %s: %d\n", + filename(__FILE__), __func__, __LINE__); + return BM_ERR_FAILURE; + } + if (dev_id != image2_id){ + bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_ERROR, + "Error, please check if the handle used for handle and bm_image are the same %s: %s: %d\n", + filename(__FILE__), __func__, __LINE__); + return BM_ERR_FAILURE; + } + return BM_SUCCESS; +} + +bm_status_t bm_handle_check_3(bm_handle_t handle, + bm_image image1, + bm_image image2, + bm_image image3) +{ + int dev_id = bm_get_devid(handle); + if (dev_id < 0) { + bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_ERROR, + "Error, bm_get_devid failed for handle %s: %s: %d\n", + filename(__FILE__), __func__, __LINE__); + return BM_ERR_FAILURE; + } + + if(bm_image_check(image1) != BM_SUCCESS) { + bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_ERROR, + "Error, image1 is not properly initialized %s: %s: %d\n", + filename(__FILE__), __func__, __LINE__); + return BM_ERR_FAILURE; + } + int image1_id = bm_get_devid(image1.image_private->handle); + if (image1_id < 0) { + bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_ERROR, + "Error, bm_get_devid failed for image1 %s: %s: %d\n", + filename(__FILE__), __func__, __LINE__); + return BM_ERR_FAILURE; + } + if (dev_id != image1_id){ + bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_ERROR, + "Error, please check if the handle used for handle and bm_image are the same %s: %s: %d\n", + filename(__FILE__), __func__, __LINE__); + return BM_ERR_FAILURE; + } + + if(bm_image_check(image2) != BM_SUCCESS) { + bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_ERROR, + "Error, image2 is not properly initialized %s: %s: %d\n", + filename(__FILE__), __func__, __LINE__); + return BM_ERR_FAILURE; + } + int image2_id = bm_get_devid(image2.image_private->handle); + if (image2_id < 0) { + bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_ERROR, + "Error, bm_get_devid failed for image2 %s: %s: %d\n", 
+ filename(__FILE__), __func__, __LINE__); + return BM_ERR_FAILURE; + } + if (dev_id != image2_id){ + bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_ERROR, + "Error, please check if the handle used for handle and bm_image are the same %s: %s: %d\n", + filename(__FILE__), __func__, __LINE__); + return BM_ERR_FAILURE; + } + + if(bm_image_check(image3) != BM_SUCCESS) { + bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_ERROR, + "Error, image3 is not properly initialized %s: %s: %d\n", + filename(__FILE__), __func__, __LINE__); + return BM_ERR_FAILURE; + } + int image3_id = bm_get_devid(image3.image_private->handle); + if (image3_id < 0) { + bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_ERROR, + "Error, bm_get_devid failed for image3 %s: %s: %d\n", + filename(__FILE__), __func__, __LINE__); + return BM_ERR_FAILURE; + } + if (dev_id != image3_id){ + bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_ERROR, + "Error, please check if the handle used for handle and bm_image are the same %s: %s: %d\n", + filename(__FILE__), __func__, __LINE__); + return BM_ERR_FAILURE; + } + + return BM_SUCCESS; +} static bm_status_t bm_image_get_heap_id(bm_image image, int *heap_location) { bm_device_mem_t dev_mem[3]; @@ -243,7 +486,7 @@ bm_status_t bm_vpp_query_limitation(bm_image_format_ext input_format, (input_format, output_format); if(vpp_format_allowed_map.find(key) == vpp_format_allowed_map.end()) - ret = BM_NOT_SUPPORTED; + ret = BM_ERR_DATA; else limit = vpp_format_allowed_map[key]; @@ -268,7 +511,7 @@ bm_status_t concat_images_to_tensor(bm_handle_t handle, if (bm_image_get_plane_num(images[0]) != 1) { bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_ERROR, "NOT supported image format\n"); - return BM_NOT_SUPPORTED; + return BM_ERR_DATA; } bm_device_mem_t first_dev_mem; bm_image_get_device_mem(images[0], &first_dev_mem); @@ -280,11 +523,11 @@ bm_status_t concat_images_to_tensor(bm_handle_t handle, bm_image_get_device_mem(images[i + 1], &dev_mem); u64 dev_addr = bm_mem_get_device_addr(dev_mem); if (dev_addr != last_dev_addr + last_image_size) { - 
return BM_ERR_FAILURE; + return BM_ERR_DATA; } if ((images[i].width != images[i + 1].width) || (images[i].height != images[i + 1].height)) { - return BM_ERR_FAILURE; + return BM_ERR_DATA; } last_dev_addr = dev_addr; } @@ -307,11 +550,12 @@ bm_status_t concat_images_to_tensor(bm_handle_t handle, bm_status_t bm_image_mem_layout_adjust(bm_handle_t handle, bm_image input_image, bm_image output_image) { + bm_status_t ret; // keep same heap location with before if (!bm_image_is_attached(input_image)) { BMCV_ERR_LOG("image should be attached memory firstly\n"); - return BM_ERR_FAILURE; + return BM_ERR_DATA; } // check and alloc output mem int input_heap_id[3], output_heap_id[3]; @@ -322,7 +566,7 @@ bm_status_t bm_image_mem_layout_adjust(bm_handle_t handle, if (input_heap_id[i] != input_heap_id[i - 1]) { BMCV_ERR_LOG("all planes should be in same heap\n"); - return BM_ERR_FAILURE; + return BM_ERR_DATA; } } if (bm_image_is_attached(output_image)) { @@ -332,20 +576,20 @@ bm_status_t bm_image_mem_layout_adjust(bm_handle_t handle, if (output_heap_id[i] != output_heap_id[i - 1]) { BMCV_ERR_LOG("all planes should be in same heap\n"); - return BM_ERR_FAILURE; + return BM_ERR_DATA; } } if (input_heap_id[0] != output_heap_id[0]) { BMCV_ERR_LOG("output and input should be allocated in same heap\n"); - return BM_ERR_FAILURE; + return BM_ERR_DATA; } } else { // keep same heap location with input if (BM_SUCCESS != bm_image_alloc_dev_mem(output_image, input_heap_id[0])) { BMCV_ERR_LOG("bm_image_alloc_dev_mem error\n"); - return BM_ERR_FAILURE; + return BM_ERR_NOMEM; } } // do copyTo @@ -356,8 +600,8 @@ bm_status_t bm_image_mem_layout_adjust(bm_handle_t handle, copy_to_attr.padding_g = 0; copy_to_attr.padding_r = 0; copy_to_attr.if_padding = 1; - if (BM_SUCCESS != - bmcv_image_copy_to(handle, copy_to_attr, input_image, output_image)) { + ret = bmcv_image_copy_to(handle, copy_to_attr, input_image, output_image); + if (BM_SUCCESS != ret) { BMCV_ERR_LOG("bm_image_alloc_dev_mem error\n"); 
bm_device_mem_t dev_mem[3]; bm_image_get_device_mem(output_image, dev_mem); @@ -365,7 +609,7 @@ bm_status_t bm_image_mem_layout_adjust(bm_handle_t handle, bm_free_device(handle, dev_mem[i]); } - return BM_ERR_FAILURE; + return ret; } return BM_SUCCESS; @@ -545,10 +789,11 @@ bm_status_t bm_shape_align(bm_image image, bm_image *out_image, int align_option, int align_num) { + bm_status_t ret; if (bm_image_is_attached(*out_image)) { BMCV_ERR_LOG("out_image should not be attached firstly\n"); - return BM_ERR_FAILURE; + return BM_ERR_DATA; } int plane_num = bm_image_get_plane_num(image); #ifdef __linux__ @@ -640,7 +885,7 @@ bm_status_t bm_shape_align(bm_image image, default: { BMCV_ERR_LOG("image format not support\n"); - return BM_ERR_FAILURE; + return BM_ERR_DATA; } } int width_align = (align_option == DO_HEIGHT_ALIGN) ? (width[0]) : (ALIGN(width[0], align_num)); @@ -721,7 +966,7 @@ bm_status_t bm_shape_align(bm_image image, default: { BMCV_ERR_LOG("image format not support\n"); - return BM_ERR_FAILURE; + return BM_ERR_DATA; } } #ifdef __linux__ @@ -747,17 +992,18 @@ bm_status_t bm_shape_align(bm_image image, if (BM_SUCCESS != bm_image_alloc_dev_mem(*out_image, input_heap_id[0])) { BMCV_ERR_LOG("bm_image_alloc_dev_mem error\n"); - return BM_ERR_FAILURE; + return BM_ERR_NOMEM; } bm_image_get_device_mem(*out_image, out_dev_mem); for (int i = 0; i < plane_num; i++) { // create input image of gray - if (BM_SUCCESS != bm_image_create(handle, - height[i], - width[i], - FORMAT_GRAY, - DATA_TYPE_EXT_1N_BYTE, - &tmp_in_image[i])) { + ret = bm_image_create(handle, + height[i], + width[i], + FORMAT_GRAY, + DATA_TYPE_EXT_1N_BYTE, + &tmp_in_image[i]); + if (BM_SUCCESS != ret) { BMCV_ERR_LOG("bm_image_create error\n"); for (int free_idx = 0; free_idx < i; free_idx++) { bm_image_destroy(tmp_in_image[free_idx]); @@ -765,16 +1011,17 @@ bm_status_t bm_shape_align(bm_image image, } bm_image_destroy(*out_image); - return BM_ERR_FAILURE; + return ret; } bm_image_attach(tmp_in_image[i], 
&dev_mem[i]); // create output image of gray - if (BM_SUCCESS != bm_image_create(handle, - aligned_height[i], - aligned_width[i], - FORMAT_GRAY, - DATA_TYPE_EXT_1N_BYTE, - &tmp_out_image[i])) { + ret = bm_image_create(handle, + aligned_height[i], + aligned_width[i], + FORMAT_GRAY, + DATA_TYPE_EXT_1N_BYTE, + &tmp_out_image[i]); + if (BM_SUCCESS != ret) { BMCV_ERR_LOG("bm_image_create error\n"); for (int free_idx = 0; free_idx <= i; free_idx++) { bm_image_destroy(tmp_in_image[free_idx]); @@ -784,7 +1031,7 @@ bm_status_t bm_shape_align(bm_image image, } bm_image_destroy(*out_image); - return BM_ERR_FAILURE; + return ret; } bm_image_attach(tmp_out_image[i], &out_dev_mem[i]); if (1 != bm_image_get_plane_num(tmp_out_image[i])) { @@ -795,11 +1042,12 @@ bm_status_t bm_shape_align(bm_image image, } bm_image_destroy(*out_image); - return BM_ERR_FAILURE; + return BM_ERR_DATA; } // change layout - if (BM_SUCCESS != bm_image_mem_layout_adjust( - handle, tmp_in_image[i], tmp_out_image[i])) { + ret = bm_image_mem_layout_adjust( + handle, tmp_in_image[i], tmp_out_image[i]); + if (BM_SUCCESS != ret) { BMCV_ERR_LOG("image format not support\n"); for (int free_idx = 0; free_idx <= i; free_idx++) { bm_image_destroy(tmp_in_image[free_idx]); @@ -807,7 +1055,7 @@ bm_status_t bm_shape_align(bm_image image, } bm_image_destroy(*out_image); - return BM_ERR_FAILURE; + return ret; } } for (int free_idx = 0; free_idx < plane_num; free_idx++) { @@ -851,13 +1099,13 @@ bm_status_t bm_shape_dealign(bm_image in_image, out_plane_size[i])) { BMCV_ERR_LOG("bm_memcpy_d2d_byte error\n"); - return BM_ERR_FAILURE; + return BM_ERR_NOMEM; } } } else { - BMCV_ERR_LOG("align_option not support\n"); + BMCV_ERR_LOG("align_option not support\n"); - return BM_ERR_FAILURE; + return BM_ERR_PARAM; } return BM_SUCCESS; @@ -887,18 +1135,18 @@ bm_status_t bm_separate_to_planar(bm_handle_t handle, if (input_image.image_format != FORMAT_BGRP_SEPARATE && input_image.image_format != FORMAT_RGBP_SEPARATE) { 
BMCV_ERR_LOG("bm_separate_to_planar input should be FORMAT_BGRP_SEPARATE or FORMAT_RGBP_SEPARATE\n"); - return BM_ERR_FAILURE; + return BM_ERR_DATA; } if (output_image.image_format != FORMAT_BGR_PLANAR && output_image.image_format != FORMAT_RGB_PLANAR) { BMCV_ERR_LOG("bm_separate_to_planar output should be FORMAT_BGR_PLANAR or FORMAT_RGB_PLANAR\n"); - return BM_ERR_FAILURE; + return BM_ERR_DATA; } if ((input_image.data_type != output_image.data_type) || (input_image.width != output_image.width) || (input_image.height != output_image.height)) { BMCV_ERR_LOG("bm_separate_to_planar input and output should be same data_type, width and height\n"); - return BM_ERR_FAILURE; + return BM_ERR_DATA; } int stride_i[3]; int stride_o[3]; @@ -907,14 +1155,14 @@ bm_status_t bm_separate_to_planar(bm_handle_t handle, for (int k = 0; k < bm_image_get_plane_num(output_image); k++) { if(stride_i[k] != stride_o[k]) { BMCV_ERR_LOG("bm_separate_to_planar input and output should be same stride!\n"); - return BM_ERR_FAILURE; + return BM_ERR_DATA; } } int height = input_image.height; if(!bm_image_is_attached(output_image)) { if(BM_SUCCESS != bm_image_alloc_dev_mem(output_image, BMCV_HEAP_ANY)) { BMCV_ERR_LOG("bm_image_alloc_dev_mem error\n"); - return BM_ERR_FAILURE; + return BM_ERR_NOMEM; } } bm_device_mem_t mem_i[3], mem_o; @@ -932,18 +1180,18 @@ bm_status_t bm_planar_to_separate(bm_handle_t handle, if (input_image.image_format != FORMAT_BGR_PLANAR && input_image.image_format != FORMAT_RGB_PLANAR) { BMCV_ERR_LOG("bm_separate_to_planar input should be FORMAT_BGR_PLANAR or FORMAT_RGB_PLANAR\n"); - return BM_ERR_FAILURE; + return BM_ERR_DATA; } if (output_image.image_format != FORMAT_BGRP_SEPARATE && output_image.image_format != FORMAT_RGBP_SEPARATE) { BMCV_ERR_LOG("bm_separate_to_planar output should be FORMAT_BGRP_SEPARATE or FORMAT_RGBP_SEPARATE\n"); - return BM_ERR_FAILURE; + return BM_ERR_DATA; } if ((input_image.data_type != output_image.data_type) || (input_image.width != 
output_image.width) || (input_image.height != output_image.height)) { BMCV_ERR_LOG("bm_separate_to_planar input and output should be same data_type, width and height\n"); - return BM_ERR_FAILURE; + return BM_ERR_DATA; } int stride_i[3]; int stride_o[3]; @@ -952,14 +1200,14 @@ bm_status_t bm_planar_to_separate(bm_handle_t handle, for (int k = 0; k < bm_image_get_plane_num(input_image); k++) { if(stride_i[k] != stride_o[k]) { BMCV_ERR_LOG("bm_separate_to_planar input and output should be same stride!\n"); - return BM_ERR_FAILURE; + return BM_ERR_DATA; } } int height = input_image.height; if(!bm_image_is_attached(output_image)) { if(BM_SUCCESS != bm_image_alloc_dev_mem(output_image, BMCV_HEAP_ANY)) { BMCV_ERR_LOG("bm_image_alloc_dev_mem error\n"); - return BM_ERR_FAILURE; + return BM_ERR_NOMEM; } } bm_device_mem_t mem_i, mem_o[3]; @@ -1057,14 +1305,14 @@ bm_status_t layout::update_memory_layout(bm_handle_t handle, switch(chipid){ case 0x1684: if(bm_send_api(handle, BM_API_ID_CV_CORRECT_LAYOUT, (u8 *)&api, sizeof(api)) != BM_SUCCESS || bm_sync_api(handle) != BM_SUCCESS) - return BM_ERR_FAILURE; + return BM_ERR_TIMEOUT; break; case BM1684X: bm_tpu_kernel_launch(handle, "cv_width_align", (u8 *)&api, sizeof(api)); break; default: - printf("BM_NOT_SUPPORT !\n"); - return BM_NOT_SUPPORTED; + printf("BM_NOT_SUPPORT!\n"); + return BM_ERR_NOFEATURE; break; } return BM_SUCCESS; @@ -1102,37 +1350,37 @@ bm_status_t bmcv_warp_affine_bilinear_bm1684(bm_handle_t handle, { if (src.data_type != dst[0].data_type) { bmlib_log("AFFINE BILINEAR", BMLIB_LOG_ERROR, "src and dst data type is not same!\r\n"); - return BM_NOT_SUPPORTED; + return BM_ERR_DATA; } if (src.image_format != dst[0].image_format) { bmlib_log("AFFINE BILINEAR", BMLIB_LOG_ERROR, "src and dst data format is not same!\r\n"); - return BM_NOT_SUPPORTED; + return BM_ERR_DATA; } if (src.data_type != DATA_TYPE_EXT_1N_BYTE) { bmlib_log("AFFINE BILINEAR", BMLIB_LOG_ERROR, "data type only support DATA_TYPE_EXT_1N_BYTE!\r\n"); - 
return BM_NOT_SUPPORTED; + return BM_ERR_DATA; } if (src.image_format != FORMAT_RGB_PLANAR && src.image_format != FORMAT_BGR_PLANAR) { bmlib_log("AFFINE BILINEAR", BMLIB_LOG_ERROR, "format only support RGB_PLANAR and BGR_PLANAR!\r\n"); - return BM_NOT_SUPPORTED; + return BM_ERR_DATA; } if (!bm_image_is_attached(src)) { bmlib_log("AFFINE BILINEAR", BMLIB_LOG_ERROR, "src image not attached data !\r\n"); - return BM_ERR_PARAM; + return BM_ERR_DATA; } for (int i = 0; i < output_num; i++) { if (dst[0].data_type != dst[i].data_type) { bmlib_log("AFFINE BILINEAR", BMLIB_LOG_ERROR, "dst images' data type not same !\r\n"); - return BM_ERR_PARAM; + return BM_ERR_DATA; } if (dst[0].image_format != dst[i].image_format) { bmlib_log("AFFINE BILINEAR", BMLIB_LOG_ERROR, "dst images' format not same !\r\n"); - return BM_ERR_PARAM; + return BM_ERR_DATA; } if (!bm_image_is_attached(dst[i])) { if (bm_image_alloc_dev_mem(dst[i]) != BM_SUCCESS) { bmlib_log("AFFINE BILINEAR", BMLIB_LOG_ERROR, "alloc dst dev_mem failed !\r\n"); - return BM_ERR_FAILURE; + return BM_ERR_NOMEM; } } } @@ -1171,11 +1419,11 @@ bm_status_t bmcv_warp_affine_bilinear_bm1684(bm_handle_t handle, if (BM_SUCCESS != bm_send_api(handle, BM_API_ID_CV_WARP_BILINEAR, (u8 *)&api, sizeof(api))) { BMCV_ERR_LOG("warp_bilinear send api error\r\n"); - return BM_ERR_FAILURE; + return BM_ERR_TIMEOUT; } if (BM_SUCCESS != bm_sync_api(handle)) { BMCV_ERR_LOG("warp_bilinear sync api error\r\n"); - return BM_ERR_FAILURE; + return BM_ERR_TIMEOUT; } } } @@ -1195,7 +1443,7 @@ typedef float bm_gen_proposal_data_type_t; "addr must be in sys memory:%s:%d\n", \ __FILE__, \ __LINE__); \ - return BM_ERR_PARAM; \ + return BM_ERR_DATA; \ } \ } while (0) @@ -1220,9 +1468,10 @@ bm_status_t bmcv_gen_prop_nms_bm1684(bm_handle_t handle, float filter_threshold, bm_device_mem_t filter_output, bm_device_mem_t filter_shape_output) { + bm_status_t ret; if (handle == NULL) { bmlib_log("GEN_PROP_NMS", BMLIB_LOG_ERROR, "Can not get handle!\r\n"); - return 
BM_ERR_FAILURE; + return BM_ERR_DEVNOTREADY; } bm_api_cv_gen_proposal_and_nms_t arg; bm_device_mem_t scores_buf_device_0, scores_buf_device_1, @@ -1274,7 +1523,7 @@ bm_status_t bmcv_gen_prop_nms_bm1684(bm_handle_t handle, bmcv_gen_proposal_attr_0->feat_h * data_size)) { BMCV_ERR_LOG("bm_malloc_device_byte error\r\n"); - + ret = BM_ERR_NOMEM; goto err0; } if (BM_SUCCESS != @@ -1282,7 +1531,7 @@ bm_status_t bmcv_gen_prop_nms_bm1684(bm_handle_t handle, scores_buf_device_0, bm_mem_get_system_addr(scores_addr_0))) { BMCV_ERR_LOG("bm_memcpy_s2d error\r\n"); - + ret = BM_ERR_NOMEM; goto err1; } } else { @@ -1293,7 +1542,7 @@ bm_status_t bmcv_gen_prop_nms_bm1684(bm_handle_t handle, &bbox_deltas_buf_device_0, bbox_deltas_size_0)) { BMCV_ERR_LOG("bm_malloc_device_byte error\r\n"); - + ret = BM_ERR_NOMEM; goto err1; } if (BM_SUCCESS != @@ -1301,7 +1550,7 @@ bm_status_t bmcv_gen_prop_nms_bm1684(bm_handle_t handle, bbox_deltas_buf_device_0, bm_mem_get_system_addr(bbox_deltas_addr_0))) { BMCV_ERR_LOG("bm_memcpy_s2d error\r\n"); - + ret = BM_ERR_NOMEM; goto err2; } } else { @@ -1314,7 +1563,7 @@ bm_status_t bmcv_gen_prop_nms_bm1684(bm_handle_t handle, &anchor_scales_buf_device_0, data_size * bmcv_gen_proposal_attr_0->anchor_scale_size)) { BMCV_ERR_LOG("bm_malloc_device_byte error\r\n"); - + ret = BM_ERR_NOMEM; goto err2; } if (BM_SUCCESS != @@ -1322,7 +1571,7 @@ bm_status_t bmcv_gen_prop_nms_bm1684(bm_handle_t handle, anchor_scales_buf_device_0, bm_mem_get_system_addr(anchor_scales_addr_0))) { BMCV_ERR_LOG("bm_memcpy_s2d error\r\n"); - + ret = BM_ERR_NOMEM; goto err3; } } else { @@ -1338,7 +1587,7 @@ bm_status_t bmcv_gen_prop_nms_bm1684(bm_handle_t handle, bmcv_gen_proposal_attr_1->feat_h * data_size)) { BMCV_ERR_LOG("bm_malloc_device_byte error\r\n"); - + ret = BM_ERR_NOMEM; goto err3; } if (BM_SUCCESS != @@ -1346,7 +1595,7 @@ bm_status_t bmcv_gen_prop_nms_bm1684(bm_handle_t handle, scores_buf_device_1, bm_mem_get_system_addr(scores_addr_1))) { BMCV_ERR_LOG("bm_memcpy_s2d 
error\r\n"); - + ret = BM_ERR_NOMEM; goto err4; } } else { @@ -1357,7 +1606,7 @@ bm_status_t bmcv_gen_prop_nms_bm1684(bm_handle_t handle, &bbox_deltas_buf_device_1, bbox_deltas_size_1)) { BMCV_ERR_LOG("bm_malloc_device_byte error\r\n"); - + ret = BM_ERR_NOMEM; goto err4; } if (BM_SUCCESS != @@ -1365,7 +1614,7 @@ bm_status_t bmcv_gen_prop_nms_bm1684(bm_handle_t handle, bbox_deltas_buf_device_1, bm_mem_get_system_addr(bbox_deltas_addr_1))) { BMCV_ERR_LOG("bm_memcpy_s2d error\r\n"); - + ret = BM_ERR_NOMEM; goto err5; } } else { @@ -1378,7 +1627,7 @@ bm_status_t bmcv_gen_prop_nms_bm1684(bm_handle_t handle, &anchor_scales_buf_device_1, data_size * bmcv_gen_proposal_attr_1->anchor_scale_size)) { BMCV_ERR_LOG("bm_malloc_device_byte error\r\n"); - + ret = BM_ERR_NOMEM; goto err5; } if (BM_SUCCESS != @@ -1386,7 +1635,7 @@ bm_status_t bmcv_gen_prop_nms_bm1684(bm_handle_t handle, anchor_scales_buf_device_1, bm_mem_get_system_addr(anchor_scales_addr_1))) { BMCV_ERR_LOG("bm_memcpy_s2d error\r\n"); - + ret = BM_ERR_NOMEM; goto err6; } } else { @@ -1402,7 +1651,7 @@ bm_status_t bmcv_gen_prop_nms_bm1684(bm_handle_t handle, bmcv_gen_proposal_attr_2->feat_h * data_size)) { BMCV_ERR_LOG("bm_malloc_device_byte error\r\n"); - + ret = BM_ERR_NOMEM; goto err6; } if (BM_SUCCESS != @@ -1410,7 +1659,7 @@ bm_status_t bmcv_gen_prop_nms_bm1684(bm_handle_t handle, scores_buf_device_2, bm_mem_get_system_addr(scores_addr_2))) { BMCV_ERR_LOG("bm_memcpy_s2d error\r\n"); - + ret = BM_ERR_NOMEM; goto err7; } } else { @@ -1421,7 +1670,7 @@ bm_status_t bmcv_gen_prop_nms_bm1684(bm_handle_t handle, &bbox_deltas_buf_device_2, bbox_deltas_size_2)) { BMCV_ERR_LOG("bm_malloc_device_byte error\r\n"); - + ret = BM_ERR_NOMEM; goto err7; } if (BM_SUCCESS != @@ -1429,7 +1678,7 @@ bm_status_t bmcv_gen_prop_nms_bm1684(bm_handle_t handle, bbox_deltas_buf_device_2, bm_mem_get_system_addr(bbox_deltas_addr_2))) { BMCV_ERR_LOG("bm_memcpy_s2d error\r\n"); - + ret = BM_ERR_NOMEM; goto err8; } } else { @@ -1442,7 +1691,7 
@@ bm_status_t bmcv_gen_prop_nms_bm1684(bm_handle_t handle, &anchor_scales_buf_device_2, data_size * bmcv_gen_proposal_attr_2->anchor_scale_size)) { BMCV_ERR_LOG("bm_malloc_device_byte error\r\n"); - + ret = BM_ERR_NOMEM; goto err8; } if (BM_SUCCESS != @@ -1450,7 +1699,7 @@ bm_status_t bmcv_gen_prop_nms_bm1684(bm_handle_t handle, anchor_scales_buf_device_2, bm_mem_get_system_addr(anchor_scales_addr_2))) { BMCV_ERR_LOG("bm_memcpy_s2d error\r\n"); - + ret = BM_ERR_NOMEM; goto err9; } } else { @@ -1461,7 +1710,7 @@ bm_status_t bmcv_gen_prop_nms_bm1684(bm_handle_t handle, &prop_output_buf_device, sizeof(m_proposal_t))) { BMCV_ERR_LOG("bm_malloc_device_byte error\r\n"); - + ret = BM_ERR_NOMEM; goto err9; } // if( BM_SUCCESS !=bm_malloc_device_byte( @@ -1471,7 +1720,7 @@ bm_status_t bmcv_gen_prop_nms_bm1684(bm_handle_t handle, &filter_output_buf_device, sizeof(nms_proposal_t))) { BMCV_ERR_LOG("bm_malloc_device_byte error\r\n"); - + ret = BM_ERR_NOMEM; goto err10; } } else { @@ -1483,7 +1732,7 @@ bm_status_t bmcv_gen_prop_nms_bm1684(bm_handle_t handle, &filter_output_shape_buf_device, score_shape_0->n * sizeof(int))) { BMCV_ERR_LOG("bm_malloc_device_byte error\r\n"); - + ret = BM_ERR_NOMEM; goto err11; } } else { @@ -1572,7 +1821,7 @@ bm_status_t bmcv_gen_prop_nms_bm1684(bm_handle_t handle, bm_mem_get_system_addr(filter_shape_output), filter_output_shape_buf_device)) { BMCV_ERR_LOG("bm_memcpy_d2s error\r\n"); - + ret = BM_ERR_NOMEM; goto err12; } bm_free_device(handle, filter_output_shape_buf_device); @@ -1582,7 +1831,7 @@ bm_status_t bmcv_gen_prop_nms_bm1684(bm_handle_t handle, bm_mem_get_system_addr(filter_output), filter_output_buf_device)) { BMCV_ERR_LOG("bm_memcpy_d2s error\r\n"); - + ret = BM_ERR_NOMEM; goto err11; } bm_free_device(handle, filter_output_buf_device); @@ -1666,7 +1915,7 @@ bm_status_t bmcv_gen_prop_nms_bm1684(bm_handle_t handle, bm_free_device(handle, scores_buf_device_0); } err0: - return BM_ERR_FAILURE; + return ret; } void 
format_to_str(bm_image_format_ext format, char* res) @@ -1786,12 +2035,12 @@ bm_status_t bmcv_warp_affine_bilinear(bm_handle_t handle, break; case BM1684X: - printf("bm1684x not support\n"); - ret = BM_NOT_SUPPORTED; + printf("current card not support\n"); + ret = BM_ERR_NOFEATURE; break; default: - ret = BM_NOT_SUPPORTED; + ret = BM_ERR_NOFEATURE; break; } @@ -1854,12 +2103,12 @@ bm_status_t bmcv_gen_prop_nms(bm_handle_t handle, break; case BM1684X: - printf("bm1684x not support\n"); - ret = BM_NOT_SUPPORTED; + printf("current card not support\n"); + ret = BM_ERR_NOFEATURE; break; default: - ret = BM_NOT_SUPPORTED; + ret = BM_ERR_NOFEATURE; break; } @@ -1952,7 +2201,7 @@ void data_type_conversion(bm_image_data_format_ext bmcv_data_type, int *tpu_data *tpu_data_type = DT_BFP16; break; default: - bmlib_log("BMCV", BMLIB_LOG_ERROR, "1684x bmcv_data_type not support %s: %s: %d\n", + bmlib_log("BMCV", BMLIB_LOG_ERROR, "current card bmcv_data_type not support %s: %s: %d\n", __FILE__, __func__, __LINE__); break; } diff --git a/bmvid/bmcv/src/bmcv_matrix_log.cpp b/bmvid/bmcv/src/bmcv_matrix_log.cpp index 74cc91b..796b334 100644 --- a/bmvid/bmcv/src/bmcv_matrix_log.cpp +++ b/bmvid/bmcv/src/bmcv_matrix_log.cpp @@ -1,5 +1,6 @@ #include "bmcv_internal.h" #include "bmcv_bm1684x.h" +#include bm_status_t check_matrix_log_param(bm_image src, bm_image dst) { @@ -39,7 +40,7 @@ bm_status_t bmcv_matrix_log(bm_handle_t handle, bm_image src, bm_image dst) { bm_status_t ret = BM_SUCCESS; bm_device_mem_t s_mem[4],d_mem[4]; - + bm_handle_check_2(handle, src, dst); ret = check_matrix_log_param(src,dst); if (BM_SUCCESS != ret) diff --git a/bmvid/bmcv/src/bmcv_vpp_internal.cpp b/bmvid/bmcv/src/bmcv_vpp_internal.cpp index 105b7f8..09f7bee 100644 --- a/bmvid/bmcv/src/bmcv_vpp_internal.cpp +++ b/bmvid/bmcv/src/bmcv_vpp_internal.cpp @@ -19,6 +19,8 @@ bm_status_t bmcv_image_vpp_basic( unsigned int chipid = BM1684X; bm_status_t ret = BM_SUCCESS; + bm_handle_check_2(handle, *input, *output); + ret 
= bm_get_chipid(handle, &chipid); if (BM_SUCCESS != ret) return ret; @@ -39,7 +41,7 @@ bm_status_t bmcv_image_vpp_basic( break; default: - ret = BM_NOT_SUPPORTED; + ret = BM_ERR_NOFEATURE; break; } @@ -56,7 +58,7 @@ bm_status_t bmcv_image_vpp_convert( { unsigned int chipid = BM1684X; bm_status_t ret = BM_SUCCESS; - + bm_handle_check_2(handle, input, *output); ret = bm_get_chipid(handle, &chipid); if (BM_SUCCESS != ret) return ret; @@ -77,7 +79,7 @@ bm_status_t bmcv_image_vpp_convert( break; default: - ret = BM_NOT_SUPPORTED; + ret = BM_ERR_NOFEATURE; break; } @@ -94,7 +96,7 @@ bm_status_t bmcv_image_vpp_convert_padding( { unsigned int chipid = BM1684X; bm_status_t ret = BM_SUCCESS; - + bm_handle_check_2(handle, input, *output); ret = bm_get_chipid(handle, &chipid); if (BM_SUCCESS != ret) return ret; @@ -114,7 +116,7 @@ bm_status_t bmcv_image_vpp_convert_padding( break; default: - ret = BM_NOT_SUPPORTED; + ret = BM_ERR_NOFEATURE; break; } @@ -132,7 +134,7 @@ bm_status_t bmcv_image_vpp_stitch( { unsigned int chipid = BM1684X; bm_status_t ret = BM_SUCCESS; - + bm_handle_check_2(handle, input[0], output); ret = bm_get_chipid(handle, &chipid); if (BM_SUCCESS != ret) return ret; @@ -152,7 +154,7 @@ bm_status_t bmcv_image_vpp_stitch( break; default: - ret = BM_NOT_SUPPORTED; + ret = BM_ERR_NOFEATURE; break; } @@ -171,7 +173,7 @@ bm_status_t bmcv_image_vpp_csc_matrix_convert( { unsigned int chipid = BM1684X; bm_status_t ret = BM_SUCCESS; - + bm_handle_check_2(handle, input, *output); ret = bm_get_chipid(handle, &chipid); if (BM_SUCCESS != ret) return ret; @@ -191,7 +193,7 @@ bm_status_t bmcv_image_vpp_csc_matrix_convert( break; default: - ret = BM_NOT_SUPPORTED; + ret = BM_ERR_NOFEATURE; break; } @@ -206,20 +208,20 @@ bm_status_t bmcv_image_mosaic_check(bm_handle_t handle, if(is_expand != 0 && is_expand != 1){ bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_ERROR, "is_expand out of range, is_expand=%d, %s: %s: %d\n", is_expand, filename(__FILE__), __func__, __LINE__); - return 
BM_ERR_FAILURE; + return BM_ERR_PARAM; } if(mosaic_num > 512 || mosaic_num < 1){ bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_ERROR, "mosaic_num out of range, mosaic_num=%d, %s: %s: %d\n", mosaic_num, filename(__FILE__), __func__, __LINE__); - return BM_ERR_FAILURE; + return BM_ERR_PARAM; } if(handle == NULL){ bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_ERROR, "handle is nullptr"); - return BM_ERR_FAILURE; + return BM_ERR_DEVNOTREADY; } if(input.image_private == NULL){ bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_ERROR, "input is nullptr"); - return BM_ERR_FAILURE; + return BM_ERR_DATA; } for(int i=0; i input.width || crop_rect[i].crop_h + crop_rect[i].start_y > input.height){ bmlib_log(BMCV_LOG_TAG, BMLIB_LOG_ERROR, "mosaic_rect out of range, i=%d, stx=%d, sty=%d, crop_w=%d, crop_h=%d, image_w=%d, image_h=%d, %s: %s: %d\n", i, crop_rect[i].start_x, crop_rect[i].start_y, crop_rect[i].crop_w, crop_rect[i].crop_h, input.width, input.height, filename(__FILE__), __func__, __LINE__); - return BM_ERR_FAILURE; + return BM_ERR_PARAM; } } return BM_SUCCESS; @@ -262,6 +264,7 @@ bm_status_t bmcv_image_mosaic(bm_handle_t handle, int is_expand){ unsigned int chipid = BM1684X; bm_status_t ret = BM_SUCCESS; + bm_handle_check_1(handle, input); ret = bm_get_chipid(handle, &chipid); if (BM_SUCCESS != ret) return ret; @@ -277,7 +280,7 @@ bm_status_t bmcv_image_mosaic(bm_handle_t handle, break; default: - ret = BM_NOT_SUPPORTED; + ret = BM_ERR_NOFEATURE; break; } fail: @@ -296,6 +299,7 @@ bm_status_t bmcv_image_watermark_superpose(bm_handle_t handle, { unsigned int chipid = BM1684X; bm_status_t ret = BM_SUCCESS; + bm_handle_check_1(handle, *image); ret = bm_get_chipid(handle, &chipid); if (BM_SUCCESS != ret) return ret; @@ -314,7 +318,7 @@ bm_status_t bmcv_image_watermark_superpose(bm_handle_t handle, break; default: - ret = BM_NOT_SUPPORTED; + ret = BM_ERR_NOFEATURE; break; } return ret; @@ -331,6 +335,7 @@ bm_status_t bmcv_image_watermark_repeat_superpose(bm_handle_t handle, bm_status_t ret = BM_SUCCESS; bm_image 
*image_inner = new bm_image [bitmap_num]; bm_device_mem_t *mem_inner = new bm_device_mem_t [bitmap_num]; + bm_handle_check_1(handle, image); for(int i=0; i= (1 << 16) || image.width >= (1 << 16)) { BMCV_ERR_LOG("Not support such big size image\n"); - return BM_NOT_SUPPORTED; + return BM_ERR_DATA; } if(length > 510){ BMCV_ERR_LOG("Not support such big length(%d) point\n", length); - return BM_NOT_SUPPORTED; + return BM_ERR_PARAM; } + bm_handle_check_1(handle, image); bm_status_t ret = BM_SUCCESS; unsigned int chipid = BM1684X; @@ -393,13 +399,13 @@ bm_status_t bmcv_image_draw_point(bm_handle_t handle, calculate_yuv(r, g, b, fill_val, fill_val + 1, fill_val + 2); ret = bm1684x_vpp_point(handle, image, point_num, coord, length, fill_val[0], fill_val[1], fill_val[2]); if(ret!=BM_SUCCESS){ - BMCV_ERR_LOG("error 1684x draw point\n"); + BMCV_ERR_LOG("error draw point\n"); } break; } default: { - return BM_NOT_SUPPORTED; + return BM_ERR_NOFEATURE; } } return ret; diff --git a/bmvid/bmcv/test/CMakeLists.txt b/bmvid/bmcv/test/CMakeLists.txt index aa3a06c..3760ff6 100755 --- a/bmvid/bmcv/test/CMakeLists.txt +++ b/bmvid/bmcv/test/CMakeLists.txt @@ -147,10 +147,9 @@ test_bmcv( test_resize.cpp test_cv_laplacian.cpp test_cv_axpy.cpp - test_bm1686_vpp_convert.cpp test_cv_bayer2rgb.cpp test_cv_as_strided.cpp - + test_cv_hist_balance.cpp ) elseif(WINDOWS) @@ -173,54 +172,34 @@ test_bmcv( test_cv_absdiff.cpp test_cv_add_weighted.cpp test_cv_bitwise.cpp - test_cv_base64.cpp - test_cv_batch_topk.cpp test_cv_bgrsplit.cpp test_cv_calc_hist.cpp - test_cv_canny.cpp test_cv_cmulp.cpp test_cv_copy_to.cpp test_cv_crop.cpp test_cv_dct.cpp test_cv_distance.cpp - test_cv_draw_lines.cpp - test_cv_draw_rectangle.cpp test_cv_feature_match.cpp - test_cv_fft_1d.cpp - test_cv_fft_2d.cpp test_cv_fill_rectangle.cpp - test_cv_gaussian_blur.cpp test_cv_gemm.cpp - test_cv_image_align.cpp - test_cv_image_transpose.cpp - test_cv_img_scale.cpp test_cv_jpeg.cpp test_cv_json.cpp - test_cv_lkpyramid.cpp 
test_cv_matmul.cpp test_cv_min_max.cpp test_cv_morph.cpp test_cv_nms.cpp test_cv_put_text.cpp test_cv_pyramid.cpp - test_cv_sobel.cpp test_cv_sort.cpp - test_cv_split.cpp - test_cv_storage_convert.cpp test_cv_threshold.cpp test_cv_transpose.cpp test_cv_vpp.cpp test_cv_vpp_border.cpp test_cv_vpp_loop.cpp - test_cv_vpp_random.cpp test_cv_vpp_stitch.cpp - test_cv_warp.cpp - test_cv_warp_bilinear.cpp test_cv_warp_perspective.cpp test_cv_warp_perspective_bilinear.cpp test_cv_width_align.cpp - test_cv_yuv2hsv.cpp - test_cv_yuv2rgb.cpp test_perf_bmcv.cpp test_perf_vpp.cpp test_resize.cpp @@ -228,6 +207,7 @@ test_bmcv( test_cv_axpy.cpp test_cv_bayer2rgb.cpp test_cv_as_strided.cpp + test_cv_hist_balance.cpp ) set(CMAKE_RUNTIME_OUTPUT_DIRECTORY_DEBUG ${CMAKE_INSTALL_PREFIX}/bin/bm1684x) @@ -243,7 +223,7 @@ test_bmcv( bm1684x/test_bm1684x_vpp_resize.cpp bm1684x/test_bm1684x_vpp_stitch.cpp bm1684x/test_bm1684x_vpp_convert.cpp - bm1684x/test_bm1684x_vpp_padding.cpp + bm1684x/test_bm1684x_vpp_dst_position.cpp bm1684x/test_bm1684x_vpp_comparison.cpp bm1684x/test_bm1684x_vpp_border_comapre.cpp bm1684x/test_bm1684x_vpp_fill_rectangle.cpp diff --git a/bmvid/bmcv/test/Makefile b/bmvid/bmcv/test/Makefile index 9cc12de..50ac218 100755 --- a/bmvid/bmcv/test/Makefile +++ b/bmvid/bmcv/test/Makefile @@ -17,8 +17,10 @@ BMCV_DIR ?= $(OUT_DIR)/bmcv INCLUDE_DIR += -I$(TEST_BMCV_DIR) #TEST_BMCV_SRCS_CXX = $(wildcard $(TEST_BMCV_DIR)/*.cpp wildcard $(TEST_BMCV_DIR)/bm1684x/*.cpp) -TEST_BMCV_SRCS_CXX = $(TEST_BMCV_DIR)/test_convert_to.cpp \ - $(TEST_BMCV_DIR)/test_cv_absdiff.cpp \ +TEST_BMCV_SRCS_CXX = $(TEST_BMCV_DIR)/test_cv_rotate.cpp \ + $(TEST_BMCV_DIR)/test_convert_to.cpp \ + $(TEST_BMCV_DIR)/test_csc_convert_to.cpp \ + $(TEST_BMCV_DIR)/test_cv_absdiff.cpp \ $(TEST_BMCV_DIR)/test_cv_as_strided.cpp \ $(TEST_BMCV_DIR)/test_cv_bayer2rgb.cpp \ $(TEST_BMCV_DIR)/test_cv_add_weighted.cpp \ @@ -28,9 +30,11 @@ TEST_BMCV_SRCS_CXX = $(TEST_BMCV_DIR)/test_convert_to.cpp \ 
$(TEST_BMCV_DIR)/test_cv_batch_topk.cpp \ $(TEST_BMCV_DIR)/test_cv_bgrsplit.cpp \ $(TEST_BMCV_DIR)/test_cv_calc_hist.cpp \ + $(TEST_BMCV_DIR)/test_cv_hist_balance.cpp \ $(TEST_BMCV_DIR)/test_cv_canny.cpp \ $(TEST_BMCV_DIR)/test_cv_cmulp.cpp \ $(TEST_BMCV_DIR)/test_cv_copy_to.cpp \ + $(TEST_BMCV_DIR)/test_cv_copy_to_param.cpp \ $(TEST_BMCV_DIR)/test_cv_crop.cpp \ $(TEST_BMCV_DIR)/test_cv_dct.cpp \ $(TEST_BMCV_DIR)/test_cv_distance.cpp \ @@ -58,6 +62,7 @@ TEST_BMCV_SRCS_CXX = $(TEST_BMCV_DIR)/test_convert_to.cpp \ $(TEST_BMCV_DIR)/test_cv_nms.cpp \ $(TEST_BMCV_DIR)/test_cv_put_text.cpp \ $(TEST_BMCV_DIR)/test_cv_pyramid.cpp \ + $(TEST_BMCV_DIR)/test_cv_quantify.cpp \ $(TEST_BMCV_DIR)/test_cv_sobel.cpp \ $(TEST_BMCV_DIR)/test_cv_sort.cpp \ $(TEST_BMCV_DIR)/test_cv_split.cpp \ @@ -87,6 +92,7 @@ TEST_BMCV_SRCS_CXX = $(TEST_BMCV_DIR)/test_convert_to.cpp \ $(TEST_BMCV_DIR)/test_yolov3_detect_out.cpp \ $(TEST_BMCV_DIR)/test_matrix_log.cpp \ $(TEST_BMCV_DIR)/bm1684x/test_bm1684x_fbd.cpp \ + $(TEST_BMCV_DIR)/bm1684x/test_bm1684x_fbd_param.cpp \ $(TEST_BMCV_DIR)/bm1684x/test_bm1684x_slt.cpp \ $(TEST_BMCV_DIR)/bm1684x/test_bm1684x_vpp.cpp \ $(TEST_BMCV_DIR)/bm1684x/test_bm1684x_vpp_ax+b.cpp \ @@ -95,9 +101,10 @@ TEST_BMCV_SRCS_CXX = $(TEST_BMCV_DIR)/test_convert_to.cpp \ $(TEST_BMCV_DIR)/bm1684x/test_bm1684x_vpp_random.cpp \ $(TEST_BMCV_DIR)/bm1684x/test_bm1684x_vpp_resize.cpp \ $(TEST_BMCV_DIR)/bm1684x/test_bm1684x_vpp_stitch.cpp \ + $(TEST_BMCV_DIR)/bm1684x/test_bm1684x_vpp_stitch_2way.cpp \ $(TEST_BMCV_DIR)/bm1684x/test_bm1684x_vpp_mosaic.cpp \ $(TEST_BMCV_DIR)/bm1684x/test_bm1684x_vpp_convert.cpp \ - $(TEST_BMCV_DIR)/bm1684x/test_bm1684x_vpp_padding.cpp \ + $(TEST_BMCV_DIR)/bm1684x/test_bm1684x_vpp_dst_position.cpp \ $(TEST_BMCV_DIR)/bm1684x/test_bm1684x_vpp_comparison.cpp \ $(TEST_BMCV_DIR)/bm1684x/test_bm1684x_vpp_border_comapre.cpp \ $(TEST_BMCV_DIR)/bm1684x/test_bm1684x_vpp_fill_rectangle.cpp \ @@ -107,6 +114,7 @@ TEST_BMCV_SRCS_CXX = 
$(TEST_BMCV_DIR)/test_convert_to.cpp \ $(TEST_BMCV_DIR)/bm1684x/test_bm1684x_vpp_convert_to_comparison.cpp \ $(TEST_BMCV_DIR)/bm1684x/test_bm1684x_vpp_mul_crop_resize_haikang.cpp \ $(TEST_BMCV_DIR)/bm1684x/test_bm1684x_vpp_point.cpp\ + $(TEST_BMCV_DIR)/bm1684x/test_bm1684x_vpp_multi_thread.cpp\ TEST_BMCV_CXX_EXE1 = $(patsubst %.cpp,%,$(TEST_BMCV_SRCS_CXX)) TEST_BMCV_CXX_EXE2 = $(patsubst $(BMVID_ROOT)/%,%,$(TEST_BMCV_CXX_EXE1)) diff --git a/bmvid/bmcv/test/bm1684x/test_bm1684x_fbd_param.cpp b/bmvid/bmcv/test/bm1684x/test_bm1684x_fbd_param.cpp new file mode 100644 index 0000000..f9f2cf2 --- /dev/null +++ b/bmvid/bmcv/test/bm1684x/test_bm1684x_fbd_param.cpp @@ -0,0 +1,175 @@ +#include +#include +#include "bmcv_api_ext.h" +#include "stdio.h" +#include "stdlib.h" +#include +#include +#include +#include +#ifdef __linux__ +#include +#endif + +#define UNUSED_VARIABLE(x) ((x) = (x)) + +// extern void bm1684x_vpp_read_bin(bm_image src, const char *input_name); +extern void bm1684x_vpp_write_bin(bm_image dst, const char *output_name); +extern void format_to_str(bm_image_format_ext format, char* res); +static void algorithm_to_str(bmcv_resize_algorithm algorithm, char* res) +{ + switch(algorithm) + { + case BMCV_INTER_NEAREST: + strcpy(res, "BMCV_INTER_NEAREST"); + break; + case BMCV_INTER_LINEAR: + strcpy(res, "BMCV_INTER_LINEAR"); + break; + case BMCV_INTER_BICUBIC: + strcpy(res, "BMCV_INTER_BICUBIC"); + break; + default: + printf("%s:%d[%s] Not found such algorithm.\n",__FILE__, __LINE__, __FUNCTION__); + break; + } +} + +void read_image(unsigned char **input_ptr, int *src_len, const char * src_name) +{ + // char input_name[200] = {0}; + // int len = strlen(opencvFile_path); + // if(opencvFile_path[len-1] != '/') + // opencvFile_path[len] = '/'; + // snprintf(input_name, 200,"%s%s", opencvFile_path,src_name); + + FILE *fp_src = fopen(src_name, "rb+"); + fseek(fp_src, 0, SEEK_END); + *src_len = ftell(fp_src); + *input_ptr = (unsigned char *)malloc(*src_len); + fseek(fp_src, 
0, SEEK_SET); + size_t cnt = fread((void *)*input_ptr, 1, *src_len, fp_src); + fclose(fp_src); + UNUSED_VARIABLE(cnt); +} + +int main(int argc, char **argv) { + + bm_handle_t handle = NULL; + int src_h, src_w, dst_w, dst_h; + bm_image_format_ext src_fmt = FORMAT_COMPRESSED, dst_fmt; + char *table_y, *data_y, *table_c, *data_c, *dst_name; + bm_image src, dst; + bmcv_rect_t rect; + bmcv_resize_algorithm algorithm = BMCV_INTER_NEAREST; + unsigned int i = 0, loop_time = 0; + unsigned long long time_single, time_total = 0, time_avg = 0; + unsigned long long time_max = 0, time_min = 10000, fps = 0, pixel_per_sec = 0; + int dev_id = 0; + +#ifdef __linux__ + struct timeval tv_start; + struct timeval tv_end; + struct timeval timediff; +#endif + + if (argc != 11) { + printf("usage: %d\n", argc); + printf("%s src_w src_h table_y data_y table_c data_c dst_fmt dst_name loop_time dev_id\n", argv[0]); + printf("example:\n"); + printf("FORMAT_COMPRESSED-->FORMAT_YUV420P:\n"); + printf("%s 1920 1080 offset_base_y.bin offset_comp_y.bin offset_base_c.bin offset_comp_c.bin 0 fbd_1080p.bin 1 1\n", argv[0]); + return 0; + } + + src_w = atoi(argv[1]); + src_h = atoi(argv[2]); + table_y = argv[3]; + data_y = argv[4]; + table_c = argv[5]; + data_c = argv[6]; + dst_w = src_w; + dst_h = src_h; + dst_fmt = (bm_image_format_ext)atoi(argv[7]); + dst_name = argv[8]; + loop_time = atoi(argv[9]); + dev_id = atoi(argv[10]); + + rect.start_x = 0; + rect.start_y = 0; + rect.crop_w = dst_w; + rect.crop_h = dst_h; + + bm_status_t ret = bm_dev_request(&handle, dev_id); + if (ret != BM_SUCCESS) { + printf("Create bm handle failed. 
ret = %d\n", ret); + exit(-1); + } + + bm_image_create(handle, src_h, src_w, src_fmt, DATA_TYPE_EXT_1N_BYTE, &src); + bm_device_mem_t mem[4]; + memset(mem, 0, sizeof(bm_device_mem_t) * 4); + + unsigned char * buf[4] = {NULL}; + int plane_size[4] = {0}; + + read_image(&buf[0], &plane_size[0], table_y); + read_image(&buf[1], &plane_size[1], data_y); + read_image(&buf[2], &plane_size[2], table_c); + read_image(&buf[3], &plane_size[3], data_c); + + for (int i = 0; i < 4; i++) { + bm_malloc_device_byte(handle, mem + i, plane_size[i]); + bm_memcpy_s2d(handle, mem[i], (void *)buf[i]); + } + bm_image_attach(src, mem); + + bm_image_create(handle, dst_h, dst_w, dst_fmt, DATA_TYPE_EXT_1N_BYTE, &dst); + bm_image_alloc_dev_mem(dst,1); + +// printf("src addr = 0x%lx\n", src.image_private->data[0].u.device.device_addr); +// printf("dst addr = 0x%lx\n", dst.image_private->data[0].u.device.device_addr); + + for(i = 0;i < loop_time; i++){ +#ifdef __linux__ + gettimeofday(&tv_start, NULL); +#endif + + bmcv_image_vpp_csc_matrix_convert(handle, 1, src, &dst, CSC_MAX_ENUM, NULL, algorithm, &rect); + +#ifdef __linux__ + gettimeofday(&tv_end, NULL); + timediff.tv_sec = tv_end.tv_sec - tv_start.tv_sec; + timediff.tv_usec = tv_end.tv_usec - tv_start.tv_usec; + time_single = (unsigned int)(timediff.tv_sec * 1000000 + timediff.tv_usec); +#endif + + if(time_single>time_max){time_max = time_single;} + if(time_single%d*%d, %s->%s,%s\n",src_w,src_h,dst_w,dst_h,src_fmt_str,dst_fmt_str,algorithm_str); + printf("bmcv_image_vpp_csc_matrix_convert:loop %d cycles, time_avg = %llu, fps %llu, %lluM pps\n\n",loop_time, time_avg, fps, pixel_per_sec); + + bmlib_log("BMCV",BMLIB_LOG_TRACE, "loop %d cycles, time_max = %llu, time_min = %llu, time_avg = %llu\n", + loop_time, time_max, time_min, time_avg); + + return 0; +} + diff --git a/bmvid/bmcv/test/bm1684x/test_bm1684x_vpp_padding.cpp b/bmvid/bmcv/test/bm1684x/test_bm1684x_vpp_dst_position.cpp similarity index 100% rename from 
bmvid/bmcv/test/bm1684x/test_bm1684x_vpp_padding.cpp rename to bmvid/bmcv/test/bm1684x/test_bm1684x_vpp_dst_position.cpp diff --git a/bmvid/bmcv/test/bm1684x/test_bm1684x_vpp_multi_thread.cpp b/bmvid/bmcv/test/bm1684x/test_bm1684x_vpp_multi_thread.cpp new file mode 100644 index 0000000..319de22 --- /dev/null +++ b/bmvid/bmcv/test/bm1684x/test_bm1684x_vpp_multi_thread.cpp @@ -0,0 +1,520 @@ +#include +#include +#include +#include +#include +#include +#include +#include "bmcv_api.h" +#include "bmcv_api_ext.h" +#include "test_misc.h" + +#include +#ifdef __linux__ +#include +#else +#include +#endif + +#define MAX_THREAD_NUM (128) +#define NEED_COMPARE (1) +#define NEED_CALC_MEMORY (1) + +unsigned int single_frame_count[MAX_THREAD_NUM] = {0}; +unsigned int finished_count = 0; +pthread_mutex_t mutex; + + +extern bm_status_t bm1684x_vpp_cmodel_calc( + bm_handle_t handle, + int frame_number, + bm_image* input, + bm_image* output, + bmcv_rect_t* input_crop_rect, + bmcv_padding_atrr_t* padding_attr, + bmcv_resize_algorithm algorithm, + csc_type_t csc_type, + csc_matrix_t* matrix, + bmcv_convert_to_attr* convert_to_attr, + border_t* border_param, + font_t* font_param); + +struct thread_arg{ + int loop_times; + int devid; + int thread_id; + int compare; + int memory_cacl; + unsigned int src_h, src_w, dst_w, dst_h; + bm_image_format_ext src_fmt,dst_fmt; + char *src_name, *dst_name; +}; + +static void user_usage() { + printf( + "-N : num,\n" + "-a : src_name[0],\n" + "-b : src_name[1],\n" + "-c : src_name[2],\n" + "-d : src_name[3],\n" + "-e : src_w,\n" + "-f : src_h,\n" + "-g : src_fmt, \n" + "-h : dst_name, \n" + "-i : dst_w, \n" + "-j : dst_h,\n" + "-k : dst_fmt,\n" + ); +} + +// ȡڴʹ +void read_heap_memory(const char *heap_name, unsigned long *heap_size, unsigned long *used_memory) { + char filename[100]; + int ret = 0; + + sprintf(filename, "/sys/kernel/debug/ion/%s/summary", heap_name); + + FILE *fp = fopen(filename, "r"); + if (fp == NULL) { + perror("Error opening file"); 
+ return; + } + + // ȡ + ret = fscanf(fp, "%*s %*s %*s %*s size:%lu %*s used:%lu bytes", heap_size, used_memory); + if((0 == ret ) || (EOF == ret)) + { + printf("read_heap_memory, fscanf heap_size wrong\n"); + } + + fclose(fp); +} + +// ȡ̵ RSS ڴС +unsigned long get_process_rss() { + FILE *fp; + char path[100]; + char line[256]; + unsigned long rss = 0; + + sprintf(path, "/proc/%d/status", getpid()); + fp = fopen(path, "r"); + if (fp == NULL) { + perror("Error opening file"); + return 0; + } + + printf("/proc/%d/status \n", getpid()); + + while (fgets(line, sizeof(line), fp)) { + if (strncmp(line, "VmRSS:", 6) == 0) { + sscanf(line, "VmRSS: %lu kB", &rss); + break; + } + } + + fclose(fp); + return rss; +} + +int readfile(const char *path, unsigned char* input_data, unsigned int size) { + int ret = 0; + unsigned int cnt = 0; + + FILE *fp_src = fopen(path, "rb"); + if (fp_src == NULL) { + printf("file: %s is NULL\n", path); + return -1; + } + cnt = fread((void *)input_data, 1, size, fp_src); + if (cnt < size) { + printf("file size %d is less than required bytes %d\n", cnt, size); + ret = -1; + }; + fclose(fp_src); + return ret; +} + +static void write_bin(const char *output_path, unsigned char *output_data, unsigned int size) { + FILE *fp_dst = fopen(output_path, "wb"); + if (fp_dst == NULL) { + printf("open file: %s failed \n", output_path); + return; + } + fwrite(output_data, 1, size, fp_dst); + fclose(fp_dst); +} +static int cmodel_compare(bm_handle_t handle, bm_image src, void* src_in_ptr[4], bm_image dst, bmcv_rect_t rect) +{ + bm_image src_cmodel, dst_cmodel; + + int src_image_byte_size[4] = {0}; + bm_image_get_byte_size(src, src_image_byte_size); + + int dst_image_byte_size[4] = {0}; + bm_image_get_byte_size(dst, dst_image_byte_size); + int dst_byte_size = dst_image_byte_size[0] + dst_image_byte_size[1] + dst_image_byte_size[2] + dst_image_byte_size[3]; + + char* dst_input_ptr = (char*)malloc(dst_byte_size); + void* dst_in_ptr[4] = {(void*)dst_input_ptr, + 
(void*)((char*)dst_input_ptr + dst_image_byte_size[0]), + (void*)((char*)dst_input_ptr + dst_image_byte_size[0] + dst_image_byte_size[1]), + (void*)((char*)dst_input_ptr + dst_image_byte_size[0] + dst_image_byte_size[1] + dst_image_byte_size[2])}; + + bm_image_copy_device_to_host(dst, (void **)dst_in_ptr); + + bm_image_create(handle, src.height, src.width, src.image_format, src.data_type, &src_cmodel); + bm_image_create(handle, dst.height, dst.width, dst.image_format, dst.data_type, &dst_cmodel); + + bm_device_mem_t src_cmodel_mem[4]; + bm_device_mem_t dst_cmodel_mem[4]; + + int src_cmodel_plane_num = bm_image_get_plane_num(src_cmodel); + for(int i = 0; i< src_cmodel_plane_num; i++) + { + src_cmodel_mem[i].u.device.device_addr = (unsigned long)src_in_ptr[i]; + src_cmodel_mem[i].size = src_image_byte_size[i]; + src_cmodel_mem[i].flags.u.mem_type = BM_MEM_TYPE_DEVICE; + } + + bm_image_attach(src_cmodel, src_cmodel_mem); + + char* dst_input_ptr_cmodel = (char*)malloc(dst_byte_size); + void* dst_in_ptr_cmodel[4] = {(void*)dst_input_ptr_cmodel, + (void*)((char*)dst_input_ptr_cmodel + dst_image_byte_size[0]), + (void*)((char*)dst_input_ptr_cmodel + dst_image_byte_size[0] + dst_image_byte_size[1]), + (void*)((char*)dst_input_ptr_cmodel + dst_image_byte_size[0] + dst_image_byte_size[1] + dst_image_byte_size[2])}; + + int dst_cmodel_plane_num = bm_image_get_plane_num(dst_cmodel); + for(int i = 0; i< dst_cmodel_plane_num; i++) + { + dst_cmodel_mem[i].u.device.device_addr = (unsigned long)dst_in_ptr_cmodel[i]; + dst_cmodel_mem[i].size = dst_image_byte_size[i]; + dst_cmodel_mem[i].flags.u.mem_type = BM_MEM_TYPE_DEVICE; + } + + bm_image_attach(dst_cmodel, dst_cmodel_mem); + + bm1684x_vpp_cmodel_calc(handle, 1, &src_cmodel, &dst_cmodel, &rect,NULL,BMCV_INTER_LINEAR,CSC_MAX_ENUM,NULL,NULL,NULL,NULL); + + + int ret = memcmp(dst_input_ptr, dst_input_ptr_cmodel, dst_byte_size); + free(dst_input_ptr); + free(dst_input_ptr_cmodel); + + return ret; +} +void write_output_bin(char 
*dst_name, bm_image dst, int thread_id) +{ + int dst_image_byte_size[4] = {0}; + bm_image_get_byte_size(dst, dst_image_byte_size); + int dst_byte_size = dst_image_byte_size[0] + dst_image_byte_size[1] + dst_image_byte_size[2] + dst_image_byte_size[3]; + unsigned char* dst_input_ptr = (unsigned char*)malloc(dst_byte_size); + void* dst_in_ptr[4] = {(void*)dst_input_ptr, + (void*)((unsigned char*)dst_input_ptr + dst_image_byte_size[0]), + (void*)((unsigned char*)dst_input_ptr + dst_image_byte_size[0] + dst_image_byte_size[1]), + (void*)((unsigned char*)dst_input_ptr + dst_image_byte_size[0] + dst_image_byte_size[1] + dst_image_byte_size[2])}; + bm_image_copy_device_to_host(dst, (void **)dst_in_ptr); + + char dst_name_id[128]= {0}; + int len = 0; + len = snprintf(dst_name_id, 128, "%s_%d", dst_name, thread_id); + if(len >128) + printf("Output truncated. Required length: %d\n", len+1); + + write_bin(dst_name_id, dst_input_ptr, dst_byte_size); +} + + +static void *test_vpp_random_thread(void *arg) { + + int i, loop_times, thread_id, dev, ret = 0, compare = 0, memory_cacl = 0; + bm_handle_t handle; + unsigned int src_h, src_w, dst_w, dst_h; + bm_image_format_ext src_fmt,dst_fmt; + bm_image src, dst, src_cmodel, dst_cmodel; + bmcv_rect_t rect; + char *src_name = NULL, *dst_name = NULL; + + unsigned long rss_before, rss_after; + unsigned long npu_heap_size_before, npu_used_memory_before; + unsigned long vpp_heap_size_before, vpp_used_memory_before; + unsigned long vpu_heap_size_before, vpu_used_memory_before; + + unsigned long npu_heap_size_after, npu_used_memory_after; + unsigned long vpp_heap_size_after, vpp_used_memory_after; + unsigned long vpu_heap_size_after, vpu_used_memory_after; + + + loop_times = ((struct thread_arg *)arg)->loop_times; + dev = ((struct thread_arg *)arg)->devid; + thread_id = ((struct thread_arg *)arg)->thread_id; + compare = ((struct thread_arg *)arg)->compare; + memory_cacl = ((struct thread_arg *)arg)->memory_cacl; + + src_w = ((struct 
thread_arg *)arg)->src_w; + src_h = ((struct thread_arg *)arg)->src_h; + src_fmt = ((struct thread_arg *)arg)->src_fmt; + dst_w = ((struct thread_arg *)arg)->dst_w; + dst_h = ((struct thread_arg *)arg)->dst_h; + dst_fmt = ((struct thread_arg *)arg)->dst_fmt; + src_name = ((struct thread_arg *)arg)->src_name; + dst_name = ((struct thread_arg *)arg)->dst_name; + + + single_frame_count[thread_id] = {0}; + + srand(thread_id); + bm_dev_request(&handle, dev); + + rect.start_x = 0; + rect.start_y = 0; + rect.crop_w = src_w; + rect.crop_h = src_h; + + bm_image_create(handle, src_h, src_w, src_fmt, DATA_TYPE_EXT_1N_BYTE, &src); + bm_image_create(handle, dst_h, dst_w, dst_fmt, DATA_TYPE_EXT_1N_BYTE, &dst); + bm_image_alloc_dev_mem(src); + bm_image_alloc_dev_mem(dst); + + int src_image_byte_size[4] = {0}; + bm_image_get_byte_size(src, src_image_byte_size); + + int src_byte_size = src_image_byte_size[0] + src_image_byte_size[1] + src_image_byte_size[2] + src_image_byte_size[3]; + unsigned char* src_input_ptr = (unsigned char*)malloc(src_byte_size); + void* src_in_ptr[4] = {(void*)src_input_ptr, + (void*)((unsigned char*)src_input_ptr + src_image_byte_size[0]), + (void*)((unsigned char*)src_input_ptr + src_image_byte_size[0] + src_image_byte_size[1]), + (void*)((unsigned char*)src_input_ptr + src_image_byte_size[0] + src_image_byte_size[1] + src_image_byte_size[2])}; + + if(NULL == src_name) + { + for (i = 0; i < src_byte_size; i++) { + src_input_ptr[i] = rand() % 255 + 1; + } + } + else + { + ret = readfile(src_name, src_input_ptr, src_byte_size); + if(ret!=0) + { + printf("readfile failed, thread id %d\n", thread_id); + exit(-1); + } + } + + bm_image_copy_host_to_device(src, (void **)src_in_ptr); + + + for (i = 0; i < loop_times; i++) { + + if(NEED_CALC_MEMORY == memory_cacl) + { + rss_before = get_process_rss(); + read_heap_memory("bm_npu_heap_dump", &npu_heap_size_before, &npu_used_memory_before); + read_heap_memory("bm_vpp_heap_dump", &vpp_heap_size_before, 
&vpp_used_memory_before); + read_heap_memory("bm_vpu_heap_dump", &vpu_heap_size_before, &vpu_used_memory_before); + } + + bmcv_image_vpp_convert(handle, 1, src, &dst, &rect); + + if(NEED_CALC_MEMORY == memory_cacl) + { + rss_after = get_process_rss(); + + printf("Memory consumption: %lu kB, rss_before %lu kB, rss_after %lu kB\n", rss_after - rss_before, rss_before, rss_after); + read_heap_memory("bm_npu_heap_dump", &npu_heap_size_after, &npu_used_memory_after); + read_heap_memory("bm_vpp_heap_dump", &vpp_heap_size_after, &vpp_used_memory_after); + read_heap_memory("bm_vpu_heap_dump", &vpu_heap_size_after, &vpu_used_memory_after); + printf("npu_heap size: %lu Byte, npu_heap consumption %lu Byte, npu_used_memory_after %lu Byte, npu_used_memory_before %lu Byte\n", + npu_heap_size_after,npu_used_memory_after-npu_used_memory_before,npu_used_memory_after,npu_used_memory_before); + printf("vpp_heap size: %lu Byte, vpp_heap consumption %lu Byte, vpp_used_memory_after %lu Byte, vpp_used_memory_before %lu Byte\n", + vpp_heap_size_after,vpp_used_memory_after-vpp_used_memory_before,vpp_used_memory_after,vpp_used_memory_before); + printf("vpu_heap size: %lu Byte, vpu_heap consumption %lu Byte, vpu_used_memory_after %lu Byte, vpu_used_memory_before %lu Byte\n", + vpu_heap_size_after,vpu_used_memory_after-vpu_used_memory_before,vpu_used_memory_after,vpu_used_memory_before); + } + + + single_frame_count[thread_id]++; + + if(NEED_COMPARE == compare) + { + ret = cmodel_compare(handle, src, src_in_ptr, dst, rect); + if(0 != ret) { + printf("asic and cmode compare error, thread_id %d\n",thread_id); + exit(-1); + } + } + } + + if(NULL != dst_name) + { + write_output_bin(dst_name, dst, thread_id); + } + + free(src_input_ptr); + bm_image_destroy(dst); + bm_image_destroy(src); + + + bm_dev_free(handle); + + pthread_mutex_lock(&mutex); + finished_count++; + pthread_mutex_unlock(&mutex); + + return NULL; +} + +int main(int argc, char **argv) { + + struct option long_options[] = { + {"sw", 
required_argument, NULL, 'a'}, + {"sh", required_argument, NULL, 'b'}, + {"sformat", required_argument, NULL, 'k'}, + {"sname", required_argument, NULL, 'i'}, + {"dw", required_argument, NULL, 'e'}, + {"dh", required_argument, NULL, 'f'}, + {"dformat", required_argument, NULL, 'g'}, + {"dname", required_argument, NULL, 'j'}, + }; + + struct timeval last_time, new_time; + unsigned int i, loop_times = 1; + int devid = 0, ret = 0, total_frames = 0, whole_time = 0; + unsigned int thread_num = 1, fps = 0, compare = 0, memory_cacl = 0; + + unsigned int src_h = 1080, src_w = 1920, dst_w = 1920, dst_h = 1080; + bm_image_format_ext src_fmt = FORMAT_YUV444P, dst_fmt = FORMAT_YUV444P; + char *src_name = NULL, *dst_name = NULL; + + int ch = 0, opt_idx = 0; + while ((ch = getopt_long(argc, argv, "a:b:c:d:e:f:g:i:j:k:l:m:t:", long_options, &opt_idx)) != -1) { + switch (ch) { + case 't': + thread_num = atoi(optarg); + break; + case 'd': + devid = atoi(optarg); + break; + case 'a': + src_w = atoi(optarg); + break; + case 'b': + src_h = atoi(optarg); + break; + case 'k': + src_fmt = (bm_image_format_ext)atoi(optarg); + break; + case 'e': + dst_w = atoi(optarg); + break; + case 'f': + dst_h = atoi(optarg); + break; + case 'g': + dst_fmt = (bm_image_format_ext)atoi(optarg); + break; + case 'l': + loop_times = atoi(optarg); + break; + case 'c': + compare = atoi(optarg); + break; + case 'i': + src_name = optarg; + break; + case 'j': + dst_name = optarg; + break; + case 'm': + memory_cacl = atoi(optarg); + break; + case '?': + user_usage(); + return 0; + } + } + + printf("argc %d, thread_num %d, devid %d, loop_times %d, compare %d, memory_cacl %d\n",argc, thread_num, devid, loop_times, compare, memory_cacl); + printf("src_w %d, src_h %d, src_fmt %d\n",src_w,src_h,src_fmt); + printf("dst_w %d, dst_h %d, dst_fmt %d\n",dst_w,dst_h,dst_fmt); + + pthread_t * pid = new pthread_t[thread_num]; + struct thread_arg arg[thread_num]; + + for (i = 0; i < thread_num; i++) { + arg[i].loop_times = 
loop_times; + arg[i].devid = devid; + arg[i].thread_id = i; + arg[i].compare = compare; + arg[i].memory_cacl = memory_cacl; + + arg[i].src_w = src_w; + arg[i].src_h = src_h; + arg[i].src_fmt = src_fmt; + arg[i].dst_w = dst_w; + arg[i].dst_h = dst_h; + arg[i].dst_fmt = dst_fmt; + arg[i].src_name = src_name; + arg[i].dst_name = dst_name; + + if (pthread_create( &pid[i], NULL, test_vpp_random_thread, (void *)(&arg[i]))) + { + delete[] pid; + perror("create thread failed\n"); + exit(-1); + } + } + + pthread_mutex_init(&mutex, NULL); + + + total_frames = 0; + gettimeofday(&new_time, NULL); + last_time = new_time; + while(1) + { + pthread_mutex_lock(&mutex); + if(finished_count >= thread_num) + { + break; + } + pthread_mutex_unlock(&mutex); + + + + gettimeofday(&new_time, NULL); + whole_time = (new_time.tv_sec - last_time.tv_sec) * 1000000 + (new_time.tv_usec - last_time.tv_usec); + if(whole_time >= 1000000) + { + for(i = 0; i < thread_num; i++) + { + total_frames += single_frame_count[i]; + single_frame_count[i] = 0; + } + fps = total_frames / (whole_time/1000000); + printf("total_frames: %u, fps %u,whole_time %us\n",total_frames,fps,whole_time); + + total_frames = 0; + fps = 0; + last_time = new_time; + } + } + + for (i = 0; i < thread_num; i++) { + ret = pthread_join(pid[i], NULL); + if (ret != 0) { + delete[] pid; + perror("Thread join failed"); + exit(-1); + } + } + + std::cout << "--------ALL THREADS TEST OVER---------" << std::endl; + + pthread_mutex_destroy(&mutex); + delete[] pid; + + return 0; +} \ No newline at end of file diff --git a/bmvid/bmcv/test/bm1684x/test_bm1684x_vpp_random.cpp b/bmvid/bmcv/test/bm1684x/test_bm1684x_vpp_random.cpp index 06f09ad..26d62e8 100644 --- a/bmvid/bmcv/test/bm1684x/test_bm1684x_vpp_random.cpp +++ b/bmvid/bmcv/test/bm1684x/test_bm1684x_vpp_random.cpp @@ -15,6 +15,8 @@ #endif using namespace std; +#define MAX_INPUT 4088 + struct thread_arg{ int test_loop_times; int devid; @@ -81,8 +83,8 @@ static int test_vpp_random( 
srand(seed); - src_w = rand() % 8185 + 8; - src_h = rand() % 8185 + 8; + src_w = rand() % MAX_INPUT + 8; + src_h = rand() % MAX_INPUT + 8; src_fmt = src_format[rand() % (sizeof(src_format)/sizeof(bm_image_format_ext))]; dst_fmt = dst_format[rand() % (sizeof(dst_format)/sizeof(bm_image_format_ext))]; diff --git a/bmvid/bmcv/test/bm1684x/test_bm1684x_vpp_stitch_2way.cpp b/bmvid/bmcv/test/bm1684x/test_bm1684x_vpp_stitch_2way.cpp new file mode 100644 index 0000000..72ee065 --- /dev/null +++ b/bmvid/bmcv/test/bm1684x/test_bm1684x_vpp_stitch_2way.cpp @@ -0,0 +1,218 @@ +#ifndef SOC_MODE +#define PCIE_MODE +#endif + +#include +#include +#include +#include +#include +#include "bmcv_api.h" +#include "bmcv_api_ext.h" +#ifdef __linux__ + #include + #include +#endif + +#ifndef USING_CMODEL +#include "vpplib.h" +#endif + +extern void bm1684x_vpp_read_bin(bm_image src, const char *input_name); +extern void bm1684x_vpp_write_bin(bm_image dst, const char *output_name); +extern void format_to_str(bm_image_format_ext format, char* res); +void algorithm_to_str(bmcv_resize_algorithm algorithm, char* res) +{ + switch(algorithm) + { + case BMCV_INTER_NEAREST: + strcpy(res, "BMCV_INTER_NEAREST"); + break; + case BMCV_INTER_LINEAR: + strcpy(res, "BMCV_INTER_LINEAR"); + break; + case BMCV_INTER_BICUBIC: + strcpy(res, "BMCV_INTER_BICUBIC"); + break; + default: + printf("%s:%d[%s] Not found such algorithm.\n",__FILE__, __LINE__, __FUNCTION__); + break; + } +} + +typedef struct convert_ctx_{ + int loop; + int i; +}convert_ctx; + +int test_loop_times = 1; +int test_threads_num = 1; +int src_h = 1080, src_w = 1920, dst_w = 1920, dst_h = 2160, dev_id = 0; +bm_image_format_ext src_fmt = FORMAT_YUV420P, dst_fmt = FORMAT_YUV420P; +const char *src_name = "/opt/sophon/libsophon-current/bin/image/vpp_input/i420.yuv", *dst_name = "stitch_1920x2160_yuv420.bin"; + +bmcv_rect_t dst_rect0 = {.start_x = 0, .start_y = 0, .crop_w = 1920, .crop_h = 1080}; +bmcv_rect_t dst_rect1 = {.start_x = 0, .start_y = 1080, 
.crop_w = 1920, .crop_h = 1080}; +bmcv_resize_algorithm algorithm = BMCV_INTER_LINEAR; +bm_handle_t handle = NULL; + +static void * stitch(void* arg) { + bm_status_t ret; + convert_ctx ctx = *(convert_ctx*)arg; + bm_image src, dst; + unsigned int i = 0, loop_time = 0; + unsigned long long time_single, time_total = 0, time_avg = 0; + unsigned long long time_max = 0, time_min = 10000, fps_actual = 0, pixel_per_sec = 0; +#if SLEEP_ON + int fps = 60; + int sleep_time = 1000000 / fps; +#endif + + struct timeval tv_start; + struct timeval tv_end; + struct timeval timediff; + + loop_time = ctx.loop; + + bm_image_create(handle, src_h, src_w, src_fmt, DATA_TYPE_EXT_1N_BYTE, &src, NULL); + bm_image_create(handle, dst_h, dst_w, dst_fmt, DATA_TYPE_EXT_1N_BYTE, &dst, NULL); + + ret = bm_image_alloc_dev_mem(src,BMCV_HEAP_ANY); + if (ret != BM_SUCCESS) { + printf("bm_image_alloc_dev_mem_src. ret = %d\n", ret); + exit(-1); + } + ret = bm_image_alloc_dev_mem(dst,BMCV_HEAP_ANY); + if (ret != BM_SUCCESS) { + printf("bm_image_alloc_dev_mem_dst. 
ret = %d\n", ret); + exit(-1); + } + bm1684x_vpp_read_bin(src,src_name); + bmcv_rect_t rect = {.start_x = 0, .start_y = 0, .crop_w = src_w, .crop_h = src_h}; + bmcv_rect_t src_rect[2] = {rect, rect}; + bmcv_rect_t dst_rect[2] = {dst_rect0, dst_rect1}; + + bm_image input[2] = {src, src}; + + for(i = 0;i < loop_time; i++){ + gettimeofday(&tv_start, NULL); + + bmcv_image_vpp_stitch(handle, 2, input, dst, dst_rect, src_rect, BMCV_INTER_LINEAR); + + gettimeofday(&tv_end, NULL); + timediff.tv_sec = tv_end.tv_sec - tv_start.tv_sec; + timediff.tv_usec = tv_end.tv_usec - tv_start.tv_usec; + time_single = (unsigned int)(timediff.tv_sec * 1000000 + timediff.tv_usec); +#if SLEEP_ON + if(time_single < sleep_time) + usleep((sleep_time - time_single)); + gettimeofday(&tv_end, NULL); + timediff.tv_sec = tv_end.tv_sec - tv_start.tv_sec; + timediff.tv_usec = tv_end.tv_usec - tv_start.tv_usec; + time_single = (unsigned int)(timediff.tv_sec * 1000000 + timediff.tv_usec); +#endif + if(time_single>time_max){time_max = time_single;} + if(time_single%d*%d, %s->%s,%s\n",ctx.i,src_w,src_h,dst_w,dst_h,src_fmt_str,dst_fmt_str,algorithm_str); + printf("idx:%d, bmcv_image_stitch:loop %d cycles, time_max = %llu, time_avg = %llu, fps %llu, %lluM pps\n", + ctx.i, loop_time, time_max, time_avg, fps_actual, pixel_per_sec); + + return 0; +} + +static void print_help(char **argv){ + printf("please follow this order:\n \ + %s src_w src_h src_fmt src_name start_x0 start_y0 crop_w0 crop_h0 start_x1 start_y1 crop_w1 crop_h1 dst_w dst_h dst_name dev_id thread_num loop_num\n \ + %s thread_num loop_num\n", argv[0], argv[0]); +}; + +int main(int argc, char **argv) { + if (argc >= 19) { + test_threads_num = atoi(argv[17]); + test_loop_times = atoi(argv[18]); + } + if (argc >= 17) { + src_w = atoi(argv[1]); + src_h = atoi(argv[2]); + src_fmt = (bm_image_format_ext)atoi(argv[3]); + src_name = argv[4]; + dst_rect0.start_x = atoi(argv[5]); + dst_rect0.start_y = atoi(argv[6]); + dst_rect0.crop_w = atoi(argv[7]); + 
dst_rect0.crop_h = atoi(argv[8]); + dst_rect1.start_x = atoi(argv[9]); + dst_rect1.start_y = atoi(argv[10]); + dst_rect1.crop_w = atoi(argv[11]); + dst_rect1.crop_h = atoi(argv[12]); + dst_w = atoi(argv[13]); + dst_h = atoi(argv[14]); + dst_fmt = src_fmt; + dst_name = argv[15]; + dev_id = atoi(argv[16]); + } + if (argc == 2){ + if (atoi(argv[1]) < 0){ + print_help(argv); + exit(-1); + } else + test_threads_num = atoi(argv[1]); + } + else if (argc == 3){ + test_threads_num = atoi(argv[1]); + test_loop_times = atoi(argv[2]); + } else if (argc > 3 && argc < 17) { + printf("command input error\n"); + print_help(argv); + exit(-1); + } + int ret = (int)bm_dev_request(&handle, dev_id); + if (ret != 0) { + printf("Create bm handle failed. ret = %d\n", ret); + exit(-1); + } + convert_ctx ctx[test_threads_num]; + #ifdef __linux__ + pthread_t * pid = (pthread_t *)malloc(sizeof(pthread_t)*test_threads_num); + for (int i = 0; i < test_threads_num; i++) { + ctx[i].i = i; + ctx[i].loop = test_loop_times; + if (pthread_create( + &pid[i], NULL, stitch, (void *)(ctx + i))) { + free(pid); + perror("create thread failed\n"); + exit(-1); + } + } + for (int i = 0; i < test_threads_num; i++) { + ret = pthread_join(pid[i], NULL); + if (ret != 0) { + free(pid); + perror("Thread join failed"); + exit(-1); + } + } + bm_dev_free(handle); + printf("--------ALL THREADS TEST OVER---------\n"); + free(pid); + #endif + + return 0; +} diff --git a/bmvid/bmcv/test/bm1684x/test_bmcv_multi_thread.sh b/bmvid/bmcv/test/bm1684x/test_bmcv_multi_thread.sh new file mode 100644 index 0000000..179a55b --- /dev/null +++ b/bmvid/bmcv/test/bm1684x/test_bmcv_multi_thread.sh @@ -0,0 +1,52 @@ +#!/bin/bash + +# 在后台持续监控短期使用率,并输出到临时文件 +cat_proc() { + while true; do + cat /proc/vppinfo >> temp_vpp_usage.txt + sleep 1 + done +} + +# 启动监控函数 +cat_proc & + +# 记录监控函数的进程ID +VPPUSAGE=$! 
+echo "VPPUSAGE $VPPUSAGE" + +./test_bm1684x_vpp_multi_thread -t 96 -d 0 --sw 1920 --sh 1080 --sformat 2 --dw 1920 --dh 1080 --dformat 2 -l 1000 -c 0 --sname yuv444p.bin -m 0 & +VPPPID=$! +echo "VPPPID $VPPPID" + +pidstat -u -p $VPPPID 1 > temp_cpu_stats.txt & +PIDSTATPID=$! +echo "PIDSTATPID $PIDSTATPID" + +wait $VPPPID +kill $PIDSTATPID +kill $VPPUSAGE + +# 从临时文件中提取CPU利用率列,并计算平均值和峰值 +AVERAGE=$(awk '{sum += $4} END {print sum/NR}' temp_cpu_stats.txt) +PEAK=$(awk '{if ($4 > max) max = $4} END {print max}' temp_cpu_stats.txt) + +# 打印均值和峰值 +echo "Average CPU Usage: $AVERAGE%" +echo "Peak CPU Usage: $PEAK%" + +# 清理临时文件 +#rm temp_cpu_stats.txt + + +# 从临时文件中提取vpp短期使用率列,并计算平均值和峰值 +cat temp_vpp_usage.txt | awk -F'[:,|%]' ' + /"id":0/ {sum0 += $5; count0++; if ($5 > max0) max0 = $5} + /"id":1/ {sum1 += $5; count1++; if ($5 > max1) max1 = $5} + END {print "vpp core 0: Average Usage =", sum0/count0"%", "Peak Usage =", max0"%"; + print "vpp core 1: Average Usage =", sum1/count1"%", "Peak Usage =", max1"%"}' + +# 清理临时文件 +#rm temp_usage_stats.txt + + diff --git a/bmvid/bmcv/test/test_csc_convert_to.cpp b/bmvid/bmcv/test/test_csc_convert_to.cpp new file mode 100644 index 0000000..70fb848 --- /dev/null +++ b/bmvid/bmcv/test/test_csc_convert_to.cpp @@ -0,0 +1,225 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "test_misc.h" +#include "bmcv_api.h" +#include "bmcv_api_ext.h" +#include "bmlib_runtime.h" + +#ifdef __linux__ +#include +#include +#else +#include +#include "time.h" +#endif +extern void bm1684x_vpp_read_bin(bm_image src, const char *input_name); +extern void bm1684x_vpp_write_bin(bm_image dst, const char *output_name); +extern void format_to_str(bm_image_format_ext format, char* res); + +int main(int argc, char **argv) { + bm_handle_t handle = NULL; + int img_num = 1; + int src_h = 1080, src_w = 1920, dst_w = 640, dst_h = 640; + bm_image_format_ext src_fmt = FORMAT_YUV420P, dst_fmt = FORMAT_RGB_PACKED; + char 
*src_name = NULL, *dst_name = NULL, *dst_csv_name = NULL; + bm_image src, dst; + int crop_num_vec = 1; + bmcv_rect_t rect; + bmcv_padding_atrr_t padding_attr; + bmcv_resize_algorithm algorithm; + csc_type_t csc_type; + csc_matrix_t matrix; + bmcv_convert_to_attr * convert_to_attr = NULL; + int convert_to_need; + + unsigned int i = 0, loop_time = 0; + unsigned long long time_single, time_total = 0, time_avg = 0; + unsigned long long time_max = 0, time_min = 10000, fps = 0, pixel_per_sec = 0; + int dev_id = 0; + + bm_status_t ret = bm_dev_request(&handle, dev_id); + if (ret != BM_SUCCESS) { + printf("Create bm handle failed. ret = %d\n", ret); + exit(-1); + } + + #ifdef __linux__ + struct timeval tv_start; + struct timeval tv_end; + struct timeval timediff; + #endif + + if (argc != 21) { + printf("usage: %d\n", argc); + printf("%s img_num src_w src_h src_fmt src_name \ + dst_start_x dst_start_y dst_crop_w dst_crop_h \ + crop_num_vec algorithm csc_type \ + dst_w dst_h dst_fmt dst_name loop_time dst_csv_name dev_id\n", argv[0]); + printf("example:\n"); + printf("FORMAT_YUV420P-->FORMAT_RGB_PACKED:\n"); + printf("%s 1 1920 1080 0 i420.yuv 0 0 480 480 1 1 0 640 640 10 output.rgb 0 1 output.csv 0\n", argv[0]); + return 0; + } + + img_num = atoi(argv[1]); + src_w = atoi(argv[2]); + src_h = atoi(argv[3]); + src_fmt = (bm_image_format_ext)atoi(argv[4]); + src_name = argv[5]; + + rect.start_x = 0; + rect.start_y = 0; + rect.crop_w = src_w; + rect.crop_h = src_h; + + padding_attr.dst_crop_stx = atoi(argv[6]); + padding_attr.dst_crop_sty = atoi(argv[7]); + padding_attr.dst_crop_w = atoi(argv[8]); + padding_attr.dst_crop_h = atoi(argv[9]); + padding_attr.if_memset = 1; + padding_attr.padding_b = 255; + padding_attr.padding_g = 255; + padding_attr.padding_r = 255; + crop_num_vec = atoi(argv[10]); + algorithm = (bmcv_resize_algorithm)atoi(argv[11]); + csc_type = (csc_type_t)atoi(argv[12]); + + dst_w = atoi(argv[13]); + dst_h = atoi(argv[14]); + dst_fmt = 
(bm_image_format_ext)atoi(argv[15]); + dst_name = argv[16]; + convert_to_need = atoi(argv[17]); + loop_time = atoi(argv[18]); + dst_csv_name = argv[19]; + dev_id = atoi(argv[20]); + memset(&matrix, 0, sizeof(matrix)); + matrix.csc_add0 = 128 << 10; + matrix.csc_add1 = 128 << 10; + matrix.csc_add2 = 128 << 10; + if (convert_to_need != 0) { + convert_to_attr = new bmcv_convert_to_attr; + convert_to_attr->alpha_0 = ((float)(rand() % 20)) / 10.0f; + convert_to_attr->alpha_1 = ((float)(rand() % 20)) / 10.0f; + convert_to_attr->alpha_2 = ((float)(rand() % 20)) / 10.0f; + convert_to_attr->beta_0 = ((float)(rand() % 20)) / 10.0f - 1; + convert_to_attr->beta_1 = ((float)(rand() % 20)) / 10.0f - 1; + convert_to_attr->beta_2 = ((float)(rand() % 20)) / 10.0f - 1; + } else { + convert_to_attr = NULL; + } + + bm_image_create(handle, src_h, src_w, src_fmt, DATA_TYPE_EXT_1N_BYTE, &src); + bm_image_alloc_dev_mem(src, 1); + bm1684x_vpp_read_bin(src,src_name); + bm_image_create(handle, dst_h, dst_w, dst_fmt, DATA_TYPE_EXT_1N_BYTE, &dst); + bm_image_alloc_dev_mem(dst, 1); + + for(i = 0;i < loop_time; i++){ + +#ifdef __linux__ + gettimeofday(&tv_start, NULL); +#endif + + bmcv_image_csc_convert_to(handle, img_num, &src ,&dst, &crop_num_vec, &rect, &padding_attr, algorithm, csc_type, &matrix, convert_to_attr); + +#ifdef __linux__ + gettimeofday(&tv_end, NULL); + timediff.tv_sec = tv_end.tv_sec - tv_start.tv_sec; + timediff.tv_usec = tv_end.tv_usec - tv_start.tv_usec; + time_single = (unsigned int)(timediff.tv_sec * 1000000 + timediff.tv_usec); +#endif + + if(time_single>time_max){time_max = time_single;} + if(time_single%d*%d, %s->%s\n",src_w,src_h,dst_w,dst_h,src_fmt_str,dst_fmt_str); + printf("bmcv_image_csc_convert_to:loop %d cycles, time_avg = %llu, fps %llu, %lluM pps\n\n",loop_time, time_avg, fps, pixel_per_sec); + + bmlib_log("BMCV",BMLIB_LOG_TRACE, "loop %d cycles, time_max = %llu, time_min = %llu, time_avg = %llu\n", + loop_time, time_max, time_min, time_avg); + + FILE *fp_csv = 
fopen(dst_csv_name, "ab+"); + fprintf(fp_csv, "%s, %lld, %lld, %lldM\n",src_fmt_str,time_avg, fps, pixel_per_sec); + + fclose(fp_csv); + + return 0; + +} + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/bmvid/bmcv/test/test_cv_bayer2rgb.cpp b/bmvid/bmcv/test/test_cv_bayer2rgb.cpp index 41c0ea0..6cec9b3 100644 --- a/bmvid/bmcv/test/test_cv_bayer2rgb.cpp +++ b/bmvid/bmcv/test/test_cv_bayer2rgb.cpp @@ -1,23 +1,34 @@ -#include #include "bmcv_api_ext.h" #include "stdio.h" #include "stdlib.h" #include "string.h" -#include "test_misc.h" #include -#include -#include +#include +#include "test_misc.h" #ifdef __linux__ -#include -#include + #include + #include #else -#include -#include "time.h" + #include + #include "time.h" #endif -using namespace std; -#define KERNEL_SIZE 3 * 3 * 3 * 4 * 64 +pthread_mutex_t lock; +#define NPU_NUM 64 +#define KERNEL_SIZE 3 * 3 * 3 * 4 * NPU_NUM #define CONVD_MATRIX 12 * 9 +#define TIME_COST_US(start, end) ((end.tv_sec - start.tv_sec) * 1000000 + (end.tv_usec - start.tv_usec)) + +typedef struct { + int loop_num; + int height; + int width; + int src_type; + bool use_real_img; + char* input_path; + char* output_path; + bm_handle_t handle; +} bayer2rgb_thread_arg_t; void sleep_(unsigned long _t){ #ifdef __linux__ @@ -27,166 +38,167 @@ void sleep_(unsigned long _t){ #endif } -const unsigned char convd_kernel[CONVD_MATRIX] = {1, 0, 1, 0, 0, 0, 1, 0, 1, - 0, 0, 2, 0, 0, 0, 0, 0, 2, - 0, 0, 0, 0, 0, 0, 2, 0, 2, - 0, 0, 0, 0, 0, 0, 0, 0, 4, // r R - 4, 0, 0, 0, 0, 0, 0, 0, 0, // b B - 2, 0, 2, 0, 0, 0, 0, 0, 0, - 2, 0, 0, 0, 0, 0, 2, 0, 0, - 1, 0, 1, 0, 0, 0, 1, 0, 1, - 0, 1, 0, 1, 0, 1, 0, 1, 0, - 0, 0, 0, 0, 0, 4, 0, 0, 0, // g1 G1 - 0, 0, 0, 0, 0, 0, 0, 4, 0, // g2 G2 - 0, 1, 0, 1, 0, 1, 0, 1, 0}; - -vector>> kernel_r = {{{0.25, 0, 0.25}, {0, 0, 0}, {0.25, 0, 0.25}}, - {{0, 0, 0.5}, {0, 0, 0}, {0, 0, 0.5}}, - {{0, 0, 0}, {0, 0, 0}, {0.5, 0, 0.5}}, - {{0, 0, 0}, {0, 0, 0}, {0, 0, 
1}}}; -vector>> kernel_b = {{{1, 0, 0}, {0, 0, 0}, {0, 0, 0}}, - {{0.5, 0, 0.5}, {0, 0, 0}, {0, 0, 0}}, - {{0.5, 0, 0}, {0, 0, 0}, {0.5, 0, 0}}, - {{0.25, 0, 0.25}, {0, 0, 0}, {0.25, 0, 0.25}}}; -vector>> kernel_g = {{{0, 0.25, 0}, {0.25, 0, 0.25}, {0, 0.25, 0}}, - {{0, 0, 0}, {0, 0, 1}, {0, 0, 0}}, - {{0, 0, 0}, {0, 0, 0}, {0, 1, 0}}, - {{0, 0.25, 0}, {0.25, 0, 0.25}, {0, 0.25, 0}}}; - -void readBin(const char * path, unsigned char* input_data, int size) +static int parameters_check(int width, int height, int src_type) { - FILE *fp_src = fopen(path, "rb+"); - if (fread((void *)input_data, 1, size, fp_src) < (unsigned int)size){ + if (width % 2 != 0 || height % 2 != 0){ + printf("Unsupported value : Odd widths and heights are not supported \n"); + return -1; + } + if (src_type != 0 && src_type != 1) { + printf("src_type only support 0(bg8) or 1(rg8) now! \n"); + return -1; + } + return 0; +} + +const unsigned char convd_kernel_bg8[CONVD_MATRIX] = {1, 0, 1, 0, 0, 0, 1, 0, 1, //Rb + 0, 0, 2, 0, 0, 0, 0, 0, 2, //Rg1 + 0, 0, 0, 0, 0, 0, 2, 0, 2, //Rg2 + 0, 0, 0, 0, 0, 0, 0, 0, 4, //Rr + 4, 0, 0, 0, 0, 0, 0, 0, 0, //Bb + 2, 0, 2, 0, 0, 0, 0, 0, 0, //Bg1 + 2, 0, 0, 0, 0, 0, 2, 0, 0, //Bg2 + 1, 0, 1, 0, 0, 0, 1, 0, 1, //Br + 0, 1, 0, 1, 0, 1, 0, 1, 0, //Gb + 0, 0, 0, 0, 0, 4, 0, 0, 0, //Gg1 + 0, 0, 0, 0, 0, 0, 0, 4, 0, //Gg2 + 0, 1, 0, 1, 0, 1, 0, 1, 0};//Gr +const unsigned char convd_kernel_rg8[CONVD_MATRIX] = {4, 0, 0, 0, 0, 0, 0, 0, 0, //Rr + 2, 0, 2, 0, 0, 0, 0, 0, 0, //Rg1 + 2, 0, 0, 0, 0, 0, 2, 0, 0, //Rg2 + 1, 0, 1, 0, 0, 0, 1, 0, 1, //Rb + 1, 0, 1, 0, 0, 0, 1, 0, 1, //Br + 0, 0, 2, 0, 0, 0, 0, 0, 2, //Bg1 + 0, 0, 0, 2, 0, 2, 0, 0, 0, //Bg2 + 0, 0, 0, 0, 0, 0, 0, 0, 4, //Bb + 1, 0, 1, 0, 0, 0, 1, 0, 1, //Gr + 0, 0, 0, 0, 0, 4, 0, 0, 0, //Gg1 + 0, 0, 0, 0, 0, 0, 0, 4, 0, //Gg2 + 0, 1, 0, 1, 0, 1, 0, 1, 0};//Gb + +void readBin(const char *path, unsigned char* input_data, int size) { + FILE *fp_src = fopen(path, "rb"); + if (fp_src == NULL) { + printf("无法打开输出文件 %s\n", 
path); + return; + } + if (fread((void *)input_data, 1, size, fp_src) < (unsigned int)size) { printf("file size is less than %d required bytes\n", size); }; - fclose(fp_src); } -int resultCompare(unsigned char* output_data, vector ref_data) { - int vec_size = ref_data.size(); - - for (int i = 0;i < vec_size;i++) { - if (abs(output_data[i] - ref_data[i]) > 1) { - printf("output_data[%d] = %u, ref_data[%d] = %u\n", i, output_data[i], i, ref_data[i]); - return -1; +void ReflectionPad2d(unsigned char* srcImg, unsigned char* srcImg_padding, int h, int w) { + int pad_w = w + 2; + int pad_h = h + 2; + // Copy srcImg (center) + for (int y = 0; y < h; y++) { + for (int x = 0; x < w; x++) { + srcImg_padding[(y + 1) * pad_w + (x + 1)] = srcImg[y * w + x]; } } - - return 0; -} - -int resultCompare(unsigned char* output_data, vector> dstImgCpu_r, vector> dstImgCpu_g, vector> dstImgCpu_b) { - int height = dstImgCpu_r.size(); - int width = dstImgCpu_r[0].size(); - - for (int i = 0;i < height;i++) - for (int j = 0;j < width;j++) { - if (abs(dstImgCpu_r[i][j] - output_data[i * width + j]) > 1) { - printf(" dstImgCpu_r , i = %d, j = %d\n", i, j); - printf(" dstImgCpu_r = %u, output_tpu = %u\n", dstImgCpu_r[i][j], output_data[i * width + j]); - return -1; - } - - if (abs(dstImgCpu_g[i][j] - output_data[height * width + i * width + j]) > 1) { - printf(" dstImgCpu_g , i = %d, j = %d\n", i, j); - printf(" dstImgCpu_g = %u, output_tpu = %u\n", dstImgCpu_g[i][j], output_data[height * width + i * width + j]); - return -1; - } - - if (abs(dstImgCpu_b[i][j] - output_data[height * width * 2 + i * width + j]) > 1) { - printf(" dstImgCpu_b , i = %d, j = %d\n", i, j); - printf(" dstImgCpu_b = %u, output_tpu = %u\n", dstImgCpu_b[i][j], output_data[height * width * 2 + i * width + j]); - return -1; - } - } - - return 0; + // Top and bottom padding + for (int x = 0; x < pad_w; x++) { + srcImg_padding[x] = srcImg_padding[2 * pad_w + x]; // Top padding + srcImg_padding[(pad_h - 1) * pad_w + x] = 
srcImg_padding[(pad_h - 3) * pad_w + x]; // Bottom padding + } + // Left and right padding + for (int y = 0; y < pad_h; y++) { + srcImg_padding[y * pad_w] = srcImg_padding[y * pad_w + 2]; // Left padding + srcImg_padding[y * pad_w + (pad_w - 1)] = srcImg_padding[y * pad_w + (pad_w - 3)]; // Right padding + } } -vector convertRGBPlanarToPacked(vector> dstImgCpu_r, vector> dstImgCpu_g, - vector> dstImgCpu_b, int width, int height) { - vector dstData; +// Function to generate the final matrix +void gen_matrix_final(float** vector_r, float** vector_g1, float** vector_g2, float** vector_b, + int rows, int cols, float** vector_temp3) { + float** vector_temp1 = (float**)malloc(rows * sizeof(float*)); + float** vector_temp2 = (float**)malloc(rows * sizeof(float*)); - for (int y = 0; y < height; ++y) { - for (int x = 0; x < width; ++x) { - // rerange the rgb pixel - dstData.push_back(dstImgCpu_r[y][x]); - dstData.push_back(dstImgCpu_g[y][x]); - dstData.push_back(dstImgCpu_b[y][x]); + for (int i = 0; i < rows; ++i) { + vector_temp1[i] = (float*)malloc(2 * cols * sizeof(float)); + vector_temp2[i] = (float*)malloc(2 * cols * sizeof(float)); + } + for (int i = 0; i < rows; ++i) { + for (int j = 0; j < 2 * cols; ++j) { + vector_temp1[i][j] = 0.0; + vector_temp2[i][j] = 0.0; } } - return dstData; -} - -vector> gen_matrix_final(vector> vector_r, vector> vector_g1, vector> vector_g2, vector> vector_b) -{ - vector> vector_temp1(vector_r.size(), vector(vector_r[0].size() * 2, 0)); - vector> vector_temp2(vector_r.size(), vector(vector_r[0].size() * 2, 0)); - vector> vector_temp3(vector_r.size() * 2, vector(vector_r[0].size() * 2, 0)); - - for (size_t i = 0; i < vector_r.size(); ++i) { - for (size_t j = 0; j < vector_r[i].size(); ++j) { + // Fill vector_temp1 + for (int i = 0; i < rows; ++i) { + for (int j = 0; j < cols; ++j) { vector_temp1[i][2 * j] = vector_r[i][j]; vector_temp1[i][2 * j + 1] = vector_g1[i][j]; } } - for (size_t i = 0; i < vector_g2.size(); ++i) { - for (size_t j = 
0; j < vector_g2[i].size(); ++j) { + // Fill vector_temp2 + for (int i = 0; i < rows; ++i) { + for (int j = 0; j < cols; ++j) { vector_temp2[i][2 * j] = vector_g2[i][j]; vector_temp2[i][2 * j + 1] = vector_b[i][j]; } } - for (size_t i = 0; i < vector_temp1.size(); ++i) { - for (size_t j = 0; j < vector_temp1[i].size(); ++j) { - vector_temp3[2 * i][j] = vector_temp1[i][j]; + // Fill vector_temp3 + for (int i = 0; i < rows; ++i) { + for (int j = 0; j < 2 * cols; ++j) { + vector_temp3[2 * i][j] = vector_temp1[i][j]; vector_temp3[2 * i + 1][j] = vector_temp2[i][j]; } } - return vector_temp3; -} - -vector> ReflectionPad2d(vector> srcImg, int height) { - vector second_row = srcImg[1]; - vector last_second_row = srcImg[height - 2]; - - // padding up & bottom - srcImg.insert(srcImg.begin(), second_row); - srcImg.push_back(last_second_row); - - // padding left & right - for (auto& row : srcImg) { - row.insert(row.begin(), row[1]); - } - - for (auto& row : srcImg) { - row.insert(row.end(), row[row.size() - 2]); + // Free memory for temporary matrices + for (int i = 0; i < rows; ++i) { + free(vector_temp1[i]); + free(vector_temp2[i]); } - return srcImg; + free(vector_temp1); + free(vector_temp2); } -vector> convolution_rb(const vector>& input, vector>> kernel, int stride, int offset) { - // output size - int inputRows = input.size(); - int inputCols = input[0].size(); - - // input size - int outputRows = (inputRows - 3) / stride + 1; - int outputCols = (inputCols - 3) / stride + 1; - vector> input_float(input.size(), vector(input[0].size())); - vector> output_float(input.size() - 2, vector(input[0].size() - 2)); - vector> output_rb(outputRows, vector(outputCols, 0)); - vector> output_rg1(outputRows, vector(outputCols, 0)); - vector> output_rg2(outputRows, vector(outputCols, 0)); - vector> output_rr(outputRows, vector(outputCols, 0)); - +void convolution_g(const unsigned char* input, unsigned char* dstImgCpu_g, float* kernel, int stride, int offset, int pad_h, int pad_w) { + 
// Calculate output size + int outputRows = (pad_h - 3) / stride + 1; + int outputCols = (pad_w - 3) / stride + 1; + + // Define dimensions for input and output matrices + int rows = pad_h - 2; + int cols = pad_w - 2; + + // Allocate memory for input matrices and output matrix + float** input_float = (float**)malloc(pad_h * sizeof(float*)); + float** output_float = (float**)malloc(rows * sizeof(float*)); + float** output_gr = (float**)malloc(outputRows * sizeof(float*)); + float** output_gg1 = (float**)malloc(outputRows * sizeof(float*)); + float** output_gg2 = (float**)malloc(outputRows * sizeof(float*)); + float** output_gb = (float**)malloc(outputRows * sizeof(float*)); + + for (int i = 0; i < pad_h; ++i) { + input_float[i] = (float*)malloc(pad_w * sizeof(float)); + } + for (int i = 0; i < rows; ++i) { + output_float[i] = (float*)malloc(cols * sizeof(float)); + } + for (int i = 0; i < outputRows; ++i) { + output_gr[i] = (float*)malloc(outputCols * sizeof(float)); + output_gg1[i] = (float*)malloc(outputCols * sizeof(float)); + output_gg2[i] = (float*)malloc(outputCols * sizeof(float)); + output_gb[i] = (float*)malloc(outputCols * sizeof(float)); + } + for (int i = 0; i < outputRows; i++) { + for (int j = 0; j < outputCols; j++) { + output_gr[i][j] = 0.0; + output_gg1[i][j] = 0.0; + output_gg2[i][j] = 0.0; + output_gb[i][j] = 0.0; + } + } // uchar2float - for (size_t i = 0; i < input.size(); ++i) { - for (size_t j = 0; j < input[i].size(); ++j) { - input_float[i][j] = static_cast(input[i][j]); + for (int i = 0; i < pad_h; ++i) { + for (int j = 0; j < pad_w; ++j) { + input_float[i][j] = (float)input[i * pad_w + j]; } } @@ -195,227 +207,370 @@ vector> convolution_rb(const vector> for (int j = 0; j < outputCols; j++) { for (int k = 0; k < 3; k++) { for (int l = 0; l < 3; l++) { - output_rb[i][j] += (input_float[i * stride + k + offset][j * stride + l + offset] * kernel[0][k][l]); - output_rg1[i][j] += (input_float[i * stride + k + offset][j * stride + l + offset] * 
kernel[1][k][l]); - output_rg2[i][j] += (input_float[i * stride + k + offset][j * stride + l + offset] * kernel[2][k][l]); - output_rr[i][j] += (input_float[i * stride + k + offset][j * stride + l + offset] * kernel[3][k][l]); + output_gr[i][j] += (input_float[i * stride + k + offset][j * stride + l + offset] * kernel[3 * k + l]); + output_gg1[i][j] += (input_float[i * stride + k + offset][j * stride + l + offset] * kernel[9 + 3 * k + l]); + output_gg2[i][j] += (input_float[i * stride + k + offset][j * stride + l + offset] * kernel[18 + 3 * k + l]); + output_gb[i][j] += (input_float[i * stride + k + 1][j * stride + l + 1] * kernel[27 + 3 * k + l]); } } } } // debayer - output_float = gen_matrix_final(output_rb, output_rg1, output_rg2, output_rr); + gen_matrix_final(output_gr, output_gg1, output_gg2, output_gb, outputRows, outputCols, output_float); // float2uchar - vector> uchar_output(output_float.size(), vector(output_float[0].size())); - for (size_t i = 0; i < output_float.size(); ++i) { - for (size_t j = 0; j < output_float[i].size(); ++j) { - uchar_output[i][j] = static_cast(output_float[i][j]); + for (int i = 0; i < rows; ++i) { + for (int j = 0; j < cols; ++j) { + dstImgCpu_g[i * cols + j] = (unsigned char)output_float[i][j]; } } - return uchar_output; + // Free memory for input and output matrices + for (int i = 0; i < pad_h; ++i) { + free(input_float[i]); + } + for (int i = 0; i < rows; ++i) { + free(output_float[i]); + } + for (int i = 0; i < outputRows; ++i) { + free(output_gr[i]); + free(output_gg1[i]); + free(output_gg2[i]); + free(output_gb[i]); + } + free(input_float); + free(output_float); + free(output_gr); + free(output_gg1); + free(output_gg2); + free(output_gb); } -// conv2d -vector> convolution_g(const vector>& input, vector>> kernel, int stride, int offset) { - // intput size - int inputRows = input.size(); - int inputCols = input[0].size(); - - // output size - int outputRows = (inputRows - 3) / stride + 1; - int outputCols = (inputCols - 3) 
/ stride + 1; - - vector> input_float(input.size(), vector(input[0].size())); - vector> output_float(input.size() - 2, vector(input[0].size() - 2)); +void convolution_rb(const unsigned char* input, unsigned char* dstImgCpu_r, float* kernel, int stride, int offset, int pad_h, int pad_w) { + int outputRows = (pad_h - 3) / stride + 1; + int outputCols = (pad_w - 3) / stride + 1; + + // Define dimensions for input and output matrices + int rows = pad_h - 2; + int cols = pad_w - 2; + // Allocate memory for input matrices and output matrix + float** input_float = (float**)malloc(pad_h * sizeof(float*)); + float** output_float = (float**)malloc(rows * sizeof(float*)); + float** output_rr = (float**)malloc(outputRows * sizeof(float*)); + float** output_rg1 = (float**)malloc(outputRows * sizeof(float*)); + float** output_rg2 = (float**)malloc(outputRows * sizeof(float*)); + float** output_rb = (float**)malloc(outputRows * sizeof(float*)); + for (int i = 0; i < pad_h; ++i) { + input_float[i] = (float*)malloc(pad_w * sizeof(float)); + } + for (int i = 0; i < rows; ++i) { + output_float[i] = (float*)malloc(cols * sizeof(float)); + } + for (int i = 0; i < outputRows; ++i) { + output_rr[i] = (float*)malloc(outputCols * sizeof(float)); + output_rg1[i] = (float*)malloc(outputCols * sizeof(float)); + output_rg2[i] = (float*)malloc(outputCols * sizeof(float)); + output_rb[i] = (float*)malloc(outputCols * sizeof(float)); + } + for (int i = 0; i < outputRows; i++) { + for (int j = 0; j < outputCols; j++) { + output_rr[i][j] = 0.0; + output_rg1[i][j] = 0.0; + output_rg2[i][j] = 0.0; + output_rb[i][j] = 0.0; + } + } // uchar2float - for (size_t i = 0; i < input.size(); ++i) { - for (size_t j = 0; j < input[i].size(); ++j) { - input_float[i][j] = static_cast(input[i][j]); + for (int i = 0; i < pad_h; ++i) { + for (int j = 0; j < pad_w; ++j) { + input_float[i][j] = (float)input[i * pad_w + j]; } } - - vector> output_rb(outputRows, vector(outputCols, 0)); - vector> output_rg1(outputRows, 
vector(outputCols, 0)); - vector> output_rg2(outputRows, vector(outputCols, 0)); - vector> output_rr(outputRows, vector(outputCols, 0)); // special - // r based on B for (int i = 0; i < outputRows; i++) { for (int j = 0; j < outputCols; j++) { for (int k = 0; k < 3; k++) { for (int l = 0; l < 3; l++) { - output_rb[i][j] += (input_float[i * stride + k + offset][j * stride + l + offset] * kernel[0][k][l]); - output_rg1[i][j] += (input_float[i * stride + k + offset][j * stride + l + offset] * kernel[1][k][l]); - output_rg2[i][j] += (input_float[i * stride + k + offset][j * stride + l + offset] * kernel[2][k][l]); - output_rr[i][j] += (input_float[i * stride + k + 1][j * stride + l + 1] * kernel[3][k][l]); + output_rr[i][j] += (input_float[i * stride + k + offset][j * stride + l + offset] * kernel[3 * k + l]); + output_rg1[i][j] += (input_float[i * stride + k + offset][j * stride + l + offset] * kernel[9 + 3 * k + l]); + output_rg2[i][j] += (input_float[i * stride + k + offset][j * stride + l + offset] * kernel[18 + 3 * k + l]); + output_rb[i][j] += (input_float[i * stride + k + offset][j * stride + l + offset] * kernel[27 + 3 * k + l]); } } } } // debayer - output_float = gen_matrix_final(output_rb, output_rg1, output_rg2, output_rr); - + gen_matrix_final(output_rr, output_rg1, output_rg2, output_rb, outputRows, outputCols, output_float); // float2uchar - vector> uchar_output(output_float.size(), vector(output_float[0].size())); - for (size_t i = 0; i < output_float.size(); ++i) { - for (size_t j = 0; j < output_float[i].size(); ++j) { - uchar_output[i][j] = static_cast(output_float[i][j]); + for (int i = 0; i < rows; ++i) { + for (int j = 0; j < cols; ++j) { + dstImgCpu_r[i * cols + j] = (unsigned char)output_float[i][j]; } } + // Free memory for input and output matrices + for (int i = 0; i < pad_h; ++i) { + free(input_float[i]); + } + for (int i = 0; i < rows; ++i) { + free(output_float[i]); + } + for (int i = 0; i < outputRows; ++i) { + free(output_rr[i]); + 
free(output_rg1[i]); + free(output_rg2[i]); + free(output_rb[i]); + } + free(input_float); + free(output_float); + free(output_rr); + free(output_rg1); + free(output_rg2); + free(output_rb); +} - return uchar_output; +void bayer2rgb_cpu(unsigned char* dstImgCpu_r, unsigned char* dstImgCpu_g, unsigned char* dstImgCpu_b, + unsigned char* srcImg, int height, int width, int src_type) { + int pad_w = width + 2; + int pad_h = height + 2; + unsigned char* srcImg_padding = (unsigned char*)malloc(pad_h * pad_w); + ReflectionPad2d(srcImg, srcImg_padding, height, width); + if(src_type == 0) { + float array_kernel_r[36] = {0.25, 0, 0.25, 0, 0, 0, 0.25, 0, 0.25, + 0, 0, 0.5, 0, 0, 0, 0, 0, 0.5, + 0, 0, 0, 0, 0, 0, 0.5, 0, 0.5, + 0, 0, 0, 0, 0, 0, 0, 0, 1}; + float array_kernel_g[36] = {0, 0.25, 0, 0.25, 0, 0.25, 0, 0.25, 0, + 0, 0, 0, 0, 0, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 1, 0, + 0, 0.25, 0, 0.25, 0, 0.25, 0, 0.25, 0}; + float array_kernel_b[36] = {1, 0, 0, 0, 0, 0, 0, 0, 0, + 0.5, 0, 0.5, 0, 0, 0, 0, 0, 0, + 0.5, 0, 0, 0, 0, 0, 0.5, 0, 0, + 0.25, 0, 0.25, 0, 0, 0, 0.25, 0, 0.25}; + convolution_rb(srcImg_padding, dstImgCpu_r, array_kernel_r, 2, 0, pad_h, pad_w); + convolution_g(srcImg_padding, dstImgCpu_g, array_kernel_g, 2, 0, pad_h, pad_w); + convolution_rb(srcImg_padding, dstImgCpu_b, array_kernel_b, 2, 1, pad_h, pad_w); + } else { + float array_kernel_r[36] = {1, 0, 0, 0, 0, 0, 0, 0, 0, + 0.5, 0, 0.5, 0, 0, 0, 0, 0, 0, + 0.5, 0, 0, 0, 0, 0, 0.5, 0, 0, + 0.25, 0, 0.25, 0, 0, 0, 0.25, 0, 0.25}; + float array_kernel_g[36] = {0.25, 0, 0.25, 0, 0, 0, 0.25, 0, 0.25, + 0, 0, 0, 0, 0, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 1, 0, + 0, 0.25, 0, 0.25, 0, 0.25, 0, 0.25, 0}; + float array_kernel_b[36] = {0.25, 0, 0.25, 0, 0, 0, 0.25, 0, 0.25, + 0, 0, 0.5, 0, 0, 0, 0, 0, 0.5, + 0, 0, 0, 0.5, 0, 0.5, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 1}; + convolution_rb(srcImg_padding, dstImgCpu_r, array_kernel_r, 2, 1, pad_h, pad_w); + convolution_g(srcImg_padding, dstImgCpu_g, array_kernel_g, 2, 0, 
pad_h, pad_w); + convolution_rb(srcImg_padding, dstImgCpu_b, array_kernel_b, 2, 0, pad_h, pad_w); + } + free(srcImg_padding); } -static int bayer2rgb_tpu( - bm_handle_t handle, - unsigned char* input, - unsigned char* output, - unsigned char* convd_kernel, - int height, - int width, - int dst_fmt) { +static int bayer2rgb_tpu(bm_handle_t handle, unsigned char* input, unsigned char* output, + unsigned char* convd_kernel, int height, int width, int src_type) { + struct timeval t1, t2; bm_image input_img; bm_image output_img; - bm_image_create(handle, height, width, FORMAT_BAYER, DATA_TYPE_EXT_1N_BYTE, &input_img); - if (dst_fmt == FORMAT_RGB_PACKED) - bm_image_create(handle, height, width, FORMAT_RGB_PACKED, DATA_TYPE_EXT_1N_BYTE, &output_img); - else if (dst_fmt == FORMAT_RGB_PLANAR) - bm_image_create(handle, height, width, FORMAT_RGB_PLANAR, DATA_TYPE_EXT_1N_BYTE, &output_img); - else - printf(" Not support the fmt!\n"); + bm_status_t ret; + pthread_mutex_lock(&lock); + if (src_type == 0) { + ret = bm_image_create(handle, height, width, FORMAT_BAYER, DATA_TYPE_EXT_1N_BYTE, &input_img, NULL); + } else { + ret = bm_image_create(handle, height, width, FORMAT_BAYER_RG8, DATA_TYPE_EXT_1N_BYTE, &input_img, NULL); + } + if(ret != BM_SUCCESS){ + return -1; + } + bm_image_create(handle, height, width, FORMAT_RGB_PLANAR, DATA_TYPE_EXT_1N_BYTE, &output_img, NULL); + bm_image_alloc_dev_mem(input_img, BMCV_HEAP_ANY); bm_image_alloc_dev_mem(output_img, BMCV_HEAP_ANY); - - unsigned char* input_data[3] = {input, input + height * width, input + 2 * height * width}; - bm_image_copy_host_to_device(input_img, (void **)input_data); - - bm_profile_t start, end; - memset(&start, 0, sizeof(start)); - memset(&end, 0, sizeof(end)); - bm_get_profile(handle, &start); + bm_image_copy_host_to_device(input_img, (void **)(&input)); + gettimeofday_(&t1); bmcv_image_bayer2rgb(handle, convd_kernel, input_img, output_img); - bm_get_profile(handle, &end); - size_t npu_time = end.tpu_process_time - 
start.tpu_process_time; - cout<<"bayer2rgb time="<>& dstImgCpu_r, vector>& dstImgCpu_g, vector>& dstImgCpu_b, - vector> srcImg, vector>> kernel_r, vector>> kernel_g, - vector>> kernel_b, vector& ref_data, int height, int width) { - vector> srcImg_padding(height + 2, vector(width + 2, 0)); - srcImg_padding = ReflectionPad2d(srcImg, height); - dstImgCpu_r = convolution_rb(srcImg_padding, kernel_r, 2, 0); - dstImgCpu_g = convolution_g(srcImg_padding, kernel_g, 2, 0); - dstImgCpu_b = convolution_rb(srcImg_padding, kernel_b, 2, 1); - ref_data = convertRGBPlanarToPacked(dstImgCpu_r, dstImgCpu_g, dstImgCpu_b, width, height); - } - -int test_bayer2rgb_random(bm_handle_t handle, int height, int width, int dst_fmt, int use_real_img, const char *src_name) { - struct timespec tp; - clock_gettime_(0, &tp); - bool fixed = false; - int seed = 0; - seed = (fixed) ? seed : tp.tv_nsec; - srand(seed); - printf("seed = %d\n", seed); +int resultCompare2(unsigned char* output_data, unsigned char* dstImgCpu_r, unsigned char* dstImgCpu_g, unsigned char* dstImgCpu_b, int width, int height) { + + for (int i = 0;i < height;i++) + for (int j = 0;j < width;j++) { + if (abs(dstImgCpu_r[i * width + j] - output_data[i * width + j]) > 1) { + printf(" dstImgCpu_r , i = %d, j = %d\n", i, j); + printf(" dstImgCpu_r = %u, output_tpu = %u\n", dstImgCpu_r[i * width + j], output_data[i * width + j]); + return -1; + } + + if (abs(dstImgCpu_g[i * width + j] - output_data[height * width + i * width + j]) > 1) { + printf(" dstImgCpu_g , i = %d, j = %d\n", i, j); + printf(" dstImgCpu_g = %u, output_tpu = %u\n", dstImgCpu_g[i * width + j], output_data[height * width + i * width + j]); + return -1; + } + + if (abs(dstImgCpu_b[i * width + j] - output_data[height * width * 2 + i * width + j]) > 1) { + printf(" dstImgCpu_b , i = %d, j = %d\n", i, j); + printf(" dstImgCpu_b = %u, output_tpu = %u\n", dstImgCpu_b[i * width + j], output_data[height * width * 2 + i * width + j]); + return -1; + } + } - int ret = -1; - 
printf("dst_fmt = %d, height = %d, width = %d\n", dst_fmt, height, width); - unsigned char* output_tpu = new unsigned char [width * height * 3]; - unsigned char* input_data = new unsigned char [height * width]; - vector> srcImg(height, vector(width, 0)); - vector> dstImgCpu_r; - vector> dstImgCpu_g; - vector> dstImgCpu_b; - vector ref_data(3 * height * width); - unsigned char kernel_data[KERNEL_SIZE]; - memset(kernel_data, 0, KERNEL_SIZE); + return 0; +} + +int test_bayer2rgb_random(bm_handle_t handle, int height, int width, bool use_real_img, + const char *src_name, const char *output_path, int src_type) { + int ret = 0; + struct timeval t1, t2; + printf("height = %d, width = %d, src_type = %d\n", height, width, src_type); + unsigned char* output_tpu = (unsigned char*)malloc(width * height * 3); + unsigned char* input_data = (unsigned char*)malloc(width * height); + unsigned char* srcImg = (unsigned char*)malloc(width * height); + unsigned char* dstImgCpu_r = (unsigned char*)malloc(width * height * 3); + unsigned char* dstImgCpu_g = (unsigned char*)malloc(width * height * 3); + unsigned char* dstImgCpu_b = (unsigned char*)malloc(width * height * 3); + unsigned char kernel_data[KERNEL_SIZE] = {0}; // reconstructing kernel data for (int i = 0;i < 12;i++) { for (int j = 0;j < 9;j++) { - kernel_data[i * 9 * 64 + 64 * j] = convd_kernel[i * 9 + j]; + if (src_type == 0) { + kernel_data[i * 9 * NPU_NUM + NPU_NUM * j] = convd_kernel_bg8[i * 9 + j]; + } else { + kernel_data[i * 9 * NPU_NUM + NPU_NUM * j] = convd_kernel_rg8[i * 9 + j]; + } } } - // constructing input data - if (use_real_img) { + if (use_real_img == true) { printf("test real image !\n"); readBin(src_name, input_data, height * width); - for (int i = 0;i < height;i++) + for (int i = 0;i < height;i++) { for (int j = 0;j < width;j++) { - srcImg[i][j] = input_data[i * width + j];; + srcImg[i * width + j] = input_data[i * width + j]; } + } } else { - printf("test random data !\n"); - for (int i = 0;i < height;i++) 
+ for (int i = 0;i < height;i++) { for (int j = 0;j < width;j++) { input_data[i * width + j] = rand() % 255; - srcImg[i][j] = input_data[i * width + j]; + srcImg[i * width + j] = input_data[i * width + j]; } + } } - - // cal the reference - bayer2rgb_cpu(dstImgCpu_r, dstImgCpu_g, dstImgCpu_b, - srcImg, kernel_r, kernel_g, kernel_b, ref_data, height, width); - - bayer2rgb_tpu( - handle, - input_data, - output_tpu, - kernel_data, - height, - width, - dst_fmt); - - ret = dst_fmt == FORMAT_RGB_PACKED ? resultCompare(output_tpu, ref_data) : resultCompare(output_tpu, dstImgCpu_r, dstImgCpu_g, dstImgCpu_b); - ret == 0 ? printf(" bayer2rgb successful! \n") : printf(" bayer2rgb failed! \n"); - - if (use_real_img == 1) { - FILE *fp_dst = fopen("bayer2rgb.bin", "wb"); - fwrite((void *)output_tpu, 1, height * width * 3, fp_dst); + gettimeofday_(&t1); + bayer2rgb_cpu(dstImgCpu_r, dstImgCpu_g, dstImgCpu_b,srcImg, height, width, src_type); + gettimeofday_(&t2); + + printf("Bayer2rgb CPU using time = %ld(us)\n", TIME_COST_US(t1, t2)); + ret = bayer2rgb_tpu(handle, input_data, output_tpu, kernel_data, height, width, src_type); + + if (ret != 0) { + free(output_tpu); + free(input_data); + free(srcImg); + free(dstImgCpu_r); + free(dstImgCpu_g); + free(dstImgCpu_b); + return ret; + } + ret = resultCompare2(output_tpu, dstImgCpu_r, dstImgCpu_g, dstImgCpu_b, width, height); + ret == 0 ? printf(" compare successful! \n") : printf(" compare failed! 
\n"); + if (use_real_img == true) { + FILE *fp_dst = fopen(output_path, "wb"); + if (fp_dst == NULL) { + printf("无法打开输出文件 %s\n", output_path); + ret = -1; + } else { + fwrite((void *)output_tpu, 1, height * width * 3, fp_dst); + } fclose(fp_dst); } - - delete[] output_tpu; - delete[] input_data; - + free(output_tpu); + free(input_data); + free(srcImg); + free(dstImgCpu_r); + free(dstImgCpu_g); + free(dstImgCpu_b); return ret; } +void* test_bayer2rgb(void* args) { + bayer2rgb_thread_arg_t* bayer2rgb_thread_arg = (bayer2rgb_thread_arg_t*)args; + bm_handle_t handle = bayer2rgb_thread_arg->handle; + int loop_num = bayer2rgb_thread_arg->loop_num; + int height = bayer2rgb_thread_arg->height; + int width = bayer2rgb_thread_arg->width; + int src_type = bayer2rgb_thread_arg->src_type; + bool use_real_img = bayer2rgb_thread_arg->use_real_img; + const char* input_path = bayer2rgb_thread_arg->input_path; + const char* output_path = bayer2rgb_thread_arg->output_path; + for (int i = 0; i < loop_num; i++) { + if(loop_num > 1){ + height = 2 + rand() % 2047 * 2; + width = 2 + rand() % 2047 * 2; + } + if (0 != test_bayer2rgb_random(handle, height, width, use_real_img, input_path, output_path, src_type)) { + printf("------TEST BAYER2RGB FAILED------\n"); + exit(-1); + } + printf("------TEST BAYER2RGB PASSED!------\n"); + } + return NULL; +} + int main(int argc, char* args[]) { - int loop = 0; - int dst_fmt = FORMAT_RGB_PLANAR; + struct timespec tp; + clock_gettime_(0, &tp); + int seed = tp.tv_nsec; + srand(seed); + int loop = 1; int use_real_img = 0; - int height = 1024; - int width = 1024; - char *src_name = nullptr; - - if (argc < 3) { - printf("usage: %d\n", argc); - printf("%s use_real_img dst_fmt height width src_name(when use_real_img = 1,need to set height, width and src_name) \n", args[0]); + int width = 2 + rand() % 2047 * 2; + int height = 2 + rand() % 2047 * 2; + int src_type = rand() % 2; + int thread_num = 1; + int check = 0; + char *input_path = NULL; + char 
*output_path = NULL; + + if (argc == 2 && atoi(args[1]) == -1) { + printf("usage:\n"); + printf("%s thread_num loop use_real_img width height src_type(0-bg8 1-rg8) input_path output_path\n", args[0]); printf("example:\n"); - printf("FORMAT_BAYER-->FORMAT_RGB_PLANNAR:\n"); - printf("%s 0 8\n", args[0]); - printf("%s 1 8 1024 1024 bayer.bin\n", args[0]); + printf("%s \n", args[0]); + printf("%s 2\n", args[0]); + printf("%s 2 1\n", args[0]); + printf("%s 1 1 0 1920 1080 0\n", args[0]); + printf("%s 1 1 1 1920 1080 0 1920x1080_bayer.bin out/out_bayer2rgb.bin\n", args[0]); return 0; } - if (argc > 1) use_real_img = atoi(args[1]); - if (argc > 2) dst_fmt = atoi(args[2]); - if (argc > 3) height = atoi(args[3]); + if (argc > 1) thread_num = atoi(args[1]); + if (argc > 2) loop = atoi(args[2]); + if (argc > 3) use_real_img = atoi(args[3]); if (argc > 4) width = atoi(args[4]); - if (argc > 5) src_name = args[5]; + if (argc > 5) height = atoi(args[5]); + if (argc > 6) src_type = atoi(args[6]); + if (argc > 7) input_path = args[7]; + if (argc > 8) output_path = args[8]; + check = parameters_check(width, height, src_type); + if (check) { + printf("Parameters Failed! \n"); + return check; + } + printf("seed = %d\n", seed); bm_handle_t handle; bm_status_t ret = bm_dev_request(&handle, 0); if (ret != BM_SUCCESS) { @@ -423,20 +578,33 @@ int main(int argc, char* args[]) { return -1; } - // 8 * 8 ~ 8096 * 8096 - loop = ((use_real_img == 0) && (argc < 5)) ? 
2022 : 1; - - for (int i = 0; i < loop; i++) { - if ((use_real_img == 0) && (argc < 5)) { - height = 8 + i * 4; - width = 8 + i * 4; - } - - if (0 != test_bayer2rgb_random(handle, height, width, dst_fmt, use_real_img, src_name)) { + // test for multi-thread + pthread_t* pid = new pthread_t[thread_num]; + bayer2rgb_thread_arg_t* bayer2rgb_thread_arg = new bayer2rgb_thread_arg_t[thread_num]; + for (int i = 0; i < thread_num; i++) { + bayer2rgb_thread_arg[i].loop_num = loop; + bayer2rgb_thread_arg[i].handle = handle; + bayer2rgb_thread_arg[i].height = height; + bayer2rgb_thread_arg[i].width = width; + bayer2rgb_thread_arg[i].use_real_img = use_real_img; + bayer2rgb_thread_arg[i].src_type = src_type; + bayer2rgb_thread_arg[i].input_path = input_path; + bayer2rgb_thread_arg[i].output_path = output_path; + if (pthread_create(pid + i, NULL, test_bayer2rgb, bayer2rgb_thread_arg + i) != 0) { + printf("create thread failed\n"); + bm_dev_free(handle); return -1; } - sleep_(1000); } + for (int i = 0; i < thread_num; i++) { + int ret = pthread_join(pid[i], NULL); + if (ret != 0) { + printf("Thread join failed\n"); + exit(-1); + } + } + delete[] pid; + delete[] bayer2rgb_thread_arg; bm_dev_free(handle); - return 0; + return ret; } diff --git a/bmvid/bmcv/test/test_cv_copy_to_param.cpp b/bmvid/bmcv/test/test_cv_copy_to_param.cpp new file mode 100644 index 0000000..c40504a --- /dev/null +++ b/bmvid/bmcv/test/test_cv_copy_to_param.cpp @@ -0,0 +1,152 @@ +#include +#include +#include "bmcv_api_ext.h" +#include "stdio.h" +#include "stdlib.h" +#include +#include +#include +#include +#ifdef __linux__ +#include +#endif + +extern void bm1684x_vpp_read_bin(bm_image src, const char *input_name); +extern void bm1684x_vpp_write_bin(bm_image dst, const char *output_name); +extern void format_to_str(bm_image_format_ext format, char* res); + +int main(int argc, char **argv) { + + bm_handle_t handle = NULL; + int src_h, src_w, dst_w, dst_h; + bm_image_format_ext src_fmt,dst_fmt; + 
bm_image_data_format_ext src_type,dst_type; + char *src_name, *dst_name; + bm_image src, dst; + bmcv_copy_to_atrr_t copy_to_attr; + unsigned int i = 0, loop_time = 0; + unsigned long long time_single, time_total = 0, time_avg = 0; + unsigned long long time_max = 0, time_min = 10000, fps = 0, pixel_per_sec = 0; + int dev_id = 0; + +#ifdef __linux__ + struct timeval tv_start; + struct timeval tv_end; + struct timeval timediff; +#endif + + if (argc != 19 && argc != 16) { + printf("usage: %d\n", argc); + printf( + "%s src_w src_h src_fmt src_type src_name dst_w dst_h dst_fmt dst_type dst_name start_x " + "start_y if_padding (padding_r padding_g padding_b) loop_time dev_id\n", + argv[0]); + printf("example:\n"); + printf("FORMAT_YUV420P-->FORMAT_YUV420P, DATA_TYPE_EXT_1N_BYTE-->DATA_TYPE_EXT_1N_BYTE:\n"); + printf("%s 1920 1080 0 1 src.bin 3840 2160 0 1 dst.bin 960 540 1 153 204 102 1 0\n", argv[0]); + printf("%s 1920 1080 0 1 src.bin 3840 2160 0 1 dst.bin 960 540 0 1 0\n", argv[0]); + return 0; + } + + src_w = atoi(argv[1]); + src_h = atoi(argv[2]); + src_fmt = (bm_image_format_ext)atoi(argv[3]); + src_type = (bm_image_data_format_ext)atoi(argv[4]); + src_name = argv[5]; + dst_w = atoi(argv[6]); + dst_h = atoi(argv[7]); + dst_fmt = (bm_image_format_ext)atoi(argv[8]); + dst_type = (bm_image_data_format_ext)atoi(argv[9]); + dst_name = argv[10]; + copy_to_attr.start_x = atoi(argv[11]); + copy_to_attr.start_y = atoi(argv[12]); + copy_to_attr.if_padding = atoi(argv[13]); + + if (copy_to_attr.if_padding) { + copy_to_attr.padding_r = atoi(argv[14]); + copy_to_attr.padding_g = atoi(argv[15]); + copy_to_attr.padding_b = atoi(argv[16]); + loop_time = atoi(argv[17]); + dev_id = atoi(argv[18]); + } else { + loop_time = atoi(argv[14]); + dev_id = atoi(argv[15]); + } + + if (src_type == DATA_TYPE_EXT_1N_BYTE) { + if (dst_type != DATA_TYPE_EXT_FLOAT32 && dst_type != DATA_TYPE_EXT_1N_BYTE && + dst_type != DATA_TYPE_EXT_1N_BYTE_SIGNED && dst_type != DATA_TYPE_EXT_FP16 && + dst_type != 
DATA_TYPE_EXT_BF16) { + printf("Invalid output datatype.\n"); + return -1; + } + } else if (src_type == DATA_TYPE_EXT_FLOAT32) { + if (dst_type != DATA_TYPE_EXT_FLOAT32) { + printf("Invalid output datatype.\n"); + return -1; + } + } else { + printf("Invalid input datatype.\n"); + return -1; + } + + bm_status_t ret = bm_dev_request(&handle, dev_id); + if (ret != BM_SUCCESS) { + printf("Create bm handle failed. ret = %d\n", ret); + exit(-1); + } + + bm_image_create(handle, src_h, src_w, src_fmt, src_type, &src); + bm_image_alloc_dev_mem(src,1); + bm1684x_vpp_read_bin(src,src_name); + + bm_image_create(handle, dst_h, dst_w, dst_fmt, dst_type, &dst); + bm_image_alloc_dev_mem(dst,1); + +// printf("src addr = 0x%lx\n", src.image_private->data[0].u.device.device_addr); +// printf("dst addr = 0x%lx\n", dst.image_private->data[0].u.device.device_addr); + + for(i = 0;i < loop_time; i++){ +#ifdef __linux__ + gettimeofday(&tv_start, NULL); +#endif + + + ret = bmcv_image_copy_to(handle, copy_to_attr, src, dst); + +#ifdef __linux__ + gettimeofday(&tv_end, NULL); + timediff.tv_sec = tv_end.tv_sec - tv_start.tv_sec; + timediff.tv_usec = tv_end.tv_usec - tv_start.tv_usec; + time_single = (unsigned int)(timediff.tv_sec * 1000000 + timediff.tv_usec); +#endif + + if(time_single>time_max){time_max = time_single;} + if(time_single%d*%d, %s->%s,%s\n",src_w,src_h,dst_w,dst_h,src_fmt_str,dst_fmt_str,algorithm_str); + printf("bmcv_image_copy_to:loop %d cycles, time_avg = %llu, fps %llu, %lluM pps\n\n",loop_time, time_avg, fps, pixel_per_sec); + + bmlib_log("BMCV",BMLIB_LOG_TRACE, "loop %d cycles, time_max = %llu, time_min = %llu, time_avg = %llu\n", + loop_time, time_max, time_min, time_avg); + + + return 0; +} + diff --git a/bmvid/bmcv/test/test_cv_gaussian_blur.cpp b/bmvid/bmcv/test/test_cv_gaussian_blur.cpp old mode 100644 new mode 100755 index a6ec2f9..5603e9a --- a/bmvid/bmcv/test/test_cv_gaussian_blur.cpp +++ b/bmvid/bmcv/test/test_cv_gaussian_blur.cpp @@ -1,87 +1,323 @@ -#include 
-#include -#include "bmcv_api_ext.h" -#include "test_misc.h" -#include "stdio.h" +#include +#include "bmcv_api_ext_c.h" #include "stdlib.h" #include "string.h" #include -#ifdef __linux__ +#include +#include #include -#else -#include -#include "time.h" +#include + +#define TIME_COST_US(start, end) ((end.tv_sec - start.tv_sec) * 1000000 + (end.tv_usec - start.tv_usec)) + +typedef struct { + int loop_num; + int random; + int channel; + int height; + int width; + int format; + float sigmaX; + float sigmaY; + bool is_packed; + char* input_path; + char* output_path; + bm_handle_t handle; +} gaussian_blur_thread_arg_t; + +static int parameters_check(int height, int width) +{ + if (height > 8192 || width > 4096){ + printf("Unsupported size : size_max = 4096 x 8192 \n"); + return -1; + } + return 0; +} + +#if 0 // Use opencv for comparison to prove that the cpu side's own implementation of ref is correct. +#include "opencv2/opencv.hpp" +using namespace cv; #endif -using namespace std; +#if 0 +static int gaussian_blur_opencv(unsigned char *input, unsigned char *output, int channel, bool is_packed, + int height, int width, int kw, int kh, float sigmaX, float sigmaY, int format) { + int type = is_packed ? CV_8UC3 : CV_8UC1; + channel = is_packed ? 
1 : channel; + if (format == 0) { + unsigned char *input_addr[3] = {input, input + width * height, input + width * height * 5 / 4}; + unsigned char *output_addr[3] = {output, output + width * height, output + width * height * 5 / 4}; + for (int i = 0; i < channel; i++){ + Mat mat_i = Mat(height, width, type, input_addr[i]); + Mat mat_o = Mat(height, width, type, output_addr[i]); + GaussianBlur(mat_i, mat_o, Size(kw, kh), sigmaX, sigmaY); + } + } else if (format == 1) { + unsigned char *input_addr[3] = {input, input + width * height, input + width * height * 3 / 2}; + unsigned char *output_addr[3] = {output, output + width * height, output + width * height * 3 / 2}; + for (int i = 0; i < channel; i++) { + Mat mat_i = Mat(height, width, type, input_addr[i]); + Mat mat_o = Mat(height, width, type, output_addr[i]); + GaussianBlur(mat_i, mat_o, Size(kw, kh), sigmaX, sigmaY); + } + } else { + for (int i = 0; i < channel; i++) { + Mat mat_i = Mat(height, width, type, input + i * height * width); + Mat mat_o = Mat(height, width, type, output + i * height * width); + GaussianBlur(mat_i, mat_o, Size(kw, kh), sigmaX, sigmaY); + } + } + return 0; +} +#endif -char* opencvFile_path = NULL; +static int get_format_size(int format,int width, int height) { + switch (format) { + case FORMAT_RGB_PLANAR: + case FORMAT_BGR_PLANAR: + case FORMAT_RGB_PACKED: + case FORMAT_BGR_PACKED: + case FORMAT_RGBP_SEPARATE: + case FORMAT_BGRP_SEPARATE: + return width * height * 3; + case FORMAT_GRAY: + return width * height; + break; + default: + printf("format error\n"); + return 0; + } +} -static void fill( - unsigned char* input, - int channel, - int width, - int height) { - int conut = 1; - for (int i = 0; i < channel; i++) { +static void fill(unsigned char *input, int channel, int width, int height, int is_packed) { + for (int i = 0; i < (is_packed ? 
3 : channel); i++) { + int num = 10; for (int j = 0; j < height; j++) { for (int k = 0; k < width; k++) { - //input[i * width * height + j * width + k] = rand() % 256; - input[i * width * height + j * width + k] = conut; - conut++; - if(conut == 256) - conut = 1; + input[i * width * height + j * width + k] = num % 128; + num++; } } } } +static int get_gaussian_sep_kernel(int n, float sigma, float *k_sep) { + const int SMALL_GAUSSIAN_SIZE = 3; + static const float small_gaussian_tab[3] = {0.25f, 0.5f, 0.25f}; + const float* fixed_kernel = n % 2 == 1 && n <= SMALL_GAUSSIAN_SIZE && sigma <= 0 ? small_gaussian_tab : 0; + float sigmaX = sigma > 0 ? sigma : ((n - 1) * 0.5 - 1) * 0.3 + 0.8; + float scale2X = -0.5 / (sigmaX * sigmaX); + float sum = 0; + int i; -static int gaussian_blur_tpu( - unsigned char* input, - unsigned char* output, - int channel, - bool is_packed, - int height, - int width, - int kw, - int kh, - float sigmaX, - float sigmaY) { - bm_handle_t handle; - bm_status_t ret = bm_dev_request(&handle, 0); - if (ret != BM_SUCCESS) { - printf("Create bm handle failed. ret = %d\n", ret); - return -1; + for (i = 0; i < n; i++) { + float x = i - (n - 1) * 0.5; + float t = fixed_kernel ? fixed_kernel[i] : exp(scale2X * x * x); + k_sep[i] = t; + sum += k_sep[i]; } - bm_image_format_ext fmt = channel == 3 ? FORMAT_RGB_PLANAR : FORMAT_GRAY; - fmt = is_packed ? FORMAT_RGB_PACKED : fmt; + sum = 1./sum; + for (i = 0; i < n; i++) { + k_sep[i] = k_sep[i] * sum; + } + return 0; +} + +static void create_gaussian_kernel(float* kernel, int kw, int kh, float sigma1, float sigma2) { + float* k_sep_x = (float* )malloc(sizeof(float) * kw); + float* k_sep_y = (float* )malloc(sizeof(float) * kh); + + if(sigma2 <= 0) sigma2 = sigma1; + // automatic detection of kernel size from sigma + if (kw <= 0 && sigma1 > 0 ) kw = (int)round(sigma1 * 3 * 2 + 1) | 1; + if (kh <= 0 && sigma2 > 0 ) kh = (int)round(sigma2 * 3 * 2 + 1) | 1; + sigma1 = sigma1 < 0 ? 0 : sigma1; + sigma2 = sigma2 < 0 ? 
0 : sigma2; + get_gaussian_sep_kernel(kw, sigma1, k_sep_x); + if (kh == kw && abs(sigma1 - sigma2) < DBL_EPSILON) { + get_gaussian_sep_kernel(kw, sigma1, k_sep_y); + } else { + get_gaussian_sep_kernel(kh, sigma2, k_sep_y); + } + for (int i = 0; i < kh; i++) { + for (int j = 0; j < kw; j++) { + kernel[i * kw + j] = k_sep_y[i] * k_sep_x[j]; + } + } + free(k_sep_x); + free(k_sep_y); +} + +static void borderfill(int width, int height, int ksize_w, int ksize_h,unsigned char *input_data, + unsigned char* input_data_border) { + int border_w = ksize_w / 2; + int border_h = ksize_h / 2; + int col = 0; + int row = 0; + int temp = 0; + // first fill left and right + for (int i = border_h; i < border_h + height; i++) { + temp = border_w; + for (int j = 0; j < border_w; j++) { + input_data_border[i * (width + 2 * border_w) + j] = input_data[row * width + temp]; + temp--; + } + for (int j = border_w; j < border_w + width; j++) { + input_data_border[i * (width + 2 * border_w) + j] = input_data[row * width + col]; + col++; + } + temp = width - 2; + for (int j = border_w + width; j < 2 * border_w + width; j++) { + input_data_border[i * (width + 2 * border_w) + j] = input_data[row * width + temp]; + temp--; + } + row++; + col = 0; + } + // fill top and bottom + temp = 2 * border_h; + for (int i = 0; i < border_h; i++) { + for (int j = 0; j < 2 * border_w + width; j++) { + input_data_border[i * (2 * border_w + width) + j] = + input_data_border[(i + temp) * (2 * border_w + width) + j]; + } + temp -= 2; + } + temp = 2; + for (int i = border_h + height; i < 2 * border_h + height; i++) { + for (int j = 0; j < 2 * border_w + width; j++) { + input_data_border[i * (2 * border_w + width) + j] = + input_data_border[(i - temp) * (2 * border_w + width) + j]; + } + temp += 2; + } +} + +static void conv(int width, int height, int kw, int kh, float *kernel, + unsigned char *input_data_border, unsigned char *output) { + // The cpu side does the convolution on the input_img + float sum_ = 0; + for 
(int i = 0; i < (kh / 2 + height); i++) { + for (int j = 0; j < (kw / 2 + width); j++) { + for (int g = 0; g < kh; g++) { + for (int k = 0; k < kw; k++) { + sum_ += input_data_border[(i + g) * (2 * (kw / 2) + width) + j + k] * kernel[g * kw + k]; + } + } + if (sum_ < 0) {sum_ = 0;} + if (sum_ > 255) {sum_ = 255;} + if ((i < height) && (j < width)) { + output[i * width + j] = (unsigned char)(sum_ + 0.5); // opencv是四舍五入 + } + sum_ = 0; + } + } +} + +static int gaussian_blur_ref_single_channel(unsigned char *input, unsigned char *output, int height, + int width, int kw, int kh, float *kernel) { + // 1.get kernel --get from last param + // 2.border fill --different from tpu,but the result is the same. + int border_w = kw / 2; + int border_h = kh / 2; + unsigned char* input_data_border = (unsigned char*)malloc((width + 2 * border_w) * (height + 2 * border_h + 1)); + memset(input_data_border, 0, (width + 2 * border_w) * (height + 2 * border_h)); + borderfill(width, height, kw, kh, input, input_data_border); +#if 0 // check borderfill + for(int i = 0; i < 10; i++){ + for(int j = 0; j < 10; j++){ + printf("%3d ",input_data_border[i * 10 + j]); + } + printf("\n"); + } +#endif + // 3.convolution + conv(width, height, kw, kh, kernel, input_data_border, output); + free(input_data_border); + return 0; +} + +static int gaussian_blur_cpu_ref(unsigned char *input, unsigned char *output, int channel, bool is_packed, + int height, int width, int kw, int kh, float sigmaX, float sigmaY) { + // create kernel + float *kernel = (float*)malloc(kw * kh * sizeof(float)); + memset(kernel, 0, kw * kh * sizeof(float)); + create_gaussian_kernel(kernel, kw, kh, sigmaX, sigmaY); + if(is_packed) { + unsigned char *input_temp = (unsigned char *)malloc(width * height * 3); + unsigned char *output_temp = (unsigned char *)malloc(width * height * 3); + // Adjusting the order of rgb alignment + for (int i = 0; i < 3; i++) { + int temp = 0; + for (int j = 0; j < width * height; j++) { + input_temp[i * 
width * height + j] = input[i + temp]; + temp += 3; + } + } + for (int i = 0; i < 3; i++) { + gaussian_blur_ref_single_channel(input_temp + i * width * height, output_temp + i * width * height, + height, width, kw, kh, kernel); + } + for (int i = 0; i < 3; i++) { + int tep = 0; + for (int j = 0; j < width * height; j++) { + output[i + tep] = output_temp[i * width * height + j]; + tep += 3; + } + } + free(input_temp); + free(output_temp); + } else { + for (int i = 0; i < channel; i++) { + gaussian_blur_ref_single_channel(input + i * width * height, output + i * width * height, + height, width, kw, kh, kernel); + } + } + free(kernel); + return 0; +} + +static int gaussian_blur_tpu(unsigned char *input, unsigned char *output, int height, int width, int kw, + int kh, float sigmaX, float sigmaY, int format, bm_handle_t handle) { bm_image img_i; bm_image img_o; - bm_image_create(handle, height, width, fmt, DATA_TYPE_EXT_1N_BYTE, &img_i); - bm_image_create(handle, height, width, fmt, DATA_TYPE_EXT_1N_BYTE, &img_o); - bm_image_alloc_dev_mem(img_i); - bm_image_alloc_dev_mem(img_o); - unsigned char* in_ptr[3] = {input, input + height * width, input + 2 * height * width}; - bm_image_copy_host_to_device(img_i, (void **)(in_ptr)); struct timeval t1, t2; - gettimeofday_(&t1); - bmcv_image_gaussian_blur(handle, img_i, img_o, kw, kh, sigmaX, sigmaY); - gettimeofday_(&t2); - cout << "GaussianBlur TPU using time: " << ((t2.tv_sec - t1.tv_sec) * 1000000 + t2.tv_usec - t1.tv_usec) << "us" << endl; - unsigned char* out_ptr[3] = {output, output + height * width, output + 2 * height * width}; - bm_image_copy_device_to_host(img_o, (void **)out_ptr); + bm_image_create(handle, height, width, (bm_image_format_ext)format, DATA_TYPE_EXT_1N_BYTE, &img_i, NULL); + bm_image_create(handle, height, width, (bm_image_format_ext)format, DATA_TYPE_EXT_1N_BYTE, &img_o, NULL); + bm_image_alloc_dev_mem(img_i, 2); + bm_image_alloc_dev_mem(img_o, 2); + + if(format == 14){ + unsigned char *input_addr[1] = 
{input}; + bm_image_copy_host_to_device(img_i, (void **)(input_addr)); + }else{ + unsigned char *input_addr[3] = {input, input + height * width, input + 2 * height * width}; + bm_image_copy_host_to_device(img_i, (void **)(input_addr)); + } + + gettimeofday(&t1, NULL); + if(BM_SUCCESS != bmcv_image_gaussian_blur(handle, img_i, img_o, kw, kh, sigmaX, sigmaY)){ + bm_image_destroy(img_i); + bm_image_destroy(img_o); + bm_dev_free(handle); + return -1; + } + gettimeofday(&t2, NULL); + printf("Gaussian_blur TPU using time = %ld(us)\n", TIME_COST_US(t1, t2)); + if (format == 14){ + unsigned char *output_addr[1] = {output}; + bm_image_copy_device_to_host(img_o, (void **)output_addr); + } else { + unsigned char *output_addr[3] = {output, output + height * width, output + 2 * height * width}; + bm_image_copy_device_to_host(img_o, (void **)output_addr); + } bm_image_destroy(img_i); bm_image_destroy(img_o); - bm_dev_free(handle); return 0; } -static int cmp( - unsigned char* got, - unsigned char* exp, - int len) { +static int cmp(unsigned char *got, unsigned char *exp, int len) { for (int i = 0; i < len; i++) { if (abs(got[i] - exp[i]) > 2) { printf("cmp error: idx=%d exp=%d got=%d\n", i, exp[i], got[i]); @@ -91,79 +327,217 @@ static int cmp( return 0; } -static int test_gaussian_blur_random( - int channel, - bool is_packed, - int height, - int width, - int kw, - int kh) { - cout << "===== test gaussian blur =====" << endl; - struct timespec tp; - clock_gettime_(0, &tp); +static void read_bin(const char *input_path, unsigned char *input_data, int width, int height, int format) { + FILE *fp_src = fopen(input_path, "rb"); + if (fp_src == NULL) { + printf("无法打开输出文件 %s\n", input_path); + return; + } + if(fread(input_data, sizeof(char), get_format_size(format, width, height), fp_src) != 0) { + printf("read image success\n"); + } + fclose(fp_src); +} - unsigned int seed = tp.tv_nsec; - srand(seed); - cout << "seed = " << seed << endl; - float sigmaX = 4; - float sigmaY = 3; - cout << 
"channel: " << channel << " width: " << width << " height: " << height << endl; - cout << "kw: " << kw << " kh: " << kh << " is_packed: " << is_packed << endl; - cout << "simgaX: " << sigmaX << " sigmaY: " << sigmaY << endl; - unsigned char* input_data = new unsigned char [width * height * 3]; - unsigned char* output_tpu = new unsigned char [width * height * 3]; - unsigned char* output_opencv = new unsigned char [width * height * 3]; - fill(input_data, 3, width, height); - ifstream opencv_readfile((string(opencvFile_path) + string("/opencv_gaussian_blur.bin")), ios :: in | ios :: binary); - if( ! opencv_readfile){ - cout << "Error opening file" << endl; +static void write_bin(const char *output_path, unsigned char *output_data, int width, int height, int format) { + FILE *fp_dst = fopen(output_path, "wb"); + if (fp_dst == NULL) { + printf("无法打开输出文件 %s\n", output_path); + return; + } + fwrite(output_data, sizeof(char), get_format_size(format, width, height), fp_dst); + fclose(fp_dst); +} + +static int test_gaussian_blur_random(int random, int channel, bool is_packed, int height, int width, int format, + float sgmX, float sgmY, char *input_path, char *output_path, bm_handle_t handle) { + struct timeval t1, t2; + int kw = 3, kh = 3; + float sigmaX = sgmX; + float sigmaY = sgmY; + printf("===== test gaussian blur =====\n"); + // 1.Define the parameters + printf("channel: %d, width: %d, height: %d\n", channel, width, height); + printf("kw: %d, kh: %d, is_packed: %d\n", kw, kh, is_packed); + printf("sigmaX: %f, sigmaY: %f, format: %d\n", sigmaX, sigmaY, format); + unsigned char *input_data = (unsigned char*)malloc(width * height * (is_packed ? 3 : channel)); + unsigned char *output_tpu = (unsigned char*)malloc(width * height * (is_packed ? 3 : channel)); + unsigned char *output_cpu = (unsigned char*)malloc(width * height * (is_packed ? 3 : channel)); + // 2.fill input_img + if (random) { + fill(input_data, (is_packed ? 
3 : channel), width, height, is_packed); + } else { + // Input file test, can be written as image observation + read_bin(input_path, input_data, width, height, format); + } + // 3.gaussian_blur --cpu reference + gettimeofday(&t1, NULL); + gaussian_blur_cpu_ref(input_data, output_cpu, channel, is_packed, height, + width, kw, kh, sigmaX, sigmaY); + gettimeofday(&t2, NULL); + printf("Gaussian_blur CPU using time = %ld(us)\n", TIME_COST_US(t1, t2)); +#if 0 // Use opencv for comparison to prove that the cpu side's own implementation of ref is correct. + unsigned char *output_opencv = (unsigned char*)malloc(width * height * (is_packed ? 3 : channel)); + gaussian_blur_opencv(input_data, output_opencv, channel, is_packed, + height, width, kw, kh, sigmaX, sigmaY, format); + // cmp opencv & cpu reference + if (0 != cmp(output_cpu, output_opencv, get_format_size(format, width, height))) { + printf("opencv and cpu failed to compare \n"); + } else { + printf("Compare CPU result with OpenCV successfully!\n"); + } + free(output_opencv); +#endif + if(0 != gaussian_blur_tpu(input_data, output_tpu, height, width, kw, kh, sigmaX, sigmaY, format, handle)){ + free(input_data); + free(output_tpu); + free(output_cpu); return -1; } - int i = 0; - while(opencv_readfile.read((char *)&output_opencv[i], sizeof(unsigned char))){ - i++; - } - opencv_readfile.close(); - gaussian_blur_tpu( - input_data, - output_tpu, - channel, - is_packed, - height, - width, - kw, - kh, - sigmaX, - sigmaY); - int ret = cmp(output_tpu, output_opencv, width * height * (is_packed ? 
3 : channel)); - delete [] input_data; - delete [] output_tpu; - delete [] output_opencv; + int ret = cmp(output_tpu, output_cpu, get_format_size(format, width, height)); + if (ret == 0) { + printf("Compare TPU result with CPU result successfully!\n"); + if (random == 0) { + write_bin(output_path, output_tpu, width, height, format); + } + } else { + printf("cpu and tpu failed to compare \n"); + } + free(input_data); + free(output_tpu); + free(output_cpu); return ret; } -int main(int argc, char* args[]) { +void* test_gaussian_blur(void* args) { + gaussian_blur_thread_arg_t* gaussian_blur_thread_arg = (gaussian_blur_thread_arg_t*)args; + int loop_num = gaussian_blur_thread_arg->loop_num; + int random = gaussian_blur_thread_arg->random; + int channel = gaussian_blur_thread_arg->channel; + int height = gaussian_blur_thread_arg->height; + int width = gaussian_blur_thread_arg->width; + int format = gaussian_blur_thread_arg->format; + float sigmaX = gaussian_blur_thread_arg->sigmaX; + float sigmaY = gaussian_blur_thread_arg->sigmaY; + bool is_packed = gaussian_blur_thread_arg->is_packed; + char* input_path = gaussian_blur_thread_arg->input_path; + char* output_path = gaussian_blur_thread_arg->output_path; + bm_handle_t handle = gaussian_blur_thread_arg->handle; + for (int i = 0; i < loop_num; i++) { + // if (loop_num > 1) { + // width = 2 + rand() % 4094; + // height = 2 + rand() % 8190; + // int format_num[] = {8,9,10,11,12,13,14}; + // int size = sizeof(format_num) / sizeof(format_num[0]); + // int rand_num = rand() % size; + // format = format_num[rand_num]; + // sigmaX = (float)rand() / RAND_MAX * 5.0f; + // sigmaY = (float)rand() / RAND_MAX * 5.0f; + // channel = ((format == 10 || format == 11 || format == 14) ? 1 : 3); + // is_packed = ((format == 10 || format == 11) ? 
1 : 0); + // } + if (0 != test_gaussian_blur_random(random, channel, is_packed, height, width, format, sigmaX, + sigmaY, input_path, output_path, handle)) { + printf("------TEST GAUSSIAN_BLUR FAILED------\n"); + return (void*)-1; + } + printf("------TEST GAUSSIAN_BLUR PASSED!------\n"); + } + return (void*)0; +} + + +int main(int argc, char *args[]) { + struct timespec tp; + clock_gettime(0, &tp); + unsigned int seed = tp.tv_nsec; + srand(seed); + printf("seed = %d\n", seed); + int random = 1; int loop = 1; - int channel = 3; - int height = 1080; - int width = 1920; - int kh = 3; - int kw = 3; - int is_packed = 1; - if (argc > 1) loop = atoi(args[1]); - opencvFile_path = getenv("BMCV_TEST_FILE_PATH"); - if (opencvFile_path == NULL) { - printf("please set environment vairable: BMCV_TEST_FILE_PATH !\n"); + int width = 2 + rand() % 4094; + int height = 2 + rand() % 8190; + int format_num[] = {8,9,10,11,12,13,14}; + int size = sizeof(format_num) / sizeof(format_num[0]); + int rand_num = rand() % size; + int format = format_num[rand_num]; + float sigmaX = (float)rand() / RAND_MAX * 5.0f; + float sigmaY = (float)rand() / RAND_MAX * 5.0f; + int channel; + int is_packed; + int thread_num = 1; + int check = 0; + int ret = 0; + char *input_path = NULL; + char *output_path = NULL; + bm_handle_t handle; + ret = bm_dev_request(&handle, 0); + if (ret != BM_SUCCESS) { + printf("bm_dev_request failed. 
ret = %d\n", ret); return -1; } - int ret = 0; - for (int i = 0; i < loop; i++) { - ret = test_gaussian_blur_random(channel, is_packed, height, width, kw, kh); - if (ret) { - cout << "test gaussian_blur failed" << endl; - return ret; + + if (argc == 2 && atoi(args[1]) == -1) { + printf("usage: \n"); + printf("%s thread_num loop random height width format sigmaX sigmaY input_path output_path(when random = 0,need to set input_path and output_path) \n", args[0]); + printf("example:\n"); + printf("%s \n", args[0]); + printf("%s 2\n", args[0]); + printf("%s 2 1\n", args[0]); + printf("%s 2 1 1 512 512 8 0.5 0.5\n", args[0]); + printf("%s 1 1 1 1080 1920 8 2 0 res/1920x1080_rgb.bin out/out_gaussian_blur.bin \n", args[0]); + return 0; + } + + if (argc > 1) thread_num = atoi(args[1]); + if (argc > 2) loop = atoi(args[2]); + if (argc > 3) random = atoi(args[3]); + if (argc > 4) height = atoi(args[4]); + if (argc > 5) width = atoi(args[5]); + if (argc > 6) format = atoi(args[6]); + if (argc > 7) sigmaX = atof(args[7]); + if (argc > 8) sigmaY = atof(args[8]); + if (argc > 9) input_path = args[9]; + if (argc > 10) output_path = args[10]; + channel = ((format == 10 || format == 11 || format == 14) ? 1 : 3); + is_packed = ((format == 10 || format == 11) ? 1 : 0); + check = parameters_check(height, width); + if (check) { + printf("Parameters Failed! 
\n"); + return check; + } + + // test for multi-thread + pthread_t pid[thread_num]; + gaussian_blur_thread_arg_t gaussian_blur_thread_arg[thread_num]; + for (int i = 0; i < thread_num; i++) { + gaussian_blur_thread_arg[i].loop_num = loop; + gaussian_blur_thread_arg[i].random = random; + gaussian_blur_thread_arg[i].channel = channel; + gaussian_blur_thread_arg[i].is_packed = is_packed; + gaussian_blur_thread_arg[i].height = height; + gaussian_blur_thread_arg[i].width = width; + gaussian_blur_thread_arg[i].format = format; + gaussian_blur_thread_arg[i].sigmaX = sigmaX; + gaussian_blur_thread_arg[i].sigmaY = sigmaY; + gaussian_blur_thread_arg[i].input_path = input_path; + gaussian_blur_thread_arg[i].output_path = output_path; + gaussian_blur_thread_arg[i].handle = handle; + if (pthread_create(pid + i, NULL, test_gaussian_blur, gaussian_blur_thread_arg + i) != 0) { + printf("create thread failed\n"); + bm_dev_free(handle); + return -1; } } - cout << "Compare TPU result with OpenCV successfully!" 
<< endl; - return 0; -} + for (int i = 0; i < thread_num; i++) { + int ret = pthread_join(pid[i], NULL); + if (ret != 0) { + printf("Thread join failed\n"); + bm_dev_free(handle); + exit(-1); + } + } + + bm_dev_free(handle); + return ret; +} \ No newline at end of file diff --git a/bmvid/bmcv/test/test_cv_hist_balance.cpp b/bmvid/bmcv/test/test_cv_hist_balance.cpp new file mode 100644 index 0000000..e2fa337 --- /dev/null +++ b/bmvid/bmcv/test/test_cv_hist_balance.cpp @@ -0,0 +1,226 @@ +#include "bmcv_api_ext_c.h" +#include "bmlib_runtime.h" +#include +#include +#include +#include +#include "test_misc.h" +#ifdef __linux__ +#include +#else +#include +#include "time.h" +#endif + +#define TIME_COST_US(start, end) ((end.tv_sec - start.tv_sec) * 1000000 + (end.tv_usec - start.tv_usec)) +#define GRAY_SERIES 256 +#define ERR_MAX 1 + +struct gray_para { + uint8_t gray_val; + float cdf_val; +}; + +static float get_cdf_min(float* cdf, int len) +{ + int i; + + for(i = 0; i < len; ++i) { + if (cdf[i] != 0) { + return cdf[i]; + } + } + return 0.f; +} + +static int cpu_hist_balance(uint8_t* input_host, uint8_t* output_cpu, int height, int width) +{ + int H = height; + int W = width; + uint8_t binX; + int j; + float gray_tmp; + uint8_t gray_index_tmp; + float cdf_min; + float cdf_max; + float* cpu_cdf; + + if (input_host == NULL || output_cpu == NULL) { + printf("cpu_calc_hist param error!\n"); + return -1; + } + + cpu_cdf = (float*)malloc(GRAY_SERIES * sizeof(float)); + memset(cpu_cdf, 0.f, GRAY_SERIES *sizeof(float)); + + for (j = 0; j < W * H; ++j) { + binX = input_host[j]; + cpu_cdf[binX]++; + } + + for (j = 1; j < GRAY_SERIES ; ++j) { + cpu_cdf[j] += cpu_cdf[j - 1]; + } + + cdf_min = get_cdf_min(cpu_cdf, GRAY_SERIES); + cdf_max = H * W; + + for(j = 0; j < H * W; ++j) { + gray_index_tmp = input_host[j]; + gray_tmp = round((cpu_cdf[gray_index_tmp] - cdf_min) * (GRAY_SERIES - 1) / (cdf_max - cdf_min)); + output_cpu[j] = (uint8_t)gray_tmp; + } + + free(cpu_cdf); + return 0; +} + 
+static int tpu_hist_balance(uint8_t* input_host, uint8_t* output_tpu, int height, int width) +{ + bm_handle_t handle = NULL; + bm_device_mem_t input, output; + int dev_id = 0; + int H = height; + int W = width; + bm_status_t ret = BM_SUCCESS; + struct timeval t1, t2; + + ret = bm_dev_request(&handle, dev_id); + if (ret != BM_SUCCESS) { + printf("bm_dev_request failed. ret = %d\n", ret); + return -1; + } + + ret = bm_malloc_device_byte(handle, &output, H * W * sizeof(uint8_t)); + if (ret != BM_SUCCESS) { + printf("bm_malloc_device_byte output float failed. ret = %d\n", ret); + bm_dev_free(handle); + return -1; + } + + ret = bm_malloc_device_byte(handle, &input, H * W * sizeof(uint8_t)); + if (ret != BM_SUCCESS) { + printf("bm_malloc_device_byte intput uint8_t failed. ret = %d\n", ret); + bm_free_device(handle, output); + bm_dev_free(handle); + return -1; + } + + ret = bm_memcpy_s2d(handle, input, input_host); + if (ret != BM_SUCCESS) { + printf("bm_memcpy_s2d uint8_t failed. ret = %d\n", ret); + bm_free_device(handle, input); + bm_free_device(handle, output); + bm_dev_free(handle); + return -1; + } + + gettimeofday_(&t1); + ret = bmcv_hist_balance(handle, input, output, H, W); + if (ret != BM_SUCCESS) { + printf("bmcv_calc_hist_weighted uint8_t failed. ret = %d\n", ret); + bm_free_device(handle, input); + bm_free_device(handle, output); + bm_dev_free(handle); + return -1; + } + gettimeofday_(&t2); + printf("bmcv_hist_balance TPU using time = %ld(us)\n", TIME_COST_US(t1, t2)); + + ret = bm_memcpy_d2s(handle, output_tpu, output); + if (ret != BM_SUCCESS) { + printf("bm_memcpy_d2s failed. 
ret = %d\n", ret); + bm_free_device(handle, input); + bm_free_device(handle, output); + bm_dev_free(handle); + return -1; + } + + bm_free_device(handle, input); + bm_free_device(handle, output); + bm_dev_free(handle); + return 0; +} + +static int cmp_result(uint8_t* out_cpu, uint8_t* out_tpu, int len) +{ + int i; + + for (i = 0; i < len; ++i) { + if (abs(out_tpu[i] - out_cpu[i]) > ERR_MAX) { + printf("out_tpu[%d] = %u, out_cpu[%d] = %u\n", i, out_tpu[i], i, out_cpu[i]); + return -1; + } + } + + return 0; +} + +static int test_hist_balance(int height, int width) +{ + int len = height * width; + int j; + int ret = 0; + struct timeval t1, t2; + + uint8_t* inputHost = new uint8_t[len]; + uint8_t* output_cpu = new uint8_t[len]; + uint8_t* output_tpu = new uint8_t[len]; + + for (j = 0; j < len; ++j) { + inputHost[j] = (uint8_t)(rand() % 256); + } + + gettimeofday_(&t1); + ret = cpu_hist_balance(inputHost, output_cpu, height, width); + gettimeofday_(&t2); + printf("cpu_hist_balance CPU using time = %ld(us)\n", TIME_COST_US(t1, t2)); + if (ret) { + printf("cpu_hist_balance failed!\n"); + return ret; + } + ret = tpu_hist_balance(inputHost, output_tpu, height, width); + if (ret) { + printf("tpu_hist_balance failed!\n"); + return ret; + } + + ret = cmp_result(output_cpu, output_tpu, len); + if (ret) { + printf("cmp_result failed!\n"); + return ret; + } + + delete[]inputHost; + delete[]output_cpu; + delete[]output_tpu; + return ret; +} + +int main(int argc, char *argv[]) +{ + (void)argc; + (void)argv; + int ret = 0; + int height; + int width; + int loop = 1; + int i; + + printf("please input the img height: \n"); + ret = scanf("%d", &(height)); + printf("please input the img width: \n"); + ret = scanf("%d", &(width)); + printf("please input the loop: \n"); + ret = scanf("%d", &(loop)); + + for (i = 0; i < loop; ++i) { + ret = test_hist_balance(height, width); + if (ret) { + std::cout << "test_hist_balance failed" << std::endl; + return ret; + } + } + std::cout << "Compare TPU 
result with CPU successfully!" << std::endl; + return 0; +} \ No newline at end of file diff --git a/bmvid/bmcv/test/test_cv_jpeg.cpp b/bmvid/bmcv/test/test_cv_jpeg.cpp index ea6e00a..de919a4 100644 --- a/bmvid/bmcv/test/test_cv_jpeg.cpp +++ b/bmvid/bmcv/test/test_cv_jpeg.cpp @@ -463,7 +463,6 @@ DWORD WINAPI test_jpeg_enc_and_dec(LPVOID argv) } ret = bmcv_image_jpeg_dec(handle, (void**)jpeg_data, size, image_n, dst); assert(ret == BM_SUCCESS); - int image_byte_size[3] = {0}; bm_image_get_byte_size(dst[0], image_byte_size); #ifdef DEBUG_JPEG @@ -651,7 +650,7 @@ int main(int argc, char *argv[]) { } printf("random seed %d\n", seed); srand(seed); - bmlib_log_set_level(BMLIB_LOG_INFO); + bmlib_log_set_level(BMLIB_LOG_ERROR); #ifdef EXAMPLE_JPEG char* file = (char*)"dehua1.jpg"; test_jpeg_dec(file); diff --git a/bmvid/bmcv/test/test_cv_quantify.cpp b/bmvid/bmcv/test/test_cv_quantify.cpp new file mode 100644 index 0000000..99cd2bc --- /dev/null +++ b/bmvid/bmcv/test/test_cv_quantify.cpp @@ -0,0 +1,279 @@ +#include +#include "bmcv_api_ext.h" +#include "stdlib.h" +#include "string.h" +#include +#include +#include +#include + +pthread_mutex_t lock; +#define TIME_COST_US(start, end) ((end.tv_sec - start.tv_sec) * 1000000 + (end.tv_usec - start.tv_usec)) + +typedef struct { + int loop_num; + int height; + int width; + int use_real_img; + char* input_path; + char* output_path; + bm_handle_t handle; +} cv_quantify_thread_arg_t; + +static int parameters_check(int height, int width) +{ + if (height > 8192 || width > 8192){ + printf("Unsupported size : size_max = 8192 x 8192 \n"); + return -1; + } + return 0; +} + +static void read_bin(const char *input_path, float *input_data, int width, int height) +{ + FILE *fp_src = fopen(input_path, "rb"); + if (fp_src == NULL) + { + printf("无法打开输出文件 %s\n", input_path); + return; + } + if(fread(input_data, sizeof(float), width * height, fp_src) != 0) + printf("read image success\n"); + fclose(fp_src); +} + +static void write_bin(const char 
*output_path, unsigned char *output_data, int width, int height) +{ + FILE *fp_dst = fopen(output_path, "wb"); + if (fp_dst == NULL) + { + printf("无法打开输出文件 %s\n", output_path); + return; + } + fwrite(output_data, sizeof(int), width * height, fp_dst); + fclose(fp_dst); +} + +float random_float(float min, float max) { + return min + ((float)rand() / RAND_MAX) * (max - min); +} + +static void fill( + float* input, + int channel, + int width, + int height) { + for (int i = 0; i < channel; i++) { + for (int j = 0; j < height; j++) { + for(int k = 0; k < width; k++){ + float num = random_float(-100.0f, 300.0f); + input[i * width * height + j * width + k] = num; + } + } + } +} + +static int quantify_cpu( + float* input, + unsigned char* output, + int height, + int width) { + for(int i = 0; i < width * height * 3; i++) { + if (input[i] < 0.0f) { + output[i] = 0; + } else if(input[i] > 255.0f) { + output[i] = 255; + } else { + output[i] = (unsigned char)input[i]; + } + } + return 0; +} + +static int quantify_tpu( + float* input, + unsigned char* output, + int height, + int width, + bm_handle_t handle) { + + bm_image input_img; + bm_image output_img; + struct timeval t1, t2; + pthread_mutex_lock(&lock); + bm_image_create(handle, height, width, (bm_image_format_ext)FORMAT_RGB_PLANAR, DATA_TYPE_EXT_FLOAT32, &input_img, NULL); + bm_image_create(handle, height, width, (bm_image_format_ext)FORMAT_RGB_PLANAR, DATA_TYPE_EXT_1N_BYTE, &output_img, NULL); + bm_image_alloc_dev_mem(input_img, 1); + bm_image_alloc_dev_mem(output_img, 1); + float* in_ptr[1] = {input}; + bm_image_copy_host_to_device(input_img, (void **)in_ptr); + gettimeofday(&t1, NULL); + bmcv_image_quantify(handle, input_img, output_img); + gettimeofday(&t2, NULL); + printf("Quantify TPU using time = %ld(us)\n", TIME_COST_US(t1, t2)); + unsigned char* out_ptr[1] = {output}; + bm_image_copy_device_to_host(output_img, (void **)out_ptr); + bm_image_destroy(input_img); + bm_image_destroy(output_img); + 
pthread_mutex_unlock(&lock); + return 0; +} + +static int cmp( + unsigned char *got, + unsigned char *exp, + int len) { + for (int i = 0; i < len; i++) { + if (got[i] != exp[i]) { + printf("cmp error: idx=%d exp=%d got=%d\n", i, exp[i], got[i]); + return -1; + } + } + return 0; +} + +static int test_quantify_random( + int height, + int width, + int use_real_img, + char *input_path, + char *output_path, + bm_handle_t handle) { + printf("width: %d , height: %d\n", width, height); + int ret; + struct timeval t1, t2; + + float* input_data = (float*)malloc(width * height * 3 * sizeof(float)); + unsigned char* output_cpu = (unsigned char*)malloc(width * height * 3 * sizeof(unsigned char)); + unsigned char* output_tpu = (unsigned char*)malloc(width * height * 3 * sizeof(unsigned char)); + if(use_real_img == 1){ + read_bin(input_path, input_data, width, height); + } else { + fill(input_data, 3, width, height); + } + gettimeofday(&t1, NULL); + ret = quantify_cpu(input_data, output_cpu, height, width); + gettimeofday(&t2, NULL); + printf("Quantify CPU using time = %ld(us)\n", TIME_COST_US(t1, t2)); + if(ret != 0){ + free(input_data); + free(output_cpu); + free(output_tpu); + return ret; + } + + ret = quantify_tpu(input_data, output_tpu, height, width, handle); + if(ret != 0){ + free(input_data); + free(output_cpu); + free(output_tpu); + return ret; + } + ret = cmp(output_tpu, output_cpu, width * height * 3); + if (ret == 0) { + printf("Compare TPU result with CPU result successfully!\n"); + if (use_real_img == 1) { + write_bin(output_path, output_tpu, width, height); + } + } else { + printf("cpu and tpu failed to compare \n"); + } + free(input_data); + free(output_cpu); + free(output_tpu); + return ret; +} + +void* test_quantify(void* args) { + cv_quantify_thread_arg_t* cv_quantify_thread_arg = (cv_quantify_thread_arg_t*)args; + int loop_num = cv_quantify_thread_arg->loop_num; + int use_real_img = cv_quantify_thread_arg->use_real_img; + int height = 
cv_quantify_thread_arg->height; + int width = cv_quantify_thread_arg->width; + char* input_path = cv_quantify_thread_arg->input_path; + char* output_path = cv_quantify_thread_arg->output_path; + bm_handle_t handle = cv_quantify_thread_arg->handle; + for (int i = 0; i < loop_num; i++) { + // if(loop_num > 1) { + // width = 1 + rand() % 8192; + // height = 1 + rand() % 8192; + // } + if (0 != test_quantify_random(height, width, use_real_img, input_path, output_path, handle)){ + printf("------TEST CV_QUANTIFY FAILED------\n"); + exit(-1); + } + printf("------TEST CV_QUANTIFY PASSED!------\n"); + } + return NULL; +} + +int main(int argc, char* args[]) { + struct timespec tp; + clock_gettime(0, &tp); + unsigned int seed = tp.tv_nsec; + srand(seed); + int use_real_img = 0; + int loop = 1; + int height = 1 + rand() % 8192; + int width = 1 + rand() % 8192; + int thread_num = 1; + int check = 0; + char *input_path = NULL; + char *output_path = NULL; + bm_handle_t handle; + bm_status_t ret = bm_dev_request(&handle, 0); + if (ret != BM_SUCCESS) { + printf("Create bm handle failed. ret = %d\n", ret); + return -1; + } + + if (argc == 2 && atoi(args[1]) == -1) { + printf("usage:\n"); + printf("%s thread_num loop use_real_img height width input_path output_path(when use_real_img = 1,need to set input_path and output_path) \n", args[0]); + printf("example:\n"); + printf("%s \n", args[0]); + printf("%s 2\n", args[0]); + printf("%s 2 1 0 512 512 \n", args[0]); + printf("%s 1 1 1 1920 1080 res/1920x1080_rgbp.bin out/out_quantify.bin \n", args[0]); + return 0; + } + + if (argc > 1) thread_num = atoi(args[1]); + if (argc > 2) loop = atoi(args[2]); + if (argc > 3) use_real_img = atoi(args[3]); + if (argc > 4) width = atoi(args[4]); + if (argc > 5) height = atoi(args[5]); + if (argc > 6) input_path = args[6]; + if (argc > 7) output_path = args[7]; + check = parameters_check(height, width); + if (check) { + printf("Parameters Failed! 
\n"); + return check; + } + + // test for multi-thread + pthread_t pid[thread_num]; + cv_quantify_thread_arg_t cv_quantify_thread_arg[thread_num]; + for (int i = 0; i < thread_num; i++) { + cv_quantify_thread_arg[i].loop_num = loop; + cv_quantify_thread_arg[i].height = height; + cv_quantify_thread_arg[i].width = width; + cv_quantify_thread_arg[i].use_real_img = use_real_img; + cv_quantify_thread_arg[i].input_path = input_path; + cv_quantify_thread_arg[i].output_path = output_path; + cv_quantify_thread_arg[i].handle = handle; + if (pthread_create(pid + i, NULL, test_quantify, cv_quantify_thread_arg + i) != 0) { + printf("create thread failed\n"); + return -1; + } + } + for (int i = 0; i < thread_num; i++) { + int ret = pthread_join(pid[i], NULL); + if (ret != 0) { + printf("Thread join failed\n"); + exit(-1); + } + } + bm_dev_free(handle); + return ret; +} diff --git a/bmvid/bmcv/test/test_faiss_indexPQ.cpp b/bmvid/bmcv/test/test_faiss_indexPQ.cpp index af5c2c5..43a43b6 100644 --- a/bmvid/bmcv/test/test_faiss_indexPQ.cpp +++ b/bmvid/bmcv/test/test_faiss_indexPQ.cpp @@ -62,31 +62,33 @@ bm_status_t compare_result_with_faiss_ADC( faiss::IndexPQ index(m * dsub, M, nbits, metric); index.pq.verbose = 1; // training log // index.pq.cp.spherical = true; //normalize centroids - index.train(ntotal, database_input_sys); - index.reset(); - index.add(ntotal, database_input_sys); - std::cout << "------------set input-----------" << std::endl; std::mt19937 rng; rng.seed(seed); std::uniform_real_distribution dist_value(-10, 10); - for (int i = 0; i < nx; i++) + + for (int i = 0; i < ny; i++) { for (int j = 0; j < d; j++) { - nxquery_input_sys[i * d + j] = dist_value(rng); + database_input_sys[i * d + j] = dist_value(rng); } } - // faiss::fvec_renorm_L2(d, 1, query_input_sys); //normalize query - std::cout << "set query, done" << std::endl; - for (int i = 0; i < ny; i++) + std::cout << "set database, done" << std::endl; + + index.train(ntotal, database_input_sys); + index.reset(); + 
index.add(ntotal, database_input_sys); + + for (int i = 0; i < nx; i++) { for (int j = 0; j < d; j++) { - database_input_sys[i * d + j] = dist_value(rng); + nxquery_input_sys[i * d + j] = dist_value(rng); } } - std::cout << "set database, done" << std::endl; + // faiss::fvec_renorm_L2(d, 1, query_input_sys); //normalize query + std::cout << "set query, done" << std::endl; for (size_t i = 0; i < M; i++) { for (size_t j = 0; j < ks; j++) @@ -156,8 +158,8 @@ bm_status_t compare_result_with_faiss_ADC( std::cout << "tpu: index:" << index_output_sys[i] << "\t"; std::cout << "distance:" << distance_output_sys[i]; std::cout << std::endl; - std::cout << "faiss: index:" << index_output_sys[i] << "\t"; - std::cout << "distance:" << distance_output_sys[i]; + std::cout << "faiss: index:" << fs_index_output[i] << "\t"; + std::cout << "distance:" << fs_distance_output[i]; std::cout << std::endl; } @@ -225,37 +227,40 @@ bm_status_t compare_result_with_faiss_SDC( faiss::IndexPQ index(d, M, nbits, metric); index.pq.verbose = 1; // training log // index.pq.cp.spherical = true; //normalize centroids - index.train(ntotal, database_input_sys); - index.reset(); - index.add(ntotal, database_input_sys); - - index.pq.compute_codes(query_input_sys, fs_query_codes, nx); - index.pq.compute_sdc_table(); - std::cout << "------------set input-----------\n" << std::endl; std::mt19937 rng; rng.seed(seed); std::uniform_real_distribution dist_value(-10, 10); - for (int i = 0; i < nx; i++) + for (int i = 0; i < ny; i++) { for (int j = 0; j < d; j++) { - query_input_sys[i * d + j] = dist_value(rng); + database_input_sys[i * d + j] = dist_value(rng); } } - // faiss::fvec_renorm_L2(d, 1, query_input_sys); //normalize query - std::cout << "set query, done\n" + std::cout << "set database, done\n" << std::endl; - for (int i = 0; i < ny; i++) + + index.train(ntotal, database_input_sys); + index.reset(); + index.add(ntotal, database_input_sys); + + index.pq.compute_codes(query_input_sys, fs_query_codes, nx); 
+ index.pq.compute_sdc_table(); + + + for (int i = 0; i < nx; i++) { for (int j = 0; j < d; j++) { - database_input_sys[i * d + j] = dist_value(rng); + query_input_sys[i * d + j] = dist_value(rng); } } - std::cout << "set database, done\n" + // faiss::fvec_renorm_L2(d, 1, query_input_sys); //normalize query + std::cout << "set query, done\n" << std::endl; + for (size_t i = 0; i < M; i++) { for (size_t j = 0; j < ks; j++) diff --git a/bmvid/bmcv/vppion.o b/bmvid/bmcv/vppion.o index 89283fc..a5c3c14 100644 Binary files a/bmvid/bmcv/vppion.o and b/bmvid/bmcv/vppion.o differ diff --git a/bmvid/bmcv/vpplib.o b/bmvid/bmcv/vpplib.o index 552c2e5..cd9ac89 100644 Binary files a/bmvid/bmcv/vpplib.o and b/bmvid/bmcv/vpplib.o differ diff --git a/bmvid/bmcv/vpptest.o b/bmvid/bmcv/vpptest.o index b845772..f60feed 100644 Binary files a/bmvid/bmcv/vpptest.o and b/bmvid/bmcv/vpptest.o differ diff --git a/bmvid/build/build.sh b/bmvid/build/build.sh index b401e26..b85448b 100755 --- a/bmvid/build/build.sh +++ b/bmvid/build/build.sh @@ -864,6 +864,7 @@ function build_bmcv_lib() # build_jpu_lib || return $? # build_vpp_lib || return $? + update_bmcv_commit_and_branch if [ -n "$1" ]; then BMCV_OUTPUT_DIR=${BMVID_OUTPUT_DIR} MAKE_OPT="USING_CMODEL=$1 OUT_DIR=${BMCV_OUTPUT_DIR}" @@ -1086,6 +1087,36 @@ function clean_bmcv_test() return 0 } +function update_bmcv_commit_and_branch() +{ + file_path=$(find "$(git rev-parse --show-toplevel)" -type f -name "bmcv_internal.cpp" -print -quit) + + if [ -n "$file_path" ]; then + file_dir=$(dirname "$file_path") + pushd . 
> /dev/null + + cd "$file_dir" || exit + + if git rev-parse --git-dir > /dev/null 2>&1; then + commit_hash=$(git log -1 --pretty=format:"%H") + branch_name=$(git branch --contains HEAD | sed -n '/\* /s///p') + + sed -i "s|#define COMMIT_HASH .*|#define COMMIT_HASH \"$commit_hash\"|" "bmcv_internal.cpp" + sed -i "s|#define BRANCH_NAME .*|#define BRANCH_NAME \"$branch_name\"|" "bmcv_internal.cpp" + + echo "Commit hash $commit_hash has been written to $file_path" + echo "Branch name $branch_name has been written to $file_path" + else + echo "This directory is not a git repository." + fi + + popd > /dev/null + else + echo "bmcv_internal.cpp not found." + fi +} + + if [ -z ${BMVID_OUTPUT_DIR} ]; then echo "warning: source envsetup_sh at first!!!!!" exit 0 diff --git a/bmvid/build/version.mak b/bmvid/build/version.mak new file mode 100644 index 0000000..1a45589 --- /dev/null +++ b/bmvid/build/version.mak @@ -0,0 +1,2 @@ +SO_VERSION=".0.11.0" +SO_NAME=".0" diff --git a/bmvid/cmake/FindBMVidTarget.cmake b/bmvid/cmake/FindBMVidTarget.cmake index 49cc73a..9d78b6e 100644 --- a/bmvid/cmake/FindBMVidTarget.cmake +++ b/bmvid/cmake/FindBMVidTarget.cmake @@ -126,19 +126,17 @@ function(ADD_TARGET_ION_LIB target_name chip_name platform subtype debug ion com endfunction(ADD_TARGET_ION_LIB) function(ADD_TARGET_JPU_LIB target_name chip_name platform subtype debug ion component out_abs_path ion_abs_path) - - set(JPULITE_LIB_TARGET ${out_abs_path}/lib/libbmjpulite.so) - set(JPUAPI_LIB_TARGET ${out_abs_path}/lib/libbmjpuapi.so) set(JPU_HEADER_TARGET ${out_abs_path}/include) set(JPU_APP_TARGET ${out_abs_path}/bin) - set(JPU_EXPORT_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/jpeg/driver/bmjpuapi/inc/bmjpuapi_jpeg.h - ${CMAKE_CURRENT_SOURCE_DIR}/jpeg/driver/bmjpuapi/inc/bmjpuapi.h - ${CMAKE_CURRENT_SOURCE_DIR}/jpeg/driver/bmjpulite/inc/jpu_io.h - ${CMAKE_CURRENT_SOURCE_DIR}/jpeg/driver/bmjpulite/inc/jpu_lib.h - ${CMAKE_CURRENT_SOURCE_DIR}/jpeg/driver/bmjpulite/inc/jpu_logging.h) - 
set(JPU_EXPORT_BINS ${JPU_APP_TARGET}/bmjpegdec ${JPU_APP_TARGET}/bmjpegenc ${JPU_APP_TARGET}/bmjpegmulti) - - if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/jpeg/driver/bmjpulite) + set(JPU_LIB_TARGET ${out_abs_path}/lib) + set(JPULITE_LIB_TARGET ${JPU_LIB_TARGET}/libbmjpulite.so) + set(JPUAPI_LIB_TARGET ${JPU_LIB_TARGET}/libbmjpuapi.so) + set(JPU_BINARY_HEADER_PATH ${CMAKE_CURRENT_SOURCE_DIR}/jpeg/binary/${platform}/include) + set(JPU_BINARY_APP_PATH ${CMAKE_CURRENT_SOURCE_DIR}/jpeg/binary/${platform}/bin) + set(JPU_BINARY_LIB_PATH ${CMAKE_CURRENT_SOURCE_DIR}/jpeg/binary/${platform}/lib) + + if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/jpeg/driver/bmjpulite AND EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/jpeg/driver/bmjpuapi) + # build from source code add_custom_command(OUTPUT ${JPULITE_LIB_TARGET} COMMAND make clean CHIP=${chip_name} PRODUCTFORM=${platform} COMMAND ${CMAKE_COMMAND} -E chdir .. ./update_version.sh @@ -180,32 +178,41 @@ function(ADD_TARGET_JPU_LIB target_name chip_name platform subtype debug ion com COMMAND ${CMAKE_COMMAND} -E chdir .. 
git checkout -- include/version.h DEPENDS ${JPULITE_LIB_TARGET} WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/jpeg/driver/bmjpuapi) + + add_custom_target(${target_name} ALL DEPENDS ${JPULITE_LIB_TARGET} ${JPUAPI_LIB_TARGET}) + + # update binary + add_custom_command(TARGET ${target_name} + POST_BUILD + COMMENT "copy to binary" + COMMAND rm -rf ${JPU_BINARY_LIB_PATH} + COMMAND mkdir -p ${JPU_BINARY_LIB_PATH} && cp -rd ${JPU_LIB_TARGET}/*jpu* ${JPU_BINARY_LIB_PATH}/ + COMMAND rm -rf ${JPU_BINARY_HEADER_PATH} + COMMAND mkdir -p ${JPU_BINARY_HEADER_PATH} && cp -rd ${JPU_HEADER_TARGET}/*jpu* ${JPU_BINARY_HEADER_PATH}/ + COMMAND rm -rf ${JPU_BINARY_APP_PATH} + COMMAND mkdir -p ${JPU_BINARY_APP_PATH} && cp -rd ${JPU_APP_TARGET}/bmjpeg* ${JPU_BINARY_APP_PATH}/) else() - file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/jpeg/binary/${platform}/lib/libbmjpulite.so DESTINATION ${out_abs_path}/lib/ FOLLOW_SYMLINK_CHAIN) - file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/jpeg/binary/${platform}/lib/libbmjpuapi.so DESTINATION ${out_abs_path}/lib/ FOLLOW_SYMLINK_CHAIN) + # no source code, copy from binary + file(MAKE_DIRECTORY ${JPU_LIB_TARGET}) + file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/jpeg/binary/${platform}/lib/libbmjpulite.so DESTINATION ${JPU_LIB_TARGET}/ FOLLOW_SYMLINK_CHAIN) + file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/jpeg/binary/${platform}/lib/libbmjpuapi.so DESTINATION ${JPU_LIB_TARGET}/ FOLLOW_SYMLINK_CHAIN) + + file(MAKE_DIRECTORY ${JPU_HEADER_TARGET}) foreach(file_i ${CMAKE_CURRENT_SOURCE_DIR}/jpeg/binary/${platform}/include/) - file(COPY ${file_i} DESTINATION ${out_abs_path}/include/) + file(COPY ${file_i} DESTINATION ${JPU_HEADER_TARGET}/) endforeach( file_i ) + + file(MAKE_DIRECTORY ${JPU_APP_TARGET}) foreach(file_i ${CMAKE_CURRENT_SOURCE_DIR}/jpeg/binary/${platform}/bin/) - file(COPY ${file_i} DESTINATION ${out_abs_path}/bin/) + file(COPY ${file_i} DESTINATION ${JPU_APP_TARGET}/) endforeach( file_i ) - endif() - add_custom_target (${target_name} ALL DEPENDS ${JPULITE_LIB_TARGET} 
${JPUAPI_LIB_TARGET}) - if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/jpeg/driver/bmjpulite) - add_custom_command(TARGET ${target_name} POST_BUILD - COMMAND rm -f ${CMAKE_CURRENT_SOURCE_DIR}/jpeg/binary/${platform}/include/* - COMMAND cp ${JPU_EXPORT_HEADERS} ${CMAKE_CURRENT_SOURCE_DIR}/jpeg/binary/${platform}/include/ - COMMAND rm -f ${CMAKE_CURRENT_SOURCE_DIR}/jpeg/binary/${platform}/lib/* - COMMAND cp -d ${JPULITE_LIB_TARGET}* ${CMAKE_CURRENT_SOURCE_DIR}/jpeg/binary/${platform}/lib/ - COMMAND cp -d ${JPUAPI_LIB_TARGET}* ${CMAKE_CURRENT_SOURCE_DIR}/jpeg/binary/${platform}/lib/ - COMMAND rm -f ${CMAKE_CURRENT_SOURCE_DIR}/jpeg/binary/${platform}/bin/* - COMMAND cp ${JPU_EXPORT_BINS} ${CMAKE_CURRENT_SOURCE_DIR}/jpeg/binary/${platform}/bin/) + add_custom_target(${target_name} ALL) endif() get_filename_component(JPUAPI_LIB_FILENAME ${JPUAPI_LIB_TARGET} NAME) get_filename_component(JPULITE_LIB_FILENAME ${JPULITE_LIB_TARGET} NAME) - install(DIRECTORY ${out_abs_path}/lib/ + install(DIRECTORY ${JPU_LIB_TARGET}/ DESTINATION lib FILE_PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE COMPONENT ${component} diff --git a/bmvid/document/bmApi/BmApi.md b/bmvid/document/bmApi/BmApi.md new file mode 100644 index 0000000..d97f3e2 --- /dev/null +++ b/bmvid/document/bmApi/BmApi.md @@ -0,0 +1,2196 @@ + + +**目录** + +- [一、 bm\_video Decode 数据结构 \& API说明](#一-bm_video-decode-数据结构--api说明) + - [bm\_video Decode 数据结构](#bm_video-decode-数据结构) + - [1. BmVpuDecStreamFormat](#1-BmVpuDecStreamFormat) + - [2. BmVpuDecSkipMode](#2-BmVpuDecSkipMode) + - [3. BmVpuDecDMABuffer](#3-BmVpuDecDMABuffer) + - [4. BmVpuDecOutputMapType](#-BmVpuDecOutputMapType) + - [5. BmVpuDecBitStreamMode](#6-BmVpuDecBitStreamMode) + - [6. BmVpuDecPixFormat](#7-BmVpuDecPixFormat) + - [7. BMVidDecParam](#8-BMVidDecParam) + - [8. BMDecStatus](#9-BMDecStatus) + - [9. BMDecOutputMapType](#10-BMDecOutputMapType) + - [10. BMVidStream](#11-BMVidStream) + - [11. 
BMVidFrame](#12-BMVidFrame) + - [12. BMVidStreamInfo](#13-BMVidStreamInfo) + - [13. BmVpuDecPicType](#14-BmVpuDecPicType) + - [bm\_video Decode API说明](#bm_video-decode-api说明) + - [1. bmvpu\_dec\_create](#1-bmvpu_dec_create) + - [2. bmvpu\_dec\_get\_status](#2-bmvpu_dec_get_status) + - [3. bmvpu\_dec\_decode](#3-bmvpu_dec_decode) + - [4. bmvpu\_dec\_get\_caps](#4-bmvpu_dec_get_caps) + - [5. bmvpu\_dec\_get\_output](#5-bmvpu_dec_get_output) + - [6. bmvpu\_dec\_clear\_output](#6-bmvpu_dec_clear_output) + - [7. bmvpu\_dec\_flush](#7-bmvpu_dec_flush) + - [8. bmvpu\_dec\_delete](#8-bmvpu_dec_delete) + - [9. bmvpu\_dec\_get\_stream\_buffer\_empty\_size](#9-bmvpu_dec_get_stream_buffer_empty_size) + - [10. bmvpu\_dec\_all\_frame\_in\_buffer](#10-bmvpu_dec_all_frame_in_buffer) + - [11. bmvpu\_dec\_get\_empty\_input\_buffer\_cnt](#11-bmvpu_dec_get_empty_input_buffer_cnt) + - [12. bmvpu\_dec\_get\_pkt\_in\_buf\_count](#12-bmvpu_dec_get_pkt_in_buf_count) + - [13. bmvpu\_dec\_reset](#13-bmvpu_dec_reset) + - [14. bmvpu\_dec\_get\_core\_idx](#14-bmvpu_dec_get_core_idx) + - [15. bmvpu\_dec\_dump\_stream](#15-bmvpu_dec_dump_stream) + - [16. bmvpu\_dec\_get\_inst\_idx](#16-bmvpu_dec_get_inst_idx) + - [17. bmvpu\_dec\_get\_stream\_info](#17-bmvpu_dec_get_stream_info) + - [18. bmvpu\_dec\_set\_logging\_threshold](#18-bmvpu_dec_set_logging_threshold) + - [Frame Buffer 计算方法](#Frame-Buffer-计算方法) + - [1. compress frame data](#1.-compress-frame-data) + - [2. compress frame table](#2.-compress-frame-table) + - [3. linear frame buffer](#3.-linear-frame-buffer) +- [二、 bm\_video Encode 数据结构 \& API说明](#二-bm_video-encode-数据结构--api说明) + - [bm\_video Encode 枚举类型](#bm_video-encode-枚举类型) + - [1. BmVpuEncReturnCodes](#1-bmvpuencreturncodes) + - [2. BmVpuEncOutputCodes](#2-bmvpuencoutputcodes) + - [3. BmVpuEncHeaderDataTypes](#3-bmvpuencheaderdatatypes) + - [4. BmVpuCodecFormat](#4-bmvpucodecformat) + - [5. BmVpuEncPixFormat](#5-bmvpuencpixformat) + - [6. 
BMVpuEncGopPreset](#6-bmvpuencgoppreset) + - [7. BMVpuEncMode](#7-bmvpuencmode) + - [8. BmVpuMappingFlags](#8-bmvpumappingflags) + - [bm\_video Encode 数据结构](#bm_video-encode-数据结构) + - [1. BmVpuEncH264Params](#1-bmvpuench264params) + - [2. BmVpuEncH265Params](#2-bmvpuench265params) + - [3. BmVpuEncOpenParams](#3-bmvpuencopenparams) + - [4. BmVpuEncInitialInfo](#4-bmvpuencinitialinfo) + - [5. BmCustomMapOpt](#5-bmcustommapopt) + - [6. BmVpuEncParams](#6-bmvpuencparams) + - [7. BmVpuEncoder](#7-bmvpuencoder) + - [8. BmVpuFbInfo](#8-bmvpufbinfo) + - [9. BmVpuEncodedFrame](#9-bmvpuencodedframe) + - [10. BmVpuEncDMABuffer](#10-bmvpuencdmabuffer) + - [11. BmVpuRawFrame](#11-bmvpurawframe) + - [12. BmVpuFramebuffer](#12-bmvpuframebuffer) + - [bm\_video Encode API](#bm_video-encode-api) + - [1. bmvpu\_enc\_error\_string](#1-bmvpu_enc_error_string) + - [2. bmvpu\_enc\_get\_core\_idx](#2-bmvpu_enc_get_core_idx) + - [3. bmvpu\_enc\_load](#3-bmvpu_enc_load) + - [4. bmvpu\_enc\_unload](#4-bmvpu_enc_unload) + - [5. bmvpu\_enc\_get\_bitstream\_buffer\_info](#5-bmvpu_enc_get_bitstream_buffer_info) + - [6. bmvpu\_enc\_set\_default\_open\_params](#6-bmvpu_enc_set_default_open_params) + - [7. bmvpu\_fill\_framebuffer\_params](#7-bmvpu_fill_framebuffer_params) + - [8. bmvpu\_enc\_open](#8-bmvpu_enc_open) + - [9. bmvpu\_enc\_close](#9-bmvpu_enc_close) + - [10. bmvpu\_enc\_encode](#10-bmvpu_enc_encode) + - [11. bmvpu\_enc\_dma\_buffer\_allocate](#11-bmvpu_enc_dma_buffer_allocate) + - [12. bmvpu\_enc\_dma\_buffer\_deallocate](#12-bmvpu_enc_dma_buffer_deallocate) + - [13. bmvpu\_enc\_dma\_buffer\_attach](#13-bmvpu_enc_dma_buffer_attach) + - [14. bmvpu\_enc\_dma\_buffer\_deattach](#14-bmvpu_enc_dma_buffer_deattach) + - [15. bmvpu\_dma\_buffer\_map](#15-bmvpu_dma_buffer_map) + - [16. bmvpu\_dma\_buffer\_unmap](#16-bmvpu_dma_buffer_unmap) + - [17. bmvpu\_enc\_dma\_buffer\_flush](#17-bmvpu_enc_dma_buffer_flush) + - [18. 
bmvpu\_enc\_dma\_buffer\_invalidate](#18-bmvpu_enc_dma_buffer_invalidate) + - [19. bmvpu\_enc\_dma\_buffer\_get\_physical\_address](#19-bmvpu_enc_dma_buffer_get_physical_address) + - [20. bmvpu\_enc\_dma\_buffer\_get\_size](#20-bmvpu_enc_dma_buffer_get_size) + - [21. bmvpu\_enc\_upload\_data](#21-bmvpu_enc_upload_data) + - [22. bmvpu\_enc\_download\_data](#22-bmvpu_enc_download_data) +- [三、 JPU通用结构体](#三-jpu通用结构体) + - [JPU枚举类型](#jpu枚举类型) + - [1. BmJpuLogLevel](#1-bmjpuloglevel) + - [2. BmJpuImageFormat](#2-bmjpuimageformat) + - [3. BmJpuColorFormat](#3-bmjpucolorformat) + - [4. BmJpuChromaFormat](#4-bmjpuchromaformat) + - [5. BmJpuRotateAngle](#5-bmjpurotateangle) + - [6. BmJpuMirrorDirection](#6-bmjpumirrordirection) + - [JPU通用结构体](#jpu通用结构体) + - [1. BmJpuFramebuffer](#1-bmjpuframebuffer) + - [2. BmJpuFramebufferSizes](#2-bmjpuframebuffersizes) + - [3. BmJpuRawFrame](#3-bmjpurawframe) +- [四、 jpeg Decode 数据结构 \& API说明](#四-jpeg-decode-数据结构--api说明) + - [jpeg Decode 结构体](#jpeg-decode-结构体) + - [1. BmJpuJPEGDecInfo](#1-bmjpujpegdecinfo) + - [2. BmJpuJPEGDecoder](#2-bmjpujpegdecoder) + - [3. BmJpuDecOpenParams](#3-bmjpudecopenparams) + - [4. BmJpuDecInitialInfo](#4-bmjpudecinitialinfo) + - [5. BmJpuDecReturnCodes](#5-bmjpudecreturncodes) + - [jpeg Decode API](#jpeg-decode-api) + - [1. bm\_jpu\_dec\_load](#1-bm_jpu_dec_load) + - [2. bm\_jpu\_jpeg\_dec\_open](#2-bm_jpu_jpeg_dec_open) + - [3. bm\_jpu\_jpeg\_dec\_decode](#3-bm_jpu_jpeg_dec_decode) + - [4. bm\_jpu\_jpeg\_dec\_get\_info](#4-bm_jpu_jpeg_dec_get_info) + - [5. bm\_jpu\_jpeg\_dec\_frame\_finished](#5-bm_jpu_jpeg_dec_frame_finished) + - [6. bm\_jpu\_jpeg\_dec\_close](#6-bm_jpu_jpeg_dec_close) + - [7. bm\_jpu\_dec\_unload](#7-bm_jpu_dec_unload) + - [8. bm\_jpu\_calc\_framebuffer\_sizes](#8-bm_jpu_calc_framebuffer_sizes) + - [9. bm\_jpu\_dec\_error\_string](#9-bm_jpu_dec_error_string) + - [10. bm\_jpu\_dec\_get\_bm\_handle](#10-bm_jpu_dec_get_bm_handle) + - [11. 
bm\_jpu\_jpeg\_dec\_flush](#11-bm_jpu_jpeg_dec_flush) +- [五、 jpeg Encode 数据结构 \& API说明](#五-jpeg-encode-数据结构--api说明) + - [jpeg Encode 结构体](#jpeg-encode-结构体) + - [1. BmJpuJPEGEncParams](#1-bmjpujpegencparams) + - [2. BmJpuJPEGEncoder](#2-bmjpujpegencoder) + - [3. BmJpuEncInitialInfo](#3-bmjpuencinitialinfo) + - [4. BmJpuEncReturnCodes](#4-bmjpuencreturncodes) + - [jpeg Encode API](#jpeg-encode-api) + - [1. bm\_jpu\_enc\_load](#1-bm_jpu_enc_load) + - [2. bm\_jpu\_jpeg\_enc\_open](#2-bm_jpu_jpeg_enc_open) + - [3. bm\_jpu\_jpeg\_enc\_encode](#3-bm_jpu_jpeg_enc_encode) + - [4. bm\_jpu\_jpeg\_enc\_close](#4-bm_jpu_jpeg_enc_close) + - [5. bm\_jpu\_enc\_unload](#5-bm_jpu_enc_unload) + - [6. bm\_jpu\_enc\_error\_string](#6-bm_jpu_enc_error_string) + - [7. bm\_jpu\_enc\_get\_bm\_handle](#7-bm_jpu_enc_get_bm_handle) + - [jpeg Encode Callback](#jpeg-encode-callback) + - [1. BmJpuEncAcquireOutputBuffer](#1-bmjpuencacquireoutputbuffer) + - [2. BmJpuEncFinishOutputBuffer](#2-bmjpuencfinishoutputbuffer) + - [3. BmJpuWriteOutputData](#3-bmjpuwriteoutputdata) + + +
+ +# 一、 bm_video Decode 数据结构 & API说明 + +## bm_video Decode 数据结构 +- BmVpuDecStreamFormat +- BmVpuDecSkipMode +- BmVpuDecDMABuffer +- BmVpuDecOutputMapType +- BmVpuDecBitStreamMode +- BmVpuDecPixFormat +- BMVidDecParam +- BMDecStatus +- BMDecOutputMapType +- BMVidStream +- BMVidFrame +- BMVidStreamInfo +- BmVpuDecPicType + +### 1. BmVpuDecStreamFormat +设置输入码流的格式。 +- BMDEC_AVC 表示输入码流满足 AVC 编码标准; +- BMDEC_HEVC 表示输入码流满足 HEVC 编码标准。 + +### 2. BmVpuDecSkipMode +设置跳帧模式 + +| 成员变量 | 描述 | +|------------------|----------------------| +| BMDEC_FRAME_SKIP_MODE | 不开启跳帧模式 | +| BMDEC_SKIP_NON_REF_NON_I | 开启跳帧模式,跳过除参考帧和I帧外的视频帧 | +| BMDEC_SKIP_NON_I | 开启跳帧模式,跳过除I帧外的视频帧 | +| | | + +### 3. BmVpuDecDMABuffer +保存 VPU 缓冲区的信息 + +| 成员变量 | 描述 | +|------------------|----------------------| +| size | 缓冲区的大小 | +| phys_addr | 缓冲区的物理地址 | +| virt_addr | 缓冲区的虚拟地址 | +| | | + +### 4. BmVpuDecOutputMapType +设置输出数据的类型。 +- BMDEC_OUTPUT_UNMAP 输出 yuv 数据; +- BMDEC_OUTPUT_COMPRESSED 输出压缩模式数据。 + +### 5. BmVpuDecBitStreamMode +设置 VPU 解码方式。 +- BMDEC_BS_MODE_INTERRUPT 采用流模式解码,当输入缓冲区填满后送入解码器; +- BMDEC_BS_MODE_PIC_END 采用帧模式解码,获取到一帧数据就送入解码器。 + +### 6. BmVpuDecPixFormat +设置输出数据的格式 + +| 成员变量 | 描述 | +|------------------|----------------------| +| BM_VPU_DEC_PIX_FORMAT_YUV420P | 输出 YUV420P 数据 | +| BM_VPU_DEC_PIX_FORMAT_YUV422P | 输出 YUV422P 数据,BM1684 不支持 | +| BM_VPU_DEC_PIX_FORMAT_YUV444P | 输出 YUV444P 数据,BM1684 不支持 | +| BM_VPU_DEC_PIX_FORMAT_YUV400 | 输出 YUV400 数据,BM1684 不支持 | +| BM_VPU_DEC_PIX_FORMAT_NV12 | 输出 NV12 数据 | +| BM_VPU_DEC_PIX_FORMAT_NV21 | 输出 NV21 数据 | +| BM_VPU_DEC_PIX_FORMAT_NV16 | 输出 NV16 数据,BM1684 不支持 | +| BM_VPU_DEC_PIX_FORMAT_NV24 | 输出 NV24 数据,BM1684 不支持 | +| BM_VPU_DEC_PIX_FORMAT_COMPRESSED | 输出压缩格式数据 | +| BM_VPU_DEC_PIX_FORMAT_COMPRESSED_10BITS | 输出10bits压缩格式数据,BM1684 不支持 | +| | | + +### 7. 
BMVidDecRetStatus +解码器接口返回的错误码类型 +| 成员变量 | 描述 | +|------------------|----------------------| +| BM_ERR_VDEC_INVALID_CHNID | 无效的解码channel id | +| BM_ERR_VDEC_ILLEGAL_PARAM | 非法参数 | +| BM_ERR_VDEC_EXIST | 解码channel已存在 | +| BM_ERR_VDEC_UNEXIST | 解码channel不存在 | +| BM_ERR_VDEC_NULL_PTR | 空指针 | +| BM_ERR_VDEC_NOT_CONFIG | 解码器未配置 | +| BM_ERR_VDEC_NOT_SUPPORT | 不支持的解码业务 | +| BM_ERR_VDEC_NOT_PERM | 参数异常 | +| BM_ERR_VDEC_INVALID_PIPEID | 非法的PIPEID | +| BM_ERR_VDEC_INVALID_GRPID | 非法的GRPID | +| BM_ERR_VDEC_NOMEM | 存储空间异常 | +| BM_ERR_VDEC_NOBUF | 缓冲区异常 | +| BM_ERR_VDEC_BUF_EMPTY | 缓冲区空 | +| BM_ERR_VDEC_BUF_FULL | 缓冲区满 | +| BM_ERR_VDEC_SYS_NOTREADY | 解码器未准备就绪 | +| BM_ERR_VDEC_BADADDR | 错误地址 | +| BM_ERR_VDEC_BUSY | 解码器忙 | +| BM_ERR_VDEC_SIZE_NOT_ENOUGH | 空间不足 | +| BM_ERR_VDEC_INVALID_VB | 无效的VB | +| BM_ERR_VDEC_ERR_INIT | 解码器初始化错误 | +| BM_ERR_VDEC_ERR_INVALID_RET | 无效返回值 | +| BM_ERR_VDEC_ERR_SEQ_OPER | 队列处理异常 | +| BM_ERR_VDEC_ERR_VDEC_MUTEX | 信号量异常 | +| BM_ERR_VDEC_ERR_SEND_FAILED | 发送失败 | +| BM_ERR_VDEC_ERR_GET_FAILED | 获取失败 | +| BM_ERR_VDEC_ERR_HUNG | 解码器挂起 | + + + +### 8. BMVidDecParam +BMVidDecParam 用于设置解码器的初始化参数,在调用接口 bmvpu_dec_create 前需要创建 BMVidDecParam 对象,并对其进行初始化。 + +| 成员变量 | 类型 | 描述 | +|-------------------|--------|-----------------------------------------| +| streamFormat | BmVpuDecStreamFormat | 设置输入码流类型,BMDEC_AVC 为 H.264(AVC),BMDEC_HEVC 为 H.265(HEVC) | +| wtlFormat | BmVpuDecOutputMapType | 设置输出数据格式 | +| skip_mode | BmVpuDecSkipMode | 设置跳帧模式 | +| bsMode | BmVpuDecBitStreamMode | 设置解码器工作方式。
0 以 INTERRUPT 模式工作;
2 以 PIC_END 模式工作。| +| enableCrop | int | 是否启用裁剪选项,此参数无效| +| pixel_format | BmVpuDecPixFormat | 输出图像格式 | +| secondaryAXI | int | 是否开启 secondary AXI。SDK 中会根据码流类型,自动选择,不需要手动开启 | +| mp4class | int | MPEG_4,此参数无效 | +| frameDelay | int | 帧延迟输出,大于0时,在解码frameDelay帧后输出显示帧,此参数无效 | +| pcie_board_id | int | PCIE板卡的设备id | +| pcie_no_copyback | int | pcie模式,解码输出数据不拷贝回host端 | +| enable_cache | int | 启用缓存,提高内存拷贝速度,但会增加算力、带宽等的消耗 | +| perf | int | 性能监测功能 | +| core_idx | int | 解码核选择。1684x core_idx 可以配置为 0,1,-1;配置为-1时,会根据解码器负载自动选择解码核 | +| timeout | int | 解码超时时间,默认为3000ms(即VPU_WAIT_TIME_OUT) | +| timeout_count | int | 解码超时重试次数,默认为5 | +| extraFrameBufferNum | int | 除去vpu所必要的Frame Buffer 外,用户额外需要的 Frame Buffer 的数量。 | +| min_framebuf_cnt | int | 输入码流所需要的最小的 Frame Buffer 的数量。 | +| framebuf_delay | int | 解码延迟出帧所需要的 Frame Buffer 的数量。 | +| streamBufferSize | int | 设置输入码流的缓冲区大小。
若设置为 0,则默认缓冲区大小为 0x700000。| +| bitstream_buffer | BmVpuDecDMABuffer | 输入码流缓冲区信息 | +| frame_buffer | BmVpuDecDMABuffer | Frame Buffer 信息 | +| Ytable_buffer | BmVpuDecDMABuffer | 压缩模式 Y table 缓冲区信息 | +| Ctable_buffer | BmVpuDecDMABuffer | 压缩模式 C table 缓冲区信息 | +| | | | +备注: +- 解码器支持用户自行分配 Bitstream Buffer 和 Frame Buffer。当外部分配内存时,extraFrameBufferNum、min_framebuf_cnt、framebuf_delay、streamBufferSize、bitstream_buffer、frame_buffer、Ytable_buffer、Ctable_buffer 必须配置。 +- Frame Buffer的计算参考(Frame Buffer 计算方法) + + +### 9. BMDecStatus +枚举类型,用于指示解码器的状态。 + +| 状态 | 含义 | +|------------------|----------------------| +| BMDEC_UNCREATE | 解码器未创建(用户无需处理) | +| BMDEC_UNLOADING | 解码器未加载(用户无需处理) | +| BMDEC_UNINIT | 解码器未初始化 | +| BMDEC_WRONG_RESOLUTION | 设置的分辨率不匹配 | +| BMDEC_FRAMEBUFFER_NOTENOUGH | 分配的 Frame Buffer 不足 | +| BMDEC_DECODING | 解码器正在解码 | +| BMDEC_ENDOF | 解码器送帧结束 | +| BMDEC_STOP | 解码器停止解码 | +| BMDEC_HUNG | 解码器⽆响应 | +| BMDEC_CLOSE | 关闭解码器,表示可以开始关闭解码器 | +| BMDEC_CLOSED | 解码器关闭状态 | +| | | + +### 10. BMDecOutputMapType +枚举类型,用于定义编码器输出的YUV格式 + +| 类型 | 含义 | +|------------------|----------------------| +| BMDEC_OUTPUT_UNMAP | 原始YUV数据 | +| BMDEC_OUTPUT_TILED | TILED YUV数据(已弃用) | +| BMDEC_OUTPUT_COMPRESSED | 压缩格式YUV数据 | +| | | + + +### 11. 
BMVidStream +用于表示视频流的实际数据,包括像素值、时间戳等。 + +| 成员变量 | 类型 | 描述 | +|------------------|---------------|-----------------------------------------| +| buf | unsigned char* | 码流信息存储地址 | +| length | unsigned int | 码流信息大小 | +| header_buf | unsigned char* | 码流头信息存储地址 | +| header_size | unsigned int | 码流头信息大小 | +| extradata | unsigned char* | 已弃用:不再接受 extradata 数据。 | +| extradata_size | unsigned int | 已弃用:不再接受 extradata 数据。 | +| pts | unsigned long | 显示时间戳。 | +| dts | unsigned long | 解码时间戳。 | +| | | | + +### 12.BMVidFrame +保存解码器输出的视频帧信息。 + +| 成员变量 | 类型 | 描述 | +|--------------------------|---------------|---------------------------------------------| +| picType | BmVpuDecPicType | 图片类型 | +| buf[8] | unsigned char* | 存放输出数据的地址。前四个通道存储YUV的虚拟地址,后四通道存储YUV的物理地址。0:Y虚拟地址,1:Cb虚拟地址,2:Cr虚拟地址。4:Y物理地址,5:Cb物理地址,6:Cr物理地址。3和7为特殊格式数据的存储通道(如存放透明度数据)| +| stride[8] | int | 和 buf 对应,存放对应通道的步长。
对于 FBC 数据,stride 存放的数据稍有不同。channel 0 和 4,存放 Y 分量的宽度;
channel 1 和 5,存放 Cb 分量的宽度;
channel 2 和 6,存放 Y table 的长度;
channel 3 和 7,存放 Cb table 的长度| +| width | unsigned int | 存放 Frame 的宽度| +| height | unsigned int | 存放 Frame 的高度| +| frameFormat | int | 输出 yuv的格式
frameFormat为0 :输出为yuv,需要和cbcrInterleave 结合使用。
frameFormat为0,cbcrInterleave为 0 输出:yuv420p
frameFormat为0,cbcrInterleave为1 输出:nv12
frameFormat为116:压缩数据需要调用vpp解压缩| +| interlacedFrame | BmVpuDecLaceFrame | 图像扫描方式。0 为逐行扫描模式,1为隔行扫描模式| +| lumaBitDepth | int | 亮度数据的深度| +| chromaBitDepth | int | 色度数据的深度| +| pixel_format | BmVpuDecPixFormat | 图像格式 | +| endian | int | 表示帧缓冲区的字节序。
endian=0,以小端模式存储;
endian=1,以大端模式存储;
endian=2,以 32 位小端模式存储;
endian=3,以 32 位大端模式存储 | +| sequenceNo | int | 表示码流序列的状态。当码流序列改变时,sequenceNo 的值会进行累加 | +| frameIdx | int | 图像帧缓冲区的索引。用于表示该帧缓冲区在解码器中位置 | +| pts | unsigned long | 显示时间戳 | +| dts | unsigned long | 解码时间戳 | +| size | int | 帧缓冲区的大小 | +| colorPrimaries | int | 指定视频使用的色彩原色标准,影响颜色再现方式 | +| colorTransferCharacteristic | int | 定义了颜色从原色到可显示色彩的转换特性或曲线 | +| colorSpace | int | 表明视频色彩的编码空间,如RGB、YCbCr等 | +| colorRange | int | 指明色彩的动态范围,如广色域或有限色域(全范围或限定范围) | +| chromaLocation | int | 定义色度样本相对于亮度样本的位置。 | +| coded_width | unsigned int | 用于编码的图片宽度 | +| coded_height | unsigned int | 用于编码的图片高度 | +| | | | + + +### 13.BMVidStreamInfo +用于描述视频流的基本信息,例如图像大小、帧率、编码标准等。 + +| 成员变量 | 类型 | 描述 | +|-----------------------|---------------|----------------------------------------------------------------| +| picWidth | int | 图片的水平像素大小 | +| picHeight | int | 图片的垂直像素大小 | +| fRateNumerator | int | 帧率分数的分子 | +| fRateDenominator | int | 帧率分数的分母 | +| picCropRect | CropRect | 图片裁剪矩形信息(仅适用于H.264/HEVC解码器) | +| mp4DataPartitionEnable| int | MPEG4 VOL头中的 data_partitioned 标志位 | +| mp4ReversibleVlcEnable| int | MPEG4 VOL头中的 reversible_vlc 标志位 | +| mp4ShortVideoHeader | int | 0:非H.263流
1:H.263流(mpeg4 short video header) | +| h263AnnexJEnable | int | 0:禁用Annex J,
1:启用Annex J(可选的解块滤波器模式) | +| minFrameBufferCount | int | 解码所需的最小帧缓冲区数量 | +| frameBufDelay | int | 最大显示帧缓冲区延迟 | +| normalSliceSize | int | 正常情况下保存切片的推荐缓冲区大小(仅适用于H.264) | +| worstSliceSize | int | 最坏情况下保存切片的推荐缓冲区大小(仅适用于H.264) | +| maxSubLayers | int | H.265/HEVC的子层数量。 | +| profile | int | 不同视频编码标准的配置文件信息 | +| level | int | 不同视频编码标准的级别信息。 | +| tier | int | 层次指示器(0:主层,1:高层) | +| interlace | int | Indication of interlaced or progressive frame | +| constraint_set_flag | int[4] | H.264/AVC SPS中的 constraint_set0_flag 至 constraint_set3_flag。指定了视频编码的一些约束集,每个元素对应一个约束集的标志| +| direct8x8Flag | int | 标志指定了解码器在进行运动估计时是否使用直接模式进行 8x8 推导。
1表示启用了直接模式;
0表示禁用| +| vc1Psf | int | VC1序列层中的渐进分段帧(PSF)标志 | +| isExtSAR | int | H.264中的SAR(Sample Aspect Ratio,样本宽高比)扩展标志 | +| maxNumRefFrmFlag | int | H.264 中的 max_num_ref_frames 的标志位。
0表示 max_num_ref_frames 为 0;
1表示 max_num_ref_frames 不为 0 | +| maxNumRefFrm | int | H.264 中的 max_num_ref_frames 的具体数值,仅在maxNumRefFrmFlag==1时有效 | +| aspectRateInfo | int | 图像的宽高比信息 | +| bitRate | int | 码流写入时的比特率| +| mp2LowDelay | int | MPEG2规范中sequence extension的low_delay语法 | +| mp2DispVerSize | int | MPEG2规范中sequence display extension的display_vertical_size语法 | +| mp2DispHorSize | int | MPEG2规范中sequence display extension的display_horizontal_size语法 | +| userDataHeader | Uint32 | 用户数据头 | +| userDataNum | Uint32 | 用户数据的数量 | +| userDataSize | Uint32 | 用户数据的大小 | +| userDataBufFull | Uint32 | 当 userDataEnable 启用时,解码器将帧缓冲区状态报告到 userDataBufAddr 和以字节为单位的 userDataSize 中。当用户数据报告模式为1且用户数据大小大于用户数据缓冲区大小时,VPU将报告与缓冲区大小一样多的用户数据,跳过剩余部分并设置 serDataBufFull | +| chromaFormatIDC | int | 色度格式指示器 | +| lumaBitdepth | int | 亮度样本的位深度 | +| chromaBitdepth | int | 色度样本的位深度 | +| seqInitErrReason | int | 序列头解码错误原因 | +| warnInfo | int | 警告信息 | +| sequenceNo | unsigned int | 序列信息的序号,增加1表示检测到序列变化 | +| | | | + +### 14.BmVpuDecPicType +- 枚举类型,表示图片类型。 +- 0 表示I帧;1 表示P帧;2 表示B帧;5表示IDR帧。 + +
+ +## bm_video Decode API说明 +- bmvpu_dec_create +- bmvpu_dec_get_status +- bmvpu_dec_decode +- bmvpu_dec_get_caps +- bmvpu_dec_get_output +- bmvpu_dec_clear_output +- bmvpu_dec_flush +- bmvpu_dec_delete +- bmvpu_dec_get_stream_buffer_empty_size +- bmvpu_dec_get_all_frame_in_buffer +- bmvpu_dec_get_empty_input_buf_cnt +- bmvpu_dec_get_pkt_in_buf_count +- bmvpu_dec_vpu_reset +- bmvpu_dec_get_core_idx +- bmvpu_dec_dump_stream +- bmvpu_get_inst_idx +- bmvpu_dec_get_stream_info +- bmvpu_dec_set_logging_threshold + +### 1. bmvpu_dec_create + +[功能和说明] + +- 用于创建视频解码器的实例,返回创建的视频解码器实例句柄。 +- 初始化一些变量和数据结构,申请解码器实例所需要的内存。将创建的解码器实例的句柄返回给用户,用户通过句柄来操作解码器。 + +[函数名] + +`BMVidDecRetStatus bmvpu_dec_create(BMVidCodHandle* pVidCodHandle, BMVidDecParam decParam)` + +[参数说明] +- `BMVidCodHandle *pVidCodHandle` 存储创建的视频解码器实例的句柄 +- `BMVidDecParam decParam` 视频解码器的配置参数 + +[返回值] + +返回值为0表示成功,其他值表示失败。 + + +### 2. bmvpu_dec_get_status + +[功能和说明] + +- 获取当前视频解码器的状态。 +- 该函数接受一个视频编码器句柄 BMVidCodHandle,并返回该句柄对应视频解码器的状态 BMDecStatus。 +- 如果句柄有效,函数返回解码器的当前状态;否则,返回 BMDEC_CLOSED 表示句柄错误。 + +[函数名] + +`BMDecStatus bmvpu_dec_get_status(BMVidCodHandle vidCodHandle)` + +[参数说明] + +- `BMVidCodHandle vidCodHandle`:视频编码器句柄。 + +[返回值] + +解码器的状态 + +### 3. bmvpu_dec_decode + +[功能和说明] + +- 解码视频流并将解码后的帧放入输出队列。 +- 该函数接受一个视频编码器句柄 BMVidCodHandle 和一个包含视频流信息的结构体 BMVidStream。 +- 函数首先检查输入参数的有效性,包括输入队列是否已满、解码器是否处于正确状态等。 +- 若满足调用条件,函数将视频流数据填充到环形缓冲区,并触发解码器开始解码。 + +[函数名] + +`BMVidDecRetStatus bmvpu_dec_decode(BMVidCodHandle vidCodHandle, BMVidStream vidStream)` + +[参数说明] + +- `BMVidCodHandle vidCodHandle` 视频编码器句柄。 +- `BMVidStream vidStream` 包含视频流信息的结构体,包括帧数据、长度、时间戳等。 + +[返回值] + +返回值为0表示成功,其他值表示失败。 + +### 4. 
bmvpu_dec_get_caps + +[功能和说明] + +- 获取视频编码器和码流的相关信息。 +- 该函数接受一个视频编码器句柄 BMVidCodHandle 和一个用于存储信息的结构体指针 BMVidStreamInfo。 +- 若满足调用条件,函数从视频编码器实例中提取初始信息,填充到给定的 BMVidStreamInfo 结构体中。 +- 返回 0 表示成功,其他值表示错误。 + +[函数名] + +`BMVidDecRetStatus bmvpu_dec_get_caps(BMVidCodHandle vidCodHandle, BMVidStreamInfo* streamInfo)` + +[参数说明] + +- `BMVidCodHandle vidCodHandle` 视频编码器句柄。 +- `BMVidStreamInfo *streamInfo` 用于存储视频解码器信息的结构体指针。 + +[返回值] + +返回值为0表示成功,其他值表示失败。 + +### 5. bmvpu_dec_get_output + +[功能和说明] + +- 从视频解码器获取输出帧信息。 +- 该函数接受一个视频编码器句柄 BMVidCodHandle,并返回一个指向 BMVidFrame 结构的指针,该结构包含解码器输出的帧信息。 + +[函数名] + +`BMVidDecRetStatus bmvpu_dec_get_output(BMVidCodHandle vidCodHandle, BMVidFrame* frame)` + +[参数说明] + +- `BMVidCodHandle vidCodHandle` 视频编码器句柄。 + +[返回值] + +- 如果成功获取输出帧信息,则返回指向 BMVidFrame 结构的指针,其中包含帧的详细信息。 +- 如果未成功获取输出帧信息,返回 NULL。 +- 此函数的返回值可能为 NULL,因此在使用返回值前建议进行有效性检查。 + +### 6. bmvpu_dec_clear_output + +[功能和说明] + +释放指定的视频帧所占用的缓冲区资源,供后续输出视频帧使用。 + +[函数名] + +`BMVidDecRetStatus bmvpu_dec_clear_output(BMVidCodHandle vidCodHandle, BMVidFrame* frame)` + +[参数说明] +- `BMVidCodHandle vidCodHandle` 视频解码器的句柄,用于标识特定的视频解码器实例。 +- `BMVidFrame *frame` 要清除的输出帧 + +[返回值] +返回值为0表示成功,其他值表示失败。 + +### 7. bmvpu_dec_flush + +[功能和说明] + +- 刷新(Flush)视频解码器输出缓冲区 +- 该函数用于刷新视频解码器,应在关闭解码器之前调用,确保获取到所有的解码输出数据。 +- 解码器解码通常会存在延迟出帧的情况,当输入数据全部送入解码器后,并不能得到所有输出数据。需要调用 bmvpu_dec_flush 告诉解码器当前输入文件已经结束,刷新解码器中的数据,并更新解码器状态。 +- 如果不调用 bmvpu_dec_flush,可能会存在丢帧问题。 + +[函数名] + +`BMVidDecRetStatus bmvpu_dec_flush(BMVidCodHandle vidCodHandle)` + +[参数说明] +- `BMVidCodHandle vidCodHandle` 视频解码器的句柄,用于标识特定的视频解码器实例 + +[返回值] +返回值为0表示成功,其他值表示失败。 + +### 8. bmvpu_dec_delete + +[功能和说明] + +调用 bmvpu_dec_delete 用于关闭解码器实例,并释放资源。 + +[函数名] + +`BMVidDecRetStatus bmvpu_dec_delete(BMVidCodHandle vidCodHandle)` + +[参数说明] + +- `BMVidCodHandle vidCodHandle` 视频解码器的句柄,用于标识特定的视频解码器实例 + +[返回值] + +返回值为0表示成功,其他值表示失败。 + +### 9. 
bmvpu_dec_get_stream_buffer_empty_size + +[功能和说明] + +- 获取视频解码器的比特流缓冲的可用空间大小。 +- INTERRUPT MODE: 返回比特流缓冲区的剩余空间大小。 +- PIC_END MODE:返回比特流缓冲区当前能够存储的压缩帧数据的最大容量。该容量并不一定等于比特流缓冲区的剩余空间大小。 +- 返回比特流缓冲的可用空间大小或错误码。 + +[函数名] + +`int bmvpu_dec_get_stream_buffer_empty_size(BMVidCodHandle vidCodHandle)` + +[参数说明] + +`BMVidCodHandle vidCodHandle` 视频解码器实例的句柄 + +[返回值] + +返回值为比特流缓冲的剩余空间大小,如果出现错误,返回对应错误码。 + +### 10. bmvpu_dec_get_all_frame_in_buffer + +[功能和说明] + +- 刷新视频解码器输出缓冲区。 +- 通过句柄获取解码器实例相关信息。 +- 作用和 bmvpu_dec_flush 一样,用于刷新解码器中剩余的帧数据。 + +[函数名] + +`int bmvpu_dec_get_all_frame_in_buffer(BMVidCodHandle vidCodHandle)` + +[参数说明] + +- `BMVidCodHandle vidCodHandle` 视频解码器实例的句柄 + +[返回值] + +返回值为0表示成功通知视频解码器获取所有剩余帧并放入缓冲区。 + +### 11. bmvpu_dec_get_all_empty_input_buf_cnt + +[功能和说明] + +- 该函数用于获取视频解码器当前空闲输入缓冲区的数量。 +- 通过句柄获取解码器实例相关信息。 +- 检查vidHandle是否为空或者解码器状态是否已关闭。如果是,返回0表示没有空闲输入缓冲区。 +- 返回计算得到的空闲输入缓冲区的数量。 +- 函数返回表示空闲输入缓冲区数量的整数值。 + +[函数名] + +`int bmvpu_dec_get_all_empty_input_buf_cnt(BMVidCodHandle vidCodHandle)` + +[参数说明] + +- `BMVidCodHandle vidCodHandle` 视频解码器实例的句柄 + +[返回值] + +返回值表示空闲输入缓冲区的数量。 + +### 12. bmvpu_dec_get_pkt_in_buf_count + +[功能和说明] + +- 获取视频解码器输入缓冲区中数据包数量。 +- 检查vidHandle是否为空或者解码器状态是否已关闭。如果是,返回0表示没有已有数据包。 +- 获取队列中的元素个数,该数量表示还未进行解码的数据包的个数。 +- 函数返回获取到的数据包的数量。 + +[函数名] + +`int bmvpu_dec_get_pkt_in_buf_count(BMVidCodHandle vidCodHandle)` + +[参数说明] + +- `BMVidCodHandle vidCodHandle` 视频解码器实例的句柄 + +### 13. bmvpu_dec_vpu_reset + +[功能和说明] + +- 对Sophon设备的VPU进行硬件复位(reset)。 +- 该函数可用于复位指定设备上的所有VPU核心,或者只复位设备上的特定VPU核心。 + +[函数名] + +`BMVidDecRetStatus bmvpu_dec_reset(int devIdx, int coreIdx);` + +[参数说明] + +- `int devIdx` Sophon设备的索引,范围为[0, MAX_PCIE_BOARD_NUM-1]。 +- `int coreIdx` VPU核心的索引,范围为[-1, MAX_NUM_VPU_CORE_CHIP-1]。若为-1,表示复位设备上的所有VPU核心。 + +[返回值] + +返回值为0表示复位成功,其他值表示复位失败。 + +### 14. 
bmvpu_dec_get_core_idx + +[功能和说明] + +- 获取视频解码器实例的 VPU 核心索引 +- 该函数接受一个视频解码器句柄 BMVidCodHandle 作为参数。 +- 通过将输入句柄强制类型转换为 BMVidHandle,然后获取其 codecInst,最终取得 coreIdx。 +- 返回 VPU 核心索引。 + +[函数名] + +`int bmvpu_dec_get_core_idx(BMVidCodHandle handle)` +[返回值] + +返回VPU核心索引 + +### 15. bmvpu_dec_dump_stream + +[功能和说明] + +- 用于将解码器的输入缓冲区进行映射,用于转储为本地文件。 +- 若出现错误,将会返回 0。 + +[函数名] + +`int bmvpu_dec_dump_stream(BMVidCodHandle vidCodHandle, unsigned char *p_stream, int size)` + +[参数说明] + +- `BMVidCodHandle vidCodHandle` 视频编解码器句柄,表示与Sophon设备中的VPU关联的视频编解码器。 +- `unsigned char *p_stream` 用于存储解码器输入比特流的本地内存缓冲区的指针。 +- `int size` 缓冲区的大小,表示用户提供的本地内存缓冲区的长度。 + +[返回值] + +返回值表示成功转储到本地内存的比特流的长度。如果发生错误,返回值为0。 + +### 16. bmvpu_get_inst_idx + +[功能和说明] + +用于获取与Sophon设备中的VPU关联的视频编解码器的实例索引。 + +[函数名] + +`int bmvpu_get_inst_idx(BMVidCodHandle vidCodHandle)` +[参数说明] + +- `BMVidCodHandle vidCodHandle` 视频编解码器句柄,表示与Sophon设备中的VPU关联的视频编解码器。 + +[返回值] + +返回值表示与Sophon设备中的VPU关联的视频编解码器的实例索引。 + +### 17. bmvpu_dec_get_stream_info +[功能和说明] + +- 该接口用于外部分配内存时查询码流信息。 +- 调用该接口可以获取码流的宽高和缓冲区数量信息。 +- 当用户申请的内存和解码器所需要的内存不匹配时,会返回错误。可以通过该接口获取正确的信息。 + +[函数名] + +`BMVidDecRetStatus bmvpu_dec_get_stream_info(BMVidCodHandle vidCodHandle, int* width, int* height, int* mini_fb, int* frame_delay)` +[参数说明] + +- `BMVidCodHandle vidCodHandle` 视频编解码器句柄。 +- `int* width` 存储码流宽度 +- `int* height` 存储码流高度 +- `int* mini_fb` 存储解码所需的最小缓冲区数量 +- `int* frame_delay` 存储延迟出帧所需的缓冲区数量 + +[返回值] + +返回值表示接口执行的状态 + +### 18.
bmvpu_dec_set_logging_threshold +[功能和说明] + +该接口用于设置 SDK 调试信息的打印等级。 + +[函数名] + +`void bmvpu_dec_set_logging_threshold(BmVpuDecLogLevel log_level)` +[参数说明] + +- `BmVpuDecLogLevel log_level` 设置打印等级。 +- `BmVpuDecLogLevel` 为枚举类型,定义如下: + +| 类型 | 含义 | +|------------------|-----------| +| BMVPU_DEC_LOG_LEVEL_NONE | 不打印任何调试信息 | +| BMVPU_DEC_LOG_LEVEL_ERR | 打印 ERR 类调试信息 | +| BMVPU_DEC_LOG_LEVEL_WARN | 打印 ERR 和 WARN 类调试信息 | +| BMVPU_DEC_LOG_LEVEL_INFO | 打印 ERR、WARN 和 INFO 类调试信息 | +| BMVPU_DEC_LOG_LEVEL_TRACE | 打印 ERR、WARN、INFO 和 TRACE 类调试信息 | +| BMVPU_DEC_LOG_LEVEL_MAX_LOG_LEVEL| 打印所有调试信息 | + +[返回值] + +返回值表示接口执行的状态 + +## Frame Buffer 计算方法 + +### 1. compress frame data + +(1) buffer count +compress frame 的 count 由 minFrameBufferCount 和 extraFrameBufferCount 来决定 + +`compressedFbCount = minFrameBufferCount + extraFrameBufferCount` + +其中 extraFrameBufferCount 由用户指定,数量必须大于0。 +- Frame Buffer 通过 sdk 内部分配,此参数有默认值为 5,若用户不指定该参数,则按照默认参数来分配 +- Frame Buffer 由用户在外部分配,此参数必须设置,不设置则会报错。 + +minFrameBufferCount 因码流而异,是 VPU 解码需要的 Frame Buffer 最小值。此参数可以通过 VPU 分析码流得到,但如需要外部分配内存,则需要用户自己计算。 +- 对于 HEVC 码流, minFrameBufferCount 由 VPS 参数中的 vps_max_dec_pic_buffering_minus1 参数决定,minFrameBufferCount = vps_max_dec_pic_buffering_minus1 + 2; +- 对于 AVC 码流,minFrameBufferCount 由 max_dec_frame_buffering 参数决定,minFrameBufferCount = max_dec_frame_buffering + 2; + + +(2) buffer size +定义 ALIGN_xx()表示进行 xx 字节对齐。如 ALIGN_16()表示进行 16字节对齐。 +compress frame 的 size 由 图像的宽高决定,具体计算方法如下 + +`stride = ALIGN_32(width)` + +`height = ALIGN_32(height)` + +`LumaSize = stride * height` + +`ChromaSize = ALIGN_16(stride / 2) * height` + +`FramebufSize = LumaSize + ChromaSize` + +### 2. 
compress frame table + +(1) buffer count +table 的数量和 compress frame 匹配,计算方法参考 compress frame data。 + +(2) buffer size + +`YtableBufferSize = ALIGN_16(height) * ALIGN_256(width) / 32` + +`YtableBufferSize = ALIGN_4096(YtableBufferSize) + 4096` + +`CtableBufferSize = ALIGN_16(height) * ALIGN_256(width / 2) / 32` + +`CtableBufferSize = ALIGN_4096(CtableBufferSize) + 4096` + +### 3. linear frame buffer + +(1) buffer count +linear frame 的 count 由 frameBufDelay 和 extraFrameBufferCount 决定。 + +`linearFbCount = frameBufDelay + extraFrameBufferCount + 1` + +- 对于 HEVC 码流, frameBufDelay 由 VPS 参数中的 vps_max_num_reorder_pics 参数决定, frameBufDelay = vps_max_num_reorder_pics + 2 +- 对于 AVC 码流,frameBufDelay 由 num_reorder_frames 参数决定, frameBufDelay = num_reorder_frames + 2 + +(2) buffer size +stride 的对齐方式和 compress frame 一致, height 不进行对齐 + +`LumaSize = stride * height` + +`ChromaSize = (stride / 2) * (height / 2)` + +`FramebufSize = LumaSize + ChromaSize * 2` + + +
+ +# 二、 bm_video Encode 数据结构 & API说明 + +## bm_video Encode 枚举类型 +- BmVpuEncReturnCodes +- BmVpuEncOutputCodes +- BmVpuEncHeaderDataTypes +- BmVpuCodecFormat +- BmVpuEncPixFormat +- BMVpuEncGopPreset +- BMVpuEncMode +- BmVpuMappingFlags + +### 1. BmVpuEncReturnCodes + +- 编码器返回值代码。除了BM_VPU_ENC_RETURN_CODE_OK,其他的返回值应该被视为发生错误,此时编码器应该被关闭。 + +| 枚举值 | 返回值 | 描述 | +| ------------------------------------------------ | ------ | --------------------------------------------------------------------------------------------------------------- | +| BM_VPU_ENC_RETURN_CODE_OK | 0 | 操作成功完成。 | +| BM_VPU_ENC_RETURN_CODE_ERROR | 1 | 通用错误代码,用作其他错误返回代码不匹配时的通用错误。 | +| BM_VPU_ENC_RETURN_CODE_INVALID_PARAMS | 2 | 输入参数无效。 | +| BM_VPU_ENC_RETURN_CODE_INVALID_HANDLE | 3 | VPU 编码器句柄无效,内部错误,可能是库中的错误,请报告此类错误。 | +| BM_VPU_ENC_RETURN_CODE_INVALID_FRAMEBUFFER | 4 | 帧缓冲区信息无效,通常发生在将包含无效值的 BmVpuFramebuffer 结构传递给 bmvpu_enc_register_framebuffers() 函数时。 | +| BM_VPU_ENC_RETURN_CODE_INSUFFICIENT_FRAMEBUFFERS | 5 | 注册用于编码的帧缓冲区失败,因为未提供足够的帧缓冲区给 bmvpu_enc_register_framebuffers() 函数。 | +| BM_VPU_ENC_RETURN_CODE_INVALID_STRIDE | 6 | 步幅值无效,例如帧缓冲区的一个步幅值无效。 | +| BM_VPU_ENC_RETURN_CODE_WRONG_CALL_SEQUENCE | 7 | 在不适当的时间调用函数。 | +| BM_VPU_ENC_RETURN_CODE_TIMEOUT | 8 | 操作超时。 | +| BM_VPU_ENC_RETURN_CODE_RESEND_FRAME | 9 | 重复送帧。 | +| BM_VPU_ENC_RETURN_CODE_ENC_END | 10 | 编码结束。 | +| BM_VPU_ENC_RETURN_CODE_END | 11 | 编码结束。 | + +### 2. 
BmVpuEncOutputCodes + +- 编码器内部输出代码。这些代码可以通过按位 OR 进行组合,通过使用按位 AND 检查 `bmvpu_enc_encode()` 返回的 `output_codes` 位掩码,来确认编码器状态。 + +| 枚举值 | 返回值 | 描述 | +| ---------------------------------------------- | ------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| BM_VPU_ENC_OUTPUT_CODE_INPUT_USED | 1 << 0 | 表示输入数据已被使用。如果未设置该标志位,则编码器尚未使用输入数据,因此请将其再次输入给编码器,直到此标志位被设置或返回错误。 | +| BM_VPU_ENC_OUTPUT_CODE_ENCODED_FRAME_AVAILABLE | 1 << 1 | 表示现在有一个完全编码的帧可用。传递给 `bmvpu_enc_encode()` 的 `encoded_frame` 参数包含有关此帧的信息。 | +| BM_VPU_ENC_OUTPUT_CODE_CONTAINS_HEADER | 1 << 2 | 表示编码帧中的数据还包含头信息,如 h.264 的 SPS/PSS。头信息始终放置在编码数据的开头,如果未设置 `BM_VPU_ENC_OUTPUT_CODE_ENCODED_FRAME_AVAILABLE`,则该标志位不会被设置。 | + +### 3. BmVpuEncHeaderDataTypes + +- 定义了编码器头部数据的不同类型 + +| 枚举值 | 返回值 | 描述 | +| ------------------------------------ | ------ | --------------------------------------------------------------- | +| BM_VPU_ENC_HEADER_DATA_TYPE_VPS_RBSP | 0 | 视频参数集 (VPS) 的 RBSP(Raw Byte Sequence Payload)数据类型。 | +| BM_VPU_ENC_HEADER_DATA_TYPE_SPS_RBSP | 1 | 序列参数集 (SPS) 的 RBSP 数据类型。 | +| BM_VPU_ENC_HEADER_DATA_TYPE_PPS_RBSP | 2 | 图像参数集 (PPS) 的 RBSP 数据类型。 | + +### 4. BmVpuCodecFormat + +- 编码器支持的编码格式 + +| 枚举值 | 返回值 | 描述 | +| ------------------------ | ------ | ------------- | +| BM_VPU_CODEC_FORMAT_H264 | 0 | 编码类型 h264 | +| BM_VPU_CODEC_FORMAT_H265 | 1 | 编码类型 h265 | + +### 5. BmVpuEncPixFormat + +- 编码器输入 yuv 格式 +- 目前仅支持 nv12,nv21,yuv420p + +| 枚举值 | 返回值 | 描述 | +| ----------------------------- | ------ | ----------------- | +| BM_VPU_ENC_PIX_FORMAT_YUV420P | 0 | planar 4:2:0 | +| BM_VPU_ENC_PIX_FORMAT_YUV422P | 1 | planar 4:2:2 | +| BM_VPU_ENC_PIX_FORMAT_YUV444P | 3 | planar 4:4:4 | +| BM_VPU_ENC_PIX_FORMAT_YUV400 | 4 | 8位灰度图像 | +| BM_VPU_ENC_PIX_FORMAT_NV12 | 5 | semi-planar 4:2:0 | +| BM_VPU_ENC_PIX_FORMAT_NV16 | 6 | semi-planar 4:2:2 | +| BM_VPU_ENC_PIX_FORMAT_NV24 | 7 | semi-planar 4:4:4 | + +### 6. 
BMVpuEncGopPreset + +- 编码器输入 gop_preset设置 + +| 枚举值 | 返回值 | 描述 | +| --------------------------- | ------ | ------------------------- | +| BM_VPU_ENC_GOP_PRESET_ALL_I | 1 | 全I帧模式 gopsize=1 | +| BM_VPU_ENC_GOP_PRESET_IPP | 2 | 全IP帧模式 gopsize=1 | +| BM_VPU_ENC_GOP_PRESET_IBBB | 3 | 全IB帧模式 gopsize=1 | +| BM_VPU_ENC_GOP_PRESET_IBPBP | 4 | 全IBP帧模式 gopsize=2 | +| BM_VPU_ENC_GOP_PRESET_IBBBP | 5 | 全IBP帧模式 gopsize=4 | +| BM_VPU_ENC_GOP_PRESET_IPPPP | 6 | 全IP帧模式 gopsize=4 | +| BM_VPU_ENC_GOP_PRESET_IBBBB | 7 | 全IB帧模式 gopsize=4 | +| BM_VPU_ENC_GOP_PRESET_RA_IB | 8 | Random IB帧模式 gopsize=8 | + +### 7. BMVpuEncMode + +- 编码器输入 编码模式 + +| 枚举值 | 返回值 | 描述 | +| --------------------------- | ------ | ---------------------------------- | +| BM_VPU_ENC_CUSTOM_MODE | 0 | 自定义模式 | +| BM_VPU_ENC_RECOMMENDED_MODE | 1 | 推荐模式(慢编码速度,最高画质) | +| BM_VPU_ENC_BOOST_MODE | 2 | 提升模式(正常编码速度,正常画质) | +| BM_VPU_ENC_FAST_MODE | 3 | 快速模式(高编码速度,低画质) | + +### 8. BmVpuMappingFlags + +- 用于使用vpu_EncMmap()函数时指定映射类型。 + +| 枚举值 | 返回值 | 描述 | +| ------------------------- | ------ | ------------ | +| BM_VPU_MAPPING_FLAG_WRITE | 1 << 0 | 可写权限标志 | +| BM_VPU_MAPPING_FLAG_READ | 1 << 1 | 可读权限标志 | + +## bm_video Encode 数据结构 +- BmVpuEncH264Params +- BmVpuEncH265Params +- BmVpuEncOpenParams +- BmVpuEncInitialInfo +- BmCustomMapOpt +- BmVpuEncParams +- BmVpuEncoder +- BmVpuFbInfo +- BmVpuEncodedFrame +- BmVpuEncDMABuffer +- BmVpuRawFrame +- BmVpuFramebuffer + +### 1. BmVpuEncH264Params + +- 定义了新的 H.264 编码器实例的参数 + +| 成员变量 | 类型 | 描述 | +| ------------------- | ---- | ------------------------------------------ | +| enable_transform8x8 | int | 启用 8x8 帧内预测和 8x8 变换。默认值为 1。 | + +### 2. 
BmVpuEncH265Params + +- 定义了新的 H.265 编码器实例的参数 + +| 成员变量 | 类型 | 描述 | +| ----------------------------- | ---- | ------------------------------------------------------------------------ | +| enable_tmvp | int | 启用时域运动矢量预测。默认值为 1。 | +| enable_wpp | int | 启用线性缓冲区模式的波前并行处理。默认值为 0。 | +| enable_sao | int | 如果设置为 1,则启用 SAO;如果设置为 0,则禁用。默认值为 1。 | +| enable_strong_intra_smoothing | int | 启用对带有少量 AC 系数的区域进行强烈的帧内平滑,以防止伪影。默认值为 1。 | +| enable_intra_trans_skip | int | 启用帧内 CU 的变换跳过。默认值为 0。 | +| enable_intraNxN | int | 启用帧内 NxN PUs。默认值为 1。 | + +### 3. BmVpuEncOpenParams + +- 用于初始化编码器的全局参数,设置编码器的基本属性和编码器类型等。 + +| 成员变量 | 类型 | 描述 | +| ----------------------------------- | ----------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| codec_format | BmVpuCodecFormat | 指定要生成的编码数据的编码格式。 | +| pix_format | BmVpuEncPixFormat | 指定传入帧使用的图像格式。 | +| frame_width | uint32_t | 传入帧的宽度(以像素为单位),无需对齐 | +| frame_height | uint32_t | 传入帧的高度(以像素为单位),无需对齐 | +| timebase_num | uint32_t | 时间基数,以分数形式给出。 | +| timebase_den | uint32_t | 时间分母,以分数形式给出。 | +| fps_num | uint32_t | 帧率,以分数形式给出。 | +| fps_den | uint32_t | 帧率分母,以分数形式给出。 | +| bitrate | int64_t | 比特率(以 bps 为单位)。如果设置为 0,则禁用码率控制,而使用常量质量模式。默认值为 100000。 | +| vbv_buffer_size | uint64_t | vbv 缓冲区的大小,以 bit 为单位。仅在启用码率控制时( *BmVpuEncOpenParams* 中的 *bitrate* 非零)使用。0 表示不检查缓冲区大小约束。默认值为 0。 | +| cqp | int | 常量质量模式的量化参数。 | +| enc_mode | BMVpuEncMode | 编码模式:0 自定义模式,1 推荐的编码器参数(慢编码速度,最高画质),2 提升模式(正常编码速度,正常画质),3 快速模式(高编码速度,低画质)。默认值为 2。 | +| max_num_merge | int | RDO 中的合并候选数(1 或 2)。1:提高编码性能,2:提高编码图像的质量。默认值为 2。 | +| enable_constrained_intra_prediction | int | 启用受限帧内预测。如果设置为 1,则启用;如果设置为 0,则禁用。默认值为 0。 | +| enable_wp | int | 启用加权预测。默认值为 1。 | +| 
disable_deblocking | int | 如果设置为 1,则禁用去块滤波器。如果设置为 0,则保持启用。默认值为 0。 | +| offset_tc | int | deblocking 滤波器的 Alpha/Tc 偏移。默认值为 0。 | +| offset_beta | int | deblocking 滤波器的 Beta 偏移。默认值为 0。 | +| enable_cross_slice_boundary | int | 启用帧内循环滤波中的跨切片边界滤波。默认值为 0。 | +| enable_nr | int | 启用降噪。默认值为 1。 | +| h264_params | BmVpuEncH264Params | H.264 编码器参数。(union,从 *h264_params* 和 *h265_params* 中选择一个) | +| h265_params | BmVpuEncH265Params | H.265 编码器参数。(union,从 *h264_params* 和 *h265_params* 中选择一个) | +| soc_idx | int | 仅用于 PCIe 模式。对于 SOC 模式,此值必须为 0。默认值为 0。 | +| gop_preset | BMVpuEncGopPreset | GOP 结构预设选项。
1:全部为 I帧,gopsize = 1;
2:I-P-P,连续 P,循环 gopsize = 1;
3:I-B-B-B,连续 B,循环 gopsize = 1;
4:I-B-P-B-P,gopsize = 2;
5:I-B-B-B-P,gopsize = 4;
6:I-P-P-P-P,连续 P,循环 gopsize = 4;
7:I-B-B-B-B,连续 B,循环 gopsize = 4;
8:随机访问,I-B-B-B-B-B-B-B-B,循环 gopsize = 8。低延迟情况为 1、2、3、6、7。默认值为 5。 | +| intra_period | int | GOP 大小内的帧内图片周期。默认值为 28。 | +| bg_detection | int | 启用背景检测。默认值为 0。 | +| mb_rc | int | 启用 MB 级/CU 级码率控制。默认值为 1。 | +| delta_qp | int | 码率控制的最大 delta QP。默认值为 5。 | +| min_qp | int | 码率控制的最小 QP。默认值为 8。 | +| max_qp | int | 码率控制的最大 QP。默认值为 51。 | +| roi_enable | int | ROI 编码标志。默认值为 0。 | +| cmd_queue_depth | int | 设置命令队列深度,默认值为4,取值范围[1, 4] | +| timeout | int | 编码超时时间,默认为1000ms(即VPU_WAIT_TIMEOUT) | +| timeout_count | int | 编码超时重试次数,默认为40(即VPU_MAX_TIMEOUT_COUNTS) | +| buffer_alloc_func | BmVpuEncBufferAllocFunc | 缓冲区内存分配函数接口 | +| buffer_free_func | BmVpuEncBufferFreeFunc | 缓冲区内存释放函数接口 | +| buffer_context | void* | 缓冲区上下文信息 | + +### 4. BmVpuEncInitialInfo + +- 初始编码信息,由编码器生成。这个结构体对于实际开始编码至关重要,因为它包含了创建和注册足够的帧缓冲区所需的所有信息。 + +| 成员变量 | 类型 | 描述 | +| --------------------- | ----------- | --------------------------------------------------------- | +| min_num_rec_fb | uint32_t | 最小推荐帧缓冲区数量,分配少于此数量可能会影响编码质量。 | +| min_num_src_fb | uint32_t | 输入 YUV 数据帧的最小数量,分配少于此数量可能会影响编码。 | +| framebuffer_alignment | uint32_t | 物理帧缓冲区地址的对齐要求。 | +| rec_fb | BmVpuFbInfo | 用于重建的帧缓冲区大小信息。包括宽度、高度等信息。 | +| src_fb | BmVpuFbInfo | 输入 YUV 数据的宽高信息。 | + +### 5. BmCustomMapOpt + +- 自定义映射选项(H.265编码器) + +| 成员变量 | 类型 | 描述 | +| --------------------- | ----------------- | ----------------------------------------------------------- | +| roiAvgQp | int | ROI 映射的平均 QP | +| customRoiMapEnable | int | 是否开启 ROI 映射 | +| customLambdaMapEnable | int | 是否开启 Lambda 映射 | +| customModeMapEnable | int | 是否指定 CTU 使用帧间编码,否则跳过 | +| customCoefDropEnable | int | 对于每个 CTU,是否设置 TQ 系数为全0,系数全0的 CTU 将被丢弃 | +| addrCustomMap | bmvpu_phys_addr_t | 自定义映射缓冲区的起始地址 | + +### 6. 
BmVpuEncParams + +- 用于配置编码器的运行时参数,__影响每次编码操作的具体行为。__ + +| 成员变量 | 类型 | 描述 | +| --------------------- | --------------------------- | --------------------------------------------------------------------------------------------------------------------------- | +| skip_frame | int | 默认值为 0,禁用跳帧生成。如果设置为 1,则 VPU 忽略给定的原始帧,而生成一个“跳帧”,它是前一帧的复制。这个跳帧被编码为 P 帧。 | +| forcePicTypeEnable | int | 是否强制指定编码帧类型 | +| forcePicType | int | 强制指定的编码帧类型(I帧、P帧、B帧、IDR帧、CRA帧),只有当 *forcePicTypeEnable* 为1时有效 | +| acquire_output_buffer | BmVpuEncAcquireOutputBuffer | 用于获取输出缓冲区的函数。 | +| finish_output_buffer | BmVpuEncFinishOutputBuffer | 用于释放输出缓冲区的函数。 | +| output_buffer_context | void* | 传递给上述函数的用户提供的值。 | +| customMapOpt | BmCustomMapOpt* | 指向自定义映射选项的指针。 | + +### 7. BmVpuEncoder + +- 具体的编码器实例,会接收BmVpuEncOpenParams的参数信息 + +| 成员变量 | 类型 | 描述 | +| --------------------- | ------------------- | ---------------------------------------------------------------------------------------------------- | +| handle | void* | 编码器句柄。 | +| soc_idx | int | Sophon SoC 的索引。对于 PCIE 模式,请参考 /dev/bm-sophonxx 中的编号。对于 SOC 模式,请将其设置为零。 | +| core_idx | int | 所有 Sophon SoC 中 VPU 编码器core的统一索引。 | +| codec_format | BmVpuCodecFormat | 编码器使用的视频编解码格式。 | +| color_format | BmVpuEncPixFormat | 传入帧使用的图像格式。 | +| frame_width | uint32_t | 传入帧的宽度(以像素为单位)。 | +| frame_height | uint32_t | 传入帧的高度(以像素为单位)。 | +| fps_n | uint32_t | 帧率的分子。 | +| fps_d | uint32_t | 帧率的分母。 | +| first_frame | int | 是否为第一帧。 | +| rc_enable | int | 是否启用码率控制。 | +| cqp | int | 在禁用码率控制时,使用恒定的量化参数 QP。 | +| work_dmabuffer | BmVpuEncDMABuffer* | 用于编码器工作的 DMA 缓冲区。 | +| bs_dmabuffer | BmVpuEncDMABuffer* | 用于存储码流的 DMA 缓冲区。 | +| bs_virt_addr | unsigned long long | 码流的虚拟地址。 | +| bs_phys_addr | bmvpu_phys_addr_t | 码流的物理地址。 | +| num_framebuffers | uint32_t | 帧缓冲区的数量。 | +| internal_framebuffers | void* | 编码器内部的帧缓冲区。 | +| framebuffers | BmVpuFramebuffer* | 帧缓冲区。 | +| buffer_mv | BmVpuEncDMABuffer* | 用于存储运动矢量的 DMA 缓冲区。 | +| buffer_fbc_y_tbl | BmVpuEncDMABuffer* | 用于存储 FBC 亮度表的 DMA 缓冲区。 | +| 
buffer_fbc_c_tbl | BmVpuEncDMABuffer* | 用于存储 FBC 色度表的 DMA 缓冲区。 | +| buffer_sub_sam | BmVpuEncDMABuffer* | 用于 ME 的子采样 DMA 缓冲区。 | +| headers_rbsp | uint8_t* | 帧头 RBSP 数据。 | +| headers_rbsp_size | size_t | 帧头 RBSP 数据的大小。 | +| initial_info | BmVpuEncInitialInfo | 编码器的初始信息。 | +| timeout | int | 编码超时时间,默认为1000ms(即VPU_WAIT_TIMEOUT) | +| timeout_count | int | 编码超时重试次数,默认为40(即VPU_MAX_TIMEOUT_COUNTS) | +| video_enc_ctx | void* | 编码上下文信息,内部使用 | + +### 8. BmVpuFbInfo + +- 与 `bmvpu_calc_framebuffer_sizes()` 一起使用,用于计算帧缓冲区的大小 + +| 成员变量 | 类型 | 描述 | +| -------- | ---- | --------------------------------------------------------------------- | +| width | int | 帧的宽度,按照 VPU 要求的 16 像素边界对齐。 | +| height | int | 帧的高度,按照 VPU 要求的 16 像素边界对齐。 | +| y_stride | int | 对齐后的 Y 分量的跨距大小,以字节为单位。 | +| c_stride | int | 对齐后的 Cb 和 Cr 分量的跨距大小,以字节为单位(可选)。 | +| y_size | int | Y 分量的 DMA 缓冲区大小,以字节为单位。 | +| c_size | int | Cb 和 Cr 分量的 DMA 缓冲区大小,以字节为单位。 | +| size | int | 帧缓冲区 DMA 缓冲区的总大小,以字节为单位。这个值包括所有通道的大小。 | + +### 9. BmVpuEncodedFrame + +- 编码帧的详细信息 + +| 成员变量 | 类型 | 描述 | +| ------------------- | ----------------- | --------------------------------------------------------------------------------------------------------------------------- | +| data | uint8_t* | 在解码时,data 必须指向包含码流数据的内存块,编码器不使用。 | +| data_size | size_t | 编码数据的大小。在编码时,由编码器设置,表示获取的输出块的大小,以字节为单位。 | +| frame_type | BmVpuEncFrameType | 编码帧的帧类型(I、P、B 等)。由编码器填充。仅由编码器使用。 | +| acquired_handle | void* | 在编码时由用户定义的 **acquire_output_buffer** 函数生成的句柄。仅由编码器使用。 | +| context | void* | 用户定义的指针。编码器不会更改此值。这个指针和相应原始帧的指针将具有相同的值,在编码器中传递。 | +| pts | uint64_t | 用户定义的显示时间戳(Presentation Timestamp)。与 *context* 指针一样,编码器只是将其传递到关联的原始帧,并不实际更改其值。 | +| dts | uint64_t | 用户定义的解码时间戳(Decoding Timestamp)。与 *pts* 指针一样,编码器只是将其传递到关联的原始帧,并不实际更改其值。 | +| src_idx | int | 原始帧的索引。 | +| u64CustomMapPhyAddr | bmvpu_phys_addr_t | 用户自定义映射选项的起始地址 | +| avg_ctu_qp | int | 平均 CTU QP(Quantization Parameter)。 | + +### 10. BmVpuEncDMABuffer + +- 保存 YUV 数据的物理内存. 
+ +| 成员变量 | 类型 | 描述 | +| ------------ | ------------ | ------------------------ | +| size | unsigned int | 物理内存的大小 | +| phys_addr | uint64_t | 物理内存的地址 | +| virt_addr | uint64_t | 物理内存mmap后的虚拟地址 | +| enable_cache | int | 是否开启cache | + +### 11. BmVpuRawFrame + +- 结构体包含了关于原始、未压缩帧的详细信息 + +| 成员变量 | 类型 | 描述 | +| ----------- | ----------------- | --------------------------------------------------------------------------------------------------------------------------- | +| framebuffer | BmVpuFramebuffer* | 原始帧的帧缓冲区。 | +| context | void* | 用户定义的指针。编码器不会更改此值。这个指针和相应编码帧的指针将具有相同的值,在编码器中传递。 | +| pts | uint64_t | 用户定义的显示时间戳(Presentation Timestamp)。与 *context* 指针一样,编码器只是将其传递到关联的编码帧,并不实际更改其值。 | +| dts | uint64_t | 用户定义的解码时间戳(Decoding Timestamp)。与 *pts* 指针一样,编码器只是将其传递到关联的编码帧,并不实际更改其值。 | + +### 12. BmVpuFramebuffer + +- 帧缓冲区的相关信息,用于容纳视频帧的像素数据,同时用于编码和解码。 + +| 成员变量 | 类型 | 描述 | +| -------------- | ------------------ | ---------------------------------------------------------------------------------------------------------- | +| dma_buffer | BmVpuEncDMABuffer* | 保存 YUV 数据的物理内存 | +| myIndex | int | YUV 索引,用户设置,用于释放 YUV 数据 | +| y_stride | unsigned int | Y 通道对齐后的大小 | +| cbcr_stride | unsigned int | UV 通道对齐后的大小 | +| width | unsigned int | 编码 YUV 图像的宽 | +| height | unsigned int | 编码 YUV 图像的高 | +| y_offset | size_t | Y 通道 offset。相对于缓冲区起始位置,指定每个分量的起始偏移量。以字节为单位指定。 | +| cb_offset | size_t | U 通道 offset。 | +| cr_offset | size_t | V 通道 offset。 | +| already_marked | int | 如果帧缓冲区已在编码器中标记为已使用,则设置为 1。仅供内部使用。不要从外部读取或写入。 | +| internal | void* | 内部实现定义的数据。不要修改。 | +| context | void* | 用户定义的指针,编码器不会修改此值。用法由用户决定,例如,可以用于标识在编码器中包含该帧的帧缓冲区的序号。 | + +
+ +## bm_video Encode API +- bmvpu_enc_error_string +- bmvpu_enc_get_core_idx +- bmvpu_enc_load +- bmvpu_enc_unload +- bmvpu_enc_get_bitstream_buffer_info +- bmvpu_enc_set_default_open_params +- bmvpu_fill_framebuffer_params +- bmvpu_enc_open +- bmvpu_enc_close +- bmvpu_enc_encode +- bmvpu_enc_dma_buffer_allocate +- bmvpu_enc_dma_buffer_deallocate +- bmvpu_enc_dma_buffer_attach +- bmvpu_enc_dma_buffer_deattach +- bmvpu_dma_buffer_map +- bmvpu_dma_buffer_unmap +- bmvpu_enc_dma_buffer_flush +- bmvpu_enc_dma_buffer_invalidate +- bmvpu_enc_dma_buffer_get_physical_address +- bmvpu_enc_dma_buffer_get_size +- bmvpu_enc_upload_data +- bmvpu_enc_download_data + +### 1. bmvpu_enc_error_string + +[功能和说明] +- 返回编码错误码的具体描述 + +[函数名] + +`char const * bmvpu_enc_error_string(BmVpuEncReturnCodes code)` + +[参数说明] +- `BmVpuEncReturnCodes code` 编码错误码 + +### 2. bmvpu_enc_get_core_idx + +[功能和说明] +- 在指定的Sophon SoC上,获取VPU编码器core的唯一索引。 + +[函数名] + +`int bmvpu_enc_get_core_idx(int soc_idx)` + +[参数说明] +- `int soc_idx` 设备索引号 + +### 3. bmvpu_enc_load + +[功能和说明] +__加载Sophon设备上的视频处理单元(VPU)的编码模块。__ +- unload()和load()的调用次数要一致。 +- 在对编码器执行任何其他操作之前,必须先加载(load)编码器。 +- 同样,在完成所有编码器活动之前,不得卸载(unload)编码器,包括打开编码器实例。 + +[函数名] + +`int bmvpu_enc_load(int soc_idx)` + +[参数说明] +- `int soc_idx` 设备索引号 + +### 4. bmvpu_enc_unload + +[功能和说明] +- 卸载(unload)编码器 + +[函数名] + +`int bmvpu_enc_unload(int soc_idx)` + +[参数说明] +- `int soc_idx` 设备索引号 + +### 5. bmvpu_enc_get_bitstream_buffer_info + +[功能和说明] +- 该函数得到编码器所需的 bitstream buffer 的大小 (size) 和所需要的 对齐(alignment)值。 +- 返回编码器的码流缓冲区所需的物理内存块的对齐方式和大小。 +- 用户必须分配至少此大小的 DMA 缓冲区,并且必须根据对齐值对其物理地址进行对齐。 +- __需要在bmvpu_enc_open()之前调用__ + +[函数名] + +`void bmvpu_enc_get_bitstream_buffer_info(size_t *size, uint32_t *alignment)` + +[参数说明] +- `size_t *size` 码流缓冲区所需的物理内存块的大小 +- `uint32_t *alignment` 码流缓冲区所需的物理内存块的对齐方式 + +### 6. 
bmvpu_enc_set_default_open_params + +[功能和说明] +- 设置编码器的默认变量,用于编码器初始化时传递参数 +- 如果调用方只想修改几个成员变量(或者不做修改),可以调用此函数 +- __需要在 bmvpu_enc_open 之前调用__ + +[函数名] + +`void bmvpu_enc_set_default_open_params(BmVpuEncOpenParams *open_params, BmVpuCodecFormat codec_format)` + +[参数说明] +- `BmVpuEncOpenParams *open_params` 用于返回编码器的参数 +- `BmVpuCodecFormat codec_format` 编码器选择,h264 或 h265 + +### 7. bmvpu_fill_framebuffer_params + +[功能和说明] +- 根据传入的 *fb_info* 填充 *BmVpuFramebuffer* 结构中的参数 +- 可以在此指定帧缓冲区及上下文信息 + +[函数名] + +`int bmvpu_fill_framebuffer_params(BmVpuFramebuffer *framebuffer, BmVpuFbInfo *fb_info, BmVpuEncDMABuffer *fb_dma_buffer, int fb_id, void *context)` + +[参数说明] +- `BmVpuFramebuffer *framebuffer` 需要填充的帧缓冲区信息 +- `BmVpuFbInfo *fb_info` 从编码器获取的初始化信息,包含所需的帧缓冲区的最小个数及大小 +- `BmVpuEncDMABuffer *fb_dma_buffer` 分配给帧缓冲区的保存 YUV 数据的物理内存 +- `int fb_id` 用户设置的 YUV 数据索引 +- `void *context` 用户设置的上下文信息 + +### 8. bmvpu_enc_open + +[功能和说明] +- 打开一个新的编码器实例, 设置编码器参数并开始接收视频帧 +- __`BmVpuEncOpenParams *open_params` 和 `BmVpuDMABuffer *bs_dmabuffer` 必须不为空__ + +[函数名] + +`int bmvpu_enc_open(BmVpuEncoder **encoder, BmVpuEncOpenParams *open_params, BmVpuDMABuffer *bs_dmabuffer, BmVpuEncInitialInfo *initial_info)` + +[参数说明] +- `BmVpuEncoder **encoder` 指向编码器实例的二级指针,接收编码器的属性和视频帧的部分设置, 例如设备 id、缓冲区设置和帧率、宽高等 +- `BmVpuEncOpenParams *open_params` 编码器各项参数 +- `BmVpuDMABuffer *bs_dmabuffer` 指向码流缓冲区的指针,使用之前已经分配的码流缓冲区 +- `BmVpuEncInitialInfo *initial_info` 编码器的初始化信息,返回给用户编码器需要的帧缓冲区最小个数和大小 + +### 9. bmvpu_enc_close + +[功能和说明] +- 关闭编码器实例 +- 多次尝试关闭同一实例会导致未定义的行为 + +[函数名] + +`int bmvpu_enc_close(BmVpuEncoder *encoder)` + +[参数说明] +- `BmVpuEncoder *encoder` 视频编码器实例 + +### 10. 
bmvpu_enc_encode + +[功能和说明] +- 使用给定的编码参数对给定的原始输入帧进行编码。*encoded_frame* 填充有关于所得到的编码输出帧的信息。 +- 编码的帧数据本身被存储在由用户提供的函数(在 *encoding_params* 中被设置为 *acquire_output_buffer* 和 *finish_output_buffer* 函数指针)分配的缓冲区中 + +[函数名] + +`int bmvpu_enc_encode(BmVpuEncoder *encoder, BmVpuRawFrame const *raw_frame, BmVpuEncodedFrame *encoded_frame, BmVpuEncParams *encoding_params, uint32_t *output_code)` + +[参数说明] +- `BmVpuEncoder *encoder` 视频编码器实例 +- `BmVpuRawFrame const *raw_frame` 原始视频帧,包括帧数据、时间戳等。 +- `BmVpuEncodedFrame *encoded_frame` 编码后的视频帧,包括帧数据、帧类型、时间戳等。 +- `BmVpuEncParams *encoding_params` 用于编码的参数 +- `uint32_t *output_code` 返回输出状态代码 + +
+ +### 11. bmvpu_enc_dma_buffer_allocate + +[功能和说明] +- 根据用户指定的size分配设备内存。 + +[函数名] + +`int bmvpu_enc_dma_buffer_allocate(int vpu_core_idx, BmVpuEncDMABuffer *buf, unsigned int size)` + +[参数说明] +- `int vpu_core_idx` 输入参数,指定编码器所在core的索引 +- `BmVpuEncDMABuffer *buf` 输出参数,函数执行后,将会填充该结构体的 *phys_addr*、*size*、*enable_cache* 成员变量 +- `unsigned int size` 输入参数,以字节为单位,指定需要的缓冲区大小 + +[返回值] +- 返回BM_SUCCESS(=0)表示分配成功,否则分配失败 + +### 12. bmvpu_enc_dma_buffer_deallocate + +[功能和说明] +- 释放由 **bmvpu_enc_dma_buffer_allocate** 函数分配的设备内存。 + +[函数名] + +`int bmvpu_enc_dma_buffer_deallocate(int vpu_core_idx, BmVpuEncDMABuffer *buf)` + +[参数说明] +- `int vpu_core_idx` 输入参数,指定编码器所在core的索引 +- `BmVpuEncDMABuffer *buf` 输入参数,调用前用户必须要填充该结构体的 *phys_addr*、*size*、*virt_addr* 成员变量 + +[返回值] +- 返回BM_SUCCESS(=0)表示执行成功,否则执行失败 + +### 13. bmvpu_enc_dma_buffer_attach + +[功能和说明] +- 将用户通过 **bmvpu_enc_dma_buffer_allocate** 函数以外的其它方式申请的设备内存地址绑定至编码器。 + +[函数名] + +`int bmvpu_enc_dma_buffer_attach(int vpu_core_idx, uint64_t paddr, unsigned int size)` + +[参数说明] +- `int vpu_core_idx` 输入参数,指定编码器所在core的索引 +- `uint64_t paddr` 输入参数,由用户通过 **bmvpu_enc_dma_buffer_allocate** 函数以外的其它方式申请的设备内存地址 +- `unsigned int size` 输入参数,该块设备内存大小(byte) + +[返回值] +- 返回BM_SUCCESS(=0)表示执行成功,否则执行失败 + +### 14. bmvpu_enc_dma_buffer_deattach + +[功能和说明] +- 将用户通过 **bmvpu_enc_dma_buffer_attach** 函数绑定的设备内存解绑。 + +[函数名] + +`int bmvpu_enc_dma_buffer_deattach(int vpu_core_idx, uint64_t paddr, unsigned int size)` + +[参数说明] +- `int vpu_core_idx` 输入参数,指定编码器所在core的索引 +- `uint64_t paddr` 输入参数,由用户通过 **bmvpu_enc_dma_buffer_attach** 函数绑定的设备内存物理地址 +- `unsigned int size` 输入参数,该块设备内存大小(byte) + +[返回值] +- 返回BM_SUCCESS(=0)表示执行成功,否则执行失败 + +### 15. 
bmvpu_dma_buffer_map + +[功能和说明] +- 将对应core上申请的设备内存映射到系统内存。 + +[函数名] + +`int bmvpu_dma_buffer_map(int vpu_core_idx, BmVpuEncDMABuffer *buf, int port_flag)` + +[参数说明] +- `int vpu_core_idx` 输入参数,指定编码器所在core的索引 +- `BmVpuEncDMABuffer *buf` 输入参数,指定设备内存的地址、大小等信息 +- `int port_flag` 输入参数,配置 **mmap** 内存可读(*BM_VPU_MAPPING_FLAG_READ*)或可写(*BM_VPU_MAPPING_FLAG_WRITE*) + +[返回值] +- 返回BM_SUCCESS(=0)表示执行成功,否则执行失败 + +### 16. bmvpu_dma_buffer_unmap + +[功能和说明] +- 对某个core上映射过的设备内存解除映射 + +[函数名] + +`int bmvpu_dma_buffer_unmap(int vpu_core_idx, BmVpuEncDMABuffer *buf)` + +[参数说明] +- `int vpu_core_idx` 输入参数,指定编码器所在core的索引 +- `BmVpuEncDMABuffer *buf` 输入参数,指定设备内存的地址、大小等信息 + +[返回值] +- 返回BM_SUCCESS(=0)表示执行成功,否则执行失败 + +### 17. bmvpu_enc_dma_buffer_flush + +[功能和说明] +- 对已分配的设备内存进行flush操作。 + +[函数名] + +`int bmvpu_enc_dma_buffer_flush(int vpu_core_idx, BmVpuEncDMABuffer *buf)` + +[参数说明] +- `int vpu_core_idx` 输入参数,指定编码器所在core的索引 +- `BmVpuEncDMABuffer *buf` 输入参数,调用前用户至少要填充该结构体的 *phys_addr*、*size* 成员变量 + +[返回值] +- 返回BM_SUCCESS(=0)表示执行成功,否则执行失败 + +### 18. bmvpu_enc_dma_buffer_invalidate + +[功能和说明] +- 对已分配的设备内存进行invalidate操作。 + +[函数名] + +`int bmvpu_enc_dma_buffer_invalidate(int vpu_core_idx, BmVpuEncDMABuffer *buf)` + +[参数说明] +- `int vpu_core_idx` 输入参数,指定编码器所在core的索引 +- `BmVpuEncDMABuffer *buf` 输入参数,调用前用户至少要填充该结构体的 *phys_addr*、*size* 成员变量 + +[返回值] +- 返回BM_SUCCESS(=0)表示执行成功,否则执行失败 + +### 19. bmvpu_enc_dma_buffer_get_physical_address + +[功能和说明] +- 返回已分配的设备内存的地址。 + +[函数名] + +`uint64_t bmvpu_enc_dma_buffer_get_physical_address(BmVpuEncDMABuffer *buf)` + +[参数说明] +- `BmVpuEncDMABuffer *buf` 输入参数,已分配的设备内存 + +[返回值] +- 已分配的设备内存的物理地址 + +### 20. bmvpu_enc_dma_buffer_get_size + +[功能和说明] +- 返回已分配的设备内存的大小。 + +[函数名] + +`unsigned int bmvpu_enc_dma_buffer_get_size(BmVpuEncDMABuffer *buf)` + +[参数说明] +- `BmVpuEncDMABuffer *buf` 输入参数,已分配的设备内存 + +[返回值] +- 已分配的设备内存的大小 + +### 21. 
bmvpu_enc_upload_data + +[功能和说明] +- 向使用 **bmvpu_enc_dma_buffer_allocate()** 分配的设备内存地址传输数据。 + +[函数名] + +`int bmvpu_enc_upload_data(int vpu_core_idx, const uint8_t* host_va, int host_stride, uint64_t vpu_pa, int vpu_stride, int width, int height)` + +[参数说明] +- `int vpu_core_idx` 输入参数,指定编码器所在core的索引 +- `const uint8_t* host_va` 输入参数, 待传输数据的host端虚拟地址 +- `int host_stride` 输入参数,host端的数据跨距 +- `uint64_t vpu_pa` 输入参数,传输数据的目标物理地址 +- `int vpu_stride` 输入参数,device端的数据跨距 +- `int width` 输入参数,数据宽度 +- `int height` 输入参数,数据高度 + +[返回值] +- 返回BM_SUCCESS(=0)表示执行成功,否则执行失败 + +### 22. bmvpu_enc_download_data + +[功能和说明] +- 从 **bmvpu_enc_dma_buffer_allocate()** 分配的设备内存地址向host端传输数据。 + +[函数名] + +`int bmvpu_enc_download_data(int vpu_core_idx, uint8_t* host_va, int host_stride, uint64_t vpu_pa, int vpu_stride, int width, int height)` + +[参数说明] +- `int vpu_core_idx` 输入参数,指定编码器所在core的索引 +- `uint8_t* host_va` 输出参数, 传输数据的host端目标虚拟地址 +- `int host_stride` 输入参数,host端的数据跨距 +- `uint64_t vpu_pa` 输入参数,待传输数据的设备内存物理地址 +- `int vpu_stride` 输入参数,device端的数据跨距 +- `int width` 输入参数,数据宽度 +- `int height` 输入参数,数据高度 + +[返回值] +- 返回BM_SUCCESS(=0)表示执行成功,否则执行失败 + +# 三、 JPU通用结构体 + +## JPU枚举类型 + +- BmJpuLogLevel +- BmJpuImageFormat +- BmJpuColorFormat +- BmJpuChromaFormat +- BmJpuRotateAngle +- BmJpuMirrorDirection + +### 1. BmJpuLogLevel + +| 枚举变量 | 描述 | +| ------------------------ | --------------- | +| BM_JPU_LOG_LEVEL_ERROR | 日志等级ERROR | +| BM_JPU_LOG_LEVEL_WARNING | 日志等级WARNING | +| BM_JPU_LOG_LEVEL_INFO | 日志等级INFO | +| BM_JPU_LOG_LEVEL_DEBUG | 日志等级DEBUG | +| BM_JPU_LOG_LEVEL_LOG | 日志等级LOG | +| BM_JPU_LOG_LEVEL_TRACE | 日志等级TRACE | + +### 2. 
BmJpuImageFormat + +| 枚举变量 | 描述 | +| --------------------------- | ---------------------------- | +| BM_JPU_IMAGE_FORMAT_YUV420P | YUV 4:2:0 planar | +| BM_JPU_IMAGE_FORMAT_YUV422P | YUV 4:2:2 planar | +| BM_JPU_IMAGE_FORMAT_YUV444P | YUV 4:4:4 planar | +| BM_JPU_IMAGE_FORMAT_NV12 | YUV 4:2:0 NV12 | +| BM_JPU_IMAGE_FORMAT_NV21 | YUV 4:2:0 NV21 | +| BM_JPU_IMAGE_FORMAT_NV16 | YUV 4:2:2 NV16 | +| BM_JPU_IMAGE_FORMAT_NV61 | YUV 4:2:2 NV61 | +| BM_JPU_IMAGE_FORMAT_GRAY | YUV 4:0:0 | +| BM_JPU_IMAGE_FORMAT_RGB | RGB 8:8:8 packed, for opencv | + +### 3. BmJpuColorFormat + +| 枚举变量 | 描述 | +| ------------------------------------- | ---------------------------------------------------- | +| BM_JPU_COLOR_FORMAT_YUV420 | YUV 4:2:0 format | +| BM_JPU_COLOR_FORMAT_YUV422_HORIZONTAL | YUV 4:2:2 format | +| BM_JPU_COLOR_FORMAT_YUV422_VERTICAL | YUV 2:2:4 format
(JPU中定义的一种格式,很少使用) | +| BM_JPU_COLOR_FORMAT_YUV444 | YUV 4:4:4 format | +| BM_JPU_COLOR_FORMAT_YUV400 | YUV 4:0:0 format | + +### 4. BmJpuChromaFormat + +| 枚举变量 | 描述 | +| ------------------------------------ | -------------------------------------------------------- | +| BM_JPU_CHROMA_FORMAT_CBCR_SEPARATED | Cb、Cr分量非交织,分别存放在不同的通道 | +| BM_JPU_CHROMA_FORMAT_CBCR_INTERLEAVE | Cb、Cr分量交织,存放在一个通道中交错排列,Cb在前,Cr在后 | +| BM_JPU_CHROMA_FORMAT_CRCB_INTERLEAVE | Cb、Cr分量交织,存放在一个通道中交错排列,Cr在前,Cb在后 | + +### 5. BmJpuRotateAngle + +| 枚举变量 | 描述 | +| ------------------ | --------------- | +| BM_JPU_ROTATE_NONE | 不做旋转 | +| BM_JPU_ROTATE_90 | 逆时针旋转90度 | +| BM_JPU_ROTATE_180 | 逆时针旋转180度 | +| BM_JPU_ROTATE_270 | 逆时针旋转270度 | + +### 6. BmJpuMirrorDirection + +| 枚举变量 | 描述 | +| --------------------- | ------------------ | +| BM_JPU_MIRROR_NONE | 不做镜像 | +| BM_JPU_MIRROR_VER | 竖直方向镜像 | +| BM_JPU_MIRROR_HOR | 水平方向镜像 | +| BM_JPU_MIRROR_HOR_VER | 水平和竖直方向镜像 | + +## JPU通用结构体 + +- BmJpuFramebuffer +- BmJpuFramebufferSizes +- BmJpuRawFrame + +### 1. BmJpuFramebuffer + +| 成员变量 | 类型 | 描述 | +| -------------- | ---------------- | ---------------------------------------------------------------------------------- | +| y_stride | unsigned int | Y 分量的步幅(Stride),单位: byte。 | +| cbcr_stride | unsigned int | Cb 和 Cr 分量的步幅(Stride),单位: byte。 | +| dma_buffer | bm_device_mem_t* | 用于存放 YUV 数据的一块设备内存,由 bmlib 分配。 | +| y_offset | size_t | Y 分量起始地址相对于 dma_buffer 中物理地址的偏移量,单位: byte。 | +| cb_offset | size_t | Cb 分量起始地址相对于 dma_buffer 中物理地址的偏移量,单位: byte。 | +| cr_offset | size_t | Cr 分量起始地址相对于 dma_buffer 中物理地址的偏移量,单位: byte。 | +| context | void* | 用户定义的指针,库不会更改此值。使用方法由用户决定,例如:用于保存解码上下文信息。 | +| already_marked | int | 如果帧缓冲区已标记为已显示,则设置为 1 。仅供内部使用,请不要在外部修改。 | +| internal | void* | 内部定义的数据。请不要修改。 | + +### 2. 
BmJpuFramebufferSizes + +| 成员变量 | 类型 | 描述 | +| -------------------- | ---------------- | ----------------------------------------------------------------------------------------------------------- | +| aligned_frame_width | unsigned int | 帧的宽度(单位: pixel),按照 64 像素边界对齐。 | +| aligned_frame_height | unsigned int | 帧的高度(单位: pixel),按照 16 像素边界对齐。 | +| y_stride | unsigned int | Y 分量的跨度大小(单位: byte),包含了对齐要求。 | +| cbcr_stride | unsigned int | Cb 和 Cr 分量的跨度大小(单位: byte),包含了对齐要求。Cb和Cr分量始终使用相同的跨度,因此它们共享相同的值。 | +| y_size | unsigned int | Y 分量的 DMA 缓冲区大小(单位: byte)。 | +| cbcr_size | unsigned int | Cb 和 Cr 分量的 DMA 缓冲区大小(单位: byte)。Cb和Cr分量始终使用相同的大小,因此它们共享相同的值。 | +| total_size | unsigned int | 帧缓冲区 DMA 缓冲区的总大小,包括所有分量的大小、对齐和填充字节。 | +| image_format | BmJpuImageFormat | 帧的图像格式,可选项请参考 **BmJpuImageFormat** 定义。 | + +### 3. BmJpuRawFrame + +| 成员变量 | 类型 | 描述 | +| ----------- | ----------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| framebuffer | BmJpuFramebuffer* | 在解码时,指向包含解码后的原始帧的帧缓冲区。在编码时,指向包含要编码的原始帧的帧缓冲区。 | +| context | void* | 用户定义的指针。库不会修改此值。该指针和相应的编码帧中的context指向相同的地址,库会将它们传递给用户。使用方法由用户决定,例如:可以用于标识与此编码帧关联的原始帧。 | +| pts | uint64_t | 用户定义的时间戳(PTS - Presentation Time Stamp)。
通常用于关联原始/编码帧的时间戳信息。库只是将其传递给关联的编码帧,不会实际更改其值。 | +| dts | uint64_t | 用户定义的时间戳(DTS - Decoding Time Stamp)。
通常用于关联原始/编码帧的时间戳信息。库只是将其传递给关联的编码帧,不会实际更改其值。 | + +
+ +# 四、 jpeg Decode 数据结构 & API说明 + +## jpeg Decode 结构体 + +- BmJpuJPEGDecInfo +- BmJpuJPEGDecoder +- BmJpuDecOpenParams +- BmJpuDecInitialInfo +- BmJpuDecReturnCodes + +### 1. BmJpuJPEGDecInfo + +- JPU帧缓冲区的宽度和高度与内部边界对齐。 +- 帧aligned_frame由实际图像像素actual_frame和额外的填充像素组成。 + +| 成员变量 | 类型 | 描述 | +| -------------------- | ----------------- | --------------------------------------------------------------- | +| aligned_frame_width | unsigned int | 对齐后的宽度,包括额外的填充像素,已对齐到内部边界 | +| aligned_frame_height | unsigned int | 对齐后的高度,包括额外的填充像素,已对齐到内部边界 | +| actual_frame_width | unsigned int | 实际帧宽度,不包括额外的填充像素 | +| actual_frame_height | unsigned int | 实际帧高度,不包括额外的填充像素 | +| y_stride | unsigned int | Y分量的跨度(每行占用的字节数) | +| cbcr_stride | unsigned int | Cr和Cb分量的跨度,通常与Y分量相同。
Cb和Cr的跨度经常是相同的 | +| y_size | unsigned int | Y分量在帧缓冲区中的大小(单位: byte) | +| cbcr_size | unsigned int | Cr和Cb分量在帧缓冲区中的大小(单位: byte) | +| y_offset | unsigned int | Y分量在帧缓冲区中的偏移量(单位: byte) | +| cb_offset | unsigned int | Cb分量在帧缓冲区中的偏移量(单位: byte) | +| cr_offset | unsigned int | Cr分量在帧缓冲区中的偏移量(单位: byte) | +| framebuffer | BmJpuFramebuffer* | 包含解码后的帧像素数据的指针和相关信息 | +| image_format | BmJpuImageFormat | 解码后的帧的图像格式 | +| framebuffer_recycle | bool | 表示是否开启framebuffer_recycle模式 | +| framebuffer_size | size_t | 帧缓冲区的总大小(单位: byte) | + +### 2. BmJpuJPEGDecoder + +| 成员变量 | 类型 | 描述 | +| -------------------------- | --------------------- | -------------------------------------------------------------------------------- | +| decoder | BmJpuDecoder* | 指向内部JPEG解码器的指针 | +| bitstream_buffer | bm_device_mem_t* | 码流缓冲区的指针,用于存储JPEG码流数据。
**bm_device_mem_t: bmlib内存描述符** | +| bitstream_buffer_size | size_t | 码流缓冲区的大小 | +| bitstream_buffer_alignment | unsigned int | 码流缓冲区的内存对齐要求(单位: byte) | +| initial_info | BmJpuDecInitialInfo | 包含JPEG解码器的初始化信息 | +| framebuffers | BmJpuFramebuffer* | 记录解码器中帧缓冲区中各分量的地址偏移及大小 | +| framebuffer_addrs | bm_jpu_phys_addr_t* | 存储解码器中帧缓冲区的设备内存对应的物理地址,由 bmlib 分配 | +| framebuffer_size | size_t | 记录解码器中帧缓冲区的总内存大小(各缓冲区大小相同) | +| num_framebuffers | unsigned int | 解码器申请的帧缓冲区总帧数。 | +| num_extra_framebuffers | unsigned int | 解码需要的帧缓冲区额外帧数,通常为 0,暂未使用。 | +| calculated_sizes | BmJpuFramebufferSizes | 记录对齐后的帧缓冲区大小信息。 | +| raw_frame | BmJpuRawFrame | 表示原始帧数据,用于存储图像的原始数据和时间戳。 | +| device_index | int | 设备索引,标识使用的解码设备的索引 | +| opaque | void* | 用户自定义数据 | +| rotationEnable | int | 是否启用图像旋转。0 表示不旋转,1 表示旋转。 | +| rotationAngle | BmJpuRotateAngle | 旋转角度。可选项请参考 **BmJpuRotateAngle** 定义 | +| mirrorEnable | int | 是否启用图像镜像。0 表示不镜像,1 表示镜像。 | +| mirrorDirection | BmJpuMirrorDirection | 镜像方向。可选项请参考 **BmJpuMirrorDirection** 定义 | +| framebuffer_recycle | bool | 表示是否开启framebuffer_recycle模式 | +| bitstream_from_user | bool | 表示码流缓冲区的设备内存是否由外部分配 | +| framebuffer_from_user | bool | 表示帧缓冲区的设备内存是否由外部分配 | + +### 3. 
BmJpuDecOpenParams + +| 成员变量 | 类型 | 描述 | +| ---------------------- | -------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| min_frame_width | unsigned int | 解码器能够处理的最小宽度(单位: pixel),设为0表示无限制。 | +| min_frame_height | unsigned int | 解码器能够处理的最小高度(单位: pixel),设为0表示无限制。 | +| max_frame_width | unsigned int | 解码器能够处理的最大宽度(单位: pixel),设为0表示无限制。 | +| max_frame_height | unsigned int | 解码器能够处理的最大高度(单位: pixel),设为0表示无限制。 | +| color_format | BmJpuColorFormat | 解码器输出的YUV格式,**注意:BM1684/BM1684X上不支持该设置**。 | +| chroma_interleave | BmJpuChromaFormat | 色度分量存储方式,可选项请参考 **BmJpuChromaFormat** 定义。 | +| scale_ratio | unsigned int | 缩放比例,用于指定解码输出的缩放级别。0 表示不进行缩放,n (取值1-3) 表示等比缩放为 2^n 倍。 | +| bs_buffer_size | size_t | 用于码流的 DMA 缓冲区大小,这里记录了存储输入图片需要的字节大小。 | +| buffer | uint8_t* | 码流缓冲区的指针。仅在 Windows 环境下使用,在其他环境下已弃用,不建议使用。 | +| device_index | int | 设备索引。 | +| rotationEnable | int | 是否启用图像旋转。0 表示不旋转,1 表示旋转。 | +| rotationAngle | BmJpuRotateAngle | 旋转角度。可选项请参考 **BmJpuRotateAngle** 定义。 | +| mirrorEnable | int | 是否启用图像镜像。0 表示不镜像,1 表示镜像。 | +| mirrorDirection | BmJpuMirrorDirection | 镜像方向。可选项请参考 **BmJpuMirrorDirection** 定义。 | +| roiEnable | int | 是否启用感兴趣区域(ROI)。 | +| roiWidth | int | ROI 的宽度。 | +| roiHeight | int | ROI 的高度。 | +| roiOffsetX | int | ROI 相对图像左上角的水平偏移量。 | +| roiOffsetY | int | ROI 相对图像左上角的垂直偏移量。 | +| framebuffer_recycle | bool | 是否启用framebuffer_recycle模式。如果开启recycle模式,则解码器会使用固定大小的帧缓冲区,当输入码流的分辨率或格式切换时,不会重新申请设备内存。此时,帧缓冲区的大小由用户指定。 | +| framebuffer_size | size_t | 用户指定的帧缓冲区大小,*framebuffer_recycle = 1* 或 *framebuffer_from_user = 1* 时生效,生效时要求该值大于0。 | +| bitstream_from_user | bool | 是否由外部分配码流缓冲区设备内存 | +| bs_buffer_phys_addr | bm_jpu_phys_addr_t | 用户指定的码流缓冲区的设备内存的物理地址,*bitstream_from_user = 1* 时生效。 | +| framebuffer_from_user | bool | 是否由外部分配帧缓冲区设备内存 | +| framebuffer_num | int | 用户指定的帧缓冲区个数,*framebuffer_from_user = 1* 时生效,默认为1 | +| framebuffer_phys_addrs | 
bm_jpu_phys_addr_t* | 用户指定的帧缓冲区的设备内存的物理地址,*framebuffer_from_user = 1* 时生效,以数组的形式传入,个数由 *framebuffer_num* 指定。 | +| timeout | int | 解码超时时间,默认为2s | +| timeout_count | int | 解码超时重试次数,默认为5 | + +### 4. BmJpuDecInitialInfo + +| 成员变量 | 类型 | 描述 | +| ----------------------------- | ---------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| frame_height | unsigned int | 帧的高度(单位: pixel)。 | +| | | 请注意,这些值不一定对齐到 16 像素边界(JPU 帧缓冲区对齐要求)。这些是实际像素内容的宽度和高度。如果需要对齐,可能会在帧的右侧有填充列和在帧的下方有填充行。 | +| frame_width | unsigned int | 帧的宽度(单位: pixel)。 | +| | | 请注意,这些值不一定对齐到 16 像素边界(JPU 帧缓冲区对齐要求)。这些是实际像素内容的宽度和高度。如果需要对齐,可能会在帧的右侧有填充列和在帧的下方有填充行。 | +| min_num_required_framebuffers | unsigned int | 解码需要的缓冲区最小个数,调用者必须向解码器注册至少该数量的帧缓冲区。 | +| image_format | BmJpuImageFormat | 解码后帧的图像格式。可选项请参考 **BmJpuImageFormat** 定义。 | +| framebuffer_alignment | unsigned int | 帧缓冲区的内存对齐要求(单位: byte)。 | +| roiFrameHeight | int | 感兴趣区域 (ROI) 帧的高度(单位: pixel)。 | +| roiFrameWidth | int | 感兴趣区域 (ROI) 帧的宽度(单位: pixel)。 | + +### 5. 
BmJpuDecReturnCodes +- 解码器返回代码。除BM_JPU_DEC_RETURN_CODE_OK外,这些都应被视为错误,返回时应关闭解码器。 + +| 错误码 | 返回值 | 描述 | +| ------------------------------------------------ | ------ | ------------------------------------------------------------------------------------------------------------------ | +| BM_JPU_DEC_RETURN_CODE_OK | 0 | 操作成功完成。 | +| BM_JPU_DEC_RETURN_CODE_ERROR | 1 | 通用错误码,用于当其他错误码无法匹配错误时的情况。 | +| BM_JPU_DEC_RETURN_CODE_INVALID_PARAMS | 2 | 输入参数无效。 | +| BM_JPU_DEC_RETURN_CODE_INVALID_HANDLE | 3 | JPU 解码器句柄无效。这是一个内部错误,很可能是JPU库中的BUG,请报告此类错误。 | +| BM_JPU_DEC_RETURN_CODE_INVALID_FRAMEBUFFER | 4 | 帧缓冲区信息无效。通常发生在 *bm_jpu_jpeg_dec_get_info()* 函数中获取的 **BmJpuFramebuffer** 结构包含无效值的情况。 | +| BM_JPU_DEC_RETURN_CODE_INSUFFICIENT_FRAMEBUFFERS | 5 | 使用已注册给解码器的帧缓冲区解码失败,因为未提供足够大小的帧缓冲区。 | +| BM_JPU_DEC_RETURN_CODE_INVALID_STRIDE | 6 | 某个跨度值(例如帧缓冲区的Y跨度值)无效。 | +| BM_JPU_DEC_RETURN_CODE_WRONG_CALL_SEQUENCE | 7 | 在不适当的时间调用函数。这是一个内部错误,很可能是JPU库中的BUG,请报告此类错误。 | +| BM_JPU_DEC_RETURN_CODE_TIMEOUT | 8 | 操作超时。 | +| BM_JPU_DEC_RETURN_CODE_ALREADY_CALLED | 9 | 调用了一个只应在解码会话的持续时间内调用一次的函数。这是一个内部错误,很可能是JPU库中的BUG,请报告此类错误。 | +| BM_JPU_DEC_RETURN_ALLOC_MEM_ERROR | 10 | 分配内存失败。 | +| BM_JPU_DEC_RETURN_OVER_LIMITED | 11 | 超过解码分辨率限制 | + + + +
+ + +## jpeg Decode API +- bm_jpu_dec_load +- bm_jpu_jpeg_dec_open +- bm_jpu_jpeg_dec_decode +- bm_jpu_jpeg_dec_get_info +- bm_jpu_jpeg_dec_frame_finished +- bm_jpu_jpeg_dec_close +- bm_jpu_dec_unload +- bm_jpu_calc_framebuffer_sizes +- bm_jpu_dec_error_string +- bm_jpu_dec_get_bm_handle +- bm_jpu_jpeg_dec_flush + +### 1. bm_jpu_dec_load +[功能和说明] +- 根据传入的 设备ID 打开指定的解码设备节点,可以通过 bmlib 管理内存分配。 + +[函数名] + +`BmJpuDecReturnCodes bm_jpu_dec_load(int device_index)` + +[参数说明] +- `int device_index` 解码设备ID。 + +### 2. bm_jpu_jpeg_dec_open +[功能和说明] +- 初始化一个JPEG解码器实例。 +- 分配额外的帧缓冲区,如果需要的话(例如,用于快速解码多个JPEG图像或者保留解码后的DMA缓冲区)。 +- 分配一个码流缓冲区,用于存储JPEG数据以供解码器使用。 +- 调用 `bm_jpu_dec_open` 来实际打开解码器并传入必要的回调和参数。 +- 如果在任何步骤中出现错误,函数将清理已分配的资源并返回相应的错误码。 + +[函数名] + +`BmJpuDecReturnCodes bm_jpu_jpeg_dec_open(BmJpuJPEGDecoder **jpeg_decoder,BmJpuDecOpenParams *open_params, unsigned int num_extra_framebuffers)` + +[参数说明] +- `BmJpuJPEGDecoder **jpeg_decoder` 指向解码器的二级指针,在接口内部完成初始化,返回给用户一个解码器实例。 +- `BmJpuDecOpenParams *open_params` 指向解码参数的指针,该结构体包含打开解码器时所需的参数,如设备索引、解码帧配置参数等。 +- `unsigned int num_extra_framebuffers` 指示函数分配额外帧缓冲区的数量。这些额外的帧缓冲区用于解码多个JPEG图像或在其他地方保留解码帧的DMA缓冲区。 + +### 3. bm_jpu_jpeg_dec_decode + +[功能和说明] +- 解码JPEG数据。 +- 设置输入JPEG数据块及其大小。 +- 注意,这个解码器只支持基线(Baseline)JPEG数据,不支持渐进式(Progressive)编码。 + +[函数名] + +`BmJpuDecReturnCodes bm_jpu_jpeg_dec_decode(BmJpuJPEGDecoder *jpeg_decoder, uint8_t const *jpeg_data, size_t const jpeg_data_size, int timeout, int timeout_count)` + +[参数说明] +- `BmJpuJPEGDecoder *jpeg_decoder` 指向JPEG解码器实例的指针。 +- `uint8_t const *jpeg_data` 待解码的图像数据。 +- `size_t const jpeg_data_size` 待解码的图像数据大小,单位: byte。 +- `int timeout` 解码超时时间。 +- `int timeout_count` 解码超时重试次数 + +### 4. bm_jpu_jpeg_dec_get_info +[功能和说明] +- 从解码器获取解码信息 + +[函数名] + +`void bm_jpu_jpeg_dec_get_info(BmJpuJPEGDecoder *jpeg_decoder, BmJpuJPEGDecInfo *info)` + +[参数说明] +- `BmJpuJPEGDecoder *jpeg_decoder` 指向JPEG解码器实例的指针。 +- `BmJpuJPEGDecInfo *info` 用来存储解码图像的详细信息的结构体。 + +### 5. 
bm_jpu_jpeg_dec_frame_finished +[功能和说明] +- 通知解码器一个解码帧已经被处理完毕,并且相关的资源可以被回收。 +- 一旦用户处理完一帧,就必须始终调用此函数,否则JPU无法回收此帧缓冲区,如果解码器内部的帧缓冲区均被占用,将无法继续解码。 + +[函数名] + +`BmJpuDecReturnCodes bm_jpu_jpeg_dec_frame_finished(BmJpuJPEGDecoder *jpeg_decoder, BmJpuFramebuffer *framebuffer)` + +[参数说明] +- `BmJpuJPEGDecoder *jpeg_decoder` 指向JPEG解码器实例的指针。 +- `BmJpuFramebuffer *framebuffer` 包含了已解码的图像数据的帧缓冲区。 + +### 6. bm_jpu_jpeg_dec_close +[功能和说明] +- 用于关闭JPEG解码器实例,并释放资源。 + +[函数名] + +`BmJpuDecReturnCodes bm_jpu_jpeg_dec_close(BmJpuJPEGDecoder *jpeg_decoder)` + +[参数说明] +- `BmJpuJPEGDecoder *jpeg_decoder` 指向JPEG解码器实例的指针。 + +### 7. bm_jpu_dec_unload +[功能和说明] +- 释放指定的解码设备节点 + +[函数名] + +`BmJpuDecReturnCodes bm_jpu_dec_unload(int device_index)` + +[参数说明] +- `int device_index` 解码设备ID + +### 8. bm_jpu_calc_framebuffer_sizes +[功能和说明] +- 用于计算存放解码数据的帧缓冲区的各分量对齐后的跨度、大小等信息 + +[函数名] + +`BmJpuDecReturnCodes bm_jpu_calc_framebuffer_sizes(unsigned int frame_width, unsigned int frame_height, unsigned int framebuffer_alignment, BmJpuImageFormat image_format, BmJpuFramebufferSizes *calculated_sizes)` + +[参数说明] +- `unsigned int frame_width` 图像实际宽度 +- `unsigned int frame_height` 图像实际高度 +- `unsigned int framebuffer_alignment` 解码帧缓冲区内存对齐要求,设为0或1表示不对齐,单位: byte +- `BmJpuImageFormat image_format` 图像格式 +- `BmJpuFramebufferSizes *calculated_sizes` 计算后的帧缓冲区大小信息 + +### 9. bm_jpu_dec_error_string +[功能和说明] +- 返回解码错误码的具体描述 + +[函数名] + +`char const * bm_jpu_dec_error_string(BmJpuDecReturnCodes code)` + +[参数说明] +- `BmJpuDecReturnCodes code` 解码错误码 + +### 10. bm_jpu_dec_get_bm_handle +[功能和说明] +- 获取该解码设备上bmlib的句柄 + +[函数名] + +`bm_handle_t bm_jpu_dec_get_bm_handle(int device_index)` + +[参数说明] +- `int device_index` 解码设备ID + +### 11. bm_jpu_jpeg_dec_flush +[功能和说明] +- 刷新解码器帧缓冲区状态,解码器内部所有帧缓冲区将解除占用,可用于后续解码 + +[函数名] + +`BmJpuDecReturnCodes bm_jpu_jpeg_dec_flush(BmJpuJPEGDecoder *jpeg_decoder)` + +[参数说明] +- `BmJpuJPEGDecoder *jpeg_decoder` 指向JPEG解码器实例的指针。 + + +
+ + +# 五、 jpeg Encode 数据结构 & API说明 + +## jpeg Encode 结构体 +- BmJpuJPEGEncParams +- BmJpuJPEGEncoder +- BmJpuEncInitialInfo +- BmJpuEncReturnCodes + +### 1. BmJpuJPEGEncParams + +| 成员变量 | 类型 | 描述 | +| --------------------- | --------------------------- | ------------------------------------------------------------------------------------------------------------ | +| frame_width | unsigned int | 输入帧的宽度,这是实际大小,这些大小不能为零。如果需要,它们将在内部对齐。 | +| frame_height | unsigned int | 输入帧的高度,这是实际大小,这些大小不能为零。如果需要,它们将在内部对齐。 | +| quality_factor | unsigned int | JPEG编码的图像质量因子。1表示最佳压缩,100表示最佳质量。 | +| image_format | BmJpuImageFormat | 输入帧的图像格式 | +| acquire_output_buffer | BmJpuEncAcquireOutputBuffer | 获取编码码流输出 buffer 的回调函数, 具体定义请参考 **BmJpuEncAcquireOutputBuffer** 。 | +| finish_output_buffer | BmJpuEncFinishOutputBuffer | 释放上述 buffer 的回调函数,具体定义请参考 **BmJpuEncFinishOutputBuffer**。 | +| write_output_data | BmJpuWriteOutputData | 指定编码码流输出方式的回调函数,如:写入文件或写入指定的内存地址。具体定义请参考 **BmJpuWriteOutputData** | +| | | 使用此函数将不会调用 `acquire_output_buffer` 和 `finish_output_buffer` 函数。 | +| output_buffer_context | void* | 保存输出数据的上下文,将传递给 `acquire_output_buffer`、`finish_output_buffer` 和 `write_output_data` 函数。 | +| rotationEnable | int | 是否启用图像旋转。0 表示不旋转,1 表示旋转。 | +| rotationAngle | BmJpuRotateAngle | 旋转角度。可选项请参考 **BmJpuRotateAngle** 定义。 | +| mirrorEnable | int | 是否启用图像镜像。0 表示不镜像,1 表示镜像。 | +| mirrorDirection | BmJpuMirrorDirection | 镜像方向。可选项请参考 **BmJpuMirrorDirection** 定义。 | +| bs_in_device | int | 表示编码的码流数据输出到设备内存还是系统内存,0表示输出到系统内存,1表示输出到设备内存。 | +| timeout | int | 编码超时时间,默认为2s。 | +| timeout_count | int | 编码超时重试次数,默认为5。 | +| bs_buffer_phys_addr | bm_jpu_phys_addr_t | (可选)用户外部分配的码流缓冲区的设备内存的物理地址。 | +| bs_buffer_size | int | (可选)用户外部分配的码流缓冲区的大小。 | + +### 2. 
BmJpuJPEGEncoder + +| 成员变量 | 类型 | 描述 | +| -------------------------- | --------------------- | --------------------------------------------------------------- | +| encoder | BmJpuEncoder * | 指向内部JPEG编码器的指针。 | +| bitstream_buffer_addr | bm_jpu_phys_addr_t | 码流缓冲区的设备内存的物理地址。 | +| bitstream_buffer_size | size_t | 码流缓冲区的大小,单位: byte。 | +| bitstream_buffer_alignment | unsigned int | 码流缓冲区的对齐要求,单位: byte。 | +| initial_info | BmJpuEncInitialInfo | 编码器的初始化信息。 | +| frame_width | unsigned int | 输入帧的宽度,单位: pixel。 | +| frame_height | unsigned int | 输入帧的高度,单位: pixel。 | +| calculated_sizes | BmJpuFramebufferSizes | 由输入帧计算得到的帧缓冲区大小信息。 | +| quality_factor | unsigned int | JPEG 编码的质量因子。1 表示最佳压缩质量,100 表示最佳图像质量。 | +| image_format | BmJpuImageFormat | 输入帧的图像格式。 | +| device_index | int | 设备索引。 | +| rotationEnable | int | 是否启用图像旋转。0 表示不旋转,1 表示旋转。 | +| rotationAngle | BmJpuRotateAngle | 旋转角度。可选项请参考 **BmJpuRotateAngle** 定义。 | +| mirrorEnable | int | 是否启用图像镜像。0 表示不镜像,1 表示镜像。 | +| mirrorDirection | BmJpuMirrorDirection | 镜像方向。可选项请参考 **BmJpuMirrorDirection** 定义。 | +| bitstream_from_user | bool | 表示码流缓冲区的设备内存是否由外部分配 | + +### 3. BmJpuEncInitialInfo + +| 成员变量 | 类型 | 描述 | +| ----------------------------- | ------------ | ---------------------------------------------------------------------- | +| min_num_required_framebuffers | unsigned int | 编码需要的缓冲区最小个数,调用者必须向编码器注册至少该数量的帧缓冲区。 | +| framebuffer_alignment | unsigned int | 帧缓冲区的内存对齐要求(单位: byte)。 | + +### 4. 
BmJpuEncReturnCodes +- 编码器返回代码。除BM_JPU_ENC_RETURN_CODE_OK外,这些都应被视为错误,返回时应关闭编码器。 + +| 错误码 | 返回值 | 描述 | +| ------------------------------------------------ | ------ | -------------------------------------------------------------------------------------------------------------- | +| BM_JPU_ENC_RETURN_CODE_OK | 0 | 操作成功完成。 | +| BM_JPU_ENC_RETURN_CODE_ERROR | 1 | 通用错误码,用于当其他错误码无法匹配错误时的情况。 | +| BM_JPU_ENC_RETURN_CODE_INVALID_PARAMS | 2 | 输入参数无效。 | +| BM_JPU_ENC_RETURN_CODE_INVALID_HANDLE | 3 | JPU 编码器句柄无效。这是一个内部错误,很可能是JPU库中的BUG,请报告此类错误。 | +| BM_JPU_ENC_RETURN_CODE_INVALID_FRAMEBUFFER | 4 | 帧缓冲区信息无效。通常发生在传递给 *bm_jpu_jpeg_enc_encode* 函数的 **BmJpuFramebuffer** 结构包含无效值的情况。 | +| BM_JPU_ENC_RETURN_CODE_INSUFFICIENT_FRAMEBUFFERS | 5 | 编码时使用的帧缓冲区无效。这是一个内部错误,很可能是JPU库中的BUG,请报告此类错误。 | +| BM_JPU_ENC_RETURN_CODE_INVALID_STRIDE | 6 | 某个跨度值(例如帧缓冲区的Y跨度值)无效。 | +| BM_JPU_ENC_RETURN_CODE_WRONG_CALL_SEQUENCE | 7 | 在不适当的时间调用函数。这是一个内部错误,很可能是JPU库中的BUG,请报告此类错误。 | +| BM_JPU_ENC_RETURN_CODE_TIMEOUT | 8 | 操作超时。 | +| BM_JPU_ENC_RETURN_CODE_ALREADY_CALLED | 9 | 调用了一个只应在编码会话的持续时间内调用一次的函数。这是一个内部错误,很可能是JPU库中的BUG,请报告此类错误。 | +| BM_JPU_ENC_RETURN_CODE_WRITE_CALLBACK_FAILED | 10 | 编码输出回调函数 **BmJpuWriteOutputData** 调用失败 | +| BM_JPU_ENC_RETURN_ALLOC_MEM_ERROR | 11 | 分配内存失败 | +| BM_JPU_ENC_BYTE_ERROR | 12 | 编码数据异常 | +| BM_JPU_ENC_RETURN_BS_BUFFER_FULL | 13 | 码流缓冲区已满 | + +
+ +## jpeg Encode API +- bm_jpu_enc_load +- bm_jpu_jpeg_enc_open +- bm_jpu_jpeg_enc_encode +- bm_jpu_jpeg_enc_close +- bm_jpu_enc_unload +- bm_jpu_enc_error_string +- bm_jpu_enc_get_bm_handle + +### 1. bm_jpu_enc_load +[功能和说明] +- 根据传入的 设备ID 打开指定的编码设备节点,可以通过 bmlib 管理内存分配。 + +[函数名] + +`BmJpuEncReturnCodes bm_jpu_enc_load(int device_index)` + +[参数说明] +- `int device_index` 编码设备ID。 + +### 2. bm_jpu_jpeg_enc_open +[功能和说明] +- 创建一个JPEG编码器实例,并申请指定大小的码流缓冲区。 + +[函数名] + +`BmJpuEncReturnCodes bm_jpu_jpeg_enc_open(BmJpuJPEGEncoder **jpeg_encoder, bm_jpu_phys_addr_t bs_buffer_phys_addr, int bs_buffer_size, int device_index)` + +[参数说明] +- `BmJpuJPEGEncoder **jpeg_encoder` 指向编码器的二级指针,在接口内部完成初始化,返回给用户一个编码器实例。 +- `bm_jpu_phys_addr_t bs_buffer_phys_addr` 用户指定的码流缓冲区设备内存物理地址,0表示由编码器内部分配。 +- `int bs_buffer_size` 码流缓冲区的大小,0表示默认大小(5MB)。 +- `int device_index` 编码设备ID + +### 3. bm_jpu_jpeg_enc_encode + +[功能和说明] +- 编码原始输入帧 +- JPU编码器仅生成baseline JPEG数据,不支持渐进式编码。 + +[函数名] + +`BmJpuEncReturnCodes bm_jpu_jpeg_enc_encode(BmJpuJPEGEncoder *jpeg_encoder, BmJpuFramebuffer const *framebuffer, BmJpuJPEGEncParams const *params, void **acquired_handle, size_t *output_buffer_size)` + +[参数说明] +- `BmJpuJPEGEncoder *jpeg_encoder` 指向 `BmJpuJPEGEncoder` 结构体的指针,它包含了与JPEG编码相关的设置和状态信息 +- `BmJpuFramebuffer const *framebuffer` 包含要编码的原始输入像素。它的跨度(stride)和偏移(offset)值必须有效,并且其 dma_buffer 指针必须指向包含像素数据的 DMA 缓冲区。 +- `BmJpuJPEGEncParams const *params` 包含了JPEG编码的参数,如图像尺寸、质量因子等, params必须填充有效的数值;帧的宽度和高度不能为零。 +- `void **acquired_handle` 用于返回获取编码后的图像数据的句柄。 +- `size_t *output_buffer_size` 用于返回编码后的图像数据的大小。 + +### 4. 
bm_jpu_jpeg_enc_close +[功能和说明] +- 关闭编码器,释放资源。 +- 获取与 jpeg_encoder 相关的设备句柄 handle,通常用于与硬件设备交互。 +- 如果 jpeg_encoder 的 bitstream_buffer 不为空(即已分配了码流缓冲区),且设备内存由编码器内部申请,它会释放该码流缓冲区的设备内存,并释放 jpeg_encoder->bitstream_buffer 占用的内存。 +- 最后释放 jpeg_encoder 结构体本身的内存,并将 jpeg_encoder 指针设置为 NULL。 + +[函数名] + +`BmJpuEncReturnCodes bm_jpu_jpeg_enc_close(BmJpuJPEGEncoder *jpeg_encoder)` + +[参数说明] +- `BmJpuJPEGEncoder *jpeg_encoder` 指向JPEG编码器实例的指针。 + +### 5. bm_jpu_enc_unload +[功能和说明] +- 释放指定的编码设备节点 + +[函数名] + +`BmJpuDecReturnCodes bm_jpu_enc_unload(int device_index)` + +[参数说明] +- `int device_index` 编码设备ID + +### 6. bm_jpu_enc_error_string +[功能和说明] +- 返回编码错误码的具体描述 + +[函数名] + +`char const * bm_jpu_enc_error_string(BmJpuEncReturnCodes code)` + +[参数说明] +- `BmJpuEncReturnCodes code` 编码错误码 + +### 7. bm_jpu_enc_get_bm_handle +[功能和说明] +- 获取该编码设备上bmlib的句柄 + +[函数名] + +`bm_handle_t bm_jpu_enc_get_bm_handle(int device_index)` + +[参数说明] +- `int device_index` 编码设备ID + +## jpeg Encode Callback +- BmJpuEncAcquireOutputBuffer +- BmJpuEncFinishOutputBuffer +- BmJpuWriteOutputData + +### 1. BmJpuEncAcquireOutputBuffer +[功能和说明] +- 用于申请编码数据的输出buffer,可根据编码器的 *bs_in_device* 配置,选择输出到设备内存还是系统内存。 +- 编码完成后,数据会从编码器中的码流缓冲区拷贝到上述申请的buffer中,用户可以从 *acquired_handle* 获取输出数据。 +- **BmJpuWriteOutputData** 回调函数为空时执行该操作 + +[接口实现] + +`typedef void* (*BmJpuEncAcquireOutputBuffer)(void *context, size_t size, void **acquired_handle)` + +[参数说明] +- `void *context` 输出上下文信息,由用户在编码参数中的 **output_buffer_context** 指定 +- `size_t size` 申请buffer的大小 +- `void **acquired_handle` 用户获取输出数据的句柄,由编码接口中的 **acquired_handle** 参数指定 + +### 2. 
BmJpuEncFinishOutputBuffer +[功能和说明] +- 用于释放 **BmJpuEncAcquireOutputBuffer** 接口申请的buffer,与上述接口配套使用。 +- **BmJpuWriteOutputData** 回调函数为空时执行该操作 + +[接口实现] + +`typedef void (*BmJpuEncFinishOutputBuffer)(void *context, void *acquired_handle)` + +[参数说明] +- `void *context` 输出上下文信息,与 **BmJpuEncAcquireOutputBuffer** 接口中的 **context** 参数相同 +- `void *acquired_handle` 需要释放的buffer,等价于 **BmJpuEncAcquireOutputBuffer** 接口中的 **\*acquired_handle** 参数值 + +### 3. BmJpuWriteOutputData +[功能和说明] +- 用于用户指定编码数据输出方式的回调函数 + +[接口实现] + +`typedef int (*BmJpuWriteOutputData)(void *context, uint8_t const *data, uint32_t size, BmJpuEncodedFrame *encoded_frame)` + +[参数说明] +- `void *context` 输出上下文信息,由用户在编码参数中的 **output_buffer_context** 指定 +- `uint8_t const *data` 编码器码流缓冲区映射后的虚拟地址,由编码器内部mmap得到 +- `uint32_t size` 编码输出码流大小 +- `BmJpuEncodedFrame *encoded_frame` 编码器内部生成的编码帧信息,用于保存PTS、DTS等信息(暂未使用) \ No newline at end of file diff --git a/bmvid/document/bmApi/BmApi.pdf b/bmvid/document/bmApi/BmApi.pdf new file mode 100644 index 0000000..04ce64d Binary files /dev/null and b/bmvid/document/bmApi/BmApi.pdf differ diff --git a/bmvid/document/bmApi/ReadMe.md b/bmvid/document/bmApi/ReadMe.md new file mode 100644 index 0000000..3630ece --- /dev/null +++ b/bmvid/document/bmApi/ReadMe.md @@ -0,0 +1,29 @@ + + +**Table of Contents** *generated with [DocToc](https://github.com/thlorenz/doctoc)* + + - [author: shuning.han chun.wang +date: January 30, 2024](#author-shuninghan-chunwang%0Adate-january-30-2024) +- [本目录下源文件为BmApi.md,为sdk接口的相关说明](#%E6%9C%AC%E7%9B%AE%E5%BD%95%E4%B8%8B%E6%BA%90%E6%96%87%E4%BB%B6%E4%B8%BAbmapimd%E4%B8%BAsdk%E6%8E%A5%E5%8F%A3%E7%9A%84%E7%9B%B8%E5%85%B3%E8%AF%B4%E6%98%8E) +- [md文件转pdf时推荐使用vscode 插件 markdown pdf](#md%E6%96%87%E4%BB%B6%E8%BD%ACpdf%E6%97%B6%E6%8E%A8%E8%8D%90%E4%BD%BF%E7%94%A8vscode-%E6%8F%92%E4%BB%B6-markdown-pdf) + + + +--- +author: shuning.han chun.wang +date: January 30, 2024 +--- + +# 本目录下源文件为BmApi.md,为sdk接口的相关说明 + + 生成和更新目录时推荐使用doctoc + + sudo npm install -g doctoc (如果提示没有npm: 
sudo apt install npm) + + 更新目录 + + doctoc . + +# md文件转pdf时推荐使用vscode 插件 markdown pdf + + F1 + export 选择输出文件类型自动生成pdf diff --git a/bmvid/document/bmcv/source_en/api/absdiff.rst b/bmvid/document/bmcv/source_en/api/absdiff.rst index e168d77..017057e 100644 --- a/bmvid/document/bmcv/source_en/api/absdiff.rst +++ b/bmvid/document/bmcv/source_en/api/absdiff.rst @@ -4,6 +4,11 @@ bmcv_image_absdiff Subtract the pixel values of two images with the same size and take the absolute value. +**Processor model support** + +This interface supports BM1684/BM1684X. + + **Interface form:** .. code-block:: c diff --git a/bmvid/document/bmcv/source_en/api/add_weighted.rst b/bmvid/document/bmcv/source_en/api/add_weighted.rst index c3d3d19..dc8432b 100644 --- a/bmvid/document/bmcv/source_en/api/add_weighted.rst +++ b/bmvid/document/bmcv/source_en/api/add_weighted.rst @@ -9,6 +9,11 @@ Fusion of two images of the same size by weighted, as follows: \end{array} +**Processor model support** + +This interface supports BM1684/BM1684X. + + **Interface form:** .. 
code-block:: c diff --git a/bmvid/document/bmcv/source_en/api/api_introduct.rst b/bmvid/document/bmcv/source_en/api/api_introduct.rst index f5f99de..d711c1f 100644 --- a/bmvid/document/bmcv/source_en/api/api_introduct.rst +++ b/bmvid/document/bmcv/source_en/api/api_introduct.rst @@ -7,10 +7,7 @@ Briefly explain which part of the hardware implements the BMCV API * bmcv_image_canny * bmcv_image_dct * bmcv_image_draw_lines -* bmcv_feature_match * bmcv_fft -* bmcv_image_gaussian_blur -* bmcv_image_laplacian * bmcv_image_lkpyramid * bmcv_image_morph * bmcv_image_sobel @@ -18,110 +15,119 @@ Briefly explain which part of the hardware implements the BMCV API +-----+----------------------------------+-----------+-----------+ | num | API | BM1684 | BM1684X | +=====+==================================+===========+===========+ -| 1 | bmcv_image_absdiff | TPU | TPU | +| 1 | bmcv_as_strided |NOT SUPPORT| TPU | +-----+----------------------------------+-----------+-----------+ -| 2 | bmcv_image_add_weighted | TPU | TPU | +| 2 | bmcv_image_absdiff | TPU | TPU | +-----+----------------------------------+-----------+-----------+ -| 3 | bmcv_base64 | SPACC | SPACC | +| 3 | bmcv_image_add_weighted | TPU | TPU | +-----+----------------------------------+-----------+-----------+ -| 4 | bmcv_image_bitwise_and | TPU | TPU | +| 4 | bmcv_base64 | SPACC | SPACC | +-----+----------------------------------+-----------+-----------+ -| 5 | bmcv_image_bitwise_or | TPU | TPU | +| 5 | bmcv_image_bayer2rgb |NOT SUPPORT| TPU | +-----+----------------------------------+-----------+-----------+ -| 6 | bmcv_image_bitwise_xor | TPU | TPU | +| 6 | bmcv_image_bitwise_and | TPU | TPU | +-----+----------------------------------+-----------+-----------+ -| 7 | bmcv_calc_hist | TPU | TPU | +| 7 | bmcv_image_bitwise_or | TPU | TPU | +-----+----------------------------------+-----------+-----------+ -| 8 | bmcv_image_canny | TPU | TPU | +| 8 | bmcv_image_bitwise_xor | TPU | TPU | 
+-----+----------------------------------+-----------+-----------+ -| 9 | bmcv_image_convert_to | TPU | VPP+TPU | +| 9 | bmcv_calc_hist | TPU | TPU | +-----+----------------------------------+-----------+-----------+ -| 10 | bmcv_image_copy_to | TPU | VPP+TPU | +| 10 | bmcv_image_canny | TPU | TPU | +-----+----------------------------------+-----------+-----------+ -| 11 | bmcv_image_dct | TPU | TPU | +| 11 | bmcv_image_convert_to | TPU | VPP+TPU | +-----+----------------------------------+-----------+-----------+ -| 12 | bmcv_distance | TPU | TPU | +| 12 | bmcv_image_copy_to | TPU | VPP+TPU | +-----+----------------------------------+-----------+-----------+ -| 13 | bmcv_image_draw_lines | CPU | VPP | +| 13 | bmcv_image_dct | TPU | TPU | +-----+----------------------------------+-----------+-----------+ -| 14 | bmcv_image_draw_rectangle | TPU | VPP | +| 14 | bmcv_distance | TPU | TPU | +-----+----------------------------------+-----------+-----------+ -| 15 | bmcv_feature_match | TPU | TPU | +| 15 | bmcv_image_draw_lines | CPU | VPP | +-----+----------------------------------+-----------+-----------+ -| 16 | bmcv_fft | TPU | TPU | +| 16 | bmcv_image_draw_rectangle | TPU | VPP | +-----+----------------------------------+-----------+-----------+ -| 17 | bmcv_image_fill_rectangle | TPU | VPP | +| 17 | bmcv_feature_match | TPU | TPU | +-----+----------------------------------+-----------+-----------+ -| 18 | bmcv_image_gaussian_blur | TPU | TPU | +| 18 | bmcv_fft | TPU | TPU | +-----+----------------------------------+-----------+-----------+ -| 19 | bmcv_gemm | TPU | TPU | +| 19 | bmcv_image_fill_rectangle | TPU | VPP | +-----+----------------------------------+-----------+-----------+ -| 20 | bmcv_image_jpeg_enc | JPU | JPU | +| 20 | bmcv_image_gaussian_blur | TPU | TPU | +-----+----------------------------------+-----------+-----------+ -| 21 | bmcv_image_jpeg_dec | JPU | JPU | +| 21 | bmcv_gemm | TPU | TPU | 
+-----+----------------------------------+-----------+-----------+ -| 22 | bmcv_image_laplacian | TPU | TPU | +| 22 | bmcv_image_jpeg_enc | JPU | JPU | +-----+----------------------------------+-----------+-----------+ -| 23 | bmcv_matmul | TPU | TPU | +| 23 | bmcv_image_jpeg_dec | JPU | JPU | +-----+----------------------------------+-----------+-----------+ -| 24 | bmcv_min_max | TPU | TPU | +| 24 | bmcv_image_laplacian | TPU | TPU | +-----+----------------------------------+-----------+-----------+ -| 25 | bmcv_nms_ext | TPU | TPU | +| 25 | bmcv_matmul | TPU | TPU | +-----+----------------------------------+-----------+-----------+ -| 26 | bmcv_nms | TPU | TPU | +| 26 | bmcv_min_max | TPU | TPU | +-----+----------------------------------+-----------+-----------+ -| 27 | bmcv_image_resize | VPP+TPU | VPP | +| 27 | bmcv_nms_ext | TPU | TPU | +-----+----------------------------------+-----------+-----------+ -| 28 | bmcv_image_sobel | TPU | TPU | +| 28 | bmcv_nms | TPU | TPU | +-----+----------------------------------+-----------+-----------+ -| 29 | bmcv_sort | TPU | TPU | +| 29 | bmcv_image_resize | VPP+TPU | VPP | +-----+----------------------------------+-----------+-----------+ -| 30 | bmcv_image_storage_convert | VPP+TPU | VPP | +| 30 | bmcv_image_sobel | TPU | TPU | +-----+----------------------------------+-----------+-----------+ -| 31 | bmcv_image_threshold | TPU | TPU | +| 31 | bmcv_sort | TPU | TPU | +-----+----------------------------------+-----------+-----------+ -| 32 | bmcv_image_transpose | TPU | TPU | +| 32 | bmcv_image_storage_convert | VPP+TPU | VPP | +-----+----------------------------------+-----------+-----------+ -| 33 | bmcv_image_vpp_basic | VPP | VPP | +| 33 | bmcv_image_threshold | TPU | TPU | +-----+----------------------------------+-----------+-----------+ -| 34 | bmcv_image_vpp_convert_padding | VPP | VPP | +| 34 | bmcv_image_transpose | TPU | TPU | +-----+----------------------------------+-----------+-----------+ -| 35 | 
bmcv_image_vpp_convert | VPP | VPP | +| 35 | bmcv_image_vpp_basic | VPP | VPP | +-----+----------------------------------+-----------+-----------+ -| 36 | bmcv_image_vpp_csc_matrix_convert| VPP | VPP | +| 36 | bmcv_image_vpp_convert_padding | VPP | VPP | +-----+----------------------------------+-----------+-----------+ -| 37 | bmcv_image_vpp_stitch | VPP | VPP | +| 37 | bmcv_image_vpp_convert | VPP | VPP | +-----+----------------------------------+-----------+-----------+ -| 38 | bmcv_image_warp_affine | TPU | TPU | +| 38 | bmcv_image_vpp_csc_matrix_convert| VPP | VPP | +-----+----------------------------------+-----------+-----------+ -| 39 | bmcv_image_warp_perspective | TPU | TPU | +| 39 | bmcv_image_vpp_stitch | VPP | VPP | +-----+----------------------------------+-----------+-----------+ -| 40 | bmcv_nms_yolo | TPU | TPU | +| 40 | bmcv_image_warp_affine | TPU | TPU | +-----+----------------------------------+-----------+-----------+ -| 41 | bmcv_cmulp | TPU | TPU | +| 41 | bmcv_image_warp_perspective | TPU | TPU | +-----+----------------------------------+-----------+-----------+ -| 42 | bmcv_faiss_indexflatIP |NOT SUPPORT| TPU | +| 42 | bmcv_image_watermark_superpose |NOT SUPPORT| TPU | +-----+----------------------------------+-----------+-----------+ -| 43 | bmcv_faiss_indexflatL2 |NOT SUPPORT| TPU | +| 43 | bmcv_nms_yolo | TPU | TPU | +-----+----------------------------------+-----------+-----------+ -| 44 | bmcv_image_yuv2bgr_ext | TPU | VPP | +| 44 | bmcv_cmulp | TPU | TPU | +-----+----------------------------------+-----------+-----------+ -| 45 | bmcv_image_yuv2hsv | TPU | VPP+TPU | +| 45 | bmcv_faiss_indexflatIP |NOT SUPPORT| TPU | +-----+----------------------------------+-----------+-----------+ -| 46 | bmcv_batch_topk | TPU | TPU | +| 46 | bmcv_faiss_indexflatL2 |NOT SUPPORT| TPU | +-----+----------------------------------+-----------+-----------+ -| 47 | bmcv_image_put_text | CPU | CPU | +| 47 | bmcv_image_yuv2bgr_ext | TPU | VPP | 
+-----+----------------------------------+-----------+-----------+ -| 48 | bmcv_hm_distance |NOT SUPPORT| TPU | +| 48 | bmcv_image_yuv2hsv | TPU | VPP+TPU | +-----+----------------------------------+-----------+-----------+ -| 49 | bmcv_axpy | TPU | TPU | +| 49 | bmcv_batch_topk | TPU | TPU | +-----+----------------------------------+-----------+-----------+ -| 50 | bmcv_image_pyramid_down | TPU | TPU | +| 50 | bmcv_image_put_text | CPU | CPU | +-----+----------------------------------+-----------+-----------+ +| 51 | bmcv_hm_distance |NOT SUPPORT| TPU | ++-----+----------------------------------+-----------+-----------+ +| 52 | bmcv_axpy | TPU | TPU | ++-----+----------------------------------+-----------+-----------+ +| 53 | bmcv_image_pyramid_down | TPU | TPU | ++-----+----------------------------------+-----------+-----------+ +| 54 | bmcv_image_quantify |NOT SUPPORT| TPU | ++-----+----------------------------------+-----------+-----------+ + **Note:** -For BM1684 and BM1684X, the implementation of the following two operators requires a combination of BMCPU(CPU) and TPU +For BM1684 and BM1684X, the implementation of the following two operators requires a combination of BMCPU and Tensor Computing Processor +-----+----------------------------------+ | num | API | diff --git a/bmvid/document/bmcv/source_en/api/as_strided.rst b/bmvid/document/bmcv/source_en/api/as_strided.rst index 69b6374..4a4b8ba 100644 --- a/bmvid/document/bmcv/source_en/api/as_strided.rst +++ b/bmvid/document/bmcv/source_en/api/as_strided.rst @@ -3,6 +3,10 @@ bmcv_as_strided This interface can create a view matrix based on the existing matrix and the given step size. +**Processor model support** + +This interface only supports BM1684X. 
+ **Interface form:** @@ -109,7 +113,7 @@ This interface can create a view matrix based on the existing matrix and the giv output_row, output_col, row_stride, col_stride); gettimeofday_(&t2); - std::cout << "as_strided TPU using time= " << ((t2.tv_sec - t1.tv_sec) * 1000000 + t2.tv_usec - t1.tv_usec) << "(us)" << std::endl; + std::cout << "as_strided Tensor Computing Processor using time= " << ((t2.tv_sec - t1.tv_sec) * 1000000 + t2.tv_usec - t1.tv_usec) << "(us)" << std::endl; if (ret != BM_SUCCESS) { printf("as_strided failed. ret = %d\n", ret); goto exit; diff --git a/bmvid/document/bmcv/source_en/api/axpy.rst b/bmvid/document/bmcv/source_en/api/axpy.rst index 84c51fc..47b0c21 100644 --- a/bmvid/document/bmcv/source_en/api/axpy.rst +++ b/bmvid/document/bmcv/source_en/api/axpy.rst @@ -4,6 +4,11 @@ bmcv_axpy This interface implements F = A * X + Y, where A is a constant of size n * c , and F , X , Y are all matrices of size n * c * h * w. +**Processor model support** + +This interface supports BM1684/BM1684X. + + **Interface form:** .. code-block:: c diff --git a/bmvid/document/bmcv/source_en/api/base64.rst b/bmvid/document/bmcv/source_en/api/base64.rst index 512d257..b6fda61 100644 --- a/bmvid/document/bmcv/source_en/api/base64.rst +++ b/bmvid/document/bmcv/source_en/api/base64.rst @@ -4,6 +4,11 @@ bmcv_base64_enc(dec) A common encoding method in base64 network transmission, which uses 64 common characters to encode 6-bit binary numbers. +**Processor model support** + +This interface supports BM1684/BM1684X. + + **Interface form:** .. code-block:: c diff --git a/bmvid/document/bmcv/source_en/api/batch_topk.rst b/bmvid/document/bmcv/source_en/api/batch_topk.rst index 599e37d..690b856 100644 --- a/bmvid/document/bmcv/source_en/api/batch_topk.rst +++ b/bmvid/document/bmcv/source_en/api/batch_topk.rst @@ -3,6 +3,11 @@ bmcv_batch_topk Compute the largest or smallest k number in each db, and return the index. 
+**Processor model support** + +This interface supports BM1684/BM1684X. + + **Interface form:** .. code-block:: c diff --git a/bmvid/document/bmcv/source_en/api/bayer2rgb.rst b/bmvid/document/bmcv/source_en/api/bayer2rgb.rst index 287b88e..10e5ebb 100644 --- a/bmvid/document/bmcv/source_en/api/bayer2rgb.rst +++ b/bmvid/document/bmcv/source_en/api/bayer2rgb.rst @@ -1,9 +1,15 @@ bmcv_image_bayer2rgb ================== -Converts bayerBG8 format images to RGB Plannar format. +Converts bayerBG8 or bayerRG8 format images to RGB Planar format. -**接口形式:** + +**Processor model support** + +This interface only supports BM1684X. + + +**Interface form:** .. code-block:: c @@ -15,7 +21,7 @@ Converts bayerBG8 format images to RGB Plannar format. bm_image output); -**参数说明:** +**Parameter Description:** * bm_handle_t handle @@ -50,6 +56,8 @@ The interface currently supports the following input format: +=====+================================+ | 1 | FORMAT_BAYER | +-----+--------------------------------+ +| 2 | FORMAT_BAYER_RG8 | ++-----+--------------------------------+ The interface currently supports the following output format: @@ -70,9 +78,13 @@ The interface currently supports the following data_type: **Note** -1、The format of input is bayerBG, the format of output is rgb plannar, and the data_type is uint8. -2、The interface currently supports bm1684x. -3、The interface supports the size range of 8*8 ~ 8096*8096, and the width and height of the image need to be even. +1. The input format currently supports bayerBG8 or bayerRG8. In the bm_image_create step, bayerBG8 is created in the FORMAT_BAYER format, and bayerRG8 is created in the FORMAT_BAYER_RG8 format. + +2. The output format is RGB planar, and data_type is uint8 type. + +3. The size range supported by this interface is 2*2 ~ 8192*8192, and the width and height of the image need to be an even number. + +4. 
If the program calling this interface is a multi-threaded program, thread locks need to be added before creating bm_image and after destroying bm_image. **Code example:** @@ -82,31 +94,45 @@ The interface currently supports the following data_type: #define KERNEL_SIZE 3 * 3 * 3 * 4 * 64 #define CONVD_MATRIX 12 * 9 - - const unsigned char convd_kernel[CONVD_MATRIX] = {1, 0, 1, 0, 0, 0, 1, 0, 1, - 0, 0, 2, 0, 0, 0, 0, 0, 2, - 0, 0, 0, 0, 0, 0, 2, 0, 2, - 0, 0, 0, 0, 0, 0, 0, 0, 4, // r R - 4, 0, 0, 0, 0, 0, 0, 0, 0, // b B - 2, 0, 2, 0, 0, 0, 0, 0, 0, - 2, 0, 0, 0, 0, 0, 2, 0, 0, - 1, 0, 1, 0, 0, 0, 1, 0, 1, - 0, 1, 0, 1, 0, 1, 0, 1, 0, - 0, 0, 0, 0, 0, 4, 0, 0, 0, // g1 G1 - 0, 0, 0, 0, 0, 0, 0, 4, 0, // g2 G2 - 0, 1, 0, 1, 0, 1, 0, 1, 0}; + const unsigned char convd_kernel_bg8[CONVD_MATRIX] = {1, 0, 1, 0, 0, 0, 1, 0, 1, //Rb + 0, 0, 2, 0, 0, 0, 0, 0, 2, //Rg1 + 0, 0, 0, 0, 0, 0, 2, 0, 2, //Rg2 + 0, 0, 0, 0, 0, 0, 0, 0, 4, //Rr + 4, 0, 0, 0, 0, 0, 0, 0, 0, //Bb + 2, 0, 2, 0, 0, 0, 0, 0, 0, //Bg1 + 2, 0, 0, 0, 0, 0, 2, 0, 0, //Bg2 + 1, 0, 1, 0, 0, 0, 1, 0, 1, //Br + 0, 1, 0, 1, 0, 1, 0, 1, 0, //Gb + 0, 0, 0, 0, 0, 4, 0, 0, 0, //Gg1 + 0, 0, 0, 0, 0, 0, 0, 4, 0, //Gg2 + 0, 1, 0, 1, 0, 1, 0, 1, 0};//Gr + + const unsigned char convd_kernel_rg8[CONVD_MATRIX] = {4, 0, 0, 0, 0, 0, 0, 0, 0, //Rr + 2, 0, 2, 0, 0, 0, 0, 0, 0, //Rg1 + 2, 0, 0, 0, 0, 0, 2, 0, 0, //Rg2 + 1, 0, 1, 0, 0, 0, 1, 0, 1, //Rb + 1, 0, 1, 0, 0, 0, 1, 0, 1, //Br + 0, 0, 2, 0, 0, 0, 0, 0, 2, //Bg1 + 0, 0, 0, 2, 0, 2, 0, 0, 0, //Bg2 + 0, 0, 0, 0, 0, 0, 0, 0, 4, //Bb + 1, 0, 1, 0, 0, 0, 1, 0, 1, //Gr + 0, 0, 0, 0, 0, 4, 0, 0, 0, //Gg1 + 0, 0, 0, 0, 0, 0, 0, 4, 0, //Gg2 + 0, 1, 0, 1, 0, 1, 0, 1, 0};//Gb int width = 1920; int height = 1080; int dev_id = 0; + unsigned char* input = (unsigned char*)malloc(width * height); + unsigned char* output = (unsigned char*)malloc(width * height * 3); bm_handle_t handle; bm_status_t dev_ret = bm_dev_request(&handle, dev_id); - std::shared_ptr src1_ptr( - new unsigned 
char[channel * width * height], - std::default_delete()); + bm_image input_img; bm_image output_img; - bm_image_create(handle, height, width, FORMAT_BAYER, DATA_TYPE_EXT_1N_BYTE, &input_img); + bm_image_create(handle, height, width, FORMAT_BAYER_RG8, DATA_TYPE_EXT_1N_BYTE, &input_img); + //bm_image_create(handle, height, width, FORMAT_BAYER, DATA_TYPE_EXT_1N_BYTE, &input_img); //bayerBG8 bm_image_create(handle, height, width, FORMAT_RGB_PLANAR, DATA_TYPE_EXT_1N_BYTE, &output_img); + bm_image_alloc_dev_mem(input_img, BMCV_HEAP_ANY); bm_image_alloc_dev_mem(output_img, BMCV_HEAP_ANY); unsigned char kernel_data[KERNEL_SIZE]; @@ -114,13 +140,16 @@ The interface currently supports the following data_type: // constructing convd_kernel_data for (int i = 0;i < 12;i++) { for (int j = 0;j < 9;j++) { - kernel_data[i * 9 * 64 + 64 * j] = convd_kernel[i * 9 + j]; + kernel_data[i * 9 * 64 + 64 * j] = convd_kernel_rg8[i * 9 + j]; + //kernel_data[i * 9 * 64 + 64 * j] = convd_kernel_bg8[i * 9 + j]; } } - unsigned char* input_data[3] = {srcImage.data, srcImage.data + height * width, srcImage.data + 2 * height * width}; - bm_image_copy_host_to_device(input_img, (void **)input_data); + + bm_image_copy_host_to_device(input_img, (void **)input); bmcv_image_bayer2rgb(handle, kernel_data, input_img, output_img); bm_image_copy_device_to_host(output_img, (void **)(&output)); bm_image_destroy(input_img); bm_image_destroy(output_img); + free(input); + free(output); bm_dev_free(handle); \ No newline at end of file diff --git a/bmvid/document/bmcv/source_en/api/bitwise_and.rst b/bmvid/document/bmcv/source_en/api/bitwise_and.rst index b737145..cb3c41f 100644 --- a/bmvid/document/bmcv/source_en/api/bitwise_and.rst +++ b/bmvid/document/bmcv/source_en/api/bitwise_and.rst @@ -4,6 +4,11 @@ bmcv_image_bitwise_and Bitwise and operate on the corresponding pixel value of two images with the same size. +**Processor model support** + +This interface supports BM1684/BM1684X. + + **Interface form:** .. 
code-block:: c diff --git a/bmvid/document/bmcv/source_en/api/bitwise_or.rst b/bmvid/document/bmcv/source_en/api/bitwise_or.rst index f08ea2c..39e9945 100644 --- a/bmvid/document/bmcv/source_en/api/bitwise_or.rst +++ b/bmvid/document/bmcv/source_en/api/bitwise_or.rst @@ -1,9 +1,12 @@ bmcv_image_bitwise_or ===================== +Bitwise or operate on the corresponding pixel value of two images with the same size. -Bitwise or operate on the corresponding pixel value of two images with the same size. +**Processor model support** + +This interface supports BM1684/BM1684X. **Interface form:** diff --git a/bmvid/document/bmcv/source_en/api/bitwise_xor.rst b/bmvid/document/bmcv/source_en/api/bitwise_xor.rst index bc91767..71af9c2 100644 --- a/bmvid/document/bmcv/source_en/api/bitwise_xor.rst +++ b/bmvid/document/bmcv/source_en/api/bitwise_xor.rst @@ -4,6 +4,11 @@ bmcv_image_bitwise_xor Perform bitwise xor operate on the corresponding pixel values of two images with the same size. +**Processor model support** + +This interface supports BM1684/BM1684X. + + **Interface form:** .. code-block:: c diff --git a/bmvid/document/bmcv/source_en/api/bmcv_hist_balance.rst b/bmvid/document/bmcv/source_en/api/bmcv_hist_balance.rst new file mode 100644 index 0000000..3a8ad78 --- /dev/null +++ b/bmvid/document/bmcv/source_en/api/bmcv_hist_balance.rst @@ -0,0 +1,119 @@ +bmcv_hist_balance +=================== + +Perform histogram equalization on the image to improve the contrast of the image. + + +**Interface form:** + + .. code-block:: c + + bm_status_t bmcv_hist_balance( + bm_handle_t handle, + bm_device_mem_t input, + bm_device_mem_t output, + int H, + int W); + + +**Description of parameters:** + +* bm_handle_t handle + + Input parameters. The handle of bm_handle. + +* bm_device_mem_t input + + Input parameter. The device memory space stores the input data. Its size is H * W * sizeof (uint8_t). + +* bm_device_mem_t output + + Output parameter. 
The device memory space stores the output data. Its size is H * W * sizeof (uint8_t). + +* int H + + Input parameter. The height of the input image. + +* int W + + Input parameter. The width of the input image. + + +**Return value description:** + +* BM_SUCCESS: success + +* Other: failed + + +**Note** + +1. The data type only supports uint8_t. + +2. The min height and width of the input image is H = 1, W = 1. + +3. The max height and width of the input image is H = 8192, W = 8192. + + +**Code example:** + + .. code-block:: c + + int H = 1024; + int W = 1024; + uint8_t* input_addr = (uint8_t*)malloc(H * W * sizeof(uint8_t)); + uint8_t* output_addr = (uint8_t*)malloc(H * W * sizeof(uint8_t)); + bm_handle_t handle; + bm_status_t ret = BM_SUCCESS; + bm_device_mem_t input, output; + int i; + + struct timespec tp; + clock_gettime(NULL, &tp); + srand(tp.tv_nsec); + + for (i = 0; i < W * H; ++i) { + input_addr[i] = (uint8_t)rand() % 256; + } + + ret = bm_dev_request(&handle, 0); + if (ret != BM_SUCCESS) { + printf("bm_dev_request failed. ret = %d\n", ret); + exit(-1); + } + + ret = bm_malloc_device_byte(handle, &input, H * W * sizeof(uint8_t)); + if (ret != BM_SUCCESS) { + printf("bm_malloc_device_byte failed. ret = %d\n", ret); + exit(-1); + } + + ret = bm_malloc_device_byte(handle, &output, H * W * sizeof(uint8_t)); + if (ret != BM_SUCCESS) { + printf("bm_malloc_device_byte failed. ret = %d\n", ret); + exit(-1); + } + + ret = bm_memcpy_s2d(handle, input, input_addr); + if (ret != BM_SUCCESS) { + printf("bm_memcpy_s2d failed. ret = %d\n", ret); + exit(-1); + } + + ret = bmcv_hist_balance(handle, input, output, H, W); + if (ret != BM_SUCCESS) { + printf("bmcv_hist_balance failed. ret = %d\n", ret); + exit(-1); + } + + ret = bm_memcpy_d2s(handle, output_addr, output); + if (ret != BM_SUCCESS) { + printf("bm_memcpy_d2s failed. 
ret = %d\n", ret); + exit(-1); + } + + free(input_addr); + free(output_addr); + bm_free_device(handle, input); + bm_free_device(handle, output); + bm_dev_free(handle); \ No newline at end of file diff --git a/bmvid/document/bmcv/source_en/api/calc_hist.rst b/bmvid/document/bmcv/source_en/api/calc_hist.rst index f8a69d4..d5ebca4 100644 --- a/bmvid/document/bmcv/source_en/api/calc_hist.rst +++ b/bmvid/document/bmcv/source_en/api/calc_hist.rst @@ -22,6 +22,10 @@ _________ const float *ranges, int inputDtype); +**Processor model support** + +This interface supports BM1684/BM1684X. + **Parameter Description:** @@ -148,6 +152,10 @@ _________ Weighted Histogram __________________ +**Processor model support** + +This interface supports BM1684/BM1684X. + **Interface form:** diff --git a/bmvid/document/bmcv/source_en/api/canny.rst b/bmvid/document/bmcv/source_en/api/canny.rst index a6c95fb..e9193fc 100644 --- a/bmvid/document/bmcv/source_en/api/canny.rst +++ b/bmvid/document/bmcv/source_en/api/canny.rst @@ -4,6 +4,11 @@ bmcv_image_canny Canny operator for edge detection. +**Processor model support** + +This interface only supports BM1684. + + **Interface form:** .. code-block:: c diff --git a/bmvid/document/bmcv/source_en/api/cmulp.rst b/bmvid/document/bmcv/source_en/api/cmulp.rst index f55943f..5b31a62 100644 --- a/bmvid/document/bmcv/source_en/api/cmulp.rst +++ b/bmvid/document/bmcv/source_en/api/cmulp.rst @@ -12,6 +12,10 @@ This interface is used to implement the complex number multiplication, as shown Among that, :math:`i` is the imaginary unit and satisfying the equation :math:`i^2 = -1`. +**Processor model support** + +This interface supports BM1684/BM1684X. 
+ **Interface form:** diff --git a/bmvid/document/bmcv/source_en/api/convert_to.rst b/bmvid/document/bmcv/source_en/api/convert_to.rst index 5b58698..7514d9e 100644 --- a/bmvid/document/bmcv/source_en/api/convert_to.rst +++ b/bmvid/document/bmcv/source_en/api/convert_to.rst @@ -8,6 +8,11 @@ The interface is used to do the linear change of image pixels. The specific data y=kx+b \end{array} +**Processor model support** + +This interface supports BM1684/BM1684X. + + **Interface form:** .. code-block:: c diff --git a/bmvid/document/bmcv/source_en/api/copy_to.rst b/bmvid/document/bmcv/source_en/api/copy_to.rst index dbfedfb..38d7025 100644 --- a/bmvid/document/bmcv/source_en/api/copy_to.rst +++ b/bmvid/document/bmcv/source_en/api/copy_to.rst @@ -5,6 +5,11 @@ bmcv_image_copy_to The interface copies an image to the corresponding memory area of the target image. +**Processor model support** + +This interface supports BM1684/BM1684X. + + **Interface form:** .. code-block:: c diff --git a/bmvid/document/bmcv/source_en/api/crop.rst b/bmvid/document/bmcv/source_en/api/crop.rst index 0b1c19c..b81bcfc 100644 --- a/bmvid/document/bmcv/source_en/api/crop.rst +++ b/bmvid/document/bmcv/source_en/api/crop.rst @@ -5,6 +5,11 @@ bmcv_image_crop The interface can crop out several small images from an original image. +**Processor model support** + +This interface supports BM1684/BM1684X. + + **Interface form:** .. code-block:: c diff --git a/bmvid/document/bmcv/source_en/api/csc_convert_to.rst b/bmvid/document/bmcv/source_en/api/csc_convert_to.rst index e41b5d8..5b3d865 100755 --- a/bmvid/document/bmcv/source_en/api/csc_convert_to.rst +++ b/bmvid/document/bmcv/source_en/api/csc_convert_to.rst @@ -18,6 +18,10 @@ bmcv_image_csc_convert_to csc_matrix_t* matrix = NULL, bmcv_convert_to_attr* convert_to_attr); +**Processor model support** + +This interface supports BM1684/BM1684X. 
+ **Description of incoming parameters:** diff --git a/bmvid/document/bmcv/source_en/api/dct.rst b/bmvid/document/bmcv/source_en/api/dct.rst index 806246e..1fa642c 100644 --- a/bmvid/document/bmcv/source_en/api/dct.rst +++ b/bmvid/document/bmcv/source_en/api/dct.rst @@ -13,6 +13,9 @@ The format of the interface is as follows: bm_image output, bool is_inversed); +**Processor model support** + +This interface only supports BM1684. **Description of input parameters:** diff --git a/bmvid/document/bmcv/source_en/api/debug_savedata.rst b/bmvid/document/bmcv/source_en/api/debug_savedata.rst index 24cbfd2..0f11701 100644 --- a/bmvid/document/bmcv/source_en/api/debug_savedata.rst +++ b/bmvid/document/bmcv/source_en/api/debug_savedata.rst @@ -3,6 +3,10 @@ bmcv_debug_savedata This interface is used to input bm_image object to the internally defined binary file for debugging. The binary file format and parsing method are given in the example code. +**Processor model support** + +This interface supports BM1684/BM1684X. + **Interface form:** diff --git a/bmvid/document/bmcv/source_en/api/distance.rst b/bmvid/document/bmcv/source_en/api/distance.rst index 1eacdd2..a45c5f0 100644 --- a/bmvid/document/bmcv/source_en/api/distance.rst +++ b/bmvid/document/bmcv/source_en/api/distance.rst @@ -16,6 +16,10 @@ The format of the interface is as follows: const float *pnt, int len); +**Processor model support** + +This interface supports BM1684/BM1684X. + **Input parameter description:** diff --git a/bmvid/document/bmcv/source_en/api/draw_lines.rst b/bmvid/document/bmcv/source_en/api/draw_lines.rst index b98fe25..533e46e 100644 --- a/bmvid/document/bmcv/source_en/api/draw_lines.rst +++ b/bmvid/document/bmcv/source_en/api/draw_lines.rst @@ -4,6 +4,11 @@ bmcv_image_draw_lines The function of drawing polygons can be implemented by drawing one or more lines on an image, it also can specify the color and width of lines. +**Processor model support** + +This interface supports BM1684/BM1684X. 
+ + **Interface form:** .. code-block:: c diff --git a/bmvid/document/bmcv/source_en/api/draw_point.rst b/bmvid/document/bmcv/source_en/api/draw_point.rst index 82ec4ea..42bbde7 100644 --- a/bmvid/document/bmcv/source_en/api/draw_point.rst +++ b/bmvid/document/bmcv/source_en/api/draw_point.rst @@ -1,7 +1,14 @@ bmcv_image_draw_point ========================= + This interface is used to fill one or more points on an image。 + +**Processor model support** + +This interface only supports BM1684X. + + **接口形式:** .. code-block:: c @@ -76,9 +83,7 @@ This interface is used to fill one or more points on an image。 **注意事项:** -1. this interface does not support bm1684. - -2. bm1684x supports the following formats of bm_image: +1. bm1684x supports the following formats of bm_image: +-----+-------------------------------+ | num | input image_format | diff --git a/bmvid/document/bmcv/source_en/api/draw_rectangle.rst b/bmvid/document/bmcv/source_en/api/draw_rectangle.rst index 0ad3e00..fbf05fd 100644 --- a/bmvid/document/bmcv/source_en/api/draw_rectangle.rst +++ b/bmvid/document/bmcv/source_en/api/draw_rectangle.rst @@ -3,6 +3,12 @@ bmcv_image_draw_rectangle This interface is used to draw one or more rectangular boxes on the image. + +**Processor model support** + +This interface supports BM1684/BM1684X. + + **Interface form:** .. code-block:: c diff --git a/bmvid/document/bmcv/source_en/api/faiss_indexflatIP.rst b/bmvid/document/bmcv/source_en/api/faiss_indexflatIP.rst index 7a5d539..721242f 100644 --- a/bmvid/document/bmcv/source_en/api/faiss_indexflatIP.rst +++ b/bmvid/document/bmcv/source_en/api/faiss_indexflatIP.rst @@ -3,6 +3,12 @@ bmcv_faiss_indexflatIP This interface is used to calculate inner product distance between query vectors and database vectors, output the top K (sort_cnt) IP-values and the corresponding indices, return BM_SUCCESS if succeed. + +**Processor model support** + +This interface only supports BM1684X. + + **Interface form:** .. 
code-block:: c++ @@ -95,7 +101,7 @@ This interface is used to calculate inner product distance between query vectors 4. The larger the inner product values of the query vector and the database vector, the higher the similarity of the two vectors. Therefore, the inner product values are sorted in descending order in the process of TopK. -5. The interface is used for Faiss::IndexFlatIP.search() and implemented on BM1684X. According to the continuous memory of TPU on BM1684X, we can query about 512 inputs of 256 dimensions at a time on a single chip if the database is about 100W. +5. The interface is used for Faiss::IndexFlatIP.search() and implemented on BM1684X. According to the continuous memory of Tensor Computing Processor on BM1684X, we can query about 512 inputs of 256 dimensions at a time on a single processor if the database holds about 1 million (100W) vectors. **Sample code** diff --git a/bmvid/document/bmcv/source_en/api/faiss_indexflatL2.rst b/bmvid/document/bmcv/source_en/api/faiss_indexflatL2.rst index ac9d557..3d8b8cf 100644 --- a/bmvid/document/bmcv/source_en/api/faiss_indexflatL2.rst +++ b/bmvid/document/bmcv/source_en/api/faiss_indexflatL2.rst @@ -3,6 +3,12 @@ bmcv_faiss_indexflatL2 This interface is used to calculate squared L2 distance between query vectors and database vectors, output the top K (sort_cnt) L2sqr-values and the corresponding indices, return BM_SUCCESS if succeed. + +**Processor model support** + +This interface only supports BM1684X. + + **Interface form:** .. code-block:: c++ @@ -101,13 +107,13 @@ This interface is used to calculate squared L2 distance between query vectors an 2. The data type of the output sorted similarity result is float, and that of the corresponding indices is int. -3. The assumption is that the norm_L2sqr values of the input data and the database data have been computed ahead of time and stored on the chip. +3.
The assumption is that the norm_L2sqr values of the input data and the database data have been computed ahead of time and stored on the processor. 4. Usually, the data in the database is arranged in the memory as database_vecs_num * vec_dims. Therefore, the is_transpose needs to be set to 1. 5. The smaller the squared L2 values of the query vector and the database vector, the higher the similarity of the two vectors. Therefore, the squared L2 values are sorted in ascending order in the process of TopK. -6. The interface is used for Faiss::IndexFlatL2.search() and implemented on BM1684X. According to the continuous memory of TPU on BM1684X, we can query about 512 inputs of 256 dimensions at a time on a single chip if the database is about 100W. +6. The interface is used for Faiss::IndexFlatL2.search() and implemented on BM1684X. According to the continuous memory of Tensor Computing Processor on BM1684X, we can query about 512 inputs of 256 dimensions at a time on a single processor if the database holds about 1 million (100W) vectors. 7. the value of database_vecs_num and sort_cnt needs to meet the condition: database_vecs_num > sort_cnt. diff --git a/bmvid/document/bmcv/source_en/api/feature_match_fix8b.rst b/bmvid/document/bmcv/source_en/api/feature_match_fix8b.rst index 4d95dfd..039c99d 100644 --- a/bmvid/document/bmcv/source_en/api/feature_match_fix8b.rst +++ b/bmvid/document/bmcv/source_en/api/feature_match_fix8b.rst @@ -3,6 +3,12 @@ bmcv_feature_match The interface is used to compare the feature points obtained from the network (int8 format) with the feature points in the database (int8 format),and output the best matching top-k. + +**Processor model support** + +This interface supports BM1684/BM1684X. + + **Interface form:** ..
code-block:: c diff --git a/bmvid/document/bmcv/source_en/api/fft.rst b/bmvid/document/bmcv/source_en/api/fft.rst index 931c123..de4c646 100644 --- a/bmvid/document/bmcv/source_en/api/fft.rst +++ b/bmvid/document/bmcv/source_en/api/fft.rst @@ -19,6 +19,11 @@ For one-dimensional FFT, multi-batch operation is supported. The interface form bool forward, void *&plan); +**Processor model support** + +This interface only supports BM1684. + + **Input parameter description:** * bm_handle_t handle diff --git a/bmvid/document/bmcv/source_en/api/fill_rectangle.rst b/bmvid/document/bmcv/source_en/api/fill_rectangle.rst index 4a16565..b862bdd 100644 --- a/bmvid/document/bmcv/source_en/api/fill_rectangle.rst +++ b/bmvid/document/bmcv/source_en/api/fill_rectangle.rst @@ -3,6 +3,12 @@ bmcv_image_fill_rectangle This interface is used to fill one or more rectangles on the image. + +**Processor model support** + +This interface supports BM1684/BM1684X. + + **Interface form:** .. code-block:: c diff --git a/bmvid/document/bmcv/source_en/api/gaussian_blur.rst b/bmvid/document/bmcv/source_en/api/gaussian_blur.rst index 1ebc421..0548194 100644 --- a/bmvid/document/bmcv/source_en/api/gaussian_blur.rst +++ b/bmvid/document/bmcv/source_en/api/gaussian_blur.rst @@ -3,6 +3,10 @@ bmcv_image_gaussian_blur Gaussian blur of the image. +**Processor model support** + +This interface supports BM1684/BM1684X. 
+ **Interface form:** @@ -77,13 +81,6 @@ The interface currently supports the following image_format: +-----+------------------------+------------------------+ | 7 | FORMAT_GRAY | FORMAT_GRAY | +-----+------------------------+------------------------+ -| 8 | FORMAT_YUV420P | FORMAT_YUV420P | -+-----+------------------------+------------------------+ -| 9 | FORMAT_YUV422P | FORMAT_YUV422P | -+-----+------------------------+------------------------+ -| 10 | FORMAT_YUV444P | FORMAT_YUV444P | -+-----+------------------------+------------------------+ - The interface currently supports the following data_type: @@ -100,9 +97,9 @@ The interface currently supports the following data_type: 2. The data_type and image_format of input and must be the same. -3. The currently supported maximum image width is (2048 - kw). - +3. The maximum width of the image supported by BM1684 is (2048 - kw), the maximum width supported by BM1684X is 4096, and the maximum height is 8192. +4. The maximum convolution kernel width and height supported by BM1684 is 31, and the maximum convolution kernel width and height supported by BM1684X is 3. **Code example:** .. code-block:: c diff --git a/bmvid/document/bmcv/source_en/api/gemm.rst b/bmvid/document/bmcv/source_en/api/gemm.rst index 5290268..bac822e 100644 --- a/bmvid/document/bmcv/source_en/api/gemm.rst +++ b/bmvid/document/bmcv/source_en/api/gemm.rst @@ -28,6 +28,10 @@ The format of the interface is as follows: bm_device_mem_t C, int ldc); +**Processor model support** + +This interface supports BM1684/BM1684X. 
+ **Input parameter description:** diff --git a/bmvid/document/bmcv/source_en/api/gemm_ext.rst b/bmvid/document/bmcv/source_en/api/gemm_ext.rst index b8d53c6..a547ca7 100644 --- a/bmvid/document/bmcv/source_en/api/gemm_ext.rst +++ b/bmvid/document/bmcv/source_en/api/gemm_ext.rst @@ -28,6 +28,10 @@ The format of the interface is as follows: bm_image_data_format_ext input_dtype, bm_image_data_format_ext output_dtype); +**Processor model support** + +This interface only supports BM1684X. + **Input parameter description:** @@ -95,11 +99,9 @@ The format of the interface is as follows: **Note** -1. This interface only supports BM1684X. - -2. In the case of FP16 input and A matrix transpose, M only supports values less than or equal to 64. +1. In the case of FP16 input and A matrix transpose, M only supports values less than or equal to 64. -3. This interface does not support FP32 input and FP16 output. +2. This interface does not support FP32 input and FP16 output. **Sample code** diff --git a/bmvid/document/bmcv/source_en/api/hm_distance.rst b/bmvid/document/bmcv/source_en/api/hm_distance.rst index 840c151..7549315 100644 --- a/bmvid/document/bmcv/source_en/api/hm_distance.rst +++ b/bmvid/document/bmcv/source_en/api/hm_distance.rst @@ -4,6 +4,11 @@ bmcv_hm_distance Calculates the Hamming distance of each element in two vectors. +**Processor model support** + +This interface only supports BM1684X. + + **Interface form:** .. code-block:: c @@ -54,10 +59,6 @@ Calculates the Hamming distance of each element in two vectors. * Other: failed -**Note:** - -This interface only supports BM1684X. - **Code example:** diff --git a/bmvid/document/bmcv/source_en/api/jpeg_decode.rst b/bmvid/document/bmcv/source_en/api/jpeg_decode.rst index 51b6f9f..7318d50 100644 --- a/bmvid/document/bmcv/source_en/api/jpeg_decode.rst +++ b/bmvid/document/bmcv/source_en/api/jpeg_decode.rst @@ -3,6 +3,12 @@ bmcv_image_jpeg_dec The interface can decode multiple JPEG images. 
+ +**Processor model support** + +This interface supports BM1684/BM1684X. + + **Interface form:** .. code-block:: c diff --git a/bmvid/document/bmcv/source_en/api/jpeg_encode.rst b/bmvid/document/bmcv/source_en/api/jpeg_encode.rst index 9920f0e..48704d0 100644 --- a/bmvid/document/bmcv/source_en/api/jpeg_encode.rst +++ b/bmvid/document/bmcv/source_en/api/jpeg_encode.rst @@ -3,6 +3,12 @@ bmcv_image_jpeg_enc This API can be used for JPEG encoding of multiple bm_image. + +**Processor model support** + +This interface supports BM1684/BM1684X. + + **Interface form:** .. code-block:: c diff --git a/bmvid/document/bmcv/source_en/api/laplacian.rst b/bmvid/document/bmcv/source_en/api/laplacian.rst index d3f1c56..269f6f7 100644 --- a/bmvid/document/bmcv/source_en/api/laplacian.rst +++ b/bmvid/document/bmcv/source_en/api/laplacian.rst @@ -4,6 +4,11 @@ bmcv_image_laplacian Laplacian operator of gradient calculation. +**Processor model support** + +This interface supports BM1684/BM1684X. + + **Interface form:** .. code-block:: c diff --git a/bmvid/document/bmcv/source_en/api/lkpyramid.rst b/bmvid/document/bmcv/source_en/api/lkpyramid.rst index cdcc9db..604a664 100644 --- a/bmvid/document/bmcv/source_en/api/lkpyramid.rst +++ b/bmvid/document/bmcv/source_en/api/lkpyramid.rst @@ -1,7 +1,7 @@ bmcv_image_lkpyramid ==================== -LK pyramid optical flow algorithm. The complete call flow include creation, execution and destruction. The first half of the algorithm uses TPU, and the second half uses CPU for serial operation. Therefore, for PCIe mode, it is recommended to enable CPU to accelerate. Please refer to Chapter 5 for specific steps. +LK pyramid optical flow algorithm. The complete call flow includes creation, execution and destruction. The first half of the algorithm uses the Tensor Computing Processor, and the second half uses the Processor for serial operation. Therefore, for PCIe mode, it is recommended to enable the Processor to accelerate.
Please refer to Chapter 5 for specific steps. Create ______ @@ -19,6 +19,12 @@ The internal implementation of the algorithm requires some cache space. Therefor int winH = 21, int maxLevel = 3); + +**Processor model support** + +This interface only supports BM1684. + + **Input parameter description:** * bm_handle_t handle @@ -174,7 +180,7 @@ ___________ } ret = bmcv_open_cpu_process(handle); if (ret != BM_SUCCESS) { - printf("BMCV enable CPU failed. ret = %d\n", ret); + printf("BMCV enable Processor failed. ret = %d\n", ret); bm_dev_free(handle); return -1; } @@ -211,7 +217,7 @@ ___________ bm_image_destroy(nextImg); ret = bmcv_close_cpu_process(handle); if (ret != BM_SUCCESS) { - printf("BMCV disable CPU failed. ret = %d\n", ret); + printf("BMCV disable Processor failed. ret = %d\n", ret); bm_dev_free(handle); return -1; } diff --git a/bmvid/document/bmcv/source_en/api/matmul.rst b/bmvid/document/bmcv/source_en/api/matmul.rst index c756d94..859e6ae 100644 --- a/bmvid/document/bmcv/source_en/api/matmul.rst +++ b/bmvid/document/bmcv/source_en/api/matmul.rst @@ -49,6 +49,10 @@ The format of the interface is as follows: float beta = 0); +**Processor model support** + +This interface supports BM1684/BM1684X. + **输入参数说明:** diff --git a/bmvid/document/bmcv/source_en/api/min_max.rst b/bmvid/document/bmcv/source_en/api/min_max.rst index 4612f60..b92c374 100644 --- a/bmvid/document/bmcv/source_en/api/min_max.rst +++ b/bmvid/document/bmcv/source_en/api/min_max.rst @@ -15,6 +15,11 @@ The format of the interface is as follows: int len); +**Processor model support** + +This interface supports BM1684/BM1684X. 
+ + **Input parameter description:** * bm_handle_t handle diff --git a/bmvid/document/bmcv/source_en/api/morph.rst b/bmvid/document/bmcv/source_en/api/morph.rst index 356bbaa..7e9efad 100644 --- a/bmvid/document/bmcv/source_en/api/morph.rst +++ b/bmvid/document/bmcv/source_en/api/morph.rst @@ -14,6 +14,11 @@ Users can use the following interface to obtain the Device Memory of Kernel duri The function passes in the size and shape of the required Kernel and returns the corresponding Device Memory to the subsequent morphological operation interface. In the end, users need to manually free the space. +**Processor model support** + +This interface only supports BM1684. + + **Interface form:** .. code-block:: c diff --git a/bmvid/document/bmcv/source_en/api/mosaic.rst b/bmvid/document/bmcv/source_en/api/mosaic.rst index 9b9d206..78c43c8 100644 --- a/bmvid/document/bmcv/source_en/api/mosaic.rst +++ b/bmvid/document/bmcv/source_en/api/mosaic.rst @@ -13,6 +13,11 @@ This interface is used to print one or more mosaics on the image. int is_expand) +**Processor model support** + +This interface only supports BM1684X. + + **Description of incoming parameters:** * bm_handle_t handle @@ -67,9 +72,7 @@ This interface is used to print one or more mosaics on the image. **Note:** -1. bm1684x: - -- bm1684x supports the following data_type of bm_image: +1. bm1684x supports the following data_type of bm_image: +-----+-------------------------------+ | num | data_type | @@ -107,12 +110,10 @@ This interface is used to print one or more mosaics on the image. Returns a failure if the input and output format requirements are not met. -2. bm1684: bm1684 mosaic function is not supported。 - -3. All input and output bm_image structures must be created in advance, or a failure will be returned. +2. All input and output bm_image structures must be created in advance, or a failure will be returned. -4. If the width and height of the mosaic are not aligned with 8, it will automatically align up to 8. If it is in the edge area, the 8 alignment will extend toward the non edge direction. +3. If the width and height of the mosaic are not aligned with 8, it will automatically align up to 8.
If it is in the edge area, the 8 alignment will extend toward the non edge direction. +3. If the width and height of the mosaic are not aligned with 8, it will automatically align up to 8. If it is in the edge area, the 8 alignment will extend toward the non edge direction. -5. If the mosaic area exceeds the width and height of the original drawing, the exceeding part will be automatically pasted to the edge of the original drawing. +4. If the mosaic area exceeds the width and height of the original drawing, the exceeding part will be automatically pasted to the edge of the original drawing. -6. Only mosaic sizes above 8x8 are supported. +5. Only mosaic sizes above 8x8 are supported. diff --git a/bmvid/document/bmcv/source_en/api/nms.rst b/bmvid/document/bmcv/source_en/api/nms.rst index 42294b8..81368f9 100644 --- a/bmvid/document/bmcv/source_en/api/nms.rst +++ b/bmvid/document/bmcv/source_en/api/nms.rst @@ -4,6 +4,11 @@ bmcv_nms The interface is used to eliminate excessive object frames obtained by network calculation and find the best object frame. +**Processor model support** + +This interface supports BM1684/BM1684X. + + **Interface form:** .. code-block:: c diff --git a/bmvid/document/bmcv/source_en/api/nms_ext.rst b/bmvid/document/bmcv/source_en/api/nms_ext.rst index d535621..9135d87 100644 --- a/bmvid/document/bmcv/source_en/api/nms_ext.rst +++ b/bmvid/document/bmcv/source_en/api/nms_ext.rst @@ -4,6 +4,11 @@ bmcv_nms_ext This interface is the generalized form of bmcv_nms.It supports Hard_NMS/Soft_NMS/Adaptive_NMS/SSD_NMS which is used to eliminate excessive object frames obtained by network calculation and find the best object frame. +**Processor model support** + +This interface supports BM1684/BM1684X. + + **Interface form:** .. 
code-block:: c diff --git a/bmvid/document/bmcv/source_en/api/put_text.rst b/bmvid/document/bmcv/source_en/api/put_text.rst index 5f45ab9..54d9822 100644 --- a/bmvid/document/bmcv/source_en/api/put_text.rst +++ b/bmvid/document/bmcv/source_en/api/put_text.rst @@ -4,6 +4,11 @@ bmcv_image_put_text The functions of writing (English) on an image and specifying the color, size and width of words are supported. +**Processor model support** + +This interface supports BM1684/BM1684X. + + **Interface form:** .. code-block:: c diff --git a/bmvid/document/bmcv/source_en/api/pyramid.rst b/bmvid/document/bmcv/source_en/api/pyramid.rst index a9d6ab2..63dbba5 100644 --- a/bmvid/document/bmcv/source_en/api/pyramid.rst +++ b/bmvid/document/bmcv/source_en/api/pyramid.rst @@ -3,6 +3,12 @@ bmcv_image_pyramid_down This interface implements downsampling in image gaussian pyramid operations. + +**Processor model support** + +This interface supports BM1684/BM1684X. + + **Interface form:** .. code-block:: c @@ -83,7 +89,7 @@ The interface currently supports the following image_format and data_type: gettimeofday_(&t1); bmcv_image_pyramid_down(handle, img_i, img_o); gettimeofday_(&t2); - cout << "pyramid down TPU using time: " << ((t2.tv_sec - t1.tv_sec) * 1000000 + t2.tv_usec - t1.tv_usec) << "us" << endl; + cout << "pyramid down Tensor Computing Processor using time: " << ((t2.tv_sec - t1.tv_sec) * 1000000 + t2.tv_usec - t1.tv_usec) << "us" << endl; bm_image_copy_device_to_host(img_o, (void **)(&output)); bm_image_destroy(img_i); diff --git a/bmvid/document/bmcv/source_en/api/quantify.rst b/bmvid/document/bmcv/source_en/api/quantify.rst new file mode 100644 index 0000000..4aad08c --- /dev/null +++ b/bmvid/document/bmcv/source_en/api/quantify.rst @@ -0,0 +1,148 @@ +bmcv_image_quantify +==================== + +Convert float type data into int type (the rounding mode is truncation directly after the decimal point), and change the number less than 0 to 0, and the number greater than 255 to 
255. + + +**Processor model support** + +This interface only supports BM1684X. + + +**Interface form:** + + .. code-block:: c + + bm_status_t bmcv_image_quantify( + bm_handle_t handle, + bm_image input, + bm_image output); + + +**Description of parameters:** + +* bm_handle_t handle + + Input parameter. The bm_handle handle. + +* bm_image input + + Input parameter. The bm_image of the input image. The creation of bm_image requires an external call to bm_image_create. The image memory can use bm_image_alloc_dev_mem or bm_image_copy_host_to_device to create new memory, or use bm_image_attach to attach existing memory. + +* bm_image output + + Output parameter. The bm_image of the output image. The creation of bm_image requires an external call to bm_image_create. The image memory can use bm_image_alloc_dev_mem or bm_image_copy_host_to_device to create new memory, or use bm_image_attach to attach existing memory. + + +**Return value description:** + +* BM_SUCCESS: success + +* Other: failed + + +**Format support:** + +The interface currently supports the following image_format and data_type: + ++-----+------------------------+------------------------+ +| num | input image_format | output image_format | ++=====+========================+========================+ +| 1 | FORMAT_RGB_PLANAR | FORMAT_RGB_PLANAR | ++-----+------------------------+------------------------+ +| 2 | FORMAT_BGR_PLANAR | FORMAT_BGR_PLANAR | ++-----+------------------------+------------------------+ + + +Input data currently supports the following data_types: + ++-----+--------------------------------+ +| num | data_type | ++=====+================================+ +| 1 | DATA_TYPE_EXT_FLOAT32 | ++-----+--------------------------------+ + +Output data currently supports the following data_types: + ++-----+--------------------------------+ +| num | data_type | ++=====+================================+ +| 1 | DATA_TYPE_EXT_1N_BYTE | ++-----+--------------------------------+ + + +**Note:** + +1.
Before calling this interface, you must ensure that the input image memory has been allocated. + +2. If the program calling this interface is a multi-threaded program, thread locks need to be added before creating bm_image and after destroying bm_image. + +3. This interface supports image width and height ranging from 1x1 to 8192x8192. + +**Code example:** + + .. code-block:: c + + //pthread_mutex_t lock; + static void read_bin(const char *input_path, float *input_data, int width, int height) { + FILE *fp_src = fopen(input_path, "rb"); + if (fp_src == NULL) + { + printf("Unable to open output file %s\n", input_path); + return; + } + if(fread(input_data, sizeof(float), width * height, fp_src) != 0) + printf("read image success\n"); + fclose(fp_src); + } + + static int quantify_tpu(float* input, unsigned char* output, int height, int width, bm_handle_t handle) { + bm_image input_img; + bm_image output_img; + //pthread_mutex_lock(&lock); + bm_image_create(handle, height, width, (bm_image_format_ext)FORMAT_RGB_PLANAR, DATA_TYPE_EXT_FLOAT32, &input_img, NULL); + bm_image_create(handle, height, width, (bm_image_format_ext)FORMAT_RGB_PLANAR, DATA_TYPE_EXT_1N_BYTE, &output_img, NULL); + bm_image_alloc_dev_mem(input_img, 1); + bm_image_alloc_dev_mem(output_img, 1); + float* in_ptr[1] = {input}; + bm_image_copy_host_to_device(input_img, (void **)in_ptr); + bmcv_image_quantify(handle, input_img, output_img); + unsigned char* out_ptr[1] = {output}; + bm_image_copy_device_to_host(output_img, (void **)out_ptr); + bm_image_destroy(input_img); + bm_image_destroy(output_img); + //pthread_mutex_unlock(&lock); + return 0; + } + + int main(int argc, char* args[]) { + int width = 1920; + int height = 1080; + int dev_id = 0; + char *input_path = NULL; + char *output_path = NULL; + + bm_handle_t handle; + bm_status_t ret = bm_dev_request(&handle, 0); + if (ret != BM_SUCCESS) { + printf("Create bm handle failed. 
ret = %d\n", ret); + return -1; + } + + if (argc > 1) width = atoi(args[1]); + if (argc > 2) height = atoi(args[2]); + if (argc > 3) input_path = args[3]; + if (argc > 4) output_path = args[4]; + + float* input_data = (float*)malloc(width * height * 3 * sizeof(float)); + unsigned char* output_tpu = (unsigned char*)malloc(width * height * 3 * sizeof(unsigned char)); + + read_bin(input_path, input_data, width, height); + + int ret = quantify_tpu(input_data, output_tpu, height, width, handle); + + free(input_data); + free(output_tpu); + bm_dev_free(handle); + return ret; + diff --git a/bmvid/document/bmcv/source_en/api/resize.rst b/bmvid/document/bmcv/source_en/api/resize.rst index 2d4de7c..165dd57 100644 --- a/bmvid/document/bmcv/source_en/api/resize.rst +++ b/bmvid/document/bmcv/source_en/api/resize.rst @@ -1,9 +1,14 @@ bmcv_image_resize ================= - The interface is used to change image size, such as zoom in, zoom out, matting and other functions. + +**Processor model support** + +This interface supports BM1684/BM1684X. + + **Interface form:** .. code-block:: c diff --git a/bmvid/document/bmcv/source_en/api/sobel.rst b/bmvid/document/bmcv/source_en/api/sobel.rst index 782db55..66790b2 100644 --- a/bmvid/document/bmcv/source_en/api/sobel.rst +++ b/bmvid/document/bmcv/source_en/api/sobel.rst @@ -4,6 +4,11 @@ bmcv_image_sobel Sobel operator for edge detection. +**Processor model support** + +This interface only supports BM1684. + + **Interface form:** .. code-block:: c diff --git a/bmvid/document/bmcv/source_en/api/sort.rst b/bmvid/document/bmcv/source_en/api/sort.rst index 216508e..6b50bf5 100644 --- a/bmvid/document/bmcv/source_en/api/sort.rst +++ b/bmvid/document/bmcv/source_en/api/sort.rst @@ -3,6 +3,12 @@ bmcv_sort This interface can sort floating-point data (ascending/descending), and support the index corresponding to the original data after sorting. + +**Processor model support** + +This interface supports BM1684/BM1684X. + + **Interface form:** .. 
code-block:: c diff --git a/bmvid/document/bmcv/source_en/api/storage_convert.rst b/bmvid/document/bmcv/source_en/api/storage_convert.rst index b07ec0b..8df6760 100644 --- a/bmvid/document/bmcv/source_en/api/storage_convert.rst +++ b/bmvid/document/bmcv/source_en/api/storage_convert.rst @@ -4,6 +4,11 @@ bmcv_image_storage_convert The interface converts the data corresponding to the source image format into the format data of the target image and fills it in the device memory associated with the target image. +**Processor model support** + +This interface supports BM1684/BM1684X. + + **Interface form:** .. code-block:: c diff --git a/bmvid/document/bmcv/source_en/api/threshold.rst b/bmvid/document/bmcv/source_en/api/threshold.rst index ea9e199..99f35ad 100644 --- a/bmvid/document/bmcv/source_en/api/threshold.rst +++ b/bmvid/document/bmcv/source_en/api/threshold.rst @@ -3,6 +3,12 @@ bmcv_image_threshold Image thresholding operation. + +**Processor model support** + +This interface supports BM1684/BM1684X. + + **Interface form:** .. code-block:: c diff --git a/bmvid/document/bmcv/source_en/api/transpose.rst b/bmvid/document/bmcv/source_en/api/transpose.rst index 0c36d44..ad40623 100644 --- a/bmvid/document/bmcv/source_en/api/transpose.rst +++ b/bmvid/document/bmcv/source_en/api/transpose.rst @@ -3,6 +3,12 @@ bmcv_image_transpose The interface can transpose image width and height. + +**Processor model support** + +This interface supports BM1684/BM1684X. + + **Interface form::** .. code-block:: c diff --git a/bmvid/document/bmcv/source_en/api/vpp_basic.rst b/bmvid/document/bmcv/source_en/api/vpp_basic.rst index 29390aa..4aee0c6 100644 --- a/bmvid/document/bmcv/source_en/api/vpp_basic.rst +++ b/bmvid/document/bmcv/source_en/api/vpp_basic.rst @@ -1,7 +1,7 @@ bmcv_image_vpp_basic ========================= - There is a special video post-processing module VPP on BM1684 and BM1684X. 
Under certain conditions, it can do the functions of clip, color-space-convert, resize and padding at one time, faster than TPU. + There is a special video post-processing module VPP on BM1684 and BM1684X. Under certain conditions, it can do the functions of clip, color-space-convert, resize and padding at one time, faster than Tensor Computing Processor. The API can combine crop, color-space-convert, resize, padding and any number of functions for multiple images. .. code-block:: c @@ -19,6 +19,11 @@ bmcv_image_vpp_basic csc_matrix_t* matrix = NULL); +**Processor model support** + +This interface supports BM1684/BM1684X. + + **Description of incoming parameters:** * bm_handle_t handle diff --git a/bmvid/document/bmcv/source_en/api/vpp_convert.rst b/bmvid/document/bmcv/source_en/api/vpp_convert.rst index 53925a0..cddc128 100644 --- a/bmvid/document/bmcv/source_en/api/vpp_convert.rst +++ b/bmvid/document/bmcv/source_en/api/vpp_convert.rst @@ -15,6 +15,11 @@ The API converts the input image format into the output image format, and suppor ); +**Processor model support** + +This interface supports BM1684/BM1684X. + + **Description of incoming parameters:** * bm_handle_t handle diff --git a/bmvid/document/bmcv/source_en/api/vpp_convert_padding.rst b/bmvid/document/bmcv/source_en/api/vpp_convert_padding.rst index f2133e6..e5b82f0 100644 --- a/bmvid/document/bmcv/source_en/api/vpp_convert_padding.rst +++ b/bmvid/document/bmcv/source_en/api/vpp_convert_padding.rst @@ -15,6 +15,12 @@ bmcv_image_vpp_convert_padding bmcv_rect_t * crop_rect = NULL, bmcv_resize_algorithm algorithm = BMCV_INTER_LINEAR); + +**Processor model support** + +This interface supports BM1684/BM1684X. 
+ + **Description of incoming parameters:** * bm_handle_t handle diff --git a/bmvid/document/bmcv/source_en/api/vpp_csc_matrix_convert.rst b/bmvid/document/bmcv/source_en/api/vpp_csc_matrix_convert.rst index e35e83e..eda85a0 100644 --- a/bmvid/document/bmcv/source_en/api/vpp_csc_matrix_convert.rst +++ b/bmvid/document/bmcv/source_en/api/vpp_csc_matrix_convert.rst @@ -14,6 +14,12 @@ bmcv_image_vpp_csc_matrix_convert csc_matrix_t * matrix = nullptr, bmcv_resize_algorithm algorithm = BMCV_INTER_LINEAR); + +**Processor model support** + +This interface supports BM1684/BM1684X. + + **Description of incoming parameters:** * bm_handle_t handle diff --git a/bmvid/document/bmcv/source_en/api/vpp_stitch.rst b/bmvid/document/bmcv/source_en/api/vpp_stitch.rst index f58c93e..3738927 100644 --- a/bmvid/document/bmcv/source_en/api/vpp_stitch.rst +++ b/bmvid/document/bmcv/source_en/api/vpp_stitch.rst @@ -15,6 +15,12 @@ Use the crop function of vpp hardware to complete image stitching. The src crop bmcv_rect_t* src_crop_rect = NULL, bmcv_resize_algorithm algorithm = BMCV_INTER_LINEAR); + +**Processor model support** + +This interface supports BM1684/BM1684X. + + **Description of incoming parameters:** * bm_handle_t handle diff --git a/bmvid/document/bmcv/source_en/api/warp_affine.rst b/bmvid/document/bmcv/source_en/api/warp_affine.rst index c5b6209..cb35036 100644 --- a/bmvid/document/bmcv/source_en/api/warp_affine.rst +++ b/bmvid/document/bmcv/source_en/api/warp_affine.rst @@ -40,6 +40,11 @@ bmcv_affine_image_matrix defines that there are several transformation matrices } bmcv_affine_image_matrix; +**Processor model support** + +This interface supports BM1684/BM1684X. + + **Interface form 1:** .. 
code-block:: c diff --git a/bmvid/document/bmcv/source_en/api/warp_perspective.rst b/bmvid/document/bmcv/source_en/api/warp_perspective.rst index 0234733..7adf83a 100644 --- a/bmvid/document/bmcv/source_en/api/warp_perspective.rst +++ b/bmvid/document/bmcv/source_en/api/warp_perspective.rst @@ -45,6 +45,10 @@ In order to complete the transmission transformation more conveniently, the libr :align: center +**Processor model support** + +This interface supports BM1684/BM1684X. + **Interface form 1:** diff --git a/bmvid/document/bmcv/source_en/api/watermask_superpose.rst b/bmvid/document/bmcv/source_en/api/watermask_superpose.rst index 434d2bb..df3b7d5 100644 --- a/bmvid/document/bmcv/source_en/api/watermask_superpose.rst +++ b/bmvid/document/bmcv/source_en/api/watermask_superpose.rst @@ -1,7 +1,14 @@ bmcv_image_watermark_superpose ========================= + This interface is used to overlay one or more watermarks on the image. + +**Processor model support** + +This interface supports BM1684/BM1684X. + + **Interface form 1:** .. code-block:: c @@ -149,10 +156,8 @@ This interface is used to overlay one or more watermarks on the image. Returns a failure if the input and output format requirements are not met. -2. bm1684: bm1684 Watermark function is not supported。 - -3. All input and output bm_image structures must be created in advance, or a failure will be returned. +2. All input and output bm_image structures must be created in advance, or a failure will be returned. -4. The maximum number of watermarks can be 512. +3. The maximum number of watermarks can be 512. -5. If the watermark area exceeds the width and height of the original image, a failure will be returned. +4. If the watermark area exceeds the width and height of the original image, a failure will be returned. 
diff --git a/bmvid/document/bmcv/source_en/api/yolo_nms.rst b/bmvid/document/bmcv/source_en/api/yolo_nms.rst index c4cb177..dc8b49c 100644 --- a/bmvid/document/bmcv/source_en/api/yolo_nms.rst +++ b/bmvid/document/bmcv/source_en/api/yolo_nms.rst @@ -4,6 +4,11 @@ bmcv_nms_yolo This interface supports yolov3/yolov7, which is used to eliminate too many object boxes obtained by network calculation and find the best object box. +**Processor model support** + +This interface supports BM1684/BM1684X. + + **Interface form:** .. code-block:: c diff --git a/bmvid/document/bmcv/source_en/api/yuv2bgr.rst b/bmvid/document/bmcv/source_en/api/yuv2bgr.rst index be3a8b2..12e928c 100644 --- a/bmvid/document/bmcv/source_en/api/yuv2bgr.rst +++ b/bmvid/document/bmcv/source_en/api/yuv2bgr.rst @@ -3,6 +3,12 @@ bmcv_image_yuv2bgr_ext This interface convert YUV format to RGB format. + +**Processor model support** + +This interface supports BM1684/BM1684X. + + **Interface form:** .. code-block:: c diff --git a/bmvid/document/bmcv/source_en/api/yuv2hsv.rst b/bmvid/document/bmcv/source_en/api/yuv2hsv.rst index 1488c4a..84bab3c 100644 --- a/bmvid/document/bmcv/source_en/api/yuv2hsv.rst +++ b/bmvid/document/bmcv/source_en/api/yuv2hsv.rst @@ -4,6 +4,11 @@ bmcv_image_yuv2hsv Convert the specified area of YUV image to HSV format. +**Processor model support** + +This interface supports BM1684/BM1684X. + + **Interface form:** .. code-block:: c diff --git a/bmvid/document/bmcv/source_en/bm_image/bm_image_write_to_bmp.rst b/bmvid/document/bmcv/source_en/bm_image/bm_image_write_to_bmp.rst new file mode 100644 index 0000000..d819e7f --- /dev/null +++ b/bmvid/document/bmcv/source_en/bm_image/bm_image_write_to_bmp.rst @@ -0,0 +1,34 @@ +bm_image_write_to_bmp +===================== + +This interface is used to output bm_image objects as bitmaps (.bmp). + + +**Interface form:** + + .. 
code-block:: c + + bm_status_t bm_image_write_to_bmp( + bm_image input, + const char* filename); + +**Parameter description:** + +* bm_image input + +Input parameter. Input bm_image. + +* const char\* filename + +Input parameter. The path and file name of the saved bitmap file. + + +**Return value description:** + +* BM_SUCCESS: success + +* Others: failed + +**Note:** + +1. Before calling bm_image_write_to_bmp(), you must ensure that the input image has been created correctly and has device memory attached (see bm_image_is_attached); otherwise the function will return a failure. diff --git a/bmvid/document/bmcv/source_en/bm_image/bmcv_calc_cbcr_addr.rst b/bmvid/document/bmcv/source_en/bm_image/bmcv_calc_cbcr_addr.rst new file mode 100644 index 0000000..9a6920c --- /dev/null +++ b/bmvid/document/bmcv/source_en/bm_image/bmcv_calc_cbcr_addr.rst @@ -0,0 +1,63 @@ +bmcv_calc_cbcr_addr +=================== + +Video decoding (Vdec) outputs compressed format data, and the physical address of the Y compressed data, +the stride of the Y channel data, as well as the original image's height can be used to calculate the physical address of the CbCr compressed data. +This interface is primarily used to match the internal decoder's compressed format. +For usage, please refer to the example provided. + +**Interface form:** + + .. code-block:: c + + unsigned long long bmcv_calc_cbcr_addr( + unsigned long long y_addr, + unsigned int y_stride, + unsigned int frame_height); + +**Parameter description:** + +* unsigned long long y_addr + + Input parameter. The physical address of the Y compressed data. + +* unsigned int y_stride + + Input parameter. The stride of the Y compressed data. + +* unsigned int frame_height + + Input parameter. The height of the original image. + + +**Return value description:** + +The return value is the physical address of the CbCr compressed data. + +**Code example:** + + ..
code-block:: c + + + bm_image src; + unsigned long long cbcr_addr; + bm_image_create(bm_handle, + pFrame->height, + pFrame->width, + FORMAT_COMPRESSED, + DATA_TYPE_EXT_1N_BYTE, + &src, + NULL); + bm_device_mem_t input_addr[4]; + int size = pFrame->height * pFrame->stride[4]; + input_addr[0] = bm_mem_from_device((unsigned long long)pFrame->buf[6], size); + size = (pFrame->height / 2) * pFrame->stride[5]; + input_addr[1] = bm_mem_from_device((unsigned long long)pFrame->buf[4], size); + size = pFrame->stride[6]; + input_addr[2] = bm_mem_from_device((unsigned long long)pFrame->buf[7], size); + size = pFrame->stride[7]; + cbcr_addr = bmcv_calc_cbcr_addr((unsigned long long)pFrame->buf[4], pFrame->stride[5], pFrame->height); + input_addr[3] = bm_mem_from_device(cbcr_addr, 0); + bm_image_attach(src, input_addr); + + diff --git a/bmvid/document/bmcv/source_en/bmcv.rst b/bmvid/document/bmcv/source_en/bmcv.rst index b5ef238..7296642 100644 --- a/bmvid/document/bmcv/source_en/bmcv.rst +++ b/bmvid/document/bmcv/source_en/bmcv.rst @@ -1,4 +1,4 @@ Introduction to BMCV ========= -BMCV provides a set of optimized machine vision libraries based on Sophon AI chips. Through the TPU and VPP module of the chip, users can complete the operations of color space conversion, scale transformation, affine transformation, perspective transformation, linear transformation, picture frame, JPEG encoding and decoding, BASE64 encoding and decoding, NMS, sequencing, feature matching and so on. +BMCV provides a set of optimized machine vision libraries based on SOPHON Deep learning processors. Through the Tensor Computing Processor and VPP module of the processor, users can complete the operations of color space conversion, scale transformation, affine transformation, perspective transformation, linear transformation, picture frame, JPEG encoding and decoding, BASE64 encoding and decoding, NMS, sequencing, feature matching and so on. 
diff --git a/bmvid/document/bmcv/source_en/index.rst b/bmvid/document/bmcv/source_en/index.rst index 14ee7f5..2a35c81 100644 --- a/bmvid/document/bmcv/source_en/index.rst +++ b/bmvid/document/bmcv/source_en/index.rst @@ -48,6 +48,8 @@ Introduction to bm_image bm_image/bm_image_get_plane_num bm_image/bm_image_is_attached bm_image/bm_image_get_handle + bm_image/bm_image_write_to_bmp + bm_image/bmcv_calc_cbcr_addr bm_image device memory management @@ -63,6 +65,7 @@ BMCV API :glob: api/api_introduct + api/bmcv_hist_balance api/yuv2bgr api/warp_affine api/warp_perspective @@ -124,6 +127,7 @@ BMCV API api/pyramid api/bayer2rgb api/as_strided + api/quantify PCIe CPU -------- diff --git a/bmvid/document/bmcv/source_en/memory.rst b/bmvid/document/bmcv/source_en/memory.rst index fb08e8b..2661c2f 100644 --- a/bmvid/document/bmcv/source_en/memory.rst +++ b/bmvid/document/bmcv/source_en/memory.rst @@ -34,7 +34,7 @@ Users can call the following API to judge whether the bm_image object has been c 2. The applied memory when bm_image calls bm_image_alloc_dev_mem is automatically managed internally. The memory will be automatically released without the management of the caller when calling bm_image_destroy, bm_image_detach, bm_image_attach and other device memory. Conversely, if the bm_image_attach is connected with a device memory, it means that the memory will be managed by the caller. The memory will not be automatically released when calling bm_image_destroy, bm_image_detach, bm_image_attach or other device memory. It needs to be manually released by the caller. -3. At present, device memory is divided into three memory spaces: heap0, heap1 and heap2. The difference among the three is whether the hardware VPP module of the chip has reading permission. Therefore, if an API needs to be implemented by specifying the hardware VPP module, the input bm_image of the API must be guaranteed to be saved in heap1 or heap2. +3. 
At present, device memory is divided into three memory spaces: heap0, heap1 and heap2. The difference among the three is whether the hardware VPP module of the processor has reading permission. Therefore, if an API needs to be implemented by specifying the hardware VPP module, the input bm_image of the API must be guaranteed to be saved in heap1 or heap2. +------------------+------------------+------------------+ | heap id | bm1684 VPP | bm1684x VPP | diff --git a/bmvid/document/bmcv/source_en/pcie_cpu.rst b/bmvid/document/bmcv/source_en/pcie_cpu.rst index 9045f7b..06489e7 100644 --- a/bmvid/document/bmcv/source_en/pcie_cpu.rst +++ b/bmvid/document/bmcv/source_en/pcie_cpu.rst @@ -1,11 +1,11 @@ PCIe CPU ========== -For operations that are inconvenient to use TPU acceleration, the cooperation of CPU is required. +For operations that are inconvenient to use Tensor Computing Processor acceleration, the cooperation of Processor is required. -If it is SoC mode, the host side is the on-chip ARM A53 processor, which completes the CPU operation. +If it is SoC mode, the host side is the on-chip ARM A53 processor, which completes the Processor operation. -In case of PCIe mode, the host side is the user’s host, and the CPU operation can be completed at the host side or by using the on-chip ARM A53 processor. The two implementation methods have their own advantages and disadvantages: the former needs to carry input and output data between device and host, but the operation performance may be better than ARM, so users can choose the better method according to their own host processor performance, load and other actual conditions. It is the former by default. If you need to use an on-chip processor, you can turn it on in the following way. +In case of PCIe mode, the host side is the user’s host, and the Processor operation can be completed at the host side or by using the on-chip ARM A53 processor. 
The two implementation methods have their own advantages and disadvantages: the former needs to carry input and output data between device and host, but the operation performance may be better than ARM, so users can choose the better method according to their own host processor performance, load and other actual conditions. It is the former by default. If you need to use an on-chip processor, you can turn it on in the following way. Preparatory Work @@ -21,11 +21,11 @@ You need to set the path where these two files are located to the environment va $ export BMCV_CPU_KERNEL_PATH=/path/to/kernel_fils/ -All implementations of BMCV that require CPU operations are in the library libbmcv_cpu_func.so, you need to add the path of the file to the environment variable BMCV_CPU_KERNEL_PATH where the program runs, as follows: +All implementations of BMCV that require Processor operations are in the library libbmcv_cpu_func.so; you need to add the path of this file to the environment variable BMCV_CPU_LIB_PATH where the program runs, as follows: $ export BMCV_CPU_LIB_PATH=/path/to/lib/ -At present, the APIs that require CPU participation are as follows. If the following APIs are not used, this function can be ignored. +At present, the APIs that require Processor participation are as follows. If the following APIs are not used, this function can be ignored.
+-----+-----------------------------------+ | num | API | diff --git a/bmvid/document/bmcv/source_zh/api/absdiff.rst b/bmvid/document/bmcv/source_zh/api/absdiff.rst index 5378e23..5d1295e 100644 --- a/bmvid/document/bmcv/source_zh/api/absdiff.rst +++ b/bmvid/document/bmcv/source_zh/api/absdiff.rst @@ -3,6 +3,10 @@ bmcv_image_absdiff 两张大小相同的图片对应像素值相减并取绝对值。 +**处理器型号支持:** + +该接口支持BM1684/BM1684X。 + **接口形式:** diff --git a/bmvid/document/bmcv/source_zh/api/add_weighted.rst b/bmvid/document/bmcv/source_zh/api/add_weighted.rst index c1ccf01..22c2c70 100644 --- a/bmvid/document/bmcv/source_zh/api/add_weighted.rst +++ b/bmvid/document/bmcv/source_zh/api/add_weighted.rst @@ -9,6 +9,11 @@ bmcv_image_add_weighted \end{array} +**处理器型号支持:** + +该接口支持BM1684/BM1684X。 + + **接口形式:** .. code-block:: c diff --git a/bmvid/document/bmcv/source_zh/api/api_introduct.rst b/bmvid/document/bmcv/source_zh/api/api_introduct.rst index 84c6ff1..1211940 100644 --- a/bmvid/document/bmcv/source_zh/api/api_introduct.rst +++ b/bmvid/document/bmcv/source_zh/api/api_introduct.rst @@ -7,10 +7,7 @@ BMCV API * bmcv_image_canny * bmcv_image_dct * bmcv_image_draw_lines -* bmcv_feature_match * bmcv_fft -* bmcv_image_gaussian_blur -* bmcv_image_laplacian * bmcv_image_lkpyramid * bmcv_image_morph * bmcv_image_sobel @@ -18,110 +15,118 @@ BMCV API +-----+----------------------------------+-----------+-----------+ | num | API | BM1684 | BM1684X | +=====+==================================+===========+===========+ -| 1 | bmcv_image_absdiff | TPU | TPU | +| 1 | bmcv_as_strided |NOT SUPPORT| TPU | +-----+----------------------------------+-----------+-----------+ -| 2 | bmcv_image_add_weighted | TPU | TPU | +| 2 | bmcv_image_absdiff | TPU | TPU | +-----+----------------------------------+-----------+-----------+ -| 3 | bmcv_base64 | SPACC | SPACC | +| 3 | bmcv_image_add_weighted | TPU | TPU | +-----+----------------------------------+-----------+-----------+ -| 4 | bmcv_image_bitwise_and | TPU | TPU | +| 4 | 
bmcv_base64 | SPACC | SPACC | +-----+----------------------------------+-----------+-----------+ -| 5 | bmcv_image_bitwise_or | TPU | TPU | +| 5 | bmcv_image_bayer2rgb |NOT SUPPORT| TPU | +-----+----------------------------------+-----------+-----------+ -| 6 | bmcv_image_bitwise_xor | TPU | TPU | +| 6 | bmcv_image_bitwise_and | TPU | TPU | +-----+----------------------------------+-----------+-----------+ -| 7 | bmcv_calc_hist | TPU | TPU | +| 7 | bmcv_image_bitwise_or | TPU | TPU | +-----+----------------------------------+-----------+-----------+ -| 8 | bmcv_image_canny | TPU | TPU | +| 8 | bmcv_image_bitwise_xor | TPU | TPU | +-----+----------------------------------+-----------+-----------+ -| 9 | bmcv_image_convert_to | TPU | VPP+TPU | +| 9 | bmcv_calc_hist | TPU | TPU | +-----+----------------------------------+-----------+-----------+ -| 10 | bmcv_image_copy_to | TPU | VPP+TPU | +| 10 | bmcv_image_canny | TPU | TPU | +-----+----------------------------------+-----------+-----------+ -| 11 | bmcv_image_dct | TPU | TPU | +| 11 | bmcv_image_convert_to | TPU | VPP+TPU | +-----+----------------------------------+-----------+-----------+ -| 12 | bmcv_distance | TPU | TPU | +| 12 | bmcv_image_copy_to | TPU | VPP+TPU | +-----+----------------------------------+-----------+-----------+ -| 13 | bmcv_image_draw_lines | CPU | VPP | +| 13 | bmcv_image_dct | TPU | TPU | +-----+----------------------------------+-----------+-----------+ -| 14 | bmcv_image_draw_rectangle | TPU | VPP | +| 14 | bmcv_distance | TPU | TPU | +-----+----------------------------------+-----------+-----------+ -| 15 | bmcv_feature_match | TPU | TPU | +| 15 | bmcv_image_draw_lines | CPU | VPP | +-----+----------------------------------+-----------+-----------+ -| 16 | bmcv_fft | TPU | TPU | +| 16 | bmcv_image_draw_rectangle | TPU | VPP | +-----+----------------------------------+-----------+-----------+ -| 17 | bmcv_image_fill_rectangle | TPU | VPP | +| 17 | bmcv_feature_match | TPU | TPU | 
+-----+----------------------------------+-----------+-----------+ -| 18 | bmcv_image_gaussian_blur | TPU | TPU | +| 18 | bmcv_fft | TPU | TPU | +-----+----------------------------------+-----------+-----------+ -| 19 | bmcv_gemm | TPU | TPU | +| 19 | bmcv_image_fill_rectangle | TPU | VPP | +-----+----------------------------------+-----------+-----------+ -| 20 | bmcv_image_jpeg_enc | JPU | JPU | +| 20 | bmcv_image_gaussian_blur | TPU | TPU | +-----+----------------------------------+-----------+-----------+ -| 21 | bmcv_image_jpeg_dec | JPU | JPU | +| 21 | bmcv_gemm | TPU | TPU | +-----+----------------------------------+-----------+-----------+ -| 22 | bmcv_image_laplacian | TPU | TPU | +| 22 | bmcv_image_jpeg_enc | JPU | JPU | +-----+----------------------------------+-----------+-----------+ -| 23 | bmcv_matmul | TPU | TPU | +| 23 | bmcv_image_jpeg_dec | JPU | JPU | +-----+----------------------------------+-----------+-----------+ -| 24 | bmcv_min_max | TPU | TPU | +| 24 | bmcv_image_laplacian | TPU | TPU | +-----+----------------------------------+-----------+-----------+ -| 25 | bmcv_nms_ext | TPU | TPU | +| 25 | bmcv_matmul | TPU | TPU | +-----+----------------------------------+-----------+-----------+ -| 26 | bmcv_nms | TPU | TPU | +| 26 | bmcv_min_max | TPU | TPU | +-----+----------------------------------+-----------+-----------+ -| 27 | bmcv_image_resize | VPP+TPU | VPP | +| 27 | bmcv_nms_ext | TPU | TPU | +-----+----------------------------------+-----------+-----------+ -| 28 | bmcv_image_sobel | TPU | TPU | +| 28 | bmcv_nms | TPU | TPU | +-----+----------------------------------+-----------+-----------+ -| 29 | bmcv_sort | TPU | TPU | +| 29 | bmcv_image_resize | VPP+TPU | VPP | +-----+----------------------------------+-----------+-----------+ -| 30 | bmcv_image_storage_convert | VPP+TPU | VPP | +| 30 | bmcv_image_sobel | TPU | TPU | +-----+----------------------------------+-----------+-----------+ -| 31 | bmcv_image_threshold | TPU | TPU | +| 31 
| bmcv_sort | TPU | TPU | +-----+----------------------------------+-----------+-----------+ -| 32 | bmcv_image_transpose | TPU | TPU | +| 32 | bmcv_image_storage_convert | VPP+TPU | VPP | +-----+----------------------------------+-----------+-----------+ -| 33 | bmcv_image_vpp_basic | VPP | VPP | +| 33 | bmcv_image_threshold | TPU | TPU | +-----+----------------------------------+-----------+-----------+ -| 34 | bmcv_image_vpp_convert_padding | VPP | VPP | +| 34 | bmcv_image_transpose | TPU | TPU | +-----+----------------------------------+-----------+-----------+ -| 35 | bmcv_image_vpp_convert | VPP | VPP | +| 35 | bmcv_image_vpp_basic | VPP | VPP | +-----+----------------------------------+-----------+-----------+ -| 36 | bmcv_image_vpp_csc_matrix_convert| VPP | VPP | +| 36 | bmcv_image_vpp_convert_padding | VPP | VPP | +-----+----------------------------------+-----------+-----------+ -| 37 | bmcv_image_vpp_stitch | VPP | VPP | +| 37 | bmcv_image_vpp_convert | VPP | VPP | +-----+----------------------------------+-----------+-----------+ -| 38 | bmcv_image_warp_affine | TPU | TPU | +| 38 | bmcv_image_vpp_csc_matrix_convert| VPP | VPP | +-----+----------------------------------+-----------+-----------+ -| 39 | bmcv_image_warp_perspective | TPU | TPU | +| 39 | bmcv_image_vpp_stitch | VPP | VPP | +-----+----------------------------------+-----------+-----------+ -| 40 | bmcv_nms_yolo | TPU | TPU | +| 40 | bmcv_image_warp_affine | TPU | TPU | +-----+----------------------------------+-----------+-----------+ -| 41 | bmcv_cmulp | TPU | TPU | +| 41 | bmcv_image_warp_perspective | TPU | TPU | +-----+----------------------------------+-----------+-----------+ -| 42 | bmcv_faiss_indexflatIP |NOT SUPPORT| TPU | +| 42 | bmcv_image_watermark_superpose |NOT SUPPORT| TPU | +-----+----------------------------------+-----------+-----------+ -| 43 | bmcv_faiss_indexflatL2 |NOT SUPPORT| TPU | +| 43 | bmcv_nms_yolo | TPU | TPU | 
+-----+----------------------------------+-----------+-----------+ -| 44 | bmcv_image_yuv2bgr_ext | TPU | VPP | +| 44 | bmcv_cmulp | TPU | TPU | +-----+----------------------------------+-----------+-----------+ -| 45 | bmcv_image_yuv2hsv | TPU | VPP+TPU | +| 45 | bmcv_faiss_indexflatIP |NOT SUPPORT| TPU | +-----+----------------------------------+-----------+-----------+ -| 46 | bmcv_batch_topk | TPU | TPU | +| 46 | bmcv_faiss_indexflatL2 |NOT SUPPORT| TPU | +-----+----------------------------------+-----------+-----------+ -| 47 | bmcv_image_put_text | CPU | CPU | +| 47 | bmcv_image_yuv2bgr_ext | TPU | VPP | +-----+----------------------------------+-----------+-----------+ -| 48 | bmcv_hm_distance |NOT SUPPORT| TPU | +| 48 | bmcv_image_yuv2hsv | TPU | VPP+TPU | +-----+----------------------------------+-----------+-----------+ -| 49 | bmcv_axpy | TPU | TPU | +| 49 | bmcv_batch_topk | TPU | TPU | +-----+----------------------------------+-----------+-----------+ -| 50 | bmcv_image_pyramid_down | TPU | TPU | +| 50 | bmcv_image_put_text | CPU | CPU | ++-----+----------------------------------+-----------+-----------+ +| 51 | bmcv_hm_distance |NOT SUPPORT| TPU | ++-----+----------------------------------+-----------+-----------+ +| 52 | bmcv_axpy | TPU | TPU | ++-----+----------------------------------+-----------+-----------+ +| 53 | bmcv_image_pyramid_down | TPU | TPU | ++-----+----------------------------------+-----------+-----------+ +| 54 | bmcv_image_quantify |NOT SUPPORT| TPU | +-----+----------------------------------+-----------+-----------+ **注意:** -对于BM1684和BM1684X而言,以下两个算子的实现需要结合BMCPU(CPU)与TPU: +对于BM1684和BM1684X而言,以下两个算子的实现需要结合BMCPU与Tensor Computing Processor: +-----+----------------------------------+ | num | API | diff --git a/bmvid/document/bmcv/source_zh/api/as_strided.rst b/bmvid/document/bmcv/source_zh/api/as_strided.rst index 2e7d671..e5c7085 100644 --- a/bmvid/document/bmcv/source_zh/api/as_strided.rst +++ 
b/bmvid/document/bmcv/source_zh/api/as_strided.rst @@ -3,6 +3,10 @@ bmcv_as_strided 该接口可以根据现有矩阵以及给定的步长来创建一个视图矩阵。 +**处理器型号支持:** + +该接口仅支持BM1684X。 + **接口形式:** @@ -109,7 +113,7 @@ bmcv_as_strided output_row, output_col, row_stride, col_stride); gettimeofday_(&t2); - std::cout << "as_strided TPU using time= " << ((t2.tv_sec - t1.tv_sec) * 1000000 + t2.tv_usec - t1.tv_usec) << "(us)" << std::endl; + std::cout << "as_strided Tensor Computing Processor using time= " << ((t2.tv_sec - t1.tv_sec) * 1000000 + t2.tv_usec - t1.tv_usec) << "(us)" << std::endl; if (ret != BM_SUCCESS) { printf("as_strided failed. ret = %d\n", ret); goto exit; diff --git a/bmvid/document/bmcv/source_zh/api/axpy.rst b/bmvid/document/bmcv/source_zh/api/axpy.rst index b149fca..a4a2048 100644 --- a/bmvid/document/bmcv/source_zh/api/axpy.rst +++ b/bmvid/document/bmcv/source_zh/api/axpy.rst @@ -3,6 +3,10 @@ bmcv_axpy 该接口实现F = A * X + Y,其中 A 是常数,大小为 n * c ,F 、X 、Y 都是大小为n * c * h * w的矩阵。 +**处理器型号支持:** + +该接口支持BM1684/BM1684X。 + **接口形式:** diff --git a/bmvid/document/bmcv/source_zh/api/base64.rst b/bmvid/document/bmcv/source_zh/api/base64.rst index 7583bda..5cba84e 100644 --- a/bmvid/document/bmcv/source_zh/api/base64.rst +++ b/bmvid/document/bmcv/source_zh/api/base64.rst @@ -3,6 +3,10 @@ bmcv_base64_enc(dec) base64 网络传输中常用的编码方式,利用64个常用字符来对6位二进制数编码。 +**处理器型号支持:** + +该接口支持BM1684/BM1684X。 + **接口形式:** diff --git a/bmvid/document/bmcv/source_zh/api/batch_topk.rst b/bmvid/document/bmcv/source_zh/api/batch_topk.rst index 5143550..a4cb3c5 100644 --- a/bmvid/document/bmcv/source_zh/api/batch_topk.rst +++ b/bmvid/document/bmcv/source_zh/api/batch_topk.rst @@ -2,6 +2,10 @@ bmcv_batch_topk ================ 计算每个 db 中最大或最小的k个数,并返回index。 +**处理器型号支持:** + +该接口支持BM1684/BM1684X。 + **接口形式:** diff --git a/bmvid/document/bmcv/source_zh/api/bayer2rgb.rst b/bmvid/document/bmcv/source_zh/api/bayer2rgb.rst index 9c4f233..64bb6a1 100644 --- a/bmvid/document/bmcv/source_zh/api/bayer2rgb.rst +++ 
b/bmvid/document/bmcv/source_zh/api/bayer2rgb.rst @@ -1,7 +1,12 @@ bmcv_image_bayer2rgb ================== -将bayerBG8格式图像转成RGB Plannar格式。 +将bayerBG8或bayerRG8格式图像转成RGB Plannar格式。 + +**处理器型号支持:** + +该接口仅支持BM1684X。 + **接口形式:** @@ -27,7 +32,7 @@ bmcv_image_bayer2rgb * bm_image input - 输入参数。输入bayerBG8格式图像的 bm_image,bm_image 需要外部调用 bmcv_image_create 创建。image 内存可以使用 bm_image_alloc_dev_mem 或者 bm_image_copy_host_to_device 来开辟新的内存,或者使用 bmcv_image_attach 来 attach 已有的内存。 + 输入参数。输入bayer格式图像的 bm_image,bm_image 需要外部调用 bmcv_image_create 创建。image 内存可以使用 bm_image_alloc_dev_mem 或者 bm_image_copy_host_to_device 来开辟新的内存,或者使用 bmcv_image_attach 来 attach 已有的内存。 * bm_image output @@ -50,6 +55,9 @@ bmcv_image_bayer2rgb +=====+================================+ | 1 | FORMAT_BAYER | +-----+--------------------------------+ +| 2 | FORMAT_BAYER_RG8 | ++-----+--------------------------------+ + 该接口目前支持以下输出格式: @@ -72,10 +80,13 @@ bmcv_image_bayer2rgb **注意事项:** -1、input的格式是bayerBG,output的格式是rgb plannar, data_type均为uint8类型。 -2、该接口目前支持bm1684x。 -3、该接口支持的尺寸范围是 8*8 ~ 8096*8096,且图像的宽高需要是偶数。 +1. input的格式目前支持bayerBG8或bayerRG8,bm_image_create步骤中bayerBG8创建为FORMAT_BAYER格式,bayerRG8创建为FORMAT_BAYER_RG8格式。 +2. output的格式是rgb plannar, data_type均为uint8类型。 + +3. 该接口支持的尺寸范围是 2*2 ~ 8192*8192,且图像的宽高需要是偶数。 + +4. 
如调用该接口的程序为多线程程序,需要在创建bm_image前和销毁bm_image后加线程锁。 **代码示例:** @@ -84,31 +95,45 @@ bmcv_image_bayer2rgb #define KERNEL_SIZE 3 * 3 * 3 * 4 * 64 #define CONVD_MATRIX 12 * 9 - - const unsigned char convd_kernel[CONVD_MATRIX] = {1, 0, 1, 0, 0, 0, 1, 0, 1, - 0, 0, 2, 0, 0, 0, 0, 0, 2, - 0, 0, 0, 0, 0, 0, 2, 0, 2, - 0, 0, 0, 0, 0, 0, 0, 0, 4, // r R - 4, 0, 0, 0, 0, 0, 0, 0, 0, // b B - 2, 0, 2, 0, 0, 0, 0, 0, 0, - 2, 0, 0, 0, 0, 0, 2, 0, 0, - 1, 0, 1, 0, 0, 0, 1, 0, 1, - 0, 1, 0, 1, 0, 1, 0, 1, 0, - 0, 0, 0, 0, 0, 4, 0, 0, 0, // g1 G1 - 0, 0, 0, 0, 0, 0, 0, 4, 0, // g2 G2 - 0, 1, 0, 1, 0, 1, 0, 1, 0}; + const unsigned char convd_kernel_bg8[CONVD_MATRIX] = {1, 0, 1, 0, 0, 0, 1, 0, 1, //Rb + 0, 0, 2, 0, 0, 0, 0, 0, 2, //Rg1 + 0, 0, 0, 0, 0, 0, 2, 0, 2, //Rg2 + 0, 0, 0, 0, 0, 0, 0, 0, 4, //Rr + 4, 0, 0, 0, 0, 0, 0, 0, 0, //Bb + 2, 0, 2, 0, 0, 0, 0, 0, 0, //Bg1 + 2, 0, 0, 0, 0, 0, 2, 0, 0, //Bg2 + 1, 0, 1, 0, 0, 0, 1, 0, 1, //Br + 0, 1, 0, 1, 0, 1, 0, 1, 0, //Gb + 0, 0, 0, 0, 0, 4, 0, 0, 0, //Gg1 + 0, 0, 0, 0, 0, 0, 0, 4, 0, //Gg2 + 0, 1, 0, 1, 0, 1, 0, 1, 0};//Gr + + const unsigned char convd_kernel_rg8[CONVD_MATRIX] = {4, 0, 0, 0, 0, 0, 0, 0, 0, //Rr + 2, 0, 2, 0, 0, 0, 0, 0, 0, //Rg1 + 2, 0, 0, 0, 0, 0, 2, 0, 0, //Rg2 + 1, 0, 1, 0, 0, 0, 1, 0, 1, //Rb + 1, 0, 1, 0, 0, 0, 1, 0, 1, //Br + 0, 0, 2, 0, 0, 0, 0, 0, 2, //Bg1 + 0, 0, 0, 2, 0, 2, 0, 0, 0, //Bg2 + 0, 0, 0, 0, 0, 0, 0, 0, 4, //Bb + 1, 0, 1, 0, 0, 0, 1, 0, 1, //Gr + 0, 0, 0, 0, 0, 4, 0, 0, 0, //Gg1 + 0, 0, 0, 0, 0, 0, 0, 4, 0, //Gg2 + 0, 1, 0, 1, 0, 1, 0, 1, 0};//Gb int width = 1920; int height = 1080; int dev_id = 0; + unsigned char* input = (unsigned char*)malloc(width * height); + unsigned char* output = (unsigned char*)malloc(width * height * 3); bm_handle_t handle; bm_status_t dev_ret = bm_dev_request(&handle, dev_id); - std::shared_ptr src1_ptr( - new unsigned char[channel * width * height], - std::default_delete()); + bm_image input_img; bm_image output_img; - bm_image_create(handle, height, width, FORMAT_BAYER, 
DATA_TYPE_EXT_1N_BYTE, &input_img); + bm_image_create(handle, height, width, FORMAT_BAYER_RG8, DATA_TYPE_EXT_1N_BYTE, &input_img); + //bm_image_create(handle, height, width, FORMAT_BAYER, DATA_TYPE_EXT_1N_BYTE, &input_img); //bayerBG8 bm_image_create(handle, height, width, FORMAT_RGB_PLANAR, DATA_TYPE_EXT_1N_BYTE, &output_img); + bm_image_alloc_dev_mem(input_img, BMCV_HEAP_ANY); bm_image_alloc_dev_mem(output_img, BMCV_HEAP_ANY); unsigned char kernel_data[KERNEL_SIZE]; @@ -116,13 +141,16 @@ bmcv_image_bayer2rgb // constructing convd_kernel_data for (int i = 0;i < 12;i++) { for (int j = 0;j < 9;j++) { - kernel_data[i * 9 * 64 + 64 * j] = convd_kernel[i * 9 + j]; + kernel_data[i * 9 * 64 + 64 * j] = convd_kernel_rg8[i * 9 + j]; + //kernel_data[i * 9 * 64 + 64 * j] = convd_kernel_bg8[i * 9 + j]; } } - unsigned char* input_data[3] = {srcImage.data, srcImage.data + height * width, srcImage.data + 2 * height * width}; - bm_image_copy_host_to_device(input_img, (void **)input_data); + + bm_image_copy_host_to_device(input_img, (void **)input); bmcv_image_bayer2rgb(handle, kernel_data, input_img, output_img); bm_image_copy_device_to_host(output_img, (void **)(&output)); bm_image_destroy(input_img); bm_image_destroy(output_img); + free(input); + free(output); bm_dev_free(handle); \ No newline at end of file diff --git a/bmvid/document/bmcv/source_zh/api/bitwise_and.rst b/bmvid/document/bmcv/source_zh/api/bitwise_and.rst index ffa5a13..b825c8b 100644 --- a/bmvid/document/bmcv/source_zh/api/bitwise_and.rst +++ b/bmvid/document/bmcv/source_zh/api/bitwise_and.rst @@ -3,6 +3,10 @@ bmcv_image_bitwise_and 两张大小相同的图片对应像素值进行按位与操作。 +**处理器型号支持:** + +该接口支持BM1684/BM1684X。 + **接口形式:** diff --git a/bmvid/document/bmcv/source_zh/api/bitwise_or.rst b/bmvid/document/bmcv/source_zh/api/bitwise_or.rst index affa2ad..2867933 100644 --- a/bmvid/document/bmcv/source_zh/api/bitwise_or.rst +++ b/bmvid/document/bmcv/source_zh/api/bitwise_or.rst @@ -1,9 +1,11 @@ bmcv_image_bitwise_or 
===================== +两张大小相同的图片对应像素值进行按位或操作。 +**处理器型号支持:** -两张大小相同的图片对应像素值进行按位或操作。 +该接口支持BM1684/BM1684X。 **接口形式:** diff --git a/bmvid/document/bmcv/source_zh/api/bitwise_xor.rst b/bmvid/document/bmcv/source_zh/api/bitwise_xor.rst index fc33767..1d6dc65 100644 --- a/bmvid/document/bmcv/source_zh/api/bitwise_xor.rst +++ b/bmvid/document/bmcv/source_zh/api/bitwise_xor.rst @@ -3,6 +3,10 @@ bmcv_image_bitwise_xor 两张大小相同的图片对应像素值进行按位异或操作。 +**处理器型号支持:** + +该接口支持BM1684/BM1684X。 + **接口形式:** diff --git a/bmvid/document/bmcv/source_zh/api/bmcv_hist_balance.rst b/bmvid/document/bmcv/source_zh/api/bmcv_hist_balance.rst new file mode 100644 index 0000000..61c822a --- /dev/null +++ b/bmvid/document/bmcv/source_zh/api/bmcv_hist_balance.rst @@ -0,0 +1,119 @@ +bmcv_hist_balance +=================== + +对图像进行直方图均衡化操作,提高图像的对比度。 + + +**接口形式:** + + .. code-block:: c + + bm_status_t bmcv_hist_balance( + bm_handle_t handle, + bm_device_mem_t input, + bm_device_mem_t output, + int H, + int W); + + +**参数说明:** + +* bm_handle_t handle + + 输入参数。bm_handle 句柄 + +* bm_device_mem_t input + + 输入参数。存放输入图像的 device 空间。其大小为 H * W * sizeof(uint8_t)。 + +* bm_device_mem_t output + + 输出参数。存放输出图像的 device 空间。其大小为 H * W * sizeof(uint8_t)。 + +* int H + + 输入参数。图像的高。 + +* int W + + 输入参数。图像的宽。 + + +**返回值说明:** + +* BM_SUCCESS: 成功 + +* 其他: 失败 + + +**注意事项:** + +1. 数据类型仅支持 uint8_t。 + +2. 支持的最小图像尺寸为 H = 1, W = 1。 + +3. 支持的最大图像尺寸为 H = 8192, W = 8192。 + + +**示例代码** + + .. code-block:: c + + int H = 1024; + int W = 1024; + uint8_t* input_addr = (uint8_t*)malloc(H * W * sizeof(uint8_t)); + uint8_t* output_addr = (uint8_t*)malloc(H * W * sizeof(uint8_t)); + bm_handle_t handle; + bm_status_t ret = BM_SUCCESS; + bm_device_mem_t input, output; + int i; + + struct timespec tp; + clock_gettime(NULL, &tp); + srand(tp.tv_nsec); + + for (i = 0; i < W * H; ++i) { + input_addr[i] = (uint8_t)rand() % 256; + } + + ret = bm_dev_request(&handle, 0); + if (ret != BM_SUCCESS) { + printf("bm_dev_request failed. 
ret = %d\n", ret); + exit(-1); + } + + ret = bm_malloc_device_byte(handle, &input, H * W * sizeof(uint8_t)); + if (ret != BM_SUCCESS) { + printf("bm_malloc_device_byte failed. ret = %d\n", ret); + exit(-1); + } + + ret = bm_malloc_device_byte(handle, &output, H * W * sizeof(uint8_t)); + if (ret != BM_SUCCESS) { + printf("bm_malloc_device_byte failed. ret = %d\n", ret); + exit(-1); + } + + ret = bm_memcpy_s2d(handle, input, input_addr); + if (ret != BM_SUCCESS) { + printf("bm_memcpy_s2d failed. ret = %d\n", ret); + exit(-1); + } + + ret = bmcv_hist_balance(handle, input, output, H, W); + if (ret != BM_SUCCESS) { + printf("bmcv_hist_balance failed. ret = %d\n", ret); + exit(-1); + } + + ret = bm_memcpy_d2s(handle, output_addr, output); + if (ret != BM_SUCCESS) { + printf("bm_memcpy_d2s failed. ret = %d\n", ret); + exit(-1); + } + + free(input_addr); + free(output_addr); + bm_free_device(handle, input); + bm_free_device(handle, output); + bm_dev_free(handle); \ No newline at end of file diff --git a/bmvid/document/bmcv/source_zh/api/calc_hist.rst b/bmvid/document/bmcv/source_zh/api/calc_hist.rst index a5dcfe5..17eadf4 100644 --- a/bmvid/document/bmcv/source_zh/api/calc_hist.rst +++ b/bmvid/document/bmcv/source_zh/api/calc_hist.rst @@ -4,6 +4,10 @@ bmcv_calc_hist 直方图 _______ +**处理器型号支持:** + +该接口支持BM1684/BM1684X。 + **接口形式:** @@ -148,6 +152,10 @@ _______ 带权重的直方图 _______________ +**处理器型号支持:** + +该接口支持BM1684/BM1684X。 + **接口形式:** diff --git a/bmvid/document/bmcv/source_zh/api/canny.rst b/bmvid/document/bmcv/source_zh/api/canny.rst index 4aa69d6..b78c479 100644 --- a/bmvid/document/bmcv/source_zh/api/canny.rst +++ b/bmvid/document/bmcv/source_zh/api/canny.rst @@ -3,6 +3,10 @@ bmcv_image_canny 边缘检测Canny算子。 +**处理器型号支持:** + +该接口仅支持BM1684。 + **接口形式:** diff --git a/bmvid/document/bmcv/source_zh/api/cmulp.rst b/bmvid/document/bmcv/source_zh/api/cmulp.rst index 15ea22b..2f51d5b 100644 --- a/bmvid/document/bmcv/source_zh/api/cmulp.rst +++ 
b/bmvid/document/bmcv/source_zh/api/cmulp.rst @@ -12,6 +12,11 @@ bmcv_cmulp 其中,:math:`i` 是虚数单位,满足公式 :math:`i^2 = -1`. +**处理器型号支持:** + +该接口支持BM1684/BM1684X。 + + **接口形式:** .. code-block:: c++ diff --git a/bmvid/document/bmcv/source_zh/api/convert_to.rst b/bmvid/document/bmcv/source_zh/api/convert_to.rst index 5f0b077..656d0ff 100644 --- a/bmvid/document/bmcv/source_zh/api/convert_to.rst +++ b/bmvid/document/bmcv/source_zh/api/convert_to.rst @@ -10,6 +10,11 @@ bmcv_image_convert_to y=kx+b \end{array} +**处理器型号支持:** + +该接口支持BM1684/BM1684X。 + + **接口形式:** .. code-block:: c diff --git a/bmvid/document/bmcv/source_zh/api/copy_to.rst b/bmvid/document/bmcv/source_zh/api/copy_to.rst index 9bdb1ff..de3263f 100644 --- a/bmvid/document/bmcv/source_zh/api/copy_to.rst +++ b/bmvid/document/bmcv/source_zh/api/copy_to.rst @@ -1,10 +1,14 @@ bmcv_image_copy_to ================== - 该接口实现将一幅图像拷贝到目的图像的对应内存区域。 +**处理器型号支持:** + +该接口支持BM1684/BM1684X。 + + **接口形式:** .. code-block:: c diff --git a/bmvid/document/bmcv/source_zh/api/crop.rst b/bmvid/document/bmcv/source_zh/api/crop.rst index 72fdc27..69ed9d6 100644 --- a/bmvid/document/bmcv/source_zh/api/crop.rst +++ b/bmvid/document/bmcv/source_zh/api/crop.rst @@ -1,10 +1,14 @@ bmcv_image_crop =============== - 该接口实现从一幅原图中 crop 出若干个小图。 +**处理器型号支持:** + +该接口支持BM1684/BM1684X。 + + **接口形式:** .. 
code-block:: c diff --git a/bmvid/document/bmcv/source_zh/api/csc_convert_to.rst b/bmvid/document/bmcv/source_zh/api/csc_convert_to.rst index e34485b..3b25a11 100755 --- a/bmvid/document/bmcv/source_zh/api/csc_convert_to.rst +++ b/bmvid/document/bmcv/source_zh/api/csc_convert_to.rst @@ -19,6 +19,10 @@ bmcv_image_csc_convert_to csc_matrix_t* matrix = NULL, bmcv_convert_to_attr* convert_to_attr); +**处理器型号支持:** + +该接口支持BM1684/BM1684X。 + **传入参数说明:** @@ -59,7 +63,7 @@ bmcv_image_csc_convert_to * bmcv_padding_atrr_t* padding_attr = NULL - 输入参数。所有 crop 的目标小图在 dst image 中的位置信息以及要 padding 的各通道像素值,若不使用 padding 功能则设置为 NULL。 + 输入参数。所有 crop 的目标小图在 dst image 中的位置信息以及要 padding 的各通道像素值,若不使用 padding 功能则设置为 NULL。 .. code-block:: c diff --git a/bmvid/document/bmcv/source_zh/api/dct.rst b/bmvid/document/bmcv/source_zh/api/dct.rst index 4032d31..97b0bf2 100644 --- a/bmvid/document/bmcv/source_zh/api/dct.rst +++ b/bmvid/document/bmcv/source_zh/api/dct.rst @@ -13,6 +13,9 @@ bmcv_image_dct bm_image output, bool is_inversed); +**处理器型号支持:** + +该接口仅支持BM1684。 **输入参数说明:** diff --git a/bmvid/document/bmcv/source_zh/api/debug_savedata.rst b/bmvid/document/bmcv/source_zh/api/debug_savedata.rst index 69f8347..d19ad40 100644 --- a/bmvid/document/bmcv/source_zh/api/debug_savedata.rst +++ b/bmvid/document/bmcv/source_zh/api/debug_savedata.rst @@ -3,6 +3,10 @@ bmcv_debug_savedata 该接口用于将bm_image对象输出至内部定义的二进制文件方便debug,二进制文件格式以及解析方式在示例代码中给出。 +**处理器型号支持:** + +该接口支持BM1684/BM1684X。 + **接口形式:** diff --git a/bmvid/document/bmcv/source_zh/api/distance.rst b/bmvid/document/bmcv/source_zh/api/distance.rst index 34f2179..01734e1 100644 --- a/bmvid/document/bmcv/source_zh/api/distance.rst +++ b/bmvid/document/bmcv/source_zh/api/distance.rst @@ -16,6 +16,10 @@ bmcv_distance const float *pnt, int len); +**处理器型号支持:** + +该接口支持BM1684/BM1684X。 + **输入参数说明:** diff --git a/bmvid/document/bmcv/source_zh/api/draw_lines.rst b/bmvid/document/bmcv/source_zh/api/draw_lines.rst index 59a7b78..e9b7eed 100644 --- 
a/bmvid/document/bmcv/source_zh/api/draw_lines.rst +++ b/bmvid/document/bmcv/source_zh/api/draw_lines.rst @@ -4,6 +4,11 @@ bmcv_image_draw_lines 可以实现在一张图像上画一条或多条线段,从而可以实现画多边形的功能,并支持指定线的颜色和线的宽度。 +**处理器型号支持:** + +该接口支持BM1684/BM1684X。 + + **接口形式:** .. code-block:: c diff --git a/bmvid/document/bmcv/source_zh/api/draw_point.rst b/bmvid/document/bmcv/source_zh/api/draw_point.rst index 6b343b0..c61b19b 100644 --- a/bmvid/document/bmcv/source_zh/api/draw_point.rst +++ b/bmvid/document/bmcv/source_zh/api/draw_point.rst @@ -1,7 +1,14 @@ bmcv_image_draw_point ========================= + 该接口用于在图像上填充一个或者多个point。 + +**处理器型号支持:** + +该接口仅支持BM1684X。 + + **接口形式:** .. code-block:: c @@ -76,9 +83,7 @@ bmcv_image_draw_point **注意事项:** -1. 该接口不支持bm1684。 - -2. bm1684x 支持输入 bm_image 图像格式为 +1. 该接口支持输入 bm_image 的图像格式为 +-----+-------------------------------+ | num | input image_format | diff --git a/bmvid/document/bmcv/source_zh/api/draw_rectangle.rst b/bmvid/document/bmcv/source_zh/api/draw_rectangle.rst index b8f13d6..1c78bf7 100644 --- a/bmvid/document/bmcv/source_zh/api/draw_rectangle.rst +++ b/bmvid/document/bmcv/source_zh/api/draw_rectangle.rst @@ -1,7 +1,14 @@ bmcv_image_draw_rectangle ========================= + 该接口用于在图像上画一个或多个矩形框。 + +**处理器型号支持:** + +该接口支持BM1684/BM1684X。 + + **接口形式:** .. code-block:: c diff --git a/bmvid/document/bmcv/source_zh/api/faiss_indexflatIP.rst b/bmvid/document/bmcv/source_zh/api/faiss_indexflatIP.rst index d4f22f4..16d89e6 100644 --- a/bmvid/document/bmcv/source_zh/api/faiss_indexflatIP.rst +++ b/bmvid/document/bmcv/source_zh/api/faiss_indexflatIP.rst @@ -3,6 +3,12 @@ bmcv_faiss_indexflatIP 计算查询向量与数据库向量的内积距离, 输出前 K (sort_cnt) 个最匹配的内积距离值及其对应的索引。 + +**处理器型号支持:** + +该接口仅支持BM1684X。 + + **接口形式:** .. 
code-block:: c++ @@ -95,7 +101,7 @@ bmcv_faiss_indexflatIP 4、查询向量和数据库向量内积距离值越大, 表示两者的相似度越高。因此, 在 TopK 过程中对内积距离值按降序排序。 -5、该接口用于 Faiss::IndexFlatIP.search(), 在 BM1684X 上实现。考虑 BM1684X 上 TPU 的连续内存, 针对 100W 底库, 可以在单芯片上一次查询最多约 512 个 256 维的输入。 +5、该接口用于 Faiss::IndexFlatIP.search(), 在 BM1684X 上实现。考虑 BM1684X 上 Tensor Computing Processor 的连续内存, 针对 100W 底库, 可以在单处理器上一次查询最多约 512 个 256 维的输入。 **示例代码** diff --git a/bmvid/document/bmcv/source_zh/api/faiss_indexflatL2.rst b/bmvid/document/bmcv/source_zh/api/faiss_indexflatL2.rst index f9afe98..a77dae6 100644 --- a/bmvid/document/bmcv/source_zh/api/faiss_indexflatL2.rst +++ b/bmvid/document/bmcv/source_zh/api/faiss_indexflatL2.rst @@ -3,6 +3,12 @@ bmcv_faiss_indexflatL2 计算查询向量与数据库向量 L2 距离的平方, 输出前 K (sort_cnt)个最匹配的 L2 距离的平方值及其对应的索引。 + +**处理器型号支持:** + +该接口仅支持BM1684X。 + + **接口形式:** .. code-block:: c++ @@ -101,13 +107,13 @@ bmcv_faiss_indexflatL2 2、输出的排序后的相似度结果的数据类型为 float, 相对应的索引的数据类型为 int。 -3、假设输入数据和底库数据的 L2 范数的平方值已提前计算完成, 并存储在芯片上。 +3、假设输入数据和底库数据的 L2 范数的平方值已提前计算完成, 并存储在处理器上。 3、底库数据通常以 database_vecs_num * vec_dims 的形式排布在内存中。此时, 参数 is_transpose 需要设置为 1。 5、查询向量和数据库向量 L2 距离的平方值越小, 表示两者的相似度越高。因此, 在 TopK 过程中对 L2 距离的平方值按升序排序。 -6、该接口用于 Faiss::IndexFlatL2.search(), 在 BM1684X 上实现。考虑 BM1684X 上TPU 的连续内存, 针对 100W 底库, 可以在单芯片上一次查询最多约 512 个 256 维的输入。 +6、该接口用于 Faiss::IndexFlatL2.search(), 在 BM1684X 上实现。考虑 BM1684X 上 Tensor Computing Processor 的连续内存, 针对 100W 底库, 可以在单处理器上一次查询最多约 512 个 256 维的输入。 7、database_vecs_num与sort_cnt的取值需要满足条件:database_vecs_num > sort_cnt。 diff --git a/bmvid/document/bmcv/source_zh/api/feature_match_fix8b.rst b/bmvid/document/bmcv/source_zh/api/feature_match_fix8b.rst index 6f103ea..1717ba6 100644 --- a/bmvid/document/bmcv/source_zh/api/feature_match_fix8b.rst +++ b/bmvid/document/bmcv/source_zh/api/feature_match_fix8b.rst @@ -3,6 +3,11 @@ bmcv_feature_match 该接口用于将网络得到特征点(int8格式)与数据库中特征点(int8格式)进行比对,输出最佳匹配的top-k。 +**处理器型号支持:** + +该接口支持BM1684/BM1684X。 + + **接口形式:** .. 
code-block:: c @@ -54,11 +59,11 @@ bmcv_feature_match 输入参数。数据库中数据特征点的组数。db_size最大值不应该超过500000。 -* int sort_cnt +* int sort_cnt 输入参数。每个 batch 对比结果中所要排序个数,也就是输出结果个数,如需要最大的3个比对结果,则sort_cnt设置为3。该值默认为1。sort_cnt最大值不应该超过30。 -* int rshiftbits +* int rshiftbits 输入参数。对结果进行右移处理的位数,右移采用round对小数进行取整处理。该参数默认为0。 @@ -83,7 +88,7 @@ bmcv_feature_match **示例代码** - + .. code-block:: c int batch_size = 4; diff --git a/bmvid/document/bmcv/source_zh/api/fft.rst b/bmvid/document/bmcv/source_zh/api/fft.rst index c39656d..d97131f 100644 --- a/bmvid/document/bmcv/source_zh/api/fft.rst +++ b/bmvid/document/bmcv/source_zh/api/fft.rst @@ -19,6 +19,11 @@ _____ bool forward, void *&plan); +**处理器型号支持:** + +该接口仅支持BM1684。 + + **输入参数说明:** * bm_handle_t handle diff --git a/bmvid/document/bmcv/source_zh/api/fill_rectangle.rst b/bmvid/document/bmcv/source_zh/api/fill_rectangle.rst index 4468e9f..41f6cb0 100644 --- a/bmvid/document/bmcv/source_zh/api/fill_rectangle.rst +++ b/bmvid/document/bmcv/source_zh/api/fill_rectangle.rst @@ -1,7 +1,13 @@ bmcv_image_fill_rectangle ========================= + 该接口用于在图像上填充一个或者多个矩形。 +**处理器型号支持:** + +该接口支持BM1684/BM1684X。 + + **接口形式:** .. 
code-block:: c diff --git a/bmvid/document/bmcv/source_zh/api/gaussian_blur.rst b/bmvid/document/bmcv/source_zh/api/gaussian_blur.rst index 7a56d19..073b6ac 100644 --- a/bmvid/document/bmcv/source_zh/api/gaussian_blur.rst +++ b/bmvid/document/bmcv/source_zh/api/gaussian_blur.rst @@ -1,7 +1,11 @@ bmcv_image_gaussian_blur ======================== -图像的高斯滤波。 +该接口用于对图像进行高斯滤波。 + +**处理器型号支持:** + +该接口支持BM1684/BM1684X。 **接口形式:** @@ -77,13 +81,6 @@ bmcv_image_gaussian_blur +-----+------------------------+------------------------+ | 7 | FORMAT_GRAY | FORMAT_GRAY | +-----+------------------------+------------------------+ -| 8 | FORMAT_YUV420P | FORMAT_YUV420P | -+-----+------------------------+------------------------+ -| 9 | FORMAT_YUV422P | FORMAT_YUV422P | -+-----+------------------------+------------------------+ -| 10 | FORMAT_YUV444P | FORMAT_YUV444P | -+-----+------------------------+------------------------+ - 目前支持以下 data_type: @@ -100,7 +97,9 @@ bmcv_image_gaussian_blur 2、input output 的 data_type,image_format必须相同。 -3、目前支持图像的最大width为(2048 - kw)。 +3、BM1684支持的图像最大宽为(2048 - kw),BM1684X支持的最大宽为4096,最大高为8192。 + +4、BM1684支持的最大卷积核宽高为31,BM1684X支持的最大卷积核宽高为3。 **代码示例:** diff --git a/bmvid/document/bmcv/source_zh/api/gemm.rst b/bmvid/document/bmcv/source_zh/api/gemm.rst index 5a50589..19cd1e1 100644 --- a/bmvid/document/bmcv/source_zh/api/gemm.rst +++ b/bmvid/document/bmcv/source_zh/api/gemm.rst @@ -28,6 +28,10 @@ bmcv_gemm bm_device_mem_t C, int ldc); +**处理器型号支持:** + +该接口支持BM1684/BM1684X。 + **输入参数说明:** @@ -65,7 +69,7 @@ bmcv_gemm * int lda - 输入参数。矩阵 A 的 leading dimension, 即第一维度的大小,在行与行之间没有stride的情况下即为 A 的列数(不做转置)或行数(做转置) + 输入参数。矩阵 A 的 leading dimension, 即第一维度的大小,在行与行之间没有stride的情况下即为 A 的列数(不做转置)或行数(做转置) * bm_device_mem_t B @@ -105,13 +109,13 @@ bmcv_gemm float alpha = 0.4, beta = 0.6; bool is_A_trans = false; bool is_B_trans = false; - float *A = new float[M * K]; - float *B = new float[N * K]; - float *C = new float[M * N]; + float *A = new float[M * K]; + float *B = new float[N 
* K]; + float *C = new float[M * N]; memset(A, 0x11, M * K * sizeof(float)); memset(B, 0x22, N * K * sizeof(float)); memset(C, 0x33, M * N * sizeof(float)); - + bmcv_gemm(handle, is_A_trans, is_B_trans, @@ -119,12 +123,12 @@ bmcv_gemm N, K, alpha, - bm_mem_from_system((void *)A), - is_A_trans ? M : K, - bm_mem_from_system((void *)B), - is_B_trans ? K : N, + bm_mem_from_system((void *)A), + is_A_trans ? M : K, + bm_mem_from_system((void *)B), + is_B_trans ? K : N, beta, - bm_mem_from_system((void *)C), + bm_mem_from_system((void *)C), N); delete A; delete B; diff --git a/bmvid/document/bmcv/source_zh/api/gemm_ext.rst b/bmvid/document/bmcv/source_zh/api/gemm_ext.rst index dfb1026..ec4e5b1 100644 --- a/bmvid/document/bmcv/source_zh/api/gemm_ext.rst +++ b/bmvid/document/bmcv/source_zh/api/gemm_ext.rst @@ -28,6 +28,10 @@ bmcv_gemm_ext bm_image_data_format_ext input_dtype, bm_image_data_format_ext output_dtype); +**处理器型号支持:** + +该接口仅支持BM1684X。 + **输入参数说明:** @@ -95,11 +99,9 @@ bmcv_gemm_ext **注意:** -1. 该接口仅支持BM1684X。 - -2. 该接口在FP16输入、A矩阵转置的情况下,M仅支持小于等于64的取值。 +1. 该接口在FP16输入、A矩阵转置的情况下,M仅支持小于等于64的取值。 -3. 该接口不支持FP32输入且FP16输出。 +2. 
该接口不支持FP32输入且FP16输出。 **示例代码** diff --git a/bmvid/document/bmcv/source_zh/api/hm_distance.rst b/bmvid/document/bmcv/source_zh/api/hm_distance.rst index c35af44..6c9757b 100644 --- a/bmvid/document/bmcv/source_zh/api/hm_distance.rst +++ b/bmvid/document/bmcv/source_zh/api/hm_distance.rst @@ -17,6 +17,11 @@ bmcv_hm_distance int input1_num, int input2_num); +**处理器型号支持:** + +该接口仅支持BM1684X。 + + **参数说明:** * bm_handle_t handle @@ -54,10 +59,6 @@ bmcv_hm_distance * 其他:失败 -**注意:** - -该接口仅支持BM1684X。 - **示例代码** diff --git a/bmvid/document/bmcv/source_zh/api/jpeg_decode.rst b/bmvid/document/bmcv/source_zh/api/jpeg_decode.rst index 85a0dfc..2abecbf 100644 --- a/bmvid/document/bmcv/source_zh/api/jpeg_decode.rst +++ b/bmvid/document/bmcv/source_zh/api/jpeg_decode.rst @@ -3,6 +3,11 @@ bmcv_image_jpeg_dec 该接口可以实现对多张图片的 JPEG 解码过程。 +**处理器型号支持:** + +该接口支持BM1684/BM1684X。 + + **接口形式:** .. code-block:: c @@ -39,7 +44,7 @@ bmcv_image_jpeg_dec 输出参数。输出 bm_image的指针。每个 dst bm_image 用户可以选择自行调用 bm_image_create 创建,也可以选择不创建。如果用户只声明而不创建则由接口内部根据待解码图片信息自动创建,默认的 format 如下表所示, 当不再需要时仍然需要用户调用 bm_image_destory 来销毁。 +------------+------------------+ -| 码 流 | 默认输出 format | +| 码 流 | 默认输出 format | +============+==================+ | YUV420 | FORMAT_YUV420P | +------------+------------------+ @@ -67,7 +72,7 @@ bmcv_image_jpeg_dec 2. 目前解码支持的图片格式及其输出格式对应如下,如果用户需要指定以下某一种输出格式,可通过使用 bmcv_image_create 自行创建 dst bm_image,从而实现将图片解码到以下对应的某一格式。 +------------------+------------------+ -| 码 流 | 输 出 format | +| 码 流 | 输 出 format | +==================+==================+ | | FORMAT_YUV420P | + YUV420 +------------------+ @@ -109,13 +114,13 @@ bmcv_image_jpeg_dec fseek(fp, 0, SEEK_SET); fread(jpeg_data, *size, 1, fp); fclose(fp); - + // create bm_image used to save output bm_image dst; memset((char*)&dst, 0, sizeof(bm_image)); // if you not create dst bm_image it will create automatically inside. 
// you can also create dst bm_image here, like this: - // bm_image_create(handle, IMAGE_H, IMAGE_W, FORMAT_YUV420P, + // bm_image_create(handle, IMAGE_H, IMAGE_W, FORMAT_YUV420P, // DATA_TYPE_EXT_1N_BYTE, &dst); // decode input diff --git a/bmvid/document/bmcv/source_zh/api/jpeg_encode.rst b/bmvid/document/bmcv/source_zh/api/jpeg_encode.rst index 58cc934..b69a3a5 100644 --- a/bmvid/document/bmcv/source_zh/api/jpeg_encode.rst +++ b/bmvid/document/bmcv/source_zh/api/jpeg_encode.rst @@ -10,12 +10,16 @@ bmcv_image_jpeg_enc bm_status_t bmcv_image_jpeg_enc( bm_handle_t handle, int image_num, - bm_image * src, + bm_image * src, void * p_jpeg_data[], size_t * out_size, int quality_factor = 85 ); +**处理器型号支持:** + +该接口支持BM1684/BM1684X。 + **输入参数说明:** @@ -79,20 +83,20 @@ bmcv_image_jpeg_enc int size = image_h * image_w; int format = FORMAT_YUV420P; bm_image src; - bm_image_create(handle, image_h, image_w, (bm_image_format_ext)format, + bm_image_create(handle, image_h, image_w, (bm_image_format_ext)format, DATA_TYPE_EXT_1N_BYTE, &src); std::unique_ptr buf1(new unsigned char[size]); memset(buf1.get(), 0x11, size); - + std::unique_ptr buf2(new unsigned char[size / 4]); memset(buf2.get(), 0x22, size / 4); - + std::unique_ptr buf3(new unsigned char[size / 4]); memset(buf3.get(), 0x33, size / 4); - + unsigned char *buf[] = {buf1.get(), buf2.get(), buf3.get()}; bm_image_copy_host_to_device(src, (void **)buf); - + void* jpeg_data = NULL; size_t out_size = 0; int ret = bmcv_image_jpeg_enc(handle, 1, &src, &jpeg_data, &out_size); diff --git a/bmvid/document/bmcv/source_zh/api/laplacian.rst b/bmvid/document/bmcv/source_zh/api/laplacian.rst index aa25598..08b9c48 100644 --- a/bmvid/document/bmcv/source_zh/api/laplacian.rst +++ b/bmvid/document/bmcv/source_zh/api/laplacian.rst @@ -3,11 +3,15 @@ bmcv_image_laplacian 梯度计算laplacian算子。 +**处理器型号支持:** + +该接口支持BM1684/BM1684X。 + **接口形式:** .. 
code-block:: c - + bm_status_t bmcv_image_laplacian( bm_handle_t handle, bm_image input, @@ -93,11 +97,11 @@ bmcv_image_laplacian ret = bm_dev_request(&handle, 0); if (ret != BM_SUCCESS) throw("bm_dev_request failed"); - + bm_image_data_format_ext data_type = DATA_TYPE_EXT_1N_BYTE; bm_image input; bm_image output; - + bm_image_create(handle, ih, iw, fmt, data_type, &input); bm_image_alloc_dev_mem(input); @@ -107,7 +111,7 @@ bmcv_image_laplacian std::shared_ptr ch0_ptr = std::make_shared(new unsigned char[ih * iw]); std::shared_ptr tpu_res_ptr = std::make_shared(new unsigned char[ih * iw]); std::shared_ptr cpu_res_ptr = std::make_shared(new unsigned char[ih*iw]); - + for (int i = 0; i < loop; i++) { for (int j = 0; j < ih * iw; j++) { (*ch0_ptr.get())[j] = j % 256; diff --git a/bmvid/document/bmcv/source_zh/api/lkpyramid.rst b/bmvid/document/bmcv/source_zh/api/lkpyramid.rst index f7d0bb3..ab3734a 100644 --- a/bmvid/document/bmcv/source_zh/api/lkpyramid.rst +++ b/bmvid/document/bmcv/source_zh/api/lkpyramid.rst @@ -1,7 +1,7 @@ bmcv_image_lkpyramid ==================== -LK金字塔光流算法。完整的使用步骤包括创建、执行、销毁三步。该算法前半部分使用TPU,而后半部分为串行运算需要使用CPU,因此对于PCIe模式,建议使能CPU进行加速,具体步骤参考第5章节。 +LK金字塔光流算法。完整的使用步骤包括创建、执行、销毁三步。该算法前半部分使用智能视觉深度学习处理器,而后半部分为串行运算需要使用处理器,因此对于PCIe模式,建议使能处理器进行加速,具体步骤参考第5章节。 创建 _____ @@ -19,6 +19,11 @@ _____ int winH = 21, int maxLevel = 3); +**处理器型号支持:** + +该接口仅支持BM1684。 + + **输入参数说明:** * bm_handle_t handle @@ -174,7 +179,7 @@ ___________ } ret = bmcv_open_cpu_process(handle); if (ret != BM_SUCCESS) { - printf("BMCV enable CPU failed. ret = %d\n", ret); + printf("BMCV enable Processor failed. ret = %d\n", ret); bm_dev_free(handle); return -1; } @@ -211,7 +216,7 @@ ___________ bm_image_destroy(nextImg); ret = bmcv_close_cpu_process(handle); if (ret != BM_SUCCESS) { - printf("BMCV disable CPU failed. ret = %d\n", ret); + printf("BMCV disable Processor failed. 
ret = %d\n", ret); bm_dev_free(handle); return -1; } diff --git a/bmvid/document/bmcv/source_zh/api/matmul.rst b/bmvid/document/bmcv/source_zh/api/matmul.rst index 8da6415..917ca99 100644 --- a/bmvid/document/bmcv/source_zh/api/matmul.rst +++ b/bmvid/document/bmcv/source_zh/api/matmul.rst @@ -48,6 +48,9 @@ bmcv_matmul float alpha = 1, float beta = 0); +**处理器型号支持:** + +该接口支持BM1684/BM1684X。 **输入参数说明:** diff --git a/bmvid/document/bmcv/source_zh/api/min_max.rst b/bmvid/document/bmcv/source_zh/api/min_max.rst index d53fd9f..a797493 100644 --- a/bmvid/document/bmcv/source_zh/api/min_max.rst +++ b/bmvid/document/bmcv/source_zh/api/min_max.rst @@ -14,6 +14,10 @@ bmcv_min_max float *maxVal, int len); +**处理器型号支持:** + +该接口支持BM1684/BM1684X。 + **输入参数说明:** diff --git a/bmvid/document/bmcv/source_zh/api/morph.rst b/bmvid/document/bmcv/source_zh/api/morph.rst index a01f721..ce61879 100644 --- a/bmvid/document/bmcv/source_zh/api/morph.rst +++ b/bmvid/document/bmcv/source_zh/api/morph.rst @@ -13,6 +13,10 @@ bmcv_image_morph 函数通过传入所需 Kernel 的大小和形状,返回对应的 Device Memory 给后面的形态学运算接口使用,用户应用程序的最后需要用户手动释放该空间。 +**处理器型号支持:** + +该接口仅支持BM1684。 + **接口形式:** diff --git a/bmvid/document/bmcv/source_zh/api/mosaic.rst b/bmvid/document/bmcv/source_zh/api/mosaic.rst index e0a2bed..8e0c93b 100644 --- a/bmvid/document/bmcv/source_zh/api/mosaic.rst +++ b/bmvid/document/bmcv/source_zh/api/mosaic.rst @@ -1,7 +1,14 @@ bmcv_image_mosaic ========================= + 该接口用于在图像上打一个或多个马赛克。 + +**处理器型号支持:** + +该接口仅支持BM1684X。 + + **接口形式:** .. code-block:: c @@ -67,9 +74,7 @@ bmcv_image_mosaic **注意事项:** -1. bm1684x要求如下: - -- 输入和输出的数据类型必须为: +1. 输入和输出的数据类型必须为: +-----+-------------------------------+ | num | data_type | @@ -107,12 +112,10 @@ bmcv_image_mosaic 如果不满足输入输出格式要求,则返回失败。 -2. bm1684部分:bm1684不支持马赛克功能。 - -3. 输入输出所有 bm_image 结构必须提前创建,否则返回失败。 +2. 输入输出所有 bm_image 结构必须提前创建,否则返回失败。 -4. 如果马赛克宽高非8对齐,则会自动向上8对齐,若在边缘区域,则8对齐时会往非边缘方向延展。 +3. 如果马赛克宽高非8对齐,则会自动向上8对齐,若在边缘区域,则8对齐时会往非边缘方向延展。 -5. 如果马赛克区域超出原图宽高,超出部分会自动贴到原图边缘。 +4. 
如果马赛克区域超出原图宽高,超出部分会自动贴到原图边缘。 -6. 仅支持8x8以上的马赛克尺寸。 +5. 仅支持8x8以上的马赛克尺寸。 diff --git a/bmvid/document/bmcv/source_zh/api/nms.rst b/bmvid/document/bmcv/source_zh/api/nms.rst index d24bc91..da8180a 100644 --- a/bmvid/document/bmcv/source_zh/api/nms.rst +++ b/bmvid/document/bmcv/source_zh/api/nms.rst @@ -3,6 +3,10 @@ bmcv_nms 该接口用于消除网络计算得到过多的物体框,并找到最佳物体框。 +**处理器型号支持:** + +该接口支持BM1684/BM1684X。 + **接口形式:** diff --git a/bmvid/document/bmcv/source_zh/api/nms_ext.rst b/bmvid/document/bmcv/source_zh/api/nms_ext.rst index ec30515..262f062 100644 --- a/bmvid/document/bmcv/source_zh/api/nms_ext.rst +++ b/bmvid/document/bmcv/source_zh/api/nms_ext.rst @@ -4,6 +4,11 @@ bmcv_nms_ext 该接口是bmcv_nms接口的广义形式,支持Hard_NMS/Soft_NMS/Adaptive_NMS/SSD_NMS,用于消除网络计算得到过多的物体框,并找到最佳物体框。 +**处理器型号支持:** + +该接口支持BM1684/BM1684X。 + + **接口形式:** .. code-block:: c diff --git a/bmvid/document/bmcv/source_zh/api/put_text.rst b/bmvid/document/bmcv/source_zh/api/put_text.rst index bb2acf2..43d7a96 100644 --- a/bmvid/document/bmcv/source_zh/api/put_text.rst +++ b/bmvid/document/bmcv/source_zh/api/put_text.rst @@ -3,6 +3,10 @@ bmcv_image_put_text 可以实现在一张图像上写字的功能(英文),并支持指定字的颜色、大小和宽度。 +**处理器型号支持:** + +该接口支持BM1684/BM1684X。 + **接口形式:** diff --git a/bmvid/document/bmcv/source_zh/api/pyramid.rst b/bmvid/document/bmcv/source_zh/api/pyramid.rst index 0b4f662..a4432b2 100644 --- a/bmvid/document/bmcv/source_zh/api/pyramid.rst +++ b/bmvid/document/bmcv/source_zh/api/pyramid.rst @@ -3,6 +3,11 @@ bmcv_image_pyramid_down 该接口实现图像高斯金字塔操作中的向下采样。 +**处理器型号支持:** + +该接口支持BM1684/BM1684X。 + + **接口形式:** .. 
code-block:: c @@ -85,7 +90,7 @@ bmcv_image_pyramid_down gettimeofday_(&t1); bmcv_image_pyramid_down(handle, img_i, img_o); gettimeofday_(&t2); - cout << "pyramid down TPU using time: " << ((t2.tv_sec - t1.tv_sec) * 1000000 + t2.tv_usec - t1.tv_usec) << "us" << endl; + cout << "pyramid down Tensor Computing Processor using time: " << ((t2.tv_sec - t1.tv_sec) * 1000000 + t2.tv_usec - t1.tv_usec) << "us" << endl; bm_image_copy_device_to_host(img_o, (void **)(&output)); bm_image_destroy(img_i); diff --git a/bmvid/document/bmcv/source_zh/api/quantify.rst b/bmvid/document/bmcv/source_zh/api/quantify.rst new file mode 100644 index 0000000..1fe9925 --- /dev/null +++ b/bmvid/document/bmcv/source_zh/api/quantify.rst @@ -0,0 +1,149 @@ +bmcv_image_quantify +==================== + +将float类型数据转化成int类型(舍入模式为小数点后直接截断),并将小于0的数变为0,大于255的数变为255。 + + +**处理器型号支持:** + +该接口仅支持BM1684X。 + + +**接口形式:** + + .. code-block:: c + + bm_status_t bmcv_image_quantify( + bm_handle_t handle, + bm_image input, + bm_image output); + + +**参数说明:** + +* bm_handle_t handle + + 输入参数。 bm_handle 句柄。 + +* bm_image input + + 输入参数。输入图像的 bm_image,bm_image 需要外部调用 bmcv_image_create 创建。image 内存可以使用 bm_image_alloc_dev_mem 或者 bm_image_copy_host_to_device 来开辟新的内存,或者使用 bmcv_image_attach 来 attach 已有的内存。 + +* bm_image output + + 输出参数。输出 bm_image,bm_image 需要外部调用 bmcv_image_create 创建。image 内存可以通过 bm_image_alloc_dev_mem 来开辟新的内存,或者使用 bmcv_image_attach 来 attach 已有的内存。如果不主动分配将在 api 内部进行自行分配。 + + +**返回值说明:** + +* BM_SUCCESS: 成功 + +* 其他:失败 + + +**格式支持:** + +该接口目前支持以下 image_format: + ++-----+------------------------+------------------------+ +| num | input image_format | output image_format | ++=====+========================+========================+ +| 1 | FORMAT_RGB_PLANAR | FORMAT_RGB_PLANAR | ++-----+------------------------+------------------------+ +| 2 | FORMAT_BGR_PLANAR | FORMAT_BGR_PLANAR | ++-----+------------------------+------------------------+ + + +输入数据目前支持以下 data_type: + ++-----+--------------------------------+ 
+| num | data_type | ++=====+================================+ +| 1 | DATA_TYPE_EXT_FLOAT32 | ++-----+--------------------------------+ +输出数据目前支持以下 data_type: + ++-----+--------------------------------+ +| num | data_type | ++=====+================================+ +| 1 | DATA_TYPE_EXT_1N_BYTE | ++-----+--------------------------------+ + + +**注意事项:** + +1. 在调用该接口之前必须确保输入的 image 内存已经申请。 + +2. 如调用该接口的程序为多线程程序,需要在创建bm_image前和销毁bm_image后加线程锁。 + +3. 该接口支持图像宽高范围为1x1~8192x8192。 + +**代码示例:** + + .. code-block:: c + + + //pthread_mutex_t lock; + static void read_bin(const char *input_path, float *input_data, int width, int height) { + FILE *fp_src = fopen(input_path, "rb"); + if (fp_src == NULL) + { + printf("无法打开输入文件 %s\n", input_path); + return; + } + if(fread(input_data, sizeof(float), width * height, fp_src) != 0) + printf("read image success\n"); + fclose(fp_src); + } + + static int quantify_tpu(float* input, unsigned char* output, int height, int width, bm_handle_t handle) { + bm_image input_img; + bm_image output_img; + //pthread_mutex_lock(&lock); + bm_image_create(handle, height, width, (bm_image_format_ext)FORMAT_RGB_PLANAR, DATA_TYPE_EXT_FLOAT32, &input_img, NULL); + bm_image_create(handle, height, width, (bm_image_format_ext)FORMAT_RGB_PLANAR, DATA_TYPE_EXT_1N_BYTE, &output_img, NULL); + bm_image_alloc_dev_mem(input_img, 1); + bm_image_alloc_dev_mem(output_img, 1); + float* in_ptr[1] = {input}; + bm_image_copy_host_to_device(input_img, (void **)in_ptr); + bmcv_image_quantify(handle, input_img, output_img); + unsigned char* out_ptr[1] = {output}; + bm_image_copy_device_to_host(output_img, (void **)out_ptr); + bm_image_destroy(input_img); + bm_image_destroy(output_img); + //pthread_mutex_unlock(&lock); + return 0; + } + + int main(int argc, char* args[]) { + int width = 1920; + int height = 1080; + int dev_id = 0; + char *input_path = NULL; + char *output_path = NULL; + + bm_handle_t handle; + bm_status_t ret = bm_dev_request(&handle, 0); + if (ret !=
BM_SUCCESS) { + printf("Create bm handle failed. ret = %d\n", ret); + return -1; + } + + if (argc > 1) width = atoi(args[1]); + if (argc > 2) height = atoi(args[2]); + if (argc > 3) input_path = args[3]; + if (argc > 4) output_path = args[4]; + + float* input_data = (float*)malloc(width * height * 3 * sizeof(float)); + unsigned char* output_tpu = (unsigned char*)malloc(width * height * 3 * sizeof(unsigned char)); + + read_bin(input_path, input_data, width, height); + + int result = quantify_tpu(input_data, output_tpu, height, width, handle); + + free(input_data); + free(output_tpu); + bm_dev_free(handle); + return result; + } diff --git a/bmvid/document/bmcv/source_zh/api/resize.rst b/bmvid/document/bmcv/source_zh/api/resize.rst index 49d3361..286ddc8 100644 --- a/bmvid/document/bmcv/source_zh/api/resize.rst +++ b/bmvid/document/bmcv/source_zh/api/resize.rst @@ -1,10 +1,14 @@ bmcv_image_resize ================= - 该接口用于实现图像尺寸的变化,如放大、缩小、抠图等功能。 +**处理器型号支持:** + +该接口支持BM1684/BM1684X。 + + **接口形式:** .. code-block:: c diff --git a/bmvid/document/bmcv/source_zh/api/sobel.rst b/bmvid/document/bmcv/source_zh/api/sobel.rst index b2f1fe0..cbbeb2d 100644 --- a/bmvid/document/bmcv/source_zh/api/sobel.rst +++ b/bmvid/document/bmcv/source_zh/api/sobel.rst @@ -3,6 +3,10 @@ bmcv_image_sobel 边缘检测Sobel算子。 +**处理器型号支持:** + +该接口仅支持BM1684。 + **接口形式:** diff --git a/bmvid/document/bmcv/source_zh/api/sort.rst b/bmvid/document/bmcv/source_zh/api/sort.rst index a571ae0..0e59db2 100644 --- a/bmvid/document/bmcv/source_zh/api/sort.rst +++ b/bmvid/document/bmcv/source_zh/api/sort.rst @@ -3,6 +3,11 @@ bmcv_sort 该接口可以实现浮点数据的排序(升序/降序),并且支持排序后可以得到原数据所对应的 index。 +**处理器型号支持:** + +该接口支持BM1684/BM1684X。 + + **接口形式:** .. code-block:: c @@ -82,7 +87,7 @@ bmcv_sort **示例代码** - + ..
code-block:: c int data_cnt = 100; diff --git a/bmvid/document/bmcv/source_zh/api/storage_convert.rst b/bmvid/document/bmcv/source_zh/api/storage_convert.rst index 150cc77..3c5fc78 100644 --- a/bmvid/document/bmcv/source_zh/api/storage_convert.rst +++ b/bmvid/document/bmcv/source_zh/api/storage_convert.rst @@ -4,6 +4,11 @@ bmcv_image_storage_convert 该接口将源图像格式的对应的数据转换为目的图像的格式数据,并填充在目的图像关联的 device memory 中。 +**处理器型号支持:** + +该接口支持BM1684/BM1684X。 + + **接口形式:** .. code-block:: c @@ -244,9 +249,9 @@ bm1684x时,该API, int image_h = 1080; int image_w = 1920; bm_image src, dst; - bm_image_create(handle, image_h, image_w, FORMAT_NV12, + bm_image_create(handle, image_h, image_w, FORMAT_NV12, DATA_TYPE_EXT_1N_BYTE, &src); - bm_image_create(handle, image_h, image_w, FORMAT_BGR_PLANAR, + bm_image_create(handle, image_h, image_w, FORMAT_BGR_PLANAR, DATA_TYPE_EXT_1N_BYTE, &dst); std::shared_ptr y_ptr = std::make_shared( new u8[image_h * image_w]); diff --git a/bmvid/document/bmcv/source_zh/api/threshold.rst b/bmvid/document/bmcv/source_zh/api/threshold.rst index 9acd012..68b7d9f 100644 --- a/bmvid/document/bmcv/source_zh/api/threshold.rst +++ b/bmvid/document/bmcv/source_zh/api/threshold.rst @@ -4,6 +4,11 @@ bmcv_image_threshold 图像阈值化操作。 +**处理器型号支持:** + +该接口支持BM1684/BM1684X。 + + **接口形式:** .. code-block:: c diff --git a/bmvid/document/bmcv/source_zh/api/transpose.rst b/bmvid/document/bmcv/source_zh/api/transpose.rst index a42045d..52de3ea 100644 --- a/bmvid/document/bmcv/source_zh/api/transpose.rst +++ b/bmvid/document/bmcv/source_zh/api/transpose.rst @@ -3,6 +3,12 @@ bmcv_image_transpose 该接口可以实现图片宽和高的转置。 + +**处理器型号支持:** + +该接口支持BM1684/BM1684X。 + + **接口形式:** .. 
code-block:: c diff --git a/bmvid/document/bmcv/source_zh/api/vpp_basic.rst b/bmvid/document/bmcv/source_zh/api/vpp_basic.rst index 7d5fdc1..481f274 100644 --- a/bmvid/document/bmcv/source_zh/api/vpp_basic.rst +++ b/bmvid/document/bmcv/source_zh/api/vpp_basic.rst @@ -2,7 +2,7 @@ bmcv_image_vpp_basic ========================= -bm1684和bm1684x 上有专门的视频后处理模块VPP,在满足一定条件下可以一次实现 crop、color-space-convert、resize 以及 padding 功能,速度比 TPU 更快。 +bm1684和bm1684x 上有专门的视频后处理模块VPP,在满足一定条件下可以一次实现 crop、color-space-convert、resize 以及 padding 功能,速度比 Tensor Computing Processor 更快。 该 API 可以实现对多张图片的 crop、color-space-convert、resize、padding 及其任意若干个功能的组合。 .. code-block:: c @@ -20,6 +20,11 @@ bm1684和bm1684x 上有专门的视频后处理模块VPP,在满足一定条件 csc_matrix_t* matrix = NULL); +**处理器型号支持:** + +该接口支持BM1684/BM1684X。 + + **传入参数说明:** * bm_handle_t handle @@ -48,33 +53,33 @@ bm1684和bm1684x 上有专门的视频后处理模块VPP,在满足一定条件 .. code-block:: c - typedef struct bmcv_rect { + typedef struct bmcv_rect { int start_x; int start_y; int crop_w; - int crop_h; + int crop_h; } bmcv_rect_t; 每个输出 bm_image 对象所对应的在输入图像上 crop 的参数,包括起始点x坐标、起始点y坐标、crop图像的宽度以及crop图像的高度。图像左上顶点作为坐标原点。如果不使用 crop 功能可填 NULL。 * bmcv_padding_atrr_t* padding_attr = NULL - 输入参数。所有 crop 的目标小图在 dst image 中的位置信息以及要 padding 的各通道像素值,若不使用 padding 功能则设置为 NULL。 - - .. code-block:: c - - typedef struct bmcv_padding_atrr_s { - unsigned int dst_crop_stx; - unsigned int dst_crop_sty; - unsigned int dst_crop_w; - unsigned int dst_crop_h; - unsigned char padding_r; - unsigned char padding_g; - unsigned char padding_b; - int if_memset; - } bmcv_padding_atrr_t; - - + 输入参数。所有 crop 的目标小图在 dst image 中的位置信息以及要 padding 的各通道像素值,若不使用 padding 功能则设置为 NULL。 + + .. code-block:: c + + typedef struct bmcv_padding_atrr_s { + unsigned int dst_crop_stx; + unsigned int dst_crop_sty; + unsigned int dst_crop_w; + unsigned int dst_crop_h; + unsigned char padding_r; + unsigned char padding_g; + unsigned char padding_b; + int if_memset; + } bmcv_padding_atrr_t; + + 1. 
目标小图的左上角顶点相对于 dst image 原点(左上角)的offset信息:dst_crop_stx 和 dst_crop_sty; #. 目标小图经resize后的宽高:dst_crop_w 和 dst_crop_h; @@ -85,7 +90,7 @@ bm1684和bm1684x 上有专门的视频后处理模块VPP,在满足一定条件 输入参数。resize 算法选择,包括 BMCV_INTER_NEAREST、BMCV_INTER_LINEAR 和 BMCV_INTER_BICUBIC三种,默认情况下是双线性差值。 - - bm1684 支持 : + - bm1684 支持 : BMCV_INTER_NEAREST,BMCV_INTER_LINEAR,BMCV_INTER_BICUBIC。 - bm1684x 支持: @@ -111,17 +116,17 @@ bm1684和bm1684x 上有专门的视频后处理模块VPP,在满足一定条件 | CSC_YPbPr2RGB_BT709 | +----------------------------+ | CSC_RGB2YPbPr_BT709 | -+----------------------------+ ++----------------------------+ | CSC_USER_DEFINED_MATRIX | -+----------------------------+ ++----------------------------+ | CSC_MAX_ENUM | -+----------------------------+ ++----------------------------+ * csc_matrix_t* matrix = NULL 输入参数。如果 csc_type 选择 CSC_USER_DEFINED_MATRIX,则需要传入系数矩阵,格式如下: - .. code-block:: c + .. code-block:: c typedef struct { int csc_coe00; @@ -281,6 +286,8 @@ bm1684支持的要求如下: | | RGBP_SEPARATE | 条件1 | | +---------------------+----------+ | | BGRP_SEPARATE | 条件1 | +| +---------------------+----------+ +| | ARGB_PACKED | 条件1 | +------------------+---------------------+----------+ | | RGB_PACKED | 条件1 | | +---------------------+----------+ @@ -305,6 +312,8 @@ bm1684支持的要求如下: | | RGBP_SEPARATE | 条件1 | | +---------------------+----------+ | | BGRP_SEPARATE | 条件1 | +| +---------------------+----------+ +| | ARGB_PACKED | 条件1 | +------------------+---------------------+----------+ | | RGB_PACKED | 条件1 | | +---------------------+----------+ @@ -342,6 +351,12 @@ bm1684支持的要求如下: | +---------------------+----------+ | | BGRP_SEPARATE | 条件1 | +------------------+---------------------+----------+ +| | RGB_PLANAR | 条件1 | +| +---------------------+----------+ +| ARGB_PACKED | RGB_PACKED | 条件1 | +| +---------------------+----------+ +| | ARGB_PACKED | 条件1 | ++------------------+---------------------+----------+ | GRAY | GRAY | 条件1 | +------------------+---------------------+----------+ | YUV420P | YUV420P | 条件2 | @@ -359,6 +374,8 
@@ bm1684支持的要求如下: | RGBP_SEPARATE | | 条件3 | +------------------+ +----------+ | BGRP_SEPARATE | | 条件3 | ++------------------+ +----------+ +| ARGB_PACKED | | 条件3 | +------------------+---------------------+----------+ | | RGB_PACKED | 条件4 | | +---------------------+----------+ @@ -371,6 +388,8 @@ bm1684支持的要求如下: | | RGBP_SEPARATE | 条件4 | | +---------------------+----------+ | | BGRP_SEPARATE | 条件4 | +| +---------------------+----------+ +| | ARGB_PACKED | 条件4 | +------------------+---------------------+----------+ | | RGB_PACKED | 条件4 | | +---------------------+----------+ @@ -433,4 +452,4 @@ bm1684支持的要求如下: bm_image_attach(*compressed_image, src_plane_device); - + diff --git a/bmvid/document/bmcv/source_zh/api/vpp_convert.rst b/bmvid/document/bmcv/source_zh/api/vpp_convert.rst index 8242741..ae71821 100644 --- a/bmvid/document/bmcv/source_zh/api/vpp_convert.rst +++ b/bmvid/document/bmcv/source_zh/api/vpp_convert.rst @@ -16,6 +16,11 @@ bmcv_image_vpp_convert ); +**处理器型号支持:** + +该接口支持BM1684/BM1684X。 + + **传入参数说明:** * bm_handle_t handle @@ -92,7 +97,7 @@ bmcv_image_vpp_convert -**代码示例:** +**代码示例:** .. 
code-block:: c @@ -106,14 +111,14 @@ bmcv_image_vpp_convert #include "stdlib.h" #include #include - + int main(int argc, char *argv[]) { bm_handle_t handle; int image_h = 1080; int image_w = 1920; bm_image src, dst[4]; bm_dev_request(&handle, 0); - bm_image_create(handle, image_h, image_w, FORMAT_NV12, + bm_image_create(handle, image_h, image_w, FORMAT_NV12, DATA_TYPE_EXT_1N_BYTE, &src); bm_image_alloc_dev_mem(src, 1); for (int i = 0; i < 4; i++) { @@ -131,21 +136,21 @@ bmcv_image_vpp_convert memset((void *)(uv_ptr.get()), 158, image_h * image_w / 2); u8 *host_ptr[] = {y_ptr.get(), uv_ptr.get()}; bm_image_copy_host_to_device(src, (void **)host_ptr); - + bmcv_rect_t rect[] = {{0, 0, image_w / 2, image_h / 2}, {0, image_h / 2, image_w / 2, image_h / 2}, {image_w / 2, 0, image_w / 2, image_h / 2}, {image_w / 2, image_h / 2, image_w / 2, image_h / 2}}; - + bmcv_image_vpp_convert(handle, 4, src, dst, rect); - + for (int i = 0; i < 4; i++) { bm_image_destroy(dst[i]); } - + bm_image_destroy(src); bm_dev_free(handle); return 0; } - + diff --git a/bmvid/document/bmcv/source_zh/api/vpp_convert_padding.rst b/bmvid/document/bmcv/source_zh/api/vpp_convert_padding.rst index 985e376..19c6ede 100644 --- a/bmvid/document/bmcv/source_zh/api/vpp_convert_padding.rst +++ b/bmvid/document/bmcv/source_zh/api/vpp_convert_padding.rst @@ -17,6 +17,12 @@ bmcv_image_vpp_convert_padding bmcv_rect_t * crop_rect = NULL, bmcv_resize_algorithm algorithm = BMCV_INTER_LINEAR); + +**处理器型号支持:** + +该接口支持BM1684/BM1684X。 + + **传入参数说明:** * bm_handle_t handle diff --git a/bmvid/document/bmcv/source_zh/api/vpp_csc_matrix_convert.rst b/bmvid/document/bmcv/source_zh/api/vpp_csc_matrix_convert.rst index d786b64..8662646 100644 --- a/bmvid/document/bmcv/source_zh/api/vpp_csc_matrix_convert.rst +++ b/bmvid/document/bmcv/source_zh/api/vpp_csc_matrix_convert.rst @@ -14,6 +14,12 @@ bmcv_image_vpp_csc_matrix_convert csc_matrix_t * matrix = nullptr, bmcv_resize_algorithm algorithm = BMCV_INTER_LINEAR); + 
+**处理器型号支持:** + +该接口支持BM1684/BM1684X。 + + **传入参数说明:** * bm_handle_t handle @@ -122,7 +128,7 @@ bmcv_image_vpp_csc_matrix_convert 3. 如果csc == CSC_USER_DEFINED_MATRIX而matrix为nullptr,则返回失败。 -**代码示例:** +**代码示例:** .. code-block:: c @@ -136,14 +142,14 @@ bmcv_image_vpp_csc_matrix_convert #include "stdlib.h" #include #include - + int main(int argc, char *argv[]) { bm_handle_t handle; int image_h = 1080; int image_w = 1920; bm_image src, dst[4]; bm_dev_request(&handle, 0); - bm_image_create(handle, image_h, image_w, FORMAT_NV12, + bm_image_create(handle, image_h, image_w, FORMAT_NV12, DATA_TYPE_EXT_1N_BYTE, &src); bm_image_alloc_dev_mem(src, 1); for (int i = 0; i < 4; i++) { @@ -161,21 +167,21 @@ bmcv_image_vpp_csc_matrix_convert memset((void *)(uv_ptr.get()), 158, image_h * image_w / 2); u8 *host_ptr[] = {y_ptr.get(), uv_ptr.get()}; bm_image_copy_host_to_device(src, (void **)host_ptr); - + bmcv_rect_t rect[] = {{0, 0, image_w / 2, image_h / 2}, {0, image_h / 2, image_w / 2, image_h / 2}, {image_w / 2, 0, image_w / 2, image_h / 2}, {image_w / 2, image_h / 2, image_w / 2, image_h / 2}}; - + bmcv_image_vpp_csc_matrix_convert(handle, 4, src, dst, CSC_YCbCr2RGB_BT601); - + for (int i = 0; i < 4; i++) { bm_image_destroy(dst[i]); } - + bm_image_destroy(src); bm_dev_free(handle); return 0; } - + diff --git a/bmvid/document/bmcv/source_zh/api/vpp_stitch.rst b/bmvid/document/bmcv/source_zh/api/vpp_stitch.rst index 36ea17c..3951fdc 100644 --- a/bmvid/document/bmcv/source_zh/api/vpp_stitch.rst +++ b/bmvid/document/bmcv/source_zh/api/vpp_stitch.rst @@ -15,6 +15,12 @@ bmcv_image_vpp_stitch bmcv_rect_t* src_crop_rect = NULL, bmcv_resize_algorithm algorithm = BMCV_INTER_LINEAR); + +**处理器型号支持:** + +该接口支持BM1684/BM1684X。 + + **传入参数说明:** * bm_handle_t handle diff --git a/bmvid/document/bmcv/source_zh/api/warp_affine.rst b/bmvid/document/bmcv/source_zh/api/warp_affine.rst index 47dc672..17cef4e 100644 --- a/bmvid/document/bmcv/source_zh/api/warp_affine.rst +++ 
b/bmvid/document/bmcv/source_zh/api/warp_affine.rst @@ -40,6 +40,11 @@ bmcv_affine_matrix 定义了一个坐标变换矩阵,其顺序为 float m[6] = } bmcv_affine_image_matrix; +**处理器型号支持:** + +该接口支持BM1684/BM1684X。 + + **接口形式一:** .. code-block:: c @@ -65,7 +70,7 @@ bmcv_affine_matrix 定义了一个坐标变换矩阵,其顺序为 float m[6] = bm_image* output, int use_bilinear = 0 ); -本接口是对齐opencv仿射变换的接口。 +本接口是对齐opencv仿射变换的接口, 该矩阵是从输入图像坐标推导输出图像坐标的系数矩阵。 **输入参数说明** diff --git a/bmvid/document/bmcv/source_zh/api/warp_perspective.rst b/bmvid/document/bmcv/source_zh/api/warp_perspective.rst index 3f72916..75f5f0b 100644 --- a/bmvid/document/bmcv/source_zh/api/warp_perspective.rst +++ b/bmvid/document/bmcv/source_zh/api/warp_perspective.rst @@ -4,6 +4,7 @@ bmcv_image_warp_perspective 该接口实现图像的透射变换,又称投影变换或透视变换。透射变换将图片投影到一个新的视平面,是一种二维坐标 (:math:`x_0` , :math:`y_0`) 到二维坐标(:math:`x` , :math:`y`)的非线性变换,该接口的实现是针对输出图像的每一个像素点坐标得到对应输入图像的坐标,然后构成一幅新的图像,其数学表达式形式如下: + .. math:: \left\{ @@ -45,6 +46,10 @@ bmcv_image_warp_perspective :align: center +**处理器型号支持:** + +该接口支持BM1684/BM1684X。 + **接口形式一:** diff --git a/bmvid/document/bmcv/source_zh/api/watermask_superpose.rst b/bmvid/document/bmcv/source_zh/api/watermask_superpose.rst index 01a8b8a..b8ad67f 100644 --- a/bmvid/document/bmcv/source_zh/api/watermask_superpose.rst +++ b/bmvid/document/bmcv/source_zh/api/watermask_superpose.rst @@ -1,7 +1,14 @@ bmcv_image_watermark_superpose ========================= + 该接口用于在图像上叠加一个或多个水印。 + +**处理器型号支持:** + +该接口仅支持BM1684X。 + + **接口形式一:** .. code-block:: c @@ -150,10 +157,8 @@ bmcv_image_watermark_superpose 如果不满足输入输出格式要求,则返回失败。 -2. bm1684部分:bm1684不支持水印功能。 - -3. 输入输出所有 bm_image 结构必须提前创建,否则返回失败。 +2. 输入输出所有 bm_image 结构必须提前创建,否则返回失败。 -4. 水印数量最多可设置512个。 +3. 水印数量最多可设置512个。 -5. 如果水印区域超出原图宽高,会返回失败。 +4. 
如果水印区域超出原图宽高,会返回失败。 diff --git a/bmvid/document/bmcv/source_zh/api/yolo_nms.rst b/bmvid/document/bmcv/source_zh/api/yolo_nms.rst index 5fb8e3e..72fd9ba 100644 --- a/bmvid/document/bmcv/source_zh/api/yolo_nms.rst +++ b/bmvid/document/bmcv/source_zh/api/yolo_nms.rst @@ -4,6 +4,11 @@ bmcv_nms_yolo 该接口目前支持yolov3/yolov7,用于消除网络计算得到过多的物体框,并找到最佳物体框。 +**处理器型号支持:** + +该接口支持BM1684/BM1684X。 + + **接口形式:** .. code-block:: c diff --git a/bmvid/document/bmcv/source_zh/api/yuv2bgr.rst b/bmvid/document/bmcv/source_zh/api/yuv2bgr.rst index 72fb133..0a254a8 100644 --- a/bmvid/document/bmcv/source_zh/api/yuv2bgr.rst +++ b/bmvid/document/bmcv/source_zh/api/yuv2bgr.rst @@ -3,6 +3,12 @@ bmcv_image_yuv2bgr_ext 该接口实现YUV格式到RGB格式的转换。 + +**处理器型号支持:** + +该接口支持BM1684/BM1684X。 + + **接口形式:** .. code-block:: c @@ -55,18 +61,18 @@ bmcv_image_yuv2bgr_ext #include "stdlib.h" #include "string.h" #include - + int main(int argc, char *argv[]) { bm_handle_t handle; bm_dev_request(&handle, 0); - + int image_n = 1; int image_h = 1080; int image_w = 1920; bm_image src, dst; - bm_image_create(handle, image_h, image_w, FORMAT_NV12, + bm_image_create(handle, image_h, image_w, FORMAT_NV12, DATA_TYPE_EXT_1N_BYTE, &src); - bm_image_create(handle, image_h, image_w, FORMAT_BGR_PLANAR, + bm_image_create(handle, image_h, image_w, FORMAT_BGR_PLANAR, DATA_TYPE_EXT_1N_BYTE, &dst); std::shared_ptr y_ptr = std::make_shared( new u8[image_h * image_w]); diff --git a/bmvid/document/bmcv/source_zh/api/yuv2hsv.rst b/bmvid/document/bmcv/source_zh/api/yuv2hsv.rst index 719be02..b009006 100644 --- a/bmvid/document/bmcv/source_zh/api/yuv2hsv.rst +++ b/bmvid/document/bmcv/source_zh/api/yuv2hsv.rst @@ -4,6 +4,11 @@ bmcv_image_yuv2hsv 对YUV图像的指定区域转为HSV格式。 +**处理器型号支持:** + +该接口支持BM1684/BM1684X。 + + **接口形式:** .. 
code-block:: c diff --git a/bmvid/document/bmcv/source_zh/bm_image/bm_image_write_to_bmp.rst b/bmvid/document/bmcv/source_zh/bm_image/bm_image_write_to_bmp.rst new file mode 100644 index 0000000..7dc63fd --- /dev/null +++ b/bmvid/document/bmcv/source_zh/bm_image/bm_image_write_to_bmp.rst @@ -0,0 +1,34 @@ +bm_image_write_to_bmp +===================== + +该接口用于将bm_image对象输出为位图(.bmp)。 + + +**接口形式:** + + .. code-block:: c + + bm_status_t bm_image_write_to_bmp( + bm_image input, + const char* filename); + +**参数说明:** + +* bm_image input + +输入参数。输入 bm_image。 + +* const char\* filename + +输入参数。保存的位图文件路径以及文件名称。 + + +**返回值说明:** + +* BM_SUCCESS: 成功 + +* 其他:失败 + +**注意事项:** + +1. 在调用 bm_image_write_to_bmp()之前必须确保输入的 image 已被正确创建并保证is_attached,否则该函数将返回失败。 diff --git a/bmvid/document/bmcv/source_zh/bm_image/bmcv_calc_cbcr_addr.rst b/bmvid/document/bmcv/source_zh/bm_image/bmcv_calc_cbcr_addr.rst new file mode 100644 index 0000000..805b4da --- /dev/null +++ b/bmvid/document/bmcv/source_zh/bm_image/bmcv_calc_cbcr_addr.rst @@ -0,0 +1,61 @@ +bmcv_calc_cbcr_addr +=================== + +视频解码(Vdec)输出的压缩格式的地址时,可以通过 Y 压缩数据的物理地址,Y 通道数据的stride,以及原图的高,计算得出 CbCr 压缩数据 的物理地址。 +此接口主要用于匹配内部解码器的压缩格式。使用方法请看示例。 + +**接口形式:** + + .. code-block:: c + + unsigned long long bmcv_calc_cbcr_addr( + unsigned long long y_addr, + unsigned int y_stride, + unsigned int frame_height); + +**输入参数说明:** + +* unsigned long long y_addr + + 输入参数。Y 压缩数据的物理地址。 + +* unsigned int y_stride + + 输入参数。Y 压缩数据的stride。 + +* unsigned int frame_height + + 输入参数。原图的高。 + + +**返回值说明:** + +返回值即为CbCr 压缩数据 的物理地址。 + + **示例代码** + + .. 
code-block:: c + + + bm_image src; + unsigned long long cbcr_addr; + bm_image_create(bm_handle, + pFrame->height, + pFrame->width, + FORMAT_COMPRESSED, + DATA_TYPE_EXT_1N_BYTE, + &src, + NULL); + bm_device_mem_t input_addr[4]; + int size = pFrame->height * pFrame->stride[4]; + input_addr[0] = bm_mem_from_device((unsigned long long)pFrame->buf[6], size); + size = (pFrame->height / 2) * pFrame->stride[5]; + input_addr[1] = bm_mem_from_device((unsigned long long)pFrame->buf[4], size); + size = pFrame->stride[6]; + input_addr[2] = bm_mem_from_device((unsigned long long)pFrame->buf[7], size); + size = pFrame->stride[7]; + cbcr_addr = bmcv_calc_cbcr_addr((unsigned long long)pFrame->buf[4], pFrame->stride[5], pFrame->height); + input_addr[3] = bm_mem_from_device(cbcr_addr, 0); + bm_image_attach(src, input_addr); + + diff --git a/bmvid/document/bmcv/source_zh/bmcv.rst b/bmvid/document/bmcv/source_zh/bmcv.rst index 438cf51..ab2f92e 100644 --- a/bmvid/document/bmcv/source_zh/bmcv.rst +++ b/bmvid/document/bmcv/source_zh/bmcv.rst @@ -1,4 +1,4 @@ BMCV 介绍 ========= -BMCV 提供了一套基于 Sophon AI 芯片优化的机器视觉库,通过利用芯片的 TPU 和 VPP 模块,可以完成色彩空间转换、尺度变换、仿射变换、透射变换、线性变换、画框、JPEG编解码、BASE64编解码、NMS、排序、特征匹配等操作。 +BMCV 提供了一套基于 SOPHON Deep learning 处理器优化的机器视觉库,通过利用处理器的 Tensor Computing Processor 和 VPP 模块,可以完成色彩空间转换、尺度变换、仿射变换、透射变换、线性变换、画框、JPEG编解码、BASE64编解码、NMS、排序、特征匹配等操作。 diff --git a/bmvid/document/bmcv/source_zh/index.rst b/bmvid/document/bmcv/source_zh/index.rst index e1ec2d1..7816c2c 100644 --- a/bmvid/document/bmcv/source_zh/index.rst +++ b/bmvid/document/bmcv/source_zh/index.rst @@ -48,6 +48,8 @@ bm_image 介绍 bm_image/bm_image_get_plane_num bm_image/bm_image_is_attached bm_image/bm_image_get_handle + bm_image/bm_image_write_to_bmp + bm_image/bmcv_calc_cbcr_addr bm_image device memory 管理 @@ -63,6 +65,7 @@ BMCV API :glob: api/api_introduct + api/bmcv_hist_balance api/yuv2bgr api/warp_affine api/warp_perspective @@ -124,6 +127,7 @@ BMCV API api/pyramid api/bayer2rgb api/as_strided + api/quantify PCIe CPU 
-------- diff --git a/bmvid/document/bmcv/source_zh/memory.rst b/bmvid/document/bmcv/source_zh/memory.rst index 5796e6f..29fd570 100644 --- a/bmvid/document/bmcv/source_zh/memory.rst +++ b/bmvid/document/bmcv/source_zh/memory.rst @@ -34,7 +34,7 @@ bm_image 结构需要关联相关 device memory,并且 device memory 中有你 2. bm_image 调用 bm_image_alloc_dev_mem 所申请的内存都由内部自动管理,在调用 bm_image_destroy、 bm_image_detach 或者 bm_image_attach 其他 device memory 时自动释放,无需调用者管理。相反,如果 bm_image_attach 一块 device memory 时,表示这块 memory 将由调用者自己管理。无论是 bm_image_destroy、bm_image_detach,或者再调用 bm_image_attach 其他 device memory,均不会释放,需要调用者手动释放。 -3. 目前 device memory 分为三块内存空间:heap0、heap1和heap2。三者的区别在于bm1684 芯片的硬件 VPP 模块是否有读取权限,其他完全相同,因此如果某一 API 需要指定使用bm1684 硬件VPP模块来实现,则必须保证该 API 的输入 bm_image 保存在 heap1 或者 heap2 空间上。 bm1684x vpp无此限制。 +3. 目前 device memory 分为三块内存空间:heap0、heap1和heap2。三者的区别在于bm1684 处理器的硬件 VPP 模块是否有读取权限,其他完全相同,因此如果某一 API 需要指定使用bm1684 硬件VPP模块来实现,则必须保证该 API 的输入 bm_image 保存在 heap1 或者 heap2 空间上。 bm1684x vpp无此限制。 +------------------+------------------+------------------+ | heap id | bm1684 VPP | bm1684x VPP | diff --git a/bmvid/document/bmcv/source_zh/pcie_cpu.rst b/bmvid/document/bmcv/source_zh/pcie_cpu.rst index 0ce912b..408d730 100644 --- a/bmvid/document/bmcv/source_zh/pcie_cpu.rst +++ b/bmvid/document/bmcv/source_zh/pcie_cpu.rst @@ -1,11 +1,11 @@ PCIe CPU ========== -对于不方便使用 TPU 加速的操作,需要 CPU 配合来完成。 +对于不方便使用 Tensor Computing Processor 加速的操作,需要 Processor 配合来完成。 -如果是 SoC 模式,host端即为片上的ARM A53处理器,由它来完成CPU操作。 +如果是 SoC 模式,host端即为片上的ARM A53处理器,由它来完成Processor操作。 -如果是 PCIe 模式,host端为用户的主机,CPU 操作可以选择在host端完成,也可以使用片上的ARM A53处理器来完成。两种实现方式各有优缺点:前者需要在device和host之间搬运输入输出数据,但运算性能可能优于ARM,所以用户可以根据自身host处理器性能、负载等实际情况选择最优的方式。默认情况下为前者,如果需要使用片上处理器可按照以下方式开启。 +如果是 PCIe 模式,host端为用户的主机,Processor 操作可以选择在host端完成,也可以使用片上的ARM A53处理器来完成。两种实现方式各有优缺点:前者需要在device和host之间搬运输入输出数据,但运算性能可能优于ARM,所以用户可以根据自身host处理器性能、负载等实际情况选择最优的方式。默认情况下为前者,如果需要使用片上处理器可按照以下方式开启。 准备工作 @@ -21,11 +21,11 @@ ________ $ export BMCV_CPU_KERNEL_PATH=/path/to/kernel_fils/ 
-BMCV所有需要CPU操作的实现均在库 libbmcv_cpu_func.so 中,需要将该文件所在路径添加到程序运行的环境变量 BMCV_CPU_LIB_PATH 中,如下: +BMCV所有需要Processor操作的实现均在库 libbmcv_cpu_func.so 中,需要将该文件所在路径添加到程序运行的环境变量 BMCV_CPU_LIB_PATH 中,如下: $ export BMCV_CPU_LIB_PATH=/path/to/lib/ -目前需要CPU参与实现的API如下所示,如果没有使用以下API可忽略该功能。 +目前需要Processor参与实现的API如下所示,如果没有使用以下API可忽略该功能。 +-----+-----------------------------------+ | num | API | diff --git a/bmvid/example/bm_test.c b/bmvid/example/bm_test.c index 68bcc55..95bdb82 100644 --- a/bmvid/example/bm_test.c +++ b/bmvid/example/bm_test.c @@ -1,19 +1,17 @@ -//--=========================================================================-- -// This file is a part of VPUAPI -//----------------------------------------------------------------------------- -// -// This confidential and proprietary software may be used only -// as authorized by a licensing agreement from Chips&Media Inc. -// In the event of publication, the following notice is applicable: -// -// (C) COPYRIGHT 2004 - 2014 CHIPS&MEDIA INC. -// ALL RIGHTS RESERVED -// -// The entire notice above must be reproduced on all authorized -// copies. -// -//----------------------------------------------------------------------------- +/***************************************************************************** + * + * Copyright (C) 2022 Sophgo Technologies Inc. All rights reserved. + * + * bmvid is licensed under the 2-Clause BSD License except for the + * third-party components. + * + *****************************************************************************/ +/* This library provides a high-level interface for controlling the BitMain + * Sophon VPU en/decoder. + */ #include +#include +#include #include #ifdef __linux__ #include @@ -26,25 +24,45 @@ #include "windows/libusb-1.0.18/examples/getopt/getopt.h" #endif -#include "bm_video_interface.h" -#include "bm_video_internal.h" +/* + * vpuapifunc.h is for the compatibility with some operations on Windows and Linux, such as create thread, get time. + * util.h is for md5 calculate. 
+ * + * user can use their own tools rather than these headers. + */ #include "vpuapifunc.h" -#include "main_helper.h" +#include "util.h" +#include "bm_vpudec_interface.h" +#include "bmlib_runtime.h" + +#define VPU_ALIGN16(_x) (((_x)+0x0f)&~0x0f) +#define VPU_ALIGN32(_x) (((_x)+0x1f)&~0x1f) +#define VPU_ALIGN256(_x) (((_x)+0xff)&~0xff) +#define VPU_ALIGN4096(_x) (((_x)+0xfff)&~0xfff) + +#define HEAP_MASK 0x07 +#define INTERVAL (10) #define defaultReadBlockLen 0x80000 +#define BM_VPU_DEC_LITTLE_ENDIAN 0 int readBlockLen = defaultReadBlockLen; int injectionPercent = 0; int injectLost = 1; // default as lost data, or scramble the data int injectWholeBlock = 0; typedef struct BMTestConfig_struct { - int streamFormat; + int run_times; int compareType; int compareNum; int instanceNum; - int wirteYuv; + int cbcr_interleave; + int nv21; + int writeYuv; + BmVpuDecBitStreamMode bsMode; + BmVpuDecOutputMapType wtlFormat; + BmVpuDecStreamFormat streamFormat; #ifdef BM_PCIE_MODE int pcie_board_id; #endif @@ -52,14 +70,25 @@ typedef struct BMTestConfig_struct { int log_level; int compare_skip_num; int first_comp_idx; - - FrameBufferFormat wtlFormat; + int decode_order; + int width; + int height; + + int mem_alloc_type; + int extraFrame; + int min_frame_cnt; + int frame_delay; BMVidCodHandle vidCodHandle; char outputPath[MAX_FILE_PATH]; char inputPath[MAX_FILE_PATH]; char refYuvPath[MAX_FILE_PATH]; + osal_file_t* fpIn; } BMTestConfig; +uint64_t count_enc[MAX_NUM_INSTANCE * MAX_NUM_VPU_CORE]; +double fps_enc[MAX_NUM_INSTANCE * MAX_NUM_VPU_CORE]; +int g_exit_flag = 0; + int my_memcmp(const void* src, const void* dst, int size) { u64 *src_64, *dst_64; @@ -96,52 +125,127 @@ int my_memcmp(const void* src, const void* dst, int size) return 0; } -int ret = 0; +int global_ret = 0; static int parse_args(int argc, char **argv, BMTestConfig* par); +static void stat_pthread(void *arg) +{ + int thread_num = *(int*)arg; + int i = 0; + uint64_t last_count_enc[MAX_NUM_INSTANCE * 
MAX_NUM_VPU_CORE] = {0}; + uint64_t last_count_sum = 0; + int dis_mode = 0; + char *display = getenv("BMVPUDEC_DISPLAY_FRAMERATE"); + if (display) dis_mode = atoi(display); + printf("BMVPUDEC_DISPLAY_FRAMERATE=%d thread_num=%d g_exit_flag=%d \n", dis_mode, thread_num, g_exit_flag); + while(!g_exit_flag) { +#ifdef __linux__ + sleep(INTERVAL); +#elif _WIN32 + Sleep(INTERVAL*1000); +#endif + if (dis_mode == 1) { + for (i = 0; i < thread_num; i++) { + if (i == 0) { + printf("ID[%d], FRM[%10lld], FPS[%2.2lf]\n", + i, (long long)count_enc[i], ((double)(count_enc[i]-last_count_enc[i]))/INTERVAL); + } else { + printf("ID[%d] , FRM[%10lld], FPS[%2.2lf] \n", + i, (long long)count_enc[i], ((double)(count_enc[i]-last_count_enc[i]))/INTERVAL); + } + last_count_enc[i] = count_enc[i]; + } + } else { + uint64_t count_sum = 0; + for (i = 0; i < thread_num; i++) + count_sum += count_enc[i]; + printf("thread %d, frame %ld, fps %2.2f\n", thread_num, count_sum, ((double)(count_sum-last_count_sum))/INTERVAL); + last_count_sum = count_sum; + } + osal_fflush(stdout); + } + printf("stat_pthread over.\n"); + return; +} + +static void process_frame(uint8_t *buf0, uint8_t *buf1, uint8_t *buf2, int stride, + int width, int height, int is_interleave) +{ + int i; + // Set strided part to 0 to make md5 stable + if(stride != width) { + for (i = 0; i < height; i ++) { + memset(buf0 + stride * i + width , 0, stride - width); + } + if (0 == is_interleave) { + for (i = 0; i < height / 2; i ++) { + memset(buf1 + stride * i / 2 + width / 2 , 0, (stride - width) / 2); + } + for (i = 0; i < height / 2; i ++) { + memset(buf2 + stride * i / 2 + width / 2 , 0, (stride - width) / 2); + } + } else { + for (i = 0; i < height / 4; i ++) { + memset(buf1 + stride * i + width , 0, (stride - width)); + } + } + } +} + static void process_output(void* arg) { BMTestConfig *testConfigPara = (BMTestConfig *)arg; - BMVidCodHandle vidCodHandle = (BMVidCodHandle)testConfigPara->vidCodHandle; - BMVidHandle vidHandle = 
(BMVidHandle)vidCodHandle; + BMVidCodHandle vidHandle = (BMVidCodHandle)testConfigPara->vidCodHandle; #ifdef BM_PCIE_MODE - DecHandle decHandle = vidHandle->codecInst; - int coreIdx = decHandle->coreIdx; + int coreIdx = bmvpu_dec_get_core_idx(vidHandle); #endif - BMVidFrame *pFrame=NULL; + BMVidFrame *pFrame = (BMVidFrame*)malloc(sizeof(BMVidFrame)); BOOL match, alloced = FALSE, result = TRUE; - osal_file_t* fpRef; - Uint8* pRefMem = NULL; - Int32 readLen = -1, compare = 0, compareNum = 1; - Uint32 frameSize = 0, ySize = 0, allocedSize = 0; - int total_frame = 0, cur_frame_idx = 0; - Uint64 start_time, dec_time; - VLOG(INFO, "Enter process_output!\n"); + osal_file_t *fpRef = NULL, fpOutput = NULL; + uint8_t* pRefMem = NULL; + int32_t readLen = -1, compare = 0, compareNum = 1; + uint32_t frameSize = 0, ySize = 0, allocedSize = 0; + int total_frame = 0, cur_frame_idx = 0, ret = 0; + uint64_t start_time, dec_time; + printf("Enter process_output!\n"); #ifdef BM_PCIE_MODE - int frame_write_num = testConfigPara->wirteYuv; + int frame_write_num = testConfigPara->writeYuv; int writeYuv = 0; - writeYuv = testConfigPara->wirteYuv; + writeYuv = testConfigPara->writeYuv; #endif - if (testConfigPara->refYuvPath) + if (strcmp(testConfigPara->refYuvPath, "")) { fpRef = osal_fopen(testConfigPara->refYuvPath, "rb"); if(fpRef == NULL) { - VLOG(ERR, "Can't open reference yuv file: %s\n", testConfigPara->refYuvPath); + fprintf(stderr, "Can't open reference yuv file: %s\n", testConfigPara->refYuvPath); } else { compare = testConfigPara->compareType; } } + + if (strcmp(testConfigPara->outputPath, "")) + { + fpOutput = osal_fopen(testConfigPara->outputPath, "wb"); + if(fpOutput == NULL) + { + fprintf(stderr, "Can't open output yuv file: %s\n", testConfigPara->outputPath); + } + } else { + fpOutput = NULL; + } + start_time = osal_gettime(); - while(BMVidGetStatus(vidHandle)<=BMDEC_CLOSE) + + while(bmvpu_dec_get_status(vidHandle) <= BMDEC_CLOSE) { - if((pFrame = 
BMVidDecGetOutput(vidHandle)) != NULL) + if(bmvpu_dec_get_output(vidHandle, pFrame) == 0) { - //VLOG(INFO, "get a frame, display index: %d\n", pFrame->frameIdx); + //printf("get a frame, display index: %d\n", pFrame->frameIdx); /* compare yuv if there is reference yum file */ if(compare) @@ -151,16 +255,16 @@ static void process_output(void* arg) if((alloced == TRUE) && (frameSize != allocedSize)) //if sequence change, free memory then malloc { - osal_free(pRefMem); + free(pRefMem); alloced = FALSE; } if(alloced == FALSE) { - pRefMem = osal_malloc(frameSize); + pRefMem = malloc(frameSize); if(pRefMem == NULL) { - VLOG(ERR, "Can't alloc reference yuv memory\n"); + fprintf(stderr, "Can't alloc reference yuv memory\n"); break; } alloced = TRUE; @@ -183,8 +287,9 @@ static void process_output(void* arg) //printf("first compare frame idx : %d\n", testConfigPara->first_comp_idx); } dec_time = osal_gettime(); - if(dec_time - start_time != 0) - printf("Inst %d: fps %.2f, passed!\n", testConfigPara->instanceNum, (float)total_frame*1000/(dec_time-start_time)); + if(dec_time - start_time != 0) { + fps_enc[testConfigPara->instanceNum] = (float)total_frame*1000/(dec_time-start_time); + } start_time = dec_time; compareNum++; cur_frame_idx = 0; @@ -197,25 +302,26 @@ static void process_output(void* arg) //result = TRUE; //testConfigPara->result = TRUE; cur_frame_idx += 1; - BMVidDecClearOutput(vidHandle, pFrame); + bmvpu_dec_clear_output(vidHandle, pFrame); + count_enc[testConfigPara->instanceNum]++; continue; } - - osal_fseek(fpRef, frameSize*cur_frame_idx, SEEK_SET); + osal_fseek(fpRef, (long)frameSize*cur_frame_idx, SEEK_SET); readLen = osal_fread(pRefMem, 1, frameSize, fpRef); if(readLen == frameSize) { - Uint8 *buf0; - Uint8 *buf1; - Uint8 *buf2; + uint8_t *buf0 = NULL; + uint8_t *buf1 = NULL; + uint8_t *buf2 = NULL; #ifdef BM_PCIE_MODE if(pFrame->size > 0 && pFrame->size <= 8192*4096*3) { //optimize the framebuffer cdma copy. 
buf0 = malloc(pFrame->size); buf1 = buf0 + (unsigned int)(pFrame->buf[5] - pFrame->buf[4]); - buf2 = buf0 + (unsigned int)(pFrame->buf[6] - pFrame->buf[4]); + if(!testConfigPara->cbcr_interleave) + buf2 = buf0 + (unsigned int)(pFrame->buf[6] - pFrame->buf[4]); } else { buf0 = NULL; @@ -225,32 +331,43 @@ static void process_output(void* arg) result = FALSE; break; } - vdi_read_memory(coreIdx, (u64)(pFrame->buf[4]), buf0, pFrame->size, VDI_LITTLE_ENDIAN); + bmvpu_dec_read_memory(coreIdx, (u64)(pFrame->buf[4]), buf0, pFrame->size, BM_VPU_DEC_LITTLE_ENDIAN); #else buf0 = pFrame->buf[0]; buf1 = pFrame->buf[1]; - buf2 = pFrame->buf[2]; + if(!testConfigPara->cbcr_interleave) + buf2 = pFrame->buf[2]; #endif match = (memcmp(pRefMem, (void*)buf0, ySize) == 0 ? TRUE : FALSE); if (match == FALSE) { result = FALSE; - VLOG(ERR, "MISMATCH WITH GOLDEN Y in frame %d\n", cur_frame_idx); + fprintf(stderr, "MISMATCH WITH GOLDEN Y in frame %d\n", cur_frame_idx); } - match = (memcmp(pRefMem+ySize, (void*)buf1, ySize/4) == 0 ? TRUE : FALSE); - if (match == FALSE) + if(testConfigPara->cbcr_interleave) { - result = FALSE; - VLOG(ERR, "MISMATCH WITH GOLDEN U in frame %d\n", cur_frame_idx); - } + match = (memcmp(pRefMem+ySize, (void*)buf1, ySize/2) == 0 ? TRUE : FALSE); + if (match == FALSE) + { + result = FALSE; + fprintf(stderr, "MISMATCH WITH GOLDEN chroma in frame %d\n", cur_frame_idx); + } + } else { + match = (memcmp(pRefMem+ySize, (void*)buf1, ySize/4) == 0 ? TRUE : FALSE); + if (match == FALSE) + { + result = FALSE; + fprintf(stderr, "MISMATCH WITH GOLDEN U in frame %d\n", cur_frame_idx); + } - match = (memcmp(pRefMem+ySize*5/4, (void*)buf2, ySize/4) == 0 ? TRUE : FALSE); - if (match == FALSE) - { - result = FALSE; - VLOG(ERR, "MISMATCH WITH GOLDEN V in frame %d\n", cur_frame_idx); + match = (memcmp(pRefMem+ySize*5/4, (void*)buf2, ySize/4) == 0 ? 
TRUE : FALSE); + if (match == FALSE) + { + result = FALSE; + fprintf(stderr, "MISMATCH WITH GOLDEN V in frame %d\n", cur_frame_idx); + } } #ifdef BM_PCIE_MODE free(buf0); @@ -259,7 +376,7 @@ static void process_output(void* arg) else { result = FALSE; - VLOG(ERR, "NOT ENOUGH DATA\n"); + fprintf(stderr, "NOT ENOUGH DATA\n"); } } else if((compare == 2) && (result == TRUE)) //yuv md5 compare @@ -268,25 +385,29 @@ static void process_output(void* arg) char yMd5Str[33], uMd5Str[33], vMd5Str[33]; char yRefMd5Str[33], uRefMd5Str[33], vRefMd5Str[33]; int i; + if(cur_frame_idxfirst_comp_idx || (cur_frame_idx-testConfigPara->first_comp_idx)%(testConfigPara->compare_skip_num+1) != 0) { //result = TRUE; //testConfigPara->result = TRUE; cur_frame_idx += 1; - BMVidDecClearOutput(vidHandle, pFrame); + bmvpu_dec_clear_output(vidHandle, pFrame); + count_enc[testConfigPara->instanceNum]++; continue; } osal_fseek(fpRef, 99*cur_frame_idx, SEEK_SET); + + uint8_t *buf0 = NULL; + uint8_t *buf1 = NULL; + uint8_t *buf2 = NULL; #ifdef BM_PCIE_MODE - Uint8 *buf0 = NULL; - Uint8 *buf1; - Uint8 *buf2; if(pFrame->size > 0 && pFrame->size <= 8192*4096*3) { //optimize the framebuffer cdma copy. 
buf0 = malloc(pFrame->size); buf1 = buf0 + (unsigned int)(pFrame->buf[5] - pFrame->buf[4]); - buf2 = buf0 + (unsigned int)(pFrame->buf[6] - pFrame->buf[4]); + if(!testConfigPara->cbcr_interleave) + buf2 = buf0 + (unsigned int)(pFrame->buf[6] - pFrame->buf[4]); } else { buf0 = NULL; @@ -296,129 +417,144 @@ static void process_output(void* arg) result = FALSE; break; } - vdi_read_memory(coreIdx, (u64)(pFrame->buf[4]), buf0, pFrame->size, VDI_LITTLE_ENDIAN); + bmvpu_dec_read_memory(coreIdx, (u64)(pFrame->buf[4]), buf0, pFrame->size, BM_VPU_DEC_LITTLE_ENDIAN); +#else + buf0 = pFrame->buf[0]; + buf1 = pFrame->buf[1]; + if(!testConfigPara->cbcr_interleave) + buf2 = pFrame->buf[2]; +#endif + process_frame(buf0, buf1, buf2, pFrame->stride[0], + pFrame->width, pFrame->height, testConfigPara->cbcr_interleave); MD5(buf0, ySize, yMd); - MD5(buf1, ySize/4, uMd); - MD5(buf2, ySize/4, vMd); - + if(testConfigPara->cbcr_interleave) + { + MD5(buf1, ySize/2, uMd); + } else { + MD5(buf1, ySize/4, uMd); + MD5(buf2, ySize/4, vMd); + } +#ifdef BM_PCIE_MODE free(buf0); -#else - MD5((Uint8*)pFrame->buf[0], ySize, yMd); - MD5((Uint8*)pFrame->buf[1], ySize/4, uMd); - MD5((Uint8*)pFrame->buf[2], ySize/4, vMd); #endif - for(i=0; i<16; i++) { snprintf(yMd5Str + i*2, 2+1, "%02x", yMd[i]); snprintf(uMd5Str + i*2, 2+1, "%02x", uMd[i]); - snprintf(vMd5Str + i*2, 2+1, "%02x", vMd[i]); + if(!testConfigPara->cbcr_interleave) + snprintf(vMd5Str + i*2, 2+1, "%02x", vMd[i]); } readLen = osal_fread(pRefMem, 1, 99, fpRef); if(readLen == 99) { - match = (osal_memcmp(pRefMem, yMd5Str, 32) == 0 ? TRUE : FALSE); + match = (memcmp(pRefMem, yMd5Str, 32) == 0 ? TRUE : FALSE); if (match == FALSE) { snprintf(yRefMd5Str, 33, "%s", (char *)pRefMem); result = FALSE; - VLOG(ERR, "MISMATCH WITH GOLDEN Y in frame %d, %s, %s\n", cur_frame_idx, yMd5Str, yRefMd5Str); + fprintf(stderr, "MISMATCH WITH GOLDEN Y in frame %d, %s, %s\n", cur_frame_idx, yMd5Str, yRefMd5Str); } - match = (osal_memcmp(pRefMem+33, uMd5Str, 32) == 0 ? 
TRUE : FALSE); + match = (memcmp(pRefMem+33, uMd5Str, 32) == 0 ? TRUE : FALSE); if (match == FALSE) { snprintf(uRefMd5Str, 33, "%s", (char *)(pRefMem+33)); result = FALSE; - VLOG(ERR, "MISMATCH WITH GOLDEN U in frame %d, %s, %s\n", cur_frame_idx, uMd5Str, uRefMd5Str); + fprintf(stderr, "MISMATCH WITH GOLDEN U in frame %d, %s, %s\n", cur_frame_idx, uMd5Str, uRefMd5Str); } - match = (osal_memcmp(pRefMem+66, vMd5Str, 32) == 0 ? TRUE : FALSE); - if (match == FALSE) + if(!testConfigPara->cbcr_interleave) { - snprintf(vRefMd5Str, 33, "%s", (char *)(pRefMem+66)); - result = FALSE; - VLOG(ERR, "MISMATCH WITH GOLDEN V in frame %d, %s, %s\n", cur_frame_idx, vMd5Str, vRefMd5Str); + match = (memcmp(pRefMem+66, vMd5Str, 32) == 0 ? TRUE : FALSE); + if (match == FALSE) + { + snprintf(vRefMd5Str, 33, "%s", (char *)(pRefMem+66)); + result = FALSE; + fprintf(stderr, "MISMATCH WITH GOLDEN V in frame %d, %s, %s\n", cur_frame_idx, vMd5Str, vRefMd5Str); + } } } else { result = FALSE; - VLOG(ERR, "NOT ENOUGH DATA\n"); + fprintf(stderr, "NOT ENOUGH DATA\n"); } } if(result == FALSE && testConfigPara->result == TRUE) { int stream_size = 0x700000; int len = 0; - int core_idx = getcoreidx(vidHandle); - int inst_idx = BMVidVpuGetInstIdx(vidHandle); + int core_idx = bmvpu_dec_get_core_idx(vidHandle); + int inst_idx = bmvpu_dec_get_inst_idx(vidHandle); unsigned char *p_stream = malloc(stream_size); if(ySize != 0) { char yuv_filename[256]={0}; FILE *fp = NULL; sprintf(yuv_filename, "core%d_inst%d_frame%d_dump_%dx%d.yuv", core_idx, inst_idx, cur_frame_idx, pFrame->width, pFrame->height); - VLOG(ERR, "get error and dump yuvfile: %s\n", yuv_filename); + fprintf(stderr, "get error and dump yuvfile: %s\n", yuv_filename); fp = fopen(yuv_filename, "wb+"); if(fp != NULL) { #ifndef BM_PCIE_MODE fwrite(pFrame->buf[0], 1, ySize, fp); - fwrite(pFrame->buf[1], 1, ySize/4, fp); - fwrite(pFrame->buf[2], 1, ySize/4, fp); + if(testConfigPara->cbcr_interleave) + { + fwrite(pFrame->buf[1], 1, ySize/2, fp); + } else 
{ + fwrite(pFrame->buf[1], 1, ySize/4, fp); + fwrite(pFrame->buf[2], 1, ySize/4, fp); + } #else - Uint8 *buf0 = malloc(ySize); - Uint8 *buf1 = malloc(ySize/4); - Uint8 *buf2 = malloc(ySize/4); + uint8_t *buf = malloc(frameSize); - vdi_read_memory(coreIdx, (u64)(pFrame->buf[4]), buf0, ySize, VDI_LITTLE_ENDIAN); - vdi_read_memory(coreIdx, (u64)(pFrame->buf[5]), buf1, ySize/4, VDI_LITTLE_ENDIAN); - vdi_read_memory(coreIdx, (u64)(pFrame->buf[6]), buf2, ySize/4, VDI_LITTLE_ENDIAN); - - fwrite(buf0, 1, ySize, fp); - fwrite(buf1, 1, ySize/4, fp); - fwrite(buf2, 1, ySize/4, fp); + bmvpu_dec_read_memory(coreIdx, (u64)(pFrame->buf[4]), buf, ySize, BM_VPU_DEC_LITTLE_ENDIAN); + if(testConfigPara->cbcr_interleave) + { + bmvpu_dec_read_memory(coreIdx, (u64)(pFrame->buf[5]), buf+ySize, ySize/2, BM_VPU_DEC_LITTLE_ENDIAN); + } else { + bmvpu_dec_read_memory(coreIdx, (u64)(pFrame->buf[5]), buf+ySize, ySize/4, BM_VPU_DEC_LITTLE_ENDIAN); + bmvpu_dec_read_memory(coreIdx, (u64)(pFrame->buf[6]), buf+(ySize+ySize/4), ySize/4, BM_VPU_DEC_LITTLE_ENDIAN); + } - free(buf0); - free(buf1); - free(buf2); + fwrite(buf, 1, frameSize, fp); + free(buf); #endif - fclose(fp); + osal_fclose(fp); } if(compare == 1 && pRefMem != NULL) { memset(yuv_filename, 0, 256); sprintf(yuv_filename, "core%d_inst%d_frame%d_gold_%dx%d.yuv", core_idx, inst_idx, cur_frame_idx, pFrame->width, pFrame->height); - fp = fopen(yuv_filename, "wb+"); + fp = osal_fopen(yuv_filename, "wb+"); if(fp != NULL) { fwrite(pRefMem, 1, ySize*3/2, fp); - fclose(fp); + osal_fclose(fp); } } } else { - VLOG(ERR, "get error and ySize: %d\n", ySize); + fprintf(stderr, "get error and ySize: %d\n", ySize); } if(p_stream != NULL) { - len = BMVidVpuDumpStream(vidHandle, p_stream, stream_size); + len = bmvpu_dec_dump_stream(vidHandle, p_stream, stream_size); if(len > 0) { char stream_filename[256]={0}; FILE *fp = NULL; sprintf(stream_filename, "core%d_inst%d_len%d_stream_dump.bin", core_idx, inst_idx, len); - VLOG(ERR, "get error and dump 
streamfile: %s\n", stream_filename); + fprintf(stderr, "get error and dump streamfile: %s\n", stream_filename); fp = fopen(stream_filename, "wb+"); if(fp != NULL) { fwrite(p_stream, 1, len, fp); - fclose(fp); + osal_fclose(fp); } } free(p_stream); @@ -429,9 +565,24 @@ static void process_output(void* arg) else { if(cur_frame_idx != 0 && (cur_frame_idx % 1000) == 0) { dec_time = osal_gettime(); - if(dec_time - start_time != 0) - printf("Inst %d: fps %.2f, passed!\n", testConfigPara->instanceNum, (float)(cur_frame_idx+1)*1000/(dec_time-start_time)); + if(dec_time - start_time != 0) { + fps_enc[testConfigPara->instanceNum] = (float)(cur_frame_idx+1)*1000/(dec_time-start_time); + } + } +#ifndef BM_PCIE_MODE + if (fpOutput) + { + ySize = pFrame->stride[0]*pFrame->height; + osal_fwrite(pFrame->buf[0], 1, ySize, fpOutput); + if(testConfigPara->cbcr_interleave) + { + osal_fwrite(pFrame->buf[1], 1, ySize/2, fpOutput); + } else { + osal_fwrite(pFrame->buf[1], 1, ySize/4, fpOutput); + osal_fwrite(pFrame->buf[2], 1, ySize/4, fpOutput); + } } +#endif } #ifdef BM_PCIE_MODE @@ -447,19 +598,25 @@ static void process_output(void* arg) //result = TRUE; //testConfigPara->result = TRUE; cur_frame_idx += 1; - BMVidDecClearOutput(vidHandle, pFrame); + bmvpu_dec_clear_output(vidHandle, pFrame); + count_enc[testConfigPara->instanceNum]++; continue; } - Uint8* buf0 = NULL; - Uint8* buf1 = NULL; - Uint8* buf2 = NULL; + uint8_t* buf0 = NULL; + uint8_t* buf1 = NULL; + uint8_t* buf2 = NULL; if (pFrame->size > 0 && pFrame->size <= 8192 * 4096 * 3) { //optimize the framebuffer cdma copy. 
buf0 = malloc(pFrame->size); - buf1 = buf0 + (unsigned int)(pFrame->buf[5] - pFrame->buf[4]); - buf2 = buf0 + (unsigned int)(pFrame->buf[6] - pFrame->buf[4]); + if(testConfigPara->cbcr_interleave) + { + buf1 = buf0 + (unsigned int)(pFrame->buf[5] - pFrame->buf[4]); + } else { + buf1 = buf0 + (unsigned int)(pFrame->buf[5] - pFrame->buf[4]); + buf2 = buf0 + (unsigned int)(pFrame->buf[6] - pFrame->buf[4]); + } } else { buf0 = NULL; @@ -468,24 +625,29 @@ static void process_output(void* arg) printf("the frame buffer size maybe error..\n"); break; } - vdi_read_memory(coreIdx, (u64)(pFrame->buf[4]), buf0, pFrame->size, VDI_LITTLE_ENDIAN); + bmvpu_dec_read_memory(coreIdx, (u64)(pFrame->buf[4]), buf0, pFrame->size, BM_VPU_DEC_LITTLE_ENDIAN); //printf("pFrame->size = %d\n", pFrame->size); - int core_idx = getcoreidx(vidHandle); - int inst_idx = BMVidVpuGetInstIdx(vidHandle); + int core_idx = bmvpu_dec_get_core_idx(vidHandle); + int inst_idx = bmvpu_dec_get_inst_idx(vidHandle); sprintf(yuv_filename, "core%d_inst%d_frame%d_dump_%dx%d.yuv", core_idx, inst_idx, cur_frame_idx, pFrame->width, pFrame->height); FILE* fpWriteyuv = NULL; fpWriteyuv = fopen(yuv_filename, "wb+"); if (ySize != 0) { if (fpWriteyuv != NULL) { fwrite(buf0, 1, ySize, fpWriteyuv); - fwrite(buf1, 1, ySize / 4, fpWriteyuv); - fwrite(buf2, 1, ySize / 4, fpWriteyuv); + if(testConfigPara->cbcr_interleave) + { + fwrite(buf1, 1, ySize / 2, fpWriteyuv); + } else { + fwrite(buf1, 1, ySize / 4, fpWriteyuv); + fwrite(buf2, 1, ySize / 4, fpWriteyuv); + } } } if (fpWriteyuv) { - fflush(fpWriteyuv); - fclose(fpWriteyuv); + osal_fflush(fpWriteyuv); + osal_fclose(fpWriteyuv); fpWriteyuv = NULL; } if (buf0) @@ -496,12 +658,18 @@ static void process_output(void* arg) } #endif cur_frame_idx += 1; - BMVidDecClearOutput(vidHandle, pFrame); + bmvpu_dec_clear_output(vidHandle, pFrame); + count_enc[testConfigPara->instanceNum]++; } else { - if(BMVidGetStatus(vidHandle)==BMDEC_STOP) + ret = bmvpu_dec_get_status(vidHandle); + if(ret 
== BMDEC_STOP) break; + if(ret == BMDEC_WRONG_RESOLUTION || ret == BMDEC_FRAMEBUFFER_NOTENOUGH || ret == BMDEC_HUNG) { + global_ret = -1; + break; + } #ifdef __linux__ usleep(1); #elif _WIN32 @@ -510,23 +678,69 @@ static void process_output(void* arg) } } - VLOG(INFO, "Exit process_output!\n"); + printf("Exit process_output!\n"); if(compare) { osal_fflush(fpRef); osal_fclose(fpRef); - VLOG(INFO, "Inst %d: verification %d %s!\n", testConfigPara->instanceNum, compareNum, result == TRUE ? "passed" :"failed"); - if(result == FALSE) ret = -1; + printf("Inst %d: verification %d %s!\n", testConfigPara->instanceNum, compareNum, result == TRUE ? "passed" :"failed"); + if(result == FALSE) global_ret = -1; + } + + if (fpOutput) + { + osal_fflush(fpOutput); + osal_fclose(fpOutput); } + if(pRefMem) - osal_free(pRefMem); + free(pRefMem); + + if(pFrame) + free(pFrame); + + return; +} + +static void free_frame_buffer(bm_handle_t bm_handle, int num, bm_device_mem_t *frame_buf, bm_device_mem_t *Ytab_buf, bm_device_mem_t *Ctab_buf, int compress_cnt, int linear_cnt) +{ + int i; + + if(num > (compress_cnt + linear_cnt)) + { + printf("free_frame_buffer: invalid frame buffer count\n"); + return; + } + + if(frame_buf == NULL || Ytab_buf == NULL || Ctab_buf == NULL) + { + printf("free_frame_buffer: invalid frame buffer\n"); + return; + } + + for(i = 0; i < num; i++) + { + if(frame_buf[i].size > 0 && frame_buf[i].u.device.device_addr) + bm_free_device(bm_handle, frame_buf[i]); + if(i < compress_cnt) + { + if(Ytab_buf[i].size > 0 && Ytab_buf[i].u.device.device_addr) + bm_free_device(bm_handle, Ytab_buf[i]); + if(Ctab_buf[i].size > 0 && Ctab_buf[i].u.device.device_addr) + bm_free_device(bm_handle, Ctab_buf[i]); + } + } + + free(frame_buf); + free(Ytab_buf); + free(Ctab_buf); } static void dec_test(void* arg) { osal_file_t* fpIn; - Uint8* pInMem; - Int32 readLen = -1; + uint8_t* pInMem; + int32_t readLen = -1; BMVidStream vidStream; BMVidDecParam param = {0}; BMTestConfig *testConfigPara = 
(BMTestConfig *)arg; @@ -536,43 +750,242 @@ static void dec_test(void* arg) int compareNum = 1, i = 0, j = 0; struct timeval tv; int wrptr = 0; - - fpIn = osal_fopen(inputPath, "rb"); - if(fpIn==NULL) - { - VLOG(ERR, "Can't open input file: %s\n", inputPath); - ret = -1; - return; + uint8_t bFindStart, bFindEnd; + int UsedBytes = 0; + int framebuffer_cnt = 0; + int compress_count = 0; + int linear_count = 0; + int framebuf_size, Ytab_size, Ctab_size; + int ret = 0; + bm_handle_t bm_handle = NULL; + bm_device_mem_t bitstream_buf; + bm_device_mem_t *frame_buf = NULL; + bm_device_mem_t *Ytab_buf = NULL; + bm_device_mem_t *Ctab_buf = NULL; + BmVpuDecDMABuffer *vpu_frame_buf = NULL; + BmVpuDecDMABuffer *vpu_Ytab_buf = NULL; + BmVpuDecDMABuffer *vpu_Ctab_buf = NULL; + int height, width, stride; + uint32_t heap_num; + uint32_t heap_mask = 0; + + fpIn = testConfigPara->fpIn; + + if (testConfigPara->cbcr_interleave) { + if (testConfigPara->nv21) + param.pixel_format = BM_VPU_DEC_PIX_FORMAT_NV21; + else + param.pixel_format = BM_VPU_DEC_PIX_FORMAT_NV12; + } else { + param.pixel_format = BM_VPU_DEC_PIX_FORMAT_YUV420P; } - param.streamFormat = testConfigPara->streamFormat; param.wtlFormat = testConfigPara->wtlFormat; - param.extraFrameBufferNum = 1; + param.extraFrameBufferNum = testConfigPara->extraFrame; param.streamBufferSize = 0x700000; param.enable_cache = 1; + param.bsMode = testConfigPara->bsMode; /* VIDEO_MODE_STREAM */ param.core_idx=-1; + param.decode_order = testConfigPara->decode_order; + param.picWidth = testConfigPara->width; + param.picHeight = testConfigPara->height; // param.wtlFormat = 101; #ifdef BM_PCIE_MODE param.pcie_board_id = testConfigPara->pcie_board_id; #endif - if (BMVidDecCreate(&vidHandle, param)!=RETCODE_SUCCESS) + + if(param.streamFormat != BMDEC_AVC && param.streamFormat != BMDEC_HEVC) + { + fprintf(stderr, "Error: the stream type is invalid!\n"); + global_ret = -1; + return; + } + + /* alloc frame buffer outside */ + 
if(testConfigPara->mem_alloc_type == 1) + { + if(testConfigPara->min_frame_cnt <= 0 || testConfigPara->extraFrame <= 0) + { + printf("invalid buffer count. min_frame_cnt:%d min_frame_cnt:%d\n", testConfigPara->min_frame_cnt, testConfigPara->extraFrame); + global_ret = -1; + return; + } + + if(bm_dev_request(&bm_handle, 0) != 0) + { + printf("failed to open vpu handle\n"); + global_ret = -1; + return; + } + + if(param.picWidth <= 0 || param.picHeight <= 0) + { + printf("invalid buffer size\n"); + global_ret = -1; + return; + } + + if(bm_get_gmem_total_heap_num(bm_handle, &heap_num) != 0) + { + printf("fail to get heap num\n"); + global_ret = -1; + return; + } + + for(i = (heap_num - 1); i >= 0; i--) + { + if((1 << i) && HEAP_MASK != 0) + { + heap_mask = (1 << i); + break; + } + } + + compress_count = testConfigPara->min_frame_cnt + testConfigPara->extraFrame; + linear_count = 0; + if(testConfigPara->wtlFormat != BMDEC_OUTPUT_COMPRESSED) + linear_count = testConfigPara->frame_delay + testConfigPara->extraFrame + 1; + framebuffer_cnt = compress_count + linear_count; + frame_buf = (bm_device_mem_t *)malloc(framebuffer_cnt * sizeof(bm_device_mem_t)); + Ytab_buf = (bm_device_mem_t *)malloc(compress_count * sizeof(bm_device_mem_t)); + Ctab_buf = (bm_device_mem_t *)malloc(compress_count * sizeof(bm_device_mem_t)); + vpu_frame_buf = (BmVpuDecDMABuffer *)malloc(framebuffer_cnt * sizeof(BmVpuDecDMABuffer)); + vpu_Ytab_buf = (BmVpuDecDMABuffer *)malloc(compress_count * sizeof(BmVpuDecDMABuffer)); + vpu_Ctab_buf = (BmVpuDecDMABuffer *)malloc(compress_count * sizeof(BmVpuDecDMABuffer)); + + height = param.picHeight; + width = param.picWidth; + stride = VPU_ALIGN32(width); + + /* allocate compress frame buffer */ + framebuf_size = stride * VPU_ALIGN32(height) + VPU_ALIGN32(stride / 2) * VPU_ALIGN32(height); + Ytab_size = VPU_ALIGN16(height) * VPU_ALIGN256(width) / 32; + Ytab_size = VPU_ALIGN4096(Ytab_size) + 4096; + Ctab_size = VPU_ALIGN16(height) * VPU_ALIGN256(width / 2) / 32; 
+ Ctab_size = VPU_ALIGN4096(Ctab_size) + 4096; + for(i=0; iwtlFormat != BMDEC_OUTPUT_COMPRESSED) + { + if(testConfigPara->frame_delay <= 0) + { + printf("invalid linear buffer delay. frame_delay:%d\n", testConfigPara->frame_delay); + global_ret = -1; + + free_frame_buffer(bm_handle, compress_count, frame_buf, Ytab_buf, Ctab_buf, compress_count, 0); + free(vpu_frame_buf); + free(vpu_Ytab_buf); + free(vpu_Ctab_buf); + return; + } + /* allocate linear frame buffer */ + framebuf_size = stride * height + stride * height / 2; + for(i = compress_count; i < framebuffer_cnt; i++) + { + frame_buf[i].size = framebuf_size; + if(bm_malloc_device_byte_heap_mask(bm_handle, &frame_buf[i], heap_mask, framebuf_size) != 0) + { + printf("allocate linear frame buffer failed.\n"); + global_ret = -1; + + free_frame_buffer(bm_handle, i+1, frame_buf, Ytab_buf, Ctab_buf, compress_count, linear_count); + free(vpu_frame_buf); + free(vpu_Ytab_buf); + free(vpu_Ctab_buf); + return; + } + vpu_frame_buf[i].size = frame_buf[i].size; + vpu_frame_buf[i].phys_addr = frame_buf[i].u.device.device_addr; + } + } + + /* allocate bitstream buffer */ + bitstream_buf.size = param.streamBufferSize; + if(bm_malloc_device_byte_heap_mask(bm_handle, &bitstream_buf, heap_mask, param.streamBufferSize) != 0) + { + printf("allocate bitstream buffer failed.\n"); + global_ret = -1; + free_frame_buffer(bm_handle, framebuffer_cnt, frame_buf, Ytab_buf, Ctab_buf, compress_count, linear_count); + free(vpu_frame_buf); + free(vpu_Ytab_buf); + free(vpu_Ctab_buf); + return; + } + + param.min_framebuf_cnt = testConfigPara->min_frame_cnt; + param.framebuf_delay = testConfigPara->frame_delay; + param.bitstream_buffer.size = bitstream_buf.size; + param.bitstream_buffer.phys_addr = bitstream_buf.u.device.device_addr; + param.frame_buffer = vpu_frame_buf; + param.Ytable_buffer = vpu_Ytab_buf; + param.Ctable_buffer = vpu_Ctab_buf; + } + + if (bmvpu_dec_create(&vidHandle, param)!= 0) { - VLOG(ERR, "Can't create decoder.\n"); - ret = -1; 
+ fprintf(stderr, "Can't create decoder.\n"); + global_ret = -1; return; } - VLOG(INFO, "Decoder Create success, inputpath %s!\n", inputPath); + printf("Decoder Create success, inputpath %s!\n", inputPath); - pInMem = osal_malloc(defaultReadBlockLen); + pInMem = malloc(defaultReadBlockLen); vidStream.buf = pInMem; vidStream.header_size = 0; if(pInMem==NULL) { - VLOG(ERR, "Can't get input memory\n"); - ret = -1; + fprintf(stderr, "Can't get input memory\n"); + global_ret = -1; return; } @@ -596,110 +1009,245 @@ static void dec_test(void* arg) for (i = 0; i < compareNum; i++) { while(1){ + if(testConfigPara->bsMode == BMDEC_BS_MODE_INTERRUPT){ /* BS_MODE_INTERRUPT */ #ifdef BM_PCIE_MODE - osal_memset(pInMem,0,defaultReadBlockLen); + memset(pInMem,0,defaultReadBlockLen); #endif - wrptr = 0; - do{ - readLen = (defaultReadBlockLen-wrptr) >readBlockLen? readBlockLen:(defaultReadBlockLen-wrptr); - if( readLen == 0 ) - break; - if((readLen = osal_fread(pInMem+wrptr, 1, readLen, fpIn))>0) - { - int toLost = 0; - if(injectionPercent == 0) + wrptr = 0; + do{ + readLen = (defaultReadBlockLen-wrptr) >readBlockLen? 
readBlockLen:(defaultReadBlockLen-wrptr); + if( readLen == 0 ) + break; + if((readLen = osal_fread(pInMem+wrptr, 1, readLen, fpIn))>0) { - wrptr += readLen; - if(testConfigPara->result == FALSE) + int toLost = 0; + if(injectionPercent == 0) { - goto OUT1; + wrptr += readLen; + if(testConfigPara->result == FALSE) + { + goto OUT1; + } } - } - else{ - if((rand()%100) <= injectionPercent) - { - if(injectWholeBlock) + else{ + if((rand()%100) <= injectionPercent) { - toLost = readLen; - if (injectLost) - continue; - else + if(injectWholeBlock) { - for(j=0;j 0); -//1682 pcie card cdma need 4B aligned + }while(readLen > 0); + //1682 pcie card cdma need 4B aligned #ifdef BM_PCIE_MODE - readLen = (wrptr + 3)/4*4; - vidStream.length = readLen; + readLen = (wrptr + 3)/4*4; + vidStream.length = readLen; #else - vidStream.length = wrptr; + vidStream.length = wrptr; #endif + } else{ + bFindStart = 0; + bFindEnd = 0; + int i; + + osal_fseek(fpIn, UsedBytes, SEEK_SET); + readLen = osal_fread(pInMem, 1, defaultReadBlockLen, fpIn); + if(readLen == 0){ + break; + } + + if(testConfigPara->streamFormat == BMDEC_AVC) { /* H264 */ + for (i = 0; i < readLen - 8; i++) { + int tmp = pInMem[i + 3] & 0x1F; + + if (pInMem[i] == 0 && pInMem[i + 1] == 0 && pInMem[i + 2] == 1 && + (((tmp == 0x5 || tmp == 0x1) && ((pInMem[i + 4] & 0x80) == 0x80)) || + (tmp == 20 && (pInMem[i + 7] & 0x80) == 0x80))) { + bFindStart = 1; + i += 8; + break; + } + } + + for (; i < readLen - 8; i++) { + int tmp = pInMem[i + 3] & 0x1F; + + if (pInMem[i] == 0 && pInMem[i + 1] == 0 && pInMem[i + 2] == 1 && + (tmp == 15 || tmp == 7 || tmp == 8 || tmp == 6 || + ((tmp == 5 || tmp == 1) && ((pInMem[i + 4] & 0x80) == 0x80)) || + (tmp == 20 && (pInMem[i + 7] & 0x80) == 0x80))) { + bFindEnd = 1; + break; + } + } + + if (i > 0) + readLen = i; + if (bFindStart == 0) { + printf("chn %d can not find H264 start code!readLen %d, s32UsedBytes %d.!\n", + (int)*((int *)vidHandle), readLen, UsedBytes); + } + if (bFindEnd == 0) { + readLen = 
i + 8; + } + } + else if(testConfigPara->streamFormat == BMDEC_HEVC) { /* H265 */ + int bNewPic = 0; + + for(i=0; i> 1; + + bNewPic = (pInMem[i + 0] == 0 && pInMem[i + 1] == 0 && pInMem[i + 2] == 1 && + (tmp <= 21) && ((pInMem[i + 5] & 0x80) == 0x80)); + + if (bNewPic) { + bFindStart = 1; + i += 6; + break; + } + } + + for (; i < readLen - 6; i++) { + int tmp = (pInMem[i + 3] & 0x7E) >> 1; + + bNewPic = (pInMem[i + 0] == 0 && pInMem[i + 1] == 0 && pInMem[i + 2] == 1 && + (tmp == 32 || tmp == 33 || tmp == 34 || tmp == 39 || tmp == 40 || + ((tmp <= 21) && (pInMem[i + 5] & 0x80) == 0x80))); + + if (bNewPic) { + bFindEnd = 1; + break; + } + } + if (i > 0) + readLen = i; + + if (bFindEnd == 0) { + readLen = i + 6; + } + } + + vidStream.length = readLen; + } + int result = 0; - while((result = BMVidDecDecode(vidHandle, vidStream))!=RETCODE_SUCCESS){ + while((result = bmvpu_dec_decode(vidHandle, vidStream))!= 0){ + if(result == BM_ERR_VDEC_ILLEGAL_PARAM || result == BM_ERR_VDEC_ERR_HUNG) + goto OUT1; #ifdef __linux__ - usleep(10); + usleep(40*1000); #elif _WIN32 Sleep(1); #endif } - if (wrptr < defaultReadBlockLen){ - break; + + if(testConfigPara->bsMode == BMDEC_BS_MODE_INTERRUPT){ + if (wrptr < defaultReadBlockLen){ + break; + } + } + else{ + UsedBytes += readLen; + vidStream.pts += 30; } } osal_fseek(fpIn, 0, SEEK_SET); } OUT1: - BMVidDecFlush(vidHandle); + while((bmvpu_dec_flush(vidHandle)) != 0){ +#ifdef __linux__ + usleep(2*1000); +#elif _WIN32 + Sleep(1); +#endif + } - while (BMVidGetStatus(vidHandle)!=BMDEC_STOP) + while ((ret = bmvpu_dec_get_status(vidHandle)) != BMDEC_STOP) { + if(ret == BMDEC_FRAMEBUFFER_NOTENOUGH || ret == BMDEC_WRONG_RESOLUTION || ret == BMDEC_HUNG) { + global_ret = -1; + break; + } #ifdef __linux__ - usleep(2); + usleep(2*1000); #elif _WIN32 Sleep(1); #endif } osal_thread_join(vpu_thread, NULL); - VLOG(INFO, "EXIT\n"); - BMVidDecDelete(vidHandle); - osal_free(pInMem); + printf("EXIT\n"); + if(testConfigPara->mem_alloc_type == 1) + { + 
if(bitstream_buf.size > 0) + bm_free_device(bm_handle, bitstream_buf); + free_frame_buffer(bm_handle, framebuffer_cnt, frame_buf, Ytab_buf, Ctab_buf, compress_count, linear_count); + free(vpu_frame_buf); + free(vpu_Ytab_buf); + free(vpu_Ctab_buf); + + bm_dev_free(bm_handle); + bm_handle = NULL; + } + bmvpu_dec_delete(vidHandle); + free(pInMem); +} + +static void run(void* arg) +{ + int i; + BMTestConfig *testConfigPara = (BMTestConfig *)arg; + testConfigPara->fpIn = osal_fopen((char *)testConfigPara->inputPath, "rb"); + if(testConfigPara->fpIn==NULL) + { + fprintf(stderr, "Can't open input file: %s\n", (char *)testConfigPara->inputPath); + global_ret = -1; + return; + } + for(i=0; irun_times; i++) + { + dec_test(arg); + } + osal_fclose(testConfigPara->fpIn); + return; } static void Help(const char *programName) { fprintf(stderr, "------------------------------------------------------------------------------\n"); - fprintf(stderr, "%s(API v%d.%d.%d)\n", programName, API_VERSION_MAJOR, API_VERSION_MINOR, API_VERSION_PATCH); fprintf(stderr, "\tAll rights reserved by Bitmain\n"); fprintf(stderr, "------------------------------------------------------------------------------\n"); fprintf(stderr, "%s [option] --input bistream\n", programName); @@ -711,27 +1259,42 @@ Help(const char *programName) fprintf(stderr, " 1 : compare with golden yuv that specified --ref-yuv option\n"); fprintf(stderr, " 2 : compare with golden yuv md5 that specified --ref-yuv option\n"); fprintf(stderr, "-n compare count\n"); + fprintf(stderr, "-m bitstream mode. 
0 for interrupt mode, 2 for pic end mode.\n"); + fprintf(stderr, "--run_times open and close codec count\n"); fprintf(stderr, "--input bitstream path\n"); fprintf(stderr, "--output YUV path\n"); fprintf(stderr, "--stream-type 0,12, default 0 (H.264:0, HEVC:12)\n"); fprintf(stderr, "--ref-yuv golden yuv path\n"); fprintf(stderr, "--instance instance number\n"); - fprintf(stderr, "--write_yuv 0 no writing , num write frame numbers\n"); - fprintf(stderr, "--wtl-format yuv format. default 0.\n"); + fprintf(stderr, "--write_yuv 0 no writing , num write frame numbers\n"); + fprintf(stderr, "--wtl-format yuv format. default 0. 101: fbc data.\n"); + fprintf(stderr, "--cbcr_interleave chorma interleave. default 0.\n"); + fprintf(stderr, "--nv21 nv21 output. default 0.\n"); + fprintf(stderr, "--width input width\n"); + fprintf(stderr, "--height input height\n"); + fprintf(stderr, "--extraFrame extra frame nums. default 2.\n"); + fprintf(stderr, "--mem_alloc_type memory allocate type. default 0: allocate memory in sdk, 1: allocate memory by user.\n"); + fprintf(stderr, "--min_frame_cnt minimum count of frame buffer use by VPU\n"); + fprintf(stderr, "--frame_delay minimum count of linear buffer delay.\n"); fprintf(stderr, "--read-block-len block length of read from file, default is 0x80000\n"); fprintf(stderr, "--inject-percent percent of blocks to introduce lost/scramble data, will introduce random length of data at %% of blocks, or the whole block \n"); fprintf(stderr, "--inject-lost type of injection, default is 1 for data lost, set to 0 for scramble the data\n"); fprintf(stderr, "--inject-whole-block lost the whole block, default is lost part of the block\n"); + fprintf(stderr, "--decode_order get yuv frame by decode order, default:0\n"); #ifdef BM_PCIE_MODE fprintf(stderr, "--pcie_board_id select pcie card by pci_board_id\n"); #endif + fprintf(stderr, "BMVPUDEC_DISPLAY_FRAMERATE:\n"); + fprintf(stderr, " set BMVPUDEC_DISPLAY_FRAMERATE=0 defalut 0, print all stream 
rate.\n"); + fprintf(stderr, " set BMVPUDEC_DISPLAY_FRAMERATE=1 print the frame rate of each stream\n"); } int main(int argc, char **argv) { int i = 0; osal_thread_t vpu_thread[MAX_NUM_INSTANCE * MAX_NUM_VPU_CORE]; + osal_thread_t monitor_thread; BMTestConfig *testConfigPara = malloc(MAX_NUM_INSTANCE * MAX_NUM_VPU_CORE * sizeof(BMTestConfig));//[MAX_NUM_INSTANCE * MAX_NUM_VPU_CORE]; BMTestConfig testConfigOption; if(testConfigPara == NULL) { @@ -739,13 +1302,13 @@ int main(int argc, char **argv) return -1; } parse_args(argc, argv, &testConfigOption); - SetMaxLogLevel(testConfigOption.log_level); + bmvpu_dec_set_logging_threshold(testConfigOption.log_level); printf("compareNum: %d\n", testConfigOption.compareNum); printf("instanceNum: %d\n", testConfigOption.instanceNum); for(i = 0; i < testConfigOption.instanceNum; i++) - osal_memset(&testConfigPara[i], 0, sizeof(BMTestConfig)); + memset(&testConfigPara[i], 0, sizeof(BMTestConfig)); for(i = 0; i < testConfigOption.instanceNum; i++) { @@ -753,31 +1316,47 @@ int main(int argc, char **argv) testConfigPara[i].instanceNum = i; testConfigPara[i].result = TRUE; printf("inputpath: %s\n", testConfigPara[i].inputPath); + printf("outputpath: %s\n", testConfigPara[i].outputPath); printf("refYuvPath: %s\n", testConfigPara[i].refYuvPath); } for(i = 0; i < testConfigOption.instanceNum; i++) { - vpu_thread[i] = osal_thread_create(dec_test, (void*)&testConfigPara[i]); + vpu_thread[i] = osal_thread_create(run, (void*)&testConfigPara[i]); } + monitor_thread = osal_thread_create(stat_pthread, (void*)&testConfigOption.instanceNum); + for(i = 0; i < testConfigOption.instanceNum; i++) { osal_thread_join(vpu_thread[i], NULL); } + + printf("set g_exit_flag=1\n"); + g_exit_flag = 1; + osal_thread_join(monitor_thread, NULL); free(testConfigPara); - return ret; + return global_ret; } static struct option options[] = { + {"run_times", 1, NULL, 0}, {"output", 1, NULL, 0}, {"input", 1, NULL, 0}, {"codec", 1, NULL, 0}, {"stream-type", 1, NULL, 
0}, {"ref-yuv", 1, NULL, 0}, {"instance", 1, NULL, 0}, - {"write_yuv", 1, NULL, 0}, + {"write_yuv", 1, NULL, 0}, {"wtl-format", 1, NULL, 0}, + {"cbcr_interleave", 1, NULL, 0}, + {"nv21", 1, NULL, 0}, + {"extraFrame", 1, NULL, 0}, + {"mem_alloc_type", 1, NULL, 0}, + {"min_frame_cnt", 1, NULL, 0}, + {"width", 1, NULL, 0}, + {"height", 1, NULL, 0}, + {"frame_delay", 1, NULL, 0}, {"comp-skip", 1, NULL, 0}, {"read-block-len", 1, NULL, 0}, {"inject-percent", 1, NULL, 0}, @@ -786,21 +1365,25 @@ static struct option options[] = { #ifdef BM_PCIE_MODE {"pcie_board_id", 1, NULL, 0}, #endif + {"decode_order", 1, NULL, 0}, {NULL, 0, NULL, 0}, }; static int parse_args(int argc, char **argv, BMTestConfig* par) { int index; - Int32 opt; + int32_t opt; /* default setting. */ - osal_memset(par, 0, sizeof(BMTestConfig)); + memset(par, 0, sizeof(BMTestConfig)); + par->run_times = 1; par->instanceNum = 1; - par->log_level = 4; + par->log_level = 1; + par->bsMode = BMDEC_BS_MODE_INTERRUPT; + par->extraFrame = 2; - while ((opt=getopt_long(argc, argv, "v:c:h:n:", options, &index)) != -1) + while ((opt=getopt_long(argc, argv, "v:c:h:n:m:", options, &index)) != -1) { switch (opt) { @@ -810,7 +1393,14 @@ static int parse_args(int argc, char **argv, BMTestConfig* par) case 'n': par->compareNum = atoi(optarg); break; + case 'm': + par->bsMode = (BmVpuDecBitStreamMode)atoi(optarg); + break; case 0: + if (!strcmp(options[index].name, "run_times")) + { + par->run_times = (int)atoi(optarg); + } if (!strcmp(options[index].name, "output")) { memcpy(par->outputPath, optarg, strlen(optarg)); @@ -823,24 +1413,56 @@ static int parse_args(int argc, char **argv, BMTestConfig* par) } else if (!strcmp(options[index].name, "ref-yuv")) { - osal_memcpy(par->refYuvPath, optarg, strlen(optarg)); + memcpy(par->refYuvPath, optarg, strlen(optarg)); ChangePathStyle(par->refYuvPath); } else if (!strcmp(options[index].name, "stream-type")) { - par->streamFormat = (int)atoi(optarg); + par->streamFormat = 
(BmVpuDecStreamFormat)atoi(optarg); } else if (!strcmp(options[index].name, "instance")) { par->instanceNum = atoi(optarg); } - else if (!strcmp(options[index].name, "write_yuv")) + else if (!strcmp(options[index].name, "write_yuv")) { - par->wirteYuv = atoi(optarg); + par->writeYuv = atoi(optarg); } else if (!strcmp(options[index].name, "wtl-format")) { - par->wtlFormat = (FrameBufferFormat)atoi(optarg); + par->wtlFormat = (BmVpuDecOutputMapType)atoi(optarg); + } + else if (!strcmp(options[index].name, "cbcr_interleave")) + { + par->cbcr_interleave = atoi(optarg); + } + else if (!strcmp(options[index].name, "nv21")) + { + par->nv21 = atoi(optarg); + } + else if (!strcmp(options[index].name, "width")) + { + par->width = atoi(optarg); + } + else if (!strcmp(options[index].name, "height")) + { + par->height = atoi(optarg); + } + else if (!strcmp(options[index].name, "mem_alloc_type")) + { + par->mem_alloc_type = atoi(optarg); + } + else if (!strcmp(options[index].name, "min_frame_cnt")) + { + par->min_frame_cnt = atoi(optarg); + } + else if (!strcmp(options[index].name, "frame_delay")) + { + par->frame_delay = atoi(optarg); + } + else if (!strcmp(options[index].name, "extraFrame")) + { + par->extraFrame = atoi(optarg); } else if(!strcmp(options[index].name, "comp-skip")) { @@ -867,6 +1489,9 @@ static int parse_args(int argc, char **argv, BMTestConfig* par) par->pcie_board_id = (int)atoi(optarg); } #endif + else if (!strcmp(options[index].name, "decode_order")) { + par->decode_order = (int)atoi(optarg); + } break; case 'v': par->log_level = atoi(optarg); @@ -880,6 +1505,16 @@ static int parse_args(int argc, char **argv, BMTestConfig* par) } } + if(par->nv21) + par->cbcr_interleave = 1; + + if(par->bsMode != BMDEC_BS_MODE_INTERRUPT && par->bsMode != BMDEC_BS_MODE_PIC_END) + { + fprintf(stderr, "Invalid bsMode(%d), 0 for interrupt mode and 2 for pic end mode\n", par->instanceNum); + Help(argv[0]); + exit(1); + } + if (par->instanceNum <= 0 || par->instanceNum > 
MAX_NUM_INSTANCE * MAX_NUM_VPU_CORE) { fprintf(stderr, "Invalid instanceNum(%d)\n", par->instanceNum); @@ -887,7 +1522,7 @@ static int parse_args(int argc, char **argv, BMTestConfig* par) exit(1); } - if (par->log_level < NONE || par->log_level > TRACE) + if (par->log_level < BMVPU_DEC_LOG_LEVEL_NONE || par->log_level > BMVPU_DEC_LOG_LEVEL_TRACE) { fprintf(stderr, "Wrong log level: %d\n", par->log_level); Help(argv[0]); diff --git a/bmvid/video/encoder/bm_enc_api/inc/bmqueue.h b/bmvid/example/bmqueue.h similarity index 100% rename from bmvid/video/encoder/bm_enc_api/inc/bmqueue.h rename to bmvid/example/bmqueue.h diff --git a/bmvid/example/bmvpuenc.c b/bmvid/example/bmvpuenc.c index c37758a..f7979ce 100644 --- a/bmvid/example/bmvpuenc.c +++ b/bmvid/example/bmvpuenc.c @@ -26,9 +26,7 @@ #include "bmvpuenc.h" #include "bmqueue.h" - -#include "bmlib_runtime.h" - +#define INTERVAL 1 #define handle_error(msg) \ do { perror(msg); exit(EXIT_FAILURE); } while (0) @@ -48,6 +46,7 @@ typedef struct { int aligned_height; int bit_rate; /* kbps */ + int fps; /*default is 30*/ int cqp; int gop_preset; @@ -56,45 +55,38 @@ typedef struct { typedef struct { char *input_filename; + FILE *fin; char *output_filename; int log_level; - - int thread_number; /* at most 2 threads for now */ + int thread_number; /* at most 24 threads for now */ int thread_id; int loop; int frame_number; - EncParameter enc; int result; + + int run_times; } InputParameter; typedef struct { BmVpuEncOpenParams open_params; - bm_handle_t bm_handle; BmVpuEncoder* video_encoder; BmVpuEncInitialInfo initial_info; - BmVpuFramebuffer* rec_fb_list; - bm_device_mem_t* rec_fb_dmabuffers; - int num_rec_fb; - BmVpuFramebuffer* src_fb_list; - bm_device_mem_t* src_fb_dmabuffers; + BmVpuEncDMABuffer* src_fb_dmabuffers; void* frame_unused_queue; int num_src_fb; BmVpuFramebuffer* src_fb; - bm_device_mem_t bs_dma_buffer; + BmVpuEncDMABuffer bs_dma_buffer; size_t bs_buffer_size; uint32_t bs_buffer_alignment; - size_t 
work_buffer_size; - uint32_t work_buffer_alignment; - BmVpuEncParams enc_params; BmVpuRawFrame input_frame; BmVpuEncodedFrame output_frame; @@ -106,11 +98,10 @@ typedef struct { int preset; /* 0, 1, 2 */ - int perf; struct timeval ps; struct timeval pe; double total_time; - long total_frame; + long long total_frame; } VpuEncContext; typedef struct { @@ -124,12 +115,25 @@ typedef struct { #ifdef __linux__ static sigset_t waitsigset = {0}; #endif +#ifdef BM_PCIE_MODE +#define MAX_THREAD 24 +#else +#define MAX_THREAD 18 +#endif +long long count_enc[MAX_THREAD]; +double sta_time[MAX_THREAD]; +int g_exit_flag = 0; +#ifdef __linux__ + pthread_t thread_stat; +#elif _WIN32 + HANDLE thread_stat; +#endif static void usage(char *progname); static int parse_args(int argc, char **argv, InputParameter* par); static int read_yuv_source(uint8_t* dst_pa, int dst_stride_y, int dst_stride_c, int dst_height, FILE** src_file, int src_stride_y, int src_stride_c, int src_height, - int chroma_interleave, + int pix_format, int width, int height); static BmVpuFramebuffer* get_src_framebuffer(VpuEncContext *ctx); @@ -166,6 +170,60 @@ static BOOL CtrlHandler(DWORD fdwCtrlType) } } #endif +#if _WIN32 +DWORD statPthread(void *arg) +{ +#else +void *stat_pthread(void *arg) +{ +#endif + InputParameter* ctx = (InputParameter*)arg; + int thread_num = ctx->thread_number; + int i = 0; + long long last_count_enc[MAX_THREAD] = {0}; + double last_time[MAX_THREAD] = {0.0}; + double cur_fps[MAX_THREAD] = {0.0}; + double total_fps = 0.0; + long long count_sum = 0; + int dis_mode = 0; + char *display = getenv("BMVPUENC_DISPLAY_FRAMERATE"); + if (display) dis_mode = atoi(display); + printf("BMVPUENC_DISPLAY_FRAMERATE=%d thread_num=%d g_exit_flag=%d \n", dis_mode, thread_num, g_exit_flag); + while(!g_exit_flag) { +#ifdef __linux__ + sleep(INTERVAL); +#elif _WIN32 + Sleep(INTERVAL*1000); +#endif + total_fps = 0.0; + for (i = 0; i < thread_num; i++){ + count_sum += count_enc[i] - last_count_enc[i]; + if 
((count_enc[i] > last_count_enc[i]) && (sta_time[i] > last_time[i])) { + cur_fps[i] = (double)((count_enc[i]-last_count_enc[i]) * 1000)/(sta_time[i]-last_time[i]); + } else { + cur_fps[i] = 0; + } + + total_fps += cur_fps[i]; + last_count_enc[i] = count_enc[i]; + last_time[i] = sta_time[i]; + } + if (dis_mode == 1) { + for (i = 0; i < thread_num; i++) + printf("ID[%d], FRM[%10lld], Cost Time[%2.2f] FPS[%2.2f] \n", i, (long long)count_enc[i], sta_time[i], cur_fps[i]); + }else + printf("thread_count %d, total_frame %lld, total_enc_fps %2.2f\n", thread_num, count_sum, total_fps); + printf("\r"); + fflush(stdout); + } + fflush(stdout); + + for (i = 0; i < thread_num; i++) + printf("%3dth thread Encode %lld frame in total, avg: %5.4f, time: %5.4fms!\n", i, count_enc[i], (double)count_enc[i] * 1000 / sta_time[i], sta_time[i]); + printf("stat_pthread over.\n"); + return NULL; +} + static void* acquire_output_buffer(void *context, size_t size, void **acquired_handle) { ((void)(context)); @@ -205,34 +263,22 @@ static void cleanup_task(void* arg) } /* Free all allocated memory (both regular and DMA memory) */ - if (ctx->rec_fb_list) - free(ctx->rec_fb_list); - if (ctx->rec_fb_dmabuffers) - { - for (i = 0; i < ctx->num_rec_fb; ++i) - bm_free_device(ctx->bm_handle, ctx->rec_fb_dmabuffers[i]); - - free(ctx->rec_fb_dmabuffers); - } - if (ctx->src_fb_list) free(ctx->src_fb_list); if (ctx->src_fb_dmabuffers) { for (i = 0; i < ctx->num_src_fb; ++i) - bm_free_device(ctx->bm_handle, ctx->src_fb_dmabuffers[i]); + bmvpu_enc_dma_buffer_deallocate(ctx->core_idx, &(ctx->src_fb_dmabuffers[i])); free(ctx->src_fb_dmabuffers); } if (&(ctx->bs_dma_buffer)) - bm_free_device(ctx->bm_handle, ctx->bs_dma_buffer); + bmvpu_enc_dma_buffer_deallocate(ctx->core_idx, &(ctx->bs_dma_buffer)); /* Unload the VPU firmware */ bmvpu_enc_unload(ctx->soc_idx); - if (ctx->fin != NULL) - fclose(ctx->fin); if (ctx->fout != NULL) fclose(ctx->fout); @@ -249,26 +295,18 @@ static int run_once(InputParameter* par) 
EncParameter* enc_par = &(par->enc); char output_filename[256] = {0}; FILE *fin, *fout; - char* perf_s = NULL; - int frm_cnt = 0; int l, i, ret = 0; + int tid = par->thread_id; - fin = fopen(par->input_filename, "rb"); - if (fin == NULL) - { - fprintf(stderr, "Failed to open %s for reading: %s\n", - par->input_filename, strerror(errno)); - par->result = -1; - return -1; - } + fin = par->fin; ret = strncmp(par->output_filename, "/dev/null", 9); if (ret != 0) { if (enc_par->enc_fmt == 0) - sprintf(output_filename, "%s-%d.264", par->output_filename, par->thread_id); + sprintf(output_filename, "%s-%d.264", par->output_filename, tid); else - sprintf(output_filename, "%s-%d.265", par->output_filename, par->thread_id); + sprintf(output_filename, "%s-%d.265", par->output_filename, tid); fout = fopen(output_filename, "wb"); } else @@ -285,8 +323,8 @@ static int run_once(InputParameter* par) return -1; } - bmvpu_set_logging_threshold(par->log_level); - bmvpu_set_logging_function(logging_fn); + bmvpu_enc_set_logging_threshold(par->log_level); + bmvpu_enc_set_logging_function(logging_fn); ctx = calloc(1, sizeof(VpuEncContext)); if (ctx == NULL) @@ -303,23 +341,6 @@ static int run_once(InputParameter* par) ctx->fin = fin; ctx->fout = fout; - perf_s = getenv("BMVE_PERF"); - if (perf_s) - { - printf("BMVE_PERF=%s\n", perf_s); - ctx->perf = atoi(perf_s); - } - else - { - ctx->perf = 0; - } - - if (ctx->perf) - { - ctx->total_time = 0.0; - ctx->total_frame = 0; - } - ctx->soc_idx = enc_par->soc_idx; ctx->core_idx = bmvpu_enc_get_core_idx(enc_par->soc_idx); @@ -334,20 +355,19 @@ static int run_once(InputParameter* par) eop->soc_idx = ctx->soc_idx; - eop->color_format = BM_VPU_COLOR_FORMAT_YUV420; /* If this is 1, then Cb and Cr are interleaved in one shared chroma * plane, otherwise they are separated in their own planes. - * See the BmVpuColorFormat documentation for the consequences of this. */ + * See the BmVpuEncPixFormat documentation for the consequences of this. 
*/ if (enc_par->pix_fmt == 0) - eop->chroma_interleave = 0; + eop->pix_format = BM_VPU_ENC_PIX_FORMAT_YUV420P; else - eop->chroma_interleave = 1; + eop->pix_format = BM_VPU_ENC_PIX_FORMAT_NV12; eop->frame_width = enc_par->crop_w; eop->frame_height = enc_par->crop_h; eop->timebase_num = 1; eop->timebase_den = 1; - eop->fps_num = 30; + eop->fps_num = par->enc.fps; eop->fps_den = 1; /* Set a bitrate of 0 bps, which tells the VPU to use constant quality mode. */ @@ -358,6 +378,10 @@ static int run_once(InputParameter* par) eop->intra_period = enc_par->intra_period; eop->gop_preset = enc_par->gop_preset; + eop->buffer_alloc_func = NULL; + eop->buffer_free_func = NULL; + eop->buffer_context = NULL; + /* Load the VPU firmware */ ret = bmvpu_enc_load(ctx->soc_idx); if (ret != BM_VPU_ENC_RETURN_CODE_OK) @@ -373,16 +397,8 @@ static int run_once(InputParameter* par) /* in unit of 4k bytes */ ctx->bs_buffer_size = (ctx->bs_buffer_size +(4*1024-1)) & (~(4*1024-1)); - ctx->bm_handle = bmvpu_enc_get_bmlib_handle(ctx->soc_idx); - if (!ctx->bm_handle) - { - fprintf(stderr, "bm_dev handle request failed! 
\n"); - ret = -1; - goto cleanup; - } - /* Create bs buffer */ - ret = bmvpu_malloc_device_byte_heap(ctx->bm_handle, &(ctx->bs_dma_buffer), ctx->bs_buffer_size, 0x06, 1); + ret = bmvpu_enc_dma_buffer_allocate(ctx->core_idx, &(ctx->bs_dma_buffer), ctx->bs_buffer_size); if (ret != 0) { fprintf(stderr, "bm_malloc_device_byte for bs_dmabuffer failed!\n"); @@ -391,7 +407,7 @@ static int run_once(InputParameter* par) } /* Open an encoder instance, using the previously allocated bitstream buffer */ - ret = bmvpu_enc_open(&(ctx->video_encoder), eop, &(ctx->bs_dma_buffer)); + ret = bmvpu_enc_open(&(ctx->video_encoder), eop, &(ctx->bs_dma_buffer), &(ctx->initial_info)); if (ret != BM_VPU_ENC_RETURN_CODE_OK) { fprintf(stderr, "bmvpu_enc_open failed\n"); @@ -405,71 +421,6 @@ static int run_once(InputParameter* par) goto cleanup; } - /* Retrieve the initial information to allocate source and reconstruction - * framebuffers for the encoding process. */ - ret = bmvpu_enc_get_initial_info(ctx->video_encoder, &(ctx->initial_info)); - if (ret != 0) - { - fprintf(stderr, "bmvpu_enc_get_initial_info failed\n"); - ret = -1; - goto cleanup; - } - - ctx->num_rec_fb = ctx->initial_info.min_num_rec_fb; -#ifdef __linux__ - printf("[%zx] num framebuffers for recon: %u\n", pthread_self(), ctx->num_rec_fb); -#endif -#ifdef _WIN32 - printf("[%zx] num framebuffers for recon: %u\n", GetCurrentThreadId(), ctx->num_rec_fb); -#endif - - /* Allocate memory blocks for the framebuffer and DMA buffer structures, - * and allocate the DMA buffers themselves */ - ctx->rec_fb_list = malloc(sizeof(BmVpuFramebuffer) * ctx->num_rec_fb); //malloc(sizeof(VpuFrameBuffer) * num_framebuffers); - if (ctx->rec_fb_list == NULL) - { - fprintf(stderr, "malloc failed\n"); - ret = -1; - goto cleanup; - } - - ctx->rec_fb_dmabuffers = (bm_device_mem_t*)malloc(sizeof(bm_device_mem_t) * ctx->num_rec_fb); - for (i = 0; i < ctx->num_rec_fb; i++) - { - int rec_id = 0x200 + (par->thread_id<<5) + i; - - /* Allocate a DMA buffer 
for each framebuffer. - * It is possible to specify alternate allocators; - * all that is required is that the allocator provides physically contiguous memory - * (necessary for DMA transfers) and repects the alignment value. */ - ret = bmvpu_malloc_device_byte_heap(ctx->bm_handle, &ctx->rec_fb_dmabuffers[i], ctx->initial_info.rec_fb.size, 0x06, 1); - if(ret != 0){ - fprintf(stderr, "bmvpu_malloc_device_byte_heap for rec_buffer failed\n"); - ret = -1; - goto cleanup; - } - - ret = bmvpu_fill_framebuffer_params(&(ctx->rec_fb_list[i]), - &(ctx->initial_info.rec_fb), - &(ctx->rec_fb_dmabuffers[i]), - rec_id, NULL); - if(ret != 0){ - fprintf(stderr, "bmvpu_fill_framebuffer_params failed\n"); - ret = -1; - goto cleanup; - } - } - - /* Buffer registration help the VPU knows which buffers to use for - * storing temporary frames into. */ - ret = bmvpu_enc_register_framebuffers(ctx->video_encoder, ctx->rec_fb_list, ctx->num_rec_fb); - if (ret != 0) - { - fprintf(stderr, "bmvpu_enc_register_framebuffers failed\n"); - ret = -1; - goto cleanup; - } - ctx->num_src_fb = ctx->initial_info.min_num_src_fb; printf("num framebuffers for src: %u\n", ctx->num_src_fb); @@ -480,7 +431,7 @@ static int run_once(InputParameter* par) ret = -1; goto cleanup; } - ctx->src_fb_dmabuffers = (bm_device_mem_t*)malloc(sizeof(bm_device_mem_t) * ctx->num_src_fb); + ctx->src_fb_dmabuffers = (BmVpuEncDMABuffer*)malloc(sizeof(BmVpuEncDMABuffer) * ctx->num_src_fb); if (ctx->src_fb_dmabuffers == NULL) { fprintf(stderr, "malloc failed\n"); @@ -489,10 +440,10 @@ static int run_once(InputParameter* par) } for (i = 0; i < ctx->num_src_fb; ++i) { - int src_id = 0x100 + (par->thread_id<<5) + i; + int src_id = 0x100 + (tid<<5) + i; /* Allocate DMA buffers for the raw input frames. 
*/ - ret = bmvpu_malloc_device_byte_heap(ctx->bm_handle, &ctx->src_fb_dmabuffers[i], ctx->initial_info.src_fb.size, 0x06, 1); + ret = bmvpu_enc_dma_buffer_allocate(ctx->core_idx, &ctx->src_fb_dmabuffers[i], ctx->initial_info.src_fb.size); if(ret != 0){ fprintf(stderr, "bmvpu_malloc_device_byte_heap for src_buffer failed\n"); ret = -1; @@ -522,7 +473,7 @@ static int run_once(InputParameter* par) { BmVpuFramebuffer *fb = &(ctx->src_fb_list[i]); bm_queue_push(ctx->frame_unused_queue, (void*)(&fb)); - if (par->log_level > BM_VPU_LOG_LEVEL_INFO) + if (par->log_level > BMVPU_ENC_LOG_LEVEL_INFO) #ifdef __linux__ printf("[%zx] myIndex = 0x%x, %p, push\n", pthread_self(), fb->myIndex, fb); #endif @@ -568,7 +519,8 @@ static int run_once(InputParameter* par) #ifdef _WIN32 SetConsoleCtrlHandler((PHANDLER_ROUTINE)CtrlHandler, TRUE); #endif - for (l=0; lloop; l++) + l = 0; + while (1) { fseek(fin, 0, SEEK_SET); /* Read input I420/NV12 frames and encode them until the end of the input file is reached */ @@ -593,57 +545,52 @@ static int run_once(InputParameter* par) } /* Read uncompressed pixels into the input DMA buffer */ -#ifdef BM_PCIE_MODE ret = read_yuv_source(host_va, ctx->initial_info.src_fb.y_stride, ctx->initial_info.src_fb.c_stride, ctx->initial_info.src_fb.height, &fin, enc_par->y_stride, enc_par->c_stride, enc_par->aligned_height, - eop->chroma_interleave, + eop->pix_format, enc_par->crop_w, enc_par->crop_h); if (ret < 0) + { + int j; + for (j=0; jnum_src_fb; j++) + { + BmVpuFramebuffer* fb = &(ctx->src_fb_list[j]); + if (fb->myIndex == ctx->src_fb->myIndex) + { + bm_queue_push(ctx->frame_unused_queue, &fb); + if (par->log_level > BMVPU_ENC_LOG_LEVEL_INFO) +#ifdef __linux__ + printf("[%zx] myIndex = 0x%x, push\n", pthread_self(), fb->myIndex); +#endif +#ifdef _WIN32 + printf("[%zx] myIndex = 0x%x, push\n", GetCurrentThreadId(), fb->myIndex); +#endif + break; + } + } break; + } // TODO - // u64 vpu_pa = bm_mem_get_device_addr(*(ctx->src_fb->dma_buffer)); - // ret = 
bmvpu_upload_data(ctx->soc_idx, host_va, frame_size, - // vpu_pa, frame_size, frame_size, 1); - - ret = bm_memcpy_s2d_partial(ctx->bm_handle, *(ctx->src_fb->dma_buffer), host_va, frame_size); + u64 vpu_pa = bmvpu_enc_dma_buffer_get_physical_address(ctx->src_fb->dma_buffer); + ret = bmvpu_enc_upload_data(ctx->core_idx, host_va, frame_size, + vpu_pa, frame_size, frame_size, 1); if (ret != 0) { - fprintf(stderr, "%s:%d(%s) bmvpu_upload_data failed\n", __FILE__, __LINE__, __func__); + fprintf(stderr, "%s:%d(%s) bmvpu_enc_upload_data failed\n", __FILE__, __LINE__, __func__); ret = -1; goto cleanup; } -#else - unsigned long long tmp_va; - bm_mem_mmap_device_mem_no_cache(ctx->bm_handle, ctx->src_fb->dma_buffer, &tmp_va); - - ret = read_yuv_source((uint8_t*)tmp_va, ctx->initial_info.src_fb.y_stride, ctx->initial_info.src_fb.c_stride, ctx->initial_info.src_fb.height, - &fin, enc_par->y_stride, enc_par->c_stride, enc_par->aligned_height, - eop->chroma_interleave, - enc_par->crop_w, enc_par->crop_h); - - bm_mem_unmap_device_mem(ctx->bm_handle, (void*)tmp_va, frame_size); - if (ret < 0) - break; - -#endif ctx->input_frame.framebuffer = ctx->src_fb; - if (par->log_level > BM_VPU_LOG_LEVEL_ERROR) + if (par->log_level > BMVPU_ENC_LOG_LEVEL_ERROR) printf("\n\n"); #ifdef __linux__ - printf("[%zx] Encoding frame #%d\n", pthread_self(), frm_cnt++); -#endif -#ifdef _WIN32 - printf("[%zx] Encoding frame #%d\n", GetCurrentThreadId(), frm_cnt++); -#endif - if (ctx->perf) -#ifdef __linux__ - gettimeofday(&(ctx->ps), NULL); + gettimeofday(&(ctx->ps), NULL); #elif _WIN32 - s_gettimeofday(&(ctx->ps), NULL); + s_gettimeofday(&(ctx->ps), NULL); #endif /* The actual encoding */ ret = bmvpu_enc_encode(ctx->video_encoder, &(ctx->input_frame), &(ctx->output_frame), &(ctx->enc_params), &output_code); @@ -664,7 +611,7 @@ static int run_once(InputParameter* par) if (fb->myIndex == ctx->output_frame.src_idx) { bm_queue_push(ctx->frame_unused_queue, &fb); - if (par->log_level > BM_VPU_LOG_LEVEL_INFO) + 
if (par->log_level > BMVPU_ENC_LOG_LEVEL_INFO) #ifdef __linux__ printf("[%zx] myIndex = 0x%x, push\n", pthread_self(), fb->myIndex); #endif @@ -678,17 +625,15 @@ static int run_once(InputParameter* par) { fprintf(stderr, "unknown encSrcIdx %d\n", ctx->output_frame.src_idx); } - if (ctx->perf) - { + #ifdef __linux__ - gettimeofday(&(ctx->pe), NULL); + gettimeofday(&(ctx->pe), NULL); #elif _WIN32 - s_gettimeofday(&(ctx->pe), NULL); + s_gettimeofday(&(ctx->pe), NULL); #endif - ctx->total_time += ((ctx->pe.tv_sec*1000.0 + ctx->pe.tv_usec/1000.0) - - (ctx->ps.tv_sec*1000.0 + ctx->ps.tv_usec/1000.0)); - ctx->total_frame++; - } + sta_time[tid] += ((ctx->pe.tv_sec*1000.0 + ctx->pe.tv_usec/1000.0) - + (ctx->ps.tv_sec*1000.0 + ctx->ps.tv_usec/1000.0)); + count_enc[tid]++; /* Write out the encoded frame to the output file. */ output_block = ctx->output_frame.acquired_handle; @@ -705,6 +650,8 @@ static int run_once(InputParameter* par) if (win_exit_flag) break; #endif + if (par->loop > 0 && ++l >= par->loop) + break; } free(host_va); @@ -719,18 +666,14 @@ static int run_once(InputParameter* par) ctx->input_frame.pts = 0L; ctx->input_frame.dts = 0L; - if (par->log_level > BM_VPU_LOG_LEVEL_ERROR) + if (par->log_level > BMVPU_ENC_LOG_LEVEL_ERROR) printf("\n\n"); #ifdef __linux__ - printf("[%zx] Flushing frame #%d\n", pthread_self(), frm_cnt++); - if (ctx->perf) - gettimeofday(&(ctx->ps), NULL); + gettimeofday(&(ctx->ps), NULL); #endif #ifdef _WIN32 - printf("[%zx] Flushing frame #%d\n", GetCurrentThreadId(), frm_cnt++); - if (ctx->perf) - s_gettimeofday(&(ctx->ps), NULL); + s_gettimeofday(&(ctx->ps), NULL); #endif @@ -759,37 +702,22 @@ static int run_once(InputParameter* par) break; } - if (ctx->perf) - { #ifdef __linux__ - gettimeofday(&(ctx->pe), NULL); + gettimeofday(&(ctx->pe), NULL); #elif _WIN32 - s_gettimeofday(&(ctx->pe), NULL); + s_gettimeofday(&(ctx->pe), NULL); #endif - ctx->total_time += ((ctx->pe.tv_sec*1000.0 + ctx->pe.tv_usec/1000.0) - - (ctx->ps.tv_sec*1000.0 + 
ctx->ps.tv_usec/1000.0)); - ctx->total_frame++; - } - + sta_time[tid] += ((ctx->pe.tv_sec*1000.0 + ctx->pe.tv_usec/1000.0) - + (ctx->ps.tv_sec*1000.0 + ctx->ps.tv_usec/1000.0)); + count_enc[tid]++; /* Write out the encoded frame to the output file. */ output_block = ctx->output_frame.acquired_handle; fwrite(output_block, 1, ctx->output_frame.data_size, fout); free(output_block); } - if (ctx->perf) - { - if (ctx->total_time > 0.0f) - { - printf("Frames encoded: %ld. Encoding speed: %.0ffps\n", - ctx->total_frame, ctx->total_frame*1000/ctx->total_time); - } - } - cleanup: - cleanup_task((void*)ctx); - par->result = ret; #ifdef __linux__ pthread_cleanup_pop(0); @@ -797,6 +725,29 @@ static int run_once(InputParameter* par) return ret; } +static int run(InputParameter* par) +{ + int ret = 0; + int i; + par->fin = fopen(par->input_filename, "rb"); + if (par->fin == NULL) + { + fprintf(stderr, "Failed to open %s for reading: %s\n", + par->input_filename, strerror(errno)); + par->result = -1; + return -1; + } + for(i=0; irun_times; i++) + { + fseek(par->fin, 0, SEEK_SET); + ret = run_once(par); + if(ret < 0) + break; + } + fclose(par->fin); + return ret; +} + static void usage(char *progname) { static char options[] = @@ -806,7 +757,7 @@ static void usage(char *progname) "\t--soc Sophon device index. (only for PCIE mode)\n" "\t\tFor example, if /dev/bm-sophon1 will be used, please set -s 1\n" "\t\tor set --soc 1\n" - "\t-m thread number: 1(default), 2\n" + "\t-m thread number: 1(default), pcie max 24, soc max 18\n" "\t-f pixel format. 0: YUV420(default); 1: NV12\n" "\t--pix_fmt pixel format. 0: YUV420(default); 1: NV12\n" "\t-c video encoder. 0: H.264; 1: H.265(default)\n" @@ -833,13 +784,19 @@ static void usage(char *progname) "\t-t aligned height (optional)\n" "\t-r bit rate (kbps). 0 means to use constant quality mode (0 at default)\n" "\t-q quantization parameter for constant quality mode. [0, 51] (32 at default)\n" - "\t-l loop number(optional). 
(1 at default)\n" - "\t--loop loop number(optional). (1 at default)\n" + "\t--run_times open and close codec count. (1 at default)\n" + "\t-l loop number(optional). (1 at default, 0 is press test)\n" + "\t--loop loop number(optional). (1 at default, 0 is press test)\n" "\t-n output frame number(optional). -1,0: unlimited encoding\n" + "\t-a framerate,default 30 \n" + "\t--fps framerate,default 30 \n" "\t-i input file\n" "\t-o output file\n" "\t-?\n" "\t--help\n" + "\tSet BMVPUENC_DISPLAY_FRAMERATE to view the details of fps:\n" + "\texport BMVPUENC_DISPLAY_FRAMERATE = 0(default) shows the total fps info of all threads;\n" + "\texport BMVPUENC_DISPLAY_FRAMERATE = 1 shows fps of every single thread;" "For example,\n" "\tbmvpuenc -w 426 -h 240 -i 240p.yuv -o 240p.265\n" "\tbmvpuenc -c 0 -w 426 -h 240 -i 240p.yuv -o 240p.264\n" @@ -863,23 +820,26 @@ static int parse_args(int argc, char **argv, InputParameter* par) { "width", required_argument, NULL, 'w' }, { "height", required_argument, NULL, 'h' }, // TODO { "loop", required_argument, NULL, 'l' }, + { "run_times", required_argument, NULL, 0 }, + { "fps", required_argument, NULL, 'a' }, { "help", no_argument, NULL, 0 }, { NULL, no_argument, NULL, 0 } }; - int opt; + int opt , ret; int longIndex = 0; - int ret; memset(par, 0, sizeof(InputParameter)); + par->run_times = 1; par->thread_number = 1; par->enc.soc_idx = 0; par->enc.enc_fmt = 1; - par->enc.gop_preset = 5; + par->enc.gop_preset = BM_VPU_ENC_GOP_PRESET_IBBBP; par->enc.intra_period = 28; par->enc.bit_rate = 0; par->enc.cqp = 32; - + par->enc.fps = 30; + par->loop = 1; if (argc == 1) { /* No input argument */ usage(argv[0]); @@ -888,10 +848,9 @@ static int parse_args(int argc, char **argv, InputParameter* par) while (1) { - opt = getopt_long(argc, argv, "s:f:i:o:w:h:y:c:t:v:l:n:p:g:m:r:q:?", longOpts, &longIndex); + opt = getopt_long(argc, argv, "s:f:i:o:w:h:y:c:t:v:l:n:p:g:m:r:q:a:?", longOpts, &longIndex); if (opt == -1) break; - switch (opt) { case 'v': @@ 
-945,6 +904,9 @@ static int parse_args(int argc, char **argv, InputParameter* par) case 'o': par->output_filename = optarg; break; + case 'a': + par->enc.fps = atoi(optarg); + break; case '?': usage(argv[0]); return RETVAL_ERROR; @@ -1003,6 +965,17 @@ static int parse_args(int argc, char **argv, InputParameter* par) par->loop = atoi(optarg); break; } + ret = strcmp("run_times", longOpts[longIndex].name); + if (ret == 0) + { + par->run_times = atoi(optarg); + break; + } + ret = strcmp("fps", longOpts[longIndex].name); + if(ret == 0 ){ + par->enc.fps = atoi(optarg); + break; + } break; default: usage(argv[0]); @@ -1100,6 +1073,9 @@ static int parse_args(int argc, char **argv, InputParameter* par) if (par->enc.bit_rate < 0) par->enc.bit_rate = 0; + if (par->enc.fps <= 0 ) + par->enc.fps = 30; + if (par->enc.cqp < 0 || par->enc.cqp > 51) { fprintf(stderr, "Invalid quantization parameter %d\n", par->enc.cqp); @@ -1107,8 +1083,8 @@ static int parse_args(int argc, char **argv, InputParameter* par) return RETVAL_ERROR; } - if (par->log_level < BM_VPU_LOG_LEVEL_ERROR || - par->log_level > BM_VPU_LOG_LEVEL_TRACE) + if (par->log_level < BMVPU_ENC_LOG_LEVEL_ERROR || + par->log_level > BMVPU_ENC_LOG_LEVEL_TRACE) { fprintf(stderr, "Wrong log level: %d\n", par->log_level); usage(argv[0]); @@ -1116,16 +1092,19 @@ static int parse_args(int argc, char **argv, InputParameter* par) } if (par->thread_number < 1 || - par->thread_number > 4) + par->thread_number > MAX_THREAD ) { fprintf(stderr, "Wrong thread number: %d\n", par->thread_number); usage(argv[0]); return RETVAL_ERROR; } - if (par->loop <= 0) + if (par->loop < 0) par->loop = 1; + if (par->run_times <= 0) + par->run_times = 1; + if (par->frame_number <= 0) par->frame_number = 0x7fffffff; @@ -1134,7 +1113,7 @@ static int parse_args(int argc, char **argv, InputParameter* par) static int read_yuv_source(uint8_t* dst_va, int dst_stride_y, int dst_stride_c, int dst_height, FILE** src_file, int src_stride_y, int src_stride_c, int 
src_height, - int chroma_interleave, + int pix_format, int width, int height) { int dst_size_y = dst_stride_y * dst_height; @@ -1146,8 +1125,8 @@ static int read_yuv_source(uint8_t* dst_va, int dst_stride_y, int dst_stride_c, int log_level; int i; - log_level = bmvpu_get_logging_threshold(); - if (log_level > BM_VPU_LOG_LEVEL_INFO) + log_level = bmvpu_enc_get_logging_threshold(); + if (log_level > BMVPU_ENC_LOG_LEVEL_INFO) { printf("dst_stride_y=%d, dst_stride_c=%d, dst_height=%d\n", dst_stride_y, dst_stride_c, dst_height); printf("src_stride_y=%d, src_stride_c=%d, src_height=%d\n", src_stride_y, src_stride_c, src_height); @@ -1167,14 +1146,14 @@ static int read_yuv_source(uint8_t* dst_va, int dst_stride_y, int dst_stride_c, } int dst_frame_size = dst_size_y + dst_size_c*2; - if (chroma_interleave) + if (pix_format == BM_VPU_ENC_PIX_FORMAT_NV12) dst_frame_size = dst_size_y + dst_size_c; if (dst_stride_y == src_stride_y && dst_stride_c == src_stride_c && dst_height == src_height) { - if ( dst_frame_size > fread(dst_va, sizeof(uint8_t), dst_frame_size, *src_file)){ + if ( dst_frame_size > fread(dst_va, sizeof(uint8_t), dst_frame_size, *src_file)){ printf("eof when read in dst frame...\n"); return -1; } @@ -1183,7 +1162,7 @@ static int read_yuv_source(uint8_t* dst_va, int dst_stride_y, int dst_stride_c, } int src_frame_size = src_size_y + src_size_c*2; - if (chroma_interleave) + if (pix_format == BM_VPU_ENC_PIX_FORMAT_NV12) src_frame_size = src_size_y + src_size_c; uint8_t* tmp_buffer = malloc(sizeof(uint8_t)*src_frame_size); if (tmp_buffer==NULL) @@ -1208,7 +1187,7 @@ static int read_yuv_source(uint8_t* dst_va, int dst_stride_y, int dst_stride_c, dy += dst_stride_y; } - if (chroma_interleave==0) + if (pix_format == BM_VPU_ENC_PIX_FORMAT_YUV420P) { int w_c = (width +1)/2; int h_c = (height+1)/2; @@ -1272,8 +1251,8 @@ static BmVpuFramebuffer* get_src_framebuffer(VpuEncContext *ctx) return NULL; } - log_level = bmvpu_get_logging_threshold(); - if (log_level > 
BM_VPU_LOG_LEVEL_INFO) + log_level = bmvpu_enc_get_logging_threshold(); + if (log_level > BMVPU_ENC_LOG_LEVEL_INFO) { #ifdef __linux__ printf("[%zx] myIndex = 0x%x, %p, pop\n", pthread_self(), fb->myIndex, fb); @@ -1296,10 +1275,7 @@ static void* sigmgr_thread(void* argument) threads_t* threads = (threads_t*)argument; #ifdef __linux__ siginfo_t info; - - int i, ret; - while (1) { ret = sigwaitinfo(&waitsigset, &info); if (ret != -1) { @@ -1307,8 +1283,12 @@ static void* sigmgr_thread(void* argument) if (info.si_signo == SIGINT || info.si_signo == SIGTERM) { for (i=0; inumber; i++) { pthread_t ptid = threads->handles[i]; - printf("Thread 0x%lx is canceling...\n", ptid); - pthread_cancel(ptid); + if (ptid != NULL) + { + printf("Thread 0x%lx is canceling...\n", ptid); + pthread_cancel(ptid); + threads->handles[i] = NULL; + } } } } else { @@ -1322,7 +1302,6 @@ static void* sigmgr_thread(void* argument) int main(int argc, char *argv[]) { - InputParameter par = {0}; #ifdef __linux__ sigset_t oldset; @@ -1334,11 +1313,13 @@ int main(int argc, char *argv[]) ret = parse_args(argc, argv, &par); if (ret != RETVAL_OK) + { return -1; + } - bmvpu_set_logging_threshold(par.log_level); + bmvpu_enc_set_logging_threshold(par.log_level); - bmvpu_set_logging_function(logging_fn); + bmvpu_enc_set_logging_function(logging_fn); #ifdef __linux__ @@ -1367,7 +1348,7 @@ int main(int argc, char *argv[]) memcpy(&(mt_par[i]), &par, sizeof(InputParameter)); mt_par[i].thread_id = i; #ifdef __linux__ - ret = pthread_create(&(thread_handle[i]), NULL, (void*)run_once, (void*)(&(mt_par[i]))); + ret = pthread_create(&(thread_handle[i]), NULL, (void*)run, (void*)(&(mt_par[i]))); if (ret < 0) { snprintf(tmp, 256, "Failed to create pthread #%d\n", i); @@ -1375,12 +1356,25 @@ int main(int argc, char *argv[]) } #endif #ifdef _WIN32 - if ((thread_handle[i] = CreateThread(NULL, 0, (void*)run_once, (void*)(&(mt_par[i])), 0, NULL)) == NULL ) { + if ((thread_handle[i] = CreateThread(NULL, 0, (void*)run, 
(void*)(&(mt_par[i])), 0, NULL)) == NULL ) { printf("create thread error\n"); } #endif printf("Thread %d: %zx\n", i, thread_handle[i]); } + +#ifdef WIN32 + thread_stat = CreateThread(NULL, 0, statPthread, (void*)(&(par)), 0, NULL); + if (thread_stat == NULL) { + printf("stat pthread create failed \n"); + } +#else + ret = pthread_create(&(thread_stat), NULL, stat_pthread, (void*)(&(par))); + if (ret != 0) { + printf("stat pthread create failed \n"); + } +#endif + #ifdef __linux__ pthread_t sigmgr_thread_id; #endif @@ -1406,13 +1400,20 @@ int main(int argc, char *argv[]) snprintf(tmp, 256, "Failed to join pthread #%d\n", i); handle_error(tmp); } -#endif -#ifdef _WIN32 +#elif _WIN32 if (WaitForSingleObject(thread_handle[i], INFINITE) != WAIT_OBJECT_0) { printf("release thread error\n"); } #endif } + printf("All threads have done, set g_exit_flag = 1 \n"); + g_exit_flag = 1; + +#ifdef WIN32 + WaitForSingleObject(thread_stat, INFINITE); +#else + pthread_join(thread_stat, NULL); +#endif for (i=0; i #endif -#include "bmvpuapi.h" +#include "bm_vpuenc_interface.h" enum { @@ -34,7 +34,7 @@ enum RETVAL_EOS = 2 }; -static void logging_fn(BmVpuLogLevel level, +static void logging_fn(BmVpuEncLogLevel level, char const *file, int const line, char const *fn, @@ -45,12 +45,12 @@ static void logging_fn(BmVpuLogLevel level, char const *lvlstr = ""; switch (level) { - case BM_VPU_LOG_LEVEL_ERROR: lvlstr = "ERROR"; break; - case BM_VPU_LOG_LEVEL_WARNING: lvlstr = "WARNING"; break; - case BM_VPU_LOG_LEVEL_INFO: lvlstr = "INFO"; break; - case BM_VPU_LOG_LEVEL_DEBUG: lvlstr = "DEBUG"; break; - case BM_VPU_LOG_LEVEL_TRACE: lvlstr = "TRACE"; break; - case BM_VPU_LOG_LEVEL_LOG: lvlstr = "LOG"; break; + case BMVPU_ENC_LOG_LEVEL_ERROR: lvlstr = "ERROR"; break; + case BMVPU_ENC_LOG_LEVEL_WARNING: lvlstr = "WARNING"; break; + case BMVPU_ENC_LOG_LEVEL_INFO: lvlstr = "INFO"; break; + case BMVPU_ENC_LOG_LEVEL_DEBUG: lvlstr = "DEBUG"; break; + case BMVPU_ENC_LOG_LEVEL_TRACE: lvlstr = "TRACE"; break; 
+ case BMVPU_ENC_LOG_LEVEL_LOG: lvlstr = "LOG"; break; default: break; } #ifdef __linux__ diff --git a/bmvid/video/encoder/bm_enc_api/src/queue.c b/bmvid/example/queue.c similarity index 95% rename from bmvid/video/encoder/bm_enc_api/src/queue.c rename to bmvid/example/queue.c index 23b2e5e..c8012e7 100644 --- a/bmvid/video/encoder/bm_enc_api/src/queue.c +++ b/bmvid/example/queue.c @@ -133,8 +133,8 @@ bool bm_queue_show(bm_queue_t* q) int offset = start * q->size; int* ptr = (int*)(&q->buffer[offset]); - printf("%s:%d(%s) count=%d. front=%d, rear=%d. %dth: 0x%08x\n", - __FILE__, __LINE__, __func__, + printf("line:%d(%s) count=%d. front=%d, rear=%d. %dth: 0x%08x\n", + __LINE__, __func__, q->count, q->front, q->rear, start, *ptr); start++; diff --git a/bmvid/example/util.c b/bmvid/example/util.c new file mode 100644 index 0000000..c309d1b --- /dev/null +++ b/bmvid/example/util.c @@ -0,0 +1,312 @@ +/***************************************************************************** + * + * Copyright (C) 2022 Sophgo Technologies Inc. All rights reserved. + * + * bmvid is licensed under the 2-Clause BSD License except for the + * third-party components. + * + *****************************************************************************/ +/* This library provides a high-level interface for controlling the BitMain + * Sophon VPU en/decoder. 
+ */ + +#include "misc.h" + +void ChangePathStyle( + char *str + ) +{ +} + +/********************* md5 *********************/ + +#define MD5_CBLOCK 64 +#define MD5_LBLOCK (MD5_CBLOCK/4) +#define MD5_DIGEST_LENGTH 16 + +#define DATA_ORDER_IS_LITTLE_ENDIAN + +#define HASH_MAKE_STRING(c,s) do { \ + uint64_t ll; \ + ll=(c)->A; HOST_l2c(ll,(s)); \ + ll=(c)->B; HOST_l2c(ll,(s)); \ + ll=(c)->C; HOST_l2c(ll,(s)); \ + ll=(c)->D; HOST_l2c(ll,(s)); \ + } while (0) + +#define ROTATE(a,n) (((a)<<(n))|(((a)&0xffffffff)>>(32-(n)))) + +#if defined(DATA_ORDER_IS_BIG_ENDIAN) +#define HOST_c2l(c,l) \ + (l =(((uint64_t)(*((c)++)))<<24), \ + l|=(((uint64_t)(*((c)++)))<<16), \ + l|=(((uint64_t)(*((c)++)))<< 8), \ + l|=(((uint64_t)(*((c)++))) )) + +#define HOST_l2c(l,c) \ + (*((c)++)=(unsigned char)(((l)>>24)&0xff), \ + *((c)++)=(unsigned char)(((l)>>16)&0xff), \ + *((c)++)=(unsigned char)(((l)>> 8)&0xff), \ + *((c)++)=(unsigned char)(((l) )&0xff)) +#endif +#if defined(DATA_ORDER_IS_LITTLE_ENDIAN) +#define HOST_c2l(c,l) \ + (l =(((uint64_t)(*((c)++))) ), \ + l|=(((uint64_t)(*((c)++)))<< 8), \ + l|=(((uint64_t)(*((c)++)))<<16), \ + l|=(((uint64_t)(*((c)++)))<<24)) + +#define HOST_l2c(l,c) \ + (*((c)++)=(unsigned char)(((l) )&0xff), \ + *((c)++)=(unsigned char)(((l)>> 8)&0xff), \ + *((c)++)=(unsigned char)(((l)>>16)&0xff), \ + *((c)++)=(unsigned char)(((l)>>24)&0xff)) +#endif + +#define MD32_REG_T long + +#define F(b,c,d) ((((c) ^ (d)) & (b)) ^ (d)) +#define G(b,c,d) ((((b) ^ (c)) & (d)) ^ (c)) +#define H(b,c,d) ((b) ^ (c) ^ (d)) +#define I(b,c,d) (((~(d)) | (b)) ^ (c)) + +#define R0(a,b,c,d,k,s,t) { \ + a+=((k)+(t)+F((b),(c),(d))); \ + a=ROTATE(a,s); \ + a+=b; }; +#define R1(a,b,c,d,k,s,t) { \ + a+=((k)+(t)+G((b),(c),(d))); \ + a=ROTATE(a,s); \ + a+=b; }; +#define R2(a,b,c,d,k,s,t) { \ + a+=((k)+(t)+H((b),(c),(d))); \ + a=ROTATE(a,s); \ + a+=b; }; +#define R3(a,b,c,d,k,s,t) { \ + a+=((k)+(t)+I((b),(c),(d))); \ + a=ROTATE(a,s); \ + a+=b; }; + +typedef struct MD5state_st { + uint32_t 
A,B,C,D; + uint32_t Nl,Nh; + uint32_t data[16]; + uint32_t num; +} MD5_CTX; + + +void md5_block_data_order (MD5_CTX *c, const void *data_, size_t num) +{ + const unsigned char *data=data_; + register unsigned MD32_REG_T A,B,C,D,l; +#ifndef MD32_XARRAY + /* See comment in crypto/sha/sha_locl.h for details. */ + unsigned MD32_REG_T XX0, XX1, XX2, XX3, XX4, XX5, XX6, XX7, + XX8, XX9,XX10,XX11,XX12,XX13,XX14,XX15; +# define X(i) XX##i +#else + uint32_t XX[MD5_LBLOCK]; +# define X(i) XX[i] +#endif + + A=c->A; + B=c->B; + C=c->C; + D=c->D; + + for (;num--;) { + HOST_c2l(data,l); X( 0)=l; + HOST_c2l(data,l); X( 1)=l; + /* Round 0 */ + R0(A,B,C,D,X( 0), 7,0xd76aa478L); HOST_c2l(data,l); X( 2)=l; + R0(D,A,B,C,X( 1),12,0xe8c7b756L); HOST_c2l(data,l); X( 3)=l; + R0(C,D,A,B,X( 2),17,0x242070dbL); HOST_c2l(data,l); X( 4)=l; + R0(B,C,D,A,X( 3),22,0xc1bdceeeL); HOST_c2l(data,l); X( 5)=l; + R0(A,B,C,D,X( 4), 7,0xf57c0fafL); HOST_c2l(data,l); X( 6)=l; + R0(D,A,B,C,X( 5),12,0x4787c62aL); HOST_c2l(data,l); X( 7)=l; + R0(C,D,A,B,X( 6),17,0xa8304613L); HOST_c2l(data,l); X( 8)=l; + R0(B,C,D,A,X( 7),22,0xfd469501L); HOST_c2l(data,l); X( 9)=l; + R0(A,B,C,D,X( 8), 7,0x698098d8L); HOST_c2l(data,l); X(10)=l; + R0(D,A,B,C,X( 9),12,0x8b44f7afL); HOST_c2l(data,l); X(11)=l; + R0(C,D,A,B,X(10),17,0xffff5bb1L); HOST_c2l(data,l); X(12)=l; + R0(B,C,D,A,X(11),22,0x895cd7beL); HOST_c2l(data,l); X(13)=l; + R0(A,B,C,D,X(12), 7,0x6b901122L); HOST_c2l(data,l); X(14)=l; + R0(D,A,B,C,X(13),12,0xfd987193L); HOST_c2l(data,l); X(15)=l; + R0(C,D,A,B,X(14),17,0xa679438eL); + R0(B,C,D,A,X(15),22,0x49b40821L); + /* Round 1 */ + R1(A,B,C,D,X( 1), 5,0xf61e2562L); + R1(D,A,B,C,X( 6), 9,0xc040b340L); + R1(C,D,A,B,X(11),14,0x265e5a51L); + R1(B,C,D,A,X( 0),20,0xe9b6c7aaL); + R1(A,B,C,D,X( 5), 5,0xd62f105dL); + R1(D,A,B,C,X(10), 9,0x02441453L); + R1(C,D,A,B,X(15),14,0xd8a1e681L); + R1(B,C,D,A,X( 4),20,0xe7d3fbc8L); + R1(A,B,C,D,X( 9), 5,0x21e1cde6L); + R1(D,A,B,C,X(14), 9,0xc33707d6L); + R1(C,D,A,B,X( 3),14,0xf4d50d87L); 
+ R1(B,C,D,A,X( 8),20,0x455a14edL); + R1(A,B,C,D,X(13), 5,0xa9e3e905L); + R1(D,A,B,C,X( 2), 9,0xfcefa3f8L); + R1(C,D,A,B,X( 7),14,0x676f02d9L); + R1(B,C,D,A,X(12),20,0x8d2a4c8aL); + /* Round 2 */ + R2(A,B,C,D,X( 5), 4,0xfffa3942L); + R2(D,A,B,C,X( 8),11,0x8771f681L); + R2(C,D,A,B,X(11),16,0x6d9d6122L); + R2(B,C,D,A,X(14),23,0xfde5380cL); + R2(A,B,C,D,X( 1), 4,0xa4beea44L); + R2(D,A,B,C,X( 4),11,0x4bdecfa9L); + R2(C,D,A,B,X( 7),16,0xf6bb4b60L); + R2(B,C,D,A,X(10),23,0xbebfbc70L); + R2(A,B,C,D,X(13), 4,0x289b7ec6L); + R2(D,A,B,C,X( 0),11,0xeaa127faL); + R2(C,D,A,B,X( 3),16,0xd4ef3085L); + R2(B,C,D,A,X( 6),23,0x04881d05L); + R2(A,B,C,D,X( 9), 4,0xd9d4d039L); + R2(D,A,B,C,X(12),11,0xe6db99e5L); + R2(C,D,A,B,X(15),16,0x1fa27cf8L); + R2(B,C,D,A,X( 2),23,0xc4ac5665L); + /* Round 3 */ + R3(A,B,C,D,X( 0), 6,0xf4292244L); + R3(D,A,B,C,X( 7),10,0x432aff97L); + R3(C,D,A,B,X(14),15,0xab9423a7L); + R3(B,C,D,A,X( 5),21,0xfc93a039L); + R3(A,B,C,D,X(12), 6,0x655b59c3L); + R3(D,A,B,C,X( 3),10,0x8f0ccc92L); + R3(C,D,A,B,X(10),15,0xffeff47dL); + R3(B,C,D,A,X( 1),21,0x85845dd1L); + R3(A,B,C,D,X( 8), 6,0x6fa87e4fL); + R3(D,A,B,C,X(15),10,0xfe2ce6e0L); + R3(C,D,A,B,X( 6),15,0xa3014314L); + R3(B,C,D,A,X(13),21,0x4e0811a1L); + R3(A,B,C,D,X( 4), 6,0xf7537e82L); + R3(D,A,B,C,X(11),10,0xbd3af235L); + R3(C,D,A,B,X( 2),15,0x2ad7d2bbL); + R3(B,C,D,A,X( 9),21,0xeb86d391L); + + A = c->A += A; + B = c->B += B; + C = c->C += C; + D = c->D += D; + } +} + +/* Implemented from RFC1321 The MD5 Message-Digest Algorithm + */ + +#define INIT_DATA_A (uint64_t)0x67452301L +#define INIT_DATA_B (uint64_t)0xefcdab89L +#define INIT_DATA_C (uint64_t)0x98badcfeL +#define INIT_DATA_D (uint64_t)0x10325476L + +int MD5_Init (MD5_CTX *c) +{ + c->A=INIT_DATA_A; + c->B=INIT_DATA_B; + c->C=INIT_DATA_C; + c->D=INIT_DATA_D; + c->Nl=0; + c->Nh=0; + c->num=0; + return 1; +} + +int MD5_Update (MD5_CTX *c, const void *data_, size_t len) +{ + const unsigned char *data=data_; + unsigned char *p; + uint32_t l; + size_t n; + + if 
(len==0) return 1; + + l=(c->Nl+(((uint32_t)len)<<3))&0xffffffffUL; + /* 95-05-24 eay Fixed a bug with the overflow handling, thanks to + * Wei Dai for pointing it out. */ + if (l < c->Nl) /* overflow */ + c->Nh++; + c->Nh+=(len>>29); /* might cause compiler warning on 16-bit */ + c->Nl=l; + + n = c->num; + if (n != 0) { + p=(unsigned char *)c->data; + + if (len >= MD5_CBLOCK || len+n >= MD5_CBLOCK) { + memcpy (p+n,data,MD5_CBLOCK-n); + md5_block_data_order (c,p,1); + n = MD5_CBLOCK-n; + data += n; + len -= n; + c->num = 0; + memset (p,0,MD5_CBLOCK); /* keep it zeroed */ + } else { + memcpy (p+n,data,len); + c->num += (unsigned int)len; + return 1; + } + } + + n = len/MD5_CBLOCK; + if (n > 0) { + md5_block_data_order (c,data,n); + n *= MD5_CBLOCK; + data += n;//lint !e662 + len -= n; + } + + if (len != 0) { + p = (unsigned char *)c->data; + c->num = len; + memcpy (p,data,len); + } + return 1; +} + +int MD5_Final (uint8_t* md, MD5_CTX *c) +{ + unsigned char *p = (unsigned char *)c->data; + size_t n = c->num; + + p[n] = 0x80; /* there is always room for one */ + n++; + + if (n > (MD5_CBLOCK-8)) { + memset (p+n,0,MD5_CBLOCK-n); + n=0; + md5_block_data_order (c,p,1); + } + memset (p+n,0,MD5_CBLOCK-8-n); + + p += MD5_CBLOCK-8; +#if defined(DATA_ORDER_IS_BIG_ENDIAN) + (void)HOST_l2c(c->Nh,p); + (void)HOST_l2c(c->Nl,p); +#elif defined(DATA_ORDER_IS_LITTLE_ENDIAN) + (void)HOST_l2c(c->Nl,p); + (void)HOST_l2c(c->Nh,p); +#endif + p -= MD5_CBLOCK; + md5_block_data_order (c,p,1); + c->num=0; + memset (p,0,MD5_CBLOCK); + + HASH_MAKE_STRING(c,md); + + return 1; +} + +uint8_t* MD5 (const uint8_t* d, size_t n, uint8_t* md) +{ + MD5_CTX c; + static unsigned char m[MD5_DIGEST_LENGTH]; + + if (md == NULL) md=m; + if (!MD5_Init(&c)) + return NULL; + MD5_Update(&c,d,n); + MD5_Final(md,&c); + return(md); +} \ No newline at end of file diff --git a/bmvid/video/encoder/bm_enc_api/inc/bmvpuapi.h b/bmvid/example/util.h similarity index 58% rename from 
bmvid/video/encoder/bm_enc_api/inc/bmvpuapi.h rename to bmvid/example/util.h index 12a4cba..9541dd4 100644 --- a/bmvid/video/encoder/bm_enc_api/inc/bmvpuapi.h +++ b/bmvid/example/util.h @@ -9,28 +9,10 @@ /* This library provides a high-level interface for controlling the BitMain * Sophon VPU en/decoder. */ -#ifndef __BMVPUAPI_H__ -#define __BMVPUAPI_H__ -#include -#include +#include -#ifdef __cplusplus -extern "C" { -#endif -#define BMVPUAPI_VERSION "1.0.0" +void ChangePathStyle(char *str); -#include "bmvpu_types.h" -#include "bmvpu.h" -#include "bmvpu_logging.h" - -#include "bmvpuapi_common.h" -#include "bmvpuapi_enc.h" - -#ifdef __cplusplus -} -#endif - - -#endif +uint8_t* MD5 (const uint8_t* d, size_t n, uint8_t* md); \ No newline at end of file diff --git a/bmvid/example/vpu_info.c b/bmvid/example/vpu_info.c index e98edc9..3052142 100644 --- a/bmvid/example/vpu_info.c +++ b/bmvid/example/vpu_info.c @@ -26,8 +26,8 @@ #include "windows/libusb-1.0.18/examples/getopt/getopt.h" #endif -#include "bm_video_interface.h" -#include "bm_video_internal.h" +#include "bm_vpudec_interface.h" +#include "bm_vpudec_internal.h" #include "vpuapifunc.h" #include "main_helper.h" diff --git a/bmvid/example/vpu_reset.c b/bmvid/example/vpu_reset.c index 9af07fc..a3ee8d3 100644 --- a/bmvid/example/vpu_reset.c +++ b/bmvid/example/vpu_reset.c @@ -3,7 +3,7 @@ #include #include -#include "bm_video_interface.h" +#include "bm_vpudec_interface.h" #ifdef __linux__ #include @@ -38,7 +38,7 @@ int main(int argc, char* argv[]) if (ret < 0) return -1; - return BMVidVpuReset(devIdx, coreIdx); + return bmvpu_dec_reset(devIdx, coreIdx); } static void usage(char* prog_name) diff --git a/bmvid/jpeg/binary/github_remove.sh b/bmvid/jpeg/binary/github_remove.sh index 74778c8..eed077a 100755 --- a/bmvid/jpeg/binary/github_remove.sh +++ b/bmvid/jpeg/binary/github_remove.sh @@ -12,5 +12,4 @@ rm ../driver/jpuapi/jpuapifunc.h rm ../driver/jpuapi/jpuapi.h rm ../driver/jpuapi/jputable.h rm 
../driver/jdi/linux/jdi.c -rm -rf ../driver/jdi/nonos -rm -rf ../../bmcv/vpp_cmodel \ No newline at end of file +rm -rf ../driver/jdi/nonos \ No newline at end of file diff --git a/bmvid/jpeg/binary/pcie/bin/bmjpegdec b/bmvid/jpeg/binary/pcie/bin/bmjpegdec index 9333e42..ac430d8 100755 Binary files a/bmvid/jpeg/binary/pcie/bin/bmjpegdec and b/bmvid/jpeg/binary/pcie/bin/bmjpegdec differ diff --git a/bmvid/jpeg/binary/pcie/bin/bmjpegdec_seq b/bmvid/jpeg/binary/pcie/bin/bmjpegdec_seq new file mode 100755 index 0000000..052bce6 Binary files /dev/null and b/bmvid/jpeg/binary/pcie/bin/bmjpegdec_seq differ diff --git a/bmvid/jpeg/binary/pcie/bin/bmjpegenc b/bmvid/jpeg/binary/pcie/bin/bmjpegenc index ddda8a5..386cb5d 100755 Binary files a/bmvid/jpeg/binary/pcie/bin/bmjpegenc and b/bmvid/jpeg/binary/pcie/bin/bmjpegenc differ diff --git a/bmvid/jpeg/binary/pcie/bin/bmjpegenc_seq b/bmvid/jpeg/binary/pcie/bin/bmjpegenc_seq new file mode 100755 index 0000000..76cc297 Binary files /dev/null and b/bmvid/jpeg/binary/pcie/bin/bmjpegenc_seq differ diff --git a/bmvid/jpeg/binary/pcie/bin/bmjpegmulti b/bmvid/jpeg/binary/pcie/bin/bmjpegmulti index b6f867d..ffe6bdd 100755 Binary files a/bmvid/jpeg/binary/pcie/bin/bmjpegmulti and b/bmvid/jpeg/binary/pcie/bin/bmjpegmulti differ diff --git a/bmvid/jpeg/binary/pcie/include/bmjpuapi.h b/bmvid/jpeg/binary/pcie/include/bmjpuapi.h deleted file mode 100644 index 6869f01..0000000 --- a/bmvid/jpeg/binary/pcie/include/bmjpuapi.h +++ /dev/null @@ -1,1094 +0,0 @@ -/* bmjpuapi API library for the BitMain SoC - * Copyright (C) 2018 Solan Shang - * Copyright (C) 2015 Carlos Rafael Giani - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. 
- * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 - * USA - */ - -#ifndef BMJPUAPI_H -#define BMJPUAPI_H - -#include -#include -#include -#include "bmlib_runtime.h" - -#ifdef __cplusplus -extern "C" { -#endif - - -#if !defined DECL_EXPORT -#ifdef _WIN32 - #define DECL_EXPORT __declspec(dllexport) -#else - #define DECL_EXPORT -#endif -#endif - - -/* This library provides a high-level interface for controlling the BitMain JPU en/decoder. - * - * Note that the functions are _not_ thread safe. If they may be called from - * different threads, you must make sure they are surrounded by a mutex lock. - * It is recommended to use one global mutex for the bm_jpu_*_load()/unload() - * functions, and another de/encoder instance specific mutex for all of the other - * calls. 
*/ - - - - -/**************************************************/ -/******* ALLOCATOR STRUCTURES AND FUNCTIONS *******/ -/**************************************************/ - - -/* Format and for printf-compatible format-strings - * example use: printf("physical address: %" BM_JPU_PHYS_ADDR_FORMAT, phys_addr */ -#define BM_JPU_PHYS_ADDR_FORMAT "#lx" -/* Typedef for physical addresses */ -typedef unsigned long long bm_jpu_phys_addr_t; - -/* BmJpuAllocationFlags: flags for the BmJpuDMABufferAllocator's allocate vfunc */ -typedef enum -{ - BM_JPU_ALLOCATION_FLAG_CACHED = 0, - BM_JPU_ALLOCATION_FLAG_WRITECOMBINE = 1, - BM_JPU_ALLOCATION_FLAG_UNCACHED = 2 -}BmJpuAllocationFlags; - -typedef enum -{ - BM_ION_FLAG_HEAP_VPP = 0, - BM_ION_FLAG_HEAP_NPU = 1, - BM_ION_FLAG_HEAP_VPU = 2 -}BmJpuIonHeapFlags; - -#define BM_JPU_ALLOCATION_FLAG_DEFAULT ((BM_ION_FLAG_HEAP_VPP << 4) | BM_JPU_ALLOCATION_FLAG_CACHED) -#define BM_JPU_ALLOCATION_FLAG_VPP_CACHED BM_JPU_ALLOCATION_FLAG_DEFAULT -#define BM_JPU_ALLOCATION_FLAG_JPU_CACHED ((BM_ION_FLAG_HEAP_VPU << 4) | BM_JPU_ALLOCATION_FLAG_CACHED) -#define BM_JPU_ALLOCATION_FLAG_NPU_CACHED ((BM_ION_FLAG_HEAP_NPU << 4) | BM_JPU_ALLOCATION_FLAG_CACHED) -#define BM_JPU_ALLOCATION_FLAG_VPP_WRITECOMBINE ((BM_ION_FLAG_HEAP_VPP << 4) | BM_JPU_ALLOCATION_FLAG_WRITECOMBINE) -/* BmJpuMappingFlags: flags for the BmJpuDMABufferAllocator's map vfuncs - * These flags can be bitwise-OR combined, although READ and WRITE cannot - * both be set */ -typedef enum -{ - /* Map memory for CPU write access */ - BM_JPU_MAPPING_FLAG_WRITE = (1UL << 0), - /* Map memory for CPU read access */ - BM_JPU_MAPPING_FLAG_READ = (1UL << 1) - /* XXX: When adding extra flags here, follow the pattern: BM_JPU_MAPPING_FLAG_ = (1UL << ) */ -} -BmJpuMappingFlags; - - - -/* Heap allocation function for virtual memory blocks internally allocated by bmjpuapi. - * These have nothing to do with the DMA buffer allocation interface defined above. - * By default, malloc/free are used. 
*/ -typedef void* (*BmJpuHeapAllocFunc)(size_t const size, void *context, char const *file, int const line, char const *fn); -typedef void (*BmJpuHeapFreeFunc)(void *memblock, size_t const size, void *context, char const *file, int const line, char const *fn); - -/* This function allows for setting custom heap allocators, which are used to create internal heap blocks. - * The heap allocator referred to by "heap_alloc_fn" must return NULL if allocation fails. - * "context" is a user-defined value, which is passed on unchanged to the allocator functions. - * Calling this function with either "heap_alloc_fn" or "heap_free_fn" set to NULL resets the internal - * pointers to use malloc and free (the default allocators). */ -DECL_EXPORT void bm_jpu_set_heap_allocator_functions(BmJpuHeapAllocFunc heap_alloc_fn, BmJpuHeapFreeFunc heap_free_fn, void *context); - - - - -/***********************/ -/******* LOGGING *******/ -/***********************/ - - -/* Log levels. */ -typedef enum -{ - BM_JPU_LOG_LEVEL_ERROR = 0, - BM_JPU_LOG_LEVEL_WARNING = 1, - BM_JPU_LOG_LEVEL_INFO = 2, - BM_JPU_LOG_LEVEL_DEBUG = 3, - BM_JPU_LOG_LEVEL_LOG = 4, - BM_JPU_LOG_LEVEL_TRACE = 5 -} -BmJpuLogLevel; - -/* Function pointer type for logging functions. - * - * This function is invoked by BM_JPU_LOG() macro calls. This macro also passes the name - * of the source file, the line in that file, and the function name where the logging occurs - * to the logging function (over the file, line, and fn arguments, respectively). - * Together with the log level, custom logging functions can output this metadata, or use - * it for log filtering etc.*/ -typedef void (*BmJpuLoggingFunc)(BmJpuLogLevel level, char const *file, int const line, char const *fn, const char *format, ...); - -/* Defines the threshold for logging. Logs with lower priority are discarded. - * By default, the threshold is set to BM_JPU_LOG_LEVEL_INFO. 
*/ -DECL_EXPORT void bm_jpu_set_logging_threshold(BmJpuLogLevel threshold); - -/* Defines a custom logging function. - * If logging_fn is NULL, logging is disabled. This is the default value. */ -DECL_EXPORT void bm_jpu_set_logging_function(BmJpuLoggingFunc logging_fn); - - - - -/******************************************************/ -/******* MISCELLANEOUS STRUCTURES AND FUNCTIONS *******/ -/******************************************************/ -typedef enum -{ - /* planar 4:2:0; if the chroma_interleave parameter is 1, the corresponding format is NV12, otherwise it is I420 */ - BM_JPU_COLOR_FORMAT_YUV420 = 0, - /* planar 4:2:2; if the chroma_interleave parameter is 1, the corresponding format is NV16 */ - BM_JPU_COLOR_FORMAT_YUV422_HORIZONTAL = 1, - /* 4:2:2 vertical, actually 2:2:4 (according to the JPU docs); no corresponding format known for the chroma_interleave=1 case */ - /* NOTE: this format is rarely used, and has only been seen in a few JPEG files */ - BM_JPU_COLOR_FORMAT_YUV422_VERTICAL = 2, - /* planar 4:4:4; if the chroma_interleave parameter is 1, the corresponding format is NV24 */ - BM_JPU_COLOR_FORMAT_YUV444 = 3, - /* 8-bit greayscale */ - BM_JPU_COLOR_FORMAT_YUV400 = 4, - /* RGBP */ - BM_JPU_COLOR_FORMAT_RGB = 5 -} -BmJpuColorFormat; - - -/* Framebuffers are frame containers, and are used both for en- and decoding. */ -typedef struct -{ - /* Stride of the Y and of the Cb&Cr components. - * Specified in bytes. */ - unsigned int y_stride; - unsigned int cbcr_stride; - - /* DMA buffer which contains the pixels. */ - bm_device_mem_t *dma_buffer; - - /* These define the starting offsets of each component - * relative to the start of the buffer. Specified in bytes. - */ - size_t y_offset; - size_t cb_offset; - size_t cr_offset; - - /* User-defined pointer. The library does not touch this value. - * Not to be confused with the context fields of BmJpuEncodedFrame - * and BmJpuRawFrame. 
- * This can be used for example to identify which framebuffer out of - * the initially allocated pool was used by the JPU to contain a frame. - */ - void *context; - - /* Set to 1 if the framebuffer was already marked as displayed. This is for - * internal use only. Not to be read or written from the outside. */ - int already_marked; - - /* Internal, implementation-defined data. Do not modify. */ - void *internal; -} -BmJpuFramebuffer; - - -/* Structure containing details about encoded frames. */ -typedef struct -{ - /* When decoding, data must point to the memory block which contains - * encoded frame data that gets consumed by the JPU. Not used by - * the encoder. */ - uint8_t *data; - - /* Size of the encoded data, in bytes. When decoding, this is set by - * the user, and is the size of the encoded data that is pointed to - * by data. When encoding, the encoder sets this to the size of the - * acquired output block, in bytes (exactly the same value as the - * acquire_output_buffer's size argument). */ - size_t data_size; - - /* Handle produced by the user-defined acquire_output_buffer function - * during encoding. Not used by the decoder. */ - void *acquired_handle; - - /* User-defined pointer. The library does not touch this value. - * This pointer and the one from the corresponding raw frame will have - * the same value. The library will pass then through. - * It can be used to identify which raw frame is associated with this - * encoded frame for example. */ - void *context; - - /* User-defined timestamps. These are here for convenience. In many - * cases, the context one wants to associate with raw/encoded frames - * is a PTS-DTS pair. If only the context pointer were available, users - * would have to create a separate data structure containing PTS & DTS - * values for each context. Since this use case is common, these two - * fields are added to the frame structure. 
Just like the context - * pointer, the library just passes them through to the associated - * raw frame, and does not actually touch their values. It is also - * perfectly OK to not use them, and just use the context pointer - * instead, or vice versa. */ - uint64_t pts, dts; -} -BmJpuEncodedFrame; - - -/* Structure containing details about raw, uncompressed frames. */ -typedef struct -{ - /* When decoding: pointer to the framebuffer containing the decoded raw frame. - * When encoding: pointer to the framebuffer containing the raw frame to encode. */ - BmJpuFramebuffer *framebuffer; - - /* User-defined pointer. The library does not touch this value. - * This pointer and the one from the corresponding encoded frame will have - * the same value. The library will pass then through. - * It can be used to identify which raw frame is associated with this - * encoded frame for example. */ - void *context; - - /* User-defined timestamps. These are here for convenience. In many - * cases, the context one wants to associate with raw/encoded frames - * is a PTS-DTS pair. If only the context pointer were available, users - * would have to create a separate data structure containing PTS & DTS - * values for each context. Since this use case is common, these two - * fields are added to the frame structure. Just like the context - * pointer, the library just passes them through to the associated - * encoded frame, and does not actually touch their values. It is also - * perfectly OK to not use them, and just use the context pointer - * instead, or vice versa. */ - uint64_t pts, dts; -} -BmJpuRawFrame; - - -/* Structure used together with bm_jpu_calc_framebuffer_sizes() */ -typedef struct -{ - /* Frame width and height, aligned to the 16-pixel boundary required by the JPU. */ - unsigned int aligned_frame_width, aligned_frame_height; - - /* Stride sizes, in bytes, with alignment applied. The Cb and Cr planes always - * use the same stride, so they share the same value. 
*/ - unsigned int y_stride, cbcr_stride; - - /* Required DMA memory size for the Y,Cb,Cr planes in bytes. - * The Cb and Cr planes always are of the same size, so they share the same value. */ - unsigned int y_size, cbcr_size; - - /* Total required size of a framebuffer's DMA buffer, in bytes. This value includes - * the sizes of all planes, and extra bytes for alignment and padding. - * This value must be used when allocating DMA buffers for decoder framebuffers. */ - unsigned int total_size; - - /* This corresponds to the other chroma_interleave values used in bmjpuapi. - * It is stored here to allow other functions to select the correct offsets. */ - int chroma_interleave; -} -BmJpuFramebufferSizes; - - - - -/************************************************/ -/******* DECODER STRUCTURES AND FUNCTIONS *******/ -/************************************************/ - - -/* How to use the decoder (error handling omitted for clarity): - * - * Global initialization / shutdown is done by calling bm_jpu_dec_load() and - * bm_jpu_dec_unload() respectively. These functions contain a reference counter, - * so bm_jpu_dec_unload() must be called as many times as bm_jpu_dec_load() was, - * or else it will not unload. Do not try to create a decoder before calling - * bm_jpu_dec_load(), as this function loads the JPU firmware. Likewise, the - * bm_jpu_dec_unload() function unloads the firmware. This firmware (un)loading - * affects the entire process, not just the current thread. - * - * Typically, loading/unloading is done in two ways: - * (1) bm_dec_jpu_load() gets called in the startup phase of the process, and - * bm_jpu_dec_unload() in the shutdown phase. - * (2) bm_dec_jpu_load() gets called every time before a decoder is to be created, - * and bm_jpu_dec_unload() every time after a decoder was shut down. 
- * - * Both methods are fine; however, for (2), it is important to keep in mind that - * the bm_jpu_dec_load() / bm_jpu_dec_unload() functions are *not* thread safe, - * so surround their calls with mutex locks. - * - * How to create, use, and shutdown a decoder: - * 1. Call bm_jpu_dec_get_bitstream_buffer_info(), and allocate a DMA buffer - * with the given size and alignment. This is the minimum required size. - * The buffer can be larger, but must not be smaller than the given size. - * 2. Fill an instance of BmJpuDecOpenParams with the values specific to the - * input data. Check the documentation of BmJpuDecOpenParams for details - * about its fields. - * 3. Call bm_jpu_dec_open(), passing in a pointer to the filled BmJpuDecOpenParams - * instance, the bitstream DMA buffer which was allocated in step 1, a callback - * of type bm_jpu_dec_new_initial_info_callback, and a user defined pointer - * that is passed to the callback (if not needed, just set it to NULL). - * 4. Call bm_jpu_dec_decode(), and push data to it. Once initial information about - * the bitstream becomes available, the callback from step 3 is invoked. - * 5. Inside the callback, the new initial info is available. The new_initial_info pointer - * is never NULL. In this callback, framebuffers are allocated and registered, as - * explained in the next steps. Steps 7-9 are performed inside the callback. - * 6. (Optional) Perform the necessary size and alignment calculations by calling - * bm_jpu_calc_framebuffer_sizes(). Pass in either the frame width & height from - * BmJpuDecInitialInfo , or some explicit values that were determined externally. - * (The width & height do not have to be aligned; the function does this automatically.) - * 7. Create an array of at least as many BmJpuFramebuffer instances as specified in - * min_num_required_framebuffers. Each instance must point to a DMA buffer that is big - * enough to hold a raw decoded frame. 
If step 7 was performed, allocating as many bytes - * as indicated by total_size is enough. Make sure the Y,Cb,Cr offsets in each - * BmJpuFramebuffer instance are valid. Using the bm_jpu_fill_framebuffer_params() - * convenience function for this is strongly recommended. - * 8. Call bm_jpu_dec_register_framebuffers() and pass in the BmJpuFramebuffer array - * and the number of BmJpuFramebuffer instances. - * Note that this call does _not_ copy the framebuffer array, it just stores the pointer - * to it internally, so make sure the array is valid until the decoder is closed! - * This should be the last action in the callback. - * 9. Continue calling bm_jpu_dec_decode(). Make sure the input data is not NULL. - * If the BM_JPU_DEC_OUTPUT_CODE_DECODED_FRAME_AVAILABLE flag is set in the output code, - * call bm_jpu_dec_get_decoded_frame() with a pointer to an BmJpuRawFrame instance. - * The instance will get filled by the function with information about the decoded frame. - * Once the decoded frame has been processed by the user, it is important to call - * bm_jpu_dec_mark_framebuffer_as_displayed() to let the decoder know that the - * framebuffer is available for storing new decoded frames again. - * If BM_JPU_DEC_OUTPUT_CODE_EOS is set, or if bm_jpu_dec_decode() returns a value other - * than BM_JPU_DEC_RETURN_CODE_OK, stop playback and close the decoder. - * 10. In case a flush/reset is desired (typically after seeking), call bm_jpu_dec_flush(). - * Note that any internal context/PTS/DTS values from the encoded and raw frames will be thrown - * away after this call; if for example the context is an index, the system that hands - * out the indices should be informed that any previously handed out index is now unused. - * 11. After playback is finished, close the decoder with bm_jpu_dec_close(). - * 12. Deallocate framebuffer memory blocks and the bitstream buffer memory block. 
- * - * In situations where decoding and display of decoded frames happen in different threads, it - * is necessary to wait until decoding is possible. bm_jpu_dec_check_if_can_decode() is used - * for this purpose. This needs to be done in steps 5 and 10. Typically this is done by using - * a thread condition variable. Example pseudo code: - * - * mutex_lock(&mutex); - * - * while (dec_initialized && !bm_jpu_dec_check_if_can_decode(decode) && !abort_waiting) - * condition_wait(&condition_variable, &mutex); - * - * if (!abort_waiting) - * bm_jpu_dec_decode(decoder, encoded_frame, &output_code); - * ... - * - * mutex_unlock(&mutex); - * - * (abort_waiting would be a flag that gets raised when something from the outside signals - * that waiting and decoding needs to be shut down now, for example because the user wants - * to close the player, or because the user pressed Ctrl+C. dec_initialized would be a flag - * that is initially cleared, and raised in the initial info callback; it is pointless to - * call bm_jpu_dec_check_if_can_decode() before the callback was executed.) - * - * If any video sequence parameters (like frame width and height) in the input data change, - * the output code from bm_jpu_dec_decode() calls in step 10 will contain the - * BM_JPU_DEC_OUTPUT_CODE_VIDEO_PARAMS_CHANGED flag. (This will never happen in step 5.) - * When this occurs, decoding cannot continue, because the registered framebuffers are - * of an incorrect size, and because the decoder's configuration is set up for the previous - * parameters. Therefore, in this case, first, the decoder has to be drained of decoded- - * but-not-yet-displayed frames like in step 12, then, it has to be closed, and opened - * again. The BmJpuDecOpenParams structure that is then passed to the bm_jpu_dec_open() - * call should have its frame_width and frame_height values set to 0 to ensure the - * new sequence parameters are properly used. 
Then, the data that was fed into the - * bm_jpu_dec_decode() call that set the BM_JPU_DEC_OUTPUT_CODE_VIDEO_PARAMS_CHANGED flag - * has to be fed again to bm_jpu_dec_decode(). The initial info callback from - * bm_jpu_dec_open() will again be called, and decoding continues as usual. - * - * It is also recommended to make sure that framebuffers and associated DMA buffers that - * were allocated before the video sequence parameter change be deallocated in the - * initial callback to avoid memory leaks. - * - * However, if the environment is a framework like GStreamer or libav/FFmpeg, it is likely - * this will never have to be done, since these have their own parsers that detect parameter - * changes and initiate reinitializations. - */ - - -/* Opaque decoder structure. */ -typedef struct _BmJpuDecoder BmJpuDecoder; - - -/* Decoder return codes. With the exception of BM_JPU_DEC_RETURN_CODE_OK, these - * should be considered hard errors, and the decoder should be closed when they - * are returned. */ -typedef enum -{ - /* Operation finished successfully. */ - BM_JPU_DEC_RETURN_CODE_OK = 0, - /* General return code for when an error occurs. This is used as a catch-all - * for when the other error return codes do not match the error. */ - BM_JPU_DEC_RETURN_CODE_ERROR, - /* Input parameters were invalid. */ - BM_JPU_DEC_RETURN_CODE_INVALID_PARAMS, - /* JPU decoder handle is invalid. This is an internal error, and most likely - * a bug in the library. Please report such errors. */ - BM_JPU_DEC_RETURN_CODE_INVALID_HANDLE, - /* Framebuffer information is invalid. Typically happens when the BmJpuFramebuffer - * structures that get passed to bm_jpu_dec_register_framebuffers() contain - * invalid values. */ - BM_JPU_DEC_RETURN_CODE_INVALID_FRAMEBUFFER, - /* Registering framebuffers for decoding failed because not enough framebuffers - * were given to the bm_jpu_dec_register_framebuffers() function. 
*/ - BM_JPU_DEC_RETURN_CODE_INSUFFICIENT_FRAMEBUFFERS, - /* A stride value (for example one of the stride values of a framebuffer) is invalid. */ - BM_JPU_DEC_RETURN_CODE_INVALID_STRIDE, - /* A function was called at an inappropriate time (for example, when - * bm_jpu_dec_register_framebuffers() is called before a single byte of input data - * was passed to bm_jpu_dec_decode() ). */ - BM_JPU_DEC_RETURN_CODE_WRONG_CALL_SEQUENCE, - /* The operation timed out. */ - BM_JPU_DEC_RETURN_CODE_TIMEOUT, - /* A function that should only be called once for the duration of the decoding - * session was called again. One example is bm_jpu_dec_register_framebuffers(). */ - BM_JPU_DEC_RETURN_CODE_ALREADY_CALLED, - /* Allocation memory failure */ - BM_JPU_DEC_RETURN_ALLOC_MEM_ERROR -} -BmJpuDecReturnCodes; - - -/* Decoder output codes. These can be bitwise OR combined, so check - * for their presence in the output_codes bitmask returned by - * bm_jpu_dec_decode() by using a bitwise AND. */ -typedef enum -{ - /* Input data was used. If this code is present, the input data - * that was given to the bm_jpu_dec_decode() must not be given - * to a following bm_jpu_dec_decode() call; instead, new data - * should be loaded. If this code is not present, then the decoder - * didn't use it yet, so give it to the decoder again until this - * code is set or an error is returned. - * NOTE: this flag is obsolete. It used to mean something with the - * fslwrapper backend; however, with the jpulib backend, it will - * always use the input unless an error occurs or EOS is signaled - * in drain mode. */ - BM_JPU_DEC_OUTPUT_CODE_INPUT_USED = (1UL << 0), - /* EOS was reached; no more unfinished frames are queued internally. - * This can be reached by bitstreams with no frame delay. - */ - BM_JPU_DEC_OUTPUT_CODE_EOS = (1UL << 1), - /* A fully decoded frame is now available, and can be retrieved - * by calling bm_jpu_dec_get_decoded_frame(). 
*/ - BM_JPU_DEC_OUTPUT_CODE_DECODED_FRAME_AVAILABLE = (1UL << 2), - - /* There aren't enough free framebuffers available for decoding. - * This usually happens when bm_jpu_dec_mark_framebuffer_as_displayed() - * wasn't called before bm_jpu_dec_decode(), which can occur in - * multithreaded environments. bm_jpu_dec_check_if_can_decode() is useful - * to avoid this. Also see the guide above for more. */ - BM_JPU_DEC_OUTPUT_CODE_NOT_ENOUGH_OUTPUT_FRAMES = (1UL << 3), - /* Input data for a frame is incomplete. No decoded frame will - * be available until the input frame's data has been fully and - * correctly delivered. */ - BM_JPU_DEC_OUTPUT_CODE_NOT_ENOUGH_INPUT_DATA = (1UL << 4), - /* The JPU detected a change in the video sequence parameters - * (like frame width and height). Decoding cannot continue. See the - * explanation in the step-by-step guide above for what steps to take - * if this output code is set. Note that this refers to detected - * changes in the *input data*, not to the decoded frames. This means - * that this flag is set immediately when input data with param changes - * is fed to the decoder, even if this is for example a h.264 high - * profile stream with lots of frame reordering and frame delays. */ - BM_JPU_DEC_OUTPUT_CODE_VIDEO_PARAMS_CHANGED = (1UL << 5) -} -BmJpuDecOutputCodes; - - -/* Structure used together with bm_jpu_dec_open() */ -typedef struct -{ - /* These are necessary with some formats which do not store the width - * and height in the bitstream. If the format does store them, these - * values can be set to zero. */ - unsigned int frame_width; - unsigned int frame_height; - - /* If this is 1, then Cb and Cr are interleaved in one shared chroma - * plane, otherwise they are separated in their own planes. - * See the BmJpuColorFormat documentation for the consequences of this. 
*/ - int chroma_interleave; - - /* 0: no scaling; n(1-3): scale by 2^n; */ - unsigned int scale_ratio; - - /* The DMA buffer size for bitstream */ - int bs_buffer_size; -#ifdef _WIN32 - uint8_t *buffer; -#else - uint8_t *buffer __attribute__((deprecated)); -#endif - - int device_index; - - int rotationEnable; - int mirrorEnable; - int mirrorDirection; - int rotationAngle; - - int roiEnable; - int roiWidth; - int roiHeight; - int roiOffsetX; - int roiOffsetY; - - int framebuffer_recycle; - size_t framebuffer_size; -} -BmJpuDecOpenParams; - - -/* Structure used together with bm_jpu_dec_new_initial_info_callback() . - * The values are filled by the decoder. */ -typedef struct -{ - /* Width of height of frames, in pixels. Note: it is not guaranteed that - * these values are aligned to a 16-pixel boundary (which is required - * for JPU framebuffers). These are the width and height of the frame - * with actual pixel content. It may be a subset of the total frame, - * in case these sizes need to be aligned. In that case, there are - * padding columns to the right, and padding rows below the frames. */ - unsigned int frame_width, frame_height; - - /* Caller must register at least this many framebuffers - * with the decoder. */ - unsigned int min_num_required_framebuffers; - - /* Color format of the decoded frames. */ - BmJpuColorFormat color_format; - - int chroma_interleave; - - /* Physical framebuffer addresses must be aligned to this value. */ - unsigned int framebuffer_alignment; - - int roiFrameWidth; - int roiFrameHeight; - } -BmJpuDecInitialInfo; - -/* Convenience function which calculates various sizes out of the given width & height and color format. - * The results are stored in "calculated_sizes". The given frame width and height will be aligned if - * they aren't already, and the aligned value will be stored in calculated_sizes. Width & height must be - * nonzero. The calculated_sizes pointer must also be non-NULL. 
framebuffer_alignment is an alignment - * value for the sizes of the Y/U/V planes. 0 or 1 mean no alignment. uses_interlacing is set to 1 - * if interlacing is to be used, 0 otherwise. chroma_interleave is set to 1 if a shared CbCr chroma - * plane is to be used, 0 if Cb and Cr shall use separate planes. */ -DECL_EXPORT BmJpuDecReturnCodes bm_jpu_calc_framebuffer_sizes(BmJpuColorFormat color_format, - unsigned int frame_width, - unsigned int frame_height, - unsigned int framebuffer_alignment, - int chroma_interleave, - BmJpuFramebufferSizes *calculated_sizes); - -/* Convenience function which fills fields of the BmJpuFramebuffer structure, based on data from "calculated_sizes". - * The specified DMA buffer and context pointer are also set. */ -DECL_EXPORT BmJpuDecReturnCodes bm_jpu_fill_framebuffer_params(BmJpuFramebuffer *framebuffer, - BmJpuFramebufferSizes *calculated_sizes, - bm_device_mem_t *fb_dma_buffer, - void* context); - -/* Returns a human-readable description of the given color format. Useful for logging. */ -DECL_EXPORT char const *bm_jpu_color_format_string(BmJpuColorFormat color_format); - - -/* Callback for handling new BmJpuDecInitialInfo data. This is called when new - * information about the bitstream becomes available. output_code can be useful - * to check why this callback was invoked. BM_JPU_DEC_OUTPUT_CODE_INITIAL_INFO_AVAILABLE - * is always set. Every time this callback gets called, new framebuffers should be - * allocated and registered with bm_jpu_dec_register_framebuffers(). - * user_data is a user-defined pointer that is passed to this callback. It has the same - * value as the callback_user_data pointer from the bm_jpu_dec_open() call. - * The callback returns 0 if something failed, nonzero if successful. 
*/ -DECL_EXPORT typedef int (*bm_jpu_dec_new_initial_info_callback)(BmJpuDecoder *decoder, - BmJpuDecInitialInfo *new_initial_info, - unsigned int output_code, - void *user_data); - - -/* Returns a human-readable description of the error code. - * Useful for logging. */ -DECL_EXPORT char const * bm_jpu_dec_error_string(BmJpuDecReturnCodes code); - -/* These two functions load/unload the decoder. Due to an internal reference - * counter, it is safe to call these functions more than once. However, the - * number of unload() calls must match the number of load() calls. - * - * The decoder must be loaded before doing anything else with it. - * Similarly, the decoder must not be unloaded before all decoder activities - * have been finished. This includes opening/decoding decoder instances. */ -DECL_EXPORT BmJpuDecReturnCodes bm_jpu_dec_load(int device_index); -DECL_EXPORT BmJpuDecReturnCodes bm_jpu_dec_unload(int device_index); -DECL_EXPORT bm_handle_t bm_jpu_get_handle(int device_index); - -/* Called before bm_jpu_dec_open(), it returns the alignment and size for the - * physical memory block necessary for the decoder's bitstream buffer. The user - * must allocate a DMA buffer of at least this size, and its physical address - * must be aligned according to the alignment value. */ -DECL_EXPORT void bm_jpu_dec_get_bitstream_buffer_info(size_t *size, unsigned int *alignment); - -/* Opens a new decoder instance. "open_params", "bitstream_buffer", and "new_initial_info" - * must not be NULL. "callback_user_data" is a user-defined pointer that is passed on to - * the callback when it is invoked. The bitstream buffer must use the alignment and size - * that bm_jpu_dec_get_bitstream_buffer_info() specifies (it can also be larger, but must - * not be smaller than the size this function gives). 
*/ -DECL_EXPORT BmJpuDecReturnCodes bm_jpu_dec_open(BmJpuDecoder **decoder, BmJpuDecOpenParams *open_params, - bm_device_mem_t *bitstream_buffer, - bm_jpu_dec_new_initial_info_callback new_initial_info_callback, - void *callback_user_data); - -/* Closes a decoder instance. Trying to close the same instance multiple times results in undefined behavior. */ -DECL_EXPORT BmJpuDecReturnCodes bm_jpu_dec_close(BmJpuDecoder *decoder); - -/* Flushes the decoder. Any internal undecoded or queued frames are discarded. */ -DECL_EXPORT BmJpuDecReturnCodes bm_jpu_dec_flush(BmJpuDecoder *decoder); - -/* Registers the specified array of framebuffers with the decoder. This must be called after - * bm_jpu_dec_decode() returned an output code with BM_JPU_DEC_OUTPUT_CODE_INITIAL_INFO_AVAILABLE - * set in it. Registering can happen only once during the lifetime of a decoder instance. If for some reason - * framebuffers need to be re-registered, the instance must be closed, and a new one opened. - * The caller must ensure that the specified framebuffer array remains valid until the decoder instance - * is closed, since this function does not copy it; it just stores a pointer to the array internally. Also - * note that internally, values might be written to the array (though it will never be reallocated - * and/or freed from the inside). Also, the framebuffers' DMA buffers will be memory-mapped until the decoder - * is closed. - * - * Since this function only stores a pointer to the framebuffer array internally, and does not actually copy - * the array, it is possible - and valid - to modify the "context" fields of the framebuffers even after - * this call was made. This is useful if for example system resources are associated later with the - * framebuffers. 
In this case, it is perfectly OK to set "context" to NULL initially, and later, when the - * resources are available, associated them to the framebuffers by setting the context fields, even if - * bm_jpu_dec_register_framebuffers() was already called earlier. - * - * The framebuffers must contain valid values. The convenience functions bm_jpu_calc_framebuffer_sizes() and - * bm_jpu_fill_framebuffer_params() can be used for this. Note that all framebuffers must have the same - * stride values. */ -DECL_EXPORT BmJpuDecReturnCodes bm_jpu_dec_register_framebuffers(BmJpuDecoder *decoder, BmJpuFramebuffer *framebuffers, unsigned int num_framebuffers); - -/* Decodes an encoded input frame. "encoded_frame" must always be set, even in drain mode. See BmJpuEncodedFrame - * for details about its contents. output_code is a bit mask, must not be NULL, and returns important information - * about the decoding process. The value is a bitwise OR combination of the codes in BmJpuDecOutputCodes. Also - * look at bm_jpu_dec_get_decoded_frame() about how to retrieve decoded frames (if these exist). Note that if - * the BM_JPU_DEC_OUTPUT_CODE_VIDEO_PARAMS_CHANGED flag is set in the output_code, decoding cannot continue, - * and the decoder should be closed. See the notes below step-by-step guide above for details about this. */ -DECL_EXPORT BmJpuDecReturnCodes bm_jpu_dec_decode(BmJpuDecoder *decoder, BmJpuEncodedFrame const *encoded_frame, unsigned int *output_code); - -/* Retrieves a decoded frame. The structure referred to by "decoded_frame" will be filled with data about - * the decoded frame. "decoded_frame" must not be NULL. - * - * Calling this function before bm_jpu_dec_decode() results in an BM_JPU_DEC_RETURN_CODE_WRONG_CALL_SEQUENCE - * return value. Calling this function more than once after a bm_jpu_dec_decode() yields the same result. 
- */ -DECL_EXPORT BmJpuDecReturnCodes bm_jpu_dec_get_decoded_frame(BmJpuDecoder *decoder, BmJpuRawFrame *decoded_frame); - - -/* Check if the JPU can decode right now. While decoding a video stream, sometimes the JPU may not be able - * to decode. This is directly related to the set of free framebuffers. If this function returns 0, decoding - * should not be attempted until after bm_jpu_dec_mark_framebuffer_as_displayed() was called. If this - * happens, bm_jpu_dec_check_if_can_decode() should be called again to check if the situation changed and - * decoding can be done again. Also, calling this function before the initial info callback was executed is - * not recommended and causes undefined behavior. See the explanation above for details. */ -DECL_EXPORT int bm_jpu_dec_check_if_can_decode(BmJpuDecoder *decoder); - -/* Marks a framebuffer as displayed. This always needs to be called once the application is done with a decoded - * frame. It returns the framebuffer to the JPU pool so it can be reused for further decoding. Not calling - * this will eventually cause the decoder to fail, because it won't find any free framebuffer for storing - * a decoded frame anymore. - * - * It is safe to mark a framebuffer multiple times. The library will simply ignore the subsequent calls. */ -DECL_EXPORT BmJpuDecReturnCodes bm_jpu_dec_mark_framebuffer_as_displayed(BmJpuDecoder *decoder, BmJpuFramebuffer *framebuffer); - - - - -/************************************************/ -/******* ENCODER STRUCTURES AND FUNCTIONS *******/ -/************************************************/ - - -/* How to use the encoder (error handling omitted for clarity): - * - * Global initialization / shutdown is done by calling bm_jpu_enc_load() and - * bm_jpu_enc_unload() respectively. These functions contain a reference counter, - * so bm_jpu_enc_unload() must be called as many times as bm_jpu_enc_load() was, - * or else it will not unload. 
Do not try to create a encoder before calling - * bm_jpu_enc_load(), as this function loads the JPU firmware. Likewise, the - * bm_jpu_enc_unload() function unloads the firmware. This firmware (un)loading - * affects the entire process, not just the current thread. - * - * Typically, loading/unloading is done in two ways: - * (1) bm_jpu_enc_load() gets called in the startup phase of the process, and - * bm_jpu_enc_unload() in the shutdown phase. - * (2) bm_jpu_enc_load() gets called every time before a encoder is to be created, - * and bm_jpu_enc_unload() every time after a encoder was shut down. - * - * Both methods are fine; however, for (2), it is important to keep in mind that - * the bm_jpu_enc_load() / bm_jpu_enc_unload() functions are *not* thread safe, - * so surround their calls with mutex locks. - * - * How to create, use, and shutdown an encoder: - * 1. Call bm_jpu_enc_get_bitstream_buffer_info(), and allocate a DMA buffer - * with the given size and alignment. This is the minimum required size. - * The buffer can be larger, but must not be smaller than the given size. - * 2. Fill an instance of BmJpuEncOpenParams with the values specific to the - * input data. Check the documentation of BmJpuEncOpenParams for details - * about its fields. It is recommended to set default values by calling - * bm_jpu_enc_set_default_open_params() and afterwards set any explicit valus. - * 3. Call bm_jpu_enc_open(), passing in a pointer to the filled BmJpuEncOpenParams - * instance, and the DMA buffer of the bitstream DMA buffer which was allocated in - * step 1. - * 4. Call bm_jpu_enc_get_initial_info(). The encoder's initial info contains the - * minimum number of framebuffers that must be allocated and registered, and the - * address alignment that must be used when allocating DMA memory for these - * framebuffers. - * 5. (Optional) Perform the necessary size and alignment calculations by calling - * bm_jpu_calc_framebuffer_sizes(). 
Pass in the width & height of the frames that - * shall be encoded. (The width & height do not have to be aligned; the function - * does this automatically.) - * 6. (Optional) allocate a DMA buffer for the input frames. Only one buffer is necessary. - * If the incoming data is already stored in DMA buffers, this step can be omitted, - * since the encoder can then read the data directly. - * 7. Create an instance of BmJpuRawFrame, set its values to zero (typically by using memset()). - * 8. Create an instance of BmJpuEncodedFrame. Set its values to zero (typically by using memset()). - * 9. Set the framebuffer pointer of the BmJpuRawFrame's instance from step 7 to refer to the - * input DMA buffer (either the one allocated in step 6, or the one containing the input data if - * it already comes in DMA memory). - * 10. Fill an instance of BmJpuEncParams with valid values. It is recommended to first set its - * values to zero by using memset() to set default values. It is essential to make sure the - * acquire_output_buffer() and finish_output_buffer() function pointers are set, as these are - * used for acquiring buffers to write encoded output data into. - * Alternatively, set write_output_data() if write-callback style output is preferred. If this - * function pointer is non-NULL, then acquire_output_buffer() and finish_output_buffer() are - * ignored. - * 11. If step 6 was performed, and therefore input data does *not* come in DMA memory, copy the - * pixels from the raw input frames into the DMA buffer allocated in step 6. Otherwise, if - * the raw input frames are already stored in DMA memory, this step can be omitted. - * 12. Call bm_jpu_enc_encode(). Pass the raw frame, the encoded frame, and the encoding param - * structures from steps 9, 10, and 12 to it. - * This function will encode data, and acquire an output buffer to write the encoded data into - * by using the acquire_output_buffer() function pointer set in step 10. 
Once it is done - * encoding, it will call the finish_output_buffer() function from step 10. Any handle created - * by acquire_output_buffer() will be copied over to the encoded data frame structure. When - * bm_jpu_enc_encode() exits, this handle can then be used to further process the output data. - * It is guaranteed that once acquire_output_buffer() was called, finish_output_buffer() will - * be called, even if an error occurred. - * The BM_JPU_ENC_OUTPUT_CODE_ENCODED_FRAME_AVAILABLE output code bit will always be set - * unless the function returned a code other than BM_JPU_ENC_RETURN_CODE_OK. - * If the BM_JPU_ENC_OUTPUT_CODE_CONTAINS_HEADER bit is set, then header data has been - * written in the output memory block allocated in step 8. It is placed right before the - * actual encoded frame data. bm_jpu_enc_encode() will pass over the combined size of the header - * and the encoded frame data to acquire_output_buffer() in this case, ensuring that the output - * buffers are big enough. - * If write-callback style output is used instead (= if the write_output_data() function pointer - * inside the encoding_params is set to a valid value), then this function haves as described - * above, except that it does not call acquire_output_buffer() or finish_output_buffer(). It - * still adds headers etc. but outputs these immediately by calling write_output_data(). - * 13. Repeat steps 11 to 14 until there are no more frames to encode or an error occurs. - * 14. After encoding is finished, close the encoder with bm_jpu_enc_close(). - * 15. Deallocate framebuffer memory blocks, the input DMA buffer block, the output memory block, - * and the bitstream buffer memory block. - * - * Note that the encoder does not use any kind of frame reordering. h.264 data uses the - * baseline profile. An input frame immediately results in an output frame (unless an error occured). - * There is no delay. - * - * The JPU's encoders supports all formats from BmJpuColorFormat. 
- */ - - -/* Opaque encoder structure. */ -typedef struct _BmJpuEncoder BmJpuEncoder; - - -/* Encoder return codes. With the exception of BM_JPU_ENC_RETURN_CODE_OK, these - * should be considered hard errors, and the encoder should be closed when they - * are returned. */ -typedef enum -{ - /* Operation finished successfully. */ - BM_JPU_ENC_RETURN_CODE_OK = 0, - /* General return code for when an error occurs. This is used as a catch-all - * for when the other error return codes do not match the error. */ - BM_JPU_ENC_RETURN_CODE_ERROR, - /* Input parameters were invalid. */ - BM_JPU_ENC_RETURN_CODE_INVALID_PARAMS, - /* JPU encoder handle is invalid. This is an internal error, and most likely - * a bug in the library. Please report such errors. */ - BM_JPU_ENC_RETURN_CODE_INVALID_HANDLE, - /* Framebuffer information is invalid. Typically happens when the BmJpuFramebuffer - * structures that get passed to bm_jpu_enc_register_framebuffers() contain - * invalid values. */ - BM_JPU_ENC_RETURN_CODE_INVALID_FRAMEBUFFER, - /* Registering framebuffers for encoding failed because not enough framebuffers - * were given to the bm_jpu_enc_register_framebuffers() function. */ - BM_JPU_ENC_RETURN_CODE_INSUFFICIENT_FRAMEBUFFERS, - /* A stride value (for example one of the stride values of a framebuffer) is invalid. */ - BM_JPU_ENC_RETURN_CODE_INVALID_STRIDE, - /* A function was called at an inappropriate time. */ - BM_JPU_ENC_RETURN_CODE_WRONG_CALL_SEQUENCE, - /* The operation timed out. */ - BM_JPU_ENC_RETURN_CODE_TIMEOUT, - /* write_output_data() in BmJpuEncParams returned 0. */ - BM_JPU_ENC_RETURN_CODE_WRITE_CALLBACK_FAILED, - /* Allocation memory failure */ - BM_JPU_ENC_RETURN_ALLOC_MEM_ERROR -} -BmJpuEncReturnCodes; - - -/* Encoder output codes. These can be bitwise OR combined, so check - * for their presence in the output_codes bitmask returned by - * bm_jpu_enc_encode() by using a bitwise AND. */ -typedef enum -{ - /* Input data was used. 
If this code is present, the input frame - * that was given to the bm_jpu_dec_encode() must not be given - * to a following bm_jpu_dec_encode() call; instead, a new frame - * should be loaded. If this code is not present, then the encoder - * didn't use it yet, so give it to the encoder again until this - * code is set or an error is returned. */ - BM_JPU_ENC_OUTPUT_CODE_INPUT_USED = (1UL << 0), - /* A fully encoded frame is now available. The encoded_frame argument - * passed to bm_jpu_enc_encode() contains information about this frame. */ - BM_JPU_ENC_OUTPUT_CODE_ENCODED_FRAME_AVAILABLE = (1UL << 1), - /* The data in the encoded frame also contains header information - * like SPS/PSS for h.264. Headers are always placed at the beginning - * of the encoded data, and this code is never present if the - * BM_JPU_ENC_OUTPUT_CODE_ENCODED_FRAME_AVAILABLE isn't set. */ - BM_JPU_ENC_OUTPUT_CODE_CONTAINS_HEADER = (1UL << 2) -} -BmJpuEncOutputCodes; - - -/* Structure used together with bm_jpu_enc_open() */ -typedef struct -{ - /* Width and height of the incoming frames, in pixels. These - * do not have to be aligned to any boundaries. */ - unsigned int frame_width; - unsigned int frame_height; - /* Color format to use for incoming frames. MJPEG actually uses - * all possible values. - * See the BmJpuColorFormat documentation for an explanation how - * the chroma_interleave value can affec the pixel format that is used. */ - BmJpuColorFormat color_format; - - /* Quality factor for JPEG encoding, between 1 (worst quality, best - * compression) and 100 (best quality, worst compression). Default - * value is 85. - * This quality factor is the one from the Independent JPEG Group's - * formula for generating a scale factor out of the quality factor. - * This means that this quality factor is exactly the same as the - * one used by libjpeg. 
*/ - unsigned int quality_factor; - - /* If this is 1, then Cb and Cr are interleaved in one shared chroma - * plane, otherwise they are separated in their own planes. - * See the BmJpuColorFormat documentation for the consequences of this. */ - int chroma_interleave; - - int packed_format; - int device_index; - - int rotationEnable; - int mirrorEnable; - int mirrorDirection; - int rotationAngle; -} -BmJpuEncOpenParams; - - -/* Initial encoding information, produced by the encoder. This structure is - * essential to actually begin encoding, since it contains all of the - * necessary information to create and register enough framebuffers. */ -typedef struct -{ - /* Caller must register at least this many framebuffers - * with the encoder. */ - unsigned int min_num_required_framebuffers; - - /* Physical framebuffer addresses must be aligned to this value. */ - unsigned int framebuffer_alignment; -} -BmJpuEncInitialInfo; - - -/* Function pointer used during encoding for acquiring output buffers. - * See bm_jpu_enc_encode() for details about the encoding process. - * context is the value of output_buffer_context specified in - * BmJpuEncParams. size is the size of the block to acquire, in bytes. - * acquired_handle is an output value; the function can set this to a - * handle that corresponds to the acquired buffer. For example, in - * libav/FFmpeg, this handle could be a pointer to an AVBuffer. In - * GStreamer, this could be a pointer to a GstBuffer. The value of - * *acquired_handle will later be copied to the acquired_handle value - * of BmJpuEncodedFrame. - * The return value is a pointer to a memory-mapped region of the - * output buffer, or NULL if acquiring failed. - * If the write_output_data function pointer in the encoder params - * is non-NULL, this function is not called. - * This function is only used by bm_jpu_enc_encode(). 
*/ -typedef void* (*BmJpuEncAcquireOutputBuffer)(void *context, size_t size, void **acquired_handle); - -/* Function pointer used during encoding for notifying that the encoder - * is done with the output buffer. This is *not* a function for freeing - * allocated buffers; instead, it makes it possible to release, unmap etc. - * context is the value of output_buffer_context specified in - * BmJpuEncParams. acquired_handle equals the value of *acquired_handle in - * BmJpuEncAcquireOutputBuffer. - * If the write_output_data function pointer in the encoder params - * is non-NULL, this function is not called. */ -typedef void (*BmJpuEncFinishOutputBuffer)(void *context, void *acquired_handle); - -/* Function pointer used during encoding for passing the output encoded data - * to the user. If this function is not NULL, then BmJpuEncFinishOutputBuffer - * and BmJpuEncAcquireOutputBuffer function are not called. Instead, this - * data write function is called whenever the library wants to write output. - * encoded_frame contains valid pts, dts, and context data which was copied - * over from the corresponding raw frame. - * Returns 1 if writing succeeded, 0 otherwise. - * */ -typedef int (*BmJpuWriteOutputData)(void *context, uint8_t const *data, uint32_t size, BmJpuEncodedFrame *encoded_frame); - - -typedef struct -{ - /* Functions for acquiring and finishing output buffers. See the - * typedef documentations above for details about how these - * functions should behave, and the bm_jpu_enc_encode() - * documentation for how they are used. - * Note that these functions are only used if write_output_data - * is set to NULL. - */ - BmJpuEncAcquireOutputBuffer acquire_output_buffer; - BmJpuEncFinishOutputBuffer finish_output_buffer; - - /* Function for directly passing the output data to the user - * without copying it first. - * Using this function will inhibit calls to acquire_output_buffer - * and finish_output_buffer. 
See the typedef documentations - * above for details about how this function should behave, and - * the bm_jpu_enc_encode() documentation for how they are used. - * Note that if this function is NULL then acquire_output_buffer - * and finish_output_buffer must be set. - */ - BmJpuWriteOutputData write_output_data; - int bs_in_device; - - /* User supplied value that will be passed to the functions */ - void *output_buffer_context; -} -BmJpuEncParams; - - -/* Returns a human-readable description of the error code. - * Useful for logging. */ -DECL_EXPORT char const * bm_jpu_enc_error_string(BmJpuEncReturnCodes code); - -/* These two functions load/unload the encoder. Due to an internal reference - * counter, it is safe to call these functions more than once. However, the - * number of unload() calls must match the number of load() calls. - * - * The encoder must be loaded before doing anything else with it. - * Similarly, the encoder must not be unloaded before all encoder activities - * have been finished. This includes opening/decoding encoder instances. */ -DECL_EXPORT BmJpuEncReturnCodes bm_jpu_enc_load(int device_index); -DECL_EXPORT BmJpuEncReturnCodes bm_jpu_enc_unload(int device_index); - -/* Called before bm_jpu_enc_open(), it returns the alignment and size for the - * physical memory block necessary for the encoder's bitstream buffer. The user - * must allocate a DMA buffer of at least this size, and its physical address - * must be aligned according to the alignment value. */ -DECL_EXPORT void bm_jpu_enc_get_bitstream_buffer_info(size_t *size, unsigned int *alignment); - -/* Set the fields in "open_params" to valid defaults - * Useful if the caller wants to modify only a few fields (or none at all) */ -DECL_EXPORT BmJpuEncReturnCodes bm_jpu_enc_set_default_open_params(BmJpuEncOpenParams *open_params); - -/* Opens a new encoder instance. "open_params" and "bitstream_buffer" must not be NULL. 
*/ -DECL_EXPORT BmJpuEncReturnCodes bm_jpu_enc_open(BmJpuEncoder **encoder, BmJpuEncOpenParams *open_params, - bm_device_mem_t *bitstream_buffer); - -/* Closes a encoder instance. Trying to close the same instance multiple times results in undefined behavior. */ -DECL_EXPORT BmJpuEncReturnCodes bm_jpu_enc_close(BmJpuEncoder *encoder); - -/* Retrieves initial information available after calling bm_jpu_enc_open(). */ -DECL_EXPORT BmJpuEncReturnCodes bm_jpu_enc_get_initial_info(BmJpuEncoder *encoder, BmJpuEncInitialInfo *info); - -/* Encodes a given raw input frame with the given encoding parameters. encoded_frame is filled with information - * about the resulting encoded output frame. The encoded frame data itself is stored in a buffer that is - * allocated by user-supplied functions (which are set as the acquire_output_buffer and finish_output_buffer - * function pointers in the encoding_params). - * - * Encoding internally works as follows: first, the actual encoding operation is performed by the JPU. Next, - * information about the encoded data is queried, particularly its size in bytes. Once this size is known, - * acquire_output_buffer() from encoding_params is called. This function must acquire a buffer that can be - * used to store the encoded data. This buffer must be at least as large as the size of the encoded data - * (which is given to acquire_output_buffer() as an argument). The return value of acquire_output_buffer() - * is a pointer to the (potentially memory-mapped) region of the buffer. The encoded frame data is then - * copied to this buffer, and finish_output_buffer() is called. This function can be used to inform the - * caller that the encoder is done with this buffer; it now contains encoded data, and will not be modified - * further. encoded_frame is filled with information about the encoded frame data. - * If acquiring the buffer fails, acquire_output_buffer() returns a NULL pointer. 
- * NOTE: again, finish_output_buffer() is NOT a function to free the buffer; it just signals that the encoder - * won't touch the memory inside the buffer anymore. - * - * acquire_output_buffer() can also pass on a handle to the acquired buffer (for example, in FFmpeg/libav, - * this handle would be a pointer to an AVBuffer). The handle is called the "acquired_handle". - * acquire_output_buffer() can return such a handle. This handle is copied to the encoded_frame struct's - * acquired_handle field. This way, a more intuitive workflow can be used; if for example, acquire_output_buffer() - * returns an AVBuffer pointer as the handle, this AVBuffer pointer ends up in the encoded_frame. Afterwards, - * encoded_frame contains all the necessary information to process the encoded frame data. - * - * It is guaranteed that once the buffer was acquired, finish_output_buffer() will always be called, even if - * an error occurs. This prevents potential memory/resource leaks if the finish_output_buffer() call somehow - * unlocks or releases the buffer for further processing. The acquired_handle is also copied to encoded_frame - * even if an error occurs, unless the error occurred before the acquire_output_buffer() call, in which case - * the encoded_frame's acquired_handle field will be set to NULL. - * - * The aforementioned sequences involve a copy (encoded data is copied into the acquired buffer). As an - * alternative, a write-callback-style mode of operation can be used. This alternative mode is active if - * the write_output_data function pointer in encoding_params is not NULL. In this mode, neither - * acquire_output_buffer() nor finish_output_buffer() are called. Instead, whenever the encoder needs to - * write out data, it calls write_output_data(). - * - * The other fields in encoding_params specify additional encoding parameters, which can vary from frame to - * frame. - * output_code is a bit mask containing information about the encoding result. 
The value is a bitwise OR - * combination of the codes in BmJpuEncOutputCodes. - * - * None of the arguments may be NULL. */ -DECL_EXPORT BmJpuEncReturnCodes bm_jpu_enc_encode(BmJpuEncoder *encoder, - BmJpuRawFrame const *raw_frame, - BmJpuEncodedFrame *encoded_frame, - BmJpuEncParams *encoding_params, - unsigned int *output_code); - -DECL_EXPORT int bm_jpu_get_dump(void); - -#ifdef __cplusplus -} -#endif - - -#endif diff --git a/bmvid/jpeg/binary/pcie/include/bmjpuapi_jpeg.h b/bmvid/jpeg/binary/pcie/include/bmjpuapi_jpeg.h deleted file mode 100644 index d1dfa6f..0000000 --- a/bmvid/jpeg/binary/pcie/include/bmjpuapi_jpeg.h +++ /dev/null @@ -1,281 +0,0 @@ -/* Simplified API for JPEG en- and decoding with the BitMain SoC - * Copyright (C) 2018 Solan Shang - * Copyright (C) 2014 Carlos Rafael Giani - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 - * USA - */ - - -/* This is a convenience interface for simple en- and decoding of JPEG data. - * For merely en/decoding JPEGs, having to set up a JPU en/decoder involves - * a considerable amount of boilerplate code. This interface takes care of - * these details, and presents a much simpler interface focused on this one - * task: to en/decode JPEGs. 
*/ - -#ifndef BMJPUAPI_JPEG_H -#define BMJPUAPI_JPEG_H - -#include "bmjpuapi.h" - -#ifdef __cplusplus -extern "C" { -#endif - -#if !defined DECL_EXPORT -#ifdef _WIN32 - #define DECL_EXPORT __declspec(dllexport) -#else - #define DECL_EXPORT -#endif -#endif - -typedef struct -{ - /* Width and height of JPU framebuffers are aligned to internal boundaries. - * The frame consists of the actual image pixels and extra padding pixels. - * aligned_frame_width / aligned_frame_height specify the full width/height - * including the padding pixels, and actual_frame_width / actual_frame_height - * specify the width/height without padding pixels. */ - unsigned int aligned_frame_width, aligned_frame_height; - unsigned int actual_frame_width, actual_frame_height; - - /* Stride and size of the Y, Cr, and Cb planes. The Cr and Cb planes always - * have the same stride and size. */ - unsigned int y_stride, cbcr_stride; - unsigned int y_size, cbcr_size; - - /* Offset from the start of a framebuffer's memory, in bytes. Note that the - * Cb and Cr offset values are *not* the same, unlike the stride and size ones. */ - unsigned int y_offset, cb_offset, cr_offset; - - /* Framebuffer containing the pixels of the decoded frame. */ - BmJpuFramebuffer *framebuffer; - - /* Color format of the decoded frame. 
*/ - BmJpuColorFormat color_format; - - int chroma_interleave; - - int framebuffer_recycle; - size_t framebuffer_size; -} -BmJpuJPEGDecInfo; - - -typedef struct -{ - BmJpuDecoder *decoder; - - bm_device_mem_t *bitstream_buffer; - size_t bitstream_buffer_size; - unsigned int bitstream_buffer_alignment; - - BmJpuDecInitialInfo initial_info; - - BmJpuFramebuffer *framebuffers; - bm_device_mem_t *fb_dmabuffers; - unsigned int num_framebuffers; - unsigned int num_extra_framebuffers; // TODO - BmJpuFramebufferSizes calculated_sizes; - - BmJpuRawFrame raw_frame; - int device_index; - - BmJpuFramebuffer *cur_framebuffer; - bm_device_mem_t *cur_dma_buffer; - void *opaque; - - int rotationEnable; - int mirrorEnable; - int mirrorDirection; - int rotationAngle; - - int framebuffer_recycle; - size_t framebuffer_size; -} -BmJpuJPEGDecoder; - -/* Opens a new JPU JPEG decoder instance. - * - * Internally, this function calls bm_jpu_dec_load(). - * - * If dma_buffer_allocator is NULL, the default decoder allocator is used. - * - * num_extra_framebuffers is used for instructing this function to allocate this many - * more framebuffers. Usually this value is zero, but in certain cases where many - * JPEGs need to be decoded quickly, or the DMA buffers of decoded frames need to - * be kept around elsewhere, having more framebuffers available can be helpful. - * Note though that more framebuffers also means more DMA memory consumption. - * If unsure, keep this to zero. */ -DECL_EXPORT BmJpuDecReturnCodes bm_jpu_jpeg_dec_open(BmJpuJPEGDecoder **jpeg_decoder, - BmJpuDecOpenParams *open_params, - unsigned int num_extra_framebuffers); - -/* Closes a JPEG decoder instance. Trying to close the same instance multiple times results in undefined behavior. */ -DECL_EXPORT BmJpuDecReturnCodes bm_jpu_jpeg_dec_close(BmJpuJPEGDecoder *jpeg_decoder); - -/* Determines if the JPU can decode a frame at this moment. - * - * The return value depends on how many framebuffers in the decoder are free. 
- * If enough framebuffers are free, this returns 1, otherwise 0. - * - * For simple decoding schemes where one frame is decoded, then displayed or - * consumed in any other way, and then returned to the decoder by calling - * bm_jpu_jpeg_dec_frame_finished(), this function does not have to be used, - * since in this case, there will always be enough free framebuffers. - * If however the consumption of the decoded frame occurs in a different thread - * than the decoding, it makes sense to use this function in order to wait - * until enough framebfufers are free (typically implemented by using mutexes - * and thread condition variables). Also, in this case, this function is more - * likely to return 1 the more extra framebuffers were requested in the - * bm_jpu_jpeg_dec_open() call. - */ -DECL_EXPORT int bm_jpu_jpeg_dec_can_decode(BmJpuJPEGDecoder *jpeg_decoder); - -/* Decodes a JPEG frame. - * - * jpeg_data must be set to the memory block that contains the encoded JPEG data, - * and jpeg_data_size must be set to the size of that block, in bytes. After this - * call, use the bm_jpu_jpeg_dec_get_info() function to retrieve information about - * the decoded frame. - * - * The JPU decoder only consumes baseline JPEG data. Progressive encoding is not supported. */ -DECL_EXPORT BmJpuDecReturnCodes bm_jpu_jpeg_dec_decode(BmJpuJPEGDecoder *jpeg_decoder, - uint8_t const *jpeg_data, - size_t const jpeg_data_size); - -/* Retrieves information about the decoded JPEG frame. - * - * The BmJpuJPEGDecInfo's fields will be set to those of the decoded frame. In particular, - * info's framebuffer pointer will be set to point to the framebuffer containing the - * decoded frame. Be sure to pass this pointer to bm_jpu_jpeg_dec_frame_finished() once - * the frame's pixels are no longer needed. - * - * Note that the return value of the previous bm_jpu_jpeg_dec_decode() call can be - * BM_JPU_DEC_RETURN_CODE_OK even though the framebuffer pointer retrieved here is NULL. 
- * This is the case when not enough free framebuffers are present. It is recommended to - * check the return value of the bm_jpu_jpeg_dec_can_decode() function before calling - * bm_jpu_jpeg_dec_decode(), unless the decoding sequence is simple (like in the example - * mentioned in the bm_jpu_jpeg_dec_can_decode() description). - * - * This function must not be called before bm_jpu_jpeg_dec_decode() , since otherwise, - * there is no information available (it is read in the decoding step). */ -DECL_EXPORT void bm_jpu_jpeg_dec_get_info(BmJpuJPEGDecoder *jpeg_decoder, BmJpuJPEGDecInfo *info); - -/* Inform the JPEG decoder that a previously decoded frame is no longer being used. - * - * This function must always be called once the user is done with a frame, otherwise - * the JPU cannot reclaim this ramebuffer, and will eventually run out of internal - * framebuffers to decode into. */ -DECL_EXPORT BmJpuDecReturnCodes bm_jpu_jpeg_dec_frame_finished(BmJpuJPEGDecoder *jpeg_decoder, - BmJpuFramebuffer *framebuffer); - -DECL_EXPORT BmJpuDecReturnCodes bm_jpu_jpeg_dec_flush(BmJpuJPEGDecoder *jpeg_decoder); - - - -typedef struct -{ - /* Frame width and height of the input frame. These are the actual sizes; - * they will be aligned internally if necessary. These sizes must not be - * zero. */ - unsigned int frame_width, frame_height; - - /* Quality factor for JPEG encoding. 1 = best compression, 100 = best quality. - * This is the exact same quality factor as used by libjpeg. */ - unsigned int quality_factor; - - /* Color format of the input frame. */ - BmJpuColorFormat color_format; - - /* Functions for acquiring and finishing output buffers. See the - * typedef documentations in bmjpuapi.h for details about how - * these functions should behave. */ - BmJpuEncAcquireOutputBuffer acquire_output_buffer; - BmJpuEncFinishOutputBuffer finish_output_buffer; - - /* Function for directly passing the output data to the user - * without copying it first. 
- * Using this function will inhibit calls to acquire_output_buffer - * and finish_output_buffer. */ - BmJpuWriteOutputData write_output_data; - - /* User supplied value that will be passed to the functions: - * acquire_output_buffer, finish_output_buffer, write_output_data */ - void *output_buffer_context; - - int packed_format; - int chroma_interleave; - - int rotationEnable; - int mirrorEnable; - int mirrorDirection; - int rotationAngle; - int bs_in_device; -} -BmJpuJPEGEncParams; - - -typedef struct _BmJpuJPEGEncoder BmJpuJPEGEncoder; - -/* Opens a new JPU JPEG encoder instance. - * - * Internally, this function calls bm_jpu_enc_load(). - * - * If dma_buffer_allocator is NULL, the default encoder allocator is used. - */ -DECL_EXPORT BmJpuEncReturnCodes bm_jpu_jpeg_enc_open(BmJpuJPEGEncoder **jpeg_encoder, - int bs_buffer_size, - int device_index); - -/* Closes a JPEG encoder instance. Trying to close the same instance multiple times results in undefined behavior. */ -DECL_EXPORT BmJpuEncReturnCodes bm_jpu_jpeg_enc_close(BmJpuJPEGEncoder *jpeg_encoder); - -/* Encodes a raw input frame. - * - * params must be filled with valid values; frame width and height must not be zero. - * framebuffer contains the raw input pixels to encode. Its stride and offset values - * must be valid, and its dma_buffer pointer must point to a DMA buffer that contains - * the pixel data. - * - * During encoding, the encoder will call params->acquire_output_buffer() to acquire - * an output buffer and put encoded JPEG data into. Once encoding is done, the - * params->finish_output_buffer() function is called. This is *not* to be confused with - * a memory deallocation function; it is instead typically used to notify the caller - * that the encoder won't touch the acquired buffer's contents anymore. It is guaranteed - * that finish_output_buffer() is called if acquire_output_buffer() was called earlier. 
- * - * If acquired_handle is non-NULL, then the poiner it refers to will be set to the handle - * produced by acquire_output_buffer(), even if bm_jpu_jpeg_enc_encode() exits with an - * error (unless said error occurred *before* the acquire_output_buffer() call, in which - * case *acquired_handle will be set to NULL). If output_buffer_size is non-NULL, the - * size value it points to will be set to the number of bytes of the encoded JPEG data. - * - * The JPU encoder only produces baseline JPEG data. Progressive encoding is not supported. */ -DECL_EXPORT BmJpuEncReturnCodes bm_jpu_jpeg_enc_encode(BmJpuJPEGEncoder *jpeg_encoder, - BmJpuFramebuffer const *framebuffer, - BmJpuJPEGEncParams const *params, - void **acquired_handle, - size_t *output_buffer_size); - -DECL_EXPORT int bm_jpu_jpeg_get_dump(void); - -#ifdef __cplusplus -} -#endif - - -#endif diff --git a/bmvid/jpeg/binary/pcie/include/jpu_lib.h b/bmvid/jpeg/binary/pcie/include/jpu_lib.h index ff001f1..b8c5a53 100644 --- a/bmvid/jpeg/binary/pcie/include/jpu_lib.h +++ b/bmvid/jpeg/binary/pcie/include/jpu_lib.h @@ -9,6 +9,9 @@ #define DC_TABLE_INDEX1 2 #define AC_TABLE_INDEX1 3 +#ifndef BOOL +typedef int BOOL; +#endif //------------------------------------------------------------------------------ // common struct and definition @@ -60,7 +63,8 @@ typedef enum { JPG_RET_INVALID_STRIDE, JPG_RET_WRONG_CALL_SEQUENCE, JPG_RET_CALLED_BEFORE, - JPG_RET_NOT_INITIALIZED + JPG_RET_NOT_INITIALIZED, + JPG_RET_BS_BUFFER_FULL } JpgRet; typedef enum { @@ -284,10 +288,12 @@ DECL_EXPORT int jpu_DecOpen(DecHandle *, DecOpenParam *); DECL_EXPORT int jpu_DecClose(DecHandle); DECL_EXPORT int jpu_DecGetInitialInfo(DecHandle handle, DecInitialInfo * info); +DECL_EXPORT int jpu_DecSetResolutionInfo(DecHandle handle, int width, int height); DECL_EXPORT int jpu_DecRegisterFrameBuffer(DecHandle handle, FrameBuffer * bufArray, int num, int stride, void* par0); DECL_EXPORT int jpu_DecUpdateBitstreamBuffer(DecHandle handle, uint32_t size); 
+DECL_EXPORT int jpu_DecSetRdPtrEx(DecHandle handle, PhysicalAddress addr, BOOL updateWrPtr); DECL_EXPORT int jpu_DecSetBsPtr(DecHandle handle, uint8_t *data, int data_size); DECL_EXPORT int jpu_DecStartOneFrame(DecHandle handle, DecParam * param); @@ -297,6 +303,7 @@ DECL_EXPORT int jpu_DecGiveCommand(DecHandle handle, CodecCommand cmd, void *par DECL_EXPORT int jpu_DecWaitForInt(DecHandle handle, int timeout_in_ms, int timeout_counts); DECL_EXPORT int jpu_GetDump(); +DECL_EXPORT int jpu_HWReset(); DECL_EXPORT int vpp_Init(int32_t device_index); #endif /* __BM_JPU_LIB_H__ */ diff --git a/bmvid/jpeg/binary/pcie/lib/libbmjpuapi.so.0 b/bmvid/jpeg/binary/pcie/lib/libbmjpuapi.so.0 index 31cf593..254e318 120000 --- a/bmvid/jpeg/binary/pcie/lib/libbmjpuapi.so.0 +++ b/bmvid/jpeg/binary/pcie/lib/libbmjpuapi.so.0 @@ -1 +1 @@ -libbmjpuapi.so.0.7.1 \ No newline at end of file +libbmjpuapi.so.0.11.0 \ No newline at end of file diff --git a/bmvid/jpeg/binary/pcie/lib/libbmjpuapi.so.0.10.0 b/bmvid/jpeg/binary/pcie/lib/libbmjpuapi.so.0.10.0 new file mode 100755 index 0000000..9e3a83b Binary files /dev/null and b/bmvid/jpeg/binary/pcie/lib/libbmjpuapi.so.0.10.0 differ diff --git a/bmvid/jpeg/binary/pcie/lib/libbmjpuapi.so.0.11.0 b/bmvid/jpeg/binary/pcie/lib/libbmjpuapi.so.0.11.0 new file mode 100755 index 0000000..9fe3167 Binary files /dev/null and b/bmvid/jpeg/binary/pcie/lib/libbmjpuapi.so.0.11.0 differ diff --git a/bmvid/jpeg/binary/pcie/lib/libbmjpuapi.so.0.7.1 b/bmvid/jpeg/binary/pcie/lib/libbmjpuapi.so.0.7.1 deleted file mode 100755 index d4971a8..0000000 Binary files a/bmvid/jpeg/binary/pcie/lib/libbmjpuapi.so.0.7.1 and /dev/null differ diff --git a/bmvid/jpeg/binary/pcie/lib/libbmjpulite.a b/bmvid/jpeg/binary/pcie/lib/libbmjpulite.a new file mode 100755 index 0000000..f65c523 Binary files /dev/null and b/bmvid/jpeg/binary/pcie/lib/libbmjpulite.a differ diff --git a/bmvid/jpeg/binary/pcie/lib/libbmjpulite.so.0 b/bmvid/jpeg/binary/pcie/lib/libbmjpulite.so.0 index 
4fd0f85..7e37060 120000 --- a/bmvid/jpeg/binary/pcie/lib/libbmjpulite.so.0 +++ b/bmvid/jpeg/binary/pcie/lib/libbmjpulite.so.0 @@ -1 +1 @@ -libbmjpulite.so.0.7.1 \ No newline at end of file +libbmjpulite.so.0.11.0 \ No newline at end of file diff --git a/bmvid/jpeg/binary/pcie/lib/libbmjpulite.so.0.10.0 b/bmvid/jpeg/binary/pcie/lib/libbmjpulite.so.0.10.0 new file mode 100755 index 0000000..4e71385 Binary files /dev/null and b/bmvid/jpeg/binary/pcie/lib/libbmjpulite.so.0.10.0 differ diff --git a/bmvid/jpeg/binary/pcie/lib/libbmjpulite.so.0.11.0 b/bmvid/jpeg/binary/pcie/lib/libbmjpulite.so.0.11.0 new file mode 100755 index 0000000..4e71385 Binary files /dev/null and b/bmvid/jpeg/binary/pcie/lib/libbmjpulite.so.0.11.0 differ diff --git a/bmvid/jpeg/binary/pcie/lib/libbmjpulite.so.0.7.1 b/bmvid/jpeg/binary/pcie/lib/libbmjpulite.so.0.7.1 deleted file mode 100755 index 9eeb108..0000000 Binary files a/bmvid/jpeg/binary/pcie/lib/libbmjpulite.so.0.7.1 and /dev/null differ diff --git a/bmvid/jpeg/binary/pcie_arm64/bin/bmjpegdec b/bmvid/jpeg/binary/pcie_arm64/bin/bmjpegdec index 119cf15..350af9a 100755 Binary files a/bmvid/jpeg/binary/pcie_arm64/bin/bmjpegdec and b/bmvid/jpeg/binary/pcie_arm64/bin/bmjpegdec differ diff --git a/bmvid/jpeg/binary/pcie_arm64/bin/bmjpegdec_seq b/bmvid/jpeg/binary/pcie_arm64/bin/bmjpegdec_seq new file mode 100755 index 0000000..ea057e6 Binary files /dev/null and b/bmvid/jpeg/binary/pcie_arm64/bin/bmjpegdec_seq differ diff --git a/bmvid/jpeg/binary/pcie_arm64/bin/bmjpegenc b/bmvid/jpeg/binary/pcie_arm64/bin/bmjpegenc index b796000..2d63380 100755 Binary files a/bmvid/jpeg/binary/pcie_arm64/bin/bmjpegenc and b/bmvid/jpeg/binary/pcie_arm64/bin/bmjpegenc differ diff --git a/bmvid/jpeg/binary/pcie_arm64/bin/bmjpegenc_seq b/bmvid/jpeg/binary/pcie_arm64/bin/bmjpegenc_seq new file mode 100755 index 0000000..65462ed Binary files /dev/null and b/bmvid/jpeg/binary/pcie_arm64/bin/bmjpegenc_seq differ diff --git a/bmvid/jpeg/binary/pcie_arm64/bin/bmjpegmulti 
b/bmvid/jpeg/binary/pcie_arm64/bin/bmjpegmulti index 94bfa42..9da91fa 100755 Binary files a/bmvid/jpeg/binary/pcie_arm64/bin/bmjpegmulti and b/bmvid/jpeg/binary/pcie_arm64/bin/bmjpegmulti differ diff --git a/bmvid/jpeg/binary/pcie_arm64/include/bmjpuapi.h b/bmvid/jpeg/binary/pcie_arm64/include/bmjpuapi.h deleted file mode 100644 index 73c1bc3..0000000 --- a/bmvid/jpeg/binary/pcie_arm64/include/bmjpuapi.h +++ /dev/null @@ -1,1070 +0,0 @@ -#ifndef BMJPUAPI_H -#define BMJPUAPI_H - -#include -#include -#include -#include "bmlib_runtime.h" - -#ifdef __cplusplus -extern "C" { -#endif - - -#if !defined DECL_EXPORT -#ifdef _WIN32 - #define DECL_EXPORT __declspec(dllexport) -#else - #define DECL_EXPORT -#endif -#endif - - -/* This library provides a high-level interface for controlling the BitMain JPU en/decoder. - * - * Note that the functions are _not_ thread safe. If they may be called from - * different threads, you must make sure they are surrounded by a mutex lock. - * It is recommended to use one global mutex for the bm_jpu_*_load()/unload() - * functions, and another de/encoder instance specific mutex for all of the other - * calls. 
*/ - - - - -/**************************************************/ -/******* ALLOCATOR STRUCTURES AND FUNCTIONS *******/ -/**************************************************/ - - -/* Format and for printf-compatible format-strings - * example use: printf("physical address: %" BM_JPU_PHYS_ADDR_FORMAT, phys_addr */ -#define BM_JPU_PHYS_ADDR_FORMAT "#lx" -/* Typedef for physical addresses */ -typedef unsigned long long bm_jpu_phys_addr_t; - -/* BmJpuAllocationFlags: flags for the BmJpuDMABufferAllocator's allocate vfunc */ -typedef enum -{ - BM_JPU_ALLOCATION_FLAG_CACHED = 0, - BM_JPU_ALLOCATION_FLAG_WRITECOMBINE = 1, - BM_JPU_ALLOCATION_FLAG_UNCACHED = 2 -}BmJpuAllocationFlags; - -typedef enum -{ - BM_ION_FLAG_HEAP_VPP = 0, - BM_ION_FLAG_HEAP_NPU = 1, - BM_ION_FLAG_HEAP_VPU = 2 -}BmJpuIonHeapFlags; - -#define BM_JPU_ALLOCATION_FLAG_DEFAULT ((BM_ION_FLAG_HEAP_VPP << 4) | BM_JPU_ALLOCATION_FLAG_CACHED) -#define BM_JPU_ALLOCATION_FLAG_VPP_CACHED BM_JPU_ALLOCATION_FLAG_DEFAULT -#define BM_JPU_ALLOCATION_FLAG_JPU_CACHED ((BM_ION_FLAG_HEAP_VPU << 4) | BM_JPU_ALLOCATION_FLAG_CACHED) -#define BM_JPU_ALLOCATION_FLAG_NPU_CACHED ((BM_ION_FLAG_HEAP_NPU << 4) | BM_JPU_ALLOCATION_FLAG_CACHED) -#define BM_JPU_ALLOCATION_FLAG_VPP_WRITECOMBINE ((BM_ION_FLAG_HEAP_VPP << 4) | BM_JPU_ALLOCATION_FLAG_WRITECOMBINE) -/* BmJpuMappingFlags: flags for the BmJpuDMABufferAllocator's map vfuncs - * These flags can be bitwise-OR combined, although READ and WRITE cannot - * both be set */ -typedef enum -{ - /* Map memory for CPU write access */ - BM_JPU_MAPPING_FLAG_WRITE = (1UL << 0), - /* Map memory for CPU read access */ - BM_JPU_MAPPING_FLAG_READ = (1UL << 1) - /* XXX: When adding extra flags here, follow the pattern: BM_JPU_MAPPING_FLAG_ = (1UL << ) */ -} -BmJpuMappingFlags; - - - -/* Heap allocation function for virtual memory blocks internally allocated by bmjpuapi. - * These have nothing to do with the DMA buffer allocation interface defined above. - * By default, malloc/free are used. 
*/ -typedef void* (*BmJpuHeapAllocFunc)(size_t const size, void *context, char const *file, int const line, char const *fn); -typedef void (*BmJpuHeapFreeFunc)(void *memblock, size_t const size, void *context, char const *file, int const line, char const *fn); - -/* This function allows for setting custom heap allocators, which are used to create internal heap blocks. - * The heap allocator referred to by "heap_alloc_fn" must return NULL if allocation fails. - * "context" is a user-defined value, which is passed on unchanged to the allocator functions. - * Calling this function with either "heap_alloc_fn" or "heap_free_fn" set to NULL resets the internal - * pointers to use malloc and free (the default allocators). */ -DECL_EXPORT void bm_jpu_set_heap_allocator_functions(BmJpuHeapAllocFunc heap_alloc_fn, BmJpuHeapFreeFunc heap_free_fn, void *context); - - - - -/***********************/ -/******* LOGGING *******/ -/***********************/ - - -/* Log levels. */ -typedef enum -{ - BM_JPU_LOG_LEVEL_ERROR = 0, - BM_JPU_LOG_LEVEL_WARNING = 1, - BM_JPU_LOG_LEVEL_INFO = 2, - BM_JPU_LOG_LEVEL_DEBUG = 3, - BM_JPU_LOG_LEVEL_LOG = 4, - BM_JPU_LOG_LEVEL_TRACE = 5 -} -BmJpuLogLevel; - -/* Function pointer type for logging functions. - * - * This function is invoked by BM_JPU_LOG() macro calls. This macro also passes the name - * of the source file, the line in that file, and the function name where the logging occurs - * to the logging function (over the file, line, and fn arguments, respectively). - * Together with the log level, custom logging functions can output this metadata, or use - * it for log filtering etc.*/ -typedef void (*BmJpuLoggingFunc)(BmJpuLogLevel level, char const *file, int const line, char const *fn, const char *format, ...); - -/* Defines the threshold for logging. Logs with lower priority are discarded. - * By default, the threshold is set to BM_JPU_LOG_LEVEL_INFO. 
*/ -DECL_EXPORT void bm_jpu_set_logging_threshold(BmJpuLogLevel threshold); - -/* Defines a custom logging function. - * If logging_fn is NULL, logging is disabled. This is the default value. */ -DECL_EXPORT void bm_jpu_set_logging_function(BmJpuLoggingFunc logging_fn); - - - - -/******************************************************/ -/******* MISCELLANEOUS STRUCTURES AND FUNCTIONS *******/ -/******************************************************/ -typedef enum -{ - /* planar 4:2:0; if the chroma_interleave parameter is 1, the corresponding format is NV12, otherwise it is I420 */ - BM_JPU_COLOR_FORMAT_YUV420 = 0, - /* planar 4:2:2; if the chroma_interleave parameter is 1, the corresponding format is NV16 */ - BM_JPU_COLOR_FORMAT_YUV422_HORIZONTAL = 1, - /* 4:2:2 vertical, actually 2:2:4 (according to the JPU docs); no corresponding format known for the chroma_interleave=1 case */ - /* NOTE: this format is rarely used, and has only been seen in a few JPEG files */ - BM_JPU_COLOR_FORMAT_YUV422_VERTICAL = 2, - /* planar 4:4:4; if the chroma_interleave parameter is 1, the corresponding format is NV24 */ - BM_JPU_COLOR_FORMAT_YUV444 = 3, - /* 8-bit greayscale */ - BM_JPU_COLOR_FORMAT_YUV400 = 4, - /* RGBP */ - BM_JPU_COLOR_FORMAT_RGB = 5 -} -BmJpuColorFormat; - - -/* Framebuffers are frame containers, and are used both for en- and decoding. */ -typedef struct -{ - /* Stride of the Y and of the Cb&Cr components. - * Specified in bytes. */ - unsigned int y_stride; - unsigned int cbcr_stride; - - /* DMA buffer which contains the pixels. */ - bm_device_mem_t *dma_buffer; - - /* These define the starting offsets of each component - * relative to the start of the buffer. Specified in bytes. - */ - size_t y_offset; - size_t cb_offset; - size_t cr_offset; - - /* User-defined pointer. The library does not touch this value. - * Not to be confused with the context fields of BmJpuEncodedFrame - * and BmJpuRawFrame. 
- * This can be used for example to identify which framebuffer out of - * the initially allocated pool was used by the JPU to contain a frame. - */ - void *context; - - /* Set to 1 if the framebuffer was already marked as displayed. This is for - * internal use only. Not to be read or written from the outside. */ - int already_marked; - - /* Internal, implementation-defined data. Do not modify. */ - void *internal; -} -BmJpuFramebuffer; - - -/* Structure containing details about encoded frames. */ -typedef struct -{ - /* When decoding, data must point to the memory block which contains - * encoded frame data that gets consumed by the JPU. Not used by - * the encoder. */ - uint8_t *data; - - /* Size of the encoded data, in bytes. When decoding, this is set by - * the user, and is the size of the encoded data that is pointed to - * by data. When encoding, the encoder sets this to the size of the - * acquired output block, in bytes (exactly the same value as the - * acquire_output_buffer's size argument). */ - size_t data_size; - - /* Handle produced by the user-defined acquire_output_buffer function - * during encoding. Not used by the decoder. */ - void *acquired_handle; - - /* User-defined pointer. The library does not touch this value. - * This pointer and the one from the corresponding raw frame will have - * the same value. The library will pass then through. - * It can be used to identify which raw frame is associated with this - * encoded frame for example. */ - void *context; - - /* User-defined timestamps. These are here for convenience. In many - * cases, the context one wants to associate with raw/encoded frames - * is a PTS-DTS pair. If only the context pointer were available, users - * would have to create a separate data structure containing PTS & DTS - * values for each context. Since this use case is common, these two - * fields are added to the frame structure. 
Just like the context - * pointer, the library just passes them through to the associated - * raw frame, and does not actually touch their values. It is also - * perfectly OK to not use them, and just use the context pointer - * instead, or vice versa. */ - uint64_t pts, dts; -} -BmJpuEncodedFrame; - - -/* Structure containing details about raw, uncompressed frames. */ -typedef struct -{ - /* When decoding: pointer to the framebuffer containing the decoded raw frame. - * When encoding: pointer to the framebuffer containing the raw frame to encode. */ - BmJpuFramebuffer *framebuffer; - - /* User-defined pointer. The library does not touch this value. - * This pointer and the one from the corresponding encoded frame will have - * the same value. The library will pass then through. - * It can be used to identify which raw frame is associated with this - * encoded frame for example. */ - void *context; - - /* User-defined timestamps. These are here for convenience. In many - * cases, the context one wants to associate with raw/encoded frames - * is a PTS-DTS pair. If only the context pointer were available, users - * would have to create a separate data structure containing PTS & DTS - * values for each context. Since this use case is common, these two - * fields are added to the frame structure. Just like the context - * pointer, the library just passes them through to the associated - * encoded frame, and does not actually touch their values. It is also - * perfectly OK to not use them, and just use the context pointer - * instead, or vice versa. */ - uint64_t pts, dts; -} -BmJpuRawFrame; - - -/* Structure used together with bm_jpu_calc_framebuffer_sizes() */ -typedef struct -{ - /* Frame width and height, aligned to the 16-pixel boundary required by the JPU. */ - unsigned int aligned_frame_width, aligned_frame_height; - - /* Stride sizes, in bytes, with alignment applied. The Cb and Cr planes always - * use the same stride, so they share the same value. 
*/ - unsigned int y_stride, cbcr_stride; - - /* Required DMA memory size for the Y,Cb,Cr planes in bytes. - * The Cb and Cr planes always are of the same size, so they share the same value. */ - unsigned int y_size, cbcr_size; - - /* Total required size of a framebuffer's DMA buffer, in bytes. This value includes - * the sizes of all planes, and extra bytes for alignment and padding. - * This value must be used when allocating DMA buffers for decoder framebuffers. */ - unsigned int total_size; - - /* This corresponds to the other chroma_interleave values used in bmjpuapi. - * It is stored here to allow other functions to select the correct offsets. */ - int chroma_interleave; -} -BmJpuFramebufferSizes; - - - - -/************************************************/ -/******* DECODER STRUCTURES AND FUNCTIONS *******/ -/************************************************/ - - -/* How to use the decoder (error handling omitted for clarity): - * - * Global initialization / shutdown is done by calling bm_jpu_dec_load() and - * bm_jpu_dec_unload() respectively. These functions contain a reference counter, - * so bm_jpu_dec_unload() must be called as many times as bm_jpu_dec_load() was, - * or else it will not unload. Do not try to create a decoder before calling - * bm_jpu_dec_load(), as this function loads the JPU firmware. Likewise, the - * bm_jpu_dec_unload() function unloads the firmware. This firmware (un)loading - * affects the entire process, not just the current thread. - * - * Typically, loading/unloading is done in two ways: - * (1) bm_dec_jpu_load() gets called in the startup phase of the process, and - * bm_jpu_dec_unload() in the shutdown phase. - * (2) bm_dec_jpu_load() gets called every time before a decoder is to be created, - * and bm_jpu_dec_unload() every time after a decoder was shut down. 
- * - * Both methods are fine; however, for (2), it is important to keep in mind that - * the bm_jpu_dec_load() / bm_jpu_dec_unload() functions are *not* thread safe, - * so surround their calls with mutex locks. - * - * How to create, use, and shutdown a decoder: - * 1. Call bm_jpu_dec_get_bitstream_buffer_info(), and allocate a DMA buffer - * with the given size and alignment. This is the minimum required size. - * The buffer can be larger, but must not be smaller than the given size. - * 2. Fill an instance of BmJpuDecOpenParams with the values specific to the - * input data. Check the documentation of BmJpuDecOpenParams for details - * about its fields. - * 3. Call bm_jpu_dec_open(), passing in a pointer to the filled BmJpuDecOpenParams - * instance, the bitstream DMA buffer which was allocated in step 1, a callback - * of type bm_jpu_dec_new_initial_info_callback, and a user defined pointer - * that is passed to the callback (if not needed, just set it to NULL). - * 4. Call bm_jpu_dec_decode(), and push data to it. Once initial information about - * the bitstream becomes available, the callback from step 3 is invoked. - * 5. Inside the callback, the new initial info is available. The new_initial_info pointer - * is never NULL. In this callback, framebuffers are allocated and registered, as - * explained in the next steps. Steps 7-9 are performed inside the callback. - * 6. (Optional) Perform the necessary size and alignment calculations by calling - * bm_jpu_calc_framebuffer_sizes(). Pass in either the frame width & height from - * BmJpuDecInitialInfo , or some explicit values that were determined externally. - * (The width & height do not have to be aligned; the function does this automatically.) - * 7. Create an array of at least as many BmJpuFramebuffer instances as specified in - * min_num_required_framebuffers. Each instance must point to a DMA buffer that is big - * enough to hold a raw decoded frame. 
If step 7 was performed, allocating as many bytes - * as indicated by total_size is enough. Make sure the Y,Cb,Cr offsets in each - * BmJpuFramebuffer instance are valid. Using the bm_jpu_fill_framebuffer_params() - * convenience function for this is strongly recommended. - * 8. Call bm_jpu_dec_register_framebuffers() and pass in the BmJpuFramebuffer array - * and the number of BmJpuFramebuffer instances. - * Note that this call does _not_ copy the framebuffer array, it just stores the pointer - * to it internally, so make sure the array is valid until the decoder is closed! - * This should be the last action in the callback. - * 9. Continue calling bm_jpu_dec_decode(). Make sure the input data is not NULL. - * If the BM_JPU_DEC_OUTPUT_CODE_DECODED_FRAME_AVAILABLE flag is set in the output code, - * call bm_jpu_dec_get_decoded_frame() with a pointer to an BmJpuRawFrame instance. - * The instance will get filled by the function with information about the decoded frame. - * Once the decoded frame has been processed by the user, it is important to call - * bm_jpu_dec_mark_framebuffer_as_displayed() to let the decoder know that the - * framebuffer is available for storing new decoded frames again. - * If BM_JPU_DEC_OUTPUT_CODE_EOS is set, or if bm_jpu_dec_decode() returns a value other - * than BM_JPU_DEC_RETURN_CODE_OK, stop playback and close the decoder. - * 10. In case a flush/reset is desired (typically after seeking), call bm_jpu_dec_flush(). - * Note that any internal context/PTS/DTS values from the encoded and raw frames will be thrown - * away after this call; if for example the context is an index, the system that hands - * out the indices should be informed that any previously handed out index is now unused. - * 11. After playback is finished, close the decoder with bm_jpu_dec_close(). - * 12. Deallocate framebuffer memory blocks and the bitstream buffer memory block. 
- * - * In situations where decoding and display of decoded frames happen in different threads, it - * is necessary to wait until decoding is possible. bm_jpu_dec_check_if_can_decode() is used - * for this purpose. This needs to be done in steps 5 and 10. Typically this is done by using - * a thread condition variable. Example pseudo code: - * - * mutex_lock(&mutex); - * - * while (dec_initialized && !bm_jpu_dec_check_if_can_decode(decode) && !abort_waiting) - * condition_wait(&condition_variable, &mutex); - * - * if (!abort_waiting) - * bm_jpu_dec_decode(decoder, encoded_frame, &output_code); - * ... - * - * mutex_unlock(&mutex); - * - * (abort_waiting would be a flag that gets raised when something from the outside signals - * that waiting and decoding needs to be shut down now, for example because the user wants - * to close the player, or because the user pressed Ctrl+C. dec_initialized would be a flag - * that is initially cleared, and raised in the initial info callback; it is pointless to - * call bm_jpu_dec_check_if_can_decode() before the callback was executed.) - * - * If any video sequence parameters (like frame width and height) in the input data change, - * the output code from bm_jpu_dec_decode() calls in step 10 will contain the - * BM_JPU_DEC_OUTPUT_CODE_VIDEO_PARAMS_CHANGED flag. (This will never happen in step 5.) - * When this occurs, decoding cannot continue, because the registered framebuffers are - * of an incorrect size, and because the decoder's configuration is set up for the previous - * parameters. Therefore, in this case, first, the decoder has to be drained of decoded- - * but-not-yet-displayed frames like in step 12, then, it has to be closed, and opened - * again. The BmJpuDecOpenParams structure that is then passed to the bm_jpu_dec_open() - * call should have its frame_width and frame_height values set to 0 to ensure the - * new sequence parameters are properly used. 
Then, the data that was fed into the - * bm_jpu_dec_decode() call that set the BM_JPU_DEC_OUTPUT_CODE_VIDEO_PARAMS_CHANGED flag - * has to be fed again to bm_jpu_dec_decode(). The initial info callback from - * bm_jpu_dec_open() will again be called, and decoding continues as usual. - * - * It is also recommended to make sure that framebuffers and associated DMA buffers that - * were allocated before the video sequence parameter change be deallocated in the - * initial callback to avoid memory leaks. - * - * However, if the environment is a framework like GStreamer or libav/FFmpeg, it is likely - * this will never have to be done, since these have their own parsers that detect parameter - * changes and initiate reinitializations. - */ - - -/* Opaque decoder structure. */ -typedef struct _BmJpuDecoder BmJpuDecoder; - - -/* Decoder return codes. With the exception of BM_JPU_DEC_RETURN_CODE_OK, these - * should be considered hard errors, and the decoder should be closed when they - * are returned. */ -typedef enum -{ - /* Operation finished successfully. */ - BM_JPU_DEC_RETURN_CODE_OK = 0, - /* General return code for when an error occurs. This is used as a catch-all - * for when the other error return codes do not match the error. */ - BM_JPU_DEC_RETURN_CODE_ERROR, - /* Input parameters were invalid. */ - BM_JPU_DEC_RETURN_CODE_INVALID_PARAMS, - /* JPU decoder handle is invalid. This is an internal error, and most likely - * a bug in the library. Please report such errors. */ - BM_JPU_DEC_RETURN_CODE_INVALID_HANDLE, - /* Framebuffer information is invalid. Typically happens when the BmJpuFramebuffer - * structures that get passed to bm_jpu_dec_register_framebuffers() contain - * invalid values. */ - BM_JPU_DEC_RETURN_CODE_INVALID_FRAMEBUFFER, - /* Registering framebuffers for decoding failed because not enough framebuffers - * were given to the bm_jpu_dec_register_framebuffers() function. 
*/ - BM_JPU_DEC_RETURN_CODE_INSUFFICIENT_FRAMEBUFFERS, - /* A stride value (for example one of the stride values of a framebuffer) is invalid. */ - BM_JPU_DEC_RETURN_CODE_INVALID_STRIDE, - /* A function was called at an inappropriate time (for example, when - * bm_jpu_dec_register_framebuffers() is called before a single byte of input data - * was passed to bm_jpu_dec_decode() ). */ - BM_JPU_DEC_RETURN_CODE_WRONG_CALL_SEQUENCE, - /* The operation timed out. */ - BM_JPU_DEC_RETURN_CODE_TIMEOUT, - /* A function that should only be called once for the duration of the decoding - * session was called again. One example is bm_jpu_dec_register_framebuffers(). */ - BM_JPU_DEC_RETURN_CODE_ALREADY_CALLED, - /* Allocation memory failure */ - BM_JPU_DEC_RETURN_ALLOC_MEM_ERROR -} -BmJpuDecReturnCodes; - - -/* Decoder output codes. These can be bitwise OR combined, so check - * for their presence in the output_codes bitmask returned by - * bm_jpu_dec_decode() by using a bitwise AND. */ -typedef enum -{ - /* Input data was used. If this code is present, the input data - * that was given to the bm_jpu_dec_decode() must not be given - * to a following bm_jpu_dec_decode() call; instead, new data - * should be loaded. If this code is not present, then the decoder - * didn't use it yet, so give it to the decoder again until this - * code is set or an error is returned. - * NOTE: this flag is obsolete. It used to mean something with the - * fslwrapper backend; however, with the jpulib backend, it will - * always use the input unless an error occurs or EOS is signaled - * in drain mode. */ - BM_JPU_DEC_OUTPUT_CODE_INPUT_USED = (1UL << 0), - /* EOS was reached; no more unfinished frames are queued internally. - * This can be reached by bitstreams with no frame delay. - */ - BM_JPU_DEC_OUTPUT_CODE_EOS = (1UL << 1), - /* A fully decoded frame is now available, and can be retrieved - * by calling bm_jpu_dec_get_decoded_frame(). 
*/ - BM_JPU_DEC_OUTPUT_CODE_DECODED_FRAME_AVAILABLE = (1UL << 2), - - /* There aren't enough free framebuffers available for decoding. - * This usually happens when bm_jpu_dec_mark_framebuffer_as_displayed() - * wasn't called before bm_jpu_dec_decode(), which can occur in - * multithreaded environments. bm_jpu_dec_check_if_can_decode() is useful - * to avoid this. Also see the guide above for more. */ - BM_JPU_DEC_OUTPUT_CODE_NOT_ENOUGH_OUTPUT_FRAMES = (1UL << 3), - /* Input data for a frame is incomplete. No decoded frame will - * be available until the input frame's data has been fully and - * correctly delivered. */ - BM_JPU_DEC_OUTPUT_CODE_NOT_ENOUGH_INPUT_DATA = (1UL << 4), - /* The JPU detected a change in the video sequence parameters - * (like frame width and height). Decoding cannot continue. See the - * explanation in the step-by-step guide above for what steps to take - * if this output code is set. Note that this refers to detected - * changes in the *input data*, not to the decoded frames. This means - * that this flag is set immediately when input data with param changes - * is fed to the decoder, even if this is for example a h.264 high - * profile stream with lots of frame reordering and frame delays. */ - BM_JPU_DEC_OUTPUT_CODE_VIDEO_PARAMS_CHANGED = (1UL << 5) -} -BmJpuDecOutputCodes; - - -/* Structure used together with bm_jpu_dec_open() */ -typedef struct -{ - /* These are necessary with some formats which do not store the width - * and height in the bitstream. If the format does store them, these - * values can be set to zero. */ - unsigned int frame_width; - unsigned int frame_height; - - /* If this is 1, then Cb and Cr are interleaved in one shared chroma - * plane, otherwise they are separated in their own planes. - * See the BmJpuColorFormat documentation for the consequences of this. 
*/ - int chroma_interleave; - - /* 0: no scaling; n(1-3): scale by 2^n; */ - unsigned int scale_ratio; - - /* The DMA buffer size for bitstream */ - int bs_buffer_size; -#ifdef _WIN32 - uint8_t *buffer; -#else - uint8_t *buffer __attribute__((deprecated)); -#endif - - int device_index; - - int rotationEnable; - int mirrorEnable; - int mirrorDirection; - int rotationAngle; - - int roiEnable; - int roiWidth; - int roiHeight; - int roiOffsetX; - int roiOffsetY; -} -BmJpuDecOpenParams; - - -/* Structure used together with bm_jpu_dec_new_initial_info_callback() . - * The values are filled by the decoder. */ -typedef struct -{ - /* Width of height of frames, in pixels. Note: it is not guaranteed that - * these values are aligned to a 16-pixel boundary (which is required - * for JPU framebuffers). These are the width and height of the frame - * with actual pixel content. It may be a subset of the total frame, - * in case these sizes need to be aligned. In that case, there are - * padding columns to the right, and padding rows below the frames. */ - unsigned int frame_width, frame_height; - - /* Caller must register at least this many framebuffers - * with the decoder. */ - unsigned int min_num_required_framebuffers; - - /* Color format of the decoded frames. */ - BmJpuColorFormat color_format; - - int chroma_interleave; - - /* Physical framebuffer addresses must be aligned to this value. */ - unsigned int framebuffer_alignment; - - int roiFrameWidth; - int roiFrameHeight; - } -BmJpuDecInitialInfo; - -/* Convenience function which calculates various sizes out of the given width & height and color format. - * The results are stored in "calculated_sizes". The given frame width and height will be aligned if - * they aren't already, and the aligned value will be stored in calculated_sizes. Width & height must be - * nonzero. The calculated_sizes pointer must also be non-NULL. framebuffer_alignment is an alignment - * value for the sizes of the Y/U/V planes. 
0 or 1 mean no alignment. uses_interlacing is set to 1 - * if interlacing is to be used, 0 otherwise. chroma_interleave is set to 1 if a shared CbCr chroma - * plane is to be used, 0 if Cb and Cr shall use separate planes. */ -DECL_EXPORT BmJpuDecReturnCodes bm_jpu_calc_framebuffer_sizes(BmJpuColorFormat color_format, - unsigned int frame_width, - unsigned int frame_height, - unsigned int framebuffer_alignment, - int chroma_interleave, - BmJpuFramebufferSizes *calculated_sizes); - -/* Convenience function which fills fields of the BmJpuFramebuffer structure, based on data from "calculated_sizes". - * The specified DMA buffer and context pointer are also set. */ -DECL_EXPORT BmJpuDecReturnCodes bm_jpu_fill_framebuffer_params(BmJpuFramebuffer *framebuffer, - BmJpuFramebufferSizes *calculated_sizes, - bm_device_mem_t *fb_dma_buffer, - void* context); - -/* Returns a human-readable description of the given color format. Useful for logging. */ -DECL_EXPORT char const *bm_jpu_color_format_string(BmJpuColorFormat color_format); - - -/* Callback for handling new BmJpuDecInitialInfo data. This is called when new - * information about the bitstream becomes available. output_code can be useful - * to check why this callback was invoked. BM_JPU_DEC_OUTPUT_CODE_INITIAL_INFO_AVAILABLE - * is always set. Every time this callback gets called, new framebuffers should be - * allocated and registered with bm_jpu_dec_register_framebuffers(). - * user_data is a user-defined pointer that is passed to this callback. It has the same - * value as the callback_user_data pointer from the bm_jpu_dec_open() call. - * The callback returns 0 if something failed, nonzero if successful. */ -DECL_EXPORT typedef int (*bm_jpu_dec_new_initial_info_callback)(BmJpuDecoder *decoder, - BmJpuDecInitialInfo *new_initial_info, - unsigned int output_code, - void *user_data); - - -/* Returns a human-readable description of the error code. - * Useful for logging. 
*/ -DECL_EXPORT char const * bm_jpu_dec_error_string(BmJpuDecReturnCodes code); - -/* These two functions load/unload the decoder. Due to an internal reference - * counter, it is safe to call these functions more than once. However, the - * number of unload() calls must match the number of load() calls. - * - * The decoder must be loaded before doing anything else with it. - * Similarly, the decoder must not be unloaded before all decoder activities - * have been finished. This includes opening/decoding decoder instances. */ -DECL_EXPORT BmJpuDecReturnCodes bm_jpu_dec_load(int device_index); -DECL_EXPORT BmJpuDecReturnCodes bm_jpu_dec_unload(int device_index); -DECL_EXPORT bm_handle_t bm_jpu_get_handle(int device_index); - -/* Called before bm_jpu_dec_open(), it returns the alignment and size for the - * physical memory block necessary for the decoder's bitstream buffer. The user - * must allocate a DMA buffer of at least this size, and its physical address - * must be aligned according to the alignment value. */ -DECL_EXPORT void bm_jpu_dec_get_bitstream_buffer_info(size_t *size, unsigned int *alignment); - -/* Opens a new decoder instance. "open_params", "bitstream_buffer", and "new_initial_info" - * must not be NULL. "callback_user_data" is a user-defined pointer that is passed on to - * the callback when it is invoked. The bitstream buffer must use the alignment and size - * that bm_jpu_dec_get_bitstream_buffer_info() specifies (it can also be larger, but must - * not be smaller than the size this function gives). */ -DECL_EXPORT BmJpuDecReturnCodes bm_jpu_dec_open(BmJpuDecoder **decoder, BmJpuDecOpenParams *open_params, - bm_device_mem_t *bitstream_buffer, - bm_jpu_dec_new_initial_info_callback new_initial_info_callback, - void *callback_user_data); - -/* Closes a decoder instance. Trying to close the same instance multiple times results in undefined behavior. 
*/ -DECL_EXPORT BmJpuDecReturnCodes bm_jpu_dec_close(BmJpuDecoder *decoder); - -/* Flushes the decoder. Any internal undecoded or queued frames are discarded. */ -DECL_EXPORT BmJpuDecReturnCodes bm_jpu_dec_flush(BmJpuDecoder *decoder); - -/* Registers the specified array of framebuffers with the decoder. This must be called after - * bm_jpu_dec_decode() returned an output code with BM_JPU_DEC_OUTPUT_CODE_INITIAL_INFO_AVAILABLE - * set in it. Registering can happen only once during the lifetime of a decoder instance. If for some reason - * framebuffers need to be re-registered, the instance must be closed, and a new one opened. - * The caller must ensure that the specified framebuffer array remains valid until the decoder instance - * is closed, since this function does not copy it; it just stores a pointer to the array internally. Also - * note that internally, values might be written to the array (though it will never be reallocated - * and/or freed from the inside). Also, the framebuffers' DMA buffers will be memory-mapped until the decoder - * is closed. - * - * Since this function only stores a pointer to the framebuffer array internally, and does not actually copy - * the array, it is possible - and valid - to modify the "context" fields of the framebuffers even after - * this call was made. This is useful if for example system resources are associated later with the - * framebuffers. In this case, it is perfectly OK to set "context" to NULL initially, and later, when the - * resources are available, associated them to the framebuffers by setting the context fields, even if - * bm_jpu_dec_register_framebuffers() was already called earlier. - * - * The framebuffers must contain valid values. The convenience functions bm_jpu_calc_framebuffer_sizes() and - * bm_jpu_fill_framebuffer_params() can be used for this. Note that all framebuffers must have the same - * stride values. 
*/ -DECL_EXPORT BmJpuDecReturnCodes bm_jpu_dec_register_framebuffers(BmJpuDecoder *decoder, BmJpuFramebuffer *framebuffers, unsigned int num_framebuffers); - -/* Decodes an encoded input frame. "encoded_frame" must always be set, even in drain mode. See BmJpuEncodedFrame - * for details about its contents. output_code is a bit mask, must not be NULL, and returns important information - * about the decoding process. The value is a bitwise OR combination of the codes in BmJpuDecOutputCodes. Also - * look at bm_jpu_dec_get_decoded_frame() about how to retrieve decoded frames (if these exist). Note that if - * the BM_JPU_DEC_OUTPUT_CODE_VIDEO_PARAMS_CHANGED flag is set in the output_code, decoding cannot continue, - * and the decoder should be closed. See the notes below step-by-step guide above for details about this. */ -DECL_EXPORT BmJpuDecReturnCodes bm_jpu_dec_decode(BmJpuDecoder *decoder, BmJpuEncodedFrame const *encoded_frame, unsigned int *output_code); - -/* Retrieves a decoded frame. The structure referred to by "decoded_frame" will be filled with data about - * the decoded frame. "decoded_frame" must not be NULL. - * - * Calling this function before bm_jpu_dec_decode() results in an BM_JPU_DEC_RETURN_CODE_WRONG_CALL_SEQUENCE - * return value. Calling this function more than once after a bm_jpu_dec_decode() yields the same result. - */ -DECL_EXPORT BmJpuDecReturnCodes bm_jpu_dec_get_decoded_frame(BmJpuDecoder *decoder, BmJpuRawFrame *decoded_frame); - - -/* Check if the JPU can decode right now. While decoding a video stream, sometimes the JPU may not be able - * to decode. This is directly related to the set of free framebuffers. If this function returns 0, decoding - * should not be attempted until after bm_jpu_dec_mark_framebuffer_as_displayed() was called. If this - * happens, bm_jpu_dec_check_if_can_decode() should be called again to check if the situation changed and - * decoding can be done again. 
Also, calling this function before the initial info callback was executed is - * not recommended and causes undefined behavior. See the explanation above for details. */ -DECL_EXPORT int bm_jpu_dec_check_if_can_decode(BmJpuDecoder *decoder); - -/* Marks a framebuffer as displayed. This always needs to be called once the application is done with a decoded - * frame. It returns the framebuffer to the JPU pool so it can be reused for further decoding. Not calling - * this will eventually cause the decoder to fail, because it won't find any free framebuffer for storing - * a decoded frame anymore. - * - * It is safe to mark a framebuffer multiple times. The library will simply ignore the subsequent calls. */ -DECL_EXPORT BmJpuDecReturnCodes bm_jpu_dec_mark_framebuffer_as_displayed(BmJpuDecoder *decoder, BmJpuFramebuffer *framebuffer); - - - - -/************************************************/ -/******* ENCODER STRUCTURES AND FUNCTIONS *******/ -/************************************************/ - - -/* How to use the encoder (error handling omitted for clarity): - * - * Global initialization / shutdown is done by calling bm_jpu_enc_load() and - * bm_jpu_enc_unload() respectively. These functions contain a reference counter, - * so bm_jpu_enc_unload() must be called as many times as bm_jpu_enc_load() was, - * or else it will not unload. Do not try to create a encoder before calling - * bm_jpu_enc_load(), as this function loads the JPU firmware. Likewise, the - * bm_jpu_enc_unload() function unloads the firmware. This firmware (un)loading - * affects the entire process, not just the current thread. - * - * Typically, loading/unloading is done in two ways: - * (1) bm_jpu_enc_load() gets called in the startup phase of the process, and - * bm_jpu_enc_unload() in the shutdown phase. - * (2) bm_jpu_enc_load() gets called every time before a encoder is to be created, - * and bm_jpu_enc_unload() every time after a encoder was shut down. 
- * - * Both methods are fine; however, for (2), it is important to keep in mind that - * the bm_jpu_enc_load() / bm_jpu_enc_unload() functions are *not* thread safe, - * so surround their calls with mutex locks. - * - * How to create, use, and shutdown an encoder: - * 1. Call bm_jpu_enc_get_bitstream_buffer_info(), and allocate a DMA buffer - * with the given size and alignment. This is the minimum required size. - * The buffer can be larger, but must not be smaller than the given size. - * 2. Fill an instance of BmJpuEncOpenParams with the values specific to the - * input data. Check the documentation of BmJpuEncOpenParams for details - * about its fields. It is recommended to set default values by calling - * bm_jpu_enc_set_default_open_params() and afterwards set any explicit valus. - * 3. Call bm_jpu_enc_open(), passing in a pointer to the filled BmJpuEncOpenParams - * instance, and the DMA buffer of the bitstream DMA buffer which was allocated in - * step 1. - * 4. Call bm_jpu_enc_get_initial_info(). The encoder's initial info contains the - * minimum number of framebuffers that must be allocated and registered, and the - * address alignment that must be used when allocating DMA memory for these - * framebuffers. - * 5. (Optional) Perform the necessary size and alignment calculations by calling - * bm_jpu_calc_framebuffer_sizes(). Pass in the width & height of the frames that - * shall be encoded. (The width & height do not have to be aligned; the function - * does this automatically.) - * 6. (Optional) allocate a DMA buffer for the input frames. Only one buffer is necessary. - * If the incoming data is already stored in DMA buffers, this step can be omitted, - * since the encoder can then read the data directly. - * 7. Create an instance of BmJpuRawFrame, set its values to zero (typically by using memset()). - * 8. Create an instance of BmJpuEncodedFrame. Set its values to zero (typically by using memset()). - * 9. 
Set the framebuffer pointer of the BmJpuRawFrame's instance from step 7 to refer to the - * input DMA buffer (either the one allocated in step 6, or the one containing the input data if - * it already comes in DMA memory). - * 10. Fill an instance of BmJpuEncParams with valid values. It is recommended to first set its - * values to zero by using memset() to set default values. It is essential to make sure the - * acquire_output_buffer() and finish_output_buffer() function pointers are set, as these are - * used for acquiring buffers to write encoded output data into. - * Alternatively, set write_output_data() if write-callback style output is preferred. If this - * function pointer is non-NULL, then acquire_output_buffer() and finish_output_buffer() are - * ignored. - * 11. If step 6 was performed, and therefore input data does *not* come in DMA memory, copy the - * pixels from the raw input frames into the DMA buffer allocated in step 6. Otherwise, if - * the raw input frames are already stored in DMA memory, this step can be omitted. - * 12. Call bm_jpu_enc_encode(). Pass the raw frame, the encoded frame, and the encoding param - * structures from steps 9, 10, and 12 to it. - * This function will encode data, and acquire an output buffer to write the encoded data into - * by using the acquire_output_buffer() function pointer set in step 10. Once it is done - * encoding, it will call the finish_output_buffer() function from step 10. Any handle created - * by acquire_output_buffer() will be copied over to the encoded data frame structure. When - * bm_jpu_enc_encode() exits, this handle can then be used to further process the output data. - * It is guaranteed that once acquire_output_buffer() was called, finish_output_buffer() will - * be called, even if an error occurred. - * The BM_JPU_ENC_OUTPUT_CODE_ENCODED_FRAME_AVAILABLE output code bit will always be set - * unless the function returned a code other than BM_JPU_ENC_RETURN_CODE_OK. 
- * If the BM_JPU_ENC_OUTPUT_CODE_CONTAINS_HEADER bit is set, then header data has been - * written in the output memory block allocated in step 8. It is placed right before the - * actual encoded frame data. bm_jpu_enc_encode() will pass over the combined size of the header - * and the encoded frame data to acquire_output_buffer() in this case, ensuring that the output - * buffers are big enough. - * If write-callback style output is used instead (= if the write_output_data() function pointer - * inside the encoding_params is set to a valid value), then this function haves as described - * above, except that it does not call acquire_output_buffer() or finish_output_buffer(). It - * still adds headers etc. but outputs these immediately by calling write_output_data(). - * 13. Repeat steps 11 to 14 until there are no more frames to encode or an error occurs. - * 14. After encoding is finished, close the encoder with bm_jpu_enc_close(). - * 15. Deallocate framebuffer memory blocks, the input DMA buffer block, the output memory block, - * and the bitstream buffer memory block. - * - * Note that the encoder does not use any kind of frame reordering. h.264 data uses the - * baseline profile. An input frame immediately results in an output frame (unless an error occured). - * There is no delay. - * - * The JPU's encoders supports all formats from BmJpuColorFormat. - */ - - -/* Opaque encoder structure. */ -typedef struct _BmJpuEncoder BmJpuEncoder; - - -/* Encoder return codes. With the exception of BM_JPU_ENC_RETURN_CODE_OK, these - * should be considered hard errors, and the encoder should be closed when they - * are returned. */ -typedef enum -{ - /* Operation finished successfully. */ - BM_JPU_ENC_RETURN_CODE_OK = 0, - /* General return code for when an error occurs. This is used as a catch-all - * for when the other error return codes do not match the error. */ - BM_JPU_ENC_RETURN_CODE_ERROR, - /* Input parameters were invalid. 
*/ - BM_JPU_ENC_RETURN_CODE_INVALID_PARAMS, - /* JPU encoder handle is invalid. This is an internal error, and most likely - * a bug in the library. Please report such errors. */ - BM_JPU_ENC_RETURN_CODE_INVALID_HANDLE, - /* Framebuffer information is invalid. Typically happens when the BmJpuFramebuffer - * structures that get passed to bm_jpu_enc_register_framebuffers() contain - * invalid values. */ - BM_JPU_ENC_RETURN_CODE_INVALID_FRAMEBUFFER, - /* Registering framebuffers for encoding failed because not enough framebuffers - * were given to the bm_jpu_enc_register_framebuffers() function. */ - BM_JPU_ENC_RETURN_CODE_INSUFFICIENT_FRAMEBUFFERS, - /* A stride value (for example one of the stride values of a framebuffer) is invalid. */ - BM_JPU_ENC_RETURN_CODE_INVALID_STRIDE, - /* A function was called at an inappropriate time. */ - BM_JPU_ENC_RETURN_CODE_WRONG_CALL_SEQUENCE, - /* The operation timed out. */ - BM_JPU_ENC_RETURN_CODE_TIMEOUT, - /* write_output_data() in BmJpuEncParams returned 0. */ - BM_JPU_ENC_RETURN_CODE_WRITE_CALLBACK_FAILED, - /* Allocation memory failure */ - BM_JPU_ENC_RETURN_ALLOC_MEM_ERROR -} -BmJpuEncReturnCodes; - - -/* Encoder output codes. These can be bitwise OR combined, so check - * for their presence in the output_codes bitmask returned by - * bm_jpu_enc_encode() by using a bitwise AND. */ -typedef enum -{ - /* Input data was used. If this code is present, the input frame - * that was given to the bm_jpu_dec_encode() must not be given - * to a following bm_jpu_dec_encode() call; instead, a new frame - * should be loaded. If this code is not present, then the encoder - * didn't use it yet, so give it to the encoder again until this - * code is set or an error is returned. */ - BM_JPU_ENC_OUTPUT_CODE_INPUT_USED = (1UL << 0), - /* A fully encoded frame is now available. The encoded_frame argument - * passed to bm_jpu_enc_encode() contains information about this frame. 
*/ - BM_JPU_ENC_OUTPUT_CODE_ENCODED_FRAME_AVAILABLE = (1UL << 1), - /* The data in the encoded frame also contains header information - * like SPS/PSS for h.264. Headers are always placed at the beginning - * of the encoded data, and this code is never present if the - * BM_JPU_ENC_OUTPUT_CODE_ENCODED_FRAME_AVAILABLE isn't set. */ - BM_JPU_ENC_OUTPUT_CODE_CONTAINS_HEADER = (1UL << 2) -} -BmJpuEncOutputCodes; - - -/* Structure used together with bm_jpu_enc_open() */ -typedef struct -{ - /* Width and height of the incoming frames, in pixels. These - * do not have to be aligned to any boundaries. */ - unsigned int frame_width; - unsigned int frame_height; - /* Color format to use for incoming frames. MJPEG actually uses - * all possible values. - * See the BmJpuColorFormat documentation for an explanation how - * the chroma_interleave value can affec the pixel format that is used. */ - BmJpuColorFormat color_format; - - /* Quality factor for JPEG encoding, between 1 (worst quality, best - * compression) and 100 (best quality, worst compression). Default - * value is 85. - * This quality factor is the one from the Independent JPEG Group's - * formula for generating a scale factor out of the quality factor. - * This means that this quality factor is exactly the same as the - * one used by libjpeg. */ - unsigned int quality_factor; - - /* If this is 1, then Cb and Cr are interleaved in one shared chroma - * plane, otherwise they are separated in their own planes. - * See the BmJpuColorFormat documentation for the consequences of this. */ - int chroma_interleave; - - int packed_format; - int device_index; - - int rotationEnable; - int mirrorEnable; - int mirrorDirection; - int rotationAngle; -} -BmJpuEncOpenParams; - - -/* Initial encoding information, produced by the encoder. This structure is - * essential to actually begin encoding, since it contains all of the - * necessary information to create and register enough framebuffers. 
*/ -typedef struct -{ - /* Caller must register at least this many framebuffers - * with the encoder. */ - unsigned int min_num_required_framebuffers; - - /* Physical framebuffer addresses must be aligned to this value. */ - unsigned int framebuffer_alignment; -} -BmJpuEncInitialInfo; - - -/* Function pointer used during encoding for acquiring output buffers. - * See bm_jpu_enc_encode() for details about the encoding process. - * context is the value of output_buffer_context specified in - * BmJpuEncParams. size is the size of the block to acquire, in bytes. - * acquired_handle is an output value; the function can set this to a - * handle that corresponds to the acquired buffer. For example, in - * libav/FFmpeg, this handle could be a pointer to an AVBuffer. In - * GStreamer, this could be a pointer to a GstBuffer. The value of - * *acquired_handle will later be copied to the acquired_handle value - * of BmJpuEncodedFrame. - * The return value is a pointer to a memory-mapped region of the - * output buffer, or NULL if acquiring failed. - * If the write_output_data function pointer in the encoder params - * is non-NULL, this function is not called. - * This function is only used by bm_jpu_enc_encode(). */ -typedef void* (*BmJpuEncAcquireOutputBuffer)(void *context, size_t size, void **acquired_handle); - -/* Function pointer used during encoding for notifying that the encoder - * is done with the output buffer. This is *not* a function for freeing - * allocated buffers; instead, it makes it possible to release, unmap etc. - * context is the value of output_buffer_context specified in - * BmJpuEncParams. acquired_handle equals the value of *acquired_handle in - * BmJpuEncAcquireOutputBuffer. - * If the write_output_data function pointer in the encoder params - * is non-NULL, this function is not called. 
*/ -typedef void (*BmJpuEncFinishOutputBuffer)(void *context, void *acquired_handle); - -/* Function pointer used during encoding for passing the output encoded data - * to the user. If this function is not NULL, then BmJpuEncFinishOutputBuffer - * and BmJpuEncAcquireOutputBuffer function are not called. Instead, this - * data write function is called whenever the library wants to write output. - * encoded_frame contains valid pts, dts, and context data which was copied - * over from the corresponding raw frame. - * Returns 1 if writing succeeded, 0 otherwise. - * */ -typedef int (*BmJpuWriteOutputData)(void *context, uint8_t const *data, uint32_t size, BmJpuEncodedFrame *encoded_frame); - - -typedef struct -{ - /* Functions for acquiring and finishing output buffers. See the - * typedef documentations above for details about how these - * functions should behave, and the bm_jpu_enc_encode() - * documentation for how they are used. - * Note that these functions are only used if write_output_data - * is set to NULL. - */ - BmJpuEncAcquireOutputBuffer acquire_output_buffer; - BmJpuEncFinishOutputBuffer finish_output_buffer; - - /* Function for directly passing the output data to the user - * without copying it first. - * Using this function will inhibit calls to acquire_output_buffer - * and finish_output_buffer. See the typedef documentations - * above for details about how this function should behave, and - * the bm_jpu_enc_encode() documentation for how they are used. - * Note that if this function is NULL then acquire_output_buffer - * and finish_output_buffer must be set. - */ - BmJpuWriteOutputData write_output_data; - - /* User supplied value that will be passed to the functions */ - void *output_buffer_context; -} -BmJpuEncParams; - - -/* Returns a human-readable description of the error code. - * Useful for logging. */ -DECL_EXPORT char const * bm_jpu_enc_error_string(BmJpuEncReturnCodes code); - -/* These two functions load/unload the encoder. 
Due to an internal reference - * counter, it is safe to call these functions more than once. However, the - * number of unload() calls must match the number of load() calls. - * - * The encoder must be loaded before doing anything else with it. - * Similarly, the encoder must not be unloaded before all encoder activities - * have been finished. This includes opening/decoding encoder instances. */ -DECL_EXPORT BmJpuEncReturnCodes bm_jpu_enc_load(int device_index); -DECL_EXPORT BmJpuEncReturnCodes bm_jpu_enc_unload(int device_index); - -/* Called before bm_jpu_enc_open(), it returns the alignment and size for the - * physical memory block necessary for the encoder's bitstream buffer. The user - * must allocate a DMA buffer of at least this size, and its physical address - * must be aligned according to the alignment value. */ -DECL_EXPORT void bm_jpu_enc_get_bitstream_buffer_info(size_t *size, unsigned int *alignment); - -/* Set the fields in "open_params" to valid defaults - * Useful if the caller wants to modify only a few fields (or none at all) */ -DECL_EXPORT BmJpuEncReturnCodes bm_jpu_enc_set_default_open_params(BmJpuEncOpenParams *open_params); - -/* Opens a new encoder instance. "open_params" and "bitstream_buffer" must not be NULL. */ -DECL_EXPORT BmJpuEncReturnCodes bm_jpu_enc_open(BmJpuEncoder **encoder, BmJpuEncOpenParams *open_params, - bm_device_mem_t *bitstream_buffer); - -/* Closes a encoder instance. Trying to close the same instance multiple times results in undefined behavior. */ -DECL_EXPORT BmJpuEncReturnCodes bm_jpu_enc_close(BmJpuEncoder *encoder); - -/* Retrieves initial information available after calling bm_jpu_enc_open(). */ -DECL_EXPORT BmJpuEncReturnCodes bm_jpu_enc_get_initial_info(BmJpuEncoder *encoder, BmJpuEncInitialInfo *info); - -/* Encodes a given raw input frame with the given encoding parameters. encoded_frame is filled with information - * about the resulting encoded output frame. 
The encoded frame data itself is stored in a buffer that is - * allocated by user-supplied functions (which are set as the acquire_output_buffer and finish_output_buffer - * function pointers in the encoding_params). - * - * Encoding internally works as follows: first, the actual encoding operation is performed by the JPU. Next, - * information about the encoded data is queried, particularly its size in bytes. Once this size is known, - * acquire_output_buffer() from encoding_params is called. This function must acquire a buffer that can be - * used to store the encoded data. This buffer must be at least as large as the size of the encoded data - * (which is given to acquire_output_buffer() as an argument). The return value of acquire_output_buffer() - * is a pointer to the (potentially memory-mapped) region of the buffer. The encoded frame data is then - * copied to this buffer, and finish_output_buffer() is called. This function can be used to inform the - * caller that the encoder is done with this buffer; it now contains encoded data, and will not be modified - * further. encoded_frame is filled with information about the encoded frame data. - * If acquiring the buffer fails, acquire_output_buffer() returns a NULL pointer. - * NOTE: again, finish_output_buffer() is NOT a function to free the buffer; it just signals that the encoder - * won't touch the memory inside the buffer anymore. - * - * acquire_output_buffer() can also pass on a handle to the acquired buffer (for example, in FFmpeg/libav, - * this handle would be a pointer to an AVBuffer). The handle is called the "acquired_handle". - * acquire_output_buffer() can return such a handle. This handle is copied to the encoded_frame struct's - * acquired_handle field. This way, a more intuitive workflow can be used; if for example, acquire_output_buffer() - * returns an AVBuffer pointer as the handle, this AVBuffer pointer ends up in the encoded_frame. 
Afterwards, - * encoded_frame contains all the necessary information to process the encoded frame data. - * - * It is guaranteed that once the buffer was acquired, finish_output_buffer() will always be called, even if - * an error occurs. This prevents potential memory/resource leaks if the finish_output_buffer() call somehow - * unlocks or releases the buffer for further processing. The acquired_handle is also copied to encoded_frame - * even if an error occurs, unless the error occurred before the acquire_output_buffer() call, in which case - * the encoded_frame's acquired_handle field will be set to NULL. - * - * The aforementioned sequences involve a copy (encoded data is copied into the acquired buffer). As an - * alternative, a write-callback-style mode of operation can be used. This alternative mode is active if - * the write_output_data function pointer in encoding_params is not NULL. In this mode, neither - * acquire_output_buffer() nor finish_output_buffer() are called. Instead, whenever the encoder needs to - * write out data, it calls write_output_data(). - * - * The other fields in encoding_params specify additional encoding parameters, which can vary from frame to - * frame. - * output_code is a bit mask containing information about the encoding result. The value is a bitwise OR - * combination of the codes in BmJpuEncOutputCodes. - * - * None of the arguments may be NULL. 
*/ -DECL_EXPORT BmJpuEncReturnCodes bm_jpu_enc_encode(BmJpuEncoder *encoder, - BmJpuRawFrame const *raw_frame, - BmJpuEncodedFrame *encoded_frame, - BmJpuEncParams *encoding_params, - unsigned int *output_code); - -DECL_EXPORT int bm_jpu_get_dump(void); - -#ifdef __cplusplus -} -#endif - - -#endif diff --git a/bmvid/jpeg/binary/pcie_arm64/include/bmjpuapi_jpeg.h b/bmvid/jpeg/binary/pcie_arm64/include/bmjpuapi_jpeg.h deleted file mode 100644 index db8f1bc..0000000 --- a/bmvid/jpeg/binary/pcie_arm64/include/bmjpuapi_jpeg.h +++ /dev/null @@ -1,246 +0,0 @@ -#ifndef BMJPUAPI_JPEG_H -#define BMJPUAPI_JPEG_H - -#include "bmjpuapi.h" - -#ifdef __cplusplus -extern "C" { -#endif - -#if !defined DECL_EXPORT -#ifdef _WIN32 - #define DECL_EXPORT __declspec(dllexport) -#else - #define DECL_EXPORT -#endif -#endif - -typedef struct -{ - /* Width and height of JPU framebuffers are aligned to internal boundaries. - * The frame consists of the actual image pixels and extra padding pixels. - * aligned_frame_width / aligned_frame_height specify the full width/height - * including the padding pixels, and actual_frame_width / actual_frame_height - * specify the width/height without padding pixels. */ - unsigned int aligned_frame_width, aligned_frame_height; - unsigned int actual_frame_width, actual_frame_height; - - /* Stride and size of the Y, Cr, and Cb planes. The Cr and Cb planes always - * have the same stride and size. */ - unsigned int y_stride, cbcr_stride; - unsigned int y_size, cbcr_size; - - /* Offset from the start of a framebuffer's memory, in bytes. Note that the - * Cb and Cr offset values are *not* the same, unlike the stride and size ones. */ - unsigned int y_offset, cb_offset, cr_offset; - - /* Framebuffer containing the pixels of the decoded frame. */ - BmJpuFramebuffer *framebuffer; - - /* Color format of the decoded frame. 
*/ - BmJpuColorFormat color_format; - - int chroma_interleave; -} -BmJpuJPEGDecInfo; - - -typedef struct -{ - BmJpuDecoder *decoder; - - bm_device_mem_t *bitstream_buffer; - size_t bitstream_buffer_size; - unsigned int bitstream_buffer_alignment; - - BmJpuDecInitialInfo initial_info; - - BmJpuFramebuffer *framebuffers; - bm_device_mem_t *fb_dmabuffers; - unsigned int num_framebuffers; - unsigned int num_extra_framebuffers; // TODO - BmJpuFramebufferSizes calculated_sizes; - - BmJpuRawFrame raw_frame; - int device_index; - - BmJpuFramebuffer *cur_buffer; - void *opaque; - - int rotationEnable; - int mirrorEnable; - int mirrorDirection; - int rotationAngle; -} -BmJpuJPEGDecoder; - -/* Opens a new JPU JPEG decoder instance. - * - * Internally, this function calls bm_jpu_dec_load(). - * - * If dma_buffer_allocator is NULL, the default decoder allocator is used. - * - * num_extra_framebuffers is used for instructing this function to allocate this many - * more framebuffers. Usually this value is zero, but in certain cases where many - * JPEGs need to be decoded quickly, or the DMA buffers of decoded frames need to - * be kept around elsewhere, having more framebuffers available can be helpful. - * Note though that more framebuffers also means more DMA memory consumption. - * If unsure, keep this to zero. */ -DECL_EXPORT BmJpuDecReturnCodes bm_jpu_jpeg_dec_open(BmJpuJPEGDecoder **jpeg_decoder, - BmJpuDecOpenParams *open_params, - unsigned int num_extra_framebuffers); - -/* Closes a JPEG decoder instance. Trying to close the same instance multiple times results in undefined behavior. */ -DECL_EXPORT BmJpuDecReturnCodes bm_jpu_jpeg_dec_close(BmJpuJPEGDecoder *jpeg_decoder); - -/* Determines if the JPU can decode a frame at this moment. - * - * The return value depends on how many framebuffers in the decoder are free. - * If enough framebuffers are free, this returns 1, otherwise 0. 
- * - * For simple decoding schemes where one frame is decoded, then displayed or - * consumed in any other way, and then returned to the decoder by calling - * bm_jpu_jpeg_dec_frame_finished(), this function does not have to be used, - * since in this case, there will always be enough free framebuffers. - * If however the consumption of the decoded frame occurs in a different thread - * than the decoding, it makes sense to use this function in order to wait - * until enough framebfufers are free (typically implemented by using mutexes - * and thread condition variables). Also, in this case, this function is more - * likely to return 1 the more extra framebuffers were requested in the - * bm_jpu_jpeg_dec_open() call. - */ -DECL_EXPORT int bm_jpu_jpeg_dec_can_decode(BmJpuJPEGDecoder *jpeg_decoder); - -/* Decodes a JPEG frame. - * - * jpeg_data must be set to the memory block that contains the encoded JPEG data, - * and jpeg_data_size must be set to the size of that block, in bytes. After this - * call, use the bm_jpu_jpeg_dec_get_info() function to retrieve information about - * the decoded frame. - * - * The JPU decoder only consumes baseline JPEG data. Progressive encoding is not supported. */ -DECL_EXPORT BmJpuDecReturnCodes bm_jpu_jpeg_dec_decode(BmJpuJPEGDecoder *jpeg_decoder, - uint8_t const *jpeg_data, - size_t const jpeg_data_size); - -/* Retrieves information about the decoded JPEG frame. - * - * The BmJpuJPEGDecInfo's fields will be set to those of the decoded frame. In particular, - * info's framebuffer pointer will be set to point to the framebuffer containing the - * decoded frame. Be sure to pass this pointer to bm_jpu_jpeg_dec_frame_finished() once - * the frame's pixels are no longer needed. - * - * Note that the return value of the previous bm_jpu_jpeg_dec_decode() call can be - * BM_JPU_DEC_RETURN_CODE_OK even though the framebuffer pointer retrieved here is NULL. - * This is the case when not enough free framebuffers are present. 
It is recommended to - * check the return value of the bm_jpu_jpeg_dec_can_decode() function before calling - * bm_jpu_jpeg_dec_decode(), unless the decoding sequence is simple (like in the example - * mentioned in the bm_jpu_jpeg_dec_can_decode() description). - * - * This function must not be called before bm_jpu_jpeg_dec_decode() , since otherwise, - * there is no information available (it is read in the decoding step). */ -DECL_EXPORT void bm_jpu_jpeg_dec_get_info(BmJpuJPEGDecoder *jpeg_decoder, BmJpuJPEGDecInfo *info); - -/* Inform the JPEG decoder that a previously decoded frame is no longer being used. - * - * This function must always be called once the user is done with a frame, otherwise - * the JPU cannot reclaim this ramebuffer, and will eventually run out of internal - * framebuffers to decode into. */ -DECL_EXPORT BmJpuDecReturnCodes bm_jpu_jpeg_dec_frame_finished(BmJpuJPEGDecoder *jpeg_decoder, - BmJpuFramebuffer *framebuffer); - -DECL_EXPORT BmJpuDecReturnCodes bm_jpu_jpeg_dec_flush(BmJpuJPEGDecoder *jpeg_decoder); - - - -typedef struct -{ - /* Frame width and height of the input frame. These are the actual sizes; - * they will be aligned internally if necessary. These sizes must not be - * zero. */ - unsigned int frame_width, frame_height; - - /* Quality factor for JPEG encoding. 1 = best compression, 100 = best quality. - * This is the exact same quality factor as used by libjpeg. */ - unsigned int quality_factor; - - /* Color format of the input frame. */ - BmJpuColorFormat color_format; - - /* Functions for acquiring and finishing output buffers. See the - * typedef documentations in bmjpuapi.h for details about how - * these functions should behave. */ - BmJpuEncAcquireOutputBuffer acquire_output_buffer; - BmJpuEncFinishOutputBuffer finish_output_buffer; - - /* Function for directly passing the output data to the user - * without copying it first. - * Using this function will inhibit calls to acquire_output_buffer - * and finish_output_buffer. 
*/ - BmJpuWriteOutputData write_output_data; - - /* User supplied value that will be passed to the functions: - * acquire_output_buffer, finish_output_buffer, write_output_data */ - void *output_buffer_context; - - int packed_format; - int chroma_interleave; - - int rotationEnable; - int mirrorEnable; - int mirrorDirection; - int rotationAngle; -} -BmJpuJPEGEncParams; - - -typedef struct _BmJpuJPEGEncoder BmJpuJPEGEncoder; - -/* Opens a new JPU JPEG encoder instance. - * - * Internally, this function calls bm_jpu_enc_load(). - * - * If dma_buffer_allocator is NULL, the default encoder allocator is used. - */ -DECL_EXPORT BmJpuEncReturnCodes bm_jpu_jpeg_enc_open(BmJpuJPEGEncoder **jpeg_encoder, - int bs_buffer_size, - int device_index); - -/* Closes a JPEG encoder instance. Trying to close the same instance multiple times results in undefined behavior. */ -DECL_EXPORT BmJpuEncReturnCodes bm_jpu_jpeg_enc_close(BmJpuJPEGEncoder *jpeg_encoder); - -/* Encodes a raw input frame. - * - * params must be filled with valid values; frame width and height must not be zero. - * framebuffer contains the raw input pixels to encode. Its stride and offset values - * must be valid, and its dma_buffer pointer must point to a DMA buffer that contains - * the pixel data. - * - * During encoding, the encoder will call params->acquire_output_buffer() to acquire - * an output buffer and put encoded JPEG data into. Once encoding is done, the - * params->finish_output_buffer() function is called. This is *not* to be confused with - * a memory deallocation function; it is instead typically used to notify the caller - * that the encoder won't touch the acquired buffer's contents anymore. It is guaranteed - * that finish_output_buffer() is called if acquire_output_buffer() was called earlier. 
- * - * If acquired_handle is non-NULL, then the poiner it refers to will be set to the handle - * produced by acquire_output_buffer(), even if bm_jpu_jpeg_enc_encode() exits with an - * error (unless said error occurred *before* the acquire_output_buffer() call, in which - * case *acquired_handle will be set to NULL). If output_buffer_size is non-NULL, the - * size value it points to will be set to the number of bytes of the encoded JPEG data. - * - * The JPU encoder only produces baseline JPEG data. Progressive encoding is not supported. */ -DECL_EXPORT BmJpuEncReturnCodes bm_jpu_jpeg_enc_encode(BmJpuJPEGEncoder *jpeg_encoder, - BmJpuFramebuffer const *framebuffer, - BmJpuJPEGEncParams const *params, - void **acquired_handle, - size_t *output_buffer_size); - -DECL_EXPORT int bm_jpu_jpeg_get_dump(void); - -#ifdef __cplusplus -} -#endif - - -#endif diff --git a/bmvid/jpeg/binary/pcie_arm64/include/jpu_lib.h b/bmvid/jpeg/binary/pcie_arm64/include/jpu_lib.h index ff001f1..b8c5a53 100644 --- a/bmvid/jpeg/binary/pcie_arm64/include/jpu_lib.h +++ b/bmvid/jpeg/binary/pcie_arm64/include/jpu_lib.h @@ -9,6 +9,9 @@ #define DC_TABLE_INDEX1 2 #define AC_TABLE_INDEX1 3 +#ifndef BOOL +typedef int BOOL; +#endif //------------------------------------------------------------------------------ // common struct and definition @@ -60,7 +63,8 @@ typedef enum { JPG_RET_INVALID_STRIDE, JPG_RET_WRONG_CALL_SEQUENCE, JPG_RET_CALLED_BEFORE, - JPG_RET_NOT_INITIALIZED + JPG_RET_NOT_INITIALIZED, + JPG_RET_BS_BUFFER_FULL } JpgRet; typedef enum { @@ -284,10 +288,12 @@ DECL_EXPORT int jpu_DecOpen(DecHandle *, DecOpenParam *); DECL_EXPORT int jpu_DecClose(DecHandle); DECL_EXPORT int jpu_DecGetInitialInfo(DecHandle handle, DecInitialInfo * info); +DECL_EXPORT int jpu_DecSetResolutionInfo(DecHandle handle, int width, int height); DECL_EXPORT int jpu_DecRegisterFrameBuffer(DecHandle handle, FrameBuffer * bufArray, int num, int stride, void* par0); DECL_EXPORT int jpu_DecUpdateBitstreamBuffer(DecHandle 
handle, uint32_t size); +DECL_EXPORT int jpu_DecSetRdPtrEx(DecHandle handle, PhysicalAddress addr, BOOL updateWrPtr); DECL_EXPORT int jpu_DecSetBsPtr(DecHandle handle, uint8_t *data, int data_size); DECL_EXPORT int jpu_DecStartOneFrame(DecHandle handle, DecParam * param); @@ -297,6 +303,7 @@ DECL_EXPORT int jpu_DecGiveCommand(DecHandle handle, CodecCommand cmd, void *par DECL_EXPORT int jpu_DecWaitForInt(DecHandle handle, int timeout_in_ms, int timeout_counts); DECL_EXPORT int jpu_GetDump(); +DECL_EXPORT int jpu_HWReset(); DECL_EXPORT int vpp_Init(int32_t device_index); #endif /* __BM_JPU_LIB_H__ */ diff --git a/bmvid/jpeg/binary/pcie_arm64/lib/libbmjpuapi.so.0 b/bmvid/jpeg/binary/pcie_arm64/lib/libbmjpuapi.so.0 index 8d5e580..254e318 120000 --- a/bmvid/jpeg/binary/pcie_arm64/lib/libbmjpuapi.so.0 +++ b/bmvid/jpeg/binary/pcie_arm64/lib/libbmjpuapi.so.0 @@ -1 +1 @@ -libbmjpuapi.so.0.7.0 \ No newline at end of file +libbmjpuapi.so.0.11.0 \ No newline at end of file diff --git a/bmvid/jpeg/binary/pcie_arm64/lib/libbmjpuapi.so.0.10.0 b/bmvid/jpeg/binary/pcie_arm64/lib/libbmjpuapi.so.0.10.0 new file mode 100755 index 0000000..aa8799e Binary files /dev/null and b/bmvid/jpeg/binary/pcie_arm64/lib/libbmjpuapi.so.0.10.0 differ diff --git a/bmvid/jpeg/binary/pcie_arm64/lib/libbmjpuapi.so.0.11.0 b/bmvid/jpeg/binary/pcie_arm64/lib/libbmjpuapi.so.0.11.0 new file mode 100755 index 0000000..a632ff8 Binary files /dev/null and b/bmvid/jpeg/binary/pcie_arm64/lib/libbmjpuapi.so.0.11.0 differ diff --git a/bmvid/jpeg/binary/pcie_arm64/lib/libbmjpuapi.so.0.7.0 b/bmvid/jpeg/binary/pcie_arm64/lib/libbmjpuapi.so.0.7.0 deleted file mode 100755 index 5558d1d..0000000 Binary files a/bmvid/jpeg/binary/pcie_arm64/lib/libbmjpuapi.so.0.7.0 and /dev/null differ diff --git a/bmvid/jpeg/binary/pcie_arm64/lib/libbmjpulite.a b/bmvid/jpeg/binary/pcie_arm64/lib/libbmjpulite.a new file mode 100755 index 0000000..7b9be0a Binary files /dev/null and b/bmvid/jpeg/binary/pcie_arm64/lib/libbmjpulite.a differ 
diff --git a/bmvid/jpeg/binary/pcie_arm64/lib/libbmjpulite.so.0 b/bmvid/jpeg/binary/pcie_arm64/lib/libbmjpulite.so.0 index 21f1a70..7e37060 120000 --- a/bmvid/jpeg/binary/pcie_arm64/lib/libbmjpulite.so.0 +++ b/bmvid/jpeg/binary/pcie_arm64/lib/libbmjpulite.so.0 @@ -1 +1 @@ -libbmjpulite.so.0.7.0 \ No newline at end of file +libbmjpulite.so.0.11.0 \ No newline at end of file diff --git a/bmvid/jpeg/binary/pcie_arm64/lib/libbmjpulite.so.0.10.0 b/bmvid/jpeg/binary/pcie_arm64/lib/libbmjpulite.so.0.10.0 new file mode 100755 index 0000000..1e61c9c Binary files /dev/null and b/bmvid/jpeg/binary/pcie_arm64/lib/libbmjpulite.so.0.10.0 differ diff --git a/bmvid/jpeg/binary/pcie_arm64/lib/libbmjpulite.so.0.11.0 b/bmvid/jpeg/binary/pcie_arm64/lib/libbmjpulite.so.0.11.0 new file mode 100755 index 0000000..d41cb1b Binary files /dev/null and b/bmvid/jpeg/binary/pcie_arm64/lib/libbmjpulite.so.0.11.0 differ diff --git a/bmvid/jpeg/binary/pcie_arm64/lib/libbmjpulite.so.0.7.0 b/bmvid/jpeg/binary/pcie_arm64/lib/libbmjpulite.so.0.7.0 deleted file mode 100755 index de293be..0000000 Binary files a/bmvid/jpeg/binary/pcie_arm64/lib/libbmjpulite.so.0.7.0 and /dev/null differ diff --git a/bmvid/jpeg/binary/pcie_riscv64/bin/bmjpegdec b/bmvid/jpeg/binary/pcie_riscv64/bin/bmjpegdec index 9085bf6..298bd8f 100755 Binary files a/bmvid/jpeg/binary/pcie_riscv64/bin/bmjpegdec and b/bmvid/jpeg/binary/pcie_riscv64/bin/bmjpegdec differ diff --git a/bmvid/jpeg/binary/pcie_riscv64/bin/bmjpegenc b/bmvid/jpeg/binary/pcie_riscv64/bin/bmjpegenc index 9543826..6f947ce 100755 Binary files a/bmvid/jpeg/binary/pcie_riscv64/bin/bmjpegenc and b/bmvid/jpeg/binary/pcie_riscv64/bin/bmjpegenc differ diff --git a/bmvid/jpeg/binary/pcie_riscv64/bin/bmjpegmulti b/bmvid/jpeg/binary/pcie_riscv64/bin/bmjpegmulti index cee5b32..5d44f6f 100755 Binary files a/bmvid/jpeg/binary/pcie_riscv64/bin/bmjpegmulti and b/bmvid/jpeg/binary/pcie_riscv64/bin/bmjpegmulti differ diff --git a/bmvid/jpeg/binary/pcie_riscv64/include/jpu_lib.h 
b/bmvid/jpeg/binary/pcie_riscv64/include/jpu_lib.h index ff001f1..b8c5a53 100644 --- a/bmvid/jpeg/binary/pcie_riscv64/include/jpu_lib.h +++ b/bmvid/jpeg/binary/pcie_riscv64/include/jpu_lib.h @@ -9,6 +9,9 @@ #define DC_TABLE_INDEX1 2 #define AC_TABLE_INDEX1 3 +#ifndef BOOL +typedef int BOOL; +#endif //------------------------------------------------------------------------------ // common struct and definition @@ -60,7 +63,8 @@ typedef enum { JPG_RET_INVALID_STRIDE, JPG_RET_WRONG_CALL_SEQUENCE, JPG_RET_CALLED_BEFORE, - JPG_RET_NOT_INITIALIZED + JPG_RET_NOT_INITIALIZED, + JPG_RET_BS_BUFFER_FULL } JpgRet; typedef enum { @@ -284,10 +288,12 @@ DECL_EXPORT int jpu_DecOpen(DecHandle *, DecOpenParam *); DECL_EXPORT int jpu_DecClose(DecHandle); DECL_EXPORT int jpu_DecGetInitialInfo(DecHandle handle, DecInitialInfo * info); +DECL_EXPORT int jpu_DecSetResolutionInfo(DecHandle handle, int width, int height); DECL_EXPORT int jpu_DecRegisterFrameBuffer(DecHandle handle, FrameBuffer * bufArray, int num, int stride, void* par0); DECL_EXPORT int jpu_DecUpdateBitstreamBuffer(DecHandle handle, uint32_t size); +DECL_EXPORT int jpu_DecSetRdPtrEx(DecHandle handle, PhysicalAddress addr, BOOL updateWrPtr); DECL_EXPORT int jpu_DecSetBsPtr(DecHandle handle, uint8_t *data, int data_size); DECL_EXPORT int jpu_DecStartOneFrame(DecHandle handle, DecParam * param); @@ -297,6 +303,7 @@ DECL_EXPORT int jpu_DecGiveCommand(DecHandle handle, CodecCommand cmd, void *par DECL_EXPORT int jpu_DecWaitForInt(DecHandle handle, int timeout_in_ms, int timeout_counts); DECL_EXPORT int jpu_GetDump(); +DECL_EXPORT int jpu_HWReset(); DECL_EXPORT int vpp_Init(int32_t device_index); #endif /* __BM_JPU_LIB_H__ */ diff --git a/bmvid/jpeg/binary/pcie_riscv64/lib/libbmjpuapi.so.0 b/bmvid/jpeg/binary/pcie_riscv64/lib/libbmjpuapi.so.0 index 8d5e580..254e318 120000 --- a/bmvid/jpeg/binary/pcie_riscv64/lib/libbmjpuapi.so.0 +++ b/bmvid/jpeg/binary/pcie_riscv64/lib/libbmjpuapi.so.0 @@ -1 +1 @@ -libbmjpuapi.so.0.7.0 \ No 
newline at end of file +libbmjpuapi.so.0.11.0 \ No newline at end of file diff --git a/bmvid/jpeg/binary/pcie_riscv64/lib/libbmjpuapi.so.0.10.0 b/bmvid/jpeg/binary/pcie_riscv64/lib/libbmjpuapi.so.0.10.0 new file mode 100755 index 0000000..e0ee844 Binary files /dev/null and b/bmvid/jpeg/binary/pcie_riscv64/lib/libbmjpuapi.so.0.10.0 differ diff --git a/bmvid/jpeg/binary/pcie_riscv64/lib/libbmjpuapi.so.0.11.0 b/bmvid/jpeg/binary/pcie_riscv64/lib/libbmjpuapi.so.0.11.0 new file mode 100755 index 0000000..711172d Binary files /dev/null and b/bmvid/jpeg/binary/pcie_riscv64/lib/libbmjpuapi.so.0.11.0 differ diff --git a/bmvid/jpeg/binary/pcie_riscv64/lib/libbmjpuapi.so.0.7.0 b/bmvid/jpeg/binary/pcie_riscv64/lib/libbmjpuapi.so.0.7.0 deleted file mode 100755 index 575ff1f..0000000 Binary files a/bmvid/jpeg/binary/pcie_riscv64/lib/libbmjpuapi.so.0.7.0 and /dev/null differ diff --git a/bmvid/jpeg/binary/pcie_riscv64/lib/libbmjpulite.a b/bmvid/jpeg/binary/pcie_riscv64/lib/libbmjpulite.a new file mode 100755 index 0000000..82f966f Binary files /dev/null and b/bmvid/jpeg/binary/pcie_riscv64/lib/libbmjpulite.a differ diff --git a/bmvid/jpeg/binary/pcie_riscv64/lib/libbmjpulite.so.0 b/bmvid/jpeg/binary/pcie_riscv64/lib/libbmjpulite.so.0 index 21f1a70..7e37060 120000 --- a/bmvid/jpeg/binary/pcie_riscv64/lib/libbmjpulite.so.0 +++ b/bmvid/jpeg/binary/pcie_riscv64/lib/libbmjpulite.so.0 @@ -1 +1 @@ -libbmjpulite.so.0.7.0 \ No newline at end of file +libbmjpulite.so.0.11.0 \ No newline at end of file diff --git a/bmvid/jpeg/binary/pcie_riscv64/lib/libbmjpulite.so.0.10.0 b/bmvid/jpeg/binary/pcie_riscv64/lib/libbmjpulite.so.0.10.0 new file mode 100755 index 0000000..6a43649 Binary files /dev/null and b/bmvid/jpeg/binary/pcie_riscv64/lib/libbmjpulite.so.0.10.0 differ diff --git a/bmvid/jpeg/binary/pcie_riscv64/lib/libbmjpulite.so.0.11.0 b/bmvid/jpeg/binary/pcie_riscv64/lib/libbmjpulite.so.0.11.0 new file mode 100755 index 0000000..1349de2 Binary files /dev/null and 
b/bmvid/jpeg/binary/pcie_riscv64/lib/libbmjpulite.so.0.11.0 differ diff --git a/bmvid/jpeg/binary/pcie_riscv64/lib/libbmjpulite.so.0.7.0 b/bmvid/jpeg/binary/pcie_riscv64/lib/libbmjpulite.so.0.7.0 deleted file mode 100755 index 76edf50..0000000 Binary files a/bmvid/jpeg/binary/pcie_riscv64/lib/libbmjpulite.so.0.7.0 and /dev/null differ diff --git a/bmvid/jpeg/binary/soc/bin/bmjpegdec b/bmvid/jpeg/binary/soc/bin/bmjpegdec index 3ba93b8..ae194c7 100755 Binary files a/bmvid/jpeg/binary/soc/bin/bmjpegdec and b/bmvid/jpeg/binary/soc/bin/bmjpegdec differ diff --git a/bmvid/jpeg/binary/soc/bin/bmjpegdec_seq b/bmvid/jpeg/binary/soc/bin/bmjpegdec_seq new file mode 100755 index 0000000..8ce97f9 Binary files /dev/null and b/bmvid/jpeg/binary/soc/bin/bmjpegdec_seq differ diff --git a/bmvid/jpeg/binary/soc/bin/bmjpegenc b/bmvid/jpeg/binary/soc/bin/bmjpegenc index 712ae02..2222535 100755 Binary files a/bmvid/jpeg/binary/soc/bin/bmjpegenc and b/bmvid/jpeg/binary/soc/bin/bmjpegenc differ diff --git a/bmvid/jpeg/binary/soc/bin/bmjpegenc_seq b/bmvid/jpeg/binary/soc/bin/bmjpegenc_seq new file mode 100755 index 0000000..1968ccf Binary files /dev/null and b/bmvid/jpeg/binary/soc/bin/bmjpegenc_seq differ diff --git a/bmvid/jpeg/binary/soc/bin/bmjpegmulti b/bmvid/jpeg/binary/soc/bin/bmjpegmulti index 903bcc3..34400b4 100755 Binary files a/bmvid/jpeg/binary/soc/bin/bmjpegmulti and b/bmvid/jpeg/binary/soc/bin/bmjpegmulti differ diff --git a/bmvid/jpeg/binary/soc/include/bmjpuapi.h b/bmvid/jpeg/binary/soc/include/bmjpuapi.h deleted file mode 100644 index 73c1bc3..0000000 --- a/bmvid/jpeg/binary/soc/include/bmjpuapi.h +++ /dev/null @@ -1,1070 +0,0 @@ -#ifndef BMJPUAPI_H -#define BMJPUAPI_H - -#include -#include -#include -#include "bmlib_runtime.h" - -#ifdef __cplusplus -extern "C" { -#endif - - -#if !defined DECL_EXPORT -#ifdef _WIN32 - #define DECL_EXPORT __declspec(dllexport) -#else - #define DECL_EXPORT -#endif -#endif - - -/* This library provides a high-level interface for controlling 
the BitMain JPU en/decoder. - * - * Note that the functions are _not_ thread safe. If they may be called from - * different threads, you must make sure they are surrounded by a mutex lock. - * It is recommended to use one global mutex for the bm_jpu_*_load()/unload() - * functions, and another de/encoder instance specific mutex for all of the other - * calls. */ - - - - -/**************************************************/ -/******* ALLOCATOR STRUCTURES AND FUNCTIONS *******/ -/**************************************************/ - - -/* Format and for printf-compatible format-strings - * example use: printf("physical address: %" BM_JPU_PHYS_ADDR_FORMAT, phys_addr */ -#define BM_JPU_PHYS_ADDR_FORMAT "#lx" -/* Typedef for physical addresses */ -typedef unsigned long long bm_jpu_phys_addr_t; - -/* BmJpuAllocationFlags: flags for the BmJpuDMABufferAllocator's allocate vfunc */ -typedef enum -{ - BM_JPU_ALLOCATION_FLAG_CACHED = 0, - BM_JPU_ALLOCATION_FLAG_WRITECOMBINE = 1, - BM_JPU_ALLOCATION_FLAG_UNCACHED = 2 -}BmJpuAllocationFlags; - -typedef enum -{ - BM_ION_FLAG_HEAP_VPP = 0, - BM_ION_FLAG_HEAP_NPU = 1, - BM_ION_FLAG_HEAP_VPU = 2 -}BmJpuIonHeapFlags; - -#define BM_JPU_ALLOCATION_FLAG_DEFAULT ((BM_ION_FLAG_HEAP_VPP << 4) | BM_JPU_ALLOCATION_FLAG_CACHED) -#define BM_JPU_ALLOCATION_FLAG_VPP_CACHED BM_JPU_ALLOCATION_FLAG_DEFAULT -#define BM_JPU_ALLOCATION_FLAG_JPU_CACHED ((BM_ION_FLAG_HEAP_VPU << 4) | BM_JPU_ALLOCATION_FLAG_CACHED) -#define BM_JPU_ALLOCATION_FLAG_NPU_CACHED ((BM_ION_FLAG_HEAP_NPU << 4) | BM_JPU_ALLOCATION_FLAG_CACHED) -#define BM_JPU_ALLOCATION_FLAG_VPP_WRITECOMBINE ((BM_ION_FLAG_HEAP_VPP << 4) | BM_JPU_ALLOCATION_FLAG_WRITECOMBINE) -/* BmJpuMappingFlags: flags for the BmJpuDMABufferAllocator's map vfuncs - * These flags can be bitwise-OR combined, although READ and WRITE cannot - * both be set */ -typedef enum -{ - /* Map memory for CPU write access */ - BM_JPU_MAPPING_FLAG_WRITE = (1UL << 0), - /* Map memory for CPU read access */ - 
BM_JPU_MAPPING_FLAG_READ = (1UL << 1) - /* XXX: When adding extra flags here, follow the pattern: BM_JPU_MAPPING_FLAG_ = (1UL << ) */ -} -BmJpuMappingFlags; - - - -/* Heap allocation function for virtual memory blocks internally allocated by bmjpuapi. - * These have nothing to do with the DMA buffer allocation interface defined above. - * By default, malloc/free are used. */ -typedef void* (*BmJpuHeapAllocFunc)(size_t const size, void *context, char const *file, int const line, char const *fn); -typedef void (*BmJpuHeapFreeFunc)(void *memblock, size_t const size, void *context, char const *file, int const line, char const *fn); - -/* This function allows for setting custom heap allocators, which are used to create internal heap blocks. - * The heap allocator referred to by "heap_alloc_fn" must return NULL if allocation fails. - * "context" is a user-defined value, which is passed on unchanged to the allocator functions. - * Calling this function with either "heap_alloc_fn" or "heap_free_fn" set to NULL resets the internal - * pointers to use malloc and free (the default allocators). */ -DECL_EXPORT void bm_jpu_set_heap_allocator_functions(BmJpuHeapAllocFunc heap_alloc_fn, BmJpuHeapFreeFunc heap_free_fn, void *context); - - - - -/***********************/ -/******* LOGGING *******/ -/***********************/ - - -/* Log levels. */ -typedef enum -{ - BM_JPU_LOG_LEVEL_ERROR = 0, - BM_JPU_LOG_LEVEL_WARNING = 1, - BM_JPU_LOG_LEVEL_INFO = 2, - BM_JPU_LOG_LEVEL_DEBUG = 3, - BM_JPU_LOG_LEVEL_LOG = 4, - BM_JPU_LOG_LEVEL_TRACE = 5 -} -BmJpuLogLevel; - -/* Function pointer type for logging functions. - * - * This function is invoked by BM_JPU_LOG() macro calls. This macro also passes the name - * of the source file, the line in that file, and the function name where the logging occurs - * to the logging function (over the file, line, and fn arguments, respectively). 
- * Together with the log level, custom logging functions can output this metadata, or use - * it for log filtering etc.*/ -typedef void (*BmJpuLoggingFunc)(BmJpuLogLevel level, char const *file, int const line, char const *fn, const char *format, ...); - -/* Defines the threshold for logging. Logs with lower priority are discarded. - * By default, the threshold is set to BM_JPU_LOG_LEVEL_INFO. */ -DECL_EXPORT void bm_jpu_set_logging_threshold(BmJpuLogLevel threshold); - -/* Defines a custom logging function. - * If logging_fn is NULL, logging is disabled. This is the default value. */ -DECL_EXPORT void bm_jpu_set_logging_function(BmJpuLoggingFunc logging_fn); - - - - -/******************************************************/ -/******* MISCELLANEOUS STRUCTURES AND FUNCTIONS *******/ -/******************************************************/ -typedef enum -{ - /* planar 4:2:0; if the chroma_interleave parameter is 1, the corresponding format is NV12, otherwise it is I420 */ - BM_JPU_COLOR_FORMAT_YUV420 = 0, - /* planar 4:2:2; if the chroma_interleave parameter is 1, the corresponding format is NV16 */ - BM_JPU_COLOR_FORMAT_YUV422_HORIZONTAL = 1, - /* 4:2:2 vertical, actually 2:2:4 (according to the JPU docs); no corresponding format known for the chroma_interleave=1 case */ - /* NOTE: this format is rarely used, and has only been seen in a few JPEG files */ - BM_JPU_COLOR_FORMAT_YUV422_VERTICAL = 2, - /* planar 4:4:4; if the chroma_interleave parameter is 1, the corresponding format is NV24 */ - BM_JPU_COLOR_FORMAT_YUV444 = 3, - /* 8-bit greayscale */ - BM_JPU_COLOR_FORMAT_YUV400 = 4, - /* RGBP */ - BM_JPU_COLOR_FORMAT_RGB = 5 -} -BmJpuColorFormat; - - -/* Framebuffers are frame containers, and are used both for en- and decoding. */ -typedef struct -{ - /* Stride of the Y and of the Cb&Cr components. - * Specified in bytes. */ - unsigned int y_stride; - unsigned int cbcr_stride; - - /* DMA buffer which contains the pixels. 
*/ - bm_device_mem_t *dma_buffer; - - /* These define the starting offsets of each component - * relative to the start of the buffer. Specified in bytes. - */ - size_t y_offset; - size_t cb_offset; - size_t cr_offset; - - /* User-defined pointer. The library does not touch this value. - * Not to be confused with the context fields of BmJpuEncodedFrame - * and BmJpuRawFrame. - * This can be used for example to identify which framebuffer out of - * the initially allocated pool was used by the JPU to contain a frame. - */ - void *context; - - /* Set to 1 if the framebuffer was already marked as displayed. This is for - * internal use only. Not to be read or written from the outside. */ - int already_marked; - - /* Internal, implementation-defined data. Do not modify. */ - void *internal; -} -BmJpuFramebuffer; - - -/* Structure containing details about encoded frames. */ -typedef struct -{ - /* When decoding, data must point to the memory block which contains - * encoded frame data that gets consumed by the JPU. Not used by - * the encoder. */ - uint8_t *data; - - /* Size of the encoded data, in bytes. When decoding, this is set by - * the user, and is the size of the encoded data that is pointed to - * by data. When encoding, the encoder sets this to the size of the - * acquired output block, in bytes (exactly the same value as the - * acquire_output_buffer's size argument). */ - size_t data_size; - - /* Handle produced by the user-defined acquire_output_buffer function - * during encoding. Not used by the decoder. */ - void *acquired_handle; - - /* User-defined pointer. The library does not touch this value. - * This pointer and the one from the corresponding raw frame will have - * the same value. The library will pass then through. - * It can be used to identify which raw frame is associated with this - * encoded frame for example. */ - void *context; - - /* User-defined timestamps. These are here for convenience. 
In many - * cases, the context one wants to associate with raw/encoded frames - * is a PTS-DTS pair. If only the context pointer were available, users - * would have to create a separate data structure containing PTS & DTS - * values for each context. Since this use case is common, these two - * fields are added to the frame structure. Just like the context - * pointer, the library just passes them through to the associated - * raw frame, and does not actually touch their values. It is also - * perfectly OK to not use them, and just use the context pointer - * instead, or vice versa. */ - uint64_t pts, dts; -} -BmJpuEncodedFrame; - - -/* Structure containing details about raw, uncompressed frames. */ -typedef struct -{ - /* When decoding: pointer to the framebuffer containing the decoded raw frame. - * When encoding: pointer to the framebuffer containing the raw frame to encode. */ - BmJpuFramebuffer *framebuffer; - - /* User-defined pointer. The library does not touch this value. - * This pointer and the one from the corresponding encoded frame will have - * the same value. The library will pass then through. - * It can be used to identify which raw frame is associated with this - * encoded frame for example. */ - void *context; - - /* User-defined timestamps. These are here for convenience. In many - * cases, the context one wants to associate with raw/encoded frames - * is a PTS-DTS pair. If only the context pointer were available, users - * would have to create a separate data structure containing PTS & DTS - * values for each context. Since this use case is common, these two - * fields are added to the frame structure. Just like the context - * pointer, the library just passes them through to the associated - * encoded frame, and does not actually touch their values. It is also - * perfectly OK to not use them, and just use the context pointer - * instead, or vice versa. 
*/ - uint64_t pts, dts; -} -BmJpuRawFrame; - - -/* Structure used together with bm_jpu_calc_framebuffer_sizes() */ -typedef struct -{ - /* Frame width and height, aligned to the 16-pixel boundary required by the JPU. */ - unsigned int aligned_frame_width, aligned_frame_height; - - /* Stride sizes, in bytes, with alignment applied. The Cb and Cr planes always - * use the same stride, so they share the same value. */ - unsigned int y_stride, cbcr_stride; - - /* Required DMA memory size for the Y,Cb,Cr planes in bytes. - * The Cb and Cr planes always are of the same size, so they share the same value. */ - unsigned int y_size, cbcr_size; - - /* Total required size of a framebuffer's DMA buffer, in bytes. This value includes - * the sizes of all planes, and extra bytes for alignment and padding. - * This value must be used when allocating DMA buffers for decoder framebuffers. */ - unsigned int total_size; - - /* This corresponds to the other chroma_interleave values used in bmjpuapi. - * It is stored here to allow other functions to select the correct offsets. */ - int chroma_interleave; -} -BmJpuFramebufferSizes; - - - - -/************************************************/ -/******* DECODER STRUCTURES AND FUNCTIONS *******/ -/************************************************/ - - -/* How to use the decoder (error handling omitted for clarity): - * - * Global initialization / shutdown is done by calling bm_jpu_dec_load() and - * bm_jpu_dec_unload() respectively. These functions contain a reference counter, - * so bm_jpu_dec_unload() must be called as many times as bm_jpu_dec_load() was, - * or else it will not unload. Do not try to create a decoder before calling - * bm_jpu_dec_load(), as this function loads the JPU firmware. Likewise, the - * bm_jpu_dec_unload() function unloads the firmware. This firmware (un)loading - * affects the entire process, not just the current thread. 
- * - * Typically, loading/unloading is done in two ways: - * (1) bm_dec_jpu_load() gets called in the startup phase of the process, and - * bm_jpu_dec_unload() in the shutdown phase. - * (2) bm_dec_jpu_load() gets called every time before a decoder is to be created, - * and bm_jpu_dec_unload() every time after a decoder was shut down. - * - * Both methods are fine; however, for (2), it is important to keep in mind that - * the bm_jpu_dec_load() / bm_jpu_dec_unload() functions are *not* thread safe, - * so surround their calls with mutex locks. - * - * How to create, use, and shutdown a decoder: - * 1. Call bm_jpu_dec_get_bitstream_buffer_info(), and allocate a DMA buffer - * with the given size and alignment. This is the minimum required size. - * The buffer can be larger, but must not be smaller than the given size. - * 2. Fill an instance of BmJpuDecOpenParams with the values specific to the - * input data. Check the documentation of BmJpuDecOpenParams for details - * about its fields. - * 3. Call bm_jpu_dec_open(), passing in a pointer to the filled BmJpuDecOpenParams - * instance, the bitstream DMA buffer which was allocated in step 1, a callback - * of type bm_jpu_dec_new_initial_info_callback, and a user defined pointer - * that is passed to the callback (if not needed, just set it to NULL). - * 4. Call bm_jpu_dec_decode(), and push data to it. Once initial information about - * the bitstream becomes available, the callback from step 3 is invoked. - * 5. Inside the callback, the new initial info is available. The new_initial_info pointer - * is never NULL. In this callback, framebuffers are allocated and registered, as - * explained in the next steps. Steps 7-9 are performed inside the callback. - * 6. (Optional) Perform the necessary size and alignment calculations by calling - * bm_jpu_calc_framebuffer_sizes(). Pass in either the frame width & height from - * BmJpuDecInitialInfo , or some explicit values that were determined externally. 
- * (The width & height do not have to be aligned; the function does this automatically.) - * 7. Create an array of at least as many BmJpuFramebuffer instances as specified in - * min_num_required_framebuffers. Each instance must point to a DMA buffer that is big - * enough to hold a raw decoded frame. If step 7 was performed, allocating as many bytes - * as indicated by total_size is enough. Make sure the Y,Cb,Cr offsets in each - * BmJpuFramebuffer instance are valid. Using the bm_jpu_fill_framebuffer_params() - * convenience function for this is strongly recommended. - * 8. Call bm_jpu_dec_register_framebuffers() and pass in the BmJpuFramebuffer array - * and the number of BmJpuFramebuffer instances. - * Note that this call does _not_ copy the framebuffer array, it just stores the pointer - * to it internally, so make sure the array is valid until the decoder is closed! - * This should be the last action in the callback. - * 9. Continue calling bm_jpu_dec_decode(). Make sure the input data is not NULL. - * If the BM_JPU_DEC_OUTPUT_CODE_DECODED_FRAME_AVAILABLE flag is set in the output code, - * call bm_jpu_dec_get_decoded_frame() with a pointer to an BmJpuRawFrame instance. - * The instance will get filled by the function with information about the decoded frame. - * Once the decoded frame has been processed by the user, it is important to call - * bm_jpu_dec_mark_framebuffer_as_displayed() to let the decoder know that the - * framebuffer is available for storing new decoded frames again. - * If BM_JPU_DEC_OUTPUT_CODE_EOS is set, or if bm_jpu_dec_decode() returns a value other - * than BM_JPU_DEC_RETURN_CODE_OK, stop playback and close the decoder. - * 10. In case a flush/reset is desired (typically after seeking), call bm_jpu_dec_flush(). 
- * Note that any internal context/PTS/DTS values from the encoded and raw frames will be thrown - * away after this call; if for example the context is an index, the system that hands - * out the indices should be informed that any previously handed out index is now unused. - * 11. After playback is finished, close the decoder with bm_jpu_dec_close(). - * 12. Deallocate framebuffer memory blocks and the bitstream buffer memory block. - * - * In situations where decoding and display of decoded frames happen in different threads, it - * is necessary to wait until decoding is possible. bm_jpu_dec_check_if_can_decode() is used - * for this purpose. This needs to be done in steps 5 and 10. Typically this is done by using - * a thread condition variable. Example pseudo code: - * - * mutex_lock(&mutex); - * - * while (dec_initialized && !bm_jpu_dec_check_if_can_decode(decode) && !abort_waiting) - * condition_wait(&condition_variable, &mutex); - * - * if (!abort_waiting) - * bm_jpu_dec_decode(decoder, encoded_frame, &output_code); - * ... - * - * mutex_unlock(&mutex); - * - * (abort_waiting would be a flag that gets raised when something from the outside signals - * that waiting and decoding needs to be shut down now, for example because the user wants - * to close the player, or because the user pressed Ctrl+C. dec_initialized would be a flag - * that is initially cleared, and raised in the initial info callback; it is pointless to - * call bm_jpu_dec_check_if_can_decode() before the callback was executed.) - * - * If any video sequence parameters (like frame width and height) in the input data change, - * the output code from bm_jpu_dec_decode() calls in step 10 will contain the - * BM_JPU_DEC_OUTPUT_CODE_VIDEO_PARAMS_CHANGED flag. (This will never happen in step 5.) - * When this occurs, decoding cannot continue, because the registered framebuffers are - * of an incorrect size, and because the decoder's configuration is set up for the previous - * parameters. 
Therefore, in this case, first, the decoder has to be drained of decoded- - * but-not-yet-displayed frames like in step 12, then, it has to be closed, and opened - * again. The BmJpuDecOpenParams structure that is then passed to the bm_jpu_dec_open() - * call should have its frame_width and frame_height values set to 0 to ensure the - * new sequence parameters are properly used. Then, the data that was fed into the - * bm_jpu_dec_decode() call that set the BM_JPU_DEC_OUTPUT_CODE_VIDEO_PARAMS_CHANGED flag - * has to be fed again to bm_jpu_dec_decode(). The initial info callback from - * bm_jpu_dec_open() will again be called, and decoding continues as usual. - * - * It is also recommended to make sure that framebuffers and associated DMA buffers that - * were allocated before the video sequence parameter change be deallocated in the - * initial callback to avoid memory leaks. - * - * However, if the environment is a framework like GStreamer or libav/FFmpeg, it is likely - * this will never have to be done, since these have their own parsers that detect parameter - * changes and initiate reinitializations. - */ - - -/* Opaque decoder structure. */ -typedef struct _BmJpuDecoder BmJpuDecoder; - - -/* Decoder return codes. With the exception of BM_JPU_DEC_RETURN_CODE_OK, these - * should be considered hard errors, and the decoder should be closed when they - * are returned. */ -typedef enum -{ - /* Operation finished successfully. */ - BM_JPU_DEC_RETURN_CODE_OK = 0, - /* General return code for when an error occurs. This is used as a catch-all - * for when the other error return codes do not match the error. */ - BM_JPU_DEC_RETURN_CODE_ERROR, - /* Input parameters were invalid. */ - BM_JPU_DEC_RETURN_CODE_INVALID_PARAMS, - /* JPU decoder handle is invalid. This is an internal error, and most likely - * a bug in the library. Please report such errors. */ - BM_JPU_DEC_RETURN_CODE_INVALID_HANDLE, - /* Framebuffer information is invalid. 
Typically happens when the BmJpuFramebuffer - * structures that get passed to bm_jpu_dec_register_framebuffers() contain - * invalid values. */ - BM_JPU_DEC_RETURN_CODE_INVALID_FRAMEBUFFER, - /* Registering framebuffers for decoding failed because not enough framebuffers - * were given to the bm_jpu_dec_register_framebuffers() function. */ - BM_JPU_DEC_RETURN_CODE_INSUFFICIENT_FRAMEBUFFERS, - /* A stride value (for example one of the stride values of a framebuffer) is invalid. */ - BM_JPU_DEC_RETURN_CODE_INVALID_STRIDE, - /* A function was called at an inappropriate time (for example, when - * bm_jpu_dec_register_framebuffers() is called before a single byte of input data - * was passed to bm_jpu_dec_decode() ). */ - BM_JPU_DEC_RETURN_CODE_WRONG_CALL_SEQUENCE, - /* The operation timed out. */ - BM_JPU_DEC_RETURN_CODE_TIMEOUT, - /* A function that should only be called once for the duration of the decoding - * session was called again. One example is bm_jpu_dec_register_framebuffers(). */ - BM_JPU_DEC_RETURN_CODE_ALREADY_CALLED, - /* Allocation memory failure */ - BM_JPU_DEC_RETURN_ALLOC_MEM_ERROR -} -BmJpuDecReturnCodes; - - -/* Decoder output codes. These can be bitwise OR combined, so check - * for their presence in the output_codes bitmask returned by - * bm_jpu_dec_decode() by using a bitwise AND. */ -typedef enum -{ - /* Input data was used. If this code is present, the input data - * that was given to the bm_jpu_dec_decode() must not be given - * to a following bm_jpu_dec_decode() call; instead, new data - * should be loaded. If this code is not present, then the decoder - * didn't use it yet, so give it to the decoder again until this - * code is set or an error is returned. - * NOTE: this flag is obsolete. It used to mean something with the - * fslwrapper backend; however, with the jpulib backend, it will - * always use the input unless an error occurs or EOS is signaled - * in drain mode. 
*/ - BM_JPU_DEC_OUTPUT_CODE_INPUT_USED = (1UL << 0), - /* EOS was reached; no more unfinished frames are queued internally. - * This can be reached by bitstreams with no frame delay. - */ - BM_JPU_DEC_OUTPUT_CODE_EOS = (1UL << 1), - /* A fully decoded frame is now available, and can be retrieved - * by calling bm_jpu_dec_get_decoded_frame(). */ - BM_JPU_DEC_OUTPUT_CODE_DECODED_FRAME_AVAILABLE = (1UL << 2), - - /* There aren't enough free framebuffers available for decoding. - * This usually happens when bm_jpu_dec_mark_framebuffer_as_displayed() - * wasn't called before bm_jpu_dec_decode(), which can occur in - * multithreaded environments. bm_jpu_dec_check_if_can_decode() is useful - * to avoid this. Also see the guide above for more. */ - BM_JPU_DEC_OUTPUT_CODE_NOT_ENOUGH_OUTPUT_FRAMES = (1UL << 3), - /* Input data for a frame is incomplete. No decoded frame will - * be available until the input frame's data has been fully and - * correctly delivered. */ - BM_JPU_DEC_OUTPUT_CODE_NOT_ENOUGH_INPUT_DATA = (1UL << 4), - /* The JPU detected a change in the video sequence parameters - * (like frame width and height). Decoding cannot continue. See the - * explanation in the step-by-step guide above for what steps to take - * if this output code is set. Note that this refers to detected - * changes in the *input data*, not to the decoded frames. This means - * that this flag is set immediately when input data with param changes - * is fed to the decoder, even if this is for example a h.264 high - * profile stream with lots of frame reordering and frame delays. */ - BM_JPU_DEC_OUTPUT_CODE_VIDEO_PARAMS_CHANGED = (1UL << 5) -} -BmJpuDecOutputCodes; - - -/* Structure used together with bm_jpu_dec_open() */ -typedef struct -{ - /* These are necessary with some formats which do not store the width - * and height in the bitstream. If the format does store them, these - * values can be set to zero. 
*/ - unsigned int frame_width; - unsigned int frame_height; - - /* If this is 1, then Cb and Cr are interleaved in one shared chroma - * plane, otherwise they are separated in their own planes. - * See the BmJpuColorFormat documentation for the consequences of this. */ - int chroma_interleave; - - /* 0: no scaling; n(1-3): scale by 2^n; */ - unsigned int scale_ratio; - - /* The DMA buffer size for bitstream */ - int bs_buffer_size; -#ifdef _WIN32 - uint8_t *buffer; -#else - uint8_t *buffer __attribute__((deprecated)); -#endif - - int device_index; - - int rotationEnable; - int mirrorEnable; - int mirrorDirection; - int rotationAngle; - - int roiEnable; - int roiWidth; - int roiHeight; - int roiOffsetX; - int roiOffsetY; -} -BmJpuDecOpenParams; - - -/* Structure used together with bm_jpu_dec_new_initial_info_callback() . - * The values are filled by the decoder. */ -typedef struct -{ - /* Width of height of frames, in pixels. Note: it is not guaranteed that - * these values are aligned to a 16-pixel boundary (which is required - * for JPU framebuffers). These are the width and height of the frame - * with actual pixel content. It may be a subset of the total frame, - * in case these sizes need to be aligned. In that case, there are - * padding columns to the right, and padding rows below the frames. */ - unsigned int frame_width, frame_height; - - /* Caller must register at least this many framebuffers - * with the decoder. */ - unsigned int min_num_required_framebuffers; - - /* Color format of the decoded frames. */ - BmJpuColorFormat color_format; - - int chroma_interleave; - - /* Physical framebuffer addresses must be aligned to this value. */ - unsigned int framebuffer_alignment; - - int roiFrameWidth; - int roiFrameHeight; - } -BmJpuDecInitialInfo; - -/* Convenience function which calculates various sizes out of the given width & height and color format. - * The results are stored in "calculated_sizes". 
The given frame width and height will be aligned if - * they aren't already, and the aligned value will be stored in calculated_sizes. Width & height must be - * nonzero. The calculated_sizes pointer must also be non-NULL. framebuffer_alignment is an alignment - * value for the sizes of the Y/U/V planes. 0 or 1 mean no alignment. uses_interlacing is set to 1 - * if interlacing is to be used, 0 otherwise. chroma_interleave is set to 1 if a shared CbCr chroma - * plane is to be used, 0 if Cb and Cr shall use separate planes. */ -DECL_EXPORT BmJpuDecReturnCodes bm_jpu_calc_framebuffer_sizes(BmJpuColorFormat color_format, - unsigned int frame_width, - unsigned int frame_height, - unsigned int framebuffer_alignment, - int chroma_interleave, - BmJpuFramebufferSizes *calculated_sizes); - -/* Convenience function which fills fields of the BmJpuFramebuffer structure, based on data from "calculated_sizes". - * The specified DMA buffer and context pointer are also set. */ -DECL_EXPORT BmJpuDecReturnCodes bm_jpu_fill_framebuffer_params(BmJpuFramebuffer *framebuffer, - BmJpuFramebufferSizes *calculated_sizes, - bm_device_mem_t *fb_dma_buffer, - void* context); - -/* Returns a human-readable description of the given color format. Useful for logging. */ -DECL_EXPORT char const *bm_jpu_color_format_string(BmJpuColorFormat color_format); - - -/* Callback for handling new BmJpuDecInitialInfo data. This is called when new - * information about the bitstream becomes available. output_code can be useful - * to check why this callback was invoked. BM_JPU_DEC_OUTPUT_CODE_INITIAL_INFO_AVAILABLE - * is always set. Every time this callback gets called, new framebuffers should be - * allocated and registered with bm_jpu_dec_register_framebuffers(). - * user_data is a user-defined pointer that is passed to this callback. It has the same - * value as the callback_user_data pointer from the bm_jpu_dec_open() call. - * The callback returns 0 if something failed, nonzero if successful. 
*/ -DECL_EXPORT typedef int (*bm_jpu_dec_new_initial_info_callback)(BmJpuDecoder *decoder, - BmJpuDecInitialInfo *new_initial_info, - unsigned int output_code, - void *user_data); - - -/* Returns a human-readable description of the error code. - * Useful for logging. */ -DECL_EXPORT char const * bm_jpu_dec_error_string(BmJpuDecReturnCodes code); - -/* These two functions load/unload the decoder. Due to an internal reference - * counter, it is safe to call these functions more than once. However, the - * number of unload() calls must match the number of load() calls. - * - * The decoder must be loaded before doing anything else with it. - * Similarly, the decoder must not be unloaded before all decoder activities - * have been finished. This includes opening/decoding decoder instances. */ -DECL_EXPORT BmJpuDecReturnCodes bm_jpu_dec_load(int device_index); -DECL_EXPORT BmJpuDecReturnCodes bm_jpu_dec_unload(int device_index); -DECL_EXPORT bm_handle_t bm_jpu_get_handle(int device_index); - -/* Called before bm_jpu_dec_open(), it returns the alignment and size for the - * physical memory block necessary for the decoder's bitstream buffer. The user - * must allocate a DMA buffer of at least this size, and its physical address - * must be aligned according to the alignment value. */ -DECL_EXPORT void bm_jpu_dec_get_bitstream_buffer_info(size_t *size, unsigned int *alignment); - -/* Opens a new decoder instance. "open_params", "bitstream_buffer", and "new_initial_info" - * must not be NULL. "callback_user_data" is a user-defined pointer that is passed on to - * the callback when it is invoked. The bitstream buffer must use the alignment and size - * that bm_jpu_dec_get_bitstream_buffer_info() specifies (it can also be larger, but must - * not be smaller than the size this function gives). 
*/ -DECL_EXPORT BmJpuDecReturnCodes bm_jpu_dec_open(BmJpuDecoder **decoder, BmJpuDecOpenParams *open_params, - bm_device_mem_t *bitstream_buffer, - bm_jpu_dec_new_initial_info_callback new_initial_info_callback, - void *callback_user_data); - -/* Closes a decoder instance. Trying to close the same instance multiple times results in undefined behavior. */ -DECL_EXPORT BmJpuDecReturnCodes bm_jpu_dec_close(BmJpuDecoder *decoder); - -/* Flushes the decoder. Any internal undecoded or queued frames are discarded. */ -DECL_EXPORT BmJpuDecReturnCodes bm_jpu_dec_flush(BmJpuDecoder *decoder); - -/* Registers the specified array of framebuffers with the decoder. This must be called after - * bm_jpu_dec_decode() returned an output code with BM_JPU_DEC_OUTPUT_CODE_INITIAL_INFO_AVAILABLE - * set in it. Registering can happen only once during the lifetime of a decoder instance. If for some reason - * framebuffers need to be re-registered, the instance must be closed, and a new one opened. - * The caller must ensure that the specified framebuffer array remains valid until the decoder instance - * is closed, since this function does not copy it; it just stores a pointer to the array internally. Also - * note that internally, values might be written to the array (though it will never be reallocated - * and/or freed from the inside). Also, the framebuffers' DMA buffers will be memory-mapped until the decoder - * is closed. - * - * Since this function only stores a pointer to the framebuffer array internally, and does not actually copy - * the array, it is possible - and valid - to modify the "context" fields of the framebuffers even after - * this call was made. This is useful if for example system resources are associated later with the - * framebuffers. 
In this case, it is perfectly OK to set "context" to NULL initially, and later, when the - * resources are available, associated them to the framebuffers by setting the context fields, even if - * bm_jpu_dec_register_framebuffers() was already called earlier. - * - * The framebuffers must contain valid values. The convenience functions bm_jpu_calc_framebuffer_sizes() and - * bm_jpu_fill_framebuffer_params() can be used for this. Note that all framebuffers must have the same - * stride values. */ -DECL_EXPORT BmJpuDecReturnCodes bm_jpu_dec_register_framebuffers(BmJpuDecoder *decoder, BmJpuFramebuffer *framebuffers, unsigned int num_framebuffers); - -/* Decodes an encoded input frame. "encoded_frame" must always be set, even in drain mode. See BmJpuEncodedFrame - * for details about its contents. output_code is a bit mask, must not be NULL, and returns important information - * about the decoding process. The value is a bitwise OR combination of the codes in BmJpuDecOutputCodes. Also - * look at bm_jpu_dec_get_decoded_frame() about how to retrieve decoded frames (if these exist). Note that if - * the BM_JPU_DEC_OUTPUT_CODE_VIDEO_PARAMS_CHANGED flag is set in the output_code, decoding cannot continue, - * and the decoder should be closed. See the notes below step-by-step guide above for details about this. */ -DECL_EXPORT BmJpuDecReturnCodes bm_jpu_dec_decode(BmJpuDecoder *decoder, BmJpuEncodedFrame const *encoded_frame, unsigned int *output_code); - -/* Retrieves a decoded frame. The structure referred to by "decoded_frame" will be filled with data about - * the decoded frame. "decoded_frame" must not be NULL. - * - * Calling this function before bm_jpu_dec_decode() results in an BM_JPU_DEC_RETURN_CODE_WRONG_CALL_SEQUENCE - * return value. Calling this function more than once after a bm_jpu_dec_decode() yields the same result. 
- */ -DECL_EXPORT BmJpuDecReturnCodes bm_jpu_dec_get_decoded_frame(BmJpuDecoder *decoder, BmJpuRawFrame *decoded_frame); - - -/* Check if the JPU can decode right now. While decoding a video stream, sometimes the JPU may not be able - * to decode. This is directly related to the set of free framebuffers. If this function returns 0, decoding - * should not be attempted until after bm_jpu_dec_mark_framebuffer_as_displayed() was called. If this - * happens, bm_jpu_dec_check_if_can_decode() should be called again to check if the situation changed and - * decoding can be done again. Also, calling this function before the initial info callback was executed is - * not recommended and causes undefined behavior. See the explanation above for details. */ -DECL_EXPORT int bm_jpu_dec_check_if_can_decode(BmJpuDecoder *decoder); - -/* Marks a framebuffer as displayed. This always needs to be called once the application is done with a decoded - * frame. It returns the framebuffer to the JPU pool so it can be reused for further decoding. Not calling - * this will eventually cause the decoder to fail, because it won't find any free framebuffer for storing - * a decoded frame anymore. - * - * It is safe to mark a framebuffer multiple times. The library will simply ignore the subsequent calls. */ -DECL_EXPORT BmJpuDecReturnCodes bm_jpu_dec_mark_framebuffer_as_displayed(BmJpuDecoder *decoder, BmJpuFramebuffer *framebuffer); - - - - -/************************************************/ -/******* ENCODER STRUCTURES AND FUNCTIONS *******/ -/************************************************/ - - -/* How to use the encoder (error handling omitted for clarity): - * - * Global initialization / shutdown is done by calling bm_jpu_enc_load() and - * bm_jpu_enc_unload() respectively. These functions contain a reference counter, - * so bm_jpu_enc_unload() must be called as many times as bm_jpu_enc_load() was, - * or else it will not unload. 
Do not try to create a encoder before calling - * bm_jpu_enc_load(), as this function loads the JPU firmware. Likewise, the - * bm_jpu_enc_unload() function unloads the firmware. This firmware (un)loading - * affects the entire process, not just the current thread. - * - * Typically, loading/unloading is done in two ways: - * (1) bm_jpu_enc_load() gets called in the startup phase of the process, and - * bm_jpu_enc_unload() in the shutdown phase. - * (2) bm_jpu_enc_load() gets called every time before a encoder is to be created, - * and bm_jpu_enc_unload() every time after a encoder was shut down. - * - * Both methods are fine; however, for (2), it is important to keep in mind that - * the bm_jpu_enc_load() / bm_jpu_enc_unload() functions are *not* thread safe, - * so surround their calls with mutex locks. - * - * How to create, use, and shutdown an encoder: - * 1. Call bm_jpu_enc_get_bitstream_buffer_info(), and allocate a DMA buffer - * with the given size and alignment. This is the minimum required size. - * The buffer can be larger, but must not be smaller than the given size. - * 2. Fill an instance of BmJpuEncOpenParams with the values specific to the - * input data. Check the documentation of BmJpuEncOpenParams for details - * about its fields. It is recommended to set default values by calling - * bm_jpu_enc_set_default_open_params() and afterwards set any explicit valus. - * 3. Call bm_jpu_enc_open(), passing in a pointer to the filled BmJpuEncOpenParams - * instance, and the DMA buffer of the bitstream DMA buffer which was allocated in - * step 1. - * 4. Call bm_jpu_enc_get_initial_info(). The encoder's initial info contains the - * minimum number of framebuffers that must be allocated and registered, and the - * address alignment that must be used when allocating DMA memory for these - * framebuffers. - * 5. (Optional) Perform the necessary size and alignment calculations by calling - * bm_jpu_calc_framebuffer_sizes(). 
Pass in the width & height of the frames that - * shall be encoded. (The width & height do not have to be aligned; the function - * does this automatically.) - * 6. (Optional) allocate a DMA buffer for the input frames. Only one buffer is necessary. - * If the incoming data is already stored in DMA buffers, this step can be omitted, - * since the encoder can then read the data directly. - * 7. Create an instance of BmJpuRawFrame, set its values to zero (typically by using memset()). - * 8. Create an instance of BmJpuEncodedFrame. Set its values to zero (typically by using memset()). - * 9. Set the framebuffer pointer of the BmJpuRawFrame's instance from step 7 to refer to the - * input DMA buffer (either the one allocated in step 6, or the one containing the input data if - * it already comes in DMA memory). - * 10. Fill an instance of BmJpuEncParams with valid values. It is recommended to first set its - * values to zero by using memset() to set default values. It is essential to make sure the - * acquire_output_buffer() and finish_output_buffer() function pointers are set, as these are - * used for acquiring buffers to write encoded output data into. - * Alternatively, set write_output_data() if write-callback style output is preferred. If this - * function pointer is non-NULL, then acquire_output_buffer() and finish_output_buffer() are - * ignored. - * 11. If step 6 was performed, and therefore input data does *not* come in DMA memory, copy the - * pixels from the raw input frames into the DMA buffer allocated in step 6. Otherwise, if - * the raw input frames are already stored in DMA memory, this step can be omitted. - * 12. Call bm_jpu_enc_encode(). Pass the raw frame, the encoded frame, and the encoding param - * structures from steps 9, 10, and 12 to it. - * This function will encode data, and acquire an output buffer to write the encoded data into - * by using the acquire_output_buffer() function pointer set in step 10. 
Once it is done - * encoding, it will call the finish_output_buffer() function from step 10. Any handle created - * by acquire_output_buffer() will be copied over to the encoded data frame structure. When - * bm_jpu_enc_encode() exits, this handle can then be used to further process the output data. - * It is guaranteed that once acquire_output_buffer() was called, finish_output_buffer() will - * be called, even if an error occurred. - * The BM_JPU_ENC_OUTPUT_CODE_ENCODED_FRAME_AVAILABLE output code bit will always be set - * unless the function returned a code other than BM_JPU_ENC_RETURN_CODE_OK. - * If the BM_JPU_ENC_OUTPUT_CODE_CONTAINS_HEADER bit is set, then header data has been - * written in the output memory block allocated in step 8. It is placed right before the - * actual encoded frame data. bm_jpu_enc_encode() will pass over the combined size of the header - * and the encoded frame data to acquire_output_buffer() in this case, ensuring that the output - * buffers are big enough. - * If write-callback style output is used instead (= if the write_output_data() function pointer - * inside the encoding_params is set to a valid value), then this function haves as described - * above, except that it does not call acquire_output_buffer() or finish_output_buffer(). It - * still adds headers etc. but outputs these immediately by calling write_output_data(). - * 13. Repeat steps 11 to 14 until there are no more frames to encode or an error occurs. - * 14. After encoding is finished, close the encoder with bm_jpu_enc_close(). - * 15. Deallocate framebuffer memory blocks, the input DMA buffer block, the output memory block, - * and the bitstream buffer memory block. - * - * Note that the encoder does not use any kind of frame reordering. h.264 data uses the - * baseline profile. An input frame immediately results in an output frame (unless an error occured). - * There is no delay. - * - * The JPU's encoders supports all formats from BmJpuColorFormat. 
- */ - - -/* Opaque encoder structure. */ -typedef struct _BmJpuEncoder BmJpuEncoder; - - -/* Encoder return codes. With the exception of BM_JPU_ENC_RETURN_CODE_OK, these - * should be considered hard errors, and the encoder should be closed when they - * are returned. */ -typedef enum -{ - /* Operation finished successfully. */ - BM_JPU_ENC_RETURN_CODE_OK = 0, - /* General return code for when an error occurs. This is used as a catch-all - * for when the other error return codes do not match the error. */ - BM_JPU_ENC_RETURN_CODE_ERROR, - /* Input parameters were invalid. */ - BM_JPU_ENC_RETURN_CODE_INVALID_PARAMS, - /* JPU encoder handle is invalid. This is an internal error, and most likely - * a bug in the library. Please report such errors. */ - BM_JPU_ENC_RETURN_CODE_INVALID_HANDLE, - /* Framebuffer information is invalid. Typically happens when the BmJpuFramebuffer - * structures that get passed to bm_jpu_enc_register_framebuffers() contain - * invalid values. */ - BM_JPU_ENC_RETURN_CODE_INVALID_FRAMEBUFFER, - /* Registering framebuffers for encoding failed because not enough framebuffers - * were given to the bm_jpu_enc_register_framebuffers() function. */ - BM_JPU_ENC_RETURN_CODE_INSUFFICIENT_FRAMEBUFFERS, - /* A stride value (for example one of the stride values of a framebuffer) is invalid. */ - BM_JPU_ENC_RETURN_CODE_INVALID_STRIDE, - /* A function was called at an inappropriate time. */ - BM_JPU_ENC_RETURN_CODE_WRONG_CALL_SEQUENCE, - /* The operation timed out. */ - BM_JPU_ENC_RETURN_CODE_TIMEOUT, - /* write_output_data() in BmJpuEncParams returned 0. */ - BM_JPU_ENC_RETURN_CODE_WRITE_CALLBACK_FAILED, - /* Allocation memory failure */ - BM_JPU_ENC_RETURN_ALLOC_MEM_ERROR -} -BmJpuEncReturnCodes; - - -/* Encoder output codes. These can be bitwise OR combined, so check - * for their presence in the output_codes bitmask returned by - * bm_jpu_enc_encode() by using a bitwise AND. */ -typedef enum -{ - /* Input data was used. 
If this code is present, the input frame - * that was given to the bm_jpu_dec_encode() must not be given - * to a following bm_jpu_dec_encode() call; instead, a new frame - * should be loaded. If this code is not present, then the encoder - * didn't use it yet, so give it to the encoder again until this - * code is set or an error is returned. */ - BM_JPU_ENC_OUTPUT_CODE_INPUT_USED = (1UL << 0), - /* A fully encoded frame is now available. The encoded_frame argument - * passed to bm_jpu_enc_encode() contains information about this frame. */ - BM_JPU_ENC_OUTPUT_CODE_ENCODED_FRAME_AVAILABLE = (1UL << 1), - /* The data in the encoded frame also contains header information - * like SPS/PSS for h.264. Headers are always placed at the beginning - * of the encoded data, and this code is never present if the - * BM_JPU_ENC_OUTPUT_CODE_ENCODED_FRAME_AVAILABLE isn't set. */ - BM_JPU_ENC_OUTPUT_CODE_CONTAINS_HEADER = (1UL << 2) -} -BmJpuEncOutputCodes; - - -/* Structure used together with bm_jpu_enc_open() */ -typedef struct -{ - /* Width and height of the incoming frames, in pixels. These - * do not have to be aligned to any boundaries. */ - unsigned int frame_width; - unsigned int frame_height; - /* Color format to use for incoming frames. MJPEG actually uses - * all possible values. - * See the BmJpuColorFormat documentation for an explanation how - * the chroma_interleave value can affec the pixel format that is used. */ - BmJpuColorFormat color_format; - - /* Quality factor for JPEG encoding, between 1 (worst quality, best - * compression) and 100 (best quality, worst compression). Default - * value is 85. - * This quality factor is the one from the Independent JPEG Group's - * formula for generating a scale factor out of the quality factor. - * This means that this quality factor is exactly the same as the - * one used by libjpeg. 
*/ - unsigned int quality_factor; - - /* If this is 1, then Cb and Cr are interleaved in one shared chroma - * plane, otherwise they are separated in their own planes. - * See the BmJpuColorFormat documentation for the consequences of this. */ - int chroma_interleave; - - int packed_format; - int device_index; - - int rotationEnable; - int mirrorEnable; - int mirrorDirection; - int rotationAngle; -} -BmJpuEncOpenParams; - - -/* Initial encoding information, produced by the encoder. This structure is - * essential to actually begin encoding, since it contains all of the - * necessary information to create and register enough framebuffers. */ -typedef struct -{ - /* Caller must register at least this many framebuffers - * with the encoder. */ - unsigned int min_num_required_framebuffers; - - /* Physical framebuffer addresses must be aligned to this value. */ - unsigned int framebuffer_alignment; -} -BmJpuEncInitialInfo; - - -/* Function pointer used during encoding for acquiring output buffers. - * See bm_jpu_enc_encode() for details about the encoding process. - * context is the value of output_buffer_context specified in - * BmJpuEncParams. size is the size of the block to acquire, in bytes. - * acquired_handle is an output value; the function can set this to a - * handle that corresponds to the acquired buffer. For example, in - * libav/FFmpeg, this handle could be a pointer to an AVBuffer. In - * GStreamer, this could be a pointer to a GstBuffer. The value of - * *acquired_handle will later be copied to the acquired_handle value - * of BmJpuEncodedFrame. - * The return value is a pointer to a memory-mapped region of the - * output buffer, or NULL if acquiring failed. - * If the write_output_data function pointer in the encoder params - * is non-NULL, this function is not called. - * This function is only used by bm_jpu_enc_encode(). 
*/ -typedef void* (*BmJpuEncAcquireOutputBuffer)(void *context, size_t size, void **acquired_handle); - -/* Function pointer used during encoding for notifying that the encoder - * is done with the output buffer. This is *not* a function for freeing - * allocated buffers; instead, it makes it possible to release, unmap etc. - * context is the value of output_buffer_context specified in - * BmJpuEncParams. acquired_handle equals the value of *acquired_handle in - * BmJpuEncAcquireOutputBuffer. - * If the write_output_data function pointer in the encoder params - * is non-NULL, this function is not called. */ -typedef void (*BmJpuEncFinishOutputBuffer)(void *context, void *acquired_handle); - -/* Function pointer used during encoding for passing the output encoded data - * to the user. If this function is not NULL, then BmJpuEncFinishOutputBuffer - * and BmJpuEncAcquireOutputBuffer function are not called. Instead, this - * data write function is called whenever the library wants to write output. - * encoded_frame contains valid pts, dts, and context data which was copied - * over from the corresponding raw frame. - * Returns 1 if writing succeeded, 0 otherwise. - * */ -typedef int (*BmJpuWriteOutputData)(void *context, uint8_t const *data, uint32_t size, BmJpuEncodedFrame *encoded_frame); - - -typedef struct -{ - /* Functions for acquiring and finishing output buffers. See the - * typedef documentations above for details about how these - * functions should behave, and the bm_jpu_enc_encode() - * documentation for how they are used. - * Note that these functions are only used if write_output_data - * is set to NULL. - */ - BmJpuEncAcquireOutputBuffer acquire_output_buffer; - BmJpuEncFinishOutputBuffer finish_output_buffer; - - /* Function for directly passing the output data to the user - * without copying it first. - * Using this function will inhibit calls to acquire_output_buffer - * and finish_output_buffer. 
See the typedef documentations - * above for details about how this function should behave, and - * the bm_jpu_enc_encode() documentation for how they are used. - * Note that if this function is NULL then acquire_output_buffer - * and finish_output_buffer must be set. - */ - BmJpuWriteOutputData write_output_data; - - /* User supplied value that will be passed to the functions */ - void *output_buffer_context; -} -BmJpuEncParams; - - -/* Returns a human-readable description of the error code. - * Useful for logging. */ -DECL_EXPORT char const * bm_jpu_enc_error_string(BmJpuEncReturnCodes code); - -/* These two functions load/unload the encoder. Due to an internal reference - * counter, it is safe to call these functions more than once. However, the - * number of unload() calls must match the number of load() calls. - * - * The encoder must be loaded before doing anything else with it. - * Similarly, the encoder must not be unloaded before all encoder activities - * have been finished. This includes opening/decoding encoder instances. */ -DECL_EXPORT BmJpuEncReturnCodes bm_jpu_enc_load(int device_index); -DECL_EXPORT BmJpuEncReturnCodes bm_jpu_enc_unload(int device_index); - -/* Called before bm_jpu_enc_open(), it returns the alignment and size for the - * physical memory block necessary for the encoder's bitstream buffer. The user - * must allocate a DMA buffer of at least this size, and its physical address - * must be aligned according to the alignment value. */ -DECL_EXPORT void bm_jpu_enc_get_bitstream_buffer_info(size_t *size, unsigned int *alignment); - -/* Set the fields in "open_params" to valid defaults - * Useful if the caller wants to modify only a few fields (or none at all) */ -DECL_EXPORT BmJpuEncReturnCodes bm_jpu_enc_set_default_open_params(BmJpuEncOpenParams *open_params); - -/* Opens a new encoder instance. "open_params" and "bitstream_buffer" must not be NULL. 
*/ -DECL_EXPORT BmJpuEncReturnCodes bm_jpu_enc_open(BmJpuEncoder **encoder, BmJpuEncOpenParams *open_params, - bm_device_mem_t *bitstream_buffer); - -/* Closes a encoder instance. Trying to close the same instance multiple times results in undefined behavior. */ -DECL_EXPORT BmJpuEncReturnCodes bm_jpu_enc_close(BmJpuEncoder *encoder); - -/* Retrieves initial information available after calling bm_jpu_enc_open(). */ -DECL_EXPORT BmJpuEncReturnCodes bm_jpu_enc_get_initial_info(BmJpuEncoder *encoder, BmJpuEncInitialInfo *info); - -/* Encodes a given raw input frame with the given encoding parameters. encoded_frame is filled with information - * about the resulting encoded output frame. The encoded frame data itself is stored in a buffer that is - * allocated by user-supplied functions (which are set as the acquire_output_buffer and finish_output_buffer - * function pointers in the encoding_params). - * - * Encoding internally works as follows: first, the actual encoding operation is performed by the JPU. Next, - * information about the encoded data is queried, particularly its size in bytes. Once this size is known, - * acquire_output_buffer() from encoding_params is called. This function must acquire a buffer that can be - * used to store the encoded data. This buffer must be at least as large as the size of the encoded data - * (which is given to acquire_output_buffer() as an argument). The return value of acquire_output_buffer() - * is a pointer to the (potentially memory-mapped) region of the buffer. The encoded frame data is then - * copied to this buffer, and finish_output_buffer() is called. This function can be used to inform the - * caller that the encoder is done with this buffer; it now contains encoded data, and will not be modified - * further. encoded_frame is filled with information about the encoded frame data. - * If acquiring the buffer fails, acquire_output_buffer() returns a NULL pointer. 
- * NOTE: again, finish_output_buffer() is NOT a function to free the buffer; it just signals that the encoder - * won't touch the memory inside the buffer anymore. - * - * acquire_output_buffer() can also pass on a handle to the acquired buffer (for example, in FFmpeg/libav, - * this handle would be a pointer to an AVBuffer). The handle is called the "acquired_handle". - * acquire_output_buffer() can return such a handle. This handle is copied to the encoded_frame struct's - * acquired_handle field. This way, a more intuitive workflow can be used; if for example, acquire_output_buffer() - * returns an AVBuffer pointer as the handle, this AVBuffer pointer ends up in the encoded_frame. Afterwards, - * encoded_frame contains all the necessary information to process the encoded frame data. - * - * It is guaranteed that once the buffer was acquired, finish_output_buffer() will always be called, even if - * an error occurs. This prevents potential memory/resource leaks if the finish_output_buffer() call somehow - * unlocks or releases the buffer for further processing. The acquired_handle is also copied to encoded_frame - * even if an error occurs, unless the error occurred before the acquire_output_buffer() call, in which case - * the encoded_frame's acquired_handle field will be set to NULL. - * - * The aforementioned sequences involve a copy (encoded data is copied into the acquired buffer). As an - * alternative, a write-callback-style mode of operation can be used. This alternative mode is active if - * the write_output_data function pointer in encoding_params is not NULL. In this mode, neither - * acquire_output_buffer() nor finish_output_buffer() are called. Instead, whenever the encoder needs to - * write out data, it calls write_output_data(). - * - * The other fields in encoding_params specify additional encoding parameters, which can vary from frame to - * frame. - * output_code is a bit mask containing information about the encoding result. 
The value is a bitwise OR - * combination of the codes in BmJpuEncOutputCodes. - * - * None of the arguments may be NULL. */ -DECL_EXPORT BmJpuEncReturnCodes bm_jpu_enc_encode(BmJpuEncoder *encoder, - BmJpuRawFrame const *raw_frame, - BmJpuEncodedFrame *encoded_frame, - BmJpuEncParams *encoding_params, - unsigned int *output_code); - -DECL_EXPORT int bm_jpu_get_dump(void); - -#ifdef __cplusplus -} -#endif - - -#endif diff --git a/bmvid/jpeg/binary/soc/include/bmjpuapi_jpeg.h b/bmvid/jpeg/binary/soc/include/bmjpuapi_jpeg.h deleted file mode 100644 index db8f1bc..0000000 --- a/bmvid/jpeg/binary/soc/include/bmjpuapi_jpeg.h +++ /dev/null @@ -1,246 +0,0 @@ -#ifndef BMJPUAPI_JPEG_H -#define BMJPUAPI_JPEG_H - -#include "bmjpuapi.h" - -#ifdef __cplusplus -extern "C" { -#endif - -#if !defined DECL_EXPORT -#ifdef _WIN32 - #define DECL_EXPORT __declspec(dllexport) -#else - #define DECL_EXPORT -#endif -#endif - -typedef struct -{ - /* Width and height of JPU framebuffers are aligned to internal boundaries. - * The frame consists of the actual image pixels and extra padding pixels. - * aligned_frame_width / aligned_frame_height specify the full width/height - * including the padding pixels, and actual_frame_width / actual_frame_height - * specify the width/height without padding pixels. */ - unsigned int aligned_frame_width, aligned_frame_height; - unsigned int actual_frame_width, actual_frame_height; - - /* Stride and size of the Y, Cr, and Cb planes. The Cr and Cb planes always - * have the same stride and size. */ - unsigned int y_stride, cbcr_stride; - unsigned int y_size, cbcr_size; - - /* Offset from the start of a framebuffer's memory, in bytes. Note that the - * Cb and Cr offset values are *not* the same, unlike the stride and size ones. */ - unsigned int y_offset, cb_offset, cr_offset; - - /* Framebuffer containing the pixels of the decoded frame. */ - BmJpuFramebuffer *framebuffer; - - /* Color format of the decoded frame. 
*/ - BmJpuColorFormat color_format; - - int chroma_interleave; -} -BmJpuJPEGDecInfo; - - -typedef struct -{ - BmJpuDecoder *decoder; - - bm_device_mem_t *bitstream_buffer; - size_t bitstream_buffer_size; - unsigned int bitstream_buffer_alignment; - - BmJpuDecInitialInfo initial_info; - - BmJpuFramebuffer *framebuffers; - bm_device_mem_t *fb_dmabuffers; - unsigned int num_framebuffers; - unsigned int num_extra_framebuffers; // TODO - BmJpuFramebufferSizes calculated_sizes; - - BmJpuRawFrame raw_frame; - int device_index; - - BmJpuFramebuffer *cur_buffer; - void *opaque; - - int rotationEnable; - int mirrorEnable; - int mirrorDirection; - int rotationAngle; -} -BmJpuJPEGDecoder; - -/* Opens a new JPU JPEG decoder instance. - * - * Internally, this function calls bm_jpu_dec_load(). - * - * If dma_buffer_allocator is NULL, the default decoder allocator is used. - * - * num_extra_framebuffers is used for instructing this function to allocate this many - * more framebuffers. Usually this value is zero, but in certain cases where many - * JPEGs need to be decoded quickly, or the DMA buffers of decoded frames need to - * be kept around elsewhere, having more framebuffers available can be helpful. - * Note though that more framebuffers also means more DMA memory consumption. - * If unsure, keep this to zero. */ -DECL_EXPORT BmJpuDecReturnCodes bm_jpu_jpeg_dec_open(BmJpuJPEGDecoder **jpeg_decoder, - BmJpuDecOpenParams *open_params, - unsigned int num_extra_framebuffers); - -/* Closes a JPEG decoder instance. Trying to close the same instance multiple times results in undefined behavior. */ -DECL_EXPORT BmJpuDecReturnCodes bm_jpu_jpeg_dec_close(BmJpuJPEGDecoder *jpeg_decoder); - -/* Determines if the JPU can decode a frame at this moment. - * - * The return value depends on how many framebuffers in the decoder are free. - * If enough framebuffers are free, this returns 1, otherwise 0. 
- * - * For simple decoding schemes where one frame is decoded, then displayed or - * consumed in any other way, and then returned to the decoder by calling - * bm_jpu_jpeg_dec_frame_finished(), this function does not have to be used, - * since in this case, there will always be enough free framebuffers. - * If however the consumption of the decoded frame occurs in a different thread - * than the decoding, it makes sense to use this function in order to wait - * until enough framebfufers are free (typically implemented by using mutexes - * and thread condition variables). Also, in this case, this function is more - * likely to return 1 the more extra framebuffers were requested in the - * bm_jpu_jpeg_dec_open() call. - */ -DECL_EXPORT int bm_jpu_jpeg_dec_can_decode(BmJpuJPEGDecoder *jpeg_decoder); - -/* Decodes a JPEG frame. - * - * jpeg_data must be set to the memory block that contains the encoded JPEG data, - * and jpeg_data_size must be set to the size of that block, in bytes. After this - * call, use the bm_jpu_jpeg_dec_get_info() function to retrieve information about - * the decoded frame. - * - * The JPU decoder only consumes baseline JPEG data. Progressive encoding is not supported. */ -DECL_EXPORT BmJpuDecReturnCodes bm_jpu_jpeg_dec_decode(BmJpuJPEGDecoder *jpeg_decoder, - uint8_t const *jpeg_data, - size_t const jpeg_data_size); - -/* Retrieves information about the decoded JPEG frame. - * - * The BmJpuJPEGDecInfo's fields will be set to those of the decoded frame. In particular, - * info's framebuffer pointer will be set to point to the framebuffer containing the - * decoded frame. Be sure to pass this pointer to bm_jpu_jpeg_dec_frame_finished() once - * the frame's pixels are no longer needed. - * - * Note that the return value of the previous bm_jpu_jpeg_dec_decode() call can be - * BM_JPU_DEC_RETURN_CODE_OK even though the framebuffer pointer retrieved here is NULL. - * This is the case when not enough free framebuffers are present. 
It is recommended to - * check the return value of the bm_jpu_jpeg_dec_can_decode() function before calling - * bm_jpu_jpeg_dec_decode(), unless the decoding sequence is simple (like in the example - * mentioned in the bm_jpu_jpeg_dec_can_decode() description). - * - * This function must not be called before bm_jpu_jpeg_dec_decode() , since otherwise, - * there is no information available (it is read in the decoding step). */ -DECL_EXPORT void bm_jpu_jpeg_dec_get_info(BmJpuJPEGDecoder *jpeg_decoder, BmJpuJPEGDecInfo *info); - -/* Inform the JPEG decoder that a previously decoded frame is no longer being used. - * - * This function must always be called once the user is done with a frame, otherwise - * the JPU cannot reclaim this ramebuffer, and will eventually run out of internal - * framebuffers to decode into. */ -DECL_EXPORT BmJpuDecReturnCodes bm_jpu_jpeg_dec_frame_finished(BmJpuJPEGDecoder *jpeg_decoder, - BmJpuFramebuffer *framebuffer); - -DECL_EXPORT BmJpuDecReturnCodes bm_jpu_jpeg_dec_flush(BmJpuJPEGDecoder *jpeg_decoder); - - - -typedef struct -{ - /* Frame width and height of the input frame. These are the actual sizes; - * they will be aligned internally if necessary. These sizes must not be - * zero. */ - unsigned int frame_width, frame_height; - - /* Quality factor for JPEG encoding. 1 = best compression, 100 = best quality. - * This is the exact same quality factor as used by libjpeg. */ - unsigned int quality_factor; - - /* Color format of the input frame. */ - BmJpuColorFormat color_format; - - /* Functions for acquiring and finishing output buffers. See the - * typedef documentations in bmjpuapi.h for details about how - * these functions should behave. */ - BmJpuEncAcquireOutputBuffer acquire_output_buffer; - BmJpuEncFinishOutputBuffer finish_output_buffer; - - /* Function for directly passing the output data to the user - * without copying it first. - * Using this function will inhibit calls to acquire_output_buffer - * and finish_output_buffer. 
*/ - BmJpuWriteOutputData write_output_data; - - /* User supplied value that will be passed to the functions: - * acquire_output_buffer, finish_output_buffer, write_output_data */ - void *output_buffer_context; - - int packed_format; - int chroma_interleave; - - int rotationEnable; - int mirrorEnable; - int mirrorDirection; - int rotationAngle; -} -BmJpuJPEGEncParams; - - -typedef struct _BmJpuJPEGEncoder BmJpuJPEGEncoder; - -/* Opens a new JPU JPEG encoder instance. - * - * Internally, this function calls bm_jpu_enc_load(). - * - * If dma_buffer_allocator is NULL, the default encoder allocator is used. - */ -DECL_EXPORT BmJpuEncReturnCodes bm_jpu_jpeg_enc_open(BmJpuJPEGEncoder **jpeg_encoder, - int bs_buffer_size, - int device_index); - -/* Closes a JPEG encoder instance. Trying to close the same instance multiple times results in undefined behavior. */ -DECL_EXPORT BmJpuEncReturnCodes bm_jpu_jpeg_enc_close(BmJpuJPEGEncoder *jpeg_encoder); - -/* Encodes a raw input frame. - * - * params must be filled with valid values; frame width and height must not be zero. - * framebuffer contains the raw input pixels to encode. Its stride and offset values - * must be valid, and its dma_buffer pointer must point to a DMA buffer that contains - * the pixel data. - * - * During encoding, the encoder will call params->acquire_output_buffer() to acquire - * an output buffer and put encoded JPEG data into. Once encoding is done, the - * params->finish_output_buffer() function is called. This is *not* to be confused with - * a memory deallocation function; it is instead typically used to notify the caller - * that the encoder won't touch the acquired buffer's contents anymore. It is guaranteed - * that finish_output_buffer() is called if acquire_output_buffer() was called earlier. 
- * - * If acquired_handle is non-NULL, then the poiner it refers to will be set to the handle - * produced by acquire_output_buffer(), even if bm_jpu_jpeg_enc_encode() exits with an - * error (unless said error occurred *before* the acquire_output_buffer() call, in which - * case *acquired_handle will be set to NULL). If output_buffer_size is non-NULL, the - * size value it points to will be set to the number of bytes of the encoded JPEG data. - * - * The JPU encoder only produces baseline JPEG data. Progressive encoding is not supported. */ -DECL_EXPORT BmJpuEncReturnCodes bm_jpu_jpeg_enc_encode(BmJpuJPEGEncoder *jpeg_encoder, - BmJpuFramebuffer const *framebuffer, - BmJpuJPEGEncParams const *params, - void **acquired_handle, - size_t *output_buffer_size); - -DECL_EXPORT int bm_jpu_jpeg_get_dump(void); - -#ifdef __cplusplus -} -#endif - - -#endif diff --git a/bmvid/jpeg/binary/soc/include/jpu_lib.h b/bmvid/jpeg/binary/soc/include/jpu_lib.h index ff001f1..b8c5a53 100644 --- a/bmvid/jpeg/binary/soc/include/jpu_lib.h +++ b/bmvid/jpeg/binary/soc/include/jpu_lib.h @@ -9,6 +9,9 @@ #define DC_TABLE_INDEX1 2 #define AC_TABLE_INDEX1 3 +#ifndef BOOL +typedef int BOOL; +#endif //------------------------------------------------------------------------------ // common struct and definition @@ -60,7 +63,8 @@ typedef enum { JPG_RET_INVALID_STRIDE, JPG_RET_WRONG_CALL_SEQUENCE, JPG_RET_CALLED_BEFORE, - JPG_RET_NOT_INITIALIZED + JPG_RET_NOT_INITIALIZED, + JPG_RET_BS_BUFFER_FULL } JpgRet; typedef enum { @@ -284,10 +288,12 @@ DECL_EXPORT int jpu_DecOpen(DecHandle *, DecOpenParam *); DECL_EXPORT int jpu_DecClose(DecHandle); DECL_EXPORT int jpu_DecGetInitialInfo(DecHandle handle, DecInitialInfo * info); +DECL_EXPORT int jpu_DecSetResolutionInfo(DecHandle handle, int width, int height); DECL_EXPORT int jpu_DecRegisterFrameBuffer(DecHandle handle, FrameBuffer * bufArray, int num, int stride, void* par0); DECL_EXPORT int jpu_DecUpdateBitstreamBuffer(DecHandle handle, uint32_t size); 
+DECL_EXPORT int jpu_DecSetRdPtrEx(DecHandle handle, PhysicalAddress addr, BOOL updateWrPtr); DECL_EXPORT int jpu_DecSetBsPtr(DecHandle handle, uint8_t *data, int data_size); DECL_EXPORT int jpu_DecStartOneFrame(DecHandle handle, DecParam * param); @@ -297,6 +303,7 @@ DECL_EXPORT int jpu_DecGiveCommand(DecHandle handle, CodecCommand cmd, void *par DECL_EXPORT int jpu_DecWaitForInt(DecHandle handle, int timeout_in_ms, int timeout_counts); DECL_EXPORT int jpu_GetDump(); +DECL_EXPORT int jpu_HWReset(); DECL_EXPORT int vpp_Init(int32_t device_index); #endif /* __BM_JPU_LIB_H__ */ diff --git a/bmvid/jpeg/binary/soc/lib/libbmjpuapi.so.0 b/bmvid/jpeg/binary/soc/lib/libbmjpuapi.so.0 index 8d5e580..254e318 120000 --- a/bmvid/jpeg/binary/soc/lib/libbmjpuapi.so.0 +++ b/bmvid/jpeg/binary/soc/lib/libbmjpuapi.so.0 @@ -1 +1 @@ -libbmjpuapi.so.0.7.0 \ No newline at end of file +libbmjpuapi.so.0.11.0 \ No newline at end of file diff --git a/bmvid/jpeg/binary/soc/lib/libbmjpuapi.so.0.10.0 b/bmvid/jpeg/binary/soc/lib/libbmjpuapi.so.0.10.0 new file mode 100755 index 0000000..7205db0 Binary files /dev/null and b/bmvid/jpeg/binary/soc/lib/libbmjpuapi.so.0.10.0 differ diff --git a/bmvid/jpeg/binary/soc/lib/libbmjpuapi.so.0.11.0 b/bmvid/jpeg/binary/soc/lib/libbmjpuapi.so.0.11.0 new file mode 100755 index 0000000..13b07f9 Binary files /dev/null and b/bmvid/jpeg/binary/soc/lib/libbmjpuapi.so.0.11.0 differ diff --git a/bmvid/jpeg/binary/soc/lib/libbmjpuapi.so.0.7.0 b/bmvid/jpeg/binary/soc/lib/libbmjpuapi.so.0.7.0 deleted file mode 100755 index 75042c4..0000000 Binary files a/bmvid/jpeg/binary/soc/lib/libbmjpuapi.so.0.7.0 and /dev/null differ diff --git a/bmvid/jpeg/binary/soc/lib/libbmjpulite.a b/bmvid/jpeg/binary/soc/lib/libbmjpulite.a new file mode 100755 index 0000000..3efef9d Binary files /dev/null and b/bmvid/jpeg/binary/soc/lib/libbmjpulite.a differ diff --git a/bmvid/jpeg/binary/soc/lib/libbmjpulite.so.0 b/bmvid/jpeg/binary/soc/lib/libbmjpulite.so.0 index 21f1a70..7e37060 120000 --- 
a/bmvid/jpeg/binary/soc/lib/libbmjpulite.so.0 +++ b/bmvid/jpeg/binary/soc/lib/libbmjpulite.so.0 @@ -1 +1 @@ -libbmjpulite.so.0.7.0 \ No newline at end of file +libbmjpulite.so.0.11.0 \ No newline at end of file diff --git a/bmvid/jpeg/binary/soc/lib/libbmjpulite.so.0.10.0 b/bmvid/jpeg/binary/soc/lib/libbmjpulite.so.0.10.0 new file mode 100755 index 0000000..13cf614 Binary files /dev/null and b/bmvid/jpeg/binary/soc/lib/libbmjpulite.so.0.10.0 differ diff --git a/bmvid/jpeg/binary/soc/lib/libbmjpulite.so.0.11.0 b/bmvid/jpeg/binary/soc/lib/libbmjpulite.so.0.11.0 new file mode 100755 index 0000000..0e32daf Binary files /dev/null and b/bmvid/jpeg/binary/soc/lib/libbmjpulite.so.0.11.0 differ diff --git a/bmvid/jpeg/binary/soc/lib/libbmjpulite.so.0.7.0 b/bmvid/jpeg/binary/soc/lib/libbmjpulite.so.0.7.0 deleted file mode 100755 index 4a3c767..0000000 Binary files a/bmvid/jpeg/binary/soc/lib/libbmjpulite.so.0.7.0 and /dev/null differ diff --git a/bmvid/jpeg/driver/include/version.h b/bmvid/jpeg/driver/include/version.h index 806bdd9..cff9aec 100644 --- a/bmvid/jpeg/driver/include/version.h +++ b/bmvid/jpeg/driver/include/version.h @@ -1,7 +1,7 @@ #ifndef __BM_JPU_VERSION_H__ #define __BM_JPU_VERSION_H__ -#define VERSION "version: ; commit: ; build: 2023-11-30 10:54:11" +#define VERSION "default" #endif /* __BM_JPU_VERSION_H__ */ diff --git a/bmvid/jpeg/driver/jdi/jdi.h b/bmvid/jpeg/driver/jdi/jdi.h index 49fd977..0b0a03f 100755 --- a/bmvid/jpeg/driver/jdi/jdi.h +++ b/bmvid/jpeg/driver/jdi/jdi.h @@ -127,6 +127,7 @@ extern "C" { int jdi_wait_interrupt(int device_index, Uint32 coreIdx,int timeout); int jdi_hw_reset(); + int jdi_hw_reset_all(); int jdi_set_clock_gate(int enable); int jdi_get_clock_gate(); diff --git a/bmvid/jpeg/driver/jdi/linux/driver/jpu.c b/bmvid/jpeg/driver/jdi/linux/driver/jpu.c index d68cf0a..55d0c6f 100755 --- a/bmvid/jpeg/driver/jdi/linux/driver/jpu.c +++ b/bmvid/jpeg/driver/jdi/linux/driver/jpu.c @@ -913,7 +913,7 @@ static long jpu_ioctl(struct file 
*filp, u_int cmd, u_long arg) { ret = -EFAULT; break; - } + } #ifdef JPU_SUPPORT_CLOCK_CONTROL if (clkgate) jpu_clk_enable(jpu_pwm_ctrl.jpu_clk); @@ -1041,6 +1041,7 @@ static long jpu_ioctl(struct file *filp, u_int cmd, u_long arg) break; #endif case JDI_IOCTL_RESET: + #if 0 /* NOTE(review): the 'break' is compiled out too, so JDI_IOCTL_RESET falls through to JDI_IOCTL_RESET_ALL and arg is read as a core count - confirm this is intended */ { u32 core_idx; if (get_user(core_idx, (u32 __user *) arg)) @@ -1050,6 +1051,33 @@ static long jpu_ioctl(struct file *filp, u_int cmd, u_long arg) jpu_hw_reset(core_idx); } break; + #endif + case JDI_IOCTL_RESET_ALL: + { + u32 i, core_num; + + DPRINTK("[JPUDRV][+]JDI_IOCTL_RESET_ALL\n"); + if (get_user(core_num, (u32 __user *) arg)) + { + return -EFAULT; + } + + /* get all cores: acquire s_jpu_sem core_num times before any core is reset. If a wait is interrupted, give back the slots taken so far and return the error instead of retrying in a busy loop. NOTE(review): core_num is user-supplied and unchecked; presumably it must not exceed the semaphore's initial count - confirm. */ + for (i = 0; i < core_num; i++) { + if ((ret = down_interruptible(&s_jpu_sem)) != 0) { + while (i-- > 0) + up(&s_jpu_sem); + return ret; + } + } + + for (i = 0; i < core_num; i++) { + jpu_hw_reset(i); + up(&s_jpu_sem); + } + DPRINTK("[JPUDRV][-]JDI_IOCTL_RESET_ALL\n"); + } + break; case JDI_IOCTL_GET_REGISTER_INFO: { DPRINTK("[JPUDRV][+]JDI_IOCTL_GET_REGISTER_INFO\n"); @@ -1601,7 +1629,7 @@ static int jpu_probe(struct platform_device *pdev) err = bm_jpu_register_cdev(pdev); if (err < 0) { - printk(KERN_ERR "bm_jpu_register_cdev\n"); + printk(KERN_ERR "jpu_register_cdev\n"); goto ERROR_PROVE_DEVICE; } diff --git a/bmvid/jpeg/driver/jdi/linux/driver/jpu.h b/bmvid/jpeg/driver/jdi/linux/driver/jpu.h index 78c1cb2..5f3452e 100644 --- a/bmvid/jpeg/driver/jdi/linux/driver/jpu.h +++ b/bmvid/jpeg/driver/jdi/linux/driver/jpu.h @@ -29,6 +29,7 @@ #define JDI_IOCTL_READ_VMEM CTL_CODE(FILE_DEVICE_UNKNOWN, 0x417, METHOD_BUFFERED, FILE_ANY_ACCESS) #define JDI_IOCTL_WRITE_REGISTER CTL_CODE(FILE_DEVICE_UNKNOWN, 0x418, METHOD_BUFFERED, FILE_ANY_ACCESS) #define JDI_IOCTL_READ_REGISTER CTL_CODE(FILE_DEVICE_UNKNOWN, 0x419, METHOD_BUFFERED, FILE_ANY_ACCESS) +#define JDI_IOCTL_RESET_ALL CTL_CODE(FILE_DEVICE_UNKNOWN, 0x420, METHOD_BUFFERED, FILE_ANY_ACCESS) #else #define JDI_IOCTL_MAGIC 'J' #define JDI_IOCTL_ALLOCATE_PHYSICAL_MEMORY _IO(JDI_IOCTL_MAGIC, 0) @@ -54,10 +55,12
@@ #define JDI_IOCTL_READ_REG _IO(JDI_IOCTL_MAGIC, 18) #endif #define JDI_IOCTL_GET_MAX_NUM_JPU_CORE _IO(JDI_IOCTL_MAGIC, 19) +#define JDI_IOCTL_RESET_ALL _IO(JDI_IOCTL_MAGIC, 20) #endif + typedef struct jpudrv_buffer_t { unsigned long long phys_addr; unsigned long long base; /* kernel logical address in use kernel */ diff --git a/bmvid/update_bmvid_version.sh b/bmvid/update_bmvid_version.sh new file mode 100755 index 0000000..fdb82f3 --- /dev/null +++ b/bmvid/update_bmvid_version.sh @@ -0,0 +1,32 @@ +#!/bin/bash + +function update_bmcv_commit_and_branch() +{ + file_path=$(find "$(git rev-parse --show-toplevel)" -type f -name "bmcv_internal.cpp" -print -quit) + + if [ -n "$file_path" ]; then + file_dir=$(dirname "$file_path") + pushd . > /dev/null + + cd "$file_dir" || exit + + if git rev-parse --git-dir > /dev/null 2>&1; then + commit_hash=$(git log -1 --pretty=format:"%H") + branch_name=$(git branch --contains HEAD | sed -n '/\* /s///p') + + sed -i "s|#define COMMIT_HASH .*|#define COMMIT_HASH \"$commit_hash\"|" "bmcv_internal.cpp" + sed -i "s|#define BRANCH_NAME .*|#define BRANCH_NAME \"$branch_name\"|" "bmcv_internal.cpp" + + echo "Commit hash $commit_hash has been written to $file_path" + echo "Branch name $branch_name has been written to $file_path" + else + echo "This directory is not a git repository." + fi + + popd > /dev/null + else + echo "bmcv_internal.cpp not found." + fi +} + +update_bmcv_commit_and_branch diff --git a/bmvid/video/decoder/bm_dec_api/inc/bm_video_interface.h b/bmvid/video/decoder/bm_dec_api/inc/bm_vpudec_interface.h similarity index 58% rename from bmvid/video/decoder/bm_dec_api/inc/bm_video_interface.h rename to bmvid/video/decoder/bm_dec_api/inc/bm_vpudec_interface.h index e71d8ac..0efd1df 100644 --- a/bmvid/video/decoder/bm_dec_api/inc/bm_video_interface.h +++ b/bmvid/video/decoder/bm_dec_api/inc/bm_vpudec_interface.h @@ -10,14 +10,16 @@ * Sophon VPU en/decoder. 
*/ -#ifndef BM_VIDEO_INTERFACE_H -#define BM_VIDEO_INTERFACE_H +#include + +#ifndef BM_VPUDEC_INTERFACE_H +#define BM_VPUDEC_INTERFACE_H #define STREAM_BUF_SIZE 0x400000 #define TRY_FLOCK_OPEN #if defined(_WIN32) || defined(WIN32) || defined(__WIN32__) -#define ATTRIBUTE +#define ATTRIBUTE #define DECL_EXPORT __declspec(dllexport) #define DECL_IMPORT __declspec(dllimport) #else @@ -26,33 +28,122 @@ #define DECL_IMPORT #endif +typedef enum +{ + BMVPU_DEC_LOG_LEVEL_NONE=0, + BMVPU_DEC_LOG_LEVEL_ERR, + BMVPU_DEC_LOG_LEVEL_WARN, + BMVPU_DEC_LOG_LEVEL_INFO, + BMVPU_DEC_LOG_LEVEL_TRACE, + BMVPU_DEC_LOG_LEVEL_MAX_LOG_LEVEL +} +BmVpuDecLogLevel; + +#ifndef BOOL +typedef int BOOL; +#endif + +#ifndef TRUE +# define TRUE 1 +#endif + +#ifndef FALSE +# define FALSE 0 +#endif + #ifdef _WIN32 typedef unsigned long long u64; -#elif __linux__ +#else typedef unsigned long u64; #endif +#define MAX_FILE_PATH 256 + +typedef enum{ + BMDEC_AVC = 0, //264 + BMDEC_HEVC = 12, //265 +}BmVpuDecStreamFormat; + +typedef enum { + BMDEC_FRAME_SKIP_MODE = 0, // disable skip mode,decode normally + BMDEC_SKIP_NON_REF_NON_I = 1, // Skip non-reference non-intra frames + BMDEC_SKIP_NON_I = 2, // Skip non-intra frames +}BmVpuDecSkipMode; + typedef struct { - int streamFormat; //0:264 - int wtlFormat; //0:420 1 tiled v + unsigned int size; + u64 phys_addr; + u64 virt_addr; + +} BmVpuDecDMABuffer; + +typedef enum { + BMDEC_OUTPUT_UNMAP, // Original data + BMDEC_OUTPUT_TILED = 100, // Output in tiled format(deprecated) + BMDEC_OUTPUT_COMPRESSED, // Output compressed data +} BmVpuDecOutputMapType; + +/** + * @brief Enum for different bitstream modes in BMDEC. + * + * This enum defines different modes for handling the bitstream in BMDEC. + * - BMDEC_BS_MODE_INTERRUPT: Indicates the interrupt mode, which likely means the bitstream processing can be interrupted. + * - BMDEC_BS_MODE_RESERVED: Represents a reserved mode, likely indicating a mode that is not currently used or defined. 
+ * - BMDEC_BS_MODE_PIC_END: Indicates the picture end mode, which likely means the end of a picture in the bitstream. + */ +typedef enum { + BMDEC_BS_MODE_INTERRUPT = 0, /**< Interrupt mode */ + BMDEC_BS_MODE_RESERVED, /**< Reserved mode */ + BMDEC_BS_MODE_PIC_END = 2, /**< Picture end mode */ +} BmVpuDecBitStreamMode; + +typedef enum +{ + BM_VPU_DEC_PIX_FORMAT_YUV420P = 0, /* planar 4:2:0 chroma_interleave is 0;*/ + BM_VPU_DEC_PIX_FORMAT_YUV422P = 1, /* dec not support.*/ + BM_VPU_DEC_PIX_FORMAT_YUV444P = 3, /* dec not support.*/ + BM_VPU_DEC_PIX_FORMAT_YUV400 = 4, /* dec not support 8-bit grayscale */ + BM_VPU_DEC_PIX_FORMAT_NV12 = 5, /* planar 4:2:0 chroma_interleave is 1, nv21 is 0;*/ + BM_VPU_DEC_PIX_FORMAT_NV21 = 6, /* planar 4:2:0 chroma_interleave is 1, nv21 is 1*/ + BM_VPU_DEC_PIX_FORMAT_NV16 = 7, /* dec not support.*/ + BM_VPU_DEC_PIX_FORMAT_NV24 = 8, /* dec not support.*/ + BM_VPU_DEC_PIX_FORMAT_COMPRESSED = 9, + BM_VPU_DEC_PIX_FORMAT_COMPRESSED_10BITS = 10, +} BmVpuDecPixFormat; + +typedef struct { + BmVpuDecStreamFormat streamFormat; //0:264 + BmVpuDecOutputMapType wtlFormat; + BmVpuDecSkipMode skip_mode; //2 only decode I frames. + BmVpuDecBitStreamMode bsMode; //!<<0, RING buffer interrupt. You don't know what's a frame. + int enableCrop; //!<< option for saving yuv - int cbcrInterleave; //!<< 0: None, 1: NV12, 2: NV21 - int nv21; //!<< FALSE: NV12, TRUE: NV21, - //!<< This variable is valid when cbcrInterleave is TRUE + BmVpuDecPixFormat pixel_format; int secondaryAXI; //!<< enable secondary AXI - int streamBufferSize; //!<< Set stream buffer size. 0, default size 0x700000. int mp4class; - int bsMode; //!<<0, RING buffer interrupt. You don't know what's a frame. - int extraFrameBufferNum; int frameDelay; //!<< >0, output the display frame after frameDelay frames decoding. int pcie_board_id; int pcie_no_copyback; int enable_cache; - int skip_mode; //2 only decode I frames.
int perf; int core_idx; + int cmd_queue_depth; + int decode_order; + int picWidth; + int picHeight; + int timeout; + int timeout_count; + + int extraFrameBufferNum; + int min_framebuf_cnt; + int framebuf_delay; + int streamBufferSize; //!<< Set stream buffer size. 0, default size 0x700000. + BmVpuDecDMABuffer bitstream_buffer; + BmVpuDecDMABuffer* frame_buffer; + BmVpuDecDMABuffer* Ytable_buffer; + BmVpuDecDMABuffer* Ctable_buffer; int reserved[13]; } BMVidDecParam; @@ -60,7 +151,11 @@ typedef enum { BMDEC_UNCREATE, BMDEC_UNLOADING, BMDEC_UNINIT, + BMDEC_INITING, + BMDEC_WRONG_RESOLUTION, + BMDEC_FRAMEBUFFER_NOTENOUGH, BMDEC_DECODING, + BMDEC_FRAME_BUF_FULL, BMDEC_ENDOF, BMDEC_STOP, BMDEC_HUNG, @@ -68,11 +163,42 @@ typedef enum { BMDEC_CLOSED, } BMDecStatus; -typedef enum { - BMDEC_OUTPUT_UNMAP, - BMDEC_OUTPUT_TILED = 100, - BMDEC_OUTPUT_COMPRESSED, -} BMDecOutputMapType; +typedef enum +{ + BM_ERR_VDEC_INVALID_CHNID = -27, + BM_ERR_VDEC_ILLEGAL_PARAM, + BM_ERR_VDEC_EXIST, + BM_ERR_VDEC_UNEXIST, + BM_ERR_VDEC_NULL_PTR, + BM_ERR_VDEC_NOT_CONFIG, + BM_ERR_VDEC_NOT_SUPPORT, + BM_ERR_VDEC_NOT_PERM , + BM_ERR_VDEC_INVALID_PIPEID, + BM_ERR_VDEC_INVALID_GRPID, + BM_ERR_VDEC_NOMEM, + BM_ERR_VDEC_NOBUF, + BM_ERR_VDEC_BUF_EMPTY, + BM_ERR_VDEC_BUF_FULL, + BM_ERR_VDEC_SYS_NOTREADY, + BM_ERR_VDEC_BADADDR, + BM_ERR_VDEC_BUSY, + BM_ERR_VDEC_SIZE_NOT_ENOUGH, + BM_ERR_VDEC_INVALID_VB, + BM_ERR_VDEC_ERR_INIT, + BM_ERR_VDEC_ERR_INVALID_RET, + BM_ERR_VDEC_ERR_SEQ_OPER, + BM_ERR_VDEC_ERR_VDEC_MUTEX, + BM_ERR_VDEC_ERR_SEND_FAILED, + BM_ERR_VDEC_ERR_GET_FAILED, + BM_ERR_VDEC_ERR_HUNG, + BM_ERR_VDEC_FAILURE, +} BMVidDecRetStatus; + +typedef enum{ + BMDEC_FLUSH_FAIL = -1, + BMDEC_FLUSH_SUCCESS, + BMDEC_FLUSH_BUF_FULL, +}BMDecFlushStatus; typedef struct BMVidStream { unsigned char* buf; @@ -81,6 +207,7 @@ typedef struct BMVidStream { unsigned int header_size; unsigned char* extradata; unsigned int extradata_size; + unsigned char end_of_stream; u64 pts; u64 dts; } BMVidStream; @@ -145,7 
+272,7 @@ The size and position of cropping window in full frame buffer is presented by using this structure. @endverbatim */ - CropRect picCropRect; + CropRect picCropRect; int mp4DataPartitionEnable; /**< data_partitioned syntax value in MPEG4 VOL header */ int mp4ReversibleVlcEnable; /**< reversible_vlc syntax value in MPEG4 VOL header */ /** @@ -293,36 +420,53 @@ please refer to the 'Appendix: ERROR DEFINITION in programmer\'s guide'. unsigned int sequenceNo; /**< This is the number of sequence information. This variable is increased by 1 when VPU detects change of sequence. */ } BMVidStreamInfo; +/** + * @brief Enum for defining the type of video frames in BmVpuDec. + * + * This enum defines the types of video frames, including progressive and interlaced frames. + * - PROGRESSIVE_FRAME: Represents progressive scan frames, where each frame is displayed in a single pass without interlacing. + * - INTERLACED_FRAME: Represents interlaced frames, where each frame consists of two interleaved fields displayed sequentially for smoother motion. + */ +typedef enum { + PROGRESSIVE_FRAME = 0, /**< Progressive scan frame */ + INTERLACED_FRAME = 1 /**< Interlaced frame */ +} BmVpuDecLaceFrame; + + +/** + * @brief Enum for specifying the format of frame buffers in BmVpuDec. + * + * This enum defines different formats for frame buffers in BmVpuDec. + * - BMDEC_FORMAT_UNCOMPRESSED: Represents uncompressed frame buffer format. + * - BMDEC_FORMAT_COMPRESSED: Represents compressed frame buffer format. + * - BMDEC_FORMAT_COMPRESSED_10BITS: Represents 10-bit compressed frame buffer format. 
+ */ + +typedef enum { + BMDEC_PIC_TYPE_I = 0, /**< I picture */ + BMDEC_PIC_TYPE_P = 1, /**< P picture */ + BMDEC_PIC_TYPE_B = 2, /**< B picture (except VC1) */ + BMDEC_PIC_TYPE_D = 3, /**< D picture in MPEG2 that is only composed of DC coefficients (MPEG2 only) */ + BMDEC_PIC_TYPE_S = 4, /**< S picture in MPEG4 that is an acronym of Sprite and used for GMC (MPEG4 only)*/ + BMDEC_PIC_TYPE_IDR = 5, /**< H.264/H.265 IDR picture */ + BMDEC_PIC_TYPE_AVS2_G = 6, /**< G picture in AVS2 */ +}BmVpuDecPicType; + + #ifndef BMVIDFRAME #define BMVIDFRAME typedef struct BMVidFrame { - int picType; - unsigned char* buf[8]; /**< 0: Y virt addr, 1: Cb virt addr: 2, Cr virt addr. 4: Y phy addr, 5: Cb phy addr, 6: Cr phy addr */ - int stride[8]; - unsigned int width; - unsigned int height; - int frameFormat; - int interlacedFrame; - int lumaBitDepth; /**< Bit depth for luma component */ - int chromaBitDepth; /**< Bit depth for chroma component */ -/** -@verbatim -It specifies a chroma interleave mode of frame buffer. + BmVpuDecPicType picType; + BmVpuDecLaceFrame interlacedFrame; + unsigned char* buf[8]; /**< 0: Y virt addr, 1: Cb virt addr: 2, Cr virt addr. 4: Y phy addr, 5: Cb phy addr, 6: Cr phy addr */ + int stride[8]; + unsigned int width; + unsigned int height; -@* 0 : Cb data are written in Cb frame memory and Cr data are written in Cr frame memory. (chroma separate mode) -@* 1 : Cb and Cr data are written in the same chroma memory. (chroma interleave mode) -@endverbatim -*/ - int cbcrInterleave; -/** -@verbatim -It specifies the way chroma data is interleaved in the frame buffer, bufCb or bufCbBot. -@* 0 : CbCr data is interleaved in chroma memory (NV12). -@* 1 : CrCb data is interleaved in chroma memory (NV21). -@endverbatim -*/ - int nv21; + int lumaBitDepth; /**< Bit depth for luma component */ + int chromaBitDepth; /**< Bit depth for chroma component */ + BmVpuDecPixFormat pixel_format; /** @verbatim It specifies endianess of frame buffer. 
@@ -353,31 +497,33 @@ NOTE: For setting specific values of 128 bit endiness, please refer to the 'WAVE unsigned int coded_height; } BMVidFrame; #endif -typedef void* BMVidCodHandle; +typedef void* BMVidCodHandle; -DECL_EXPORT int BMVidDecCreate(BMVidCodHandle* pVidCodHandle, BMVidDecParam decParam); -DECL_EXPORT int BMVidDecReset(BMVidCodHandle vidCodHandle); -DECL_EXPORT int BMVidDecGetCaps(BMVidCodHandle vidCodHandle, BMVidStreamInfo* streamInfo); -DECL_EXPORT int BMVidDecDecode(BMVidCodHandle vidCodHandle, BMVidStream vidStream); -DECL_EXPORT BMVidFrame* BMVidDecGetOutput(BMVidCodHandle vidCodHandle); -DECL_EXPORT int BMVidDecClearOutput(BMVidCodHandle vidCodHandle, BMVidFrame* frame); -DECL_EXPORT int BMVidDecFlush(BMVidCodHandle vidCodHandle); //in the endof of the file, flush and then close the decoder. -DECL_EXPORT int BMVidDecFlush2(BMVidCodHandle vidCodHandle); //flush the decoder and clear the output. - -DECL_EXPORT int BMVidDecDelete(BMVidCodHandle vidCodHandle); -DECL_EXPORT int BMVidDecSeqInit(BMVidCodHandle vidCodHandle); -DECL_EXPORT BMDecStatus BMVidGetStatus(BMVidCodHandle vidCodHandle); -DECL_EXPORT int BMVidGetStreamBufferEmptySize(BMVidCodHandle vidCodHandle); -DECL_EXPORT int BMVidGetAllFramesInBuffer(BMVidCodHandle vidCodHandle); -DECL_EXPORT int BMVidGetEmptyInputBufCnt(BMVidCodHandle vidCodHandle); -DECL_EXPORT int BMVidGetPktInBufCount(BMVidCodHandle vidCodHandle); -DECL_EXPORT int BMVidVpuReset(int devIdx, int coreIdx); -DECL_EXPORT int getcoreidx(BMVidCodHandle handle); +DECL_EXPORT void bmvpu_dec_set_logging_threshold(BmVpuDecLogLevel log_level); +DECL_EXPORT BMVidDecRetStatus bmvpu_dec_create(BMVidCodHandle* pVidCodHandle, BMVidDecParam decParam); +DECL_EXPORT BMVidDecRetStatus bmvpu_dec_get_caps(BMVidCodHandle vidCodHandle, BMVidStreamInfo* streamInfo); +DECL_EXPORT BMVidDecRetStatus bmvpu_dec_decode(BMVidCodHandle vidCodHandle, BMVidStream vidStream); +DECL_EXPORT BMVidDecRetStatus bmvpu_dec_get_output(BMVidCodHandle vidCodHandle, 
BMVidFrame* frame); +DECL_EXPORT BMVidDecRetStatus bmvpu_dec_clear_output(BMVidCodHandle vidCodHandle, BMVidFrame* frame); +DECL_EXPORT BMVidDecRetStatus bmvpu_dec_flush(BMVidCodHandle vidCodHandle); //in the endof of the file, flush and then close the decoder. + +DECL_EXPORT BMVidDecRetStatus bmvpu_dec_delete(BMVidCodHandle vidCodHandle); +DECL_EXPORT BMDecStatus bmvpu_dec_get_status(BMVidCodHandle vidCodHandle); +DECL_EXPORT int bmvpu_dec_get_stream_buffer_empty_size(BMVidCodHandle vidCodHandle); +DECL_EXPORT BMVidDecRetStatus bmvpu_dec_get_all_frame_in_buffer(BMVidCodHandle vidCodHandle); +DECL_EXPORT int bmvpu_dec_get_all_empty_input_buf_cnt(BMVidCodHandle vidCodHandle); +DECL_EXPORT int bmvpu_dec_get_pkt_in_buf_cnt(BMVidCodHandle vidCodHandle); +DECL_EXPORT BMVidDecRetStatus bmvpu_dec_reset(int devIdx, int coreIdx); +DECL_EXPORT int bmvpu_dec_get_core_idx(BMVidCodHandle handle); //just for debuging. -DECL_EXPORT int BMVidVpuDumpStream(BMVidCodHandle vidCodHandle, unsigned char *p_stream, int size); -DECL_EXPORT int BMVidVpuGetInstIdx(BMVidCodHandle vidCodHandle); +DECL_EXPORT int bmvpu_dec_dump_stream(BMVidCodHandle vidCodHandle, unsigned char *p_stream, int size); +DECL_EXPORT int bmvpu_dec_get_inst_idx(BMVidCodHandle vidCodHandle); + +DECL_EXPORT BMVidDecRetStatus bmvpu_dec_get_stream_info(BMVidCodHandle vidCodHandle, int* width, int* height, int* mini_fb, int* frame_delay); -DECL_EXPORT void bm_syscxt_excepted(int coreid); -DECL_EXPORT void bm_syscxt_set(int coreid, int enable); /* 0 - disable, 1 - enable */ +#ifdef BM_PCIE_MODE +DECL_EXPORT BMVidDecRetStatus bmvpu_dec_read_memory(int coreIdx, u64 addr, unsigned char *data, int len, int endian); +#endif +u64 bmvpu_dec_calc_cbcr_addr(int codec_type, u64 y_addr, int y_stride, int frame_height); // calc cbcr addr by offset. 
#endif diff --git a/bmvid/video/decoder/bm_dec_api/inc/bm_video_internal.h b/bmvid/video/decoder/bm_dec_api/inc/bm_vpudec_internal.h similarity index 87% rename from bmvid/video/decoder/bm_dec_api/inc/bm_video_internal.h rename to bmvid/video/decoder/bm_dec_api/inc/bm_vpudec_internal.h index c6dd6ff..d65c8ab 100644 --- a/bmvid/video/decoder/bm_dec_api/inc/bm_video_internal.h +++ b/bmvid/video/decoder/bm_dec_api/inc/bm_vpudec_internal.h @@ -17,8 +17,7 @@ #if defined(_WIN32) || defined(WIN32) || defined(__WIN32__) #include #endif -#include "bmlib_runtime.h" -#include "bm_video_interface.h" +#include "bm_vpudec_interface.h" typedef enum { BMDEC_START_CREATE, @@ -50,6 +49,8 @@ typedef struct BMVidDecConfig_struct { BOOL cbcrInterleave; //!<< 0: None, 1: NV12, 2: NV21 BOOL nv21; //!<< FALSE: NV12, TRUE: NV21, //!<< This variable is valid when cbcrInterleave is TRUE + Uint32 extern_picWidth; + Uint32 extern_picHeight; EndianMode streamEndian; EndianMode frameEndian; Int32 secondaryAXI; @@ -93,8 +94,8 @@ typedef struct BMVidExtraInfo { typedef struct BMVidCodInst { DecHandle codecInst; - bm_device_mem_t vbStream; - bm_device_mem_t vbUserData; + vpu_buffer_t vbStream; + vpu_buffer_t vbUserData; volatile Uint32 seqInitFlag; volatile Uint32 isStreamBufFilled; volatile BMDecStatus decStatus; @@ -102,10 +103,10 @@ typedef struct BMVidCodInst { Queue* ppuQ; Queue* displayQ; Queue* freeQ; - bm_device_mem_t pFbMem[MAX_REG_FRAME]; - Uint64 fbMemVaddr[MAX_REG_FRAME]; - bm_device_mem_t pPPUFbMem[MAX_REG_FRAME]; - Uint64 pPUFbMemVaddr[MAX_REG_FRAME]; + vpu_buffer_t pFbMem[MAX_REG_FRAME]; + vpu_buffer_t pPPUFbMem[MAX_REG_FRAME]; + vpu_buffer_t pYtabMem[MAX_REG_FRAME]; + vpu_buffer_t pCtabMem[MAX_REG_FRAME]; volatile int endof_flag; Queue* inputQ; osal_cond_t inputCond; @@ -140,6 +141,15 @@ typedef struct BMVidCodInst { int64_t dec_idx; int perf; BMVidFrame cache_bmframe[32]; + int enable_decode_order; + int decode_index_map[MAX_REG_FRAME]; + int timeout; + int timeout_count; + + int 
bitstream_from_user; + int framebuf_from_user; + int min_framebuf_cnt; + int framebuf_delay; } BMVidCodInst; typedef struct PkgInfo { @@ -154,11 +164,11 @@ typedef struct BMVidCodInst* BMVidHandle; typedef struct { DecGetFramebufInfo fbInfo; - bm_device_mem_t allocFbMem[MAX_REG_FRAME]; + vpu_buffer_t allocFbMem[MAX_REG_FRAME]; } SequenceMemInfo; int BMVidDecCreateW5(BMVidCodHandle *pVidCodHandle, BMVidDecParam *decParam); - +int bmvpu_dec_seq_init(BMVidCodHandle vidCodHandle); void bm_syscxt_init(void *p_dec_param, BMVidCodHandle vidHandle); int bm_syscxt_status(int coreid, int instid, int pos); int bm_syscxt_chkstatus(int coreid); diff --git a/bmvid/video/decoder/bm_dec_api/src/bm_video_interface.c b/bmvid/video/decoder/bm_dec_api/src/bm_video_interface.c index abdf505..eff3f5d 100644 --- a/bmvid/video/decoder/bm_dec_api/src/bm_video_interface.c +++ b/bmvid/video/decoder/bm_dec_api/src/bm_video_interface.c @@ -28,14 +28,23 @@ #include // gettimeofday() #include #include +#include #endif -#include "bm_video_interface.h" -#include "bm_video_internal.h" +#include "bm_vpudec_interface.h" +#include "bm_vpudec_internal.h" #include "vpuapifunc.h" #include "vdi.h" #include "misc/debug.h" +#if defined(CHIP_BM1684) +#ifdef BM_PCIE_MODE +#define VPU_DEVICE_NAME "/dev/bm-sophon" +#else +#define VPU_DEVICE_NAME "/dev/vpu" +#endif +#endif + #define VID_PERFORMANCE_TEST //#define STREAM_BUF_SIZE 0x700000 // max bitstream size #define PPU_FB_COUNT 5 @@ -49,6 +58,8 @@ #define STREAM_ALIGNED_LEN 0 #define VIDEO_CAP_NONE 3 +#define BUFFER_ALLOC_FROM_USER 1 + #ifdef TRY_FLOCK_OPEN #define VID_OPEN_FLOCK_NAME "/tmp/vid_open_global_flock" #define VID_RESET_FLOCK_NAME "/tmp/vid_sw_reset_flock_%d_%d" @@ -83,7 +94,65 @@ extern void PrintVpuStatus(Uint32 coreIdx, Uint32 productId); int BMVidDecSeqInitW5(BMVidCodHandle vidCodHandle); -int getcoreidx(BMVidCodHandle handle) +static char * bmvpu_dec_error_string(RetCode errorcode){ + switch (errorcode) + { + case RETCODE_SUCCESS: + return 
"ok"; + case RETCODE_FAILURE: + return "unspecified error"; + case RETCODE_INVALID_HANDLE: + return "invalid handle"; + case RETCODE_INVALID_PARAM: + return "invalid params"; + case RETCODE_INVALID_COMMAND: + return "invalid cmmoand"; + case RETCODE_ROTATOR_STRIDE_NOT_SET: + return "rotator stride is not set"; + case RETCODE_FRAME_NOT_COMPLETE: + return "decoding was not completed yet,"; + case RETCODE_INSUFFICIENT_FRAME_BUFFERS: + return "frame buffers not enough"; + case RETCODE_INVALID_STRIDE: + return "invalid stride"; + case RETCODE_WRONG_CALL_SEQUENCE: + return "wrong call sequence"; + case RETCODE_CALLED_BEFORE: + return "invalid multiple calls"; + case RETCODE_NOT_INITIALIZED: + return "VPU not initialized yet"; + case RETCODE_USERDATA_BUF_NOT_SET: + return "not alloc the userdata mem"; + case RETCODE_MEMORY_ACCESS_VIOLATION: + return "access the protected memory"; + case RETCODE_VPU_RESPONSE_TIMEOUT: + return "timeout"; + case RETCODE_INSUFFICIENT_RESOURCE: + return "memory lack"; + case RETCODE_NOT_FOUND_BITCODE_PATH: + return "not found firmware"; + case RETCODE_NOT_SUPPORTED_FEATURE: + return "unsupport feature"; + case RETCODE_NOT_FOUND_VPU_DEVICE: + return "not found VPU"; + case RETCODE_QUERY_FAILURE: + return "query failed"; + case RETCODE_QUEUEING_FAILURE: + return "queue buffer full"; + case RETCODE_VPU_STILL_RUNNING: + return "VPU still running"; + case RETCODE_REPORT_NOT_READY: + return "report not ready"; + case RETCODE_INVALID_SFS_INSTANCE: + return "run sub-framesync failed"; + case RETCODE_ERROR_FW_FATAL: + return "error FW"; + default: + return "unkonwn error"; + } +} + +int bmvpu_dec_get_core_idx(BMVidCodHandle handle) { BMVidHandle vidHandle = (BMVidHandle)handle; DecHandle decHandle = vidHandle->codecInst; @@ -111,11 +180,18 @@ void BMVidSetLogLevel() */ } +void bmvpu_dec_set_logging_threshold(BmVpuDecLogLevel log_level) +{ + if(log_level>=BMVPU_DEC_LOG_LEVEL_NONE && log_level<=BMVPU_DEC_LOG_LEVEL_MAX_LOG_LEVEL) { + 
SetMaxLogLevel(log_level); + } +} + //dump compressed framebuffer for testing... BOOL writeFilefromDram( int coreIdx, char* path, - u64 vaddr, + PhysicalAddress addr, int size, int endian, int print) @@ -130,11 +206,20 @@ BOOL writeFilefromDram( } if ( print ) - VLOG(INFO, "Save %s From 0x%x, size = 0x%08x(%d)\n", path, vaddr, size, size); + VLOG(INFO, "Save %s From 0x%x, size = 0x%08x(%d)\n", path, addr, size, size); - osal_fwrite((void *)vaddr, sizeof(Uint8), size, fp); + buf = osal_malloc(size); + if ( !buf ) { + VLOG(ERR, "Fail malloc %d\n", size); + return FALSE; + } + + vdi_read_memory(coreIdx, addr, buf, size, endian); + + osal_fwrite((void *)buf, sizeof(Uint8), size, fp); osal_fflush(fp); + osal_free(buf); osal_fclose(fp); return TRUE; @@ -150,6 +235,7 @@ void dumpFbcOneFrame( Uint32 width=0; Uint32 height=0; Uint32 coreIdx = 0; + PhysicalAddress addr; pDecInfo = &handle->CodecInfo->decInfo; coreIdx = VPU_HANDLE_CORE_INDEX(handle); @@ -163,18 +249,8 @@ void dumpFbcOneFrame( char tmp[512]; int fbc_endian; size_t fbcDataSizeY, fbcDataSizeC; - Uint64 vaddrYTb1; - Uint64 vaddrCTb1; - Uint64 vaddrY; - Uint64 vaddrC; VLOG(INFO, "---> DUMP COMPRESSED FRAMEBUFFER #%d TO below log\n", outputInfo->indexFrameDisplayForTiled); VPU_DecGetFrameBuffer(handle, outputInfo->indexFrameDisplayForTiled, &cfb); - bm_device_mem_t devMemY; - bm_device_mem_t devMemC; -#ifndef BM_PCIE_MODE - bm_handle_t bm_handle; - bm_handle= bmvpu_dec_get_bmlib_handle(coreIdx); -#endif fbc_endian = 0; //cfb.endian;//vdi_convert_endian(handle->coreIdx, cfb.endian); /* Calculate FBC Data Size */ @@ -186,38 +262,21 @@ void dumpFbcOneFrame( VLOG(INFO, "displayPicHeight: %d,stride: %d, height: %d, width: %d, ytblSize: %d, ctblSize: %d\n", outputInfo->dispPicHeight, cfb.stride, cfb.height, cfb.width, pDecInfo->vbFbcYTbl[0].size, pDecInfo->vbFbcCTbl[0].size); /* Dump Y compressed data */ - devMemY=bm_mem_from_device(cfb.bufY,fbcDataSizeY); - devMemC=bm_mem_from_device(cfb.bufCb,fbcDataSizeC); - -#ifndef 
BM_PCIE_MODE - bm_mem_mmap_device_mem_no_cache(bm_handle,&devMemY,&vaddrY); - bm_mem_mmap_device_mem_no_cache(bm_handle,&devMemC,&vaddrC); -#endif sprintf(fileName, "./%dx%d_%04d_%d_fbc_data_y.bin", cfb.width, outputInfo->dispPicHeight, frameIdx, fbc_endian); - writeFilefromDram(coreIdx, fileName, vaddrY, fbcDataSizeY, fbc_endian, 1);//bigger than real Y size - + writeFilefromDram(coreIdx, fileName, cfb.bufY, fbcDataSizeY, fbc_endian, 1);//bigger than real Y size /* Dump C compressed data */ - sprintf(fileName, "./%dx%d_%04d_%d_fbc_data_c.bin", cfb.width, outputInfo->dispPicHeight, frameIdx, fbc_endian); - writeFilefromDram(coreIdx, fileName, vaddrC, fbcDataSizeC, fbc_endian, 1);//bigger than real C size - -#ifndef BM_PCIE_MODE - bm_mem_unmap_device_mem(bm_handle,vaddrY,devMemY.size); - bm_mem_unmap_device_mem(bm_handle,vaddrC,devMemC.size); -#endif - - - frameIndex = outputInfo->indexFrameDisplayForTiled; - /* Dump Y Offset table */ VPU_GetFBCOffsetTableSize(STD_HEVC, (int)width, (int)height, (int*)&lumaTblSize, (int*)&chromaTblSize); - - vaddrYTb1 =(Uint64)pDecInfo->vbFbcYTblVaddr[frameIdx]; + frameIndex = outputInfo->indexFrameDisplayForTiled; sprintf(fileName, "./%dx%d_%04d_%d_fbc_table_y.bin", cfb.width, outputInfo->dispPicHeight, frameIdx, fbc_endian); - writeFilefromDram(coreIdx, fileName, vaddrYTb1, pDecInfo->vbDevFbcYTbl[0].size, fbc_endian, 1); + addr = pDecInfo->vbFbcYTbl[frameIndex].phys_addr; + writeFilefromDram(coreIdx, fileName, addr, pDecInfo->vbFbcYTbl[0].size, fbc_endian, 1); - vaddrCTb1=(Uint64)pDecInfo->vbFbcCTblVaddr[frameIndex]; + /* Dump C Offset table */ + frameIndex = outputInfo->indexFrameDisplayForTiled; sprintf(fileName, "./%dx%d_%04d_%d_fbc_table_c.bin", cfb.width, outputInfo->dispPicHeight, frameIdx, fbc_endian); - writeFilefromDram(coreIdx, fileName, vaddrCTb1, pDecInfo->vbDevFbcCTbl[0].size, fbc_endian, 1); + addr = pDecInfo->vbFbcCTbl[frameIndex].phys_addr; + writeFilefromDram(coreIdx, fileName, addr, pDecInfo->vbFbcCTbl[0].size, 
fbc_endian, 1); sprintf(srcPath,"%dx%d_%04d_%d", cfb.width, outputInfo->dispPicHeight, frameIdx, fbc_endian); sprintf(tmp,"./fbd -x %d -y %d -b %d -d %s_ -o %s_fbd.yuv", @@ -232,8 +291,6 @@ static void DisplayQueue_En( { BMVidFrame displayInfo; int divX = 1; - bm_handle_t bm_handle; - bm_handle= bmvpu_dec_get_bmlib_handle(vidHandle->codecInst->coreIdx); if (vidHandle == NULL) { VLOG(ERR, "%s:%d Invalid handle\n", __FUNCTION__, __LINE__); @@ -241,12 +298,11 @@ static void DisplayQueue_En( } osal_memset((void *)&displayInfo, 0x00, sizeof(BMVidFrame)); - displayInfo.picType = fbInfo->picType; + displayInfo.picType = (BmVpuDecPicType)fbInfo->picType; displayInfo.width = fbInfo->rcDisplay.right - fbInfo->rcDisplay.left; displayInfo.height = fbInfo->rcDisplay.bottom - fbInfo->rcDisplay.top; displayInfo.coded_width = fbInfo->dispPicWidth; displayInfo.coded_height = fbInfo->dispPicHeight; - displayInfo.frameFormat = fbInfo->dispFrame.format; displayInfo.stride[0] = fbInfo->dispFrame.stride; if (fbInfo->dispFrame.cbcrInterleave == 0) { @@ -270,7 +326,7 @@ static void DisplayQueue_En( VLOG(ERR, "%s failed to allocate memory\n", __FUNCTION__); return; } - bm_vdi_memcpy_d2s(bm_handle,vidHandle->codecInst->coreIdx,pBase,vidHandle->vbUserData,VPU_USER_DATA_ENDIAN); + vdi_read_memory(vidHandle->codecInst->coreIdx, vidHandle->vbUserData.phys_addr, pBase, vidHandle->vbUserData.size, VPU_USER_DATA_ENDIAN); pEntry = (user_data_entry_t*)pBase; vidHandle->extraInfo.colorPrimaries = 2; vidHandle->extraInfo.colorTransferCharacteristic = 2; @@ -335,20 +391,20 @@ static void DisplayQueue_En( VPU_GetFBCOffsetTableSize(vidHandle->decConfig.bitFormat, (int)displayInfo.width, (int)displayInfo.height, &lumaTblSize, &chromaTblSize); displayInfo.buf[4] = (unsigned char *)(fbInfo->dispFrame.bufY); displayInfo.buf[5] = (unsigned char *)(fbInfo->dispFrame.bufCb); - displayInfo.buf[6] = (unsigned char 
*)(vidHandle->codecInst->CodecInfo->decInfo.vbDevFbcYTbl[fbInfo->dispFrame.myIndex].u.device.device_addr); - displayInfo.buf[7] = (unsigned char *)(vidHandle->codecInst->CodecInfo->decInfo.vbDevFbcCTbl[fbInfo->dispFrame.myIndex].u.device.device_addr); + displayInfo.buf[6] = (unsigned char *)(vidHandle->codecInst->CodecInfo->decInfo.vbFbcYTbl[fbInfo->dispFrame.myIndex].phys_addr); + displayInfo.buf[7] = (unsigned char *)(vidHandle->codecInst->CodecInfo->decInfo.vbFbcCTbl[fbInfo->dispFrame.myIndex].phys_addr); displayInfo.stride[1] = displayInfo.stride[0]; displayInfo.stride[2] = lumaTblSize; displayInfo.stride[6] = lumaTblSize; displayInfo.stride[3] = chromaTblSize; displayInfo.stride[7] = chromaTblSize; - displayInfo.buf[3]=(unsigned char *)vidHandle->codecInst->CodecInfo->decInfo.vbFbcCTblVaddr[fbInfo->dispFrame.myIndex]; + displayInfo.buf[3] = (Uint8 *)(vdi_get_virt_addr(vidHandle->codecInst->coreIdx, (u64)displayInfo.buf[7])); } else { VLOG(ERR, "please check the maptype of framebuffer, the map type is : %d\n", vidHandle->codecInst->CodecInfo->decInfo.mapType); } - displayInfo.frameFormat += 99 + vidHandle->codecInst->CodecInfo->decInfo.mapType; + displayInfo.pixel_format = BM_VPU_DEC_PIX_FORMAT_COMPRESSED; //VLOG(INFO, "pY: 0x%lx, pCb: 0x%lx, pCr: 0x%lx, stride: %d\n", displayInfo.buf[4], displayInfo.buf[5], displayInfo.buf[6], fbInfo->dispFrame.stride); } else { @@ -363,12 +419,23 @@ static void DisplayQueue_En( if(vidHandle->codecInst->CodecInfo->decInfo.wtlEnable == TRUE) fbIndex = VPU_CONVERT_WTL_INDEX(vidHandle->codecInst, fbIndex); //VLOG(INFO, "fbIndex: %d, frameIndex: %d\n, addr: 0x%lx, addr y: 0x%lx, flag: %d\n", fbIndex, fbInfo->dispFrame.myIndex, vidHandle->pFbMem[fbIndex].phys_addr, fbInfo->dispFrame.bufY, vidHandle->pFbMem[fbIndex].enable_cache); +#ifndef BM_PCIE_MODE + vdi_invalidate_memory(vidHandle->codecInst->coreIdx, &vidHandle->pFbMem[fbIndex]); +#endif + } + if (fbInfo->dispFrame.cbcrInterleave) { + if (fbInfo->dispFrame.nv21) + 
displayInfo.pixel_format = BM_VPU_DEC_PIX_FORMAT_NV21; + else + displayInfo.pixel_format = BM_VPU_DEC_PIX_FORMAT_NV12; + } else { + displayInfo.pixel_format = BM_VPU_DEC_PIX_FORMAT_YUV420P; } } #ifndef BM_PCIE_MODE - displayInfo.buf[0]=(Uint8 *)(fbInfo->dispFrame.bufYVaddr+ displayInfo.buf[4]-fbInfo->dispFrame.bufY); - displayInfo.buf[1] = (Uint8 *)(displayInfo.buf[0] + (u64)displayInfo.buf[5] - (u64)displayInfo.buf[4]); - displayInfo.buf[2] = (Uint8 *)(displayInfo.buf[0] + (u64)displayInfo.buf[6] - (u64)displayInfo.buf[4]); + displayInfo.buf[0] = (Uint8 *)(vdi_get_virt_addr(vidHandle->codecInst->coreIdx, (u64)displayInfo.buf[4])); + displayInfo.buf[1] = (Uint8 *)(vdi_get_virt_addr(vidHandle->codecInst->coreIdx, (u64)displayInfo.buf[5])); + displayInfo.buf[2] = (Uint8 *)(vdi_get_virt_addr(vidHandle->codecInst->coreIdx, (u64)displayInfo.buf[6])); #else //use the fake virtual memory displayInfo.buf[0] = (Uint8 *)0xdeadbeef; @@ -380,9 +447,7 @@ static void DisplayQueue_En( displayInfo.chromaBitDepth = fbInfo->dispFrame.chromaBitDepth; //osal_memcpy((void*)&(displayInfo.outputInfo), fbInfo, sizeof(DecOutputInfo)); displayInfo.frameIdx = fbInfo->dispFrame.myIndex; - displayInfo.cbcrInterleave = fbInfo->dispFrame.cbcrInterleave; - displayInfo.nv21 = fbInfo->dispFrame.nv21; - displayInfo.interlacedFrame = fbInfo->interlacedFrame; + displayInfo.interlacedFrame = (BmVpuDecLaceFrame)fbInfo->interlacedFrame; displayInfo.stride[4] = displayInfo.stride[0]; displayInfo.stride[5] = displayInfo.stride[1]; @@ -410,7 +475,7 @@ static void DisplayQueue_En( else { char filename[255]; FILE *fp = NULL; - if(displayInfo.cbcrInterleave) { + if(fbInfo->dispFrame.cbcrInterleave) { sprintf(filename, "/data/frame%d_core%d_inst%d_%dx%d_nv12.yuv", dump_frame_num, vidHandle->codecInst->coreIdx, vidHandle->codecInst->instIndex, displayInfo.stride[0], displayInfo.height); } else { @@ -420,7 +485,7 @@ static void DisplayQueue_En( if(fp != NULL) { fwrite(displayInfo.buf[0], 1, displayInfo.stride[0] 
* displayInfo.height, fp); fwrite(displayInfo.buf[1], 1, displayInfo.stride[1] * displayInfo.height/2, fp); - if(displayInfo.cbcrInterleave==0) + if(fbInfo->dispFrame.cbcrInterleave==0) fwrite(displayInfo.buf[2], 1, displayInfo.stride[2] * displayInfo.height/2, fp); fclose(fp); } @@ -740,8 +805,6 @@ static void process_vpu_msg(void *arg) u64 pts_tmp, dts_tmp; VpuRect rcPpu; BOOL updateStreamBufferFlag = TRUE; - bm_handle_t bm_handle; - bm_handle= bmvpu_dec_get_bmlib_handle(vidCodHandle->codecInst->coreIdx); while (vidCodHandle->endof_flag < BMDEC_START_CLOSE) { int *pktsize = NULL; @@ -919,7 +982,7 @@ static void process_vpu_msg(void *arg) continue; } - if ((ret = BMVidDecSeqInit(vidCodHandle)) != RETCODE_SUCCESS) + if ((ret = bmvpu_dec_seq_init(vidCodHandle)) != RETCODE_SUCCESS) { VLOG(ERR, "frame buffer allocation failed after sequence init, error = 0x%x\n", ret); seqInited = FALSE; @@ -1314,8 +1377,8 @@ static void process_vpu_msg(void *arg) { if (seqChangeRequest == TRUE) { - bm_device_mem_t *pFbMem = vidCodHandle->pFbMem; - bm_device_mem_t *pPPUFbMem = vidCodHandle->pPPUFbMem; + vpu_buffer_t *pFbMem = vidCodHandle->pFbMem; + vpu_buffer_t *pPPUFbMem = vidCodHandle->pPPUFbMem; seqChangeRequest = FALSE; VPU_DecSetRdPtr(handle, seqChangedRdPtr, TRUE); if (seqChangedStreamEndFlag == 1) @@ -1345,12 +1408,12 @@ static void process_vpu_msg(void *arg) { if (pFbMem[index].size > 0) { - bm_free_mem(bm_handle,pFbMem[index],vidCodHandle->fbMemVaddr[index]); + vdi_free_dma_memory(coreIdx, &pFbMem[index]); pFbMem[index].size=0; } if (pPPUFbMem[index].size > 0) { - bm_free_mem(bm_handle,pPPUFbMem[index],vidCodHandle->pPUFbMemVaddr[index]); + vdi_free_dma_memory(coreIdx, &pPPUFbMem[index]); pPPUFbMem[index].size=0; } } @@ -1422,13 +1485,13 @@ static void process_vpu_msg(void *arg) { if (vidCodHandle->pFbMem[index].size > 0) { - bm_free_mem(bm_handle,vidCodHandle->pPPUFbMem[index],vidCodHandle->pPUFbMemVaddr[index]); + vdi_free_dma_memory(coreIdx, 
&(vidCodHandle->pFbMem[index])); vidCodHandle->pFbMem[index].size=0; } if (vidCodHandle->pPPUFbMem[index].size > 0) { - bm_free_mem(bm_handle,vidCodHandle->pPPUFbMem[index],vidCodHandle->pPUFbMemVaddr[index]); + vdi_free_dma_memory(coreIdx, &(vidCodHandle->pPPUFbMem[index])); vidCodHandle->pPPUFbMem[index].size = 0; } } @@ -1438,7 +1501,7 @@ static void process_vpu_msg(void *arg) if (vidCodHandle->vbStream.size > 0) { VLOG(INFO, "free vbstream buffer !!!\n"); - bm_free_mem(bm_handle,vidCodHandle->vbStream,0x00); + vdi_free_dma_memory(coreIdx, &(vidCodHandle->vbStream)); vidCodHandle->vbStream.size=0; } @@ -1469,42 +1532,52 @@ static void process_vpu_msg(void *arg) SequenceMemInfo seqMemInfo[MAX_NUM_INSTANCE][MAX_SEQUENCE_MEM_COUNT]; -static void releasePreviousSequenceResources(DecHandle handle, bm_device_mem_t* arrFbMem, DecGetFramebufInfo* prevSeqFbInfo) +static void releasePreviousSequenceResources(DecHandle handle, vpu_buffer_t* arrFbMem, DecGetFramebufInfo* prevSeqFbInfo, int mem_type) { Uint32 i, coreIndex; - bm_handle_t bm_handle; if (handle == NULL) { return; } coreIndex = VPU_HANDLE_CORE_INDEX(handle); - bm_handle= bmvpu_dec_get_bmlib_handle(coreIndex); for (i = 0; i < MAX_REG_FRAME; i++) { if (arrFbMem[i].size > 0) { - bm_free_mem(bm_handle,arrFbMem[i],0x00); + vdi_free_dma_memory(coreIndex, &arrFbMem[i]); arrFbMem[i].size =0; } } for (i = 0; i < MAX_REG_FRAME; i++) { - if (prevSeqFbInfo->devMemInfoVbFbcYTbl[i].size > 0) + if(mem_type != BUFFER_ALLOC_FROM_USER) { - bm_free_device(bm_handle,prevSeqFbInfo->devMemInfoVbFbcYTbl[i]); - prevSeqFbInfo->devMemInfoVbFbcYTbl[i].size = 0; + if (prevSeqFbInfo->vbFbcYTbl[i].size > 0) + { + vdi_free_dma_memory(coreIndex, &prevSeqFbInfo->vbFbcYTbl[i]); + prevSeqFbInfo->vbFbcYTbl[i].size = 0; + + } + if (prevSeqFbInfo->vbFbcCTbl[i].size > 0) + { + vdi_free_dma_memory(coreIndex, &prevSeqFbInfo->vbFbcCTbl[i]); + prevSeqFbInfo->vbFbcCTbl[i].size = 0; + } } - if (prevSeqFbInfo->devMemInfoVbFbcCTbl[i].size > 0) + else { - 
bm_free_device(bm_handle,prevSeqFbInfo->devMemInfoVbFbcCTbl[i]); - prevSeqFbInfo->devMemInfoVbFbcCTbl[i].size = 0; + vdi_dettach_dma_memory(coreIndex, &prevSeqFbInfo->vbFbcYTbl[i]); + vdi_dettach_dma_memory(coreIndex, &prevSeqFbInfo->vbFbcCTbl[i]); + vdi_unmap_memory(coreIndex, &prevSeqFbInfo->vbFbcYTbl[i]); + vdi_unmap_memory(coreIndex, &prevSeqFbInfo->vbFbcCTbl[i]); } - if (prevSeqFbInfo->devMemInfoVbMv[i].size > 0) + + if (prevSeqFbInfo->vbMvCol[i].size > 0) { - bm_free_device(bm_handle,prevSeqFbInfo->devMemInfoVbMv[i]); - prevSeqFbInfo->devMemInfoVbMv[i].size = 0; + vdi_free_dma_memory(coreIndex, &prevSeqFbInfo->vbMvCol[i]); + prevSeqFbInfo->vbMvCol[i].size = 0; } } @@ -1519,25 +1592,25 @@ static int decSeqChange(BMVidCodHandle vidCodHandle, DecOutputInfo* outputInfo) Uint32 framebufStride; Uint32 index; FrameBuffer pFrame[MAX_REG_FRAME]; - bm_device_mem_t *pFbMem; + vpu_buffer_t *pFbMem; DecHandle handle; BMVidDecConfig *param; TestDecConfig decParam; BOOL dpbChanged, sizeChanged, bitDepthChanged; Uint32 sequenceChangeFlag; - bm_handle_t bm_handle; + Int32 coreIdx; if (vidHandle != NULL && vidHandle->codecInst != NULL) { handle = vidHandle->codecInst; param = &(vidHandle->decConfig); pFbMem = vidHandle->pFbMem; + coreIdx = VPU_HANDLE_CORE_INDEX(handle); } else { return ret; } - bm_handle= bmvpu_dec_get_bmlib_handle(vidHandle->codecInst->coreIdx); osal_memset(&initialInfo, 0x00, sizeof(DecInitialInfo)); osal_memset(&decParam, 0x00, sizeof(TestDecConfig)); @@ -1603,12 +1676,12 @@ static int decSeqChange(BMVidCodHandle vidCodHandle, DecOutputInfo* outputInfo) for (index=0; indexframebuf_from_user != BUFFER_ALLOC_FROM_USER) { // free allocated framebuffer if (pFbMem[index].size > 0) { - bm_free_mem(bm_handle,pFbMem[index],vidHandle->fbMemVaddr[index]); + vdi_free_dma_memory(coreIdx, &pFbMem[index]); pFbMem[index].size = 0; } } @@ -1619,26 +1692,26 @@ static int decSeqChange(BMVidCodHandle vidCodHandle, DecOutputInfo* outputInfo) { for ( index=0 ; index 0) + 
if(prevSeqFbInfo.vbMvCol[index].size > 0) { - bm_free_mem(bm_handle,prevSeqFbInfo.devMvCol[index],0x00); - prevSeqFbInfo.devMvCol[index].size = 0; + vdi_free_dma_memory(coreIdx, &prevSeqFbInfo.vbMvCol[index]); + prevSeqFbInfo.vbMvCol[index].size = 0; } - if(prevSeqFbInfo.devMemInfoVbFbcYTbl[index].size > 0) + if(prevSeqFbInfo.vbFbcYTbl[index].size > 0) { - bm_free_mem(bm_handle,prevSeqFbInfo.devMemInfoVbFbcYTbl[index],0x00); - prevSeqFbInfo.devMemInfoVbFbcYTbl[index].size=0; + vdi_free_dma_memory(coreIdx, &prevSeqFbInfo.vbFbcYTbl[index]); + prevSeqFbInfo.vbFbcYTbl[index].size=0; } - if(prevSeqFbInfo.devMemInfoVbFbcCTbl[index].size > 0) + if(prevSeqFbInfo.vbFbcCTbl[index].size > 0) { - bm_free_mem(bm_handle,prevSeqFbInfo.devMemInfoVbFbcCTbl[index],0x00); - prevSeqFbInfo.devMemInfoVbFbcCTbl[index].size=0; + vdi_free_dma_memory(coreIdx, &prevSeqFbInfo.vbFbcCTbl[index]); + prevSeqFbInfo.vbFbcCTbl[index].size=0; } } } osal_memset(pSeqMem, 0x00, sizeof(SequenceMemInfo)); osal_memcpy(&pSeqMem->fbInfo, &prevSeqFbInfo, sizeof(DecGetFramebufInfo)); - osal_memcpy(pSeqMem->allocFbMem, pFbMem, sizeof(bm_device_mem_t)*MAX_REG_FRAME); + osal_memcpy(pSeqMem->allocFbMem, pFbMem, sizeof(vpu_buffer_t)*MAX_REG_FRAME); VPU_DecGiveCommand(handle, DEC_RESET_FRAMEBUF_INFO, NULL); @@ -1666,7 +1739,11 @@ static int decSeqChange(BMVidCodHandle vidCodHandle, DecOutputInfo* outputInfo) In most case, # of linear fbs must be greater or equal than max_num_reorder, but the expression of @ in the sample code is in order to make the situation that # of linear is greater than # of fbc. 
*/ - osal_memset((void*)pFbMem, 0x00, sizeof(bm_device_mem_t)*MAX_REG_FRAME); + // TODO: sequence should use callback + // Now, alloc framebuffer in sdk + vidHandle->framebuf_from_user = 0; + decParam.framebuf_from_user = 0; + osal_memset((void*)pFbMem, 0x00, sizeof(vpu_buffer_t)*MAX_REG_FRAME); if (AllocateDecFrameBuffer(handle, &decParam, compressedFbCount, linearFbCount, pFrame, pFbMem, &framebufStride, vidHandle->enable_cache) == FALSE) { VLOG(ERR, "[SEQ_CHANGE] AllocateDecFrameBuffer failure\n"); @@ -1692,7 +1769,7 @@ static int decSeqChange(BMVidCodHandle vidCodHandle, DecOutputInfo* outputInfo) FrameBuffer newFbs[2]; FrameBuffer* pFbcFb = NULL; FrameBuffer* pLinearFb = NULL; - bm_device_mem_t newMem[2]; + vpu_buffer_t newMem[2]; VLOG(INFO, "----- INTER RESOLUTION CHANGED -----\n"); fbcIndex = (Int8)(outputInfo->indexInterFrameDecoded & 0xff); @@ -1714,15 +1791,13 @@ static int decSeqChange(BMVidCodHandle vidCodHandle, DecOutputInfo* outputInfo) if (fbcIndex >= 0) { /* Release the FBC framebuffer */ - bm_free_mem(bm_handle,pFbMem[fbcIndex],vidHandle->fbMemVaddr[fbcIndex]); - pFbMem[fbcIndex].size =0; + vdi_free_dma_memory(coreIdx, &pFbMem[fbcIndex]); } if (linearIndex >= 0) { /* Release the linear framebuffer */ - bm_free_mem(bm_handle,pFbMem[linearIndex],vidHandle->fbMemVaddr[fbcIndex]); - osal_memset((void*)&pFbMem[linearIndex], 0x00, sizeof(vpu_buffer_t)); + vdi_free_dma_memory(coreIdx, &pFbMem[linearIndex]); } if (AllocateDecFrameBuffer(handle, &decParam, (fbcIndex>=0?1:0), (linearIndex>=0?1:0), newFbs, newMem, &newStride, vidHandle->enable_cache) == FALSE) @@ -1931,7 +2006,8 @@ static void process_vpu_msg_w5(void *arg) BMVidDecConfig *param; DecParam decParam; DecOutputInfo outputInfo; - RetCode ret = RETCODE_FAILURE; + int ret = RETCODE_FAILURE; + Int32 queueFailCount = 0; Int32 timeoutCount = 0; Int32 timeoutRetry = 0; Int32 interruptFlag = 0; @@ -1952,6 +2028,7 @@ static void process_vpu_msg_w5(void *arg) BOOL doSkipFrame = FALSE; Uint32 index; BOOL 
restart = FALSE, bufReuse = FALSE; + int mem_type; int bitstreamMode; u64 pts_tmp, dts_tmp; u64 *pts, *dts, *pts_org=NULL, *dts_org=NULL; @@ -1975,8 +2052,6 @@ static void process_vpu_msg_w5(void *arg) pcie_board_idx = coreIdx/MAX_NUM_VPU_CORE_CHIP; bitstreamMode = handle->CodecInfo->decInfo.openParam.bitstreamMode; - bm_handle_t bm_handle=NULL; - bm_handle =bmvpu_dec_get_bmlib_handle(handle->coreIdx); osal_memset(&decParam, 0x00, sizeof(DecParam)); osal_memset(&outputInfo, 0x00, sizeof(DecOutputInfo)); osal_memset(seqMemInfo[instIdx], 0x00, sizeof(seqMemInfo[instIdx])); @@ -2019,7 +2094,6 @@ static void process_vpu_msg_w5(void *arg) #elif _WIN32 Sleep(100); #endif - bm_syscxt_excepted(coreIdx); } supportCommandQueue = TRUE; restart = FALSE; @@ -2156,7 +2230,11 @@ static void process_vpu_msg_w5(void *arg) osal_msleep(2); } timeoutCount++; - } while (ret == RETCODE_QUEUEING_FAILURE); + if(timeoutCount % 5000 == 0) { + VLOG(ERR, "VPU_DecIssueSeqInit queueing fail.\n"); + vidCodHandle->decStatus = BMDEC_HUNG; + } + } while (ret == RETCODE_QUEUEING_FAILURE && vidCodHandle->endof_flag < BMDEC_START_CLOSE); //doingSeqInit = TRUE; } @@ -2169,18 +2247,18 @@ static void process_vpu_msg_w5(void *arg) while (seqInited == FALSE) { //if (bm_syscxt_chkstatus(coreIdx) < 0) break; - interruptFlag = VPU_WaitInterruptEx(handle, VPU_WAIT_TIME_OUT); //wait for 10ms to save stream filling time. + interruptFlag = VPU_WaitInterruptEx(handle, vidCodHandle->timeout); //wait for 10ms to save stream filling time. 
if (interruptFlag == -1) { timeoutCount++; -#ifndef BM_PCIE_MODE - if (timeoutCount * VPU_WAIT_TIME_OUT >= VPU_DEC_TIMEOUT) -#else - if (timeoutCount > VPU_DEC_TIMEOUT) -#endif + if (timeoutCount > vidCodHandle->timeout_count) { VLOG(ERR, "\ncoreIdx %d InstIdx %d: VPU seqinit interrupt wait timeout\n", coreIdx, instIdx); + if (timeoutRetry > 10){ + vidCodHandle->decStatus = BMDEC_HUNG; + timeoutCount = 0; + } get_reset_flock(pcie_board_idx, coreIdx); doSWReset = DoReset(handle); timeoutCount = 0; @@ -2253,9 +2331,10 @@ static void process_vpu_msg_w5(void *arg) if ((ret = BMVidDecSeqInitW5(vidCodHandle)) != RETCODE_SUCCESS) { - VLOG(ERR, "InstIdx %d: BMVidDecSeqInitW5 failed Error code is 0x%x \n", instIdx, ret); + VLOG(ERR, "InstIdx %d: BMVidDecSeqInitW5 failed Error code is %d \n", instIdx, ret); if(ret < 0) { - vidCodHandle->decStatus = BMDEC_HUNG; + if(vidCodHandle->decStatus != BMDEC_WRONG_RESOLUTION && vidCodHandle->decStatus != BMDEC_FRAMEBUFFER_NOTENOUGH) + vidCodHandle->decStatus = BMDEC_HUNG; break; } seqInited = FALSE; @@ -2320,6 +2399,11 @@ static void process_vpu_msg_w5(void *arg) VPU_DecGiveCommand(handle, DEC_GET_QUEUE_STATUS, (void *)&qStatus); if(qStatus.instanceQueueCount == 0) { + queueFailCount++; + if(queueFailCount % 5000 == 0) { + VLOG(ERR, "dec send stream queueing fail.\n"); + vidCodHandle->decStatus = BMDEC_HUNG; + } continue; } } @@ -2330,7 +2414,6 @@ static void process_vpu_msg_w5(void *arg) vidCodHandle->decStatus = BMDEC_HUNG; VLOG(ERR, "instIdx %d: VPU_DecStartOneFrame PIC ret %d, instanceQueueCount %d, totalQueueCount %d\n", instIdx, ret, qStatus.instanceQueueCount, qStatus.totalQueueCount); PrintDecVpuStatus(handle); - //bm_syscxt_excepted(coreIdx); continue; } else @@ -2338,16 +2421,17 @@ static void process_vpu_msg_w5(void *arg) //QueueStatusInfo qStatus; //VPU_DecGiveCommand(handle, DEC_GET_QUEUE_STATUS, (void *)&qStatus); //VLOG(INFO, "instIdx %d: VPU_DecStartOneFrame PIC success, instanceQueueCount %d, totalQueueCount %d\n", 
instIdx, qStatus.instanceQueueCount, qStatus.totalQueueCount); + queueFailCount = 0; bufReuse = FALSE; } do { //if (bm_syscxt_chkstatus(coreIdx) < 0) break; - if ((interruptFlag=VPU_WaitInterruptEx(handle, VPU_WAIT_TIME_OUT)) == -1) + if ((interruptFlag=VPU_WaitInterruptEx(handle, vidCodHandle->timeout)) == -1) { timeoutCount++; VLOG(WARN, "coreIdx %d InstIdx %d: interruptFlag %d\n", coreIdx, instIdx, interruptFlag); //wait for 10ms to save stream filling time. - if (timeoutCount * VPU_WAIT_TIME_OUT >= VPU_DEC_TIMEOUT) + if (timeoutCount >= vidCodHandle->timeout_count) { VLOG(ERR, "\ncoreIdx %d InstIdx %d: VPU interrupt wait timeout\n", coreIdx, instIdx); @@ -2373,7 +2457,7 @@ static void process_vpu_msg_w5(void *arg) //dump stream unsigned char *p_stream = malloc(0x700000); if(p_stream != NULL) { - int len = BMVidVpuDumpStream(vidCodHandle, p_stream, 0x700000); + int len = bmvpu_dec_dump_stream(vidCodHandle, p_stream, 0x700000); char timeout_dump_file_name[256] = {0}; FILE *fp = NULL; sprintf(timeout_dump_file_name, "core%d_inst%d_timeoutdump.bin", coreIdx, instIdx); @@ -2395,6 +2479,7 @@ static void process_vpu_msg_w5(void *arg) //timeoutCount++; interruptFlag = 0; } + if (interruptFlag > 0) { /* @@ -2594,7 +2679,24 @@ static void process_vpu_msg_w5(void *arg) } } - if (outputInfo.indexFrameDisplay >= 0 && vidCodHandle->no_reorder_flag == 0) + if (vidCodHandle->enable_decode_order && outputInfo.indexFrameDecoded >= 0) + { + if(outputInfo.dispFrame.myIndex>=0 && outputInfo.dispFrame.stride <= 8192) + { + osal_cond_lock(vidCodHandle->outputCond); + vidCodHandle->frameInBuffer += 1; + osal_cond_unlock(vidCodHandle->outputCond); + DisplayQueue_En(vidCodHandle, &outputInfo, pts[outputInfo.indexFrameDecoded], dts[outputInfo.indexFrameDecoded]); + vidCodHandle->decode_index_map[outputInfo.dispFrame.myIndex] = outputInfo.indexFrameDisplay; + dispIdx++; + } + else + { + VLOG(ERR, "The display stride wrong. 
decode index %d stride %d\n", outputInfo.indexFrameDecoded, outputInfo.dispFrame.stride); + VPU_DecClrDispFlag(handle, outputInfo.indexFrameDecoded); + } + } + else if (outputInfo.indexFrameDisplay >= 0 && vidCodHandle->no_reorder_flag == 0) { if(outputInfo.dispFrame.myIndex>=0 && outputInfo.dispFrame.stride <= 8192) { @@ -2626,7 +2728,7 @@ static void process_vpu_msg_w5(void *arg) break; } - if(vidCodHandle->decStatus != BMDEC_HUNG) + if(vidCodHandle->decStatus != BMDEC_HUNG && vidCodHandle->decStatus != BMDEC_WRONG_RESOLUTION && vidCodHandle->decStatus != BMDEC_FRAMEBUFFER_NOTENOUGH) vidCodHandle->decStatus = BMDEC_STOP; while(vidCodHandle->endof_flag != BMDEC_START_CLOSE) { @@ -2645,6 +2747,7 @@ static void process_vpu_msg_w5(void *arg) /******************************************************************************** * DESTROY INSTANCE * ********************************************************************************/ + mem_type = handle->CodecInfo->decInfo.framebuf_from_user; close_decoder(handle, doSWReset); /* Release all previous sequence resources */ @@ -2652,32 +2755,41 @@ static void process_vpu_msg_w5(void *arg) { for (index = 0; index < MAX_SEQUENCE_MEM_COUNT; index++) { - releasePreviousSequenceResources(handle, seqMemInfo[handle->instIndex][index].allocFbMem,&seqMemInfo[handle->instIndex][index].fbInfo); + releasePreviousSequenceResources(handle, seqMemInfo[handle->instIndex][index].allocFbMem,&seqMemInfo[handle->instIndex][index].fbInfo, mem_type); } } for (index = 0; index < MAX_REG_FRAME; index++) { - if(vidCodHandle->pFbMem[index].size>0) + if(vidCodHandle->pFbMem[index].size > 0) + { + if(vidCodHandle->framebuf_from_user != BUFFER_ALLOC_FROM_USER) { - bm_free_mem(bm_handle,vidCodHandle->pFbMem[index],vidCodHandle->fbMemVaddr[index]); + vdi_free_dma_memory(coreIdx, &(vidCodHandle->pFbMem[index])); vidCodHandle->pFbMem[index].size=0; } + else + { + vdi_dettach_dma_memory(coreIdx, &(vidCodHandle->pFbMem[index])); + vdi_unmap_memory(coreIdx, 
&(vidCodHandle->pFbMem[index])); + } + } } + VLOG(INFO, "\nDec End. Tot Frame %d. instIdx: %d\n", decodedIdx, instIdx); #ifdef VID_PERFORMANCE_TEST if(vidCodHandle->perf != 0 && decodedIdx > 0) VLOG(INFO, "core : %d, inst : %d, max_time : %ld us, min_time : %ld us, ave_time : %ld us.\n", coreIdx, instIdx, max_time, min_time, total_time/decodedIdx); #endif - if (vidCodHandle->vbStream.size > 0) + if (vidCodHandle->vbStream.size > 0 && vidCodHandle->bitstream_from_user != BUFFER_ALLOC_FROM_USER) { - bm_free_mem(bm_handle,vidCodHandle->vbStream,0x00); + vdi_free_dma_memory(coreIdx, &vidCodHandle->vbStream); vidCodHandle->vbStream.size = 0; } if (vidCodHandle->vbUserData.size > 0) { - bm_free_mem(bm_handle,vidCodHandle->vbUserData,0x00); + vdi_free_dma_memory(coreIdx, &vidCodHandle->vbUserData); vidCodHandle->vbUserData.size = 0; } VPU_DeInit(coreIdx); @@ -2720,7 +2832,7 @@ static void process_vpu_msg_w5(void *arg) @endverbatim */ #ifndef CHIP_BM1684 -static int getVpuCoreIdx(int format) +static int getVpuCoreIdx(BMDecStreamFormat format) { if(format >= STD_AVC && format <= STD_VP8) { @@ -2749,16 +2861,42 @@ static int checkHandle(BMVidCodHandle handle) VLOG(ERR, "err dec handle : 0x%p\n", handle); return 0; } + +static int bmvpu_dec_buffer_convert(int core_idx, vpu_buffer_t *vdb, BmVpuDecDMABuffer* vb) +{ + int ret; + if(vdb == NULL || vb == NULL) + return -1; + + vdb->base = 0xffffffff; + vdb->size = vb->size; + vdb->phys_addr = vb->phys_addr; +#ifndef BM_PCIE_MODE + vdb->enable_cache = 1; + ret = vdi_mmap_memory(core_idx, vdb); + if(ret != BM_SUCCESS) + { + VLOG(ERR, "bmvpu_dec_buffer_convert: mmap failed. phys addr:0x%lx size:%d\n", vb->phys_addr, vb->size); + } + vb->virt_addr = vdb->virt_addr; +#else + vdb->virt_addr = FAKE_PCIE_VIRT_ADDR; + vb->virt_addr = 0; +#endif + + return 0; +} + /** * @brief This function create a decoder instance. * @param pop [Input] this is open decoder parameter * @param pHandle [Output] decoder instance. * @return error code. 
*/ -int BMVidDecCreate(BMVidCodHandle *pVidCodHandle, BMVidDecParam decParam) +BMVidDecRetStatus bmvpu_dec_create(BMVidCodHandle *pVidCodHandle, BMVidDecParam decParam) { - bm_device_mem_t vbStream = {0}; - RetCode ret = RETCODE_FAILURE; + vpu_buffer_t vbStream = {0}; + int ret = BM_ERR_VDEC_FAILURE; int coreIdx = 0;//getVpuCoreIdx(decParam.streamFormat); Int32 pcie_board_idx = 0; DecOpenParam decOP; @@ -2766,14 +2904,11 @@ int BMVidDecCreate(BMVidCodHandle *pVidCodHandle, BMVidDecParam decParam) DecHandle handle; BMVidHandle vidHandle; BMVidDecConfig testConfig, *param = &testConfig; - bm_handle_t devHandle; if (coreIdx < 0 || coreIdx >= MAX_NUM_VPU_CORE) - { - return ret; - } + return BM_ERR_VDEC_ILLEGAL_PARAM; - if ((decParam.streamFormat < 0) && (decParam.streamFormat > STD_HEVC)) + if ((decParam.streamFormat < BMDEC_AVC) && (decParam.streamFormat > BMDEC_HEVC)) return ret; BMVidSetLogLevel(); @@ -2781,26 +2916,32 @@ int BMVidDecCreate(BMVidCodHandle *pVidCodHandle, BMVidDecParam decParam) return BMVidDecCreateW5(pVidCodHandle, &decParam); #endif - if (decParam.streamFormat == STD_HEVC) + if (decParam.streamFormat == BMDEC_HEVC) { - return BMVidDecCreateW5(pVidCodHandle, &decParam); + ret = BMVidDecCreateW5(pVidCodHandle, &decParam); + if(ret != RETCODE_SUCCESS) + return BM_ERR_FAILURE; } osal_memset(&testConfig, 0, sizeof(testConfig)); - bmvpu_dec_load_bmlib_handle(coreIdx); - devHandle=bmvpu_dec_get_bmlib_handle(coreIdx); pcie_board_idx = decParam.pcie_board_id; if ((pcie_board_idx <0) || (pcie_board_idx >= MAX_PCIE_BOARD_NUM)) { VLOG(ERR, "pcie board id exceeds max value: %d\n",pcie_board_idx); - return RETCODE_FAILURE; + return BM_ERR_VDEC_ILLEGAL_PARAM; } testConfig.bitstreamMode = decParam.bsMode; //BS_MODE_INTERRUPT; testConfig.streamEndian = VDI_LITTLE_ENDIAN; testConfig.frameEndian = VDI_LITTLE_ENDIAN; - testConfig.cbcrInterleave = decParam.cbcrInterleave; //FALSE; + if (decParam.pixel_format == BM_VPU_DEC_PIX_FORMAT_NV12) { + testConfig.cbcrInterleave 
= 1; + testConfig.nv21 = 0; + } else if (decParam.pixel_format == BM_VPU_DEC_PIX_FORMAT_NV21) { + testConfig.cbcrInterleave = 1; + testConfig.nv21 = 1; + } testConfig.bitFormat = decParam.streamFormat; testConfig.coda9.mp4class = decParam.mp4class; testConfig.enableWTL = FALSE; @@ -2814,7 +2955,6 @@ int BMVidDecCreate(BMVidCodHandle *pVidCodHandle, BMVidDecParam decParam) testConfig.coda9.frameCacheMerge = 3; testConfig.coda9.frameCacheWayShape = 15; //testConfig.coda9.rotate = 90; - testConfig.nv21 = decParam.nv21; testConfig.secondaryAXI = decParam.secondaryAXI; @@ -2932,7 +3072,7 @@ int BMVidDecCreate(BMVidCodHandle *pVidCodHandle, BMVidDecParam decParam) VLOG(INFO, "load firmware success!\n"); vbStream.size = STREAM_BUF_SIZE; - if(bmvpu_malloc_device_byte_heap(devHandle,&vbStream,STREAM_BUF_SIZE,HEAP_MASK,1)!=BM_SUCCESS) + if(vdi_allocate_dma_memory(coreIdx, &vbStream) < 0) { VLOG(ERR, "fail to allocate bitstream buffer\n"); //success=FALSE; @@ -2946,7 +3086,7 @@ int BMVidDecCreate(BMVidCodHandle *pVidCodHandle, BMVidDecParam decParam) decOP.bitstreamFormat = (CodStd)param->bitFormat; decOP.avcExtension = param->coda9.enableMvc; decOP.coreIdx = coreIdx; - decOP.bitstreamBuffer = vbStream.u.device.device_addr; + decOP.bitstreamBuffer = vbStream.phys_addr; decOP.bitstreamBufferSize = vbStream.size; decOP.bitstreamMode = param->bitstreamMode; decOP.tiled2LinearEnable = param->coda9.enableTiled2Linear; @@ -2965,7 +3105,8 @@ int BMVidDecCreate(BMVidCodHandle *pVidCodHandle, BMVidDecParam decParam) ********************************************************************************/ if ((ret = VPU_DecOpen(&handle, &decOP)) != RETCODE_SUCCESS) { - VLOG(ERR, "VPU_DecOpen failed Error code is 0x%x \n", ret); + VLOG(ERR, "VPU_DecOpen failed Error reason is %s \n", bmvpu_dec_error_string(ret)); + ret = BM_ERR_VDEC_FAILURE; goto DECODE_OPEN_END; } VLOG(INFO, "boda create core_idx: %d, inst_idx: %d\n", (int)handle->coreIdx, (int)handle->instIndex); @@ -3028,7 +3169,7 @@ int 
BMVidDecCreate(BMVidCodHandle *pVidCodHandle, BMVidDecParam decParam) vidHandle->isStreamBufFilled = 0; vidHandle->seqInitFlag = 0; vidHandle->endof_flag = 0; - vidHandle->streamWrAddr = vbStream.u.device.device_addr; + vidHandle->streamWrAddr = vbStream.phys_addr; vidHandle->remainedSize = 0; vidHandle->decStatus = BMDEC_UNINIT; vidHandle->enable_cache = decParam.enable_cache; @@ -3069,7 +3210,8 @@ int BMVidDecCreate(BMVidCodHandle *pVidCodHandle, BMVidDecParam decParam) ERR_DEC_INIT: if (vbStream.size > 0) { - bm_free_mem(devHandle,vidHandle->vbStream,0x00); + vdi_free_dma_memory(coreIdx, &vidHandle->vbStream); + vidHandle->vbStream.size = 0; } VPU_DeInit(coreIdx); @@ -3085,7 +3227,7 @@ int BMVidDecCreate(BMVidCodHandle *pVidCodHandle, BMVidDecParam decParam) int BMVidDecCreateW5(BMVidCodHandle *pVidCodHandle, BMVidDecParam *decParam) { - bm_device_mem_t vbStream = {0}; + vpu_buffer_t vbStream = {0}; RetCode ret = RETCODE_FAILURE; #if defined(CHIP_BM1684) || defined(CHIP_BM1686) int coreIdx = 0; @@ -3118,15 +3260,15 @@ int BMVidDecCreateW5(BMVidCodHandle *pVidCodHandle, BMVidDecParam *decParam) Int32 timeoutCount = 0; Int32 interruptFlag = 0; Uint32 index, ver, rev; + Uint32 framebuffer_cnt; + int bitstream_flag = 0; - bm_handle_t bm_handle; - int stream_buf_size=0; if (coreIdx < 0) { return ret; } VLOG(INFO, "MAX instance: %d, MAX queue: %d, MAX buffer: 0x%lx, EXTRA frame num: %d\n", MAX_NUM_INSTANCE, COMMAND_QUEUE_DEPTH, STREAM_BUF_SIZE, decParam->extraFrameBufferNum); - VLOG(INFO, "PARAMETER: cbcrInterleave %d, nv12 %d, Wtlformat %d, bsmode %d\n", decParam->cbcrInterleave, decParam->nv21, decParam->wtlFormat, decParam->bsMode); + VLOG(INFO, "PARAMETER: pixel_format %d, Wtlformat %d, bsmode %d\n", decParam->pixel_format, decParam->wtlFormat, decParam->bsMode); osal_memset(&testConfig, 0, sizeof(testConfig)); #if defined(TRY_FLOCK_OPEN) pcie_board_idx = decParam->pcie_board_id; @@ -3140,9 +3282,17 @@ int BMVidDecCreateW5(BMVidCodHandle *pVidCodHandle, 
BMVidDecParam *decParam) testConfig.wave.fbcMode = 0x0c; // best for bandwidth testConfig.wave.bwOptimization = FALSE; // only valid for WTL enable case testConfig.wave.numVCores = 1; - testConfig.cbcrInterleave = decParam->cbcrInterleave; - testConfig.nv21 = decParam->nv21; + if (decParam->pixel_format == BM_VPU_DEC_PIX_FORMAT_NV12) { + testConfig.cbcrInterleave = 1; + testConfig.nv21 = 0; + } else if (decParam->pixel_format == BM_VPU_DEC_PIX_FORMAT_NV21) { + testConfig.cbcrInterleave = 1; + testConfig.nv21 = 1; + } + testConfig.wtlMode = FF_FRAME; + testConfig.extern_picWidth = decParam->picWidth; + testConfig.extern_picHeight = decParam->picHeight; if(decParam->wtlFormat != BMDEC_OUTPUT_COMPRESSED) testConfig.wtlFormat = decParam->wtlFormat; else { @@ -3251,13 +3401,6 @@ int BMVidDecCreateW5(BMVidCodHandle *pVidCodHandle, BMVidDecParam *decParam) #if (defined(BM_PCIE_MODE) && defined(CHIP_BM1684)) coreIdx = MAX_NUM_VPU_CORE_CHIP*decParam->pcie_board_id + coreIdx; #endif - bmvpu_dec_load_bmlib_handle(coreIdx); - bm_handle=bmvpu_dec_get_bmlib_handle(coreIdx); - if(bm_handle==NULL) - { - VLOG(ERR, "Failed to get bmlib handle\n"); - goto ERR_DEC_INIT; - } ret = VPU_InitWithBitcode(coreIdx, (const Uint16 *)pusBitCode, sizeInWord); if (ret != RETCODE_CALLED_BEFORE && ret != RETCODE_SUCCESS) @@ -3270,12 +3413,27 @@ int BMVidDecCreateW5(BMVidCodHandle *pVidCodHandle, BMVidDecParam *decParam) VPU_GetVersionInfo(coreIdx, &ver, &rev, NULL); printf("VERSION=%d, REVISION=%d\n", ver, rev); - osal_memset(&vbStream, 0, sizeof(bm_device_mem_t)); - stream_buf_size=decParam->streamBufferSize==0?STREAM_BUF_SIZE:decParam->streamBufferSize; - if (bmvpu_malloc_device_byte_heap(bm_handle,&vbStream,stream_buf_size,HEAP_MASK,1)!=BM_SUCCESS) + osal_memset(&vbStream, 0, sizeof(vpu_buffer_t)); + + if(decParam->bitstream_buffer.phys_addr && decParam->bitstream_buffer.size) { - VLOG(ERR, "fail to allocate bitstream buffer\n"); - goto ERR_DEC_INIT; + bitstream_flag = 1; + 
bmvpu_dec_buffer_convert(coreIdx, &vbStream, &decParam->bitstream_buffer); + if(vbStream.size == 0 || vbStream.phys_addr == 0) + { + VLOG(ERR, "bitstream buffer from user is NULL!!!\n"); + goto ERR_DEC_INIT; + } + vdi_attach_dma_memory(coreIdx, &vbStream); + } + else + { + vbStream.size = decParam->streamBufferSize == 0 ? STREAM_BUF_SIZE : decParam->streamBufferSize; + if(vdi_allocate_dma_memory(coreIdx, &vbStream) < 0) + { + VLOG(ERR, "fail to allocate bitstream buffer\n"); + goto ERR_DEC_INIT; + } } param->enableCrop = TRUE; @@ -3283,7 +3441,7 @@ int BMVidDecCreateW5(BMVidCodHandle *pVidCodHandle, BMVidDecParam *decParam) osal_memset(&decOP, 0x00, sizeof(DecOpenParam)); decOP.bitstreamFormat = (CodStd)param->bitFormat; decOP.coreIdx = coreIdx; - decOP.bitstreamBuffer = vbStream.u.device.device_addr; + decOP.bitstreamBuffer = vbStream.phys_addr; decOP.bitstreamBufferSize = vbStream.size; decOP.bitstreamMode = param->bitstreamMode; decOP.wtlEnable = param->enableWTL; @@ -3294,6 +3452,8 @@ int BMVidDecCreateW5(BMVidCodHandle *pVidCodHandle, BMVidDecParam *decParam) decOP.frameEndian = param->frameEndian; //decOP.fbc_mode = param->wave.fbcMode; decOP.bwOptimization = param->wave.bwOptimization; + decOP.decodeOrder = decParam->decode_order; + /******************************************************************************** * CREATE INSTANCE * ********************************************************************************/ @@ -3301,6 +3461,8 @@ int BMVidDecCreateW5(BMVidCodHandle *pVidCodHandle, BMVidDecParam *decParam) if ((ret = VPU_DecOpen(&handle, &decOP)) != RETCODE_SUCCESS) { VLOG(ERR, "VPU_DecOpen failed Error code is 0x%x \n", ret); + if(handle == NULL) + goto ERR_DEC_INIT; goto DECODE_OPEN_END; } @@ -3320,15 +3482,54 @@ int BMVidDecCreateW5(BMVidCodHandle *pVidCodHandle, BMVidDecParam *decParam) vidHandle->codecInst = handle; - osal_memcpy(&(vidHandle->vbStream), &vbStream, sizeof(bm_device_mem_t)); - param->enableUserData = 4; + 
vidHandle->bitstream_from_user = bitstream_flag; + osal_memset(vidHandle->pFbMem, 0, sizeof(vpu_buffer_t)*MAX_REG_FRAME); + if(decParam->frame_buffer) + { + if(decParam->Ytable_buffer == NULL || decParam->Ctable_buffer == NULL ) + { + VLOG(ERR, "Invalid parameter. Ytable_buffer=0x%lx Ctable_buffer=0x%lx\n", decParam->Ytable_buffer, decParam->Ctable_buffer); + goto ERR_DEC_INIT; + } - if (bmvpu_malloc_device_byte_heap(bm_handle, &vidHandle->vbUserData,(512*1024),HEAP_MASK,1) < 0) + if(decParam->min_framebuf_cnt < 0 || decParam->framebuf_delay < 0 || decParam->extraFrameBufferNum <= 0) + { + VLOG(ERR, "Invalid frame buffer count. frame_buffer:0x%x min_framebuf_cnt:%d frame_buffer:%d extra_frame_buffer:%d\n", + decParam->frame_buffer, decParam->min_framebuf_cnt, decParam->framebuf_delay, decParam->extraFrameBufferNum); + goto ERR_DEC_INIT; + } + vidHandle->framebuf_from_user = 1; + vidHandle->min_framebuf_cnt = decParam->min_framebuf_cnt; + vidHandle->framebuf_delay = decParam->framebuf_delay; + + framebuffer_cnt = decParam->min_framebuf_cnt + decParam->extraFrameBufferNum; + for(index = 0; index < framebuffer_cnt; index++) + { + bmvpu_dec_buffer_convert(coreIdx, &vidHandle->pYtabMem[index], &decParam->Ytable_buffer[index]); + bmvpu_dec_buffer_convert(coreIdx, &vidHandle->pCtabMem[index], &decParam->Ctable_buffer[index]); + vdi_attach_dma_memory(coreIdx, &vidHandle->pYtabMem[index]); + vdi_attach_dma_memory(coreIdx, &vidHandle->pCtabMem[index]); + } + + if(testConfig.enableWTL == TRUE) + framebuffer_cnt += decParam->framebuf_delay + decParam->extraFrameBufferNum + 1; + for(index = 0; index < framebuffer_cnt; index++) + { + bmvpu_dec_buffer_convert(coreIdx, &vidHandle->pFbMem[index], &decParam->frame_buffer[index]); + vdi_attach_dma_memory(coreIdx, &vidHandle->pFbMem[index]); + } + } + + osal_memcpy(&(vidHandle->vbStream), &vbStream, sizeof(vpu_buffer_t)); + param->enableUserData = 4; + osal_memset(&vidHandle->vbUserData, 0, sizeof(vpu_buffer_t)); + 
vidHandle->vbUserData.size = 512 * 1024; + if(vdi_allocate_dma_memory(coreIdx, &vidHandle->vbUserData) < 0) { VLOG(ERR, "fail to allocate user data buffer\n"); goto ERR_DEC_INIT; } - VPU_DecGiveCommand(handle, SET_ADDR_REP_USERDATA, (void*)&(vidHandle->vbUserData.u.device.device_addr)); + VPU_DecGiveCommand(handle, SET_ADDR_REP_USERDATA, (void*)&(vidHandle->vbUserData.phys_addr)); VPU_DecGiveCommand(handle, SET_SIZE_REP_USERDATA, (void*)&(vidHandle->vbUserData.size)); VPU_DecGiveCommand(handle, ENABLE_REP_USERDATA, (void*)¶m->enableUserData); if(testConfig.skipMode != 0) { @@ -3380,11 +3581,24 @@ int BMVidDecCreateW5(BMVidCodHandle *pVidCodHandle, BMVidDecParam *decParam) vidHandle->isStreamBufFilled = 0; vidHandle->seqInitFlag = 0; vidHandle->endof_flag = 0; - vidHandle->streamWrAddr = vbStream.u.device.device_addr; + vidHandle->streamWrAddr = vbStream.phys_addr; vidHandle->decStatus = BMDEC_UNINIT; vidHandle->enable_cache = decParam->enable_cache; vidHandle->min_time = 100000000; vidHandle->perf = decParam->perf; + vidHandle->enable_decode_order = decParam->decode_order; + + if(decParam->timeout > 0) + vidHandle->timeout = decParam->timeout; + else + vidHandle->timeout = VPU_WAIT_TIME_OUT; + + if(decParam->timeout_count > 0) + vidHandle->timeout_count = decParam->timeout_count; + else + vidHandle->timeout_count = 5; + + osal_memset(vidHandle->decode_index_map, -1, sizeof(vidHandle->decode_index_map)); #ifdef _WIN32 timeBeginPeriod(1); #endif @@ -3439,12 +3653,20 @@ int BMVidDecCreateW5(BMVidCodHandle *pVidCodHandle, BMVidDecParam *decParam) if (vidHandle->vbStream.size > 0) { - bm_free_mem(bm_handle,vidHandle->vbStream,0x00); + if(vidHandle->bitstream_from_user != BUFFER_ALLOC_FROM_USER) + vdi_free_dma_memory(coreIdx, &vidHandle->vbStream); + else + { + vdi_dettach_dma_memory(coreIdx, &vidHandle->vbStream); + vdi_unmap_memory(coreIdx, &vidHandle->vbStream); + } + vidHandle->vbStream.size = 0; } if (vidHandle) { if (vidHandle->vbUserData.size > 0) { - 
bm_free_mem(bm_handle,vidHandle->vbUserData,0x00); + vdi_free_dma_memory(coreIdx, &vidHandle->vbUserData); + vidHandle->vbUserData.size = 0; } if (vidHandle->freeQ != NULL) Queue_Destroy(vidHandle->freeQ); @@ -3473,10 +3695,10 @@ int BMVidDecCreateW5(BMVidCodHandle *pVidCodHandle, BMVidDecParam *decParam) return ret; } -int BMVidDecSeqInit(BMVidCodHandle vidCodHandle) +BMVidDecRetStatus bmvpu_dec_seq_init(BMVidCodHandle vidCodHandle) { BMVidHandle vidHandle = (BMVidHandle)vidCodHandle; - RetCode ret = RETCODE_FAILURE; + int ret = BM_ERR_VDEC_FAILURE; DecInitialInfo sequenceInfo; DRAMConfig dram_cfg = {0}; Uint32 framebufStride, framebufSize; @@ -3484,11 +3706,11 @@ int BMVidDecSeqInit(BMVidCodHandle vidCodHandle) Int32 fbCount; FrameBufferAllocInfo fbAllocInfo; FrameBuffer pFrame[MAX_REG_FRAME]; - bm_device_mem_t *pFbMem; + vpu_buffer_t *pFbMem; //FrameBuffer* ppuFb; //FrameBuffer pPPUFrame[MAX_REG_FRAME]; - bm_device_mem_t *pPPUFbMem; - bm_device_mem_t *pvb = NULL; + vpu_buffer_t *pPPUFbMem; + vpu_buffer_t *pvb = NULL; Int32 coreIdx; Int32 index; DecOpenParam *pDecOP; @@ -3499,7 +3721,6 @@ int BMVidDecSeqInit(BMVidCodHandle vidCodHandle) Queue *ppuQ = NULL; DecHandle handle; BMVidDecConfig *param; - bm_handle_t bm_handle; VLOG(INFO, "INFO: enter seq init alloc memory\n"); if (vidHandle != NULL && vidHandle->codecInst != NULL) @@ -3512,31 +3733,31 @@ int BMVidDecSeqInit(BMVidCodHandle vidCodHandle) } else { - return ret; + return BM_ERR_VDEC_UNEXIST; } VLOG(INFO, "INFO: enter seq init memset\n"); osal_memset(&sequenceInfo, 0x00, sizeof(DecInitialInfo)); - osal_memset(pFbMem, 0x00, sizeof(bm_device_mem_t) * MAX_REG_FRAME); - osal_memset(pPPUFbMem, 0x00, sizeof(bm_device_mem_t) * MAX_REG_FRAME); + osal_memset(pFbMem, 0x00, sizeof(vpu_buffer_t) * MAX_REG_FRAME); + osal_memset(pPPUFbMem, 0x00, sizeof(vpu_buffer_t) * MAX_REG_FRAME); VLOG(INFO, "INFO: enter seq init VPU_DecCompleteSeqInit\n"); if ((ret = VPU_DecCompleteSeqInit(handle, &sequenceInfo)) != RETCODE_SUCCESS) 
{ VLOG(ERR, "[ERROR] Failed to SEQ_INIT(ERROR REASON: %d)\n", sequenceInfo.seqInitErrReason); + ret = BM_ERR_VDEC_FAILURE; goto DECODE_END; } VLOG(INFO, "INFO: enter seq init VPU_DecGiveCommand\n"); ret = VPU_DecGiveCommand(handle, GET_DRAM_CONFIG, &dram_cfg); if (ret != RETCODE_SUCCESS) { - VLOG(ERR, "VPU_DecGiveCommand[GET_DRAM_CONFIG] failed Error code is 0x%x \n", ret); + VLOG(ERR, "VPU_DecGiveCommand[GET_DRAM_CONFIG] failed Error is reason:%s \n", bmvpu_dec_error_string(ret)); + ret = BM_ERR_VDEC_FAILURE; goto DECODE_END; } VLOG(INFO, "INFO: enter seq init decInfo.openParam\n"); - bm_handle= bmvpu_dec_get_bmlib_handle(coreIdx); - pDecOP = &(handle->CodecInfo->decInfo.openParam); /******************************************************************************** * ALLOCATE RECON FRAMEBUFFERS * @@ -3546,7 +3767,7 @@ int BMVidDecSeqInit(BMVidCodHandle vidCodHandle) framebufSize = VPU_GetFrameBufSize(coreIdx, framebufStride, framebufHeight, param->mapType, FORMAT_420, pDecOP->cbcrInterleave, &dram_cfg); if(framebufHeight > 1088 || framebufStride > 1920) { VLOG(ERR, "height or width too big.....width: %d, height: %d\n", framebufStride, framebufHeight); - return -1; + return BM_ERR_VDEC_ILLEGAL_PARAM; } fbCount = sequenceInfo.minFrameBufferCount + vidHandle->extraFrameBufferNum; @@ -3567,14 +3788,13 @@ int BMVidDecSeqInit(BMVidCodHandle vidCodHandle) pvb = &pFbMem[index]; pvb->size = framebufSize; VLOG(INFO, "Start fb vdi_allocate_dma_memory, %d, size: %d.....\n", index, framebufSize); - if(bmvpu_malloc_device_byte_heap(bm_handle,pvb,framebufSize,HEAP_MASK,1)!=BM_SUCCESS) - { - VLOG(ERR, "%s:%d fail to allocate frame buffer\n", __FUNCTION__, __LINE__); - ret = -1; - goto DECODE_END; - } - bm_vdi_mmap(bm_handle,pvb,(unsigned long long *)&vidHandle->fbMemVaddr[index]); - pFrame[index].bufY = pvb->u.device.device_addr; + if(vdi_allocate_dma_memory(coreIdx, pvb) < 0) + { + VLOG(ERR, "%s:%d fail to allocate frame buffer\n", __FUNCTION__, __LINE__); + ret = 
BM_ERR_VDEC_NOMEM; + goto DECODE_END; + } + pFrame[index].bufY = pvb->phys_addr; pFrame[index].bufCb = -1; pFrame[index].bufCr = -1; pFrame[index].updateFbInfo = TRUE; @@ -3582,7 +3802,8 @@ int BMVidDecSeqInit(BMVidCodHandle vidCodHandle) VLOG(INFO, "Start VPU_DecAllocateFrameBuffer....\n"); if ((ret = VPU_DecAllocateFrameBuffer(handle, fbAllocInfo, pFrame)) != RETCODE_SUCCESS) { - VLOG(ERR, "%s:%d failed to VPU_DecAllocateFrameBuffer(), ret(%d)\n", __FUNCTION__, __LINE__, ret); + VLOG(ERR, "%s:%d failed to VPU_DecAllocateFrameBuffer(),reason:%s\n", __FUNCTION__, __LINE__, bmvpu_dec_error_string(ret)); + ret = BM_ERR_VDEC_NOMEM; goto DECODE_END; } VLOG(INFO, "Start ALLOCATE WTL FRAMEBUFFERS....\n"); @@ -3601,14 +3822,13 @@ int BMVidDecSeqInit(BMVidCodHandle vidCodHandle) { pvb = &pFbMem[index]; pvb->size = framebufSize; - if(bmvpu_malloc_device_byte_heap(bm_handle,pvb,framebufSize,HEAP_MASK,1)!=BM_SUCCESS) + if(vdi_allocate_dma_memory(coreIdx, pvb) < 0) { VLOG(ERR, "%s:%d fail to allocate frame buffer\n", __FUNCTION__, __LINE__); - ret = -1; + ret = BM_ERR_VDEC_NOMEM; goto DECODE_END; } - bm_vdi_mmap(bm_handle,pvb,(unsigned long long *)&vidHandle->fbMemVaddr[index]); - pFrame[index].bufY = pvb->u.device.device_addr; + pFrame[index].bufY = pvb->phys_addr; pFrame[index].bufCb = -1; pFrame[index].bufCr = -1; pFrame[index].updateFbInfo = TRUE; @@ -3626,7 +3846,8 @@ int BMVidDecSeqInit(BMVidCodHandle vidCodHandle) ret = VPU_DecAllocateFrameBuffer(handle, fbAllocInfo, &pFrame[fbCount]); if (ret != RETCODE_SUCCESS) { - VLOG(ERR, "%s:%d failed to VPU_DecAllocateFrameBuffer() ret:%d\n", __FUNCTION__, __LINE__, ret); + VLOG(ERR, "%s:%d failed to VPU_DecAllocateFrameBuffer() reason:%s\n", __FUNCTION__, __LINE__, bmvpu_dec_error_string(ret)); + ret = BM_ERR_VDEC_NOMEM; goto DECODE_END; } } @@ -3658,7 +3879,8 @@ int BMVidDecSeqInit(BMVidCodHandle vidCodHandle) { if (ret == RETCODE_MEMORY_ACCESS_VIOLATION) PrintMemoryAccessViolationReason(coreIdx, NULL); - VLOG(ERR, 
"VPU_DecRegisterFrameBuffer failed Error code is 0x%x \n", ret); + VLOG(ERR, "VPU_DecRegisterFrameBuffer failed Error reason is %s \n", bmvpu_dec_error_string(ret)); + ret = BM_ERR_FAILURE; goto DECODE_END; } VLOG(INFO, "Start SET_FRAMEBUF....\n"); @@ -3692,15 +3914,13 @@ int BMVidDecSeqInit(BMVidCodHandle vidCodHandle) { pvb = &pPPUFbMem[index]; pvb->size = sizePPUFb; - bm_free_mem(bm_handle, *pvb,vidHandle->fbMemVaddr[index]); - if(bmvpu_malloc_device_byte_heap(bm_handle,pvb,sizePPUFb,HEAP_MASK,1)!=BM_SUCCESS) + if(vdi_allocate_dma_memory(coreIdx, pvb) < 0) { VLOG(ERR, "%s:%d fail to allocate frame buffer\n", __FUNCTION__, __LINE__); - ret = -1; + ret = BM_ERR_VDEC_NOMEM; goto DECODE_END; } - bm_vdi_mmap(bm_handle,pvb,(unsigned long long *)&vidHandle->pPUFbMemVaddr[index]); - vidHandle->pPPUFrame[index].bufY = pvb->u.device.device_addr; + vidHandle->pPPUFrame[index].bufY = pvb->phys_addr; vidHandle->pPPUFrame[index].bufCb = -1; vidHandle->pPPUFrame[index].bufCr = -1; vidHandle->pPPUFrame[index].updateFbInfo = TRUE; @@ -3718,7 +3938,8 @@ int BMVidDecSeqInit(BMVidCodHandle vidCodHandle) fbAllocInfo.type = FB_TYPE_PPU; if ((ret = VPU_DecAllocateFrameBuffer(handle, fbAllocInfo, vidHandle->pPPUFrame)) != RETCODE_SUCCESS) { - VLOG(ERR, "%s:%d failed to VPU_DecAllocateFrameBuffer() ret:%d\n", __FUNCTION__, __LINE__, ret); + VLOG(ERR, "%s:%d failed to VPU_DecAllocateFrameBuffer(),Error reason is :%s\n", __FUNCTION__, __LINE__, bmvpu_dec_error_string(ret)); + ret = BM_ERR_VDEC_NOMEM; goto DECODE_END; } // Note: Please keep the below call sequence. 
@@ -3728,7 +3949,7 @@ int BMVidDecSeqInit(BMVidCodHandle vidCodHandle) if ((ppuQ = Queue_Create_With_Lock(MAX_REG_FRAME, sizeof(FrameBuffer))) == NULL) { - ret = -1; + ret = BM_ERR_VDEC_NOMEM; goto DECODE_END; } for (index = 0; index < ppuFbCount; index++) @@ -3751,12 +3972,14 @@ int BMVidDecSeqInitW5(BMVidCodHandle vidCodHandle) Uint32 framebufStride; Uint32 val; FrameBuffer pFrame[MAX_REG_FRAME]; - bm_device_mem_t *pFbMem; + vpu_buffer_t *pFbMem; Int32 coreIdx; SecAxiUse secAxiUse; DecHandle handle; BMVidDecConfig *param; TestDecConfig decParam; + DecInfo *pDecInfo; + int index; VLOG(INFO, "INFO: enter seq init alloc memory\n"); @@ -3766,6 +3989,7 @@ int BMVidDecSeqInitW5(BMVidCodHandle vidCodHandle) coreIdx = handle->coreIdx; param = &(vidHandle->decConfig); pFbMem = vidHandle->pFbMem; + pDecInfo = &(handle->CodecInfo->decInfo); } else { @@ -3790,13 +4014,28 @@ int BMVidDecSeqInitW5(BMVidCodHandle vidCodHandle) decParam.wave.bwOptimization = param->wave.bwOptimization; decParam.secondaryAXI = param->secondaryAXI; osal_memset(&initialInfo, 0x00, sizeof(DecInitialInfo)); - osal_memset(pFbMem, 0x00, sizeof(bm_device_mem_t) * MAX_REG_FRAME); if ((ret = VPU_DecCompleteSeqInit(handle, &initialInfo)) != RETCODE_SUCCESS) { VLOG(ERR, "[ERROR] Failed to DEC_PIC_HDR(ERROR REASON: %08x) error code is 0x%x\n", initialInfo.seqInitErrReason, ret); goto ERR_DEC_OPEN; } + if(((vidHandle->decConfig.extern_picWidth > 0) && (vidHandle->decConfig.extern_picHeight) > 0) || vidHandle->framebuf_from_user) + { + if((vidHandle->decConfig.extern_picWidth != initialInfo.picWidth) || (vidHandle->decConfig.extern_picHeight != initialInfo.picHeight)) + { + VLOG(ERR, "[ERROR] The size information does not match. 
input width:%d pic width:%d input height:%d pic height:%d\n", + vidHandle->decConfig.extern_picWidth, initialInfo.picWidth, vidHandle->decConfig.extern_picHeight, initialInfo.picHeight); + vidHandle->decConfig.extern_picHeight = initialInfo.picHeight; + vidHandle->decConfig.extern_picWidth = initialInfo.picWidth; + vidHandle->min_framebuf_cnt = initialInfo.minFrameBufferCount; + vidHandle->framebuf_delay = initialInfo.frameBufDelay; + vidHandle->decStatus = BMDEC_WRONG_RESOLUTION; + ret = -1; + goto ERR_DEC_OPEN; + } + } + /******************************************************************************** * ALLOCATE FRAME BUFFER * ********************************************************************************/ @@ -3816,6 +4055,34 @@ int BMVidDecSeqInitW5(BMVidCodHandle vidCodHandle) goto ERR_DEC_OPEN; } + decParam.framebuf_from_user = 0; + pDecInfo->framebuf_from_user = 0; + if(vidHandle->framebuf_from_user) + { + if(vidHandle->min_framebuf_cnt < initialInfo.minFrameBufferCount || vidHandle->framebuf_delay < initialInfo.frameBufDelay) + { + VLOG(ERR, "ERROR: The number of framebuffers is less than the minimum required by the VPU. 
minFrameBufferCount:%d frameBufDelayCount:%d\n", + initialInfo.minFrameBufferCount, initialInfo.frameBufDelay); + vidHandle->decConfig.extern_picHeight = initialInfo.picHeight; + vidHandle->decConfig.extern_picWidth = initialInfo.picWidth; + vidHandle->min_framebuf_cnt = initialInfo.minFrameBufferCount; + vidHandle->framebuf_delay = initialInfo.frameBufDelay; + vidHandle->decStatus = BMDEC_FRAMEBUFFER_NOTENOUGH; + ret = -1; + goto ERR_DEC_OPEN; + } + + initialInfo.minFrameBufferCount = vidHandle->min_framebuf_cnt; + initialInfo.frameBufDelay = vidHandle->framebuf_delay; + decParam.framebuf_from_user = vidHandle->framebuf_from_user; + pDecInfo->framebuf_from_user = 1; + + for(index = 0; index < initialInfo.minFrameBufferCount + vidHandle->extraFrameBufferNum; index++) + { + pDecInfo->vbFbcYTbl[index] = vidHandle->pYtabMem[index]; + pDecInfo->vbFbcCTbl[index] = vidHandle->pCtabMem[index]; + } + } compressedFbCount = initialInfo.minFrameBufferCount + vidHandle->extraFrameBufferNum; // max_dec_pic_buffering if (compressedFbCount > MAX_FRAMEBUFFER_COUNT) { compressedFbCount = MAX_FRAMEBUFFER_COUNT; @@ -3847,9 +4114,9 @@ int BMVidDecSeqInitW5(BMVidCodHandle vidCodHandle) VLOG(INFO, "compressedFbCount=%d, linearFbCount=%d\n", compressedFbCount, linearFbCount); VLOG(INFO, "Start AllocateDecFrameBuffer....instIdx: %d\n", handle->instIndex); - osal_memset((void*)pFbMem, 0x00, sizeof(bm_device_mem_t)*MAX_REG_FRAME); if (AllocateDecFrameBuffer(handle, &decParam, compressedFbCount, linearFbCount, pFrame, pFbMem, &framebufStride, vidHandle->enable_cache) == FALSE) { + vidHandle->decStatus = BMDEC_FRAMEBUFFER_NOTENOUGH; ret = -1; goto ERR_DEC_OPEN; } @@ -3859,13 +4126,14 @@ int BMVidDecSeqInitW5(BMVidCodHandle vidCodHandle) ********************************************************************************/ VLOG(INFO, "Start VPU_DecRegisterFrameBufferEx....instIdx: %d\n", handle->instIndex); ret = VPU_DecRegisterFrameBufferEx(handle, pFrame, compressedFbCount, linearFbCount, 
framebufStride, initialInfo.picHeight, COMPRESSED_FRAME_MAP); - if( ret != RETCODE_SUCCESS ) { + if(ret != RETCODE_SUCCESS) { if (ret == RETCODE_MEMORY_ACCESS_VIOLATION) { EnterLock(coreIdx); PrintMemoryAccessViolationReason(coreIdx, NULL); LeaveLock(coreIdx); } + vidHandle->decStatus = BMDEC_FRAMEBUFFER_NOTENOUGH; VLOG(ERR, "VPU_DecRegisterFrameBuffer failed Error code is 0x%x \n", ret ); goto ERR_DEC_OPEN; } @@ -3889,7 +4157,7 @@ int BMVidDecSeqInitW5(BMVidCodHandle vidCodHandle) } } #endif - VLOG(INFO, "BMVidDecSeqInit END\n"); + VLOG(INFO, "bmvpu_dec_seq_init END\n"); ERR_DEC_OPEN: return ret; } @@ -3898,9 +4166,7 @@ static int fill_ringbuffer(unsigned char *buf, int feedingSize, PhysicalAddress { PhysicalAddress wrPtr = *pWrPtr; Uint32 rightSize = 0, leftSize = feedingSize; - bm_device_mem_t devMem; int coreIdx = decHandle->coreIdx; - bm_handle_t bm_handle= bmvpu_dec_get_bmlib_handle(coreIdx); if ((wrPtr + feedingSize) >= (decHandle->CodecInfo->decInfo.streamBufEndAddr)) { @@ -3909,8 +4175,7 @@ static int fill_ringbuffer(unsigned char *buf, int feedingSize, PhysicalAddress leftSize = (wrPtr + feedingSize) - endAddr; if (rightSize > 0) { - devMem=bm_mem_from_device(wrPtr,rightSize); - bm_vdi_memcpy_s2d(bm_handle,coreIdx,devMem,buf,(int)decHandle->CodecInfo->decInfo.openParam.streamEndian); + VpuWriteMem(coreIdx, wrPtr, buf, rightSize, (int)decHandle->CodecInfo->decInfo.openParam.streamEndian); } wrPtr = decHandle->CodecInfo->decInfo.streamBufStartAddr; pkginfo->flag = 1; @@ -3919,8 +4184,7 @@ static int fill_ringbuffer(unsigned char *buf, int feedingSize, PhysicalAddress //VLOG(INFO, "VpuWriteMem: %llx\n", vidStream.buf + rightSize); //VLOG(INFO, "wrPtr: 0x%llx, leftSize: %d, coreIdx: %d, \n", wrPtr, leftSize, coreIdx); if(leftSize>0) { - devMem=bm_mem_from_device(wrPtr,leftSize); - bm_vdi_memcpy_s2d(bm_handle,coreIdx,devMem,buf+rightSize,(int)decHandle->CodecInfo->decInfo.openParam.streamEndian); + VpuWriteMem(decHandle->coreIdx, wrPtr, buf+rightSize, 
leftSize, (int)decHandle->CodecInfo->decInfo.openParam.streamEndian); wrPtr += leftSize; } *pWrPtr = wrPtr; @@ -3941,7 +4205,7 @@ static int s_gettimeofday(u64* sec, u64* usec, u64* msec, void* tzp) return (0); } #endif -int BMVidDecDecode(BMVidCodHandle vidCodHandle, BMVidStream vidStream) +BMVidDecRetStatus bmvpu_dec_decode(BMVidCodHandle vidCodHandle, BMVidStream vidStream) { BMVidHandle vidHandle = (BMVidHandle)vidCodHandle; Int32 feedingSize = vidStream.header_size + vidStream.length; @@ -3967,12 +4231,22 @@ int BMVidDecDecode(BMVidCodHandle vidCodHandle, BMVidStream vidStream) #endif if(vidHandle->decStatus == BMDEC_HUNG) { VLOG(ERR, "vpu decode failed ....\n"); - return -1; + return BM_ERR_VDEC_ERR_HUNG; } if(vidHandle->decStatusendof_flag>=BMDEC_START_CLOSE) { VLOG(ERR, "decoder status error..\n"); - return -2; + return BM_ERR_VDEC_SYS_NOTREADY; + } + + if(vidHandle->decStatus == BMDEC_FRAMEBUFFER_NOTENOUGH) { + VLOG(ERR, "the frame buffer count set is wrong\n"); + return BM_ERR_VDEC_NOMEM; + } + + if(vidHandle->decStatus == BMDEC_WRONG_RESOLUTION) { + VLOG(ERR, "width or height which user set is wrong\n"); + return BM_ERR_VDEC_ILLEGAL_PARAM; } /* if(vidHandle->endof_flag==BMDEC_START_GET_ALLFRAME && vidHandle->decStatus!=BMDEC_STOP) @@ -3982,22 +4256,23 @@ int BMVidDecDecode(BMVidCodHandle vidCodHandle, BMVidStream vidStream) if (feedingSize <= 0 || vidStream.buf == NULL || vidHandle == NULL || vidHandle->codecInst == NULL) { VLOG(ERR, "coreIdx=%d,feeding size is negative value: %d\n", vidHandle->codecInst->coreIdx,feedingSize); - return RETCODE_FAILURE; + return BM_ERR_VDEC_FAILURE; } //check input queue if(Queue_Is_Full(vidHandle->inputQ) || Queue_Is_Full(vidHandle->inputQ2)) - return RETCODE_STREAM_BUF_FULL; + return BM_ERR_VDEC_BUF_FULL; //VLOG(INFO, "input queue count: %d\n", Queue_Get_Cnt(vidHandle->inputQ)); decHandle = vidHandle->codecInst; if(vidHandle->endof_flag>=BMDEC_START_GET_ALLFRAME && vidHandle->decStatus != BMDEC_STOP) - return 
RETCODE_FAILURE; + return BM_ERR_VDEC_FAILURE; - if(vidHandle->decStatus == BMDEC_STOP && (vidHandle->endof_flag < BMDEC_START_REWIND || vidHandle->endof_flag > BMDEC_START_FLUSH)) - return -1; + if(vidHandle->decStatus == BMDEC_STOP && (vidHandle->endof_flag < BMDEC_START_REWIND || vidHandle->endof_flag > BMDEC_START_FLUSH)) { + return BM_ERR_VDEC_SYS_NOTREADY; + } if (feedingSize > 0) { PkgInfo pkginfo = {0}; @@ -4018,7 +4293,7 @@ int BMVidDecDecode(BMVidCodHandle vidCodHandle, BMVidStream vidStream) pkginfo.len = decHandle->CodecInfo->decInfo.streamBufEndAddr - wrPtr; //skip the empty size. } else - return RETCODE_STREAM_BUF_FULL; + return BM_ERR_VDEC_NOBUF; } if (vidHandle->endof_flag == BMDEC_START_GET_ALLFRAME || vidHandle->endof_flag == BMDEC_START_FLUSH) @@ -4121,29 +4396,47 @@ int BMVidDecDecode(BMVidCodHandle vidCodHandle, BMVidStream vidStream) #endif } - return RETCODE_SUCCESS; + return BM_SUCCESS; } -BMVidFrame *BMVidDecGetOutput(BMVidCodHandle vidCodHandle) +BMVidDecRetStatus bmvpu_dec_get_output(BMVidCodHandle vidCodHandle, BMVidFrame *frame) { BMVidHandle vidHandle = (BMVidHandle)vidCodHandle; BMVidFrame *bmFrame = NULL; + if(frame == NULL) { + VLOG(ERR, "the frame buffer invalid.\n"); + return BM_ERR_VDEC_NOMEM; + } + + if(vidHandle == NULL) { + VLOG(ERR, "Vdec device fd error.\n"); + return BM_ERR_VDEC_UNEXIST; + } + if(vidHandle->decStatus > BMDEC_UNINIT && vidHandle->endof_flag < BMDEC_START_CLOSE) { bmFrame = (BMVidFrame*)Queue_Dequeue(vidHandle->displayQ); if(bmFrame && bmFrame->frameIdx < 32) vidHandle->cache_bmframe[bmFrame->frameIdx] = *bmFrame; } - return bmFrame == NULL ? bmFrame : &(vidHandle->cache_bmframe[bmFrame->frameIdx]); + bmFrame = (bmFrame == NULL ? 
bmFrame : &(vidHandle->cache_bmframe[bmFrame->frameIdx])); + + if(bmFrame == NULL) + return BM_ERR_VDEC_FAILURE; + else + osal_memcpy(frame, bmFrame, sizeof(BMVidFrame)); + + return BM_SUCCESS; } -int BMVidDecClearOutput(BMVidCodHandle vidCodHandle, BMVidFrame *frame) +BMVidDecRetStatus bmvpu_dec_clear_output(BMVidCodHandle vidCodHandle, BMVidFrame *frame) { BMVidHandle vidHandle = (BMVidHandle)vidCodHandle; - RetCode ret = RETCODE_SUCCESS; FrameBuffer frameBuffer; Uint32 index; Uint32* ptr; + int clrFrmIndex = -1; + if(checkHandle(vidCodHandle) && vidHandle->decStatus>BMDEC_UNINIT && vidHandle->endof_flag < BMDEC_START_CLOSE) { if(vidHandle->enablePPU) { @@ -4156,7 +4449,7 @@ int BMVidDecClearOutput(BMVidCodHandle vidCodHandle, BMVidFrame *frame) } if(frame->frameIdx != frameBuffer.myIndex) { VLOG(ERR, "can't get frame idx!!!!\n"); - return -1; + return BM_ERR_VDEC_ILLEGAL_PARAM; } } else { @@ -4171,7 +4464,13 @@ int BMVidDecClearOutput(BMVidCodHandle vidCodHandle, BMVidFrame *frame) } else { - VPU_DecClrDispFlag(vidHandle->codecInst, frameBuffer.myIndex); + if (vidHandle->enable_decode_order) { + clrFrmIndex = vidHandle->decode_index_map[frameBuffer.myIndex]; + } else { + clrFrmIndex = frameBuffer.myIndex; + } + if (clrFrmIndex >= 0) + VPU_DecClrDispFlag(vidHandle->codecInst, clrFrmIndex); } ptr = (Uint32*)Queue_Peek(vidHandle->sequenceQ); @@ -4183,7 +4482,7 @@ int BMVidDecClearOutput(BMVidCodHandle vidCodHandle, BMVidFrame *frame) index = (*ptr) % MAX_SEQUENCE_MEM_COUNT; p = &seqMemInfo[vidHandle->codecInst->instIndex][index]; - releasePreviousSequenceResources(vidHandle->codecInst, p->allocFbMem, &p->fbInfo); + releasePreviousSequenceResources(vidHandle->codecInst, p->allocFbMem, &p->fbInfo, vidHandle->codecInst->CodecInfo->decInfo.framebuf_from_user); osal_memset(p, 0x00, sizeof(SequenceMemInfo)); Queue_Dequeue(vidHandle->sequenceQ); } @@ -4197,31 +4496,34 @@ int BMVidDecClearOutput(BMVidCodHandle vidCodHandle, BMVidFrame *frame) { VLOG(ERR, "can't clear 
output please check it!!!!, index: %d\n", frame->frameIdx); } - return ret; + return BM_SUCCESS; } -int BMVidDecFlush(BMVidCodHandle vidCodHandle) +BMVidDecRetStatus bmvpu_dec_flush(BMVidCodHandle vidCodHandle) { BMVidHandle vidHandle = (BMVidHandle)vidCodHandle; - + int ret = BM_SUCCESS; if (vidHandle->endof_flag < BMDEC_START_GET_ALLFRAME) { int size = STREAM_END_SIZE; PkgInfo pkginfo; + + if(Queue_Is_Full(vidHandle->inputQ) || Queue_Is_Full(vidHandle->inputQ2)) + return BM_ERR_VDEC_BUF_FULL; pkginfo.flag = 0; pkginfo.len = 0; pkginfo.rd = vidHandle->streamWrAddr; // VLOG(INFO, "flush decoder..., core index: %d, instance index: %d\n", // vidHandle->codecInst->coreIdx, vidHandle->codecInst->instIndex); // Now that we are done with decoding, close the opening instance. - osal_cond_lock(vidHandle->inputCond); + osal_cond_lock(vidHandle->inputCond); if(vidHandle->codecInst->CodecInfo->decInfo.openParam.bitstreamMode == BS_MODE_PIC_END) { - Queue_Enqueue(vidHandle->inputQ, &pkginfo); + ret = Queue_Enqueue(vidHandle->inputQ, &pkginfo); } else - Queue_Enqueue(vidHandle->inputQ2, &size); + ret = Queue_Enqueue(vidHandle->inputQ2, &size); vidHandle->endof_flag = BMDEC_START_FLUSH; osal_cond_signal(vidHandle->inputCond); osal_cond_unlock(vidHandle->inputCond); @@ -4229,10 +4531,10 @@ int BMVidDecFlush(BMVidCodHandle vidCodHandle) } VLOG(INFO, "flush decoder Done......, core index: %d, instance index: %d\n", vidHandle->codecInst->coreIdx, vidHandle->codecInst->instIndex); - return 0; + return ret; } #define TRY_CLOSE_COUNT 500000 -int BMVidDecDelete(BMVidCodHandle vidCodHandle) +BMVidDecRetStatus bmvpu_dec_delete(BMVidCodHandle vidCodHandle) { BMVidHandle vidHandle = (BMVidHandle)vidCodHandle; int coreIdx = -1; @@ -4244,7 +4546,7 @@ int BMVidDecDelete(BMVidCodHandle vidCodHandle) if (vidHandle == NULL || vidHandle->codecInst == NULL) { VLOG(ERR, "handle null in dec delete!!!\n"); - return RETCODE_INVALID_HANDLE; + return BM_ERR_VDEC_UNEXIST; } coreIdx = 
vidHandle->codecInst->coreIdx; instIdx = vidHandle->codecInst->instIndex; @@ -4253,7 +4555,7 @@ int BMVidDecDelete(BMVidCodHandle vidCodHandle) // vidHandle->isStop = 1; if(vidHandle->endof_flag < BMDEC_START_FLUSH) { - BMVidDecFlush(vidCodHandle); + bmvpu_dec_flush(vidCodHandle); } osal_cond_lock(vidHandle->outputCond); @@ -4277,7 +4579,7 @@ int BMVidDecDelete(BMVidCodHandle vidCodHandle) osal_cond_unlock(vidHandle->inputCond); printf("-----close timeout: coreIdx: %d, instIdx: %d\n", coreIdx, instIdx); osal_thread_cancel(vidHandle->processThread); - return -1; + return BM_ERR_VDEC_BUSY; } } @@ -4294,6 +4596,9 @@ int BMVidDecDelete(BMVidCodHandle vidCodHandle) s_disp_flock_fd[coreIdx] = -1; } #endif + if(vidHandle->fp_stream != NULL) { + fclose(vidHandle->fp_stream); + } if (vidHandle->inputCond) osal_cond_destroy(vidHandle->inputCond); if (vidHandle->outputCond) @@ -4301,10 +4606,10 @@ int BMVidDecDelete(BMVidCodHandle vidCodHandle) if(vidHandle) osal_free(vidHandle); VLOG(INFO, "core, %d, inst, %d closed!\n", coreIdx, instIdx); - return RETCODE_SUCCESS; + return BM_SUCCESS; } -BMDecStatus BMVidGetStatus(BMVidCodHandle vidCodHandle) +BMDecStatus bmvpu_dec_get_status(BMVidCodHandle vidCodHandle) { BMVidHandle vidHandle = (BMVidHandle)vidCodHandle; if(checkHandle(vidCodHandle)!=0) @@ -4316,7 +4621,7 @@ BMDecStatus BMVidGetStatus(BMVidCodHandle vidCodHandle) } } -int BMVidGetStreamBufferEmptySize(BMVidCodHandle vidCodHandle) +int bmvpu_dec_get_stream_buffer_empty_size(BMVidCodHandle vidCodHandle) { BMVidHandle vidHandle = (BMVidHandle)vidCodHandle; DecHandle decHandle; @@ -4324,7 +4629,7 @@ int BMVidGetStreamBufferEmptySize(BMVidCodHandle vidCodHandle) if (vidHandle == NULL || vidHandle->codecInst == NULL) { VLOG(ERR, "point is null. 
in get stream buffer size.\n"); - return RETCODE_FAILURE; + return BM_ERR_VDEC_UNEXIST; } decHandle = vidHandle->codecInst; @@ -4332,12 +4637,12 @@ int BMVidGetStreamBufferEmptySize(BMVidCodHandle vidCodHandle) return VPU_DecGetBitstreamBufferRoom(decHandle, vidHandle->streamWrAddr); } -int BMVidDecGetCaps(BMVidCodHandle vidCodHandle, BMVidStreamInfo *streamInfo) +BMVidDecRetStatus bmvpu_dec_get_caps(BMVidCodHandle vidCodHandle, BMVidStreamInfo *streamInfo) { BMVidHandle vidHandle = (BMVidHandle)vidCodHandle; DecInitialInfo *pInitInfo = NULL; if (vidHandle == NULL || vidHandle->seqInitFlag < 1 || streamInfo == NULL) - return RETCODE_WRONG_CALL_SEQUENCE; + return BM_ERR_VDEC_UNEXIST; osal_memset(streamInfo, 0, sizeof(BMVidStreamInfo)); pInitInfo = &(vidHandle->codecInst->CodecInfo->decInfo.initialInfo); streamInfo->picWidth = pInitInfo->picWidth; @@ -4359,10 +4664,10 @@ int BMVidDecGetCaps(BMVidCodHandle vidCodHandle, BMVidStreamInfo *streamInfo) streamInfo->picCropRect.top = pInitInfo->picCropRect.top; streamInfo->picCropRect.left = pInitInfo->picCropRect.left; streamInfo->picCropRect.right = pInitInfo->picCropRect.left; - return 0; + return BM_SUCCESS; } -int BMVidGetAllFramesInBuffer(BMVidCodHandle vidCodHandle) +BMVidDecRetStatus bmvpu_dec_get_all_frame_in_buffer(BMVidCodHandle vidCodHandle) { BMVidHandle vidHandle = (BMVidHandle)vidCodHandle; @@ -4388,23 +4693,23 @@ int BMVidGetAllFramesInBuffer(BMVidCodHandle vidCodHandle) osal_cond_unlock(vidHandle->inputCond); // osal_cond_signal(vidHandle->outputCond); // VLOG(INFO, "get all frame buffer from vpu!\n"); - return 0; + return BM_SUCCESS; } -int BMVidGetEmptyInputBufCnt(BMVidCodHandle vidCodHandle) +int bmvpu_dec_get_all_empty_input_buf_cnt(BMVidCodHandle vidCodHandle) { BMVidHandle vidHandle = (BMVidHandle)vidCodHandle; if(!vidHandle || vidHandle->decStatus>=BMDEC_CLOSE) - return 0; + return BM_ERR_VDEC_UNEXIST; else return INPUT_QUEUE_LEN - Queue_Get_Cnt(vidHandle->inputQ); } -int 
BMVidGetPktInBufCount(BMVidCodHandle vidCodHandle) +int bmvpu_dec_get_pkt_in_buf_cnt(BMVidCodHandle vidCodHandle) { BMVidHandle vidHandle = (BMVidHandle)vidCodHandle; if(!vidHandle || vidHandle->decStatus>=BMDEC_CLOSE) - return 0; + return BM_ERR_VDEC_UNEXIST; else { if(vidHandle->codecInst->CodecInfo->decInfo.openParam.bitstreamMode == BS_MODE_PIC_END) return Queue_Get_Cnt(vidHandle->inputQ); @@ -4413,20 +4718,20 @@ int BMVidGetPktInBufCount(BMVidCodHandle vidCodHandle) } } -int BMVidVpuReset(int devIdx, int coreIdx) +BMVidDecRetStatus bmvpu_dec_reset(int devIdx, int coreIdx) { int offset; int core; if (devIdx < 0 || devIdx > MAX_PCIE_BOARD_NUM-1) { - fprintf(stderr, "Invalid sophon device index %d. [0, %d]\n", + fprintf(stderr, "Invalid device index %d. [0, %d]\n", devIdx, MAX_PCIE_BOARD_NUM-1); - return -1; + return BM_ERR_VDEC_ILLEGAL_PARAM; } if (coreIdx < -1 || coreIdx > MAX_NUM_VPU_CORE_CHIP-1) { fprintf(stderr, "Invalid core index %d\n", coreIdx); - return -1; + return BM_ERR_VDEC_ILLEGAL_PARAM; } offset = MAX_NUM_VPU_CORE_CHIP*devIdx; @@ -4447,7 +4752,7 @@ int BMVidVpuReset(int devIdx, int coreIdx) #endif for(core=offset; coredecStatus>=BMDEC_CLOSE) { VLOG(ERR, "handle or status error...\n"); - return 0; + return BM_ERR_VDEC_UNEXIST; } if(p_stream == NULL) { VLOG(ERR, "stream buffer is null...\n"); - return 0; + return BM_ERR_VDEC_NULL_PTR; } - bm_handle= bmvpu_dec_get_bmlib_handle(vidHandle->codecInst->coreIdx); - len = (int)(vidHandle->streamWrAddr - vidHandle->vbStream.u.device.device_addr); + len = (int)(vidHandle->streamWrAddr - vidHandle->vbStream.phys_addr); if(len < 0 || len > size) { VLOG(ERR, "stream buffer len : %d, p_stream size : %d. 
maybe too small...\n", len, size); - return 0; + return BM_ERR_VDEC_ILLEGAL_PARAM; } - bm_vdi_memcpy_d2s(bm_handle,vidHandle->codecInst->coreIdx,p_stream,vidHandle->vbStream,vidHandle->decConfig.streamEndian); - + VpuReadMem(vidHandle->codecInst->coreIdx, vidHandle->vbStream.phys_addr, p_stream, len, vidHandle->decConfig.streamEndian); return len; } -int BMVidVpuGetInstIdx(BMVidCodHandle vidCodHandle) +int bmvpu_dec_get_inst_idx(BMVidCodHandle vidCodHandle) { BMVidHandle vidHandle = (BMVidHandle)vidCodHandle; if(!vidHandle || vidHandle->decStatus>=BMDEC_CLOSE) { VLOG(ERR, "handle or status error...\n"); - return 0; + return BM_ERR_VDEC_UNEXIST; } return (int)(vidHandle->codecInst->instIndex); } + +BMVidDecRetStatus bmvpu_dec_get_stream_info(BMVidCodHandle vidCodHandle, int* width, int* height, int* mini_fb, int* frame_delay) +{ + BMVidHandle vidHandle = (BMVidHandle)vidCodHandle; + if(!vidHandle || vidHandle->decStatus>=BMDEC_CLOSE) { + VLOG(ERR, "handle or status error...\n"); + return BM_ERR_VDEC_INVALID_CHNID; + } + + if(width == NULL || height == NULL || mini_fb == NULL || frame_delay == NULL){ + VLOG(ERR, "bmvpu_dec_get_stream_info param error...\n"); + return BM_ERR_VDEC_ILLEGAL_PARAM; + } + + *width = vidHandle->decConfig.extern_picWidth; + *height = vidHandle->decConfig.extern_picHeight; + *mini_fb = vidHandle->min_framebuf_cnt; + *frame_delay = vidHandle->framebuf_delay; + + return RETCODE_SUCCESS; +} + +int bmvpu_dec_read_memory(int coreIdx, u64 addr, unsigned char *data, int len, int endian) +{ + return vdi_read_memory(coreIdx, addr, data, len, endian); +} + +u64 bmvpu_dec_calc_cbcr_addr(int codec_type, u64 y_addr, int y_stride, int frame_height) +{ + // codec_type for sync with BM1688 vp9 dec. 
+ return y_addr + y_stride * VPU_ALIGN32(frame_height); +} diff --git a/bmvid/video/decoder/bm_dec_api/src/bm_video_syscxt.c b/bmvid/video/decoder/bm_dec_api/src/bm_video_syscxt.c deleted file mode 100644 index 0cf1fd7..0000000 --- a/bmvid/video/decoder/bm_dec_api/src/bm_video_syscxt.c +++ /dev/null @@ -1,464 +0,0 @@ -/***************************************************************************** - * - * Copyright (C) 2022 Sophgo Technologies Inc. All rights reserved. - * - * bmvid is licensed under the 2-Clause BSD License except for the - * third-party components. - * - *****************************************************************************/ -/* This library provides a high-level interface for controlling the BitMain - * Sophon VPU en/decoder. - */ -#include -#include -#include -#ifdef __linux__ -#include -#elif _WIN32 -#include -#endif -#include "bm_video_interface.h" -#include "bm_video_internal.h" -#include "vpuapi.h" -#include "config.h" -#include "vpuconfig.h" -#include "vdi_osal.h" - -enum { - SYSCXT_STATUS_WORKDING = 0, - SYSCXT_STATUS_EXCEPTION , -}; - -typedef struct{ - uint8_t instcall[MAX_NUM_INSTANCE]; - uint8_t instantNum; - uint8_t coreid; - - DecOpenParam dec_param[MAX_NUM_INSTANCE]; - BMVidHandle vidHandle[MAX_NUM_INSTANCE]; - - osal_cond_t cond_sleep; - osal_cond_t cond_status; - osal_cond_t cond_ex; - - int sys_isinit; - osal_thread_t thread_handle; - - int crst_res; -} bm_syscxt_t; - - -#if defined(CHIP_BM1684) -static bm_syscxt_t g_syscxt[MAX_NUM_VPU_CORE] = {0}; -#endif - -int BMVidDecSeqInitW5(BMVidCodHandle vidCodHandle); -void get_lock_timeout(int sec, int pcie_board_idx); -void unlock_flock(int pcie_board_idx); - -#if defined(CHIP_BM1684) -static int _syscxt_chkinst(bm_syscxt_t *p_syscxt) { - int i; - for (i = 0; i < MAX_NUM_INSTANCE; i++) { - - int instid = -1; - if (p_syscxt->vidHandle[i]) - instid = p_syscxt->vidHandle[i]->codecInst->instIndex; - - if ((instid >= 0) && (p_syscxt->instcall[instid] != 3)) - return -1; - } - - return 
1; -} - -static void _syscxt_release_mem(DecHandle handle) { - - CodecInst * pCodecInst; - DecInfo * pDecInfo; - int i; - bm_handle_t bm_handle; - if (!handle) { - VLOG(ERR, "[%s:%d]handle point NULL, memory have not found\n", - __func__, __LINE__); - return; - } - - pCodecInst = handle; - pDecInfo = &(pCodecInst->CodecInfo->decInfo); - - if (!pDecInfo) { - VLOG(ERR, "[%s:%d][core%d inst%d]pDecInfo point NULL, memory have been free\n", - __func__, __LINE__, pCodecInst->coreIdx, pCodecInst->instIndex); - return; - } - bm_handle= bmvpu_dec_get_bmlib_handle(handle->coreIdx); - EnterLock(pCodecInst->coreIdx); - if (pDecInfo->vbDevSlice.size) - { - bm_free_mem(bm_handle,pDecInfo->vbDevSlice,pDecInfo->vbSliceVddr); - pDecInfo->vbDevSlice.size=0; - } - - if (pDecInfo->vbDevWork.size) { - if (pDecInfo->workBufferAllocExt == 0) { - bm_free_mem(bm_handle,pDecInfo->vbDevWork,pDecInfo->vbWorkVaddr); - pDecInfo->vbDevWork.size=0; - } - } - - if (pDecInfo->vbDevFrame.size) { - if (pDecInfo->frameAllocExt == 0) { - bm_free_mem(bm_handle,pDecInfo->vbDevFrame,pDecInfo->vbFrameVaddr); - pDecInfo->vbDevFrame.size=0; - } - } - for ( i=0 ; ivbDevMV[i].size) - { - bm_free_mem(bm_handle,pDecInfo->vbDevMV[i],pDecInfo->vbMVVaddr[i]); - pDecInfo->vbDevMV[i].size=0; - } - - if (pDecInfo->vbDevFbcYTbl[i].size) - { - bm_free_mem(bm_handle,pDecInfo->vbDevFbcYTbl[i],pDecInfo->vbFbcYTblVaddr[i]); - pDecInfo->vbDevFbcYTbl[i].size=0; - } - - if (pDecInfo->vbDevFbcCTbl[i].size) - { - bm_free_mem(bm_handle,pDecInfo->vbDevFbcCTbl[i],pDecInfo->vbFbcCTblVaddr[i]); - pDecInfo->vbDevFbcCTbl[i].size=0; - } - } - - - if (pDecInfo->vbDevPPU.size) { - if (pDecInfo->ppuAllocExt == 0) - { - bm_free_mem(bm_handle,pDecInfo->vbDevPPU,0x00); - pDecInfo->vbDevPPU.size=0; - } - } - - if (pDecInfo->vbDevWTL.size) - { - bm_free_mem(bm_handle,pDecInfo->vbDevWTL,0x00); - pDecInfo->vbDevWTL.size=0; - } - - - if (pDecInfo->vbDevReport.size) - { - bm_free_mem(bm_handle,pDecInfo->vbDevReport,pDecInfo->vbReportVddr); - 
pDecInfo->vbDevReport.size=0; - } - - if (GetPendingInst(pCodecInst->coreIdx) == pCodecInst) - ClearPendingInst(pCodecInst->coreIdx); - FreeCodecInstance(pCodecInst); - LeaveLock(pCodecInst->coreIdx); -} - -static int _syscxt_restart(bm_syscxt_t *p_syscxt) { - - int coreIdx = p_syscxt->coreid; - int productId = 0; - char *fwPath = NULL; - uint16_t *pusBitCode = NULL; - uint32_t sizeInWord = 0; - RetCode ret = RETCODE_FAILURE; - int res = 0; - int i = 0; - bm_handle_t bm_handle; - if ((productId = VPU_GetProductId(coreIdx)) == -1) { - VLOG(ERR, "[%s:%d]Failed to get product ID, PC=0x%x\n", __func__, __LINE__, vdi_read_register(coreIdx, 4)); - res = -1; - goto restart_exit; - } - - switch (productId) { - case PRODUCT_ID_512: fwPath = CORE_2_BIT_CODE_FILE_PATH; break; - case PRODUCT_ID_511: fwPath = CORE_7_BIT_CODE_FILE_PATH; break; - default: - VLOG(ERR, "[%s:%d]Unknown product id: %d, PC=0x%x\n", __func__, __LINE__, productId, vdi_read_register(coreIdx, 4)); - res = -1; - goto restart_exit; - } - bm_handle= bmvpu_dec_get_bmlib_handle(coreIdx); - VLOG(INFO, "[%s:%d]PC=0x%x\n", __func__, __LINE__, vdi_read_register(coreIdx, 4)); - if (LoadFirmware(productId, (Uint8 **)&pusBitCode, &sizeInWord, fwPath) < 0) { - VLOG(ERR, "[%s:%d]Failed to load firmware: %s, PC=0x%x\n", __func__, __LINE__, fwPath, vdi_read_register(coreIdx, 4)); - res = -1; - goto restart_exit; - } - - for ( i = 0; i < MAX_NUM_INSTANCE; i++) { - - if (p_syscxt->vidHandle[i] == 0) { - continue; - } - - DecOpenParam *decOp = &p_syscxt->dec_param[i]; - bm_device_mem_t vbStream = {0}; - - ret = VPU_InitWithBitcode(coreIdx, (const Uint16 *)pusBitCode, sizeInWord); - if (ret != RETCODE_CALLED_BEFORE && ret != RETCODE_SUCCESS) { - printf("[%s:%d]Failed to boot up VPU(coreIdx: %d, productId: %d), PC=0x%x\n", - __func__, __LINE__, coreIdx, productId, vdi_read_register(coreIdx, 4)); - res = -1; - goto restart_exit; - } - - vbStream.size = STREAM_BUF_SIZE; - if 
(bmvpu_malloc_device_byte_heap(bm_handle,&vbStream,STREAM_BUF_SIZE,HEAP_MASK,1) !=BM_SUCCESS) { - printf("[%s:%d]alloc mem fail, PC=0x%x\n", __func__, __LINE__, vdi_read_register(coreIdx, 4)); - res = -1; - goto restart_exit; - } - - - decOp->bitstreamBuffer = vbStream.u.device.device_addr; - decOp->bitstreamBufferSize = vbStream.size; - - DecHandle handle; - - if ((ret = VPU_DecOpen(&handle, decOp) )!= RETCODE_SUCCESS) { - printf("VPU_DecOpen failed Error code is 0x%x \n", ret); - res = -1; - goto restart_exit; - } - - p_syscxt->vidHandle[i]->codecInst = handle; - osal_memcpy(&(p_syscxt->vidHandle[i]->vbStream), &vbStream, sizeof(vbStream)); - - p_syscxt->vidHandle[i]->frameInBuffer = 0; - p_syscxt->vidHandle[i]->isStreamBufFilled = 0; - p_syscxt->vidHandle[i]->seqInitFlag = 0; - p_syscxt->vidHandle[i]->endof_flag = 0; - p_syscxt->vidHandle[i]->streamWrAddr = vbStream.u.device.device_addr; - p_syscxt->vidHandle[i]->decStatus = BMDEC_UNINIT; - - p_syscxt->vidHandle[i]->isStreamBufFilled = 0; - p_syscxt->vidHandle[i]->seqInitFlag = 0; - } - -restart_exit: - return res; -} - -static void _syscxt_thread(void *arg) { - - bm_syscxt_t *p_syscxt = (bm_syscxt_t *)arg; - int coreIdx = p_syscxt->coreid; - int pcie_board_idx = 0; - int i = 0; - int index = 0; - bm_handle_t bm_handle; - while(1) { - osal_cond_lock(p_syscxt->cond_status); - osal_cond_wait(p_syscxt->cond_status); - osal_cond_unlock(p_syscxt->cond_status); - -#ifdef TRY_FLOCK_OPEN - get_lock_timeout(20,pcie_board_idx); -#else - sem_t* vid_open_sem = sem_open(VID_OPEN_SEM_NAME, O_CREAT, 0666, 1); - get_lock_timeout(vid_open_sem, TRY_OPEN_SEM_TIMEOUT); -#endif - bm_handle= bmvpu_dec_get_bmlib_handle(coreIdx); - printf("[* + *][core %d] start to reset vpu\n", coreIdx); - - for ( i = 0; i < MAX_NUM_INSTANCE; i++) { - - if (p_syscxt->vidHandle[i] == 0) { - continue; - } - - DecHandle handle = p_syscxt->vidHandle[i]->codecInst; - _syscxt_release_mem(handle); - - for (index = 0; index < MAX_REG_FRAME; index++) { - if 
(p_syscxt->vidHandle[i]->pFbMem[index].size > 0) - { - bm_free_mem(bm_handle,p_syscxt->vidHandle[i]->pFbMem[i],p_syscxt->vidHandle[i]->fbMemVaddr[i]); - } - } - - if (p_syscxt->vidHandle[i]->vbStream.size > 0) - { - bm_free_mem(bm_handle,p_syscxt->vidHandle[i]->vbStream,0x00); - } - - VPU_DeInit(coreIdx); - } - - p_syscxt->crst_res = 0; - if(_syscxt_restart(p_syscxt) < 0) { - printf("[* - *][core %d] vpu reset error!\n", coreIdx); - p_syscxt->crst_res = -1; - } - - printf("[* - *][core %d] vpu continue to work\n", coreIdx); -#ifdef TRY_FLOCK_OPEN - unlock_flock(pcie_board_idx); -#else - vidHandle->vid_open_sem = vid_open_sem; - sem_post(vid_open_sem); -#endif - - vdi_crst_set_status(coreIdx, SYSCXT_STATUS_WORKDING); - osal_cond_lock(p_syscxt->cond_sleep); - for( i = 0; i < p_syscxt->instantNum * 2; i++) { - osal_cond_signal(p_syscxt->cond_sleep); - } - osal_cond_unlock(p_syscxt->cond_sleep); - } -} -#endif - -void bm_syscxt_init(void *p_dec_param, BMVidCodHandle vidCodHandle) { - - #if defined(CHIP_BM1684) - BMVidHandle vidHandle = (BMVidHandle)vidCodHandle; - if ((!p_dec_param) || (!vidHandle)) { - VLOG(ERR, "[%s:%d]para error\n", __func__, __LINE__); - return ; - } - - int instid = vidHandle->codecInst->instIndex; - int coreid = vidHandle->codecInst->coreIdx; - osal_memcpy(&g_syscxt[coreid].dec_param[instid], p_dec_param, sizeof(DecOpenParam)); - g_syscxt[coreid].vidHandle[instid] = vidHandle; - g_syscxt[coreid].instantNum += 1; - - if (g_syscxt[coreid].sys_isinit) - return; - - g_syscxt[coreid].sys_isinit = 1; - g_syscxt[coreid].coreid = coreid; - - g_syscxt[coreid].cond_sleep = osal_cond_create(); - if (!g_syscxt[coreid].cond_sleep) { - g_syscxt[coreid].sys_isinit = 0; - return ; - } - - g_syscxt[coreid].cond_status = osal_cond_create(); - if (!g_syscxt[coreid].cond_status) { - g_syscxt[coreid].sys_isinit = 0; - return ; - } - - g_syscxt[coreid].cond_ex = osal_cond_create(); - if (!g_syscxt[coreid].cond_ex) { - g_syscxt[coreid].sys_isinit = 0; - return ; - } - 
- g_syscxt[coreid].crst_res = 0; - g_syscxt[coreid].thread_handle = osal_thread_create(_syscxt_thread, (void*)&g_syscxt[coreid]); - #endif -} - -void bm_syscxt_excepted(int coreid) { - #if defined(CHIP_BM1684) - osal_cond_lock(g_syscxt[coreid].cond_ex); - vdi_crst_set_status(coreid, SYSCXT_STATUS_EXCEPTION); - osal_cond_unlock(g_syscxt[coreid].cond_ex); - #endif -} - -void bm_syscxt_set(int coreid, int enable) { - - #if defined(CHIP_BM1684) - enable = !!enable; - vdi_crst_set_enable(coreid, enable); - #endif -} - -int bm_syscxt_status(int coreid, int instid, int pos) { - - #if defined(CHIP_BM1684) - int is_sleep = -1, is_wakeup; - int ret = vdi_crst_chk_status(coreid, instid, pos, &is_sleep, &is_wakeup); - if (ret < 0) { - VLOG(ERR, "[%s:%d]error happened!\n", __func__, __LINE__); - return 0; - } - - if (is_sleep) { - g_syscxt[coreid].instcall[instid] |= is_sleep << pos; - printf("[%s:%d]core %d inst %d, pos: %d, status: sleep:%d, wakeup:%d!\n", - __func__, __LINE__, coreid, instid, pos, is_sleep, is_wakeup); - } - - g_syscxt[coreid].instcall[instid] |= is_sleep << pos; - while((_syscxt_chkinst(&g_syscxt[coreid]) == 1) && (is_wakeup == 0)) { - - ret = vdi_crst_chk_status(coreid, instid, pos, &is_sleep, &is_wakeup); - if (ret < 0) { - VLOG(ERR, "[%s:%d]error happened!\n", __func__, __LINE__); - return 0; - } - } - - if(is_wakeup) { - osal_cond_lock(g_syscxt[coreid].cond_status); - osal_cond_signal(g_syscxt[coreid].cond_status); - osal_cond_unlock(g_syscxt[coreid].cond_status); - } - - if (is_sleep) { - if (pos == 1) osal_cond_unlock(g_syscxt[coreid].cond_ex); - osal_cond_lock(g_syscxt[coreid].cond_sleep); - osal_cond_wait(g_syscxt[coreid].cond_sleep); - osal_cond_unlock(g_syscxt[coreid].cond_sleep); - - if (pos == 1) osal_cond_lock(g_syscxt[coreid].cond_ex); - g_syscxt[coreid].instcall[instid] = 0; - return g_syscxt[coreid].crst_res < 0 ? 
-2 : -1; - } - #endif - - return 0; -} - -int bm_syscxt_chkstatus(int coreid) { - - #if defined(CHIP_BM1684) - int ret = vdi_crst_get_status(coreid); - if (ret < 0) { - VLOG(ERR, "[%s:%d]error happened!\n", __func__, __LINE__); - return 0; - } - - return ret == SYSCXT_STATUS_WORKDING ? 0 : -1; - #else - return 1; - #endif -} - -void bm_syscxt_statusLock(int coreid) { - - #if defined(CHIP_BM1684) - osal_cond_lock(g_syscxt[coreid].cond_ex); - #endif -} - -void bm_syscxt_statusUnlock(int coreid) { - - #if defined(CHIP_BM1684) - osal_cond_unlock(g_syscxt[coreid].cond_ex); - #endif -} - -void bm_syscxt_status_wakeup(int coreid) { - - #if defined(CHIP_BM1684) - osal_cond_lock(g_syscxt[coreid].cond_sleep); - osal_cond_signal(g_syscxt[coreid].cond_sleep); - osal_cond_unlock(g_syscxt[coreid].cond_sleep); - #endif -} diff --git a/bmvid/video/driver/linux/chagall.bin b/bmvid/video/driver/linux/chagall.bin index 78617e1..c333daa 100644 Binary files a/bmvid/video/driver/linux/chagall.bin and b/bmvid/video/driver/linux/chagall.bin differ diff --git a/bmvid/video/driver/linux/chagall_dec.bin b/bmvid/video/driver/linux/chagall_dec.bin old mode 100755 new mode 100644 diff --git a/bmvid/video/driver/linux/vpu.c b/bmvid/video/driver/linux/vpu.c index c5fcf0e..f9902f3 100755 --- a/bmvid/video/driver/linux/vpu.c +++ b/bmvid/video/driver/linux/vpu.c @@ -48,8 +48,10 @@ */ #if LINUX_VERSION_CODE > KERNEL_VERSION(5,4,0) #include <../drivers/soc/bitmain/ion/bitmain/bitmain_ion_alloc.h> +#include <../drivers/soc/bitmain/ion/ion.h> #else #include <../drivers/staging/android/ion/bitmain/bitmain_ion_alloc.h> +#include <../drivers/staging/android/ion/ion.h> #endif #endif @@ -75,6 +77,7 @@ extern void efuse_ft_get_video_cap(int *cap); extern void efuse_ft_get_bin_type(int *bin_type); extern uint32_t sophon_get_chip_id(void); +extern int bmctl_update_vpu_gmem(struct list_head *in_vpu_gmem_info); /* definitions to be changed as customer configuration */ /* if you want to have clock gating scheme 
frame by frame */ @@ -126,8 +129,22 @@ static int s_vpu_reg_phy_base[MAX_NUM_VPU_CORE] = {0x50440000, 0x50450000, 0x504 # define VM_RESERVED (VM_DONTEXPAND | VM_DONTDUMP) #endif +#define VPU4K_BYTE (1024 * 4) +#define VPU_4K_ALIGN(x) (((x) + VPU4K_BYTE - 1) / VPU4K_BYTE * VPU4K_BYTE) static int s_vpu_sub_bin_flag[MAX_NUM_VPU_CORE] = {1,1,1,1,1}; +#define VPU_HEAP_ID 0x2 +#define NPU_HEAP_ID 0x1 +#define VPP_HEAP_ID 0x0 +static int available_heap_mask = 0x0; + +typedef struct { + struct list_head list; + pid_t vpu_pid; + int64_t vpu_gmem_used; +} vpu_proc_gmem; + + enum { SYSCXT_STATUS_WORKDING = 0, SYSCXT_STATUS_EXCEPTION , @@ -164,6 +181,11 @@ typedef struct vpu_drv_context_all_t int core_idx; } vpu_drv_context_all_t; +typedef struct vpudrv_reset_flag_node_t { + vpudrv_reset_flag reset_flag; + struct list_head list; +} vpudrv_reset_flag_node_t; + /* To track the allocated memory buffer */ typedef struct vpudrv_buffer_pool_t { struct list_head list; @@ -316,10 +338,15 @@ static DEFINE_SEMAPHORE(s_vpu_sem); #endif static struct list_head s_vbp_head = LIST_HEAD_INIT(s_vbp_head); static struct list_head s_inst_list_head = LIST_HEAD_INIT(s_inst_list_head); +static struct list_head s_reset_flag_head = LIST_HEAD_INIT(s_reset_flag_head); +static struct list_head s_vpu_gmem_info = LIST_HEAD_INIT(s_vpu_gmem_info); +static unsigned int s_vpu_gmem_update = 0; +DEFINE_MUTEX(s_vpu_gmem_info_mutex); static vpu_bit_firmware_info_t s_bit_firmware_info[MAX_NUM_VPU_CORE] = {0}; static u32 *s_vpu_dump_flag = NULL; static u32 s_init_flag[MAX_NUM_VPU_CORE] = {0}; +static u32 s_init_state[MAX_NUM_VPU_CORE] = {0}; #ifdef CONFIG_PM /* Product register */ #define VPU_PRODUCT_CODE_REGISTER (BIT_BASE + 0x1044) @@ -341,6 +368,8 @@ typedef struct vpu_inst_info static vpu_inst_info_t s_vpu_inst_info[MAX_NUM_VPU_CORE * MAX_NUM_INSTANCE] = {0}; static DEFINE_MUTEX(s_vpu_proc_lock); +// extern int bmctl_update_vpu_gmem(int pid, int mem_used, int is_free, int is_del); /* defined in tpu soc driver*/ 
+ int bm_vpu_monitor_thread(void *data); static int vpu_init_get_flags_bm1686(int video_cap){ int i =0; @@ -508,6 +537,7 @@ static int WaitBusyTimeout(u32 core, u32 addr) if (time_after(jiffies, timeout)) { return 1; } + msleep(1); } return 0; } @@ -555,11 +585,11 @@ static int SendQuery(u32 core, u32 instanceIndex, u32 queryOpt) return 0; } -static int Wave5DecClrDispFlag(u32 core, u32 instanceIndex, u32 index) +static int Wave5DecClrDispFlag(u32 core, u32 instanceIndex) { int ret = 0; - WriteVpuRegister(W5_CMD_DEC_CLR_DISP_IDC, (1< 5) { + regVal = ReadVpuRegister(W5_RET_FAIL_REASON); + pr_info("core:%d Wave5DecGetInstanceInfo failed. reason: 0x%x", core, instanceIndex, current->tgid, regVal); + return -1; + } + msleep(1); + count += 1; + } + + *instance_info = ReadVpuRegister(W5_RET_QUERY_DEC_GET_INSTANCE_INFO); + + return 0; +} + static int FlushDecResult(u32 core, u32 instanceIndex) { int ret = 0; @@ -604,7 +656,7 @@ static int FlushDecResult(u32 core, u32 instanceIndex) ret = SendQuery(core, instanceIndex, GET_RESULT); if (ret != 0) { regVal = ReadVpuRegister(W5_RET_FAIL_REASON); - pr_info("flush result reason: 0x%x", regVal); + DPRINTK("flush result reason: 0x%x", regVal); return 1; } @@ -621,7 +673,7 @@ static int FlushEncResult(u32 core, u32 instanceIndex) ret = SendQuery(core, instanceIndex, GET_RESULT); if (ret != 0) { regVal = ReadVpuRegister(W5_RET_FAIL_REASON); - pr_info("flush result reason: 0x%x", regVal); + DPRINTK("flush result reason: 0x%x", regVal); return 1; } return 0; @@ -630,6 +682,7 @@ static int FlushEncResult(u32 core, u32 instanceIndex) static int Wave5CloseInstanceCommand(int core, u32 instanceIndex) { int ret = 0; + u32 regVal; #define W5_DESTROY_INSTANCE 0x0020 WriteVpuRegister(W5_CMD_INSTANCE_INFO, (instanceIndex&0xffff)); @@ -639,18 +692,21 @@ static int Wave5CloseInstanceCommand(int core, u32 instanceIndex) WriteVpuRegister(W5_VPU_HOST_INT_REQ, 1); if(WaitBusyTimeout(core, W5_VPU_BUSY_STATUS)) { - pr_info("Wave5CloseInstanceCommand 
after BUSY timeout\n"); + DPRINTK("Wave5CloseInstanceCommand after BUSY timeout\n"); ret = 1; goto DONE_CMD; } if (ReadVpuRegister(W5_RET_SUCCESS) == 0) { - pr_info("Wave5CloseInstanceCommand failed REASON=[0x%x]\n", ReadVpuRegister(W5_RET_FAIL_REASON)); + DPRINTK("Wave5CloseInstanceCommand failed REASON=[0x%x]\n", ReadVpuRegister(W5_RET_FAIL_REASON)); - if (ReadVpuRegister(W5_RET_FAIL_REASON) == WAVE5_VPU_STILL_RUNNING) + regVal = ReadVpuRegister(W5_RET_FAIL_REASON); + if (regVal == WAVE5_INVALID_TASK_BUF) + ret = 0; + else if (regVal == WAVE5_VPU_STILL_RUNNING) ret = 2; else - ret = 1; + ret = 99; /* other failed reason */ goto DONE_CMD; } ret = 0; @@ -674,12 +730,20 @@ static void release_vpu_create_inst_flag(int core_idx, int inst_idx) static int CloseInstanceCommand(int core, u32 instanceIndex) { +#define VPU_STILL_RUNNING 2 + int product_code; + int ret = 0; + int count = 0; + product_code = ReadVpuRegister(VPU_PRODUCT_CODE_REGISTER); + DPRINTK("[VPUDRV] CloseInstanceCommand : tgid : %x\n", current->tgid); if (PRODUCT_CODE_W_SERIES(product_code)) { u32 i =0; u32 interrupt_flag_in_q = 0; - int vpu_create_inst_flag = get_vpu_create_inst_flag(core); + u32 vpu_create_inst_flag = 0; + + vpu_create_inst_flag = get_vpu_create_inst_flag(core); if ((vpu_create_inst_flag & (1 << instanceIndex)) != 0) { if(WAVE521C_CODE != product_code) { Wave5VpuDecSetBitstreamFlag(core, instanceIndex); @@ -687,19 +751,41 @@ static int CloseInstanceCommand(int core, u32 instanceIndex) interrupt_flag_in_q = kfifo_out_spinlocked(&s_interrupt_pending_q[core*MAX_NUM_INSTANCE+instanceIndex], &i, sizeof(u32), &s_kfifo_lock[core*MAX_NUM_INSTANCE+instanceIndex]); if (interrupt_flag_in_q > 0) { //FlushDecResult(core, instanceIndex); - pr_info("interrupt flag : %d\n", interrupt_flag_in_q); + DPRINTK("interrupt flag : %d\n", interrupt_flag_in_q); } FlushDecResult(core, instanceIndex); - for(i=0; i<32; i++) { - int ret = Wave5DecClrDispFlag(core, instanceIndex, i); - if(ret != 0) - break; - } + 
Wave5DecClrDispFlag(core, instanceIndex); } if (WAVE521C_CODE == product_code) { FlushEncResult(core, instanceIndex); } - return Wave5CloseInstanceCommand(core, instanceIndex); + + while (1) + { + ret = Wave5CloseInstanceCommand(core, instanceIndex); + if(ret == 0) { + break; + } + if(count > 500) { + pr_err("CloseInstanceCommand failed REASON=%d\n", ret); + break; + } + + if(ret == VPU_STILL_RUNNING) { + if(WAVE521C_CODE != product_code) { + FlushDecResult(core, instanceIndex); + Wave5VpuDecSetBitstreamFlag(core, instanceIndex); + } + else { + FlushEncResult(core, instanceIndex); + } + } + + msleep(20); + count += 1; + } + + return 0; } else return 0; @@ -803,6 +889,48 @@ static void vpu_dma_buffer_unattach_sg(vpudrv_buffer_t *vb) } #endif +static int vpu_update_vpu_gmem(int pid, int mem_used, int is_free, int is_del) +{ + int proc_cnt = 0; + int update_success = 0; + vpu_proc_gmem *vpu_mem_info, *tmp; + mutex_lock(&s_vpu_gmem_info_mutex); + if (is_del == 0) { + list_for_each_entry_safe(vpu_mem_info, tmp, &s_vpu_gmem_info, list) { + proc_cnt++; + if (vpu_mem_info->vpu_pid == pid) { + if (is_free) + vpu_mem_info->vpu_gmem_used -= VPU_4K_ALIGN(mem_used); + else + vpu_mem_info->vpu_gmem_used += VPU_4K_ALIGN(mem_used); + update_success = 1; + s_vpu_gmem_update++; + if (proc_cnt >= 128) + break; + } + } + if (update_success == 0 && is_free == 0 && proc_cnt < 128) { + vpu_mem_info = kzalloc(sizeof(*vpu_mem_info), GFP_KERNEL); + vpu_mem_info->vpu_pid = pid; + vpu_mem_info->vpu_gmem_used = VPU_4K_ALIGN(mem_used); + list_add(&vpu_mem_info->list, &s_vpu_gmem_info); + s_vpu_gmem_update++; + } + } else { + list_for_each_entry_safe(vpu_mem_info, tmp, &s_vpu_gmem_info, list) { + proc_cnt++; + if (pid == vpu_mem_info->vpu_pid) { + list_del(&vpu_mem_info->list); + kfree(vpu_mem_info); + s_vpu_gmem_update++; + } + } + } + mutex_unlock(&s_vpu_gmem_info_mutex); + return 0; +} + + static int vpu_alloc_dma_buffer(vpudrv_buffer_t *vb) { if (!vb) @@ -817,8 +945,19 @@ static int 
vpu_alloc_dma_buffer(vpudrv_buffer_t *vb) vb->base = (unsigned long)(s_video_memory.base + (vb->phys_addr - s_video_memory.phys_addr)); #elif defined(BM_ION_MEM) + struct ion_buffer *buf; + int heap_id = VPU_HEAP_ID; + for (heap_id = VPU_HEAP_ID; heap_id>=0; heap_id--){ + if (available_heap_mask & (1<ion_fd = bm_ion_alloc(ION_HEAP_TYPE_CARVEOUT, vb->size, 0); + vb->ion_fd = ion_alloc(vb->size, 1 << heap_id, 1, &buf); //mmap_cache is enable + // vb->ion_fd = bm_ion_alloc(ION_HEAP_TYPE_CARVEOUT, vb->size, 0); if (!vb->ion_fd) { printk(KERN_ERR "[VPUDRV] ion memory allocation error size=%d\n", vb->size); return -1; @@ -837,12 +976,15 @@ static int vpu_alloc_dma_buffer(vpudrv_buffer_t *vb) return -1; } #endif + vpu_update_vpu_gmem(current->tgid, vb->size, 0, 0); + return 0; } static void vpu_free_dma_buffer(vpudrv_buffer_t *vb) { + if (!vb) return; @@ -852,13 +994,14 @@ static void vpu_free_dma_buffer(vpudrv_buffer_t *vb) #elif defined(BM_ION_MEM) if (vb->ion_fd) { vpu_dma_buffer_unattach_sg(vb); - //bm_ion_free(vb->ion_fd); vb->base = 0; } #else if (vb->base) dma_free_coherent(0, PAGE_ALIGN(vb->size), (void *)vb->base, vb->phys_addr); #endif + + vpu_update_vpu_gmem(current->tgid, vb->size, 1, 0); } #if 1 int get_lock(int core_idx) @@ -881,7 +1024,7 @@ int get_lock(int core_idx) if(count >= 5000) { pr_info("can't get lock, org: %d, ker: %d", *addr, val1); ret = 0; - break; + // break; } msleep(2); count += 1; @@ -896,6 +1039,28 @@ void release_lock(int core_idx) __sync_lock_release(addr); //__atomic_store_n(addr, 0, __ATOMIC_SEQ_CST); } + +void release_exception_lock(u64 except_info) +{ + int core_idx = (except_info >> 32) & 0xff; + volatile int *current_addr = (int *)(s_instance_pool[core_idx].base + s_instance_pool[core_idx].size - PTHREAD_MUTEX_T_HANDLE_SIZE*4); + volatile int *tmp_addr; + int i; + + if(*current_addr != 0 && *current_addr != current->tgid && *current_addr != current->pid) + { + for(i=0; itgid || *tmp_addr == current->pid ) + release_lock(i); + } + } 
+} + int get_disp_lock(int core_idx) { int val = 0; @@ -951,7 +1116,6 @@ static int vpu_free_instances(struct file *filp) DPRINTK("[VPUDRV] vpu_free_instances detect instance crash instIdx=%d, coreIdx=%d, vip_base=%p, instance_pool_size_per_core=%d\n", (int)vil->inst_idx, (int)vil->core_idx, vip_base, (int)instance_pool_size_per_core); vip = (vpudrv_instance_pool_t *)vip_base; if (vip) { - pr_info("clean core %d, inst %d, use flag addr: %p\n",(int)vil->core_idx, (int)vil->inst_idx, vip->codecInstPool[vil->inst_idx]); if(vip->pendingInstIdxPlus1 - 1 == vil->inst_idx) vip->pendingInstIdxPlus1 = 0; memset(vip->codecInstPool[vil->inst_idx], 0x00, 4); /* only first 4 byte is key point(inUse of CodecInst in vpuapi) to free the corresponding instance. */ @@ -1241,8 +1405,9 @@ static irqreturn_t vpu_irq_handler(int irq, void *dev_id) u32 ll_intr_reason = (1 << INT_WAVE5_DEC_PIC); kfifo_in_spinlocked(&s_interrupt_pending_q[core*MAX_NUM_INSTANCE+intr_inst_index], &ll_intr_reason, sizeof(u32), &s_kfifo_lock[core*MAX_NUM_INSTANCE+intr_inst_index]); } - else + else { kfifo_in_spinlocked(&s_interrupt_pending_q[core*MAX_NUM_INSTANCE+intr_inst_index], &intr_reason, sizeof(u32), &s_kfifo_lock[core*MAX_NUM_INSTANCE+intr_inst_index]); + } } else { printk(KERN_ERR "[VPUDRV] : kfifo_is_full kfifo_count=%d \n", kfifo_len(&s_interrupt_pending_q[core*MAX_NUM_INSTANCE+intr_inst_index])); @@ -1641,9 +1806,10 @@ static long vpu_ioctl(struct file *filp, u_int cmd, u_long arg) list_for_each_entry_safe(vbp, n, &s_vbp_head, list) { - if (vbp->vb.base == vb.base) { + if (vbp->vb.phys_addr == vb.phys_addr) { list_del(&vbp->list); kfree(vbp); + vb.phys_addr = 0; break; } } @@ -1854,7 +2020,6 @@ static long vpu_ioctl(struct file *filp, u_int cmd, u_long arg) break; } } - } ret = -EFAULT; } @@ -2035,10 +2200,20 @@ static long vpu_ioctl(struct file *filp, u_int cmd, u_long arg) if (get_user(core_idx, (u32 __user *) arg)) return -EFAULT; - if (core_idx >= get_vpu_core_num(chip_id, video_cap)) + if 
(core_idx >= get_vpu_core_num(chip_id, video_cap) || core_idx < 0) return -EFAULT; - if(s_init_flag[core_idx] == 0) - ret = 100; + + if ((ret = mutex_lock_interruptible(&s_vpu_lock)) == 0) { + if(s_init_flag[core_idx] == 0) + ret = 100; + else + ret = s_init_flag[core_idx]; + + mutex_unlock(&s_vpu_lock); + } + else { + return -ERESTARTSYS; + } } break; #ifndef BM_ION_MEM @@ -2116,36 +2291,83 @@ static long vpu_ioctl(struct file *filp, u_int cmd, u_long arg) } break; case VDI_IOCTL_CTRL_KERNEL_RESET: + { + vpudrv_reset_flag_node_t *reset_flag; + vpudrv_reset_flag_node_t *vrf, *n; + + if ((ret = mutex_lock_interruptible(&s_vpu_lock)) == 0) { + reset_flag = kzalloc(sizeof(*reset_flag), GFP_KERNEL); + DPRINTK("[VPUDRV][+]VDI_IOCTL_CTRL_KERNEL_RESET, tpid: 0x%x, pid: 0x%x\n", current->tgid, current->pid); + ret = copy_from_user(reset_flag, (vpudrv_reset_flag *)arg, sizeof(vpudrv_reset_flag)); + if (ret != 0) { + kfree(reset_flag); + mutex_unlock(&s_vpu_lock); + return -EFAULT; + } + + if (reset_flag->reset_flag.core_idx < 0 || reset_flag->reset_flag.core_idx >= get_vpu_core_num(chip_id, video_cap)) { + kfree(reset_flag); + mutex_unlock(&s_vpu_lock); + return -EFAULT; + } + if(reset_flag->reset_flag.reset == 0) + { + list_for_each_entry_safe(vrf, n, &s_reset_flag_head, list) + { + if(vrf->reset_flag.pid == reset_flag->reset_flag.pid && vrf->reset_flag.core_idx == reset_flag->reset_flag.core_idx) + { + list_del(&vrf->list); + kfree(vrf); + } + } + kfree(reset_flag); + } else { + list_add(&reset_flag->list, &s_reset_flag_head); + } + mutex_unlock(&s_vpu_lock); + } + else + { + return -ERESTARTSYS; + } + + DPRINTK("[VPUDRV][-]VDI_IOCTL_CTRL_KERNEL_RESET, tpid: 0x%x, pid: 0x%x\n", current->tgid, current->pid); + } + break; + case VDI_IOCTL_GET_KERNEL_RESET_STATUS: { vpudrv_reset_flag reset_flag; - DPRINTK("[VPUDRV][+]VDI_IOCTL_CTRL_KERNEL_RESET\n"); + vpudrv_reset_flag_node_t *vrf, *n; + DPRINTK("[VPUDRV][+]VDI_IOCTL_GET_KERNEL_RESET_STATUS\n"); + ret = 
copy_from_user(&reset_flag, (vpudrv_reset_flag *)arg, sizeof(vpudrv_reset_flag)); if (ret != 0) return -EFAULT; if (reset_flag.core_idx < 0 || reset_flag.core_idx >= get_vpu_core_num(chip_id, video_cap)) return -EFAULT; - s_vpu_drv_context.reset_vpu_core_disable[reset_flag.core_idx] = reset_flag.reset_core_disable; - DPRINTK("[VPUDRV][-]VDI_IOCTL_CTRL_KERNEL_RESET\n"); + + if ((ret = mutex_lock_interruptible(&s_vpu_lock)) == 0) { + reset_flag.reset = 0; + list_for_each_entry_safe(vrf, n, &s_reset_flag_head, list) + { + if(vrf->reset_flag.pid == reset_flag.pid && vrf->reset_flag.core_idx == reset_flag.core_idx) + { + reset_flag.reset = 1; + break; + } + } + mutex_unlock(&s_vpu_lock); + } + else { + return -ERESTARTSYS; + } + ret = copy_to_user((void __user *)arg, &reset_flag, sizeof(vpudrv_reset_flag)); + if (ret != 0) + return -EFAULT; + DPRINTK("[VPUDRV][-]VDI_IOCTL_GET_KERNEL_RESET_STATUS\n"); } break; - case VDI_IOCTL_GET_KERNEL_RESET_STATUS: - { - vpudrv_reset_flag reset_flag; - DPRINTK("[VPUDRV][+]VDI_IOCTL_GET_KERNEL_RESET_STATUS\n"); - ret = copy_from_user(&reset_flag, (vpudrv_reset_flag *)arg, sizeof(vpudrv_reset_flag)); - if (ret != 0) - return -EFAULT; - - if (reset_flag.core_idx < 0 || reset_flag.core_idx >= get_vpu_core_num(chip_id, video_cap)) - return -EFAULT; - reset_flag.reset_core_disable =s_vpu_drv_context.reset_vpu_core_disable[reset_flag.core_idx]; - ret = copy_to_user((void __user *)arg, &reset_flag, sizeof(vpudrv_reset_flag)); - if (ret != 0) - return -EFAULT; - DPRINTK("[VPUDRV][-]VDI_IOCTL_GET_KERNEL_RESET_STATUS\n"); - } - break; default: { printk(KERN_ERR "[VPUDRV] No such IOCTL, cmd is %d\n", cmd); @@ -2249,6 +2471,7 @@ static ssize_t vpu_write(struct file *filp, const char __user *buf, size_t len, s_init_flag[bit_firmware_info->core_idx] = s_bit_firmware_info[bit_firmware_info->core_idx].size; kfree(bit_firmware_info); mutex_unlock(&s_vpu_lock); + pr_info("[VPUDRV] core:%d load firmware. 
ret=%d\n", bit_firmware_info->core_idx, len); return len; } @@ -2352,25 +2575,9 @@ static void close_vpu_instance(long flags, struct file *filp) for(i=0; i 5) { - pr_info("can not stop instances core %d inst %d", (int)core_idx, i); - return; //do not close the core inst because the core exception.. maybe... - } - continue; // means there is command which should be flush. - } - break; - } + get_lock(core_idx); + CloseInstanceCommand(core_idx, i); + release_lock(core_idx); } } } @@ -2383,11 +2590,14 @@ static int vpu_release(struct inode *inode, struct file *filp) u32 open_count; unsigned long except_info = 0; int vpu_disable_reset_flag_sum = 0; + vpudrv_reset_flag_node_t *vrf, *n; DPRINTK("[VPUDRV] vpu_release\n"); mutex_lock(&s_vpu_lock); except_info = get_exception_instance_info(filp); + if (except_info != 0) + release_exception_lock(except_info); mutex_unlock(&s_vpu_lock); close_vpu_instance(except_info, filp); @@ -2421,18 +2631,18 @@ static int vpu_release(struct inode *inode, struct file *filp) s_vpu_drv_context.open_count--; open_count = s_vpu_drv_context.open_count; - for (core_idx=0; core_idx < get_vpu_core_num(chip_id, video_cap); core_idx++){ - if ((s_vpu_drv_context.reset_vpu_core_disable[core_idx]==current->tgid) || (s_vpu_drv_context.reset_vpu_core_disable[core_idx]==current->pid)) - s_vpu_drv_context.reset_vpu_core_disable[core_idx] = 0; - - if (s_vpu_drv_context.reset_vpu_core_disable[core_idx] != 0) - vpu_disable_reset_flag_sum++; + list_for_each_entry_safe(vrf, n, &s_reset_flag_head, list) + { + if(((vrf->reset_flag.pid == current->tgid) || (vrf->reset_flag.pid == current->pid)) && vrf->reset_flag.core_idx == core_idx) + { + list_del(&vrf->list); + kfree(vrf); + } } - if (open_count == 0 && vpu_disable_reset_flag_sum == 0) { //in pcie driver, using the sum of vpu_open_ref_count instead of open_count because pcie drv is not independent(ko). 
+ if (open_count == 0 && list_empty(&s_reset_flag_head)) { //in pcie driver, using the sum of vpu_open_ref_count instead of open_count because pcie drv is not independent(ko). memset(&s_vpu_drv_context.crst_cxt[0], 0, sizeof(vpu_crst_context_t) * get_vpu_core_num(chip_id, video_cap)); bm_vpu_assert(&vpu_rst_ctrl); - for(core_idx=0; core_idx < get_vpu_core_num(chip_id, video_cap); core_idx++) { if (s_instance_pool[core_idx].base) { #ifdef USE_VMALLOC_FOR_INSTANCE_POOL_MEMORY @@ -2446,10 +2656,13 @@ static int vpu_release(struct inode *inode, struct file *filp) if (s_common_memory[core_idx].base) { vpu_free_dma_buffer(&s_common_memory[core_idx]); s_common_memory[core_idx].base = 0; + s_common_memory[core_idx].phys_addr = 0; } + s_init_flag[core_idx] = 0; + pr_info("[VPUDRV] core:%d reset firmware.\n", core_idx); } } -#if 1 +#if 0 #if defined(CHIP_BM1682) if(ret > 0 && s_vpu_usage_info.vpu_open_ref_count[ret-1] == 0) { //printk(KERN_INFO "exception will reset the vpu core: %d\n", ret - 1); @@ -2457,7 +2670,7 @@ static int vpu_release(struct inode *inode, struct file *filp) // msleep(200); //waiting the decoder stoping maybe...... 
} #elif defined(CHIP_BM1684) - if(ret > 0 && s_vpu_usage_info.vpu_open_ref_count[ret-1] == 0 && s_vpu_drv_context.reset_vpu_core_disable[ret-1]==0) { + if(ret > 0 && s_vpu_usage_info.vpu_open_ref_count[ret-1] == 0 && list_empty(&s_reset_flag_head)) { //printk(KERN_INFO "exception will reset the vpu core: %d\n", ret - 1); s_init_flag[ret-1] = 0; } @@ -2467,6 +2680,10 @@ static int vpu_release(struct inode *inode, struct file *filp) } + down(&s_vpu_sem); + vpu_update_vpu_gmem(current->tgid, 0, 0, 1); + up(&s_vpu_sem); + vpu_fasync(-1, filp, 0); if(filp->private_data != NULL) vfree(filp->private_data); @@ -2480,6 +2697,7 @@ static int vpu_map_to_register(struct file *fp, struct vm_area_struct *vm, int c unsigned long pfn; vm->vm_flags |= VM_IO | VM_RESERVED; + vm->vm_page_prot = pgprot_noncached(vm->vm_page_prot); pfn = s_vpu_register[core_idx].phys_addr >> PAGE_SHIFT; @@ -2717,7 +2935,7 @@ static int vpu_probe(struct platform_device *pdev) err = bm_vpu_register_cdev(pdev); if (err < 0) { - printk(KERN_ERR "bm_vpu_register_cdev\n"); + printk(KERN_ERR "vpu_register_cdev\n"); goto ERROR_PROVE_DEVICE; } @@ -3022,7 +3240,7 @@ static void bm_vpu_assert(vpu_reset_ctrl *pRstCtrl) #elif defined(CHIP_BM1684) { int i; - pr_info("<<<<<>>>>>>>>>>>>>>\n"); + // pr_info("<<<<<>>>>>>>>>>>>>>\n"); for(i=0; i< MAX_NUM_VPU_CORE; i++) { if(s_vpu_sub_bin_flag[i] == 0) @@ -3053,7 +3271,7 @@ static void bm_vpu_deassert(vpu_reset_ctrl *pRstCtrl) #elif defined(CHIP_BM1684) { int i; - pr_info("<<<<<>>>>>>>>>>>>>>\n"); + // pr_info("<<<<<>>>>>>>>>>>>>>\n"); for(i=0; i< MAX_NUM_VPU_CORE; i++) { if(s_vpu_sub_bin_flag[i] == 0) @@ -3556,18 +3774,43 @@ static int bm_vpu_usage_info_init(vpu_statistic_info_t *vpu_usage_info) int bm_vpu_monitor_thread(void *data) { int ret = 0; - vpu_statistic_info_t *vpu_usage_info = (vpu_statistic_info_t *)data; - + vpu_statistic_info_t *vpu_usage_info = (vpu_statistic_info_t *)data; + int (*update_vpu_gmem_func)(struct list_head *in_vpu_gmem_info); + 
update_vpu_gmem_func = NULL; set_current_state(TASK_INTERRUPTIBLE); ret = bm_vpu_usage_info_init(vpu_usage_info); if (ret) return ret; - + unsigned int loops = 0; while (!kthread_should_stop()) { - bm_vpu_check_usage_info(vpu_usage_info); - msleep_interruptible(100); + bm_vpu_check_usage_info(vpu_usage_info); + msleep_interruptible(100); + if (loops > 10) { + vpu_proc_gmem *vpu_mem_info, *tmp; + unsigned int length = 0; + mutex_lock(&s_vpu_gmem_info_mutex); + list_for_each_entry_safe(vpu_mem_info, tmp, &s_vpu_gmem_info, list) { + length++; + break; + } + if ((length > 0) && (s_vpu_gmem_update > 0)) { + if (update_vpu_gmem_func) { + update_vpu_gmem_func(&s_vpu_gmem_info); + } else { + update_vpu_gmem_func = symbol_get(bmctl_update_vpu_gmem); + if (update_vpu_gmem_func) + update_vpu_gmem_func(&s_vpu_gmem_info); + } + s_vpu_gmem_update = 0; + } + mutex_unlock(&s_vpu_gmem_info_mutex); + loops = 0; + } + loops++; + } + if (update_vpu_gmem_func) { + symbol_put(bmctl_update_vpu_gmem); } - return ret; } @@ -3778,6 +4021,7 @@ static const struct file_operations proc_info_operations = { static int __init vpu_init(void) { + struct device_node *np; int res; int i; @@ -3833,6 +4077,34 @@ static int __init vpu_init(void) entry = proc_create("vpuinfo", 0666, NULL, &proc_info_operations); + + np = of_find_node_by_path("/reserved-memory"); + if (np) { + struct device_node *np_heap; + + /* ion reserved mem */ + np_heap = of_find_compatible_node(np, NULL, "vpp-region"); + if (np_heap) { + available_heap_mask |= (0x1 << VPP_HEAP_ID); + np_heap = NULL; + } + + /* npu reserved mem */ + np_heap = of_find_compatible_node(np, NULL, "npu-region"); + if (np_heap) { + available_heap_mask |= (0x1 << NPU_HEAP_ID); + np_heap = NULL; + } + + /* vpu reserved mem */ + np_heap = of_find_compatible_node(np, NULL, "vpu-region"); + if (np_heap) { + available_heap_mask |= (0x1 << VPU_HEAP_ID); + np_heap = NULL; + } + } + DPRINTK("[VPUDRV] available heap mask: 0x%x\n", available_heap_mask); + 
DPRINTK("[VPUDRV] end vpu_init result=0x%x\n", res); return res; } diff --git a/bmvid/video/driver/linux/vpu.h b/bmvid/video/driver/linux/vpu.h index 6824f60..e3a51e0 100644 --- a/bmvid/video/driver/linux/vpu.h +++ b/bmvid/video/driver/linux/vpu.h @@ -57,6 +57,7 @@ #define VDI_IOCTL_GET_MAX_CORE_NUM _IO(VDI_IOCTL_MAGIC, 33) #define VDI_IOCTL_CTRL_KERNEL_RESET _IO(VDI_IOCTL_MAGIC, 34) #define VDI_IOCTL_GET_KERNEL_RESET_STATUS _IO(VDI_IOCTL_MAGIC, 35) + typedef struct vpudrv_syscxt_info_s { unsigned int core_idx; unsigned int inst_idx; @@ -80,8 +81,8 @@ typedef struct vpudrv_buffer_t { int enable_cache; #endif -#ifdef BM_ION_MEM - int ion_fd; +#if defined(BM_PCIE_MODE) || defined(BM_ION_MEM) + unsigned int ion_fd; struct dma_buf_attachment *attach; struct sg_table *table; struct dma_buf *dma_buf; @@ -119,7 +120,8 @@ typedef struct vpudrv_regrw_info_t { typedef struct { int core_idx; - pid_t reset_core_disable; + pid_t pid; + int reset; } vpudrv_reset_flag; #ifdef BM_PCIE_MODE diff --git a/bmvid/video/driver/vpuerror.h b/bmvid/video/driver/vpuerror.h index 1e9c59f..608556a 100644 --- a/bmvid/video/driver/vpuerror.h +++ b/bmvid/video/driver/vpuerror.h @@ -32,9 +32,11 @@ #define WAVE5_SYSERR_ACCESS_VIOLATION_HW 0x00000040 #define WAVE5_RESULT_NOT_READY 0x00000800 #define WAVE5_VPU_STILL_RUNNING 0x00001000 +#define WAVE5_VPU_UNKNOWN_CMD 0x00002000 #define WAVE5_INSTANCE_DESTROYED 0x00004000 #define WAVE5_SYSERR_DEC_VLC_BUF_FULL 0x00010000 #define WAVE5_SYSERR_WATCHDOG_TIMEOUT 0x00020000 +#define WAVE5_INVALID_TASK_BUF 0x00040000 #define WAVE5_ERROR_FW_FATAL 0x00200000 diff --git a/bmvid/video/driver/vputypes.h b/bmvid/video/driver/vputypes.h index 2017ddb..70d8784 100755 --- a/bmvid/video/driver/vputypes.h +++ b/bmvid/video/driver/vputypes.h @@ -20,6 +20,9 @@ #include #define STATIC static + +#ifndef DATA_TYPES +#define DATA_TYPES /** * @brief This type is an 8-bit unsigned integral type, which is used for declaring pixel data. 
*/ @@ -61,6 +64,7 @@ typedef unsigned long long u64; #else typedef unsigned long u64; #endif +#endif #ifndef PhysicalAddress /** diff --git a/bmvid/video/driver/wave5_regdefine.h b/bmvid/video/driver/wave5_regdefine.h index bbba8db..d601745 100755 --- a/bmvid/video/driver/wave5_regdefine.h +++ b/bmvid/video/driver/wave5_regdefine.h @@ -78,6 +78,7 @@ typedef enum { GET_BS_RD_PTR = 5, // for decoder GET_BS_WR_PTR = 6, // for encoder GET_SCHED_INFO = 9, + GET_INSTANCE_INFO = 0x60, GET_DEBUG_INFO = 0x61, } QUERY_OPT; @@ -472,6 +473,10 @@ enum { /************************************************************************/ #define W5_RET_QUERY_DEC_BS_RD_PTR (W5_REG_BASE + 0x011C) +/************************************************************************/ +/* DECODER - QUERY : GET_INSTANCE_INFO */ +/************************************************************************/ +#define W5_RET_QUERY_DEC_GET_INSTANCE_INFO (W5_REG_BASE + 0x01D4) //#ifdef FIX_WAVE_SW_RESET_V3 /************************************************************************/ diff --git a/bmvid/video/encoder/bm_enc_api/CMakeLists.txt b/bmvid/video/encoder/bm_enc_api/CMakeLists.txt index ccde7e0..6abd940 100644 --- a/bmvid/video/encoder/bm_enc_api/CMakeLists.txt +++ b/bmvid/video/encoder/bm_enc_api/CMakeLists.txt @@ -77,7 +77,6 @@ set(SRCS ${PROJECT_ROOT}/video/encoder/bm_enc_api/src/encoder.c ${PROJECT_ROOT}/video/encoder/bm_enc_api/src/log.c ${PROJECT_ROOT}/video/encoder/bm_enc_api/src/misc.c - ${PROJECT_ROOT}/video/encoder/bm_enc_api/src/queue.c ) add_library(libbmvpuapi SHARED ${SRCS}) @@ -90,6 +89,7 @@ target_link_libraries(libbmvpuapi-static PUBLIC ${SETUPAPILIB_LIBRARY} libbmvpul set(SRCSEXEC ${PROJECT_ROOT}/example/bmvpuenc.c + ${PROJECT_ROOT}/example/queue.c ${PROJECT_ROOT}/video/provider/cnm/decoder/vdi/windows/libusb-1.0.18/examples/getopt/getopt1.c ${PROJECT_ROOT}/video/provider/cnm/decoder/vdi/windows/libusb-1.0.18/examples/getopt/getopt.c ) @@ -102,7 +102,7 @@ install(TARGETS libbmvpuapi 
libbmvpuapi-static DESTINATION lib) if(WIN32) file( - COPY ./inc/bmqueue.h ./inc/bmvpuapi.h ./inc/bmvpuapi_common.h ./inc/bmvpuapi_enc.h + COPY ./inc/bm_vpuenc_interface.h DESTINATION ${CMAKE_INSTALL_PREFIX}/include ) -endif() \ No newline at end of file +endif() diff --git a/bmvid/video/encoder/bm_enc_api/Makefile b/bmvid/video/encoder/bm_enc_api/Makefile index 7ece0b3..63c679f 100644 --- a/bmvid/video/encoder/bm_enc_api/Makefile +++ b/bmvid/video/encoder/bm_enc_api/Makefile @@ -51,7 +51,7 @@ CXX = $(CROSS_CC_PREFIX)g++ AR = $(CROSS_CC_PREFIX)ar CFLAGS += -fPIC -Wall -Wl,--fatal-warning -CFLAGS += -I$(BMVID_TOP_DIR)/video/encoder/bm_enc_api/inc -I$(INSTALL_DIR)/include -I$(BMVID_TOP_DIR)/provider/cnm/encoder/inc -I$(BMVID_ROOT)/3rdparty/libbmcv/include +CFLAGS += -I$(BMVID_TOP_DIR)/video/encoder/bm_enc_api/inc -I$(INSTALL_DIR)/include -I$(BMVID_TOP_DIR)/provider/cnm/encoder/inc -I$(BMVID_TOP_DIR)/provider/cnm/encoder/inc/ #-I$(BMVID_ROOT)/3rdparty/libbmcv/include CFLAGS += -D$(BM_CHIP_TYPE) -D$(BM_SUB_TYPE) ifneq ($(PRODUCTFORM), soc) CFLAGS += -DBM_PCIE_MODE @@ -66,11 +66,11 @@ LDLIBS = -lpthread -lm -lrt -ldl LDLIBS += -lbmvpulite -lbmvpuapi LIBS0 = -lbmvpulite -LDFLAGS += -L./$(PRODUCTFORM) -L$(INSTALL_DIR)/lib -L$(BMVID_ROOT)/3rdparty/libbmcv/lib/$(PRODUCTFORM) +LDFLAGS += -L./$(PRODUCTFORM) -L$(INSTALL_DIR)/lib #-L$(BMVID_ROOT)/3rdparty/libbmcv/lib/$(PRODUCTFORM) -# for using bmlib to manage physical memory -LIBS0 += -lbmlib -LDLIBS += -lbmlib +# # for using bmlib to manage physical memory +# LIBS0 += -lbmlib +# LDLIBS += -lbmlib ifeq ($(PRODUCTFORM),pcie_mips64) CFLAGS += -mips64r2 -mabi=64 -march=gs464e -D_GLIBCXX_USE_CXX11_ABI=0 @@ -108,13 +108,13 @@ MKDIR ?= mkdir -p LIB_SRC = src/encoder.c \ src/log.c \ - src/queue.c \ src/misc.c LIB_OBJS=$(patsubst %.c,%.o,$(LIB_SRC)) LIB_PATHS=$(addprefix $(OBJDIR)/,$(notdir $(LIB_OBJS))) -ENC_SRC = $(BMVID_TOP_DIR)/example/bmvpuenc.c +ENC_SRC = $(BMVID_TOP_DIR)/example/bmvpuenc.c \ + $(BMVID_TOP_DIR)/example/queue.c 
ENC_OBJS=$(patsubst %.c,%.o,$(ENC_SRC)) ENC_PATHS=$(addprefix $(OBJDIR)/,$(notdir $(ENC_OBJS))) @@ -147,10 +147,7 @@ install: $(TARGET) install -d $(INSTALL_DIR)/lib install -d $(INSTALL_DIR)/include install $(TESTENC) $(INSTALL_DIR)/bin - install -m 0644 inc/bmqueue.h $(INSTALL_DIR)/include - install -m 0644 inc/bmvpuapi.h $(INSTALL_DIR)/include - install -m 0644 inc/bmvpuapi_common.h $(INSTALL_DIR)/include - install -m 0644 inc/bmvpuapi_enc.h $(INSTALL_DIR)/include + install -m 0644 inc/bm_vpuenc_interface.h $(INSTALL_DIR)/include ifeq ($(TARGET_SOVERSION), ) install $(TARGET) $(INSTALL_DIR)/lib else @@ -163,10 +160,7 @@ uninstall: $(RM) $(INSTALL_DIR)/lib/$(TARGET_NAME) $(RM) $(INSTALL_DIR)/lib/$(TARGET_NAME)$(SO_NAME) $(RM) $(INSTALL_DIR)/lib/$(TARGET_NAME)$(SO_VERSION) - $(RM) $(INSTALL_DIR)/include/bmqueue.h - $(RM) $(INSTALL_DIR)/include/bmvpuapi.h - $(RM) $(INSTALL_DIR)/include/bmvpuapi_common.h - $(RM) $(INSTALL_DIR)/include/bmvpuapi_enc.h + $(RM) $(INSTALL_DIR)/include/bm_vpuenc_interface.h $(OBJDIR)/encoder.o : src/encoder.c $(MAKEFILE) $(CC) $(CFLAGS) -c $< -o $@ -MD -MF $(@:.o=.dep) @@ -174,15 +168,15 @@ $(OBJDIR)/encoder.o : src/encoder.c $(MAKEFILE) $(OBJDIR)/log.o : src/log.c $(MAKEFILE) $(CC) $(CFLAGS) -c $< -o $@ -MD -MF $(@:.o=.dep) -$(OBJDIR)/queue.o : src/queue.c $(MAKEFILE) - $(CC) $(CFLAGS) -c $< -o $@ -MD -MF $(@:.o=.dep) - $(OBJDIR)/misc.o : src/misc.c $(MAKEFILE) $(CC) $(CFLAGS) -c $< -o $@ -MD -MF $(@:.o=.dep) $(OBJDIR)/bmvpuenc.o : $(BMVID_TOP_DIR)/example/bmvpuenc.c $(MAKEFILE) $(CC) $(CFLAGS) -c $< -o $@ -MD -MF $(@:.o=.dep) +$(OBJDIR)/queue.o : $(BMVID_TOP_DIR)/example/queue.c $(MAKEFILE) + $(CC) $(CFLAGS) -c $< -o $@ -MD -MF $(@:.o=.dep) + $(OBJDIR): -mkdir -p $(OBJDIR) diff --git a/bmvid/video/encoder/bm_enc_api/inc/bmvpuapi_enc.h b/bmvid/video/encoder/bm_enc_api/inc/bm_vpuenc_interface.h similarity index 58% rename from bmvid/video/encoder/bm_enc_api/inc/bmvpuapi_enc.h rename to 
bmvid/video/encoder/bm_enc_api/inc/bm_vpuenc_interface.h index db8b461..4f68f44 100644 --- a/bmvid/video/encoder/bm_enc_api/inc/bmvpuapi_enc.h +++ b/bmvid/video/encoder/bm_enc_api/inc/bm_vpuenc_interface.h @@ -17,6 +17,7 @@ #include #include + #if defined(_WIN32) || defined(WIN32) || defined(__WIN32__) #define ATTRIBUTE #define DECL_EXPORT __declspec(dllexport) @@ -30,7 +31,23 @@ extern "C" { #endif -#include "bmvpuapi_common.h" +#define BMVPUAPI_VERSION "1.0.0" + + +#ifndef u64 +#ifdef _WIN32 +typedef unsigned long long u64; +#elif __linux__ +typedef unsigned long u64; +#endif +#endif + +#ifndef u32 +#ifdef _WIN32 +typedef unsigned int u32; +#endif +#endif + /** @@ -59,42 +76,26 @@ extern "C" { * bmvpu_enc_set_default_open_params() and afterwards set any explicit valus. * 3. Call bmvpu_enc_open(), passing in a pointer to the filled BmVpuEncOpenParams * instance, and the DMA buffer of the bitstream which was allocated in step 1. - * 4. Call bmvpu_enc_get_initial_info(). The encoder's initial info contains the - * minimum number of framebuffers that must be allocated and registered, and the - * address alignment that must be used when allocating DMA memory for these - * framebuffers. - * 5. (Optional) Perform the necessary size and alignment calculations by calling - * bmvpu_calc_framebuffer_sizes(). Pass in the width & height of the frames that - * shall be encoded. - * 6. Create an array of at least as many BmVpuFramebuffer instances as specified in - * min_num_rec_fb. Each instance must point to a DMA buffer that is big - * enough to hold a frame. If step 5 was performed, allocating as many bytes as indicated - * by total_size is enough. Make sure the Y,Cb,Cr offsets in each BmVpuFramebuffer instance - * are valid. Using the bmvpu_fill_framebuffer_params() convenience function for this is - * recommended. Note that these framebuffers are used for temporary internal encoding only, - * and will not contain input or output data. - * 7. 
Call bmvpu_enc_register_framebuffers() and pass in the BmVpuFramebuffer array - * and the number of BmVpuFramebuffer instances allocated in step 6. - * 8. (Optional) allocate an array of at least as many DMA buffers as specified in + * 4. (Optional) allocate an array of at least as many DMA buffers as specified in * min_num_src_fb for the input frames. If the incoming data is already stored in DMA buffers, * this step can be omitted, since the encoder can then read the data directly. - * 9. Create an instance of BmVpuRawFrame, set its values to zero. - * 10. Create an instance of BmVpuEncodedFrame. Set its values to zero. - * 11. Set the framebuffer pointer of the BmVpuRawFrame's instance from step 9 to refer to the - * input DMA buffer (either the one allocated in step 8, or the one containing the input data if + * 5. Create an instance of BmVpuRawFrame, set its values to zero. + * 6. Create an instance of BmVpuEncodedFrame. Set its values to zero. + * 7. Set the framebuffer pointer of the BmVpuRawFrame's instance from step 5 to refer to the + * input DMA buffer (either the one allocated in step 4, or the one containing the input data if * it already comes in DMA memory). - * 12. Fill an instance of BmVpuEncParams with valid values. It is recommended to first set its + * 8. Fill an instance of BmVpuEncParams with valid values. It is recommended to first set its * values to zero by using memset(). It is essential to make sure the acquire_output_buffer() and * finish_output_buffer() function pointers are set, as these are used for acquiring buffers * to write encoded output data into. - * 13. (Optional) If step 8 was performed, and therefore input data does *not* come in DMA memory, - * copy the pixels from the raw input frames into the DMA buffer allocated in step 8. Otherwise, + * 9. (Optional) If step 4 was performed, and therefore input data does *not* come in DMA memory, + * copy the pixels from the raw input frames into the DMA buffer allocated in step 4.
Otherwise, * if the raw input frames are already stored in DMA memory, this step can be omitted. - * 14. Call bmvpu_enc_encode(). Pass the raw frame, the encoded frame, and the encoding param - * structures from steps 9, 10, and 12 to it. + * 10. Call bmvpu_enc_encode(). Pass the raw frame, the encoded frame, and the encoding param + * structures from steps 5, 6, and 8 to it. * This function will encode data, and acquire an output buffer to write the encoded data into - * by using the acquire_output_buffer() function pointer set in step 12. Once it is done - * encoding, it will call the finish_output_buffer() function from step 12. Any handle created + * by using the acquire_output_buffer() function pointer set in step 8. Once it is done + * encoding, it will call the finish_output_buffer() function from step 8. Any handle created * by acquire_output_buffer() will be copied over to the encoded data frame structure. When * bmvpu_enc_encode() exits, this handle can then be used to further process the output data. * It is guaranteed that once acquire_output_buffer() was called, finish_output_buffer() will @@ -102,17 +103,378 @@ extern "C" { * The BM_VPU_ENC_OUTPUT_CODE_ENCODED_FRAME_AVAILABLE output code bit will always be set * unless the function returned a code other than BM_VPU_ENC_RETURN_CODE_OK. * If the BM_VPU_ENC_OUTPUT_CODE_CONTAINS_HEADER bit is set, then header data has been - * written in the output memory block allocated in step 10. It is placed right before the + * written in the output memory block allocated in step 6. It is placed right before the * actual encoded frame data. bmvpu_enc_encode() will pass over the combined size of the header * and the encoded frame data to acquire_output_buffer() in this case, ensuring that the output * buffers are big enough. - * 15. Repeat steps 11 to 14 until there are no more frames to encode or an error occurs. - * 16. After encoding is finished, close the encoder with bmvpu_enc_close(). - * 17.
Deallocate framebuffer memory blocks, the input DMA buffer block, the output memory block, + * 11. Repeat steps 7 to 10 until there are no more frames to encode or an error occurs. + * 12. After encoding is finished, close the encoder with bmvpu_enc_close(). + * 13. Deallocate framebuffer memory blocks, the input DMA buffer block, the output memory block, * and the bitstream buffer memory block. * - * The VPU's encoders only support the BM_VPU_COLOR_FORMAT_YUV420 format. + * The VPU's encoders only support the BM_VPU_ENC_PIX_FORMAT_YUV420P format. + */ + +/**************************************************/ +/******* LOG STRUCTURES AND FUNCTIONS *******/ +/**************************************************/ +/* Log levels. */ +typedef enum +{ + BMVPU_ENC_LOG_LEVEL_ERROR = 0, + BMVPU_ENC_LOG_LEVEL_WARNING = 1, + BMVPU_ENC_LOG_LEVEL_INFO = 2, + BMVPU_ENC_LOG_LEVEL_DEBUG = 3, /* only useful for developers */ + BMVPU_ENC_LOG_LEVEL_LOG = 4, /* only useful for developers */ + BMVPU_ENC_LOG_LEVEL_TRACE = 5 /* only useful for developers */ +} BmVpuEncLogLevel; + +/* Function pointer type for logging functions. + * + * This function is invoked by BM_VPU_LOG() macro calls. This macro also passes the name + * of the source file, the line in that file, and the function name where the logging occurs + * to the logging function (over the file, line, and fn arguments, respectively). + * Together with the log level, custom logging functions can output this metadata, or use + * it for log filtering etc.*/ +typedef void (*BmVpuEncLoggingFunc)(BmVpuEncLogLevel level, char const *file, int const line, + char const *fn, const char *format, ...); + +/* Defines the threshold for logging. Logs with lower priority are discarded. + * By default, the threshold is set to BMVPU_ENC_LOG_LEVEL_INFO. */ +DECL_EXPORT void bmvpu_enc_set_logging_threshold(BmVpuEncLogLevel threshold); + +/* Defines a custom logging function. + * If logging_fn is NULL, logging is disabled. This is the default value.
*/ +DECL_EXPORT void bmvpu_enc_set_logging_function(BmVpuEncLoggingFunc logging_fn); + +/* Get the threshold for logging. */ +DECL_EXPORT BmVpuEncLogLevel bmvpu_enc_get_logging_threshold(void); + + + + +/**************************************************/ +/******* ALLOCATOR STRUCTURES AND FUNCTIONS *******/ +/**************************************************/ + +/* Typedef for physical addresses */ +#ifdef __linux__ +typedef unsigned long bmvpu_phys_addr_t; +#elif _WIN32 +typedef unsigned long long bmvpu_phys_addr_t; +#endif + +/* BmVpuAllocationFlags: flags for the BmVpuEncDMABufferAllocator's allocate vfunc */ +typedef enum +{ + BM_VPU_ALLOCATION_FLAG_CACHED = 0, + BM_VPU_ALLOCATION_FLAG_WRITECOMBINE = 1, + BM_VPU_ALLOCATION_FLAG_UNCACHED = 2 +} BmVpuAllocationFlags; + +#define BM_VPU_ALLOCATION_FLAG_DEFAULT BM_VPU_ALLOCATION_FLAG_WRITECOMBINE + + +typedef struct { + unsigned int size; + uint64_t phys_addr; + uint64_t virt_addr; + int enable_cache; +} BmVpuEncDMABuffer; + +/** + * Upload data from HOST to a VPU core. + * For now, only support PCIE mode. + * + * return value: + * -1, failed + * 0, done */ +DECL_EXPORT int bmvpu_enc_upload_data(int vpu_core_idx, + const uint8_t* host_va, int host_stride, + uint64_t vpu_pa, int vpu_stride, + int width, int height); + +/** + * Download data from a VPU core to HOST. + * For now, only support PCIE mode. + * + * return value: + * -1, failed + * 0, done + */ +DECL_EXPORT int bmvpu_enc_download_data(int vpu_core_idx, + uint8_t* host_va, int host_stride, + uint64_t vpu_pa, int vpu_stride, + int width, int height); + +/* Function pointer used during bmvpu_enc_open for allocate physical buffers. 
+ * vpu_core_idx: the buffer used by specified core + * buf: the output physical buffer info + * size: the input size for allocate physical buffer +*/ +typedef void* (*BmVpuEncBufferAllocFunc)(void *context, int vpu_core_idx, + BmVpuEncDMABuffer *buf, unsigned int size); +typedef void* (*BmVpuEncBufferFreeFunc)(void *context, int vpu_core_idx, + BmVpuEncDMABuffer *buf); + +/** + * Alloc device memory according to the specified heap_id. + * return value: + * -1, failed + * 0, done + */ +DECL_EXPORT int bmvpu_enc_dma_buffer_allocate(int vpu_core_idx, BmVpuEncDMABuffer *buf, unsigned int size); + +/** + * DeAlloc device memory. + * return value: + * -1, failed + * 0, done + */ +DECL_EXPORT int bmvpu_enc_dma_buffer_deallocate(int vpu_core_idx, BmVpuEncDMABuffer *buf); + +/** + * Attach an externally allocated buffer + * return value: + * -1, failed + * 0, done + */ +DECL_EXPORT int bmvpu_enc_dma_buffer_attach(int vpu_core_idx, uint64_t paddr, unsigned int size); + +/** + * Deattach an externally allocated buffer + * return value: + * -1, failed + * 0, done + */ +DECL_EXPORT int bmvpu_enc_dma_buffer_deattach(int vpu_core_idx, uint64_t paddr, unsigned int size); + + +/** + * Mmap operation + * + * return value: + * -1, failed + * 0, done + */ +DECL_EXPORT int bmvpu_dma_buffer_map(int vpu_core_idx, BmVpuEncDMABuffer* buf, int port_flag); + +/** + * Munmap operation + * + * return value: + * -1, failed + * 0, done + */ +DECL_EXPORT int bmvpu_dma_buffer_unmap(int vpu_core_idx, BmVpuEncDMABuffer* buf); + +/** + * flush operation + * + * return value: + * -1, failed + * 0, done + */ +DECL_EXPORT int bmvpu_enc_dma_buffer_flush(int vpu_core_idx, BmVpuEncDMABuffer* buf); + +/** + * invalidate operation + * + * return value: + * -1, failed + * 0, done + */ +DECL_EXPORT int bmvpu_enc_dma_buffer_invalidate(int vpu_core_idx, BmVpuEncDMABuffer* buf); + +DECL_EXPORT uint64_t bmvpu_enc_dma_buffer_get_physical_address(BmVpuEncDMABuffer* buf); +DECL_EXPORT unsigned int 
bmvpu_enc_dma_buffer_get_size(BmVpuEncDMABuffer* buf); + + +/******************************************************/ +/******* MISCELLANEOUS STRUCTURES AND FUNCTIONS *******/ +/******************************************************/ + + +/* Frame types understood by the VPU. */ +typedef enum +{ + BM_VPU_ENC_FRAME_TYPE_UNKNOWN = 0, + BM_VPU_ENC_FRAME_TYPE_I, + BM_VPU_ENC_FRAME_TYPE_P, + BM_VPU_ENC_FRAME_TYPE_B, + BM_VPU_ENC_FRAME_TYPE_IDR +} BmVpuEncFrameType; + + +/* Codec format to use for en/decoding. */ +typedef enum +{ + /* H.264. + * Encoding: Baseline/Constrained Baseline/Main/High/High 10 Profiles Level @ L5.2 + */ + BM_VPU_CODEC_FORMAT_H264, + + /* H.265. + * Encoding: Supports Main/Main 10/Main Still Picture Profiles + * @ L5.1 High-tier + */ + BM_VPU_CODEC_FORMAT_H265, +} BmVpuCodecFormat; + +/* BmVpuEncMappingFlags: flags for the vpu_EncMmap() function + * These flags can be bitwise-OR combined */ +typedef enum +{ + /* Map memory for CPU write access */ + BM_VPU_ENC_MAPPING_FLAG_WRITE = (1UL << 0), + /* Map memory for CPU read access */ + BM_VPU_ENC_MAPPING_FLAG_READ = (1UL << 1) +} BmVpuEncMappingFlags; + + +typedef enum +{ + BM_VPU_ENC_PIX_FORMAT_YUV420P = 0, /* planar 4:2:0 chroma_interleave is 0;*/ + BM_VPU_ENC_PIX_FORMAT_YUV422P = 1, /* enc not support.*/ + BM_VPU_ENC_PIX_FORMAT_YUV444P = 3, /* enc not support.*/ + BM_VPU_ENC_PIX_FORMAT_YUV400 = 4, /* enc not support 8-bit grayscale */ + BM_VPU_ENC_PIX_FORMAT_NV12 = 5, /* planar 4:2:0 chroma_interleave is 1;*/ + BM_VPU_ENC_PIX_FORMAT_NV16 = 6, /* enc not support.*/ + BM_VPU_ENC_PIX_FORMAT_NV24 = 7, /* enc not support.*/ +} BmVpuEncPixFormat; + + + +/* Framebuffers are frame containers, and are used both for en- and decoding. */ +typedef struct +{ + /* DMA buffer which contains the pixels. */ + BmVpuEncDMABuffer *dma_buffer; + + /* Make sure each framebuffer has an ID that is different + * to the IDs of each other */ + int myIndex; + + /* Stride of the Y and of the Cb&Cr components.
+ * Specified in bytes. */ + unsigned int y_stride; + unsigned int cbcr_stride; // TODO + + unsigned int width; /* width of frame buffer */ + unsigned int height; /* height of frame buffer */ + + /* These define the starting offsets of each component + * relative to the start of the buffer. Specified in bytes. */ + size_t y_offset; + size_t cb_offset; + size_t cr_offset; + + /* Set to 1 if the framebuffer was already marked as used in encoder. + * This is for internal use only. + * Not to be read or written from the outside. */ + int already_marked; + + /* Internal, implementation-defined data. Do not modify. */ + void *internal; + + /* User-defined pointer. + * The library does not touch this value. + * This can be used for example to identify which framebuffer out of + * the initially allocated pool was used by the VPU to contain a frame. + */ + void *context; +} BmVpuFramebuffer; + +/* Structure containing details about encoded frames. */ +typedef struct +{ + /* When decoding, data must point to the memory block which contains + * encoded frame data that gets consumed by the VPU. + * Only used by the decoder. */ + uint8_t *data; + + /* Size of the encoded data, in bytes. When decoding, this is set by + * the user, and is the size of the encoded data that is pointed to + * by data. When encoding, the encoder sets this to the size of the + * acquired output block, in bytes (exactly the same value as the + * acquire_output_buffer's size argument). */ + size_t data_size; + + /* Frame type (I, P, B, ..) of the encoded frame. Filled by the encoder. + * Only used by the encoder. */ + BmVpuEncFrameType frame_type; + + /* Handle produced by the user-defined acquire_output_buffer function + * during encoding. + * Only used by the encoder. */ + void *acquired_handle; + + /* User-defined pointer. + * The library does not touch this value. + * This pointer and the one from the corresponding raw frame will have + * the same value. The library will pass then through. 
*/ + void *context; + + /* User-defined timestamps. + * In many cases, the context one wants to associate with raw/encoded frames + * is a PTS-DTS pair. Just like the context pointer, the library just passes + * them through to the associated raw frame, and does not actually touch + * their values. */ + uint64_t pts; + uint64_t dts; + + int src_idx; + + // for roi index + bmvpu_phys_addr_t u64CustomMapPhyAddr; + + int avg_ctu_qp; +} BmVpuEncodedFrame; + + +/* Structure containing details about raw, uncompressed frames. */ +typedef struct +{ + BmVpuFramebuffer *framebuffer; + + /* User-defined pointer. + * The library does not touch this value. + * This pointer and the one from the corresponding encoded frame will have + * the same value. The library will pass then through. */ + void *context; + + /* User-defined timestamps. + * In many cases, the context one wants to associate with raw/encoded frames + * is a PTS-DTS pair. Just like the context pointer, the library just passes + * them through to the associated encoded frame, and does not actually touch + * their values. */ + uint64_t pts; + uint64_t dts; +} BmVpuRawFrame; + +typedef struct { + /* Frame width and height, aligned to the 16-pixel boundary required by the VPU. */ + int width; + int height; + + /* Stride sizes, in bytes, with alignment applied. + * The Cb and Cr planes always use the same stride. */ + int y_stride; /* aligned stride */ + int c_stride; /* aligned stride (optional) */ + + /* Required DMA memory size for the Y,Cb,Cr planes, in bytes. + * The Cb and Cr planes always are of the same size. */ + int y_size; + int c_size; + + /* Total required size of a framebuffer's DMA buffer, in bytes. + * This value includes the sizes of all planes. */ + int size; +} BmVpuFbInfo; + + + /* Encoder return codes. With the exception of BM_VPU_ENC_RETURN_CODE_OK, these @@ -143,6 +505,10 @@ typedef enum BM_VPU_ENC_RETURN_CODE_WRONG_CALL_SEQUENCE, /* The operation timed out. 
*/ BM_VPU_ENC_RETURN_CODE_TIMEOUT, + /* resend frame*/ + BM_VPU_ENC_RETURN_CODE_RESEND_FRAME, + /* encode end*/ + BM_VPU_ENC_RETURN_CODE_ENC_END, /* The encoding end. */ BM_VPU_ENC_RETURN_CODE_END } BmVpuEncReturnCodes; @@ -180,6 +546,27 @@ enum { BM_COMPRESSED_FRAME_MAP = 10 /* Compressed frame map type */ }; +/** + * @brief   This is a special enumeration type for defining GOP structure presets. + */ +typedef enum { + BM_VPU_ENC_GOP_PRESET_ALL_I = 1, /**< All Intra, gopsize = 1 */ + BM_VPU_ENC_GOP_PRESET_IPP = 2, /**< Consecutive P, cyclic gopsize = 1 */ + BM_VPU_ENC_GOP_PRESET_IBBB = 3, /**< Consecutive B, cyclic gopsize = 1 */ + BM_VPU_ENC_GOP_PRESET_IBPBP = 4, /**< gopsize = 2 */ + BM_VPU_ENC_GOP_PRESET_IBBBP = 5, /**< gopsize = 4 */ + BM_VPU_ENC_GOP_PRESET_IPPPP = 6, /**< Consecutive P, cyclic gopsize = 4 */ + BM_VPU_ENC_GOP_PRESET_IBBBB = 7, /**< Consecutive B, cyclic gopsize = 4 */ + BM_VPU_ENC_GOP_PRESET_RA_IB = 8, /**< Random Access, cyclic gopsize = 8 */ +} BMVpuEncGopPreset; + +typedef enum { + BM_VPU_ENC_CUSTOM_MODE = 0, // Custom mode, + BM_VPU_ENC_RECOMMENDED_MODE = 1, // recommended encoder parameters (slow encoding speed, highest picture quality) + BM_VPU_ENC_BOOST_MODE = 2, // Boost mode (normal encoding speed, normal picture quality), + BM_VPU_ENC_FAST_MODE = 3, // Fast mode (high encoding speed, low picture quality) */ +} BMVpuEncMode; + /* h.264 parameters for the new encoder instance. */ typedef struct { @@ -226,11 +613,10 @@ typedef struct /* Color format to use for incoming frames. * Video codec formats only allow for the two formats - * BM_VPU_COLOR_FORMAT_YUV420 and BM_VPU_COLOR_FORMAT_YUV400 (the second + * BM_VPU_ENC_PIX_FORMAT_YUV420P and BM_VPU_COLOR_FORMAT_YUV400 (the second * one is supported by using YUV420 and dummy U and V planes internally). - * See the BmVpuColorFormat documentation for an explanation how - * the chroma_interleave value can affec the pixel format that is used. 
*/ - BmVpuColorFormat color_format; + * See the BmVpuEncPixFormat documentation. */ + BmVpuEncPixFormat pix_format; /* Width and height of the incoming frames, in pixels. These * do not have to be aligned to any boundaries. */ @@ -265,7 +651,7 @@ typedef struct * 1 : recommended encoder parameters (slow encoding speed, highest picture quality) * 2 : Boost mode (normal encoding speed, normal picture quality), * 3 : Fast mode (high encoding speed, low picture quality) */ - int enc_mode; + BMVpuEncMode enc_mode; /* The number of merge candidates in RDO(1 or 2). * 1: improve encoding performance. @@ -307,10 +693,6 @@ typedef struct BmVpuEncH265Params h265_params; }; - /* If this is 1, then Cb and Cr are interleaved in one shared chroma - * plane, otherwise they are separated in their own planes. - * See the BmVpuColorFormat documentation for the consequences of this. */ - int chroma_interleave; /* only used for PCIE mode. For SOC mode, this must be 0. * Default value is 0. */ @@ -327,7 +709,7 @@ typedef struct * 8: Random Access, I-B-B-B-B-B-B-B-B, cyclic gopsize = 8 * Low delay cases are 1, 2, 3, 6, 7. * Default value is 5. */ - int gop_preset; + BMVpuEncGopPreset gop_preset; // TODO /* A period of intra picture in GOP size. @@ -357,6 +739,18 @@ typedef struct /* roi encoding flag * Default value is 0 */ int roi_enable; + + /* set cmd queue depath + * Default value is 4 + * the value must 1 <= value <= 4*/ + int cmd_queue_depth; + + int timeout; + int timeout_count; + + BmVpuEncBufferAllocFunc buffer_alloc_func; + BmVpuEncBufferFreeFunc buffer_free_func; + void *buffer_context; } BmVpuEncOpenParams; /* Initial encoding information, produced by the encoder. This structure is @@ -425,7 +819,7 @@ typedef struct { * coefficient drop flag, QPs, and lambdas like the below illustration. 
* image::../figure/wave520_ctumap.svg["Format of custom Map", width=300] */ - bm_pa_t addrCustomMap; + bmvpu_phys_addr_t addrCustomMap; } BmCustomMapOpt; @@ -447,17 +841,15 @@ typedef struct /* User supplied value that will be passed to the functions */ void *output_buffer_context; - int customMapOptUsedIndex; - BmCustomMapOpt* customMapOpt; - bm_device_mem_t** roi_dma_buffer; + /* roi custom map info */ + BmCustomMapOpt* customMapOpt; } BmVpuEncParams; /* BM VPU Encoder structure. */ typedef struct { void* handle; - bm_handle_t bm_handle; int soc_idx; /* The index of Sophon SoC. * For PCIE mode, please refer to the number at /dev/bm-sophonxx. @@ -465,7 +857,7 @@ typedef struct int core_idx; /* unified index for vpu encoder cores at all Sophon SoCs */ BmVpuCodecFormat codec_format; - BmVpuColorFormat color_format; + BmVpuEncPixFormat pix_format; uint32_t frame_width; uint32_t frame_height; @@ -473,50 +865,50 @@ typedef struct uint32_t fps_n; uint32_t fps_d; - int cbcr_interleave; int first_frame; int rc_enable; /* constant qp when rc is disabled */ int cqp; - /* DMA buffer allocator */ - // BmVpuDMABufferAllocator *dmabuffers_allocator; //Deprecated, now use bmlib - /* DMA buffer for working */ - bm_device_mem_t* work_dmabuffer; + BmVpuEncDMABuffer* work_dmabuffer; /* DMA buffer for bitstream */ - bm_device_mem_t* bs_dmabuffer; + BmVpuEncDMABuffer* bs_dmabuffer; unsigned long long bs_virt_addr; bmvpu_phys_addr_t bs_phys_addr; /* DMA buffer for frame data */ uint32_t num_framebuffers; - VpuFrameBuffer* internal_framebuffers; + void * /*VpuFrameBuffer**/ internal_framebuffers; BmVpuFramebuffer* framebuffers; /* TODO change all as the parameters of bmvpu_enc_register_framebuffers() */ /* DMA buffer for colMV */ - bm_device_mem_t* buffer_mv; + BmVpuEncDMABuffer* buffer_mv; /* DMA buffer for FBC luma table */ - bm_device_mem_t* buffer_fbc_y_tbl; + BmVpuEncDMABuffer* buffer_fbc_y_tbl; /* DMA buffer for FBC chroma table */ - bm_device_mem_t* buffer_fbc_c_tbl; + 
BmVpuEncDMABuffer* buffer_fbc_c_tbl; /* Sum-sampled DMA buffer for ME */ - bm_device_mem_t* buffer_sub_sam; + BmVpuEncDMABuffer* buffer_sub_sam; uint8_t* headers_rbsp; size_t headers_rbsp_size; BmVpuEncInitialInfo initial_info; -} BmVpuEncoder; + int timeout; + int timeout_count; + /* internal use */ + void *video_enc_ctx; +} BmVpuEncoder; /* Returns a human-readable description of the error code. * Useful for logging. */ @@ -535,12 +927,7 @@ DECL_EXPORT int bmvpu_enc_get_core_idx(int soc_idx); DECL_EXPORT int bmvpu_enc_load(int soc_idx); DECL_EXPORT int bmvpu_enc_unload(int soc_idx); -/* - * If a bm_handle_t on this soc already exists, return it directly, - * otherwise return NULL. This function should be called after bmvpu_enc_load() - * is called, otherwise it is possibily that there is not bm_handle_t on that soc. - */ -DECL_EXPORT bm_handle_t bmvpu_enc_get_bmlib_handle(int soc_idx); +DECL_EXPORT int bmvpu_get_ext_addr(); /* Called before bmvpu_enc_open(), it returns the alignment and size for the * physical memory block necessary for the encoder's bitstream buffer. @@ -552,27 +939,26 @@ DECL_EXPORT void bmvpu_enc_get_bitstream_buffer_info(size_t *size, uint32_t *ali * Useful if the caller wants to modify only a few fields (or none at all) */ DECL_EXPORT void bmvpu_enc_set_default_open_params(BmVpuEncOpenParams *open_params, BmVpuCodecFormat codec_format); + +/** + * Fill fields of the BmVpuFramebuffer structure, based on data from "fb_info". + * The specified DMA buffer and context pointer are also set. + */ +DECL_EXPORT int bmvpu_fill_framebuffer_params(BmVpuFramebuffer *framebuffer, + BmVpuFbInfo *fb_info, + BmVpuEncDMABuffer *fb_dma_buffer, + int fb_id, void* context); + + /* Opens a new encoder instance. * "open_params" and "bs_dmabuffer" must not be NULL. 
*/ DECL_EXPORT int bmvpu_enc_open(BmVpuEncoder **encoder, BmVpuEncOpenParams *open_params, - bm_device_mem_t *bs_dmabuffer); + BmVpuEncDMABuffer *bs_dmabuffer, BmVpuEncInitialInfo *initial_info); /* Closes a encoder instance. * Trying to close the same instance multiple times results in undefined behavior. */ DECL_EXPORT int bmvpu_enc_close(BmVpuEncoder *encoder); -/* Retrieves initial information available after calling bmvpu_enc_open(). - * Internally this also generates stream headers. */ -DECL_EXPORT int bmvpu_enc_get_initial_info(BmVpuEncoder *encoder, BmVpuEncInitialInfo *info); - -/* Registers the specified array of framebuffers with the encoder. - * These framebuffers are used for temporary values during encoding. - * The minimum valid value for "num_framebuffers" is - * the "min_num_rec_fb" field of BmVpuEncInitialInfo. */ -DECL_EXPORT int bmvpu_enc_register_framebuffers(BmVpuEncoder *encoder, - BmVpuFramebuffer *framebuffers, - uint32_t num_framebuffers); - /* Encodes a given raw input frame with the given encoding parameters. * encoded_frame is filled with information about the resulting encoded output frame. * The encoded frame data itself is stored in a buffer that is allocated @@ -620,6 +1006,20 @@ DECL_EXPORT int bmvpu_enc_encode(BmVpuEncoder *encoder, BmVpuEncParams *encoding_params, uint32_t *output_code); + +/** + * Returns a human-readable description of the given color format. + * Useful for logging. + */ +char const *bmvpu_pix_format_string(BmVpuEncPixFormat pix_format); + +/** + * Returns a human-readable description of the given frame type. + * Useful for logging. 
+ */ +char const *bmvpu_frame_type_string(BmVpuEncFrameType frame_type); + + /** * Parse the parameters in string * @@ -629,6 +1029,9 @@ DECL_EXPORT int bmvpu_enc_encode(BmVpuEncoder *encoder, */ DECL_EXPORT int bmvpu_enc_param_parse(BmVpuEncOpenParams *p, const char *name, const char *value); + + + #ifdef __cplusplus } #endif diff --git a/bmvid/video/encoder/bm_enc_api/inc/bmvpuapi_internal.h b/bmvid/video/encoder/bm_enc_api/inc/bm_vpuenc_internal.h similarity index 51% rename from bmvid/video/encoder/bm_enc_api/inc/bmvpuapi_internal.h rename to bmvid/video/encoder/bm_enc_api/inc/bm_vpuenc_internal.h index 8b32c0b..33c20c6 100644 --- a/bmvid/video/encoder/bm_enc_api/inc/bmvpuapi_internal.h +++ b/bmvid/video/encoder/bm_enc_api/inc/bm_vpuenc_internal.h @@ -21,7 +21,7 @@ #include "bmvpu.h" -#include "bmvpuapi.h" +#include "bm_vpuenc_interface.h" #define BMVPUAPI_UNUSED_PARAM(x) ((void)(x)) @@ -33,12 +33,12 @@ /* Log macros and functions */ /***********************************************/ -#define BM_VPU_ERROR_FULL(FILE_, LINE_, FUNCTION_, ...) do { if (bmvpu_cur_log_level_threshold >= BM_VPU_LOG_LEVEL_ERROR) { bmvpu_cur_logging_fn(BM_VPU_LOG_LEVEL_ERROR, FILE_, LINE_, FUNCTION_, __VA_ARGS__); } } while(0) -#define BM_VPU_WARNING_FULL(FILE_, LINE_, FUNCTION_, ...) do { if (bmvpu_cur_log_level_threshold >= BM_VPU_LOG_LEVEL_WARNING) { bmvpu_cur_logging_fn(BM_VPU_LOG_LEVEL_WARNING, FILE_, LINE_, FUNCTION_, __VA_ARGS__); } } while(0) -#define BM_VPU_INFO_FULL(FILE_, LINE_, FUNCTION_, ...) do { if (bmvpu_cur_log_level_threshold >= BM_VPU_LOG_LEVEL_INFO) { bmvpu_cur_logging_fn(BM_VPU_LOG_LEVEL_INFO, FILE_, LINE_, FUNCTION_, __VA_ARGS__); } } while(0) -#define BM_VPU_DEBUG_FULL(FILE_, LINE_, FUNCTION_, ...) do { if (bmvpu_cur_log_level_threshold >= BM_VPU_LOG_LEVEL_DEBUG) { bmvpu_cur_logging_fn(BM_VPU_LOG_LEVEL_DEBUG, FILE_, LINE_, FUNCTION_, __VA_ARGS__); } } while(0) -#define BM_VPU_LOG_FULL(FILE_, LINE_, FUNCTION_, ...) 
do { if (bmvpu_cur_log_level_threshold >= BM_VPU_LOG_LEVEL_LOG) { bmvpu_cur_logging_fn(BM_VPU_LOG_LEVEL_LOG, FILE_, LINE_, FUNCTION_, __VA_ARGS__); } } while(0) -#define BM_VPU_TRACE_FULL(FILE_, LINE_, FUNCTION_, ...) do { if (bmvpu_cur_log_level_threshold >= BM_VPU_LOG_LEVEL_TRACE) { bmvpu_cur_logging_fn(BM_VPU_LOG_LEVEL_TRACE, FILE_, LINE_, FUNCTION_, __VA_ARGS__); } } while(0) +#define BM_VPU_ERROR_FULL(FILE_, LINE_, FUNCTION_, ...) do { if (bmvpu_cur_log_level_threshold >= BMVPU_ENC_LOG_LEVEL_ERROR) { bmvpu_cur_logging_fn(BMVPU_ENC_LOG_LEVEL_ERROR, FILE_, LINE_, FUNCTION_, __VA_ARGS__); } } while(0) +#define BM_VPU_WARNING_FULL(FILE_, LINE_, FUNCTION_, ...) do { if (bmvpu_cur_log_level_threshold >= BMVPU_ENC_LOG_LEVEL_WARNING) { bmvpu_cur_logging_fn(BMVPU_ENC_LOG_LEVEL_WARNING, FILE_, LINE_, FUNCTION_, __VA_ARGS__); } } while(0) +#define BM_VPU_INFO_FULL(FILE_, LINE_, FUNCTION_, ...) do { if (bmvpu_cur_log_level_threshold >= BMVPU_ENC_LOG_LEVEL_INFO) { bmvpu_cur_logging_fn(BMVPU_ENC_LOG_LEVEL_INFO, FILE_, LINE_, FUNCTION_, __VA_ARGS__); } } while(0) +#define BM_VPU_DEBUG_FULL(FILE_, LINE_, FUNCTION_, ...) do { if (bmvpu_cur_log_level_threshold >= BMVPU_ENC_LOG_LEVEL_DEBUG) { bmvpu_cur_logging_fn(BMVPU_ENC_LOG_LEVEL_DEBUG, FILE_, LINE_, FUNCTION_, __VA_ARGS__); } } while(0) +#define BM_VPU_LOG_FULL(FILE_, LINE_, FUNCTION_, ...) do { if (bmvpu_cur_log_level_threshold >= BMVPU_ENC_LOG_LEVEL_LOG) { bmvpu_cur_logging_fn(BMVPU_ENC_LOG_LEVEL_LOG, FILE_, LINE_, FUNCTION_, __VA_ARGS__); } } while(0) +#define BM_VPU_TRACE_FULL(FILE_, LINE_, FUNCTION_, ...) do { if (bmvpu_cur_log_level_threshold >= BMVPU_ENC_LOG_LEVEL_TRACE) { bmvpu_cur_logging_fn(BMVPU_ENC_LOG_LEVEL_TRACE, FILE_, LINE_, FUNCTION_, __VA_ARGS__); } } while(0) #define BM_VPU_ERROR(...) BM_VPU_ERROR_FULL (__FILE__, __LINE__, __func__, __VA_ARGS__) @@ -49,8 +49,8 @@ #define BM_VPU_TRACE(...) 
BM_VPU_TRACE_FULL (__FILE__, __LINE__, __func__, __VA_ARGS__) -extern BmVpuLogLevel bmvpu_cur_log_level_threshold; -extern BmVpuLoggingFunc bmvpu_cur_logging_fn; +extern BmVpuEncLogLevel bmvpu_cur_log_level_threshold; +extern BmVpuEncLoggingFunc bmvpu_cur_logging_fn; @@ -86,9 +86,55 @@ extern BmVpuLoggingFunc bmvpu_cur_logging_fn; #endif #define VPU_MAX_TIMEOUT_COUNTS 40 /* how many timeouts are allowed in series */ +#define VPU_MAX_SRC_BUFFER_NUM 32 + +typedef struct +{ + bm_pa_t src_fb_addr; + bm_pa_t custom_map_addr; + int src_idx; +} BmVpuFrameAttr; + +typedef struct +{ + BmVpuFramebuffer* rec_fb_list; + BmVpuEncDMABuffer** rec_fb_dmabuffers; + int num_rec_fb; + + BmVpuEncBufferAllocFunc buffer_alloc_func; + BmVpuEncBufferFreeFunc buffer_free_func; + void *buffer_context; +} BmVpuEncoderCtx; + + +typedef enum +{ + /* U and V in two separate planes */ + BM_VPU_ENC_CHROMA_NO_INTERLEAVE = 0, + /* U and V in one plane, e.g. UVUV */ + BM_VPU_ENC_CHROMA_INTERLEAVE_CBCR = 1, + /* U and V in one plane, e.g. VUVU(not support). */ + BM_VPU_ENC_CHROMA_INTERLEAVE_CRCB = 2, +} BmVpuEncChromaFormat; + int bmvpu_load(void); int bmvpu_unload(void); +/** + * Calculate various sizes out of the given width & height and color format. + * The results are stored in "fb_info". + * The given frame width and height will be aligned if they aren't already, + * and the aligned value will be stored in fb_info. + * Width & height must be nonzero. + * The fb_info pointer must also be non-NULL. + * framebuffer_alignment is an alignment value for the sizes of the Y/U/V planes. + * 0 or 1 mean no alignment. + * chroma_interleave is set to 1 if a shared CbCr chroma plane is to be used, + * 0 if Cb and Cr shall use separate planes. 
+ */ +DECL_EXPORT int bmvpu_calc_framebuffer_sizes(int mapType, BmVpuEncPixFormat color_format, + int frame_width, int frame_height, + BmVpuFbInfo *fb_info); #endif /* _BM_VPU_INTERNAL_H_ */ diff --git a/bmvid/video/encoder/bm_enc_api/inc/bmvpuapi_common.h b/bmvid/video/encoder/bm_enc_api/inc/bmvpuapi_common.h deleted file mode 100644 index 6b7dbce..0000000 --- a/bmvid/video/encoder/bm_enc_api/inc/bmvpuapi_common.h +++ /dev/null @@ -1,367 +0,0 @@ -/***************************************************************************** - * - * Copyright (C) 2022 Sophgo Technologies Inc. All rights reserved. - * - * bmvid is licensed under the 2-Clause BSD License except for the - * third-party components. - * - *****************************************************************************/ -/* This library provides a high-level interface for controlling the BitMain - * Sophon VPU en/decoder. - */ - -#ifndef __BMVPUAPI_COMMON_H__ -#define __BMVPUAPI_COMMON_H__ - -#include -#include - -#if defined(_WIN32) || defined(WIN32) || defined(__WIN32__) -#define ATTRIBUTE -#define DECL_EXPORT __declspec(dllexport) -#define DECL_IMPORT __declspec(dllimport) -#include -#else -#define ATTRIBUTE __attribute__((deprecated)) -#define DECL_EXPORT -#define DECL_IMPORT -#endif -#ifdef __cplusplus -extern "C" { -#endif - -/* for using bmlib */ -#include "bmlib_runtime.h" - -/**************************************************/ -/******* ALLOCATOR STRUCTURES AND FUNCTIONS *******/ -/**************************************************/ - -/* Typedef for physical addresses */ -#ifdef __linux__ -typedef unsigned long bmvpu_phys_addr_t; -#elif _WIN32 -typedef unsigned long long bmvpu_phys_addr_t; -#endif - -/* BmVpuAllocationFlags: flags for the BmVpuDMABufferAllocator's allocate vfunc */ -typedef enum -{ - BM_VPU_ALLOCATION_FLAG_CACHED = 0, - BM_VPU_ALLOCATION_FLAG_WRITECOMBINE = 1, - BM_VPU_ALLOCATION_FLAG_UNCACHED = 2 -} BmVpuAllocationFlags; - -#define BM_VPU_ALLOCATION_FLAG_DEFAULT 
BM_VPU_ALLOCATION_FLAG_WRITECOMBINE - - -/* BmVpuMappingFlags: flags for the BmVpuDMABufferAllocator's map function pointers - * These flags can be bitwise-OR combined, although READ and WRITE cannot - * both be set */ -typedef enum -{ - /* Map memory for CPU write access */ - BM_VPU_MAPPING_FLAG_WRITE = (1UL << 0), - /* Map memory for CPU read access */ - BM_VPU_MAPPING_FLAG_READ = (1UL << 1) -} BmVpuMappingFlags; - -/***********************/ -/******* LOGGING *******/ -/***********************/ - - -/* Log levels. */ -typedef enum -{ - BM_VPU_LOG_LEVEL_ERROR = 0, - BM_VPU_LOG_LEVEL_WARNING = 1, - BM_VPU_LOG_LEVEL_INFO = 2, - BM_VPU_LOG_LEVEL_DEBUG = 3, /* only useful for developers */ - BM_VPU_LOG_LEVEL_LOG = 4, /* only useful for developers */ - BM_VPU_LOG_LEVEL_TRACE = 5 /* only useful for developers */ -} BmVpuLogLevel; - -/* Function pointer type for logging functions. - * - * This function is invoked by BM_VPU_LOG() macro calls. This macro also passes the name - * of the source file, the line in that file, and the function name where the logging occurs - * to the logging function (over the file, line, and fn arguments, respectively). - * Together with the log level, custom logging functions can output this metadata, or use - * it for log filtering etc.*/ -typedef void (*BmVpuLoggingFunc)(BmVpuLogLevel level, char const *file, int const line, - char const *fn, const char *format, ...); - -/* Defines the threshold for logging. Logs with lower priority are discarded. - * By default, the threshold is set to BM_VPU_LOG_LEVEL_INFO. */ -DECL_EXPORT void bmvpu_set_logging_threshold(BmVpuLogLevel threshold); - -/* Defines a custom logging function. - * If logging_fn is NULL, logging is disabled. This is the default value. */ -DECL_EXPORT void bmvpu_set_logging_function(BmVpuLoggingFunc logging_fn); - -/* Get the threshold for logging. 
*/ -DECL_EXPORT BmVpuLogLevel bmvpu_get_logging_threshold(void); - - - -/******************************************************/ -/******* MISCELLANEOUS STRUCTURES AND FUNCTIONS *******/ -/******************************************************/ - - -/* Frame types understood by the VPU. */ -typedef enum -{ - BM_VPU_FRAME_TYPE_UNKNOWN = 0, - BM_VPU_FRAME_TYPE_I, - BM_VPU_FRAME_TYPE_P, - BM_VPU_FRAME_TYPE_B, - BM_VPU_FRAME_TYPE_IDR -} BmVpuFrameType; - - -/* Codec format to use for en/decoding. */ -typedef enum -{ - /* H.264. - * Encoding: Baseline/Constrained Baseline/Main/High/High 10 Profiles Level @ L5.2 - */ - BM_VPU_CODEC_FORMAT_H264, - - /* H.265. - * Encoding: Supports Main/Main 10/Main Still Picture Profiles - * @ L5.1 High-tier - */ - BM_VPU_CODEC_FORMAT_H265, -} BmVpuCodecFormat; - - -typedef enum -{ - /* planar 4:2:0; if the chroma_interleave parameter is 1, the corresponding format is NV12, otherwise it is I420 */ - BM_VPU_COLOR_FORMAT_YUV420 = 0, - /* planar 4:2:2; if the chroma_interleave parameter is 1, the corresponding format is NV16 */ - BM_VPU_COLOR_FORMAT_YUV422 = 1, - /* planar 4:4:4; if the chroma_interleave parameter is 1, the corresponding format is NV24 */ - BM_VPU_COLOR_FORMAT_YUV444 = 3, - /* 8-bit greayscale */ - BM_VPU_COLOR_FORMAT_YUV400 = 4 -} BmVpuColorFormat; - - -/* Framebuffers are frame containers, and are used both for en- and decoding. */ -typedef struct -{ - /* DMA buffer which contains the pixels. */ - bm_device_mem_t *dma_buffer; - - /* Make sure each framebuffer has an ID that is different - * to the IDs of each other */ - int myIndex; - - /* Stride of the Y and of the Cb&Cr components. - * Specified in bytes. */ - unsigned int y_stride; - unsigned int cbcr_stride; // TODO - - unsigned int width; /* width of frame buffer */ - unsigned int height; /* height of frame buffer */ - - /* These define the starting offsets of each component - * relative to the start of the buffer. Specified in bytes. 
*/ - size_t y_offset; - size_t cb_offset; - size_t cr_offset; - - /* Set to 1 if the framebuffer was already marked as used in encoder. - * This is for internal use only. - * Not to be read or written from the outside. */ - int already_marked; - - /* Internal, implementation-defined data. Do not modify. */ - void *internal; - - /* User-defined pointer. - * The library does not touch this value. - * This can be used for example to identify which framebuffer out of - * the initially allocated pool was used by the VPU to contain a frame. - */ - void *context; -} BmVpuFramebuffer; - - -/* Structure containing details about encoded frames. */ -typedef struct -{ - /* When decoding, data must point to the memory block which contains - * encoded frame data that gets consumed by the VPU. - * Only used by the decoder. */ - uint8_t *data; - - /* Size of the encoded data, in bytes. When decoding, this is set by - * the user, and is the size of the encoded data that is pointed to - * by data. When encoding, the encoder sets this to the size of the - * acquired output block, in bytes (exactly the same value as the - * acquire_output_buffer's size argument). */ - size_t data_size; - - /* Frame type (I, P, B, ..) of the encoded frame. Filled by the encoder. - * Only used by the encoder. */ - BmVpuFrameType frame_type; - - /* Handle produced by the user-defined acquire_output_buffer function - * during encoding. - * Only used by the encoder. */ - void *acquired_handle; - - /* User-defined pointer. - * The library does not touch this value. - * This pointer and the one from the corresponding raw frame will have - * the same value. The library will pass then through. */ - void *context; - - /* User-defined timestamps. - * In many cases, the context one wants to associate with raw/encoded frames - * is a PTS-DTS pair. Just like the context pointer, the library just passes - * them through to the associated raw frame, and does not actually touch - * their values. 
*/ - uint64_t pts; - uint64_t dts; - - int src_idx; - - int avg_ctu_qp; -} BmVpuEncodedFrame; - - -/* Structure containing details about raw, uncompressed frames. */ -typedef struct -{ - BmVpuFramebuffer *framebuffer; - - /* User-defined pointer. - * The library does not touch this value. - * This pointer and the one from the corresponding encoded frame will have - * the same value. The library will pass then through. */ - void *context; - - /* User-defined timestamps. - * In many cases, the context one wants to associate with raw/encoded frames - * is a PTS-DTS pair. Just like the context pointer, the library just passes - * them through to the associated encoded frame, and does not actually touch - * their values. */ - uint64_t pts; - uint64_t dts; -} BmVpuRawFrame; - -/* Structure used together with bmvpu_calc_framebuffer_sizes() */ -typedef struct { - /* Frame width and height, aligned to the 16-pixel boundary required by the VPU. */ - int width; - int height; - - /* Stride sizes, in bytes, with alignment applied. - * The Cb and Cr planes always use the same stride. */ - int y_stride; /* aligned stride */ - int c_stride; /* aligned stride (optional) */ - - /* Required DMA memory size for the Y,Cb,Cr planes, in bytes. - * The Cb and Cr planes always are of the same size. */ - int y_size; - int c_size; - - /* Total required size of a framebuffer's DMA buffer, in bytes. - * This value includes the sizes of all planes. */ - int size; -} BmVpuFbInfo; - -/** - * Calculate various sizes out of the given width & height and color format. - * The results are stored in "fb_info". - * The given frame width and height will be aligned if they aren't already, - * and the aligned value will be stored in fb_info. - * Width & height must be nonzero. - * The fb_info pointer must also be non-NULL. - * framebuffer_alignment is an alignment value for the sizes of the Y/U/V planes. - * 0 or 1 mean no alignment. 
- * chroma_interleave is set to 1 if a shared CbCr chroma plane is to be used, - * 0 if Cb and Cr shall use separate planes. - */ -DECL_EXPORT int bmvpu_calc_framebuffer_sizes(int mapType, BmVpuColorFormat color_format, - int frame_width, int frame_height, - int chroma_interleave, BmVpuFbInfo *fb_info); - -/** - * Fill fields of the BmVpuFramebuffer structure, based on data from "fb_info". - * The specified DMA buffer and context pointer are also set. - */ -DECL_EXPORT int bmvpu_fill_framebuffer_params(BmVpuFramebuffer *framebuffer, - BmVpuFbInfo *fb_info, - bm_device_mem_t *fb_dma_buffer, - int fb_id, void* context); - -/** - * Upload data from HOST to a VPU core. - * For now, only support PCIE mode. - * - * return value: - * -1, failed - * 0, done - */ -DECL_EXPORT int bmvpu_upload_data(int soc_idx, - const uint8_t* host_va, int host_stride, - uint64_t vpu_pa, int vpu_stride, - int width, int height); - -/** - * Alloc device memory according to the specified heap_id_mask. - * if high_bit_first is 1, high bit in heap_id_mask will be considered. - * The interface will consider heap_num for different device. - * For example, - * ---------------------------------------------------------------- - * heap_id_mask high_bit_first priority - * ---------------------------------------------------------------- - * 0x07 (0000 0110) 1 heap2(vpu) > heap1(vpp) - * 0x07 (0000 0110) 0 heap1(vpp) > heap2(vpu) - * ---------------------------------------------------------------- - * - * return value: - * -1, failed - * 0, done - */ -DECL_EXPORT int bmvpu_malloc_device_byte_heap(bm_handle_t handle, - bm_device_mem_t *pmem, unsigned int size, - int heap_id_mask, int high_bit_first); - -/** - * Download data from a VPU core to HOST. - * For now, only support PCIE mode. 
- * - * return value: - * -1, failed - * 0, done - */ -int bmvpu_download_data(int soc_idx, - uint8_t* host_va, int host_stride, - uint64_t vpu_pa, int vpu_stride, - int width, int height); - -/** - * Returns a human-readable description of the given color format. - * Useful for logging. - */ -char const *bmvpu_color_format_string(BmVpuColorFormat color_format); - -/** - * Returns a human-readable description of the given frame type. - * Useful for logging. - */ -char const *bmvpu_frame_type_string(BmVpuFrameType frame_type); - - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/bmvid/video/encoder/bm_enc_api/src/encoder.c b/bmvid/video/encoder/bm_enc_api/src/encoder.c index 7f281f2..d270ebf 100644 --- a/bmvid/video/encoder/bm_enc_api/src/encoder.c +++ b/bmvid/video/encoder/bm_enc_api/src/encoder.c @@ -22,6 +22,8 @@ #include #include #include +#include +#include #elif _WIN32 #include #include < MMSystem.h > @@ -31,15 +33,9 @@ #include /* SIG_SETMASK */ #include "bmvpu.h" -#include "bmvpuapi.h" -#include "bmvpuapi_internal.h" +#include "bm_vpuenc_interface.h" +#include "bm_vpuenc_internal.h" #define MAX_SOC_NUM 64 -typedef struct _G_bm_handle{ - bm_handle_t bm_handle; - unsigned int count; -} G_bm_handle; -static G_bm_handle g_bm_handle[MAX_SOC_NUM] = { {0, 0} }; -#define HEAP_MASK 0x06 #define VPU_ENC_BITSTREAM_BUFFER_SIZE (1024*1024*1) @@ -56,7 +52,8 @@ typedef struct _BmVpuEncWriteContext { bmvpu_enc_handle_error_full(__FILE__, __LINE__, __func__, (MSG_START), (RET_CODE)) #define BM_VPU_ENC_GET_BS_VA(BMVPUENC, BITSTREAM_PHYS_ADDR) \ - (((BMVPUENC)->bs_virt_addr) + ((bm_pa_t)(BITSTREAM_PHYS_ADDR) - (bm_pa_t)((BMVPUENC)->bs_phys_addr))) + (((BMVPUENC)->bs_virt_addr) + ((bmvpu_phys_addr_t)(BITSTREAM_PHYS_ADDR) - (bmvpu_phys_addr_t)((BMVPUENC)->bs_phys_addr))) + /* For BM1684 */ #define MAX_NUM_VPU_CORE_CHIP 5 @@ -70,14 +67,11 @@ enum { }; #define VID_OPEN_ENC_FLOCK_NAME "/tmp/vid_open_enc_global_flock" #ifdef __linux__ - -static int bmhandle_atomic_lock = 0; /* 
atomic lock for bmlib_handle */ -static int bmve_atomic_lock = 0; +// static int bmve_atomic_lock = 0; static __thread int s_vid_enc_open_flock_fd[MAX_SOC_NUM] = {[0 ... (MAX_SOC_NUM-1)] = -1}; #elif _WIN32 static __declspec(thread) HANDLE s_vid_enc_open_flock_fd[MAX_SOC_NUM] = { NULL }; static volatile long bmve_atomic_lock = 0; -static volatile long bmhandle_atomic_lock = 0; #endif #ifdef __linux__ @@ -86,6 +80,7 @@ static __thread sigset_t newmask[128]; #endif #define TRY_OPEN_SEM_TIMEOUT 20 + void get_lock_timeout(int sec,int soc_idx) { char flock_name[255]; @@ -135,36 +130,6 @@ void unlock_flock(int soc_idx) } } - - -static void bm_handle_lock() -{ -#ifdef __linux__ - // while (atomic_flag_test_and_set(&bmhandle_atomic_lock)) - while (__atomic_test_and_set(&bmhandle_atomic_lock, __ATOMIC_SEQ_CST)) - { - usleep(100); - } -#endif -#ifdef _WIN32 - while (InterlockedCompareExchange(&bmhandle_atomic_lock, 1, 0)) { - Sleep(1); - } -#endif -} - - -static void bm_handle_unlock() -{ -#ifdef __linux__ - // atomic_flag_clear(&bmhandle_atomic_lock); - __atomic_clear(&bmhandle_atomic_lock, __ATOMIC_SEQ_CST); -#endif -#ifdef _WIN32 - InterlockedExchange(&bmhandle_atomic_lock, 0); -#endif -} - static void bmvpu_enc_lock(int soc_idx) { #if __linux__ @@ -186,23 +151,7 @@ static void bmvpu_enc_unlock(int soc_idx) static int bmvpu_enc_handle_error_full(char const *fn, int linenr, char const *funcn, char const *msg_start, int ret_code); -static BmVpuFrameType convert_frame_type(int vpu_pic_type); - -/* - * If a bm_handle_t on this soc already exists, return it directly, - * otherwise return after creat one (using bm_dev_request). - * This function is only be called by bmvpu_enc_load(), the bm_handle_t's reference count +1 - * if this function is called. - */ -static void bmvpu_enc_load_bmlib_handle(int soc_idx); - -/* - * If a bm_handle_t on this soc already exists, then the bm_handle_t's reference count -1. 
- * After that, if bm_handle_t's reference count is 0, free it(bm_dev_free), - * otherwise do nothing. - * This function is only be called by bmvpu_enc_unload(). - */ -static void bmvpu_enc_unload_bmlib_handle(int soc_idx); +static BmVpuEncFrameType convert_frame_type(int vpu_pic_type); int bmvpu_enc_get_core_idx(int soc_idx) { @@ -213,19 +162,47 @@ static int bmvpu_enc_bs_download(BmVpuEncoder* bve, uint8_t* host_va, uint64_t vpu_pa, int size) { #ifdef BM_PCIE_MODE - bm_device_mem_t vpu_mem = bm_mem_from_device(vpu_pa, size); - int ret = bm_memcpy_d2s_partial(bve->bm_handle, (void*)host_va, vpu_mem, size); - return ret; + return bmvpu_enc_download_data(bve->core_idx, host_va, size, + vpu_pa, size, size, 1); #else - uint8_t* va = BM_VPU_ENC_GET_BS_VA(bve, vpu_pa); - - bm_device_mem_t vpu_mem = bm_mem_from_device(vpu_pa, size); + uint8_t* va = (uint8_t*)BM_VPU_ENC_GET_BS_VA(bve, vpu_pa); memcpy(host_va, va, size); - return 0; #endif } +static int bmvpu_enc_alloc_proc(void *enc_ctx, + int vpu_core_idx, BmVpuEncDMABuffer *buf, unsigned int size) +{ + BmVpuEncoderCtx *video_enc_ctx = enc_ctx; + int ret = 0; + + if (video_enc_ctx->buffer_alloc_func) { + ret = video_enc_ctx->buffer_alloc_func(video_enc_ctx->buffer_context + , vpu_core_idx, buf, size); + } else { + ret = bmvpu_enc_dma_buffer_allocate(vpu_core_idx, buf, size); + } + + return ret; +} + +static int bmvpu_enc_free_proc(void *enc_ctx, + int vpu_core_idx, BmVpuEncDMABuffer *buf) +{ + BmVpuEncoderCtx *video_enc_ctx = enc_ctx; + int ret = 0; + + if (video_enc_ctx->buffer_free_func) { + ret = video_enc_ctx->buffer_free_func(video_enc_ctx->buffer_context + , vpu_core_idx, buf); + } else { + ret = bmvpu_enc_dma_buffer_deallocate(vpu_core_idx, buf); + } + + return ret; +} + static int bmvpu_enc_generate_header_data(BmVpuEncoder *encoder) { @@ -324,7 +301,7 @@ int bmvpu_enc_load(int soc_idx) get_lock_timeout(TRY_OPEN_SEM_TIMEOUT,soc_idx); - BM_VPU_LOG("VPU at Sophon device [%d] init", soc_idx); + BM_VPU_LOG("VPU at 
device [%d] init", soc_idx); BM_VPU_INFO("libbmvpuapi version %s", BMVPUAPI_VERSION); @@ -337,8 +314,6 @@ int bmvpu_enc_load(int soc_idx) goto cleanup; } - bmvpu_enc_load_bmlib_handle(soc_idx); - #ifdef _WIN32 timeBeginPeriod(1); #endif @@ -358,16 +333,11 @@ int bmvpu_enc_unload(int soc_idx) get_lock_timeout(TRY_OPEN_SEM_TIMEOUT,soc_idx); - - - BM_VPU_LOG("VPU at Sophon device [%d] deinit", soc_idx); + BM_VPU_LOG("VPU at device [%d] deinit", soc_idx); vpu_EncUnInit(soc_idx); BM_VPU_DEBUG("unloaded VPU"); - /* free bm_handle_t */ - bmvpu_enc_unload_bmlib_handle(soc_idx); - /* Leave critical region */ // atomic_flag_clear(&bmve_atomic_lock); unlock_flock(soc_idx); @@ -399,7 +369,7 @@ void bmvpu_enc_set_default_open_params(BmVpuEncOpenParams *open_params, memset(open_params, 0, sizeof(BmVpuEncOpenParams)); open_params->codec_format = codec_format; - open_params->color_format = BM_VPU_COLOR_FORMAT_YUV420; + open_params->pix_format = BM_VPU_ENC_PIX_FORMAT_YUV420P; open_params->frame_width = 0; open_params->frame_height = 0; open_params->fps_num = 1; @@ -417,14 +387,12 @@ void bmvpu_enc_set_default_open_params(BmVpuEncOpenParams *open_params, open_params->min_qp = 8; open_params->max_qp = 51; - open_params->chroma_interleave = 0; - open_params->soc_idx = 0; - open_params->gop_preset = 5; + open_params->gop_preset = BM_VPU_ENC_GOP_PRESET_IBBBP; open_params->intra_period = 60; - open_params->enc_mode = 1; + open_params->enc_mode = BM_VPU_ENC_RECOMMENDED_MODE; open_params->roi_enable = 0; @@ -523,7 +491,7 @@ static int bmvpu_enc_check_open_params(BmVpuEncOpenParams *opt) return BM_VPU_ENC_RETURN_CODE_INVALID_PARAMS; } - if (opt->gop_preset < 1 || opt->gop_preset > 8) + if (opt->gop_preset < BM_VPU_ENC_GOP_PRESET_ALL_I || opt->gop_preset > BM_VPU_ENC_GOP_PRESET_RA_IB) { BM_VPU_ERROR("GOP preset IDX is only one of 1,2,...,8"); return BM_VPU_ENC_RETURN_CODE_INVALID_PARAMS; @@ -536,9 +504,11 @@ static int bmvpu_enc_check_open_params(BmVpuEncOpenParams *opt) } low_delay = false; 
- if (opt->gop_preset == 1 || - opt->gop_preset == 2 || opt->gop_preset == 3 || - opt->gop_preset == 6 || opt->gop_preset == 7) + if (opt->gop_preset == BM_VPU_ENC_GOP_PRESET_ALL_I || + opt->gop_preset == BM_VPU_ENC_GOP_PRESET_IPP || + opt->gop_preset == BM_VPU_ENC_GOP_PRESET_IBBB || + opt->gop_preset == BM_VPU_ENC_GOP_PRESET_IPPPP || + opt->gop_preset == BM_VPU_ENC_GOP_PRESET_IBBBB) low_delay = true; if (low_delay) @@ -574,11 +544,11 @@ static int bmvpu_enc_check_open_params(BmVpuEncOpenParams *opt) return BM_VPU_ENC_RETURN_CODE_OK; } -static int bmvpu_enc_clear_work_buffer(bm_handle_t bm_handle, bm_device_mem_t *work_dmabuffer) +static int bmvpu_enc_clear_work_buffer(int core_idx, BmVpuDMABuffer *work_dmabuffer) { int size = (*work_dmabuffer).size; -#ifdef BM_PCIE_MODE + bmvpu_phys_addr_t work_buf_addr = work_dmabuffer->phys_addr; uint8_t* tmp_va = calloc(size, 1); if (tmp_va == NULL) { @@ -586,7 +556,7 @@ static int bmvpu_enc_clear_work_buffer(bm_handle_t bm_handle, bm_device_mem_t *w return -1; } - int ret = bm_memcpy_s2d_partial(bm_handle, *work_dmabuffer, tmp_va, size); + int ret = bmvpu_enc_upload_data(core_idx, tmp_va, size, work_buf_addr, size, size, 1); if (ret != 0) { BM_VPU_ERROR("PCIE mode vpu upload data failed"); @@ -595,25 +565,13 @@ static int bmvpu_enc_clear_work_buffer(bm_handle_t bm_handle, bm_device_mem_t *w } free(tmp_va); -#else - unsigned long long tmp_va; - int ret = bm_mem_mmap_device_mem_no_cache(bm_handle, work_dmabuffer, &tmp_va); - if (ret != 0) - { - BM_VPU_ERROR("SoC mode vpu upload data failed"); - return -1; - } - memset(tmp_va, 0, size); - bm_mem_unmap_device_mem(bm_handle, tmp_va, size); - -#endif return 0; } -int bmvpu_enc_open(BmVpuEncoder **encoder, +int bmvpu_enc_open_internal(BmVpuEncoder **encoder, BmVpuEncOpenParams *open_params, - bm_device_mem_t *bs_dmabuffer) + BmVpuEncDMABuffer *bs_dmabuffer) { VpuEncOpenParam eop; size_t work_buffer_size; @@ -621,6 +579,7 @@ int bmvpu_enc_open(BmVpuEncoder **encoder, int soc_idx = 
open_params->soc_idx; BmVpuEncReturnCodes ret; int enc_ret; + BmVpuEncoderCtx *video_enc_ctx = NULL; if((encoder == NULL) || (open_params == NULL) || (bs_dmabuffer == NULL)){ BM_VPU_ERROR("bmvpu_enc_open params err: encoder(0X%x), open_params(0X%x), bs_dmabuffer(0X%x)", encoder, open_params, bs_dmabuffer); @@ -628,6 +587,14 @@ int bmvpu_enc_open(BmVpuEncoder **encoder, return BM_VPU_ENC_RETURN_CODE_INVALID_PARAMS; } + if ((open_params->buffer_alloc_func && !open_params->buffer_free_func) + || (!open_params->buffer_alloc_func && open_params->buffer_free_func)) { + BM_VPU_ERROR("bmvpu_enc_open params err: alloc_func(0X%x), free_func(0X%x)" + , open_params->buffer_alloc_func, open_params->buffer_free_func); + unlock_flock(soc_idx); + return BM_VPU_ENC_RETURN_CODE_INVALID_PARAMS; + } + /* Check that the allocated bitstream buffer is big enough */ int size = bs_dmabuffer->size; if (size < VPU_ENC_BITSTREAM_BUFFER_SIZE) { @@ -666,30 +633,28 @@ int bmvpu_enc_open(BmVpuEncoder **encoder, /* Map the bitstream buffer. This mapping will persist until the encoder is closed. 
*/ #ifndef BM_PCIE_MODE - bm_mem_mmap_device_mem_no_cache(bmvpu_enc_get_bmlib_handle(soc_idx), bs_dmabuffer, &((*encoder)->bs_virt_addr)); + ret = bmvpu_dma_buffer_map(bmvpu_enc_get_core_idx(soc_idx), bs_dmabuffer, BM_VPU_MAPPING_FLAG_READ|BM_VPU_MAPPING_FLAG_WRITE); + if (ret < 0) { + BM_VPU_ERROR("mmap failed for bitstream buffer"); + if (*encoder) + free(*encoder); + return BM_VPU_ENC_RETURN_CODE_ERROR; + } #endif - (*encoder)->bs_phys_addr = bm_mem_get_device_addr(*bs_dmabuffer); + (*encoder)->bs_virt_addr = bs_dmabuffer->virt_addr; + (*encoder)->bs_phys_addr = bmvpu_enc_dma_buffer_get_physical_address(bs_dmabuffer); (*encoder)->bs_dmabuffer = bs_dmabuffer; - (*encoder)->cbcr_interleave = open_params->chroma_interleave; - - #ifdef BM_PCIE_MODE (*encoder)->soc_idx = open_params->soc_idx; - (*encoder)->bm_handle = bmvpu_enc_get_bmlib_handle(open_params->soc_idx); #else (*encoder)->soc_idx = 0; - (*encoder)->bm_handle = bmvpu_enc_get_bmlib_handle(0); #endif (*encoder)->core_idx = bmvpu_enc_get_core_idx((*encoder)->soc_idx); memset(&eop, 0, sizeof(VpuEncOpenParam)); - vpu_SetEncOpenParam(&eop, open_params->frame_width, open_params->frame_height, - open_params->fps_num, open_params->fps_den, - open_params->bitrate, open_params->cqp); - eop.socIdx = (*encoder)->soc_idx; eop.coreIdx = (*encoder)->core_idx; @@ -698,9 +663,13 @@ int bmvpu_enc_open(BmVpuEncoder **encoder, else /* if (open_params->codec_format == BM_VPU_CODEC_FORMAT_H265) */ eop.bitstreamFormat = VPU_CODEC_HEVC; + vpu_SetEncOpenParam(&eop, open_params->frame_width, open_params->frame_height, + open_params->fps_num, open_params->fps_den, + open_params->bitrate, open_params->cqp); + /* Fill in the bitstream buffer address and size. 
*/ - eop.bitstreamBuffer = bm_mem_get_device_addr(*bs_dmabuffer); - eop.bitstreamBufferSize = bm_mem_get_device_size(*bs_dmabuffer); + eop.bitstreamBuffer = bmvpu_enc_dma_buffer_get_physical_address(bs_dmabuffer); + eop.bitstreamBufferSize = bmvpu_enc_dma_buffer_get_size(bs_dmabuffer); /* Miscellaneous codec format independent values */ BM_VPU_TRACE("input bit rate: %d", open_params->bitrate); @@ -735,7 +704,14 @@ int bmvpu_enc_open(BmVpuEncoder **encoder, eop.waveParam.maxQpP = eop.waveParam.maxQpB = open_params->max_qp; - eop.cbcrInterleave = open_params->chroma_interleave; + if ((open_params->pix_format == BM_VPU_ENC_PIX_FORMAT_NV12) || + (open_params->pix_format == BM_VPU_ENC_PIX_FORMAT_NV16) || + (open_params->pix_format == BM_VPU_ENC_PIX_FORMAT_NV24)) { + eop.cbcrInterleave = BM_VPU_ENC_CHROMA_INTERLEAVE_CBCR; + } else { + eop.cbcrInterleave = BM_VPU_ENC_CHROMA_NO_INTERLEAVE; + } + eop.cbcrOrder = 0; eop.nv21 = 0; @@ -800,9 +776,21 @@ int bmvpu_enc_open(BmVpuEncoder **encoder, /* in unit of 4k bytes */ work_buffer_size = (work_buffer_size +(4*1024-1)) & (~(4*1024-1)); + video_enc_ctx = (BmVpuEncoderCtx *)calloc(1, sizeof(BmVpuEncoderCtx)); + if (video_enc_ctx == NULL) { + BM_VPU_ERROR("malloc video_enc_ctx failed\n"); + goto cleanup; + } + (*encoder)->video_enc_ctx = video_enc_ctx; + + video_enc_ctx->buffer_alloc_func = open_params->buffer_alloc_func;; + video_enc_ctx->buffer_free_func = open_params->buffer_free_func; + video_enc_ctx->buffer_context = open_params->buffer_context; + /* Create work buffer */ - (*encoder)->work_dmabuffer = (bm_device_mem_t *)malloc(sizeof(bm_device_mem_t)); - ret = bmvpu_malloc_device_byte_heap(bmvpu_enc_get_bmlib_handle((*encoder)->soc_idx), (*encoder)->work_dmabuffer, work_buffer_size, HEAP_MASK, 1); + (*encoder)->work_dmabuffer = (BmVpuEncDMABuffer *)malloc(sizeof(BmVpuEncDMABuffer)); + + ret = bmvpu_enc_dma_buffer_allocate((*encoder)->core_idx, (*encoder)->work_dmabuffer, work_buffer_size); if (ret != 0) { 
BM_VPU_ERROR("bm_malloc_device_byte for work_dmabuffer failed!\n"); @@ -811,11 +799,11 @@ int bmvpu_enc_open(BmVpuEncoder **encoder, } // BM_VPU_LOG("work buffer: pa = 0x%lx, size = %zd", - // bmvpu_dma_buffer_get_physical_address(work_dmabuffer), + // bmvpu_enc_dma_buffer_get_physical_address(work_dmabuffer), // work_buffer_size); /* Clear the working buffer */ - ret = bmvpu_enc_clear_work_buffer(bmvpu_enc_get_bmlib_handle((*encoder)->soc_idx), (*encoder)->work_dmabuffer); + ret = bmvpu_enc_clear_work_buffer((*encoder)->core_idx, (BmVpuDMABuffer *)(*encoder)->work_dmabuffer); if (ret != 0) { BM_VPU_ERROR("bmvpu_enc_clear_work_buffer failed"); @@ -824,8 +812,8 @@ int bmvpu_enc_open(BmVpuEncoder **encoder, } /* Fill in the working buffer address and size. */ - eop.workBuffer = bm_mem_get_device_addr(*((*encoder)->work_dmabuffer)); - eop.workBufferSize = bm_mem_get_device_size(*((*encoder)->work_dmabuffer)); + eop.workBuffer = bmvpu_enc_dma_buffer_get_physical_address((*encoder)->work_dmabuffer); + eop.workBufferSize = bmvpu_enc_dma_buffer_get_size((*encoder)->work_dmabuffer); /* Now actually open the encoder instance */ BM_VPU_LOG("opening encoder, frame size: %u x %u pixel", @@ -837,7 +825,7 @@ int bmvpu_enc_open(BmVpuEncoder **encoder, goto cleanup; (*encoder)->codec_format = open_params->codec_format; - (*encoder)->color_format = open_params->color_format; + (*encoder)->pix_format = open_params->pix_format; (*encoder)->frame_width = open_params->frame_width; (*encoder)->frame_height = open_params->frame_height; (*encoder)->fps_n = open_params->fps_num; @@ -845,6 +833,16 @@ int bmvpu_enc_open(BmVpuEncoder **encoder, (*encoder)->rc_enable = (open_params->bitrate > 0); (*encoder)->cqp = open_params->cqp; + if(open_params->timeout > 0) + (*encoder)->timeout = open_params->timeout; + else + (*encoder)->timeout = VPU_WAIT_TIMEOUT; + + if(open_params->timeout_count > 0) + (*encoder)->timeout_count = open_params->timeout_count; + else + (*encoder)->timeout_count = 
VPU_MAX_TIMEOUT_COUNTS; + finish: unlock_flock(soc_idx); if (ret == BM_VPU_ENC_RETURN_CODE_OK) @@ -855,24 +853,134 @@ int bmvpu_enc_open(BmVpuEncoder **encoder, cleanup: unlock_flock(soc_idx); #ifndef BM_PCIE_MODE - bm_mem_unmap_device_mem(bmvpu_enc_get_bmlib_handle((*encoder)->soc_idx), (void *)((*encoder)->bs_virt_addr), bm_mem_get_device_size(*bs_dmabuffer)); + bmvpu_dma_buffer_unmap((*encoder)->core_idx, bs_dmabuffer); #endif if ((*encoder)->work_dmabuffer!=NULL) { - bm_free_device(bmvpu_enc_get_bmlib_handle((*encoder)->soc_idx), *((*encoder)->work_dmabuffer)); + bmvpu_enc_free_proc(video_enc_ctx, (*encoder)->core_idx, (*encoder)->work_dmabuffer); free((*encoder)->work_dmabuffer); } + if ((*encoder)->video_enc_ctx) { + free((*encoder)->video_enc_ctx); + (*encoder)->video_enc_ctx = NULL; + } + free(*encoder); *encoder = NULL; goto finish; } +int bmvpu_enc_open(BmVpuEncoder **encoder, + BmVpuEncOpenParams *open_params, + BmVpuEncDMABuffer *bs_dmabuffer, + BmVpuEncInitialInfo *initial_info) +{ + BmVpuEncReturnCodes ret; + BmVpuEncoder *video_encoder = NULL; + BmVpuEncoderCtx *video_enc_ctx = NULL; + int i; + + // step 1: Open an encoder instance + ret = bmvpu_enc_open_internal(encoder, open_params, bs_dmabuffer); + if (ret != BM_VPU_ENC_RETURN_CODE_OK) { + return ret; + } + + // step 2: get initial information: min_num_rec_fb and min_num_src_fb + video_encoder = *encoder; + ret = bmvpu_enc_get_initial_info(video_encoder, initial_info); + if (ret != BM_VPU_ENC_RETURN_CODE_OK) { + return ret; + } + + video_enc_ctx = video_encoder->video_enc_ctx; + do { + // step 3: alloc rec fb and register to vpu + video_enc_ctx->num_rec_fb = initial_info->min_num_rec_fb; + + /* Allocate memory blocks for the framebuffer and DMA buffer structures, + * and allocate the DMA buffers themselves */ + video_enc_ctx->rec_fb_list = calloc(video_enc_ctx->num_rec_fb, sizeof(BmVpuFramebuffer)); + if (video_enc_ctx->rec_fb_list == NULL) { + BM_VPU_ERROR("malloc rec_fb_list failed\n"); + ret = 
BM_VPU_ENC_RETURN_CODE_ERROR; + break; + } + + video_enc_ctx->rec_fb_dmabuffers = calloc(video_enc_ctx->num_rec_fb, sizeof(BmVpuEncDMABuffer*)); + if (video_enc_ctx->rec_fb_dmabuffers == NULL) { + BM_VPU_ERROR("malloc rec_fb_dmabuffers failed\n"); + ret = BM_VPU_ENC_RETURN_CODE_ERROR; + break;; + } + + for (i = 0; i < video_enc_ctx->num_rec_fb; i++) { + int rec_id = 0x200 + i; // TODO + + /* Allocate a DMA buffer for each framebuffer. + * It is possible to specify alternate allocators; + * all that is required is that the allocator provides physically contiguous memory + * (necessary for DMA transfers) and repects the alignment value. */ + video_enc_ctx->rec_fb_dmabuffers[i] = (BmVpuEncDMABuffer *)calloc(1, sizeof(BmVpuEncDMABuffer)); + ret = bmvpu_enc_alloc_proc(video_enc_ctx, video_encoder->core_idx + , video_enc_ctx->rec_fb_dmabuffers[i] + , initial_info->rec_fb.size); + if (ret != 0) { + BM_VPU_ERROR("bmvpu_enc_dma_buffer_allocate failed! line=%d\n", __LINE__); + return BM_VPU_ENC_RETURN_CODE_ERROR; + } + + bmvpu_fill_framebuffer_params(&(video_enc_ctx->rec_fb_list[i]), + &(initial_info->rec_fb), + video_enc_ctx->rec_fb_dmabuffers[i], rec_id, NULL); + } + + /* Buffer registration help the VPU knows which buffers to use for + * storing temporary frames into. 
*/ + ret = bmvpu_enc_register_framebuffers(video_encoder, video_enc_ctx->rec_fb_list + , video_enc_ctx->num_rec_fb); + if (ret != 0) { + BM_VPU_ERROR("bmvpu_enc_register_framebuffers failed\n"); + ret = BM_VPU_ENC_RETURN_CODE_ERROR; + break; + } + } while(0); + + if (ret != BM_VPU_ENC_RETURN_CODE_OK && video_enc_ctx != NULL) { + if (video_enc_ctx->rec_fb_list) { + free(video_enc_ctx->rec_fb_list); + video_enc_ctx->rec_fb_list = NULL; + } + + if (video_enc_ctx->rec_fb_dmabuffers) { + for (i = 0; i < video_enc_ctx->num_rec_fb; ++i) { + bmvpu_enc_free_proc(video_enc_ctx, video_encoder->core_idx + , video_enc_ctx->rec_fb_dmabuffers[i]); + free(video_enc_ctx->rec_fb_dmabuffers[i]); + } + free(video_enc_ctx->rec_fb_dmabuffers); + video_enc_ctx->rec_fb_dmabuffers = NULL; + } + + if (video_enc_ctx) { + free(video_enc_ctx); + } + + video_encoder->video_enc_ctx = NULL; + return ret; + } + + return BM_VPU_ENC_RETURN_CODE_OK; +} + int bmvpu_enc_close(BmVpuEncoder *encoder) { BmVpuEncReturnCodes ret; + BmVpuEncoderCtx *pst_video_enc_ctx = NULL; int enc_ret; + int i = 0; if (encoder == NULL) return BM_VPU_ENC_RETURN_CODE_OK; @@ -901,7 +1009,7 @@ int bmvpu_enc_close(BmVpuEncoder *encoder) #ifndef BM_PCIE_MODE if (encoder->bs_dmabuffer != NULL) - bm_mem_unmap_device_mem(encoder->bm_handle, (void *)(encoder->bs_virt_addr), bm_mem_get_device_size(*encoder->bs_dmabuffer)); + bmvpu_dma_buffer_unmap(encoder->core_idx, encoder->bs_dmabuffer); #endif if (encoder->internal_framebuffers != NULL) @@ -913,42 +1021,61 @@ int bmvpu_enc_close(BmVpuEncoder *encoder) if (encoder->buffer_mv) { - bm_free_device(encoder->bm_handle, *(encoder->buffer_mv)); + bmvpu_enc_free_proc(encoder->video_enc_ctx, encoder->core_idx, encoder->buffer_mv); free(encoder->buffer_mv); encoder->buffer_mv = NULL; } if (encoder->buffer_fbc_y_tbl) { - bm_free_device(encoder->bm_handle, *(encoder->buffer_fbc_y_tbl)); + bmvpu_enc_free_proc(encoder->video_enc_ctx, encoder->core_idx, encoder->buffer_fbc_y_tbl); 
free(encoder->buffer_fbc_y_tbl); encoder->buffer_fbc_y_tbl = NULL; } if (encoder->buffer_fbc_c_tbl) { - bm_free_device(encoder->bm_handle, *(encoder->buffer_fbc_c_tbl)); + bmvpu_enc_free_proc(encoder->video_enc_ctx, encoder->core_idx, encoder->buffer_fbc_c_tbl); free(encoder->buffer_fbc_c_tbl); encoder->buffer_fbc_c_tbl = NULL; } if (encoder->buffer_sub_sam) { - bm_free_device(encoder->bm_handle, *(encoder->buffer_sub_sam)); + bmvpu_enc_free_proc(encoder->video_enc_ctx, encoder->core_idx, encoder->buffer_sub_sam); free(encoder->buffer_sub_sam); encoder->buffer_sub_sam = NULL; } if (encoder->work_dmabuffer) { - bm_free_device(encoder->bm_handle, *(encoder->work_dmabuffer)); + bmvpu_enc_free_proc(encoder->video_enc_ctx, encoder->core_idx, encoder->work_dmabuffer); free(encoder->work_dmabuffer); encoder->work_dmabuffer = NULL; } - free(encoder); - encoder = NULL; + if (encoder->video_enc_ctx) { + pst_video_enc_ctx = (BmVpuEncoderCtx *)encoder->video_enc_ctx; + if (pst_video_enc_ctx->rec_fb_list) { + free(pst_video_enc_ctx->rec_fb_list); + pst_video_enc_ctx->rec_fb_list = NULL; + } + if (pst_video_enc_ctx->rec_fb_dmabuffers) { + for (i = 0; i < pst_video_enc_ctx->num_rec_fb; ++i) { + bmvpu_enc_free_proc(encoder->video_enc_ctx, encoder->core_idx + , pst_video_enc_ctx->rec_fb_dmabuffers[i]); + free(pst_video_enc_ctx->rec_fb_dmabuffers[i]); + } + free(pst_video_enc_ctx->rec_fb_dmabuffers); + pst_video_enc_ctx->rec_fb_dmabuffers = NULL; + } + free(encoder->video_enc_ctx); + encoder->video_enc_ctx = NULL; + } + + free(encoder); + encoder = NULL; if (ret == BM_VPU_ENC_RETURN_CODE_OK) BM_VPU_DEBUG("successfully closed encoder"); @@ -962,12 +1089,12 @@ int bmvpu_enc_register_framebuffers(BmVpuEncoder *encoder, uint32_t num_framebuffers) { VpuEncoder* handle = encoder->handle; - bm_handle_t bm_handle = encoder->bm_handle; + BmVpuEncoderCtx *video_enc_ctx = encoder->video_enc_ctx; - bm_device_mem_t* dmabuffer_mv = (bm_device_mem_t*)malloc(sizeof(bm_device_mem_t)); - 
bm_device_mem_t* dmabuffer_fbc_y_tbl = (bm_device_mem_t*)malloc(sizeof(bm_device_mem_t)); - bm_device_mem_t* dmabuffer_fbc_c_tbl = (bm_device_mem_t*)malloc(sizeof(bm_device_mem_t)); - bm_device_mem_t* dmabuffer_sub_sam = (bm_device_mem_t*)malloc(sizeof(bm_device_mem_t)); + BmVpuEncDMABuffer* dmabuffer_mv = (BmVpuEncDMABuffer*)malloc(sizeof(BmVpuEncDMABuffer)); + BmVpuEncDMABuffer* dmabuffer_fbc_y_tbl = (BmVpuEncDMABuffer*)malloc(sizeof(BmVpuEncDMABuffer)); + BmVpuEncDMABuffer* dmabuffer_fbc_c_tbl = (BmVpuEncDMABuffer*)malloc(sizeof(BmVpuEncDMABuffer)); + BmVpuEncDMABuffer* dmabuffer_sub_sam = (BmVpuEncDMABuffer*)malloc(sizeof(BmVpuEncDMABuffer)); uint32_t i; BmVpuEncReturnCodes ret; @@ -981,7 +1108,7 @@ int bmvpu_enc_register_framebuffers(BmVpuEncoder *encoder, BM_VPU_DEBUG("attempting to register %u framebuffers", num_framebuffers); /* Allocate memory for framebuffer structures */ - encoder->internal_framebuffers = (VpuFrameBuffer*)malloc(sizeof(VpuFrameBuffer) * num_framebuffers); + encoder->internal_framebuffers = (void*)malloc(sizeof(VpuFrameBuffer) * num_framebuffers); if (encoder->internal_framebuffers == NULL) { BM_VPU_ERROR("allocating memory for framebuffers failed"); @@ -991,15 +1118,16 @@ int bmvpu_enc_register_framebuffers(BmVpuEncoder *encoder, /* Copy the values from the framebuffers array to the internal_framebuffers * one, which in turn will be used by the VPU */ + VpuFrameBuffer* _framebuffers = (VpuFrameBuffer*)encoder->internal_framebuffers; for (i = 0; i < num_framebuffers; ++i) { VpuFrameBuffer *internal_fb = NULL; BmVpuFramebuffer *fb = &framebuffers[i]; bmvpu_phys_addr_t phys_addr = 0; - phys_addr = bm_mem_get_device_addr(*(fb->dma_buffer)); + phys_addr = bmvpu_enc_dma_buffer_get_physical_address(fb->dma_buffer); - internal_fb = &(encoder->internal_framebuffers[i]); + internal_fb = &((_framebuffers[i])); internal_fb->myIndex = i; internal_fb->mapType = BM_COMPRESSED_FRAME_MAP; internal_fb->format = FB_FMT_420; @@ -1012,9 +1140,9 @@ int 
bmvpu_enc_register_framebuffers(BmVpuEncoder *encoder, /* if mapType is BM_COMPRESSED_FRAME_MAP, set 128BIT_LITTLE_ENDIAN */ internal_fb->endian = 16; /* 128BIT_LITTLE_ENDIAN */ - internal_fb->bufY = (bm_pa_t)(phys_addr + fb->y_offset); - internal_fb->bufCb = (bm_pa_t)(phys_addr + fb->cb_offset); - internal_fb->bufCr = (bm_pa_t)(phys_addr + fb->cr_offset); + internal_fb->bufY = (bmvpu_phys_addr_t)(phys_addr + fb->y_offset); + internal_fb->bufCb = (bmvpu_phys_addr_t)(phys_addr + fb->cb_offset); + internal_fb->bufCr = (bmvpu_phys_addr_t)(phys_addr + fb->cr_offset); } ret = vpu_EncCalcCoreBufferSize(handle, num_framebuffers); @@ -1031,7 +1159,7 @@ int bmvpu_enc_register_framebuffers(BmVpuEncoder *encoder, int sub_sam_size = handle->vbSubSamBuf.size; /* buffer_mv */ - ret = bmvpu_malloc_device_byte_heap(bm_handle, dmabuffer_mv, mv_size, HEAP_MASK, 1); + ret = bmvpu_enc_alloc_proc(video_enc_ctx, encoder->core_idx, dmabuffer_mv, mv_size); if (ret != 0) { BM_VPU_ERROR("Get buffer_mv failed"); @@ -1040,7 +1168,7 @@ int bmvpu_enc_register_framebuffers(BmVpuEncoder *encoder, } /* buffer_fbc_y_tbl */ - ret = bmvpu_malloc_device_byte_heap(bm_handle, dmabuffer_fbc_y_tbl, fbc_y_tbl_size, HEAP_MASK, 1); + ret = bmvpu_enc_alloc_proc(video_enc_ctx, encoder->core_idx, dmabuffer_fbc_y_tbl, fbc_y_tbl_size); if (ret != 0) { BM_VPU_ERROR("Get buffer_fbc_y_tbl failed"); @@ -1049,7 +1177,7 @@ int bmvpu_enc_register_framebuffers(BmVpuEncoder *encoder, } /* buffer_fbc_y_tbl */ - ret = bmvpu_malloc_device_byte_heap(bm_handle, dmabuffer_fbc_c_tbl, fbc_c_tbl_size, HEAP_MASK, 1); + ret = bmvpu_enc_alloc_proc(video_enc_ctx, encoder->core_idx, dmabuffer_fbc_c_tbl, fbc_c_tbl_size); if (ret != 0) { BM_VPU_ERROR("Get buffer_fbc_c_tbl failed"); @@ -1058,7 +1186,7 @@ int bmvpu_enc_register_framebuffers(BmVpuEncoder *encoder, } /* buffer_fbc_y_tbl */ - ret = bmvpu_malloc_device_byte_heap(bm_handle, dmabuffer_sub_sam, sub_sam_size, HEAP_MASK, 1); + ret = bmvpu_enc_alloc_proc(video_enc_ctx, 
encoder->core_idx, dmabuffer_sub_sam, sub_sam_size); if (ret != 0) { BM_VPU_ERROR("Get buffer_sub_sam failed"); @@ -1067,30 +1195,30 @@ int bmvpu_enc_register_framebuffers(BmVpuEncoder *encoder, } BM_VPU_LOG("mv: pa = 0x%lx, size = %d \n", - bm_mem_get_device_addr(*(encoder->buffer_mv)), + bmvpu_enc_dma_buffer_get_physical_address(encoder->buffer_mv), mv_size); BM_VPU_LOG("fbc_y_tbl: pa = 0x%lx, size = %d", - bm_mem_get_device_addr(*(encoder->buffer_fbc_y_tbl)), + bmvpu_enc_dma_buffer_get_physical_address(encoder->buffer_fbc_y_tbl), fbc_y_tbl_size); BM_VPU_LOG("fbc_c_tbl: pa = 0x%lx, size = %d", - bm_mem_get_device_addr(*(encoder->buffer_fbc_c_tbl)), + bmvpu_enc_dma_buffer_get_physical_address(encoder->buffer_fbc_c_tbl), fbc_c_tbl_size); BM_VPU_LOG("sub_sam: pa = 0x%lx, size = %d", - bm_mem_get_device_addr(*(encoder->buffer_sub_sam)), + bmvpu_enc_dma_buffer_get_physical_address(encoder->buffer_sub_sam), sub_sam_size); - encoder->buffer_mv = dmabuffer_mv; - encoder->buffer_fbc_y_tbl = dmabuffer_fbc_y_tbl; - encoder->buffer_fbc_c_tbl = dmabuffer_fbc_c_tbl; - encoder->buffer_sub_sam = dmabuffer_sub_sam; + encoder->buffer_mv = (BmVpuEncDMABuffer*)dmabuffer_mv; + encoder->buffer_fbc_y_tbl = (BmVpuEncDMABuffer*)dmabuffer_fbc_y_tbl; + encoder->buffer_fbc_c_tbl = (BmVpuEncDMABuffer*)dmabuffer_fbc_c_tbl; + encoder->buffer_sub_sam = (BmVpuEncDMABuffer*)dmabuffer_sub_sam; - handle->vbMV.pa = bm_mem_get_device_addr(*(encoder->buffer_mv)); - handle->vbFbcYTbl.pa = bm_mem_get_device_addr(*(encoder->buffer_fbc_y_tbl)); - handle->vbFbcCTbl.pa = bm_mem_get_device_addr(*(encoder->buffer_fbc_c_tbl)); - handle->vbSubSamBuf.pa = bm_mem_get_device_addr(*(encoder->buffer_sub_sam)); + handle->vbMV.pa = bmvpu_enc_dma_buffer_get_physical_address((BmVpuEncDMABuffer*)encoder->buffer_mv); + handle->vbFbcYTbl.pa = bmvpu_enc_dma_buffer_get_physical_address((BmVpuEncDMABuffer*)encoder->buffer_fbc_y_tbl); + handle->vbFbcCTbl.pa = 
bmvpu_enc_dma_buffer_get_physical_address((BmVpuEncDMABuffer*)encoder->buffer_fbc_c_tbl); + handle->vbSubSamBuf.pa = bmvpu_enc_dma_buffer_get_physical_address((BmVpuEncDMABuffer*)encoder->buffer_sub_sam); enc_ret = vpu_EncRegisterFrameBuffer(encoder->handle, - encoder->internal_framebuffers, + (VpuFrameBuffer*)encoder->internal_framebuffers, num_framebuffers); ret = BM_VPU_ENC_HANDLE_ERROR("could not register framebuffers", enc_ret); if (ret != BM_VPU_ENC_RETURN_CODE_OK) @@ -1122,32 +1250,32 @@ int bmvpu_enc_register_framebuffers(BmVpuEncoder *encoder, cleanup: - handle->vbMV.pa = bm_mem_get_device_addr(*(encoder->buffer_mv)); - handle->vbFbcYTbl.pa = bm_mem_get_device_addr(*(encoder->buffer_fbc_y_tbl)); - handle->vbFbcCTbl.pa = bm_mem_get_device_addr(*(encoder->buffer_fbc_c_tbl)); - handle->vbSubSamBuf.pa = bm_mem_get_device_addr(*(encoder->buffer_sub_sam)); + handle->vbMV.pa = bmvpu_enc_dma_buffer_get_physical_address(encoder->buffer_mv); + handle->vbFbcYTbl.pa = bmvpu_enc_dma_buffer_get_physical_address(encoder->buffer_fbc_y_tbl); + handle->vbFbcCTbl.pa = bmvpu_enc_dma_buffer_get_physical_address(encoder->buffer_fbc_c_tbl); + handle->vbSubSamBuf.pa = bmvpu_enc_dma_buffer_get_physical_address(encoder->buffer_sub_sam); if (encoder->buffer_mv != NULL) { - bm_free_device(bm_handle, *(encoder->buffer_mv)); + bmvpu_enc_free_proc(encoder->video_enc_ctx, encoder->core_idx, encoder->buffer_mv); free(encoder->buffer_mv); } if (encoder->buffer_fbc_y_tbl != NULL) { - bm_free_device(bm_handle, *(encoder->buffer_fbc_y_tbl)); + bmvpu_enc_free_proc(encoder->video_enc_ctx, encoder->core_idx, encoder->buffer_fbc_y_tbl); free(encoder->buffer_fbc_y_tbl); } if (encoder->buffer_fbc_c_tbl != NULL) { - bm_free_device(bm_handle, *(encoder->buffer_fbc_c_tbl)); + bmvpu_enc_free_proc(encoder->video_enc_ctx, encoder->core_idx, encoder->buffer_fbc_c_tbl); free(encoder->buffer_fbc_c_tbl); } if (encoder->buffer_sub_sam != NULL) { - bm_free_device(bm_handle, *(encoder->buffer_sub_sam)); + 
bmvpu_enc_free_proc(encoder->video_enc_ctx, encoder->core_idx, encoder->buffer_sub_sam); free(encoder->buffer_sub_sam); } @@ -1195,16 +1323,16 @@ int bmvpu_enc_get_initial_info(BmVpuEncoder *encoder, BmVpuEncInitialInfo *info) BM_VPU_DEBUG("min_num_rec_fb=%d", info->min_num_rec_fb); - enc_ret = bmvpu_calc_framebuffer_sizes(BM_LINEAR_FRAME_MAP, encoder->color_format, + enc_ret = bmvpu_calc_framebuffer_sizes(BM_LINEAR_FRAME_MAP, encoder->pix_format, encoder->frame_width, encoder->frame_height, - encoder->cbcr_interleave, &(info->src_fb)); + &(info->src_fb)); if (enc_ret != BM_VPU_ENC_RETURN_CODE_OK) { return BM_VPU_ENC_RETURN_CODE_ERROR; } - enc_ret = bmvpu_calc_framebuffer_sizes(BM_COMPRESSED_FRAME_MAP, encoder->color_format, + enc_ret = bmvpu_calc_framebuffer_sizes(BM_COMPRESSED_FRAME_MAP, encoder->pix_format, encoder->frame_width, encoder->frame_height, - encoder->cbcr_interleave, &(info->rec_fb)); + &(info->rec_fb)); if (enc_ret != BM_VPU_ENC_RETURN_CODE_OK) { return BM_VPU_ENC_RETURN_CODE_ERROR; } @@ -1265,8 +1393,8 @@ int bmvpu_enc_encode(BmVpuEncoder *encoder, enc_param.skipPicture = encoding_params->skip_frame; - enc_param.picStreamBufferAddr = bm_mem_get_device_addr(*(encoder->bs_dmabuffer)); - enc_param.picStreamBufferSize = bm_mem_get_device_size(*(encoder->bs_dmabuffer)); + enc_param.picStreamBufferAddr = bmvpu_enc_dma_buffer_get_physical_address(encoder->bs_dmabuffer); + enc_param.picStreamBufferSize = bmvpu_enc_dma_buffer_get_size(encoder->bs_dmabuffer); /* FW will encode header data implicitly when changing the header syntaxes * If this value is 1, encodeVPS, encodeSPS, and encodePPS are ignored. 
*/ @@ -1284,13 +1412,13 @@ int bmvpu_enc_encode(BmVpuEncoder *encoder, enc_param.codeOption.encodeFiller = 0; /* encode filler data nal unit explicitly */ #endif if (encoding_params->customMapOpt != NULL) { - enc_param.customMapOpt.roiAvgQp = encoding_params->customMapOpt[encoding_params->customMapOptUsedIndex].roiAvgQp; - enc_param.customMapOpt.customRoiMapEnable = encoding_params->customMapOpt[encoding_params->customMapOptUsedIndex].customRoiMapEnable; - enc_param.customMapOpt.customLambdaMapEnable = encoding_params->customMapOpt[encoding_params->customMapOptUsedIndex].customLambdaMapEnable; - enc_param.customMapOpt.customModeMapEnable = encoding_params->customMapOpt[encoding_params->customMapOptUsedIndex].customModeMapEnable; - enc_param.customMapOpt.customCoefDropEnable = encoding_params->customMapOpt[encoding_params->customMapOptUsedIndex].customCoefDropEnable; - enc_param.customMapOpt.customCoefDropEnable = encoding_params->customMapOpt[encoding_params->customMapOptUsedIndex].customCoefDropEnable; - enc_param.customMapOpt.addrCustomMap = encoding_params->customMapOpt[encoding_params->customMapOptUsedIndex].addrCustomMap; + enc_param.customMapOpt.roiAvgQp = encoding_params->customMapOpt->roiAvgQp; + enc_param.customMapOpt.customRoiMapEnable = encoding_params->customMapOpt->customRoiMapEnable; + enc_param.customMapOpt.customLambdaMapEnable = encoding_params->customMapOpt->customLambdaMapEnable; + enc_param.customMapOpt.customModeMapEnable = encoding_params->customMapOpt->customModeMapEnable; + enc_param.customMapOpt.customCoefDropEnable = encoding_params->customMapOpt->customCoefDropEnable; + enc_param.customMapOpt.customCoefDropEnable = encoding_params->customMapOpt->customCoefDropEnable; + enc_param.customMapOpt.addrCustomMap = encoding_params->customMapOpt->addrCustomMap; } /* Copy over information from the raw_frame's framebuffer into the @@ -1300,9 +1428,16 @@ int bmvpu_enc_encode(BmVpuEncoder *encoder, src_fb.mapType = BM_LINEAR_FRAME_MAP; src_fb.format = 
FB_FMT_420; - src_fb.cbcrInterleave = encoder->cbcr_interleave; + if ((encoder->pix_format == BM_VPU_ENC_PIX_FORMAT_NV12) || + (encoder->pix_format == BM_VPU_ENC_PIX_FORMAT_NV16) || + (encoder->pix_format == BM_VPU_ENC_PIX_FORMAT_NV24)) { + src_fb.cbcrInterleave = BM_VPU_ENC_CHROMA_INTERLEAVE_CBCR; + } else { + src_fb.cbcrInterleave = BM_VPU_ENC_CHROMA_NO_INTERLEAVE; + } src_fb.nv21 = 0; + src_fb.lumaBitDepth = 8; src_fb.chromaBitDepth = 8; @@ -1314,7 +1449,7 @@ int bmvpu_enc_encode(BmVpuEncoder *encoder, /* Get the physical address for the raw_frame that shall be encoded * and the virtual pointer to the output buffer */ - pa = bm_mem_get_device_addr(*(framebuffer->dma_buffer)); + pa = bmvpu_enc_dma_buffer_get_physical_address(framebuffer->dma_buffer); BM_VPU_LOG("source framebuffer: myIndex: 0x%x, Y stride: %u, CbCr stride: %u, pa: 0x%lx", framebuffer->myIndex, framebuffer->y_stride, framebuffer->cbcr_stride, pa); @@ -1324,11 +1459,11 @@ int bmvpu_enc_encode(BmVpuEncoder *encoder, src_fb.width = framebuffer->width; src_fb.height = framebuffer->height; - src_fb.bufY = (bm_pa_t)(pa + framebuffer-> y_offset); - src_fb.bufCb = (bm_pa_t)(pa + framebuffer->cb_offset); - src_fb.bufCr = (bm_pa_t)(pa + framebuffer->cr_offset); + src_fb.bufY = (bmvpu_phys_addr_t)(pa + framebuffer-> y_offset); + src_fb.bufCb = (bmvpu_phys_addr_t)(pa + framebuffer->cb_offset); + src_fb.bufCr = (bmvpu_phys_addr_t)(pa + framebuffer->cr_offset); - src_fb.size = bm_mem_get_device_size(*(framebuffer->dma_buffer)); + src_fb.size = bmvpu_enc_dma_buffer_get_size(framebuffer->dma_buffer); enc_param.srcEndFlag = 0; } @@ -1383,15 +1518,15 @@ int bmvpu_enc_encode(BmVpuEncoder *encoder, * one vpu_EncWaitForInt() call to cover the encoding interval */ timeout = TRUE; BM_VPU_LOG("waiting for encoding completion"); - for (i = 0; i < VPU_MAX_TIMEOUT_COUNTS; i++) + for (i = 0; i < encoder->timeout_count; i++) { - ret = vpu_EncWaitForInt(encoder->handle, VPU_WAIT_TIMEOUT); + ret = 
vpu_EncWaitForInt(encoder->handle, encoder->timeout); if (ret == VPU_RET_SUCCESS) { timeout = FALSE; break; } - BM_VPU_WARNING("timeout after waiting %d ms for frame completion", VPU_WAIT_TIMEOUT); + BM_VPU_WARNING("timeout after waiting %d ms for frame completion", encoder->timeout); } /* Retrieve information about the result of the encode process. Do so even if @@ -1423,13 +1558,14 @@ int bmvpu_enc_encode(BmVpuEncoder *encoder, encoded_frame->data_size = 0; - encoded_frame->frame_type = convert_frame_type(enc_output_info.picType); + encoded_frame->frame_type = convert_frame_type((int)enc_output_info.picType); encoded_frame->src_idx = enc_output_info.encSrcIdx; encoded_frame->context = enc_output_info.context; encoded_frame->pts = enc_output_info.pts; encoded_frame->dts = enc_output_info.dts; encoded_frame->avg_ctu_qp = enc_output_info.avgCtuQp; + encoded_frame->u64CustomMapPhyAddr = enc_output_info.addrCustomMap; BM_VPU_LOG("output info: picType %d (%s), skipEncoded %d, bitstream buffer %lx size %u", enc_output_info.picType, bmvpu_frame_type_string(encoded_frame->frame_type), @@ -1454,8 +1590,8 @@ int bmvpu_enc_encode(BmVpuEncoder *encoder, encoded_data_size = enc_output_info.bitstreamSize; add_header = (encoder->first_frame || - (encoded_frame->frame_type == BM_VPU_FRAME_TYPE_IDR) || - (encoded_frame->frame_type == BM_VPU_FRAME_TYPE_I)); + (encoded_frame->frame_type == BM_VPU_ENC_FRAME_TYPE_IDR) || + (encoded_frame->frame_type == BM_VPU_ENC_FRAME_TYPE_I)); if (add_header) encoded_data_size += encoder->headers_rbsp_size; @@ -1663,111 +1799,18 @@ bmvpu_enc_handle_error_full(char const *fn, int linenr, char const *funcn, char } -static BmVpuFrameType convert_frame_type(int vpu_pic_type) +static BmVpuEncFrameType convert_frame_type(int vpu_pic_type) { - BmVpuFrameType type = BM_VPU_FRAME_TYPE_UNKNOWN; + BmVpuEncFrameType type = BM_VPU_ENC_FRAME_TYPE_UNKNOWN; switch (vpu_pic_type) { - case 0: type = BM_VPU_FRAME_TYPE_I; break; - case 1: type = BM_VPU_FRAME_TYPE_P; 
break; - case 2: type = BM_VPU_FRAME_TYPE_B; break; - case 5: type = BM_VPU_FRAME_TYPE_IDR; break; + case 0: type = BM_VPU_ENC_FRAME_TYPE_I; break; + case 1: type = BM_VPU_ENC_FRAME_TYPE_P; break; + case 2: type = BM_VPU_ENC_FRAME_TYPE_B; break; + case 5: type = BM_VPU_ENC_FRAME_TYPE_IDR; break; default: break; } return type; } - -/* - * If a bm_handle_t on this soc already exists, return it directly, - * otherwise return after creat one (using bm_dev_request). - * This function is only be called by bmvpu_enc_load(), the bm_handle_t's reference count +1 - * if this function is called. - */ -static void bmvpu_enc_load_bmlib_handle(int soc_idx){ - if (soc_idx > MAX_SOC_NUM) - { - BM_VPU_ERROR("soc_idx excess MAX_SOC_NUM!\n"); - exit(0); - } - - bm_handle_lock(); - if (g_bm_handle[soc_idx].bm_handle) - { - g_bm_handle[soc_idx].count += 1; - bm_handle_unlock(); - return ; - } - - bm_handle_t handle; - bm_status_t ret = bm_dev_request(&handle, soc_idx); - if (ret != BM_SUCCESS) { - BM_VPU_ERROR("Create Bm Handle Failed\n"); - bm_handle_unlock(); - exit(0); - } - g_bm_handle[soc_idx].count = 1; - g_bm_handle[soc_idx].bm_handle = handle; - bm_handle_unlock(); - return ; -} - - -/* - * If a bm_handle_t on this soc already exists, then the bm_handle_t's reference count -1. - * After that, if bm_handle_t's reference count is 0, free it(bm_dev_free), - * otherwise do nothing. - * This function is only be called by bmvpu_enc_unload(). 
- */ -static void bmvpu_enc_unload_bmlib_handle(int soc_idx){ - if (soc_idx > MAX_SOC_NUM) - { - BM_VPU_ERROR("soc_idx excess MAX_SOC_NUM!\n"); - exit(0); - } - - if (g_bm_handle[soc_idx].bm_handle) - { - bm_handle_lock(); - if (g_bm_handle[soc_idx].count <= 1) - { - bm_dev_free(g_bm_handle[soc_idx].bm_handle); - g_bm_handle[soc_idx].count = 0; - g_bm_handle[soc_idx].bm_handle = 0; - BM_VPU_DEBUG("Free bm_handle for encode on soc %d \n", soc_idx); - } - else - { - g_bm_handle[soc_idx].count -= 1; - BM_VPU_DEBUG("The bm_handle for encode on soc is used by %d users \n", g_bm_handle[soc_idx].count); - } - bm_handle_unlock(); - } - else - BM_VPU_DEBUG("Bm_handle for encode on soc %d not exist \n", soc_idx); -} - -bm_handle_t bmvpu_enc_get_bmlib_handle(int soc_idx) -{ - bm_handle_t handle = 0; - if (soc_idx > MAX_SOC_NUM) - { - BM_VPU_ERROR("soc_idx excess MAX_SOC_NUM!\n"); - exit(0); - } - - bm_handle_lock(); - if (g_bm_handle[soc_idx].bm_handle) - { - handle = g_bm_handle[soc_idx].bm_handle; - bm_handle_unlock(); - return handle; - } - else - { - bm_handle_unlock(); - BM_VPU_ERROR("There is not bmlib_handle on soc %d, This function should be called after bmvpu_enc_load()! \n"); - return handle; - } -} diff --git a/bmvid/video/encoder/bm_enc_api/src/log.c b/bmvid/video/encoder/bm_enc_api/src/log.c index 8c44eb5..53faf63 100644 --- a/bmvid/video/encoder/bm_enc_api/src/log.c +++ b/bmvid/video/encoder/bm_enc_api/src/log.c @@ -12,12 +12,12 @@ #include #include -#include "bmvpuapi.h" -#include "bmvpuapi_internal.h" +#include "bm_vpuenc_interface.h" +#include "bm_vpuenc_internal.h" #include "bmvpu_logging.h" -static void default_logging_fn(BmVpuLogLevel level, char const *file, +static void default_logging_fn(BmVpuEncLogLevel level, char const *file, int const line, char const *fn, const char *format, ...) 
{ @@ -28,21 +28,21 @@ static void default_logging_fn(BmVpuLogLevel level, char const *file, BMVPUAPI_UNUSED_PARAM(format); } -BmVpuLogLevel bmvpu_cur_log_level_threshold = BM_VPU_LOG_LEVEL_ERROR; -BmVpuLoggingFunc bmvpu_cur_logging_fn = default_logging_fn; +BmVpuEncLogLevel bmvpu_cur_log_level_threshold = BMVPU_ENC_LOG_LEVEL_ERROR; +BmVpuEncLoggingFunc bmvpu_cur_logging_fn = default_logging_fn; -void bmvpu_set_logging_function(BmVpuLoggingFunc logging_fn) +void bmvpu_enc_set_logging_function(BmVpuEncLoggingFunc logging_fn) { bmvpu_cur_logging_fn = (logging_fn != NULL) ? logging_fn : default_logging_fn; } -void bmvpu_set_logging_threshold(BmVpuLogLevel threshold) +void bmvpu_enc_set_logging_threshold(BmVpuEncLogLevel threshold) { bmvpu_cur_log_level_threshold = threshold; vpu_set_logging_threshold(threshold); } -BmVpuLogLevel bmvpu_get_logging_threshold() +BmVpuEncLogLevel bmvpu_enc_get_logging_threshold() { return bmvpu_cur_log_level_threshold; } diff --git a/bmvid/video/encoder/bm_enc_api/src/misc.c b/bmvid/video/encoder/bm_enc_api/src/misc.c index 985c333..70da191 100644 --- a/bmvid/video/encoder/bm_enc_api/src/misc.c +++ b/bmvid/video/encoder/bm_enc_api/src/misc.c @@ -16,15 +16,15 @@ #include "bmvpu.h" -#include "bmvpuapi.h" -#include "bmvpuapi_internal.h" +#include "bm_vpuenc_interface.h" +#include "bm_vpuenc_internal.h" - -int bmvpu_calc_framebuffer_sizes(int mapType, BmVpuColorFormat color_format, +int bmvpu_calc_framebuffer_sizes(int mapType, BmVpuEncPixFormat pix_format, int frame_width, int frame_height, - int chroma_interleave, BmVpuFbInfo *info) + BmVpuFbInfo *info) { int fb_fmt = FB_FMT_420; + BmVpuEncChromaFormat chroma_interleave = BM_VPU_ENC_CHROMA_NO_INTERLEAVE; if((info == NULL) || (frame_width <= 0) || (frame_height <= 0)){ BM_VPU_ERROR("bmvpu_calc_framebuffer_sizes params err: info(0X%x), frame_width(%d), frame_height(%d).",info, frame_width, frame_height); return -1; @@ -35,15 +35,35 @@ int bmvpu_calc_framebuffer_sizes(int mapType, 
BmVpuColorFormat color_format, info->width = BM_VPU_ALIGN_VAL_TO(frame_width, FRAME_ALIGN); info->height = BM_VPU_ALIGN_VAL_TO(frame_height, FRAME_ALIGN); - switch (color_format) + switch (pix_format) { - case BM_VPU_COLOR_FORMAT_YUV420: fb_fmt = FB_FMT_420; break; - case BM_VPU_COLOR_FORMAT_YUV422: fb_fmt = FB_FMT_422; break; - case BM_VPU_COLOR_FORMAT_YUV444: fb_fmt = FB_FMT_444; break; - case BM_VPU_COLOR_FORMAT_YUV400: fb_fmt = FB_FMT_400; break; + case BM_VPU_ENC_PIX_FORMAT_YUV420P: + fb_fmt = FB_FMT_420; + break; + case BM_VPU_ENC_PIX_FORMAT_YUV422P: + fb_fmt = FB_FMT_422; + break; + case BM_VPU_ENC_PIX_FORMAT_YUV444P: + fb_fmt = FB_FMT_444; + break; + case BM_VPU_ENC_PIX_FORMAT_YUV400: + fb_fmt = FB_FMT_400; + break; + case BM_VPU_ENC_PIX_FORMAT_NV12: + fb_fmt = FB_FMT_420; + chroma_interleave = BM_VPU_ENC_CHROMA_INTERLEAVE_CBCR; + break; + case BM_VPU_ENC_PIX_FORMAT_NV16: + fb_fmt = FB_FMT_422; + chroma_interleave = BM_VPU_ENC_CHROMA_INTERLEAVE_CBCR; + break; + case BM_VPU_ENC_PIX_FORMAT_NV24: + fb_fmt = FB_FMT_444; + chroma_interleave = BM_VPU_ENC_CHROMA_INTERLEAVE_CBCR; + break; default: { - BM_VPU_ERROR("bmvpu_calc_framebuffer_sizes color_format(%d) err.", color_format); + BM_VPU_ERROR("bmvpu_calc_framebuffer_sizes pix_format(%s) err.", bmvpu_pix_format_string(pix_format)); return -1; } } @@ -61,20 +81,25 @@ int bmvpu_calc_framebuffer_sizes(int mapType, BmVpuColorFormat color_format, fb_fmt, chroma_interleave); /* TODO */ - switch (color_format) + switch (pix_format) { - case BM_VPU_COLOR_FORMAT_YUV420: info->c_stride = info->y_stride / 2; break; - case BM_VPU_COLOR_FORMAT_YUV422: info->c_stride = info->y_stride / 2; break; - case BM_VPU_COLOR_FORMAT_YUV444: info->c_stride = info->y_stride; break; - case BM_VPU_COLOR_FORMAT_YUV400: info->c_stride = 0; break; + case BM_VPU_ENC_PIX_FORMAT_YUV420P: info->c_stride = info->y_stride / 2; break; + case BM_VPU_ENC_PIX_FORMAT_YUV422P: info->c_stride = info->y_stride / 2; break; + case 
BM_VPU_ENC_PIX_FORMAT_YUV444P: info->c_stride = info->y_stride; break; + case BM_VPU_ENC_PIX_FORMAT_YUV400: info->c_stride = 0; break; + case BM_VPU_ENC_PIX_FORMAT_NV12: info->c_stride = info->y_stride / 2; break; + case BM_VPU_ENC_PIX_FORMAT_NV16: info->c_stride = info->y_stride / 2; break; + case BM_VPU_ENC_PIX_FORMAT_NV24: info->c_stride = info->y_stride; break; default: { - BM_VPU_ERROR("bmvpu_calc_framebuffer_sizes color_format(%d) err.", color_format); + BM_VPU_ERROR("bmvpu_calc_framebuffer_sizes pix_format(%s) err.", bmvpu_pix_format_string(pix_format)); return -1; } } - if (chroma_interleave) + if ((pix_format == BM_VPU_ENC_PIX_FORMAT_NV12) || \ + (pix_format == BM_VPU_ENC_PIX_FORMAT_NV16) || \ + (pix_format == BM_VPU_ENC_PIX_FORMAT_NV24)) info->c_stride *= 2; return 0; } @@ -82,7 +107,7 @@ int bmvpu_calc_framebuffer_sizes(int mapType, BmVpuColorFormat color_format, int bmvpu_fill_framebuffer_params(BmVpuFramebuffer *fb, BmVpuFbInfo *info, - bm_device_mem_t *fb_dma_buffer, + BmVpuEncDMABuffer *fb_dma_buffer, int fb_id, void* context) { if((fb == NULL) || (info == NULL)){ @@ -108,27 +133,149 @@ int bmvpu_fill_framebuffer_params(BmVpuFramebuffer *fb, return 0; } -char const *bmvpu_color_format_string(BmVpuColorFormat color_format) +/** + * Upload data from HOST to a VPU core + * + * return value: + * -1, failed + * 0, done + */ +int bmvpu_enc_upload_data(int vpu_core_idx, + const uint8_t* host_va, int host_stride, + uint64_t vpu_pa, int vpu_stride, + int width, int height) { - switch (color_format) + int size = vpu_stride*height; + int ret = 0; + + if (vpu_stride != host_stride) + { + const uint8_t *s0 = host_va; + uint8_t *buffer, *s1; + int i; + + buffer = calloc(size, sizeof(uint8_t)); + if (buffer == NULL) + { + BM_VPU_ERROR("calloc failed!"); + return -1; + } + + s1 = buffer; + for (i=0; i"; } } -char const *bmvpu_frame_type_string(BmVpuFrameType frame_type) +char const *bmvpu_frame_type_string(BmVpuEncFrameType frame_type) { switch (frame_type) { - case 
BM_VPU_FRAME_TYPE_I: return "I"; - case BM_VPU_FRAME_TYPE_P: return "P"; - case BM_VPU_FRAME_TYPE_B: return "B"; - case BM_VPU_FRAME_TYPE_IDR: return "IDR"; + case BM_VPU_ENC_FRAME_TYPE_I: return "I"; + case BM_VPU_ENC_FRAME_TYPE_P: return "P"; + case BM_VPU_ENC_FRAME_TYPE_B: return "B"; + case BM_VPU_ENC_FRAME_TYPE_IDR: return "IDR"; default: return ""; } } @@ -209,13 +356,13 @@ int bmvpu_enc_param_parse(BmVpuEncOpenParams *p, const char *name, const char *v * 2 : Boost mode (normal encoding speed, normal picture quality), * 3 : Fast mode (high encoding speed, low picture quality) */ if (!strcmp(value, "fast") || !strcmp(value, "0")) - p->enc_mode = 3; + p->enc_mode = BM_VPU_ENC_FAST_MODE; else if (!strcmp(value, "medium") || !strcmp(value, "1")) - p->enc_mode = 2; + p->enc_mode = BM_VPU_ENC_BOOST_MODE; else if (!strcmp(value, "slow") || !strcmp(value, "2")) - p->enc_mode = 1; + p->enc_mode = BM_VPU_ENC_RECOMMENDED_MODE; else { - p->enc_mode = 2; /* TODO change to custom after lots of cfg parameters are added. */ + p->enc_mode = BM_VPU_ENC_BOOST_MODE; /* TODO change to custom after lots of cfg parameters are added. */ BM_VPU_WARNING("Invalid preset:%s. Use slow encoding preset instead.", value); } } @@ -286,192 +433,166 @@ int bmvpu_enc_param_parse(BmVpuEncOpenParams *p, const char *name, const char *v return b_error ? 
-1 : 0; } -int bmvpu_malloc_device_byte_heap(bm_handle_t bm_handle, bm_device_mem_t *pmem, unsigned int size, int heap_id_mask, int high_bit_first) +/************************************************************************************ + * encoder device memory management + *************************************************************************************/ +/** + * Allocate device memory + * + * return value: + * -1, failed + * 0, done + */ +int bmvpu_enc_dma_buffer_allocate(int vpu_core_idx, BmVpuEncDMABuffer *buf, unsigned int size) { int ret = 0; - int i = 0; - int heap_num = 0; - ret = bm_get_gmem_total_heap_num(bm_handle, &heap_num); - if (ret != 0) - { - BM_VPU_ERROR("bmvpu_malloc_device_byte_heap failed!\n"); + + ret = vpu_EncAllocateDMABuffer(vpu_core_idx, (BmVpuDMABuffer *)buf, size); + if (ret != 0) { + BM_VPU_ERROR("vpu_EncAllocateDMABuffer failed!"); return -1; } - int available_heap_mask = 0; - for (i=0; i=0; i--) - { - if ((enable_heap_mask & (0x1<phys_addr; +} - d0 = buffer; - d1 = host_va; - for (i=0; isize; +} - free(buffer); - } - else - { - bm_device_mem_t vpu_mem = bm_mem_from_device(vpu_pa, size); - ret = bm_memcpy_d2s_partial(bmvpu_enc_get_bmlib_handle(soc_idx), host_va, vpu_mem, size); - if (ret != 0) - { - BM_VPU_ERROR("vpu_write_memory failed!"); - return -1; - } +int bmvpu_enc_dma_buffer_flush(int vpu_core_idx, BmVpuEncDMABuffer* buf) +{ + int ret; + ret = vpu_EncFlushDecache(vpu_core_idx, (BmVpuDMABuffer*)buf); + if (ret != 0) { + BM_VPU_ERROR("vpu_EncFlushDecache failed"); + return -1; } return 0; -#endif } +int bmvpu_enc_dma_buffer_invalidate(int vpu_core_idx, BmVpuEncDMABuffer* buf) +{ + int ret; + ret = vpu_EncInvalidateDecache(vpu_core_idx, (BmVpuDMABuffer*)buf); + if (ret != 0) { + BM_VPU_ERROR("vpu_EncInvalidateDecache failed"); + return -1; + } + return 0; +} diff --git a/bmvid/video/provider/cnm/CMakeLists.txt b/bmvid/video/provider/cnm/CMakeLists.txt index 9253b51..725527a 100644 --- a/bmvid/video/provider/cnm/CMakeLists.txt 
+++ b/bmvid/video/provider/cnm/CMakeLists.txt @@ -115,7 +115,6 @@ set(SRCS ${PROJECT_ROOT}/video/provider/cnm/decoder/vdi/windows/vdi_osal.c ${PROJECT_ROOT}/video/provider/cnm/decoder/vdi/mm.c ${PROJECT_ROOT}/video/decoder/bm_dec_api/src/bm_video_interface.c - ${PROJECT_ROOT}/video/decoder/bm_dec_api/src/bm_video_syscxt.c ) add_library(libbmvideo SHARED ${SRCS}) @@ -141,7 +140,7 @@ install(TARGETS libbmvideo libbmvideo-static DESTINATION lib) if(WIN32) file( - COPY ${PROJECT_ROOT}/video/decoder/bm_dec_api/inc/bm_video_interface.h + COPY ${PROJECT_ROOT}/video/decoder/bm_dec_api/inc/bm_vpudec_interface.h DESTINATION ${CMAKE_INSTALL_PREFIX}/include ) -endif() \ No newline at end of file +endif() diff --git a/bmvid/video/provider/cnm/Makefile b/bmvid/video/provider/cnm/Makefile index 50f9f85..b3c6b74 100755 --- a/bmvid/video/provider/cnm/Makefile +++ b/bmvid/video/provider/cnm/Makefile @@ -3,7 +3,7 @@ # Project: C&M Video decoder sample # # ---------------------------------------------------------------------- -.PHONY: CREATE_DIR +.PHONY: CREATE_DIR COPY_HEADERS # ---------------------------------------------------------------------- #configurable parameter @@ -57,7 +57,6 @@ sub_type := WAVE512_FPGA # default asic subtype endif endif -$(shell cp $(BMVID_TOP_DIR)/video/provider/cnm/sample_v2/component_list_all.h $(BMVID_TOP_DIR)/video/provider/cnm/sample_v2/component/component_list.h) ifeq ($(OS),linux) ifeq ($(PRODUCTFORM),pcie) BUILD_CONFIGURATION = NativeLinux @@ -312,28 +311,22 @@ SOURCES_COMMON =main_helper.c vpuhelper.c bits $(VDI_C) $(VDI_OSAL_C) $(MM_C) ifeq ("$(BUILD_CONFIGURATION)", "EmbeddedLinux") -SOURCES_COMMON += $(BMVID_TOP_DIR)/video/decoder/bm_dec_api/src/bm_video_interface.c \ - $(BMVID_TOP_DIR)/video/decoder/bm_dec_api/src/bm_video_syscxt.c +SOURCES_COMMON += $(BMVID_TOP_DIR)/video/decoder/bm_dec_api/src/bm_video_interface.c endif ifeq ("$(BUILD_CONFIGURATION)", "NativeLinux") -SOURCES_COMMON += 
$(BMVID_TOP_DIR)/video/decoder/bm_dec_api/src/bm_video_interface.c \ - $(BMVID_TOP_DIR)/video/decoder/bm_dec_api/src/bm_video_syscxt.c +SOURCES_COMMON += $(BMVID_TOP_DIR)/video/decoder/bm_dec_api/src/bm_video_interface.c endif ifeq ("$(BUILD_CONFIGURATION)", "MipsLinux") -SOURCES_COMMON += $(BMVID_TOP_DIR)/video/decoder/bm_dec_api/src/bm_video_interface.c \ - $(BMVID_TOP_DIR)/video/decoder/bm_dec_api/src/bm_video_syscxt.c +SOURCES_COMMON += $(BMVID_TOP_DIR)/video/decoder/bm_dec_api/src/bm_video_interface.c endif ifeq ("$(BUILD_CONFIGURATION)", "LoongLinux") -SOURCES_COMMON += $(BMVID_TOP_DIR)/video/decoder/bm_dec_api/src/bm_video_interface.c \ - $(BMVID_TOP_DIR)/video/decoder/bm_dec_api/src/bm_video_syscxt.c +SOURCES_COMMON += $(BMVID_TOP_DIR)/video/decoder/bm_dec_api/src/bm_video_interface.c endif ifeq ("$(BUILD_CONFIGURATION)", "SunwayLinux") -SOURCES_COMMON += $(BMVID_TOP_DIR)/video/decoder/bm_dec_api/src/bm_video_interface.c \ - $(BMVID_TOP_DIR)/video/decoder/bm_dec_api/src/bm_video_syscxt.c +SOURCES_COMMON += $(BMVID_TOP_DIR)/video/decoder/bm_dec_api/src/bm_video_interface.c endif ifeq ("$(BUILD_CONFIGURATION)", "RiscvLinux") -SOURCES_COMMON += $(BMVID_TOP_DIR)/video/decoder/bm_dec_api/src/bm_video_interface.c \ - $(BMVID_TOP_DIR)/video/decoder/bm_dec_api/src/bm_video_syscxt.c +SOURCES_COMMON += $(BMVID_TOP_DIR)/video/decoder/bm_dec_api/src/bm_video_interface.c endif @@ -365,9 +358,9 @@ ifneq ($(SO_NAME),) TARGET_SOVERSION=$(DECTEST)$(SO_VERSION) endif -all: $(BUILDLIST) +all: COPY_HEADERS $(BUILDLIST) -test: CREATE_DIR $(TEST) +test: CREATE_DIR COPY_HEADERS $(TEST) $(TEST): $(DECTEST) $(SOURCES_DECTEST) $(CC) -o $@ $(LDFLAGS) -O2 $(SOURCES_DECTEST) $(LDLIBS) $(CFLAGS) @@ -390,13 +383,13 @@ ifneq ($(TARGET_SOVERSION), ) endif endif - @cp $(BMVID_TOP_DIR)/video/decoder/bm_dec_api/inc/bm_video_interface.h $(INSTALL_DIR)/ + @cp $(BMVID_TOP_DIR)/video/decoder/bm_dec_api/inc/bm_vpudec_interface.h $(INSTALL_DIR)/ install-lib: install -d $(DESTDIR)/lib install -d 
$(DESTDIR)/include ln -sf "$(SLIBNAME)" "$(DESTDIR)/lib/libvideo_bm.so" - install -m 755 $(INSTALL_DIR)/bm_video_interface.h $(DESTDIR)/include + install -m 755 $(INSTALL_DIR)/bm_vpudec_interface.h $(DESTDIR)/include ifeq ($(TARGET_SOVERSION), ) install -m 755 $(DECTEST) "$(DESTDIR)/lib/$(SLIBNAME)" else @@ -423,6 +416,9 @@ CREATE_DIR: -mkdir -p $(OBJDIR) -mkdir -p $(INSTALL_DIR) +COPY_HEADERS: + cp $(BMVID_TOP_DIR)/video/provider/cnm/sample_v2/component_list_all.h $(BMVID_TOP_DIR)/video/provider/cnm/sample_v2/component/component_list.h + ${OBJDIR}/%.o: %.c $(CC) $(LDFLAGS) $(CFLAGS) $(LDLIBS) -c $< -o $@ -MD -MF $(@:.o=.dep) diff --git a/bmvid/video/provider/cnm/Makefile.wave521c b/bmvid/video/provider/cnm/Makefile.wave521c index 97a8351..07b56c5 100644 --- a/bmvid/video/provider/cnm/Makefile.wave521c +++ b/bmvid/video/provider/cnm/Makefile.wave521c @@ -15,7 +15,7 @@ # make -f Makefile.wave521c PRODUCTFORM=pcie # -.PHONY: CREATE_DIR clean all +.PHONY: CREATE_DIR COPY_HEADERS clean all PRODUCT := WAVE521 PRODUCTFORM ?= soc @@ -26,8 +26,6 @@ ION_DIR ?= ./release INSTALL_DIR ?= ./release BMVID_TOP_DIR ?= ../../../ -$(shell cp $(BMVID_TOP_DIR)/video/provider/cnm/sample_v2/component_list_encoder.h $(BMVID_TOP_DIR)/video/provider/cnm/sample_v2/component/component_list.h) - USE_PTHREAD = yes LINT_HOME = etc/lint @@ -169,7 +167,7 @@ OBJECTPATHS=$(addprefix $(OBJDIR)/,$(notdir $(OBJECTNAMES))) LINT_SRC_INCLUDES = -I$(BMVID_TOP_DIR)/video/provider/cnm/sample_v2 -I$(BMVID_TOP_DIR)/video/provider/cnm/sample_v2/component -I$(BMVID_TOP_DIR)/video/provider/cnm/sample_v2/component_decoder -I$(BMVID_TOP_DIR)/video/provider/cnm/sample_v2/component_encoder LINT_SRC_INCLUDES += -I$(BMVID_TOP_DIR)/video/provider/cnm/sample_v2/helper -I$(BMVID_TOP_DIR)/video/driver -all: CREATE_DIR $(OBJECTPATHS) +all: CREATE_DIR COPY_HEADERS $(OBJECTPATHS) $(LINKER) -o $(TARGET) $(LDFLAGS) -Wl,-gc-section -Wl,--start-group $(OBJECTPATHS) $(LDLIBS) -Wl,--end-group -include $(OBJECTPATHS:.o=.dep) @@ 
-189,6 +187,9 @@ uninstall: CREATE_DIR: -mkdir -p $(OBJDIR) +COPY_HEADERS: + cp $(BMVID_TOP_DIR)/video/provider/cnm/sample_v2/component_list_encoder.h $(BMVID_TOP_DIR)/video/provider/cnm/sample_v2/component/component_list.h + obj/%.o: %.c $(MAKEFILE) $(CC) $(CFLAGS) -Wall -Werror -c $< -o $@ -MD -MF $(@:.o=.dep) diff --git a/bmvid/video/provider/cnm/Wave5xxDecV2.mak b/bmvid/video/provider/cnm/Wave5xxDecV2.mak index 71d352f..f2cfaf7 100644 --- a/bmvid/video/provider/cnm/Wave5xxDecV2.mak +++ b/bmvid/video/provider/cnm/Wave5xxDecV2.mak @@ -3,7 +3,7 @@ # Project: C&M Video decoder sample # # ---------------------------------------------------------------------- -.PHONY: CREATE_DIR +.PHONY: CREATE_DIR COPY_HEADERS PRODUCT := WAVE511 ifneq ($(PRODUCTFORM), pcie) @@ -12,7 +12,6 @@ else BUILD_CONFIGURATION = NativeLinux endif #BUILD_CONFIGURATION = NonOS -$(shell cp sample_v2/component_list_decoder.h sample_v2/component/component_list.h) USE_FFMPEG = no USE_PTHREAD = yes @@ -165,12 +164,12 @@ endif OBJECTNAMES_DECTEST=$(patsubst %.c,%.o,$(patsubst %.cpp,%.o,$(SOURCES_DECTEST))) OBJECTPATHS_DECTEST=$(addprefix $(OBJDIR)/,$(notdir $(OBJECTNAMES_DECTEST))) $(OBJECTPATHS_COMMON) -all: $(BUILDLIST) +all: COPY_HEADERS $(BUILDLIST) ifeq ($(USE_RTL_SIM), yes) -DECTEST: CREATE_DIR $(OBJECTPATHS_DECTEST) +DECTEST: CREATE_DIR COPY_HEADERS $(OBJECTPATHS_DECTEST) else -DECTEST: CREATE_DIR $(OBJECTPATHS_DECTEST) +DECTEST: CREATE_DIR COPY_HEADERS $(OBJECTPATHS_DECTEST) $(LINKER) -o $(DECTEST) $(LDFLAGS) -Wl,-gc-section -Wl,--start-group $(OBJECTPATHS_DECTEST) $(LDLIBS) -Wl,--end-group endif @@ -184,6 +183,9 @@ clean: CREATE_DIR: -mkdir -p $(OBJDIR) +COPY_HEADERS: + cp sample_v2/component_list_decoder.h sample_v2/component/component_list.h + obj/%.o: %.c $(MAKEFILE) $(CC) $(CFLAGS) -Wall -Werror -c $< -o $@ -MD -MF $(@:.o=.dep) diff --git a/bmvid/video/provider/cnm/Wave5xxEncV2.mak b/bmvid/video/provider/cnm/Wave5xxEncV2.mak index 265261c..0e36a1c 100644 --- 
a/bmvid/video/provider/cnm/Wave5xxEncV2.mak +++ b/bmvid/video/provider/cnm/Wave5xxEncV2.mak @@ -3,11 +3,10 @@ # Project: C&M Video encoder sample # # ---------------------------------------------------------------------- -.PHONY: CREATE_DIR clean all +.PHONY: CREATE_DIR COPY_HEADERS clean all PRODUCT := WAVE521C BUILD_CONFIGURATION = EmbeddedLinux -$(shell cp sample_v2/component_list_encoder.h sample_v2/component/component_list.h) USE_FFMPEG = no USE_PTHREAD = yes @@ -169,9 +168,9 @@ LINT_SRC_INCLUDES = -I./sample_v2 -I./sample_v2/component -I./sample_v2/componen LINT_SRC_INCLUDES += -I./sample_v2/helper -I./sample_v2/helper/bitstream -I./sample_v2/helper/comparator -I./sample_v2/helper/misc -I./sample_v2/helper/yuv ifeq ($(USE_RTL_SIM), yes) -all: CREATE_DIR $(OBJECTPATHS) +all: CREATE_DIR COPY_HEADERS $(OBJECTPATHS) else -all: CREATE_DIR $(OBJECTPATHS) +all: CREATE_DIR COPY_HEADERS $(OBJECTPATHS) $(LINKER) -o $(TARGET) $(LDFLAGS) -Wl,-gc-section -Wl,--start-group $(OBJECTPATHS) $(LDLIBS) -Wl,--end-group endif @@ -185,6 +184,9 @@ clean: CREATE_DIR: -mkdir -p $(OBJDIR) +COPY_HEADERS: + cp sample_v2/component_list_encoder.h sample_v2/component/component_list.h + obj/%.o: %.c $(MAKEFILE) $(CC) $(CFLAGS) -Wall -Werror -c $< -o $@ -MD -MF $(@:.o=.dep) diff --git a/bmvid/video/provider/cnm/decoder/vdi/linux/vdi.c b/bmvid/video/provider/cnm/decoder/vdi/linux/vdi.c index f5f90bc..8a62f2d 100755 --- a/bmvid/video/provider/cnm/decoder/vdi/linux/vdi.c +++ b/bmvid/video/provider/cnm/decoder/vdi/linux/vdi.c @@ -49,10 +49,6 @@ # define VPU_DEVICE_NAME "/dev/vpu" #endif -#ifdef BM_PCIE_MODE -#define FAKE_PCIE_VIRT_ADDR 0xDEADBEEFl -#endif - #ifdef TRY_SEM_MUTEX #include typedef sem_t MUTEX_HANDLE; @@ -303,7 +299,7 @@ int vdi_init(u64 core_idx) vdi_set_clock_gate(core_idx, 1); vdi->product_code = vdi_read_register(core_idx, VPU_PRODUCT_CODE_REGISTER); - if (vdi_allocate_common_memory(core_idx) !=BM_SUCCESS) + if (vdi_allocate_common_memory(core_idx) < 0) { VLOG(ERR, "[VDI] fail 
to get vpu common buffer from driver\n"); goto ERR_VDI_INIT; @@ -436,7 +432,7 @@ int vdi_release(u64 core_idx) vdi->task_num--; vdi_get_kernel_reset(core_idx); - if(vdi->reset_core_flag.reset_core_disable!=0) + if(vdi->reset_core_flag.reset == 1) vdi_resume_kernel_reset(core_idx); vdi_unlock(core_idx); @@ -515,6 +511,32 @@ int vdi_get_common_memory(u64 core_idx, vpu_buffer_t *vb) return 0; } +int vdi_get_init_status(u64 core_idx) +{ + int ret; + int chip_core_idx; + vdi_info_t *vdi; + + if (core_idx >= MAX_NUM_VPU_CORE) + return -1; + + vdi = &s_vdi_info[core_idx]; + + chip_core_idx = core_idx%MAX_NUM_VPU_CORE_CHIP; + + if((ret = ioctl(vdi->vpu_fd, VDI_IOCTL_GET_FIRMWARE_STATUS, &chip_core_idx)) < 0) + { + VLOG(ERR, "[VDI] fail to vdi_get_init_status\n"); + return -1; + } + + if(ret == 100) { + return 0; + } + return 1; +} + + int vdi_allocate_common_memory(u64 core_idx) { vdi_info_t *vdi = &s_vdi_info[core_idx]; @@ -1498,6 +1520,55 @@ int vdi_read_memory(u64 core_idx, u64 src_addr, unsigned char *dst_data, int len return len; } +int vdi_mmap_memory(u64 core_idx, vpu_buffer_t *vb) +{ + vdi_info_t *vdi; +#if defined(BM_PCIE_MODE) + int chip_core_idx = core_idx%MAX_NUM_VPU_CORE_CHIP; +#endif + if (core_idx >= MAX_NUM_VPU_CORE) + return -1; + + vdi = &s_vdi_info[core_idx]; + if(!vdi || vdi->vpu_fd==-1 || vdi->vpu_fd == 0x00) + return -1; + + vb->virt_addr = (unsigned long)mmap(NULL, vb->size, PROT_READ | PROT_WRITE, + MAP_SHARED, vdi->vpu_fd, vb->phys_addr); + if ((void *)vb->virt_addr == MAP_FAILED) + { + vb->virt_addr = 0; + return -1; + } + + return 0; +} + +int vdi_unmap_memory(u64 core_idx, vpu_buffer_t *vb) +{ + vdi_info_t *vdi; +#if defined(BM_PCIE_MODE) + int chip_core_idx = core_idx%MAX_NUM_VPU_CORE_CHIP; +#endif + if (core_idx >= MAX_NUM_VPU_CORE) + return -1; + + vdi = &s_vdi_info[core_idx]; + if(!vdi || vdi->vpu_fd==-1 || vdi->vpu_fd == 0x00) + return -1; + + if(vb->virt_addr != 0 && vb->virt_addr != FAKE_PCIE_VIRT_ADDR) + { + if (munmap((void 
*)vb->virt_addr, vb->size) != 0) + { + VLOG(ERR, "[VDI] fail to vdi_free_dma_memory virtial address = 0x%lx\n", vb->virt_addr); + } + return -1; + } + + return 0; +} + int vdi_allocate_dma_memory(u64 core_idx, vpu_buffer_t *vb) { vdi_info_t *vdi; @@ -1525,28 +1596,7 @@ int vdi_allocate_dma_memory(u64 core_idx, vpu_buffer_t *vb) #if !defined(BM_PCIE_MODE) vdb.enable_cache = vb->enable_cache; #endif -#ifdef BM_ION_MEM - { - // int flag = BM_ION_FLAG_WRITECOMBINE; - // if(vb->enable_cache == 1) - // flag = BM_ION_FLAG_CACHED; - // bm_ion_buffer_t* p_ion_buf = bm_ion_allocate_buffer(0, vdb.size, (BM_ION_FLAG_VPU << 4) | flag); - // if (p_ion_buf == NULL) - // { - // VLOG(ERR, "[VDI] fail to vdi_allocate_dma_memory size=%d\n", vdb.size); - // return -1; - // } - // if (bm_ion_map_buffer(p_ion_buf, BM_ION_MAPPING_FLAG_READ | BM_ION_MAPPING_FLAG_WRITE) != 0) - // { - // VLOG(ERR, "ion map failed.\n"); - // return -1; - // } - - // vdb.base = (unsigned long)p_ion_buf; - // vdb.phys_addr = p_ion_buf->paddr; - // vdb.virt_addr = (unsigned long)p_ion_buf->vaddr; - } -#else + if (ioctl(vdi->vpu_fd, VDI_IOCTL_ALLOCATE_PHYSICAL_MEMORY, &vdb) < 0) { VLOG(ERR, "[VDI] fail to vdi_allocate_dma_memory size=%d\n", vdb.size); @@ -1565,7 +1615,6 @@ int vdi_allocate_dma_memory(u64 core_idx, vpu_buffer_t *vb) #else vdb.virt_addr = FAKE_PCIE_VIRT_ADDR; vdi_clear_memory(core_idx, vdb.phys_addr, vdb.size, VDI_SYSTEM_ENDIAN); -#endif #endif vb->base = vdb.base; @@ -1593,6 +1642,7 @@ void vdi_free_dma_memory(u64 core_idx, vpu_buffer_t *vb) { vdi_info_t *vdi; int i; + int ret; vpudrv_buffer_t vdb; if (core_idx >= MAX_NUM_VPU_CORE) @@ -1615,6 +1665,7 @@ void vdi_free_dma_memory(u64 core_idx, vpu_buffer_t *vb) vdi->vpu_buffer_pool[i].inuse = 0; vdi->vpu_buffer_pool_count--; vdb = vdi->vpu_buffer_pool[i].vdb; + osal_memset(&vdi->vpu_buffer_pool[i].vdb, 0x00, sizeof(vpudrv_buffer_t)); break; } } @@ -1625,6 +1676,23 @@ void vdi_free_dma_memory(u64 core_idx, vpu_buffer_t *vb) return ; } + ret = 
ioctl(vdi->vpu_fd, VDI_IOCTL_FREE_PHYSICALMEMORY, &vdb); + if(ret != 0) + { + VLOG(ERR, "[VDI] free dma memory failed. addr = 0x%lx\n", vdb.virt_addr); + } + + if(vdb.virt_addr != 0 && vdb.virt_addr != FAKE_PCIE_VIRT_ADDR) + { + if (munmap((void *)vdb.virt_addr, vdb.size) != 0) + { + VLOG(ERR, "[VDI] fail to vdi_free_dma_memory virtial address = 0x%lx\n", vdb.virt_addr); + } + } + + VLOG(INFO, "[VDI] vdi_free_dma_memory, physaddr=0x%lx, virtaddr=0x%lx~0x%lx, size=%d\n", + vdb.phys_addr, vdb.virt_addr, vdb.virt_addr + vdb.size, vdb.size); + osal_memset(vb, 0, sizeof(vpu_buffer_t)); } @@ -2380,22 +2448,17 @@ void vdi_invalidate_memory(u64 core_idx, vpu_buffer_t *vb) return; #if defined(BM_ION_MEM) - // if (!vb->size) { - // VLOG(ERR, "address 0x%08x is not mapped address!!!\n", (int)vb->phys_addr); - // } - // else - // { - // bm_ion_buffer_t* p_ion_buf = (bm_ion_buffer_t *)vb->base; - // if(p_ion_buf != NULL) - // { - // if(vb->enable_cache == 1) - // bm_ion_invalidate_buffer(p_ion_buf); - // } - // else - // { - // VLOG(ERR, "invalid ion buffer addr!\n"); - // } - // } + if (!vb->size) { + VLOG(ERR, "address 0x%08x is not mapped address!!!\n", (int)vb->phys_addr); + } + else + { + if(vb->enable_cache == 1) { + if(msync((void *)vb->virt_addr, vb->size, MS_INVALIDATE) == -1) { + VLOG(ERR, "[VDI] fail to invalidate memory. 
addr=0x%lx size=%d\n", vb->virt_addr, vb->size); + } + } + } #elif !defined(BM_PCIE_MODE) vdb.phys_addr = vb->phys_addr; vdb.size = vb->size; @@ -2597,12 +2660,14 @@ int vdi_resume_kernel_reset(u64 coreIdx){ #if defined(BM_PCIE_MODE) chip_core_idx = coreIdx%MAX_NUM_VPU_CORE_CHIP; #endif - vdi->reset_core_flag.reset_core_disable = 0; - vdi->reset_core_flag.core_idx = chip_core_idx; - ret = ioctl(vdi->vpu_fd, VDI_IOCTL_CTRL_KERNEL_RESET, &(vdi->reset_core_flag)); - if (ret < 0) { - VLOG(ERR, "decoder fail to resume vpu_reset with ioctl()\n"); - return -1; + if(vdi->reset_core_flag.pid == vdi->pid){ + vdi->reset_core_flag.reset = 0; + vdi->reset_core_flag.core_idx = chip_core_idx; + ret = ioctl(vdi->vpu_fd, VDI_IOCTL_CTRL_KERNEL_RESET, &(vdi->reset_core_flag)); + if (ret < 0) { + VLOG(ERR, "decoder fail to resume vpu_reset with ioctl()\n"); + return -1; + } } return 0; @@ -2651,7 +2716,8 @@ int vdi_disable_kernel_reset(u64 coreIdx){ #if defined(BM_PCIE_MODE) chip_core_idx = coreIdx%MAX_NUM_VPU_CORE_CHIP; #endif - vdi->reset_core_flag.reset_core_disable = vdi->pid; + vdi->reset_core_flag.pid = vdi->pid; + vdi->reset_core_flag.reset = 1; vdi->reset_core_flag.core_idx = chip_core_idx; ret = ioctl(vdi->vpu_fd, VDI_IOCTL_CTRL_KERNEL_RESET, &(vdi->reset_core_flag)); if (ret < 0) { diff --git a/bmvid/video/provider/cnm/decoder/vdi/linux/vdi_osal.c b/bmvid/video/provider/cnm/decoder/vdi/linux/vdi_osal.c index 5f528b3..3349a93 100644 --- a/bmvid/video/provider/cnm/decoder/vdi/linux/vdi_osal.c +++ b/bmvid/video/provider/cnm/decoder/vdi/linux/vdi_osal.c @@ -708,18 +708,19 @@ BOOL osal_mutex_unlock(osal_mutex_t mutex) Uint64 osal_gettime(void) { -/* struct timespec tp; clock_gettime(CLOCK_MONOTONIC, &tp); return (tp.tv_sec*1000 + tp.tv_nsec/1000000); - */ + + /* struct timeval tv; tv.tv_sec = 0; tv.tv_usec = 0; gettimeofday(&tv, NULL); return tv.tv_sec*1000 + tv.tv_usec/1000; + */ } diff --git a/bmvid/video/provider/cnm/decoder/vdi/vdi.h 
b/bmvid/video/provider/cnm/decoder/vdi/vdi.h index ebe9823..1f13b27 100755 --- a/bmvid/video/provider/cnm/decoder/vdi/vdi.h +++ b/bmvid/video/provider/cnm/decoder/vdi/vdi.h @@ -48,6 +48,8 @@ #include "windatatype.h" #endif +#define FAKE_PCIE_VIRT_ADDR 0xDEADBEEFl + /************************************************************************/ /* COMMON REGISTERS */ /************************************************************************/ @@ -190,11 +192,14 @@ extern "C" { vpu_instance_pool_t * vdi_get_instance_pool(u64 core_idx); int vdi_allocate_common_memory(u64 core_idx); int vdi_get_common_memory(u64 core_idx, vpu_buffer_t *vb); + int vdi_get_init_status(u64 core_idx); int vdi_allocate_dma_memory(u64 core_idx, vpu_buffer_t *vb); int vdi_attach_dma_memory(u64 core_idx, vpu_buffer_t *vb); void vdi_free_dma_memory(u64 core_idx, vpu_buffer_t *vb); int vdi_get_sram_memory(u64 core_idx, vpu_buffer_t *vb); int vdi_dettach_dma_memory(u64 core_idx, vpu_buffer_t *vb); + int vdi_mmap_memory(u64 core_idx, vpu_buffer_t *vb); + int vdi_unmap_memory(u64 core_idx, vpu_buffer_t *vb); #ifdef SUPPORT_MULTI_INST_INTR int vdi_wait_interrupt(u64 coreIdx, unsigned int instIdx, int timeout); @@ -225,6 +230,7 @@ extern "C" { int vdi_done_change_clock(u64 core_idx); int vdi_get_instance_num(u64 core_idx); + int vdi_get_init_status(u64 core_idx); void vdi_write_register(u64 core_idx, u64 addr, unsigned int data); unsigned int vdi_read_register(u64 core_idx, u64 addr); diff --git a/bmvid/video/provider/cnm/decoder/vdi/windows/vdi.c b/bmvid/video/provider/cnm/decoder/vdi/windows/vdi.c index 5216712..3601f7c 100644 --- a/bmvid/video/provider/cnm/decoder/vdi/windows/vdi.c +++ b/bmvid/video/provider/cnm/decoder/vdi/windows/vdi.c @@ -14,7 +14,7 @@ #include /* SIGIO */ #include /* fcntl */ #include -#include +#include #include #include #include @@ -101,7 +101,7 @@ typedef pthread_mutex_t MUTEX_HANDLE; const GUID* g_guid_interface[] = { &GUID_DEVINTERFACE_bm_sophon0}; - + typedef struct 
vpudrv_buffer_pool_t { vpudrv_buffer_t vdb; @@ -192,9 +192,9 @@ static BOOL getDriverContext(vdi_info_t* vdi, uint32_t board_idx) { DIGCF_DEVICEINTERFACE | DIGCF_PRESENT); // Initialize the SP_DEVICE_INTERFACE_DATA Structure. DeviceInterfaceData.cbSize = sizeof(SP_DEVICE_INTERFACE_DATA); - + if (INVALID_HANDLE_VALUE == vdi->hDevInfo) { - printf("No sophon devices interface class are in the system.\n"); + printf("No devices interface class are in the system.\n"); return FALSE; } @@ -210,7 +210,7 @@ static BOOL getDriverContext(vdi_info_t* vdi, uint32_t board_idx) { (LPGUID)g_guid_interface[0], vdi->dev_id, &DeviceInterfaceData)) { - printf("No sophon devices SetupDiEnumDeviceInterfaces for dev%d.\n", vdi->dev_id); + printf("No devices SetupDiEnumDeviceInterfaces for dev%d.\n", vdi->dev_id); goto Error; } @@ -416,10 +416,10 @@ int vdi_init(u64 core_idx) #else vdi->vdb_register.size = chip_core_idx; - + #endif - if(winDeviceIoControl(vdi->hDevice, VDI_IOCTL_GET_REGISTER_INFO, &vdi->vdb_register) == -1){ + if(winDeviceIoControl(vdi->hDevice, VDI_IOCTL_GET_REGISTER_INFO, &vdi->vdb_register) == -1){ goto ERR_VDI_INIT; } @@ -572,8 +572,8 @@ int vdi_release(u64 core_idx) { memset(&vdi->vpu_common_memory, 0x00, sizeof(vpu_buffer_t));//release by drive } - - + + if (vdi->vpu_inst_memory.virt_addr) { memset(&vdi->vpu_inst_memory, 0x00, sizeof(vpu_buffer_t)); //release by drive } @@ -630,10 +630,36 @@ int vdi_release(u64 core_idx) memset(vdi, 0x00, sizeof(vdi_info_t)); vdi->hDevice = INVALID_HANDLE_VALUE; - + return 0; } +int vdi_get_init_status(u64 core_idx) +{ + int ret; + vdi_info_t *vdi; +#if defined(BM_PCIE_MODE) + int chip_core_idx = core_idx%MAX_NUM_VPU_CORE_CHIP; +#endif + + if (core_idx >= MAX_NUM_VPU_CORE) + return -1; + vdi = &s_vdi_info[core_idx]; + + if(!vdi || vdi->hDevice == INVALID_HANDLE_VALUE || !(vdi->hDevice)) + return -1; +#ifdef BM_PCIE_MODE + if (ret = winDeviceIoControl(vdi->hDevice, VDI_IOCTL_GET_FIRMWARE_STATUS, &chip_core_idx) < 0) { + return NULL; + 
} +#endif + + if(ret == 100) { + return 0; + } + return 1; +} + int vdi_get_common_memory(u64 core_idx, vpu_buffer_t *vb) { vdi_info_t *vdi; @@ -842,7 +868,7 @@ int vdi_get_instance_num(u64 core_idx) if (core_idx >= MAX_NUM_VPU_CORE) return inst_num; - vdi = &s_vdi_info[core_idx]; + vdi = &s_vdi_info[core_idx]; if(!vdi || vdi->hDevice == INVALID_HANDLE_VALUE || !(vdi->hDevice)) { #ifndef BM_PCIE_MODE @@ -855,7 +881,7 @@ int vdi_get_instance_num(u64 core_idx) VLOG(TRACE,"[VDI] Get instance num. board %d, core %d, fd %d\n", board_idx, chip_core_idx, vdi->hDevice); #endif - + vpudrv_inst_info_t inst_info = {0}; #if defined(BM_PCIE_MODE) && defined(CHIP_BM1684) @@ -873,7 +899,7 @@ int vdi_get_instance_num(u64 core_idx) } else { - inst_num = vdi->pvip->vpu_instance_num; + inst_num = vdi->pvip->vpu_instance_num; } return inst_num; } @@ -904,8 +930,8 @@ int vdi_hw_reset(u64 core_idx) // DEVICE_ADDR_SW_RESET winDeviceIoControl(vdi->hDevice, VDI_IOCTL_RESET, &chip_core_idx); closeDrive(vdi); return 0; - }else - return winDeviceIoControl(vdi->hDevice, VDI_IOCTL_RESET, &chip_core_idx); + }else + return winDeviceIoControl(vdi->hDevice, VDI_IOCTL_RESET, &chip_core_idx); } int vdi_crst_set_status(int core_idx, int status) @@ -972,7 +998,7 @@ int vdi_crst_set_enable(int core_idx, int en) #endif winDeviceIoControl(vdi->hDevice, VDI_IOCTL_SYSCXT_SET_EN, &syscxt_info); - + closeDrive(vdi); return 0; } @@ -1553,6 +1579,55 @@ int vdi_read_memory(u64 core_idx, u64 src_addr, unsigned char *dst_data, int len return len; } +int vdi_mmap_memory(u64 core_idx, vpu_buffer_t *vb) +{ +// vdi_info_t *vdi; +// #if defined(BM_PCIE_MODE) +// int chip_core_idx = core_idx%MAX_NUM_VPU_CORE_CHIP; +// #endif +// if (core_idx >= MAX_NUM_VPU_CORE) +// return -1; + +// vdi = &s_vdi_info[core_idx]; +// if(!vdi || vdi->vpu_fd==-1 || vdi->vpu_fd == 0x00) +// return -1; + +// vb->virt_addr = (unsigned long)mmap(NULL, vb->size, PROT_READ | PROT_WRITE, +// MAP_SHARED, vdi->vpu_fd, vb->phys_addr); +// if ((void 
*)vb->virt_addr == MAP_FAILED) +// { +// vb->virt_addr = 0; +// return -1; +// } + + return 0; +} + +int vdi_unmap_memory(u64 core_idx, vpu_buffer_t *vb) +{ +// vdi_info_t *vdi; +// #if defined(BM_PCIE_MODE) +// int chip_core_idx = core_idx%MAX_NUM_VPU_CORE_CHIP; +// #endif +// if (core_idx >= MAX_NUM_VPU_CORE) +// return -1; + +// vdi = &s_vdi_info[core_idx]; +// if(!vdi || vdi->vpu_fd==-1 || vdi->vpu_fd == 0x00) +// return -1; + +// if(vb->virt_addr != 0 && vb->virt_addr != FAKE_PCIE_VIRT_ADDR) +// { +// if (munmap((void *)vb->virt_addr, vb->size) != 0) +// { +// VLOG(ERR, "[VDI] fail to vdi_free_dma_memory virtial address = 0x%lx\n", vb->virt_addr); +// } +// return -1; +// } + + return 0; +} + int vdi_allocate_dma_memory(u64 core_idx, vpu_buffer_t *vb) { vdi_info_t *vdi; @@ -1722,7 +1797,7 @@ u64 vdi_get_dma_memory_free_size(u64 coreIdx) else { if(winDeviceIoControl(vdi->hDevice, VDI_IOCTL_GET_FREE_MEM_SIZE, &size) == -1){ return 0; - } + } } #endif @@ -1891,8 +1966,8 @@ int vdi_set_clock_gate(u64 core_idx, int enable) } return 0; - - + + } int vdi_get_clock_gate(u64 core_idx) @@ -1987,7 +2062,7 @@ int vdi_wait_vpu_busy(u64 core_idx, int timeout, unsigned int addr_bit_busy_flag Uint32 idx; for (idx=0; idx<50; idx++) { VLOG(ERR, "[VDI] vdi_wait_vpu_busy timeout, PC=0x%x, LR=0x%x\n", vdi_read_register(core_idx, pc), vdi_read_register(core_idx, W5_VCPU_CUR_LR)); - VLOG(ERR, "[VDI] W5_VPU_BUSY_STATUS=0x%x, W5_VPU_HALT_STATUS=0x%x, W5_VPU_VCPU_STATUS=0x%x, W5_VPU_PRESCAN_STATUS=0x%x\n", vdi_read_register(core_idx, W5_VPU_BUSY_STATUS), + VLOG(ERR, "[VDI] W5_VPU_BUSY_STATUS=0x%x, W5_VPU_HALT_STATUS=0x%x, W5_VPU_VCPU_STATUS=0x%x, W5_VPU_PRESCAN_STATUS=0x%x\n", vdi_read_register(core_idx, W5_VPU_BUSY_STATUS), vdi_read_register(core_idx, W5_VPU_HALT_STATUS), vdi_read_register(core_idx, W5_VPU_VCPU_STATUS), vdi_read_register(core_idx, W5_VPU_PRESCAN_STATUS)); { Uint32 vcpu_reg[31]= {0,}; diff --git a/bmvid/video/provider/cnm/decoder/vpuapi/product.c 
b/bmvid/video/provider/cnm/decoder/vpuapi/product.c index 1beb9ab..36e8774 100755 --- a/bmvid/video/provider/cnm/decoder/vpuapi/product.c +++ b/bmvid/video/provider/cnm/decoder/vpuapi/product.c @@ -806,12 +806,12 @@ RetCode ProductVpuAllocateFramebuffer( CodecInst* inst, FrameBuffer* fbArr, TiledMapType mapType, Int32 num, Int32 stride, Int32 height, FrameBufferFormat format, BOOL cbcrInterleave, BOOL nv21, Int32 endian, - bm_device_mem_t* vb, Int32 gdiIndex, + vpu_buffer_t* vb, Int32 gdiIndex, FramebufferAllocType fbType) { Int32 i; Uint32 coreIdx; - bm_device_mem_t vbFrame; + vpu_buffer_t vbFrame; FrameBufInfo fbInfo; DecInfo* pDecInfo = &inst->CodecInfo->decInfo; EncInfo* pEncInfo = &inst->CodecInfo->encInfo; @@ -821,7 +821,7 @@ RetCode ProductVpuAllocateFramebuffer( ProductId productId = (ProductId)inst->productId; RetCode ret = RETCODE_SUCCESS; - osal_memset((void*)&vbFrame, 0x00, sizeof(bm_device_mem_t)); + osal_memset((void*)&vbFrame, 0x00, sizeof(vpu_buffer_t)); osal_memset((void*)&fbInfo, 0x00, sizeof(FrameBufInfo)); coreIdx = inst->coreIdx; @@ -954,13 +954,13 @@ RetCode ProductVpuAllocateFramebuffer( pDramCfg = (inst->isDecoder == TRUE) ? &pDecInfo->dramCfg : &pEncInfo->dramCfg; pMapCfg = (inst->isDecoder == TRUE) ? 
&pDecInfo->mapCfg : &pEncInfo->mapCfg; - vbFrame.u.device.device_addr = GetTiledFrameBase(coreIdx, fbArr, num); + vbFrame.phys_addr = GetTiledFrameBase(coreIdx, fbArr, num); if (fbType == FB_TYPE_PPU) { tiledBaseAddr = pMapCfg->tiledBaseAddr; } else { - pMapCfg->tiledBaseAddr = vbFrame.u.device.device_addr; - tiledBaseAddr = vbFrame.u.device.device_addr; + pMapCfg->tiledBaseAddr = vbFrame.phys_addr; + tiledBaseAddr = vbFrame.phys_addr; } *vb = vbFrame; ret = AllocateTiledFrameBufferGdiV1(mapType, tiledBaseAddr, fbArr, num, sizeLuma, sizeChroma, pDramCfg); diff --git a/bmvid/video/provider/cnm/decoder/vpuapi/product.h b/bmvid/video/provider/cnm/decoder/vpuapi/product.h index d3401df..7902132 100755 --- a/bmvid/video/provider/cnm/decoder/vpuapi/product.h +++ b/bmvid/video/provider/cnm/decoder/vpuapi/product.h @@ -196,7 +196,7 @@ extern RetCode ProductVpuAllocateFramebuffer( BOOL cbcrInterleave, BOOL nv21, Int32 endian, - bm_device_mem_t* vb, + vpu_buffer_t* vb, Int32 gdiIndex, FramebufferAllocType fbType ); diff --git a/bmvid/video/provider/cnm/decoder/vpuapi/vpuapi.c b/bmvid/video/provider/cnm/decoder/vpuapi/vpuapi.c index 6920557..32d7d09 100755 --- a/bmvid/video/provider/cnm/decoder/vpuapi/vpuapi.c +++ b/bmvid/video/provider/cnm/decoder/vpuapi/vpuapi.c @@ -41,48 +41,6 @@ static int s_bitCodeSize[MAX_NUM_VPU_CORE] = {0,}; Uint32 __VPU_BUSY_TIMEOUT = VPU_BUSY_CHECK_TIMEOUT; -typedef struct _G_bm_handle{ - bm_handle_t bm_handle; - unsigned int count; -} G_bm_handle; -static G_bm_handle g_bm_handle[MAX_NUM_VPU_CORE] = { {0, 0} }; - - -#ifdef __linux__ -static int bmhandle_atomic_lock = 0; /* atomic lock for bmlib_handle */ -#elif _WIN32 -static volatile long bmhandle_atomic_lock = 0; -#endif - -/* atomic lock for bmlib_handle operations*/ -static void bm_handle_lock() -{ -#ifdef __linux__ - while (__atomic_test_and_set(&bmhandle_atomic_lock, __ATOMIC_SEQ_CST)) - { - usleep(100); - } -#endif -#ifdef _WIN32 - while (InterlockedCompareExchange(&bmhandle_atomic_lock, 1, 
0)) { - Sleep(1); - } -#endif -} - -static void bm_handle_unlock() -{ -#ifdef __linux__ - __atomic_clear(&bmhandle_atomic_lock, __ATOMIC_SEQ_CST); -#endif -#ifdef _WIN32 - InterlockedExchange(&bmhandle_atomic_lock, 0); -#endif -} - - - - static RetCode CheckDecInstanceValidity(CodecInst* pCodecInst) { RetCode ret; @@ -239,6 +197,7 @@ int VPU_GetOpenInstanceNum(Uint32 coreIdx) static RetCode InitializeVPU(Uint32 coreIdx, const Uint16* code, Uint32 size) { RetCode ret; + int init_status; if (vdi_init(coreIdx) < 0) return RETCODE_FAILURE; @@ -256,7 +215,8 @@ static RetCode InitializeVPU(Uint32 coreIdx, const Uint16* code, Uint32 size) create_sw_uart_thread(coreIdx); vdi_delay_ms(500); #endif - if (VPU_IsInit(coreIdx) != 0 && ((vdi_get_instance_num(coreIdx) > 0) || ProductVpuGetId(coreIdx)==PRODUCT_ID_960)) { + init_status = vdi_get_init_status(coreIdx); + if (VPU_IsInit(coreIdx) != 0 && ((init_status == 1) || ProductVpuGetId(coreIdx)==PRODUCT_ID_960)) { if(ProductVpuGetId(coreIdx)==PRODUCT_ID_960 && VPU_GetFirmwareStatus(coreIdx) == 0) { ret = ProductVpuReset(coreIdx, 0, SW_RESET_ON_BOOT); if (ret != RETCODE_SUCCESS) { @@ -273,7 +233,7 @@ static RetCode InitializeVPU(Uint32 coreIdx, const Uint16* code, Uint32 size) InitCodecInstancePool(coreIdx); - VLOG(INFO, "reload firmware...\n"); + VLOG(ERR, "reload firmware...\n"); ret = ProductVpuReset(coreIdx, 0, SW_RESET_ON_BOOT); if (ret != RETCODE_SUCCESS) { @@ -450,6 +410,7 @@ RetCode VPU_DecOpen(DecHandle* pHandle, DecOpenParam* pop) { *pHandle = 0; LeaveLock(pop->coreIdx); + VLOG(ERR, "GetCodecInstance fail. 
ret=%d\n", ret); return ret; } @@ -534,10 +495,12 @@ RetCode VPU_DecOpen(DecHandle* pHandle, DecOpenParam* pop) pDecInfo->reorderEnable = VPU_REORDER_ENABLE; pDecInfo->mirrorDirection = MIRDIR_NONE; pDecInfo->prevFrameEndPos = pop->bitstreamBuffer; + pDecInfo->enableDecodeOrder = pop->decodeOrder; if ((ret=ProductVpuDecBuildUpOpenParam(pCodecInst, pop)) != RETCODE_SUCCESS) { *pHandle = 0; LeaveLock(pCodecInst->coreIdx); + VLOG(ERR, "ProductVpuDecBuildUpOpenParam fail. ret=%d\n", ret); return ret; } @@ -551,7 +514,6 @@ RetCode VPU_DecOpen(DecHandle* pHandle, DecOpenParam* pop) osal_memset((void*)&pDecInfo->cacheConfig, 0x00, sizeof(MaverickCacheConfig)); - vdi_resume_kernel_reset(pCodecInst->coreIdx); LeaveLock(pCodecInst->coreIdx); return RETCODE_SUCCESS; @@ -586,8 +548,6 @@ RetCode VPU_DecClose(DecHandle handle) DecInfo * pDecInfo; RetCode ret; int i; - bm_handle_t bm_handle; - bm_handle =bmvpu_dec_get_bmlib_handle(handle->coreIdx); ret = CheckDecInstanceValidity(handle); if (ret != RETCODE_SUCCESS) { return ret; @@ -613,87 +573,91 @@ RetCode VPU_DecClose(DecHandle handle) if (pCodecInst->loggingEnable) vdi_log(pCodecInst->coreIdx, DEC_SEQ_END, 0); - if (pDecInfo->vbDevSlice.size) + if (pDecInfo->vbSlice.size) { - bm_free_mem(bm_handle,pDecInfo->vbDevSlice,pDecInfo->vbSliceVddr); - pDecInfo->vbDevSlice.size=0; + vdi_free_dma_memory(pCodecInst->coreIdx, &pDecInfo->vbSlice); + pDecInfo->vbSlice.size=0; } - if (pDecInfo->vbDevWork.size) { + if (pDecInfo->vbWork.size) { if (pDecInfo->workBufferAllocExt == 0) { -#ifndef BM_PCIE_MODE - bm_mem_unmap_device_mem(bm_handle,(void *)pDecInfo->vbWorkVaddr,pDecInfo->vbDevWork.size); -#endif - bm_free_device(bm_handle,pDecInfo->vbDevWork); - pDecInfo->vbDevWork.size=0; + vdi_free_dma_memory(pCodecInst->coreIdx, &pDecInfo->vbWork); + pDecInfo->vbWork.size=0; } else { -#ifndef BM_PCIE_MODE - bm_mem_unmap_device_mem(bm_handle,(void *)pDecInfo->vbWorkVaddr,pDecInfo->vbDevWork.size); -#endif + 
vdi_dettach_dma_memory(pCodecInst->coreIdx, &pDecInfo->vbWork); } } - if (pDecInfo->vbDevFrame.size) { + if (pDecInfo->vbFrame.size) { if (pDecInfo->frameAllocExt == 0) { - bm_free_mem(bm_handle,pDecInfo->vbDevFrame,pDecInfo->vbFrameVaddr); - pDecInfo->vbDevFrame.size=0; + vdi_free_dma_memory(pCodecInst->coreIdx, &pDecInfo->vbFrame); + pDecInfo->vbFrame.size=0; } } for ( i=0 ; ivbDevMV[i].size) + if (pDecInfo->vbMV[i].size) { - bm_free_mem(bm_handle,pDecInfo->vbDevMV[i],pDecInfo->vbMVVaddr[i]); - pDecInfo->vbDevMV[i].size=0; + vdi_free_dma_memory(pCodecInst->coreIdx, &pDecInfo->vbMV[i]); + pDecInfo->vbMV[i].size=0; } - if (pDecInfo->vbDevFbcYTbl[i].size) + if (pDecInfo->vbFbcYTbl[i].size) { - - bm_free_mem(bm_handle,pDecInfo->vbDevFbcYTbl[i],pDecInfo->vbFbcYTblVaddr[i]); - pDecInfo->vbDevFbcYTbl[i].size=0; + if(pDecInfo->framebuf_from_user != 1) + { + vdi_free_dma_memory(pCodecInst->coreIdx, &pDecInfo->vbFbcYTbl[i]); + pDecInfo->vbFbcYTbl[i].size=0; + } + else + { + vdi_dettach_dma_memory(pCodecInst->coreIdx, &pDecInfo->vbFbcYTbl[i]); + vdi_unmap_memory(pCodecInst->coreIdx, &pDecInfo->vbFbcYTbl[i]); + } } - if (pDecInfo->vbDevFbcCTbl[i].size) + if (pDecInfo->vbFbcCTbl[i].size) { - bm_free_mem(bm_handle,pDecInfo->vbDevFbcCTbl[i],pDecInfo->vbFbcCTblVaddr[i]); - pDecInfo->vbDevFbcCTbl[i].size=0; - } -#ifndef BM_PCIE_MODE - if(pDecInfo->vpu_frame_buffer_vaddr[i]!=0x00) + if(pDecInfo->framebuf_from_user != 1) { - bm_mem_unmap_device_mem(bm_handle,(void *)pDecInfo->vpu_frame_buffer_vaddr[i],pDecInfo->vpu_frame_buffer_vaddr_size[i]); + vdi_free_dma_memory(pCodecInst->coreIdx, &pDecInfo->vbFbcCTbl[i]); + pDecInfo->vbFbcCTbl[i].size=0; } -#endif + else + { + vdi_dettach_dma_memory(pCodecInst->coreIdx, &pDecInfo->vbFbcCTbl[i]); + vdi_unmap_memory(pCodecInst->coreIdx, &pDecInfo->vbFbcCTbl[i]); + } + } } - if (pDecInfo->vbDevTemp.size) + if (pDecInfo->vbTemp.size) { vdi_dettach_dma_memory(pCodecInst->coreIdx, &pDecInfo->vbTemp); } - if (pDecInfo->vbDevPPU.size) { + if 
(pDecInfo->vbPPU.size) { if (pDecInfo->ppuAllocExt == 0) { - bm_free_mem(bm_handle,pDecInfo->vbDevPPU,0x00); - pDecInfo->vbDevPPU.size=0; + vdi_free_dma_memory(pCodecInst->coreIdx, &pDecInfo->vbPPU); + pDecInfo->vbPPU.size=0; } } - if (pDecInfo->vbDevWTL.size) + if (pDecInfo->vbWTL.size) { - bm_free_mem(bm_handle,pDecInfo->vbDevWTL,0x00); - pDecInfo->vbDevWTL.size=0; + vdi_free_dma_memory(pCodecInst->coreIdx, &pDecInfo->vbWTL); + pDecInfo->vbWTL.size=0; + } + if (pDecInfo->vbUserData.size) + { + vdi_dettach_dma_memory(pCodecInst->coreIdx, &pDecInfo->vbUserData); } - // if (pDecInfo->vbUserData.size) - // { - // vdi_dettach_dma_memory(pCodecInst->coreIdx, &pDecInfo->vbUserData); - // } - if (pDecInfo->vbDevReport.size) + if (pDecInfo->vbReport.size) { - bm_free_mem(bm_handle,pDecInfo->vbDevReport,pDecInfo->vbReportVddr); - pDecInfo->vbDevReport.size=0; + vdi_free_dma_memory(pCodecInst->coreIdx, &pDecInfo->vbReport); + pDecInfo->vbReport.size=0; } if (GetPendingInst(pCodecInst->coreIdx) == pCodecInst) ClearPendingInst(pCodecInst->coreIdx); @@ -924,7 +888,7 @@ static RetCode DecRegisterFrameBuffer(DecHandle handle, FrameBuffer *bufArray, i Int32 i; RetCode ret; FrameBuffer* fb, nullFb; - bm_device_mem_t *vb; + vpu_buffer_t *vb; FrameBufferFormat format = FORMAT_420; Int32 totalNumOfFbs; @@ -992,7 +956,7 @@ static RetCode DecRegisterFrameBuffer(DecHandle handle, FrameBuffer *bufArray, i pDecInfo->frameBufPool[i] = bufArray[i]; } else { - vb = &pDecInfo->vbDevFrame; + vb = &pDecInfo->vbFrame; fb = &pDecInfo->frameBufPool[0]; ret = ProductVpuAllocateFramebuffer( (CodecInst*)handle, fb, (TiledMapType)mapType, numFbsForDecoding, stride, height, format, @@ -1014,7 +978,7 @@ static RetCode DecRegisterFrameBuffer(DecHandle handle, FrameBuffer *bufArray, i if (!bufArray) { TiledMapType map; map = pDecInfo->wtlMode==FF_FRAME ? 
LINEAR_FRAME_MAP : LINEAR_FIELD_MAP; - vb = &pDecInfo->vbDevWTL; + vb = &pDecInfo->vbWTL; fb = &pDecInfo->frameBufPool[numFbsForDecoding]; ret = ProductVpuAllocateFramebuffer( @@ -1645,14 +1609,14 @@ RetCode VPU_DecGetOutputInfo(DecHandle handle, DecOutputInfo* info) info->dispPicHeight = pDecInfo->decOutInfo[displayIndex].decPicHeight; } - if (pDecInfo->scalerEnable == TRUE) { + if (pDecInfo->scalerEnable == TRUE) { if ((pDecInfo->scaleWidth != 0) && (pDecInfo->scaleHeight != 0)) { info->dispPicWidth = pDecInfo->scaleWidth; info->dispPicHeight = pDecInfo->scaleHeight; info->rcDisplay.right = pDecInfo->scaleWidth; info->rcDisplay.bottom = pDecInfo->scaleHeight; } - } + } } else { @@ -1708,8 +1672,14 @@ RetCode VPU_DecGetOutputInfo(DecHandle handle, DecOutputInfo* info) maxDecIndex = (pDecInfo->numFbsForDecoding > pDecInfo->numFbsForWTL) ? (SvacSvcFlag ? pDecInfo->numFbsForDecoding*2 : pDecInfo->numFbsForDecoding) : pDecInfo->numFbsForWTL; - if (0 <= info->indexFrameDisplay && info->indexFrameDisplay < (int)maxDecIndex) + if (pDecInfo->enableDecodeOrder && (0 <= info->indexFrameDecoded && info->indexFrameDecoded < (int)maxDecIndex)) + { + info->dispFrame = pDecInfo->frameBufPool[val+info->indexFrameDecoded]; + } + else if (0 <= info->indexFrameDisplay && info->indexFrameDisplay < (int)maxDecIndex) + { info->dispFrame = pDecInfo->frameBufPool[val+info->indexFrameDisplay]; + } } info->rdPtr = streamRdPtr; info->wrPtr = pDecInfo->streamWrPtr; @@ -1851,7 +1821,7 @@ RetCode VPU_DecFrameBufferFlush(DecHandle handle, DecOutputInfo* pRemainings, Ui pOut->dispPicWidth = pDecInfo->scaleWidth; pOut->dispPicHeight = pDecInfo->scaleHeight; } - } + } else { pOut->dispPicWidth = pOut->decPicWidth; pOut->dispPicHeight = pOut->decPicHeight; @@ -2026,8 +1996,7 @@ RetCode VPU_DecGiveCommand(DecHandle handle, CodecCommand cmd, void* param) CodecInst* pCodecInst; DecInfo* pDecInfo; RetCode ret; - bm_handle_t bm_handle; - bm_handle=bmvpu_dec_get_bmlib_handle(handle->coreIdx); + ret = 
CheckDecInstanceValidity(handle); if (ret != RETCODE_SUCCESS) return ret; @@ -2390,50 +2359,66 @@ RetCode VPU_DecGiveCommand(DecHandle handle, CodecCommand cmd, void* param) case DEC_FREE_FRAME_BUFFER: { int i; - if (pDecInfo->vbDevSlice.size) + if (pDecInfo->vbSlice.size) { - bm_free_mem(bm_handle,pDecInfo->vbDevSlice,pDecInfo->vbSliceVddr); - pDecInfo->vbDevSlice.size=0; + vdi_free_dma_memory(pCodecInst->coreIdx, &pDecInfo->vbSlice); + pDecInfo->vbSlice.size = 0; } - if (pDecInfo->vbDevFrame.size){ - if (pDecInfo->frameAllocExt == 0) { - bm_free_mem(bm_handle,pDecInfo->vbDevFrame,pDecInfo->vbFrameVaddr); - pDecInfo->vbDevFrame.size=0; + if (pDecInfo->vbFrame.size){ + if (pDecInfo->frameAllocExt == 0) { + vdi_free_dma_memory(pCodecInst->coreIdx, &pDecInfo->vbFrame); + pDecInfo->vbFrame.size=0; } } for (i=0 ; ivbDevFbcYTbl[i].size) + if (pDecInfo->vbFbcYTbl[i].size) + { + if(pDecInfo->framebuf_from_user != 1) { - bm_free_mem(bm_handle,pDecInfo->vbDevFbcYTbl[i],pDecInfo->vbFbcYTblVaddr[i]); - pDecInfo->vbFbcYTblVaddr[i]=0x00; + vdi_free_dma_memory(pCodecInst->coreIdx, &pDecInfo->vbFbcYTbl[i]); + pDecInfo->vbFbcYTbl[i].size = 0; } - if (pDecInfo->vbDevFbcCTbl[i].size) + else + { + vdi_dettach_dma_memory(pCodecInst->coreIdx, &pDecInfo->vbFbcYTbl[i]); + vdi_unmap_memory(pCodecInst->coreIdx, &pDecInfo->vbFbcYTbl[i]); + } + } + if (pDecInfo->vbFbcCTbl[i].size) { - bm_free_mem(bm_handle,pDecInfo->vbDevFbcCTbl[i],pDecInfo->vbFbcCTblVaddr[i]); - pDecInfo->vbFbcCTblVaddr[i]=0x00; + if(pDecInfo->framebuf_from_user != 1) + { + vdi_free_dma_memory(pCodecInst->coreIdx, &pDecInfo->vbFbcCTbl[i]); + pDecInfo->vbFbcCTbl[i].size = 0; + } + else + { + vdi_dettach_dma_memory(pCodecInst->coreIdx, &pDecInfo->vbFbcCTbl[i]); + vdi_unmap_memory(pCodecInst->coreIdx, &pDecInfo->vbFbcCTbl[i]); + } } - if (pDecInfo->vbDevMV[i].size) + if (pDecInfo->vbMV[i].size) { - bm_free_mem(bm_handle,pDecInfo->vbDevMV[i],pDecInfo->vbMVVaddr[i]); - pDecInfo->vbMVVaddr[i]=0x00; + 
vdi_free_dma_memory(pCodecInst->coreIdx, &pDecInfo->vbMV[i]); + pDecInfo->vbMV[i].size = 0; } } - if (pDecInfo->vbDevPPU.size) { + if (pDecInfo->vbPPU.size) { if (pDecInfo->ppuAllocExt == 0) { - bm_free_mem(bm_handle,pDecInfo->vbDevPPU,0x00); - pDecInfo->vbDevPPU.size=0; + vdi_free_dma_memory(pCodecInst->coreIdx, &pDecInfo->vbPPU); + pDecInfo->vbPPU.size=0; } } if (pDecInfo->wtlEnable) { - if (pDecInfo->vbDevWTL.size) + if (pDecInfo->vbWTL.size) { - bm_free_mem(bm_handle,pDecInfo->vbDevWTL,0x00); - pDecInfo->vbDevWTL.size=0; + vdi_free_dma_memory(pCodecInst->coreIdx, &pDecInfo->vbWTL); + pDecInfo->vbWTL.size=0; } } break; @@ -2442,13 +2427,13 @@ RetCode VPU_DecGiveCommand(DecHandle handle, CodecCommand cmd, void* param) { DecGetFramebufInfo* fbInfo = (DecGetFramebufInfo*)param; Uint32 i; - fbInfo->devMemFrame = pDecInfo->vbDevFrame; - fbInfo->devMemWTL = pDecInfo->vbDevWTL; + fbInfo->vbFrame = pDecInfo->vbFrame; + fbInfo->vbWTL = pDecInfo->vbWTL; for (i=0 ; idevMemInfoVbFbcYTbl[i] = pDecInfo->vbDevFbcYTbl[i]; - fbInfo->devMemInfoVbFbcCTbl[i] = pDecInfo->vbDevFbcCTbl[i]; - fbInfo->devMvCol[i] = pDecInfo->vbDevMV[i]; + fbInfo->vbFbcYTbl[i] = pDecInfo->vbFbcYTbl[i]; + fbInfo->vbFbcCTbl[i] = pDecInfo->vbFbcCTbl[i]; + fbInfo->vbMvCol[i] = pDecInfo->vbMV[i]; } for (i=0; ivbDevFrame.u.device.device_addr = 0; - pDecInfo->vbFrameVaddr = 0; - pDecInfo->vbDevFrame.size = 0; - //pDecInfo->vbWTLVaddr = 0; - pDecInfo->vbDevWTL.u.device.device_addr = 0; - pDecInfo->vbDevWTL.size = 0; + pDecInfo->vbFrame.base = 0; + pDecInfo->vbFrame.phys_addr = 0; + pDecInfo->vbFrame.virt_addr = 0; + pDecInfo->vbFrame.size = 0; + pDecInfo->vbWTL.base = 0; + pDecInfo->vbWTL.phys_addr = 0; + pDecInfo->vbWTL.virt_addr = 0; + pDecInfo->vbWTL.size = 0; for (i=0 ; ivbDevFbcYTbl[i].u.device.device_addr = 0; - pDecInfo->vbFbcCTblVaddr[i] = 0; - pDecInfo->vbDevFbcYTbl[i].size = 0; - pDecInfo->vbDevFbcCTbl[i].u.device.device_addr = 0; - pDecInfo->vbFbcCTblVaddr[i] = 0; - pDecInfo->vbDevFbcCTbl[i].size = 0; 
- pDecInfo->vbDevMV[i].u.device.device_addr = 0; - pDecInfo->vbMVVaddr[i] = 0; - pDecInfo->vbDevMV[i].size = 0; + pDecInfo->vbFbcYTbl[i].base = 0; + pDecInfo->vbFbcYTbl[i].phys_addr = 0; + pDecInfo->vbFbcYTbl[i].virt_addr = 0; + pDecInfo->vbFbcYTbl[i].size = 0; + pDecInfo->vbFbcCTbl[i].base = 0; + pDecInfo->vbFbcCTbl[i].phys_addr = 0; + pDecInfo->vbFbcCTbl[i].virt_addr = 0; + pDecInfo->vbFbcCTbl[i].size = 0; + pDecInfo->vbMV[i].base = 0; + pDecInfo->vbMV[i].phys_addr = 0; + pDecInfo->vbMV[i].virt_addr = 0; + pDecInfo->vbMV[i].size = 0; } pDecInfo->frameDisplayFlag = 0; @@ -2706,22 +2696,38 @@ RetCode VPU_DecGiveCommand(DecHandle handle, CodecCommand cmd, void* param) case DEC_FREE_FBC_TABLE_BUFFER: { Uint32 fbcCurFrameIdx = *(Uint32*)param; - if(pDecInfo->vbDevFbcYTbl[fbcCurFrameIdx].size > 0) { - bm_free_mem(bm_handle,pDecInfo->vbDevFbcYTbl[fbcCurFrameIdx],pDecInfo->vbFbcYTblVaddr[fbcCurFrameIdx]); - pDecInfo->vbDevFbcYTbl[fbcCurFrameIdx].size = 0; + if(pDecInfo->vbFbcYTbl[fbcCurFrameIdx].size > 0) { + if(pDecInfo->framebuf_from_user != 1) + { + vdi_free_dma_memory(pCodecInst->coreIdx, &pDecInfo->vbFbcYTbl[fbcCurFrameIdx]); + pDecInfo->vbFbcYTbl[fbcCurFrameIdx].size = 0; + } + else + { + vdi_dettach_dma_memory(pCodecInst->coreIdx, &pDecInfo->vbFbcYTbl[fbcCurFrameIdx]); + vdi_unmap_memory(pCodecInst->coreIdx, &pDecInfo->vbFbcYTbl[fbcCurFrameIdx]); + } } - if(pDecInfo->vbDevFbcCTbl[fbcCurFrameIdx].size > 0) { - bm_free_mem(bm_handle,pDecInfo->vbDevFbcCTbl[fbcCurFrameIdx],pDecInfo->vbFbcCTblVaddr[fbcCurFrameIdx]); - pDecInfo->vbDevFbcCTbl[fbcCurFrameIdx].size = 0; + if(pDecInfo->vbFbcCTbl[fbcCurFrameIdx].size > 0) { + if(pDecInfo->framebuf_from_user != 1) + { + vdi_free_dma_memory(pCodecInst->coreIdx, &pDecInfo->vbFbcCTbl[fbcCurFrameIdx]); + pDecInfo->vbFbcCTbl[fbcCurFrameIdx].size = 0; + } + else + { + vdi_dettach_dma_memory(pCodecInst->coreIdx, &pDecInfo->vbFbcCTbl[fbcCurFrameIdx]); + vdi_unmap_memory(pCodecInst->coreIdx, &pDecInfo->vbFbcCTbl[fbcCurFrameIdx]); + } 
} } break; case DEC_FREE_MV_BUFFER: { Uint32 fbcCurFrameIdx = *(Uint32*)param; - if(pDecInfo->vbDevMV[fbcCurFrameIdx].size > 0) { - bm_free_mem(bm_handle,pDecInfo->vbDevMV[fbcCurFrameIdx],pDecInfo->vbMVVaddr[fbcCurFrameIdx]); - pDecInfo->vbDevMV[fbcCurFrameIdx].size = 0; + if(pDecInfo->vbMV[fbcCurFrameIdx].size > 0) { + vdi_free_dma_memory(pCodecInst->coreIdx, &pDecInfo->vbMV[fbcCurFrameIdx]); + pDecInfo->vbMV[fbcCurFrameIdx].size = 0; } } break; @@ -2730,17 +2736,10 @@ RetCode VPU_DecGiveCommand(DecHandle handle, CodecCommand cmd, void* param) Uint32 fbcCurFrameIdx = *(Uint32*)param; Uint32 size; size = WAVE5_DEC_VP9_MVCOL_BUF_SIZE(pDecInfo->initialInfo.picWidth, pDecInfo->initialInfo.picHeight); - pDecInfo->vbDevMV[fbcCurFrameIdx].u.device.device_addr = 0; - pDecInfo->vbDevMV[fbcCurFrameIdx].size = ((size+4095)&~4095)+4096; /* 4096 is a margin */ - bm_free_mem(bm_handle, pDecInfo->vbDevMV[fbcCurFrameIdx],pDecInfo->vbMVVaddr[fbcCurFrameIdx]); - if(bmvpu_malloc_device_byte_heap(bm_handle,&pDecInfo->vbDevMV[fbcCurFrameIdx],pDecInfo->vbDevMV[fbcCurFrameIdx].size,HEAP_MASK,1)!=BM_SUCCESS) + pDecInfo->vbMV[fbcCurFrameIdx].phys_addr = 0; + pDecInfo->vbMV[fbcCurFrameIdx].size = ((size+4095)&~4095)+4096; /* 4096 is a margin */ + if(vdi_allocate_dma_memory(pCodecInst->coreIdx, &pDecInfo->vbMV[fbcCurFrameIdx]) < 0) return RETCODE_INSUFFICIENT_RESOURCE; -// #ifndef BM_PCIE_MODE -// bm_mem_mmap_device_mem_no_cache(bm_handle,&pDecInfo->vbDevMV[fbcCurFrameIdx],&pDecInfo->vbMVVaddr[fbcCurFrameIdx]); -// #else -// pDecInfo->vbMVVaddr[fbcCurFrameIdx]=0xDEADBEEFl; -// #endif - bm_vdi_mmap(bm_handle,&pDecInfo->vbDevMV[fbcCurFrameIdx],&pDecInfo->vbMVVaddr[fbcCurFrameIdx]); } break; case DEC_ALLOC_FBC_Y_TABLE_BUFFER: @@ -2749,17 +2748,10 @@ RetCode VPU_DecGiveCommand(DecHandle handle, CodecCommand cmd, void* param) Uint32 size; size = WAVE5_FBC_LUMA_TABLE_SIZE(VPU_ALIGN64(pDecInfo->initialInfo.picWidth), VPU_ALIGN64(pDecInfo->initialInfo.picHeight)); size = VPU_ALIGN16(size); - 
pDecInfo->vbDevFbcYTbl[fbcCurFrameIdx].u.device.device_addr = 0; - pDecInfo->vbDevFbcYTbl[fbcCurFrameIdx].size = ((size+4095)&~4095)+4096; - bm_free_mem(bm_handle, pDecInfo->vbDevFbcYTbl[fbcCurFrameIdx],pDecInfo->vbFbcYTblVaddr[fbcCurFrameIdx]); - if(bmvpu_malloc_device_byte_heap(bm_handle,&pDecInfo->vbDevFbcYTbl[fbcCurFrameIdx],pDecInfo->vbDevFbcYTbl[fbcCurFrameIdx].size,HEAP_MASK,1)!=BM_SUCCESS) + pDecInfo->vbFbcYTbl[fbcCurFrameIdx].phys_addr = 0; + pDecInfo->vbFbcYTbl[fbcCurFrameIdx].size = ((size+4095)&~4095)+4096; + if(vdi_allocate_dma_memory(pCodecInst->coreIdx, &pDecInfo->vbFbcYTbl[fbcCurFrameIdx]) < 0) return RETCODE_INSUFFICIENT_RESOURCE; -// #ifndef BM_PCIE_MODE -// bm_mem_mmap_device_mem_no_cache(bm_handle,&pDecInfo->vbDevFbcYTbl[fbcCurFrameIdx],&pDecInfo->vbFbcYTblVaddr[fbcCurFrameIdx]); -// #else -// pDecInfo->vbFbcYTblVaddr[fbcCurFrameIdx]=0xDEADBEEFl; -// #endif - bm_vdi_mmap(bm_handle,&pDecInfo->vbDevFbcYTbl[fbcCurFrameIdx],&pDecInfo->vbFbcYTblVaddr[fbcCurFrameIdx]); } break; case DEC_ALLOC_FBC_C_TABLE_BUFFER: @@ -2768,17 +2760,10 @@ RetCode VPU_DecGiveCommand(DecHandle handle, CodecCommand cmd, void* param) Uint32 size; size = WAVE5_FBC_CHROMA_TABLE_SIZE(VPU_ALIGN64(pDecInfo->initialInfo.picWidth), VPU_ALIGN64(pDecInfo->initialInfo.picHeight)); size = VPU_ALIGN16(size); - pDecInfo->vbDevFbcCTbl[fbcCurFrameIdx].u.device.device_addr = 0; - pDecInfo->vbDevFbcCTbl[fbcCurFrameIdx].size = ((size+4095)&~4095)+4096; - bm_free_mem(bm_handle, pDecInfo->vbDevFbcCTbl[fbcCurFrameIdx],pDecInfo->vbFbcCTblVaddr[fbcCurFrameIdx]); - if(bmvpu_malloc_device_byte_heap(bm_handle,&pDecInfo->vbDevFbcCTbl[fbcCurFrameIdx],pDecInfo->vbDevFbcCTbl[fbcCurFrameIdx].size,HEAP_MASK,1)!=BM_SUCCESS) + pDecInfo->vbFbcCTbl[fbcCurFrameIdx].phys_addr = 0; + pDecInfo->vbFbcCTbl[fbcCurFrameIdx].size = ((size+4095)&~4095)+4096; + if(vdi_allocate_dma_memory(pCodecInst->coreIdx, &pDecInfo->vbFbcCTbl[fbcCurFrameIdx]) < 0) return RETCODE_INSUFFICIENT_RESOURCE; -// #ifndef BM_PCIE_MODE -// 
bm_mem_mmap_device_mem_no_cache(bm_handle,&pDecInfo->vbDevFbcCTbl[fbcCurFrameIdx],&pDecInfo->vbFbcCTblVaddr[fbcCurFrameIdx]); -// #else -// pDecInfo->vbFbcYTblVaddr[fbcCurFrameIdx]=0xDEADBEEFl; -// #endif - bm_vdi_mmap(bm_handle,&pDecInfo->vbDevFbcCTbl[fbcCurFrameIdx],&pDecInfo->vbFbcCTblVaddr[fbcCurFrameIdx]); } break; case GET_BANDWIDTH_REPORT: @@ -2815,7 +2800,6 @@ RetCode VPU_DecAllocateFrameBuffer(DecHandle handle, FrameBufferAllocInfo info, if (!frameBuffer) { return RETCODE_INVALID_PARAM; } - //bm_handle= bmvpu_dec_get_bmlib_handle(handle->coreIdx); if (info.type == FB_TYPE_PPU) { if (pDecInfo->numFrameBuffers == 0) return RETCODE_WRONG_CALL_SEQUENCE; @@ -2826,7 +2810,7 @@ RetCode VPU_DecAllocateFrameBuffer(DecHandle handle, FrameBufferAllocInfo info, gdiIndex = pDecInfo->numFbsForDecoding; ret = ProductVpuAllocateFramebuffer( pCodecInst, frameBuffer, (TiledMapType)info.mapType, (Int32)info.num, - info.stride, info.height, info.format, info.cbcrInterleave, info.nv21, info.endian, &pDecInfo->vbDevPPU, gdiIndex, FB_TYPE_PPU); + info.stride, info.height, info.format, info.cbcrInterleave, info.nv21, info.endian, &pDecInfo->vbPPU, gdiIndex, FB_TYPE_PPU); } else if (info.type == FB_TYPE_CODEC) { gdiIndex = 0; @@ -2835,8 +2819,9 @@ RetCode VPU_DecAllocateFrameBuffer(DecHandle handle, FrameBufferAllocInfo info, } ret = ProductVpuAllocateFramebuffer( pCodecInst, frameBuffer, (TiledMapType)info.mapType, (Int32)info.num, - info.stride, info.height, info.format, info.cbcrInterleave, info.nv21, info.endian, &pDecInfo->vbDevFrame, gdiIndex, (FramebufferAllocType)info.type); - pDecInfo->mapCfg.tiledBaseAddr = pDecInfo->vbDevFrame.u.device.device_addr ; + info.stride, info.height, info.format, info.cbcrInterleave, info.nv21, info.endian, &pDecInfo->vbFrame, gdiIndex, (FramebufferAllocType)info.type); + + pDecInfo->mapCfg.tiledBaseAddr = pDecInfo->vbFrame.phys_addr; } return ret; @@ -3035,16 +3020,16 @@ RetCode VPU_EncRegisterFrameBuffer(EncHandle handle, FrameBuffer* 
bufArray, int pEncInfo = &pCodecInst->CodecInfo->encInfo; openParam = &pEncInfo->openParam; - if (pEncInfo->stride) + if (pEncInfo->stride) return RETCODE_CALLED_BEFORE; - if (!pEncInfo->initialInfoObtained) + if (!pEncInfo->initialInfoObtained) return RETCODE_WRONG_CALL_SEQUENCE; - if (num < pEncInfo->initialInfo.minFrameBufferCount) + if (num < pEncInfo->initialInfo.minFrameBufferCount) return RETCODE_INSUFFICIENT_FRAME_BUFFERS; - if (stride == 0 || (stride % 8 != 0) || stride < 0) + if (stride == 0 || (stride % 8 != 0) || stride < 0) return RETCODE_INVALID_STRIDE; if (height == 0 || height < 0) @@ -3097,7 +3082,7 @@ RetCode VPU_EncRegisterFrameBuffer(EncHandle handle, FrameBuffer* bufArray, int } ret = ProductVpuAllocateFramebuffer( pCodecInst, fb, (TiledMapType)mapType, num, stride, height, (FrameBufferFormat)openParam->srcFormat, - openParam->cbcrInterleave, FALSE, openParam->frameEndian, (bm_device_mem_t*)&pEncInfo->vbFrame, 0, FB_TYPE_CODEC); + openParam->cbcrInterleave, FALSE, openParam->frameEndian, &pEncInfo->vbFrame, 0, FB_TYPE_CODEC); if (ret != RETCODE_SUCCESS) { SetPendingInst(pCodecInst->coreIdx, 0); LeaveLock(pCodecInst->coreIdx); @@ -3247,7 +3232,7 @@ RetCode VPU_EncUpdateBitstreamBuffer( RetCode VPU_EncStartOneFrame( EncHandle handle, - EncParam * param + EncParam * param ) { CodecInst* pCodecInst; @@ -3367,13 +3352,13 @@ RetCode VPU_EncGiveCommand( pCodecInst = handle; pEncInfo = &pCodecInst->CodecInfo->encInfo; - switch (cmd) + switch (cmd) { case ENABLE_ROTATION : { - pEncInfo->rotationEnable = 1; + pEncInfo->rotationEnable = 1; } - break; + break; case DISABLE_ROTATION : { pEncInfo->rotationEnable = 0; @@ -3386,11 +3371,11 @@ RetCode VPU_EncGiveCommand( break; case DISABLE_MIRRORING : { - pEncInfo->mirrorEnable = 0; + pEncInfo->mirrorEnable = 0; } break; case SET_MIRROR_DIRECTION : - { + { MirrorDirection mirDir; if (param == 0) { @@ -3402,7 +3387,7 @@ RetCode VPU_EncGiveCommand( } pEncInfo->mirrorDirection = mirDir; } - break; + break; case 
SET_ROTATION_ANGLE : { int angle; @@ -3418,7 +3403,7 @@ RetCode VPU_EncGiveCommand( if (pEncInfo->initialInfoObtained && (angle == 90 || angle ==270)) { return RETCODE_INVALID_PARAM; } - pEncInfo->rotationAngle = angle; + pEncInfo->rotationAngle = angle; } break; case SET_CACHE_CONFIG: @@ -3437,13 +3422,13 @@ RetCode VPU_EncGiveCommand( if (param == 0) { return RETCODE_INVALID_PARAM; - } + } encHeaderParam = (EncHeaderParam *)param; if (pCodecInst->codecMode == MP4_ENC ) { if (!( VOL_HEADER<=encHeaderParam->headerType && encHeaderParam->headerType <= VIS_HEADER)) { return RETCODE_INVALID_PARAM; } - } + } else if (pCodecInst->codecMode == AVC_ENC) { if (!( SPS_RBSP<=encHeaderParam->headerType && encHeaderParam->headerType <= PPS_RBSP_MVC)) { return RETCODE_INVALID_PARAM; @@ -3454,7 +3439,7 @@ RetCode VPU_EncGiveCommand( return RETCODE_INVALID_PARAM; } if (pEncInfo->ringBufferEnable == 0 ) { - if (encHeaderParam->buf % 16 || encHeaderParam->size == 0) + if (encHeaderParam->buf % 16 || encHeaderParam->size == 0) return RETCODE_INVALID_PARAM; } if (encHeaderParam->headerType & CODEOPT_ENC_VCL) // ENC_PUT_VIDEO_HEADER encode only non-vcl header. 
@@ -3478,21 +3463,21 @@ RetCode VPU_EncGiveCommand( else { return GetEncHeader(handle, encHeaderParam); } - } + } case ENC_SET_PARAM: { if (param == 0) { return RETCODE_INVALID_PARAM; } pEncInfo->openParam = *(EncOpenParam *)param; - + if (pCodecInst->codecMode != AVC_ENC) return RETCODE_INVALID_COMMAND; - + ret = EncParaSet(handle, SPS_RBSP); if (ret != RETCODE_SUCCESS) return ret; - + ret = EncParaSet(handle, PPS_RBSP); if (ret != RETCODE_SUCCESS) return ret; @@ -3507,7 +3492,7 @@ RetCode VPU_EncGiveCommand( if (*pGopNumber < 0) return RETCODE_INVALID_PARAM; pEncInfo->openParam.gopSize = *pGopNumber; - SetGopNumber(handle, (Uint32 *)pGopNumber); + SetGopNumber(handle, (Uint32 *)pGopNumber); } break; case ENC_SET_INTRA_QP: @@ -3517,16 +3502,16 @@ RetCode VPU_EncGiveCommand( return RETCODE_INVALID_COMMAND; } if (pCodecInst->codecMode == MP4_ENC) - { + { if(*pIntraQp<1 || *pIntraQp>31) return RETCODE_INVALID_PARAM; } if (pCodecInst->codecMode == AVC_ENC) - { + { if(*pIntraQp<0 || *pIntraQp>51) return RETCODE_INVALID_PARAM; } - SetIntraQp(handle, (Uint32 *)pIntraQp); + SetIntraQp(handle, (Uint32 *)pIntraQp); } break; case ENC_SET_BITRATE: @@ -3540,7 +3525,7 @@ RetCode VPU_EncGiveCommand( return RETCODE_INVALID_PARAM; } } - SetBitrate(handle, (Uint32 *)pBitrate); + SetBitrate(handle, (Uint32 *)pBitrate); } break; case ENC_SET_FRAME_RATE: @@ -3553,13 +3538,13 @@ RetCode VPU_EncGiveCommand( if (*pFramerate <= 0) { return RETCODE_INVALID_PARAM; } - SetFramerate(handle, (Uint32 *)pFramerate); + SetFramerate(handle, (Uint32 *)pFramerate); } break; case ENC_SET_INTRA_MB_REFRESH_NUMBER: { int *pIntraRefreshNum =(int *)param; - SetIntraRefreshNum(handle, (Uint32 *)pIntraRefreshNum); + SetIntraRefreshNum(handle, (Uint32 *)pIntraRefreshNum); } break; @@ -3606,16 +3591,16 @@ RetCode VPU_EncGiveCommand( pEncInfo->secAxiInfo.u.wave.useEncRdoEnable = secAxiUse.u.wave.useEncRdoEnable; pEncInfo->secAxiInfo.u.wave.useEncLfEnable = secAxiUse.u.wave.useEncLfEnable; } - else { // coda9 
or coda7q or ... + else { // coda9 or coda7q or ... pEncInfo->secAxiInfo.u.coda9.useBitEnable = secAxiUse.u.coda9.useBitEnable; pEncInfo->secAxiInfo.u.coda9.useIpEnable = secAxiUse.u.coda9.useIpEnable; pEncInfo->secAxiInfo.u.coda9.useDbkYEnable = secAxiUse.u.coda9.useDbkYEnable; pEncInfo->secAxiInfo.u.coda9.useDbkCEnable = secAxiUse.u.coda9.useDbkCEnable; pEncInfo->secAxiInfo.u.coda9.useOvlEnable = secAxiUse.u.coda9.useOvlEnable; - pEncInfo->secAxiInfo.u.coda9.useBtpEnable = secAxiUse.u.coda9.useBtpEnable; - } + pEncInfo->secAxiInfo.u.coda9.useBtpEnable = secAxiUse.u.coda9.useBtpEnable; + } } - break; + break; case ENC_CONFIG_SUB_FRAME_SYNC: // for CODA9 { EncSubFrameSyncConfig *subFrameSyncConfig; @@ -3670,7 +3655,7 @@ RetCode VPU_EncGiveCommand( } case ENABLE_LOGGING: { - pCodecInst->loggingEnable = 1; + pCodecInst->loggingEnable = 1; } break; case DISABLE_LOGGING: @@ -3746,14 +3731,14 @@ RetCode VPU_EncAllocateFrameBuffer(EncHandle handle, FrameBufferAllocInfo info, gdiIndex = pEncInfo->numFrameBuffers; ret = ProductVpuAllocateFramebuffer(pCodecInst, frameBuffer, (TiledMapType)info.mapType, (Int32)info.num, info.stride, info.height, info.format, info.cbcrInterleave, info.nv21, - info.endian, (bm_device_mem_t*)&pEncInfo->vbPPU, gdiIndex, (FramebufferAllocType)info.type); + info.endian, &pEncInfo->vbPPU, gdiIndex, (FramebufferAllocType)info.type); }//this is for compile,can not guaranteed the operation else if (info.type == FB_TYPE_CODEC) { gdiIndex = 0; pEncInfo->frameAllocExt = frameBuffer[0].updateFbInfo; ret = ProductVpuAllocateFramebuffer( pCodecInst, frameBuffer, (TiledMapType)info.mapType, (Int32)info.num, - info.stride, info.height, info.format, info.cbcrInterleave, FALSE, info.endian, (bm_device_mem_t*)&pEncInfo->vbFrame, gdiIndex, (FramebufferAllocType)info.type); + info.stride, info.height, info.format, info.cbcrInterleave, FALSE, info.endian, &pEncInfo->vbFrame, gdiIndex, (FramebufferAllocType)info.type); //this is for compile,can not guaranteed the 
operation } else { @@ -3824,7 +3809,7 @@ RetCode VPU_EncCompleteSeqInit(EncHandle handle, EncInitialInfo * info) else { if (pCodecInst != GetPendingInst(pCodecInst->coreIdx)) { SetPendingInst(pCodecInst->coreIdx, 0); - LeaveLock(pCodecInst->coreIdx); + LeaveLock(pCodecInst->coreIdx); return RETCODE_WRONG_CALL_SEQUENCE; } } @@ -4037,152 +4022,3 @@ void VPU_PrintW5AllReg(int coreIdx) return; } - - - void bmvpu_dec_load_bmlib_handle(int coreIdx){ - if (coreIdx > MAX_NUM_VPU_CORE) - { - VLOG(INFO,"soc_idx excess MAX_SOC_NUM!\n"); - exit(0); - } - int soc_idx= coreIdx/MAX_NUM_VPU_CORE_CHIP; - bm_handle_lock(); - if (g_bm_handle[soc_idx].bm_handle) - { - g_bm_handle[soc_idx].count += 1; - bm_handle_unlock(); - return ; - } - - bm_handle_t handle; - bm_status_t ret = bm_dev_request(&handle, soc_idx); - if (ret != BM_SUCCESS) { - VLOG(INFO,"Create Bm Handle Failed\n"); - bm_handle_unlock(); - exit(0); - } - g_bm_handle[soc_idx].count = 1; - g_bm_handle[soc_idx].bm_handle = handle; - bm_handle_unlock(); - return ; -} - -/* - * If a bm_handle_t on this soc already exists, then the bm_handle_t's reference count -1. - * After that, if bm_handle_t's reference count is 0, free it(bm_dev_free), - * otherwise do nothing. - * This function is only be called by bmvpu_enc_unload(). 
- */ -void bmvpu_dec_unload_bmlib_handle(int coreIdx){ - if (coreIdx > MAX_NUM_VPU_CORE) - { - VLOG(ERR,"soc_idx excess MAX_SOC_NUM!\n"); - exit(0); - } - int soc_idx= coreIdx/MAX_NUM_VPU_CORE_CHIP; - if (g_bm_handle[soc_idx].bm_handle) - { - bm_handle_lock(); - if (g_bm_handle[soc_idx].count <= 1) - { - bm_dev_free(g_bm_handle[soc_idx].bm_handle); - g_bm_handle[soc_idx].count = 0; - g_bm_handle[soc_idx].bm_handle = 0; - VLOG(INFO,"Free bm_handle for decode on soc %d \n", soc_idx); - } - else - { - g_bm_handle[soc_idx].count -= 1; - VLOG(INFO,"The bm_handle for decode on soc is used by %d users \n", g_bm_handle[soc_idx].count); - } - bm_handle_unlock(); - } - else - VLOG(ERR,"Bm_handle for encode on soc %d not exist \n", soc_idx); -} - - - -bm_handle_t bmvpu_dec_get_bmlib_handle(int coreIdx) -{ - bm_handle_t handle = 0; - int soc_idx= coreIdx/MAX_NUM_VPU_CORE_CHIP; - if (coreIdx > MAX_NUM_VPU_CORE) - { - VLOG(ERR,"soc_idx excess MAX_SOC_NUM!\n"); - exit(0); - } - bm_handle_lock(); - if (g_bm_handle[soc_idx].bm_handle) - { - handle = g_bm_handle[soc_idx].bm_handle; - bm_handle_unlock(); - //VLOG(ERR,"core_idx=%d,bm_handle[soc_idx].bm_handle=%p\n",pcie_id,handle); - return handle; - } - else - { - bm_handle_unlock(); - VLOG(ERR,"There is not bmlib_handle on soc %d, This function should be called after bmvpu_dec_load()! 
\n",soc_idx); - return handle; - } -} - - - -int bmvpu_malloc_device_byte_heap(bm_handle_t bm_handle, bm_device_mem_t *pmem, unsigned int size, int heap_id_mask, int high_bit_first) -{ - int ret = 0; - int i = 0; - unsigned int heap_num = 0; - ret = bm_get_gmem_total_heap_num(bm_handle, &heap_num); - if (ret != 0) - { - VLOG(ERR,"bmvpu_malloc_device_byte_heap failed!\n"); - return -1; - } - - int available_heap_mask = 0; - for (i=0; i=0; i--) - { - if ((enable_heap_mask & (0x1< #endif -#include "bmlib_interface.h" #define MAX_GDI_IDX 31 #define MAX_REG_FRAME MAX_GDI_IDX*2 // 2 for WTL -#define HEAP_MASK 0x06 //for alloc memory in 2 heap firstly #define WAVE5_ENC_FBC50_LUMA_TABLE_SIZE(_w, _h) (VPU_ALIGN2048(VPU_ALIGN32(_w))*VPU_ALIGN4(_h)/64) #define WAVE5_ENC_FBC50_CHROMA_TABLE_SIZE(_w, _h) (VPU_ALIGN2048(VPU_ALIGN32(_w)/2)*VPU_ALIGN4(_h)/128) @@ -1474,13 +1472,6 @@ It enables source frame data with long burst length to be loaded for reducing DM @* 1 : enable the long-burst mode. @endverbatim */ - Uint64 bufYVaddr; /**< It indicates the base address for Y component in the physical address space when linear map is used. It is the RAS base address for Y component when tiled map is used (CODA9). It is also compressed Y buffer or ARM compressed framebuffer (WAVE). */ - Uint64 bufCbVaddr; /**< It indicates the base address for Cb component in the physical address space when linear map is used. It is the RAS base address for Cb component when tiled map is used (CODA9). It is also compressed CbCr buffer (WAVE) */ - Uint64 bufCrVaddr; /**< It indicates the base address for Cr component in the physical address space when linear map is used. It is the RAS base address for Cr component when tiled map is used (CODA9). */ - Uint64 bufYBotVaddr; /**< It indicates the base address for Y bottom field component in the physical address space when linear map is used. It is the RAS base address for Y bottom field component when tiled map is used (CODA980 only). 
*/ // coda980 only - Uint64 bufCbBotVaddr; /**< It indicates the base address for Cb bottom field component in the physical address space when linear map is used. It is the RAS base address for Cb bottom field component when tiled map is used (CODA980 only). */ // coda980 only - Uint64 bufCrBotVaddr; - int sourceLBurstEn; int sequenceNo; /**< A sequence number that the frame belongs to. It increases by 1 every time a sequence changes in decoder. */ @@ -2067,7 +2058,6 @@ when VPU_DecOpen() is executed. */ vpu_buffer_t vbWork; - bm_device_mem_t devMemInfoVbWork; /** @verbatim It determines prediction mode of frame buffer compression. @@ -2081,13 +2071,14 @@ It determines prediction mode of frame buffer compression. Uint32 virtAxiID; /**< AXI_ID to distinguish guest OS. For virtualization only. Set this value in highest bit order.*/ BOOL bwOptimization; /**< Bandwidth optimization feature which allows WTL(Write to Linear)-enabled VPU to skip writing compressed format of non-reference pictures or linear format of non-display pictures to the frame buffer for BW saving reason. */ - /** @verbatim It record the sophon chip index setted by user. @endverbatim */ Uint32 sophon_idx; + + Uint32 decodeOrder; /**< get yuv frame by decode order */ } DecOpenParam; /** @@ -2907,24 +2898,6 @@ A CTU size (only for WAVE series) vpu_buffer_t vbMvCol[MAX_REG_FRAME]; /**< The information of frame buffer to save co-located motion vector buffer */ FrameBuffer framebufPool[64]; /**< This is an array of <> which contains the information of each frame buffer. When WTL is enabled, the number of framebufPool would be [number of compressed frame buffer] x 2, and the starting index of frame buffer for WTL is framebufPool[number of compressed frame buffer]. 
*/ - - Uint64 sysMemCol[MAX_REG_FRAME]; - bm_system_mem_t devMvCol[MAX_REG_FRAME]; - - - Uint64 sysMemWtl; - bm_system_mem_t devMemWTL; - - - Uint64 sysMemFrame; - bm_device_mem_t devMemFrame; - - bm_device_mem_t devMemInfoVbMv[MAX_REG_FRAME]; - bm_device_mem_t devMemInfoVbFbcYTbl[MAX_REG_FRAME]; - bm_device_mem_t devMemInfoVbFbcCTbl[MAX_REG_FRAME]; - - - } DecGetFramebufInfo; /** @@ -6198,10 +6171,6 @@ int VPU_GetInNum(Uint32 coreIdx, Uint32 instIdx); int VPU_GetOutNum(Uint32 coreIdx, Uint32 instIdx); DecHandle VPU_GetDecHanle(Uint32 coreIdx, Uint32 instIdx); RetCode VPU_DecDestroy(DecHandle handle); -void bmvpu_dec_load_bmlib_handle(int coreIdx); -void bmvpu_dec_unload_bmlib_handle(int coreIdx); -int bmvpu_malloc_device_byte_heap(bm_handle_t bm_handle, bm_device_mem_t *pmem, unsigned int size, int heap_id_mask, int high_bit_first); -bm_handle_t bmvpu_dec_get_bmlib_handle(int coreIdx); #ifdef __cplusplus } #endif diff --git a/bmvid/video/provider/cnm/decoder/vpuapi/vpuapifunc.h b/bmvid/video/provider/cnm/decoder/vpuapi/vpuapifunc.h index c3a1268..999f19a 100755 --- a/bmvid/video/provider/cnm/decoder/vpuapi/vpuapifunc.h +++ b/bmvid/video/provider/cnm/decoder/vpuapi/vpuapifunc.h @@ -18,6 +18,7 @@ #define VPUAPI_UTIL_H_INCLUDED #include "vpuapi.h" +#include "bmlib_runtime.h" // COD_STD enum { @@ -302,7 +303,8 @@ typedef struct { Uint32 PrevDecodeEndTick; Uint32 cyclePerTick; Uint32 productCode; - + Uint32 enableDecodeOrder; + int framebuf_from_user; } DecInfo; typedef struct { EncOpenParam openParam; @@ -806,7 +808,7 @@ typedef struct Uint8 film_grain_model_id; Uint8 separate_colour_description_present_flag; Uint8 film_grain_bit_depth_luma_minus8; - + Uint8 film_grain_bit_depth_chroma_minus8; Uint8 film_grain_full_range_flag; Uint8 film_grain_colour_primaries; diff --git a/bmvid/video/provider/cnm/decoder/vpuapi/wave/wave5.c b/bmvid/video/provider/cnm/decoder/vpuapi/wave/wave5.c index 91fd200..30326ed 100755 --- a/bmvid/video/provider/cnm/decoder/vpuapi/wave/wave5.c 
+++ b/bmvid/video/provider/cnm/decoder/vpuapi/wave/wave5.c @@ -432,7 +432,6 @@ RetCode Wave5VpuBuildUpDecParam(CodecInst* instance, DecOpenParam* param) VpuAttr* pAttr = &g_VpuCoreAttributes[instance->coreIdx]; Uint32 bsEndian = 0; vpu_buffer_t vb; - bm_handle_t bm_handle; pDecInfo = VPU_HANDLE_TO_DECINFO(instance); int core_idx = instance->coreIdx; int inst_idx = instance->instIndex; @@ -465,49 +464,44 @@ RetCode Wave5VpuBuildUpDecParam(CodecInst* instance, DecOpenParam* param) default: return RETCODE_NOT_SUPPORTED_FEATURE; } - bm_handle=bmvpu_dec_get_bmlib_handle(core_idx); pDecInfo->scaleWidth = 0; pDecInfo->scaleHeight = 0; pDecInfo->targetSubLayerId = (param->bitstreamFormat == STD_AVS2) ? AVS2_MAX_SUB_LAYER_ID : HEVC_MAX_SUB_LAYER_ID; - if (param->devMemInfoVbWork.size > 0) { - pDecInfo->vbDevWork = param->devMemInfoVbWork; + if (param->vbWork.size > 0) { + pDecInfo->vbWork = param->vbWork; pDecInfo->workBufferAllocExt = TRUE; vdi_attach_dma_memory(instance->coreIdx, ¶m->vbWork); } else { if (instance->productId == PRODUCT_ID_512) { - pDecInfo->vbDevWork.size = WAVE512DEC_WORKBUF_SIZE; + pDecInfo->vbWork.size = WAVE512DEC_WORKBUF_SIZE; } else if (instance->productId == PRODUCT_ID_515) { - pDecInfo->vbDevWork.size = WAVE515DEC_WORKBUF_SIZE; + pDecInfo->vbWork.size = WAVE515DEC_WORKBUF_SIZE; } else if (instance->productId == PRODUCT_ID_525) { if (param->bitstreamFormat == STD_SVAC) - pDecInfo->vbDevWork.size = WAVE525_SVAC_DEC_WORKBUF_SIZE; + pDecInfo->vbWork.size = WAVE525_SVAC_DEC_WORKBUF_SIZE; else - pDecInfo->vbDevWork.size = (Uint32)WAVE525DEC_WORKBUF_SIZE; + pDecInfo->vbWork.size = (Uint32)WAVE525DEC_WORKBUF_SIZE; } else if (instance->productId == PRODUCT_ID_521) { - pDecInfo->vbDevWork.size = (Uint32)WAVE521DEC_WORKBUF_SIZE; // FIX ME + pDecInfo->vbWork.size = (Uint32)WAVE521DEC_WORKBUF_SIZE; // FIX ME } else if (instance->productId == PRODUCT_ID_511) { - pDecInfo->vbDevWork.size = (Uint32)WAVE521DEC_WORKBUF_SIZE; // FIX ME + pDecInfo->vbWork.size = 
(Uint32)WAVE521DEC_WORKBUF_SIZE; // FIX ME } pDecInfo->workBufferAllocExt = FALSE; - if (bmvpu_malloc_device_byte_heap(bm_handle,&pDecInfo->vbDevWork, pDecInfo->vbDevWork.size,HEAP_MASK,1) !=BM_SUCCESS) { - pDecInfo->vbDevWork.u.device.device_addr = 0; - pDecInfo->vbDevWork.size = 0; - pDecInfo->vbWorkVaddr = 0; + if (vdi_allocate_dma_memory(instance->coreIdx, &pDecInfo->vbWork) < 0) { + pDecInfo->vbWork.base = 0; + pDecInfo->vbWork.phys_addr = 0; + pDecInfo->vbWork.size = 0; + pDecInfo->vbWork.virt_addr = 0; VLOG(ERR,"in there %d\n",__LINE__); return RETCODE_INSUFFICIENT_RESOURCE; } - else - { - - bm_vdi_mmap(bm_handle,&pDecInfo->vbDevWork,&pDecInfo->vbWorkVaddr); - } } VpuWriteReg(instance->coreIdx, W5_CMD_DEC_VCORE_INFO, 1); @@ -517,15 +511,12 @@ RetCode Wave5VpuBuildUpDecParam(CodecInst* instance, DecOpenParam* param) pDecInfo->vbTemp.phys_addr = vb.phys_addr + WAVE5_TEMPBUF_OFFSET; pDecInfo->vbTemp.size = WAVE5_TEMPBUF_SIZE; - Uint8* zero; - zero=osal_malloc(pDecInfo->vbDevWork.size); - osal_memset(zero, 0, pDecInfo->vbDevWork.size); - bm_memcpy_s2d(bm_handle,pDecInfo->vbDevWork,zero); - VpuWriteReg(instance->coreIdx, W5_ADDR_WORK_BASE, pDecInfo->vbDevWork.u.device.device_addr); - VpuWriteReg(instance->coreIdx, W5_WORK_SIZE, pDecInfo->vbDevWork.size); + vdi_clear_memory(instance->coreIdx, pDecInfo->vbWork.phys_addr, pDecInfo->vbWork.size, 0); + VpuWriteReg(instance->coreIdx, W5_ADDR_WORK_BASE, pDecInfo->vbWork.phys_addr); + VpuWriteReg(instance->coreIdx, W5_WORK_SIZE, pDecInfo->vbWork.size); VpuWriteReg(instance->coreIdx, W5_CMD_DEC_BS_START_ADDR, pDecInfo->streamBufStartAddr); VpuWriteReg(instance->coreIdx, W5_CMD_DEC_BS_SIZE, pDecInfo->streamBufSize); - osal_free(zero); + bsEndian = vdi_convert_endian(instance->coreIdx, param->streamEndian); /* NOTE: When endian mode is 0, SDMA reads MSB first */ bsEndian = (~bsEndian&VDI_128BIT_ENDIAN_MASK); @@ -538,15 +529,16 @@ RetCode Wave5VpuBuildUpDecParam(CodecInst* instance, DecOpenParam* param) if 
(vdi_wait_vpu_busy(instance->coreIdx, __VPU_BUSY_TIMEOUT, W5_VPU_BUSY_STATUS) == -1) { // Check QUEUE_DONE if (instance->loggingEnable) vdi_log(instance->coreIdx, W5_CREATE_INSTANCE, 2); - bm_free_mem(bm_handle,pDecInfo->vbDevWork,pDecInfo->vbWorkVaddr); + vdi_free_dma_memory(instance->coreIdx, &pDecInfo->vbWork); return RETCODE_VPU_RESPONSE_TIMEOUT; } #ifdef WAVE512_FVP VpuWriteReg(instance->coreIdx, W5_RET_SUCCESS, 1); #endif if (VpuReadReg(instance->coreIdx, W5_RET_SUCCESS) == FALSE) { // FAILED for adding into VCPU QUEUE - bm_free_mem(bm_handle,pDecInfo->vbDevWork,pDecInfo->vbWorkVaddr); + vdi_free_dma_memory(instance->coreIdx, &pDecInfo->vbWork); ret = RETCODE_FAILURE; + return ret; } // 0-31:instance index; vpu_inst_flag = 1 << inst_idx; @@ -842,7 +834,7 @@ RetCode Wave5VpuDecRegisterFramebuffer(CodecInst* inst, FrameBuffer* fbArr, Tile Uint32 endian, yuvFormat = 0; Uint32 addrY, addrCb, addrCr; Uint32 mvColSize, fbcYTblSize, fbcCTblSize; - bm_device_mem_t vbBuffer; + vpu_buffer_t vbBuffer; Uint32 stride; Uint32 colorFormat = 0; Uint32 outputFormat = 0; @@ -850,7 +842,6 @@ RetCode Wave5VpuDecRegisterFramebuffer(CodecInst* inst, FrameBuffer* fbArr, Tile Uint32 initPicWidth = 0, initPicHeight = 0; Uint32 scalerFlag = 0; Uint32 bwbFlag = 0; - bm_handle_t bm_handle; coreIdx = inst->coreIdx; axiID = pDecInfo->openParam.virtAxiID; @@ -860,7 +851,6 @@ RetCode Wave5VpuDecRegisterFramebuffer(CodecInst* inst, FrameBuffer* fbArr, Tile initPicWidth = pDecInfo->initialInfo.picWidth; initPicHeight = pDecInfo->initialInfo.picHeight; - bm_handle=bmvpu_dec_get_bmlib_handle(coreIdx); if (inst->codecMode == W_SVAC_DEC && mapType == COMPRESSED_FRAME_MAP_SVAC_SVC_BL) { initPicWidth = pDecInfo->initialInfo.picWidth>>1; // BL size is half as EL initPicHeight = pDecInfo->initialInfo.picHeight>>1; @@ -884,25 +874,20 @@ RetCode Wave5VpuDecRegisterFramebuffer(CodecInst* inst, FrameBuffer* fbArr, Tile } mvColSize = VPU_ALIGN16(mvColSize); - vbBuffer.u.device.device_addr = 0; - 
vbBuffer.size = ((mvColSize+4095)&~4095)+4096; + vbBuffer.phys_addr = 0; if (inst->codecMode == W_HEVC_DEC || inst->codecMode == W_AVS2_DEC || inst->codecMode == W_VP9_DEC || inst->codecMode == W_AVC_DEC) { + vbBuffer.size = ((mvColSize+4095)&~4095)+4096; mvCount = count; APIDPRINT("ALLOC MEM - MV\n"); for (k=0 ; kvbDevMV[k].size == 0) { - if(bmvpu_malloc_device_byte_heap(bm_handle, &vbBuffer,((mvColSize+4095)&~4095)+4096 ,HEAP_MASK,1) !=BM_SUCCESS) - { - VLOG(ERR,"in there %d\n",__LINE__); - return -1; - } - else - { - pDecInfo->vbDevMV[k] = vbBuffer; - - bm_vdi_mmap(bm_handle,&pDecInfo->vbDevMV[k],&pDecInfo->vbMVVaddr[k]); - } + if ( pDecInfo->vbMV[k].size == 0) { + if (vdi_allocate_dma_memory(inst->coreIdx, &vbBuffer) < 0) + { + VLOG(ERR,"in there %d\n",__LINE__); + return -1; + } + pDecInfo->vbMV[k] = vbBuffer; } } } @@ -935,21 +920,19 @@ RetCode Wave5VpuDecRegisterFramebuffer(CodecInst* inst, FrameBuffer* fbArr, Tile fbcYTblSize = VPU_ALIGN16(fbcYTblSize); } - vbBuffer.u.device.device_addr = 0; + vbBuffer.phys_addr = 0; vbBuffer.size = ((fbcYTblSize+4095)&~4095)+4096; APIDPRINT("ALLOC MEM - FBC Y TBL\n"); - for (k=0 ; kvbDevFbcYTbl[k+svcBLbaseIdx].size == 0) { - if(bmvpu_malloc_device_byte_heap(bm_handle, &vbBuffer, vbBuffer.size,HEAP_MASK,1) !=BM_SUCCESS) + if(pDecInfo->framebuf_from_user == 0) + { + for (k=0 ; kvbDevFbcYTbl[k+svcBLbaseIdx].size == 0) { + if (vdi_allocate_dma_memory(inst->coreIdx, &vbBuffer) < 0) { VLOG(ERR,"in there %d\n",__LINE__); return -1; } - else - { - pDecInfo->vbDevFbcYTbl[k+svcBLbaseIdx] = vbBuffer; - - bm_vdi_mmap(bm_handle,&pDecInfo->vbDevFbcYTbl[k+svcBLbaseIdx],&pDecInfo->vbFbcYTblVaddr[k+svcBLbaseIdx]); + pDecInfo->vbFbcYTbl[k+svcBLbaseIdx] = vbBuffer; } } } @@ -982,20 +965,19 @@ RetCode Wave5VpuDecRegisterFramebuffer(CodecInst* inst, FrameBuffer* fbArr, Tile fbcCTblSize = VPU_ALIGN16(fbcCTblSize); } - vbBuffer.u.device.device_addr = 0; + vbBuffer.phys_addr = 0; vbBuffer.size = ((fbcCTblSize+4095)&~4095)+4096; APIDPRINT("ALLOC 
MEM - FBC C TBL\n"); - for (k=0 ; kvbDevFbcCTbl[k+svcBLbaseIdx].size == 0) { - if(bmvpu_malloc_device_byte_heap(bm_handle, &pDecInfo->vbDevFbcCTbl[k+svcBLbaseIdx], vbBuffer.size,HEAP_MASK,1) !=BM_SUCCESS) + if(pDecInfo->framebuf_from_user == 0) + { + for (k=0 ; kvbDevFbcCTbl[k+svcBLbaseIdx].size == 0) { + if (vdi_allocate_dma_memory(inst->coreIdx, &vbBuffer) < 0) { VLOG(ERR,"in there %d\n",__LINE__); return -1; } - else - { - - bm_vdi_mmap(bm_handle,&pDecInfo->vbDevFbcCTbl[k+svcBLbaseIdx],&pDecInfo->vbFbcCTblVaddr[k+svcBLbaseIdx]); + pDecInfo->vbFbcCTbl[k+svcBLbaseIdx] = vbBuffer; } } } @@ -1120,13 +1102,13 @@ RetCode Wave5VpuDecRegisterFramebuffer(CodecInst* inst, FrameBuffer* fbArr, Tile VpuWriteReg(coreIdx, W5_ADDR_CB_BASE0 + (i<<4), addrCb); APIDPRINT("REGISTER FB[%02d] Y(0x%08x), Cb(0x%08x) ", i, addrY, addrCb); if (mapType >= COMPRESSED_FRAME_MAP) { - VpuWriteReg(coreIdx, W5_ADDR_FBC_Y_OFFSET0 + (i<<4), pDecInfo->vbDevFbcYTbl[idx+svcBLbaseIdx].u.device.device_addr); /* Luma FBC offset table */ - VpuWriteReg(coreIdx, W5_ADDR_FBC_C_OFFSET0 + (i<<4), pDecInfo->vbDevFbcCTbl[idx+svcBLbaseIdx].u.device.device_addr); /* Chroma FBC offset table */ - VpuWriteReg(coreIdx, W5_ADDR_MV_COL0 + (i<<2), pDecInfo->vbDevMV[idx+svcBLbaseIdx].u.device.device_addr); + VpuWriteReg(coreIdx, W5_ADDR_FBC_Y_OFFSET0 + (i<<4), pDecInfo->vbFbcYTbl[idx+svcBLbaseIdx].phys_addr); /* Luma FBC offset table */ + VpuWriteReg(coreIdx, W5_ADDR_FBC_C_OFFSET0 + (i<<4), pDecInfo->vbFbcCTbl[idx+svcBLbaseIdx].phys_addr); /* Chroma FBC offset table */ + VpuWriteReg(coreIdx, W5_ADDR_MV_COL0 + (i<<2), pDecInfo->vbMV[idx+svcBLbaseIdx].phys_addr); APIDPRINT("Yo(0x%08x) Co(0x%08x), Mv(0x%08x)\n", - pDecInfo->vbDevFbcYTbl[idx].u.device.device_addr, - pDecInfo->vbDevFbcCTbl[idx].u.device.device_addr, - pDecInfo->vbDevMV[idx].u.device.device_addr); + pDecInfo->vbFbcYTbl[idx].phys_addr, + pDecInfo->vbFbcCTbl[idx].phys_addr, + pDecInfo->vbMV[idx].phys_addr); } else { if(addrCr == 0xFFFFFFFF) //Maybe this is a 
bug of firmware. In some stream, In the invaild Cr address, VPU will make the bus hang. So Now I change it to a reserved address. When fixed the firmware issue, We need remove the code. @@ -1169,13 +1151,12 @@ RetCode Wave5VpuDecUpdateFramebuffer(CodecInst* inst, FrameBuffer* fbcFb, FrameB Uint32 coreIdx, regVal; Uint32 mvColSize, fbcYTblSize, fbcCTblSize; Uint32 linearStride, fbcStride; - bm_device_mem_t* pvbMv = NULL; - bm_device_mem_t* pvbFbcYOffset = NULL; - bm_device_mem_t* pvbFbcCOffset = NULL; + vpu_buffer_t* pvbMv = NULL; + vpu_buffer_t* pvbFbcYOffset = NULL; + vpu_buffer_t* pvbFbcCOffset = NULL; CodStd codec; u64 fbcYoffsetAddr = 0; u64 fbcCoffsetAddr = 0; - bm_handle_t bm_handle; coreIdx = inst->coreIdx; linearIndex = (linearFb == NULL) ? -1 : linearFb->myIndex - pDecInfo->numFbsForDecoding; fbcIndex = (fbcFb == NULL) ? -1 : fbcFb->myIndex; @@ -1184,7 +1165,6 @@ RetCode Wave5VpuDecUpdateFramebuffer(CodecInst* inst, FrameBuffer* fbcFb, FrameB if (codec != STD_VP9) { return RETCODE_NOT_SUPPORTED_FEATURE; } - bm_handle=bmvpu_dec_get_bmlib_handle(coreIdx); mvColSize = WAVE5_DEC_VP9_MVCOL_BUF_SIZE(picWidth, picHeight); if ((fbcFb != NULL) && (fbcIndex >= 0)) { @@ -1195,13 +1175,12 @@ RetCode Wave5VpuDecUpdateFramebuffer(CodecInst* inst, FrameBuffer* fbcFb, FrameB } if (mvIndex >= 0) { - pvbMv = &pDecInfo->vbDevMV[mvIndex]; - bm_free_mem(bm_handle,pDecInfo->vbDevMV[mvIndex],pDecInfo->vbMVVaddr[mvIndex]); + pvbMv = &pDecInfo->vbMV[mvIndex]; + vdi_free_dma_memory(inst->coreIdx, pvbMv); pvbMv->size = ((mvColSize+4095)&~4095) + 4096; - if(bmvpu_malloc_device_byte_heap(bm_handle,pvbMv,pvbMv->size,HEAP_MASK,1)!=BM_SUCCESS) + if (vdi_allocate_dma_memory(inst->coreIdx, pvbMv) < 0) { return RETCODE_INSUFFICIENT_RESOURCE; - - bm_vdi_mmap(bm_handle,pvbMv,&pDecInfo->vbMVVaddr[mvIndex]); + } } /* Reallocate FBC offset tables */ @@ -1224,15 +1203,15 @@ RetCode Wave5VpuDecUpdateFramebuffer(CodecInst* inst, FrameBuffer* fbcFb, FrameB } if (fbcIndex >= 0) { - pvbFbcYOffset = 
&pDecInfo->vbDevFbcYTbl[fbcIndex]; - bm_free_mem(bm_handle,*pvbFbcYOffset,pDecInfo->vbFbcYTblVaddr[fbcIndex]); - pvbFbcYOffset->u.device.device_addr = 0; + pvbFbcYOffset = &pDecInfo->vbFbcYTbl[fbcIndex]; + vdi_free_dma_memory(inst->coreIdx, pvbFbcYOffset); + pvbFbcYOffset->phys_addr = 0; pvbFbcYOffset->size = ((fbcYTblSize+4095)&~4095)+4096; - if(bmvpu_malloc_device_byte_heap(bm_handle,pvbFbcYOffset,pvbFbcYOffset->size,HEAP_MASK,1)!=BM_SUCCESS) + if (vdi_allocate_dma_memory(inst->coreIdx, pvbFbcYOffset) < 0) { return RETCODE_INSUFFICIENT_RESOURCE; + } - bm_vdi_mmap(bm_handle,pvbFbcYOffset,&pDecInfo->vbFbcYTblVaddr[fbcIndex]); - fbcYoffsetAddr = pvbFbcYOffset->u.device.device_addr; + fbcYoffsetAddr = pvbFbcYOffset->phys_addr; } if (codec == STD_HEVC) { @@ -1253,14 +1232,14 @@ RetCode Wave5VpuDecUpdateFramebuffer(CodecInst* inst, FrameBuffer* fbcFb, FrameB } if (fbcIndex >= 0) { - pvbFbcCOffset = &pDecInfo->vbDevFbcCTbl[fbcIndex]; - bm_free_mem(bm_handle,*pvbFbcCOffset,pDecInfo->vbFbcCTblVaddr[fbcIndex]); - pvbFbcCOffset->u.device.device_addr = 0; + pvbFbcCOffset = &pDecInfo->vbFbcCTbl[fbcIndex]; + vdi_free_dma_memory(inst->coreIdx, pvbFbcCOffset); + pvbFbcCOffset->phys_addr = 0; pvbFbcCOffset->size = ((fbcCTblSize+4095)&~4095)+4096; - if(bmvpu_malloc_device_byte_heap(bm_handle,pvbFbcCOffset,pvbFbcCOffset->size,HEAP_MASK,1)!=BM_SUCCESS) + if (vdi_allocate_dma_memory(inst->coreIdx, pvbFbcCOffset) < 0) { return RETCODE_INSUFFICIENT_RESOURCE; - bm_vdi_mmap(bm_handle,pvbFbcCOffset,&pDecInfo->vbFbcCTblVaddr[fbcIndex]); - fbcCoffsetAddr = pvbFbcCOffset->u.device.device_addr; + } + fbcCoffsetAddr = pvbFbcCOffset->phys_addr; } linearStride = linearFb == NULL ? 0 : linearFb->stride; @@ -1281,7 +1260,7 @@ RetCode Wave5VpuDecUpdateFramebuffer(CodecInst* inst, FrameBuffer* fbcFb, FrameB VpuWriteReg(coreIdx, W5_ADDR_LUMA_BASE, linearFb == NULL ? 0 : linearFb->bufY); VpuWriteReg(coreIdx, W5_ADDR_CB_BASE, linearFb == NULL ? 
0 : linearFb->bufCb); VpuWriteReg(coreIdx, W5_ADDR_CR_BASE, linearFb == NULL ? 0 : linearFb->bufCr); - VpuWriteReg(coreIdx, W5_ADDR_MV_COL, pvbMv == NULL ? 0 : pvbMv->u.device.device_addr); + VpuWriteReg(coreIdx, W5_ADDR_MV_COL, pvbMv == NULL ? 0 : pvbMv->phys_addr); VpuWriteReg(coreIdx, W5_ADDR_FBC_Y_BASE, fbcFb == NULL ? 0 : fbcFb->bufY); VpuWriteReg(coreIdx, W5_ADDR_FBC_C_BASE, fbcFb == NULL ? 0 : fbcFb->bufCb); VpuWriteReg(coreIdx, W5_ADDR_FBC_Y_OFFSET, fbcYoffsetAddr); @@ -1397,7 +1376,7 @@ RetCode Wave5VpuDecode(CodecInst* instance, DecParam* option) VpuWriteReg(instance->coreIdx, W5_CMD_DEC_TEMPORAL_ID_PLUS1, pDecInfo->targetSubLayerId+1); VpuWriteReg(instance->coreIdx, W5_CMD_SEQ_CHANGE_ENABLE_FLAG, pDecInfo->seqChangeMask); -#ifdef __LINUX__ +#ifdef __linux__ if(getenv("NO_FRAMEBUFFER")!=NULL && strcmp(getenv("NO_FRAMEBUFFER"),"1")==0) { forceLatency = 0; diff --git a/bmvid/video/provider/cnm/encoder/inc/bmvpu.h b/bmvid/video/provider/cnm/encoder/inc/bmvpu.h index f8b0f09..b021220 100644 --- a/bmvid/video/provider/cnm/encoder/inc/bmvpu.h +++ b/bmvid/video/provider/cnm/encoder/inc/bmvpu.h @@ -124,6 +124,16 @@ enum { VPU_GOP_PRESET_IDX_RA_IB = 8, /* Random Access, cyclic gopsize = 8 */ }; +/* VpuMappingFlags: flags for the vpu_EncMmap() function + * These flags can be bitwise-OR combined */ +typedef enum +{ + /* Map memory for CPU write access */ + BM_VPU_MAPPING_FLAG_WRITE = (1UL << 0), + /* Map memory for CPU read access */ + BM_VPU_MAPPING_FLAG_READ = (1UL << 1) +} BmVpuMappingFlags; + /** * Adding a header syntax layer into the encoded bitstream. * The headerType, buf are input parameters to VPU. 
@@ -980,6 +990,7 @@ typedef struct { uint32_t picDistortionHigh; int result; /* VPU_RET_xxx */ + bm_pa_t addrCustomMap; } VpuEncOutputInfo; typedef struct { @@ -1020,6 +1031,7 @@ typedef struct { uint64_t pts; uint64_t dts; int idx; + bm_pa_t addrCustomMap; } inputMap[32]; void* priv; @@ -1028,6 +1040,12 @@ typedef struct { int bframe_delay; } VpuEncoder; +typedef struct { + unsigned int size; + uint64_t phys_addr; + uint64_t virt_addr; + BOOL enable_cache; +} BmVpuDMABuffer; DECL_EXPORT int vpu_EncGetUniCoreIdx(int soc_idx); @@ -1065,6 +1083,17 @@ DECL_EXPORT int vpu_CalcChromaSize(int mapType, uint32_t stride, uint32_t height DECL_EXPORT int vpu_GetFrameBufSize(int mapType, int stride, int height, int yuv_format, int interleave); +DECL_EXPORT int vpu_write_memory(const uint8_t *host_va, int size, int vpu_core_idx, uint64_t vpu_pa); +DECL_EXPORT int vpu_read_memory(uint8_t *host_va, int size, int vpu_core_idx, uint64_t vpu_pa); + +DECL_EXPORT int vpu_EncAllocateDMABuffer(int coreIdx, BmVpuDMABuffer *buf, unsigned int size); +DECL_EXPORT int vpu_EncDeAllocateDMABuffer(int coreIdx, BmVpuDMABuffer *buf); +DECL_EXPORT int vpu_EncAttachDMABuffer(int coreIdx, BmVpuDMABuffer *buf); +DECL_EXPORT int vpu_EncDeattachDMABuffer(int coreIdx, BmVpuDMABuffer *buf); +DECL_EXPORT int vpu_EncMmap(int coreIdx, BmVpuDMABuffer* buf, int port_flag); +DECL_EXPORT int vpu_EncMunmap(int coreIdx, BmVpuDMABuffer* buf); +DECL_EXPORT int vpu_EncFlushDecache(int coreIdx, BmVpuDMABuffer* buf); +DECL_EXPORT int vpu_EncInvalidateDecache(int coreIdx, BmVpuDMABuffer* buf); #endif /* __BM_VPU_LIB_H__ */ diff --git a/bmvid/video/provider/cnm/encoder/inc/bmvpu_types.h b/bmvid/video/provider/cnm/encoder/inc/bmvpu_types.h index da7ff0e..5f88201 100644 --- a/bmvid/video/provider/cnm/encoder/inc/bmvpu_types.h +++ b/bmvid/video/provider/cnm/encoder/inc/bmvpu_types.h @@ -48,12 +48,19 @@ typedef int BOOL; # define NULL 0 #endif +#ifndef u64 #ifdef _WIN32 typedef unsigned long long u64; -typedef unsigned int 
u32; #elif __linux__ typedef unsigned long u64; #endif +#endif + +#ifndef u32 +#ifdef _WIN32 +typedef unsigned int u32; +#endif +#endif #ifndef UNREFERENCED_PARAMETER # define UNREFERENCED_PARAMETER(P) \ diff --git a/bmvid/video/provider/cnm/encoder/inc/vdi.h b/bmvid/video/provider/cnm/encoder/inc/vdi.h index 7f3bbc0..973e124 100644 --- a/bmvid/video/provider/cnm/encoder/inc/vdi.h +++ b/bmvid/video/provider/cnm/encoder/inc/vdi.h @@ -98,7 +98,8 @@ typedef struct vpu_buffer_t { u64 base; u64 virt_addr; - unsigned int reserve[2]; + unsigned int core_idx; + int enable_cache; } vpu_buffer_t; #ifndef ENDIANMODE @@ -226,5 +227,18 @@ int bm_vdi_get_firmware_status(uint32_t core_idx); int bm_vdi_resume_kernel_reset(uint32_t core_idx); int bm_vdi_disable_kernel_reset(uint32_t core_idx); int bm_vdi_get_kernel_reset(uint32_t core_idx); + +int bm_vdi_write_memory(uint32_t core_idx, uint64_t dst_addr, uint8_t *src_data, int len); +int bm_vdi_read_memory(uint32_t core_idx, uint64_t src_addr, uint8_t *dst_data, int len); + +int bm_vdi_allocate_dma_memory(uint32_t core_idx, vpu_buffer_t *vb); +int bm_vdi_free_dma_memory(uint32_t core_idx, vpu_buffer_t *vb); +int bm_vdi_mmap_dma_memory(uint32_t core_idx, vpu_buffer_t *vb, int enable_read, int enable_write); +int bm_vdi_unmap_dma_memory(uint32_t core_idx, vpu_buffer_t *vb); +int bm_vdi_attach_dma_memory(uint32_t core_idx, vpu_buffer_t *vb); +int bm_vdi_deattach_dma_memory(uint32_t core_idx, vpu_buffer_t *vb); +int bm_vdi_flush_dma_memory(uint32_t core_idx, vpu_buffer_t *vb); +int bm_vdi_invalidate_dma_memory(uint32_t core_idx, vpu_buffer_t *vb); + #endif //#ifndef _VDI_H_ diff --git a/bmvid/video/provider/cnm/encoder/inc/vpuopt.h b/bmvid/video/provider/cnm/encoder/inc/vpuopt.h index 481a660..51801aa 100644 --- a/bmvid/video/provider/cnm/encoder/inc/vpuopt.h +++ b/bmvid/video/provider/cnm/encoder/inc/vpuopt.h @@ -74,7 +74,7 @@ #define DEFAULT_SRC_AXI USE_SRC_PRP_AXI /* vpu common memory */ -#define COMMAND_QUEUE_DEPTH 4 +#define 
COMMAND_QUEUE_DEPTH 1 #define ONE_TASKBUF_SIZE_FOR_CQ (8*1024*1024) /* upto 8Kx8K, need 8Mbyte per task. TODO */ #define SIZE_COMMON ((2*1024*1024)+(COMMAND_QUEUE_DEPTH*ONE_TASKBUF_SIZE_FOR_CQ)) diff --git a/bmvid/video/provider/cnm/encoder/src/common.c b/bmvid/video/provider/cnm/encoder/src/common.c index 810de5e..58c78c7 100644 --- a/bmvid/video/provider/cnm/encoder/src/common.c +++ b/bmvid/video/provider/cnm/encoder/src/common.c @@ -325,11 +325,19 @@ int vpu_ShowProductInfo(uint32_t coreIdx, ProductInfo *productInfo) uint64_t vpu_gettime(void) { #ifdef __linux__ + /* struct timeval tv; tv.tv_sec = 0; tv.tv_usec = 0; gettimeofday(&tv, NULL); return tv.tv_sec*1000 + tv.tv_usec/1000; +*/ + + struct timespec tp; + + clock_gettime(CLOCK_MONOTONIC, &tp); + + return (tp.tv_sec*1000 + tp.tv_nsec/1000000); #elif _WIN32 SYSTEMTIME wtm; time_t clock; diff --git a/bmvid/video/provider/cnm/encoder/src/enc.c b/bmvid/video/provider/cnm/encoder/src/enc.c index 3285ca9..20492b9 100644 --- a/bmvid/video/provider/cnm/encoder/src/enc.c +++ b/bmvid/video/provider/cnm/encoder/src/enc.c @@ -20,6 +20,7 @@ #include "vpuapi.h" #ifdef __linux__ +#include #ifdef BM_PCIE_MODE #include #endif @@ -563,8 +564,12 @@ void vpu_SetEncOpenParam(VpuEncOpenParam* pEncOP, int width, int height, /* for VUI / time information. 
*/ wave_par->numTicksPocDiffOne = 0; - wave_par->timeScale = fps_n ; //pEncOP->frameRateInfo * 1000; // TODO - wave_par->numUnitsInTick = fps_d; + if (pEncOP->bitstreamFormat == VPU_CODEC_AVC) + wave_par->timeScale = pEncOP->frameRateInfo * 1000 * 2; + else + wave_par->timeScale = pEncOP->frameRateInfo * 1000; + + wave_par->numUnitsInTick = 1000; wave_par->chromaCbQpOffset = 0; wave_par->chromaCrQpOffset = 0; @@ -917,6 +922,7 @@ int vpu_EncStartOneFrame(VpuEncoder* h, VpuEncParam* param) h->inputMap[i].context = param->context; h->inputMap[i].pts = param->pts; h->inputMap[i].dts = param->dts; + h->inputMap[i].addrCustomMap = param->customMapOpt.addrCustomMap; break; } } @@ -966,6 +972,7 @@ int vpu_EncGetOutputInfo(VpuEncoder* h, VpuEncOutputInfo* info) } // TODO info->dts = info->encPicCnt-1-h->bframe_delay+h->fistIFramePTS; + info->addrCustomMap = h->inputMap[i].addrCustomMap; /* release */ h->inputMap[i].idx = -2; @@ -1269,6 +1276,15 @@ int vpu_GetFrameBufSize(int mapType, int stride, int height, return size_dpb_all; } +int vpu_write_memory(const uint8_t* host_va, int size, int vpu_core_idx, uint64_t vpu_pa) +{ + return bm_vdi_write_memory(vpu_core_idx, vpu_pa, (uint8_t*)host_va, size); +} + +int vpu_read_memory(uint8_t* host_va, int size, int vpu_core_idx, uint64_t vpu_pa) +{ + return bm_vdi_read_memory(vpu_core_idx, vpu_pa, host_va, size); +} int vpu_GetProductId(int coreIdx) { @@ -1302,6 +1318,7 @@ int vpu_GetProductId(int coreIdx) int vpu_InitWithBitcode(uint32_t coreIdx, const uint16_t* code, uint32_t size) { int ret; + int init_status; if (coreIdx >= MAX_NUM_VPU_CORE) return VPU_RET_INVALID_PARAM; @@ -1325,7 +1342,8 @@ int vpu_InitWithBitcode(uint32_t coreIdx, const uint16_t* code, uint32_t size) } } - if (VpuIsInit(coreIdx) != 0) { + init_status = bm_vdi_get_firmware_status(coreIdx); + if (VpuIsInit(coreIdx) != 0 && init_status == 1) { Wave5VpuReInit(coreIdx, (void*)code, size); LeaveLock(coreIdx); return VPU_RET_CALLED_BEFORE; @@ -1373,6 +1391,188 @@ int 
vpu_GetProductInfo(uint32_t coreIdx, ProductInfo* productInfo) return ret; } +/*--------------------------------------- + device memory management + ----------------------------------------*/ +int vpu_EncAllocateDMABuffer(int coreIdx, BmVpuDMABuffer *buf, unsigned int size) +{ + int ret; + vpu_buffer_t vb; + + if (coreIdx >= MAX_NUM_VPU_CORE) + return VPU_RET_INVALID_PARAM; + + vb.size = size; + vb.enable_cache = TRUE; + + ret = bm_vdi_allocate_dma_memory(coreIdx, &vb); + if (ret < 0) { + VLOG(INFO, "allocate device memory failed, size=%d byte\n", vb.size); + return VPU_RET_FAILURE; + } + + buf->phys_addr = vb.phys_addr; + buf->size = size; + buf->enable_cache = TRUE; + if (vb.virt_addr) + buf->virt_addr = vb.virt_addr; + else + buf->virt_addr = 0; + + return VPU_RET_SUCCESS; +} + +int vpu_EncDeAllocateDMABuffer(int coreIdx, BmVpuDMABuffer *buf) +{ + int ret; + vpu_buffer_t vb; + + if (coreIdx >= MAX_NUM_VPU_CORE) + return VPU_RET_INVALID_PARAM; + + vb.phys_addr = buf->phys_addr; + vb.size = buf->size; + vb.virt_addr = buf->virt_addr; + vb.enable_cache = buf->enable_cache; + + ret = bm_vdi_free_dma_memory(coreIdx, &vb); + if (ret < 0) { + VLOG(INFO, "allocate device memory failed, size=%d byte\n", vb.size); + return VPU_RET_FAILURE; + } + + return VPU_RET_SUCCESS; +} + + +int vpu_EncAttachDMABuffer(int coreIdx, BmVpuDMABuffer *buf) +{ + int ret; + vpu_buffer_t vb; + + if (coreIdx >= MAX_NUM_VPU_CORE) + return VPU_RET_INVALID_PARAM; + + vb.phys_addr = buf->phys_addr; + vb.size = buf->size; + ret = bm_vdi_attach_dma_memory(coreIdx, &vb); + if (ret < 0) { + VLOG(INFO, "attach device memory failed, size=%d byte\n", vb.size); + return VPU_RET_FAILURE; + } + return VPU_RET_SUCCESS; +} + +int vpu_EncDeattachDMABuffer(int coreIdx, BmVpuDMABuffer *buf) +{ + int ret; + vpu_buffer_t vb; + + if (coreIdx >= MAX_NUM_VPU_CORE) + return VPU_RET_INVALID_PARAM; + + vb.phys_addr = buf->phys_addr; + vb.size = buf->size; + ret = bm_vdi_deattach_dma_memory(coreIdx, &vb); + if (ret < 0) 
{ + VLOG(INFO, "deattach device memory failed, size=%d byte\n", vb.size); + return VPU_RET_FAILURE; + } + return VPU_RET_SUCCESS; +} + + + +int vpu_EncMmap(int coreIdx, BmVpuDMABuffer* buf, int port_flag) +{ + int ret; + int enable_read = port_flag & BM_VPU_MAPPING_FLAG_READ; + int enable_write = port_flag & BM_VPU_MAPPING_FLAG_WRITE; + vpu_buffer_t tmp_buf; + + if (coreIdx >= MAX_NUM_VPU_CORE) + return VPU_RET_INVALID_PARAM; + + if (buf->virt_addr) { + VLOG(WARN, "%s:%d dma_buffer already have vaddr 0x%lx\n", __func__, __LINE__, buf->virt_addr); + } + + tmp_buf.phys_addr = buf->phys_addr; + tmp_buf.size = buf->size; + ret = bm_vdi_mmap_dma_memory(coreIdx, &tmp_buf, enable_read, enable_write); + if (ret < 0) + return VPU_RET_FAILURE; + + buf->virt_addr = tmp_buf.virt_addr; + return VPU_RET_SUCCESS; +}; + +int vpu_EncMunmap(int coreIdx, BmVpuDMABuffer* buf) +{ +#ifdef BM_PCIE_MODE + return 0; +#else + int ret; + vpu_buffer_t tmp_buf; + + if (buf->virt_addr == 0 || buf->size == 0) { + return VPU_RET_SUCCESS; + } + + tmp_buf.phys_addr = buf->phys_addr; + tmp_buf.virt_addr = buf->virt_addr; + tmp_buf.size = buf->size; + ret = bm_vdi_unmap_dma_memory(coreIdx, &tmp_buf); + if (ret < 0) + return VPU_RET_FAILURE; + + buf->virt_addr = 0; + return ret; +#endif +} + +int vpu_EncFlushDecache(int coreIdx, BmVpuDMABuffer* buf) +{ +#ifdef BM_PCIE_MODE +#else + int ret; + vpu_buffer_t tmp_buf; + + if (buf->size == 0) { + return VPU_RET_SUCCESS; + } + + tmp_buf.size = buf->size; + tmp_buf.phys_addr = buf->phys_addr; + tmp_buf.virt_addr = buf->virt_addr; + tmp_buf.enable_cache = buf->enable_cache; + ret = bm_vdi_flush_dma_memory(coreIdx, &tmp_buf); + if (ret < 0) + return VPU_RET_FAILURE; +#endif + return VPU_RET_SUCCESS; +} + +int vpu_EncInvalidateDecache(int coreIdx, BmVpuDMABuffer* buf) +{ + int ret; + vpu_buffer_t tmp_buf; + + if (buf->size == 0) { + return VPU_RET_SUCCESS; + } + + tmp_buf.size = buf->size; + tmp_buf.phys_addr = buf->phys_addr; + tmp_buf.virt_addr = 
buf->virt_addr; + tmp_buf.enable_cache = buf->enable_cache; + ret = bm_vdi_invalidate_dma_memory(coreIdx, &tmp_buf); + if (ret < 0) + return VPU_RET_FAILURE; + return VPU_RET_SUCCESS; +} + + /** * vpu_sw_reset * IN @@ -1419,6 +1619,31 @@ int vpu_unlock(int soc_idx) return VPU_RET_SUCCESS; } +static int SendQuery(EncHandle handle, QUERY_OPT queryOpt) +{ + uint32_t regVal; + int ret; + + /* Send QUERY cmd */ + VpuWriteReg(handle->coreIdx, W5_QUERY_OPTION, queryOpt); + VpuWriteReg(handle->coreIdx, W5_VPU_BUSY_STATUS, 1); + Wave5BitIssueCommand(handle, W5_QUERY); + + ret = enc_wait_vpu_busy(handle->coreIdx, __VPU_BUSY_TIMEOUT, W5_VPU_BUSY_STATUS); + if (ret == -1) { + VLOG(ERR, "timeout\n"); + if (handle->loggingEnable) + bm_vdi_log(handle->coreIdx, W5_QUERY, 2); + return VPU_RET_VPU_RESPONSE_TIMEOUT; + } + + regVal = VpuReadReg(handle->coreIdx, W5_RET_SUCCESS); + if (regVal == FALSE) + return VPU_RET_FAILURE; + + return VPU_RET_SUCCESS; +} + static int enc_open(EncHandle* pHandle, VpuEncOpenParam* pop) { @@ -1458,7 +1683,6 @@ static int enc_open(EncHandle* pHandle, VpuEncOpenParam* pop) VLOG(ERR, "Wave5VpuBuildUpEncParam failed\n"); } - bm_vdi_resume_kernel_reset(pop->coreIdx); LeaveLock(pop->coreIdx); return ret; @@ -1481,16 +1705,26 @@ static int enc_close(EncHandle handle) if (pEncInfo->initialInfoObtained) { VpuWriteReg(handle->coreIdx, pEncInfo->streamWrPtrRegAddr, pEncInfo->streamWrPtr); VpuWriteReg(handle->coreIdx, pEncInfo->streamRdPtrRegAddr, pEncInfo->streamRdPtr); - ret = Wave5VpuEncFiniSeq(handle); - if (ret != VPU_RET_SUCCESS) { + while(Wave5VpuEncFiniSeq(handle) != VPU_RET_SUCCESS) + { if (handle->loggingEnable) bm_vdi_log(handle->coreIdx, ENC_SEQ_END, 2); if (ret == VPU_RET_VPU_STILL_RUNNING) { + SendQuery(handle, GET_RESULT); + if (ret != VPU_RET_SUCCESS) { + if (VpuReadReg(handle->coreIdx, W5_RET_FAIL_REASON) == WAVE5_RESULT_NOT_READY) + return VPU_RET_REPORT_NOT_READY; + else + return VPU_RET_QUERY_FAILURE; + } + } + else{ LeaveLock(handle->coreIdx); 
return ret; } } + if (handle->loggingEnable) bm_vdi_log(handle->coreIdx, ENC_SEQ_END, 0); pEncInfo->streamWrPtr = VpuReadReg(handle->coreIdx, pEncInfo->streamWrPtrRegAddr); @@ -2278,31 +2512,6 @@ static void Wave5BitIssueCommand(EncHandle handle, uint32_t cmd) return; } -static int SendQuery(EncHandle handle, QUERY_OPT queryOpt) -{ - uint32_t regVal; - int ret; - - /* Send QUERY cmd */ - VpuWriteReg(handle->coreIdx, W5_QUERY_OPTION, queryOpt); - VpuWriteReg(handle->coreIdx, W5_VPU_BUSY_STATUS, 1); - Wave5BitIssueCommand(handle, W5_QUERY); - - ret = enc_wait_vpu_busy(handle->coreIdx, __VPU_BUSY_TIMEOUT, W5_VPU_BUSY_STATUS); - if (ret == -1) { - VLOG(ERR, "timeout\n"); - if (handle->loggingEnable) - bm_vdi_log(handle->coreIdx, W5_QUERY, 2); - return VPU_RET_VPU_RESPONSE_TIMEOUT; - } - - regVal = VpuReadReg(handle->coreIdx, W5_RET_SUCCESS); - if (regVal == FALSE) - return VPU_RET_FAILURE; - - return VPU_RET_SUCCESS; -} - static int SetupWave5Properties(uint32_t coreIdx) { VpuAttr* pAttr = &s_VpuCoreAttributes[coreIdx]; @@ -3395,6 +3604,9 @@ static int Wave5VpuEncInitSeq(EncHandle handle) } VpuWriteReg(coreIdx, W5_CMD_ENC_SEQ_USER_SCALING_LIST_ADDR, pParam->userScalingListAddr); + VpuWriteReg(coreIdx, W5_CMD_ENC_SEQ_NUM_UNITS_IN_TICK, pParam->numUnitsInTick); + VpuWriteReg(coreIdx, W5_CMD_ENC_SEQ_TIME_SCALE, pParam->timeScale); + VpuWriteReg(coreIdx, W5_CMD_ENC_SEQ_NUM_TICKS_POC_DIFF_ONE, pParam->numTicksPocDiffOne); } if (handle->codecMode == W_HEVC_ENC) { @@ -3441,10 +3653,6 @@ static int Wave5VpuEncInitSeq(EncHandle handle) regVal = (pParam->dependSliceModeArg<<16 | pParam->dependSliceMode); VpuWriteReg(coreIdx, W5_CMD_ENC_SEQ_DEPENDENT_SLICE, regVal); - VpuWriteReg(coreIdx, W5_CMD_ENC_SEQ_NUM_UNITS_IN_TICK, pParam->numUnitsInTick); - VpuWriteReg(coreIdx, W5_CMD_ENC_SEQ_TIME_SCALE, pParam->timeScale); - VpuWriteReg(coreIdx, W5_CMD_ENC_SEQ_NUM_TICKS_POC_DIFF_ONE, pParam->numTicksPocDiffOne); - regVal = ((pParam->nrYEnable<<0) | (pParam->nrCbEnable<<1) | 
(pParam->nrCrEnable<<2) | diff --git a/bmvid/video/provider/cnm/encoder/src/vdi.c b/bmvid/video/provider/cnm/encoder/src/vdi.c index e4cccda..a2cb935 100644 --- a/bmvid/video/provider/cnm/encoder/src/vdi.c +++ b/bmvid/video/provider/cnm/encoder/src/vdi.c @@ -149,9 +149,9 @@ int bm_vdi_init(uint32_t core_idx) if (vdi->vpu_fd < 0) { #ifndef BM_PCIE_MODE - VLOG(ERR, "[VDI] Can't open vpu driver. [error=%s]. try to load vpu.ko again\n", strerror(errno)); + VLOG(ERR, "[VDI] Can't open vpu driver. [error=%s]. try to load ko again\n", strerror(errno)); #else - VLOG(ERR, "[VDI] Can't open Sophon device driver. [error=%s]. try to load bmsophon.ko again\n", strerror(errno)); + VLOG(ERR, "[VDI] Can't open device driver. [error=%s]. try to load ko again\n", strerror(errno)); #endif goto ERR_VDI_INIT0; } @@ -331,6 +331,7 @@ int bm_vdi_release(uint32_t core_idx) /* get common memory information to free virtual address */ /* TODO */ + pthread_mutex_lock((pthread_mutex_t *)vdi->buffer_pool_lock); for (i=0; icommon_memory.phys_addr >= vdi->buffer_pool[i].vdb.phys_addr && vdi->common_memory.phys_addr < (vdi->buffer_pool[i].vdb.phys_addr + @@ -341,10 +342,11 @@ int bm_vdi_release(uint32_t core_idx) break; } } + pthread_mutex_unlock((pthread_mutex_t *)vdi->buffer_pool_lock); vdi->task_num--; bm_vdi_get_kernel_reset(core_idx); - if(vdi->reset_core_flag.reset_core_disable!=0) + if(vdi->reset_core_flag.reset == 1) bm_vdi_resume_kernel_reset(core_idx); bm_vdi_unlock(core_idx); @@ -452,6 +454,383 @@ static int bm_vdi_allocate_common_memory(uint32_t core_idx) return 0; } +int bm_vdi_allocate_dma_memory(uint32_t core_idx, vpu_buffer_t *vb) +{ + bm_vdi_info_t *vdi; + vpudrv_buffer_t vdb; +#if defined(BM_PCIE_MODE) + int chip_core_idx = core_idx%MAX_NUM_VPU_CORE_CHIP; +#endif + int i; + + if (core_idx >= MAX_NUM_VPU_CORE) + return -1; + + vdi = bm_vdi_check_info_ptr(core_idx); + + if(vdi == NULL || vdi->vpu_fd < 0) + return -1; + + memset(&vdb, 0x00, sizeof(vpudrv_buffer_t)); + +#if 
defined(BM_PCIE_MODE) + vdb.core_idx = chip_core_idx; +#endif + + vdb.size = vb->size; +#if !defined(BM_PCIE_MODE) + vdb.enable_cache = vb->enable_cache; +#endif + if (ioctl(vdi->vpu_fd, VDI_IOCTL_ALLOCATE_PHYSICAL_MEMORY, &vdb) < 0) { + VLOG(ERR, "[VDI] fail to vdi_allocate_dma_memory size=%d\n", vdb.size); + return -1; + } + +#ifndef BM_PCIE_MODE + /* map to virtual address */ + // vdb.virt_addr = (unsigned long)mmap(NULL, vdb.size, PROT_READ | PROT_WRITE, + // MAP_SHARED, vdi->vpu_fd, vdb.phys_addr); + // if ((void *)vdb.virt_addr == MAP_FAILED) { + // memset(vb, 0x00, sizeof(vpu_buffer_t)); + // return -1; + // } + + // memset(vb->virt_addr, 0x00, sizeof(vb->size)); + /* unmap */ + // if (munmap((void *)vdb.virt_addr, vdb.size) < 0) { + // VLOG(ERR, "[VDI] munmap failed, vaddr=0x%lx\n", vdb.virt_addr); + // return -1; + // } + // vdb.virt_addr = 0; + + /* flush operation */ + // if (vdb.enable_cache) { + // if (ioctl(vdi->vpu_fd, VDI_IOCTL_FLUSH_DCACHE, &vdb) < 0) { + // VLOG(ERR, "[VDI] fail to flush ioctl\n"); + // return -1; + // } + // } +#else + vdb.virt_addr = FAKE_PCIE_VIRT_ADDR; + // vdi_clear_memory(core_idx, vdb.phys_addr, vdb.size, VDI_SYSTEM_ENDIAN); //if need clear, todo +#endif + + vb->base = vdb.base; + vb->phys_addr = vdb.phys_addr; + vb->virt_addr = vdb.virt_addr; + + pthread_mutex_lock((pthread_mutex_t *)vdi->buffer_pool_lock); + for (i=0; ibuffer_pool[i].inuse == 0) { + vdi->buffer_pool[i].vdb = vdb; + vdi->buffer_pool_count++; + vdi->buffer_pool[i].inuse = 1; + break; + } + } + pthread_mutex_unlock((pthread_mutex_t *)vdi->buffer_pool_lock); + + VLOG(DEBUG, "[VDI] vdi_allocate_dma_memory, physaddr=0x%lx, virtaddr=0x%lx~0x%lx, size=%d\n", + vb->phys_addr, vb->virt_addr, vb->virt_addr + vb->size, vb->size); + + return 0; +} + +int bm_vdi_free_dma_memory(uint32_t core_idx, vpu_buffer_t *vb) +{ + bm_vdi_info_t *vdi; + int i, ret; + vpudrv_buffer_t vdb; + + if (core_idx >= MAX_NUM_VPU_CORE) + return -1; + + vdi = bm_vdi_check_info_ptr(core_idx); + 
if (vdi == NULL || vdi->vpu_fd < 0) { + VLOG(ERR, "bm_vdi_check_info_ptr failed. Please call bm_vdi_init first.\n"); + return -1; + } + + if (vb->size == 0) + return 0; + + memset(&vdb, 0x00, sizeof(vpudrv_buffer_t)); + + pthread_mutex_lock((pthread_mutex_t *)vdi->buffer_pool_lock); + for (i=0; ibuffer_pool[i].vdb.phys_addr == vb->phys_addr) + { + vdi->buffer_pool[i].inuse = 0; + vdi->buffer_pool_count--; + vdb = vdi->buffer_pool[i].vdb; + break; + } + } + pthread_mutex_unlock((pthread_mutex_t *)vdi->buffer_pool_lock); + + if (!vdb.size) { + VLOG(ERR, "[VDI] invalid buffer to free address = 0x%lx\n", vdb.virt_addr); + return -1; + } + + ret = ioctl(vdi->vpu_fd, VDI_IOCTL_FREE_PHYSICALMEMORY, &vdb); + if (ret < 0) { + VLOG(ERR, "[VDI] ioctl free_physical_memory failed!\n", vdb.virt_addr); + if (vdb.virt_addr) { +#ifndef BM_PCIE_MODE + munmap((void *)vdb.virt_addr, vdb.size); +#endif + vdb.virt_addr = 0; + } + return -1; + } + + if (vdb.virt_addr) { +#ifndef BM_PCIE_MODE + munmap((void *)vdb.virt_addr, vdb.size); +#endif + vdb.virt_addr = 0; + } + + VLOG(DEBUG, "[VDI] vdi_free_dma_memory, physaddr=0x%lx, virtaddr=0x%lx~0x%lx, size=%d\n", + vdb.phys_addr, vdb.virt_addr, vdb.virt_addr + vdb.size, vdb.size); + + memset(vb, 0, sizeof(vpu_buffer_t)); + + return 0; +} + +int bm_vdi_mmap_dma_memory(uint32_t core_idx, vpu_buffer_t *vb, int enable_read, int enable_write) +{ +#ifdef BM_PCIE_MODE + return 0; +#else + bm_vdi_info_t *vdi; + int port_flag = 0; + + if (core_idx >= MAX_NUM_VPU_CORE) + return -1; + + vdi = bm_vdi_check_info_ptr(core_idx); + if (vdi == NULL || vdi->vpu_fd < 0) { + VLOG(ERR, "bm_vdi_check_info_ptr failed. 
Please call bm_vdi_init first.\n"); + return -1; + } + + if (vb->size == 0) + return -1; + + if (enable_read) + port_flag |= PROT_READ; + if (enable_write) + port_flag |= PROT_WRITE; + + //defaule enable_cache + vb->virt_addr = (unsigned long)mmap(NULL, vb->size, port_flag, MAP_SHARED, vdi->vpu_fd, vb->phys_addr); + + if ((void *)vb->virt_addr == MAP_FAILED) { + VLOG(ERR, "[VDI] mmap failed, addr=0x%lx size=%d\n", vb->phys_addr, vb->size); + vb->virt_addr = 0; + return -1; + } + + return 0; +#endif +} + +int bm_vdi_unmap_dma_memory(uint32_t core_idx, vpu_buffer_t *vb) +{ +#ifdef BM_PCIE_MODE + return 0; +#else + bm_vdi_info_t *vdi; + + if (core_idx >= MAX_NUM_VPU_CORE) + return -1; + + vdi = bm_vdi_check_info_ptr(core_idx); + if (vdi == NULL || vdi->vpu_fd < 0) { + VLOG(ERR, "bm_vdi_check_info_ptr failed. Please call bm_vdi_init first.\n"); + return -1; + } + + if (vb->size == 0) + return -1; + + if (vb->virt_addr) { + if (munmap((void *)vb->virt_addr, vb->size) < 0) { + vb->virt_addr = 0; + return -1; + } + } + vb->virt_addr = 0; + return 0; +#endif +} + +int bm_vdi_flush_dma_memory(uint32_t core_idx, vpu_buffer_t *vb) +{ +#if !defined(BM_PCIE_MODE) && !defined(BM_ION_MEM) + vpudrv_buffer_t vdb = {0}; +#endif + bm_vdi_info_t *vdi; + + if (core_idx >= MAX_NUM_VPU_CORE) + return -1; + + vdi = bm_vdi_check_info_ptr(core_idx); + if (vdi == NULL || vdi->vpu_fd < 0) { + VLOG(ERR, "bm_vdi_check_info_ptr failed. Please call bm_vdi_init first.\n"); + return -1; + } + + if(vdi == NULL || vdi->vpu_fd < 0) + return -1; + +#if defined(BM_ION_MEM) + if (!vb->size) { + VLOG(ERR, "address 0x%08x is not mapped address!!!\n", (int)vb->phys_addr); + } + else + { + if(vb->enable_cache == 1) { + if(msync((void *)vb->virt_addr, vb->size, MS_ASYNC) == -1) { + VLOG(ERR, "[VDI] fail to flush memory. 
addr=0x%lx size=%d\n", vb->virt_addr, vb->size); + } + } + } +#elif !defined(BM_PCIE_MODE) + vdb.phys_addr = vb->phys_addr; + vdb.size = vb->size; + if (ioctl(vdi->vpu_fd, VDI_IOCTL_FLUSH_DCACHE, &vdb) < 0) + { + VLOG(ERR, "[VDI] fail to flush dcache mem addr 0x%lx size=%d\n", vdb.phys_addr, vdb.size); + } +#endif + return 0; +} + +int bm_vdi_invalidate_dma_memory(uint32_t core_idx, vpu_buffer_t *vb) +{ +#if !defined(BM_PCIE_MODE) && !defined(BM_ION_MEM) + vpudrv_buffer_t vdb = {0}; +#endif + bm_vdi_info_t *vdi; + + if (core_idx >= MAX_NUM_VPU_CORE) + return -1; + + vdi = bm_vdi_check_info_ptr(core_idx); + if (vdi == NULL || vdi->vpu_fd < 0) { + VLOG(ERR, "bm_vdi_check_info_ptr failed. Please call bm_vdi_init first.\n"); + return -1; + } + + if(vdi == NULL || vdi->vpu_fd < 0) + return -1; + +#if defined(BM_ION_MEM) + if (!vb->size) { + VLOG(ERR, "address 0x%08x is not mapped address!!!\n", (int)vb->phys_addr); + } + else + { + if(vb->enable_cache == 1) { + if(msync((void *)vb->virt_addr, vb->size, MS_INVALIDATE) == -1) { + VLOG(ERR, "[VDI] fail to invalidate memory. addr=0x%lx size=%d\n", vb->virt_addr, vb->size); + } + } + } +#elif !defined(BM_PCIE_MODE) + vdb.phys_addr = vb->phys_addr; + vdb.size = vb->size; + if (ioctl(vdi->vpu_fd, VDI_IOCTL_INVALIDATE_DCACHE, &vdb) < 0) + { + VLOG(ERR, "[VDI] fail to fluch invalidate mem addr 0x%lx size=%d\n", vdb.phys_addr, vdb.size); + } +#endif + return 0; +} + +int bm_vdi_attach_dma_memory(uint32_t core_idx, vpu_buffer_t *vb) +{ + vpudrv_buffer_t vdb; + bm_vdi_info_t *vdi; + int i; + + if (core_idx >= MAX_NUM_VPU_CORE) + return -1; + + vdi = bm_vdi_check_info_ptr(core_idx); + if (vdi == NULL || vdi->vpu_fd < 0) { + VLOG(ERR, "bm_vdi_check_info_ptr failed. 
Please call bm_vdi_init first.\n"); + return -1; + } + + if (vb->size == 0) + return -1; + + memset(&vdb, 0x00, sizeof(vpudrv_buffer_t)); + + vdb.size = vb->size; + vdb.phys_addr = vb->phys_addr; + + pthread_mutex_lock((pthread_mutex_t *)vdi->buffer_pool_lock); + for (i=0; ibuffer_pool[i].vdb.phys_addr == vb->phys_addr) + { + vdi->buffer_pool[i].vdb = vdb; + vdi->buffer_pool[i].inuse = 1; + break; + } + else + { + if (vdi->buffer_pool[i].inuse == 0) + { + vdi->buffer_pool[i].vdb = vdb; + vdi->buffer_pool[i].inuse = 1; + break; + } + } + } + pthread_mutex_unlock((pthread_mutex_t *)vdi->buffer_pool_lock); + return 0; +} + +int bm_vdi_deattach_dma_memory(uint32_t core_idx, vpu_buffer_t *vb) +{ + bm_vdi_info_t *vdi; + int i; + + if (core_idx >= MAX_NUM_VPU_CORE) + return -1; + + vdi = bm_vdi_check_info_ptr(core_idx); + if (vdi == NULL || vdi->vpu_fd < 0) { + VLOG(ERR, "bm_vdi_check_info_ptr failed. Please call bm_vdi_init first.\n"); + return -1; + } + + if(!vb || !vdi || vdi->vpu_fd==-1 || vdi->vpu_fd == 0x00) + return -1; + + pthread_mutex_lock((pthread_mutex_t *)vdi->buffer_pool_lock); + for (i=0; ibuffer_pool[i].vdb.phys_addr == vb->phys_addr) + { + vdi->buffer_pool[i].vdb.phys_addr = 0; + vdi->buffer_pool[i].vdb.size = 0; + vdi->buffer_pool[i].inuse = 0; + break; + } + } + pthread_mutex_unlock((pthread_mutex_t *)vdi->buffer_pool_lock); + return 0; +} + vpu_instance_pool_t* bm_vdi_get_instance_pool(uint32_t core_idx) { bm_vdi_info_t *vdi; @@ -681,12 +1060,14 @@ int bm_vdi_resume_kernel_reset(uint32_t core_idx){ #if defined(BM_PCIE_MODE) chip_core_idx = core_idx%MAX_NUM_VPU_CORE_CHIP; #endif - vdi->reset_core_flag.reset_core_disable = 0; - vdi->reset_core_flag.core_idx = chip_core_idx; - ret = ioctl(vdi->vpu_fd, VDI_IOCTL_CTRL_KERNEL_RESET, &(vdi->reset_core_flag)); - if (ret < 0) { - VLOG(ERR, "[VDI] encoder fail to resume kernel_reset ability with ioctl()\n"); - return -1; + if(vdi->reset_core_flag.pid == vdi->pid) { + vdi->reset_core_flag.reset = 0; + 
vdi->reset_core_flag.core_idx = chip_core_idx; + ret = ioctl(vdi->vpu_fd, VDI_IOCTL_CTRL_KERNEL_RESET, &(vdi->reset_core_flag)); + if (ret < 0) { + VLOG(ERR, "[VDI] encoder fail to resume kernel_reset ability with ioctl()\n"); + return -1; + } } return 0; } @@ -707,7 +1088,8 @@ int bm_vdi_disable_kernel_reset(uint32_t core_idx){ #if defined(BM_PCIE_MODE) chip_core_idx = core_idx%MAX_NUM_VPU_CORE_CHIP; #endif - vdi->reset_core_flag.reset_core_disable = vdi->pid; + vdi->reset_core_flag.pid = vdi->pid; + vdi->reset_core_flag.reset = 1; vdi->reset_core_flag.core_idx = chip_core_idx; ret = ioctl(vdi->vpu_fd, VDI_IOCTL_CTRL_KERNEL_RESET, &(vdi->reset_core_flag)); if (ret < 0) { @@ -823,12 +1205,14 @@ void bm_vdi_fio_write_register(uint32_t core_idx, uint64_t addr, uint32_t data) VpuWriteReg(core_idx, W5_VPU_FIO_CTRL_ADDR, ctrl); } + int bm_vdi_write_memory(uint32_t core_idx, uint64_t dst_addr, uint8_t *src_data, int len) { bm_vdi_info_t *vdi; vpudrv_buffer_t vdb; #ifndef BM_PCIE_MODE uint64_t offset; + int mmap_flag; #endif int i; @@ -859,8 +1243,33 @@ int bm_vdi_write_memory(uint32_t core_idx, uint64_t dst_addr, uint8_t *src_data, #ifndef BM_PCIE_MODE offset = dst_addr - (uint64_t)vdb.phys_addr; + vpu_buffer_t vb; + vb.phys_addr = vdb.phys_addr; + vb.size = vdb.size; + vb.enable_cache = vdb.enable_cache; + if (!vdb.virt_addr) { + bm_vdi_mmap_dma_memory(core_idx, &vb, 0, 1); + vdb.virt_addr = vb.virt_addr; + mmap_flag = 1; + } else { + mmap_flag = 0; + } memcpy((void *)((uint64_t)vdb.virt_addr+offset), src_data, len); + + /* flush operation */ + if (vdb.enable_cache) { + int ret = bm_vdi_flush_dma_memory(core_idx, &vb); + if (ret < 0) { + VLOG(ERR, "[VDI] fail to flush cache\n"); + return -1; + } + } + + if (mmap_flag) { + bm_vdi_unmap_dma_memory(core_idx, &vb); + vdb.virt_addr = 0; + } #else vpu_video_mem_op_t vmem_op; vmem_op.dst = dst_addr; @@ -877,12 +1286,15 @@ int bm_vdi_write_memory(uint32_t core_idx, uint64_t dst_addr, uint8_t *src_data, return len; } -#ifdef 
BM_PCIE_MODE int bm_vdi_read_memory(uint32_t core_idx, uint64_t src_addr, uint8_t *dst_data, int len) { bm_vdi_info_t *vdi; vpudrv_buffer_t vdb; int i; +#ifndef BM_PCIE_MODE + uint64_t offset; + int mmap_flag; +#endif vdi = bm_vdi_check_info_ptr(core_idx); if (vdi == NULL || vdi->vpu_fd < 0) { @@ -902,7 +1314,7 @@ int bm_vdi_read_memory(uint32_t core_idx, uint64_t src_addr, uint8_t *dst_data, if (!vdb.size) return -1; - +#ifdef BM_PCIE_MODE vpu_video_mem_op_t vmem_op; vmem_op.src = src_addr; vmem_op.dst = (uint64_t)dst_data; @@ -914,9 +1326,38 @@ int bm_vdi_read_memory(uint32_t core_idx, uint64_t src_addr, uint8_t *dst_data, return -1; } +#else + offset = src_addr - (uint64_t)vdb.phys_addr; + + vpu_buffer_t vb; + vb.phys_addr = vdb.phys_addr; + vb.size = vdb.size; + if (!vdb.virt_addr) { + bm_vdi_mmap_dma_memory(core_idx, &vb, 1, 0); + vdb.virt_addr = vb.virt_addr; + mmap_flag = 1; + } else { + mmap_flag = 0; + } + + /* invalidate operation */ + if (vdb.enable_cache) { + int ret = bm_vdi_invalidate_dma_memory(core_idx, &vb); + if (ret < 0) { + VLOG(ERR, "[VDI] fail to invalidate cache\n"); + return -1; + } + } + + memcpy((void*)dst_data, (void *)((uint64_t)vdb.virt_addr+offset), len); + + if (mmap_flag) { + bm_vdi_unmap_dma_memory(core_idx, &vb); + vdb.virt_addr = 0; + } +#endif return len; } -#endif int bm_vdi_get_sram_memory(uint32_t core_idx, vpu_buffer_t *vb) { diff --git a/bmvid/video/provider/cnm/encoder/src/vdi_win.c b/bmvid/video/provider/cnm/encoder/src/vdi_win.c index c0e1258..cf2f4d9 100644 --- a/bmvid/video/provider/cnm/encoder/src/vdi_win.c +++ b/bmvid/video/provider/cnm/encoder/src/vdi_win.c @@ -20,9 +20,9 @@ #include #include #include -#include +#include #include -#include +#include #include #include #include @@ -153,7 +153,7 @@ static BOOL getDriverContext(bm_vid_drv_info* vdi, uint32_t board_idx) { DeviceInterfaceData.cbSize = sizeof(SP_DEVICE_INTERFACE_DATA); if (INVALID_HANDLE_VALUE == vdi->hDevInfo) { - printf("No sophon devices interface class 
are in the system.\n"); + printf("No devices interface class are in the system.\n"); return FALSE; } @@ -169,7 +169,7 @@ static BOOL getDriverContext(bm_vid_drv_info* vdi, uint32_t board_idx) { (LPGUID)g_guid_interface[0], vdi->dev_id, &DeviceInterfaceData)) { - printf("No sophon devices SetupDiEnumDeviceInterfaces for dev%d.\n", vdi->dev_id); + printf("No devices SetupDiEnumDeviceInterfaces for dev%d.\n", vdi->dev_id); goto Error; } @@ -312,9 +312,9 @@ int bm_vdi_init(uint32_t core_idx) vdi->vpu_fd = open(vpu_dev_name, O_RDWR); if (vdi->vpu_fd < 0) { #ifndef BM_PCIE_MODE - VLOG(ERR, "[VDI] Can't open vpu driver. [error=%s]. try to load vpu.ko again\n", strerror(errno)); + VLOG(ERR, "[VDI] Can't open vpu driver. [error=%s]. try to load ko again\n", strerror(errno)); #else - VLOG(ERR, "[VDI] Can't open Sophon device driver. [error=%s]. try to load bmsophon.ko again\n", strerror(errno)); + VLOG(ERR, "[VDI] Can't open device driver. [error=%s]. try to load ko again\n", strerror(errno)); #endif goto ERR_VDI_INIT0; } @@ -492,6 +492,7 @@ int bm_vdi_release(uint32_t core_idx) /* get common memory information to free virtual address */ /* TODO */ + WaitForSingleObject(*(vdi->buffer_pool_lock), INFINITE); for (i=0; icommon_memory.phys_addr >= vdi->buffer_pool[i].vdb.phys_addr && vdi->common_memory.phys_addr < (vdi->buffer_pool[i].vdb.phys_addr + @@ -502,6 +503,7 @@ int bm_vdi_release(uint32_t core_idx) break; } } + ReleaseMutex(*(vdi->buffer_pool_lock)); bm_vdi_unlock(core_idx); @@ -606,6 +608,212 @@ static int bm_vdi_allocate_common_memory(uint32_t core_idx) return 0; } +int bm_vdi_allocate_dma_memory(uint32_t core_idx, vpu_buffer_t *vb) +{ + bm_vdi_info_t *vdi; + vpudrv_buffer_t vdb; +#if defined(BM_PCIE_MODE) + int chip_core_idx = core_idx%MAX_NUM_VPU_CORE_CHIP; +#endif + int i; + + if (core_idx >= MAX_NUM_VPU_CORE) + return -1; + + vdi = bm_vdi_check_info_ptr(core_idx); + + if(vdi == NULL || vdi->vid_drv_info->hDevice == INVALID_HANDLE_VALUE || 
!(vdi->vid_drv_info->hDevice)) + return -1; + + memset(&vdb, 0x00, sizeof(vpudrv_buffer_t)); + +#if defined(BM_PCIE_MODE) + vdb.core_idx = chip_core_idx; +#endif + + vdb.size = vb->size; +#if !defined(BM_PCIE_MODE) + vdb.enable_cache = vb->enable_cache; +#endif + if (winDeviceIoControl(vdi->vid_drv_info->hDevice, VDI_IOCTL_ALLOCATE_PHYSICAL_MEMORY, &vdb) < 0) { + VLOG(ERR, "[VDI] fail to vdi_allocate_dma_memory size=%d\n", vdb.size); + return -1; + } + + vdb.virt_addr = FAKE_PCIE_VIRT_ADDR; + // vdi_clear_memory(core_idx, vdb.phys_addr, vdb.size, VDI_SYSTEM_ENDIAN); //if need clear, todo + + vb->base = vdb.base; + vb->phys_addr = vdb.phys_addr; + vb->virt_addr = vdb.virt_addr; + + WaitForSingleObject(*(vdi->buffer_pool_lock), INFINITE); + for (i=0; ibuffer_pool[i].inuse == 0) { + vdi->buffer_pool[i].vdb = vdb; + vdi->buffer_pool_count++; + vdi->buffer_pool[i].inuse = 1; + break; + } + } + ReleaseMutex(*(vdi->buffer_pool_lock)); + + VLOG(DEBUG, "[VDI] vdi_allocate_dma_memory, physaddr=0x%lx, size=%d\n", vb->phys_addr, vb->size); + + return 0; +} + +int bm_vdi_free_dma_memory(uint32_t core_idx, vpu_buffer_t *vb) +{ + bm_vdi_info_t *vdi; + int i, ret; + vpudrv_buffer_t vdb; + + if (core_idx >= MAX_NUM_VPU_CORE) + return -1; + + vdi = bm_vdi_check_info_ptr(core_idx); + if (vdi == NULL || vdi->vid_drv_info->hDevice == INVALID_HANDLE_VALUE || !(vdi->vid_drv_info->hDevice)) { + VLOG(ERR, "bm_vdi_check_info_ptr failed. 
Please call bm_vdi_init first.\n"); + return -1; + } + + if (vb->size == 0) + return 0; + + memset(&vdb, 0x00, sizeof(vpudrv_buffer_t)); + + WaitForSingleObject(*(vdi->buffer_pool_lock), INFINITE); + for (i=0; ibuffer_pool[i].vdb.phys_addr == vb->phys_addr) + { + vdi->buffer_pool[i].inuse = 0; + vdi->buffer_pool_count--; + vdb = vdi->buffer_pool[i].vdb; + break; + } + } + ReleaseMutex(*(vdi->buffer_pool_lock)); + + if (!vdb.size) { + VLOG(ERR, "[VDI] invalid buffer to free address = 0x%lx\n", vdb.virt_addr); + return -1; + } + + ret = winDeviceIoControl(vdi->vid_drv_info->hDevice, VDI_IOCTL_FREE_PHYSICALMEMORY, &vdb); + if (ret < 0) { + VLOG(ERR, "[VDI] ioctl free_physical_memory failed!\n", vdb.virt_addr); + return -1; + } + + VLOG(INFO, "[VDI] vdi_free_dma_memory, physaddr=0x%lx, virtaddr=0x%lx~0x%lx, size=%d\n", + vdb.phys_addr, vdb.virt_addr, vdb.virt_addr + vdb.size, vdb.size); + + memset(vb, 0, sizeof(vpu_buffer_t)); + + return 0; +} + +int bm_vdi_attach_dma_memory(uint32_t core_idx, vpu_buffer_t *vb) +{ + vpudrv_buffer_t vdb; + bm_vdi_info_t *vdi; + int i; + + if (core_idx >= MAX_NUM_VPU_CORE) + return -1; + + vdi = bm_vdi_check_info_ptr(core_idx); + if (vdi == NULL || vdi->vid_drv_info->hDevice == INVALID_HANDLE_VALUE || !(vdi->vid_drv_info->hDevice)) { + VLOG(ERR, "bm_vdi_check_info_ptr failed. 
Please call bm_vdi_init first.\n"); + return -1; + } + + if (vb->size == 0) + return -1; + + memset(&vdb, 0x00, sizeof(vpudrv_buffer_t)); + + vdb.size = vb->size; + vdb.phys_addr = vb->phys_addr; + + WaitForSingleObject(*(vdi->buffer_pool_lock), INFINITE); + for (i=0; ibuffer_pool[i].vdb.phys_addr == vb->phys_addr) + { + vdi->buffer_pool[i].vdb = vdb; + vdi->buffer_pool[i].inuse = 1; + break; + } + else + { + if (vdi->buffer_pool[i].inuse == 0) + { + vdi->buffer_pool[i].vdb = vdb; + vdi->buffer_pool[i].inuse = 1; + break; + } + } + } + ReleaseMutex(*(vdi->buffer_pool_lock)); + return 0; +} + +int bm_vdi_deattach_dma_memory(uint32_t core_idx, vpu_buffer_t *vb) +{ + bm_vdi_info_t *vdi; + int i; + + if (core_idx >= MAX_NUM_VPU_CORE) + return -1; + + vdi = bm_vdi_check_info_ptr(core_idx); + if (vdi == NULL || vdi->vid_drv_info->hDevice == INVALID_HANDLE_VALUE || !(vdi->vid_drv_info->hDevice)) { + VLOG(ERR, "bm_vdi_check_info_ptr failed. Please call bm_vdi_init first.\n"); + return -1; + } + + WaitForSingleObject(*(vdi->buffer_pool_lock), INFINITE); + for (i=0; ibuffer_pool[i].vdb.phys_addr == vb->phys_addr) + { + vdi->buffer_pool[i].vdb.phys_addr = 0; + vdi->buffer_pool[i].vdb.size = 0; + vdi->buffer_pool[i].inuse = 0; + break; + } + } + ReleaseMutex(*(vdi->buffer_pool_lock)); + return 0; +} + +int bm_vdi_mmap_dma_memory(uint32_t core_idx, vpu_buffer_t *vb, int enable_read, int enable_write) +{ + //win pcie mode, don't need mmap and unmap + return 0; +} + +int bm_vdi_unmap_dma_memory(uint32_t core_idx, vpu_buffer_t *vb) +{ + //win pcie mode, don't need mmap and unmap + return 0; +} + +int bm_vdi_flush_dma_memory(uint32_t core_idx, vpu_buffer_t *vb) +{ + //win pcie mode, don't need this function + return 0; +} + +int bm_vdi_invalidate_dma_memory(uint32_t core_idx, vpu_buffer_t *vb) +{ + //win pcie mode, don't need this function + return 0; +} + vpu_instance_pool_t* bm_vdi_get_instance_pool(uint32_t core_idx) { bm_vdi_info_t *vdi; diff --git 
a/bmvid/video/provider/cnm/sample_v2/component_decoder/component_dec_renderer.c b/bmvid/video/provider/cnm/sample_v2/component_decoder/component_dec_renderer.c index 1169ea2..9b52d9b 100644 --- a/bmvid/video/provider/cnm/sample_v2/component_decoder/component_dec_renderer.c +++ b/bmvid/video/provider/cnm/sample_v2/component_decoder/component_dec_renderer.c @@ -33,7 +33,7 @@ typedef struct { Uint32 framebufStride; Uint32 displayPeriodTime; FrameBuffer pFrame[MAX_REG_FRAME]; - bm_device_mem_t pFbMem[MAX_REG_FRAME]; + vpu_buffer_t pFbMem[MAX_REG_FRAME]; Uint64 FbmemVaddr[MAX_REG_FRAME]; BOOL fbAllocated; ParamDecNeedFrameBufferNum fbCount; @@ -131,28 +131,18 @@ static BOOL ReallocateFrameBuffers(ComponentImpl* com, ParamDecReallocFB* param) RendererContext* ctx = (RendererContext*)com->context; Int32 fbcIndex = param->compressedIdx; Int32 linearIndex = param->linearIdx; - bm_device_mem_t* pFbMem = ctx->pFbMem; + vpu_buffer_t* pFbMem = ctx->pFbMem; FrameBuffer* pFrame = ctx->pFrame; FrameBuffer* newFbs = param->newFbs; - bm_handle_t bm_handle; vdi_lock(ctx->testDecConfig.coreIdx); if (fbcIndex >= 0) { /* Release the FBC framebuffer */ - - bm_handle= bmvpu_dec_get_bmlib_handle(ctx->testDecConfig.coreIdx); -#ifndef BM_PCIE_MODE - bm_mem_unmap_device_mem(bm_handle,(void *)ctx->FbmemVaddr[fbcIndex],ctx->pFbMem[fbcIndex].size); -#endif - bm_free_device(bm_handle,pFbMem[fbcIndex]); - + vdi_free_dma_memory(ctx->testDecConfig.coreIdx, &pFbMem[fbcIndex]); } if (linearIndex >= 0) { -#ifndef BM_PCIE_MODE - bm_mem_unmap_device_mem(bm_handle,(void *)ctx->FbmemVaddr[linearIndex],ctx->pFbMem[linearIndex].size); -#endif - bm_free_device(bm_handle,pFbMem[linearIndex]); - + /* Release the linear framebuffer */ + vdi_free_dma_memory(ctx->testDecConfig.coreIdx, &pFbMem[linearIndex]); } vdi_unlock(ctx->testDecConfig.coreIdx); @@ -389,18 +379,11 @@ static void ReleaseRenderer(ComponentImpl* com) RendererContext* ctx = (RendererContext*)com->context; Uint32 coreIdx = 
ctx->testDecConfig.coreIdx; Uint32 i; - bm_handle_t bm_handle; vdi_lock(coreIdx); - bm_handle=bmvpu_dec_get_bmlib_handle(coreIdx); for (i=0; ipFbMem[i].size) { -#ifndef BM_PCIE_MODE - bm_mem_unmap_device_mem(bm_handle,(void *)ctx->FbmemVaddr[i],ctx->pFbMem[i].size); -#endif - bm_free_device(bm_handle,ctx->pFbMem[i]); - - + vdi_free_dma_memory(coreIdx, &ctx->pFbMem[i]); } } vdi_unlock(coreIdx); diff --git a/bmvid/video/provider/cnm/sample_v2/helper/main_helper.c b/bmvid/video/provider/cnm/sample_v2/helper/main_helper.c index 7c552ff..db07414 100755 --- a/bmvid/video/provider/cnm/sample_v2/helper/main_helper.c +++ b/bmvid/video/provider/cnm/sample_v2/helper/main_helper.c @@ -141,41 +141,29 @@ Int32 LoadFirmware( char addr[255] = "/system/data/lib/vpu_firmware/"; strcat(addr, path); if((fp=osal_fopen(addr, "rb")) == NULL) { - void *handle = dlopen("libbmvideo.so", RTLD_NOW); - struct link_map *map; - if (handle == NULL) { - fprintf(stderr, "dlopen() failed: %s\n", dlerror()); + Dl_info dl_info; + if (!dladdr("libbmvideo.so", &dl_info)) { + fprintf(stderr, "dladdr(libbmvideo.so) failed: %s \n", dlerror()); return -1; } - if(dlinfo(handle, RTLD_DI_LINKMAP, &map) != -1) { - char *ptr = strrchr(map->l_name, '/'); - if(ptr) { - int name_len = ptr - map->l_name + 1; - printf("libbmvideo.so addr : %s, name_len: %d\n", map->l_name, name_len); - - if(name_len > 0) { - memset(addr, 0, 255); - strncpy(addr, map->l_name, name_len); - strcat(addr, "vpu_firmware/"); - strcat(addr, path); - printf("vpu firmware addr: %s\n", addr); - fp=osal_fopen(addr, "rb"); - } - } - else { - VLOG(ERR, "can't get the absolute pathname of libbmvideo.so\n"); - dlclose(handle); - return -1; + char *ptr = strrchr(dl_info.dli_fname, '/'); + if(ptr) { + size_t name_len = ptr - dl_info.dli_fname + 1; + printf("so addr : %s, name_len: %lu\n", dl_info.dli_fname, name_len); + + if(name_len > 0) { + memset(addr, 0, 255); + strncpy(addr, dl_info.dli_fname, name_len); + strcat(addr, "vpu_firmware/"); + 
strcat(addr, path); + printf("vpu firmware addr: %s\n", addr); + fp=osal_fopen(addr, "rb"); } } else { - printf("can't get addr of libbmvideo.so file.\n"); - dlclose(handle); + VLOG(ERR, "can't get the absolute pathname of so\n"); return -1; } - int ret = dlclose(handle); - if (ret != 0) - VLOG(WARN, "WARNING! dlclose failed.\n"); } else { VLOG(INFO, "vpu firmware %s open.", addr); @@ -202,13 +190,13 @@ Int32 LoadFirmware( if (!ptr) { - printf("can't get vpu_firmware path by libbmvideo.dll\n"); + printf("can't get vpu_firmware path\n"); } dirname_len = strlen(strDLLPath1) - strlen(ptr) + 1; if (dirname_len <= 0) { - printf("can't get vpu_firmware path by libbmvideo.dll\n"); + printf("can't get vpu_firmware path\n"); } memset(fw_path, 0, 512); strncpy(fw_path, strDLLPath1, dirname_len); @@ -980,9 +968,8 @@ void ChangePathStyle( } void ReleaseVideoMemory( - bm_handle_t handle, Uint32 coreIndex, - bm_device_mem_t*memoryArr, + vpu_buffer_t* memoryArr, Uint32 count ) { @@ -992,19 +979,20 @@ void ReleaseVideoMemory( for (idx=0; idxframebuf_from_user != FRAME_BUFFER_FROM_USER) + osal_memset((void*)retFbAddrs, 0x00, sizeof(vpu_buffer_t)*totalFbCount); APIDPRINT("ALLOC MEM - FBC data\n"); vdi_lock(coreIndex); for (idx=0; idxsize = framebufSize; - if(bmvpu_malloc_device_byte_heap(bm_handle,pvb,framebufSize,HEAP_MASK,1)!=BM_SUCCESS) + if(config->framebuf_from_user != FRAME_BUFFER_FROM_USER) { - vdi_unlock(coreIndex); - VLOG(ERR, "coreIdx:%d,%s:%d fail to allocate frame buffer\n",coreIndex, __FUNCTION__, __LINE__); - ReleaseVideoMemory(bm_handle,coreIndex, retFbAddrs, totalFbCount); + pvb->size = framebufSize; + if (vdi_allocate_dma_memory(coreIndex, pvb) < 0) + { + vdi_unlock(coreIndex); + VLOG(ERR, "coreIdx:%d,%s:%d fail to allocate frame buffer\n",coreIndex, __FUNCTION__, __LINE__); + ReleaseVideoMemory(coreIndex, retFbAddrs, totalFbCount); - return FALSE; + return FALSE; + } } - bm_vdi_mmap(bm_handle,pvb,(unsigned long long *)&pvbVaddr); -#ifndef BM_PCIE_MODE - 
retFbArray[idx].bufYVaddr=(Uint64)pvbVaddr; -#endif - decHandle->CodecInfo->decInfo.vpu_frame_buffer_vaddr[idx]=pvbVaddr; - decHandle->CodecInfo->decInfo.vpu_frame_buffer_vaddr_size[idx]=framebufSize; - retFbArray[idx].bufY = pvb->u.device.device_addr; + retFbArray[idx].bufY = pvb->phys_addr; retFbArray[idx].bufCb = (PhysicalAddress)-1; retFbArray[idx].bufCr = (PhysicalAddress)-1; retFbArray[idx].updateFbInfo = TRUE; @@ -1110,7 +1094,7 @@ BOOL AllocateDecFrameBuffer( if ((ret=VPU_DecAllocateFrameBuffer(decHandle, fbAllocInfo, retFbArray)) != RETCODE_SUCCESS) { VLOG(ERR, "%s:%d failed to VPU_DecAllocateFrameBuffer(), ret(%d)\n", __FUNCTION__, __LINE__, ret); - ReleaseVideoMemory(bm_handle,coreIndex,retFbAddrs,totalFbCount); + ReleaseVideoMemory(coreIndex,retFbAddrs,totalFbCount); return FALSE; } } @@ -1161,22 +1145,20 @@ BOOL AllocateDecFrameBuffer( vdi_lock(coreIndex); for (idx=linearFbStartIdx; idxsize = framebufSize; - if(bmvpu_malloc_device_byte_heap(bm_handle,pvb,pvb->size,HEAP_MASK,1)!=BM_SUCCESS) + pvb->enable_cache = 1; + if(config->framebuf_from_user != FRAME_BUFFER_FROM_USER) { - vdi_unlock(coreIndex); - VLOG(ERR, "%s:%d fail to allocate frame buffer\n", __FUNCTION__, __LINE__); - ReleaseVideoMemory(bm_handle,coreIndex,retFbAddrs, totalFbCount-idx); - return FALSE; + pvb->size = framebufSize; + if (vdi_allocate_dma_memory(coreIndex, pvb) < 0) + { + vdi_unlock(coreIndex); + VLOG(ERR, "%s:%d fail to allocate frame buffer\n", __FUNCTION__, __LINE__); + ReleaseVideoMemory(coreIndex,retFbAddrs, totalFbCount); + return FALSE; + } } - bm_vdi_mmap(bm_handle,pvb,(unsigned long long *)&pvbVaddr); -#ifndef BM_PCIE_MODE - retFbArray[idx].bufYVaddr=(Uint64)pvbVaddr; -#endif - decHandle->CodecInfo->decInfo.vpu_frame_buffer_vaddr[idx]=pvbVaddr; - decHandle->CodecInfo->decInfo.vpu_frame_buffer_vaddr_size[idx]=framebufSize; - retFbArray[idx].bufY = pvb->u.device.device_addr; + retFbArray[idx].bufY = pvb->phys_addr; retFbArray[idx].bufCb = (PhysicalAddress)-1; 
retFbArray[idx].bufCr = (PhysicalAddress)-1; retFbArray[idx].updateFbInfo = TRUE; @@ -1195,7 +1177,7 @@ BOOL AllocateDecFrameBuffer( if (ret != RETCODE_SUCCESS) { VLOG(ERR, "%s:%d failed to VPU_DecAllocateFrameBuffer() ret:%d\n", __FUNCTION__, __LINE__, ret); - ReleaseVideoMemory(bm_handle,coreIndex, retFbAddrs, totalFbCount); + ReleaseVideoMemory(coreIndex, retFbAddrs, totalFbCount); return FALSE; } } diff --git a/bmvid/video/provider/cnm/sample_v2/helper/main_helper.h b/bmvid/video/provider/cnm/sample_v2/helper/main_helper.h index 5ff3def..6b85f96 100644 --- a/bmvid/video/provider/cnm/sample_v2/helper/main_helper.h +++ b/bmvid/video/provider/cnm/sample_v2/helper/main_helper.h @@ -1341,6 +1341,7 @@ typedef struct TestDecConfig_struct { Uint32 feedingSize; Uint32 loopCount; BOOL errorInject; + Int32 framebuf_from_user; } TestDecConfig; #ifdef __cplusplus @@ -1357,9 +1358,8 @@ struct option* ConvertOptions( ); void ReleaseVideoMemory( - bm_handle_t handle, Uint32 coreIndex, - bm_device_mem_t*memoryArr, + vpu_buffer_t*memoryArr, Uint32 count ); BOOL AllocateDecFrameBuffer( @@ -1368,7 +1368,7 @@ BOOL AllocateDecFrameBuffer( Uint32 tiledFbCount, Uint32 linearFbCount, FrameBuffer* retFbArray, - bm_device_mem_t*retFbAddrs, + vpu_buffer_t*retFbAddrs, Uint32* retStride, int enable_cache ); diff --git a/bmvid/vpp/bmvppapi/Makefile b/bmvid/vpp/bmvppapi/Makefile index 3ae9cd5..fd91428 100644 --- a/bmvid/vpp/bmvppapi/Makefile +++ b/bmvid/vpp/bmvppapi/Makefile @@ -45,9 +45,10 @@ ifneq ($(DEBUG), 0) CFLAGS += -O0 -g endif +LDLIBS += -lpthread ifeq ($(PRODUCTFORM), pcie_loongarch64) LDFLAGS += -Wl,-melf64loongarch -LDLIBS += -lm -lstdc++ -lpthread +LDLIBS += -lm -lstdc++ endif ifeq ($(PRODUCTFORM), pcie_sw64) diff --git a/bmvid/vpp/bmvppapi/test_bmvpp b/bmvid/vpp/bmvppapi/test_bmvpp index 8fd9c13..907cd4e 100755 Binary files a/bmvid/vpp/bmvppapi/test_bmvpp and b/bmvid/vpp/bmvppapi/test_bmvpp differ diff --git a/doc/guide/1_install.rst b/doc/guide/1_install.rst index 
bcc108a..d8f4e2f 100644 --- a/doc/guide/1_install.rst +++ b/doc/guide/1_install.rst @@ -1,7 +1,7 @@ 安装libsophon -------------- -.. |ver| replace:: 0.5.0 +.. |ver| replace:: 0.5.1 libsophon在不同的Linux发行版上提供不同类型的安装方式。请根据您的系统选择对应的方式,不要在一台机器上混用多种安装方式。 以下描述中“|ver|”仅为示例,视当前实际安装版本会有变化。 @@ -17,8 +17,8 @@ libsophon在不同的Linux发行版上提供不同类型的安装方式。请根 sudo rm -f /lib/modules/$(uname -r)/kernel/drivers/pci/bmsophon.ko -**如果使用Debian/Ubuntu系统:** - +Debian/Ubuntu系统 +~~~~~~~~~~~~~~~~~~~ 安装包由三个文件构成,其中“$arch”为当前机器的硬件架构,使用以下命令可以获取当前服务器的arch: @@ -75,7 +75,7 @@ deb包安装方式并不允许您安装同一个包的多个不同版本,但 sudo modprobe bmsophon -卸载方式: +**卸载方式:** 注意:如果安装了sophon-mw及sophon-rpc,因为它们对libsophon有依赖关系,请先卸载它们。 @@ -104,8 +104,8 @@ deb包安装方式并不允许您安装同一个包的多个不同版本,但 sudo apt purge sophon-libsophon -**如果使用Centos系统, 当前版本仅支持x86_64:** - +Centos系统, 当前版本仅支持x86_64 +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 安装包由三个文件构成,其中“$arch”为当前机器的硬件架构,使用以下命令可以获取当前服务器的arch: @@ -124,7 +124,6 @@ x86_64机器对应的安装包名称为: .. parsed-literal:: 安装依赖库,只需要执行一次: - sudo yum install -y epel-release sudo yum install -y dkms sudo yum install -y ncurses* 安装libsophon: @@ -134,7 +133,7 @@ x86_64机器对应的安装包名称为: 在终端执行如下命令,或者登出再登入当前用户后即可使用bm-smi等命令: source /etc/profile -卸载方式: +**卸载方式:** .. parsed-literal:: @@ -142,7 +141,45 @@ x86_64机器对应的安装包名称为: sudo rpm -e sophon-libsophon-dev-\ |ver|\ -1.x86_64 sudo rpm -e sophon-libsophon-\ |ver|\ -1.x86_64 -**如果使用其它Linux系统:** +Fedora系统, 当前版本仅支持riscv架构 +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +安装包由三个文件构成,其中“$arch”为当前机器的硬件架构,使用以下命令可以获取当前服务器的arch: + +.. parsed-literal:: + + uname -m + +x86_64机器对应的安装包名称为: + - sophon-driver-\ |ver|\ -1.$arch.rpm + - sophon-libsophon-\ |ver|\ -1.$arch.rpm + - sophon-libsophon-dev-\ |ver|\ -1.$arch.rpm + + +安装前需要通过后面“卸载方式”中的步骤卸载旧版本libsophon,可以通过如下步骤安装: + +.. 
parsed-literal:: + + 安装依赖库,只需要执行一次: + sudo yum install -y dkms + sudo yum install -y ncurses* + 安装libsophon: + sudo rpm -ivh sophon-driver-\ |ver|\ -1.riscv64.rpm + sudo rpm -ivh sophon-libsophon-\ |ver|\ -1.riscv64.rpm + sudo rpm -ivh --force sophon-libsophon-dev-\ |ver|\ -1.riscv64.rpm + 在终端执行如下命令,或者登出再登入当前用户后即可使用bm-smi等命令: + source /etc/profile + +**卸载方式:** + +.. parsed-literal:: + + sudo rpm -e sophon-driver-\ |ver|\ -1.riscv64 + sudo rpm -e sophon-libsophon-dev-\ |ver|\ -1.riscv64 + sudo rpm -e sophon-libsophon-\ |ver|\ -1.riscv64 + +其它Linux系统 +~~~~~~~~~~~~~~~ 安装包由一个文件构成,其中“$arch”为当前机器的硬件架构,使用以下命令可以获取当前服务器的arch: @@ -202,7 +239,7 @@ x86_64机器对应的安装包名称为: sudo mkdir -p /usr/lib/cmake/libsophon sudo cp /opt/sophon/libsophon-current/data/libsophon-config.cmake /usr/lib/cmake/libsophon/ -卸载方式: +**卸载方式:** .. parsed-literal:: diff --git a/doc/guide/3_1_bmsmi_description.rst b/doc/guide/3_1_bmsmi_description.rst index c849e45..7fe3407 100644 --- a/doc/guide/3_1_bmsmi_description.rst +++ b/doc/guide/3_1_bmsmi_description.rst @@ -17,11 +17,11 @@ * - BM1684X - 算能面向深度学习领域推出的第四代张量处理器 - * - TPU - - 芯片内部神经网络处理单元 + * - NPU + - 神经网络处理单元 * - SOC模式 - - 一种产品形态,SDK运行于A53 AARCH64平台,TPU作为平台总线设备 + - 一种产品形态,SDK运行于A53 AARCH64平台,智能视觉深度学习处理器作为平台总线设备 * - PCIe模式 - 一种产品形态,SDK运行于X86平台,BM1684、BM1684X存在于PCIe接口的深度学习计算加速卡上 @@ -30,7 +30,7 @@ - Drivers是API接口访问硬件的通道 * - Gmem - - 卡上用于TPU加速的DDR内存 + - 卡上用于智能视觉深度学习处理器加速的DDR内存 * - F - FAULT 故障状态 @@ -49,7 +49,7 @@ bm-smi介绍 - 查看物理板卡ID - - 查看设备芯片ID,所在PCIe总线ID + - 查看设备npu ID,所在PCIe总线ID - 查看设备温度和功耗 @@ -57,7 +57,7 @@ bm-smi介绍 - 查看gmem总数和利率 - - 查看tpu利用率 + - 查看智能视觉深度学习处理器利用率 - 查看设备工作频率信息 @@ -100,7 +100,7 @@ bm-smi介绍 - 支持 - 支持 - * - tpu的设备号 + * - 智能视觉深度学习处理器的设备号 - 支持 - 支持 @@ -116,7 +116,7 @@ bm-smi介绍 - 支持 - 不支持 - * - 芯片温度 + * - 片上温度 - 支持 - 不支持 @@ -180,7 +180,7 @@ bm-smi介绍 - 支持 - 支持 - * - tpu的瞬时利用率 + * - 智能视觉深度学习处理器的瞬时利用率 - 支持 - 支持 @@ -206,7 +206,7 @@ bm-smi介绍 :height: 6.22083in :alt: "图1" 
-图1为SC5+(三芯)/SC5H/SC5P(八芯)的显示状态,每张卡之间用=======隔开,最左边显示的板卡级别的属性,右边和中间显示的是单个芯片的状态。 +图1为SC5+(三芯)/SC5H/SC5P(八芯)的显示状态,每张卡之间用=======隔开,最左边显示的板卡级别的属性,右边和中间显示的是单个npu的状态。 bm-smi是一个可执行文件,不依赖其他动态库,位于/opt/sophon/libsophon-current/bin目录下,上图为一个执行bm-smi的示意图。 @@ -229,21 +229,21 @@ bm-smi是一个可执行文件,不依赖其他动态库,位于/opt/sophon/li - SN : 板卡序列号(共17位) -- TPU : tpu的设备号 +- TPU : 智能视觉深度学习处理器的设备号 - BoardT:板级温度 -- chipT:芯片温度 +- chipT:片上温度 -- TPU_P:TPU模块功耗 +- TPU_P:智能视觉深度学习处理器模块功耗 -- TPU_V:TPU模块电压 +- TPU_V:智能视觉深度学习处理器模块电压 - ECC: DDR ECC是否使能 - CorrectNum:若DDR使能,纠正错误的次数 -- Tpu-Util:tpu的瞬时利用率 +- Tpu-Util:智能视觉深度学习处理器的瞬时利用率 - 12V_ATX:板级12V供电电流 @@ -251,9 +251,9 @@ bm-smi是一个可执行文件,不依赖其他动态库,位于/opt/sophon/li - boardP:板级功耗 -- Minclk:tpu最小工作频率 +- Minclk:智能视觉深度学习处理器最小工作频率 -- Maxclk:tpu最大工作频率 +- Maxclk:智能视觉深度学习处理器最大工作频率 - Fan:风扇转速,显示N/A 表示本卡无风扇,显示F 表示有风扇故障 @@ -261,9 +261,9 @@ bm-smi是一个可执行文件,不依赖其他动态库,位于/opt/sophon/li - Status:板卡状态,Active为活动状态, Fault为故障状态 -- Curclk:tpu当前工作频率,显示的值的颜色根据当前工作频率而不同,550M(bm1684)或1000M(bm1684x)显示白色,75M显示红色,其他频率显示黄色;红色和黄色用于提示用户当前工作频率不是最大工作频率。显示不同颜色只在2.1.0版本及以上版本才有。 +- Curclk:智能视觉深度学习处理器当前工作频率,显示的值的颜色根据当前工作频率而不同,550M(bm1684)或1000M(bm1684x)显示白色,75M显示红色,其他频率显示黄色;红色和黄色用于提示用户当前工作频率不是最大工作频率。显示不同颜色只在2.1.0版本及以上版本才有。 -- TPU_C: tpu模块的工作电流 +- TPU_C: 智能视觉深度学习处理器模块的工作电流 - Memory-Usage:gmem总数和已使用数量;默认106M表示VPU的固件占用的内存大小。板卡上的memory有可能分布在不同的地址空间,我们分配的内存都是地址连续的内存,而且由于每次分配的大小不一样,会导致内存的碎片化,所以有可能出现利用率达不到100%的情况。 @@ -343,7 +343,7 @@ bm-smi支持的参数有: bm-smi --dev=0x0 --led=off - 注意:此功能在SC5+和SC5P上支持 on/off/blink,在SC5H上支持on/off,其它板卡类型不支持。SC5+板卡只有第一个芯片才能控制LED灯的状态,SC5P拥有8个led,每个设备都对应一个led,每个led都支持单独设置状态。 + 注意:此功能在SC5+和SC5P上支持 on/off/blink,在SC5H上支持on/off,其它板卡类型不支持。SC5+板卡只有第一个NPU才能控制LED灯的状态,SC5P拥有8个led,每个设备都对应一个led,每个led都支持单独设置状态。 该功能SOC模式不支持。 @@ -424,7 +424,7 @@ bm-smi输出的是一个简单的图形界面,描述了板卡的状态,为 1684-SC5+ PCIE chip0: 0 000:01:00.0 Active 56C 55C 2W 615mV OFF N/A 0% 75M 550M 550M 3.3A 0MB 7086MB -| 三芯卡上的第0个chip的状态,1684-SC5+ PCIE chip0: +| 三芯卡上的第0个processor的状态,1684-SC5+ PCIE chip0: | 后面的信息依次对应bm-smi中的:TPU Bus-ID Status 
boardT chipT TPU_P TPU_V ECC CorrectN Tpu-Util Minclk Maxclk Curclk TPU_C Memory-Usage 第二个区域: @@ -432,7 +432,7 @@ bm-smi输出的是一个简单的图形界面,描述了板卡的状态,为 1684-SC5+ PCIE chip1: 1 000:01:00.1 Active 56C 55C 2W 613mV OFF N/A 0% 75M 550M 550M 4.2A 0MB 7086MB -| 三芯卡上的第1个chip的状态,1684-SC5+ PCIE chip1: +| 三芯卡上的第1个processor的状态,1684-SC5+ PCIE chip1: | 后面的信息依次对应bm-smi中的:TPU Bus-ID Status boardT chipT TPU_P TPU_V ECC CorrectN Tpu-Util Minclk Maxclk Curclk TPU_C Memory-Usage 第三个区域: @@ -440,14 +440,14 @@ bm-smi输出的是一个简单的图形界面,描述了板卡的状态,为 1684-SC5+ PCIE chip2: 2 000:01:00.2 Active 54C 53C 1W 615mV OFF N/A 0% 75M 550M 550M 2.6A 0MB 7086MB -| 三芯卡上的第2个chip的状态,1684-SC5+ PCIE chip2: +| 三芯卡上的第2个processor的状态,1684-SC5+ PCIE chip2: | 后面的信息依次对应bm-smi中的:TPU Bus-ID Status boardT chipT TPU_P TPU_V ECC CorrectN Tpu-Util Minclk Maxclk Curclk TPU_C Memory-Usage .. parsed-literal:: 注意事项: - 1、--start_dev=0 --last_dev=2 表示bm-smi中显示的某张卡的第0个和最后1个chip对应的设备号; + 1、--start_dev=0 --last_dev=2 表示bm-smi中显示的某张卡的第0个和最后1个processor对应的设备号; 2、--start_dev --last_dev --text_format要一起使用。 diff --git a/doc/guide/3_2_proc_description.rst b/doc/guide/3_2_proc_description.rst index 7fe30bc..5956c9f 100644 --- a/doc/guide/3_2_proc_description.rst +++ b/doc/guide/3_2_proc_description.rst @@ -506,7 +506,7 @@ PCIe模式各个设备的详细信息 读写属性:只读; - 含义:芯片id(0x1684x/0x1684/0x1682) + 含义:npu id(0x1684x/0x1684/0x1682) - chip_temp @@ -514,7 +514,7 @@ PCIe模式各个设备的详细信息 读写属性:只读 - 含义:芯片温度 + 含义:片上温度 - dbdf @@ -530,7 +530,7 @@ PCIe模式各个设备的详细信息 读写属性:读写 - 含义:使能或者禁止动态tpu调频功能;0/1有效,其他值无效 + 含义:使能或者禁止动态智能视觉深度学习处理器调频功能;0/1有效,其他值无效 - ecc @@ -602,7 +602,7 @@ PCIe模式各个设备的详细信息 读写属性:只读 - 含义:tpu的ID(0/1/2/3……) + 含义:智能视觉深度学习处理器的ID(0/1/2/3……) - tpu_maxclk @@ -610,7 +610,7 @@ PCIe模式各个设备的详细信息 读写属性:只读 - 含义:tpu的最大工作频率 + 含义:智能视觉深度学习处理器的最大工作频率 - tpu_minclk @@ -618,7 +618,7 @@ PCIe模式各个设备的详细信息 读写属性:只读 - 含义:tpu的最小工作频率 + 含义:智能视觉深度学习处理器的最小工作频率 - tpu_freq @@ -626,7 +626,7 @@ PCIe模式各个设备的详细信息 读写属性:读写 - 含义:tpu的工作频率,可通过写入参数来改变频率,写入前应向dynfreq写入0来关闭动态tpu调频,示例如下: + 
含义:智能视觉深度学习处理器的工作频率,可通过写入参数来改变频率,写入前应向dynfreq写入0来关闭动态智能视觉深度学习处理器调频,示例如下: :: @@ -640,7 +640,7 @@ PCIe模式各个设备的详细信息 读写属性:只读 - 含义:tpu的瞬时功率 + 含义:智能视觉深度学习处理器的瞬时功率 - firmware_info @@ -744,7 +744,7 @@ PCIe模式各个设备的详细信息 读写属性:只读 - 含义:tpu 处理过程中消耗的时间 + 含义:智能视觉深度学习处理器处理过程中消耗的时间 - completed_api_counter @@ -768,7 +768,7 @@ PCIe模式各个设备的详细信息 读写属性:读写 - 含义:tpu的电压,可通过写入参数来改变电压 + 含义:智能视觉深度学习处理器的电压,可通过写入参数来改变电压 - tpu_cur @@ -776,7 +776,7 @@ PCIe模式各个设备的详细信息 读写属性:只读 - 含义:tpu 电流 + 含义:智能视觉深度学习处理器电流 - fan_speed @@ -864,7 +864,7 @@ PCIe模式各个设备的详细信息 读写属性:读写 - 含义:转存寄存器,输入1转存到tpu寄存器,输入2转存到gdma寄存器 + 含义:转存寄存器,输入1转存到智能视觉深度学习处理器寄存器,输入2转存到gdma寄存器 - heap @@ -933,7 +933,7 @@ SOC模式只有JPU和VPU支持proc接口,对应的proc节点为/proc/jpuinfo 读写属性:只读 - JPU loadbalance : 记录JPU0-JPU1(1684x),JPU0-JPU3(1684)编码/解码次数,JPU*为芯片内部的JPEG编解码器, 取值范围:0~ 2147483647 + JPU loadbalance : 记录JPU0-JPU1(1684x),JPU0-JPU3(1684)编码/解码次数,JPU*为内部的JPEG编解码器, 取值范围:0~ 2147483647 - vpuinfo diff --git a/doc/guide/3_3_sysfs_interface_description.rst b/doc/guide/3_3_sysfs_interface_description.rst index efb209f..eb19023 100644 --- a/doc/guide/3_3_sysfs_interface_description.rst +++ b/doc/guide/3_3_sysfs_interface_description.rst @@ -1,10 +1,10 @@ .. vim: syntax=rst -Tpu驱动sysfs文件系统介绍 +智能视觉深度学习处理器驱动sysfs文件系统介绍 ---------------------- -sysfs文件系统接口用来获取TPU的利用率等信息。 -下表列举了Tpu驱动sysfs文件系统可以获取的设备信息以及在PCIe和SOC模式下的支持情况: +sysfs文件系统接口用来获取智能视觉深度学习处理器的利用率等信息。 +下表列举了智能视觉深度学习处理器驱动sysfs文件系统可以获取的设备信息以及在PCIe和SOC模式下的支持情况: .. 
list-table:: :widths: 40 30 30 @@ -38,13 +38,13 @@ SOC模式:/sys/class/bm-tpu/bm-tpu0/device 下面逐一介绍每个部分代表的含义。 -- npu_usage,Tpu(npu)在一段时间内(窗口宽度)处于工作状态的百分比。 +- npu_usage,智能视觉深度学习处理器在一段时间内(窗口宽度)处于工作状态的百分比。 - npu_usage_enable,是否使能统计npu利用率,默认使能。 - npu_usage_interval,统计npu利用率的时间窗口宽度,单位ms,默认500ms。取值范围[200,2000]。 -Tpu驱动sysfs文件系统接口的具体使用方法 +智能视觉深度学习处理器驱动sysfs文件系统接口的具体使用方法 ----------------------- 使用例子如下: diff --git a/doc/guide/3_tools.rst b/doc/guide/3_tools.rst index c2a5f7d..b7effd9 100644 --- a/doc/guide/3_tools.rst +++ b/doc/guide/3_tools.rst @@ -19,7 +19,7 @@ proc文件系统使用说明 3_2_proc_description -Tpu驱动sysfs文件系统使用说明 +智能视觉深度学习处理器驱动sysfs文件系统使用说明 ~~~~~~~~~~~~~~ .. toctree:: diff --git a/doc/guide/4_docker_usage.rst b/doc/guide/4_docker_usage.rst index f33320f..1cd0d38 100644 --- a/doc/guide/4_docker_usage.rst +++ b/doc/guide/4_docker_usage.rst @@ -49,7 +49,7 @@ Docker 测试环境搭建 构建镜像 """""""" -使用 dockerfile 构建测试所需的 Docker 镜像, dockerfile 位于 libsophon 文件夹下。构建镜像前请检查系统时间是否准确,错误的系统时间会导致构建过程中更新 package list 时证书验证失败。 +使用 dockerfile 构建测试所需的 Docker 镜像, dockerfile 位于 从官网下载的SDK包中libsophon 文件夹下。构建镜像前请检查系统时间是否准确,错误的系统时间会导致构建过程中更新 package list 时证书验证失败。 .. 
code-block:: shell @@ -65,9 +65,9 @@ Docker 测试环境搭建 $ sudo docker images REPOSITORY TAG IMAGE ID CREATED SIZE - image_name image_version image_id create_time 1.74GB + image_name image_version image_id create_time size -以上内容中 ``image_name`` 与 ``image_version`` 应与构建时设置内容一致, ``image_id`` 与 ``create_time`` 应与实际情况相符合。 +以上内容中 ``image_name`` 与 ``image_version`` 应与构建时设置内容一致, ``image_id`` 、 ``create_time`` 与 ``size`` 应与实际情况相符合。 创建容器 """""""" @@ -118,3 +118,5 @@ Docker 测试环境搭建 for f in /etc/profile.d/*sophon*; do source $f; done 运行以上命令后可运行 ``bm-smi`` 命令以检查是否可正常使用 libsophon, 命令输出应与 :doc:`bm-smi使用说明 <3_1_bmsmi_description>` 中对应内容相符。 + +注意Docker内部时区为UTC时区,用户可根据需要手动修改Docker内的时区。 \ No newline at end of file diff --git a/doc/guide/5_out_of_band_management_interface.rst b/doc/guide/5_out_of_band_management_interface.rst index 0e32a74..5731252 100644 --- a/doc/guide/5_out_of_band_management_interface.rst +++ b/doc/guide/5_out_of_band_management_interface.rst @@ -7,7 +7,7 @@ SMBUS 协议接口定义 命令组成 ^^^^^^^^^^^ -先写 1 byte 的 CMD 到 i2c slave, 再读取 n byte 数据。以下为读取 CHIP0 芯片温度的例子: +先写 1 byte 的 CMD 到 i2c slave, 再读取 n byte 数据。以下为读取温度的例子: .. 
code-block:: shell @@ -37,7 +37,7 @@ SC5+ MCU 接口命令 ============== ========== ======== ============================================= 含义 地址 属性 说明 -------------- ---------- -------- --------------------------------------------- - 芯片温度 0x00 RO unsigned byte, 单位:摄氏度 + 片上温度 0x00 RO unsigned byte, 单位:摄氏度 -------------- ---------- -------- --------------------------------------------- 单板温度 0x01 RO unsigned byte, 单位:摄氏度 -------------- ---------- -------- --------------------------------------------- @@ -49,7 +49,7 @@ SC5+ MCU 接口命令 -------------- ---------- -------- --------------------------------------------- 硬件版本 0x14 RO unsigned byte -------------- ---------- -------- --------------------------------------------- - 固件版本 0x18 RO unsigned int;[7:0]小版本号;[15:8]主版本号;[31:16]chip版本号 + 固件版本 0x18 RO unsigned int;[7:0]小版本号;[15:8]主版本号;[31:16]processor版本号 -------------- ---------- -------- --------------------------------------------- 板卡种类 0x1c RO unsigned byte(代表板卡种类,sc5+是7) -------------- ---------- -------- --------------------------------------------- @@ -70,7 +70,7 @@ SC7 系列板卡带外管理接口 ^^^^^^ :服务器厂商 BMC 控制: -- SC7 系列多芯卡 Slave 地址为 0x60, CHIP1 为 0x61, CHIP2 为 0x62, 依此类推, CHIP7 为 0x67。 +- SC7 系列多芯卡 Slave 地址为 0x60, CHIP1 为 0x61, CHIP2 为 0x62, 依此类推, CHIP7 为 0x67。 - 返回int类型数据时候按照高字节在前顺序发送(例如int类型数为 0x16861f1c,返回顺序为 0x16, 0x86, 0x1f, 0x1c)。 @@ -83,7 +83,7 @@ SC7PRO MCU 接口命令 ============== ========== ======== ============================================= 含义 地址 属性 说明 -------------- ---------- -------- --------------------------------------------- - 芯片温度 0x00 RO unsigned byte, 单位:摄氏度 + 片上温度 0x00 RO unsigned byte, 单位:摄氏度 -------------- ---------- -------- --------------------------------------------- 单板温度 0x01 RO unsigned byte, 单位:摄氏度 -------------- ---------- -------- --------------------------------------------- @@ -95,7 +95,7 @@ SC7PRO MCU 接口命令 -------------- ---------- -------- --------------------------------------------- 硬件版本 0x14 RO unsigned byte -------------- ---------- -------- 
--------------------------------------------- - 固件版本 0x18 RO unsigned int;[7:0]小版本号;[15:8]主版本号;[31:16]chip版本号 + 固件版本 0x18 RO unsigned int;[7:0]小版本号;[15:8]主版本号;[31:16]processor版本号 -------------- ---------- -------- --------------------------------------------- 板卡种类 0x1c RO unsigned byte(代表板卡种类,sc7pro是0x21) -------------- ---------- -------- --------------------------------------------- diff --git a/doc/guide_en/1_install.rst b/doc/guide_en/1_install.rst index a19df32..5782da4 100644 --- a/doc/guide_en/1_install.rst +++ b/doc/guide_en/1_install.rst @@ -18,7 +18,8 @@ Depending on the current situation, the installed version may change. sudo rm -f /lib/modules/$(uname -r)/kernel/drivers/pci/bmsophon.ko -**If the Debian/Ubuntu system is used:** +Debian/Ubuntu system +~~~~~~~~~~~~~~~~~~~~~~ the installation package consists of three files: @@ -78,7 +79,7 @@ You need to manually execute the following command to install the driver in the sudo modprobe bmsophon -Uninstallation method: +**Uninstallation method:** .. code-block:: shell @@ -104,7 +105,8 @@ If there is any trouble in uninstallation, you can try the following operations: #Completely clear libsophon: sudo dpkg --purge sophon-libsophon -**If another Linux system is used:** +Another Linux system +~~~~~~~~~~~~~~~~~~~~~~ the installation package consist of one file: @@ -156,7 +158,7 @@ Finally, some configuration work should be done: sudo mkdir -p /usr/lib/cmake/libsophon sudo cp /opt/sophon/libsophon-current/data/libsophon-config.cmake /usr/lib/cmake/libsophon/ -Uninstallation method: +**Uninstallation method:** .. code-block:: shell diff --git a/doc/guide_en/3_1_bmsmi_description.rst b/doc/guide_en/3_1_bmsmi_description.rst index b918cbd..45742e4 100644 --- a/doc/guide_en/3_1_bmsmi_description.rst +++ b/doc/guide_en/3_1_bmsmi_description.rst @@ -17,11 +17,11 @@ Interpretation of terms * - BM1684X - The fourth-generation tensor processor launched by SOPHGO for the field of deep learning. 
- * - TPU - - On-chip neural network processing unit. + * - Tensor Computing Processor + - On-processor neural network processing unit. * - SOC mode - - A working mode, the SDK runs on A53 AARCH64 platform, and TPU is used as the platform bus device. + - A working mode, the SDK runs on A53 AARCH64 platform, and Tensor Computing Processor is used as the platform bus device. * - PCIe mode - A working mode, SDK runs on the X86 platform, BM1684 and BM1684X are at deep learning computing accelerator cards in PCIe interface. @@ -30,7 +30,7 @@ Interpretation of terms - Drivers are the channels through which the API interface accesses the hardware. * - Gmem - - DDR memory on card for TPU acceleration. + - DDR memory on card for Tensor Computing Processor acceleration. * - F - FAULT, fault status. @@ -53,7 +53,7 @@ Bm-smi functions mainly include: - Viewing physical board ID. - - Viewing the device chip ID, and the PCIe bus ID where it is located. + - Viewing the device processor ID, and the PCIe bus ID where it is located. - Viewing device temperature and power consumption. @@ -61,7 +61,7 @@ Bm-smi functions mainly include: - Viewing the total number and usage of gmem. - - Viewing the usage of tpu. + - Viewing the usage of Tensor Computing Processor. - Viewing the work frequency information of the device. 
@@ -104,7 +104,7 @@ Bm-smi functions mainly include: - Supported - Supported - * - Tpu device number + * - Tensor Computing Processor device number - Supported - Supported @@ -120,7 +120,7 @@ Bm-smi functions mainly include: - Supported - Not supported - * - Chip temperature + * - Processor temperature - Supported - Not supported @@ -184,7 +184,7 @@ Bm-smi functions mainly include: - Supported - Supported - * - Instantaneous usage of tpu + * - Instantaneous usage of Tensor Computing Processor - Supported - Supported @@ -209,7 +209,7 @@ Bm-smi functions mainly include: :width: 5.76806in :height: 6.22083in -Figure 1 shows the display status of SC5+ (three-core)/SC5H/SC5P (eight-core), each card is separated by =======, the board attributes are displayed on the left, and the state of a single chip is displayed on the right and middle. +Figure 1 shows the display status of SC5+ (three-core)/SC5H/SC5P (eight-core), each card is separated by =======, the board attributes are displayed on the left, and the state of a single processor is displayed on the right and middle. bm-smi is an executable file that does not depend on other dynamic libraries, and it is located under /opt/sophon/libsophon-current/bin directory. The above figure is a schematic diagram about the execution of bm-smi. @@ -232,21 +232,21 @@ The meaning of each part is introduced one by one below: - SN: board serial number (total of 17 bits). -- TPU: device number of tpu. +- TPU: device number of Tensor Computing Processor. - BoardT: board temperature. -- chipT: chip temperature. +- chipT: processor temperature. -- TPU_P: power consumption of TPU module. +- TPU_P: power consumption of Tensor Computing Processor module. -- TPU_V: voltage of TPU module. +- TPU_V: voltage of Tensor Computing Processor module. - ECC: whether DDR ECC is enabled. - CorrectNum: the number of correction times if DDR is enabled. -- Tpu-Util: instantaneous usage of tpu. +- Tpu-Util: instantaneous usage of Tensor Computing Processor. 
- 12V_ATX: 12V board supply current. @@ -254,9 +254,9 @@ The meaning of each part is introduced one by one below: - boardP: board power consumption. -- Minclk: minimum work frequency of tpu. +- Minclk: minimum work frequency of Tensor Computing Processor. -- Maxclk: maximum work frequency of tpu. +- Maxclk: maximum work frequency of Tensor Computing Processor. - Fan: fan speed, N/A means the card has no fan, and F means there is a failure in the fan. @@ -264,9 +264,9 @@ The meaning of each part is introduced one by one below: - Status: board status. Active means active status; and Fault means fault status. -- Curclk: current work frequency of tpu. The color of the displayed value varies according to the current work frequency. 550M (bm1684) or 1000M (bm1684x) is displayed in white, 75M in red, and other frequencies in yellow; red and yellow are used to indicate to the user that the current work frequency is not the maximum work frequency. Displaying different colors are only available in version 2.1.0 and above. +- Curclk: current work frequency of Tensor Computing Processor. The color of the displayed value varies according to the current work frequency. 550M (bm1684) or 1000M (bm1684x) is displayed in white, 75M in red, and other frequencies in yellow; red and yellow are used to indicate to the user that the current work frequency is not the maximum work frequency. Displaying different colors are only available in version 2.1.0 and above. -- TPU_C: work current of tpu module. +- TPU_C: work current of Tensor Computing Processor module. - Memory-Usage: byte numbers of gmem totals and used. The 106M indicates the memory size of the VPU firmware by default. The memory on the board may be distributed in different address spaces. All the memory we allocate is continuous address, and because of the different size of each allocation, it will lead to fragmentation of the memory, so the usage may not reach 100%. 
@@ -348,7 +348,7 @@ The parameters supported by bm-smi include: bm-smi --dev=0x0 --led=off - Note: This function support on/off/blink on SC5+ and SC5P, on/off on SC5H, does not support other board types. For the SC5+ board, only the first chip can control the status of LED. SC5P has 8 LEDs, each device corresponds to one LED, and each LED supports setting status separately. + Note: This function support on/off/blink on SC5+ and SC5P, on/off on SC5H, does not support other board types. For the SC5+ board, only the first processor can control the status of LED. SC5P has 8 LEDs, each device corresponds to one LED, and each LED supports setting status separately. This function is not supported in SOC mode. @@ -435,7 +435,7 @@ First area: 1684-SC5+ PCIE chip0: 0 000:01:00.0 Active 56C 55C 2W 615mV OFF N/A 0% 75M 550M 550M 3.3A 0MB 7086MB -Status of the 0th chip on the three-core card, 1684-SC5+ PCIE chip0: +Status of the 0th processor on the three-core card, 1684-SC5+ PCIE chip0: :: @@ -447,7 +447,7 @@ Second area: 1684-SC5+ PCIE chip1: 1 000:01:00.1 Active 56C 55C 2W 613mV OFF N/A 0% 75M 550M 550M 4.2A 0MB 7086MB -Status of the 1st chip on the three-chip card, 1684-SC5+ PCIE chip1: +Status of the 1st processor on the three-processor card, 1684-SC5+ PCIE chip1: The following information corresponds in sequence to parameters in bm-smi: @@ -461,7 +461,7 @@ Third area: 1684-SC5+ PCIE chip2: 2 000:01:00.2 Active 54C 53C 1W 615mV OFF N/A 0% 75M 550M 550M 2.6A 0MB 7086MB -Status of the 2nd chip on the three-chip card, 1684-SC5+ PCIE chip2: +Status of the 2nd processor on the three-processor card, 1684-SC5+ PCIE chip2: The following information corresponds in sequence to parameters in bm-smi: @@ -473,7 +473,7 @@ The following information corresponds in sequence to parameters in bm-smi: Notes: - 1. --start_dev=0 --last_dev=2 indicates the device numbers corresponding to the 0th chip and the last chip of a certain card displayed in bm-smi; + 1. 
--start_dev=0 --last_dev=2 indicates the device numbers corresponding to the 0th processor and the last processor of a certain card displayed in bm-smi; 2. --start_dev --last_dev --text_format should be used together. diff --git a/doc/guide_en/3_2_proc_description.rst b/doc/guide_en/3_2_proc_description.rst index 279c32c..146462b 100644 --- a/doc/guide_en/3_2_proc_description.rst +++ b/doc/guide_en/3_2_proc_description.rst @@ -505,7 +505,7 @@ Detailed Information of Devices in PCIe Mode Readwrite property: read only. - Meaning: chip id (0x1684x/0x1684/0x1682). + Meaning: processor id (0x1684x/0x1684/0x1682). - chip_temp @@ -513,7 +513,7 @@ Detailed Information of Devices in PCIe Mode Readwrite property: read only. - Meaning: chip temperature. + Meaning: processor temperature. - dbdf @@ -529,7 +529,7 @@ Detailed Information of Devices in PCIe Mode Readwrite property: read and write. - Meaning: Enable or disable the dynamic tpu frequency modulation function; 0/1 is valid, other values are invalid. + Meaning: Enable or disable the dynamic Tensor Computing Processor frequency modulation function; 0/1 is valid, other values are invalid. - ecc @@ -601,7 +601,7 @@ Detailed Information of Devices in PCIe Mode Readwrite property: read only. - Meaning: tpu ID (0/1/2/3⋯⋯). + Meaning: Tensor Computing Processor ID (0/1/2/3⋯⋯). - tpu_maxclk @@ -609,7 +609,7 @@ Detailed Information of Devices in PCIe Mode Readwrite property: read only. - Meaning: maximum work frequency of tpu. + Meaning: maximum work frequency of Tensor Computing Processor. - tpu_minclk @@ -617,7 +617,7 @@ Detailed Information of Devices in PCIe Mode Readwrite property: read only. - Meaning: minimum work frequency of tpu. + Meaning: minimum work frequency of Tensor Computing Processor. - tpu_freq @@ -625,7 +625,7 @@ Detailed Information of Devices in PCIe Mode Readwrite property: read and write. - Meaning: work frequency of the tpu, which can be changed by writing parameters. 
0 should be written into dynfreq to turn off the dynamic TPU frequency modulation before writing parameters. + Meaning: work frequency of the Tensor Computing Processor, which can be changed by writing parameters. 0 should be written into dynfreq to turn off the dynamic Tensor Computing Processor frequency modulation before writing parameters. - tpu_power @@ -633,7 +633,7 @@ Detailed Information of Devices in PCIe Mode Readwrite property: read only. - Meaning: instantaneous power of tpu. + Meaning: instantaneous power of Tensor Computing Processor. - firmware_info @@ -737,7 +737,7 @@ Detailed Information of Devices in PCIe Mode Readwrite property: read only. - Meaning: the time spent in tpu processing. + Meaning: the time spent in Tensor Computing Processor processing. - completed_api_counter @@ -761,7 +761,7 @@ Detailed Information of Devices in PCIe Mode Readwrite property: read and write. - Meaning: tpu voltage, the voltage can be changed by writing a parameter. + Meaning: Tensor Computing Processor voltage, the voltage can be changed by writing a parameter. - tpu_cur @@ -769,7 +769,7 @@ Detailed Information of Devices in PCIe Mode Readwrite property: read only. - Meaning: tpu current. + Meaning: Tensor Computing Processor current. - fan_speed @@ -857,7 +857,7 @@ Detailed Information of Devices in PCIe Mode Readwrite property: read and write. - Meaning: Dump register, input 1 is dumped to tpu register, input 2 is dumped to gdma register. + Meaning: Dump register, input 1 is dumped to Tensor Computing Processor register, input 2 is dumped to gdma register. - heap @@ -926,7 +926,7 @@ In SOC mode, only JPU and VPU supports proc interface, and the correspondin Readwrite property: read only. 
- JPU loadbalance : recording JPU0-JPU1(1684x),JPU0-JPU3(1684) encoding/decoding times, JPU* is JPEG encoder/decoder inside chip, value range: 0~2147483647 + JPU loadbalance : recording JPU0-JPU1(1684x),JPU0-JPU3(1684) encoding/decoding times, JPU* is JPEG encoder/decoder inside processor, value range: 0~2147483647 - vpuinfo diff --git a/doc/guide_en/3_3_sysfs_interface_description.rst b/doc/guide_en/3_3_sysfs_interface_description.rst index 5b33e0e..ed77d71 100644 --- a/doc/guide_en/3_3_sysfs_interface_description.rst +++ b/doc/guide_en/3_3_sysfs_interface_description.rst @@ -1,10 +1,10 @@ .. vim: syntax=rst -Introduction of TPU Drive Sysfs File System +Introduction of Tensor Computing Processor Drive Sysfs File System ------------------------------------------------ -The sysfs file system interface is used to obtain information such as the usage of TPU. -The table below lists the device information that can be obtained by TPU Drive Sysfs File System and the support information in PCIe and SOC modes: +The sysfs file system interface is used to obtain information such as the usage of Tensor Computing Processor. +The table below lists the device information that can be obtained by Tensor Computing Processor Drive Sysfs File System and the support information in PCIe and SOC modes: .. list-table:: :widths: 40 30 30 @@ -38,13 +38,13 @@ Meanings of Parameters The meaning of each part is introduced below. -- npu_usage, the percentage of time TPU (NPU) is working over a period of time (window width) +- npu_usage, the percentage of time Tensor Computing Processor is working over a period of time (window width) - npu_usage_enable, npu_usage_enable, whether to enable statistics on NPU usage, enabled by default. - npu_usage_interval, the time window width of statistics on NPU usage (in ms). The default is 500ms and the value range is [200,2000]. 
-Specific use method of TPU Drive Sysfs File System interface +Specific use method of Tensor Computing Processor Drive Sysfs File System interface ------------------------------------------------------------- Examples are as follows: diff --git a/doc/guide_en/3_tools.rst b/doc/guide_en/3_tools.rst index 79f3c3e..2cf6011 100644 --- a/doc/guide_en/3_tools.rst +++ b/doc/guide_en/3_tools.rst @@ -19,7 +19,7 @@ Operation instructions for proc file system 3_2_proc_description -Operation Instructions for Tpu Drive Sysfs File System +Operation Instructions for Tensor Computing Processor Drive Sysfs File System ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. toctree:: diff --git a/doc/guide_en/5_out_of_band_management_interface.rst b/doc/guide_en/5_out_of_band_management_interface.rst index d696d83..04beb8b 100644 --- a/doc/guide_en/5_out_of_band_management_interface.rst +++ b/doc/guide_en/5_out_of_band_management_interface.rst @@ -38,7 +38,7 @@ SC5+ MCU protocol command ===================== ========== ========== ============================================= Meaning Address Attribute Description --------------------- ---------- ---------- --------------------------------------------- - Chip temperature 0x00 RO unsigned byte, unit: centigrade + Processor temperature 0x00 RO unsigned byte, unit: centigrade --------------------- ---------- ---------- --------------------------------------------- Card temperature 0x01 RO unsigned byte, unit: centigrade --------------------- ---------- ---------- --------------------------------------------- @@ -50,7 +50,7 @@ SC5+ MCU protocol command --------------------- ---------- ---------- --------------------------------------------- Hardware Version 0x14 RO unsigned byte --------------------- ---------- ---------- --------------------------------------------- - Firmware Version 0x18 RO unsigned int;[7:0]Minor version;[15:8]Major version;[31:16]chip version + Firmware Version 0x18 RO unsigned int;[7:0]Minor version;[15:8]Major 
version;[31:16]processor version --------------------- ---------- ---------- --------------------------------------------- Kind of card 0x1c RO unsigned byte(Kind of card, SC5+ is 7) --------------------- ---------- ---------- --------------------------------------------- @@ -84,7 +84,7 @@ SC7Pro MCU protocol command ==================== ========== ========== ============================================= Meaning Address Attribute Description -------------------- ---------- ---------- --------------------------------------------- - Chip temperature 0x00 RO unsigned byte, unit: centigrade + Processor temperature 0x00 RO unsigned byte, unit: centigrade -------------------- ---------- ---------- --------------------------------------------- Card temperature 0x01 RO unsigned byte, unit: centigrade -------------------- ---------- ---------- --------------------------------------------- @@ -96,7 +96,7 @@ SC7Pro MCU protocol command -------------------- ---------- ---------- --------------------------------------------- Hardware Version 0x14 RO unsigned byte -------------------- ---------- ---------- --------------------------------------------- - Firmware Version 0x18 RO unsigned int;[7:0]Minor version;[15:8]Major version;[31:16]chip version + Firmware Version 0x18 RO unsigned int;[7:0]Minor version;[15:8]Major version;[31:16]processor version -------------------- ---------- ---------- --------------------------------------------- Kind of card 0x1c RO unsigned byte(Kind of card,sc7pro is 0x21) -------------------- ---------- ---------- --------------------------------------------- diff --git a/doc/reference/1_bmlib_fast_begin.rst b/doc/reference/1_bmlib_fast_begin.rst index 671f161..1e37428 100644 --- a/doc/reference/1_bmlib_fast_begin.rst +++ b/doc/reference/1_bmlib_fast_begin.rst @@ -18,11 +18,11 @@ * - BM1684X - 算能面向深度学习领域推出的第四代张量处理器 - * - TPU + * - 智能视觉深度学习处理器 - BM1684内部神经网络处理单元 * - SOC Mode - - 一种产品形态,SDK运行于A53 AARCH64平台,TPU作为平台总线设备。 + - 一种产品形态,SDK运行于A53 
AARCH64平台,智能视觉深度学习处理器作为平台总线设备。 * - PCIE Mode - 一种产品形态,SDK运行于host平台(可以是X86/AARCH64服务器),BM1684作为PCIE接口的深度学习计算加速卡存在 diff --git a/doc/reference/2_bmlib_basic_concept.rst b/doc/reference/2_bmlib_basic_concept.rst index 549b117..130866d 100644 --- a/doc/reference/2_bmlib_basic_concept.rst +++ b/doc/reference/2_bmlib_basic_concept.rst @@ -1,7 +1,7 @@ Bmlib的基本概念和功能 ===================== -基于算能神经网络加速芯片设计的SDK的简单功能框图如下: +基于算能神经网络加速处理器设计的SDK的简单功能框图如下: .. image:: ./images/image1.png :align: center @@ -33,7 +33,7 @@ Bmlib是在内核驱动之上封装的一层底层软件库,完成的主要功 Handle的概念 ------------ -我们的神经网络加速设备,无论是在PCIE模式,还是SOC模式,安装完tpu的驱动后,会成为一个标准的字符设备。上层用户进程如果要使用这个设备,需要在这个设备上创建一个handle句柄。 +我们的神经网络加速设备,无论是在PCIE模式,还是SOC模式,安装完智能视觉深度学习处理器的驱动后,会成为一个标准的字符设备。上层用户进程如果要使用这个设备,需要在这个设备上创建一个handle句柄。 Handle是管理api,申请memory,释放memory的handle,如果一个进程创建了两个handle,名字为handle_A1,handle_A2,这是两个独立的handle。 @@ -76,7 +76,7 @@ Memory的种类 上图以PCIE模式介绍memory的种类,其中host可以是PC机/服务器,PCIE板卡就是SC5系列板卡。Host端的memory我们称之system memory,PCIE板卡上的memory我们称之为global memory,或者device -memory。BM1684芯片中有专门的DMA硬件单元在system memory和global +memory。BM1684处理器中有专门的DMA硬件单元在system memory和global memory之间搬运数据。 Api的概念和同步 @@ -85,7 +85,7 @@ Api的概念和同步 .. 
image:: ./images/image7.png :align: center -Host这端的软件如果想让tpu完成一个任务,需要向tpu发送一个“api”,类似于一个命令。请注意发送api的函数和api的执行完成是异步的,所以host这端的软件还需要调用一个sync函数类等到api的真正完成。 +Host这端的软件如果想让智能视觉深度学习处理器完成一个任务,需要向智能视觉深度学习处理器发送一个“api”,类似于一个命令。请注意发送api的函数和api的执行完成是异步的,所以host这端的软件还需要调用一个sync函数类等到api的真正完成。 目前发送api的动作,都已经封在bmcv/bmrt功能库中了,客户无法直接发送api,只能通过调用bmcv/bmrt的接口发送api。 @@ -94,7 +94,7 @@ Host这端的软件如果想让tpu完成一个任务,需要向tpu发送一个 Profile接口 ----------- -Profile接口用于获取tpu处理api花费的时间,这个时间是从tpu开始工作后一直累加的(如果有不断的api得到处理),如果系统中只有一个进程使用tpu设备,我们可以通过计算调用api前后profile数据的差值来得到api的处理时间。 +Profile接口用于获取智能视觉深度学习处理器处理api花费的时间,这个时间是从智能视觉深度学习处理器开始工作后一直累加的(如果有不断的api得到处理),如果系统中只有一个进程使用智能视觉深度学习处理器,我们可以通过计算调用api前后profile数据的差值来得到api的处理时间。 A53的使能 --------- @@ -105,9 +105,9 @@ core,并让他们完成一些加速任务。 Power控制 --------- -我们提供了接口用于获取和设置tpu的工作频率,用户可以自己定义一些自己的功耗控制策略。 +我们提供了接口用于获取和设置智能视觉深度学习处理器的工作频率,用户可以自己定义一些自己的功耗控制策略。 杂项信息接口 ------------ -用于获取板卡的信息和运行过程中的统计信息。目前包括memory总量和使用量,tpu的利用率 +用于获取板卡的信息和运行过程中的统计信息。目前包括memory总量和使用量,智能视觉深度学习处理器的利用率 diff --git a/doc/reference/3_bmlib_api_detail.rst b/doc/reference/3_bmlib_api_detail.rst index 2d3e5cc..c20d225 100644 --- a/doc/reference/3_bmlib_api_detail.rst +++ b/doc/reference/3_bmlib_api_detail.rst @@ -9,14 +9,14 @@ bm_dev_getcount 函数原型:bm_status_t bm_dev_getcount(int \*count) -函数作用:获取当前系统中,存在多少个sophon设备,如果获取的设备个数为N,则devid的合法取值为[0,N-1]。 +函数作用:获取当前系统中,存在多少个SOPHON设备,如果获取的设备个数为N,则devid的合法取值为[0,N-1]。 参数介绍: +--------------+--------------+---------------------------------------+ | 参数名 | 输入/输出 | 说明 | +==============+==============+=======================================+ -| count | 输出 | 用于存放sophon设备个数的指针 | +| count | 输出 | 用于存放SOPHON设备个数的指针 | +--------------+--------------+---------------------------------------+ 返回值 :BM_SUCCESS代表获得正确个数;其他错误码代表无法获取个数 @@ -366,6 +366,30 @@ bm_device_mem_t \*pmem, unsigned int size); 返回值 :BM_SUCCESS代表分配成功;其他错误码代表分配失败 +bm_malloc_device_mem +~~~~~~~~~~~~~~~~~~~~~ + +函数原型:bm_status_t bm_malloc_device_mem(bm_handle_t handle, +unsigned long long \*paddr, int heap_id, unsigned 
long long size); + +函数作用:分配指定字节个数大小的device类型的memory,输出分配的物理地址 + +参数介绍: + ++--------------+--------------+---------------------------------------+ +| 参数名 | 输入/输出 | 说明 | ++==============+==============+=======================================+ +| handle | 输入 | 设备句柄 | ++--------------+--------------+---------------------------------------+ +| paddr | 输出 | 分配出device memory的地址 | ++--------------+--------------+---------------------------------------+ +| heap_id | 输入 | 所指定分配GMEM的HEAP(0/1/2) | ++--------------+--------------+---------------------------------------+ +| size | 输入 | 需要分配的byte的个数 | ++--------------+--------------+---------------------------------------+ + +返回值 :BM_SUCCESS代表分配成功;其他错误码代表分配失败 + bm_malloc_device_byte_heap ~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -447,6 +471,25 @@ bm_free_device 返回值 :无 +bm_free_device_mem +~~~~~~~~~~~~~~~~~~ + +函数原型:void bm_free_device_mem(bm_handle_t handle, unsigned long long paddr); + +函数作用:释放一块device类型的memory,输入分配的物理地址 + +参数介绍: + ++--------------+--------------+---------------------------------------+ +| 参数名 | 输入/输出 | 说明 | ++==============+==============+=======================================+ +| handle | 输入 | 设备句柄 | ++--------------+--------------+---------------------------------------+ +| paddr | 输入 | 要释放的device addr | ++--------------+--------------+---------------------------------------+ + +返回值 :无 + bm_gmem_arm_reserved_request ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -1244,6 +1287,25 @@ bm_handle_sync 返回值 :BM_SUCCESS代表同步成功;其他错误码代表同步失败 +bm_set_sync_timeout +~~~~~~~~~~~~~~~~~~~ + +函数原型:bm_status_t bm_set_sync_timeout(bm_handle_t handle, int timeout); + +函数作用:设置最大等待tpu返回消息的时间,单位ms + +参数介绍: + ++--------------+--------------+---------------------------------------+ +| 参数名 | 输入/输出 | 说明 | ++==============+==============+=======================================+ +| handle | 输入 | 设备句柄 | ++--------------+--------------+---------------------------------------+ +| timeout | 输入 | 超时时间 | ++--------------+--------------+---------------------------------------+ 
+ +返回值 :BM_SUCCESS代表设置成功;其他错误码代表设置失败 + profile接口 ----------- @@ -1287,16 +1349,16 @@ bm_set_clk_tpu_freq 函数原型:bm_status_t bm_set_clk_tpu_freq(bm_handle_t handle, int freq); -函数作用:设置当前tpu的工作频率,只在PCIE模式有效 +函数作用:设置当前智能视觉深度学习处理器的工作频率,只在PCIE模式有效 参数介绍: +--------------+--------------+---------------------------------------+ -| 参数名 | 输入/输出 | 说明 | +| 参数名 | 输入/输出 | 说明 | +==============+==============+=======================================+ -| handle | 输入 | 设备句柄 | +| handle | 输入 | 设备句柄 | +--------------+--------------+---------------------------------------+ -| freq | 输入 | tpu的目标工作频率 | +| freq | 输入 | 智能视觉深度学习处理器的目标工作频率 | +--------------+--------------+---------------------------------------+ 返回值 :BM_SUCCESS代表成功;其他错误码代表失败 @@ -1307,16 +1369,16 @@ bm_get_clk_tpu_freq 函数原型:bm_status_t bm_get_clk_tpu_freq(bm_handle_t handle, int \*freq); -函数作用:获取当前tpu的工作频率 +函数作用:获取当前智能视觉深度学习处理器的工作频率 参数介绍: +--------------+--------------+---------------------------------------+ -| 参数名 | 输入/输出 | 说明 | +| 参数名 | 输入/输出 | 说明 | +==============+==============+=======================================+ -| handle | 输入 | 设备句柄 | +| handle | 输入 | 设备句柄 | +--------------+--------------+---------------------------------------+ -| freq | 输出 | 保存tpu当前工作频率的指针 | +| freq | 输出 | 保存智能视觉深度学习处理器当前工作频率的指针 | +--------------+--------------+---------------------------------------+ 返回值 :BM_SUCCESS代表成功;其他错误码代表失败 @@ -1335,11 +1397,11 @@ bm_misc_info \*pmisc_info); 参数介绍: +--------------+--------------+---------------------------------------+ -| 参数名 | 输入/输出 | 说明 | +| 参数名 | 输入/输出 | 说明 | +==============+==============+=======================================+ -| Handle | 输入 | 设备句柄 | +| Handle | 输入 | 设备句柄 | +--------------+--------------+---------------------------------------+ -| pmisc_info | 输出 | 存放misc数据的指针 | +| pmisc_info | 输出 | 存放misc数据的指针 | +--------------+--------------+---------------------------------------+ 返回值 :BM_SUCCESS代表成功;其他错误码代表失败 @@ -1355,9 +1417,9 @@ bm_get_card_num 参数介绍: 
+--------------+--------------+---------------------------------------+ -| 参数名 | 输入/输出 | 说明 | +| 参数名 | 输入/输出 | 说明 | +==============+==============+=======================================+ -| card_num | 输出 | 存放卡数量的指针 | +| card_num | 输出 | 存放卡数量的指针 | +--------------+--------------+---------------------------------------+ 返回值 :BM_SUCCESS代表成功;其他错误码代表失败 @@ -1411,7 +1473,7 @@ bm_get_chipid 函数原型:bm_status_t bm_get_chipid(bm_handle_t handle, unsigned int \*p_chipid); -函数作用:获取设备对应的芯片ID(0x1684和0x1686) +函数作用:获取设备对应的处理器ID(0x1684和0x1686) 参数介绍: @@ -1420,7 +1482,7 @@ bm_get_chipid +==============+==============+=======================================+ | Handle | 输入 | 设备句柄 | +--------------+--------------+---------------------------------------+ -| p_chipid | 输出 | 存放芯片ID的指针 | +| p_chipid | 输出 | 存放处理器ID的指针 | +--------------+--------------+---------------------------------------+ 返回值 :BM_SUCCESS代表成功;其他错误码代表失败 @@ -1589,7 +1651,7 @@ bm_set_debug_mode 函数原型:void bm_set_debug_mode(bm_handle_t handle, int mode); -函数作用:为tpu fw log设置debug模式 备注:此函数SC3在使用 +函数作用:为智能视觉深度学习处理器 fw log设置debug模式 备注:此函数SC3在使用 参数介绍: @@ -1848,7 +1910,7 @@ bm_get_board_name 函数原型:bm_status_t bm_get_board_name(bm_handle_t handle, char \*name); -函数作用:获取当前板卡的名称,名称:芯片id-板卡类型(如:1684-SC5+)。 +函数作用:获取当前板卡的名称,名称:处理器id-板卡类型(如:1684-SC5+)。 参数介绍: diff --git a/doc/reference/4_bmlib_data_struct.rst b/doc/reference/4_bmlib_data_struct.rst index 449ab2f..cf37900 100644 --- a/doc/reference/4_bmlib_data_struct.rst +++ b/doc/reference/4_bmlib_data_struct.rst @@ -135,6 +135,8 @@ unsigned long cdma_out_counter; unsigned long tpu_process_time; +unsigned long tpu1_process_time; + unsigned long sent_api_counter; unsigned long completed_api_counter; diff --git a/doc/reference_en/1_bmlib_fast_begin.rst b/doc/reference_en/1_bmlib_fast_begin.rst index e4b2f35..808523a 100644 --- a/doc/reference_en/1_bmlib_fast_begin.rst +++ b/doc/reference_en/1_bmlib_fast_begin.rst @@ -18,11 +18,11 @@ Term Interpretation * - BM1684X - The fourth-generation tensor 
processor unit for deep learning developed by SOPHGO - * - TPU + * - Tensor Computing Processor - Neural network processing unit in BM1684 * - SOC Mode - - A product form, the SDK runs on A53 AARCH64 platform, and TPU is used as the platform bus device + - A product form, the SDK runs on A53 AARCH64 platform, and Tensor Computing Processor is used as the platform bus device * - PCIE Mode - A product form, SDK runs on the host platform ( it can be X86 or AARCH64 server), BM1684 serves as deep learning computing accelerator card in PCIe interface diff --git a/doc/reference_en/2_bmlib_basic_concept.rst b/doc/reference_en/2_bmlib_basic_concept.rst index f5b228d..4c2f7cc 100644 --- a/doc/reference_en/2_bmlib_basic_concept.rst +++ b/doc/reference_en/2_bmlib_basic_concept.rst @@ -1,7 +1,7 @@ Basic Concepts and Functions of Bmlib ===================== -The simple functional diagram of SDK based on SOPHGO neural network accelerator chip is as follows: +The simple functional diagram of SDK based on SOPHGO neural network accelerator processor is as follows: .. image:: ./images/image1.png :align: center @@ -34,7 +34,7 @@ Concept of Handle ------------ Our neural network acceleration device, whether in PCIe mode or SOC mode, -will become a standard character device after the installation of the TPU driver. +will become a standard character device after the installation of the Tensor Computing Processor driver. A handle needs to be created when the upper user process try to run on this device. Handle is used in managing api, applying for memory and releasing memory. @@ -87,7 +87,7 @@ The above figure introduces the types of memory in PCIe mode, in which host can be PC/server, and PCIe board is SC5 series board. The memory on the host side is called system memory, and the memory on the PCIe board is called global memory or device memory. -There is a DMA hardware in BM1684 chip special for carrying data between system memory and global memory. 
+There is dedicated DMA hardware in the BM1684 processor for carrying data between system memory and global memory. Concept and Synchronization of Api --------------- @@ -95,8 +95,8 @@ Concept and Synchronization of Api .. image:: ./images/image7.png :align: center -If the software on the host side wants the TPU to complete a task, -it needs to send an “API” to the TPU, which is like a command. +If the software on the host side wants the Tensor Computing Processor to complete a task, +it needs to send an “API” to the Tensor Computing Processor, which is like a command. Please note that the sending API function and the execution completion of the API are asynchronous, so the software on the host side needs to call a sync function class until the API is really completed. @@ -111,9 +111,9 @@ In this way, the API has been completed when the interface function of bmcv / bm Interface of Profile ----------- -Profile interface is used to obtain the time spent by the TPU processing API, -which has been accumulated since TPU began to work (if there are continuous APIs to be processed). -If only one process in the system uses TPU device, we can calculate the processing time of the API by calculating the difference between the profile data before and after calling the API. +Profile interface is used to obtain the time spent by the Tensor Computing Processor processing APIs, +which has been accumulated since the Tensor Computing Processor began to work (if there are continuous APIs to be processed). +If only one process in the system uses the Tensor Computing Processor device, we can calculate the processing time of the API by calculating the difference between the profile data before and after calling the API. Enable of A53 --------- @@ -124,11 +124,11 @@ and let them complete some acceleration tasks. Power Control --------- -We provide interfaces to obtain and set the working frequency of TPU. +We provide interfaces to obtain and set the working frequency of the Tensor Computing Processor.
Users can define their own power consumption control strategies. Interface of Miscellaneous Information ------------ It is used to obtain the information of the board and the statistical information during operation. -At present, it includes the total amount and usage of memory and the utilization rate of TPU +At present, it includes the total amount and usage of memory and the utilization rate of Tensor Computing Processor diff --git a/doc/reference_en/3_bmlib_api_detail.rst b/doc/reference_en/3_bmlib_api_detail.rst index c3af0c3..949b0f9 100644 --- a/doc/reference_en/3_bmlib_api_detail.rst +++ b/doc/reference_en/3_bmlib_api_detail.rst @@ -10,14 +10,14 @@ bm_dev_getcount Function prototype: bm_status_t bm_dev_getcount(int *count) -Function: obtain the number of sophon devices in the current system. If the number of devices obtained is N, the legal value of devid is [0, N-1]. +Function: obtain the number of SOPHON devices in the current system. If the number of devices obtained is N, the legal value of devid is [0, N-1]. 
Parameter introduction: +--------------+---------------+-------------------------------------------------+ | Parameter | Input / output| Description | +==============+===============+=================================================+ -| count | Output | Pointer for storing the number of sophon devices| +| count | Output | Pointer for storing the number of SOPHON devices| +--------------+---------------+-------------------------------------------------+ Return value: BM_SUCCESS indicates getting the correct number; @@ -1250,7 +1250,7 @@ bm_set_clk_tpu_freq Function prototype: bm_status_t bm_set_clk_tpu_freq(bm_handle_t handle, int freq); -Function: set the working frequency of the current TPU, which is only valid in PCIe mode +Function: set the working frequency of the current Tensor Computing Processor, which is only valid in PCIe mode Parameter introduction: @@ -1259,7 +1259,7 @@ Parameter introduction: +==============+===============+=================================================+ | handle | input | Device handle | +--------------+---------------+-------------------------------------------------+ -| freq | input | Pointer to save TPU current operating frequency | +| freq | input | Target Tensor Computing Processor frequency | +--------------+---------------+-------------------------------------------------+ Return value: BM_SUCCESS indicates success; Other error codes represent failure @@ -1269,17 +1269,17 @@ bm_get_clk_tpu_freq Function prototype: bm_status_t bm_get_clk_tpu_freq(bm_handle_t handle, int *freq); -Function: obtain the working frequency of the current TPU +Function: obtain the working frequency of the current Tensor Computing Processor Parameter introduction: -+--------------+---------------+-------------------------------------------------+ -| Parameter | Input / output| Description | -+==============+===============+=================================================+ -| handle | input | Device handle |
-+--------------+---------------+-------------------------------------------------+ -| freq | output | Pointer to save current TPU operating frequency | -+--------------+---------------+-------------------------------------------------+ ++--------------+---------------+------------------------------------------------------------------------+ +| Parameter | Input / output| Description | ++==============+===============+========================================================================+ +| handle | input | Device handle | ++--------------+---------------+------------------------------------------------------------------------+ +| freq | output | Pointer to save current Tensor Computing Processor operating frequency | ++--------------+---------------+------------------------------------------------------------------------+ Return value: BM_SUCCESS indicates success; Other error codes represent failure @@ -1310,7 +1310,7 @@ bm_get_chipid Function prototype: bm_status_t bm_get_chipid(bm_handle_t handle, unsigned int *p_chipid); -Function: obtain the chip ID corresponding to the device +Function: obtain the processor ID corresponding to the device Parameter introduction: @@ -1319,7 +1319,7 @@ Parameter introduction: +==============+===============+=================================================+ | handle | input | Device handle | +--------------+---------------+-------------------------------------------------+ -| p_chipid | output | Pointer to save chip ID | +| p_chipid | output | Pointer to save processor ID | +--------------+---------------+-------------------------------------------------+ Return value: BM_SUCCESS indicates success; Other error codes represent failure @@ -1417,7 +1417,7 @@ bm_set_debug_mode Function prototype: void bm_set_debug_mode(bm_handle_t handle, int mode); -Function: set debug mode for tpu fw log +Function: set debug mode for Tensor Computing Processor fw log Note: this function is used in SC3 @@ -1671,7 +1671,7 @@ bm_get_board_name 
Function prototype: bm_status_t bm_get_board_name(bm_handle_t handle, char *name); -Function: obtain the name of the current board, format: chip id - board type (e.g., 1684-SC5+). +Function: obtain the name of the current board, format: processor id - board type (e.g., 1684-SC5+). Parameter introduction: diff --git a/driver/Makefile b/driver/Makefile index 15ce344..c2ceb60 100644 --- a/driver/Makefile +++ b/driver/Makefile @@ -130,6 +130,13 @@ else CENTOS_RELEASE_FIX = $(shell echo $(CENTOS_RELEASE)|cut -d . -f1) cflags-y += -DCENTOS_KERNEL_FIX=$(CENTOS_RELEASE_FIX) ccflags-y += -DCENTOS_KERNEL_FIX=$(CENTOS_RELEASE_FIX) + else ifeq ($(findstring BigCloud,$(shell cat /etc/redhat-release)),BigCloud) + LINUX_SRC = /lib/modules/$(shell uname -r)/build + CENTOS_KERNEL = $(shell uname -r |cut -d - -f1) + CENTOS_RELEASE = $(shell uname -r |cut -d - -f 2 |cut -d . -f 1-3) + CENTOS_RELEASE_FIX = $(shell echo $(CENTOS_RELEASE)|cut -d . -f1) + cflags-y += -DCENTOS_KERNEL_FIX=$(CENTOS_RELEASE_FIX) + ccflags-y += -DCENTOS_KERNEL_FIX=$(CENTOS_RELEASE_FIX) else LINUX_SRC = /lib/modules/$(shell uname -r)/build endif diff --git a/driver/a53lite_pkg.bin b/driver/a53lite_pkg.bin index ba23990..f997d07 100644 Binary files a/driver/a53lite_pkg.bin and b/driver/a53lite_pkg.bin differ diff --git a/driver/bm1682/bm1682_smmu.c b/driver/bm1682/bm1682_smmu.c index 5023820..3cac8bc 100644 --- a/driver/bm1682/bm1682_smmu.c +++ b/driver/bm1682/bm1682_smmu.c @@ -411,8 +411,11 @@ static int bm_setup_iommu_pages(struct iommu_ctrl *ctrl, struct bm_buffer_object return ret; } - -#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 9, 0) +#if LINUX_VERSION_CODE >= KERNEL_VERSION(6, 5, 0) + page_done = get_user_pages(bo->iommu.start_aligned, bo->nr_pages, + bo->iommu.is_dst == 1 ? 1 : 0, // dst need write, src only need read + bo->pages); +#elif LINUX_VERSION_CODE >= KERNEL_VERSION(4, 9, 0) page_done = get_user_pages(bo->iommu.start_aligned, bo->nr_pages, bo->iommu.is_dst == 1 ? 
1 : 0, // dst need write, src only need read bo->pages, NULL); diff --git a/driver/bm1684/bm1684_card.c b/driver/bm1684/bm1684_card.c index b45f011..6dae6e2 100644 --- a/driver/bm1684/bm1684_card.c +++ b/driver/bm1684/bm1684_card.c @@ -30,7 +30,8 @@ int bm1684_card_get_chip_index(struct bm_device_info *bmdi) int mode = 0x0; if ((BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC5_PRO) || - (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_PRO)) { + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_PRO) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_FP150)) { parent = bm_pci_upstream_bridge(bmdi->cinfo.pcidev); if (parent != NULL) bmdi->cinfo.chip_index = PCI_SLOT(parent->devfn); @@ -48,7 +49,10 @@ int bm1684_card_get_chip_index(struct bm_device_info *bmdi) bmdi->cinfo.chip_index = 0x2; else bmdi->cinfo.chip_index = 0x0; - } else if (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_PLUS) { + } else if ((BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_PLUS) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_AIV01X) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_AIV02X) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_AIV03X)) { mode = bmdrv_pcie_get_mode(bmdi) & 0x7; if ((mode == 0x7) || (mode == 0x6)) bmdi->cinfo.chip_index = 0x0; @@ -98,6 +102,9 @@ int bm1684_card_get_chip_num(struct bm_device_info *bmdi) (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_PRO)) return 0x8; + if (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_FP150) + return 0x6; + mode = bmdrv_pcie_get_mode(bmdi) & 0x7; switch (mode) { @@ -118,6 +125,9 @@ int bm1684_card_get_chip_num(struct bm_device_info *bmdi) } num = 0x3; break; + case 0x5: + num = 0x2; + break; case 0x4: case 0x3: num = 0x1; @@ -262,11 +272,23 @@ int bm1684_get_board_type_by_id(struct bm_device_info *bmdi, char *s_board_type, strncpy(s_board_type, "EVB", 10); break; case BOARD_TYPE_SC7_PRO: - strncpy(s_board_type, "SC7P", 10); + strncpy(s_board_type, "SC7-224T", 10); + break; + case BOARD_TYPE_SC7_FP150: + strncpy(s_board_type, "SC7-FP150", 10); break; case BOARD_TYPE_SC7_PLUS: 
strncpy(s_board_type, "SC7+", 10); break; + case BOARD_TYPE_AIV03X: + strncpy(s_board_type, "AIV03X", 10); + break; + case BOARD_TYPE_AIV02X: + strncpy(s_board_type, "AIV02X", 10); + break; + case BOARD_TYPE_AIV01X: + strncpy(s_board_type, "AIV01X", 10); + break; case BOARD_TYPE_CP24: strncpy(s_board_type, "CP24", 10); break; @@ -363,12 +385,24 @@ int bm1684_get_board_version_by_id(struct bm_device_info *bmdi, char *s_board_ve else snprintf(s_board_version, 10, "V1_%d", board_version); break; + case BOARD_TYPE_SC7_FP150: + strncpy(s_board_version, "V0_0",10); + break; case BOARD_TYPE_SC7_PLUS: if (board_version == 0x11) strncpy(s_board_version, "V1_0", 10); else snprintf(s_board_version, 10, "V1_%d", board_version); break; + case BOARD_TYPE_AIV01X: + strncpy(s_board_version, "V0_0", 10); + break; + case BOARD_TYPE_AIV02X: + strncpy(s_board_version, "V0_0", 10); + break; + case BOARD_TYPE_AIV03X: + strncpy(s_board_version, "V0_0", 10); + break; case BOARD_TYPE_CP24: if (board_version == 0x11) strncpy(s_board_version, "V1_0", 10); @@ -407,9 +441,9 @@ void bm1684_get_clk_temperature(struct bm_device_info *bmdi) } case BOARD_TYPE_SC7_PRO: { - bmdi->c_attr.thermal_info.half_clk_tmp = 65; + bmdi->c_attr.thermal_info.half_clk_tmp = 85; bmdi->c_attr.thermal_info.min_clk_tmp = 95; - bmdi->c_attr.thermal_info.max_clk_tmp = 60; + bmdi->c_attr.thermal_info.max_clk_tmp = 80; bmdi->c_attr.thermal_info.extreme_tmp = 105; break; } diff --git a/driver/bm1684/bm1684_clkrst.c b/driver/bm1684/bm1684_clkrst.c index bdc1421..cb064af 100644 --- a/driver/bm1684/bm1684_clkrst.c +++ b/driver/bm1684/bm1684_clkrst.c @@ -115,8 +115,7 @@ int bmdrv_clk_set_tpu_target_freq(struct bm_device_info *bmdi, int target) int val = 0; if (bmdi->misc_info.pcie_soc_mode == 0) { - if(targetboot_info.tpu_min_clk || - target>bmdi->boot_info.tpu_max_clk) { + if (target < TPU_MIN_CLK || target > TPU_MAX_CLK) { pr_err("%s: freq %d is too small or large\n", __func__, target); return -1; } diff --git 
a/driver/bm1684/bm1684_clkrst.h b/driver/bm1684/bm1684_clkrst.h index 3296d67..80af0e7 100644 --- a/driver/bm1684/bm1684_clkrst.h +++ b/driver/bm1684/bm1684_clkrst.h @@ -36,6 +36,9 @@ typedef enum { #define BIT_SW_RESET_SMMU 12 #define BIT_SW_RESET_CDMA 11 +#define TPU_MIN_CLK 25 +#define TPU_MAX_CLK 1000 + void bmdrv_clk_set_tpu_divider(struct bm_device_info *bmdi, int devider_factor); void bmdrv_clk_set_tpu_divider_fpll(struct bm_device_info *bmdi, int devider_factor); void bmdrv_clk_set_close(struct bm_device_info *bmdi); diff --git a/driver/bm1684/bm1684_flash.c b/driver/bm1684/bm1684_flash.c index c15d11f..7afa8cd 100644 --- a/driver/bm1684/bm1684_flash.c +++ b/driver/bm1684/bm1684_flash.c @@ -102,7 +102,8 @@ int bm1684_get_bootload_version(struct bm_device_info *bmdi) boot_load_spi_addr + i * SPI_BLOCK, SPI_BLOCK); } if ((BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_BM1684X_EVB) || - (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_PRO)) + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_PRO) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_FP150)) bm1684_cat_message(BLv_cat_bm1686, len, 4, (char *)stagemem_s2d->v_addr, bmdi->cinfo.boot_loader_version[0]); else bm1684_cat_message(BLv_cat, len, 4, (char *)stagemem_s2d->v_addr, bmdi->cinfo.boot_loader_version[0]); @@ -111,7 +112,8 @@ int bm1684_get_bootload_version(struct bm_device_info *bmdi) mutex_unlock(&memcpy_info->stagemem_s2d.stage_mutex); if ((BM1684_BOARD_TYPE(bmdi) != BOARD_TYPE_BM1684X_EVB) && - (BM1684_BOARD_TYPE(bmdi) != BOARD_TYPE_SC7_PRO)) { + (BM1684_BOARD_TYPE(bmdi) != BOARD_TYPE_SC7_PRO) && + (BM1684_BOARD_TYPE(bmdi) != BOARD_TYPE_SC7_FP150)) { ret = bm1684_get_bootload_version_number(bmdi); if (ret < 0) { bmdi->cinfo.boot_loader_num = 0; diff --git a/driver/bm1684/bm1684_jpu.c b/driver/bm1684/bm1684_jpu.c index 3edc8fb..2d83750 100644 --- a/driver/bm1684/bm1684_jpu.c +++ b/driver/bm1684/bm1684_jpu.c @@ -262,11 +262,11 @@ long bm_jpu_ioctl(struct file *filp, u_int cmd, u_long arg) if (jil->core_idx == core_idx && 
jil->filp == filp) { jil->inuse = 0; bmdi->jpudrvctx.s_jpu_usage_info.jpu_open_status[core_idx] = 0; + up(&bmdi->jpudrvctx.jpu_sem); dprintk("[jpudrv]:core_idx=%d,filp=%p\n", core_idx, filp); break; } } - up(&bmdi->jpudrvctx.jpu_sem); mutex_unlock(&bmdi->jpudrvctx.jpu_core_lock); } @@ -383,6 +383,11 @@ long bm_jpu_ioctl(struct file *filp, u_int cmd, u_long arg) dprintk("[jpudrv]:JDI_IOCTL_GET_MAX_NUM_JPU_CORE: max_num_jpu_core=%d\n", max_num_jpu_core); break; } + case JDI_IOCTL_RESET_ALL: { + jpu_cores_reset(bmdi); + dprintk("[jpudrv]:JDI_IOCTL_RESET_ALL\n"); + break; + } default: pr_err("[jpudrv]:No such ioctl, cmd is %d\n", cmd); ret = -EFAULT; @@ -397,7 +402,11 @@ static int jpu_map_to_register(struct file *filp, struct vm_area_struct *vm, int unsigned long pfn; struct bm_device_info *bmdi = (struct bm_device_info *)filp->private_data; +#if LINUX_VERSION_CODE >= KERNEL_VERSION(6, 5, 0) + vm_flags_set(vm, VM_IO | VM_RESERVED); +#else vm->vm_flags |= VM_IO | VM_RESERVED; +#endif vm->vm_page_prot = pgprot_noncached(vm->vm_page_prot); pfn = bmdi->jpudrvctx.jpu_register[core_idx].phys_addr >> PAGE_SHIFT; diff --git a/driver/bm1684/bm1684_jpu.h b/driver/bm1684/bm1684_jpu.h index 53bbefa..8358f90 100644 --- a/driver/bm1684/bm1684_jpu.h +++ b/driver/bm1684/bm1684_jpu.h @@ -25,6 +25,7 @@ #define JDI_IOCTL_WRITE_VMEM _IO(JDI_IOCTL_MAGIC, 16) #define JDI_IOCTL_READ_VMEM _IO(JDI_IOCTL_MAGIC, 17) #define JDI_IOCTL_GET_MAX_NUM_JPU_CORE _IO(JDI_IOCTL_MAGIC, 19) +#define JDI_IOCTL_RESET_ALL _IO(JDI_IOCTL_MAGIC, 20) #define MAX_NUM_BOARD 128 #define MAX_NUM_JPU_CORE 4 diff --git a/driver/bm1684/bm1684_lpddr4.h b/driver/bm1684/bm1684_lpddr4.h index 99b3822..4417b33 100644 --- a/driver/bm1684/bm1684_lpddr4.h +++ b/driver/bm1684/bm1684_lpddr4.h @@ -821,96 +821,99 @@ static struct dwc_reg_data phy_pie_data[] = { {0x0009004d, 0x0000}, {0x0009004e, 0x0001}, {0x0009004f, 0x0008}, - {0x00090050, 0x0000}, + {0x00090050, 0x0002}, {0x00090051, 0x045a}, {0x00090052, 0x0009}, - {0x00090053, 
0x0000}, - {0x00090054, 0x0448}, - {0x00090055, 0x0109}, - {0x00090056, 0x0040}, - {0x00090057, 0x0633}, - {0x00090058, 0x0179}, - {0x00090059, 0x0001}, - {0x0009005a, 0x0618}, - {0x0009005b, 0x0109}, - {0x0009005c, 0x40c0}, - {0x0009005d, 0x0633}, - {0x0009005e, 0x0149}, - {0x0009005f, 0x0008}, - {0x00090060, 0x0004}, - {0x00090061, 0x0048}, - {0x00090062, 0x4040}, - {0x00090063, 0x0633}, - {0x00090064, 0x0149}, - {0x00090065, 0x0000}, - {0x00090066, 0x0004}, - {0x00090067, 0x0048}, - {0x00090068, 0x0040}, - {0x00090069, 0x0633}, - {0x0009006a, 0x0149}, - {0x0009006b, 0x0010}, - {0x0009006c, 0x0004}, - {0x0009006d, 0x0018}, + {0x00090053, 0x0030}, + {0x00090054, 0x065a}, + {0x00090055, 0x0009}, + {0x00090056, 0x0000}, + {0x00090057, 0x0448}, + {0x00090058, 0x0109}, + {0x00090059, 0x0040}, + {0x0009005a, 0x0633}, + {0x0009005b, 0x0179}, + {0x0009005c, 0x0001}, + {0x0009005d, 0x0618}, + {0x0009005e, 0x0109}, + {0x0009005f, 0x40c0}, + {0x00090060, 0x0633}, + {0x00090061, 0x0149}, + {0x00090062, 0x0008}, + {0x00090063, 0x0004}, + {0x00090064, 0x0048}, + {0x00090065, 0x4040}, + {0x00090066, 0x0633}, + {0x00090067, 0x0149}, + {0x00090068, 0x0000}, + {0x00090069, 0x0004}, + {0x0009006a, 0x0048}, + {0x0009006b, 0x0040}, + {0x0009006c, 0x0633}, + {0x0009006d, 0x0149}, {0x0009006e, 0x0000}, - {0x0009006f, 0x0004}, - {0x00090070, 0x0078}, - {0x00090071, 0x0549}, - {0x00090072, 0x0633}, - {0x00090073, 0x0159}, - {0x00090074, 0x0d49}, - {0x00090075, 0x0633}, - {0x00090076, 0x0159}, - {0x00090077, 0x094a}, + {0x0009006f, 0x0658}, + {0x00090070, 0x0109}, + {0x00090071, 0x0010}, + {0x00090072, 0x0004}, + {0x00090073, 0x0018}, + {0x00090074, 0x0000}, + {0x00090075, 0x0004}, + {0x00090076, 0x0078}, + {0x00090077, 0x0549}, {0x00090078, 0x0633}, {0x00090079, 0x0159}, - {0x0009007a, 0x0441}, + {0x0009007a, 0x0d49}, {0x0009007b, 0x0633}, - {0x0009007c, 0x0149}, - {0x0009007d, 0x0042}, + {0x0009007c, 0x0159}, + {0x0009007d, 0x094a}, {0x0009007e, 0x0633}, - {0x0009007f, 0x0149}, - 
{0x00090080, 0x0001}, + {0x0009007f, 0x0159}, + {0x00090080, 0x0441}, {0x00090081, 0x0633}, {0x00090082, 0x0149}, - {0x00090083, 0x0000}, - {0x00090084, 0x00e0}, - {0x00090085, 0x0109}, - {0x00090086, 0x000a}, - {0x00090087, 0x0010}, - {0x00090088, 0x0109}, - {0x00090089, 0x0009}, - {0x0009008a, 0x03c0}, - {0x0009008b, 0x0149}, - {0x0009008c, 0x0009}, - {0x0009008d, 0x03c0}, - {0x0009008e, 0x0159}, - {0x0009008f, 0x0018}, - {0x00090090, 0x0010}, - {0x00090091, 0x0109}, - {0x00090092, 0x0000}, + {0x00090083, 0x0042}, + {0x00090084, 0x0633}, + {0x00090085, 0x0149}, + {0x00090086, 0x0001}, + {0x00090087, 0x0633}, + {0x00090088, 0x0149}, + {0x00090089, 0x0000}, + {0x0009008a, 0x00e0}, + {0x0009008b, 0x0109}, + {0x0009008c, 0x000a}, + {0x0009008d, 0x0010}, + {0x0009008e, 0x0109}, + {0x0009008f, 0x0009}, + {0x00090090, 0x03c0}, + {0x00090091, 0x0149}, + {0x00090092, 0x0009}, {0x00090093, 0x03c0}, - {0x00090094, 0x0109}, + {0x00090094, 0x0159}, {0x00090095, 0x0018}, - {0x00090096, 0x0004}, - {0x00090097, 0x0048}, - {0x00090098, 0x0018}, - {0x00090099, 0x0004}, - {0x0009009a, 0x0058}, - {0x0009009b, 0x000b}, - {0x0009009c, 0x0010}, - {0x0009009d, 0x0109}, - {0x0009009e, 0x0001}, - {0x0009009f, 0x0010}, - {0x000900a0, 0x0109}, - {0x000900a1, 0x0005}, - {0x000900a2, 0x07c0}, + {0x00090096, 0x0010}, + {0x00090097, 0x0109}, + {0x00090098, 0x0000}, + {0x00090099, 0x03c0}, + {0x0009009a, 0x0109}, + {0x0009009b, 0x0018}, + {0x0009009c, 0x0004}, + {0x0009009d, 0x0048}, + {0x0009009e, 0x0018}, + {0x0009009f, 0x0004}, + {0x000900a0, 0x0058}, + {0x000900a1, 0x000b}, + {0x000900a2, 0x0010}, {0x000900a3, 0x0109}, - {0x000900a4, 0x000d}, - {0x000900a5, 0x07c0}, + {0x000900a4, 0x0001}, + {0x000900a5, 0x0010}, {0x000900a6, 0x0109}, - {0x000900a7, 0x0004}, + {0x000900a7, 0x0005}, {0x000900a8, 0x07c0}, {0x000900a9, 0x0109}, + {0x000900aa, 0x000d}, + {0x000900ab, 0x07c0}, + {0x000900ac, 0x0189}, {0x00040000, 0x0811}, {0x00040020, 0x0880}, {0x00040040, 0x0000}, @@ -1019,57 +1022,54 @@ static 
struct dwc_reg_data phy_pie_data[] = { {0x0004003a, 0x0880}, {0x0004005a, 0x0000}, {0x0004007a, 0x0000}, - {0x000900aa, 0x0000}, - {0x000900ab, 0x0790}, - {0x000900ac, 0x011a}, - {0x000900ad, 0x0008}, - {0x000900ae, 0x07aa}, - {0x000900af, 0x002a}, - {0x000900b0, 0x0010}, - {0x000900b1, 0x07b2}, + {0x000900ad, 0x0000}, + {0x000900ae, 0x0790}, + {0x000900af, 0x011a}, + {0x000900b0, 0x0008}, + {0x000900b1, 0x07aa}, {0x000900b2, 0x002a}, - {0x000900b3, 0x0000}, - {0x000900b4, 0x07c8}, - {0x000900b5, 0x0109}, - {0x000900b6, 0x0010}, - {0x000900b7, 0x0010}, + {0x000900b3, 0x0010}, + {0x000900b4, 0x07b2}, + {0x000900b5, 0x002a}, + {0x000900b6, 0x0000}, + {0x000900b7, 0x07c8}, {0x000900b8, 0x0109}, {0x000900b9, 0x0010}, - {0x000900ba, 0x02a8}, - {0x000900bb, 0x0129}, - {0x000900bc, 0x0008}, - {0x000900bd, 0x0370}, + {0x000900ba, 0x0010}, + {0x000900bb, 0x0109}, + {0x000900bc, 0x0010}, + {0x000900bd, 0x02a8}, {0x000900be, 0x0129}, - {0x000900bf, 0x000a}, - {0x000900c0, 0x03c8}, - {0x000900c1, 0x01a9}, - {0x000900c2, 0x000c}, - {0x000900c3, 0x0408}, - {0x000900c4, 0x0199}, - {0x000900c5, 0x0014}, - {0x000900c6, 0x0790}, - {0x000900c7, 0x011a}, - {0x000900c8, 0x0008}, - {0x000900c9, 0x0004}, - {0x000900ca, 0x0018}, - {0x000900cb, 0x000e}, - {0x000900cc, 0x0408}, - {0x000900cd, 0x0199}, - {0x000900ce, 0x0008}, - {0x000900cf, 0x8568}, - {0x000900d0, 0x0108}, - {0x000900d1, 0x0018}, - {0x000900d2, 0x0790}, - {0x000900d3, 0x016a}, - {0x000900d4, 0x0008}, - {0x000900d5, 0x01d8}, - {0x000900d6, 0x0169}, - {0x000900d7, 0x0010}, - {0x000900d8, 0x8558}, - {0x000900d9, 0x0168}, - {0x000900da, 0x0070}, - {0x000900db, 0x0788}, - {0x000900dc, 0x016a}, + {0x000900bf, 0x0008}, + {0x000900c0, 0x0370}, + {0x000900c1, 0x0129}, + {0x000900c2, 0x000a}, + {0x000900c3, 0x03c8}, + {0x000900c4, 0x01a9}, + {0x000900c5, 0x000c}, + {0x000900c6, 0x0408}, + {0x000900c7, 0x0199}, + {0x000900c8, 0x0014}, + {0x000900c9, 0x0790}, + {0x000900ca, 0x011a}, + {0x000900cb, 0x0008}, + {0x000900cc, 0x0004}, + 
{0x000900cd, 0x0018}, + {0x000900ce, 0x000e}, + {0x000900cf, 0x0408}, + {0x000900d0, 0x0199}, + {0x000900d1, 0x0008}, + {0x000900d2, 0x8568}, + {0x000900d3, 0x0108}, + {0x000900d4, 0x0018}, + {0x000900d5, 0x0790}, + {0x000900d6, 0x016a}, + {0x000900d7, 0x0008}, + {0x000900d8, 0x01d8}, + {0x000900d9, 0x0169}, + {0x000900da, 0x0010}, + {0x000900db, 0x8558}, + {0x000900dc, 0x0168}, {0x000900dd, 0x1ff8}, {0x000900de, 0x85a8}, {0x000900df, 0x01e8}, @@ -1199,67 +1199,58 @@ static struct dwc_reg_data phy_pie_data[] = { {0x0009015b, 0x0020}, {0x0009015c, 0x02aa}, {0x0009015d, 0x0009}, - {0x0009015e, 0x0000}, - {0x0009015f, 0x0400}, - {0x00090160, 0x010e}, - {0x00090161, 0x0008}, - {0x00090162, 0x00e8}, - {0x00090163, 0x0109}, - {0x00090164, 0x0000}, - {0x00090165, 0x8140}, - {0x00090166, 0x010c}, - {0x00090167, 0x0010}, - {0x00090168, 0x8138}, - {0x00090169, 0x010c}, - {0x0009016a, 0x0008}, - {0x0009016b, 0x07c8}, - {0x0009016c, 0x0101}, - {0x0009016d, 0x0008}, - {0x0009016e, 0x0448}, + {0x0009015e, 0x0008}, + {0x0009015f, 0x00e8}, + {0x00090160, 0x0109}, + {0x00090161, 0x0000}, + {0x00090162, 0x8140}, + {0x00090163, 0x010c}, + {0x00090164, 0x0010}, + {0x00090165, 0x8138}, + {0x00090166, 0x0104}, + {0x00090167, 0x0008}, + {0x00090168, 0x0448}, + {0x00090169, 0x0109}, + {0x0009016a, 0x000f}, + {0x0009016b, 0x07c0}, + {0x0009016c, 0x0109}, + {0x0009016d, 0x0000}, + {0x0009016e, 0x00e8}, {0x0009016f, 0x0109}, - {0x00090170, 0x000f}, - {0x00090171, 0x07c0}, + {0x00090170, 0x0047}, + {0x00090171, 0x0630}, {0x00090172, 0x0109}, - {0x00090173, 0x0000}, - {0x00090174, 0x00e8}, + {0x00090173, 0x0008}, + {0x00090174, 0x0618}, {0x00090175, 0x0109}, - {0x00090176, 0x0047}, - {0x00090177, 0x0630}, + {0x00090176, 0x0008}, + {0x00090177, 0x00e0}, {0x00090178, 0x0109}, - {0x00090179, 0x0008}, - {0x0009017a, 0x0618}, + {0x00090179, 0x0000}, + {0x0009017a, 0x07c8}, {0x0009017b, 0x0109}, {0x0009017c, 0x0008}, - {0x0009017d, 0x00e0}, - {0x0009017e, 0x0109}, + {0x0009017d, 0x8140}, + 
{0x0009017e, 0x010c}, {0x0009017f, 0x0000}, - {0x00090180, 0x07c8}, + {0x00090180, 0x0478}, {0x00090181, 0x0109}, - {0x00090182, 0x0008}, - {0x00090183, 0x8140}, - {0x00090184, 0x010c}, - {0x00090185, 0x0000}, - {0x00090186, 0x0478}, - {0x00090187, 0x0109}, - {0x00090188, 0x0000}, - {0x00090189, 0x0001}, - {0x0009018a, 0x0008}, - {0x0009018b, 0x0008}, - {0x0009018c, 0x0004}, - {0x0009018d, 0x0008}, - {0x0009018e, 0x0008}, - {0x0009018f, 0x07c8}, - {0x00090190, 0x0101}, - {0x00090006, 0x0000}, - {0x00090007, 0x0000}, - {0x00090008, 0x0008}, + {0x00090182, 0x0000}, + {0x00090183, 0x0001}, + {0x00090184, 0x0008}, + {0x00090185, 0x0008}, + {0x00090186, 0x0004}, + {0x00090187, 0x0000}, + {0x00090006, 0x0008}, + {0x00090007, 0x07c8}, + {0x00090008, 0x0109}, {0x00090009, 0x0000}, - {0x0009000a, 0x0000}, - {0x0009000b, 0x0000}, + {0x0009000a, 0x0400}, + {0x0009000b, 0x0106}, {0x000d00e7, 0x0400}, {0x00090017, 0x0000}, - {0x0009001f, 0x002b}, - {0x00090026, 0x006c}, + {0x0009001f, 0x002c}, + {0x00090026, 0x006a}, {0x000400d0, 0x0000}, {0x000400d1, 0x0101}, {0x000400d2, 0x0105}, @@ -1269,7 +1260,8 @@ static struct dwc_reg_data phy_pie_data[] = { {0x000400d6, 0x020a}, {0x000400d7, 0x020b}, {0x0002003a, 0x0002}, - {0x0002000b, 0x0085}, + {0x000200be, 0x0003}, + {0x0002000b, 0x0086}, {0x0002000c, 0x010a}, {0x0002000d, 0x0a6a}, {0x0002000e, 0x002c}, @@ -1290,6 +1282,7 @@ static struct dwc_reg_data phy_pie_data[] = { {0x00040084, 0x00e0}, {0x00040085, 0x0012}, {0x000400fd, 0x000f}, + {0x000400f1, 0x000e}, {0x00010011, 0x0001}, {0x00010012, 0x0001}, {0x00010013, 0x0180}, @@ -1348,8 +1341,7 @@ static struct dwc_reg_data phy_pie_data[] = { {0x000138b4, 0x0001}, {0x00020089, 0x0001}, {0x00020088, 0x0019}, - {0x000c0080, 0x0002}, - + {0x000c0080, 0x0000}, }; /*******************************************************/ @@ -1388,14 +1380,14 @@ static struct init_data phy_initial_data[] = { {0x0001214d, 0x0600}, {0x0001304d, 0x0600}, {0x0001314d, 0x0600}, - {0x00010049, 0x0e00}, - 
{0x00010149, 0x0e00}, - {0x00011049, 0x0e00}, - {0x00011149, 0x0e00}, - {0x00012049, 0x0e00}, - {0x00012149, 0x0e00}, - {0x00013049, 0x0e00}, - {0x00013149, 0x0e00}, + {0x00010049, 0x0604}, + {0x00010149, 0x0604}, + {0x00011049, 0x0604}, + {0x00011149, 0x0604}, + {0x00012049, 0x0604}, + {0x00012149, 0x0604}, + {0x00013049, 0x0604}, + {0x00013149, 0x0604}, {0x00000043, 0x0060}, {0x00001043, 0x0060}, {0x00002043, 0x0060}, @@ -1434,12 +1426,11 @@ static struct init_data phy_initial_data[] = { {0x00020025, 0x0000}, {0x0002002d, 0x0001}, {0x0002002c, 0x0000}, - }; /*******************************************************/ static u16 lpddr4x_train1d_imem[] = { - 0x00f8, 0x0000, 0x0050, 0x0000, + 0x0114, 0x0000, 0x0050, 0x0000, 0x0050, 0x0000, 0x0050, 0x0000, 0x0050, 0x0000, 0x0050, 0x0000, 0x0050, 0x0000, 0x0050, 0x0000, @@ -1455,410 +1446,409 @@ static u16 lpddr4x_train1d_imem[] = { 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0xc4e1, 0x44cb, 0x8000, 0x3000, - 0x0c15, 0x1703, 0x44cb, 0x8000, - 0x4000, 0x0c0d, 0x1702, 0xc4c1, - 0x2020, 0x07c0, 0x7fff, 0x78e0, - 0x1cfc, 0xb3c8, 0x1cfc, 0xb388, - 0x7fe0, 0x1cfc, 0xb348, 0x78e0, - 0x7ee0, 0x78e0, 0x748d, 0xdd0c, - 0xf00c, 0x78e0, 0x748d, 0xdd08, - 0xf009, 0x78e0, 0x748d, 0x74ad, - 0xf006, 0x78e0, 0x748d, 0xf006, - 0xc702, 0xc601, 0x24b0, 0x334d, - 0x24b0, 0x331f, 0x7ee0, 0x78e0, - 0x2244, 0x8ffc, 0x264a, 0x7000, - 0x20e8, 0x01a2, 0x202f, 0x8000, - 0x212f, 0x004b, 0x7ee0, 0x78e0, - 0x702c, 0x704c, 0x706c, 0x704d, - 0x706d, 0x708d, 0x70ad, 0x70cd, - 0x70ed, 0x206a, 0x0280, 0xb88d, - 0x2029, 0x8000, 0x44db, 0x8000, - 0x4000, 0x42db, 0x8000, 0x0400, - 0x706f, 0x78e0, 0x2022, 0x0f80, - 0x0000, 0x32e8, 0x2069, 0x0040, - 0x78e0, 0xf1fe, 0xc3e1, 0xc2e1, - 0xc1e1, 0xc0e1, 0xc0f1, 0xc5e1, - 0xc1a1, 0xe806, 0x7487, 0xdc14, - 0x077f, 0xffcf, 0xda25, 0xba9f, - 0x8a60, 0x1233, 0x0080, 0x7865, - 0x1aea, 0x8002, 0x12ed, 0x8080, - 0x208c, 0x8fc3, 0xf20d, 0xc085, - 0xc040, 0x4020, 0x09c2, 
0x0360, - 0xc185, 0x41c3, 0x0402, 0x0000, - 0x0d82, 0x0060, 0xd8ff, 0x70ad, - 0x0dc6, 0x02e0, 0x1a09, 0x3342, - 0x0906, 0x0360, 0xd8ff, 0x25ab, - 0x10c4, 0x7fff, 0xf1ff, 0x78e0, - 0xc2e6, 0x1cfc, 0xb6c8, 0xc1a5, - 0xc040, 0x40c3, 0x8000, 0x076d, - 0x8800, 0x70cd, 0x4338, 0xc041, - 0xd83f, 0xa1c0, 0x1c0d, 0x3382, - 0xc242, 0xc063, 0xf002, 0x71c5, - 0x0e0b, 0x10b2, 0x70ad, 0xf057, - 0x71a5, 0x0df7, 0x90d3, 0xc002, - 0x08f9, 0x836e, 0xc083, 0x60cb, - 0xd83f, 0x0d15, 0x10b0, 0x7866, - 0x0d15, 0x1071, 0x4260, 0x780f, - 0x4100, 0xf00a, 0x780f, 0x4100, - 0x4200, 0xf007, 0xed87, 0x780f, - 0x4160, 0x4200, 0x4300, 0x0c3a, - 0x0380, 0x40c3, 0x0000, 0x4e20, - 0x0f42, 0x01a0, 0x702c, 0x70ed, - 0xf027, 0xc001, 0x205f, 0x0402, - 0xc000, 0x7a16, 0x62f8, 0x6119, - 0x8904, 0x201a, 0x0f80, 0x0000, - 0x2000, 0x2005, 0x0f80, 0x9002, - 0x0054, 0x9000, 0x8924, 0x0d12, - 0x0060, 0x780f, 0xc183, 0x61c9, - 0xb8c5, 0x71e5, 0x7907, 0x255a, - 0x1200, 0xb8c4, 0xe805, 0xb861, - 0x08ff, 0x8031, 0x793b, 0x4067, - 0x7825, 0x1b00, 0x3000, 0xc101, - 0xc000, 0x7834, 0x41c3, 0x8000, - 0x0728, 0x6028, 0x0fa7, 0x9002, - 0xf1ac, 0xc0a5, 0x1404, 0x341b, - 0xc6c6, 0x78e0, 0xc2e6, 0x1cfc, - 0xb6c8, 0x2482, 0x340d, 0x206f, - 0x0243, 0x1054, 0x0081, 0x88a0, - 0x1021, 0x0080, 0xc149, 0x2400, - 0x3f81, 0x0000, 0x033c, 0xc04a, - 0x0eb6, 0x0160, 0x710c, 0xc09e, - 0xc040, 0xc08b, 0x2456, 0x3d81, - 0x2400, 0x3f82, 0x0000, 0x01a4, - 0x0f2a, 0x00a0, 0x2440, 0x38c3, - 0x706c, 0xc341, 0xf002, 0x7164, - 0x0b17, 0x0332, 0x704c, 0xf00e, - 0x235a, 0x0601, 0x6038, 0x6059, - 0x700c, 0xa900, 0x7144, 0x0af5, - 0x8632, 0x2455, 0x3840, 0xf1f1, - 0x251a, 0x1f80, 0x0020, 0x0000, - 0x702c, 0x2005, 0x0f80, 0x9001, - 0xe100, 0xc045, 0xb020, 0x251a, - 0x1f80, 0x0010, 0x0000, 0xb887, - 0x0e26, 0x00a0, 0xc046, 0x0e6a, - 0x00a0, 0xc02b, 0xc044, 0x0e62, - 0x00a0, 0xc02d, 0xc047, 0xf005, - 0xc001, 0x7104, 0xc041, 0xc001, - 0x7314, 0x0330, 0x0029, 0x71ed, - 0xc001, 0x084b, 0x0093, 0xc001, - 0x1e00, 0x7003, 0x8000, 0x076d, - 0xc204, 0x2079, 
0x0001, 0x1600, - 0x7080, 0x8000, 0x004a, 0x2044, - 0x004d, 0x7d27, 0x40a1, 0x0cee, - 0x00e0, 0x702c, 0x4318, 0xc00a, - 0x46e9, 0x2544, 0x17ca, 0x0a11, - 0x1010, 0x2242, 0x104a, 0x0aff, - 0x9031, 0x7edb, 0x2011, 0x8340, - 0xf021, 0xc001, 0x1e00, 0x73c2, - 0x8000, 0x076d, 0xc207, 0x2079, - 0x0081, 0x1600, 0x7080, 0x8000, - 0x007d, 0x2044, 0x004d, 0x7d27, - 0x40a1, 0x0caa, 0x00e0, 0x712c, - 0x4318, 0xc009, 0x46e9, 0x2544, - 0x17cf, 0xef06, 0xbf61, 0x0f01, - 0x1031, 0x7edb, 0x2011, 0x8340, - 0x72ed, 0xf3b4, 0x704c, 0x700c, - 0xf00a, 0x2400, 0x3f81, 0x0000, - 0x01bc, 0x6119, 0xa940, 0x19c0, - 0x0082, 0x7104, 0x208c, 0x8fc2, - 0xf6f5, 0x0f7e, 0x0060, 0x40e1, - 0x0f76, 0x0060, 0x40e1, 0x1e00, - 0x7043, 0x8000, 0x0684, 0x0f6a, - 0x0060, 0x40e1, 0x78cf, 0x4163, - 0x082e, 0x00a0, 0xc043, 0xc003, - 0x70cd, 0x1e00, 0x7382, 0x8000, - 0x0684, 0x0d92, 0x0000, 0x0f4a, - 0x0060, 0x40e1, 0x700c, 0xf004, - 0xc002, 0x7104, 0xc042, 0xc002, - 0x0871, 0x0072, 0x706f, 0xf056, - 0x0d06, 0x0080, 0x40a1, 0xc191, - 0x0d6a, 0xffef, 0xda07, 0x1600, - 0x7080, 0x8000, 0x076d, 0x2400, - 0x3f81, 0x0000, 0x01bc, 0xc211, + 0x7054, 0x7ce0, 0x4300, 0x1101, + 0x048c, 0x2242, 0x8042, 0x1b01, + 0x0312, 0xf5fb, 0x7ee0, 0x78e0, + 0xc4e1, 0x240a, 0x1f80, 0x8000, + 0x3000, 0x0c19, 0x1703, 0x240a, + 0x1f80, 0x8000, 0x4000, 0x0c0d, + 0x1702, 0xc4c1, 0x2020, 0x07c0, + 0x7fff, 0x78e0, 0x1cfc, 0xb3c8, + 0x1cfc, 0xb388, 0x7fe0, 0x1cfc, + 0xb348, 0x78e0, 0x7ee0, 0x78e0, + 0x748d, 0xdd0c, 0xf00c, 0x78e0, + 0x748d, 0xdd08, 0xf009, 0x78e0, + 0x748d, 0x74ad, 0xf006, 0x78e0, + 0x748d, 0xf006, 0xc702, 0xc601, + 0x24b0, 0x334d, 0x24b0, 0x331f, + 0x7ee0, 0x78e0, 0x2244, 0x8ffc, + 0x264a, 0x7000, 0x20e8, 0x01a2, + 0x202f, 0x8000, 0x212f, 0x004b, + 0x7ee0, 0x78e0, 0x702c, 0x704c, + 0x706c, 0x704d, 0x706d, 0x708d, + 0x70ad, 0x70cd, 0x70ed, 0x206a, + 0x0280, 0xb88d, 0x2029, 0x8000, + 0x44db, 0x8000, 0x4000, 0x42db, + 0x8000, 0x0400, 0x706f, 0x78e0, + 0x2022, 0x0f80, 0x0000, 0x3668, + 0x2069, 0x0040, 0x78e0, 0xf1fe, + 0xc3e1, 
0xc2e1, 0xc1e1, 0xc0e1, + 0xc0f1, 0xc5e1, 0xc1a1, 0xe806, + 0x7487, 0xdc14, 0x077f, 0xffcf, + 0xda25, 0xba9f, 0x8a60, 0x1233, + 0x0080, 0x7865, 0x1aea, 0x8002, + 0x12ed, 0x8080, 0x208c, 0x8fc3, + 0xf20d, 0xc085, 0xc040, 0x4020, + 0x092e, 0x0360, 0xc185, 0x41c3, + 0x0402, 0x0000, 0x0812, 0x00a0, + 0xd8ff, 0x70ad, 0x0d42, 0x02e0, + 0x1a09, 0x3003, 0x086e, 0x0360, + 0xd8ff, 0x25ab, 0x10c4, 0x7fff, + 0xf1ff, 0x78e0, 0xc2e6, 0x1cfc, + 0xb6c8, 0xc1a6, 0xc141, 0xc042, + 0xc001, 0x70cd, 0xc243, 0xa0c0, + 0xd83f, 0xc064, 0x40c3, 0x8000, + 0x07c9, 0x88a0, 0x1c11, 0x3382, + 0xf054, 0xc003, 0x089d, 0x06ee, + 0xc084, 0x60c8, 0xc040, 0xc100, + 0xd83f, 0x0b19, 0x30b0, 0x7826, + 0x0b19, 0x3070, 0x4100, 0x0b1d, + 0x3031, 0x4200, 0xc100, 0xf007, + 0xc300, 0x4100, 0x4200, 0xf004, + 0xc200, 0x4300, 0x0c22, 0x0380, + 0x40c3, 0x0000, 0x4e20, 0x0a52, + 0x01e0, 0x702c, 0x70ed, 0xf026, + 0x255f, 0x1402, 0xc002, 0x7a16, + 0x62f8, 0x6038, 0x8824, 0x211a, + 0x0f80, 0x0000, 0x2000, 0x2005, + 0x0f80, 0x9002, 0x0054, 0x9000, + 0x0faa, 0x0060, 0x780f, 0xc100, + 0xb8c5, 0x71e5, 0x7907, 0x235a, + 0x3200, 0x4220, 0xb8c4, 0xe806, + 0xb861, 0x0801, 0x0031, 0x7a5b, + 0xc001, 0x8020, 0x7945, 0xa020, + 0xc002, 0x41c3, 0x8000, 0x0784, + 0x78b4, 0x6028, 0x0fad, 0x9004, + 0x7167, 0x0b63, 0xb0d4, 0x71c5, + 0x0efb, 0x90b4, 0x706f, 0xc0a6, + 0x1404, 0x341b, 0xc6c6, 0x78e0, + 0xc2e6, 0x1cfc, 0xb6c8, 0x2482, + 0x320d, 0x216f, 0x0243, 0x1154, + 0x0080, 0x89a0, 0xc047, 0x1121, + 0x0080, 0x2400, 0x3f81, 0x0000, + 0x0334, 0xc048, 0x0a2e, 0x01a0, + 0x710c, 0xc09c, 0xc040, 0xc089, + 0x2456, 0x3d41, 0x2400, 0x3f82, + 0x0000, 0x019c, 0x0a8e, 0x00e0, + 0x2440, 0x3fc3, 0x70ed, 0x704c, + 0xf00c, 0x225a, 0x0603, 0xf006, + 0x6078, 0x6038, 0xa8e0, 0x7124, + 0x09f9, 0x8634, 0xc09f, 0x7144, + 0x0aeb, 0x8334, 0x702c, 0x251a, + 0x1f80, 0x0020, 0x0000, 0x2005, + 0x0f80, 0x9001, 0xe100, 0xc043, + 0xb0e0, 0x251a, 0x1f80, 0x0010, + 0x0000, 0xb887, 0x0992, 0x00e0, + 0xc045, 0x035b, 0x0000, 0x0f1d, + 0x10b5, 0xdd4a, 0xbd9f, 0x1e00, + 
0x7003, 0x8000, 0x07c9, 0xef30, + 0x8d00, 0x71ad, 0x7d06, 0xf033, + 0xde7d, 0x71ad, 0x1e00, 0x7043, + 0x8000, 0x07c9, 0x0f17, 0x10b1, + 0xbe9f, 0x09a2, 0x00e0, 0xc02b, + 0xc044, 0x8e00, 0x2044, 0x004d, + 0xf003, 0x8e00, 0x7d06, 0xc007, + 0x2011, 0x8340, 0xf21e, 0x712c, + 0x4338, 0x2544, 0x17c0, 0xe807, + 0xb861, 0x0801, 0x0031, 0x232f, + 0x36c0, 0xc004, 0x7a0f, 0x0826, + 0x0120, 0x40a1, 0x4608, 0x720c, + 0xc041, 0xf020, 0x095e, 0x00e0, + 0xc029, 0xc046, 0x8d00, 0x2044, + 0x004d, 0xc008, 0x2011, 0x8340, + 0x02d0, 0x0021, 0x710c, 0xc041, + 0x4318, 0x2544, 0x17c0, 0xe807, + 0xb861, 0x0801, 0x0031, 0x232f, + 0x36c0, 0x702c, 0xc006, 0x7a0f, + 0x0fe2, 0x00e0, 0x40a1, 0x4608, + 0x704c, 0x700c, 0xf00a, 0x2400, + 0x3f81, 0x0000, 0x01b4, 0x6119, + 0x19c0, 0x0082, 0xa940, 0x7104, + 0x208c, 0x8003, 0xf7b5, 0x0ad6, + 0x00a0, 0xc001, 0x0ace, 0x00a0, + 0xc001, 0xc001, 0x1e00, 0x7043, + 0x8000, 0x06e0, 0x0abe, 0x0080, + 0x202f, 0x06c7, 0x41c1, 0x0b86, + 0x00a0, 0xc042, 0xc002, 0x70cd, + 0x1e00, 0x7382, 0x8000, 0x06e0, + 0x0d96, 0x0000, 0x0a9e, 0x00a0, + 0xc001, 0x43d9, 0xf04e, 0x143f, + 0x308c, 0x2384, 0x0001, 0x2353, + 0x3100, 0x7b05, 0x704c, 0xf00e, + 0x6049, 0xc005, 0x7144, 0x211a, + 0x0f81, 0x0000, 0x1000, 0x7825, + 0x781b, 0xb89c, 0xb89f, 0xb060, + 0x0ae9, 0x8324, 0x2456, 0x3d40, + 0x084e, 0x00c0, 0x40a1, 0xc190, + 0x0d56, 0xffef, 0xda07, 0x1600, + 0x7080, 0x8000, 0x07c9, 0x2400, + 0x3f81, 0x0000, 0x01b4, 0xc210, 0x209f, 0x0003, 0x224a, 0x1200, 0x6119, 0x7163, 0x8960, 0x2253, 0x0140, 0x7167, 0x7865, 0xa900, - 0x4040, 0x1140, 0x0083, 0x2242, + 0x1140, 0x0083, 0x4040, 0x2242, 0x104a, 0x0aff, 0x9031, 0x781d, 0xb8c5, 0x7865, 0x1940, 0x0002, - 0x4040, 0xda10, 0x1180, 0x0083, + 0x1180, 0x0083, 0x4040, 0xda10, 0xba61, 0x0a01, 0x0031, 0x781d, 0xb8c5, 0x7865, 0x1980, 0x0002, - 0x234c, 0xbfc0, 0x0784, 0xffe9, - 0x202f, 0x06c0, 0x2084, 0x0001, - 0x2353, 0x3101, 0x7905, 0x704c, - 0xf00d, 0x6048, 0x7144, 0x201a, - 0x0f83, 0x0000, 0x1000, 0xc006, - 0x7865, 0x781b, 0xb89c, 0xb89f, - 0xb020, 
0x1423, 0x3080, 0x0ae7, - 0x8022, 0x2456, 0x3d80, 0xf1ae, - 0xc005, 0xb0c0, 0x1e00, 0x7043, - 0x8000, 0x0684, 0x0c52, 0x0080, - 0x0a9a, 0x00a0, 0xc003, 0x750c, - 0x41c3, 0x0094, 0x0001, 0x0a0e, - 0x0060, 0x42a1, 0xf002, 0x71c5, - 0x75d5, 0x0628, 0xffe9, 0x2440, - 0x3b1b, 0x2332, 0x3380, 0x208c, - 0x8fc3, 0xf3f7, 0x0fa3, 0x103e, - 0x750c, 0x41c3, 0x0095, 0x0001, - 0x09e2, 0x0060, 0x42c1, 0x70ad, - 0xf01e, 0x2332, 0x3382, 0x2400, - 0x3f81, 0x0000, 0x01a4, 0x6149, - 0x215f, 0x0602, 0x2455, 0x3841, - 0x6159, 0x7abc, 0x7a5c, 0x7a5c, - 0x623a, 0x8a20, 0x7965, 0x0dfe, - 0x02a0, 0xaa20, 0x4200, 0x41c3, - 0x0096, 0x0001, 0x09a6, 0x0060, - 0x750c, 0xe508, 0x42a1, 0x706c, - 0x708d, 0x258c, 0x9fc2, 0xf6db, - 0xf01d, 0x2400, 0x3f81, 0x0000, - 0x01bc, 0x6149, 0x2644, 0x17c3, - 0xeb05, 0xbb61, 0x0bff, 0x8031, - 0x793d, 0xb9c0, 0x4320, 0x2444, - 0x17c1, 0xe906, 0xb961, 0x0901, - 0x0031, 0x7b7b, 0x7b05, 0x7185, - 0x7144, 0x0cd3, 0x9232, 0x786f, - 0xf1c1, 0x41c3, 0x0097, 0x0000, - 0x0952, 0x0060, 0x750c, 0x0f4b, - 0x907e, 0x750c, 0x41c3, 0x0098, - 0x0001, 0x0942, 0x0060, 0x42c1, - 0x70ad, 0xf020, 0xc18b, 0x61d9, - 0x8968, 0x2400, 0x3f81, 0x0000, - 0x01a4, 0x6169, 0x215f, 0x0603, - 0x2455, 0x3841, 0x6179, 0x7bbc, - 0x7b7c, 0x7b7c, 0x633b, 0x8b20, - 0x7945, 0x0d5a, 0x02a0, 0xab20, - 0x4200, 0x41c3, 0x0099, 0x0001, - 0x0902, 0x0060, 0x750c, 0xe508, - 0x43a1, 0x704c, 0x708d, 0x258c, - 0x9fc2, 0xf6dd, 0xf01f, 0x2400, - 0x3f81, 0x0000, 0x01bc, 0x6179, - 0x11c0, 0x0081, 0x2644, 0x17c2, - 0xea05, 0xba61, 0x0aff, 0x8031, - 0x793d, 0xb9c0, 0x4220, 0x2444, - 0x17c1, 0xe906, 0xb961, 0x0901, - 0x0031, 0x7a5b, 0x7a05, 0x7185, - 0x7164, 0x0ccf, 0x9232, 0x784f, - 0xf1be, 0x41c3, 0x009a, 0x0000, - 0x08aa, 0x0060, 0x750c, 0xf151, - 0x700c, 0xc044, 0xc005, 0x702c, - 0x0ace, 0x00a0, 0xb020, 0x700c, - 0xf003, 0xc002, 0x7104, 0xc042, - 0xc002, 0x7114, 0x01e6, 0x0029, - 0x708d, 0x1423, 0x3080, 0xc047, - 0x40c3, 0x0000, 0xffff, 0xc041, - 0xc043, 0xf003, 0x7185, 0xc007, - 0x7410, 0x0146, 0x002a, 0xc09e, - 
0x6089, 0xc002, 0x70ad, 0xdaff, - 0x70cd, 0x70ed, 0x47b9, 0x09e9, - 0x8021, 0x706c, 0xf002, 0x7164, - 0x238c, 0x8fc2, 0x007a, 0x0029, - 0x797c, 0x793c, 0xd81d, 0xb861, - 0x08ff, 0x8031, 0x793d, 0x6338, - 0xb8c2, 0x2002, 0x8040, 0xf20b, - 0x2044, 0x07c1, 0x40e3, 0xe906, - 0xb961, 0x0901, 0x0031, 0x781d, - 0xf00b, 0x245a, 0x1600, 0x2455, - 0x3841, 0x6119, 0x787c, 0x781c, - 0x781c, 0x6108, 0x4718, 0x7950, - 0x218c, 0x8fc3, 0xf40a, 0xe3df, - 0x07ae, 0xffe9, 0xdaff, 0x08ab, - 0x801f, 0x70ed, 0x4260, 0xf1d1, - 0xb8e0, 0x27c0, 0x1061, 0xf3cd, - 0x79f0, 0x78d0, 0x203c, 0x0041, - 0x237f, 0x0fc0, 0x790b, 0x755c, - 0x76fd, 0xdaff, 0xf1c1, 0x262f, - 0xf388, 0xf407, 0x4548, 0x46e9, - 0x262f, 0xf3c8, 0xf24a, 0x2604, - 0x1f80, 0x0000, 0xfffe, 0x781d, - 0x79b0, 0x6119, 0x782f, 0xe0c0, - 0xf646, 0xe0a0, 0xdf20, 0xd840, - 0xf688, 0xf007, 0xe0df, 0xd85f, - 0xdf5f, 0xf604, 0x4020, 0x4728, - 0x79af, 0x7dcf, 0x780f, 0x42a1, - 0x0b62, 0x0160, 0xc045, 0x7eef, - 0x4308, 0x40c1, 0x0b56, 0x0160, - 0x42a1, 0xc105, 0xc398, 0x7b94, - 0x2155, 0x0c01, 0x7a3b, 0x2284, - 0x0001, 0xb9c4, 0x7a25, 0x79db, - 0xb340, 0x2156, 0x0e01, 0x2642, - 0x1802, 0xc392, 0x2184, 0x0001, - 0xbac4, 0x7b94, 0x7945, 0xb320, - 0xc101, 0x7930, 0x230c, 0x9040, - 0xc101, 0x21ca, 0x02cb, 0xc141, + 0x238c, 0xb001, 0x0762, 0xffe5, + 0x232f, 0x06c0, 0xc003, 0x1e00, + 0x7043, 0x8000, 0x06e0, 0x0fd2, + 0x00a0, 0xb0c0, 0x0e0a, 0x00a0, + 0xc002, 0x750c, 0x41c3, 0x0094, + 0x0001, 0x0cbe, 0x0060, 0x42a1, + 0xf0bb, 0x2332, 0x3380, 0x208c, + 0x8fc3, 0xf2b6, 0x1600, 0x7080, + 0x8000, 0x001b, 0x79f2, 0x781d, + 0x7812, 0x7825, 0xc042, 0xc001, + 0x08a7, 0x003e, 0x750c, 0x41c3, + 0x0095, 0x0001, 0x0c8a, 0x0060, + 0x42c1, 0x70ad, 0xf03e, 0x702c, + 0x706c, 0xf018, 0x2400, 0x3f80, + 0x0000, 0x01b4, 0x6048, 0x792f, + 0xec05, 0xbc61, 0x0cff, 0x9031, + 0x781d, 0x7144, 0xb8c0, 0x2344, + 0x07cc, 0xec06, 0xbc61, 0x0c01, + 0x1031, 0x781b, 0x7905, 0x7164, + 0x0bd5, 0x8234, 0x2644, 0x17cc, + 0xc002, 0x082b, 0x001e, 0x2332, + 0x3382, 0x2400, 0x3f80, 
0x0000, + 0x019c, 0x6048, 0xc29f, 0x205f, + 0x0600, 0x621a, 0x78bd, 0x781d, + 0x781d, 0x6058, 0x8840, 0x7a25, + 0xa840, 0x0d4e, 0x02a0, 0x782f, + 0x4200, 0x41c3, 0x0096, 0x0001, + 0x0c0e, 0x0060, 0x750c, 0xe508, + 0x258c, 0x9003, 0x0782, 0xffe5, + 0x42a1, 0x41c3, 0x0097, 0x0000, + 0x0bf6, 0x0060, 0x750c, 0xc001, + 0x08ad, 0x007e, 0x750c, 0x41c3, + 0x0098, 0x0001, 0x0be2, 0x0060, + 0x42c1, 0x70ad, 0xf041, 0x702c, + 0x706c, 0xf01a, 0x2400, 0x3f80, + 0x0000, 0x01b4, 0x6058, 0x10c0, + 0x0080, 0x792f, 0xec05, 0xbc61, + 0x0cff, 0x9031, 0x781d, 0x7144, + 0xb8c0, 0x2344, 0x07cc, 0xec06, + 0xbc61, 0x0c01, 0x1031, 0x781b, + 0x7905, 0x7164, 0x0bd1, 0x8234, + 0x2644, 0x17cc, 0xc002, 0x082d, + 0x003e, 0xc089, 0x60d8, 0x8848, + 0x2400, 0x3f80, 0x0000, 0x019c, + 0x6048, 0xc29f, 0x205f, 0x0600, + 0x621a, 0x78bd, 0x781d, 0x781d, + 0x6058, 0x8840, 0x7a25, 0xa840, + 0x0c9e, 0x02a0, 0x782f, 0x4200, + 0x41c3, 0x0099, 0x0001, 0x0b62, + 0x0060, 0x750c, 0xe508, 0x258c, + 0x9003, 0x077e, 0xffe5, 0x42a1, + 0x41c3, 0x009a, 0x0000, 0x0b4a, + 0x0060, 0x750c, 0x71c5, 0x75d5, + 0x068a, 0xffee, 0x2440, 0x391b, + 0x71e5, 0x74f5, 0x04aa, 0xffe5, + 0x700c, 0xc044, 0xc003, 0x702c, + 0x0e26, 0x00a0, 0xb020, 0x264a, + 0x3000, 0xf0ed, 0x702c, 0x08d6, + 0x01e0, 0xda18, 0xc096, 0x08ce, + 0x01e0, 0xda18, 0x143f, 0x3080, + 0x70ed, 0xc042, 0x40c3, 0x0000, + 0xffff, 0xc041, 0xc043, 0xf096, + 0x60e8, 0x260c, 0xb000, 0xf491, + 0x704c, 0x70cd, 0xd9ff, 0x70ad, + 0x4758, 0x700c, 0xf033, 0xf20c, + 0x2344, 0x07cc, 0x43e3, 0xec06, + 0xbc61, 0x0c01, 0x1031, 0x7b7d, + 0xbbc0, 0xf00d, 0x275a, 0x1603, + 0xc49f, 0x647c, 0x7b1d, 0x7b7d, + 0x7b7d, 0x2432, 0x10df, 0x2744, + 0x3043, 0x7c30, 0x248c, 0x9fc3, + 0xf408, 0xe0df, 0xd9ff, 0xf716, + 0xeb94, 0x704c, 0x4100, 0xf012, + 0x7074, 0x22c0, 0x0061, 0xf20e, + 0xdb3f, 0x233c, 0x000b, 0x7c50, + 0x7bb0, 0x233c, 0x0303, 0x230b, + 0x82c0, 0x755c, 0x763c, 0xd9ff, + 0x7104, 0x208c, 0x8003, 0x079a, + 0xffe5, 0x2053, 0x8083, 0x78b0, + 0x262f, 0xf088, 0x2079, 0x0000, + 0x20c5, 0x0061, 
0x7014, 0x72bd, + 0x71dd, 0x262f, 0xf088, 0xf211, + 0x2204, 0x0f80, 0x0000, 0xfffe, + 0x781d, 0x7e30, 0x661e, 0x78cf, + 0xe0c0, 0xf74a, 0x081b, 0x0835, + 0xdd40, 0xde20, 0xf009, 0x710c, + 0xc044, 0xf033, 0xe0df, 0x45c9, + 0xf7c3, 0xdd5f, 0xde5f, 0x78af, + 0x792f, 0x0ece, 0x0160, 0x7a4f, + 0x4308, 0x0ec6, 0x0160, 0x78cf, + 0x2580, 0x103f, 0x79bb, 0x2184, + 0x0001, 0xbdc4, 0x7d25, 0xc190, + 0x79f4, 0xb1a0, 0x2642, 0x1801, + 0x7a3b, 0x2284, 0x0001, 0xb9c4, + 0x7a25, 0xc101, 0xc396, 0x7bf4, + 0x7930, 0x230c, 0x9040, 0xc101, + 0xb340, 0x21ca, 0x02c5, 0xc141, 0xc103, 0x7930, 0x7030, 0xc103, - 0x21ca, 0x000b, 0xc143, 0xf160, - 0x710c, 0xc044, 0xf15c, 0xc003, - 0x7910, 0xc001, 0x7810, 0x0845, - 0x0065, 0x2454, 0x3c0b, 0xc002, - 0xc48b, 0x42c3, 0x9000, 0x0000, - 0x7c16, 0x8c06, 0xdb40, 0x2454, - 0x390b, 0x201a, 0x0f81, 0x0000, - 0x1000, 0xc006, 0x7825, 0x781b, - 0x7845, 0xb060, 0x8c07, 0x201a, - 0x0f81, 0x0000, 0x1000, 0xc006, - 0x7825, 0x781b, 0x7845, 0xb060, - 0x702c, 0xf003, 0x7124, 0x1423, - 0x3080, 0x7110, 0x063e, 0xffea, - 0xc09e, 0x602a, 0xc002, 0x0af1, - 0x8021, 0x2456, 0x3d80, 0x6028, - 0x23f4, 0x1042, 0x201a, 0x0f83, - 0x0000, 0x1000, 0xc006, 0x7865, - 0x781b, 0xb89c, 0xb89f, 0xb040, - 0xf1e6, 0xc004, 0x41c3, 0x00a9, - 0x0000, 0x08d6, 0xffef, 0x2078, - 0x0000, 0x08c6, 0x0080, 0x0b0a, - 0x0280, 0x1600, 0x7080, 0x8000, - 0x001b, 0xb8e0, 0x0ff4, 0x00c2, - 0x2400, 0x3f81, 0x0000, 0x033c, - 0x08ce, 0x0160, 0x700c, 0x2480, - 0x340d, 0x1404, 0x341b, 0xc6c6, - 0xb8e6, 0x781d, 0x781d, 0x781d, - 0xb8c2, 0xf209, 0x205f, 0x0101, - 0x6904, 0x080f, 0x0352, 0x6906, - 0xf003, 0x781b, 0x7404, 0x7104, - 0x7fe0, 0x780f, 0x42c3, 0x8000, - 0x076d, 0x8a20, 0x215f, 0x0c81, + 0x21ca, 0x0005, 0xc143, 0x71e5, + 0xc002, 0x7710, 0x06d4, 0xffe5, + 0xc09c, 0xc003, 0x2454, 0x380b, + 0x7910, 0xc001, 0x7810, 0x0841, + 0x0065, 0xc289, 0x2216, 0x0782, + 0x8a06, 0xdb40, 0x2454, 0x3b0b, + 0x201a, 0x0f81, 0x0000, 0x1000, + 0xc005, 0x7825, 0x781b, 0x41c3, + 0x9000, 0x0000, 0x7825, 0xb060, + 0x8a07, 
0x201a, 0x0f82, 0x0000, + 0x1000, 0xc005, 0x7845, 0x781b, + 0x7825, 0xb060, 0x702c, 0xf016, + 0xc09c, 0x6028, 0x0e23, 0x3021, + 0x2456, 0x3d40, 0x6028, 0x23f4, + 0x1042, 0x201a, 0x0f83, 0x0000, + 0x1000, 0xc005, 0x7865, 0x781b, + 0xb89c, 0xb89f, 0xb040, 0x7124, + 0xc002, 0x09d9, 0x8004, 0x2640, + 0x305e, 0x264c, 0xb080, 0x0628, + 0xffe5, 0xc090, 0xc004, 0x41c3, + 0x00a9, 0x0000, 0x08de, 0xffef, + 0x2078, 0x0000, 0x0c2a, 0x0080, + 0x0a8a, 0x0280, 0x1600, 0x7080, + 0x8000, 0x001b, 0xb8e0, 0x0b4c, + 0x0102, 0x2400, 0x3f81, 0x0000, + 0x0334, 0x0c4a, 0x0160, 0x700c, + 0x2480, 0x320d, 0x1404, 0x341b, + 0xc6c6, 0x78e0, 0xb8e6, 0x781d, + 0x781d, 0x781d, 0xb8c2, 0x742c, + 0xf403, 0x7914, 0xf006, 0x7915, + 0x0909, 0x0354, 0x762c, 0x7915, + 0x7fe0, 0x6901, 0x42c3, 0x8000, + 0x07c9, 0x8a20, 0x215f, 0x0c81, 0x6038, 0x8a21, 0x215f, 0x0641, - 0x6038, 0x8823, 0xb9e6, 0x8822, - 0x1600, 0x7080, 0x8000, 0x001c, - 0xb9c2, 0xf209, 0x7014, 0x40c3, - 0x8000, 0x0634, 0xf209, 0xe010, - 0xf007, 0x7014, 0x40c3, 0x8000, - 0x062c, 0xf203, 0xe010, 0x6038, - 0x7fe0, 0x8800, 0xc2e6, 0x1cfc, - 0xb6c8, 0xc1b1, 0xc250, 0xc14e, - 0x4200, 0x41c3, 0x00ed, 0x0001, - 0x0de2, 0x0020, 0x750c, 0x70ad, - 0xf023, 0x448b, 0x9412, 0x9479, - 0x943a, 0xc04f, 0x941b, 0x9455, - 0x94f6, 0x94d7, 0x9498, 0x1426, - 0x311b, 0x1428, 0x311f, 0xc048, - 0xc147, 0xc346, 0xc30f, 0xc445, - 0xc644, 0xc743, 0xc242, 0x750c, - 0x41c3, 0x00ee, 0x000b, 0x4261, - 0x1c04, 0x37c0, 0x0d9e, 0x0020, - 0x1c00, 0x36c0, 0x71a5, 0xc00e, - 0x79b0, 0x4320, 0x4328, 0x091b, - 0x0022, 0x704c, 0xf00e, 0xc010, - 0x20f4, 0x00cc, 0xc089, 0x7834, - 0xb080, 0xc00e, 0x631b, 0x794f, - 0x09ef, 0x82b4, 0x7144, 0xf1cb, - 0xc0b1, 0x1404, 0x341b, 0xc6c6, - 0xc2e4, 0xc1a4, 0x4608, 0x700c, - 0x0866, 0x0020, 0x702c, 0x702c, - 0xd8ff, 0x754c, 0x726c, 0x1c0c, - 0x30c1, 0xc142, 0xc141, 0x4528, - 0x093e, 0x0020, 0xc140, 0x265f, - 0x1100, 0x702c, 0xc543, 0xb98d, - 0x78c5, 0x2084, 0x0f03, 0xc042, - 0xd8ff, 0x734c, 0x746c, 0xc541, - 0x091e, 0x0020, 0xc540, 0xd8ff, - 
0xd980, 0x754c, 0x726c, 0xc543, - 0xc542, 0xc541, 0x090a, 0x0020, - 0xc540, 0xc0a4, 0xc6c4, 0x78e0, - 0x1e00, 0x7005, 0x9004, 0x0102, - 0x7ee0, 0x78e0, 0xc2e2, 0x4308, - 0x700c, 0x0b5a, 0x0020, 0x4220, - 0x0b36, 0x0020, 0x700c, 0x0b16, - 0x0020, 0x700c, 0x228c, 0x8fc3, - 0xf227, 0x216f, 0x0243, 0x8900, - 0x43c3, 0x8000, 0x0771, 0x201a, - 0x0f80, 0x0020, 0x0000, 0x2005, - 0x0f80, 0x9004, 0x0040, 0xb040, - 0x8b80, 0xf012, 0x8900, 0x241a, - 0x1f8d, 0x0000, 0x2000, 0x7185, - 0x201a, 0x0f80, 0x0020, 0x0000, - 0x78a5, 0x2005, 0x0f80, 0x9002, - 0x0040, 0xb040, 0x8b01, 0x08e1, - 0x8303, 0x090e, 0x0280, 0x208a, - 0x021a, 0x41c3, 0x900e, 0x002a, - 0x1e00, 0x72c4, 0x9008, 0x01e0, - 0xb100, 0x40c3, 0x0000, 0x0fac, - 0xb101, 0x40c3, 0x8000, 0x0446, - 0x1800, 0x0003, 0x700c, 0xc6c2, - 0x702c, 0x7110, 0x20e0, 0x07ca, - 0x1600, 0x7102, 0x9008, 0x01d6, - 0x0af3, 0x803e, 0x7124, 0x7ee0, - 0xc2e6, 0x1cfc, 0xb6c8, 0x4338, - 0x0e76, 0x0060, 0x4508, 0x71ed, - 0x46cb, 0x9004, 0x00f2, 0xb6e0, - 0x1e00, 0x1005, 0x1e00, 0x7344, - 0x9008, 0x01e2, 0x0e1e, 0x0160, - 0x760c, 0x0b17, 0x3030, 0xb6e8, + 0x6119, 0x8903, 0xb8e6, 0x8902, + 0x1600, 0x7081, 0x8000, 0x001c, + 0xb8c2, 0xf409, 0x7034, 0x41c3, + 0x8000, 0x0640, 0xf209, 0xe108, + 0xf007, 0x7034, 0x41c3, 0x8000, + 0x0630, 0xf203, 0xe108, 0x6119, + 0x7fe0, 0x8900, 0xc2e6, 0x1cfc, + 0xb6c8, 0xc1b5, 0xc254, 0x4528, + 0x4200, 0x41c3, 0x00ef, 0x0001, + 0x087e, 0x0060, 0x750c, 0x706f, + 0xf035, 0x4261, 0xf009, 0xc014, + 0x20f4, 0x0083, 0xc08e, 0x7834, + 0xb060, 0x62ba, 0x7124, 0x09f3, + 0x8294, 0x4183, 0x911c, 0x1446, + 0x3103, 0x915f, 0xc053, 0x1448, + 0x3101, 0x144a, 0x3100, 0x143a, + 0x311f, 0x143c, 0x311e, 0x1440, + 0x310f, 0x1442, 0x310e, 0x1444, + 0x310c, 0xc346, 0xc313, 0xc048, + 0xc147, 0xc242, 0x750c, 0x41c3, + 0x00f0, 0x000b, 0xc445, 0xc644, + 0xc743, 0x1c04, 0x3780, 0x1c00, + 0x37c0, 0x0816, 0x0060, 0x4261, + 0x7167, 0x232f, 0x16c8, 0x0b97, + 0x9364, 0x702c, 0xc0b5, 0x1404, + 0x341b, 0xc6c6, 0xc2e4, 0xc1a4, + 0x4608, 0x700c, 0x0866, 
0x0020, + 0x702c, 0x702c, 0xd8ff, 0x754c, + 0x726c, 0x1c0c, 0x30c1, 0x4528, + 0xc142, 0xc141, 0x0926, 0x0020, + 0xc140, 0x265f, 0x1100, 0x702c, + 0xb98d, 0x734c, 0x78c5, 0x2084, + 0x0f03, 0xc042, 0xd8ff, 0x746c, + 0xc543, 0xc541, 0x0906, 0x0020, + 0xc540, 0xd8ff, 0xd980, 0x754c, + 0x726c, 0xc543, 0xc542, 0xc541, + 0x08f2, 0x0020, 0xc540, 0xc0a4, + 0xc6c4, 0x78e0, 0x700c, 0x1e00, + 0x7004, 0x9004, 0x0102, 0x7ee0, + 0xc2e4, 0x4308, 0x70ad, 0x46cb, + 0x9008, 0x01e4, 0x218c, 0x8fc3, + 0xb6a0, 0xb6a1, 0xb6a2, 0xf224, + 0x1600, 0x7080, 0x8000, 0x0004, + 0x201a, 0x0f83, 0x0020, 0x0000, + 0x2305, 0x0f80, 0x9004, 0x0040, + 0xb020, 0x40c3, 0x8000, 0x07cd, + 0x8881, 0x8800, 0xf00c, 0x201a, + 0x0f82, 0x0000, 0x2000, 0x7104, + 0x7a65, 0x2205, 0x0f82, 0x9002, + 0x0040, 0xb220, 0x0ceb, 0x9005, + 0x089a, 0x0280, 0x208a, 0x021a, + 0x41c3, 0x900e, 0x002a, 0x1efc, + 0x92c4, 0xb100, 0x40c3, 0x8000, + 0x0446, 0xa8a0, 0x40c3, 0x0000, + 0x0fac, 0xb101, 0x700c, 0xc6c4, + 0x704c, 0x7210, 0x20e0, 0x07ca, + 0x1600, 0x7101, 0x9008, 0x01d6, + 0x09f3, 0x803e, 0x7144, 0x7ee0, + 0xc2e2, 0x4328, 0x09f2, 0x00a0, + 0x260a, 0x3000, 0x45cb, 0x9004, + 0x00f2, 0x1d00, 0x1045, 0x1d00, + 0x1005, 0x1e00, 0x7784, 0x9008, + 0x01e2, 0x093e, 0x01a0, 0x760c, + 0x0b17, 0x1030, 0x1d10, 0x1045, 0x40c3, 0x0000, 0x61a8, 0x0fb6, - 0xffcf, 0x1e10, 0x1005, 0x1404, - 0x341b, 0xc6c6, 0xc2e6, 0x1cfc, - 0xb6c8, 0xc1a6, 0xc70b, 0xc345, - 0xdb7f, 0xc040, 0x7be4, 0xd84b, - 0x4328, 0x2338, 0x0001, 0xd87d, - 0x78e4, 0xbfe6, 0x21c5, 0x0061, - 0x20b8, 0x0341, 0x7825, 0x23b8, - 0x0441, 0x2305, 0x803e, 0x208a, - 0x003e, 0x70cd, 0x761c, 0xc000, - 0x1600, 0x7083, 0x8000, 0x0446, - 0xc100, 0x208c, 0x8fc3, 0x6a09, - 0x21ca, 0x00c1, 0x7b0f, 0xc344, - 0xc30c, 0x0857, 0x06b5, 0xc343, - 0xdb30, 0xc504, 0xc341, 0xdb50, - 0xc342, 0xdb20, 0xd810, 0x278a, - 0x3801, 0x4378, 0x708d, 0x2025, - 0x0340, 0xf01b, 0xf019, 0xf019, - 0xf017, 0xf061, 0xf016, 0xf015, - 0xf048, 0xf04a, 0xf04b, 0xf011, - 0xf00f, 0xf04c, 0xf03c, 0xf042, - 0xf00b, 0xf00b, 
0xf009, 0xf009, - 0xf007, 0xf007, 0xf005, 0xf044, - 0xf035, 0xf004, 0xf0af, 0x706c, - 0x0a51, 0x06b5, 0x4060, 0x726c, - 0x748d, 0xc042, 0xc041, 0x4318, - 0x4718, 0x4608, 0x2025, 0x0080, - 0xf03d, 0xf035, 0xf032, 0xf018, - 0xf016, 0xf018, 0xf09e, 0xf0a0, - 0xf01a, 0xf022, 0xf023, 0xf012, - 0xf00e, 0xf024, 0xf014, 0xf014, - 0xf00c, 0xf00c, 0xf00a, 0xf00a, - 0xf008, 0xf021, 0xf01e, 0xf01c, - 0xf00d, 0xf00d, 0xdb07, 0xf01b, - 0x0a09, 0x02f1, 0xdc20, 0xf01e, - 0x708d, 0xf01c, 0xc602, 0x758d, - 0xf019, 0x758d, 0x46eb, 0xf017, - 0xdb10, 0xf1cc, 0xc601, 0x758d, - 0xf011, 0x718d, 0x466b, 0xf00f, - 0x768d, 0xf00c, 0x708d, 0xf00b, - 0x736c, 0x0a0f, 0x00d1, 0xc403, - 0x249a, 0x1004, 0x7f85, 0x4468, - 0x4608, 0x0b09, 0x11de, 0xbc86, - 0x208a, 0x0ffd, 0x232f, 0x02c2, - 0x2004, 0x02c0, 0x1438, 0x301b, - 0xc50d, 0x2344, 0x0c03, 0xe0b0, - 0x0a13, 0x01b1, 0x23cf, 0x01e1, - 0x78f0, 0x080b, 0x0051, 0xbe86, - 0x7ecf, 0x202f, 0x02c2, 0x781d, - 0x781d, 0x2004, 0x0f82, 0x0000, - 0x2000, 0xc005, 0x272f, 0x3040, - 0x7a05, 0xd858, 0x2004, 0x02c0, - 0x7865, 0x7885, 0x209a, 0x0004, - 0x43c3, 0x9008, 0x0000, 0x2754, - 0x380c, 0x7c65, 0x7a05, 0x2705, - 0x30c0, 0xb040, 0x4061, 0x209a, - 0x0004, 0x2004, 0x0f82, 0x0000, - 0x0f00, 0xc003, 0x201a, 0x0f80, - 0x0000, 0x1000, 0x7a05, 0x2553, - 0x10c0, 0x7845, 0x7acf, 0x7845, - 0xb400, 0x2755, 0x3800, 0x7865, - 0xb0e0, 0x2304, 0x1f80, 0x0000, - 0xff00, 0x2305, 0x3002, 0x2755, - 0x3c00, 0x7b05, 0xb340, 0x6901, - 0xc100, 0x218c, 0x8fc3, 0xf406, - 0x1e00, 0x7002, 0x8000, 0x0446, - 0x780f, 0xc0a6, 0x1404, 0x341b, - 0xc6c6, 0x738d, 0xde10, 0xf19b, - 0x758d, 0xf198, 0x718d, 0xf196, + 0xffcf, 0x1d10, 0x1005, 0xc6c2, + 0xc2e6, 0x1cfc, 0xb6c8, 0xc1a6, + 0x260a, 0x3080, 0xc20b, 0xc345, + 0xdb7f, 0xc040, 0x7b44, 0xd84b, + 0x2338, 0x000f, 0x279a, 0x1002, + 0xbae6, 0x208a, 0x003e, 0x234f, + 0x004c, 0x27ca, 0x1001, 0xe4cf, + 0x27ca, 0x1001, 0xe3d1, 0x27ca, + 0x1001, 0xc000, 0x1600, 0x7083, + 0x8000, 0x0446, 0x1400, 0x301f, + 0x208c, 0x8fc3, 0xc00e, 0x2642, + 0x304b, 
0x27ca, 0x30c1, 0xc043, + 0xc00d, 0xc044, 0xc00c, 0x0b51, + 0x16b5, 0xc042, 0xdb50, 0xc341, + 0xdb20, 0xd810, 0xde30, 0xdd60, + 0x4378, 0x708d, 0x2025, 0x02c0, + 0xf01a, 0xf01a, 0xf018, 0xf018, + 0xf05c, 0xf019, 0xf014, 0xf016, + 0xf045, 0xf048, 0xf010, 0xf010, + 0xf047, 0xf03b, 0xf00e, 0xf00c, + 0xf00a, 0xf00a, 0xf008, 0xf008, + 0xf006, 0xf006, 0xf03f, 0xf034, + 0xf005, 0xf0a8, 0x706c, 0xf003, + 0xdb10, 0x264c, 0xb640, 0x014a, + 0x002d, 0x4060, 0x726c, 0x748d, + 0xc041, 0x4608, 0x4318, 0x4508, + 0x4708, 0x2025, 0x0780, 0xf036, + 0xf02c, 0xf02b, 0xf017, 0xf017, + 0xf093, 0xf099, 0xf093, 0xf015, + 0xf019, 0xf01c, 0xf091, 0xf00f, + 0xf01b, 0xf00f, 0xf00d, 0xf089, + 0xf087, 0xf087, 0xf085, 0xf085, + 0xf018, 0xf017, 0xf013, 0xf008, + 0xf006, 0xdb07, 0xf012, 0xc701, + 0x758d, 0xf018, 0x758d, 0x47a9, + 0xf014, 0x758d, 0x47c9, 0xf012, + 0x718d, 0x476b, 0xf00e, 0x768d, + 0xf00b, 0x708d, 0xf00a, 0x736c, + 0x0e0d, 0x30d1, 0xc402, 0x249a, + 0x1004, 0x7a85, 0x4468, 0x4708, + 0x238a, 0x0ffd, 0x7b24, 0x783d, + 0x2044, 0x0c0b, 0xe3b0, 0xdb58, + 0x23cf, 0x11e1, 0x7b24, 0x2084, + 0x0001, 0x2305, 0x02c3, 0x7885, + 0x7b05, 0x783d, 0x239a, 0x0004, + 0x781d, 0xc405, 0x781d, 0x2004, + 0x0f80, 0x0000, 0x2000, 0x7c05, + 0x7c65, 0x232f, 0x17c0, 0x43c3, + 0x9008, 0x0000, 0x2305, 0x10c0, + 0xb080, 0x4020, 0x209a, 0x0004, + 0x7154, 0x2104, 0x0f81, 0x0000, + 0xff00, 0x2004, 0x0f8c, 0x0000, + 0x0f00, 0xc002, 0x201a, 0x0f80, + 0x0000, 0x1000, 0x7c05, 0x40e1, + 0x20cf, 0x01a1, 0x264c, 0xb180, + 0x27ca, 0x1001, 0xc004, 0xb8c3, + 0x7f05, 0x78ef, 0x7c05, 0x2354, + 0x1800, 0x7865, 0xb080, 0x2355, + 0x1800, 0x7865, 0xb040, 0xc003, + 0x7905, 0x2355, 0x1c00, 0x7b05, + 0xb320, 0xc000, 0x2740, 0x3041, + 0x208c, 0x8fc3, 0xf405, 0x1e00, + 0x7042, 0x8000, 0x0446, 0x782f, + 0xc0a6, 0x1404, 0x341b, 0xc6c6, + 0x738d, 0xdf10, 0xf19e, 0x708d, + 0xf19b, 0x718d, 0xf199, 0xdc20, + 0xf197, 0x758d, 0xf195, 0x78e0, 0xc0f1, 0xc1a4, 0xc408, 0xc443, 0xc407, 0xc442, 0xc406, 0xc441, - 0xc405, 0x0dde, 0xffef, 0xc440, + 
0xc405, 0x0de2, 0xffef, 0xc440, 0xc0a4, 0xc0d1, 0x7ee0, 0x78e0, 0x41c3, 0x8000, 0x0446, 0x8920, 0x208c, 0x8fc3, 0x20ca, 0x0041, @@ -1867,3657 +1857,3659 @@ static u16 lpddr4x_train1d_imem[] = { 0x0446, 0x8920, 0x208c, 0x8fc3, 0x20ca, 0x0041, 0x1e00, 0x7004, 0x9008, 0x01e6, 0x7ee0, 0x78e0, - 0x41c3, 0x8000, 0x0446, 0x8920, - 0x208c, 0x8fc3, 0x20ca, 0x0041, - 0x1e00, 0x7004, 0x9008, 0x01e4, - 0x7ee0, 0x78e0, 0x41c3, 0x900e, - 0x004c, 0x9160, 0x9141, 0x221a, - 0x0f8c, 0x0001, 0x0000, 0x7c65, - 0xe807, 0x231a, 0x0f81, 0x8000, - 0x0000, 0x789d, 0xf008, 0x4140, - 0xda0f, 0xba61, 0x0aff, 0x8031, - 0x793d, 0x789b, 0x7825, 0x41c3, - 0x900f, 0xfe4c, 0xda10, 0xb100, - 0xba61, 0x0a01, 0x0031, 0x781d, - 0x7fe0, 0xb101, 0xc0e4, 0x70ad, - 0xf002, 0x71a5, 0x0d1f, 0x10b2, - 0x70cd, 0xf010, 0x255a, 0x1c83, - 0x607c, 0x265a, 0x1643, 0x71c5, - 0x639b, 0x633b, 0x8b80, 0x7c44, - 0xab80, 0x0eed, 0x9092, 0xf1ef, - 0xc4c4, 0x78e0, 0xc0f1, 0xc1a4, - 0x700c, 0x0c0e, 0xffef, 0xd9ff, + 0x41c3, 0x900e, 0x004c, 0x9160, + 0x9141, 0x221a, 0x0f81, 0x0001, + 0x0000, 0x7965, 0xe807, 0x231a, + 0x0f80, 0x8000, 0x0000, 0x793d, + 0xf008, 0x4040, 0xda0f, 0xba61, + 0x0aff, 0x8031, 0x781d, 0x793b, + 0x7825, 0x41c3, 0x900f, 0xfe4c, + 0xda10, 0xb100, 0xba61, 0x0a01, + 0x0031, 0x781d, 0x7fe0, 0xb101, + 0x264a, 0x3000, 0xf010, 0x265a, + 0x3c83, 0x607c, 0x235a, 0x1643, + 0x7165, 0x639b, 0x633b, 0x8b80, + 0x7c44, 0xab80, 0x0beb, 0x9094, + 0x2640, 0x305e, 0x0ef9, 0xb0b4, + 0x706d, 0x7ee0, 0xc0f1, 0xc1a4, + 0x700c, 0x0c4a, 0xffef, 0xd9ff, 0x700c, 0xc043, 0xc042, 0xc041, 0xc040, 0xd8ff, 0xd988, 0x754c, - 0x0ce6, 0xffef, 0xdb40, 0x700c, - 0x0c9a, 0xffef, 0x712c, 0xc0a4, + 0x0d0a, 0xffef, 0xdb40, 0x700c, + 0x0cc2, 0xffef, 0x712c, 0xc0a4, 0xc0d1, 0x7ee0, 0x7014, 0x20c0, 0x0fe9, 0x7fe0, 0x2884, 0x0001, - 0x42c3, 0x8000, 0x076d, 0x8a01, - 0x205f, 0x0641, 0x8a00, 0x205f, - 0x0c80, 0x6038, 0x6058, 0x8816, - 0x781d, 0x781d, 0x781d, 0x781d, - 0x781d, 0x781d, 0x7fe0, 0xb8c0, - 0x0917, 0x03f0, 0x228a, 0x0fc7, - 0x714c, 0xb9c4, 
0xe905, 0xb961, - 0x09ff, 0x8031, 0x7a5b, 0x201a, - 0x0f80, 0x0000, 0x2000, 0x224f, - 0x0241, 0x2005, 0x0f80, 0x9002, - 0x016a, 0x1800, 0x0005, 0xb040, - 0xb020, 0xb040, 0x7fe0, 0x1800, - 0x0005, 0x78e0, 0x080d, 0x0072, - 0x702c, 0x48ff, 0x2984, 0x0001, - 0x7fe0, 0x782e, 0xc0f1, 0x4200, - 0x0eba, 0x0060, 0x4020, 0x795b, - 0x2144, 0x0181, 0xb9c4, 0xe906, - 0xb961, 0x0901, 0x0031, 0x781d, - 0xc0d1, 0x7fe0, 0xb8c1, 0x78e0, - 0xc0f1, 0xc1a4, 0x700c, 0x0b3a, - 0xffef, 0xd9ff, 0x700c, 0xc043, - 0xc042, 0xc041, 0xc040, 0xd8ff, - 0xd990, 0x754c, 0x0c12, 0xffef, - 0xdb40, 0x700c, 0x0bc6, 0xffef, - 0x712c, 0xc0a4, 0xc0d1, 0x7ee0, - 0xc3e1, 0xc2e1, 0xc1e1, 0xc0e1, - 0xc0f1, 0xc1a1, 0x4220, 0xc184, - 0xc140, 0x1600, 0x7081, 0x8000, - 0x0012, 0x080f, 0x0064, 0x4040, - 0x0c16, 0x02e0, 0xc100, 0x7487, - 0xc0d1, 0x7fe0, 0xc0a4, 0x78e0, - 0x791d, 0x793d, 0x793d, 0x793d, - 0x793d, 0x793d, 0x215f, 0x0802, - 0x219a, 0x0001, 0x7822, 0x6058, - 0x7fe0, 0x780e, 0xc0e6, 0x215f, - 0x0483, 0x44cb, 0x8000, 0x0684, - 0x70cd, 0x647a, 0x8aa2, 0x8a43, - 0x7d04, 0xf003, 0x71c5, 0x0e1d, - 0x10a3, 0x2314, 0x0381, 0x6199, - 0x89e4, 0x7f0b, 0xf3f8, 0x89e5, - 0x79af, 0x210f, 0x03cd, 0xf1f4, - 0x78af, 0xc4c6, 0xc2e6, 0x1cfc, - 0xb6c8, 0xc1a4, 0x46cb, 0x8000, - 0x077c, 0x6e04, 0x0b42, 0x00a0, - 0x4318, 0x70ed, 0x70ad, 0xf003, - 0x71a5, 0x0d1d, 0x10b2, 0x708d, - 0xf00d, 0x245a, 0x1641, 0x255a, - 0x1c80, 0x6038, 0x0ebe, 0x0260, - 0x7063, 0x7185, 0x0cef, 0x9092, - 0xf1f0, 0x40c3, 0x8000, 0x076d, - 0x1e00, 0x16c0, 0xa8e1, 0xa8e0, - 0x098a, 0x00a0, 0xa8ee, 0x0992, - 0x00a0, 0x4508, 0x4100, 0x0fa2, - 0x0260, 0x40a1, 0x266f, 0x10c3, - 0x8e18, 0xe809, 0x41c3, 0x00ad, - 0x0000, 0x0f12, 0xffef, 0xd80a, - 0xf01e, 0x8e00, 0x45cb, 0x001e, - 0x8480, 0x41c3, 0x05f5, 0xe100, - 0x2044, 0x808f, 0x40a1, 0x20ca, - 0x0041, 0x0942, 0x0160, 0x702c, - 0x70f5, 0x40c3, 0x3b9a, 0xca00, - 0x25ca, 0x1001, 0x40a1, 0x1e00, - 0x70c5, 0x9004, 0x00c0, 0x0926, - 0x0160, 0x702c, 0x1600, 0x7080, - 0x8000, 0x000d, 0x080d, 0x001e, - 0x1e00, 
0x7045, 0x9004, 0x00c0, - 0x47cb, 0x8000, 0x0684, 0x8f00, - 0xe804, 0x8e00, 0x0817, 0x00de, - 0x0da6, 0xffcf, 0x40c3, 0x000f, - 0x4240, 0x08f2, 0x0160, 0x702c, - 0xf054, 0x0dda, 0x0000, 0x40c3, - 0x0000, 0x0a00, 0x099a, 0xffef, - 0xd9ff, 0x70ad, 0xd8ff, 0x702c, - 0x754c, 0x736c, 0xc543, 0xc542, - 0xc541, 0x0a76, 0xffef, 0xc540, - 0x8f01, 0x0825, 0x00b4, 0x702c, - 0x7104, 0x781d, 0xb862, 0x780f, - 0xc542, 0xc541, 0xc540, 0xc043, - 0xd8ff, 0x754c, 0x0a52, 0xffef, - 0x726c, 0x8f01, 0xc542, 0xc541, - 0xc540, 0xc043, 0xd8ff, 0x702c, - 0x754c, 0x0a3e, 0xffef, 0xdb0c, - 0x8f01, 0xd908, 0x754c, 0xc043, - 0xd8ff, 0x726c, 0xc542, 0xc541, - 0x0a26, 0xffef, 0xc540, 0x0c72, - 0xffef, 0xd8ff, 0x8f01, 0x702c, - 0x754c, 0xc043, 0xd8ff, 0x726c, - 0xc542, 0xc541, 0x0a0a, 0xffef, - 0xc540, 0x0c3a, 0xffef, 0xd8ff, - 0x0a0a, 0x0280, 0xd896, 0x09b6, - 0xffef, 0x712c, 0x0f32, 0x0000, - 0x8e00, 0x083f, 0x001e, 0x43c3, - 0x9004, 0x0014, 0x9320, 0xca01, - 0x080d, 0x01b0, 0x214f, 0x0082, - 0x080b, 0x00d1, 0x2185, 0x0108, - 0x4220, 0xb340, 0xd823, 0x42c3, - 0x9005, 0xe034, 0xb200, 0x702c, - 0x40c3, 0x9003, 0xe034, 0xb020, - 0xb8b1, 0xb020, 0xb221, 0xddff, - 0x4063, 0x702c, 0x228a, 0x0dff, - 0x776c, 0x0ba6, 0x0320, 0xc540, - 0x774c, 0x4063, 0x712c, 0x4340, - 0x0b96, 0x0320, 0xc540, 0xc0a4, - 0x1404, 0x341b, 0xc6c6, 0x78e0, - 0x706c, 0x704c, 0xf002, 0x7144, - 0xca04, 0x7210, 0x20e0, 0x07ca, - 0x40c3, 0x8000, 0x0771, 0x8820, - 0x0a0b, 0x0042, 0x8801, 0x08eb, - 0x8083, 0x221a, 0x0f80, 0x0000, - 0x2000, 0x2005, 0x0f80, 0x9002, - 0x1f64, 0xb060, 0xf1e9, 0x78e0, - 0xc2e4, 0x70ad, 0xbd9a, 0x70cd, - 0xd8ff, 0x41a1, 0x0d46, 0xffef, - 0x1a09, 0x3382, 0x0d8a, 0x0240, - 0x254f, 0x1401, 0x0d36, 0xffef, - 0xd8ff, 0x08c6, 0x02e0, 0xd8ff, - 0x26ab, 0x10c4, 0x7fff, 0xf000, - 0xc0f1, 0xc1a5, 0x4300, 0xc080, - 0x702c, 0x0a82, 0x0160, 0xda14, - 0x231a, 0x0f80, 0x0010, 0x0000, - 0xc180, 0x714c, 0x0f06, 0x0220, - 0xb885, 0x41c3, 0x00f5, 0x0000, - 0x0cfa, 0xffef, 0x750c, 0x700c, - 0x712c, 0x0efe, 0xffaf, 0xc280, - 
0xc0a5, 0xc0d1, 0x7ee0, 0x78e0, 0xc2e6, 0x1cfc, 0xb6c8, 0x2482, - 0x3d02, 0x4748, 0x4338, 0x4408, - 0xc080, 0x702c, 0x0a36, 0x0160, - 0xdab4, 0x0c25, 0x10b1, 0x750c, - 0x41c3, 0x00f1, 0x0001, 0x0cbe, - 0xffef, 0x4260, 0xc080, 0x702c, - 0x080a, 0x0260, 0x714c, 0x700c, - 0xd909, 0xf035, 0x231a, 0x3f9b, - 0x0010, 0x0000, 0x750c, 0xec0c, - 0x41c3, 0x00f3, 0x0001, 0x0c96, - 0xffef, 0x4260, 0xdd09, 0xdec0, - 0xd8d0, 0xf00b, 0x41c3, 0x00f2, - 0x0001, 0x0c82, 0xffef, 0x4260, - 0x72ad, 0xde8c, 0xd880, 0x60f8, - 0x2005, 0x06c0, 0xc180, 0x0e6e, - 0x0220, 0x724c, 0x700c, 0x722c, - 0x0e6e, 0xffaf, 0xc280, 0xc080, - 0x702c, 0x09c2, 0x0160, 0xdab4, - 0x66fe, 0x2605, 0x16c0, 0xc180, - 0x0e4a, 0x0220, 0x42a1, 0x710c, - 0x41a1, 0x0e4e, 0xffaf, 0xc280, - 0x2480, 0x3d02, 0x1404, 0x341b, - 0xc6c6, 0x78e0, 0xc3e2, 0x2482, - 0x3d02, 0x4528, 0x4300, 0xc080, - 0x702c, 0x098a, 0x0160, 0xdab4, - 0x2354, 0x0d00, 0xc180, 0x0e16, - 0x0220, 0xda09, 0x750c, 0x41c3, - 0x00f4, 0x0001, 0x0c06, 0xffef, - 0x42a1, 0x700c, 0xd909, 0x0e0a, - 0xffaf, 0xc280, 0xc7c2, 0x78e0, - 0xc0f1, 0x2482, 0x3d02, 0xc080, - 0x702c, 0x0952, 0x0160, 0xdab4, - 0x41c3, 0x00f6, 0x0000, 0x0bde, - 0xffef, 0x750c, 0xd840, 0xc180, - 0x0dd2, 0x0220, 0xda09, 0x700c, - 0xd909, 0x0dd6, 0xffaf, 0xc280, - 0x2480, 0x3d02, 0xc0d1, 0x7ee0, - 0x1600, 0x7081, 0x8000, 0x0012, - 0x7534, 0x20e0, 0x07c5, 0x7fe0, - 0x1a09, 0x3002, 0xc0e4, 0x43c3, - 0x8000, 0x076f, 0x41c3, 0x9004, - 0x00e4, 0x083f, 0x00b1, 0xdc25, - 0x8b86, 0x789b, 0xab00, 0x8b47, - 0x785b, 0x7104, 0xab01, 0xab82, - 0xab43, 0x704c, 0xb140, 0x1600, - 0x7080, 0x8000, 0x0058, 0xb101, - 0x41c3, 0x9004, 0x00f6, 0xd8ff, - 0xb140, 0xb144, 0x1906, 0x03c5, - 0x19fe, 0x8004, 0xc4c4, 0x8ba4, - 0x46cb, 0x9004, 0x0030, 0x7abb, - 0x0825, 0x0071, 0xbc9f, 0xab40, - 0x8b05, 0x7a1b, 0x7144, 0xab41, - 0xaba2, 0xab03, 0x9640, 0x8c00, - 0x0a29, 0x0171, 0xb100, 0x8c00, - 0xb101, 0xf012, 0xab40, 0x8b07, - 0x7a1b, 0x7144, 0xab41, 0xaba2, - 0xab03, 0x8c00, 0xb100, 0x9600, - 0x0827, 0x0151, 0x8c00, 
0xf015, - 0x1902, 0x0005, 0x40c3, 0x9004, - 0x00fe, 0xd9ff, 0x1800, 0x03c5, - 0x18f8, 0x8044, 0x18f6, 0x8005, - 0x18fe, 0x8005, 0xc4c4, 0x1600, - 0x7080, 0x8000, 0x0058, 0xb101, - 0x702c, 0x40c3, 0x9004, 0x00f6, - 0xb020, 0xb024, 0x18fe, 0x8044, - 0xb023, 0xc4c4, 0xc2e6, 0x1cfc, - 0xb6c8, 0xc1a5, 0x4608, 0xb8c1, - 0x45cb, 0x8000, 0x076d, 0x2078, - 0x0080, 0x8de0, 0xc042, 0xc144, - 0xc102, 0x0cea, 0x00e0, 0x40e1, - 0xe513, 0xc043, 0x40a1, 0x0ac2, - 0x0260, 0xd90d, 0x40a1, 0xd90d, - 0x0fe6, 0x00e0, 0xda40, 0x40a1, - 0xd90d, 0x0966, 0xffef, 0xda7f, - 0x265f, 0x1100, 0x7e05, 0x1600, - 0x7080, 0x8000, 0x0684, 0xe809, - 0x1600, 0x7080, 0x8000, 0x0001, - 0x0829, 0x00fe, 0x704c, 0xc002, - 0x275f, 0x1c81, 0x704c, 0x205f, - 0x0640, 0x43c3, 0xffbf, 0x87f7, - 0x1c00, 0x3fc1, 0x6038, 0x60b9, - 0x0c96, 0x02e0, 0x78cf, 0xf013, - 0xc002, 0x275f, 0x1c81, 0x205f, - 0x0640, 0x43c3, 0xffbf, 0x87f7, - 0x1c04, 0x3001, 0x1c00, 0x3fc1, - 0x6038, 0x60b9, 0x0db6, 0x02e0, - 0x78cf, 0x706d, 0x40c3, 0x9003, - 0xe04c, 0x218a, 0x0fc7, 0x1800, - 0x02c4, 0x18fc, 0x8044, 0xc003, - 0xe8aa, 0xf022, 0x275f, 0x1401, + 0x360a, 0x0b42, 0x0020, 0x712c, + 0x1600, 0x709b, 0x8000, 0x0004, + 0x0ab6, 0x0020, 0xc043, 0xc042, + 0x2400, 0x3f80, 0x0000, 0x023c, + 0x702c, 0x0932, 0x01a0, 0xda5a, + 0x2456, 0x3c40, 0x0926, 0x01a0, + 0xdab4, 0x0896, 0x0160, 0x208a, + 0x0b04, 0x082e, 0x0160, 0xc041, + 0x704c, 0x1e00, 0x7084, 0x9003, + 0xfec4, 0xc002, 0x2079, 0x0000, + 0xe008, 0xc040, 0xf012, 0x4300, + 0x2244, 0x07c0, 0x6a21, 0x229a, + 0x0008, 0xe806, 0xb861, 0x0801, + 0x0031, 0x7b7b, 0x2205, 0x0f80, + 0x9003, 0xe0c4, 0xb060, 0x4220, + 0xc000, 0x0adf, 0x8024, 0x710c, + 0xd880, 0xb88e, 0x1e00, 0x7004, + 0x9003, 0xe004, 0x231a, 0x3f80, + 0x0020, 0x0000, 0x2005, 0x0f81, + 0x9003, 0xfe86, 0x2005, 0x0f80, + 0x9002, 0x0086, 0x9000, 0xb8a0, + 0x0a32, 0x00a0, 0xb100, 0x208c, + 0x8f03, 0xd93f, 0xf643, 0x2844, + 0x0101, 0x231a, 0x3f9b, 0x0010, + 0x0000, 0x1c10, 0x36c0, 0x43db, + 0x8000, 0x07cd, 0x1300, 0x308e, + 0x1301, 0x309e, 
0x42c1, 0xf012, + 0x7844, 0x201a, 0x0f83, 0x0000, + 0x1000, 0xc003, 0x7144, 0x2054, + 0x0d00, 0x7865, 0x781b, 0x2005, + 0x0f80, 0x9002, 0x1e00, 0xb020, + 0x784f, 0x0ee1, 0xb025, 0xd8ef, + 0xd97f, 0x40c3, 0x9003, 0xe024, + 0xb020, 0x218a, 0x03c4, 0x70ad, + 0x180c, 0x0045, 0x18fe, 0x8044, + 0xf098, 0xc003, 0x2055, 0x08c1, + 0xc004, 0x7905, 0xd8ef, 0x78c4, + 0x201a, 0x0f80, 0x0000, 0x1000, + 0x71c5, 0x7825, 0x781b, 0x2005, + 0x0f80, 0x9002, 0x1e00, 0xb0a0, + 0x78cf, 0x0edb, 0xb005, 0xd80f, + 0x09ae, 0x0020, 0xd90f, 0x46cb, + 0x9003, 0xfe64, 0x0e1e, 0x01a0, + 0x1e00, 0x1005, 0x0e16, 0x0180, + 0x0c9e, 0x0160, 0xc001, 0x710c, + 0x08e2, 0x02a0, 0x218a, 0x0fc7, + 0xd820, 0x0b32, 0xffef, 0x712c, + 0xc088, 0x702c, 0x0a66, 0x0260, + 0x714c, 0xd80f, 0x0972, 0x0020, + 0xd90f, 0xd87f, 0x0de6, 0x01a0, + 0xb600, 0x0de2, 0x0180, 0x0c6a, + 0x0160, 0xc001, 0x730c, 0x08ae, + 0x02a0, 0x218a, 0x0fc7, 0xd820, + 0x0afa, 0xffef, 0x712c, 0x2455, + 0x3d40, 0x702c, 0x0a2e, 0x0260, + 0x714c, 0x1300, 0x308b, 0x235f, + 0x1243, 0xf03b, 0xc085, 0x702c, + 0x0f8a, 0x0160, 0xda09, 0x70cd, + 0xf02b, 0x787b, 0x2084, 0x0f87, + 0xc188, 0x2135, 0x001f, 0x2455, + 0x3d41, 0x2135, 0x001e, 0x275f, + 0x1241, 0x2602, 0x37c0, 0x2048, + 0x0002, 0xc085, 0x60d8, 0xa840, + 0x2400, 0x3f80, 0x0000, 0x023c, + 0x6038, 0x60d8, 0x8820, 0x7c4f, + 0x091b, 0x0305, 0x275f, 0x1481, + 0xa840, 0x2456, 0x3c40, 0x6038, + 0x2700, 0x3781, 0x78d4, 0x793d, + 0xb020, 0x71c5, 0x7164, 0xc000, + 0x0eab, 0x9004, 0xc002, 0x7165, + 0x7014, 0x23c0, 0x0061, 0x1301, + 0x309e, 0x272f, 0x12c7, 0x0e89, + 0xb3c5, 0x1300, 0x308e, 0x74a5, + 0xe59e, 0x06f8, 0xffce, 0xf033, + 0x255f, 0x1241, 0x2400, 0x3f80, + 0x0000, 0x023c, 0x43e1, 0x6038, + 0x41c3, 0x003e, 0x0002, 0x08de, + 0xffaf, 0x60e8, 0x42e1, 0x251a, + 0x1f80, 0x0000, 0x2000, 0x229a, + 0x0008, 0x255f, 0x148c, 0x43c3, + 0x9002, 0x0080, 0x7a05, 0x2456, + 0x3c40, 0x641c, 0x24f4, 0x13c0, + 0x2205, 0x00c1, 0xb100, 0x2342, + 0x0800, 0x7a05, 0x24f4, 0x13c0, + 0x71e5, 0xb200, 0xc000, 0x0fad, + 0x9024, 
0x42a1, 0x1301, 0x309e, + 0x71c5, 0x7dcf, 0x0ef1, 0xb365, + 0x70ed, 0x2480, 0x360a, 0x1404, + 0x341b, 0xc6c6, 0x40c3, 0x8000, + 0x07c9, 0x8821, 0x8800, 0x215f, + 0x0641, 0x205f, 0x0c80, 0x6038, + 0x2000, 0x0f80, 0x8000, 0x067c, + 0x8803, 0x781d, 0x781d, 0x781d, + 0x781d, 0x781d, 0x781d, 0x7fe0, + 0xb8c0, 0x78e0, 0x0917, 0x03f0, + 0x228a, 0x0fc7, 0x714c, 0xb9c4, + 0xe905, 0xb961, 0x09ff, 0x8031, + 0x7a5b, 0x201a, 0x0f80, 0x0000, + 0x2000, 0x2005, 0x0f81, 0x9002, + 0x016a, 0x224f, 0x0240, 0x1900, + 0x0005, 0xb140, 0xb100, 0xb140, + 0x7fe0, 0x1900, 0x0005, 0x78e0, + 0x080d, 0x0072, 0x702c, 0x48ff, + 0x2984, 0x0001, 0x7fe0, 0x782e, + 0xc0f1, 0x4200, 0x0f72, 0x0060, + 0x4020, 0x795b, 0x2144, 0x0181, + 0xb9c4, 0xe906, 0xb961, 0x0901, + 0x0031, 0x781d, 0xc0d1, 0x7fe0, + 0xb8c1, 0x78e0, 0xc0f1, 0xc1a4, + 0x700c, 0x08a2, 0xffef, 0xd9ff, + 0x700c, 0xc043, 0xc042, 0xc041, + 0xc040, 0xd8ff, 0xd990, 0x754c, + 0x0962, 0xffef, 0xdb40, 0x700c, + 0x091a, 0xffef, 0x712c, 0xc0a4, + 0xc0d1, 0x7ee0, 0xc3e1, 0xc2e1, + 0xc1e1, 0xc0e1, 0xc0f1, 0xc1a1, + 0x4220, 0xc184, 0xc140, 0x1600, + 0x7081, 0x8000, 0x0012, 0x080f, + 0x0064, 0x4040, 0x08f2, 0x02e0, + 0xc100, 0x7487, 0xc0d1, 0x7fe0, + 0xc0a4, 0x78e0, 0x791d, 0x216c, + 0x0102, 0x206c, 0x0141, 0x7822, + 0x621a, 0x7fe0, 0x784e, 0x78e0, + 0xc0e6, 0x215f, 0x0483, 0x44cb, + 0x8000, 0x06e2, 0x70cd, 0x638d, + 0x6479, 0x8941, 0x7d04, 0xf00b, + 0x6199, 0x89e2, 0x7f0b, 0xf206, + 0x89e3, 0x79af, 0x210f, 0x03cd, + 0x71c5, 0x0ef1, 0x90a4, 0x2314, + 0x0381, 0x78af, 0xc4c6, 0x78e0, + 0xc2e6, 0x1cfc, 0xb6c8, 0xc1a6, + 0x40c3, 0x8000, 0x067c, 0x0bda, + 0x00a0, 0x4318, 0x70ad, 0x43a9, + 0xf00d, 0x235a, 0x0641, 0x235a, + 0x1c80, 0x6038, 0x0bae, 0x0260, + 0x7063, 0x7164, 0x0bef, 0x8094, + 0x7165, 0x0bfd, 0x90b4, 0x706c, + 0x40c3, 0x8000, 0x07c9, 0xa8ae, + 0xa8a1, 0xa8a0, 0x40c3, 0x8000, + 0x07d8, 0x0a4a, 0x00a0, 0x1800, + 0x06c0, 0x0a4e, 0x00a0, 0x4508, + 0x4100, 0x0c96, 0x0260, 0x40a1, + 0x266f, 0x10c3, 0x8e18, 0xe809, + 0x41c3, 0x00ad, 0x0000, 0x0f1a, + 
0xffef, 0xd80a, 0xf01e, 0x8e00, + 0x47cb, 0x001e, 0x8480, 0x41c3, + 0x05f5, 0xe100, 0x2044, 0x808d, + 0x40e1, 0x20ca, 0x0041, 0x09ba, + 0x0160, 0x702c, 0x70b5, 0x40c3, + 0x3b9a, 0xca00, 0x27ca, 0x1001, + 0x40e1, 0x1e00, 0x70c5, 0x9004, + 0x00c0, 0x099e, 0x0160, 0x702c, + 0x1600, 0x7080, 0x8000, 0x000d, + 0x080d, 0x001e, 0x1e00, 0x7045, + 0x9004, 0x00c0, 0x47cb, 0x8000, + 0x06e0, 0x8f00, 0x7014, 0xf2d3, + 0x8e00, 0xb8e3, 0xf4cf, 0x0e9e, + 0x0000, 0x40c3, 0x0000, 0x0a00, + 0x0f1a, 0xffaf, 0xd9ff, 0x70ad, + 0xd8ff, 0x702c, 0x754c, 0x736c, + 0xc543, 0xc542, 0xc541, 0x0fde, + 0xffaf, 0xc540, 0x8f01, 0x0825, + 0x00b4, 0x702c, 0x7104, 0x781d, + 0xb862, 0x780f, 0xc043, 0xd8ff, + 0x754c, 0x726c, 0xc542, 0xc541, + 0x0fba, 0xffaf, 0xc540, 0x8f01, + 0xc043, 0xd8ff, 0x702c, 0x754c, + 0xdb0c, 0xc542, 0xc541, 0x0fa6, + 0xffaf, 0xc540, 0x8f01, 0xd908, + 0x754c, 0xc043, 0xd8ff, 0x726c, + 0xc542, 0xc541, 0x0f8e, 0xffaf, + 0xc540, 0x09d6, 0xffef, 0xd8ff, + 0x8f01, 0x702c, 0x754c, 0xc043, + 0xd8ff, 0x726c, 0xc542, 0xc541, + 0x0f72, 0xffaf, 0xc540, 0x099e, + 0xffef, 0xd8ff, 0x1600, 0x70c0, + 0x8000, 0x000e, 0x08ed, 0x0013, + 0x1303, 0x3080, 0xc044, 0xd825, + 0xb89f, 0x88c0, 0x1033, 0x008d, + 0x0b12, 0x0020, 0x730c, 0x8f01, + 0x2605, 0x1341, 0x704c, 0xc043, + 0x265f, 0x1100, 0x71cd, 0x726c, + 0xc641, 0x7d05, 0x7d25, 0x78af, + 0xc045, 0xc042, 0xc004, 0x702c, + 0x781d, 0x781d, 0x2044, 0x0800, + 0x2045, 0x0180, 0xc040, 0x0f16, + 0xffaf, 0xd8ff, 0x8f01, 0x702c, + 0x704c, 0xc043, 0xd8ff, 0x726c, + 0x4528, 0xc142, 0xc641, 0x0efe, + 0xffaf, 0x1c00, 0x30c1, 0x8f01, + 0x702c, 0x704c, 0xc043, 0xc005, + 0x726c, 0xc641, 0xc042, 0xc004, + 0x781d, 0x2044, 0x0800, 0x2045, + 0x0580, 0xc040, 0x0ed6, 0xffaf, + 0xd8ff, 0x8f01, 0x702c, 0x704c, + 0xc043, 0xc004, 0x726c, 0xc542, + 0xb8c5, 0xc040, 0xd8ff, 0x0ebe, + 0xffaf, 0xc641, 0x40c3, 0x0000, + 0x2710, 0x0846, 0x0160, 0xd90a, + 0x4608, 0x208a, 0x0e0f, 0x41c3, + 0x00ae, 0x0000, 0x0cfe, 0xff6f, + 0x263c, 0x1000, 0x78dd, 0x781d, + 0x7704, 0x7e0f, 0xf81a, 
0xc643, + 0xc542, 0xc541, 0x0e86, 0xffaf, + 0xc540, 0xf817, 0xc643, 0xc542, + 0xc541, 0x0e7a, 0xffaf, 0xc540, + 0x0e06, 0x0240, 0xd896, 0x0e2e, + 0xffaf, 0x712c, 0x0f22, 0x0000, + 0xf009, 0x093e, 0xffcf, 0x40c3, + 0x000f, 0x4240, 0x0fca, 0x0120, + 0x702c, 0xddff, 0x4063, 0x702c, + 0x228a, 0x0dff, 0x776c, 0x084e, + 0x0320, 0xc540, 0x774c, 0x4063, + 0x712c, 0x4340, 0x083e, 0x0320, + 0xc540, 0xc0a6, 0x1404, 0x341b, + 0xc6c6, 0x78e0, 0xd8ff, 0x702c, + 0x704c, 0x726c, 0x7ee0, 0x78e0, + 0xca04, 0x704c, 0x43c3, 0x8000, + 0x07cd, 0xf013, 0x8b20, 0x0a0d, + 0x0044, 0x8b21, 0x0919, 0x0085, + 0x221a, 0x0f81, 0x0000, 0x2000, + 0x2105, 0x0f81, 0x9002, 0x1f64, + 0x1900, 0x0005, 0x7144, 0x0ae1, + 0x8004, 0x7ee0, 0xc2e4, 0x70ad, + 0xbd9a, 0xd8ff, 0x41a1, 0x70cd, + 0x0c8e, 0xffef, 0x1a09, 0x3003, + 0x09be, 0x0240, 0x254f, 0x1401, + 0x0c7e, 0xffef, 0xd8ff, 0x0ce6, + 0x02a0, 0xd8ff, 0x26ab, 0x10c4, + 0x7fff, 0xf000, 0xc0f1, 0xc1a5, + 0x4300, 0xc080, 0x702c, 0x0a1e, + 0x0160, 0xda14, 0x231a, 0x0f80, + 0x0010, 0x0000, 0xc180, 0x714c, + 0x0b46, 0x0220, 0xb885, 0x41c3, + 0x00f7, 0x0000, 0x0c42, 0xffef, + 0x750c, 0x700c, 0x712c, 0x0baa, + 0xffaf, 0xc280, 0xc0a5, 0xc0d1, + 0x7ee0, 0x78e0, 0xc2e6, 0x1cfc, + 0xb6c8, 0x2482, 0x3d02, 0x4748, + 0x4338, 0x4308, 0xc080, 0x702c, + 0x09d2, 0x0160, 0xdab4, 0x0b25, + 0x10b1, 0x750c, 0x41c3, 0x00f3, + 0x0001, 0x0c06, 0xffef, 0x4260, + 0xc080, 0x702c, 0x0c46, 0x0220, + 0x714c, 0x700c, 0xd909, 0xf036, + 0x231a, 0x3f9b, 0x0010, 0x0000, + 0x0b1b, 0x1030, 0x750c, 0x41c3, + 0x00f5, 0x0001, 0x0bda, 0xffef, + 0x4260, 0xdd09, 0xdec0, 0xd8d0, + 0xf00a, 0x41c3, 0x00f4, 0x0001, + 0x0bc6, 0xffef, 0x4260, 0x72ad, + 0xde8c, 0xd880, 0x60f8, 0x2005, + 0x06c0, 0xc180, 0x0aaa, 0x0220, + 0x724c, 0x700c, 0x722c, 0x0b1a, + 0xffaf, 0xc280, 0xc080, 0x702c, + 0x095a, 0x0160, 0xdab4, 0x66fe, + 0x2605, 0x16c0, 0xc180, 0x0a8a, + 0x0220, 0x42a1, 0x710c, 0x41a1, + 0x0af6, 0xffaf, 0xc280, 0x2480, + 0x3d02, 0x1404, 0x341b, 0xc6c6, + 0xc3e2, 0x2482, 0x3d02, 0x4528, + 0x4300, 0xc080, 
0x702c, 0x0926, + 0x0160, 0xdab4, 0x2354, 0x0d00, + 0xc180, 0x0a56, 0x0220, 0xda09, + 0x750c, 0x41c3, 0x00f6, 0x0001, + 0x0b4e, 0xffef, 0x42a1, 0x700c, + 0xd909, 0x0ab6, 0xffaf, 0xc280, + 0xc7c2, 0x78e0, 0xc0f1, 0x2482, + 0x3d02, 0xc080, 0x702c, 0x08ee, + 0x0160, 0xdab4, 0x41c3, 0x00f8, + 0x0000, 0x0b26, 0xffef, 0x750c, + 0xd840, 0xc180, 0x0a12, 0x0220, + 0xda09, 0x700c, 0xd909, 0x0a82, + 0xffaf, 0xc280, 0x2480, 0x3d02, + 0xc0d1, 0x7ee0, 0x1600, 0x7081, + 0x8000, 0x0012, 0x7534, 0x20e0, + 0x07c5, 0x7fe0, 0x1a09, 0x3002, + 0xc0e6, 0x44cb, 0x9004, 0x00e4, + 0x42c3, 0x8000, 0x07cb, 0x0835, + 0x00b0, 0x716c, 0x7114, 0xd825, + 0x46cb, 0x9004, 0x0030, 0xb89f, + 0xf42a, 0x8a25, 0xaa23, 0x8aa4, + 0x7b34, 0xaaa2, 0x96c0, 0x88e0, + 0x78bb, 0xb4e0, 0xaa00, 0x0e5f, + 0x1171, 0xaa61, 0xb4e1, 0xf02d, + 0x700c, 0xb400, 0x1600, 0x7081, + 0x8000, 0x0058, 0xb421, 0x44cb, + 0x9004, 0x00f6, 0xb400, 0xb404, + 0x1c06, 0x13c5, 0x8a27, 0xaa23, + 0x8a06, 0x7b34, 0xaa02, 0x781b, + 0xaa00, 0xd8ff, 0xaa61, 0x1cfe, + 0x9004, 0xc4c6, 0x88a0, 0xb4a0, + 0x8a27, 0xaa23, 0x8a04, 0x7b34, + 0xaa02, 0x781b, 0xaa00, 0xaa61, + 0x9600, 0x0829, 0x0151, 0xb4a1, + 0xf015, 0x1c02, 0x1005, 0x41c3, + 0x9004, 0x00fe, 0xd8ff, 0x1900, + 0x03c5, 0x19f8, 0x8004, 0x19f6, + 0x8005, 0x19fe, 0x8005, 0xc4c6, + 0x1600, 0x7080, 0x8000, 0x0058, + 0xb401, 0x702c, 0x40c3, 0x9004, + 0x00f6, 0xb020, 0xb024, 0x18fe, + 0x8044, 0xb023, 0xc4c6, 0x78e0, + 0xc2e6, 0x1cfc, 0xb6c8, 0xc1a5, + 0x4708, 0x40c3, 0x8000, 0x07c9, + 0x88c0, 0x2753, 0x1040, 0x2078, + 0x0080, 0xc042, 0xc144, 0xc102, + 0x0d0e, 0x00e0, 0x40c1, 0x45cb, + 0x8000, 0x067c, 0xc043, 0x40a1, + 0x0eee, 0x0220, 0xd90d, 0x40a1, + 0xd90d, 0x0fbe, 0x00e0, 0xda40, + 0x40a1, 0xd90d, 0x0dd6, 0xffaf, + 0xda7f, 0x275f, 0x1100, 0x43db, + 0x8000, 0x06e0, 0x7f05, 0x1300, + 0x3080, 0xe814, 0x1600, 0x7080, + 0x8000, 0x0001, 0x081d, 0x00df, + 0xc002, 0xf863, 0x1c04, 0x3001, + 0x1c00, 0x3fc1, 0x6159, 0x61b9, + 0x0a66, 0x02e0, 0x704c, 0xf00b, + 0xc002, 0xf85d, 0x1c00, 0x3fc1, + 0x6159, 
0x61b9, 0x090a, 0x02e0, + 0x704c, 0x40c3, 0x9003, 0xe04c, + 0x218a, 0x0fc7, 0x1800, 0x0005, + 0x18fc, 0x8044, 0x706d, 0xc003, + 0xe8aa, 0xf022, 0x265f, 0x1401, 0xc002, 0x2116, 0x0000, 0x7061, - 0x2032, 0x0f81, 0x8000, 0x074c, - 0xc004, 0x0eea, 0x0260, 0x4338, - 0x231a, 0x3f9b, 0x0000, 0x2000, - 0x42c3, 0x9002, 0x0050, 0xb88a, - 0x7165, 0x2305, 0x3081, 0xba64, - 0x2305, 0x309b, 0xb100, 0x1b00, - 0x3fc5, 0xb100, 0xc102, 0x40c3, - 0x8000, 0x0728, 0x79f4, 0x6108, - 0x0bb5, 0x9002, 0x40a1, 0xd90d, - 0x088e, 0xffef, 0xda7f, 0x40a1, - 0xd90d, 0x0efe, 0x00e0, 0xda49, - 0x1600, 0x7080, 0x8000, 0x0684, - 0xe808, 0x1600, 0x7080, 0x8000, - 0x0001, 0x0847, 0x00fe, 0x704c, - 0xc002, 0x275f, 0x1c81, 0x704c, - 0x205f, 0x0640, 0x43c3, 0xffff, - 0xdfff, 0x1c00, 0x3fc1, 0x6038, - 0x60b9, 0x0bbe, 0x02e0, 0x78cf, - 0x40c3, 0x0000, 0x2710, 0x0bc6, - 0x0120, 0x702c, 0x093e, 0xffcf, - 0x40c3, 0x0000, 0x1d4c, 0x0bb6, - 0x0120, 0x732c, 0xf012, 0xc002, - 0x275f, 0x1c81, 0x205f, 0x0640, - 0x43c3, 0xffff, 0xdfff, 0x1c04, - 0x3001, 0x1c00, 0x3fc1, 0x6038, - 0x60b9, 0x0cc2, 0x02e0, 0x78cf, - 0x40c3, 0x0003, 0xf7a0, 0x0b86, - 0x0120, 0x702c, 0x40a1, 0x0d3e, - 0x0220, 0xd90d, 0xc003, 0x70ad, - 0xe8ac, 0xf003, 0x71a5, 0xc102, - 0x40c3, 0x8000, 0x0728, 0x79f4, - 0x6108, 0x0d49, 0x1003, 0x275f, - 0x1401, 0xc002, 0x7916, 0x61b8, - 0x2032, 0x0f81, 0x8000, 0x074c, - 0xc004, 0x0de2, 0x0260, 0x4328, - 0x702c, 0xf00f, 0x231a, 0x1f82, - 0x0000, 0x2000, 0x7124, 0x2205, - 0x0f82, 0x9002, 0x0050, 0xb260, - 0x204f, 0x0283, 0xb260, 0x09e9, - 0x80b2, 0x204f, 0x0243, 0xf1d8, - 0xc0a5, 0x1404, 0x341b, 0xc6c6, - 0xc2e6, 0x1cfc, 0xb6c8, 0xc1a2, + 0x2032, 0x0f9e, 0x8000, 0x07a8, + 0xc004, 0x0b1e, 0x0260, 0x210a, + 0x0780, 0x261a, 0x3f82, 0x0000, + 0x2000, 0x41c3, 0x9002, 0x0050, + 0xb88a, 0x7165, 0x2205, 0x0043, + 0xb964, 0x7945, 0xb300, 0x1900, + 0x0fc5, 0xb300, 0xc002, 0x78d4, + 0x2032, 0x0f80, 0x8000, 0x0784, + 0x0bb5, 0x9004, 0x40a1, 0xd90d, + 0x0d1a, 0xffaf, 0xda7f, 0x40a1, + 0xd90d, 0x0eee, 0x00e0, 0xda49, + 
0x1300, 0x3080, 0xe813, 0x1600, + 0x7080, 0x8000, 0x0001, 0x081f, + 0x00df, 0xc002, 0xf82f, 0x1c04, + 0x3001, 0x1c00, 0x3fc1, 0x6159, + 0x61b9, 0x09ae, 0x02e0, 0x704c, + 0xf018, 0xc002, 0xf829, 0x1c00, + 0x3fc1, 0x6159, 0x61b9, 0x0852, + 0x02e0, 0x704c, 0x40c3, 0x0000, + 0x2710, 0x0b8e, 0x0120, 0x702c, + 0x0896, 0xffcf, 0x40c3, 0x0000, + 0x1d4c, 0x0b7e, 0x0120, 0x732c, + 0x40c3, 0x0003, 0xf7a0, 0x0b72, + 0x0120, 0x702c, 0x40a1, 0x099e, + 0x0220, 0xd90d, 0xc003, 0xe8ad, + 0x70ad, 0xf023, 0x265f, 0x1401, + 0xc002, 0x7916, 0x61b8, 0x2032, + 0x0f81, 0x8000, 0x07a8, 0xc004, + 0x0a46, 0x0260, 0x4328, 0x702c, + 0xf00e, 0x231a, 0x1f82, 0x0000, + 0x2000, 0x7124, 0x2205, 0x0f82, + 0x9002, 0x0050, 0xb260, 0x204f, + 0x0283, 0xb260, 0x09e7, 0x80b4, + 0x204f, 0x0243, 0x71a5, 0xc002, + 0x78d4, 0x2032, 0x0f80, 0x8000, + 0x0784, 0x0db5, 0x9004, 0xc0a5, + 0x1404, 0x341b, 0xc6c6, 0x78e0, + 0x265f, 0x1c82, 0x43c3, 0xffff, + 0xdfff, 0x205f, 0x0641, 0x78ef, + 0x7ee0, 0x78e0, 0x265f, 0x1c82, + 0x43c3, 0xffbf, 0x87f7, 0x205f, + 0x0641, 0x78ef, 0x7ee0, 0x78e0, + 0xc2e6, 0x1cfc, 0xb6c8, 0xc1a1, 0x46cb, 0x9004, 0x00f4, 0x9600, - 0x6ea2, 0x702c, 0xc040, 0x9500, - 0xc041, 0xd8ff, 0x95e3, 0x1508, - 0x111b, 0x1d08, 0x13c5, 0xb500, + 0x6ea2, 0x702c, 0xc040, 0xd8ff, + 0x1500, 0x111b, 0x95e3, 0x1508, + 0x110b, 0x1d08, 0x13c5, 0xb500, 0x1d06, 0x13c5, 0xb600, 0x1600, - 0x7080, 0x8000, 0x076c, 0x1e00, + 0x7080, 0x8000, 0x07c8, 0x1e00, 0x7004, 0x9008, 0x0190, 0x40c3, - 0x0000, 0x2af8, 0x0ace, 0x0120, - 0x1efa, 0x9485, 0xc000, 0x41c3, - 0x8000, 0x0684, 0xb600, 0xb5e3, - 0xc001, 0xb500, 0x1d08, 0x16c4, - 0x1600, 0x7080, 0x8000, 0x076c, - 0x781d, 0xb8a0, 0x2080, 0x0fc3, - 0xa901, 0xc0a2, 0x1404, 0x341b, - 0xc6c6, 0x78e0, 0xc2e6, 0x1cfc, - 0xb6c8, 0xc1a1, 0x43db, 0x9004, - 0x01e0, 0x1300, 0x3101, 0x1600, - 0x7080, 0x8000, 0x0004, 0x205f, - 0x0100, 0x2044, 0x07c2, 0x4020, - 0xea05, 0xba61, 0x0aff, 0x8031, - 0x781d, 0xb8c3, 0x702c, 0x0811, - 0x0070, 0xc140, 0x0809, 0x01b0, - 0x710c, 0xc040, 0x700c, 0x0a56, - 
0x0120, 0xd940, 0x71ed, 0x45cb, - 0x9005, 0xe112, 0x1dde, 0x13d4, - 0x1da6, 0x93d4, 0x13b6, 0xb10e, - 0xc000, 0xe827, 0x0e4b, 0x13bf, - 0x2684, 0x1c07, 0x264f, 0x13c0, - 0xb500, 0x700c, 0xd91c, 0x0a26, - 0x0120, 0x1df6, 0x9205, 0x40c3, - 0x000f, 0x4240, 0x702c, 0x0a16, - 0x0120, 0x1df6, 0x9245, 0x13be, - 0xb100, 0x1df0, 0x9045, 0x1df6, - 0x9205, 0x209a, 0x0008, 0x2004, - 0x0f80, 0x0000, 0x3e00, 0x78c5, - 0xb88e, 0xb500, 0xf005, 0x1df6, - 0x9205, 0x1df0, 0x93c4, 0x1e00, - 0x73c4, 0x9013, 0xe050, 0x7487, - 0x1404, 0x341b, 0xc6c6, 0x78e0, - 0xc2e6, 0x1cfc, 0xb6c8, 0xc1a2, - 0x4708, 0x40c3, 0x8000, 0x076d, - 0x88c0, 0xe013, 0xd90d, 0x0f82, - 0x0220, 0x4318, 0x4063, 0xd90d, - 0x0e2e, 0xffaf, 0xda7f, 0x4063, - 0xd90d, 0x0e26, 0xffaf, 0xdafe, - 0x40c3, 0x8000, 0x0684, 0x8800, - 0x2753, 0x104d, 0xe809, 0x1600, - 0x7080, 0x8000, 0x0001, 0x082b, - 0x00fe, 0x2578, 0x1080, 0x0e32, - 0xff8f, 0x2578, 0x1080, 0xf815, - 0x1c00, 0x3fc1, 0x6119, 0x275f, - 0x1100, 0x7163, 0x7f05, 0x095a, - 0x02e0, 0x78ef, 0xf00b, 0xf80f, - 0x1c04, 0x3041, 0x6119, 0x7163, - 0x40e1, 0x0a8a, 0x02e0, 0x1c00, - 0x3fc1, 0x40c3, 0x0000, 0x36b0, - 0x094a, 0x0120, 0xd90a, 0x4063, - 0x0b02, 0x0220, 0xd90d, 0x1e00, - 0x7005, 0x9003, 0xe048, 0xc0a2, - 0x1404, 0x341b, 0xc6c6, 0x78e0, - 0x265f, 0x1c81, 0x205f, 0x0640, - 0x704c, 0x43c3, 0xffff, 0xdfff, - 0x7ee0, 0x78e0, 0xc2e6, 0x1cfc, - 0xb6c8, 0xc1a1, 0x46cb, 0x9004, - 0x00f4, 0x9600, 0x6ea2, 0xc040, - 0xd8ff, 0x1500, 0x111b, 0x1506, - 0x110b, 0x95e4, 0x1d08, 0x13c5, - 0xb500, 0x1d06, 0x13c5, 0xb600, - 0xd834, 0x1e00, 0x7005, 0x9008, - 0x0190, 0x1efa, 0x9004, 0x40c3, - 0x0000, 0x2af8, 0x08d6, 0x0120, - 0x702c, 0xc000, 0xb600, 0x1d06, - 0x12c4, 0x1d00, 0x16c4, 0xb5e4, + 0x0000, 0x2af8, 0x0a92, 0x0120, + 0x1efa, 0x9485, 0xc000, 0xb600, + 0xb5e3, 0x1d00, 0x16c4, 0x1d08, + 0x12c4, 0x1600, 0x7080, 0x8000, + 0x07c8, 0x781d, 0xb8a0, 0x6829, + 0x40c3, 0x8000, 0x06e0, 0xa821, 0x7487, 0x1404, 0x341b, 0xc6c6, - 0xc2e6, 0x1cfc, 0xb6c8, 0x246f, - 0x1343, 0x4320, 0x9420, 
0x218c, - 0x864a, 0x0084, 0x002d, 0x4348, - 0x0dfe, 0xffaf, 0x712c, 0x4318, - 0x14fe, 0x9080, 0x235f, 0x180d, - 0x70cd, 0x201a, 0x0f8c, 0x0010, - 0x0000, 0x75c3, 0x0000, 0xfffc, - 0xf02a, 0x231a, 0x0f81, 0x0000, - 0x1000, 0x7825, 0x2005, 0x0301, - 0x40c1, 0x209a, 0x0004, 0x7825, + 0xc2e6, 0x1cfc, 0xb6c8, 0x46cb, + 0x9004, 0x01e0, 0x9620, 0x1600, + 0x709b, 0x8000, 0x0004, 0x70ad, + 0x235f, 0x3100, 0x2044, 0x07c2, + 0x4020, 0xea06, 0xba61, 0x0a01, + 0x0031, 0x781d, 0xb8c3, 0x080f, + 0x0070, 0xc1a1, 0x0807, 0x0190, + 0x71ad, 0x700c, 0x0a22, 0x0120, + 0xd940, 0x47cb, 0x9005, 0xe112, + 0x1fde, 0x1055, 0x1fa6, 0x9055, + 0x16b6, 0x9100, 0xc040, 0xed37, + 0xc000, 0x086b, 0x03bf, 0x70ad, + 0xc000, 0xd91c, 0x2084, 0x0c07, + 0xc040, 0xb88f, 0xb700, 0x700c, + 0xbd8e, 0x09ee, 0x0120, 0x1ff6, + 0x9205, 0x40c3, 0x000f, 0x4240, + 0x702c, 0x09de, 0x0120, 0x1ff6, + 0x9245, 0x16be, 0x9100, 0x1ff0, + 0x9045, 0x1ff6, 0x9205, 0x209a, + 0x0008, 0xc100, 0x231a, 0x3f9b, + 0x0020, 0x0000, 0x2004, 0x0f80, + 0x0000, 0x3e00, 0x7905, 0x254f, + 0x13c0, 0xb720, 0x7825, 0x2305, + 0x3f81, 0x9013, 0xe40c, 0xb100, + 0xf005, 0x1ff6, 0x9205, 0x1ff0, + 0x9045, 0x710c, 0x1e00, 0x7004, + 0x9013, 0xe050, 0x7487, 0x1404, + 0x341b, 0xc6c6, 0xc2e6, 0x1cfc, + 0xb6c8, 0xc1a2, 0x4608, 0x40c3, + 0x8000, 0x07c9, 0x1000, 0x009b, + 0x45cb, 0x8000, 0x067c, 0x40a1, + 0x0ba6, 0x0220, 0xd90d, 0x40a1, + 0xd90d, 0x0a9a, 0xffaf, 0xda7f, + 0x40a1, 0xd90d, 0x0a8e, 0xffaf, + 0xdafe, 0x40c3, 0x8000, 0x06e0, + 0x8800, 0x2653, 0x104f, 0xe81c, + 0x1600, 0x7080, 0x8000, 0x0001, + 0x082d, 0x00ff, 0x2778, 0x1080, + 0x235f, 0x3c82, 0x205f, 0x0641, + 0x40c1, 0x43c3, 0xffff, 0xdfff, + 0x1c04, 0x3041, 0x6159, 0x61b9, + 0x704c, 0x0f16, 0x02a0, 0x1c00, + 0x3fc1, 0xf018, 0x0a72, 0xff8f, + 0x2778, 0x1080, 0x235f, 0x3c82, + 0x205f, 0x0641, 0x265f, 0x1100, + 0x43c3, 0xffff, 0xdfff, 0x1c00, + 0x3fc1, 0x6159, 0x78c5, 0x61b9, + 0x780f, 0x0d9e, 0x02a0, 0x704c, + 0x40c3, 0x0000, 0x36b0, 0x08da, + 0x0120, 0xd90a, 0x40a1, 0x0f06, + 0x01e0, 0xd90d, 
0x1e00, 0x7005, + 0x9003, 0xe048, 0xc0a2, 0x1404, + 0x341b, 0xc6c6, 0xc2e6, 0x1cfc, + 0xb6c8, 0x46cb, 0x9004, 0x00f4, + 0x6ea2, 0xd8ff, 0x1600, 0x111b, + 0x1500, 0x111e, 0x1506, 0x110b, + 0x95e4, 0x1d08, 0x13c5, 0xb500, + 0x1d06, 0x13c5, 0xb600, 0x700c, + 0x1e00, 0x7004, 0x9008, 0x0190, + 0xd834, 0x1efa, 0x9004, 0x40c3, + 0x0000, 0x2af8, 0x087a, 0x0120, + 0x702c, 0x1e00, 0x16c4, 0x1d06, + 0x12c4, 0x1d00, 0x1784, 0xb5e4, + 0x1404, 0x341b, 0xc6c6, 0x78e0, + 0xc2e6, 0x1cfc, 0xb6c8, 0x236f, + 0x0343, 0x260a, 0x3040, 0x9320, + 0x4348, 0x218c, 0x864a, 0x0080, + 0x002d, 0x712c, 0x0d2e, 0xff8f, + 0x4508, 0x235f, 0x1800, 0x2042, + 0x011b, 0x13fe, 0x8080, 0x706c, + 0x201a, 0x0f8e, 0x0010, 0x0000, + 0xf02a, 0x261a, 0x3f81, 0x0000, + 0x1000, 0x7825, 0x2005, 0x0381, + 0x4060, 0x209a, 0x0004, 0x7825, 0x781b, 0x2005, 0x0f8f, 0x9002, - 0x0000, 0x0e42, 0xffaf, 0x9700, - 0x60b8, 0x7a0e, 0x785c, 0x781c, + 0x0000, 0x0d76, 0xffaf, 0x9700, + 0x7063, 0x7a0e, 0x785c, 0x781c, 0x781c, 0x781c, 0x4100, 0xd81b, 0xb861, 0x0801, 0x0031, 0x793d, - 0x6238, 0xb8c4, 0x4831, 0x2a44, - 0x0800, 0x71c5, 0x209a, 0x0001, - 0x7825, 0xb700, 0x0eaf, 0x90b2, - 0x2355, 0x3800, 0x1404, 0x341b, + 0x7164, 0x6238, 0xb8c4, 0x4831, + 0x2a44, 0x0800, 0x209a, 0x0001, + 0x7825, 0xb700, 0x0baf, 0x80b4, + 0x2555, 0x1800, 0x1404, 0x341b, 0xc6c6, 0x78e0, 0xc2e2, 0x45cb, - 0x9004, 0x0006, 0xd820, 0x0fbe, + 0x9004, 0x0006, 0xd820, 0x0f62, 0x00e0, 0x1d00, 0x1045, 0x1d00, 0x1005, 0xc6c2, 0xc2e6, 0x45cb, - 0x9004, 0x01c6, 0x95c0, 0x208a, - 0x0fdf, 0x78c4, 0x204f, 0x02cf, - 0xb88a, 0xb88b, 0xb5e0, 0xb500, - 0x0f92, 0x00e0, 0xd80a, 0xb5e0, - 0x9501, 0xbecb, 0xb5c0, 0xb8c9, - 0xc6c6, 0x78e0, 0xc1e2, 0xc1a1, - 0x781b, 0x2005, 0x0f80, 0x9000, - 0x0086, 0x9000, 0x706c, 0x704c, - 0x2053, 0x0101, 0x781d, 0x781d, - 0x781d, 0x781d, 0x781d, 0xb8c4, - 0x1c02, 0x3042, 0x1c03, 0x3002, - 0xf008, 0x782f, 0xd978, 0x2905, - 0x0001, 0x786f, 0x7144, 0x611b, - 0x0a21, 0x00b3, 0x2440, 0x3080, - 0x604c, 0x712c, 0x700c, 0xf005, - 0x61b9, 0x7c9d, 
0x7104, 0x08fd, - 0x8172, 0x2444, 0x104d, 0xf1eb, - 0x2384, 0x0f83, 0x787d, 0xc5c2, - 0x06c1, 0xff6f, 0x4040, 0x78e0, - 0xc2e6, 0x1cfc, 0xb6c8, 0xc1a8, - 0x4308, 0xca01, 0xc345, 0xc40d, - 0xc044, 0x1600, 0x711b, 0x9018, - 0x01dc, 0x70cd, 0x706c, 0xf013, - 0x45cb, 0x9004, 0x0200, 0x2705, - 0x3340, 0x9000, 0xc780, 0xe520, - 0x60f8, 0xa860, 0x2705, 0x3340, - 0x9000, 0x671f, 0x4b18, 0xaf08, - 0x7164, 0x0be1, 0x81b2, 0x272f, - 0x30c0, 0xc004, 0xe08c, 0x0306, - 0x002d, 0x238a, 0x0205, 0xc004, - 0x0ba7, 0x002e, 0x74ef, 0xc005, - 0x1800, 0x07c2, 0xc004, 0x080d, - 0x0190, 0xc004, 0xe088, 0xf4ab, - 0x700c, 0x71ad, 0xac00, 0xac01, - 0xaca2, 0xaca3, 0x726c, 0x738d, - 0x75cd, 0xa900, 0xa961, 0xa982, - 0xa9c3, 0xaa00, 0xaaa2, 0xaa63, - 0xaa85, 0xc120, 0x7161, 0xa900, - 0x1401, 0x3081, 0x7161, 0xa900, - 0x1402, 0x3080, 0x7061, 0xa860, - 0x1403, 0x3080, 0x7061, 0xa860, - 0xc021, 0x7061, 0xa860, 0x1405, - 0x3080, 0x7061, 0xa860, 0x1b07, - 0x1342, 0x1b06, 0x1342, 0xc022, - 0x7061, 0xa880, 0x1409, 0x3080, - 0x7061, 0xa880, 0x140a, 0x3080, - 0x7061, 0xa8c0, 0x140b, 0x3080, - 0x7061, 0xa8c0, 0xc023, 0x7061, - 0xa8c0, 0x140d, 0x3080, 0x7061, - 0xa8c0, 0x1b0f, 0x17c2, 0x025f, - 0x0020, 0x1b0e, 0x17c2, 0xc004, - 0x43c3, 0x0000, 0x1400, 0x2311, - 0x8000, 0x024a, 0x0021, 0x710c, - 0x73ed, 0x72ad, 0xac03, 0xacc2, - 0xacc1, 0xacc0, 0xac05, 0xac04, - 0x0bed, 0x3f81, 0x0000, 0x2208, - 0xc305, 0x234a, 0x3280, 0x1b00, - 0x0143, 0x42eb, 0xf88a, 0x4759, - 0xac04, 0x766c, 0xdc0b, 0xa9c0, - 0xa9e1, 0xa962, 0x1903, 0x06c2, - 0xa984, 0x742c, 0xaac0, 0xaa03, - 0xaaa6, 0xaaea, 0xaa2b, 0xc120, - 0x7161, 0xa9c0, 0x1401, 0x3081, - 0x7161, 0xa9c0, 0x1402, 0x3081, - 0x7161, 0xa9e0, 0x1403, 0x3081, - 0x7161, 0xa9e0, 0xc121, 0x7161, - 0xa9e0, 0x1405, 0x3081, 0x7161, - 0xa9e0, 0x1b07, 0x1002, 0x740c, - 0x1b06, 0x1002, 0xc022, 0x7061, - 0xa860, 0x1409, 0x3080, 0x7061, - 0xa860, 0x140a, 0x3080, 0x7061, - 0x1800, 0x06c2, 0x140b, 0x3080, - 0x7061, 0xa880, 0xc023, 0x7061, - 0xa880, 0x140d, 0x3080, 0x7061, - 0x1800, 
0x06c2, 0x1b0f, 0x11c3, - 0xf07f, 0xc005, 0x726c, 0xa860, - 0xacc0, 0xacc1, 0xa9c0, 0xa961, - 0x712c, 0xaac0, 0xaa22, 0xc020, - 0x7061, 0xa8c0, 0x1401, 0x3080, - 0x7061, 0xa8c0, 0x1402, 0x3080, - 0x7061, 0xa860, 0x1403, 0x3080, - 0x7061, 0xa860, 0xc021, 0x7061, - 0xa860, 0x1405, 0x3080, 0x7061, - 0xa860, 0x1b07, 0x1042, 0x1b06, - 0x1042, 0xf0a5, 0xc320, 0x1401, - 0x309f, 0x234a, 0x3200, 0x7361, - 0xc347, 0xc304, 0x2700, 0x32df, - 0x1c18, 0x37c0, 0x274a, 0x31c0, - 0x0b9d, 0x0331, 0x70cd, 0xc305, - 0x768d, 0xab80, 0xa900, 0xa9a1, - 0xa9e2, 0xa983, 0x1904, 0x07c2, - 0x1905, 0x06c2, 0x742c, 0xaac1, - 0xaa02, 0xaaa3, 0xaae6, 0xaa27, - 0xc107, 0x1a08, 0x0143, 0xa9e0, - 0xc106, 0xa900, 0x1402, 0x3081, - 0x7161, 0xa9e0, 0x1403, 0x3081, - 0x7161, 0xa9a0, 0xc121, 0x7161, - 0xa9a0, 0x1405, 0x3081, 0x7161, - 0xa900, 0x740c, 0x1b07, 0x1243, - 0x1b06, 0x1002, 0xc022, 0x7061, - 0x1800, 0x07c2, 0x1409, 0x3080, - 0x7061, 0x1800, 0x06c2, 0x140a, - 0x3080, 0x7061, 0x1800, 0x06c2, - 0x140b, 0x3080, 0x7061, 0x1800, - 0x06c2, 0xc023, 0x7061, 0x1800, - 0x07c2, 0x140d, 0x3080, 0x7061, - 0xa880, 0x1b0f, 0x1283, 0x1b0e, - 0x1143, 0xf049, 0xc305, 0x42eb, - 0xf825, 0x4759, 0x748d, 0xab80, - 0xa9a0, 0xa9e1, 0x1902, 0x07c2, - 0x1903, 0x06c2, 0xc107, 0xaac2, - 0xaa03, 0xaaa7, 0xaae8, 0xa9a0, - 0xc106, 0xa9a0, 0x1402, 0x3081, - 0x7161, 0xa9a0, 0x1403, 0x3081, - 0x7161, 0xa9a0, 0xc121, 0x7161, - 0xa9e0, 0x1405, 0x3081, 0x7161, - 0xa9e0, 0x1b07, 0x1382, 0x1b06, - 0x1002, 0xc022, 0x7061, 0x1800, - 0x07c2, 0x1409, 0x3080, 0x7061, - 0x1800, 0x07c2, 0x140a, 0x3080, - 0x7061, 0x1800, 0x07c2, 0x140b, - 0x3080, 0x7061, 0x1800, 0x07c2, - 0xc023, 0x7061, 0x1800, 0x06c2, - 0x140d, 0x3080, 0x7061, 0x1800, - 0x06c2, 0x1b0f, 0x1143, 0x1b0e, - 0x1183, 0xc0a8, 0x1404, 0x341b, - 0xc6c6, 0x78e0, 0xacc0, 0xacc1, - 0xac02, 0xac03, 0x7ee0, 0x78e0, - 0x215f, 0x0641, 0x205f, 0x0c80, - 0x6038, 0x2032, 0x0f80, 0x8000, - 0x078c, 0x7fe0, 0xb8c6, 0x78e0, - 0x7014, 0x1600, 0x7080, 0x8000, - 0x0004, 0x41c3, 0x9004, 0x0160, - 
0x21c0, 0x0221, 0x201a, 0x0f80, - 0x0020, 0x0000, 0x7825, 0x7fe0, - 0x9000, 0x78e0, 0xc2e6, 0x1600, - 0x710d, 0x9004, 0x01c6, 0xd80a, - 0x47cb, 0x9005, 0xe1c6, 0xbdc9, - 0x254f, 0x12ce, 0x0b56, 0x00e0, - 0xb7c0, 0x2505, 0x1f80, 0x0000, - 0x0c00, 0xb700, 0x0b46, 0x00e0, - 0xd80a, 0xd80a, 0x0b3e, 0x00e0, - 0xb7c0, 0x1600, 0x7100, 0x9002, - 0x01c8, 0xb7a0, 0xb8c9, 0xc6c6, - 0xc2e6, 0x1cfc, 0xb6c8, 0xc1a7, - 0x4318, 0x1600, 0x7080, 0x8000, - 0x0004, 0xc144, 0x704c, 0xc046, - 0x40c3, 0x8000, 0x077c, 0x8020, - 0x4063, 0x0a82, 0x00a0, 0x706c, - 0x700c, 0x0c16, 0xff6f, 0xd9ff, - 0x710c, 0x4608, 0x2344, 0x37c0, - 0xe805, 0xb861, 0x08ff, 0x8031, - 0x7edb, 0x702c, 0x2653, 0x1040, - 0xc143, 0xc042, 0xc045, 0xd8ff, - 0xda09, 0xdbff, 0xc141, 0x4528, - 0x0cd6, 0xff6f, 0xc140, 0x265a, - 0x110e, 0xc543, 0x702c, 0x734c, - 0x260f, 0x16c0, 0x7e0f, 0xd840, - 0xc642, 0x1c04, 0x35c1, 0xc040, - 0xd8ff, 0x0cb6, 0xff6f, 0xdb22, - 0xd84b, 0xc543, 0xc642, 0xc541, - 0xc040, 0xd8ff, 0xd980, 0xda17, - 0x0c9e, 0xff6f, 0x746c, 0x700c, - 0x0c52, 0xff6f, 0x712c, 0x0d12, - 0x01c0, 0x0a8a, 0x00e0, 0x208a, - 0x0010, 0x40c3, 0x0000, 0x9c40, - 0x0ad2, 0x00e0, 0xd908, 0x700c, - 0x0b86, 0xff6f, 0xd9ff, 0x47cb, - 0x0000, 0x1200, 0xd8ff, 0xd908, - 0xda0d, 0xdb20, 0xc543, 0xc642, - 0xc541, 0x0c5e, 0xff6f, 0xc740, - 0x2756, 0x1800, 0xc543, 0xc642, - 0xc541, 0xc040, 0xd8ff, 0xd988, - 0xda0d, 0x0c46, 0xff6f, 0xdb08, - 0x700c, 0x0bfa, 0xff6f, 0x712c, - 0x46cb, 0x8000, 0x0771, 0x8ea0, - 0xf002, 0x71a5, 0x8e01, 0x0823, - 0x0362, 0x7faf, 0x4063, 0x0b66, - 0x0020, 0x41e1, 0xe877, 0x40e1, - 0x0a6e, 0x01e0, 0x702c, 0xc104, - 0x79b4, 0xb100, 0xf1ef, 0x8ee0, - 0xf003, 0x8e01, 0x71e5, 0x0877, - 0x03e2, 0x7def, 0x4063, 0x0b3e, - 0x0020, 0x41a1, 0xe877, 0x40a1, - 0x0a46, 0x01e0, 0xd910, 0xc504, + 0x9004, 0x01c6, 0x95c0, 0xbecb, + 0x264f, 0x12cf, 0x2605, 0x1f80, + 0x0000, 0x0c00, 0xb5e0, 0xb500, + 0x0f36, 0x00e0, 0xd80a, 0xb5e0, + 0x9501, 0xb5c0, 0xb8c9, 0xc6c6, + 0xc1e2, 0xc1a1, 0x201a, 0x0f80, + 0x0000, 0x2000, 0x704c, 
0x706c, + 0x2005, 0x0f80, 0x9000, 0x0086, + 0x9020, 0x2153, 0x0100, 0x1c02, + 0x3002, 0x783d, 0x781d, 0x781d, + 0x781d, 0x781d, 0xb8c4, 0x1c03, + 0x3002, 0xf014, 0x606c, 0x712c, + 0x700c, 0xf005, 0x61b9, 0x7c9d, + 0x7104, 0x08fd, 0x8174, 0x2444, + 0x104d, 0x782f, 0xd978, 0x2905, + 0x0001, 0x784f, 0x7164, 0x611a, + 0x0bdd, 0x80b4, 0x2440, 0x3080, + 0x2284, 0x0f83, 0x785d, 0xc5c2, + 0x035d, 0xff6f, 0x4040, 0x78e0, + 0xc2e6, 0x1cfc, 0xb6c8, 0xc1a4, + 0x1600, 0x711e, 0x9018, 0x01dc, + 0x1201, 0x308c, 0xc609, 0x4378, + 0x706c, 0xf013, 0x43cb, 0x9004, + 0x0200, 0x2705, 0x32cd, 0x95a0, + 0xc780, 0x677f, 0x2340, 0x180b, + 0xafa0, 0x2705, 0x32cd, 0x95a0, + 0x7164, 0xe508, 0xafa8, 0x0be1, + 0x81b4, 0x272f, 0x30c0, 0xe48c, + 0x0304, 0x002d, 0x238a, 0x0205, + 0x0b9d, 0x032e, 0x746d, 0x0c0f, + 0x1230, 0x1b00, 0x32c2, 0x7695, + 0xf4af, 0x71ad, 0x706c, 0xaea3, + 0xaea2, 0xae61, 0xae60, 0x75ed, + 0x73cd, 0x728d, 0xa9e3, 0xa9c2, + 0xa981, 0xa960, 0xaac5, 0xaa83, + 0xaaa2, 0xaa60, 0xc120, 0x6119, + 0xa960, 0x1401, 0x3081, 0x6119, + 0xa960, 0x1402, 0x3081, 0x6119, + 0xa980, 0x1403, 0x3081, 0x6119, + 0xa980, 0xc121, 0x6119, 0xa980, + 0x1405, 0x3081, 0x6119, 0xa980, + 0xa8a7, 0xa8a6, 0xc122, 0x6119, + 0xa9c0, 0x1409, 0x3081, 0x6119, + 0xa9c0, 0x140a, 0x3081, 0x6119, + 0xa9e0, 0x140b, 0x3081, 0x6119, + 0xa9e0, 0xc123, 0x6119, 0xa9e0, + 0x140d, 0x3081, 0x6119, 0xa9e0, + 0x180f, 0x02c2, 0x0269, 0x0020, + 0x180e, 0x02c2, 0x43c3, 0x0000, + 0x1400, 0x2311, 0x8300, 0x0258, + 0x0021, 0x71ed, 0x706c, 0xaee5, + 0xaee4, 0xaee3, 0xae62, 0xae61, + 0xae60, 0x260c, 0xbf80, 0x0000, + 0x2208, 0x73ad, 0x726c, 0xf47c, + 0x708d, 0x1b00, 0x3143, 0xaee4, + 0xaee3, 0xaee2, 0xae81, 0xae80, + 0xde0b, 0x234a, 0x1280, 0x264a, + 0x3180, 0xa9c4, 0x1903, 0x02c2, + 0x1902, 0x0782, 0xa9a1, 0xa980, + 0x742c, 0xaa2b, 0x702c, 0xaaaa, + 0xaa66, 0xaae3, 0xaa20, 0xc120, + 0x603a, 0x702c, 0xaa20, 0x1401, + 0x3081, 0x603a, 0x702c, 0xaa20, + 0x1402, 0x3081, 0x6119, 0xa9a0, + 0x1403, 0x3081, 0x6119, 0xa9a0, + 0xc121, 0x6119, 
0xa9a0, 0x1405, + 0x3081, 0x6119, 0xa9a0, 0x742c, + 0xa8e7, 0xa826, 0xc122, 0x6119, + 0x1900, 0x0782, 0x1409, 0x3081, + 0x6119, 0x1900, 0x0782, 0x140a, + 0x3081, 0x6119, 0x1900, 0x02c2, + 0x140b, 0x3081, 0x6119, 0xa9c0, + 0xc123, 0x6119, 0xa9c0, 0x140d, + 0x3081, 0x6119, 0x1900, 0x02c2, + 0x180f, 0x01c3, 0xf079, 0x726c, + 0x708d, 0x1b00, 0x30c2, 0xae81, + 0xae80, 0xa961, 0xa980, 0x718d, + 0x702c, 0xaa82, 0xaa20, 0xc120, + 0x603a, 0x702c, 0xaa20, 0x1401, + 0x3081, 0x603a, 0x702c, 0xaa20, + 0x1402, 0x3081, 0x6119, 0xa960, + 0x1403, 0x3081, 0x6119, 0xa960, + 0xc121, 0x6119, 0xa960, 0x1405, + 0x3081, 0x6119, 0xa960, 0xa887, + 0xa886, 0xf0a2, 0x264a, 0x3200, + 0x0ca5, 0x1331, 0x234a, 0x11c0, + 0x768d, 0x1b00, 0x3302, 0x1905, + 0x0782, 0x1904, 0x02c2, 0xa983, + 0xa9a2, 0xa961, 0xa9e0, 0x742c, + 0x1a08, 0x0143, 0xaa27, 0x702c, + 0xaaa6, 0xaa63, 0xaae2, 0xaa21, + 0xc120, 0x6119, 0xa9a0, 0x1401, + 0x3081, 0x6119, 0xa9e0, 0x1402, + 0x3081, 0x6119, 0xa9a0, 0x1403, + 0x3081, 0x6119, 0xa960, 0xc121, + 0x6119, 0xa960, 0x1405, 0x3081, + 0x6119, 0xa9e0, 0x742c, 0x1807, + 0x0243, 0xa826, 0xc122, 0x6119, + 0x1900, 0x02c2, 0x1409, 0x3081, + 0x6119, 0x1900, 0x0782, 0x140a, + 0x3081, 0x6119, 0x1900, 0x0782, + 0x140b, 0x3081, 0x6119, 0x1900, + 0x0782, 0xc123, 0x6119, 0x1900, + 0x02c2, 0x140d, 0x3081, 0x6119, + 0xa980, 0x180f, 0x0283, 0x180e, + 0x0143, 0xf04e, 0x708d, 0xaee3, + 0xaee2, 0xae81, 0xae80, 0x748d, + 0x1b00, 0x3302, 0x1903, 0x0782, + 0x1902, 0x02c2, 0xa9a1, 0xa960, + 0x702c, 0xaaa8, 0xaa67, 0xaae3, + 0xaa22, 0xc120, 0x6119, 0xa960, + 0x1401, 0x3081, 0x6119, 0xa960, + 0x1402, 0x3081, 0x6119, 0xa960, + 0x1403, 0x3081, 0x6119, 0xa960, + 0xc121, 0x6119, 0xa9a0, 0x1405, + 0x3081, 0x6119, 0xa9a0, 0x702c, + 0xa827, 0xa8e6, 0xc122, 0x6119, + 0x1900, 0x02c2, 0x1409, 0x3081, + 0x6119, 0x1900, 0x02c2, 0x140a, + 0x3081, 0x6119, 0x1900, 0x02c2, + 0x140b, 0x3081, 0x6119, 0x1900, + 0x02c2, 0xc123, 0x6119, 0x1900, + 0x0782, 0x140d, 0x3081, 0x6119, + 0x1900, 0x0782, 0x180f, 0x0143, + 0x180e, 
0x0183, 0xc0a4, 0x1404, + 0x341b, 0xc6c6, 0x215f, 0x0641, + 0x205f, 0x0c80, 0x6119, 0x2132, + 0x0f80, 0x8000, 0x0688, 0x7fe0, + 0xb8c6, 0x78e0, 0x7014, 0x1600, + 0x7080, 0x8000, 0x0004, 0x41c3, + 0x9004, 0x0168, 0x21c2, 0x0222, + 0x201a, 0x0f80, 0x0020, 0x0000, + 0x7905, 0x7fe0, 0x9100, 0x78e0, + 0xc2e6, 0x1600, 0x710d, 0x9004, + 0x01c6, 0x47cb, 0x9005, 0xe1c6, + 0xd80a, 0xbdc9, 0x254f, 0x12ce, + 0x0b0e, 0x00e0, 0xb7c0, 0x2505, + 0x1f80, 0x0000, 0x0c00, 0xb700, + 0x0afe, 0x00e0, 0xd80a, 0xd80a, + 0x0af6, 0x00e0, 0xb7c0, 0x1600, + 0x7100, 0x9002, 0x01c8, 0xb7a0, + 0xb8c9, 0xc6c6, 0xc2e6, 0x1cfc, + 0xb6c8, 0xc1a7, 0x4318, 0x1600, + 0x7080, 0x8000, 0x0004, 0xc144, + 0x704c, 0xc046, 0x40c3, 0x8000, + 0x07d8, 0x8020, 0x4063, 0x0aa6, + 0x00a0, 0x706c, 0x700c, 0x08c6, + 0xff6f, 0xd9ff, 0x710c, 0x4508, + 0x2344, 0x37c0, 0xe805, 0xb861, + 0x08ff, 0x8031, 0x7dbb, 0x70cd, + 0x2553, 0x1040, 0xc045, 0xc042, + 0xd8ff, 0x702c, 0xda09, 0xdbff, + 0xc643, 0xc641, 0x096e, 0xff6f, + 0xc640, 0x255a, 0x110d, 0x702c, + 0x734c, 0xdb22, 0x250f, 0x16c0, + 0x7d0f, 0xd840, 0xc040, 0xd8ff, + 0xc643, 0xc542, 0x094e, 0xff6f, + 0x1c04, 0x35c1, 0xd84b, 0xc040, + 0xd8ff, 0xd980, 0xda17, 0x746c, + 0xc643, 0xc542, 0x0936, 0xff6f, + 0xc641, 0x700c, 0x08ee, 0xff6f, + 0x712c, 0x0942, 0x01c0, 0x0a42, + 0x00e0, 0x208a, 0x0010, 0x40c3, + 0x0000, 0x9c40, 0x0a8a, 0x00e0, + 0xd908, 0x700c, 0x0836, 0xff6f, + 0xd9ff, 0x47cb, 0x0000, 0x1200, + 0xd8ff, 0xd908, 0xda0d, 0xdb20, + 0xc643, 0xc542, 0xc641, 0x08f6, + 0xff6f, 0xc740, 0x2756, 0x1800, + 0xc040, 0xd8ff, 0xd988, 0xda0d, + 0xdb08, 0xc643, 0xc542, 0x08de, + 0xff6f, 0xc641, 0x700c, 0x0896, + 0xff6f, 0x712c, 0x46cb, 0x8000, + 0x07cd, 0x8ea0, 0xf00e, 0x4063, + 0x0b62, 0x0020, 0x41e1, 0xe809, + 0x40e1, 0x0eb6, 0x01a0, 0x702c, + 0xc104, 0x79b4, 0xb100, 0x71a5, + 0x8e41, 0x0ae7, 0x8365, 0x7faf, + 0x8ee0, 0xf03c, 0x4063, 0x0b3e, + 0x0020, 0x41a1, 0xe834, 0x40a1, + 0x0e8e, 0x01a0, 0xd910, 0xc504, 0x209a, 0x0004, 0x42e1, 0x7df4, 0x9520, 0x7825, 0xb500, 0x7810, - 
0x41c3, 0x00df, 0x0001, 0x0a12, - 0xff2f, 0x2079, 0x0000, 0x1600, + 0x41c3, 0x00e1, 0x0001, 0x0ed6, + 0xfeef, 0x2079, 0x0000, 0x1600, 0x7100, 0x8000, 0x0006, 0x208c, - 0x81c3, 0xf704, 0x710c, 0xf00d, - 0x9500, 0x080f, 0x0095, 0x40c3, - 0x0000, 0xffff, 0xf005, 0x2e05, - 0x7000, 0x0001, 0x0000, 0xb500, - 0x7b10, 0x740c, 0x41c3, 0x00e0, - 0x0002, 0x0f9a, 0xff6f, 0x42e1, - 0xf1c5, 0x8ea0, 0xf003, 0x8e01, - 0x71a5, 0x0825, 0x0362, 0x79af, - 0x0ac2, 0x0020, 0x4063, 0xe8f9, - 0xc004, 0x0d0f, 0x103e, 0x78b4, - 0x10fe, 0x8101, 0xf002, 0x9021, - 0xb020, 0xf1ef, 0x8e40, 0xf019, - 0xc106, 0x40c3, 0x9002, 0x015e, - 0x7077, 0x211a, 0x0f81, 0x0020, - 0x0000, 0x20c2, 0x00a1, 0x7905, - 0x221a, 0x0f80, 0x0000, 0x2000, - 0x7905, 0xc004, 0x20f4, 0x0080, - 0x7144, 0xb100, 0x8e01, 0x08d5, - 0x80a3, 0x700c, 0x0a3a, 0xff6f, - 0xd9ff, 0xc005, 0x702c, 0xc143, - 0xc042, 0xd8ff, 0xda0a, 0x746c, - 0xc141, 0x4528, 0x0b12, 0xff6f, + 0x81c3, 0xf706, 0x710c, 0x1d00, + 0x1045, 0xf00e, 0x9500, 0x080f, + 0x0095, 0x40c3, 0x0000, 0xffff, + 0xf005, 0x2e05, 0x7000, 0x0001, + 0x0000, 0xb500, 0x7b10, 0x740c, + 0x41c3, 0x00e2, 0x0002, 0x0eea, + 0xff6f, 0x42e1, 0x8e41, 0x71e5, + 0x0a8d, 0x83e5, 0x7def, 0x8ea0, + 0xf010, 0x0ac2, 0x0020, 0x4063, + 0xe88a, 0xc004, 0x0d0b, 0x103f, + 0x78b4, 0x9021, 0xf003, 0x10fe, + 0x8101, 0xb020, 0x8e41, 0x71a5, + 0x0ae3, 0x8365, 0x79af, 0x8e60, + 0xf016, 0xc106, 0x40c3, 0x9002, + 0x015c, 0x211a, 0x0f81, 0x0020, + 0x0000, 0x20c0, 0x00a2, 0x7905, + 0x231a, 0x0f80, 0x0000, 0x2000, + 0x7905, 0xc004, 0x20f4, 0x00c0, + 0x7164, 0xb100, 0x0ad7, 0x80e5, + 0x7077, 0x700c, 0x0eee, 0xff2f, + 0xd9ff, 0xc005, 0x702c, 0xda0a, + 0xc042, 0xd8ff, 0x746c, 0x4528, + 0xc143, 0xc141, 0x0fae, 0xff2f, 0xc140, 0xd8ff, 0x702c, 0x754c, - 0x766c, 0xc043, 0xc542, 0xc541, - 0x4608, 0x0afe, 0xff6f, 0xc540, - 0xc005, 0xc543, 0x702c, 0xc042, - 0xd8ff, 0xda0e, 0x746c, 0xc541, - 0x0ae6, 0xff6f, 0xc540, 0xd8ff, + 0x766c, 0x4608, 0xc043, 0xc542, + 0xc541, 0x0f9a, 0xff2f, 0xc540, + 0xc005, 0x702c, 0xda0e, 0xc042, 
+ 0xd8ff, 0x746c, 0xc543, 0xc541, + 0x0f82, 0xff2f, 0xc540, 0xd8ff, 0xd980, 0x754c, 0x766c, 0xc643, - 0xc542, 0xc541, 0x0ad2, 0xff6f, - 0xc540, 0x700c, 0x0a86, 0xff6f, - 0x712c, 0x0b46, 0x01c0, 0xc0a7, + 0xc542, 0xc541, 0x0f6e, 0xff2f, + 0xc540, 0x700c, 0x0f26, 0xff2f, + 0x712c, 0x0f7a, 0x0180, 0xc0a7, 0x1404, 0x341b, 0xc6c6, 0x78e0, - 0xc2e2, 0x4508, 0x4320, 0x4020, - 0x0d02, 0xffef, 0x41a1, 0xe8b8, - 0x216f, 0x0dc3, 0x1109, 0x048c, - 0x0c65, 0x103e, 0x70b5, 0x496b, - 0x4308, 0x20ca, 0x0041, 0x8800, - 0x235f, 0x0c81, 0x7c1d, 0x13f3, - 0x9080, 0x7c9d, 0x7c9d, 0xb8c0, - 0x205f, 0x0640, 0x7c9d, 0x7095, - 0x24ca, 0x1061, 0x6038, 0x2032, - 0x0f80, 0x8000, 0x078b, 0x781d, - 0x781d, 0x781d, 0x791d, 0xb9c2, - 0xd8f0, 0x2805, 0x0040, 0x2805, - 0x8301, 0x208a, 0x0404, 0xf209, - 0x238a, 0x0a0f, 0x7b2c, 0x6158, - 0x781b, 0x2b05, 0x0000, 0x0f8a, - 0x0020, 0x7810, 0xc6c2, 0x78e0, + 0xc0f1, 0x4300, 0x4328, 0x4020, + 0x0d06, 0xffef, 0x4160, 0xe8b8, + 0x246f, 0x1dc3, 0x8c20, 0x0969, + 0x001e, 0x142f, 0x1080, 0x235f, + 0x1c81, 0xb8c0, 0x205f, 0x0640, + 0x6038, 0x2032, 0x0f80, 0x8000, + 0x0687, 0x781d, 0x781d, 0x781d, + 0x791d, 0xb9c2, 0xd8f0, 0x2805, + 0x0041, 0xeb05, 0x143c, 0x1080, + 0xf002, 0x8c09, 0x781d, 0x781d, + 0x781d, 0x781d, 0x7014, 0x7b2f, + 0x20ca, 0x0061, 0x0b1b, 0x0024, + 0x218a, 0x0404, 0x2b05, 0x0001, + 0x208a, 0x0a0f, 0x782c, 0x6159, + 0x793b, 0x2805, 0x0041, 0x0f9e, + 0x0020, 0x7830, 0xc0d1, 0x7ee0, 0x1600, 0x7100, 0x8000, 0x0022, 0x7014, 0x7de0, 0x1600, 0x7100, 0x9004, 0x0008, 0x7ee0, 0x78e0, - 0xc2e4, 0x43c3, 0x8000, 0x076e, - 0x8b00, 0x0dc6, 0xff6f, 0x712c, + 0xc2e4, 0x43c3, 0x8000, 0x07ca, + 0x8b00, 0x0d12, 0xff6f, 0x712c, 0x1600, 0x7081, 0x8000, 0x0004, - 0x8bc1, 0x774c, 0x211a, 0x0f8c, - 0x0010, 0x0000, 0xf01b, 0x261a, - 0x1f81, 0x0000, 0x0800, 0x2104, - 0x0f8d, 0x37fe, 0xf000, 0x2055, - 0x0801, 0x7985, 0x79a5, 0x6ea1, - 0x269a, 0x1004, 0x2684, 0x1004, - 0x79c5, 0x793b, 0xb991, 0xb99c, - 0xb99f, 0x9120, 0x46a9, 0x2108, - 0x0082, 0x8b22, 0x09cb, 0x83a3, - 
0x4140, 0xd808, 0xb861, 0x0801, - 0x0031, 0x793c, 0x784f, 0x2079, - 0x0000, 0x6038, 0x7204, 0xc6c4, - 0x40c3, 0x8000, 0x077c, 0x0769, + 0x8b82, 0x8b41, 0x211a, 0x0f8d, + 0x0010, 0x0000, 0x776c, 0xf01c, + 0x221a, 0x0f81, 0x0000, 0x0800, + 0x2104, 0x0f8e, 0x37fe, 0xf000, + 0x2055, 0x0801, 0x79a5, 0x79c5, + 0x6ac1, 0x229a, 0x0004, 0x2284, + 0x0004, 0x7a25, 0x795b, 0xb991, + 0xb99c, 0xb99f, 0x9120, 0x42c1, + 0x2108, 0x00c3, 0x0ccd, 0x9085, + 0x4060, 0xd908, 0xb961, 0x0901, + 0x0031, 0x781c, 0x796f, 0x2179, + 0x0001, 0x6038, 0x7204, 0xc6c4, + 0x40c3, 0x8000, 0x07d8, 0x0419, 0xff2f, 0x8000, 0x40c3, 0x8000, - 0x077c, 0x42c3, 0x8000, 0x076d, + 0x07d8, 0x42c3, 0x8000, 0x07c9, 0x8020, 0x8a00, 0x205f, 0x0c80, - 0x6119, 0x8a01, 0x205f, 0x0640, - 0x6038, 0x8842, 0x700c, 0x005d, - 0xffef, 0x702c, 0xc0e2, 0x42c3, - 0x8000, 0x076f, 0x8a60, 0xf004, - 0xb500, 0x7164, 0x8a01, 0x0849, - 0x00e2, 0x248a, 0x1fcf, 0x231a, - 0x0f80, 0x0000, 0x0800, 0x2004, - 0x0f81, 0x0000, 0x0800, 0x231a, - 0x0f80, 0x0000, 0x1000, 0x2004, - 0x0f80, 0x6ffd, 0xe000, 0x7825, - 0x2005, 0x0f8d, 0x9002, 0x0064, - 0x9520, 0x4928, 0x7c04, 0x0cc5, - 0x9832, 0xb970, 0x7830, 0x2080, - 0x0010, 0xf1dc, 0xc4c2, 0x78e0, - 0xc0e4, 0x4408, 0x249a, 0x1008, - 0x45cb, 0x900e, 0x0048, 0x2405, - 0x134e, 0xb620, 0x6d22, 0x2405, - 0x104d, 0x7224, 0xb540, 0x2405, + 0x6038, 0x8a21, 0x215f, 0x0641, + 0x6038, 0x8842, 0x700c, 0x0075, + 0xffef, 0x702c, 0x40c3, 0x8000, + 0x07cb, 0x8881, 0x8860, 0xf022, + 0x231a, 0x0f80, 0x0000, 0x0800, + 0x2004, 0x0f81, 0x0000, 0x0800, + 0x231a, 0x0f80, 0x0000, 0x1000, + 0x2004, 0x0f80, 0x6ffd, 0xe000, + 0x7825, 0x2005, 0x0f82, 0x9002, + 0x0064, 0x9220, 0x4928, 0x208b, + 0x880f, 0xf206, 0x2142, 0x0400, + 0x2080, 0x0010, 0xb200, 0x7164, + 0x0cc1, 0x90c5, 0x7ee0, 0x78e0, + 0xc0e4, 0x4508, 0x259a, 0x1008, + 0x44cb, 0x900e, 0x0048, 0x2505, + 0x130e, 0xb620, 0x6c22, 0x2505, + 0x104c, 0x7224, 0xb440, 0x2505, 0x1042, 0xb260, 0x6962, 0xc102, - 0x7c65, 0x080f, 0x03f1, 0xb420, + 0x7d65, 0x080f, 0x03f1, 0xb520, 
0x208a, 0x0fc7, 0xf005, 0x137c, 0x0101, 0x210f, 0x0000, 0x1b7c, 0x0004, 0xc4c4, 0x44cb, 0x900e, 0x00c2, 0x1c02, 0x1014, 0xb420, 0xb441, 0x7fe0, 0xb462, 0x78e0, - 0xc0f1, 0x4328, 0x4100, 0x40c3, - 0x8000, 0x0774, 0x8800, 0x0ee6, - 0x0060, 0x203c, 0x02c0, 0x7014, - 0x710c, 0x708d, 0xf423, 0xf003, - 0x7185, 0x0c41, 0x10b3, 0xda25, - 0xba9f, 0x7095, 0x22c0, 0x0ce2, - 0x8a40, 0x0af1, 0x806e, 0x2114, - 0x0302, 0x2232, 0x0f9f, 0x8000, - 0x0728, 0x704c, 0x0add, 0x87c3, - 0x245a, 0x1403, 0x7b36, 0x635b, - 0x2332, 0x0f83, 0x8000, 0x074c, - 0x0bed, 0x82e1, 0x7144, 0xf003, - 0x700c, 0xc0d1, 0x7ee0, 0x78e0, - 0x4100, 0xb863, 0x7314, 0x710c, - 0x20e0, 0x07c5, 0x7fe0, 0x2178, - 0x0180, 0x78e0, 0x219a, 0x0010, - 0x781b, 0x2005, 0x0f82, 0x9003, - 0xe064, 0x2145, 0x07c0, 0x079d, - 0x00e0, 0xb200, 0xc0e2, 0x706c, - 0x704c, 0xf004, 0xb160, 0x7144, - 0x7950, 0x09fd, 0x8674, 0x2014, - 0x0041, 0xda32, 0xba9f, 0x1201, - 0x0481, 0x708d, 0xa821, 0x1201, - 0x0481, 0xa822, 0x1201, 0x0481, - 0xa823, 0x1201, 0x0481, 0xa824, - 0x1201, 0x0481, 0xa82b, 0x1201, - 0x0481, 0xa82c, 0x1201, 0x0481, - 0xa82d, 0x1201, 0x0481, 0xa82e, - 0x1201, 0x0481, 0xa830, 0x8a20, - 0xa831, 0x8a21, 0xa836, 0x8a22, - 0xa838, 0x12dd, 0x8081, 0x7034, - 0xf48e, 0xda3e, 0xba9f, 0x1201, - 0x0481, 0xa83a, 0x1201, 0x0481, - 0xa83b, 0x1201, 0x0481, 0xa83c, - 0x1201, 0x0481, 0xa83d, 0x1201, - 0x0481, 0x1824, 0x0042, 0x1201, - 0x0481, 0x1825, 0x0042, 0x1201, - 0x0481, 0x1826, 0x0042, 0x1201, - 0x0481, 0x1827, 0x0042, 0x1201, - 0x0481, 0x1829, 0x0042, 0x1201, - 0x0481, 0x182a, 0x0042, 0x1201, - 0x0481, 0x182f, 0x0042, 0x121c, - 0x0481, 0x1831, 0x0042, 0x1201, - 0x0481, 0x1833, 0x0042, 0x1201, - 0x0481, 0x1834, 0x0042, 0x1201, - 0x0481, 0x1835, 0x0042, 0x1201, - 0x0481, 0x1836, 0x0042, 0x1201, - 0x0481, 0x183d, 0x0042, 0x1201, - 0x0481, 0x183e, 0x0042, 0x1201, - 0x0481, 0x183f, 0x0042, 0x1201, - 0x0481, 0x1840, 0x0042, 0x1201, - 0x0481, 0x1842, 0x0042, 0x1201, - 0x0481, 0x1843, 0x0042, 0x1201, - 0x0481, 0x1848, 0x0042, 0x1201, - 
0x0481, 0x184a, 0x0042, 0x1201, - 0x0481, 0x184c, 0x0042, 0x1201, - 0x0481, 0x184d, 0x0042, 0x1201, - 0x0481, 0x184e, 0x0042, 0x1201, - 0x0481, 0x184f, 0x0042, 0x1201, - 0x0481, 0x1856, 0x0042, 0x1201, - 0x0481, 0x1857, 0x0042, 0x1201, - 0x0481, 0x1858, 0x0042, 0x1201, - 0x0481, 0x1859, 0x0042, 0x1201, - 0x0481, 0x185b, 0x0042, 0x8a20, - 0x185c, 0x0042, 0x8a21, 0x1861, - 0x0042, 0x8a22, 0x1863, 0x0042, - 0xf031, 0x7185, 0x0c59, 0x10b2, - 0x706c, 0xf02d, 0x245a, 0x1c81, - 0x235a, 0x064d, 0x7164, 0x6119, - 0x653d, 0x12f7, 0x8081, 0xad21, - 0x12f8, 0x8081, 0xad22, 0x12f9, - 0x8081, 0xad23, 0x12fa, 0x8081, - 0xad24, 0x12fb, 0x8081, 0xad2b, - 0x12fc, 0x8081, 0xad2c, 0x12fd, - 0x8081, 0xad2d, 0x12fe, 0x8081, - 0xad2e, 0x12ff, 0x8081, 0xad30, - 0x8a20, 0xad31, 0x8a21, 0xad36, - 0x8a22, 0xad38, 0x0bb1, 0x8092, - 0xf1d1, 0xc4c2, 0xc2e4, 0x40c3, - 0x8000, 0x0658, 0x09be, 0x02a0, - 0xd92a, 0xde25, 0xbe9f, 0x8e00, - 0x45cb, 0x9004, 0x00e4, 0xb500, - 0x1633, 0x1080, 0xb501, 0x16f4, - 0x9080, 0xe888, 0x1e00, 0x7005, - 0x9003, 0xe154, 0x1d42, 0x905d, - 0x0be2, 0xffcf, 0x791d, 0x793d, - 0x793d, 0x793d, 0xb9c3, 0x1a02, - 0x3042, 0x4100, 0xda08, 0xba61, - 0x0aff, 0x8031, 0x793d, 0xb9c1, - 0xb8c3, 0x1a06, 0x3042, 0x0eca, - 0x01e0, 0x1a01, 0x3002, 0x09c6, - 0x0000, 0x098e, 0x01c0, 0x154c, - 0x9100, 0x080f, 0x0151, 0x8e00, - 0xb500, 0x8e00, 0xb501, 0xc6c4, - 0xc2e2, 0x4508, 0x40c3, 0x8000, - 0x0450, 0x0942, 0x02a0, 0xd972, + 0xc2e4, 0x4320, 0x4100, 0x40c3, + 0x8000, 0x07d0, 0x8800, 0x0f1a, + 0x0060, 0x203c, 0x00c0, 0x7014, + 0x710c, 0xf427, 0xde25, 0xbe9f, + 0x708d, 0xf01f, 0xec04, 0x1633, + 0x1082, 0xf003, 0x8e40, 0x0a31, + 0x006e, 0x2114, 0x0302, 0x2232, + 0x0f8b, 0x8000, 0x0784, 0x704c, + 0x0a1d, 0x02c5, 0x245a, 0x140d, + 0x7d36, 0x655d, 0x2532, 0x1f8d, + 0x8000, 0x07a8, 0x0ded, 0x90e1, + 0x7144, 0xf007, 0x7185, 0x0cc9, + 0x9094, 0x700c, 0xc6c4, 0xc6c4, + 0x1e00, 0x77c5, 0x9003, 0xfe64, + 0x0781, 0x00c0, 0xc2e6, 0x1cfc, + 0xb6c8, 0xc1a3, 0x704c, 0xf005, + 0x1900, 0x0005, 0x7144, 
0x0afd, + 0x8674, 0x2014, 0x0081, 0xdb32, + 0xbb9f, 0x8b20, 0xa821, 0x6b21, + 0xc140, 0x8920, 0xa822, 0xc100, + 0x7124, 0xc141, 0x8920, 0xa823, + 0xc101, 0x7124, 0xc142, 0x8920, + 0xa824, 0xc102, 0x6941, 0x8a20, + 0x2240, 0x005f, 0x2740, 0x304c, + 0xa82b, 0x1700, 0x3081, 0x6ca1, + 0x6dc1, 0xa82c, 0x8c20, 0x6ee1, + 0x4358, 0xa82d, 0x8d20, 0xa82e, + 0x8e20, 0xa830, 0x8f20, 0xa831, + 0x8f21, 0xa836, 0x8f22, 0xa838, + 0x13e6, 0x8081, 0xe930, 0x264a, + 0x3000, 0xf02a, 0x265a, 0x3c82, + 0x235a, 0x1641, 0x7165, 0x621a, + 0x6159, 0x8b40, 0xa941, 0xc200, + 0x8a40, 0xa942, 0xc201, 0x8a40, + 0xa943, 0xc202, 0x8a40, 0xa944, + 0x1300, 0x3082, 0xa94b, 0x1700, + 0x3082, 0xa94c, 0x8c40, 0xa94d, + 0x8d40, 0xa94e, 0x8e40, 0xa950, + 0x8f40, 0xa951, 0x8f41, 0xa956, + 0x8f42, 0xa958, 0x0bb9, 0x9094, + 0x2640, 0x305e, 0x0ef9, 0xb0b4, + 0x706d, 0xf08d, 0xda3e, 0xba9f, + 0x1201, 0x0481, 0xa83a, 0x1201, + 0x0481, 0xa83b, 0x1201, 0x0481, + 0xa83c, 0x1201, 0x0481, 0xa83d, + 0x1201, 0x0481, 0x1824, 0x0042, + 0x1201, 0x0481, 0x1825, 0x0042, + 0x1201, 0x0481, 0x1826, 0x0042, + 0x1201, 0x0481, 0x1827, 0x0042, + 0x1201, 0x0481, 0x1829, 0x0042, + 0x1201, 0x0481, 0x182a, 0x0042, + 0x1201, 0x0481, 0x182f, 0x0042, + 0x121c, 0x0481, 0x1831, 0x0042, + 0x1201, 0x0481, 0x1833, 0x0042, + 0x1201, 0x0481, 0x1834, 0x0042, + 0x1201, 0x0481, 0x1835, 0x0042, + 0x1201, 0x0481, 0x1836, 0x0042, + 0x1201, 0x0481, 0x183d, 0x0042, + 0x1201, 0x0481, 0x183e, 0x0042, + 0x1201, 0x0481, 0x183f, 0x0042, + 0x1201, 0x0481, 0x1840, 0x0042, + 0x1201, 0x0481, 0x1842, 0x0042, + 0x1201, 0x0481, 0x1843, 0x0042, + 0x1201, 0x0481, 0x1848, 0x0042, + 0x1201, 0x0481, 0x184a, 0x0042, + 0x1201, 0x0481, 0x184c, 0x0042, + 0x1201, 0x0481, 0x184d, 0x0042, + 0x1201, 0x0481, 0x184e, 0x0042, + 0x1201, 0x0481, 0x184f, 0x0042, + 0x1201, 0x0481, 0x1856, 0x0042, + 0x1201, 0x0481, 0x1857, 0x0042, + 0x1201, 0x0481, 0x1858, 0x0042, + 0x1201, 0x0481, 0x1859, 0x0042, + 0x1201, 0x0481, 0x185b, 0x0042, + 0x8a20, 0x185c, 0x0042, 0x8a21, + 0x1861, 0x0042, 
0x8a22, 0x1863, + 0x0042, 0xc0a3, 0x1404, 0x341b, + 0xc6c6, 0x78e0, 0xc3e4, 0xc1ab, + 0xc080, 0x41c3, 0x8000, 0x0650, + 0x087a, 0xfeef, 0xda2a, 0xc080, + 0x0e5a, 0x0260, 0xd92a, 0xdd25, + 0xbd9f, 0x8d00, 0x46cb, 0x9004, + 0x00e4, 0xb600, 0x1533, 0x1080, + 0xb601, 0x15f4, 0x9080, 0xe888, + 0x1e00, 0x7005, 0x9003, 0xe154, + 0x1e42, 0x905d, 0x0be6, 0xffcf, + 0x2053, 0x00c1, 0x1a01, 0x3042, + 0x791d, 0x793d, 0x793d, 0x793d, + 0xb9c3, 0x1a02, 0x3042, 0xd908, + 0xb961, 0x0901, 0x0031, 0x781d, + 0xb8c1, 0x0af2, 0x01e0, 0x1a06, + 0x3002, 0x09e2, 0x0000, 0x0dba, + 0x0180, 0x164c, 0x9100, 0x080d, + 0x0151, 0x8d00, 0xb600, 0xb601, + 0xc7c4, 0x78e0, 0xc3e2, 0xc1bd, + 0x4508, 0xc080, 0x41c3, 0x8000, + 0x04ad, 0x0ff2, 0xfeaf, 0xda72, + 0xc080, 0x0dd2, 0x0260, 0xd972, 0xed07, 0x208a, 0x0c07, 0x1e00, - 0x7004, 0x9003, 0xe8c4, 0xc6c2, + 0x7004, 0x9003, 0xe8c4, 0xc7c2, 0xc2e6, 0x1cfc, 0xb6c8, 0xc1a7, - 0x0c0e, 0xffef, 0x4708, 0x0b86, - 0xffef, 0xc044, 0x0bf6, 0xffef, - 0xc045, 0xc046, 0x40c3, 0x0000, - 0x2710, 0x0c02, 0x00a0, 0x742c, - 0x4608, 0x700c, 0x0c92, 0xff2f, + 0x0c06, 0xffef, 0x4708, 0x0b7e, + 0xffef, 0xc046, 0x0bee, 0xffef, + 0xc044, 0xc045, 0x40c3, 0x0000, + 0x2710, 0x0bb6, 0x00a0, 0x742c, + 0x4608, 0x700c, 0x093e, 0xff2f, 0xd91f, 0xca00, 0x702c, 0x70ad, 0x7604, 0x7b0f, 0xd8ff, 0xb98e, 0x754c, 0xc543, 0xc742, 0xc541, - 0x0d66, 0xff2f, 0xc540, 0x7bcf, - 0xd8ff, 0x702c, 0x704c, 0xc543, - 0xc742, 0xc541, 0x0d52, 0xff2f, - 0xc540, 0x71cd, 0xd8ff, 0x702c, - 0x704c, 0xdb7c, 0xc543, 0xc742, - 0xc641, 0x0d3e, 0xff2f, 0xc540, - 0x0f86, 0xff2f, 0xd8ff, 0x41c3, - 0x0001, 0x0000, 0xd8ff, 0x724c, + 0x09fa, 0xff2f, 0xc540, 0x0c42, + 0xff2f, 0xd8ff, 0x7bcf, 0xd8ff, + 0x702c, 0x704c, 0xc543, 0xc742, + 0xc541, 0x09e2, 0xff2f, 0xc540, + 0x71cd, 0xd8ff, 0x702c, 0x704c, + 0xdb7c, 0xc543, 0xc742, 0xc641, + 0x09ca, 0xff2f, 0xc540, 0xd8ff, + 0x41c3, 0x0001, 0x0000, 0x724c, 0xdb08, 0xc643, 0xc742, 0xc641, - 0xc540, 0x0d1e, 0xff2f, 0x4338, - 0xf839, 0xc543, 0xc742, 0xc541, - 0x0d0e, 0xff2f, 
0xc540, 0xf836, - 0xc543, 0xc742, 0xc641, 0x0d02, + 0xc540, 0x09b2, 0xff2f, 0x4338, + 0xf83d, 0xc543, 0xc742, 0xc541, + 0x09a2, 0xff2f, 0xc540, 0xf83a, + 0xc543, 0xc742, 0xc641, 0x0996, 0xff2f, 0xc540, 0xd8ff, 0x4163, 0x724c, 0xdb08, 0xc643, 0xc742, - 0xc641, 0x0cee, 0xff2f, 0xc540, - 0xc004, 0x754c, 0xc543, 0x2042, - 0x0800, 0x2044, 0x0041, 0xc004, - 0xc542, 0xc541, 0x6038, 0xe020, - 0x7b0f, 0xd8ff, 0x702c, 0x0cca, - 0xff2f, 0xc540, 0xd8ff, 0x218a, - 0x0010, 0x714c, 0xdb08, 0xc543, - 0xc742, 0xc541, 0x0cb2, 0xff2f, - 0xc540, 0xc106, 0xc005, 0x714c, - 0xc543, 0x7914, 0x2142, 0x0700, - 0xb8c0, 0x6038, 0x2054, 0x0c80, - 0x7b0f, 0xd8ff, 0x218a, 0x0010, - 0xc742, 0xc641, 0x0c8a, 0xff2f, - 0xc540, 0x700c, 0xb88e, 0x791b, - 0xd8ff, 0x754c, 0x726c, 0x1c0c, - 0x30c1, 0xc542, 0xc541, 0x0c72, - 0xff2f, 0xc540, 0xd8ff, 0x702c, - 0x754c, 0xdb40, 0xc543, 0xc542, - 0xc541, 0x0c5e, 0xff2f, 0xc540, - 0x0e8a, 0xff2f, 0xd8ff, 0xbfc1, - 0xd8ff, 0xd980, 0x764c, 0x746c, - 0xc543, 0xc742, 0xc541, 0x0c42, - 0xff2f, 0xc640, 0xc0a7, 0x1404, - 0x341b, 0xc6c6, 0xd8ff, 0x218a, + 0xc641, 0x0982, 0xff2f, 0xc540, + 0xc006, 0x754c, 0xc543, 0x2044, + 0x0041, 0x6038, 0xe020, 0x7b0f, + 0xd8ff, 0x702c, 0xc542, 0xc541, + 0x0962, 0xff2f, 0xc540, 0xd8ff, + 0x218a, 0x0010, 0x714c, 0xdb08, + 0xc543, 0xc742, 0xc541, 0x094e, + 0xff2f, 0xc540, 0xc105, 0xc004, + 0x714c, 0xc543, 0x7914, 0x2154, + 0x0c80, 0xb8c0, 0x6038, 0x2054, + 0x0c80, 0x7b0f, 0xd8ff, 0x218a, + 0x0010, 0xc742, 0xc641, 0x0926, + 0xff2f, 0xc540, 0x700c, 0xb88e, + 0x791b, 0xd8ff, 0x754c, 0x726c, + 0x1c0c, 0x30c1, 0xc542, 0xc541, + 0x090a, 0xff2f, 0xc540, 0xd8ff, + 0x702c, 0x754c, 0xdb40, 0xc543, + 0xc542, 0xc541, 0x08f6, 0xff2f, + 0xc540, 0xbfc1, 0xd8ff, 0x702c, + 0x764c, 0xdb2e, 0xc543, 0xc742, + 0xc541, 0x08e2, 0xff2f, 0xc640, + 0x0b0a, 0xff2f, 0xd8ff, 0xd8ff, + 0xd980, 0x754c, 0x746c, 0xc543, + 0xc542, 0xc541, 0x08c6, 0xff2f, + 0xc540, 0xc0a7, 0x1404, 0x341b, + 0xc6c6, 0x78e0, 0xd8ff, 0x218a, 0x0010, 0x724c, 0xdb08, 0x7ee0, 0xc2e6, 0x1cfc, 
0xb6c8, 0x1600, 0x7101, 0x9004, 0x01e0, 0x1600, 0x7082, 0x8000, 0x0004, 0x225f, 0x0100, 0x2044, 0x07c3, 0x4020, 0xeb05, 0xbb61, 0x0bff, 0x8031, - 0x781d, 0xb8c3, 0x080b, 0x0050, - 0x0877, 0x01b1, 0x71ed, 0x46cb, + 0x781d, 0xb8c3, 0x080b, 0x0190, + 0x0877, 0x0071, 0xdf18, 0x46cb, 0x9004, 0x0116, 0x1e08, 0x1094, - 0x71ad, 0x1e02, 0x1354, 0xd829, + 0x71ad, 0x1e02, 0x1055, 0xd829, 0xb6a0, 0x1e6c, 0x1205, 0x1e6c, 0x1004, 0x702c, 0x40c3, 0x0098, - 0x9680, 0x1ef2, 0x9044, 0x4728, - 0x0a1a, 0x00a0, 0x1e66, 0x1044, + 0x9680, 0x4328, 0x1ef2, 0x9044, + 0x09be, 0x00a0, 0x1e66, 0x1044, 0xd8a9, 0x1e6c, 0x1004, 0x208a, 0x0a46, 0x1e6c, 0x1004, 0x208a, 0x0a44, 0x1e6c, 0x1004, 0x1e6c, 0x1004, 0x40c3, 0x0000, 0x6000, 0x1e72, 0x935c, 0x1ed0, 0x9344, - 0x1e72, 0x90dd, 0x098e, 0x00a0, - 0x1ed0, 0x93c4, 0x1e72, 0x935c, - 0x1e72, 0x93dc, 0xf048, 0x45cb, + 0x1e72, 0x90dd, 0x0932, 0x00a0, + 0x1ed0, 0x92c4, 0x1e72, 0x935c, + 0x1e72, 0x92dc, 0xf046, 0x45cb, 0x9004, 0x0116, 0x1d08, 0x1094, - 0x1d02, 0x13d4, 0x702c, 0x40c3, - 0x000f, 0x4240, 0x234a, 0x1200, - 0x43db, 0x0000, 0x0818, 0xb5e0, - 0x1d6c, 0x12c4, 0x1df2, 0x9044, - 0x1d66, 0x1044, 0x4628, 0x09a6, - 0x00a0, 0x1d6c, 0x16c4, 0x40c3, - 0x000f, 0x4240, 0x2342, 0x341b, - 0x781d, 0x702c, 0x098e, 0x00a0, - 0x1d6c, 0x16c4, 0x40c3, 0x0098, - 0x9680, 0x702c, 0x097e, 0x00a0, - 0x1d6c, 0x12c4, 0xd888, 0x1d6c, - 0x1004, 0x40c3, 0x0000, 0x6000, - 0x1d6c, 0x12c4, 0x1d6c, 0x1384, - 0x1d72, 0x93dc, 0x1dd0, 0x93c4, - 0x1d72, 0x90dd, 0x08fe, 0x00a0, - 0x1dd0, 0x9384, 0x1d72, 0x93dc, - 0x1d72, 0x939c, 0x700c, 0x0946, - 0x00a0, 0xd920, 0x45cb, 0x9004, - 0x01f0, 0xd820, 0x08de, 0x00a0, - 0x1d00, 0x1005, 0xd820, 0x08d6, - 0x00a0, 0x1d00, 0x1045, 0x740c, - 0x08ca, 0x00a0, 0x1d00, 0x1005, - 0x1d0a, 0x909d, 0x1404, 0x341b, - 0xc6c6, 0x78e0, 0xe0e4, 0xd964, - 0x20ca, 0x0045, 0x208c, 0x8906, - 0x218a, 0x0906, 0x20ca, 0x004d, + 0x71cd, 0x1d02, 0x1055, 0x234a, + 0x1200, 0x702c, 0xbf8b, 0x40c3, + 0x000f, 0x4240, 0xb5c0, 0x1d6c, + 0x12c4, 0x260a, 0x3040, 0x1df2, + 
0x9044, 0x1d66, 0x1044, 0x1d6c, + 0x13c4, 0x0946, 0x00a0, 0x4318, + 0xbf70, 0x202f, 0x06c2, 0x702c, + 0x0936, 0x00a0, 0x1d6c, 0x13c4, + 0x40c3, 0x0098, 0x9680, 0x702c, + 0x0926, 0x00a0, 0x1d6c, 0x12c4, + 0xd888, 0x1d6c, 0x1004, 0x40c3, + 0x0000, 0x6000, 0x1d6c, 0x12c4, + 0x1d6c, 0x1784, 0x1d72, 0x939c, + 0x1dd0, 0x9384, 0x1d72, 0x90dd, + 0x08a6, 0x00a0, 0x1dd0, 0x9784, + 0x1d72, 0x939c, 0x1d72, 0x979c, + 0x700c, 0x08ee, 0x00a0, 0xd920, + 0x45cb, 0x9004, 0x01f0, 0xd820, + 0x0886, 0x00a0, 0x1d00, 0x1005, + 0xd820, 0x087e, 0x00a0, 0x1d00, + 0x1045, 0x740c, 0x0872, 0x00a0, + 0x1d00, 0x1005, 0x1d0a, 0x909d, + 0x1404, 0x341b, 0xc6c6, 0x78e0, + 0x2088, 0x0901, 0x2089, 0x0906, 0x2080, 0x078e, 0x781d, 0x781d, 0x0009, 0x0020, 0x780f, 0x78e0, - 0x080b, 0x08d4, 0x2080, 0x0883, - 0xb886, 0x7fe0, 0x780f, 0x78e0, + 0x080b, 0x08f4, 0x4100, 0xb97e, + 0xb986, 0x7fe0, 0x782f, 0x78e0, 0xc3e6, 0xc1aa, 0x70cd, 0x40c3, - 0x8000, 0x076d, 0xa8cd, 0x40c3, - 0x8000, 0x076d, 0xa8cc, 0x40c3, - 0x8000, 0x076d, 0xa8cb, 0x40c3, - 0x8000, 0x076d, 0xa8ca, 0x41c3, - 0x0089, 0x0000, 0x0e5e, 0xff2f, - 0xd80a, 0xf003, 0x71c5, 0x71d5, - 0x0226, 0x0029, 0x712c, 0x4020, - 0x2644, 0x17c1, 0xe905, 0xb961, - 0x09ff, 0x8031, 0x781b, 0x1be8, - 0xb000, 0x700c, 0xf002, 0x7104, - 0x7a0f, 0x1e00, 0x7002, 0x8000, - 0x076d, 0x0ad5, 0x80b5, 0x78cf, - 0x1bf0, 0xb000, 0x13f0, 0xb001, - 0x0c6a, 0xffaf, 0x4040, 0x0a1b, - 0x0071, 0x4508, 0x1600, 0x7081, - 0x8000, 0x0058, 0x13e8, 0xb000, - 0x790b, 0x710c, 0xf00a, 0xea8b, - 0x1600, 0x7081, 0x8000, 0x0025, - 0x13e8, 0xb000, 0x790b, 0x700c, - 0xf3db, 0x71ed, 0x40e1, 0xbac4, + 0x8000, 0x07c9, 0xa8cd, 0x40c3, + 0x8000, 0x07c9, 0xa8cc, 0x40c3, + 0x8000, 0x07c9, 0xa8cb, 0x40c3, + 0x8000, 0x07c9, 0xa8ca, 0x41c3, + 0x0089, 0x0000, 0x0da2, 0xff2f, + 0xd80a, 0x0249, 0x0000, 0x2644, + 0x17c1, 0xe906, 0xb961, 0x0901, + 0x0031, 0x781b, 0x1be4, 0xb000, + 0x021f, 0x0020, 0x702c, 0x1600, + 0x7080, 0x8000, 0x001b, 0x081f, + 0x005e, 0xea07, 0x1600, 0x7080, + 0x8000, 0x007d, 0xf005, 0x1600, + 
0x7080, 0x8000, 0x004a, 0x7610, + 0xf4fa, 0x78cf, 0x1bf0, 0xb000, + 0x13f0, 0xb001, 0x0c5a, 0xffaf, + 0x4040, 0x4508, 0xea0b, 0x1600, + 0x7081, 0x8000, 0x0058, 0x13e4, + 0xb000, 0x782b, 0x712c, 0xf40c, + 0xf0e6, 0x1600, 0x7081, 0x8000, + 0x0025, 0x13e4, 0xb000, 0x782b, + 0x702c, 0xf2de, 0x710c, 0xbac4, 0xea05, 0xba61, 0x0aff, 0x8031, - 0x781b, 0x0a36, 0xff6f, 0x780f, - 0x13e8, 0xb000, 0x41a1, 0x1e00, - 0x73c2, 0x8000, 0x0684, 0x780f, - 0x0aee, 0xff6f, 0x1bec, 0xb000, - 0x700c, 0x1be0, 0xb000, 0x1e00, - 0x7002, 0x8000, 0x0684, 0xd854, - 0x1bfc, 0xb700, 0x2402, 0x301c, - 0xc082, 0x0f2a, 0xfeaf, 0x1be4, - 0xb000, 0x13e4, 0xb001, 0x700c, - 0xf004, 0x1901, 0x0013, 0x7104, - 0xe0d0, 0xf6fd, 0x70ad, 0xf003, - 0x71a5, 0xe5d0, 0x0086, 0x0029, - 0x776d, 0x0ee2, 0xffef, 0x78af, - 0x1bf4, 0xb000, 0x1600, 0x7081, - 0x8000, 0x076d, 0x13ec, 0xb000, - 0x13f4, 0xb002, 0x0e3a, 0x0020, - 0x1bf8, 0xb001, 0x40c3, 0x0003, - 0xd090, 0x0fa2, 0x0060, 0x702c, - 0x0fea, 0xfeef, 0x13ec, 0xb000, - 0x13f0, 0xb000, 0x2342, 0x3201, - 0x0fda, 0xfeaf, 0x724c, 0x13f8, - 0xb000, 0x13f4, 0xb002, 0x41c3, - 0x008a, 0x0004, 0x2004, 0x0f8f, - 0x0000, 0x3f00, 0x1600, 0x7080, - 0x8000, 0x076d, 0x1bf8, 0xb3c0, - 0xc741, 0xc040, 0x740c, 0x0d0e, - 0xff2f, 0x43c1, 0xef42, 0x13e4, - 0xb000, 0x60b8, 0x1800, 0x0043, - 0xf1bc, 0x77ad, 0x47a9, 0x41a1, - 0xf006, 0x13e0, 0xb000, 0x7104, - 0x1be0, 0xb000, 0x13e0, 0xb000, - 0xe0d0, 0x0046, 0x0029, 0x7734, - 0x13e4, 0xb000, 0x1001, 0x0482, - 0x1be4, 0xb000, 0xf213, 0x13e0, - 0xb000, 0xe0d0, 0xf204, 0x2279, - 0x0000, 0xe869, 0x13e0, 0xb000, - 0x4fb3, 0x7704, 0x4832, 0x7270, - 0x27ca, 0x1009, 0x25ca, 0x1049, - 0xf002, 0xea04, 0x772c, 0xf1db, - 0x13e0, 0xb001, 0xf1d7, 0x2139, - 0x02c2, 0x2538, 0x12c0, 0x784b, - 0xd850, 0x771c, 0x753c, 0x700c, - 0x77b5, 0xf205, 0x4fb0, 0x207f, + 0x781b, 0x0a2a, 0xff6f, 0x780f, + 0x13e4, 0xb000, 0x41a1, 0x1e00, + 0x7043, 0x8000, 0x06e0, 0x780f, + 0x0ae2, 0xff6f, 0x1be8, 0xb000, + 0x702c, 0xd854, 0x1be0, 0xb040, + 0x1e00, 0x7042, 0x8000, 
0x06e0, + 0x1bf8, 0xb700, 0x2402, 0x301c, + 0xc082, 0x0bd2, 0xfeaf, 0x1bec, + 0xb000, 0xf005, 0x1801, 0x0013, + 0x7124, 0xe1d0, 0xf7fc, 0x70ed, + 0xf048, 0x0eea, 0xffef, 0x78ef, + 0x4508, 0x1600, 0x7081, 0x8000, + 0x07c9, 0x13e8, 0xb000, 0x42a1, + 0x0e66, 0x0020, 0x1bf4, 0xb001, + 0x0f0d, 0x18d1, 0x40c3, 0x0003, + 0xd090, 0xf005, 0x40c3, 0x0001, + 0x3880, 0x0f56, 0x0060, 0x702c, + 0x0c96, 0xfeef, 0x13e8, 0xb000, + 0x13f0, 0xb000, 0x2342, 0x3301, + 0x0c8e, 0xfeaf, 0x724c, 0x13f4, + 0xb000, 0x1600, 0x7081, 0x8000, + 0x07c9, 0x42a1, 0x2004, 0x0f80, + 0x0000, 0x3f00, 0x1bfc, 0xb000, + 0x1bf4, 0xb000, 0xc041, 0xc140, + 0x740c, 0x41c3, 0x008a, 0x0004, + 0x0c4e, 0xff2f, 0x43c1, 0x13fc, + 0xb000, 0xe807, 0x13ec, 0xb000, + 0x60f8, 0x1800, 0x0043, 0x71e5, + 0xe7d0, 0x0772, 0xffee, 0x774c, + 0x4548, 0x4748, 0x4140, 0xf023, + 0x13ec, 0xb000, 0x1001, 0x0483, + 0x1bec, 0xb000, 0xf211, 0x13e0, + 0xb000, 0xe0d0, 0xf202, 0xeb12, + 0x13e0, 0xb000, 0x4fb4, 0x7704, + 0x4833, 0x7391, 0x27ca, 0x1009, + 0x25ca, 0x1049, 0xf002, 0xeb04, + 0x772c, 0xf004, 0x13e0, 0xb001, + 0x13e0, 0xb000, 0x7104, 0x1be0, + 0xb000, 0x13e0, 0xb000, 0xe0d1, + 0x07b8, 0xffe5, 0x7734, 0x2139, + 0x0083, 0x2538, 0x1080, 0x786b, + 0xd850, 0x753c, 0x771c, 0x77b5, + 0x700c, 0xf205, 0x4fb0, 0x207f, 0x0100, 0x1600, 0x7083, 0x8000, - 0x076d, 0x41c3, 0x008c, 0x0002, - 0x0eae, 0xfeaf, 0x42c1, 0x67b8, - 0x2844, 0x0080, 0x0dce, 0xffef, - 0x780f, 0x1600, 0x7081, 0x8000, - 0x076d, 0x2614, 0x1041, 0x71c3, - 0x8000, 0x076d, 0xa90a, 0x13ec, - 0xb000, 0x1e00, 0x7043, 0x8000, - 0x0684, 0x0cc2, 0xff4f, 0x13fc, - 0xb01c, 0x1600, 0x7080, 0x8000, - 0x076d, 0x05ff, 0xffcf, 0x40c3, - 0x8000, 0x076d, 0xa82e, 0xc7c6, - 0xc2e6, 0x1cfc, 0xb6c8, 0xc1bc, - 0xd825, 0xb89f, 0x8820, 0x1c36, - 0x3042, 0x1033, 0x0080, 0x41c3, - 0x8000, 0x076c, 0x89a0, 0x1c37, - 0x3002, 0x8907, 0xc06d, 0x8909, - 0x1c35, 0x3002, 0x8908, 0x1c32, - 0x3002, 0x890a, 0xc192, 0x1c33, - 0x3002, 0x0e46, 0x0020, 0x710c, - 0xd80c, 0x2805, 0x0340, 0x702c, - 0xc145, 0x205a, 
0x0100, 0x7404, - 0xc049, 0xf006, 0x7a34, 0x700c, - 0xb200, 0x7124, 0x09f9, 0x82b2, - 0xc297, 0x700c, 0x47cb, 0x8000, - 0x0684, 0xc044, 0xf004, 0xc004, - 0x7104, 0xc044, 0xc004, 0x080d, - 0x00b2, 0x706f, 0x041b, 0x0000, - 0x0bef, 0xb0b3, 0x712c, 0xc004, - 0x2044, 0x07c2, 0x4020, 0xc104, + 0x07c9, 0x41c3, 0x008c, 0x0002, + 0x0b5a, 0xfeaf, 0x42c1, 0x67b8, + 0x2844, 0x0080, 0x0dc6, 0xffef, + 0x780f, 0x1e00, 0x7043, 0x8000, + 0x06e0, 0x1600, 0x7081, 0x8000, + 0x07c9, 0x2614, 0x1041, 0x2100, + 0x0f81, 0x8000, 0x07c9, 0xa90a, + 0x0cbe, 0xff6f, 0x13e8, 0xb000, + 0x13f8, 0xb01c, 0x1600, 0x7081, + 0x8000, 0x07c9, 0x7124, 0x7a2f, + 0x7154, 0x1e00, 0x7042, 0x8000, + 0x07c9, 0x05de, 0xffce, 0x71c5, + 0x72d5, 0x05be, 0xffe5, 0x710c, + 0x40c3, 0x8000, 0x07c9, 0x180e, + 0x0043, 0xc7c6, 0xc2e6, 0x1cfc, + 0xb6c8, 0xc1bd, 0xd925, 0xb99f, + 0x8900, 0x1c3a, 0x3002, 0x1133, + 0x0080, 0x1c3b, 0x3002, 0x40c3, + 0x8000, 0x07c8, 0x8827, 0x88a0, + 0xc16e, 0x8829, 0x1c39, 0x3042, + 0x8828, 0x1c36, 0x3042, 0x882a, + 0x710c, 0x1c37, 0x3042, 0x0e46, + 0x0020, 0xc193, 0x702c, 0xc146, + 0xf005, 0x7a34, 0x700c, 0xb200, + 0x7124, 0x09fb, 0x82b4, 0xc298, + 0xd90c, 0x2905, 0x0341, 0x740c, + 0x7835, 0xc049, 0x040d, 0x0020, + 0x700c, 0x03ff, 0x0020, 0x706f, + 0xc005, 0x712c, 0x2044, 0x07c2, + 0x4020, 0xc105, 0xea05, 0xba61, + 0x0aff, 0x8031, 0x781b, 0xc047, + 0x202f, 0x06c7, 0x0dca, 0x0020, + 0x792f, 0x46cb, 0x8000, 0x06e0, + 0xc048, 0x8e00, 0x7014, 0xf2f6, + 0x1600, 0x7080, 0x8000, 0x0001, + 0xb8e3, 0xf4f0, 0x0a86, 0xff4f, + 0x70ed, 0x700c, 0xf07e, 0xc004, + 0x2079, 0x0080, 0x7704, 0xc04a, + 0xc004, 0xb882, 0x0815, 0x0131, + 0xd9ff, 0x40c3, 0x0000, 0x0a00, + 0x0aea, 0xfecf, 0x0baa, 0x0180, + 0x8e01, 0x71ad, 0x702c, 0xc043, + 0xc007, 0x704c, 0x726c, 0x780f, + 0xc04b, 0xc042, 0xc00a, 0xc541, + 0x780f, 0xc04c, 0x781d, 0x781d, + 0x2044, 0x0800, 0x2045, 0x0180, + 0xc040, 0x0b92, 0xfeef, 0xd8ff, + 0x8e01, 0xc742, 0xc541, 0xc043, + 0xc004, 0x702c, 0x704c, 0x726c, + 0x2032, 0x0f80, 0x8000, 0x0448, + 0xb8c5, 
0xc040, 0x0b6e, 0xfeef, + 0xd8ff, 0x8e01, 0x702c, 0x704c, + 0xc043, 0xc00b, 0x726c, 0xc541, + 0xc042, 0xc00c, 0x781d, 0x2044, + 0x0800, 0x2045, 0x0580, 0xc040, + 0x0b4a, 0xfeef, 0xd8ff, 0x8e01, + 0x702c, 0x704c, 0xc043, 0xc00a, + 0x726c, 0xc742, 0xb8c5, 0xc040, + 0xd8ff, 0x0b32, 0xfeef, 0xc541, + 0x40c3, 0x0000, 0x2710, 0x0cba, + 0x0060, 0xd90a, 0x781d, 0x781d, + 0x7704, 0x7d0f, 0xf8dd, 0xc742, + 0xc741, 0xc740, 0x0b0e, 0xfeef, + 0xc543, 0xf8da, 0xc543, 0xc742, + 0xc741, 0x0b02, 0xfeef, 0xc740, + 0xc004, 0xb863, 0x0811, 0x0095, + 0x0a86, 0x0180, 0x700c, 0x0aae, + 0xfeef, 0x712c, 0xc004, 0x7104, + 0xc044, 0xc004, 0x7514, 0x0702, + 0xffe5, 0xd9ff, 0x40c3, 0x0000, + 0x0a00, 0x09fa, 0xfecf, 0x0aba, + 0x0180, 0x8e01, 0x702c, 0x71ed, + 0xc043, 0xd8ff, 0x704c, 0xdb18, + 0x4528, 0xc142, 0xc741, 0x0ab6, + 0xfeef, 0xc140, 0x0cfa, 0xfeef, + 0xd8ff, 0x8e01, 0x702c, 0x704c, + 0xc043, 0xc007, 0x726c, 0xc741, + 0x780f, 0xc044, 0xc042, 0xd820, + 0xc040, 0x0a92, 0xfeef, 0xd8ff, + 0x8e01, 0x702c, 0x704c, 0xc043, + 0xd8ff, 0x726c, 0xc542, 0xc741, + 0x0a7a, 0xfeef, 0x1c00, 0x30c1, + 0x8e01, 0x702c, 0x704c, 0xc043, + 0xc004, 0x726c, 0xc741, 0xc042, + 0xd8ff, 0x0a62, 0xfeef, 0x1c00, + 0x3481, 0x8e01, 0x702c, 0x704c, + 0xc043, 0xd8ff, 0x726c, 0xc542, + 0xc741, 0x0a4a, 0xfeef, 0xc540, + 0x8e01, 0x702c, 0x704c, 0xc043, + 0xd8ff, 0xdb08, 0xc542, 0xc741, + 0x0a32, 0xfeef, 0xc540, 0x0c5e, + 0xfeef, 0xd8ff, 0x8e01, 0x702c, + 0x704c, 0xc043, 0xd8ff, 0xdb18, + 0xc542, 0xc741, 0x0a16, 0xfeef, + 0xc540, 0x09a6, 0x0180, 0xf07f, + 0x700c, 0x0932, 0xfeef, 0xd9ff, + 0xc007, 0x70ad, 0x734c, 0x205a, + 0x0101, 0xdb22, 0xc543, 0xc540, + 0x7825, 0x7e0f, 0xd820, 0xc041, + 0xd8ff, 0x702c, 0x0bf6, 0xfeef, + 0xc642, 0xd828, 0xc041, 0xf88c, + 0xc543, 0xc642, 0x0be6, 0xfeef, + 0xc540, 0xf889, 0xc543, 0xc642, + 0x1c04, 0x33c1, 0x0bd6, 0xfeef, + 0xc040, 0xf885, 0xc543, 0xc642, + 0x1c04, 0x3501, 0x0bc6, 0xfeef, + 0xc540, 0xd8ff, 0xd980, 0x734c, + 0xdb22, 0xc543, 0xc642, 0x1c04, + 0x3081, 0x0bb2, 0xfeef, 0xc540, + 
0x700c, 0x095a, 0xfeef, 0x712c, + 0x1600, 0x7080, 0x8000, 0x0004, + 0xdae0, 0x201a, 0x0f81, 0x0020, + 0x0000, 0x40c3, 0x9008, 0x0100, + 0x2105, 0x0003, 0x7204, 0x7825, + 0xb340, 0x1800, 0x0485, 0x700c, + 0x0892, 0xfeef, 0xd9ff, 0x0bb2, + 0xfeef, 0xd8ff, 0xc007, 0x702c, + 0xb990, 0x780f, 0xc042, 0xd8ff, + 0x714c, 0x746c, 0xc543, 0xc541, + 0x094a, 0xfeef, 0xc540, 0xd90c, + 0xd8ff, 0xb98d, 0x734c, 0x746c, + 0xc543, 0xc642, 0xc541, 0x0936, + 0xfeef, 0xc540, 0x0b5e, 0xfeef, + 0xd8ff, 0xd8ff, 0xd980, 0x754c, + 0x746c, 0xc543, 0xc542, 0xc541, + 0x091a, 0xfeef, 0xc540, 0xd850, + 0xc049, 0x218a, 0x0fc7, 0x40c3, + 0x9003, 0xe048, 0xb020, 0x1804, + 0x0005, 0x2440, 0x3e80, 0x2032, + 0x06c1, 0xc007, 0x782b, 0xf264, + 0xc009, 0x702c, 0x08ae, 0xfeef, + 0x7810, 0x700c, 0x0a5a, 0x0060, + 0xd91e, 0x2440, 0x3d80, 0x2032, + 0x06cb, 0xc08e, 0x2032, 0x06cd, + 0x41a1, 0xf00f, 0x211a, 0x0f80, + 0x0000, 0x2000, 0x7a34, 0x7124, + 0x2005, 0x0f80, 0x9002, 0x0054, + 0x9000, 0x780f, 0xb200, 0x0be9, + 0x9065, 0xc28f, 0x702c, 0x704c, + 0xf034, 0x23f4, 0x034c, 0xc008, + 0x7bb4, 0xbc80, 0xb380, 0xe886, + 0xc08f, 0x20f4, 0x0340, 0xe80f, + 0x235a, 0x340e, 0xc005, 0x7e16, + 0x6638, 0x2000, 0x0f80, 0x8000, + 0x07a8, 0xa8a0, 0x7124, 0xc008, + 0xe817, 0x235a, 0x340e, 0xc005, + 0x7e16, 0x6658, 0x2000, 0x0f80, + 0x8000, 0x0788, 0xa8a0, 0x7144, + 0xc008, 0xe88f, 0xc005, 0x2079, + 0x0000, 0x781b, 0x781b, 0x7404, + 0x7c05, 0xb380, 0xf005, 0xc08f, + 0x20f4, 0x0340, 0xe867, 0x71a5, + 0x0b9b, 0x9365, 0xc398, 0x40c3, + 0x0000, 0x09c4, 0x0fde, 0xfe8f, + 0x0f4e, 0xfe8f, 0x7167, 0x7277, + 0x0408, 0xffc5, 0xc005, 0x7104, + 0xc045, 0xc005, 0x7114, 0x03f6, + 0xffce, 0xf01a, 0xc006, 0x21f4, + 0x0001, 0xe913, 0x1600, 0x7080, + 0x8000, 0x0019, 0xe88d, 0xc006, + 0x2144, 0x0301, 0x201a, 0x0f80, + 0x0000, 0x2000, 0x2005, 0x0f80, + 0x9002, 0x0154, 0xb020, 0xc006, + 0x7104, 0xc046, 0xc006, 0x08d1, + 0x82b4, 0xc198, 0x40c3, 0x8000, + 0x06e0, 0x8800, 0xe808, 0x1600, + 0x7080, 0x8000, 0x0001, 0xb8e3, + 0x0884, 0xff41, 0x700c, 
0x1e00, + 0x7005, 0x9003, 0xe048, 0x09ae, + 0x0020, 0xc193, 0xc0bd, 0x1404, + 0x341b, 0xc6c6, 0xd8ff, 0x702c, + 0x734c, 0xdb22, 0x7ee0, 0x78e0, + 0xd8ff, 0x702c, 0x704c, 0x726c, + 0x7ee0, 0x78e0, 0xc2e6, 0x1cfc, + 0xb6c8, 0xc1a3, 0x4648, 0x4338, + 0x4708, 0x700c, 0xf031, 0x235f, + 0x3401, 0xc000, 0x7956, 0x6038, + 0x2032, 0x0f81, 0x8000, 0x07a8, + 0x40c1, 0x0fe6, 0x0160, 0x4328, + 0x4508, 0x231a, 0x1f80, 0x0000, + 0x2000, 0x2005, 0x0f80, 0x9002, + 0x0050, 0xc042, 0x254f, 0x1280, + 0xc041, 0xc101, 0xc002, 0xb020, + 0x40c3, 0x0000, 0x0bb8, 0x08ba, + 0x0060, 0x702c, 0x700c, 0xf007, + 0xc102, 0x7104, 0xb140, 0xc201, + 0xb140, 0x08f9, 0x80b4, 0x254f, + 0x1242, 0xc000, 0x7104, 0x2779, + 0x1042, 0xc040, 0x2214, 0x06c0, + 0x2032, 0x0f81, 0x8000, 0x0784, + 0xc000, 0x088f, 0x8044, 0xc0a3, + 0x1404, 0x341b, 0xc6c6, 0x78e0, + 0xc2e6, 0x1cfc, 0xb6c8, 0xc1a2, + 0x4728, 0x4608, 0x4020, 0x732c, + 0xc341, 0x0aa6, 0x0160, 0x4358, + 0x710c, 0x4508, 0x2644, 0x17c0, + 0xe805, 0xb861, 0x08ff, 0x8031, + 0x7dbb, 0x0b15, 0x3031, 0x255a, + 0x110d, 0x40e1, 0x732c, 0x097e, + 0xfeef, 0xdabf, 0xc001, 0x250f, + 0x139b, 0xe887, 0x40e1, 0x732c, + 0x096a, 0xfeef, 0xda7f, 0x1600, + 0x7080, 0x8000, 0x07c9, 0x265f, + 0x164d, 0x704c, 0x205f, 0x0c80, + 0x238a, 0x0dff, 0x1c00, 0x3fc1, + 0x60f8, 0x60b9, 0x0cc2, 0x01e0, + 0x202f, 0x06c7, 0x084e, 0xff8f, + 0x0856, 0xffaf, 0x4300, 0x4100, + 0x0a9e, 0x0160, 0x4060, 0x1600, + 0x7080, 0x8000, 0x07c9, 0x205f, + 0x0c80, 0x60f8, 0x60b8, 0x1003, + 0x00c1, 0x783d, 0x781d, 0x781d, + 0x781d, 0x2044, 0x0100, 0x7034, + 0x20c5, 0x04ab, 0x1e00, 0x7004, + 0x900e, 0x0022, 0x40e1, 0x0df6, + 0x0120, 0x732c, 0xc0a2, 0x1404, + 0x341b, 0xc6c6, 0x7a1b, 0x1600, + 0x7080, 0x8000, 0x001c, 0xbac4, 0xea05, 0xba61, 0x0aff, 0x8031, - 0x781b, 0xc046, 0x202f, 0x06c7, - 0x0db2, 0x0020, 0x792f, 0xc048, - 0x8f00, 0xe808, 0x1600, 0x7080, - 0x8000, 0x0001, 0xb8e3, 0xf289, - 0x700c, 0x0e76, 0xfeef, 0xd9ff, - 0x70ad, 0xd820, 0xc543, 0xc041, - 0xc006, 0x734c, 0xdb22, 0x205a, - 0x0101, 0xc540, 
0x7825, 0x7e0f, - 0xd8ff, 0x702c, 0x0956, 0xff2f, - 0xc642, 0xd828, 0xc543, 0xc642, - 0xc041, 0xd8ff, 0x702c, 0x734c, - 0xdb22, 0x0942, 0xff2f, 0xc540, - 0xd8ff, 0x702c, 0x734c, 0xdb22, - 0xc543, 0xc642, 0x1c04, 0x33c1, - 0x092a, 0xff2f, 0xc040, 0xd8ff, - 0x702c, 0x734c, 0xdb22, 0xc543, - 0xc642, 0x1c04, 0x3501, 0x0916, - 0xff2f, 0xc540, 0xd8ff, 0xd980, - 0x734c, 0xdb22, 0xc543, 0xc642, - 0x1c04, 0x3081, 0x08fe, 0xff2f, - 0xc540, 0x700c, 0x0e9e, 0xfeef, - 0x712c, 0x1600, 0x7080, 0x8000, - 0x0004, 0xdae0, 0x201a, 0x0f81, - 0x0020, 0x0000, 0x40c3, 0x9008, - 0x0100, 0x2105, 0x0003, 0x7204, - 0x7825, 0xb340, 0x1800, 0x0485, - 0x700c, 0x0dc6, 0xfeef, 0xd9ff, - 0x08fe, 0xff2f, 0xd8ff, 0xc006, - 0x702c, 0xc543, 0x780f, 0xc042, - 0xd8ff, 0xb990, 0x714c, 0x746c, - 0xc541, 0x0e96, 0xfeef, 0xc540, - 0xd90c, 0xd8ff, 0xb98d, 0x734c, - 0x746c, 0xc543, 0xc642, 0xc541, - 0x0e7e, 0xfeef, 0xc540, 0x08ae, - 0xff2f, 0xd8ff, 0xd8ff, 0xd980, - 0x754c, 0x746c, 0xc543, 0xc542, - 0xc541, 0x0e66, 0xfeef, 0xc540, - 0xd850, 0xc049, 0xf0f4, 0x099e, - 0xff4f, 0x70ad, 0x70cd, 0xf003, - 0x71c5, 0x74d5, 0x010c, 0x0029, - 0x2679, 0x1080, 0x7704, 0xc04a, - 0x2650, 0x90be, 0xf409, 0x40c3, - 0x0000, 0x0a00, 0x0d42, 0xfeef, - 0xd9ff, 0x0e92, 0x0180, 0x8f01, - 0x702c, 0x704c, 0xc043, 0xc006, - 0x726c, 0x780f, 0xc04b, 0xc042, - 0x710c, 0xc041, 0xc00a, 0x780f, - 0xc047, 0x781d, 0x781d, 0x2045, - 0x0180, 0x2044, 0x0980, 0xc040, - 0x0dfe, 0xfeef, 0xd8ff, 0x8f01, - 0x702c, 0x704c, 0xc043, 0x710c, - 0xc542, 0xc041, 0x2632, 0x1f80, - 0x8000, 0x0448, 0x726c, 0xb8c5, - 0xc040, 0x0dde, 0xfeef, 0xd8ff, - 0x8f01, 0x702c, 0x704c, 0xc043, - 0xc00b, 0x726c, 0xc042, 0x710c, - 0xc041, 0xc007, 0x781d, 0x2045, - 0x0580, 0x2044, 0x0d80, 0xc040, - 0x0db6, 0xfeef, 0xd8ff, 0x8f01, - 0x702c, 0x704c, 0xc043, 0x710c, - 0xc542, 0xc041, 0xc00a, 0x726c, - 0xb8c5, 0xc040, 0x0d9a, 0xfeef, - 0xd8ff, 0x40c3, 0x0000, 0x2710, - 0x0c02, 0x0060, 0xd90a, 0x781d, - 0x781d, 0x7704, 0x780f, 0xc542, - 0xc541, 0xc540, 0xc047, 0xc043, - 0xd8ff, 
0x702c, 0x704c, 0x0d72, - 0xfeef, 0x726c, 0xc007, 0x702c, - 0x704c, 0xc043, 0xd8ff, 0x726c, - 0xc542, 0xc541, 0x0d5a, 0xfeef, - 0xc540, 0x6e0b, 0x7114, 0x0704, - 0xffcd, 0x0d5a, 0x0180, 0x700c, - 0x0d02, 0xfeef, 0x712c, 0xf17a, - 0x40c3, 0x0000, 0x0a00, 0x0c4a, - 0xfeef, 0xd9ff, 0x0d96, 0x0180, - 0x8f01, 0x702c, 0x71cd, 0xc043, - 0xd8ff, 0x704c, 0xdb18, 0xc142, - 0xc641, 0x4528, 0x0d1a, 0xfeef, - 0xc140, 0x0f66, 0xfeef, 0xd8ff, - 0x8f01, 0x702c, 0x704c, 0xc043, - 0xc006, 0x726c, 0x780f, 0xc042, - 0xc047, 0xd820, 0xc641, 0xc040, - 0x0cf6, 0xfeef, 0xd8ff, 0x8f01, - 0x702c, 0x704c, 0xc043, 0xd8ff, - 0x726c, 0xc542, 0xc641, 0x0ce2, - 0xfeef, 0x1c00, 0x30c1, 0x8f01, - 0x702c, 0x704c, 0xc043, 0xc007, - 0x726c, 0xc042, 0xd8ff, 0xc641, - 0x0cc6, 0xfeef, 0x1c00, 0x3481, - 0x8f01, 0x702c, 0x704c, 0xc043, - 0xd8ff, 0x726c, 0xc542, 0xc641, - 0x0cae, 0xfeef, 0xc540, 0x8f01, - 0x702c, 0x704c, 0xc043, 0xd8ff, - 0xdb08, 0xc542, 0xc641, 0x0c9a, - 0xfeef, 0xc540, 0x0ec6, 0xfeef, - 0xd8ff, 0x8f01, 0x702c, 0x704c, - 0xc043, 0xd8ff, 0xdb18, 0xc542, - 0xc641, 0x0c7e, 0xfeef, 0xc540, - 0x0c82, 0x0180, 0x218a, 0x0fc7, - 0x40c3, 0x9003, 0xe048, 0xb020, - 0x1804, 0x0005, 0x2440, 0x3d80, - 0x2032, 0x06c1, 0xc006, 0x790b, - 0xf261, 0xc009, 0x702c, 0x0c0e, - 0xfeef, 0x7810, 0x700c, 0x0a9e, - 0x0060, 0xd91e, 0xc18d, 0x2132, - 0x06c2, 0xf010, 0x221a, 0x0f80, - 0x0000, 0x2000, 0xc38e, 0x7b54, - 0x7144, 0x2005, 0x0f80, 0x9002, - 0x0054, 0x9000, 0x780f, 0xb300, - 0x2440, 0x3c80, 0x2032, 0x06cb, - 0x0bdd, 0x90a3, 0x706c, 0x2132, - 0x06cd, 0x708d, 0xf002, 0x71a5, - 0x0b65, 0x1362, 0xc197, 0x79b4, - 0x9140, 0xc008, 0xba80, 0xb140, - 0xe885, 0xc08e, 0x20f4, 0x0340, - 0xe811, 0x235a, 0x340e, 0xc004, - 0x7e16, 0x6678, 0x70c3, 0x8000, - 0x074c, 0xa8a0, 0xc008, 0x7164, - 0xe885, 0xc08e, 0x20f4, 0x0340, - 0xe8e3, 0x235a, 0x340e, 0xc004, - 0x7e16, 0x6698, 0x70c3, 0x8000, - 0x072c, 0xa8a0, 0xc008, 0x7185, - 0xe8d7, 0xc004, 0x2079, 0x0000, - 0x781b, 0x781b, 0x7404, 0x7a05, - 0xb140, 0xf1cf, 0x40c3, 0x0000, - 
0x09c4, 0x0b42, 0xfecf, 0x0a9e, - 0xfecf, 0x03f9, 0xffef, 0x7167, - 0xc005, 0x7104, 0xc045, 0xc005, - 0x0833, 0x02b3, 0xc197, 0xc005, - 0x21f4, 0x0002, 0xea76, 0x1600, - 0x7080, 0x8000, 0x0019, 0xe8f2, - 0xc005, 0x201a, 0x0f80, 0x0000, - 0x2000, 0x2005, 0x0f81, 0x9002, - 0x0154, 0x2244, 0x0300, 0xb100, - 0xf1e4, 0x40c3, 0x8000, 0x0684, - 0x8800, 0xe809, 0x1600, 0x7080, - 0x8000, 0x0001, 0xb8e3, 0x0880, - 0xff41, 0x700c, 0x1e00, 0x7005, - 0x9003, 0xe048, 0x0992, 0x0020, - 0xc192, 0xc0bc, 0x1404, 0x341b, - 0xc6c6, 0x78e0, 0xc2e6, 0x1cfc, - 0xb6c8, 0xc1a3, 0x4708, 0x4548, - 0x4338, 0x700c, 0xf003, 0xc000, - 0x7104, 0x2779, 0x1042, 0xc040, - 0x2214, 0x06c0, 0x2032, 0x0f81, - 0x8000, 0x0728, 0xc000, 0x0865, - 0x0043, 0x235f, 0x3401, 0xc000, - 0x7956, 0x6038, 0x2032, 0x0f81, - 0x8000, 0x074c, 0x40a1, 0x0bce, - 0x01a0, 0x4328, 0x231a, 0x1f8b, - 0x0000, 0x2000, 0x4608, 0x2305, - 0x1f80, 0x9002, 0x0050, 0xc042, - 0x264f, 0x1280, 0xc041, 0xc101, - 0xc002, 0xb020, 0x40c3, 0x0000, - 0x0bb8, 0x090a, 0x0060, 0x702c, - 0x700c, 0xf007, 0xc102, 0x7104, - 0xb140, 0xc201, 0xb140, 0x08f9, - 0x80b2, 0x264f, 0x1242, 0xf1c5, - 0xc0a3, 0x1404, 0x341b, 0xc6c6, - 0xc2e6, 0x1cfc, 0xb6c8, 0xc1a1, - 0x4318, 0x4628, 0x4020, 0x732c, - 0x4768, 0x0e96, 0x0160, 0x4548, - 0x710c, 0x2344, 0x37ca, 0x0a11, - 0x1010, 0x2242, 0x104a, 0x0aff, - 0x9031, 0x781b, 0x205a, 0x010b, - 0xed86, 0x40c1, 0x732c, 0x0d2a, - 0xfeef, 0xdabf, 0x230f, 0x16cd, - 0xef86, 0x40c1, 0x732c, 0x0d1a, - 0xfeef, 0xda7f, 0x1600, 0x7080, - 0x8000, 0x076d, 0x235f, 0x364f, - 0x704c, 0x205f, 0x0c80, 0x238a, - 0x0dff, 0x1c00, 0x3fc1, 0x60d8, - 0x60f9, 0x0866, 0x0220, 0x78af, - 0x087a, 0xff8f, 0x0882, 0xffaf, - 0x4508, 0x4100, 0x0e92, 0x0160, - 0x40a1, 0x1600, 0x7080, 0x8000, - 0x076d, 0x205f, 0x0c80, 0x60d8, - 0x60f8, 0x1003, 0x00c1, 0xd840, - 0x7824, 0x781d, 0x781d, 0x781d, - 0x781d, 0x7034, 0x20c5, 0x04ab, - 0x1e00, 0x7004, 0x900e, 0x0022, - 0x40c1, 0x09f2, 0x0160, 0x732c, - 0x7487, 0x1404, 0x341b, 0xc6c6, - 0x7a1b, 0x1600, 0x7080, 
0x8000, - 0x001c, 0xbac4, 0xea05, 0xba61, - 0x0aff, 0x8031, 0x781d, 0x780f, - 0x2144, 0x07c2, 0xea05, 0xba61, - 0x0aff, 0x8031, 0x781d, 0x7fe0, - 0xb8c0, 0x78e0, 0xc0e2, 0x1600, - 0x7082, 0x8000, 0x0004, 0x7014, - 0x221a, 0x0f83, 0x0020, 0x0000, - 0x2305, 0x0f82, 0x9002, 0x0086, - 0x9240, 0x2305, 0x0f8c, 0x9003, - 0xfe86, 0x706c, 0x22c4, 0x0f81, - 0x0000, 0xbfff, 0xf202, 0xba8e, - 0xb440, 0xf004, 0xb440, 0x7164, - 0x0b39, 0x0293, 0x231a, 0x0f82, - 0x0000, 0x1000, 0x2205, 0x0f82, - 0x0001, 0x004a, 0x7a5b, 0x2205, - 0x0f8c, 0x9000, 0x0000, 0xe80b, - 0x9440, 0x2114, 0x00cd, 0xb540, - 0x2204, 0x0f82, 0x0000, 0xfbff, - 0xf1e6, 0x21f4, 0x00c2, 0xf1e4, - 0xc4c2, 0x78e0, 0xc2e6, 0xdb24, - 0x708d, 0x41c3, 0x8000, 0x0773, - 0xbb9f, 0xa980, 0x8b00, 0x2340, - 0x0d0d, 0xe007, 0x7a1d, 0x7a5d, - 0x7a5d, 0xd8ff, 0x6058, 0xa901, - 0x15ff, 0x948e, 0xee0d, 0xca02, - 0x7a1d, 0xa942, 0x8d00, 0xe007, - 0x781d, 0x781d, 0x781d, 0x6058, - 0x2080, 0x0fc3, 0xf002, 0xa942, - 0xa903, 0xf003, 0x7185, 0x0c1f, - 0x10b2, 0x70cd, 0xf01c, 0x781d, - 0x2414, 0x1381, 0x781d, 0x781d, - 0x71c3, 0x8000, 0x0728, 0xa900, - 0x71c5, 0x0ee5, 0x90b3, 0x70d5, - 0x40a1, 0x20ca, 0x00c1, 0x88e0, - 0x78cf, 0x0eea, 0xffef, 0x798f, - 0xe803, 0x6f07, 0xf1e9, 0x4f1f, - 0x781d, 0xf1e7, 0xc6c6, 0x78e0, - 0xc2e6, 0x1cfc, 0xb6c8, 0x4318, - 0x710c, 0x4628, 0x4100, 0x2344, - 0x37c0, 0xe806, 0xb861, 0x0801, + 0x781d, 0x780f, 0x2144, 0x07c2, + 0xea05, 0xba61, 0x0aff, 0x8031, + 0x781d, 0x7fe0, 0xb8c0, 0x78e0, + 0xc0e2, 0x1600, 0x7082, 0x8000, + 0x0004, 0x7014, 0x221a, 0x0f83, + 0x0020, 0x0000, 0x2305, 0x0f82, + 0x9002, 0x0086, 0x9240, 0x2305, + 0x0f83, 0x9003, 0xfe86, 0x22c4, + 0x0f81, 0x0000, 0xbfff, 0xf203, + 0xba8e, 0x70ad, 0xb340, 0xf01d, + 0x251a, 0x1f82, 0x0000, 0x1000, + 0x2205, 0x0f82, 0x0001, 0x004a, + 0x7a5b, 0x2205, 0x0f83, 0x9000, + 0x0000, 0xe80b, 0x9340, 0x2114, + 0x034c, 0xb440, 0x2204, 0x0f82, + 0x0000, 0xfbff, 0xf003, 0x21f4, + 0x0342, 0xb340, 0x71a5, 0x0dcd, + 0x9294, 0xc4c2, 0xc2e6, 0x706c, + 0x41c3, 0x8000, 
0x07cf, 0xdd24, + 0xa960, 0xbd9f, 0x8d00, 0xe007, + 0x7a1d, 0x7a5d, 0x7a5d, 0x6a09, + 0xa901, 0x1534, 0x108c, 0xec0e, + 0xca02, 0x7a1d, 0xa942, 0x1533, + 0x1080, 0xe007, 0x781d, 0x781d, + 0x781d, 0x6058, 0x7704, 0xf003, + 0xa942, 0xa903, 0xf01e, 0xee05, + 0x1533, 0x108f, 0xf002, 0x8de0, + 0x78cf, 0x0f16, 0xffef, 0x796f, + 0xe804, 0x6f07, 0x791d, 0xf005, + 0x4f1f, 0x791d, 0x793d, 0x793d, + 0x2314, 0x0380, 0x793d, 0x2000, + 0x0f80, 0x8000, 0x0784, 0xa820, + 0x71c5, 0x0ecf, 0x9094, 0x7164, + 0x0bfb, 0x80b4, 0x70cd, 0xc6c6, + 0xc2e6, 0x1cfc, 0xb6c8, 0x4608, + 0x710c, 0x4338, 0x4100, 0x2644, + 0x17c0, 0xe806, 0xb861, 0x0801, 0x0031, 0x793b, 0x40c3, 0x8000, - 0x076d, 0x8800, 0x0817, 0x0071, - 0x4548, 0x1600, 0x7080, 0x8000, - 0x0058, 0x72ed, 0x782b, 0x700c, - 0xf00a, 0x73ed, 0xe889, 0x1600, - 0x7080, 0x8000, 0x0025, 0x71ed, - 0x782b, 0x700c, 0xf212, 0x0c26, - 0xfecf, 0x08ae, 0xff2f, 0x40e1, - 0x0b46, 0xfecf, 0x40c3, 0x8000, - 0x077c, 0x8020, 0x4063, 0x42c1, - 0x0daa, 0xffef, 0x43a1, 0x710c, - 0x1404, 0x341b, 0xc6c6, 0x78e0, - 0x4833, 0x4910, 0x621a, 0x2348, - 0x0003, 0x2248, 0x0000, 0x2309, + 0x07c9, 0x8800, 0x0817, 0x0070, + 0x4748, 0x73ad, 0xe88f, 0x1600, + 0x7080, 0x8000, 0x0025, 0x71ad, + 0xf006, 0x1600, 0x7080, 0x8000, + 0x0058, 0x72ad, 0x790b, 0x700c, + 0xf212, 0x0b56, 0xfecf, 0x0896, + 0xff2f, 0x40a1, 0x0fa2, 0xfe8f, + 0x40c3, 0x8000, 0x07d8, 0x8020, + 0x40c1, 0x4263, 0x0db6, 0xffef, + 0x43e1, 0x710c, 0x1404, 0x341b, + 0xc6c6, 0x78e0, 0x4833, 0x4910, + 0x6058, 0x2348, 0x0003, 0x2048, + 0x0000, 0x7b6e, 0x780e, 0x2309, 0x0000, 0x7fe0, 0x780f, 0x78e0, - 0xc0f1, 0x098a, 0x0040, 0xc0d1, + 0xc0f1, 0x090a, 0x0040, 0xc0d1, 0x7fe0, 0x700c, 0xc2e6, 0x1cfc, - 0xb6c8, 0x42c3, 0x9008, 0x01e8, - 0x9200, 0x41c3, 0x0001, 0xfffe, - 0x2114, 0x0000, 0x7904, 0x2259, - 0x0f40, 0x2155, 0x0c0d, 0x2105, - 0x000e, 0x7d05, 0x1600, 0x111b, - 0x95e0, 0x083e, 0x0020, 0xd896, - 0x781d, 0x781d, 0x781d, 0x781d, - 0x6822, 0x7104, 0xb8e0, 0x21ca, - 0x0001, 0x782f, 0x41c3, 0x0000, - 0xff00, 0x2304, 
0x305b, 0x2005, - 0x06c0, 0x7f24, 0xb600, 0x2745, - 0x13c0, 0xb500, 0x1404, 0x341b, - 0xc6c6, 0x78e0, 0x1600, 0x7101, - 0x8000, 0x0006, 0x782c, 0x2885, - 0x041f, 0x7fe0, 0x7104, 0x78e0, - 0x790d, 0x218c, 0x8bff, 0xf607, - 0x208c, 0x8802, 0xf451, 0x7fe0, - 0x730c, 0x0911, 0x0733, 0x208c, - 0x8c03, 0x720c, 0x7ce0, 0xf049, - 0xe1f7, 0x0058, 0x0029, 0xe1df, - 0x0058, 0x0029, 0xe1cf, 0x005c, - 0x0029, 0xe1c4, 0x005c, 0x0029, - 0xe0c5, 0x095f, 0x0f13, 0x0963, - 0x0d53, 0x0967, 0x0c13, 0xb87c, - 0x0867, 0x0475, 0x790f, 0x2025, - 0x0040, 0xf012, 0xf02d, 0xf02f, - 0xf02b, 0xf02f, 0xf029, 0xf02f, - 0xf027, 0xf027, 0xf02d, 0xf025, - 0xf023, 0xf02d, 0xf021, 0xf021, - 0xf01f, 0xf02b, 0x7fe0, 0xd83f, - 0xe0f8, 0xf41b, 0x7fe0, 0xd808, - 0xe0e0, 0xf417, 0x7fe0, 0xd807, - 0xe0d0, 0xf413, 0x7fe0, 0xd80a, - 0xf40f, 0x7fe0, 0xd80b, 0x081b, - 0x0f31, 0xd818, 0x7ee0, 0x0813, - 0x0d71, 0xd80f, 0x7ee0, 0x080b, - 0x0c31, 0xd81a, 0x7ee0, 0x7fe0, - 0x700c, 0x7fe0, 0xd83e, 0x7fe0, - 0xd83b, 0x7fe0, 0xd83a, 0x7fe0, - 0xd81f, 0x7fe0, 0xd838, 0x7fe0, - 0xd81b, 0x78e0, 0xc0e4, 0x70ad, - 0xf002, 0x71a5, 0x0d1f, 0x10b2, - 0x70cd, 0xf010, 0x255a, 0x1c83, - 0x607c, 0x265a, 0x1643, 0x71c5, - 0x639b, 0x633b, 0x8b80, 0x7c45, - 0xab80, 0x0eed, 0x9092, 0xf1ef, - 0xc4c4, 0x78e0, 0xc2e6, 0x1cfc, - 0xb6c8, 0x2482, 0x3d0d, 0x0e5a, - 0xff6f, 0xc086, 0xc086, 0xd90d, - 0x0fbe, 0xffef, 0xdac8, 0x1600, - 0x7080, 0x8000, 0x001b, 0x45cb, - 0x8000, 0x076d, 0x086d, 0x001e, - 0x8d0e, 0xe834, 0x8dca, 0x8d0b, - 0xdb4b, 0xc669, 0x1c3d, 0x3002, - 0x8d0c, 0xbb9f, 0x42c3, 0x9000, - 0x0000, 0x1c56, 0x3002, 0x8d0d, - 0x1c6f, 0x3002, 0x1b01, 0x0392, - 0x8d0b, 0xab00, 0x8d2c, 0x209a, - 0x0004, 0x1b32, 0x0042, 0x8d2d, - 0x7e05, 0x1b33, 0x0042, 0x13b8, - 0x8080, 0x205f, 0x018c, 0x40c3, - 0x0012, 0x0310, 0x6419, 0x7204, - 0x7945, 0xb1c0, 0x6098, 0x7a05, - 0x1333, 0x0080, 0x1332, 0x0081, - 0x209a, 0x0004, 0x7825, 0xb200, - 0xd8ff, 0xc040, 0xc086, 0x712c, - 0x42c3, 0x0040, 0x5800, 0x0fea, - 0x01e0, 0x706c, 0x0c46, 0x0000, - 0x09b2, 
0xfecf, 0x0cc2, 0x0040, - 0x702c, 0x1e00, 0x7044, 0x9004, + 0xb6c8, 0x40c3, 0x9008, 0x01e8, + 0x9020, 0x2059, 0x0f40, 0x7724, + 0x7930, 0x793b, 0x2155, 0x0c0d, + 0x2105, 0x000e, 0x7d05, 0x1600, + 0x111b, 0x95e0, 0x083a, 0x0020, + 0xd896, 0x781d, 0x781d, 0x781d, + 0x781d, 0x6822, 0x7104, 0xb8e0, + 0x21ca, 0x0001, 0x782f, 0x41c3, + 0x0000, 0xff00, 0x2304, 0x305b, + 0x2005, 0x06c0, 0x79e4, 0xb600, + 0x2145, 0x03c0, 0xb500, 0x1404, + 0x341b, 0xc6c6, 0x1600, 0x7101, + 0x8000, 0x0006, 0x790c, 0x2985, + 0x041f, 0x7fe0, 0x6901, 0x78e0, + 0x2042, 0x0701, 0x0939, 0x0454, + 0x0889, 0x0c30, 0xd91a, 0x0885, + 0x0d70, 0xd90f, 0x087d, 0x0f30, + 0xd918, 0xe0c5, 0xf227, 0xe0d0, + 0xf227, 0xe0e0, 0xf227, 0xe0f8, + 0xf227, 0x208c, 0x8c03, 0x722c, + 0xf230, 0x208c, 0x8802, 0xf419, + 0x732c, 0xf02c, 0x2025, 0x0040, + 0xf011, 0xf013, 0xf01b, 0xf011, + 0xf01b, 0xf00f, 0xf01b, 0xf00d, + 0xf00b, 0xf01b, 0xf009, 0xf009, + 0xf019, 0xf007, 0xf005, 0xf005, + 0xf017, 0xd93f, 0xf016, 0x702c, + 0xf014, 0xd90b, 0xf012, 0xd90a, + 0xf010, 0xd907, 0xf00e, 0xd908, + 0xf00c, 0xd93e, 0xf00a, 0xd93b, + 0xf008, 0xd93a, 0xf006, 0xd91f, + 0xf004, 0xd938, 0xf002, 0xd91b, + 0x7fe0, 0x4020, 0x264a, 0x3000, + 0xf010, 0x265a, 0x3c83, 0x607c, + 0x235a, 0x1643, 0x7165, 0x639b, + 0x633b, 0x8b80, 0x7c45, 0xab80, + 0x0beb, 0x9094, 0x2640, 0x305e, + 0x0ef9, 0xb0b4, 0x706d, 0x7ee0, + 0xc2e6, 0x1cfc, 0xb6c8, 0x2482, + 0x3a0d, 0x0e56, 0xff6f, 0xc083, + 0xc083, 0xd90d, 0x0fc2, 0xffef, + 0xdac8, 0x1600, 0x7080, 0x8000, + 0x001b, 0x0899, 0x001e, 0x40c3, + 0x8000, 0x07c9, 0x880e, 0x088d, + 0x0030, 0xdb4b, 0x40c3, 0x8000, + 0x07c9, 0x888d, 0x40c3, 0x8000, + 0x07c9, 0x88ac, 0x40c3, 0x8000, + 0x07c9, 0x88cb, 0x40c3, 0x8000, + 0x07c9, 0x882a, 0xbb9f, 0x1b01, + 0x0052, 0x40c3, 0x8000, 0x07c9, + 0x88eb, 0x40c3, 0x8000, 0x07c9, + 0xabe0, 0x884c, 0x279a, 0x1004, + 0x1b32, 0x0082, 0xc166, 0x1c31, + 0x3382, 0x1c4a, 0x3342, 0x1c63, + 0x3302, 0x13b8, 0x8080, 0x7f25, + 0x41c3, 0x8000, 0x07c9, 0x205f, + 0x018d, 0x46cb, 0x0012, 0x0310, + 
0x892d, 0x44cb, 0x9000, 0x0000, + 0x65d8, 0x7885, 0xb0e0, 0x1b33, + 0x0042, 0x219a, 0x0004, 0x6e02, + 0x60b8, 0x7885, 0x7945, 0xb020, + 0xd8ff, 0xc040, 0xc083, 0x712c, + 0x42c3, 0x0040, 0x5800, 0x0c8e, + 0x01e0, 0x706c, 0x0c02, 0x0000, + 0x08fe, 0xfecf, 0x0c82, 0x0040, + 0x700c, 0x1e00, 0x7004, 0x9004, 0x00dc, 0x1600, 0x7080, 0x8000, - 0x0019, 0x7014, 0x02fa, 0x0002, - 0xf002, 0x6821, 0x782f, 0x7114, - 0x02ee, 0x002d, 0xad20, 0xe809, - 0x1600, 0x7080, 0x8000, 0x0058, - 0x7014, 0x710c, 0xf007, 0x1600, - 0x7080, 0x8000, 0x0025, 0x7014, - 0x700c, 0xf3ed, 0x6901, 0x0df2, - 0xfeef, 0x780f, 0x206f, 0x00c3, - 0x1000, 0x00c1, 0x88c3, 0x2400, - 0x3f80, 0x0000, 0x02d4, 0xc144, - 0x702c, 0x0ee2, 0x0020, 0xdaa0, - 0x2400, 0x3f80, 0x0000, 0x0234, - 0x0ed2, 0x0020, 0xdaa0, 0x2400, - 0x3f80, 0x0000, 0x0194, 0x0ec6, - 0x0020, 0xdaa0, 0x2455, 0x3f40, - 0x0eba, 0x0020, 0xdaa0, 0x261a, - 0x1f8e, 0x0010, 0x0000, 0x70ed, - 0xf002, 0x71e5, 0x0fd9, 0x1133, - 0x2455, 0x3cc0, 0x702c, 0x0e9e, - 0x0020, 0xda28, 0x2455, 0x3a40, - 0x0e92, 0x0020, 0xda28, 0xc09f, - 0x0e8a, 0x0020, 0xda28, 0x2755, - 0x1800, 0x78c5, 0x2455, 0x3cc1, - 0x0b12, 0x0120, 0x724c, 0x2755, - 0x18c0, 0x78c5, 0x0b06, 0x0120, - 0x2455, 0x3a41, 0x2755, 0x1d00, - 0x78c5, 0x0afa, 0x0120, 0xc19f, - 0x8d03, 0x8d62, 0xc041, 0xf03e, - 0x20f4, 0x00c0, 0x0916, 0xfeef, - 0x4308, 0x4408, 0x2455, 0x3a40, - 0x20f4, 0x00db, 0xc09f, 0x20f4, - 0x00c0, 0x2353, 0x315b, 0x08fe, - 0xfeef, 0x2300, 0x331b, 0x275a, - 0x1a01, 0x4718, 0x2400, 0x3f80, - 0x0000, 0x02d4, 0x2400, 0x3f82, - 0x0000, 0x0234, 0x6038, 0x7874, - 0x1800, 0x06c4, 0x202f, 0x02c2, - 0x781d, 0x781d, 0x781d, 0x781d, - 0x781d, 0x205f, 0x0800, 0x623a, - 0x7a74, 0xb200, 0x2400, 0x3f80, - 0x0000, 0x0194, 0x6038, 0x7874, - 0xb080, 0x2455, 0x3f40, 0x6038, - 0x7874, 0x1800, 0x07c4, 0x7164, - 0xc001, 0x0889, 0x80e3, 0x2455, - 0x3cc0, 0xf195, 0x8d20, 0xd830, - 0xb89f, 0x2040, 0x0cc3, 0x7034, - 0x2342, 0x0282, 0x23ca, 0x0001, - 0xb86a, 0x718d, 0x22ca, 0x0001, - 0xc443, 0xf003, 0x7785, 
0x0c0d, - 0x1033, 0x71cd, 0xf05d, 0x77c5, - 0x0ef5, 0x9012, 0x0efb, 0x9320, - 0x70ed, 0x8d03, 0xbf8f, 0xc045, - 0x8d02, 0xc742, 0xf01b, 0xc101, - 0x61f9, 0x21f4, 0x0001, 0x2102, - 0x07c1, 0x2455, 0x3f5f, 0x2308, - 0x104f, 0x2700, 0x36c1, 0x21f4, - 0x000b, 0xc101, 0x71e3, 0x21f4, - 0x0001, 0x7104, 0x230e, 0x104b, - 0xc102, 0x792e, 0x2108, 0x02c1, - 0xc142, 0xc105, 0x0943, 0x0022, - 0x232f, 0x13c6, 0x245a, 0x1a01, - 0xc141, 0xc104, 0x0921, 0x0032, - 0x265a, 0x1a1b, 0x2400, 0x3f81, - 0x0000, 0x0234, 0x7163, 0x21f4, - 0x001f, 0x2400, 0x3f8f, 0x0000, - 0x02d4, 0xf1cf, 0x2400, 0x3f8f, - 0x0000, 0x0194, 0x2700, 0x16c1, - 0x21f4, 0x001f, 0xf1c5, 0xc004, - 0x7014, 0x0f44, 0xfeaa, 0x20ca, - 0x02ca, 0x0ecc, 0xfeab, 0x20ca, - 0x02cb, 0x4100, 0xc002, 0x0ec2, - 0xfeaf, 0x780e, 0x1a01, 0x0052, - 0x1b01, 0x0012, 0xf1a5, 0x8d20, - 0xd82c, 0xb89f, 0x2040, 0x0cc3, - 0x7034, 0x6b8c, 0x23ca, 0x0001, - 0xb864, 0x24ca, 0x1001, 0xf005, - 0xc003, 0x7704, 0xc043, 0xc003, - 0x0821, 0x0033, 0x714c, 0xf046, - 0x0e86, 0xfeaf, 0xc001, 0x4100, - 0x0e7e, 0xfeaf, 0xc002, 0x1c01, - 0x1052, 0x1b01, 0x0012, 0x7744, - 0x0ad9, 0x8032, 0x70cd, 0x8d03, - 0x8d22, 0xbe8f, 0xc044, 0x40c1, - 0xf028, 0x225a, 0x0a1f, 0x140c, - 0x300b, 0x2400, 0x3f9b, 0x0000, - 0x0194, 0x2455, 0x3f4e, 0x2300, - 0x37c0, 0x235a, 0x1a0b, 0x20f4, - 0x0040, 0xc045, 0x2600, 0x12c0, - 0x20f4, 0x004f, 0xc005, 0x76e3, - 0x2300, 0x32db, 0x26f4, 0x105f, - 0x23f4, 0x304b, 0x7f02, 0xc002, - 0xc601, 0x2302, 0x17cb, 0x2008, - 0x03c0, 0x2608, 0x12ce, 0x7124, - 0x780e, 0xc042, 0xc004, 0x7ece, - 0x08ab, 0x8063, 0xc641, 0xf1be, - 0x0513, 0xffef, 0x8d00, 0x0b2a, - 0xfeef, 0x730c, 0x700c, 0xad01, - 0xad00, 0x1600, 0x7080, 0x8000, - 0x0019, 0x7014, 0x0924, 0x0141, - 0x41c3, 0x9003, 0xe174, 0x700c, - 0xb100, 0x0e66, 0xfeef, 0xb101, - 0x2480, 0x3d0d, 0x1404, 0x341b, + 0x0019, 0x7014, 0x02f6, 0x0022, + 0x700c, 0x02e3, 0x0000, 0xe90b, + 0x1600, 0x7081, 0x8000, 0x0058, + 0x7034, 0x712c, 0xf40b, 0x02cd, + 0x0000, 0x1600, 0x7081, 0x8000, + 0x0025, 0x7034, 
0x02bc, 0x0021, + 0x702c, 0x7104, 0x0df6, 0xfeef, + 0x780f, 0x216f, 0x00c3, 0x1100, + 0x00c0, 0x89c3, 0x702c, 0xc041, + 0x2400, 0x3f80, 0x0000, 0x02c8, + 0x0e82, 0x0020, 0xdaa0, 0x2400, + 0x3f80, 0x0000, 0x0228, 0x0e76, + 0x0020, 0xdaa0, 0x2456, 0x3c40, + 0x0e6a, 0x0020, 0xdaa0, 0x2455, + 0x3e80, 0x0e62, 0x0020, 0xdaa0, + 0x261a, 0x1f8e, 0x0010, 0x0000, + 0x4338, 0xf064, 0x702c, 0x0e4e, + 0x0020, 0xda28, 0x2455, 0x3980, + 0x0e42, 0x0020, 0xda28, 0xc09c, + 0x0e3a, 0x0020, 0xda28, 0x2355, + 0x3800, 0x78c5, 0x2455, 0x3c01, + 0x0f66, 0x00e0, 0x724c, 0x2355, + 0x38c0, 0x78c5, 0x0f5a, 0x00e0, + 0x2455, 0x3981, 0x2355, 0x3d00, + 0x78c5, 0x0f4e, 0x00e0, 0xc19c, + 0x40c3, 0x8000, 0x07c9, 0x1003, + 0x008b, 0x40c3, 0x8000, 0x07c9, + 0x8862, 0xf035, 0x2455, 0x3c00, + 0x20f4, 0x00de, 0x0862, 0xfeef, + 0x200a, 0x0780, 0x4708, 0x2455, + 0x3980, 0x20f4, 0x00cd, 0xc09c, + 0x20f4, 0x00c0, 0xbdc5, 0x084a, + 0xfeef, 0x65fd, 0x235a, 0x3a02, + 0x2400, 0x3f81, 0x0000, 0x02c8, + 0x6159, 0x7974, 0xb1a0, 0x2400, + 0x3f81, 0x0000, 0x0228, 0x615c, + 0x212f, 0x0782, 0x7c74, 0x216c, + 0x0101, 0xb420, 0x2456, 0x3c41, + 0x6159, 0x7974, 0xb1e0, 0x2455, + 0x3e81, 0x6159, 0x7974, 0xb100, + 0x7164, 0x0b9d, 0x90c5, 0x7167, + 0x0b3d, 0xb134, 0x2455, 0x3c00, + 0x1600, 0x7080, 0x8000, 0x07c9, + 0xd930, 0xb99f, 0x2140, 0x0cce, + 0x7014, 0x2642, 0x1283, 0x2142, + 0x0280, 0x23ca, 0x0001, 0x710c, + 0x26ca, 0x1041, 0x4308, 0xc042, + 0xf059, 0x0dab, 0x12e0, 0x704c, + 0x40c3, 0x8000, 0x07c9, 0x1003, + 0x009f, 0x40c3, 0x8000, 0x07c9, + 0x8882, 0xba8f, 0x4358, 0xf02e, + 0x255a, 0x1a0f, 0xc101, 0x0923, + 0x0032, 0x235a, 0x1a00, 0x2400, + 0x3f81, 0x0000, 0x0228, 0x61f9, + 0x21f4, 0x0302, 0x2400, 0x3f81, + 0x0000, 0x02c8, 0xf006, 0x2456, + 0x3c41, 0x61fa, 0x22f4, 0x0302, + 0x6119, 0x21f4, 0x0301, 0x7942, + 0x2608, 0x3042, 0x2455, 0x3e81, + 0x673f, 0x6038, 0x27f4, 0x130f, + 0x20f4, 0x0300, 0x212f, 0x06c6, + 0x7185, 0x78e2, 0x2108, 0x001b, + 0x0fa9, 0xb325, 0x262f, 0x3086, + 0xc001, 0x0811, 0x0032, 0x200a, + 0x0780, 
0x0eb2, 0xfe8f, 0xf006, + 0x0b66, 0xfeaf, 0x200a, 0x0780, + 0x4100, 0x0b5e, 0xfeaf, 0x202f, + 0x06c6, 0x1b01, 0x0052, 0x1e01, + 0x1012, 0x77a5, 0x0d57, 0x9013, + 0x7765, 0x0bfd, 0x9033, 0x71ad, + 0x1600, 0x7080, 0x8000, 0x07c9, + 0xd92c, 0xb99f, 0x2140, 0x0ccb, + 0x7014, 0x2342, 0x1100, 0xc041, + 0x23ca, 0x1041, 0x690c, 0xc101, + 0x21ca, 0x0001, 0xc141, 0xf042, + 0x40c3, 0x8000, 0x07c9, 0x1003, + 0x009e, 0x40c3, 0x8000, 0x07c9, + 0x8842, 0x700c, 0xb88f, 0x4100, + 0xf01e, 0xc602, 0x235a, 0x3a0c, + 0x2456, 0x3c4d, 0x265a, 0x1a0e, + 0x2455, 0x3e8f, 0x6599, 0x21f4, + 0x009f, 0x67d9, 0x21f4, 0x0081, + 0x64fc, 0x65dd, 0x2102, 0x07c1, + 0x2308, 0x0041, 0x24f4, 0x108c, + 0x25f4, 0x1083, 0x7144, 0x7b82, + 0x2008, 0x00c0, 0x780e, 0x0ec7, + 0xb0a5, 0x7b2e, 0x0aba, 0xfe8f, + 0x4200, 0x0ab6, 0xfeaf, 0x4060, + 0xc101, 0x7767, 0x1901, 0x0092, + 0xc141, 0x1b01, 0x1012, 0x0b8d, + 0xb013, 0xc002, 0x7704, 0xc042, + 0xc002, 0x08f7, 0x8033, 0x716f, + 0x1600, 0x7081, 0x8000, 0x07c9, + 0x6901, 0x790f, 0x7134, 0x1e00, + 0x7002, 0x8000, 0x07c9, 0x051a, + 0xffce, 0x0b32, 0xfeef, 0x730c, + 0x41c3, 0x8000, 0x07c9, 0x700c, + 0xa901, 0x1e00, 0x7002, 0x8000, + 0x07c9, 0x1600, 0x7080, 0x8000, + 0x0019, 0x7014, 0x0d44, 0x0101, + 0x40c3, 0x9003, 0xe174, 0x702c, + 0xb020, 0x0e52, 0xfeef, 0xb021, + 0x2480, 0x3a0d, 0x1404, 0x341b, 0xc6c6, 0x78e0, 0xc2e4, 0xc1a4, - 0x4608, 0x700c, 0x099a, 0xfeaf, - 0xd9ff, 0x1200, 0x3083, 0x702c, - 0xd8ff, 0x754c, 0xc143, 0xc142, - 0xc141, 0x4528, 0x0a72, 0xfeaf, - 0xc140, 0xbec1, 0xd8ff, 0xd980, - 0x764c, 0xdb20, 0xc543, 0xc642, - 0xc541, 0x0a5e, 0xfeaf, 0x1c00, - 0x3041, 0x700c, 0x0a0e, 0xfeaf, - 0x712c, 0xc0a4, 0xc6c4, 0x78e0, - 0xc2e4, 0xc1a4, 0xd858, 0xb89f, - 0x88a0, 0x10cd, 0x808e, 0x700c, - 0x0946, 0xfeaf, 0xd9ff, 0x2505, - 0x1380, 0x702c, 0xb8c1, 0xc143, - 0xc042, 0xc141, 0xc140, 0xd8ff, - 0xd980, 0xda09, 0x0a1a, 0xfeaf, - 0xdb20, 0x700c, 0x09ce, 0xfeaf, - 0x712c, 0xc0a4, 0xc6c4, 0x78e0, - 0x21aa, 0x00c4, 0x0911, 0x003f, - 0x722c, 0x21ab, 0x00c4, 0x712c, - 
0x21ab, 0x00c4, 0x080f, 0x0415, - 0x1600, 0x7100, 0x9008, 0x01d6, - 0x7ee0, 0x21aa, 0x0104, 0x7b32, - 0x0b19, 0x0025, 0x24aa, 0x1144, - 0x22aa, 0x0104, 0x09fd, 0x8084, - 0x7862, 0x24aa, 0x1144, 0xf003, - 0x6038, 0x21aa, 0x0104, 0x7110, - 0x20e1, 0x07c6, 0x22aa, 0x0144, - 0x0cf3, 0x9080, 0x7ee0, 0x78e0, - 0xc0f1, 0x0866, 0x0000, 0x0fa6, - 0xffcf, 0xc0d1, 0x7ee0, 0x78e0, - 0xc0f1, 0x0f5a, 0xffcf, 0x0f96, - 0xffef, 0xd814, 0xc0d1, 0x7ee0, - 0xc0f1, 0x0846, 0x0000, 0x41c3, - 0x8000, 0x0684, 0x8920, 0x781b, - 0xe915, 0x1600, 0x7081, 0x8000, - 0x0001, 0x0923, 0x00df, 0x41c3, - 0x8000, 0x076c, 0x8920, 0x0917, - 0x0134, 0x216c, 0x0041, 0x2809, - 0x8042, 0xf208, 0x6038, 0x7842, - 0xf004, 0x0809, 0x0115, 0x740c, - 0xc0d1, 0x7ee0, 0x246f, 0x1343, - 0x9460, 0x231a, 0x0002, 0x2a05, - 0x0f82, 0x001e, 0x8480, 0x0817, - 0x0f84, 0x0003, 0x0d41, 0x2885, - 0x0a0f, 0x201a, 0x00c2, 0x2a85, - 0x041f, 0x793d, 0x6a01, 0x6941, - 0x41c3, 0x8000, 0x0684, 0x8920, - 0xe90c, 0x14fb, 0x9081, 0x0915, - 0x00df, 0x41c3, 0x8000, 0x076c, - 0x8920, 0x7034, 0x22da, 0x0042, - 0x7210, 0x7fe0, 0x20ca, 0x008d, - 0xc2e6, 0x1cfc, 0xb6c8, 0x0ebe, - 0xff2f, 0xc1a4, 0x791d, 0x793d, - 0x793d, 0x793d, 0xb9c3, 0x43db, - 0x8000, 0x0058, 0x1a02, 0x3042, - 0x1300, 0x3081, 0xe90b, 0x781d, - 0x1600, 0x7081, 0x8000, 0x0057, - 0x781d, 0x2044, 0x0e00, 0x611d, - 0xf005, 0x1600, 0x708d, 0x8000, - 0x0024, 0x0e82, 0xff0f, 0x79af, - 0x7a3d, 0x7a5d, 0x2553, 0x9083, - 0x7a5d, 0x1a07, 0x30c2, 0x7b3d, - 0x22c0, 0x0062, 0x7b7d, 0x2553, - 0x907e, 0xb8c3, 0x23c0, 0x0062, - 0x1a03, 0x3002, 0x1a04, 0x3082, - 0x1a05, 0x30c2, 0xc040, 0x41c3, - 0x0053, 0x0003, 0x0c7e, 0xfeaf, - 0xd80a, 0x276f, 0x10c3, 0x8f00, - 0x45e9, 0xb8e2, 0x730c, 0x78c0, - 0x1a08, 0x3002, 0x150f, 0x148e, - 0x6f05, 0x9520, 0x8d82, 0x9000, - 0x1300, 0x3083, 0x1724, 0x1082, - 0xc043, 0xc642, 0xc441, 0xc140, - 0x41c3, 0x0055, 0x0006, 0x0c46, - 0xfeaf, 0xd80a, 0x8d08, 0xe808, - 0x41c3, 0x005a, 0x0000, 0x0c36, - 0xfeaf, 0xd80a, 0xde32, 0xbe9f, - 0x1601, 0x1483, 0x16d1, 
0x9082, - 0x1601, 0x148c, 0x1602, 0x1481, - 0x1601, 0x1480, 0xc042, 0xc141, - 0xd80a, 0x41c3, 0x005b, 0x0005, - 0x0c0a, 0xfeaf, 0xc440, 0x1601, - 0x1483, 0x8e21, 0x8e04, 0x16cc, - 0x9082, 0x8e80, 0xc042, 0xc141, - 0xd80a, 0x41c3, 0x005c, 0x0005, - 0x0bea, 0xfeaf, 0xc440, 0x8d08, - 0x7014, 0xf471, 0x1724, 0x1080, - 0x0847, 0x007e, 0xdd3e, 0xbd9f, - 0x1501, 0x1483, 0x16cc, 0x9082, - 0x1501, 0x148c, 0x1502, 0x1481, - 0x1501, 0x1480, 0xc042, 0xc141, - 0xd80a, 0x41c3, 0x005d, 0x0005, - 0x0bb2, 0xfeaf, 0xc440, 0x1501, - 0x1483, 0xf841, 0xc042, 0xc141, - 0xd80a, 0x41c3, 0x005e, 0x0005, - 0x0b9a, 0xfeaf, 0xc440, 0x1300, - 0x3080, 0x084d, 0x003e, 0xdd65, - 0xbd9f, 0x1501, 0x1483, 0x16cc, - 0x9082, 0x1501, 0x148c, 0x1502, - 0x1481, 0x1501, 0x1480, 0xc042, - 0xc141, 0xd80a, 0x41c3, 0x005f, - 0x0005, 0x0b6a, 0xfeaf, 0xc440, - 0x1501, 0x1483, 0xf82e, 0xc042, - 0xc141, 0xd80a, 0x41c3, 0x0060, - 0x0005, 0x0b52, 0xfeaf, 0xc440, - 0x1300, 0x3080, 0x0847, 0x007e, - 0xdd71, 0xbd9f, 0x1501, 0x1483, - 0x16cc, 0x9082, 0x1501, 0x148c, - 0x1502, 0x1481, 0x1501, 0x1480, - 0xc042, 0xc141, 0xd80a, 0x41c3, - 0x0061, 0x0005, 0x0b1e, 0xfeaf, - 0xc440, 0x1501, 0x1483, 0xf81c, + 0xd858, 0xb89f, 0x88c0, 0x10cd, + 0x808d, 0x700c, 0x0e3e, 0xfe6f, + 0xd9ff, 0x2605, 0x1341, 0x700c, + 0xb9c1, 0xc043, 0xc142, 0xc041, + 0xc040, 0xd8ff, 0xd980, 0xda09, + 0x0efa, 0xfe6f, 0xdb20, 0x700c, + 0x0eb2, 0xfe6f, 0x712c, 0xc0a4, + 0xc6c4, 0x78e0, 0x21aa, 0x00c4, + 0x0911, 0x003f, 0x722c, 0x21ab, + 0x00c4, 0x712c, 0x21ab, 0x00c4, + 0x080f, 0x0415, 0x1600, 0x7100, + 0x9008, 0x01d6, 0x7ee0, 0x21aa, + 0x0104, 0x7b32, 0x0b19, 0x0025, + 0x24aa, 0x1144, 0x22aa, 0x0104, + 0x09fd, 0x8084, 0x7862, 0x24aa, + 0x1144, 0xf003, 0x6038, 0x21aa, + 0x0104, 0x7110, 0x20e1, 0x07c6, + 0x22aa, 0x0144, 0x0cf3, 0x9080, + 0x7ee0, 0x78e0, 0xc0f1, 0x0866, + 0x0000, 0x0fa6, 0xffcf, 0xc0d1, + 0x7ee0, 0x78e0, 0xc0f1, 0x0f5a, + 0xffcf, 0x0f96, 0xffef, 0xd814, + 0xc0d1, 0x7ee0, 0xc0f1, 0x0846, + 0x0000, 0x41c3, 0x8000, 0x06e0, + 0x8920, 0x781b, 
0xe915, 0x1600, + 0x7081, 0x8000, 0x0001, 0x0923, + 0x00df, 0x41c3, 0x8000, 0x07c8, + 0x8920, 0x0917, 0x0134, 0x216c, + 0x0041, 0x2809, 0x8042, 0xf208, + 0x6038, 0x7842, 0xf004, 0x0809, + 0x0115, 0x740c, 0xc0d1, 0x7ee0, + 0x246f, 0x1343, 0x9460, 0x231a, + 0x0002, 0x2a05, 0x0f82, 0x001e, + 0x8480, 0x0817, 0x0f84, 0x0003, + 0x0d41, 0x2885, 0x0a0f, 0x201a, + 0x00c2, 0x2a85, 0x041f, 0x793d, + 0x6a01, 0x6941, 0x41c3, 0x8000, + 0x06e0, 0x8920, 0xe90c, 0x14fb, + 0x9081, 0x0915, 0x00df, 0x41c3, + 0x8000, 0x07c8, 0x8920, 0x7034, + 0x22da, 0x0042, 0x7210, 0x7fe0, + 0x20ca, 0x008d, 0xc2e6, 0x0f06, + 0xff2f, 0xc1a4, 0x781d, 0x781d, + 0x781d, 0x791d, 0xb9c3, 0xdd58, + 0x1a02, 0x3042, 0xbd9f, 0x8d00, + 0xe80b, 0x215f, 0x0100, 0x1600, + 0x7082, 0x8000, 0x0057, 0x2044, + 0x0e00, 0x621e, 0xf005, 0x1600, + 0x708e, 0x8000, 0x0024, 0x0ece, + 0xff0f, 0x79cf, 0x7b3d, 0x7a3d, + 0x2653, 0x907e, 0x7b7d, 0x7a5d, + 0x23c0, 0x0062, 0x7a5d, 0x2653, + 0x9081, 0xb8c3, 0x22c0, 0x0062, + 0x1a03, 0x3002, 0x1a05, 0x30c2, + 0x1a07, 0x3042, 0x1a04, 0x3082, + 0xc040, 0x41c3, 0x0053, 0x0003, + 0x0c16, 0xfeaf, 0xd80a, 0x276f, + 0x10c3, 0x8f00, 0xb8e2, 0x700c, + 0x20ca, 0x00e1, 0x1a08, 0x3002, + 0x1724, 0x1082, 0x170f, 0x1481, + 0x2742, 0x1280, 0x9000, 0x8d60, + 0x97c0, 0x8f82, 0xc043, 0xc142, + 0xd80a, 0x41c3, 0x0055, 0x0006, + 0xc441, 0x0bde, 0xfeaf, 0xc640, + 0x8f08, 0xe808, 0x41c3, 0x005a, + 0x0000, 0x0bce, 0xfeaf, 0xd80a, + 0xde32, 0xbe9f, 0x1601, 0x1483, + 0x16d1, 0x9082, 0x1601, 0x148c, + 0x1602, 0x1481, 0x1601, 0x1480, + 0xc440, 0xc141, 0xc042, 0x41c3, + 0x005b, 0x0005, 0x0ba2, 0xfeaf, + 0xd80a, 0x1601, 0x1483, 0x8e21, + 0x8e04, 0x16cc, 0x9082, 0x8e80, 0xc042, 0xc141, 0xd80a, 0x41c3, - 0x0062, 0x0005, 0x0b06, 0xfeaf, - 0xc440, 0x41c3, 0x9008, 0x01a0, - 0x1902, 0x0015, 0x208a, 0x0044, - 0x1902, 0x0014, 0x208a, 0x0144, - 0x1902, 0x0014, 0x208a, 0x01c4, - 0x1904, 0x0014, 0x208a, 0x0088, - 0xb100, 0x208a, 0x0288, 0xb101, - 0x208a, 0x02c8, 0xb102, 0x208a, - 0x03cc, 0x1e00, 0x7004, 0x9009, - 0xe180, 
0xd934, 0x40c3, 0x9005, - 0xe0ee, 0xb020, 0x0d16, 0xffef, - 0x18fc, 0x8105, 0xc0a4, 0x1404, - 0x341b, 0xc6c6, 0x8d21, 0x8d04, - 0x16cc, 0x9082, 0x8d80, 0x7ee0, + 0x005c, 0x0005, 0x0b82, 0xfeaf, + 0xc440, 0x8f08, 0x08b5, 0x0011, + 0x8f15, 0x0831, 0x007e, 0xdf3e, + 0xf840, 0xc141, 0xc042, 0xd80a, + 0x41c3, 0x005d, 0x0005, 0x0b62, + 0xfeaf, 0xc440, 0xf841, 0xc042, + 0xc141, 0xd80a, 0x41c3, 0x005e, + 0x0005, 0x0b4e, 0xfeaf, 0xc440, + 0x8d00, 0x0833, 0x003e, 0xdf65, + 0xf834, 0xc141, 0xc042, 0xd80a, + 0x41c3, 0x005f, 0x0005, 0x0b32, + 0xfeaf, 0xc440, 0xf835, 0xc042, + 0xc141, 0xd80a, 0x41c3, 0x0060, + 0x0005, 0x0b1e, 0xfeaf, 0xc440, + 0x8d00, 0x0851, 0x007e, 0xdd71, + 0xbd9f, 0x1501, 0x1483, 0x1501, + 0x148c, 0x1502, 0x1481, 0x1501, + 0x1480, 0x16cc, 0x9082, 0xc141, + 0xc042, 0xd80a, 0x41c3, 0x0061, + 0x0005, 0x0aee, 0xfeaf, 0xc440, + 0x1501, 0x1483, 0x8d21, 0x8d04, + 0x16cc, 0x9082, 0x8d80, 0xc042, + 0xc141, 0xd80a, 0x41c3, 0x0062, + 0x0005, 0x0ace, 0xfeaf, 0xc440, + 0x40c3, 0x9008, 0x01a0, 0x1802, + 0x0015, 0x218a, 0x0044, 0x1802, + 0x0054, 0x218a, 0x0144, 0x1802, + 0x0054, 0x218a, 0x01c4, 0x1804, + 0x0054, 0x218a, 0x0088, 0xb020, + 0x218a, 0x0288, 0xb021, 0x218a, + 0x02c8, 0xb022, 0x208a, 0x03cc, + 0x1e00, 0x7004, 0x9009, 0xe180, + 0xd934, 0x40c3, 0x9005, 0xe0ee, + 0xb020, 0x0d4e, 0xffef, 0x18fc, + 0x8105, 0xc0a4, 0xc6c6, 0x78e0, + 0xbf9f, 0x1701, 0x1483, 0x1701, + 0x148c, 0x1702, 0x1481, 0x1701, + 0x1480, 0x16cc, 0x9082, 0x7ee0, + 0x1701, 0x1483, 0x8f21, 0x8f04, + 0x16cc, 0x9082, 0x8f80, 0x7ee0, 0x621a, 0xf004, 0x1801, 0x0052, 0x08fd, 0x8084, 0x7ee0, 0x78e0, - 0xc2e6, 0x1cfc, 0xb6c8, 0xc1a8, - 0x714c, 0xba8c, 0x40c3, 0x0000, - 0xbd61, 0x46cb, 0x9012, 0x0328, + 0xc2e6, 0x1cfc, 0xb6c8, 0xc1a7, + 0xda1a, 0xba8c, 0x40c3, 0x0000, + 0x9660, 0x45cb, 0x9012, 0x0328, 0x1e00, 0x7084, 0x8000, 0x0002, - 0xb600, 0xd846, 0xb601, 0x41c3, - 0x00b8, 0x0001, 0x0a5e, 0xfeaf, - 0xd80a, 0xcc20, 0x9620, 0x70ad, - 0x7030, 0x700c, 0xf405, 0xcc21, - 0x9621, 0x2038, 0x0040, 0x41c3, - 0x00b9, 
0x0000, 0x0c7a, 0xfe0f, - 0x1600, 0x7080, 0x8000, 0x0004, - 0xc040, 0x201a, 0x0f80, 0x0020, - 0x0000, 0xc041, 0x2005, 0x0f81, - 0x9004, 0x0032, 0x206f, 0x0243, - 0x90e6, 0x9100, 0xc043, 0x1600, - 0x7080, 0x8000, 0x0004, 0x201a, - 0x0f80, 0x0020, 0x0000, 0x2005, - 0x0f80, 0x9002, 0x0040, 0x9000, - 0x1a00, 0x3103, 0x1e00, 0x7043, - 0x8000, 0x0684, 0x0ad6, 0x0120, - 0xc047, 0xbfe0, 0x0286, 0x0001, - 0xc001, 0x2005, 0x0f81, 0x9005, - 0xe032, 0xc003, 0xb880, 0xb100, - 0xc001, 0x2005, 0x0f80, 0x9003, - 0xe090, 0x0cae, 0x0020, 0x1800, - 0x0005, 0x256f, 0x1cc3, 0x8d00, - 0xe885, 0x1e00, 0x7085, 0x9004, - 0x00c0, 0x0cba, 0xffcf, 0x0f62, - 0xff0f, 0x8d00, 0x7014, 0xf4c7, - 0x1600, 0x7080, 0x8000, 0x0004, - 0xc045, 0x201a, 0x0f80, 0x0020, - 0x0000, 0xc044, 0x2005, 0x0f80, - 0x9002, 0x0086, 0x9020, 0x1600, - 0x7083, 0x8000, 0x0000, 0x783d, - 0x781d, 0x781d, 0x781d, 0xb8c2, - 0x0817, 0x0091, 0x46cb, 0x8000, - 0x0430, 0x1a09, 0x3398, 0x45cb, - 0x8000, 0x0418, 0xf01d, 0x0821, - 0x00f1, 0xbbe7, 0xce0e, 0x46cb, - 0x8000, 0x0434, 0x761c, 0x1a09, - 0x3398, 0xce08, 0x45cb, 0x8000, - 0x041c, 0xf00e, 0xbbe7, 0xce0b, - 0x46cb, 0x8000, 0x0428, 0x761c, + 0xb500, 0xd8cb, 0xb501, 0x41c3, + 0x00b9, 0x0001, 0x0a0a, 0xfeaf, + 0xd80a, 0xcc20, 0x9520, 0x2038, + 0x0042, 0xcc21, 0x9521, 0x2038, + 0x0040, 0x41c3, 0x00ba, 0x0000, + 0x099a, 0xfe2f, 0x7844, 0x1e00, + 0x7043, 0x8000, 0x06e0, 0x206f, + 0x0143, 0x8802, 0x1a00, 0x3103, + 0x216f, 0x0143, 0xc041, 0x201a, + 0x0f80, 0x0020, 0x0000, 0x91c7, + 0xc040, 0x2005, 0x0f80, 0x9004, + 0x0032, 0x9000, 0xc042, 0xc000, + 0x2005, 0x0f80, 0x9002, 0x0040, + 0x9000, 0x0f7e, 0x00e0, 0xc046, + 0xbee0, 0x02b8, 0x0001, 0xc000, + 0x2005, 0x0f81, 0x9005, 0xe032, + 0xc002, 0xb880, 0xb100, 0xc000, + 0x2005, 0x0f80, 0x9003, 0xe090, + 0x0cde, 0x0020, 0x1800, 0x0005, + 0x256f, 0x1cc3, 0x8d00, 0xe886, + 0x1e00, 0x7085, 0x9004, 0x00c0, + 0x0ce6, 0xffcf, 0x0fc2, 0xff0f, + 0x1600, 0x7080, 0x8000, 0x0001, + 0x0841, 0x001e, 0x43c3, 0x9004, + 0x0014, 0x9320, 0xca01, 0x080f, + 
0x01b0, 0x214f, 0x0082, 0x080d, + 0x00d1, 0x2185, 0x0108, 0x4220, + 0xb340, 0xd823, 0x42c3, 0x9005, + 0xe034, 0xb200, 0x702c, 0x40c3, + 0x9003, 0xe034, 0xb020, 0xb8b1, + 0xb020, 0x0cde, 0x0020, 0xb221, + 0x8d00, 0x7014, 0xf4ba, 0x206f, + 0x0143, 0x8802, 0x1600, 0x708b, + 0x8000, 0x0000, 0xc045, 0x201a, + 0x0f80, 0x0020, 0x0000, 0xc044, + 0x2005, 0x0f80, 0x9002, 0x0086, + 0x9000, 0xc043, 0x781d, 0x781d, + 0x781d, 0x781d, 0xb8c2, 0x0827, + 0x0090, 0x0835, 0x00f1, 0x238b, + 0x903e, 0xce08, 0x45cb, 0x8000, + 0x041c, 0x751c, 0x1a03, 0x3358, + 0xce0e, 0x47cb, 0x8000, 0x0434, + 0xf017, 0x45cb, 0x8000, 0x0418, + 0x1a03, 0x3358, 0x47cb, 0x8000, + 0x0430, 0xf010, 0x238b, 0x903e, 0xce05, 0x45cb, 0x8000, 0x0410, - 0x1a09, 0x3398, 0x751c, 0x1a03, - 0x3358, 0x1600, 0x7082, 0x8000, - 0x000c, 0x221a, 0x0f80, 0x0001, - 0x86a0, 0xc042, 0x1408, 0x301b, - 0xd807, 0xb861, 0x08ff, 0x8031, - 0x232f, 0x36c2, 0x2144, 0x0080, - 0x0b19, 0x01fe, 0xc046, 0x233a, - 0x3f80, 0x0001, 0x4c08, 0x41c3, - 0x0140, 0x0001, 0x0b22, 0xfe0f, - 0xc006, 0x6512, 0x6611, 0xc005, - 0xe892, 0x2302, 0x3040, 0x2048, - 0x0000, 0x2804, 0x0080, 0xd97f, - 0x780f, 0xe0fe, 0x21ca, 0x000c, - 0x40c3, 0x9003, 0xfe80, 0xb020, - 0x18e0, 0x8044, 0xc002, 0xc500, - 0x2805, 0x0f80, 0x0000, 0xfa00, + 0x751c, 0xce0b, 0x47cb, 0x8000, + 0x0428, 0x1a03, 0x3358, 0x771c, + 0x1a09, 0x33d8, 0x1600, 0x7082, + 0x8000, 0x000c, 0x238b, 0x903e, + 0x221a, 0x0f9b, 0x0001, 0x86a0, + 0xf20a, 0x233c, 0x3f80, 0x00a6, + 0x0400, 0x41c3, 0x0142, 0x0001, + 0x081a, 0xfe0f, 0x212f, 0x06c2, + 0x793d, 0x793d, 0x793d, 0xc005, + 0x793d, 0x793d, 0x793d, 0xe895, + 0xc003, 0x2044, 0x0080, 0x6512, + 0x6710, 0x4910, 0x2048, 0x0000, + 0x2805, 0x0080, 0x42c3, 0x9003, + 0xfe80, 0x780f, 0x2089, 0x0fc1, + 0xb200, 0x1ae0, 0x8004, 0xc501, + 0x2b05, 0x3f80, 0x0000, 0xfa00, 0x251a, 0x1f8d, 0x0010, 0x0000, - 0x7104, 0x790f, 0x218c, 0x8002, - 0xf68c, 0x2300, 0x3f80, 0xffff, - 0x793c, 0x708d, 0x208c, 0x8d47, - 0xf686, 0x2884, 0x0d07, 0xf005, - 0x718d, 0xf003, 0x710c, 0x205a, 
- 0x0202, 0xc104, 0x780f, 0xe0ff, - 0x245f, 0x1100, 0x2105, 0x0f83, - 0x9005, 0xe164, 0x2284, 0x0e1f, - 0x218a, 0x0e0f, 0x21ca, 0x008b, - 0x7905, 0x1600, 0x7100, 0x9004, - 0x0164, 0xb8c1, 0x7825, 0xb300, - 0xc001, 0x2005, 0x0f80, 0x9002, - 0x0086, 0x09ce, 0x0120, 0x90c0, - 0x095a, 0x0120, 0x40a1, 0x1600, - 0x7081, 0x8000, 0x001f, 0xc000, - 0x0907, 0x000f, 0xbe80, 0xc001, - 0x2005, 0x0f80, 0x9003, 0xfe86, - 0xb0c0, 0x0b1e, 0x0000, 0x0aea, - 0xff8f, 0x0896, 0xfe8f, 0x0bee, - 0xff4f, 0x702c, 0x706c, 0x71cd, - 0xf002, 0x7164, 0xca04, 0x0b71, - 0x0023, 0x708d, 0x235a, 0x049f, - 0x2700, 0x3f82, 0x8000, 0x0684, - 0xb221, 0xf003, 0x7185, 0x0ce7, - 0x9213, 0x231a, 0x0f80, 0x0000, - 0x1000, 0x7885, 0x781b, 0x2005, - 0x0f80, 0x9002, 0x0140, 0x9000, - 0x7d0f, 0x0d15, 0x1321, 0x2544, - 0x17db, 0x8a02, 0x200f, 0x0300, - 0xaa02, 0xf1ea, 0x8a03, 0x45c9, - 0x7104, 0xaa03, 0x780f, 0x2714, - 0x3000, 0x0b11, 0x3010, 0x2342, - 0x305b, 0x0bff, 0xb031, 0x7dbb, - 0x70c3, 0x8000, 0x0684, 0xa8a2, - 0xa883, 0xf1d6, 0x0f13, 0x133e, - 0x710c, 0x0b0e, 0xfe0f, 0x0aae, - 0x0020, 0xd80d, 0x700c, 0x080e, - 0x01c0, 0x0e76, 0xfe4f, 0x70ad, - 0x1e00, 0x7342, 0x8000, 0x0684, - 0x0a92, 0x0020, 0x700c, 0xf003, - 0x71a5, 0xca04, 0x0d2d, 0x1022, - 0x702c, 0xf019, 0x2000, 0x0f82, - 0x8000, 0x08a0, 0x251a, 0x1f80, - 0x0000, 0x1000, 0x7825, 0x781b, - 0x2005, 0x0f80, 0x9002, 0x0140, - 0x9060, 0xaa60, 0xb020, 0x7124, - 0x09dd, 0x8232, 0x2116, 0x0340, - 0xf1e4, 0x0b72, 0xfeaf, 0x710c, - 0x0f11, 0x10be, 0x700c, 0x0c3a, - 0x0100, 0x0a42, 0x0020, 0x720c, - 0x2744, 0x908e, 0xf207, 0x0c2a, - 0x0120, 0x710c, 0x0a2e, 0x0020, - 0x710c, 0x0f1d, 0x121e, 0x1600, - 0x7080, 0x8000, 0x0004, 0xe888, - 0x0c0e, 0x0120, 0x730c, 0x0a16, - 0x0020, 0xd80a, 0x2744, 0x920d, - 0xf207, 0x0bfe, 0x0120, 0x750c, - 0x0a02, 0x0020, 0xd8fd, 0xee08, - 0x0bee, 0x0120, 0x720c, 0x09f6, - 0x0020, 0xd8fe, 0x0f11, 0x113e, - 0x760c, 0x0bde, 0x0100, 0x09e6, - 0x0020, 0x740c, 0x0f15, 0x1051, - 0x1600, 0x7080, 0x8000, 0x0019, - 0xe884, 0x0bc6, 0x0120, 
0xd80d, - 0xed07, 0x0bbe, 0x0120, 0x740c, - 0x09c2, 0x0020, 0x730c, 0x0f87, - 0x127e, 0xdd0f, 0x1600, 0x7080, - 0x8000, 0x0001, 0x46cb, 0x8000, - 0x076d, 0xb8e2, 0x25ca, 0x1062, - 0x700c, 0xf003, 0x7104, 0xae00, - 0x780f, 0x084b, 0x0095, 0xe809, - 0x1600, 0x7080, 0x8000, 0x0058, - 0x7014, 0x710c, 0xf007, 0x1600, - 0x7080, 0x8000, 0x0025, 0x7014, - 0x700c, 0xf3ee, 0x0e1e, 0xfe4f, - 0x8e00, 0x7104, 0x0aa2, 0xfeaf, - 0x780f, 0x0d3e, 0xfe4f, 0x0d76, - 0xfe6f, 0x1e01, 0x1003, 0x4100, - 0x0e9a, 0x0120, 0x40a1, 0x8e00, - 0xf1da, 0x0a86, 0xfeaf, 0x730c, - 0xca09, 0xe885, 0x08fe, 0xfeaf, - 0xc000, 0x0942, 0x0020, 0xd809, - 0xf00f, 0x1600, 0x7080, 0x8000, - 0x0004, 0x201a, 0x0f80, 0x0020, - 0x0000, 0x2005, 0x0f81, 0x9003, - 0xe040, 0xc007, 0xb100, 0xca09, - 0xe837, 0xd825, 0xb89f, 0x88c0, - 0x1033, 0x0080, 0x70ed, 0x7e05, - 0xf002, 0x71e5, 0x0f47, 0x10b3, - 0x710c, 0x2744, 0x17c1, 0xe906, - 0xb961, 0x0901, 0x0031, 0x781b, - 0x7e0b, 0xf3f5, 0x40e1, 0x0d72, - 0xfe6f, 0x712c, 0xc100, 0x4508, - 0x7bef, 0x710c, 0x42a1, 0x08d6, - 0xfeaf, 0x4378, 0xc100, 0x700c, - 0x42a1, 0x08ca, 0xfeaf, 0x4363, - 0x4163, 0x0976, 0xfeaf, 0x40a1, - 0xf1dd, 0x087a, 0xfeaf, 0xc000, + 0x7104, 0x0821, 0x01fe, 0x744c, + 0x2100, 0x0f80, 0xffff, 0x793c, + 0x208c, 0x8d47, 0x704c, 0xf685, + 0x2885, 0x0d07, 0xf002, 0x710c, + 0x780f, 0x2089, 0x0fc1, 0x205a, + 0x0200, 0xc104, 0x7a05, 0x1600, + 0x7100, 0x9004, 0x0164, 0x2105, + 0x0f81, 0x9005, 0xe164, 0xb8c1, + 0x7845, 0xb100, 0xc000, 0x2005, + 0x0f80, 0x9002, 0x0086, 0x0e46, + 0x00e0, 0x90e0, 0x0dd2, 0x00e0, + 0x40a1, 0x1600, 0x7081, 0x8000, + 0x001f, 0xc001, 0x2111, 0x8000, + 0xc000, 0x27cf, 0x1021, 0x2005, + 0x0f80, 0x9003, 0xfe86, 0xb0e0, + 0x0b1e, 0x0000, 0x0b52, 0xff8f, + 0x081a, 0xfe8f, 0x0c5a, 0xff4f, + 0xca04, 0x704c, 0x708d, 0xf038, + 0x245a, 0x149e, 0x2600, 0x3f83, + 0x8000, 0x06e0, 0xb341, 0xf02d, + 0x241a, 0x1f81, 0x0000, 0x1000, + 0x79a5, 0x793b, 0x2105, 0x0f81, + 0x9002, 0x0140, 0x9120, 0x7f2f, + 0x0d11, 0x13e1, 0x716d, 0x8b22, + 0x210f, 0x0341, 
0xab22, 0xf018, + 0x8b23, 0x2744, 0x17db, 0x7124, + 0xab23, 0x792f, 0x2614, 0x3041, + 0x2100, 0x0f81, 0x8000, 0x06e0, + 0x4769, 0xa9a3, 0x0b0f, 0x3010, + 0x2342, 0x305b, 0x0bfd, 0xb031, + 0x7ffb, 0xa9e2, 0x71a5, 0x0dad, + 0x9214, 0x7185, 0x0c95, 0x9024, + 0x70ad, 0x0e0b, 0x133f, 0x710c, + 0xf009, 0x0ace, 0x0000, 0x0ffe, + 0xfdcf, 0x0aaa, 0x0020, 0xd80d, + 0x700c, 0x0cd2, 0x0180, 0x0b2a, + 0xfe4f, 0x1e00, 0x7003, 0x8000, + 0x06e0, 0x0a92, 0x0020, 0x700c, + 0xca04, 0x706d, 0x704c, 0xf01a, + 0x702c, 0xf015, 0x221a, 0x0f83, + 0x0000, 0x1000, 0x2400, 0x1f8d, + 0x8000, 0x07f0, 0x7185, 0x7b25, + 0x7b7b, 0x2305, 0x0f83, 0x9002, + 0x0140, 0x93e0, 0xade0, 0xb320, + 0x7124, 0x09dd, 0x8214, 0x7144, + 0x0ad1, 0x8024, 0x2316, 0x108c, + 0x0bae, 0xfeaf, 0x710c, 0x0e13, + 0x10be, 0x700c, 0x0896, 0x0100, + 0x0a3a, 0x0020, 0x720c, 0x2644, + 0x908d, 0xf208, 0x0886, 0x0120, + 0x710c, 0x0a2a, 0x0020, 0x710c, + 0x0e19, 0x123e, 0x206f, 0x0143, + 0x8802, 0xe888, 0x086e, 0x0120, + 0x730c, 0x0a12, 0x0020, 0xd80a, + 0x2644, 0x920f, 0xf207, 0x085e, + 0x0120, 0x750c, 0x09fe, 0x0020, + 0xd8fd, 0xed08, 0x084e, 0x0120, + 0x720c, 0x09f2, 0x0020, 0xd8fe, + 0x0e11, 0x113e, 0x760c, 0x083e, + 0x0100, 0x09e2, 0x0020, 0x740c, + 0x0e15, 0x1051, 0x1600, 0x7080, + 0x8000, 0x0019, 0xe884, 0x0826, + 0x0120, 0xd80d, 0xef07, 0x081e, + 0x0120, 0x740c, 0x09be, 0x0020, + 0x730c, 0x0e23, 0x127f, 0x206f, + 0x0143, 0x8802, 0x201a, 0x0f80, + 0x0020, 0x0000, 0x2005, 0x0f81, + 0x9003, 0xe040, 0xc006, 0xb100, + 0xf040, 0x1600, 0x7080, 0x8000, + 0x0001, 0x71ad, 0x46cb, 0x8000, + 0x07c9, 0xb8e2, 0x25ca, 0x13e1, + 0x700c, 0xae00, 0x780f, 0x084f, + 0x0095, 0xe81d, 0x1600, 0x7080, + 0x8000, 0x0058, 0x7014, 0x710c, + 0xf213, 0x0d9e, 0xfe4f, 0x8e00, + 0x7104, 0x0ada, 0xfeaf, 0x780f, + 0x09e6, 0xfe4f, 0x0cea, 0xfe6f, + 0x1e01, 0x1003, 0x4100, 0x0af2, + 0x0120, 0x40a1, 0x8e00, 0x7104, + 0xf1e1, 0x1600, 0x7080, 0x8000, + 0x0025, 0x7014, 0x700c, 0xf5e7, + 0xf1f7, 0x0aaa, 0xfeaf, 0x730c, + 0xca09, 0xe885, 0x0922, 0xfeaf, + 0xc001, 
0x0922, 0x0020, 0xd809, + 0xca09, 0xe837, 0xd925, 0xb99f, + 0x8900, 0x1133, 0x008f, 0x70cd, + 0x7f05, 0xf022, 0x2644, 0x17c1, + 0xe905, 0xb961, 0x09ff, 0x8031, + 0x781b, 0x78eb, 0xf217, 0x40c1, + 0x0d02, 0xfe6f, 0x712c, 0xc101, + 0x4508, 0x7bcf, 0x710c, 0x42a1, + 0x091e, 0xfeaf, 0x4378, 0xc101, + 0x700c, 0x42a1, 0x0912, 0xfeaf, + 0x4363, 0x4163, 0x09be, 0xfeaf, + 0x40a1, 0x71c5, 0x0ec1, 0x90b4, + 0x710c, 0x08be, 0xfeaf, 0xc001, 0x1600, 0x7080, 0x8000, 0x0000, - 0xb8e5, 0x0998, 0xfe82, 0x0afa, - 0xff8f, 0x70ad, 0x706c, 0xf003, - 0x7164, 0xca04, 0x0b2d, 0x0022, - 0x702c, 0xf017, 0x231a, 0x0f80, - 0x0000, 0x1000, 0x7825, 0x781b, - 0x2005, 0x0f82, 0x9002, 0x0140, - 0x2116, 0x00c0, 0x2032, 0x0f80, - 0x8000, 0x08a0, 0x7124, 0xb200, - 0x09dd, 0x8212, 0xf1e6, 0xc001, + 0xb8e5, 0x09dc, 0xfe82, 0x0b16, + 0xff8f, 0xca04, 0x706f, 0x704c, + 0xf017, 0x706c, 0xf012, 0x221a, + 0x0f81, 0x0000, 0x1000, 0x7965, + 0x793b, 0x2105, 0x0f8c, 0x9002, + 0x0140, 0x2332, 0x1f81, 0x8000, + 0x07f0, 0x7164, 0x7165, 0xb420, + 0x0bdf, 0x8214, 0x7144, 0x0ad7, + 0x8024, 0x2316, 0x308b, 0xc000, 0x2005, 0x0f81, 0x9005, 0xe032, - 0xc003, 0xb100, 0x097e, 0xfeaf, - 0x700c, 0x0d7e, 0x00c0, 0x08c2, - 0x0160, 0xd807, 0x25ab, 0x10c4, + 0xc002, 0xb100, 0x09c2, 0xfeaf, + 0x700c, 0x09f6, 0x00c0, 0x0d26, + 0x0120, 0xd807, 0x23ab, 0x30c4, 0x7fff, 0xf000, 0xc2e2, 0x45cb, 0x9004, 0x0004, 0x9500, 0xb8a1, - 0xb500, 0x0f0a, 0xffaf, 0x740c, - 0xd81f, 0x0f02, 0xffaf, 0x1df6, - 0x105d, 0xc6c2, 0xc2e2, 0x0ef6, + 0xb500, 0x0f06, 0xffaf, 0x740c, + 0xd81f, 0x0efe, 0xffaf, 0x1df6, + 0x105d, 0xc6c2, 0xc2e2, 0x0ef2, 0xffaf, 0xd81f, 0x45cb, 0x9004, - 0x01f0, 0x740c, 0x0ee6, 0xffaf, + 0x01f0, 0x740c, 0x0ee2, 0xffaf, 0x1d00, 0x1005, 0x150a, 0x9700, 0xb881, 0x1d0a, 0x901c, 0xc6c2, 0xc0f1, 0x1600, 0x7081, 0x8000, - 0x0012, 0x218c, 0x8203, 0x0860, - 0x014e, 0x080e, 0x0000, 0xc0d1, + 0x0012, 0x218c, 0x8203, 0x0cc4, + 0x010e, 0x080e, 0x0000, 0xc0d1, 0x7ee0, 0x78e0, 0xc2e2, 0x1600, 0x7080, 0x8000, 0x0001, 0x081b, 0x003e, 0xd80a, 0x45cb, 
0x9005, - 0xe036, 0x0ea2, 0xffaf, 0x1d00, + 0xe036, 0x0e9e, 0xffaf, 0x1d00, 0x1045, 0x1d00, 0x1005, 0xc6c2, 0x40c3, 0x9004, 0x00f2, 0x1800, 0x0045, 0x7fe0, 0x1800, 0x0005, - 0xc2e6, 0x1cfc, 0xb6c8, 0xc1bf, - 0x4708, 0x4040, 0x4568, 0x4648, - 0x0822, 0xff2f, 0xc14e, 0xc04f, - 0x4060, 0xdc08, 0xbc61, 0x0c01, - 0x1031, 0x781d, 0xb8c0, 0x0a96, - 0xff2f, 0x4318, 0x2553, 0x10c0, - 0x42c3, 0x9003, 0xe0c4, 0xb200, - 0x7077, 0xd9f0, 0x208a, 0x0c07, - 0x20ca, 0x0041, 0x2004, 0x0341, - 0xba8b, 0xb220, 0xc095, 0x702c, - 0xda28, 0x09a2, 0xffef, 0xc395, - 0x1498, 0x3000, 0xc743, 0x714c, - 0xc050, 0xc044, 0x149c, 0x3000, - 0x41c1, 0xc045, 0xc051, 0xc00e, - 0xc346, 0x1c1c, 0x3103, 0xc048, - 0xd820, 0x1c24, 0x3003, 0xc04a, - 0x40c3, 0x0000, 0x0f00, 0xc04b, - 0x78af, 0x20b8, 0x0fc3, 0xc54c, - 0xc06d, 0x4040, 0x2744, 0x17c3, - 0xeb05, 0xbb61, 0x0bff, 0x8031, - 0x781b, 0x205a, 0x0100, 0x1c25, - 0x3382, 0x200f, 0x03c0, 0x780f, - 0x09e6, 0x0020, 0xc04e, 0xc00f, - 0x7014, 0xc052, 0xf21f, 0x41c3, - 0x9008, 0x01ea, 0x9100, 0x2004, - 0x0f82, 0x0000, 0xc0ff, 0x2004, - 0x0f80, 0x0000, 0x3f00, 0x2080, - 0x0010, 0x7845, 0xb100, 0x1e00, - 0x7005, 0x900f, 0xe022, 0x1600, - 0x7100, 0x9004, 0x00ee, 0xc052, - 0xb8a5, 0x1e00, 0x7004, 0x9005, - 0xe0ee, 0x1494, 0x3000, 0xc054, - 0x1490, 0x3000, 0xc053, 0x0a2e, - 0x0060, 0xc083, 0x47cb, 0x8000, - 0x076f, 0x8f41, 0x8f20, 0xf003, - 0x7124, 0x0a25, 0x0062, 0x6e09, - 0xc310, 0x7b34, 0x9300, 0xe8fa, - 0xc095, 0x20f5, 0x0040, 0x08ed, - 0x8813, 0x70c3, 0x0000, 0xffe0, - 0xb300, 0xf1f0, 0x083d, 0x00b5, - 0x7077, 0x77cd, 0x40c3, 0xffff, - 0xf0f0, 0x45cb, 0x0000, 0xf0f0, - 0x70dd, 0x7b10, 0xd80f, 0x41a1, - 0x42a1, 0x0e1a, 0xfeef, 0xc340, - 0x7077, 0x40c3, 0xffff, 0xf0f0, - 0x761c, 0x7bd0, 0xd808, 0x41a1, - 0x42a1, 0x0e02, 0xfeef, 0xc340, - 0x0bba, 0x0060, 0xc083, 0x8f61, - 0x8f20, 0xf003, 0x7124, 0x0b21, - 0x0042, 0xc211, 0x7a34, 0x9200, - 0x08f5, 0x87f1, 0xc095, 0x20f5, - 0x0040, 0x08ed, 0x8833, 0xe020, - 0xb200, 0xf1f2, 0xc00f, 0xe885, - 0x0c4e, 0xffaf, 0xc00e, 
0xc013, - 0xe807, 0x41c3, 0x0036, 0x0000, - 0x0ad2, 0xfe6f, 0x740c, 0x8fa0, - 0xf00e, 0xc30e, 0x202f, 0x06c6, + 0xc2e6, 0x1cfc, 0xb6c8, 0x2482, + 0x3402, 0xc34e, 0x4608, 0xc00e, + 0xdc08, 0xc24f, 0xbc61, 0x0c01, + 0x1031, 0x781d, 0x2044, 0x004d, + 0x40a1, 0x0ad6, 0xff2f, 0xc151, + 0xc00e, 0x42c3, 0x9003, 0xe0c4, + 0x70b5, 0xb8c3, 0xb200, 0xd9f0, + 0x208a, 0x0c07, 0x20ca, 0x0041, + 0xc10e, 0xba8b, 0xc39a, 0x7904, + 0xb220, 0xc09a, 0x702c, 0x0986, + 0xffef, 0xda28, 0xc00e, 0x714c, + 0xc346, 0x780f, 0x20b8, 0x0fc3, + 0xc06d, 0xc00e, 0x2644, 0x17c3, + 0x4338, 0xc04c, 0x40c3, 0x0000, + 0x0f00, 0xc04b, 0xd820, 0xc053, + 0xc04a, 0xc011, 0xc10f, 0x1c24, + 0x3003, 0xc048, 0x14b0, 0x3000, + 0x1c1c, 0x3103, 0xc643, 0xc050, + 0xc045, 0x14ac, 0x3000, 0xc052, + 0xc044, 0xc00f, 0x1c25, 0x3002, + 0x4040, 0xeb06, 0xbb61, 0x0b01, + 0x0031, 0x781b, 0x205a, 0x0100, + 0x200f, 0x0380, 0x0b6a, 0x0020, + 0x780f, 0xc00f, 0xb863, 0x0849, + 0x0135, 0xc058, 0x41c3, 0x9008, + 0x01ea, 0x9100, 0x2004, 0x0f82, + 0x0000, 0xc0ff, 0x2004, 0x0f80, + 0x0000, 0x3f00, 0x2080, 0x0010, + 0x7845, 0xb100, 0x1e00, 0x76c4, + 0x900f, 0xe022, 0x1600, 0x7100, + 0x9004, 0x00ee, 0x2050, 0x0141, + 0xb885, 0x1e00, 0x7044, 0x9005, + 0xe0ee, 0xc053, 0x14a8, 0x3000, + 0xc059, 0x14a4, 0x3000, 0xc057, + 0x0b26, 0x0060, 0xc083, 0x40c3, + 0x8000, 0x07cb, 0x8861, 0x1600, + 0x7081, 0x8000, 0x07cb, 0xf00f, + 0xc212, 0x7a34, 0x9200, 0xe88a, + 0xc09a, 0x20f5, 0x0040, 0x080d, + 0x0833, 0x2042, 0x0800, 0xb200, + 0x7124, 0x0be9, 0x8045, 0xc00f, + 0x7704, 0x0837, 0x00b5, 0x70b5, + 0x46cb, 0x0000, 0xf0f0, 0x47cb, + 0x0000, 0xffff, 0x43c1, 0x73fd, + 0xd80f, 0x41c1, 0x42c1, 0x0e46, + 0xfeef, 0xc340, 0x70b5, 0x77dd, + 0xd808, 0x41c1, 0x42c1, 0x43e1, + 0x0e32, 0xfeef, 0xc740, 0x1600, + 0x7100, 0x9008, 0x01e0, 0x218a, + 0x0ffd, 0xc056, 0x7824, 0xc054, + 0x40c3, 0x900e, 0x004c, 0x1600, + 0x711b, 0x900e, 0x004c, 0x90a1, + 0xc00f, 0xe88a, 0x40c3, 0x0000, + 0xffff, 0x4100, 0x4200, 0x0e42, + 0xfeef, 0x4300, 0xd80f, 0x09c2, + 0xfe6f, 0xd90f, 
0x40c3, 0x0000, + 0x0f00, 0x0eaa, 0xfeef, 0x702c, + 0xc014, 0x1e00, 0x7004, 0x9009, + 0xe1e0, 0xc10e, 0x08f6, 0x00e0, + 0x730c, 0x70ed, 0x46cb, 0x9003, + 0xe03e, 0xd820, 0x712c, 0xb6e0, + 0x1e9b, 0x13dc, 0x1e9c, 0x13dc, + 0x1ee6, 0x9fc5, 0x1ef2, 0x9045, + 0x0b2a, 0xfe2f, 0x1ee4, 0x9045, + 0xc083, 0x4163, 0x0aee, 0x00a0, + 0x42a1, 0xd80f, 0x096a, 0xfe6f, + 0xd90f, 0xc016, 0x712c, 0x1e00, + 0x7004, 0x9009, 0xe1e0, 0xb6e0, + 0xc011, 0x0b02, 0xfe2f, 0x7810, + 0xc010, 0x712c, 0x0a36, 0x00a0, + 0x704c, 0xc083, 0x4163, 0x0abe, + 0x00a0, 0x42a1, 0xd80f, 0x093a, + 0xfe6f, 0xd90f, 0xc00f, 0xe806, + 0x0d82, 0xfe2f, 0x700c, 0xf00a, + 0x40c3, 0x0000, 0xfffe, 0x6821, + 0x4220, 0x0d96, 0xfeef, 0x4320, + 0x40c3, 0x900e, 0x004c, 0x1600, + 0x711b, 0x900e, 0x004c, 0x9001, + 0xc055, 0xc014, 0x1e00, 0x7004, + 0x9009, 0xe1e0, 0xc10e, 0x084e, + 0x00e0, 0xd807, 0x40c3, 0x0000, + 0x0f00, 0x0de2, 0xfeef, 0x702c, + 0x70ed, 0x712c, 0xd820, 0xb6e0, + 0x1e9b, 0x13dc, 0x1e9c, 0x13dc, + 0x1ee6, 0x9fc5, 0x4528, 0x1ef2, + 0x9044, 0x0a7a, 0xfe2f, 0x1ee4, + 0x9044, 0xc215, 0xc083, 0x0a3e, + 0x00a0, 0x4163, 0xc016, 0xd90f, + 0x1e00, 0x7004, 0x9009, 0xe1e0, + 0xd80f, 0x08ae, 0xfe6f, 0xb6e0, + 0xc10e, 0x0ffa, 0x00a0, 0xd807, + 0x40c3, 0x0000, 0xff00, 0x1ee6, + 0x9004, 0x1ef2, 0x9344, 0x1ee4, + 0x9344, 0xc011, 0x712c, 0x7104, + 0x0a32, 0xfe2f, 0x7810, 0xc215, + 0xc083, 0x09fa, 0x00a0, 0x4163, + 0xc00f, 0xe805, 0x0cc6, 0xfe2f, + 0x710c, 0xc09a, 0x712c, 0x0956, + 0x00a0, 0x704c, 0x40c3, 0x8000, + 0x07cb, 0x1600, 0x708e, 0x8000, + 0x07cb, 0x8861, 0x41c1, 0xf00f, + 0xc210, 0x7a34, 0x9200, 0x0815, + 0x07f1, 0xc09a, 0x20f5, 0x0040, + 0x0809, 0x0833, 0xe020, 0xb200, + 0x7124, 0x0be9, 0x8045, 0xc017, + 0x087f, 0x0030, 0x740c, 0x41c3, + 0x0036, 0x0000, 0x08c2, 0xfe4f, + 0x1600, 0x708e, 0x8000, 0x07cb, + 0xf033, 0xc012, 0x7ddd, 0x20f5, + 0x0382, 0xc010, 0x20f5, 0x0383, + 0x6358, 0x781c, 0x2048, 0x0000, + 0x2049, 0x07c1, 0xc019, 0x78d4, + 0xb020, 0x4b50, 0x2048, 0x000f, + 0xc017, 0xe813, 0xc012, 0x4363, + 0x20f5, 
0x0382, 0xc010, 0x20f5, + 0x0380, 0xc142, 0xc240, 0xc041, + 0x740c, 0x41c3, 0x0038, 0x0005, + 0x086e, 0xfe6f, 0x42a1, 0x78ee, 0xc040, 0x207f, 0x0140, 0x41c3, - 0x0039, 0x0003, 0x0cf2, 0xfdef, - 0x42c1, 0x71a5, 0x8f01, 0x0877, - 0x0362, 0x778d, 0xc010, 0x7ebc, - 0x20f5, 0x0342, 0xc011, 0x20f5, - 0x0343, 0x6359, 0x202f, 0x8041, - 0x203f, 0x030c, 0x207f, 0x07c0, - 0x7884, 0x4408, 0x24c5, 0x1064, - 0x7014, 0x700c, 0x20ca, 0x07e2, - 0x793d, 0x7095, 0x711c, 0xc014, - 0x78b4, 0xb020, 0x4b50, 0x2048, - 0x001b, 0x2544, 0x1040, 0xc04e, - 0xc013, 0xe84d, 0xc010, 0x20f5, - 0x0342, 0xc011, 0x20f5, 0x0343, - 0x782e, 0xc042, 0xc341, 0xc30e, - 0xc240, 0x740c, 0x41c3, 0x0038, - 0x0005, 0x0a42, 0xfe6f, 0x42c1, - 0xf1b9, 0xc00f, 0xe818, 0x42c3, - 0x9008, 0x01ea, 0x9200, 0x2004, - 0x0f81, 0x0000, 0xc0ff, 0x2004, - 0x0f80, 0x0000, 0x3f00, 0x70c3, + 0x0039, 0x0003, 0x42a1, 0x0806, + 0xfdef, 0x4363, 0x71c5, 0x40c3, + 0x8000, 0x07cb, 0x8801, 0x0897, + 0x83a5, 0x2644, 0x105b, 0xc018, + 0x0833, 0x0115, 0x42c3, 0x9008, + 0x01ea, 0x9200, 0x2004, 0x0f81, + 0x0000, 0xc0ff, 0x2004, 0x0f80, + 0x0000, 0x3f00, 0x2000, 0x0f80, 0x0000, 0xfc00, 0x7825, 0xb200, - 0xc012, 0xb885, 0x1e00, 0x7004, - 0x9005, 0xe0ee, 0x1e00, 0x7005, - 0x9003, 0xe004, 0xc0bf, 0x1404, + 0xc013, 0x1e00, 0x7004, 0x9005, + 0xe0ee, 0x1e00, 0x7005, 0x9003, + 0xe004, 0x2480, 0x3402, 0x1404, 0x341b, 0xc6c6, 0xc2e6, 0x1cfc, - 0xb6c8, 0xc1ac, 0xc247, 0x4728, - 0x0bca, 0xfeef, 0xc046, 0xc048, - 0x0d8a, 0xfeef, 0x40e1, 0x0916, - 0xfe6f, 0x4608, 0x4508, 0xee97, - 0x700c, 0x0cde, 0xfe2f, 0x702c, - 0x1200, 0x3083, 0xd981, 0x700c, + 0xb6c8, 0xc1ad, 0xc248, 0x4338, + 0x0a82, 0xfeef, 0xc046, 0x2342, + 0x30cd, 0xc047, 0x0f12, 0xfe2f, + 0x7eaf, 0x0d33, 0x1134, 0xc044, + 0x700c, 0x084a, 0xfe2f, 0x702c, + 0x1200, 0x3083, 0x700c, 0xd981, 0xc043, 0xc042, 0xc041, 0xc040, - 0xd8ff, 0xb98e, 0x0db2, 0xfe2f, - 0x754c, 0x700c, 0x0d66, 0xfe2f, - 0x712c, 0x0c02, 0xfecf, 0x0c0a, - 0xfeef, 0x4318, 0xc04b, 0x40c3, - 0x0000, 0x2710, 0x0c06, 0xffaf, - 0xd908, 
0xc04a, 0xc008, 0x6f49, - 0x794f, 0x2314, 0x301b, 0x2342, - 0x3400, 0x2044, 0x004c, 0x710c, - 0x203c, 0x0043, 0x2678, 0x1001, - 0x796b, 0x218a, 0x0fc7, 0x20ca, - 0x0222, 0x79c0, 0xee05, 0x2578, - 0x1003, 0xeb83, 0xb882, 0x2300, - 0x331b, 0x43c3, 0x900f, 0xe0c0, - 0x1c10, 0x36c0, 0xb300, 0xb325, - 0x772c, 0x70b5, 0x43db, 0xffff, - 0xf0f0, 0x4020, 0x707f, 0x23ca, - 0x3042, 0xc145, 0x0a2d, 0x00b5, - 0xc045, 0x45cb, 0x0000, 0xf0f0, - 0x232f, 0x06c8, 0xd80f, 0x41a1, - 0x42a1, 0x0c0a, 0xfeef, 0xc340, - 0xc005, 0x41a1, 0x42a1, 0x7b10, - 0xd808, 0x0bfa, 0xfeef, 0xc340, - 0xc004, 0xe030, 0xc049, 0x274f, - 0x1040, 0x0899, 0x00b1, 0x70d5, - 0xc006, 0xb8c1, 0xc044, 0xc008, - 0x790f, 0x0bfe, 0xfe2f, 0x700c, - 0x702c, 0x70ad, 0x716f, 0xd8ff, - 0xb98e, 0x754c, 0x746c, 0xc543, - 0xc542, 0x1c04, 0x36c0, 0x0cd2, - 0xfe2f, 0xc540, 0xc006, 0x702c, - 0x704c, 0x7f0f, 0xd8ff, 0x746c, - 0xc543, 0xc742, 0xc541, 0x0cba, - 0xfe2f, 0xc540, 0xc004, 0xc543, - 0x702c, 0xc042, 0xd8ff, 0x764c, - 0x746c, 0x1c04, 0x36c0, 0x0ca2, - 0xfe2f, 0xc540, 0xc004, 0xc543, - 0x702c, 0xc042, 0x750c, 0xc041, - 0xd8ff, 0x764c, 0xdb28, 0x0c8a, - 0xfe2f, 0xc540, 0x0ed2, 0xfe2f, - 0xd8ff, 0xc007, 0x46cb, 0x0000, - 0x3100, 0xe818, 0x1c0c, 0x3441, - 0xc541, 0xc540, 0xc742, 0xf099, - 0x02e8, 0x0021, 0x71f5, 0x1600, - 0x7080, 0x8000, 0x0013, 0x7014, - 0x045a, 0x0021, 0x4608, 0x269a, - 0x1004, 0x0481, 0x0020, 0x7e05, - 0xd8ff, 0x41c1, 0x724c, 0x746c, - 0xc543, 0xc541, 0xc540, 0x0c3a, - 0xfe2f, 0xc742, 0xd8ff, 0x702c, - 0x704c, 0x746c, 0xc543, 0xc742, - 0x1c04, 0x36c0, 0x0c22, 0xfe2f, - 0xc540, 0xd8ff, 0x41c1, 0x724c, - 0xdb08, 0x1c0c, 0x36c0, 0xc742, - 0xc541, 0x0c0e, 0xfe2f, 0xc540, - 0xd8ff, 0x41c1, 0x724c, 0x746c, - 0xc543, 0xc742, 0xc541, 0x0bfa, - 0xfe2f, 0xc540, 0x750c, 0xc543, - 0xc742, 0xc041, 0xd8ff, 0x702c, - 0x704c, 0x746c, 0x0be2, 0xfe2f, - 0xc540, 0xd8ff, 0x41c1, 0x724c, - 0xdb08, 0x1c0c, 0x36c0, 0xc742, - 0xc541, 0x0bce, 0xfe2f, 0xc540, - 0x750c, 0xc043, 0xd8ff, 0x41c1, - 0x724c, 0xdb08, 0xc742, 0xc541, - 
0x0bb6, 0xfe2f, 0xc540, 0xd8ff, + 0xd8ff, 0xb98e, 0x0906, 0xfe2f, + 0x754c, 0x700c, 0x08be, 0xfe2f, + 0x712c, 0x730c, 0x203c, 0x0381, + 0x2342, 0x304e, 0x78cf, 0x71ed, + 0x273c, 0x1000, 0x2004, 0x8040, + 0xc045, 0xc004, 0x27ca, 0x1222, + 0x7014, 0x40e1, 0x20cf, 0x00a2, + 0x74b5, 0x0a92, 0xfeef, 0x27ca, + 0x1005, 0x0a96, 0xfeef, 0xc04a, + 0xc04c, 0x40c3, 0x0000, 0x2710, + 0x0a4e, 0xffaf, 0xd908, 0x41c3, + 0x900f, 0xe0c0, 0xc04b, 0xb1e0, + 0xc005, 0x47cb, 0xffff, 0xf0f0, + 0x7014, 0x208a, 0x0fc7, 0x78c0, + 0xb105, 0xc004, 0x772c, 0xc145, + 0x7014, 0x4020, 0x70fd, 0xc045, + 0x773c, 0xc10a, 0xc007, 0x2114, + 0x0000, 0x48e8, 0xb9c0, 0x6038, + 0x0e2b, 0x10b5, 0xc044, 0x46cb, + 0x0000, 0xf0f0, 0x7bf0, 0xd80f, + 0x41c1, 0x42c1, 0x0aae, 0xfeef, + 0xc340, 0xc005, 0x41c1, 0x42c1, + 0x7b10, 0xd808, 0x0a9e, 0xfeef, + 0xc340, 0xc004, 0xe030, 0xc049, + 0x234f, 0x3040, 0x0891, 0x00b1, + 0x73b5, 0xc006, 0xb8c1, 0xc044, + 0xc007, 0x790f, 0x0f5e, 0xfdef, + 0x700c, 0x702c, 0x70ad, 0x716f, + 0xd8ff, 0xb98e, 0x754c, 0x746c, + 0xc543, 0xc542, 0x1c04, 0x36c0, + 0x081a, 0xfe2f, 0xc540, 0xd8ff, + 0x702c, 0x754c, 0x746c, 0xc543, + 0xc542, 0xc541, 0x0806, 0xfe2f, + 0xc540, 0x0a4e, 0xfe2f, 0xd8ff, + 0xc006, 0x702c, 0x704c, 0x7f0f, + 0xd8ff, 0x746c, 0xc543, 0xc742, + 0xc541, 0x0fea, 0xfdef, 0xc540, + 0xd8ff, 0x702c, 0x754c, 0xdb28, + 0xc543, 0xc542, 0xc541, 0x0fd6, + 0xfdef, 0xc540, 0xc008, 0x46cb, + 0x0000, 0x3100, 0x7014, 0x021c, + 0x0021, 0xd8ff, 0x02e5, 0x0020, + 0x1c0c, 0x3441, 0x0464, 0x000d, + 0x1600, 0x7080, 0x8000, 0x0013, + 0x7014, 0x05e0, 0x0021, 0x7377, + 0x4608, 0x269a, 0x1004, 0x7e05, + 0xd80f, 0x41c1, 0x42c1, 0x43c1, + 0x09da, 0xfeef, 0xc640, 0xc007, + 0x790f, 0x700c, 0x0eae, 0xfdef, + 0x4338, 0xc006, 0x702c, 0x734c, + 0x7f0f, 0xd820, 0xc041, 0x78cf, + 0xc040, 0xd8ff, 0xdb22, 0x4528, + 0xc143, 0x0f6a, 0xfdef, 0xc742, + 0xd828, 0xc041, 0xd808, 0xb861, + 0x08ff, 0x8031, 0x7edd, 0x702c, + 0xd8ff, 0x734c, 0xdb22, 0xc543, + 0xc742, 0x0f4a, 0xfdef, 0xc640, + 0xd8ff, 0x702c, 0x734c, 
0xdb22, + 0xc543, 0xc742, 0x1c04, 0x33c1, + 0x0f32, 0xfdef, 0xc540, 0xd8ff, + 0xd980, 0x734c, 0xdb22, 0xc543, + 0xc742, 0x1c04, 0x3501, 0x0f1e, + 0xfdef, 0xc540, 0x700c, 0x0ed6, + 0xfdef, 0x712c, 0x1600, 0x7080, + 0x8000, 0x0004, 0xdae0, 0x201a, + 0x0f81, 0x0020, 0x0000, 0x40c3, + 0x9008, 0x0100, 0x2105, 0x0003, + 0x7204, 0x7825, 0xb340, 0x1800, + 0x0485, 0x700c, 0x0e0e, 0xfdef, + 0x4163, 0x092e, 0xfe2f, 0xd8ff, + 0x70cd, 0xbe90, 0xf967, 0xc543, + 0xc742, 0xc541, 0x0ece, 0xfdef, + 0xc540, 0xd8ff, 0x41c3, 0x0000, + 0x2000, 0x734c, 0x746c, 0xc543, + 0xc742, 0xc541, 0xc540, 0x0eb6, + 0xfdef, 0x4338, 0xf95d, 0xc543, + 0xc742, 0xc541, 0x0ea6, 0xfdef, + 0xc540, 0xf957, 0xc543, 0xc742, + 0xc541, 0x0e9a, 0xfdef, 0xc540, + 0xf956, 0xc543, 0xc742, 0xc541, + 0x0e8a, 0xfdef, 0xc540, 0xf950, + 0xc543, 0xc742, 0xc541, 0x0e7e, + 0xfdef, 0xc540, 0xf94f, 0xc543, + 0xc742, 0xc541, 0x0e6e, 0xfdef, + 0xc540, 0xf949, 0xc543, 0xc742, + 0xc541, 0x0e62, 0xfdef, 0xc540, + 0xf948, 0xc543, 0xc742, 0xc541, + 0x0e52, 0xfdef, 0xc540, 0xf942, + 0xc543, 0xc742, 0xc541, 0x0e46, + 0xfdef, 0xc540, 0xf941, 0xc543, + 0xc742, 0xc541, 0x0e36, 0xfdef, + 0xc540, 0xf93b, 0xc543, 0xc742, + 0xc541, 0x0e2a, 0xfdef, 0xc540, + 0xf93a, 0xc543, 0xc742, 0xc541, + 0x0e1a, 0xfdef, 0xc540, 0xf934, + 0xc543, 0xc742, 0xc541, 0x0e0e, + 0xfdef, 0xc540, 0xf933, 0xc543, + 0xc742, 0xc541, 0x0dfe, 0xfdef, + 0xc540, 0xf92d, 0xc543, 0xc742, + 0xc541, 0x0df2, 0xfdef, 0xc540, + 0xc009, 0x702c, 0x754c, 0x7b0f, + 0xd8ff, 0xc543, 0xc542, 0xc541, + 0x0dda, 0xfdef, 0xc540, 0x79dd, + 0xd8ff, 0x754c, 0x746c, 0x1c0c, + 0x3081, 0xc542, 0xc541, 0x0dc6, + 0xfdef, 0xc540, 0xc543, 0xc542, + 0xc541, 0xc540, 0xd8ff, 0x702c, + 0x754c, 0x023b, 0x0020, 0xdb30, 0x41c1, 0x724c, 0x746c, 0xc543, - 0xc742, 0xc541, 0x0ba2, 0xfe2f, - 0xc540, 0xc004, 0xc543, 0x702c, - 0xc042, 0xd8ff, 0x764c, 0x746c, - 0x1c04, 0x36c0, 0x0b8a, 0xfe2f, - 0xc540, 0xd8ff, 0x41c1, 0x724c, - 0xdb08, 0x1c0c, 0x36c0, 0xc742, - 0xc541, 0x0b76, 0xfe2f, 0xc540, - 0xd8ff, 0x41c1, 
0x724c, 0x746c, - 0xc543, 0xc742, 0xc541, 0x0b62, - 0xfe2f, 0xc540, 0xc004, 0xc543, - 0x702c, 0xc042, 0x750c, 0xc041, - 0xd8ff, 0x764c, 0x746c, 0x0b4a, - 0xfe2f, 0xc540, 0x1c0c, 0x36c0, - 0xc742, 0xc541, 0xc540, 0xd8ff, - 0x41c1, 0x724c, 0x0b32, 0xfe2f, - 0xdb08, 0xc009, 0x70ad, 0x702c, - 0x780f, 0xc045, 0xc305, 0xd8ff, - 0x754c, 0xc543, 0xc542, 0xc541, - 0x0b16, 0xfe2f, 0xc540, 0xc007, - 0x46cb, 0x0000, 0x2100, 0x716f, - 0x7014, 0xf28a, 0xd8ff, 0x41c1, - 0x714c, 0x746c, 0xc543, 0xc541, - 0xc540, 0x0af6, 0xfe2f, 0xc742, - 0xd8ff, 0x702c, 0x704c, 0x746c, - 0xc543, 0xc742, 0x1c04, 0x36c0, - 0x0ade, 0xfe2f, 0xc540, 0xd8ff, - 0x41c1, 0x714c, 0xdb08, 0x1c0c, - 0x36c0, 0xc742, 0xc541, 0x0aca, - 0xfe2f, 0xc540, 0xd8ff, 0x41c1, - 0x714c, 0x746c, 0xc543, 0xc742, - 0xc541, 0x0ab6, 0xfe2f, 0xc540, - 0x750c, 0xc543, 0xc742, 0xc041, - 0xd8ff, 0x702c, 0x704c, 0x746c, - 0x0a9e, 0xfe2f, 0xc540, 0xd8ff, - 0x41c1, 0x714c, 0xdb08, 0x1c0c, - 0x36c0, 0xc742, 0xc541, 0x0a8a, - 0xfe2f, 0xc540, 0x750c, 0xc043, - 0xd8ff, 0x41c1, 0x714c, 0xdb08, - 0xc742, 0xc541, 0x0a72, 0xfe2f, - 0xc540, 0xd8ff, 0x41c1, 0x714c, - 0x746c, 0xc543, 0xc742, 0xc541, - 0x0a5e, 0xfe2f, 0xc540, 0xc004, - 0xc543, 0x702c, 0xc042, 0xd8ff, - 0x764c, 0x746c, 0x1c04, 0x36c0, - 0x0a46, 0xfe2f, 0xc540, 0xd8ff, - 0x41c1, 0x714c, 0xdb08, 0x1c0c, - 0x36c0, 0xc742, 0xc541, 0x0a32, - 0xfe2f, 0xc540, 0xd8ff, 0x41c1, - 0x714c, 0x746c, 0xc543, 0xc742, - 0xc541, 0x0a1e, 0xfe2f, 0xc540, - 0xc004, 0xc543, 0x702c, 0xc042, - 0x750c, 0xc041, 0xd8ff, 0x764c, - 0x746c, 0x0a06, 0xfe2f, 0xc540, + 0xc742, 0xc541, 0x0d9e, 0xfdef, + 0xc540, 0xf912, 0xc543, 0xc742, + 0x1c04, 0x36c0, 0x0d8e, 0xfdef, + 0xc540, 0xf90b, 0x1c0c, 0x36c0, + 0xc742, 0xc541, 0x0d7e, 0xfdef, + 0xc540, 0xf904, 0xc543, 0xc742, + 0xc541, 0x0d72, 0xfdef, 0xc540, + 0x750c, 0xc041, 0xf905, 0xc543, + 0xc742, 0x0d62, 0xfdef, 0xc540, + 0xf8ff, 0x1c0c, 0x36c0, 0xc742, + 0xc541, 0x0d52, 0xfdef, 0xc540, + 0x750c, 0xc043, 0xf8fa, 0xc742, + 0xc541, 0x0d42, 0xfdef, 0xc540, + 0xf8f4, 
0xc543, 0xc742, 0xc541, + 0x0d32, 0xfdef, 0xc540, 0xc004, + 0x702c, 0x764c, 0xc042, 0xd8ff, + 0x746c, 0xc543, 0x1c04, 0x36c0, + 0x0d1a, 0xfdef, 0xc540, 0xf8ee, 0x1c0c, 0x36c0, 0xc742, 0xc541, - 0xc540, 0xf007, 0x1c0c, 0x3441, - 0xc541, 0xc540, 0xc742, 0xd8ff, - 0x41c1, 0x714c, 0x09e2, 0xfe2f, - 0xdb08, 0xc305, 0x702c, 0xd8ff, - 0x754c, 0xc143, 0xc142, 0xc141, - 0x4528, 0x09ce, 0xfe2f, 0xc140, - 0x702c, 0xd8ff, 0xb98f, 0x754c, - 0x746c, 0x1c0c, 0x30c1, 0xc542, - 0xc541, 0x09b6, 0xfe2f, 0xc540, - 0xd8ff, 0x702c, 0x754c, 0xdb30, - 0xc543, 0xc542, 0xc541, 0x09a2, - 0xfe2f, 0xc540, 0x0bce, 0xfe2f, - 0xd8ff, 0xc004, 0xc543, 0xd980, - 0xc042, 0xc541, 0x1c00, 0x3041, - 0xd8ff, 0x0423, 0x0020, 0x764c, - 0xc00a, 0x70ad, 0x791d, 0xc00b, - 0x70ed, 0x6038, 0xe008, 0x2044, - 0x0041, 0x6038, 0x040c, 0x0022, - 0xc047, 0xf064, 0x2779, 0x1000, - 0x7704, 0x7e10, 0x232f, 0x06c8, - 0xd80f, 0x41c1, 0x42c1, 0x083e, - 0xfeef, 0xc340, 0xc005, 0x41c1, - 0x42c1, 0x7b10, 0xd808, 0x082e, - 0xfeef, 0xc340, 0xc104, 0x084a, - 0xfe2f, 0x700c, 0x1200, 0x3083, - 0xd8ff, 0x702c, 0x754c, 0xc543, - 0xc542, 0xc541, 0x0922, 0xfe2f, + 0x0d0a, 0xfdef, 0xc540, 0xf8e7, + 0xc543, 0xc742, 0xc541, 0x0cfe, + 0xfdef, 0xc540, 0xc004, 0x702c, + 0x764c, 0xc042, 0x750c, 0xc041, + 0xd8ff, 0x746c, 0xc543, 0x0ce6, + 0xfdef, 0xc540, 0x1c0c, 0x36c0, + 0xf8df, 0xc742, 0xc541, 0x0cd6, + 0xfdef, 0xc540, 0xc009, 0x70ad, + 0x702c, 0x780f, 0xc045, 0xc305, + 0xd8ff, 0x754c, 0xc543, 0xc542, + 0xc541, 0x0cba, 0xfdef, 0xc540, + 0xc008, 0x46cb, 0x0000, 0x2100, + 0x7014, 0xf26d, 0x714c, 0xd8ff, + 0x41c1, 0x746c, 0x4358, 0xc543, + 0xc742, 0xc541, 0x0c96, 0xfdef, + 0xc540, 0xf8d0, 0xc543, 0xc742, + 0x1c04, 0x36c0, 0x0c86, 0xfdef, + 0xc540, 0xf8c3, 0x1c0c, 0x36c0, + 0xc742, 0xc541, 0x0c76, 0xfdef, + 0xc540, 0xf8ce, 0xc543, 0xc742, + 0xc541, 0x0c6a, 0xfdef, 0xc540, + 0x750c, 0xc041, 0xf8c3, 0xc543, + 0xc742, 0x0c5a, 0xfdef, 0xc540, + 0xf8b7, 0x1c0c, 0x36c0, 0xc742, + 0xc541, 0x0c4a, 0xfdef, 0xc540, + 0x750c, 0xc043, 0xf8b2, 0xc742, + 
0xc541, 0x0c3a, 0xfdef, 0xc540, + 0xf8be, 0xc543, 0xc742, 0xc541, + 0x0c2a, 0xfdef, 0xc540, 0xc004, + 0x702c, 0x764c, 0xc042, 0xd8ff, + 0x746c, 0xc543, 0x1c04, 0x36c0, + 0x0c12, 0xfdef, 0xc540, 0xf8a6, + 0x1c0c, 0x36c0, 0xc742, 0xc541, + 0x0c02, 0xfdef, 0xc540, 0xf8b1, + 0xc543, 0xc742, 0xc541, 0x0bf6, + 0xfdef, 0xc540, 0xc004, 0x702c, + 0x764c, 0xc042, 0x750c, 0xc041, + 0xd8ff, 0x746c, 0xc543, 0x0bde, + 0xfdef, 0xc540, 0x1c0c, 0x36c0, + 0xf003, 0x1c0c, 0x3441, 0xf896, + 0xc742, 0xc541, 0x0bc6, 0xfdef, + 0xc540, 0xc305, 0xf88f, 0xc143, + 0xc142, 0xc141, 0x0bb6, 0xfdef, + 0xc140, 0xf886, 0x1c0c, 0x30c1, + 0xc542, 0xc541, 0x0ba6, 0xfdef, 0xc540, 0xd8ff, 0x702c, 0x754c, - 0x726c, 0xc543, 0xc542, 0xc541, - 0x090e, 0xfe2f, 0xc540, 0xc006, - 0x73cd, 0xc643, 0x780f, 0xc042, - 0xc044, 0xd8ff, 0x702c, 0x724c, - 0xdb08, 0xc541, 0x08f2, 0xfe2f, - 0x1c00, 0x3281, 0xc004, 0xc643, - 0x702c, 0xc042, 0xd8ff, 0x724c, - 0xdb08, 0xc541, 0x08da, 0xfe2f, - 0xc540, 0xd8ff, 0x702c, 0x724c, - 0xdb08, 0xc643, 0xc542, 0xc541, - 0x08c6, 0xfe2f, 0x1c00, 0x3281, - 0xc007, 0xd980, 0x754c, 0x7b0f, - 0xd8ff, 0xc543, 0xc542, 0xc541, - 0x08ae, 0xfe2f, 0xc540, 0x700c, - 0x0862, 0xfe2f, 0x712c, 0x71e5, - 0xc008, 0x780f, 0x0f39, 0x90b2, - 0xc044, 0xc104, 0x0fa2, 0xfdef, - 0x700c, 0x1200, 0x3083, 0xd8ff, - 0x702c, 0x754c, 0xc543, 0xc542, - 0xc541, 0x087e, 0xfe2f, 0xc540, - 0x0ac6, 0xfe2f, 0xd8ff, 0xd8ff, - 0x702c, 0x754c, 0x726c, 0xc543, - 0xc542, 0xc541, 0x0862, 0xfe2f, - 0xc540, 0xca08, 0x702c, 0x714c, - 0xc043, 0xc006, 0xdb08, 0x780f, - 0xc042, 0xd8ff, 0xc541, 0x084a, - 0xfe2f, 0xc540, 0xc009, 0x702c, - 0x754c, 0x2054, 0x0c80, 0x7b0f, - 0xd8ff, 0xc543, 0xc542, 0xc541, - 0x082e, 0xfe2f, 0xc540, 0x702c, - 0xd8ff, 0xb98f, 0x754c, 0x746c, - 0x1c0c, 0x3081, 0xc542, 0xc541, - 0x0816, 0xfe2f, 0xc540, 0xd8ff, - 0x702c, 0x754c, 0xdb50, 0xc543, - 0xc542, 0xc541, 0x0287, 0x0020, - 0xc540, 0x46cb, 0x0000, 0xaaaa, - 0x0f29, 0x10d0, 0x0f0d, 0x1151, - 0x46cb, 0x0000, 0xb2b2, 0xf00e, - 0x0f0f, 0x1131, 0x70cd, 
0x46cb, - 0x0000, 0xcccc, 0xf006, 0x0f0d, - 0x1191, 0x46cb, 0x0000, 0x8282, - 0x232f, 0x3388, 0xd80f, 0x4163, - 0x4263, 0x4363, 0x0eae, 0xfeaf, - 0x1c00, 0x36c0, 0xc008, 0x780f, - 0xc044, 0xc104, 0x0ec2, 0xfdef, - 0x700c, 0xc006, 0x702c, 0xc143, - 0x7f0f, 0xd820, 0xc742, 0xc041, - 0x78cf, 0xc040, 0xd8ff, 0x734c, - 0xdb22, 0x0f96, 0xfdef, 0x4528, - 0xd828, 0xc543, 0xc742, 0xc041, - 0xd808, 0xb861, 0x08ff, 0x8031, - 0x232f, 0x36c2, 0xd8ff, 0x702c, - 0x734c, 0xdb22, 0x0f72, 0xfdef, - 0x1c00, 0x36c0, 0xd8ff, 0x702c, - 0x734c, 0xdb22, 0xc543, 0xc742, - 0x1c04, 0x33c1, 0x0f5a, 0xfdef, - 0xc540, 0xd8ff, 0xd980, 0x734c, - 0xdb22, 0xc543, 0xc742, 0x1c04, - 0x3501, 0x0f46, 0xfdef, 0xc540, - 0x700c, 0x0efa, 0xfdef, 0x712c, - 0x1600, 0x7080, 0x8000, 0x0004, - 0xdae0, 0x201a, 0x0f81, 0x0020, - 0x0000, 0x40c3, 0x9008, 0x0100, - 0x2105, 0x0003, 0x7204, 0x7825, - 0xb340, 0x1800, 0x0485, 0xc104, - 0x0e1e, 0xfdef, 0x700c, 0x095a, - 0xfe2f, 0xd8ff, 0x70cd, 0xbe90, + 0xdb30, 0xc543, 0xc542, 0xc541, + 0x0b92, 0xfdef, 0xc540, 0xc004, + 0x702c, 0x764c, 0xc042, 0xd8ff, + 0xdb2e, 0xc543, 0xc541, 0x1c00, + 0x3041, 0x0b7a, 0xfdcf, 0x0da6, + 0xfdef, 0xd8ff, 0xc543, 0xc542, + 0xc541, 0xc540, 0xd8ff, 0xd980, + 0x754c, 0x0b62, 0xfdef, 0x746c, + 0xc0ad, 0x1404, 0x341b, 0xc6c6, + 0x0bf9, 0xb071, 0x70cd, 0xc00b, + 0x791d, 0xc00c, 0x6119, 0x4918, + 0xb8c0, 0x6119, 0x4918, 0xc04a, + 0x700c, 0xf063, 0xc004, 0x45cb, + 0x0000, 0xffff, 0x7014, 0x7dc0, + 0xd80f, 0x41a1, 0x42a1, 0x0d6e, + 0xfeaf, 0xc340, 0xc005, 0x41a1, + 0x42a1, 0x7b10, 0xd808, 0x0d5e, + 0xfeaf, 0xc340, 0x700c, 0x0a36, + 0xfdef, 0x4163, 0x1200, 0x3083, + 0xf860, 0xc143, 0xc142, 0xc141, + 0x0afa, 0xfdef, 0xc140, 0xf85a, + 0xc543, 0xc542, 0xc541, 0x0aee, + 0xfdef, 0xc540, 0xc006, 0x736f, + 0x702c, 0x780f, 0xc048, 0xc042, + 0xd8ff, 0x724c, 0xdb08, 0x1c0c, + 0x36c0, 0xc541, 0x0ace, 0xfdef, + 0x1c00, 0x3281, 0xc008, 0x702c, + 0x724c, 0xc042, 0xd8ff, 0xdb08, + 0x1c0c, 0x36c0, 0xc541, 0x0ab6, + 0xfdef, 0xc540, 0xd8ff, 0x702c, + 0x724c, 0xdb08, 
0x1c0c, 0x36c0, + 0xc542, 0xc541, 0x0a9e, 0xfdef, + 0x1c00, 0x3281, 0xc00a, 0xd980, + 0x754c, 0x7b0f, 0xd8ff, 0xc543, + 0xc542, 0xc541, 0x0a86, 0xfdef, + 0xc540, 0x700c, 0x0a3e, 0xfdef, + 0x712c, 0xc004, 0x7104, 0xc044, + 0xc007, 0x232f, 0x3007, 0xc004, + 0x0835, 0x80b4, 0x7bf0, 0x700c, + 0x098a, 0xfdef, 0x4163, 0x1200, + 0x3083, 0xd8ff, 0x702c, 0x754c, + 0xc643, 0xc642, 0xc641, 0x0a4e, + 0xfdef, 0xc640, 0x0c92, 0xfdef, + 0xd8ff, 0xf82d, 0xc643, 0xc642, + 0xc641, 0x0a3a, 0xfdef, 0xc640, + 0xca08, 0x702c, 0x714c, 0xc043, + 0xc006, 0xdb08, 0xc641, 0x780f, + 0xc042, 0xd8ff, 0x0a1e, 0xfdef, + 0xc640, 0xc009, 0x702c, 0x754c, + 0x2054, 0x0c80, 0x7b0f, 0xd8ff, + 0xc643, 0xc642, 0xc641, 0x0a06, + 0xfdef, 0xc640, 0xf819, 0x1c0c, + 0x3081, 0xc642, 0xc641, 0x09f6, + 0xfdef, 0xc640, 0xd8ff, 0x702c, + 0x754c, 0xdb50, 0xc643, 0xc642, + 0xc641, 0x09e2, 0xfdef, 0xc640, + 0x0c0a, 0xfdef, 0xd8ff, 0xc643, + 0xc642, 0xc641, 0xc640, 0xf134, + 0x46cb, 0x0000, 0xaaaa, 0xf214, + 0x0b17, 0x3190, 0x0b1b, 0x3150, + 0x0b1d, 0x3131, 0x70cd, 0x46cb, + 0x0000, 0xcccc, 0xf008, 0x46cb, + 0x0000, 0x8282, 0xf004, 0x46cb, + 0x0000, 0xb2b2, 0x0205, 0xffcf, + 0x702c, 0xd8ff, 0xb98f, 0x754c, + 0x746c, 0x7ee0, 0xd8ff, 0x702c, + 0x754c, 0x726c, 0x7ee0, 0x78e0, + 0x702c, 0xd8ff, 0x754c, 0x4528, + 0x7ee0, 0x78e0, 0xd8ff, 0x41c1, + 0x714c, 0xdb08, 0x7ee0, 0x78e0, + 0xd8ff, 0x41c1, 0x724c, 0x746c, + 0x7ee0, 0x78e0, 0xd8ff, 0x41c1, + 0x724c, 0xdb08, 0x7ee0, 0x78e0, + 0xd8ff, 0x702c, 0x704c, 0x746c, + 0x7ee0, 0x78e0, 0xd8ff, 0x4163, + 0x734c, 0x746c, 0x7ee0, 0x78e0, 0xd8ff, 0x41c1, 0x714c, 0x746c, - 0xc543, 0xc742, 0xc541, 0x0ef2, - 0xfdef, 0xc540, 0x41c3, 0x0000, - 0x2000, 0xd8ff, 0x734c, 0x746c, - 0xc543, 0xc742, 0xc541, 0xc540, - 0x0ed6, 0xfdef, 0x4338, 0xd8ff, - 0x41c1, 0x714c, 0x746c, 0xc543, - 0xc742, 0xc541, 0x0ec2, 0xfdef, - 0xc540, 0xd8ff, 0x4163, 0x734c, - 0x746c, 0xc543, 0xc742, 0xc541, - 0x0eae, 0xfdef, 0xc540, 0xd8ff, - 0x41c1, 0x714c, 0x746c, 0xc543, - 0xc742, 0xc541, 0x0e9a, 0xfdef, - 0xc540, 
0xd8ff, 0x4163, 0x734c, - 0x746c, 0xc543, 0xc742, 0xc541, - 0x0e86, 0xfdef, 0xc540, 0xd8ff, - 0x41c1, 0x714c, 0x746c, 0xc543, - 0xc742, 0xc541, 0x0e72, 0xfdef, - 0xc540, 0xd8ff, 0x4163, 0x734c, - 0x746c, 0xc543, 0xc742, 0xc541, - 0x0e5e, 0xfdef, 0xc540, 0xd8ff, - 0x41c1, 0x714c, 0x746c, 0xc543, - 0xc742, 0xc541, 0x0e4a, 0xfdef, - 0xc540, 0xd8ff, 0x4163, 0x734c, - 0x746c, 0xc543, 0xc742, 0xc541, - 0x0e36, 0xfdef, 0xc540, 0xd8ff, - 0x41c1, 0x714c, 0x746c, 0xc543, - 0xc742, 0xc541, 0x0e22, 0xfdef, - 0xc540, 0xd8ff, 0x4163, 0x734c, - 0x746c, 0xc543, 0xc742, 0xc541, - 0x0e0e, 0xfdef, 0xc540, 0xd8ff, - 0x41c1, 0x714c, 0x746c, 0xc543, - 0xc742, 0xc541, 0x0dfa, 0xfdef, - 0xc540, 0xd8ff, 0x4163, 0x734c, - 0x746c, 0xc543, 0xc742, 0xc541, - 0x0de6, 0xfdef, 0xc540, 0xd8ff, - 0x41c1, 0x714c, 0x746c, 0xc543, - 0xc742, 0xc541, 0x0dd2, 0xfdef, - 0xc540, 0xd8ff, 0x4163, 0x734c, - 0x746c, 0xc543, 0xc742, 0xc541, - 0x0dbe, 0xfdef, 0xc540, 0xc009, - 0x702c, 0x754c, 0x7b0f, 0xd8ff, - 0xc543, 0xc542, 0xc541, 0x0daa, - 0xfdef, 0xc540, 0x79dd, 0xd8ff, - 0x754c, 0x746c, 0x1c0c, 0x3081, - 0xc542, 0xc541, 0x0d92, 0xfdef, - 0xc540, 0xc543, 0xc542, 0xc541, - 0xc540, 0xd8ff, 0x702c, 0x754c, - 0xdb30, 0x0d7e, 0xfdcf, 0x0fae, - 0xfdef, 0xd8ff, 0xd8ff, 0xd980, - 0x754c, 0xc543, 0xc542, 0xc541, - 0xc540, 0x0d66, 0xfdef, 0x746c, - 0xc0ac, 0x1404, 0x341b, 0xc6c6, - 0xc2e6, 0x1cfc, 0xb6c8, 0xc1a7, - 0x4708, 0x1600, 0x7100, 0x9008, - 0x01e0, 0xc046, 0x2004, 0x0f80, - 0x0000, 0xff7f, 0xc045, 0x8f19, - 0xc044, 0x8f10, 0xc040, 0x1600, - 0x7100, 0x900e, 0x004c, 0xc041, - 0x40c3, 0x900e, 0x004c, 0x9001, - 0xc042, 0xc004, 0xe889, 0x40c3, - 0x0000, 0xffff, 0x4100, 0x4200, - 0x0c3e, 0xfeaf, 0x4300, 0xc000, - 0xd90f, 0x2078, 0x0100, 0xc043, - 0x0862, 0xfe2f, 0xd80f, 0x8708, - 0x70ad, 0x781b, 0x2005, 0x0f80, - 0x9003, 0xe064, 0x0c56, 0xffaf, - 0xb0a0, 0x8729, 0x0ab2, 0x00a0, - 0x710c, 0xc005, 0x712c, 0x4338, + 0x7ee0, 0x78e0, 0xc2e6, 0x1cfc, + 0xb6c8, 0xc1a6, 0x4608, 0x1600, + 0x7100, 0x9008, 0x01e0, 0x218a, + 
0x0ffd, 0xc045, 0x7824, 0xc042, + 0x1600, 0x7100, 0x900e, 0x004c, + 0xc040, 0x8e19, 0xc044, 0x8e10, + 0xc043, 0x40c3, 0x900e, 0x004c, + 0x9001, 0xc041, 0xc004, 0xe88a, + 0x40c3, 0x0000, 0xffff, 0x4100, + 0x4200, 0x0b76, 0xfeaf, 0x4300, + 0xd80f, 0x0ef6, 0xfdef, 0xd90f, + 0x8608, 0x70ad, 0x781b, 0x2005, + 0x0f80, 0x9003, 0xe064, 0x0b5e, + 0xffaf, 0xb0a0, 0x8629, 0x0e2e, + 0x0060, 0x710c, 0xc002, 0x712c, 0x1e00, 0x7004, 0x9009, 0xe1e0, 0x40c3, 0x9003, 0xe064, 0x2042, - 0x098e, 0xb6a0, 0xb6a0, 0x1e9c, - 0x135c, 0x1e9b, 0x135c, 0x1ee6, - 0x9fc5, 0x1ef2, 0x9044, 0x1ee4, - 0x9044, 0x0c6a, 0xfdef, 0x970e, - 0xc101, 0xc202, 0x0c9a, 0x0060, - 0x40e1, 0xd80f, 0x0ffe, 0xfdef, - 0xd90f, 0x8729, 0x0a62, 0x00a0, - 0x710c, 0xc006, 0x712c, 0x1e00, - 0x7004, 0x9009, 0xe1e0, 0xb6a0, - 0x1ee6, 0x9f84, 0x0000, 0xff00, - 0x1ef2, 0x96c4, 0x1ee4, 0x96c4, - 0x0c2a, 0xfdef, 0x970a, 0x8f58, - 0x8701, 0x0bc2, 0x0060, 0x712c, - 0xc101, 0xc202, 0x0c52, 0x0060, - 0x40e1, 0xd80f, 0x0fb6, 0xfdef, - 0xd90f, 0xc004, 0xe805, 0x0ed2, - 0xfdef, 0xc003, 0xf00f, 0xc100, - 0xc003, 0x2179, 0x0101, 0xb962, - 0xb862, 0x7a30, 0x41c3, 0x0000, - 0xffff, 0x7810, 0x0b5a, 0xfeaf, - 0x4320, 0x1600, 0x7100, 0x900e, - 0x004c, 0xc040, 0x40c3, 0x900e, - 0x004c, 0x9001, 0xc041, 0xc005, - 0x1e00, 0x7004, 0x9009, 0xe1e0, - 0xc003, 0x8729, 0x781b, 0x781b, - 0x7504, 0x09ce, 0x00a0, 0xc042, - 0x8708, 0x70ad, 0x781b, 0x2005, - 0x0f80, 0x9003, 0xe064, 0x0b56, - 0xffaf, 0xb0a0, 0x712c, 0xb6a0, - 0x1e9c, 0x135c, 0x1e9b, 0x135c, - 0x1ee6, 0x9fc5, 0x1ef2, 0x9044, - 0x1ee4, 0x9044, 0x970e, 0x0b86, - 0xfdef, 0x4338, 0xc100, 0xc201, - 0x0bb6, 0x0060, 0x40e1, 0xc006, - 0xd90f, 0x1e00, 0x7004, 0x9009, - 0xe1e0, 0xd80f, 0x0f0e, 0xfdef, - 0xb6a0, 0x8729, 0x0972, 0x00a0, - 0xc002, 0x1ee6, 0x9f84, 0x0000, - 0xff00, 0x1ef2, 0x96c4, 0x1ee4, - 0x96c4, 0x8705, 0x712c, 0x7104, - 0x0b42, 0xfdef, 0x7810, 0xc100, - 0xc201, 0x0b76, 0x0060, 0x40e1, - 0xc004, 0xe807, 0xc003, 0x0dfa, - 0xfdef, 0x2052, 0x0000, 0x8f58, - 0x8703, 0x0ac2, 0x0060, 0x712c, 
- 0xc0a7, 0x1404, 0x341b, 0xc6c6, - 0xc2e6, 0x1cfc, 0xb6c8, 0xc1a8, - 0x4708, 0x88b0, 0x1600, 0x7100, - 0x9008, 0x01e0, 0x43db, 0x900e, - 0x004c, 0xc047, 0x2004, 0x0f80, - 0x0000, 0xff7f, 0xc045, 0x8f19, - 0xc043, 0x1300, 0x3100, 0xc040, - 0x1302, 0x3100, 0xc041, 0xc003, - 0xe889, 0x40c3, 0x0000, 0xffff, - 0x4100, 0x4200, 0x0a42, 0xfeaf, - 0x4300, 0x2578, 0x1100, 0xc042, - 0xd80f, 0x0e6a, 0xfdef, 0xd90f, - 0x8708, 0x2579, 0x1101, 0x0aba, - 0xfeaf, 0xc144, 0xc005, 0x1e00, - 0x7004, 0x9009, 0xe1e0, 0x8729, - 0x08b6, 0x00a0, 0x730c, 0x70ad, - 0x46cb, 0x9003, 0xe03e, 0xb6a0, - 0x1e9b, 0x135c, 0x1e9c, 0x135c, - 0x1ee6, 0x9fc5, 0x1ef2, 0x9045, - 0x1ee4, 0x9045, 0x970e, 0x0a7e, - 0xfdef, 0x712c, 0xc100, 0xc201, - 0x0aae, 0x0060, 0x40e1, 0xd80f, - 0x0e12, 0xfdef, 0xd90f, 0xc007, + 0x098f, 0xb7a0, 0xb7a0, 0x1f9c, + 0x135c, 0x1f9b, 0x135c, 0x1fe6, + 0x9f84, 0x0000, 0xffff, 0x1ff2, + 0x9044, 0x1fe4, 0x9044, 0x084e, + 0xfdef, 0x960e, 0xc100, 0xc201, + 0x0812, 0x0060, 0x40c1, 0xd80f, + 0x0e8e, 0xfdef, 0xd90f, 0x8629, + 0x0dda, 0x0060, 0x710c, 0xc005, 0x712c, 0x1e00, 0x7004, 0x9009, - 0xe1e0, 0xb6a0, 0x0a56, 0xfdef, - 0x970a, 0x8f58, 0x8702, 0x09ee, - 0x0060, 0x712c, 0xc100, 0xc201, - 0x0a7e, 0x0060, 0x40e1, 0xd80f, - 0x0de2, 0xfdef, 0xd90f, 0xc003, - 0xe807, 0xc002, 0x0cfa, 0xfdef, - 0x2052, 0x0000, 0xf00d, 0xc102, - 0xc004, 0xb962, 0xb862, 0x7a30, - 0x41c3, 0x0000, 0xffff, 0x7810, - 0x0986, 0xfeaf, 0x4320, 0x1300, - 0x3100, 0xc046, 0x1302, 0x3100, - 0xc040, 0xc005, 0x1e00, 0x7004, - 0x9009, 0xe1e0, 0xc004, 0x8729, - 0x781b, 0x781b, 0xe007, 0x0802, - 0x00a0, 0xc041, 0x8708, 0x09ea, - 0xfeaf, 0xc104, 0x70ad, 0x712c, - 0xb6a0, 0x1e9b, 0x135c, 0x1e9c, - 0x135c, 0x1ee6, 0x9fc5, 0x1ef2, - 0x9044, 0x1ee4, 0x9044, 0x970e, - 0x09c2, 0xfdef, 0x4338, 0xc106, - 0xc200, 0x09f6, 0x0060, 0x40e1, - 0xc007, 0xd90f, 0x1e00, 0x7004, - 0x9009, 0xe1e0, 0xd80f, 0x0d4e, - 0xfdef, 0xb6a0, 0x8729, 0x0fb2, - 0x0060, 0xc001, 0x40c3, 0x0000, - 0xff00, 0x1ee6, 0x9004, 0x1ef2, - 0x96c4, 0x1ee4, 0x96c4, 
0x8705, - 0x712c, 0x7104, 0x097e, 0xfdef, - 0x7810, 0xc106, 0xc200, 0x09b2, - 0x0060, 0x40e1, 0xc003, 0xe805, - 0x0c36, 0xfdef, 0xc002, 0x8f58, - 0x8703, 0x0902, 0x0060, 0x712c, - 0xc0a8, 0x1404, 0x341b, 0xc6c6, - 0xc2e6, 0x1cfc, 0xb6c8, 0x2482, - 0x3703, 0xc140, 0x0d3a, 0xfdef, - 0x712c, 0x1600, 0x7083, 0x8000, - 0x0004, 0xc045, 0xc08a, 0xd9ff, - 0x0ae2, 0xff6f, 0xdab4, 0x40c3, - 0x0000, 0x2100, 0x1e00, 0x7004, - 0x9003, 0xe004, 0x1e00, 0x7005, - 0x9003, 0xfec4, 0xc000, 0x2079, - 0x0000, 0x4898, 0x205f, 0x0200, - 0xc147, 0xc044, 0x231a, 0x0f80, - 0x0010, 0x0000, 0xc304, 0xc049, - 0x710c, 0xc041, 0xf011, 0x4200, - 0x2144, 0x07c0, 0x219a, 0x0008, - 0xe805, 0xb861, 0x08ff, 0x8031, - 0x7a5b, 0x7164, 0x2105, 0x0f80, - 0x9003, 0xe0c4, 0xb040, 0xc007, - 0x796f, 0x09df, 0x8022, 0x710c, - 0x43db, 0x9002, 0x0064, 0xc001, - 0x780f, 0xe098, 0x01d8, 0x002d, - 0xc048, 0x1600, 0x7082, 0x8000, - 0x0771, 0xf013, 0xc005, 0x2055, - 0x08c1, 0xc009, 0x7905, 0x231a, - 0x0f80, 0x0000, 0x1000, 0x7825, - 0x781b, 0x2005, 0x0f81, 0x9002, - 0x1e00, 0xc008, 0xb100, 0x40c3, - 0x8000, 0x0771, 0x8801, 0x7b4f, - 0x08d5, 0x80e5, 0x7144, 0x40c3, - 0x0000, 0x2100, 0x1e00, 0x7004, - 0x9003, 0xe004, 0xc000, 0x41c3, - 0x9003, 0xf064, 0x7014, 0x40c3, - 0x9003, 0xfe64, 0x703c, 0x080e, - 0xffaf, 0x1800, 0x0005, 0x0806, - 0xff8f, 0xd80f, 0x0bf6, 0xfdef, - 0xd90f, 0x730c, 0x0e5a, 0x0060, - 0x218a, 0x0fc7, 0xde7f, 0x45cb, - 0x9003, 0xe024, 0x710c, 0xb5c0, - 0xc046, 0xb506, 0x208a, 0x03c4, - 0x1dfe, 0x9004, 0xd818, 0x0826, - 0xfdef, 0x712c, 0x208a, 0x0004, - 0x1e00, 0x7004, 0x9003, 0xe004, - 0xd80f, 0x0bba, 0xfdef, 0xd90f, - 0xd807, 0x080a, 0xfdef, 0x712c, - 0x710c, 0xb5c0, 0xb506, 0xd80f, - 0xd90f, 0x0ba2, 0xfdef, 0x1dfe, - 0x93c5, 0x730c, 0x0fee, 0xfdaf, - 0x712c, 0xd80f, 0x0b8e, 0xfdef, - 0xd90f, 0x700c, 0x712c, 0x1d00, - 0x1fc5, 0x1d0c, 0x1fc5, 0x0fd6, - 0xfdaf, 0x1dfe, 0x9005, 0x1600, - 0x709f, 0x8000, 0x0771, 0xf003, - 0x71e7, 0x40c3, 0x8000, 0x0771, - 0x8801, 0x252f, 0x17c7, 0x08c5, - 0x0364, 0x718d, 
0x1400, 0x300b, - 0xc604, 0xf003, 0x71c5, 0xc007, - 0x7bcf, 0x0be1, 0x8023, 0x7474, - 0x255f, 0x1480, 0x23c0, 0x1061, - 0x24ca, 0x1061, 0xc043, 0xc103, - 0xc08a, 0x6119, 0x7974, 0x9100, - 0xe0c0, 0xf7ae, 0x251a, 0x1f80, - 0x0000, 0x1000, 0xc042, 0x4060, - 0x209a, 0x0004, 0xc202, 0x7845, - 0x7a1b, 0x2205, 0x06c2, 0x9200, - 0x7014, 0xc006, 0x24ca, 0x1021, - 0x20ca, 0x0021, 0xc046, 0xec0d, - 0x0b1f, 0x00d0, 0x0b17, 0x0211, - 0x9200, 0xb100, 0x2614, 0x7341, - 0x8000, 0x088d, 0xf023, 0x708d, - 0xf1ca, 0x0b95, 0x81d1, 0x202f, - 0x02c7, 0x2079, 0x0000, 0x205f, - 0x0102, 0xf00f, 0xc103, 0x7144, - 0x673f, 0x7f14, 0x209a, 0x0004, - 0xc102, 0x7825, 0x781b, 0x2005, - 0x06c0, 0x9000, 0xb700, 0x784f, - 0x0be5, 0x8025, 0xc78a, 0x0b61, - 0x81d1, 0x2614, 0x7341, 0x8000, - 0x088c, 0xc001, 0xa900, 0xf1a8, - 0xc008, 0x7104, 0xc041, 0xc006, - 0x7014, 0xf313, 0x1600, 0x709b, - 0x8000, 0x0771, 0x1600, 0x7080, - 0x8000, 0x0000, 0xc042, 0xf003, - 0x7167, 0x40c3, 0x8000, 0x0771, - 0x8801, 0x262f, 0x16c7, 0x085f, - 0x03a5, 0xc704, 0xf031, 0x265f, - 0x1481, 0x42c1, 0x43a1, 0x6038, - 0xc043, 0x20f4, 0x0340, 0x41c3, - 0x0047, 0x0002, 0x0d2a, 0xfd6f, - 0x20bc, 0x0001, 0xc005, 0x261a, - 0x1f81, 0x0000, 0x1000, 0x71e5, - 0x2054, 0x0d00, 0x7905, 0x40a1, - 0x209a, 0x0004, 0x7825, 0x781b, - 0x2005, 0x0f81, 0x9002, 0x0000, - 0xc003, 0x20f4, 0x0342, 0xc002, - 0xb8c3, 0x6058, 0x2049, 0x0fc0, - 0xb100, 0xc007, 0x7def, 0x0dab, - 0x9022, 0xc08a, 0xf1c6, 0xc000, - 0x0887, 0x0010, 0x092a, 0xfe4f, - 0x1600, 0x708f, 0x8000, 0x0771, - 0x4308, 0xf003, 0x71e5, 0x41c3, - 0x8000, 0x0771, 0x8921, 0x78ef, - 0x0967, 0x0024, 0xdd09, 0x41c3, - 0x8000, 0x088c, 0x7914, 0x8960, - 0x8941, 0x7270, 0x4a74, 0x4b51, - 0x21ca, 0x030d, 0x7e2f, 0x261a, - 0x12ce, 0xdc08, 0x25ca, 0x122d, - 0x24ca, 0x112d, 0x2e44, 0x180e, - 0xf016, 0xc205, 0x219a, 0x0004, - 0x2254, 0x0d03, 0x201a, 0x0f82, - 0x0000, 0x1000, 0x7a65, 0x7945, - 0x7b3b, 0xbb91, 0xbb9c, 0xbb9f, - 0x9340, 0x79d0, 0x6159, 0x2149, - 0x0fc1, 0xb320, 0x798f, 0x09d7, - 0x8364, 
0x7185, 0xf1c8, 0x2480, - 0x3703, 0x1404, 0x341b, 0xc6c6, - 0xc2e6, 0x1cfc, 0xb6c8, 0x2482, - 0x3f0a, 0xc041, 0x1600, 0x7080, - 0x8000, 0x0004, 0xc14b, 0x71ad, + 0xe1e0, 0xb7a0, 0x1fe6, 0x9f84, + 0x0000, 0xff00, 0x1ff2, 0x9044, + 0x1fe4, 0x9044, 0x080e, 0xfdef, + 0x960a, 0x8e58, 0x8601, 0x0f46, + 0x0020, 0x712c, 0xc100, 0xc201, + 0x0fca, 0x0020, 0x40c1, 0xd80f, + 0x0e46, 0xfdef, 0xd90f, 0xc004, + 0xe807, 0xc003, 0x0a8e, 0xfdef, + 0x2078, 0x0100, 0xf011, 0xc003, + 0x41c3, 0x0000, 0xffff, 0x4220, + 0x7414, 0x40c3, 0x0000, 0xfffe, + 0x22ca, 0x0001, 0x20ca, 0x0041, + 0x0a8e, 0xfeaf, 0x4320, 0x1600, + 0x7100, 0x900e, 0x004c, 0xc040, + 0x40c3, 0x900e, 0x004c, 0x9001, + 0xc041, 0xc002, 0x1e00, 0x7004, + 0x9009, 0xe1e0, 0xc003, 0x8629, + 0x2078, 0x0100, 0x781b, 0x781b, + 0x7504, 0x0d3a, 0x0060, 0xc042, + 0x8608, 0x706f, 0x781b, 0x2005, + 0x0f80, 0x9003, 0xe064, 0x0a4e, + 0xffaf, 0x1800, 0x06c4, 0x712c, + 0x1f00, 0x16c4, 0x1f9c, 0x16dc, + 0x1f9b, 0x16dc, 0x1fe6, 0x9f84, + 0x0000, 0xffff, 0x1ff2, 0x9044, + 0x1fe4, 0x9044, 0x960e, 0x0f56, + 0xfdaf, 0x4528, 0xc100, 0xc201, + 0x0f1a, 0x0020, 0x40c1, 0xc005, + 0xd90f, 0x1e00, 0x7004, 0x9009, + 0xe1e0, 0xd80f, 0x0d8a, 0xfdef, + 0x1f00, 0x16c4, 0x8629, 0x0cd6, + 0x0060, 0xc002, 0x1fe6, 0x9f84, + 0x0000, 0xff00, 0x1ff2, 0x9344, + 0x1fe4, 0x9344, 0x8605, 0x712c, + 0x7104, 0x0f12, 0xfdaf, 0x7810, + 0xc100, 0xc201, 0x0ed6, 0x0020, + 0x40c1, 0xc004, 0xe806, 0xc003, + 0x09a2, 0xfdef, 0x2079, 0x0100, + 0x8e58, 0x8603, 0x0e2e, 0x0020, + 0x712c, 0xc0a6, 0x1404, 0x341b, + 0xc6c6, 0x78e0, 0xc2e6, 0x1cfc, + 0xb6c8, 0x2482, 0x3c03, 0xc042, + 0x1600, 0x7080, 0x8000, 0x0004, + 0x4338, 0x41c3, 0x004a, 0x0000, 0x201a, 0x0f80, 0x0020, 0x0000, - 0x41c3, 0x004a, 0x0000, 0x2005, - 0x0f80, 0x9002, 0x0086, 0x9000, + 0x2005, 0x0f80, 0x9002, 0x0086, + 0x9000, 0x2084, 0x0001, 0x781d, 0x781d, 0x781d, 0x781d, 0x781d, - 0x781d, 0x781d, 0x0c12, 0xfd6f, - 0x2506, 0x1000, 0x0ffe, 0xfe6f, - 0x700c, 0x700c, 0x41c3, 0x9003, - 0xff68, 0x1e00, 0x7004, 0x9003, - 
0xe004, 0xb100, 0xc001, 0x47cb, - 0x8000, 0x076e, 0x2044, 0x07ce, - 0x40a1, 0xee06, 0xbe61, 0x0e01, - 0x1031, 0x781b, 0x205a, 0x0102, - 0x1600, 0x7100, 0x9008, 0x01ea, - 0xc045, 0x11c3, 0x0700, 0xc044, - 0xca0a, 0x208c, 0x8fc3, 0xf40c, - 0x1600, 0x7080, 0x8000, 0x0000, - 0xb8e5, 0xd8ff, 0xf204, 0x8f00, - 0x1a0a, 0x3002, 0xc101, 0x220f, - 0x0041, 0xc14a, 0x8f20, 0x2139, - 0x0000, 0xf00f, 0xc004, 0xb885, - 0x1e00, 0x7004, 0x9005, 0xe0ee, - 0xc005, 0x1e00, 0x7004, 0x9008, - 0x01ea, 0xc002, 0x7104, 0xc042, - 0xc00b, 0x2079, 0x0000, 0x6822, - 0xc002, 0x7d0f, 0x71b1, 0x0328, - 0x002c, 0x70cd, 0xc005, 0x714c, - 0x2004, 0x0f81, 0x0000, 0xc0ff, - 0x2004, 0x0f80, 0x0000, 0x3f00, - 0x2080, 0x0010, 0x7825, 0x1e00, - 0x7004, 0x9008, 0x01ea, 0x1e00, - 0x7384, 0x900f, 0xe022, 0xc004, - 0x762c, 0xb8a5, 0x1e00, 0x7004, - 0x9005, 0xe0ee, 0xc00a, 0x0f02, - 0xff6f, 0x780f, 0xc001, 0x780f, - 0xed06, 0x0b52, 0xffef, 0x2578, - 0x1081, 0xf1c2, 0x088a, 0xfdef, - 0x712c, 0x1600, 0x709b, 0x8000, - 0x0004, 0x080a, 0xfdef, 0xc048, - 0xc047, 0x2400, 0x3f80, 0x0000, - 0x0260, 0x702c, 0x0e26, 0xff2f, - 0xda5a, 0x2400, 0x3f80, 0x0000, - 0x01ac, 0x0e1a, 0xff2f, 0xdab4, - 0x0d0e, 0xfeef, 0x208a, 0x0b04, - 0x0c9e, 0xfeef, 0xc046, 0x1e00, - 0x7384, 0x9003, 0xfec4, 0xc007, - 0x2079, 0x0000, 0xe008, 0xc043, - 0x231a, 0x3f80, 0x0010, 0x0000, - 0xc04c, 0xf012, 0x4200, 0x2144, - 0x07c0, 0x219a, 0x0008, 0xe806, - 0xb861, 0x0801, 0x0031, 0x7a5b, - 0x71c5, 0x2105, 0x0f80, 0x9003, - 0xe0c4, 0xb040, 0xc003, 0x79cf, - 0x09dd, 0x8022, 0x710c, 0x231a, - 0x3f9b, 0x0020, 0x0000, 0xd880, - 0xb88e, 0x1e00, 0x7004, 0x9003, - 0xe004, 0x2305, 0x3f80, 0x9002, - 0x0086, 0x9000, 0x2305, 0x3f81, - 0x9003, 0xfe86, 0xb8a0, 0x0eba, - 0xfe2f, 0xb100, 0xda3f, 0x208c, - 0x8ec3, 0xf604, 0x2844, 0x0102, - 0x8f23, 0xf011, 0xc008, 0x7124, - 0x2054, 0x0d0c, 0x231a, 0x0f80, - 0x0000, 0x1000, 0x7885, 0x781b, - 0x2005, 0x0f80, 0x9002, 0x1e00, - 0xb040, 0x8f04, 0x7b2f, 0x08e1, - 0x80e5, 0x208a, 0x07c4, 0x41c3, - 0x9003, 0xe024, 0xb100, 
0x208a, - 0x03c4, 0x190c, 0x0045, 0x19fe, - 0x8004, 0x700c, 0xf003, 0xc009, - 0x7404, 0x8fc3, 0x780f, 0x082f, - 0x07f4, 0xc049, 0xf09d, 0xc008, - 0x71c5, 0x2055, 0x08c1, 0xc00c, - 0x7905, 0x221a, 0x0f80, 0x0000, - 0x1000, 0x7825, 0x781b, 0x2005, - 0x0f81, 0x9002, 0x1e00, 0xc009, - 0xb100, 0x8f04, 0x7acf, 0x08db, - 0x80a5, 0xd80f, 0x0ef6, 0xfdaf, - 0xd90f, 0x45cb, 0x9003, 0xfe64, - 0x0af2, 0xff6f, 0x1d00, 0x1005, - 0x0aea, 0xff4f, 0x0976, 0xff2f, - 0xc006, 0x710c, 0x0942, 0x0060, - 0x218a, 0x0fc7, 0xd820, 0x0b26, - 0xfdaf, 0x712c, 0xc091, 0x702c, - 0x0aba, 0x0020, 0x714c, 0xd80f, - 0x0eba, 0xfdaf, 0xd90f, 0xd87f, - 0x0aba, 0xff6f, 0xb500, 0x0ab6, - 0xff4f, 0x0942, 0xff2f, 0xc006, - 0x730c, 0x090e, 0x0060, 0x218a, - 0x0fc7, 0xd820, 0x0aee, 0xfdaf, - 0x712c, 0x2455, 0x3f80, 0x702c, - 0x0a82, 0x0020, 0x714c, 0x8f63, - 0x235f, 0x024c, 0xf006, 0xc007, - 0x7164, 0x7014, 0x24c0, 0x1061, - 0x8f04, 0x796f, 0x083b, 0x8064, - 0x4338, 0xc08e, 0x702c, 0x0c6e, - 0xff2f, 0xda09, 0x70cd, 0xf004, - 0x71c5, 0x7185, 0xc003, 0x7dcf, - 0x0dd7, 0x9023, 0x798f, 0xc091, - 0x20f5, 0x0040, 0xc040, 0x2455, - 0x3f80, 0x20f5, 0x005f, 0xc000, - 0x235f, 0x3241, 0x200e, 0x07c0, - 0x2048, 0x000b, 0xc08e, 0x60b8, - 0x1800, 0x02c2, 0x202f, 0x02c7, - 0xc04d, 0x2400, 0x3f80, 0x0000, - 0x0260, 0x6038, 0x60b8, 0x8840, - 0xc10d, 0x0ab9, 0x8043, 0x235f, - 0x3481, 0x1800, 0x02c2, 0x2400, - 0x3f80, 0x0000, 0x01ac, 0x6119, - 0xc000, 0x79b4, 0x70e3, 0x781d, - 0xb100, 0xf1cc, 0x71c5, 0x8f04, - 0x79cf, 0x4338, 0x7110, 0x700c, - 0x0066, 0x002e, 0xc040, 0x0521, - 0xffcf, 0x235f, 0x3241, 0x2400, - 0x3f80, 0x0000, 0x0260, 0x43a1, - 0x6038, 0x41c3, 0x003e, 0x0002, - 0x089e, 0xfd6f, 0x60a8, 0x41a1, - 0x231a, 0x3f80, 0x0000, 0x2000, - 0x219a, 0x0008, 0x235f, 0x3483, - 0x42c3, 0x9002, 0x0080, 0x7905, - 0x2400, 0x3f80, 0x0000, 0x01ac, - 0x631b, 0x23f4, 0x0340, 0x2105, - 0x008c, 0xb400, 0x2242, 0x0800, - 0x7905, 0x23f4, 0x0340, 0xb100, - 0xc000, 0x7104, 0xc040, 0xc000, - 0x7d0f, 0xc003, 0x0d9f, 0x9022, - 0x4263, 0xf1c2, 
0x2480, 0x3f0a, - 0x1404, 0x341b, 0xc6c6, 0x78e0, - 0xc2e6, 0x44cb, 0x8000, 0x0771, - 0x8cc0, 0x261f, 0x108f, 0xf003, - 0x71c5, 0x8c61, 0x0b31, 0x03a3, - 0x706d, 0xf01c, 0x261a, 0x1f9f, - 0x0000, 0x1000, 0x239a, 0x0004, - 0x2114, 0x034d, 0x2705, 0x301f, - 0x2305, 0x07c3, 0x7b7b, 0xbb91, - 0xbb9c, 0xbb9f, 0x9360, 0x71e5, - 0x7165, 0xb560, 0x232f, 0x02c7, - 0x0bd5, 0x80a2, 0x7df0, 0xf1e2, - 0xc6c6, 0x78e0, 0xc2e6, 0x1cfc, - 0xb6c8, 0x201a, 0x0f8e, 0x0000, - 0x2000, 0x4220, 0x7d5d, 0x7fbd, - 0x4100, 0x40c3, 0x9002, 0x00b2, - 0x2605, 0x1003, 0x708d, 0x2745, - 0x160d, 0x7204, 0xb380, 0xb3a0, - 0x2605, 0x100d, 0x7204, 0x95a0, - 0x78c5, 0xbac1, 0x90c0, 0xb380, - 0x6a83, 0x2444, 0x17c3, 0x40a1, + 0x781d, 0x0d42, 0xfd6f, 0x2052, + 0x0000, 0x0c6e, 0xfeaf, 0x700c, + 0x700c, 0x42c3, 0x9003, 0xff68, + 0x1e00, 0x7004, 0x9003, 0xe004, + 0xb200, 0xc002, 0x712c, 0x2044, + 0x07cd, 0x4020, 0xed05, 0xbd61, + 0x0dff, 0x9031, 0x781b, 0x205a, + 0x0103, 0x1600, 0x7100, 0x9008, + 0x01ea, 0xc043, 0x12c3, 0x0700, + 0xc04a, 0xca0a, 0x208c, 0x8fc3, + 0xf40e, 0x1600, 0x7080, 0x8000, + 0x0000, 0xb8e5, 0xd8ff, 0xf208, + 0x1600, 0x7080, 0x8000, 0x07ca, + 0x1a0a, 0x3002, 0xc102, 0x230f, + 0x0041, 0xc14b, 0x2379, 0x3001, + 0x7224, 0xc14c, 0x1600, 0x7081, + 0x8000, 0x07ca, 0x03e9, 0x0020, + 0x2139, 0x0000, 0xc103, 0xc003, + 0x2104, 0x0f81, 0x0000, 0x3f00, + 0x2004, 0x0f80, 0x0000, 0xc0ff, + 0x2180, 0x0010, 0x7825, 0x1e00, + 0x7004, 0x9008, 0x01ea, 0x700c, + 0x1e00, 0x7004, 0x900f, 0xe022, + 0xc00a, 0x762c, 0xb8a5, 0x1e00, + 0x7004, 0x9005, 0xe0ee, 0xc00b, + 0x0cd6, 0xffaf, 0x780f, 0xc002, + 0xc100, 0x7034, 0x037a, 0x0021, + 0x780f, 0xc100, 0x2178, 0x0081, + 0xc145, 0x0c62, 0xfdef, 0x712c, + 0x1600, 0x709e, 0x8000, 0x0004, + 0xc047, 0xc08f, 0xd9ff, 0x0a5e, + 0xff6f, 0xdab4, 0x40c3, 0x9003, + 0xe004, 0x1800, 0x0f84, 0x0000, + 0x2100, 0x700c, 0x1e00, 0x7004, + 0x9003, 0xfec4, 0xc005, 0x71ad, + 0xe008, 0xc049, 0xc005, 0x205f, + 0x0200, 0xc046, 0xc106, 0xf011, + 0x6941, 0x219a, 0x0008, 0x43a1, + 0xe805, 
0xb861, 0x08ff, 0x8031, + 0x7b7b, 0x2105, 0x0f80, 0x9003, + 0xe0c4, 0xb060, 0x4140, 0xc009, + 0x09e1, 0x8024, 0x2144, 0x07c0, + 0x40c3, 0x8000, 0x07ca, 0x261a, + 0x3f9e, 0x0010, 0x0000, 0x1003, + 0x009b, 0x1c34, 0x3780, 0xe598, + 0x01dc, 0x000d, 0x40c3, 0x8000, + 0x07ca, 0x8824, 0xf015, 0xc007, + 0x2055, 0x08c2, 0xc00d, 0x7a05, + 0xd8ef, 0x2004, 0x06c0, 0x201a, + 0x0f80, 0x0000, 0x1000, 0x7167, + 0x7845, 0x781b, 0x2005, 0x0f80, + 0x9002, 0x1e00, 0xb0a0, 0x202f, + 0x06c7, 0x09d7, 0x8005, 0x40c3, + 0x9003, 0xe004, 0x1800, 0x0f84, + 0x0000, 0x2100, 0x41c3, 0x9003, + 0xfe64, 0xc000, 0x7214, 0x40c3, + 0x9003, 0xf064, 0x21ca, 0x0001, + 0x700c, 0x0faa, 0xff6f, 0xb100, + 0x0fa2, 0xff4f, 0xd80f, 0x0b22, + 0xfdef, 0xd90f, 0x730c, 0x0a6e, + 0x0060, 0x218a, 0x0fc7, 0xdf7f, + 0x46cb, 0x9003, 0xe024, 0x710c, + 0xb6e0, 0xc048, 0xb606, 0x208a, + 0x03c4, 0x1efe, 0x9004, 0xd818, + 0x0ca2, 0xfdaf, 0x712c, 0x208a, + 0x0004, 0x1e00, 0x7004, 0x9003, + 0xe004, 0xd80f, 0x0ae2, 0xfdef, + 0xd90f, 0xd807, 0x0c86, 0xfdaf, + 0x712c, 0x710c, 0xb6e0, 0xb606, + 0xd80f, 0xd90f, 0x0aca, 0xfdef, + 0x1efe, 0x93c5, 0x730c, 0x0c6e, + 0xfdaf, 0x712c, 0xd80f, 0x0aba, + 0xfdef, 0xd90f, 0x700c, 0x712c, + 0x1e00, 0x1fc5, 0x1e0c, 0x1fc5, + 0x0c52, 0xfdaf, 0x1efe, 0x9004, + 0x40c3, 0x8000, 0x07ca, 0x8804, + 0x260a, 0x3f80, 0x9002, 0x0064, + 0xc04e, 0x40c3, 0x8000, 0x07ca, + 0x8843, 0x4140, 0x4358, 0xf05f, + 0x221a, 0x0f9f, 0x0000, 0x1000, + 0xc606, 0xc405, 0xf053, 0x225f, + 0x0480, 0x74d5, 0x24c0, 0x1061, + 0x23ca, 0x1061, 0xc041, 0xc301, + 0xc08f, 0x631b, 0x7bd4, 0x9300, + 0xe0c0, 0x0088, 0x0025, 0x40c1, + 0x209a, 0x0004, 0x2005, 0x07c0, + 0x7f1b, 0x2705, 0x178f, 0x9700, + 0x7014, 0xc008, 0x23ca, 0x1021, + 0x20ca, 0x0021, 0x0b53, 0x1030, + 0xc048, 0x78cf, 0x080d, 0x00d0, + 0x084b, 0x0210, 0x0855, 0x01d1, + 0x788f, 0x2079, 0x0000, 0x205f, + 0x0100, 0xf010, 0xc301, 0x677f, + 0x7f14, 0x6861, 0x209a, 0x0004, + 0x2005, 0x07c0, 0x781b, 0x2005, + 0x0780, 0x9000, 0xb700, 0x4060, + 0x0ee5, 0x9025, 0xc78f, 0x0e25, + 
0x11d1, 0x40c3, 0x8000, 0x07ca, + 0x7854, 0xa8b2, 0xf00a, 0x706d, + 0xf008, 0x9700, 0xb300, 0x40c3, + 0x8000, 0x07ca, 0x7854, 0xa8b3, + 0x71c5, 0xc009, 0x0e5b, 0x9004, + 0x7124, 0xc00e, 0x7a2f, 0x0845, + 0x80a5, 0x716d, 0xc008, 0x71a5, + 0x7014, 0xf313, 0x1600, 0x7080, + 0x8000, 0x0000, 0xb8c3, 0xc041, + 0xf031, 0xc506, 0xf02a, 0x203c, + 0x0343, 0x275f, 0x1480, 0xc68f, + 0x41c3, 0x0047, 0x0002, 0x42e1, + 0x661e, 0x26f4, 0x1340, 0x09d6, + 0xfd6f, 0x20bc, 0x0001, 0xc007, + 0x271a, 0x1f81, 0x0000, 0x1000, + 0x26f4, 0x1342, 0x2054, 0x0d00, + 0x7905, 0x40a1, 0x209a, 0x0004, + 0x71a5, 0x7825, 0x781b, 0x2005, + 0x0f81, 0x9002, 0x0000, 0xc001, + 0x6058, 0x2049, 0x0fc0, 0xb100, + 0xc009, 0x0daf, 0x9024, 0x730c, + 0x7167, 0x40c3, 0x8000, 0x07ca, + 0x8804, 0x272f, 0x16c7, 0x0897, + 0x83c5, 0xc000, 0x088f, 0x0091, + 0x0912, 0xfe4f, 0x4308, 0x40c3, + 0x8000, 0x07ca, 0x8884, 0x40c3, + 0x8000, 0x07ca, 0x8803, 0xf034, + 0x41c3, 0x8000, 0x07ca, 0x79b4, + 0x8972, 0x8953, 0x4a76, 0x4b51, + 0x7270, 0x21ca, 0x038d, 0x7e2f, + 0x261a, 0x12ce, 0xd908, 0x27ca, + 0x122d, 0x21ca, 0x012d, 0x2e44, + 0x180e, 0xf019, 0xc207, 0x219a, + 0x0004, 0x2254, 0x0d03, 0x251a, + 0x1f82, 0x0000, 0x1000, 0x7a65, + 0x7945, 0x793b, 0xb991, 0xb99c, + 0xb99f, 0x9160, 0x7ad0, 0x627a, + 0x2249, 0x0fc2, 0xb140, 0x210a, + 0x0780, 0x09d5, 0x83e4, 0x2140, + 0x005e, 0x7104, 0x7d0f, 0x0c9d, + 0x9365, 0xdf09, 0xf003, 0x0dae, + 0xfd8f, 0xc00a, 0xb885, 0x1e00, + 0x7004, 0x9005, 0xe0ee, 0xc003, + 0x1e00, 0x7004, 0x9008, 0x01ea, + 0xc004, 0x7104, 0xc044, 0xc004, + 0x780f, 0xc040, 0xc100, 0xc00c, + 0x7030, 0x0414, 0xffed, 0x714c, + 0x2480, 0x3c03, 0x1404, 0x341b, + 0xc6c6, 0x78e0, 0xc0e6, 0x43c3, + 0x8000, 0x07cd, 0x8ba0, 0x8b81, + 0x251f, 0x108f, 0xf01b, 0x251a, + 0x1f9e, 0x0000, 0x1000, 0x239a, + 0x0004, 0x2114, 0x038e, 0x2605, + 0x301e, 0x2305, 0x0783, 0x7b7b, + 0xbb91, 0xbb9c, 0xbb9f, 0x9360, + 0x71e5, 0x7165, 0xb660, 0x232f, + 0x02c7, 0x0bd7, 0x80a4, 0x7ef0, + 0x71a5, 0x0cf7, 0x9365, 0x706d, + 0xc4c6, 0x78e0, 0xc2e6, 
0x1cfc, + 0xb6c8, 0x201a, 0x0f8d, 0x0000, + 0x2000, 0x4728, 0x7afd, 0x4100, + 0x40c3, 0x9002, 0x00b2, 0x7a5d, + 0x2505, 0x100c, 0x2245, 0x0602, + 0x7204, 0x1c00, 0x1005, 0xb440, + 0x2505, 0x1002, 0x92c0, 0x7204, + 0xbfc1, 0x78a5, 0x6f43, 0x90a0, + 0x40c1, 0x2244, 0x07c3, 0xeb06, + 0xbb61, 0x0b01, 0x0031, 0x781d, + 0x1c00, 0x1005, 0x2044, 0x0083, + 0x40c1, 0x2744, 0x17cc, 0xec06, + 0xbc61, 0x0c01, 0x1031, 0x781d, + 0xb8c0, 0x2005, 0x00cc, 0x6f66, + 0x40c1, 0x2344, 0x07db, 0x0b11, + 0x3010, 0x2342, 0x305b, 0x0bff, + 0xb031, 0x781d, 0xbbc4, 0x2044, + 0x0100, 0x2405, 0x101e, 0x2740, + 0x124c, 0x40c1, 0x2444, 0x17ce, + 0xee05, 0xbe61, 0x0eff, 0x9031, + 0x781d, 0xbfc4, 0x2244, 0x07ce, + 0x42a1, 0xee06, 0xbe61, 0x0e01, + 0x1031, 0x7a5d, 0xbcc4, 0x2244, + 0x008e, 0x42a1, 0xef05, 0xbf61, + 0x0fff, 0x9031, 0x7a5d, 0x2044, + 0x0200, 0xbac0, 0x7e45, 0x42a1, 0xeb05, 0xbb61, 0x0bff, 0x8031, - 0x781d, 0x2044, 0x0083, 0x40a1, - 0x2244, 0x07cf, 0xef05, 0xbf61, - 0x0fff, 0x9031, 0x781d, 0xb8c0, - 0x6ae6, 0x7b05, 0x2744, 0x17db, - 0x40a1, 0x0b11, 0x3010, 0x2342, - 0x305b, 0x0bff, 0xb031, 0x781d, - 0x2044, 0x0100, 0x2240, 0x025f, - 0x7b05, 0x40a1, 0x2744, 0x37cd, - 0xed05, 0xbd61, 0x0dff, 0x9031, - 0x781d, 0x2044, 0x0200, 0x7b05, - 0x40c1, 0xbcc4, 0xec05, 0xbc61, - 0x0cff, 0x9031, 0x781d, 0x2044, - 0x008c, 0x40c1, 0xbac4, 0xea06, - 0xba61, 0x0a01, 0x0031, 0x781d, - 0xb8c0, 0x2005, 0x0302, 0x40c1, - 0xbfc4, 0xef06, 0xbf61, 0x0f01, - 0x1031, 0x781d, 0x2044, 0x0100, - 0x7a05, 0x40c1, 0x2744, 0x37df, - 0x0f0f, 0x3010, 0x2742, 0x305f, - 0x0ffd, 0xb031, 0x781d, 0x2044, - 0x0200, 0x7845, 0x205f, 0x0400, - 0x7865, 0x0d06, 0xfdaf, 0x780f, + 0x7a5d, 0x2005, 0x0780, 0x2244, + 0x0103, 0x42a1, 0xec05, 0xbc61, + 0x0cff, 0x9031, 0x7a5d, 0x7bc5, + 0x2244, 0x0202, 0x7a65, 0x225f, + 0x0402, 0x080a, 0xfdef, 0x7845, 0x1404, 0x341b, 0xc6c6, 0x78e0, - 0xc2e6, 0x1cfc, 0xb6c8, 0xc1a2, - 0x43db, 0x8000, 0x0771, 0x1300, - 0x308d, 0x706c, 0x71ef, 0xc141, - 0xc040, 0xea86, 0x7ebb, 0x742c, - 0x748d, 0xf010, 
0x255f, 0x124e, - 0x0a13, 0x00b1, 0xdc08, 0xe608, - 0x274a, 0x3240, 0x712c, 0xdb08, - 0xf004, 0x712c, 0xf002, 0x71a5, - 0x1301, 0x3080, 0x0813, 0x0363, - 0x4260, 0xf026, 0xc700, 0x623a, - 0x7fd4, 0xb700, 0x76e3, 0x0ceb, - 0x90a2, 0x4040, 0x209a, 0x0008, - 0x251a, 0x1f8f, 0x0000, 0x2000, - 0x78e5, 0x2005, 0x0f80, 0x9002, - 0x0064, 0x9000, 0xc701, 0xef6c, - 0x278a, 0x1fcf, 0x224a, 0x1280, - 0x7f04, 0x2242, 0x104a, 0x0aff, - 0x9031, 0x781d, 0x205f, 0x0800, - 0x60f8, 0xf1de, 0xc0a2, 0x1404, - 0x341b, 0xc6c6, 0xc0f1, 0x8819, - 0x0fb2, 0xfe2f, 0x4320, 0xe807, - 0x40c3, 0x900e, 0x1e4c, 0xb060, - 0xb041, 0xc0d1, 0x7ee0, 0x78e0, - 0xc0e4, 0x70ad, 0xf002, 0x71a5, - 0x0d27, 0x10b2, 0x708d, 0xf014, - 0x255a, 0x1c82, 0x605b, 0x245a, - 0x1642, 0x627a, 0x623e, 0x2414, - 0x1343, 0x42c3, 0x8000, 0x043c, - 0x634a, 0x7185, 0xae40, 0x0ce5, - 0x9092, 0xf1eb, 0xc4c4, 0x78e0, - 0x791b, 0x781d, 0x2184, 0x0a82, - 0x2046, 0x0a80, 0x7825, 0x205a, - 0x0101, 0x781d, 0x781d, 0x2044, - 0x0cc0, 0x2184, 0x0303, 0x7825, - 0x205f, 0x0401, 0x781d, 0x781d, - 0x781d, 0x781d, 0x7825, 0x7fe0, - 0x780f, 0x78e0, 0xc2e2, 0x45cb, - 0x9005, 0xe000, 0xd820, 0x0d76, - 0xfeef, 0x1d00, 0x1045, 0x1d00, - 0x1005, 0xc6c2, 0xc2e6, 0x1cfc, - 0xb6c8, 0xc1ad, 0x4318, 0x710c, - 0x2344, 0x37c2, 0xea05, 0xba61, - 0x0aff, 0x8031, 0x781b, 0x205a, - 0x0100, 0xc142, 0x702c, 0x200f, - 0x06cd, 0x1600, 0x7080, 0x8000, - 0x0004, 0x201a, 0x0f80, 0x0010, - 0x0000, 0xc044, 0xf010, 0x211a, - 0x0f80, 0x0000, 0x2000, 0x7124, - 0x2005, 0x0f82, 0x9002, 0x0094, - 0x9200, 0x2004, 0x0f80, 0x0000, - 0xfbff, 0xb200, 0x09e3, 0x8292, - 0x1600, 0x7100, 0x9004, 0x00aa, - 0xc045, 0x40c3, 0x8000, 0x0528, - 0x1e00, 0x7085, 0x9005, 0xe0aa, - 0x0a72, 0x00e0, 0xd978, 0x0d6e, - 0xfd8f, 0xd80f, 0x0a4e, 0xfdaf, - 0xd90f, 0x0e52, 0xff0f, 0x40c3, - 0x8000, 0x076d, 0xe013, 0x712c, - 0x0aee, 0x0020, 0xc041, 0xc001, - 0x712c, 0x099e, 0xfdaf, 0xdaf7, - 0x1600, 0x7081, 0x8000, 0x076d, - 0x235f, 0x3640, 0x704c, 0x215f, - 0x0c81, 0x238a, 0x0f7f, 0xc043, - 0x1c00, 
0x3fc1, 0x6119, 0xc001, - 0x6119, 0x255a, 0x1100, 0x78a5, - 0x780f, 0x0cde, 0x00a0, 0xc047, - 0xc001, 0x0eaa, 0xffef, 0x712c, - 0x1600, 0x7080, 0x8000, 0x076d, - 0x7daf, 0x205f, 0x0c81, 0xc003, - 0x6038, 0x70c3, 0x8000, 0x076d, - 0x88d4, 0x40c3, 0x9003, 0xe024, - 0x218a, 0x0044, 0x1800, 0x0045, - 0x180c, 0x0045, 0x18fe, 0x8044, - 0x40a1, 0x0b96, 0x00a0, 0x702c, - 0xd830, 0x0e12, 0xfd6f, 0x712c, - 0x2644, 0x1200, 0x0cda, 0xfe2f, - 0xc046, 0x700c, 0xf005, 0x6119, - 0x1900, 0x0fc3, 0x7104, 0x08fb, - 0x8532, 0xc188, 0x700c, 0x0dee, - 0xfd6f, 0x712c, 0xd80f, 0x098e, - 0xfdaf, 0xd90f, 0x40a1, 0x0b5a, - 0x00a0, 0x712c, 0x208a, 0x0fc7, - 0x45cb, 0x9003, 0xe174, 0x70cd, - 0xb500, 0xb501, 0x1d59, 0x939c, - 0x0e8d, 0x1a33, 0x40e1, 0x700c, - 0x0dba, 0xfd6f, 0x712c, 0x40c3, - 0x8000, 0x076d, 0x8822, 0x70ed, - 0xf002, 0x7124, 0x40c3, 0x8000, - 0x076d, 0x8803, 0x085b, 0x0062, - 0xd80f, 0x211a, 0x0f80, 0x0000, - 0x1000, 0xc388, 0x633b, 0x206c, - 0x0302, 0x2205, 0x0f80, 0x9002, - 0x0166, 0x9000, 0x8b80, 0x2079, - 0x0000, 0x7c9b, 0x7885, 0xab00, - 0x211a, 0x0f80, 0x0000, 0x0800, - 0x2004, 0x0f80, 0x0000, 0x0800, - 0x7845, 0x2005, 0x0f82, 0x9002, - 0x0064, 0x9200, 0x8b60, 0x2353, - 0x80be, 0xf3d5, 0x2080, 0x0010, - 0x71ed, 0xb200, 0xf1cf, 0x08ee, - 0xfdaf, 0xd90f, 0x71c5, 0x0f7d, - 0x9031, 0x700c, 0x41c3, 0x006e, - 0x0000, 0x0bb6, 0xfd2f, 0x2078, - 0x0000, 0x1d00, 0x1005, 0x0bfa, - 0xfe2f, 0x1d02, 0x1005, 0x700c, - 0x0a12, 0x0020, 0x732c, 0xd840, - 0x0d12, 0xfd6f, 0x712c, 0xc002, - 0x712c, 0x0caa, 0xffef, 0x704c, - 0xc006, 0x205f, 0x0100, 0x2052, - 0x0142, 0x40c3, 0x8000, 0x076d, - 0x8863, 0x40c3, 0x8000, 0x076d, - 0x8822, 0xe220, 0xf009, 0xc002, - 0x20f4, 0x0040, 0x4854, 0xc002, - 0x7834, 0xb080, 0x7124, 0x0bf3, - 0x8063, 0x4063, 0x08c2, 0xfdaf, - 0x712c, 0x2055, 0x0801, 0xc004, - 0x714c, 0x716c, 0x7825, 0x08e2, - 0x00e0, 0xc102, 0x704c, 0x1e00, - 0x7084, 0x9003, 0xe004, 0x1e00, - 0x7084, 0x9005, 0xe076, 0xc005, - 0x1e00, 0x7004, 0x9005, 0xe0aa, - 0xf00d, 0x221a, 0x0f80, 0x0000, - 
0x2000, 0x7144, 0x2005, 0x0f81, - 0x9002, 0x0094, 0x9100, 0xb88a, - 0xb100, 0x0aeb, 0x8292, 0x0d42, - 0xffcf, 0x1600, 0x7080, 0x8000, - 0x076d, 0x704c, 0x238a, 0x0f7f, - 0x205f, 0x0c81, 0xc003, 0x1c00, - 0x3fc1, 0x6119, 0xc001, 0x6119, - 0x0ade, 0x00a0, 0xc007, 0xc0ad, + 0xc2e6, 0x1cfc, 0xb6c8, 0x45cb, + 0x8000, 0x07cd, 0x8d80, 0x4338, + 0x4718, 0x706d, 0x710c, 0xea0d, + 0x245f, 0x1243, 0x0a1b, 0x00b1, + 0xd908, 0xe308, 0xd809, 0x71ed, + 0x234a, 0x1200, 0xf006, 0x7b9b, + 0x74ed, 0x742c, 0xf002, 0x71ed, + 0x1501, 0x109e, 0xf023, 0x45c9, + 0x259a, 0x1008, 0x241a, 0x1f82, + 0x0000, 0x2000, 0x7aa5, 0x2205, + 0x0f82, 0x9002, 0x0064, 0x0b1f, + 0x3030, 0x9a40, 0x258a, 0x1fcf, + 0x7d44, 0x7a5d, 0x7a5d, 0x7a5d, + 0x7a5d, 0x7a5d, 0x2284, 0x081f, + 0x62ba, 0x2714, 0x30cd, 0xb540, + 0x66fe, 0x631b, 0x09c3, 0x8385, + 0x7185, 0x0efd, 0xb325, 0x4669, 0x1404, 0x341b, 0xc6c6, 0x78e0, - 0x8821, 0xdac8, 0x216c, 0x0041, - 0xa821, 0x8822, 0xb9c6, 0xa822, - 0x886d, 0x2344, 0x0c01, 0x7a25, - 0x0b13, 0x017f, 0xa84d, 0x2185, - 0x0a03, 0xa82d, 0x8823, 0xb987, - 0xa823, 0x702c, 0xa831, 0xa830, - 0x7fe0, 0xa838, 0xc2e2, 0x0aaa, - 0xfdcf, 0x256f, 0x1343, 0x9520, - 0x205a, 0x0400, 0x7b3d, 0x218a, - 0x02c5, 0x233f, 0x004c, 0x238c, - 0x82c5, 0xd920, 0x21ca, 0x0c29, - 0x7c25, 0x238c, 0x84c8, 0xd942, - 0x24ca, 0x1049, 0x238c, 0x87cc, - 0xd953, 0x24ca, 0x1049, 0x238c, - 0x8bd2, 0xd954, 0x24ca, 0x1049, - 0x15fe, 0x9081, 0x2004, 0x0f80, - 0x0000, 0x1ff0, 0xb881, 0x211a, - 0x0f83, 0x0020, 0x0000, 0x41c3, - 0x9004, 0x00f8, 0x2305, 0x0042, + 0x8819, 0xb863, 0x7314, 0x20e0, + 0x07cd, 0x40c3, 0x900e, 0x1e4c, + 0xb020, 0x7fe0, 0xb041, 0x78e0, + 0xc0f1, 0x706d, 0x706c, 0xf016, + 0x70ef, 0xf011, 0x235a, 0x0c82, + 0x605c, 0x275a, 0x3642, 0x71e7, + 0x629a, 0x623c, 0x2600, 0x3682, + 0x123c, 0x0082, 0x2640, 0x305e, + 0xac40, 0x0fe5, 0xb094, 0x7164, + 0x0bd9, 0x80b4, 0x2314, 0x10de, + 0xc0d1, 0x7ee0, 0x791b, 0x781d, + 0x2184, 0x0a82, 0x2046, 0x0a80, + 0x7905, 0x215a, 0x0100, 0x793d, + 0x793d, 0x2144, 0x0cc1, 
0x2084, + 0x0303, 0x7905, 0x215f, 0x0400, + 0x793d, 0x793d, 0x793d, 0x793d, + 0x7825, 0x7fe0, 0x780f, 0x78e0, + 0xc2e2, 0x45cb, 0x9005, 0xe000, + 0xd820, 0x08fe, 0xff2f, 0x1d00, + 0x1045, 0x1d00, 0x1005, 0xc6c2, + 0xc2e6, 0x1cfc, 0xb6c8, 0x2482, + 0x3b02, 0x1600, 0x709b, 0x8000, + 0x0004, 0xc142, 0x702c, 0xc041, + 0xf007, 0xf8a8, 0x2004, 0x0f80, + 0x0000, 0xfbff, 0xb200, 0x09f7, + 0x8294, 0x1600, 0x7100, 0x9004, + 0x00aa, 0x41c3, 0x8000, 0x051f, + 0xda78, 0xc045, 0x1e00, 0x7085, + 0x9005, 0xe0aa, 0x0d1e, 0xfd2f, + 0xc08d, 0xc08d, 0x0afe, 0x00e0, + 0xd978, 0x096a, 0xfdcf, 0xd80f, + 0x0d8e, 0xfdaf, 0xd90f, 0x0a06, + 0xff4f, 0x40c3, 0x8000, 0x067c, + 0x0b1e, 0x0020, 0x712c, 0x40c3, + 0x8000, 0x067c, 0x712c, 0x0a0e, + 0xfdaf, 0xdaf7, 0x40c3, 0x8000, + 0x067c, 0x712c, 0x0bda, 0xfeef, + 0xda80, 0xc001, 0x71ed, 0x42e1, + 0xb8c4, 0xe806, 0xb861, 0x0801, + 0x0031, 0x7a5b, 0x45cb, 0x8000, + 0x07c9, 0x225a, 0x0102, 0xc001, + 0x8d20, 0x238a, 0x0f7f, 0x220f, + 0x000e, 0x265a, 0x1100, 0x704c, + 0x1c00, 0x3fc1, 0x78c5, 0x780f, + 0xc044, 0xc001, 0x205f, 0x0640, + 0xc043, 0x215f, 0x0c80, 0xc103, + 0x6119, 0x2100, 0x0f81, 0x8000, + 0x067c, 0x0d26, 0x00a0, 0xc004, + 0x40c3, 0x8000, 0x067c, 0x0e96, + 0xffef, 0x712c, 0x8d00, 0x7ecf, + 0x205f, 0x0c81, 0xc003, 0x6038, + 0x2000, 0x0f80, 0x8000, 0x067c, + 0x8801, 0x218a, 0x0044, 0xc047, + 0x40c3, 0x9003, 0xe024, 0xb0e0, + 0xb0e6, 0x18fe, 0x8044, 0x40c1, + 0x0b92, 0x00a0, 0x702c, 0xd830, + 0x0e72, 0xfd6f, 0x712c, 0x08a2, + 0xfe4f, 0x231a, 0x3f80, 0x0010, + 0x0000, 0xc046, 0x700c, 0xf006, + 0x6119, 0x1900, 0x0fc3, 0x7104, + 0x08f9, 0x8534, 0xc188, 0x700c, + 0x0e4a, 0xfd6f, 0x712c, 0xd80f, + 0x0c96, 0xfdaf, 0xd90f, 0x40c1, + 0x0b52, 0x00a0, 0x712c, 0x208a, + 0x0fc7, 0x46cb, 0x9003, 0xe174, + 0x70ed, 0xb600, 0xb601, 0x1e59, + 0x901d, 0x0f7f, 0x1a35, 0x700c, + 0x0e1a, 0xfd6f, 0x712c, 0x1503, + 0x108b, 0x8d82, 0x706f, 0xf02b, + 0x241a, 0x1f80, 0x0000, 0x1000, + 0x206c, 0x0302, 0x2205, 0x0f80, + 0x9002, 0x0166, 0x9000, 0x2079, + 0x0003, 0xc088, 
0x6098, 0x8820, + 0x793b, 0x7965, 0xa820, 0x241a, + 0x1f80, 0x0000, 0x0800, 0x2153, + 0x80be, 0x2004, 0x0f80, 0x0000, + 0x0800, 0x7a05, 0x2205, 0x0f82, + 0x9002, 0x0064, 0x9200, 0xf206, + 0x2080, 0x0010, 0x716f, 0xb200, + 0x7185, 0x0bb1, 0x9325, 0xd80f, + 0x0c06, 0xfdaf, 0xd90f, 0x0b8f, + 0xb031, 0x71e5, 0x706f, 0x41c3, + 0x006e, 0x0000, 0x0c3e, 0xfd2f, + 0x2378, 0x3000, 0x1e00, 0x1005, + 0x0fc6, 0xfe2f, 0x1e02, 0x1005, + 0x700c, 0x0a16, 0x0020, 0x732c, + 0xd840, 0x0d82, 0xfd6f, 0x712c, + 0xc002, 0x712c, 0x0cb6, 0xffef, + 0x704c, 0x8d43, 0x8d62, 0xf00f, + 0xc102, 0xc007, 0x21f4, 0x00c1, + 0xb8e3, 0xd840, 0x20ca, 0x0822, + 0x7902, 0xc002, 0x7874, 0x7164, + 0xb020, 0x0ae9, 0x80c5, 0xc001, + 0x0bea, 0xfdaf, 0x712c, 0xc106, + 0x2055, 0x0800, 0x714c, 0x7825, + 0xc102, 0x094e, 0x00e0, 0x716c, + 0x702c, 0x1e00, 0x7044, 0x9003, + 0xe004, 0x1e00, 0x7044, 0x9005, + 0xe076, 0xc005, 0x1e00, 0x7004, + 0x9005, 0xe0aa, 0xf004, 0xf80f, + 0xb88a, 0xb200, 0x09fb, 0x8294, + 0x0d62, 0xffcf, 0x8d00, 0x704c, + 0x238a, 0x0f7f, 0x205f, 0x0c81, + 0xc003, 0x1c00, 0x3fc1, 0x6038, + 0x2000, 0x0f81, 0x8000, 0x067c, + 0x0b56, 0x00a0, 0xc004, 0x2480, + 0x3b02, 0x1404, 0x341b, 0xc6c6, + 0x211a, 0x0f80, 0x0000, 0x2000, + 0x7124, 0x2005, 0x0f82, 0x9002, + 0x0094, 0x9200, 0x7ee0, 0x78e0, + 0x8821, 0x216c, 0x0041, 0xa821, + 0x8822, 0xb9c6, 0xa822, 0x882d, + 0x2144, 0x0c02, 0x2285, 0x023f, + 0x0913, 0x017f, 0xa84d, 0x794f, + 0xb985, 0xa82d, 0x8823, 0xb987, + 0xa823, 0x702c, 0xa838, 0xa831, + 0x7fe0, 0xa830, 0xc2e2, 0x0e9a, + 0xfdcf, 0x256f, 0x1343, 0x9560, + 0xda20, 0xd940, 0x238c, 0x85ca, + 0x22ca, 0x0c2d, 0x238c, 0x89d0, + 0x22ca, 0x004d, 0x238c, 0x8fd8, + 0xd950, 0x22ca, 0x004d, 0x218a, + 0x05ca, 0x213c, 0x00c1, 0x238c, + 0x89d0, 0x21ca, 0x00ad, 0x238c, + 0x8fd8, 0x21ca, 0x00ed, 0x73d3, + 0x0000, 0x095f, 0x21ca, 0x012d, + 0x2105, 0x008c, 0x15fe, 0x9081, + 0x205a, 0x0400, 0x211a, 0x0f83, + 0x0020, 0x0000, 0x41c3, 0x9004, + 0x00f8, 0x2004, 0x0f80, 0x0000, + 0x1ff0, 0xb881, 0x2305, 0x0042, 0x7224, 
0x7965, 0xb280, 0xb100, - 0xc6c2, 0x78e0, 0xc0e4, 0x708d, - 0xf002, 0x7185, 0x0c23, 0x10b2, - 0x70ad, 0xf014, 0x42c3, 0x8000, - 0x043c, 0x635b, 0x245a, 0x1c82, - 0x605e, 0x255a, 0x1642, 0x71a5, - 0x62da, 0x622a, 0xab40, 0x0de9, - 0x90b2, 0x2514, 0x1303, 0xf1eb, - 0xc4c4, 0x78e0, 0xc2e6, 0x1209, - 0x308d, 0x70ed, 0x70cd, 0x1a09, - 0x33c2, 0xf009, 0x26f0, 0x7380, - 0x8000, 0x08f0, 0x0b72, 0x0040, - 0x71c5, 0xcc22, 0x0ef1, 0x9002, - 0x1a22, 0x33dc, 0x1a09, 0x3342, - 0xc6c6, 0x78e0, 0x209a, 0x0004, - 0x43c3, 0x9008, 0x01ea, 0x71c3, - 0x0000, 0xfffe, 0x2000, 0x0f82, - 0x0000, 0xfe00, 0x9300, 0x2004, - 0x0f80, 0x0000, 0xc0ff, 0x7845, - 0x9341, 0xb300, 0x226c, 0x0140, - 0x7825, 0x7fe0, 0xb301, 0x78e0, - 0x7a1b, 0x2205, 0x0f82, 0x9003, - 0xe164, 0x1a00, 0x0005, 0x43c3, - 0x8000, 0x0771, 0x8b80, 0xf00e, - 0x241a, 0x1f82, 0x0000, 0x1000, - 0x7185, 0x7a05, 0x7a5b, 0x2205, - 0x0f82, 0x9002, 0x0164, 0xb220, - 0x8b41, 0x0ae9, 0x8303, 0x7ee0, - 0xc2e2, 0x1600, 0x710d, 0x8000, - 0x0006, 0xe56d, 0x2d85, 0x1b81, - 0x2553, 0x9040, 0xf205, 0x79af, - 0x204e, 0x0100, 0x603d, 0x7aaf, - 0x41c3, 0x00af, 0x0001, 0x0ef6, - 0xfd6f, 0xd80a, 0x40c3, 0x8000, - 0x076c, 0xa8a0, 0xc6c2, 0x78e0, - 0xc2e6, 0x266f, 0x15c3, 0x8e40, - 0x4508, 0xea17, 0x0b4e, 0xfeaf, - 0x4040, 0x4708, 0xe887, 0x41c3, - 0x00d5, 0x0001, 0x0902, 0xfd2f, - 0x700c, 0x40e1, 0x209a, 0x0001, - 0x7f05, 0x78bb, 0x2005, 0x0f80, - 0x9003, 0xfe92, 0xb0e0, 0x16ff, - 0x9082, 0xea18, 0x0b1e, 0xfeaf, - 0x4040, 0x228c, 0x8fc3, 0xf209, - 0xe888, 0x41c3, 0x00d6, 0x0001, - 0x08ce, 0xfd2f, 0x700c, 0x700c, - 0x209a, 0x0001, 0x79bb, 0x2105, - 0x0f81, 0x9003, 0xfe9a, 0xb100, - 0xc6c6, 0x78e0, 0xc0f1, 0x1600, - 0x7082, 0x8000, 0x0009, 0x228c, - 0x8c03, 0xf210, 0xea16, 0x0a1f, - 0x0a30, 0x722c, 0xe2f8, 0xf404, - 0x712c, 0xf009, 0x41c3, 0x00d7, - 0x0001, 0x088e, 0xfd2f, 0x700c, - 0x702c, 0x215f, 0x0400, 0x7825, - 0x1e00, 0x7004, 0x9005, 0xe0a0, - 0xc0d1, 0x7ee0, 0xc2e6, 0x1cfc, - 0xb6c8, 0x4328, 0x4100, 0x70ed, - 0x40c3, 0x9003, 0xff64, 0xb0e0, - 
0x1804, 0x0045, 0x40c3, 0x8000, - 0x0771, 0x88c0, 0x232f, 0x32c7, - 0xf002, 0x71c5, 0x8841, 0x0a77, - 0x03a2, 0x270a, 0x32c0, 0x706c, - 0x238c, 0xbfc3, 0xf41a, 0x261a, - 0x1f83, 0x0000, 0x2000, 0x42c3, - 0x9002, 0x1f64, 0x2305, 0x008c, - 0x7444, 0x7b45, 0xb420, 0x0bd7, - 0x923f, 0xb3e0, 0x42c3, 0x9003, - 0xf164, 0xb2e0, 0x1a04, 0x0045, - 0xf1e1, 0x7164, 0x272f, 0x37c2, - 0x0fbb, 0xb010, 0x0bb7, 0x8253, - 0x0ff3, 0xb03e, 0x4260, 0x261a, - 0x1f8c, 0x0000, 0x2000, 0x229a, - 0x0008, 0x7c45, 0x42c3, 0x9002, - 0x0164, 0x2405, 0x108d, 0x7444, - 0x7a85, 0xb520, 0x1a00, 0x0005, - 0xf1e5, 0x1404, 0x341b, 0xc6c6, + 0xc6c2, 0x78e0, 0xc0e4, 0x264a, + 0x3000, 0x704c, 0xf013, 0x706c, + 0xf00c, 0x225a, 0x0c8c, 0x7165, + 0x609d, 0x235a, 0x064c, 0x7164, + 0x64bc, 0x642c, 0x1e3c, 0x1302, + 0x0beb, 0x80b4, 0x2300, 0x168e, + 0x7144, 0x0adf, 0x80b4, 0x2614, + 0x308b, 0xc4c4, 0xc2e6, 0x1209, + 0x308d, 0x70cd, 0x70ed, 0x1a09, + 0x3382, 0xf009, 0x26f0, 0x73c0, + 0x8000, 0x08e8, 0x0b5e, 0x0040, + 0x71e5, 0xcc22, 0x0ff1, 0x9004, + 0x1a09, 0x3342, 0x1a22, 0x339c, + 0xc6c6, 0x78e0, 0x43c3, 0x9008, + 0x01ea, 0x9340, 0x209a, 0x0004, + 0x2000, 0x0f80, 0x0000, 0xfe00, + 0x2204, 0x0f82, 0x0000, 0xc0ff, + 0x7845, 0x9341, 0xb300, 0xb962, + 0x226c, 0x0140, 0x7825, 0x7fe0, + 0xb301, 0x78e0, 0x7a1b, 0x2205, + 0x0f82, 0x9003, 0xe164, 0x1a00, + 0x0005, 0x42c3, 0x8000, 0x07cd, + 0x8a61, 0x8a80, 0xf00d, 0x241a, + 0x1f82, 0x0000, 0x1000, 0x7185, + 0x7a05, 0x7a5b, 0x2205, 0x0f82, + 0x9002, 0x0164, 0xb220, 0x0beb, + 0x8305, 0x7ee0, 0xc2e2, 0x1600, + 0x710d, 0x8000, 0x0006, 0xe56d, + 0x2d85, 0x1b81, 0x2553, 0x9041, + 0xf205, 0x78af, 0x214e, 0x010d, + 0x651d, 0x7aaf, 0x41c3, 0x00b0, + 0x0001, 0x0a0e, 0xfdaf, 0xd80a, + 0x40c3, 0x8000, 0x07c8, 0xa8a0, + 0xc6c2, 0x78e0, 0xc2e6, 0x266f, + 0x15c3, 0x8e40, 0x4508, 0xea17, + 0x0f32, 0xfeaf, 0x4040, 0x4708, + 0xe887, 0x41c3, 0x00d7, 0x0001, + 0x098a, 0xfd2f, 0x700c, 0x40e1, + 0x209a, 0x0001, 0x7f05, 0x78bb, + 0x2005, 0x0f80, 0x9003, 0xfe92, + 0xb0e0, 0x16ff, 0x9082, 
0xea18, + 0x0f02, 0xfeaf, 0x4040, 0x228c, + 0x8fc3, 0xf209, 0xe888, 0x41c3, + 0x00d8, 0x0001, 0x0956, 0xfd2f, + 0x700c, 0x700c, 0x209a, 0x0001, + 0x79bb, 0x2105, 0x0f81, 0x9003, + 0xfe9a, 0xb100, 0xc6c6, 0x78e0, + 0xc0f1, 0x1600, 0x7082, 0x8000, + 0x0009, 0xea1a, 0x0a23, 0x0a30, + 0x720c, 0x228c, 0x8c03, 0xf20c, + 0xe2f8, 0xf404, 0x710c, 0xf009, + 0x41c3, 0x00d9, 0x0001, 0x0916, + 0xfd2f, 0x700c, 0x700c, 0x205f, + 0x0401, 0x7825, 0x1e00, 0x7004, + 0x9005, 0xe0a0, 0xc0d1, 0x7ee0, + 0xc0e6, 0x4328, 0x70ed, 0x41c3, + 0x9003, 0xff64, 0xb1e0, 0x1904, + 0x0045, 0x42c3, 0x8000, 0x07cd, + 0x8a21, 0x8ac0, 0xf039, 0x228c, + 0x8fc3, 0xf418, 0x261a, 0x1f83, + 0x0000, 0x2000, 0x42c3, 0x9002, + 0x1f64, 0x2305, 0x008c, 0x7444, + 0x7a65, 0xb400, 0x0b51, 0x123f, + 0xb2e0, 0x42c3, 0x9003, 0xf164, + 0xb2e0, 0x1a04, 0x0045, 0xf020, + 0x260a, 0x32c0, 0x708d, 0xf016, + 0xf753, 0x4381, 0x261a, 0x1f82, + 0x0000, 0x2000, 0x239a, 0x0008, + 0x7b45, 0x42c3, 0x9002, 0x0164, + 0x2305, 0x008d, 0x7444, 0x7b45, + 0xb500, 0x1b00, 0x0005, 0x7185, + 0x0e0d, 0x3010, 0x0cd5, 0x9274, + 0x262f, 0xb782, 0x71c5, 0x0993, + 0x83a5, 0x222f, 0x02c7, 0xc4c6, 0xc2e6, 0x1cfc, 0xb6c8, 0xc1a1, - 0xdb25, 0x41c3, 0x9004, 0x0030, - 0xbb9f, 0x11b4, 0x0502, 0x8b00, - 0x0a0b, 0x0171, 0xb100, 0x8b00, - 0xf005, 0x1600, 0x7080, 0x8000, - 0x0058, 0xb101, 0x8be0, 0x0f6f, - 0x1030, 0x2744, 0x1080, 0xc040, - 0x1600, 0x7080, 0x8000, 0x0032, - 0x1600, 0x708d, 0x8000, 0x0773, - 0x2744, 0x105b, 0xf84f, 0xf003, - 0x71a5, 0x40c3, 0x8000, 0x0773, - 0x8801, 0x0843, 0x0362, 0x4063, - 0x209a, 0x0004, 0x2753, 0x1041, - 0x7905, 0xc000, 0x209a, 0x0004, - 0x7905, 0x40c1, 0x209a, 0x0010, - 0xf83d, 0x0b13, 0x3030, 0xb100, - 0x79af, 0x7acf, 0x0ea6, 0xfdaf, - 0x700c, 0xc000, 0xe862, 0x79af, - 0x7acf, 0x0e9a, 0xfdaf, 0x710c, - 0xf1dc, 0x1600, 0x708f, 0x8000, - 0x0058, 0x0f6f, 0x1030, 0x2744, - 0x105b, 0x40c3, 0x8000, 0x0773, - 0x88a2, 0x2744, 0x1080, 0xc040, + 0x40c3, 0x9004, 0x0030, 0x10b4, + 0x0501, 0x1600, 0x708f, 0x8000, + 0x0025, 0x090d, 
0x0171, 0xb0e0, + 0xb0e1, 0xf007, 0x1600, 0x7081, + 0x8000, 0x0058, 0xb021, 0x0f6f, + 0x1030, 0x716f, 0x1600, 0x7080, + 0x8000, 0x0032, 0x2744, 0x108d, + 0xb8e3, 0x1600, 0x7080, 0x8000, + 0x07cf, 0x23ca, 0x30a1, 0x2744, + 0x104e, 0xc040, 0xf01b, 0x209a, + 0x0004, 0x2753, 0x1041, 0x7905, + 0x40a1, 0xf848, 0xc000, 0xf841, + 0xee07, 0xc000, 0x4263, 0x790f, + 0x0aaa, 0xfdef, 0x700c, 0xed08, + 0xc000, 0x4263, 0x790f, 0x0a9e, + 0xfdef, 0x710c, 0xc000, 0x7104, + 0xc040, 0x40c3, 0x8000, 0x07cf, + 0x8821, 0xc000, 0x09c3, 0x8025, + 0x40c1, 0x1600, 0x708d, 0x8000, + 0x0058, 0x0d6f, 0x1030, 0x716f, 0x1600, 0x7080, 0x8000, 0x0065, - 0xf832, 0xf003, 0x71a5, 0x40c3, - 0x8000, 0x0773, 0x8803, 0x0843, - 0x0362, 0x4063, 0x209a, 0x0004, - 0x2753, 0x1041, 0x7905, 0xc000, - 0x209a, 0x0004, 0x7905, 0x40c1, - 0x209a, 0x0010, 0xf820, 0x0b13, - 0x3030, 0xb100, 0x79af, 0x7acf, - 0x0e32, 0xfdaf, 0x700c, 0xc000, - 0xe862, 0x79af, 0x7acf, 0x0e26, - 0xfdaf, 0x710c, 0xf1dc, 0x1600, - 0x708e, 0x8000, 0x0004, 0x0ed6, - 0xfdcf, 0x0ede, 0xfdef, 0x4508, - 0x4100, 0x0cee, 0xffef, 0x40a1, - 0x43c3, 0x9008, 0x01ea, 0x261a, - 0x1f8c, 0x0020, 0x0000, 0x9300, - 0x45cb, 0x9012, 0x0402, 0x2004, - 0x0f81, 0x0000, 0x3f00, 0x2405, - 0x1342, 0xb220, 0x6d44, 0x7a85, - 0x71c3, 0x0000, 0x0c00, 0x6d02, - 0xb220, 0x7c05, 0x9301, 0xb8c5, - 0xb400, 0x7487, 0x1404, 0x341b, - 0xc6c6, 0x78e0, 0x2105, 0x0002, - 0x251a, 0x1f80, 0x0000, 0x2000, + 0x2544, 0x108e, 0xb8e3, 0x40c3, + 0x8000, 0x07cf, 0x8802, 0x23ca, + 0x30a1, 0x2544, 0x104f, 0xc040, + 0xf01b, 0x209a, 0x0004, 0x2553, + 0x1041, 0x7905, 0x40c1, 0xf82b, + 0xc000, 0xf824, 0xef07, 0xc000, + 0x4263, 0x790f, 0x0a36, 0xfdef, + 0x700c, 0xee08, 0xc000, 0x4263, + 0x790f, 0x0a2a, 0xfdef, 0x710c, + 0xc000, 0x7104, 0xc040, 0x40c3, + 0x8000, 0x07cf, 0x8823, 0xc000, + 0x09c3, 0x8025, 0x40e1, 0x1600, + 0x708e, 0x8000, 0x0004, 0x0aae, + 0xfe0f, 0x0ab6, 0xfe2f, 0x4508, + 0x4100, 0x0cfe, 0xffef, 0x40a1, + 0x261a, 0x1f8c, 0x0020, 0x0000, + 0x43c3, 0x9008, 0x01ea, 0x9300, + 0x45cb, 
0x9012, 0x0402, 0x2405, + 0x1342, 0x2004, 0x0f81, 0x0000, + 0x3f00, 0xb220, 0x6d44, 0x7a85, + 0x2196, 0x0006, 0xb220, 0x9321, + 0x6d02, 0x7885, 0xb9c5, 0xb020, + 0x7487, 0x1404, 0x341b, 0xc6c6, + 0x201a, 0x0f80, 0x0000, 0x2000, 0x2005, 0x0f81, 0x9002, 0x0154, - 0x9100, 0x7845, 0x7ee0, 0x78e0, - 0x2044, 0x0200, 0x781d, 0x781d, - 0x781d, 0x204e, 0x008e, 0x7ee0, - 0xc2e2, 0xc1a4, 0x1600, 0x7080, - 0x8000, 0x0685, 0x702c, 0x754c, - 0xc043, 0xd8ff, 0xdb08, 0xc142, - 0xc141, 0x4528, 0x0fda, 0xfd2f, - 0xc140, 0x1600, 0x7080, 0x8000, - 0x0685, 0xd910, 0x754c, 0xc043, - 0xd8ff, 0xdb0c, 0xc542, 0xc541, - 0x0fbe, 0xfd2f, 0xc540, 0xd8ff, - 0xd980, 0x754c, 0x726c, 0xc543, - 0xc542, 0xc541, 0x0faa, 0xfd2f, - 0xc540, 0xc0a4, 0xc6c2, 0x78e0, - 0xc2e4, 0xc1a4, 0x70ad, 0xd8ff, - 0x702c, 0x754c, 0x736c, 0xc543, - 0xc542, 0xc541, 0x0f8a, 0xfd2f, - 0xc540, 0x46cb, 0x8000, 0x0685, - 0x8e00, 0x0825, 0x00b4, 0x702c, - 0x7104, 0x781d, 0xb862, 0x780f, - 0xc542, 0xc541, 0xc540, 0xc043, - 0xd8ff, 0x754c, 0x0f62, 0xfd2f, - 0x726c, 0x8e00, 0xc542, 0xc541, - 0xc540, 0xc043, 0xd8ff, 0x702c, - 0x754c, 0x0f4e, 0xfd2f, 0xdb08, - 0x8e00, 0xd908, 0x754c, 0xc043, - 0xd8ff, 0xdb0c, 0xc542, 0xc541, - 0x0f36, 0xfd2f, 0xc540, 0xc0a4, - 0xc6c4, 0x78e0, 0x42c3, 0x9004, - 0x0110, 0x9200, 0x204f, 0x0101, - 0xb8a4, 0xb220, 0x7fe0, 0xb200, - 0xc0e6, 0x215f, 0x0483, 0x44cb, - 0x8000, 0x0684, 0x702c, 0x647a, - 0x8aa2, 0x8ac3, 0x7d04, 0xf003, - 0x7124, 0x0919, 0x03a3, 0x2314, - 0x0042, 0x629a, 0x8ae5, 0x08f5, - 0x83ce, 0x8a44, 0x7d45, 0xf1f6, - 0x40a1, 0xc4c6, 0xc2e6, 0x1cfc, - 0xb6c8, 0x2482, 0x3d09, 0x4608, - 0xd825, 0xb89f, 0x8840, 0x1033, - 0x0083, 0x10df, 0x8080, 0x702c, - 0x70ed, 0xc04b, 0xca09, 0x0e1f, - 0x11b5, 0xc146, 0x2653, 0x114c, - 0xd939, 0xbcc4, 0xec05, 0xbc61, - 0x0cff, 0x9031, 0x793d, 0xb9c0, - 0xc146, 0xc10b, 0x7b45, 0xc34d, - 0x211a, 0x0f81, 0x0010, 0x0000, - 0x2078, 0x0000, 0x1600, 0x7083, - 0x8000, 0x0001, 0x1208, 0x308c, - 0xc14f, 0xc048, 0x2400, 0x3f80, - 0x0000, 0x010c, 0x702c, 0x0fe6, - 
0xfeaf, 0xdab4, 0xc096, 0x0fde, - 0xfeaf, 0xdab4, 0xbbe2, 0x710c, - 0x20ca, 0x03e1, 0x6c21, 0x782c, - 0x43db, 0x8000, 0x076d, 0xc049, - 0x2679, 0x1180, 0xc04e, 0x2642, - 0x1240, 0xc04c, 0x700c, 0xc045, - 0xf002, 0x71e5, 0x71f5, 0x0282, - 0x0029, 0x2650, 0x1000, 0x2078, - 0x0101, 0x2678, 0x10c0, 0x7825, - 0xc050, 0x710c, 0x4300, 0x2744, - 0x17c0, 0xc106, 0xe805, 0xb861, - 0x08ff, 0x8031, 0x7b7b, 0x09ca, - 0xfd6f, 0x40e1, 0xc04a, 0xc00d, - 0x786b, 0x700c, 0xf405, 0xf1e3, - 0x1300, 0x3080, 0x7104, 0x1b00, - 0x3002, 0x780f, 0x7114, 0x0206, - 0x002d, 0x7def, 0xc110, 0x40a1, - 0x0d52, 0xfe6f, 0x704c, 0xe872, - 0x700c, 0x1b01, 0x33c2, 0xc047, - 0x0e3f, 0x11f5, 0xc044, 0xd858, - 0x2011, 0x8380, 0x700c, 0xc044, - 0xf217, 0xc006, 0x7014, 0x0904, - 0xfd42, 0xf412, 0x1300, 0x3080, - 0x255f, 0x1641, 0x205f, 0x0c80, - 0x6038, 0x7063, 0x8816, 0x781d, - 0x781d, 0x781d, 0x781d, 0x781d, - 0x781d, 0x781d, 0xc044, 0x2456, - 0x3e00, 0x702c, 0x0f06, 0xfeaf, - 0xdab4, 0x0e0b, 0x11d4, 0xc005, - 0xf027, 0x2025, 0x0380, 0xf02b, - 0xf02f, 0xf035, 0xf037, 0xf004, - 0xf002, 0xf03b, 0xc096, 0xc043, + 0x9100, 0x7845, 0xb100, 0x7ee0, + 0x209a, 0x0004, 0x7905, 0x4063, + 0x209a, 0x0010, 0x2105, 0x0002, + 0x7ee0, 0x78e0, 0xc2e2, 0xc1a4, + 0x1600, 0x7080, 0x8000, 0x06e1, + 0x702c, 0x754c, 0xc043, 0xd8ff, + 0xdb08, 0x4528, 0xc142, 0xc141, + 0x0852, 0xfd6f, 0xc140, 0x1600, + 0x7080, 0x8000, 0x06e1, 0xd910, + 0x754c, 0xc043, 0xd8ff, 0xdb0c, + 0xc542, 0xc541, 0x0836, 0xfd6f, + 0xc540, 0xd8ff, 0xd980, 0x754c, + 0x726c, 0xc543, 0xc542, 0xc541, + 0x0822, 0xfd6f, 0xc540, 0xc0a4, + 0xc6c2, 0x78e0, 0xc2e4, 0xc1a4, + 0x70ad, 0xd8ff, 0x702c, 0x754c, + 0x736c, 0xc543, 0xc542, 0xc541, + 0x0802, 0xfd6f, 0xc540, 0x46cb, + 0x8000, 0x06e1, 0x8e00, 0x0825, + 0x00b4, 0x702c, 0x7104, 0x781d, + 0xb862, 0x780f, 0xc043, 0xd8ff, + 0x754c, 0x726c, 0xc542, 0xc541, + 0x0fda, 0xfd2f, 0xc540, 0x8e00, + 0xc043, 0xd8ff, 0x702c, 0x754c, + 0xdb08, 0xc542, 0xc541, 0x0fc6, + 0xfd2f, 0xc540, 0x8e00, 0xd908, + 0x754c, 0xc043, 0xd8ff, 
0xdb0c, + 0xc542, 0xc541, 0x0fae, 0xfd2f, + 0xc540, 0xc0a4, 0xc6c4, 0x78e0, + 0x42c3, 0x9004, 0x0110, 0x9200, + 0x204f, 0x0101, 0xb8a4, 0xb220, + 0x7fe0, 0xb200, 0xc0e6, 0x215f, + 0x0483, 0x44cb, 0x8000, 0x06e2, + 0x638d, 0x6479, 0x89c1, 0x7d04, + 0x702c, 0xf009, 0x629a, 0x8ae3, + 0x0809, 0x03ce, 0x8a42, 0x7d45, + 0x7124, 0x09f5, 0x83a4, 0x2314, + 0x0042, 0x40a1, 0xc4c6, 0x78e0, + 0xc2e6, 0x1cfc, 0xb6c8, 0x2482, + 0x3d09, 0xc044, 0x206f, 0x0243, + 0x8820, 0x1054, 0x0083, 0x1021, + 0x008d, 0x10fd, 0x808e, 0x1209, + 0x308b, 0x1208, 0x308f, 0xc149, 0x2400, 0x3f80, 0x0000, 0x010c, - 0xc042, 0x2456, 0x3e00, 0xc041, - 0xc008, 0xdbff, 0xc109, 0xc040, - 0xc004, 0x704c, 0x7014, 0x208a, - 0x0fc7, 0x731c, 0x75d5, 0x22ca, - 0x01a1, 0x0cca, 0xfeef, 0x40e1, - 0xd88c, 0xc047, 0x710c, 0xc044, - 0x0ed9, 0x1190, 0x0edd, 0x1351, - 0xf068, 0x40a1, 0x0dca, 0xffaf, - 0x2456, 0x3e01, 0xf00f, 0x40a1, - 0x0ede, 0x00a0, 0x2456, 0x3e01, - 0xf009, 0x0a5e, 0x00a0, 0x40a1, - 0xf005, 0xc104, 0x0f06, 0xff6f, - 0x40e1, 0x700c, 0xf067, 0x704c, - 0x1e00, 0x7084, 0x900f, 0xe022, + 0x702c, 0x0b9a, 0xfeef, 0xdab4, + 0xc096, 0x0b92, 0xfeef, 0xdab4, + 0xbee2, 0x6f01, 0xc046, 0x20df, + 0x03e1, 0x70cd, 0x7ba5, 0xc046, + 0xc004, 0x43d9, 0xc34d, 0x2078, + 0x0141, 0xb882, 0x2078, 0x0100, + 0x7905, 0xc004, 0x2078, 0x00c0, + 0xc04c, 0x7825, 0xc04a, 0xc004, + 0x2079, 0x0180, 0xc04e, 0xc004, + 0xb869, 0xc04b, 0xc009, 0x201a, + 0x0f80, 0x0010, 0x0000, 0xc04f, + 0x2378, 0x1000, 0x028d, 0x0020, + 0xc048, 0x4300, 0x2644, 0x17c0, + 0xc10a, 0xe806, 0xb861, 0x0801, + 0x0031, 0x7b7b, 0x0d16, 0xfd6f, + 0x40c1, 0xc047, 0xc00d, 0x7b0b, + 0x0266, 0x0001, 0xc004, 0xb880, + 0x2078, 0x0141, 0xc00c, 0x7825, + 0xc050, 0x0215, 0x0020, 0x700c, + 0xc110, 0x704c, 0x0976, 0xfeaf, + 0xc045, 0x7014, 0x01f6, 0x0021, + 0x70ed, 0x40c3, 0x8000, 0x07c9, + 0xa8c1, 0xc004, 0xb863, 0x7214, + 0x0c54, 0xfd45, 0x4508, 0xf79a, + 0xc004, 0x0831, 0x01b1, 0x70ad, + 0x1600, 0x7080, 0x8000, 0x07c9, + 0x265a, 0x1641, 0x205f, 0x0c80, + 0x6038, 0x2032, 
0x0f8d, 0x8000, + 0x067f, 0x7dbd, 0x7dbd, 0x7dbd, + 0x7dbd, 0x7dbd, 0x7dbd, 0x7dbd, + 0x2456, 0x3e00, 0x702c, 0x0aa6, + 0xfeef, 0xdab4, 0xc004, 0x7614, + 0x0164, 0x000d, 0xc004, 0x706f, + 0x2025, 0x0000, 0xf025, 0xf02b, + 0xf02f, 0xf033, 0xf003, 0xf003, + 0xf034, 0xc096, 0xc043, 0x2400, + 0x3f80, 0x0000, 0x010c, 0xc042, + 0x2456, 0x3e00, 0xc041, 0xc008, + 0x70b5, 0x238a, 0x0fc7, 0xc040, + 0xd8ff, 0x23ca, 0x0001, 0xc004, + 0xc106, 0x7514, 0x23ca, 0x31a1, + 0x40c1, 0x0882, 0xff2f, 0x4263, + 0xdf8c, 0x716f, 0xf088, 0xc005, + 0x0e02, 0xffaf, 0x2456, 0x3e01, + 0xf09d, 0xc005, 0x0f8e, 0x00a0, + 0x2456, 0x3e01, 0xf097, 0x0aea, + 0x00a0, 0xc005, 0xf093, 0x40c1, + 0x0e96, 0xff6f, 0x41a1, 0xf08f, + 0x1e00, 0x76c4, 0x900f, 0xe022, 0xc096, 0xc042, 0x2400, 0x3f80, - 0x0000, 0x010c, 0xc109, 0xc308, + 0x0000, 0x010c, 0xc106, 0xc308, 0xc041, 0x2456, 0x3e00, 0xc040, - 0x0d6e, 0x0020, 0x40e1, 0xc004, - 0x7014, 0x710c, 0xc044, 0xd8c0, - 0xc047, 0xf238, 0x1304, 0x3083, - 0x235f, 0x0240, 0xf003, 0x7164, - 0x40e3, 0x1305, 0x3081, 0x0953, - 0x00e3, 0x704c, 0xf041, 0x2456, - 0x3e01, 0x21f5, 0x000c, 0xd91b, - 0x789c, 0x781c, 0x781c, 0x781c, - 0xb961, 0x0901, 0x0031, 0x781d, - 0x6419, 0x2c44, 0x180c, 0xb9c4, - 0x7902, 0x231a, 0x0f80, 0x0000, - 0x2000, 0x249a, 0x1010, 0x7985, - 0x6a81, 0x229a, 0x0008, 0x7845, - 0x2005, 0x0f80, 0x9002, 0x0064, - 0xb020, 0x4281, 0x40e3, 0x0abb, - 0x8232, 0x2040, 0x005f, 0xf1d1, - 0x40a1, 0x0f3a, 0xfdaf, 0xc191, - 0xc004, 0x7014, 0x700c, 0xc045, - 0xf328, 0xc107, 0xc00a, 0xc20e, - 0x716c, 0x6119, 0xc00f, 0x7825, - 0x0826, 0x00a0, 0x2456, 0x3e01, - 0xc004, 0xc045, 0xf11a, 0x2340, - 0x34c0, 0xc045, 0xc105, 0x40a1, - 0x704c, 0x09a2, 0xfe6f, 0x716c, - 0xc096, 0xc042, 0x2400, 0x3f80, - 0x0000, 0x010c, 0xc109, 0xc308, - 0xc041, 0x2456, 0x3e00, 0xc040, - 0x40e1, 0x0c96, 0x0020, 0x714c, - 0xc105, 0x40a1, 0x704c, 0x0976, - 0xfe6f, 0x706c, 0xd8c0, 0xc047, - 0xf15c, 0x0c56, 0xfd6f, 0x730c, - 0xca09, 0x7014, 0xf413, 0x0e11, - 0x10f1, 0x79ef, 0x0bba, 0xfd6f, - 0xc00a, 
0xf00d, 0xc00c, 0x7414, - 0xf789, 0xc006, 0xc10b, 0xc20a, - 0x2052, 0x0000, 0x0aee, 0xfd6f, - 0x7bef, 0x0583, 0xffcf, 0x0e19, - 0x10d1, 0xca09, 0xe888, 0x1600, - 0x7080, 0x8000, 0x0000, 0xb8e5, - 0x0bc0, 0xfd42, 0x1e00, 0x7003, - 0x8000, 0x000f, 0x2480, 0x3d09, - 0x1404, 0x341b, 0xc6c6, 0x78e0, - 0xc2e6, 0x1cfc, 0xb6c8, 0xc1b1, - 0xc050, 0x1600, 0x7080, 0x8000, - 0x0004, 0xc046, 0x0db6, 0xfdef, - 0x4020, 0x40c3, 0x0000, 0xffff, - 0x4100, 0x4200, 0x0aa2, 0xfdef, - 0x4300, 0xd80f, 0x0ece, 0xfd2f, - 0xd90f, 0x700c, 0x47cb, 0x9003, - 0xe004, 0xc04a, 0x1f22, 0x1014, + 0x40c1, 0x0d96, 0x0020, 0x704c, + 0x716f, 0x0dcf, 0x1030, 0xdfc0, + 0x40c3, 0x8000, 0x07c9, 0x8825, + 0x40c3, 0x8000, 0x07c9, 0x8844, + 0x225f, 0x0243, 0xf02d, 0x260a, + 0x30c0, 0xf025, 0x20f5, 0x078d, + 0xdf1b, 0x2640, 0x305e, 0x78bc, + 0x781c, 0x781c, 0x781c, 0xbf61, + 0x0fff, 0x9031, 0x781d, 0x651f, + 0xbfc4, 0x7f02, 0x2d44, 0x1800, + 0x6ca1, 0x249a, 0x1008, 0x209a, + 0x0010, 0x7f05, 0x221a, 0x0f80, + 0x0000, 0x2000, 0x7885, 0x2005, + 0x0f80, 0x9002, 0x0064, 0xb0e0, + 0x44a9, 0x0cbd, 0x9234, 0x2456, + 0x3e00, 0x7144, 0xe309, 0x09ab, + 0x80a5, 0x708d, 0xc005, 0x45cb, + 0x8000, 0x067c, 0x41a1, 0x704c, + 0x0dfa, 0xfe6f, 0x716c, 0xc096, + 0xc042, 0x2400, 0x3f80, 0x0000, + 0x010c, 0xc106, 0xc308, 0xc041, + 0x2456, 0x3e00, 0xc040, 0x40c1, + 0x0ce6, 0x0020, 0x714c, 0xc005, + 0x41a1, 0x704c, 0x0dce, 0xfe6f, + 0x706c, 0xdfc0, 0xc004, 0x0815, + 0x0370, 0x456b, 0xc004, 0x080d, + 0x01b0, 0x456b, 0xf006, 0x71ad, + 0xc005, 0x0aee, 0xfdef, 0xc191, + 0x706f, 0xed0d, 0xc007, 0xc20e, + 0x2456, 0x3e01, 0x671f, 0xc00f, + 0x716c, 0x086e, 0x00a0, 0x78e5, + 0x43b9, 0x1600, 0x7080, 0x8000, + 0x07c9, 0x7104, 0x1e00, 0x7002, + 0x8000, 0x07c9, 0x780f, 0x7114, + 0x05e8, 0xffee, 0x78cf, 0x083e, + 0xfdaf, 0x730c, 0xca09, 0xe895, + 0xc004, 0x0811, 0x00f1, 0x79cf, + 0x0fa2, 0xfd6f, 0xc007, 0xf00d, + 0xc00b, 0x0817, 0x0134, 0x7bcf, + 0xc00a, 0xc109, 0xc207, 0x0eda, + 0xfd6f, 0x2052, 0x0000, 0x71c5, + 0x72d5, 0x057a, 0xffe5, 0x710c, + 
0xc004, 0x0819, 0x00d1, 0xca09, + 0xe888, 0x1600, 0x7080, 0x8000, + 0x0000, 0xb8e5, 0x0fa0, 0xfd42, + 0x1e00, 0x7003, 0x8000, 0x000f, + 0x2480, 0x3d09, 0x1404, 0x341b, + 0xc6c6, 0x78e0, 0xc2e6, 0x1cfc, + 0xb6c8, 0xc1b0, 0xc04f, 0x1600, + 0x7080, 0x8000, 0x0004, 0xc046, + 0x098e, 0xfe2f, 0x4020, 0x40c3, + 0x0000, 0xffff, 0x4100, 0x4200, + 0x0e76, 0xfdef, 0x4300, 0xd80f, + 0x09f6, 0xfd6f, 0xd90f, 0x47cb, + 0x9003, 0xe004, 0x1f22, 0x1015, 0x208a, 0x0fc7, 0x1fa7, 0x101c, 0x1fa8, 0x101c, 0x710c, 0xd9ff, 0xb700, 0x1ffe, 0x9004, 0xb705, - 0x090e, 0xffef, 0x1ffc, 0x9004, - 0x46cb, 0x8000, 0x076d, 0x8e03, - 0xc04f, 0x8e02, 0xc04e, 0x8e05, - 0xc049, 0x8e04, 0xc048, 0x206f, - 0x0243, 0x10fd, 0x8082, 0x700c, - 0x0a0f, 0x017e, 0xc041, 0xc009, - 0xc04a, 0xc008, 0xc041, 0xc006, - 0x201a, 0x0f80, 0x0010, 0x0000, - 0xc044, 0xf007, 0x10fd, 0x8082, - 0xc001, 0x7104, 0xc041, 0xc101, - 0xc00a, 0x7110, 0x0258, 0x0029, - 0x706f, 0x770c, 0x0a15, 0x017e, - 0xc042, 0xc001, 0xae04, 0xae05, - 0x781b, 0xae02, 0x7104, 0xae03, - 0x71ad, 0xf003, 0x77a5, 0x70b5, - 0x019e, 0x002b, 0xd825, 0xaea1, - 0x8e20, 0xb89f, 0x7034, 0x20c0, - 0x0ce2, 0x8820, 0x710c, 0x2544, - 0x17c2, 0xea06, 0xba61, 0x0a01, - 0x0031, 0x781b, 0x205a, 0x0100, - 0x200f, 0x0340, 0x780f, 0xc04d, - 0x790b, 0xf3e6, 0x78af, 0xc04c, - 0x4e2b, 0xc04b, 0xc00c, 0xc10b, - 0x714c, 0x0ffa, 0xfe2f, 0x706c, - 0xc00d, 0x702c, 0x0c8a, 0xfeef, - 0x714c, 0x700c, 0x0a2e, 0xfd2f, - 0x712c, 0xf003, 0x7167, 0x234c, - 0xb7c0, 0x011a, 0x0029, 0x700c, - 0xc006, 0x201a, 0x0f80, 0x0020, - 0x0000, 0x2005, 0x0f80, 0x9005, - 0xe040, 0x1800, 0x06c4, 0x206f, - 0x0243, 0x10fd, 0x8080, 0x0825, - 0x015e, 0xc001, 0x201a, 0x0f81, - 0x0000, 0x1000, 0xc004, 0x7825, + 0x0922, 0xffef, 0x1ffc, 0x9004, + 0x46cb, 0x8000, 0x07c9, 0x8e04, + 0xc048, 0x206f, 0x0243, 0x10fd, + 0x809e, 0x265f, 0x3100, 0x780d, + 0x791c, 0x793c, 0x793c, 0x793c, + 0x793c, 0xc008, 0x793c, 0x793c, + 0x7824, 0xc041, 0x8e05, 0xc04c, + 0x7824, 0xc04d, 0xc006, 0x201a, + 0x0f80, 0x0010, 0x0000, 0xc045, + 
0x8e03, 0xc04b, 0x8e02, 0x0259, + 0x0020, 0xc04a, 0x0e13, 0x315e, + 0xc001, 0xae05, 0xae04, 0x781b, + 0xae02, 0x7104, 0xae03, 0x70ad, + 0x770c, 0x716f, 0xc043, 0xf0cd, + 0x2344, 0x37c1, 0xe905, 0xb961, + 0x09ff, 0x8031, 0x781b, 0x1e01, + 0x16c2, 0x205a, 0x0100, 0x200f, + 0x06c2, 0x8e00, 0xe806, 0x1600, + 0x7081, 0x8000, 0x0058, 0xf006, + 0x1600, 0x7081, 0x8000, 0x0025, + 0x784f, 0x790b, 0xc042, 0xf2b0, + 0x202f, 0x06c7, 0x41c3, 0x8000, + 0x067c, 0x714c, 0x706c, 0x0c06, + 0xfe6f, 0xc04e, 0xc002, 0x702c, + 0x09b6, 0xff2f, 0x714c, 0x700c, + 0x0ab2, 0xfd2f, 0x712c, 0x700c, + 0xc049, 0xf003, 0x71a5, 0xe59f, + 0x010c, 0x0009, 0xc006, 0x201a, + 0x0f80, 0x0020, 0x0000, 0x2005, + 0x0f80, 0x9005, 0xe040, 0xb0a0, + 0x206f, 0x0243, 0x10fd, 0x8080, + 0x082b, 0x015f, 0x8e25, 0x8e44, + 0xf00e, 0x221a, 0x0f83, 0x0000, + 0x1000, 0xc005, 0x7144, 0x7865, 0x781b, 0x2005, 0x0f80, 0x9002, - 0x0040, 0x1800, 0x06c4, 0xf015, - 0x8e24, 0xf010, 0x211a, 0x0f82, - 0x0000, 0x1000, 0xc004, 0x7124, - 0x7845, 0x781b, 0x2005, 0x0f80, - 0x9002, 0x0040, 0x1800, 0x06c4, - 0x8e05, 0x08e5, 0x8043, 0x0ff6, - 0xfe6f, 0xd814, 0x0a72, 0xff8f, - 0xc010, 0x712c, 0x09a6, 0xfd2f, - 0x7810, 0x8e04, 0xc043, 0x700c, - 0xc047, 0xf009, 0xc105, 0xc007, - 0x7825, 0xc047, 0xc003, 0x7104, - 0xc043, 0x8e25, 0xc003, 0x0967, - 0x0022, 0xd80f, 0xc003, 0x704c, - 0x201a, 0x0f81, 0x0000, 0x2000, - 0x2105, 0x0f80, 0x9002, 0x0170, - 0x9000, 0xc245, 0xf002, 0x7144, - 0x0a25, 0x0233, 0x4040, 0x209a, - 0x0008, 0x7825, 0x2005, 0x0f80, - 0x9002, 0x0166, 0x9000, 0xe875, - 0xc005, 0x7810, 0x200f, 0x0080, - 0xc045, 0xf1ef, 0xc005, 0x262f, - 0xf008, 0xf5d2, 0xc303, 0x740c, - 0x41c3, 0x000e, 0x0003, 0x42a1, - 0x0d72, 0xfd2f, 0x1c00, 0x36c0, - 0xf1c6, 0x0cca, 0xfd2f, 0xd90f, - 0xc007, 0x262f, 0xf008, 0xf578, - 0xc002, 0x2008, 0x06c0, 0xc042, - 0x710c, 0x41c3, 0x0011, 0x0001, - 0x0f86, 0xfcaf, 0x42a1, 0x0eb2, - 0xfe6f, 0xc00d, 0xc00c, 0xc10b, - 0x704c, 0x0eaa, 0xfe2f, 0x706c, - 0x1408, 0x301b, 0xf130, 0xc002, - 0xd91f, 0xb961, 0x09ff, 
0x8031, - 0x781d, 0x41c3, 0x0013, 0x0000, - 0x0f56, 0xfcaf, 0x2052, 0x0000, - 0xc006, 0x1600, 0x7081, 0x8000, - 0x0016, 0x201a, 0x0f80, 0x0020, - 0x0000, 0x2005, 0x0f82, 0x9005, - 0xe040, 0xc002, 0x6038, 0x2049, - 0x07c1, 0xb220, 0x206f, 0x0243, - 0x10fd, 0x8080, 0x0821, 0x015e, - 0xc001, 0x201a, 0x0f82, 0x0000, - 0x1000, 0xc004, 0x7845, 0x781b, - 0x2005, 0x0f80, 0x9002, 0x0040, - 0xb020, 0xf016, 0x8e44, 0xf00f, - 0x221a, 0x0f83, 0x0000, 0x1000, - 0xc004, 0x7144, 0x7865, 0x781b, + 0x0040, 0xb0a0, 0x09e7, 0x8085, + 0xf00e, 0xc001, 0x201a, 0x0f81, + 0x0000, 0x1000, 0xc005, 0x7825, + 0x781b, 0x2005, 0x0f80, 0x9002, + 0x0040, 0xb0a0, 0x0b9a, 0xfeaf, + 0xd814, 0x0a92, 0xff8f, 0xc00f, + 0x712c, 0x0a32, 0xfd2f, 0x7810, + 0x8e04, 0xc044, 0x700c, 0xc047, + 0xf034, 0xc004, 0x704c, 0x201a, + 0x0f81, 0x0000, 0x2000, 0x2105, + 0x0f80, 0x9002, 0x0170, 0x9000, + 0xc242, 0xf011, 0x209a, 0x0008, + 0x7825, 0x2005, 0x0f80, 0x9002, + 0x0166, 0x9000, 0xe806, 0xc002, + 0x7810, 0x200f, 0x0080, 0xc042, + 0x7144, 0x0ae5, 0x8234, 0x4040, + 0xc002, 0x262f, 0xf008, 0xf40b, + 0xc304, 0x740c, 0x41c3, 0x000e, + 0x0003, 0x4263, 0x08c2, 0xfd6f, + 0xc540, 0xc102, 0xc007, 0x7825, + 0xc047, 0xc004, 0x7104, 0xc044, + 0x8e25, 0xc004, 0x0997, 0x8025, + 0xd80f, 0x0806, 0xfd6f, 0xd90f, + 0xc007, 0x262f, 0xf008, 0xf580, + 0xc003, 0x2008, 0x0340, 0xc043, + 0x710c, 0xc049, 0xc009, 0x41c3, + 0x0011, 0x0001, 0x082e, 0xfcef, + 0x4263, 0xc00e, 0x41c3, 0x8000, + 0x067c, 0x704c, 0x0abe, 0xfe6f, + 0x706c, 0xc503, 0x7767, 0x7077, + 0x0668, 0xffea, 0x710c, 0xc003, + 0x41c3, 0x0013, 0x0000, 0x7812, + 0x202f, 0x000d, 0x0ffe, 0xfcaf, + 0xb8c0, 0xc006, 0x1600, 0x7081, + 0x8000, 0x0016, 0x201a, 0x0f80, + 0x0020, 0x0000, 0x2005, 0x0f82, + 0x9005, 0xe040, 0xc003, 0x6038, + 0x2049, 0x07c1, 0xb220, 0x206f, + 0x0243, 0x10fd, 0x809e, 0x0e39, + 0x315f, 0x8e65, 0x8e44, 0xf00f, + 0x221a, 0x0f8c, 0x0000, 0x1000, + 0xc005, 0x7144, 0x7885, 0x781b, 0x2005, 0x0f80, 0x9002, 0x0040, - 0xb020, 0x8e05, 0x08e5, 0x8083, - 0x091e, 0xff8f, 
0x05a1, 0xffef, - 0x206f, 0x0243, 0x0a15, 0x015e, - 0xc008, 0xae04, 0xc009, 0xae05, - 0xc00e, 0xae02, 0xc00f, 0xae03, - 0x700c, 0x1fa7, 0x101c, 0x1fa8, - 0x101c, 0xb700, 0xc0b1, 0x1404, - 0x341b, 0xc6c6, 0x4200, 0x208c, - 0x8fc3, 0xf408, 0x41c3, 0x9004, - 0x0238, 0x1900, 0x0105, 0xf009, - 0x0a13, 0x01d1, 0x41c3, 0x9004, - 0x0238, 0x1900, 0x0045, 0x1900, - 0x0005, 0xca09, 0xe804, 0x0019, - 0x0020, 0x4040, 0x41c3, 0x9018, - 0x0064, 0xb140, 0x0051, 0x0020, - 0x1902, 0x0005, 0xc2e2, 0x4508, - 0xca09, 0xe812, 0xcc22, 0x208c, - 0x8010, 0xf785, 0x0ece, 0xfd0f, - 0xcc22, 0x6821, 0x2615, 0x7000, - 0x8000, 0x08f0, 0x1a22, 0x305c, - 0xa0a0, 0xc6c2, 0x40c3, 0x9018, - 0x0064, 0xd910, 0xb0a0, 0xb961, - 0x09ff, 0x8031, 0x7dbd, 0xb0a2, - 0x080e, 0x0020, 0x1802, 0x0005, - 0xc6c2, 0x78e0, 0x41c3, 0x9018, + 0xb020, 0x0be9, 0x8085, 0x095e, + 0xff8f, 0x206f, 0x0243, 0x10fd, + 0x809e, 0xf00f, 0xc001, 0x201a, + 0x0f82, 0x0000, 0x1000, 0xc005, + 0x7845, 0x781b, 0x2005, 0x0f80, + 0x9002, 0x0040, 0xb020, 0xc001, + 0x7104, 0xc041, 0xc101, 0xc00d, + 0x7110, 0x05ac, 0xffce, 0x0e17, + 0x315e, 0xc00c, 0xae05, 0xc008, + 0xae04, 0xc00b, 0xae03, 0xc00a, + 0xae02, 0x700c, 0x1fa7, 0x101c, + 0x1fa8, 0x101c, 0xb700, 0xc0b0, + 0x1404, 0x341b, 0xc6c6, 0x78e0, + 0x208c, 0x8fc3, 0xf209, 0x0821, + 0x01d1, 0x41c3, 0x9004, 0x0238, + 0x1900, 0x0045, 0xf006, 0x41c3, + 0x9004, 0x0238, 0x1900, 0x0105, + 0x1900, 0x0005, 0x1209, 0x3081, + 0xe903, 0x0019, 0x0000, 0x41c3, + 0x9018, 0x0064, 0xb100, 0x0059, + 0x0020, 0x1902, 0x0005, 0x78e0, + 0xc0f1, 0x1209, 0x3081, 0xe90a, + 0x1222, 0x3702, 0x228c, 0x8010, + 0xf793, 0x0aae, 0xfd4f, 0xf019, + 0x41c3, 0x9018, 0x0064, 0xda10, + 0xb100, 0xba61, 0x0aff, 0x8031, + 0x781d, 0xb102, 0x0822, 0x0020, + 0x1902, 0x0005, 0xf009, 0x2615, + 0x7081, 0x8000, 0x08e8, 0xa100, + 0x6a01, 0x1a22, 0x301c, 0xc0d1, + 0x7ee0, 0x78e0, 0x41c3, 0x9018, 0x0008, 0x9100, 0x08ff, 0x801f, 0x195e, 0x0045, 0x9100, 0x0801, - 0x001e, 0x7ee0, 0xc2e4, 0x1cfc, - 0xb6c8, 0x4608, 0x1600, 0x7080, - 0x8000, 
0x0001, 0x083b, 0x013f, - 0x4338, 0xca09, 0x7dd0, 0xe80a, - 0xcc22, 0x218a, 0x0010, 0x7902, - 0x6d02, 0x7030, 0x0e54, 0xfd0a, - 0x0f36, 0xffef, 0xd808, 0x0f72, - 0xffef, 0x40c1, 0x70cd, 0xf007, - 0x0f66, 0xffef, 0x1304, 0x3400, - 0x71c5, 0x0ef9, 0x9342, 0x1404, - 0x341b, 0xc6c4, 0xc2e6, 0x1cfc, - 0xb6c8, 0x2482, 0x3107, 0x4318, - 0x206f, 0x0243, 0xc154, 0x8820, - 0x10fc, 0x808e, 0xc352, 0xc146, + 0x001e, 0x7ee0, 0xc2e6, 0x4708, + 0x1600, 0x7080, 0x8000, 0x0001, + 0x083b, 0x013f, 0x4528, 0xca09, + 0x7ef0, 0xe80a, 0xcc22, 0x218a, + 0x0010, 0x7902, 0x6e02, 0x7030, + 0x0a34, 0xfd4a, 0x0f36, 0xffef, + 0xd808, 0x0f72, 0xffef, 0x40e1, + 0x70ed, 0xf007, 0x0f66, 0xffef, + 0x1504, 0x1400, 0x71e5, 0x0ff9, + 0x9384, 0xc6c6, 0xc2e6, 0x1cfc, + 0xb6c8, 0x2482, 0x3009, 0xc352, + 0xc245, 0xc153, 0x4318, 0x206f, + 0x0243, 0x88a0, 0x10fc, 0x808e, 0x1600, 0x710f, 0x9008, 0x01e0, - 0x4063, 0x702c, 0x0b02, 0xfd2f, - 0x4548, 0xc04f, 0x40c3, 0x0000, - 0xffff, 0x4100, 0x4200, 0x0e72, - 0xfdaf, 0x4300, 0x40c3, 0x0000, - 0xffff, 0x2058, 0x0800, 0x78e4, - 0x1e00, 0x7004, 0x9009, 0xe1e0, - 0x40c3, 0x8000, 0x04c4, 0x0a9e, - 0x0060, 0xd960, 0x718d, 0x4081, + 0x4063, 0x0e32, 0xfd2f, 0x702c, + 0xc050, 0x40c3, 0x0000, 0xffff, + 0x4100, 0x4200, 0x0a4a, 0xfdef, + 0x4300, 0x40c3, 0x0000, 0xff7f, + 0x78e4, 0x1e00, 0x7004, 0x9009, + 0xe1e0, 0x2456, 0x3f00, 0x41c3, + 0x8000, 0x044d, 0x0d2e, 0xfcaf, + 0xda60, 0x2456, 0x3f00, 0x0b0e, + 0x0060, 0xd960, 0x716c, 0x4060, 0x2344, 0x37cf, 0xef05, 0xbf61, - 0x0fff, 0x9031, 0x781b, 0x205a, - 0x0100, 0xc545, 0xbee6, 0x704c, - 0x200f, 0x06c1, 0xc005, 0x20ca, - 0x0061, 0xc045, 0xed15, 0xd8ff, + 0x0fff, 0x9031, 0x781b, 0x7edd, + 0x205a, 0x0100, 0x7edd, 0x7edd, + 0x7edd, 0x200f, 0x06c2, 0xc005, + 0x7edd, 0x7edd, 0xe807, 0xd8ff, 0x1e00, 0x7004, 0x9003, 0xf0c4, - 0xf014, 0x209a, 0x0008, 0x4381, - 0xee05, 0xbe61, 0x0eff, 0x9031, - 0x7b7b, 0x7144, 0x2005, 0x0f80, - 0x9003, 0xe0c4, 0xb060, 0x7850, - 0x08e3, 0x8234, 0x2044, 0x07ce, - 0xc006, 0x704c, 0x201a, 0x0f80, - 0x0010, 
0x0000, 0xc050, 0x1478, - 0x3600, 0xc04d, 0x1477, 0x3600, - 0xc04c, 0x1476, 0x3600, 0xc04e, - 0x782f, 0x702c, 0x08aa, 0xfeef, - 0xc053, 0x1600, 0x708f, 0x8000, - 0x0771, 0xd8ff, 0x70b5, 0x218a, - 0x0004, 0xc049, 0x703c, 0xc049, - 0xf002, 0x71e5, 0x40c3, 0x8000, - 0x0771, 0x8801, 0x7aef, 0x080d, - 0x00a5, 0x70cd, 0xf049, 0x71c5, - 0x7bcf, 0x0beb, 0x8255, 0xc00f, - 0x2055, 0x0c01, 0xc010, 0x7825, - 0x221a, 0x0f81, 0x0000, 0x1000, - 0x7905, 0x4060, 0x209a, 0x0004, - 0x7825, 0x781b, 0xb891, 0xb89c, - 0xb89f, 0x9080, 0x2579, 0x1001, - 0x2378, 0x0200, 0x2107, 0x803e, - 0xf5e3, 0x789d, 0x781d, 0x781d, - 0x781d, 0x781d, 0x781d, 0x706d, - 0xe80a, 0x4308, 0x239a, 0x1010, - 0x2453, 0x1101, 0x2380, 0x1030, - 0x2305, 0x104b, 0x225f, 0x049f, - 0x2456, 0x3881, 0x71e3, 0x7974, - 0x1900, 0x02c4, 0x41c3, 0x0000, - 0x141f, 0x0815, 0x0170, 0xbcc4, - 0x209a, 0x0010, 0x2080, 0x0010, - 0x2005, 0x0301, 0xc097, 0x70e3, - 0x7874, 0xb020, 0xf1b9, 0x2578, - 0x1000, 0xc051, 0x1600, 0x7080, - 0x8000, 0x0771, 0x71ed, 0x205f, - 0x0240, 0xc044, 0x2579, 0x1000, - 0xc055, 0x205f, 0x0200, 0xc047, - 0x700c, 0xf004, 0xc00a, 0x7104, - 0xc04a, 0x70f5, 0xf2e7, 0xc109, - 0x0896, 0x0060, 0x2456, 0x3880, - 0xd80f, 0x0902, 0xfd2f, 0xd90f, - 0x710c, 0x0b66, 0xffaf, 0xd9ff, - 0x45cb, 0x9003, 0xe024, 0x712c, - 0x70ed, 0x1d00, 0x1f84, 0x0000, - 0xffff, 0xb526, 0x1dfe, 0x9044, - 0x1da8, 0x13dc, 0x1da9, 0x13dc, - 0xc014, 0xe020, 0x7810, 0x0d26, - 0xfcef, 0x4318, 0xc015, 0x712c, - 0x68c1, 0xc00c, 0x0cb6, 0xff6f, - 0x42c1, 0xc109, 0x0842, 0x0060, - 0xc097, 0xd80f, 0x08ae, 0xfd2f, - 0xd90f, 0x730c, 0x0b12, 0xffaf, - 0xd9ff, 0x712c, 0x4063, 0x1d00, - 0x1f84, 0x0000, 0xffff, 0xb526, - 0x1dfe, 0x9044, 0x1da8, 0x13dc, - 0x0ce2, 0xfcef, 0x1da9, 0x13dc, - 0xc00d, 0x712c, 0x0c76, 0xff6f, - 0x42c1, 0xc00a, 0xc504, 0x780f, - 0xc056, 0x7314, 0xc005, 0x20ca, - 0x0061, 0xc045, 0x1600, 0x7080, - 0x8000, 0x0771, 0xc04b, 0xf005, + 0xf016, 0x700c, 0xf011, 0x2044, + 0x07c1, 0xe906, 0xb961, 0x0901, + 0x0031, 0x7c9b, 0x6821, 0x209a, + 
0x0008, 0x2005, 0x0f80, 0x9003, + 0xe0c4, 0xb080, 0x4020, 0x08e3, + 0x8234, 0x4468, 0x1497, 0x3600, + 0x251a, 0x1f8d, 0x0010, 0x0000, + 0x7bc6, 0xc04d, 0x1496, 0x3600, + 0x702c, 0xc34c, 0xc04e, 0x1495, + 0x3600, 0xc551, 0xc04f, 0x784f, + 0x0dc6, 0xfeef, 0x704c, 0x700c, + 0xc04a, 0xf00c, 0x6861, 0x7a7b, + 0x7034, 0x2456, 0x3e01, 0x22ce, + 0x0021, 0x6038, 0xa840, 0x4060, + 0x08ed, 0x8434, 0x2044, 0x0041, + 0x40c3, 0x8000, 0x07cd, 0x1600, + 0x709e, 0x8000, 0x07cd, 0x8881, + 0x250a, 0x1780, 0xf049, 0x706c, + 0xf044, 0xc010, 0x2055, 0x0c01, + 0xc011, 0x7825, 0x261a, 0x1f81, + 0x0000, 0x1000, 0x7905, 0x4060, + 0x209a, 0x0004, 0x7825, 0x781b, + 0xb891, 0xb89c, 0xb89f, 0x90e0, + 0xc005, 0x2378, 0x0201, 0x2079, + 0x0000, 0x2007, 0x807e, 0xf429, + 0x78fd, 0x781d, 0x781d, 0x781d, + 0x781d, 0x781d, 0xbfc4, 0x702c, + 0xe807, 0x4100, 0x219a, 0x0010, + 0x2180, 0x0030, 0x79e5, 0x265f, + 0x148b, 0x2400, 0x3f82, 0x0000, + 0x010c, 0x7261, 0x7a74, 0xb220, + 0x41c3, 0x0000, 0x141f, 0x0813, + 0x0150, 0x209a, 0x0010, 0x2080, + 0x0010, 0x2005, 0x03c1, 0xc096, + 0x7061, 0x7874, 0xb020, 0x7164, + 0x0b7b, 0x8254, 0x71a5, 0x7eaf, + 0x0c6f, 0x93a5, 0x218a, 0x0004, + 0xc005, 0x71ad, 0x7014, 0xd8ff, + 0xc049, 0x703c, 0xc049, 0x265f, + 0x3240, 0xc044, 0xc005, 0x2079, + 0x0000, 0xc054, 0x205f, 0x0200, + 0xc047, 0xf0df, 0x2400, 0x3f80, + 0x0000, 0x010c, 0x08f6, 0x0060, + 0xc109, 0xd80f, 0x0c12, 0xfd2f, + 0xd90f, 0x710c, 0x0b5e, 0xffaf, + 0xd9ff, 0x46cb, 0x9003, 0xe024, + 0x70ad, 0x1e00, 0x1f84, 0x0000, + 0xffff, 0xb606, 0x1efe, 0x9004, + 0x1ea8, 0x135c, 0x1ea9, 0x135c, + 0xc013, 0x712c, 0xe020, 0x7810, + 0x0d8a, 0xfcef, 0x4318, 0xc014, + 0x712c, 0x68e1, 0xc00e, 0x0cbe, + 0xff6f, 0x42e1, 0xc109, 0x08a6, + 0x0060, 0xc096, 0xd80f, 0x0bc2, + 0xfd2f, 0xd90f, 0x730c, 0x0b0e, + 0xffaf, 0xd9ff, 0x712c, 0x4063, + 0x1e00, 0x1f84, 0x0000, 0xffff, + 0xb626, 0x1efe, 0x9044, 0x1ea8, + 0x135c, 0x0d4a, 0xfcef, 0x1ea9, + 0x135c, 0xc00d, 0x712c, 0x0c7e, + 0xff6f, 0x42e1, 0xc00a, 0x780f, + 0xc055, 0xe090, 0xc00c, 
0x20ca, + 0x0061, 0xc04c, 0x1600, 0x7080, + 0x8000, 0x07cd, 0xc04b, 0xc004, + 0xc046, 0xf073, 0xc106, 0xc007, + 0xc607, 0x6119, 0xc146, 0x722c, + 0xf066, 0xc006, 0x0e17, 0x1231, + 0x7a10, 0xc005, 0xe887, 0xc00f, + 0x7854, 0x10f0, 0x8102, 0xb040, + 0xf056, 0xc00e, 0x20f5, 0x0083, + 0xc00d, 0x20f5, 0x008c, 0x4c70, + 0x2048, 0x0000, 0x7f0e, 0x0f89, + 0x11b3, 0x730c, 0x203c, 0x039b, + 0x098d, 0x06e0, 0x43c1, 0xc00c, + 0xc208, 0x41c3, 0x0014, 0x0003, + 0x780f, 0x2078, 0x0000, 0x0b5e, + 0xfcaf, 0xc740, 0xc010, 0x4363, + 0x2055, 0x08c1, 0xc011, 0x7905, + 0xc008, 0x201a, 0x0f80, 0x0000, + 0x1000, 0x7905, 0x4063, 0x209a, + 0x0004, 0x7825, 0x781b, 0x2005, + 0x0f8d, 0x9002, 0x0000, 0x9540, + 0xc015, 0x2456, 0x3e01, 0x2133, + 0x0001, 0xc240, 0x6158, 0x780e, + 0xc208, 0xc042, 0x2048, 0x0000, + 0xc141, 0x2049, 0x080f, 0x740c, + 0x41c3, 0x0015, 0x0006, 0x0b5a, + 0xfd2f, 0xc743, 0xb5e0, 0x71ad, + 0x4163, 0xf00a, 0xc00f, 0x2014, + 0x0082, 0x6478, 0x781c, 0x2048, + 0x0000, 0xb200, 0xc006, 0x71c5, + 0x7104, 0xc046, 0x0e37, 0x9254, 0xc00b, 0x7104, 0xc04b, 0xc00b, 0x780f, 0xc048, 0x40c3, 0x8000, - 0x0771, 0x8821, 0xc008, 0x7030, - 0x00fc, 0x002d, 0x70f5, 0xc007, - 0x7db0, 0x722c, 0x780f, 0x651d, - 0xc007, 0xf005, 0xc006, 0x71a5, - 0x7104, 0xc046, 0xc006, 0x7e0f, - 0x0ec9, 0x9275, 0x2678, 0x1202, - 0xc011, 0x7844, 0x0811, 0x0071, - 0x7bb0, 0xc00e, 0x7874, 0x10f0, - 0x8102, 0xf060, 0xc00c, 0x20f5, - 0x00c2, 0xc00d, 0x20f5, 0x00c0, - 0x4854, 0x2448, 0x100c, 0x7c8e, - 0x0c9b, 0x11b3, 0x6058, 0x730c, - 0x203c, 0x039b, 0x09b9, 0x86e0, - 0x43c1, 0xc005, 0xc208, 0x41c3, - 0x0014, 0x0003, 0x2078, 0x0000, - 0x0aae, 0xfcaf, 0xc440, 0xc00f, - 0x269a, 0x1001, 0x4363, 0x2055, - 0x08c1, 0xc010, 0x2604, 0x1f82, - 0x0000, 0x3f00, 0x7905, 0xc008, - 0x201a, 0x0f80, 0x0000, 0x1000, - 0x7825, 0x7845, 0x781b, 0x2005, - 0x0f8e, 0x9002, 0x0000, 0x9620, - 0xc016, 0xc140, 0x70c3, 0x8000, - 0x04c4, 0x1060, 0x00c0, 0x6119, - 0xc041, 0x6909, 0x7f2e, 0x7a10, - 0x277f, 0x17c0, 0x205f, 0x0800, - 0xe29e, 0xc742, 
0xc208, 0x27ca, - 0x100d, 0x78f0, 0xc043, 0x41c3, - 0x0015, 0x0006, 0x0806, 0xfd2f, - 0xd80a, 0xb6e0, 0x71ed, 0x4163, - 0xf196, 0x262f, 0xf001, 0x7a1d, - 0xc00e, 0x22ca, 0x0024, 0x7874, - 0xb040, 0xf18e, 0x0b4c, 0xfe82, - 0xf116, 0xc012, 0x087f, 0x0030, + 0x07cd, 0x8821, 0xc008, 0x0911, + 0x8025, 0x70b5, 0x0eec, 0xfe82, + 0xc00a, 0x7104, 0xc04a, 0x70b5, + 0xf522, 0xc012, 0x0881, 0x0030, 0x740c, 0x41c3, 0x0016, 0x0000, - 0x0fd2, 0xfccf, 0x1600, 0x708e, - 0x8000, 0x0771, 0xf002, 0x71c5, - 0x40c3, 0x8000, 0x0771, 0x8801, - 0x7dcf, 0x085b, 0x0344, 0xc007, - 0xc707, 0x790f, 0xc004, 0x7810, - 0x6038, 0xc044, 0xf005, 0xc004, - 0x71e5, 0x7104, 0xc044, 0x7bef, - 0x0bd7, 0x8275, 0x2378, 0x0201, - 0xc011, 0x790b, 0xf5f5, 0xc004, - 0x7910, 0xc00c, 0x20f5, 0x0042, - 0xc00d, 0x20f5, 0x004c, 0xc00e, - 0x20f5, 0x0040, 0x41c3, 0x0017, - 0x0005, 0xc042, 0xc441, 0xc240, - 0x740c, 0x0f6a, 0xfcef, 0x42a1, - 0xf1df, 0x08ce, 0xfe6f, 0xc013, - 0x700c, 0x1e00, 0x7004, 0x9003, - 0xe004, 0x2480, 0x3107, 0x1404, - 0x341b, 0xc6c6, 0xc2e6, 0xc1a4, - 0x712c, 0x2044, 0x07c3, 0xeb06, - 0xbb61, 0x0b01, 0x0031, 0x793b, - 0x215a, 0x0101, 0x205f, 0x0643, - 0x210f, 0x000e, 0x41c3, 0x8000, - 0x076d, 0x8900, 0x205f, 0x0c80, - 0x6078, 0x6038, 0x88f5, 0xea03, - 0xbf87, 0x700c, 0x0a1a, 0xfcef, - 0x702c, 0x702c, 0x78cf, 0xc143, - 0xc042, 0xd8ff, 0x734c, 0xdb22, - 0x1c04, 0x3081, 0x4528, 0x0d06, + 0x0af6, 0xfd0f, 0x1600, 0x708e, + 0x8000, 0x07cd, 0xf02d, 0xc004, + 0xc707, 0x7910, 0xc007, 0x6038, + 0xc044, 0xf024, 0x0f09, 0x1211, + 0xc005, 0xe81c, 0xc004, 0x781b, + 0x2004, 0x0f81, 0x0001, 0xfffe, + 0xc00e, 0x2035, 0x0043, 0xc00d, + 0x2035, 0x0042, 0xc00f, 0x2035, + 0x0040, 0xc241, 0xc340, 0xc042, + 0x740c, 0x41c3, 0x0017, 0x0005, + 0x42a1, 0x0aa6, 0xfd2f, 0x43e1, + 0xc004, 0x71e5, 0x7104, 0xc044, + 0x0fbd, 0x9254, 0x71c5, 0x40c3, + 0x8000, 0x07cd, 0x8801, 0x7dcf, + 0x089f, 0x8345, 0x1e00, 0x7005, + 0x9003, 0xe004, 0x2480, 0x3009, + 0x1404, 0x341b, 0xc6c6, 0x78e0, + 0xc2e6, 0x1cfc, 0xb6c8, 0xc1a4, + 0x4508, 
0x205f, 0x0641, 0x40c3, + 0x8000, 0x07c9, 0x8800, 0x4358, + 0x205f, 0x0c80, 0x6038, 0x2000, + 0x0f80, 0x8000, 0x067c, 0x88e2, + 0x700c, 0x0aba, 0xfcef, 0x702c, + 0x710c, 0x2544, 0x17ca, 0x0a11, + 0x1010, 0x2242, 0x104a, 0x0aff, + 0x9031, 0x781b, 0x7077, 0x205a, + 0x0100, 0x702c, 0x27cf, 0x11e2, + 0x734c, 0x200f, 0x0340, 0x780f, + 0xc042, 0xd8ff, 0xdb22, 0x4628, + 0xc143, 0x1c04, 0x3081, 0x0d66, 0xfcef, 0xc740, 0xd8ff, 0xd980, - 0x754c, 0x726c, 0xc543, 0xc542, - 0xc541, 0x0ade, 0xfcef, 0xc540, - 0x700c, 0x0a92, 0xfcef, 0x712c, - 0xc0a4, 0xc6c6, 0xc2e6, 0x1cfc, - 0xb6c8, 0xc1a4, 0x4338, 0x091e, - 0xfdaf, 0x4608, 0x4708, 0x700c, - 0x09c6, 0xfcef, 0xd908, 0x70ad, - 0xd8ff, 0x702c, 0x704c, 0xdb28, - 0xc543, 0xc642, 0xc541, 0x0aa2, - 0xfcef, 0xc540, 0x0cea, 0xfcef, - 0xd8ff, 0x0b2f, 0x3030, 0xe72e, - 0xd8ff, 0xd90c, 0x714c, 0xdb08, - 0xc543, 0xc642, 0xc541, 0x0a82, - 0xfcef, 0xc540, 0xd8ff, 0xd90c, - 0x714c, 0xdb0a, 0xc543, 0xc642, - 0xc541, 0xc540, 0xf00a, 0x1c0c, - 0x3041, 0xc642, 0xc541, 0xc540, - 0xd8ff, 0xd90c, 0x714c, 0xdb08, - 0x0a56, 0xfccf, 0xd8ff, 0x702c, - 0x714c, 0xdb08, 0xc543, 0xc642, - 0xc541, 0x0a46, 0xfcef, 0xc540, - 0x7bef, 0xd8ff, 0xd90c, 0x714c, - 0xc543, 0xc541, 0xc540, 0x0a32, - 0xfcef, 0xc642, 0xd8ff, 0x702c, + 0x754c, 0x726c, 0xc643, 0xc642, + 0xc641, 0x0b42, 0xfcef, 0xc640, + 0x700c, 0x0afa, 0xfcef, 0x712c, + 0xc0a4, 0x1404, 0x341b, 0xc6c6, + 0xc2e6, 0x1cfc, 0xb6c8, 0xc1a4, + 0x4528, 0x0ce2, 0xfdaf, 0x4708, + 0x4608, 0x700c, 0x0a3e, 0xfcef, + 0xd908, 0x706f, 0xd8ff, 0x702c, + 0x754c, 0xdb0a, 0x1c0c, 0x36c0, + 0x1c08, 0x36c0, 0x1c04, 0x36c0, + 0x0afa, 0xfcef, 0x1c00, 0x36c0, + 0x0d3e, 0xfcef, 0xd8ff, 0xd8ff, + 0x702c, 0x704c, 0xdb28, 0x1c0c, + 0x36c0, 0xc742, 0x1c04, 0x36c0, + 0x0ada, 0xfcef, 0x1c00, 0x36c0, + 0xe62e, 0xed18, 0xf83b, 0x1c0c, + 0x36c0, 0xc742, 0x1c04, 0x36c0, + 0x0ac2, 0xfcef, 0x1c00, 0x36c0, + 0xd8ff, 0xd90c, 0x714c, 0xdb0a, + 0x1c0c, 0x36c0, 0xc742, 0x1c04, + 0x36c0, 0x1c00, 0x36c0, 0xf00a, + 0x1c0c, 0x3041, 0xc742, 0x1c04, + 
0x36c0, 0x1c00, 0x36c0, 0xf82d, + 0x0a92, 0xfccf, 0xd8ff, 0x702c, + 0x714c, 0xdb08, 0x1c0c, 0x36c0, + 0xc742, 0x1c04, 0x36c0, 0x0a7e, + 0xfcef, 0x1c00, 0x36c0, 0x70ad, + 0x7bcf, 0xd8ff, 0xd90c, 0x714c, + 0xc543, 0xc742, 0xc541, 0x0a66, + 0xfcef, 0xc540, 0xd8ff, 0x702c, 0x754c, 0x726c, 0x1c0c, 0x30c1, - 0xc542, 0xc541, 0x0a1a, 0xfcef, + 0xc542, 0xc541, 0x0a4e, 0xfcef, 0xc540, 0x702c, 0xd8ff, 0xb98f, 0x754c, 0x746c, 0x1c0c, 0x3041, - 0xc542, 0xc541, 0x0a02, 0xfcef, + 0xc542, 0xc541, 0x0a36, 0xfcef, 0xc540, 0xd8ff, 0x702c, 0x754c, 0xdb10, 0xc543, 0xc542, 0xc541, - 0x09ee, 0xfcef, 0xc540, 0x0c1e, - 0xfcef, 0xd8ff, 0xbec1, 0xd8ff, - 0xd980, 0x764c, 0x746c, 0xc543, - 0xc642, 0xc541, 0x09d2, 0xfcef, - 0x1c00, 0x3041, 0xc0a4, 0x1404, - 0x341b, 0xc6c6, 0xc2e6, 0x1cfc, - 0xb6c8, 0xc1a8, 0xc146, 0xc044, - 0x700c, 0xd9ff, 0xc347, 0x08c2, - 0xfcef, 0xc245, 0x1434, 0x301b, - 0x70cd, 0x70ad, 0xf002, 0x71a5, - 0x0d77, 0x1335, 0x710c, 0x2532, - 0x1f8f, 0x8000, 0x064c, 0x702c, - 0x0f7a, 0xfc6f, 0x42e1, 0x2104, - 0x06c2, 0xc107, 0x7824, 0x2005, - 0x80be, 0xf5ef, 0xc006, 0x734c, - 0xdb22, 0x60e9, 0xc004, 0x1c0c, - 0x3001, 0xc042, 0xc741, 0xc140, - 0xd8ff, 0x0b7a, 0xfcef, 0x702c, - 0xed03, 0x0dbf, 0x9191, 0x70b5, - 0xd9c8, 0xd8fa, 0x09ba, 0xfe2f, + 0x0a22, 0xfcef, 0xc540, 0xbfc1, + 0xd8ff, 0x702c, 0x764c, 0xdb2e, + 0xc543, 0xc742, 0xc541, 0x0a0e, + 0xfcef, 0x1c00, 0x3041, 0x0c36, + 0xfcef, 0xd8ff, 0xd8ff, 0xd980, + 0x754c, 0x746c, 0xc543, 0xc542, + 0xc541, 0x09f2, 0xfcef, 0xc540, + 0xc0a4, 0x1404, 0x341b, 0xc6c6, + 0xd8ff, 0xd90c, 0x714c, 0xdb08, + 0x7ee0, 0x78e0, 0xc2e6, 0x1cfc, + 0xb6c8, 0xc1a8, 0xc145, 0x4318, + 0x700c, 0xd9ff, 0xc346, 0x08ee, + 0xfcef, 0xc244, 0xc00d, 0x70cd, + 0x70ad, 0xc047, 0xf03a, 0x2532, + 0x1f8f, 0x8000, 0x0624, 0x702c, + 0x0fb6, 0xfc6f, 0x42e1, 0xc207, + 0x7944, 0xc206, 0x7844, 0x2005, + 0x807e, 0xf42b, 0xc005, 0x702c, + 0x734c, 0xdb22, 0x60e8, 0x1c0c, + 0x3001, 0x1c08, 0x36c0, 0xc040, + 0xd8ff, 0x0b92, 0xfcef, 0xc741, + 0x0d07, 0x1190, 0xed99, 0x70b5, + 
0xd9c8, 0xd8fa, 0x0d52, 0xfe2f, 0x20ca, 0x0041, 0x781d, 0x781d, - 0x781d, 0x781d, 0x781d, 0x702c, - 0x780f, 0xc142, 0xc141, 0xc140, - 0xc043, 0xd8ff, 0x754c, 0x0b46, - 0xfcef, 0xdb20, 0xf1c5, 0xd8ff, + 0x781d, 0x781d, 0x781d, 0x780f, + 0xc043, 0x700c, 0xc042, 0xc041, + 0xc040, 0xd8ff, 0x702c, 0x754c, + 0x0b5a, 0xfcef, 0xdb20, 0x71a5, + 0x0d8f, 0x9334, 0x710c, 0xd8ff, 0x702c, 0x754c, 0xdb41, 0x1c0c, - 0x3401, 0xc642, 0xc641, 0x091a, + 0x3401, 0xc642, 0xc641, 0x092e, 0xfcef, 0xc640, 0xd8ff, 0xd980, 0x754c, 0x746c, 0xc643, 0xc642, - 0xc641, 0x0906, 0xfcef, 0xc640, - 0x700c, 0x08ba, 0xfcef, 0x712c, - 0xc005, 0xe837, 0x700c, 0x0802, - 0xfcef, 0xd9ff, 0xc604, 0x70ad, - 0xd84f, 0xbec1, 0xc543, 0xc642, - 0xc541, 0xc040, 0xd8ff, 0xd980, - 0xda17, 0x08d6, 0xfcef, 0x746c, - 0x700c, 0x088a, 0xfcef, 0x712c, - 0x40c3, 0x000f, 0x4240, 0x0f16, - 0xfe2f, 0x702c, 0x700c, 0x0fca, - 0xfcaf, 0xd9ff, 0xd851, 0xc543, - 0xc642, 0xc541, 0xc040, 0xd8ff, - 0xd980, 0xda17, 0x08a2, 0xfcef, - 0x746c, 0x700c, 0x0856, 0xfcef, - 0x712c, 0x40c3, 0x0000, 0x1f40, - 0x0ee2, 0xfe2f, 0xd91e, 0xc0a8, + 0xc641, 0x091a, 0xfcef, 0xc640, + 0x700c, 0x08d2, 0xfcef, 0x712c, + 0xc004, 0xe833, 0x700c, 0x082e, + 0xfcef, 0xd9ff, 0xd84f, 0x70ad, + 0x2353, 0x305b, 0xc040, 0xf817, + 0xc543, 0x1c08, 0x36c0, 0x08ee, + 0xfcef, 0xc541, 0x700c, 0x08a6, + 0xfcef, 0x712c, 0x40c3, 0x000f, + 0x4240, 0x0a4e, 0xfe6f, 0x702c, + 0x700c, 0x0ffa, 0xfcaf, 0xd9ff, + 0xd851, 0xc040, 0xf80b, 0xc543, + 0x1c08, 0x36c0, 0x08be, 0xfcef, + 0xc541, 0x700c, 0x0876, 0xfcef, + 0x712c, 0x40c3, 0x0000, 0x7530, + 0x0a1e, 0xfe6f, 0xd908, 0xc0a8, 0x1404, 0x341b, 0xc6c6, 0x78e0, - 0xc2e6, 0x1cfc, 0xb6c8, 0xc1ad, - 0xc34a, 0xc248, 0xc14b, 0x0bb6, - 0xfd2f, 0xc045, 0xc012, 0x70cd, - 0xddff, 0xc04c, 0xc013, 0xc049, - 0x40c1, 0xf004, 0xc004, 0x7104, - 0xc044, 0xc004, 0xe08b, 0x0170, - 0x000d, 0xc004, 0x2032, 0x0f80, - 0x8000, 0x064c, 0xc047, 0xc107, - 0xc00b, 0x6028, 0xc046, 0x78af, - 0x208c, 0x8fc3, 0xf40a, 0x40c3, - 0x0000, 0x0a00, 0x0f3a, 0xfcaf, - 
0xd9ff, 0x088a, 0xff8f, 0x70ad, - 0xc007, 0x712c, 0xc20a, 0xb8c4, - 0xe805, 0xb861, 0x08ff, 0x8031, - 0x793b, 0xd81f, 0x7a24, 0xb861, - 0x08ff, 0x8031, 0x793c, 0xc00c, - 0x7824, 0x2205, 0x803e, 0xf47b, - 0xc006, 0x47cb, 0x8000, 0x0684, - 0x781d, 0x781d, 0x2045, 0x019b, - 0xc009, 0xe80e, 0x8f01, 0x702c, - 0x704c, 0xc043, 0x700c, 0xc042, - 0xc041, 0xc040, 0xd8ff, 0x0fd2, - 0xfcaf, 0xdb20, 0x8f01, 0x702c, - 0x704c, 0xc043, 0xc005, 0x726c, - 0x71a5, 0xc042, 0x710c, 0xc041, - 0x2344, 0x3980, 0xc040, 0x0fb2, - 0xfcaf, 0xd8ff, 0x8f01, 0x702c, - 0x704c, 0xc043, 0x710c, 0xc642, - 0xc041, 0xc007, 0x726c, 0xb8c5, - 0xc040, 0x0f96, 0xfcaf, 0xd8ff, - 0x8f01, 0x702c, 0x704c, 0xc043, - 0xc005, 0x726c, 0xc042, 0x710c, - 0xc041, 0xc006, 0x781d, 0x2045, - 0x0580, 0x2044, 0x0d80, 0xc040, - 0x0f6e, 0xfcaf, 0xd8ff, 0x8f01, - 0x702c, 0x704c, 0xc043, 0x710c, - 0xc642, 0xc041, 0xc006, 0x726c, - 0xb8c5, 0xc040, 0x0f52, 0xfcaf, - 0xd8ff, 0xc004, 0xe818, 0x40c3, - 0x0000, 0x2710, 0x0db6, 0xfe2f, - 0xd90a, 0x781d, 0x781d, 0x7704, - 0x7f0f, 0xf84a, 0xc642, 0xc641, - 0xc640, 0x0f2e, 0xfcaf, 0xc743, - 0xf846, 0xc743, 0xc642, 0xc641, - 0xc640, 0xf00b, 0x8f01, 0x702c, - 0x704c, 0xc043, 0xc642, 0xc641, - 0xc640, 0xd8ff, 0xdb18, 0x0f0a, - 0xfc8f, 0x78af, 0x0811, 0x0110, - 0xc104, 0xe18b, 0xf550, 0x70ad, - 0x7014, 0xf34e, 0x0efe, 0xff4f, - 0x700c, 0x0eaa, 0xfcaf, 0x712c, - 0xddff, 0xf146, 0xc008, 0x08c9, - 0x0030, 0xd9ff, 0x40c3, 0x0000, - 0x0a00, 0x0de6, 0xfcaf, 0x4318, - 0x0f32, 0xff4f, 0x46cb, 0x8000, - 0x0685, 0x8e00, 0x702c, 0xdf20, - 0xc043, 0xc005, 0x704c, 0x726c, - 0xc042, 0xd8ff, 0xc141, 0x4528, - 0x0eae, 0xfcaf, 0xc740, 0x8e00, + 0xd8ff, 0xd980, 0xda17, 0x746c, + 0x7ee0, 0x78e0, 0xc2e6, 0x1cfc, + 0xb6c8, 0xc1ac, 0xc349, 0xc247, + 0xc14a, 0x0f22, 0xfd2f, 0xc046, + 0xc011, 0x71ed, 0xc04b, 0xc012, + 0xc048, 0xd8ff, 0xc044, 0x700c, + 0xc045, 0xf0b8, 0xc005, 0x2032, + 0x0f9b, 0x8000, 0x0624, 0xc00a, + 0x2032, 0x06ce, 0xc004, 0x780f, + 0x208c, 0x8fc3, 0xf40b, 0x40c3, + 0x0000, 0x0a00, 0x0f6e, 
0xfcaf, + 0xd9ff, 0x082e, 0xff8f, 0x700c, + 0xc044, 0x41e1, 0x2344, 0x37c0, + 0xc209, 0xe806, 0xb861, 0x0801, + 0x0031, 0x793b, 0xd81f, 0x7a24, + 0xb861, 0x0801, 0x0031, 0x793c, + 0xc00b, 0x7824, 0x2205, 0x803e, + 0xf479, 0x78dd, 0x781d, 0x2044, + 0x0800, 0x2045, 0x018d, 0xc008, + 0xe80b, 0xf87e, 0xc043, 0x700c, + 0xc042, 0xc041, 0xc040, 0xd8ff, + 0x0ff2, 0xfcaf, 0xdb20, 0xc004, + 0x702c, 0x704c, 0x7104, 0xc044, + 0x40c3, 0x8000, 0x06e0, 0x8801, + 0x726c, 0xc741, 0xc043, 0xc006, + 0xc540, 0xc042, 0x0fce, 0xfcaf, + 0xd8ff, 0x40c3, 0x8000, 0x06e0, + 0x8801, 0x70ad, 0x2353, 0x315b, + 0xc043, 0xf869, 0xc542, 0xc741, + 0x0fb2, 0xfcaf, 0x1c00, 0x36c0, + 0xf868, 0xc043, 0xc006, 0x726c, + 0xc741, 0xc042, 0x78dd, 0x2044, + 0x0800, 0x2045, 0x0580, 0xc040, + 0x0f92, 0xfcaf, 0xd8ff, 0x40c3, + 0x8000, 0x06e0, 0x8801, 0xbec5, + 0x702c, 0xc043, 0xd8ff, 0x704c, + 0x726c, 0xc542, 0xc741, 0x0f76, + 0xfcaf, 0xc640, 0xc005, 0xe819, + 0x40c3, 0x0000, 0x2710, 0x08fa, + 0xfe6f, 0xd90a, 0x781d, 0x781d, + 0x7704, 0x7e0f, 0xf850, 0xc542, + 0xc541, 0xc540, 0x0f4e, 0xfcaf, + 0xc643, 0xf84d, 0xc643, 0xc542, + 0xc541, 0xc540, 0xf00d, 0x40c3, + 0x8000, 0x06e0, 0x8801, 0xc542, + 0xc541, 0xc043, 0xc540, 0xd8ff, + 0x702c, 0x704c, 0xdb18, 0x0f26, + 0xfc8f, 0xc004, 0x790f, 0x090f, + 0x0110, 0xc005, 0x0817, 0x02d1, + 0xe909, 0x0ea6, 0xff4f, 0x700c, + 0x0eca, 0xfcaf, 0x712c, 0xd8ff, + 0xc044, 0xc005, 0x7104, 0xc045, + 0xc005, 0xe08c, 0x0690, 0xffc5, + 0xc007, 0x08cf, 0x0030, 0xd9ff, + 0x47cb, 0x0000, 0x0a00, 0x0e0e, + 0xfcaf, 0x40e1, 0x0eca, 0xff4f, + 0x46cb, 0x8000, 0x06e1, 0x8e00, + 0x702c, 0x234a, 0x3800, 0xc043, + 0xc006, 0x704c, 0x726c, 0xc042, + 0xd8ff, 0x4528, 0xc141, 0x0ebe, + 0xfcaf, 0x1c00, 0x36c0, 0x8e00, 0x702c, 0x704c, 0xc043, 0xd8ff, - 0x726c, 0xc542, 0xc541, 0x0e9a, - 0xfcaf, 0x1c00, 0x33c1, 0x0e9e, - 0xff4f, 0x700c, 0x0e46, 0xfcaf, + 0x726c, 0xc542, 0xc541, 0x0ea6, + 0xfcaf, 0x1c00, 0x33c1, 0x0e32, + 0xff4f, 0x700c, 0x0e56, 0xfcaf, 0x712c, 0x40c3, 0x000f, 0x4240, - 0x0cd2, 0xfe2f, 
0x702c, 0x4063, - 0x0d86, 0xfcaf, 0xd9ff, 0x0ed6, + 0x0ffe, 0xfe2f, 0x702c, 0x40e1, + 0x0daa, 0xfcaf, 0xd9ff, 0x0e6a, 0xff4f, 0x8e00, 0x702c, 0x704c, - 0xc043, 0xc005, 0x726c, 0xc042, - 0xd8ff, 0xc541, 0x0e5a, 0xfcaf, - 0xc740, 0x8e00, 0x702c, 0x704c, - 0xc043, 0xd8ff, 0x726c, 0xc542, - 0xc541, 0x0e46, 0xfcaf, 0x1c00, - 0x3441, 0x8e00, 0x702c, 0x704c, - 0xc043, 0xd8ff, 0xdb10, 0xc542, - 0xc541, 0x0e2e, 0xfcaf, 0xc540, - 0x0e32, 0xff4f, 0x700c, 0x0dde, - 0xfcaf, 0x712c, 0x0b5a, 0xfd0f, - 0xc0ad, 0x1404, 0x341b, 0xc6c6, - 0xd8ff, 0x702c, 0x704c, 0x726c, - 0x7ee0, 0x78e0, 0xc2e6, 0x1cfc, - 0xb6c8, 0xc1aa, 0xc043, 0x1600, - 0x7080, 0x8000, 0x0058, 0x70ad, - 0xc346, 0xc048, 0xd858, 0xb89f, - 0x10cd, 0x809b, 0xc00f, 0xc245, - 0xc144, 0xc047, 0xf002, 0x71a5, - 0x0db7, 0x10b3, 0x710c, 0x2544, - 0x17c1, 0xe906, 0xb961, 0x0901, - 0x0031, 0x781b, 0xc042, 0xc102, - 0xc007, 0x70cd, 0x7824, 0xc049, - 0xf002, 0x71c5, 0x0edb, 0x90b3, - 0x70d5, 0xc108, 0x4063, 0x703c, - 0xc109, 0x782b, 0xf3f7, 0xc002, - 0x2644, 0x17c1, 0x7f0f, 0x710c, + 0xc043, 0xc006, 0x726c, 0xc541, + 0xc042, 0xd8ff, 0x0e66, 0xfcaf, + 0x1c00, 0x36c0, 0x8e00, 0x702c, + 0x704c, 0xc043, 0xd8ff, 0x726c, + 0xc542, 0xc541, 0x0e4e, 0xfcaf, + 0x1c00, 0x3441, 0x8e00, 0x702c, + 0x704c, 0xc043, 0xd8ff, 0xdb10, + 0xc542, 0xc541, 0x0e36, 0xfcaf, + 0xc540, 0x0dc6, 0xff4f, 0x700c, + 0x0dea, 0xfcaf, 0x712c, 0x0ee2, + 0xfd0f, 0xc0ac, 0x1404, 0x341b, + 0xc6c6, 0x78e0, 0xd8ff, 0x702c, + 0x704c, 0x726c, 0x7ee0, 0x78e0, + 0x40c3, 0x8000, 0x06e0, 0x8801, + 0x702c, 0x704c, 0x7ee0, 0x78e0, + 0xc2e6, 0x1cfc, 0xb6c8, 0xc1a9, + 0xc145, 0xd958, 0xb99f, 0xc046, + 0x8900, 0x11cd, 0x808f, 0x70ad, + 0xc047, 0xc00e, 0x71cd, 0xc343, + 0xc244, 0xc048, 0xf058, 0x2544, + 0x17c1, 0x706f, 0xe905, 0xb961, + 0x09ff, 0x8031, 0x781b, 0xc042, + 0xf04b, 0xc007, 0xc202, 0xc108, + 0x7077, 0x20ca, 0x03c1, 0x7944, + 0x790b, 0xf242, 0xc002, 0x2344, + 0x37c1, 0x262f, 0x3007, 0x40c1, 0xe905, 0xb961, 0x09ff, 0x8031, - 0x781b, 0x0dee, 0xfcef, 0x780f, - 0x40c3, 
0x8000, 0x0684, 0x8800, - 0xe807, 0xd858, 0xb89f, 0x10a9, - 0x8080, 0x082d, 0x00de, 0xc006, - 0x265a, 0x1c81, 0x275f, 0x1102, - 0xc040, 0xc003, 0xc305, 0x6038, - 0x255a, 0x1641, 0x6119, 0xc002, - 0x7845, 0xc204, 0x0b92, 0xffef, - 0x780f, 0xf1cd, 0xc006, 0x265a, - 0x1c81, 0x1c04, 0x3001, 0xc040, - 0xc003, 0x275f, 0x1102, 0xc305, - 0x6038, 0x255a, 0x1641, 0x6119, - 0xc002, 0x7845, 0xc204, 0x0cae, - 0xffef, 0x780f, 0xf1b7, 0x0d82, - 0xfcef, 0x730c, 0xc0aa, 0x1404, - 0x341b, 0xc6c6, 0xc2e6, 0x43c3, - 0x8000, 0x0771, 0x8b80, 0x245f, - 0x1242, 0x7854, 0xf002, 0x7185, - 0x8b41, 0x7d8f, 0x0a0d, 0x0365, - 0x70cd, 0xf017, 0x71c5, 0x7204, - 0x7acf, 0x0aef, 0x8255, 0x09f9, - 0x808e, 0x229a, 0x0008, 0x251a, + 0x781b, 0x0962, 0xfd2f, 0x780f, + 0x40c3, 0x8000, 0x06e0, 0x8800, + 0xe81c, 0x1600, 0x7080, 0x8000, + 0x0001, 0x0831, 0x00df, 0xc003, + 0x235a, 0x3c81, 0x265f, 0x3102, + 0xc040, 0xc006, 0xc304, 0x1c04, + 0x3001, 0x6119, 0x255a, 0x1640, + 0x6119, 0xc002, 0x7845, 0xc205, + 0x0cce, 0xffef, 0x780f, 0xf014, + 0xc003, 0x235a, 0x3c81, 0x265f, + 0x3102, 0xc040, 0xc006, 0xc304, + 0x6119, 0x255a, 0x1640, 0x6119, + 0xc002, 0x7845, 0xc205, 0x0b62, + 0xffef, 0x780f, 0x7167, 0x0b6f, + 0xb094, 0x71a5, 0x0d53, 0x90b4, + 0x40c1, 0x08ea, 0xfd2f, 0x730c, + 0xc0a9, 0x1404, 0x341b, 0xc6c6, + 0xc2e6, 0x43c3, 0x8000, 0x07cd, + 0x8b80, 0x8ba1, 0x245f, 0x1482, + 0x6058, 0xf017, 0x0921, 0x00ee, + 0x4260, 0x229a, 0x0008, 0x261a, 0x1f8f, 0x0000, 0x2000, 0x7ae5, 0x2205, 0x0f8f, 0x9002, 0x0064, - 0x9040, 0xb740, 0xf1ec, 0x0c36, + 0x9040, 0xb740, 0x7164, 0x7204, + 0x0bdd, 0x8254, 0x7185, 0x7e8f, + 0x0df9, 0x93a5, 0x706c, 0x0f5e, 0xfe4f, 0xc6c6, 0xc2e2, 0x219a, 0x0001, 0x239a, 0x0001, 0x44cb, 0x9008, 0x0100, 0x7825, 0x1600, 0x7081, 0x8000, 0x0004, 0x7b45, 0x211a, 0x0f8d, 0x0020, 0x0000, 0x2505, 0x1301, 0xb100, 0x6c02, - 0x78a5, 0xb060, 0x700c, 0x0c4e, + 0x78a5, 0xb060, 0x700c, 0x0c56, 0xfcaf, 0x712c, 0xc6c2, 0x78e0, - 0xc2e2, 0x1cfc, 0xb6c8, 0x4338, - 0x4308, 0x70ad, 0xf002, 0x71a5, - 0x2b45, 0x3180, 
0x0d49, 0x1005, - 0x255a, 0x1182, 0x7261, 0x8a01, - 0x8a60, 0x8a22, 0x209a, 0x0004, - 0x7b05, 0x8a03, 0x209a, 0x0004, - 0x7825, 0x201a, 0x0f80, 0x0001, - 0x0000, 0x2005, 0x00c1, 0x8a05, - 0x8a64, 0x7734, 0x209a, 0x0004, - 0x7865, 0xf207, 0x793b, 0xb99c, - 0xb99f, 0xb100, 0xf1dd, 0x0a2e, - 0xfe0f, 0xf1db, 0x1404, 0x341b, + 0xc2e2, 0x260a, 0x3040, 0x4308, + 0x70ad, 0xf024, 0x255a, 0x1182, + 0x7261, 0x8a01, 0x8a60, 0x8a22, + 0x209a, 0x0004, 0x7b05, 0x8a03, + 0x209a, 0x0004, 0x7825, 0x201a, + 0x0f80, 0x0001, 0x0000, 0x2005, + 0x00c1, 0x8a05, 0x8a64, 0x7734, + 0x209a, 0x0004, 0x7865, 0xf207, + 0x793b, 0xb99c, 0xb99f, 0xb100, + 0xf003, 0x0d5e, 0xfe0f, 0x71a5, + 0x2e45, 0x3180, 0x0db9, 0x9004, 0xc6c2, 0x78e0, 0xc2e6, 0x1cfc, - 0xb6c8, 0xc1a2, 0x45cb, 0x8000, - 0x0771, 0x8de0, 0x4318, 0xc340, - 0xc141, 0xea04, 0x7bfb, 0xf004, - 0x275f, 0x1243, 0x7054, 0x274a, - 0x3200, 0x27ca, 0x3062, 0xf003, - 0x71e5, 0x8d01, 0x7710, 0x700c, - 0xf6d3, 0xf03b, 0x219a, 0x0001, - 0x7104, 0x7164, 0x7e25, 0x271a, - 0x1f81, 0x0000, 0x1000, 0x2105, - 0x06c1, 0x7985, 0x793b, 0xb991, - 0xb99c, 0xb99f, 0xb1c0, 0x0fd5, - 0xb022, 0x4408, 0xc101, 0x249a, - 0x1004, 0x21f5, 0x00c2, 0xc100, - 0xe912, 0x7e5c, 0x7edc, 0x7edc, - 0x7edc, 0x224a, 0x16c0, 0x2242, - 0x104a, 0x0aff, 0x9031, 0x7edd, - 0x2a44, 0x0801, 0x62da, 0xbac4, - 0x4ad6, 0xf1d6, 0x268a, 0x1fcf, - 0x7e44, 0x2204, 0x0f81, 0x0000, - 0xfc00, 0xda0a, 0xba61, 0x0a01, - 0x0031, 0x793d, 0xf1c8, 0xc0a2, - 0x1404, 0x341b, 0xc6c6, 0x78e0, + 0xb6c8, 0xc1a1, 0x4718, 0x40c3, + 0x8000, 0x07cd, 0x88c0, 0x4338, + 0xc340, 0xea04, 0x79db, 0xf004, + 0x265f, 0x1241, 0x1001, 0x009e, + 0x7054, 0xdd08, 0x25ca, 0x1062, + 0xf039, 0x40e1, 0x209a, 0x0004, + 0xc300, 0x23f5, 0x3042, 0xeb13, + 0x7c5c, 0x7c9c, 0x7c9c, 0x7c9c, + 0x224a, 0x16c0, 0x2a44, 0x0803, + 0x2242, 0x104a, 0x0afd, 0x9031, + 0x7c9d, 0x629a, 0xbac4, 0x4a94, + 0xf00d, 0x248a, 0x1fcf, 0x7c44, + 0x2204, 0x0f83, 0x0000, 0xfc00, + 0xda0a, 0xba61, 0x0aff, 0x8031, + 0x7b7d, 0x261a, 0x1f82, 0x0000, + 0x1000, 
0x239a, 0x0001, 0x71e5, + 0x7124, 0x2205, 0x07c2, 0x7845, + 0x781b, 0xb891, 0xb89c, 0x7b85, + 0xb89f, 0xb060, 0x0d97, 0x93c5, + 0x71c5, 0x0efd, 0xb3a5, 0x70ed, + 0x7487, 0x1404, 0x341b, 0xc6c6, 0xc2e6, 0x1cfc, 0xb6c8, 0xc1a1, 0x7014, 0x1600, 0x7080, 0x8000, - 0x001b, 0x41c3, 0x0040, 0x1800, - 0x70ed, 0x27ca, 0x1041, 0x45cb, - 0x8000, 0x0777, 0x082d, 0x003e, - 0x706f, 0x8d04, 0xe812, 0x8d00, - 0x8d41, 0xad15, 0x40c3, 0x8000, - 0x07a5, 0xa840, 0x8d42, 0xa859, - 0x8d43, 0x40c3, 0x8000, 0x07d7, - 0xa840, 0x2150, 0x0300, 0x7f04, - 0xe509, 0x40a1, 0x0f32, 0xff2f, - 0xd90d, 0x40a1, 0xd90d, 0x0c5a, - 0xfdef, 0xda40, 0x40a1, 0xd90d, - 0x0dd6, 0xfcaf, 0xda7f, 0xdeff, - 0x40a1, 0x702c, 0x42e1, 0x4363, - 0x0cf6, 0xffef, 0xc640, 0x40a1, - 0xd90d, 0x0c36, 0xfdef, 0xda80, - 0x40a1, 0x702c, 0x42c3, 0xffff, - 0xdfff, 0x776c, 0x0cda, 0xffef, - 0xc640, 0x40a1, 0x0ae6, 0xff2f, + 0x001b, 0x43c3, 0x0040, 0x1800, + 0x4568, 0x7dc0, 0x42c3, 0x8000, + 0x0688, 0x082f, 0x003e, 0x706f, + 0x41c3, 0x8000, 0x07d3, 0x8904, + 0xe80f, 0x8900, 0x44cb, 0x8000, + 0x06ba, 0xaa00, 0x8901, 0xaa19, + 0x8902, 0xac00, 0x8903, 0xac19, + 0x2350, 0x0300, 0x7d04, 0x2242, + 0x030e, 0x40c1, 0x0ee2, 0xff2f, + 0xd90d, 0x40c1, 0xd90d, 0x0fb2, + 0xfdef, 0xda40, 0x40c1, 0xd90d, + 0x0dca, 0xfcaf, 0xda7f, 0xdfff, + 0x40c1, 0x702c, 0x42a1, 0x4363, + 0x0d12, 0xffef, 0xc740, 0x40c1, + 0xd90d, 0x0f8e, 0xfdef, 0xda80, + 0x40c1, 0x702c, 0x42c3, 0xffff, + 0xdfff, 0x776c, 0x0cf6, 0xffef, + 0xc740, 0x40c1, 0x0a8e, 0xff2f, 0xd90d, 0x7487, 0x1404, 0x341b, 0xc6c6, 0x78e0, 0xc2e6, 0x1cfc, - 0xb6c8, 0x2482, 0x3303, 0x702c, - 0x0e66, 0xfcaf, 0x4708, 0x1600, - 0x708d, 0x8000, 0x0004, 0x0c46, - 0x0020, 0xc042, 0x40e1, 0x0d76, - 0xfd2f, 0x2455, 0x3b81, 0x71cd, - 0x40c1, 0x2744, 0x17db, 0x0b11, - 0x3010, 0x2342, 0x305b, 0x0bff, - 0xb031, 0x781b, 0x205a, 0x0100, - 0x200f, 0x03c0, 0x0cce, 0xfd6f, - 0x780f, 0x251a, 0x1f8d, 0x0010, - 0x0000, 0x702c, 0x40c3, 0x9003, - 0xfe64, 0x1e00, 0x7044, 0x9003, - 0xfe64, 0x1882, 0x005c, 0x208a, - 0x0808, 
0x1e00, 0x7004, 0x9003, - 0xe004, 0x1e00, 0x7384, 0x900f, - 0xe0c0, 0x1e00, 0x7044, 0x9003, - 0xfe64, 0xc543, 0xf010, 0x6941, - 0x219a, 0x0008, 0x43c1, 0xe806, + 0xb6c8, 0x2482, 0x3403, 0x702c, + 0x092a, 0xfcef, 0x4508, 0xc043, + 0x1600, 0x7080, 0x8000, 0x0004, + 0x702c, 0xda14, 0xc041, 0x0f26, + 0xfe2f, 0x2455, 0x3bc0, 0x0c4a, + 0x0000, 0x40a1, 0x08e2, 0xfd6f, + 0x2455, 0x3bc1, 0x710c, 0x2544, + 0x17ce, 0x4318, 0xee05, 0xbe61, + 0x0eff, 0x9031, 0x781b, 0x205a, + 0x0100, 0x200f, 0x0340, 0x0846, + 0xfdaf, 0x780f, 0x702c, 0x40c3, + 0x9003, 0xfe64, 0x1e00, 0x7044, + 0x9003, 0xfe64, 0x1882, 0x005c, + 0x208a, 0x0808, 0x1e00, 0x7004, + 0x9003, 0xe004, 0x1e00, 0x76c4, + 0x900f, 0xe0c0, 0x1e00, 0x7044, + 0x9003, 0xfe64, 0xf010, 0x6941, + 0x219a, 0x0008, 0x4363, 0xe806, 0xb861, 0x0801, 0x0031, 0x7b7b, 0x2105, 0x0f80, 0x9003, 0xe0c4, - 0xb060, 0x4140, 0x09e3, 0x8232, - 0x2144, 0x07c0, 0x45cb, 0x0000, - 0xaaaa, 0xd80f, 0x41a1, 0x42a1, - 0x43a1, 0x08f2, 0xfd6f, 0xc540, - 0xd80f, 0x0d62, 0xfcaf, 0xd90f, - 0x40c3, 0x0000, 0x0f00, 0x0ea6, - 0xff2f, 0x712c, 0x47cb, 0x9003, - 0xe174, 0x700c, 0xb700, 0x208a, - 0x0fc7, 0xb701, 0x43db, 0x0000, - 0xffff, 0xd830, 0x1f58, 0x96dc, - 0x1f5e, 0x939c, 0x0986, 0xfcaf, - 0x1f57, 0x939c, 0xd80f, 0x702c, - 0x704c, 0x4363, 0x08a6, 0xfd6f, - 0x1c00, 0x36c0, 0x740c, 0x096e, - 0xfcaf, 0x712c, 0xd80f, 0x41a1, - 0x42a1, 0x43a1, 0x088e, 0xfd6f, - 0xc540, 0x700c, 0xb700, 0xb701, - 0xd820, 0x1f58, 0x901c, 0x208a, - 0x0844, 0x1f5e, 0x939c, 0x1f57, - 0x901c, 0x40c3, 0x0000, 0x0f00, - 0x0e32, 0xff2f, 0x732c, 0xd830, - 0x0932, 0xfcaf, 0x712c, 0x1600, - 0x7081, 0x8000, 0x0771, 0xf003, - 0x7124, 0x40c3, 0x8000, 0x0771, - 0x8801, 0x083d, 0x0063, 0x704c, - 0xf01e, 0x209a, 0x0008, 0x211a, - 0x0f83, 0x0000, 0x2000, 0x2216, - 0x004c, 0x7144, 0x7865, 0x2005, + 0xb060, 0x4140, 0x09e3, 0x8234, + 0x2144, 0x07c0, 0x47cb, 0x0000, + 0xaaaa, 0xd80f, 0x41e1, 0x42e1, + 0x43e1, 0x0c62, 0xfd6f, 0xc740, + 0xd80f, 0x0826, 0xfcef, 0xd90f, + 0x40c3, 0x0000, 0x0f00, 0x0e52, + 
0xff2f, 0x712c, 0x45cb, 0x9003, + 0xe174, 0x700c, 0xb500, 0x208a, + 0x0fc7, 0xb501, 0xd830, 0x1d58, + 0x9f9c, 0x0000, 0xffff, 0x1d5e, + 0x96dc, 0x09a2, 0xfcaf, 0x1d57, + 0x96dc, 0xd80f, 0x702c, 0x43c3, + 0x0000, 0xffff, 0x1c00, 0x3f80, + 0x0000, 0xffff, 0x0c0e, 0xfd6f, + 0x704c, 0x740c, 0x097e, 0xfcaf, + 0x712c, 0xd80f, 0x41e1, 0x42e1, + 0x43e1, 0x0bfa, 0xfd6f, 0xc740, + 0x700c, 0xb500, 0xb501, 0xd820, + 0x1d58, 0x901c, 0x208a, 0x0844, + 0x1d5e, 0x96dc, 0x1d57, 0x901c, + 0x40c3, 0x0000, 0x0f00, 0x0dda, + 0xff2f, 0x732c, 0xd830, 0x0946, + 0xfcaf, 0x712c, 0xc001, 0x1600, + 0x7082, 0x8000, 0x07cd, 0x201a, + 0x0f80, 0x0010, 0x0000, 0xc044, + 0x40c3, 0x8000, 0x07cd, 0x8821, + 0xf021, 0x700c, 0x2016, 0x0083, + 0xf019, 0x209a, 0x0008, 0x221a, + 0x0f8c, 0x0000, 0x2000, 0x7885, + 0x2005, 0x0f80, 0x9002, 0x0064, + 0x9000, 0x7c1d, 0x7c9d, 0x7c9d, + 0x7c9d, 0x7c9d, 0xb8c4, 0x7c05, + 0xc087, 0x7874, 0xb080, 0x7164, + 0x40c1, 0x08d3, 0x8234, 0x68c1, + 0x7144, 0x09c3, 0x80a5, 0x208a, + 0x0fc7, 0xb500, 0x700c, 0xc042, + 0xb501, 0x712c, 0x208a, 0x07d0, + 0x1d58, 0x9f9c, 0x0000, 0xffff, + 0x1d5e, 0x905c, 0x1d57, 0x905c, + 0x1e00, 0x7004, 0x9003, 0xfe64, + 0xd830, 0x08b2, 0xfcaf, 0x4628, + 0xd80f, 0x702c, 0x43c3, 0x0000, + 0xffff, 0x1c00, 0x3f80, 0x0000, + 0xffff, 0x0b22, 0xfd6f, 0x704c, + 0x740c, 0x0892, 0xfcaf, 0x712c, + 0xd80f, 0x41e1, 0x42e1, 0x43e1, + 0x0b0a, 0xfd6f, 0xc740, 0x700c, + 0xb500, 0xb501, 0x40c3, 0x0000, + 0x0f00, 0x0cfe, 0xff2f, 0x712c, + 0xd820, 0x1d58, 0x901c, 0x208a, + 0x0844, 0x1d5e, 0x939c, 0x1d57, + 0x901c, 0x085a, 0xfcaf, 0xd830, + 0x40c3, 0x8000, 0x07cd, 0x8801, + 0x1600, 0x7081, 0x8000, 0x07cd, + 0xc046, 0xf082, 0x211a, 0x0f80, + 0x0000, 0x1000, 0x70cd, 0x2616, + 0x104b, 0xc045, 0xc205, 0xc004, + 0x7845, 0xc041, 0xc003, 0x2055, + 0x0d02, 0xc001, 0x7845, 0x781b, + 0xb891, 0xb89c, 0xb89f, 0x9040, + 0x2253, 0x011b, 0xf05b, 0x42c1, + 0x229a, 0x0004, 0xc005, 0x2455, + 0x3bcc, 0x7845, 0x781b, 0x2005, 0x0f80, 0x9002, 0x0064, 0x9000, - 0x7b1d, 0x7b7d, 0x7b7d, 
0x7b7d, - 0x7b7d, 0xb8c4, 0x7b05, 0xc086, - 0x7894, 0xb060, 0x0acf, 0x8232, - 0x4040, 0xf1dc, 0x208a, 0x0fc7, - 0xb700, 0x700c, 0xc041, 0xb701, - 0x712c, 0x208a, 0x07d0, 0x1f58, - 0x96dc, 0x1f5e, 0x905c, 0x1f57, - 0x905c, 0x1e00, 0x7004, 0x9003, - 0xfe64, 0xd830, 0x08ae, 0xfcaf, - 0x4628, 0xd80f, 0x702c, 0x704c, - 0x4363, 0x0fd2, 0xfd2f, 0x1c00, - 0x36c0, 0x740c, 0x0896, 0xfcaf, - 0x712c, 0xd80f, 0x41a1, 0x42a1, - 0x43a1, 0x0fba, 0xfd2f, 0xc540, - 0x700c, 0xb700, 0xb701, 0x40c3, - 0x0000, 0x0f00, 0x0d6e, 0xff2f, - 0x712c, 0xd820, 0x1f58, 0x901c, - 0x208a, 0x0844, 0x1f5e, 0x939c, - 0x1f57, 0x901c, 0x085e, 0xfcaf, - 0xd830, 0x1600, 0x708d, 0x8000, - 0x0771, 0xf00d, 0x251a, 0x1f80, - 0x0000, 0x2000, 0x71a5, 0x2005, - 0x0f81, 0x9002, 0x1e64, 0xc004, - 0xb100, 0x40c3, 0x8000, 0x0771, - 0x8801, 0x7510, 0x010a, 0x0029, - 0xd8ff, 0xc002, 0x70cd, 0x2055, - 0x0d01, 0xc003, 0x7905, 0x251a, - 0x1f80, 0x0000, 0x1000, 0xc045, - 0x7825, 0x781b, 0xb891, 0xb89c, - 0xb89f, 0x9000, 0xb8c4, 0xc044, - 0xf002, 0x71c5, 0x0eb1, 0x9233, - 0x41c1, 0x219a, 0x0004, 0xc005, - 0x7825, 0x781b, 0x2005, 0x0f80, - 0x9002, 0x0064, 0x9000, 0x7a1d, - 0x7a5d, 0x7a5d, 0x7a5d, 0x7c5d, - 0xb8c4, 0x7c05, 0x2616, 0x1342, - 0xc086, 0x20f5, 0x0083, 0x2455, - 0x3b80, 0x20f4, 0x0342, 0xc004, - 0x7391, 0x621a, 0xe210, 0x23d4, - 0x082b, 0x6398, 0x780e, 0x7b1d, - 0x7a4e, 0x4778, 0x7b6e, 0x4b50, - 0x2354, 0x0803, 0x4b54, 0x224a, - 0x17c0, 0x2242, 0x104a, 0x0aff, - 0x9031, 0x7c9c, 0x232f, 0x3009, - 0x6098, 0x2054, 0x0800, 0x7887, - 0x08db, 0x86c2, 0x208a, 0x003f, - 0x2002, 0x008b, 0x40e3, 0x7b0e, - 0x2300, 0x02cc, 0x232f, 0x3309, - 0x4b54, 0x7c91, 0x2300, 0x0f9f, - 0x0000, 0xffc0, 0x0be9, 0xb302, - 0x7074, 0x20ca, 0x002b, 0x780e, - 0x7a1b, 0x2284, 0x0007, 0xb8c4, - 0x7a05, 0xc002, 0x2055, 0x0c03, - 0xc003, 0x7865, 0xc305, 0x7b05, - 0x7965, 0x783b, 0xb891, 0xb89c, - 0xb89f, 0x0e3b, 0x91f1, 0xb040, - 0x787b, 0x2005, 0x0f80, 0x9002, - 0x1000, 0xb040, 0xf193, 0x1f58, + 0x24f4, 0x105e, 0x7b1d, 0x7b7d, + 0x7b7d, 0x7b7d, 
0x7b7d, 0xb8c4, + 0x7b05, 0xc087, 0x20f5, 0x02c0, + 0x2600, 0x36de, 0x7310, 0x20d4, + 0x0829, 0x6078, 0x2640, 0x340c, + 0x780e, 0x7f1c, 0x7b8e, 0x78ee, + 0x44e9, 0x4877, 0x272f, 0x33c9, + 0x2054, 0x080f, 0x4f70, 0x7811, + 0x08ef, 0x87c2, 0x208a, 0x003f, + 0x4877, 0x788e, 0x671f, 0x4874, + 0x262f, 0x33c9, 0x7f91, 0x2000, + 0x0f8c, 0x0000, 0xffc0, 0x0ee9, + 0xb3c2, 0x2048, 0x0000, 0x7c1b, + 0x2484, 0x1007, 0xb8c4, 0x7c05, + 0xc003, 0x2055, 0x0c03, 0xc001, + 0x7865, 0x7a05, 0x7a5b, 0xba91, + 0xba9c, 0xba9f, 0x0e13, 0x11f1, + 0xb280, 0x781b, 0x2005, 0x0f80, + 0x9002, 0x1000, 0xb080, 0x71c5, + 0x7165, 0x0e4f, 0x9214, 0x211a, + 0x0f80, 0x0000, 0x2000, 0x7124, + 0x2005, 0x0f80, 0x9002, 0x1e64, + 0x1800, 0x06c4, 0xc006, 0x7110, + 0x06fc, 0xffee, 0xd8ff, 0x1d58, 0x901c, 0x712c, 0x40c3, 0x0000, - 0x0f00, 0x1f5e, 0x905c, 0x4528, - 0x0c0a, 0xff2f, 0x1f57, 0x905c, - 0x0eba, 0xfe0f, 0x700c, 0x1e00, + 0x0f00, 0x4628, 0x1d5e, 0x905c, + 0x0bae, 0xff2f, 0x1d57, 0x905c, + 0x09ea, 0xfe4f, 0x700c, 0x1e00, 0x7004, 0x9003, 0xe004, 0x1e00, - 0x7344, 0x900f, 0xe0c0, 0x1e00, + 0x7384, 0x900f, 0xe0c0, 0x1e00, 0x7004, 0x9003, 0xfec4, 0x702c, 0x40c3, 0x9003, 0xfe64, 0x1882, - 0x005c, 0xd80f, 0x0a86, 0xfcaf, + 0x005c, 0xd80f, 0x0d42, 0xfcaf, 0xd90f, 0x700c, 0x41c3, 0x0000, 0xffff, 0xc040, 0xd80f, 0x704c, - 0x0dfa, 0xfd2f, 0x4320, 0xf00b, - 0x0a6a, 0xfcaf, 0xd90f, 0x0e6e, - 0xfe0f, 0xc001, 0x41a1, 0x7104, - 0xc041, 0xc001, 0x086d, 0x0233, - 0x70ad, 0x700c, 0x0ea6, 0xfc6f, - 0x712c, 0x1600, 0x7081, 0x8000, - 0x0771, 0xf004, 0x655d, 0x7124, - 0x40c3, 0x8000, 0x0771, 0x8801, - 0x08c9, 0x8062, 0xd80f, 0x211a, - 0x0f80, 0x0000, 0x2000, 0x2005, - 0x0f80, 0x9002, 0x0166, 0x9040, - 0xea6e, 0xc002, 0x2055, 0x0d03, - 0xc003, 0x7b05, 0x211a, 0x0f80, - 0x0000, 0x1000, 0x7865, 0x781b, - 0x2005, 0x0f83, 0x9002, 0x1e00, - 0xb891, 0xb89c, 0xb89f, 0x9000, - 0x2055, 0x0800, 0xb300, 0xf1d8, - 0x2178, 0x0000, 0x41c3, 0x0155, - 0x0000, 0x0cbe, 0xfc0f, 0x1e00, - 0x7344, 0x9003, 0xe004, 0x2480, - 0x3303, 0x1404, 
0x341b, 0xc6c6, - 0xc0f1, 0x40c3, 0x8000, 0x05a0, - 0x09da, 0xffef, 0xd98a, 0xd80f, - 0x09ba, 0xfcaf, 0xd90f, 0xc0d1, - 0x7ee0, 0x78e0, 0xc2e6, 0x1cfc, - 0xb6c8, 0xc1b0, 0x4318, 0x710c, - 0x2344, 0x37c2, 0xea05, 0xba61, - 0x0aff, 0x8031, 0x781b, 0x205a, - 0x0100, 0xc148, 0x702c, 0x200f, - 0x06c0, 0xc045, 0x09d2, 0xfcaf, - 0x4063, 0x1600, 0x708e, 0x8000, - 0x0004, 0x0c7e, 0xfd2f, 0x4708, - 0x2054, 0x0880, 0x208c, 0x8f83, - 0x70ad, 0xd8fe, 0xc044, 0xf607, - 0x0c66, 0xfd0f, 0x2054, 0x0880, - 0xc044, 0x261a, 0x1f8e, 0x0010, - 0x0000, 0x710c, 0xc18b, 0x0c4a, - 0xfdaf, 0xc649, 0x0f7e, 0xffcf, - 0x700c, 0xb88f, 0x1e00, 0x7004, - 0x9003, 0xe002, 0xd940, 0x40c3, - 0x9003, 0xe002, 0x0c46, 0xfcaf, - 0xb032, 0x1600, 0x7100, 0x9004, - 0x004a, 0xd998, 0x46cb, 0x9008, - 0x0000, 0x2011, 0x83c0, 0xd8aa, - 0x703c, 0x47cb, 0x9005, 0xe04c, - 0x1f9c, 0x1014, 0x202f, 0x06c0, + 0x0962, 0xfd6f, 0x4320, 0xf044, + 0x700c, 0x0ed2, 0xfc6f, 0x712c, + 0x1600, 0x708e, 0x8000, 0x07cd, + 0xf02a, 0x261a, 0x1f80, 0x0000, + 0x2000, 0x2005, 0x0f80, 0x9002, + 0x0166, 0x90e0, 0xef1e, 0xc003, + 0x42c1, 0x2055, 0x0d01, 0xc004, + 0x7905, 0x261a, 0x1f80, 0x0000, + 0x1000, 0x7825, 0x781b, 0x2005, + 0x0f81, 0x9002, 0x1e00, 0xb891, + 0xb89c, 0xb89f, 0x9060, 0x2355, + 0x0800, 0xb100, 0x41c3, 0x0155, + 0x0002, 0x0d6e, 0xfcaf, 0x740c, + 0x65fd, 0x71c5, 0x40c3, 0x8000, + 0x07cd, 0x8801, 0x08a7, 0x83a5, + 0xd80f, 0x0cb6, 0xfcaf, 0xd90f, + 0x092a, 0xfe4f, 0xc002, 0x41a1, + 0x7104, 0xc042, 0xc002, 0x087d, + 0x8234, 0x70ad, 0x2178, 0x0000, + 0x41c3, 0x0157, 0x0000, 0x0cde, + 0xfc0f, 0x1e00, 0x7005, 0x9003, + 0xe004, 0x2480, 0x3403, 0x1404, + 0x341b, 0xc6c6, 0xc0f1, 0x2482, + 0x3302, 0xc080, 0x41c3, 0x8000, + 0x0597, 0x0bea, 0xfc2f, 0xda8a, + 0xc080, 0x09ca, 0xffef, 0xd98a, + 0xd80f, 0x0c5e, 0xfcaf, 0xd90f, + 0x2480, 0x3302, 0xc0d1, 0x7ee0, + 0xc3e6, 0xc1b2, 0xc14a, 0xc045, + 0x712c, 0xb8c4, 0xe805, 0xb861, + 0x08ff, 0x8031, 0x793b, 0x215a, + 0x0101, 0xc005, 0x210f, 0x0000, + 0xc046, 0xc005, 0x0c76, 0xfcaf, + 0x702c, 
0x1600, 0x708d, 0x8000, + 0x0004, 0x0fd6, 0xfd2f, 0x4608, + 0x208c, 0x8e82, 0x70ed, 0xd8fe, + 0xc044, 0xf707, 0x0fc2, 0xfd0f, + 0x2054, 0x0880, 0xc044, 0x251a, + 0x1f8d, 0x0010, 0x0000, 0x710c, + 0xc18d, 0x0fd2, 0xfdaf, 0xc54b, + 0x0f76, 0xffcf, 0x45cb, 0x9003, + 0xe002, 0xd840, 0x1d00, 0x1f84, + 0x0000, 0x8000, 0x0fae, 0xfcaf, + 0xb512, 0x1600, 0x7100, 0x9004, + 0x004a, 0xd998, 0x2011, 0x8380, + 0xd8aa, 0x21ca, 0x0001, 0x40c3, + 0x9005, 0xe04c, 0xc049, 0x189c, + 0x0054, 0xc049, 0xc005, 0x46cb, + 0x9008, 0x0000, 0x712c, 0x781b, 0x2056, 0x0e00, 0x78c5, 0x9000, - 0x712c, 0xc046, 0x16e9, 0x1700, - 0xc047, 0x0c9e, 0xfc6f, 0x208a, - 0x0004, 0x1200, 0x3083, 0xd8ff, - 0x702c, 0x754c, 0xc543, 0xc542, - 0xc541, 0x0d76, 0xfc6f, 0xc540, - 0xc005, 0xc543, 0x724c, 0x780f, - 0xc042, 0xc045, 0xc004, 0xc541, - 0xc540, 0x2044, 0x0041, 0x6038, - 0x780f, 0xc044, 0xc304, 0xd980, - 0xd8ff, 0x0d4e, 0xfc6f, 0xb990, - 0x700c, 0x0d02, 0xfc6f, 0x712c, - 0x4063, 0x09fe, 0xffaf, 0x714c, - 0x16f0, 0x1700, 0x46cb, 0x9009, - 0xe1e0, 0xc04a, 0xb888, 0xb600, - 0xc006, 0x209a, 0x0004, 0xc046, - 0x2004, 0x0f81, 0x0000, 0x0f00, - 0xc007, 0x2004, 0x0f80, 0x0000, - 0xf0ff, 0x7825, 0xc046, 0xb887, - 0x1ef2, 0x9004, 0x710c, 0x0b5e, - 0xfd2f, 0xb700, 0xc047, 0x208a, - 0x0004, 0x0c06, 0xfc6f, 0x712c, + 0xc047, 0x16e9, 0x1700, 0xc048, + 0x0caa, 0xfc6f, 0x208a, 0x0004, 0x1200, 0x3083, 0xd8ff, 0x702c, - 0x754c, 0xc543, 0xc542, 0xc541, - 0x0cde, 0xfc6f, 0xc540, 0x0f2a, - 0xfc6f, 0xd8ff, 0xc005, 0xc543, - 0xc304, 0xc042, 0x700c, 0xb88f, - 0x791b, 0xd8ff, 0x724c, 0xc541, - 0x0cbe, 0xfc6f, 0xc540, 0xc007, - 0x714c, 0xc543, 0xe01e, 0x2044, - 0x0041, 0x6038, 0x7b0f, 0xd8ff, - 0x742c, 0xc542, 0xc541, 0x0ca2, - 0xfc6f, 0xc540, 0x702c, 0x710c, - 0xc043, 0xd8ff, 0xb98f, 0x754c, - 0x746c, 0xc542, 0xc541, 0x0c8a, - 0xfc6f, 0xc540, 0xd8ff, 0x702c, - 0x754c, 0xdb10, 0xc543, 0xc542, - 0xc541, 0x0c76, 0xfc6f, 0xc540, - 0x0ea2, 0xfc6f, 0xd8ff, 0xd8ff, - 0xd980, 0x754c, 0x746c, 0xc543, - 0xc542, 0xc541, 0x0c5a, 0xfc6f, - 
0xc540, 0x1600, 0x7080, 0x8000, - 0x0001, 0x080d, 0x00be, 0x208a, - 0x0606, 0xd833, 0x0bfe, 0xfc6f, - 0x712c, 0xc006, 0x704c, 0x1ef2, - 0x9004, 0x08f6, 0xffaf, 0x4063, - 0x700c, 0x0a8e, 0xfdaf, 0xc18b, - 0x40c3, 0x9003, 0xe002, 0x1e00, - 0x7344, 0x9003, 0xe002, 0xb0b2, - 0x208a, 0x0004, 0x0b22, 0xfc6f, - 0x712c, 0x1200, 0x3083, 0xd8ff, - 0x702c, 0x754c, 0xc543, 0xc542, - 0xc541, 0x0bfe, 0xfc6f, 0xc540, - 0xc005, 0xc304, 0x702c, 0xc543, - 0xc042, 0xd8ff, 0xb990, 0x724c, - 0xc541, 0x0be6, 0xfc6f, 0xc540, - 0xd8ff, 0xd980, 0x754c, 0x746c, - 0xc543, 0xc542, 0xc541, 0x0bd2, - 0xfc6f, 0xc540, 0x700c, 0x0b86, - 0xfc6f, 0x712c, 0xc00a, 0x702c, - 0x704c, 0xb600, 0xb7a0, 0x1e00, - 0x7344, 0x9003, 0xe004, 0x0b0e, - 0xfeef, 0xc008, 0x4063, 0x0f5a, - 0xfc6f, 0x702c, 0x2055, 0x0d01, - 0xc009, 0x714c, 0x706c, 0x7825, - 0x0f76, 0xffaf, 0xc108, 0x0afe, - 0xfe0f, 0x0c0e, 0xfecf, 0xc0b0, - 0x1404, 0x341b, 0xc6c6, 0x0000, + 0x754c, 0xc743, 0xc742, 0xc741, + 0x0d6a, 0xfc6f, 0xc740, 0xc006, + 0x724c, 0xc743, 0x780f, 0xc046, + 0xc042, 0xc004, 0xc741, 0xc740, + 0x2044, 0x0041, 0x6038, 0x780f, + 0xc044, 0xc304, 0xd980, 0xd8ff, + 0x0d42, 0xfc6f, 0xb990, 0x700c, + 0x0cfa, 0xfc6f, 0x712c, 0xc005, + 0x097a, 0xffaf, 0x714c, 0x16f0, + 0x1700, 0x46cb, 0x9009, 0xe1e0, + 0x41c3, 0x0000, 0x0f00, 0xc04c, + 0xb888, 0xb600, 0xc007, 0x209a, + 0x0004, 0xc047, 0x2004, 0x0042, + 0xc008, 0x7932, 0x7824, 0x7845, + 0xc048, 0xb887, 0x1ef2, 0x9004, + 0xc109, 0x710c, 0x0eb6, 0xfd2f, + 0xb100, 0xc047, 0x208a, 0x0004, + 0x0c12, 0xfc6f, 0x712c, 0x1200, + 0x3083, 0xd8ff, 0x702c, 0x754c, + 0xc743, 0xc742, 0xc741, 0x0cd6, + 0xfc6f, 0xc740, 0x0f1a, 0xfc6f, + 0xd8ff, 0xc006, 0xc304, 0x724c, + 0xc042, 0x700c, 0xb88f, 0x791b, + 0xd8ff, 0xc743, 0xc741, 0x0cb6, + 0xfc6f, 0xc740, 0xc007, 0x714c, + 0xc743, 0x2044, 0x0041, 0x6038, + 0xe01e, 0x7b0f, 0xd8ff, 0x742c, + 0xc742, 0xc741, 0x0c96, 0xfc6f, + 0xc740, 0x710c, 0x702c, 0xc043, + 0xd8ff, 0xb98f, 0x754c, 0x746c, + 0xc742, 0xc741, 0x0c7e, 0xfc6f, + 0xc740, 0xd8ff, 0x702c, 
0x754c, + 0xdb10, 0xc743, 0xc742, 0xc741, + 0x0c6a, 0xfc6f, 0xc740, 0x0e96, + 0xfc6f, 0xd8ff, 0xf838, 0xc743, + 0xc742, 0xc741, 0x0c56, 0xfc6f, + 0xc740, 0x40c3, 0x0000, 0x6210, + 0xb501, 0x730c, 0x0c06, 0xfc6f, + 0x712c, 0x208a, 0x0408, 0xb501, + 0x1600, 0x7080, 0x8000, 0x0001, + 0x080b, 0x00bf, 0x208a, 0x0606, + 0xf002, 0xd833, 0x0be6, 0xfc6f, + 0x712c, 0xc008, 0x704c, 0x1ef2, + 0x9004, 0x0862, 0xffaf, 0xc005, + 0x700c, 0x0e02, 0xfdaf, 0xc18d, + 0x70ed, 0x208a, 0x0004, 0x712c, + 0xb5e0, 0x0b2a, 0xfc6f, 0xb5f2, + 0x1200, 0x3083, 0xd8ff, 0x702c, + 0x754c, 0xc743, 0xc742, 0xc741, + 0x0bea, 0xfc6f, 0xc740, 0xc006, + 0xc304, 0x702c, 0xc042, 0xd8ff, + 0xb990, 0x724c, 0xc743, 0xc741, + 0x0bd2, 0xfc6f, 0xc740, 0xf814, + 0xc743, 0xc742, 0xc741, 0x0bc6, + 0xfc6f, 0xc740, 0x700c, 0x0b7e, + 0xfc6f, 0x712c, 0xc00c, 0x702c, + 0x704c, 0xb600, 0xc009, 0xb0e0, + 0xb5e1, 0x0aaa, 0xfeef, 0xc00a, + 0xc005, 0x0a02, 0xfcaf, 0x702c, + 0xc10b, 0x2055, 0x0d00, 0x714c, + 0x7825, 0xc10a, 0x0f62, 0xffaf, + 0x706c, 0x0e1a, 0xfe0f, 0x0b9e, + 0xfecf, 0xc7c6, 0xd8ff, 0xd980, + 0x754c, 0x746c, 0x7ee0, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, @@ -5676,73 +5668,72 @@ static u16 lpddr4x_train1d_dmem[] = { 0x701c, 0x61a8, 0x35ac, 0x35ac, 0x125c, 0x125c, 0xc738, 0xb0f4, 0x6590, 0x6590, 0x0000, 0x0000, - 0xbd61, 0x0046, 0x0000, 0x0000, - 0x2820, 0x140f, 0x0002, 0x0000, - 0xf01f, 0x0001, 0x0000, 0xffb4, - 0x0001, 0x0001, 0xf0b4, 0x0001, - 0x0000, 0xf4b4, 0x0001, 0x0000, - 0xf0b9, 0x0001, 0x0000, 0xf0ba, - 0x0001, 0x0000, 0xf0bb, 0x0001, - 0x0000, 0xf001, 0x0001, 0x0000, - 0xf011, 0x0001, 0x0001, 0xf012, - 0x0001, 0xf000, 0xf018, 0x0001, - 0x0001, 0xf013, 0x0001, 0x0000, - 0xf0f9, 0x0004, 0x0200, 0xf0fa, - 0x0004, 0x0000, 0xf0fb, 0x0004, - 0x0400, 0xff62, 0x0001, 0x0000, - 0xf062, 0x0001, 0x000f, 0xf462, - 0x0001, 0x00f0, 0xf002, 0x0001, - 0x0204, 0x0000, 0xf01f, 0x0001, - 0x0000, 0xffb4, 0x0001, 0x0001, - 0xf0b4, 0x0001, 0x0000, 0xf4b4, - 
0x0001, 0x0000, 0xf0b9, 0x0001, - 0x0000, 0xf0ba, 0x0001, 0x0000, - 0xf0bb, 0x0001, 0x0000, 0xf001, - 0x0001, 0x0000, 0xf013, 0x0001, - 0x0000, 0xf0f9, 0x0004, 0x0200, - 0xf0fa, 0x0004, 0x0000, 0xf0fb, - 0x0004, 0x0400, 0xf060, 0x0007, - 0x0008, 0xf065, 0x0007, 0x0000, - 0xff62, 0x0001, 0x0000, 0xf002, - 0x0001, 0x0220, 0x10f8, 0x20e8, - 0xf01f, 0x0001, 0x0000, 0xf03b, - 0x0002, 0x0001, 0xffb2, 0x0001, - 0x0000, 0xf0b2, 0x0001, 0x0001, - 0xffb4, 0x0001, 0x0001, 0xf0b4, - 0x0001, 0x0000, 0xf0b9, 0x0001, - 0x0000, 0xf0ba, 0x0001, 0x0000, - 0xf0bb, 0x0001, 0x0000, 0xf001, - 0x0001, 0x0000, 0xf060, 0x0007, - 0x0001, 0xf065, 0x0007, 0x01ff, - 0xff26, 0x0007, 0x0000, 0xff27, - 0x0007, 0x0000, 0xf013, 0x0001, - 0x0002, 0xff32, 0x0001, 0x0800, - 0xff62, 0x0001, 0x0000, 0xf062, - 0x0001, 0x0001, 0xf462, 0x0001, - 0x0001, 0xf002, 0x0001, 0x0208, - 0xf01f, 0x0001, 0x0000, 0xffb2, - 0x0001, 0x0000, 0xf0b2, 0x0001, - 0x0001, 0xf4b2, 0x0001, 0x0001, - 0xffb4, 0x0001, 0x0001, 0xf0b4, - 0x0001, 0x0000, 0xf4b4, 0x0001, - 0x0000, 0xf0b9, 0x0001, 0x0000, - 0xf0ba, 0x0001, 0x0000, 0xf0bb, - 0x0001, 0x0000, 0xf011, 0x0001, - 0x0101, 0xf012, 0x0001, 0x0001, - 0xf013, 0x0001, 0x0002, 0xf018, - 0x0001, 0x0001, 0xf060, 0x0007, - 0x0001, 0xf065, 0x0007, 0x01ff, - 0xff26, 0x0007, 0xffff, 0xff27, - 0x0007, 0xffff, 0xff62, 0x0001, - 0x0000, 0xf062, 0x0001, 0x0001, - 0xf462, 0x0001, 0x0010, 0xff32, - 0x0001, 0x0800, 0xf002, 0x0001, - 0x0210, 0x0000, 0x0a06, 0x140e, - 0x1c18, 0x2420, 0x0c06, 0x1610, - 0x201c, 0x2824, 0x0a06, 0x1610, - 0x201a, 0x2824, 0x0c06, 0x1812, - 0x241e, 0x2c28, 0x020d, 0x0301, + 0x9660, 0x00cb, 0x0000, 0x0000, + 0x2820, 0x140f, 0x1f02, 0x01f0, + 0x0000, 0xb400, 0x01ff, 0x0100, + 0xb400, 0x01f0, 0x0000, 0xb400, + 0x01f4, 0x0000, 0xb900, 0x01f0, + 0x0000, 0xba00, 0x01f0, 0x0000, + 0xbb00, 0x01f0, 0x0000, 0x0100, + 0x01f0, 0x0000, 0x1300, 0x01f0, + 0x0000, 0xf900, 0x04f0, 0x0000, + 0xfa02, 0x04f0, 0x0000, 0xfb00, + 0x04f0, 0x0000, 0x6004, 0x07f0, + 0x0800, 0x6500, 0x07f0, 
0x0000, + 0x6200, 0x01ff, 0x0000, 0x0200, + 0x01f0, 0x2000, 0x1f02, 0x01f0, + 0x0000, 0xb400, 0x01ff, 0x0100, + 0xb400, 0x01f0, 0x0000, 0xb400, + 0x01f4, 0x0000, 0xb900, 0x01f0, + 0x0000, 0xba00, 0x01f0, 0x0000, + 0xbb00, 0x01f0, 0x0000, 0x0100, + 0x01f0, 0x0000, 0x1100, 0x01f0, + 0x0100, 0x1200, 0x01f0, 0x0000, + 0x18f0, 0x01f0, 0x0100, 0x1300, + 0x01f0, 0x0000, 0xf900, 0x04f0, + 0x0000, 0xfa02, 0x04f0, 0x0000, + 0xfb00, 0x04f0, 0x0000, 0x6204, + 0x01ff, 0x0000, 0x6200, 0x01f0, + 0x0f00, 0x6200, 0x01f4, 0xf000, + 0x0200, 0x01f0, 0x0400, 0x1f02, + 0x01f0, 0x0000, 0x3b00, 0x02f0, + 0x0100, 0xb200, 0x01ff, 0x0000, + 0xb200, 0x01f0, 0x0100, 0xb400, + 0x01ff, 0x0100, 0xb400, 0x01f0, + 0x0000, 0xb900, 0x01f0, 0x0000, + 0xba00, 0x01f0, 0x0000, 0xbb00, + 0x01f0, 0x0000, 0x0100, 0x01f0, + 0x0000, 0x6000, 0x07f0, 0x0100, + 0x6500, 0x07f0, 0xff00, 0x2601, + 0x07ff, 0x0000, 0x2700, 0x07ff, + 0x0000, 0x1300, 0x01f0, 0x0200, + 0x3200, 0x01ff, 0x0000, 0x6208, + 0x01ff, 0x0000, 0x6200, 0x01f0, + 0x0100, 0x6200, 0x01f4, 0x0100, + 0x0200, 0x01f0, 0x0800, 0x1f02, + 0x01f0, 0x0000, 0xb200, 0x01ff, + 0x0000, 0xb200, 0x01f0, 0x0100, + 0xb200, 0x01f4, 0x0100, 0xb400, + 0x01ff, 0x0100, 0xb400, 0x01f0, + 0x0000, 0xb400, 0x01f4, 0x0000, + 0xb900, 0x01f0, 0x0000, 0xba00, + 0x01f0, 0x0000, 0xbb00, 0x01f0, + 0x0000, 0x1100, 0x01f0, 0x0100, + 0x1201, 0x01f0, 0x0100, 0x1300, + 0x01f0, 0x0200, 0x1800, 0x01f0, + 0x0100, 0x6000, 0x07f0, 0x0100, + 0x6500, 0x07f0, 0xff00, 0x2601, + 0x07ff, 0xff00, 0x27ff, 0x07ff, + 0xff00, 0x62ff, 0x01ff, 0x0000, + 0x6200, 0x01f0, 0x0100, 0x6200, + 0x01f4, 0x1000, 0x3200, 0x01ff, + 0x0000, 0x0208, 0x01f0, 0x1000, + 0x0002, 0x0000, 0x020d, 0x0301, 0x0c0b, 0x160e, 0x1004, 0x1811, + 0x0c06, 0x1610, 0x201c, 0x2824, + 0x0c06, 0x1812, 0x241e, 0x2c28, + 0x0a06, 0x140e, 0x1c18, 0x2420, + 0x0a06, 0x1610, 0x201a, 0x2824, 0x006e, 0x0002, 0x0001, 0x00fd, 0x0004, 0x000f, 0x0060, 0x0007, 0x0000, 0x00e8, 0x0004, 0x00ff, @@ -5753,7 +5744,7 @@ static u16 lpddr4x_train1d_dmem[] = { 
/*******************************************************/ static u16 lpddr4x_train2d_imem[] = { - 0x00d8, 0x0000, 0x0050, 0x0000, + 0x0204, 0x0000, 0x0050, 0x0000, 0x0050, 0x0000, 0x0050, 0x0000, 0x0050, 0x0000, 0x0050, 0x0000, 0x0050, 0x0000, 0x0050, 0x0000, @@ -5769,6 +5760,9 @@ static u16 lpddr4x_train2d_imem[] = { 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x7054, 0x7ce0, 0x4300, 0x1101, + 0x048c, 0x2242, 0x8042, 0x1b01, + 0x0312, 0xf5fb, 0x7ee0, 0x78e0, 0x1cfc, 0xb3c8, 0x1cfc, 0xb388, 0x7fe0, 0x1cfc, 0xb348, 0x78e0, 0x7ee0, 0x78e0, 0x748d, 0xdd0c, @@ -5777,860 +5771,950 @@ static u16 lpddr4x_train2d_imem[] = { 0xf006, 0x78e0, 0x748d, 0xf006, 0xc702, 0xc601, 0x24b0, 0x334d, 0x24b0, 0x331f, 0x7ee0, 0x78e0, - 0x2244, 0x8ffc, 0x264a, 0x7000, - 0x20e8, 0x01a2, 0x202f, 0x8000, - 0x212f, 0x004b, 0x7ee0, 0x78e0, - 0x702c, 0x704c, 0x706c, 0x704d, - 0x706d, 0x708d, 0x70ad, 0x70cd, - 0x70ed, 0x206a, 0x0280, 0xb88d, - 0x2029, 0x8000, 0x44db, 0x8000, - 0x4000, 0x42db, 0x8000, 0x0400, - 0x706f, 0x78e0, 0x2022, 0x0f80, - 0x0000, 0x2368, 0x2069, 0x0040, - 0x78e0, 0xf1fe, 0xc3e1, 0xc2e1, - 0xc1e1, 0xc0e1, 0xc0f1, 0xc5e1, - 0xc1a1, 0xe806, 0x7487, 0xdc14, - 0x077f, 0xffcf, 0xda25, 0xba9f, - 0x8a60, 0x1233, 0x0080, 0x7865, - 0x1aea, 0x8002, 0x12ed, 0x8080, - 0x208c, 0x8fc3, 0xf20d, 0xc085, - 0xc040, 0x4020, 0x0852, 0x0260, - 0xc185, 0x41c3, 0x0402, 0x0000, - 0x0fae, 0x0020, 0xd8ff, 0x70ad, - 0x0986, 0x0220, 0x1a0c, 0x3342, - 0x0f96, 0x0220, 0xd8ff, 0x25ab, - 0x10c4, 0x7fff, 0xf1ff, 0x78e0, - 0xb8e6, 0x781d, 0x781d, 0x781d, - 0xb8c2, 0xf209, 0x205f, 0x0101, - 0x6904, 0x080f, 0x0352, 0x6906, - 0xf003, 0x781b, 0x7404, 0x7104, - 0x7fe0, 0x780f, 0x42c3, 0x8000, - 0x0661, 0x8a20, 0x215f, 0x0c81, + 0xc5e1, 0xc6e1, 0xc7e1, 0x2107, + 0x00cb, 0x262f, 0xf040, 0x704d, + 0x0010, 0x0026, 0x232f, 0x12c4, + 0x200e, 0x8280, 0x2203, 0x1041, + 0x0b19, 0x07de, 0x220e, 0x8282, + 0x2203, 0x10c3, 0xf006, 0x78e0, + 0xc5e1, 0xc6e1, 0xc7e1, 0x706d, + 
0x250a, 0x9080, 0x23cc, 0x8021, + 0xf260, 0x7034, 0xf20c, 0x003c, + 0x002b, 0xdc40, 0x202f, 0x8000, + 0x212f, 0x804b, 0x07f8, 0xffe3, + 0xbc61, 0xf014, 0xdc20, 0x210a, + 0x8000, 0x22ca, 0x0021, 0x23ca, + 0x0021, 0xf222, 0x0014, 0x0024, + 0x700c, 0x212f, 0x8040, 0xbc61, + 0x07fe, 0xffe3, 0x21c0, 0x8043, + 0x240a, 0x7300, 0x260a, 0x90c0, + 0x704c, 0x0046, 0x0022, 0x706c, + 0x20a8, 0x0380, 0x2000, 0x8000, + 0x2101, 0x8041, 0x2201, 0x8082, + 0x25cc, 0x9086, 0x20c0, 0x006e, + 0x22c2, 0x034e, 0x7075, 0xf22f, + 0xf645, 0x220e, 0x8282, 0x2203, + 0x10c3, 0x232f, 0x92c0, 0x004e, + 0x0003, 0x200e, 0x8280, 0x0047, + 0x0020, 0x2203, 0x1041, 0x20a8, + 0x0580, 0x2000, 0x8000, 0x2101, + 0x8041, 0x2201, 0x8082, 0x2301, + 0x00c3, 0x2202, 0x8342, 0x2303, + 0x8383, 0xf746, 0x2200, 0x8342, + 0x2301, 0x8383, 0x20c0, 0x0066, + 0x7075, 0x0012, 0x0001, 0xf1da, + 0x4241, 0x4341, 0x6a09, 0x212f, + 0x0002, 0xc7c1, 0xc6c1, 0xc5c1, + 0x7ee0, 0x78e0, 0x2244, 0x8ffc, + 0x264a, 0x7000, 0x20e8, 0x01a2, + 0x202f, 0x8000, 0x212f, 0x004b, + 0x7ee0, 0x78e0, 0x702c, 0x704c, + 0x706c, 0x704d, 0x706d, 0x708d, + 0x70ad, 0x70cd, 0x70ed, 0x206a, + 0x0280, 0xb88d, 0x2029, 0x8000, + 0x44db, 0x8000, 0x4000, 0x42db, + 0x8000, 0x0400, 0x706f, 0x78e0, + 0x2022, 0x0f80, 0x0000, 0x2640, + 0x2069, 0x0040, 0x78e0, 0xf1fe, + 0xc3e1, 0xc2e1, 0xc1e1, 0xc0e1, + 0xc0f1, 0xc5e1, 0xc1a1, 0xe806, + 0x7487, 0xdc14, 0x066b, 0xffcf, + 0xda25, 0xba9f, 0x8a60, 0x1233, + 0x0080, 0x7865, 0x1aea, 0x8002, + 0x12ed, 0x8080, 0x208c, 0x8fc3, + 0xf20d, 0xc085, 0xc040, 0x4020, + 0x0cae, 0x0260, 0xc185, 0x41c3, + 0x0402, 0x0000, 0x0f6e, 0x0020, + 0xd8ff, 0x70ad, 0x0b62, 0x0220, + 0x1a10, 0x3003, 0x0bee, 0x0260, + 0xd8ff, 0x25ab, 0x10c4, 0x7fff, + 0xf1ff, 0x78e0, 0xb8e6, 0x781d, + 0x781d, 0x781d, 0xb8c2, 0x742c, + 0xf403, 0x7914, 0xf006, 0x7915, + 0x0909, 0x0354, 0x762c, 0x7915, + 0x7fe0, 0x6901, 0x42c3, 0x8000, + 0x06cd, 0x8a20, 0x215f, 0x0c81, 0x6038, 0x8a21, 0x215f, 0x0641, - 0x6038, 0x8823, 0xb9e6, 0x8822, - 0x1600, 0x7080, 0x8000, 
0x001c, - 0xb9c2, 0xf209, 0x7014, 0x40c3, - 0x8000, 0x0528, 0xf209, 0xe010, - 0xf007, 0x7014, 0x40c3, 0x8000, - 0x0520, 0xf203, 0xe010, 0x6038, - 0x7fe0, 0x8800, 0xc2e2, 0x4718, - 0x762c, 0x4508, 0xb961, 0x0901, - 0x0031, 0x272f, 0x37c2, 0x700c, - 0x704c, 0xf006, 0x1166, 0x0081, - 0x7144, 0x6178, 0x794f, 0x091b, - 0x00b5, 0x7143, 0x1168, 0x008c, - 0x2744, 0x304b, 0x0ce9, 0x92e1, - 0x7b0f, 0xbdc5, 0x63b8, 0x780f, - 0xc6c2, 0x78e0, 0xc2e6, 0x1cfc, - 0xb6c8, 0xc1b2, 0xc24e, 0xc14f, - 0x4200, 0x750c, 0x41c3, 0x0180, - 0x0001, 0x0ece, 0x0020, 0xc350, - 0x70ad, 0xf023, 0x9412, 0x9479, - 0x943a, 0xc051, 0x941b, 0x9455, - 0x94f6, 0x94d7, 0x9498, 0x1426, - 0x311b, 0x1428, 0x311f, 0xc048, - 0xc147, 0xc346, 0xc311, 0xc445, - 0xc644, 0xc743, 0xc242, 0x750c, - 0x41c3, 0x0181, 0x000b, 0x4261, - 0x1c04, 0x37c0, 0x0e8a, 0x0020, - 0x1c00, 0x36c0, 0x71a5, 0xc00f, - 0x7bb0, 0x4260, 0x4368, 0x0b11, - 0x0022, 0x702c, 0xf01c, 0xc00f, - 0x7124, 0x621a, 0x7b2f, 0x0ba9, - 0x82b5, 0x448b, 0xc010, 0x0815, - 0x0091, 0xc00e, 0x20f4, 0x008c, - 0xc089, 0x7874, 0xb080, 0xf1f1, - 0xc010, 0x0815, 0x0071, 0xc089, - 0x2014, 0x00c3, 0xc00e, 0x6048, - 0xb300, 0xf1e7, 0xc0b2, 0x1404, - 0x341b, 0xc6c6, 0x1e00, 0x7005, - 0x9004, 0x0102, 0x7ee0, 0x78e0, - 0xc2e2, 0x4308, 0x700c, 0x0b76, - 0x0020, 0x4220, 0x0b52, 0x0020, - 0x700c, 0x0b32, 0x0020, 0x700c, - 0x228c, 0x8fc3, 0xf227, 0x216f, - 0x0243, 0x8900, 0x43c3, 0x8000, - 0x0665, 0x201a, 0x0f80, 0x0020, - 0x0000, 0x2005, 0x0f80, 0x9004, - 0x0040, 0xb040, 0x8b80, 0xf012, - 0x8900, 0x241a, 0x1f8d, 0x0000, - 0x2000, 0x7185, 0x201a, 0x0f80, - 0x0020, 0x0000, 0x78a5, 0x2005, - 0x0f80, 0x9002, 0x0040, 0xb040, - 0x8b01, 0x08e1, 0x8303, 0x0fc6, - 0x0180, 0x208a, 0x021a, 0x41c3, - 0x900e, 0x002a, 0x1e00, 0x72c4, - 0x9008, 0x01e0, 0xb100, 0x40c3, - 0x0000, 0x0fac, 0xb101, 0x40c3, - 0x8000, 0x04b2, 0x1800, 0x0003, - 0x700c, 0xc6c2, 0x702c, 0x7110, - 0x20e0, 0x07ca, 0x1600, 0x7102, - 0x9008, 0x01d6, 0x0af3, 0x803e, - 0x7124, 0x7ee0, 0xc2e6, 0x1cfc, - 0xb6c8, 0x4338, 
0x0f16, 0x0060, - 0x4508, 0x71ed, 0x46cb, 0x9004, - 0x00f2, 0xb6e0, 0x1e00, 0x1005, - 0x1e00, 0x7344, 0x9008, 0x01e2, - 0x0e16, 0x0120, 0x760c, 0x0b17, - 0x3030, 0xb6e8, 0x40c3, 0x0000, - 0x61a8, 0x0fb6, 0xffcf, 0x1e10, - 0x1005, 0x1404, 0x341b, 0xc6c6, - 0xc2e2, 0x70ad, 0xf007, 0xca0d, - 0x7704, 0x0fb6, 0xffef, 0x7810, - 0x71a5, 0xc811, 0x0df3, 0x9024, - 0x712c, 0xc6c2, 0xc2e6, 0x1cfc, - 0xb6c8, 0xc1a6, 0xc70b, 0xc345, - 0xdb7f, 0xc040, 0x7be4, 0xd84b, - 0x4328, 0x2338, 0x0001, 0xd87d, - 0x78e4, 0xbfe6, 0x21c5, 0x0061, - 0x20b8, 0x0341, 0x7825, 0x23b8, - 0x0441, 0x2305, 0x803e, 0x208a, - 0x003e, 0x70cd, 0x761c, 0xc000, - 0x1600, 0x7083, 0x8000, 0x04b2, - 0xc100, 0x208c, 0x8fc3, 0x6a09, - 0x21ca, 0x00c1, 0x7b0f, 0xc344, - 0xc30c, 0x0857, 0x06b5, 0xc343, - 0xdb30, 0xc504, 0xc341, 0xdb50, - 0xc342, 0xdb20, 0xd810, 0x278a, - 0x3801, 0x4378, 0x708d, 0x2025, - 0x0340, 0xf01b, 0xf019, 0xf019, - 0xf017, 0xf061, 0xf016, 0xf015, - 0xf048, 0xf04a, 0xf04b, 0xf011, - 0xf00f, 0xf04c, 0xf03c, 0xf042, - 0xf00b, 0xf00b, 0xf009, 0xf009, - 0xf007, 0xf007, 0xf005, 0xf044, - 0xf035, 0xf004, 0xf0af, 0x706c, - 0x0a51, 0x06b5, 0x4060, 0x726c, - 0x748d, 0xc042, 0xc041, 0x4318, - 0x4718, 0x4608, 0x2025, 0x0080, - 0xf03d, 0xf035, 0xf032, 0xf018, - 0xf016, 0xf018, 0xf09e, 0xf0a0, - 0xf01a, 0xf022, 0xf023, 0xf012, - 0xf00e, 0xf024, 0xf014, 0xf014, - 0xf00c, 0xf00c, 0xf00a, 0xf00a, - 0xf008, 0xf021, 0xf01e, 0xf01c, - 0xf00d, 0xf00d, 0xdb07, 0xf01b, - 0x0a09, 0x02f1, 0xdc20, 0xf01e, - 0x708d, 0xf01c, 0xc602, 0x758d, - 0xf019, 0x758d, 0x46eb, 0xf017, - 0xdb10, 0xf1cc, 0xc601, 0x758d, - 0xf011, 0x718d, 0x466b, 0xf00f, - 0x768d, 0xf00c, 0x708d, 0xf00b, - 0x736c, 0x0a0f, 0x00d1, 0xc403, - 0x249a, 0x1004, 0x7f85, 0x4468, - 0x4608, 0x0b09, 0x11de, 0xbc86, - 0x208a, 0x0ffd, 0x232f, 0x02c2, - 0x2004, 0x02c0, 0x1438, 0x301b, - 0xc50d, 0x2344, 0x0c03, 0xe0b0, - 0x0a13, 0x01b1, 0x23cf, 0x01e1, - 0x78f0, 0x080b, 0x0051, 0xbe86, - 0x7ecf, 0x202f, 0x02c2, 0x781d, - 0x781d, 0x2004, 0x0f82, 0x0000, - 0x2000, 
0xc005, 0x272f, 0x3040, - 0x7a05, 0xd858, 0x2004, 0x02c0, - 0x7865, 0x7885, 0x209a, 0x0004, - 0x43c3, 0x9008, 0x0000, 0x2754, - 0x380c, 0x7c65, 0x7a05, 0x2705, - 0x30c0, 0xb040, 0x4061, 0x209a, - 0x0004, 0x2004, 0x0f82, 0x0000, - 0x0f00, 0xc003, 0x201a, 0x0f80, - 0x0000, 0x1000, 0x7a05, 0x2553, - 0x10c0, 0x7845, 0x7acf, 0x7845, - 0xb400, 0x2755, 0x3800, 0x7865, - 0xb0e0, 0x2304, 0x1f80, 0x0000, - 0xff00, 0x2305, 0x3002, 0x2755, - 0x3c00, 0x7b05, 0xb340, 0x6901, - 0xc100, 0x218c, 0x8fc3, 0xf406, - 0x1e00, 0x7002, 0x8000, 0x04b2, - 0x780f, 0xc0a6, 0x1404, 0x341b, - 0xc6c6, 0x738d, 0xde10, 0xf19b, - 0x758d, 0xf198, 0x718d, 0xf196, + 0x6119, 0x8903, 0xb8e6, 0x8902, + 0x1600, 0x7081, 0x8000, 0x001c, + 0xb8c2, 0xf409, 0x7034, 0x41c3, + 0x8000, 0x0544, 0xf209, 0xe108, + 0xf007, 0x7034, 0x41c3, 0x8000, + 0x0534, 0xf203, 0xe108, 0x6119, + 0x7fe0, 0x8900, 0xc0e2, 0x2053, + 0x014c, 0x781d, 0x781d, 0x781d, + 0x781d, 0x781d, 0x781d, 0x2044, + 0x004d, 0x702c, 0x704c, 0xf00b, + 0x1070, 0x0083, 0x0d19, 0x10e0, + 0x792f, 0x106e, 0x0080, 0x7144, + 0x6119, 0x784f, 0x08ed, 0x80b4, + 0x7043, 0xf003, 0x6199, 0x782f, + 0xc4c2, 0x78e0, 0xc2e6, 0x1cfc, + 0xb6c8, 0xc1b6, 0x4548, 0xc153, + 0x4200, 0x750c, 0x41c3, 0x018c, + 0x0001, 0x0e92, 0x0020, 0xc354, + 0x706f, 0xf041, 0x4261, 0xf015, + 0xc014, 0x0819, 0x00b0, 0xc08e, + 0xc014, 0x087f, 0x0071, 0xc38e, + 0x6548, 0x7b34, 0xb300, 0xf006, + 0x25f4, 0x1083, 0x7834, 0xb060, + 0xc013, 0x7124, 0x621a, 0x09dd, + 0x8294, 0x4183, 0x911c, 0x1446, + 0x3103, 0x915f, 0xc055, 0x1448, + 0x3101, 0x144a, 0x3100, 0x143a, + 0x311f, 0x143c, 0x311e, 0x1440, + 0x310f, 0x1442, 0x310e, 0x1444, + 0x310c, 0xc346, 0xc315, 0xc048, + 0xc147, 0xc242, 0x750c, 0x41c3, + 0x018d, 0x000b, 0xc445, 0xc644, + 0xc743, 0x1c04, 0x3780, 0x1c00, + 0x37c0, 0x0e12, 0x0020, 0x4261, + 0x7167, 0xc013, 0x232f, 0x16c8, + 0x0b7d, 0x9024, 0x702c, 0xc0b6, + 0x1404, 0x341b, 0xc6c6, 0x78e0, + 0x700c, 0x1e00, 0x7004, 0x9004, + 0x0102, 0x7ee0, 0xc2e4, 0x4308, + 0x70ad, 0x46cb, 0x9008, 0x01e4, + 
0x218c, 0x8fc3, 0xb6a0, 0xb6a1, + 0xb6a2, 0xf224, 0x1600, 0x7080, + 0x8000, 0x0004, 0x201a, 0x0f83, + 0x0020, 0x0000, 0x2305, 0x0f80, + 0x9004, 0x0040, 0xb020, 0x40c3, + 0x8000, 0x06d1, 0x8881, 0x8800, + 0xf00c, 0x201a, 0x0f82, 0x0000, + 0x2000, 0x7104, 0x7a65, 0x2205, + 0x0f82, 0x9002, 0x0040, 0xb220, + 0x0ceb, 0x9005, 0x09be, 0x01c0, + 0x208a, 0x021a, 0x41c3, 0x900e, + 0x002a, 0x1efc, 0x92c4, 0xb100, + 0x40c3, 0x8000, 0x04bc, 0xa8a0, + 0x40c3, 0x0000, 0x0fac, 0xb101, + 0x700c, 0xc6c4, 0x704c, 0x7210, + 0x20e0, 0x07ca, 0x1600, 0x7101, + 0x9008, 0x01d6, 0x09f3, 0x803e, + 0x7144, 0x7ee0, 0xc2e2, 0x4328, + 0x0fb2, 0x0060, 0x260a, 0x3000, + 0x45cb, 0x9004, 0x00f2, 0x1d00, + 0x1045, 0x1d00, 0x1005, 0x1e00, + 0x7784, 0x9008, 0x01e2, 0x0fea, + 0x0120, 0x760c, 0x0b17, 0x1030, + 0x1d10, 0x1045, 0x40c3, 0x0000, + 0x61a8, 0x0fb6, 0xffcf, 0x1d10, + 0x1005, 0xc6c2, 0xc2e2, 0x70ad, + 0xf007, 0xca11, 0x7704, 0x0fba, + 0xffef, 0x7810, 0x71a5, 0xc813, + 0x0df3, 0x9024, 0x712c, 0xc6c2, + 0xc2e6, 0x1cfc, 0xb6c8, 0xc1a6, + 0x260a, 0x3080, 0xc20b, 0xc345, + 0xdb7f, 0xc040, 0x7b44, 0xd84b, + 0x2338, 0x000f, 0x279a, 0x1002, + 0xbae6, 0x208a, 0x003e, 0x234f, + 0x004c, 0x27ca, 0x1001, 0xe4cf, + 0x27ca, 0x1001, 0xe3d1, 0x27ca, + 0x1001, 0xc000, 0x1600, 0x7083, + 0x8000, 0x04bc, 0x1400, 0x301f, + 0x208c, 0x8fc3, 0xc00e, 0x2642, + 0x304b, 0x27ca, 0x30c1, 0xc043, + 0xc00d, 0xc044, 0xc00c, 0x0b51, + 0x16b5, 0xc042, 0xdb50, 0xc341, + 0xdb20, 0xd810, 0xde30, 0xdd60, + 0x4378, 0x708d, 0x2025, 0x02c0, + 0xf01a, 0xf01a, 0xf018, 0xf018, + 0xf05c, 0xf019, 0xf014, 0xf016, + 0xf045, 0xf048, 0xf010, 0xf010, + 0xf047, 0xf03b, 0xf00e, 0xf00c, + 0xf00a, 0xf00a, 0xf008, 0xf008, + 0xf006, 0xf006, 0xf03f, 0xf034, + 0xf005, 0xf0a8, 0x706c, 0xf003, + 0xdb10, 0x264c, 0xb640, 0x014a, + 0x002d, 0x4060, 0x726c, 0x748d, + 0xc041, 0x4608, 0x4318, 0x4508, + 0x4708, 0x2025, 0x0780, 0xf036, + 0xf02c, 0xf02b, 0xf017, 0xf017, + 0xf093, 0xf099, 0xf093, 0xf015, + 0xf019, 0xf01c, 0xf091, 0xf00f, + 0xf01b, 0xf00f, 0xf00d, 
0xf089, + 0xf087, 0xf087, 0xf085, 0xf085, + 0xf018, 0xf017, 0xf013, 0xf008, + 0xf006, 0xdb07, 0xf012, 0xc701, + 0x758d, 0xf018, 0x758d, 0x47a9, + 0xf014, 0x758d, 0x47c9, 0xf012, + 0x718d, 0x476b, 0xf00e, 0x768d, + 0xf00b, 0x708d, 0xf00a, 0x736c, + 0x0e0d, 0x30d1, 0xc402, 0x249a, + 0x1004, 0x7a85, 0x4468, 0x4708, + 0x238a, 0x0ffd, 0x7b24, 0x783d, + 0x2044, 0x0c0b, 0xe3b0, 0xdb58, + 0x23cf, 0x11e1, 0x7b24, 0x2084, + 0x0001, 0x2305, 0x02c3, 0x7885, + 0x7b05, 0x783d, 0x239a, 0x0004, + 0x781d, 0xc405, 0x781d, 0x2004, + 0x0f80, 0x0000, 0x2000, 0x7c05, + 0x7c65, 0x232f, 0x17c0, 0x43c3, + 0x9008, 0x0000, 0x2305, 0x10c0, + 0xb080, 0x4020, 0x209a, 0x0004, + 0x7154, 0x2104, 0x0f81, 0x0000, + 0xff00, 0x2004, 0x0f8c, 0x0000, + 0x0f00, 0xc002, 0x201a, 0x0f80, + 0x0000, 0x1000, 0x7c05, 0x40e1, + 0x20cf, 0x01a1, 0x264c, 0xb180, + 0x27ca, 0x1001, 0xc004, 0xb8c3, + 0x7f05, 0x78ef, 0x7c05, 0x2354, + 0x1800, 0x7865, 0xb080, 0x2355, + 0x1800, 0x7865, 0xb040, 0xc003, + 0x7905, 0x2355, 0x1c00, 0x7b05, + 0xb320, 0xc000, 0x2740, 0x3041, + 0x208c, 0x8fc3, 0xf405, 0x1e00, + 0x7042, 0x8000, 0x04bc, 0x782f, + 0xc0a6, 0x1404, 0x341b, 0xc6c6, + 0x738d, 0xdf10, 0xf19e, 0x708d, + 0xf19b, 0x718d, 0xf199, 0xdc20, + 0xf197, 0x758d, 0xf195, 0x78e0, 0xc0f1, 0xc1a4, 0xc408, 0xc443, 0xc407, 0xc442, 0xc406, 0xc441, - 0xc405, 0x0dde, 0xffef, 0xc440, + 0xc405, 0x0de2, 0xffef, 0xc440, 0xc0a4, 0xc0d1, 0x7ee0, 0x78e0, - 0x41c3, 0x8000, 0x04b2, 0x8920, + 0x41c3, 0x8000, 0x04bc, 0x8920, 0x208c, 0x8fc3, 0x20ca, 0x0041, 0x1e00, 0x7004, 0x9008, 0x01e8, 0x7ee0, 0x78e0, 0x41c3, 0x8000, - 0x04b2, 0x8920, 0x208c, 0x8fc3, + 0x04bc, 0x8920, 0x208c, 0x8fc3, 0x20ca, 0x0041, 0x1e00, 0x7004, 0x9008, 0x01e6, 0x7ee0, 0x78e0, - 0x41c3, 0x8000, 0x04b2, 0x8920, - 0x208c, 0x8fc3, 0x20ca, 0x0041, - 0x1e00, 0x7004, 0x9008, 0x01e4, - 0x7ee0, 0x78e0, 0xc0e4, 0x70ad, - 0xf002, 0x71a5, 0x0d1f, 0x10b2, - 0x70cd, 0xf010, 0x255a, 0x1c83, - 0x607c, 0x265a, 0x1643, 0x71c5, - 0x639b, 0x633b, 0x8b80, 0x7c44, - 0xab80, 0x0eed, 0x9092, 0xf1ef, - 
0xc4c4, 0x78e0, 0xc0f1, 0xc1a4, - 0x700c, 0x0c3a, 0xffef, 0xd9ff, + 0x264a, 0x3000, 0xf010, 0x265a, + 0x3c83, 0x607c, 0x235a, 0x1643, + 0x7165, 0x639b, 0x633b, 0x8b80, + 0x7c44, 0xab80, 0x0beb, 0x9094, + 0x2640, 0x305e, 0x0ef9, 0xb0b4, + 0x706d, 0x7ee0, 0xc0f1, 0xc1a4, + 0x700c, 0x0c76, 0xffef, 0xd9ff, 0x700c, 0xc043, 0xc042, 0xc041, 0xc040, 0xd8ff, 0xd988, 0x754c, - 0x0d2e, 0xffef, 0xdb40, 0x700c, - 0x0cc6, 0xffef, 0x712c, 0xc0a4, + 0x0d52, 0xffef, 0xdb40, 0x700c, + 0x0cee, 0xffef, 0x712c, 0xc0a4, 0xc0d1, 0x7ee0, 0xc2e6, 0x1cfc, - 0xb6c8, 0xc1a1, 0x4428, 0x4308, - 0x702c, 0x704c, 0xf002, 0x7144, - 0x784f, 0x0815, 0x07f5, 0x2314, - 0x1000, 0x8860, 0x8801, 0x0bf3, - 0x8005, 0xf003, 0x704c, 0xdb1e, - 0xf002, 0x7764, 0x202f, 0x80c7, - 0xf208, 0x2314, 0x1000, 0x88a0, - 0x8801, 0x0df3, 0x9005, 0x4160, - 0x7f2f, 0x70ad, 0xdeff, 0x706c, - 0x4140, 0xf003, 0x7124, 0x782f, - 0x0f39, 0x1024, 0x2314, 0x1000, - 0x1001, 0x009b, 0x1000, 0x009f, - 0xca0e, 0x230c, 0xb340, 0x25ca, - 0x16cd, 0x270c, 0xb380, 0x26ca, - 0x17c5, 0xe86e, 0xca5f, 0x270c, - 0xb000, 0x23c0, 0x0061, 0x230c, - 0xb000, 0x23c0, 0x0061, 0xf1e4, + 0xb6c8, 0xc1a2, 0x4528, 0x4300, + 0x70ed, 0x704c, 0xf006, 0x8820, + 0x8801, 0x0915, 0x0004, 0x7144, + 0x784f, 0x08f7, 0x87f4, 0x2314, + 0x0000, 0x704c, 0xd91e, 0xf009, + 0x2314, 0x0000, 0x8880, 0x8801, + 0x0c0f, 0x1004, 0x7724, 0x202f, + 0x8047, 0xf5f8, 0xf002, 0x4728, + 0x78ef, 0xc040, 0x784f, 0x264a, + 0x3000, 0xdeff, 0x4718, 0x4318, + 0x706d, 0xf013, 0x8821, 0x8880, + 0xca14, 0xe80a, 0xca66, 0x7410, + 0xca67, 0x090d, 0x0021, 0x23c0, + 0x1061, 0x7165, 0x2108, 0x079e, + 0x2409, 0x138e, 0x71e7, 0xc000, + 0x08dd, 0x87e5, 0x2314, 0x07c0, 0x1600, 0x7080, 0x8000, 0x000e, - 0xc040, 0x133f, 0x1080, 0xe00f, - 0xac00, 0x780f, 0x08e2, 0x0060, - 0x2314, 0x1000, 0x232f, 0x3087, - 0xac01, 0x2340, 0x30c0, 0xac02, - 0x780f, 0x08ce, 0x0060, 0x2314, - 0x1000, 0xac03, 0xd8fd, 0x60f8, - 0xac04, 0x6f0b, 0x780f, 0x08ba, - 0x0060, 0x2314, 0x1000, 0xac05, - 0xc813, 0x1214, 0x3602, 0x262f, - 
0xf0c7, 0x5052, 0x1a16, 0x3098, - 0xf209, 0xc100, 0x090f, 0x015e, - 0xd80f, 0x704c, 0x5052, 0x1a16, - 0x3098, 0x120e, 0x3081, 0xe92a, - 0x120f, 0x3083, 0x712c, 0x2344, - 0x07cc, 0x4320, 0x4dd1, 0xec06, - 0xbc61, 0x0c01, 0x1031, 0x7b7b, - 0x2904, 0x00c1, 0xdd64, 0x7124, - 0x7c2f, 0x234e, 0x3041, 0x673f, - 0x7bef, 0x0b13, 0x0323, 0xd964, - 0x4181, 0x219f, 0x0901, 0x2905, - 0x00c1, 0x782c, 0x0c0f, 0x10e3, - 0x5052, 0x239f, 0x0901, 0x2b05, - 0x030d, 0x251a, 0x1080, 0x5056, - 0x7487, 0x1404, 0x341b, 0xc6c6, - 0x704c, 0x41c3, 0x9003, 0xe004, - 0xb140, 0xb15d, 0x1e00, 0x7004, - 0x9009, 0xe1e0, 0x40c3, 0x9003, - 0xff64, 0xb040, 0x7fe0, 0x1804, - 0x0045, 0x78e0, 0x0917, 0x03f0, - 0x228a, 0x0fc7, 0x714c, 0xb9c4, - 0xe905, 0xb961, 0x09ff, 0x8031, - 0x7a5b, 0x201a, 0x0f80, 0x0000, - 0x2000, 0x224f, 0x0241, 0x2005, - 0x0f80, 0x9002, 0x016a, 0x1800, - 0x0005, 0xb040, 0xb020, 0xb040, - 0x7fe0, 0x1800, 0x0005, 0x78e0, - 0xc0f1, 0x4718, 0x706c, 0xf00c, - 0x43e3, 0xbcc4, 0xec05, 0xbc61, - 0x0cff, 0x9031, 0x7b7d, 0xbbc0, - 0x631b, 0x7124, 0x7c2f, 0x0aed, - 0x8325, 0x786f, 0xc0d1, 0x7ee0, - 0xc0f1, 0x4200, 0x0b8a, 0x0060, - 0x4020, 0x795b, 0x2144, 0x0181, - 0xb9c4, 0xe906, 0xb961, 0x0901, - 0x0031, 0x781d, 0xc0d1, 0x7fe0, - 0xb8c1, 0x78e0, 0xc0f1, 0xc1a4, - 0x700c, 0x0a0a, 0xffef, 0xd9ff, - 0x700c, 0xc043, 0xc042, 0xc041, - 0xc040, 0xd8ff, 0xd990, 0x754c, - 0x0afe, 0xffef, 0xdb40, 0x700c, - 0x0a96, 0xffef, 0x712c, 0xc0a4, - 0xc0d1, 0x7ee0, 0xc3e1, 0xc2e1, - 0xc1e1, 0xc0e1, 0xc0f1, 0xc1a1, - 0x4220, 0xc184, 0xc140, 0x1600, - 0x7081, 0x8000, 0x0012, 0x080f, - 0x0064, 0x4040, 0x087a, 0x0220, - 0xc100, 0x7487, 0xc0d1, 0x7fe0, - 0xc0a4, 0x78e0, 0x791d, 0x793d, - 0x793d, 0x793d, 0x793d, 0x793d, - 0x215f, 0x0802, 0x219a, 0x0001, - 0x7822, 0x6058, 0x7fe0, 0x780e, - 0xc2e6, 0x1cfc, 0xb6c8, 0xc1a4, - 0x46cb, 0x8000, 0x0670, 0x6e04, - 0x0f56, 0x0060, 0x4318, 0x70ed, - 0x70ad, 0xf003, 0x71a5, 0x0d1d, - 0x10b2, 0x708d, 0xf00d, 0x245a, - 0x1641, 0x255a, 0x1c80, 0x6038, - 0x09a2, 0x01a0, 0x7063, 
0x7185, - 0x0cef, 0x9092, 0xf1f0, 0x40c3, - 0x8000, 0x0661, 0x1e00, 0x16c0, - 0xa8e1, 0xa8e0, 0x0b66, 0x0060, - 0xa8ee, 0x0b6e, 0x0060, 0x4508, - 0x4100, 0x096e, 0x01e0, 0x40a1, - 0x266f, 0x10c3, 0x8e18, 0xe809, - 0x41c3, 0x0140, 0x0000, 0x0f4a, - 0xffef, 0xd80a, 0xf01e, 0x8e00, - 0x45cb, 0x001e, 0x8480, 0x41c3, - 0x05f5, 0xe100, 0x2044, 0x808f, - 0x40a1, 0x20ca, 0x0041, 0x0842, - 0x0120, 0x702c, 0x70f5, 0x40c3, - 0x3b9a, 0xca00, 0x25ca, 0x1001, - 0x40a1, 0x1e00, 0x70c5, 0x9004, - 0x00c0, 0x0826, 0x0120, 0x702c, - 0x1600, 0x7080, 0x8000, 0x000d, - 0x080d, 0x001e, 0x1e00, 0x7045, - 0x9004, 0x00c0, 0x47cb, 0x8000, - 0x0578, 0x8f00, 0xe804, 0x8e00, - 0x0817, 0x00de, 0x0c82, 0xffcf, - 0x40c3, 0x000f, 0x4240, 0x0ff2, - 0x00e0, 0x702c, 0xf054, 0x0c86, - 0x0000, 0x40c3, 0x0000, 0x0a00, - 0x08a2, 0xffef, 0xd9ff, 0x70ad, - 0xd8ff, 0x702c, 0x754c, 0x736c, - 0xc543, 0xc542, 0xc541, 0x099a, - 0xffef, 0xc540, 0x8f01, 0x0825, - 0x00b4, 0x702c, 0x7104, 0x781d, - 0xb862, 0x780f, 0xc542, 0xc541, - 0xc540, 0xc043, 0xd8ff, 0x754c, - 0x0976, 0xffef, 0x726c, 0x8f01, - 0xc542, 0xc541, 0xc540, 0xc043, - 0xd8ff, 0x702c, 0x754c, 0x0962, - 0xffef, 0xdb0c, 0x8f01, 0xd908, - 0x754c, 0xc043, 0xd8ff, 0x726c, - 0xc542, 0xc541, 0x094a, 0xffef, - 0xc540, 0x0b96, 0xffef, 0xd8ff, - 0x8f01, 0x702c, 0x754c, 0xc043, + 0xc041, 0x133f, 0x0080, 0xe00f, + 0xad00, 0x780f, 0x099e, 0x0060, + 0x2314, 0x0000, 0x2340, 0x30c1, + 0xad22, 0xad01, 0x6a03, 0x780f, + 0x098a, 0x0060, 0x2314, 0x0000, + 0xc100, 0xb963, 0xad24, 0xad03, + 0x6f0b, 0x780f, 0x0976, 0x0060, + 0x2314, 0x0000, 0xad05, 0x1215, + 0x3603, 0xc816, 0x262f, 0xf2c7, + 0x1a14, 0x30d8, 0x5070, 0xf20b, + 0xc101, 0x0913, 0x015e, 0x700c, + 0xdb0f, 0x1a18, 0x3019, 0x1a14, + 0x33d9, 0x1214, 0x3081, 0xe92c, + 0x1215, 0x3082, 0x712c, 0xdd64, + 0x2244, 0x07cc, 0x4220, 0x2602, + 0x3381, 0xec06, 0xbc61, 0x0c01, + 0x1031, 0x7a5b, 0x2904, 0x0081, + 0x234e, 0x3042, 0x7124, 0x7c2f, + 0xc100, 0x6159, 0x7a2f, 0x0a13, + 0x0325, 0xd964, 0x4181, 0x219f, + 0x0901, 0x2905, 
0x0081, 0x7b2c, + 0x0c11, 0x10a5, 0x1a14, 0x30d8, + 0x229f, 0x0901, 0x2a05, 0x030d, + 0x78ac, 0x5070, 0xc0a2, 0x1404, + 0x341b, 0xc6c6, 0x704c, 0x41c3, + 0x9003, 0xe004, 0xb140, 0xb15d, + 0x1e00, 0x7004, 0x9009, 0xe1e0, + 0x40c3, 0x9003, 0xff64, 0xb040, + 0x7fe0, 0x1804, 0x0045, 0x78e0, + 0x0917, 0x03f0, 0x228a, 0x0fc7, + 0x714c, 0xb9c4, 0xe905, 0xb961, + 0x09ff, 0x8031, 0x7a5b, 0x201a, + 0x0f80, 0x0000, 0x2000, 0x2005, + 0x0f81, 0x9002, 0x016a, 0x224f, + 0x0240, 0x1900, 0x0005, 0xb140, + 0xb100, 0xb140, 0x7fe0, 0x1900, + 0x0005, 0x78e0, 0x260a, 0x3000, + 0x706c, 0xf00d, 0x230a, 0x0780, + 0xbcc4, 0xec06, 0xbc61, 0x0c01, + 0x1031, 0x7b7d, 0x7124, 0xbbc0, + 0x631b, 0x7c2f, 0x0ae9, 0x8325, + 0x786f, 0x7ee0, 0xc0f1, 0x4200, + 0x0cf2, 0x0060, 0x4020, 0x795b, + 0x2144, 0x0181, 0xb9c4, 0xe906, + 0xb961, 0x0901, 0x0031, 0x781d, + 0xc0d1, 0x7fe0, 0xb8c1, 0x78e0, + 0xc0f1, 0xc1a4, 0x700c, 0x0a4a, + 0xffef, 0xd9ff, 0x700c, 0xc043, + 0xc042, 0xc041, 0xc040, 0xd8ff, + 0xd990, 0x754c, 0x0b26, 0xffef, + 0xdb40, 0x700c, 0x0ac2, 0xffef, + 0x712c, 0xc0a4, 0xc0d1, 0x7ee0, + 0xc3e1, 0xc2e1, 0xc1e1, 0xc0e1, + 0xc0f1, 0xc1a1, 0x4220, 0xc184, + 0xc140, 0x1600, 0x7081, 0x8000, + 0x0012, 0x080f, 0x0064, 0x4040, + 0x0d16, 0x0220, 0xc100, 0x7487, + 0xc0d1, 0x7fe0, 0xc0a4, 0x78e0, + 0x791d, 0x216c, 0x0102, 0x206c, + 0x0141, 0x7822, 0x621a, 0x7fe0, + 0x784e, 0x78e0, 0xc2e6, 0x1cfc, + 0xb6c8, 0xc1a6, 0x40c3, 0x8000, + 0x0580, 0x093a, 0x00a0, 0x4318, + 0x70ad, 0x43a9, 0xf00d, 0x235a, + 0x0641, 0x235a, 0x1c80, 0x6038, + 0x0be2, 0x01a0, 0x7063, 0x7164, + 0x0bef, 0x8094, 0x7165, 0x0bfd, + 0x90b4, 0x706c, 0x40c3, 0x8000, + 0x06cd, 0xa8ae, 0xa8a1, 0xa8a0, + 0x40c3, 0x8000, 0x06dc, 0x0cda, + 0x0060, 0x1800, 0x06c0, 0x0cde, + 0x0060, 0x4508, 0x4100, 0x0b92, + 0x01e0, 0x40a1, 0x266f, 0x10c3, + 0x8e18, 0xe809, 0x41c3, 0x014a, + 0x0000, 0x0f52, 0xffef, 0xd80a, + 0xf01e, 0x8e00, 0x47cb, 0x001e, + 0x8480, 0x41c3, 0x05f5, 0xe100, + 0x2044, 0x808d, 0x40e1, 0x20ca, + 0x0041, 0x0a46, 0x0120, 0x702c, + 0x70b5, 
0x40c3, 0x3b9a, 0xca00, + 0x27ca, 0x1001, 0x40e1, 0x1e00, + 0x70c5, 0x9004, 0x00c0, 0x0a2a, + 0x0120, 0x702c, 0x1600, 0x7080, + 0x8000, 0x000d, 0x080d, 0x001e, + 0x1e00, 0x7045, 0x9004, 0x00c0, + 0x47cb, 0x8000, 0x05e4, 0x8f00, + 0x7014, 0xf2d3, 0x8e00, 0xb8e3, + 0xf4cf, 0x0d42, 0x0000, 0x40c3, + 0x0000, 0x0a00, 0x08fa, 0xffef, + 0xd9ff, 0x70ad, 0xd8ff, 0x702c, + 0x754c, 0x736c, 0xc543, 0xc542, + 0xc541, 0x09da, 0xffef, 0xc540, + 0x8f01, 0x0825, 0x00b4, 0x702c, + 0x7104, 0x781d, 0xb862, 0x780f, + 0xc043, 0xd8ff, 0x754c, 0x726c, + 0xc542, 0xc541, 0x09b6, 0xffef, + 0xc540, 0x8f01, 0xc043, 0xd8ff, + 0x702c, 0x754c, 0xdb0c, 0xc542, + 0xc541, 0x09a2, 0xffef, 0xc540, + 0x8f01, 0xd908, 0x754c, 0xc043, 0xd8ff, 0x726c, 0xc542, 0xc541, - 0x092e, 0xffef, 0xc540, 0x0b5e, - 0xffef, 0xd8ff, 0x0aea, 0x01c0, - 0xd896, 0x08be, 0xffef, 0x712c, - 0x0d22, 0x0000, 0x8e00, 0x083f, - 0x001e, 0x43c3, 0x9004, 0x0014, - 0x9320, 0xca01, 0x080d, 0x01b0, - 0x214f, 0x0082, 0x080b, 0x00d1, - 0x2185, 0x0108, 0x4220, 0xb340, - 0xd823, 0x42c3, 0x9005, 0xe034, - 0xb200, 0x702c, 0x40c3, 0x9003, - 0xe034, 0xb020, 0xb8b1, 0xb020, - 0xb221, 0xddff, 0x4063, 0x702c, - 0x228a, 0x0dff, 0x776c, 0x0cea, - 0x0260, 0xc540, 0x774c, 0x4063, - 0x712c, 0x4340, 0x0cda, 0x0260, - 0xc540, 0xc0a4, 0x1404, 0x341b, - 0xc6c6, 0x78e0, 0xc2e4, 0x70ad, - 0xbd9a, 0x70cd, 0xd8ff, 0x41a1, - 0x0db6, 0xffef, 0x1a0c, 0x3382, - 0x0f8e, 0x0180, 0x254f, 0x1401, - 0x0da6, 0xffef, 0xd8ff, 0x0d9a, - 0x01e0, 0xd8ff, 0x26ab, 0x10c4, + 0x098a, 0xffef, 0xc540, 0x0bd2, + 0xffef, 0xd8ff, 0x8f01, 0x702c, + 0x754c, 0xc043, 0xd8ff, 0x726c, + 0xc542, 0xc541, 0x096e, 0xffef, + 0xc540, 0x0b9a, 0xffef, 0xd8ff, + 0x1600, 0x70c0, 0x8000, 0x000e, + 0x08ed, 0x0013, 0x1303, 0x3080, + 0xc044, 0xd825, 0xb89f, 0x88c0, + 0x1033, 0x008d, 0x0bbe, 0x0020, + 0x730c, 0x8f01, 0x2605, 0x1341, + 0x704c, 0xc043, 0x265f, 0x1100, + 0x71cd, 0x726c, 0xc641, 0x7d05, + 0x7d25, 0x78af, 0xc045, 0xc042, + 0xc004, 0x702c, 0x781d, 0x781d, + 0x2044, 0x0800, 0x2045, 0x0180, + 
0xc040, 0x0912, 0xffef, 0xd8ff, + 0x8f01, 0x702c, 0x704c, 0xc043, + 0xd8ff, 0x726c, 0x4528, 0xc142, + 0xc641, 0x08fa, 0xffef, 0x1c00, + 0x30c1, 0x8f01, 0x702c, 0x704c, + 0xc043, 0xc005, 0x726c, 0xc641, + 0xc042, 0xc004, 0x781d, 0x2044, + 0x0800, 0x2045, 0x0580, 0xc040, + 0x08d2, 0xffef, 0xd8ff, 0x8f01, + 0x702c, 0x704c, 0xc043, 0xc004, + 0x726c, 0xc542, 0xb8c5, 0xc040, + 0xd8ff, 0x08ba, 0xffef, 0xc641, + 0x40c3, 0x0000, 0x2710, 0x08d2, + 0x0120, 0xd90a, 0x4608, 0x208a, + 0x0e0f, 0x41c3, 0x014b, 0x0000, + 0x0dda, 0xffaf, 0x263c, 0x1000, + 0x78dd, 0x781d, 0x7704, 0x7e0f, + 0xf81a, 0xc643, 0xc542, 0xc541, + 0x0882, 0xffef, 0xc540, 0xf817, + 0xc643, 0xc542, 0xc541, 0x0876, + 0xffef, 0xc540, 0x0c1a, 0x01c0, + 0xd896, 0x080e, 0xffef, 0x712c, + 0x0cfe, 0x0000, 0xf009, 0x0af2, + 0xffcf, 0x40c3, 0x000f, 0x4240, + 0x0856, 0x0120, 0x702c, 0xddff, + 0x4063, 0x702c, 0x228a, 0x0dff, + 0x776c, 0x095a, 0x02a0, 0xc540, + 0x774c, 0x4063, 0x712c, 0x4340, + 0x094a, 0x02a0, 0xc540, 0xc0a6, + 0x1404, 0x341b, 0xc6c6, 0x78e0, + 0xd8ff, 0x702c, 0x704c, 0x726c, + 0x7ee0, 0x78e0, 0xc2e4, 0x70ad, + 0xbd9a, 0xd8ff, 0x41a1, 0x70cd, + 0x0cfa, 0xffef, 0x1a10, 0x3003, + 0x08ee, 0x01c0, 0x254f, 0x1401, + 0x0cea, 0xffef, 0xd8ff, 0x0976, + 0x0220, 0xd8ff, 0x26ab, 0x10c4, 0x7fff, 0xf000, 0xc2e6, 0x1cfc, - 0xb6c8, 0x2482, 0x3803, 0x1c84, - 0x3000, 0x1600, 0x7080, 0x8000, - 0x0012, 0x7414, 0x01ee, 0x000d, - 0x1600, 0x7080, 0x8000, 0x0665, - 0xc05d, 0x700c, 0xc05e, 0xf005, - 0xc01d, 0x7104, 0xc05d, 0xc01d, - 0x780f, 0x1c80, 0x3000, 0x40c3, - 0x8000, 0x0665, 0x8821, 0x1480, - 0x3000, 0x7030, 0x700c, 0xc05f, - 0xf7c5, 0xf0dd, 0xc01f, 0x7104, - 0xc05f, 0xc01f, 0x7b0f, 0x0bd5, - 0x8255, 0x1484, 0x3000, 0x08f1, - 0x80ee, 0xd941, 0xc01e, 0x780f, - 0x790a, 0x1c88, 0x3000, 0x2100, - 0x0f80, 0x8000, 0x07dc, 0x885e, - 0x2132, 0x0f80, 0x8000, 0x07fc, - 0x41c3, 0x0016, 0x0004, 0xc041, - 0xc240, 0x1480, 0x3002, 0x0d02, - 0xffef, 0x740c, 0x71ad, 0xf0af, - 0x1488, 0x3000, 0x209f, 0x0041, - 0x6119, 0x40c3, 0x8000, 
0x07bc, - 0x603a, 0x6108, 0x8a62, 0x1c90, - 0x3000, 0x8a04, 0x1c8c, 0x30c0, - 0x1c94, 0x3000, 0x8a06, 0x1c98, - 0x3000, 0x8a08, 0x1c9c, 0x3000, - 0x8a0a, 0x1ca0, 0x3000, 0x8a0c, - 0x1ca4, 0x3000, 0x8a0e, 0x1ca8, - 0x3000, 0x8a10, 0x1cac, 0x3000, - 0x8a12, 0x1cb0, 0x3000, 0x8a14, - 0x1cb4, 0x3000, 0x8a16, 0x1cb8, - 0x3000, 0x8a18, 0x1cbc, 0x3000, - 0x8a1a, 0x1cc0, 0x3000, 0x8a1c, - 0x1cc4, 0x3000, 0x8a1e, 0x1cc8, - 0x3000, 0x40c3, 0x8000, 0x07dc, - 0x6108, 0x1ccc, 0x3000, 0x2100, - 0x0f80, 0x8000, 0x07dc, 0x8822, - 0x100a, 0x009b, 0x100c, 0x009f, - 0x1cd0, 0x3040, 0x8824, 0x100e, - 0x008b, 0x8870, 0x1cd4, 0x3040, - 0x8826, 0x8892, 0x88d4, 0x1cd8, - 0x3040, 0x8828, 0x88f6, 0x8858, - 0x1cdc, 0x3040, 0x883a, 0x881c, - 0xc05c, 0x14dc, 0x3000, 0xc15b, - 0xc25a, 0xc759, 0xc658, 0xc457, - 0xc356, 0x1c54, 0x32c0, 0x1c50, - 0x37c0, 0x1c4c, 0x36c0, 0xc052, - 0x14d8, 0x3000, 0x1490, 0x3002, - 0x148c, 0x3003, 0xc051, 0x14d4, - 0x3000, 0x41c3, 0x0014, 0x001f, - 0xc050, 0x14d0, 0x3000, 0xc04f, - 0x14cc, 0x3000, 0xc04e, 0x14c8, - 0x3000, 0xc04d, 0x14c4, 0x3000, - 0xc04c, 0x14c0, 0x3000, 0xc04b, - 0x14bc, 0x3000, 0xc04a, 0x14b8, - 0x3000, 0xc049, 0x14b4, 0x3000, - 0xc048, 0x14b0, 0x3000, 0xc047, - 0x14ac, 0x3000, 0xc046, 0x14a8, - 0x3000, 0xc045, 0x14a4, 0x3000, - 0xc044, 0x14a0, 0x3000, 0xc043, - 0x149c, 0x3000, 0xc042, 0x1498, - 0x3000, 0xc041, 0x1494, 0x3000, - 0xc040, 0x0ba6, 0xffef, 0x740c, - 0x77a5, 0x212f, 0x8345, 0x06a4, - 0xffc3, 0xc01e, 0x7104, 0xc05e, - 0xf126, 0x2480, 0x3803, 0x1404, + 0xb6c8, 0xc1a5, 0xc044, 0x1600, + 0x7080, 0x8000, 0x0012, 0x086d, + 0x0175, 0x700c, 0x43db, 0x8000, + 0x06d1, 0x1300, 0x308f, 0xc042, + 0xf026, 0x70cd, 0xf021, 0xc004, + 0x083d, 0x03ae, 0x43c1, 0xc002, + 0xc203, 0x7d0f, 0x259f, 0x1041, + 0x40c3, 0x8000, 0x071e, 0x6509, + 0x60b8, 0x8802, 0xc140, 0x41c3, + 0x0016, 0x0004, 0xc041, 0x0c86, + 0xffef, 0x740c, 0x2340, 0x33c0, + 0x082a, 0x0020, 0x60b8, 0xc002, + 0x7104, 0xc042, 0x71c5, 0x0ec3, + 0x9254, 0x71e5, 0x1301, 0x3081, + 0x78ef, 0x09b3, 
0x8025, 0xc043, + 0xc0a5, 0x1404, 0x341b, 0xc6c6, + 0xc2e6, 0x1cfc, 0xb6c8, 0x2482, + 0x3b04, 0x68a1, 0x716f, 0xf0ac, + 0x8d62, 0x15ff, 0x9480, 0x152d, + 0x109f, 0x152f, 0x109e, 0x1cd8, + 0x3000, 0x8d05, 0x1531, 0x108b, + 0x1533, 0x108f, 0x1cdc, 0x3000, + 0x8d07, 0x1535, 0x108e, 0x1537, + 0x108c, 0x1ce0, 0x3000, 0x8d09, + 0x1539, 0x1082, 0x153b, 0x1081, + 0x1ce4, 0x3000, 0x8d0b, 0x1ce8, + 0x3000, 0x8d0d, 0x1cec, 0x3000, + 0x8d0f, 0x1cf0, 0x3000, 0x8d11, + 0x1cf4, 0x3000, 0x8d13, 0x1cf8, + 0x3000, 0x8d15, 0x1cfc, 0x3000, + 0x8d17, 0x1c40, 0x3018, 0x8d19, + 0x1c41, 0x3018, 0x8d1b, 0x1c42, + 0x3018, 0x8d1d, 0x1c43, 0x3018, + 0x8d1f, 0x1c44, 0x3018, 0x1521, + 0x1080, 0x1c45, 0x3018, 0x1523, + 0x1080, 0x1c46, 0x3018, 0x1525, + 0x1080, 0x1c47, 0x3018, 0x1527, + 0x1080, 0x1c48, 0x3018, 0x1529, + 0x1080, 0x1c49, 0x3018, 0x152b, + 0x1080, 0x1c4a, 0x3018, 0x153d, + 0x1080, 0xc25a, 0x14d8, 0x3002, + 0xc05c, 0x144a, 0x3600, 0xc15b, + 0x41c3, 0x0014, 0x001f, 0xc053, + 0x1449, 0x3600, 0xc459, 0xc658, + 0xc052, 0x1448, 0x3600, 0xc757, + 0x1c58, 0x32c0, 0xc051, 0x1447, + 0x3600, 0x1c54, 0x3780, 0x1c50, + 0x37c0, 0xc050, 0x1446, 0x3600, + 0xc04f, 0x1445, 0x3600, 0xc04e, + 0x1444, 0x3600, 0xc04d, 0x1443, + 0x3600, 0xc04c, 0x1442, 0x3600, + 0xc04b, 0x1441, 0x3600, 0xc04a, + 0x1440, 0x3600, 0xc049, 0x14fc, + 0x3000, 0xc048, 0x14f8, 0x3000, + 0xc047, 0x14f4, 0x3000, 0xc046, + 0x14f0, 0x3000, 0xc045, 0x14ec, + 0x3000, 0xc044, 0x14e8, 0x3000, + 0xc043, 0x14e4, 0x3000, 0xc042, + 0x14e0, 0x3000, 0xc041, 0x14dc, + 0x3000, 0xc040, 0x0af6, 0xffef, + 0x740c, 0x7767, 0x7077, 0x06ac, + 0xffca, 0x2480, 0x3b04, 0x1404, 0x341b, 0xc6c6, 0xc0f1, 0x2482, - 0x3d02, 0xc080, 0x702c, 0x0fb6, - 0x00e0, 0xdab4, 0x41c3, 0x0189, - 0x0000, 0x0b6e, 0xffef, 0x750c, - 0xd840, 0xc180, 0x0af2, 0x0160, + 0x3d02, 0xc080, 0x702c, 0x08f6, + 0x0120, 0xdab4, 0x41c3, 0x0195, + 0x0000, 0x0aca, 0xffef, 0x750c, + 0xd840, 0xc180, 0x0c9a, 0x0160, 0xda09, 0x700c, 0xd909, 0xc280, - 0x0c76, 0xffaf, 0x726c, 0xd830, - 0xc180, 
0x0ade, 0x0160, 0xda09, - 0x710c, 0xd909, 0xc280, 0x0c62, + 0x0c0e, 0xffaf, 0x726c, 0xd830, + 0xc180, 0x0c86, 0x0160, 0xda09, + 0x710c, 0xd909, 0xc280, 0x0bfa, 0xffaf, 0x726c, 0x2480, 0x3d02, 0xc0d1, 0x7ee0, 0x1600, 0x7081, 0x8000, 0x0012, 0x7534, 0x20e0, - 0x07c5, 0x7fe0, 0x1a0c, 0x3002, - 0xc0e4, 0x43c3, 0x8000, 0x0663, - 0x41c3, 0x9004, 0x00e4, 0x083f, - 0x00b1, 0xdc25, 0x8b86, 0x789b, - 0xab00, 0x8b47, 0x785b, 0x7104, - 0xab01, 0xab82, 0xab43, 0x704c, - 0xb140, 0x1600, 0x7080, 0x8000, - 0x0058, 0xb101, 0x41c3, 0x9004, - 0x00f6, 0xd8ff, 0xb140, 0xb144, - 0x1906, 0x03c5, 0x19fe, 0x8004, - 0xc4c4, 0x8ba4, 0x46cb, 0x9004, - 0x0030, 0x7abb, 0x0825, 0x0071, - 0xbc9f, 0xab40, 0x8b05, 0x7a1b, - 0x7144, 0xab41, 0xaba2, 0xab03, - 0x9640, 0x8c00, 0x0a29, 0x0171, - 0xb100, 0x8c00, 0xb101, 0xf012, - 0xab40, 0x8b07, 0x7a1b, 0x7144, - 0xab41, 0xaba2, 0xab03, 0x8c00, - 0xb100, 0x9600, 0x0827, 0x0151, - 0x8c00, 0xf015, 0x1902, 0x0005, - 0x40c3, 0x9004, 0x00fe, 0xd9ff, - 0x1800, 0x03c5, 0x18f8, 0x8044, - 0x18f6, 0x8005, 0x18fe, 0x8005, - 0xc4c4, 0x1600, 0x7080, 0x8000, - 0x0058, 0xb101, 0x702c, 0x40c3, - 0x9004, 0x00f6, 0xb020, 0xb024, - 0x18fe, 0x8044, 0xb023, 0xc4c4, - 0xc2e6, 0x1cfc, 0xb6c8, 0xc1a2, + 0x07c5, 0x7fe0, 0x1a10, 0x3002, + 0xc0e6, 0x44cb, 0x9004, 0x00e4, + 0x42c3, 0x8000, 0x06cf, 0x0835, + 0x00b0, 0x716c, 0x7114, 0xd825, + 0x46cb, 0x9004, 0x0030, 0xb89f, + 0xf42a, 0x8a25, 0xaa23, 0x8aa4, + 0x7b34, 0xaaa2, 0x96c0, 0x88e0, + 0x78bb, 0xb4e0, 0xaa00, 0x0e5f, + 0x1171, 0xaa61, 0xb4e1, 0xf02d, + 0x700c, 0xb400, 0x1600, 0x7081, + 0x8000, 0x0058, 0xb421, 0x44cb, + 0x9004, 0x00f6, 0xb400, 0xb404, + 0x1c06, 0x13c5, 0x8a27, 0xaa23, + 0x8a06, 0x7b34, 0xaa02, 0x781b, + 0xaa00, 0xd8ff, 0xaa61, 0x1cfe, + 0x9004, 0xc4c6, 0x88a0, 0xb4a0, + 0x8a27, 0xaa23, 0x8a04, 0x7b34, + 0xaa02, 0x781b, 0xaa00, 0xaa61, + 0x9600, 0x0829, 0x0151, 0xb4a1, + 0xf015, 0x1c02, 0x1005, 0x41c3, + 0x9004, 0x00fe, 0xd8ff, 0x1900, + 0x03c5, 0x19f8, 0x8004, 0x19f6, + 0x8005, 0x19fe, 0x8005, 0xc4c6, + 0x1600, 
0x7080, 0x8000, 0x0058, + 0xb401, 0x702c, 0x40c3, 0x9004, + 0x00f6, 0xb020, 0xb024, 0x18fe, + 0x8044, 0xb023, 0xc4c6, 0x78e0, + 0xc2e6, 0x1cfc, 0xb6c8, 0xc1a1, 0x46cb, 0x9004, 0x00f4, 0x9600, - 0x6ea2, 0x702c, 0xc040, 0x9500, - 0xc041, 0xd8ff, 0x95e3, 0x1508, - 0x111b, 0x1d08, 0x13c5, 0xb500, + 0x6ea2, 0x702c, 0xc040, 0xd8ff, + 0x1500, 0x111b, 0x95e3, 0x1508, + 0x110b, 0x1d08, 0x13c5, 0xb500, 0x1d06, 0x13c5, 0xb600, 0x1600, - 0x7080, 0x8000, 0x0660, 0x1e00, + 0x7080, 0x8000, 0x06cc, 0x1e00, 0x7004, 0x9008, 0x0190, 0x40c3, - 0x0000, 0x2af8, 0x0b22, 0x00e0, - 0x1efa, 0x9485, 0xc000, 0x41c3, - 0x8000, 0x0578, 0xb600, 0xb5e3, - 0xc001, 0xb500, 0x1d08, 0x16c4, - 0x1600, 0x7080, 0x8000, 0x0660, - 0x781d, 0xb8a0, 0x2080, 0x0fc3, - 0xa901, 0xc0a2, 0x1404, 0x341b, - 0xc6c6, 0x78e0, 0xc2e6, 0x1cfc, - 0xb6c8, 0xc1a1, 0x43db, 0x9004, - 0x01e0, 0x1300, 0x3101, 0x1600, - 0x7080, 0x8000, 0x0004, 0x205f, - 0x0100, 0x2044, 0x07c2, 0x4020, - 0xea05, 0xba61, 0x0aff, 0x8031, - 0x781d, 0xb8c3, 0x702c, 0x0811, - 0x0070, 0xc140, 0x0809, 0x01b0, - 0x710c, 0xc040, 0x700c, 0x0aaa, - 0x00e0, 0xd940, 0x71ed, 0x45cb, - 0x9005, 0xe112, 0x1dde, 0x13d4, - 0x1da6, 0x93d4, 0x13b6, 0xb10e, - 0xc000, 0xe827, 0x0e4b, 0x13bf, - 0x2684, 0x1c07, 0x264f, 0x13c0, - 0xb500, 0x700c, 0xd91c, 0x0a7a, - 0x00e0, 0x1df6, 0x9205, 0x40c3, - 0x000f, 0x4240, 0x702c, 0x0a6a, - 0x00e0, 0x1df6, 0x9245, 0x13be, - 0xb100, 0x1df0, 0x9045, 0x1df6, - 0x9205, 0x209a, 0x0008, 0x2004, - 0x0f80, 0x0000, 0x3e00, 0x78c5, - 0xb88e, 0xb500, 0xf005, 0x1df6, - 0x9205, 0x1df0, 0x93c4, 0x1e00, - 0x73c4, 0x9013, 0xe050, 0x7487, + 0x0000, 0x2af8, 0x0c7a, 0x00e0, + 0x1efa, 0x9485, 0xc000, 0xb600, + 0xb5e3, 0x1d00, 0x16c4, 0x1d08, + 0x12c4, 0x1600, 0x7080, 0x8000, + 0x06cc, 0x781d, 0xb8a0, 0x6829, + 0x40c3, 0x8000, 0x05e4, 0xa821, + 0x7487, 0x1404, 0x341b, 0xc6c6, + 0xc2e6, 0x1cfc, 0xb6c8, 0x46cb, + 0x9004, 0x01e0, 0x9620, 0x1600, + 0x709b, 0x8000, 0x0004, 0x70ad, + 0x235f, 0x3100, 0x2044, 0x07c2, + 0x4020, 0xea06, 0xba61, 0x0a01, + 0x0031, 
0x781d, 0xb8c3, 0x080f, + 0x0070, 0xc1a1, 0x0807, 0x0190, + 0x71ad, 0x700c, 0x0c0a, 0x00e0, + 0xd940, 0x47cb, 0x9005, 0xe112, + 0x1fde, 0x1055, 0x1fa6, 0x9055, + 0x16b6, 0x9100, 0xc040, 0xed37, + 0xc000, 0x086b, 0x03bf, 0x70ad, + 0xc000, 0xd91c, 0x2084, 0x0c07, + 0xc040, 0xb88f, 0xb700, 0x700c, + 0xbd8e, 0x0bd6, 0x00e0, 0x1ff6, + 0x9205, 0x40c3, 0x000f, 0x4240, + 0x702c, 0x0bc6, 0x00e0, 0x1ff6, + 0x9245, 0x16be, 0x9100, 0x1ff0, + 0x9045, 0x1ff6, 0x9205, 0x209a, + 0x0008, 0xc100, 0x231a, 0x3f9b, + 0x0020, 0x0000, 0x2004, 0x0f80, + 0x0000, 0x3e00, 0x7905, 0x254f, + 0x13c0, 0xb720, 0x7825, 0x2305, + 0x3f81, 0x9013, 0xe40c, 0xb100, + 0xf005, 0x1ff6, 0x9205, 0x1ff0, + 0x9045, 0x710c, 0x1e00, 0x7004, + 0x9013, 0xe050, 0x7487, 0x1404, + 0x341b, 0xc6c6, 0xc2e6, 0x1cfc, + 0xb6c8, 0x46cb, 0x9004, 0x00f4, + 0x6ea2, 0xd8ff, 0x1600, 0x111b, + 0x1500, 0x111e, 0x1506, 0x110b, + 0x95e4, 0x1d08, 0x13c5, 0xb500, + 0x1d06, 0x13c5, 0xb600, 0x700c, + 0x1e00, 0x7004, 0x9008, 0x0190, + 0xd834, 0x1efa, 0x9004, 0x40c3, + 0x0000, 0x2af8, 0x0b2a, 0x00e0, + 0x702c, 0x1e00, 0x16c4, 0x1d06, + 0x12c4, 0x1d00, 0x1784, 0xb5e4, 0x1404, 0x341b, 0xc6c6, 0x78e0, - 0xc2e6, 0x1cfc, 0xb6c8, 0xc1a1, - 0x46cb, 0x9004, 0x00f4, 0x9600, - 0x6ea2, 0xc040, 0xd8ff, 0x1500, - 0x111b, 0x1506, 0x110b, 0x95e4, - 0x1d08, 0x13c5, 0xb500, 0x1d06, - 0x13c5, 0xb600, 0xd834, 0x1e00, - 0x7005, 0x9008, 0x0190, 0x1efa, - 0x9004, 0x40c3, 0x0000, 0x2af8, - 0x09e6, 0x00e0, 0x702c, 0xc000, - 0xb600, 0x1d06, 0x12c4, 0x1d00, - 0x16c4, 0xb5e4, 0x7487, 0x1404, - 0x341b, 0xc6c6, 0x8821, 0x8800, - 0x6038, 0x781d, 0x7fe0, 0x780f, - 0xc2e6, 0x1cfc, 0xb6c8, 0xc1a3, - 0x4628, 0x7034, 0x1600, 0x7081, - 0x8000, 0x001a, 0x4508, 0xd8f0, - 0x20ca, 0x03e2, 0x4358, 0x08ce, - 0x0160, 0x790b, 0xf471, 0x1600, - 0x7080, 0x8000, 0x0001, 0x08db, - 0x00bf, 0x41a1, 0xca09, 0x4263, - 0xc042, 0xca0f, 0xdb40, 0x1a0f, - 0x30c3, 0xc041, 0x40c1, 0x0e4a, - 0x0020, 0x1a09, 0x3083, 0xee0b, - 0x40a1, 0x4163, 0x714c, 0xdb80, - 0x0bde, 0x0160, 0x1c00, 0x3001, - 
0xf006, 0x40a1, 0x4163, 0x0e1e, - 0x0160, 0x704c, 0x40c3, 0x8000, - 0x0665, 0x1001, 0x008b, 0x88a0, - 0x704c, 0xdf7f, 0x70cd, 0xf003, - 0x71a5, 0x78af, 0x0b4f, 0x1024, - 0x708d, 0xf003, 0x7185, 0x788f, - 0x08f1, 0x8275, 0x706c, 0x0b33, - 0x300f, 0xf1fa, 0x209f, 0x0041, - 0x7164, 0x2014, 0x005f, 0x41c3, - 0x8000, 0x07bc, 0x2100, 0x07c0, - 0x8801, 0x2732, 0x3041, 0x7210, - 0x20ca, 0x008d, 0x7730, 0x21ca, - 0x03c5, 0x4728, 0x4200, 0x796f, - 0x09d5, 0x87f4, 0x78cf, 0x71c5, - 0xf1de, 0xca5d, 0xe208, 0xbf68, - 0x2209, 0x0000, 0x1a5f, 0x3002, - 0x7b0f, 0xca5c, 0x41c3, 0x0032, - 0x0002, 0x2708, 0x1000, 0x1a5e, - 0x3002, 0x7a0f, 0x0fb2, 0xffaf, - 0x740c, 0xc001, 0x1a0f, 0x3002, - 0xc002, 0x1a09, 0x3002, 0xc0a3, + 0x8821, 0x8800, 0x6119, 0x7fe0, + 0x783d, 0x78e0, 0xc2e6, 0x1cfc, + 0xb6c8, 0xc1a3, 0x4528, 0x7034, + 0x1600, 0x7081, 0x8000, 0x001a, + 0x4308, 0xd8f0, 0x20ca, 0x03e2, + 0xc241, 0x0a5a, 0x0160, 0x782b, + 0xf463, 0x1600, 0x7080, 0x8000, + 0x0001, 0x08bf, 0x00bf, 0x4161, + 0xca0c, 0xc201, 0x1215, 0x309b, + 0xc042, 0x40a1, 0xdb40, 0x1a15, + 0x30c3, 0x0f7a, 0x0020, 0x1a0c, + 0x3083, 0xed0b, 0xc101, 0x4061, + 0x714c, 0xdb80, 0x0d56, 0x0160, + 0x1c00, 0x3001, 0xf006, 0xc101, + 0x4061, 0x0fa2, 0x0160, 0x704c, + 0x44cb, 0x8000, 0x06d1, 0x1401, + 0x109e, 0x8cc0, 0x70ad, 0xdf7f, + 0x43a9, 0xf01b, 0xc001, 0x082b, + 0x00ae, 0x702c, 0xf00c, 0x209f, + 0x0041, 0x7834, 0x6098, 0x8870, + 0x880f, 0x7124, 0x2508, 0x10cd, + 0x2709, 0x100f, 0x09eb, 0x87f4, + 0x202f, 0x02c7, 0x7165, 0x7144, + 0x0ad5, 0x8254, 0x71c5, 0x78cf, + 0x0ef9, 0xb025, 0x704c, 0xca65, + 0xe508, 0xbf68, 0x2009, 0x0343, + 0xca64, 0x1a67, 0x30c2, 0x41c3, + 0x0033, 0x0002, 0x2708, 0x1000, + 0x1a66, 0x3002, 0x7a0f, 0x0f16, + 0xffaf, 0x740c, 0xc002, 0x1a15, + 0x36c2, 0x1a0c, 0x3002, 0xc0a3, + 0x1404, 0x341b, 0xc6c6, 0x78e0, + 0xc0e6, 0xc704, 0x090f, 0x07f4, + 0xc503, 0x700c, 0x1f00, 0x1003, + 0xf04c, 0x2014, 0x004e, 0x8e81, + 0x0cf3, 0x9084, 0x8ec0, 0x0aef, + 0x83a4, 0x2402, 0x109e, 0x2202, + 0x038b, 0x7074, 0x23ca, 
0x1782, + 0x1f00, 0x12c2, 0x4328, 0xf014, + 0x0c2d, 0x10a4, 0x7765, 0x232f, + 0x12c7, 0x0b25, 0x1775, 0x7074, + 0xdcff, 0x24ca, 0x1062, 0x679f, + 0x7ced, 0x2014, 0x030c, 0x8cc0, + 0x8c81, 0x43e9, 0x0add, 0x83a5, + 0x272f, 0x12c5, 0x7074, 0xd8ff, + 0x20ca, 0x0061, 0x60f8, 0x7f0f, + 0x0f0d, 0x1770, 0x238a, 0x1fc3, + 0x0f15, 0x1051, 0x0c11, 0x1084, + 0x0a0d, 0x03a4, 0x7074, 0x23ca, + 0x1062, 0x7061, 0x7a0d, 0xca0c, + 0xeb03, 0x4a31, 0xf002, 0x7942, + 0x2044, 0x07c2, 0x4020, 0xea06, + 0xba61, 0x0a01, 0x0031, 0x781b, + 0xad00, 0xc4c6, 0xc2e6, 0x1cfc, + 0xb6c8, 0x4338, 0x1600, 0x7101, + 0x8000, 0x0006, 0x4348, 0x218c, + 0x864a, 0x0096, 0x002d, 0xc1a1, + 0x0dde, 0xffaf, 0x712c, 0x260a, + 0x3000, 0x235f, 0x1800, 0x70ad, + 0x68ec, 0x1600, 0x7080, 0x8000, + 0x0004, 0x201a, 0x0f8e, 0x0010, + 0x0000, 0xf033, 0x231a, 0x3f81, + 0x0000, 0x1000, 0x7825, 0x2005, + 0x0381, 0x40a1, 0x209a, 0x0004, + 0x7825, 0x781b, 0xb891, 0xb89c, + 0xb89f, 0xc040, 0x0e1e, 0xffaf, + 0x9000, 0xeb08, 0x2704, 0x1f81, + 0x0000, 0xfffc, 0x6038, 0xf003, + 0x78e2, 0x7a0e, 0x785c, 0x781c, + 0x781c, 0x781c, 0x4100, 0xd81b, + 0xb861, 0x0801, 0x0031, 0x793d, + 0x71a5, 0x6238, 0xb8c4, 0x4831, + 0x2a44, 0x0800, 0x209a, 0x0001, + 0x7905, 0xc000, 0xb020, 0x0da1, + 0x90b4, 0x2655, 0x3800, 0x7487, 0x1404, 0x341b, 0xc6c6, 0x78e0, - 0xc2e6, 0x4718, 0xc705, 0x0963, - 0x07f5, 0xc004, 0x2714, 0x304c, - 0x8cc1, 0x0e57, 0x1084, 0x8ca0, - 0x0a4f, 0x0364, 0x2602, 0x108b, - 0x4ab4, 0x7074, 0x747d, 0xaf80, - 0x4728, 0xf00b, 0xddff, 0x25ca, - 0x1062, 0x64bf, 0x7ced, 0x2714, - 0x330c, 0x8ca0, 0x8cc1, 0x0a15, - 0x0364, 0x7ced, 0x0e0d, 0x10a4, - 0x7def, 0x0de5, 0x97f4, 0x7074, - 0x7074, 0xdbff, 0x714c, 0x727c, - 0x1209, 0x3083, 0x629a, 0x7a4d, - 0xf206, 0x4a31, 0xf005, 0x702c, - 0xaf20, 0xf009, 0x7942, 0xbbc4, - 0xeb05, 0xbb61, 0x0bff, 0x8031, - 0x793b, 0xa820, 0xc6c6, 0x78e0, - 0xc2e6, 0x1cfc, 0xb6c8, 0xc1a2, - 0xc140, 0x1600, 0x7101, 0x8000, - 0x0006, 0xc341, 0x218c, 0x864a, - 0x0098, 0x002d, 0x4340, 0x0ea6, - 0xffaf, 0x712c, 
0x4318, 0x235f, - 0x0800, 0x45cb, 0x0000, 0xfffc, - 0x70cd, 0x60bc, 0x1600, 0x7080, - 0x8000, 0x0004, 0x201a, 0x0f83, - 0x0010, 0x0000, 0xf016, 0x7a0e, - 0x785c, 0x781c, 0x781c, 0x781c, - 0x4100, 0xd81b, 0xb861, 0x0801, - 0x0031, 0x793d, 0x6238, 0xb8c4, - 0x4831, 0x2a44, 0x0800, 0x71c5, - 0x209a, 0x0001, 0x7825, 0xb700, - 0x0e41, 0x1093, 0xc000, 0x201a, - 0x0f81, 0x0000, 0x1000, 0x2355, - 0x3800, 0x7825, 0x2005, 0x00c1, - 0x40c1, 0x209a, 0x0004, 0x7825, - 0x7f1b, 0xbf91, 0xbf9c, 0xbf9f, - 0x0eae, 0xffaf, 0x9700, 0xc101, - 0x7810, 0xe906, 0x2404, 0x1341, - 0x6038, 0xf1cf, 0x7882, 0xf1cd, - 0xc0a2, 0x1404, 0x341b, 0xc6c6, 0xc2e2, 0x45cb, 0x9004, 0x0006, - 0xd820, 0x0f16, 0x00a0, 0x1d00, + 0xd820, 0x084e, 0x00e0, 0x1d00, 0x1045, 0x1d00, 0x1005, 0xc6c2, 0xc2e6, 0x45cb, 0x9004, 0x01c6, - 0x95c0, 0x208a, 0x0fdf, 0x78c4, - 0x204f, 0x02cf, 0xb88a, 0xb88b, - 0xb5e0, 0xb500, 0x0eea, 0x00a0, - 0xd80a, 0xb5e0, 0x9501, 0xbecb, - 0xb5c0, 0xb8c9, 0xc6c6, 0x78e0, - 0x0689, 0xff6f, 0x4040, 0x78e0, - 0xc2e6, 0x1cfc, 0xb6c8, 0xc1a5, - 0xc243, 0xc141, 0x702c, 0x744c, - 0xc344, 0x0a3a, 0x00e0, 0xc040, - 0xc001, 0xc703, 0x70ad, 0x7704, - 0x7114, 0xde80, 0xd880, 0x004c, - 0x002d, 0xc042, 0xf010, 0x8800, - 0x70b5, 0x41c3, 0x0052, 0x0001, - 0x26ca, 0x1001, 0x0de2, 0xff6f, - 0x2638, 0x1000, 0x70b5, 0x25ca, - 0x1061, 0x71e5, 0xc004, 0x7aef, - 0x0a1f, 0x0025, 0xd841, 0xc101, - 0x090f, 0x00b1, 0x784a, 0x70c3, - 0x8000, 0x07fa, 0xf1e5, 0x70c3, - 0x8000, 0x07fc, 0xf1e1, 0xc642, - 0x70ef, 0x70ed, 0xf003, 0xa901, - 0x71e5, 0xc403, 0x7eef, 0xddff, - 0x0e35, 0x17f4, 0x706c, 0xf044, - 0x2314, 0x100b, 0x41c3, 0x8000, - 0x07bc, 0x2100, 0x02c0, 0x8801, - 0x222f, 0x07c7, 0x7185, 0x2000, - 0x009b, 0x78af, 0x2309, 0x300d, - 0x2332, 0x1040, 0x621a, 0x786f, - 0x2208, 0x0003, 0xc004, 0x798f, - 0x0931, 0x0025, 0x4328, 0x239f, - 0x1041, 0x41c3, 0x8000, 0x07fb, - 0xc001, 0x2332, 0x1042, 0x0813, - 0x0070, 0x62da, 0x7161, 0x8921, - 0xd880, 0x2002, 0x005f, 0x784f, - 0x08a9, 0x87f4, 0x784d, 0xf00e, - 0x79af, 0x786f, 
0x0815, 0x0065, - 0x202f, 0x07c7, 0xc100, 0x4b12, - 0x79d4, 0xa940, 0x4d10, 0xf1bd, - 0xc100, 0xd8ff, 0x79d4, 0xa900, - 0x700c, 0xf1b7, 0xc001, 0x228a, - 0x003e, 0x4140, 0x7114, 0xc002, - 0x21ca, 0x0001, 0xc000, 0x1840, - 0x0042, 0xc001, 0xc100, 0x7214, - 0xc002, 0x22ca, 0x0001, 0xc000, - 0x183e, 0x0082, 0x700c, 0x193f, - 0x0002, 0xc0a5, 0x1404, 0x341b, - 0xc6c6, 0x78e0, 0x7014, 0x1600, - 0x7080, 0x8000, 0x0004, 0x41c3, - 0x9004, 0x0160, 0x21c0, 0x0221, - 0x201a, 0x0f80, 0x0020, 0x0000, - 0x7825, 0x7fe0, 0x9000, 0x78e0, - 0x1600, 0x7100, 0x8000, 0x0022, - 0x7014, 0x7de0, 0x1600, 0x7100, - 0x9004, 0x0008, 0x7ee0, 0x78e0, - 0x42c3, 0x8000, 0x0661, 0x080f, - 0x0071, 0x7014, 0x8a40, 0xd853, - 0xf005, 0x700c, 0xf407, 0x8a40, - 0xd84f, 0xb89f, 0x7054, 0x20c0, - 0x0ce2, 0x7034, 0x7fe0, 0x20c0, - 0x00a1, 0x78e0, 0xc0f1, 0x4200, - 0x7114, 0x40c3, 0x8000, 0x0661, - 0xf404, 0x8820, 0xd84e, 0xf005, - 0xea88, 0x8820, 0xd84d, 0xb89f, - 0x7034, 0x20c0, 0x0ce2, 0xf008, - 0x712c, 0x700c, 0x0c4a, 0xff6f, - 0xb993, 0x700c, 0xc0d1, 0x7ee0, - 0xc2e6, 0x4308, 0x70ed, 0x70cd, - 0xf002, 0x71c5, 0x7dcf, 0x0d19, - 0x10b5, 0x40a1, 0x0e9e, 0x0060, - 0x4161, 0xe879, 0x78ef, 0x200f, - 0x034f, 0xf1f5, 0x78ef, 0xc6c6, - 0x40c3, 0x8000, 0x0670, 0x0499, - 0xff6f, 0x8000, 0x40c3, 0x8000, - 0x0670, 0x42c3, 0x8000, 0x0661, - 0x8020, 0x8a00, 0x205f, 0x0c80, - 0x6119, 0x8a01, 0x205f, 0x0640, - 0x6038, 0x8842, 0x700c, 0x05c5, - 0xffef, 0x702c, 0x702c, 0x708d, - 0xf002, 0x7185, 0x7a8f, 0x0a1d, - 0x0195, 0x08fb, 0x80ae, 0x2279, - 0x0003, 0x7164, 0x792f, 0x6179, - 0x7354, 0x21c0, 0x00a6, 0xf1f3, - 0x7fe0, 0x782f, 0xc2e6, 0x216f, - 0x0743, 0x4608, 0x8900, 0x2084, - 0x8001, 0x1a0e, 0x3002, 0x8952, - 0x1a12, 0x3098, 0x8973, 0x1a16, - 0x30d8, 0xf213, 0xee16, 0x7a5d, + 0x95c0, 0xbecb, 0x264f, 0x12cf, + 0x2605, 0x1f80, 0x0000, 0x0c00, + 0xb5e0, 0xb500, 0x0822, 0x00e0, + 0xd80a, 0xb5e0, 0x9501, 0xb5c0, + 0xb8c9, 0xc6c6, 0x0609, 0xff6f, + 0x4040, 0x78e0, 0xc2e6, 0x1cfc, + 0xb6c8, 0xc1ac, 0xc249, 0xc144, + 0x702c, 
0xda3e, 0xc34b, 0x0b5e, + 0x00e0, 0xc043, 0xc103, 0x700c, + 0x1940, 0x0002, 0x193f, 0x0002, + 0x193e, 0x0002, 0xc004, 0x7704, + 0x084f, 0x00b5, 0x238a, 0x3002, + 0xc609, 0x70ad, 0x238a, 0x3002, + 0xf01b, 0x79cc, 0xc004, 0x7214, + 0x40c3, 0x8000, 0x071e, 0xf404, + 0x6109, 0xf004, 0x6038, 0x8822, + 0x70b5, 0x23ca, 0x3041, 0x2138, + 0x06c0, 0x41c3, 0x0059, 0x0001, + 0x0d3a, 0xff6f, 0x42c1, 0x70b5, + 0x25ca, 0x1061, 0x71c5, 0xc00b, + 0x0ecb, 0x9024, 0xd941, 0x70ed, + 0x700c, 0xc045, 0xf08e, 0xc009, + 0x70cd, 0xc54a, 0xf03b, 0x0943, + 0x0004, 0xca02, 0x083b, 0x0044, + 0xc106, 0xc008, 0x7834, 0x41c3, + 0x8000, 0x06e0, 0x6029, 0x2000, + 0x0f80, 0x8000, 0x06e0, 0x8861, + 0xc005, 0x780f, 0x631b, 0x6038, + 0xc041, 0xc006, 0xc342, 0x41c3, + 0x005c, 0x0005, 0xc040, 0x740c, + 0x0c8a, 0xffaf, 0x43e1, 0xc106, + 0xc008, 0x7daf, 0x7ecf, 0x7834, + 0x41c3, 0x8000, 0x06e0, 0x2000, + 0x0f83, 0x8000, 0x06e0, 0x602a, + 0xc005, 0x8b61, 0x790f, 0x6338, + 0x70b1, 0x25ca, 0x1005, 0x6238, + 0x70d1, 0x26ca, 0x100d, 0xc007, + 0x7104, 0xc047, 0xc007, 0x7a0f, + 0xc00b, 0x0a57, 0x0025, 0xd841, + 0x784a, 0x43c3, 0x8000, 0x071f, + 0xc048, 0x6069, 0xc004, 0x0815, + 0x0070, 0x61f9, 0xc008, 0x6078, + 0x8861, 0xd880, 0x7862, 0xc045, + 0x782d, 0xc046, 0x7b2f, 0xca03, + 0x0b57, 0x87f4, 0x5042, 0x091f, + 0x0004, 0xca02, 0x0817, 0x0064, + 0x43e1, 0xc006, 0x41c3, 0x005d, + 0x0003, 0xc040, 0x0bfe, 0xffaf, + 0x740c, 0x70ad, 0xdeff, 0x79af, + 0x7bcf, 0x0b13, 0x0065, 0x704c, + 0xc005, 0x780f, 0x4912, 0x4b10, + 0xc04a, 0xc003, 0x78f4, 0xa841, + 0xc20a, 0xa840, 0x1203, 0x3082, + 0xc812, 0x081f, 0x0084, 0x1202, + 0x3082, 0x0a17, 0x0024, 0x740c, + 0xc140, 0x41c3, 0x005e, 0x0003, + 0x0bba, 0xffaf, 0x42e1, 0x71e5, + 0xe79f, 0x06e6, 0xffe5, 0xddff, + 0xc103, 0x700c, 0x228a, 0x003e, + 0x193f, 0x0002, 0xc004, 0x7214, + 0x4040, 0x20ca, 0x06c1, 0x193e, + 0x0002, 0xc004, 0x7114, 0xc003, + 0x22ca, 0x06c1, 0x1840, 0x0082, + 0x1203, 0x3081, 0xc812, 0x0815, + 0x0044, 0x1202, 0x3081, 0x090d, + 0x0004, 0x0f22, 0xffaf, 0xc003, + 
0xc0ac, 0x1404, 0x341b, 0xc6c6, + 0x7014, 0x1600, 0x7080, 0x8000, + 0x0004, 0x41c3, 0x9004, 0x0168, + 0x21c2, 0x0222, 0x201a, 0x0f80, + 0x0020, 0x0000, 0x7905, 0x7fe0, + 0x9100, 0x78e0, 0x1600, 0x7100, + 0x8000, 0x0022, 0x7014, 0x7de0, + 0x1600, 0x7100, 0x9004, 0x0008, + 0x7ee0, 0x78e0, 0x42c3, 0x8000, + 0x06cd, 0x0813, 0x0070, 0x7014, + 0x700c, 0xf40b, 0x8a40, 0xd84f, + 0xf003, 0x8a40, 0xd853, 0xb89f, + 0x7054, 0x20c0, 0x0ce2, 0x7034, + 0x7fe0, 0x20c0, 0x00a1, 0x78e0, + 0xc0f1, 0x4200, 0x7114, 0x40c3, + 0x8000, 0x06cd, 0xf205, 0xea8c, + 0x8820, 0xd84d, 0xf003, 0x8820, + 0xd84e, 0xb89f, 0x7034, 0x20c0, + 0x0ce2, 0xf008, 0x712c, 0x700c, + 0x0b22, 0xff6f, 0xb993, 0x700c, + 0xc0d1, 0x7ee0, 0xc2e2, 0x260a, + 0x3000, 0x706d, 0x70ad, 0xf00b, + 0x0f2a, 0x0060, 0x210a, 0x0780, + 0xe804, 0x78af, 0x200f, 0x02cd, + 0x7165, 0x0bf1, 0x90b4, 0x4061, + 0x78af, 0xc6c2, 0x40c3, 0x8000, + 0x06dc, 0x036d, 0xff6f, 0x8000, + 0x40c3, 0x8000, 0x06dc, 0x42c3, + 0x8000, 0x06cd, 0x8020, 0x8a00, + 0x205f, 0x0c80, 0x6038, 0x8a21, + 0x215f, 0x0641, 0x6038, 0x8842, + 0x700c, 0x051d, 0xffef, 0x702c, + 0x704c, 0x706c, 0xf00c, 0x0817, + 0x00ae, 0x796f, 0xea03, 0x6b21, + 0x792f, 0x0a0b, 0x00f4, 0x6961, + 0x6963, 0x7144, 0x0aeb, 0x8194, + 0x7fe0, 0x786f, 0xc2e4, 0x1cfc, + 0xb6c8, 0x4318, 0xe806, 0x1600, + 0x7080, 0x8000, 0x0014, 0xf006, + 0x1600, 0x7080, 0x8000, 0x0015, + 0x791d, 0x793d, 0x1a12, 0x3002, + 0x256f, 0x1bc3, 0x793d, 0x8dc0, + 0x793d, 0x793d, 0x793d, 0x1a03, + 0x3382, 0x793d, 0x15f7, 0x908c, + 0x1a0b, 0x3042, 0x791d, 0x793d, + 0x793d, 0x793d, 0x793d, 0x793d, + 0x2144, 0x0042, 0x2053, 0x8141, + 0x776c, 0x4020, 0x20ca, 0x00c1, + 0x2484, 0x1001, 0x1a13, 0x3082, + 0x1a12, 0x3042, 0x1a0f, 0x3002, + 0x1a14, 0x3302, 0xe902, 0xea04, + 0x1a12, 0x3503, 0x0e0d, 0x1435, + 0x700c, 0x1a03, 0x3fc3, 0xf00e, + 0x79dd, 0x793d, 0x793d, 0x793d, + 0x6909, 0xbec3, 0x1a02, 0x3002, + 0x61d8, 0x1a03, 0x3382, 0x7704, + 0x1a02, 0x3002, 0xd820, 0xb89f, + 0x8840, 0x1a14, 0x3098, 0x8861, + 0x1a18, 0x30d8, 0xec14, 
0x0b1d, + 0x3010, 0x7b7d, 0x7b7d, 0x7a5d, 0x7b7d, 0x7a5d, 0x7b7d, 0x7a5d, - 0x7b7d, 0x7a5d, 0x7b7d, 0x1a12, - 0x3098, 0x1a16, 0x30d8, 0x70ed, - 0x206f, 0x0ec3, 0xf00f, 0x70ed, - 0x206f, 0x0ec3, 0xee8b, 0xf008, - 0xbac3, 0xbbc3, 0x1a12, 0x3098, - 0x1a16, 0x30d8, 0x71ed, 0x206f, - 0x0f43, 0x8880, 0x7c9d, 0xbcc1, - 0x1a0f, 0x3302, 0x8800, 0x781d, - 0x781d, 0x781d, 0xb8c1, 0x1a09, - 0x3002, 0x8920, 0x710c, 0xb9c3, - 0xb9c4, 0xe906, 0xb961, 0x0901, - 0x0031, 0x781b, 0x5051, 0x41c3, - 0x002b, 0x0002, 0x0b1a, 0xffaf, - 0x740c, 0xc812, 0x70d5, 0xdd50, - 0x5053, 0xc816, 0x1a5c, 0x3003, - 0x5054, 0xd87f, 0x751c, 0x1a5d, - 0x3342, 0x1600, 0x7080, 0x8000, - 0x001a, 0x70d5, 0x791b, 0x781d, - 0x781d, 0x781d, 0x703c, 0x2044, - 0x878e, 0xf259, 0x269f, 0x1a0f, - 0x70f5, 0x70ed, 0xf210, 0x40c3, - 0x8000, 0x0661, 0x8800, 0x205f, - 0x0c80, 0x2032, 0x0f80, 0x8000, - 0x0682, 0x0ba6, 0xff4f, 0x2e85, - 0x1406, 0xf030, 0x216f, 0x0243, - 0x8900, 0x201a, 0x0f80, 0x0020, - 0x0000, 0x2005, 0x0f80, 0x9002, - 0x0086, 0x9000, 0x125d, 0x308d, - 0x125c, 0x308f, 0x2044, 0x0082, - 0xc80a, 0x6053, 0x8908, 0x201a, - 0x0f80, 0x0001, 0x86a0, 0x781d, - 0x781d, 0x781d, 0x781d, 0x781d, - 0x781d, 0x781d, 0x7862, 0x2048, - 0x0001, 0xc804, 0x6052, 0x2904, - 0x0081, 0x782f, 0xe0ff, 0xd87f, - 0x20ca, 0x004b, 0x2e05, 0x108e, - 0x790f, 0x6e01, 0x7b0f, 0x4970, - 0x2008, 0x03c0, 0x1a5c, 0x3002, - 0x7a0f, 0x6338, 0x2009, 0x0340, - 0x1a5d, 0x3002, 0x7b0f, 0x41c3, - 0x0029, 0x0002, 0x0a3a, 0xffaf, - 0x740c, 0x0a7a, 0x0100, 0xc6c6, - 0x44cb, 0x900e, 0x00c2, 0x1c02, - 0x1014, 0xb420, 0xb441, 0x7fe0, - 0xb462, 0x78e0, 0xc2e6, 0x1cfc, - 0xb6c8, 0xc1ab, 0xc344, 0x1600, - 0x7083, 0x8000, 0x0004, 0xc24a, - 0xc143, 0xc040, 0x40c3, 0x8000, - 0x07bc, 0x42c3, 0x0000, 0x1491, - 0x0e32, 0x00a0, 0x702c, 0x41c3, - 0x8000, 0x04a4, 0xd820, 0xb104, - 0xb105, 0xc000, 0x70ed, 0x1900, - 0x0405, 0xb1e1, 0x1a0a, 0x33c2, - 0x1a0b, 0x33c2, 0xb1e2, 0x190c, - 0x0405, 0xe88b, 0x40c3, 0x0000, - 0x0c20, 0x2042, 0x0802, 0xb105, - 0xb870, 0xb142, 
0xb106, 0xc003, - 0x231a, 0x0f83, 0x0010, 0x0000, - 0x205f, 0x0501, 0x40c3, 0x8000, - 0x0661, 0x8802, 0xc349, 0x6119, - 0x40c3, 0x8000, 0x07bc, 0x2042, - 0x0a00, 0x6038, 0xc042, 0xc003, - 0x205f, 0x0641, 0x1600, 0x7080, - 0x8000, 0x0661, 0x205f, 0x0c80, - 0x6038, 0x2032, 0x0f80, 0x8000, - 0x0682, 0xc048, 0x40c3, 0x8000, - 0x0661, 0x8804, 0xf003, 0xc001, - 0x7104, 0xc041, 0x40c3, 0x8000, - 0x0661, 0x8825, 0xc001, 0x780f, - 0x7030, 0x00ec, 0x002d, 0xdeff, - 0x201a, 0x0f80, 0x0000, 0x1000, - 0x70ad, 0x706c, 0xc046, 0xc647, - 0xc545, 0xf003, 0x71a5, 0x78af, - 0x08cf, 0x8275, 0x4318, 0xc000, - 0xe806, 0x254f, 0x1080, 0x780f, - 0x0847, 0x0111, 0xc000, 0xd98c, + 0x1a18, 0x30d8, 0x7a5d, 0xf006, + 0xbbc3, 0xbac3, 0x1a18, 0x30d8, + 0x1a14, 0x3098, 0x206f, 0x0f43, + 0x68c9, 0x7077, 0x41c1, 0x21ca, + 0x0001, 0x8900, 0x781d, 0xb8c1, + 0x1a15, 0x3002, 0x8900, 0x781d, + 0x781d, 0x781d, 0xb8c1, 0x1a0c, + 0x3002, 0x15f7, 0x9081, 0x710c, + 0xb9c3, 0xb9c4, 0xe905, 0xb961, + 0x09ff, 0x8031, 0x781b, 0x5053, + 0x41c3, 0x002c, 0x0002, 0x092e, + 0xffaf, 0x740c, 0x7077, 0xd87f, + 0xdd50, 0x751c, 0xc814, 0x1a65, + 0x3342, 0x1a64, 0x3003, 0x5055, + 0xc818, 0x7077, 0x5056, 0x16fd, + 0x9080, 0x791b, 0x781d, 0x781d, + 0x781d, 0x703c, 0x2044, 0x879e, + 0xf254, 0x0b61, 0x3030, 0x269f, + 0x3a0f, 0x206f, 0x0243, 0x8820, + 0x8808, 0x201a, 0x0f80, 0x0001, + 0x86a0, 0x7b1d, 0x211a, 0x0f80, + 0x0020, 0x0000, 0x7b7d, 0x7b7d, + 0x7b7d, 0x2005, 0x0f80, 0x9002, + 0x0086, 0x9000, 0x7b7d, 0x7b7d, + 0x2044, 0x0082, 0xc80c, 0x7b7d, + 0x6050, 0x4b10, 0x2048, 0x0001, + 0xc806, 0x6052, 0x2905, 0x0081, + 0x2e05, 0x309e, 0x782f, 0xe0ff, + 0xd87f, 0x20ca, 0x0045, 0xf011, + 0x40c3, 0x8000, 0x06cd, 0x8800, + 0x205f, 0x0c80, 0x2000, 0x0f80, + 0x8000, 0x0580, 0x09a2, 0xff6f, + 0x880e, 0x2e85, 0x3406, 0x7a0f, + 0x2640, 0x3040, 0x790f, 0x6158, + 0x2509, 0x1003, 0x4a30, 0x2048, + 0x0000, 0x1a65, 0x30c2, 0x1a64, + 0x3002, 0x7a0f, 0x41c3, 0x002a, + 0x0002, 0x085a, 0xffaf, 0x740c, + 0x0ada, 0x0100, 0x1404, 0x341b, + 0xc6c4, 
0x78e0, 0x44cb, 0x900e, + 0x00c2, 0x1c02, 0x1014, 0xb420, + 0xb441, 0x7fe0, 0xb462, 0x78e0, + 0xc2e6, 0x1cfc, 0xb6c8, 0xc1ad, + 0xc344, 0x1600, 0x7083, 0x8000, + 0x0004, 0x47cb, 0x8000, 0x06cd, + 0xc24b, 0xc143, 0xc040, 0x4f2b, + 0x702c, 0x42c3, 0x0000, 0x1491, + 0x0e2a, 0x00a0, 0xc04c, 0x41c3, + 0x8000, 0x0474, 0x40c3, 0x001f, + 0x001f, 0xa100, 0xc000, 0x70cd, + 0x1908, 0x0401, 0xb1c6, 0x1904, + 0x03c5, 0x1a0e, 0x3382, 0x1a0d, + 0x3382, 0xe88b, 0x40c3, 0x0000, + 0x0c00, 0xb106, 0x2040, 0x07c2, + 0xe00f, 0xb141, 0xb102, 0xc003, + 0x8f22, 0x231a, 0x0f83, 0x0010, + 0x0000, 0x205f, 0x0500, 0xc34a, + 0x6038, 0x2000, 0x0f80, 0x8000, + 0x1b74, 0xc042, 0xc003, 0x205f, + 0x0641, 0x8f00, 0x205f, 0x0c80, + 0x6038, 0x2000, 0x0f80, 0x8000, + 0x0580, 0x880e, 0xc049, 0x8f05, + 0xc048, 0x8f04, 0xf077, 0x201a, + 0x0f80, 0x0000, 0x1000, 0x264a, + 0x3000, 0x706f, 0xc046, 0x700c, + 0xc547, 0xc045, 0xf067, 0xc000, + 0xe806, 0x264f, 0x3080, 0x780f, + 0x0849, 0x0111, 0xc000, 0xd98c, 0x7014, 0xd8c0, 0x703c, 0xc103, - 0x6119, 0xc009, 0x7905, 0xc006, + 0x6119, 0xc00a, 0x7905, 0xc006, 0x7905, 0xc007, 0x7104, 0xc047, - 0x780d, 0xf829, 0xb89f, 0x0932, - 0xffaf, 0x9000, 0x7910, 0xca09, - 0x2044, 0x07cc, 0x4020, 0xec06, - 0xbc61, 0x0c01, 0x1031, 0x781d, - 0xc045, 0xc000, 0xe80a, 0xc106, - 0xc004, 0x71c5, 0x7905, 0x78cd, - 0xf81d, 0xb89f, 0x9060, 0xf015, - 0x254f, 0x1080, 0x780f, 0x0823, - 0x0111, 0xc004, 0x0811, 0x0091, - 0xc002, 0xc102, 0x8800, 0x7124, - 0xc142, 0xf003, 0xc008, 0x0992, - 0xff6f, 0x71c5, 0x4300, 0xc00a, - 0x086d, 0x86ee, 0x232f, 0x13c7, - 0x239f, 0x1041, 0xc105, 0x40c3, - 0x8000, 0x07fa, 0x708d, 0x7061, - 0xa820, 0x702c, 0xa862, 0xf009, - 0x70c3, 0x8000, 0x07bc, 0xda7f, - 0xa840, 0xa881, 0x7124, 0x782f, - 0x08f1, 0x87f4, 0x2314, 0x1000, - 0x71e5, 0xf19a, 0xc0ab, 0x1404, - 0x341b, 0xc6c6, 0x209a, 0x0004, - 0x7825, 0x781b, 0xb891, 0xb89c, - 0x7ee0, 0x78e0, 0xc0e2, 0x706c, - 0x704c, 0xf004, 0xb160, 0x7144, - 0x7950, 0x09fd, 0x8674, 0x2014, - 0x0041, 0xda32, 0xba9f, 0x1201, - 
0x0481, 0x708d, 0xa821, 0x1201, - 0x0481, 0xa822, 0x1201, 0x0481, - 0xa823, 0x1201, 0x0481, 0xa824, - 0x1201, 0x0481, 0xa82b, 0x1201, - 0x0481, 0xa82c, 0x1201, 0x0481, - 0xa82d, 0x1201, 0x0481, 0xa82e, - 0x1201, 0x0481, 0xa830, 0x8a20, - 0xa831, 0x8a21, 0xa836, 0x8a22, - 0xa838, 0x12dd, 0x8081, 0x7034, - 0xf48e, 0xda3e, 0xba9f, 0x1201, - 0x0481, 0xa83a, 0x1201, 0x0481, - 0xa83b, 0x1201, 0x0481, 0xa83c, - 0x1201, 0x0481, 0xa83d, 0x1201, - 0x0481, 0x1824, 0x0042, 0x1201, - 0x0481, 0x1825, 0x0042, 0x1201, - 0x0481, 0x1826, 0x0042, 0x1201, - 0x0481, 0x1827, 0x0042, 0x1201, - 0x0481, 0x1829, 0x0042, 0x1201, - 0x0481, 0x182a, 0x0042, 0x1201, - 0x0481, 0x182f, 0x0042, 0x121c, - 0x0481, 0x1831, 0x0042, 0x1201, - 0x0481, 0x1833, 0x0042, 0x1201, - 0x0481, 0x1834, 0x0042, 0x1201, - 0x0481, 0x1835, 0x0042, 0x1201, - 0x0481, 0x1836, 0x0042, 0x1201, - 0x0481, 0x183d, 0x0042, 0x1201, - 0x0481, 0x183e, 0x0042, 0x1201, - 0x0481, 0x183f, 0x0042, 0x1201, - 0x0481, 0x1840, 0x0042, 0x1201, - 0x0481, 0x1842, 0x0042, 0x1201, - 0x0481, 0x1843, 0x0042, 0x1201, - 0x0481, 0x1848, 0x0042, 0x1201, - 0x0481, 0x184a, 0x0042, 0x1201, - 0x0481, 0x184c, 0x0042, 0x1201, - 0x0481, 0x184d, 0x0042, 0x1201, - 0x0481, 0x184e, 0x0042, 0x1201, - 0x0481, 0x184f, 0x0042, 0x1201, - 0x0481, 0x1856, 0x0042, 0x1201, - 0x0481, 0x1857, 0x0042, 0x1201, - 0x0481, 0x1858, 0x0042, 0x1201, - 0x0481, 0x1859, 0x0042, 0x1201, - 0x0481, 0x185b, 0x0042, 0x8a20, - 0x185c, 0x0042, 0x8a21, 0x1861, - 0x0042, 0x8a22, 0x1863, 0x0042, - 0xf031, 0x7185, 0x0c59, 0x10b2, - 0x706c, 0xf02d, 0x245a, 0x1c81, - 0x235a, 0x064d, 0x7164, 0x6119, - 0x653d, 0x12f7, 0x8081, 0xad21, - 0x12f8, 0x8081, 0xad22, 0x12f9, - 0x8081, 0xad23, 0x12fa, 0x8081, - 0xad24, 0x12fb, 0x8081, 0xad2b, - 0x12fc, 0x8081, 0xad2c, 0x12fd, - 0x8081, 0xad2d, 0x12fe, 0x8081, - 0xad2e, 0x12ff, 0x8081, 0xad30, - 0x8a20, 0xad31, 0x8a21, 0xad36, - 0x8a22, 0xad38, 0x0bb1, 0x8092, - 0xf1d1, 0xc4c2, 0xc2e4, 0x40c3, - 0x8000, 0x054c, 0x0eb2, 0x01e0, - 0xd92a, 0xde25, 0xbe9f, 
0x8e00, - 0x45cb, 0x9004, 0x00e4, 0xb500, - 0x1633, 0x1080, 0xb501, 0x16f4, + 0x780d, 0x209a, 0x0004, 0xf830, + 0x0f72, 0xff6f, 0x9000, 0x7910, + 0xca0c, 0x2044, 0x07c3, 0x4020, + 0xeb05, 0xbb61, 0x0bff, 0x8031, + 0x781d, 0xc045, 0xc000, 0xe80d, + 0xc106, 0xc004, 0x71a5, 0x7905, + 0x78ad, 0x209a, 0x0004, 0xf824, + 0x1000, 0x011b, 0xf014, 0x264f, + 0x3080, 0x780f, 0x0821, 0x0111, + 0xc004, 0x0813, 0x0091, 0xc002, + 0xc102, 0x8800, 0x7124, 0xc142, + 0xf002, 0xc009, 0x080a, 0xff6f, + 0x71a5, 0x4318, 0xc00b, 0x083b, + 0x07ae, 0x7acf, 0x229f, 0x0041, + 0xc105, 0x40c3, 0x8000, 0x071e, + 0x706c, 0x6058, 0x1802, 0x06c2, + 0xa820, 0xf00a, 0x6719, 0x1914, + 0x0003, 0x7164, 0xc10c, 0x6119, + 0xd87f, 0xa900, 0x0bf1, 0x87f4, + 0x2214, 0x00c0, 0x71c5, 0x2640, + 0x305e, 0x0e37, 0xb254, 0xc001, + 0x7104, 0xc041, 0xc001, 0xc108, + 0x780f, 0x7030, 0x070a, 0xffee, + 0xddff, 0xc0ad, 0x1404, 0x341b, + 0xc6c6, 0x78e0, 0x7825, 0x781b, + 0xb891, 0xb89c, 0xb89f, 0x7ee0, + 0xc2e6, 0x1cfc, 0xb6c8, 0xc1a3, + 0x704c, 0xf005, 0x1900, 0x0005, + 0x7144, 0x0afd, 0x8674, 0x2014, + 0x0081, 0xdb32, 0xbb9f, 0x8b20, + 0xa821, 0x6b21, 0xc140, 0x8920, + 0xa822, 0xc100, 0x7124, 0xc141, + 0x8920, 0xa823, 0xc101, 0x7124, + 0xc142, 0x8920, 0xa824, 0xc102, + 0x6941, 0x8a20, 0x2240, 0x005f, + 0x2740, 0x304c, 0xa82b, 0x1700, + 0x3081, 0x6ca1, 0x6dc1, 0xa82c, + 0x8c20, 0x6ee1, 0x4358, 0xa82d, + 0x8d20, 0xa82e, 0x8e20, 0xa830, + 0x8f20, 0xa831, 0x8f21, 0xa836, + 0x8f22, 0xa838, 0x13e6, 0x8081, + 0xe930, 0x264a, 0x3000, 0xf02a, + 0x265a, 0x3c82, 0x235a, 0x1641, + 0x7165, 0x621a, 0x6159, 0x8b40, + 0xa941, 0xc200, 0x8a40, 0xa942, + 0xc201, 0x8a40, 0xa943, 0xc202, + 0x8a40, 0xa944, 0x1300, 0x3082, + 0xa94b, 0x1700, 0x3082, 0xa94c, + 0x8c40, 0xa94d, 0x8d40, 0xa94e, + 0x8e40, 0xa950, 0x8f40, 0xa951, + 0x8f41, 0xa956, 0x8f42, 0xa958, + 0x0bb9, 0x9094, 0x2640, 0x305e, + 0x0ef9, 0xb0b4, 0x706d, 0xf08d, + 0xda3e, 0xba9f, 0x1201, 0x0481, + 0xa83a, 0x1201, 0x0481, 0xa83b, + 0x1201, 0x0481, 0xa83c, 0x1201, + 0x0481, 0xa83d, 
0x1201, 0x0481, + 0x1824, 0x0042, 0x1201, 0x0481, + 0x1825, 0x0042, 0x1201, 0x0481, + 0x1826, 0x0042, 0x1201, 0x0481, + 0x1827, 0x0042, 0x1201, 0x0481, + 0x1829, 0x0042, 0x1201, 0x0481, + 0x182a, 0x0042, 0x1201, 0x0481, + 0x182f, 0x0042, 0x121c, 0x0481, + 0x1831, 0x0042, 0x1201, 0x0481, + 0x1833, 0x0042, 0x1201, 0x0481, + 0x1834, 0x0042, 0x1201, 0x0481, + 0x1835, 0x0042, 0x1201, 0x0481, + 0x1836, 0x0042, 0x1201, 0x0481, + 0x183d, 0x0042, 0x1201, 0x0481, + 0x183e, 0x0042, 0x1201, 0x0481, + 0x183f, 0x0042, 0x1201, 0x0481, + 0x1840, 0x0042, 0x1201, 0x0481, + 0x1842, 0x0042, 0x1201, 0x0481, + 0x1843, 0x0042, 0x1201, 0x0481, + 0x1848, 0x0042, 0x1201, 0x0481, + 0x184a, 0x0042, 0x1201, 0x0481, + 0x184c, 0x0042, 0x1201, 0x0481, + 0x184d, 0x0042, 0x1201, 0x0481, + 0x184e, 0x0042, 0x1201, 0x0481, + 0x184f, 0x0042, 0x1201, 0x0481, + 0x1856, 0x0042, 0x1201, 0x0481, + 0x1857, 0x0042, 0x1201, 0x0481, + 0x1858, 0x0042, 0x1201, 0x0481, + 0x1859, 0x0042, 0x1201, 0x0481, + 0x185b, 0x0042, 0x8a20, 0x185c, + 0x0042, 0x8a21, 0x1861, 0x0042, + 0x8a22, 0x1863, 0x0042, 0xc0a3, + 0x1404, 0x341b, 0xc6c6, 0x78e0, + 0xc3e4, 0xc1ab, 0xc080, 0x41c3, + 0x8000, 0x0554, 0x0b06, 0xff2f, + 0xda2a, 0xc080, 0x09c2, 0x0220, + 0xd92a, 0xdd25, 0xbd9f, 0x8d00, + 0x46cb, 0x9004, 0x00e4, 0xb600, + 0x1533, 0x1080, 0xb601, 0x15f4, 0x9080, 0xe888, 0x1e00, 0x7005, - 0x9003, 0xe154, 0x1d42, 0x905d, - 0x099a, 0xffcf, 0x791d, 0x793d, - 0x793d, 0x793d, 0xb9c3, 0x1a02, - 0x3042, 0x4100, 0xda08, 0xba61, - 0x0aff, 0x8031, 0x793d, 0xb9c1, - 0xb8c3, 0x1a06, 0x3042, 0x0b96, - 0x0160, 0x1a01, 0x3002, 0x081e, - 0x0000, 0x085e, 0x0100, 0x154c, - 0x9100, 0x080f, 0x0151, 0x8e00, - 0xb500, 0x8e00, 0xb501, 0xc6c4, + 0x9003, 0xe154, 0x1e42, 0x905d, + 0x0906, 0xffcf, 0x2053, 0x00c1, + 0x1a01, 0x3042, 0x791d, 0x793d, + 0x793d, 0x793d, 0xb9c3, 0x1a04, + 0x3042, 0xd908, 0xb961, 0x0901, + 0x0031, 0x781d, 0xb8c1, 0x0ba6, + 0x0160, 0x1a08, 0x3002, 0x081e, + 0x0000, 0x088e, 0x0100, 0x164c, + 0x9100, 0x080d, 0x0151, 0x8d00, + 0xb600, 
0xb601, 0xc7c4, 0x78e0, 0xc2e6, 0x1cfc, 0xb6c8, 0x1600, 0x7101, 0x9004, 0x01e0, 0x1600, 0x7082, 0x8000, 0x0004, 0x225f, 0x0100, 0x2044, 0x07c3, 0x4020, 0xeb05, 0xbb61, 0x0bff, 0x8031, - 0x781d, 0xb8c3, 0x080b, 0x0050, - 0x0877, 0x01b1, 0x71ed, 0x46cb, + 0x781d, 0xb8c3, 0x080b, 0x0190, + 0x0877, 0x0071, 0xdf18, 0x46cb, 0x9004, 0x0116, 0x1e08, 0x1094, - 0x71ad, 0x1e02, 0x1354, 0xd829, + 0x71ad, 0x1e02, 0x1055, 0xd829, 0xb6a0, 0x1e6c, 0x1205, 0x1e6c, 0x1004, 0x702c, 0x40c3, 0x0098, - 0x9680, 0x1ef2, 0x9044, 0x4728, + 0x9680, 0x4328, 0x1ef2, 0x9044, 0x0eae, 0x0060, 0x1e66, 0x1044, 0xd8a9, 0x1e6c, 0x1004, 0x208a, 0x0a46, 0x1e6c, 0x1004, 0x208a, @@ -6638,457 +6722,457 @@ static u16 lpddr4x_train2d_imem[] = { 0x1004, 0x40c3, 0x0000, 0x6000, 0x1e72, 0x935c, 0x1ed0, 0x9344, 0x1e72, 0x90dd, 0x0e22, 0x0060, - 0x1ed0, 0x93c4, 0x1e72, 0x935c, - 0x1e72, 0x93dc, 0xf048, 0x45cb, + 0x1ed0, 0x92c4, 0x1e72, 0x935c, + 0x1e72, 0x92dc, 0xf046, 0x45cb, 0x9004, 0x0116, 0x1d08, 0x1094, - 0x1d02, 0x13d4, 0x702c, 0x40c3, - 0x000f, 0x4240, 0x234a, 0x1200, - 0x43db, 0x0000, 0x0818, 0xb5e0, - 0x1d6c, 0x12c4, 0x1df2, 0x9044, - 0x1d66, 0x1044, 0x4628, 0x0e3a, - 0x0060, 0x1d6c, 0x16c4, 0x40c3, - 0x000f, 0x4240, 0x2342, 0x341b, - 0x781d, 0x702c, 0x0e22, 0x0060, - 0x1d6c, 0x16c4, 0x40c3, 0x0098, - 0x9680, 0x702c, 0x0e12, 0x0060, - 0x1d6c, 0x12c4, 0xd888, 0x1d6c, - 0x1004, 0x40c3, 0x0000, 0x6000, - 0x1d6c, 0x12c4, 0x1d6c, 0x1384, - 0x1d72, 0x93dc, 0x1dd0, 0x93c4, - 0x1d72, 0x90dd, 0x0d92, 0x0060, - 0x1dd0, 0x9384, 0x1d72, 0x93dc, - 0x1d72, 0x939c, 0x700c, 0x0dda, - 0x0060, 0xd920, 0x45cb, 0x9004, - 0x01f0, 0xd820, 0x0d72, 0x0060, - 0x1d00, 0x1005, 0xd820, 0x0d6a, - 0x0060, 0x1d00, 0x1045, 0x740c, - 0x0d5e, 0x0060, 0x1d00, 0x1005, - 0x1d0a, 0x909d, 0x1404, 0x341b, - 0xc6c6, 0x78e0, 0xc2e6, 0x1cfc, - 0xb6c8, 0xc1bc, 0xd825, 0xb89f, - 0x8820, 0x1c36, 0x3042, 0x1033, - 0x0080, 0x41c3, 0x8000, 0x0660, - 0x89a0, 0x1c37, 0x3002, 0x8907, - 0xc06d, 0x8909, 0x1c35, 0x3002, - 0x8908, 0x1c32, 0x3002, 0x890a, - 
0xc192, 0x1c33, 0x3002, 0x0db2, - 0x0020, 0x710c, 0xd80c, 0x2805, - 0x0340, 0x702c, 0xc145, 0x205a, - 0x0100, 0x7404, 0xc049, 0xf006, - 0x7a34, 0x700c, 0xb200, 0x7124, - 0x09f9, 0x82b2, 0xc297, 0x700c, - 0x47cb, 0x8000, 0x0578, 0xc044, - 0xf004, 0xc004, 0x7104, 0xc044, - 0xc004, 0x080d, 0x00b2, 0x706f, - 0x041b, 0x0000, 0x0bef, 0xb0b3, - 0x712c, 0xc004, 0x2044, 0x07c2, - 0x4020, 0xc104, 0xea05, 0xba61, - 0x0aff, 0x8031, 0x781b, 0xc046, - 0x202f, 0x06c7, 0x0d1e, 0x0020, - 0x792f, 0xc048, 0x8f00, 0xe808, - 0x1600, 0x7080, 0x8000, 0x0001, - 0xb8e3, 0xf289, 0x700c, 0x0dae, - 0xff2f, 0xd9ff, 0x70ad, 0xd820, - 0xc543, 0xc041, 0xc006, 0x734c, - 0xdb22, 0x205a, 0x0101, 0xc540, - 0x7825, 0x7e0f, 0xd8ff, 0x702c, - 0x08aa, 0xff6f, 0xc642, 0xd828, - 0xc543, 0xc642, 0xc041, 0xd8ff, - 0x702c, 0x734c, 0xdb22, 0x0896, - 0xff6f, 0xc540, 0xd8ff, 0x702c, - 0x734c, 0xdb22, 0xc543, 0xc642, - 0x1c04, 0x33c1, 0x087e, 0xff6f, - 0xc040, 0xd8ff, 0x702c, 0x734c, - 0xdb22, 0xc543, 0xc642, 0x1c04, - 0x3501, 0x086a, 0xff6f, 0xc540, - 0xd8ff, 0xd980, 0x734c, 0xdb22, - 0xc543, 0xc642, 0x1c04, 0x3081, - 0x0852, 0xff6f, 0xc540, 0x700c, - 0x0dd6, 0xff2f, 0x712c, 0x1600, - 0x7080, 0x8000, 0x0004, 0xdae0, - 0x201a, 0x0f81, 0x0020, 0x0000, - 0x40c3, 0x9008, 0x0100, 0x2105, - 0x0003, 0x7204, 0x7825, 0xb340, - 0x1800, 0x0485, 0x700c, 0x0cfe, - 0xff2f, 0xd9ff, 0x0852, 0xff6f, - 0xd8ff, 0xc006, 0x702c, 0xc543, - 0x780f, 0xc042, 0xd8ff, 0xb990, - 0x714c, 0x746c, 0xc541, 0x0dea, - 0xff2f, 0xc540, 0xd90c, 0xd8ff, - 0xb98d, 0x734c, 0x746c, 0xc543, - 0xc642, 0xc541, 0x0dd2, 0xff2f, - 0xc540, 0x0802, 0xff6f, 0xd8ff, - 0xd8ff, 0xd980, 0x754c, 0x746c, - 0xc543, 0xc542, 0xc541, 0x0dba, - 0xff2f, 0xc540, 0xd850, 0xc049, - 0xf0f4, 0x087a, 0xff8f, 0x70ad, - 0x70cd, 0xf003, 0x71c5, 0x74d5, - 0x010c, 0x0029, 0x2679, 0x1080, - 0x7704, 0xc04a, 0x2650, 0x90be, - 0xf409, 0x40c3, 0x0000, 0x0a00, - 0x0c7a, 0xff2f, 0xd9ff, 0x0fa2, - 0x0100, 0x8f01, 0x702c, 0x704c, - 0xc043, 0xc006, 0x726c, 0x780f, - 0xc04b, 0xc042, 0x710c, 
0xc041, - 0xc00a, 0x780f, 0xc047, 0x781d, - 0x781d, 0x2045, 0x0180, 0x2044, - 0x0980, 0xc040, 0x0d52, 0xff2f, - 0xd8ff, 0x8f01, 0x702c, 0x704c, - 0xc043, 0x710c, 0xc542, 0xc041, - 0x2632, 0x1f80, 0x8000, 0x04b4, - 0x726c, 0xb8c5, 0xc040, 0x0d32, - 0xff2f, 0xd8ff, 0x8f01, 0x702c, - 0x704c, 0xc043, 0xc00b, 0x726c, - 0xc042, 0x710c, 0xc041, 0xc007, - 0x781d, 0x2045, 0x0580, 0x2044, - 0x0d80, 0xc040, 0x0d0a, 0xff2f, - 0xd8ff, 0x8f01, 0x702c, 0x704c, - 0xc043, 0x710c, 0xc542, 0xc041, - 0xc00a, 0x726c, 0xb8c5, 0xc040, - 0x0cee, 0xff2f, 0xd8ff, 0x40c3, - 0x0000, 0x2710, 0x0b32, 0x0060, - 0xd90a, 0x781d, 0x781d, 0x7704, - 0x780f, 0xc542, 0xc541, 0xc540, - 0xc047, 0xc043, 0xd8ff, 0x702c, - 0x704c, 0x0cc6, 0xff2f, 0x726c, - 0xc007, 0x702c, 0x704c, 0xc043, - 0xd8ff, 0x726c, 0xc542, 0xc541, - 0x0cae, 0xff2f, 0xc540, 0x6e0b, - 0x7114, 0x0704, 0xffcd, 0x0e6a, - 0x0100, 0x700c, 0x0c3a, 0xff2f, - 0x712c, 0xf17a, 0x40c3, 0x0000, - 0x0a00, 0x0b82, 0xff2f, 0xd9ff, - 0x0ea6, 0x0100, 0x8f01, 0x702c, - 0x71cd, 0xc043, 0xd8ff, 0x704c, - 0xdb18, 0xc142, 0xc641, 0x4528, - 0x0c6e, 0xff2f, 0xc140, 0x0eba, - 0xff2f, 0xd8ff, 0x8f01, 0x702c, - 0x704c, 0xc043, 0xc006, 0x726c, - 0x780f, 0xc042, 0xc047, 0xd820, - 0xc641, 0xc040, 0x0c4a, 0xff2f, - 0xd8ff, 0x8f01, 0x702c, 0x704c, - 0xc043, 0xd8ff, 0x726c, 0xc542, - 0xc641, 0x0c36, 0xff2f, 0x1c00, - 0x30c1, 0x8f01, 0x702c, 0x704c, - 0xc043, 0xc007, 0x726c, 0xc042, - 0xd8ff, 0xc641, 0x0c1a, 0xff2f, - 0x1c00, 0x3481, 0x8f01, 0x702c, + 0x71cd, 0x1d02, 0x1055, 0x234a, + 0x1200, 0x702c, 0xbf8b, 0x40c3, + 0x000f, 0x4240, 0xb5c0, 0x1d6c, + 0x12c4, 0x260a, 0x3040, 0x1df2, + 0x9044, 0x1d66, 0x1044, 0x1d6c, + 0x13c4, 0x0e36, 0x0060, 0x4318, + 0xbf70, 0x202f, 0x06c2, 0x702c, + 0x0e26, 0x0060, 0x1d6c, 0x13c4, + 0x40c3, 0x0098, 0x9680, 0x702c, + 0x0e16, 0x0060, 0x1d6c, 0x12c4, + 0xd888, 0x1d6c, 0x1004, 0x40c3, + 0x0000, 0x6000, 0x1d6c, 0x12c4, + 0x1d6c, 0x1784, 0x1d72, 0x939c, + 0x1dd0, 0x9384, 0x1d72, 0x90dd, + 0x0d96, 0x0060, 0x1dd0, 0x9784, + 0x1d72, 0x939c, 
0x1d72, 0x979c, + 0x700c, 0x0dde, 0x0060, 0xd920, + 0x45cb, 0x9004, 0x01f0, 0xd820, + 0x0d76, 0x0060, 0x1d00, 0x1005, + 0xd820, 0x0d6e, 0x0060, 0x1d00, + 0x1045, 0x740c, 0x0d62, 0x0060, + 0x1d00, 0x1005, 0x1d0a, 0x909d, + 0x1404, 0x341b, 0xc6c6, 0x78e0, + 0xc2e6, 0x1cfc, 0xb6c8, 0xc1bd, + 0xd925, 0xb99f, 0x8900, 0x1c3a, + 0x3002, 0x1133, 0x0080, 0x1c3b, + 0x3002, 0x40c3, 0x8000, 0x06cc, + 0x8827, 0x88a0, 0xc16e, 0x8829, + 0x1c39, 0x3042, 0x8828, 0x1c36, + 0x3042, 0x882a, 0x710c, 0x1c37, + 0x3042, 0x0db2, 0x0020, 0xc193, + 0x702c, 0xc146, 0xf005, 0x7a34, + 0x700c, 0xb200, 0x7124, 0x09fb, + 0x82b4, 0xc298, 0xd90c, 0x2905, + 0x0341, 0x740c, 0x7835, 0xc049, + 0x040d, 0x0020, 0x700c, 0x03ff, + 0x0020, 0x706f, 0xc005, 0x712c, + 0x2044, 0x07c2, 0x4020, 0xc105, + 0xea05, 0xba61, 0x0aff, 0x8031, + 0x781b, 0xc047, 0x202f, 0x06c7, + 0x0d36, 0x0020, 0x792f, 0x46cb, + 0x8000, 0x05e4, 0xc048, 0x8e00, + 0x7014, 0xf2f6, 0x1600, 0x7080, + 0x8000, 0x0001, 0xb8e3, 0xf4f0, + 0x0842, 0xff8f, 0x70ed, 0x700c, + 0xf07e, 0xc004, 0x2079, 0x0080, + 0x7704, 0xc04a, 0xc004, 0xb882, + 0x0815, 0x0131, 0xd9ff, 0x40c3, + 0x0000, 0x0a00, 0x0be2, 0xff0f, + 0x08d6, 0x0140, 0x8e01, 0x71ad, + 0x702c, 0xc043, 0xc007, 0x704c, + 0x726c, 0x780f, 0xc04b, 0xc042, + 0xc00a, 0xc541, 0x780f, 0xc04c, + 0x781d, 0x781d, 0x2044, 0x0800, + 0x2045, 0x0180, 0xc040, 0x0ca6, + 0xff2f, 0xd8ff, 0x8e01, 0xc742, + 0xc541, 0xc043, 0xc004, 0x702c, + 0x704c, 0x726c, 0x2032, 0x0f80, + 0x8000, 0x04c0, 0xb8c5, 0xc040, + 0x0c82, 0xff2f, 0xd8ff, 0x8e01, + 0x702c, 0x704c, 0xc043, 0xc00b, + 0x726c, 0xc541, 0xc042, 0xc00c, + 0x781d, 0x2044, 0x0800, 0x2045, + 0x0580, 0xc040, 0x0c5e, 0xff2f, + 0xd8ff, 0x8e01, 0x702c, 0x704c, + 0xc043, 0xc00a, 0x726c, 0xc742, + 0xb8c5, 0xc040, 0xd8ff, 0x0c46, + 0xff2f, 0xc541, 0x40c3, 0x0000, + 0x2710, 0x0c5e, 0x0060, 0xd90a, + 0x781d, 0x781d, 0x7704, 0x7d0f, + 0xf8dd, 0xc742, 0xc741, 0xc740, + 0x0c22, 0xff2f, 0xc543, 0xf8da, + 0xc543, 0xc742, 0xc741, 0x0c16, + 0xff2f, 0xc740, 0xc004, 0xb863, + 0x0811, 
0x0095, 0x0fb2, 0x0100, + 0x700c, 0x0ba6, 0xff2f, 0x712c, + 0xc004, 0x7104, 0xc044, 0xc004, + 0x7514, 0x0702, 0xffe5, 0xd9ff, + 0x40c3, 0x0000, 0x0a00, 0x0af2, + 0xff0f, 0x0fe6, 0x0100, 0x8e01, + 0x702c, 0x71ed, 0xc043, 0xd8ff, + 0x704c, 0xdb18, 0x4528, 0xc142, + 0xc741, 0x0bca, 0xff2f, 0xc140, + 0x0e0e, 0xff2f, 0xd8ff, 0x8e01, + 0x702c, 0x704c, 0xc043, 0xc007, + 0x726c, 0xc741, 0x780f, 0xc044, + 0xc042, 0xd820, 0xc040, 0x0ba6, + 0xff2f, 0xd8ff, 0x8e01, 0x702c, 0x704c, 0xc043, 0xd8ff, 0x726c, - 0xc542, 0xc641, 0x0c02, 0xff2f, - 0xc540, 0x8f01, 0x702c, 0x704c, - 0xc043, 0xd8ff, 0xdb08, 0xc542, - 0xc641, 0x0bee, 0xff2f, 0xc540, - 0x0e1a, 0xff2f, 0xd8ff, 0x8f01, + 0xc542, 0xc741, 0x0b8e, 0xff2f, + 0x1c00, 0x30c1, 0x8e01, 0x702c, + 0x704c, 0xc043, 0xc004, 0x726c, + 0xc741, 0xc042, 0xd8ff, 0x0b76, + 0xff2f, 0x1c00, 0x3481, 0x8e01, 0x702c, 0x704c, 0xc043, 0xd8ff, - 0xdb18, 0xc542, 0xc641, 0x0bd2, - 0xff2f, 0xc540, 0x0d92, 0x0100, - 0x218a, 0x0fc7, 0x40c3, 0x9003, - 0xe048, 0xb020, 0x1804, 0x0005, - 0x2440, 0x3d80, 0x2032, 0x06c1, - 0xc006, 0x790b, 0xf261, 0xc009, - 0x702c, 0x0b46, 0xff2f, 0x7810, - 0x700c, 0x09ce, 0x0060, 0xd91e, - 0xc18d, 0x2132, 0x06c2, 0xf010, - 0x221a, 0x0f80, 0x0000, 0x2000, - 0xc38e, 0x7b54, 0x7144, 0x2005, - 0x0f80, 0x9002, 0x0054, 0x9000, - 0x780f, 0xb300, 0x2440, 0x3c80, - 0x2032, 0x06cb, 0x0bdd, 0x90a3, - 0x706c, 0x2132, 0x06cd, 0x708d, - 0xf002, 0x71a5, 0x0b65, 0x1362, - 0xc197, 0x79b4, 0x9140, 0xc008, - 0xba80, 0xb140, 0xe885, 0xc08e, - 0x20f4, 0x0340, 0xe811, 0x235a, - 0x340e, 0xc004, 0x7e16, 0x6678, - 0x70c3, 0x8000, 0x0640, 0xa8a0, - 0xc008, 0x7164, 0xe885, 0xc08e, - 0x20f4, 0x0340, 0xe8e3, 0x235a, - 0x340e, 0xc004, 0x7e16, 0x6698, - 0x70c3, 0x8000, 0x0620, 0xa8a0, - 0xc008, 0x7185, 0xe8d7, 0xc004, - 0x2079, 0x0000, 0x781b, 0x781b, - 0x7404, 0x7a05, 0xb140, 0xf1cf, - 0x40c3, 0x0000, 0x09c4, 0x0a7a, - 0xff0f, 0x09d6, 0xff0f, 0x03f9, - 0xffef, 0x7167, 0xc005, 0x7104, - 0xc045, 0xc005, 0x0833, 0x02b3, - 0xc197, 0xc005, 0x21f4, 0x0002, - 
0xea76, 0x1600, 0x7080, 0x8000, - 0x0019, 0xe8f2, 0xc005, 0x201a, - 0x0f80, 0x0000, 0x2000, 0x2005, - 0x0f81, 0x9002, 0x0154, 0x2244, - 0x0300, 0xb100, 0xf1e4, 0x40c3, - 0x8000, 0x0578, 0x8800, 0xe809, - 0x1600, 0x7080, 0x8000, 0x0001, - 0xb8e3, 0x0ea0, 0xff41, 0x700c, - 0x1e00, 0x7005, 0x9003, 0xe048, - 0x08fe, 0x0020, 0xc192, 0xc0bc, - 0x1404, 0x341b, 0xc6c6, 0x78e0, - 0xc2e6, 0x1cfc, 0xb6c8, 0xc1a1, - 0x4318, 0x4628, 0x4020, 0x732c, - 0x4768, 0x0ad6, 0x00e0, 0x4548, - 0x710c, 0x2344, 0x37ca, 0x0a11, - 0x1010, 0x2242, 0x104a, 0x0aff, - 0x9031, 0x781b, 0x205a, 0x010b, - 0xed86, 0x40c1, 0x732c, 0x0cca, - 0xff2f, 0xdabf, 0x230f, 0x16cd, - 0xef86, 0x40c1, 0x732c, 0x0cba, - 0xff2f, 0xda7f, 0x1600, 0x7080, - 0x8000, 0x0661, 0x235f, 0x364f, - 0x704c, 0x205f, 0x0c80, 0x238a, - 0x0dff, 0x1c00, 0x3fc1, 0x60d8, - 0x60f9, 0x09fe, 0x01a0, 0x78af, - 0x0b1a, 0xff8f, 0x0b22, 0xffaf, - 0x4508, 0x4100, 0x0922, 0x0120, - 0x40a1, 0x1600, 0x7080, 0x8000, - 0x0661, 0x205f, 0x0c80, 0x60d8, - 0x60f8, 0x1003, 0x00c1, 0xd840, - 0x7824, 0x781d, 0x781d, 0x781d, - 0x781d, 0x7034, 0x20c5, 0x04ab, - 0x1e00, 0x7004, 0x900e, 0x0022, - 0x40c1, 0x0f62, 0x00a0, 0x732c, - 0x7487, 0x1404, 0x341b, 0xc6c6, - 0x7a1b, 0x1600, 0x7080, 0x8000, - 0x001c, 0xbac4, 0xea05, 0xba61, - 0x0aff, 0x8031, 0x781d, 0x780f, - 0x2144, 0x07c2, 0xea05, 0xba61, - 0x0aff, 0x8031, 0x781d, 0x7fe0, - 0xb8c0, 0x78e0, 0xc0e2, 0x1600, - 0x7082, 0x8000, 0x0004, 0x7014, - 0x221a, 0x0f83, 0x0020, 0x0000, - 0x2305, 0x0f82, 0x9002, 0x0086, - 0x9240, 0x2305, 0x0f8c, 0x9003, - 0xfe86, 0x706c, 0x22c4, 0x0f81, - 0x0000, 0xbfff, 0xf202, 0xba8e, - 0xb440, 0xf004, 0xb440, 0x7164, - 0x0b39, 0x0293, 0x231a, 0x0f82, - 0x0000, 0x1000, 0x2205, 0x0f82, - 0x0001, 0x004a, 0x7a5b, 0x2205, - 0x0f8c, 0x9000, 0x0000, 0xe80b, - 0x9440, 0x2114, 0x00cd, 0xb540, - 0x2204, 0x0f82, 0x0000, 0xfbff, - 0xf1e6, 0x21f4, 0x00c2, 0xf1e4, - 0xc4c2, 0x78e0, 0xc2e6, 0xdb24, - 0x708d, 0x41c3, 0x8000, 0x0667, - 0xbb9f, 0xa980, 0x8b00, 0x2340, - 0x0d0d, 0xe007, 0x7a1d, 
0x7a5d, - 0x7a5d, 0xd8ff, 0x6058, 0xa901, - 0x15ff, 0x948e, 0xee0d, 0xca02, - 0x7a1d, 0xa942, 0x8d00, 0xe007, - 0x781d, 0x781d, 0x781d, 0x6058, - 0x2080, 0x0fc3, 0xf002, 0xa942, - 0xa903, 0xf003, 0x7185, 0x0c1f, - 0x10b2, 0x70cd, 0xf01c, 0x781d, - 0x2414, 0x1381, 0x781d, 0x781d, - 0x71c3, 0x8000, 0x061c, 0xa900, - 0x71c5, 0x0ee5, 0x90b3, 0x70d5, - 0x40a1, 0x20ca, 0x00c1, 0x88e0, - 0x78cf, 0x0eea, 0xffef, 0x798f, - 0xe803, 0x6f07, 0xf1e9, 0x4f1f, - 0x781d, 0xf1e7, 0xc6c6, 0x78e0, - 0xc0f1, 0x09fa, 0x0040, 0xc0d1, + 0x726c, 0xc542, 0xc741, 0x0b5e, + 0xff2f, 0xc540, 0x8e01, 0x702c, + 0x704c, 0xc043, 0xd8ff, 0xdb08, + 0xc542, 0xc741, 0x0b46, 0xff2f, + 0xc540, 0x0d72, 0xff2f, 0xd8ff, + 0x8e01, 0x702c, 0x704c, 0xc043, + 0xd8ff, 0xdb18, 0xc542, 0xc741, + 0x0b2a, 0xff2f, 0xc540, 0x0ed2, + 0x0100, 0xf07f, 0x700c, 0x0a2a, + 0xff2f, 0xd9ff, 0xc007, 0x70ad, + 0x734c, 0x205a, 0x0101, 0xdb22, + 0xc543, 0xc540, 0x7825, 0x7e0f, + 0xd820, 0xc041, 0xd8ff, 0x702c, + 0x0d0a, 0xff2f, 0xc642, 0xd828, + 0xc041, 0xf88c, 0xc543, 0xc642, + 0x0cfa, 0xff2f, 0xc540, 0xf889, + 0xc543, 0xc642, 0x1c04, 0x33c1, + 0x0cea, 0xff2f, 0xc040, 0xf885, + 0xc543, 0xc642, 0x1c04, 0x3501, + 0x0cda, 0xff2f, 0xc540, 0xd8ff, + 0xd980, 0x734c, 0xdb22, 0xc543, + 0xc642, 0x1c04, 0x3081, 0x0cc6, + 0xff2f, 0xc540, 0x700c, 0x0a52, + 0xff2f, 0x712c, 0x1600, 0x7080, + 0x8000, 0x0004, 0xdae0, 0x201a, + 0x0f81, 0x0020, 0x0000, 0x40c3, + 0x9008, 0x0100, 0x2105, 0x0003, + 0x7204, 0x7825, 0xb340, 0x1800, + 0x0485, 0x700c, 0x098a, 0xff2f, + 0xd9ff, 0x0cc6, 0xff2f, 0xd8ff, + 0xc007, 0x702c, 0xb990, 0x780f, + 0xc042, 0xd8ff, 0x714c, 0x746c, + 0xc543, 0xc541, 0x0a5e, 0xff2f, + 0xc540, 0xd90c, 0xd8ff, 0xb98d, + 0x734c, 0x746c, 0xc543, 0xc642, + 0xc541, 0x0a4a, 0xff2f, 0xc540, + 0x0c72, 0xff2f, 0xd8ff, 0xd8ff, + 0xd980, 0x754c, 0x746c, 0xc543, + 0xc542, 0xc541, 0x0a2e, 0xff2f, + 0xc540, 0xd850, 0xc049, 0x218a, + 0x0fc7, 0x40c3, 0x9003, 0xe048, + 0xb020, 0x1804, 0x0005, 0x2440, + 0x3e80, 0x2032, 0x06c1, 0xc007, + 0x782b, 0xf264, 
0xc009, 0x702c, + 0x09a6, 0xff2f, 0x7810, 0x700c, + 0x09fe, 0x0060, 0xd91e, 0x2440, + 0x3d80, 0x2032, 0x06cb, 0xc08e, + 0x2032, 0x06cd, 0x41a1, 0xf00f, + 0x211a, 0x0f80, 0x0000, 0x2000, + 0x7a34, 0x7124, 0x2005, 0x0f80, + 0x9002, 0x0054, 0x9000, 0x780f, + 0xb200, 0x0be9, 0x9065, 0xc28f, + 0x702c, 0x704c, 0xf034, 0x23f4, + 0x034c, 0xc008, 0x7bb4, 0xbc80, + 0xb380, 0xe886, 0xc08f, 0x20f4, + 0x0340, 0xe80f, 0x235a, 0x340e, + 0xc005, 0x7e16, 0x6638, 0x2000, + 0x0f80, 0x8000, 0x06ac, 0xa8a0, + 0x7124, 0xc008, 0xe817, 0x235a, + 0x340e, 0xc005, 0x7e16, 0x6658, + 0x2000, 0x0f80, 0x8000, 0x068c, + 0xa8a0, 0x7144, 0xc008, 0xe88f, + 0xc005, 0x2079, 0x0000, 0x781b, + 0x781b, 0x7404, 0x7c05, 0xb380, + 0xf005, 0xc08f, 0x20f4, 0x0340, + 0xe867, 0x71a5, 0x0b9b, 0x9365, + 0xc398, 0x40c3, 0x0000, 0x09c4, + 0x08d6, 0xff0f, 0x0846, 0xff0f, + 0x7167, 0x7277, 0x0408, 0xffc5, + 0xc005, 0x7104, 0xc045, 0xc005, + 0x7114, 0x03f6, 0xffce, 0xf01a, + 0xc006, 0x21f4, 0x0001, 0xe913, + 0x1600, 0x7080, 0x8000, 0x0019, + 0xe88d, 0xc006, 0x2144, 0x0301, + 0x201a, 0x0f80, 0x0000, 0x2000, + 0x2005, 0x0f80, 0x9002, 0x0154, + 0xb020, 0xc006, 0x7104, 0xc046, + 0xc006, 0x08d1, 0x82b4, 0xc198, + 0x40c3, 0x8000, 0x05e4, 0x8800, + 0xe808, 0x1600, 0x7080, 0x8000, + 0x0001, 0xb8e3, 0x0d78, 0xff41, + 0x700c, 0x1e00, 0x7005, 0x9003, + 0xe048, 0x091a, 0x0020, 0xc193, + 0xc0bd, 0x1404, 0x341b, 0xc6c6, + 0xd8ff, 0x702c, 0x734c, 0xdb22, + 0x7ee0, 0x78e0, 0xd8ff, 0x702c, + 0x704c, 0x726c, 0x7ee0, 0x78e0, + 0xc2e6, 0x1cfc, 0xb6c8, 0xc1a2, + 0x4728, 0x4608, 0x4020, 0x732c, + 0xc341, 0x0b12, 0x00e0, 0x4358, + 0x710c, 0x4508, 0x2644, 0x17c0, + 0xe805, 0xb861, 0x08ff, 0x8031, + 0x7dbb, 0x0b15, 0x3031, 0x255a, + 0x110d, 0x40e1, 0x732c, 0x0ade, + 0xff2f, 0xdabf, 0xc001, 0x250f, + 0x139b, 0xe887, 0x40e1, 0x732c, + 0x0aca, 0xff2f, 0xda7f, 0x1600, + 0x7080, 0x8000, 0x06cd, 0x265f, + 0x164d, 0x704c, 0x205f, 0x0c80, + 0x238a, 0x0dff, 0x1c00, 0x3fc1, + 0x60f8, 0x60b9, 0x0d0a, 0x01a0, + 0x202f, 0x06c7, 0x0a8a, 0xff8f, + 0x0a92, 
0xffaf, 0x4300, 0x4100, + 0x0946, 0x0120, 0x4060, 0x1600, + 0x7080, 0x8000, 0x06cd, 0x205f, + 0x0c80, 0x60f8, 0x60b8, 0x1003, + 0x00c1, 0x783d, 0x781d, 0x781d, + 0x781d, 0x2044, 0x0100, 0x7034, + 0x20c5, 0x04ab, 0x1e00, 0x7004, + 0x900e, 0x0022, 0x40e1, 0x0fa2, + 0x00a0, 0x732c, 0xc0a2, 0x1404, + 0x341b, 0xc6c6, 0x7a1b, 0x1600, + 0x7080, 0x8000, 0x001c, 0xbac4, + 0xea05, 0xba61, 0x0aff, 0x8031, + 0x781d, 0x780f, 0x2144, 0x07c2, + 0xea05, 0xba61, 0x0aff, 0x8031, + 0x781d, 0x7fe0, 0xb8c0, 0x78e0, + 0xc0e2, 0x1600, 0x7082, 0x8000, + 0x0004, 0x7014, 0x221a, 0x0f83, + 0x0020, 0x0000, 0x2305, 0x0f82, + 0x9002, 0x0086, 0x9240, 0x2305, + 0x0f83, 0x9003, 0xfe86, 0x22c4, + 0x0f81, 0x0000, 0xbfff, 0xf203, + 0xba8e, 0x70ad, 0xb340, 0xf01d, + 0x251a, 0x1f82, 0x0000, 0x1000, + 0x2205, 0x0f82, 0x0001, 0x004a, + 0x7a5b, 0x2205, 0x0f83, 0x9000, + 0x0000, 0xe80b, 0x9340, 0x2114, + 0x034c, 0xb440, 0x2204, 0x0f82, + 0x0000, 0xfbff, 0xf003, 0x21f4, + 0x0342, 0xb340, 0x71a5, 0x0dcd, + 0x9294, 0xc4c2, 0xc2e6, 0x706c, + 0x41c3, 0x8000, 0x06d3, 0xdd24, + 0xa960, 0xbd9f, 0x8d00, 0xe007, + 0x7a1d, 0x7a5d, 0x7a5d, 0x6a09, + 0xa901, 0x1534, 0x108c, 0xec0e, + 0xca04, 0x7a1d, 0xa942, 0x1533, + 0x1080, 0xe007, 0x781d, 0x781d, + 0x781d, 0x6058, 0x7704, 0xf003, + 0xa942, 0xa903, 0xf01e, 0xee05, + 0x1533, 0x108f, 0xf002, 0x8de0, + 0x78cf, 0x0f16, 0xffef, 0x796f, + 0xe804, 0x6f07, 0x791d, 0xf005, + 0x4f1f, 0x791d, 0x793d, 0x793d, + 0x2314, 0x0380, 0x793d, 0x2000, + 0x0f80, 0x8000, 0x0688, 0xa820, + 0x71c5, 0x0ecf, 0x9094, 0x7164, + 0x0bfb, 0x80b4, 0x70cd, 0xc6c6, + 0xc0f1, 0x09f2, 0x0040, 0xc0d1, 0x7fe0, 0x700c, 0x1600, 0x7101, - 0x8000, 0x0006, 0x782c, 0x2885, - 0x041f, 0x7fe0, 0x7104, 0x78e0, - 0xc2e4, 0x1cfc, 0xb6c8, 0x4318, - 0x08ae, 0xffaf, 0x4020, 0x41c3, - 0x8000, 0x0661, 0x8940, 0xd925, - 0xb99f, 0x7054, 0x21c0, 0x0ce2, - 0x8960, 0x704c, 0x708d, 0x71ad, - 0xf002, 0x7144, 0x0a4d, 0x0135, - 0x795b, 0x2144, 0x07ce, 0x4100, + 0x8000, 0x0006, 0x790c, 0x2985, + 0x041f, 0x7fe0, 0x6901, 0x78e0, + 
0xc2e4, 0x4200, 0x082e, 0xffaf, + 0x4020, 0x41c3, 0x8000, 0x06cd, + 0x8920, 0xe907, 0x1600, 0x708d, + 0x8000, 0x0058, 0xf005, 0x1600, + 0x708d, 0x8000, 0x0025, 0x706c, + 0x708d, 0xf024, 0x2144, 0x07ce, + 0x4100, 0xee06, 0xbe61, 0x0e01, + 0x1031, 0x793d, 0xb9c1, 0x0933, + 0x00a1, 0x712c, 0x2344, 0x07ce, 0xee05, 0xbe61, 0x0eff, 0x9031, - 0x793d, 0xb9c1, 0x09e7, 0x86e1, - 0x41a1, 0x2244, 0x07ce, 0xee06, + 0x793b, 0x79ab, 0xd910, 0x2344, + 0x07ce, 0x24c0, 0x1062, 0xee06, 0xbe61, 0x0e01, 0x1031, 0x793b, - 0x796b, 0x6a24, 0x2144, 0x07ce, - 0x41a1, 0xee06, 0xbe61, 0x0e01, - 0x1031, 0x793b, 0x24c0, 0x1062, - 0x796b, 0x24c0, 0x1062, 0xf1db, - 0x4081, 0x1404, 0x341b, 0xc6c4, - 0x790d, 0x218c, 0x8bff, 0xf607, - 0x208c, 0x8802, 0xf451, 0x7fe0, - 0x730c, 0x0911, 0x0733, 0x208c, - 0x8c03, 0x720c, 0x7ce0, 0xf049, - 0xe1f7, 0x0058, 0x0029, 0xe1df, - 0x0058, 0x0029, 0xe1cf, 0x005c, - 0x0029, 0xe1c4, 0x005c, 0x0029, - 0xe0c5, 0x095f, 0x0f13, 0x0963, - 0x0d53, 0x0967, 0x0c13, 0xb87c, - 0x0867, 0x0475, 0x790f, 0x2025, - 0x0040, 0xf012, 0xf02d, 0xf02f, - 0xf02b, 0xf02f, 0xf029, 0xf02f, - 0xf027, 0xf027, 0xf02d, 0xf025, - 0xf023, 0xf02d, 0xf021, 0xf021, - 0xf01f, 0xf02b, 0x7fe0, 0xd83f, - 0xe0f8, 0xf41b, 0x7fe0, 0xd808, - 0xe0e0, 0xf417, 0x7fe0, 0xd807, - 0xe0d0, 0xf413, 0x7fe0, 0xd80a, - 0xf40f, 0x7fe0, 0xd80b, 0x081b, - 0x0f31, 0xd818, 0x7ee0, 0x0813, - 0x0d71, 0xd80f, 0x7ee0, 0x080b, - 0x0c31, 0xd81a, 0x7ee0, 0x7fe0, - 0x700c, 0x7fe0, 0xd83e, 0x7fe0, - 0xd83b, 0x7fe0, 0xd83a, 0x7fe0, - 0xd81f, 0x7fe0, 0xd838, 0x7fe0, - 0xd81b, 0x78e0, 0xc2e6, 0x1cfc, - 0xb6c8, 0xc1b5, 0xc242, 0xc041, - 0x4328, 0x4020, 0x702c, 0x0ba6, - 0xff2f, 0x734c, 0x4318, 0x4061, - 0x742c, 0x0b9a, 0xff2f, 0xda08, - 0xc043, 0x1600, 0x7080, 0x8000, - 0x0663, 0x70ad, 0xc040, 0xf005, - 0xc000, 0x7104, 0xc040, 0xc000, - 0x7a0f, 0x40c3, 0x8000, 0x0663, - 0x8801, 0x0849, 0x00a4, 0xbae0, - 0xc103, 0x4063, 0x703c, 0xe872, - 0x7eaf, 0x60dd, 0x7faf, 0x1a10, - 0x3098, 0xc084, 0x722c, 0x42c1, - 0x0dc2, 0xff6f, 0x43e1, 0xc102, 
- 0x0982, 0x0120, 0xc084, 0x2454, - 0x39c0, 0x722c, 0x42c1, 0x0dba, - 0x0160, 0x43e1, 0xc001, 0x712c, - 0x0b2a, 0x0120, 0xc284, 0xf1d6, - 0xc0b5, 0x1404, 0x341b, 0xc6c6, + 0x79ab, 0x24c0, 0x1062, 0x7164, + 0x0bbd, 0x8134, 0x797b, 0x4081, + 0xc6c4, 0x78e0, 0x2042, 0x0701, + 0x0939, 0x0454, 0x0889, 0x0c30, + 0xd91a, 0x0885, 0x0d70, 0xd90f, + 0x087d, 0x0f30, 0xd918, 0xe0c5, + 0xf227, 0xe0d0, 0xf227, 0xe0e0, + 0xf227, 0xe0f8, 0xf227, 0x208c, + 0x8c03, 0x722c, 0xf230, 0x208c, + 0x8802, 0xf419, 0x732c, 0xf02c, + 0x2025, 0x0040, 0xf011, 0xf013, + 0xf01b, 0xf011, 0xf01b, 0xf00f, + 0xf01b, 0xf00d, 0xf00b, 0xf01b, + 0xf009, 0xf009, 0xf019, 0xf007, + 0xf005, 0xf005, 0xf017, 0xd93f, + 0xf016, 0x702c, 0xf014, 0xd90b, + 0xf012, 0xd90a, 0xf010, 0xd907, + 0xf00e, 0xd908, 0xf00c, 0xd93e, + 0xf00a, 0xd93b, 0xf008, 0xd93a, + 0xf006, 0xd91f, 0xf004, 0xd938, + 0xf002, 0xd91b, 0x7fe0, 0x4020, 0xc2e6, 0x1cfc, 0xb6c8, 0xc1b6, - 0xc044, 0xd8ff, 0x702c, 0x0b16, - 0xff2f, 0x734c, 0x4508, 0xd8ff, - 0x742c, 0x0b0a, 0xff2f, 0xda08, - 0x60ba, 0xca04, 0x794f, 0x43db, - 0x8000, 0x0663, 0x790a, 0x785b, - 0xc041, 0x1600, 0x70c0, 0x8000, - 0x001e, 0xc140, 0x702c, 0x7014, - 0x740c, 0xc042, 0xf647, 0x1301, - 0x3080, 0x7104, 0xc042, 0xc000, - 0xc041, 0x1300, 0x308f, 0xf005, - 0xc002, 0x780f, 0x671f, 0x1301, - 0x3080, 0x7fef, 0x087f, 0x03e4, - 0x7d2f, 0xc001, 0xc600, 0x7a0f, - 0xc000, 0x655b, 0x780f, 0x7310, - 0x26ca, 0x10cb, 0xea6e, 0x78cf, - 0xc043, 0xc303, 0xc085, 0x712c, - 0x42a1, 0x0d0a, 0xff6f, 0x1a10, - 0x33d8, 0xc085, 0x08c6, 0x0120, - 0x702c, 0xc303, 0x2454, 0x3a40, - 0x712c, 0x0cfe, 0x0160, 0x42a1, - 0xc004, 0x0e6a, 0xff6f, 0x702c, - 0x1453, 0x3081, 0xc435, 0xe10f, - 0x7a2f, 0xc185, 0x7954, 0x8940, - 0x8921, 0x4c52, 0x7982, 0x7b4f, - 0x7c2f, 0x7470, 0x22ca, 0x004b, - 0x8821, 0x7b4f, 0x7170, 0x0784, - 0xffee, 0x41c1, 0x41c1, 0xa841, - 0xf1bc, 0xc0b6, 0x1404, 0x341b, - 0xc6c6, 0x78e0, 0xc0e4, 0x70ad, - 0xf002, 0x71a5, 0x0d1f, 0x10b2, - 0x70cd, 0xf010, 0x255a, 0x1c83, - 0x607c, 0x265a, 0x1643, 
0x71c5, - 0x639b, 0x633b, 0x8b80, 0x7c45, - 0xab80, 0x0eed, 0x9092, 0xf1ef, - 0xc4c4, 0x78e0, 0xc2e6, 0x1cfc, - 0xb6c8, 0xc1bb, 0x0a2a, 0xffaf, - 0xc082, 0xc082, 0xd90d, 0x0fc2, + 0xc253, 0x4328, 0xc052, 0x4020, + 0x702c, 0x09f6, 0xff2f, 0x734c, + 0xc054, 0x4061, 0x742c, 0x09ea, + 0xff2f, 0xda08, 0x1600, 0x708e, + 0x8000, 0x06cf, 0x70ed, 0xc055, + 0xf03c, 0xc015, 0xc114, 0x20ca, + 0x0046, 0xe837, 0x7def, 0x60bf, + 0x1a12, 0x3098, 0x7bef, 0xc081, + 0x722c, 0x42a1, 0x0cea, 0xff6f, + 0x4378, 0x1212, 0x3081, 0xc213, + 0x09f2, 0x0120, 0xc081, 0xc812, + 0x1203, 0x3081, 0x082b, 0x0044, + 0x1202, 0x3081, 0x0923, 0x0004, + 0xc031, 0x1443, 0x30c3, 0x1442, + 0x3082, 0xc040, 0x41c3, 0x006c, + 0x0003, 0x0a02, 0xff2f, 0x740c, + 0x0daa, 0xff2f, 0xc081, 0x2454, + 0x3840, 0x722c, 0x42a1, 0x08f6, + 0x01a0, 0x4363, 0xc012, 0x712c, + 0x0df6, 0x0120, 0xc281, 0x71c5, + 0x40c3, 0x8000, 0x06cf, 0x8801, + 0x79cf, 0x0883, 0x8065, 0x222f, + 0x8042, 0xc0b6, 0x1404, 0x341b, + 0xc6c6, 0x78e0, 0xc2e6, 0x1cfc, + 0xb6c8, 0xc1b8, 0xc155, 0xc057, + 0xd8ff, 0x702c, 0x0932, 0xff2f, + 0x734c, 0x4508, 0xd8ff, 0x742c, + 0x0926, 0xff2f, 0xda08, 0x60ba, + 0xca06, 0x794f, 0x43db, 0x8000, + 0x06cf, 0x782a, 0xc052, 0x785b, + 0xc053, 0x1600, 0x70c0, 0x8000, + 0x001e, 0x7014, 0x740c, 0xc054, + 0xf647, 0x1301, 0x3080, 0x7104, + 0xc054, 0xc012, 0xc053, 0x1300, + 0x308f, 0x702c, 0xf058, 0xc013, + 0xc312, 0x7a0f, 0x62b8, 0x7e6f, + 0x70d1, 0x0a9f, 0x0030, 0x26ca, + 0x1005, 0x78fd, 0x5052, 0x78cf, + 0xc056, 0xc316, 0xc081, 0x712c, + 0x0bf6, 0xff6f, 0x42a1, 0x1212, + 0x3081, 0xc215, 0x08fe, 0x0120, + 0xc081, 0xc812, 0x1203, 0x3081, + 0x082b, 0x0044, 0x1202, 0x3081, + 0x0923, 0x0004, 0xc031, 0x1443, + 0x30c3, 0x1442, 0x3082, 0xc040, + 0x41c3, 0x0073, 0x0003, 0x090e, + 0xff2f, 0x740c, 0x0cb6, 0xff2f, + 0xc081, 0xc015, 0x0841, 0x00f0, + 0x2454, 0x3840, 0xc316, 0x712c, + 0x0ffa, 0x0160, 0x42a1, 0xc017, + 0x0dc6, 0xff6f, 0x702c, 0x1443, + 0x3081, 0xc481, 0xc331, 0xe10f, + 0x792f, 0x7c34, 0x8c21, 0x7962, + 0x7a2f, 0x8c20, 
0x4b31, 0x792f, + 0x2109, 0x0081, 0x8841, 0x0909, + 0x0085, 0xa821, 0x41c1, 0xc014, + 0x780f, 0x671f, 0x1301, 0x3080, + 0x7fef, 0x084f, 0x83e5, 0x7d2f, + 0xc0b8, 0x1404, 0x341b, 0xc6c6, + 0x264a, 0x3000, 0xf010, 0x265a, + 0x3c83, 0x607c, 0x235a, 0x1643, + 0x7165, 0x639b, 0x633b, 0x8b80, + 0x7c45, 0xab80, 0x0beb, 0x9094, + 0x2640, 0x305e, 0x0ef9, 0xb0b4, + 0x706d, 0x7ee0, 0xc2e6, 0x1cfc, + 0xb6c8, 0xc1bb, 0x09f6, 0xffaf, + 0xc082, 0xc082, 0xd90d, 0x0fc6, 0xffef, 0xdac8, 0x1600, 0x7080, - 0x8000, 0x001b, 0x47cb, 0x8000, - 0x0661, 0x086f, 0x001e, 0x8f0e, - 0xe833, 0x8faa, 0x8f0b, 0xdb4b, - 0xc565, 0x1c2d, 0x3002, 0x8f0c, - 0xbb9f, 0x42c3, 0x9000, 0x0000, - 0x1c46, 0x3002, 0x8f0d, 0x1c5f, - 0x3002, 0x1b01, 0x0352, 0x8f0b, - 0xab00, 0x8f2c, 0x209a, 0x0004, - 0x1b32, 0x0042, 0x8f2d, 0x7d05, - 0x1b33, 0x0042, 0x13b8, 0x8080, - 0x205f, 0x018c, 0x40c3, 0x0012, - 0x0310, 0x6419, 0x7204, 0x7945, - 0xb1a0, 0x6098, 0x7a05, 0x1333, - 0x0080, 0x1332, 0x0081, 0x209a, - 0x0004, 0x7825, 0xb200, 0xd8ff, - 0xc040, 0xc082, 0x712c, 0x42c3, - 0x0040, 0x5800, 0x08ea, 0x01a0, - 0x706c, 0x0b06, 0x0000, 0x09aa, - 0xff0f, 0x0b46, 0x0040, 0x702c, - 0x256f, 0x1cc3, 0x1e00, 0x7044, - 0x9004, 0x00dc, 0x8d00, 0xe89d, - 0xf002, 0x6821, 0x782f, 0x0833, - 0x00b5, 0xaf20, 0xe808, 0x1600, + 0x8000, 0x001b, 0x0897, 0x001e, + 0x40c3, 0x8000, 0x06cd, 0x880e, + 0x088b, 0x0030, 0xdb4b, 0x40c3, + 0x8000, 0x06cd, 0x888d, 0x40c3, + 0x8000, 0x06cd, 0x88ac, 0x40c3, + 0x8000, 0x06cd, 0x88cb, 0x40c3, + 0x8000, 0x06cd, 0x882a, 0xbb9f, + 0x1b01, 0x0052, 0x40c3, 0x8000, + 0x06cd, 0x88eb, 0x40c3, 0x8000, + 0x06cd, 0xabe0, 0x884c, 0x279a, + 0x1004, 0x1b32, 0x0082, 0xc165, + 0x1c2d, 0x3382, 0x1c46, 0x3342, + 0x1c5f, 0x3302, 0x13b8, 0x8080, + 0x7f25, 0x41c3, 0x8000, 0x06cd, + 0x205f, 0x018d, 0x46cb, 0x0012, + 0x0310, 0x892d, 0x44cb, 0x9000, + 0x0000, 0x65d8, 0x7885, 0xb0e0, + 0x1b33, 0x0042, 0x219a, 0x0004, + 0x6e02, 0x60b8, 0x7885, 0x7945, + 0xb020, 0xd8ff, 0xc040, 0xc082, + 0x712c, 0x42c3, 0x0040, 0x5800, + 0x0bda, 
0x01a0, 0x706c, 0x0ad2, + 0x0000, 0x0f7a, 0xfecf, 0x0b82, + 0x0040, 0x700c, 0x1e00, 0x7004, + 0x9004, 0x00dc, 0x256f, 0x1cc3, + 0x8d00, 0xe8a4, 0x702c, 0x782f, + 0x1e00, 0x7042, 0x8000, 0x06cd, + 0x0839, 0x0095, 0xe812, 0x1600, 0x7080, 0x8000, 0x0058, 0x7014, - 0x710c, 0xf008, 0x1600, 0x7080, - 0x8000, 0x0025, 0x7014, 0x700c, - 0xf3ed, 0x6901, 0x0e76, 0xff2f, - 0x780f, 0x8f00, 0xf1e7, 0x0e6e, - 0xff2f, 0x730c, 0x700c, 0xaf01, - 0xaf00, 0x8d00, 0x7014, 0xf4ca, - 0xdb25, 0x41c3, 0x9004, 0x0030, - 0xbb9f, 0x11b4, 0x0502, 0x8b00, - 0x0a0b, 0x0171, 0xb100, 0x8b00, - 0xf005, 0x1600, 0x7080, 0x8000, - 0x0058, 0xb101, 0x8b00, 0x088b, - 0x0030, 0x4308, 0x2344, 0x1040, - 0xc041, 0x1600, 0x7080, 0x8000, - 0x0032, 0x8fa6, 0x2344, 0x109b, - 0x2044, 0x0200, 0x781d, 0x781d, - 0x781d, 0x204e, 0x008e, 0xf003, - 0x71a5, 0x8f07, 0x085b, 0x0362, - 0x2353, 0x1041, 0xc001, 0x209a, - 0x0004, 0x7905, 0x4063, 0x209a, - 0x0004, 0x7905, 0x40c1, 0x209a, - 0x0010, 0x2105, 0x0002, 0x251a, - 0x1f80, 0x0000, 0x2000, 0x2005, - 0x0f81, 0x9002, 0x0154, 0x9100, - 0x7845, 0xb100, 0xc001, 0xe808, - 0x79af, 0x7acf, 0x700c, 0x09ce, - 0xff6f, 0x716c, 0x0bb5, 0xb030, - 0x79af, 0x7acf, 0x710c, 0x09be, - 0xff6f, 0x716c, 0xf1d2, 0x1600, - 0x708b, 0x8000, 0x0058, 0x0b89, - 0x1030, 0x2344, 0x1040, 0xc041, - 0x1600, 0x7080, 0x8000, 0x0065, - 0x8fa8, 0x2344, 0x109b, 0x2044, - 0x0200, 0x781d, 0x781d, 0x781d, - 0x204e, 0x008e, 0xf002, 0x71a5, - 0x8f09, 0x085d, 0x0362, 0x2353, - 0x1041, 0xc001, 0x209a, 0x0004, - 0x7905, 0x4063, 0x209a, 0x0004, - 0x7905, 0x40c1, 0x209a, 0x0010, - 0x2105, 0x0002, 0x251a, 0x1f80, - 0x0000, 0x2000, 0x2005, 0x0f81, + 0x710c, 0xf20a, 0x6901, 0x0cf6, + 0xff2f, 0x780f, 0x1600, 0x7080, + 0x8000, 0x06cd, 0x6821, 0xf1e9, + 0x1600, 0x7080, 0x8000, 0x0025, + 0x7014, 0x700c, 0xf5f0, 0xf1f8, + 0x0cd2, 0xff2f, 0x730c, 0x41c3, + 0x8000, 0x06cd, 0x700c, 0xa901, + 0x1e00, 0x7002, 0x8000, 0x06cd, + 0x8d00, 0x7014, 0xf4b5, 0x41c3, + 0x9004, 0x0030, 0x11b4, 0x0502, + 0x1600, 0x7080, 0x8000, 0x0025, + 
0xc041, 0x0a0d, 0x0171, 0xb100, + 0xc001, 0xf006, 0x1600, 0x7080, + 0x8000, 0x0058, 0xb101, 0xc001, + 0x086d, 0x0030, 0x716f, 0x1600, + 0x7080, 0x8000, 0x0032, 0xb8e3, + 0x40c3, 0x8000, 0x06cd, 0x88e6, + 0xc001, 0x23ca, 0x30a1, 0x2044, + 0x008e, 0x2044, 0x004d, 0xf01d, + 0xc101, 0x209a, 0x0004, 0xb9c1, + 0x7905, 0x40c1, 0xf849, 0x271a, + 0x1f80, 0x0000, 0x2000, 0xf84c, + 0xed07, 0x79ef, 0x700c, 0x4263, + 0x086e, 0xff6f, 0x716c, 0xee08, + 0x79ef, 0x710c, 0x4263, 0x0862, + 0xff6f, 0x716c, 0x71e5, 0x40c3, + 0x8000, 0x06cd, 0x8807, 0x08c5, + 0x83e5, 0x40a1, 0x1600, 0x708d, + 0x8000, 0x0058, 0x0d79, 0x1030, + 0x716f, 0x1600, 0x7080, 0x8000, + 0x0065, 0x2544, 0x108e, 0xb8e3, + 0x40c3, 0x8000, 0x06cd, 0x8808, + 0x23ca, 0x30a1, 0x2544, 0x104f, + 0xc041, 0xf022, 0x209a, 0x0004, + 0x2553, 0x1041, 0x7905, 0x40c1, + 0xf82c, 0xc001, 0x201a, 0x0f80, + 0x0000, 0x2000, 0xf82e, 0xef09, + 0xc001, 0x4263, 0x716c, 0x790f, + 0x0ff6, 0xff2f, 0x700c, 0xee09, + 0xc001, 0x4263, 0x716c, 0x790f, + 0x0fe6, 0xff2f, 0x710c, 0xc001, + 0x7104, 0xc041, 0x40c3, 0x8000, + 0x06cd, 0x8829, 0xc001, 0x09b9, + 0x8025, 0x40e1, 0x1600, 0x708e, + 0x8000, 0x0004, 0x0b72, 0xff4f, + 0x0b7a, 0xff6f, 0x4508, 0x4100, + 0x0a2e, 0x00e0, 0x40a1, 0x261a, + 0x1f8c, 0x0020, 0x0000, 0x43c3, + 0x9008, 0x01ea, 0x9300, 0x45cb, + 0x9012, 0x0402, 0x2405, 0x1342, + 0x2004, 0x0f81, 0x0000, 0x3f00, + 0xb220, 0x6d44, 0x7a85, 0x2196, + 0x0006, 0xb220, 0x9321, 0x6d02, + 0x7885, 0xb9c5, 0xb020, 0x40c3, + 0x9003, 0xe174, 0x702c, 0xb020, + 0x0c8a, 0xff2f, 0xb021, 0xc0bb, + 0x1404, 0x341b, 0xc6c6, 0x78e0, + 0x209a, 0x0004, 0x7905, 0x4063, + 0x209a, 0x0010, 0x2105, 0x0002, + 0x7ee0, 0x78e0, 0x2005, 0x0f81, 0x9002, 0x0154, 0x9100, 0x7845, - 0xb100, 0xc001, 0xe807, 0x79af, - 0x7acf, 0x700c, 0x093e, 0xff6f, - 0x716c, 0x0bb7, 0xb030, 0x79af, - 0x7acf, 0x710c, 0x092e, 0xff6f, - 0x716c, 0xf1d3, 0x1600, 0x708e, - 0x8000, 0x0004, 0x0c2e, 0xff4f, - 0x0c36, 0xff6f, 0x4508, 0x4100, - 0x0a36, 0x00e0, 0x40a1, 0x43c3, - 0x9008, 0x01ea, 0x261a, 
0x1f8c, - 0x0020, 0x0000, 0x9300, 0x45cb, - 0x9012, 0x0402, 0x2004, 0x0f81, - 0x0000, 0x3f00, 0x2405, 0x1342, - 0xb220, 0x6d44, 0x7a85, 0x71c3, - 0x0000, 0x0c00, 0x6d02, 0xb220, - 0x7c05, 0x9301, 0xb8c5, 0xb400, - 0x41c3, 0x9003, 0xe174, 0x700c, - 0xb100, 0x0e0e, 0xff2f, 0xb101, - 0xc0bb, 0x1404, 0x341b, 0xc6c6, - 0xc2e4, 0xc1a4, 0x4608, 0x700c, - 0x09a2, 0xfeef, 0xd9ff, 0x1200, - 0x3083, 0x702c, 0xd8ff, 0x754c, - 0xc143, 0xc142, 0xc141, 0x4528, - 0x0a96, 0xfeef, 0xc140, 0xbec1, - 0xd8ff, 0xd980, 0x764c, 0xdb20, - 0xc543, 0xc642, 0xc541, 0x0a82, - 0xfeef, 0x1c00, 0x3041, 0x700c, - 0x0a16, 0xfeef, 0x712c, 0xc0a4, - 0xc6c4, 0x78e0, 0xc2e4, 0xc1a4, - 0xd858, 0xb89f, 0x88a0, 0x10cd, - 0x808e, 0x700c, 0x094e, 0xfeef, - 0xd9ff, 0x2505, 0x1380, 0x702c, - 0xb8c1, 0xc143, 0xc042, 0xc141, - 0xc140, 0xd8ff, 0xd980, 0xda09, - 0x0a3e, 0xfeef, 0xdb20, 0x700c, - 0x09d6, 0xfeef, 0x712c, 0xc0a4, + 0xb100, 0x7ee0, 0xc2e4, 0xc1a4, + 0xd858, 0xb89f, 0x88c0, 0x10cd, + 0x808d, 0x700c, 0x0f92, 0xfeaf, + 0xd9ff, 0x2605, 0x1341, 0x700c, + 0xb9c1, 0xc043, 0xc142, 0xc041, + 0xc040, 0xd8ff, 0xd980, 0xda09, + 0x086a, 0xfeef, 0xdb20, 0x700c, + 0x0806, 0xfeef, 0x712c, 0xc0a4, 0xc6c4, 0x78e0, 0x21aa, 0x00c4, 0x0911, 0x003f, 0x722c, 0x21ab, 0x00c4, 0x712c, 0x21ab, 0x00c4, @@ -7105,10 +7189,10 @@ static u16 lpddr4x_train2d_imem[] = { 0x7ee0, 0x78e0, 0xc0f1, 0x0f5a, 0xffcf, 0x0f96, 0xffef, 0xd814, 0xc0d1, 0x7ee0, 0xc0f1, 0x0846, - 0x0000, 0x41c3, 0x8000, 0x0578, + 0x0000, 0x41c3, 0x8000, 0x05e4, 0x8920, 0x781b, 0xe915, 0x1600, 0x7081, 0x8000, 0x0001, 0x0923, - 0x00df, 0x41c3, 0x8000, 0x0660, + 0x00df, 0x41c3, 0x8000, 0x06cc, 0x8920, 0x0917, 0x0134, 0x216c, 0x0041, 0x2809, 0x8042, 0xf208, 0x6038, 0x7842, 0xf004, 0x0809, @@ -7119,1863 +7203,1770 @@ static u16 lpddr4x_train2d_imem[] = { 0x0d41, 0x2885, 0x0a0f, 0x201a, 0x00c2, 0x2a85, 0x041f, 0x793d, 0x6a01, 0x6941, 0x41c3, 0x8000, - 0x0578, 0x8920, 0xe90c, 0x14fb, + 0x05e4, 0x8920, 0xe90c, 0x14fb, 0x9081, 0x0915, 0x00df, 0x41c3, - 0x8000, 0x0660, 
0x8920, 0x7034, + 0x8000, 0x06cc, 0x8920, 0x7034, 0x22da, 0x0042, 0x7210, 0x7fe0, 0x20ca, 0x008d, 0xc2e6, 0x1cfc, - 0xb6c8, 0x098a, 0xff6f, 0xc1a4, - 0x791d, 0x793d, 0x793d, 0x793d, - 0xb9c3, 0x43db, 0x8000, 0x0058, - 0x1a02, 0x3042, 0x1300, 0x3081, - 0xe90b, 0x781d, 0x1600, 0x7081, - 0x8000, 0x0057, 0x781d, 0x2044, - 0x0e00, 0x611d, 0xf005, 0x1600, - 0x708d, 0x8000, 0x0024, 0x094e, - 0xff4f, 0x79af, 0x7a3d, 0x7a5d, - 0x2553, 0x9083, 0x7a5d, 0x1a07, - 0x30c2, 0x7b3d, 0x22c0, 0x0062, - 0x7b7d, 0x2553, 0x907e, 0xb8c3, - 0x23c0, 0x0062, 0x1a03, 0x3002, - 0x1a04, 0x3082, 0x1a05, 0x30c2, - 0xc040, 0x41c3, 0x00e6, 0x0003, - 0x0db6, 0xfeef, 0xd80a, 0x276f, - 0x10c3, 0x8f00, 0x45e9, 0xb8e2, - 0x730c, 0x78c0, 0x1a08, 0x3002, - 0x150f, 0x148e, 0x6f05, 0x9520, - 0x8d82, 0x9000, 0x1300, 0x3083, - 0x1724, 0x1082, 0xc043, 0xc642, - 0xc441, 0xc140, 0x41c3, 0x00e8, - 0x0006, 0x0d7e, 0xfeef, 0xd80a, - 0x8d08, 0xe808, 0x41c3, 0x00ed, - 0x0000, 0x0d6e, 0xfeef, 0xd80a, - 0xde32, 0xbe9f, 0x1601, 0x1483, - 0x16d1, 0x9082, 0x1601, 0x148c, - 0x1602, 0x1481, 0x1601, 0x1480, - 0xc042, 0xc141, 0xd80a, 0x41c3, - 0x00ee, 0x0005, 0x0d42, 0xfeef, - 0xc440, 0x1601, 0x1483, 0x8e21, - 0x8e04, 0x16cc, 0x9082, 0x8e80, - 0xc042, 0xc141, 0x41c3, 0x00ee, - 0x0005, 0xb990, 0xd80a, 0x0d22, + 0xb6c8, 0x08f6, 0xff6f, 0xc1a4, + 0x781d, 0x781d, 0x781d, 0x791d, + 0xb9c3, 0x1a04, 0x3042, 0x43db, + 0x8000, 0x0058, 0x1300, 0x3080, + 0xe80b, 0x215f, 0x0100, 0x1600, + 0x7082, 0x8000, 0x0057, 0x2044, + 0x0e00, 0x621d, 0xf005, 0x1600, + 0x708d, 0x8000, 0x0024, 0x08ba, + 0xff4f, 0x79af, 0x7b3d, 0x7a3d, + 0x2553, 0x907e, 0x7b7d, 0x7a5d, + 0x23c0, 0x0062, 0x7a5d, 0x2553, + 0x9081, 0xb8c3, 0x22c0, 0x0062, + 0x1a05, 0x3002, 0x1a07, 0x30c2, + 0x1a09, 0x3042, 0x1a06, 0x3082, + 0xc040, 0x41c3, 0x00f0, 0x0003, + 0x0bba, 0xfeef, 0xd80a, 0x256f, + 0x10c3, 0x8d00, 0xb8e2, 0x700c, + 0x20ca, 0x00e1, 0x1a0a, 0x3002, + 0x1524, 0x1082, 0x150f, 0x1481, + 0x2542, 0x1280, 0x9000, 0x1300, + 0x3083, 0x95c0, 0x8d82, 0xc043, + 0xc142, 
0xd80a, 0x41c3, 0x00f2, + 0x0006, 0xc441, 0x0b7e, 0xfeef, + 0xc640, 0x8d08, 0xe807, 0x41c3, + 0x00f7, 0x0000, 0x0b6e, 0xfeef, + 0xd80a, 0xde32, 0xbe9f, 0x1601, + 0x1483, 0x16d1, 0x9082, 0x1601, + 0x148c, 0x1602, 0x1481, 0x1601, + 0x1480, 0x47cb, 0x00f8, 0x0005, + 0xc141, 0xc042, 0xd80a, 0x41e1, + 0x0b42, 0xfeef, 0xc440, 0x1601, + 0x1483, 0x8e21, 0x8e04, 0x16cc, + 0x9082, 0x8e80, 0xc042, 0xc141, + 0x274f, 0x1401, 0xd80a, 0x0b26, 0xfeef, 0xc440, 0x8d08, 0x7014, - 0xf474, 0x1724, 0x1080, 0x0849, - 0x007e, 0xdd3e, 0xbd9f, 0x1501, - 0x1483, 0x16cc, 0x9082, 0x1501, - 0x148c, 0x47cb, 0x00f0, 0x0005, - 0x1502, 0x1481, 0x1501, 0x1480, - 0xc042, 0xc141, 0xd80a, 0x41e1, - 0x0ce6, 0xfeef, 0xc440, 0x1501, - 0x1483, 0xf843, 0xc042, 0xc141, - 0x274f, 0x1401, 0xd80a, 0x0cd2, + 0xf45b, 0x8d15, 0x082f, 0x007e, + 0xdd3e, 0xf842, 0x47cb, 0x00fa, + 0x0005, 0xc042, 0xc141, 0xd80a, + 0x41e1, 0x0b02, 0xfeef, 0xc440, + 0xf842, 0xc042, 0xc141, 0x274f, + 0x1401, 0xd80a, 0x0aee, 0xfeef, + 0xc440, 0x1300, 0x3080, 0x0835, + 0x003e, 0xdd65, 0xf835, 0x47cb, + 0x00fc, 0x0005, 0xc042, 0xc141, + 0xd80a, 0x41e1, 0x0ace, 0xfeef, + 0xc440, 0xf836, 0xc042, 0xc141, + 0x274f, 0x1401, 0xd80a, 0x0abe, 0xfeef, 0xc440, 0x1300, 0x3080, - 0x084b, 0x003e, 0xdd65, 0xbd9f, - 0x1501, 0x1483, 0x16cc, 0x9082, - 0x1501, 0x148c, 0x47cb, 0x00f2, - 0x0005, 0x1502, 0x1481, 0x1501, - 0x1480, 0xc042, 0xc141, 0xd80a, - 0x41e1, 0x0c9e, 0xfeef, 0xc440, - 0x1501, 0x1483, 0xf830, 0xc042, - 0xc141, 0x274f, 0x1401, 0xd80a, - 0x0c86, 0xfeef, 0xc440, 0x1300, - 0x3080, 0x0851, 0x007e, 0xdf71, - 0xbf9f, 0x1701, 0x1483, 0x16cc, - 0x9082, 0x1701, 0x148c, 0x45cb, - 0x00f4, 0x0005, 0x1702, 0x1481, - 0x1701, 0x1480, 0xc042, 0xc141, - 0xd80a, 0x41a1, 0x0c52, 0xfeef, - 0xc440, 0x1701, 0x1483, 0x8f21, - 0x8f04, 0x16cc, 0x9082, 0x8f80, - 0xc042, 0xc141, 0x254f, 0x1401, - 0xd80a, 0x0c36, 0xfeef, 0xc440, - 0x41c3, 0x9008, 0x01a0, 0x1902, - 0x0015, 0x208a, 0x0044, 0x1902, - 0x0014, 0x208a, 0x0144, 0x1902, - 0x0014, 0x208a, 0x01c4, 0x1904, - 
0x0014, 0x208a, 0x0088, 0xb100, - 0x208a, 0x0288, 0xb101, 0x208a, - 0x02c8, 0xb102, 0x208a, 0x03cc, - 0x1e00, 0x7004, 0x9009, 0xe180, - 0xd934, 0x40c3, 0x9005, 0xe0ee, - 0xb020, 0x0d0e, 0xffef, 0x18fc, - 0x8105, 0xc0a4, 0x1404, 0x341b, - 0xc6c6, 0x78e0, 0x8d21, 0x8d04, + 0x084f, 0x007e, 0xdf71, 0xbf9f, + 0x1701, 0x1483, 0x1701, 0x148c, + 0x1702, 0x1481, 0x1701, 0x1480, + 0x16cc, 0x9082, 0x45cb, 0x00fe, + 0x0005, 0xc042, 0xc141, 0xd80a, + 0x41a1, 0x0a8a, 0xfeef, 0xc440, + 0x1701, 0x1483, 0x8f21, 0x8f04, + 0x16cc, 0x9082, 0x8f80, 0xc042, + 0xc141, 0x254f, 0x1401, 0xd80a, + 0x0a6a, 0xfeef, 0xc440, 0x40c3, + 0x9008, 0x01a0, 0x1802, 0x0015, + 0x218a, 0x0044, 0x1802, 0x0054, + 0x218a, 0x0144, 0x1802, 0x0054, + 0x218a, 0x01c4, 0x1804, 0x0054, + 0x218a, 0x0088, 0xb020, 0x218a, + 0x0288, 0xb021, 0x218a, 0x02c8, + 0xb022, 0x208a, 0x03cc, 0x1e00, + 0x7004, 0x9009, 0xe180, 0xd934, + 0x40c3, 0x9005, 0xe0ee, 0xb020, + 0x0d3e, 0xffef, 0x18fc, 0x8105, + 0xc0a4, 0x1404, 0x341b, 0xc6c6, + 0xbd9f, 0x1501, 0x1483, 0x1501, + 0x148c, 0x1502, 0x1481, 0x1501, + 0x1480, 0x16cc, 0x9082, 0x7ee0, + 0x1501, 0x1483, 0x8d21, 0x8d04, 0x16cc, 0x9082, 0x8d80, 0x7ee0, 0x621a, 0xf004, 0x1801, 0x0052, 0x08fd, 0x8084, 0x7ee0, 0x78e0, 0x2014, 0x0082, 0xf003, 0x1802, 0x0054, 0x08ff, 0x8084, 0x7ee0, 0xc2e6, 0x1cfc, 0xb6c8, 0xc1a4, - 0x71cd, 0xbe8c, 0x40c3, 0x0000, - 0xbd61, 0x45cb, 0x9012, 0x0328, + 0xde1a, 0xbe8c, 0x40c3, 0x0000, + 0x9660, 0x45cb, 0x9012, 0x0328, 0x1e00, 0x7384, 0x8000, 0x0002, - 0xb500, 0xd846, 0xb501, 0x41c3, - 0x0147, 0x0000, 0x0b7a, 0xfeef, - 0xd80a, 0xd80a, 0x41c3, 0x014b, - 0x0001, 0x0b6e, 0xfeef, 0x42c1, - 0xcc30, 0x9520, 0x70ed, 0x7030, - 0x700c, 0xf406, 0xcc31, 0x9521, - 0x2038, 0x0040, 0x41c3, 0x014c, - 0x0000, 0x0b5e, 0xfe8f, 0x1600, - 0x708d, 0x8000, 0x0004, 0x43db, - 0x8000, 0x0578, 0x251a, 0x1f80, - 0x0020, 0x0000, 0xc041, 0x2005, - 0x0f81, 0x9004, 0x0032, 0x206f, - 0x0243, 0x9006, 0xc040, 0x9100, - 0xc042, 0x1600, 0x7080, 0x8000, - 0x0004, 0x201a, 0x0f80, 0x0020, - 0x0000, 
0x2005, 0x0f80, 0x9002, - 0x0040, 0x9000, 0x1a00, 0x3103, - 0x0d42, 0x00a0, 0x1b00, 0x3043, - 0xc000, 0xb8e0, 0x0210, 0x0001, - 0xc001, 0x2005, 0x0f81, 0x9005, - 0xe032, 0xc002, 0xb880, 0xb100, - 0xc001, 0x2005, 0x0f80, 0x9003, - 0xe090, 0x0c46, 0x0020, 0x1800, - 0x0005, 0x266f, 0x1cc3, 0x8e00, - 0xe885, 0x1e00, 0x7085, 0x9004, - 0x00c0, 0x0c8e, 0xffcf, 0x0c4a, - 0xff4f, 0x8e00, 0x0899, 0x0011, - 0x1600, 0x7080, 0x8000, 0x0004, - 0x41c3, 0x9002, 0x0086, 0x251a, - 0x1f8d, 0x0010, 0x0000, 0x201a, - 0x0f80, 0x0020, 0x0000, 0x7825, + 0xb500, 0xd8cb, 0xb501, 0x41c3, + 0x0152, 0x0000, 0x0996, 0xfeef, + 0xd80a, 0xd80a, 0x41c3, 0x0156, + 0x0001, 0x098a, 0xfeef, 0x42c1, + 0xcc34, 0x9520, 0x2038, 0x0042, + 0xcc35, 0x9521, 0x2038, 0x0040, + 0x41c3, 0x0157, 0x0000, 0x09be, + 0xfeaf, 0x7844, 0x1e00, 0x7043, + 0x8000, 0x05e4, 0x206f, 0x0143, + 0x88c2, 0x1a00, 0x3103, 0x261a, + 0x1f80, 0x0020, 0x0000, 0x43d9, + 0xc040, 0x2005, 0x0f81, 0x9004, + 0x0032, 0x206f, 0x0143, 0x90a7, + 0x9100, 0xc042, 0xc000, 0x2005, + 0x0f80, 0x9002, 0x0040, 0x0d8a, + 0x00a0, 0x9000, 0xbde0, 0x0286, + 0x0001, 0xc000, 0x2005, 0x0f81, + 0x9005, 0xe032, 0xc002, 0xb880, + 0xb100, 0xc000, 0x2005, 0x0f80, + 0x9003, 0xe090, 0x0ce2, 0x0020, + 0x1800, 0x0005, 0x266f, 0x1cc3, + 0x8e00, 0xe886, 0x1e00, 0x7085, + 0x9004, 0x00c0, 0x0cba, 0xffcf, + 0x0c6a, 0xff4f, 0x1600, 0x7080, + 0x8000, 0x0001, 0x0841, 0x001e, + 0x43c3, 0x9004, 0x0014, 0x9320, + 0xca01, 0x080f, 0x01b0, 0x214f, + 0x0082, 0x080d, 0x00d1, 0x2185, + 0x0108, 0x4220, 0xb340, 0xd823, + 0x42c3, 0x9005, 0xe034, 0xb200, + 0x702c, 0x40c3, 0x9003, 0xe034, + 0xb020, 0xb8b1, 0xb020, 0x0ce2, + 0x0020, 0xb221, 0x8e00, 0x089d, + 0x0031, 0x206f, 0x0143, 0x8802, + 0x41c3, 0x9002, 0x0086, 0x1600, + 0x7082, 0x8000, 0x0000, 0x201a, + 0x0f80, 0x0020, 0x0000, 0x231a, + 0x3f9b, 0x0010, 0x0000, 0x7825, 0x9000, 0x781d, 0x781d, 0x781d, - 0x781d, 0xb8c2, 0x080d, 0x0091, - 0xce0d, 0x5032, 0xce07, 0xf020, - 0x1600, 0x7082, 0x8000, 0x0000, - 0x081d, 0x00f1, 0xbae7, 0x42c3, - 
0x8000, 0x043c, 0xce0e, 0x705c, - 0x5032, 0x42c3, 0x8000, 0x0424, - 0xce08, 0xf00d, 0xbae7, 0x42c3, - 0x8000, 0x0430, 0xce0b, 0x705c, - 0x5032, 0x42c3, 0x8000, 0x0418, - 0xce05, 0x705c, 0x5014, 0xc001, - 0x7825, 0x0d1e, 0x00a0, 0x90c0, - 0x0caa, 0x00a0, 0x40a1, 0xc001, - 0xbea0, 0x2005, 0x0f81, 0x9003, - 0xfe86, 0xb1c0, 0x0baa, 0x0000, - 0x0bee, 0xff8f, 0x0a5e, 0xfecf, - 0x0d86, 0xff4f, 0x70cd, 0x70ad, - 0xf002, 0x71a5, 0xca04, 0x0d63, - 0x1023, 0x706c, 0x255a, 0x149f, - 0x2300, 0x37cc, 0xb4c1, 0xf003, - 0x7164, 0x0beb, 0x8213, 0x251a, - 0x1f80, 0x0000, 0x1000, 0x7865, - 0x781b, 0x2005, 0x0f80, 0x9002, - 0x0140, 0x9000, 0x7a0f, 0x0a13, - 0x00e1, 0xbac4, 0x8c02, 0x200f, - 0x00c0, 0xac02, 0xf1ea, 0x8c03, - 0x6821, 0xac23, 0x792f, 0x710c, - 0x2714, 0x3041, 0xea05, 0xba61, - 0x0aff, 0x8031, 0x781b, 0x7163, - 0xa902, 0xa963, 0xf1da, 0x1600, - 0x708b, 0x8000, 0x0025, 0x47cb, - 0x8000, 0x0667, 0x0b41, 0x1010, - 0x1600, 0x7080, 0x8000, 0x0032, - 0x8fa0, 0x42eb, 0xf8b8, 0x4759, - 0xf002, 0x71a5, 0x8f01, 0x0829, - 0x0342, 0x0b13, 0x103e, 0x79af, - 0x7acf, 0x700c, 0x0a56, 0xff2f, - 0x706c, 0x0beb, 0x907e, 0x79af, - 0x7acf, 0x710c, 0x0a46, 0xff2f, - 0x706c, 0xf1ed, 0x1600, 0x708b, - 0x8000, 0x0058, 0x0b41, 0x1010, - 0x1600, 0x7080, 0x8000, 0x0065, - 0x8fa2, 0x42eb, 0xf8a6, 0x4759, - 0xf002, 0x71a5, 0x8f03, 0x0829, - 0x0342, 0x0b13, 0x103e, 0x79af, - 0x7acf, 0x700c, 0x0a0e, 0xff2f, - 0x706c, 0x0beb, 0x907e, 0x79af, - 0x7acf, 0x710c, 0x09fe, 0xff2f, - 0x706c, 0xf1ed, 0xc000, 0x0811, - 0x033e, 0x710c, 0x0ab6, 0x0020, - 0xd80d, 0x700c, 0x0a5a, 0x0140, - 0x0e96, 0xfe8f, 0x70ed, 0x700c, - 0x0aa2, 0x0020, 0x1b00, 0x33c2, - 0xf002, 0x71e5, 0xca04, 0x0f2f, - 0x1022, 0x70cd, 0xf018, 0x2000, - 0x0f82, 0x8000, 0x1c50, 0x271a, - 0x1f80, 0x0000, 0x1000, 0x78c5, - 0x781b, 0x2005, 0x0f80, 0x9002, - 0x0140, 0x9020, 0xaa20, 0xb0c0, - 0x71c5, 0x0edf, 0x9232, 0x2616, - 0x13c0, 0xf1e5, 0xc000, 0x45cb, - 0x8000, 0x0661, 0xb8e6, 0x700c, - 0xf403, 0xf02b, 0x7104, 0xad00, - 0x780f, 0x0849, 0x0095, 
0xe809, + 0x781d, 0xb8c2, 0x0823, 0x0090, + 0x0827, 0x00f1, 0x228b, 0x803e, + 0x42c3, 0x8000, 0x042c, 0xce0a, + 0x705c, 0x5016, 0x42c3, 0x8000, + 0x0444, 0xce10, 0xf011, 0xce09, + 0x5016, 0xce0f, 0xf00e, 0x228b, + 0x803e, 0x42c3, 0x8000, 0x0420, + 0xce07, 0x705c, 0x5016, 0x42c3, + 0x8000, 0x0438, 0xce0d, 0x705c, + 0x5034, 0xc000, 0x7825, 0x0d1e, + 0x00a0, 0x90c0, 0x0caa, 0x00a0, + 0x4063, 0xc000, 0xbea0, 0x2005, + 0x0f81, 0x9003, 0xfe86, 0xb1c0, + 0x0bfe, 0x0000, 0x0bca, 0xff8f, + 0x083e, 0xfecf, 0x0d66, 0xff4f, + 0xca06, 0x704c, 0x708d, 0xf038, + 0x245a, 0x149e, 0x2600, 0x3f83, + 0x8000, 0x05e4, 0xb341, 0xf02d, + 0x241a, 0x1f81, 0x0000, 0x1000, + 0x79c5, 0x793b, 0x2105, 0x0f81, + 0x9002, 0x0140, 0x9120, 0x7f2f, + 0x0e11, 0x13e1, 0x716d, 0x8b22, + 0x210f, 0x0381, 0xab22, 0xf018, + 0x8b23, 0x2744, 0x17db, 0x7124, + 0xab23, 0x792f, 0x2614, 0x3041, + 0x2100, 0x0f81, 0x8000, 0x05e4, + 0x4769, 0xa9c3, 0x0b0f, 0x3010, + 0x2342, 0x305b, 0x0bfd, 0xb031, + 0x7ffb, 0xa9e2, 0x71c5, 0x0ead, + 0x9214, 0x7185, 0x0c95, 0x9024, + 0x70cd, 0x1600, 0x708f, 0x8000, + 0x0025, 0xef28, 0x1600, 0x7080, + 0x8000, 0x0032, 0x1600, 0x709b, + 0x8000, 0x06d3, 0x71cd, 0xb8e3, + 0x26ca, 0x10a1, 0xf014, 0x0f15, + 0x103e, 0x212f, 0x06c7, 0x700c, + 0x42c1, 0x08fe, 0xff2f, 0x706c, + 0x0f13, 0x107e, 0x212f, 0x06c7, + 0x710c, 0x42c1, 0x08ea, 0xff2f, + 0x706c, 0x7167, 0x40c3, 0x8000, + 0x06d3, 0x8801, 0x08d3, 0x86c5, + 0x1600, 0x708f, 0x8000, 0x0058, + 0xef28, 0x1600, 0x7080, 0x8000, + 0x0065, 0x71cd, 0xb8e3, 0x40c3, + 0x8000, 0x06d3, 0x1002, 0x009b, + 0x26ca, 0x10a1, 0xf014, 0x0f15, + 0x103e, 0x212f, 0x06c7, 0x700c, + 0x42c1, 0x08a6, 0xff2f, 0x706c, + 0x0f13, 0x107e, 0x212f, 0x06c7, + 0x710c, 0x42c1, 0x0892, 0xff2f, + 0x706c, 0x7167, 0x40c3, 0x8000, + 0x06d3, 0x8803, 0x08d3, 0x86c5, + 0x0d09, 0x133f, 0x710c, 0xf008, + 0x0afe, 0x0000, 0x0ade, 0x0020, + 0xd80d, 0x700c, 0x0d12, 0x0140, + 0x0c56, 0xfe8f, 0x1e00, 0x7003, + 0x8000, 0x05e4, 0x0ac6, 0x0020, + 0x700c, 0xca06, 0x70ed, 0x704c, + 0xf019, 0x702c, 
0xf014, 0x221a, + 0x0f83, 0x0000, 0x1000, 0x2300, + 0x1f8c, 0x8000, 0x1b9c, 0x7165, + 0x7b25, 0x7b7b, 0x2305, 0x0f83, + 0x9002, 0x0140, 0x93c0, 0xacc0, + 0xb320, 0x7124, 0x09db, 0x8214, + 0x7144, 0x0ad3, 0x8024, 0x2716, + 0x108b, 0x0d63, 0x11be, 0x700c, + 0x46cb, 0x8000, 0x06cd, 0xae00, + 0x780f, 0x084d, 0x0095, 0xe81c, 0x1600, 0x7080, 0x8000, 0x0058, - 0x7014, 0x710c, 0xf007, 0x1600, - 0x7080, 0x8000, 0x0025, 0x7014, - 0x700c, 0xf3ee, 0x0842, 0xfecf, - 0x8d00, 0x7104, 0x0d4e, 0xfeef, - 0x780f, 0x0e06, 0xfe8f, 0x09e2, - 0x00e0, 0x1d01, 0x1003, 0x0a0e, - 0x0020, 0x760c, 0x8d00, 0xf1dc, - 0x0d32, 0xfeef, 0x730c, 0xc000, - 0xb8e5, 0xf29e, 0x47cb, 0x008a, - 0x0000, 0xf003, 0x68c1, 0x78cf, - 0x7114, 0x0126, 0x002d, 0xadc0, - 0xe808, 0x1600, 0x7080, 0x8000, - 0x0058, 0x7014, 0x710c, 0xf008, - 0x1600, 0x7080, 0x8000, 0x0025, - 0x7014, 0x700c, 0xf3ec, 0x0fe2, - 0xfe8f, 0x8d00, 0x7104, 0x0cee, - 0xfeef, 0x780f, 0x0da2, 0xfe8f, - 0x700c, 0xc040, 0xad01, 0x0c52, - 0xff2f, 0x710c, 0x8d01, 0x205f, - 0x0641, 0x8d00, 0x205f, 0x0c80, - 0x6038, 0x60b8, 0x88d6, 0x1600, - 0x7080, 0x8000, 0x0004, 0xc043, - 0x0baa, 0xff2f, 0x710c, 0x712c, - 0x0822, 0x0060, 0x4318, 0x2684, - 0x1001, 0xd8ff, 0x78d5, 0x7e10, - 0x4063, 0x41c1, 0x714c, 0x0d5a, - 0x0020, 0x706c, 0x1600, 0x7080, - 0x8000, 0x001d, 0x085b, 0x001e, - 0xc003, 0x201a, 0x0f80, 0x0020, - 0x0000, 0x2005, 0x0f81, 0x9003, - 0xfe86, 0x2005, 0x0f80, 0x9002, - 0x0086, 0x9000, 0x2046, 0x0340, - 0xb882, 0xb100, 0x740c, 0x0f82, - 0xfeaf, 0x41e1, 0x4063, 0x41c1, - 0x704c, 0x0d16, 0x0020, 0xdb40, - 0x274f, 0x1401, 0x0f6a, 0xfeaf, - 0x740c, 0x4063, 0x41c1, 0x704c, - 0x0cfe, 0x0020, 0xdb30, 0xf005, - 0xc000, 0x7104, 0xc040, 0xc000, - 0x0837, 0x0133, 0x750c, 0x1600, - 0x7081, 0x8000, 0x001f, 0xc000, - 0x09e9, 0x800e, 0xc000, 0x201a, - 0x0f80, 0x0020, 0x0000, 0x2005, - 0x0f81, 0x9003, 0xfe86, 0x2005, - 0x0f80, 0x9002, 0x0086, 0x9000, - 0xb8a0, 0xb100, 0xf1e2, 0x08ce, - 0x0000, 0x8d00, 0xf16c, 0x0bf6, - 0xfeef, 0x730c, 0x0c8a, 0xff8f, - 0x70ad, 
0x706c, 0xf002, 0x7164, - 0xca04, 0x0b2f, 0x0022, 0x702c, - 0xf016, 0x231a, 0x0f80, 0x0000, - 0x1000, 0x7825, 0x781b, 0x2005, - 0x0f82, 0x9002, 0x0140, 0x2116, - 0x00c0, 0x2032, 0x0f80, 0x8000, - 0x1c50, 0x7124, 0xb200, 0x09df, - 0x8212, 0xf1e7, 0xc001, 0x2005, - 0x0f81, 0x9005, 0xe032, 0xc002, - 0xb100, 0x0b8e, 0xfeef, 0x700c, - 0x0896, 0x0080, 0x0eaa, 0x00a0, - 0xd807, 0x25ab, 0x10c4, 0x7fff, - 0xf1ff, 0x78e0, 0x2044, 0x0200, - 0x781d, 0x781d, 0x781d, 0x204e, - 0x008e, 0x7ee0, 0xc2e2, 0x45cb, + 0x7014, 0x710c, 0xf212, 0x0e0e, + 0xfe8f, 0x8e00, 0x7104, 0x0bbe, + 0xfeef, 0x780f, 0x0bd2, 0xfe8f, + 0x0c8e, 0x00e0, 0x1e01, 0x1003, + 0x0a42, 0x0020, 0x760c, 0x8e00, + 0x7104, 0xf1e3, 0x1600, 0x7080, + 0x8000, 0x0025, 0x7014, 0x700c, + 0xf5e7, 0xf1f8, 0x0b8e, 0xfeef, + 0x730c, 0xbde5, 0xf4ae, 0xf0b9, + 0xe809, 0x1600, 0x7080, 0x8000, + 0x0058, 0x7014, 0x710c, 0xf40a, + 0xf0a3, 0x1600, 0x7080, 0x8000, + 0x0025, 0x7014, 0x700c, 0xf29d, + 0x0daa, 0xfe8f, 0x1600, 0x7080, + 0x8000, 0x06cd, 0x7104, 0x0b56, + 0xfeef, 0x780f, 0x0b6a, 0xfe8f, + 0x700c, 0xc041, 0x40c3, 0x8000, + 0x06cd, 0x1801, 0x0003, 0x0b72, + 0xff2f, 0x710c, 0x40c3, 0x8000, + 0x06cd, 0x8801, 0x1600, 0x7081, + 0x8000, 0x06cd, 0x205f, 0x0640, + 0x215f, 0x0c81, 0x6038, 0x2000, + 0x0f80, 0x8000, 0x0580, 0x88a3, + 0x206f, 0x0143, 0x88c2, 0x261a, + 0x1f8e, 0x0020, 0x0000, 0x2605, + 0x1f8f, 0x9002, 0x0086, 0x0ab2, + 0xff2f, 0x710c, 0x97e0, 0x712c, + 0x0ffa, 0x0020, 0x4318, 0x2605, + 0x1f8e, 0x9003, 0xfe86, 0x2746, + 0x1340, 0xc043, 0xb600, 0xbde6, + 0xd8ff, 0x258a, 0x1fc7, 0x25ca, + 0x1001, 0x4063, 0x41a1, 0x714c, + 0x0d4e, 0x0020, 0x706c, 0x1600, + 0x7080, 0x8000, 0x001d, 0x087f, + 0x001e, 0xc003, 0x41c3, 0x0093, + 0x0000, 0xb882, 0xb600, 0x0d26, + 0xfeaf, 0x740c, 0x4063, 0x41a1, + 0x704c, 0x0d26, 0x0020, 0xdb40, + 0x41c3, 0x0094, 0x0000, 0x0d0e, + 0xfeaf, 0x740c, 0x4063, 0x41a1, + 0x704c, 0x0d0e, 0x0020, 0xdb30, + 0x4063, 0x41a1, 0x724c, 0x0d02, + 0x0020, 0x706c, 0xf01b, 0x1600, + 0x7081, 0x8000, 0x001f, 0xc001, + 
0x0925, 0x000e, 0xc001, 0x201a, + 0x0f81, 0x0020, 0x0000, 0x2105, + 0x0f80, 0x9003, 0xfe86, 0x2105, + 0x0f81, 0x9002, 0x0086, 0x9120, + 0xb9a0, 0xb020, 0xc001, 0x7104, + 0xc041, 0xc001, 0x08cb, 0x8134, + 0x750c, 0x08d2, 0x0000, 0x1600, + 0x7080, 0x8000, 0x06cd, 0x68e1, + 0x78ef, 0x7114, 0x1e00, 0x73c2, + 0x8000, 0x06cd, 0x069c, 0xffce, + 0x0a1a, 0xfeef, 0x730c, 0x0c1a, + 0xff8f, 0xca06, 0x706f, 0x704c, + 0xf017, 0x706c, 0xf012, 0x221a, + 0x0f81, 0x0000, 0x1000, 0x7965, + 0x793b, 0x2105, 0x0f8c, 0x9002, + 0x0140, 0x2332, 0x1f81, 0x8000, + 0x1b9c, 0x7164, 0x7165, 0xb420, + 0x0bdf, 0x8214, 0x7144, 0x0ad7, + 0x8024, 0x2316, 0x308b, 0xc000, + 0x2005, 0x0f81, 0x9005, 0xe032, + 0xc002, 0xb100, 0x09b2, 0xfeef, + 0x700c, 0x0836, 0x0080, 0x08c6, + 0x00e0, 0xd807, 0x23ab, 0x30c4, + 0x7fff, 0xf000, 0xc2e2, 0x45cb, 0x9004, 0x0004, 0x9500, 0xb8a1, - 0xb500, 0x0f46, 0xffaf, 0x740c, - 0xd81f, 0x0f3e, 0xffaf, 0x1df6, - 0x105d, 0xc6c2, 0xc2e2, 0x0f32, + 0xb500, 0x0ed6, 0xffaf, 0x740c, + 0xd81f, 0x0ece, 0xffaf, 0x1df6, + 0x105d, 0xc6c2, 0xc2e2, 0x0ec2, 0xffaf, 0xd81f, 0x45cb, 0x9004, - 0x01f0, 0x740c, 0x0f22, 0xffaf, + 0x01f0, 0x740c, 0x0eb2, 0xffaf, 0x1d00, 0x1005, 0x150a, 0x9700, 0xb881, 0x1d0a, 0x901c, 0xc6c2, 0xc0f1, 0x1600, 0x7081, 0x8000, - 0x0012, 0x218c, 0x8203, 0x0e38, - 0x008e, 0x080e, 0x0000, 0xc0d1, + 0x0012, 0x218c, 0x8203, 0x0864, + 0x00ce, 0x080e, 0x0000, 0xc0d1, 0x7ee0, 0x78e0, 0xc2e2, 0x1600, 0x7080, 0x8000, 0x0001, 0x081b, 0x003e, 0xd80a, 0x45cb, 0x9005, - 0xe036, 0x0ede, 0xffaf, 0x1d00, + 0xe036, 0x0e6e, 0xffaf, 0x1d00, 0x1045, 0x1d00, 0x1005, 0xc6c2, 0x40c3, 0x9004, 0x00f2, 0x1800, 0x0045, 0x7fe0, 0x1800, 0x0005, - 0xc2e6, 0x1cfc, 0xb6c8, 0xc1a8, - 0x4608, 0x40c3, 0x8000, 0x0665, + 0xc2e6, 0x1cfc, 0xb6c8, 0xc1a7, + 0xc044, 0x40c3, 0x8000, 0x06d1, 0x8800, 0xc146, 0x702c, 0x201a, 0x0f80, 0x0000, 0x2000, 0x2005, - 0x0f80, 0x9002, 0x0040, 0x9000, - 0xc044, 0x0fb2, 0xfe6f, 0x700c, - 0x1200, 0x3083, 0xd981, 0x70ad, - 0xd8ff, 0xb98e, 0x754c, 0xc543, - 0xc542, 0xc541, 
0x08a2, 0xfeaf, - 0xc540, 0x700c, 0x083a, 0xfeaf, - 0x712c, 0x09aa, 0xff0f, 0x09b2, - 0xff2f, 0xc047, 0x40c3, 0x0000, - 0x2710, 0x0ed6, 0xffaf, 0xd908, - 0x40c3, 0x900f, 0xe0c0, 0x1800, - 0x0205, 0xb0a5, 0xc004, 0x790f, - 0x0f62, 0xfe6f, 0x700c, 0x702c, - 0x716f, 0xd8ff, 0xb98e, 0x754c, - 0x746c, 0xc543, 0xc542, 0x1c04, - 0x36c0, 0x0856, 0xfeaf, 0xc540, - 0x7fcf, 0xf8bb, 0xc543, 0xc742, - 0xc541, 0x0846, 0xfeaf, 0xc540, - 0xbec1, 0xc645, 0xc005, 0xc543, - 0x702c, 0xc042, 0xd8ff, 0x764c, - 0x746c, 0x1c04, 0x36c0, 0x082a, - 0xfeaf, 0xc540, 0xc005, 0xc543, - 0x702c, 0xc042, 0x750c, 0xc041, - 0xd8ff, 0x764c, 0xdb28, 0x0812, - 0xfeaf, 0xc540, 0x0a5a, 0xfeaf, - 0xd8ff, 0xc104, 0xc007, 0x46cb, - 0x0000, 0x3100, 0x2014, 0x0041, - 0x2142, 0x0400, 0xb8c0, 0x6038, - 0xe030, 0xc044, 0xc006, 0xe808, - 0x1c0c, 0x3441, 0xc541, 0xc540, - 0xc742, 0xf06b, 0xf8a1, 0xc543, - 0xc541, 0xc540, 0x0fd2, 0xfe6f, - 0xc742, 0xf89b, 0xc543, 0xc742, - 0x1c04, 0x36c0, 0x0fc2, 0xfe6f, - 0xc540, 0xf894, 0x1c0c, 0x36c0, - 0xc742, 0xc541, 0x0fb2, 0xfe6f, - 0xc540, 0xf896, 0xc543, 0xc742, - 0xc541, 0x0fa6, 0xfe6f, 0xc540, - 0x750c, 0xc543, 0xc742, 0xc041, - 0xf88d, 0x0f96, 0xfe6f, 0xc540, - 0xf888, 0x1c0c, 0x36c0, 0xc742, - 0xc541, 0x0f86, 0xfe6f, 0xc540, - 0x750c, 0xc043, 0xf883, 0xc742, - 0xc541, 0x0f76, 0xfe6f, 0xc540, - 0xf886, 0xc543, 0xc742, 0xc541, - 0x0f66, 0xfe6f, 0xc540, 0xc005, - 0xc543, 0x702c, 0xc042, 0xd8ff, - 0x764c, 0x746c, 0x1c04, 0x36c0, - 0x0f4e, 0xfe6f, 0xc540, 0xf877, - 0x1c0c, 0x36c0, 0xc742, 0xc541, - 0x0f3e, 0xfe6f, 0xc540, 0xf879, - 0xc543, 0xc742, 0xc541, 0x0f32, - 0xfe6f, 0xc540, 0xc005, 0xc543, - 0x702c, 0xc042, 0x750c, 0xc041, - 0xd8ff, 0x764c, 0x746c, 0x0f1a, - 0xfe6f, 0xc540, 0x1c0c, 0x36c0, - 0xc742, 0xc541, 0xc540, 0xd8ff, - 0x41c1, 0x724c, 0x0f02, 0xfe6f, - 0xdb08, 0xc004, 0x702c, 0x754c, - 0x780f, 0xc044, 0xc304, 0xd8ff, - 0xc543, 0xc542, 0xc541, 0x0eea, - 0xfe6f, 0xc540, 0xc006, 0x46cb, - 0x0000, 0x2100, 0x7014, 0xf26c, - 0xf857, 0xc543, 0xc541, 0xc540, - 0x0ece, 
0xfe6f, 0xc742, 0xf85a, - 0xc543, 0xc742, 0x1c04, 0x36c0, - 0x0ebe, 0xfe6f, 0xc540, 0xf84d, - 0x1c0c, 0x36c0, 0xc742, 0xc541, - 0x0eae, 0xfe6f, 0xc540, 0xf84c, - 0xc543, 0xc742, 0xc541, 0x0ea2, - 0xfe6f, 0xc540, 0x750c, 0xc543, - 0xc742, 0xc041, 0xf84c, 0x0e92, - 0xfe6f, 0xc540, 0xf841, 0x1c0c, - 0x36c0, 0xc742, 0xc541, 0x0e82, - 0xfe6f, 0xc540, 0x750c, 0xc043, - 0xf83c, 0xc742, 0xc541, 0x0e72, - 0xfe6f, 0xc540, 0xf83c, 0xc543, - 0xc742, 0xc541, 0x0e62, 0xfe6f, - 0xc540, 0xc005, 0xc543, 0x702c, - 0xc042, 0xd8ff, 0x764c, 0x746c, - 0x1c04, 0x36c0, 0x0e4a, 0xfe6f, - 0xc540, 0xf830, 0x1c0c, 0x36c0, - 0xc742, 0xc541, 0x0e3a, 0xfe6f, - 0xc540, 0xf82f, 0xc543, 0xc742, - 0xc541, 0x0e2e, 0xfe6f, 0xc540, - 0xc005, 0xc543, 0x702c, 0xc042, - 0x750c, 0xc041, 0xd8ff, 0x764c, - 0x746c, 0x0e16, 0xfe6f, 0xc540, - 0x1c0c, 0x36c0, 0xc742, 0xc541, - 0xc540, 0xf007, 0x1c0c, 0x3441, - 0xc541, 0xc540, 0xc742, 0xd8ff, - 0x41c1, 0x714c, 0x0df2, 0xfe6f, - 0xdb08, 0xc304, 0xd8ff, 0x702c, - 0x754c, 0xc543, 0xc542, 0xc541, - 0x0dde, 0xfe6f, 0xc540, 0x702c, - 0xd8ff, 0xb98f, 0x754c, 0x746c, - 0x1c0c, 0x30c1, 0xc542, 0xc541, - 0x0dc6, 0xfe6f, 0xc540, 0xd8ff, - 0x702c, 0x754c, 0xdb30, 0xc543, - 0xc542, 0xc541, 0x0db2, 0xfe6f, - 0xc540, 0x0fe2, 0xfe6f, 0xd8ff, - 0xc005, 0xc543, 0xd980, 0xc042, - 0xd8ff, 0x764c, 0x746c, 0xc541, - 0x0d96, 0xfe6f, 0x1c00, 0x36c0, - 0xc0a8, 0x1404, 0x341b, 0xc6c6, - 0xd8ff, 0x41c1, 0x714c, 0xdb08, + 0x0f80, 0x9002, 0x0040, 0x90c0, + 0x0d86, 0xfe6f, 0x700c, 0x1200, + 0x3083, 0xd981, 0x70ad, 0xd8ff, + 0xb98e, 0x754c, 0xc543, 0xc542, + 0xc541, 0x0e62, 0xfe6f, 0xc540, + 0x700c, 0x0dfe, 0xfe6f, 0x712c, + 0x08a6, 0xff0f, 0x08ae, 0xff2f, + 0xc045, 0x40c3, 0x0000, 0x2710, + 0x0e66, 0xffaf, 0xd908, 0x40c3, + 0x900f, 0xe0c0, 0x1800, 0x0205, + 0xb0a5, 0x79cf, 0x0d3a, 0xfe6f, + 0x700c, 0x702c, 0x716f, 0xd8ff, + 0xb98e, 0x754c, 0x746c, 0xc543, + 0xc542, 0x1c04, 0x36c0, 0x0e16, + 0xfe6f, 0xc540, 0xd8ff, 0x702c, + 0x754c, 0x746c, 0xc543, 0xc542, + 0xc541, 0x0e02, 0xfe6f, 0xc540, + 
0x0846, 0xfeaf, 0xd8ff, 0xc004, + 0x702c, 0x704c, 0x7f0f, 0xd8ff, + 0x746c, 0xc543, 0xc742, 0xc541, + 0x0de2, 0xfe6f, 0xc540, 0xd8ff, + 0x702c, 0x754c, 0xdb28, 0xc543, + 0xc542, 0xc541, 0x0dce, 0xfe6f, + 0xc540, 0xc105, 0x79d4, 0x4968, + 0xb8c0, 0x6038, 0xe030, 0xc045, + 0xc004, 0x46cb, 0x0000, 0x3100, + 0xb8c1, 0xc044, 0xc006, 0xe805, + 0x1c0c, 0x3441, 0xf067, 0xf8a5, + 0xc543, 0xc742, 0xc541, 0x0d9e, + 0xfe6f, 0xc540, 0xf89e, 0xc543, + 0xc742, 0x1c04, 0x36c0, 0x0d8e, + 0xfe6f, 0xc540, 0xf897, 0x1c0c, + 0x36c0, 0xc742, 0xc541, 0x0d7e, + 0xfe6f, 0xc540, 0xf899, 0xc543, + 0xc742, 0xc541, 0x0d6e, 0xfe6f, + 0xc540, 0x750c, 0xc041, 0xf892, + 0xc543, 0xc742, 0x0d5e, 0xfe6f, + 0xc540, 0xf88c, 0x1c0c, 0x36c0, + 0xc742, 0xc541, 0x0d4e, 0xfe6f, + 0xc540, 0x750c, 0xc043, 0xf887, + 0xc742, 0xc541, 0x0d3e, 0xfe6f, + 0xc540, 0xf88a, 0xc543, 0xc742, + 0xc541, 0x0d32, 0xfe6f, 0xc540, + 0xc004, 0x702c, 0x764c, 0xc042, + 0xd8ff, 0x746c, 0xc543, 0x1c04, + 0x36c0, 0x0d1a, 0xfe6f, 0xc540, + 0xf87a, 0x1c0c, 0x36c0, 0xc742, + 0xc541, 0x0d0a, 0xfe6f, 0xc540, + 0xf87c, 0xc543, 0xc742, 0xc541, + 0x0cfa, 0xfe6f, 0xc540, 0xc004, + 0x702c, 0x764c, 0xc042, 0x750c, + 0xc041, 0xd8ff, 0x746c, 0xc543, + 0x0ce2, 0xfe6f, 0xc540, 0x1c0c, + 0x36c0, 0xf86c, 0xc742, 0xc541, + 0x0cd2, 0xfe6f, 0xc540, 0xc005, + 0x70ad, 0x702c, 0x780f, 0xc045, + 0xc305, 0xd8ff, 0x754c, 0xc543, + 0xc542, 0xc541, 0x0cb6, 0xfe6f, + 0xc540, 0xc006, 0x46cb, 0x0000, + 0x2100, 0x7014, 0xf26c, 0x714c, + 0xd8ff, 0x41c1, 0x746c, 0x4358, + 0xc543, 0xc742, 0xc541, 0x0c96, + 0xfe6f, 0xc540, 0xf85c, 0xc543, + 0xc742, 0x1c04, 0x36c0, 0x0c86, + 0xfe6f, 0xc540, 0xf852, 0x1c0c, + 0x36c0, 0xc742, 0xc541, 0x0c76, + 0xfe6f, 0xc540, 0xf84b, 0xc543, + 0xc742, 0xc541, 0x0c66, 0xfe6f, + 0xc540, 0x750c, 0xc041, 0xf850, + 0xc543, 0xc742, 0x0c56, 0xfe6f, + 0xc540, 0xf847, 0x1c0c, 0x36c0, + 0xc742, 0xc541, 0x0c46, 0xfe6f, + 0xc540, 0x750c, 0xc043, 0xf842, + 0xc742, 0xc541, 0x0c36, 0xfe6f, + 0xc540, 0xf83c, 0xc543, 0xc742, + 0xc541, 0x0c2a, 0xfe6f, 
0xc540, + 0xc004, 0x702c, 0x764c, 0xc042, + 0xd8ff, 0x746c, 0xc543, 0x1c04, + 0x36c0, 0x0c12, 0xfe6f, 0xc540, + 0xf835, 0x1c0c, 0x36c0, 0xc742, + 0xc541, 0x0c02, 0xfe6f, 0xc540, + 0xf82e, 0xc543, 0xc742, 0xc541, + 0x0bf2, 0xfe6f, 0xc540, 0xc004, + 0x702c, 0x764c, 0xc042, 0x750c, + 0xc041, 0xd8ff, 0x746c, 0xc543, + 0x0bda, 0xfe6f, 0xc540, 0x1c0c, + 0x36c0, 0xf004, 0x1c0c, 0x3441, + 0xf825, 0xc742, 0xc541, 0x0bc6, + 0xfe6f, 0xc540, 0xc305, 0x702c, + 0xd8ff, 0x754c, 0x4528, 0xc143, + 0xc142, 0xc141, 0x0bae, 0xfe6f, + 0xc140, 0x702c, 0xd8ff, 0xb98f, + 0x754c, 0x746c, 0x1c0c, 0x30c1, + 0xc542, 0xc541, 0x0b96, 0xfe6f, + 0xc540, 0xd8ff, 0x702c, 0x754c, + 0xdb30, 0xc543, 0xc542, 0xc541, + 0x0b82, 0xfe6f, 0xc540, 0xc004, + 0x702c, 0x764c, 0xc042, 0xd8ff, + 0xdb2e, 0xc543, 0xc541, 0x0b6e, + 0xfe6f, 0x1c00, 0x3041, 0x0d96, + 0xfe6f, 0xd8ff, 0xd8ff, 0xd980, + 0x754c, 0x746c, 0xc543, 0xc542, + 0xc541, 0x0b52, 0xfe6f, 0xc540, + 0xc0a7, 0x1404, 0x341b, 0xc6c6, + 0xd8ff, 0x41c1, 0x714c, 0x746c, 0x7ee0, 0x78e0, 0xd8ff, 0x41c1, - 0x714c, 0x746c, 0x7ee0, 0x78e0, + 0x714c, 0xdb08, 0x7ee0, 0x78e0, 0xd8ff, 0x41c1, 0x724c, 0xdb08, 0x7ee0, 0x78e0, 0xd8ff, 0x702c, 0x704c, 0x746c, 0x7ee0, 0x78e0, 0xd8ff, 0x41c1, 0x724c, 0x746c, 0x7ee0, 0x78e0, 0xc2e6, 0x1cfc, - 0xb6c8, 0x2482, 0x3103, 0x4548, - 0x4338, 0xc044, 0xc088, 0x41c3, - 0x0000, 0x7f00, 0xda51, 0x0e96, - 0xffaf, 0xc346, 0x40c3, 0x9003, - 0xe024, 0x70ed, 0x1800, 0x0fc5, - 0x180c, 0x0fc5, 0x18fe, 0x83c4, - 0xc006, 0xd940, 0xe0b0, 0xd880, - 0xc047, 0x20ca, 0x0041, 0xc047, - 0xf002, 0x71e5, 0x7eef, 0x0ef3, - 0x1095, 0xc004, 0x08f7, 0x83ae, - 0x40c1, 0x712c, 0x0966, 0xfeef, - 0x4263, 0xc006, 0x41c1, 0x4263, + 0xb6c8, 0x2482, 0x3203, 0xc244, + 0xc143, 0xc048, 0xc089, 0x41c3, + 0x0000, 0x7f00, 0xda51, 0x0e0e, + 0xffaf, 0xc345, 0x40c3, 0x9003, + 0xe024, 0x1800, 0x0fc5, 0x180c, + 0x0fc5, 0x18fe, 0x8005, 0xd940, + 0xc005, 0x70ed, 0x43db, 0x8000, + 0x06d1, 0xe0b0, 0xd880, 0xc046, + 0x20ca, 0x0041, 0xc046, 0xc005, 0x2079, 0x0c00, 0x781b, 0x781b, 
- 0x781b, 0x7b1b, 0xe330, 0x0fda, - 0xfeef, 0x710c, 0xc307, 0x40c1, - 0x4163, 0x42a1, 0x0d72, 0x0020, - 0x1c00, 0x3041, 0xed0e, 0x40c1, - 0x0d0b, 0x10b1, 0x4163, 0x724c, - 0xf002, 0x704c, 0x0d9a, 0xff4f, - 0x40c1, 0x0bee, 0x0120, 0x4163, - 0x40c3, 0x8000, 0x0665, 0x8820, - 0x1001, 0x008b, 0x4063, 0xc143, - 0x702c, 0x0932, 0xfeaf, 0xda08, - 0xc103, 0x2340, 0x1042, 0x708d, - 0x4a31, 0x7829, 0xdbff, 0xc045, - 0xf024, 0xd841, 0x271f, 0x3002, - 0x43c3, 0x8000, 0x07bc, 0x7185, - 0x2232, 0x0f80, 0x8000, 0x07fb, - 0xe00f, 0x780f, 0x7a14, 0xc088, - 0x2014, 0x07c0, 0x2232, 0x00cb, - 0x8820, 0x627a, 0x2308, 0x1041, - 0xa820, 0x8a21, 0x8841, 0x2109, - 0x0082, 0xa841, 0x2102, 0x02c0, - 0x791c, 0xc003, 0x2109, 0x0003, - 0xc005, 0x7a8f, 0x4758, 0x790f, - 0x786f, 0x0ab3, 0x8062, 0xc043, - 0x40c1, 0x0cb2, 0xfeef, 0x712c, - 0x8821, 0xc203, 0x7230, 0x21ca, - 0x00c5, 0xa821, 0xf187, 0x0dab, - 0x10b0, 0x740c, 0x41c3, 0x006c, - 0x0000, 0x091e, 0xfe8f, 0x1600, - 0x7080, 0x8000, 0x0665, 0xc044, - 0x700c, 0xc043, 0xf004, 0xc004, - 0x7104, 0xc044, 0xc004, 0x780f, - 0xc045, 0x40c3, 0x8000, 0x0665, - 0x8821, 0xc005, 0x090b, 0x0025, - 0x70ad, 0xf037, 0x71a5, 0x7eaf, - 0x0edf, 0x9255, 0x0bf9, 0xb3ae, - 0x43c1, 0xc003, 0xc205, 0x790f, - 0xc088, 0x7834, 0x8820, 0x8801, - 0xc041, 0x6038, 0xc140, 0x7f1d, - 0x740c, 0x41c3, 0x006e, 0x0005, - 0x08be, 0xfeaf, 0xc742, 0xc005, - 0x269a, 0x1008, 0x201a, 0x0f80, - 0x0000, 0x2000, 0x7e05, 0xc006, - 0x0815, 0x0c10, 0x2605, 0x1f80, - 0x9002, 0x0080, 0xb0e0, 0xc006, - 0xe0c0, 0xf207, 0x2605, 0x1f80, - 0x9002, 0x0060, 0xb0e0, 0xc003, - 0x7104, 0xc043, 0xf1cc, 0x0d02, - 0xfe8f, 0x2480, 0x3103, 0x1404, - 0x341b, 0xc6c6, 0xc2e6, 0x44cb, - 0x8000, 0x0665, 0x8cc0, 0x261f, - 0x108f, 0xf003, 0x71c5, 0x8c61, - 0x0b31, 0x03a3, 0x706d, 0xf01c, - 0x261a, 0x1f9f, 0x0000, 0x1000, - 0x239a, 0x0004, 0x2114, 0x034d, - 0x2705, 0x301f, 0x2305, 0x07c3, - 0x7b7b, 0xbb91, 0xbb9c, 0xbb9f, - 0x9360, 0x71e5, 0x7165, 0xb560, - 0x232f, 0x02c7, 0x0bd5, 0x80a2, - 0x7df0, 0xf1e2, 0xc6c6, 
0x78e0, - 0xc0e6, 0x42c3, 0x8000, 0x0665, - 0x8a60, 0x235f, 0x024c, 0xe408, - 0xf002, 0x7164, 0x8a21, 0x0943, - 0x00e3, 0xdd08, 0xf021, 0x219a, - 0x0008, 0x231a, 0x0f8e, 0x0000, - 0x2000, 0x278a, 0x1fcf, 0x71a5, - 0x79c5, 0x2105, 0x0f81, 0x9002, - 0x0064, 0x9120, 0xde0a, 0x7f24, - 0xbe61, 0x0e01, 0x1031, 0x793d, - 0x215f, 0x080e, 0x2014, 0x0301, - 0xe409, 0x66fe, 0xb1c0, 0x0dcb, - 0x9272, 0x41a1, 0xf1db, 0xc4c6, - 0xca5c, 0x1a5e, 0x3002, 0xca5d, - 0x7fe0, 0x1a5f, 0x3002, 0x78e0, - 0xc2e2, 0x4408, 0x70ad, 0xf003, - 0x71a5, 0x78af, 0x0821, 0x0095, - 0x0cf9, 0x900e, 0x0b0e, 0xfecf, - 0x706c, 0xf005, 0x1a00, 0x0fc3, - 0x7164, 0x7a6f, 0x0af9, 0x80b4, - 0x621a, 0xf1f0, 0xc6c2, 0x78e0, - 0xc0e4, 0x70ad, 0xf002, 0x71a5, - 0x0d27, 0x10b2, 0x708d, 0xf014, - 0x255a, 0x1c82, 0x605b, 0x245a, - 0x1642, 0x627a, 0x623e, 0x2414, - 0x1343, 0x42c3, 0x8000, 0x0454, - 0x634a, 0x7185, 0xae40, 0x0ce5, - 0x9092, 0xf1eb, 0xc4c4, 0x78e0, + 0x781b, 0x781b, 0xe030, 0xc047, + 0xf064, 0xc008, 0x08c3, 0x03ee, + 0x7eef, 0xc203, 0x40c1, 0x0fa2, + 0xfeaf, 0x712c, 0xc203, 0xc307, + 0x710c, 0x0f52, 0xfeef, 0x41c1, + 0xc103, 0xc204, 0xc306, 0x40c1, + 0x0d32, 0x0020, 0x1c00, 0x3041, + 0xc004, 0xe812, 0xc004, 0x080f, + 0x00b1, 0x40c1, 0xc103, 0x724c, + 0xf004, 0xc103, 0x40c1, 0x704c, + 0x0cd2, 0xff4f, 0xc103, 0x0e6e, + 0x0120, 0x40c1, 0xc003, 0x1300, + 0x308d, 0x1301, 0x308b, 0x702c, + 0x0ebe, 0xfe6f, 0xda08, 0x2340, + 0x1041, 0x79a2, 0x7829, 0x70ad, + 0xd9ff, 0x7b0f, 0xf01c, 0xda41, + 0x7a0a, 0x71a5, 0x2232, 0x0f81, + 0x8000, 0x071f, 0xe10f, 0x792f, + 0x7a34, 0xc189, 0x7263, 0x7914, + 0x8a8f, 0x8900, 0x2008, 0x0300, + 0xa900, 0x8a10, 0x8941, 0x2209, + 0x0002, 0x7882, 0x781c, 0xa941, + 0x2009, 0x02c1, 0x78af, 0x08cb, + 0x80e4, 0x232f, 0x1047, 0x40c1, + 0x0bbe, 0xfeef, 0x712c, 0x8821, + 0x2309, 0x1041, 0xa821, 0x71e5, + 0x0f3b, 0x9094, 0xc004, 0x089b, + 0x00b0, 0x740c, 0x41c3, 0x0074, + 0x0000, 0x0ec2, 0xfe4f, 0x1600, + 0x708f, 0x8000, 0x06d1, 0x706f, + 0xf035, 0xc003, 0x085f, 0x036e, + 0x212f, 0x06c7, 
0xc089, 0x7834, + 0x8841, 0x8800, 0x43a1, 0xc241, + 0x6059, 0xc204, 0x7e3d, 0xc040, + 0x740c, 0x41c3, 0x0076, 0x0005, + 0x0e8a, 0xfe6f, 0xc642, 0xc004, + 0x201a, 0x0f81, 0x0000, 0x2000, + 0x40a1, 0x209a, 0x0008, 0x7905, + 0xc005, 0x0817, 0x0c10, 0x2105, + 0x0f80, 0x9002, 0x0080, 0xb0c0, + 0xc005, 0xe0c0, 0xf206, 0x2105, + 0x0f80, 0x9002, 0x0060, 0xb0c0, + 0x7167, 0x71a5, 0x0d9f, 0x9254, + 0x71e5, 0x78ef, 0xc044, 0x40c3, + 0x8000, 0x06d1, 0x8821, 0xc004, + 0x09ed, 0x8025, 0x70ad, 0x0b5a, + 0xfe8f, 0x2480, 0x3203, 0x1404, + 0x341b, 0xc6c6, 0xc0e6, 0x43c3, + 0x8000, 0x06d1, 0x8ba0, 0x8b81, + 0x251f, 0x108f, 0xf01b, 0x251a, + 0x1f9e, 0x0000, 0x1000, 0x239a, + 0x0004, 0x2114, 0x038e, 0x2605, + 0x301e, 0x2305, 0x0783, 0x7b7b, + 0xbb91, 0xbb9c, 0xbb9f, 0x9360, + 0x71e5, 0x7165, 0xb660, 0x232f, + 0x02c7, 0x0bd7, 0x80a4, 0x7ef0, + 0x71a5, 0x0cf7, 0x9365, 0x706d, + 0xc4c6, 0x78e0, 0xc0e6, 0x41c3, + 0x8000, 0x06d1, 0x8960, 0x8941, + 0x235f, 0x024d, 0xe508, 0xf022, + 0x219a, 0x0008, 0x231a, 0x0f8e, + 0x0000, 0x2000, 0x278a, 0x1fcf, + 0x7185, 0x79c5, 0x2105, 0x0f81, + 0x9002, 0x0064, 0x9120, 0x7f24, + 0x793d, 0x793d, 0x793d, 0x793d, + 0x793d, 0x216c, 0x010e, 0x66fe, + 0x2014, 0x0341, 0xb1c0, 0xe509, + 0x0cc9, 0x9274, 0x4181, 0x7164, + 0x0af9, 0x80e5, 0xdc08, 0xc4c6, + 0xcc32, 0x7fe0, 0x1a33, 0x301c, + 0xc0f1, 0x4308, 0x706c, 0xf00f, + 0x0b19, 0x10ee, 0x786f, 0x0a42, + 0xfecf, 0x704c, 0xf004, 0x1801, + 0x0fd3, 0x7144, 0x0afb, 0x8094, + 0x7164, 0x0be9, 0x8094, 0xc0d1, + 0x7ee0, 0x78e0, 0xc0f1, 0x706d, + 0x706c, 0xf016, 0x70ef, 0xf011, + 0x235a, 0x0c82, 0x605c, 0x275a, + 0x3642, 0x71e7, 0x629a, 0x623c, + 0x2600, 0x3682, 0x125c, 0x0082, + 0x2640, 0x305e, 0xac40, 0x0fe5, + 0xb094, 0x7164, 0x0bd9, 0x80b4, + 0x2314, 0x10de, 0xc0d1, 0x7ee0, 0xc2e6, 0x1cfc, 0xb6c8, 0x2482, - 0x3303, 0xc241, 0xc142, 0xc043, - 0xc09c, 0x702c, 0xda5a, 0x0b6e, - 0xffaf, 0xc340, 0xc085, 0x0b66, - 0xffaf, 0xda5a, 0xc000, 0x0811, - 0x0071, 0xc09c, 0xc185, 0x09da, - 0x0020, 0x714c, 0xd80f, 0x0e5a, - 0xfe6f, 
0xd90f, 0x125e, 0x309b, - 0x47cb, 0x8000, 0x0663, 0xf012, - 0xd80f, 0x0e46, 0xfe6f, 0xd90f, - 0xca0f, 0x712c, 0x4338, 0xb8c4, - 0xe806, 0xb861, 0x08ff, 0x8031, - 0x232f, 0x36c0, 0x2300, 0x335b, - 0xca5f, 0x252f, 0x16c7, 0x08c1, - 0x0344, 0xc000, 0x0811, 0x0071, - 0x40a1, 0xc201, 0x0922, 0x00e0, - 0xc185, 0xf013, 0x702c, 0xf003, - 0x7124, 0x782f, 0x081b, 0x0255, - 0xc201, 0x0af9, 0x800e, 0x209a, - 0x0008, 0x2005, 0x0f80, 0x9003, - 0xe064, 0xb0a0, 0xf1f2, 0x096e, - 0xfe4f, 0x8f00, 0xc302, 0x8fc2, - 0x47db, 0x8000, 0x07bc, 0x7b14, - 0xf003, 0x71c5, 0x4320, 0x8f23, - 0x78cf, 0x708d, 0x090b, 0x0025, - 0xc044, 0xf1c0, 0x7185, 0x788f, - 0x08eb, 0x8275, 0x6b22, 0x7414, - 0x23ca, 0x0041, 0xc101, 0x09f1, - 0x800e, 0x9b20, 0x0943, 0x0012, - 0xc104, 0x209a, 0x0008, 0x211a, - 0x0f81, 0x0000, 0x2000, 0x7825, + 0x3003, 0x4548, 0x4628, 0xc041, + 0xc099, 0x702c, 0xda5a, 0x0b16, + 0xffaf, 0x4378, 0xc082, 0x0b0e, + 0xffaf, 0xda5a, 0x0b0f, 0x0071, + 0xc099, 0xc182, 0x09da, 0x0020, + 0x714c, 0xd80f, 0x0c1e, 0xfe6f, + 0xd90f, 0xca66, 0xf06b, 0x0b13, + 0x3071, 0xc182, 0xc000, 0x0c26, + 0x00e0, 0x42a1, 0xf011, 0x702c, + 0xf00d, 0x0d19, 0x106e, 0x4020, + 0x209a, 0x0008, 0x2005, 0x0f82, + 0x9003, 0xe064, 0xc000, 0xb200, + 0x7124, 0x09eb, 0x8254, 0x0f92, + 0xfe0f, 0x43c3, 0x8000, 0x06cf, + 0x1302, 0x0480, 0x1301, 0x0481, + 0x130e, 0x048b, 0x2614, 0x100f, + 0xf034, 0x708d, 0xf02d, 0x7495, + 0x0d55, 0x132e, 0x27ca, 0x1001, + 0x9f00, 0x084b, 0x0032, 0x4081, + 0x209a, 0x0008, 0x221a, 0x0f9e, + 0x0000, 0x2000, 0x2005, 0x0780, 0x2005, 0x0f80, 0x9002, 0x0166, - 0x9000, 0xe893, 0xc003, 0x4163, - 0x2714, 0x3000, 0x8841, 0x1000, - 0x008b, 0x72b1, 0x21ca, 0x008d, - 0xa821, 0x230c, 0x9340, 0x4163, - 0x21ca, 0x02c5, 0xa820, 0x2780, - 0x3041, 0xf1d2, 0xc000, 0x0811, - 0x0071, 0xc09c, 0xc185, 0x08d2, - 0x0020, 0x704c, 0x2480, 0x3303, + 0x9000, 0xe895, 0xc001, 0x1400, + 0x301e, 0x2314, 0x0000, 0x1001, + 0x009f, 0x2708, 0x379e, 0x1801, + 0x0782, 0x1000, 0x009f, 0x1400, + 0x301e, 0x2709, 0x379e, 0x1800, + 
0x0782, 0xe341, 0x7185, 0x0cab, + 0x9274, 0x6f02, 0x7124, 0x4708, + 0x7a2f, 0x0b9b, 0x90a5, 0xd80f, + 0x0b5a, 0xfe6f, 0xd90f, 0xca15, + 0x712c, 0xb8c4, 0xe805, 0xb861, + 0x08ff, 0x8031, 0x793b, 0xc000, + 0x6038, 0x780f, 0xc040, 0xca67, + 0xc100, 0x0827, 0x8045, 0x0b11, + 0x3071, 0xc099, 0xc182, 0x08e2, + 0x0020, 0x704c, 0x2480, 0x3003, 0x1404, 0x341b, 0xc6c6, 0x78e0, 0xc2e2, 0x45cb, 0x9005, 0xe000, - 0xd820, 0x0eb6, 0xff6f, 0x1d00, + 0xd820, 0x0e86, 0xff6f, 0x1d00, 0x1045, 0x1d00, 0x1005, 0xc6c2, - 0x8821, 0xdac8, 0x216c, 0x0041, - 0xa821, 0x8822, 0xb9c6, 0xa822, - 0x886d, 0x2344, 0x0c01, 0x7a25, - 0x0b13, 0x017f, 0xa84d, 0x2185, - 0x0a03, 0xa82d, 0x8823, 0xb987, - 0xa823, 0x702c, 0xa831, 0xa830, - 0x7fe0, 0xa838, 0xc2e2, 0x0f6e, - 0xfe8f, 0x256f, 0x1343, 0x9520, - 0x205a, 0x0400, 0x7b3d, 0x218a, - 0x02c5, 0x233f, 0x004c, 0x238c, - 0x82c5, 0xd920, 0x21ca, 0x0c29, - 0x7c25, 0x238c, 0x84c8, 0xd942, - 0x24ca, 0x1049, 0x238c, 0x87cc, - 0xd953, 0x24ca, 0x1049, 0x238c, - 0x8bd2, 0xd954, 0x24ca, 0x1049, - 0x15fe, 0x9081, 0x2004, 0x0f80, - 0x0000, 0x1ff0, 0xb881, 0x211a, - 0x0f83, 0x0020, 0x0000, 0x41c3, - 0x9004, 0x00f8, 0x2305, 0x0042, + 0x8821, 0x216c, 0x0041, 0xa821, + 0x8822, 0xb9c6, 0xa822, 0x882d, + 0x2144, 0x0c02, 0x2285, 0x023f, + 0x0913, 0x017f, 0xa84d, 0x794f, + 0xb985, 0xa82d, 0x8823, 0xb987, + 0xa823, 0x702c, 0xa838, 0xa831, + 0x7fe0, 0xa830, 0xc2e2, 0x0e06, + 0xfe8f, 0x256f, 0x1343, 0x9560, + 0xda20, 0xd940, 0x238c, 0x85ca, + 0x22ca, 0x0c2d, 0x238c, 0x89d0, + 0x22ca, 0x004d, 0x238c, 0x8fd8, + 0xd950, 0x22ca, 0x004d, 0x218a, + 0x05ca, 0x213c, 0x00c1, 0x238c, + 0x89d0, 0x21ca, 0x00ad, 0x238c, + 0x8fd8, 0x21ca, 0x00ed, 0x73d3, + 0x0000, 0x095f, 0x21ca, 0x012d, + 0x2105, 0x008c, 0x15fe, 0x9081, + 0x205a, 0x0400, 0x211a, 0x0f83, + 0x0020, 0x0000, 0x41c3, 0x9004, + 0x00f8, 0x2004, 0x0f80, 0x0000, + 0x1ff0, 0xb881, 0x2305, 0x0042, 0x7224, 0x7965, 0xb280, 0xb100, 0xc6c2, 0x78e0, 0xc2e6, 0x1cfc, - 0xb6c8, 0xc1a3, 0x43db, 0x8000, - 0x0665, 0x1300, 0x308f, 0xc241, - 
0xc142, 0xc040, 0xf002, 0x71e5, - 0x1301, 0x3080, 0x79ef, 0x4328, - 0x080b, 0x0065, 0x704c, 0xf034, - 0x7144, 0x7b4f, 0x0beb, 0x8275, - 0x4060, 0x231a, 0x1f8c, 0x0000, - 0x1000, 0x209a, 0x0004, 0xd930, - 0xb990, 0x235f, 0x125f, 0x7c05, - 0x2405, 0x1040, 0xe110, 0x7985, - 0xc600, 0x7c1b, 0xc002, 0x76e3, - 0x667e, 0x70e3, 0x631b, 0xc001, - 0x7d3b, 0x41c3, 0x9000, 0x0000, - 0x7d25, 0x7985, 0xe808, 0x9500, - 0xae00, 0x9120, 0x8e00, 0x4910, - 0xab00, 0xf1d8, 0x8e00, 0xb500, - 0x8e80, 0x1300, 0x00c0, 0x6098, - 0xb100, 0xf1d0, 0xc0a3, 0x1404, - 0x341b, 0xc6c6, 0xc0e4, 0x708d, - 0xf002, 0x7185, 0x0c23, 0x10b2, - 0x70ad, 0xf014, 0x42c3, 0x8000, - 0x0454, 0x635b, 0x245a, 0x1c82, - 0x605e, 0x255a, 0x1642, 0x71a5, - 0x62da, 0x622a, 0xab40, 0x0de9, - 0x90b2, 0x2514, 0x1303, 0xf1eb, - 0xc4c4, 0x78e0, 0xc2e6, 0x1cfc, - 0xb6c8, 0xc1bd, 0xc243, 0xc042, - 0x1600, 0x7100, 0x9008, 0x01e0, - 0x4528, 0x712c, 0xc046, 0xc002, - 0x09e6, 0x0060, 0x4378, 0x4708, + 0xb6c8, 0x4358, 0x42c3, 0x8000, + 0x06d1, 0x1200, 0x008b, 0x4738, + 0x260a, 0x3000, 0xf030, 0x4300, + 0x211a, 0x0f8d, 0x0000, 0x1000, + 0x239a, 0x0004, 0xdc30, 0xbc90, + 0x47cb, 0x9000, 0x0000, 0x7d65, + 0x2505, 0x1303, 0xe410, 0x7ca5, + 0x215f, 0x024d, 0x7e9b, 0x7c7b, + 0x7ee5, 0x2600, 0x3343, 0x75e3, + 0x7f85, 0x631b, 0x0b13, 0x3030, + 0x651d, 0x9680, 0xab80, 0x9760, + 0x7b82, 0xad60, 0xf008, 0x8b80, + 0xb680, 0x8b60, 0x1500, 0x10cc, + 0x639b, 0xb760, 0x7104, 0x08ab, + 0x8254, 0x7165, 0x8a01, 0x212f, + 0x02c7, 0x08f7, 0x8065, 0x700c, + 0x1404, 0x341b, 0xc6c6, 0x78e0, + 0xc0e4, 0x264a, 0x3000, 0x704c, + 0xf013, 0x706c, 0xf00c, 0x225a, + 0x0c8c, 0x7165, 0x609d, 0x235a, + 0x064c, 0x7164, 0x64bc, 0x642c, + 0x1e5c, 0x1302, 0x0beb, 0x80b4, + 0x2300, 0x168e, 0x7144, 0x0adf, + 0x80b4, 0x2614, 0x308b, 0xc4c4, + 0xc2e6, 0x1cfc, 0xb6c8, 0xc1bb, + 0xc343, 0xc142, 0x1600, 0x710f, + 0x9008, 0x01e0, 0x712c, 0x4548, + 0x09c6, 0x0060, 0x4318, 0x1e00, + 0x7002, 0x8000, 0x06ce, 0x4608, 0x1600, 0x7080, 0x8000, 0x0004, - 0x46cb, 0x8000, 0x0662, 
0xaee0, - 0xc044, 0x1600, 0x7001, 0x8000, - 0x0670, 0xc002, 0x714c, 0x0ca6, - 0xff2f, 0x706c, 0x0b4e, 0xfe6f, - 0xc006, 0x208a, 0x0008, 0x41c3, - 0x9009, 0xe1f2, 0xb100, 0x700c, - 0x228a, 0x0010, 0xb101, 0xb142, - 0x1e00, 0x7004, 0x9003, 0xfec4, - 0x710c, 0x2744, 0x17c1, 0xe906, + 0x1600, 0x7001, 0x8000, 0x06dc, + 0x714c, 0xc044, 0x4063, 0x0c6e, + 0xff2f, 0x706c, 0x0922, 0xfe6f, + 0x40e1, 0x208a, 0x0008, 0x41c3, + 0x9009, 0xe1f2, 0xb100, 0x70ed, + 0x208a, 0x0010, 0xb1e1, 0xb102, + 0x710c, 0x2644, 0x17c1, 0xe906, 0xb961, 0x0901, 0x0031, 0x781b, + 0x1e00, 0x73c4, 0x9003, 0xfec4, 0x205a, 0x0100, 0x712c, 0x200f, - 0x03c0, 0x780f, 0x0dee, 0xffaf, - 0xc045, 0xc704, 0xc003, 0x271a, - 0x1f8f, 0x0010, 0x0000, 0x7214, - 0x40c3, 0x9003, 0xe004, 0xf405, - 0x1800, 0x0005, 0xf003, 0x1800, - 0x06c4, 0x1488, 0x3000, 0x41a1, - 0xc047, 0x1600, 0x7100, 0x9002, - 0x0164, 0x206c, 0x00c0, 0x0fbe, - 0x0020, 0xb880, 0x40c3, 0x0000, - 0xffff, 0x4100, 0x4200, 0x0976, - 0xfeef, 0x4300, 0xc093, 0x702c, - 0x0fd2, 0xff6f, 0xda28, 0xc002, - 0xc193, 0x724c, 0x2055, 0x08c0, - 0x7f05, 0xc744, 0x0b12, 0xffef, - 0xc004, 0xc003, 0x08f5, 0x0030, - 0xd80f, 0xc089, 0x218a, 0x0038, - 0x0fba, 0xff6f, 0xda14, 0xca09, - 0xd910, 0x4220, 0x2044, 0x07c1, - 0xe905, 0xb961, 0x09ff, 0x8031, - 0x7a5b, 0x712c, 0x2044, 0x07c3, - 0x4020, 0xeb06, 0xbb61, 0x0b01, - 0x0031, 0x781b, 0x8e22, 0x8e81, - 0x780f, 0xc046, 0x7813, 0x706f, - 0xf00a, 0x2454, 0x398b, 0x7f74, - 0x23f4, 0x10c3, 0x7185, 0x7b04, - 0x7b42, 0xb760, 0x7b8f, 0x09ef, - 0x80e5, 0xc789, 0xf00c, 0x0d0e, - 0xff8f, 0xc003, 0xc189, 0x42a1, - 0x2079, 0x0043, 0x0bc6, 0xffef, - 0xc008, 0x7167, 0x202f, 0x06c7, - 0x086b, 0x07f5, 0xc048, 0x8e01, - 0x8e63, 0xc489, 0x7c14, 0xf003, - 0x7164, 0x8e04, 0x796f, 0x4328, - 0x080d, 0x0065, 0x70ed, 0xf1e5, - 0x71e5, 0x7285, 0x78ef, 0x08ed, - 0x8095, 0x9440, 0xc106, 0x623a, - 0x212f, 0x8086, 0x4758, 0x07ec, - 0xffe4, 0xb440, 0x0925, 0x0813, - 0x231a, 0x1f82, 0x0000, 0x1000, - 0xc104, 0x209a, 0x0004, 0x7945, - 0x7825, 0x781b, 
0xb891, 0xb89c, - 0xb89f, 0x1800, 0x07c4, 0xf1e2, - 0x40c3, 0x0000, 0xfe00, 0xb400, - 0xf1dc, 0xc004, 0xc193, 0x714c, - 0x0b3a, 0x00e0, 0x706c, 0xf007, - 0xc193, 0x42a1, 0x0b3e, 0xffef, - 0x706c, 0x0ab2, 0xff6f, 0xc005, - 0x1600, 0x7001, 0x8000, 0x0670, - 0xc002, 0x704c, 0x0ade, 0xff2f, - 0x706c, 0xc007, 0xe81a, 0xc003, - 0x0815, 0x00b1, 0x740c, 0xc202, - 0xc305, 0xc540, 0x41c3, 0x001f, - 0x0003, 0xf00b, 0xc005, 0xc541, - 0xc302, 0xc040, 0xc003, 0x742c, - 0xb995, 0x6841, 0x740c, 0x0a3a, - 0xfe4f, 0x0ca6, 0xfe6f, 0x40a1, - 0xc0bd, 0x1404, 0x341b, 0xc6c6, + 0x0380, 0x780f, 0x0e2e, 0xffaf, + 0xc045, 0xc004, 0x41c3, 0x9003, + 0xe004, 0x201a, 0x0f8e, 0x0010, + 0x0000, 0x0d0b, 0x1091, 0xb1e0, + 0xf003, 0xc003, 0xb100, 0x1480, + 0x3000, 0xc046, 0x1600, 0x7100, + 0x9002, 0x0164, 0xc102, 0x206c, + 0x00c0, 0x0fae, 0x0020, 0xb880, + 0x40c3, 0x0000, 0xffff, 0x4100, + 0x4200, 0x0936, 0xfeef, 0x4300, + 0xc091, 0x702c, 0x0f8e, 0xff6f, + 0xda28, 0x2355, 0x38c0, 0x78c5, + 0xc191, 0x724c, 0x0b3a, 0xffef, + 0xc043, 0x70b5, 0xf287, 0xc087, + 0x218a, 0x0038, 0x0f7e, 0xff6f, + 0xda14, 0xca0c, 0xd910, 0x4220, + 0x2044, 0x07c1, 0xe905, 0xb961, + 0x09ff, 0x8031, 0x7a5b, 0x2044, + 0x07c3, 0x712c, 0x4020, 0xeb06, + 0xbb61, 0x0b01, 0x0031, 0x781b, + 0x780f, 0xc044, 0x7913, 0x40c3, + 0x8000, 0x06ce, 0x8862, 0x40c3, + 0x8000, 0x06ce, 0x88c1, 0xf00b, + 0x2454, 0x388b, 0x7c14, 0x23f4, + 0x1000, 0x71c5, 0x7824, 0x7842, + 0xb400, 0x78cf, 0x0bed, 0x8025, + 0xc487, 0x70cd, 0xf047, 0x40c3, + 0x8000, 0x06ce, 0x8801, 0x2714, + 0x301f, 0x40c3, 0x8000, 0x06ce, + 0x8824, 0x40c3, 0x8000, 0x06ce, + 0x8843, 0xf02c, 0x706d, 0xf027, + 0x1700, 0x3100, 0xc404, 0x641c, + 0x202f, 0x8306, 0x003a, 0x0024, + 0x1f00, 0x3304, 0x0829, 0x0813, + 0x231a, 0x0f9e, 0x0000, 0x1000, + 0xc003, 0x2605, 0x301e, 0x4061, + 0x209a, 0x0004, 0x2005, 0x0780, + 0x781b, 0xb891, 0xb89c, 0xb89f, + 0xb080, 0xf007, 0x40c3, 0x0000, + 0xfe00, 0x1f00, 0x3004, 0x7165, + 0x72e7, 0x0bb9, 0x9094, 0x7144, + 0x7b4f, 0x09ad, 0x80c5, 0x0cd6, + 0xff8f, 
0xc202, 0x2579, 0x1043, + 0x40e1, 0x0b62, 0xffef, 0xc187, + 0x71c5, 0x7fcf, 0x0f73, 0x97f4, + 0x2440, 0x371f, 0xc003, 0xc191, + 0x714c, 0x0e02, 0x00e0, 0x706c, + 0xf007, 0xc202, 0xd80f, 0xc191, + 0x0b3a, 0xffef, 0x706c, 0x1600, + 0x7001, 0x8000, 0x06dc, 0x4063, + 0x704c, 0x0a9a, 0xff2f, 0x706c, + 0xc006, 0xe81b, 0x0d15, 0x10b1, + 0x732c, 0xc002, 0xc305, 0xc040, + 0x740c, 0xb995, 0x4263, 0xf00c, + 0xc002, 0x6d41, 0x41c3, 0x0021, + 0x0004, 0xc041, 0xc005, 0x4363, + 0xc040, 0x740c, 0x0ffe, 0xfe0f, + 0x0b26, 0xfe6f, 0xc002, 0xc0bb, + 0x1404, 0x341b, 0xc6c6, 0x78e0, 0xc2e6, 0x1cfc, 0xb6c8, 0x2482, - 0x3a06, 0xc042, 0x205f, 0x0640, - 0xc143, 0xc24a, 0xc048, 0x1600, - 0x7080, 0x8000, 0x0661, 0x205f, - 0x0c81, 0xc008, 0x6038, 0x70c3, - 0x8000, 0x0670, 0x8812, 0x702c, - 0xc04c, 0x0f86, 0x0020, 0xc002, - 0x4318, 0xc002, 0x1600, 0x710f, - 0x9008, 0x01e0, 0xc302, 0x1600, - 0x708e, 0x8000, 0x0004, 0x2338, - 0x3000, 0x41c3, 0x001b, 0x0002, - 0x09de, 0xfe2f, 0x4263, 0x40c3, - 0x8000, 0x0661, 0x1801, 0x06c2, - 0x710c, 0x4508, 0x2344, 0x37c0, - 0xe805, 0xb861, 0x08ff, 0x8031, - 0x7dbb, 0xc003, 0x218a, 0x0004, - 0x255a, 0x110d, 0x261a, 0x1f8e, - 0x0010, 0x0000, 0x2004, 0x8040, - 0x1600, 0x7001, 0x8000, 0x0670, - 0xc04b, 0xf206, 0xc002, 0x704c, - 0x716c, 0xf005, 0xc002, 0x704c, - 0x706c, 0x09f2, 0xff0f, 0x250f, - 0x16c0, 0xc04d, 0x0896, 0xfe6f, - 0x40e1, 0xd820, 0x1e00, 0x7004, - 0x9003, 0xe004, 0x208a, 0x0008, - 0x41c3, 0x9009, 0xe1f2, 0xb100, - 0x700c, 0x228a, 0x0010, 0xb101, - 0xb142, 0x1e00, 0x7004, 0x9003, - 0xfec4, 0x730c, 0x0d4e, 0x0020, - 0x218a, 0x0fc7, 0x40c3, 0x0000, - 0xffff, 0x4100, 0x4200, 0x0f06, - 0xfeaf, 0x4300, 0xc090, 0x218a, - 0x0038, 0xda5a, 0x0d6e, 0xff6f, - 0xc490, 0xc002, 0x772c, 0x4738, - 0x2055, 0x0c00, 0x7e05, 0xca09, - 0x2044, 0x07c1, 0xe906, 0xb961, - 0x09ff, 0x8031, 0x272f, 0x37c0, - 0x41c3, 0x8000, 0x0661, 0x8944, - 0x225f, 0x0241, 0x7c34, 0xf003, - 0x7144, 0x41c3, 0x8000, 0x0661, - 0x8925, 0x7b4f, 0x0953, 0x00e5, - 0x70ad, 0xf02d, 0x2044, 0x07cb, - 
0x0b0f, 0x1010, 0x2342, 0x104b, - 0x0bfd, 0x9031, 0x7ffb, 0x231a, - 0x0f8b, 0x0000, 0x1000, 0x219a, - 0x0004, 0x71a5, 0x2305, 0x138b, - 0x2105, 0x02c1, 0x793b, 0xb991, - 0xb99c, 0xb99f, 0x9120, 0x2704, - 0x304b, 0x2353, 0x110b, 0x793d, - 0x2300, 0x13cb, 0x216c, 0x0101, - 0x7161, 0x1c02, 0x1054, 0x79af, - 0x09b5, 0x8274, 0x278a, 0x1c7f, - 0xf1cc, 0xca5e, 0xf00e, 0x08b6, - 0xff6f, 0xc004, 0xca0f, 0x712c, + 0x3706, 0xc042, 0x205f, 0x0640, + 0xc144, 0xc24a, 0xc049, 0x1600, + 0x7080, 0x8000, 0x06cd, 0x205f, + 0x0c81, 0xc009, 0x6038, 0x2000, + 0x0f80, 0x8000, 0x0580, 0x880e, + 0x702c, 0xc047, 0x0f52, 0x0020, + 0xc002, 0xc043, 0xc103, 0xc002, + 0x1600, 0x711b, 0x9008, 0x01e0, + 0xc203, 0xc302, 0x1600, 0x708e, + 0x8000, 0x0004, 0x2138, 0x0000, + 0x41c3, 0x001c, 0x0002, 0x0fde, + 0xfdcf, 0xc003, 0x41c3, 0x8000, + 0x06cd, 0x261a, 0x1f8e, 0x0010, + 0x0000, 0xa901, 0x712c, 0x4528, 0xb8c4, 0xe806, 0xb861, 0x0801, - 0x0031, 0x793b, 0xc007, 0x6038, - 0xc045, 0xc005, 0xc10d, 0x7810, - 0xc047, 0x792f, 0xc144, 0xca5f, - 0xc107, 0x7110, 0x01d0, 0x0009, - 0xc005, 0xc50c, 0x780f, 0x0932, - 0x0060, 0x2584, 0x1002, 0x2005, - 0x0341, 0x0c5a, 0x00a0, 0xc004, - 0xc004, 0x0a3a, 0xffaf, 0x702c, - 0xd80f, 0x0f76, 0xfe2f, 0xd90f, - 0xca09, 0x712c, 0x2044, 0x07cd, - 0x4020, 0x702c, 0xed05, 0xbd61, - 0x0dff, 0x9031, 0x781b, 0xc04f, - 0xc146, 0xf009, 0x2455, 0x3f40, - 0x603a, 0xc090, 0x6028, 0x7124, - 0xaa00, 0x218c, 0x8cc2, 0xf7f8, - 0xf008, 0xd80f, 0x0f3a, 0xfe2f, - 0xd90f, 0xc006, 0x7104, 0xc046, - 0xc006, 0x7810, 0x0863, 0x87f5, - 0xc04e, 0x40c3, 0x8000, 0x0661, - 0x8844, 0x2455, 0x3f43, 0x225f, - 0x0241, 0x7b34, 0xf002, 0x7144, - 0x40c3, 0x8000, 0x0661, 0x8805, - 0x794f, 0x080f, 0x0065, 0x70ad, - 0xf035, 0x71a5, 0x7264, 0x7caf, - 0x0ce7, 0x9255, 0xc003, 0x08f7, - 0x830e, 0x9b00, 0x08ef, 0x8012, - 0x2844, 0x080e, 0x7ecf, 0x0e45, - 0x1233, 0x234a, 0x36c0, 0x249a, - 0x1008, 0x211a, 0x0f8f, 0x0000, - 0x2000, 0x269a, 0x1010, 0x7ce5, - 0x7f1c, 0x7ffc, 0x7ffc, 0x7ffc, - 0x2342, 0x305b, 0x0bfd, 
0xb031, - 0x7ffd, 0x60f8, 0xb8c4, 0x78e2, - 0x780f, 0x2405, 0x1f8c, 0x9002, - 0x0064, 0x78c5, 0xb400, 0xf1d3, - 0x40c3, 0x0000, 0xfe00, 0xb300, - 0xf1cd, 0x0942, 0xff8f, 0x0a16, - 0xfe0f, 0x40c3, 0x8000, 0x0661, - 0x88e4, 0x2455, 0x3f4d, 0x43db, - 0x8000, 0x07bc, 0x275f, 0x1240, - 0x7d14, 0xf003, 0x71e5, 0x78ef, - 0xc049, 0x40c3, 0x8000, 0x0661, - 0x8825, 0xc009, 0x708d, 0x0919, - 0x0025, 0x70cd, 0xf18f, 0xc00f, - 0x71c5, 0x790f, 0x9500, 0x6038, - 0x1d02, 0x1014, 0x79cf, 0x09d9, - 0x8275, 0x4020, 0xc209, 0x209a, - 0x0008, 0x221a, 0x0f82, 0x0000, - 0x2000, 0x7845, 0x2005, 0x0f80, - 0x9002, 0x0166, 0x9000, 0xc203, - 0x228c, 0x8004, 0x24ca, 0x1002, - 0xf403, 0x7c90, 0x641c, 0xc003, - 0x08bf, 0x804e, 0x9d00, 0x0843, - 0x0012, 0xc005, 0x7810, 0x70d3, - 0x0000, 0xffff, 0xc007, 0xf404, - 0x1340, 0x3080, 0x262f, 0xf308, - 0xf413, 0xc10e, 0x272f, 0x3007, - 0x4200, 0x2314, 0x3041, 0x8961, - 0x1100, 0x008b, 0x73f3, 0x22ca, - 0x00c9, 0x230c, 0x97c0, 0x20ca, - 0x02cb, 0xa941, 0xa900, 0x2380, - 0x3041, 0xf1bb, 0x1600, 0x7080, - 0x8000, 0x0661, 0x205f, 0x0c81, - 0xc008, 0x6038, 0x70c3, 0x8000, - 0x0670, 0x8832, 0x0a86, 0x00a0, - 0xc004, 0xc00a, 0xe810, 0xc003, - 0xc302, 0x41c3, 0x001d, 0x0004, - 0xc041, 0xc004, 0x724c, 0xc040, - 0x0e4e, 0xfe2f, 0x740c, 0x08ba, - 0xfe6f, 0xc003, 0xc00b, 0xe80b, - 0x1600, 0x7001, 0x8000, 0x0670, - 0xc002, 0x704c, 0x0ea6, 0xfeef, - 0x706c, 0x2480, 0x3a06, 0x1404, - 0x341b, 0xc6c6, 0xc2e6, 0x120c, - 0x308d, 0x70ed, 0x70cd, 0x1a0c, - 0x33c2, 0xf009, 0x26f0, 0x7380, - 0x8000, 0x1ca0, 0x0e42, 0x0000, - 0x71c5, 0xcc32, 0x0ef1, 0x9002, - 0x1a32, 0x33dc, 0x1a0c, 0x3342, - 0xc6c6, 0x78e0, 0x209a, 0x0004, - 0x43c3, 0x9008, 0x01ea, 0x71c3, - 0x0000, 0xfffe, 0x2000, 0x0f82, - 0x0000, 0xfe00, 0x9300, 0x2004, - 0x0f80, 0x0000, 0xc0ff, 0x7845, - 0x9341, 0xb300, 0x226c, 0x0140, - 0x7825, 0x7fe0, 0xb301, 0x78e0, - 0xc2e2, 0x1600, 0x710d, 0x8000, - 0x0006, 0xe56d, 0x2d85, 0x1b81, - 0x2553, 0x9040, 0xf205, 0x79af, - 0x204e, 0x0100, 0x603d, 0x7aaf, - 0x41c3, 0x0142, 
0x0001, 0x0d9a, - 0xfe2f, 0xd80a, 0x40c3, 0x8000, - 0x0660, 0xa8a0, 0xc6c2, 0x78e0, - 0xc2e6, 0x266f, 0x15c3, 0x8e40, - 0x4508, 0xea17, 0x087e, 0xff2f, - 0x4040, 0x4708, 0xe887, 0x41c3, - 0x0168, 0x0001, 0x0d7a, 0xfdef, - 0x700c, 0x40e1, 0x209a, 0x0001, - 0x7f05, 0x78bb, 0x2005, 0x0f80, - 0x9003, 0xfe92, 0xb0e0, 0x16ff, - 0x9082, 0xea18, 0x084e, 0xff2f, - 0x4040, 0x228c, 0x8fc3, 0xf209, - 0xe888, 0x41c3, 0x0169, 0x0001, - 0x0d46, 0xfdef, 0x700c, 0x700c, - 0x209a, 0x0001, 0x79bb, 0x2105, - 0x0f81, 0x9003, 0xfe9a, 0xb100, - 0xc6c6, 0x78e0, 0xc0f1, 0x1600, - 0x7082, 0x8000, 0x0009, 0x228c, - 0x8c03, 0xf210, 0xea16, 0x0a1f, - 0x0a30, 0x722c, 0xe2f8, 0xf404, - 0x712c, 0xf009, 0x41c3, 0x016a, - 0x0001, 0x0d06, 0xfdef, 0x700c, - 0x702c, 0x215f, 0x0400, 0x7825, - 0x1e00, 0x7004, 0x9005, 0xe0a0, - 0xc0d1, 0x7ee0, 0xc2e6, 0x1cfc, - 0xb6c8, 0x4748, 0x4528, 0x2109, - 0x0002, 0x6f21, 0x0a17, 0x0063, - 0xc1a2, 0x41c3, 0x005a, 0x0000, - 0x0cbe, 0xfe2f, 0x740c, 0x700c, - 0xf05b, 0x1600, 0x7081, 0x8000, - 0x0004, 0x46cb, 0x8000, 0x046c, - 0x78e2, 0x211a, 0x0f81, 0x0020, - 0x0000, 0x2614, 0x13db, 0xc041, - 0x2614, 0x1000, 0x904a, 0x1314, - 0x3103, 0xc140, 0x41c3, 0x005b, - 0x0002, 0x0c86, 0xfe2f, 0x750c, - 0x7de2, 0x2614, 0x1340, 0x906a, - 0x1314, 0x3102, 0x41c3, 0x005c, - 0x0002, 0x0c6e, 0xfe2f, 0x750c, - 0x44cb, 0x8000, 0x0663, 0x8c60, - 0xf029, 0xc001, 0x2632, 0x134b, - 0x7164, 0x239a, 0x1001, 0x6608, - 0x2305, 0x100b, 0x211a, 0x0f80, - 0x0000, 0x1000, 0x219a, 0x0008, - 0x2004, 0x0f82, 0x000f, 0xe000, - 0xc000, 0x2184, 0x0008, 0x7845, - 0x2005, 0x0042, 0x40c3, 0x9002, - 0x0082, 0x2205, 0x0001, 0x1900, - 0x02c4, 0x66e9, 0xe014, 0x7a05, + 0x0031, 0x7dbb, 0x218a, 0x0004, + 0xc004, 0x255a, 0x110d, 0x2004, + 0x8040, 0x1600, 0x7001, 0x8000, + 0x06dc, 0xc04b, 0xf205, 0xc002, + 0x704c, 0x716c, 0xf004, 0xc002, + 0x704c, 0x706c, 0x09a6, 0xff0f, + 0xc003, 0x250f, 0x1000, 0xc04c, + 0xc007, 0x2084, 0x003e, 0xc047, + 0x0e4e, 0xfe2f, 0x4063, 0xd820, + 0x1e00, 0x7004, 0x9003, 0xe004, + 0x208a, 
0x0008, 0x41c3, 0x9009, + 0xe1f2, 0xb100, 0x700c, 0x228a, + 0x0010, 0xb101, 0xb142, 0x1e00, + 0x7004, 0x9003, 0xfec4, 0x730c, + 0x0d1e, 0x0020, 0x218a, 0x0fc7, + 0x40c3, 0x0000, 0xffff, 0x4100, + 0x4200, 0x0ea6, 0xfeaf, 0x4300, + 0xc08d, 0x218a, 0x0038, 0xda5a, + 0x0d0a, 0xff6f, 0xc78d, 0xc002, + 0x120c, 0x309f, 0x2055, 0x0c0c, + 0x40c3, 0x8000, 0x06cd, 0x8864, + 0x7cc5, 0x235f, 0x0480, 0x671f, + 0x770c, 0x260a, 0x3000, 0x2744, + 0x37c0, 0xe807, 0xb861, 0x0801, + 0x0031, 0x262f, 0x3780, 0x40c3, + 0x8000, 0x06cd, 0x88a5, 0x2653, + 0x311e, 0xf027, 0x2744, 0x37c2, + 0xea05, 0xba61, 0x0aff, 0x8031, + 0x781b, 0x2140, 0x004b, 0x261a, + 0x1f82, 0x0000, 0x1000, 0x219a, + 0x0004, 0x7a85, 0x7945, 0x793b, + 0xb991, 0xb99c, 0xb99f, 0x9140, + 0x2604, 0x3081, 0x6038, 0x795d, + 0x216c, 0x0101, 0x6038, 0x1f02, + 0x1014, 0x4161, 0x09c1, 0x8274, + 0x208a, 0x0c7f, 0x7164, 0x7e6f, + 0x0df5, 0x93a5, 0x702c, 0xca66, + 0xf0e2, 0xc006, 0x0bba, 0x0060, + 0x780f, 0xc107, 0x7825, 0x790f, + 0x0f56, 0x00a0, 0x40a1, 0x40a1, + 0x0a9a, 0xffaf, 0x702c, 0xd80f, + 0x0d6a, 0xfe2f, 0xd90f, 0xca0c, + 0x704c, 0xc245, 0xf008, 0x2455, + 0x3e81, 0x615b, 0xc18d, 0x6149, + 0x7144, 0xab20, 0x228c, 0x8d02, + 0xf7b7, 0x712c, 0x2044, 0x07c2, + 0x4020, 0xea06, 0xba61, 0x0a01, + 0x0031, 0x781b, 0xc043, 0xf0a8, + 0x40c3, 0x8000, 0x06cd, 0x8844, + 0x2455, 0x3e80, 0x225f, 0x0481, + 0x603c, 0x40c3, 0x8000, 0x06cd, + 0x8825, 0xf032, 0xc004, 0x0857, + 0x036e, 0x40a1, 0x209a, 0x0008, + 0x231a, 0x0f8f, 0x0000, 0x2000, + 0x9cc0, 0x78e5, 0x2005, 0x0f8f, + 0x9002, 0x0064, 0x0e1f, 0x1012, + 0x260b, 0x9f80, 0x0000, 0x1f00, + 0xf20c, 0x40c3, 0x0000, 0xfe00, + 0xb400, 0x40c3, 0x0000, 0x1c1f, + 0xf00c, 0x1f00, 0x1005, 0xf00b, + 0x265e, 0x1800, 0xbec4, 0x2004, + 0x0f80, 0x0000, 0xfc00, 0x78c5, + 0xb700, 0x71a5, 0x7285, 0x0da9, + 0x9254, 0x7144, 0x7b4f, 0x09fb, + 0x80e5, 0x70ad, 0x09c6, 0xff8f, + 0x0856, 0xfe0f, 0x40c3, 0x8000, + 0x06cd, 0x8864, 0x2455, 0x3e8f, + 0x43cb, 0x8000, 0x06e0, 0x235f, + 0x0480, 0x671f, 0x40c3, 0x8000, + 
0x06cd, 0x1005, 0x009f, 0xf046, + 0x70ad, 0x704c, 0xf03f, 0x40a1, + 0x209a, 0x0008, 0x241a, 0x1f81, + 0x0000, 0x2000, 0x7825, 0x2005, + 0x0f80, 0x9002, 0x0166, 0x9000, + 0xc104, 0x218c, 0x8004, 0x22ca, + 0x0002, 0xf403, 0x621a, 0xc004, + 0x0847, 0x036e, 0x97c0, 0x262f, + 0xf386, 0x003a, 0x0004, 0xc006, + 0x1420, 0x301e, 0x7810, 0x080f, + 0x0f81, 0x0000, 0xffff, 0x1340, + 0x109e, 0x262f, 0xf088, 0xf40f, + 0xc005, 0x262f, 0x3787, 0x2314, + 0x1000, 0x8821, 0x2608, 0x3041, + 0xa821, 0x8820, 0x2609, 0x3041, + 0xa820, 0x2380, 0x1041, 0xc003, + 0x71a5, 0x780f, 0x60d8, 0x1f02, + 0x1014, 0x0d87, 0x9254, 0x7164, + 0x7c6f, 0x0f79, 0xb325, 0xd80f, + 0x0bf2, 0xfe2f, 0xd90f, 0xc005, + 0x7104, 0xc045, 0xc005, 0xe09e, + 0x06b0, 0xffee, 0x712c, 0xca15, + 0x2044, 0x07c2, 0x4020, 0xc108, + 0xea05, 0xba61, 0x0aff, 0x8031, + 0x781b, 0x6038, 0xc046, 0xc006, + 0xc10c, 0x7810, 0xc048, 0xca67, + 0x7d2f, 0xc108, 0x7110, 0x062e, + 0xffce, 0x1600, 0x7080, 0x8000, + 0x06cd, 0x205f, 0x0c81, 0xc009, + 0x6119, 0x2100, 0x0f80, 0x8000, + 0x0580, 0x882e, 0x0d72, 0x00a0, + 0x40a1, 0xc00a, 0xe80f, 0xc004, + 0xc302, 0x41c3, 0x001e, 0x0004, + 0xc041, 0x740c, 0x724c, 0x0c36, + 0xfe2f, 0xc540, 0x0f5a, 0xfe2f, + 0xc004, 0xc00b, 0xe80a, 0x1600, + 0x7001, 0x8000, 0x06dc, 0xc002, + 0x704c, 0x0e82, 0xfeef, 0x706c, + 0x2480, 0x3706, 0x1404, 0x341b, + 0xc6c6, 0x78e0, 0xc2e6, 0x1210, + 0x308d, 0x70cd, 0x70ed, 0x1a10, + 0x3382, 0xf009, 0x26f0, 0x73c0, + 0x8000, 0x1ca8, 0x08be, 0x0040, + 0x71e5, 0xcc36, 0x0ff1, 0x9004, + 0x1a10, 0x3342, 0x1a36, 0x339c, + 0xc6c6, 0x78e0, 0x43c3, 0x9008, + 0x01ea, 0x9340, 0x209a, 0x0004, + 0x2000, 0x0f80, 0x0000, 0xfe00, + 0x2204, 0x0f82, 0x0000, 0xc0ff, + 0x7845, 0x9341, 0xb300, 0xb962, + 0x226c, 0x0140, 0x7825, 0x7fe0, + 0xb301, 0x78e0, 0xc2e2, 0x1600, + 0x710d, 0x8000, 0x0006, 0xe56d, + 0x2d85, 0x1b81, 0x2553, 0x9041, + 0xf205, 0x78af, 0x214e, 0x010d, + 0x651d, 0x7aaf, 0x41c3, 0x014d, + 0x0001, 0x0b82, 0xfe2f, 0xd80a, + 0x40c3, 0x8000, 0x06cc, 0xa8a0, + 0xc6c2, 0x78e0, 0xc2e6, 
0x266f, + 0x15c3, 0x8e40, 0x4508, 0xea17, + 0x084e, 0xff2f, 0x4040, 0x4708, + 0xe887, 0x41c3, 0x0174, 0x0001, + 0x0ba2, 0xfdef, 0x700c, 0x40e1, + 0x209a, 0x0001, 0x7f05, 0x78bb, + 0x2005, 0x0f80, 0x9003, 0xfe92, + 0xb0e0, 0x16ff, 0x9082, 0xea18, + 0x081e, 0xff2f, 0x4040, 0x228c, + 0x8fc3, 0xf209, 0xe888, 0x41c3, + 0x0175, 0x0001, 0x0b6e, 0xfdef, + 0x700c, 0x700c, 0x209a, 0x0001, + 0x79bb, 0x2105, 0x0f81, 0x9003, + 0xfe9a, 0xb100, 0xc6c6, 0x78e0, + 0xc0f1, 0x1600, 0x7082, 0x8000, + 0x0009, 0xea1a, 0x0a23, 0x0a30, + 0x720c, 0x228c, 0x8c03, 0xf20c, + 0xe2f8, 0xf404, 0x710c, 0xf009, + 0x41c3, 0x0176, 0x0001, 0x0b2e, + 0xfdef, 0x700c, 0x700c, 0x205f, + 0x0401, 0x7825, 0x1e00, 0x7004, + 0x9005, 0xe0a0, 0xc0d1, 0x7ee0, + 0xc2e6, 0x1cfc, 0xb6c8, 0xc1a2, + 0x4528, 0x2009, 0x0041, 0x0ac1, + 0x0065, 0x4748, 0x1600, 0x7081, + 0x8000, 0x0004, 0x46cb, 0x8000, + 0x0474, 0x78e2, 0x211a, 0x0f81, + 0x0020, 0x0000, 0x2614, 0x13db, + 0xc041, 0x2614, 0x1000, 0x1324, + 0x3103, 0x9052, 0xc140, 0x41c3, + 0x0063, 0x0002, 0x0a7e, 0xfe2f, + 0x750c, 0x7de2, 0x2614, 0x1340, + 0x9072, 0x1324, 0x3102, 0x41c3, + 0x0064, 0x0002, 0x0a66, 0xfe2f, + 0x750c, 0x40c3, 0x8000, 0x06cf, + 0x1001, 0x008b, 0x8840, 0xf02b, + 0x66b9, 0xc001, 0x8970, 0x7144, + 0x60d8, 0x239a, 0x0001, 0x8810, + 0x7b05, 0x241a, 0x1f80, 0x0000, + 0x1000, 0x249a, 0x1008, 0x2004, + 0x0f81, 0x000f, 0xe000, 0xc000, + 0x2484, 0x1008, 0x7825, 0x2005, + 0x0301, 0x40c3, 0x9002, 0x0082, + 0x2105, 0x000c, 0xe014, 0xb460, + 0x2105, 0x0003, 0x66f8, 0x8830, 0x4020, 0x209a, 0x0001, 0x7825, - 0xb200, 0x8c01, 0x796f, 0x7110, - 0x07aa, 0xffee, 0x710c, 0xc0a2, - 0x1404, 0x341b, 0xc6c6, 0x78e0, - 0xc2e6, 0x1cfc, 0xb6c8, 0x4328, - 0x4100, 0x70ed, 0x40c3, 0x9003, - 0xff64, 0xb0e0, 0x1804, 0x0045, - 0x40c3, 0x8000, 0x0665, 0x88c0, - 0x232f, 0x32c7, 0xf002, 0x71c5, - 0x8841, 0x0a77, 0x03a2, 0x270a, - 0x32c0, 0x706c, 0x238c, 0xbfc3, - 0xf41a, 0x261a, 0x1f83, 0x0000, - 0x2000, 0x42c3, 0x9002, 0x1f64, - 0x2305, 0x008c, 0x7444, 0x7b45, - 0xb420, 0x0bd7, 
0x923f, 0xb3e0, - 0x42c3, 0x9003, 0xf164, 0xb2e0, - 0x1a04, 0x0045, 0xf1e1, 0x7164, - 0x272f, 0x37c2, 0x0fbb, 0xb010, - 0x0bb7, 0x8253, 0x0ff3, 0xb03e, - 0x4260, 0x261a, 0x1f8c, 0x0000, - 0x2000, 0x229a, 0x0008, 0x7c45, - 0x42c3, 0x9002, 0x0164, 0x2405, - 0x108d, 0x7444, 0x7a85, 0xb520, - 0x1a00, 0x0005, 0xf1e5, 0x1404, - 0x341b, 0xc6c6, 0xc2e2, 0xc1a4, - 0x1600, 0x7080, 0x8000, 0x0579, + 0xb300, 0x7c4f, 0x0bad, 0x9325, + 0x710c, 0xf009, 0x41c3, 0x0062, + 0x0000, 0x09f2, 0xfe2f, 0x740c, + 0x700c, 0xc0a2, 0x1404, 0x341b, + 0xc6c6, 0x78e0, 0xc0e6, 0x4328, + 0x70ed, 0x41c3, 0x9003, 0xff64, + 0xb1e0, 0x1904, 0x0045, 0x42c3, + 0x8000, 0x06d1, 0x8a21, 0x8ac0, + 0xf039, 0x228c, 0x8fc3, 0xf418, + 0x261a, 0x1f83, 0x0000, 0x2000, + 0x42c3, 0x9002, 0x1f64, 0x2305, + 0x008c, 0x7444, 0x7a65, 0xb400, + 0x0b51, 0x123f, 0xb2e0, 0x42c3, + 0x9003, 0xf164, 0xb2e0, 0x1a04, + 0x0045, 0xf020, 0x260a, 0x32c0, + 0x708d, 0xf016, 0xf753, 0x4381, + 0x261a, 0x1f82, 0x0000, 0x2000, + 0x239a, 0x0008, 0x7b45, 0x42c3, + 0x9002, 0x0164, 0x2305, 0x008d, + 0x7444, 0x7b45, 0xb500, 0x1b00, + 0x0005, 0x7185, 0x0e0d, 0x3010, + 0x0cd5, 0x9274, 0x262f, 0xb782, + 0x71c5, 0x0993, 0x83a5, 0x222f, + 0x02c7, 0xc4c6, 0xc2e2, 0xc1a4, + 0x1600, 0x7080, 0x8000, 0x05e5, 0x702c, 0x754c, 0xc043, 0xd8ff, - 0xdb08, 0xc142, 0xc141, 0x4528, - 0x0e1e, 0xfdef, 0xc140, 0x1600, - 0x7080, 0x8000, 0x0579, 0xd910, + 0xdb08, 0x4528, 0xc142, 0xc141, + 0x0c3a, 0xfdef, 0xc140, 0x1600, + 0x7080, 0x8000, 0x05e5, 0xd910, 0x754c, 0xc043, 0xd8ff, 0xdb0c, - 0xc542, 0xc541, 0x0e02, 0xfdef, + 0xc542, 0xc541, 0x0c1e, 0xfdef, 0xc540, 0xd8ff, 0xd980, 0x754c, 0x726c, 0xc543, 0xc542, 0xc541, - 0x0dee, 0xfdef, 0xc540, 0xc0a4, + 0x0c0a, 0xfdef, 0xc540, 0xc0a4, 0xc6c2, 0x78e0, 0xc2e4, 0xc1a4, 0x70ad, 0xd8ff, 0x702c, 0x754c, 0x736c, 0xc543, 0xc542, 0xc541, - 0x0dce, 0xfdef, 0xc540, 0x46cb, - 0x8000, 0x0579, 0x8e00, 0x0825, + 0x0bea, 0xfdef, 0xc540, 0x46cb, + 0x8000, 0x05e5, 0x8e00, 0x0825, 0x00b4, 0x702c, 0x7104, 0x781d, - 0xb862, 0x780f, 0xc542, 
0xc541, - 0xc540, 0xc043, 0xd8ff, 0x754c, - 0x0da6, 0xfdef, 0x726c, 0x8e00, - 0xc542, 0xc541, 0xc540, 0xc043, - 0xd8ff, 0x702c, 0x754c, 0x0d92, - 0xfdef, 0xdb08, 0x8e00, 0xd908, + 0xb862, 0x780f, 0xc043, 0xd8ff, + 0x754c, 0x726c, 0xc542, 0xc541, + 0x0bc2, 0xfdef, 0xc540, 0x8e00, + 0xc043, 0xd8ff, 0x702c, 0x754c, + 0xdb08, 0xc542, 0xc541, 0x0bae, + 0xfdef, 0xc540, 0x8e00, 0xd908, 0x754c, 0xc043, 0xd8ff, 0xdb0c, - 0xc542, 0xc541, 0x0d7a, 0xfdef, + 0xc542, 0xc541, 0x0b96, 0xfdef, 0xc540, 0xc0a4, 0xc6c4, 0x78e0, 0x42c3, 0x9004, 0x0110, 0x9200, 0x204f, 0x0101, 0xb8a4, 0xb220, 0x7fe0, 0xb200, 0xc2e2, 0x4200, - 0x0da6, 0xfe6f, 0x4020, 0x4408, + 0x0d02, 0xfe6f, 0x4020, 0x4408, 0x706c, 0x786f, 0x791b, 0x2144, 0x07cd, 0x4181, 0xed05, 0xbd61, 0x0dff, 0x9031, 0x793d, 0xb9c1, 0x09eb, 0x80a1, 0x7164, 0xc6c2, - 0xc2e6, 0x1cfc, 0xb6c8, 0xc1af, - 0xc145, 0xc041, 0xc08d, 0x702c, - 0x0e72, 0xff2f, 0x764c, 0xc001, - 0x0ffe, 0xfdef, 0xc18d, 0xc005, - 0x2079, 0x0041, 0x1600, 0x7080, + 0xc2e6, 0x1cfc, 0xb6c8, 0x2482, + 0x3002, 0xc24c, 0xc15d, 0xc04b, + 0xc09e, 0x702c, 0x0e46, 0xff2f, + 0x764c, 0xc00b, 0x0df2, 0xfdef, + 0xc19e, 0xca13, 0xe807, 0xc00b, + 0x0a26, 0x0020, 0xc10c, 0x0217, + 0x0000, 0x700c, 0xc050, 0xc00c, + 0x2079, 0x0040, 0x781b, 0x781b, + 0x7204, 0xc056, 0x1600, 0x7080, 0x8000, 0x000e, 0x781d, 0x781d, - 0x781d, 0x781d, 0x782b, 0x720c, - 0xc04c, 0x20ca, 0x01a2, 0xc04c, - 0x700c, 0xc047, 0xc046, 0xc049, - 0x700c, 0xf013, 0xc007, 0xc100, - 0x7610, 0xc009, 0x20ca, 0x004d, - 0xc049, 0xc006, 0x20ca, 0x034d, - 0xc046, 0xc007, 0x20ca, 0x038d, - 0xc047, 0xc00a, 0x7204, 0x780f, - 0xc04a, 0xc10a, 0xc00c, 0x7110, - 0x0110, 0x0026, 0xc18d, 0xc00a, - 0x6038, 0x8821, 0x88a0, 0xc001, - 0xc140, 0xc200, 0x0ad2, 0x0020, - 0x41a1, 0x4608, 0xd814, 0x716f, - 0x71ed, 0xc048, 0xf009, 0xc00b, - 0x7910, 0xc002, 0x6038, 0xc040, - 0xc008, 0x7704, 0xc048, 0xc008, - 0x262f, 0xf007, 0xf3cc, 0xc008, - 0xb8c1, 0x0815, 0x0051, 0x232f, - 0x36c8, 0x2300, 0x3f9b, 0x0000, - 0xfffc, 0x77e5, 0x202f, 0x06c6, - 
0x7114, 0x78ee, 0x23ca, 0x306b, - 0x7114, 0x78af, 0xc042, 0xc005, - 0x27ca, 0x106b, 0x7114, 0x700c, - 0xc04b, 0xf404, 0x702c, 0xf021, - 0xc002, 0x7df0, 0x78a2, 0x790f, - 0xc000, 0x780f, 0xc044, 0xc001, - 0x0a5e, 0x0020, 0xc204, 0xc043, - 0xc002, 0xc204, 0x60b8, 0x790f, - 0x0a4e, 0x0020, 0xc001, 0xc103, - 0x0e0b, 0x1065, 0x79b3, 0xc603, - 0xf007, 0x702c, 0x70d1, 0x26ca, - 0x100d, 0x21ca, 0x03cd, 0xc502, - 0x7830, 0x651d, 0xc000, 0x780f, - 0xc042, 0xc005, 0x0863, 0x80b0, - 0x202f, 0x06c8, 0xc040, 0xc100, - 0xc002, 0x7822, 0x7a0f, 0x78af, - 0xc044, 0xc001, 0x0a0a, 0x0020, - 0xc104, 0xc043, 0xc100, 0xc002, - 0x6038, 0x7a0f, 0xc001, 0x09fa, - 0x0020, 0xc104, 0xc103, 0x0e11, - 0x1045, 0xc000, 0xc603, 0x7813, - 0xc04b, 0xf193, 0x0e23, 0x9005, - 0x4608, 0x1c2c, 0x36c0, 0xf18d, - 0x41c3, 0x004d, 0x0000, 0x08ca, - 0xfdef, 0xc007, 0xc005, 0x0815, - 0x0050, 0xc006, 0xc101, 0x2080, - 0x0c43, 0xc046, 0x193f, 0x0002, - 0xc005, 0x080f, 0x0090, 0xc101, - 0xc009, 0x1940, 0x0002, 0xc0af, - 0x1404, 0x341b, 0xc6c6, 0x78e0, - 0xc2e6, 0x0bfa, 0xfe6f, 0x4448, - 0x4300, 0x143f, 0x1080, 0xe00f, - 0x4100, 0x4608, 0x4200, 0x252f, - 0x9387, 0xf20e, 0x6d09, 0x2414, - 0x1000, 0x1440, 0x108b, 0x88e1, - 0x0f0d, 0x12e4, 0x77c5, 0x8800, - 0x0be7, 0x9005, 0x7e2f, 0x0e1d, - 0x17b5, 0x78db, 0x6098, 0x1440, - 0x108b, 0x88e3, 0x0f0d, 0x12e4, - 0x7124, 0x8802, 0x0be9, 0x9005, - 0x4e51, 0x4ab0, 0x7c2f, 0x7a0f, - 0x7291, 0x21ca, 0x000b, 0xca09, - 0x792f, 0x4220, 0xb8c4, 0xe806, - 0xb861, 0x0801, 0x0031, 0x7a5b, - 0x8b00, 0x794f, 0x0907, 0x0005, - 0xab40, 0xc6c6, 0x4200, 0x208c, - 0x8fc3, 0xf408, 0x41c3, 0x9004, - 0x0238, 0x1900, 0x0105, 0xf009, - 0x0a13, 0x01d1, 0x41c3, 0x9004, - 0x0238, 0x1900, 0x0045, 0x1900, - 0x0005, 0xca0c, 0xe804, 0x0019, - 0x0020, 0x4040, 0x41c3, 0x9018, - 0x0064, 0xb140, 0x0051, 0x0020, - 0x1902, 0x0005, 0xc2e2, 0x4508, - 0xca0c, 0xe812, 0xcc32, 0x208c, - 0x8010, 0xf785, 0x09fa, 0xfe0f, - 0xcc32, 0x6821, 0x2615, 0x7000, - 0x8000, 0x1ca0, 0x1a32, 0x305c, - 0xa0a0, 0xc6c2, 0x40c3, 
0x9018, - 0x0064, 0xd910, 0xb0a0, 0xb961, - 0x09ff, 0x8031, 0x7dbd, 0xb0a2, - 0x080e, 0x0020, 0x1802, 0x0005, - 0xc6c2, 0x78e0, 0x41c3, 0x9018, + 0x781d, 0x781d, 0x2044, 0x8041, + 0xc016, 0x20ca, 0x00a1, 0xc056, + 0xca0b, 0x2105, 0x803e, 0x740c, + 0xc058, 0x20ca, 0x0061, 0xc058, + 0xd810, 0xc059, 0x20ca, 0x0061, + 0xc059, 0x700c, 0xc04e, 0xf0c5, + 0xc010, 0x6038, 0x88e1, 0x88a0, + 0xc00b, 0x42e1, 0x0d5a, 0x0020, + 0x41a1, 0x4608, 0x700c, 0xc05c, + 0xc019, 0x1460, 0x301b, 0xc04f, + 0xc01d, 0xc057, 0xf094, 0x78af, + 0xc04d, 0xc00c, 0xc64a, 0x7114, + 0x700c, 0xc052, 0xc053, 0xf226, + 0xc00d, 0x252f, 0x16c8, 0x78a2, + 0x790f, 0x78ef, 0xc04a, 0xc00b, + 0x0d1e, 0x0020, 0xc20a, 0xc053, + 0xc00d, 0xc20a, 0x60b8, 0x790f, + 0x0d0e, 0x0020, 0xc00b, 0xc052, + 0xc013, 0x0e0f, 0x1025, 0x78b3, + 0xc113, 0xc14a, 0xf00a, 0xc012, + 0x0e0f, 0x1025, 0x700c, 0xc012, + 0xc04a, 0x4063, 0xf002, 0xc64a, + 0x780e, 0xc05b, 0xc00d, 0xc51b, + 0x651d, 0x78ef, 0xc04d, 0xc00c, + 0x080d, 0x00b1, 0x702c, 0x700c, + 0xc151, 0xf023, 0xc00f, 0x7f10, + 0xc00d, 0x78e2, 0x7a0f, 0x78af, + 0xc05a, 0xc00b, 0x0cba, 0x0020, + 0xc11a, 0xc051, 0xc00d, 0xc11a, + 0x60f8, 0x7a0f, 0x0caa, 0x0020, + 0xc00b, 0xc211, 0xc10a, 0x090f, + 0x00a5, 0x79f3, 0xc211, 0xc24a, + 0xf007, 0xc10a, 0x090b, 0x0025, + 0x702c, 0xc10f, 0xc04a, 0xc70d, + 0x1212, 0x3602, 0x1203, 0x3083, + 0x792e, 0x0a37, 0x00e4, 0x673f, + 0x1202, 0x3083, 0x0b2b, 0x00a4, + 0x7bef, 0xc045, 0xc012, 0xc211, + 0xc141, 0xc043, 0xc013, 0xc246, + 0x7aaf, 0xc042, 0xc01b, 0x41c3, + 0x0051, 0x0009, 0xc040, 0x740c, + 0x0ea2, 0xfdef, 0xc644, 0xc01c, + 0x7104, 0x2053, 0x807e, 0xc05c, + 0xf405, 0xc00f, 0x7767, 0xb864, + 0xc04f, 0xc017, 0xc60a, 0x7704, + 0xc057, 0xc00f, 0x780e, 0x2048, + 0x0040, 0xc04f, 0x202f, 0x06c6, + 0x2048, 0x005b, 0xc017, 0x262f, + 0xf007, 0xf56b, 0xc00e, 0x082d, + 0x0385, 0x5042, 0xca03, 0x091f, + 0x0004, 0xca02, 0x0817, 0x0064, + 0x740c, 0xc210, 0xc30e, 0x41c3, + 0x0052, 0x0003, 0x0e46, 0xfdef, + 0xc640, 0xc555, 0xc754, 0xc64e, + 0xc010, 0x7204, 
0xc050, 0xc110, + 0xc016, 0x7110, 0x0674, 0xffe5, + 0xc19e, 0xc00c, 0x082f, 0x00d0, + 0x41c3, 0x0053, 0x0000, 0x0e6e, + 0xfdaf, 0xc00e, 0xc00c, 0x0811, + 0x0050, 0xc115, 0xc00b, 0xb96f, + 0x183f, 0x0042, 0xc00c, 0x080f, + 0x0090, 0xc10b, 0xc014, 0x1940, + 0x0002, 0x2480, 0x3002, 0x1404, + 0x341b, 0xc6c6, 0xc2e6, 0x1cfc, + 0xb6c8, 0x2482, 0x3802, 0xc145, + 0x4508, 0xc088, 0x702c, 0x0bfe, + 0xff2f, 0xda80, 0x704c, 0x70cd, + 0xc242, 0xf02b, 0x8880, 0x8861, + 0x4081, 0xf015, 0x60f8, 0x1000, + 0x008b, 0x7165, 0x1800, 0x02c2, + 0x716d, 0xca15, 0x2044, 0x07db, + 0x4061, 0x0b11, 0x3010, 0x2342, + 0x305b, 0x0bff, 0xb031, 0x781b, + 0x60f8, 0x7f10, 0x0bd9, 0x83e5, + 0xc088, 0x4b90, 0x7714, 0x7104, + 0x20ca, 0x002c, 0x242f, 0x9007, + 0xf206, 0x7b4f, 0x7124, 0x7470, + 0x22ca, 0x000d, 0x71c5, 0x0eb1, + 0x97f4, 0x2514, 0x1380, 0x700c, + 0x708d, 0x70cd, 0x4318, 0xc044, + 0xc047, 0xf01f, 0x8860, 0x8801, + 0x7862, 0x6861, 0x7714, 0xca0f, + 0x7b70, 0x23ca, 0x002c, 0x631f, + 0x784f, 0x0825, 0x03e5, 0x7b6a, + 0xc004, 0x7b70, 0x2600, 0x90ce, + 0x7b8c, 0x2041, 0x8000, 0xc044, + 0xc007, 0x2300, 0xb0db, 0x2041, + 0x8000, 0xc047, 0x7185, 0x0cc9, + 0x97f4, 0x2514, 0x1300, 0xca67, + 0x1266, 0x3082, 0x70ed, 0x706c, + 0xc343, 0xc346, 0xf02d, 0x626a, + 0x120f, 0x308c, 0x2400, 0x108b, + 0x7c2f, 0x0c3b, 0x12c5, 0x221f, + 0x008c, 0xda1f, 0x241a, 0x10df, + 0x260a, 0x37c0, 0xba61, 0x0a01, + 0x0031, 0x262f, 0x3781, 0xc203, + 0x2200, 0x87c2, 0xc243, 0xc206, + 0x2201, 0x8782, 0xc246, 0xc202, + 0x2200, 0x8302, 0x2741, 0x900f, + 0xc242, 0x1215, 0x308c, 0x714c, + 0xbcc4, 0xec06, 0xbc61, 0x0c01, + 0x1031, 0x7a5b, 0x627a, 0x7b50, + 0x08a7, 0x80e5, 0xc288, 0xc812, + 0x1203, 0x3081, 0x0825, 0x0044, + 0x1202, 0x3081, 0x091d, 0x0024, + 0x4263, 0xc002, 0x41c3, 0x0047, + 0x0004, 0xc041, 0xc003, 0x43c1, + 0xc040, 0x0c92, 0xfdef, 0x740c, + 0xc005, 0x088f, 0x00d0, 0xc002, + 0xc104, 0x78e5, 0x79c5, 0x2079, + 0x0000, 0x2179, 0x0001, 0x7824, + 0x41c3, 0x0048, 0x0000, 0x0cbe, + 0xfd8f, 0xc107, 0xc304, 0x4063, + 0x0b7a, 
0xfdaf, 0x42c1, 0x4608, + 0xc003, 0xc106, 0xc202, 0x0b6e, + 0xfdaf, 0x43e1, 0xc105, 0x0917, + 0x0070, 0x4300, 0x2642, 0x13c1, + 0x1d3f, 0x1042, 0xc105, 0x090b, + 0x0090, 0x1d40, 0x10c2, 0x5042, + 0xca03, 0x0937, 0x0004, 0xca02, + 0x082f, 0x0064, 0x7acf, 0x7b6f, + 0x41c3, 0x0049, 0x0002, 0x0c1e, + 0xfdef, 0x740c, 0x1540, 0x1080, + 0x153f, 0x10c3, 0x153e, 0x1082, + 0xc040, 0x41c3, 0x004a, 0x0003, + 0x0c02, 0xfdef, 0x740c, 0xca0f, + 0x2480, 0x3802, 0x1404, 0x341b, + 0xc6c6, 0x78e0, 0xc2e6, 0x08ca, + 0xfe6f, 0x4548, 0x153f, 0x1081, + 0x4300, 0x2140, 0x03ce, 0x44c9, + 0x222f, 0x8307, 0xf20e, 0x785b, + 0x60b8, 0x10ff, 0x808f, 0x1540, + 0x108b, 0x0f11, 0x12e4, 0x7785, + 0x10fe, 0x8080, 0x0be5, 0x9005, + 0x7ccf, 0x0c1f, 0x17b5, 0x720c, + 0x7894, 0x60b8, 0x88e1, 0x1540, + 0x108b, 0x0f0f, 0x12e4, 0x71c5, + 0x8800, 0x0be9, 0x9005, 0x491f, + 0x790f, 0x4950, 0x7a0f, 0x4c30, + 0x780f, 0x2009, 0x0082, 0xca0c, + 0xb8c4, 0xe806, 0xb861, 0x0801, + 0x0031, 0x7a5b, 0x794f, 0x8b00, + 0x0907, 0x0005, 0xab40, 0xc6c6, + 0x208c, 0x8fc3, 0xf209, 0x0821, + 0x01d1, 0x41c3, 0x9004, 0x0238, + 0x1900, 0x0045, 0xf006, 0x41c3, + 0x9004, 0x0238, 0x1900, 0x0105, + 0x1900, 0x0005, 0x1210, 0x3081, + 0xe903, 0x0019, 0x0000, 0x41c3, + 0x9018, 0x0064, 0xb100, 0x0059, + 0x0020, 0x1902, 0x0005, 0x78e0, + 0xc0f1, 0x1210, 0x3081, 0xe90a, + 0x1236, 0x3702, 0x228c, 0x8010, + 0xf793, 0x0e1e, 0xfdcf, 0xf019, + 0x41c3, 0x9018, 0x0064, 0xda10, + 0xb100, 0xba61, 0x0aff, 0x8031, + 0x781d, 0xb102, 0x0822, 0x0020, + 0x1902, 0x0005, 0xf009, 0x2615, + 0x7081, 0x8000, 0x1ca8, 0xa100, + 0x6a01, 0x1a36, 0x301c, 0xc0d1, + 0x7ee0, 0x78e0, 0x41c3, 0x9018, 0x0008, 0x9100, 0x08ff, 0x801f, 0x195e, 0x0045, 0x9100, 0x0801, - 0x001e, 0x7ee0, 0xc2e4, 0x1cfc, - 0xb6c8, 0x4608, 0x1600, 0x7080, - 0x8000, 0x0001, 0x083b, 0x013f, - 0x4338, 0xca0c, 0x7dd0, 0xe80a, - 0xcc32, 0x218a, 0x0010, 0x7902, - 0x6d02, 0x7030, 0x0980, 0xfe0a, - 0x0f36, 0xffef, 0xd808, 0x0f72, - 0xffef, 0x40c1, 0x70cd, 0xf007, - 0x0f66, 0xffef, 0x1304, 0x3400, - 
0x71c5, 0x0ef9, 0x9342, 0x1404, - 0x341b, 0xc6c4, 0x708d, 0x706c, - 0xf002, 0x4a30, 0x796f, 0x0923, - 0x00b5, 0x7a0f, 0x2200, 0x3040, - 0x1066, 0x0081, 0x09ef, 0x80a4, - 0x7164, 0x1068, 0x0080, 0x209a, - 0x0001, 0x2005, 0x008c, 0x7fe0, - 0x788f, 0x78e0, 0xc3e6, 0xc1a3, - 0x706c, 0x4708, 0x2440, 0x3240, - 0x1c0b, 0x30c2, 0x1c0a, 0x30c2, - 0x1c09, 0x30c2, 0xc362, 0xc041, - 0x2440, 0x32c0, 0xc040, 0x40e1, - 0x4548, 0x0f32, 0xfe2f, 0x4628, - 0xc082, 0xc041, 0x2440, 0x3280, - 0xc040, 0x40e1, 0x41c1, 0x42a1, - 0x0f1a, 0xfe2f, 0x716c, 0x140b, - 0x3081, 0x140a, 0x3080, 0x2009, - 0x0041, 0xc812, 0x782c, 0x603a, - 0x1409, 0x3081, 0xc022, 0x2009, - 0x0041, 0xc816, 0x623a, 0x782c, - 0x6058, 0xc7c6, 0xc2e6, 0x1cfc, - 0xb6c8, 0x2482, 0x360f, 0x0ad2, - 0xfe6f, 0x700c, 0x1600, 0x709b, + 0x001e, 0x7ee0, 0xc2e6, 0x4708, + 0x1600, 0x7080, 0x8000, 0x0001, + 0x083b, 0x013f, 0x4528, 0xca10, + 0x7ef0, 0xe80a, 0xcc36, 0x218a, + 0x0010, 0x7902, 0x6e02, 0x7030, + 0x0da4, 0xfdca, 0x0f36, 0xffef, + 0xd808, 0x0f72, 0xffef, 0x40e1, + 0x70ed, 0xf007, 0x0f66, 0xffef, + 0x1504, 0x1400, 0x71e5, 0x0ff9, + 0x9384, 0xc6c6, 0x708d, 0x706c, + 0xf009, 0x2100, 0x0680, 0x106e, + 0x0081, 0x0915, 0x00a5, 0x7164, + 0x4a30, 0x796f, 0x09ef, 0x80b4, + 0x7a0f, 0xf007, 0x1070, 0x008c, + 0x249a, 0x1001, 0x7c45, 0x7fe0, + 0x788f, 0x78e0, 0xc2e6, 0x1cfc, + 0xb6c8, 0xc1a6, 0x4508, 0x2440, + 0x3540, 0xc041, 0x2440, 0x35c0, + 0x706c, 0xc040, 0x40a1, 0xc243, + 0xc142, 0x1c17, 0x30c2, 0x1c16, + 0x30c2, 0x1c15, 0x30c2, 0x0b36, + 0xfe2f, 0xc365, 0xc085, 0xc102, + 0xc203, 0xc041, 0x2440, 0x3580, + 0xc040, 0x40a1, 0x0b1e, 0xfe2f, + 0x716c, 0x1416, 0x3083, 0x1417, + 0x3082, 0xc814, 0x1414, 0x309b, + 0x2209, 0x00c1, 0x211a, 0x000e, + 0x1415, 0x3080, 0x1203, 0x308c, + 0xc044, 0x2009, 0x06cf, 0xc818, + 0x78ec, 0x661e, 0xc812, 0x083f, + 0x0304, 0x1202, 0x308c, 0x0c37, + 0x1024, 0x740c, 0x45cb, 0x003e, + 0x0003, 0xc140, 0x09ce, 0xfdef, + 0x41a1, 0xc204, 0x254f, 0x1401, + 0x740c, 0x4363, 0x09be, 0xfdef, + 0xc740, 0xc202, 0xc303, 
0x732c, + 0x740c, 0xb996, 0x09ae, 0xfdef, + 0xc640, 0x40c1, 0xc0a6, 0x1404, + 0x341b, 0xc6c6, 0xc2e6, 0x1cfc, + 0xb6c8, 0x2482, 0x3411, 0x0f52, + 0xfe2f, 0x700c, 0x1600, 0x708d, 0x8000, 0x0004, 0x40c3, 0x8000, - 0x0794, 0x702c, 0x0a9e, 0xff2f, - 0xda28, 0x0a32, 0xfe6f, 0x700c, - 0xc046, 0x700c, 0xc070, 0xc04f, - 0x40c3, 0x8000, 0x0661, 0x8801, - 0x205f, 0x0641, 0x1600, 0x7080, - 0x8000, 0x0661, 0x205f, 0x0c80, - 0x6038, 0x70c3, 0x8000, 0x0670, - 0x1007, 0x00c0, 0x702c, 0xc044, - 0x0e7a, 0xff6f, 0xc006, 0x231a, - 0x3f8e, 0x0020, 0x0000, 0x2605, - 0x1f80, 0x9002, 0x0092, 0x90e0, - 0x0a3e, 0xfe6f, 0x2753, 0x1140, - 0x4508, 0x78fd, 0xf967, 0x781d, - 0x0a2e, 0xfe6f, 0xb8c5, 0x47cb, - 0x8000, 0x0480, 0x27f4, 0x1342, - 0x4318, 0x41c3, 0x008c, 0x0001, - 0x0de6, 0xfdef, 0x750c, 0x27f4, - 0x16c2, 0x41c3, 0x008c, 0x0001, - 0xb990, 0x0dd6, 0xfdef, 0x750c, - 0xc006, 0x712c, 0x734c, 0x0d72, - 0x0020, 0xc38f, 0x206f, 0x0243, - 0x881a, 0x0883, 0x003e, 0x71ed, - 0x41c3, 0x008e, 0x0000, 0x0db2, - 0xfdef, 0x740c, 0x2605, 0x1f80, - 0x9003, 0xe090, 0xb0e0, 0xf003, - 0x71e5, 0x7eef, 0x0e2b, 0x1135, - 0x750c, 0x41c3, 0x008f, 0x0000, - 0x0d8e, 0xfdcf, 0x40a1, 0x4163, - 0x08ae, 0xffef, 0x42c1, 0xe872, - 0xc006, 0x722c, 0x704c, 0x0d22, - 0x0020, 0xc38f, 0xf1ea, 0x46cb, - 0x0090, 0x0000, 0x0d6a, 0xfdef, - 0x41c1, 0xc230, 0x40a1, 0x088a, - 0xffef, 0x4163, 0x264f, 0x1401, - 0x0d56, 0xfdef, 0x740c, 0xc006, - 0x0da2, 0xff6f, 0x702c, 0xc006, - 0x712c, 0x734c, 0x0cea, 0x0020, - 0x706c, 0xc004, 0x7014, 0x04ca, - 0x002a, 0x2456, 0x3840, 0x1600, + 0x1b74, 0x702c, 0x0f9e, 0xfeef, + 0xda28, 0x0eb6, 0xfe2f, 0x700c, + 0xc046, 0x40c3, 0x8000, 0x06cd, + 0x8801, 0x1c28, 0x3001, 0x1c2c, + 0x3003, 0x205f, 0x0641, 0x1600, + 0x7080, 0x8000, 0x06cd, 0x205f, + 0x0c80, 0x6119, 0x2100, 0x0f80, + 0x8000, 0x0580, 0x1003, 0x00cf, + 0xc006, 0x0bd2, 0xff6f, 0x702c, + 0x251a, 0x1f8d, 0x0020, 0x0000, + 0x2505, 0x1f80, 0x9002, 0x0092, + 0x90c0, 0x2653, 0x115e, 0x0ebe, + 0xfe2f, 0x200a, 0x0780, 0x4318, + 0xf96c, 0xb8c5, 
0x0eae, 0xfe2f, + 0xc044, 0x4608, 0x750c, 0x41c3, + 0x0095, 0x0001, 0x0906, 0xfdef, + 0x220a, 0x0780, 0xc204, 0x41c3, + 0x0096, 0x0001, 0x08f6, 0xfdef, + 0x750c, 0xc006, 0x712c, 0x734c, + 0x0d92, 0x0020, 0xc38a, 0x206f, + 0x0243, 0x881a, 0x08a1, 0x003e, + 0x710c, 0xc044, 0x41c3, 0x0097, + 0x0000, 0x08d2, 0xfdef, 0x740c, + 0x2505, 0x1f8d, 0x9003, 0xe090, + 0x1d00, 0x1045, 0xf016, 0x41c3, + 0x0098, 0x0000, 0x08b6, 0xfdef, + 0x750c, 0xc207, 0x4063, 0x0dee, + 0xffaf, 0x41c1, 0xe807, 0xc006, + 0x722c, 0x704c, 0x0d46, 0x0020, + 0xc38a, 0xc004, 0x7104, 0xc044, + 0xc004, 0x780f, 0x08d3, 0x8134, + 0xc047, 0x41c3, 0x0099, 0x0000, + 0x0882, 0xfdef, 0x750c, 0xc22b, + 0x4063, 0x0dba, 0xffaf, 0x41c1, + 0xc02b, 0xe88a, 0x41c3, 0x009a, + 0x0000, 0x086a, 0xfdef, 0x750c, + 0x1d00, 0x1005, 0x41c3, 0x009b, + 0x0000, 0x085a, 0xfdef, 0x740c, + 0xc006, 0x0ae2, 0xff6f, 0x702c, + 0xc006, 0x712c, 0x734c, 0x0cee, + 0x0020, 0x706c, 0x70f5, 0x04c8, + 0x002a, 0x2456, 0x3800, 0x1600, 0x7083, 0x8000, 0x0004, 0x702c, - 0x0962, 0xff2f, 0xdab4, 0xc095, - 0x095a, 0xff2f, 0xdab4, 0x231a, - 0x0f80, 0x0010, 0x0000, 0xc051, - 0x1600, 0x7080, 0x8000, 0x0001, - 0xc053, 0x700c, 0xf003, 0xc008, - 0x7104, 0xc048, 0xc008, 0x780f, - 0x7314, 0x0486, 0x002d, 0xc045, - 0xc105, 0xc006, 0x08eb, 0x806e, - 0x702c, 0x0a76, 0xffef, 0xc005, - 0x4508, 0x2400, 0x3f80, 0x0000, - 0x01bc, 0x702c, 0x090e, 0xff2f, - 0xdab4, 0xc005, 0x70cd, 0x43c9, - 0x2055, 0x0c01, 0xc011, 0x7825, - 0xc04b, 0xf005, 0x1410, 0x300b, - 0x71c5, 0xca04, 0x0e73, 0x1022, - 0x708d, 0xf03d, 0x4381, 0x261a, - 0x1f9b, 0x0000, 0x1000, 0xc00b, - 0x239a, 0x0004, 0x47cb, 0x9002, - 0x0000, 0x2005, 0x06c0, 0x7865, - 0x781b, 0x78e5, 0x0cba, 0xfdef, - 0x9000, 0x7a1c, 0x2400, 0x3f81, - 0x0000, 0x01bc, 0x7a5c, 0x7a5c, - 0x2114, 0x02c1, 0xb100, 0x7a5c, - 0xd91b, 0xb961, 0x09ff, 0x8031, - 0x7a5d, 0x6059, 0x2844, 0x0800, - 0xb9c4, 0x7942, 0x2305, 0x06c2, - 0x7a5b, 0x209a, 0x0010, 0x7185, - 0x7905, 0x2754, 0x1c80, 0x7845, - 0xb020, 0x1410, 0x300b, 0x2340, - 0x1040, 
0x0c95, 0x9232, 0xc044, - 0xf1be, 0xc005, 0xc305, 0x41c3, - 0x0089, 0x0002, 0x2038, 0x0340, - 0x0c36, 0xfdaf, 0x42a1, 0x40c3, - 0x8000, 0x0661, 0xa8a1, 0x1600, - 0x7001, 0x8000, 0x0670, 0xc005, - 0x704c, 0x0c82, 0xfeaf, 0x716c, + 0x0e4a, 0xfeef, 0xdab4, 0xc093, + 0x0e42, 0xfeef, 0xdab4, 0x231a, + 0x0f83, 0x0010, 0x0000, 0x1600, + 0x7080, 0x8000, 0x0001, 0xc051, + 0x700c, 0x048b, 0x0020, 0xc350, + 0xc105, 0xc006, 0x2011, 0x8040, + 0x0476, 0x0021, 0x702c, 0xc005, + 0x780f, 0x0f8e, 0xffaf, 0xc04c, + 0x4608, 0x2400, 0x3f80, 0x0000, + 0x01b4, 0x702c, 0x0dfe, 0xfeef, + 0xdab4, 0xc005, 0x70ad, 0x70ed, + 0x2055, 0x0c01, 0xc010, 0x43db, + 0x9002, 0x0000, 0x7825, 0xc04d, + 0xca06, 0xc047, 0xf03b, 0x260a, + 0x33c0, 0xf034, 0x251a, 0x1f81, + 0x0000, 0x1000, 0x209a, 0x0004, + 0x7825, 0xc044, 0xc104, 0xc00d, + 0x7825, 0x781b, 0x2005, 0x06c0, + 0x0fca, 0xfdaf, 0x9000, 0x7a1c, + 0x2400, 0x3f81, 0x0000, 0x01b4, + 0x7a5c, 0x2114, 0x0781, 0x7a5c, + 0xb100, 0x7a5c, 0xd91b, 0xb961, + 0x09ff, 0x8031, 0x7a5d, 0x7164, + 0x6059, 0x2844, 0x0800, 0xb9c4, + 0x7942, 0x2354, 0x3c82, 0x2640, + 0x305e, 0x209a, 0x0010, 0x7905, + 0xc004, 0x781b, 0x7845, 0xb020, + 0x0b9d, 0x8234, 0x4060, 0x71a5, + 0xe709, 0xc007, 0x0d8b, 0x9024, + 0x706c, 0xc005, 0xc305, 0x41c3, + 0x0092, 0x0002, 0x2038, 0x0380, + 0x0f82, 0xfd6f, 0x42c1, 0x40c3, + 0x8000, 0x06cd, 0xa8c1, 0x1600, + 0x7001, 0x8000, 0x06dc, 0xc00c, + 0x704c, 0x0982, 0xfeaf, 0x716c, 0x1600, 0x709b, 0x8000, 0x0004, - 0x1600, 0x710e, 0x9008, 0x01e0, - 0x40a1, 0x0ba2, 0xfdef, 0x702c, - 0xc04c, 0x40c3, 0x0000, 0xffff, - 0x4100, 0x4200, 0x09b6, 0xfe6f, - 0x4300, 0x40c3, 0x0000, 0xffff, - 0x2058, 0x0800, 0x78c4, 0x1e00, - 0x7004, 0x9009, 0xe1e0, 0x40c3, - 0x8000, 0x04bc, 0x0c0a, 0x0060, - 0xd960, 0xd8ff, 0x1e00, 0x7004, - 0x9003, 0xf0c4, 0x710c, 0x2544, - 0x17ce, 0xee06, 0xbe61, 0x0e01, - 0x1031, 0x781b, 0x205a, 0x0100, - 0x702c, 0x200f, 0x0340, 0x780f, - 0x0d9a, 0xff2f, 0xc052, 0x231a, - 0x3f9b, 0x0010, 0x0000, 0x40c3, - 0x8000, 0x0661, 0x88c4, 0x700c, - 
0x1c34, 0x36c0, 0xc049, 0xf003, - 0x71c5, 0x40c3, 0x8000, 0x0661, - 0x8805, 0x7acf, 0x080b, 0x00a5, - 0x70ed, 0xf045, 0x71e5, 0x7bef, - 0x0be9, 0x8255, 0xc00c, 0x2055, - 0x0c01, 0xc00d, 0x7825, 0x221a, - 0x0f81, 0x0000, 0x1000, 0x7905, - 0x4060, 0x209a, 0x0004, 0x7825, - 0x781b, 0xb891, 0xb89c, 0xb89f, - 0x0bd5, 0x8231, 0x9080, 0x789d, - 0xf8b2, 0x781d, 0x70ad, 0xe80a, - 0x4508, 0x259a, 0x1010, 0x2453, - 0x1101, 0x2580, 0x1030, 0x7d25, - 0x225f, 0x048b, 0x2400, 0x3f81, - 0x0000, 0x0324, 0x7161, 0x7974, - 0xb1a0, 0x41c3, 0x0000, 0x141f, - 0x0813, 0x0170, 0xbcc4, 0x209a, - 0x0010, 0x2080, 0x0010, 0x2005, - 0x0301, 0x2400, 0x3f80, 0x0000, - 0x0270, 0x7061, 0x7874, 0xb020, - 0xf1be, 0x40c3, 0x8000, 0x0661, - 0x8804, 0x71cd, 0x205f, 0x0240, - 0xc047, 0xf005, 0xc009, 0x7104, - 0xc049, 0x70d5, 0xf2d4, 0x2400, - 0x3f80, 0x0000, 0x0324, 0x0ab2, - 0x0040, 0xd80f, 0x09fa, 0xfdef, - 0xd90f, 0x710c, 0x0eb6, 0xffaf, - 0xd9ff, 0x45cb, 0x9003, 0xe024, - 0x712c, 0x70cd, 0x1d00, 0x1f84, - 0x0000, 0xffff, 0xb526, 0x1dfe, - 0x9044, 0x1da8, 0x139c, 0x1da9, - 0x139c, 0xc013, 0xdf21, 0xb8e2, - 0x27ca, 0x1be1, 0x0d02, 0xfdaf, - 0x40e1, 0x0a5a, 0xff6f, 0x2456, - 0x3840, 0x2400, 0x3f80, 0x0000, - 0x0270, 0x0a5e, 0x0040, 0xd80f, - 0x09a6, 0xfdef, 0xd90f, 0x730c, - 0x0e62, 0xffaf, 0xd9ff, 0x712c, - 0x40e1, 0x1d00, 0x1f84, 0x0000, - 0xffff, 0xb526, 0x1dfe, 0x9044, - 0x1da8, 0x139c, 0x0cc2, 0xfdaf, - 0x1da9, 0x139c, 0x0a16, 0xff6f, - 0xc095, 0xc009, 0xc707, 0x780f, - 0xc054, 0x40c3, 0x8000, 0x0661, - 0x8804, 0xc04a, 0xf004, 0xc00a, - 0x7104, 0xc04a, 0xc00a, 0x790f, - 0x40c3, 0x8000, 0x0661, 0x8805, - 0x08e5, 0x0064, 0x4338, 0x7ff0, - 0xe708, 0x726c, 0xd808, 0xf005, - 0xc004, 0x71e5, 0x7104, 0xc044, - 0xc004, 0x7d0f, 0x0dd3, 0x9275, - 0x7cf0, 0x2456, 0x3840, 0x20f5, - 0x0302, 0xc095, 0x20f5, 0x0301, - 0x4950, 0x2048, 0x0000, 0x780e, - 0x0895, 0x01b3, 0x6159, 0x732c, - 0x213c, 0x0341, 0x0bcd, 0x8060, - 0xc14e, 0xc040, 0x700c, 0x41c3, - 0x00a7, 0x0003, 0x4263, 0x09c2, - 0xfdaf, 0x43a1, 0xc00c, 
0x259a, - 0x1001, 0x2055, 0x08c1, 0xc00d, - 0x2504, 0x1f82, 0x0000, 0x3f00, - 0x7905, 0x231a, 0x3f80, 0x0000, - 0x1000, 0x7825, 0x7845, 0x7e1b, - 0xbe91, 0xbe9c, 0xbe9f, 0x9620, - 0xc014, 0xc30e, 0xc140, 0x70c3, - 0x8000, 0x04bc, 0x1060, 0x00c0, - 0x6119, 0xc041, 0x6909, 0x7d2e, - 0x7a10, 0x257f, 0x17c0, 0x205f, - 0x0800, 0xe29e, 0xc542, 0x41c3, - 0x00a8, 0x0006, 0x25ca, 0x100d, - 0x78b0, 0xc043, 0xd80a, 0x094a, - 0xfdef, 0x4263, 0xb6a0, 0xc30e, - 0x71cd, 0xf1a4, 0x262f, 0xf041, - 0x783d, 0x2400, 0x3f81, 0x0000, - 0x01bc, 0x20ca, 0x0024, 0x7994, - 0xb100, 0xf198, 0x70d5, 0x0b14, - 0xff02, 0xf12a, 0x41c3, 0x00a9, - 0x0000, 0x0916, 0xfdef, 0x740c, - 0x40c3, 0x8000, 0x0661, 0x88c4, - 0xf002, 0x71c5, 0x40c3, 0x8000, - 0x0661, 0x8805, 0x7dcf, 0x0855, - 0x0364, 0xdf08, 0xc007, 0x7810, - 0xe008, 0xc047, 0xf01e, 0xc007, - 0x7910, 0x2456, 0x3840, 0x20f5, - 0x004c, 0xc095, 0x20f5, 0x0042, - 0x2400, 0x3f80, 0x0000, 0x01bc, - 0x20f5, 0x0040, 0x41c3, 0x00aa, - 0x0005, 0xc042, 0xc241, 0x740c, - 0x42a1, 0x08be, 0xfdef, 0xc440, - 0xc007, 0x71e5, 0x7104, 0xc047, - 0x7bef, 0x0bc7, 0x8254, 0xf1d3, - 0x08da, 0xfeef, 0xc012, 0x1e00, - 0x7005, 0x9003, 0xe004, 0x1600, - 0x7001, 0x8000, 0x0670, 0xc005, - 0x704c, 0x0902, 0xfeaf, 0x706c, - 0xc00b, 0x2400, 0x3f81, 0x0000, - 0x01bc, 0x704c, 0x0926, 0x0060, - 0x716c, 0x0377, 0xffcf, 0x2480, - 0x360f, 0x1404, 0x341b, 0xc6c6, - 0x781d, 0x781d, 0x781d, 0x781d, - 0x7ee0, 0x78e0, 0xc2e6, 0x1cfc, - 0xb6c8, 0xc1ac, 0x4768, 0xc049, - 0x710c, 0xc148, 0xc041, 0x0a17, - 0x00b0, 0xc045, 0x2278, 0x00c0, - 0xc045, 0x710c, 0x0a0d, 0x0070, - 0xc044, 0x2278, 0x00c0, 0xc044, - 0x1600, 0x7080, 0x8000, 0x0004, - 0x201a, 0x0f82, 0x0020, 0x0000, - 0x206f, 0x0243, 0x881a, 0x2205, - 0x0f81, 0x9002, 0x0090, 0xc047, - 0x9100, 0x7114, 0x700c, 0xc040, - 0xc043, 0xf409, 0x2205, 0x0f80, - 0x9002, 0x0096, 0x9000, 0xb8c5, - 0xc043, 0x40c3, 0x9003, 0xe024, - 0x702c, 0x1800, 0x0fc5, 0x180c, - 0x0fc5, 0x18fe, 0x8044, 0xc007, - 0xf874, 0x781d, 0xb8c0, 0x68c9, - 0xf004, 0xc000, 
0x7104, 0xc040, - 0xc000, 0x7d0f, 0x71b5, 0x018e, - 0x000d, 0xc009, 0x08ef, 0x836e, - 0x40a1, 0x702c, 0x0f16, 0xfdef, - 0xdaff, 0x40a1, 0x0d42, 0xffaf, - 0x702c, 0x4318, 0xc008, 0x41a1, - 0xdaff, 0x7b0f, 0x0d8a, 0xfe2f, - 0x700c, 0x40a1, 0xd9ff, 0x0d76, - 0xff6f, 0x714c, 0x0bf6, 0xfeaf, - 0x40a1, 0xc005, 0x7014, 0x710c, - 0xf218, 0x2344, 0x37c1, 0xe906, + 0x1600, 0x710d, 0x9008, 0x01e0, + 0x40c1, 0x0eae, 0xfdaf, 0x702c, + 0xc04e, 0x40c3, 0x0000, 0xffff, + 0x4100, 0x4200, 0x0eaa, 0xfe2f, + 0x4300, 0x40c3, 0x0000, 0xff7f, + 0x78a4, 0x1e00, 0x7004, 0x9009, + 0xe1e0, 0x2400, 0x3f80, 0x0000, + 0x03f0, 0x41c3, 0x8000, 0x04c5, + 0x0d5a, 0xfd6f, 0xda60, 0x2400, + 0x3f80, 0x0000, 0x03f0, 0x0c12, + 0x0060, 0xd960, 0xd8ff, 0x1e00, + 0x7004, 0x9003, 0xf0c4, 0x710c, + 0x2644, 0x17cd, 0xed05, 0xbd61, + 0x0dff, 0x9031, 0x781b, 0x702c, + 0x205a, 0x0100, 0x200f, 0x0380, + 0x0b02, 0xff2f, 0x780f, 0x700c, + 0xc048, 0xf00e, 0x6861, 0x7a7b, + 0x7034, 0x2400, 0x3f81, 0x0000, + 0x03d0, 0x22ce, 0x0021, 0x6038, + 0xa840, 0x4060, 0x08e9, 0x8434, + 0x2044, 0x0041, 0x40c3, 0x8000, + 0x06cd, 0x1005, 0x009e, 0x40c3, + 0x8000, 0x06cd, 0x231a, 0x3f9b, + 0x0010, 0x0000, 0x8864, 0x4468, + 0x1c3c, 0x36c0, 0xf03f, 0x70ed, + 0xf03a, 0xc00e, 0x2055, 0x0c01, + 0xc00f, 0x7825, 0x251a, 0x1f81, + 0x0000, 0x1000, 0x7905, 0x40e1, + 0x209a, 0x0004, 0x7825, 0x781b, + 0xb891, 0xb89c, 0xb89f, 0x0f4f, + 0x1231, 0x90c0, 0xf8ab, 0xbec4, + 0x702c, 0xe808, 0x4100, 0x219a, + 0x0010, 0x2180, 0x0030, 0x79c5, + 0x255f, 0x148b, 0x2400, 0x3f82, + 0x0000, 0x031c, 0x7261, 0xb228, + 0x41c3, 0x0000, 0x141f, 0x0813, + 0x0150, 0x209a, 0x0010, 0x2080, + 0x0010, 0x2005, 0x0381, 0x2400, + 0x3f80, 0x0000, 0x0268, 0x7061, + 0xb028, 0x71e5, 0x0f8f, 0x9254, + 0x7185, 0x7d8f, 0x0e83, 0xb365, + 0x710c, 0x235f, 0x024f, 0xc044, + 0xf0c9, 0x2400, 0x3f80, 0x0000, + 0x031c, 0x0ac2, 0x0040, 0xd80f, + 0x0cf2, 0xfdaf, 0xd90f, 0x710c, + 0x0bc6, 0xffaf, 0xd9ff, 0x45cb, + 0x9003, 0xe024, 0x1d00, 0x1f84, + 0x0000, 0xffff, 0xb506, 0x1dfe, + 0x9004, 
0x700c, 0xc044, 0x1da8, + 0x101c, 0x1da9, 0x101c, 0xc011, + 0xde21, 0x712c, 0xb8e2, 0x26ca, + 0x1be1, 0x0826, 0xfdaf, 0x40c1, + 0x0f96, 0xff2f, 0x2456, 0x3800, + 0x2400, 0x3f80, 0x0000, 0x0268, + 0x0a6a, 0x0040, 0xd80f, 0x0c9e, + 0xfdaf, 0xd90f, 0x730c, 0x0b72, + 0xffaf, 0xd9ff, 0x712c, 0x700c, + 0x1d00, 0x1f84, 0x0000, 0xffff, + 0xb526, 0x1dfe, 0x9044, 0x1da8, + 0x101c, 0x1da9, 0x101c, 0x0fe2, + 0xfd6f, 0x40c1, 0x0f52, 0xff2f, + 0xc093, 0xc008, 0x45e9, 0x780f, + 0xc052, 0x40c3, 0x8000, 0x06cd, + 0x8804, 0xc049, 0xf063, 0xe508, + 0x722c, 0xde08, 0xf05a, 0x7ab0, + 0x2456, 0x3800, 0x20f5, 0x008c, + 0xc093, 0x20f5, 0x0083, 0x4b90, + 0x2048, 0x0000, 0x780e, 0x0883, + 0x01b3, 0x7134, 0x712c, 0xf248, + 0xc040, 0x700c, 0x41c3, 0x00b1, + 0x0003, 0x4263, 0x0d26, 0xfd6f, + 0x43c1, 0xc00e, 0x716c, 0x2055, + 0x08c1, 0xc00f, 0x7905, 0x231a, + 0x3f80, 0x0000, 0x1000, 0x7825, + 0x781b, 0x2005, 0x0f80, 0x9002, + 0x0200, 0xc047, 0x9040, 0xc012, + 0x2400, 0x3f81, 0x0000, 0x03d0, + 0x2133, 0x0001, 0xc240, 0x6158, + 0x780e, 0xc042, 0x2048, 0x0000, + 0x2049, 0x0800, 0xc141, 0xc044, + 0xc043, 0x740c, 0x41c3, 0x00b2, + 0x0006, 0x0c82, 0xfdaf, 0x4263, + 0xc104, 0xc007, 0xb020, 0x710c, + 0x712c, 0xc044, 0xf00c, 0x2400, + 0x3f80, 0x0000, 0x01b4, 0x2014, + 0x0082, 0x6398, 0x781c, 0x2048, + 0x0000, 0xb200, 0x71a5, 0x71c5, + 0x0e4f, 0x9254, 0xc009, 0x7104, + 0xc049, 0xc009, 0x790f, 0x40c3, + 0x8000, 0x06cd, 0x8805, 0x0833, + 0x8065, 0x4338, 0xc004, 0x7014, + 0x0890, 0xff02, 0xc008, 0x7104, + 0xc048, 0xc004, 0x7014, 0xf537, + 0x41c3, 0x00b3, 0x0000, 0x0c1e, + 0xfdaf, 0x740c, 0x40c3, 0x8000, + 0x06cd, 0x88c4, 0xf027, 0x7ff0, + 0xe708, 0xdd08, 0xf01f, 0x2004, + 0x0f81, 0x0001, 0xfffe, 0x2456, + 0x3800, 0x2035, 0x0043, 0xc093, + 0x2035, 0x0042, 0x2400, 0x3f80, + 0x0000, 0x01b4, 0x2035, 0x0040, + 0xc241, 0xc340, 0xc042, 0x740c, + 0x41c3, 0x00b4, 0x0005, 0x4263, + 0x0bd2, 0xfdaf, 0x43a1, 0x71a5, + 0x71e5, 0x0dc7, 0x9274, 0x78fb, + 0x71c5, 0x40c3, 0x8000, 0x06cd, + 0x8805, 0x79cf, 0x08ab, 0x8065, + 
0x4338, 0x1e00, 0x7005, 0x9003, + 0xe004, 0x1600, 0x7001, 0x8000, + 0x06dc, 0xc00c, 0x704c, 0x0e06, + 0xfe6f, 0x706c, 0xc00d, 0x2400, + 0x3f81, 0x0000, 0x01b4, 0x704c, + 0x093a, 0x0060, 0x716c, 0xc005, + 0x7104, 0xc045, 0xc005, 0x7314, + 0x0378, 0xffce, 0x2480, 0x3411, + 0x1404, 0x341b, 0xc6c6, 0x78e0, + 0x78dd, 0x781d, 0x781d, 0x781d, + 0x781d, 0x781d, 0x7ee0, 0x78e0, + 0xc2e6, 0x1cfc, 0xb6c8, 0xc1af, + 0xc242, 0x1600, 0x7082, 0x8000, + 0x0004, 0xc04c, 0xc002, 0x221a, + 0x0f82, 0x0020, 0x0000, 0xc340, + 0x7014, 0x2078, 0x000b, 0x700c, + 0xc041, 0xc046, 0x20ca, 0x00e1, + 0xc14b, 0xc046, 0x2205, 0x0f80, + 0x9002, 0x0090, 0x9020, 0x206f, + 0x0243, 0x88ba, 0x2544, 0x1800, + 0xc048, 0x700c, 0x0919, 0x0071, + 0xc045, 0x2205, 0x0f80, 0x9002, + 0x0096, 0x9000, 0x0896, 0xfe2f, + 0xb8c5, 0xc045, 0x2340, 0x1080, + 0xc04d, 0xc002, 0x41c3, 0x9003, + 0xe024, 0xb881, 0xc04a, 0xc002, + 0xb880, 0xc049, 0x700c, 0x1900, + 0x0fc5, 0x190c, 0x0fc5, 0x19fe, + 0x8004, 0xc008, 0xf872, 0x781d, + 0x68e9, 0x710c, 0xc043, 0xf0bc, + 0xc101, 0xc00c, 0x2011, 0x8040, + 0xf2b3, 0xc001, 0x702c, 0xdaff, + 0x7d0f, 0x0ac6, 0xfdef, 0x40a1, + 0x40a1, 0x0a46, 0xffaf, 0x702c, + 0x4608, 0xc00b, 0x41a1, 0xdaff, + 0x7b0f, 0x0a6a, 0xfe2f, 0x700c, + 0x40a1, 0xd9ff, 0x0aa6, 0xff6f, + 0x714c, 0xc106, 0x08ca, 0xfeaf, + 0x40a1, 0xc009, 0x0833, 0x00f1, + 0x710c, 0x2644, 0x17c1, 0xe906, 0xb961, 0x0901, 0x0031, 0x781b, - 0x205a, 0x0100, 0x200f, 0x06db, - 0x0c3e, 0x0060, 0x40a1, 0x0b0a, - 0xfe2f, 0x40a1, 0x8820, 0x222f, - 0x06c7, 0x0b7a, 0x0020, 0x4040, - 0x40c3, 0x8000, 0x0665, 0x1001, - 0x008b, 0x1600, 0x709b, 0x8000, - 0x0665, 0xd8ff, 0x702c, 0x0ec6, - 0xfdaf, 0xda08, 0xc046, 0x2340, - 0x1040, 0x2002, 0x06c1, 0xc006, - 0x7829, 0xc04a, 0x700c, 0x4318, - 0xc042, 0xf017, 0x219f, 0x0041, - 0x2004, 0x06c0, 0x781d, 0x781d, - 0x5050, 0x2100, 0x0f80, 0x8000, - 0x07bc, 0x722c, 0x0cc6, 0xffaf, - 0xc04b, 0xc20b, 0x40a1, 0x0e7e, - 0xffaf, 0x702c, 0x7167, 0xc00a, - 0x212f, 0x06c7, 0x780f, 0x09d1, - 0x8022, 0xd8fc, 0xc004, 
0x7014, - 0x0a50, 0x0062, 0x20ca, 0x0342, - 0x0f0b, 0x9030, 0x206f, 0x0243, - 0x881a, 0xb8e0, 0xf37f, 0x40c3, - 0x8000, 0x0665, 0x8801, 0x1600, - 0x7081, 0x8000, 0x0665, 0x7104, - 0x4831, 0xc006, 0x7909, 0xf005, - 0xc002, 0x7104, 0xc042, 0xc002, - 0x70ad, 0x7c0f, 0x782f, 0x0c0d, - 0x1022, 0x704c, 0xf167, 0x71a5, - 0x7baf, 0x0b29, 0x07f5, 0x4081, - 0x209f, 0x0041, 0x7874, 0x43c3, - 0x8000, 0x07bc, 0x606b, 0x70c3, - 0x8000, 0x07bc, 0x8801, 0x0be3, - 0x8025, 0x6058, 0x4872, 0xf1ed, - 0xc007, 0x0831, 0x015e, 0xc001, - 0x7b10, 0x265a, 0x1400, 0x2805, - 0x00c0, 0xf816, 0x4e10, 0x225a, - 0x040e, 0x2e05, 0x10ce, 0x7edd, - 0x7edd, 0x7edd, 0x7edd, 0x661e, - 0xc001, 0x7104, 0xc041, 0xf1ca, - 0x7650, 0x22ca, 0x0385, 0x4648, - 0xf1c4, 0xef14, 0x206f, 0x0243, - 0x881a, 0x0821, 0x001e, 0x8740, - 0x0a19, 0x03a5, 0x750c, 0x41c3, - 0x0088, 0x0002, 0x0e22, 0xfdaf, - 0x43c1, 0xc003, 0xa7c0, 0xaf04, - 0xc0ac, 0x1404, 0x341b, 0xc6c6, - 0x781d, 0x781d, 0x781d, 0x781d, - 0x7ee0, 0x78e0, 0x42eb, 0xc0e4, - 0xf002, 0x7144, 0x7c4f, 0x0c3f, - 0x10c5, 0x45cb, 0x8000, 0x07fb, - 0xe909, 0x0927, 0x0051, 0x249f, + 0x205a, 0x0100, 0x200f, 0x038e, + 0x0c06, 0x0060, 0x40a1, 0x0f66, + 0xfdef, 0x40a1, 0x8820, 0x7acf, + 0x0b76, 0x0020, 0x4040, 0x46cb, + 0x8000, 0x06d1, 0x8e00, 0x1601, + 0x108b, 0x702c, 0xc044, 0xd8ff, + 0xda08, 0x09be, 0xfdaf, 0x706f, + 0xc047, 0xc004, 0x2340, 0x1041, + 0x7902, 0xc007, 0x7829, 0x780f, + 0xc04e, 0xf018, 0x209f, 0x0041, + 0x793d, 0x793d, 0x1a12, 0x3058, + 0x4e9f, 0x6038, 0x1212, 0x3081, + 0xc20d, 0x09d2, 0xffaf, 0xc044, + 0xc002, 0xe807, 0xc204, 0x40a1, + 0x0e0e, 0xffaf, 0x702c, 0x7167, + 0xc10e, 0x202f, 0x06c7, 0x08d1, + 0x8064, 0x791d, 0xc00a, 0x7314, + 0x0a38, 0x0061, 0x40a1, 0x206f, + 0x0243, 0x88ba, 0xc000, 0x0883, + 0x0010, 0x0d7f, 0x103e, 0x706d, + 0x8e01, 0x8e20, 0x7104, 0x4831, + 0xc007, 0x7829, 0x790f, 0xf030, + 0x706c, 0xf00f, 0x209f, 0x0041, + 0x7854, 0x60d8, 0x100f, 0x009e, + 0x8810, 0x0e0d, 0x3025, 0x6078, + 0x2002, 0x0783, 0x7144, 0x0ae9, + 0x87f4, 0x4081, 
0xc008, 0xe887, + 0x7770, 0x23ca, 0x03c5, 0x4768, + 0xf015, 0xc003, 0x235a, 0x0403, + 0x7a10, 0x275a, 0x1400, 0x2805, + 0x0080, 0xf81d, 0x4f10, 0x2b05, + 0x008f, 0x7ffd, 0x7ffd, 0x7ffd, + 0x7ffd, 0x671f, 0xc003, 0x7104, + 0xc043, 0x7165, 0x242f, 0x12c7, + 0x0ca1, 0x9064, 0x704c, 0xc001, + 0x7104, 0xc041, 0xc001, 0x7214, + 0x0688, 0xffc5, 0xc000, 0xe81b, + 0x0d33, 0x101e, 0xc000, 0x8040, + 0x0a1f, 0x03e5, 0x750c, 0x41c3, + 0x0090, 0x0002, 0x092e, 0xfdaf, + 0x43e1, 0xc100, 0xc005, 0xa904, + 0xc000, 0xa0e0, 0xf007, 0x41c3, + 0x0091, 0x0001, 0x0916, 0xfdaf, + 0x42e1, 0xc0af, 0x1404, 0x341b, + 0xc6c6, 0x78e0, 0x781d, 0x781d, + 0x781d, 0x781d, 0x7ee0, 0x78e0, + 0xc0e4, 0xf029, 0x45cb, 0x8000, + 0x071f, 0xe910, 0x093b, 0x0070, + 0x4448, 0x0941, 0x00b1, 0x4448, + 0x249a, 0x1041, 0x659d, 0x8dc0, + 0x8881, 0x64dc, 0xad80, 0xf016, + 0x4448, 0x249a, 0x1041, 0x64be, + 0x8ea0, 0x8881, 0x64bc, 0xae80, + 0x8882, 0x2452, 0x11cd, 0x8e81, + 0x64bc, 0xae81, 0xf006, 0x249a, 0x1041, 0x64bc, 0x88a2, 0xaca1, - 0xf1f1, 0xf80a, 0x64dc, 0x88c2, - 0xad80, 0x8d81, 0x64dc, 0x2455, - 0x180c, 0xad81, 0xf1e7, 0x09cf, - 0x8091, 0xf804, 0x64dc, 0xad80, - 0xf1e1, 0x4759, 0xc4c4, 0x78e0, - 0x249f, 0x1041, 0x659d, 0x8dc0, - 0x8881, 0x7ee0, 0xc2e6, 0x1cfc, - 0xb6c8, 0xc1a1, 0x4738, 0x4100, - 0x40c3, 0x8000, 0x0665, 0x88c0, - 0x43cb, 0x9002, 0x0080, 0xc240, - 0xf002, 0x71c5, 0x8841, 0x7fcf, - 0x0a0b, 0x03e5, 0x70ad, 0xf028, - 0x71a5, 0x7caf, 0x0cef, 0x9255, - 0xc200, 0x0af9, 0x830e, 0x275f, - 0x1242, 0x72e3, 0x2233, 0x0303, - 0x271a, 0x1f82, 0x0000, 0x2000, - 0x249a, 0x1008, 0x633b, 0x2342, - 0x805b, 0x7c45, 0x2405, 0x12c2, - 0xb220, 0xda7f, 0x22ca, 0x002b, - 0x238c, 0xbf41, 0x23ca, 0x008d, - 0x7a6f, 0x2342, 0x1803, 0x7c65, - 0xb440, 0xf1dc, 0x7487, 0x1404, - 0x341b, 0xc6c6, 0xc2e6, 0x1cfc, - 0xb6c8, 0xc1a8, 0xc146, 0xc044, - 0x700c, 0xd9ff, 0xc347, 0x0ef6, - 0xfd6f, 0xc245, 0x1434, 0x301b, - 0x70cd, 0x70ad, 0xf002, 0x71a5, - 0x0d77, 0x1335, 0x710c, 0x2532, - 0x1f8f, 0x8000, 0x0540, 0x702c, - 0x0cb2, 
0xfd6f, 0x42e1, 0x2104, - 0x06c2, 0xc107, 0x7824, 0x2005, - 0x80be, 0xf5ef, 0xc006, 0x734c, - 0xdb22, 0x60e9, 0xc004, 0x1c0c, - 0x3001, 0xc042, 0xc741, 0xc140, - 0xd8ff, 0x09ca, 0xfdaf, 0x702c, - 0xed03, 0x0dbf, 0x9191, 0x70b5, - 0xd9c8, 0xd8fa, 0x0f2a, 0xfe6f, + 0x7144, 0x0ab5, 0x80c4, 0xc4c4, + 0xc2e6, 0x1cfc, 0xb6c8, 0x260a, + 0x3080, 0x4200, 0x40c3, 0x8000, + 0x06d1, 0x4738, 0x1001, 0x008b, + 0x8820, 0xf02a, 0x0e49, 0x332e, + 0x4789, 0x205f, 0x0243, 0x279a, + 0x1008, 0x45cb, 0x9002, 0x0080, + 0x73e3, 0x2333, 0x030e, 0x201a, + 0x0f83, 0x0000, 0x2000, 0x665e, + 0x2642, 0x905b, 0x7f65, 0x2705, + 0x1343, 0xb340, 0xdb7f, 0x23ca, + 0x002b, 0x238c, 0xbf41, 0x26ca, + 0x10cd, 0x2542, 0x180d, 0x7bcf, + 0x7fa5, 0xb760, 0x7185, 0x0cb9, + 0x9254, 0x7124, 0x782f, 0x0bfb, + 0x9025, 0x708d, 0x1404, 0x341b, + 0xc6c6, 0x78e0, 0xc2e6, 0x1cfc, + 0xb6c8, 0xc1a8, 0xc145, 0x4318, + 0x700c, 0xd9ff, 0xc346, 0x0a32, + 0xfd6f, 0xc244, 0xc00d, 0x70cd, + 0x70ad, 0xc047, 0xf03a, 0x2532, + 0x1f8f, 0x8000, 0x0528, 0x702c, + 0x0ff6, 0xfd2f, 0x42e1, 0xc207, + 0x7944, 0xc206, 0x7844, 0x2005, + 0x807e, 0xf42b, 0xc005, 0x702c, + 0x734c, 0xdb22, 0x60e8, 0x1c0c, + 0x3001, 0x1c08, 0x36c0, 0xc040, + 0xd8ff, 0x0cf2, 0xfd6f, 0xc741, + 0x0d07, 0x1190, 0xed99, 0x70b5, + 0xd9c8, 0xd8fa, 0x0c1a, 0xfe6f, 0x20ca, 0x0041, 0x781d, 0x781d, - 0x781d, 0x781d, 0x781d, 0x702c, - 0x780f, 0xc142, 0xc141, 0xc140, - 0xc043, 0xd8ff, 0x754c, 0x0996, - 0xfdaf, 0xdb20, 0xf1c5, 0xd8ff, + 0x781d, 0x781d, 0x781d, 0x780f, + 0xc043, 0x700c, 0xc042, 0xc041, + 0xc040, 0xd8ff, 0x702c, 0x754c, + 0x0cba, 0xfd6f, 0xdb20, 0x71a5, + 0x0d8f, 0x9334, 0x710c, 0xd8ff, 0x702c, 0x754c, 0xdb41, 0x1c0c, - 0x3401, 0xc642, 0xc641, 0x0f6a, + 0x3401, 0xc642, 0xc641, 0x0a8e, 0xfd6f, 0xc640, 0xd8ff, 0xd980, 0x754c, 0x746c, 0xc643, 0xc642, - 0xc641, 0x0f56, 0xfd6f, 0xc640, - 0x700c, 0x0eee, 0xfd6f, 0x712c, - 0xc005, 0xe837, 0x700c, 0x0e36, - 0xfd6f, 0xd9ff, 0xc604, 0x70ad, - 0xd84f, 0xbec1, 0xc543, 0xc642, - 0xc541, 0xc040, 0xd8ff, 0xd980, - 0xda17, 
0x0f26, 0xfd6f, 0x746c, - 0x700c, 0x0ebe, 0xfd6f, 0x712c, - 0x40c3, 0x000f, 0x4240, 0x0d42, - 0xfeaf, 0x702c, 0x700c, 0x0dfe, - 0xfd6f, 0xd9ff, 0xd851, 0xc543, - 0xc642, 0xc541, 0xc040, 0xd8ff, - 0xd980, 0xda17, 0x0ef2, 0xfd6f, - 0x746c, 0x700c, 0x0e8a, 0xfd6f, - 0x712c, 0x40c3, 0x0000, 0x1f40, - 0x0d0e, 0xfeaf, 0xd91e, 0xc0a8, + 0xc641, 0x0a7a, 0xfd6f, 0xc640, + 0x700c, 0x0a16, 0xfd6f, 0x712c, + 0xc004, 0xe833, 0x700c, 0x0972, + 0xfd6f, 0xd9ff, 0xd84f, 0x70ad, + 0x2353, 0x305b, 0xc040, 0xf817, + 0xc543, 0x1c08, 0x36c0, 0x0a4e, + 0xfd6f, 0xc541, 0x700c, 0x09ea, + 0xfd6f, 0x712c, 0x40c3, 0x000f, + 0x4240, 0x0a3e, 0xfeaf, 0x702c, + 0x700c, 0x093e, 0xfd6f, 0xd9ff, + 0xd851, 0xc040, 0xf80b, 0xc543, + 0x1c08, 0x36c0, 0x0a1e, 0xfd6f, + 0xc541, 0x700c, 0x09ba, 0xfd6f, + 0x712c, 0x40c3, 0x0000, 0x7530, + 0x0a0e, 0xfeaf, 0xd908, 0xc0a8, 0x1404, 0x341b, 0xc6c6, 0x78e0, - 0xc2e6, 0xc1a4, 0x4628, 0x4708, - 0x700c, 0x0dba, 0xfd6f, 0xd9ff, - 0x702c, 0xd8ff, 0x734c, 0xdb22, - 0xc143, 0xc742, 0x1c04, 0x3381, - 0x4528, 0x08c2, 0xfdaf, 0xc640, - 0x0e2e, 0xfe6f, 0xd8fa, 0x781d, - 0x781d, 0x781d, 0x781d, 0x781d, - 0x780f, 0xc542, 0xc541, 0xc540, - 0xc043, 0xd8ff, 0x702c, 0x754c, - 0x089a, 0xfdaf, 0xdb20, 0xd8ff, - 0xd980, 0x754c, 0x726c, 0xc543, - 0xc542, 0xc541, 0x0e72, 0xfd6f, - 0xc540, 0x700c, 0x0e0a, 0xfd6f, - 0x712c, 0xc0a4, 0xc6c6, 0x78e0, - 0xc2e6, 0x1cfc, 0xb6c8, 0xc1ad, - 0xc34a, 0xc248, 0xc14b, 0x091e, - 0xfdef, 0xc045, 0xc012, 0x70cd, - 0xddff, 0xc04c, 0xc013, 0xc049, - 0x40c1, 0xf004, 0xc004, 0x7104, - 0xc044, 0xc004, 0xe08b, 0x0170, - 0x000d, 0xc004, 0x2032, 0x0f80, - 0x8000, 0x0540, 0xc047, 0xc107, - 0xc00b, 0x6028, 0xc046, 0x78af, - 0x208c, 0x8fc3, 0xf40a, 0x40c3, - 0x0000, 0x0a00, 0x0cfe, 0xfd6f, - 0xd9ff, 0x0826, 0xff8f, 0x70ad, - 0xc007, 0x712c, 0xc20a, 0xb8c4, - 0xe805, 0xb861, 0x08ff, 0x8031, - 0x793b, 0xd81f, 0x7a24, 0xb861, - 0x08ff, 0x8031, 0x793c, 0xc00c, - 0x7824, 0x2205, 0x803e, 0xf47b, - 0xc006, 0x47cb, 0x8000, 0x0578, - 0x781d, 0x781d, 0x2045, 0x019b, - 
0xc009, 0xe80e, 0x8f01, 0x702c, - 0x704c, 0xc043, 0x700c, 0xc042, - 0xc041, 0xc040, 0xd8ff, 0x0db2, - 0xfd6f, 0xdb20, 0x8f01, 0x702c, - 0x704c, 0xc043, 0xc005, 0x726c, - 0x71a5, 0xc042, 0x710c, 0xc041, - 0x2344, 0x3980, 0xc040, 0x0d92, - 0xfd6f, 0xd8ff, 0x8f01, 0x702c, - 0x704c, 0xc043, 0x710c, 0xc642, - 0xc041, 0xc007, 0x726c, 0xb8c5, - 0xc040, 0x0d76, 0xfd6f, 0xd8ff, - 0x8f01, 0x702c, 0x704c, 0xc043, - 0xc005, 0x726c, 0xc042, 0x710c, - 0xc041, 0xc006, 0x781d, 0x2045, - 0x0580, 0x2044, 0x0d80, 0xc040, - 0x0d4e, 0xfd6f, 0xd8ff, 0x8f01, - 0x702c, 0x704c, 0xc043, 0x710c, - 0xc642, 0xc041, 0xc006, 0x726c, - 0xb8c5, 0xc040, 0x0d32, 0xfd6f, - 0xd8ff, 0xc004, 0xe818, 0x40c3, - 0x0000, 0x2710, 0x0b72, 0xfeaf, - 0xd90a, 0x781d, 0x781d, 0x7704, - 0x7f0f, 0xf84a, 0xc642, 0xc641, - 0xc640, 0x0d0e, 0xfd6f, 0xc743, - 0xf846, 0xc743, 0xc642, 0xc641, - 0xc640, 0xf00b, 0x8f01, 0x702c, - 0x704c, 0xc043, 0xc642, 0xc641, - 0xc640, 0xd8ff, 0xdb18, 0x0cea, - 0xfd4f, 0x78af, 0x0811, 0x0110, - 0xc104, 0xe18b, 0xf550, 0x70ad, - 0x7014, 0xf34e, 0x0e9a, 0xff4f, - 0x700c, 0x0c6e, 0xfd6f, 0x712c, - 0xddff, 0xf146, 0xc008, 0x08c9, - 0x0030, 0xd9ff, 0x40c3, 0x0000, - 0x0a00, 0x0baa, 0xfd6f, 0x4318, - 0x0ece, 0xff4f, 0x46cb, 0x8000, - 0x0579, 0x8e00, 0x702c, 0xdf20, - 0xc043, 0xc005, 0x704c, 0x726c, - 0xc042, 0xd8ff, 0xc141, 0x4528, - 0x0c8e, 0xfd6f, 0xc740, 0x8e00, + 0xd8ff, 0xd980, 0xda17, 0x746c, + 0x7ee0, 0x78e0, 0xc2e6, 0xc1a4, + 0x4628, 0x4708, 0x700c, 0x08f2, + 0xfd6f, 0xd9ff, 0x702c, 0xd8ff, + 0x734c, 0xdb22, 0x4528, 0xc143, + 0xc742, 0x1c04, 0x3381, 0x0bde, + 0xfd6f, 0xc640, 0x0b12, 0xfe6f, + 0xd8fa, 0x781d, 0x781d, 0x781d, + 0x781d, 0x781d, 0x780f, 0xc043, + 0xd8ff, 0x702c, 0x754c, 0xdb20, + 0xc542, 0xc541, 0x0bb6, 0xfd6f, + 0xc540, 0xd8ff, 0xd980, 0x754c, + 0x726c, 0xc543, 0xc542, 0xc541, + 0x0992, 0xfd6f, 0xc540, 0x700c, + 0x092e, 0xfd6f, 0x712c, 0xc0a4, + 0xc6c6, 0x78e0, 0xc2e6, 0x1cfc, + 0xb6c8, 0xc1ac, 0xc349, 0xc247, + 0xc14a, 0x0cba, 0xfdaf, 0xc046, + 0xc011, 0x71ed, 0xc04b, 
0xc012, + 0xc048, 0xd8ff, 0xc044, 0x700c, + 0xc045, 0xf0b8, 0xc005, 0x2032, + 0x0f9b, 0x8000, 0x0528, 0xc00a, + 0x2032, 0x06ce, 0xc004, 0x780f, + 0x208c, 0x8fc3, 0xf40b, 0x40c3, + 0x0000, 0x0a00, 0x0842, 0xfd6f, + 0xd9ff, 0x0d36, 0xff4f, 0x700c, + 0xc044, 0x41e1, 0x2344, 0x37c0, + 0xc209, 0xe806, 0xb861, 0x0801, + 0x0031, 0x793b, 0xd81f, 0x7a24, + 0xb861, 0x0801, 0x0031, 0x793c, + 0xc00b, 0x7824, 0x2205, 0x803e, + 0xf479, 0x78dd, 0x781d, 0x2044, + 0x0800, 0x2045, 0x018d, 0xc008, + 0xe80b, 0xf87e, 0xc043, 0x700c, + 0xc042, 0xc041, 0xc040, 0xd8ff, + 0x08e2, 0xfd6f, 0xdb20, 0xc004, + 0x702c, 0x704c, 0x7104, 0xc044, + 0x40c3, 0x8000, 0x05e4, 0x8801, + 0x726c, 0xc741, 0xc043, 0xc006, + 0xc540, 0xc042, 0x08be, 0xfd6f, + 0xd8ff, 0x40c3, 0x8000, 0x05e4, + 0x8801, 0x70ad, 0x2353, 0x315b, + 0xc043, 0xf869, 0xc542, 0xc741, + 0x08a2, 0xfd6f, 0x1c00, 0x36c0, + 0xf868, 0xc043, 0xc006, 0x726c, + 0xc741, 0xc042, 0x78dd, 0x2044, + 0x0800, 0x2045, 0x0580, 0xc040, + 0x0882, 0xfd6f, 0xd8ff, 0x40c3, + 0x8000, 0x05e4, 0x8801, 0xbec5, + 0x702c, 0xc043, 0xd8ff, 0x704c, + 0x726c, 0xc542, 0xc741, 0x0866, + 0xfd6f, 0xc640, 0xc005, 0xe819, + 0x40c3, 0x0000, 0x2710, 0x087a, + 0xfeaf, 0xd90a, 0x781d, 0x781d, + 0x7704, 0x7e0f, 0xf850, 0xc542, + 0xc541, 0xc540, 0x083e, 0xfd6f, + 0xc643, 0xf84d, 0xc643, 0xc542, + 0xc541, 0xc540, 0xf00d, 0x40c3, + 0x8000, 0x05e4, 0x8801, 0xc542, + 0xc541, 0xc043, 0xc540, 0xd8ff, + 0x702c, 0x704c, 0xdb18, 0x0816, + 0xfd4f, 0xc004, 0x790f, 0x090f, + 0x0110, 0xc005, 0x0817, 0x02d1, + 0xe909, 0x0bae, 0xff4f, 0x700c, + 0x0f9e, 0xfd2f, 0x712c, 0xd8ff, + 0xc044, 0xc005, 0x7104, 0xc045, + 0xc005, 0xe08c, 0x0690, 0xffc5, + 0xc007, 0x08cf, 0x0030, 0xd9ff, + 0x47cb, 0x0000, 0x0a00, 0x0ee2, + 0xfd2f, 0x40e1, 0x0bd2, 0xff4f, + 0x46cb, 0x8000, 0x05e5, 0x8e00, + 0x702c, 0x234a, 0x3800, 0xc043, + 0xc006, 0x704c, 0x726c, 0xc042, + 0xd8ff, 0x4528, 0xc141, 0x0fae, + 0xfd2f, 0x1c00, 0x36c0, 0x8e00, 0x702c, 0x704c, 0xc043, 0xd8ff, - 0x726c, 0xc542, 0xc541, 0x0c7a, - 0xfd6f, 0x1c00, 
0x33c1, 0x0e3a, - 0xff4f, 0x700c, 0x0c0a, 0xfd6f, + 0x726c, 0xc542, 0xc541, 0x0f96, + 0xfd2f, 0x1c00, 0x33c1, 0x0b3a, + 0xff4f, 0x700c, 0x0f2a, 0xfd2f, 0x712c, 0x40c3, 0x000f, 0x4240, - 0x0a8e, 0xfeaf, 0x702c, 0x4063, - 0x0b4a, 0xfd6f, 0xd9ff, 0x0e72, + 0x0f7e, 0xfe6f, 0x702c, 0x40e1, + 0x0e7e, 0xfd2f, 0xd9ff, 0x0b72, 0xff4f, 0x8e00, 0x702c, 0x704c, - 0xc043, 0xc005, 0x726c, 0xc042, - 0xd8ff, 0xc541, 0x0c3a, 0xfd6f, - 0xc740, 0x8e00, 0x702c, 0x704c, - 0xc043, 0xd8ff, 0x726c, 0xc542, - 0xc541, 0x0c26, 0xfd6f, 0x1c00, - 0x3441, 0x8e00, 0x702c, 0x704c, - 0xc043, 0xd8ff, 0xdb10, 0xc542, - 0xc541, 0x0c0e, 0xfd6f, 0xc540, - 0x0dce, 0xff4f, 0x700c, 0x0ba2, - 0xfd6f, 0x712c, 0x0806, 0xfdcf, - 0xc0ad, 0x1404, 0x341b, 0xc6c6, - 0xd8ff, 0x702c, 0x704c, 0x726c, - 0x7ee0, 0x78e0, 0xc2e6, 0x1cfc, - 0xb6c8, 0xc1aa, 0xc043, 0x1600, - 0x7080, 0x8000, 0x0058, 0x70ad, - 0xc346, 0xc048, 0xd858, 0xb89f, - 0x10cd, 0x809b, 0xc00f, 0xc245, - 0xc144, 0xc047, 0xf002, 0x71a5, - 0x0db7, 0x10b3, 0x710c, 0x2544, - 0x17c1, 0xe906, 0xb961, 0x0901, - 0x0031, 0x781b, 0xc042, 0xc102, - 0xc007, 0x70cd, 0x7824, 0xc049, - 0xf002, 0x71c5, 0x0edb, 0x90b3, - 0x70d5, 0xc108, 0x4063, 0x703c, - 0xc109, 0x782b, 0xf3f7, 0xc002, - 0x2644, 0x17c1, 0x7f0f, 0x710c, + 0xc043, 0xc006, 0x726c, 0xc541, + 0xc042, 0xd8ff, 0x0f56, 0xfd2f, + 0x1c00, 0x36c0, 0x8e00, 0x702c, + 0x704c, 0xc043, 0xd8ff, 0x726c, + 0xc542, 0xc541, 0x0f3e, 0xfd2f, + 0x1c00, 0x3441, 0x8e00, 0x702c, + 0x704c, 0xc043, 0xd8ff, 0xdb10, + 0xc542, 0xc541, 0x0f26, 0xfd2f, + 0xc540, 0x0ace, 0xff4f, 0x700c, + 0x0ebe, 0xfd2f, 0x712c, 0x0bb2, + 0xfd8f, 0xc0ac, 0x1404, 0x341b, + 0xc6c6, 0x78e0, 0xd8ff, 0x702c, + 0x704c, 0x726c, 0x7ee0, 0x78e0, + 0x40c3, 0x8000, 0x05e4, 0x8801, + 0x702c, 0x704c, 0x7ee0, 0x78e0, + 0xc2e6, 0x1cfc, 0xb6c8, 0xc1a9, + 0xc145, 0xd958, 0xb99f, 0xc046, + 0x8900, 0x11cd, 0x808f, 0x70ad, + 0xc047, 0xc00e, 0x71cd, 0xc343, + 0xc244, 0xc048, 0xf058, 0x2544, + 0x17c1, 0x706f, 0xe905, 0xb961, + 0x09ff, 0x8031, 0x781b, 0xc042, + 0xf04b, 
0xc007, 0xc202, 0xc108, + 0x7077, 0x20ca, 0x03c1, 0x7944, + 0x790b, 0xf242, 0xc002, 0x2344, + 0x37c1, 0x262f, 0x3007, 0x40c1, 0xe905, 0xb961, 0x09ff, 0x8031, - 0x781b, 0x0d6a, 0xfdaf, 0x780f, - 0x40c3, 0x8000, 0x0578, 0x8800, - 0xe807, 0xd858, 0xb89f, 0x10a9, - 0x8080, 0x082d, 0x00de, 0xc006, - 0x265a, 0x1c81, 0x275f, 0x1102, - 0xc040, 0xc003, 0xc305, 0x6038, - 0x255a, 0x1641, 0x6119, 0xc002, - 0x7845, 0xc204, 0x0b22, 0xffef, - 0x780f, 0xf1cd, 0xc006, 0x265a, - 0x1c81, 0x1c04, 0x3001, 0xc040, - 0xc003, 0x275f, 0x1102, 0xc305, - 0x6038, 0x255a, 0x1641, 0x6119, - 0xc002, 0x7845, 0xc204, 0x0cae, - 0xffef, 0x780f, 0xf1b7, 0x0cfe, - 0xfdaf, 0x730c, 0xc0aa, 0x1404, - 0x341b, 0xc6c6, 0xc2e4, 0x42c3, - 0x8000, 0x0665, 0x8a60, 0x235f, - 0x0241, 0x7834, 0xf002, 0x7164, - 0x8a21, 0x7c6f, 0x090d, 0x0325, - 0x70ad, 0xf016, 0x71a5, 0x7204, - 0x79af, 0xe188, 0xf735, 0xf5fc, - 0x219a, 0x0008, 0x241a, 0x1f8e, - 0x0000, 0x2000, 0x79c5, 0x2105, - 0x0f8e, 0x9002, 0x0064, 0x9020, - 0xb620, 0xf1ee, 0x09b6, 0xfecf, - 0xc6c4, 0x78e0, 0xc2e2, 0x1cfc, - 0xb6c8, 0x4338, 0x4308, 0x70ad, - 0xf002, 0x71a5, 0x2b45, 0x3180, - 0x0d49, 0x1005, 0x255a, 0x1182, - 0x7261, 0x8a01, 0x8a60, 0x8a22, - 0x209a, 0x0004, 0x7b05, 0x8a03, - 0x209a, 0x0004, 0x7825, 0x201a, - 0x0f80, 0x0001, 0x0000, 0x2005, - 0x00c1, 0x8a05, 0x8a64, 0x7734, - 0x209a, 0x0004, 0x7865, 0xf207, - 0x793b, 0xb99c, 0xb99f, 0xb100, - 0xf1dd, 0x0826, 0xfe8f, 0xf1db, - 0x1404, 0x341b, 0xc6c2, 0x78e0, - 0xc2e6, 0x1cfc, 0xb6c8, 0xc1a2, - 0x45cb, 0x8000, 0x0665, 0x8de0, - 0x4318, 0xc340, 0xc141, 0xea04, - 0x7bfb, 0xf004, 0x275f, 0x1243, - 0x7054, 0x274a, 0x3200, 0x27ca, - 0x3062, 0xf003, 0x71e5, 0x8d01, - 0x7710, 0x700c, 0xf6d3, 0xf03b, - 0x219a, 0x0001, 0x7104, 0x7164, - 0x7e25, 0x271a, 0x1f81, 0x0000, - 0x1000, 0x2105, 0x06c1, 0x7985, - 0x793b, 0xb991, 0xb99c, 0xb99f, - 0xb1c0, 0x0fd5, 0xb022, 0x4408, - 0xc101, 0x249a, 0x1004, 0x21f5, - 0x00c2, 0xc100, 0xe912, 0x7e5c, - 0x7edc, 0x7edc, 0x7edc, 0x224a, - 0x16c0, 0x2242, 0x104a, 0x0aff, - 
0x9031, 0x7edd, 0x2a44, 0x0801, - 0x62da, 0xbac4, 0x4ad6, 0xf1d6, - 0x268a, 0x1fcf, 0x7e44, 0x2204, - 0x0f81, 0x0000, 0xfc00, 0xda0a, - 0xba61, 0x0a01, 0x0031, 0x793d, - 0xf1c8, 0xc0a2, 0x1404, 0x341b, - 0xc6c6, 0x78e0, 0xc2e6, 0x7014, - 0x40c3, 0x0040, 0x1800, 0x70ad, - 0x216f, 0x0dc3, 0x25ca, 0x1001, - 0x8900, 0x46cb, 0x8000, 0x0680, - 0x0857, 0x003e, 0xc1a1, 0x11e9, - 0x808f, 0x41c3, 0x013e, 0x0000, - 0x0e76, 0xfd6f, 0x740c, 0x275f, - 0x1182, 0x43c3, 0x0012, 0x0310, - 0x41c3, 0x9000, 0x0000, 0xdf08, - 0x6278, 0x7825, 0x9000, 0xae00, - 0xbf61, 0x0f01, 0x1031, 0x781d, - 0xae19, 0x6b02, 0x6058, 0x7825, - 0x9000, 0x41c3, 0x8000, 0x06b2, - 0xda08, 0xa900, 0xba61, 0x0a01, - 0x0031, 0x781d, 0xa919, 0xbe6c, - 0x40c1, 0x0986, 0xff2f, 0xd90d, - 0x40c1, 0xd90d, 0x0b72, 0xfe6f, - 0xda40, 0x40c1, 0xd90d, 0x0b8a, - 0xfd6f, 0xda7f, 0xdfff, 0x40c1, - 0x702c, 0x42a1, 0x706c, 0x0d12, - 0xffef, 0xc740, 0x40c1, 0xd90d, - 0x0b4e, 0xfe6f, 0xda80, 0x40c1, - 0x702c, 0x42c3, 0xffff, 0xdfff, - 0x776c, 0x0cf6, 0xffef, 0xc740, - 0x40c1, 0x0e6a, 0xfeef, 0xd90d, - 0x7487, 0xc6c6, 0xc2e6, 0x1cfc, - 0xb6c8, 0xc1b3, 0x1600, 0x709b, - 0x8000, 0x0004, 0xc044, 0x4328, - 0x4020, 0x702c, 0x0d46, 0xfd6f, - 0x734c, 0xc046, 0x4061, 0x742c, - 0x0d3a, 0xfd6f, 0xda08, 0xc047, - 0xc004, 0x231a, 0x3f9b, 0x0010, - 0x0000, 0x70ed, 0x2055, 0x08c0, - 0xc045, 0x1600, 0x7080, 0x8000, - 0x0665, 0xc041, 0xf004, 0xc001, + 0x781b, 0x0902, 0xfdaf, 0x780f, + 0x40c3, 0x8000, 0x05e4, 0x8800, + 0xe81c, 0x1600, 0x7080, 0x8000, + 0x0001, 0x0831, 0x00df, 0xc003, + 0x235a, 0x3c81, 0x265f, 0x3102, + 0xc040, 0xc006, 0xc304, 0x1c04, + 0x3001, 0x6119, 0x255a, 0x1640, + 0x6119, 0xc002, 0x7845, 0xc205, + 0x0cce, 0xffef, 0x780f, 0xf014, + 0xc003, 0x235a, 0x3c81, 0x265f, + 0x3102, 0xc040, 0xc006, 0xc304, + 0x6119, 0x255a, 0x1640, 0x6119, + 0xc002, 0x7845, 0xc205, 0x0af2, + 0xffef, 0x780f, 0x7167, 0x0b6f, + 0xb094, 0x71a5, 0x0d53, 0x90b4, + 0x40c1, 0x088a, 0xfdaf, 0x730c, + 0xc0a9, 0x1404, 0x341b, 0xc6c6, + 0xc2e4, 0x42c3, 0x8000, 
0x06d1, + 0x8a80, 0x8ac1, 0x245f, 0x1481, + 0x6038, 0xf013, 0x0919, 0x0211, + 0x231a, 0x0f82, 0x0000, 0x2000, + 0x2205, 0x0f8d, 0x9002, 0x1064, + 0x9040, 0xb540, 0x7124, 0x7204, + 0x09e5, 0x8254, 0x7185, 0x7b8f, + 0x0ef9, 0x90e5, 0x702c, 0x0f16, + 0xfe8f, 0xc6c4, 0xc2e2, 0x260a, + 0x3040, 0x4308, 0x70ad, 0xf024, + 0x255a, 0x1182, 0x7261, 0x8a01, + 0x8a60, 0x8a22, 0x209a, 0x0004, + 0x7b05, 0x8a03, 0x209a, 0x0004, + 0x7825, 0x201a, 0x0f80, 0x0001, + 0x0000, 0x2005, 0x00c1, 0x8a05, + 0x8a64, 0x7734, 0x209a, 0x0004, + 0x7865, 0xf207, 0x793b, 0xb99c, + 0xb99f, 0xb100, 0xf003, 0x0d22, + 0xfe4f, 0x71a5, 0x2e45, 0x3180, + 0x0db9, 0x9004, 0xc6c2, 0x78e0, + 0xc2e6, 0x1cfc, 0xb6c8, 0xc1a1, + 0x4718, 0x40c3, 0x8000, 0x06d1, + 0x88c0, 0x4338, 0xc340, 0xea04, + 0x79db, 0xf004, 0x265f, 0x1241, + 0x1001, 0x009e, 0x7054, 0xdd08, + 0x25ca, 0x1062, 0xf039, 0x40e1, + 0x209a, 0x0004, 0xc300, 0x23f5, + 0x3042, 0xeb13, 0x7c5c, 0x7c9c, + 0x7c9c, 0x7c9c, 0x224a, 0x16c0, + 0x2a44, 0x0803, 0x2242, 0x104a, + 0x0afd, 0x9031, 0x7c9d, 0x629a, + 0xbac4, 0x4a94, 0xf00d, 0x248a, + 0x1fcf, 0x7c44, 0x2204, 0x0f83, + 0x0000, 0xfc00, 0xda0a, 0xba61, + 0x0aff, 0x8031, 0x7b7d, 0x261a, + 0x1f82, 0x0000, 0x1000, 0x239a, + 0x0001, 0x71e5, 0x7124, 0x2205, + 0x07c2, 0x7845, 0x781b, 0xb891, + 0xb89c, 0x7b85, 0xb89f, 0xb060, + 0x0d97, 0x93c5, 0x71c5, 0x0efd, + 0xb3a5, 0x70ed, 0x7487, 0x1404, + 0x341b, 0xc6c6, 0xc2e6, 0xc1a1, + 0x216f, 0x0dc3, 0x7014, 0x8900, + 0x45cb, 0x0040, 0x1800, 0x46cb, + 0x8000, 0x058c, 0x0857, 0x003e, + 0x7dc0, 0x11e9, 0x808f, 0x41c3, + 0x0148, 0x0000, 0x097e, 0xfd6f, + 0x740c, 0x275f, 0x1182, 0x43c3, + 0x0012, 0x0310, 0x41c3, 0x9000, + 0x0000, 0xdf08, 0x6278, 0x7825, + 0x9000, 0xae00, 0xbf61, 0x0f01, + 0x1031, 0x781d, 0xae19, 0x6b02, + 0x6058, 0x7825, 0x9000, 0x41c3, + 0x8000, 0x05be, 0xda08, 0xa900, + 0xba61, 0x0a01, 0x0031, 0x781d, + 0xa919, 0xbe6c, 0x40c1, 0x0ebe, + 0xfeef, 0xd90d, 0x40c1, 0xd90d, + 0x088a, 0xfe6f, 0xda40, 0x40c1, + 0xd90d, 0x0e9a, 0xfd2f, 0xda7f, + 0xdfff, 0x40c1, 
0x702c, 0x42a1, + 0x706c, 0x0d3a, 0xffef, 0xc740, + 0x40c1, 0xd90d, 0x0866, 0xfe6f, + 0xda80, 0x40c1, 0x702c, 0x42c3, + 0xffff, 0xdfff, 0x776c, 0x0d1e, + 0xffef, 0xc740, 0x40c1, 0x0baa, + 0xfeef, 0xd90d, 0x7487, 0xc6c6, + 0xc2e6, 0x1cfc, 0xb6c8, 0xc1b2, + 0x1600, 0x708d, 0x8000, 0x0004, + 0x4328, 0xc043, 0x4020, 0x702c, + 0x084e, 0xfd6f, 0x734c, 0xc045, + 0x4061, 0x742c, 0x0842, 0xfd6f, + 0xda08, 0xc046, 0xc003, 0x251a, + 0x1f8d, 0x0010, 0x0000, 0x704c, + 0x2055, 0x08c0, 0xc044, 0x1600, + 0x7080, 0x8000, 0x06d1, 0xc041, + 0xf042, 0xc606, 0xc005, 0x2351, + 0xb000, 0x26ca, 0x1001, 0xee36, + 0x784f, 0xd941, 0x790a, 0xc047, + 0x40c3, 0x8000, 0x071e, 0x610a, + 0x6038, 0x1001, 0x00c1, 0xca0c, + 0x4363, 0x6159, 0x2044, 0x07c2, + 0x4020, 0xea06, 0xba61, 0x0a01, + 0x0031, 0x781b, 0x7f0f, 0xc202, + 0x277c, 0x1800, 0x41c3, 0x0078, + 0x0003, 0x08a2, 0xfd2f, 0xc740, + 0xc002, 0xc104, 0x201a, 0x0f80, + 0x0000, 0x1000, 0x79a5, 0x7905, + 0x4063, 0x209a, 0x0004, 0x7825, + 0x781b, 0xb891, 0xb89c, 0xb89f, + 0xb0e0, 0x79cf, 0xc007, 0x611a, + 0x7167, 0x0b8b, 0xb094, 0xc001, 0x7104, 0xc041, 0xc001, 0x780f, - 0xc042, 0x40c3, 0x8000, 0x0665, - 0x8821, 0xc002, 0x7030, 0x700c, - 0xc043, 0xf7c6, 0xf043, 0xc003, - 0x7104, 0xc043, 0xc003, 0x7d0f, - 0x0dd7, 0x90b5, 0xbde0, 0xc606, - 0xc007, 0x761c, 0xee75, 0x7fef, - 0xd941, 0x79ea, 0x40c3, 0x8000, - 0x07fa, 0x43a1, 0x610a, 0x6038, - 0x1001, 0x00c1, 0xca09, 0x6159, - 0x2044, 0x07c2, 0x4020, 0xea06, - 0xba61, 0x0a01, 0x0031, 0x781b, - 0xc202, 0x780f, 0xc048, 0xc040, - 0x41c3, 0x0070, 0x0003, 0x0d32, - 0xfd2f, 0x207a, 0x0800, 0xc005, - 0x259a, 0x1004, 0x2305, 0x3001, - 0xc002, 0x201a, 0x0f80, 0x0000, - 0x1000, 0x7825, 0x7d05, 0x78bb, - 0x2005, 0x0f81, 0x9002, 0x0000, - 0xc008, 0xb100, 0x78cf, 0x671f, - 0xf1bf, 0x1600, 0x7080, 0x8000, - 0x0012, 0x0847, 0x01b5, 0xc089, + 0xc042, 0x40c3, 0x8000, 0x06d1, + 0x8821, 0xc002, 0x09e7, 0x8025, + 0x706f, 0x1600, 0x7080, 0x8000, + 0x0012, 0x0847, 0x01b5, 0xc088, 0x1600, 0x708d, 0x8000, 0x0004, - 0x702c, 
0x0912, 0xfeaf, 0xda28, - 0xc204, 0x41c3, 0x0019, 0x0001, - 0x0cc6, 0xfd6f, 0x750c, 0x251a, - 0x1f81, 0x0010, 0x0000, 0xc005, - 0x724c, 0x7825, 0x0c42, 0xfeef, - 0xc189, 0x700c, 0x722c, 0xc289, - 0x0dc6, 0xfd2f, 0x726c, 0xc0b3, + 0x702c, 0x0e0a, 0xfe6f, 0xda28, + 0xc203, 0x41c3, 0x001a, 0x0001, + 0x0fda, 0xfd2f, 0x750c, 0x251a, + 0x1f8d, 0x0010, 0x0000, 0xc004, + 0xc188, 0x724c, 0x09a2, 0xfeef, + 0x78a5, 0x700c, 0x722c, 0xc288, + 0x0916, 0xfd2f, 0x726c, 0xc0b2, 0x1404, 0x341b, 0xc6c6, 0x78e0, 0xc2e6, 0x1cfc, 0xb6c8, 0x2482, - 0x3303, 0x1600, 0x709b, 0x8000, - 0x0004, 0xc044, 0x1600, 0x7080, - 0x8000, 0x0665, 0x231a, 0x3f9b, - 0x0010, 0x0000, 0x70ed, 0xc041, - 0xc004, 0x2055, 0x0c00, 0xc045, - 0xf004, 0xc001, 0x7104, 0xc041, - 0xc001, 0x780f, 0xc042, 0x40c3, - 0x8000, 0x0665, 0x8821, 0xc002, - 0x7030, 0x700c, 0xc043, 0xf7c6, - 0xf05e, 0xc003, 0x7104, 0xc043, - 0xc003, 0x7e0f, 0x0ed7, 0x9275, - 0xd8ff, 0x2644, 0x17c1, 0xe906, - 0xb961, 0x0901, 0x0031, 0x781d, - 0x08e3, 0x803e, 0x78ef, 0x209f, - 0x0041, 0x41c3, 0x8000, 0x07fa, - 0x43c1, 0x602a, 0x6038, 0x1001, - 0x00c1, 0xca09, 0x6159, 0x2044, + 0x3203, 0xc043, 0x1600, 0x7080, + 0x8000, 0x06d1, 0x1600, 0x709b, + 0x8000, 0x0004, 0x70ed, 0xc041, + 0xc003, 0x231a, 0x3f9b, 0x0010, + 0x0000, 0x2055, 0x0c00, 0xc044, + 0xf057, 0xd8ff, 0x2644, 0x17c1, + 0xe905, 0xb961, 0x09ff, 0x8031, + 0x781d, 0x0893, 0x003e, 0x78ef, + 0x209f, 0x0041, 0x41c3, 0x8000, + 0x071e, 0x602a, 0x6038, 0x1001, + 0x00c1, 0xca0c, 0x6159, 0x2044, 0x07c2, 0x4020, 0xea05, 0xba61, - 0x0aff, 0x8031, 0x781b, 0x790e, - 0x783c, 0x781c, 0x781c, 0x781c, - 0x4200, 0xd81b, 0xb861, 0x0801, - 0x0031, 0x7a5d, 0x6158, 0x2944, - 0x0801, 0xb8c4, 0x7842, 0x7a10, - 0x4020, 0x209a, 0x0001, 0x2004, - 0x0f8d, 0x003f, 0xffc0, 0x7d45, - 0x78b0, 0xc202, 0xc040, 0x7830, - 0x41c3, 0x0076, 0x0003, 0x0bca, - 0xfd2f, 0x207c, 0x0200, 0xc005, - 0x269a, 0x1004, 0x71e5, 0x2305, - 0x3001, 0xc002, 0x201a, 0x0f80, - 0x0000, 0x1000, 0x7825, 0x7e05, - 0x78db, 0xb891, 0xb89c, 0xb89f, - 0xb0a0, 
0xf1a5, 0x1600, 0x7080, + 0x0aff, 0x8031, 0x781b, 0x7a0e, + 0x785c, 0x781c, 0x781c, 0x781c, + 0x4100, 0xd81b, 0xb861, 0x0801, + 0x0031, 0x793d, 0x6238, 0xb8c4, + 0x4831, 0x2a44, 0x0800, 0xc202, + 0x7d10, 0x257c, 0x1200, 0x259a, + 0x1001, 0x7d25, 0x79b0, 0xc140, + 0x41c3, 0x007e, 0x0003, 0x0f56, + 0xfcef, 0x43c1, 0xc004, 0x71e5, + 0x2305, 0x3001, 0xc002, 0x201a, + 0x0f80, 0x0000, 0x1000, 0x7905, + 0x40c1, 0x209a, 0x0004, 0x7825, + 0x781b, 0xb891, 0xb89c, 0xb89f, + 0xb0a0, 0x71c5, 0x0e5f, 0x9254, + 0xc001, 0x7104, 0xc041, 0xc001, + 0x780f, 0xc042, 0x40c3, 0x8000, + 0x06d1, 0x8821, 0xc002, 0x09e9, + 0x8025, 0x70cd, 0x1600, 0x7080, 0x8000, 0x0012, 0x0845, 0x01b5, - 0xc086, 0x1600, 0x708d, 0x8000, - 0x0004, 0x702c, 0x0fae, 0xfe6f, - 0xdab4, 0xc204, 0x41c3, 0x0017, - 0x0001, 0x0b66, 0xfd6f, 0x750c, - 0x251a, 0x1f81, 0x0010, 0x0000, - 0xc005, 0xda09, 0x7825, 0x0ae2, - 0xfeef, 0xc186, 0x700c, 0xd909, - 0xc286, 0x0c66, 0xfd2f, 0x726c, - 0x2480, 0x3303, 0x1404, 0x341b, + 0xc085, 0x1600, 0x708d, 0x8000, + 0x0004, 0x702c, 0x0cbe, 0xfe6f, + 0xdab4, 0xc203, 0x41c3, 0x0017, + 0x0001, 0x0e92, 0xfd2f, 0x750c, + 0x251a, 0x1f8d, 0x0010, 0x0000, + 0xc004, 0xc185, 0xda09, 0x085a, + 0xfeef, 0x78a5, 0x700c, 0xd909, + 0xc285, 0x0fce, 0xfcef, 0x726c, + 0x2480, 0x3203, 0x1404, 0x341b, 0xc6c6, 0x78e0, 0xc2e6, 0x1cfc, - 0xb6c8, 0xc1ac, 0xc040, 0xd8ff, - 0x702c, 0x0aaa, 0xfd6f, 0x734c, - 0xc045, 0xd8ff, 0x742c, 0x0a9e, - 0xfd6f, 0xda08, 0xc046, 0xc000, - 0x47cb, 0x8000, 0x0663, 0x205f, - 0x0500, 0xc042, 0x8f00, 0xc602, - 0x661e, 0x76c3, 0x8000, 0x0794, - 0x0e96, 0xfdaf, 0xc000, 0x8fa0, - 0x702c, 0xc043, 0xc141, 0xf003, - 0x71a5, 0x8f41, 0x7baf, 0x0a4b, - 0x00e4, 0xbde0, 0x1414, 0x300b, - 0xc006, 0x23ca, 0x1002, 0x0bed, - 0x9010, 0xca05, 0x0b2f, 0x0025, - 0x782f, 0xd941, 0xc044, 0x782a, - 0x2000, 0x0f9b, 0x8000, 0x07fc, - 0x0b9e, 0xff6f, 0x1300, 0x3080, - 0xae00, 0xc004, 0x1300, 0x3082, - 0x2000, 0x02c1, 0xc001, 0x6058, - 0xc041, 0x71c5, 0xf1da, 0x1600, - 0x7080, 0x8000, 0x0012, 0x0861, - 
0x01b5, 0xc087, 0x702c, 0x0ec6, - 0xfe6f, 0xda14, 0xc200, 0x41c3, - 0x0018, 0x0001, 0x0a7a, 0xfd6f, - 0x750c, 0x8f62, 0xc002, 0x8f23, - 0x7a7b, 0x2000, 0x0f8c, 0x8000, - 0x0794, 0xf003, 0x7164, 0x786f, - 0x0915, 0x0025, 0x70ad, 0xf00d, - 0xc687, 0x661e, 0x6408, 0x7144, - 0x71a5, 0xae00, 0x78af, 0x08f5, - 0x80b4, 0x7850, 0xf1f0, 0x700c, - 0x722c, 0xc287, 0x0b5a, 0xfd2f, - 0x716c, 0x8f41, 0x8f20, 0x6a01, - 0x7822, 0x7910, 0xc001, 0x2805, - 0x0040, 0x0b0e, 0xff6f, 0x780f, - 0xc103, 0xa900, 0xc0ac, 0x1404, - 0x341b, 0xc6c6, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 
0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 
0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 
0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, + 0xb6c8, 0xc1ad, 0xc040, 0xd8ff, + 0x702c, 0x0dd6, 0xfd2f, 0x734c, + 0xc046, 0xd8ff, 0x742c, 0x0dca, + 0xfd2f, 0xda08, 0xc047, 0xc000, + 0x47cb, 0x8000, 0x06cf, 0x8f20, + 0x205f, 0x0500, 0xc043, 0x603a, + 0x2200, 0x0f9b, 0x8000, 0x1b74, + 0x0b2a, 0xfdaf, 0xc000, 0x8f20, + 0x8fc1, 0x704c, 0x260a, 0x3040, + 0x4328, 0xc042, 0xc241, 0xf020, + 0xc507, 0xc006, 0x25ca, 0x1001, + 0xed18, 0xca07, 0x092b, 0x0025, + 0x784f, 0xd941, 0xc044, 0x782a, + 0x2032, 0x0f80, 0x8000, 0x0720, + 0x0b66, 0xff6f, 0xc045, 0x1b00, + 0x3002, 0xc204, 0xc105, 0xc001, + 0x62ba, 0x6038, 0xc041, 0x7167, + 0x2640, 0x305e, 0x212f, 0x0787, + 0x0ec1, 0x9065, 0x2651, 0xb000, + 0x1600, 0x7080, 0x8000, 0x0012, + 0x0861, 0x01b5, 0xc088, 0x702c, + 0x0bd2, 0xfe6f, 0xda14, 0xc200, + 0x41c3, 0x0018, 0x0001, 0x0da6, + 0xfd2f, 0x750c, 0xc203, 0x8f22, + 0x8f63, 0x2200, 0x0f82, 0x8000, + 0x1b74, 0x7c3b, 0xf00e, 0x70ad, + 0xf007, 0xc688, 0x661e, 0x6208, + 0x7185, 0x71a5, 0xae00, 0x78af, + 0x08f3, 0x80b4, 0x7890, 0x7124, + 0x782f, 0x0be7, 0x8025, 0x700c, + 0x722c, 0xc288, 0x0ec2, 0xfcef, + 0x716c, 0x1700, 0x108b, 0x8fc1, + 0x234e, 0x1040, 0x661e, 0xc001, + 0x79d0, 0x2805, 0x0040, 0x0aca, + 0xff6f, 0x780f, 0xc102, 0xa900, + 
0xc0ad, 0x1404, 0x341b, 0xc6c6, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, @@ -9849,7 +9840,7 @@ static u16 lpddr4x_train2d_imem[] = { 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, -}; + }; /*******************************************************/ static u16 lpddr4x_train2d_dmem[] = { @@ -9857,7 +9848,7 @@ static u16 lpddr4x_train2d_dmem[] = { 0x0002, 0x283c, 0x0012, 0x0000, 0x0061, 0x0004, 0x0000, 0x0002, 0x0000, 0x0000, 0x0000, 0x0100, - 0x8020, 0x0000, 0x0320, 0x0000, + 0x2080, 0x0000, 0x0320, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x3f74, 0x00f2, 0x1b14, 0x1608, 0x0000, 0x0004, 0x3f74, @@ -9981,8 +9972,9 @@ static u16 lpddr4x_train2d_dmem[] = { 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0xff00, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0100, 0x0000, + 0x0100, 0x0014, 0x0000, 0x0000, 0x0000, 0x0000, 0x014a, 0x0181, 0x0118, 0x0118, 0x016f, 0x016f, 0x0159, 0x0181, 0x0120, 0x0120, @@ -9993,8 +9985,10 @@ static u16 lpddr4x_train2d_dmem[] = { 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0xbd61, 0x0046, 0x0000, 0x321e, - 0x0100, 0x0000, 0x0100, 0x0302, + 0x9660, 0x00cb, 0x0000, 0x321e, + 0x0100, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0100, 0x0302, 0x0706, 0x0b0a, 0x0f0e, 0x1b1a, 0x1f1e, 0x3b3a, 0x3f3e, 0x0000, 0xffff, 0x01e0, 0x00f0, 0x00a0, @@ -10002,27 +9996,25 @@ static u16 lpddr4x_train2d_dmem[] = { 0x003c, 0x0035, 0x0030, 0x002c, 0x0028, 0x0025, 0x0022, 0x0020, 0x001e, 0x001c, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x2820, 0x140f, - 0x0002, 0x0000, 0xf01f, 0x0001, - 0x0000, 0xffb4, 0x0001, 0x0001, - 0xf0b4, 0x0001, 0x0000, 0xf4b4, - 0x0001, 0x0000, 0xf0b9, 0x0001, - 0x0000, 0xf0ba, 0x0001, 0x0000, - 0xf0bb, 0x0001, 0x0000, 0xf001, - 0x0001, 
0x0000, 0xf013, 0x0001, - 0x0000, 0xf0f9, 0x0004, 0x0200, - 0xf0fa, 0x0004, 0x0000, 0xf0fb, - 0x0004, 0x0400, 0xf060, 0x0007, - 0x0008, 0xf065, 0x0007, 0x0000, - 0xff62, 0x0001, 0x0000, 0xf002, - 0x0001, 0x0220, 0x10f8, 0x20e8, - 0x0a06, 0x140e, 0x1c18, 0x2420, - 0x0c06, 0x1610, 0x201c, 0x2824, - 0x0a06, 0x1610, 0x201a, 0x2824, - 0x0c06, 0x1812, 0x241e, 0x2c28, + 0x2820, 0x140f, 0x1f02, 0x01f0, + 0x0000, 0xb400, 0x01ff, 0x0100, + 0xb400, 0x01f0, 0x0000, 0xb400, + 0x01f4, 0x0000, 0xb900, 0x01f0, + 0x0000, 0xba00, 0x01f0, 0x0000, + 0xbb00, 0x01f0, 0x0000, 0x0100, + 0x01f0, 0x0000, 0x1300, 0x01f0, + 0x0000, 0xf900, 0x04f0, 0x0000, + 0xfa02, 0x04f0, 0x0000, 0xfb00, + 0x04f0, 0x0000, 0x6004, 0x07f0, + 0x0800, 0x6500, 0x07f0, 0x0000, + 0x6200, 0x01ff, 0x0000, 0x0200, + 0x01f0, 0x2000, 0x0002, 0x0000, 0x020d, 0x0301, 0x0c0b, 0x160e, - 0x1004, 0x1811, 0x006e, 0x0002, + 0x1004, 0x1811, 0x0c06, 0x1610, + 0x201c, 0x2824, 0x0c06, 0x1812, + 0x241e, 0x2c28, 0x0a06, 0x140e, + 0x1c18, 0x2420, 0x0a06, 0x1610, + 0x201a, 0x2824, 0x006e, 0x0002, 0x0001, 0x00fd, 0x0004, 0x000f, 0x0060, 0x0007, 0x0000, 0x00e8, 0x0004, 0x00ff, 0x00fc, 0x0004, diff --git a/driver/bm1684/bm1684_pcie.c b/driver/bm1684/bm1684_pcie.c index d958004..6d048b8 100644 --- a/driver/bm1684/bm1684_pcie.c +++ b/driver/bm1684/bm1684_pcie.c @@ -28,7 +28,7 @@ int bm1684_get_pcie_func_index(struct bm_device_info *bmdi) } else index = -1; - //index = PCI_FUNC(bmdi->cinfo.pcidev->devfn); + index = PCI_FUNC(bmdi->cinfo.pcidev->devfn); if ((bmdi->cinfo.chip_id == 0x1684) || (bmdi->cinfo.chip_id == 0x1686)) { bmdi->cinfo.pcie_func_index = index; pr_info("bm-sophon%d, pcie_func_index = %d\n", bmdi->dev_index, index); diff --git a/driver/bm1684/bm1684_smmu.c b/driver/bm1684/bm1684_smmu.c index ce553e4..84f1ba8 100644 --- a/driver/bm1684/bm1684_smmu.c +++ b/driver/bm1684/bm1684_smmu.c @@ -319,6 +319,8 @@ static int bm_demand_iommu_entries(struct iommu_ctrl *ctrl, struct iommu_region int real_num; int ret; int t_half, b_half; + 
int count = 1000; + if (iommu_src->user_start == 0 || iommu_src->user_size == 0 || iommu_dst->user_start == 0 || iommu_dst->user_size == 0) { dev_err(ctrl->device, "invalid input param from user space."); return -EINVAL; @@ -334,7 +336,9 @@ static int bm_demand_iommu_entries(struct iommu_ctrl *ctrl, struct iommu_region demand_pages = round_up(demand_src_pages, IOMMU_TASK_ALIGNMENT) + round_up(demand_dst_pages, IOMMU_TASK_ALIGNMENT); // entry boundry between src and dst need aligned to 16 retry: - + if (count <= 0) { + return -EINTR; + } /* best effort to fulfil user request */ real_num = iommu_alloc_entries(ctrl, demand_pages, &iommu_src->entry_start); if (!real_num) { @@ -342,10 +346,12 @@ static int bm_demand_iommu_entries(struct iommu_ctrl *ctrl, struct iommu_region ret = wait_event_interruptible(ctrl->entry_waitq, iommu_get_free_entries(ctrl, &t_half, &b_half)); if (ret == -ERESTARTSYS) return -EINTR; + count--; goto retry; } else { if (real_num < 32) { iommu_free_entries(ctrl, real_num); + count--; goto retry; } while (real_num < demand_pages) { @@ -413,7 +419,11 @@ static int bm_setup_iommu_pages(struct iommu_ctrl *ctrl, struct bm_buffer_object } -#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 9, 0) +#if LINUX_VERSION_CODE >= KERNEL_VERSION(6, 5, 0) + page_done = get_user_pages(bo->iommu.start_aligned, bo->nr_pages, + bo->iommu.is_dst == 1 ? 1 : 0, // dst need write, src only need read + bo->pages); +#elif LINUX_VERSION_CODE >= KERNEL_VERSION(4, 9, 0) page_done = get_user_pages(bo->iommu.start_aligned, bo->nr_pages, bo->iommu.is_dst == 1 ? 
1 : 0, // dst need write, src only need read bo->pages, NULL); @@ -773,14 +783,15 @@ int bm1684_enable_smmu_transfer(struct bm_memcpy_info *memcpy_info, struct iommu ret = bm_bo_create(bo_buffer, bo_rgn); if (ret != 0) { + bm_release_iommu_entries(&memcpy_info->iommuctl, iommu_rgn_src, iommu_rgn_dst); dev_err(memcpy_info->iommuctl.device, "bm_bo_create src failed %d\n", ret); ret = -ENOMEM; return ret; } ret = bm_setup_iommu_pages(&memcpy_info->iommuctl, *bo_buffer); - if (ret < 0) { + bm_release_iommu_entries(&memcpy_info->iommuctl, iommu_rgn_src, iommu_rgn_dst); list_del(&(*bo_buffer)->entry); bm_bo_release(*bo_buffer); dev_err(memcpy_info->iommuctl.device, "bm_setup_iommu_pages src failed %d\n", ret); diff --git a/driver/bm_api.c b/driver/bm_api.c index 343269f..e98caa3 100644 --- a/driver/bm_api.c +++ b/driver/bm_api.c @@ -70,6 +70,11 @@ int bmdrv_api_init(struct bm_device_info *bmdi, u32 channel) bmdi->lib_info = kzalloc(sizeof(struct bmcpu_lib), GFP_KERNEL); INIT_LIST_HEAD(&(bmdi->lib_info->lib_list)); mutex_init(&(bmdi->lib_info->bmcpu_lib_mutex)); +#ifndef SOC_MODE + bmdi->process_info = kzalloc(sizeof(struct bmcpu_process), GFP_KERNEL); + INIT_LIST_HEAD(&(bmdi->process_info->process_list)); + mutex_init(&(bmdi->process_info->bmcpu_process_mutex)); +#endif } return ret; @@ -80,6 +85,10 @@ void bmdrv_api_deinit(struct bm_device_info *bmdi, u32 channel) struct bmcpu_lib *lib_temp, *lib_next; struct bmcpu_lib *lib_info = bmdi->lib_info; struct bmcpu_lib *lib_dyn_info = bmdi->lib_dyn_info; +#ifndef SOC_MODE + struct bmcpu_process *process_temp, *process_next; + struct bmcpu_process *process_info = bmdi->process_info; +#endif if (BM_MSGFIFO_CHANNEL_XPU == channel) { mutex_lock(&lib_dyn_info->bmcpu_lib_mutex); @@ -97,6 +106,15 @@ void bmdrv_api_deinit(struct bm_device_info *bmdi, u32 channel) } mutex_unlock(&lib_info->bmcpu_lib_mutex); kfree(bmdi->lib_info); +#ifndef SOC_MODE + mutex_lock(&process_info->bmcpu_process_mutex); + 
list_for_each_entry_safe(process_temp, process_next, &process_info->process_list, process_list) { + list_del(&process_temp->process_list); + kfree(process_temp); + } + mutex_unlock(&process_info->bmcpu_process_mutex); + kfree(bmdi->process_info); +#endif } kfifo_free(&bmdi->api_info[channel].api_fifo); @@ -406,6 +424,119 @@ int bmdrv_api_dyn_unload_lib_process(struct bm_device_info *bmdi, bm_api_ext_t * return -1; } +int bmdrv_send_api_close(struct bm_device_info *bmdi, struct file *file, u8 *process_handle) +{ + int ret = 0; + struct bm_thread_info *thd_info; + struct api_fifo_entry *api_entry; + struct api_list_entry *api_entry_list = NULL; + struct bm_api_info *apinfo; + pid_t api_pid; + bm_api_ext_t bm_api; + bm_kapi_header_t api_header; + bm_kapi_opt_header_t api_opt_header; + u32 fifo_empty_number; + struct bm_handle_info *h_info; + u64 local_send_api_seq; + u32 channel; + + if (bmdev_gmem_get_handle_info(bmdi, file, &h_info)) { + pr_err("bm-sophon%d bmdrv: file list is not found!\n", bmdi->dev_index); + return -EINVAL; + } + + bm_api.api_id = BM_API_ID_CLOSE_PROCESS; + bm_api.api_addr = process_handle; + bm_api.api_size = sizeof(bm_api_close_process_t); + bm_api.api_handle = 0; + + channel = BM_MSGFIFO_CHANNEL_CPU; + apinfo = &bmdi->api_info[BM_MSGFIFO_CHANNEL_CPU]; + + mutex_lock(&apinfo->api_mutex); + api_pid = current->pid; + /* check if current pid already recorded */ + thd_info = bmdrv_find_thread_info(h_info, api_pid); + if (!thd_info) { + thd_info = bmdrv_create_thread_info(h_info, api_pid); + if (!thd_info) { + mutex_unlock(&apinfo->api_mutex); + pr_err("%s bm-sophon%d bmdrv: bmdrv_create_thread_info failed!\n", + __func__, bmdi->dev_index); + return -ENOMEM; + } + } + + fifo_empty_number = bm_api.api_size / sizeof(u32) + sizeof(bm_kapi_header_t) / sizeof(u32) + sizeof(bm_kapi_opt_header_t) / sizeof(u32); + + api_entry_list = kmalloc(sizeof(struct api_list_entry), GFP_KERNEL); + if (!api_entry_list) { + mutex_unlock(&apinfo->api_mutex); + pr_err("%s 
bm-sophon%d bmdrv: kmalloc api_list_entry failed!\n", + __func__, bmdi->dev_index); + return -ENOMEM; + } + api_entry = &api_entry_list->api_entry; + + /* update global api sequence number */ + local_send_api_seq = atomic64_inc_return((atomic64_t *)&bmdi->bm_send_api_seq); + /* update handle api sequence number */ + mutex_lock(&h_info->h_api_seq_mutex); + h_info->h_send_api_seq = local_send_api_seq; + mutex_unlock(&h_info->h_api_seq_mutex); + /* update last_api_seq of current thread */ + /* may overflow */ + thd_info->last_api_seq = local_send_api_seq; + thd_info->profile.sent_api_counter++; + bmdi->profile.sent_api_counter++; + + api_header.api_id = bm_api.api_id; + api_header.api_size = bm_api.api_size / sizeof(u32); + api_header.api_handle = (u64)h_info->file; + api_header.api_seq = thd_info->last_api_seq; + api_header.duration = 0; /* not get from this area now */ + api_header.result = 0; + + /* insert api info to api fifo */ + api_entry->thd_info = thd_info; + api_entry->h_info = h_info; + api_entry->thd_api_seq = thd_info->last_api_seq; + api_entry->dev_api_seq = 0; + api_entry->api_id = bm_api.api_id; +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 3, 0) + api_entry->sent_time_us = ktime_get_boottime_ns() / 1000; +#else + api_entry->sent_time_us = ktime_get_boot_ns() / 1000; +#endif + api_entry->global_api_seq = local_send_api_seq; + api_entry->api_done_flag = 0; + init_completion(&api_entry->api_done); + + PR_TRACE("bmdrv: %d last_api_seq is %d\n", api_pid, thd_info->last_api_seq); + + /* wait for available fifo space */ + if (bmdev_wait_msgfifo(bmdi, fifo_empty_number, bmdi->cinfo.delay_ms, channel)) { + thd_info->last_api_seq--; + kfree(api_entry); + mutex_unlock(&apinfo->api_mutex); + pr_err("%s bm-sophon%d bmdrv: bmdev_wait_msgfifo timeout!\n", + __func__, bmdi->dev_index); + return -EBUSY; + } + + mutex_lock(&apinfo->api_fifo_mutex); + list_add_tail(&(api_entry_list->api_list_node), &apinfo->api_list); + mutex_unlock(&apinfo->api_fifo_mutex); + + 
api_opt_header.global_api_seq = local_send_api_seq; + api_opt_header.api_data = 0; + /* copy api data to fifo */ + ret = bmdev_copy_to_msgfifo(bmdi, &api_header, (bm_api_t *)&bm_api, &api_opt_header, channel, false); + + mutex_unlock(&apinfo->api_mutex); + return ret; +} + int ksend_api(struct bm_device_info *bmdi, struct file *file, unsigned char *msg) { int ret = 0; @@ -593,6 +724,11 @@ int bmdrv_send_api(struct bm_device_info *bmdi, struct file *file, unsigned long struct bm_handle_info *h_info; u64 local_send_api_seq; u32 channel; +#ifdef PCIE_MODE_ENABLE_CPU + struct bmcpu_process *process_temp, *process_next; + u8 process_close_handle; + struct bmcpu_process *process_info = bmdi->process_info; +#endif if (bmdev_gmem_get_handle_info(bmdi, file, &h_info)) { pr_err("bm-sophon%d bmdrv: file list is not found!\n", bmdi->dev_index); @@ -660,6 +796,25 @@ int bmdrv_send_api(struct bm_device_info *bmdi, struct file *file, unsigned long } } +#ifdef PCIE_MODE_ENABLE_CPU + if (bm_api.api_id == BM_API_ID_CLOSE_PROCESS) { + ret = copy_from_user(&process_close_handle, bm_api.api_addr, sizeof(u8)); + if (ret) { + pr_err("bm-sophon%d copy_from_user fail\n", bmdi->dev_index); + return ret; + } + mutex_lock(&process_info->bmcpu_process_mutex); + list_for_each_entry_safe(process_temp, process_next, &process_info->process_list, process_list) { + if (process_temp->bmcpu_handle == process_close_handle) { + list_del(&process_temp->process_list); + kfree(process_temp); + break; + } + } + mutex_unlock(&process_info->bmcpu_process_mutex); + } +#endif + mutex_lock(&apinfo->api_mutex); api_pid = current->pid; @@ -791,6 +946,10 @@ int bmdrv_query_api(struct bm_device_info *bmdi, struct file *file, unsigned lon bm_api_data_t bm_api_data; u32 channel; u64 data; +#ifdef PCIE_MODE_ENABLE_CPU + struct bmcpu_process *process_node; + struct bmcpu_process *process_info = bmdi->process_info; +#endif ret = copy_from_user(&bm_api_data, (bm_api_data_t __user *)arg, sizeof(bm_api_data_t)); if (ret) { 
@@ -813,6 +972,16 @@ int bmdrv_query_api(struct bm_device_info *bmdi, struct file *file, unsigned lon if (0 == ret) put_user(data, (u64 __user *)&(((bm_api_data_t __user *)arg)->data)); +#ifdef PCIE_MODE_ENABLE_CPU + if (bm_api_data.api_id == BM_API_ID_OPEN_PROCESS) { + process_node = kzalloc(sizeof(struct bmcpu_process), GFP_KERNEL); + process_node->bmcpu_handle = data; + process_node->current_pid = current->pid; + mutex_lock(&process_info->bmcpu_process_mutex); + list_add_tail(&(process_node->process_list), &(process_info->process_list)); + mutex_unlock(&process_info->bmcpu_process_mutex); + } +#endif return ret; } @@ -1016,3 +1185,23 @@ int bmdrv_device_sync_api(struct bm_device_info *bmdi) return 0; } + +int bmdrv_set_sync_timeout(struct bm_device_info *bmdi, unsigned long arg) +{ + int ret, timeout; + + ret = copy_from_user(&timeout, (int __user *)arg, sizeof(int)); + if (ret) { + pr_err("bm-sophon%d copy_from_user fail\n", bmdi->dev_index); + return ret; + } + + if (timeout < 0) { + pr_info("set sync timeout error!\n"); + return -1; + } + + bmdi->cinfo.delay_ms = timeout; + + return 0; +} \ No newline at end of file diff --git a/driver/bm_api.h b/driver/bm_api.h index 3f73119..0b10808 100644 --- a/driver/bm_api.h +++ b/driver/bm_api.h @@ -105,13 +105,26 @@ typedef struct bm_kapi_opt_header { u64 api_data; } bm_kapi_opt_header_t; +struct bmcpu_process { + u64 bmcpu_handle; + u32 current_pid; + struct mutex bmcpu_process_mutex; + struct list_head process_list; +}; + +typedef struct bm_api_close_process { + u64 process_handle; +} bm_api_close_process_t; + #define API_ENTRY_SIZE sizeof(struct api_fifo_entry) int bmdrv_api_init(struct bm_device_info *bmdi, u32 channel); void bmdrv_api_deinit(struct bm_device_info *bmdi, u32 channel); int bmdrv_send_api(struct bm_device_info *bmdi, struct file *file, unsigned long arg, bool flag); +int bmdrv_send_api_close(struct bm_device_info *bmdi, struct file *file, u8 *process_handle); int bmdrv_query_api(struct bm_device_info 
*bmdi, struct file *file, unsigned long arg); int bmdrv_thread_sync_api(struct bm_device_info *bmdi, struct file *file); +int bmdrv_set_sync_timeout(struct bm_device_info *bmdi, unsigned long arg); int bmdrv_handle_sync_api(struct bm_device_info *bmdi, struct file *file); int bmdrv_device_sync_api(struct bm_device_info *bmdi); void bmdrv_api_clear_lib(struct bm_device_info *bmdi, struct file *file); diff --git a/driver/bm_attr.c b/driver/bm_attr.c index a61478f..67beb30 100755 --- a/driver/bm_attr.c +++ b/driver/bm_attr.c @@ -198,16 +198,20 @@ int bmdrv_get_tpu_target_freq(struct bm_device_info *bmdi, enum bm_freq_scaling_ int freq_min = 0; if ((BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_PRO) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_FP150) || (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_CP24) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_AIV01X) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_AIV02X) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_AIV03X) || (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_PLUS)) { p_data = bmdi->bmcd->vfs_db; if((p_data == NULL) || (p_data->start_flag == VFS_ORIGIN_MODE)) { return 0; } if(caller == FREQ_CALLER_TEMP) { - p_data->thermal_freq[bmdi->cinfo.chip_index] = *target; + p_data->thermal_freq[bmdi->cinfo.chip_no] = *target; } - tmp_freq_target = p_data->thermal_freq[bmdi->cinfo.chip_index]; + tmp_freq_target = p_data->thermal_freq[bmdi->cinfo.chip_no]; vfs_freq_target = p_data->freq_volt_pair[p_data->vf_level].freq; tpu_max_freq = bmdi->boot_info.tpu_max_clk; freq_min = min(tmp_freq_target, tpu_max_freq); @@ -281,9 +285,9 @@ void board_status_update(struct bm_device_info *bmdi, int cur_tmp, int cur_tpu_c /* limit the frequency of SC7 Pro to avoid unpredictable problem * case 1 temperature >= 105, the board will shutdown and need to reset the whole environment(reboot) * case 2 temperature >= 95 && cur_freq != 25M, immediately decrease frequency to 25M - * case 3 temperature >= 65 && current frequency == 1000M, decrease frequence to 750M - * 
case 4 temperature >= 65 && (temperature < 85 && current frequency == 25M), increase the frequency to 750M - * case 5 temperature < 60 && cur_freq != 1000M && index reach 50 times, increase frequency to 1000M + * case 3 temperature >= 85 && current frequency == 875M, decrease frequence to 700M + * case 4 temperature >= 85 && (temperature < 95 && current frequency == 25M), increase the frequency to 700M + * case 5 temperature < 80 && cur_freq != 875M && index reach 50 times, increase frequency to 875M * @param bmdi the info of current card * @param cur_tmp the current average temperature of each chip */ @@ -336,8 +340,8 @@ static void bmdrv_thermal_update_status_sc7p(struct bm_device_info *bmdi, int cu bmdrv_clk_set_tpu_target_freq(bmdi,target); } } else if (avg_tmp >= c_attr->thermal_info.half_clk_tmp) { - target = (bmdi->boot_info.tpu_max_clk * 75) / 100; - if ((avg_tmp < (c_attr->thermal_info.min_clk_tmp-10)) && + target = (bmdi->boot_info.tpu_max_clk * 80) / 100; + if ((avg_tmp < (c_attr->thermal_info.min_clk_tmp)) && (cur_tpu_clk == bmdi->boot_info.tpu_min_clk)) { if ((index % BM_THERMAL_HALF_LEVEL_SC7P) == 0) { bmdrv_clk_set_tpu_target_freq(bmdi,target); @@ -462,7 +466,11 @@ static int bm_set_tmp451_range_mode(struct bm_device_info *bmdi) if ((BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC5_PRO) || (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_PRO) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_FP150) || (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_CP24) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_AIV01X) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_AIV02X) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_AIV03X) || (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_PLUS)) i2c_index = 0; @@ -581,6 +589,7 @@ int bmdrv_card_attr_init(struct bm_device_info *bmdi) c_attr->bm_get_vddphy_power = NULL; if ((BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_PLUS) || (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_CP24) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_FP150) || (BM1684_BOARD_TYPE(bmdi) == 
BOARD_TYPE_SC7_PRO)) { /* fix this later with bootinfo */ c_attr->bm_set_led_status = set_led_status; @@ -589,7 +598,11 @@ int bmdrv_card_attr_init(struct bm_device_info *bmdi) if (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_BM1684X_EVB) { c_attr->bm_get_board_power = bm_read_1684x_evb_power; } else if ((BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_PRO) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_FP150) || (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_CP24) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_AIV01X) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_AIV02X) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_AIV03X) || (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_PLUS)) { c_attr->bm_get_board_power = bm_read_sc5_pro_power; c_attr->bm_get_vddc_power = bm_read_vddc_power; @@ -620,7 +633,11 @@ int bmdrv_card_attr_init(struct bm_device_info *bmdi) c_attr->bm_get_chip_temp = bm_read_tmp451_remote_temp_by_mcu; c_attr->bm_get_board_temp = bm_read_tmp451_local_temp_by_mcu; } else if ((BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_PRO) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_FP150) || (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_CP24) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_AIV01X) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_AIV02X) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_AIV03X) || (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_PLUS)) { c_attr->bm_get_chip_temp = bm_read_tmp451_remote_temp; c_attr->bm_get_board_temp = bm_read_tmp451_local_temp; @@ -744,6 +761,7 @@ static int set_led_on(struct bm_device_info *bmdi) return 0; } else if ((BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC5_PLUS || BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC5_PRO) || (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_PLUS || BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_PRO) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_FP150) || (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_CP24)) return set_pwm_high(bmdi, 0); else @@ -769,6 +787,7 @@ static int set_led_off(struct bm_device_info *bmdi) return 0; } else if ((BM1684_BOARD_TYPE(bmdi) == 
BOARD_TYPE_SC5_PLUS || BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC5_PRO) || (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_PLUS || BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_PRO) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_FP150) || (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_CP24)) return set_pwm_low(bmdi, 0); else @@ -782,7 +801,7 @@ static int set_led_blink_1_per_2s(struct bm_device_info *bmdi) { #ifndef SOC_MODE if ((BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC5_PLUS || BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC5_PRO) || - (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_PLUS || BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_PRO) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_PLUS || BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_PRO) || (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_FP150) || (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_CP24)) return set_pwm_level(bmdi, LED_PWM_PERIOD*2, 50, 0); else @@ -796,7 +815,7 @@ static int set_led_blink_1_per_s(struct bm_device_info *bmdi) { #ifndef SOC_MODE if ((BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC5_PLUS || BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC5_PRO) || - (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_PLUS || BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_PRO) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_PLUS || BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_PRO) || (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_FP150) || (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_CP24)) return set_pwm_level(bmdi, LED_PWM_PERIOD, 25, 0); else @@ -810,7 +829,7 @@ static int set_led_blink_3_per_s(struct bm_device_info *bmdi) { #ifndef SOC_MODE if ((BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC5_PLUS || BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC5_PRO) || - (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_PLUS || BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_PRO) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_PLUS || BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_PRO) || (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_FP150) || (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_CP24)) return set_pwm_level(bmdi, LED_PWM_PERIOD 
/ 3, 17, 0); else @@ -824,7 +843,7 @@ static int set_led_blink_fast(struct bm_device_info *bmdi) { #ifndef SOC_MODE if ((BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC5_PLUS || BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC5_PRO) || - (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_PLUS || BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_PRO) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_PLUS || BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_PRO) || (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_FP150) || (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_CP24)) return set_pwm_level(bmdi, LED_PWM_PERIOD / 2, 50, 0); else @@ -1009,7 +1028,11 @@ int bm_read_tmp451_local_temp(struct bm_device_info *bmdi, int *temp) if ((BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC5_PRO) || (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_PRO) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_FP150) || (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_CP24) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_AIV01X) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_AIV02X) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_AIV03X) || (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_PLUS)) i2c_index = 0; @@ -1034,7 +1057,11 @@ int bm_read_tmp451_remote_temp(struct bm_device_info *bmdi, int *temp) if ((BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC5_PRO) || (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_PRO) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_FP150) || (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_CP24) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_AIV01X) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_AIV02X) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_AIV03X) || (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_PLUS)) i2c_index = 0; @@ -1462,11 +1489,15 @@ int bm_set_vdd_tpu_voltage(struct bm_device_info *bmdi, u32 volt) if (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_BM1684X_EVB) { return bm_set_68224_voltage_out(bmdi, 0x0, volt); } else if ((BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_PRO) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_FP150) || (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_CP24) || + 
(BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_AIV01X) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_AIV02X) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_AIV03X) || (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_PLUS)) { if (bmdrv_sc5pro_uart_is_connect_mcu(bmdi) != 0x1) return 0; - if ((volt <= 880) && (volt >= 550)) { + if ((volt <= 900) && (volt >= 550)) { sprintf(tpu_volt, "%d", volt); console.uart.bmdi = bmdi; console.uart.uart_index = 0x2; @@ -1483,7 +1514,11 @@ int bm_read_vdd_tpu_voltage(struct bm_device_info *bmdi, u32 *volt) return bm_read_1331_voltage_out(bmdi, 0x60, volt); else if ((BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC5_PRO) || (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_PRO) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_FP150) || (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_CP24) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_AIV01X) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_AIV02X) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_AIV03X) || (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_PLUS)) { return bm_read_sc5_pro_tpu_voltage(bmdi, volt); } else if ((BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SM5M_P) && @@ -1503,7 +1538,11 @@ int bm_read_vdd_tpu_current(struct bm_device_info *bmdi, u32 *cur) return bm_read_1331_current_out(bmdi, 0x60, cur); else if ((BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC5_PRO) || (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_PRO) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_FP150) || (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_CP24) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_AIV01X) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_AIV02X) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_AIV03X) || (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_PLUS)) { return bm_read_sc5_pro_tpu_current(bmdi, cur); } else if ((BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SM5M_P) && @@ -1541,7 +1580,11 @@ int bm_set_vddc_voltage(struct bm_device_info *bmdi, u32 volt) if (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_BM1684X_EVB) { return bm_set_68224_voltage_out(bmdi, 0x1, volt); } else if 
((BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_PRO) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_FP150) || (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_CP24) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_AIV01X) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_AIV02X) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_AIV03X) || (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_PLUS)) { if(bmdrv_sc5pro_uart_is_connect_mcu(bmdi) != 0x1) { return 0; @@ -1563,7 +1606,11 @@ int bm_read_vddc_voltage(struct bm_device_info *bmdi, u32 *volt) return bm_read_1331_voltage_out(bmdi, 0x61, volt); else if ((BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC5_PRO) || (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_PRO) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_FP150) || (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_CP24) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_AIV01X) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_AIV02X) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_AIV03X) || (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_PLUS)) { return bm_read_sc7_pro_vddc_voltage(bmdi, volt); } else if ((BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SM5M_P) && @@ -1583,7 +1630,11 @@ int bm_read_vddc_current(struct bm_device_info *bmdi, u32 *cur) return bm_read_1331_current_out(bmdi, 0x61, cur); else if ((BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC5_PRO) || (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_PRO) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_FP150) || (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_CP24) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_AIV01X) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_AIV02X) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_AIV03X) || (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_PLUS)) { return 0; } else if ((BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SM5M_P) && @@ -1604,7 +1655,11 @@ int bm_read_vdd_tpu_power(struct bm_device_info *bmdi, u32 *tpu_power) return bm_read_1331_power(bmdi, 0x60, tpu_power); else if ((BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC5_PRO) || (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_PRO) || + 
(BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_FP150) || (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_CP24) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_AIV01X) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_AIV02X) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_AIV03X) || (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_PLUS)) { return bm_read_sc5_pro_tpu_power(bmdi, tpu_power); } else if ((BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SM5M_P) && @@ -1636,7 +1691,11 @@ int bm_read_vddc_power(struct bm_device_info *bmdi, u32 *vddc_power) else if (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_BM1684X_EVB){ return bm_read_68224_power(bmdi, 0x1, vddc_power); } else if ((BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_PRO) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_FP150) || (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_CP24) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_AIV01X) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_AIV02X) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_AIV03X) || (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_PLUS)) { return bm_read_sc7_pro_vddc_power(bmdi, vddc_power); } else @@ -1646,7 +1705,11 @@ int bm_read_vddc_power(struct bm_device_info *bmdi, u32 *vddc_power) int bm_read_vddphy_power(struct bm_device_info *bmdi, u32 *vddphy_power) { if ((BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_PRO) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_FP150) || (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_CP24) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_AIV01X) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_AIV02X) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_AIV03X) || (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_PLUS)) { return bm_read_sc7_pro_vddphy_power(bmdi, vddphy_power); } else @@ -1850,7 +1913,11 @@ int bm_read_board_current(struct bm_device_info *bmdi, u32 *cur) if ((BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC5_PRO) || (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_PRO) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_FP150) || (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_CP24) || + (BM1684_BOARD_TYPE(bmdi) == 
BOARD_TYPE_AIV01X) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_AIV02X) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_AIV03X) || (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_PLUS)) { *cur = mcu_info_reg_read(bmdi, 0x8); } else { @@ -2040,7 +2107,11 @@ int bmdrv_sc5pro_uart_is_connect_mcu(struct bm_device_info *bmdi) if ((BM1684_BOARD_TYPE(bmdi) != BOARD_TYPE_SC5_PRO) && (BM1684_BOARD_TYPE(bmdi) != BOARD_TYPE_SC7_PRO) && + (BM1684_BOARD_TYPE(bmdi) != BOARD_TYPE_SC7_FP150) && (BM1684_BOARD_TYPE(bmdi) != BOARD_TYPE_CP24) && + (BM1684_BOARD_TYPE(bmdi) != BOARD_TYPE_AIV01X) && + (BM1684_BOARD_TYPE(bmdi) != BOARD_TYPE_AIV02X) && + (BM1684_BOARD_TYPE(bmdi) != BOARD_TYPE_AIV03X) && (BM1684_BOARD_TYPE(bmdi) != BOARD_TYPE_SC7_PLUS)) return 1; @@ -2061,13 +2132,18 @@ static int bm_set_sn(struct bm_device_info *bmdi, char *sn) if ((BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC5_PRO) || (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_PRO) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_FP150) || (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_CP24) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_AIV01X) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_AIV02X) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_AIV03X) || (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_PLUS)) { if (bmdrv_sc5pro_uart_is_connect_mcu(bmdi) != 0x1) return 0; console.uart.bmdi = bmdi; console.uart.uart_index = 0x2; console_cmd_set_sn(&console, sn); + memcpy(bmdi->bmcd->sn, sn, 17); return 0; } @@ -2094,7 +2170,11 @@ int bm_get_sn(struct bm_device_info *bmdi, char *sn) if ((BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC5_PRO) || (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_PRO) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_FP150) || (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_CP24) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_AIV01X) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_AIV02X) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_AIV03X) || (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_PLUS)) { if (bmdrv_sc5pro_uart_is_connect_mcu(bmdi) != 0x1) return -1; @@ 
-2173,7 +2253,11 @@ int bm_burning_info_sn(struct bm_device_info *bmdi, unsigned long arg) if ((BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC5_PRO) || (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_PRO) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_FP150) || (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_CP24) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_AIV01X) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_AIV02X) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_AIV03X) || (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_PLUS)) { if ((bmdi->bmcd->sc5p_mcu_bmdi) != NULL && (bmdi->bmcd != NULL)) tmp_bmdi = bmdi->bmcd->sc5p_mcu_bmdi; @@ -2220,7 +2304,11 @@ static int bm_set_mac(struct bm_device_info *bmdi, int id, unsigned char *mac) if ((BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC5_PRO) || (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_PRO) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_FP150) || (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_CP24) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_AIV01X) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_AIV02X) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_AIV03X) || (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_PLUS)) { pr_err("bmsophon%d, sc5p sc7p not support set mac\n", bmdi->dev_index); return -ENOSYS; @@ -2244,7 +2332,11 @@ static int bm_get_mac(struct bm_device_info *bmdi, int id, unsigned char *mac) { if ((BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC5_PRO) || (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_PRO) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_FP150) || (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_CP24) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_AIV01X) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_AIV02X) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_AIV03X) || (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_PLUS)) { pr_err("bmsophon%d, sc5p sc7p not support get mac\n", bmdi->dev_index); return -ENOSYS; @@ -2292,7 +2384,11 @@ static int bm_set_board_type(struct bm_device_info *bmdi, char b_type) int ret = 0x0; if ((BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC5_PRO) || 
(BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_PRO) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_FP150) || (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_CP24) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_AIV01X) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_AIV02X) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_AIV03X) || (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_PLUS)) { pr_err("bmsophon%d, sc5p sc7p not support set board type\n", bmdi->dev_index); return -ENOSYS; @@ -2317,7 +2413,11 @@ int bm_get_board_type(struct bm_device_info *bmdi, char *b_type) { if ((BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC5_PRO) || (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_PRO) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_FP150) || (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_CP24) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_AIV01X) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_AIV02X) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_AIV03X) || (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_PLUS)) { pr_err("bmsophon%d, sc5p sc7p not support get board type\n", bmdi->dev_index); return -ENOSYS; @@ -2474,7 +2574,7 @@ void bmdrv_fetch_attr(struct bm_device_info *bmdi, int count, int is_setspeed) bmdrv_adjust_fan_speed(bmdi, c_attr->chip_temp); mutex_lock(&bmdi->clk_reset_mutex); - if (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_PRO) { + if (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_PRO){ bmdrv_thermal_update_status_sc7p(bmdi, c_attr->chip_temp); } else { bmdrv_thermal_update_status(bmdi, c_attr->chip_temp); @@ -2509,7 +2609,11 @@ void bmdrv_fetch_attr_board_power(struct bm_device_info *bmdi, int count) bm_npu_utilization_stat(bmdi); #ifndef SOC_MODE if ((BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_PRO) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_FP150) || (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_CP24) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_AIV01X) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_AIV02X) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_AIV03X) || (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_PLUS)) { if 
(bmdi->bmcd == NULL) return; @@ -2546,7 +2650,11 @@ void bmdrv_fetch_attr_board_power(struct bm_device_info *bmdi, int count) if (bmdi->bmcd != NULL) { if (((BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_PRO) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_FP150) || (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_CP24) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_AIV01X) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_AIV02X) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_AIV03X) || (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_PLUS)) && (bmdi->dev_index == bmdi->bmcd->dev_start_index)) { bmdrv_record_board_power(bmdi, c_attr->board_power); @@ -2591,18 +2699,22 @@ int bmdrv_find_first_chip_logic_chip_id(struct bm_device_info *bmdi) struct bm_device_info *chip_bmdi = NULL; if ((BM1684_BOARD_TYPE(bmdi) != BOARD_TYPE_SC7_PRO) && + (BM1684_BOARD_TYPE(bmdi) != BOARD_TYPE_SC7_FP150) && (BM1684_BOARD_TYPE(bmdi) != BOARD_TYPE_CP24) && + (BM1684_BOARD_TYPE(bmdi) != BOARD_TYPE_AIV01X) && + (BM1684_BOARD_TYPE(bmdi) != BOARD_TYPE_AIV02X) && + (BM1684_BOARD_TYPE(bmdi) != BOARD_TYPE_AIV03X) && (BM1684_BOARD_TYPE(bmdi) != BOARD_TYPE_SC7_PLUS)) return bmdi->dev_index; for (chip_num = 0; chip_num < bmdi->bmcd->chip_num; chip_num++) { - chip_bmdi = bmdi->bmcd->card_bmdi[chip_num]; - value = gpio_reg_read(chip_bmdi, 0x50); - value = value >> 0x5; - value &= 0xf; - if (value == 0) { - return chip_bmdi->dev_index; - } + chip_bmdi = bmdi->bmcd->card_bmdi[chip_num]; + value = gpio_reg_read(chip_bmdi, 0x50); + value = value >> 0x5; + value &= 0xf; + if (value == 0) { + return chip_bmdi->dev_index; + } } return -1; @@ -2613,7 +2725,11 @@ struct bm_freq_scaling_db * bmdrv_alloc_vfs_database(struct bm_device_info *bmdi struct bm_freq_scaling_db * p_vfs_db = NULL; if ((BM1684_BOARD_TYPE(bmdi) != BOARD_TYPE_SC7_PRO) && + (BM1684_BOARD_TYPE(bmdi) != BOARD_TYPE_SC7_FP150) && (BM1684_BOARD_TYPE(bmdi) != BOARD_TYPE_CP24) && + (BM1684_BOARD_TYPE(bmdi) != BOARD_TYPE_AIV01X) && + (BM1684_BOARD_TYPE(bmdi) != 
BOARD_TYPE_AIV02X) && + (BM1684_BOARD_TYPE(bmdi) != BOARD_TYPE_AIV03X) && (BM1684_BOARD_TYPE(bmdi) != BOARD_TYPE_SC7_PLUS)) return NULL; @@ -2632,16 +2748,33 @@ void bmdrv_init_freq_scaling_status(struct bm_device_info *bmdi) int chip_num = 0; struct bm_freq_scaling_db * p_data = NULL; struct bm_vfs_pair freq_volt_pair_sc7_pro[VFS_MAX_LEVEL_SC7_PRO] = { - {1000, 880}, - {950, 760}, - {900, 740}, - {850, 720}, - {800, 700}, - {750, 680}, - {700, 660}, - {650, 640}, - {600, 620}, + {875, 860}, + {850, 840}, + {825, 820}, + {800, 800}, + {775, 780}, + {750, 760}, + {725, 740}, + {700, 720}, + {675, 700}, + {650, 600}, + {625, 600}, + {600, 600}, + {575, 600}, {550, 600}, + {525, 600}, + {500, 600}, + {150, 600}, + {100, 600}, + {25, 600} + }; + struct bm_vfs_pair freq_volt_pair_sc7_fp150[VFS_MAX_LEVEL_SC7_FP150] = { + {800, 720}, + {750, 700}, + {700, 680}, + {650, 660}, + {600, 660}, + {550, 660}, {500, 600}, {450, 600}, {400, 600}, @@ -2654,14 +2787,35 @@ void bmdrv_init_freq_scaling_status(struct bm_device_info *bmdi) {25, 600} }; struct bm_vfs_pair freq_volt_pair_sc7_plus[VFS_MAX_LEVEL_SC7_PLUS] = { - {750, 840}, - {550, 840}, - {25, 840} + {750, 740}, + {700, 720}, + {650, 700}, + {600, 680}, + {550, 660}, + {500, 640}, + {450, 640}, + {25, 640} + }; + struct bm_vfs_pair freq_volt_pair_AIV02X[VFS_MAX_LEVEL_AIV02X] = { + {950, 880}, + {750, 740}, + {700, 720}, + {650, 700}, + {600, 680}, + {550, 660}, + {500, 640}, + {450, 640}, + {25, 640} }; struct bm_vfs_pair freq_volt_pair_cp24[VFS_MAX_LEVEL_SC7_PLUS] = { - {1000, 840}, - {550, 840}, - {25, 840} + {750, 740}, + {700, 720}, + {650, 700}, + {600, 680}, + {550, 660}, + {500, 640}, + {450, 640}, + {25, 640} }; p_data = bmdi->bmcd->vfs_db; if (p_data == NULL) @@ -2671,6 +2825,15 @@ void bmdrv_init_freq_scaling_status(struct bm_device_info *bmdi) p_data->vf_init_level = VFS_INIT_LEVEL_SC7_PRO; p_data->vf_relbl_level = VFS_RELBL_LEVEL_SC7_PRO; p_data->vfs_max_level = VFS_MAX_LEVEL_SC7_PRO; + } else if 
(BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_FP150){ + p_data->vf_init_level = VFS_INIT_LEVEL_SC7_FP150; + p_data->vf_relbl_level = VFS_RELBL_LEVEL_SC7_FP150; + p_data->vfs_max_level = VFS_MAX_LEVEL_SC7_FP150; + } else if ((BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_AIV02X) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_AIV01X)){ + p_data->vf_init_level = VFS_INIT_LEVEL_AIV02X; + p_data->vf_relbl_level = VFS_RELBL_LEVEL_AIV02X; + p_data->vfs_max_level = VFS_MAX_LEVEL_AIV02X; } else { p_data->vf_init_level = VFS_INIT_LEVEL_SC7_PLUS; p_data->vf_relbl_level = VFS_RELBL_LEVEL_SC7_PLUS; @@ -2682,8 +2845,13 @@ void bmdrv_init_freq_scaling_status(struct bm_device_info *bmdi) if (p_data->thermal_freq[chip_num] == 0) { if (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_PRO) { p_data->thermal_freq[chip_num] = freq_volt_pair_sc7_pro[p_data->vf_level].freq; + } else if (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_FP150) { + p_data->thermal_freq[chip_num] = freq_volt_pair_sc7_fp150[p_data->vf_level].freq; } else if (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_CP24) { p_data->thermal_freq[chip_num] = freq_volt_pair_cp24[p_data->vf_level].freq; + } else if ((BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_AIV02X) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_AIV01X)) { + p_data->thermal_freq[chip_num] = freq_volt_pair_AIV02X[p_data->vf_level].freq; } else { p_data->thermal_freq[chip_num] = freq_volt_pair_sc7_plus[p_data->vf_level].freq; } @@ -2692,8 +2860,13 @@ void bmdrv_init_freq_scaling_status(struct bm_device_info *bmdi) if (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_PRO) { memcpy((void *)&p_data->freq_volt_pair[0], (void *)&freq_volt_pair_sc7_pro[0], (sizeof(struct bm_vfs_pair) * VFS_MAX_LEVEL_SC7_PRO)); + } else if (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_FP150) { + memcpy((void *)&p_data->freq_volt_pair[0], (void *)&freq_volt_pair_sc7_fp150[0], (sizeof(struct bm_vfs_pair) * VFS_MAX_LEVEL_SC7_FP150)); } else if (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_CP24) { memcpy((void *)&p_data->freq_volt_pair[0], (void 
*)&freq_volt_pair_cp24[0], (sizeof(struct bm_vfs_pair) * VFS_MAX_LEVEL_SC7_PLUS)); + } else if ((BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_AIV02X) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_AIV01X)) { + memcpy((void *)&p_data->freq_volt_pair[0], (void *)&freq_volt_pair_AIV02X[0], (sizeof(struct bm_vfs_pair) * VFS_MAX_LEVEL_SC7_PLUS)); } else { memcpy((void *)&p_data->freq_volt_pair[0], (void *)&freq_volt_pair_sc7_plus[0], (sizeof(struct bm_vfs_pair) * VFS_MAX_LEVEL_SC7_PLUS)); } @@ -2702,10 +2875,17 @@ void bmdrv_init_freq_scaling_status(struct bm_device_info *bmdi) p_data->power_peak_threshold = 180; p_data->power_upper_threshold = 150; p_data->power_lower_threshold = 140; - } else if (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_PLUS) { + } else if(BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_FP150){ + p_data->power_peak_threshold = 150; + p_data->power_upper_threshold = 130; + p_data->power_lower_threshold = 100; + } else if ((BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_PLUS) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_AIV01X) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_AIV02X) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_AIV03X)) { p_data->power_peak_threshold = 100; p_data->power_upper_threshold = 75; - p_data->power_lower_threshold = 65; + p_data->power_lower_threshold = 75; } else if (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_CP24) { p_data->power_peak_threshold = 180; p_data->power_upper_threshold = 150; @@ -2780,10 +2960,17 @@ int bm_set_sc7_vddc_rdrop(struct bm_device_info *bmdi, struct bm_rdrop param) struct console_ctx console; int vddc_cores; int i; - vddc_cores = 4; + vddc_cores = 6; - if (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_PLUS){ + if (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_PLUS || + BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_AIV03X){ vddc_cores = 3; + } else if (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_AIV02X) { + vddc_cores = 2; + } else if (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_AIV01X) { + vddc_cores = 1; + } else if (BM1684_BOARD_TYPE(bmdi) == 
BOARD_TYPE_SC7_PRO) { + vddc_cores = 4; } if (bmdrv_sc5pro_uart_is_connect_mcu(bmdi) != 0x1) { @@ -2812,25 +2999,17 @@ int bm_set_rdrop(struct bm_device_info *bmdi) param.page = 0; // set tpu rdrop - if (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_PLUS) { - param.rdrop = 250; - bm_set_sc7_rdrop(bmdi, param); - mdelay(100); - } else if (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_PRO){ - param.rdrop = 200; - bm_set_sc7_rdrop(bmdi, param); + param.rdrop = bmdi->bmcd->rdrop.tpu_rdrop; + bm_set_sc7_rdrop(bmdi, param); mdelay(100); - } // set vddc rdrop param.page = 1; - if (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_PLUS || - BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_PRO) { - param.rdrop = 50; - bm_set_sc7_vddc_rdrop(bmdi, param); + param.rdrop = bmdi->bmcd->rdrop.vddc_rdrop; + bm_set_sc7_vddc_rdrop(bmdi, param); + mdelay(100); - } rdrop = bm_get_sc7_rdrop(bmdi); pr_info("The rdrop is set to: %d\n", rdrop); rdrop = bm_get_sc7_vddc_rdrop(bmdi); @@ -2908,7 +3087,11 @@ int bmdrv_volt_freq_scaling_controller(struct bm_device_info *bmdi) struct bm_freq_scaling_db * p_data = NULL; if ((BM1684_BOARD_TYPE(bmdi) != BOARD_TYPE_SC7_PRO) && + (BM1684_BOARD_TYPE(bmdi) != BOARD_TYPE_SC7_FP150) && (BM1684_BOARD_TYPE(bmdi) != BOARD_TYPE_CP24) && + (BM1684_BOARD_TYPE(bmdi) != BOARD_TYPE_AIV01X) && + (BM1684_BOARD_TYPE(bmdi) != BOARD_TYPE_AIV02X) && + (BM1684_BOARD_TYPE(bmdi) != BOARD_TYPE_AIV03X) && (BM1684_BOARD_TYPE(bmdi) != BOARD_TYPE_SC7_PLUS)) return 0; @@ -2927,17 +3110,13 @@ int bmdrv_volt_freq_scaling_controller(struct bm_device_info *bmdi) else return 0; mode = FREQ_DOWN_MODE; - p_data->freq_up_count = 0; } else if (p_data->power_average <= p_data->power_lower_threshold) { - p_data->freq_up_count++; - if (p_data->freq_up_count < 60) - return 0; - p_data->freq_up_count = 0; if (p_data->vf_level > 0) p_data->vf_level--; + else + return 0; mode = FREQ_UP_MODE; } else { - p_data->freq_up_count = 0; return 0; } @@ -2965,7 +3144,11 @@ int bmdrv_volt_freq_scaling(struct bm_device_info 
*bmdi) struct bm_freq_scaling_db * p_data = NULL; if ((BM1684_BOARD_TYPE(bmdi) != BOARD_TYPE_SC7_PRO) && + (BM1684_BOARD_TYPE(bmdi) != BOARD_TYPE_SC7_FP150) && (BM1684_BOARD_TYPE(bmdi) != BOARD_TYPE_CP24) && + (BM1684_BOARD_TYPE(bmdi) != BOARD_TYPE_AIV01X) && + (BM1684_BOARD_TYPE(bmdi) != BOARD_TYPE_AIV02X) && + (BM1684_BOARD_TYPE(bmdi) != BOARD_TYPE_AIV03X) && (BM1684_BOARD_TYPE(bmdi) != BOARD_TYPE_SC7_PLUS)) return -1; @@ -2983,7 +3166,8 @@ int bmdrv_volt_freq_scaling(struct bm_device_info *bmdi) p_data->freq_scal_count++; p_data->board_pwr_count++; if ((p_data->power_highest >= p_data->power_peak_threshold) && (p_data->vf_level < p_data->vf_relbl_level)) { //peak clipping - p_data->vf_level = p_data->vf_relbl_level; + pr_info("arrive the threshold highest power!"); + p_data->vf_level++; volt = p_data->freq_volt_pair[p_data->vf_level].volt; bmdrv_set_sc7_pro_tpu_volt_freq(bmdi, volt, (u32)freq, FREQ_INIT_MODE); p_data->freq_scal_count = 0; @@ -3018,6 +3202,9 @@ int bmdrv_volt_freq_scaling(struct bm_device_info *bmdi) bmdrv_init_freq_scaling_status(bmdi); p_data->start_flag = VFS_INIT_MODE; if (((int)BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_PLUS) || + ((int)BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_AIV03X) || + ((int)BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_AIV02X) || + ((int)BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_AIV01X) || ((int)BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_CP24)) { p_data->start_flag = VFS_MISSION_MODE; } @@ -3025,10 +3212,14 @@ int bmdrv_volt_freq_scaling(struct bm_device_info *bmdi) bmdrv_set_sc7_pro_tpu_volt_freq(bmdi, volt, (u32)freq, FREQ_INIT_MODE); if (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_PRO) { bm_set_vddc_voltage(bmdi->bmcd->card_bmdi[p_data->chip0_index - bmdi->bmcd->dev_start_index], 980); - bm_set_rdrop(bmdi); - } else if (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_PLUS) { - bm_set_vddc_voltage(bmdi->bmcd->card_bmdi[p_data->chip0_index - bmdi->bmcd->dev_start_index], 940); - bm_set_rdrop(bmdi); + 
bm_set_rdrop(bmdi->bmcd->card_bmdi[p_data->chip0_index - bmdi->bmcd->dev_start_index]); + } else if ((BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_PLUS) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_AIV03X) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_AIV02X) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_AIV01X) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_FP150)) { + bm_set_vddc_voltage(bmdi->bmcd->card_bmdi[p_data->chip0_index - bmdi->bmcd->dev_start_index], 960); + bm_set_rdrop(bmdi->bmcd->card_bmdi[p_data->chip0_index - bmdi->bmcd->dev_start_index]); } else if (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_CP24) { bm_set_vddc_voltage(bmdi->bmcd->card_bmdi[p_data->chip0_index - bmdi->bmcd->dev_start_index], 980); } diff --git a/driver/bm_attr.h b/driver/bm_attr.h index 2428769..e27cb0b 100755 --- a/driver/bm_attr.h +++ b/driver/bm_attr.h @@ -6,12 +6,18 @@ #define FAN_PWM_PERIOD 4000 #define LED_PWM_PERIOD 100000000UL // p_clk 100MHz #define BM_THERMAL_WINDOW_WIDTH 5 -#define VFS_MAX_LEVEL_SC7_PRO 20 -#define VFS_MAX_LEVEL_SC7_PLUS 3 +#define VFS_MAX_LEVEL_SC7_PRO 19 +#define VFS_MAX_LEVEL_SC7_FP150 16 +#define VFS_MAX_LEVEL_SC7_PLUS 8 +#define VFS_MAX_LEVEL_AIV02X 9 #define VFS_INIT_LEVEL_SC7_PLUS 0 +#define VFS_INIT_LEVEL_AIV02X 0 #define VFS_INIT_LEVEL_SC7_PRO 0 -#define VFS_RELBL_LEVEL_SC7_PLUS 1 +#define VFS_INIT_LEVEL_SC7_FP150 0 +#define VFS_RELBL_LEVEL_SC7_PLUS 4 +#define VFS_RELBL_LEVEL_AIV02X 5 #define VFS_RELBL_LEVEL_SC7_PRO 5 +#define VFS_RELBL_LEVEL_SC7_FP150 5 #define VFS_PWR_MEAN_SAMPLE_SIZE 10 #define LED_OFF 0 diff --git a/driver/bm_bgm.c b/driver/bm_bgm.c index 483159a..0ece371 100644 --- a/driver/bm_bgm.c +++ b/driver/bm_bgm.c @@ -77,7 +77,7 @@ static int ion_carveout_heap_allocate(struct ion_heap *heap, paddr = ion_carveout_allocate(heap, size); if (paddr == ION_CARVEOUT_ALLOCATE_FAIL) { ret = -ENOMEM; - goto err_free; + goto err_free_table; } // sg_set_page(table->sgl, pfn_to_page(PFN_DOWN(paddr)), size, 0); @@ -89,6 +89,8 @@ static int 
ion_carveout_heap_allocate(struct ion_heap *heap, return 0; +err_free_table: + sg_free_table(table); err_free: kfree(table); return ret; @@ -396,26 +398,24 @@ static const struct dma_buf_ops dma_buf_ops = { .detach = ion_dma_buf_detatch, .mmap = ion_mmap, #if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 12, 0) - #if (LINUX_VERSION_CODE == KERNEL_VERSION(4, 18,0)) && (CENTOS_KERNEL_FIX >= 240) + #if (LINUX_VERSION_CODE == KERNEL_VERSION(4, 18,0)) && (CENTOS_KERNEL_FIX >= 147) #else - #if (LINUX_VERSION_CODE <= KERNEL_VERSION(5, 6,0)) - .map = ion_dma_buf_kmap, - .unmap = ion_dma_buf_kunmap, - .vmap = ion_dma_buf_vmap, - .vunmap = ion_dma_buf_vunmap, - #else - .vmap = ion_dma_buf_vmap, - .vunmap = ion_dma_buf_vunmap, - #endif + #if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 19, 0)) + .map_atomic = ion_dma_buf_kmap, + .unmap_atomic = ion_dma_buf_kunmap, + #endif - #endif - #if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 19, 0)) && (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 12, 0)) - #if (LINUX_VERSION_CODE == KERNEL_VERSION(4, 18,0)) && (CENTOS_KERNEL_FIX >= 147) + #if (LINUX_VERSION_CODE <= KERNEL_VERSION(5, 6,0) && LINUX_VERSION_CODE != KERNEL_VERSION(4, 19,0)) + .map = ion_dma_buf_kmap, + .unmap = ion_dma_buf_kunmap, + .vmap = ion_dma_buf_vmap, + .vunmap = ion_dma_buf_vunmap, #else - .map_atomic = ion_dma_buf_kmap, - .unmap_atomic = ion_dma_buf_kunmap, + .vmap = ion_dma_buf_vmap, + .vunmap = ion_dma_buf_vunmap, #endif + #endif #else #if (LINUX_VERSION_CODE == KERNEL_VERSION(3, 10, 0)) && (CENTOS_KERNEL_FIX > 693) diff --git a/driver/bm_bgm.h b/driver/bm_bgm.h index 381b33a..5126091 100644 --- a/driver/bm_bgm.h +++ b/driver/bm_bgm.h @@ -50,6 +50,26 @@ typedef struct bm_mem_desc { typedef struct bm_mem_desc bm_device_mem_t; +typedef struct bm_mem_desc_u64 { + union { + struct { + unsigned long device_addr; + unsigned int reserved0; + int dmabuf_fd; + } device; + struct { + void *system_addr; + unsigned int reserved; + int reserved1; + } system; + } u; + + bm_mem_flags_t flags; + 
unsigned long long size; +} bm_mem_desc_u64_t; + +typedef struct bm_mem_desc_u64 bm_device_mem_u64_t; + enum ion_heap_type { ION_HEAP_TYPE_SYSTEM, ION_HEAP_TYPE_SYSTEM_CONTIG, diff --git a/driver/bm_boot_info.c b/driver/bm_boot_info.c index 0456483..7e8035a 100644 --- a/driver/bm_boot_info.c +++ b/driver/bm_boot_info.c @@ -345,12 +345,26 @@ int bmdrv_check_bootinfo(struct bm_device_info *bmdi) if (bmdi->boot_info.board_power_sensor_exist != 1 || bmdi->boot_info.fan_exist != 0 || bmdi->boot_info.max_board_power != 300 || - bmdi->boot_info.tpu_max_clk != 1000 || + bmdi->boot_info.tpu_max_clk != 875 || bmdi->boot_info.tpu_min_clk != 25) { bmdi->boot_info.board_power_sensor_exist = 1; bmdi->boot_info.max_board_power = 300; bmdi->boot_info.tpu_min_clk = 25; - bmdi->boot_info.tpu_max_clk = 1000; + bmdi->boot_info.tpu_max_clk = 875; + bmdi->boot_info.fan_exist = 0; + need_update = 1; + } + break; + case BOARD_TYPE_SC7_FP150: + if (bmdi->boot_info.board_power_sensor_exist != 1 || + bmdi->boot_info.fan_exist != 0 || + bmdi->boot_info.max_board_power != 150 || + bmdi->boot_info.tpu_max_clk != 800 || + bmdi->boot_info.tpu_min_clk != 25) { + bmdi->boot_info.board_power_sensor_exist = 1; + bmdi->boot_info.max_board_power = 150; + bmdi->boot_info.tpu_min_clk = 25; + bmdi->boot_info.tpu_max_clk = 800; bmdi->boot_info.fan_exist = 0; need_update = 1; } @@ -369,6 +383,35 @@ int bmdrv_check_bootinfo(struct bm_device_info *bmdi) need_update = 1; } break; + case BOARD_TYPE_AIV03X: + if (bmdi->boot_info.board_power_sensor_exist != 1 || + bmdi->boot_info.fan_exist != 0 || + bmdi->boot_info.max_board_power != 75 || + bmdi->boot_info.tpu_min_clk != 25 || + bmdi->boot_info.tpu_max_clk != 750) { + bmdi->boot_info.board_power_sensor_exist = 1; + bmdi->boot_info.max_board_power = 75; + bmdi->boot_info.tpu_min_clk = 25; + bmdi->boot_info.tpu_max_clk = 750; + bmdi->boot_info.fan_exist = 0; + need_update = 1; + } + break; + case BOARD_TYPE_AIV01X: + case BOARD_TYPE_AIV02X: + if 
(bmdi->boot_info.board_power_sensor_exist != 1 || + bmdi->boot_info.fan_exist != 0 || + bmdi->boot_info.max_board_power != 75 || + bmdi->boot_info.tpu_min_clk != 25 || + bmdi->boot_info.tpu_max_clk != 950) { + bmdi->boot_info.board_power_sensor_exist = 1; + bmdi->boot_info.max_board_power = 75; + bmdi->boot_info.tpu_min_clk = 25; + bmdi->boot_info.tpu_max_clk = 950; + bmdi->boot_info.fan_exist = 0; + need_update = 1; + } + break; case BOARD_TYPE_CP24: max_board_power_cmd = 150; if (bmdi->boot_info.board_power_sensor_exist != 1 || @@ -459,7 +502,12 @@ static int bmdrv_set_1684x_default_boot_info(struct bm_device_info *bmdi) bmdi->boot_info.append.append_v1.heap2_size = 0x40000000; if (board_type == BOARD_TYPE_SC7_PRO){ bmdi->boot_info.max_board_power = 300; - } else if (board_type == BOARD_TYPE_SC7_PLUS) { + }else if(board_type == BOARD_TYPE_SC7_FP150){ + bmdi->boot_info.max_board_power = 150; + } else if ((board_type == BOARD_TYPE_SC7_PLUS) || + (board_type == BOARD_TYPE_AIV01X) || + (board_type == BOARD_TYPE_AIV02X) || + (board_type == BOARD_TYPE_AIV03X)){ bmdi->boot_info.max_board_power = 75; } else if (board_type == BOARD_TYPE_CP24) { bmdi->boot_info.max_board_power = 150; diff --git a/driver/bm_card.c b/driver/bm_card.c index 6be45a1..bceea52 100644 --- a/driver/bm_card.c +++ b/driver/bm_card.c @@ -63,8 +63,12 @@ static int bm_update_sc5p_mcu_bmdi_to_card(struct bm_device_info *bmdi) { if ((BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC5_PRO) || (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_PRO) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_FP150) || (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_CP24) || - (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_PLUS)) { + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_PLUS) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_AIV01X) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_AIV02X) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_AIV03X)) { if (bmdrv_sc5pro_uart_is_connect_mcu(bmdi) != 0x1) return -1; else { @@ -83,6 +87,9 @@ int 
bm_card_update_sn(struct bm_device_info *bmdi, char *sn) { if ((BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC5_PLUS) || (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_CP24) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_AIV01X) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_AIV02X) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_AIV03X) || (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_PLUS)) { if (bmdi->cinfo.chip_index != 0) return -1; @@ -92,10 +99,26 @@ int bm_card_update_sn(struct bm_device_info *bmdi, char *sn) } #endif +/* + * Purpose: calculate the chip index of the card to avoid FP150 using index list + * like {1,2,4,5,6,7} + * @chip_index The dev index of this chip + * @start_index The dev index of the first probe chip in this card + * @chip_num The number of chips in this card +*/ +static int bm_get_chip_no(int chip_index, int start_index, int chip_num) +{ + return ((chip_index - start_index) % chip_num); +} + static int bm_add_chip_to_card(struct bm_device_info *bmdi) { int i = 0; + int chip_index = 0; struct bm_card *bmcd = NULL; + struct rdrop_info ri; + ri.tpu_rdrop = 200; + ri.vddc_rdrop = 50; #ifndef SOC_MODE bm1684_card_get_chip_index(bmdi); @@ -118,9 +141,13 @@ static int bm_add_chip_to_card(struct bm_device_info *bmdi) g_bmcd[i] = bmcd; g_bmcd[i]->card_index = i; g_bmcd[i]->chip_num = bm_card_get_chip_num(bmdi); + + chip_index = bm_get_chip_no(bmdi->dev_index,bmdi->dev_index,g_bmcd[i]->chip_num); + bmdi->cinfo.chip_no = chip_index; g_bmcd[i]->dev_start_index = bmdi->dev_index; - g_bmcd[i]->card_bmdi[bmdi->cinfo.chip_index] = bmdi; + g_bmcd[i]->card_bmdi[chip_index] = bmdi; g_bmcd[i]->first_probe_bmdi = bmdi; + g_bmcd[i]->rdrop = ri; bmdi->bmcd = g_bmcd[i]; g_bmcd[i]->running_chip_num = 0x1; g_bmcd[i]->cdma_max_payload = bmdi->memcpy_info.cdma_max_payload; @@ -135,7 +162,9 @@ static int bm_add_chip_to_card(struct bm_device_info *bmdi) return -1; } else { bmdi->bmcd = bmcd; - bmcd->card_bmdi[bmdi->cinfo.chip_index] = bmdi; + chip_index = 
bm_get_chip_no(bmdi->dev_index,bmdi->bmcd->first_probe_bmdi->dev_index,bmdi->bmcd->chip_num); + bmdi->cinfo.chip_no = chip_index; + bmcd->card_bmdi[chip_index] = bmdi; bmcd->running_chip_num++; bmcd->cdma_max_payload = bmdi->memcpy_info.cdma_max_payload; #ifndef SOC_MODE @@ -149,6 +178,7 @@ static int bm_add_chip_to_card(struct bm_device_info *bmdi) static int bm_remove_chip_from_card(struct bm_device_info *bmdi) { int index = 0x0; + int chip_index = 0; if (bmdi->bmcd == NULL) return 0; @@ -157,7 +187,9 @@ static int bm_remove_chip_from_card(struct bm_device_info *bmdi) if (g_bmcd[index] == NULL) return 0; - g_bmcd[index]->card_bmdi[bmdi->cinfo.chip_index] = NULL; + chip_index = bm_get_chip_no(bmdi->dev_index,g_bmcd[index]->dev_start_index,g_bmcd[index]->chip_num); + pr_info("Card %d ,chip num = %d\n", index, chip_index); + g_bmcd[index]->card_bmdi[chip_index] = NULL; g_bmcd[index]->running_chip_num--; if (g_bmcd[index]->running_chip_num == 0x0) { pr_info("free card %d\n", index); diff --git a/driver/bm_card.h b/driver/bm_card.h index 49cd899..1a04528 100644 --- a/driver/bm_card.h +++ b/driver/bm_card.h @@ -9,6 +9,10 @@ #define BM_MAX_CHIP_NUM_PER_CARD 1 #define BM_MAX_CHIP_NUM 1 #endif +struct rdrop_info { + int tpu_rdrop; + int vddc_rdrop; +}; struct bm_card { int card_index; @@ -22,6 +26,7 @@ struct bm_card { int cdma_max_payload; char sn[18]; void *vfs_db; + struct rdrop_info rdrop; struct bm_device_info *sc5p_mcu_bmdi; struct bm_device_info *card_bmdi[BM_MAX_CHIP_NUM_PER_CARD]; struct bm_device_info *first_probe_bmdi; diff --git a/driver/bm_common.h b/driver/bm_common.h index edab2ce..8662426 100644 --- a/driver/bm_common.h +++ b/driver/bm_common.h @@ -35,6 +35,7 @@ #endif //#define PR_DEBUG +//#define FEATURE_DEBUG #define BM_CHIP_VERSION PROJECT_VER_MAJOR #define BM_MAJOR_VERSION PROJECT_VER_MINOR @@ -150,6 +151,7 @@ struct chip_info { u32 polling_ms; unsigned int chip_id; int chip_index; + int chip_no; struct bootloader_version version; #ifdef SOC_MODE u32 
irq_id_cdma; @@ -223,6 +225,21 @@ struct bmdrv_exec_func bm_get_func_t exec_func; }; +enum bm_rw_op { + BM_READ = 0, + BM_WRITE = 1, + BM_MALLOC = 2, + BM_FREE = 3, +}; + +struct bm_rw +{ + enum bm_rw_op op; + u64 paddr; + u32 value; + void *vaddr; +}; + struct bm_device_info { int dev_index; u64 bm_send_api_seq; @@ -276,6 +293,7 @@ struct bm_device_info { struct proc_dir_entry *proc_dir; + struct bm_rw bm_rw_t; #ifndef SOC_MODE vpp_drv_context_t vppdrvctx; vpu_drv_context_t vpudrvctx; @@ -291,6 +309,7 @@ struct bm_device_info { #ifdef PCIE_MODE_ENABLE_CPU int status_bmcpu; int status_reset; + struct bmcpu_process *process_info; #endif }; diff --git a/driver/bm_ctl.c b/driver/bm_ctl.c index 34f1a74..45b68f5 100644 --- a/driver/bm_ctl.c +++ b/driver/bm_ctl.c @@ -20,6 +20,16 @@ struct bm_ctrl_info *bmci; int dev_count; +#ifdef SOC_MODE +typedef struct { + struct list_head list; + pid_t vpu_pid; + int64_t vpu_gmem_used; +} bm_smi_vpu_proc_gmem; + +static struct list_head vpu_gmem_info = LIST_HEAD_INIT(vpu_gmem_info); +#endif + int bmdrv_init_bmci(struct chip_info *cinfo) { int rc; @@ -409,6 +419,9 @@ static int bmctl_get_smi_proc_gmem(struct bm_ctrl_info *bmci, struct bm_device_info *bmdi; struct bm_handle_info *h_info; int proc_cnt = 0; +#ifdef SOC_MODE + bm_smi_vpu_proc_gmem *vpu_mem_info, *tmp; +#endif bmdi = bmctl_get_bmdi(bmci, smi_proc_gmem->dev_id); if (!bmdi) @@ -422,6 +435,20 @@ static int bmctl_get_smi_proc_gmem(struct bm_ctrl_info *bmci, if (proc_cnt == 128) break; } + +#ifdef SOC_MODE + if (proc_cnt < 128) { + list_for_each_entry_safe(vpu_mem_info, tmp, &vpu_gmem_info, list) { + if (vpu_mem_info->vpu_gmem_used > 0) { + smi_proc_gmem->pid[proc_cnt] = vpu_mem_info->vpu_pid; + smi_proc_gmem->gmem_used[proc_cnt] = vpu_mem_info->vpu_gmem_used / 1024 / 1024; + proc_cnt++; + if (proc_cnt == 128) + break; + } + } + } +#endif mutex_unlock(&bmdi->gmem_info.gmem_mutex); smi_proc_gmem->proc_cnt = proc_cnt; return 0; @@ -511,6 +538,44 @@ int bmctl_ioctl_set_ecc(struct 
bm_ctrl_info *bmci, unsigned long arg) return 0; } +#ifdef SOC_MODE +int bmctl_update_vpu_gmem(int pid, int mem_used, int is_free, int is_del) +{ + int proc_cnt = 0; + int update_success = 0; + bm_smi_vpu_proc_gmem *vpu_mem_info, *tmp; + if (is_del == 0) { + list_for_each_entry_safe(vpu_mem_info, tmp, &vpu_gmem_info, list) { + if (vpu_mem_info->vpu_pid == pid) { + if (is_free) + vpu_mem_info->vpu_gmem_used -= BGM_4K_ALIGN(mem_used); + else + vpu_mem_info->vpu_gmem_used += BGM_4K_ALIGN(mem_used); + update_success = 1; + proc_cnt++; + if (proc_cnt == 128) + break; + } + } + if (update_success == 0 && is_free == 0 && proc_cnt < 128) { + vpu_mem_info = kzalloc(sizeof(*vpu_mem_info), GFP_KERNEL); + vpu_mem_info->vpu_pid = pid; + vpu_mem_info->vpu_gmem_used = BGM_4K_ALIGN(mem_used); + list_add(&vpu_mem_info->list, &vpu_gmem_info); + } + } else { + list_for_each_entry_safe(vpu_mem_info, tmp, &vpu_gmem_info, list) { + if (pid == vpu_mem_info->vpu_pid) { + list_del(&vpu_mem_info->list); + kfree(vpu_mem_info); + } + } + } + return 0; +} +EXPORT_SYMBOL_GPL(bmctl_update_vpu_gmem); +#endif + #ifndef SOC_MODE int bmctl_ioctl_recovery(struct bm_ctrl_info *bmci, unsigned long arg) { @@ -601,7 +666,8 @@ int bmctl_ioctl_recovery(struct bm_ctrl_info *bmci, unsigned long arg) } if (bmdi->misc_info.chipid == 0x1684 || bmdi->misc_info.chipid == 0x1686) { - if (BM1684_BOARD_TYPE(bmdi) != BOARD_TYPE_SC7_PRO) { + if (BM1684_BOARD_TYPE(bmdi) != BOARD_TYPE_SC7_PRO && + BM1684_BOARD_TYPE(bmdi) != BOARD_TYPE_SC7_FP150) { /* watchdog reboot only when the board is neither SC7_PRO nor SC7_FP150 */ pr_info("to reboot chip, devid is %d\n", dev_id); bmdrv_wdt_start(bmdi); } diff --git a/driver/bm_debug.c b/driver/bm_debug.c index c60c440..385679f 100644 --- a/driver/bm_debug.c +++ b/driver/bm_debug.c @@ -642,15 +642,11 @@ static ssize_t bmdrv_tpu_freq_proc_write(struct file *file, const char __user *b kfree(buf); return -EFAULT; } - if ((res < 750) || (res > 1000)) { - pr_err("Error, valid value range is 750MHz ~ 1GHz\n"); - kfree(buf); - return -1; - } else { -
bmdrv_clk_set_tpu_target_freq(bmdi,res); - kfree(buf); - return count; - } + + bmdrv_clk_set_tpu_target_freq(bmdi,res); + kfree(buf); + + return count; } static int bmdrv_tpu_freq_proc_open(struct inode *inode, struct file *file) @@ -705,15 +701,10 @@ static ssize_t bmdrv_tpu_volt_proc_write(struct file *file, const char __user *b kfree(buf); return -EFAULT; } - if ((res < 550) || (res > 820)) { - pr_err("Error, valid value range is 550mv ~ 820mv\n"); - kfree(buf); - return -1; - } else { - bm_set_vdd_tpu_voltage(bmdi,res); - kfree(buf); - return count; - } + + bm_set_vdd_tpu_voltage(bmdi,res); + kfree(buf); + return count; } static int bmdrv_tpu_volt_proc_open(struct inode *inode, struct file *file) @@ -1225,12 +1216,15 @@ static int bmdrv_board_sn_proc_show(struct seq_file *m, void *v) { struct bm_device_info *bmdi = m->private; struct bm_chip_attr *c_attr; - char sn[18] = ""; struct bm_device_info *tmp_bmdi = bmdi; if ((BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC5_PRO) || (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_PRO) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_FP150) || (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_CP24) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_AIV01X) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_AIV02X) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_AIV03X) || (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_PLUS)) { if ((bmdi->bmcd->sc5p_mcu_bmdi) != NULL && (bmdi->bmcd != NULL)) tmp_bmdi = bmdi->bmcd->sc5p_mcu_bmdi; @@ -1241,11 +1235,8 @@ static int bmdrv_board_sn_proc_show(struct seq_file *m, void *v) } c_attr = &tmp_bmdi->c_attr; - mutex_lock(&c_attr->attr_mutex); - bm_get_sn(tmp_bmdi, sn); - mutex_unlock(&c_attr->attr_mutex); - seq_printf(m, "%s\n", sn); + seq_printf(m, "%s\n", bmdi->bmcd->sn); return 0; } @@ -1677,9 +1668,12 @@ static int bmdrv_bmcpu_status_proc_show(struct seq_file *m, void *v) if (bmdi->cinfo.chip_id == 0x1684 || bmdi->cinfo.chip_id == 0x1686) { if (bmdi->status_bmcpu == BMCPU_IDLE) seq_printf(m, "idle\n"); - else if 
(bmdi->status_bmcpu == BMCPU_RUNNING) - seq_printf(m, "running\n"); - else + else if (bmdi->status_bmcpu == BMCPU_RUNNING) { + if (gp_reg_read_enh(bmdi, GP_REG_ARM9_FW_MODE) == 0x2) + seq_printf(m, "running(mix mode)\n"); + else + seq_printf(m, "running\n"); + } else seq_printf(m, "fault\n"); } else { seq_printf(m, "unsupport\n"); @@ -1862,7 +1856,11 @@ static int bmdrv_location_proc_show(struct seq_file *m, void *v) if (bmdi->cinfo.chip_id != 0x1682) { if ((BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC5_PRO) || (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_PRO) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_FP150) || (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_CP24) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_AIV01X) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_AIV02X) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_AIV03X) || (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_PLUS)) { value = gpio_reg_read(bmdi, 0x50); value = value >> 0x5; diff --git a/driver/bm_drv.c b/driver/bm_drv.c index 549a2f2..d88a628 100644 --- a/driver/bm_drv.c +++ b/driver/bm_drv.c @@ -105,7 +105,11 @@ void bmdrv_post_api_process(struct bm_device_info *bmdi, } } +#if LINUX_VERSION_CODE >= KERNEL_VERSION(6, 2, 0) +static char *bmdrv_class_devnode(const struct device *dev, umode_t *mode) +#else static char *bmdrv_class_devnode(struct device *dev, umode_t *mode) +#endif { if (!mode || !dev) return NULL; @@ -191,9 +195,11 @@ void bmdrv_software_deinit(struct bm_device_info *bmdi) } struct class bmdev_class = { - .name = BM_CLASS_NAME, - .owner = THIS_MODULE, - .devnode = bmdrv_class_devnode, + .name = BM_CLASS_NAME, +#if LINUX_VERSION_CODE < KERNEL_VERSION(6, 4, 0) /* struct class has no .owner member from v6.4 on */ + .owner = THIS_MODULE, +#endif + .devnode = bmdrv_class_devnode, }; int bmdrv_class_create(void) diff --git a/driver/bm_fops.c b/driver/bm_fops.c index 3bbe47f..58c0ef6 100644 --- a/driver/bm_fops.c +++ b/driver/bm_fops.c @@ -17,6 +17,7 @@ #include "bm1684_clkrst.h" #include "bm1684_base64.h" #include "bm_timer.h" +#include "bm_api.h" #ifndef SOC_MODE
#include "spi.h" #include "i2c.h" @@ -137,6 +138,12 @@ static int bmdev_close(struct inode *inode, struct file *file) struct bm_device_info *bmdi = file->private_data; struct bm_handle_info *h_info, *h_node; int handle_num = 0; +#ifndef SOC_MODE + u8 process_handle; + bm_api_close_process_t api_close_process; + struct bmcpu_process *process_temp, *process_next; + struct bmcpu_process *process_info = bmdi->process_info; +#endif if (bmdev_gmem_get_handle_info(bmdi, file, &h_info)) { pr_err("bmdrv: file list is not found!\n"); @@ -155,6 +162,18 @@ static int bmdev_close(struct inode *inode, struct file *file) bmdrv_api_clear_lib(bmdi, file); #ifndef SOC_MODE + mutex_lock(&process_info->bmcpu_process_mutex); + list_for_each_entry_safe(process_temp, process_next, &process_info->process_list, process_list){ + if(process_temp->current_pid == current->pid) { + process_handle = process_temp->bmcpu_handle; + api_close_process.process_handle = (u64)process_handle; + bmdrv_send_api_close(bmdi, file, (u8 *)&api_close_process); + list_del(&process_temp->process_list); + kfree(process_temp); + } + } + mutex_unlock(&process_info->bmcpu_process_mutex); + if (bmdrv_get_gmem_mode(bmdi) != GMEM_TPU_ONLY) { bm_vpu_release(inode, file); bm_jpu_release(inode, file); @@ -354,6 +373,23 @@ static long bm_ioctl(struct file *file, unsigned int cmd, unsigned long arg) break; }; + case BMDEV_SYNC_TIME_MIX: + { + struct bm_api_set_time { + u32 tv_sec; + u32 tv_usec; + u32 tz_minuteswest; + u32 tz_dsttime; + } set_time; + + ret = copy_from_user(&set_time, (void *)arg, sizeof(struct bm_api_set_time)); + bm_write32(bmdi, VETH_SHM_START_ADDR_1684X + VETH_TV_SEC, set_time.tv_sec); + bm_write32(bmdi, VETH_SHM_START_ADDR_1684X + VETH_TV_USEC, set_time.tv_usec); + bm_write32(bmdi, VETH_SHM_START_ADDR_1684X + VETH_TZ_MINUTESWEST, set_time.tz_minuteswest); + bm_write32(bmdi, VETH_SHM_START_ADDR_1684X + VETH_TZ_DSTTIME, set_time.tz_dsttime); + bm_write32(bmdi, VETH_SHM_START_ADDR_1684X + CHANGE_VETH_TIME, 
0x1); + break; /* do not fall through into BMDEV_SET_GATE */ + } case BMDEV_SET_GATE: { u32 gate; @@ -386,6 +422,12 @@ static long bm_ioctl(struct file *file, unsigned int cmd, unsigned long arg) return -EFAULT; } gp_reg_write_enh(bmdi, GP_REG_ARM9_FW_MODE, mode); + + if (mode == FW_MIX_MODE) { + gp_reg_write_enh(bmdi, GP_REG_MESSAGE_WP_CHANNEL_XPU, 0); + gp_reg_write_enh(bmdi, GP_REG_MESSAGE_RP_CHANNEL_XPU, 0); + } + break; }; @@ -582,10 +624,18 @@ static long bm_ioctl(struct file *file, unsigned int cmd, unsigned long arg) ret = bmdrv_gmem_ioctl_alloc_mem_ion(bmdi, file, arg); break; + case BMDEV_ALLOC_GMEM_ION_U64: + ret = bmdrv_gmem_ioctl_alloc_mem_ion_u64(bmdi, file, arg); + break; + case BMDEV_FREE_GMEM: ret = bmdrv_gmem_ioctl_free_mem(bmdi, file, arg); break; + case BMDEV_FREE_GMEM_U64: + ret = bmdrv_gmem_ioctl_free_mem_u64(bmdi, file, arg); + break; + case BMDEV_TOTAL_GMEM: ret = put_user(bmdrv_gmem_total_size(bmdi), (u64 __user *)arg); break; @@ -630,7 +680,9 @@ static long bm_ioctl(struct file *file, unsigned int cmd, unsigned long arg) ret = -EBUSY; } break; - + case BMDEV_SYNC_TIMEOUT_API: + ret = bmdrv_set_sync_timeout(bmdi, arg); + break; case BMDEV_HANDLE_SYNC_API: if (bmdi->status_sync_api == 0) { ret = bmdrv_handle_sync_api(bmdi, file); @@ -751,7 +803,11 @@ static long bm_ioctl(struct file *file, unsigned int cmd, unsigned long arg) if ((BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC5_PRO) || (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_PRO) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_FP150) || (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_CP24) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_AIV01X) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_AIV02X) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_AIV03X) || (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_PLUS)) { if (bmdi->bmcd->sc5p_mcu_bmdi != NULL && bmdi->bmcd != NULL) ctx.uart.bmdi= bmdi->bmcd->sc5p_mcu_bmdi; @@ -822,7 +878,11 @@ static long bm_ioctl(struct file *file, unsigned int cmd, unsigned long arg) if ((BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC5_PRO) ||
(BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_PRO) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_FP150) || (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_CP24) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_AIV01X) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_AIV02X) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_AIV03X) || (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_PLUS)) { pr_err("bmsophon %d, sc5p not support mcu sheck sum\n", bmdi->dev_index); return -ENOSYS; @@ -921,7 +981,63 @@ static long bm_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case BMDEV_BOARD_TYPE: ret = bm_burning_info_board_type(bmdi, arg); break; + case BMDEV_RW_HOST: + { + struct bm_rw temp; + + if (copy_from_user(&temp, (struct bm_rw __user *)arg, + sizeof(struct bm_rw))) + return -EFAULT; + + if (temp.op == BM_MALLOC) { + bmdi->bm_rw_t.vaddr = dma_alloc_coherent(bmdi->cinfo.device, 0x1000, &(bmdi->bm_rw_t.paddr), GFP_KERNEL); + } else if (temp.op == BM_FREE) + dma_free_coherent(bmdi->cinfo.device, 0x1000, bmdi->bm_rw_t.vaddr, bmdi->bm_rw_t.paddr); + else if (temp.op == BM_READ) + bmdi->bm_rw_t.value = ioread32(bmdi->bm_rw_t.vaddr); + else if (temp.op == BM_WRITE) { + iowrite32(temp.value, bmdi->bm_rw_t.vaddr); + bmdi->bm_rw_t.value = temp.value; + } + + if (copy_to_user((struct bm_rw __user *)arg, &(bmdi->bm_rw_t), + sizeof(struct bm_rw))) + return -EFAULT; + + break; + } #endif + case BMDEV_RW_MIX: + { + u64 paddr = 0x5fb80000; + void __iomem *vaddr; + struct bm_rw reg; + + if (copy_from_user(&reg, (struct bm_rw __user *)arg, + sizeof(struct bm_rw))) + return -EFAULT; + + vaddr = ioremap(paddr, 0x100000); + + iowrite32(0xf, vaddr + 0xf044); + iowrite32((u32)(reg.paddr & 0xffffffff), vaddr + 0xf064); + iowrite32((u32)(reg.paddr >> 32), vaddr + 0xf060); + iowrite32((u32)(reg.paddr & 0xffffffff), vaddr + 0xf014); + iowrite32((u32)(reg.paddr >> 32), vaddr + 0xf010); + + if (reg.op == BM_READ) + reg.value = ioread32(vaddr + 0x72000 + (u32)(reg.paddr & 0xfff)); + else if (reg.op == BM_WRITE) +
iowrite32(reg.value, vaddr + 0x72000 + (u32)(reg.paddr & 0xfff)); + + iounmap(vaddr); + + if (copy_to_user((struct bm_rw __user *)arg, &reg, + sizeof(struct bm_rw))) + return -EFAULT; + + break; + } case BMDEV_GET_STATUS: ret = put_user(bmdi->status,(int __user *)arg); break; @@ -1233,6 +1349,7 @@ static long bm_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case BMDEV_SET_TPU_FREQ: mutex_lock(&bmdi->clk_reset_mutex); + bmdi->enable_dyn_freq = 0; ret = bmdev_clk_ioctl_set_tpu_freq(bmdi, arg); mutex_unlock(&bmdi->clk_reset_mutex); break; @@ -1242,7 +1359,109 @@ static long bm_ioctl(struct file *file, unsigned int cmd, unsigned long arg) ret = bmdev_clk_ioctl_get_tpu_freq(bmdi, arg); mutex_unlock(&bmdi->clk_reset_mutex); break; +#ifndef SOC_MODE +#ifdef FEATURE_DEBUG + case BMDEV_SET_TPU_VOLT: + { + int volt; + struct bm_device_info *chip_bmdi; + struct chip_info *cinfo; + struct bm_freq_scaling_db *p_data; + if(get_user(volt, (u64 __user *)arg)) { + pr_err("bmdrv: bmdev_clk_ioctl_set_tpu_freq get user failed!\n"); + ret = -1; + break; + } + p_data = bmdi->bmcd->vfs_db; + if ((p_data == NULL) || (p_data->chip0_index == -1)){ + ret = -1; + break; + } + chip_bmdi = bmdi->bmcd->card_bmdi[p_data->chip0_index - bmdi->bmcd->dev_start_index]; + if (chip_bmdi != NULL) { + ret = bm_set_vdd_tpu_voltage(chip_bmdi, volt); + if (ret != 0) { + cinfo = &chip_bmdi->cinfo; + dev_err(cinfo->device, "device chip tpu volt cfg fail, %d\n", ret); + } + } + break; + } + case BMDEV_SET_RDROP: + { + struct bm_freq_scaling_db *p_data; + struct bm_device_info *chip_bmdi; + struct rdrop_info ri; + if(copy_from_user(&ri, (struct rdrop_info *)arg, sizeof(struct rdrop_info))){ + pr_err("bmdrv: get user rdrop input fail\n"); + ret = -1; + break; + } + + + bmdi->bmcd->rdrop.tpu_rdrop = ri.tpu_rdrop; + bmdi->bmcd->rdrop.vddc_rdrop = ri.vddc_rdrop; + p_data = bmdi->bmcd->vfs_db; + if ((p_data == NULL) || (p_data->chip0_index == -1)){ + ret = -1; + break; + } + chip_bmdi =
bmdi->bmcd->card_bmdi[p_data->chip0_index - bmdi->bmcd->dev_start_index]; + if (chip_bmdi != NULL) { + bm_set_rdrop(chip_bmdi); + ri.tpu_rdrop = bm_get_sc7_rdrop(chip_bmdi); + ri.vddc_rdrop = bm_get_sc7_vddc_rdrop(chip_bmdi); + } + + if(copy_to_user((struct rdrop_info *)arg, &ri, sizeof(struct rdrop_info))){ + pr_err("bmdrv: set user rdrop output fail\n"); + ret = -1; + break; + } + break; + } + case BMDEV_GET_RDROP: + { + struct rdrop_info ri; + ri = bmdi->bmcd->rdrop; + if(copy_to_user((struct rdrop_info *)arg, &ri, sizeof(struct rdrop_info))){ + pr_err("bmdrv: set user rdrop output fail\n"); + ret = -1; + break; + } + break; + } + case BMDEV_SET_VDDC_VOLT: + { + struct bm_freq_scaling_db *p_data; + struct bm_device_info *chip_bmdi; + int vddc_volt; + if(copy_from_user(&vddc_volt, (int *)arg, sizeof(int))){ + pr_err("bmdrv: get user vddc_volt input fail\n"); + ret = -1; + break; + } + + p_data = bmdi->bmcd->vfs_db; + if ((p_data == NULL) || (p_data->chip0_index == -1)){ + ret = -1; + break; + } + chip_bmdi = bmdi->bmcd->card_bmdi[p_data->chip0_index - bmdi->bmcd->dev_start_index]; + if (chip_bmdi != NULL) { + bm_set_vddc_voltage(bmdi->bmcd->card_bmdi[p_data->chip0_index - bmdi->bmcd->dev_start_index], vddc_volt); + } + + if(copy_to_user((int *)arg, &vddc_volt, sizeof(int))){ + pr_err("bmdrv: set user vddc_volt output fail\n"); + ret = -1; + break; + } + break; + } +#endif +#endif case BMDEV_SET_MODULE_RESET: if (bmdi->misc_info.pcie_soc_mode == BM_DRV_SOC_MODE) ret = -EPERM; diff --git a/driver/bm_fw.c b/driver/bm_fw.c index 16f49d6..98cc4db 100644 --- a/driver/bm_fw.c +++ b/driver/bm_fw.c @@ -40,10 +40,11 @@ static int bmdrv_compare_fw(struct bm_device_info *bmdi, struct file *file, cons mutex_lock(&stagemem_d2s->stage_mutex); for (pass_idx = 0, cur_addr_inc = 0; pass_idx < (size + realmem_size - 1) / realmem_size; pass_idx++) { - if ((pass_idx + 1) * realmem_size < size) + if ((pass_idx + 1) * realmem_size < size){ size_step = realmem_size; - else - size_step = 
size - pass_idx * realmem_size; + }else{ + size_step = size - pass_idx * realmem_size; + } memset(stagemem_d2s->v_addr, 0, size_step); @@ -200,10 +201,12 @@ static int bmdrv_fw_download_kernel(struct bm_device_info *bmdi, struct file *fi break; case 0x1686: #ifndef SOC_MODE - if (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_CP24) + if ((BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_CP24) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_AIV02X)){ ret = bmdrv_load_firmware(bmdi, file, fw_ddr_array, fw_ddr_size / sizeof(u32), a53lite_park); - else + }else{ ret = bmdrv_request_and_load_firmware(bmdi, file, bm1684x_dyn_fw, a53lite_park); + } #else ret = bmdrv_request_and_load_firmware(bmdi, file, bm1684x_dyn_fw, a53lite_park); #endif @@ -231,7 +234,7 @@ static int bmdrv_fw_download_user(struct bm_device_info *bmdi, struct file *file firmware_header_copy = (struct firmware_header *)fw->ddr_fw; firmware_header_check = kmalloc(sizeof(struct firmware_header), GFP_KERNEL); ret = copy_from_user(firmware_header_check, fw->ddr_fw, sizeof(struct firmware_header)); - if(ret) pr_info("%s copy from user fail!\n",__func__); + if(ret) {pr_info("%s copy from user fail!\n",__func__);} if(firmware_header_check->magic[0] == 's' && firmware_header_check->magic[1] == 'g' && firmware_header_check->magic[2] == 'f' && firmware_header_check->magic[3] == 'w') { @@ -257,7 +260,7 @@ static int bmdrv_fw_download_user(struct bm_device_info *bmdi, struct file *file firmware_header_copy = (struct firmware_header *)fw->itcm_fw; firmware_header_check = kmalloc(sizeof(struct firmware_header), GFP_KERNEL); ret = copy_from_user(firmware_header_check, fw->itcm_fw, sizeof(struct firmware_header)); - if(ret) pr_info("%s copy from user fail!\n",__func__); + if(ret) {pr_info("%s copy from user fail!\n",__func__);} if(firmware_header_check->magic[0] == 's' && firmware_header_check->magic[1] == 'g' && firmware_header_check->magic[2] == 'f' && firmware_header_check->magic[3] == 'w') { @@ -271,8 +274,9 @@ static int 
bmdrv_fw_download_user(struct bm_device_info *bmdi, struct file *file } else { ret = bmdev_memcpy_s2d(bmdi, file, 0, (int __user *)fw->itcm_fw, fw->itcmfw_size, false, 0); - if (ret) + if (ret){ return ret; + } pr_info("bmdrv: firmware loaded to itcm\n"); } @@ -301,8 +305,9 @@ static int bmdrv_eu_table_load(struct bm_device_info *bmdi) u32 address_shift; u32 *eu_cmd_warp = kmalloc_array(EU_CMD_LEN, sizeof(u32), GFP_KERNEL); - if (!eu_cmd_warp) + if (!eu_cmd_warp){ return -ENOMEM; + } for (i = 0; i < EU_CMD_LEN / 4; i++) { eu_cmd_warp[i * 4 + 0] = eu_cmd[i * 4 + 3]; eu_cmd_warp[i * 4 + 1] = eu_cmd[i * 4 + 2]; @@ -325,8 +330,8 @@ static int bmdrv_eu_table_load(struct bm_device_info *bmdi) cnt = 1000000; while (((bdc_reg_read(bmdi, 0x4) & 0x1) != 0) && - --cnt != 0) - ; + --cnt != 0){ + } if (cnt) { pr_info("bmdrv: load eu table done!\n"); return 0; diff --git a/driver/bm_gmem.c b/driver/bm_gmem.c index 5020454..572b4de 100644 --- a/driver/bm_gmem.c +++ b/driver/bm_gmem.c @@ -348,6 +348,29 @@ int bmdrv_gmem_ioctl_alloc_mem_ion(struct bm_device_info *bmdi, struct file *fil return ret; } +int bmdrv_gmem_ioctl_alloc_mem_ion_u64(struct bm_device_info *bmdi, struct file *file, + unsigned long arg) +{ + int ret = 0; + bm_device_mem_u64_t device_mem; + struct bm_handle_info *h_info; + + if (bmdev_gmem_get_handle_info(bmdi, file, &h_info)) { + pr_err("bm-sophon%d bmdrv: file list is not found!\n", bmdi->dev_index); + return -EINVAL; + } + mutex_lock(&bmdi->gmem_info.gmem_mutex); + if (copy_from_user(&device_mem, (bm_device_mem_u64_t __user *)arg, sizeof(device_mem))) { + mutex_unlock(&bmdi->gmem_info.gmem_mutex); + return -EFAULT; + } + h_info->gmem_used += BGM_4K_ALIGN(device_mem.size); + mutex_unlock(&bmdi->gmem_info.gmem_mutex); + PR_TRACE("bmdrv: gmem ion alloc %x\n", device_mem.size); + + return ret; +} + int bmdrv_gmem_ioctl_free_mem(struct bm_device_info *bmdi, struct file *file, unsigned long arg) { @@ -373,6 +396,31 @@ int bmdrv_gmem_ioctl_free_mem(struct 
bm_device_info *bmdi, struct file *file, return ret; } +int bmdrv_gmem_ioctl_free_mem_u64(struct bm_device_info *bmdi, struct file *file, + unsigned long arg) +{ + int ret = 0; + bm_device_mem_u64_t device_mem; + struct bm_handle_info *h_info; + + if (bmdev_gmem_get_handle_info(bmdi, file, &h_info)) { + pr_err("bm-sophon%d bmdrv: file list is not found!\n", bmdi->dev_index); + return -EINVAL; + } + + mutex_lock(&bmdi->gmem_info.gmem_mutex); + if (copy_from_user(&device_mem, (bm_device_mem_u64_t __user *)arg, sizeof(device_mem))) { + mutex_unlock(&bmdi->gmem_info.gmem_mutex); + return -EFAULT; + } + + h_info->gmem_used -= BGM_4K_ALIGN(device_mem.size); + mutex_unlock(&bmdi->gmem_info.gmem_mutex); + + PR_TRACE("%s 0x%lx, size 0x%x\n", __func__, device_mem.u.device.device_addr, device_mem.size); + return ret; +} + int bmdrv_gmem_ioctl_get_heap_num(struct bm_device_info *bmdi, unsigned long arg) { struct ion_device *dev = &bmdi->gmem_info.idev; diff --git a/driver/bm_gmem.h b/driver/bm_gmem.h index 8f74b98..aba3022 100644 --- a/driver/bm_gmem.h +++ b/driver/bm_gmem.h @@ -88,6 +88,10 @@ int bmdrv_gmem_ioctl_alloc_mem(struct bm_device_info *bmdi, struct file *file, unsigned long arg); int bmdrv_gmem_ioctl_alloc_mem_ion(struct bm_device_info *bmdi, struct file *file, unsigned long arg); +int bmdrv_gmem_ioctl_alloc_mem_ion_u64(struct bm_device_info *bmdi, struct file *file, + unsigned long arg); int bmdrv_gmem_ioctl_free_mem(struct bm_device_info *bmdi, struct file *file, unsigned long arg); +int bmdrv_gmem_ioctl_free_mem_u64(struct bm_device_info *bmdi, struct file *file, + unsigned long arg); #endif diff --git a/driver/bm_io.c b/driver/bm_io.c index 6add5e0..407f02e 100644 --- a/driver/bm_io.c +++ b/driver/bm_io.c @@ -512,10 +512,10 @@ void io_init(struct bm_device_info *bmdi) bm_reg_init_vaddr(bmdi, bmdi->cinfo.bm_reg->wdt_base_addr, &bmdi->cinfo.bar_info.io_bar_vaddr.wdt_bar_vaddr); bm_reg_init_vaddr(bmdi, bmdi->cinfo.bm_reg->tpu_base_addr, 
&bmdi->cinfo.bar_info.io_bar_vaddr.tpu_bar_vaddr); bm_reg_init_vaddr(bmdi, bmdi->cinfo.bm_reg->gdma_base_addr, &bmdi->cinfo.bar_info.io_bar_vaddr.gdma_bar_vaddr); - bm_reg_init_vaddr(bmdi, bmdi->cinfo.bm_reg->spacc_base_addr, &bmdi->cinfo.bar_info.io_bar_vaddr.spacc_bar_vaddr); - bm_reg_init_vaddr(bmdi, bmdi->cinfo.bm_reg->pka_base_addr, &bmdi->cinfo.bar_info.io_bar_vaddr.pka_bar_vaddr); bm_reg_init_vaddr(bmdi, bmdi->cinfo.bm_reg->efuse_base_addr, &bmdi->cinfo.bar_info.io_bar_vaddr.efuse_bar_vaddr); #ifndef SOC_MODE + bm_reg_init_vaddr(bmdi, bmdi->cinfo.bm_reg->spacc_base_addr, &bmdi->cinfo.bar_info.io_bar_vaddr.spacc_bar_vaddr); + bm_reg_init_vaddr(bmdi, bmdi->cinfo.bm_reg->pka_base_addr, &bmdi->cinfo.bar_info.io_bar_vaddr.pka_bar_vaddr); bm_reg_init_vaddr(bmdi, bmdi->cinfo.bm_reg->dev_info_base_addr, &bmdi->cinfo.bar_info.io_bar_vaddr.dev_info_bar_vaddr); bm_reg_init_vaddr(bmdi, bmdi->cinfo.bm_reg->i2c_base_addr, &bmdi->cinfo.bar_info.io_bar_vaddr.i2c_bar_vaddr); bm_reg_init_vaddr(bmdi, bmdi->cinfo.bm_reg->ddr_base_addr, &bmdi->cinfo.bar_info.io_bar_vaddr.ddr_bar_vaddr); diff --git a/driver/bm_memcpy.c b/driver/bm_memcpy.c index edaa3c5..5519a2b 100644 --- a/driver/bm_memcpy.c +++ b/driver/bm_memcpy.c @@ -649,24 +649,28 @@ int bmdev_test_p2p_available(struct bm_device_info *bmdi) int init_index; struct bm_device_info *chip_bmdi = NULL; int i; + int chip_num; - if (BM1684_BOARD_TYPE(bmdi) != BOARD_TYPE_SC7_PRO) + if (BM1684_BOARD_TYPE(bmdi) != BOARD_TYPE_SC7_PRO && + BM1684_BOARD_TYPE(bmdi) != BOARD_TYPE_SC7_FP150) return -1; + chip_num = bmdi->bmcd->chip_num; + chip_bmdi = bmdi->bmcd->card_bmdi[0]; init_index = chip_bmdi->dev_index; - if (bmdi->dev_index - init_index != 7) + if (bmdi->dev_index - init_index != chip_num - 1) return -1; if (bmdev_memcpy_p2p_test(bmdi, chip_bmdi)) { pr_info("p2p is unavailable\n"); - for (i = 0; i <= 7; i++) { + for (i = 0; i < chip_num; i++) { bmdi->bmcd->card_bmdi[i]->memcpy_info.p2p_available = 0; } } else { pr_info("p2p is 
available\n"); - for (i = 0; i <= 7; i++) { + for (i = 0; i < chip_num; i++) { bmdi->bmcd->card_bmdi[i]->memcpy_info.p2p_available = 1; } } diff --git a/driver/bm_napi.c b/driver/bm_napi.c index 1fe9425..8188cc2 100644 --- a/driver/bm_napi.c +++ b/driver/bm_napi.c @@ -166,11 +166,19 @@ static netdev_tx_t eth_ndo_start_xmit(struct sk_buff * skb, return NETDEV_TX_OK; } -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(5,6,0) || (LINUX_VERSION_CODE == KERNEL_VERSION(4,18,0) \ - && CENTOS_KERNEL_FIX >= 240)) + +#ifndef CENTOS_KERNEL_FIX + #if LINUX_VERSION_CODE >= KERNEL_VERSION(5,6,0) void eth_ndo_tx_timeout(struct net_device *ndev, unsigned int txqueue) + #else +static void eth_ndo_tx_timeout(struct net_device *ndev) + #endif #else + #if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,18,0) && CENTOS_KERNEL_FIX >= 240) +void eth_ndo_tx_timeout(struct net_device *ndev, unsigned int txqueue) + #else static void eth_ndo_tx_timeout(struct net_device *ndev) + #endif #endif { struct eth_dev_info *info = *((struct eth_dev_info **)netdev_priv(ndev)); @@ -253,10 +261,11 @@ static void eth_set_a53ipaddress(struct eth_dev_info *info) { if (bmdi->cinfo.chip_id == 0x1684) bm_write32(bmdi, VETH_SHM_START_ADDR_1684 + VETH_IPADDRESS_REG, bmdi->dev_index); else if (bmdi->cinfo.chip_id == 0x1686) { - bm_write32(bmdi, VETH_SHM_START_ADDR_1684X + VETH_IPADDRESS_REG, 0xc0c00002); + bm_write32(bmdi, VETH_SHM_START_ADDR_1684X + VETH_IPADDRESS_REG, 0xc0c00002 + (bmdi->dev_index << 8)); bm_write32(bmdi, VETH_SHM_START_ADDR_1684X + VETH_MASK_REG, 0xffffff00); bm_write32(bmdi, VETH_SHM_START_ADDR_1684X + VETH_GATE_ADDRESS_REG, 0); bm_write32(bmdi, VETH_SHM_START_ADDR_1684X + VETH_RESET_REG, 0); + bm_write32(bmdi, VETH_SHM_START_ADDR_1684X + CHANGE_VETH_TIME, 0); } } diff --git a/driver/bm_napi.h b/driver/bm_napi.h index 275d74d..ae34d38 100644 --- a/driver/bm_napi.h +++ b/driver/bm_napi.h @@ -31,6 +31,11 @@ #define VETH_MASK_REG 0x54 #define MIXMODE_CHIP_TEMP 0x58 #define MIXMODE_BOARD_TEMP 0x5c +#define 
CHANGE_VETH_TIME 0x6c +#define VETH_TV_SEC 0x70 +#define VETH_TV_USEC 0x74 +#define VETH_TZ_MINUTESWEST 0x78 +#define VETH_TZ_DSTTIME 0x7C #define VETH_SHM_START_ADDR_1684 0x0201be80 #define VETH_SHM_START_ADDR_1684X 0x101fb400 diff --git a/driver/bm_pcie.h b/driver/bm_pcie.h index 87171b3..b85768d 100644 --- a/driver/bm_pcie.h +++ b/driver/bm_pcie.h @@ -34,13 +34,22 @@ #define BOARD_TYPE_SC5_PLUS 0x7 #define BOARD_TYPE_SC5_H 0x8 #define BOARD_TYPE_SC5_PRO 0x9 +#define BOARD_TYPE_AIV01T 0x10 +#define BOARD_TYPE_AIV02T 0x11 +#define BOARD_TYPE_AIV03T 0x12 +#define BOARD_TYPE_AIV03T_24G 0x13 + #define BOARD_TYPE_SM5M_P 0xb #define BOARD_TYPE_BM1684X_EVB 0x20 #define BOARD_TYPE_SC7_PRO 0x21 #define BOARD_TYPE_SC7_PLUS 0x22 +#define BOARD_TYPE_SC7_FP150 0x23 #define BOARD_TYPE_SM7_V0_0 0x30 #define BOARD_TYPE_SM7_MP1_1 0x36 #define BOARD_TYPE_CP24 0x40 +#define BOARD_TYPE_AIV01X 0x50 +#define BOARD_TYPE_AIV02X 0x51 +#define BOARD_TYPE_AIV03X 0x52 #define DUMMY_PCIDEV_NAME "dummy-bmcard-pci" diff --git a/driver/bm_pcie_drv.c b/driver/bm_pcie_drv.c index dad0fbe..98a9254 100644 --- a/driver/bm_pcie_drv.c +++ b/driver/bm_pcie_drv.c @@ -427,6 +427,9 @@ static int bmdrv_hardware_init(struct bm_device_info *bmdi) pr_err("bm-sophon%d bmdrv: ddr init failed!\n", bmdi->dev_index); return -1; } +#ifndef FW_SIMPLE + bm1684_init_iommu(&bmdi->memcpy_info.iommuctl, bmdi->parent); +#endif if (bmdrv_get_gmem_mode(bmdi) != GMEM_TPU_ONLY) { vpp_init(bmdi); bm_vpu_init(bmdi); @@ -489,8 +492,12 @@ static int bmdrv_hardware_early_init(struct bm_device_info *bmdi) } bmdrv_power_and_temp_i2c_init(bmdi); if ((BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_PRO) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_FP150) || (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_CP24) || - (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_PLUS)) { + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_PLUS) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_AIV01X) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_AIV02X) || + 
(BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_AIV03X)) { bmdrv_uart_init(bmdi, uart_index, baudrate); } pr_info("bm-sophon%d 1684x bmdrv_hardware_early_init \n", bmdi->dev_index); @@ -774,6 +781,28 @@ int bmdrv_force_reset_bmcpu_pcie(struct bm_device_info *bmdi) { return ret; } +void bmdrv_fw_unload_mix(struct bm_device_info *bmdi) +{ + // u32 ctrl_word; + int value = 0x0; + + /*send fiq 6 to arm9, let it get ready to die*/ + top_reg_write(bmdi, TOP_GP_REG_ARM9_IRQ_SET_OFFSET, 0x1 << 3); + + udelay(50); + + /*stop smbus*/ + + value = intc_reg_read(bmdi, INTC0_BASE_ADDR_OFFSET + IRQ_MASK_L_OFFSET); + value = value | (0x1 << 27); + intc_reg_write(bmdi, INTC0_BASE_ADDR_OFFSET + IRQ_MASK_L_OFFSET, value); + + /* reset arm9 */ + // ctrl_word = top_reg_read(bmdi, TOP_SW_RESET0); + // ctrl_word &= ~(1 << 1); + // top_reg_write(bmdi, TOP_SW_RESET0, ctrl_word); +} + int bmdrv_reset_bmcpu(struct bm_device_info *bmdi) { int ret = 0; @@ -809,7 +838,7 @@ int bmdrv_reset_bmcpu(struct bm_device_info *bmdi) if (mode == FW_MIX_MODE && bmdi->cinfo.chip_id == BM1684X_DEVICE_ID) { pr_info("bmsophon%d mix mode force reset bmcpu!\n", bmdi->dev_index); - bmdrv_fw_unload(bmdi); + bmdrv_fw_unload_mix(bmdi); return bmdrv_force_reset_bmcpu(bmdi); } @@ -876,7 +905,7 @@ int bmdrv_reset_bmcpu(struct bm_device_info *bmdi) board_version = bmdi->cinfo.board_version; board_type = (u8)((board_version >> 8) & 0xff); - if (bmdi->cinfo.chip_id == BM1684X_DEVICE_ID || board_type == BOARD_TYPE_SC5_H) { + if (bmdi->cinfo.chip_id == BM1684X_DEVICE_ID) { pr_info("force reset bmcpu!\n"); ret = bmdrv_force_reset_bmcpu_pcie(bmdi); } diff --git a/driver/bm_thread.c b/driver/bm_thread.c index b882229..e5a0eeb 100644 --- a/driver/bm_thread.c +++ b/driver/bm_thread.c @@ -40,6 +40,7 @@ struct bm_thread_info *bmdrv_create_thread_info(struct bm_handle_info *h_info, p thd_info->profile.cdma_out_time = 0ULL; thd_info->profile.cdma_out_counter = 0ULL; thd_info->profile.tpu_process_time = 0ULL; + 
thd_info->profile.tpu1_process_time = 0ULL; thd_info->profile.sent_api_counter = 0ULL; thd_info->profile.completed_api_counter = 0ULL; diff --git a/driver/bm_uapi.h b/driver/bm_uapi.h index 57dbd31..4874614 100644 --- a/driver/bm_uapi.h +++ b/driver/bm_uapi.h @@ -30,6 +30,7 @@ typedef struct bm_profile { u64 cdma_out_time; u64 cdma_out_counter; u64 tpu_process_time; + u64 tpu1_process_time; u64 sent_api_counter; u64 completed_api_counter; } bm_profile_t; @@ -150,9 +151,12 @@ struct bm_reg { #define BMDEV_FLUSH_GMEM _IOWR('p', 0x18, unsigned long) #define BMDEV_ALLOC_GMEM_ION _IOW('p', 0x19, unsigned long) #define BMDEV_GMEM_ADDR _IOW('p', 0x1a, unsigned long) +#define BMDEV_ALLOC_GMEM_ION_U64 _IOW('p', 0x1b, unsigned long) +#define BMDEV_FREE_GMEM_U64 _IOW('p', 0x1c, unsigned long) #define BMDEV_SEND_API _IOW('p', 0x20, unsigned long) #define BMDEV_THREAD_SYNC_API _IOW('p', 0x21, unsigned long) +#define BMDEV_SYNC_TIMEOUT_API _IOW('p', 0x22, unsigned long) #define BMDEV_DEVICE_SYNC_API _IOW('p', 0x23, unsigned long) #define BMDEV_HANDLE_SYNC_API _IOW('p', 0x27, unsigned long) #define BMDEV_SEND_API_EXT _IOW('p', 0x28, unsigned long) @@ -173,6 +177,7 @@ struct bm_reg { #define BMDEV_SET_REG _IOWR('p', 0x3c, unsigned long) #define BMDEV_GET_REG _IOWR('p', 0x3d, unsigned long) #define BMDEV_GET_DEV_STAT _IOWR('p', 0x3e, unsigned long) +#define BMDEV_RW_MIX _IOWR('p', 0x3f, unsigned long) #define BMDEV_TRACE_ENABLE _IOW('p', 0x40, unsigned long) #define BMDEV_TRACE_DISABLE _IOW('p', 0x41, unsigned long) @@ -182,11 +187,16 @@ struct bm_reg { #define BMDEV_ENABLE_PERF_MONITOR _IOWR('p', 0x45, unsigned long) #define BMDEV_DISABLE_PERF_MONITOR _IOWR('p', 0x46, unsigned long) #define BMDEV_GET_DEVICE_TIME _IOWR('p', 0x47, unsigned long) +#define BMDEV_RW_HOST _IOWR('p', 0x48, unsigned long) #define BMDEV_SET_TPU_DIVIDER _IOWR('p', 0x50, unsigned long) #define BMDEV_SET_MODULE_RESET _IOWR('p', 0x51, unsigned long) #define BMDEV_SET_TPU_FREQ _IOWR('p', 0x52, unsigned long) 
#define BMDEV_GET_TPU_FREQ _IOWR('p', 0x53, unsigned long) +#define BMDEV_SET_TPU_VOLT _IOWR('p', 0x54, unsigned long) +#define BMDEV_SET_RDROP _IOWR('p', 0x55, unsigned long) +#define BMDEV_GET_RDROP _IOWR('p', 0x56, unsigned long) +#define BMDEV_SET_VDDC_VOLT _IOWR('p', 0x57, unsigned long) #define BMDEV_TRIGGER_VPP _IOWR('p', 0x60, unsigned long) #define BMDEV_TRIGGER_SPACC _IOWR('p', 0x61, unsigned long) @@ -223,6 +233,7 @@ struct bm_reg { #define BMDEV_COMM_SET_CARDID _IOWR('p', 0xAA, unsigned long) #define BMDEV_SET_IP _IOWR('p', 0xAC, unsigned long) #define BMDEV_SET_GATE _IOWR('p', 0xAD, unsigned long) +#define BMDEV_SYNC_TIME_MIX _IOWR('p', 0xAE, unsigned long) #define BMDEV_GET_TPUC _IOWR('p', 0x81, unsigned long) #define BMDEV_GET_MAXP _IOWR('p', 0x82, unsigned long) @@ -303,7 +314,7 @@ struct bm_smi_attr { int ecc_correct_num; char sn[18]; - char board_type[6]; + char board_type[10]; /* vpu mem and instant info*/ int vpu_instant_usage[MAX_NUM_VPU_CORE]; diff --git a/driver/i2c.c b/driver/i2c.c index f9d1c2b..0e398d8 100644 --- a/driver/i2c.c +++ b/driver/i2c.c @@ -153,7 +153,11 @@ void bmdrv_power_and_temp_i2c_init(struct bm_device_info *bmdi) if ((BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC5_PRO) || (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_PRO) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_FP150) || (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_CP24) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_AIV01X) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_AIV02X) || + (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_AIV03X) || (BM1684_BOARD_TYPE(bmdi) == BOARD_TYPE_SC7_PLUS)) { i2c_addr = 0x4c; rx_level = 0; diff --git a/driver/vpp/bm1684_vpp.c b/driver/vpp/bm1684_vpp.c index 921eca6..6b8bcb1 100755 --- a/driver/vpp/bm1684_vpp.c +++ b/driver/vpp/bm1684_vpp.c @@ -784,9 +784,9 @@ int vpp_handle_setup(struct bm_device_info *bmdi, struct vpp_batch *batch) if (signal_pending(current)) { ret |= VPP_ERESTARTSYS; - pr_err("signal_pending ret=%d,current->pid %d,current->tgid 
%d,vpp_idle_bit_map %ld, dev_index %d\n", - ret, current->pid, current->tgid, - bmdi->vppdrvctx.vpp_idle_bit_map, bmdi->dev_index); + // pr_err("signal_pending ret=%d,current->pid %d,current->tgid %d,vpp_idle_bit_map %ld, dev_index %d\n", + // ret, current->pid, current->tgid, + // bmdi->vppdrvctx.vpp_idle_bit_map, bmdi->dev_index); } return ret; diff --git a/driver/vpp/bm1686_vpp.c b/driver/vpp/bm1686_vpp.c index d2eef82..0310886 100755 --- a/driver/vpp/bm1686_vpp.c +++ b/driver/vpp/bm1686_vpp.c @@ -447,9 +447,9 @@ static int vpp_handle_setup(struct bm_device_info *bmdi, struct vpp_batch_n *bat if (signal_pending(current)) { ret |= VPP_ERESTARTSYS; - pr_err("signal_pending ret=%d,current->pid %d,current->tgid %d,vpp_idle_bit_map %ld, dev_index %d\n", - ret, current->pid, current->tgid, - bmdi->vppdrvctx.vpp_idle_bit_map, bmdi->dev_index); + // pr_err("signal_pending ret=%d,current->pid %d,current->tgid %d,vpp_idle_bit_map %ld, dev_index %d\n", + // ret, current->pid, current->tgid, + // bmdi->vppdrvctx.vpp_idle_bit_map, bmdi->dev_index); } return ret; diff --git a/driver/vpu/vpu.c b/driver/vpu/vpu.c index 94e9d7d..a4886ce 100644 --- a/driver/vpu/vpu.c +++ b/driver/vpu/vpu.c @@ -20,11 +20,17 @@ #include #include #include +#include +#include +#include #if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0) #include #else #include #endif +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 3) +#include +#endif #include "vmm.h" #include "vpu.h" #include "bm_irq.h" @@ -394,6 +400,28 @@ static void release_lock(struct bm_device_info *bmdi, int core_idx, LOCK_INDEX l __sync_lock_release(addr); } +static void release_exception_lock(struct bm_device_info *bmdi, long except_info) +{ + int core_idx = (except_info >> 32) & 0xff; + vpudrv_buffer_t* p = bmdi->vpudrvctx.instance_pool; + volatile int *current_addr = (int *)(p[core_idx].base + p[core_idx].size - PTHREAD_MUTEX_T_HANDLE_SIZE*4); + volatile int *tmp_addr; + int i; + + if(*current_addr != 0 && *current_addr != current->tgid && 
*current_addr != current->pid) + { + for(i=0; itgid || *tmp_addr == current->pid ) + release_lock(bmdi, i, CORE_LOCK); + } + } +} + static int get_vpu_create_inst_flag(struct bm_device_info *bmdi, int core_idx) { vpudrv_buffer_t* p = &(bmdi->vpudrvctx.instance_pool[core_idx]); @@ -437,6 +465,7 @@ static int WaitBusyTimeout(struct bm_device_info *bmdi, u32 core, u32 addr) if (time_after(jiffies, timeout)) { return 1; } + msleep(1); } return 0; } @@ -470,11 +499,11 @@ static int SendQuery(struct bm_device_info *bmdi, u32 core, u32 instanceIndex, u return 0; } -static int Wave5DecClrDispFlag(struct bm_device_info *bmdi, u32 core, u32 instanceIndex, u32 index) +static int Wave5DecClrDispFlag(struct bm_device_info *bmdi, u32 core, u32 instanceIndex) { int ret = 0; - vpu_write_register(core, W5_CMD_DEC_CLR_DISP_IDC, (1< 5) { + regVal = vpu_read_register(core, W5_RET_FAIL_REASON); + return regVal; + } + msleep(1); + count += 1; + } + + *instance_info = vpu_read_register(core, W5_RET_QUERY_DEC_GET_INSTANCE_INFO); + + return 0; +} static int FlushDecResult(struct bm_device_info *bmdi, u32 core, u32 instanceIndex) { @@ -550,6 +602,7 @@ static int FlushEncResult(struct bm_device_info *bmdi, u32 core, u32 instanceInd int Wave5CloseInstanceCommand(struct bm_device_info *bmdi, int core, u32 instanceIndex) { int ret = 0; + int regVal; unsigned long timeout = jiffies + HZ; /* vpu wait timeout to 1sec */ #define W5_DESTROY_INSTANCE 0x0020 @@ -565,12 +618,17 @@ int Wave5CloseInstanceCommand(struct bm_device_info *bmdi, int core, u32 instanc ret = 1; goto DONE_CMD; } + msleep(1); } if (vpu_read_register(core, W5_RET_SUCCESS) == 0) { // pr_info("Wave5CloseInstanceCommand failed REASON=[0x%x]\n", vpu_read_register(core, W5_RET_FAIL_REASON)); #define WAVE5_VPU_STILL_RUNNING 0x00001000 - if (vpu_read_register(core, W5_RET_FAIL_REASON) == WAVE5_VPU_STILL_RUNNING) +#define WAVE5_INVALID_TASK_BUF 0x00040000 + regVal = vpu_read_register(core, W5_RET_FAIL_REASON); + if (regVal == 
WAVE5_INVALID_TASK_BUF) + ret = 0; + else if (regVal == WAVE5_VPU_STILL_RUNNING) ret = 2; else ret = 1; @@ -585,29 +643,59 @@ int Wave5CloseInstanceCommand(struct bm_device_info *bmdi, int core, u32 instanc int CloseInstanceCommand(struct bm_device_info *bmdi, int core, u32 instanceIndex) { int product_code; + int ret = 0; + int count = 0; + int vpu_create_inst_flag = 0; product_code = vpu_read_register(core, VPU_PRODUCT_CODE_REGISTER); if (PRODUCT_CODE_W_SERIES(product_code)) { - if(WAVE521C_CODE != product_code) { - u32 i =0; - u32 interrupt_flag_in_q = 0; - Wave5VpuDecSetBitstreamFlag(bmdi, core, instanceIndex); - interrupt_flag_in_q = kfifo_out_spinlocked(&bmdi->vpudrvctx.interrupt_pending_q[core][instanceIndex], - &i, sizeof(u32), &bmdi->vpudrvctx.s_kfifo_lock[core][instanceIndex]); - if (interrupt_flag_in_q > 0) { - //FlushDecResult(bmdi, core, instanceIndex); - DPRINTK("interrupt flag : %d\n", interrupt_flag_in_q); + vpu_create_inst_flag = get_vpu_create_inst_flag(bmdi, core); + if ((vpu_create_inst_flag & (1 << instanceIndex)) != 0) { + if(WAVE521C_CODE != product_code) { + u32 i =0; + u32 interrupt_flag_in_q = 0; + Wave5VpuDecSetBitstreamFlag(bmdi, core, instanceIndex); + interrupt_flag_in_q = kfifo_out_spinlocked(&bmdi->vpudrvctx.interrupt_pending_q[core][instanceIndex], + &i, sizeof(u32), &bmdi->vpudrvctx.s_kfifo_lock[core][instanceIndex]); + if (interrupt_flag_in_q > 0) { + //FlushDecResult(bmdi, core, instanceIndex); + DPRINTK("interrupt flag : %d\n", interrupt_flag_in_q); + } + FlushDecResult(bmdi, core, instanceIndex); + Wave5DecClrDispFlag(bmdi, core, instanceIndex); + } + + if (WAVE521C_CODE == product_code) { + FlushEncResult(bmdi, core, instanceIndex); } - FlushDecResult(bmdi, core, instanceIndex); - for(i=0; i<32; i++) { - int ret = Wave5DecClrDispFlag(bmdi, core, instanceIndex, i); - if(ret != 0) + + while (1) + { + ret = Wave5CloseInstanceCommand(bmdi, core, instanceIndex); + if(ret == 0) { + break; + } + if(count > 500) { + 
pr_err("CloseInstanceCommand failed REASON=%d\n", ret); break; + } + if(ret == 2) { + if(WAVE521C_CODE != product_code) { + FlushDecResult(bmdi, core, instanceIndex); + Wave5VpuDecSetBitstreamFlag(bmdi, core, instanceIndex); + } + else { + FlushEncResult(bmdi, core, instanceIndex); + } + } + msleep(20); + count += 1; } + + return ret; + } + else { + return 0; } - if (WAVE521C_CODE == product_code) { - FlushEncResult(bmdi, core, instanceIndex); - } - return Wave5CloseInstanceCommand(bmdi, core, instanceIndex); } else { return CodaCloseInstanceCommand(bmdi, core, instanceIndex); @@ -615,17 +703,106 @@ int CloseInstanceCommand(struct bm_device_info *bmdi, int core, u32 instanceInde } #endif -static int bm_vpu_alloc_dma_buffer(struct bm_device_info *bmdi, vpudrv_buffer_t *vb) +static void* vpu_dma_buffer_attach_sg(struct bm_device_info *bmdi, vpudrv_buffer_t *vb) +{ + void* ret = ERR_PTR(0); + + vb->dma_buf = dma_buf_get(vb->ion_fd); + if (IS_ERR(vb->dma_buf)) { + ret = vb->dma_buf; + goto err0; + } + + vb->attach = dma_buf_attach(vb->dma_buf, bmdi->dev); + if (IS_ERR(vb->attach)) { + ret = vb->attach; + goto err1; + } + + vb->table = dma_buf_map_attachment(vb->attach, DMA_FROM_DEVICE); + if (IS_ERR(vb->table)) { + ret = vb->table; + goto err2; + } + if (vb->table->nents != 1) { + printk("muti-sg is not prefer\n"); + ret = ERR_PTR(-EINVAL); + goto err2; + } + + vb->phys_addr = sg_dma_address(vb->table->sgl); + + DPRINTK("ion_fd = %d attach_sg result is pass\n", vb->ion_fd); + + return ret; + +err2: + dma_buf_detach(vb->dma_buf, vb->attach); +err1: + dma_buf_put(vb->dma_buf); +err0: + + DPRINTK("ion_fd = %d attach_sg result is failed\n", vb->ion_fd); + + return ret; +} + +static void vpu_dma_buffer_unattach_sg(vpudrv_buffer_t *vb) { + dma_buf_unmap_attachment(vb->attach, vb->table, DMA_FROM_DEVICE); + dma_buf_detach(vb->dma_buf, vb->attach); + dma_buf_put(vb->dma_buf); +} + +static int bm_vpu_alloc_dma_buffer(struct bm_device_info *bmdi, struct file* file, 
vpudrv_buffer_t *vb) +{ + int ret; + void* attach_ret = ERR_PTR(0); + struct ion_allocation_data alloc_data; + struct bm_handle_info *h_info; if (!vb) return -1; - if ((bmdi->cinfo.chip_id == 0x1684) || (bmdi->cinfo.chip_id == 0x1686)) { - vb->phys_addr = (unsigned long)vmem_alloc(&bmdi->vpudrvctx.s_vmem, vb->size, 0); - if (vb->phys_addr == (unsigned long)-1) { - DPRINTK(KERN_ERR "[VPUDRV] Physical memory allocation error size=%d\n", vb->size); + if (bmdev_gmem_get_handle_info(bmdi, file, &h_info)) { + pr_err("bmdrv: bm-sophon%d file list is not found!\n", bmdi->dev_index); + return -EINVAL; + } + if ((bmdi->cinfo.chip_id == 0x1684) || (bmdi->cinfo.chip_id == 0x1686)) { + memset(&alloc_data, 0, sizeof(struct ion_allocation_data)); + alloc_data.heap_id_mask = 0x4; + alloc_data.len = vb->size; + + ret = ion_alloc(bmdi, &alloc_data); + if (ret < 0) { + printk("%s 0x%llx, size 0x%llx\n", __func__, + alloc_data.paddr, + alloc_data.len); + return -1; + } + + vb->base = alloc_data.paddr; + vb->phys_addr = alloc_data.paddr; + vb->ion_fd = alloc_data.fd; + + attach_ret = vpu_dma_buffer_attach_sg(bmdi, vb); + if (IS_ERR(attach_ret)) { + vb->ion_fd = -1; + vb->base = 0; + vb->phys_addr = 0; return -1; } + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 11, 0) + close_fd(vb->ion_fd); +#elif LINUX_VERSION_CODE >= KERNEL_VERSION(4, 17, 0) + ksys_close(vb->ion_fd); +#else + sys_close(vb->ion_fd); +#endif + + mutex_lock(&bmdi->gmem_info.gmem_mutex); + h_info->gmem_used += BGM_4K_ALIGN(alloc_data.len); + mutex_unlock(&bmdi->gmem_info.gmem_mutex); } else if (bmdi->cinfo.chip_id == 0x1682) { if (vb->core_idx < 2) { vb->phys_addr = vmem_alloc(&bmdi->vpudrvctx.s_vmemboda, vb->size, 0); @@ -642,12 +819,25 @@ static int bm_vpu_alloc_dma_buffer(struct bm_device_info *bmdi, vpudrv_buffer_t return 0; } -static void bm_vpu_free_dma_buffer(struct vpudrv_buffer_t *vb, video_mm_t *s_vmem) +static void bm_vpu_free_dma_buffer(struct bm_device_info *bmdi, struct file* file, struct vpudrv_buffer_t 
*vb) { + struct bm_handle_info *h_info; if (!vb) return; + + if (bmdev_gmem_get_handle_info(bmdi, file, &h_info)) { + pr_err("bmdrv: bm-sophon%d file list is not found!\n", bmdi->dev_index); + return; + } + if (vb->base) - vmem_free(s_vmem, vb->phys_addr, 0); + { + vpu_dma_buffer_unattach_sg(vb); + vb->phys_addr = 0; + } + mutex_lock(&bmdi->gmem_info.gmem_mutex); + h_info->gmem_used -= BGM_4K_ALIGN(vb->size); + mutex_unlock(&bmdi->gmem_info.gmem_mutex); } static int bm_vpu_free_instances(struct file *filp) @@ -707,17 +897,18 @@ static int bm_vpu_free_buffers(struct file *filp) vb = pool->vb; if (vb.base) { if ((bmdi->cinfo.chip_id == 0x1684) || (bmdi->cinfo.chip_id == 0x1686)) { - bm_vpu_free_dma_buffer(&vb, &dev->s_vmem); + bm_vpu_free_dma_buffer(bmdi, filp, &vb); } else { if (vb.phys_addr >= dev->s_vmemwave.base_addr) { - bm_vpu_free_dma_buffer(&vb, &dev->s_vmemwave); + bm_vpu_free_dma_buffer(bmdi, filp, &vb); } else { - bm_vpu_free_dma_buffer(&vb, &dev->s_vmemboda); + bm_vpu_free_dma_buffer(bmdi, filp, &vb); } } list_del(&pool->list); kfree(pool); + vb.base = 0; } } } @@ -1269,7 +1460,7 @@ long bm_vpu_ioctl(struct file *filp, u_int cmd, u_long arg) return -EFAULT; } - ret = bm_vpu_alloc_dma_buffer(bmdi, &(vbp->vb)); + ret = bm_vpu_alloc_dma_buffer(bmdi, filp, &(vbp->vb)); if (ret == -1) { ret = -ENOMEM; kfree(vbp); @@ -1309,13 +1500,13 @@ long bm_vpu_ioctl(struct file *filp, u_int cmd, u_long arg) } if (vb.base) { - if ((bmdi->cinfo.chip_id == 0x1684) || (bmdi->cinfo.chip_id == 0x1686)) { - bm_vpu_free_dma_buffer(&vb, &bmdi->vpudrvctx.s_vmem); + if ((bmdi->cinfo.chip_id == 0x1684) || (bmdi->cinfo.chip_id == 0x1686)) { + bm_vpu_free_dma_buffer(bmdi, filp, &vb); } else { if (vb.base >= dev->s_vmemwave.base_addr) { - bm_vpu_free_dma_buffer(&vb, &dev->s_vmemwave); + bm_vpu_free_dma_buffer(bmdi, filp, &vb); } else{ - bm_vpu_free_dma_buffer(&vb, &dev->s_vmemboda); + bm_vpu_free_dma_buffer(bmdi, filp, &vb); } } @@ -1327,6 +1518,7 @@ long bm_vpu_ioctl(struct file *filp, 
u_int cmd, u_long arg) break; } } + vb.base = 0; up(&dev->s_vpu_sem); } else { return -ERESTARTSYS; @@ -1376,6 +1568,7 @@ long bm_vpu_ioctl(struct file *filp, u_int cmd, u_long arg) u32 intr_reason_in_q; u32 interrupt_flag_in_q; u32 core_idx; + u32 got_fifo_out = 0; DPRINTK("[VPUDRV][+]VDI_IOCTL_WAIT_INTERRUPT\n"); ret = copy_from_user(&info, (vpudrv_intr_info_t *)arg, sizeof(vpudrv_intr_info_t)); @@ -1433,6 +1626,7 @@ long bm_vpu_ioctl(struct file *filp, u_int cmd, u_long arg) &intr_reason_in_q, sizeof(u32), &bmdi->vpudrvctx.s_kfifo_lock[core_idx][intr_inst_index]); if (interrupt_flag_in_q > 0) { dev->interrupt_reason[core_idx][intr_inst_index] = intr_reason_in_q; + got_fifo_out = 1; } else { dev->interrupt_reason[core_idx][intr_inst_index] = 0; } @@ -1444,7 +1638,8 @@ long bm_vpu_ioctl(struct file *filp, u_int cmd, u_long arg) //INTERRUPT_REMAIN_IN_QUEUE: info.intr_reason = dev->interrupt_reason[core_idx][intr_inst_index]; - bmdi->vpudrvctx.interrupt_flag[core_idx][intr_inst_index] = 0; + if (got_fifo_out) + bmdi->vpudrvctx.interrupt_flag[core_idx][intr_inst_index] = 0; dev->interrupt_reason[core_idx][intr_inst_index] = 0; atomic_dec(&bmdi->vpudrvctx.s_vpu_usage_info.vpu_busy_status[core_idx]); @@ -1587,7 +1782,7 @@ long bm_vpu_ioctl(struct file *filp, u_int cmd, u_long arg) } } else { memcpy(&bmdi->vpudrvctx.s_common_memory[vdb.core_idx], &vdb, sizeof(vpudrv_buffer_t)); - if (bm_vpu_alloc_dma_buffer(bmdi, &bmdi->vpudrvctx.s_common_memory[vdb.core_idx]) != -1) { + if (bm_vpu_alloc_dma_buffer(bmdi, filp, &bmdi->vpudrvctx.s_common_memory[vdb.core_idx]) != -1) { ret = copy_to_user((void __user *)arg, &bmdi->vpudrvctx.s_common_memory[vdb.core_idx], sizeof(vpudrv_buffer_t)); } else { @@ -1664,7 +1859,8 @@ long bm_vpu_ioctl(struct file *filp, u_int cmd, u_long arg) list_for_each_entry_safe(vil, n, &bmdi->vpudrvctx.s_inst_list_head, list) { if (vil->inst_idx == inst_info.inst_idx && vil->core_idx == inst_info.core_idx) { list_del(&vil->list); - vpu_open_inst_count--; + 
if (bmdi->cinfo.chip_id == 0x1682) + vpu_open_inst_count--; bmdi->vpudrvctx.s_vpu_usage_info.vpu_open_ref_count[vil->core_idx]--; inst_info.inst_open_count = bmdi->vpudrvctx.s_vpu_usage_info.vpu_open_ref_count[vil->core_idx]; kfree(vil); @@ -1765,10 +1961,18 @@ long bm_vpu_ioctl(struct file *filp, u_int cmd, u_long arg) if (get_user(core_idx, (u32 __user *) arg)) return -EFAULT; - if (core_idx >= get_max_num_vpu_core(bmdi)) + if (core_idx >= get_max_num_vpu_core(bmdi) || core_idx < 0) return -EFAULT; - if (bmdi->vpudrvctx.s_bit_firmware_info[core_idx].size == 0) - ret = 100; + if ((ret = mutex_lock_interruptible(&dev->s_vpu_lock)) == 0) { + if (bmdi->vpudrvctx.s_bit_firmware_info[core_idx].size == 0) + ret = 100; + else + ret = bmdi->vpudrvctx.s_bit_firmware_info[core_idx].size; + + mutex_unlock(&dev->s_vpu_lock); + } else { + return -ERESTARTSYS; + } DPRINTK("[VPUDRV][-]VDI_IOCTL_GET_FIRMWARE_STATUS\n"); } @@ -1833,17 +2037,80 @@ long bm_vpu_ioctl(struct file *filp, u_int cmd, u_long arg) } break; case VDI_IOCTL_CTRL_KERNEL_RESET: + { + vpudrv_reset_flag_node_t *reset_flag; + vpudrv_reset_flag_node_t *vrf, *n; + DPRINTK("[VPUDRV][+]VDI_IOCTL_CTRL_KERNEL_RESET, tpid: 0x%x, pid: 0x%x\n", current->tgid, current->pid); + if ((ret = mutex_lock_interruptible(&dev->s_vpu_lock)) == 0) { + reset_flag = kzalloc(sizeof(*reset_flag), GFP_KERNEL); + ret = copy_from_user(reset_flag, (vpudrv_reset_flag *)arg, sizeof(vpudrv_reset_flag)); + if (ret != 0) { + kfree(reset_flag); + mutex_unlock(&dev->s_vpu_lock); + return -EFAULT; + } + + if (reset_flag->reset_flag.core_idx < 0 || reset_flag->reset_flag.core_idx >= get_max_num_vpu_core(bmdi)) { + kfree(reset_flag); + mutex_unlock(&dev->s_vpu_lock); + return -EFAULT; + } + if(reset_flag->reset_flag.reset == 0) + { + list_for_each_entry_safe(vrf, n, &bmdi->vpudrvctx.s_reset_flag_head, list) + { + if(vrf->reset_flag.pid == reset_flag->reset_flag.pid && vrf->reset_flag.core_idx == reset_flag->reset_flag.core_idx) + { + 
list_del(&vrf->list); + kfree(vrf); + } + } + kfree(reset_flag); + } else { + list_add(&reset_flag->list, &bmdi->vpudrvctx.s_reset_flag_head); + } + mutex_unlock(&dev->s_vpu_lock); + } + else + { + return -ERESTARTSYS; + } + + DPRINTK("[VPUDRV][-]VDI_IOCTL_CTRL_KERNEL_RESET, tpid: 0x%x, pid: 0x%x\n", current->tgid, current->pid); + } + break; + case VDI_IOCTL_GET_KERNEL_RESET_STATUS: { vpudrv_reset_flag reset_flag; - DPRINTK("[VPUDRV][+]VDI_IOCTL_CTRL_KERNEL_RESET\n"); + vpudrv_reset_flag_node_t *vrf, *n; + DPRINTK("[VPUDRV][+]VDI_IOCTL_GET_KERNEL_RESET_STATUS\n"); + ret = copy_from_user(&reset_flag, (vpudrv_reset_flag *)arg, sizeof(vpudrv_reset_flag)); if (ret != 0) return -EFAULT; - if (reset_flag.core_idx >= get_max_num_vpu_core(bmdi)) + if (reset_flag.core_idx < 0 || reset_flag.core_idx >= get_max_num_vpu_core(bmdi)) + return -EFAULT; + + if ((ret = mutex_lock_interruptible(&dev->s_vpu_lock)) == 0) { + reset_flag.reset = 0; + list_for_each_entry_safe(vrf, n, &bmdi->vpudrvctx.s_reset_flag_head, list) + { + if(vrf->reset_flag.pid == reset_flag.pid && vrf->reset_flag.core_idx == reset_flag.core_idx) + { + reset_flag.reset = 1; + break; + } + } + mutex_unlock(&dev->s_vpu_lock); + } + else { + return -ERESTARTSYS; + } + ret = copy_to_user((void __user *)arg, &reset_flag, sizeof(vpudrv_reset_flag)); + if (ret != 0) return -EFAULT; - bmdi->vpudrvctx.reset_vpu_core_disable[reset_flag.core_idx] = reset_flag.reset_core_disable; - DPRINTK("[VPUDRV][-]VDI_IOCTL_CTRL_KERNEL_RESET\n"); + DPRINTK("[VPUDRV][-]VDI_IOCTL_GET_KERNEL_RESET_STATUS\n"); } break; @@ -2033,7 +2300,6 @@ static long get_exception_instance_info(struct file *filp) static int close_vpu_instance(long flags, struct file *filp) { int core_idx, i; - int vpu_create_inst_flag = 0; struct bm_device_info *bmdi = (struct bm_device_info *)filp->private_data; int release_lock_flag = 0; if (flags == 0) @@ -2052,54 +2318,24 @@ static int close_vpu_instance(long flags, struct file *filp) for (i = 0; i < 
MAX_NUM_INSTANCE_VPU; i++) { if ((flags & (1UL< 5) { - pr_err("can not stop instances core %d inst %d\n", (int)core_idx, i); - release_vpu_create_inst_flag(bmdi, core_idx, i); - return release_lock_flag; - } - continue; // means there is command which should be flush. - } else { - release_vpu_create_inst_flag(bmdi, core_idx, i); - DPRINTK("stop instances core %d inst %d success\n", (int)core_idx, i); - } - break; - } + CloseInstanceCommand(bmdi, core_idx, i); + release_lock(bmdi, core_idx, CORE_LOCK); } } + return release_lock_flag; } int bm_vpu_release(struct inode *inode, struct file *filp) { int ret =0, i, j; - u32 open_count; long except_info = 0; struct bm_device_info *bmdi = (struct bm_device_info *)filp->private_data; vpu_drv_context_t *dev = &bmdi->vpudrvctx; int core_idx, release_lock_flag; int vpu_open_ref_count_sum = 0; - int vpu_disable_reset_flag_sum = 0; + vpudrv_reset_flag_node_t *vrf, *n; DPRINTK("[VPUDRV] vpu_release\n"); @@ -2107,6 +2343,8 @@ int bm_vpu_release(struct inode *inode, struct file *filp) core_idx = get_core_idx(filp); //pr_info("core_idx : %d, filp: %p\n", core_idx, filp); except_info = get_exception_instance_info(filp); + if (except_info != 0) + release_exception_lock(bmdi, except_info); mutex_unlock(&dev->s_vpu_lock); release_lock_flag = close_vpu_instance(except_info, filp); @@ -2129,25 +2367,22 @@ int bm_vpu_release(struct inode *inode, struct file *filp) release_lock(bmdi, core_idx, CORE_DISPLAY_LOCK); } - if (vpu_polling_create == 1 && vpu_open_inst_count == 0) { - destory_irq_poll_thread(); - vpu_polling_create = 0; - } - mutex_lock(&dev->s_vpu_lock); bmdi->vpudrvctx.open_count--; - open_count = bmdi->vpudrvctx.open_count; - for (j = 0; j < get_max_num_vpu_core(bmdi); j++){ vpu_open_ref_count_sum += bmdi->vpudrvctx.s_vpu_usage_info.vpu_open_ref_count[j]; - if ((bmdi->vpudrvctx.reset_vpu_core_disable[j]==current->tgid) || (bmdi->vpudrvctx.reset_vpu_core_disable[j]==current->pid)) - bmdi->vpudrvctx.reset_vpu_core_disable[j] = 0; 
+ } - if (bmdi->vpudrvctx.reset_vpu_core_disable[j] != 0) - vpu_disable_reset_flag_sum++; + list_for_each_entry_safe(vrf, n, &bmdi->vpudrvctx.s_reset_flag_head, list) + { + if((vrf->reset_flag.core_idx == core_idx) && ((vrf->reset_flag.pid == current->tgid) || (vrf->reset_flag.pid == current->pid))) + { + list_del(&vrf->list); + kfree(vrf); + } } - if (vpu_open_ref_count_sum == 0 && vpu_disable_reset_flag_sum == 0) { + if (vpu_open_ref_count_sum == 0 && list_empty(&bmdi->vpudrvctx.s_reset_flag_head)) { for (j = 0; j < get_max_num_vpu_core(bmdi); j++) { for (i = 0; i < bmdi->vpudrvctx.max_num_instance; i++) { kfifo_reset(&bmdi->vpudrvctx.interrupt_pending_q[j][i]); @@ -2178,17 +2413,11 @@ int bm_vpu_release(struct inode *inode, struct file *filp) bmdi->vpudrvctx.s_common_memory[i].base = 0; } */ + bmdi->vpudrvctx.s_bit_firmware_info[i].size = 0; } - } memset(&dev->crst_cxt[0], 0, sizeof(vpu_crst_context_t) * get_max_num_vpu_core(bmdi)); - if ((bmdi->cinfo.chip_id == 0x1684) || (bmdi->cinfo.chip_id == 0x1686)) { - if (ret > 0 && bmdi->vpudrvctx.s_vpu_usage_info.vpu_open_ref_count[ret-1] == 0 && bmdi->vpudrvctx.reset_vpu_core_disable[ret-1] == 0) { - DPRINTK(KERN_INFO "exception will reset the vpu core: %d\n", ret - 1); - bmdi->vpudrvctx.s_bit_firmware_info[ret-1].size = 0; - } - } mutex_unlock(&dev->s_vpu_lock); return 0; @@ -2207,7 +2436,11 @@ static int bm_vpu_map_to_register(struct file *filp, struct vm_area_struct *vm, unsigned long pfn; struct bm_device_info *bmdi = (struct bm_device_info *)filp->private_data; +#if LINUX_VERSION_CODE >= KERNEL_VERSION(6, 5, 0) + vm_flags_set(vm, VM_IO | VM_RESERVED); +#else vm->vm_flags |= VM_IO | VM_RESERVED; +#endif vm->vm_page_prot = pgprot_noncached(vm->vm_page_prot); pfn = bmdi->vpudrvctx.s_vpu_register[core_idx].phys_addr >> PAGE_SHIFT; @@ -2216,7 +2449,11 @@ static int bm_vpu_map_to_register(struct file *filp, struct vm_area_struct *vm, static int bm_vpu_map_to_physical_memory(struct file *filp, struct vm_area_struct *vm) 
{ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(6, 5, 0) + vm_flags_set(vm, VM_IO | VM_RESERVED); +#else vm->vm_flags |= VM_IO | VM_RESERVED; +#endif vm->vm_page_prot = pgprot_noncached(vm->vm_page_prot); return remap_pfn_range(vm, vm->vm_start, vm->vm_pgoff, vm->vm_end-vm->vm_start, vm->vm_page_prot) ? -EAGAIN : 0; @@ -2231,7 +2468,11 @@ static int bm_vpu_map_to_instance_pool_memory(struct file *fp, struct vm_area_st char *vmalloc_area_ptr = (char *)bmdi->vpudrvctx.instance_pool[core_idx].base; unsigned long pfn; +#if LINUX_VERSION_CODE >= KERNEL_VERSION(6, 5, 0) + vm_flags_set(vm, VM_RESERVED); +#else vm->vm_flags |= VM_RESERVED; +#endif /* loop over all pages, map it page individually */ while (length > 0) { @@ -2254,7 +2495,11 @@ static int vpu_map_vmalloc(struct file *fp, struct vm_area_struct *vm, char *vma unsigned long start = vm->vm_start; unsigned long pfn; +#if LINUX_VERSION_CODE >= KERNEL_VERSION(6, 5, 0) + vm_flags_set(vm, VM_RESERVED); +#else vm->vm_flags |= VM_RESERVED; +#endif /* loop over all pages, map it page individually */ while (length > 0) { pfn = vmalloc_to_pfn(vmalloc_area_ptr); @@ -2493,7 +2738,7 @@ static void bm_vpu_free_common_mem(struct bm_device_info *bmdi) int i; for (i = 0; i < get_max_num_vpu_core(bmdi); i++) { if (bmdi->vpudrvctx.s_common_memory[i].base) { - bm_vpu_free_dma_buffer(&bmdi->vpudrvctx.s_common_memory[i], &bmdi->vpudrvctx.s_vmem); + vmem_free(&bmdi->vpudrvctx.s_vmem, bmdi->vpudrvctx.s_common_memory[i].phys_addr, bmdi->vpudrvctx.s_common_memory[i].size); bmdi->vpudrvctx.s_common_memory[i].base = 0; } } @@ -2505,6 +2750,7 @@ static int bm_vpu_res_init(struct bm_device_info *bmdi) INIT_LIST_HEAD(&bmdi->vpudrvctx.s_vbp_head); INIT_LIST_HEAD(&bmdi->vpudrvctx.s_inst_list_head); INIT_LIST_HEAD(&bmdi->vpudrvctx.s_core_idx_head); + INIT_LIST_HEAD(&bmdi->vpudrvctx.s_reset_flag_head); mutex_init(&bmdi->vpudrvctx.s_vpu_lock); sema_init(&bmdi->vpudrvctx.s_vpu_sem, 1); @@ -2635,7 +2881,8 @@ int bm_vpu_init(struct bm_device_info *bmdi) { 
int ret = 0; struct bmdi_list *blist; - int i = 0; + struct vpudrv_buffer_t* common_buffer; + int i; DPRINTK("[VPUDRV] begin vpu_init\n"); @@ -2694,8 +2941,17 @@ int bm_vpu_init(struct bm_device_info *bmdi) bm_vpu_topaddr_set(bmdi); for (i = 0; i < get_max_num_vpu_core(bmdi); i++) { - bmdi->vpudrvctx.s_common_memory[i].size = SIZE_COMMON; - if (bm_vpu_alloc_dma_buffer(bmdi, &bmdi->vpudrvctx.s_common_memory[i]) == -1) { + if(i == (get_max_num_vpu_core(bmdi) - 1)){ + bmdi->vpudrvctx.s_common_memory[i].size = SIZE_COMMON_ENC; + } + else{ + bmdi->vpudrvctx.s_common_memory[i].size = SIZE_COMMON; + } + + common_buffer = &bmdi->vpudrvctx.s_common_memory[i]; + common_buffer->phys_addr = (unsigned long)vmem_alloc(&bmdi->vpudrvctx.s_vmem, common_buffer->size, 0); + common_buffer->base = (unsigned long)(bmdi->vpudrvctx.s_video_memory.base + (common_buffer->phys_addr - bmdi->vpudrvctx.s_video_memory.phys_addr)); + if (common_buffer->phys_addr == 0) { pr_err("[%s,%d] can not allocate the common buffer.\n", __func__, __LINE__); ret = -EFAULT; break; diff --git a/driver/vpu/vpu.h b/driver/vpu/vpu.h index 0ebf08d..1e4ce9a 100644 --- a/driver/vpu/vpu.h +++ b/driver/vpu/vpu.h @@ -54,7 +54,7 @@ #define VDI_IOCTL_GET_CHIP_ID _IO(VDI_IOCTL_MAGIC, 32) #define VDI_IOCTL_GET_MAX_CORE_NUM _IO(VDI_IOCTL_MAGIC, 33) #define VDI_IOCTL_CTRL_KERNEL_RESET _IO(VDI_IOCTL_MAGIC, 34) - +#define VDI_IOCTL_GET_KERNEL_RESET_STATUS _IO(VDI_IOCTL_MAGIC, 35) typedef struct vpudrv_syscxt_info_s { unsigned int core_idx; unsigned int inst_idx; @@ -74,7 +74,10 @@ typedef struct vpudrv_buffer_t { unsigned long virt_addr; /* virtual user space address */ unsigned int core_idx; - unsigned int reserved; + unsigned int ion_fd; + struct dma_buf_attachment *attach; + struct sg_table *table; + struct dma_buf *dma_buf; } vpudrv_buffer_t; typedef struct vpu_bit_firmware_info_t { @@ -183,15 +186,22 @@ typedef struct vpu_statistic_info { typedef struct { int core_idx; - pid_t reset_core_disable; + pid_t pid; + int reset; 
} vpudrv_reset_flag; +typedef struct vpudrv_reset_flag_node_t { + vpudrv_reset_flag reset_flag; + struct list_head list; +} vpudrv_reset_flag_node_t; + typedef struct vpu_drv_context { struct fasync_struct *async_queue; struct mutex s_vpu_lock; struct semaphore s_vpu_sem; struct list_head s_vbp_head; struct list_head s_inst_list_head; + struct list_head s_reset_flag_head; struct proc_dir_entry *entry[64]; u32 open_count; /*!<< device reference count. Not instance count */ u32 max_num_vpu_core; diff --git a/driver/vpu/vpuconfig.h b/driver/vpu/vpuconfig.h index 6c7e021..6a88d2f 100755 --- a/driver/vpu/vpuconfig.h +++ b/driver/vpu/vpuconfig.h @@ -145,6 +145,7 @@ /* VPU COMMON MEMORY */ /************************************************************************/ #define COMMAND_QUEUE_DEPTH 4 +#define COMMAND_QUEUE_DEPTH_ENC 1 #define ENC_SRC_BUF_NUM (12+COMMAND_QUEUE_DEPTH) //!< case of GOPsize = 8 (IBBBBBBBP), max src buffer num = 12 @@ -155,6 +156,7 @@ #define ONE_TASKBUF_SIZE_FOR_CQ ONE_TASKBUF_SIZE_FOR_W5DEC_CQ #define SIZE_COMMON ((2*1024*1024) + (COMMAND_QUEUE_DEPTH*ONE_TASKBUF_SIZE_FOR_CQ)) +#define SIZE_COMMON_ENC ((2*1024*1024) + (COMMAND_QUEUE_DEPTH_ENC*ONE_TASKBUF_SIZE_FOR_CQ)) //=====4. VPU REPORT MEMORY ======================// #define SIZE_REPORT_BUF (0x10000) diff --git a/release.rst b/release.rst index a359822..a71a1f9 100644 --- a/release.rst +++ b/release.rst @@ -2,36 +2,38 @@ Release note --------------- .. 
table:: - :widths: 20 25 55 + :widths: 15 25 25 55 - ========== ========== ====================================================== - 版本 发布日期 说明 - ---------- ---------- ------------------------------------------------------ - V0.1.0 2022.07.12 第一次发布,包含bmlib, bm-smi and tpu runtime。 - ---------- ---------- ------------------------------------------------------ - V0.2.0 2022.07.30 增加bmvid;补充文档。 - ---------- ---------- ------------------------------------------------------ - V0.3.0 2022.08.30 增加soc mode 支持; 增加bmcv支持; 支持bm1684 - ---------- ---------- ------------------------------------------------------ - V0.4.0 2022.09.15 完善bm1684支持,增加soc mode 交叉编译指南; 支持SC7加速卡 - ---------- ---------- ------------------------------------------------------ - V0.4.1 2022.09.21 完善bm1684 soc mode 支持; fix some opencv bug - ---------- ---------- ------------------------------------------------------ - V0.4.2 2022.10.15 支持arm pcie mode - ---------- ---------- ------------------------------------------------------ - V0.4.3 2022.11.15 支持sc7 hp75 加速卡;支持rpm包安装 - ---------- ---------- ------------------------------------------------------ - V0.4.4 2022.12.15 fix bug - ---------- ---------- ------------------------------------------------------ - V0.4.5 2023.2.7 支持动态算子加载 - ---------- ---------- ------------------------------------------------------ - V0.4.6 2023.3.13 支持mix mode - ---------- ---------- ------------------------------------------------------ - V0.4.7 2023.4.13 fix bug - ---------- ---------- ------------------------------------------------------ - V0.4.8 2023.5.16 add 64 bit dev mem manager - ---------- ---------- ------------------------------------------------------ - V0.4.9 2023.8.1 add virtual ethernet driver - ---------- ---------- ------------------------------------------------------ - V0.5.0 2023.10.1 fix some bug - ========== ========== ====================================================== + ========== ============== ========== 
====================================================== + 版本 对应SDK版本 发布日期 说明 + ---------- -------------- ---------- ------------------------------------------------------ + V0.1.0 2022.07.12 第一次发布,包含bmlib, bm-smi and tpu runtime。 + ---------- -------------- ---------- ------------------------------------------------------ + V0.2.0 2022.07.30 增加bmvid;补充文档。 + ---------- -------------- ---------- ------------------------------------------------------ + V0.3.0 2022.08.30 增加soc mode 支持; 增加bmcv支持; 支持bm1684 + ---------- -------------- ---------- ------------------------------------------------------ + V0.4.0 2022.09.15 完善bm1684支持,增加soc mode 交叉编译指南; 支持SC7加速卡 + ---------- -------------- ---------- ------------------------------------------------------ + V0.4.1 V22.09.02 2022.09.21 完善bm1684 soc mode 支持; fix some opencv bug + ---------- -------------- ---------- ------------------------------------------------------ + V0.4.2 V22.10.01 2022.10.15 支持arm pcie mode + ---------- -------------- ---------- ------------------------------------------------------ + V0.4.3 V22.11.01 2022.11.15 支持sc7 hp75 加速卡;支持rpm包安装 + ---------- -------------- ---------- ------------------------------------------------------ + V0.4.4 V22.12.01 2022.12.15 fix bug + ---------- -------------- ---------- ------------------------------------------------------ + V0.4.5 2023.2.7 支持动态算子加载 + ---------- -------------- ---------- ------------------------------------------------------ + V0.4.6 V22.03.01 2023.3.13 支持mix mode + ---------- -------------- ---------- ------------------------------------------------------ + V0.4.7 2023.4.13 fix bug + ---------- -------------- ---------- ------------------------------------------------------ + V0.4.8 V22.05.01 2023.5.16 add 64 bit dev mem manager + ---------- -------------- ---------- ------------------------------------------------------ + V0.4.9 V22.07.01 2023.8.1 add virtual ethernet driver + ---------- -------------- ---------- 
------------------------------------------------------ + V0.5.0 V23.10.01 2023.10.1 fix some bug + ---------- -------------- ---------- ------------------------------------------------------ + V0.5.1 V24.04.01 2024.4.8 fix TPU jitter bug + ========== ============== ========== ====================================================== \ No newline at end of file diff --git a/toolchain-loongarch64-linux.cmake b/toolchain-loongarch64-linux.cmake index 3585c0c..585a5e6 100644 --- a/toolchain-loongarch64-linux.cmake +++ b/toolchain-loongarch64-linux.cmake @@ -8,7 +8,7 @@ set( CMAKE_SYSTEM_PROCESSOR loongarch64 ) # The toolchain prefix for all toolchain executables set( CROSS_COMPILE ${CROSS_COMPILE_PATH}/bin/loongarch64-linux-gnu- ) -set( ARCH lonngarch64 ) +set( ARCH loongarch64 ) # specify the cross compiler. We force the compiler so that CMake doesn't # attempt to build a simple test program as this will fail without us using diff --git a/tpu-bmodel/.gitignore b/tpu-bmodel/.gitignore index 39fb4e6..74cf243 100644 --- a/tpu-bmodel/.gitignore +++ b/tpu-bmodel/.gitignore @@ -1,2 +1,3 @@ build*/ obj +.vscode diff --git a/tpu-bmodel/CMakeLists.txt b/tpu-bmodel/CMakeLists.txt index 0469b9d..f5119e9 100644 --- a/tpu-bmodel/CMakeLists.txt +++ b/tpu-bmodel/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 2.8) +cmake_minimum_required(VERSION 3.6) cmake_policy(SET CMP0048 NEW) cmake_policy(SET CMP0046 NEW) @@ -33,6 +33,14 @@ else() target_compile_definitions(tpu_model PRIVATE VER="${revision}") target_compile_features(tpu_model PUBLIC cxx_std_11) + add_custom_target(model_tool_link ALL + COMMAND ${CMAKE_COMMAND} -E create_symlink tpu_model model_tool + DEPENDS tpu_model) + install(PROGRAMS + "${CMAKE_CURRENT_BINARY_DIR}/model_tool" + DESTINATION bin + COMPONENT libsophon + ) install(TARGETS tpu_model bmodel RUNTIME DESTINATION bin COMPONENT libsophon @@ -44,4 +52,4 @@ else() FILES_MATCHING PATTERN "*.hpp" PATTERN "*.h") -endif() \ No newline at end of file +endif() 
diff --git a/tpu-bmodel/include/bmodel.hpp b/tpu-bmodel/include/bmodel.hpp index f0a88f9..70c482c 100644 --- a/tpu-bmodel/include/bmodel.hpp +++ b/tpu-bmodel/include/bmodel.hpp @@ -38,6 +38,8 @@ typedef struct { typedef struct { uint64_t bd_cmd_mem_size; // bd instruction total size uint64_t gdma_cmd_mem_size; // gdma instruction total size + uint64_t hau_cmd_mem_size; // hau instruction total size + uint64_t sdma_cmd_mem_size; // sdma instruction total size uint64_t dynamic_ir_mem_size; // dynamic ir total size uint64_t neuron_mem_size; // total neuron mem uint64_t coeff_mem_size; // total coeff size @@ -50,7 +52,14 @@ const int SHA256_LEN = 32; void CalcSha256(const uint8_t *buffer, uint64_t size, uint8_t sha256[SHA256_LEN]); class ModelGen { - public: +public: + typedef struct { + int64_t device_id; + int64_t step; + std::string main_name; // name for Cascade + } CASCADE_INFO_T; + +public: ModelGen(uint32_t reserved_size = 0x1000000); virtual ~ModelGen(); flatbuffers::FlatBufferBuilder &Builder(); @@ -58,11 +67,16 @@ class ModelGen { // add model elements void AddChip(const std::string &arch_name); + void AddNumDevice(int num_device); void AddNet(const flatbuffers::Offset &net); void AddNet(std::string net_name, const flatbuffers::Offset &parameter, - uint32_t *net_idx = NULL, uint32_t *stage_idx = NULL); + uint32_t *net_idx = NULL, uint32_t *stage_idx = NULL, + const bmodel::Cascade * cascade = NULL, int32_t addr_mode = 0); + void AddNet(const std::string &net_name, const CASCADE_INFO_T &cascade, + const flatbuffers::Offset &parameter, int32_t addr_mode = 0); // void AddTpuModule(Binary tpu_module); void AddKernelModule(std::string &version, Binary &tpu_module); + void AddCpuModule(std::string &version, Binary &lib_cpu); // finish and save to file void Finish(const std::string &filename); @@ -73,14 +87,16 @@ class ModelGen { void Save(void *buffer); // save to buffer uint8_t *GetBufferPointer(); - private: +private: bool IsTensorConflict(const flatbuffers::Vector> *, 
const flatbuffers::Vector> *); bool IsShapeSame(const Shape *, const Shape *); typedef struct { std::string name; + CASCADE_INFO_T cascade; std::vector> parameters; + int32_t addr_mode; } NET_INFO_T; typedef struct { @@ -88,7 +104,13 @@ class ModelGen { Binary binary; } KERNEL_MODULE_T; + typedef struct { + std::string file_name; + Binary binary; + } CPUOP_MODULE_T; + std::string chip_; + int num_device_; flatbuffers::FlatBufferBuilder builder_; std::vector binary_; std::vector binary_vector_; @@ -97,6 +119,7 @@ class ModelGen { uint64_t max_neuron_size_; // Binary tpu_module_; KERNEL_MODULE_T kernel_module_; + CPUOP_MODULE_T cpuop_module_; }; class ModelCtx { @@ -110,13 +133,21 @@ class ModelCtx { // read binary data to buffer void read_binary(const bmodel::Binary *binary, uint8_t *buffer); // read binary from offset - void read_binary(const bmodel::Binary *binary, uint32_t offset, uint8_t *buffer, uint32_t size); + void read_binary(const bmodel::Binary *binary, uint64_t offset, uint8_t *buffer, uint64_t size); + // write buffer to binary + void write_binary(const bmodel::Binary *binary, uint8_t *buffer); + // write buffer to offset of binary + void write_binary(const bmodel::Binary *binary, uint64_t offset, + uint8_t *buffer, uint64_t size); // model buffer data for parse const void *data() const; const MODEL_HEADER_T &header() const; + bool get_weight(const std::string &net_name, int stage_idx, uint64_t offset, + Binary &bin, std::string &op_name) const; + bmodel_mem_info_t get_bmodel_mem_info(); protected: void update_bmodel(); @@ -125,13 +156,14 @@ class ModelCtx { void update_net(const std::string &net_name, const flatbuffers::Vector> *net_dynamic); + private: MODEL_HEADER_T header_; ModelGen *model_gen_; const Model *model_; void *model_buffer_; - uint32_t binary_offset_; - std::ifstream file_; // bmodel in file + uint64_t binary_offset_; + std::fstream file_; // bmodel in file const void *bmodel_pointer_; // bmodel in buffer }; diff --git 
a/tpu-bmodel/include/model_generated.h b/tpu-bmodel/include/model_generated.h index b2b4284..0f59d83 100644 --- a/tpu-bmodel/include/model_generated.h +++ b/tpu-bmodel/include/model_generated.h @@ -16,9 +16,15 @@ struct ShapeT; struct CmdGroup; struct CmdGroupT; +struct CoreCommands; +struct CoreCommandsT; + struct StageIR; struct StageIRT; +struct Location; +struct LocationT; + struct CoeffMem; struct CoeffMemT; @@ -52,12 +58,18 @@ struct NetDynamicT; struct NetParameter; struct NetParameterT; +struct Cascade; +struct CascadeT; + struct Net; struct NetT; struct KernelModule; struct KernelModuleT; +struct CpuopModule; +struct CpuopModuleT; + struct Model; struct ModelT; @@ -285,6 +297,127 @@ inline flatbuffers::Offset CreateCmdGroup( flatbuffers::Offset CreateCmdGroup(flatbuffers::FlatBufferBuilder &_fbb, const CmdGroupT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +struct CoreCommandsT : public flatbuffers::NativeTable { + typedef CoreCommands TableType; + std::vector> gdma_tiu_commands; + std::vector sdma_commands; + std::vector hau_commands; + std::vector cdma_commands; + CoreCommandsT() { + } +}; + +struct CoreCommands FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef CoreCommandsT NativeTableType; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_GDMA_TIU_COMMANDS = 4, + VT_SDMA_COMMANDS = 6, + VT_HAU_COMMANDS = 8, + VT_CDMA_COMMANDS = 10 + }; + const flatbuffers::Vector> *gdma_tiu_commands() const { + return GetPointer> *>(VT_GDMA_TIU_COMMANDS); + } + flatbuffers::Vector> *mutable_gdma_tiu_commands() { + return GetPointer> *>(VT_GDMA_TIU_COMMANDS); + } + const flatbuffers::Vector *sdma_commands() const { + return GetPointer *>(VT_SDMA_COMMANDS); + } + flatbuffers::Vector *mutable_sdma_commands() { + return GetPointer *>(VT_SDMA_COMMANDS); + } + const flatbuffers::Vector *hau_commands() const { + return GetPointer *>(VT_HAU_COMMANDS); + } + flatbuffers::Vector *mutable_hau_commands() { + return 
GetPointer *>(VT_HAU_COMMANDS); + } + const flatbuffers::Vector *cdma_commands() const { + return GetPointer *>(VT_CDMA_COMMANDS); + } + flatbuffers::Vector *mutable_cdma_commands() { + return GetPointer *>(VT_CDMA_COMMANDS); + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyOffset(verifier, VT_GDMA_TIU_COMMANDS) && + verifier.VerifyVector(gdma_tiu_commands()) && + verifier.VerifyVectorOfTables(gdma_tiu_commands()) && + VerifyOffset(verifier, VT_SDMA_COMMANDS) && + verifier.VerifyVector(sdma_commands()) && + VerifyOffset(verifier, VT_HAU_COMMANDS) && + verifier.VerifyVector(hau_commands()) && + VerifyOffset(verifier, VT_CDMA_COMMANDS) && + verifier.VerifyVector(cdma_commands()) && + verifier.EndTable(); + } + CoreCommandsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(CoreCommandsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const CoreCommandsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct CoreCommandsBuilder { + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_gdma_tiu_commands(flatbuffers::Offset>> gdma_tiu_commands) { + fbb_.AddOffset(CoreCommands::VT_GDMA_TIU_COMMANDS, gdma_tiu_commands); + } + void add_sdma_commands(flatbuffers::Offset> sdma_commands) { + fbb_.AddOffset(CoreCommands::VT_SDMA_COMMANDS, sdma_commands); + } + void add_hau_commands(flatbuffers::Offset> hau_commands) { + fbb_.AddOffset(CoreCommands::VT_HAU_COMMANDS, hau_commands); + } + void add_cdma_commands(flatbuffers::Offset> cdma_commands) { + fbb_.AddOffset(CoreCommands::VT_CDMA_COMMANDS, cdma_commands); + } + explicit CoreCommandsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + CoreCommandsBuilder &operator=(const CoreCommandsBuilder &); + flatbuffers::Offset Finish() { + 
const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateCoreCommands( + flatbuffers::FlatBufferBuilder &_fbb, + flatbuffers::Offset>> gdma_tiu_commands = 0, + flatbuffers::Offset> sdma_commands = 0, + flatbuffers::Offset> hau_commands = 0, + flatbuffers::Offset> cdma_commands = 0) { + CoreCommandsBuilder builder_(_fbb); + builder_.add_cdma_commands(cdma_commands); + builder_.add_hau_commands(hau_commands); + builder_.add_sdma_commands(sdma_commands); + builder_.add_gdma_tiu_commands(gdma_tiu_commands); + return builder_.Finish(); +} + +inline flatbuffers::Offset CreateCoreCommandsDirect( + flatbuffers::FlatBufferBuilder &_fbb, + const std::vector> *gdma_tiu_commands = nullptr, + const std::vector *sdma_commands = nullptr, + const std::vector *hau_commands = nullptr, + const std::vector *cdma_commands = nullptr) { + auto gdma_tiu_commands__ = gdma_tiu_commands ? _fbb.CreateVector>(*gdma_tiu_commands) : 0; + auto sdma_commands__ = sdma_commands ? _fbb.CreateVectorOfStructs(*sdma_commands) : 0; + auto hau_commands__ = hau_commands ? _fbb.CreateVectorOfStructs(*hau_commands) : 0; + auto cdma_commands__ = cdma_commands ? 
_fbb.CreateVectorOfStructs(*cdma_commands) : 0; + return bmodel::CreateCoreCommands( + _fbb, + gdma_tiu_commands__, + sdma_commands__, + hau_commands__, + cdma_commands__); +} + +flatbuffers::Offset CreateCoreCommands(flatbuffers::FlatBufferBuilder &_fbb, const CoreCommandsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + struct StageIRT : public flatbuffers::NativeTable { typedef StageIR TableType; uint32_t ir_info_len; @@ -402,11 +535,113 @@ inline flatbuffers::Offset CreateStageIR( flatbuffers::Offset CreateStageIR(flatbuffers::FlatBufferBuilder &_fbb, const StageIRT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +struct LocationT : public flatbuffers::NativeTable { + typedef Location TableType; + std::string name; + uint64_t offset; + uint64_t size; + LocationT() + : offset(0), + size(0) { + } +}; + +struct Location FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef LocationT NativeTableType; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_NAME = 4, + VT_OFFSET = 6, + VT_SIZE = 8 + }; + const flatbuffers::String *name() const { + return GetPointer(VT_NAME); + } + flatbuffers::String *mutable_name() { + return GetPointer(VT_NAME); + } + uint64_t offset() const { + return GetField(VT_OFFSET, 0); + } + bool mutate_offset(uint64_t _offset) { + return SetField(VT_OFFSET, _offset, 0); + } + uint64_t size() const { + return GetField(VT_SIZE, 0); + } + bool mutate_size(uint64_t _size) { + return SetField(VT_SIZE, _size, 0); + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyOffsetRequired(verifier, VT_NAME) && + verifier.VerifyString(name()) && + VerifyField(verifier, VT_OFFSET) && + VerifyField(verifier, VT_SIZE) && + verifier.EndTable(); + } + LocationT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(LocationT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static 
flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const LocationT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct LocationBuilder { + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_name(flatbuffers::Offset name) { + fbb_.AddOffset(Location::VT_NAME, name); + } + void add_offset(uint64_t offset) { + fbb_.AddElement(Location::VT_OFFSET, offset, 0); + } + void add_size(uint64_t size) { + fbb_.AddElement(Location::VT_SIZE, size, 0); + } + explicit LocationBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + LocationBuilder &operator=(const LocationBuilder &); + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + fbb_.Required(o, Location::VT_NAME); + return o; + } +}; + +inline flatbuffers::Offset CreateLocation( + flatbuffers::FlatBufferBuilder &_fbb, + flatbuffers::Offset name = 0, + uint64_t offset = 0, + uint64_t size = 0) { + LocationBuilder builder_(_fbb); + builder_.add_size(size); + builder_.add_offset(offset); + builder_.add_name(name); + return builder_.Finish(); +} + +inline flatbuffers::Offset CreateLocationDirect( + flatbuffers::FlatBufferBuilder &_fbb, + const char *name = nullptr, + uint64_t offset = 0, + uint64_t size = 0) { + auto name__ = name ? 
_fbb.CreateString(name) : 0; + return bmodel::CreateLocation( + _fbb, + name__, + offset, + size); +} + +flatbuffers::Offset CreateLocation(flatbuffers::FlatBufferBuilder &_fbb, const LocationT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + struct CoeffMemT : public flatbuffers::NativeTable { typedef CoeffMem TableType; uint64_t address; std::vector check_code; std::unique_ptr binary_coeff; + std::vector> location; CoeffMemT() : address(0) { } @@ -417,7 +652,8 @@ struct CoeffMem FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { VT_ADDRESS = 4, VT_CHECK_CODE = 6, - VT_BINARY_COEFF = 8 + VT_BINARY_COEFF = 8, + VT_LOCATION = 10 }; uint64_t address() const { return GetField(VT_ADDRESS, 0); @@ -437,12 +673,21 @@ struct CoeffMem FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { Binary *mutable_binary_coeff() { return GetStruct(VT_BINARY_COEFF); } + const flatbuffers::Vector> *location() const { + return GetPointer> *>(VT_LOCATION); + } + flatbuffers::Vector> *mutable_location() { + return GetPointer> *>(VT_LOCATION); + } bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && VerifyField(verifier, VT_ADDRESS) && VerifyOffset(verifier, VT_CHECK_CODE) && verifier.VerifyVector(check_code()) && VerifyField(verifier, VT_BINARY_COEFF) && + VerifyOffset(verifier, VT_LOCATION) && + verifier.VerifyVector(location()) && + verifier.VerifyVectorOfTables(location()) && verifier.EndTable(); } CoeffMemT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; @@ -462,6 +707,9 @@ struct CoeffMemBuilder { void add_binary_coeff(const Binary *binary_coeff) { fbb_.AddStruct(CoeffMem::VT_BINARY_COEFF, binary_coeff); } + void add_location(flatbuffers::Offset>> location) { + fbb_.AddOffset(CoeffMem::VT_LOCATION, location); + } explicit CoeffMemBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) { start_ = fbb_.StartTable(); @@ -478,9 
+726,11 @@ inline flatbuffers::Offset CreateCoeffMem( flatbuffers::FlatBufferBuilder &_fbb, uint64_t address = 0, flatbuffers::Offset> check_code = 0, - const Binary *binary_coeff = 0) { + const Binary *binary_coeff = 0, + flatbuffers::Offset>> location = 0) { CoeffMemBuilder builder_(_fbb); builder_.add_address(address); + builder_.add_location(location); builder_.add_binary_coeff(binary_coeff); builder_.add_check_code(check_code); return builder_.Finish(); @@ -490,13 +740,16 @@ inline flatbuffers::Offset CreateCoeffMemDirect( flatbuffers::FlatBufferBuilder &_fbb, uint64_t address = 0, const std::vector *check_code = nullptr, - const Binary *binary_coeff = 0) { + const Binary *binary_coeff = 0, + const std::vector> *location = nullptr) { auto check_code__ = check_code ? _fbb.CreateVector(*check_code) : 0; + auto location__ = location ? _fbb.CreateVector>(*location) : 0; return bmodel::CreateCoeffMem( _fbb, address, check_code__, - binary_coeff); + binary_coeff, + location__); } flatbuffers::Offset CreateCoeffMem(flatbuffers::FlatBufferBuilder &_fbb, const CoeffMemT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); @@ -514,6 +767,8 @@ struct TensorT : public flatbuffers::NativeTable { uint32_t cpu_addr; uint32_t pad_h; int32_t zero_point; + int32_t hidden; + int32_t index; TensorT() : data_type(0), gmem_stmode(0), @@ -523,7 +778,9 @@ struct TensorT : public flatbuffers::NativeTable { scale(1.0f), cpu_addr(0), pad_h(0), - zero_point(0) { + zero_point(0), + hidden(0), + index(0) { } }; @@ -540,7 +797,9 @@ struct Tensor FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { VT_SCALE = 18, VT_CPU_ADDR = 20, VT_PAD_H = 22, - VT_ZERO_POINT = 24 + VT_ZERO_POINT = 24, + VT_HIDDEN = 26, + VT_INDEX = 28 }; const flatbuffers::String *name() const { return GetPointer(VT_NAME); @@ -608,6 +867,18 @@ struct Tensor FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { bool mutate_zero_point(int32_t _zero_point) { return SetField(VT_ZERO_POINT, _zero_point, 0); } 
+ int32_t hidden() const { + return GetField(VT_HIDDEN, 0); + } + bool mutate_hidden(int32_t _hidden) { + return SetField(VT_HIDDEN, _hidden, 0); + } + int32_t index() const { + return GetField(VT_INDEX, 0); + } + bool mutate_index(int32_t _index) { + return SetField(VT_INDEX, _index, 0); + } bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && VerifyOffsetRequired(verifier, VT_NAME) && @@ -624,6 +895,8 @@ struct Tensor FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { VerifyField(verifier, VT_CPU_ADDR) && VerifyField(verifier, VT_PAD_H) && VerifyField(verifier, VT_ZERO_POINT) && + VerifyField(verifier, VT_HIDDEN) && + VerifyField(verifier, VT_INDEX) && verifier.EndTable(); } TensorT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; @@ -667,6 +940,12 @@ struct TensorBuilder { void add_zero_point(int32_t zero_point) { fbb_.AddElement(Tensor::VT_ZERO_POINT, zero_point, 0); } + void add_hidden(int32_t hidden) { + fbb_.AddElement(Tensor::VT_HIDDEN, hidden, 0); + } + void add_index(int32_t index) { + fbb_.AddElement(Tensor::VT_INDEX, index, 0); + } explicit TensorBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) { start_ = fbb_.StartTable(); @@ -692,10 +971,14 @@ inline flatbuffers::Offset CreateTensor( float scale = 1.0f, uint32_t cpu_addr = 0, uint32_t pad_h = 0, - int32_t zero_point = 0) { + int32_t zero_point = 0, + int32_t hidden = 0, + int32_t index = 0) { TensorBuilder builder_(_fbb); builder_.add_size(size); builder_.add_device_addr(device_addr); + builder_.add_index(index); + builder_.add_hidden(hidden); builder_.add_zero_point(zero_point); builder_.add_pad_h(pad_h); builder_.add_cpu_addr(cpu_addr); @@ -720,7 +1003,9 @@ inline flatbuffers::Offset CreateTensorDirect( float scale = 1.0f, uint32_t cpu_addr = 0, uint32_t pad_h = 0, - int32_t zero_point = 0) { + int32_t zero_point = 0, + int32_t hidden = 0, + int32_t index = 0) { auto name__ = name ? 
_fbb.CreateString(name) : 0; auto shape__ = shape ? _fbb.CreateVector>(*shape) : 0; return bmodel::CreateTensor( @@ -735,7 +1020,9 @@ inline flatbuffers::Offset CreateTensorDirect( scale, cpu_addr, pad_h, - zero_point); + zero_point, + hidden, + index); } flatbuffers::Offset CreateTensor(flatbuffers::FlatBufferBuilder &_fbb, const TensorT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); @@ -845,8 +1132,10 @@ struct CpuParamT : public flatbuffers::NativeTable { int32_t op_type; std::unique_ptr binary_param; std::vector> cpu_const; + int32_t is_custom; CpuParamT() - : op_type(0) { + : op_type(0), + is_custom(0) { } }; @@ -855,7 +1144,8 @@ struct CpuParam FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { VT_OP_TYPE = 4, VT_BINARY_PARAM = 6, - VT_CPU_CONST = 8 + VT_CPU_CONST = 8, + VT_IS_CUSTOM = 10 }; int32_t op_type() const { return GetField(VT_OP_TYPE, 0); @@ -875,6 +1165,12 @@ struct CpuParam FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { flatbuffers::Vector> *mutable_cpu_const() { return GetPointer> *>(VT_CPU_CONST); } + int32_t is_custom() const { + return GetField(VT_IS_CUSTOM, 0); + } + bool mutate_is_custom(int32_t _is_custom) { + return SetField(VT_IS_CUSTOM, _is_custom, 0); + } bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && VerifyField(verifier, VT_OP_TYPE) && @@ -882,6 +1178,7 @@ struct CpuParam FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { VerifyOffset(verifier, VT_CPU_CONST) && verifier.VerifyVector(cpu_const()) && verifier.VerifyVectorOfTables(cpu_const()) && + VerifyField(verifier, VT_IS_CUSTOM) && verifier.EndTable(); } CpuParamT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; @@ -901,6 +1198,9 @@ struct CpuParamBuilder { void add_cpu_const(flatbuffers::Offset>> cpu_const) { fbb_.AddOffset(CpuParam::VT_CPU_CONST, cpu_const); } + void add_is_custom(int32_t is_custom) { + 
fbb_.AddElement(CpuParam::VT_IS_CUSTOM, is_custom, 0); + } explicit CpuParamBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) { start_ = fbb_.StartTable(); @@ -917,8 +1217,10 @@ inline flatbuffers::Offset CreateCpuParam( flatbuffers::FlatBufferBuilder &_fbb, int32_t op_type = 0, const Binary *binary_param = 0, - flatbuffers::Offset>> cpu_const = 0) { + flatbuffers::Offset>> cpu_const = 0, + int32_t is_custom = 0) { CpuParamBuilder builder_(_fbb); + builder_.add_is_custom(is_custom); builder_.add_cpu_const(cpu_const); builder_.add_binary_param(binary_param); builder_.add_op_type(op_type); @@ -929,13 +1231,15 @@ inline flatbuffers::Offset CreateCpuParamDirect( flatbuffers::FlatBufferBuilder &_fbb, int32_t op_type = 0, const Binary *binary_param = 0, - const std::vector> *cpu_const = nullptr) { + const std::vector> *cpu_const = nullptr, + int32_t is_custom = 0) { auto cpu_const__ = cpu_const ? _fbb.CreateVector>(*cpu_const) : 0; return bmodel::CreateCpuParam( _fbb, op_type, binary_param, - cpu_const__); + cpu_const__, + is_custom); } flatbuffers::Offset CreateCpuParam(flatbuffers::FlatBufferBuilder &_fbb, const CpuParamT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); @@ -1173,6 +1477,7 @@ struct SubNetT : public flatbuffers::NativeTable { std::vector next_subnet_ids; std::unique_ptr merge_param; std::unique_ptr switch_param; + std::vector> core_commands; SubNetT() : subnet_mode(0), is_dynamic(0), @@ -1200,7 +1505,8 @@ struct SubNet FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { VT_ID = 24, VT_NEXT_SUBNET_IDS = 26, VT_MERGE_PARAM = 28, - VT_SWITCH_PARAM = 30 + VT_SWITCH_PARAM = 30, + VT_CORE_COMMANDS = 32 }; int32_t subnet_mode() const { return GetField(VT_SUBNET_MODE, 0); @@ -1286,6 +1592,12 @@ struct SubNet FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { SwitchParam *mutable_switch_param() { return GetPointer(VT_SWITCH_PARAM); } + const flatbuffers::Vector> *core_commands() const { + return GetPointer> *>(VT_CORE_COMMANDS); 
+ } + flatbuffers::Vector> *mutable_core_commands() { + return GetPointer> *>(VT_CORE_COMMANDS); + } bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && VerifyField(verifier, VT_SUBNET_MODE) && @@ -1313,6 +1625,9 @@ struct SubNet FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { verifier.VerifyTable(merge_param()) && VerifyOffset(verifier, VT_SWITCH_PARAM) && verifier.VerifyTable(switch_param()) && + VerifyOffset(verifier, VT_CORE_COMMANDS) && + verifier.VerifyVector(core_commands()) && + verifier.VerifyVectorOfTables(core_commands()) && verifier.EndTable(); } SubNetT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; @@ -1365,6 +1680,9 @@ struct SubNetBuilder { void add_switch_param(flatbuffers::Offset switch_param) { fbb_.AddOffset(SubNet::VT_SWITCH_PARAM, switch_param); } + void add_core_commands(flatbuffers::Offset>> core_commands) { + fbb_.AddOffset(SubNet::VT_CORE_COMMANDS, core_commands); + } explicit SubNetBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) { start_ = fbb_.StartTable(); @@ -1392,8 +1710,10 @@ inline flatbuffers::Offset CreateSubNet( int32_t id = -1, flatbuffers::Offset> next_subnet_ids = 0, flatbuffers::Offset merge_param = 0, - flatbuffers::Offset switch_param = 0) { + flatbuffers::Offset switch_param = 0, + flatbuffers::Offset>> core_commands = 0) { SubNetBuilder builder_(_fbb); + builder_.add_core_commands(core_commands); builder_.add_switch_param(switch_param); builder_.add_merge_param(merge_param); builder_.add_next_subnet_ids(next_subnet_ids); @@ -1426,12 +1746,14 @@ inline flatbuffers::Offset CreateSubNetDirect( int32_t id = -1, const std::vector *next_subnet_ids = nullptr, flatbuffers::Offset merge_param = 0, - flatbuffers::Offset switch_param = 0) { + flatbuffers::Offset switch_param = 0, + const std::vector> *core_commands = nullptr) { auto cmd_group__ = cmd_group ? _fbb.CreateVector>(*cmd_group) : 0; auto cpu_param__ = cpu_param ? 
_fbb.CreateVector>(*cpu_param) : 0; auto input_tensor__ = input_tensor ? _fbb.CreateVector>(*input_tensor) : 0; auto output_tensor__ = output_tensor ? _fbb.CreateVector>(*output_tensor) : 0; auto next_subnet_ids__ = next_subnet_ids ? _fbb.CreateVector(*next_subnet_ids) : 0; + auto core_commands__ = core_commands ? _fbb.CreateVector>(*core_commands) : 0; return bmodel::CreateSubNet( _fbb, subnet_mode, @@ -1447,7 +1769,8 @@ inline flatbuffers::Offset CreateSubNetDirect( id, next_subnet_ids__, merge_param, - switch_param); + switch_param, + core_commands__); } flatbuffers::Offset CreateSubNet(flatbuffers::FlatBufferBuilder &_fbb, const SubNetT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); @@ -1890,13 +2213,20 @@ struct NetParameterT : public flatbuffers::NativeTable { uint32_t cpu_mem_size; std::vector ctx_sizes; std::unique_ptr net_stat; + uint32_t core_num; + uint64_t io_addr; + uint64_t io_size; + std::unique_ptr tensor_loc; NetParameterT() : ctx_addr(0), ctx_size(0), is_dynamic(0), n_dynamic(0), h_w_dynamic(0), - cpu_mem_size(0) { + cpu_mem_size(0), + core_num(0), + io_addr(0), + io_size(0) { } }; @@ -1918,7 +2248,11 @@ struct NetParameter FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { VT_SUB_NET = 28, VT_CPU_MEM_SIZE = 30, VT_CTX_SIZES = 32, - VT_NET_STAT = 34 + VT_NET_STAT = 34, + VT_CORE_NUM = 36, + VT_IO_ADDR = 38, + VT_IO_SIZE = 40, + VT_TENSOR_LOC = 42 }; const flatbuffers::Vector> *input_tensor() const { return GetPointer> *>(VT_INPUT_TENSOR); @@ -2016,6 +2350,30 @@ struct NetParameter FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { Binary *mutable_net_stat() { return GetStruct(VT_NET_STAT); } + uint32_t core_num() const { + return GetField(VT_CORE_NUM, 0); + } + bool mutate_core_num(uint32_t _core_num) { + return SetField(VT_CORE_NUM, _core_num, 0); + } + uint64_t io_addr() const { + return GetField(VT_IO_ADDR, 0); + } + bool mutate_io_addr(uint64_t _io_addr) { + return SetField(VT_IO_ADDR, _io_addr, 0); + } + uint64_t 
io_size() const { + return GetField(VT_IO_SIZE, 0); + } + bool mutate_io_size(uint64_t _io_size) { + return SetField(VT_IO_SIZE, _io_size, 0); + } + const Binary *tensor_loc() const { + return GetStruct(VT_TENSOR_LOC); + } + Binary *mutable_tensor_loc() { + return GetStruct(VT_TENSOR_LOC); + } bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && VerifyOffsetRequired(verifier, VT_INPUT_TENSOR) && @@ -2046,6 +2404,10 @@ struct NetParameter FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { VerifyOffset(verifier, VT_CTX_SIZES) && verifier.VerifyVector(ctx_sizes()) && VerifyField(verifier, VT_NET_STAT) && + VerifyField(verifier, VT_CORE_NUM) && + VerifyField(verifier, VT_IO_ADDR) && + VerifyField(verifier, VT_IO_SIZE) && + VerifyField(verifier, VT_TENSOR_LOC) && verifier.EndTable(); } NetParameterT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; @@ -2104,6 +2466,18 @@ struct NetParameterBuilder { void add_net_stat(const Binary *net_stat) { fbb_.AddStruct(NetParameter::VT_NET_STAT, net_stat); } + void add_core_num(uint32_t core_num) { + fbb_.AddElement(NetParameter::VT_CORE_NUM, core_num, 0); + } + void add_io_addr(uint64_t io_addr) { + fbb_.AddElement(NetParameter::VT_IO_ADDR, io_addr, 0); + } + void add_io_size(uint64_t io_size) { + fbb_.AddElement(NetParameter::VT_IO_SIZE, io_size, 0); + } + void add_tensor_loc(const Binary *tensor_loc) { + fbb_.AddStruct(NetParameter::VT_TENSOR_LOC, tensor_loc); + } explicit NetParameterBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) { start_ = fbb_.StartTable(); @@ -2135,10 +2509,18 @@ inline flatbuffers::Offset CreateNetParameter( flatbuffers::Offset>> sub_net = 0, uint32_t cpu_mem_size = 0, flatbuffers::Offset> ctx_sizes = 0, - const Binary *net_stat = 0) { + const Binary *net_stat = 0, + uint32_t core_num = 0, + uint64_t io_addr = 0, + uint64_t io_size = 0, + const Binary *tensor_loc = 0) { NetParameterBuilder builder_(_fbb); + 
builder_.add_io_size(io_size); + builder_.add_io_addr(io_addr); builder_.add_ctx_size(ctx_size); builder_.add_ctx_addr(ctx_addr); + builder_.add_tensor_loc(tensor_loc); + builder_.add_core_num(core_num); builder_.add_net_stat(net_stat); builder_.add_ctx_sizes(ctx_sizes); builder_.add_cpu_mem_size(cpu_mem_size); @@ -2173,7 +2555,11 @@ inline flatbuffers::Offset CreateNetParameterDirect( const std::vector> *sub_net = nullptr, uint32_t cpu_mem_size = 0, const std::vector *ctx_sizes = nullptr, - const Binary *net_stat = 0) { + const Binary *net_stat = 0, + uint32_t core_num = 0, + uint64_t io_addr = 0, + uint64_t io_size = 0, + const Binary *tensor_loc = 0) { auto input_tensor__ = input_tensor ? _fbb.CreateVector>(*input_tensor) : 0; auto output_tensor__ = output_tensor ? _fbb.CreateVector>(*output_tensor) : 0; auto cmd_group__ = cmd_group ? _fbb.CreateVector>(*cmd_group) : 0; @@ -2197,18 +2583,125 @@ inline flatbuffers::Offset CreateNetParameterDirect( sub_net__, cpu_mem_size, ctx_sizes__, - net_stat); + net_stat, + core_num, + io_addr, + io_size, + tensor_loc); } flatbuffers::Offset CreateNetParameter(flatbuffers::FlatBufferBuilder &_fbb, const NetParameterT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +struct CascadeT : public flatbuffers::NativeTable { + typedef Cascade TableType; + uint32_t device_id; + uint32_t step; + std::string main_name; + CascadeT() + : device_id(0), + step(0) { + } +}; + +struct Cascade FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef CascadeT NativeTableType; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_DEVICE_ID = 4, + VT_STEP = 6, + VT_MAIN_NAME = 8 + }; + uint32_t device_id() const { + return GetField(VT_DEVICE_ID, 0); + } + bool mutate_device_id(uint32_t _device_id) { + return SetField(VT_DEVICE_ID, _device_id, 0); + } + uint32_t step() const { + return GetField(VT_STEP, 0); + } + bool mutate_step(uint32_t _step) { + return SetField(VT_STEP, _step, 0); + } + const 
flatbuffers::String *main_name() const { + return GetPointer(VT_MAIN_NAME); + } + flatbuffers::String *mutable_main_name() { + return GetPointer(VT_MAIN_NAME); + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_DEVICE_ID) && + VerifyField(verifier, VT_STEP) && + VerifyOffset(verifier, VT_MAIN_NAME) && + verifier.VerifyString(main_name()) && + verifier.EndTable(); + } + CascadeT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(CascadeT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const CascadeT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct CascadeBuilder { + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_device_id(uint32_t device_id) { + fbb_.AddElement(Cascade::VT_DEVICE_ID, device_id, 0); + } + void add_step(uint32_t step) { + fbb_.AddElement(Cascade::VT_STEP, step, 0); + } + void add_main_name(flatbuffers::Offset main_name) { + fbb_.AddOffset(Cascade::VT_MAIN_NAME, main_name); + } + explicit CascadeBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + CascadeBuilder &operator=(const CascadeBuilder &); + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateCascade( + flatbuffers::FlatBufferBuilder &_fbb, + uint32_t device_id = 0, + uint32_t step = 0, + flatbuffers::Offset main_name = 0) { + CascadeBuilder builder_(_fbb); + builder_.add_main_name(main_name); + builder_.add_step(step); + builder_.add_device_id(device_id); + return builder_.Finish(); +} + +inline flatbuffers::Offset CreateCascadeDirect( + flatbuffers::FlatBufferBuilder &_fbb, + uint32_t device_id = 0, + uint32_t step = 0, + const char *main_name = nullptr) { + 
auto main_name__ = main_name ? _fbb.CreateString(main_name) : 0; + return bmodel::CreateCascade( + _fbb, + device_id, + step, + main_name__); +} + +flatbuffers::Offset CreateCascade(flatbuffers::FlatBufferBuilder &_fbb, const CascadeT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + struct NetT : public flatbuffers::NativeTable { typedef Net TableType; std::string name; std::vector> net_static; std::vector> net_dynamic; std::vector> parameter; - NetT() { + std::unique_ptr cascade; + int32_t addr_mode; + NetT() + : addr_mode(0) { } }; @@ -2218,7 +2711,9 @@ struct Net FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { VT_NAME = 4, VT_NET_STATIC = 6, VT_NET_DYNAMIC = 8, - VT_PARAMETER = 10 + VT_PARAMETER = 10, + VT_CASCADE = 12, + VT_ADDR_MODE = 14 }; const flatbuffers::String *name() const { return GetPointer(VT_NAME); @@ -2244,6 +2739,18 @@ struct Net FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { flatbuffers::Vector> *mutable_parameter() { return GetPointer> *>(VT_PARAMETER); } + const Cascade *cascade() const { + return GetPointer(VT_CASCADE); + } + Cascade *mutable_cascade() { + return GetPointer(VT_CASCADE); + } + int32_t addr_mode() const { + return GetField(VT_ADDR_MODE, 0); + } + bool mutate_addr_mode(int32_t _addr_mode) { + return SetField(VT_ADDR_MODE, _addr_mode, 0); + } bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && VerifyOffsetRequired(verifier, VT_NAME) && @@ -2257,6 +2764,9 @@ struct Net FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { VerifyOffset(verifier, VT_PARAMETER) && verifier.VerifyVector(parameter()) && verifier.VerifyVectorOfTables(parameter()) && + VerifyOffset(verifier, VT_CASCADE) && + verifier.VerifyTable(cascade()) && + VerifyField(verifier, VT_ADDR_MODE) && verifier.EndTable(); } NetT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; @@ -2279,6 +2789,12 @@ struct NetBuilder { void add_parameter(flatbuffers::Offset>> parameter) { 
fbb_.AddOffset(Net::VT_PARAMETER, parameter); } + void add_cascade(flatbuffers::Offset cascade) { + fbb_.AddOffset(Net::VT_CASCADE, cascade); + } + void add_addr_mode(int32_t addr_mode) { + fbb_.AddElement(Net::VT_ADDR_MODE, addr_mode, 0); + } explicit NetBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) { start_ = fbb_.StartTable(); @@ -2297,8 +2813,12 @@ inline flatbuffers::Offset CreateNet( flatbuffers::Offset name = 0, flatbuffers::Offset>> net_static = 0, flatbuffers::Offset>> net_dynamic = 0, - flatbuffers::Offset>> parameter = 0) { + flatbuffers::Offset>> parameter = 0, + flatbuffers::Offset cascade = 0, + int32_t addr_mode = 0) { NetBuilder builder_(_fbb); + builder_.add_addr_mode(addr_mode); + builder_.add_cascade(cascade); builder_.add_parameter(parameter); builder_.add_net_dynamic(net_dynamic); builder_.add_net_static(net_static); @@ -2311,7 +2831,9 @@ inline flatbuffers::Offset CreateNetDirect( const char *name = nullptr, const std::vector> *net_static = nullptr, const std::vector> *net_dynamic = nullptr, - const std::vector> *parameter = nullptr) { + const std::vector> *parameter = nullptr, + flatbuffers::Offset cascade = 0, + int32_t addr_mode = 0) { auto name__ = name ? _fbb.CreateString(name) : 0; auto net_static__ = net_static ? _fbb.CreateVector>(*net_static) : 0; auto net_dynamic__ = net_dynamic ? 
_fbb.CreateVector>(*net_dynamic) : 0; @@ -2321,7 +2843,9 @@ inline flatbuffers::Offset CreateNetDirect( name__, net_static__, net_dynamic__, - parameter__); + parameter__, + cascade, + addr_mode); } flatbuffers::Offset CreateNet(flatbuffers::FlatBufferBuilder &_fbb, const NetT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); @@ -2410,6 +2934,90 @@ inline flatbuffers::Offset CreateKernelModuleDirect( flatbuffers::Offset CreateKernelModule(flatbuffers::FlatBufferBuilder &_fbb, const KernelModuleT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +struct CpuopModuleT : public flatbuffers::NativeTable { + typedef CpuopModule TableType; + std::string file_name; + std::unique_ptr binary; + CpuopModuleT() { + } +}; + +struct CpuopModule FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef CpuopModuleT NativeTableType; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_FILE_NAME = 4, + VT_BINARY = 6 + }; + const flatbuffers::String *file_name() const { + return GetPointer(VT_FILE_NAME); + } + flatbuffers::String *mutable_file_name() { + return GetPointer(VT_FILE_NAME); + } + const Binary *binary() const { + return GetStruct(VT_BINARY); + } + Binary *mutable_binary() { + return GetStruct(VT_BINARY); + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyOffsetRequired(verifier, VT_FILE_NAME) && + verifier.VerifyString(file_name()) && + VerifyFieldRequired(verifier, VT_BINARY) && + verifier.EndTable(); + } + CpuopModuleT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(CpuopModuleT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const CpuopModuleT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct CpuopModuleBuilder { + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; 
+ void add_file_name(flatbuffers::Offset file_name) { + fbb_.AddOffset(CpuopModule::VT_FILE_NAME, file_name); + } + void add_binary(const Binary *binary) { + fbb_.AddStruct(CpuopModule::VT_BINARY, binary); + } + explicit CpuopModuleBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + CpuopModuleBuilder &operator=(const CpuopModuleBuilder &); + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + fbb_.Required(o, CpuopModule::VT_FILE_NAME); + fbb_.Required(o, CpuopModule::VT_BINARY); + return o; + } +}; + +inline flatbuffers::Offset CreateCpuopModule( + flatbuffers::FlatBufferBuilder &_fbb, + flatbuffers::Offset file_name = 0, + const Binary *binary = 0) { + CpuopModuleBuilder builder_(_fbb); + builder_.add_binary(binary); + builder_.add_file_name(file_name); + return builder_.Finish(); +} + +inline flatbuffers::Offset CreateCpuopModuleDirect( + flatbuffers::FlatBufferBuilder &_fbb, + const char *file_name = nullptr, + const Binary *binary = 0) { + auto file_name__ = file_name ? 
_fbb.CreateString(file_name) : 0; + return bmodel::CreateCpuopModule( + _fbb, + file_name__, + binary); +} + +flatbuffers::Offset CreateCpuopModule(flatbuffers::FlatBufferBuilder &_fbb, const CpuopModuleT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + struct ModelT : public flatbuffers::NativeTable { typedef Model TableType; std::string type; @@ -2419,8 +3027,11 @@ struct ModelT : public flatbuffers::NativeTable { std::vector> net; uint64_t neuron_size; std::unique_ptr kernel_module; + uint32_t device_num; + std::unique_ptr cpuop_module; ModelT() - : neuron_size(0) { + : neuron_size(0), + device_num(0) { } }; @@ -2433,7 +3044,9 @@ struct Model FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { VT_CHIP = 10, VT_NET = 12, VT_NEURON_SIZE = 14, - VT_KERNEL_MODULE = 16 + VT_KERNEL_MODULE = 16, + VT_DEVICE_NUM = 18, + VT_CPUOP_MODULE = 20 }; const flatbuffers::String *type() const { return GetPointer(VT_TYPE); @@ -2477,6 +3090,18 @@ struct Model FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { KernelModule *mutable_kernel_module() { return GetPointer(VT_KERNEL_MODULE); } + uint32_t device_num() const { + return GetField(VT_DEVICE_NUM, 0); + } + bool mutate_device_num(uint32_t _device_num) { + return SetField(VT_DEVICE_NUM, _device_num, 0); + } + const CpuopModule *cpuop_module() const { + return GetPointer(VT_CPUOP_MODULE); + } + CpuopModule *mutable_cpuop_module() { + return GetPointer(VT_CPUOP_MODULE); + } bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && VerifyOffsetRequired(verifier, VT_TYPE) && @@ -2493,6 +3118,9 @@ struct Model FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { VerifyField(verifier, VT_NEURON_SIZE) && VerifyOffset(verifier, VT_KERNEL_MODULE) && verifier.VerifyTable(kernel_module()) && + VerifyField(verifier, VT_DEVICE_NUM) && + VerifyOffset(verifier, VT_CPUOP_MODULE) && + verifier.VerifyTable(cpuop_module()) && verifier.EndTable(); } ModelT *UnPack(const 
flatbuffers::resolver_function_t *_resolver = nullptr) const; @@ -2524,6 +3152,12 @@ struct ModelBuilder { void add_kernel_module(flatbuffers::Offset kernel_module) { fbb_.AddOffset(Model::VT_KERNEL_MODULE, kernel_module); } + void add_device_num(uint32_t device_num) { + fbb_.AddElement(Model::VT_DEVICE_NUM, device_num, 0); + } + void add_cpuop_module(flatbuffers::Offset cpuop_module) { + fbb_.AddOffset(Model::VT_CPUOP_MODULE, cpuop_module); + } explicit ModelBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) { start_ = fbb_.StartTable(); @@ -2549,9 +3183,13 @@ inline flatbuffers::Offset CreateModel( flatbuffers::Offset chip = 0, flatbuffers::Offset>> net = 0, uint64_t neuron_size = 0, - flatbuffers::Offset kernel_module = 0) { + flatbuffers::Offset kernel_module = 0, + uint32_t device_num = 0, + flatbuffers::Offset cpuop_module = 0) { ModelBuilder builder_(_fbb); builder_.add_neuron_size(neuron_size); + builder_.add_cpuop_module(cpuop_module); + builder_.add_device_num(device_num); builder_.add_kernel_module(kernel_module); builder_.add_net(net); builder_.add_chip(chip); @@ -2569,7 +3207,9 @@ inline flatbuffers::Offset CreateModelDirect( const char *chip = nullptr, const std::vector> *net = nullptr, uint64_t neuron_size = 0, - flatbuffers::Offset kernel_module = 0) { + flatbuffers::Offset kernel_module = 0, + uint32_t device_num = 0, + flatbuffers::Offset cpuop_module = 0) { auto type__ = type ? _fbb.CreateString(type) : 0; auto version__ = version ? _fbb.CreateString(version) : 0; auto time__ = time ? 
_fbb.CreateString(time) : 0; @@ -2583,7 +3223,9 @@ inline flatbuffers::Offset CreateModelDirect( chip__, net__, neuron_size, - kernel_module); + kernel_module, + device_num, + cpuop_module); } flatbuffers::Offset CreateModel(flatbuffers::FlatBufferBuilder &_fbb, const ModelT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); @@ -2655,6 +3297,41 @@ inline flatbuffers::Offset CreateCmdGroup(flatbuffers::FlatBufferBuild _gdma_cmd_byte); } +inline CoreCommandsT *CoreCommands::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = new CoreCommandsT(); + UnPackTo(_o, _resolver); + return _o; +} + +inline void CoreCommands::UnPackTo(CoreCommandsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; + { auto _e = gdma_tiu_commands(); if (_e) { _o->gdma_tiu_commands.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->gdma_tiu_commands[_i] = std::unique_ptr(_e->Get(_i)->UnPack(_resolver)); } } }; + { auto _e = sdma_commands(); if (_e) { _o->sdma_commands.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->sdma_commands[_i] = *_e->Get(_i); } } }; + { auto _e = hau_commands(); if (_e) { _o->hau_commands.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->hau_commands[_i] = *_e->Get(_i); } } }; + { auto _e = cdma_commands(); if (_e) { _o->cdma_commands.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->cdma_commands[_i] = *_e->Get(_i); } } }; +} + +inline flatbuffers::Offset CoreCommands::Pack(flatbuffers::FlatBufferBuilder &_fbb, const CoreCommandsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateCoreCommands(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateCoreCommands(flatbuffers::FlatBufferBuilder &_fbb, const CoreCommandsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct 
_VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const CoreCommandsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + auto _gdma_tiu_commands = _o->gdma_tiu_commands.size() ? _fbb.CreateVector> (_o->gdma_tiu_commands.size(), [](size_t i, _VectorArgs *__va) { return CreateCmdGroup(*__va->__fbb, __va->__o->gdma_tiu_commands[i].get(), __va->__rehasher); }, &_va ) : 0; + auto _sdma_commands = _o->sdma_commands.size() ? _fbb.CreateVectorOfStructs(_o->sdma_commands) : 0; + auto _hau_commands = _o->hau_commands.size() ? _fbb.CreateVectorOfStructs(_o->hau_commands) : 0; + auto _cdma_commands = _o->cdma_commands.size() ? _fbb.CreateVectorOfStructs(_o->cdma_commands) : 0; + return bmodel::CreateCoreCommands( + _fbb, + _gdma_tiu_commands, + _sdma_commands, + _hau_commands, + _cdma_commands); +} + inline StageIRT *StageIR::UnPack(const flatbuffers::resolver_function_t *_resolver) const { auto _o = new StageIRT(); UnPackTo(_o, _resolver); @@ -2693,6 +3370,38 @@ inline flatbuffers::Offset CreateStageIR(flatbuffers::FlatBufferBuilder _width_low); } +inline LocationT *Location::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = new LocationT(); + UnPackTo(_o, _resolver); + return _o; +} + +inline void Location::UnPackTo(LocationT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; + { auto _e = name(); if (_e) _o->name = _e->str(); }; + { auto _e = offset(); _o->offset = _e; }; + { auto _e = size(); _o->size = _e; }; +} + +inline flatbuffers::Offset Location::Pack(flatbuffers::FlatBufferBuilder &_fbb, const LocationT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateLocation(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateLocation(flatbuffers::FlatBufferBuilder &_fbb, const LocationT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { 
flatbuffers::FlatBufferBuilder *__fbb; const LocationT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + auto _name = _fbb.CreateString(_o->name); + auto _offset = _o->offset; + auto _size = _o->size; + return bmodel::CreateLocation( + _fbb, + _name, + _offset, + _size); +} + inline CoeffMemT *CoeffMem::UnPack(const flatbuffers::resolver_function_t *_resolver) const { auto _o = new CoeffMemT(); UnPackTo(_o, _resolver); @@ -2705,6 +3414,7 @@ inline void CoeffMem::UnPackTo(CoeffMemT *_o, const flatbuffers::resolver_functi { auto _e = address(); _o->address = _e; }; { auto _e = check_code(); if (_e) { _o->check_code.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->check_code[_i] = _e->Get(_i); } } }; { auto _e = binary_coeff(); if (_e) _o->binary_coeff = std::unique_ptr(new Binary(*_e)); }; + { auto _e = location(); if (_e) { _o->location.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->location[_i] = std::unique_ptr(_e->Get(_i)->UnPack(_resolver)); } } }; } inline flatbuffers::Offset CoeffMem::Pack(flatbuffers::FlatBufferBuilder &_fbb, const CoeffMemT* _o, const flatbuffers::rehasher_function_t *_rehasher) { @@ -2718,11 +3428,13 @@ inline flatbuffers::Offset CreateCoeffMem(flatbuffers::FlatBufferBuild auto _address = _o->address; auto _check_code = _o->check_code.size() ? _fbb.CreateVector(_o->check_code) : 0; auto _binary_coeff = _o->binary_coeff ? _o->binary_coeff.get() : 0; + auto _location = _o->location.size() ? 
_fbb.CreateVector> (_o->location.size(), [](size_t i, _VectorArgs *__va) { return CreateLocation(*__va->__fbb, __va->__o->location[i].get(), __va->__rehasher); }, &_va ) : 0; return bmodel::CreateCoeffMem( _fbb, _address, _check_code, - _binary_coeff); + _binary_coeff, + _location); } inline TensorT *Tensor::UnPack(const flatbuffers::resolver_function_t *_resolver) const { @@ -2745,6 +3457,8 @@ inline void Tensor::UnPackTo(TensorT *_o, const flatbuffers::resolver_function_t { auto _e = cpu_addr(); _o->cpu_addr = _e; }; { auto _e = pad_h(); _o->pad_h = _e; }; { auto _e = zero_point(); _o->zero_point = _e; }; + { auto _e = hidden(); _o->hidden = _e; }; + { auto _e = index(); _o->index = _e; }; } inline flatbuffers::Offset Tensor::Pack(flatbuffers::FlatBufferBuilder &_fbb, const TensorT* _o, const flatbuffers::rehasher_function_t *_rehasher) { @@ -2766,6 +3480,8 @@ inline flatbuffers::Offset CreateTensor(flatbuffers::FlatBufferBuilder & auto _cpu_addr = _o->cpu_addr; auto _pad_h = _o->pad_h; auto _zero_point = _o->zero_point; + auto _hidden = _o->hidden; + auto _index = _o->index; return bmodel::CreateTensor( _fbb, _name, @@ -2778,7 +3494,9 @@ inline flatbuffers::Offset CreateTensor(flatbuffers::FlatBufferBuilder & _scale, _cpu_addr, _pad_h, - _zero_point); + _zero_point, + _hidden, + _index); } inline CpuConstT *CpuConst::UnPack(const flatbuffers::resolver_function_t *_resolver) const { @@ -2825,6 +3543,7 @@ inline void CpuParam::UnPackTo(CpuParamT *_o, const flatbuffers::resolver_functi { auto _e = op_type(); _o->op_type = _e; }; { auto _e = binary_param(); if (_e) _o->binary_param = std::unique_ptr(new Binary(*_e)); }; { auto _e = cpu_const(); if (_e) { _o->cpu_const.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->cpu_const[_i] = std::unique_ptr(_e->Get(_i)->UnPack(_resolver)); } } }; + { auto _e = is_custom(); _o->is_custom = _e; }; } inline flatbuffers::Offset CpuParam::Pack(flatbuffers::FlatBufferBuilder &_fbb, const 
CpuParamT* _o, const flatbuffers::rehasher_function_t *_rehasher) { @@ -2838,11 +3557,13 @@ inline flatbuffers::Offset CreateCpuParam(flatbuffers::FlatBufferBuild auto _op_type = _o->op_type; auto _binary_param = _o->binary_param ? _o->binary_param.get() : 0; auto _cpu_const = _o->cpu_const.size() ? _fbb.CreateVector> (_o->cpu_const.size(), [](size_t i, _VectorArgs *__va) { return CreateCpuConst(*__va->__fbb, __va->__o->cpu_const[i].get(), __va->__rehasher); }, &_va ) : 0; + auto _is_custom = _o->is_custom; return bmodel::CreateCpuParam( _fbb, _op_type, _binary_param, - _cpu_const); + _cpu_const, + _is_custom); } inline OutputFromT *OutputFrom::UnPack(const flatbuffers::resolver_function_t *_resolver) const { @@ -2949,6 +3670,7 @@ inline void SubNet::UnPackTo(SubNetT *_o, const flatbuffers::resolver_function_t { auto _e = next_subnet_ids(); if (_e) { _o->next_subnet_ids.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->next_subnet_ids[_i] = _e->Get(_i); } } }; { auto _e = merge_param(); if (_e) _o->merge_param = std::unique_ptr(_e->UnPack(_resolver)); }; { auto _e = switch_param(); if (_e) _o->switch_param = std::unique_ptr(_e->UnPack(_resolver)); }; + { auto _e = core_commands(); if (_e) { _o->core_commands.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->core_commands[_i] = std::unique_ptr(_e->Get(_i)->UnPack(_resolver)); } } }; } inline flatbuffers::Offset SubNet::Pack(flatbuffers::FlatBufferBuilder &_fbb, const SubNetT* _o, const flatbuffers::rehasher_function_t *_rehasher) { @@ -2973,6 +3695,7 @@ inline flatbuffers::Offset CreateSubNet(flatbuffers::FlatBufferBuilder & auto _next_subnet_ids = _o->next_subnet_ids.size() ? _fbb.CreateVector(_o->next_subnet_ids) : 0; auto _merge_param = _o->merge_param ? CreateMergeParam(_fbb, _o->merge_param.get(), _rehasher) : 0; auto _switch_param = _o->switch_param ? 
CreateSwitchParam(_fbb, _o->switch_param.get(), _rehasher) : 0; + auto _core_commands = _o->core_commands.size() ? _fbb.CreateVector> (_o->core_commands.size(), [](size_t i, _VectorArgs *__va) { return CreateCoreCommands(*__va->__fbb, __va->__o->core_commands[i].get(), __va->__rehasher); }, &_va ) : 0; return bmodel::CreateSubNet( _fbb, _subnet_mode, @@ -2988,7 +3711,8 @@ inline flatbuffers::Offset CreateSubNet(flatbuffers::FlatBufferBuilder & _id, _next_subnet_ids, _merge_param, - _switch_param); + _switch_param, + _core_commands); } inline NetStaticT *NetStatic::UnPack(const flatbuffers::resolver_function_t *_resolver) const { @@ -3116,6 +3840,10 @@ inline void NetParameter::UnPackTo(NetParameterT *_o, const flatbuffers::resolve { auto _e = cpu_mem_size(); _o->cpu_mem_size = _e; }; { auto _e = ctx_sizes(); if (_e) { _o->ctx_sizes.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->ctx_sizes[_i] = _e->Get(_i); } } }; { auto _e = net_stat(); if (_e) _o->net_stat = std::unique_ptr(new Binary(*_e)); }; + { auto _e = core_num(); _o->core_num = _e; }; + { auto _e = io_addr(); _o->io_addr = _e; }; + { auto _e = io_size(); _o->io_size = _e; }; + { auto _e = tensor_loc(); if (_e) _o->tensor_loc = std::unique_ptr(new Binary(*_e)); }; } inline flatbuffers::Offset NetParameter::Pack(flatbuffers::FlatBufferBuilder &_fbb, const NetParameterT* _o, const flatbuffers::rehasher_function_t *_rehasher) { @@ -3142,6 +3870,10 @@ inline flatbuffers::Offset CreateNetParameter(flatbuffers::FlatBuf auto _cpu_mem_size = _o->cpu_mem_size; auto _ctx_sizes = _o->ctx_sizes.size() ? _fbb.CreateVector(_o->ctx_sizes) : 0; auto _net_stat = _o->net_stat ? _o->net_stat.get() : 0; + auto _core_num = _o->core_num; + auto _io_addr = _o->io_addr; + auto _io_size = _o->io_size; + auto _tensor_loc = _o->tensor_loc ? 
_o->tensor_loc.get() : 0; return bmodel::CreateNetParameter( _fbb, _input_tensor, @@ -3159,7 +3891,43 @@ inline flatbuffers::Offset CreateNetParameter(flatbuffers::FlatBuf _sub_net, _cpu_mem_size, _ctx_sizes, - _net_stat); + _net_stat, + _core_num, + _io_addr, + _io_size, + _tensor_loc); +} + +inline CascadeT *Cascade::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = new CascadeT(); + UnPackTo(_o, _resolver); + return _o; +} + +inline void Cascade::UnPackTo(CascadeT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; + { auto _e = device_id(); _o->device_id = _e; }; + { auto _e = step(); _o->step = _e; }; + { auto _e = main_name(); if (_e) _o->main_name = _e->str(); }; +} + +inline flatbuffers::Offset Cascade::Pack(flatbuffers::FlatBufferBuilder &_fbb, const CascadeT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateCascade(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateCascade(flatbuffers::FlatBufferBuilder &_fbb, const CascadeT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const CascadeT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + auto _device_id = _o->device_id; + auto _step = _o->step; + auto _main_name = _o->main_name.empty() ? 
0 : _fbb.CreateString(_o->main_name); + return bmodel::CreateCascade( + _fbb, + _device_id, + _step, + _main_name); } inline NetT *Net::UnPack(const flatbuffers::resolver_function_t *_resolver) const { @@ -3175,6 +3943,8 @@ inline void Net::UnPackTo(NetT *_o, const flatbuffers::resolver_function_t *_res { auto _e = net_static(); if (_e) { _o->net_static.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->net_static[_i] = std::unique_ptr(_e->Get(_i)->UnPack(_resolver)); } } }; { auto _e = net_dynamic(); if (_e) { _o->net_dynamic.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->net_dynamic[_i] = std::unique_ptr(_e->Get(_i)->UnPack(_resolver)); } } }; { auto _e = parameter(); if (_e) { _o->parameter.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->parameter[_i] = std::unique_ptr(_e->Get(_i)->UnPack(_resolver)); } } }; + { auto _e = cascade(); if (_e) _o->cascade = std::unique_ptr(_e->UnPack(_resolver)); }; + { auto _e = addr_mode(); _o->addr_mode = _e; }; } inline flatbuffers::Offset Net::Pack(flatbuffers::FlatBufferBuilder &_fbb, const NetT* _o, const flatbuffers::rehasher_function_t *_rehasher) { @@ -3189,12 +3959,16 @@ inline flatbuffers::Offset CreateNet(flatbuffers::FlatBufferBuilder &_fbb, auto _net_static = _o->net_static.size() ? _fbb.CreateVector> (_o->net_static.size(), [](size_t i, _VectorArgs *__va) { return CreateNetStatic(*__va->__fbb, __va->__o->net_static[i].get(), __va->__rehasher); }, &_va ) : 0; auto _net_dynamic = _o->net_dynamic.size() ? _fbb.CreateVector> (_o->net_dynamic.size(), [](size_t i, _VectorArgs *__va) { return CreateNetDynamic(*__va->__fbb, __va->__o->net_dynamic[i].get(), __va->__rehasher); }, &_va ) : 0; auto _parameter = _o->parameter.size() ? 
_fbb.CreateVector> (_o->parameter.size(), [](size_t i, _VectorArgs *__va) { return CreateNetParameter(*__va->__fbb, __va->__o->parameter[i].get(), __va->__rehasher); }, &_va ) : 0; + auto _cascade = _o->cascade ? CreateCascade(_fbb, _o->cascade.get(), _rehasher) : 0; + auto _addr_mode = _o->addr_mode; return bmodel::CreateNet( _fbb, _name, _net_static, _net_dynamic, - _parameter); + _parameter, + _cascade, + _addr_mode); } inline KernelModuleT *KernelModule::UnPack(const flatbuffers::resolver_function_t *_resolver) const { @@ -3226,6 +4000,35 @@ inline flatbuffers::Offset CreateKernelModule(flatbuffers::FlatBuf _binary); } +inline CpuopModuleT *CpuopModule::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = new CpuopModuleT(); + UnPackTo(_o, _resolver); + return _o; +} + +inline void CpuopModule::UnPackTo(CpuopModuleT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; + { auto _e = file_name(); if (_e) _o->file_name = _e->str(); }; + { auto _e = binary(); if (_e) _o->binary = std::unique_ptr(new Binary(*_e)); }; +} + +inline flatbuffers::Offset CpuopModule::Pack(flatbuffers::FlatBufferBuilder &_fbb, const CpuopModuleT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateCpuopModule(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateCpuopModule(flatbuffers::FlatBufferBuilder &_fbb, const CpuopModuleT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const CpuopModuleT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + auto _file_name = _fbb.CreateString(_o->file_name); + auto _binary = _o->binary ? 
_o->binary.get() : 0; + return bmodel::CreateCpuopModule( + _fbb, + _file_name, + _binary); +} + inline ModelT *Model::UnPack(const flatbuffers::resolver_function_t *_resolver) const { auto _o = new ModelT(); UnPackTo(_o, _resolver); @@ -3242,6 +4045,8 @@ inline void Model::UnPackTo(ModelT *_o, const flatbuffers::resolver_function_t * { auto _e = net(); if (_e) { _o->net.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->net[_i] = std::unique_ptr(_e->Get(_i)->UnPack(_resolver)); } } }; { auto _e = neuron_size(); _o->neuron_size = _e; }; { auto _e = kernel_module(); if (_e) _o->kernel_module = std::unique_ptr(_e->UnPack(_resolver)); }; + { auto _e = device_num(); _o->device_num = _e; }; + { auto _e = cpuop_module(); if (_e) _o->cpuop_module = std::unique_ptr(_e->UnPack(_resolver)); }; } inline flatbuffers::Offset Model::Pack(flatbuffers::FlatBufferBuilder &_fbb, const ModelT* _o, const flatbuffers::rehasher_function_t *_rehasher) { @@ -3259,6 +4064,8 @@ inline flatbuffers::Offset CreateModel(flatbuffers::FlatBufferBuilder &_f auto _net = _fbb.CreateVector> (_o->net.size(), [](size_t i, _VectorArgs *__va) { return CreateNet(*__va->__fbb, __va->__o->net[i].get(), __va->__rehasher); }, &_va ); auto _neuron_size = _o->neuron_size; auto _kernel_module = _o->kernel_module ? CreateKernelModule(_fbb, _o->kernel_module.get(), _rehasher) : 0; + auto _device_num = _o->device_num; + auto _cpuop_module = _o->cpuop_module ? 
CreateCpuopModule(_fbb, _o->cpuop_module.get(), _rehasher) : 0; return bmodel::CreateModel( _fbb, _type, @@ -3267,7 +4074,9 @@ inline flatbuffers::Offset CreateModel(flatbuffers::FlatBufferBuilder &_f _chip, _net, _neuron_size, - _kernel_module); + _kernel_module, + _device_num, + _cpuop_module); } inline const bmodel::Model *GetModel(const void *buf) { diff --git a/tpu-bmodel/model/model.fbs b/tpu-bmodel/model/model.fbs index 46de8e9..d7f2736 100644 --- a/tpu-bmodel/model/model.fbs +++ b/tpu-bmodel/model/model.fbs @@ -25,14 +25,21 @@ table Shape { } table CmdGroup { - bdc_num:uint32 (id: 0); // m_bdc_group_id_v - gdma_num:uint32 (id: 1); // m_gdma_group_id_v - binary_bdc:Binary (id: 2); // bdc binary data - binary_gdma:Binary (id: 3); // gdma binary data + bdc_num:uint32 (id: 0); // m_bdc_group_id_v + gdma_num:uint32 (id: 1); // m_gdma_group_id_v + binary_bdc:Binary (id: 2); // bdc binary data + binary_gdma:Binary (id: 3); // gdma binary data bdc_cmd_byte:uint32 (id: 4); // m_bdc_cmd_byte_v gdma_cmd_byte:uint32 (id: 5); // m_gdma_cmd_byte_v } +table CoreCommands { + gdma_tiu_commands:[CmdGroup] (id: 0); /* tpu command list with multiple id sync groups */ + sdma_commands: [Binary] (id: 1); /* sdma command list */ + hau_commands: [Binary] (id: 2); /* hau command list */ + cdma_commands: [Binary] (id: 3); /* 6x cdma command list for each direction */ +} + table StageIR { ir_info_len:uint32 (id: 0); // ir_info_len_vv height_high:int32 (id: 1); // stage_param_vv @@ -41,10 +48,17 @@ table StageIR { width_low:int32 (id: 4); } +table Location { + name:string (required, id:0); // operation name + offset:uint64 (id:1); // offset in binary_coeff + size:uint64 (id:2); // coeff size of operation +} + table CoeffMem { address:uint64 (id: 0); check_code:[uint8] (id: 1); // sha256 for check binary binary_coeff:Binary (id: 2); // coeff binary data + location:[Location] (id: 3); // each location in binary for coeff of operations } table Tensor { @@ -62,6 +76,8 @@ table Tensor { 
cpu_addr:uint32 (id: 8); // tensor cpu mem. for cpu layer, recode every tensor's offset pad_h:uint32 (id: 9); // this item is for conv 3ic(hack for BM1684 to improve efficiency): higher 16bit: pad_h_top, lower 16bit: pad_h_down zero_point:int32 = 0 (id: 10); // zero point for requant or dequant + hidden:int32 (id: 11); // hidden tensor, for cascade model. 0:hidden;1:input;2:output + index:int32 (id: 12); // input or output index } table CpuConst { @@ -74,6 +90,7 @@ table CpuParam { op_type:int32 (id: 0); /* cpu layer op type */ binary_param:Binary (id: 1); /* cpu layer paramter */ cpu_const:[CpuConst] (id: 2); + is_custom:int32 (id: 3); /* is cpu layer custom */ } table OutputFrom { @@ -85,7 +102,7 @@ table MergeParam { } table SwitchParam { - output_from: [int32] (id: 0); /* len(output_from)==len(output_tensor), */ + output_from: [int32] (id: 0); /* len(output_from)==len(output_tensor), */ output_branch: [int32] (id: 1); /* 0: false branch, 1: true branch */ } @@ -104,6 +121,7 @@ table SubNet { next_subnet_ids:[int32] (id: 11); /* next subnet ids for running: -1 means invalid branch, empty means end */ merge_param: MergeParam (id: 12); switch_param: SwitchParam (id: 13); + core_commands: [CoreCommands] (id: 14); } table NetStatic { // for old bmodel, not use any more @@ -152,6 +170,20 @@ table NetParameter { // record net stat_gen info for simulate, optional net_stat:Binary (id: 15); + + // The net may be deployed on multi cores + core_num: uint32 (id: 16); + + // io address alone + io_addr:uint64 (id: 17); + io_size:uint64 (id: 18); + tensor_loc:Binary (id: 19); +} + +table Cascade { + device_id:uint32 (id: 0); // which device to run + step:uint32 (id: 1); // step to run: if multi, run in parallel; if none, end + main_name:string (id: 2); // net belong to } table Net { @@ -159,6 +191,9 @@ table Net { net_static:[NetStatic] (id: 1); // for old bmodel, not use any more net_dynamic:[NetDynamic] (id: 2); // for old bmodel, not use any more parameter:[NetParameter] 
(id: 3); // one net can one or more stages + cascade:Cascade (id: 4); // may run in multi devices or multi steps + addr_mode:int32 (id: 5); // 0: basic mode, io and neuron mem alloc together by runtime + // 1: io alone mode, io mem and neuron mem alloc seperated by runtime } table KernelModule { @@ -166,6 +201,11 @@ table KernelModule { binary:Binary (required, id: 1); } +table CpuopModule { + file_name:string (required, id: 0); + binary:Binary (required, id: 1); +} + table Model { type:string (required, id: 0); version:string (required, id: 1); @@ -174,6 +214,8 @@ table Model { net:[Net] (required, id: 4); neuron_size:uint64 (id: 5); // max neuron size kernel_module:KernelModule (id: 6); + device_num:uint32 (id: 7); // The model may be run in multi devices + cpuop_module:CpuopModule (id: 8); } root_type Model; diff --git a/tpu-bmodel/src/bmodel.cpp b/tpu-bmodel/src/bmodel.cpp old mode 100644 new mode 100755 index 83cf6cf..91e07bb --- a/tpu-bmodel/src/bmodel.cpp +++ b/tpu-bmodel/src/bmodel.cpp @@ -45,6 +45,7 @@ ModelGen::ModelGen(uint32_t reserved_size) { binary_.reserve(reserved_size); max_neuron_size_ = 0; + num_device_ = 0; } FlatBufferBuilder &ModelGen::Builder() @@ -81,8 +82,25 @@ void ModelGen::AddNet(const flatbuffers::Offset &net) nets_.push_back(net); } +void ModelGen::AddNet(const std::string &net_name, + const CASCADE_INFO_T &cascade, + const flatbuffers::Offset ¶meter, + int32_t addr_mode) { + auto net_new = reinterpret_cast( + builder_.GetCurrentBufferPointer() + builder_.GetSize() - parameter.o); + if (net_new->ctx_size() > max_neuron_size_) { + max_neuron_size_ = net_new->ctx_size(); + } + NET_INFO_T net_info; + net_info.name = net_name; + net_info.cascade = cascade; + net_info.parameters.push_back(parameter); + net_info.addr_mode = addr_mode; + net_vector_.push_back(net_info); +} + void ModelGen::AddNet(string net_name, const Offset ¶meter, uint32_t *net_idx, - uint32_t *stage_idx) + uint32_t *stage_idx, const bmodel::Cascade * cascade, int32_t 
addr_mode) { ASSERT(net_name.empty() == false); auto net_new = reinterpret_cast(builder_.GetCurrentBufferPointer() + @@ -99,15 +117,30 @@ void ModelGen::AddNet(string net_name, const Offset ¶meter, ui if (net_idx != NULL) { *net_idx = idx; } + if (cascade != NULL) { + // cascade not support multi stage + ASSERT(idx == net_vector_.size()); + } if (idx == net_vector_.size()) { // if not found NET_INFO_T net_info; net_info.name = net_name; net_info.parameters.push_back(parameter); + net_info.addr_mode = addr_mode; + if (cascade) { + net_info.cascade.main_name = cascade->main_name()->str(); + net_info.cascade.device_id = cascade->device_id(); + net_info.cascade.step = cascade->step(); + } net_vector_.push_back(net_info); if (stage_idx != NULL) { *stage_idx = 0; } } else { // if found + if (net_vector_[idx].addr_mode != addr_mode) { + BMODEL_LOG(FATAL) << "net[" << net_name + << "] addr_mode should be the same" << std::endl; + exit(-1); + } auto ¶meters = net_vector_[idx].parameters; for (auto &net_offset : parameters) { // check whether conflict @@ -216,11 +249,18 @@ void ModelGen::AddChip(const std::string &arch_name) chip_ = arch_name; } +void ModelGen::AddNumDevice(int num_device) { num_device_ = num_device; } + void ModelGen::AddKernelModule(std::string &file_name, Binary &tpu_module) { kernel_module_.file_name = file_name; kernel_module_.binary = tpu_module; } +void ModelGen::AddCpuModule(std::string &file_name, Binary &cpu_module) { + cpuop_module_.file_name = file_name; + cpuop_module_.binary = cpu_module; +} + void ModelGen::Finish(const string &filename) { this->Finish(); @@ -237,11 +277,21 @@ size_t ModelGen::Finish() } ASSERT(parameter.IsNull() == false); - + flatbuffers::Offset cascade = 0; + if (net_info.cascade.main_name.empty() == false) { + auto main_name = builder_.CreateString(net_info.cascade.main_name); + bmodel::CascadeBuilder cb(builder_); + cb.add_device_id(net_info.cascade.device_id); + cb.add_step(net_info.cascade.step); + 
cb.add_main_name(main_name); + cascade = cb.Finish(); + } auto net_name = builder_.CreateString(net_info.name); bmodel::NetBuilder nb(builder_); nb.add_name(net_name); + nb.add_cascade(cascade); nb.add_parameter(parameter); + nb.add_addr_mode(net_info.addr_mode); nets_.push_back(nb.Finish()); } if (nets_.empty()) { @@ -259,13 +309,19 @@ size_t ModelGen::Finish() // kernel_module related flatbuffers::Offset kernel_module; - if (chip_ == "BM1684X") { - auto module_name = builder_.CreateString(kernel_module_.file_name); - bmodel::KernelModuleBuilder kb(builder_); - kb.add_file_name(module_name); - kb.add_binary(&kernel_module_.binary); - kernel_module = kb.Finish(); - } + auto module_name = builder_.CreateString(kernel_module_.file_name); + bmodel::KernelModuleBuilder kb(builder_); + kb.add_file_name(module_name); + kb.add_binary(&kernel_module_.binary); + kernel_module = kb.Finish(); + + // cpuop_module related + flatbuffers::Offset cpuop_module; + auto cpu_module_name = builder_.CreateString(cpuop_module_.file_name); + bmodel::CpuopModuleBuilder cb(builder_); + cb.add_file_name(cpu_module_name); + cb.add_binary(&cpuop_module_.binary); + cpuop_module = cb.Finish(); bmodel::ModelBuilder mb(builder_); mb.add_chip(chip); @@ -274,10 +330,9 @@ size_t ModelGen::Finish() mb.add_version(version); mb.add_net(net); mb.add_neuron_size(max_neuron_size_); - - if (chip_ == "BM1684X") { - mb.add_kernel_module(kernel_module); - } + mb.add_kernel_module(kernel_module); + mb.add_device_num(num_device_); + mb.add_cpuop_module(cpuop_module); auto model = mb.Finish(); builder_.Finish(model); @@ -330,16 +385,16 @@ void ModelGen::Save(void *buffer) ModelCtx::ModelCtx(const string &filename) : model_gen_(NULL), model_(NULL), bmodel_pointer_(NULL) { // read file - file_.open(filename, std::ios::binary | std::ios::in); + file_.open(filename, std::ios::binary | std::ios::in | std::ios::out); if (!file_) { BMODEL_LOG(FATAL) << "File[" << filename << "] open failed." 
<< std::endl; - exit(-1); + throw std::runtime_error("failed to construct"); } file_.seekg(0, std::ios::end); size_t length = file_.tellg(); if (length <= sizeof(header_)) { BMODEL_LOG(FATAL) << "File[" << filename << "] is broken ." << std::endl; - exit(-1); + throw std::runtime_error("failed to construct"); } file_.seekg(0, std::ios::beg); @@ -348,11 +403,11 @@ ModelCtx::ModelCtx(const string &filename) : model_gen_(NULL), model_(NULL), bmo file_.read((char *)&header_, sizeof(header_)); if (header_.magic != BMODEL_MAGIC) { BMODEL_LOG(FATAL) << "File[" << filename << "] is broken .." << std::endl; - exit(-1); + throw std::runtime_error("failed to construct"); } if (length < header_.header_size + header_.flatbuffers_size + header_.binary_size) { BMODEL_LOG(FATAL) << "File[" << filename << "] is broken ..." << std::endl; - exit(-1); + throw std::runtime_error("failed to construct"); } binary_offset_ = header_.header_size + header_.flatbuffers_size; model_buffer_ = (void *)malloc(header_.flatbuffers_size); @@ -369,7 +424,7 @@ ModelCtx::ModelCtx(const string &filename) : model_gen_(NULL), model_(NULL), bmo BMODEL_LOG(FATAL) << "Chip: " << model_->chip()->c_str() << std::endl; BMODEL_LOG(FATAL) << "Date: " << model_->time()->c_str() << std::endl; } - exit(-1); + throw std::runtime_error("failed to construct"); } model_ = bmodel::GetModel(model_buffer_); ASSERT(model_ != NULL); @@ -454,7 +509,7 @@ void ModelCtx::read_binary(const Binary *binary, uint8_t *buffer) } // read binary from offset -void ModelCtx::read_binary(const Binary *binary, uint32_t offset, uint8_t *buffer, uint32_t size) +void ModelCtx::read_binary(const Binary *binary, uint64_t offset, uint8_t *buffer, uint64_t size) { ASSERT(binary != NULL); ASSERT(buffer != NULL); @@ -467,6 +522,52 @@ void ModelCtx::read_binary(const Binary *binary, uint32_t offset, uint8_t *buffe } } +void ModelCtx::write_binary(const Binary *binary, uint8_t *buffer) { + write_binary(binary, 0, buffer, binary->size()); +} + +// 
write buffer to binary offset +void ModelCtx::write_binary(const Binary *binary, uint64_t offset, + uint8_t *buffer, uint64_t size) { + ASSERT(binary != NULL); + ASSERT(buffer != NULL); + ASSERT(size + offset <= binary->size()); + auto offset_file = binary_offset_ + binary->start() + offset; + if (bmodel_pointer_ == NULL) { // from file + file_.seekg(offset_file, std::ios::beg); + file_.write((char *)buffer, size); + } else { // from buffer + memcpy((uint8_t *)bmodel_pointer_ + offset_file, buffer, size); + } +} + +bool ModelCtx::get_weight(const std::string &net_name, int stage_idx, + uint64_t offset, Binary &bin, + std::string &op_name) const { + auto num_net = model_->net()->size(); + for (int i = 0; i < num_net; i++) { + auto param = model_->net()->Get(i)->parameter(); + if (model_->net()->Get(i)->name()->str() == net_name) { + if (stage_idx >= 0 && stage_idx < param->size()) { + auto weight = param->Get(stage_idx)->coeff_mem(); + auto num_weight = weight->location()->size(); + for (int j = 0; j < num_weight; j++) { + auto loc = weight->location()->Get(j); + if (loc->offset() == offset) { + auto weight_bin = weight->binary_coeff(); + op_name = loc->name()->str(); + bin.mutate_start(weight_bin->start() + offset); + bin.mutate_size(loc->size()); + return true; + } + } + } + return false; + } + } + return false; +} + template static Offset Pack(ModelGen *model_gen, const T *item) { @@ -779,6 +880,8 @@ bmodel::bmodel_mem_info_t ModelCtx::get_bmodel_mem_info() memset(&info, 0, sizeof(info)); size_t load_net_num = model()->net()->size(); uint64_t net_max_neuron_size = 0; + std::set> device_check_codes; + std::set> host_check_codes; for (size_t net_idx = 0; net_idx < load_net_num; net_idx++) { auto net_params = model()->net()->Get(net_idx)->parameter(); @@ -786,8 +889,6 @@ bmodel::bmodel_mem_info_t ModelCtx::get_bmodel_mem_info() auto stage_num = net_params->size(); info.neuron_mem_size= 0; - std::set> device_check_codes; - std::set> host_check_codes; uint64_t 
max_neuron_size = 0; bool multi_subnet = false; for(size_t stage_idx=0; stage_idx < stage_num; stage_idx++){ @@ -827,9 +928,11 @@ bmodel::bmodel_mem_info_t ModelCtx::get_bmodel_mem_info() if(subnet->is_dynamic()){ info.dynamic_ir_mem_size += subnet->ir_len()*sizeof(uint32_t); } else { - int group_num = subnet->cmd_group()->size(); + const auto cmd_groups = subnet->cmd_group() ? subnet->cmd_group() : + subnet->core_commands()->Get(0)->gdma_tiu_commands(); + int group_num = cmd_groups->size(); for (int group_idx = 0; group_idx < group_num; group_idx++) { - auto cmd_group = subnet->cmd_group()->Get(group_idx); + auto cmd_group = cmd_groups->Get(group_idx); // just for bm1684. bm1684x instructions may be of variable length if(model()->chip()->str() == "BM1682"){ info.bd_cmd_mem_size += cmd_group->bdc_num()*(1<<8); @@ -837,6 +940,22 @@ bmodel::bmodel_mem_info_t ModelCtx::get_bmodel_mem_info() } else if(model()->chip()->str() == "BM1684"){ info.bd_cmd_mem_size += cmd_group->bdc_num()*(1<<7); info.gdma_cmd_mem_size += cmd_group->gdma_num()*(1<<7); + } else { + info.bd_cmd_mem_size += cmd_group->binary_bdc()->size(); + info.gdma_cmd_mem_size += cmd_group->binary_gdma()->size(); + } + } + if (model()->chip()->str() == "SG2260") { + for (unsigned int i = 0; i < subnet->core_commands()->size(); ++i) { + auto core_cmd = subnet->core_commands()->Get(i); + if (core_cmd->hau_commands()) { + for (unsigned int j = 0; j < core_cmd->hau_commands()->size(); ++j) + info.hau_cmd_mem_size += core_cmd->hau_commands()->Get(j)->size(); + } + if (core_cmd->sdma_commands()) { + for (unsigned int j = 0; j < core_cmd->sdma_commands()->size(); ++j) + info.sdma_cmd_mem_size += core_cmd->sdma_commands()->Get(j)->size(); + } } } } diff --git a/tpu-bmodel/tools/model_fbs.h b/tpu-bmodel/tools/model_fbs.h index 626e38b..5e6e743 100644 --- a/tpu-bmodel/tools/model_fbs.h +++ b/tpu-bmodel/tools/model_fbs.h @@ -26,14 +26,21 @@ const char * schema_text = "}\n" "\n" "table CmdGroup {\n" -"bdc_num:uint32 
(id: 0); // m_bdc_group_id_v\n" -"gdma_num:uint32 (id: 1); // m_gdma_group_id_v\n" -"binary_bdc:Binary (id: 2); // bdc binary data\n" -"binary_gdma:Binary (id: 3); // gdma binary data\n" +"bdc_num:uint32 (id: 0); // m_bdc_group_id_v\n" +"gdma_num:uint32 (id: 1); // m_gdma_group_id_v\n" +"binary_bdc:Binary (id: 2); // bdc binary data\n" +"binary_gdma:Binary (id: 3); // gdma binary data\n" "bdc_cmd_byte:uint32 (id: 4); // m_bdc_cmd_byte_v\n" "gdma_cmd_byte:uint32 (id: 5); // m_gdma_cmd_byte_v\n" "}\n" "\n" +"table CoreCommands {\n" +"gdma_tiu_commands:[CmdGroup] (id: 0); /* tpu command list with multiple id sync groups */\n" +"sdma_commands: [Binary] (id: 1); /* sdma command list */\n" +"hau_commands: [Binary] (id: 2); /* hau command list */\n" +"cdma_commands: [Binary] (id: 3); /* 6x cdma command list for each direction */\n" +"}\n" +"\n" "table StageIR {\n" "ir_info_len:uint32 (id: 0); // ir_info_len_vv\n" "height_high:int32 (id: 1); // stage_param_vv\n" @@ -42,10 +49,17 @@ const char * schema_text = "width_low:int32 (id: 4);\n" "}\n" "\n" +"table Location {\n" +"name:string (required, id:0); // operation name\n" +"offset:uint64 (id:1); // offset in binary_coeff\n" +"size:uint64 (id:2); // coeff size of operation\n" +"}\n" +"\n" "table CoeffMem {\n" "address:uint64 (id: 0);\n" "check_code:[uint8] (id: 1); // sha256 for check binary\n" "binary_coeff:Binary (id: 2); // coeff binary data\n" +"location:[Location] (id: 3); // each location in binary for coeff of operations\n" "}\n" "\n" "table Tensor {\n" @@ -63,6 +77,8 @@ const char * schema_text = "cpu_addr:uint32 (id: 8); // tensor cpu mem. for cpu layer, recode every tensor's offset\n" "pad_h:uint32 (id: 9); // this item is for conv 3ic(hack for BM1684 to improve efficiency): higher 16bit: pad_h_top, lower 16bit: pad_h_down\n" "zero_point:int32 = 0 (id: 10); // zero point for requant or dequant\n" +"hidden:int32 (id: 11); // hidden tensor, for cascade model. 
0:hidden;1:input;2:output\n" +"index:int32 (id: 12); // input or output index\n" "}\n" "\n" "table CpuConst {\n" @@ -75,6 +91,7 @@ const char * schema_text = "op_type:int32 (id: 0); /* cpu layer op type */\n" "binary_param:Binary (id: 1); /* cpu layer paramter */\n" "cpu_const:[CpuConst] (id: 2);\n" +"is_custom:int32 (id: 3); /* is cpu layer custom */\n" "}\n" "\n" "table OutputFrom {\n" @@ -86,7 +103,7 @@ const char * schema_text = "}\n" "\n" "table SwitchParam {\n" -"output_from: [int32] (id: 0); /* len(output_from)==len(output_tensor), */\n" +"output_from: [int32] (id: 0); /* len(output_from)==len(output_tensor), */\n" "output_branch: [int32] (id: 1); /* 0: false branch, 1: true branch */\n" "}\n" "\n" @@ -105,6 +122,7 @@ const char * schema_text = "next_subnet_ids:[int32] (id: 11); /* next subnet ids for running: -1 means invalid branch, empty means end */\n" "merge_param: MergeParam (id: 12);\n" "switch_param: SwitchParam (id: 13);\n" +"core_commands: [CoreCommands] (id: 14);\n" "}\n" "\n" "table NetStatic { // for old bmodel, not use any more\n" @@ -153,6 +171,20 @@ const char * schema_text = "\n" "// record net stat_gen info for simulate, optional\n" "net_stat:Binary (id: 15);\n" +"\n" +"// The net may be deployed on multi cores\n" +"core_num: uint32 (id: 16);\n" +"\n" +"// io address alone\n" +"io_addr:uint64 (id: 17);\n" +"io_size:uint64 (id: 18);\n" +"tensor_loc:Binary (id: 19);\n" +"}\n" +"\n" +"table Cascade {\n" +"device_id:uint32 (id: 0); // which device to run\n" +"step:uint32 (id: 1); // step to run: if multi, run in parallel; if none, end\n" +"main_name:string (id: 2); // net belong to\n" "}\n" "\n" "table Net {\n" @@ -160,6 +192,9 @@ const char * schema_text = "net_static:[NetStatic] (id: 1); // for old bmodel, not use any more\n" "net_dynamic:[NetDynamic] (id: 2); // for old bmodel, not use any more\n" "parameter:[NetParameter] (id: 3); // one net can one or more stages\n" +"cascade:Cascade (id: 4); // may run in multi devices or multi steps\n" 
+"addr_mode:int32 (id: 5); // 0: basic mode, io and neuron mem alloc together by runtime\n" +"// 1: io alone mode, io mem and neuron mem alloc seperated by runtime\n" "}\n" "\n" "table KernelModule {\n" @@ -167,6 +202,11 @@ const char * schema_text = "binary:Binary (required, id: 1);\n" "}\n" "\n" +"table CpuopModule {\n" +"file_name:string (required, id: 0);\n" +"binary:Binary (required, id: 1);\n" +"}\n" +"\n" "table Model {\n" "type:string (required, id: 0);\n" "version:string (required, id: 1);\n" @@ -175,6 +215,8 @@ const char * schema_text = "net:[Net] (required, id: 4);\n" "neuron_size:uint64 (id: 5); // max neuron size\n" "kernel_module:KernelModule (id: 6);\n" +"device_num:uint32 (id: 7); // The model may be run in multi devices\n" +"cpuop_module:CpuopModule (id: 8);\n" "}\n" "\n" "root_type Model;\n" diff --git a/tpu-bmodel/tools/tpu_model.cpp b/tpu-bmodel/tools/tpu_model.cpp index 59ddbcb..1a89651 100644 --- a/tpu-bmodel/tools/tpu_model.cpp +++ b/tpu-bmodel/tools/tpu_model.cpp @@ -39,6 +39,8 @@ static void usage(char *argv[]) cout << " " << argv[0] << endl << " --info model_file : show brief model info" << endl << " --print model_file : show detailed model info" << endl + << " --weight model_file : show model weight info" << endl + << " --update_weight dst_model dst_net dst_offset src_model src_net src_offset" << endl << " --extract model_file : extract one multi-net bmodel to multi one-net bmodels" << endl << " --combine file1 .. fileN -o new_file: combine bmodels to one bmodel by filepath" << endl << " --combine_dir dir1 .. 
dirN -o new_dir: combine bmodels to one bmodel by directory path" << endl @@ -46,6 +48,7 @@ static void usage(char *argv[]) << " --version show tool version" << endl << " --kernel_dump model_file -o kernel_file_name : dump kernel_module file" << endl << " --kernel_update model_file kernel_name : add/update kernel_module file" << endl + << " --custom_ap_update model_file libcpuop_file : add/update custom libcpuop file" << endl << endl; } @@ -71,8 +74,10 @@ static void print(const string &filename) cout << json_text << endl; } -static const char *type_name_array[] = {"float32", "float16", "int8", "uint8", "int16", "uint16", "int32", "uint32"}; -static const int type_size_array[] = {4, 2, 1, 1, 2, 2, 4, 4}; +static const char *type_name_array[] = { + "float32", "float16", "int8", "uint8", "int16", "uint16", + "int32", "uint32", "bfloat16", "int4", "uint4"}; +static const float type_size_array[] = {4, 2, 1, 1, 2, 2, 4, 4, 2, 0.5, 0.5}; static const int DATA_TYPE_NUM = sizeof(type_name_array) / sizeof(char *); static const char *type_name(uint32_t data_type) @@ -136,6 +141,20 @@ static void show(const NetParameter *parameter, bool dynamic = false) } } +static void reorder(std::vector> &tensors) { + std::sort(tensors.begin(), tensors.end(), + [](const std::pair &a, + const std::pair &b) { + if (a.first < b.first) { + return true; + } else if (a.first > b.first) { + return false; + } else { + return a.second->index() <= b.second->index(); + } + }); +} + // print brief model info static void show(const string &filename) { @@ -145,8 +164,11 @@ static void show(const string &filename) } auto model = model_ctx.model(); cout << "bmodel version: " << model->type()->c_str() << "." 
<< model->version()->c_str() << endl; - cout << "chip: " << model->chip()->c_str() << endl; - cout << "create time: " << model->time()->c_str() << endl; + cout << "chip: " << model->chip()->c_str(); + if (model->device_num() > 1) { + cout << ", device num: " << model->device_num(); + } + cout << "\ncreate time: " << model->time()->c_str() << endl; // kernel_module info auto kernel_module = model->kernel_module(); @@ -163,8 +185,39 @@ static void show(const string &filename) cout << "kernel_module md5: " << module_md5 << endl; } + // cpuop_module info + auto cpuop_module = model->cpuop_module(); + if (!cpuop_module) { + cout << "cpuop_module: not found!" << endl; + } else { + auto module_binary = cpuop_module->binary(); + size_t module_size = module_binary->size(); + std::unique_ptr binary(new uint8_t[module_size]); + model_ctx.read_binary(module_binary, binary.get()); + string module_md5 = gen_md5_string((unsigned char *)binary.get(), module_size); + cout << "cpuop_module name: " << cpuop_module->file_name()->c_str() << endl; + cout << "cpuop_module size: " << module_size << endl; + cout << "cpuop_module md5: " << module_md5 << endl; + } + + std::map>> cascade_nets; for (uint32_t idx = 0; idx < model->net()->size(); idx++) { auto net = model->net()->Get(idx); + auto cascade = net->cascade(); + if (cascade) { + auto main_name = cascade->main_name()->str(); + if (!main_name.empty()) { + auto it = cascade_nets.find(main_name); + if (it != cascade_nets.end()) { + it->second->push_back(idx); + } else { + auto net_idx = std::make_shared>(); + net_idx->push_back(idx); + cascade_nets[main_name] = net_idx; + } + continue; + } + } auto parameter = net->parameter(); if (parameter == NULL || parameter->size() == 0) { continue; @@ -173,36 +226,173 @@ static void show(const string &filename) string net_type = is_dynamic ? 
"dynamic" : "static"; cout << "==========================================" << endl; cout << "net " << idx << ": [" << net->name()->c_str() << "] " << net_type << endl; + if (net->addr_mode()) { + cout << "addr mode: " << net->addr_mode() << endl; + } for (uint32_t i = 0; i < parameter->size(); i++) { auto net_param = parameter->Get(i); auto subnet = net_param->sub_net(); + const int core_num = net_param->core_num() == 0 ? 1 : net_param->core_num(); cout << "------------" << endl; - cout << "stage " << i << ":" << endl; + cout << "stage " << i << ", core num: " << core_num << endl; if (subnet != NULL && subnet->size() > 1) { cout << "subnet number: " << subnet->size() << endl; } show(parameter->Get(i), is_dynamic); } - auto mem_info = model_ctx.get_bmodel_mem_info(); - cout << std::endl; - cout << "device mem size: "<< - mem_info.coeff_mem_size + - mem_info.neuron_mem_size + - mem_info.bd_cmd_mem_size + - mem_info.gdma_cmd_mem_size + - mem_info.middle_buffer_size + - mem_info.dynamic_ir_mem_size - << " (coeff: "<> ins; + std::vector> outs; + for (auto idx : *it.second) { + auto net = model->net()->Get(idx); + auto parameter = net->parameter()->Get(0); + int devid = net->cascade()->device_id(); + auto input_tensors = parameter->input_tensor(); + auto output_tensors = parameter->output_tensor(); + for (uint32_t idx = 0; idx < input_tensors->size(); idx++) { + auto in = input_tensors->Get(idx); + if (in->hidden() == 1 || in->hidden() == 3) { + ins.push_back({devid, in}); + } else if (in->hidden() == 2 || in->hidden() == 4) { + outs.push_back({devid, in}); + } + } + for (uint32_t idx = 0; idx < output_tensors->size(); idx++) { + auto out = output_tensors->Get(idx); + if (out->hidden() == 1 || out->hidden() == 3) { + ins.push_back({devid, out}); + } else if (out->hidden() == 2 || out->hidden() == 4) { + outs.push_back({devid, out}); + } + } + } + reorder(ins); + reorder(outs); + for (auto &in : ins) { + cout << tensor_str(in.second, false); + } + for (auto &out : outs) { 
+ cout << tensor_str(out.second, true); + } + } + cout << std::endl; + auto mem_info = model_ctx.get_bmodel_mem_info(); + cout << std::endl; + cout << "device mem size: "<< + mem_info.coeff_mem_size + + mem_info.neuron_mem_size + + mem_info.bd_cmd_mem_size + + mem_info.gdma_cmd_mem_size + + mem_info.hau_cmd_mem_size + + mem_info.sdma_cmd_mem_size + + mem_info.middle_buffer_size + + mem_info.dynamic_ir_mem_size + << " (weight: "<net()->size(); + for (int i = 0; i < num_net; i++) { + auto net = model->net()->Get(i); + auto num_stage = model->net()->Get(i)->parameter()->size(); + for (int j = 0; j < num_stage; j++) { + auto param = model->net()->Get(i)->parameter()->Get(j); + auto coeff = param->coeff_mem(); + if (coeff == nullptr) { + continue; + } + auto location = coeff->location(); + if (location == nullptr || location->size() == 0) { + continue; + } + printf("net %d : \"%s\", stage:%d\n", i, net->name()->c_str(), j); + cout << "-------------------------------" << endl; + for (int k = 0; k < location->size(); k++) { + auto info = location->Get(k); + printf("%s : [0x%lx, 0x%lx)\n", info->name()->c_str(), info->offset(), + info->offset() + info->size()); + } + cout << "==========================================" << endl; + } + } +} + +// read binary from bmodel +static uint64_t str2ull(const char *str) { + string ull_str(str); + if (ull_str.empty()) { + return 0; + } + if (ull_str.compare(0, 2, "0x") == 0 || ull_str.compare(0, 2, "0X") == 0) { + return strtoull(ull_str.c_str(), 0, 16); + } else { + return strtoull(ull_str.c_str(), 0, 10); + } +} + +static void update_weight(int argc, char **argv) { + if (argc != 8) { + FATAL("parameters are not correct"); + } + auto dst_model = argv[2]; + auto dst_net = argv[3]; + auto dst_offset = str2ull(argv[4]); + auto src_model = argv[5]; + auto src_net = argv[6]; + auto src_offset = str2ull(argv[7]); + printf("read dst model:%s ...\n", dst_model); + ModelCtx dst_model_ctx(dst_model); + if (!dst_model_ctx) { + FATAL("file[%s] 
is not correct", dst_model); + } + printf("read src model:%s ...\n", src_model); + ModelCtx src_model_ctx(src_model); + if (!src_model_ctx) { + FATAL("file[%s] is not correct", src_model); + } + bmodel::Binary src_bin, dst_bin; + std::string src_name, dst_name; + auto dst_ret = + dst_model_ctx.get_weight(dst_net, 0, dst_offset, dst_bin, dst_name); + if (dst_ret == false || dst_bin.size() == 0) { + FATAL("get dst weight failed by net_name:%s, offset:%lx\n", dst_net, + dst_offset); + } + auto src_ret = + src_model_ctx.get_weight(src_net, 0, src_offset, src_bin, src_name); + if (src_ret == false || src_bin.size() == 0) { + FATAL("get src weight failed by net_name:%s, offset:%lx\n", src_net, + src_offset); + } + if (dst_name != src_name || dst_bin.size() != src_bin.size()) { + FATAL("weight not the same"); + } + printf("update weight ...\n"); + auto src_weight = new uint8_t[src_bin.size()]; + src_model_ctx.read_binary(&src_bin, src_weight); + dst_model_ctx.write_binary(&dst_bin, src_weight); + delete[] src_weight; + printf("update success\n"); } // update binary data when copy one net to new flatbuffers @@ -250,6 +440,7 @@ static void update_table(Table *table, const StructDef *struct_def, ModelGen &mo model_ctx.read_binary(binary, data); auto new_binary = model_gen.WriteBinary(binary->size(), data); binary->mutate_start(new_binary.start()); + delete[] data; } } break; @@ -313,6 +504,7 @@ static void extract(const string &filename) for (uint32_t net_idx = 0; net_idx < model->net()->size(); net_idx++) { auto net = model->net()->Get(net_idx); string net_name = net->name()->str(); + int32_t addr_mode = net->addr_mode(); if (net->parameter() == NULL || net->parameter()->size() == 0) { continue; } @@ -324,7 +516,7 @@ static void extract(const string &filename) auto net_offset = NetParameter::Pack(builder, netT); delete netT; model_gen.AddChip(model->chip()->str()); - model_gen.AddNet(net_name, net_offset); + model_gen.AddNet(net_name, net_offset, NULL, NULL, NULL, 
addr_mode); model_gen.Finish(); update_model(model_gen, model_ctx); ostringstream filename; @@ -364,12 +556,12 @@ static size_t tensor_bytes(const Vector> * tensor) if (type >= DATA_TYPE_NUM) { FATAL("unknown data type[%u]", type); } - size_t lsize = type_size_array[type]; + float lsize = type_size_array[type]; auto shape = tensor->Get(idx)->shape()->Get(0)->dim(); for (uint32_t i = 0; i < shape->size(); i++) { - lsize *= shape->Get(i); + lsize *= (float)shape->Get(i); } - size += lsize; + size += (size_t)lsize; } return size; } @@ -422,15 +614,25 @@ static void combine_bmodels(ModelGen &model_gen, vector> { model_gen.AddChip(model_vec[0]->model_ctx->model()->chip()->str()); auto &builder = model_gen.Builder(); + uint32_t device_num = 0; for (uint32_t model_idx = 0; model_idx < model_vec.size(); model_idx++) { auto &model_info = model_vec[model_idx]; auto model = model_info->model_ctx->model(); + if (model->device_num() > device_num) { + device_num = model->device_num(); + } for (uint32_t net_idx = 0; net_idx < model->net()->size(); net_idx++) { auto net = model->net()->Get(net_idx); if (net->parameter() == NULL || net->parameter()->size() == 0) { continue; } auto net_name = net->name()->str(); + auto cascade = net->cascade(); + if (cascade) { + // no more stage + assert(net->parameter()->size() == 1); + } + auto addr_mode = net->addr_mode(); for (uint32_t idx = 0; idx < net->parameter()->size(); idx++) { shared_ptr net_idx(new NET_INDEX_T); if (is_dir) { @@ -439,7 +641,8 @@ static void combine_bmodels(ModelGen &model_gen, vector> } auto netT = net->parameter()->Get(idx)->UnPack(); auto net_offset = NetParameter::Pack(builder, netT); - model_gen.AddNet(net_name, net_offset, &net_idx->net_idx, &net_idx->stage_idx); + model_gen.AddNet(net_name, net_offset, &net_idx->net_idx, + &net_idx->stage_idx, cascade, addr_mode); delete netT; model_info->net_index_v.push_back(net_idx); } @@ -454,6 +657,16 @@ static void combine_bmodels(ModelGen &model_gen, vector> auto 
module_tmp = model_gen.WriteBinary(module_binary->size(), binary.get()); model_gen.AddKernelModule(module_name, module_tmp); } + auto cpuop_module = model_vec[0]->model_ctx->model()->cpuop_module(); + if (cpuop_module) { + auto module_binary = cpuop_module->binary(); + auto module_name = cpuop_module->file_name()->str(); + std::unique_ptr binary(new uint8_t[module_binary->size()]); + model_vec[0]->model_ctx->read_binary(module_binary, binary.get()); + auto module_tmp = model_gen.WriteBinary(module_binary->size(), binary.get()); + model_gen.AddCpuModule(module_name, module_tmp); + } + model_gen.AddNumDevice(device_num); model_gen.Finish(); for (uint32_t idx = 0; idx < model_vec.size(); idx++) { auto &model_info = model_vec[idx]; @@ -555,20 +768,6 @@ static void combine_bmodels(int argc, char **argv, bool is_dir = false) cout << "Success: combined to [" << ofile << "]." << endl; } -// read binary from bmodel -static uint64_t str2ull(const char * str) -{ - string ull_str(str); - if (ull_str.empty()) { - return 0; - } - if (ull_str.compare(0, 2, "0x") == 0 || ull_str.compare(0, 2, "0X") == 0) { - return strtoull(ull_str.c_str(), 0, 16); - } else { - return strtoull(ull_str.c_str(), 0, 10); - } -} - static void dump_binary(int argc, char **argv) { if (argc != 6) { @@ -639,6 +838,7 @@ static void dump_kernel_module(int argc, char **argv) { static void update_kernel(ModelGen &model_gen, shared_ptr& model_info, uint8_t* module_binary, size_t binary_size, string module_name) { model_gen.AddChip(model_info->model_ctx->model()->chip()->str()); + model_gen.AddNumDevice(model_info->model_ctx->model()->device_num()); auto &builder = model_gen.Builder(); auto model = model_info->model_ctx->model(); for (uint32_t net_idx = 0; net_idx < model->net()->size(); net_idx++) { @@ -651,7 +851,13 @@ static void update_kernel(ModelGen &model_gen, shared_ptr& model_in shared_ptr net_idx(new NET_INDEX_T); auto netT = net->parameter()->Get(idx)->UnPack(); auto net_offset = 
NetParameter::Pack(builder, netT); - model_gen.AddNet(net_name, net_offset, &net_idx->net_idx, &net_idx->stage_idx); + auto cascade = net->cascade(); + if (cascade) { + // no more stage + assert(net->parameter()->size() == 1); + } + model_gen.AddNet(net_name, net_offset, &net_idx->net_idx, + &net_idx->stage_idx, cascade, net->addr_mode()); delete netT; model_info->net_index_v.push_back(net_idx); } @@ -664,6 +870,41 @@ static void update_kernel(ModelGen &model_gen, shared_ptr& model_in } } +static void update_cpuop(ModelGen &model_gen, shared_ptr& model_info, uint8_t* libcpu_binary, size_t binary_size, string libcpu_name) +{ + model_gen.AddChip(model_info->model_ctx->model()->chip()->str()); + model_gen.AddNumDevice(model_info->model_ctx->model()->device_num()); + auto &builder = model_gen.Builder(); + auto model = model_info->model_ctx->model(); + for (uint32_t net_idx = 0; net_idx < model->net()->size(); net_idx++) { + auto net = model->net()->Get(net_idx); + if (net->parameter() == NULL || net->parameter()->size() == 0) { + continue; + } + auto net_name = net->name()->str(); + for (uint32_t idx = 0; idx < net->parameter()->size(); idx++) { + shared_ptr net_idx(new NET_INDEX_T); + auto netT = net->parameter()->Get(idx)->UnPack(); + auto net_offset = NetParameter::Pack(builder, netT); + auto cascade = net->cascade(); + if (cascade) { + // no more stage + assert(net->parameter()->size() == 1); + } + model_gen.AddNet(net_name, net_offset, &net_idx->net_idx, + &net_idx->stage_idx, cascade, net->addr_mode()); + delete netT; + model_info->net_index_v.push_back(net_idx); + } + } + Binary cpuop_module = model_gen.WriteBinary(binary_size, libcpu_binary); + model_gen.AddCpuModule(libcpu_name, cpuop_module); + model_gen.Finish(); + for (auto &net_index : model_info->net_index_v) { + update_net(model_gen, *model_info->model_ctx, net_index->net_idx, net_index->stage_idx); + } +} + static void update_kernel_module(int argc, char **argv) { // tpu_model --kernel_add xx.bmodel 
xx.so if (argc != 4) { @@ -696,6 +937,36 @@ static void update_kernel_module(int argc, char **argv) { f_kernel.close(); } +static void update_cpuop_module(int argc, char **argv) { + if (argc != 4) { + FATAL("--update_cpuop parameter error."); + } + string model_path = argv[2]; + string libcpu_path = argv[3]; + ifstream f_kernel(libcpu_path, ios::in | ios::binary); + if (!f_kernel) { + FATAL("libcpuop name [%s] is not correct", libcpu_path.c_str()); + } + shared_ptr model_info(new MODEL_CTX_T); + model_info->model_ctx = make_shared(model_path); + if (model_info->model_ctx == NULL || !(*model_info->model_ctx)) { + FATAL("file[%s] is not correct", model_path.c_str()); + } + + f_kernel.seekg(0, f_kernel.end); + int binary_size = f_kernel.tellg(); + f_kernel.seekg(0, f_kernel.beg); + shared_ptr libcpu_binary(new char[binary_size]); + f_kernel.read(libcpu_binary.get(), binary_size); + string libcpu_name = libcpu_path.substr(libcpu_path.find_last_of('/') + 1); + + ModelGen model_gen; + update_cpuop(model_gen, model_info, (uint8_t*)libcpu_binary.get(), binary_size, libcpu_name); + model_gen.Save(model_path); + cout << "Success: update to [" << libcpu_name << "]." 
<< endl; + f_kernel.close(); +} + int main(int argc, char **argv) { if (argc < 2) { @@ -708,6 +979,10 @@ int main(int argc, char **argv) print(argv[2]); } else if (cmd == "--info") { show(argv[2]); + } else if (cmd == "--weight") { + show_weight(argv[2]); + } else if (cmd == "--update_weight") { + update_weight(argc, argv); } else if (cmd == "--extract") { extract(argv[2]); } else if (cmd == "--combine") { @@ -722,6 +997,8 @@ int main(int argc, char **argv) dump_kernel_module(argc, argv); } else if (cmd == "--kernel_update") { update_kernel_module(argc, argv); + } else if (cmd == "--custom_ap_update") { + update_cpuop_module(argc, argv); }else { usage(argv); exit(-1); diff --git a/tpu-cpuop/CMakeLists.txt b/tpu-cpuop/CMakeLists.txt index 9359a7f..7cad24f 100644 --- a/tpu-cpuop/CMakeLists.txt +++ b/tpu-cpuop/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 2.80) +cmake_minimum_required(VERSION 3.6) project(tpu-cpuop) if(${CMAKE_SYSTEM_NAME} MATCHES "Windows") include(old.cmake) diff --git a/tpu-cpuop/src/cpu.cpp b/tpu-cpuop/src/cpu.cpp index 9052582..042168b 100644 --- a/tpu-cpuop/src/cpu.cpp +++ b/tpu-cpuop/src/cpu.cpp @@ -58,7 +58,7 @@ void* bmcpu_init() if (layer_idx != CPU_USER_DEFINED) { cpu_layers_[layer_idx] = CpuLayerRegistry::createlayer(layer_idx); } else { - printf("bmcpu init: skip cpu_user_defined\n"); + // printf("bmcpu init: skip cpu_user_defined\n"); } } cpu_layers_[CPU_DEBUG] = CpuLayerRegistry::createlayer(CPU_DEBUG); @@ -98,11 +98,11 @@ void* bmcpu_init() CPU_ASSERT(bmcpu_user_reshape_ != NULL); CPU_ASSERT(bmcpu_user_dtype_ != NULL); } else { - #ifdef __linux__ - printf("Cannot open libusercpu.so, disable user cpu layer.\n"); - #else - printf("Cannot open libusercpu.dll, disable user cpu layer.\n"); - #endif + // #ifdef __linux__ + // printf("Cannot open libusercpu.so, disable user cpu layer.\n"); + // #else + // printf("Cannot open libusercpu.dll, disable user cpu layer.\n"); + // #endif } } else { #ifdef __linux__ diff --git 
a/tpu-cpuop/src/cpu_adaptive_average_pooling.cpp b/tpu-cpuop/src/cpu_adaptive_average_pooling.cpp index dc91337..cc3bde9 100644 --- a/tpu-cpuop/src/cpu_adaptive_average_pooling.cpp +++ b/tpu-cpuop/src/cpu_adaptive_average_pooling.cpp @@ -31,7 +31,7 @@ void cpu_adaptive_average_poolinglayer::adaptive_average_pooling( } beginW[0] = 0; endW[osizew-1] = in_shape[3]; - for (int i=0; i ps(num_thread); - for (int i = 0; i < num_thread; ++i) { - ps[i].start = start; - int len = opt + (i < OH - opt * num_thread ? 1 : 0); - ps[i].end = start + len; - start = ps[i].end; - } - auto func = [&](const ThreadParam_t *tp) { - for (int n = 0; n < N; ++n) { - const float *input = FLOAT_PTR(input_tensors_[0]) + n * C * IH * IW; - const float *grid = FLOAT_PTR(input_tensors_[1]) + n * OH * OW * 2 + tp->start * OW * 2; - float *output = FLOAT_PTR(output_tensors_[0]) + n * C * OH * OW + tp->start * OW; - for (int h = tp->start; h < tp->end; ++h) { - for (int w = 0; w < OW; ++w) { - auto fx = computeIndex(*grid, IW, rip->padding_mode, rip->align_corners); - ++grid; - auto fy = computeIndex(*grid, IH, rip->padding_mode, rip->align_corners); - ++grid; - switch (rip->mode) { - case GridSamplerBilinear: { - int x = INT(std::floor(fx)); - int y = INT(std::floor(fy)); - float dx = fx - x; - float dy = fy - y; - float tx = 1.f - dx; - float ty = 1.f - dy; - float txty = tx * ty, dxty = dx * ty, txdy = tx * dy, dxdy = dx * dy; - bool yBound_0 = y >= 0 && y < IH; - bool yBound_1 = y + 1 >= 0 && y + 1 < IH; - bool xBound_0 = x >= 0 && x < IW; - bool xBound_1 = x + 1 >= 0 && x + 1 < IW; - const float *iiter = input + y * IW + x; - float *oiter = output; - for (int c = 0; c < C; ++c) { - *oiter = 0.f; - if (yBound_0) { - if (xBound_0) - *oiter += iiter[0] * txty; - if (xBound_1) - *oiter += iiter[1] * dxty; + if (dims == 4) { + const int IH = input_shapes_[0][2]; + const int IW = input_shapes_[0][3]; + const int OH = input_shapes_[1][1]; + const int OW = input_shapes_[1][2]; + int opt = OH / 
num_thread; + int start = 0; + for (int i = 0; i < num_thread; ++i) { + ps[i].start = start; + int len = opt + (i < OH - opt * num_thread ? 1 : 0); + ps[i].end = start + len; + start = ps[i].end; + } + auto func = [&](const ThreadParam_t *tp) { + for (int n = 0; n < N; ++n) { + const float *input = FLOAT_PTR(input_tensors_[0]) + n * C * IH * IW; + const float *grid = FLOAT_PTR(input_tensors_[1]) + n * OH * OW * 2 + tp->start * OW * 2; + float *output = FLOAT_PTR(output_tensors_[0]) + n * C * OH * OW + tp->start * OW; + for (int h = tp->start; h < tp->end; ++h) { + for (int w = 0; w < OW; ++w) { + auto fx = computeIndex(*grid, IW, rip->padding_mode, rip->align_corners); + ++grid; + auto fy = computeIndex(*grid, IH, rip->padding_mode, rip->align_corners); + ++grid; + switch (rip->mode) { + case GridSamplerBilinear: { + int x = INT(std::floor(fx)); + int y = INT(std::floor(fy)); + float dx = fx - x; + float dy = fy - y; + float tx = 1.f - dx; + float ty = 1.f - dy; + float txty = tx * ty, dxty = dx * ty, txdy = tx * dy, dxdy = dx * dy; + bool yBound_0 = y >= 0 && y < IH; + bool yBound_1 = y + 1 >= 0 && y + 1 < IH; + bool xBound_0 = x >= 0 && x < IW; + bool xBound_1 = x + 1 >= 0 && x + 1 < IW; + const float *iiter = input + y * IW + x; + float *oiter = output; + for (int c = 0; c < C; ++c) { + *oiter = 0.f; + if (yBound_0) { + if (xBound_0) + *oiter += iiter[0] * txty; + if (xBound_1) + *oiter += iiter[1] * dxty; + } + if (yBound_1) { + if (xBound_0) + *oiter += iiter[IW] * txdy; + if (xBound_1) + *oiter += iiter[IW + 1] * dxdy; + } + iiter += IH * IW; + oiter += OH * OW; } - if (yBound_1) { - if (xBound_0) - *oiter += iiter[IW] * txdy; - if (xBound_1) - *oiter += iiter[IW + 1] * dxdy; + } + break; + case GridSamplerNearest: { + int x = INT(std::round(fx)); + int y = INT(std::round(fy)); + const float *iiter = input + y * IW + x; + float *oiter = output; + for (int c = 0; c < C; ++c) { + *oiter = y >= 0 && y < IH && x >= 0 && x < IW ? 
*iiter : 0.f; + iiter += IH * IW; + oiter += OH * OW; } - iiter += IH * IW; - oiter += OH * OW; } - } - break; - case GridSamplerNearest: { - int x = INT(std::round(fx)); - int y = INT(std::round(fy)); - const float *iiter = input + y * IW + x; - float *oiter = output; - for (int c = 0; c < C; ++c) { - *oiter = y >= 0 && y < IH && x >= 0 && x < IW ? *iiter : 0.f; - iiter += IH * IW; - oiter += OH * OW; + break; + default: + CPU_ASSERT(0); } + ++output; } - break; - default: - CPU_ASSERT(0); + } + } + }; + if (num_thread == 1) + func(ps.data()); + else { + std::vector threads; + for (auto &it : ps) + threads.push_back(std::thread(func, &it)); + for (auto &it : threads) + it.join(); + } + *output_shapes_ = {{N, C, OH, OW}}; + } else { + const int ID = input_shapes_[0][2]; + const int IH = input_shapes_[0][3]; + const int IW = input_shapes_[0][4]; + const int OD = input_shapes_[1][1]; + const int OH = input_shapes_[1][2]; + const int OW = input_shapes_[1][3]; + int opt = OH / num_thread; + int start = 0; + for (int i = 0; i < num_thread; ++i) { + ps[i].start = start; + int len = opt + (i < OH - opt * num_thread ? 
1 : 0); + ps[i].end = start + len; + start = ps[i].end; + } + auto func = [&](const ThreadParam_t *tp) { + for (int n = 0; n < N; ++n) { + const float *input = FLOAT_PTR(input_tensors_[0]) + n * C * ID * IH * IW; + const float *grid = FLOAT_PTR(input_tensors_[1]) + n * OD * OH * OW * 3 + OD * tp->start * OW * 3; + float *output = FLOAT_PTR(output_tensors_[0]) + n * C * OD * OH * OW + OD * tp->start * OW; + for (int d = 0; d < OD; ++d) { + for (int h = tp->start; h < tp->end; ++h) { + for (int w = 0; w < OW; ++w) { + auto fx = computeIndex(*grid, IW, rip->padding_mode, rip->align_corners); + ++grid; + auto fy = computeIndex(*grid, IH, rip->padding_mode, rip->align_corners); + ++grid; + auto fz = computeIndex(*grid, ID, rip->padding_mode, rip->align_corners); + ++grid; + switch (rip->mode) { + case GridSamplerBilinear: { + int x = INT(std::floor(fx)); + int y = INT(std::floor(fy)); + int z = INT(std::floor(fz)); + float dx = fx - x; + float dy = fy - y; + float dz = fz - z; + float tx = 1.f - dx; + float ty = 1.f - dy; + float tz = 1.f - dz; + float txtytz = tx * ty * tz, txtydz = tx * ty * dz, dxtytz = dx * ty * tz, dxtydz = dx * ty * dz; + float txdytz = tx * dy * tz, txdydz = tx * dy * dz, dxdytz = dx * dy * tz, dxdydz = dx * dy * dz; + bool zBound_0 = z >= 0 && z < ID; + bool zBound_1 = z + 1 >= 0 && z + 1 < ID; + bool yBound_0 = y >= 0 && y < IH; + bool yBound_1 = y + 1 >= 0 && y + 1 < IH; + bool xBound_0 = x >= 0 && x < IW; + bool xBound_1 = x + 1 >= 0 && x + 1 < IW; + const float *iiter = input + z * IH * IW + y * IW + x; + float *oiter = output; + for (int c = 0; c < C; ++c) { + *oiter = 0.f; + if (zBound_0) { + if (yBound_0) { + if (xBound_0) + *oiter += iiter[0] * txtytz; + if (xBound_1) + *oiter += iiter[1] * dxtytz; + } + if (yBound_1) { + if (xBound_0) + *oiter += iiter[IW] * txdytz; + if (xBound_1) + *oiter += iiter[IW + 1] * dxdytz; + } + } + if (zBound_1) { + if (yBound_0) { + if (xBound_0) + *oiter += iiter[IH * IW + 0] * txtydz; + if (xBound_1) + 
*oiter += iiter[IH * IW + 1] * dxtydz; + } + if (yBound_1) { + if (xBound_0) + *oiter += iiter[IH * IW + IW] * txdydz; + if (xBound_1) + *oiter += iiter[IH * IW + IW + 1] * dxdydz; + } + } + iiter += ID * IH * IW; + oiter += OD * OH * OW; + } + } + break; + case GridSamplerNearest: { + int x = INT(std::round(fx)); + int y = INT(std::round(fy)); + int z = INT(std::round(fz)); + const float *iiter = input + z * IH * IW + y * IW + x; + float *oiter = output; + for (int c = 0; c < C; ++c) { + *oiter = z >= 0 && z < ID && y >= 0 && y < IH && x >= 0 && x < IW ? *iiter : 0.f; + iiter += ID * IH * IW; + oiter += OD * OH * OW; + } + } + break; + default: + CPU_ASSERT(0); + } + ++output; + } } - ++output; } } + }; + if (num_thread == 1) + func(ps.data()); + else { + std::vector threads; + for (auto &it : ps) + threads.push_back(std::thread(func, &it)); + for (auto &it : threads) + it.join(); } - }; - if (num_thread == 1) - func(ps.data()); - else { - std::vector threads; - for (auto &it : ps) - threads.push_back(std::thread(func, &it)); - for (auto &it : threads) - it.join(); + *output_shapes_ = {{N, C, OD, OH, OW}}; } - *output_shapes_ = {{N, C, OH, OW}}; return 0; } int cpu_grid_samplerlayer::reshape(void *param, int psize, diff --git a/tpu-runtime/.gitignore b/tpu-runtime/.gitignore index 984151d..58070e5 100644 --- a/tpu-runtime/.gitignore +++ b/tpu-runtime/.gitignore @@ -3,3 +3,4 @@ obj .vscode include/kernel_module.h +build_thirdparty diff --git a/tpu-runtime/.gitmodules b/tpu-runtime/.gitmodules index 2d12cea..b239baa 100644 --- a/tpu-runtime/.gitmodules +++ b/tpu-runtime/.gitmodules @@ -1,3 +1,3 @@ -[submodule "tpu-common"] - path = tpu-common - url = ssh://jufa.wen@gerrit-ai.sophgo.vip:29418/tpu-common +[submodule "bmodel-zoo"] + path = bmodel-zoo + url = ../bmodel-zoo diff --git a/tpu-runtime/CMakeLists.txt b/tpu-runtime/CMakeLists.txt index 701e364..45e1ba1 100644 --- a/tpu-runtime/CMakeLists.txt +++ b/tpu-runtime/CMakeLists.txt @@ -1,7 +1,26 @@ 
-cmake_minimum_required(VERSION 2.80) +cmake_minimum_required(VERSION 3.6) project(tpu-runtime) -if(${CMAKE_SYSTEM_NAME} MATCHES "Windows") + +set(COMMIT_HASH "xxxxxxxx") +set (BRANCH_NAME "unknown") +execute_process(COMMAND date +%Y%m%d-%H:%M:%S OUTPUT_VARIABLE COMPILE_TIME) +execute_process(COMMAND git rev-parse --git-dir RESULT_VARIABLE result OUTPUT_QUIET) +if(result EQUAL 0) + execute_process(COMMAND git describe --always --abbrev=8 OUTPUT_VARIABLE COMMIT_HASH) + execute_process(COMMAND bash -c "git branch --contains HEAD | sed -n '/\* /s///p'" OUTPUT_VARIABLE BRANCH_NAME) +endif() +STRING(STRIP ${COMMIT_HASH} COMMIT_HASH) +STRING(STRIP ${BRANCH_NAME} BRANCH_NAME) +STRING(STRIP ${COMPILE_TIME} COMPILE_TIME) + +message(STATUS "COMMIT_HASH: ${COMMIT_HASH}, BRANCH_NAME: ${BRANCH_NAME}, COMPILE_TIME: ${COMPILE_TIME}") +configure_file("${PROJECT_SOURCE_DIR}/config/bmrt_version.h.in" "${CMAKE_BINARY_DIR}/bmrt_version.h") + +if("$ENV{ONLY_TEST}" STREQUAL "1") +include(test.cmake) +elseif(${CMAKE_SYSTEM_NAME} MATCHES "Windows") include(old.cmake) else() +set(CMAKE_CXX_STANDARD 17) include(new.cmake) endif() diff --git a/tpu-runtime/Makefile b/tpu-runtime/Makefile index aca705a..89e482d 100644 --- a/tpu-runtime/Makefile +++ b/tpu-runtime/Makefile @@ -46,7 +46,7 @@ SRCS_CXX += $(wildcard src/bmfunc/*.cpp) SRCS_CXX += $(wildcard src/bmtap2/*.cpp) SRCS_CXX += $(wildcard src/cpp/*.cpp) APP_C = $(wildcard app/*.c) -APP_CXX += $(wildcard app/*.cpp) +APP_CXX += app/bmrt_test.cpp app/bmrt_test_case.cpp ########translate the *cpp path to *.o path under the obj path ################ OBJS_C = $(addprefix $(OBJ_DIR)/, $(notdir $(SRCS_C:.c=.o))) diff --git a/tpu-runtime/README.md b/tpu-runtime/README.md new file mode 100644 index 0000000..9c6a496 --- /dev/null +++ b/tpu-runtime/README.md @@ -0,0 +1,157 @@ +# TPU-RUNTIME + +## 工程说明 + +tpu-runtime依赖于`libsophon`,最终编译会产生`libbmrt.so`和`bmrt_test`两个文件,并随着libsophon生成的deb包发布出去。 +其中 +1. `libbmrt.so`是加载`bmodel`并执行的核心库,用于模型推理程序开发。 +2. 
`bmrt_test`用于测试`bmodel`的应用程序 + +基本的软件关系如下 +``` + ┌─────────────┐ ┌──────────────┐ + │ app │ │ bmrt_test │ + └─────────┬───┘ └───┬──────────┘ + │ │ + ┌──▼─────────▼──┐ +kernel_module ---->│ libbmrt.so │ + └───────┬───────┘ + │ + ┌───────▼───────┐ + │ libbmlib.so │ + └───────┬───────┘ + │ + ┌───────▼───────┐ + │ driver │ + └───────┬───────┘ + │ +───────────────────────────┼─────────────────────────────── + │ + ┌────────▼──────┐ + │ TPU │ + │ Devices │ + └───────────────┘ +``` + +注意: +1. tpu-runtime提交后,要与libsophon绑定才能release出去。 +2. tpu-runtime库不区分cmodel版和设备版,只有一套代码。但libbmlib是区分cmodel和设备的 +3. 如果是支持动态加载kernel_module的设备,需要提供缺省的kernel_module, 会以静态数组的方法编译到程序中 +4. 如果tpu-runtime代码更新后,需要与libsophon进行submodule绑定,才会在打包libsophon里生效 + +## 环境准备 +```shell +# 针对所用后端下载对应工程:TPU1684或TPU1686 +git clone https://username@gerrit-ai.sophgo.vip:8443/a/TPU1686 +git clone https://username@gerrit-ai.sophgo.vip:8443/a/libsophon +cd libsophon +git submodule update --init tpu-common # 不用--recursive +git submodule update --init tpu-bmodel # 不用--recursive +git submodule update --init tpu-cpuop # 不用--recursive +git submodule update --init tpu-runtime # 不用--recursive + ./install_build_dependencies.sh +``` +注意: +目前BM1684X和A2的libsophon在两个不同的分支,BM1684x是master分支,发版用的是release分支; A2是a2_dev分支,发版用的是a2_release分支 +tpu-runtime用master分支,和在libsophon任意分支下编译,但由于bmlib在不同分支下功能不同, 建议: +* 如果编译A2上的runtime, libsophon要切换到`remotes/origin/a2_dev`分支, 可支持cmodel和设备模式 +* 如果编译BM1684X上的runtime, 使用libsophon的master分支 + +## 更新缺省kernel_module +缺省的kernel_module在tpu-runtime/lib目录下, 这里的kernel_module仅对实际设备有用,不影响cmodel模式 +如果kernel_module修改了bug, 可以采用如下方式更新kernel_module, 尽量不要手动更新,维护version比较麻烦 +```shell +source scripts/envsetup.sh +# 更新BM1684X动态加载的kernel_module, TPU1686_PATH可选,默认在tpu-runtime/../../TPU1686 +update_firmware bm1684x [TPU1686_PATH] +# 更新A2动态加载的kernel_module, TPU1686_PATH可选,默认在tpu-runtime/../../TPU1686 +update_firmware bm1686 [TPU1686_PATH] +``` + +## X86平台编译 +目前推荐的编译方式有两种, +1. 
在libsophon中编译 +```shell +#当前在libsophon目录下 +mkdir build && cd build +cmake ../ -DPLATFORM=cmodel -DCMAKE_BUILD_TYPE=Debug +make -j + +# 可选,生成libsophon的deb包 +cpack +``` +2. 利用回归脚本 +```shell +# 当前在tpu-runtime目录下 +source scripts/envsetup.sh +# 默认是Release版本, 通过EXTRA_CONFIG可以配置编译Debug版本 +# export EXTRA_CONFIG="-DCMAKE_BUILD_TYPE=Debug" +rebuild_tpu_runtime +# 或 +build_tpu_runtime + +# 此外build_thirdparty目录中有cmodel版的libbmlib.so等依赖库,也可以取用, 但注意不要放到设备上 +``` + +## SOC平台编译(在A53上运行): +```shell +source scripts/envsetup.sh +# 默认是Release版本, 通过EXTRA_CONFIG可以配置编译Debug版本 +# export EXTRA_CONFIG="-DCMAKE_BUILD_TYPE=Debug" +rebuild_tpu_runtime_soc +# 然后将编译好的文件,复制到目标机器工作目录中, 这里以'soc_device:work_dir'为例 +# 包括bmrt_test和libbmrt.so文件 +scp build/*bmrt* soc_device:work_dir + +# 此外build_thirdparty目录中还有libbmlib.so等依赖库,也可以取用 +``` + +## 设备上测试 +在目标机器上, 可以不用更换libsophon, 采用如下方式 +``` +# 利用`ssh target`命令登陆目标机器 + +# 指定寻找libbmrt.so的路径 +export LD_LIBRARY_PATH=work_dir + +# 可以测试了 +cd work_dir +./bmrt_test --bmodel test.bmodel +``` + +## kernel_module的临时调试 +引入了环境变量BMRUNTIME_USING_FIRMWARE,用于在特定路径下加载kernel_module + +kernel_module通常是在TPU1686下生成出来,可用如下命令 +``` +cd TPU1686 +# ${CHIP_ARCH} 可以为 bm1684x或bm1686 +source scripts/envsetup.sh ${CHIP_ARCH} +rebuild_firmware +# 会生成kernel_module在build/firmware_core/libfirmware_core.so +# 可将该文件复制到目标机器上, 如/home/linaro目录 + +# 在目标机器上 +export BMRUNTIME_USING_FIRMWARE="/home/linaro/libfirmware_core.so" +# 然后调用bmrt_test等命令即可使用外部的kernel_module运行了 +``` + +## 回归测试 +`scripts/envsetup.sh`提供了runtime的脚本函数 +```shell +cmodel_run_bmodel xxx.bmodel +cmodel_run_bmodel bmodel_dir +cmodel_batch_run_bmodel dir_include_bmodels + +# 进入gdb调试模式 +export EXTRA_EXEC="gdb --args" +cmodel_run_bmodel xxx.bmodel +``` + +目前线上回归的入口脚本为`test/regression.sh`,后续如要加bmodel测试可在里面修改 + +## 其他 +### BMRT_LOG控制: +目前BMRT_LOG分为-1: DEBUG, 0: INFO, 1: WARNING, 2: WRONG, 3: FATAL几个等级; +可以通过BMRT_LOG_VERSION环境变量来控制,如`export BMRT_LOG_VERSION=-1`, 表示所有大于等于-1等级的日志都会打印; +默认是0, 只打印INFO,WARNING, WRONG, FATAL信息。 diff --git 
a/tpu-runtime/app/bmrt_test.cpp b/tpu-runtime/app/bmrt_test.cpp index ca9a568..0e04b13 100755 --- a/tpu-runtime/app/bmrt_test.cpp +++ b/tpu-runtime/app/bmrt_test.cpp @@ -1,3 +1,4 @@ +#include #include "bmrt_test_inner.h" #ifndef __linux__ //#include @@ -17,6 +18,19 @@ using namespace tpu; extern u64 bmrt_must_alloc_device_mem(void*, bm_device_mem_t*, u32); extern void bmrt_must_free_device_mem(void*, bm_device_mem_t); +void dump_tensor(bm_handle_t bm_handle, bm_tensor_t &tensor) { + auto shape = tensor.shape; + int size = 1; + for (int i = 0; i < shape.num_dims; ++i){ + size *= shape.dims[i]; + } + std::vector data(size); + bm_memcpy_d2s(bm_handle, data.data(), tensor.device_mem); + // std::cout<< data[0] << "\t" << data[data.size()-1] << std::endl; + auto ptr = data.data(); + ptr[0] = ptr[0]; +} + string CHIPNAME; vector CONTEXT_DIR_V; string TEST_CASE; @@ -43,6 +57,8 @@ bool b_print_subnet_time = false; bool b_bmodel_dir = true; vector shapes; vector output_shapes; +vector devices; +std::vector> core_lists; typedef struct { FILE *f_input_ref; @@ -235,7 +251,7 @@ static int array_cmp_fix32b(void *p_exp, void *p_got, } template -void dump_debug_data(T *host_output_data, T *ref_output_data, int output_idx, int len) { +void dump_debug_data(const T *host_output_data, const T *ref_output_data, int output_idx, int len) { string outfile_name = "output_"; outfile_name += std::to_string(output_idx) + ".dat"; FILE* out_fp = fopen(outfile_name.c_str(), "w"); @@ -361,65 +377,89 @@ static int dump_tensor_all(string & tensor_name, void const* data, int dtype, v return res; } -int result_cmp(int8_t **host_output_data, int8_t **ref_output_data, int output_num, int *o_count, - bm_data_type_t *o_dtype) -{ - int res_flag = 0; - const char *info_label = "error comparing the last tensor: "; - for (int i = 0; i < output_num; ++i) { +int result_cmp_inner(int output_index, const void* host_data, const void* ref_data, int len, bm_data_type_t dtype){ int flag = 0; - int len = o_count[i]; - 
int dtype = o_dtype[i]; + const char *info_label = "error comparing the last tensor: "; switch (dtype) { case BM_INT32: - case BM_UINT32: - BMRT_DEBUG("The %d-th tensor got[0] = %d, ref[0] = %d", i, - ((int *)(host_output_data[i]))[0], ((int *)(ref_output_data[i]))[0]); - flag = array_cmp_fix32b((void *)(ref_output_data[i]), (void *)(host_output_data[i]), + case BM_UINT32: { + auto host_data_ptr =(int *)host_data; + auto ref_data_ptr =(int *)ref_data; + BMRT_DEBUG("The %d-th tensor got[0] = %d, ref[0] = %d", output_index, host_data_ptr[0], ref_data_ptr[0]); + flag = array_cmp_fix32b((void *)ref_data_ptr, (void *)host_data_ptr, dtype == BM_INT32 ? 1 : 0, len, info_label, DELTA_INT); if (OUTPUT_DEBUG) { - dump_debug_data((int *)(host_output_data[i]), (int *)(ref_output_data[i]), i, len); + dump_debug_data(host_data_ptr, ref_data_ptr, output_index, len); } - break; + } + break; case BM_INT16: - case BM_UINT16: - BMRT_DEBUG("The %d-th tensor got[0] = %d, ref[0] = %d", i, - ((short *)(host_output_data[i]))[0], ((short *)(ref_output_data[i]))[0]); - flag = array_cmp_fix16b((void *)(ref_output_data[i]), (void *)(host_output_data[i]), + case BM_UINT16: { + auto host_data_ptr =(short *)host_data; + auto ref_data_ptr =(short *)ref_data; + BMRT_DEBUG("The %d-th tensor got[0] = %d, ref[0] = %d", output_index, host_data_ptr[0], ref_data_ptr[0]); + flag = array_cmp_fix16b((void *)ref_data_ptr, (void *)host_data_ptr, dtype == BM_INT16 ? 
1 : 0, len, info_label, DELTA_INT); if (OUTPUT_DEBUG) { - dump_debug_data((short *)(host_output_data[i]), (short *)(ref_output_data[i]), i, len); + dump_debug_data(host_data_ptr, ref_data_ptr, output_index, len); } - break; - case BM_INT8: - BMRT_DEBUG("The %d-th tensor got[0] = %d, ref[0] = %d", i, - ((char *)(host_output_data[i]))[0], ((char *)(ref_output_data[i]))[0]); - flag = array_cmp_fix8b((void *)(ref_output_data[i]), (void *)(host_output_data[i]), 1, len, info_label, DELTA_INT); + } + break; + case BM_INT8:{ + auto host_data_ptr =(char *)host_data; + auto ref_data_ptr =(char *)ref_data; + BMRT_DEBUG("The %d-th tensor got[0] = %d, ref[0] = %d", output_index, + host_data_ptr[0], ref_data_ptr[0]); + flag = array_cmp_fix8b((void*)ref_data_ptr, (void *)host_data_ptr, 1, len, info_label, DELTA_INT); if (OUTPUT_DEBUG) { - dump_debug_data((char *)(host_output_data[i]), (char *)(ref_output_data[i]), i, len); + dump_debug_data(host_data_ptr, ref_data_ptr, output_index, len); } - break; - case BM_UINT8: - BMRT_DEBUG("The %d-th tensor got[0] = %d, ref[0] = %d", i, - ((unsigned char *)(host_output_data[i]))[0], ((unsigned char *)(ref_output_data[i]))[0]); - flag = array_cmp_fix8b((void *)(ref_output_data[i]), (void *)(host_output_data[i]), 0, len, info_label, DELTA_INT); + } + break; + case BM_UINT8: { + auto host_data_ptr =(unsigned char *)host_data; + auto ref_data_ptr =(unsigned char *)ref_data; + BMRT_DEBUG("The %d-th tensor got[0] = %d, ref[0] = %d", output_index, + host_data_ptr[0], ref_data_ptr[0]); + flag = array_cmp_fix8b((void *)ref_data_ptr, (void *)host_data_ptr, 0, len, info_label, DELTA_INT); if (OUTPUT_DEBUG) { - dump_debug_data((unsigned char *)(host_output_data[i]), (unsigned char *)(ref_output_data[i]), i, len); + dump_debug_data(host_data_ptr, ref_data_ptr, output_index, len); } - break; - default: + } + break; + default:{ BMRT_DEBUG("The %d-th tensor ", i); - flag = array_cmp_float(ref_output_data[i], host_output_data[i], dtype, len, - info_label, 
DELTA_FLOAT); + flag = array_cmp_float(ref_data, host_data, dtype, len, info_label, DELTA_FLOAT); if (OUTPUT_DEBUG) { - dump_debug_data((float*)(host_output_data[i]), (float*)(ref_output_data[i]), i, len); + dump_debug_data((float*)ref_data, (float*)host_data, output_index, len); } - break; + } + break; } if(flag){ - BMRT_LOG(WRONG, "comparing #%d output failed!", i); + BMRT_LOG(WRONG, "comparing #%d output failed!", output_index); } - res_flag |= flag; + return flag; + +} +int result_cmp(int8_t **host_output_data, int8_t **ref_output_data, int output_num, + int *o_count, bm_data_type_t *o_dtype) { + int res_flag = 0; + for (int i = 0; i < output_num; ++i) { + res_flag |= result_cmp_inner(i, host_output_data[i], ref_output_data[i], o_count[i], o_dtype[i]); + } + return res_flag; +} + +int result_cmp(const std::vector>& host_output_data, + const std::vector>& ref_output_data, + const std::vector& o_count, + const bm_data_type_t* o_dtype) +{ + int res_flag = 0; + int output_num = host_output_data.size(); + for (int i = 0; i < output_num; ++i) { + res_flag |= result_cmp_inner(i, host_output_data[i].data(), ref_output_data[i].data(), o_count[i], o_dtype[i]); } return res_flag; } @@ -432,37 +472,37 @@ string fix_bmodel_path(const string& path) { } /* ------------------------------------------------------------------------- */ + #ifdef __linux__ -static struct timeval g_time; -static void start_time() -{ - gettimeofday(&g_time, NULL); +typedef struct timeval bmrt_time_t; +static inline void bmrt_gettime(bmrt_time_t& val){ + gettimeofday(&val, NULL); } - -static long end_time() -{ - struct timeval time; - gettimeofday(&time, NULL); - long elapsed = (time.tv_sec - g_time.tv_sec) * 1000000 + time.tv_usec - g_time.tv_usec; - g_time = time; - return elapsed; +static inline long bmrt_interval(const bmrt_time_t& start, const bmrt_time_t& end){ + return (end.tv_sec - start.tv_sec) * 1000000 + end.tv_usec - start.tv_usec; } #else -static struct timespec g_time; -static void 
start_time() -{ - bmrt_clock_gettime(0, &g_time); +typedef struct timespec bmrt_time_t; +static inline void bmrt_gettime(bmrt_time_t& val){ + bmrt_clock_gettime(0, &val); +} +static inline long bmrt_interval(const bmrt_time_t& start, const bmrt_time_t& end){ + return (end.tv_sec - start.tv_sec) * 1000000 + (end.tv_nsec - start.tv_nsec)/1000; +} +#endif + +static bmrt_time_t g_time; +static void start_time() { + bmrt_gettime(g_time); } -static long end_time() -{ - struct timespec time; - bmrt_clock_gettime(0, &time); - long elapsed = (time.tv_sec - g_time.tv_sec) * 1000000 + (time.tv_nsec - g_time.tv_nsec)/1000; - g_time = time; +static long end_time() { + bmrt_time_t end_time; + bmrt_gettime(end_time); + long elapsed = bmrt_interval(g_time, end_time); + g_time = end_time; return elapsed; } -#endif /* --------------------------------------------------------------------------*/ /* code for read input and output reference data */ @@ -644,10 +684,11 @@ void save_neuron(void *p_bmrt, int net_idx, int stage_idx) bm_handle_t handle = ((Bmruntime *)p_bmrt)->get_bm_handle(); size_t size = 0; for (const auto &mem : device_mem) - size += bm_mem_get_device_size(mem); + size += bm_mem_get_device_size_u64(mem); char *data = new char[size]; - for (const auto &mem : device_mem) - bm_memcpy_d2s_partial(handle, data, mem, bm_mem_get_device_size(mem)); + for (const auto &mem : device_mem) { + bm_memcpy_d2s_partial_u64(handle, data, mem, bm_mem_get_device_size_u64(mem)); + } string filename = str.str(); FILE *fout = fopen(filename.c_str(), "wb"); if (NULL == fout) { @@ -668,6 +709,15 @@ void save_output(int net_idx, int8_t data[], size_t size) return; } } +void save_data(const char* filename, const void* data, size_t byte_size){ + FILE* fp = fopen(filename, "wb"); + if(!fp) { + BMRT_LOG(WARNING, "Cannot save data to file %s", filename); + return; + } + fwrite(data, byte_size, 1, fp); + fclose(fp); +} /* --------------------------------------------------------------------------*/ /* 
code for inference by new nntoolchain runtime interface */ @@ -714,6 +764,39 @@ void print_array_ex(const void* data, int len, int dtype, const char* prefix = n } } +void bmrt_launch_tensor_thread_func( + bm_handle_t &handle, void *p_bmrt, const char *net_name, + const bm_tensor_t *input_tensors, + int input_num, bm_tensor_t *output_tensors, + int output_num, bool user_mem, + bool user_stmode, const int *core_list, + int core_num, int net_idx, int stage_idx, + int chipid, bmrt_time_t &launch_time, bm_profile_t &start, bm_profile_t &end) { + if (chipid != 0x1682) { + bm_get_profile(handle, &start); + } + bool ret = bmrt_launch_tensor_multi_cores(p_bmrt, net_name, input_tensors, input_num, + output_tensors, output_num, user_mem, user_stmode, core_list, core_num); + if (ret == false) { + print_array_ex(core_list, core_num, BM_INT32, "cores="); + BMRT_LOG(FATAL, "The %d-th neuron network '%s' stage '%d' launch failed", net_idx, + net_name, stage_idx); + } + bmrt_gettime(launch_time); + for (int i = 0; i < core_num; ++i) { + auto core_id = core_list[i]; + bm_status_t core_status = bm_thread_sync_from_core(handle, core_id); + if (core_status != BM_SUCCESS) { + print_array_ex(core_list, core_num, BM_INT32, "cores="); + BMRT_LOG(FATAL, "The %d-th neuron network '%s' stage '%d' sync failed, core=%d", net_idx, + net_name, stage_idx, core_id); + } + } + if (chipid != 0x1682) { + bm_get_profile(handle, &end); + } +} + void bmrt_test() { #ifdef __linux__ @@ -725,20 +808,32 @@ void bmrt_test() #endif // create bmruntime - bm_handle_t bm_handle; - bm_device_mem_t prealloc_mem; - bm_status_t status = bm_dev_request(&bm_handle, DEV_ID); - if (BM_SUCCESS != status) { - BMRT_LOG(FATAL, "bm_dev_request failed, id:[%d]", DEV_ID); - } unsigned int chipid = 0; - if (0 != bm_get_chipid(bm_handle, &chipid)) { - BMRT_LOG(FATAL, "Cannot get chipid"); + if (devices.empty()) { + devices.push_back(DEV_ID); } - #ifdef _WIN32 - chipid = 0x1684; - #endif - void *p_bmrt = bmrt_create(bm_handle); + 
int device_num = devices.size(); + vector bm_handles(device_num); + bm_device_mem_t prealloc_mem; + bm_status_t status; + for (int i = 0; i < device_num; i++) { + status = bm_dev_request(&bm_handles[i], devices[i]); + if (BM_SUCCESS != status) { + BMRT_LOG(FATAL, "bm_dev_request failed, id:[%d]", devices[i]); + } + unsigned int chipid_ = 0; + if (0 != bm_get_chipid(bm_handles[i], &chipid_)) { + BMRT_LOG(FATAL, "Cannot get chipid"); + } + if (i == 0) { + chipid = chipid_; + } else if (chipid != chipid_) { + BMRT_LOG(FATAL, "Not same chipid"); + } + BMRT_LOG(INFO, "device_index=%d, chip_id=%d", i, chipid_); + } + bm_handle_t bm_handle = bm_handles[0]; + auto p_bmrt = bmrt_create_ex(bm_handles.data(), device_num); if (PREALLOC_SIZE != 0) { bmrt_must_alloc_device_mem(p_bmrt, &prealloc_mem, PREALLOC_SIZE); @@ -751,6 +846,18 @@ void bmrt_test() #ifdef SOC_MODE set_bmrt_mmap(p_bmrt, b_enable_mmap); #endif + int REAL_MEM_NUM = MEM_NUM * core_lists.size(); + + bool use_multi_thread = true; + if (core_lists.size() <= 1) { + //only multi-core multi-mession use threads, cause start thread reduce about 100us performance + use_multi_thread = false; + } +#ifdef USING_CMODEL + //TODO, dynamic backend exist data race + use_multi_thread = false; +#endif + // load bmodel by file for (auto &context_dir : CONTEXT_DIR_V) { context_info_t info; @@ -781,19 +888,9 @@ void bmrt_test() BMRT_LOG(INFO, "==> running network #%d, name: %s, loop: %d", net_idx, net_names[net_idx], loop); auto net_info = bmrt_get_network_info(p_bmrt, net_names[net_idx]); - #ifdef __linux__ - bm_tensor_t output_tensors[MEM_NUM][net_info->output_num]; - #else - //boost::shared_array> output_tensors( - // new boost::shared_array[MEM_NUM]); - //for (int row = 0; row < MEM_NUM; ++row) { - // output_tensors[row].reset(new bm_shape_t[net_info->output_num]); - //} - bm_tensor_t **output_tensors; - output_tensors = new bm_tensor_t *[MEM_NUM]; - for (int i = 0; i < MEM_NUM; i++) - output_tensors[i] = new 
bm_tensor_t[net_info->output_num]; - #endif + + + std::vector> output_tensors(REAL_MEM_NUM, std::vector(net_info->output_num)); for (int stage_idx = 0; stage_idx < net_info->stage_num; stage_idx++) { if (STAGE_SEL != -1) { @@ -807,67 +904,46 @@ void bmrt_test() auto &stage = net_info->stages[stage_idx]; // prepare output tensor - #ifdef __linux__ - int8_t *host_output[MEM_NUM][net_info->output_num]; - int output_count[net_info->output_num]; - #else - int8_t ***host_output; - host_output = new int8_t **[MEM_NUM]; - for (int i = 0; i < MEM_NUM; i++) - host_output[i] = new int8_t *[net_info->output_num]; - std::shared_ptr output_count_(new int[net_info->output_num], std::default_delete()); - int* output_count = output_count_.get(); - #endif + std::vector>> host_output(REAL_MEM_NUM, std::vector>(net_info->output_num)); + std::vector output_count(net_info->output_num); + size_t size,ref_size; - for (int mem_idx = 0; mem_idx < MEM_NUM; ++mem_idx) { + for (int mem_idx = 0; mem_idx < REAL_MEM_NUM; ++mem_idx) { for (int output_idx = 0; output_idx < net_info->output_num; output_idx++) { + int devid = net_info->output_loc_devices[output_idx]; auto &output_tensor = output_tensors[mem_idx][output_idx]; if (output_idx < (int)output_shapes.size()) { - bmrt_tensor(&output_tensor, p_bmrt, net_info->input_dtypes[output_idx], - shapes[output_idx]); + bmrt_tensor_ex(&output_tensor, p_bmrt, devid, net_info->input_dtypes[output_idx], shapes[output_idx]); } else { - bmrt_tensor(&output_tensor, p_bmrt, net_info->output_dtypes[output_idx], - stage.output_shapes[output_idx]); + bmrt_tensor_ex(&output_tensor, p_bmrt, devid, net_info->output_dtypes[output_idx], stage.output_shapes[output_idx]); } size = bmrt_tensor_bytesize(&output_tensor); ref_size = bmrt_shape_count(&stage.output_shapes[output_idx]) * bmrt_data_type_size(output_tensor.dtype); if (output_shapes.size() > 0) ref_size = size; // If we set shape, ref_size may be smaller than size - host_output[mem_idx][output_idx] = new 
int8_t[ref_size]; - memset(host_output[mem_idx][output_idx], 0, ref_size); + host_output[mem_idx][output_idx].resize(ref_size); + memset(host_output[mem_idx][output_idx].data(), 0, ref_size); } } // prepare input tensor - #ifdef __linux__ - struct timeval t1, t2, t3, t4, t5; - gettimeofday(&t1, NULL); - #else - struct timespec t1, t2, t3, t4, t5; - bmrt_clock_gettime(0, &t1); - #endif - #ifdef __linux__ - bm_tensor_t input_tensors[net_info->input_num]; - #else - std::shared_ptr input_tensors_(new bm_tensor_t[net_info->input_num], - std::default_delete()); - bm_tensor_t *input_tensors = input_tensors_.get(); - #endif + bmrt_time_t t1, t2, t3, t4, t5, t6; + std::vector launch_times(core_lists.size()); + bmrt_gettime(t1); + std::vector input_tensors(net_info->input_num); for (int input_idx = 0; input_idx < net_info->input_num; input_idx++) { + int devid = net_info->input_loc_devices[input_idx]; auto &input_tensor = input_tensors[input_idx]; if (input_idx < (int)shapes.size()) { - bmrt_tensor(&input_tensor, p_bmrt, net_info->input_dtypes[input_idx], - shapes[input_idx]); + bmrt_tensor_ex(&input_tensor, p_bmrt, devid, net_info->input_dtypes[input_idx], shapes[input_idx]); } else { - bmrt_tensor(&input_tensor, p_bmrt, net_info->input_dtypes[input_idx], - stage.input_shapes[input_idx]); + bmrt_tensor_ex(&input_tensor, p_bmrt, devid, net_info->input_dtypes[input_idx], stage.input_shapes[input_idx]); } size_t size = bmrt_tensor_bytesize(&input_tensor); - int8_t *host_data = new int8_t[size]; - + std::vector host_data(size); BMRT_LOG(INFO, "reading input #%d, bytesize=%zu", input_idx, size); - read_ref_data(host_data, size, net_idx, true); - print_array_ex(host_data, bmrt_shape_count(&input_tensor.shape), net_info->input_dtypes[input_idx], " --> input_data:"); + read_ref_data(host_data.data(), size, net_idx, true); + print_array_ex(host_data.data(), bmrt_shape_count(&input_tensor.shape), net_info->input_dtypes[input_idx], " --> input_data:"); // dump input ref tensors in 
NCHW format into current directory if (OUTPUT_DEBUG) { @@ -882,61 +958,78 @@ void bmrt_test() for (int i = 0; i < input_shape.num_dims; i ++) { shape.push_back(input_shape.dims[i]); } - dump_tensor_all(tensor_name, host_data, net_info->input_dtypes[input_idx], shape); + dump_tensor_all(tensor_name, host_data.data(), net_info->input_dtypes[input_idx], shape); } // <== dump input ref tensors - bm_memcpy_s2d(bm_handle, input_tensor.device_mem, ((void *)host_data)); - delete[] host_data; + bm_memcpy_s2d(bm_handles[devid], input_tensor.device_mem, ((void *)host_data.data())); } - bm_profile_t start,end; - memset(&start, 0, sizeof(bm_profile_t)); - memset(&end, 0, sizeof(bm_profile_t)); + std::vector starts(1), ends(1); + std::vector threads; + if (use_multi_thread) { + starts.resize(2); + ends.resize(2); + threads.resize(core_lists.size()); + } + for (auto &s : starts) memset(&s, 0, sizeof(bm_profile_t)); + for (auto &e : ends) memset(&e, 0, sizeof(bm_profile_t)); + + bmrt_gettime(t2); + for(size_t group_idx = 0; group_idxname, stage_idx, core_list.data(), core_list.size()); + if (!pre_alloc_neuron_ret) { + std::string core_list_str = ""; + for (auto &core_id_ : core_list) { core_list_str += std::to_string(core_id_) + ","; } + BMRT_LOG(FATAL, "net:%s pre_alloc_neuron failed, stage_idx:%d, core_list:%s", net_info->name, stage_idx, core_list_str.c_str()); + } + } - #ifdef __linux__ - gettimeofday(&t2, NULL); - #else - bmrt_clock_gettime(0, &t2); - #endif + bmrt_gettime(t3); // calculate for CAL_TIMES times - #ifdef __linux__ - unsigned long total_elapsed = 0, npu_elapsed = 0; - #else - unsigned long long total_elapsed = 0, npu_elapsed = 0; - #endif + unsigned long total_elapsed = 0, npu_elapsed = 0, npu1_elapsed = 0; for (int t = 0; t < CAL_TIMES; t++) { if (chipid != 0x1682) { - bm_get_profile(bm_handle, &start); start_time(); } int n = (MEM_NUM == 1) ? 
0 : t; - bool ret = bmrt_launch_tensor_ex(p_bmrt, net_names[net_idx], input_tensors, - net_info->input_num, output_tensors[n], net_info->output_num, - true, false); - if (ret == true) { - /* TODO : exception now if bmodel only have cpu subnet */ - status = bm_thread_sync(bm_handle); - } - if (ret == false || BM_SUCCESS != status) { - BMRT_LOG(FATAL, "The %d-th neuron network '%s' stage '%d' inference failed", net_idx, - net_info->name, stage_idx); + if (use_multi_thread) { + for(size_t group_idx = 0; group_idxname, input_tensors.data(), net_info->input_num, + output_tensors[n * core_lists.size() + group_idx].data(), net_info->output_num, true, false, + core_list.data(), core_list.size(), net_idx, stage_idx, + chipid, std::ref(launch_times[group_idx]), std::ref(starts[group_idx]), std::ref(ends[group_idx]))); + } + for (auto& thread: threads) { + thread.join(); + } + } else { + for(size_t group_idx = 0; group_idxname, input_tensors.data(), net_info->input_num, + output_tensors[n * core_lists.size() + group_idx].data(), net_info->output_num, true, false, + core_list.data(), core_list.size(), net_idx, stage_idx, + chipid, launch_times[group_idx], starts[group_idx], ends[group_idx]); + } } + /* TODO : exception now if bmodel only have cpu subnet */ + if (chipid != 0x1682) { total_elapsed += end_time(); - bm_get_profile(bm_handle, &end); - npu_elapsed += end.tpu_process_time - start.tpu_process_time; + for (size_t i = 0; i < ends.size(); i++) { + npu_elapsed += ends[i].tpu_process_time - starts[i].tpu_process_time; + npu1_elapsed += ends[i].tpu1_process_time - starts[i].tpu1_process_time; + } } } // end continue calculate (add by nan.wu) - #ifdef __linux__ - gettimeofday(&t3, NULL); - #else - bmrt_clock_gettime(0, &t3); - #endif + bmrt_gettime(t4); // save neuron memory for debug if (EXPORT_NEURON && loop == 0) { @@ -944,7 +1037,7 @@ void bmrt_test() } // memcpy output data from device to system - for (int t = 0; t < MEM_NUM; t++) { + for (int t = 0; t < REAL_MEM_NUM; 
t++) { for (int output_idx = 0; output_idx < net_info->output_num; ++output_idx) { auto &output_tensor = output_tensors[t][output_idx]; if (bmrt_shape_count(&output_tensor.shape) > @@ -957,11 +1050,11 @@ void bmrt_test() } size_t out_size = bmrt_tensor_bytesize(&output_tensor); size_t out_ref_size = bmrt_shape_count(&stage.output_shapes[output_idx]) * bmrt_data_type_size(output_tensor.dtype); - bm_memcpy_d2s_partial(bm_handle, host_output[t][output_idx], output_tensor.device_mem, + bm_memcpy_d2s_partial(bm_handle, host_output[t][output_idx].data(), output_tensor.device_mem, out_size); output_count[output_idx] = bmrt_shape_count(&output_tensor.shape); if (EXPORT_OUTPUT && loop == 0 && t == 0) { - save_output(net_idx, host_output[0][output_idx], out_ref_size); + save_output(net_idx, host_output[0][output_idx].data(), out_ref_size); } // dump output tensors in NCHW format into current directory @@ -972,24 +1065,15 @@ void bmrt_test() for (int i = 0; i < output_shape.num_dims; i ++) { shape.push_back(output_shape.dims[i]); } - dump_tensor_all(tensor_name, host_output[t][output_idx], output_tensor.dtype, shape); + dump_tensor_all(tensor_name, host_output[t][output_idx].data(), output_tensor.dtype, shape); } // <== dump output tensors } } - #ifdef __linux__ - gettimeofday(&t4, NULL); - #else - bmrt_clock_gettime(0, &t4); - #endif + bmrt_gettime(t5); // and read ref output data - #ifdef __linux__ - int8_t *ref_output[net_info->output_num]; - #else - std::shared_ptr ref_output_(new int8_t*[net_info->output_num], std::default_delete()); - int8_t** ref_output = ref_output_.get(); - #endif + std::vector> ref_output(net_info->output_num); for (int output_idx = 0; output_idx < net_info->output_num; ++output_idx) { auto &output_tensor = output_tensors[0][output_idx]; /* shape gap count = max shape count - real shape count */ @@ -1001,10 +1085,10 @@ void bmrt_test() net_idx, net_info->name, stage_idx, output_idx); } size_t size = bmrt_tensor_bytesize(&output_tensor); - 
ref_output[output_idx] = new int8_t[size]; + ref_output[output_idx].resize(size); BMRT_LOG(INFO, "reading output #%d, bytesize=%zu", output_idx, size); - read_ref_data(ref_output[output_idx], size, net_idx, false, shape_gap_count * bmrt_data_type_size(output_tensor.dtype)); - print_array_ex(ref_output[output_idx], elem_num, net_info->output_dtypes[output_idx], " --> output ref_data:"); + read_ref_data(ref_output[output_idx].data(), size, net_idx, false, shape_gap_count * bmrt_data_type_size(output_tensor.dtype)); + print_array_ex(ref_output[output_idx].data(), elem_num, net_info->output_dtypes[output_idx], " --> output ref_data:"); // dump output ref tensors in NCHW format into current directory if (OUTPUT_DEBUG) { @@ -1014,19 +1098,18 @@ void bmrt_test() for (int i = 0; i < output_shape.num_dims; i ++) { shape.push_back(output_shape.dims[i]); } - dump_tensor_all(tensor_name, ref_output[output_idx], output_tensor.dtype, shape); + dump_tensor_all(tensor_name, ref_output[output_idx].data(), output_tensor.dtype, shape); } // <== dump output ref tensors - } if (chipid == 0x1682) { // get true performance time + unsigned long long last_api_process_time_us = 0; #ifdef __linux__ - long unsigned int last_api_process_time_us = 0; + bm_get_last_api_process_time_us(bm_handle, (long unsigned int*)&last_api_process_time_us); #else - unsigned long long last_api_process_time_us = 0; - #endif bm_get_last_api_process_time_us(bm_handle, &last_api_process_time_us); + #endif BMRT_LOG(INFO, "net[%s] stage[%d], the last_api_process_time_us is %lu us", net_info->name, stage_idx, last_api_process_time_us); } else { @@ -1034,9 +1117,30 @@ void bmrt_test() total_elapsed = npu_elapsed; } npu_elapsed /= CAL_TIMES; + npu1_elapsed /= CAL_TIMES; total_elapsed /= CAL_TIMES; - BMRT_LOG(INFO, "net[%s] stage[%d], launch total time is %lu us (npu %lu us, cpu %lu us)", - net_info->name, stage_idx, total_elapsed, npu_elapsed, total_elapsed - npu_elapsed); + unsigned long max_npu_time = max(npu_elapsed, 
npu1_elapsed); + unsigned long cpu_time = total_elapsed - max_npu_time; + + auto launch_time = launch_times.front(); + if (use_multi_thread) { + for (size_t i = 1; i < launch_times.size(); ++i) { + #ifdef __linux__ + if (launch_times[i].tv_usec > launch_time.tv_usec) launch_time = launch_times[i]; + #else + if (launch_times[i].tv_nsec > launch_time.tv_nsec) launch_time = launch_times[i]; + #endif + } + } + unsigned long launch_time_us = bmrt_interval(t3, launch_time); + unsigned long sync_time_us = bmrt_interval(launch_time, t4); + if (core_lists.size() == 1){ + BMRT_LOG(INFO, "net[%s] stage[%d], launch total time is %lu us (npu %lu us, cpu %lu us), (launch func time %lu us, sync %lu us)", + net_info->name, stage_idx, total_elapsed, max_npu_time, cpu_time, launch_time_us, sync_time_us); + } else { + BMRT_LOG(INFO, "net[%s] stage[%d], launch total time is %lu us (npu %lu us [%lu us, %lu us], cpu %lu us), (launch func time %lu us, sync %lu us)", + net_info->name, stage_idx, total_elapsed, max_npu_time, npu_elapsed, npu1_elapsed, cpu_time, launch_time_us, sync_time_us); + } } BMRT_LOG(INFO, "+++ The network[%s] stage[%d] output_data +++", net_info->name, stage_idx); @@ -1049,17 +1153,43 @@ void bmrt_test() prefix = prefix + std::to_string(out_tensor.shape.dims[j]) + " "; } prefix += "]"; - print_array_ex(host_output[0][i], num_elem, net_info->output_dtypes[i], prefix.c_str()); + print_array_ex(host_output[0][i].data(), num_elem, net_info->output_dtypes[i], prefix.c_str()); } if (NEED_CMP) { // compare inference output data with reference data - int flag = 0; - for (int t = 0; t < MEM_NUM; t++) { - BMRT_LOG(INFO, "==>comparing #%d output ... ", t); - flag |= result_cmp(host_output[t], ref_output, net_info->output_num, output_count, - net_info->output_dtypes); + BMRT_LOG(INFO, "==>comparing output in mem #0 ... 
"); + bool failed = result_cmp(host_output[0], ref_output, output_count, net_info->output_dtypes); + + // compare multiple outputs from multiple infer mems, all of them should be bit level equal + if(!failed && REAL_MEM_NUM>1){ + BMRT_LOG(INFO, "==> comparing multiple mems ... "); + for (int output_idx = 0; output_idx < net_info->output_num; ++output_idx) { + auto ref_output_data = host_output[0][output_idx].data(); + auto& ref_tensor = output_tensors[0][output_idx]; + int elem_num = output_count[output_idx]; + int byte_len = elem_num * bmrt_data_type_size(ref_tensor.dtype); + std::string ref_info = std::string("mem")+std::to_string(0)+"_output" +std::to_string(output_idx); + print_array_ex(ref_output_data, elem_num, ref_tensor.dtype, ref_info.c_str()); + for(int t = 1; tname, stage_idx); } else { bmrt_trace(p_bmrt); @@ -1067,44 +1197,30 @@ void bmrt_test() } } - #ifdef __linux__ - gettimeofday(&t5, NULL); - long use1 = (t2.tv_sec - t1.tv_sec) * 1000000 + t2.tv_usec - t1.tv_usec; - long use2 = (t3.tv_sec - t2.tv_sec) * 1000000 + t3.tv_usec - t2.tv_usec; - long use3 = (t4.tv_sec - t3.tv_sec) * 1000000 + t4.tv_usec - t3.tv_usec; - long use4 = (t5.tv_sec - t4.tv_sec) * 1000000 + t5.tv_usec - t4.tv_usec; - #else - bmrt_clock_gettime(0, &t5); - long use1 = (t2.tv_sec - t1.tv_sec) * 1000000 + (t2.tv_nsec - t1.tv_nsec)/1000; - long use2 = (t3.tv_sec - t2.tv_sec) * 1000000 + (t3.tv_nsec - t2.tv_nsec)/1000; - long use3 = (t4.tv_sec - t3.tv_sec) * 1000000 + (t4.tv_nsec - t3.tv_nsec)/1000; - long use4 = (t5.tv_sec - t4.tv_sec) * 1000000 + (t5.tv_nsec - t4.tv_nsec)/1000; - #endif + bmrt_gettime(t6); + + long use1 = bmrt_interval(t1, t2); + long use2 = bmrt_interval(t2, t3); + long use3 = bmrt_interval(t3, t4); + long use4 = bmrt_interval(t4, t5); + long use5 = bmrt_interval(t5, t6); + BMRT_LOG(INFO, "load input time(s): %f", (float)use1 / 1000000); - BMRT_LOG(INFO, "calculate time(s): %f", (float)use2 / 1000000); - BMRT_LOG(INFO, "get output time(s): %f", (float)use3 / 
1000000); - BMRT_LOG(INFO, "compare time(s): %f", (float)use4 / 1000000); + BMRT_LOG(INFO, "pre alloc time(s): %f", (float)use2 / 1000000); + BMRT_LOG(INFO, "calculate time(s): %f", (float)use3 / 1000000); + BMRT_LOG(INFO, "get output time(s): %f", (float)use4 / 1000000); + BMRT_LOG(INFO, "compare time(s): %f", (float)use5 / 1000000); // free memory for (int i = 0; i < net_info->input_num; ++i) { - bmrt_must_free_device_mem(p_bmrt, input_tensors[i].device_mem); + int devid = net_info->input_loc_devices[i]; + bm_free_device(bm_handles[devid], input_tensors[i].device_mem); } - for (int t = 0; t < MEM_NUM; t++) { + for (int t = 0; t < REAL_MEM_NUM; t++) { for (int i = 0; i < net_info->output_num; ++i) { - bmrt_must_free_device_mem(p_bmrt, output_tensors[t][i].device_mem); - delete[] host_output[t][i]; + int devid = net_info->output_loc_devices[i]; + bm_free_device(bm_handles[devid], output_tensors[t][i].device_mem); } - #ifndef __linux__ - delete[] host_output[t]; - delete[] output_tensors[t]; - #endif - } - #ifndef __linux__ - delete[] host_output; - delete[] output_tensors; - #endif - for (int i = 0; i < net_info->output_num; ++i) { - delete[] ref_output[i]; } if (STAGE_SEL != -1) { fseek_ref_file_by_stage(stage_idx + 1, net_info->stage_num, net_idx, net_info); @@ -1113,19 +1229,21 @@ void bmrt_test() if (shapes.size() > 0) { break; } - } + } // stage_num if (NET_SEL != -1) { break; } - } - } + } // net_num + } // loopnum free(net_names); close_ref_file(); if (PREALLOC_SIZE != 0) { bmrt_must_free_device_mem(p_bmrt, prealloc_mem); } bmrt_destroy(p_bmrt); - bm_dev_free(bm_handle); + for (int i = 0; i < device_num; i++) { + bm_dev_free(bm_handles[i]); + } } extern void bmrt_test_case(); @@ -1140,6 +1258,8 @@ vector test_case_v = { "bmrt_launch_data", "bmrt_simple_api", "bmrt_multi_thread", + "bmrt_get_bmodel_api", + "bmrt_get_bmodel_api_c", // bmruntime new c++ interface "bmcpp_load_bmodel", "bmcpp_load_bmodel_data", @@ -1152,6 +1272,8 @@ vector test_case_v = { // 
bmtap2 c++ interface "bmtap2cpp_load_bmodel", "bmtap2cpp_multi_thread", + // bmruntime multi-core interface + "bmmc_multi_mession", }; void Usage() @@ -1180,6 +1302,10 @@ void Usage() " --debug_output : Dump output data and reference data for debug.\n" " --shapes : Set shapes of the input shapes.\n" " --output_shapes : Set shapes of the output shapes.\n" + " --cascade_device : Set devices to run for cascade model, e.g. 1,2\n" + " --core_list : Set the core id list those will be used to inference\n" + " e.g. 0,1 means using 0,1 core together to infer the multi-core compiled bmodel.\n" + " 0:1 means using 0,1 core to infer the single-core compiled bmodel with parallelly mession.\n" #ifdef DEBUG " --test_case : Test api case, \n" " Option:\n" @@ -1214,10 +1340,12 @@ DEFINE_int32(stage_idx, -1, "Select the stage with stage_idx to run."); DEFINE_bool(debug_output, false, "Dump output data and reference data for debug."); DEFINE_string(shapes, "", "Set shapes of the input shapes."); DEFINE_string(output_shapes, "", "Set shapes of the output shapes."); +DEFINE_string(cascade_device, "", "Set devices to run for cascade model, e.g. 
1,2"); DEFINE_bool(mmap, true, "Mmap"); DEFINE_int32(calculate_times, 1, "Calculate time."); DEFINE_string(output_ref, "", "Output_ref"); DEFINE_bool(only_cmp_last, false, "only cmp last"); +DEFINE_string(core_list, "", ""); DECLARE_bool(help); #ifdef DEBUG DEFINE_string(test_case, "", "Test api case"); @@ -1303,6 +1431,58 @@ static vector parseShapes(const string &str) { return dst; } +static void split(const std::string &s, const std::string &delim, + std::vector &ret) { + size_t last = 0; + size_t index = s.find_first_of(delim, last); + while (index != std::string::npos) { + ret.push_back(s.substr(last, index - last)); + last = index + 1; + index = s.find_first_of(delim, last); + } + if (last < s.length()) { + ret.push_back(s.substr(last)); + } +} + +static vector parseCascadeDevices(const string &str) { + vector devices; + vector sub_str; + split(str, ",", sub_str); + for(auto &s :sub_str) { + devices.push_back(std::atoi(s.c_str())); + } + return devices; +} + +static vector parseList(const string &str) { + vector dst; + std::string dst_str(str); + for (auto &iter : dst_str) { + if (iter == ',') + iter = ' '; + } + std::istringstream out(dst_str); + string tmp; + while (out >> tmp) { + if (tmp == "," || tmp == " ") + continue; + dst.emplace_back(std::stoi(tmp)); + } + return dst; +} + +static vector> parseMultipleList(const string &str) { + std::vector list_strings; + split(str, ":", list_strings); + std::vector> results; + for(auto& list_string: list_strings){ + auto list = parseList(list_string); + results.push_back(list); + } + return results; +} + static void deal_with_options(int argc, char **argv) { #ifdef __linux__ @@ -1334,6 +1514,8 @@ static void deal_with_options(int argc, char **argv) {"debug_output", no_argument, &lopt, 14}, {"shapes", required_argument, &lopt, 15}, {"output_shapes", required_argument, &lopt, 16}, + {"cascade_device", required_argument, &lopt, 17}, + {"core_list", required_argument, &lopt, 18}, {0, 0, 0, 0}}; if (argc < 2) { @@ 
-1433,6 +1615,12 @@ static void deal_with_options(int argc, char **argv) case 16: output_shapes = parseShapes(optarg); break; + case 17: + devices = parseCascadeDevices(optarg); + break; + case 18: + core_lists = parseMultipleList(optarg); + break; } break; case '?': @@ -1468,8 +1656,11 @@ static void deal_with_options(int argc, char **argv) " --stage_idx : Select the stage with stage_idx to run.\n" " --debug_output : Dump output data and reference data for debug.\n" " --shapes : Set shapes of the input shapes.\n" - " --output_shapes : Set shapes of the output shapes.\n"); - + " --output_shapes : Set shapes of the output shapes.\n" + " --core_list : Set the core id list those will be used to inference.\n" + " e.g. 0,1,2,3 means using 0,1,2,3 core to infer the bmodel.\n" + " 0,1:2,3 means using 0,1 core and 2,3 core to infer the bmodel parallelly.\n" + ); gflags::ParseCommandLineFlags(&argc, &argv, true); if (FLAGS_context_dir.empty() == false) @@ -1493,7 +1684,8 @@ static void deal_with_options(int argc, char **argv) CAL_TIMES = FLAGS_calculate_times; EXPORT_NEURON = FLAGS_export_neuron; EXPORT_OUTPUT = FLAGS_export_output; - OUTPUT_REF_NAME = FLAGS_output_ref; + if (FLAGS_output_ref.empty() == false) + OUTPUT_REF_NAME = FLAGS_output_ref; if (FLAGS_bmodel.empty() == false) { BMODEL_PATH = FLAGS_bmodel; NEED_CMP = false; @@ -1503,6 +1695,7 @@ static void deal_with_options(int argc, char **argv) if (FLAGS_debug_output) OUTPUT_DEBUG = true; shapes = parseShapes(FLAGS_shapes); output_shapes = parseShapes(FLAGS_output_shapes); + core_lists = parseMultipleList(FLAGS_core_list); if (FLAGS_only_cmp_last) MEM_NUM = 1; if (!NEED_CMP) MEM_NUM = 1; gflags::HandleCommandLineHelpFlags(); @@ -1511,10 +1704,21 @@ static void deal_with_options(int argc, char **argv) BMRT_LOG(INFO, "Loop num: %d", LOOP_NUM); #endif + if (core_lists.empty()) { + core_lists = {{0}}; + } +} + +static inline void init_log() { + auto level = bmrt_get_current_log_level(); + if(level > BMRT_LogLevel::INFO) 
{ + bmrt_set_current_log_level(BMRT_LogLevel::INFO); + } } int main(int argc, char **argv) { + init_log(); deal_with_options(argc, argv); try { if (TEST_CASE.empty()) { diff --git a/tpu-runtime/app/bmrt_test_case.cpp b/tpu-runtime/app/bmrt_test_case.cpp old mode 100644 new mode 100755 index fd98b28..f2e2ab9 --- a/tpu-runtime/app/bmrt_test_case.cpp +++ b/tpu-runtime/app/bmrt_test_case.cpp @@ -260,13 +260,20 @@ static void thread_entry_bmrt_launch(int thread_id, launch_unit_t *launch_unit) for (int i = 0; i < input_num; i++) { input_datas[i] = launch_unit->ref_input_v[i]; } - bool ret = bmrt_launch_data(g_bmrt, net_name, input_datas, launch_unit->input_shape_v.data(), - input_num, output_datas, output_shapes, output_num, false); + + auto& core_list = core_lists[thread_id%core_lists.size()]; + bool ret = bmrt_launch_data_multi_cores(g_bmrt, net_name, input_datas, launch_unit->input_shape_v.data(), + input_num, output_datas, output_shapes, output_num, false, core_list.data(), core_list.size()); if (!ret) { BMRT_LOG(FATAL, "launch net[%s] stage[%d] failed", net_name, launch_unit->stage_idx); } // sync, wait for finishing inference - bm_thread_sync(g_bm_handle); + for(size_t core_idx=0; core_idx < core_list.size(); core_idx++) { + bm_status_t core_status = bm_thread_sync_from_core(g_bm_handle, core_list[core_idx]); + if (core_status != BM_SUCCESS) { + BMRT_LOG(FATAL, "bm_thread_sync_from_core failed, core_id:%d, status:%d", core_list[core_idx], core_status); + } + } for (int i = 0; i < output_num; i++) { count_v.push_back(bmrt_shape_count(&output_shapes[i])); } @@ -288,13 +295,50 @@ static void thread_entry_bmrt_launch(int thread_id, launch_unit_t *launch_unit) std::shared_ptr output_tensors_(new bm_tensor_t[output_num], std::default_delete()); bm_tensor_t* output_tensors = output_tensors_.get(); #endif - bool ret = bmrt_launch_tensor(g_bmrt, net_name, input_tensors, input_num, output_tensors, - output_num); - if (!ret) { - BMRT_LOG(FATAL, "launch net[%s] stage[%d] 
failed", net_name, launch_unit->stage_idx); + auto& core_list = core_lists[thread_id%core_lists.size()]; + if (TEST_CASE == "bmrt_get_bmodel_api") { + auto api_info = bmruntime::get_bmodel_api_info(g_bmrt, net_name, input_tensors, input_num, + output_tensors, output_num, false, false, (uint32_t*)core_list.data()); + for(size_t core_idx=0; core_idx < core_list.size(); core_idx++){ + auto api_id = api_info.api_id[core_idx]; + bm_status_t core_status = tpu_kernel_launch_async_from_core(g_bm_handle, api_id, + api_info.api_data[core_idx].data(), + api_info.api_data[core_idx].size(), + core_list[core_idx]); + if (BM_SUCCESS != core_status) { + BMRT_LOG(FATAL, "tpu_kernel_launch_async_from_core failed, core_id:%d, api id:%d, status:%d", + core_list[core_idx], api_id, core_status); + } + } + } else if (TEST_CASE == "bmrt_get_bmodel_api_c") { + auto api_info = get_bmodel_api_info_c(g_bmrt, net_name, input_tensors, input_num, + output_tensors, output_num, false, false, (uint32_t*)core_list.data()); + for(size_t core_idx=0; core_idx < core_list.size(); core_idx++) { + auto api_id = api_info->api_id[core_idx]; + bm_status_t core_status = tpu_kernel_launch_async_from_core(g_bm_handle, api_id, + api_info->api_data[core_idx], + api_info->api_data_subsize[core_idx], + core_list[core_idx]); + if (BM_SUCCESS != core_status) { + BMRT_LOG(FATAL, "tpu_kernel_launch_async_from_core failed, core_id:%d, api id:%d, status:%d", + core_list[core_idx], api_id, core_status); + } + } + bmrt_free_api_info(api_info); + } else { + bool ret = bmrt_launch_tensor_multi_cores(g_bmrt, net_name, input_tensors, input_num, output_tensors, + output_num, false, false, core_list.data(), core_list.size()); + if (!ret) { + BMRT_LOG(FATAL, "launch net[%s] stage[%d] failed", net_name, launch_unit->stage_idx); + } } // sync, wait for finishing inference - bm_thread_sync(g_bm_handle); + for(size_t core_idx=0; core_idx < core_list.size(); core_idx++) { + bm_status_t core_status = 
bm_thread_sync_from_core(g_bm_handle, core_list[core_idx]); + if (core_status != BM_SUCCESS) { + BMRT_LOG(FATAL, "bm_thread_sync_from_core failed, core_id:%d, status:%d", core_list[core_idx], core_status); + } + } for (int i = 0; i < output_num; ++i) { auto &output_tensor = output_tensors[i]; size_t size = bmrt_tensor_bytesize(&output_tensor); @@ -923,6 +967,194 @@ static void bmtap2cpp_api_test_case() free_test_data(); } +static void thread_entry_bmrtmcore_launch( + const char *net_name, + const bm_tensor_t *input_tensors, int input_num, + bm_tensor_t *output_tensors, int output_num, + std::vector core_list, int stage_idx) { + bool ret = bmrt_launch_tensor_multi_cores(g_bmrt, net_name, input_tensors, input_num, + output_tensors, output_num, true, false, core_list.data(), core_list.size()); + if (ret == false) { + BMRT_LOG(FATAL, "The network '%s' stage '%d' launch failed", net_name, stage_idx); + } + for (int i = 0; i < core_list.size(); ++i) { + auto core_id = core_list[i]; + bm_status_t core_status = bm_thread_sync_from_core(g_bm_handle, core_id); + if (core_status != BM_SUCCESS) { + BMRT_LOG(FATAL, "The network '%s' stage '%d' sync failed, core=%d", net_name, stage_idx, core_id); + } + } +} + +static void test_bmrtmcore_launch_multi_mession() +{ + uint32_t launch_num = g_launch_unit_v.size(); + uint32_t mession_num = core_lists.size(); + if (launch_num < mession_num) { + BMRT_LOG(FATAL, "model number must equel or bigger than core mession number"); + } + bool use_multi_thread = true; +#ifdef USING_CMODEL + use_multi_thread = false; +#endif + for (uint32_t launch_i = 0; launch_i < launch_num; launch_i += mession_num) + { + std::vector input_nums, output_nums, stage_idxs; + std::vector net_names; + uint32_t mession_num_ext = (std::min)(mession_num, launch_num - launch_i); + for (uint32_t m_i = 0; m_i < mession_num_ext; ++m_i) + { + uint32_t model_idx = launch_i + m_i; + input_nums.emplace_back(g_launch_unit_v[model_idx].ref_input_v.size()); + 
output_nums.emplace_back(g_launch_unit_v[model_idx].ref_output_v.size()); + net_names.emplace_back(g_launch_unit_v[model_idx].net_name.c_str()); + stage_idxs.emplace_back(g_launch_unit_v[model_idx].stage_idx); + } +#ifdef __linux__ + std::vector> all_input_tensors, all_output_tensors; + std::vector> all_output_datas; + for (auto &input_num : input_nums) + { + std::vector input_tensors(input_num); + all_input_tensors.emplace_back(input_tensors); + } + for (auto &output_num : output_nums) + { + std::vector output_tensors(output_num); + all_output_tensors.emplace_back(output_tensors); + std::vector output_datas(output_num); + all_output_datas.emplace_back(output_datas); + } +#else + std::vector all_input_tensors, all_output_tensors; + std::vector all_output_datas; + for (auto &input_num : input_nums) + { + std::shared_ptr input_tensors_(new bm_tensor_t[input_num], std::default_delete()); + bm_tensor_t *input_tensors = input_tensors_.get(); + all_input_tensors.emplace_back(input_tensors); + } + for (auto &output_num : output_nums) + { + std::shared_ptr output_tensors_(new bm_tensor_t[output_num], std::default_delete()); + bm_tensor_t *output_tensors = output_tensors_.get(); + all_output_tensors.emplace_back(output_tensors); + std::shared_ptr output_datas_(new void *[output_num], std::default_delete()); + void **output_datas = output_datas_.get(); + all_output_datas.emplace_back(output_datas); + } +#endif + + for (uint32_t m_i = 0; m_i < mession_num_ext; ++m_i) + { + auto input_num = input_nums[m_i]; + for (int i = 0; i < input_num; ++i) + { + uint32_t model_idx = launch_i + m_i; + bmrt_tensor(&all_input_tensors[m_i][i], g_bmrt, g_launch_unit_v[model_idx].input_type_v[i], + g_launch_unit_v[model_idx].input_shape_v[i]); + bm_memcpy_s2d(g_bm_handle, all_input_tensors[m_i][i].device_mem, g_launch_unit_v[model_idx].ref_input_v[i]); + } + auto output_num = output_nums[m_i]; + for (int i = 0; i < output_num; ++i) + { + uint32_t model_idx = launch_i + m_i; + 
bmrt_tensor(&all_output_tensors[m_i][i], g_bmrt, g_launch_unit_v[model_idx].output_type_v[i], + g_launch_unit_v[model_idx].output_shape_v[i]); + } + } + for (uint32_t loop_i = 0; loop_i < LOOP_NUM; loop_i++) + { +#ifdef __linux__ + std::thread thread_v[mession_num_ext]; +#else + std::shared_ptr thread_v_(new thread[mession_num_ext], std::default_delete()); + thread *thread_v = thread_v_.get(); +#endif + for (uint32_t mession_i = 0; mession_i < mession_num_ext; ++mession_i) + { + auto core_list = core_lists[mession_i]; +#ifdef __linux__ + bm_tensor_t *input_tensors = all_input_tensors[mession_i].data(); + bm_tensor_t *output_tensors = all_output_tensors[mession_i].data(); +#else + bm_tensor_t *input_tensors = all_input_tensors[mession_i]; + bm_tensor_t *output_tensors = all_output_tensors[mession_i]; +#endif + if (use_multi_thread) + { + thread_v[mession_i] = std::thread(thread_entry_bmrtmcore_launch, net_names[mession_i], input_tensors, input_nums[mession_i], + output_tensors, output_nums[mession_i], core_list, stage_idxs[mession_i]); + } else { + thread_entry_bmrtmcore_launch(net_names[mession_i], input_tensors, input_nums[mession_i], + output_tensors, output_nums[mession_i], core_list, stage_idxs[mession_i]); + } + } + if (use_multi_thread) { + for (uint32_t m_i = 0; m_i < mession_num_ext; ++m_i) + { + thread_v[m_i].join(); + } + } + } + + for (uint32_t m_i = 0; m_i < mession_num_ext; ++m_i) + { + std::vector cmp_element_v; + auto output_num = output_nums[m_i]; + for (int i = 0; i < output_num; ++i) + { + auto &output_tensor = all_output_tensors[m_i][i]; + size_t size = bmrt_tensor_bytesize(&output_tensor); + cmp_element_v.push_back(bmrt_shape_count(&all_output_tensors[m_i][i].shape)); + all_output_datas[m_i][i] = malloc(size); + bm_memcpy_d2s_partial(g_bm_handle, all_output_datas[m_i][i], output_tensor.device_mem, size); + bm_free_device(g_bm_handle, output_tensor.device_mem); + } + + auto input_num = input_nums[m_i]; + for (int i = 0; i < input_num; ++i) + { + 
bm_free_device(g_bm_handle, all_input_tensors[m_i][i].device_mem); + } + + uint32_t model_idx = launch_i + m_i; +#ifdef __linux__ + int8_t** output_datas_p = (int8_t**)all_output_datas[m_i].data(); +#else + int8_t** output_datas_p = (int8_t**)all_output_datas[m_i]; +#endif + result_cmp(output_datas_p, g_launch_unit_v[model_idx], cmp_element_v); + + for (int i = 0; i < output_num; ++i) + { + free(all_output_datas[m_i][i]); + } + } + } +} + +static void bmmc_multi_mession_test_case() { + prepare_test_data(); + + bm_status_t ret = bm_dev_request(&g_bm_handle, DEV_ID); + if (ret != BM_SUCCESS) { + BMRT_LOG(FATAL, "bm_dev_request failed, ret:[%d]", ret); + } + g_bmrt = bmrt_create(g_bm_handle); + if (g_bmrt == NULL) { + BMRT_LOG(FATAL, "create runtime failed"); + } + + test_bmrt_load_bmodel(); + test_bmrtmcore_launch_multi_mession(); + + bmrt_destroy(g_bmrt); + bm_dev_free(g_bm_handle); + + free_test_data(); +} + /* --------------------------------------------------------------------------*/ /* main test case process */ typedef void TestPtr(); @@ -934,7 +1166,8 @@ typedef struct { test_pair_t test_pair[] = {{"bmrt", bmrt_api_test_case}, {"bmcpp", bmcpp_api_test_case}, {"bmtap2", bmtap2_api_test_case}, - {"bmtap2cpp", bmtap2cpp_api_test_case}}; + {"bmtap2cpp", bmtap2cpp_api_test_case}, + {"bmmc", bmmc_multi_mession_test_case}}; void bmrt_test_case() { diff --git a/tpu-runtime/app/bmrt_test_fps.cpp b/tpu-runtime/app/bmrt_test_fps.cpp new file mode 100755 index 0000000..4407098 --- /dev/null +++ b/tpu-runtime/app/bmrt_test_fps.cpp @@ -0,0 +1,779 @@ +#include "bmrt_test_inner.h" +#ifndef __linux__ +//#include +#include +#else +#include +#include +#endif +#include "tpu_fp16.hpp" +using namespace tpu; + +/* Internal */ +#define COMPARE_EPSILON (1e-2) +#define COMPARE_FIX_ERR (0) +#define IS_NAN(x) ((((x >> 23) & 0xff) == 0xff) && ((x & 0x7fffff) != 0)) + +extern u64 bmrt_must_alloc_device_mem(void*, bm_device_mem_t*, u32); +extern void bmrt_must_free_device_mem(void*, 
bm_device_mem_t); + +vector CONTEXT_DIR_V; +string BMODEL_PATH; +string OUTPUT_REF_NAME = OUTPUT_REF_DATA; +bool NEED_CMP = true; +int DEV_ID = 0; +int LOOP_NUM = 1; +int LOOP_PERIOD_US = -1; +int LOOP_NBATCH = 1; +float LOOP_FPS = -1; +int DELTA_INT = COMPARE_FIX_ERR; +float DELTA_FLOAT = COMPARE_EPSILON; +bool b_enable_mmap = true; +bool b_bmodel_dir = true; + +typedef struct { + FILE *f_input_ref; + FILE *f_output_ref; + FILE *f_output; + string dir; + int net_num; +} context_info_t; + +vector context_info_v; +/* --------------------------------------------------------------------------*/ +/* code for compare the result */ + +typedef union { + int ival; + float fval; +} IF_VAL; + +static int array_cmp_fp32(const float *p_exp_, const float *p_got_, int len, const char *info_label, float delta) +{ + int max_error_count = 30, error_count = 0; + const float *p_exp = p_exp_, *p_got = p_got_; + bool only_warning = false; + if (1e4 == delta) { + delta = 1e-2; + only_warning = true; + } + + for (int idx = 0; idx < len; idx++) { + if (max(fabs(p_exp[idx]), fabs(p_got[idx])) > 1.0) { + // compare rel + if (min(fabs(p_exp[idx]), fabs(p_got[idx])) < 1e-20) { + if (!only_warning) error_count ++; + BMRT_LOG(WRONG, "%s rel warning at index %d exp %.20f got %.20f", info_label, idx, p_exp[idx], p_got[idx]); + if (!only_warning && error_count > max_error_count) return -1; + } + + if (fabs(p_exp[idx] - p_got[idx]) / min(fabs(p_exp[idx]), fabs(p_got[idx])) > delta) { + if (!only_warning) error_count ++; + BMRT_LOG(WRONG, "%s rel warning at index %d exp %.20f got %.20f", info_label, idx, p_exp[idx], p_got[idx]); + if (!only_warning && error_count > max_error_count) return -1; + } + } else { + // compare abs + if (fabs(p_exp[idx] - p_got[idx]) > delta) { + if (!only_warning) error_count ++; + BMRT_LOG(WRONG, "%s abs warning at index %d exp %.20f got %.20f", info_label, idx, p_exp[idx], p_got[idx]); + if (!only_warning && error_count > max_error_count) return -1; + } + } + + IF_VAL 
if_val_exp, if_val_got; + if_val_exp.fval = p_exp[idx]; + if_val_got.fval = p_got[idx]; + if (IS_NAN(if_val_got.ival) && !IS_NAN(if_val_exp.ival)) { + BMRT_LOG(WRONG, "There are nans in %s idx %d", info_label, idx); + BMRT_LOG(WRONG, "floating form exp %.10f got %.10f", if_val_exp.fval, if_val_got.fval); + BMRT_LOG(WRONG, "hex form exp %8.8x got %8.8x", if_val_exp.ival, if_val_got.ival); + return -2; + } + } + + if (error_count > 0) { + return - 1; + } else { + return 0; + } +} + +static int array_cmp_float(const void*p_exp_, const void*p_got_, int dtype, int len, const char *info_label, float delta) { + if(dtype == BM_FLOAT16) { + auto fp16_exp_ptr = (const fp16*)p_exp_; + auto fp16_got_ptr = (const fp16*)p_got_; + std::vector exp_vec(len); + std::vector got_vec(len); + for(int i=0; i(fp16_exp_ptr[i]); + got_vec[i] = to(fp16_got_ptr[i]); + } + BMRT_DEBUG(" got[0] = %f(0x%4x), ref[0] = %f(0x%04x), dtype=fp16", got_vec[0], fp16_got_ptr[0].bits, + exp_vec[0], fp16_got_ptr[0].bits); + return array_cmp_fp32(exp_vec.data(), got_vec.data(), len, info_label, delta); + } else if(dtype == BM_BFLOAT16) { + auto bf16_exp_ptr = (const bf16*)p_exp_; + auto bf16_got_ptr = (const bf16*)p_got_; + std::vector exp_vec(len); + std::vector got_vec(len); + for(int i=0; i(bf16_exp_ptr[i]); + got_vec[i] = to(bf16_got_ptr[i]); + } + BMRT_DEBUG(" got[0] = %f(0x%4x), ref[0] = %f(0x%04x), dtype=bf16", got_vec[0], bf16_got_ptr[0].bits, + exp_vec[0], bf16_got_ptr[0].bits); + return array_cmp_fp32(exp_vec.data(), got_vec.data(), len, info_label, delta); + } else if(dtype == BM_FLOAT32) { + BMRT_DEBUG(" got[0] = %f, ref[0] = %f, dtype=fp32", ((float*)p_got_)[0], ((float*)p_exp_)[0]); + return array_cmp_fp32((const float*)p_exp_, (const float*)p_got_, len, info_label, delta); + + } else { + assert(0 && "not support dtype"); + } + return 0; +} + +static int array_cmp_fix8b(void *p_exp, void *p_got, + int sign, // 0: unsigned, 1: signed + int len, const char *info_label, int delta) +{ +#define 
MAX_NUM_ERRORS 30 // max number of error print before return -1 + + int ret = 0; + int num_errors = 0; + + int idx = 0; + for (idx = 0; idx < len; idx++) { + int error = 0; + int exp_int = 0; + int got_int = 0; + if (sign) { + exp_int = (int)(*((signed char *)p_exp + idx)); + got_int = (int)(*((signed char *)p_got + idx)); + } else { + exp_int = (int)(*((unsigned char *)p_exp + idx)); + got_int = (int)(*((unsigned char *)p_got + idx)); + } + error = abs(exp_int - got_int); + if (error > delta) { + ret = -1; + BMRT_LOG(WRONG, "%s error at index %3d: [exp %3d, got %3d] diff=%2d (delta=%d)", info_label, idx, exp_int, got_int, got_int - exp_int , delta); + num_errors ++; + if (num_errors < MAX_NUM_ERRORS) { + continue; + } else { + return ret; + } + } + } + + return ret; +} + +static int array_cmp_fix16b(void *p_exp, void *p_got, + int sign, // 0: unsigned, 1: signed + int len, const char *info_label, int delta) +{ + int idx = 0; + for (idx = 0; idx < len; idx++) { + int error = 0; + int exp_int = 0; + int got_int = 0; + if (sign) { + exp_int = (int)(*((short *)p_exp + idx)); + got_int = (int)(*((short *)p_got + idx)); + } else { + exp_int = (int)(*((unsigned short *)p_exp + idx)); + got_int = (int)(*((unsigned short *)p_got + idx)); + } + error = abs(exp_int - got_int); + if (error > delta) { + BMRT_LOG(WRONG, "%s error at index %d exp %d got %d", info_label, idx, exp_int, got_int); + return -1; + } + } + return 0; +} + +static int array_cmp_fix32b(void *p_exp, void *p_got, + int sign, // 0: unsigned, 1: signed + int len, const char *info_label, int delta) +{ + int idx = 0; + for (idx = 0; idx < len; idx++) { + int error = 0; + int exp_int = 0; + int got_int = 0; + if (sign) { + exp_int = (int)(*((int *)p_exp + idx)); + got_int = (int)(*((int *)p_got + idx)); + } else { + exp_int = (int)(*((unsigned int *)p_exp + idx)); + got_int = (int)(*((unsigned int *)p_got + idx)); + } + error = abs(exp_int - got_int); + if (error > delta) { + BMRT_LOG(WRONG, "%s error at index 
%d exp %d got %d", info_label, idx, exp_int, got_int); + return -1; + } + } + return 0; +} + +int result_cmp(const void *host_output_data, const void* ref_output_data, size_t len, bm_data_type_t dtype, int i) +{ + const char *info_label = "error comparing the last tensor: "; + int flag = 0; + switch (dtype) { + case BM_INT32: + case BM_UINT32: + BMRT_DEBUG("The %d-th tensor got[0] = %d, ref[0] = %d", i, + ((int *)(host_output_data))[0], ((int *)(ref_output_data))[0]); + flag = array_cmp_fix32b((void *)(ref_output_data), (void *)(host_output_data), + dtype == BM_INT32 ? 1 : 0, len, info_label, DELTA_INT); + break; + case BM_INT16: + case BM_UINT16: + BMRT_DEBUG("The %d-th tensor got[0] = %d, ref[0] = %d", i, + ((short *)(host_output_data))[0], ((short *)(ref_output_data))[0]); + flag = array_cmp_fix16b((void *)(ref_output_data), (void *)(host_output_data), + dtype == BM_INT16 ? 1 : 0, len, info_label, DELTA_INT); + break; + case BM_INT8: + BMRT_DEBUG("The %d-th tensor got[0] = %d, ref[0] = %d", i, + ((char *)(host_output_data))[0], ((char *)(ref_output_data))[0]); + flag = array_cmp_fix8b((void *)(ref_output_data), (void *)(host_output_data), 1, len, info_label, DELTA_INT); + break; + case BM_UINT8: + BMRT_DEBUG("The %d-th tensor got[0] = %d, ref[0] = %d", i, + ((unsigned char *)(host_output_data))[0], ((unsigned char *)(ref_output_data))[0]); + flag = array_cmp_fix8b((void *)(ref_output_data), (void *)(host_output_data), 0, len, info_label, DELTA_INT); + break; + default: + BMRT_DEBUG("The %d-th tensor ", i); + flag = array_cmp_float(ref_output_data, host_output_data, dtype, len, + info_label, DELTA_FLOAT); + break; + } + if(flag){ + BMRT_LOG(WRONG, "comparing #%d output failed!", i); + } + return flag; +} + +string fix_bmodel_path(const string& path) { + if (b_bmodel_dir) { + return path + "/compilation.bmodel"; + } + return path; +} + + +static long long time_us() +{ + struct timeval time; + gettimeofday(&time, NULL); + long long elapsed = time.tv_sec * 1000000 + 
time.tv_usec; + return elapsed; +} + +/* --------------------------------------------------------------------------*/ +/* code for read input and output reference data */ + +void open_ref_file(const string &context_dir, FILE *&f_input, FILE *&f_output) +{ + if (NEED_CMP == false) { + return; + } + string input_ref_dir = context_dir + "/" + INPUT_REF_DATA; + string output_ref_dir = context_dir + "/" + OUTPUT_REF_NAME; + + f_input = fopen(input_ref_dir.c_str(), "rb"); + if (f_input == NULL) { + BMRT_LOG(FATAL, "cannot open file %s", input_ref_dir.c_str()); + } + f_output = fopen(output_ref_dir.c_str(), "rb"); + if (f_output == NULL) { + BMRT_LOG(FATAL, "cannot open file %s", output_ref_dir.c_str()); + } +} + +void close_ref_file() +{ + for (auto &info : context_info_v) { + if (info.f_input_ref != NULL) { + fclose(info.f_input_ref); + } + if (info.f_output_ref != NULL) { + fclose(info.f_output_ref); + } + if (info.f_output != NULL) { + fclose(info.f_output); + } + } +} + +void read_ref_data(void *host_data, size_t size, int net_idx, bool is_input = true, size_t gap_size = 0) +{ + if (NEED_CMP == false) { + // use 0 input + memset(host_data, 0, size); + return; + } + for (auto &info : context_info_v) { + if (net_idx >= info.net_num) { + continue; + } + FILE *file = is_input ? info.f_input_ref : info.f_output_ref; + auto pos = ftell(file); + if (size > 0 && 1 != fread(host_data, size, 1, file)) { + BMRT_LOG(FATAL, "Failed to fread reference data of net #%d, need_bytesize=%zu, but just %zu bytes left", net_idx, size, ftell(file)-pos); + break; + } + if (gap_size != 0) { + fseek(file, gap_size, SEEK_CUR); + } + return; + } +} + +void fseek_ref_file_by_stage(int begin_stage, int end_stage, int net_idx, const bm_net_info_t *net_info) +{ + if (NEED_CMP == false) return; + int begin, end; + begin = begin_stage >= 0 ? begin_stage : 0; + end = end_stage <= net_info->stage_num ? 
end_stage : net_info->stage_num; + size_t input_ref_offset = 0; + size_t output_ref_offset = 0; + for (int sidx = begin; sidx < end; ++sidx) { + auto &stage = net_info->stages[sidx]; + for (int input_idx = 0; input_idx < net_info->input_num; input_idx++) { + input_ref_offset += bmrt_shape_count(&stage.input_shapes[input_idx]) * + bmrt_data_type_size(net_info->input_dtypes[input_idx]); + } + for (int output_idx = 0; output_idx < net_info->output_num; ++output_idx) { + output_ref_offset += bmrt_shape_count(&stage.output_shapes[output_idx]) * + bmrt_data_type_size(net_info->output_dtypes[output_idx]); + } + } + for (auto &info : context_info_v) { + if (net_idx >= info.net_num) { + continue; + } + if (input_ref_offset != 0) { + fseek(info.f_input_ref, input_ref_offset, SEEK_CUR); + } + if (output_ref_offset != 0) { + fseek(info.f_output_ref, output_ref_offset, SEEK_CUR); + } + return; + } + BMRT_LOG(FATAL, "Failed to fseek ref data of net #%d from stage %d to %d", net_idx, begin, end); +} + +void fseek_ref_file_to_net(int net_idx, void *p_bmrt) +{ + if (NEED_CMP == false) return; + const char **net_names = NULL; + bmrt_get_network_names(p_bmrt, &net_names); + int net_num = bmrt_get_network_number(p_bmrt); + if (net_idx >= net_num) { + BMRT_LOG(FATAL, "net #%d cannot be found!", net_idx); + } + for (int i = 0; i < net_idx; ++i) { + auto net_info = bmrt_get_network_info(p_bmrt, net_names[i]); + fseek_ref_file_by_stage(0, net_info->stage_num, i, net_info); + } +} + +// set enable mmap flag in bmruntime +void set_bmrt_mmap(void *p_bmrt, bool enable) +{ + ((Bmruntime *)p_bmrt)->set_bmrt_mmap(enable); +} + +/* --------------------------------------------------------------------------*/ +/* code for inference by new nntoolchain runtime interface */ + +static void load_bmodel(const string &dir, void *p_bmrt) +{ + string bmodel_path = fix_bmodel_path(dir); + bool flag = bmrt_load_bmodel(p_bmrt, bmodel_path.c_str()); + if (!flag) { + BMRT_LOG(FATAL, "Load bmodel[%s] failed", 
bmodel_path.c_str()); + } +} + +template +void print_array(const T* data, int len, const char* prefix = nullptr, int max_print=16){ + std::ostringstream ss; + if(prefix){ + ss<max_print? max_print: len; + ss << "< "; + for(int i=0; i len="<"; + } + string info = ss.str(); + BMRT_LOG(INFO, "%s", info.c_str()); +} + +void print_array_ex(const void* data, int len, int dtype, const char* prefix = nullptr, int max_print=16){ + if (dtype == BM_INT32) { + print_array((const int *)data, len, prefix); + } else if (dtype == BM_FLOAT32) { + print_array((const float *)data, len, prefix); + } +} + +void bmrt_test_fps() +{ + // create bmruntime + bm_handle_t bm_handle; + bm_status_t status = bm_dev_request(&bm_handle, DEV_ID); + if (BM_SUCCESS != status) { + BMRT_LOG(FATAL, "bm_dev_request failed, id:[%d]", DEV_ID); + } + + void *p_bmrt = bmrt_create(bm_handle); + +#ifdef SOC_MODE + set_bmrt_mmap(p_bmrt, b_enable_mmap); +#endif + + // load bmodel by file + for (auto &context_dir : CONTEXT_DIR_V) { + context_info_t info; + info.f_input_ref = NULL; + info.f_output_ref = NULL; + info.f_output = NULL; + load_bmodel(context_dir, p_bmrt); + + open_ref_file(context_dir, info.f_input_ref, info.f_output_ref); + // get network number + info.net_num = bmrt_get_network_number(p_bmrt); + info.dir = context_dir; + context_info_v.push_back(info); + } + + bmrt_show_neuron_network(p_bmrt); + + const char **net_names = NULL; + bmrt_get_network_names(p_bmrt, &net_names); + int net_idx = 0; + int stage_idx = 0; + BMRT_LOG(INFO, "==> running network #%d, name: %s, loop: %d", net_idx, net_names[net_idx], LOOP_NUM); + auto net_info = bmrt_get_network_info(p_bmrt, net_names[net_idx]); + std::vector output_tensors(net_info->output_num); + auto &stage = net_info->stages[stage_idx]; + + std::vector> host_output(net_info->output_num); + std::vector> ref_output(net_info->output_num); + + for (int output_idx = 0; output_idx < net_info->output_num; output_idx++) { + auto &output_tensor = 
output_tensors[output_idx]; + bmrt_tensor(&output_tensor, p_bmrt, net_info->output_dtypes[output_idx], stage.output_shapes[output_idx]); + auto size = bmrt_tensor_bytesize(&output_tensor); + host_output[output_idx].resize(size); + } + + // prepare input tensor + std::vector input_tensors(net_info->input_num); + std::vector> input_data(net_info->input_num); + + for (int input_idx = 0; input_idx < net_info->input_num; input_idx++) { + auto &input_tensor = input_tensors[input_idx]; + bmrt_tensor(&input_tensor, p_bmrt, net_info->input_dtypes[input_idx], stage.input_shapes[input_idx]); + size_t size = bmrt_tensor_bytesize(&input_tensor); + auto& host_data = input_data[input_idx]; + host_data.resize(size); + BMRT_LOG(INFO, "reading input #%d, bytesize=%zu", input_idx, size); + read_ref_data(host_data.data(), size, net_idx, true); + print_array_ex(host_data.data(), bmrt_shape_count(&input_tensor.shape), net_info->input_dtypes[input_idx], " --> input_data:"); + // dump input ref tensors in NCHW format into current directory + // bm_memcpy_s2d(bm_handle, input_tensor.device_mem, ((void *)host_data.data())); + } + + bm_profile_t npu_start, npu_end; + memset(&npu_start, 0, sizeof(bm_profile_t)); + memset(&npu_end, 0, sizeof(bm_profile_t)); + unsigned long long load_time = 0; + unsigned long long infer_time = 0; + unsigned long long fetch_time = 0; + unsigned long long npu_time = 0; + + int batch = input_tensors[0].shape.dims[0]; + int overtime_count = 0; + if (LOOP_FPS>0){ + LOOP_PERIOD_US = 1e6/LOOP_FPS * batch * LOOP_NBATCH; + BMRT_LOG(INFO, "%dxbatch=%dx%d, fps=%.2f, loop_period=%dus", LOOP_NBATCH, batch, LOOP_FPS, LOOP_PERIOD_US); + } + + int MAX_STAT_FRAME_COUNT = 32; + + int stat_frame_count = 0; + long long loop_begin = time_us(); + long long stat_begin = loop_begin; + long long host_infer_time = 0; + long long tpu_s2d_time = 0; + long long tpu_d2s_time = 0; + long long sleep_time = 0; + long long fixed_sleep = 0; + long long tpu_stat_time = 0; + for (int loop = 0; 
loopinput_num; input_idx++) { + auto &input_tensor = input_tensors[input_idx]; + auto& host_data = input_data[input_idx]; + bm_memcpy_s2d(bm_handle, input_tensor.device_mem, ((void *)host_data.data())); + } + } + + long long before_infer = time_us(); + auto single_load_time = before_infer - begin_usec; + load_time += single_load_time; + tpu_s2d_time += single_load_time; + + bm_get_profile(bm_handle, &npu_start); + for(int n=0; ninput_num, output_tensors.data(), net_info->output_num, true, false); + if (ret == false) { + BMRT_LOG(FATAL, "The %d-th neuron network '%s' stage '%d' inference failed", net_idx, net_info->name, stage_idx); + } + } + status = bm_thread_sync(bm_handle); + if (BM_SUCCESS != status) { + BMRT_LOG(FATAL, "The %d-th neuron network '%s' stage '%d' inference failed", net_idx, net_info->name, stage_idx); + } + bm_get_profile(bm_handle, &npu_end); + auto single_npu_time = npu_end.tpu_process_time - npu_start.tpu_process_time; + npu_time += single_npu_time; + tpu_stat_time += single_npu_time; + + // memcpy output data from device to system + long long before_fetch = time_us(); + auto single_infer_time = before_fetch - before_infer; + infer_time += single_infer_time; + host_infer_time += single_infer_time; + + for(int n=0; noutput_num; ++output_idx) { + auto &output_tensor = output_tensors[output_idx]; + bm_memcpy_d2s_partial(bm_handle, host_output[output_idx].data(), output_tensor.device_mem, host_output[output_idx].size()); + } + } + long long end_usec = time_us(); + auto single_fetch_time = end_usec - before_fetch; + fetch_time += single_fetch_time; + tpu_d2s_time += single_fetch_time; + stat_frame_count += batch * LOOP_NBATCH; + + long long sleep_us = 0; + if(LOOP_PERIOD_US>0) { + long long elapse_usec = end_usec - begin_usec; + if(elapse_usec=MAX_STAT_FRAME_COUNT){ + float tpu_fps = 1e6*stat_frame_count/tpu_stat_time; + float loop_fps = 1e6*stat_frame_count/stat_interval; + BMRT_LOG(INFO, "--> stat_interval: %lld us, frame = %d, loop_fps=%.2f, 
tpu_fps: %.2f, sleep: %lld us, host_infer: %lld us, tpu_infer: %lld, s2d: %lld us, d2s: %lld us", + stat_interval, stat_frame_count, loop_fps, tpu_fps, sleep_time, host_infer_time, tpu_stat_time, tpu_s2d_time, tpu_d2s_time); + stat_begin = time_us(); + host_infer_time = 0; + tpu_s2d_time = 0; + tpu_d2s_time = 0; + sleep_time = 0; + stat_frame_count = 0; + tpu_stat_time = 0; + } + BMRT_LOG(WARNING, "--> single: sleep: %lld us, host_infer: %lld us, s2d: %lld us, d2s: %lld us, tpu_infer %lld us", + sleep_us, single_infer_time, single_load_time, single_fetch_time, single_npu_time); + } + long long loop_end = time_us(); + + for (int output_idx = 0; output_idx < net_info->output_num; ++output_idx) { + auto &output_tensor = output_tensors[output_idx]; + /* shape gap count = max shape count - real shape count */ + size_t size = bmrt_tensor_bytesize(&output_tensor); + auto dtype = net_info->input_dtypes[output_idx]; + int dsize = bmrt_data_type_size(dtype); + int elem_num = size/dsize; + + if(NEED_CMP){ + ref_output[output_idx].resize(size); + BMRT_LOG(INFO, "reading output ref: #%d, bytesize=%zu", output_idx, size); + read_ref_data(ref_output[output_idx].data(), size, net_idx, false, 0); + print_array_ex(ref_output[output_idx].data(), elem_num, net_info->output_dtypes[output_idx], " --> output ref_data:"); + BMRT_LOG(INFO, "==>comparing #%d output ... 
", output_idx); + int flag = result_cmp(host_output[output_idx].data(), ref_output[output_idx].data(), elem_num, dtype, output_idx); + if (flag == 0) { + BMRT_LOG(INFO, "+++ The network[%s] stage[%d] cmp success +++", net_info->name, stage_idx); + } else { + BMRT_LOG(FATAL, "+++ The network[%s] stage[%d] cmp failed +++", net_info->name, stage_idx); + } + } else { + print_array_ex(host_output[output_idx].data(), elem_num, dtype, " --> output data:"); + } + } + + unsigned total_elapsed = infer_time /LOOP_NUM; + unsigned npu_elapsed = npu_time/LOOP_NUM; + + BMRT_LOG(INFO, "net[%s] stage[%d], launch total time is %lu us (npu %lu us, cpu %lu us)", + net_info->name, stage_idx, total_elapsed, npu_elapsed, total_elapsed - npu_elapsed); + // compare inference output data with reference data + float ratio = LOOP_NUM*1e6; + BMRT_LOG(INFO, "load input time(s): %f", load_time/ratio); + BMRT_LOG(INFO, "calculate time(s): %f", infer_time/ratio); + BMRT_LOG(INFO, "get output time(s): %f", fetch_time/ratio); + + BMRT_LOG(INFO, "max batch per second: %f", ratio/(load_time+infer_time+fetch_time)); + BMRT_LOG(INFO, "real batch per second: %f", ratio/(loop_end - loop_begin)); + BMRT_LOG(INFO, "overtime ratio: %d/%d, period: %d us", overtime_count, LOOP_NUM, LOOP_PERIOD_US); + + // free memory + for (int i = 0; i < net_info->input_num; ++i) { + bmrt_must_free_device_mem(p_bmrt, input_tensors[i].device_mem); + } + for (int i = 0; i < net_info->output_num; ++i) { + bmrt_must_free_device_mem(p_bmrt, output_tensors[i].device_mem); + } + free(net_names); + close_ref_file(); + bmrt_destroy(p_bmrt); + bm_dev_free(bm_handle); +} + +void Usage() +{ + printf( + "Usage:\n" + " --version : Show version.\n" + " --context_dir : The dir of context after compilation.\n" + " --bmodel : The path of bmodel, just test bmodel, no compare.\n" + " --devid : The number of device.\n" + " --nbatch : Run nbatch x model_batch frame as a loop.\n" + " --compare : If 0, no result compare, else do compare.\n" + " 
--loopnum : Set net launch loop times, one time as default.\n" + " --period : Set net launch loop period, default -1 means as fast as possible.\n" + " --fps: : Set simulated fps, loop period will be caculated automatically, default -1 means as fast as possible.\n" + ); +} + + +static void check_options() +{ + if (BMODEL_PATH.empty() == false && CONTEXT_DIR_V.empty() == false) { + BMRT_LOG(FATAL, "can't use dir and bmodel at the same time"); + } + + if (BMODEL_PATH.empty() == false) { + CONTEXT_DIR_V.push_back(BMODEL_PATH); + b_bmodel_dir = false; + NEED_CMP = false; + } + + if (CONTEXT_DIR_V.empty()) { + BMRT_LOG(FATAL, "no context files"); + } + if (LOOP_NUM < 1) { + BMRT_LOG(FATAL, "loopnum should larger than 0"); + } +} + +static void deal_with_options(int argc, char **argv) +{ + int ch, option_index = 0; + static struct option long_options[] = {{"context_dir", required_argument, NULL, 'd'}, + {"devid", required_argument, NULL, 'i'}, + {"help", no_argument, NULL, 'h'}, + {"version", no_argument, NULL, 'v'}, + {"compare", required_argument, NULL, 'c'}, + {"mmap", required_argument, NULL, 'm'}, + {"loopnum", required_argument, NULL, 'l'}, + {"nbatch", required_argument, NULL, 'n'}, + {"period", required_argument, NULL, 'p'}, + {"bmodel", required_argument, NULL, 'b'}, + {"fps", required_argument, NULL, 'f'}, + {0, 0, 0, 0}}; + + if (argc < 2) { + Usage(); + exit(-1); + } + + while ((ch = getopt_long(argc, argv, "d:i:hvc:f:p:m:l:b:n:", long_options, &option_index)) != -1) { + switch (ch) { + case 'd': + CONTEXT_DIR_V.push_back(optarg); + break; + case 'i': + DEV_ID = atoi(optarg); + break; + case 'b': + BMODEL_PATH = optarg; + break; + case 'c': + NEED_CMP = (atoi(optarg) != 0); + break; + case 'l': + LOOP_NUM = atoi(optarg); + break; + case 'f': + LOOP_FPS = (float)atof(optarg); + break; + case 'p': + LOOP_PERIOD_US = atoi(optarg); + break; + case 'n': + LOOP_NBATCH = atoi(optarg); + break; + case 'h': + Usage(); + exit(-1); + break; + case 'v': + std::cout << VER 
<< std::endl; + exit(0); + break; + case 'm': + b_enable_mmap = (atoi(optarg) != 0); + break; + default: + // unknown option + BMRT_LOG(FATAL, "Unknown option %d", ch); + break; + } + } + check_options(); + BMRT_LOG(INFO, "Loop num: %d", LOOP_NUM); +} + +int main(int argc, char **argv) +{ + deal_with_options(argc, argv); + try { + bmrt_test_fps(); + } catch (const std::runtime_error &e) { + return -1; + } + return 0; +} diff --git a/tpu-runtime/app/bmrt_test_inner.h b/tpu-runtime/app/bmrt_test_inner.h old mode 100644 new mode 100755 index fcd8343..5cbc790 --- a/tpu-runtime/app/bmrt_test_inner.h +++ b/tpu-runtime/app/bmrt_test_inner.h @@ -36,6 +36,8 @@ #include "bmruntime_bmnet.h" #include "bmruntime_cpp.h" #include "bmruntime_interface.h" +#include "bmruntime_legacy.h" +#include "bmruntime_common.h" using bmodel::ModelCtx; using bmruntime::Bmruntime; @@ -48,6 +50,7 @@ using std::vector; #define INPUT_REF_DATA "input_ref_data.dat" #define OUTPUT_REF_DATA "output_ref_data.dat" +extern vector> core_lists; extern vector CONTEXT_DIR_V; extern bool b_bmodel_dir; extern string TEST_CASE; diff --git a/tpu-runtime/app/model_runner/LICENSE b/tpu-runtime/app/model_runner/LICENSE new file mode 100644 index 0000000..e60eadb --- /dev/null +++ b/tpu-runtime/app/model_runner/LICENSE @@ -0,0 +1,21 @@ +The MIT License + +Copyright (c) Carl Rogers, 2011 + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. diff --git a/tpu-runtime/app/model_runner/cnpy.cpp b/tpu-runtime/app/model_runner/cnpy.cpp new file mode 100644 index 0000000..a49a14d --- /dev/null +++ b/tpu-runtime/app/model_runner/cnpy.cpp @@ -0,0 +1,823 @@ +// Copyright (C) 2011 Carl Rogers +// Released under MIT License +// license available in LICENSE file, or at +// http://www.opensource.org/licenses/mit-license.php + +#ifndef __linux__ +#else +#define _FILE_OFFSET_BITS 64 +#define __USE_FILE_OFFSET64 +#define __USE_LARGEFILE64 +#define _LARGEFILE64_SOURCE + +#include "cnpy.h" +#include +#include +#include +#include +#include +#include +#include +#include + +#define ZIP64_LIMIT ((((size_t)1) << 31) - 1) + +namespace cnpy { + +static char BigEndianTest() { + int x = 1; + return (((char *)&x)[0]) ? 
'<' : '>'; +} + +template struct mapType; + +#define DEFMAPTYPE(type, typeIdentifier) \ + template <> struct mapType { \ + static constexpr char value = typeIdentifier; \ + }; + +DEFMAPTYPE(float, 'f') +DEFMAPTYPE(double, 'f') +DEFMAPTYPE(long double, 'f') +DEFMAPTYPE(int, 'i') +DEFMAPTYPE(char, 'i') +DEFMAPTYPE(signed char, 'i') +DEFMAPTYPE(short, 'i') +DEFMAPTYPE(long, 'i') +DEFMAPTYPE(long long, 'i') +DEFMAPTYPE(unsigned char, 'u') +DEFMAPTYPE(unsigned short, 'u') +DEFMAPTYPE(unsigned long, 'u') +DEFMAPTYPE(unsigned long long, 'u') +DEFMAPTYPE(unsigned int, 'u') +DEFMAPTYPE(bool, 'b') +DEFMAPTYPE(std::complex, 'c') +DEFMAPTYPE(std::complex, 'c') +DEFMAPTYPE(std::complex, 'c') + +template +std::vector &operator+=(std::vector &lhs, const T rhs) { + // write in little endian + for (size_t byte = 0; byte < sizeof(T); byte++) { + char val = *((const char *)&rhs + byte); + lhs.push_back(val); + } + return lhs; +} + +template <> +std::vector &operator+=(std::vector &lhs, const std::string rhs) { + lhs.insert(lhs.end(), rhs.begin(), rhs.end()); + return lhs; +} + +template <> +std::vector &operator+=(std::vector &lhs, const char *rhs) { + // write in little endian + size_t len = strlen(rhs); + lhs.reserve(len); + for (size_t byte = 0; byte < len; byte++) { + lhs.push_back(rhs[byte]); + } + return lhs; +} + +std::vector create_npy_header(const std::vector &shape, + size_t word_size, char type) { + std::vector dict; + dict += "{'descr': '"; + dict += BigEndianTest(); + dict += type; + dict += std::to_string(word_size); + dict += "', 'fortran_order': False, 'shape': ("; + dict += std::to_string(shape[0]); + for (size_t i = 1; i < shape.size(); i++) { + dict += ", "; + dict += std::to_string(shape[i]); + } + if (shape.size() == 1) + dict += ","; + dict += "), }"; + // pad with spaces so that preamble+dict is modulo 16 bytes. + // preamble is 10 bytes. 
dict needs to end with \n + int remainder = 16 - (10 + dict.size()) % 16; + dict.insert(dict.end(), remainder, ' '); + dict.back() = '\n'; + + std::vector header; + header += (char)0x93; + header += "NUMPY"; + header += (char)0x01; // major version of numpy format + header += (char)0x00; // minor version of numpy format + header += (uint16_t)dict.size(); + header.insert(header.end(), dict.begin(), dict.end()); + + return header; +} + +void parse_npy_header(unsigned char *buffer, size_t &word_size, char &type, + std::vector &shape, bool &fortran_order) { + // std::string magic_string(buffer,6); + // uint8_t major_version = *reinterpret_cast(buffer+6); + // uint8_t minor_version = *reinterpret_cast(buffer+7); + uint16_t header_len = *reinterpret_cast(buffer + 8); + std::string header(reinterpret_cast(buffer + 9), header_len); + + size_t loc1, loc2; + + // fortran order + loc1 = header.find("fortran_order") + 16; + fortran_order = (header.substr(loc1, 4) == "True" ? true : false); + + // shape + loc1 = header.find("("); + loc2 = header.find(")"); + + std::regex num_regex("[0-9][0-9]*"); + std::smatch sm; + shape.clear(); + + std::string str_shape = header.substr(loc1 + 1, loc2 - loc1 - 1); + while (std::regex_search(str_shape, sm, num_regex)) { + shape.push_back(std::stoi(sm[0].str())); + str_shape = sm.suffix().str(); + } + + // endian, word size, data type + // byte order code | stands for not applicable. + // not sure when this applies except for byte array + loc1 = header.find("descr") + 9; + bool littleEndian = + (header[loc1] == '<' || header[loc1] == '|' ? 
true : false); + assert(littleEndian); + + type = header[loc1 + 1]; + // assert(type == mapType(T)::value); + + std::string str_ws = header.substr(loc1 + 2); + loc2 = str_ws.find("'"); + word_size = atoi(str_ws.substr(0, loc2).c_str()); +} + +void parse_npy_header(FILE *fp, size_t &word_size, char &type, + std::vector &shape, bool &fortran_order) { + char buffer[256]; + size_t res = fread(buffer, sizeof(char), 11, fp); + if (res != 11) + throw std::runtime_error("parse_npy_header: failed fread"); + std::string header = fgets(buffer, 256, fp); + assert(header[header.size() - 1] == '\n'); + + size_t loc1, loc2; + + // fortran order + loc1 = header.find("fortran_order"); + if (loc1 == std::string::npos) + throw std::runtime_error("parse_npy_header: " + "failed to find header keyword: 'fortran_order'"); + loc1 += 16; + fortran_order = (header.substr(loc1, 4) == "True" ? true : false); + + // shape + loc1 = header.find("("); + loc2 = header.find(")"); + if (loc1 == std::string::npos || loc2 == std::string::npos) + throw std::runtime_error("parse_npy_header: " + "failed to find header keyword: '(' or ')'"); + + std::regex num_regex("[0-9][0-9]*"); + std::smatch sm; + shape.clear(); + + std::string str_shape = header.substr(loc1 + 1, loc2 - loc1 - 1); + while (std::regex_search(str_shape, sm, num_regex)) { + shape.push_back(std::stoi(sm[0].str())); + str_shape = sm.suffix().str(); + } + + // endian, word size, data type + // byte order code | stands for not applicable. + // not sure when this applies except for byte array + loc1 = header.find("descr"); + if (loc1 == std::string::npos) + throw std::runtime_error("parse_npy_header: " + "failed to find header keyword: 'descr'"); + loc1 += 9; + bool littleEndian = + (header[loc1] == '<' || header[loc1] == '|' ? 
true : false); + assert(littleEndian); + + type = header[loc1 + 1]; + // assert(type == mapType(T)); + + std::string str_ws = header.substr(loc1 + 2); + loc2 = str_ws.find("'"); + word_size = atoi(str_ws.substr(0, loc2).c_str()); +} + +void parse_zip_footer(FILE *fp, uint16_t &nrecs, size_t &global_header_size, + size_t &global_header_offset) { + std::vector footer(22); + fseek(fp, -22, SEEK_END); + size_t res = fread(&footer[0], sizeof(char), 22, fp); + if (res != 22) + throw std::runtime_error("parse_zip_footer: failed fread"); + + uint16_t disk_no, disk_start, nrecs_on_disk, comment_len; + disk_no = *(uint16_t *)&footer[4]; + disk_start = *(uint16_t *)&footer[6]; + nrecs_on_disk = *(uint16_t *)&footer[8]; + nrecs = *(uint16_t *)&footer[10]; + global_header_size = *(uint32_t *)&footer[12]; + global_header_offset = *(uint32_t *)&footer[16]; + comment_len = *(uint16_t *)&footer[20]; + + assert(disk_no == 0); + assert(disk_start == 0); + assert(nrecs_on_disk == nrecs); + assert(comment_len == 0); + if (global_header_offset >= 0xFFFFFFFF) { + // get global header offset from extra data + std::vector zip64endrec_header(56); + fseek(fp, -98, SEEK_END); + fread(&zip64endrec_header[0], sizeof(char), 56, fp); + global_header_offset = *(uint64_t *)&zip64endrec_header[48]; + } +} + +template +void npy_save(std::string fname, const T *data, const std::vector shape, + std::string mode) { + FILE *fp = NULL; + // if appending, the shape of existing + new data + std::vector true_data_shape; + + if (mode == "a") + fp = fopen(fname.c_str(), "r+b"); + + if (fp) { + // file exists. we need to append to it. 
read the header, modify the array + // size + size_t word_size; + char type; + bool fortran_order; + parse_npy_header(fp, word_size, type, true_data_shape, fortran_order); + assert(!fortran_order); + + if (word_size != sizeof(T)) { + std::cout << "libnpy error: " << fname << " has word size " << word_size + << " but npy_save appending data sized " << sizeof(T) << "\n"; + assert(word_size == sizeof(T)); + } + if (true_data_shape.size() != shape.size()) { + std::cout << "libnpy error: npy_save attempting to append " + << "misdimensioned data to " << fname << "\n"; + assert(true_data_shape.size() != shape.size()); + } + + for (size_t i = 1; i < shape.size(); i++) { + if (shape[i] != true_data_shape[i]) { + std::cout << "libnpy error: npy_save attempting to append " + << "misshaped data to " << fname << "\n"; + assert(shape[i] == true_data_shape[i]); + } + } + true_data_shape[0] += shape[0]; + } else { + fp = fopen(fname.c_str(), "wb"); + true_data_shape = shape; + } + + size_t word_size = sizeof(T); + char type = mapType::value; + std::vector header = + create_npy_header(true_data_shape, word_size, type); + size_t nels = + std::accumulate(shape.begin(), shape.end(), 1, std::multiplies()); + + fseek(fp, 0, SEEK_SET); + fwrite(&header[0], sizeof(char), header.size(), fp); + fseek(fp, 0, SEEK_END); + fwrite(data, sizeof(T), nels, fp); + fclose(fp); +} + +template void npy_save>(std::string, + const std::complex *, + const std::vector, + std::string); +template void npy_save(std::string, const double *, + const std::vector, std::string); +template void npy_save(std::string, const char *, + const std::vector, std::string); + +template +void npy_save(std::string fname, const std::vector data, std::string mode) { + std::vector shape; + shape.push_back(data.size()); + npy_save(fname, &data[0], shape, mode); +} + +template +void npz_save(std::string zipname, std::string fname, const T *data, + const std::vector &shape, std::string mode) { + // first, append a .npy to the fname 
+ fname += ".npy"; + + // now, on with the show + FILE *fp = NULL; + uint16_t nrecs = 0; + size_t global_header_offset = 0; + std::vector global_header; + + if (mode == "a") + fp = fopen(zipname.c_str(), "r+b"); + + if (fp) { + // zip file exists. we need to add a new npy file to it. + // first read the footer. + // this gives us the offset and size of the global header + // then read and store the global header. + // below, we will write the the new data at the start of the global + // header then append the global header and footer below it + size_t global_header_size; + parse_zip_footer(fp, nrecs, global_header_size, global_header_offset); + fseek(fp, global_header_offset, SEEK_SET); + global_header.resize(global_header_size); + size_t res = fread(&global_header[0], sizeof(char), global_header_size, fp); + if (res != global_header_size) { + throw std::runtime_error( + "npz_save: " + "header read error while adding to existing zip"); + } + fseek(fp, global_header_offset, SEEK_SET); + } else { + fp = fopen(zipname.c_str(), "wb"); + } + + size_t word_size = sizeof(T); + char type = mapType::value; + std::vector npy_header; + if (shape.size() != 0) { + npy_header = create_npy_header(shape, word_size, type); + } else { + std::cerr << "[Warning] zip name: " << fname + << " npz shape size is 0, skip it\n"; + fclose(fp); + return; + } + + size_t nels = + std::accumulate(shape.begin(), shape.end(), 1, std::multiplies()); + size_t nbytes = nels * sizeof(T) + npy_header.size(); + + // get the CRC of the data to be added + uint32_t crc = crc32(0L, (uint8_t *)&npy_header[0], npy_header.size()); + crc = crc32(crc, (const uint8_t *)data, nels * sizeof(T)); + + // build the local header + std::vector local_header; + local_header += "PK"; // first part of sig + local_header += (uint16_t)0x0403; // second part of sig + local_header += (uint16_t)20; // min version to extract + local_header += (uint16_t)0; // general purpose bit flag + local_header += (uint16_t)0; // compression 
method + local_header += (uint16_t)0; // file last mod time + local_header += (uint16_t)0; // file last mod date + local_header += (uint32_t)crc; // crc + local_header += (uint32_t)nbytes; // compressed size + local_header += (uint32_t)nbytes; // uncompressed size + local_header += (uint16_t)fname.size(); // fname length + local_header += (uint16_t)0; // extra field length + local_header += fname; + + fwrite(&local_header[0], sizeof(char), local_header.size(), fp); + fwrite(&npy_header[0], sizeof(char), npy_header.size(), fp); + fwrite(data, sizeof(T), nels, fp); + /* + Only support global_header_offset is larger than ZIP64_LIMIT. + Not support size is larger than ZIP64_LIMIT now. + */ + if (global_header_offset + nbytes + local_header.size() >= ZIP64_LIMIT) { + // structCentralDir = "<4s4B4HL2L5H2L" + // centdir = struct.pack(structCentralDir, + // stringCentralDir, create_version, + // zinfo.create_system, extract_version, zinfo.reserved, + // flag_bits, zinfo.compress_type, dostime, dosdate, + // zinfo.CRC, compress_size, file_size, + // len(filename), len(extra_data), len(zinfo.comment), + // 0, zinfo.internal_attr, zinfo.external_attr, + // header_offset) + + // build global header + global_header += "PK"; // first part of sig + global_header += (uint16_t)0x0201; // second part of sig + global_header += (uint8_t)45; // create_version + global_header += (uint8_t)3; // zinfo.create_system + global_header += (uint8_t)45; // extract_version + global_header += (uint8_t)0; // zinfo.reserved + global_header.insert(global_header.end(), local_header.begin() + 6, + local_header.begin() + 28); + global_header += (uint16_t)12; // extran data length + global_header += (uint16_t)0; // file comment length + global_header += (uint16_t)0; // disk number where file starts + global_header += (uint16_t)0; // internal file attributes + global_header += (uint32_t)0; // external file attributes + // relative offset of local file header + // since it begins where the global header 
used to begin + global_header += (uint32_t)0xFFFFFFFF; // global_header_offset; + global_header += fname; + // Append a ZIP64 field to the extra's + // extra_data = struct.pack( + // '= ZIP64_LIMIT) { + // structEndArchive64 = "<4sQ2H2L4Q" + // zip64endrec = struct.pack( + // structEndArchive64, stringEndArchive64, + // 44, 45, 45, 0, 0, centDirCount, centDirCount, + // centDirSize, centDirOffset) + // self.fp.write(zip64endrec) + std::vector zip64endrec_header; + zip64endrec_header += "PK"; + zip64endrec_header += (uint16_t)0x0606; + zip64endrec_header += (uint64_t)0x44; + zip64endrec_header += (uint16_t)0x45; + zip64endrec_header += (uint16_t)0x45; + zip64endrec_header += (uint32_t)0x0; + zip64endrec_header += (uint32_t)0x0; + zip64endrec_header += (uint64_t)(nrecs + 1); // centDirCount + zip64endrec_header += (uint64_t)(nrecs + 1); // centDirCount + zip64endrec_header += (uint64_t)global_header.size(); // centDirSize + zip64endrec_header += (uint64_t)global_header_offset + nbytes + + local_header.size(); // centDirOffset + fwrite(&zip64endrec_header[0], sizeof(char), zip64endrec_header.size(), fp); + + // structEndArchive64Locator = "<4sLQL" + // zip64locrec = struct.pack( + // structEndArchive64Locator, + // stringEndArchive64Locator, 0, pos2, 1) + // self.fp.write(zip64locrec) + std::vector zip64locrec_header; + zip64locrec_header += "PK"; + zip64locrec_header += (uint16_t)0x0706; + zip64locrec_header += (uint32_t)0x0; + zip64locrec_header += + (uint64_t)global_header_offset + nbytes + local_header.size() + + zip64endrec_header.size(); // zip64endrec_header offset + zip64locrec_header += (uint32_t)0x1; + fwrite(&zip64locrec_header[0], sizeof(char), zip64locrec_header.size(), fp); + } + // build footer + std::vector footer; + footer += "PK"; // first part of sig + footer += (uint16_t)0x0605; // second part of sig + footer += (uint16_t)0; // number of this disk + footer += (uint16_t)0; // disk where footer starts + footer += (uint16_t)(nrecs + 1); // number of 
records on this disk + footer += (uint16_t)(nrecs + 1); // total number of records + footer += (uint32_t)global_header.size(); // nbytes of global headers + // offset of start of global headers + // since global header now starts after newly written array + footer += + (global_header_offset >= ZIP64_LIMIT) + ? (uint32_t)0xFFFFFFFF + : (uint32_t)(global_header_offset + nbytes + local_header.size()); + footer += (uint16_t)0; // zip file comment length + + fwrite(&footer[0], sizeof(char), footer.size(), fp); + fclose(fp); +} + +template void npz_save>(std::string, std::string, + const std::complex *, + const std::vector &, + std::string); +template void npz_save(std::string, std::string, const double *, + const std::vector &, std::string); +template void npz_save(std::string, std::string, const char *, + const std::vector &, std::string); + +template +void npz_save(std::string zipname, std::string fname, + const std::vector &data, std::string mode) { + std::vector shape; + shape.push_back(data.size()); + npz_save(zipname, fname, &data[0], shape, mode); +} + +template +void npz_save(std::string zipname, std::string fname, NpyArray &array, + std::string mode) { + npz_save(zipname, fname, array.data(), array.shape, mode); +} + +template +void npz_add_array(npz_t &map, std::string fname, const T *data, + const std::vector shape) { + size_t word_size = sizeof(T); + char type = mapType::value; + bool fortran_order = false; + NpyArray array(shape, word_size, type, fortran_order); + memcpy(array.data(), data, array.num_bytes()); + map[fname] = array; +} + +void npz_clone_array(npz_t &map, std::string fname, std::string new_name) { + auto array = map[fname]; + map[new_name] = array; +} + +template void npz_add_array>(npz_t &, std::string, + const std::complex *, + const std::vector); +template void npz_add_array(npz_t &, std::string, const float *, + const std::vector); +template void npz_add_array(npz_t &, std::string, const int8_t *, + const std::vector); +template void 
npz_add_array(npz_t &, std::string, const uint8_t *, + const std::vector); +template void npz_add_array(npz_t &, std::string, const int16_t *, + const std::vector); +template void npz_add_array(npz_t &, std::string, const uint16_t *, + const std::vector); +template void npz_add_array(npz_t &, std::string, const uint32_t *, + const std::vector); +template void npz_add_array(npz_t &, std::string, const int32_t *, + const std::vector); + +template +void npz_add_array(npz_t &map, std::string fname, const std::vector &data) { + std::vector shape; + shape.push_back(data.size()); + npz_add_array(map, fname, &data[0], shape); +} + +template void +npz_add_array>(npz_t &, std::string, + const std::vector> &); +template void npz_add_array(npz_t &, std::string, + const std::vector &); +template void npz_add_array(npz_t &, std::string, + const std::vector &); +template void npz_add_array(npz_t &, std::string, + const std::vector &); +template void npz_add_array(npz_t &, std::string, + const std::vector &); +template void npz_add_array(npz_t &, std::string, + const std::vector &); +template void npz_add_array(npz_t &, std::string, + const std::vector &); + +void npz_save_all(std::string zipname, npz_t &map) { + for (auto it = map.begin(); it != map.end(); it++) { + std::string mode = (it == map.begin()) ? 
"w" : "a"; + NpyArray &arr = it->second; + if (arr.type == 'f') { + // support float only for now + assert(arr.word_size = sizeof(float)); + npz_save(zipname, it->first, it->second, mode); + } else if (arr.type == 'i') { + // support int8/int16/int32 only + if (arr.word_size == sizeof(int8_t)) { + npz_save(zipname, it->first, it->second, mode); + } else if (arr.word_size == sizeof(int16_t)) { + npz_save(zipname, it->first, it->second, mode); + } else if (arr.word_size == sizeof(int32_t)) { + npz_save(zipname, it->first, it->second, mode); + } else { + assert(0); + } + } else if (arr.type == 'u') { + // support uint8/uint16/uint32 + if (arr.word_size == sizeof(uint8_t)) { + npz_save(zipname, it->first, it->second, mode); + } else if (arr.word_size == sizeof(uint16_t)) { + npz_save(zipname, it->first, it->second, mode); + } else if (arr.word_size == sizeof(uint32_t)) { + npz_save(zipname, it->first, it->second, mode); + } else { + assert(0); + } + } else if (arr.type == 'b') { + // not support yet + assert(0); + } else if (arr.type == 'c') { + // not support yet + assert(0); + } else { + // invalid type + std::cout << "libcnpy error: invalid array type " << arr.type << ", for " + << it->first << "\n"; + assert(0); + } + } +} + +static NpyArray load_the_npy_file(FILE *fp) { + std::vector shape; + size_t word_size; + char type; + bool fortran_order; + parse_npy_header(fp, word_size, type, shape, fortran_order); + + NpyArray arr(shape, word_size, type, fortran_order); + size_t nread = fread(arr.data(), 1, arr.num_bytes(), fp); + if (nread != arr.num_bytes()) + throw std::runtime_error("load_the_npy_file: failed fread"); + return arr; +} + +static NpyArray load_the_npz_array(FILE *fp, uint32_t compr_bytes, + uint32_t uncompr_bytes) { + std::vector buffer_compr(compr_bytes); + std::vector buffer_uncompr(uncompr_bytes); + size_t nread = fread(&buffer_compr[0], 1, compr_bytes, fp); + if (nread != compr_bytes) + throw std::runtime_error("load_the_npy_file: failed fread"); + 
+ int err; + z_stream d_stream; + + d_stream.zalloc = Z_NULL; + d_stream.zfree = Z_NULL; + d_stream.opaque = Z_NULL; + d_stream.avail_in = 0; + d_stream.next_in = Z_NULL; + err = inflateInit2(&d_stream, -MAX_WBITS); + assert(err = 0); + + d_stream.avail_in = compr_bytes; + d_stream.next_in = &buffer_compr[0]; + d_stream.avail_out = uncompr_bytes; + d_stream.next_out = &buffer_uncompr[0]; + + err = inflate(&d_stream, Z_FINISH); + assert(err = 0); + err = inflateEnd(&d_stream); + assert(err = 0); + + std::vector shape; + size_t word_size; + char type; + bool fortran_order; + parse_npy_header(&buffer_uncompr[0], word_size, type, shape, fortran_order); + + NpyArray array(shape, word_size, type, fortran_order); + + size_t offset = uncompr_bytes - array.num_bytes(); + memcpy(array.data(), &buffer_uncompr[0] + offset, + array.num_bytes()); + + return array; +} + +npz_t npz_load(std::string fname) { + npz_t arrays; + arrays.clear(); + + FILE *fp = fopen(fname.c_str(), "rb"); + if (!fp) { + // throw std::runtime_error("npz_load: Error! 
Unable to open file + // "+fname+"!"); + return arrays; + } + + while (1) { + std::vector local_header(30); + size_t headerres = fread(&local_header[0], sizeof(char), 30, fp); + if (headerres != 30) + break; + + // if we've reached the global header, stop reading + if (local_header[2] != 0x03 || local_header[3] != 0x04) + break; + + // read in the variable name + uint16_t name_len = *(uint16_t *)&local_header[26]; + std::string varname(name_len, ' '); + size_t vname_res = fread(&varname[0], sizeof(char), name_len, fp); + if (vname_res != name_len) + throw std::runtime_error("npz_load: failed fread"); + + // erase the lagging .npy + varname.erase(varname.end() - 4, varname.end()); + + // read in the extra field + uint16_t extra_field_len = *(uint16_t *)&local_header[28]; + if (extra_field_len > 0) { + std::vector buff(extra_field_len); + size_t efield_res = fread(&buff[0], sizeof(char), extra_field_len, fp); + if (efield_res != extra_field_len) + throw std::runtime_error("npz_load: failed fread"); + } + + uint16_t compr_method = *reinterpret_cast(&local_header[0] + 8); + uint32_t compr_bytes = *reinterpret_cast(&local_header[0] + 18); + uint32_t uncompr_bytes = + *reinterpret_cast(&local_header[0] + 22); + + if (compr_method == 0) { + arrays[varname] = load_the_npy_file(fp); + } else { + arrays[varname] = load_the_npz_array(fp, compr_bytes, uncompr_bytes); + } + } + + fclose(fp); + return arrays; +} + +NpyArray npz_load(std::string fname, std::string varname) { + FILE *fp = fopen(fname.c_str(), "rb"); + + if (!fp) + throw std::runtime_error("npz_load: Unable to open file " + fname); + + while (1) { + std::vector local_header(30); + size_t header_res = fread(&local_header[0], sizeof(char), 30, fp); + if (header_res != 30) + throw std::runtime_error("npz_load: failed fread"); + + // if we've reached the global header, stop reading + if (local_header[2] != 0x03 || local_header[3] != 0x04) + break; + + // read in the variable name + uint16_t name_len = *(uint16_t 
*)&local_header[26]; + std::string vname(name_len, ' '); + size_t vname_res = fread(&vname[0], sizeof(char), name_len, fp); + if (vname_res != name_len) + throw std::runtime_error("npz_load: failed fread"); + vname.erase(vname.end() - 4, vname.end()); // erase the lagging .npy + + // read in the extra field + uint16_t extra_field_len = *(uint16_t *)&local_header[28]; + fseek(fp, extra_field_len, SEEK_CUR); // skip past the extra field + + uint16_t compr_method = *reinterpret_cast(&local_header[0] + 8); + uint32_t compr_bytes = *reinterpret_cast(&local_header[0] + 18); + uint32_t uncompr_bytes = + *reinterpret_cast(&local_header[0] + 22); + + if (vname == varname) { + NpyArray array = (compr_method == 0) + ? load_the_npy_file(fp) + : load_the_npz_array(fp, compr_bytes, uncompr_bytes); + fclose(fp); + return array; + } else { + // skip past the data + uint32_t size = *(uint32_t *)&local_header[22]; + fseek(fp, size, SEEK_CUR); + } + } + + fclose(fp); + + // if we get here, we haven't found the variable in the file + throw std::runtime_error("npz_load: Variable name " + varname + + " not found in " + fname); +} + +NpyArray npy_load(std::string fname) { + + FILE *fp = fopen(fname.c_str(), "rb"); + + if (!fp) + throw std::runtime_error("npy_load: Unable to open file " + fname); + + NpyArray arr = load_the_npy_file(fp); + + fclose(fp); + return arr; +} + +} // namespace cnpy + +#endif diff --git a/tpu-runtime/app/model_runner/cnpy.h b/tpu-runtime/app/model_runner/cnpy.h new file mode 100644 index 0000000..1a9096b --- /dev/null +++ b/tpu-runtime/app/model_runner/cnpy.h @@ -0,0 +1,119 @@ +//Copyright (C) 2011 Carl Rogers +//Released under MIT License +//license available in LICENSE file, or at http://www.opensource.org/licenses/mit-license.php + +#ifndef LIBCNPY_H_ +#define LIBCNPY_H_ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace cnpy { + +struct NpyArray { + 
NpyArray(const std::vector& _shape, size_t _word_size, + char _type, bool _fortran_order) + : shape(_shape), word_size(_word_size), + type(_type), fortran_order(_fortran_order) { + num_vals = 1; + for(size_t i = 0;i < shape.size();i++) num_vals *= shape[i]; + data_holder = std::shared_ptr>( + new std::vector(num_vals * word_size)); + } + + NpyArray() : shape(0), word_size(0), type(0), fortran_order(0), num_vals(0) {} + + template + T* data() { + return reinterpret_cast(&(*data_holder)[0]); + } + + template + const T* data() const { + return reinterpret_cast(&(*data_holder)[0]); + } + + template + std::vector as_vec() const { + const T* p = data(); + return std::vector(p, p+num_vals); + } + + size_t num_bytes() const { + return data_holder->size(); + } + + std::shared_ptr> data_holder; + std::vector shape; + size_t word_size; + char type; + bool fortran_order; + size_t num_vals; +}; + +using npz_t = std::map; + +std::vector create_npy_header(const std::vector& shape, + size_t word_size, char type); +void parse_npy_header(FILE* fp,size_t& word_size, char& type, + std::vector& shape, bool& fortran_order); +void parse_npy_header(unsigned char* buffer, size_t& word_size, char& type, + std::vector& shape, bool& fortran_order); +void parse_zip_footer(FILE* fp, uint16_t& nrecs, size_t& global_header_size, + size_t& global_header_offset); +npz_t npz_load(std::string fname); +NpyArray npz_load(std::string fname, std::string varname); +NpyArray npy_load(std::string fname); + +template +std::vector& operator+=(std::vector& lhs, const T rhs); +template<> +std::vector& operator+=(std::vector& lhs, const std::string rhs); +template<> +std::vector& operator+=(std::vector& lhs, const char* rhs); + +template +void npy_save(std::string fname, const T* data, + const std::vector shape, std::string mode = "w"); +template +void npy_save(std::string fname, const std::vector data, + std::string mode = "w"); + +template +void npz_save(std::string zipname, std::string fname, + const T* data, 
const std::vector& shape, + std::string mode = "w"); +template +void npz_save(std::string zipname, std::string fname, + const std::vector &data, std::string mode = "w"); +template +void npz_save(std::string zipname, std::string fname, + NpyArray &array, std::string mode = "w"); + + +template +void npz_add_array(npz_t &map, std::string fname, + const T* data, const std::vector shape); +template +void npz_add_array(npz_t &map, std::string fname, + const std::vector &data); + +void npz_clone_array(npz_t &map, std::string fname, std::string new_name); + +void npz_save_all(std::string zipname, npz_t &map); + +} // namespace cnpy + +#endif diff --git a/tpu-runtime/app/model_runner/model_runner.cpp b/tpu-runtime/app/model_runner/model_runner.cpp new file mode 100644 index 0000000..67d683a --- /dev/null +++ b/tpu-runtime/app/model_runner/model_runner.cpp @@ -0,0 +1,265 @@ +#ifndef __linux__ +int main(int argc, char **argv) { + // do nothing + return 0; +} +#else +#include "bmlib_runtime.h" +#include "bmodel.hpp" +#include "bmruntime.h" +#include "bmruntime.hpp" +#include "bmruntime_bmnet.h" +#include "bmruntime_cpp.h" +#include "bmruntime_interface.h" +#include "cnpy.h" +#include + +static std::string in_file; +static std::string model_file; +static std::string out_file; +static vector devices; + +void Usage() { + printf("Usage:\n" + " --version : Show version.\n" + " --input : Set input npz file \n" + " --model : Set model path \n" + " --output : Set output npz file \n" + " --devid : Set devices to run for model, e.g. 1,2. 
if not " + "set, use 0\n"); +} + +static void split(const std::string &s, const std::string &delim, + std::vector &ret) { + size_t last = 0; + size_t index = s.find_first_of(delim, last); + while (index != std::string::npos) { + ret.push_back(s.substr(last, index - last)); + last = index + 1; + index = s.find_first_of(delim, last); + } + if (last < s.length()) { + ret.push_back(s.substr(last)); + } +} + +static vector parseDevices(const string &str) { + vector devices; + vector sub_str; + split(str, ",", sub_str); + for (auto &s : sub_str) { + devices.push_back(std::atoi(s.c_str())); + } + return devices; +} + +static void deal_with_options(int argc, char **argv) { + int ch, lopt, idx = 0; + static struct option options[] = {{"version", no_argument, NULL, 'v'}, + {"input", required_argument, NULL, 'i'}, + {"model", required_argument, NULL, 'm'}, + {"output", required_argument, NULL, 'o'}, + {"devid", required_argument, NULL, 'd'}, + {0, 0, 0, 0}}; + + if (argc < 2) { + Usage(); + exit(-1); + } + + while ((ch = getopt_long(argc, argv, "v:i:m:o:d:", options, &idx)) != -1) { + switch (ch) { + case 'v': + std::cout << VER << std::endl; + exit(0); + break; + case 'i': + in_file = optarg; + break; + case 'm': + model_file = optarg; + break; + case 'o': + out_file = optarg; + break; + case 'd': + devices = parseDevices(optarg); + break; + default: + // unknown option + BMRT_LOG(FATAL, "Unknown option"); + Usage(); + break; + } + } + if (in_file.empty() || model_file.empty() || out_file.empty()) { + BMRT_LOG(FATAL, "Unknown option"); + Usage(); + exit(-1); + } +} + +static void add_array(cnpy::npz_t &map, std::string name, bm_handle_t bm_handle, + const bm_tensor_t &dst) { + std::vector shape; + size_t count = 1; + for (int i = 0; i < dst.shape.num_dims; i++) { + auto d = dst.shape.dims[i]; + shape.push_back(d); + count *= d; + } + size_t real_bytes = bmrt_tensor_bytesize(&dst); + switch (dst.dtype) { + case BM_FLOAT32: { + std::vector data(count); + 
bm_memcpy_d2s_partial(bm_handle, data.data(), dst.device_mem, real_bytes); + cnpy::npz_add_array(map, name, data.data(), shape); + } break; + case BM_INT32: { + std::vector data(count); + bm_memcpy_d2s_partial(bm_handle, data.data(), dst.device_mem, real_bytes); + cnpy::npz_add_array(map, name, data.data(), shape); + } break; + case BM_UINT32: { + std::vector data(count); + bm_memcpy_d2s_partial(bm_handle, data.data(), dst.device_mem, real_bytes); + cnpy::npz_add_array(map, name, data.data(), shape); + } break; + case BM_UINT16: + case BM_FLOAT16: + case BM_BFLOAT16: { + std::vector data(count); + bm_memcpy_d2s_partial(bm_handle, data.data(), dst.device_mem, real_bytes); + cnpy::npz_add_array(map, name, data.data(), shape); + } break; + case BM_INT16: { + std::vector data(count); + bm_memcpy_d2s_partial(bm_handle, data.data(), dst.device_mem, real_bytes); + cnpy::npz_add_array(map, name, data.data(), shape); + } break; + case BM_INT8: { + std::vector data(count); + bm_memcpy_d2s_partial(bm_handle, data.data(), dst.device_mem, real_bytes); + cnpy::npz_add_array(map, name, data.data(), shape); + } break; + case BM_UINT8: { + std::vector data(count); + bm_memcpy_d2s_partial(bm_handle, data.data(), dst.device_mem, real_bytes); + cnpy::npz_add_array(map, name, data.data(), shape); + } break; + default: + BMRT_LOG(FATAL, "Not support type %d\n", dst.dtype); + exit(-1); + } +} + +void readTensor(cnpy::npz_t &map, const std::string &name, uint8_t *data, + size_t bytes, bm_shape_t &shape) { + auto it = map.find(name.c_str()); + if (it == map.end()) { + BMRT_LOG(FATAL, "failed to find tensor %s\n", name.c_str()); + exit(-1); + } + auto arr = it->second; + if (arr.num_bytes() > bytes) { + BMRT_LOG(FATAL, "size is too large for tensor %s\n", name.c_str()); + exit(-1); + } + if (arr.shape.size() > 0) { + shape.num_dims = arr.shape.size(); + for (int i = 0; i < shape.num_dims; ++i) { + shape.dims[i] = arr.shape[i]; + } + } + memcpy(data, arr.data_holder->data(), 
arr.num_bytes()); +} + +int main(int argc, char **argv) { + deal_with_options(argc, argv); + auto npz_in = cnpy::npz_load(in_file); + cnpy::npz_t npz_out; + if (devices.empty()) { + devices.push_back(0); + } + int device_num = devices.size(); + bm_handle_t bm_handles[device_num]; + bm_status_t status; + unsigned int chipid; + for (int i = 0; i < device_num; i++) { + auto status = bm_dev_request(&bm_handles[i], devices[i]); + if (BM_SUCCESS != status) { + BMRT_LOG(FATAL, "bm_dev_request failed, id:[%d]", devices[i]); + exit(-1); + } + unsigned int chipid_ = 0; + if (0 != bm_get_chipid(bm_handles[i], &chipid_)) { + BMRT_LOG(FATAL, "Cannot get chipid"); + exit(-1); + } + if (i == 0) { + chipid = chipid_; + } else if (chipid != chipid_) { + BMRT_LOG(FATAL, "Not same chipid"); + exit(-1); + } + } + auto p_bmrt = bmrt_create_ex(bm_handles, devices.size()); + bool flag = bmrt_load_bmodel(p_bmrt, model_file.c_str()); + if (!flag) { + BMRT_LOG(FATAL, "Load bmodel[%s] failed", model_file.c_str()); + exit(-1); + } + bmrt_show_neuron_network(p_bmrt); + const char **net_names = NULL; + bmrt_get_network_names(p_bmrt, &net_names); + int net_num = bmrt_get_network_number(p_bmrt); + if (net_num != 1) { + BMRT_LOG(FATAL, "Only support one net bmodel"); + exit(-1); + } + auto net_info = bmrt_get_network_info(p_bmrt, net_names[0]); + if (net_info->stage_num != 1) { + BMRT_LOG(FATAL, "Only support one stage bmodel"); + exit(-1); + } + std::vector input_tensors(net_info->input_num); + std::vector output_tensors(net_info->output_num); + auto &stage = net_info->stages[0]; + for (int i = 0; i < net_info->input_num; i++) { + int devid = net_info->input_loc_devices[i]; + uint8_t *buffer = new uint8_t[net_info->max_input_bytes[i]]; + auto real_shape = stage.input_shapes[i]; + readTensor(npz_in, net_info->input_names[i], buffer, + net_info->max_input_bytes[i], real_shape); + bmrt_tensor_ex(&input_tensors[i], p_bmrt, devid, + net_info->input_dtypes[i], real_shape); + 
bm_memcpy_s2d(bm_handles[devid], input_tensors[i].device_mem, buffer); + delete[] buffer; + } + for (int i = 0; i < net_info->output_num; i++) { + bmrt_tensor_ex(&output_tensors[i], p_bmrt, net_info->output_loc_devices[i], + net_info->output_dtypes[i], stage.output_shapes[i]); + } + bool ret = bmrt_launch_tensor_ex(p_bmrt, net_names[0], input_tensors.data(), + net_info->input_num, output_tensors.data(), + net_info->output_num, true, false); + if (ret == true) { + status = bm_thread_sync(bm_handles[0]); + } + if (ret == false || BM_SUCCESS != status) { + BMRT_LOG(FATAL, "Neuron network '%s' inference failed", net_names[0]); + exit(-1); + } + for (int i = 0; i < net_info->output_num; i++) { + int devid = net_info->output_loc_devices[i]; + add_array(npz_out, net_info->output_names[i], bm_handles[devid], + output_tensors[i]); + } + cnpy::npz_save_all(out_file, npz_out); + bmrt_destroy(p_bmrt); + for (int i = 0; i < device_num; i++) { + bm_dev_free(bm_handles[i]); + } + return 0; +} +#endif diff --git a/tpu-runtime/config/bmrt_version.h.in b/tpu-runtime/config/bmrt_version.h.in new file mode 100644 index 0000000..6d994d8 --- /dev/null +++ b/tpu-runtime/config/bmrt_version.h.in @@ -0,0 +1,3 @@ +#cmakedefine COMMIT_HASH "@COMMIT_HASH@" +#cmakedefine BRANCH_NAME "@BRANCH_NAME@" +#cmakedefine COMPILE_TIME "@COMPILE_TIME@" diff --git a/tpu-runtime/docs/reference/README.md b/tpu-runtime/docs/reference/README.md index 1e6c98f..ed5e586 100644 --- a/tpu-runtime/docs/reference/README.md +++ b/tpu-runtime/docs/reference/README.md @@ -1,4 +1,4 @@ -This is the document repo of NNToolChain, which is used for deploying models on Sophon TPU (https://sophon.ai). +This is the document repo of NNToolChain, which is used for deploying models on SOPHON deep-learning processors (https://sophon.ai). * Frontend parsers for Caffe/TensorFlow/MXNet/PyTorch * Graph optimization compiler and runtime @@ -30,8 +30,8 @@ If fandol exists in /usr/share/fonts, we can ignore this step. 
make html # build document to static html files python3 -m http.server --directory build/html # open http://localhost:8000 in browser - -## make pdf + +## make pdf ``` @@ -45,8 +45,8 @@ TODO ## Built With -* [Sphinx](http://www.sphinx-doc.org) - Document auto generate tool -* [Latex](https://www.latex-project.org/) - High-quality typesetting system +* [Sphinx](http://www.sphinx-doc.org) - Document auto generate tool +* [Latex](https://www.latex-project.org/) - High-quality typesetting system ## License diff --git a/tpu-runtime/docs/reference/source_en/0_disclaimer.rst b/tpu-runtime/docs/reference/source_en/0_disclaimer.rst index b9ed895..f000bab 100644 --- a/tpu-runtime/docs/reference/source_en/0_disclaimer.rst +++ b/tpu-runtime/docs/reference/source_en/0_disclaimer.rst @@ -9,11 +9,11 @@ Disclaimer :alt: SOPHGO LOGO | **Legal Disclaimer** -| Copyright © SOPHGO 2022. All rights reserved. +| Copyright © SOPHGO 2024. All rights reserved. | No part of this document may be reproduced or transmitted in any form or by any means without prior written consent of SOPHGO . | **Notice** -| The purchased products, services and features are stipulated by the contract made between SOPHGO and the +| The purchased products, services and features are stipulated by the contract made between SOPHGO and the customer. All or part of the products, services and features described in this document may not be within the purchase scope or the usage scope. Unless otherwise specified in the contract, all statements, information, and recommendations in this document are provided "AS IS" without warranties, guarantees or @@ -60,7 +60,7 @@ Disclaimer ---------- ------------- ------------------------------ V2.6.0 2021.01.30 Revised and released Version V.2.6.0. ---------- ------------- ------------------------------ - V2.7.0 2022.03.16 Released Version V2.7.0, patch version on May 31, 2022. + V2.7.0 2024.03.16 Released Version V2.7.0, patch version on May 31, 2024. 
---------- ------------- ------------------------------ - V3.0.0 2022.07.16 Released Version V3.0.0. + V3.0.0 2024.07.16 Released Version V3.0.0. ========== ============= ============================== \ No newline at end of file diff --git a/tpu-runtime/docs/reference/source_en/bmodel/bmodel.rst b/tpu-runtime/docs/reference/source_en/bmodel/bmodel.rst index ce3c393..43f0c78 100644 --- a/tpu-runtime/docs/reference/source_en/bmodel/bmodel.rst +++ b/tpu-runtime/docs/reference/source_en/bmodel/bmodel.rst @@ -5,9 +5,9 @@ BModel About BModel _____________ -Bmodel is a deep neural network model file format for Sophon's TPU processors. +Bmodel is a deep neural network model file format for SOPHON deep-learning processors. Generated by model compilers (such as bmnetc/bmnett, etc.), it contains parameter information of one or more networks, such as input and output information. -It is loaded and used as a model file in the runtime phase. +It is loaded and used as a model file in the runtime phase. Bmodel also serves as the compilation output file for the BMLang programming language and it is generated in the BMLang compilation phase. Bmodel contains the information of one or more BMLang functions,such as parameters, input and output. @@ -21,18 +21,18 @@ Multi-stage bmodel description: Alternatively, compile two bmodels with the inputs [1,3,200,200] and [1,3,100,100] to build the model that supports inputs 200*200 and 100*100. Static bmodel description: - -1. Static bmodel saves the atomic operation instructions with fixed parameters that can be directly used on the chip. TPU can automatically read such atomic operation instructions, execute them in the flow line without any interruption in the halfway. + +1. Static bmodel saves the atomic operation instructions with fixed parameters that can be directly used on the device. 
The deep-learning processor can automatically read such atomic operation instructions, execute them in the flow line without any interruption in the halfway. 2. When the static bmodel is executed, the size input of the model must be same with its size in compilation. -3. Due to the simplicity and stability of a static interface, the model compiled under the new sdk can usually run on the old machine without refreshing the new firmware. It should be noted that although static compilation is designated for some models, some operators must have the internal MCU of TPU or host cpu involved, such as sorting, nms, where, detect_out and other operators with more logical operations. This part will be divided into subnets and implemented in a dynamic way. If the part for updating sdk compilation is a dynamic model, it is preferred to refresh or update the firmware to ensure sdk and runtime are consistent (It can be judged by the output of tpu_model \--info xx.bmodel. For static compilation, if the subnet number is 1, it indicates a purely static network. See the section on tpu_model use for details. +3. Due to the simplicity and stability of a static interface, the model compiled under the new sdk can usually run on the old machine without refreshing the new firmware. It should be noted that although static compilation is designated for some models, some operators must have the internal MCU of deep-learning processor or host processor involved, such as sorting, nms, where, detect_out and other operators with more logical operations. This part will be divided into subnets and implemented in a dynamic way. If the part for updating sdk compilation is a dynamic model, it is preferred to refresh or update the firmware to ensure sdk and runtime are consistent (It can be judged by the output of tpu_model \--info xx.bmodel. For static compilation, if the subnet number is 1, it indicates a purely static network. See the section on tpu_model use for details. 4. 
If the input shape only has several fixed discrete cases, the multi-stage bmodel aforementioned may be used to achieve the effect of the dynamic model. Dynamic bmodel description: -1. The dynamic bmodel stores the parameter information of each operator and cannot run directly on the TPU. It is necessary for the MCU inside the TPU to parse parameters layer by layer for shape and dtype inference and call atomic operations for achieving specific functions. So, the dynamic bmodel has a worse performance than the static one. +1. The dynamic bmodel stores the parameter information of each operator and cannot run directly on the deep-learning processor. It is necessary for the MCU inside the deep-learning processor to parse parameters layer by layer for shape and dtype inference and call atomic operations for achieving specific functions. So, the dynamic bmodel has a worse performance than the static one. 2. When running the dynamic bmodel on the bm168x platform, it is preferred to start icache. Otherwise, the bmodel will run slowly. @@ -58,7 +58,7 @@ Currently, the following six operation methods are available: bmodel version: B.2.2 # bmodel version No. chip: BM1684 # Chip type supported - create time: Mon Apr 11 13:37:45 2022 # Creation time + create time: Mon Apr 11 13:37:45 2024 # Creation time ========================================== # Network dividing line: If there are multiple nets, there will be several dividing lines. 
@@ -77,7 +77,7 @@ Currently, the following six operation methods are available: output: Identity, [1, 400, 7], float32, scale: 1 device mem size: 942757216 (coeff: 141710112, instruct: 12291552, runtime: - 788755552) # The memory size occupied by the model on the TPU, in byte, + 788755552) # The memory size occupied by the model on the deep-learning processor, in byte, # format: Total memory size occupied (size of constant memory, size of instruction # memory, size of data memory during the running) host mem size: 8492192 (coeff: 32, runtime: 8492160) # Memory size occupied on diff --git a/tpu-runtime/docs/reference/source_en/bmrt_test/bmrt_test.rst b/tpu-runtime/docs/reference/source_en/bmrt_test/bmrt_test.rst index 505b697..ddacabe 100644 --- a/tpu-runtime/docs/reference/source_en/bmrt_test/bmrt_test.rst +++ b/tpu-runtime/docs/reference/source_en/bmrt_test/bmrt_test.rst @@ -17,7 +17,7 @@ _____________________________________________ .. table:: bmrt_test main parameter description :widths: 15 10 50 - + +---------------+------------+---------------------------------------------------------------------------------------------------------------------------------------------+ | args | type | Description | +===============+============+=============================================================================================================================================+ @@ -89,6 +89,11 @@ ____________________________________________ bmrt_test --bmodel xxx.bmodel --stage_idx 0 --shapes "[1,3,224,224]" # Run the # multi-stage bmodel model and specify the bmodel for running stage 0. + bmrt_test --bmodel xxx.bmodel --core_list 0,1 + # run the bmodel on the deep learning processor core0 and core1 at the same time + # note that the bmodel is multi-core compiled and can be architected to support multi-core + # the value in core_list is at least 0 and cannot be greater than the number of ( deep learning processor cores - 1 ). 
+ # The following instructions are functions provided by using environmental variables # and bmruntime and can be used by other applications. BMRUNTIME_ENABLE_PROFILE=1 bmrt_test --bmodel xxx.bmodel # Generate @@ -100,7 +105,7 @@ ____________________________________________ Comparison Data Generation and Verification Example ___________________________________________________ -1. Upon the completion of model compilation, run with comparing the model. +1. Upon the completion of model compilation, run with comparing the model. When compiling the model, you must indicate \--cmp=True, which is enabled by default. input_ref_data.dat and output_ref_data.dat files will be generated in the compilation output folder. @@ -128,7 +133,7 @@ ___________________________________________________ f.write(output_data.astype(np.float32).tobytes()) # astype will convert # according to the output data type of bmodel - Put the generated input_ref_data.dat and output_ref_data.dat in the bmodel_dir file folder + Put the generated input_ref_data.dat and output_ref_data.dat in the bmodel_dir file folder and then in 'bmrt_test \--context_dir bmodel_dir' to see if the result is a comparison error. FAQs diff --git a/tpu-runtime/docs/reference/source_en/bmruntime/runtime.rst b/tpu-runtime/docs/reference/source_en/bmruntime/runtime.rst index a39486d..3a40dc2 100644 --- a/tpu-runtime/docs/reference/source_en/bmruntime/runtime.rst +++ b/tpu-runtime/docs/reference/source_en/bmruntime/runtime.rst @@ -1,13 +1,13 @@ BMRuntime ================ -BMRuntime is used to read the compiled output (.bmodel) of BMCompiler and drive it to be executed in the SOPHON TPU chip. BMRuntime provides users with diversified interfaces, which are convenient for users to transplant algorithms. Its software architecture is shown as follows: +BMRuntime is used to read the compiled output (.bmodel) of BMCompiler and drive it to be executed in the deep-learning processor. 
BMRuntime provides users with diversified interfaces, which are convenient for users to transplant algorithms. Its software architecture is shown as follows: .. image:: ../_static/bmruntime.png BMRuntime has two interfaces available, C and C++. Some interfaces are reserved in order to be compatible with the previous generation of applications. However, it is not recommened to continue using new applications. -Interfaces in this chapter are all synchronous by default and some of them are asynchronous (functions are executed by the NPU and the CPU may continue to execute from up to bottom), which will be specially described. +Interfaces in this chapter are all synchronous by default and some of them are asynchronous (functions are executed by the deep-learning processor and the host processor may continue to execute from up to bottom), which will be specially described. This chapter consists of four parts: @@ -241,12 +241,12 @@ The interface described in this section is only valid on the SoC. On the SoC, al Mmap may be used to get the virtual address of Device Memory so that it can be directly accessed by the application. -**Special note**: The NPU directly accesses DDR when accessing Device Memory without passing cache but cache is passed when an application accesses it. +**Special note**: The deep-learning processor directly accesses DDR when accessing Device Memory without passing cache but cache is passed when an application accesses it. Thus, it is necessary to ensure the consistency of caches. This means: -* The application revises the data of Device Memory through the virtual address. It is necessary to flush before NPU inference so as to ensure the cache data has been synchronized with DDR. -* Device Memory data is modified upon the ending of NPU inference. The application needs to be invalidated before access through the virutal address so as to ensure DDR data has been synchronized with cache. 
+* The application revises the data of Device Memory through the virtual address. It is necessary to flush before deep-learning processor inference so as to ensure the cache data has been synchronized with DDR. +* Device Memory data is modified upon the ending of deep-learning processor inference. The application needs to be invalidated before access through the virtual address so as to ensure DDR data has been synchronized with cache. bm_mem_mmap_device_mem ::::::::::::::::::::::: @@ -322,9 +322,9 @@ Refresh cache data or ensure cache data has been sychronized with DDR. * [in] offset * [in] len */ - bm_status_t bm_mem_flush_partial_device_mem(bm_handle_t handle, bm_device_mem_t + bm_status_t bm_mem_flush_partial_device_mem(bm_handle_t handle, bm_device_mem_t *dmem,unsigned int offset, unsigned int len); - + Specify cache refreshing within the offset and size of device mem. @@ -376,7 +376,7 @@ Program synchronize bm_status_t bm_thread_sync(bm_handle_t handle); -Synchronous interface. Normally, npu inference is made asynchronously and the user’s cpu program can continue to be executed. This interface is used in the cpu process to ensure the npu inference is completed. Unless otherwise specially described, all interfaces introduced in this chapter are synchronous ones. There are only a few asynchronous interfaces that need to call bm_thread_sync for synchronization. +Synchronous interface. Normally, deep-learning processor inference is made asynchronously and the user’s host program can continue to be executed. This interface is used in the host process to ensure the deep-learning processor inference is completed. Unless otherwise specially described, all interfaces introduced in this chapter are synchronous ones. There are only a few asynchronous interfaces that need to call bm_thread_sync for synchronization. C Interface @@ -421,7 +421,7 @@ Store mode BM_STORE_4N = 2, } bm_store_mode_t; -bm_store_mode_t specifies how data is stored. 
You only need to focus on BM_STORE_1N. If you want to focus on the bottom layer and optimize performance, you need to focus on BM_STORE_2N and BM_STORE_4N. +bm_store_mode_t specifies how data is stored. You only need to focus on BM_STORE_1N. If you want to focus on the bottom layer and optimize performance, you need to focus on BM_STORE_2N and BM_STORE_4N. BM_STORE_1N is the default storage method for data types. It indicates data is stored as normal. @@ -583,6 +583,22 @@ bmrt_create Create bmruntime and return the runtime pointer. For other interfaces (bmrt_xxxx class interfaces), the required handle is the runtime pointer. + +bmrt_create_ex +>>>>>>>>>>>>>>>>>>>>>>> + +.. code-block:: cpp + + /* + Parameters: [in] bm_handles - BM handles. They must be initialized by using bmlib. + Parameters: [in] num_handles - Number of bm_handles. + Returns: void* - The pointer of a bmruntime helper. + */ + void *bmrt_create_ex(bm_handle_t *bm_handles, int num_handles); + +Create a bmruntime that supports passing in multiple bm_handle, used to run distributed bmodels. + + bmrt_destroy >>>>>>>>>>>>>>>>>>>> @@ -721,6 +737,8 @@ Network information is expressed as follows: typedef struct bm_stage_info_s { bm_shape_t* input_shapes; /* input_shapes[0] / [1] / ... / [input_num-1] */ bm_shape_t* output_shapes; /* output_shapes[0] / [1] / ... / [output_num-1] */ + bm_device_mem_t *input_mems; /* input_mems[0] / [1] / ... / [input_num-1] */ + bm_device_mem_t *output_mems; /* output_mems[0] / [1] / ... / [output_num-1] */ } bm_stage_info_t; /* bm_tensor_info_t holds all information of one net */ @@ -739,6 +757,12 @@ Network information is expressed as follows: bm_stage_info_t* stages; /* stages[0] / [1] / ... / [stage_num-1] */ size_t * max_input_bytes; /* max_input_bytes[0]/ [1] / ... / [input_num-1] */ size_t * max_output_bytes; /* max_output_bytes[0] / [1] / ... 
/ [output_num-1] */ + int* input_zero_point; /* input_zero_point[0] / [1] / .../ [input_num-1] */ + int* output_zero_point; /* output_zero_point[0] / [1] / .../ [output_num-1] */ + int *input_loc_devices; /* input_loc_device[0] / [1] / .../ [input_num-1] */ + int *output_loc_devices; /* output_loc_device[0] / [1] / .../ [output_num-1] */ + int core_num; /* core number */ + int32_t addr_mode; /* address assign mode */ } bm_net_info_t; bm_net_info_t represents all information of a network and bm_stage_info_t represents the conditions of different shapes supported by the network. @@ -752,6 +776,14 @@ input_scales and output_scales are only useful when they are integers and are 1. max_input_bytes represents the maximum number of bytes for each input and max_output_bytes represents the maximum number of bytes for each output. Each network may have multiple stages. The user may request the maximum number of bytes for each input/output and store the data of various stages. +input_zero_point and output_zero_point record the zero_point values for inputs and outputs in the case of an asymmetric quantized int8 network. + +input_loc_devices and output_loc_devices record the device id for inputs and outputs in the case of a distributed network. + +core_num records the number of cores required by the network. + +addr_mode records the network's address allocation mode, where 1 indicates the basic mode, and 2 indicates the io_alone mode. + bmrt_get_network_info gets the information of a given network according to the network name. The interface is declared as follows: .. code-block:: cpp @@ -776,15 +808,15 @@ Print network information. It is required in debugging. The interface is declare bmrt_launch_tensor >>>>>>>>>>>>>>>>>>>>>> -Infer npu for the designated network. The interface is declared as follows: +Infer deep-learning processor for the designated network. The interface is declared as follows: .. 
code-block:: cpp /* To launch the inference of the neuron network with setting input tensors. This API supports the neuron nework, that is static-compiled or dynamic-compiled. - After calling this API, inference on TPU is launched. The CPU program will not be blocked - if the neuron network is static-compiled and has no cpu layer. Otherwize, the CPU + After calling this API, inference on deep-learning processor is launched. The host program will not be blocked + if the neuron network is static-compiled and has no cpu layer. Otherwize, the host program will be blocked. This API support multiple inputs, and multi thread safety. Parameters: [in] p_bmrt - Bmruntime that had been created. @@ -842,15 +874,15 @@ The example of the use method is shown as follows: bmrt_launch_tensor_ex >>>>>>>>>>>>>>>>>>>>>> -Infer npu for a given network. The interface is declared as follows: +Infer deep-learning processor for a given network. The interface is declared as follows: .. code-block:: cpp /* To launch the inference of the neuron network with setting input tensors. This API supports the neuron nework, that is static-compiled or dynamic-compiled. - After calling this API, inference on TPU is launched. The CPU program will not be blocked - if the neuron network is static-compiled and has no cpu layer. Otherwize, the CPU + After calling this API, inference on deep-learning processor is launched. The host program will not be blocked + if the neuron network is static-compiled and has no cpu layer. Otherwize, the host program will be blocked. This API supports multiple inputs, and multi thread safety. Parameters: [in] p_bmrt - Bmruntime that had been created. @@ -931,14 +963,14 @@ The example of the use method is shown as follows: bmrt_launch_data >>>>>>>>>>>>>>>>> -Infer npu for a given network. The interface is declared as follows: +Infer deep-learning processor for a given network. The interface is declared as follows: .. 
code-block:: cpp /* To launch the inference of the neuron network with setting input datas in system memory. This API supports the neuron nework, that is static-compiled or dynamic-compiled. - After calling this API, inference on TPU is launched. And the CPU program will be blocked. + After calling this API, inference on deep-learning processor is launched. And the host program will be blocked. This API supports multiple inputs, and multi thread safety. Parameters: [in] p_bmrt - Bmruntime that had been created. @@ -1178,7 +1210,7 @@ The use reference is shown as follows: .. code-block:: cpp - //net1, the shapes of input tensors can be reshaped + //net1, the shapes of input tensors can be reshaped Network net1(*p_ctx, "net1"); //net2, the shape of stage [1] in bm_net_info_ is adopted and will not be reshaped later. Network net2(*p_ctx, "net2", 1); @@ -1372,7 +1404,7 @@ Set the device mem of the tensor. Before inference, you can configure the device mem of the input to specify the store position of the input data or configure the device mem of output to indicate the store position of output. -Both input and output will be stored in the device mem automatically requested by network if you set nothing. +Both input and output will be stored in the device mem automatically requested by network if you set nothing. Additionally, you can configure the size of device mem, which cannot be smaller than ByteSize (); otherwise, errors will be returned due to the failure in storing the data of the entire tensor. @@ -1458,14 +1490,14 @@ The programming model is shown as follows: The figure uses the C interface as an example. -For the C++ interface, create a single Context instance and then load the network model via load_bmodel. +For the C++ interface, create a single Context instance and then load the network model via load_bmodel. -Next, create network instances in multiple threads for inference. The networks for the same instance may be the same or different. 
+Next, create network instances in multiple threads for inference. The networks for the same instance may be the same or different. multi runtime >>>>>>>>>>>>>>>> -You can create multiple threads, each creating a bumruntime. The loading model of each bmruntime is independent, with the same model loaded among them. +You can create multiple threads, each creating a bumruntime. The loading model of each bmruntime is independent, with the same model loaded among them. The programming model is shown as follows: diff --git a/tpu-runtime/docs/reference/source_en/bmruntime_sample/bmruntime_sample.rst b/tpu-runtime/docs/reference/source_en/bmruntime_sample/bmruntime_sample.rst index 89c1fdf..c57bab6 100644 --- a/tpu-runtime/docs/reference/source_en/bmruntime_sample/bmruntime_sample.rst +++ b/tpu-runtime/docs/reference/source_en/bmruntime_sample/bmruntime_sample.rst @@ -14,7 +14,7 @@ Example description: * Create bm_handle and a runtime instance. * Load bmodel, which has one testnet network, two inputs and two outputs. * Prepare input tensors, including the shape and data of each input. -* Start inference. +* Start inference. * Upon the ending of inference, copy the result data in output_tensor to the system memory. * Before exiting from the program, release device mem, runtime instances and bm_handle. 
diff --git a/tpu-runtime/docs/reference/source_en/conf.py b/tpu-runtime/docs/reference/source_en/conf.py index 6789ef1..d5f13a5 100644 --- a/tpu-runtime/docs/reference/source_en/conf.py +++ b/tpu-runtime/docs/reference/source_en/conf.py @@ -20,7 +20,7 @@ # -- Project information ----------------------------------------------------- project = u'BMRuntime' -copyright = u'2022, SOPHGO' +copyright = u'2024, SOPHGO' author = u'SOPHGO' import os, subprocess @@ -151,10 +151,10 @@ # Full text text left aligned:\usepackage[document]{ragged2e} 'preamble':r''' \usepackage{tocloft} - \renewcommand\cftfignumwidth{4em} - \renewcommand\cfttabnumwidth{4em} - \renewcommand\cftsecnumwidth{4em} - \renewcommand\cftsubsecnumwidth{6em} + \renewcommand\cftfignumwidth{4em} + \renewcommand\cfttabnumwidth{4em} + \renewcommand\cftsecnumwidth{4em} + \renewcommand\cftsubsecnumwidth{6em} \renewcommand\cftparanumwidth{6em} \usepackage{fancyhdr} \setlength\headheight{14pt} diff --git a/tpu-runtime/docs/reference/source_zh/0_disclaimer.rst b/tpu-runtime/docs/reference/source_zh/0_disclaimer.rst index e40947b..644bbb6 100644 --- a/tpu-runtime/docs/reference/source_zh/0_disclaimer.rst +++ b/tpu-runtime/docs/reference/source_zh/0_disclaimer.rst @@ -9,7 +9,7 @@ :alt: SOPHGO LOGO | **法律声明** -| 版权所有 © 算能 2022. 保留一切权利。 +| 版权所有 © 算能 2024. 
保留一切权利。 | 非经本公司书面许可,任何单位和个人不得擅自摘抄、复制本文档内容的部分或全部,并不得以任何形式传播。 | **注意** diff --git a/tpu-runtime/docs/reference/source_zh/bmodel/bmodel.rst b/tpu-runtime/docs/reference/source_zh/bmodel/bmodel.rst index 1b7f033..abe71d2 100644 --- a/tpu-runtime/docs/reference/source_zh/bmodel/bmodel.rst +++ b/tpu-runtime/docs/reference/source_zh/bmodel/bmodel.rst @@ -5,13 +5,10 @@ BModel BModel 介绍 _____________ -bmodel是面向算能TPU处理器的深度神经网络模型文件格式。 +bmodel是面向算能深度学习处理器的深度神经网络模型文件格式。 通过模型编译器工具(如bmnetc/bmnett等)生成,包含一个至多个网络的参数信息,如输入输出等信息。 并在runtime阶段作为模型文件被加载和使用。 -bmodel也作为BMLang编程语言的编译输出文件,由BMLang编译阶段生成,包含一个或多个BMLang功能Function的 -参数、输入输出等信息。 - 多stage bmodel说明: bmodel里stage是用多种input_shape分别编译出bmodel,然后用tpu_model将多个bmodel合并到一起成为一个bmodel,而里面包含的每个bmodel是一个stage。stage_num就是指合并的bmodel个数。未经过合并的bmodel的stage_num=1。当以某种shape运行模型时,bmruntime会自动选择有相同输入shape的bmodel运行。 @@ -22,18 +19,18 @@ bmodel也作为BMLang编程语言的编译输出文件,由BMLang编译阶段 也可以分别以[1,3,200,200],[1,3,100,100]的输入编译出两个bmodel,达到支持200x200和100x100输入的模型 静态bmodel说明: - -1. 静态bmodel保存的是芯片上可直接使用的固定参数原子操作指令,TPU可以自动读取该原子操作指令,流水执行,中间无中断。 + +1. 静态bmodel保存的是芯片上可直接使用的固定参数原子操作指令,深度学习处理器可以自动读取该原子操作指令,流水执行,中间无中断。 2. 静态bmodel被执行时,模型输入大小必须和编译时的大小相同。 -3. 由于静态接口简单稳定,在新的sdk下编译出来的模型通常能在旧机器上运行,不用更新firmware刷机。需要注意的是,有些模型虽然指定的是静态编译,但有些算子必需有TPU内部mcu参与或host cpu参与,如排序、nms、where、detect_out这类逻辑运算比较多的算子,该部分会被切分成子网,用动态方式实现。如果更新sdk重新编译的这类部分是动态的模型,最好刷机或更新firmware,以保证sdk和runtime是一致的。(可以通过tpu_model \--info xx.bmodel的输出来判断,如果是static且subnet number为1时,是纯静态网络,具体可见tpu_model使用章节)。 +3. 由于静态接口简单稳定,在新的sdk下编译出来的模型通常能在旧机器上运行,不用更新firmware刷机。需要注意的是,有些模型虽然指定的是静态编译,但有些算子必需有深度学习处理器内部mcu参与或主机处理器参与,如排序、nms、where、detect_out这类逻辑运算比较多的算子,该部分会被切分成子网,用动态方式实现。如果更新sdk重新编译的这类部分是动态的模型,最好刷机或更新firmware,以保证sdk和runtime是一致的。(可以通过tpu_model \--info xx.bmodel的输出来判断,如果是static且subnet number为1时,是纯静态网络,具体可见tpu_model使用章节)。 4. 如果输入的shape只有固定离散的几种情况,可以使用上面说的多stage bmodel来达到动态模型的效果。 动态bmodel说明: -1. 动态bmodel保存的是每个算子的参数信息,并不能直接在TPU上运行。需要TPU内部的mcu逐层解析参数,进行shape及dtype推理,调用原子操作来实现具体的功能,故运行效率比静态bmodel稍差。 +1. 
动态bmodel保存的是每个算子的参数信息,并不能直接在深度学习处理器上运行。需要深度学习处理器内部的mcu逐层解析参数,进行shape及dtype推理,调用原子操作来实现具体的功能,故运行效率比静态bmodel稍差。 2. 在bm168x平台上运行时,最好打开icache,否则运行比较慢。 @@ -59,12 +56,12 @@ _____________ bmodel version: B.2.2 # bmodel的格式版本号 chip: BM1684 # 支持的芯片类型 - create time: Mon Apr 11 13:37:45 2022 # 创建时间 + create time: Mon Apr 11 13:37:45 2024 # 创建时间 ========================================== # 网络分割线,如果有多个net,会有多条分割线 net 0: [informer_frozen_graph] static # 网络名称为informer_frozen_graph, 为static类型网络(即静态网络),如果是dynamic,为动态编译网络 ------------ # stage分割线,如果每个网络有多个stage,会有多个分割线 - stage 0: # 第一个stage信息 + stage 0, core_num: x # 第一个stage信息和对应的深度学习处理器core数量 subnet number: 41 # 该stage中子网个数,这个是编译时切分的,以支持在不同设备切换运行。通常子网个数 # 越少越好 input: x_1, [1, 600, 9], float32, scale: 1 # 输入输出信息:名称、形状、量化的scale值 @@ -73,7 +70,7 @@ _____________ input: x_2, [1, 500, 9], float32, scale: 1 output: Identity, [1, 400, 7], float32, scale: 1 - device mem size: 942757216 (coeff: 141710112, instruct: 12291552, runtime: 788755552) # 该模型在TPU上内存占用情况(以byte为单位),格式为: 总占用内存大小(常量内存大小,指令内存大小, 运行时数据内存占用大小) + device mem size: 942757216 (coeff: 141710112, instruct: 12291552, runtime: 788755552) # 该模型在深度学习处理器上内存占用情况(以byte为单位),格式为: 总占用内存大小(常量内存大小,指令内存大小, 运行时数据内存占用大小) host mem size: 8492192 (coeff: 32, runtime: 8492160) # 宿主机上内存占用情况(以byte为单位),格式为: 总占用内存大小(常量内存大小,运行时数据内存大小) diff --git a/tpu-runtime/docs/reference/source_zh/bmrt_test/bmrt_test.rst b/tpu-runtime/docs/reference/source_zh/bmrt_test/bmrt_test.rst index 0daf16d..c893ffe 100644 --- a/tpu-runtime/docs/reference/source_zh/bmrt_test/bmrt_test.rst +++ b/tpu-runtime/docs/reference/source_zh/bmrt_test/bmrt_test.rst @@ -17,7 +17,7 @@ _____________________________________________ .. 
table:: bmrt_test主要参数说明 :widths: 15 10 50 - + +---------------+------------+-----------------------------------------------------------------+ | args | type | Description | +===============+============+=================================================================+ @@ -53,6 +53,8 @@ _____________________________________________ +---------------+------------+-----------------------------------------------------------------+ | subnet_time | bool | 可选, 是否显示bmodel的subnet时间 | +---------------+------------+-----------------------------------------------------------------+ + | core_list | string | 可选, 指定深度学习处理器core参数列表(仅对支持多核的硬件架构有效) + + +---------------+------------+-----------------------------------------------------------------+ bmrt_test输出 ____________________________________________ @@ -84,7 +86,7 @@ ____________________________________________ bmrt_test --context_dir bmodel_dir --compare=0 # 运行bmodel,bmodel_dir中要包含compilation.bmodel bmrt_test --bmodel xxx.bmodel # 直接运行bmodel,不比对数据 bmrt_test --bmodel xxx.bmodel --stage_idx 0 --shapes "[1,3,224,224]" # 运行多stage的bmodel模型,指定运行stage0的bmodel - + bmrt_test --bmodel xxx.bmodel --core_list 0,1 # 在深度学习处理器core0和core1上同时运行该bmodel,注意该bmodel为多核编译且能够架构支持多核运行,core_list中的值至少为0且不能大于深度学习处理器core数量-1 # 以下命令是通过环境变量使用bmruntime提供的功能,其他应用程序也可以使用 BMRUNTIME_ENABLE_PROFILE=1 bmrt_test --bmodel xxx.bmodel # 生成profile数据:bmprofile_data-x BMRT_SAVE_IO_TENSORS=1 bmrt_test --bmodel xxx.bmodel # 将模型推理的数据保存成input_ref_data.dat.bmrt和output_ref_data.dat.bmrt @@ -131,4 +133,4 @@ _________________ 如果随机位置的零星错误,可能是个别值计算误差引起的。原因是编译时用的是随机数据,不排除会出现这种情况,所以建议编译时先加上 \--cmp 0,在实际业务程序上验证结果是否正确 -还有一种可能是网络中存在随机算子(如uniform_random)或者排序算子(如topk、nms、argmin等),由于在前面计算过程会产生输入数据的浮点尾数误差,即使很小也会导致排序结果的index不同。 这种情况下,可以看到比对出错的数据顺序上有差异,只能到实际业务上去测试 \ No newline at end of file +还有一种可能是网络中存在随机算子(如uniform_random)或者排序算子(如topk、nms、argmin等),由于在前面计算过程会产生输入数据的浮点尾数误差,即使很小也会导致排序结果的index不同。 这种情况下,可以看到比对出错的数据顺序上有差异,只能到实际业务上去测试 diff --git 
a/tpu-runtime/docs/reference/source_zh/bmruntime/runtime.rst b/tpu-runtime/docs/reference/source_zh/bmruntime/runtime.rst index 1e9e368..2df4d78 100644 --- a/tpu-runtime/docs/reference/source_zh/bmruntime/runtime.rst +++ b/tpu-runtime/docs/reference/source_zh/bmruntime/runtime.rst @@ -1,13 +1,13 @@ BMRuntime ================ -BMRuntime用于读取BMCompiler的编译输出(.bmodel),驱动其在SOPHON TPU芯片中执行。BMRuntime向用户提供了丰富的接口,便于用户移植算法,其软件架构如下: +BMRuntime用于读取BMCompiler的编译输出(.bmodel),驱动其在深度学习处理器中执行。BMRuntime向用户提供了丰富的接口,便于用户移植算法,其软件架构如下: .. image:: ../_static/bmruntime.png BMRuntime有C和C++两种接口;另外为了兼容上一代应用程序,保留一些接口,但不推荐新的应用程序继续使用。 -本章节中的接口默认都是同步接口,有个别是异步接口(由NPU执行功能,CPU可以继续往下执行),会特别说明。 +本章节中的接口默认都是同步接口,有个别是异步接口(由深度学习处理器执行功能,主机处理器可以继续往下执行),会特别说明。 本章节分4个部分: @@ -578,6 +578,22 @@ bmrt_create 创建bmruntime,返回runtime指针。其他接口(bmrt_xxxx类接口),需要的句柄都是该runtime指针。 + +bmrt_create_ex +>>>>>>>>>>>>>>>>>>>>>>> + +.. code-block:: cpp + + /* + Parameters: [in] bm_handles - BM handles. They must be initialized by using bmlib. + Parameters: [in] num_handles - Number of bm_handles. + Returns: void* - The pointer of a bmruntime helper. + */ + void *bmrt_create_ex(bm_handle_t *bm_handles, int num_handles); + +创建bmruntime,支持传入多个bm_handle,用于运行分布式的bmodel。 + + bmrt_destroy >>>>>>>>>>>>>>>>>>>> @@ -716,6 +732,8 @@ bmrt_get_network_info typedef struct bm_stage_info_s { bm_shape_t* input_shapes; /* input_shapes[0] / [1] / ... / [input_num-1] */ bm_shape_t* output_shapes; /* output_shapes[0] / [1] / ... / [output_num-1] */ + bm_device_mem_t *input_mems; /* input_mems[0] / [1] / ... / [input_num-1] */ + bm_device_mem_t *output_mems; /* output_mems[0] / [1] / ... / [output_num-1] */ } bm_stage_info_t; /* bm_tensor_info_t holds all information of one net */ @@ -734,6 +752,12 @@ bmrt_get_network_info bm_stage_info_t* stages; /* stages[0] / [1] / ... / [stage_num-1] */ size_t * max_input_bytes; /* max_input_bytes[0]/ [1] / ... / [input_num-1] */ size_t * max_output_bytes; /* max_output_bytes[0] / [1] / ... 
/ [output_num-1] */ + int* input_zero_point; /* input_zero_point[0] / [1] / .../ [input_num-1] */ + int* output_zero_point; /* output_zero_point[0] / [1] / .../ [output_num-1] */ + int *input_loc_devices; /* input_loc_device[0] / [1] / .../ [input_num-1] */ + int *output_loc_devices; /* output_loc_device[0] / [1] / .../ [output_num-1] */ + int core_num; /* core number */ + int32_t addr_mode; /* address assign mode */ } bm_net_info_t; bm_net_info_t表示一个网络的全部信息,bm_stage_info_t表示该网络支持的不同的shape情况。 @@ -747,6 +771,14 @@ input_scales和output_scales只有整型时有用;浮点型时为默认值1.0 max_input_bytes表示每个input最大的字节数,max_output_bytes表示每个output最大的字节数。 每个网络可能有多个stage,用户可能需要申请每个input/output的最大字节数,存放各种stage的数据。 +input_zero_point和output_zero_point记录在非对称量化int8网络的情况下输入和输出的zero_point值。 + +input_loc_devices和output_loc_devices记录在分布式网络的情况下输入和输出设备号。 + +core_num记录网络所需的core数量。 + +addr_mode记录网络的地址分配模式,0表示基础模式,1表示io_alone模式,2 表示 io_tag 模式。 + bmrt_get_network_info根据网络名,得到某个网络的信息,接口声明如下: .. code-block:: cpp @@ -771,15 +803,15 @@ bmrt_print_network_info bmrt_launch_tensor >>>>>>>>>>>>>>>>>>>>>> -对指定的网络,进行npu推理。接口声明如下: +对指定的网络,进行推理。接口声明如下: .. code-block:: cpp /* To launch the inference of the neuron network with setting input tensors. This API supports the neuron nework that is static-compiled or dynamic-compiled. - After calling this API, inference on TPU is launched. The CPU program will not be blocked - if the neuron network is static-compiled and has no cpu layer. Otherwize, the CPU + After calling this API, inference on deep-learning processor is launched. The host processor program will not be blocked + if the neuron network is static-compiled and has no cpu layer. Otherwize, the host processor program will be blocked. This API support multiple inputs, and multi thread safety. Parameters: [in] p_bmrt - Bmruntime that had been created. @@ -837,15 +869,15 @@ bmrt_launch_tensor bmrt_launch_tensor_ex >>>>>>>>>>>>>>>>>>>>>> -对指定的网络,进行npu推理。接口声明如下: +对指定的网络,进行推理。接口声明如下: .. 
code-block:: cpp /* To launch the inference of the neuron network with setting input tensors. This API supports the neuron nework that is static-compiled or dynamic-compiled. - After calling this API, inference on TPU is launched. The CPU program will not be blocked - if the neuron network is static-compiled and has no cpu layer. Otherwize, the CPU + After calling this API, inference on deep-learning processor is launched. The host program will not be blocked + if the neuron network is static-compiled and has no cpu layer. Otherwize, the host program will be blocked. This API support multiple inputs, and multi thread safety. Parameters: [in] p_bmrt - Bmruntime that had been created. @@ -884,6 +916,7 @@ bmrt_luanch_tensor == bmrt_launch_tensor_ex(user_mem = false, user_stmode = fals * 当user_mem为true时,接口不会为output_tensor申请device mem,用户需要在外部申请,申请的大小可以通过bm_net_info_t中的max_output_bytes指定。 * 当user_stmode为false时,输出数据以BM_STROE_1N排列。 * 当user_stmode为true时,输出数据根据各个output_tensor中的st_mode指定。 +* 当深度学习处理器硬件架构支持多核时,该接口默认使用从core0开始的N个core来做推理,如果需要指定使用具体的深度学习处理器core,需要使用 bmrt_launch_tensor_multi_cores 来完成。N由当前bmodel决定。 **需要注意:** 该接口为异步接口,用户需要调用bm_thread_sync确保推理完成。 @@ -933,7 +966,7 @@ bmrt_launch_data /* To launch the inference of the neuron network with setting input datas in system memory. This API supports the neuron nework that is static-compiled or dynamic-compiled. - After calling this API, inference on TPU is launched. And the CPU program will be blocked. + After calling this API, inference on deep-learning processor is launched. And the host program will be blocked. This API support multiple inputs, and multi thread safety. Parameters: [in] p_bmrt - Bmruntime that had been created. @@ -969,6 +1002,82 @@ bmrt_launch_data * 输入和输出都存储在系统内存。 * 为同步接口。接口返回的时候推理已经完成。 +bmrt_launch_tensor_multi_cores +>>>>>>>>>>>>>>>>>>>>>> + +对指定的网络,选择指定的深度学习处理器core推理。接口声明如下: + +.. 
code-block:: cpp + + /* + To launch the inference of the neuron network with setting input tensors, and support multi core inference. + This API supports the neuron nework that is static-compiled or dynamic-compiled + After calling this API, inference on deep-learning processor is launched. And the host program will not + be blocked. bm_thread_sync_from_core should be called to make sure inference is finished. + This API support multiple inputs, and multi thread safety. + + Parameters: [in] p_bmrt - Bmruntime that had been created. + [in] net_name - The name of the neuron network. + [in] input_tensors - Array of input tensor. + Defined like bm_tensor_t input_tensors[input_num]. + User should initialize each input tensor. + [in] input_num - Input number. + [out] output_tensors - Array of output tensor. + Defined like bm_tensor_t output_tensors[output_num]. + User can set device_mem or stmode of output tensors. + If user_mem is true, this interface will use device mem of + output_tensors, and will not alloc device mem; Or this + interface will alloc devcie mem to store output. + User should free each device mem by bm_free_device after + the result data is useless. + [in] output_num - Output number. + [in] user_mem - true: device_mem in output_tensors have been allocated. + false: have not been allocated. + [in] user_stmode - true: output will use store mode that set in output_tensors. + false: output will use BM_STORE_1N. + [in] core_list core id list those will be used to inference + [in] core_num number of the core list + Returns: bool - true: Launch success. false: Launch failed. 
+ */ + bool bmrt_launch_tensor_multi_cores(void* p_bmrt, const char * net_name, + const bm_tensor_t input_tensors[], int input_num, + bm_tensor_t output_tensors[], int output_num, + bool user_mem, bool user_stmode, + const int *core_list, int core_num); + +具体说明如下: + +* 该函数可以选择推理时的深度学习处理器core,仅对于支持多核深度学习处理器的硬件架构有效。其余参数使用同 bmrt_launch_tensor_ex 接口。 + +**需要注意:** 该接口为异步接口,用户需要调用bm_thread_sync_from_core确保推理完成。 + +bmrt_pre_alloc_neuron_multi_cores +>>>>>>>>>>>>>>>>>>>>>> + +对指定的网络,预先申请深度学习处理器推理计算所需要的设备内存。接口声明如下: + +.. code-block:: cpp + + /* + To pre-allocate the neuron network compute memory during multi-cores arch inference. + This API only used for multi-cores arch runtime, need call before bmrt_launch_tensor_multi_cores API. + After calling this API, the memory during neuron network inference is pre-allocated, can reduce first bmrt_launch_tensor_multi_cores API time cost. + If no use this API, is also OK, bmrt will auto alloc compute memory during first launch tensor. + + Parameters: [in] p_bmrt - Bmruntime that had been created. + [in] net_name - The name of the neuron network. + [in] stage_idx - Witch network stage need to be pre-allocate. + [in] core_list core id list those will be used to inference + [in] core_num number of the core list + Returns: bool - true: Pre-allocate success. false: Pre-allocate failed. + */ + bool bmrt_pre_alloc_neuron_multi_cores(void *p_bmrt, const char *net_name, int stage_idx, + const int *core_list, int core_num); +具体说明如下: + +* 该函数仅对于支持多核深度学习处理器的硬件架构有效,可以减少第一次调用bmrt_launch_tensor_multi_cores接口时的时间。 +* 默认不使用该函数的情况下,在指定模型第一次调用bmrt_launch_tensor_multi_cores时会自动地花费时间申请深度学习处理器推理计算所需要的设备内存。 + bmrt_trace >>>>>>>>>>>>>>>>>>>> @@ -983,6 +1092,45 @@ bmrt_trace 该接口用于DEBUG。它会校验runtime的数据,打印runtime的一些信息,方便调试。 +get_bmodel_api_info_c +>>>>>>>>>>>>>>>>>>>> + +.. code-block:: cpp + /* + * This API only supports the neuron nework that is static-compiled. 
+ * After calling this API, api info will be setted and return, + * and then you can call `bm_send_api` to start deep-learning processor inference. + * When you no longer need the memory, call bmrt_free_api_info to avoid memory leaks. + * + * @param [in] p_bmrt Bmruntime that had been created + * @param [in] net_name The name of the neuron network + * @param [in] input_tensors Array of input tensor, defined like bm_tensor_t input_tensors[input_num], + * User should initialize each input tensor. + * @param [in] input_num Input number + * @param [in] output_tensors Array of output tensor, defined like bm_tensor_t output_tensors[output_num]. + * User can set device_mem or stmode of output tensors. If user_mem is true, this interface + * will use device mem of output_tensors to store output data, and not alloc device mem; + * Or it will alloc device mem to store output. If user_stmode is true, it will use stmode in + * each output tensor; Or stmode will be BM_STORE_1N as default. + * @param [in] output_num Output number + * @param [in] user_mem whether device_mem of output tensors are set + * @param [in] user_stmode whether stmode of output tensors are set + */ + api_info_c *get_bmodel_api_info_c(void *p_bmrt, const char *net_name, + const bm_tensor_t *input_tensors, int input_num, + bm_tensor_t *output_tensors, int output_num, + bool user_mem, bool user_stmode); +* 该函数使用方法类似 bmrt_launch_tensor_ex,但是它只是返回 bmodel 推理前需要下发给深度学习处理器的推理信息,并不会启动推理。该函数返回的信息可以通过 bm_send_api 发送给深度学习处理器启动推理,因此 get_bmodel_api_info + bm_send_api 和 bmrt_launc_tensor_ex 作用是等价的。 +* **在该 api_info 使用结束后需要调用 bmrt_free_api_info 来释放内存。** + +bmrt_free_api_info +>>>>>>>>>>>>>>>>>>> + +.. 
code-block:: cpp + void bmrt_free_api_info(api_info_c *api_info); + +* 释放 api_info 所申请的内存空间。 + C++ Interface _____________________ diff --git a/tpu-runtime/docs/reference/source_zh/conf.py b/tpu-runtime/docs/reference/source_zh/conf.py index a2659af..82d2426 100644 --- a/tpu-runtime/docs/reference/source_zh/conf.py +++ b/tpu-runtime/docs/reference/source_zh/conf.py @@ -20,7 +20,7 @@ # -- Project information ----------------------------------------------------- project = u'BMRuntime' -copyright = u'2022, SOPHGO' +copyright = u'2024, SOPHGO' author = u'SOPHGO' import os, subprocess @@ -151,10 +151,10 @@ # 全文文本左对齐:\usepackage[document]{ragged2e} 'preamble':r''' \usepackage{tocloft} - \renewcommand\cftfignumwidth{4em} - \renewcommand\cfttabnumwidth{4em} - \renewcommand\cftsecnumwidth{4em} - \renewcommand\cftsubsecnumwidth{6em} + \renewcommand\cftfignumwidth{4em} + \renewcommand\cfttabnumwidth{4em} + \renewcommand\cftsecnumwidth{4em} + \renewcommand\cftsubsecnumwidth{6em} \renewcommand\cftparanumwidth{6em} \usepackage{fancyhdr} \setlength\headheight{14pt} diff --git a/tpu-runtime/gen_kernel_header.cmake b/tpu-runtime/gen_kernel_header.cmake new file mode 100644 index 0000000..0f1a806 --- /dev/null +++ b/tpu-runtime/gen_kernel_header.cmake @@ -0,0 +1,15 @@ +# dump kernel_module.so to kernel_module.h +set(KERNEL_MODULE_PATH_1684X "${PROJECT_SOURCE_DIR}/lib/libbm1684x_kernel_module.so") +set(KERNEL_MODULE_PATH_TPULV60 "${PROJECT_SOURCE_DIR}/lib/libtpulv60_kernel_module.so") +set(KERNEL_HEADER_FILE "${CMAKE_BINARY_DIR}/kernel_module.h") +add_custom_command( + OUTPUT ${KERNEL_HEADER_FILE} + COMMAND echo "" > ${KERNEL_HEADER_FILE} + COMMAND echo "static const unsigned char kernel_module_data_1684x[] = {" >> ${KERNEL_HEADER_FILE} + COMMAND hexdump -v -e '8/1 \"0x%02x,\" \"\\n\"' ${KERNEL_MODULE_PATH_1684X} >> ${KERNEL_HEADER_FILE} + COMMAND echo "}\;" >> ${KERNEL_HEADER_FILE} + + COMMAND echo "static const unsigned char kernel_module_data_tpulv60[] = {" >> 
${KERNEL_HEADER_FILE} + COMMAND hexdump -v -e '8/1 \"0x%02x,\" \"\\n\"' ${KERNEL_MODULE_PATH_TPULV60} >> ${KERNEL_HEADER_FILE} + COMMAND echo "}\;" >> ${KERNEL_HEADER_FILE} + ) diff --git a/tpu-runtime/include/bm1684_profile.h b/tpu-runtime/include/bm1684_profile.h index 5f664c4..8424061 100644 --- a/tpu-runtime/include/bm1684_profile.h +++ b/tpu-runtime/include/bm1684_profile.h @@ -72,18 +72,7 @@ class BMProfileDevice:public bmruntime::BMProfileDeviceBase { // BMProfileDeviceBase interface public: - BMProfileDevice(BMProfile* profile):BMProfileDeviceBase(profile) { - enable = profile->getenv_bool(ENV_ENABLE_PROFILE); - if(enable){ - gdma_record_len = profile->getenv_int(ENV_PROFILE_GDMA_SIZE, gdma_record_len); - bdc_record_len = profile->getenv_int(ENV_PROFILE_BDC_SIZE, bdc_record_len); - dyn_max_size = profile->getenv_int(ENV_PROFILE_ARM_SIZE, dyn_max_size); - enable_gdma = !profile->getenv_bool(ENV_DISABLE_GDMA) && gdma_record_len > 0; - enable_bdc = !profile->getenv_bool(ENV_DISABLE_BDC) && bdc_record_len > 0; - enable_arm = !profile->getenv_bool(ENV_DISABLE_ARM) && dyn_max_size > 0; - enable = enable_gdma || enable_arm || enable_bdc; - } - } + BMProfileDevice(BMProfile* profile):BMProfileDeviceBase(profile) { } bool init(); bool begin(net_ctx_t* net_ctx); bool end(net_ctx_t* net_ctx); @@ -96,13 +85,6 @@ class BMProfileDevice:public bmruntime::BMProfileDeviceBase { buffer_pair dyn_buffer; bm_perf_monitor tpu_perf_monitor; bm_perf_monitor gdma_perf_monitor; - size_t gdma_record_len = 1024*1024; - size_t bdc_record_len = 1024*1024; - size_t dyn_max_size = 16*1024*1024; - bool enable_gdma =false; - bool enable_bdc = false; - bool enable_arm = false; - bool enable = false; bm_device_mem_t aligned_mem; // BMProfileDeviceBase interface diff --git a/tpu-runtime/include/bm1684x_profile.h b/tpu-runtime/include/bm1684x_profile.h index 22fcbfc..d728502 100644 --- a/tpu-runtime/include/bm1684x_profile.h +++ b/tpu-runtime/include/bm1684x_profile.h @@ -80,18 +80,7 @@ class 
BMProfileDevice:public BMProfileDeviceBase { // BMProfileDeviceBase interface public: - BMProfileDevice(BMProfile* profile):bmruntime::BMProfileDeviceBase(profile) { - enable = profile->getenv_bool(ENV_ENABLE_PROFILE); - if(enable){ - gdma_record_len = profile->getenv_int(ENV_PROFILE_GDMA_SIZE, gdma_record_len); - bdc_record_len = profile->getenv_int(ENV_PROFILE_BDC_SIZE, bdc_record_len); - dyn_max_size = profile->getenv_int(ENV_PROFILE_ARM_SIZE, dyn_max_size); - enable_gdma = !profile->getenv_bool(ENV_DISABLE_GDMA) && gdma_record_len > 0; - enable_bdc = !profile->getenv_bool(ENV_DISABLE_BDC) && bdc_record_len > 0; - enable_arm = !profile->getenv_bool(ENV_DISABLE_ARM) && dyn_max_size > 0; - enable = enable_gdma || enable_arm || enable_bdc; - } - } + BMProfileDevice(BMProfile* profile):bmruntime::BMProfileDeviceBase(profile) { } bool init(); bool begin(net_ctx_t* net_ctx); bool end(net_ctx_t* net_ctx); @@ -104,13 +93,6 @@ class BMProfileDevice:public BMProfileDeviceBase { buffer_pair dyn_buffer; bm_perf_monitor tpu_perf_monitor; bm_perf_monitor gdma_perf_monitor; - size_t gdma_record_len = 1024*1024; - size_t bdc_record_len = 1024*1024; - size_t dyn_max_size = 16*1024*1024; - bool enable_gdma = false; - bool enable_bdc = false; - bool enable_arm = false; - bool enable = false; }; diff --git a/tpu-runtime/include/bm1688_profile.h b/tpu-runtime/include/bm1688_profile.h new file mode 100644 index 0000000..8b11300 --- /dev/null +++ b/tpu-runtime/include/bm1688_profile.h @@ -0,0 +1,143 @@ +#ifndef BM1688_PROFILE_H +#define BM1688_PROFILE_H +#include "bmruntime_profile.h" + +using namespace bmruntime; +namespace bm1688_profile { + +#pragma pack(1) +typedef struct { + // 0x00-0x0F + unsigned int inst_start_time; + unsigned int inst_end_time; + unsigned int inst_id : 20; + unsigned int reserved0 : 12; + unsigned int axi_d0_aw_bytes; + + // 0x10-0x1F + unsigned int axi_d0_wr_bytes; + unsigned int axi_d0_ar_bytes; + unsigned int axi_d1_aw_bytes; + unsigned int 
axi_d1_wr_bytes; + + // 0x20-0x2F + unsigned int axi_d1_ar_bytes; + unsigned int gif_fmem_aw_bytes; + unsigned int gif_fmem_wr_bytes; + unsigned int gif_fmem_ar_bytes; + + // 0x30-0x3F + unsigned int gif_l2sram_aw_bytes; + unsigned int gif_l2sram_wr_bytes; + unsigned int gif_l2sram_ar_bytes; + unsigned int reserved1; + + // 0x40-0x4F + unsigned int axi_d0_wr_valid_bytes; + unsigned int axi_d0_rd_valid_bytes; + unsigned int axi_d1_wr_valid_bytes; + unsigned int axi_d1_rd_valid_bytes; + + // 0x50-0x5F + unsigned int gif_fmem_wr_valid_bytes; + unsigned int gif_fmem_rd_valid_bytes; + unsigned int gif_l2sram_wr_valid_bytes; + unsigned int gif_l2sram_rd_valid_bytes; + + // 0x60-0x6F + unsigned int axi_d0_wr_stall_bytes; + unsigned int axi_d0_rd_stall_bytes; + unsigned int axi_d1_wr_stall_bytes; + unsigned int axi_d1_rd_stall_bytes; + + // 0x70-0x7F + unsigned int gif_fmem_wr_stall_bytes; + unsigned int gif_fmem_rd_stall_bytes; + unsigned int gif_l2sram_wr_stall_bytes; + unsigned int gif_l2sram_rd_stall_bytes; + + // 0x80-0x8F + unsigned int axi_d0_aw_end; + unsigned int axi_d0_aw_st; + unsigned int axi_d0_ar_end; + unsigned int axi_d0_ar_st; + + // 0x90-0x9F + unsigned int axi_d0_wr_end; + unsigned int axi_d0_wr_st; + unsigned int axi_d0_rd_end; + unsigned int axi_d0_rd_st; + + // 0xA0-0xAF + unsigned int axi_d1_aw_end; + unsigned int axi_d1_aw_st; + unsigned int axi_d1_ar_end; + unsigned int axi_d1_ar_st; + + // 0xB0-0xBF + unsigned int axi_d1_wr_end; + unsigned int axi_d1_wr_st; + unsigned int axi_d1_rd_end; + unsigned int axi_d1_rd_st; + + // 0xC0-0xCF + unsigned int reserved2; + unsigned int reserved3; + unsigned int gif_fmem_ar_end; + unsigned int gif_fmem_ar_st; + + // 0xD0-0xDF + unsigned int gif_fmem_wr_end; + unsigned int gif_fmem_wr_st; + unsigned int gif_fmem_rd_end; + unsigned int gif_fmem_rd_st; + + // 0xE0-0xEF + unsigned int reserved4; + unsigned int reserved5; + unsigned int gif_l2sram_ar_end; + unsigned int gif_l2sram_ar_st; + + // 0xF0-0xFF + unsigned 
int gif_l2sram_wr_end; + unsigned int gif_l2sram_wr_st; + unsigned int gif_l2sram_rd_end; + unsigned int gif_l2sram_rd_st; +}GDMA_PROFILE_FORMAT; + +typedef struct { + unsigned int inst_start_time; + unsigned int inst_end_time; + unsigned long long inst_id : 16; + unsigned long long computation_load : 48; + unsigned int num_read; + unsigned int num_read_stall; + unsigned int num_write; + unsigned int reserved; +} TPU_PROFILE_FORMAT; +#pragma pack() + +typedef struct { + buffer_pair tiu; + buffer_pair gdma; + buffer_pair mcu; +} profile_core_buffer_t; + +using namespace bmruntime; +class BMProfileDevice:public BMProfileDeviceBase { + + // BMProfileDeviceBase interface +public: + BMProfileDevice(BMProfile* profile):bmruntime::BMProfileDeviceBase(profile) { } + bool init(); + bool begin(net_ctx_t* net_ctx); + bool end(net_ctx_t* net_ctx); + void deinit(); + bool enabled(); + +private: + std::vector buffers; +}; + +} +#endif // BM1686_PROFILE_H diff --git a/tpu-runtime/include/bmdef.h b/tpu-runtime/include/bmdef.h old mode 100644 new mode 100755 index efcd484..4822e75 --- a/tpu-runtime/include/bmdef.h +++ b/tpu-runtime/include/bmdef.h @@ -46,6 +46,13 @@ typedef enum bm_store_mode_e { BM_STORE_4N = 2, } bm_store_mode_t; +/* flags for runtime */ +typedef enum bm_runtime_flag_e { + BM_RUNTIME_AUTO = 0, /* auto flag*/ + BM_RUNTIME_SHARE_MEM = 1 << 0, /*bit0: 0,dyn mem; 1,share mem */ + BM_RUNTIME_CHECK_MEM = 1 << 1 /*bit1: 0,no check; 1,check sha256*/ +} bm_runtime_flag_t; + /* bm_shape_t holds the shape info */ #define BM_MAX_DIMS_NUM 8 typedef struct bm_shape_s { @@ -71,11 +78,13 @@ typedef struct bm_tensor_s { /* --------------------------------------------------------------------------*/ /* network information structure */ -/* bm_stage_info_t holds input shapes and output shapes; every network can contain one or more +/* bm_stage_info_t holds input/output shapes and device mems; every network can contain one or more * stages */ typedef struct bm_stage_info_s { - 
bm_shape_t* input_shapes; /* input_shapes[0] / [1] / ... / [input_num-1] */ - bm_shape_t* output_shapes; /* output_shapes[0] / [1] / ... / [output_num-1] */ + bm_shape_t *input_shapes; /* input_shapes[0] / [1] / ... / [input_num-1] */ + bm_shape_t *output_shapes; /* output_shapes[0] / [1] / ... / [output_num-1] */ + bm_device_mem_t *input_mems; /* input_mems[0] / [1] / ... / [input_num-1] */ + bm_device_mem_t *output_mems; /* output_mems[0] / [1] / ... / [output_num-1] */ } bm_stage_info_t; /* bm_tensor_info_t holds all information of one net. @@ -96,9 +105,34 @@ typedef struct bm_net_info_s { size_t* max_input_bytes; /* max_input_bytes[0]/ [1] / ... / [input_num-1] */ size_t* max_output_bytes; /* max_output_bytes[0] / [1] / ... / [output_num-1] */ int* input_zero_point; /* input_zero_point[0] / [1] / .../ [input_num-1] */ - int* output_zero_point; /* output_zero_point[0] / [1] / .../ [input_num-1] */ + int* output_zero_point; /* output_zero_point[0] / [1] / .../ [output_num-1] */ + int *input_loc_devices; /* input_loc_device[0] / [1] / .../ [input_num-1] */ + int *output_loc_devices; /* output_loc_device[0] / [1] / .../ [output_num-1] */ + int core_num; /* core number */ + int32_t addr_mode; /* address assign mode */ } bm_net_info_t; +typedef struct api_info_s { + /// @brief api_id to be sent to driver + uint32_t *api_id; + /// @brief size of api_id to be sent to driver + size_t api_id_size; + /// @brief api data to be sent to driver + uint8_t **api_data; + /// @brief size of the api data to be sent to driver + size_t api_data_size; + /// @brief subsize of the api data to be sent to driver + size_t *api_data_subsize; + /// @brief offset of input tensors' addr in api_data + uint32_t *input_addr_offset; + /// @brief number of the offset of input tensors' addr in api_data + size_t input_addr_offset_number; + /// @brief offset of output tensors' addr in api_data + uint32_t *output_addr_offset; + /// @brief number of the offset of output tensors' addr in api_data + 
size_t output_addr_offset_number; +} api_info_c; + #if defined(__cplusplus) } #endif diff --git a/tpu-runtime/include/bmfunc/bmdnn_func.h b/tpu-runtime/include/bmfunc/bmdnn_func.h old mode 100644 new mode 100755 index c224dbf..49fceeb --- a/tpu-runtime/include/bmfunc/bmdnn_func.h +++ b/tpu-runtime/include/bmfunc/bmdnn_func.h @@ -2,13 +2,104 @@ #define BMDNN_FUNC_H_ #include "bmruntime_common.h" +#include "bmruntime_cpp.h" namespace bmruntime { +struct tpu_tensor_info_t { + uint16_t dtype; + /// storage mode of input/output tensors which are setted by user + uint16_t user_stmode; + /// storage mode of input/output tensors which are fixed when compiling + uint16_t compiled_stmode; + uint32_t tensor_byte_size; + int32_t n; + int32_t c; + int32_t h; + int32_t w; + /// value of padding h for conv(only used for BM1684 conv 3ic) + uint32_t padding_h; + /// global addr that is malloced by user + uint64_t user_global_addr; + /// global addr of input/output tensors which are fixed when compiling + uint64_t compiled_global_addr; +}; +struct tpu_cmd_info_t { + /// number of bdc command + int32_t bdc_cmd_num; + /// number of gdma command + int32_t gdma_cmd_num; + /// number of cdma command + int32_t cdma_cmd_num; + /// byte size of bdc command + uint64_t bdc_cmd_byte_size; + /// byte size of gdma command + uint64_t gdma_cmd_byte_size; + /// byte size of cdma command + uint64_t cdma_cmd_byte_size; +}; +struct tpu_single_core_cmd_t { + std::vector cmd_info; + /// global addr of bdc command + uint64_t bdc_cmd_addr; + /// global addr of gdma command + uint64_t gdma_cmd_addr; + /// global addr of cdma command + uint64_t cdma_cmd_addr; + /// global addr of hau command + uint64_t hau_cmd_addr; + //// global addr of sdma command + uint64_t sdma_cmd_addr; +}; + +typedef struct tpu_kernel_allreduce_1684x { + u64 i_global_addr[8]; + u64 i_global_addr_1[8]; + u64 o_global_addr[8]; + u32 count; + int dtype; + int reduce_method; + int group[8]; + int rank; + int chip_num; + int group_size; 
+} tpu_kernel_allreduce_1684x_t; + +typedef struct tpu_kernel_global_move_1684x { + u64 src_global_addr; + u64 dst_global_addr; + int num_dims; + int shape[4]; + int src_stride[4]; + int dst_stride[4]; + int type_size; +} tpu_kernel_global_move_1684x_t; + +typedef struct { + std::vector input_info; + std::vector output_info; + std::vector core_commands; + std::vector core_list; + /// kernel func id(used for dynamic loading) + std::vector kernel_func_ids; + /// coeff start addr + uint64_t coeff_start_addr = -1; + /// neuron start addr + std::vector neuron_start_addr; + int32_t do_allreduce = 0; + tpu_kernel_allreduce_1684x_t allreduce_param; +} tpu_net_info_t; + class bmdnn_func { public: bmdnn_func() {}; - ~bmdnn_func() {}; + virtual ~bmdnn_func() {}; + + virtual bm_status_t + _bmdnn_multi_fullnet_(bm_handle_t handle, + const tpu_net_info_t &net_info) = 0; + virtual void fill_api_info(const tpu_net_info_t &net_info, + api_info_t &api_info) = 0; }; class bmdnn_func_1682 : public bmdnn_func { @@ -23,63 +114,12 @@ class bmdnn_func_1682 : public bmdnn_func { void set_bmdnn_func_profile(bool enable) { b_enable_profile = enable; } - bm_status_t _bmdnn_multi_fullnet_( + virtual bm_status_t _bmdnn_multi_fullnet_( bm_handle_t handle, - int input_num, - unsigned long long* user_input_global_offset, - unsigned long long* cmd_input_global_offset, - int* input_tensor_size, - unsigned short* input_dtype, - int output_num, - unsigned long long* user_output_global_offset, - unsigned long long* cmd_output_global_offset, - int* output_tensor_size, - unsigned short* output_dtype, - unsigned long long bdc_cmd_offset, - unsigned long long gdma_cmd_offset, - unsigned long long cdma_cmd_offset, - int* bdc_cmd_num, - int* gdma_cmd_num, - int* cdma_cmd_num, - int cmdgroup_num - ); -#if 0 - // legacy interface - bm_status_t _bmdnn_dynamic_fullnet_( - bm_handle_t handle, - unsigned long long compiled_ir_global_addr, - unsigned int compiled_ir_length, //unit dword - unsigned int batch_num, - 
unsigned int input_num, - unsigned long long* input_global_offset, - unsigned int* input_c, - unsigned int* input_height, - unsigned int* input_width, - unsigned int output_num, - unsigned long long* output_global_offset, - unsigned long long apd_ctx_mem_offset, - bool get_output_shape, - unsigned long long output_shape_global_addr - ); - - bm_status_t _bmdnn_dynamic_fullnet_ex_( - bm_handle_t handle, - unsigned long long compiled_ir_global_addr, - unsigned int compiled_ir_length, //unit dword - unsigned int batch_num, - unsigned int input_num, - unsigned long long* input_global_offset, - unsigned int* input_c, - unsigned int* input_height, - unsigned int* input_width, - unsigned int output_num, - unsigned long long* output_global_offset, - unsigned long long apd_ctx_mem_offset, - unsigned long long apd_coeff_mem_offset, - bool get_output_shape, - unsigned long long output_shape_global_addr - ); -#endif + const tpu_net_info_t &net_info); + virtual void fill_api_info( + const tpu_net_info_t &net_info, + api_info_t &api_info); bm_status_t _bmdnn_dynamic_fullnet_v2_( bm_handle_t handle, @@ -117,33 +157,12 @@ class bmdnn_func_1684 : public bmdnn_func { BM_API_ID_GET_PROFILE_DATA = 987; MAX_API_MSG_SIZE = 1022 * sizeof(u32); // ref to 1684 }; - bm_status_t _bmdnn_multi_fullnet_( + virtual bm_status_t _bmdnn_multi_fullnet_( bm_handle_t handle, - int input_num, - u64* user_input_global_offset, - u64* cmd_input_global_offset, - int* input_n, - int* input_c, - int* input_h, - int* input_w, - unsigned short* input_data_type, - unsigned char* input_st_mode, - unsigned char* real_in_stmode, - int output_num, - u64* user_output_global_offset, - u64* cmd_output_global_offset, - int* output_n, - int* output_length, - unsigned short* output_data_type, - unsigned char* output_st_mode, - unsigned char* force_out_stmode, - u64 bdc_cmd_offset, - u64 gdma_cmd_offset, - int* bdc_cmd_num, - int* gdma_cmd_num, - int cmdgroup_num, - u32* input_pad_h); - + const tpu_net_info_t &net_info); + 
virtual void fill_api_info( + const tpu_net_info_t &net_info, + api_info_t &api_info); bm_status_t _bmdnn_dynamic_fullnet_v2_( bm_handle_t handle, unsigned long long compiled_ir_global_addr, @@ -189,30 +208,12 @@ class bmdnn_func_1880 : public bmdnn_func { bmdnn_func_1880() { ; }; - bm_status_t _bmdnn_multi_fullnet_( + virtual bm_status_t _bmdnn_multi_fullnet_( bm_handle_t handle, - int input_num, - u64* user_input_global_offset, - u64* cmd_input_global_offset, - int* input_n, - int* input_length, - unsigned short* input_data_type, - unsigned char* input_st_mode, - unsigned char* real_in_stmode, - int output_num, - u64* user_output_global_offset, - u64* cmd_output_global_offset, - int* output_n, - int* output_length, - unsigned short* output_data_type, - unsigned char* output_st_mode, - unsigned char* force_out_stmode, - u64 bdc_cmd_offset, - u64 gdma_cmd_offset, - int* bdc_cmd_num, - int* gdma_cmd_num, - int cmdgroup_num - ); + const tpu_net_info_t &net_info); + virtual void fill_api_info( + const tpu_net_info_t &net_info, + api_info_t &api_info); bm_status_t _bmdnn_dynamic_fullnet_v2_( bm_handle_t handle, @@ -242,34 +243,22 @@ class bmdnn_func_1684x : public bmdnn_func { public: bmdnn_func_1684x() { - SG_API_ID_MULTI_FULLNET = 0x0ffffffb; - SG_API_ID_DYNAMIC_FULLNET = 0x0ffffffc; - SG_API_ID_SET_PROFILE_ENABLE = 986; - SG_API_ID_GET_PROFILE_DATA = 987; + BM_API_ID_MULTI_FULLNET = 0x0ffffffb; + BM_API_ID_DYNAMIC_FULLNET = 0x0ffffffc; + BM_API_ID_SET_PROFILE_ENABLE = 986; + BM_API_ID_GET_PROFILE_DATA = 987; MAX_API_MSG_SIZE = 1016 * sizeof(u32); }; - bm_status_t _bmdnn_multi_fullnet_( + virtual bm_status_t _bmdnn_multi_fullnet_( bm_handle_t handle, - int func_id, - int input_num, - u64* user_input_global_offset, - u64* cmd_input_global_offset, - u32* input_dsize, // in bytes - int output_num, - u64* user_output_global_offset, - u64* cmd_output_global_offset, - u32* output_dsize, // in bytes - u64 bdc_cmd_offset, - u64 gdma_cmd_offset, - int* bdc_cmd_num, - int* 
gdma_cmd_num, - u32* bdc_cmd_byte_size, - u32* gdma_cmd_byte_size, - int cmdgroup_num); + const tpu_net_info_t &net_info); + virtual void fill_api_info( + const tpu_net_info_t &net_info, + api_info_t &api_info); bm_status_t _bmdnn_dynamic_fullnet_( bm_handle_t handle, - int func_id, + tpu_kernel_function_t func_id, unsigned long long compiled_ir_global_addr, unsigned int compiled_ir_length, //unit dword unsigned int input_num, @@ -283,8 +272,11 @@ class bmdnn_func_1684x : public bmdnn_func { std::vector apd_ctx_mem_borders, std::vector apd_ctx_mem_offset, unsigned long long apd_coeff_mem_offset, + unsigned long long apd_io_start, + unsigned long long apd_io_mem_offset, bool get_output_shape, - unsigned long long output_shape_global_addr); + unsigned long long output_shape_global_addr, + tpu_kernel_allreduce_1684x_t *p_allreduce_param); bm_status_t _bmdnn_set_profile_enable_(bm_handle_t handle, tpu_kernel_function_t func_id, bool enable); bm_status_t _bmdnn_get_profile_data_(bm_handle_t handle, @@ -295,40 +287,78 @@ class bmdnn_func_1684x : public bmdnn_func { unsigned int data_category //0: profile time records, 1: extra data ); private: - u32 SG_API_ID_MULTI_FULLNET; - u32 SG_API_ID_DYNAMIC_FULLNET; - u32 SG_API_ID_SET_PROFILE_ENABLE; - u32 SG_API_ID_GET_PROFILE_DATA; + u32 BM_API_ID_MULTI_FULLNET; + u32 BM_API_ID_DYNAMIC_FULLNET; + u32 BM_API_ID_SET_PROFILE_ENABLE; + u32 BM_API_ID_GET_PROFILE_DATA; + u32 MAX_API_MSG_SIZE; +}; + +class bmdnn_func_1688 : public bmdnn_func { + public: + + bmdnn_func_1688() { + MAX_API_MSG_SIZE = 1016 * sizeof(u32); + }; + virtual bm_status_t _bmdnn_multi_fullnet_( + bm_handle_t handle, + const tpu_net_info_t &net_info); + virtual void fill_api_info( + const tpu_net_info_t &net_info, + api_info_t &api_info); + + bm_status_t _bmdnn_dynamic_fullnet_( + bm_handle_t handle, + const std::vector & func_id_list, + const unsigned long long compiled_ir_global_addr, + const unsigned int compiled_ir_length, //unit dword + const unsigned int 
input_num, + const unsigned long long *input_addrs, + const int * const * input_shapes, + const int * input_elem_nums, + const int * input_dtype_and_dims, + const unsigned int output_num, + const unsigned long long *output_addrs, + const unsigned long long apd_ctx_start, + const std::vector apd_ctx_mem_borders, + const std::vector apd_ctx_mem_offset, + const unsigned long long apd_coeff_mem_offset, + const unsigned long long apd_io_start, + const unsigned long long apd_io_mem_offset, + bool get_output_shape, + const unsigned long long output_shape_global_addr, + const std::vector &core_list); + + bm_status_t _bmdnn_set_engine_profile_param_(bm_handle_t handle, int core, tpu_kernel_function_t func_id, int engine_type, unsigned long long addr, unsigned long long size); + bm_status_t _bmdnn_set_profile_enable_(bm_handle_t handle, int core, tpu_kernel_function_t func_id, unsigned int enable); + bm_status_t _bmdnn_get_profile_data_(bm_handle_t handle, + int core, + tpu_kernel_function_t func_id, + unsigned long long output_global_addr, + unsigned int output_max_size, + unsigned int offset, + unsigned int data_category //0: profile time records, 1: extra data + ); + private: u32 MAX_API_MSG_SIZE; }; -class bmdnn_func_1686 : public bmdnn_func { +class bmdnn_func_2260 : public bmdnn_func { public: - bmdnn_func_1686() { - SG_API_ID_MULTI_FULLNET = 0x0ffffffb; - SG_API_ID_DYNAMIC_FULLNET = 0x0ffffffc; - SG_API_ID_SET_PROFILE_ENABLE = 986; - SG_API_ID_GET_PROFILE_DATA = 987; + bmdnn_func_2260() { + BM_API_ID_MULTI_FULLNET = 0x0ffffffb; + BM_API_ID_DYNAMIC_FULLNET = 0x0ffffffc; + BM_API_ID_SET_PROFILE_ENABLE = 986; + BM_API_ID_GET_PROFILE_DATA = 987; MAX_API_MSG_SIZE = 1016 * sizeof(u32); }; - bm_status_t _bmdnn_multi_fullnet_( + virtual bm_status_t _bmdnn_multi_fullnet_( bm_handle_t handle, - int input_num, - u64* user_input_global_offset, - u64* cmd_input_global_offset, - u32* input_dsize, // in bytes - int output_num, - u64* user_output_global_offset, - u64* 
cmd_output_global_offset, - u32* output_dsize, // in bytes - u64 bdc_cmd_offset, - u64 gdma_cmd_offset, - int* bdc_cmd_num, - int* gdma_cmd_num, - u32* bdc_cmd_byte_size, - u32* gdma_cmd_byte_size, - int cmdgroup_num); + const tpu_net_info_t &net_info); + virtual void fill_api_info( + const tpu_net_info_t &net_info, + api_info_t &api_info); bm_status_t _bmdnn_dynamic_fullnet_( bm_handle_t handle, @@ -345,10 +375,13 @@ class bmdnn_func_1686 : public bmdnn_func { std::vector apd_ctx_mem_borders, std::vector apd_ctx_mem_offset, unsigned long long apd_coeff_mem_offset, + unsigned long long apd_io_start, + unsigned long long apd_io_mem_offset, bool get_output_shape, - unsigned long long output_shape_global_addr); + unsigned long long output_shape_global_addr, + const std::vector &core_list); - bm_status_t _bmdnn_set_profile_enable_(bm_handle_t handle, bool enable); + bm_status_t _bmdnn_set_profile_enable_(bm_handle_t handle, unsigned int enable); bm_status_t _bmdnn_get_profile_data_(bm_handle_t handle, unsigned long long output_global_addr, unsigned int output_max_size, @@ -356,12 +389,122 @@ class bmdnn_func_1686 : public bmdnn_func { unsigned int data_category //0: profile time records, 1: extra data ); private: - u32 SG_API_ID_MULTI_FULLNET; - u32 SG_API_ID_DYNAMIC_FULLNET; - u32 SG_API_ID_SET_PROFILE_ENABLE; - u32 SG_API_ID_GET_PROFILE_DATA; + u32 BM_API_ID_MULTI_FULLNET; + u32 BM_API_ID_DYNAMIC_FULLNET; + u32 BM_API_ID_SET_PROFILE_ENABLE; + u32 BM_API_ID_GET_PROFILE_DATA; u32 MAX_API_MSG_SIZE; }; + +class bmdnn_func_mars3 : public bmdnn_func { + public: + + bmdnn_func_mars3() { + BM_API_ID_MULTI_FULLNET = 0x0ffffffb; + BM_API_ID_DYNAMIC_FULLNET = 0x0ffffffc; + BM_API_ID_SET_PROFILE_ENABLE = 986; + BM_API_ID_GET_PROFILE_DATA = 987; + MAX_API_MSG_SIZE = 1016 * sizeof(u32); + }; + virtual bm_status_t _bmdnn_multi_fullnet_( + bm_handle_t handle, + const tpu_net_info_t &net_info); + virtual void fill_api_info( + const tpu_net_info_t &net_info, + api_info_t &api_info); + 
+ bm_status_t _bmdnn_dynamic_fullnet_( + bm_handle_t handle, + unsigned long long compiled_ir_global_addr, + unsigned int compiled_ir_length, //unit dword + unsigned int input_num, + const unsigned long long *input_addrs, + const int * const * input_shapes, + const int * input_elem_nums, + const int * input_dtype_and_dims, + unsigned int output_num, + const unsigned long long *output_addrs, + unsigned long long apd_ctx_start, + std::vector apd_ctx_mem_borders, + std::vector apd_ctx_mem_offset, + unsigned long long apd_coeff_mem_offset, + unsigned long long apd_io_start, + unsigned long long apd_io_mem_offset, + bool get_output_shape, + unsigned long long output_shape_global_addr, + const std::vector &core_list); + + bm_status_t _bmdnn_set_profile_enable_(bm_handle_t handle, unsigned int enable); + bm_status_t _bmdnn_get_profile_data_(bm_handle_t handle, + unsigned long long output_global_addr, + unsigned int output_max_size, + unsigned int offset, + unsigned int data_category //0: profile time records, 1: extra data + ); + private: + u32 BM_API_ID_MULTI_FULLNET; + u32 BM_API_ID_DYNAMIC_FULLNET; + u32 BM_API_ID_SET_PROFILE_ENABLE; + u32 BM_API_ID_GET_PROFILE_DATA; + u32 MAX_API_MSG_SIZE; +}; + +class bmdnn_func_2380 : public bmdnn_func { + public: + + bmdnn_func_2380() { + BM_API_ID_MULTI_FULLNET = 0x0ffffffb; + BM_API_ID_DYNAMIC_FULLNET = 0x0ffffffc; + BM_API_ID_SET_PROFILE_ENABLE = 986; + BM_API_ID_GET_PROFILE_DATA = 987; + MAX_API_MSG_SIZE = 1016 * sizeof(u32); + }; + virtual bm_status_t _bmdnn_multi_fullnet_( + bm_handle_t handle, + const tpu_net_info_t &net_info); + virtual void fill_api_info( + const tpu_net_info_t &net_info, + api_info_t &api_info); + + bm_status_t _bmdnn_dynamic_fullnet_( + bm_handle_t handle, + const std::vector & func_id_list, + const unsigned long long compiled_ir_global_addr, + const unsigned int compiled_ir_length, //unit dword + const unsigned int input_num, + const unsigned long long *input_addrs, + const int * const * input_shapes, + 
const int * input_elem_nums, + const int * input_dtype_and_dims, + const unsigned int output_num, + const unsigned long long *output_addrs, + const unsigned long long apd_ctx_start, + const std::vector apd_ctx_mem_borders, + const std::vector apd_ctx_mem_offset, + const unsigned long long apd_coeff_mem_offset, + const unsigned long long apd_io_mem_offset, + bool get_output_shape, + const unsigned long long output_shape_global_addr, + const std::vector &core_list); + + bm_status_t _bmdnn_set_engine_profile_param_(bm_handle_t handle, int core, tpu_kernel_function_t func_id, int engine_type, unsigned long long addr, unsigned long long size); + bm_status_t _bmdnn_set_profile_enable_(bm_handle_t handle, int core, tpu_kernel_function_t func_id, unsigned int enable); + bm_status_t _bmdnn_get_profile_data_(bm_handle_t handle, + int core, + tpu_kernel_function_t func_id, + unsigned long long output_global_addr, + unsigned int output_max_size, + unsigned int offset, + unsigned int data_category //0: profile time records, 1: extra data + ); + private: + u32 BM_API_ID_MULTI_FULLNET; + u32 BM_API_ID_DYNAMIC_FULLNET; + u32 BM_API_ID_SET_PROFILE_ENABLE; + u32 BM_API_ID_GET_PROFILE_DATA; + u32 MAX_API_MSG_SIZE; +}; + } #endif diff --git a/tpu-runtime/include/bmfunc/bmfunc.h b/tpu-runtime/include/bmfunc/bmfunc.h index 9bd7587..a280a1d 100644 --- a/tpu-runtime/include/bmfunc/bmfunc.h +++ b/tpu-runtime/include/bmfunc/bmfunc.h @@ -7,29 +7,46 @@ namespace bmruntime { class bmfunc { - public: - explicit bmfunc(const string& arch_name); - virtual ~bmfunc(); - - static bmdnn_func* bmdnn_base() {return sta_bmfunc_ptr->bmdnn_fn;} - static bmdnn_func_1682* bmdnn_1682() {return sta_bmfunc_ptr->bmdnn_1682_fn;} - static bmdnn_func_1684* bmdnn_1684() {return sta_bmfunc_ptr->bmdnn_1684_fn;} - static bmdnn_func_1880* bmdnn_1880() {return sta_bmfunc_ptr->bmdnn_1880_fn;} - static bmdnn_func_1684x* bmdnn_1684x() {return sta_bmfunc_ptr->bmdnn_1684x_fn;} - static bmdnn_func_1686* bmdnn_1686() {return 
sta_bmfunc_ptr->bmdnn_1686_fn;} - bmrt_arch_info * get_arch_info_ptr() {return p_bmtpu_arch; } - private: - static bmfunc* sta_bmfunc_ptr; /* why not this ? */ - bmrt_arch_info* p_bmtpu_arch; - - bmdnn_func* bmdnn_fn; - bmdnn_func_1682* bmdnn_1682_fn; - bmdnn_func_1684* bmdnn_1684_fn; - bmdnn_func_1880* bmdnn_1880_fn; - bmdnn_func_1684x* bmdnn_1684x_fn; - bmdnn_func_1686* bmdnn_1686_fn; -}; +public: + explicit bmfunc(const string &arch_name); + virtual ~bmfunc(); + + static bmdnn_func *bmdnn_base() { return sta_bmfunc_ptr->bmdnn_fn; } + static bmdnn_func_1682 *bmdnn_1682() { + return (bmdnn_func_1682 *)(sta_bmfunc_ptr->bmdnn_fn); + } + static bmdnn_func_1684 *bmdnn_1684() { + return (bmdnn_func_1684 *)(sta_bmfunc_ptr->bmdnn_fn); + } + static bmdnn_func_1880 *bmdnn_1880() { + return (bmdnn_func_1880 *)(sta_bmfunc_ptr->bmdnn_fn); + } + static bmdnn_func_1684x *bmdnn_1684x() { + return (bmdnn_func_1684x *)(sta_bmfunc_ptr->bmdnn_fn); + } + static bmdnn_func_1688 *bmdnn_1688() { + return (bmdnn_func_1688 *)(sta_bmfunc_ptr->bmdnn_fn); + } + static bmdnn_func_mars3 *bmdnn_mars3() { + return (bmdnn_func_mars3 *)(sta_bmfunc_ptr->bmdnn_fn); + } + + static bmdnn_func_2260 *bmdnn_2260() { + return (bmdnn_func_2260 *)(sta_bmfunc_ptr->bmdnn_fn); + } + + static bmdnn_func_2380 *bmdnn_2380() { + return (bmdnn_func_2380 *)(sta_bmfunc_ptr->bmdnn_fn); + } + bmrt_arch_info *get_arch_info_ptr() { return p_bmtpu_arch; } + +private: + static bmfunc *sta_bmfunc_ptr; /* why not this ? 
*/ + bmrt_arch_info *p_bmtpu_arch; + + bmdnn_func *bmdnn_fn; +}; } #endif diff --git a/tpu-runtime/include/bmrt_arch_info.h b/tpu-runtime/include/bmrt_arch_info.h index cfc145d..866c9f4 100644 --- a/tpu-runtime/include/bmrt_arch_info.h +++ b/tpu-runtime/include/bmrt_arch_info.h @@ -19,7 +19,10 @@ typedef enum bmtpu_arch { BM1684, BM1880, BM1684X, - BM1686, + BM1688, + BM1690, + MARS3, + SG2380, UNKOWN_ARCH } bmtpu_arch_t; @@ -33,6 +36,7 @@ class bmrt_arch_info { static bool is_soc_mode() { return sta_bmtpu_ptr->m_soc_mode; } static void set_current_arch_info(bmrt_arch_info *arch_ptr) { if(arch_ptr) sta_bmtpu_ptr = arch_ptr; } + static int get_npu_num(); static int get_eu_num(bm_data_type_t dtype); static int get_lmem_size(); @@ -50,6 +54,8 @@ class bmrt_arch_info { static u32 get_gdma_cmd_num(); static u64 get_soc_base_distance(); + static u64 addr_mask(); + private: static bmrt_arch_info* sta_bmtpu_ptr; bmtpu_arch_t target_bmtpu_arch; diff --git a/tpu-runtime/include/bmruntime.h b/tpu-runtime/include/bmruntime.h old mode 100644 new mode 100755 index e008b0f..2df8821 --- a/tpu-runtime/include/bmruntime.h +++ b/tpu-runtime/include/bmruntime.h @@ -10,6 +10,9 @@ #ifndef BMRUNTIME_H_ #define BMRUNTIME_H_ #include +#include +#include +#include #include "bmfunc/bmfunc.h" //#include "bmcpu.h" #include "bmruntime_common.h" @@ -17,6 +20,7 @@ #include "bmodel.hpp" #include "bmlib_runtime.h" +#include using bmodel::CoeffMem; using bmodel::ModelCtx; @@ -44,43 +48,44 @@ class BmCoeff; class KernelModule; struct BmMemory { - string desc; // description + string desc; // description bm_device_mem_t device_mem; - u8 check_code[bmodel::SHA256_LEN]; // sha256 + u8 check_code[bmodel::SHA256_LEN]; // sha256 u64 addr; u32 bytes; u32 dword_len; bm_handle_t bm_handle; + bool do_check; - void Init(const string& desc, bm_handle_t handle, const bm_device_mem_t& mem, void* buffer); + void Init(const string &desc, bm_handle_t handle, const bm_device_mem_t &mem, + void *buffer, bool do_check = 
false); int Check(); }; +struct single_core_command_t { + vector gdma_id; // for static + vector bdc_id; // for static + vector gdma_cmd_byte; // for static + vector bdc_cmd_byte; // for static + + BmMemory gdma_mem; // for static + BmMemory bdc_mem; // for static + BmMemory hau_mem; // for static + BmMemory sdma_mem; // for static + u64 gdma_offset; // for static subnet + u64 bdc_offset; // for static subnet + + BmMemory ir_mem; // for dynamic + u32 ir_offset; // for dynamic subnet + u32 ir_len; // for dynamic subnet +}; typedef struct subnet_tpu_info { subnet_tpu_info() { - gdma_offset = -1; - bdc_offset = -1; - cmdgroup_num = -1; - gdma_group_id_v.clear(); - bdc_group_id_v.clear(); - bdc_cmd_byte_v.clear(); - gdma_cmd_byte_v.clear(); + core_commands.clear(); } - int is_dynamic; - - vector gdma_group_id_v; - vector bdc_group_id_v; - vector gdma_cmd_byte_v; - vector bdc_cmd_byte_v; - int cmdgroup_num; - - u64 gdma_offset; - u64 bdc_offset; - - u32 ir_offset; - u32 ir_len; + std::vector core_commands; } SUBNET_TPU_INFO_T; /* TODO: reuse cpu_layer_param_t */ @@ -145,6 +150,7 @@ typedef enum { typedef struct { void* addr; + u32 tensor_cpu_addr; u64 size; host_mem_type_t type; } host_mem_t; @@ -184,14 +190,16 @@ struct net_stage_t { u64 ctx_start; vector ctx_offset; vector ctx_borders; + std::vector neuron_mem; + std::vector neuron_size; - vector gdma_id; // for static - vector bdc_id; // for static - vector gdma_cmd_byte; // for static - vector bdc_cmd_byte; // for static - BmMemory gdma_mem; // for static - BmMemory bdc_mem; // for static - BmMemory ir_mem; // for dynamic + std::vector core_commands; + + // io alone + u64 io_start; + u64 io_size; + u64 io_offset; + bm_device_mem_t io_mem; // have multi subnet int subnet_num; /* subnet num per net */ @@ -208,6 +216,19 @@ struct net_stage_t { float* cpu_addr; }; +/* Record post dynamic alloc neuron usage info + detailed to each stage and core permutations +*/ +struct dyn_neuron_stage_t { + vector input_v; + vector 
output_v; + vector ctx_offset; + std::vector neuron_mem; + + map subnet_tensor_v; + float* cpu_addr; +}; + struct net_ctx_t { string net_name; vector input_name_v; @@ -219,24 +240,82 @@ struct net_ctx_t { vector output_scale_v; vector output_zero_point_v; vector stage_v; // each net has multi stages + std::unordered_map dyn_neuron_stage_dict; // {neron_code: dyn_neuron_stage_info} // Bulk neuron memories. - std::vector neuron_mem; + vector neuron_mem; std::mutex neuron_mutex; // to avoid neuron mem used by other thread - bool is_dynamic; - int n_can_change; // for dynamic - int h_w_can_change; // for dynamic + bool is_dynamic = 0; + int core_num = 1; + int n_can_change = 0; // for dynamic + int h_w_can_change = 0; // for dynamic vector middlebuff_input; // for dynamic, one net share one middlebuf vector middlebuff_output; // for dynamic bm_net_info_t net_info; // create for users by c interface std::shared_ptr kernel_module_; + + // net with cascade + int32_t device_id = 0; + int32_t step_id = 0; + bool in_cascade = false; + int32_t addr_mode = 0; + vector input_from; // input is loaded from which device + vector input_hidden_v; + vector input_index_v; + vector output_hidden_v; + vector output_index_v; + int32_t do_allreduce = 0; + tpu_kernel_allreduce_1684x_t allreduce_param; +}; + +// net with cascade +struct mem_cascade_t { + string name; + int32_t device_id; + bm_tensor_t tensor; +}; + +struct net_cascade_t { + string main_name; // net name + int num_device; // num device used + vector> step_ids; // each step of nets + std::vector hidden_inputs; + std::vector hidden_outputs; + std::vector hidden_inputs_step_ids; + std::vector hidden_outputs_step_ids; + + vector input_names; + vector input_types; + vector input_scales; + vector input_zps; + vector input_shapes; + vector input_bytes; + vector input_mems; + vector input_loc_devices; + + vector output_names; + vector output_types; + vector output_scales; + vector output_zps; + vector output_shapes; + vector 
output_bytes; + vector output_mems; + vector output_loc_devices; + int32_t addr_mode; + bool is_dynamic; + + bm_net_info_t net_info; }; +class CascadeThread; + class Bmruntime { public: Bmruntime(bm_handle_t* bm_handle, bool user_initlized, const string& arch_name); Bmruntime(const string& arch_name, int devid); + Bmruntime(bm_handle_t* bm_handles, int num_handles, + bool using_internal_hiddens, const string& arch_name); ~Bmruntime(); friend class BMProfile; @@ -253,7 +332,7 @@ class Bmruntime { const vector* get_output_tensor(int net_idx) const; const vector* get_net_profile(int net_idx, int stage_idx); void init_output_tensors(net_ctx_t* net_ctx, net_stage_t* stage, - bm_tensor_t* output_tensors, bool user_mem, bool user_stmode); + bm_tensor_t* output_tensors, bool user_mem, bool user_stmode); /* C style Interface */ @@ -264,12 +343,35 @@ class Bmruntime { bool launch(int net_idx, const int input_num, const bm_device_mem_t* input_mems, int* input_shapes, int* input_dims, int* in_stmode, int output_num, const bm_device_mem_t* output_mems, int* out_stmode, bm_shape_t * output_shapes = NULL); - bool launch(int net_idx, const bm_tensor_t* input_tensors, int input_num, - bm_tensor_t* output_tensors, int output_num, bool user_mem = false, + bool launch(int net_idx, const bm_tensor_t *input_tensors, int input_num, + bm_tensor_t *output_tensors, int output_num, bool user_mem = false, bool user_stmode = false); - bool launch(int net_idx, void* const input_datas[], const bm_shape_t input_shapes[], + bool launch_multi_cores(int net_idx, const bm_tensor_t *input_tensors, + int input_num, bm_tensor_t *output_tensors, + int output_num, const std::vector &core_list, + bool user_mem, bool user_stmode); + bool launch_multi_cores(int net_idx, void* const input_datas[], const bm_shape_t input_shapes[], int input_num, void* output_tensors[], bm_shape_t output_shapes[], int output_num, - bool user_mem = false); + bool user_mem = false, const std::vector& core_list={}); + bool 
launch(const net_cascade_t * net_c, const bm_tensor_t* input_tensors, int input_num, + bm_tensor_t* output_tensors, int output_num); + void pre_alloc_neuron_multi_cores(int net_idx, int stage_idx, const std::vector &core_list); + bool memcpy_s2d_parallel(bm_tensor_t tensors[], void * datas[], + int tensor_num[], int device_num); + bool memcpy_d2s_parallel(void * datas[], bm_tensor_t tensors[], + int tensor_num[], int device_num); + bool memcpy_d2d_byte_parallel(bm_tensor_t dst_tensors[], size_t dst_offsets[], + bm_tensor_t src_tensors[], size_t src_offsets[], + size_t sizes[], int tensor_num[], int device_num); + bool memcpy_d2d_stride_ex_parallel(bm_tensor_t dst_tensors[], + size_t dst_offsets[], + bm_shape_t dst_strides[], + bm_tensor_t src_tensors[], + size_t src_offsets[], + bm_shape_t src_strides[], + bm_shape_t shapes[], + int tensor_num[], + int device_num); const bm_shape_t* get_input_max_shape(int net_idx, int input_idx); const bm_shape_t* get_output_max_shape(int net_idx, int output_idx); @@ -296,82 +398,189 @@ class Bmruntime { void get_network_names(vector* names); void show_neuron_network(); + + /* flag get/set */ + inline uint32_t get_flags() { + return m_flags; + } + + inline void set_flags(uint32_t flags) { + m_flags = flags; + } + inline int get_network_number() { - return m_net_ctx_v.size(); + auto num_cascade = m_net_cascade_v.size(); + auto num_net = m_net_ctx_v.size(); + if (num_cascade != 0) { + for (auto v : m_net_ctx_v) { + if (v->in_cascade) { + num_net--; + } + } + } + return num_cascade + num_net; } - inline bm_handle_t get_bm_handle() + inline bm_handle_t get_bm_handle(int device_idx=0) { - return m_handle; + return m_handles[device_idx]; } - inline int get_devid(){ - return m_devid; + inline int get_devid(int device_idx=0){ + return m_devids[device_idx]; } + const net_cascade_t* get_net_cascade(const string& net_name); + bool cascade_thread_step(int net_idx, + std::vector *src, + std::vector *dst, + bm_handle_t m_handle); + bool 
cascade_thread_global_move_data( + int devid, bm_handle_t handle, + std::vector *param); int get_net_idx(const string& net_name); const bm_net_info_t* get_net_info(int net_idx); + const bm_net_info_t* get_net_info(const string& net_name); - const std::vector &get_neuron_mem(int net_idx); + const vector &get_neuron_mem(int net_idx); void trace(); size_t size_4N_align(const bm_shape_t& shape, const bm_data_type_t& type); - u64 must_alloc_device_mem(bm_device_mem_t* mem, u64 size, const string& desc = "", int type_len=1); - bm_device_mem_t must_alloc_device_mem(u64 size, const string& desc = "", int type_len=1); - void must_free_device_mem(bm_device_mem_t& mem); + u64 must_alloc_device_mem(uint32_t devid, bm_device_mem_t* mem, u64 size, const string& desc = "", int type_len=1); + bm_device_mem_t must_alloc_device_mem(uint32_t devid, u64 size, const string& desc = "", int type_len=1); + void must_free_device_mem(uint32_t devid, bm_device_mem_t& mem); + + // sg alloc for over 4GB + u64 must_alloc_device_mem_u64(uint32_t devid, bm_device_mem_u64_t* mem, u64 size, const string& desc = "", int type_len=1); + bm_device_mem_u64_t must_alloc_device_mem_u64(uint32_t devid, u64 size, const string& desc = "", int type_len=1); + void must_free_device_mem_u64(uint32_t devid, bm_device_mem_u64_t& mem); protected: void init(); void init_bmfunc(const string& arch_name); + void sync_cores(bm_handle_t handle, const std::vector& core_list); bool launch_static(net_ctx_t* net_ctx, net_stage_t* stage, const bm_tensor_t* input_tensors, - int input_num, bm_tensor_t* output_tensors, int output_num); + int input_num, bm_tensor_t* output_tensors, int output_num, + const std::vector &core_list, const size_t dyn_core_mask); bool launch_ir(net_ctx_t* net_ctx, net_stage_t* stage, const bm_tensor_t* input_tensors, - int input_num, bm_tensor_t* output_tensors, int output_num); + int input_num, bm_tensor_t* output_tensors, int output_num, + const size_t dyn_core_mask); int get_stage_idx(const 
net_ctx_t* net_ctx, const bm_tensor_t* input_tensors); int get_static_stage_idx(const net_ctx_t* net_ctx, const bm_tensor_t* input_tensors); int get_dynamic_stage_idx(const net_ctx_t* net_ctx, const bm_tensor_t* input_tensors); + std::vector refine_core_list(const net_stage_t *stage, + const std::vector &core_list, + bm_handle_t handle); - protected: +protected: // functions for load bmodel u64 fix_gdma_addr(const net_stage_t* stage, u64 origin_addr, bool is_src); void convert_cmd(u32* cmd, int engine_id, bool last_cmd, u64 start_address, const net_stage_t* stage); - bool setup_cmd_context(ModelCtx* model_ctx, const Vector>* cmd_group, - net_stage_t* stage); + bool setup_cmd_context(ModelCtx* model_ctx, const bmodel::NetParameter *param, + net_stage_t* stage, uint32_t device_id); bool setup_ir_context(ModelCtx* model_ctx, const bmodel::Binary* binary_ir, - const Vector>* stage_ir, net_stage_t* stage); + const Vector>* stage_ir, net_stage_t* stage, uint32_t device_id); bool load_bmodel(ModelCtx*); - bool load_bmodel_net(ModelCtx*, int net_idx, std::shared_ptr kernel_module); + bool load_bmodel_net(ModelCtx*, int net_idx); bool load_bmodel_net(ModelCtx*, int net_idx, net_ctx_t* net_ctx); - void load_tpu_module(ModelCtx*, std::shared_ptr& kernel_module); + void load_tpu_module(ModelCtx*); + void load_cpu_module(ModelCtx*); bool fill_net_ctx( ModelCtx* model_ctx, net_ctx_t* net_ctx, const Vector>* params, - std::vector> &stage_ctx_sizes, net_stage_t *stages); + vector> &stage_ctx_sizes, net_stage_t *stages); + void fill_subnet_dyn_neuron_tensor( + net_ctx_t* net_ctx, const size_t dyn_core_mask, + const net_stage_t *common_stage_info); + void net_ctx_alloc_dyn_neuron(net_ctx_t* net_ctx, const size_t dyn_core_mask, + const net_stage_t *common_stage_info, bool use_multi_subnet); void fill_net_info(net_ctx_t* net_ctx); void free_net_info(net_ctx_t* net_ctx); - void update_net_middlebuf(net_ctx_t* net_ctx); + void free_dyn_neuron(net_ctx_t* net_ctx); + void 
update_net_middlebuf(net_ctx_t *net_ctx); void update_max_middlebuf_size(net_ctx_t* net_ctx); - void update_max_neuron_mem(const std::vector &sizes); + void update_max_neuron_mem(uint32_t devid, const vector &sizes); bool setup_profile_context(ModelCtx* model_ctx, net_stage_t* net_stage, const bmodel::Binary* net_profile, const bmodel::Binary* net_stat); void set_profile_enabled(bool enable); - protected: - static const int MAX_NET_NUM = 256; // one bmruntime can load 256 nets at most - vector m_net_ctx_v; + // functions for fill static bmdnn net info + void fill_tpu_net_info(net_ctx_t *net_ctx, net_stage_t *stage, + const bm_tensor_t *input_tensors, int input_num, + bm_tensor_t *output_tensors, int output_num, + const std::vector &core_list, + tpu_net_info_t &net_info, + const size_t dyn_core_mask); + template + void fill_tpu_tensor_info(vector &tensor_info, + const T_stage *stage, + const bm_tensor_t *user_tensors, bool is_input); + void fill_tpu_cmd_info(std::vector &cmd_info, + const net_stage_t *stage, const int32_t core_idx); + // function for fill tpu static subnet net info + template + void fill_tpu_tensor_info(vector &tensor_info, + const T_stage *stage, + const SUBNET_INFO_T *subnet, + const bm_tensor_t *user_tensors, bool is_input); + void fill_tpu_cmd_info(std::vector &cmd_info, + const SUBNET_INFO_T *subnet, + const int32_t core_idx); + // functions for cascade + void cascade_fill_net_info(net_cascade_t *net_cascade); + void cascade_free_net_info(net_cascade_t *net_cascade); + bool cascade_insert_net(int net_idx, net_ctx_t *net_ctx, + const string &main_name); + void cascade_update_all_info(); + void cascade_update_input(net_cascade_t &v); + void cascade_update_output(net_cascade_t &v); + void cascade_update_max_hidden_buffer_size(net_cascade_t &v); + void cascade_update_hidden_buffer(net_cascade_t &v); + bm_tensor_t * + cascade_prepare_input(const string &name, + int32_t devid, + std::vector *src, + std::vector *dst); + bm_tensor_t * + 
cascade_prepare_output(const string &name, uint32_t devid, + std::vector *dst); + + bool cascade_update_output_shape(net_ctx_t *net_ctx, + std::vector *dst, + std::vector out_tensors); + uint32_t get_dyn_core_mask(int stage_idx, const std::vector core_list); + std::vector get_core_list_from_core_mask(uint32_t dyn_core_mask); +public: + api_info_t get_api_info(int net_idx, const bm_tensor_t *input_tensors, + int input_num, bm_tensor_t *output_tensors, + int output_num, bool user_mem, bool user_stmode, + uint32_t *core_ids); - bm_handle_t m_handle; +protected: // one bmruntime can load nets at most + vector m_net_ctx_v; + vector m_net_cascade_v; // net in cascade info + vector> m_cascade_thread_v; // thread for cascade + + static const int MAX_DEVICE_NUM = 32; // one bmruntime can run 32 device at most + bm_handle_t m_handles[MAX_DEVICE_NUM]; + int m_device_num; + unsigned int m_core_num; + bool using_internal_hidden_tensors; /* internal initlized hidden_tensors device_mem or accept from user parameter when launch */ bool using_internal_bm_handle; /* internal initlized bm_handle or accept from user parameter */ - int m_devid; + int m_devids[MAX_DEVICE_NUM]; + bool using_fast_allreduce; vector m_device_mem_vec; /* save device memory address, for free */ + vector m_device_mem_ids; /* record each device memory belong which device*/ + + vector m_sg_device_mem_vec; /* save device memory address, for free */ + vector m_sg_device_mem_ids; /* record each device memory belong which device*/ - std::shared_ptr m_local_coeff; - bool m_share_coeff; + std::shared_ptr m_local_coeffs[MAX_DEVICE_NUM]; static map> m_global_coeff_map; static std::mutex m_global_coeff_mutex; @@ -382,45 +591,57 @@ class Bmruntime { bool b_enable_mmap; bool m_subnet_time_print; + uint32_t m_flags; std::shared_ptr m_profile; // For middle buffer // Because max_middle_buffer is also record in m_device_mem_vec. // So we do not need to free max_middle_buffer at last. 
- bm_device_mem_t max_middle_buffer; - u64 max_middle_buffer_size; - u32 middle_buffer_num; + bm_device_mem_t max_middle_buffer[MAX_DEVICE_NUM]; + u64 max_middle_buffer_size[MAX_DEVICE_NUM]; + u32 middle_buffer_num[MAX_DEVICE_NUM]; + + // For hidden buffer + // Because max_hidden_buffer is also record in m_device_mem_vec. + // So we do not need to free max_hidden_buffer at last. + bm_device_mem_t max_hidden_buffer[MAX_DEVICE_NUM]; + u64 max_hidden_buffer_size[MAX_DEVICE_NUM]; + u32 hidden_buffer_num[MAX_DEVICE_NUM]; // For neuron memory share u32 m_neuron_heap_mask; - std::vector max_neuron_mem; + vector max_neuron_mem[MAX_DEVICE_NUM]; + std::shared_ptr kernel_modules[MAX_DEVICE_NUM]; protected: /* functions for subnet */ void bmcpu_setup(); void bmtpu_setup(); - bool launch_cpu_subnet(net_ctx_t* net_ctx, net_stage_t* stage, SUBNET_INFO_T* subnet, + bool launch_cpu_subnet(net_ctx_t* net_ctx, map *subnet_tensor_v, const SUBNET_INFO_T* subnet, const bm_tensor_t* input_tensors, bm_shape_t real_out_shape[]); - bool launch_tpu_subnet(net_ctx_t* net_ctx, net_stage_t* stage, SUBNET_INFO_T* subnet, + bool launch_tpu_subnet(net_ctx_t* net_ctx, net_stage_t* stage, const SUBNET_INFO_T* subnet, const bm_tensor_t* input_tensors, int input_num, - bm_tensor_t* output_tensors, int output_num); - bool launch_tpu_ir_subnet(net_ctx_t* net_ctx, net_stage_t* stage, SUBNET_INFO_T* subnet, + bm_tensor_t* output_tensors, int output_num, + const uint32_t dyn_core_mask); + bool launch_tpu_ir_subnet(net_ctx_t* net_ctx, net_stage_t* stage, const SUBNET_INFO_T* subnet, const bm_tensor_t* input_tensors, const int* input_elem_num, int input_num, - bm_tensor_t* output_tensors, int* output_elem_num, int output_num); + bm_tensor_t* output_tensors, int* output_elem_num, int output_num, + const uint32_t dyn_core_mask); bool launch_multi_subnet(net_ctx_t* net_ctx, net_stage_t* stage, const bm_tensor_t* input_tensors, - int input_num, bm_tensor_t* output_tensors, int output_num); + int input_num, 
bm_tensor_t* output_tensors, int output_num, + const uint32_t dyn_core_mask); void fill_sub_net(ModelCtx* model_ctx, const Vector>* subnet_set_v, net_ctx_t* net_ctx, net_stage_t* net_stage); void fill_subnet_tensor_map(net_ctx_t* net_ctx, net_stage_t* net_stage, SUBNET_INFO_T* subnet, const Vector>* tensor_set_v, bool is_input, std::set subnet_switch_inputs); void subnet_clear(net_ctx_t* net_ctx); - void subnet_tensor_s2d(net_stage_t* net_stage, const string& tensor_name, + void subnet_tensor_s2d(uint32_t devid, map *subnet_tensor_v, const string& tensor_name, bm_device_mem_t* out_dev_mem = NULL, u64 offset = 0, u64 size = 0); - void* subnet_tensor_d2s(net_stage_t* net_stage, const string& tensor_name, + void* subnet_tensor_d2s(uint32_t devid, map *subnet_tensor_v, const string& tensor_name, bm_device_mem_t* out_dev_mem = NULL, u64 offset = 0, u64 size = 0); - void subnet_tensor_forward(net_stage_t* stage, const string& src_tensor, const string& dst_tensor, bm_tensor_t* output_tensors); + void subnet_tensor_forward(uint32_t devid, map *subnet_tensor_v, const string& src_tensor, const string& dst_tensor, const bm_tensor_t* output_tensors); protected: typedef void* (*t_bmcpu_init)(); @@ -432,10 +653,21 @@ class Bmruntime { t_bmcpu_init bmcpu_init_; t_bmcpu_uninit bmcpu_uninit_; t_bmcpu_process bmcpu_process_; + + void* customcpu_handle_ = NULL; + t_bmcpu_init customcpu_init_ = NULL; + t_bmcpu_uninit customcpu_uninit_ = NULL; + t_bmcpu_process customcpu_process_ = NULL; + std::shared_ptr kernel_module_; private: bmfunc* p_bmfunc; + + // temp custom cpu related + void *tmpcpuso_handle_ = NULL; + std::string temp_filename_; + int card_chip_num; }; class BmCoeff { @@ -446,45 +678,233 @@ class BmCoeff { u64 Register(ModelCtx* model_ctx, const CoeffMem* coeff_mem); int Check(); - bm_device_mem_t GetCoeffDeviceMem() { + bm_device_mem_u64_t GetCoeffDeviceMem() { return m_latest_device_mem; } protected: - map, bm_device_mem_t> m_coeff_map; /* to share the same coeff, by check 
code*/ + map, bm_device_mem_u64_t> m_coeff_map; /* to share the same coeff, by check code*/ std::mutex m_coeff_mutex; bm_handle_t m_handle; bool m_inner_handle; int m_devid; - bm_device_mem_t m_latest_device_mem; + bm_device_mem_u64_t m_latest_device_mem; }; class KernelModule { public: - explicit KernelModule(bm_handle_t &handle, const char* file_name); - explicit KernelModule(bm_handle_t &handle, const char* binary, size_t size); + explicit KernelModule(bm_handle_t &handle):m_handle(handle) {} ~KernelModule(); private: - KernelModule(); + void preload_funcs(int core_id); +public: + void add_core_module(int core_id, const unsigned char* binary, size_t size); + void add_core_module(int core_id, const char* filename); + vector get_multi_fullnet_func_id(const vector& core_list); + vector get_dynamic_fullnet_func_id(const vector& core_list); + vector get_enable_profile_func_id(const vector& core_list); + vector get_get_profile_func_id(const vector& core_list); + vector get_set_engine_profile_param_func_id(const vector& core_list); + vector get_global_move_1684x_func_id(const vector& core_list); + private: - void check_exist() { - BMRT_ASSERT_INFO(_kernel_module, "kernel_module shouldn't be NULL!!\n"); - } - void preload_funcs(bm_handle_t &handle); + bm_handle_t m_handle; + map _kernel_modules; + map _multi_fullnet_func_id; + map _dynamic_fullnet_func_id; + map _enable_profile_func_id; + map _get_profile_func_id; + map _set_engine_profile_param_func_id; + map _global_move_1684x_func_id; +}; + +class CascadeThread { + typedef enum { + NET_MODE = 0, + S2D_MODE = 1, + D2S_MODE = 2, + D2D_MODE = 3, + D2D_STRIDE_EX_MODE = 4, + UNKNOWN = -1, + } FUNC_MODE_T; public: - tpu_kernel_module_t get_kernel_module(); - tpu_kernel_function_t get_multi_fullnet_func_id(); - tpu_kernel_function_t get_dynamic_fullnet_func_id(); - tpu_kernel_function_t get_enable_profile_func_id(); - tpu_kernel_function_t get_get_profile_func_id(); + CascadeThread(Bmruntime *rt, bm_handle_t handle) + : 
m_stop(false), m_paramReady(false), m_done(true), + m_ok(true), m_handle(handle), m_rt(rt) + { + m_worker = std::thread(&CascadeThread::threadFunction, this); + } + + ~CascadeThread() { + { + // std::unique_lock lock(m_mutex); + m_done = false; + m_stop = true; + // m_condition.notify_all(); + while(m_done == false) {std::this_thread::yield();} + } + if (m_worker.joinable()) { + m_worker.join(); + } + } + + void run(int net_idx, + vector *src, + vector *dst) { + // std::unique_lock lock(m_mutex); + m_net_idx = net_idx; + m_src = src; + m_dst = dst; + m_mode = NET_MODE; + m_done = false; + m_paramReady = true; + // m_condition.notify_all(); + } + + void s2d(bm_tensor_t *tensors, void **datas, int tensor_num) { + // std::unique_lock lock(m_mutex); + m_dst_tensors = tensors; + m_datas = datas; + m_mode = S2D_MODE; + m_tensor_num = tensor_num; + m_done = false; + m_paramReady = true; + // m_condition.notify_all(); + } + + void d2s(void **datas, bm_tensor_t *tensors, int tensor_num) { + // std::unique_lock lock(m_mutex); + m_src_tensors = tensors; + m_datas = datas; + m_mode = D2S_MODE; + m_tensor_num = tensor_num; + m_done = false; + m_paramReady = true; + // m_condition.notify_all(); + } + + void d2d(bm_tensor_t *dst_tensors, size_t *dst_offsets, + bm_tensor_t *src_tensors, size_t *src_offsets, + size_t *sizes, int tensor_num) { + // std::unique_lock lock(m_mutex); + m_src_tensors = src_tensors; + m_dst_tensors = dst_tensors; + m_src_offsets = src_offsets; + m_dst_offsets = dst_offsets; + m_sizes = sizes; + m_mode = D2D_MODE; + m_tensor_num = tensor_num; + m_done = false; + m_paramReady = true; + // m_condition.notify_all(); + } + + void d2d_stride_ex(int devid, + std::vector *params) { + // std::unique_lock lock(m_mutex); + m_devid = devid; + m_global_move_params = params; + m_mode = D2D_STRIDE_EX_MODE; + m_done = false; + m_paramReady = true; + // m_condition.notify_all(); + } + + bool sync() { + // std::unique_lock lock(m_mutex); + // m_doneCondition.wait(lock, 
[this]() { return m_done; }); + while(m_done == false) {std::this_thread::yield();} + return m_ok; + } private: + void threadFunction() { + while (true) { + // std::unique_lock lock(m_mutex); + // m_condition.wait(lock, [this]() { return m_paramReady || m_stop; }); + // BMRT_LOG(INFO, "M_MODE is %d\n", m_mode); + while (m_paramReady == false && m_stop == false) {std::this_thread::yield();} + if (m_stop) { + m_done = true; + return; + } + if (m_mode == NET_MODE) { + m_ok = m_rt->cascade_thread_step(m_net_idx, m_src, m_dst, m_handle); + if (m_ok) { + auto status = bm_thread_sync(m_handle); + m_ok = BM_SUCCESS == status; + } + } else if (m_mode == S2D_MODE) { + for (int i = 0; i < m_tensor_num; ++i) { + auto status = bm_memcpy_s2d(m_handle, m_dst_tensors[i].device_mem, m_datas[i]); + if (BM_SUCCESS != status) { + m_ok = false; + break; + } else { + m_ok = true; + } + } + } else if (m_mode == D2S_MODE) { + for (int i = 0; i < m_tensor_num; ++i) { + auto status = bm_memcpy_d2s(m_handle, m_datas[i], m_src_tensors[i].device_mem); + if (BM_SUCCESS != status) { + m_ok = false; + break; + } else { + m_ok = true; + } + } + } else if (m_mode == D2D_MODE) { + for (int i = 0; i < m_tensor_num; ++i) { + auto status = bm_memcpy_d2d_byte(m_handle, m_dst_tensors[i].device_mem, m_dst_offsets[i], + m_src_tensors[i].device_mem, m_src_offsets[i], m_sizes[i]); + if (BM_SUCCESS != status) { + m_ok = false; + break; + } else { + m_ok = true; + } + } + } else if (m_mode == D2D_STRIDE_EX_MODE) { + // global move + m_ok = m_rt->cascade_thread_global_move_data(m_devid, m_handle, + m_global_move_params); + } + m_paramReady = false; + m_done = true; + // m_doneCondition.notify_one(); + } + } + + std::thread m_worker; + // std::mutex m_mutex; + // std::condition_variable m_condition; + // std::condition_variable m_doneCondition; + std::atomic_bool m_stop; + std::atomic_bool m_paramReady; + std::atomic_bool m_done; + // bool m_stop; + // bool m_paramReady; + // bool m_done; + bool m_ok; + // 
s2d/d2s/d2d param + bm_tensor_t *m_src_tensors; + bm_tensor_t *m_dst_tensors; + void **m_datas; + FUNC_MODE_T m_mode; + int m_tensor_num; + size_t *m_src_offsets; + size_t *m_dst_offsets; + size_t *m_sizes; + // net param + int m_net_idx; bm_handle_t m_handle; - tpu_kernel_module_t _kernel_module = {0}; - tpu_kernel_function_t _multi_fullnet_func_id = {0}; - tpu_kernel_function_t _dynamic_fullnet_func_id = {0}; - tpu_kernel_function_t _enable_profile_func_id = {0}; - tpu_kernel_function_t _get_profile_func_id = {0}; + Bmruntime *m_rt; + vector *m_src; + vector *m_dst; + // d2d_stride_ex param + int m_devid; + std::vector *m_global_move_params; }; } // namespace bmruntime diff --git a/tpu-runtime/include/bmruntime_common.h b/tpu-runtime/include/bmruntime_common.h index a5b580a..569c952 100644 --- a/tpu-runtime/include/bmruntime_common.h +++ b/tpu-runtime/include/bmruntime_common.h @@ -19,6 +19,7 @@ #include #include #include +#include #include "bmlib_runtime.h" #include "bmruntime_interface.h" #include "bmruntime_legacy.h" @@ -39,14 +40,24 @@ constexpr bool strings_equal(char const* a, char const* b) } typedef enum { + DEBUG =-1, INFO = 0, WARNING = 1, WRONG = 2, FATAL = 3, } BMRT_LogLevel; +extern BMRT_LogLevel BMRT_LOG_LEVEL_THRESHOLD; + +#ifdef __cplusplus +extern "C" { +#endif +BMRT_LogLevel bmrt_get_current_log_level(); +void bmrt_set_current_log_level(BMRT_LogLevel level); +#ifdef __cplusplus +} +#endif -extern int BMRT_LOG_LEVEL_THRESHOLD; #ifdef __linux__ template typename std::enable_if::type __bmrt_log(const char*fmt, ArgTypes ...args){ @@ -66,10 +77,20 @@ typename std::enable_if::type __bmrt_log(const char*fmt, Ar #define BMRT_LOG(severity, fmt, ...) 
\ __bmrt_log("[BMRT][%s:%d] %s:" fmt "\n", __func__, __LINE__, #severity, ##__VA_ARGS__) +#define BMRT_LOG_RUN(severity, code) \ + do \ + { \ + if (severity >= BMRT_LOG_LEVEL_THRESHOLD) \ + { \ + code \ + } \ + } while (0) + #else #include void bmrt_log_default_callback(int level, const char* fmt, va_list args); void BMRT_LOG(int level, const char* fmt, ...); +#define BMRT_LOG_RUN(severity, code) /* compiled out on non-Linux builds; takes the same (severity, code) arguments as the Linux macro so shared call sites still preprocess */ #endif using std::cout; @@ -150,6 +171,8 @@ typedef enum { ENGINE_GDMA = 1, ENGINE_CDMA = 2, ENGINE_HDMA = 3, + ENGINE_HAU = 4, + ENGINE_SDMA = 5, ENGINE_END } ENGINE_ID; @@ -239,6 +262,39 @@ inline u32 get_mem_index(const std::vector &ctx_borders, u64 ctx_start, u64 return i; } +template +std::string vector_to_string(const std::vector& list, const std::string& prefix="[", const std::string& suffix="]", const std::string& sep=", "){ + std::string result = prefix; + for(size_t i=0; i api_id; + /// @brief api data to be sent to driver, {core_idx, core_api_data} + std::vector> api_data; + /// @brief offset of input tensors' addr in api_data + std::vector input_addr_offset; + /// @brief offset of output tensors' addr in api_data + std::vector output_addr_offset; +}; +/** + * @name get_bmodel_api_info + * @brief To get the api info setting input tensors + * @ingroup bmruntime + * + * This API only supports the neuron network that is static-compiled. + * After calling this API, api info will be set and returned, + * and then you can call `bm_send_api` to start TPU inference. + * + * @param [in] p_bmrt Bmruntime that had been created + * @param [in] net_name The name of the neuron network + * @param [in] input_tensors Array of input tensor, defined like bm_tensor_t input_tensors[input_num], + * User should initialize each input tensor. + * @param [in] input_num Input number + * @param [in] output_tensors Array of output tensor, defined like bm_tensor_t output_tensors[output_num]. + * User can set device_mem or stmode of output tensors. 
If user_mem is true, this interface + * will use device mem of output_tensors to store output data, and not alloc device mem; + * Or it will alloc device mem to store output. If user_stmode is true, it will use stmode in + * each output tensor; Or stmode will be BM_STORE_1N as default. + * @param [in] output_num Output number + * @param [in] user_mem whether device_mem of output tensors are set + * @param [in] user_stmode whether stmode of output tensors are set + * @param [in] core_ids select which cores to use + */ +api_info_t get_bmodel_api_info(void *p_bmrt, const char *net_name, + const bm_tensor_t *input_tensors, int input_num, + bm_tensor_t *output_tensors, int output_num, + bool user_mem, bool user_stmode, uint32_t *core_ids); } // namespace bmruntime #endif /* __BMRUNTIME_CPP_H__ */ diff --git a/tpu-runtime/include/bmruntime_interface.h b/tpu-runtime/include/bmruntime_interface.h index 9ef3b96..5ce1594 100644 --- a/tpu-runtime/include/bmruntime_interface.h +++ b/tpu-runtime/include/bmruntime_interface.h @@ -59,6 +59,12 @@ it will alloc device mem to tensor->device_mem, so user should bmrt_free_device( tensor->device_mem) to free it.*/ DECL_EXPORT bool bmrt_tensor(bm_tensor_t* tensor, void* p_bmrt, bm_data_type_t dtype, bm_shape_t shape); +/* +fill a tensor with data type and shape, and st_mode = 0 as default. +tensor and p_bmrt should not be NULL, shape count should not be 0. 
+it will alloc device mem to tensor->device_mem on devid-th device.*/ +DECL_EXPORT bool bmrt_tensor_ex(bm_tensor_t* tensor, void* p_bmrt, int devid, bm_data_type_t dtype, bm_shape_t shape); + /* fill a tensor with device mem existed, tensor byte size should not large than device mem size */ DECL_EXPORT void bmrt_tensor_with_device(bm_tensor_t* tensor, bm_device_mem_t device_mem, bm_data_type_t dtype, bm_shape_t shape); @@ -87,6 +93,22 @@ DECL_EXPORT void bmrt_print_network_info(const bm_net_info_t* net_info); */ DECL_EXPORT void* bmrt_create(bm_handle_t bm_handle); +/* --------------------------------------------------------------------------*/ +/** + * @name bmrt_create_ex + * @brief To create the bmruntime with one or more bm_handle. + * @ingroup bmruntime + * + * This API creates the bmruntime. It returns a void* pointer which is the pointer + * of bmruntime. + * + * @param [in] bm_handles bm handles. They must be initialized by using bmlib. + * @param [in] num_handles number of bm_handles. + * + * @retval void* the pointer of bmruntime + */ +DECL_EXPORT void *bmrt_create_ex(bm_handle_t *bm_handles, int num_handles); + /** * @name bmrt_destroy * @brief To destroy the bmruntime pointer @@ -109,6 +131,32 @@ DECL_EXPORT void bmrt_destroy(void* p_bmrt); */ DECL_EXPORT void * bmrt_get_bm_handle(void* p_bmrt); +/* --------------------------------------------------------------------------*/ +/** + * @name bmrt_set_flags + * @brief set runtime flags for different situations + * @ingroup bmruntime + * + * This API set runtime flags, for various situations. flag defined by bm_runtime_flag_t + * + * @param [in] p_bmrt Bmruntime that had been created + * + */ +DECL_EXPORT void bmrt_set_flags(void* p_bmrt, uint32_t flags); + +/* --------------------------------------------------------------------------*/ +/** + * @name bmrt_get_flags + * @brief get runtime flags for different situations + * @ingroup bmruntime + * + * This API get runtime flags, for various situations. 
flag defined by bm_runtime_flag_t + * + * @param [in] p_bmrt Bmruntime that had been created + * + */ +DECL_EXPORT uint32_t bmrt_get_flags(void* p_bmrt); + /** * @name bmrt_load_bmodel * @brief To load the bmodel which is created by BM compiler @@ -276,6 +324,42 @@ DECL_EXPORT bool bmrt_launch_data(void* p_bmrt, const char* net_name, void* cons const bm_shape_t input_shapes[], int input_num, void * output_datas[], bm_shape_t output_shapes[], int output_num, bool user_mem); +/** + * @name bmrt_launch_data_multi_cores + * @brief To launch the inference of the neuron network with setting input datas in system memory on the assigned cores + * @ingroup bmruntime + * + * This API supports the neuron network that is static-compiled or dynamic-compiled + * After calling this API, inference on TPU is launched. And the CPU + * program will be blocked. + * This API support multiple inputs, and multi thread safety + * + * @param [in] p_bmrt Bmruntime that had been created + * @param [in] net_name The name of the neuron network + * @param [in] input_datas Array of input data, defined like void * input_datas[input_num]. User should + * initialize each data pointer as input. + * @param [in] input_shapes Array of input shape, defined like bm_shape_t input_shapes[input_num]. + * User should set each input shape + * @param [in] input_num Input number + * @param [out] output_datas Array of output data, defined like void * output_datas[output_num]. + * If user don't alloc each output data, set user_mem to false, and this api will alloc + * output mem, user should free each output mem when output data not used. Also + * user can alloc system memory for each output data by self and set user_mem = true. + * @param [out] output_shapes Array of output shape, defined like bm_shape_t output_shapes[output_num]. + * It will store each output shape. 
+ * @param [in] output_num Output number + * @param [in] user_mem whether output_datas[i] have allocated memory + * @param [in] core_list the cores to launch on. If core_list = NULL, core_num must be 0 + * @param [in] core_num number of cores to use. If core_num=0, bmruntime will alloc the proper cores automatically to launch + * + * @retval true Launch success. + * @retval false Launch failed. + */ +DECL_EXPORT bool bmrt_launch_data_multi_cores(void* p_bmrt, const char* net_name, void* const input_datas[], + const bm_shape_t input_shapes[], int input_num, void * output_datas[], + bm_shape_t output_shapes[], int output_num, bool user_mem, const int* core_list, int core_num); + + /** * @name bmrt_trace * @brief To check runtime environment, and collect info for DEBUG @@ -288,6 +372,188 @@ DECL_EXPORT bool bmrt_launch_data(void* p_bmrt, const char* net_name, void* cons */ DECL_EXPORT void bmrt_trace(void* p_bmrt); +/** + * @name bmrt_launch_tensor_multi_cores + * @brief To launch the inference of the neuron network with setting input tensors, and support multi core inference. + * @ingroup bmruntime + * + * This API supports the neuron nework that is static-compiled or dynamic-compiled + * After calling this API, inference on TPU is launched. And the CPU program will not + * be blocked. bm_thread_sync_from_core should be called to make sure inference is finished. + * This API support multiple inputs, and multi thread safety + * + * @param [in] p_bmrt Bmruntime that had been created + * @param [in] net_name The name of the neuron network + * @param [in] input_tensors Array of input tensor, defined like bm_tensor_t input_tensors[input_num], + * User should initialize each input tensor. + * @param [in] input_num Input number + * @param [out] output_tensors Array of output tensor, defined like bm_tensor_t output_tensors[output_num]. + * User can set device_mem or stmode of output tensors. 
If user_mem is true, this interface + * will use device mem of output_tensors to store output data, and not alloc device mem; + * Or it will alloc device mem to store output. If user_stmode is true, it will use stmode in + * each output tensor; Or stmode will be BM_STORE_1N as default. + * @param [in] output_num Output number + * @param [in] user_mem whether device_mem of output tensors are set + * @param [in] user_stmode whether stmode of output tensors are set + * @param [in] core_list list of core ids that will be used for inference + * @param [in] core_num number of the core list + * + * @retval true Launch success. + * @retval false Launch failed. + */ +DECL_EXPORT bool bmrt_launch_tensor_multi_cores( + void *p_bmrt, + const char *net_name, + const bm_tensor_t input_tensors[], + int input_num, + bm_tensor_t output_tensors[], + int output_num, + bool user_mem, + bool user_stmode, + const int *core_list, + int core_num); + +/** + * @name bmrt_pre_alloc_neuron_multi_cores + * @brief To pre-allocate the neuron network compute memory during multi-cores arch inference. + * @ingroup bmruntime + * + * This API is only used on multi-core architectures and, when used, must be called before bmrt_launch_tensor_multi_cores. + * After calling this API, the memory needed during neuron network inference is pre-allocated, which reduces the time cost of the first bmrt_launch_tensor_multi_cores call. + * Calling this API is optional: otherwise bmrt allocates the compute memory automatically during the first launch. + * + * @param [in] p_bmrt Bmruntime that had been created + * @param [in] net_name The name of the neuron network + * @param [in] stage_idx Which network stage needs to be pre-allocated + * @param [in] core_list list of core ids that will be used for inference + * @param [in] core_num number of the core list + * + * @retval true Pre-allocate success. + * @retval false Pre-allocate failed. 
+ */ +DECL_EXPORT bool bmrt_pre_alloc_neuron_multi_cores( + void *p_bmrt, + const char *net_name, + int stage_idx, + const int *core_list, + int core_num); + +/** + * @name bmrt_memcpy_s2d_parallel + * @brief To copy data from system memory to multi-device memory in parallel + * @ingroup bmruntime + * + * This API can only be used when the p_bmrt is created with bmrt_create_ex on multi devices. + * After calling this API, datas[:tensor_num[0]] will be copied to the first device, and + * datas[tensor_num[0]:tensor_num[0]+tensor_num[1]] will be copied to the second device and so on. + * The process of copying data to different devices is done in parallel and to the same device is in sequence. + * + * @param [in] p_bmrt Bmruntime that had been created with multi bm_handles + * @param [in] tensors Array of tensors that will be copied to devices + * @param [in] datas Array of datas allocated in system memory + * @param [in] tensor_num Array of tensor_num that will be copied to each device + * @param [in] device_num Device number +*/ +DECL_EXPORT bool bmrt_memcpy_s2d_parallel( + void *p_bmrt, + bm_tensor_t tensors[], + void *datas[], + int tensor_num[], + int device_num); + +/** + * @name bmrt_memcpy_d2s_parallel + * @brief To copy data from multi-device memory to system memory in parallel + * @ingroup bmruntime + * + * This API can only be used when the p_bmrt is created with bmrt_create_ex on multi devices. + * After calling this API, tensors on the first device will be copied to datas[:tensor_num[0]], and + * tensors on the second device will be copied to datas[tensor_num[0]:tensor_num[0]+tensor_num[1]] and so on. + * The process of copying data from different devices is done in parallel and from the same device is in sequence. 
+ * + * @param [in] p_bmrt Bmruntime that had been created with multi bm_handles + * @param [in] datas Array of datas allocated in system memory + * @param [in] tensors Array of tensors that will be copied from devices + * @param [in] tensor_num Array of tensor_num that will be copied from each device + * @param [in] device_num Device number +*/ +DECL_EXPORT bool bmrt_memcpy_d2s_parallel( + void *p_bmrt, + void *datas[], + bm_tensor_t tensors[], + int tensor_num[], + int device_num); + +/** + * @name bmrt_memcpy_d2d_byte_parallel + * @brief To copy specified bytes of data from one piece of device memory to + * another piece of device memory within one device and this will be + * done in parallel across multi-devices. Both source and destination + * offsets can be specified. + * @ingroup bmruntime + * + * This API only could be used when the p_bmrt is created with bmrt_create_ex on multi devices. + * After calling this API, data in src_tensors[:tensor_num[0]] on the first device will be copied + * to dst_tensors[:tensor_num[0]] , and src_tensors[tensor_num[0]:tensor_num[0]+tensor_num[1]] on the + * second device will be copied to dst_tensors[tensor_num[0]:tensor_num[0]+tensor_num[1]] and so on. + * The process is in parallel across different devices and is in sequence within the same device. 
+ * + * @param [in] p_bmrt Bmruntime that had been created with multi bm_handles + * @param [in] dst_tensors Array of tensors that will be copied to devices + * @param [in] dst_offsets Array of offsets for each dst_tensor (in bytes) + * @param [in] src_tensors Array of tensors that will be copied from devices + * @param [in] src_offsets Array of offsets for each src_tensor (in bytes) + * @param [in] sizes Array of sizes that will be copied for each tensor (in bytes) + * @param [in] tensor_num Array of tensor_num that will be copied for each device + * @param [in] device_num Device number +*/ +DECL_EXPORT bool bmrt_memcpy_d2d_byte_parallel( + void *p_bmrt, + bm_tensor_t dst_tensors[], + size_t dst_offsets[], + bm_tensor_t src_tensors[], + size_t src_offsets[], + size_t sizes[], + int tensor_num[], + int device_num); + +/** + * @name bmrt_memcpy_d2d_stride_ex_parallel + * @brief To copy a piece of data according to a specified shape and stride from a particular addr offset + * of source tensor to another offset of the destination tensor. The two tensors are in the same device + * and the process between different devices is performed in parallel. + * @ingroup bmruntime + * + * This API can only be used when the p_bmrt is created with bmrt_create_ex on multi devices. + * After calling this API, data in src_tensors[:tensor_num[0]] on the first device will be copied + * to dst_tensors[:tensor_num[0]], and src_tensors[tensor_num[0]:tensor_num[0]+tensor_num[1]] on the + * second device will be copied to dst_tensors[tensor_num[0]:tensor_num[0]+tensor_num[1]] and so on. + * The process is in parallel across different devices and is in sequence within the same device. 
+ * + * @param [in] p_bmrt Bmruntime that had been created with multi bm_handles + * @param [in] dst_tensors Array of tensors that will be copied to devices + * @param [in] dst_offsets Array of offsets for each dst_tensor (in bytes) + * @param [in] dst_strides Array of strides that are specified for writing each piece of data + * @param [in] src_tensors Array of tensors that will be copied from devices + * @param [in] src_offsets Array of offsets for each src_tensor (in bytes) + * @param [in] src_strides Array of strides that are specified for reading each piece of data + * @param [in] shapes Array of shapes that are specified for each piece of data + * @param [in] tensor_num Array of tensor_num that will be copied for each device + * @param [in] device_num Device number +*/ + +DECL_EXPORT bool bmrt_memcpy_d2d_stride_ex_parallel( + void *p_bmrt, + bm_tensor_t dst_tensors[], + size_t dst_offsets[], + bm_shape_t dst_strides[], + bm_tensor_t src_tensors[], + size_t src_offsets[], + bm_shape_t src_strides[], + bm_shape_t shapes[], + int tensor_num[], + int device_num); + #if defined (__cplusplus) } #endif diff --git a/tpu-runtime/include/bmruntime_legacy.h b/tpu-runtime/include/bmruntime_legacy.h old mode 100644 new mode 100755 index 54c460f..be5bac7 --- a/tpu-runtime/include/bmruntime_legacy.h +++ b/tpu-runtime/include/bmruntime_legacy.h @@ -706,6 +706,50 @@ int bmrt_dev_getcount(void* p_bmrt, int* dev_count); */ void bmrt_get_last_api_process_time_us(void* p_bmrt, unsigned long* time_us); +/** + * @name get_bmodel_api_info_c + * @brief To get the api info setting input tensors + * @ingroup bmruntime + * + * This API only supports the neuron nework that is static-compiled. + * After calling this API, api info will be setted and return, + * and then you can call `bm_send_api` to start TPU inference. + * When you no longer need the memory, call bmrt_free_api_info to avoid memory leaks. 
+ * + * @param [in] p_bmrt Bmruntime that had been created + * @param [in] net_name The name of the neuron network + * @param [in] input_tensors Array of input tensor, defined like bm_tensor_t input_tensors[input_num], + * User should initialize each input tensor. + * @param [in] input_num Input number + * @param [in] output_tensors Array of output tensor, defined like bm_tensor_t output_tensors[output_num]. + * User can set device_mem or stmode of output tensors. If user_mem is true, this interface + * will use device mem of output_tensors to store output data, and not alloc device mem; + * Or it will alloc device mem to store output. If user_stmode is true, it will use stmode in + * each output tensor; Or stmode will be BM_STORE_1N as default. + * @param [in] output_num Output number + * @param [in] user_mem whether device_mem of output tensors are set + * @param [in] user_stmode whether stmode of output tensors are set + * @param [in] core_ids select which cores to uese + */ +api_info_c *get_bmodel_api_info_c(void *p_bmrt, const char *net_name, + const bm_tensor_t *input_tensors, int input_num, + bm_tensor_t *output_tensors, int output_num, + bool user_mem, bool user_stmode, uint32_t *core_ids); + +/** + * @name bmrt_free_api_info + * @brief To release memory allocated by the get_bmodel_api_info_c function's return value. + * @ingroup bmruntime + * + * This function is used to release the memory block returned by the get_bmodel_api_info_c function. + * After calling get_bmodel_api_info_c to retrieve model API information, make sure to call this function + * when you no longer need the information to avoid memory leaks. 
+ * + * @param [in] api_info return value of get_bmodel_api_info_c + * + */ +void bmrt_free_api_info(api_info_c *api_info); + #if defined (__cplusplus) } #endif diff --git a/tpu-runtime/include/bmruntime_profile.h b/tpu-runtime/include/bmruntime_profile.h index 85986b4..7b24b84 100644 --- a/tpu-runtime/include/bmruntime_profile.h +++ b/tpu-runtime/include/bmruntime_profile.h @@ -23,6 +23,10 @@ #define ENV_DISABLE_BDC "BMRUNTIME_DISABLE_BDC_PERF" #define ENV_DISABLE_ARM "BMRUNTIME_DISABLE_ARM_PERF" +#define PROFILE_ENGINE_MCU 0 +#define PROFILE_ENGINE_GDMA 1 +#define PROFILE_ENGINE_TIU 2 + namespace bmruntime { typedef enum { @@ -47,7 +51,7 @@ typedef enum { typedef struct { PROFILE_MEM_TYPE_T type; u64 addr; - u32 size; + u64 size; u64 alloc_usec; u64 free_usec; string desc; @@ -87,24 +91,14 @@ typedef struct { bm_device_mem_t mem; } buffer_pair; +typedef pair < u64, u64> mem_pair_t; + class BMProfile; class Bmruntime; +class BMProfileDeviceBase; struct net_ctx_t; // struct net_ctx_t; -class BMProfileDeviceBase { -public: - BMProfileDeviceBase(BMProfile* profile):profile(profile) {} - virtual bool enabled() = 0; - virtual bool init() = 0; - virtual bool begin(net_ctx_t* net_ctx) = 0; - virtual bool end(net_ctx_t* net_ctx) = 0; - virtual void deinit() = 0; - virtual ~BMProfileDeviceBase(){} -protected: - BMProfile* profile; -}; - class BMProfile { public: BMProfile(Bmruntime* p_bmrt); @@ -114,20 +108,26 @@ class BMProfile { set iterations=set(), set subnet_ids = set(), set subnet_modes = set()); - void init(const std::string &net_name, const vector &data, const vector &stat); + void init(const std::string &net_name, const vector &data, const vector &stat, const std::vector& core_list); void begin_subnet(net_ctx_t* net_ctx, int iteration, int subnet_id, int subnet_mode); void set_extra_data(u64 data); void end_subnet(net_ctx_t* net_ctx); void deinit(); void print_note(); bool is_enabled() { return enabled; } - void record_alloc_device_mem(const bm_device_mem_t& mem, 
const string& desc=""); + void record_alloc_device_mem(const mem_pair_t &mem, const string& desc=""); + void record_cpu_mem(const void* ptr, u32 len, const string& desc=""); - void record_mem(PROFILE_MEM_TYPE_T mtype, u64 addr, u32 size, const string& desc=""); - void record_free_device_mem(const bm_device_mem_t &mem); + void record_mem(PROFILE_MEM_TYPE_T mtype, u64 addr, u64 size, const string& desc=""); + void record_free_device_mem(u64 mem_addr); - profile_cmd_num_t* record_subnet_cmd_info(u64 gdma_addr, u64 gdma_offset, u64 bdc_addr, u64 bdc_offset, u32 group_num); - void record_cmd_data(ENGINE_ID engine, const void* cmd_ptr, u32 cmd_len, u64 store_addr); + profile_cmd_num_t* record_subnet_cmd_info(int core, u64 gdma_addr, u64 gdma_offset, u64 bdc_addr, u64 bdc_offset, u32 group_num); + void record_cmd_data(int core, ENGINE_ID engine, const void* cmd_ptr, u32 cmd_len, u64 store_addr); + + void set_core_list(const vector& core_list); + const vector& get_core_list() { + return this->core_list; + } private: profile_subnet_summary_t summary; @@ -139,7 +139,6 @@ class BMProfile { void set_save_dir(const string &value); string get_global_filename(); - public: bool need_profile(int iteration, int subnet_id, int subnet_mode); void alloc_buffer(buffer_pair* bp, size_t size, const std::string &desc); @@ -152,11 +151,11 @@ class BMProfile { void save_cmd_profile(); int getenv_int(const char* name, int default_val = 0); bool getenv_bool(const char* name, bool default_val = false); + bm_handle_t get_handle() { return handle; } - bm_handle_t handle = nullptr; private: Bmruntime* p_bmrt = nullptr; - profile_cmd_info_t *cmd_info = nullptr; + std::vector cmd_infos; int arch = -1; int devid = -1; bool enabled = false; @@ -171,7 +170,43 @@ class BMProfile { string save_dir = "bmprofile_data"; vector mem_info; + vector core_list; + bm_handle_t handle = nullptr; +}; + +class BMProfileDeviceBase { +public: + BMProfileDeviceBase(BMProfile* profile):profile(profile) { + enable = 
profile->getenv_bool(ENV_ENABLE_PROFILE); + if(enable){ + gdma_record_len = profile->getenv_int(ENV_PROFILE_GDMA_SIZE, gdma_record_len); + bdc_record_len = profile->getenv_int(ENV_PROFILE_BDC_SIZE, bdc_record_len); + dyn_max_size = profile->getenv_int(ENV_PROFILE_ARM_SIZE, dyn_max_size); + enable_gdma = !profile->getenv_bool(ENV_DISABLE_GDMA) && gdma_record_len > 0; + enable_bdc = !profile->getenv_bool(ENV_DISABLE_BDC) && bdc_record_len > 0; + enable_arm = !profile->getenv_bool(ENV_DISABLE_ARM) && dyn_max_size > 0; + enable = enable_gdma || enable_arm || enable_bdc; + } + BMRT_LOG(INFO, "gdma=%d, tiu=%d, mcu=%d", enable_gdma, enable_bdc, enable_gdma); + } + virtual bool enabled() = 0; + virtual bool init() = 0; + virtual bool begin(net_ctx_t* net_ctx) = 0; + virtual bool end(net_ctx_t* net_ctx) = 0; + virtual void deinit() = 0; + virtual ~BMProfileDeviceBase(){} + +protected: + BMProfile* profile; + size_t gdma_record_len = 1024*1024; + size_t bdc_record_len = 1024*1024; + size_t dyn_max_size = 16*1024*1024; + bool enable_gdma = false; + bool enable_bdc = false; + bool enable_arm = false; + bool enable = false; }; + } #endif // BMRUNTIME_PROFILE_H diff --git a/tpu-runtime/lib/libbm1684x_kernel_module.so b/tpu-runtime/lib/libbm1684x_kernel_module.so index 7002c1d..946dbc2 100755 Binary files a/tpu-runtime/lib/libbm1684x_kernel_module.so and b/tpu-runtime/lib/libbm1684x_kernel_module.so differ diff --git a/tpu-runtime/lib/libbm1684x_kernel_module_version.txt b/tpu-runtime/lib/libbm1684x_kernel_module_version.txt new file mode 100644 index 0000000..e380e85 --- /dev/null +++ b/tpu-runtime/lib/libbm1684x_kernel_module_version.txt @@ -0,0 +1,2 @@ +TPU1686: e8a88b92663eef61e649eb4832b67bf1af0585b8 +updated: 2024年 06月 12日 星期三 15:48:28 CST diff --git a/tpu-runtime/lib/libtpulv60_kernel_module.so b/tpu-runtime/lib/libtpulv60_kernel_module.so new file mode 100755 index 0000000..528bc6c Binary files /dev/null and b/tpu-runtime/lib/libtpulv60_kernel_module.so differ diff --git 
a/tpu-runtime/lib/libtpulv60_kernel_module_version.txt b/tpu-runtime/lib/libtpulv60_kernel_module_version.txt new file mode 100644 index 0000000..da5d10b --- /dev/null +++ b/tpu-runtime/lib/libtpulv60_kernel_module_version.txt @@ -0,0 +1,2 @@ +TPU1686: 21cdedee58cac420e86da3fb931fcad4913a8cdf +updated: 2024年 02月 22日 星期四 16:39:27 CST diff --git a/tpu-runtime/new.cmake b/tpu-runtime/new.cmake index fdd7adf..586e6e7 100644 --- a/tpu-runtime/new.cmake +++ b/tpu-runtime/new.cmake @@ -3,7 +3,6 @@ cmake_policy(SET CMP0048 NEW) cmake_policy(SET CMP0046 NEW) set(CMAKE_INSTALL_PREFIX ${CMAKE_BINARY_DIR}/install CACHE PATH "Install prefix") - set(common_dir ${CMAKE_CURRENT_SOURCE_DIR}/../tpu-common) if (NOT EXISTS ${common_dir}) message(FATAL_ERROR "${common_dir} does not exist") @@ -16,45 +15,69 @@ find_package(bmlib REQUIRED) find_package(bmodel REQUIRED) find_package(Threads REQUIRED) -# dump kernel_module.so to kernel_module.h -set(KERNEL_MODULE_PATH "${PROJECT_SOURCE_DIR}/lib/libbm1684x_kernel_module.so") -set(OUTPUT_FILE "${CMAKE_BINARY_DIR}/kernel_module.h") -add_custom_command( - OUTPUT ${OUTPUT_FILE} - COMMAND echo "const unsigned char kernel_module_data[] = {" > ${OUTPUT_FILE} - COMMAND hexdump -v -e '8/1 \"0x%02x,\" \"\\n\"' ${KERNEL_MODULE_PATH} >> ${OUTPUT_FILE} - COMMAND echo "}\;" >> ${OUTPUT_FILE} -) -add_custom_target(kernel_header DEPENDS ${OUTPUT_FILE}) +include(gen_kernel_header.cmake) +add_custom_target(kernel_header DEPENDS ${KERNEL_HEADER_FILE}) file(GLOB_RECURSE srcs src/*.cpp src/*.c) add_library(bmrt SHARED ${srcs}) +add_library(bmrt_static STATIC ${srcs}) add_dependencies(bmrt kernel_header) +add_dependencies(bmrt_static kernel_header) + target_link_libraries(bmrt PUBLIC bmodel::bmodel bmlib::bmlib ${CMAKE_DL_LIBS} Threads::Threads) +target_link_libraries(bmrt_static PUBLIC + bmodel::bmodel bmlib::bmlib + ${CMAKE_DL_LIBS} + Threads::Threads) target_include_directories(bmrt PUBLIC ${common_dir}/base/ ${CMAKE_CURRENT_SOURCE_DIR}/include/bmtap2 
${CMAKE_CURRENT_SOURCE_DIR}/include ${CMAKE_BINARY_DIR}) +target_include_directories(bmrt_static PUBLIC + ${common_dir}/base/ + ${CMAKE_CURRENT_SOURCE_DIR}/include/bmtap2 + ${CMAKE_CURRENT_SOURCE_DIR}/include + ${CMAKE_BINARY_DIR}) include(git-utils) get_version_from_tag(version soversion revision) set_target_properties(bmrt PROPERTIES SOVERSION ${soversion}) - +set_target_properties(bmrt_static PROPERTIES SOVERSION ${soversion}) set(app_srcs app/bmrt_test.cpp app/bmrt_test_case.cpp) add_executable(bmrt_test ${app_srcs}) -target_link_libraries(bmrt_test bmrt) +target_link_libraries(bmrt_test bmrt bmrt_static) target_compile_definitions(bmrt_test PRIVATE VER="${revision}") -install(TARGETS bmrt bmrt_test +set(runner_srcs + app/model_runner/cnpy.cpp + app/model_runner/model_runner.cpp) +add_executable(model_runner ${runner_srcs}) + +if("${ARCH}" STREQUAL "arm64") + find_library(ZLIB_LIBRARY NAMES z PATHS ${LIB_DIR}/lib/) + target_include_directories(model_runner PRIVATE ${LIB_DIR}/include/) + target_link_libraries(model_runner bmrt bmrt_static ${LIB_DIR}/lib/libz.so) +elseif("${ARCH}" STREQUAL "loongarch64") + find_library(ZLIB_LIBRARY NAMES z PATHS ${LIB_DIR}/lib/) + target_include_directories(model_runner PRIVATE ${LIB_DIR}/include/) + target_link_libraries(model_runner bmrt bmrt_static ${LIB_DIR}/lib/libz.so) +else() + target_link_libraries(model_runner bmrt bmrt_static z) +endif() + +target_compile_definitions(model_runner PRIVATE + VER="${revision}") +set_target_properties(bmrt_static PROPERTIES OUTPUT_NAME bmrt) +install(TARGETS bmrt bmrt_test bmrt_static model_runner LIBRARY DESTINATION lib COMPONENT libsophon RUNTIME DESTINATION bin diff --git a/tpu-runtime/old.cmake b/tpu-runtime/old.cmake index 91d92ee..6892897 100644 --- a/tpu-runtime/old.cmake +++ b/tpu-runtime/old.cmake @@ -76,6 +76,7 @@ add_custom_target(GEN_KERNEL_MODULE COMMAND cd ${CMAKE_CURRENT_SOURCE_DIR} ) +set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON) add_library(libbmrt SHARED ${SRCS}) 
add_dependencies(libbmrt GEN_KERNEL_MODULE) target_include_directories(libbmrt PUBLIC ${CMAKE_BINARY_DIR}) @@ -98,5 +99,5 @@ file(COPY file( COPY ${CMAKE_CURRENT_SOURCE_DIR}/lib/libbm1684x_kernel_module.so - DESTINATION ${CMAKE_INSTALL_PREFIX}/lib/tpu_module + DESTINATION ${CMAKE_INSTALL_PREFIX}/bin/tpu_module ) \ No newline at end of file diff --git a/tpu-runtime/scripts/build_thirdparty.sh b/tpu-runtime/scripts/build_thirdparty.sh new file mode 100755 index 0000000..cdf048a --- /dev/null +++ b/tpu-runtime/scripts/build_thirdparty.sh @@ -0,0 +1,119 @@ +#!/bin/bash + +####################################### +# required projects hierarchy +# . +# ├── libsophon(LIBSOPHON_DIR) +# │   ├── bmlib +# │   ├── tpu-bmodel +# │   ├── tpu-common +# │   ├── tpu-cpuop +# │   └── ... +# ├── tpu-runtime(RUNTIME_DIR) +# │ ├── scripts +# │   └── ... +# ├── TPU1684 +# │ ├── ... +# │ └── ... +# └── TPU1686 +# ├── ... +# └── ... + +RUNTIME_DIR=$(realpath `dirname ${BASH_SOURCE}`/..) +THIRDPARTY_DIR=$RUNTIME_DIR/build_thirdparty +LIBSOPHON_DIR=$(realpath $RUNTIME_DIR/../libsophon) +if [ ! -d $LIBSOPHON_DIR ]; then + LIBSOPHON_DIR=$(realpath $RUNTIME_DIR/..) +fi + +build_dep_lib () { + local TARGET_DIR=`cd $LIBSOPHON_DIR/$1 && pwd`; + pushd $TARGET_DIR; + rm -rf build; + mkdir build && cd build; + cmake ../ -DPLATFORM=cmodel -DCMAKE_BUILD_TYPE=Debug -DCMAKE_CXX_STANDARD=11 \ + -DPROJECT_VERSION_MAJOR=9 -DPROJECT_VERSION_MINOR=9 -DPROJECT_VERSION_PATCH=9 \ + -DCMAKE_PROJECT_VERSION=9.9.9 + ret=$? + if [ $ret -eq 0 ]; then + make -j $2; + ret=$? + fi + popd + echo "build $1 failed!" + return $ret +} + +mkdir -p $THIRDPARTY_DIR/bin +mkdir -p $THIRDPARTY_DIR/lib +mkdir -p $THIRDPARTY_DIR/include + +# build dependent lib +if [ ! 
-d "$LIBSOPHON_DIR" ]; then + echo "Please clone 'libsophon' project and its submodules outside tpu-runtime directory" + exit -1 +fi + +cp $LIBSOPHON_DIR/bmlib/CMakeLists.txt CMakeLists.txt.tmp +sed -i '/SOVERSION/d' $LIBSOPHON_DIR/bmlib/CMakeLists.txt +build_dep_lib bmlib bmlib +if [ $ret -ne 0 ]; then exit $ret; fi +mv CMakeLists.txt.tmp $LIBSOPHON_DIR/bmlib/CMakeLists.txt + +build_dep_lib tpu-bmodel +if [ $ret -ne 0 ]; then exit $ret; fi +build_dep_lib tpu-cpuop +if [ $ret -ne 0 ]; then exit $ret; fi + +pushd $LIBSOPHON_DIR +cp -v bmlib/build/*.so* $THIRDPARTY_DIR/lib/ +cp -v tpu-cpuop/build/*.so* $THIRDPARTY_DIR/lib/ +cp -v tpu-bmodel/build/*.a $THIRDPARTY_DIR/lib/ +cp -v tpu-bmodel/build/tpu_model $THIRDPARTY_DIR/bin +cp -v bmlib/include/bmlib_runtime.h $THIRDPARTY_DIR/include +cp -v bmlib/src/*.h $THIRDPARTY_DIR/include +cp -v tpu-common/base/* $THIRDPARTY_DIR/include +cp -r tpu-bmodel/include/* $THIRDPARTY_DIR/include + +echo "libsophon: `git rev-parse HEAD`" > $THIRDPARTY_DIR/version.txt + +cd tpu-bmodel +echo "tpu-bmodel: `git rev-parse HEAD`" >> $THIRDPARTY_DIR/version.txt +cd ../ + +cd tpu-cpuop +echo "tpu-cpuop: `git rev-parse HEAD`" >> $THIRDPARTY_DIR/version.txt +cd ../ +popd + +backend="" + +if [ -d $LIBSOPHON_DIR/../TPU1684 ]; then + pushd $LIBSOPHON_DIR/../TPU1684 + source scripts/envsetup.sh + build_backend_lib_cmodel + RET=$?; if [ $RET -ne 0 ]; then exit $RET; fi; + cp -v ./build_runtime/c_model/libcmodel.so $THIRDPARTY_DIR/lib/libcmodel_bm1684.so + echo "TPU1684: `git rev-parse HEAD`" >> $THIRDPARTY_DIR/version.txt + backend="$backend bm1684" + popd +fi + +if [ -d $LIBSOPHON_DIR/../TPU1686 ]; then + pushd $LIBSOPHON_DIR/../TPU1686 + CHIPS=(bm1684x bm1686 sg2260) + for chip in ${CHIPS[@]}; do + source scripts/envsetup.sh ${chip} + rm -rf build + rebuild_firmware_cmodel + RET=$?; if [ $RET -ne 0 ]; then exit $RET; fi; + cp -v ./build/firmware_core/libcmodel_firmware.so $THIRDPARTY_DIR/lib/libcmodel_${chip}.so + backend="$backend $chip" + done + 
echo "TPU1686: `git rev-parse HEAD`" >> $THIRDPARTY_DIR/version.txt + popd +fi + +find $THIRDPARTY_DIR + +echo "Build thirdparty lib with backend [${backend} ] done" diff --git a/tpu-runtime/scripts/build_thirdparty_soc.sh b/tpu-runtime/scripts/build_thirdparty_soc.sh new file mode 100755 index 0000000..0cfff68 --- /dev/null +++ b/tpu-runtime/scripts/build_thirdparty_soc.sh @@ -0,0 +1,91 @@ +#!/bin/bash + +####################################### +# required projects hierarchy +# . +# ├── libsophon(LIBSOPHON_DIR) +# │   ├── bmlib +# │   ├── tpu-bmodel +# │   ├── tpu-common +# │   ├── tpu-cpuop +# │   └── ... +# ├── tpu-runtime(RUNTIME_DIR) +# │ ├── scripts +# │   └── ... +# ├── TPU1684 +# │ ├── ... +# │ └── ... +# └── TPU1686 +# ├── ... +# └── ... + +RUNTIME_DIR=$(realpath `dirname ${BASH_SOURCE}`/..) +THIRDPARTY_DIR=$RUNTIME_DIR/build_thirdparty +LIBSOPHON_DIR=$(realpath $RUNTIME_DIR/..) + +PREBUILT_DIR=`cd $RUNTIME_DIR/../../bm_prebuilt_toolchains && pwd` +CROSS_TOOLCHAIN=$PREBUILT_DIR/gcc-linaro-6.3.1-2017.05-x86_64_aarch64-linux-gnu/ +C_COMPILER=$CROSS_TOOLCHAIN/bin/aarch64-linux-gnu-gcc +CXX_COMPILER=$CROSS_TOOLCHAIN/bin/aarch64-linux-gnu-g++ + +build_soc_lib () { + local TARGET_DIR=`cd $RUNTIME_DIR/../$1 && pwd`; + pushd $TARGET_DIR; + rm -rf build; + mkdir build && cd build; + cmake ../ -DPLATFORM=soc -DCMAKE_C_COMPILER=$C_COMPILER -DCMAKE_CXX_COMPILER=$CXX_COMPILER \ + -DPROJECT_VERSION_MAJOR=9 -DPROJECT_VERSION_MINOR=9 -DPROJECT_VERSION_PATCH=9 \ + -DCMAKE_PROJECT_VERSION=9.9.9 + ret=$? + if [ $ret -eq 0 ]; then + make -j $2; + ret=$? + fi + popd + echo "build $1 failed!" + return $ret +} + + +mkdir -p $THIRDPARTY_DIR/bin +mkdir -p $THIRDPARTY_DIR/lib +mkdir -p $THIRDPARTY_DIR/include + +# build dependent lib +if [ ! 
-d "$LIBSOPHON_DIR" ]; then + echo "Please clone 'libsophon' project and its submodules outside tpu-runtime directory" + exit -1 +fi + +cp $LIBSOPHON_DIR/bmlib/CMakeLists.txt CMakeLists.txt.tmp +sed -i '/SOVERSION/d' $LIBSOPHON_DIR/bmlib/CMakeLists.txt +build_soc_lib bmlib bmlib +if [ $ret -ne 0 ]; then exit $ret; fi +mv CMakeLists.txt.tmp $LIBSOPHON_DIR/bmlib/CMakeLists.txt + +build_soc_lib tpu-bmodel +if [ $ret -ne 0 ]; then exit $ret; fi +build_soc_lib tpu-cpuop +if [ $ret -ne 0 ]; then exit $ret; fi + +pushd $LIBSOPHON_DIR +cp -v bmlib/build/*.so* $THIRDPARTY_DIR/lib/ +cp -v tpu-cpuop/build/*.so* $THIRDPARTY_DIR/lib/ +cp -v tpu-bmodel/build/*.a $THIRDPARTY_DIR/lib/ +cp -v tpu-bmodel/build/tpu_model $THIRDPARTY_DIR/bin +cp -v bmlib/include/bmlib_runtime.h $THIRDPARTY_DIR/include +cp -v bmlib/src/*.h $THIRDPARTY_DIR/include +cp -v tpu-common/base/* $THIRDPARTY_DIR/include +cp -r tpu-bmodel/include/* $THIRDPARTY_DIR/include + +echo "libsophon: `git rev-parse HEAD`" > $THIRDPARTY_DIR/version.txt + +cd tpu-bmodel +echo "tpu-bmodel: `git rev-parse HEAD`" >> $THIRDPARTY_DIR/version.txt +cd ../ + +cd tpu-cpuop +echo "tpu-cpuop: `git rev-parse HEAD`" >> $THIRDPARTY_DIR/version.txt +cd ../ +popd + diff --git a/tpu-runtime/scripts/envsetup.sh b/tpu-runtime/scripts/envsetup.sh new file mode 100755 index 0000000..34d9f16 --- /dev/null +++ b/tpu-runtime/scripts/envsetup.sh @@ -0,0 +1,105 @@ +#/bin/bash + +SCRIPT_DIR=`dirname $(realpath ${BASH_SOURCE})` +RUNTIME_DIR=$(realpath $SCRIPT_DIR/..) 
+export THIRDPARTY_DIR=$RUNTIME_DIR/build_thirdparty + +PREBUILT_DIR=`cd $RUNTIME_DIR/../../bm_prebuilt_toolchains && pwd` +CROSS_TOOLCHAIN=$PREBUILT_DIR/gcc-linaro-6.3.1-2017.05-x86_64_aarch64-linux-gnu/ +C_COMPILER=$CROSS_TOOLCHAIN/bin/aarch64-linux-gnu-gcc +CXX_COMPILER=$CROSS_TOOLCHAIN/bin/aarch64-linux-gnu-g++ + +function build_tpu_runtime_soc() { + pushd $RUNTIME_DIR + rm -rf build + mkdir build && cd build + ONLY_TEST=1 cmake ../ -DCMAKE_C_COMPILER=$C_COMPILER -DCMAKE_CXX_COMPILER=$CXX_COMPILER -DCMAKE_BUILD_TYPE=Release $EXTRA_CONFIG + make -j$((`nproc`-1)) + RET=$?; if [ $RET -ne 0 ]; then popd; return $RET; fi + popd +} + + +function rebuild_tpu_runtime_soc() { + $SCRIPT_DIR/build_thirdparty_soc.sh + RET=$?; if [ $RET -ne 0 ]; then popd; return $RET; fi + build_tpu_runtime_soc +} + +function build_tpu_runtime() { + pushd $RUNTIME_DIR + rm -rf build + mkdir build && cd build + ONLY_TEST=1 cmake ../ -DCMAKE_BUILD_TYPE=Release $EXTRA_CONFIG + make -j$((`nproc`-1)) + RET=$?; if [ $RET -ne 0 ]; then popd; return $RET; fi + popd +} + +function rebuild_tpu_runtime(){ + $SCRIPT_DIR/build_thirdparty.sh + RET=$?; if [ $RET -ne 0 ]; then return $RET; fi + build_tpu_runtime +} + +function show_bmodel_chip() { + local bmodel_name="$1" + local chip_str=`$THIRDPARTY_DIR/bin/tpu_model --info ${bmodel_name} | grep chip` + local chip_name=${chip_str##* } + echo "${chip_name,,}" +} + +function cmodel_run_bmodel() { + local bmodel_name="$1" + if [ -z "$bmodel_name" ]; then + echo "bmodel name should not be null!" + return -1 + fi + local bmodel_args="--bmodel" + if [ -d "$bmodel_name" ]; then + bmodel_args="--context_dir" + bmodel_name="$bmodel_name/compilation.bmodel" + if [ ! 
-e "$bmodel_name/output_ref_data.dat" ]; then + bmodel_args="--compare 0 --context_dir" + fi + fi + local chip=`show_bmodel_chip ${bmodel_name}` + if [ $chip = 'bm1688' ]; then + chip="bm1686" + elif [ $chip = 'bm1690' ]; then + chip="sg2260" + fi + TPUKERNEL_FIRMWARE_PATH=$THIRDPARTY_DIR/lib/libcmodel_${chip}.so $EXTRA_EXEC $RUNTIME_DIR/build/bmrt_test $bmodel_args $* +} + +function cmodel_batch_run_bmodel() { + local bmodel_dir="$1" + local all_bmodels=`find "$bmodel_dir" -name *.bmodel` + for bmodel_name in ${all_bmodels[@]}; do + if [ "`basename $bmodel_name`" == "compilation.bmodel" ]; then + bmodel_name=`dirname $bmodel_name` + fi + cmodel_run_bmodel $bmodel_name + RET=$?; if [ $RET -ne 0 ]; then return $RET; fi + done +} + +function update_firmware(){ + CHIP_ARCH=${1:-bm1684x} + TPU1686_PATH=${2:-$RUNTIME_DIR/../../TPU1686} + FIRMWARE_PATH=$RUNTIME_DIR/lib + pushd $TPU1686_PATH + source scripts/envsetup.sh $CHIP_ARCH + rebuild_firmware + LIB_SUFFIX=${CHIP_ARCH} + if [ "${CHIP_ARCH}" == "bm1686" ]; then + LIB_SUFFIX="tpulv60" + fi + LIBNAME=lib${LIB_SUFFIX}_kernel_module + cp -v build/firmware_core/libfirmware_core.so ${FIRMWARE_PATH}/${LIBNAME}.so + echo "TPU1686: `git rev-parse HEAD`" > ${FIRMWARE_PATH}/${LIBNAME}_version.txt + echo "updated: `date`" >> ${FIRMWARE_PATH}/${LIBNAME}_version.txt + popd + cat ${FIRMWARE_PATH}/${CHIP_ARCH}_version.txt +} + diff --git a/tpu-runtime/src/bm1684_profile.cpp b/tpu-runtime/src/bm1684_profile.cpp index a714641..29f5dea 100644 --- a/tpu-runtime/src/bm1684_profile.cpp +++ b/tpu-runtime/src/bm1684_profile.cpp @@ -104,7 +104,7 @@ bool BMProfileDevice::init() bool BMProfileDevice::begin(net_ctx_t* net_ctx) { bm_status_t ret = BM_SUCCESS; - auto handle = profile->handle; + auto handle = profile->get_handle(); if(enable_bdc){ memset(tpu_buffer.ptr, -1, tpu_buffer.size); ret = bm_memcpy_s2d(handle, tpu_buffer.mem, tpu_buffer.ptr); @@ -132,7 +132,7 @@ bool BMProfileDevice::begin(net_ctx_t* net_ctx) bool 
BMProfileDevice::end(net_ctx_t* net_ctx) { - auto handle = profile->handle; + auto handle = profile->get_handle(); int ret = BM_SUCCESS; if (enable_bdc){ bm_disable_perf_monitor(handle, &tpu_perf_monitor); diff --git a/tpu-runtime/src/bm1684x_profile.cpp b/tpu-runtime/src/bm1684x_profile.cpp index 9c8128e..ff610e9 100644 --- a/tpu-runtime/src/bm1684x_profile.cpp +++ b/tpu-runtime/src/bm1684x_profile.cpp @@ -33,7 +33,7 @@ bool BMProfileDevice::init() bool BMProfileDevice::begin(net_ctx_t* net_ctx) { bm_status_t ret = BM_SUCCESS; - auto handle = profile->handle; + auto handle = profile->get_handle(); if(enable_bdc){ memset(tpu_buffer.ptr, -1, tpu_buffer.size); ret = bm_memcpy_s2d(handle, tpu_buffer.mem, tpu_buffer.ptr); @@ -53,8 +53,8 @@ bool BMProfileDevice::begin(net_ctx_t* net_ctx) // enable dynamic profile if(enable_arm){ - auto enable_func_id = net_ctx->kernel_module_->get_enable_profile_func_id(); - ret = bmfunc::bmdnn_1684x()->_bmdnn_set_profile_enable_(handle, enable_func_id, true); + auto enable_func_id = net_ctx->kernel_module_->get_enable_profile_func_id({0}); + ret = bmfunc::bmdnn_1684x()->_bmdnn_set_profile_enable_(handle, enable_func_id[0], true); CHECK_status(ret); } return true; @@ -62,7 +62,7 @@ bool BMProfileDevice::begin(net_ctx_t* net_ctx) bool BMProfileDevice::end(net_ctx_t* net_ctx) { - auto handle = profile->handle; + auto handle = profile->get_handle(); int ret = BM_SUCCESS; if (enable_bdc){ bm_disable_perf_monitor(handle, &tpu_perf_monitor); @@ -100,7 +100,7 @@ bool BMProfileDevice::end(net_ctx_t* net_ctx) size_t total_len = 0; u32 block_type = (i == 0) ? 
BLOCK_DYN_DATA : BLOCK_DYN_EXTRA; while(1){ - auto get_func_id = net_ctx->kernel_module_->get_get_profile_func_id(); + auto get_func_id = net_ctx->kernel_module_->get_get_profile_func_id({0})[0]; bm_status_t status = bmfunc::bmdnn_1684x()->_bmdnn_get_profile_data_( handle, get_func_id, @@ -122,7 +122,7 @@ bool BMProfileDevice::end(net_ctx_t* net_ctx) } profile->write_block(block_type, data.size(), data.data()); } - auto enable_func_id = net_ctx->kernel_module_->get_enable_profile_func_id(); + auto enable_func_id = net_ctx->kernel_module_->get_enable_profile_func_id({0})[0]; bm_status_t status = bmfunc::bmdnn_1684x()->_bmdnn_set_profile_enable_(handle, enable_func_id, false); CHECK_status(status); } diff --git a/tpu-runtime/src/bm1688_profile.cpp b/tpu-runtime/src/bm1688_profile.cpp new file mode 100644 index 0000000..b411039 --- /dev/null +++ b/tpu-runtime/src/bm1688_profile.cpp @@ -0,0 +1,165 @@ +#include +#include +#include "bmruntime.h" +#include "bm1688_profile.h" + +namespace bm1688_profile { + +bool BMProfileDevice::init() +{ + if(!enable) { + return false; + } + auto& core_list = profile->get_core_list(); + auto core_num = core_list.size(); + this->buffers.resize(core_num); + for(auto& buffer: this->buffers){ + if(enable_arm){ + profile->alloc_buffer(&buffer.mcu, dyn_max_size, "dyn_profile"); + } + if(enable_bdc){ + u64 tpu_device_buffer_size = bdc_record_len * sizeof(TPU_PROFILE_FORMAT); + profile->alloc_buffer(&buffer.tiu, tpu_device_buffer_size, "bdc_perf_monitor"); + } + if(enable_gdma){ + u64 gdma_device_buffer_size = gdma_record_len * sizeof(GDMA_PROFILE_FORMAT); + profile->alloc_buffer(&buffer.gdma, gdma_device_buffer_size, "gdma_perf_monitor"); + } + } + return true; +} + +bool BMProfileDevice::begin(net_ctx_t* net_ctx) +{ + bm_status_t ret = BM_SUCCESS; + auto handle = profile->get_handle(); + auto& core_list = profile->get_core_list(); + auto enable_bits = ((!!enable_gdma)<kernel_module_->get_set_engine_profile_param_func_id(core_list); + auto 
enable_func_ids = net_ctx->kernel_module_->get_enable_profile_func_id(core_list); + for(size_t i=0; i_bmdnn_set_engine_profile_param_( + handle, core_list[i], set_func_ids[i], + PROFILE_ENGINE_TIU, + bm_mem_get_device_addr(tiu_buffer.mem), + bm_mem_get_device_size(tiu_buffer.mem) + ); + } + if(enable_gdma){ + auto& gdma_buffer = buffers[i].gdma; + memset(gdma_buffer.ptr, 0, gdma_buffer.size); + ret = bm_memcpy_s2d(handle, gdma_buffer.mem, gdma_buffer.ptr); + if (ret != BM_SUCCESS) { + BMRT_LOG(FATAL, "init device buffer data failed, ret = %d\n", ret); + } + bmfunc::bmdnn_1688()->_bmdnn_set_engine_profile_param_( + handle, core_list[i], set_func_ids[i], + PROFILE_ENGINE_GDMA, + bm_mem_get_device_addr(gdma_buffer.mem), + bm_mem_get_device_size(gdma_buffer.mem) + ); + } + // enable dynamic profile + ret = bmfunc::bmdnn_1688()->_bmdnn_set_profile_enable_(handle, core_list[i], enable_func_ids[i], enable_bits); + CHECK_status(ret); + } + return true; +} + +bool BMProfileDevice::end(net_ctx_t* net_ctx) +{ + auto handle = profile->get_handle(); + int ret = BM_SUCCESS; + auto& core_list = profile->get_core_list(); + for(size_t i=0; iwrite_block(BLOCK_MONITOR_BDC, valid_len * sizeof(tpu_data[0]), tpu_data); + } + if (enable_gdma){ + auto& gdma_buffer = buffers[i].gdma; + ret = bm_memcpy_d2s(handle, gdma_buffer.ptr, gdma_buffer.mem); + if (ret != BM_SUCCESS) { + BMRT_LOG(FATAL, "Get monitor profile from device to system failed, ret = %d\n", ret); + } + auto gdma_data = (GDMA_PROFILE_FORMAT*)gdma_buffer.ptr; + size_t valid_len = 0; + while (gdma_data[valid_len].inst_start_time != 0 && gdma_data[valid_len].inst_end_time != 0 && valid_len < gdma_record_len) valid_len++; + BMRT_LOG(INFO, "core_index=%d, core_id=%d, gdma record_num=%d, max_record_num=%d", (int)i, (int)core_list[i], (int)valid_len, (int)gdma_record_len); + profile->write_block(BLOCK_MONITOR_GDMA, valid_len * sizeof(GDMA_PROFILE_FORMAT), gdma_data); + } + auto get_func_ids = 
net_ctx->kernel_module_->get_get_profile_func_id(core_list); + auto enable_func_ids = net_ctx->kernel_module_->get_enable_profile_func_id(core_list); + if (enable_arm) { + for(int j=0; j<2; j++){ + auto& mcu_buffer = buffers[i].mcu; + vector data; + size_t offset = 0; + size_t total_len = 0; + u32 block_type = (j == 0) ? BLOCK_DYN_DATA : BLOCK_DYN_EXTRA; + while(1){ + bm_status_t status = bmfunc::bmdnn_1688()->_bmdnn_get_profile_data_( + handle, + core_list[i], + get_func_ids[i], + bm_mem_get_device_addr(mcu_buffer.mem), + bm_mem_get_device_size(mcu_buffer.mem), + offset, j); + CHECK_status(status); + status = bm_memcpy_d2s(handle, mcu_buffer.ptr, mcu_buffer.mem); + CHECK_status(status); + auto u32_ptr = (u32*)mcu_buffer.ptr; + auto read_len = u32_ptr[0]; + if(total_len==0){ + total_len = u32_ptr[1]; + } + auto data_ptr = (u8*)&u32_ptr[2]; + data.insert(data.end(), data_ptr, data_ptr + read_len); + offset += read_len; + if(offset>=total_len) break; + } + profile->write_block(block_type, data.size(), data.data()); + } + bm_status_t status = bmfunc::bmdnn_1688()->_bmdnn_set_profile_enable_(handle, core_list[i], enable_func_ids[i], 0); + CHECK_status(status); + } + } + return true; +} + +void BMProfileDevice::deinit() +{ + if (!enable) return; + for (auto& buffer: buffers){ + profile->free_buffer(&buffer.tiu); + profile->free_buffer(&buffer.gdma); + profile->free_buffer(&buffer.mcu); + } +} + +bool BMProfileDevice::enabled() +{ + return enable; +} + +} +#undef BLOCK_SIZE +#undef BURST_LEN diff --git a/tpu-runtime/src/bmfunc/bmdnn_func_1682.cpp b/tpu-runtime/src/bmfunc/bmdnn_func_1682.cpp old mode 100644 new mode 100755 index fbc8252..43919b8 --- a/tpu-runtime/src/bmfunc/bmdnn_func_1682.cpp +++ b/tpu-runtime/src/bmfunc/bmdnn_func_1682.cpp @@ -6,110 +6,128 @@ namespace bmruntime { -/* multiple fullnet mode - */ -bm_status_t bmdnn_func_1682::_bmdnn_multi_fullnet_( - bm_handle_t handle, - int input_num, - u64* user_input_global_offset, - u64* cmd_input_global_offset, - 
int* input_tensor_size, // unit: float word - unsigned short* input_dtype, - int output_num, - u64* user_output_global_offset, - u64* cmd_output_global_offset, - int* output_tensor_size, // unit: float word - unsigned short* output_dtype, - u64 bdc_cmd_offset, - u64 gdma_cmd_offset, - u64 cdma_cmd_offset, - int* bdc_cmd_num, - int* gdma_cmd_num, - int* cdma_cmd_num, - int cmdgroup_num - ) -{ - BMRT_ASSERT_INFO(handle,"handle shouldn't be NULL\n"); - - - u32 api_buffer_size = sizeof(int) + (input_num * (sizeof(u64) * 2 + sizeof(int))) + //api buffer size for input - sizeof(int) + (output_num * (sizeof(u64) * 2 + sizeof(int))) + //api buffer size for output - sizeof(u64) * 3 + sizeof(int) * 3 * cmdgroup_num + sizeof(int); - - u8* api_buffer = new u8 [api_buffer_size]; - void* p_api = api_buffer; - - //input global offset process - *(int*)p_api = input_num; - p_api = (int*)p_api + 1; - for (int i = 0; i < input_num; ++i) { - *(u64*)p_api = user_input_global_offset[i]; - p_api = (u64*)p_api + 1; - *(u64*)p_api = cmd_input_global_offset[i]; - p_api = (u64*)p_api + 1; - if(input_dtype[i] == BM_INT8 || input_dtype[i] == BM_UINT8){ - *(int*)p_api = (input_tensor_size[i]+3)/4; - } else if(input_dtype[i] == BM_INT16 || input_dtype[i] == BM_FLOAT16 || input_dtype[i] == BM_UINT16){ - *(int*)p_api = (input_tensor_size[i]+1)/2; - } else if(input_dtype[i] == BM_INT32 || input_dtype[i] == BM_FLOAT32){ - *(int*)p_api = input_tensor_size[i]; - } else { - BMRT_ASSERT_INFO(0,"Unsupported input data type %d\n",input_dtype[i]); - } - p_api = (int*)p_api + 1; - } - - //output global offset process - *(int*)p_api = output_num; - p_api = (int*)p_api + 1; - for (int i = 0; i < output_num; ++i) { - *(u64*)p_api = user_output_global_offset[i]; - p_api = (u64*)p_api + 1; - *(u64*)p_api = cmd_output_global_offset[i]; - p_api = (u64*)p_api + 1; - if(output_dtype[i] == BM_INT8 || output_dtype[i] == BM_UINT8){ - *(int*)p_api = (output_tensor_size[i]+3)/4; - } else if(output_dtype[i] == BM_INT16 || 
output_dtype[i] == BM_FLOAT16 || output_dtype[i] == BM_UINT16){ - *(int*)p_api = (output_tensor_size[i]+1)/2; - } else if(output_dtype[i] == BM_INT32 || output_dtype[i] == BM_FLOAT32){ - *(int*)p_api = output_tensor_size[i]; - } else { - BMRT_ASSERT_INFO(0,"Unsupported input data type %d\n",input_dtype[i]); - } - p_api = (int*)p_api + 1; +void bmdnn_func_1682::fill_api_info(const tpu_net_info_t &net_info, + api_info_t &api_info) { + const std::vector &input_info = net_info.input_info; + const std::vector &output_info = net_info.output_info; + const std::vector &cmd_info = net_info.core_commands[0].cmd_info; + + u32 api_buffer_size = + sizeof(int) + + (input_info.size() * + (sizeof(u64) * 2 + sizeof(int))) + // api buffer size for input + sizeof(int) + + (output_info.size() * + (sizeof(u64) * 2 + sizeof(int))) + // api buffer size for output + sizeof(u64) * 3 + + sizeof(int) * 3 * cmd_info.size() + sizeof(int); + + api_info.api_id.push_back(b_enable_profile ? BM_API_ID_MULTI_FULLNET_PROFILE + : BM_API_ID_MULTI_FULLNET); + api_info.api_data.resize(1); + api_info.api_data[0].assign(api_buffer_size, 0); + api_info.input_addr_offset.assign(input_info.size(), 0); + api_info.output_addr_offset.assign(output_info.size(), 0); + void *p_api = api_info.api_data[0].data(); + + // input global offset process + *(int *)p_api = (int)input_info.size(); + p_api = (int *)p_api + 1; + for (size_t i = 0; i < input_info.size(); ++i) { + api_info.input_addr_offset.at(i) = + (uint8_t *)p_api - (uint8_t *)(api_info.api_data.data()); + *(u64 *)p_api = input_info.at(i).user_global_addr; + p_api = (u64 *)p_api + 1; + *(u64 *)p_api = input_info.at(i).compiled_global_addr; + p_api = (u64 *)p_api + 1; + int dtype_size = + bmrt_data_type_size((bm_data_type_t)input_info.at(i).dtype); + const int32_t length = (input_info.at(i).n * input_info.at(i).c * + input_info.at(i).h * input_info.at(i).w) / + dtype_size; + if (dtype_size == 1) { + *(int *)p_api = (length + 3) / 4; + } else if (dtype_size == 
2) { + *(int *)p_api = (length + 1) / 2; + } else if (dtype_size == 4) { + *(int *)p_api = length; + } else { + BMRT_ASSERT_INFO(0, "Unsupported input data type %d\n", + input_info.at(i).dtype); } - - //memcpy cmd offset and num - *(u64*)p_api = bdc_cmd_offset; - p_api = (u64*)p_api + 1; - *(u64*)p_api = gdma_cmd_offset; - p_api = (u64*)p_api + 1; - *(u64*)p_api = cdma_cmd_offset; - p_api = (u64*)p_api + 1; - *(int*)p_api = cmdgroup_num; - for (int i = 0; i < cmdgroup_num; i++) { - p_api = (int*)p_api + 1; - *(int*)p_api = bdc_cmd_num[i]; - p_api = (int*)p_api + 1; - *(int*)p_api = gdma_cmd_num[i]; - p_api = (int*)p_api + 1; - *(int*)p_api = cdma_cmd_num[i]; + p_api = (int *)p_api + 1; + } + + // output global offset process + *(int *)p_api = (int)output_info.size(); + p_api = (int *)p_api + 1; + for (size_t i = 0; i < output_info.size(); ++i) { + api_info.output_addr_offset.at(i) = + (uint8_t *)p_api - (uint8_t *)(api_info.api_data.data()); + *(u64 *)p_api = output_info.at(i).user_global_addr; + p_api = (u64 *)p_api + 1; + *(u64 *)p_api = output_info.at(i).compiled_global_addr; + p_api = (u64 *)p_api + 1; + int dtype_size = + bmrt_data_type_size((bm_data_type_t)output_info.at(i).dtype); + const int32_t length = (output_info.at(i).n * output_info.at(i).c * + output_info.at(i).h * output_info.at(i).w) / + dtype_size; + if (dtype_size) { + *(int *)p_api = (length + 3) / 4; + } else if (dtype_size) { + *(int *)p_api = (length + 1) / 2; + } else if (dtype_size) { + *(int *)p_api = length; + } else { + BMRT_ASSERT_INFO(0, "Unsupported input data type %d\n", + output_info.at(i).dtype); } - - bm_api_id_t tmp_api_id = (bm_api_id_t)((b_enable_profile) ? 
BM_API_ID_MULTI_FULLNET_PROFILE - : BM_API_ID_MULTI_FULLNET); - bm_status_t status = bm_send_api(handle, tmp_api_id, api_buffer, api_buffer_size); + p_api = (int *)p_api + 1; + } + + // memcpy cmd offset and num + *(u64 *)p_api = net_info.core_commands[0].bdc_cmd_addr; + p_api = (u64 *)p_api + 1; + *(u64 *)p_api = net_info.core_commands[0].gdma_cmd_addr; + p_api = (u64 *)p_api + 1; + *(u64 *)p_api = net_info.core_commands[0].cdma_cmd_addr; + p_api = (u64 *)p_api + 1; + *(int *)p_api = (int)cmd_info.size(); + for (size_t i = 0; i < cmd_info.size(); i++) { + p_api = (int *)p_api + 1; + *(int *)p_api = cmd_info.at(i).bdc_cmd_num; + p_api = (int *)p_api + 1; + *(int *)p_api = cmd_info.at(i).gdma_cmd_num; + p_api = (int *)p_api + 1; + *(int *)p_api = cmd_info.at(i).cdma_cmd_num; + } +} +/* multiple fullnet mode + */ +bm_status_t +bmdnn_func_1682::_bmdnn_multi_fullnet_(bm_handle_t handle, + const tpu_net_info_t &net_info) { + BMRT_ASSERT_INFO(handle, "handle shouldn't be NULL\n"); + api_info_t api_info; + fill_api_info(net_info, api_info); + + bm_status_t status = + bm_send_api(handle, (bm_api_id_t)api_info.api_id[0], + api_info.api_data[0].data(), + api_info.api_data[0].size()); + if (BM_SUCCESS != status) { + BMRT_LOG(WRONG, "bm_send_api failed, api id:%d, status:%d", api_info.api_id[0], + status); + } else { + status = bm_sync_api(handle); if (BM_SUCCESS != status) { - BMRT_LOG(WRONG, "bm_send_api failed, api id:%d, status:%d", tmp_api_id, status); - } else { - status = bm_sync_api(handle); - if (BM_SUCCESS != status) { - BMRT_LOG(WRONG, "bm_sync_api failed, api id:%d, status:%d", tmp_api_id, status); - } + BMRT_LOG(WRONG, "bm_sync_api failed, api id:%d, status:%d", api_info.api_id[0], + status); } + } - delete[] api_buffer; - return status; + return status; } /* diff --git a/tpu-runtime/src/bmfunc/bmdnn_func_1684.cpp b/tpu-runtime/src/bmfunc/bmdnn_func_1684.cpp index 398a518..6cb72e3 100644 --- a/tpu-runtime/src/bmfunc/bmdnn_func_1684.cpp +++ 
b/tpu-runtime/src/bmfunc/bmdnn_func_1684.cpp @@ -1,109 +1,114 @@ #include "bmfunc/bmfunc.h" #include +#include "string.h" namespace bmruntime { -bm_status_t bmdnn_func_1684::_bmdnn_multi_fullnet_( - bm_handle_t handle, - int input_num, - u64* user_input_global_offset, - u64* cmd_input_global_offset, - int* input_n, - int* input_c, - int* input_h, - int* input_w, - unsigned short* input_data_type, //0: FP32, 1: FP16, 2: INT8, 3: UINT8, 4: INT16, 5: UINT16 - unsigned char* input_st_mode, //0: 1N, 1: 2N, 2: 4N - unsigned char* real_in_stmode, //0: 1N, 1: 2N, 2: 4N - int output_num, - u64* user_output_global_offset, - u64* cmd_output_global_offset, - int* output_n, - int* output_length, - unsigned short* output_data_type, //0: FP32, 1: FP16, 2: INT8, 3: UINT8, 4: INT16, 5: UINT16 - unsigned char* output_st_mode, //0: 1N, 1: 2N, 2: 4N - unsigned char* force_out_stmode, //0: 1N, 1: 2N, 2: 4N - u64 bdc_cmd_offset, - u64 gdma_cmd_offset, - int* bdc_cmd_num, - int* gdma_cmd_num, - int cmdgroup_num, - u32* input_pad_h - ) -{ - BMRT_ASSERT_INFO(handle,"handle shouldn't be NULL\n"); - u32 api_buffer_size = sizeof(int) + (input_num * (sizeof(u64) * 2 + sizeof(int) * 4 + sizeof(unsigned short) + sizeof(unsigned char) * 2 + sizeof(int))) + //api buffer size for input - sizeof(int) + (output_num * (sizeof(u64) * 2 + sizeof(int) * 2 + sizeof(unsigned short) + sizeof(unsigned char) * 2)) + //api buffer size for output - sizeof(u64) * 2 + sizeof(int) * 2 * cmdgroup_num + sizeof(int); - - u8* api_buffer = new u8 [api_buffer_size]; - - void* p_api = api_buffer; - //input global offset process - *(int*)p_api = input_num; - p_api = (int*)p_api + 1; - for (int i = 0; i < input_num; ++i) { - *(u64*)p_api = user_input_global_offset[i]; - p_api = (u64*)p_api + 1; - *(u64*)p_api = cmd_input_global_offset[i]; - p_api = (u64*)p_api + 1; - *(int*)p_api = input_n[i]; - p_api = (int*)p_api + 1; - *(int*)p_api = input_c[i]; - p_api = (int*)p_api + 1; - *(int*)p_api = input_h[i]; - p_api = 
(int*)p_api + 1; - *(int*)p_api = input_w[i]; - p_api = (int*)p_api + 1; - *(unsigned short*)p_api = input_data_type[i]; - p_api = (unsigned short*)p_api + 1; - *(unsigned char*)p_api = input_st_mode[i]; - p_api = (unsigned char*)p_api + 1; - *(unsigned char*)p_api = real_in_stmode[i]; - p_api = (unsigned char*)p_api + 1; - *(u32*)p_api = input_pad_h[i]; - p_api = (u32*)p_api + 1; - } - - //output global offset process - *(int*)p_api = output_num; - p_api = (int*)p_api + 1; - for (int i = 0; i < output_num; ++i) { - *(u64*)p_api = user_output_global_offset[i]; - p_api = (u64*)p_api + 1; - *(u64*)p_api = cmd_output_global_offset[i]; - p_api = (u64*)p_api + 1; - *(int*)p_api = output_n[i]; - p_api = (int*)p_api + 1; - *(int*)p_api = output_length[i]; - p_api = (int*)p_api + 1; - *(unsigned short*)p_api = output_data_type[i]; - p_api = (unsigned short*)p_api + 1; - *(unsigned char*)p_api = output_st_mode[i]; - p_api = (unsigned char*)p_api + 1; - *(unsigned char*)p_api = force_out_stmode[i]; - p_api = (unsigned char*)p_api + 1; - } - - //memcpy cmd offset and num - *(u64*)p_api = bdc_cmd_offset; - p_api = (u64*)p_api + 1; - *(u64*)p_api = gdma_cmd_offset; - p_api = (u64*)p_api + 1; - *(int*)p_api = cmdgroup_num; - for (int i = 0; i < cmdgroup_num; i++) { - p_api = (int*)p_api + 1; - *(int*)p_api = bdc_cmd_num[i]; - p_api = (int*)p_api + 1; - *(int*)p_api = gdma_cmd_num[i]; - } - - bm_status_t status = bm_send_api(handle, (bm_api_id_t)BM_API_ID_MULTI_FULLNET, api_buffer, api_buffer_size); - if (BM_SUCCESS != status) { - BMRT_LOG(WRONG, "bm_send_api failed, api id:%d, status:%d", BM_API_ID_MULTI_FULLNET, status); - } - delete [] api_buffer; - return status; +void bmdnn_func_1684::fill_api_info(const tpu_net_info_t &net_info, + api_info_t &api_info) { + const std::vector &input_info = net_info.input_info; + const std::vector &output_info = net_info.output_info; + const std::vector &cmd_info = net_info.core_commands[0].cmd_info; + + u32 api_buffer_size = + sizeof(int) + + 
(input_info.size() * + (sizeof(u64) * 2 + sizeof(int) * 4 + sizeof(unsigned short) + + sizeof(unsigned char) * 2 + sizeof(int))) + // api buffer size for input + sizeof(int) + + (output_info.size() * + (sizeof(u64) * 2 + sizeof(int) * 2 + sizeof(unsigned short) + + sizeof(unsigned char) * 2)) + // api buffer size for output + sizeof(u64) * 2 + + sizeof(int) * 2 * cmd_info.size() + sizeof(int); + + api_info.api_id.push_back(BM_API_ID_MULTI_FULLNET); + api_info.api_data.resize(1); + api_info.api_data[0].assign(api_buffer_size, 0); + api_info.input_addr_offset.assign(input_info.size(), 0); + api_info.output_addr_offset.assign(output_info.size(), 0); + + void *p_api = api_info.api_data[0].data(); + // input global offset process + *(int *)p_api = input_info.size(); + p_api = (int *)p_api + 1; + for (size_t i = 0; i < input_info.size(); ++i) { + const auto &info = input_info.at(i); + api_info.input_addr_offset.at(i) = + (uint8_t *)p_api - (uint8_t *)(api_info.api_data.data()); + *(u64 *)p_api = info.user_global_addr; + p_api = (u64 *)p_api + 1; + *(u64 *)p_api = info.compiled_global_addr; + p_api = (u64 *)p_api + 1; + *(int *)p_api = info.n; + p_api = (int *)p_api + 1; + *(int *)p_api = info.c; + p_api = (int *)p_api + 1; + *(int *)p_api = info.h; + p_api = (int *)p_api + 1; + *(int *)p_api = info.w; + p_api = (int *)p_api + 1; + *(unsigned short *)p_api = info.dtype; + p_api = (unsigned short *)p_api + 1; + *(unsigned char *)p_api = info.compiled_stmode; + p_api = (unsigned char *)p_api + 1; + *(unsigned char *)p_api = info.user_stmode; + p_api = (unsigned char *)p_api + 1; + *(u32 *)p_api = info.padding_h; + p_api = (u32 *)p_api + 1; + } + + // output global offset process + *(int *)p_api = output_info.size(); + p_api = (int *)p_api + 1; + for (size_t i = 0; i < output_info.size(); ++i) { + const auto &info = output_info.at(i); + api_info.output_addr_offset.at(i) = + (uint8_t *)p_api - (uint8_t *)(api_info.api_data.data()); + *(u64 *)p_api = info.user_global_addr; + 
p_api = (u64 *)p_api + 1; + *(u64 *)p_api = info.compiled_global_addr; + p_api = (u64 *)p_api + 1; + *(int *)p_api = info.n; + p_api = (int *)p_api + 1; + *(int *)p_api = (info.c * info.h * info.w); + p_api = (int *)p_api + 1; + *(unsigned short *)p_api = info.dtype; + p_api = (unsigned short *)p_api + 1; + *(unsigned char *)p_api = info.compiled_stmode; + p_api = (unsigned char *)p_api + 1; + *(unsigned char *)p_api = info.user_stmode; + p_api = (unsigned char *)p_api + 1; + } + + // memcpy cmd offset and num + *(u64 *)p_api = net_info.core_commands[0].bdc_cmd_addr; + p_api = (u64 *)p_api + 1; + *(u64 *)p_api = net_info.core_commands[0].gdma_cmd_addr; + p_api = (u64 *)p_api + 1; + *(int *)p_api = cmd_info.size(); + for (size_t i = 0; i < cmd_info.size(); i++) { + p_api = (int *)p_api + 1; + *(int *)p_api = cmd_info.at(i).bdc_cmd_num; + p_api = (int *)p_api + 1; + *(int *)p_api = cmd_info.at(i).gdma_cmd_num; + } +} +bm_status_t +bmdnn_func_1684::_bmdnn_multi_fullnet_(bm_handle_t handle, + const tpu_net_info_t &net_info) { + BMRT_ASSERT_INFO(handle, "handle shouldn't be NULL\n"); + + api_info_t api_info; + fill_api_info(net_info, api_info); + bm_status_t status = + bm_send_api(handle, (bm_api_id_t)api_info.api_id[0], + api_info.api_data[0].data(), api_info.api_data[0].size()); + if (BM_SUCCESS != status) { + BMRT_LOG(WRONG, "bm_send_api failed, api id:%d, status:%d", + BM_API_ID_MULTI_FULLNET, status); + } + return status; } /* @@ -255,7 +260,7 @@ using_arm_buffer_size,arm_reserved_size); } else { status = bm_sync_api(handle); if (BM_SUCCESS != status) { - BMRT_LOG(WRONG, "bm_sync_api failed, api id:%d, status:%d", BM_API_ID_DYNAMIC_FULLNET, status); + BMRT_LOG(WRONG, "bm_sync_api failed, api id:%d, status:%d", BM_API_ID_DYNAMIC_FULLNET, status); } } diff --git a/tpu-runtime/src/bmfunc/bmdnn_func_1684x.cpp b/tpu-runtime/src/bmfunc/bmdnn_func_1684x.cpp index 50a2b01..38d2b85 100644 --- a/tpu-runtime/src/bmfunc/bmdnn_func_1684x.cpp +++ 
b/tpu-runtime/src/bmfunc/bmdnn_func_1684x.cpp @@ -5,86 +5,107 @@ namespace bmruntime { -bm_status_t bmdnn_func_1684x::_bmdnn_multi_fullnet_( - bm_handle_t handle, - int func_id, - int input_num, - u64* user_input_global_offset, - u64* cmd_input_global_offset, - u32* input_dsize, // in bytes - int output_num, - u64* user_output_global_offset, - u64* cmd_output_global_offset, - u32* output_dsize, // in bytes - u64 bdc_cmd_offset, - u64 gdma_cmd_offset, - int* bdc_cmd_num, - int* gdma_cmd_num, - u32* bdc_cmd_byte_size, - u32* gdma_cmd_byte_size, - int cmdgroup_num) -{ - BMRT_ASSERT_INFO(handle,"handle shouldn't be NULL\n"); - u32 api_buffer_size = sizeof(int) + (input_num * (sizeof(u64) * 2 + sizeof(u32))) + // input - sizeof(int) + (output_num * (sizeof(u64) * 2 + sizeof(u32))) + // output - sizeof(u64) * 2 + (sizeof(int) * 2 + sizeof(u32) * 2) * cmdgroup_num + - sizeof(int); - u8* api_buffer = new u8 [api_buffer_size]; - - void* p_api = api_buffer; +void bmdnn_func_1684x::fill_api_info(const tpu_net_info_t &net_info, + api_info_t &api_info) { + const std::vector &input_info = net_info.input_info; + const std::vector &output_info = net_info.output_info; + const std::vector &cmd_info = net_info.core_commands[0].cmd_info; + + u32 api_buffer_size = + sizeof(int) + + (input_info.size() * (sizeof(u64) * 2 + sizeof(u32))) + // input + sizeof(int) + + (output_info.size() * (sizeof(u64) * 2 + sizeof(u32))) + // output + sizeof(u64) * 2 + (sizeof(int) * 2 + sizeof(u32) * 2) * cmd_info.size() + + sizeof(int); + if (net_info.do_allreduce) { + api_buffer_size += sizeof(u32); + api_buffer_size += sizeof(tpu_kernel_allreduce_1684x_t); + } + api_info.api_data.resize(1); + api_info.api_data[0].assign(api_buffer_size, 0); + api_info.input_addr_offset.assign(input_info.size(), 0); + api_info.output_addr_offset.assign(output_info.size(), 0); + api_info.api_id.emplace_back(net_info.kernel_func_ids[0]); + + void *p_api = api_info.api_data[0].data(); // input global offset process - 
*(int*)p_api = input_num; - p_api = (int*)p_api + 1; - for (int i = 0; i < input_num; ++i) { - *(u64*)p_api = user_input_global_offset[i]; - p_api = (u64*)p_api + 1; - *(u64*)p_api = cmd_input_global_offset[i]; - p_api = (u64*)p_api + 1; - *(u32*)p_api = input_dsize[i]; - p_api = (u32*)p_api + 1; + *(int *)p_api = input_info.size(); + p_api = (int *)p_api + 1; + for (size_t i = 0; i < input_info.size(); ++i) { + const auto &info = input_info.at(i); + api_info.input_addr_offset.at(i) = + (uint8_t *)p_api - (uint8_t *)(api_info.api_data.data()); + *(u64 *)p_api = info.user_global_addr; + p_api = (u64 *)p_api + 1; + *(u64 *)p_api = info.compiled_global_addr; + p_api = (u64 *)p_api + 1; + *(u32 *)p_api = bmrt_data_type_size((bm_data_type_t)info.dtype) * + (info.n * info.c * info.h * info.w); + p_api = (u32 *)p_api + 1; } // output global offset process - *(int*)p_api = output_num; - p_api = (int*)p_api + 1; - for (int i = 0; i < output_num; ++i) { - *(u64*)p_api = user_output_global_offset[i]; - p_api = (u64*)p_api + 1; - *(u64*)p_api = cmd_output_global_offset[i]; - p_api = (u64*)p_api + 1; - *(u32*)p_api = output_dsize[i]; - p_api = (u32*)p_api + 1; + *(int *)p_api = output_info.size(); + p_api = (int *)p_api + 1; + for (size_t i = 0; i < output_info.size(); ++i) { + const auto &info = output_info.at(i); + api_info.output_addr_offset.at(i) = + (uint8_t *)p_api - (uint8_t *)(api_info.api_data.data()); + *(u64 *)p_api = info.user_global_addr; + p_api = (u64 *)p_api + 1; + *(u64 *)p_api = info.compiled_global_addr; + p_api = (u64 *)p_api + 1; + *(u32 *)p_api = bmrt_data_type_size((bm_data_type_t)info.dtype) * + (info.n * info.c * info.h * info.w); + p_api = (u32 *)p_api + 1; } // memcpy cmd offset and num - *(u64*)p_api = bdc_cmd_offset; - p_api = (u64*)p_api + 1; - *(u64*)p_api = gdma_cmd_offset; - p_api = (u64*)p_api + 1; - *(int*)p_api = cmdgroup_num; - p_api = (int*)p_api + 1; - for (int i = 0; i < cmdgroup_num; i++) { - *(int*)p_api = bdc_cmd_num[i]; - p_api = 
(int*)p_api + 1; - *(int*)p_api = gdma_cmd_num[i]; - p_api = (int*)p_api + 1; - *(u32*)p_api = bdc_cmd_byte_size[i]; - p_api = (u32*)p_api + 1; - *(u32*)p_api = gdma_cmd_byte_size[i]; - p_api = (u32*)p_api + 1; + *(u64 *)p_api = net_info.core_commands[0].bdc_cmd_addr; + p_api = (u64 *)p_api + 1; + *(u64 *)p_api = net_info.core_commands[0].gdma_cmd_addr; + p_api = (u64 *)p_api + 1; + *(int *)p_api = cmd_info.size(); + p_api = (int *)p_api + 1; + for (size_t i = 0; i < cmd_info.size(); i++) { + *(int *)p_api = cmd_info.at(i).bdc_cmd_num; + p_api = (int *)p_api + 1; + *(int *)p_api = cmd_info.at(i).gdma_cmd_num; + p_api = (int *)p_api + 1; + *(u32 *)p_api = cmd_info.at(i).bdc_cmd_byte_size; + p_api = (u32 *)p_api + 1; + *(u32 *)p_api = cmd_info.at(i).gdma_cmd_byte_size; + p_api = (u32 *)p_api + 1; + } + + if (net_info.do_allreduce == 1) { + *(u32 *)p_api = net_info.do_allreduce; + p_api = (u32 *)p_api + 1; + *(tpu_kernel_allreduce_1684x_t *)p_api = net_info.allreduce_param; + p_api = (tpu_kernel_allreduce_1684x_t *)p_api + 1; } +} +bm_status_t +bmdnn_func_1684x::_bmdnn_multi_fullnet_(bm_handle_t handle, + const tpu_net_info_t &net_info) { + BMRT_ASSERT_INFO(handle, "handle shouldn't be NULL\n"); - bm_status_t status = tpu_kernel_launch_async(handle, func_id, api_buffer, api_buffer_size); + api_info_t api_info; + fill_api_info(net_info, api_info); + auto api_id = net_info.kernel_func_ids[0]; + bm_status_t status = tpu_kernel_launch_async(handle, api_id, + api_info.api_data[0].data(), + api_info.api_data[0].size()); if (BM_SUCCESS != status) { - BMRT_LOG(WRONG, "tpu_kernel_launch failed, func id:%d, status:%d", func_id, status); + BMRT_LOG(WRONG, "tpu_kernel_launch failed, func id:%d, status:%d", api_id, status); } - delete [] api_buffer; return status; } bm_status_t bmdnn_func_1684x::_bmdnn_dynamic_fullnet_( bm_handle_t handle, - int func_id, + tpu_kernel_function_t func_id, unsigned long long compiled_ir_global_addr, unsigned int compiled_ir_length, //unit dword 
unsigned int input_num, @@ -98,8 +119,11 @@ bm_status_t bmdnn_func_1684x::_bmdnn_dynamic_fullnet_( std::vector apd_ctx_mem_borders, std::vector apd_ctx_mem_offset, unsigned long long apd_coeff_mem_offset, + unsigned long long apd_io_start, + unsigned long long apd_io_mem_offset, bool get_output_shape, - unsigned long long output_shape_global_addr) + unsigned long long output_shape_global_addr, + tpu_kernel_allreduce_1684x_t *p_allreduce_param) { BMRT_ASSERT_INFO(handle,"handle shouldn't be NULL\n"); BMRT_ASSERT_INFO( @@ -118,8 +142,12 @@ bm_status_t bmdnn_func_1684x::_bmdnn_dynamic_fullnet_( output_num * sizeof(u64) + //get_output_shape, global_shape_mem_addr, apd_ctx_start, (ctx_num, apd_ctx_mem_borders, apd_ctx_mem_offset), sizeof(u32) + sizeof(u64) + sizeof(u64) + ( sizeof(u32)+sizeof(u64)*ctx_num*2 ) + - //apd_coeff_mem_offset - sizeof(u64); + //apd_coeff_mem_offset, apd_io_start, apd_io_mem_offset + sizeof(u64) + sizeof(u64) + sizeof(u64); + if (p_allreduce_param != NULL) { + api_buffer_size += sizeof(u32); + api_buffer_size += sizeof(tpu_kernel_allreduce_1684x_t); + } if (api_buffer_size > MAX_API_MSG_SIZE) { //decrease the api buffer size @@ -191,6 +219,19 @@ bm_status_t bmdnn_func_1684x::_bmdnn_dynamic_fullnet_( *(u64*)p_api = apd_coeff_mem_offset; p_api = (u64*)p_api + 1; + *(u64*)p_api = apd_io_start; + p_api = (u64*)p_api + 1; + + *(u64*)p_api = apd_io_mem_offset; + p_api = (u64*)p_api + 1; + + if (p_allreduce_param != NULL) { + *(u32*)p_api = 1; + p_api = (u32*)p_api + 1; + *(tpu_kernel_allreduce_1684x_t *)p_api = *p_allreduce_param; + p_api = (tpu_kernel_allreduce_1684x_t *)p_api + 1; + } + bm_status_t status = tpu_kernel_launch_async(handle, func_id, api_buffer, api_buffer_size); if (BM_SUCCESS != status) { BMRT_LOG(WRONG, "tpu_kernel_launch failed, func id:%d, status:%d", func_id, status); @@ -213,7 +254,7 @@ bm_status_t bmdnn_func_1684x::_bmdnn_set_profile_enable_(bm_handle_t handle, tp u32 profile_enable = enable; bm_status_t status = 
tpu_kernel_launch(handle, func_id, (u8*)&profile_enable, api_buffer_size); if (BM_SUCCESS != status) { - BMRT_LOG(WRONG, "bm_send_api failed, api id:%d, status:%d", SG_API_ID_SET_PROFILE_ENABLE, status); + BMRT_LOG(WRONG, "bm_send_api failed, api id:%d, status:%d", BM_API_ID_SET_PROFILE_ENABLE, status); } return status; } @@ -244,7 +285,7 @@ bm_status_t bmdnn_func_1684x::_bmdnn_get_profile_data_( api_data.byte_offset = byte_offset; api_data.data_category = data_category; - bm_api_id_t api_code = (bm_api_id_t)SG_API_ID_GET_PROFILE_DATA; + bm_api_id_t api_code = (bm_api_id_t)BM_API_ID_GET_PROFILE_DATA; bm_status_t status = tpu_kernel_launch_async(handle, func_id, (u8*)&api_data, api_buffer_size); if (BM_SUCCESS != status) { BMRT_LOG(WRONG, "bm_send_api failed, api id:%d, status:%d", api_code, status); diff --git a/tpu-runtime/src/bmfunc/bmdnn_func_1686.cpp b/tpu-runtime/src/bmfunc/bmdnn_func_1686.cpp deleted file mode 100644 index d1d0c4e..0000000 --- a/tpu-runtime/src/bmfunc/bmdnn_func_1686.cpp +++ /dev/null @@ -1,257 +0,0 @@ -#include "bmfunc/bmfunc.h" -#include - -namespace bmruntime { - -bm_status_t bmdnn_func_1686::_bmdnn_multi_fullnet_( - bm_handle_t handle, - int input_num, - u64* user_input_global_offset, - u64* cmd_input_global_offset, - u32* input_dsize, // in bytes - int output_num, - u64* user_output_global_offset, - u64* cmd_output_global_offset, - u32* output_dsize, // in bytes - u64 bdc_cmd_offset, - u64 gdma_cmd_offset, - int* bdc_cmd_num, - int* gdma_cmd_num, - u32* bdc_cmd_byte_size, - u32* gdma_cmd_byte_size, - int cmdgroup_num) -{ - BMRT_ASSERT_INFO(handle,"handle shouldn't be NULL\n"); - u32 api_buffer_size = sizeof(int) + (input_num * (sizeof(u64) * 2 + sizeof(u32))) + // input - sizeof(int) + (output_num * (sizeof(u64) * 2 + sizeof(u32))) + // output - sizeof(u64) * 2 + (sizeof(int) * 2 + sizeof(u32) * 2) * cmdgroup_num + - sizeof(int); - u8* api_buffer = new u8 [api_buffer_size]; - - void* p_api = api_buffer; - // input global offset process - 
*(int*)p_api = input_num; - p_api = (int*)p_api + 1; - for (int i = 0; i < input_num; ++i) { - *(u64*)p_api = user_input_global_offset[i]; - p_api = (u64*)p_api + 1; - *(u64*)p_api = cmd_input_global_offset[i]; - p_api = (u64*)p_api + 1; - *(u32*)p_api = input_dsize[i]; - p_api = (u32*)p_api + 1; - } - - // output global offset process - *(int*)p_api = output_num; - p_api = (int*)p_api + 1; - for (int i = 0; i < output_num; ++i) { - *(u64*)p_api = user_output_global_offset[i]; - p_api = (u64*)p_api + 1; - *(u64*)p_api = cmd_output_global_offset[i]; - p_api = (u64*)p_api + 1; - *(u32*)p_api = output_dsize[i]; - p_api = (u32*)p_api + 1; - } - - // memcpy cmd offset and num - *(u64*)p_api = bdc_cmd_offset; - p_api = (u64*)p_api + 1; - *(u64*)p_api = gdma_cmd_offset; - p_api = (u64*)p_api + 1; - *(int*)p_api = cmdgroup_num; - p_api = (int*)p_api + 1; - for (int i = 0; i < cmdgroup_num; i++) { - *(int*)p_api = bdc_cmd_num[i]; - p_api = (int*)p_api + 1; - *(int*)p_api = gdma_cmd_num[i]; - p_api = (int*)p_api + 1; - *(u32*)p_api = bdc_cmd_byte_size[i]; - p_api = (u32*)p_api + 1; - *(u32*)p_api = gdma_cmd_byte_size[i]; - p_api = (u32*)p_api + 1; - } - - bm_status_t status = bm_send_api(handle, (bm_api_id_t)SG_API_ID_MULTI_FULLNET, api_buffer, api_buffer_size); - if (BM_SUCCESS != status) { - BMRT_LOG(WRONG, "bm_send_api failed, api id:%d, status:%d", SG_API_ID_MULTI_FULLNET, status); - } - delete [] api_buffer; - return status; -} - -bm_status_t bmdnn_func_1686::_bmdnn_dynamic_fullnet_( - bm_handle_t handle, - unsigned long long compiled_ir_global_addr, - unsigned int compiled_ir_length, //unit dword - unsigned int input_num, - const unsigned long long *input_addrs, - const int * const * input_shapes, - const int * input_elem_nums, - const int * input_dtype_and_dims, - unsigned int output_num, - const unsigned long long *output_addrs, - unsigned long long apd_ctx_start, - std::vector apd_ctx_mem_borders, - std::vector apd_ctx_mem_offset, - unsigned long long 
apd_coeff_mem_offset, - bool get_output_shape, - unsigned long long output_shape_global_addr) -{ - BMRT_ASSERT_INFO(handle,"handle shouldn't be NULL\n"); - BMRT_ASSERT_INFO( - apd_ctx_mem_borders.size() == apd_ctx_mem_offset.size(), - "ctx borders and offset should have same size"); - - size_t ctx_num = apd_ctx_mem_borders.size(); - u32 api_buffer_size = sizeof(u64) +sizeof(u32) + // compiled_ir addr, length - // input num - sizeof(u32) + - // input_addr dtype_dims dim_shape elem_num - input_num * (sizeof(u64) + sizeof(int) + sizeof(int) * BM_MAX_DIMS_NUM + sizeof(int)) + - // output num - sizeof(u32) + - // output_addr - output_num * sizeof(u64) + - //get_output_shape, global_shape_mem_addr, apd_ctx_start, (ctx_num, apd_ctx_mem_borders, apd_ctx_mem_offset), - sizeof(u32) + sizeof(u64) + sizeof(u64) + ( sizeof(u32)+sizeof(u64)*ctx_num*2 ) + - //apd_coeff_mem_offset - sizeof(u64); - - if (api_buffer_size > MAX_API_MSG_SIZE) { - //decrease the api buffer size - for (u32 i = 0; i < input_num; ++i) { - u32 cur_dim = (u32)(input_dtype_and_dims[i] & 0xFFFF); - api_buffer_size -= (BM_MAX_DIMS_NUM - cur_dim) * sizeof(int); - } - } - - u8* api_buffer = new u8 [api_buffer_size]; - - void* p_api = api_buffer; - //compiled ir information - *(u64*)p_api = compiled_ir_global_addr; - p_api = (u64*)p_api + 1; - *(u32*)p_api = compiled_ir_length; - p_api = (u32*)p_api + 1; - - //input information - *(u32*)p_api = input_num; - p_api = (u32*)p_api + 1; - - for(u32 i = 0; i < input_num; ++i){ - *(u64*)p_api = input_addrs[i]; - p_api = (u64*)p_api + 1; - - *(u32*)p_api = input_dtype_and_dims[i]; - p_api = (u32*)p_api + 1; - u32 cur_dim = (u32)(input_dtype_and_dims[i] & 0xFFFF); - for(u32 j = 0; j < cur_dim; j++){ - *(u32 *)p_api = (u32)input_shapes[i][j]; - p_api = (u32 *)p_api + 1; - } - *(u32*)p_api = input_elem_nums[i]; - p_api = (u32*)p_api + 1; - } - //output information - *(u32*)p_api = output_num; - p_api = (u32*)p_api + 1; - - for(u32 i = 0; i < output_num; ++i){ - *(u64*)p_api 
= output_addrs[i]; - p_api = (u64*)p_api + 1; - } - //output shape info related - *(u32*)p_api = (u32)get_output_shape; - p_api = (u32*)p_api + 1; - *(u64*)p_api = output_shape_global_addr; - p_api = (u64*)p_api + 1; - - //The memory address in cmd gdma need to be offset when append context,here is the offset value. - *(u64*)p_api = apd_ctx_start; - p_api = (u64*)p_api + 1; - - *(u32*)p_api = ctx_num; - p_api = (u32*)p_api + 1; - - for (size_t i = 0; i < ctx_num; ++i) - { - *(u64*)p_api = apd_ctx_mem_borders[i]; - p_api = (u64*)p_api + 1; - } - for (size_t i = 0; i < ctx_num; ++i) - { - *(u64*)p_api = apd_ctx_mem_offset[i]; - p_api = (u64*)p_api + 1; - } - - *(u64*)p_api = apd_coeff_mem_offset; - p_api = (u64*)p_api + 1; - - bm_status_t status = - bm_send_api(handle, (bm_api_id_t)SG_API_ID_DYNAMIC_FULLNET, api_buffer, api_buffer_size); - if (BM_SUCCESS != status) { - BMRT_LOG(WRONG, "bm_send_api failed, api id:%d, status:%d", SG_API_ID_DYNAMIC_FULLNET, status); - } else { - status = bm_sync_api(handle); - if (BM_SUCCESS != status) { - BMRT_LOG(WRONG, "bm_sync_api failed, api id:%d, status:%d", SG_API_ID_DYNAMIC_FULLNET, status); - } - } - - bm_gmem_arm_reserved_release(handle); - - delete[] api_buffer; - return status; -} - -bm_status_t bmdnn_func_1686::_bmdnn_set_profile_enable_(bm_handle_t handle, bool enable){ - BMRT_ASSERT_INFO(handle,"handle shouldn't be NULL\n"); - u32 api_buffer_size = sizeof(u32); - u32 profile_enable = enable; - bm_status_t status = bm_send_api(handle, (bm_api_id_t)SG_API_ID_SET_PROFILE_ENABLE, (u8*)&profile_enable, api_buffer_size); - if (BM_SUCCESS != status) { - BMRT_LOG(WRONG, "bm_send_api failed, api id:%d, status:%d", SG_API_ID_SET_PROFILE_ENABLE, status); - } - return status; -} -bm_status_t bmdnn_func_1686::_bmdnn_get_profile_data_( - bm_handle_t handle, - unsigned long long output_global_addr, - unsigned int output_max_size, - unsigned int byte_offset, - unsigned int data_category //0: profile time records, 1: extra data - ){ - 
BMRT_ASSERT_INFO(handle,"handle shouldn't be NULL\n"); -#pragma pack(1) - struct { - u64 arm_reserved_addr; - u64 output_global_addr; - u32 output_size; - u32 byte_offset; - u32 data_category; //0: profile_data, 1: profile extra data - } api_data; -#pragma pack() - - const u32 api_buffer_size = sizeof(api_data); - - api_data.arm_reserved_addr = -1; - api_data.output_global_addr = output_global_addr; - api_data.output_size = output_max_size; - api_data.byte_offset = byte_offset; - api_data.data_category = data_category; - - bm_api_id_t api_code = (bm_api_id_t)SG_API_ID_GET_PROFILE_DATA; - bm_status_t status = - bm_send_api(handle, api_code, (u8*)&api_data, api_buffer_size); - if (BM_SUCCESS != status) { - BMRT_LOG(WRONG, "bm_send_api failed, api id:%d, status:%d", api_code, status); - } else { - status = bm_sync_api(handle); - if (BM_SUCCESS != status) { - BMRT_LOG(WRONG, "bm_sync_api failed, api id:%d, status:%d", api_code, status); - } - } - return status; -} - -} diff --git a/tpu-runtime/src/bmfunc/bmdnn_func_1688.cpp b/tpu-runtime/src/bmfunc/bmdnn_func_1688.cpp new file mode 100755 index 0000000..3d46b35 --- /dev/null +++ b/tpu-runtime/src/bmfunc/bmdnn_func_1688.cpp @@ -0,0 +1,407 @@ +#include "bmfunc/bmfunc.h" +#include +#include "bmlib_runtime.h" +#include "bmruntime.h" + +namespace bmruntime { +void bmdnn_func_1688::fill_api_info(const tpu_net_info_t &net_info, + api_info_t &api_info) { + BMRT_ASSERT_INFO(net_info.neuron_start_addr.size() == 1, + "only support one neuron addr"); + const std::vector &input_info = net_info.input_info; + const std::vector &output_info = net_info.output_info; + api_info.api_data.resize(net_info.core_commands.size()); + int base_message_id = 0; + for (auto core_id : net_info.core_list) { + base_message_id |= (1 << core_id); + } + for (size_t core_idx = 0; core_idx < net_info.core_list.size(); core_idx++) { + const std::vector &cmd_info = + net_info.core_commands[core_idx].cmd_info; + + u32 api_buffer_size = + sizeof(int) + + 
(input_info.size() * (sizeof(u64) * 2 + sizeof(u32))) + // input + sizeof(int) + + (output_info.size() * (sizeof(u64) * 2 + sizeof(u32))) + // output + sizeof(u64) * 2 + + (sizeof(int) * 2 + sizeof(u32) * 2) * cmd_info.size() + sizeof(int) + + 2 * sizeof(u64) + sizeof(int); // base message id + api_info.api_id.push_back(net_info.kernel_func_ids[core_idx]); + api_info.api_data[core_idx].assign(api_buffer_size, 0); + api_info.input_addr_offset.assign(input_info.size(), 0); + api_info.output_addr_offset.assign(output_info.size(), 0); + + void *p_api = api_info.api_data[core_idx].data(); + // input global offset process + *(int *)p_api = input_info.size(); + p_api = (int *)p_api + 1; + for (size_t i = 0; i < input_info.size(); ++i) { + const auto &info = input_info.at(i); + api_info.input_addr_offset.at(i) = + (uint8_t *)p_api - (uint8_t *)(api_info.api_data.data()); + *(u64 *)p_api = info.user_global_addr; + p_api = (u64 *)p_api + 1; + if (core_idx > 0 && ((info.compiled_global_addr >> 36) & 0x7) == 0) { + /// If the bmodel use multi core, we only move the user's input data to + /// compiled ddr once. + *(u64 *)p_api = info.user_global_addr; + } else { + *(u64 *)p_api = info.compiled_global_addr; + } + p_api = (u64 *)p_api + 1; + *(u32 *)p_api = bmrt_data_type_size((bm_data_type_t)info.dtype) * + (info.n * info.c * info.h * info.w); + p_api = (u32 *)p_api + 1; + } + + // output global offset process + *(int *)p_api = output_info.size(); + p_api = (int *)p_api + 1; + for (size_t i = 0; i < output_info.size(); ++i) { + const auto &info = output_info.at(i); + api_info.output_addr_offset.at(i) = + (uint8_t *)p_api - (uint8_t *)(api_info.api_data.data()); + *(u64 *)p_api = info.user_global_addr; + p_api = (u64 *)p_api + 1; + if (core_idx > 0 && ((info.compiled_global_addr >> 36) & 0x7) == 0) { + /// If the bmodel use multi core, we only move the user's input data to + /// compiled ddr once. 
+ *(u64 *)p_api = info.user_global_addr; + } else { + *(u64 *)p_api = info.compiled_global_addr; + } + p_api = (u64 *)p_api + 1; + *(u32 *)p_api = bmrt_data_type_size((bm_data_type_t)info.dtype) * + (info.n * info.c * info.h * info.w); + p_api = (u32 *)p_api + 1; + } + + // memcpy cmd offset and num + *(u64 *)p_api = net_info.core_commands[core_idx].bdc_cmd_addr; + p_api = (u64 *)p_api + 1; + *(u64 *)p_api = net_info.core_commands[core_idx].gdma_cmd_addr; + p_api = (u64 *)p_api + 1; + *(int *)p_api = cmd_info.size(); + p_api = (int *)p_api + 1; + for (size_t i = 0; i < cmd_info.size(); i++) { + const tpu_cmd_info_t info = cmd_info.at(i); + *(int *)p_api = info.bdc_cmd_num; + p_api = (int *)p_api + 1; + *(int *)p_api = info.gdma_cmd_num; + p_api = (int *)p_api + 1; + *(u32 *)p_api = info.bdc_cmd_byte_size; + p_api = (u32 *)p_api + 1; + *(u32 *)p_api = info.gdma_cmd_byte_size; + p_api = (u32 *)p_api + 1; + } + + *((u64 *)p_api) = net_info.coeff_start_addr; + p_api = ((u64 *)p_api) + 1; + *((u64 *)p_api) = net_info.neuron_start_addr[0]; + p_api = ((u64 *)p_api) + 1; + *((int *)p_api) = base_message_id; + p_api = ((u32 *)p_api) + 1; + } + + BMRT_LOG_RUN(DEBUG, { + for (size_t i = 0; i < input_info.size(); ++i) { + const auto &info = input_info.at(i); + auto byte_size = bmrt_data_type_size((bm_data_type_t)info.dtype) * + (info.n * info.c * info.h * info.w); + BMRT_LOG(DEBUG, "in[%d] user_addr=0x%llx, cmd_addr=0x%llx, shape=[%d, %d, %d, %d], dtype=%s, byte_size=%d", + i, info.user_global_addr, info.compiled_global_addr, info.n, info.c, info.h, info.w, dtype_to_string((bm_data_type_t)info.dtype), byte_size); + } + for (size_t i = 0; i < output_info.size(); ++i) { + const auto &info = output_info.at(i); + auto byte_size = bmrt_data_type_size((bm_data_type_t)info.dtype) * + (info.n * info.c * info.h * info.w); + BMRT_LOG(DEBUG, "out[%d] user_addr=0x%llx, cmd_addr=0x%llx, shape=[%d, %d, %d, %d], dtype=%s, byte_size=%d", + i, info.user_global_addr, info.compiled_global_addr, 
info.n, info.c, info.h, info.w, dtype_to_string((bm_data_type_t)info.dtype), byte_size); + } + for (size_t core_idx = 0; core_idx < net_info.core_list.size(); core_idx++) { + BMRT_LOG(DEBUG, "core[%d], tiu_cmd_addr=0x%llx, gdma_cmd_addr=0x%llx", core_idx, + net_info.core_commands[core_idx].bdc_cmd_addr, net_info.core_commands[core_idx].gdma_cmd_addr); + } + BMRT_LOG(DEBUG, "coeff_addr=0x%llx, neuron_addr=0x%llx , base_message_id=%d", net_info.coeff_start_addr, net_info.neuron_start_addr[0], base_message_id); + }); + +} +bm_status_t +bmdnn_func_1688::_bmdnn_multi_fullnet_(bm_handle_t handle, + const tpu_net_info_t &net_info) { + BMRT_ASSERT_INFO(handle, "handle shouldn't be NULL\n"); + + api_info_t api_info; + size_t core_num = net_info.core_list.size(); + BMRT_ASSERT_INFO(core_num == net_info.kernel_func_ids.size(), "core_num=%d, kernel_func_ids.size()=%d", + core_num, net_info.kernel_func_ids.size()); + fill_api_info(net_info, api_info); + std::vector launch_params(net_info.core_list.size()); + for(size_t core_idx=0; core_idx & func_id_list, + const unsigned long long compiled_ir_global_addr, + const unsigned int compiled_ir_length, //unit dword + const unsigned int input_num, + const unsigned long long *input_addrs, + const int * const * input_shapes, + const int * input_elem_nums, + const int * input_dtype_and_dims, + const unsigned int output_num, + const unsigned long long *output_addrs, + const unsigned long long apd_ctx_start, + const std::vector apd_ctx_mem_borders, + const std::vector apd_ctx_mem_offset, + const unsigned long long apd_coeff_mem_offset, + const unsigned long long apd_io_start, + const unsigned long long apd_io_mem_offset, + bool get_output_shape, + const unsigned long long output_shape_global_addr, + const std::vector& core_list) { + + BMRT_ASSERT_INFO(core_list.size() == 1, "dynamic compile do not support tensor parallel\n"); + BMRT_ASSERT_INFO(handle,"handle shouldn't be NULL\n"); + BMRT_ASSERT_INFO( + apd_ctx_mem_borders.size() == 
apd_ctx_mem_offset.size(), + "ctx borders and offset should have same size"); + BMRT_ASSERT_INFO( + core_list.size() == func_id_list.size(), + "core_num=%d, func_list_size=%d", + core_list.size(), func_id_list.size()); + + size_t ctx_num = apd_ctx_mem_borders.size(); + u32 api_buffer_size = sizeof(u64) +sizeof(u32) + // compiled_ir addr, length + // input num + sizeof(u32) + + // input_addr dtype_dims dim_shape elem_num + input_num * (sizeof(u64) + sizeof(int) + sizeof(int) * BM_MAX_DIMS_NUM + sizeof(int)) + + // output num + sizeof(u32) + + // output_addr + output_num * sizeof(u64) + + //get_output_shape, global_shape_mem_addr, apd_ctx_start, (ctx_num, apd_ctx_mem_borders, apd_ctx_mem_offset), + sizeof(u32) + sizeof(u64) + sizeof(u64) + ( sizeof(u32)+sizeof(u64)*ctx_num*2 ) + + //apd_coeff_mem_offset + sizeof(u64) + + // core_idx + core_num + group_msg_id + 3 * sizeof(u32) + + //apd_io_start + apd_io_mem_offset + sizeof(u64) + sizeof(u64); + + if (api_buffer_size > MAX_API_MSG_SIZE) { + //decrease the api buffer size + for (u32 i = 0; i < input_num; ++i) { + u32 cur_dim = (u32)(input_dtype_and_dims[i] & 0xFFFF); + api_buffer_size -= (BM_MAX_DIMS_NUM - cur_dim) * sizeof(int); + } + } + size_t group_msg_id = 0; + for (size_t i = 0; i < core_list.size(); i++) { + group_msg_id |= 1<> api_buffers(core_list.size()); + std::vector launch_params(core_list.size()); + + for (size_t core_idx = 0; core_idx < core_list.size(); core_idx++) { + api_buffers[core_idx].assign(api_buffer_size, 0); + void* p_api = api_buffers[core_idx].data(); + launch_params[core_idx].core_id = core_list[core_idx]; + launch_params[core_idx].func_id = func_id_list[core_idx]; + launch_params[core_idx].param_data = api_buffers[core_idx].data(); + launch_params[core_idx].param_size = api_buffers[core_idx].size(); + + //compiled ir information + *(u64*)p_api = compiled_ir_global_addr; + p_api = (u64*)p_api + 1; + *(u32*)p_api = compiled_ir_length; + p_api = (u32*)p_api + 1; + + //input information + 
*(u32*)p_api = input_num; + p_api = (u32*)p_api + 1; + + for(u32 i = 0; i < input_num; ++i){ + *(u64*)p_api = input_addrs[i]; + p_api = (u64*)p_api + 1; + + *(u32*)p_api = input_dtype_and_dims[i]; + p_api = (u32*)p_api + 1; + u32 cur_dim = (u32)(input_dtype_and_dims[i] & 0xFFFF); + for(u32 j = 0; j < cur_dim; j++){ + *(u32 *)p_api = (u32)input_shapes[i][j]; + p_api = (u32 *)p_api + 1; + } + *(u32*)p_api = input_elem_nums[i]; + p_api = (u32*)p_api + 1; + } + //output information + *(u32*)p_api = output_num; + p_api = (u32*)p_api + 1; + + for(u32 i = 0; i < output_num; ++i){ + *(u64*)p_api = output_addrs[i]; + p_api = (u64*)p_api + 1; + } + //output shape info related + *(u32*)p_api = (u32)get_output_shape; + p_api = (u32*)p_api + 1; + *(u64*)p_api = output_shape_global_addr; + p_api = (u64*)p_api + 1; + + //The memory address in cmd gdma need to be offset when append context,here is the offset value. + *(u64*)p_api = apd_ctx_start; + p_api = (u64*)p_api + 1; + + *(u32*)p_api = ctx_num; + p_api = (u32*)p_api + 1; + + for (size_t i = 0; i < ctx_num; ++i) { + *(u64*)p_api = apd_ctx_mem_borders[i]; + p_api = (u64*)p_api + 1; + } + + for (size_t i = 0; i < ctx_num; ++i) { + *(u64*)p_api = apd_ctx_mem_offset[i]; + p_api = (u64*)p_api + 1; + } + + *(u64*)p_api = apd_coeff_mem_offset; + p_api = (u64*)p_api + 1; + + *(u32*)p_api = core_idx; + p_api = (u32*)p_api + 1; + *(u32*)p_api = core_list.size(); + p_api = (u32*)p_api + 1; + *(u32*)p_api = group_msg_id; + p_api = (u32*)p_api + 1; + + *(u64*)p_api = apd_io_start; + p_api = (u64*)p_api + 1; + *(u64*)p_api = apd_io_mem_offset; + p_api = (u64*)p_api + 1; + + BMRT_LOG_RUN(DEBUG, { + for (size_t core_idx = 0; core_idx < core_list.size(); core_idx++) { + BMRT_LOG(DEBUG, "ir_addr=0x%llx, ir_length=%d[0x%x]", compiled_ir_global_addr, compiled_ir_length, compiled_ir_length); + for(u32 i = 0; i < input_num; ++i){ + auto dims = input_dtype_and_dims[i]&0xFFFF; + auto dtype = (input_dtype_and_dims[i]>>16)&0xFFFF; + std::string 
shape_str = std::to_string(input_shapes[i][0]); + for(u32 j = 1; j < dims; j++){ + shape_str += "," + std::to_string(input_shapes[i][j]); + } + BMRT_LOG(DEBUG, "in[%d] addr=0x%llx, shape=[%s], dtype=%s, elem_num=%d", + i, input_addrs[i], shape_str.c_str(), dtype_to_string((bm_data_type_t)dtype), input_elem_nums[i]); + } + //output information + for(u32 i = 0; i < output_num; ++i){ + BMRT_LOG(DEBUG, "out[%d] addr=0x%llx", i, output_addrs[i]); + } + //output shape info related + BMRT_LOG(DEBUG, "out_shape_addr=0x%llx", output_shape_global_addr); + BMRT_LOG(DEBUG, "ctx_start=0x%llx, coeff_mem_offset=0x%llx", apd_ctx_start, apd_coeff_mem_offset); + + *(u32*)p_api = ctx_num; + p_api = (u32*)p_api + 1; + + for (size_t i = 0; i < ctx_num; ++i) { + BMRT_LOG(DEBUG, "ctx[%d]: border=0x%llx, offset=0x%llx",i , apd_ctx_mem_borders[i], apd_ctx_mem_offset[i]); + } + BMRT_LOG(DEBUG, "core_index=%d, core_num=%d, base_msg_id=%d", core_idx, core_list.size(), group_msg_id); + } + }); + } + + bm_status_t status = tpu_kernel_launch_async_multicores(handle, launch_params.data(), launch_params.size()); + if (BM_SUCCESS != status) { + BMRT_LOG(WRONG, "tpu_kernel_launch_async_multicores failed, status:%d", status); + } + + return status; +} + +bm_status_t bmdnn_func_1688::_bmdnn_set_profile_enable_(bm_handle_t handle, int core, tpu_kernel_function_t func_id, unsigned int enable_bits){ + BMRT_ASSERT_INFO(handle,"handle shouldn't be NULL\n"); + u32 api_buffer_size = sizeof(u32); + u32 profile_enable = enable_bits; + bm_status_t status = tpu_kernel_launch_async_from_core(handle, func_id, (u8*)&profile_enable, api_buffer_size, core); + if (BM_SUCCESS != status) { + BMRT_LOG(WRONG, "launch kernel failed: core_id:%d, func id:%d, status:%d", core, func_id, status); + } + return status; +} + +bm_status_t bmdnn_func_1688::_bmdnn_get_profile_data_( + bm_handle_t handle, + int core, + tpu_kernel_function_t func_id, + unsigned long long output_global_addr, + unsigned int output_max_size, + unsigned 
int byte_offset, + unsigned int data_category //0: profile time records, 1: extra data + ){ + BMRT_ASSERT_INFO(handle, "handle shouldn't be NULL\n"); +#pragma pack(1) + struct { + u64 arm_reserved_addr; + u64 output_global_addr; + u32 output_size; + u32 byte_offset; + u32 data_category; //0: profile_data, 1: profile extra data + } api_data; +#pragma pack() + + const u32 api_buffer_size = sizeof(api_data); + + api_data.arm_reserved_addr = -1; + api_data.output_global_addr = output_global_addr; + api_data.output_size = output_max_size; + api_data.byte_offset = byte_offset; + api_data.data_category = data_category; + + bm_status_t status = tpu_kernel_launch_async_from_core(handle, func_id, (u8*)&api_data, api_buffer_size, core); + if (BM_SUCCESS != status) { + BMRT_LOG(WRONG, "tpu_kernel_launch_async_from_core failed, cor_id:%d, api id:%d, status:%d", core, func_id, status); + } else { + status = bm_thread_sync_from_core(handle, core); + if (BM_SUCCESS != status) { + BMRT_LOG(WRONG, "bm_sync_api failed, core_id:%d, api id:%d, status:%d", core, func_id, status); + } + } + return status; +} + +#pragma pack(1) +typedef struct { + int engine; + unsigned long long addr; + unsigned long long size; +} bm_api_engine_profile_param_t; +#pragma pack() + +bm_status_t bmdnn_func_1688::_bmdnn_set_engine_profile_param_(bm_handle_t handle, int core, tpu_kernel_function_t func_id, int engine_type, unsigned long long addr, unsigned long long size){ + bm_api_engine_profile_param_t param; + param.engine = engine_type; + param.addr = addr; + param.size = size; + bm_status_t core_status = tpu_kernel_launch_async_from_core(handle, func_id, (u8*)¶m, sizeof(param), core); + return core_status; +} + +} diff --git a/tpu-runtime/src/bmfunc/bmdnn_func_1880.cpp b/tpu-runtime/src/bmfunc/bmdnn_func_1880.cpp index 60393da..17fabdc 100644 --- a/tpu-runtime/src/bmfunc/bmdnn_func_1880.cpp +++ b/tpu-runtime/src/bmfunc/bmdnn_func_1880.cpp @@ -1,108 +1,115 @@ #include "bmfunc/bmfunc.h" #include +#include 
namespace bmruntime { - -bm_status_t bmdnn_func_1880::_bmdnn_multi_fullnet_( - bm_handle_t handle, - int input_num, - u64* user_input_global_offset, - u64* cmd_input_global_offset, - int* input_n, - int* input_length, - unsigned short* input_data_type, //0: FP32, 1: FP16, 2: INT8, 3: UINT8, 4: INT16, 5: UINT16 - unsigned char* input_st_mode, //0: 1N, 1: 2N, 2: 4N - unsigned char* real_in_stmode, //0: 1N, 1: 2N, 2: 4N - int output_num, - u64* user_output_global_offset, - u64* cmd_output_global_offset, - int* output_n, - int* output_length, - unsigned short* output_data_type, //0: FP32, 1: FP16, 2: INT8, 3: UINT8, 4: INT16, 5: UINT16 - unsigned char* output_st_mode, //0: 1N, 1: 2N, 2: 4N - unsigned char* force_out_stmode, //0: 1N, 1: 2N, 2: 4N - u64 bdc_cmd_offset, - u64 gdma_cmd_offset, - int* bdc_cmd_num, - int* gdma_cmd_num, - int cmdgroup_num - ) -{ - BMRT_ASSERT_INFO(handle,"handle shouldn't be NULL\n"); - u32 api_buffer_size = sizeof(int) + (input_num * (sizeof(u64) * 2 + sizeof(int) * 2 + sizeof(unsigned short) + sizeof(unsigned char) * 2)) + //api buffer size for input - sizeof(int) + (output_num * (sizeof(u64) * 2 + sizeof(int) * 2 + sizeof(unsigned short) + sizeof(unsigned char) * 2)) + //api buffer size for output - sizeof(u64) * 2 + sizeof(int) * 2 * cmdgroup_num + sizeof(int); - - u8* api_buffer = new u8 [api_buffer_size]; - - void* p_api = api_buffer; - //input global offset process - *(int*)p_api = input_num; - p_api = (int*)p_api + 1; - for (int i = 0; i < input_num; ++i) { - *(u64*)p_api = user_input_global_offset[i]; - p_api = (u64*)p_api + 1; - *(u64*)p_api = cmd_input_global_offset[i]; - p_api = (u64*)p_api + 1; - *(int*)p_api = input_n[i]; - p_api = (int*)p_api + 1; - *(int*)p_api = input_length[i]; - p_api = (int*)p_api + 1; - *(unsigned short*)p_api = input_data_type[i]; - p_api = (unsigned short*)p_api + 1; - *(unsigned char*)p_api = input_st_mode[i]; - p_api = (unsigned char*)p_api + 1; - BMRT_ASSERT_INFO(input_st_mode[i] != 
1,"input_st_mode[%d] shouldn't be 2N\n",i); - *(unsigned char*)p_api = real_in_stmode[i]; - p_api = (unsigned char*)p_api + 1; - BMRT_ASSERT_INFO(real_in_stmode[i] != 1,"real_in_stmode[%d] shouldn't be 2N\n",i); - } - - //output global offset process - *(int*)p_api = output_num; - p_api = (int*)p_api + 1; - for (int i = 0; i < output_num; ++i) { - *(u64*)p_api = user_output_global_offset[i]; - p_api = (u64*)p_api + 1; - *(u64*)p_api = cmd_output_global_offset[i]; - p_api = (u64*)p_api + 1; - *(int*)p_api = output_n[i]; - p_api = (int*)p_api + 1; - *(int*)p_api = output_length[i]; - p_api = (int*)p_api + 1; - *(unsigned short*)p_api = output_data_type[i]; - p_api = (unsigned short*)p_api + 1; - *(unsigned char*)p_api = output_st_mode[i]; - p_api = (unsigned char*)p_api + 1; - BMRT_ASSERT_INFO(output_st_mode[i] != 1,"output_st_mode[%d] shouldn't be 2N\n",i); - *(unsigned char*)p_api = force_out_stmode[i]; - p_api = (unsigned char*)p_api + 1; - BMRT_ASSERT_INFO(force_out_stmode[i] != 1,"force_out_stmode[%d] shouldn't be 2N\n",i); - - } - - //memcpy cmd offset and num - *(u64*)p_api = bdc_cmd_offset; - p_api = (u64*)p_api + 1; - *(u64*)p_api = gdma_cmd_offset; - p_api = (u64*)p_api + 1; - *(int*)p_api = cmdgroup_num; - for (int i = 0; i < cmdgroup_num; i++) { - p_api = (int*)p_api + 1; - *(int*)p_api = bdc_cmd_num[i]; - p_api = (int*)p_api + 1; - *(int*)p_api = gdma_cmd_num[i]; - } - - bm_status_t status = BM_SUCCESS; - //status = (bm_status_t)bm_multi_fullnet(handle, api_buffer, api_buffer_size); - status = (bm_status_t)bm_send_api(handle, (bm_api_id_t)0xfff, api_buffer, api_buffer_size); - if (BM_SUCCESS != status) { - BMRT_LOG(WRONG, "bm_multi_fullnet run failed, status:%d", status); - } - - delete [] api_buffer; - return status; +void bmdnn_func_1880::fill_api_info(const tpu_net_info_t &net_info, + api_info_t &api_info) { + const std::vector &input_info = net_info.input_info; + const std::vector &output_info = net_info.output_info; + const std::vector &cmd_info = 
net_info.core_commands[0].cmd_info; + u32 api_buffer_size = + sizeof(int) + + (input_info.size() * + (sizeof(u64) * 2 + sizeof(int) * 2 + sizeof(unsigned short) + + sizeof(unsigned char) * 2)) + // api buffer size for input + sizeof(int) + + (output_info.size() * + (sizeof(u64) * 2 + sizeof(int) * 2 + sizeof(unsigned short) + + sizeof(unsigned char) * 2)) + // api buffer size for output + sizeof(u64) * 2 + + sizeof(int) * 2 * cmd_info.size() + sizeof(int); + + api_info.api_id.push_back(0xfff); + api_info.api_data.resize(1); + api_info.api_data[0].assign(api_buffer_size, 0); + api_info.input_addr_offset.assign(input_info.size(), 0); + api_info.output_addr_offset.assign(output_info.size(), 0); + + void *p_api = api_info.api_data[0].data(); + // input global offset process + *(int *)p_api = input_info.size(); + p_api = (int *)p_api + 1; + for (size_t i = 0; i < input_info.size(); ++i) { + const auto &info = input_info.at(i); + api_info.input_addr_offset.at(i) = + (uint8_t *)p_api - (uint8_t *)(api_info.api_data.data()); + *(u64 *)p_api = info.user_global_addr; + p_api = (u64 *)p_api + 1; + *(u64 *)p_api = info.compiled_global_addr; + p_api = (u64 *)p_api + 1; + *(int *)p_api = info.n; + p_api = (int *)p_api + 1; + *(int *)p_api = info.c * info.h * info.w; + p_api = (int *)p_api + 1; + *(unsigned short *)p_api = info.dtype; + p_api = (unsigned short *)p_api + 1; + *(unsigned char *)p_api = info.compiled_stmode; + p_api = (unsigned char *)p_api + 1; + BMRT_ASSERT_INFO(info.compiled_stmode != 1, + "input_st_mode[%d] shouldn't be 2N\n", i); + *(unsigned char *)p_api = info.user_stmode; + p_api = (unsigned char *)p_api + 1; + BMRT_ASSERT_INFO(info.user_stmode != 1, + "real_in_stmode[%d] shouldn't be 2N\n", i); + } + + // output global offset process + *(int *)p_api = output_info.size(); + p_api = (int *)p_api + 1; + for (size_t i = 0; i < output_info.size(); ++i) { + const tpu_tensor_info_t &info = output_info.at(i); + api_info.output_addr_offset.at(i) = + (uint8_t *)p_api 
- (uint8_t *)(api_info.api_data.data()); + *(u64 *)p_api = info.user_global_addr; + p_api = (u64 *)p_api + 1; + *(u64 *)p_api = info.compiled_global_addr; + p_api = (u64 *)p_api + 1; + *(int *)p_api = info.n; + p_api = (int *)p_api + 1; + *(int *)p_api = info.c * info.h * info.w; + p_api = (int *)p_api + 1; + *(unsigned short *)p_api = info.dtype; + p_api = (unsigned short *)p_api + 1; + *(unsigned char *)p_api = info.compiled_stmode; + p_api = (unsigned char *)p_api + 1; + BMRT_ASSERT_INFO(info.compiled_stmode != 1, + "output_st_mode[%d] shouldn't be 2N\n", i); + *(unsigned char *)p_api = info.user_stmode; + p_api = (unsigned char *)p_api + 1; + BMRT_ASSERT_INFO(info.user_stmode != 1, + "force_out_stmode[%d] shouldn't be 2N\n", i); + } + + // memcpy cmd offset and num + *(u64 *)p_api = net_info.core_commands[0].bdc_cmd_addr; + p_api = (u64 *)p_api + 1; + *(u64 *)p_api = net_info.core_commands[0].gdma_cmd_addr; + p_api = (u64 *)p_api + 1; + *(int *)p_api = cmd_info.size(); + for (size_t i = 0; i < cmd_info.size(); i++) { + p_api = (int *)p_api + 1; + *(int *)p_api = cmd_info.at(i).bdc_cmd_num; + p_api = (int *)p_api + 1; + *(int *)p_api = cmd_info.at(i).gdma_cmd_num; + } +} +bm_status_t +bmdnn_func_1880::_bmdnn_multi_fullnet_(bm_handle_t handle, + const tpu_net_info_t &net_info) { + BMRT_ASSERT_INFO(handle, "handle shouldn't be NULL\n"); + + api_info_t api_info; + fill_api_info(net_info, api_info); + bm_status_t status = BM_SUCCESS; + status = (bm_status_t)bm_send_api(handle, (bm_api_id_t)api_info.api_id[0], + api_info.api_data[0].data(), + api_info.api_data[0].size()); + if (BM_SUCCESS != status) { + BMRT_LOG(WRONG, "bm_multi_fullnet run failed, status:%d", status); + } + + return status; } /* diff --git a/tpu-runtime/src/bmfunc/bmdnn_func_2260.cpp b/tpu-runtime/src/bmfunc/bmdnn_func_2260.cpp new file mode 100755 index 0000000..784a82f --- /dev/null +++ b/tpu-runtime/src/bmfunc/bmdnn_func_2260.cpp @@ -0,0 +1,400 @@ +#include "bmfunc/bmfunc.h" +#include + 
+namespace bmruntime { +extern "C" bm_status_t bm_send_api_to_core( + bm_handle_t handle, + int api_id, + const u8 *api, + u32 size, + int core_id); + +void bmdnn_func_2260::fill_api_info(const tpu_net_info_t &net_info, + api_info_t &api_info) { + BMRT_ASSERT_INFO(net_info.neuron_start_addr.size() == 1, + "only support one neuron addr"); + const std::vector &input_info = net_info.input_info; + const std::vector &output_info = net_info.output_info; + api_info.api_data.resize(net_info.core_commands.size()); + int base_message_id = 0; + for (auto core_id : net_info.core_list) { + base_message_id |= (1 << core_id); + } + + for (size_t core_idx = 0; core_idx < net_info.core_list.size(); core_idx++) { + const std::vector &cmd_info = + net_info.core_commands[core_idx].cmd_info; + + u32 api_buffer_size = + sizeof(int) + + (input_info.size() * (sizeof(u64) * 2 + sizeof(u32))) + // input + sizeof(int) + + (output_info.size() * (sizeof(u64) * 2 + sizeof(u32))) + // output + sizeof(u64) * 2 + + (sizeof(int) * 2 + sizeof(u32) * 2) * cmd_info.size() + sizeof(int) + + 2 * sizeof(u64) + sizeof(int) + // base message id + 2 * sizeof(u64); // hau_cmd_addr, sdma_cmd_addr + api_info.api_id.push_back(BM_API_ID_MULTI_FULLNET); + api_info.api_data[core_idx].assign(api_buffer_size, 0); + api_info.input_addr_offset.assign(input_info.size(), 0); + api_info.output_addr_offset.assign(output_info.size(), 0); + + void *p_api = api_info.api_data[core_idx].data(); + // input global offset process + *(int *)p_api = input_info.size(); + p_api = (int *)p_api + 1; + for (size_t i = 0; i < input_info.size(); ++i) { + const auto &info = input_info.at(i); + api_info.input_addr_offset.at(i) = + (uint8_t *)p_api - (uint8_t *)(api_info.api_data.data()); + *(u64 *)p_api = info.user_global_addr; + p_api = (u64 *)p_api + 1; + if (core_idx > 0) { + /// If the bmodel use multi core, we only move the user's input data to + /// compiled ddr once. 
+ *(u64 *)p_api = info.user_global_addr; + } else { + *(u64 *)p_api = info.compiled_global_addr; + } + p_api = (u64 *)p_api + 1; + *(u32 *)p_api = bmrt_data_type_size((bm_data_type_t)info.dtype) * + (info.n * info.c * info.h * info.w); + p_api = (u32 *)p_api + 1; + } + + // output global offset process + *(int *)p_api = output_info.size(); + p_api = (int *)p_api + 1; + for (size_t i = 0; i < output_info.size(); ++i) { + const auto &info = output_info.at(i); + api_info.output_addr_offset.at(i) = + (uint8_t *)p_api - (uint8_t *)(api_info.api_data.data()); + *(u64 *)p_api = info.user_global_addr; + p_api = (u64 *)p_api + 1; + if (core_idx > 0) { + /// If the bmodel use multi core, we only move the user's input data to + /// compiled ddr once. + *(u64 *)p_api = info.user_global_addr; + } else { + *(u64 *)p_api = info.compiled_global_addr; + } + p_api = (u64 *)p_api + 1; + *(u32 *)p_api = bmrt_data_type_size((bm_data_type_t)info.dtype) * + (info.n * info.c * info.h * info.w); + p_api = (u32 *)p_api + 1; + } + + // memcpy cmd offset and num + *(u64 *)p_api = net_info.core_commands[core_idx].bdc_cmd_addr; + p_api = (u64 *)p_api + 1; + *(u64 *)p_api = net_info.core_commands[core_idx].gdma_cmd_addr; + p_api = (u64 *)p_api + 1; + *(int *)p_api = cmd_info.size(); + p_api = (int *)p_api + 1; + for (size_t i = 0; i < cmd_info.size(); i++) { + const tpu_cmd_info_t info = cmd_info.at(i); + *(int *)p_api = info.bdc_cmd_num; + p_api = (int *)p_api + 1; + *(int *)p_api = info.gdma_cmd_num; + p_api = (int *)p_api + 1; + *(u32 *)p_api = info.bdc_cmd_byte_size; + p_api = (u32 *)p_api + 1; + *(u32 *)p_api = info.gdma_cmd_byte_size; + p_api = (u32 *)p_api + 1; + } + + *((u64 *)p_api) = net_info.coeff_start_addr; + p_api = ((u64 *)p_api) + 1; + *((u64 *)p_api) = net_info.neuron_start_addr[0]; + p_api = ((u64 *)p_api) + 1; + *((int *)p_api) = base_message_id; + p_api = ((u32 *)p_api) + 1; + + *((u64 *)p_api) = net_info.core_commands[core_idx].hau_cmd_addr; + p_api = ((u64 *)p_api) + 1; + 
*((u64 *)p_api) = net_info.core_commands[core_idx].sdma_cmd_addr; + p_api = ((u64 *)p_api) + 1; + } +} +bm_status_t +bmdnn_func_2260::_bmdnn_multi_fullnet_(bm_handle_t handle, + const tpu_net_info_t &net_info) { + BMRT_ASSERT_INFO(handle, "handle shouldn't be NULL\n"); + + api_info_t api_info; + fill_api_info(net_info, api_info); + bm_status_t status = BM_SUCCESS; + if (api_info.api_data[0].size() api_mem(net_info.core_list.size()); + #pragma pack(1) + typedef struct{ + u32 input_num = 0; + u64 cmd_addr; + u64 cmd_size; + }long_cmd_param_t; + #pragma pack() + for (size_t core_idx = 0; core_idx < net_info.core_list.size(); core_idx++) { + u32 malloc_size = api_info.api_data[core_idx].size(); + bm_status_t mem_status = bm_malloc_device_byte(handle, &api_mem[core_idx], malloc_size); + if (mem_status != BM_SUCCESS) { + status = (status == BM_SUCCESS) ? mem_status : status; + BMRT_LOG(WRONG, "bm_malloc_device_byte failed, malloc mem:%d", malloc_size); + } + long_cmd_param_t new_api; + auto data = api_info.api_data[core_idx].data(); + bm_status_t s2d_status = bm_memcpy_s2d(handle, api_mem[core_idx], (void*)data); + new_api.cmd_addr = api_mem[core_idx].u.device.device_addr; + printf("command_addr runtime: %lld\n",new_api.cmd_addr); + new_api.cmd_size = api_info.api_data[core_idx].size(); + if (BM_SUCCESS != s2d_status) { + status = (status == BM_SUCCESS) ? s2d_status : status; + BMRT_LOG(WRONG, "bm_memcpy_s2d failed, ret = %d\n", s2d_status); + } + bm_status_t core_status = bm_send_api_to_core( + handle, (bm_api_id_t)api_info.api_id[0], + (u8 *)(&new_api), + sizeof(new_api), + net_info.core_list.at(core_idx)); + if (BM_SUCCESS != core_status) { + status = (status == BM_SUCCESS) ? 
core_status : status; + BMRT_LOG(WRONG, "bm_send_api failed, api id:%d, status:%d", + BM_API_ID_MULTI_FULLNET, status); + } + } + for (size_t core_idx = 0; core_idx < net_info.core_list.size(); core_idx++) { + bm_status_t core_status = bm_thread_sync_from_core(handle, core_idx); + if (core_status != BM_SUCCESS) { + status = (status == BM_SUCCESS) ? core_status : status; + BMRT_LOG(WRONG, "bm_thread_sync_from_core failed, core_idx:%d", core_idx); + } + } + for (size_t core_idx = 0; core_idx < net_info.core_list.size(); core_idx++) { + bm_free_device(handle, api_mem[core_idx]); + } + } + return status; +} + +bm_status_t bmdnn_func_2260::_bmdnn_dynamic_fullnet_( + bm_handle_t handle, + unsigned long long compiled_ir_global_addr, + unsigned int compiled_ir_length, //unit dword + unsigned int input_num, + const unsigned long long *input_addrs, + const int * const * input_shapes, + const int * input_elem_nums, + const int * input_dtype_and_dims, + unsigned int output_num, + const unsigned long long *output_addrs, + unsigned long long apd_ctx_start, + std::vector apd_ctx_mem_borders, + std::vector apd_ctx_mem_offset, + unsigned long long apd_coeff_mem_offset, + unsigned long long apd_io_start, + unsigned long long apd_io_mem_offset, + bool get_output_shape, + unsigned long long output_shape_global_addr, + const std::vector &core_list) +{ + BMRT_ASSERT_INFO(core_list.size() == 1, "Dynamic compile do not support tensor parallel\n"); + BMRT_ASSERT_INFO(handle,"handle shouldn't be NULL\n"); + BMRT_ASSERT_INFO( + apd_ctx_mem_borders.size() == apd_ctx_mem_offset.size(), + "ctx borders and offset should have same size"); + + size_t ctx_num = apd_ctx_mem_borders.size(); + u32 api_buffer_size = sizeof(u64) +sizeof(u32) + // compiled_ir addr, length + // input num + sizeof(u32) + + // input_addr dtype_dims dim_shape elem_num + input_num * (sizeof(u64) + sizeof(int) + sizeof(int) * BM_MAX_DIMS_NUM + sizeof(int)) + + // output num + sizeof(u32) + + // output_addr + output_num * 
sizeof(u64) + + //get_output_shape, global_shape_mem_addr, apd_ctx_start, (ctx_num, apd_ctx_mem_borders, apd_ctx_mem_offset), + sizeof(u32) + sizeof(u64) + sizeof(u64) + ( sizeof(u32)+sizeof(u64)*ctx_num*2 ) + + //apd_coeff_mem_offset, apd_io_start, apd_io_mem_offset + sizeof(u64) + sizeof(u64) + sizeof(u64); + + if (api_buffer_size > MAX_API_MSG_SIZE) { + //decrease the api buffer size + for (u32 i = 0; i < input_num; ++i) { + u32 cur_dim = (u32)(input_dtype_and_dims[i] & 0xFFFF); + api_buffer_size -= (BM_MAX_DIMS_NUM - cur_dim) * sizeof(int); + } + } + + u8* api_buffer = new u8 [api_buffer_size]; + + void* p_api = api_buffer; + //compiled ir information + *(u64*)p_api = compiled_ir_global_addr; + p_api = (u64*)p_api + 1; + *(u32*)p_api = compiled_ir_length; + p_api = (u32*)p_api + 1; + + //input information + *(u32*)p_api = input_num; + p_api = (u32*)p_api + 1; + + for(u32 i = 0; i < input_num; ++i){ + *(u64*)p_api = input_addrs[i]; + p_api = (u64*)p_api + 1; + + *(u32*)p_api = input_dtype_and_dims[i]; + p_api = (u32*)p_api + 1; + u32 cur_dim = (u32)(input_dtype_and_dims[i] & 0xFFFF); + for(u32 j = 0; j < cur_dim; j++){ + *(u32 *)p_api = (u32)input_shapes[i][j]; + p_api = (u32 *)p_api + 1; + } + *(u32*)p_api = input_elem_nums[i]; + p_api = (u32*)p_api + 1; + } + //output information + *(u32*)p_api = output_num; + p_api = (u32*)p_api + 1; + + for(u32 i = 0; i < output_num; ++i){ + *(u64*)p_api = output_addrs[i]; + p_api = (u64*)p_api + 1; + } + //output shape info related + *(u32*)p_api = (u32)get_output_shape; + p_api = (u32*)p_api + 1; + *(u64*)p_api = output_shape_global_addr; + p_api = (u64*)p_api + 1; + + //The memory address in cmd gdma need to be offset when append context,here is the offset value. 
+ *(u64*)p_api = apd_ctx_start; + p_api = (u64*)p_api + 1; + + *(u32*)p_api = ctx_num; + p_api = (u32*)p_api + 1; + + for (size_t i = 0; i < ctx_num; ++i) + { + *(u64*)p_api = apd_ctx_mem_borders[i]; + p_api = (u64*)p_api + 1; + } + for (size_t i = 0; i < ctx_num; ++i) + { + *(u64*)p_api = apd_ctx_mem_offset[i]; + p_api = (u64*)p_api + 1; + } + + *(u64*)p_api = apd_coeff_mem_offset; + p_api = (u64*)p_api + 1; + + *(u64*)p_api = apd_io_start; + p_api = (u64*)p_api + 1; + *(u64*)p_api = apd_io_mem_offset; + p_api = (u64*)p_api + 1; + + bm_status_t status; + if (api_buffer_size +#include "bmlib_runtime.h" +#include "bmruntime.h" + +namespace bmruntime { +extern "C" bm_status_t bm_send_api_to_core( + bm_handle_t handle, + int api_id, + const u8 *api, + u32 size, + int core_id); + +void bmdnn_func_2380::fill_api_info(const tpu_net_info_t &net_info, + api_info_t &api_info) { + BMRT_ASSERT_INFO(net_info.neuron_start_addr.size() == 1, + "only support one neuron addr"); + const std::vector &input_info = net_info.input_info; + const std::vector &output_info = net_info.output_info; + api_info.api_data.resize(net_info.core_commands.size()); + int base_message_id = 0; + for (auto core_id : net_info.core_list) { + base_message_id |= (1 << core_id); + } + for (size_t core_idx = 0; core_idx < net_info.core_list.size(); core_idx++) { + const std::vector &cmd_info = + net_info.core_commands[core_idx].cmd_info; + + u32 api_buffer_size = + sizeof(int) + + (input_info.size() * (sizeof(u64) * 2 + sizeof(u32))) + // input + sizeof(int) + + (output_info.size() * (sizeof(u64) * 2 + sizeof(u32))) + // output + sizeof(u64) * 2 + + (sizeof(int) * 2 + sizeof(u32) * 2) * cmd_info.size() + sizeof(int) + + 2 * sizeof(u64) + sizeof(int); // base message id + api_info.api_id.push_back(BM_API_ID_MULTI_FULLNET); + api_info.api_data[core_idx].assign(api_buffer_size, 0); + api_info.input_addr_offset.assign(input_info.size(), 0); + api_info.output_addr_offset.assign(output_info.size(), 0); + + void 
*p_api = api_info.api_data[core_idx].data(); + // input global offset process + *(int *)p_api = input_info.size(); + p_api = (int *)p_api + 1; + for (size_t i = 0; i < input_info.size(); ++i) { + const auto &info = input_info.at(i); + api_info.input_addr_offset.at(i) = + (uint8_t *)p_api - (uint8_t *)(api_info.api_data.data()); + *(u64 *)p_api = info.user_global_addr; + p_api = (u64 *)p_api + 1; + if (core_idx > 0 && ((info.compiled_global_addr >> 40) & 0x1f) == 0) { + /// If the bmodel use multi core, we only move the user's input data to + /// compiled ddr once. + *(u64 *)p_api = info.user_global_addr; + } else { + *(u64 *)p_api = info.compiled_global_addr; + } + p_api = (u64 *)p_api + 1; + *(u32 *)p_api = bmrt_data_type_size((bm_data_type_t)info.dtype) * + (info.n * info.c * info.h * info.w); + p_api = (u32 *)p_api + 1; + } + + // output global offset process + *(int *)p_api = output_info.size(); + p_api = (int *)p_api + 1; + for (size_t i = 0; i < output_info.size(); ++i) { + const auto &info = output_info.at(i); + api_info.output_addr_offset.at(i) = + (uint8_t *)p_api - (uint8_t *)(api_info.api_data.data()); + *(u64 *)p_api = info.user_global_addr; + p_api = (u64 *)p_api + 1; + if (core_idx > 0 && ((info.compiled_global_addr >> 40) & 0x1f) == 0) { + /// If the bmodel use multi core, we only move the user's input data to + /// compiled ddr once. 
+ *(u64 *)p_api = info.user_global_addr; + } else { + *(u64 *)p_api = info.compiled_global_addr; + } + p_api = (u64 *)p_api + 1; + *(u32 *)p_api = bmrt_data_type_size((bm_data_type_t)info.dtype) * + (info.n * info.c * info.h * info.w); + p_api = (u32 *)p_api + 1; + } + + // memcpy cmd offset and num + *(u64 *)p_api = net_info.core_commands[core_idx].bdc_cmd_addr; + p_api = (u64 *)p_api + 1; + *(u64 *)p_api = net_info.core_commands[core_idx].gdma_cmd_addr; + p_api = (u64 *)p_api + 1; + *(int *)p_api = cmd_info.size(); + p_api = (int *)p_api + 1; + for (size_t i = 0; i < cmd_info.size(); i++) { + const tpu_cmd_info_t info = cmd_info.at(i); + *(int *)p_api = info.bdc_cmd_num; + p_api = (int *)p_api + 1; + *(int *)p_api = info.gdma_cmd_num; + p_api = (int *)p_api + 1; + *(u32 *)p_api = info.bdc_cmd_byte_size; + p_api = (u32 *)p_api + 1; + *(u32 *)p_api = info.gdma_cmd_byte_size; + p_api = (u32 *)p_api + 1; + } + + *((u64 *)p_api) = net_info.coeff_start_addr; + p_api = ((u64 *)p_api) + 1; + *((u64 *)p_api) = net_info.neuron_start_addr[0]; + p_api = ((u64 *)p_api) + 1; + *((int *)p_api) = base_message_id; + p_api = ((u32 *)p_api) + 1; + } + + BMRT_LOG_RUN(DEBUG, { + for (size_t i = 0; i < input_info.size(); ++i) { + const auto &info = input_info.at(i); + auto byte_size = bmrt_data_type_size((bm_data_type_t)info.dtype) * + (info.n * info.c * info.h * info.w); + BMRT_LOG(DEBUG, "in[%d] user_addr=0x%llx, cmd_addr=0x%llx, shape=[%d, %d, %d, %d], dtype=%s, byte_size=%d", + i, info.user_global_addr, info.compiled_global_addr, info.n, info.c, info.h, info.w, dtype_to_string((bm_data_type_t)info.dtype), byte_size); + } + for (size_t i = 0; i < output_info.size(); ++i) { + const auto &info = output_info.at(i); + auto byte_size = bmrt_data_type_size((bm_data_type_t)info.dtype) * + (info.n * info.c * info.h * info.w); + BMRT_LOG(DEBUG, "out[%d] user_addr=0x%llx, cmd_addr=0x%llx, shape=[%d, %d, %d, %d], dtype=%s, byte_size=%d", + i, info.user_global_addr, info.compiled_global_addr, 
info.n, info.c, info.h, info.w, dtype_to_string((bm_data_type_t)info.dtype), byte_size); + } + for (size_t core_idx = 0; core_idx < net_info.core_list.size(); core_idx++) { + BMRT_LOG(DEBUG, "core[%d], tiu_cmd_addr=0x%llx, gdma_cmd_addr=0x%llx", core_idx, + net_info.core_commands[core_idx].bdc_cmd_addr, net_info.core_commands[core_idx].gdma_cmd_addr); + } + BMRT_LOG(DEBUG, "coeff_addr=0x%llx, neuron_addr=0x%llx , base_message_id=%d", net_info.coeff_start_addr, net_info.neuron_start_addr[0], base_message_id); + }); + +} + +bm_status_t bmdnn_func_2380::_bmdnn_multi_fullnet_( + bm_handle_t handle, const tpu_net_info_t &net_info) { + BMRT_ASSERT_INFO(handle, "handle shouldn't be NULL\n"); + + api_info_t api_info; + fill_api_info(net_info, api_info); + bm_status_t status = BM_SUCCESS; + for (size_t core_idx = 0; core_idx < net_info.core_list.size(); core_idx++) { + bm_status_t core_status = bm_send_api_to_core( + handle, (bm_api_id_t)api_info.api_id[0], + api_info.api_data[core_idx].data(), + api_info.api_data[core_idx].size(), + net_info.core_list.at(core_idx)); + if (BM_SUCCESS != core_status) { + status = (status == BM_SUCCESS) ? 
core_status : status; + BMRT_LOG(WRONG, "bm_send_api failed, api id:%d, status:%d", + BM_API_ID_MULTI_FULLNET, core_status); + } + } + return status; +} + +bm_status_t bmdnn_func_2380::_bmdnn_dynamic_fullnet_( + bm_handle_t handle, + const std::vector & func_id_list, + const unsigned long long compiled_ir_global_addr, + const unsigned int compiled_ir_length, //unit dword + const unsigned int input_num, + const unsigned long long *input_addrs, + const int * const * input_shapes, + const int * input_elem_nums, + const int * input_dtype_and_dims, + const unsigned int output_num, + const unsigned long long *output_addrs, + const unsigned long long apd_ctx_start, + const std::vector apd_ctx_mem_borders, + const std::vector apd_ctx_mem_offset, + const unsigned long long apd_coeff_mem_offset, + const unsigned long long apd_io_mem_offset, + bool get_output_shape, + const unsigned long long output_shape_global_addr, + const std::vector& core_list) { + + BMRT_ASSERT_INFO(core_list.size() == 1, "dynamic compile do not support tensor parallel\n"); + BMRT_ASSERT_INFO(handle,"handle shouldn't be NULL\n"); + BMRT_ASSERT_INFO( + apd_ctx_mem_borders.size() == apd_ctx_mem_offset.size(), + "ctx borders and offset should have same size"); + BMRT_ASSERT_INFO( + core_list.size() == func_id_list.size(), + "core_num=%d, func_list_size=%d", + core_list.size(), func_id_list.size()); + + size_t ctx_num = apd_ctx_mem_borders.size(); + u32 api_buffer_size = sizeof(u64) +sizeof(u32) + // compiled_ir addr, length + // input num + sizeof(u32) + + // input_addr dtype_dims dim_shape elem_num + input_num * (sizeof(u64) + sizeof(int) + sizeof(int) * BM_MAX_DIMS_NUM + sizeof(int)) + + // output num + sizeof(u32) + + // output_addr + output_num * sizeof(u64) + + //get_output_shape, global_shape_mem_addr, apd_ctx_start, (ctx_num, apd_ctx_mem_borders, apd_ctx_mem_offset), + sizeof(u32) + sizeof(u64) + sizeof(u64) + ( sizeof(u32)+sizeof(u64)*ctx_num*2 ) + + //apd_coeff_mem_offset + sizeof(u64) + + // 
core_idx + core_num + group_msg_id + 3 * sizeof(u32) + + //apd_io_mem_offset + sizeof(u64); + + if (api_buffer_size > MAX_API_MSG_SIZE) { + //decrease the api buffer size + for (u32 i = 0; i < input_num; ++i) { + u32 cur_dim = (u32)(input_dtype_and_dims[i] & 0xFFFF); + api_buffer_size -= (BM_MAX_DIMS_NUM - cur_dim) * sizeof(int); + } + } + size_t group_msg_id = 0; + for (size_t i = 0; i < core_list.size(); i++) { + group_msg_id |= 1<> api_buffers(core_list.size()); + std::vector launch_params(core_list.size()); + + for (size_t core_idx = 0; core_idx < core_list.size(); core_idx++) { + api_buffers[core_idx].assign(api_buffer_size, 0); + void* p_api = api_buffers[core_idx].data(); + launch_params[core_idx].core_id = core_list[core_idx]; + launch_params[core_idx].func_id = func_id_list[core_idx]; + launch_params[core_idx].param_data = api_buffers[core_idx].data(); + launch_params[core_idx].param_size = api_buffers[core_idx].size(); + + //compiled ir information + *(u64*)p_api = compiled_ir_global_addr; + p_api = (u64*)p_api + 1; + *(u32*)p_api = compiled_ir_length; + p_api = (u32*)p_api + 1; + + //input information + *(u32*)p_api = input_num; + p_api = (u32*)p_api + 1; + + for(u32 i = 0; i < input_num; ++i){ + *(u64*)p_api = input_addrs[i]; + p_api = (u64*)p_api + 1; + + *(u32*)p_api = input_dtype_and_dims[i]; + p_api = (u32*)p_api + 1; + u32 cur_dim = (u32)(input_dtype_and_dims[i] & 0xFFFF); + for(u32 j = 0; j < cur_dim; j++){ + *(u32 *)p_api = (u32)input_shapes[i][j]; + p_api = (u32 *)p_api + 1; + } + *(u32*)p_api = input_elem_nums[i]; + p_api = (u32*)p_api + 1; + } + //output information + *(u32*)p_api = output_num; + p_api = (u32*)p_api + 1; + + for(u32 i = 0; i < output_num; ++i){ + *(u64*)p_api = output_addrs[i]; + p_api = (u64*)p_api + 1; + } + //output shape info related + *(u32*)p_api = (u32)get_output_shape; + p_api = (u32*)p_api + 1; + *(u64*)p_api = output_shape_global_addr; + p_api = (u64*)p_api + 1; + + //The memory address in cmd gdma need to be offset 
when append context,here is the offset value. + *(u64*)p_api = apd_ctx_start; + p_api = (u64*)p_api + 1; + + *(u32*)p_api = ctx_num; + p_api = (u32*)p_api + 1; + + for (size_t i = 0; i < ctx_num; ++i) { + *(u64*)p_api = apd_ctx_mem_borders[i]; + p_api = (u64*)p_api + 1; + } + + for (size_t i = 0; i < ctx_num; ++i) { + *(u64*)p_api = apd_ctx_mem_offset[i]; + p_api = (u64*)p_api + 1; + } + + *(u64*)p_api = apd_coeff_mem_offset; + p_api = (u64*)p_api + 1; + + *(u32*)p_api = core_idx; + p_api = (u32*)p_api + 1; + *(u32*)p_api = core_list.size(); + p_api = (u32*)p_api + 1; + *(u32*)p_api = group_msg_id; + p_api = (u32*)p_api + 1; + + *(u64*)p_api = apd_io_mem_offset; + p_api = (u64*)p_api + 1; + + BMRT_LOG_RUN(DEBUG, { + for (size_t core_idx = 0; core_idx < core_list.size(); core_idx++) { + BMRT_LOG(DEBUG, "ir_addr=0x%llx, ir_length=%d[0x%x]", compiled_ir_global_addr, compiled_ir_length, compiled_ir_length); + for(u32 i = 0; i < input_num; ++i){ + auto dims = input_dtype_and_dims[i]&0xFFFF; + auto dtype = (input_dtype_and_dims[i]>>16)&0xFFFF; + std::string shape_str = std::to_string(input_shapes[i][0]); + for(u32 j = 1; j < dims; j++){ + shape_str += "," + std::to_string(input_shapes[i][j]); + } + BMRT_LOG(DEBUG, "in[%d] addr=0x%llx, shape=[%s], dtype=%s, elem_num=%d", + i, input_addrs[i], shape_str.c_str(), dtype_to_string((bm_data_type_t)dtype), input_elem_nums[i]); + } + //output information + for(u32 i = 0; i < output_num; ++i){ + BMRT_LOG(DEBUG, "out[%d] addr=0x%llx", i, output_addrs[i]); + } + //output shape info related + BMRT_LOG(DEBUG, "out_shape_addr=0x%llx", output_shape_global_addr); + BMRT_LOG(DEBUG, "ctx_start=0x%llx, coeff_mem_offset=0x%llx", apd_ctx_start, apd_coeff_mem_offset); + + *(u32*)p_api = ctx_num; + p_api = (u32*)p_api + 1; + + for (size_t i = 0; i < ctx_num; ++i) { + BMRT_LOG(DEBUG, "ctx[%d]: border=0x%llx, offset=0x%llx",i , apd_ctx_mem_borders[i], apd_ctx_mem_offset[i]); + } + BMRT_LOG(DEBUG, "core_index=%d, core_num=%d, base_msg_id=%d", 
core_idx, core_list.size(), group_msg_id); + } + }); + } + + bm_status_t status = tpu_kernel_launch_async_multicores(handle, launch_params.data(), launch_params.size()); + if (BM_SUCCESS != status) { + BMRT_LOG(WRONG, "tpu_kernel_launch_async_multicores failed, status:%d", status); + } + + return status; +} + +bm_status_t bmdnn_func_2380::_bmdnn_set_profile_enable_(bm_handle_t handle, int core, tpu_kernel_function_t func_id, unsigned int enable_bits){ + BMRT_ASSERT_INFO(handle,"handle shouldn't be NULL\n"); + u32 api_buffer_size = sizeof(u32); + u32 profile_enable = enable_bits; + bm_status_t status = tpu_kernel_launch_async_from_core(handle, func_id, (u8*)&profile_enable, api_buffer_size, core); + if (BM_SUCCESS != status) { + BMRT_LOG(WRONG, "launch kernel failed: core_id:%d, func id:%d, status:%d", core, func_id, status); + } + return status; +} + +bm_status_t bmdnn_func_2380::_bmdnn_get_profile_data_( + bm_handle_t handle, + int core, + tpu_kernel_function_t func_id, + unsigned long long output_global_addr, + unsigned int output_max_size, + unsigned int byte_offset, + unsigned int data_category //0: profile time records, 1: extra data + ){ + BMRT_ASSERT_INFO(handle, "handle shouldn't be NULL\n"); +#pragma pack(1) + struct { + u64 arm_reserved_addr; + u64 output_global_addr; + u32 output_size; + u32 byte_offset; + u32 data_category; //0: profile_data, 1: profile extra data + } api_data; +#pragma pack() + + const u32 api_buffer_size = sizeof(api_data); + + api_data.arm_reserved_addr = -1; + api_data.output_global_addr = output_global_addr; + api_data.output_size = output_max_size; + api_data.byte_offset = byte_offset; + api_data.data_category = data_category; + + bm_status_t status = tpu_kernel_launch_async_from_core(handle, func_id, (u8*)&api_data, api_buffer_size, core); + if (BM_SUCCESS != status) { + BMRT_LOG(WRONG, "tpu_kernel_launch_async_from_core failed, cor_id:%d, api id:%d, status:%d", core, func_id, status); + } else { + status = 
bm_thread_sync_from_core(handle, core); + if (BM_SUCCESS != status) { + BMRT_LOG(WRONG, "bm_sync_api failed, core_id:%d, api id:%d, status:%d", core, func_id, status); + } + } + return status; +} + +#pragma pack(1) +typedef struct { + int engine; + unsigned long long addr; + unsigned long long size; +} bm_api_engine_profile_param_t; +#pragma pack() + +bm_status_t bmdnn_func_2380::_bmdnn_set_engine_profile_param_(bm_handle_t handle, int core, tpu_kernel_function_t func_id, int engine_type, unsigned long long addr, unsigned long long size){ + bm_api_engine_profile_param_t param; + param.engine = engine_type; + param.addr = addr; + param.size = size; + bm_status_t core_status = tpu_kernel_launch_async_from_core(handle, func_id, (u8*)¶m, sizeof(param), core); + return core_status; +} + +} diff --git a/tpu-runtime/src/bmfunc/bmdnn_func_mars3.cpp b/tpu-runtime/src/bmfunc/bmdnn_func_mars3.cpp new file mode 100755 index 0000000..4405b67 --- /dev/null +++ b/tpu-runtime/src/bmfunc/bmdnn_func_mars3.cpp @@ -0,0 +1,319 @@ +#include "bmfunc/bmfunc.h" +#include + +namespace bmruntime { +extern "C" bm_status_t bm_send_api_to_core( + bm_handle_t handle, + int api_id, + const u8 *api, + u32 size, + int core_id); + +void bmdnn_func_mars3::fill_api_info(const tpu_net_info_t &net_info, + api_info_t &api_info) { + BMRT_ASSERT_INFO(net_info.neuron_start_addr.size() == 1, + "only support one neuron addr"); + const std::vector &input_info = net_info.input_info; + const std::vector &output_info = net_info.output_info; + api_info.api_data.resize(net_info.core_commands.size()); + int base_message_id = 0; + for (auto core_id : net_info.core_list) { + base_message_id |= (1 << core_id); + } + for (size_t core_idx = 0; core_idx < net_info.core_list.size(); core_idx++) { + const std::vector &cmd_info = + net_info.core_commands[core_idx].cmd_info; + + u32 api_buffer_size = + sizeof(int) + + (input_info.size() * (sizeof(u64) * 2 + sizeof(u32))) + // input + sizeof(int) + + (output_info.size() * 
(sizeof(u64) * 2 + sizeof(u32))) + // output + sizeof(u64) * 2 + + (sizeof(int) * 2 + sizeof(u32) * 2) * cmd_info.size() + sizeof(int) + + 2 * sizeof(u64) + sizeof(int); // base message id + api_info.api_id.push_back(BM_API_ID_MULTI_FULLNET); + api_info.api_data[core_idx].assign(api_buffer_size, 0); + api_info.input_addr_offset.assign(input_info.size(), 0); + api_info.output_addr_offset.assign(output_info.size(), 0); + + void *p_api = api_info.api_data[core_idx].data(); + // input global offset process + *(int *)p_api = input_info.size(); + p_api = (int *)p_api + 1; + for (size_t i = 0; i < input_info.size(); ++i) { + const auto &info = input_info.at(i); + api_info.input_addr_offset.at(i) = + (uint8_t *)p_api - (uint8_t *)(api_info.api_data.data()); + *(u64 *)p_api = info.user_global_addr; + p_api = (u64 *)p_api + 1; + if (core_idx > 0) { + /// If the bmodel use multi core, we only move the user's input data to + /// compiled ddr once. + *(u64 *)p_api = info.user_global_addr; + } else { + *(u64 *)p_api = info.compiled_global_addr; + } + p_api = (u64 *)p_api + 1; + *(u32 *)p_api = bmrt_data_type_size((bm_data_type_t)info.dtype) * + (info.n * info.c * info.h * info.w); + p_api = (u32 *)p_api + 1; + } + + // output global offset process + *(int *)p_api = output_info.size(); + p_api = (int *)p_api + 1; + for (size_t i = 0; i < output_info.size(); ++i) { + const auto &info = output_info.at(i); + api_info.output_addr_offset.at(i) = + (uint8_t *)p_api - (uint8_t *)(api_info.api_data.data()); + *(u64 *)p_api = info.user_global_addr; + p_api = (u64 *)p_api + 1; + if (core_idx > 0) { + /// If the bmodel use multi core, we only move the user's input data to + /// compiled ddr once. 
+ *(u64 *)p_api = info.user_global_addr; + } else { + *(u64 *)p_api = info.compiled_global_addr; + } + p_api = (u64 *)p_api + 1; + *(u32 *)p_api = bmrt_data_type_size((bm_data_type_t)info.dtype) * + (info.n * info.c * info.h * info.w); + p_api = (u32 *)p_api + 1; + } + + // memcpy cmd offset and num + *(u64 *)p_api = net_info.core_commands[core_idx].bdc_cmd_addr; + p_api = (u64 *)p_api + 1; + *(u64 *)p_api = net_info.core_commands[core_idx].gdma_cmd_addr; + p_api = (u64 *)p_api + 1; + *(int *)p_api = cmd_info.size(); + p_api = (int *)p_api + 1; + for (size_t i = 0; i < cmd_info.size(); i++) { + const tpu_cmd_info_t info = cmd_info.at(i); + *(int *)p_api = info.bdc_cmd_num; + p_api = (int *)p_api + 1; + *(int *)p_api = info.gdma_cmd_num; + p_api = (int *)p_api + 1; + *(u32 *)p_api = info.bdc_cmd_byte_size; + p_api = (u32 *)p_api + 1; + *(u32 *)p_api = info.gdma_cmd_byte_size; + p_api = (u32 *)p_api + 1; + } + + *((u64 *)p_api) = net_info.coeff_start_addr; + p_api = ((u64 *)p_api) + 1; + *((u64 *)p_api) = net_info.neuron_start_addr[0]; + p_api = ((u64 *)p_api) + 1; + *((int *)p_api) = base_message_id; + p_api = ((u32 *)p_api) + 1; + } +} +bm_status_t +bmdnn_func_mars3::_bmdnn_multi_fullnet_(bm_handle_t handle, + const tpu_net_info_t &net_info) { + BMRT_ASSERT_INFO(handle, "handle shouldn't be NULL\n"); + + api_info_t api_info; + fill_api_info(net_info, api_info); + bm_status_t status = BM_SUCCESS; + for (size_t core_idx = 0; core_idx < net_info.core_list.size(); core_idx++) { + bm_status_t core_status = bm_send_api_to_core( + handle, (bm_api_id_t)api_info.api_id[0], + api_info.api_data[core_idx].data(), + api_info.api_data[core_idx].size(), + net_info.core_list.at(core_idx)); + if (BM_SUCCESS != core_status) { + status = (status == BM_SUCCESS) ? 
core_status : status; + BMRT_LOG(WRONG, "bm_send_api failed, api id:%d, status:%d", + BM_API_ID_MULTI_FULLNET, core_status); + } + } + return status; +} + +bm_status_t bmdnn_func_mars3::_bmdnn_dynamic_fullnet_( + bm_handle_t handle, + unsigned long long compiled_ir_global_addr, + unsigned int compiled_ir_length, //unit dword + unsigned int input_num, + const unsigned long long *input_addrs, + const int * const * input_shapes, + const int * input_elem_nums, + const int * input_dtype_and_dims, + unsigned int output_num, + const unsigned long long *output_addrs, + unsigned long long apd_ctx_start, + std::vector apd_ctx_mem_borders, + std::vector apd_ctx_mem_offset, + unsigned long long apd_coeff_mem_offset, + unsigned long long apd_io_start, + unsigned long long apd_io_mem_offset, + bool get_output_shape, + unsigned long long output_shape_global_addr, + const std::vector &core_list) +{ + BMRT_ASSERT_INFO(core_list.size() == 1, "Dynamic compile do not support tensor parallel\n"); + BMRT_ASSERT_INFO(handle,"handle shouldn't be NULL\n"); + BMRT_ASSERT_INFO( + apd_ctx_mem_borders.size() == apd_ctx_mem_offset.size(), + "ctx borders and offset should have same size"); + + size_t ctx_num = apd_ctx_mem_borders.size(); + u32 api_buffer_size = sizeof(u64) +sizeof(u32) + // compiled_ir addr, length + // input num + sizeof(u32) + + // input_addr dtype_dims dim_shape elem_num + input_num * (sizeof(u64) + sizeof(int) + sizeof(int) * BM_MAX_DIMS_NUM + sizeof(int)) + + // output num + sizeof(u32) + + // output_addr + output_num * sizeof(u64) + + //get_output_shape, global_shape_mem_addr, apd_ctx_start, (ctx_num, apd_ctx_mem_borders, apd_ctx_mem_offset), + sizeof(u32) + sizeof(u64) + sizeof(u64) + ( sizeof(u32)+sizeof(u64)*ctx_num*2 ) + + //apd_coeff_mem_offset, apd_io_start, apd_io_mem_offset + sizeof(u64) + sizeof(u64) + sizeof(u64); + + if (api_buffer_size > MAX_API_MSG_SIZE) { + //decrease the api buffer size + for (u32 i = 0; i < input_num; ++i) { + u32 cur_dim = 
(u32)(input_dtype_and_dims[i] & 0xFFFF); + api_buffer_size -= (BM_MAX_DIMS_NUM - cur_dim) * sizeof(int); + } + } + + u8* api_buffer = new u8 [api_buffer_size]; + + void* p_api = api_buffer; + //compiled ir information + *(u64*)p_api = compiled_ir_global_addr; + p_api = (u64*)p_api + 1; + *(u32*)p_api = compiled_ir_length; + p_api = (u32*)p_api + 1; + + //input information + *(u32*)p_api = input_num; + p_api = (u32*)p_api + 1; + + for(u32 i = 0; i < input_num; ++i){ + *(u64*)p_api = input_addrs[i]; + p_api = (u64*)p_api + 1; + + *(u32*)p_api = input_dtype_and_dims[i]; + p_api = (u32*)p_api + 1; + u32 cur_dim = (u32)(input_dtype_and_dims[i] & 0xFFFF); + for(u32 j = 0; j < cur_dim; j++){ + *(u32 *)p_api = (u32)input_shapes[i][j]; + p_api = (u32 *)p_api + 1; + } + *(u32*)p_api = input_elem_nums[i]; + p_api = (u32*)p_api + 1; + } + //output information + *(u32*)p_api = output_num; + p_api = (u32*)p_api + 1; + + for(u32 i = 0; i < output_num; ++i){ + *(u64*)p_api = output_addrs[i]; + p_api = (u64*)p_api + 1; + } + //output shape info related + *(u32*)p_api = (u32)get_output_shape; + p_api = (u32*)p_api + 1; + *(u64*)p_api = output_shape_global_addr; + p_api = (u64*)p_api + 1; + + //The memory address in cmd gdma need to be offset when append context,here is the offset value. 
+ *(u64*)p_api = apd_ctx_start; + p_api = (u64*)p_api + 1; + + *(u32*)p_api = ctx_num; + p_api = (u32*)p_api + 1; + + for (size_t i = 0; i < ctx_num; ++i) + { + *(u64*)p_api = apd_ctx_mem_borders[i]; + p_api = (u64*)p_api + 1; + } + for (size_t i = 0; i < ctx_num; ++i) + { + *(u64*)p_api = apd_ctx_mem_offset[i]; + p_api = (u64*)p_api + 1; + } + + *(u64*)p_api = apd_coeff_mem_offset; + p_api = (u64*)p_api + 1; + + *(u64*)p_api = apd_io_start; + p_api = (u64*)p_api + 1; + *(u64*)p_api = apd_io_mem_offset; + p_api = (u64*)p_api + 1; + + bm_status_t status = + bm_send_api(handle, (bm_api_id_t)BM_API_ID_DYNAMIC_FULLNET, api_buffer, api_buffer_size); + if (BM_SUCCESS != status) { + BMRT_LOG(WRONG, "bm_send_api failed, api id:%d, status:%d", BM_API_ID_DYNAMIC_FULLNET, status); + } else { + status = bm_sync_api(handle); + if (BM_SUCCESS != status) { + BMRT_LOG(WRONG, "bm_sync_api failed, api id:%d, status:%d", BM_API_ID_DYNAMIC_FULLNET, status); + } + } + + bm_gmem_arm_reserved_release(handle); + + delete[] api_buffer; + return status; +} + +bm_status_t bmdnn_func_mars3::_bmdnn_set_profile_enable_(bm_handle_t handle, unsigned int enable){ + BMRT_ASSERT_INFO(handle,"handle shouldn't be NULL\n"); + u32 api_buffer_size = sizeof(u32); + u32 profile_enable = enable; + bm_status_t status = bm_send_api(handle, (bm_api_id_t)BM_API_ID_SET_PROFILE_ENABLE, (u8*)&profile_enable, api_buffer_size); + if (BM_SUCCESS != status) { + BMRT_LOG(WRONG, "bm_send_api failed, api id:%d, status:%d", BM_API_ID_SET_PROFILE_ENABLE, status); + } + return status; +} +bm_status_t bmdnn_func_mars3::_bmdnn_get_profile_data_( + bm_handle_t handle, + unsigned long long output_global_addr, + unsigned int output_max_size, + unsigned int byte_offset, + unsigned int data_category //0: profile time records, 1: extra data + ){ + BMRT_ASSERT_INFO(handle,"handle shouldn't be NULL\n"); +#pragma pack(1) + struct { + u64 arm_reserved_addr; + u64 output_global_addr; + u32 output_size; + u32 byte_offset; + u32 
data_category; //0: profile_data, 1: profile extra data + } api_data; +#pragma pack() + + const u32 api_buffer_size = sizeof(api_data); + + api_data.arm_reserved_addr = -1; + api_data.output_global_addr = output_global_addr; + api_data.output_size = output_max_size; + api_data.byte_offset = byte_offset; + api_data.data_category = data_category; + + bm_api_id_t api_code = (bm_api_id_t)BM_API_ID_GET_PROFILE_DATA; + bm_status_t status = + bm_send_api(handle, api_code, (u8*)&api_data, api_buffer_size); + if (BM_SUCCESS != status) { + BMRT_LOG(WRONG, "bm_send_api failed, api id:%d, status:%d", api_code, status); + } else { + status = bm_sync_api(handle); + if (BM_SUCCESS != status) { + BMRT_LOG(WRONG, "bm_sync_api failed, api id:%d, status:%d", api_code, status); + } + } + return status; +} + +} diff --git a/tpu-runtime/src/bmfunc/bmfunc.cpp b/tpu-runtime/src/bmfunc/bmfunc.cpp index 6296829..80a960f 100644 --- a/tpu-runtime/src/bmfunc/bmfunc.cpp +++ b/tpu-runtime/src/bmfunc/bmfunc.cpp @@ -2,64 +2,41 @@ namespace bmruntime { -bmfunc* bmfunc::sta_bmfunc_ptr; +bmfunc *bmfunc::sta_bmfunc_ptr; -bmfunc::bmfunc(const string& arch_name) -{ +bmfunc::bmfunc(const string &arch_name) { sta_bmfunc_ptr = this; - bmdnn_1684_fn = NULL; - bmdnn_1682_fn = NULL; - bmdnn_1880_fn = NULL; - bmdnn_1684x_fn = NULL; - bmdnn_1686_fn = NULL; bmdnn_fn = NULL; p_bmtpu_arch = new bmrt_arch_info(arch_name); bmtpu_arch_t arch = bmrt_arch_info::get_bmtpu_arch(); - if(arch == BM1684) { - bmdnn_1684_fn = new bmdnn_func_1684(); - bmdnn_fn = bmdnn_1684_fn; - } else if(arch == BM1880) { - bmdnn_1880_fn = new bmdnn_func_1880(); - bmdnn_fn = bmdnn_1880_fn; - } else if(arch == BM1682) { - bmdnn_1682_fn = new bmdnn_func_1682(); - bmdnn_fn = bmdnn_1682_fn; - } else if(arch == BM1684X) { - bmdnn_1684x_fn = new bmdnn_func_1684x(); - bmdnn_fn = bmdnn_1684x_fn; - } else if(arch == BM1686) { - bmdnn_1686_fn = new bmdnn_func_1686(); - bmdnn_fn = bmdnn_1686_fn; + if (arch == BM1684) { + bmdnn_fn = new 
bmdnn_func_1684(); + } else if (arch == BM1880) { + bmdnn_fn = new bmdnn_func_1880(); + } else if (arch == BM1682) { + bmdnn_fn = new bmdnn_func_1682(); + } else if (arch == BM1684X) { + bmdnn_fn = new bmdnn_func_1684x(); + } else if (arch == BM1688) { + bmdnn_fn = new bmdnn_func_1688(); + } else if (arch == BM1690) { + bmdnn_fn = new bmdnn_func_2260(); + } else if (arch == SG2380) { + bmdnn_fn = new bmdnn_func_2380(); + } else if (arch == MARS3) { + bmdnn_fn = new bmdnn_func_mars3(); } else { - BMRT_LOG(FATAL, "Error: unkown architecture [%d]", arch); + BMRT_LOG(FATAL, "Error: unkown architecture [%d]", arch); } } -bmfunc::~bmfunc() -{ - if(bmdnn_1684_fn != NULL) { - delete bmdnn_1684_fn; - } - - if(bmdnn_1880_fn != NULL) { - delete bmdnn_1880_fn; - } - - if(bmdnn_1682_fn != NULL) { - delete bmdnn_1682_fn; - } - - if(bmdnn_1684x_fn != NULL) { - delete bmdnn_1684x_fn; - } - - if(bmdnn_1686_fn != NULL) { - delete bmdnn_1686_fn; - } +bmfunc::~bmfunc() { + if (bmdnn_fn) + delete bmdnn_fn; delete p_bmtpu_arch; } -} +} // namespace bmruntime diff --git a/tpu-runtime/src/bmrt_arch_info.cpp b/tpu-runtime/src/bmrt_arch_info.cpp index a5873d7..7e4f66f 100644 --- a/tpu-runtime/src/bmrt_arch_info.cpp +++ b/tpu-runtime/src/bmrt_arch_info.cpp @@ -27,8 +27,14 @@ bmrt_arch_info::bmrt_arch_info(const string& arch_name) target_bmtpu_arch = BM1880; } else if (arch_name == "BM1684X") { target_bmtpu_arch = BM1684X; - } else if (arch_name == "BM1686") { - target_bmtpu_arch = BM1686; + } else if (arch_name == "BM1688") { + target_bmtpu_arch = BM1688; + } else if (arch_name == "BM1690") { + target_bmtpu_arch = BM1690; + } else if (arch_name == "SG2380") { + target_bmtpu_arch = SG2380; + } else if (arch_name == "MARS3") { + target_bmtpu_arch = MARS3; } else { BMRT_LOG(FATAL, "Error: unknown processor name [%s]", arch_name.c_str()); } @@ -47,9 +53,12 @@ int bmrt_arch_info::get_npu_num() npu_num = 64; break; case BM1880: - case BM1686: + case BM1688: + case SG2380: npu_num = 32; break; + 
case BM1690: + npu_num = 64; default: BMRT_LOG(FATAL, "Unknown bmtpu arch"); } @@ -68,8 +77,13 @@ int bmrt_arch_info::get_eu_num(bm_data_type_t dtype) case BM1684X: eu_num = 16; break; - case BM1686: + case BM1688: + case SG2380: eu_num = 4; + break; + case BM1690: + eu_num = 32; + break; default: BMRT_LOG(FATAL, "Unknown bmtpu arch"); } @@ -87,11 +101,14 @@ int bmrt_arch_info::get_lmem_size() case BM1684: lmem_size = (1<<19); //512KB break; - case BM1686: + case BM1688: + case SG2380: lmem_size = (1<<17); //128KB case BM1880: lmem_size = (1<<16); //64KB break; + case BM1690: + lmem_size = 1 << 18; // 256KB default: BMRT_LOG(FATAL, "Unknown bmtpu arch"); } @@ -110,9 +127,13 @@ u64 bmrt_arch_info::get_gmem_start() case BM1684: case BM1880: case BM1684X: - case BM1686: + case BM1688: gmem_start = 0x100000000; break; + case BM1690: + case SG2380: + gmem_start = 0x0; + break; default: BMRT_LOG(FATAL, "Unknown bmtpu arch"); } @@ -148,7 +169,9 @@ u64 bmrt_arch_info::get_gmem_offset_soc() case BM1684: case BM1880: case BM1684X: - case BM1686: + case BM1688: + case BM1690: + case SG2380: gmem_offset = 0x0; break; default: @@ -167,7 +190,9 @@ int bmrt_arch_info::get_lmem_banks() lmem_banks = 8; break; case BM1684X: - case BM1686: + case BM1688: + case BM1690: + case SG2380: lmem_banks = 16; default: BMRT_LOG(FATAL, "Unknown bmtpu arch"); @@ -189,7 +214,10 @@ u64 bmrt_arch_info::get_gmem_cmd_start_offset() case BM1684: case BM1880: case BM1684X: - case BM1686: + case MARS3: + case BM1688: + case BM1690: + case SG2380: gmem_start = 0x0; break; default: @@ -213,7 +241,9 @@ u64 bmrt_arch_info::get_ctx_start_addr() ctx_start_addr = (get_gmem_start() + 0x5000000 + 0x100000); break; case BM1684X: - case BM1686: + case BM1688: + case BM1690: + case SG2380: // BM1684X does not has arm reserved and const memory ctx_start_addr = get_gmem_start(); break; @@ -303,5 +333,17 @@ u32 bmrt_arch_info::get_gdma_cmd_num() return num; } +u64 bmrt_arch_info::addr_mask() { + u64 mask = 
0xffffffffffffffff; + if (sta_bmtpu_ptr->target_bmtpu_arch == BM1688) { + // relative address, only lower 35bit is valie + mask = (1ull << 35) - 1; + } else if (sta_bmtpu_ptr->target_bmtpu_arch == BM1690 || + sta_bmtpu_ptr->target_bmtpu_arch == SG2380) { + mask = (1ull << 40) - 1; + } + return mask; +} + } diff --git a/tpu-runtime/src/bmruntime.cpp b/tpu-runtime/src/bmruntime.cpp old mode 100644 new mode 100755 index ef2ab4d..cc55ca6 --- a/tpu-runtime/src/bmruntime.cpp +++ b/tpu-runtime/src/bmruntime.cpp @@ -15,7 +15,9 @@ #include #include #include +#include #include "bmlib_runtime.h" +#include "bmruntime_common.h" #ifdef _WIN32 #define BILLION (1E9) @@ -46,20 +48,64 @@ int bmrt_clock_gettime(int dummy, struct timespec* ct) } #endif -int BMRT_LOG_LEVEL_THRESHOLD = 0; +BMRT_LogLevel BMRT_LOG_LEVEL_THRESHOLD = BMRT_LogLevel::WRONG; //wrong level, default output wrong and fatal print + namespace { struct LogLevel { LogLevel() { const char *env_str = nullptr; if ((env_str = getenv("BMRT_LOG_VERSION")) != nullptr) - BMRT_LOG_LEVEL_THRESHOLD = atoi(env_str); - else - BMRT_LOG_LEVEL_THRESHOLD = 0; + BMRT_LOG_LEVEL_THRESHOLD = (BMRT_LogLevel)atoi(env_str); } }; static LogLevel log_level_init; } +#ifdef _MSC_VER +#define DLLEXPORT __declspec(dllexport) +#else +#define DLLEXPORT __attribute__((visibility("default"))) +#endif + +#include "bmrt_version.h" +static const char* version_string = "libbmrt_version:1.0.0, branch:" BRANCH_NAME ",commit:" COMMIT_HASH " ,compiled on " COMPILE_TIME "."; +extern "C" { + DLLEXPORT const char* libbmrt_version() { + return version_string; + } + + DLLEXPORT BMRT_LogLevel bmrt_get_current_log_level() { + return BMRT_LOG_LEVEL_THRESHOLD; + } + + DLLEXPORT void bmrt_set_current_log_level(BMRT_LogLevel level) { + BMRT_LOG_LEVEL_THRESHOLD = level; + } +} + +const char* _bmrt_version() { + return version_string; +} + +const char* _libsophon_version() { + FILE *fp = NULL; + static char data[64] = {0}; + if (access("/proc/bmsophon/driver_version", 
F_OK) == 0) { + fp = popen("cat /proc/bmsophon/driver_version", "r"); + if (fp == NULL) + { + return NULL; + } + + if (fgets(data, sizeof(data), fp) != NULL) + { + return data; + } + } + + return NULL; +} + #ifndef __linux__ #include #include "bmruntime_common.h" @@ -88,6 +134,20 @@ extern void BMRT_LOG(int level, const char* fmt, ...) #include "bmruntime.h" +static void dump_tensor(bm_handle_t bm_handle, bm_tensor_t &tensor) { + auto shape = tensor.shape; + int size = 1; + for (int i = 0; i < shape.num_dims; ++i){ + size *= shape.dims[i]; + } + std::vector data(size); + bm_memcpy_d2s(bm_handle, data.data(), tensor.device_mem); + // std::cout<< data[0] << "\t" << data[data.size()-1] << std::endl; + auto ptr = data.data(); + ptr[0] = ptr[0]; +} + + #define SWAP(a, b) \ do { \ (a) = (a) + (b); \ @@ -121,12 +181,14 @@ static void _reorder_bd_ins(uint8_t *cmdbuf) typedef struct bm_device_mem_ext { bm_device_mem_t mem; Bmruntime *rt; - bm_device_mem_ext(bm_device_mem_t mem, Bmruntime *rt) { + uint32_t devid; + bm_device_mem_ext(bm_device_mem_t mem, Bmruntime *rt, uint32_t devid) { this->mem = mem; this->rt = rt; + this->devid = devid; } ~bm_device_mem_ext() { - rt->must_free_device_mem(mem); + rt->must_free_device_mem(devid, mem); } } bm_device_mem_ext_t; @@ -168,50 +230,74 @@ static void bmruntime_unlock() void Bmruntime::init() { + // init core num + m_core_num = 1; + auto arch = bmrt_arch_info::get_bmtpu_arch(); + switch (arch) { + case BM1688: + bm_get_tpu_core_num(m_handles[0], &m_core_num); + break; + case SG2380: + m_core_num = 4; + break; + case BM1690: + m_core_num = 8; + break; + } + + // init mem m_device_mem_vec.clear(); m_net_ctx_v.clear(); - m_net_ctx_v.reserve(MAX_NET_NUM); + // pre alloc mem to avoid mem reallocation for multi-threads case + m_net_ctx_v.reserve(1024); + m_net_cascade_v.reserve(1024); bmcpu_setup(); bmtpu_setup(); if (bmcpu_init_ != NULL) { bmcpu_handle_ = bmcpu_init_(); } + if (customcpu_init_ != NULL) { + customcpu_handle_ = 
customcpu_init_(); + } m_subnet_time_print = false; if (bmrt_arch_info::is_soc_mode()) { b_enable_mmap = true; /* soc default using mmap */ } else { b_enable_mmap = false; } - m_devid = bm_get_devid(m_handle); - m_share_coeff = true; - -#ifndef SOC_MODE - // 1682 cmodel not support multi bm_handle - if (bmrt_arch_info::get_bmtpu_arch() == BM1682) { - m_share_coeff = false; + for (int i = 0; i < m_device_num;i++) { + m_devids[i] = bm_get_devid(m_handles[i]); } -#endif - if (m_share_coeff) { + if (true) { std::lock_guard guard(m_global_coeff_mutex); - auto iter = m_global_coeff_map.find(m_devid); - if (iter == m_global_coeff_map.end()) { - m_local_coeff = std::make_shared(m_devid); - m_global_coeff_map[m_devid] = m_local_coeff; - } else if (iter->second == NULL) { - m_local_coeff = std::make_shared(m_devid); - iter->second = m_local_coeff; - } else { - m_local_coeff = iter->second; + for (int i = 0; i < m_device_num; i++) { + auto iter = m_global_coeff_map.find(m_devids[i]); + if (iter == m_global_coeff_map.end()) { + m_local_coeffs[i] = std::make_shared(m_handles[i]); + m_global_coeff_map[m_devids[i]] = m_local_coeffs[i]; + } else if (iter->second == NULL) { + m_local_coeffs[i] = std::make_shared(m_handles[i]); + iter->second = m_local_coeffs[i]; + } else { + m_local_coeffs[i] = iter->second; + } } - } else { - m_local_coeff = std::make_shared(m_handle); } // middle buffer - bm_set_device_mem(&max_middle_buffer, 0, 0); - max_middle_buffer_size = 0; - middle_buffer_num = 0; - + for (int i = 0; i < m_device_num; i++) { + bm_set_device_mem(&max_middle_buffer[i], 0, 0); + max_middle_buffer_size[i] = 0; + middle_buffer_num[i] = 0; + bm_set_device_mem(&max_hidden_buffer[i], 0, 0); + max_hidden_buffer_size[i] = 0; + hidden_buffer_num[i] = 0; + } + // set default flags + m_flags = 0; + if (m_core_num == 1) { + m_flags |= BM_RUNTIME_SHARE_MEM; + } auto neuron_heap_mask_env = std::getenv("BMRUNTIME_NEURON_HEAP_MASK"); if (neuron_heap_mask_env) { @@ -224,6 +310,13 @@ void 
Bmruntime::init() m_neuron_heap_mask = 7; } m_profile = std::make_shared(this); + if (m_device_num > 1) { + for (int i = 0; i < m_device_num; i++) { + m_cascade_thread_v.push_back( + std::make_shared(this, m_handles[i])); + } + } + temp_filename_ = "/tmp/lib_tmp_cpuop.so-XXXXXX"; } void Bmruntime::init_bmfunc(const string& arch_name) @@ -251,7 +344,7 @@ void Bmruntime::init_bmfunc(const string& arch_name) } /* using user parameter bm_handle */ -Bmruntime::Bmruntime(bm_handle_t* bm_handle, bool user_initlized, const string& arch_name) +Bmruntime::Bmruntime(bm_handle_t * bm_handle, bool user_initlized, const string& arch_name) { init_bmfunc(arch_name); @@ -262,9 +355,31 @@ Bmruntime::Bmruntime(bm_handle_t* bm_handle, bool user_initlized, const string& BMRT_ASSERT_INFO(bm_handle,"bm_handle shouldn't be NULL\n"); using_internal_bm_handle = true; } + m_handles[0] = *bm_handle; + m_device_num = 1; + init(); +} - m_handle = *bm_handle; +Bmruntime::Bmruntime(bm_handle_t *bm_handles, int num_handles, + bool using_internal_hiddens, const string &arch_name) { + BMRT_ASSERT_INFO(num_handles > 0 && num_handles <= MAX_DEVICE_NUM, + "num_handles should > 0 and <= %d", MAX_DEVICE_NUM); + init_bmfunc(arch_name); + using_internal_bm_handle = false; + using_internal_hidden_tensors = using_internal_hiddens; + m_device_num = num_handles; + std::copy(bm_handles, bm_handles + num_handles, m_handles); init(); + struct bm_misc_info misc_info; + bm_get_misc_info(bm_handles[0], &misc_info); + u8 board_type = (u8)((misc_info.board_version >> 8) & 0xff); + if (board_type == 0x21) { + card_chip_num = 8; + } else if (board_type == 0x23) { + card_chip_num = 6; + } else { + // BMRT_ASSERT_INFO(0, "CascadeNet can not run on this board."); + } } /* using internal initilized bm_handle ,with specific dev_id */ @@ -272,9 +387,10 @@ Bmruntime::Bmruntime(const string& arch_name, int devid) { init_bmfunc(arch_name); - bm_dev_request(&m_handle, devid); - BMRT_ASSERT_INFO(m_handle,"m_handle shouldn't be 
NULL\n"); + bm_dev_request(&m_handles[0], devid); + BMRT_ASSERT_INFO(m_handles[0],"m_handle shouldn't be NULL\n"); using_internal_bm_handle = true; + m_device_num = 1; init(); } @@ -283,34 +399,57 @@ Bmruntime::~Bmruntime() if (bmcpu_uninit_ != NULL) { bmcpu_uninit_(bmcpu_handle_); } + if (customcpu_handle_ != NULL && customcpu_uninit_ != NULL) { + customcpu_uninit_(customcpu_handle_); + } - for (auto& dev_mem : m_device_mem_vec) { + for (size_t i = 0; i < m_device_mem_vec.size(); i++) { + auto &dev_mem = m_device_mem_vec[i]; + auto id = m_device_mem_ids[i]; BMRT_DEBUG("Free device memory, byte size %d\n", bm_mem_get_device_size(dev_mem)); - must_free_device_mem(dev_mem); + must_free_device_mem(id, dev_mem); + } + + for (size_t i = 0; i < m_sg_device_mem_vec.size(); i++) { + auto &dev_mem = m_sg_device_mem_vec[i]; + auto id = m_sg_device_mem_ids[i]; + BMRT_DEBUG("Free device memory, byte size %d\n", bm_mem_get_device_size_u64(dev_mem)); + must_free_device_mem_u64(id, dev_mem); } for (auto net_ctx : m_net_ctx_v) { subnet_clear(net_ctx); + free_dyn_neuron(net_ctx); free_net_info(net_ctx); delete []net_ctx->stage_v[0]; delete net_ctx; } + for (auto net_cascade : m_net_cascade_v) { + cascade_free_net_info(&net_cascade); + } // if this is last bmruntime, free coeff mem - if (m_share_coeff) { + if (true) { std::lock_guard guard(m_global_coeff_mutex); - m_local_coeff = NULL; - auto iter = m_global_coeff_map.find(m_devid); - if (iter->second.unique()) { - iter->second = NULL; + for (int i = 0; i< m_device_num; i++) { + m_local_coeffs[i] = NULL; + auto iter = m_global_coeff_map.find(m_devids[i]); + if (iter->second.unique()) { + iter->second = NULL; + } } - } else { - m_local_coeff = NULL; } if (using_internal_bm_handle) { - bm_dev_free(m_handle); + bm_dev_free(m_handles[0]); + } +#ifdef __linux__ + if (customcpu_handle_ != NULL && tmpcpuso_handle_ != NULL) { + unlink(&temp_filename_[0]); + dlclose(tmpcpuso_handle_); + remove(&temp_filename_[0]); } +#endif } static bool 
is_cmodel_mode(){ @@ -374,8 +513,12 @@ void Bmruntime::bmtpu_setup() #endif if(!load_func){ BMRT_LOG(WARNING, "cannot find bmkernel_load_firmware function"); - } else if(load_func(m_handle, tcm_firmware.c_str(), ddr_firmware.c_str()) != BM_SUCCESS){ - BMRT_LOG(WARNING, "fail to load firmware: %s", tcm_firmware.c_str()); + } else { + for (int i = 0; i < m_device_num; i++) { + if(load_func(m_handles[i], tcm_firmware.c_str(), ddr_firmware.c_str()) != BM_SUCCESS){ + BMRT_LOG(WARNING, "fail to load firmware: %s", tcm_firmware.c_str()); + } + } } } } @@ -417,6 +560,22 @@ void Bmruntime::bmcpu_setup() } else { BMRT_LOG(INFO, "cpu.so already exist, don't dlopen"); } + + if (customcpu_process_ == NULL) { + const char cpu_lib[] = "libcustomcpuop.so"; + void* customcpu_so_handle_ = nullptr; + customcpu_so_handle_ = dlopen(cpu_lib, RTLD_LAZY); + if(customcpu_so_handle_) { + BMRT_LOG(INFO, "customcpu_lib '%s' is loaded.", cpu_lib); + } + if (!customcpu_so_handle_) { + BMRT_LOG(INFO, "Not able to open %s", "libcustomcpuop.so"); + } + customcpu_init_ = (t_bmcpu_init)dlsym(customcpu_so_handle_, "bmcpu_init"); + customcpu_uninit_ = (t_bmcpu_uninit)dlsym(customcpu_so_handle_, "bmcpu_uninit"); + customcpu_process_ = (t_bmcpu_process)dlsym(customcpu_so_handle_, "customcpu_process"); + } + #else if (bmcpu_process_ == NULL) { std::vector cpu_libs = { @@ -453,13 +612,18 @@ u64 Bmruntime::fix_gdma_addr(const net_stage_t* stage, u64 origin_addr, bool is_ return origin_addr; #endif } - if (origin_addr < stage->ctx_start) { + bool io_alone = stage->io_size > 0; // has io space + auto coeff_limit = io_alone ? 
stage->io_start : stage->ctx_start; + if (origin_addr < coeff_limit) { if (false == is_src) { - BMRT_LOG(FATAL, "gdma dst shouldn't be coeff, origin[0x%llx], ctx[0x%llx]", origin_addr, - stage->ctx_start); + BMRT_LOG(FATAL, "gdma dst shouldn't be coeff, origin[0x%llx], ctx[0x%llx]", + origin_addr, coeff_limit); } return origin_addr + stage->coeff_offset; } + if (io_alone && origin_addr < stage->ctx_start) { + return origin_addr + stage->io_offset; + } return origin_addr + stage->ctx_offset[get_mem_index(stage->ctx_borders, stage->ctx_start, origin_addr)]; } @@ -646,18 +810,17 @@ void Bmruntime::convert_cmd(u32* cmd, int engine_id, bool last_cmd, u64 start_ad } } break; - case BM1686: - if (id == ENGINE_GDMA && !last_cmd) { + case MARS3: + case BM1688: + if (id == ENGINE_GDMA && !last_cmd && stage->io_size > 0) { int cmd_type = (cmd[1] & 0x0f); if(cmd_type == 6) return; //cmd_type: DMA_sys u64 src_addr = ((u64)(cmd[17] & 0xff) << 32) | ((u64)cmd[16]); u64 dst_addr = ((u64)(cmd[19] & 0xff) << 32) | ((u64)cmd[18]); - BMRT_ASSERT_INFO(((src_addr >> 36) & 0x7) == 0, "support tag != 0"); - BMRT_ASSERT_INFO(((dst_addr >> 36) & 0x7) == 0, "support tag != 0"); bool src_in_global = (src_addr >> 39) & 0x1; bool dst_in_global = (dst_addr >> 39) & 0x1; u64 fix_addr; - if (src_in_global) { + if (src_in_global && ((src_addr >> 36) & 0x7) == 0) { fix_addr = fix_gdma_addr(stage, src_addr & ((1ull << 35) - 1), true); fix_addr |= (1ull << 39); if (fix_addr != src_addr) { @@ -665,7 +828,7 @@ void Bmruntime::convert_cmd(u32* cmd, int engine_id, bool last_cmd, u64 start_ad cmd[17] = ((u32)((fix_addr >> 32) & 0xff)) | (cmd[17] & 0xffffff00); } } - if (dst_in_global) { + if (dst_in_global && ((dst_addr >> 36) & 0x7) == 0) { fix_addr = fix_gdma_addr(stage, dst_addr & ((1ull << 35) - 1), false); fix_addr |= (1ull << 39); if (fix_addr != dst_addr) { @@ -679,8 +842,9 @@ void Bmruntime::convert_cmd(u32* cmd, int engine_id, bool last_cmd, u64 start_ad // fix index_tensor or mask_tensor addr if 
(cmd_type == 2 || cmd_type == 7 || cmd_type == 8 || cmd_type == 0xa || cmd_type == 0xb) { u64 index_addr = ((u64)(cmd[21] & 0xff) << 32) | ((u64)cmd[20]); - if ((index_addr >> 39) & 0x1) { + if (((index_addr >> 39) & 0x1) && ((index_addr >> 36) & 0x7) == 0) { fix_addr = fix_gdma_addr(stage, index_addr & ((1ull << 35) - 1), true); + fix_addr |= (1ull << 39); if (fix_addr != index_addr) { cmd[20] = fix_addr & 0xffffffff; cmd[21] = ((u32)((fix_addr >> 32) & 0xff)) | (cmd[21] & 0xffffff00); @@ -689,7 +853,10 @@ void Bmruntime::convert_cmd(u32* cmd, int engine_id, bool last_cmd, u64 start_ad } } break; - + case SG2380: + break; + case BM1690: + break; default: BMRT_LOG(FATAL, "Unkown BM TPU"); } @@ -717,7 +884,8 @@ bool Bmruntime::launch(int net_idx, const int input_num, const bm_device_mem_t* return false; } - auto& net_ctx = m_net_ctx_v[net_idx]; + auto net_ctx = m_net_ctx_v[net_idx]; + auto devid = net_ctx->device_id; #ifdef __linux__ bm_tensor_t input_tensors[input_num]; #else @@ -759,7 +927,7 @@ bool Bmruntime::launch(int net_idx, const int input_num, const bm_device_mem_t* bool ret = launch(net_idx, input_tensors, input_num, output_tensors, output_num, true, user_stmode); // sync is needed for profile save data if (ret == true && m_profile->is_enabled()){ - ret = (BM_SUCCESS == bm_thread_sync(m_handle)); + ret = (BM_SUCCESS == bm_thread_sync(m_handles[devid])); } if (!ret) { BMRT_LOG(WRONG, "launch net[%d] failed", net_idx); @@ -773,13 +941,25 @@ bool Bmruntime::launch(int net_idx, const int input_num, const bm_device_mem_t* return true; } +void Bmruntime::sync_cores(bm_handle_t handle, const std::vector& core_list) +{ + for (int core_idx=0; core_idxdevice_id; #ifdef __linux__ int *user_input_shapes[input_num]; u64 user_input_global_addrs[input_num]; @@ -801,7 +981,7 @@ bool Bmruntime::launch_ir(net_ctx_t* net_ctx, net_stage_t* stage, user_input_shapes[idx] = (int*)input_tensors[idx].shape.dims; input_dims[idx] = input_tensors[idx].shape.num_dims; auto input_dtype 
= 0; - if (arch == BM1684X || arch == BM1686) { + if (arch == BM1684X || arch == BM1688 || arch == BM1690 || arch == SG2380) { input_dtype = input_tensors[idx].dtype; } else { if (input_tensors[idx].dtype == BM_FLOAT32) { @@ -878,8 +1058,8 @@ bool Bmruntime::launch_ir(net_ctx_t* net_ctx, net_stage_t* stage, // for multi-stage ir bm_device_mem_t output_shape_mem; - u64 output_shape_global_addr = must_alloc_device_mem(&output_shape_mem, output_num*sizeof(bm_shape_ex_t)); - bm_device_mem_ext_t output_shape_mem_ext(output_shape_mem, this); + u64 output_shape_global_addr = must_alloc_device_mem(devid, &output_shape_mem, output_num*sizeof(bm_shape_ex_t)); + bm_device_mem_ext_t output_shape_mem_ext(output_shape_mem, this, devid); #ifdef __linux__ int input_elem_num[input_num]; @@ -888,9 +1068,13 @@ bool Bmruntime::launch_ir(net_ctx_t* net_ctx, net_stage_t* stage, int* input_elem_num = input_elem_num_.get(); #endif memset(input_elem_num, 0, sizeof(int) * input_num); //setting to 0 means that does not need to count elem_num + auto core_list = get_core_list_from_core_mask(dyn_core_mask); + if (core_list.size() > 1) { + core_list.resize(1); + } if (arch == BM1682) { status = bmfunc::bmdnn_1682()->_bmdnn_dynamic_fullnet_v2_( - m_handle, stage->ir_mem.addr, stage->ir_mem.dword_len, input_num, user_input_global_addrs, + m_handles[devid], stage->core_commands[0].ir_mem.addr, stage->core_commands[0].ir_mem.dword_len, input_num, user_input_global_addrs, user_input_shapes, input_elem_num, input_dims, output_num, user_output_global_addrs, stage->ctx_start, // There is an assertion in bmruntime_bmodel.cpp to ensure ctx_offset @@ -901,7 +1085,7 @@ bool Bmruntime::launch_ir(net_ctx_t* net_ctx, net_stage_t* stage, ); } else if (arch == BM1684) { status = bmfunc::bmdnn_1684()->_bmdnn_dynamic_fullnet_v2_( - m_handle, stage->ir_mem.addr, stage->ir_mem.dword_len, input_num, user_input_global_addrs, + m_handles[devid], stage->core_commands[0].ir_mem.addr, 
stage->core_commands[0].ir_mem.dword_len, input_num, user_input_global_addrs, user_input_global_addr_middle, user_input_shapes, input_elem_num, input_dims, output_num, user_output_global_addrs, user_output_global_addr_middle, stage->ctx_start, stage->ctx_borders, stage->ctx_offset, @@ -910,34 +1094,61 @@ bool Bmruntime::launch_ir(net_ctx_t* net_ctx, net_stage_t* stage, 0 // no arm reserved buffer used ); } else if (arch == BM1684X) { - int func_id = net_ctx->kernel_module_->get_dynamic_fullnet_func_id(); + auto func_id = net_ctx->kernel_module_->get_dynamic_fullnet_func_id(core_list); status = bmfunc::bmdnn_1684x()->_bmdnn_dynamic_fullnet_( - m_handle, func_id, stage->ir_mem.addr, stage->ir_mem.dword_len, input_num, user_input_global_addrs, + m_handles[devid], func_id[0], stage->core_commands[0].ir_mem.addr, stage->core_commands[0].ir_mem.dword_len, input_num, user_input_global_addrs, user_input_shapes, input_elem_num, input_dims, output_num, user_output_global_addrs, stage->ctx_start, stage->ctx_borders, stage->ctx_offset, - stage->coeff_offset, true, - output_shape_global_addr); - } else if (arch == BM1686) { - status = bmfunc::bmdnn_1686()->_bmdnn_dynamic_fullnet_( - m_handle, stage->ir_mem.addr, stage->ir_mem.dword_len, input_num, user_input_global_addrs, + stage->coeff_offset, stage->io_start, stage->io_offset, true, + output_shape_global_addr, + net_ctx->do_allreduce == 1 ? &(net_ctx->allreduce_param) : NULL); + } else if (arch == BM1688) { + auto func_id = net_ctx->kernel_module_->get_dynamic_fullnet_func_id(core_list); + status = bmfunc::bmdnn_1688()->_bmdnn_dynamic_fullnet_( + m_handles[devid], func_id, stage->core_commands[0].ir_mem.addr, stage->core_commands[0].ir_mem.dword_len, input_num, user_input_global_addrs, + user_input_shapes, input_elem_num, input_dims, output_num, + user_output_global_addrs, stage->ctx_start, + stage->ctx_borders, (m_flags & BM_RUNTIME_SHARE_MEM) ? 
stage->ctx_offset : net_ctx->dyn_neuron_stage_dict[dyn_core_mask]->ctx_offset, + stage->coeff_offset, stage->io_start, stage->io_offset, true, + output_shape_global_addr, + core_list); + } else if (arch == BM1690) { + status = bmfunc::bmdnn_2260()->_bmdnn_dynamic_fullnet_( + m_handles[devid], stage->core_commands[0].ir_mem.addr, stage->core_commands[0].ir_mem.dword_len, input_num, user_input_global_addrs, + user_input_shapes, input_elem_num, input_dims, output_num, + user_output_global_addrs, stage->ctx_start, + stage->ctx_borders, (m_flags & BM_RUNTIME_SHARE_MEM) ? stage->ctx_offset : net_ctx->dyn_neuron_stage_dict[dyn_core_mask]->ctx_offset, + stage->coeff_offset, stage->io_start, stage->io_offset, true, + output_shape_global_addr, + core_list); + } else if (arch == MARS3) { + status = bmfunc::bmdnn_mars3()->_bmdnn_dynamic_fullnet_( + m_handles[devid], stage->core_commands[0].ir_mem.addr, stage->core_commands[0].ir_mem.dword_len, input_num, user_input_global_addrs, user_input_shapes, input_elem_num, input_dims, output_num, user_output_global_addrs, stage->ctx_start, stage->ctx_borders, stage->ctx_offset, - stage->coeff_offset, true, - output_shape_global_addr); + stage->coeff_offset, stage->io_start, stage->io_offset, true, + output_shape_global_addr, + core_list); + } else if (arch == SG2380) { + auto func_id = net_ctx->kernel_module_->get_dynamic_fullnet_func_id(core_list); + status = bmfunc::bmdnn_2380()->_bmdnn_dynamic_fullnet_( + m_handles[devid], func_id, stage->core_commands[0].ir_mem.addr, stage->core_commands[0].ir_mem.dword_len, input_num, user_input_global_addrs, + user_input_shapes, input_elem_num, input_dims, output_num, + user_output_global_addrs, stage->ctx_start, + stage->ctx_borders, (m_flags & BM_RUNTIME_SHARE_MEM) ? 
stage->ctx_offset : net_ctx->dyn_neuron_stage_dict[dyn_core_mask]->ctx_offset, + stage->coeff_offset, stage->io_offset, true, + output_shape_global_addr, + core_list); } else { BMRT_LOG(FATAL, "Unknown BM TPU"); } if (status == BM_SUCCESS) { - status = bm_thread_sync(m_handle); + sync_cores(m_handles[devid], core_list); } - if (BM_SUCCESS != status) { - BMRT_LOG(WRONG, "launch failed, status:%d", status); - trace(); - } if (BM_SUCCESS == status) { // update output shape #ifdef __linux__ @@ -946,7 +1157,7 @@ bool Bmruntime::launch_ir(net_ctx_t* net_ctx, net_stage_t* stage, std::shared_ptr output_shape_v_(new bm_shape_ex_t[output_num], std::default_delete()); bm_shape_ex_t* output_shape_v = output_shape_v_.get(); #endif - status = bm_memcpy_d2s(m_handle, output_shape_v, output_shape_mem); + status = bm_memcpy_d2s(m_handles[devid], output_shape_v, output_shape_mem); CHECK_status(status); for (int idx = 0; idx < output_num; idx++) { output_tensors[idx].shape = output_shape_v[idx].shape; @@ -956,240 +1167,188 @@ bool Bmruntime::launch_ir(net_ctx_t* net_ctx, net_stage_t* stage, return BM_SUCCESS == status; } -bool Bmruntime::launch_static(net_ctx_t* net_ctx, net_stage_t* stage, - const bm_tensor_t* input_tensors, int input_num, - bm_tensor_t* output_tensors, int output_num) -{ - #ifdef __linux__ - u64 user_input_global_offset[input_num]; - u64 cmd_input_global_offset[input_num]; - - int input_data_len[input_num]; - int input_n[input_num]; - int input_c[input_num]; - int input_h[input_num]; - int input_w[input_num]; - int input_length[input_num]; - unsigned short input_dtype[input_num]; - unsigned char input_stmode[input_num]; - unsigned char real_in_stmode[input_num]; - unsigned int input_pad_h[input_num]; - #else - std::shared_ptr user_input_global_offset_(new u64[input_num], std::default_delete()); - u64* user_input_global_offset = user_input_global_offset_.get(); - std::shared_ptr cmd_input_global_offset_(new u64[input_num], std::default_delete()); - u64* 
cmd_input_global_offset = cmd_input_global_offset_.get(); - - std::shared_ptr input_data_len_(new int[input_num], std::default_delete()); - int* input_data_len = input_data_len_.get(); - std::shared_ptr input_n_(new int[input_num], std::default_delete()); - int* input_n = input_n_.get(); - std::shared_ptr input_c_(new int[input_num], std::default_delete()); - int* input_c = input_c_.get(); - std::shared_ptr input_h_(new int[input_num], std::default_delete()); - int* input_h = input_h_.get(); - std::shared_ptr input_w_(new int[input_num], std::default_delete()); - int* input_w = input_w_.get(); - std::shared_ptr input_length_(new int[input_num], std::default_delete()); - int* input_length = input_length_.get(); - std::shared_ptr input_dtype_(new unsigned short[input_num], std::default_delete()); - unsigned short* input_dtype = input_dtype_.get(); - std::shared_ptr input_stmode_(new unsigned char[input_num], std::default_delete()); - unsigned char* input_stmode = input_stmode_.get(); - std::shared_ptr real_in_stmode_(new unsigned char[input_num], std::default_delete()); - unsigned char* real_in_stmode = real_in_stmode_.get(); - std::shared_ptr input_pad_h_(new unsigned int[input_num], std::default_delete()); - unsigned int* input_pad_h = input_pad_h_.get(); - #endif +api_info_t +Bmruntime::get_api_info(int net_idx, const bm_tensor_t *input_tensors, + int input_num, bm_tensor_t *output_tensors, + int output_num, bool user_mem, bool user_stmode, + uint32_t *core_ids) { + api_info_t api_info; + auto net_ctx = m_net_ctx_v[net_idx]; + int stage_idx = get_stage_idx(net_ctx, input_tensors); + if (stage_idx == -1) { + BMRT_LOG(WRONG, "Shapes of the input tensors are not supported"); + return api_info; + } + auto &stage = net_ctx->stage_v[stage_idx]; - for (u32 idx = 0; idx < stage->input_v.size(); idx++) { - auto& cmd_input = stage->input_v[idx]; - auto& user_input = input_tensors[idx]; - cmd_input_global_offset[idx] = - bm_mem_get_device_addr(cmd_input.dev_mem) + 
GLOBAL_MEM_CMD_START_OFFSET; - user_input_global_offset[idx] = - bm_mem_get_device_addr(user_input.device_mem) + GLOBAL_MEM_CMD_START_OFFSET; + // multi core info + auto core_num = stage->core_commands.size(); + std::vector core_list; + for (size_t idx = 0; idx < core_num; ++idx) { + core_list.emplace_back(core_ids[idx]); + } - input_data_len[idx] = bmrt_shape_count(&user_input.shape); - input_n[idx] = user_input.shape.dims[0]; - input_c[idx] = user_input.shape.num_dims > 1 ? user_input.shape.dims[1] : 1; - input_h[idx] = user_input.shape.num_dims > 2 ? user_input.shape.dims[2] : 1; - input_w[idx] = 1; - for (int s = 3; s < user_input.shape.num_dims; s++) { - input_w[idx] *= user_input.shape.dims[s]; - } + // init output tensors + init_output_tensors(net_ctx, stage, output_tensors, user_mem, user_stmode); - // input_length[idx] = max_c * max_h * max_w; - input_length[idx] = 1; - for (int s = 1; s < user_input.shape.num_dims; s++) { - input_length[idx] *= user_input.shape.dims[s]; - } - input_dtype[idx] = (unsigned short)user_input.dtype; - input_stmode[idx] = (unsigned short)cmd_input.st_mode; - BMRT_ASSERT_INFO(input_stmode[idx] == BM_STORE_1N || input_stmode[idx] == BM_STORE_4N,\ - "input_stmode[%d]:%d shouldn't be BM_STORE_2N\n", idx, input_stmode[idx]); - real_in_stmode[idx] = user_input.st_mode; - BMRT_ASSERT_INFO(real_in_stmode[idx] == BM_STORE_1N || real_in_stmode[idx] == BM_STORE_4N,\ - "real_in_stmode[%d] shouldn't be BM_STORE_2N\n", idx, real_in_stmode[idx]); + BMRT_ASSERT(!net_ctx->is_dynamic && stage->subnet_num == 1); - // pad_h for conv 3ic(for BM1684) - input_pad_h[idx] = stage->input_v[idx].pad_h; + uint32_t core_mask = get_dyn_core_mask(stage_idx, core_list); + if (!(m_flags & BM_RUNTIME_SHARE_MEM)) { + net_ctx_alloc_dyn_neuron(net_ctx, core_mask, stage, false); } - #ifdef __linux__ - u64 user_output_global_offset[output_num]; - u64 cmd_output_global_offset[output_num]; - int output_n[output_num]; - int output_length[output_num]; - int 
output_data_len[output_num]; - unsigned short output_dtype[output_num]; - unsigned char output_stmode[output_num]; - unsigned char force_out_stmode[output_num]; - #else - std::shared_ptr user_output_global_offset_(new u64[output_num], std::default_delete()); - u64* user_output_global_offset = user_output_global_offset_.get(); - std::shared_ptr cmd_output_global_offset_(new u64[output_num], std::default_delete()); - u64* cmd_output_global_offset = cmd_output_global_offset_.get(); - std::shared_ptr output_n_(new int[output_num], std::default_delete()); - int* output_n = output_n_.get(); - std::shared_ptr output_length_(new int[output_num], std::default_delete()); - int* output_length = output_length_.get(); - std::shared_ptr output_data_len_(new int[output_num], std::default_delete()); - int* output_data_len = output_data_len_.get(); - std::shared_ptr output_dtype_(new unsigned short[output_num], std::default_delete()); - unsigned short* output_dtype = output_dtype_.get(); - std::shared_ptr output_stmode_(new unsigned char[output_num], std::default_delete()); - unsigned char* output_stmode = output_stmode_.get(); - std::shared_ptr force_out_stmode_(new unsigned char[output_num], std::default_delete()); - unsigned char* force_out_stmode = force_out_stmode_.get(); - #endif - for (u32 idx = 0; idx < stage->output_v.size(); idx++) { - auto& cmd_output = stage->output_v[idx]; - auto& user_output = output_tensors[idx]; - cmd_output_global_offset[idx] = - bm_mem_get_device_addr(cmd_output.dev_mem) + GLOBAL_MEM_CMD_START_OFFSET; - - user_output_global_offset[idx] = - bm_mem_get_device_addr(user_output.device_mem) + GLOBAL_MEM_CMD_START_OFFSET; - output_n[idx] = cmd_output.shape.num_dims == 0 ? 
1 : cmd_output.shape.dims[0]; - // output_length[idx] = max_c * max_h * max_w; - output_length[idx] = 1; - for (int s = 1; s < cmd_output.shape.num_dims; s++) { - output_length[idx] *= cmd_output.shape.dims[s]; - } - output_data_len[idx] = output_n[idx] * output_length[idx]; - output_dtype[idx] = (unsigned short)user_output.dtype; - output_stmode[idx] = (unsigned short)cmd_output.st_mode; - BMRT_ASSERT_INFO(output_stmode[idx] == BM_STORE_1N || output_stmode[idx] == BM_STORE_4N,\ - "output_stmode[%d] shouldn't be BM_STORE_2N\n", idx, output_stmode[idx]); - force_out_stmode[idx] = user_output.st_mode; - BMRT_ASSERT_INFO(force_out_stmode[idx] == BM_STORE_1N || force_out_stmode[idx] == BM_STORE_4N, "force_out_stmode[%d] shouldn't be BM_STORE_2N\n", idx, force_out_stmode[idx]); - } - - int group_num = stage->bdc_id.size(); - #ifdef __linux__ - int bdc_id_num[group_num]; - int gdma_id_num[group_num]; - int cdma_id_num[group_num]; /* useless, need been removed */ - u32 bdc_cmd_byte_size[group_num], gdma_cmd_byte_size[group_num]; - #else - std::shared_ptr bdc_id_num_(new int[group_num], std::default_delete()); - int* bdc_id_num = bdc_id_num_.get(); - std::shared_ptr gdma_id_num_(new int[group_num], std::default_delete()); - int* gdma_id_num = gdma_id_num_.get(); - std::shared_ptr cdma_id_num_(new int[group_num], std::default_delete()); - int* cdma_id_num = cdma_id_num_.get(); - std::shared_ptr bdc_cmd_byte_size_(new u32[group_num], std::default_delete()); - u32* bdc_cmd_byte_size = bdc_cmd_byte_size_.get(); - std::shared_ptr gdma_cmd_byte_size_(new u32[group_num], std::default_delete()); - u32* gdma_cmd_byte_size = gdma_cmd_byte_size_.get(); - #endif - for (int group_idx = 0; group_idx < group_num; group_idx++) { - bdc_id_num[group_idx] = stage->bdc_id[group_idx]; - gdma_id_num[group_idx] = stage->gdma_id[group_idx]; - cdma_id_num[group_idx] = 0; - bdc_cmd_byte_size[group_idx] = stage->bdc_cmd_byte[group_idx]; - gdma_cmd_byte_size[group_idx] = 
stage->gdma_cmd_byte[group_idx]; - } - - if(m_profile->is_enabled()){ - auto cmd_num = m_profile->record_subnet_cmd_info(stage->gdma_mem.addr, GLOBAL_MEM_CMD_START_OFFSET, - stage->bdc_mem.addr, GLOBAL_MEM_CMD_START_OFFSET, - group_num); - for(int i=0; ifill_api_info(net_info, api_info); + return api_info; +} +void Bmruntime::fill_tpu_cmd_info(std::vector &cmd_info, + const net_stage_t *stage, + const int core_idx) { + cmd_info.clear(); + const size_t group_num = stage->core_commands[core_idx].bdc_id.size(); + for (size_t group_idx = 0; group_idx < group_num; group_idx++) { + tpu_cmd_info_t info = {0}; + info.bdc_cmd_num = stage->core_commands[core_idx].bdc_id[group_idx]; + info.gdma_cmd_num = stage->core_commands[core_idx].gdma_id[group_idx]; + info.bdc_cmd_byte_size = + stage->core_commands[core_idx].bdc_cmd_byte[group_idx]; + info.gdma_cmd_byte_size = + stage->core_commands[core_idx].gdma_cmd_byte[group_idx]; + cmd_info.push_back(std::move(info)); } - bm_status_t status = BM_SUCCESS; - switch (bmrt_arch_info::get_bmtpu_arch()) { - case BM1684: - status = bmfunc::bmdnn_1684()->_bmdnn_multi_fullnet_( - m_handle, input_num, user_input_global_offset, cmd_input_global_offset, input_n, - input_c, input_h, input_w, input_dtype, input_stmode, real_in_stmode, output_num, - user_output_global_offset, cmd_output_global_offset, output_n, output_length, - output_dtype, output_stmode, force_out_stmode, - stage->bdc_mem.addr + GLOBAL_MEM_CMD_START_OFFSET, - stage->gdma_mem.addr + GLOBAL_MEM_CMD_START_OFFSET, bdc_id_num, gdma_id_num, group_num, input_pad_h); - break; - case BM1684X: { - for (int s = 0; s < input_num; s++) { - input_data_len[s] *= bmrt_data_type_size((bm_data_type_t)input_dtype[s]); - } - for (int s = 0; s < output_num; s++) { - output_data_len[s] *= bmrt_data_type_size((bm_data_type_t)output_dtype[s]); - } - int func_id = net_ctx->kernel_module_->get_multi_fullnet_func_id(); - status = bmfunc::bmdnn_1684x()->_bmdnn_multi_fullnet_( - m_handle, func_id, 
input_num, user_input_global_offset, cmd_input_global_offset, - (u32*)input_data_len, output_num, user_output_global_offset, - cmd_output_global_offset, (u32*)output_data_len, - stage->bdc_mem.addr + GLOBAL_MEM_CMD_START_OFFSET, - stage->gdma_mem.addr + GLOBAL_MEM_CMD_START_OFFSET, - bdc_id_num, gdma_id_num, bdc_cmd_byte_size, - gdma_cmd_byte_size, group_num); - break; - } - case BM1686: { - for (int s = 0; s < input_num; s++) { - input_data_len[s] *= bmrt_data_type_size((bm_data_type_t)input_dtype[s]); - } - for (int s = 0; s < output_num; s++) { - output_data_len[s] *= bmrt_data_type_size((bm_data_type_t)output_dtype[s]); +} + +template +void Bmruntime::fill_tpu_tensor_info( + std::vector &tensor_info, const T_stage *stage, + const bm_tensor_t *user_tensors, bool is_input) { + tensor_info.clear(); + auto ref_tensors = is_input ? stage->input_v : stage->output_v; + for (u32 idx = 0; idx < ref_tensors.size(); idx++) { + tpu_tensor_info_t info = {0}; + /// info that is given by users + auto &user_input = user_tensors[idx]; + info.dtype = user_input.dtype; + info.user_stmode = user_input.st_mode; + info.user_global_addr = bm_mem_get_device_addr(user_input.device_mem) + + GLOBAL_MEM_CMD_START_OFFSET; + BMRT_ASSERT_INFO( + info.user_stmode == BM_STORE_1N || info.user_stmode == BM_STORE_4N, + "user stmode[%d]:%d shouldn't be BM_STORE_2N\n", idx, info.user_stmode); + + /// info that fixed when compiling + auto &cmd_input = ref_tensors[idx]; + info.compiled_stmode = cmd_input.st_mode; + info.padding_h = cmd_input.pad_h; + info.compiled_global_addr = + bm_mem_get_device_addr(cmd_input.dev_mem) + GLOBAL_MEM_CMD_START_OFFSET; + + BMRT_ASSERT_INFO(info.compiled_stmode == BM_STORE_1N || + info.compiled_stmode == BM_STORE_4N, + "user stmode[%d]:%d shouldn't be BM_STORE_2N\n", idx, + info.compiled_stmode); + const auto &ref_shape = is_input ? user_input.shape : cmd_input.shape; + info.n = ref_shape.dims[0]; + info.c = ref_shape.num_dims > 1 ? 
ref_shape.dims[1] : 1; + info.h = ref_shape.num_dims > 2 ? ref_shape.dims[2] : 1; + info.w = 1; + for (int s = 3; s < ref_shape.num_dims; s++) { + info.w *= ref_shape.dims[s]; + } + info.tensor_byte_size = (uint32_t)info.n * info.c * info.h * info.w * + bmrt_data_type_size((bm_data_type_t)info.dtype); + tensor_info.push_back(std::move(info)); + } +} +void +Bmruntime::fill_tpu_net_info(net_ctx_t *net_ctx, net_stage_t *stage, + const bm_tensor_t *input_tensors, int input_num, + bm_tensor_t *output_tensors, int output_num, + const std::vector &core_list, + tpu_net_info_t &net_info, + const size_t dyn_core_mask) { + std::vector input_info; + std::vector output_info; + if (m_flags & BM_RUNTIME_SHARE_MEM) { + fill_tpu_tensor_info(input_info, stage, input_tensors, true); + fill_tpu_tensor_info(output_info, stage, output_tensors, false); + } else { + fill_tpu_tensor_info(input_info, net_ctx->dyn_neuron_stage_dict[dyn_core_mask], input_tensors, true); + fill_tpu_tensor_info(output_info, net_ctx->dyn_neuron_stage_dict[dyn_core_mask], output_tensors, false); + } + + std::vector core_command(core_list.size()); + for (size_t core_idx = 0; core_idx < core_list.size(); core_idx++) { + std::vector cmd_info; + fill_tpu_cmd_info(cmd_info, stage, core_idx); + core_command[core_idx].cmd_info = std::move(cmd_info); + core_command[core_idx].bdc_cmd_addr = + stage->core_commands[core_idx].bdc_mem.addr + + GLOBAL_MEM_CMD_START_OFFSET; + core_command[core_idx].gdma_cmd_addr = + stage->core_commands[core_idx].gdma_mem.addr + + GLOBAL_MEM_CMD_START_OFFSET; + core_command[core_idx].cdma_cmd_addr = 0; + core_command[core_idx].hau_cmd_addr = + stage->core_commands[core_idx].hau_mem.addr + + GLOBAL_MEM_CMD_START_OFFSET; + core_command[core_idx].sdma_cmd_addr = + stage->core_commands[core_idx].sdma_mem.addr + + GLOBAL_MEM_CMD_START_OFFSET; + } + + memset(&net_info, 0x0, sizeof(tpu_net_info_t)); + net_info.input_info = std::move(input_info); + net_info.output_info = std::move(output_info); + 
net_info.core_commands = std::move(core_command); + net_info.core_list = core_list; + net_info.coeff_start_addr = stage->coeff_offset; + if (m_flags & BM_RUNTIME_SHARE_MEM) { + net_info.neuron_start_addr.assign(stage->ctx_offset.begin(), + stage->ctx_offset.end()); + } else { + net_info.neuron_start_addr.assign(net_ctx->dyn_neuron_stage_dict[dyn_core_mask]->ctx_offset.begin(), + net_ctx->dyn_neuron_stage_dict[dyn_core_mask]->ctx_offset.end()); + } + + if (bmrt_arch_info::get_bmtpu_arch() == BM1684X || + bmrt_arch_info::get_bmtpu_arch() == BM1688 ) { + net_info.kernel_func_ids = net_ctx->kernel_module_->get_multi_fullnet_func_id(core_list); + } + net_info.do_allreduce = net_ctx->do_allreduce; + if (bmrt_arch_info::get_bmtpu_arch() == BM1684X && net_ctx->do_allreduce == 1) { + net_info.allreduce_param = net_ctx->allreduce_param; + } +} +bool Bmruntime::launch_static(net_ctx_t* net_ctx, net_stage_t* stage, + const bm_tensor_t* input_tensors, int input_num, + bm_tensor_t* output_tensors, int output_num, + const std::vector &core_list, + const size_t dyn_core_mask) +{ + auto devid = net_ctx->device_id; + + tpu_net_info_t net_info; + fill_tpu_net_info(net_ctx, stage, input_tensors, input_num, output_tensors, + output_num, core_list, net_info, dyn_core_mask); + if (m_profile->is_enabled()) { + for(size_t core_idx=0; core_idx < core_list.size(); core_idx++){ + const size_t group_num = stage->core_commands[core_idx].bdc_id.size(); + auto cmd_num = m_profile->record_subnet_cmd_info(core_idx, + stage->core_commands[core_idx].gdma_mem.addr, GLOBAL_MEM_CMD_START_OFFSET, + stage->core_commands[core_idx].bdc_mem.addr, GLOBAL_MEM_CMD_START_OFFSET, + group_num); + for (size_t i = 0; i < group_num; i++) { + cmd_num[i].bdc = net_info.core_commands[core_idx].cmd_info.at(i).bdc_cmd_num; + cmd_num[i].gdma = net_info.core_commands[core_idx].cmd_info.at(i).gdma_cmd_num; } - status = bmfunc::bmdnn_1686()->_bmdnn_multi_fullnet_( - m_handle, input_num, user_input_global_offset, 
cmd_input_global_offset, - (u32*)input_data_len, output_num, user_output_global_offset, - cmd_output_global_offset, (u32*)output_data_len, - stage->bdc_mem.addr + GLOBAL_MEM_CMD_START_OFFSET, - stage->gdma_mem.addr + GLOBAL_MEM_CMD_START_OFFSET, - bdc_id_num, gdma_id_num, bdc_cmd_byte_size, - gdma_cmd_byte_size, group_num); - break; + } - case BM1880: - status = bmfunc::bmdnn_1880()->_bmdnn_multi_fullnet_( - m_handle, input_num, user_input_global_offset, cmd_input_global_offset, input_n, - input_length, input_dtype, input_stmode, real_in_stmode, output_num, - user_output_global_offset, cmd_output_global_offset, output_n, output_length, - output_dtype, output_stmode, force_out_stmode, - // when engine fetch command, the commands can only locate at the previous 4G - stage->bdc_mem.addr + GLOBAL_MEM_CMD_START_OFFSET, - stage->gdma_mem.addr + GLOBAL_MEM_CMD_START_OFFSET, bdc_id_num, gdma_id_num, group_num); - break; - case BM1682: - status = bmfunc::bmdnn_1682()->_bmdnn_multi_fullnet_( - m_handle, input_num, user_input_global_offset, cmd_input_global_offset, input_data_len, input_dtype, - output_num, user_output_global_offset, cmd_output_global_offset, output_data_len, output_dtype, - // when engine fetch command, the commands can only locate at the previous 4G - stage->bdc_mem.addr + GLOBAL_MEM_CMD_START_OFFSET, - stage->gdma_mem.addr + GLOBAL_MEM_CMD_START_OFFSET, - // m_cdma_cmd_start_address_v[net_idx] + GLOBAL_MEM_CMD_START_OFFSET, - 0, bdc_id_num, gdma_id_num, cdma_id_num, group_num); - break; - default: - BMRT_LOG(FATAL, "Unknown BM TPU"); } + bm_status_t status = bmfunc::bmdnn_base()->_bmdnn_multi_fullnet_(m_handles[devid], net_info); if (BM_SUCCESS != status) { BMRT_LOG(WRONG, "launch failed, status:%d", status); trace(); @@ -1276,8 +1435,9 @@ static inline void saveData(bm_handle_t handle, const std::string &file, } void Bmruntime::init_output_tensors(net_ctx_t* net_ctx, net_stage_t* stage, - bm_tensor_t* output_tensors, bool user_mem, bool user_stmode) + 
bm_tensor_t* output_tensors, bool user_mem, bool user_stmode) { + auto devid = net_ctx->device_id; for (u32 idx = 0; idx < stage->output_v.size(); idx++) { auto& output = output_tensors[idx]; output.shape = stage->output_v[idx].shape; @@ -1299,22 +1459,276 @@ void Bmruntime::init_output_tensors(net_ctx_t* net_ctx, net_stage_t* stage, } else { mem_size = bmrt_tensor_bytesize(&output); } - must_alloc_device_mem(&output.device_mem, mem_size, "output_mem"); + must_alloc_device_mem(devid, &output.device_mem, mem_size, "output_mem"); } } } -bool Bmruntime::launch(int net_idx, const bm_tensor_t* input_tensors, int input_num, - bm_tensor_t* output_tensors, int output_num, bool user_mem, bool user_stmode) -{ +bool Bmruntime::launch(int net_idx, const bm_tensor_t *input_tensors, + int input_num, bm_tensor_t *output_tensors, + int output_num, bool user_mem, bool user_stmode) { + auto net_ctx = m_net_ctx_v[net_idx]; + int stage_idx = get_stage_idx(net_ctx, input_tensors); + if (stage_idx == -1) { + BMRT_LOG(WRONG, "Shapes of the input tensors are not supported"); + return false; + } + auto &stage = net_ctx->stage_v[stage_idx]; + // multi core info + auto core_num = stage->core_commands.size(); + std::vector core_list(core_num); + std::iota(core_list.begin(), core_list.end(), 0); + + return launch_multi_cores(net_idx, input_tensors, input_num, output_tensors, + output_num, core_list, user_mem, user_stmode); +} + +std::vector +Bmruntime::refine_core_list(const net_stage_t *stage, + const std::vector &core_list, + bm_handle_t handle) { + // check valid core_list + uint32_t arch_core_num; + bm_get_tpu_scalar_num(handle, &arch_core_num); + for (auto &core_idx : core_list) { + if (core_idx >= arch_core_num || core_idx < 0) { + BMRT_LOG(FATAL, "invalid core_id:%d, arch max core id:%d\n", core_idx, arch_core_num-1); + } + } + + // use core_num to generate core_list + std::vector full_core_list(m_core_num); + std::iota(full_core_list.begin(), full_core_list.end(), 0); + auto 
bmodel_core_num = stage->core_commands.size(); + if (bmodel_core_num > full_core_list.size()) { + BMRT_LOG(FATAL, + "(%d) the bmodel is not compatible with the current target.\n", + full_core_list.size() * 14); + return {}; + } + std::vector final_core_list; + for (auto core_id : core_list) { + if (core_id < m_core_num && + std::find(final_core_list.begin(), final_core_list.end(), core_id) == + final_core_list.end()) + final_core_list.emplace_back(core_id); + } + if (final_core_list.size() > bmodel_core_num) { + final_core_list.resize(bmodel_core_num); + } else if (final_core_list.size() < bmodel_core_num) { + for (auto core_id : full_core_list) { + if (std::find(final_core_list.begin(), final_core_list.end(), core_id) == + final_core_list.end()) + final_core_list.emplace_back(core_id); + if (final_core_list.size() == bmodel_core_num) + break; + } + } + + BMRT_LOG_RUN(DEBUG, { + std::string core_list_str = vector_to_string(final_core_list); + BMRT_LOG(DEBUG, "Launch on cores: %s", core_list_str.c_str()); + }); + + return std::move(final_core_list); +} + +/* + dyn_core_mask record the unique core-mession, combine with stage_idx and core_list + for exp: stage_idx = 1, max_arch_core_num = 8, core_list = 0,1 core_mask = 100000011 + stage_idx = 1, max_arch_core_num = 8, core_list = 1 core_mask = 100000010 +*/ +uint32_t Bmruntime::get_dyn_core_mask(int stage_idx, const std::vector core_list) { + uint32_t core_mask = 0; + if (m_core_num > 1) { // use core_num to judge multi_core arch + if (stage_idx > ((std::numeric_limits::max)() >> m_core_num)) { + BMRT_LOG(FATAL, "get dyn neuron code overlap"); + } + // use core_num to get mask with size of core_num + core_mask = stage_idx << m_core_num; + for (auto &core_idx : core_list) { + core_mask |= (1 << core_idx); + } + } + return core_mask; +} + +std::vector Bmruntime::get_core_list_from_core_mask(uint32_t dyn_core_mask) { + std::vector core_list; + // use core_num to generate mask and get core_list + for (size_t i = 0; i < 
m_core_num; ++i) { + if (dyn_core_mask & 0x1) { + core_list.emplace_back(i); + } + dyn_core_mask >>= 1; + } + if (core_list.empty()) core_list = {0}; + return core_list; +} + +void Bmruntime::net_ctx_alloc_dyn_neuron(net_ctx_t* net_ctx, const size_t dyn_core_mask, + const net_stage_t *common_stage_info, bool use_multi_subnet) { + if (net_ctx->dyn_neuron_stage_dict.find(dyn_core_mask) != net_ctx->dyn_neuron_stage_dict.end()) { + return; + } + + auto devid = net_ctx->device_id; + auto dyn_neuron_info = new dyn_neuron_stage_t(); + dyn_neuron_info->neuron_mem.resize(common_stage_info->neuron_size.size()); + for (size_t i = 0; i < common_stage_info->neuron_size.size(); ++i) { + auto &mem = dyn_neuron_info->neuron_mem[i]; + must_alloc_device_mem_u64(devid, &mem, common_stage_info->neuron_size[i], std::to_string(dyn_core_mask) + "_neuron_mem" + std::to_string(i)); + BMRT_DEBUG("dyn alloc neuron_mem: %lu\n", dyn_neuron_info->neuron_mem[i].u.device.device_addr); + } + + auto &ctx_sizes = common_stage_info->neuron_size; + auto ctx_start = common_stage_info->ctx_start & bmrt_arch_info::addr_mask(); + if (!ctx_sizes.empty()) { + dyn_neuron_info->ctx_offset.resize(ctx_sizes.size()); + for (size_t i = 0; i < ctx_sizes.size(); ++i) + { + u64 ctx_addr = bm_mem_get_device_addr_u64(dyn_neuron_info->neuron_mem[i]); + dyn_neuron_info->ctx_offset[i] = ctx_addr - ctx_start; + if (i > 0) + { + dyn_neuron_info->ctx_offset[i] -= common_stage_info->ctx_borders[i - 1]; + } + } + } else { + dyn_neuron_info->ctx_offset.emplace_back(0); + } + + dyn_neuron_info->input_v = common_stage_info->input_v; + dyn_neuron_info->output_v = common_stage_info->output_v; + + if (common_stage_info->io_size == 0) { + for (size_t i = 0; i < dyn_neuron_info->input_v.size(); ++i) { + u64 io_addr = bm_mem_get_device_addr(common_stage_info->input_v[i].dev_mem); + int tag = ((io_addr >> 36) & 0x7); + if (tag < 3) { + io_addr += dyn_neuron_info->ctx_offset[get_mem_index( + common_stage_info->ctx_borders, 
common_stage_info->ctx_start, + io_addr)]; + io_addr &= bmrt_arch_info::addr_mask(); + dyn_neuron_info->input_v[i].dev_mem = bm_mem_from_device( + io_addr, common_stage_info->input_v[i].dev_mem.size); + } + } + + for (size_t i = 0; i < dyn_neuron_info->output_v.size(); ++i) { + u64 io_addr = bm_mem_get_device_addr(common_stage_info->output_v[i].dev_mem); + int tag = ((io_addr >> 36) & 0x7); + if (tag < 3) { + io_addr += dyn_neuron_info->ctx_offset[get_mem_index( + common_stage_info->ctx_borders, common_stage_info->ctx_start, + io_addr)]; + io_addr &= bmrt_arch_info::addr_mask(); + dyn_neuron_info->output_v[i].dev_mem = bm_mem_from_device( + io_addr, common_stage_info->output_v[i].dev_mem.size); + } + } + } + + std::unique_lock neuron_stage_lock(net_ctx->neuron_mutex); + net_ctx->dyn_neuron_stage_dict.emplace(dyn_core_mask, dyn_neuron_info); + + if (use_multi_subnet) { + fill_subnet_dyn_neuron_tensor(net_ctx, dyn_core_mask, common_stage_info); + } + neuron_stage_lock.unlock(); +} + +void Bmruntime::fill_subnet_dyn_neuron_tensor( + net_ctx_t* net_ctx, const size_t dyn_core_mask, + const net_stage_t *common_stage_info) { + for (auto &subnet_tensor : common_stage_info->subnet_tensor_v) { + tensor_ext_t bm_tensor_ext = subnet_tensor.second; + std::string tensor_name = subnet_tensor.first; + + if (bm_tensor_ext.mem_type & 0x1 == MEM_TYPE_TPU) { + u64 tensor_addr = bm_tensor_ext.tensor_info.device_mem.u.device.device_addr; //fake device addr, record bmodel compile addr + u32 tensor_size = bm_tensor_ext.tensor_info.device_mem.size; + if (tensor_addr < common_stage_info->ctx_start) { + bm_tensor_ext.tensor_info.device_mem = bm_mem_from_device( + (tensor_addr & bmrt_arch_info::addr_mask()) + common_stage_info->coeff_offset, tensor_size); + } else { + u32 idx = get_mem_index(common_stage_info->ctx_borders, common_stage_info->ctx_start, tensor_addr); + tensor_addr += net_ctx->dyn_neuron_stage_dict[dyn_core_mask]->ctx_offset[idx]; + tensor_addr &= bmrt_arch_info::addr_mask(); + 
bm_tensor_ext.tensor_info.device_mem = bm_mem_from_device(tensor_addr, tensor_size); + } + } + + if (bm_tensor_ext.mem_type >= MEM_TYPE_CPU) { + float* host_mem = NULL; + bool need_mem_alloc = true; + if (bm_tensor_ext.host_mem.type == HOST_MEM_MMAP) { +#ifndef SOC_MODE + BMRT_LOG(FATAL, "Only soc mode run here"); +#else + bm_status_t ret = bm_mem_mmap_device_mem(m_handles[net_ctx->device_id], &bm_tensor_ext.tensor_info.device_mem, (u64 *)&host_mem); + if (ret == BM_SUCCESS) { + need_mem_alloc = false; + } else { + BMRT_LOG(WRONG, "mmap failed, malloc host memory"); + } +#endif + } + + if (need_mem_alloc) { + if (!net_ctx->dyn_neuron_stage_dict[dyn_core_mask]->cpu_addr + && common_stage_info->cpu_mem_size > 0) { + net_ctx->dyn_neuron_stage_dict[dyn_core_mask]->cpu_addr = new float[common_stage_info->cpu_mem_size]; + } + if (net_ctx->dyn_neuron_stage_dict[dyn_core_mask]->cpu_addr) { + host_mem = net_ctx->dyn_neuron_stage_dict[dyn_core_mask]->cpu_addr + bm_tensor_ext.host_mem.tensor_cpu_addr; + } else { + host_mem = new float[bm_tensor_ext.host_mem.size]; + } + bm_tensor_ext.host_mem.type = HOST_MEM_ALLOC; + } + bm_tensor_ext.host_mem.addr = host_mem; + } + + net_ctx->dyn_neuron_stage_dict[dyn_core_mask]->subnet_tensor_v.insert(make_pair(tensor_name, bm_tensor_ext)); + } +} + +void Bmruntime::pre_alloc_neuron_multi_cores(int net_idx, int stage_idx, const std::vector &core_list) { + if (m_flags & BM_RUNTIME_SHARE_MEM) { + return; + } + auto net_ctx = m_net_ctx_v[net_idx]; + auto& stage = net_ctx->stage_v[stage_idx]; + auto final_core_list = refine_core_list(stage, core_list, m_handles[net_ctx->device_id]); + uint32_t core_mask = get_dyn_core_mask(stage_idx, final_core_list); + bool use_multi_subnet = stage->subnet_num > 1 || + (stage->subnet_num == 1 && stage->subnet_v[0]->subnet_mode == SUBNET_MODE_CPU); + net_ctx_alloc_dyn_neuron(net_ctx, core_mask, stage, use_multi_subnet); +} + +bool Bmruntime::launch_multi_cores(int net_idx, + const bm_tensor_t *input_tensors, + 
int input_num, bm_tensor_t *output_tensors, + int output_num, + const std::vector &core_list, + bool user_mem, bool user_stmode) { + auto net_ctx = m_net_ctx_v[net_idx]; + auto devid = net_ctx->device_id; bool save_io = false; char *nt = getenv("BMRT_SAVE_IO_TENSORS"); if (nt != nullptr) save_io = static_cast(atoi(nt)); if (save_io) - saveData(m_handle, "input_ref_data.dat.bmrt", input_tensors, input_num); + saveData(m_handles[devid], "input_ref_data.dat.bmrt", input_tensors, input_num); + + // static int save_count = 0; + // BMRT_LOG_RUN(DEBUG, { + // std::string filename = std::to_string(save_count) + "_input.dat"; + // saveData(m_handles[devid], filename, input_tensors, input_num); + // }); + // check parameters - auto net_ctx = m_net_ctx_v[net_idx]; int stage_idx = get_stage_idx(net_ctx, input_tensors); if (stage_idx == -1) { BMRT_LOG(WRONG, "Shapes of the input tensors are not supported"); @@ -1322,48 +1736,651 @@ bool Bmruntime::launch(int net_idx, const bm_tensor_t* input_tensors, int input_ } auto& stage = net_ctx->stage_v[stage_idx]; + // process core list + auto final_core_list = refine_core_list(stage, core_list, m_handles[devid]); + if (false == check_launch_params(net_ctx, stage, input_tensors, input_num, output_tensors, output_num, user_stmode)) { return false; } + bool use_multi_subnet = stage->subnet_num > 1 || + (stage->subnet_num == 1 && stage->subnet_v[0]->subnet_mode == SUBNET_MODE_CPU); + uint32_t core_mask = get_dyn_core_mask(stage_idx, final_core_list);; + if (!(m_flags & BM_RUNTIME_SHARE_MEM)) { + net_ctx_alloc_dyn_neuron(net_ctx, core_mask, stage, use_multi_subnet); + } + // init output tensors init_output_tensors(net_ctx, stage, output_tensors, user_mem, user_stmode); bool ret = true; - m_profile->init(net_ctx->net_name, stage->net_profile, stage->net_stat); - if (stage->subnet_num > 1 || - (stage->subnet_num == 1 && stage->subnet_v[0]->subnet_mode == SUBNET_MODE_CPU)) { + m_profile->init(net_ctx->net_name, stage->net_profile, 
stage->net_stat, final_core_list); + if (use_multi_subnet) { ret = launch_multi_subnet(net_ctx, stage, input_tensors, input_num, output_tensors, - output_num); + output_num, core_mask); } else { m_profile->begin_subnet(net_ctx, 0, 0, SUBNET_MODE_TPU); m_profile->set_extra_data(net_ctx->is_dynamic); if(net_ctx->is_dynamic) { // launch_ir calls bm_thread_sync internally - ret = launch_ir(net_ctx, stage, input_tensors, input_num, output_tensors, output_num); + ret = launch_ir(net_ctx, stage, input_tensors, input_num, output_tensors, output_num, core_mask); } else { // launch_static does not call bm_thread_sync internally - ret = launch_static(net_ctx, stage, input_tensors, input_num, output_tensors, output_num); + ret = launch_static(net_ctx, stage, input_tensors, input_num, output_tensors, output_num, final_core_list, core_mask); // so sync at some cases if(m_profile->is_enabled() || save_io){ - ret = (BM_SUCCESS == bm_thread_sync(m_handle)); + sync_cores(m_handles[devid], final_core_list); } } m_profile->end_subnet(net_ctx); } m_profile->deinit(); + if (save_io) - saveData(m_handle, "output_ref_data.dat.bmrt", output_tensors, output_num); + saveData(m_handles[devid], "output_ref_data.dat.bmrt", output_tensors, output_num); + + // BMRT_LOG_RUN(DEBUG, { + // std::string filename = std::to_string(save_count++) + "_output.dat"; + // saveData(m_handles[devid], filename, input_tensors, input_num); + // }); + // free output mem if failed if (ret == false && user_mem == false) { for (int idx = 0; idx < output_num; idx++) { - must_free_device_mem(output_tensors[idx].device_mem); + must_free_device_mem(devid, output_tensors[idx].device_mem); + } + } + return ret; +} +static mem_cascade_t * +get_tensor(std::vector *tensors, const std::string &name) { + for (auto &t : *tensors) { + if (t.name == name) { + return &t; + } + } + return nullptr; +} + +static mem_cascade_t * +get_tensor(std::vector *tensors, const std::string &name, + int32_t devid) { + for (auto &t : *tensors) { + 
if (t.name == name && t.device_id == devid) { + return &t; + } + } + return nullptr; +} + +// std::atomic comm_time{0}; +// std::atomic comm_count{0}; + +bm_tensor_t * +Bmruntime::cascade_prepare_input(const std::string &name, + int32_t devid, + std::vector *src, + std::vector *dst) { + auto from = get_tensor(dst, name, devid); + if (!from) { + from = get_tensor(dst, name); + if (!from) { + return nullptr; + } + } + if (from->device_id == devid) { + return &from->tensor; + } + + auto to = get_tensor(src, name, devid); + if (!to) { + return nullptr; + } + +// #ifdef __linux__ +// struct timeval t1, t2; +// gettimeofday(&t1, NULL); +// #else +// struct timespec t1, t2; +// bmrt_clock_gettime(0, &t1); +// #endif + + bm_tensor_t from_tensor, to_tensor; + bmrt_tensor_with_device(&from_tensor, from->tensor.device_mem, + from->tensor.dtype, from->tensor.shape); + bmrt_tensor_with_device(&to_tensor, to->tensor.device_mem, + to->tensor.dtype, from->tensor.shape); + bm_memcpy_p2p(m_handles[from->device_id], from->tensor.device_mem, + m_handles[devid], to->tensor.device_mem); + // when net is dynamic, input_shape need to be changed to its real shape + to->tensor.shape = from->tensor.shape; + +// #ifdef __linux__ +// gettimeofday(&t2, NULL); +// long use1 = (t2.tv_sec - t1.tv_sec) * 1000000 + t2.tv_usec - t1.tv_usec; +// #else +// bmrt_clock_gettime(0, &t2); +// long use1 = (t2.tv_sec - t1.tv_sec) * 1000000 + (t2.tv_nsec - t1.tv_nsec)/1000; +// #endif +// +// comm_time += use1; +// comm_count += 1; + + return &to->tensor; +} + +bm_tensor_t * +Bmruntime::cascade_prepare_output(const std::string &name, uint32_t devid, + std::vector *dst) { + auto from = get_tensor(dst, name, devid); + if (!from) { + return nullptr; + } + return &from->tensor; +} + +static bool update_tensor_shape(std::vector *tensors, + const std::string &name, int32_t devid, + const bm_tensor_t &ref) { + for (auto &t : *tensors) { + if (t.name == name && t.device_id == devid) { + t.tensor.shape = ref.shape; + 
return true; + } + } + return false; +} + +bool Bmruntime::cascade_update_output_shape(net_ctx_t *net_ctx, + std::vector *dst, + std::vector out_tensors) { + int out_num = net_ctx->output_name_v.size(); + bool ret = false; + for (int i = 0; i < out_num; i++) { + ret = update_tensor_shape(dst, net_ctx->output_name_v[i], + net_ctx->device_id, out_tensors[i]); + if (!ret) { + return false; + } + } + return true; +} + +bool Bmruntime::cascade_thread_step(int net_idx, + vector *src, + vector *dst, + bm_handle_t m_handle) { + auto ctx = m_net_ctx_v[net_idx]; + int in_num = ctx->input_name_v.size(); + int out_num = ctx->output_name_v.size(); + std::vector in_tensors(in_num); + std::vector out_tensors(out_num); + for (int i = 0; i < in_num; i++) { + auto in = cascade_prepare_input(ctx->input_name_v[i], + ctx->device_id, src, dst); + if (in == nullptr) { + BMRT_LOG(WRONG, "input tensor[%s] are not in %d", + ctx->input_name_v[i].c_str(), ctx->device_id); + return false; + } + in_tensors[i] = *in; + } + for (int i = 0; i < out_num; i++) { + auto out = + cascade_prepare_output(ctx->output_name_v[i], ctx->device_id, dst); + if (out == nullptr) { + BMRT_LOG(WRONG, "output tensor[%s] are not in %d", + ctx->output_name_v[i].c_str(), ctx->device_id); + return false; + } + out_tensors[i] = *out; + } + + auto ret = launch(net_idx, in_tensors.data(), in_num, out_tensors.data(), + out_num, true, false); + + if (ctx->is_dynamic) { + ret = cascade_update_output_shape(ctx, dst, out_tensors); + } + + if (!ret) { + BMRT_LOG(WRONG, "launch %d is not correct", net_idx); + return false; + } + return true; +} + +bool Bmruntime::cascade_thread_global_move_data( + int devid, bm_handle_t handle, + std::vector *param) { + std::vector core_list{0}; + auto func_id = kernel_modules[devid]->get_global_move_1684x_func_id(core_list)[0]; + int tensor_num = param->size(); + bool ret = false; + for (int i = 0; i < tensor_num; i++) { + auto status = tpu_kernel_launch(handle, func_id, &(param->at(i)), 
sizeof(tpu_kernel_global_move_1684x_t)); + if (BM_SUCCESS != status) { + ret = false; + break; + } else { + ret = true; } } return ret; } +bool Bmruntime::launch(const net_cascade_t *net_c, + const bm_tensor_t *input_tensors, int input_num, + bm_tensor_t *output_tensors, int output_num) { + // run step by step + if ((size_t)input_num != net_c->input_names.size() || + (size_t)output_num != net_c->output_names.size()) { + BMRT_LOG(WRONG, "launch parameter is not correct"); + return false; + } + // prepare all inputs and outputs + std::vector src(net_c->hidden_inputs); + std::vector dst(net_c->hidden_outputs); + for (size_t i = 0; i < input_num; i++) { + int devid = net_c->net_info.input_loc_devices[i]; + dst.emplace_back(mem_cascade_t{net_c->input_names[i], devid, input_tensors[i]}); + } + for (size_t i = 0; i < output_num; i++) { + int devid = net_c->net_info.output_loc_devices[i]; + dst.emplace_back(mem_cascade_t{net_c->output_names[i], devid, output_tensors[i]}); + } + + for (size_t s = 0; s < net_c->step_ids.size(); s++) { +// #ifdef __linux__ +// struct timeval t1, t2; +// gettimeofday(&t1, NULL); +// #else +// struct timespec t1, t2; +// bmrt_clock_gettime(0, &t1); +// #endif + + // device_num = 4/8 support fast_allreduce for both prefill and decode + // device_num = 6 only supports fast_allreduce for prefill + // device_num = 2 does not support either + u32 count = bmrt_shape_count(&input_tensors[0].shape); + if (using_fast_allreduce && + net_c->step_ids[0].size() == m_device_num && + net_c->step_ids.size() > 1 && + (m_device_num == 4 || m_device_num == 8 || m_device_num == 6)) { + bool skip = m_device_num == 8 && (s == 1 || s == 2 || s == 3 || s == 5 || s == 6 || s == 7); + skip = skip || (m_device_num == 6 && (s == 1 || s == 2 || s == 3 || s == 5 || s == 6 || s == 7)); + skip = skip || (m_device_num == 4 && (s == 1 || s == 2 || s == 4 || s == 5)); + skip = skip || (m_device_num == 2 && (s == 1 || s == 3)); + if (skip) { + if (net_c->is_dynamic) { + for (int 
devid = 0; devid < net_c->step_ids[s].size(); devid++) { + auto &net_ctx = m_net_ctx_v[net_c->step_ids[s][devid]]; + for (int idx = 0; idx < net_ctx->output_name_v.size(); idx++) { + auto out_name = net_ctx->output_name_v[idx]; + for (auto &t : dst) { + if (t.name == out_name && t.device_id == devid) { + t.tensor.shape.dims[1] = input_tensors[0].shape.dims[1]; + } + } + } + } + } + continue; + } + + if ((m_device_num == 8 && (s == 0 || s == 4)) || + (m_device_num == 6 && (s == 0 || s == 4)) || + (m_device_num == 4 && (s == 0 || s == 3))) { + int pre_s = s; + int ss = s + 1; + int next_s = s + std::log2(net_c->step_ids.size()); + std::vector core_list{0}; + std::vector params(net_c->step_ids[ss].size()); + std::vector inputs, outputs; + for (int devid = 0; devid < net_c->step_ids[ss].size(); devid++) { + bm_shape_t shape = input_tensors[0].shape; + + bm_tensor_t in_tensor; + auto pre_net_idx = net_c->step_ids[pre_s]; + auto pre_ctx = m_net_ctx_v[pre_net_idx[devid]]; + std::string in_name = ""; + int32_t in_idx = 0; + bm_data_type_t bm_dtype = BM_FLOAT32; + auto cur_net_idx = net_c->step_ids[ss]; + auto &cur_ctx = m_net_ctx_v[cur_net_idx[devid]]; + auto &cur_in_name_v = cur_ctx->input_name_v; + for (int idx = 0; idx < pre_ctx->output_name_v.size(); idx++) { + auto name = pre_ctx->output_name_v[idx]; + if (std::find(cur_in_name_v.begin(), cur_in_name_v.end(), name) != cur_in_name_v.end()) { + in_name = name; + in_idx = idx; + bm_dtype = pre_ctx->output_type_v[idx]; + break; + } + } + assert(in_name != ""); + in_tensor = get_tensor(&dst, in_name, devid)->tensor; + inputs.push_back(in_tensor); + + auto net_idx_v = net_c->step_ids[ss]; + auto net_ctx = m_net_ctx_v[net_idx_v[devid]]; + auto in1_tensor = get_tensor(&src, net_ctx->input_name_v[1], devid)->tensor; + + + bm_tensor_t out_tensor; + if (ss == 1) { + auto net_idx = net_c->step_ids[next_s]; + auto &ctx = m_net_ctx_v[net_idx[devid]]; + auto &out_name_v = ctx->output_name_v; + + auto next_net_idx = 
net_c->step_ids[next_s+1]; + auto next_ctx = m_net_ctx_v[next_net_idx[devid]]; + std::string out_name = ""; + int32_t out_idx = 0; + for (int idx = 0; idx < next_ctx->input_name_v.size(); idx++) { + auto name = next_ctx->input_name_v[idx]; + if (std::find(out_name_v.begin(), out_name_v.end(), name) != out_name_v.end()) { + out_name = name; + out_idx = idx; + break; + } + } + assert(out_name != ""); + out_tensor = get_tensor(&dst, out_name, devid)->tensor; + outputs.push_back(out_tensor); + } else { + auto &out_name_v = net_c->output_names; + + auto next_net_idx = net_c->step_ids[next_s]; + auto next_ctx = m_net_ctx_v[next_net_idx[devid]]; + std::string out_name = ""; + int32_t out_idx = 0; + for (int idx = 0; idx < next_ctx->output_name_v.size(); idx++) { + auto name = next_ctx->output_name_v[idx]; + if (std::find(out_name_v.begin(), out_name_v.end(), name) != out_name_v.end()) { + out_name = name; + out_idx = idx; + break; + } + } + assert(out_name != ""); + out_tensor = get_tensor(&dst, out_name, devid)->tensor; + outputs.push_back(out_tensor); + } + + int dtype = 0; + if (bm_dtype == BM_FLOAT32) { + dtype = (2 << 1) | 1; + } else if (bm_dtype == BM_FLOAT16) { + dtype = (1 << 1) | 1; + } else if (bm_dtype == BM_BFLOAT16) { + dtype = (5 << 1) | 1; + } else { + BMRT_LOG(WRONG, "Allreduce only support float32/float16/bfloat16 now"); + } + // size should be aligned to 4096 for allreduce + size_t type_size = bmrt_data_type_size(bm_dtype); + u32 aligned_count = ALIGN(type_size * count, 4096) / type_size; + + tpu_kernel_allreduce_1684x_t param = { + .i_global_addr = {}, + .i_global_addr_1 = {}, + .o_global_addr = {}, + .count = aligned_count, + .dtype = dtype, + .reduce_method = 1, // sum + // .group = {m_devids[0], m_devids[1], m_devids[2], m_devids[3], + // m_devids[4], m_devids[5], m_devids[6], m_devids[7]}, + // .rank = m_devids[devid], + .group = {0, 1, 2, 3, 4, 5, 6, 7}, + .rank = devid, + .chip_num = 8, // card_chip_num, + .group_size = m_device_num + }; + + // 
clear addr high 6 bits because tpu-train use those bits as device id + // TODO: move the logic to tpu-train + param.i_global_addr[devid] = (in_tensor.device_mem.u.device.device_addr << 6) >> 6; + param.i_global_addr_1[devid] = (in1_tensor.device_mem.u.device.device_addr << 6) >> 6; + param.o_global_addr[devid] = (out_tensor.device_mem.u.device.device_addr << 6) >> 6; + params[devid] = param; + } + + for (int i = 0; i < net_c->step_ids[ss].size(); i++) { + for (int j = 0; j < net_c->step_ids[ss].size(); j++) { + params[i].i_global_addr[j] = params[j].i_global_addr[j]; + params[i].i_global_addr_1[j] = params[j].i_global_addr_1[j]; + params[i].o_global_addr[j] = params[j].o_global_addr[j]; + } + } + + for (int devid = 0; devid < net_c->step_ids[s].size(); devid++) { + auto net_idx = net_c->step_ids[s]; + auto net_ctx = m_net_ctx_v[net_idx[devid]]; + net_ctx->do_allreduce = 1; + net_ctx->allreduce_param = params[devid]; + } + + } + } + + std::set devices; + if (net_c->step_ids[s].size() > 1 && net_c->num_device > 1 && + m_device_num > 1) { + for (auto net_idx : net_c->step_ids[s]) { + auto devid = m_net_ctx_v[net_idx]->device_id; + if (devices.find(devid) != devices.end()) { + m_cascade_thread_v[devid]->sync(); + } + devices.insert(devid); + m_cascade_thread_v[devid]->run(net_idx, &src, &dst); + } + for (auto d: devices) { + if (false == m_cascade_thread_v[d]->sync()){ + return false; + } + } + } else { + // single device + for (auto net_idx : net_c->step_ids[s]) { + int devid = m_net_ctx_v[net_idx]->device_id; + devices.insert(devid); + if (false == cascade_thread_step(net_idx, &src, &dst, m_handles[devid])) { + return false; + } + } + for (auto d : devices) { + bm_handle_sync(m_handles[d]); + } + } + +// #ifdef __linux__ +// gettimeofday(&t2, NULL); +// long use1 = (t2.tv_sec - t1.tv_sec) * 1000000 + t2.tv_usec - t1.tv_usec; +// #else +// bmrt_clock_gettime(0, &t2); +// long use1 = (t2.tv_sec - t1.tv_sec) * 1000000 + (t2.tv_nsec - t1.tv_nsec)/1000; +// #endif + + // 
BMRT_LOG(INFO, "step %d calc time(ms): %f", s, (float)use1 / 1000); + } + // BMRT_LOG(INFO, "all-reduce comm time(ms): %f", (float)comm_time.load() / 1000); + // BMRT_LOG(INFO, "communication count: %d", comm_count.load()); + // comm_time = 0; + // comm_count = 0; + + if (net_c->is_dynamic) { + for (int i = 0; i < output_num; i++) { + int devid = net_c->net_info.output_loc_devices[i]; + std::string name = net_c->output_names[i]; + auto out = get_tensor(&dst, name, devid); + output_tensors[i].shape = out->tensor.shape; + } + } + return true; +} + +bool Bmruntime::memcpy_s2d_parallel(bm_tensor_t tensors[], + void * datas[], + int tensor_num[], + int device_num) { + if (m_cascade_thread_v.size() == 0 && device_num == 1) { + auto status = bm_memcpy_s2d(m_handles[0], tensors[0].device_mem, datas[0]); + return BM_SUCCESS == status; + } + + if (m_cascade_thread_v.size() < device_num) { + BMRT_LOG(WRONG, "It doesn't support s2d parallel because device_num %d is larger than cascade_net thread_num %d.", + device_num, m_cascade_thread_v.size()); + } + int offset = 0; + for (int d = 0; d < device_num; ++d) { + m_cascade_thread_v[d]->s2d(tensors+offset, datas+offset, tensor_num[d]); + offset += tensor_num[d]; + } + + for (int d = 0; d < device_num; ++d) { + bool ret = m_cascade_thread_v[d]->sync(); + if (!ret) { + return ret; + } + } + return true; +} + +bool Bmruntime::memcpy_d2s_parallel(void *datas[], + bm_tensor_t tensors[], + int tensor_num[], + int device_num) { + if (m_cascade_thread_v.size() == 0 && device_num == 1) { + auto status = bm_memcpy_d2s(m_handles[0], datas[0], tensors[0].device_mem); + return BM_SUCCESS == status; + } + + if (m_cascade_thread_v.size() < device_num) { + BMRT_LOG(WRONG, "It doesn't support d2s parallel because device_num %d is larger than cascade_net thread_num %d.", + device_num, m_cascade_thread_v.size()); + } + int offset = 0; + for (int d = 0; d < device_num; ++d) { + m_cascade_thread_v[d]->d2s(datas+offset, tensors+offset, tensor_num[d]); 
+ offset += tensor_num[d]; + } + + for (int d = 0; d < device_num; ++d) { + bool ret = m_cascade_thread_v[d]->sync(); + if (!ret) { + return ret; + } + } + + return true; +} + +bool Bmruntime::memcpy_d2d_byte_parallel(bm_tensor_t dst_tensors[], + size_t dst_offsets[], + bm_tensor_t src_tensors[], + size_t src_offsets[], + size_t sizes[], + int tensor_num[], + int device_num) { + if (m_cascade_thread_v.size() == 0 && device_num == 1) { + auto status = bm_memcpy_d2d_byte(m_handles[0], dst_tensors[0].device_mem, dst_offsets[0], + src_tensors[0].device_mem, src_offsets[0], sizes[0]); + return BM_SUCCESS == status; + } + + if (m_cascade_thread_v.size() < device_num) { + BMRT_LOG(WRONG, "It doesn't support d2d parallel because device_num %d is larger than cascade_net thread_num %d.", + device_num, m_cascade_thread_v.size()); + } + + int offset = 0; + for (int d = 0; d < device_num; ++d) { + m_cascade_thread_v[d]->d2d(dst_tensors + offset, dst_offsets + offset, + src_tensors + offset, src_offsets + offset, + sizes + offset, tensor_num[d]); + offset += tensor_num[d]; + } + + for (int d = 0; d < device_num; ++d) { + bool ret = m_cascade_thread_v[d]->sync(); + if (!ret) { + return ret; + } + } + + return true; +} + +bool Bmruntime::memcpy_d2d_stride_ex_parallel(bm_tensor_t dst_tensors[], + size_t dst_offsets[], + bm_shape_t dst_strides[], + bm_tensor_t src_tensors[], + size_t src_offsets[], + bm_shape_t src_strides[], + bm_shape_t shapes[], + int tensor_num[], + int device_num) { + BMRT_ASSERT(bmrt_arch_info::get_bmtpu_arch() == BM1684X); + std::vector> params(device_num); + int processed_num = 0; + for (int i = 0; i < device_num; ++i) { + params[i].resize(tensor_num[i]); + auto dst_tens = dst_tensors + processed_num; + auto dst_offs = dst_offsets + processed_num; + auto dst_strs = dst_strides + processed_num; + auto src_tens = src_tensors + processed_num; + auto src_offs = src_offsets + processed_num; + auto src_strs = src_strides + processed_num; + auto p_shape = shapes + 
processed_num; + for (int j = 0; j < tensor_num[i]; ++j) { + params[i][j].num_dims = p_shape[j].num_dims; + if (params[i][j].num_dims > 4) { + BMRT_LOG(WRONG, "Only support shape/stride num_dims <= 4, but num_dims passed is %d", params[i][j].num_dims); + } + for (int k = 0; k < p_shape[j].num_dims; ++k) { + params[i][j].dst_stride[k] = dst_strs[j].dims[k]; + params[i][j].src_stride[k] = src_strs[j].dims[k]; + params[i][j].shape[k] = p_shape[j].dims[k]; + } + params[i][j].dst_global_addr = bm_mem_get_device_addr(dst_tens[j].device_mem) + dst_offs[j]; + params[i][j].src_global_addr = bm_mem_get_device_addr(src_tens[j].device_mem) + src_offs[j]; + params[i][j].type_size = bmrt_data_type_size(src_tens[j].dtype); + int dst_type_size = bmrt_data_type_size(dst_tens[j].dtype); + if (params[i][j].type_size != dst_type_size) { + BMRT_LOG(WRONG, "dst_type_size should be the same as src_type_size", dst_type_size, params[i][j].type_size); + } + } + processed_num += tensor_num[i]; + } + + // 1 device + if (m_cascade_thread_v.size() == 0 && device_num == 1) { + auto ret = cascade_thread_global_move_data(0, m_handles[0], ¶ms[0]); + return ret; + } + + if (m_cascade_thread_v.size() < device_num) { + BMRT_LOG(WRONG, "It doesn't support d2d_stride_ex parallel because device_num %d is larger than cascade_net thread_num %d.", + device_num, m_cascade_thread_v.size()); + } + + // multi devices + for (int d = 0; d < device_num; ++d) { + m_cascade_thread_v[d]->d2d_stride_ex(d, ¶ms[d]); + } + + for (int d = 0; d < device_num; ++d) { + bool ret = m_cascade_thread_v[d]->sync(); + if (!ret) { + return ret; + } + } + return true; +} + static bool check_launch_params(net_ctx_t* net_ctx, void* const input_datas[], const bm_shape_t input_shapes[], int input_num, void* output_datas[], bm_shape_t output_shapes[], int output_num, @@ -1400,37 +2417,32 @@ static bool check_launch_params(net_ctx_t* net_ctx, void* const input_datas[], return true; } -bool Bmruntime::launch(int net_idx, void* const 
input_datas[], const bm_shape_t input_shapes[], - int input_num, void* output_datas[], bm_shape_t output_shapes[], - int output_num, bool user_mem) +bool Bmruntime::launch_multi_cores(int net_idx, void* const input_datas[], const bm_shape_t input_shapes[], + int input_num, void* output_datas[], bm_shape_t output_shapes[], int output_num, + bool user_mem, const std::vector& core_list) { auto net_ctx = m_net_ctx_v[net_idx]; + auto devid = net_ctx->device_id; // check parameters if (false == check_launch_params(net_ctx, input_datas, input_shapes, input_num, output_datas, output_shapes, output_num, user_mem)) { return false; } // prepare input and output tensors - #ifdef __linux__ - bm_tensor_t input_tensors[input_num]; - bm_tensor_t output_tensors[output_num]; - #else - std::shared_ptr input_tensors_(new bm_tensor_t[input_num], std::default_delete()); - bm_tensor_t* input_tensors = input_tensors_.get(); - std::shared_ptr output_tensors_(new bm_tensor_t[output_num], std::default_delete()); - bm_tensor_t* output_tensors = output_tensors_.get(); - #endif + std::vector input_tensors(input_num); + std::vector output_tensors(output_num); + for (int i = 0; i < input_num; i++) { bmrt_tensor(&input_tensors[i], this, net_ctx->input_type_v[i], input_shapes[i]); - bm_memcpy_s2d(m_handle, input_tensors[i].device_mem, (void*)input_datas[i]); + bm_memcpy_s2d(m_handles[devid], input_tensors[i].device_mem, (void*)input_datas[i]); } // launch may not call sync internally - bool ret = launch(net_idx, input_tensors, input_num, output_tensors, output_num, false, false); + bool ret = launch_multi_cores(net_idx, input_tensors.data(), input_num, output_tensors.data(), output_num, core_list, false, false); // sync is needed for the following d2s to fetch output data if (ret){ - ret = (BM_SUCCESS == bm_thread_sync(m_handle)); + ret = (BM_SUCCESS == bm_thread_sync(m_handles[devid])); } if (!ret) { BMRT_LOG(WRONG, "launch net[%s] failed", net_ctx->net_name.c_str()); @@ -1441,14 +2453,14 @@ bool 
Bmruntime::launch(int net_idx, void* const input_datas[], const bm_shape_t } } for (int i = 0; i < output_num; i++) { - bm_memcpy_d2s_partial(m_handle, output_datas[i], output_tensors[i].device_mem, + bm_memcpy_d2s_partial(m_handles[devid], output_datas[i], output_tensors[i].device_mem, bmrt_tensor_bytesize(&output_tensors[i])); - must_free_device_mem(output_tensors[i].device_mem); + must_free_device_mem(devid, output_tensors[i].device_mem); output_shapes[i] = output_tensors[i].shape; } } for (int i = 0; i < input_num; i++) { - must_free_device_mem(input_tensors[i].device_mem); + must_free_device_mem(devid, input_tensors[i].device_mem); } return ret; } @@ -1516,30 +2528,40 @@ static void show_net_info(const bm_net_info_t* netinfo, int index) { "UINT16", "INT32", "UINT32", + "BFLOAT16", + "INT4", + "UINT4" }; BMRT_LOG(INFO, " ########################"); - BMRT_LOG(INFO, " NetName: %s, Index=%d", netinfo->name, index); + // string version = netinfo; + if (index < 0) { + BMRT_LOG(INFO, " NetName: %s, Cascade", netinfo->name); + } else { + BMRT_LOG(INFO, " NetName: %s, Index=%d, CoreNum=%d", netinfo->name, index, netinfo->core_num); + } for(int s=0; sstage_num; s++){ BMRT_LOG(INFO, " ---- stage %d ----", s); for(int i=0; iinput_num; i++){ auto shapeStr = shape_to_str(netinfo->stages[s].input_shapes[i]); - BMRT_LOG(INFO, " Input %d) '%s' shape=%s dtype=%s scale=%g zero_point=%d", + BMRT_LOG(INFO, " Input %d) '%s' shape=%s dtype=%s scale=%g zero_point=%d device_id=%d", i, netinfo->input_names[i], shapeStr.c_str(), dtypeMap[netinfo->input_dtypes[i]], netinfo->input_scales[i], - netinfo->input_zero_point[i]); + netinfo->input_zero_point[i], + netinfo->input_loc_devices[i]); } for(int i=0; ioutput_num; i++){ auto shapeStr = shape_to_str(netinfo->stages[s].output_shapes[i]); - BMRT_LOG(INFO, " Output %d) '%s' shape=%s dtype=%s scale=%g zero_point=%d", + BMRT_LOG(INFO, " Output %d) '%s' shape=%s dtype=%s scale=%g zero_point=%d device_id=%d", i, netinfo->output_names[i], 
shapeStr.c_str(), dtypeMap[netinfo->output_dtypes[i]], netinfo->output_scales[i], - netinfo->output_zero_point[i]); + netinfo->output_zero_point[i], + netinfo->output_loc_devices[i]); } } BMRT_LOG(INFO, " ########################"); @@ -1549,8 +2571,13 @@ void Bmruntime::show_neuron_network() { int size = m_net_ctx_v.size(); for (int idx = 0; idx < size; idx++) { - auto net_info = get_net_info(idx); - show_net_info(net_info, idx); + if (!m_net_ctx_v[idx]->in_cascade) { + auto net_info = get_net_info(idx); + show_net_info(net_info, idx); + } + } + for (auto &v : m_net_cascade_v) { + show_net_info(&v.net_info, -1); } } @@ -1566,6 +2593,15 @@ int Bmruntime::get_net_idx(const string& net_name) return -1; } +const net_cascade_t *Bmruntime::get_net_cascade(const string &net_name) { + for (auto &v : m_net_cascade_v) { + if (v.main_name == net_name) { + return &v; + } + } + return nullptr; +} + bool Bmruntime::can_batch_size_change(int net_idx) { return m_net_ctx_v[net_idx]->is_dynamic ? m_net_ctx_v[net_idx]->n_can_change == 1 : false; } @@ -1626,8 +2662,14 @@ const bm_shape_t *Bmruntime::get_output_max_shape(int net_idx, int output_idx) { void Bmruntime::get_network_names(vector *names) { if (names != nullptr) { names->clear(); - for (auto &net_ctx : m_net_ctx_v) + for (auto net_ctx : m_net_ctx_v) { + if (!net_ctx->in_cascade) { names->push_back(net_ctx->net_name.c_str()); + } + } + for (auto net_cascade : m_net_cascade_v) { + names->push_back(net_cascade.main_name.c_str()); + } } } @@ -1692,7 +2734,7 @@ int Bmruntime::get_output_gmem_stmode(const string& tensor_name, int net_idx) { int Bmruntime::get_stage_idx(const net_ctx_t* net_ctx, const bm_tensor_t* input_tensors) { return net_ctx->is_dynamic ? 
get_dynamic_stage_idx(net_ctx, input_tensors) - : get_static_stage_idx(net_ctx, input_tensors); + : get_static_stage_idx(net_ctx, input_tensors); } /* for static net */ @@ -1710,7 +2752,21 @@ int Bmruntime::get_static_stage_idx(const net_ctx_t* net_ctx, const bm_tensor_t* return stage_idx; } } - + /* if only one stage, count number is same also ok, these would make api + * stronger adaptability*/ + if (net_ctx->stage_v.size() == 1) { + auto &input_v = net_ctx->stage_v[0]->input_v; + u32 input_idx; + for (input_idx = 0; input_idx < input_v.size(); input_idx++) { + if (bmrt_shape_count(&input_tensors[input_idx].shape) != + bmrt_shape_count(&input_v[input_idx].shape)) { + break; + } + } + if (input_idx == input_v.size()) { + return 0; + } + } return -1; } @@ -1788,6 +2844,21 @@ const bm_net_info_t* Bmruntime::get_net_info(int net_idx) return &m_net_ctx_v[net_idx]->net_info; } +const bm_net_info_t* Bmruntime::get_net_info(const string& net_name) +{ + for (auto &v:m_net_cascade_v) { + if (v.main_name == net_name) { + return &v.net_info; + } + } + for (auto v:m_net_ctx_v) { + if (v->net_name == net_name) { + return &v->net_info; + } + } + return NULL; +} + void Bmruntime::set_bmrt_mmap(bool enable) { if (!bmrt_arch_info::is_soc_mode()) { @@ -1802,42 +2873,105 @@ void Bmruntime::subnet_time_print(bool enable) m_subnet_time_print = enable; } -const std::vector &Bmruntime::get_neuron_mem(int net_idx) +const std::vector &Bmruntime::get_neuron_mem(int net_idx) { return m_net_ctx_v[net_idx]->neuron_mem; } void Bmruntime::set_debug_mode(int mode) { - bm_set_debug_mode(m_handle , mode); + for (int i = 0; i < m_device_num; i++) { + bm_set_debug_mode(m_handles[i] , mode); + } } -u64 Bmruntime::must_alloc_device_mem(bm_device_mem_t *mem, u64 size, const std::string &desc, int type_len) +u64 Bmruntime::must_alloc_device_mem(uint32_t devid, bm_device_mem_t *mem, u64 size, const std::string &desc, int type_len) { - bm_status_t status = bm_malloc_device_byte_heap_mask(m_handle, mem, 
m_neuron_heap_mask, size*type_len); - if (BM_SUCCESS != status) { - BMRT_LOG(FATAL, "device mem alloc failed: size=%llu[0x%x] type_len=%d status=%d desc=%s", - size, size, type_len, status, desc.c_str()); + if (size == 0) { + *mem = bm_mem_from_device(CTX_START_ADDR, 0); + } else { + bm_status_t status = bm_malloc_device_byte_heap_mask(m_handles[devid], mem, m_neuron_heap_mask, size*type_len); + if (BM_SUCCESS != status) { + BMRT_LOG(FATAL, "device mem alloc failed: size=%llu[0x%x] type_len=%d status=%d desc=%s", + size, size, type_len, status, desc.c_str()); + } } -#if 0 + BMRT_LOG_RUN(DEBUG, { + float nan = std::nanf(""); + if (bm_memset_device_ext(m_handles[devid], &nan, 4, *mem) != BM_SUCCESS) { + BMRT_LOG(FATAL, "bm_memset_device_ext failed"); + } + u64 mem_addr = bm_mem_get_device_addr(*mem); + u64 mem_size = bm_mem_get_device_size(*mem); + BMRT_LOG(DEBUG, "alloc mem devid=%d: %s [0x%llx, 0x%llx), size=%lld[0x%x]", devid, desc.c_str(), mem_addr, mem_addr+mem_size, mem_size, mem_size); + }); + // Setting all neuron to nan can be useful when debugging net inference - float nan = std::nanf(""); - if (bm_memset_device_ext(m_handle, &nan, 4, *mem) != BM_SUCCESS) - { - BMRT_LOG(FATAL, "bm_memset_device_ext failed"); + mem_pair_t mem_pair = { bm_mem_get_device_addr(*mem), bm_mem_get_device_size(*mem)}; + m_profile->record_alloc_device_mem(mem_pair, desc); + return mem_pair.first; +} + +u64 Bmruntime::must_alloc_device_mem_u64(uint32_t devid, bm_device_mem_u64_t *mem, u64 size, const std::string &desc, int type_len) +{ + if (size == 0) { + *mem = bm_mem_from_device_u64(CTX_START_ADDR, 0); + } else { + bm_status_t status = bm_malloc_device_byte_heap_mask_u64(m_handles[devid], mem, m_neuron_heap_mask, size*type_len); + if (BM_SUCCESS != status) { + BMRT_LOG(FATAL, "device mem alloc failed: size=%llu[0x%x] type_len=%d status=%d desc=%s", + size, size, type_len, status, desc.c_str()); + } } -#endif - m_profile->record_alloc_device_mem(*mem, desc); - return 
bm_mem_get_device_addr(*mem); + + // Setting all neuron to nan can be useful when debugging net inference + mem_pair_t mem_pair = { bm_mem_get_device_addr_u64(*mem), bm_mem_get_device_size_u64(*mem)}; + m_profile->record_alloc_device_mem(mem_pair, desc); + BMRT_LOG_RUN(DEBUG, { + u64 mem_addr = bm_mem_get_device_addr_u64(*mem); + u64 mem_size = bm_mem_get_device_size_u64(*mem); + BMRT_LOG(DEBUG, "alloc mem devid=%d: %s [0x%llx, 0x%llx), size=%lld[0x%x]", devid, desc.c_str(), mem_addr, mem_addr+mem_size, mem_size, mem_size); + }); + + return mem_pair.first; } -bm_device_mem_t Bmruntime::must_alloc_device_mem(u64 size, const std::string &desc, int type_len){ + +bm_device_mem_t Bmruntime::must_alloc_device_mem(uint32_t devid, u64 size, const std::string &desc, int type_len){ bm_device_mem_t mem; - must_alloc_device_mem(&mem, size, desc, type_len); + must_alloc_device_mem(devid, &mem, size, desc, type_len); return mem; } -void Bmruntime::must_free_device_mem(bm_device_mem_t& mem){ - bm_free_device(m_handle, mem); - m_profile->record_free_device_mem(mem); + +bm_device_mem_u64_t Bmruntime::must_alloc_device_mem_u64(uint32_t devid, u64 size, const string& desc, int type_len) { + bm_device_mem_u64_t mem; + must_alloc_device_mem_u64(devid, &mem, size, desc, type_len); + return mem; +} + +void Bmruntime::must_free_device_mem(uint32_t devid, bm_device_mem_t& mem){ + if (bm_mem_get_device_size(mem) == 0) { + return; + } + BMRT_LOG_RUN(DEBUG, { + u64 mem_addr = bm_mem_get_device_addr(mem); + u64 mem_size = bm_mem_get_device_size(mem); + BMRT_LOG(DEBUG, "free mem devid=%d: [0x%llx, 0x%llx), size=%lld[0x%x]", devid, mem_addr, mem_addr+mem_size, mem_size, mem_size); + }); + bm_free_device(m_handles[devid], mem); + m_profile->record_free_device_mem(bm_mem_get_device_addr(mem)); +} + +void Bmruntime::must_free_device_mem_u64(uint32_t devid, bm_device_mem_u64_t& mem){ + if (bm_mem_get_device_size_u64(mem) == 0) { + return; + } + BMRT_LOG_RUN(DEBUG, { + u64 mem_addr = 
bm_mem_get_device_addr_u64(mem); + u64 mem_size = bm_mem_get_device_size_u64(mem); + BMRT_LOG(DEBUG, "free mem devid=%d: [0x%llx, 0x%llx), size=%lld[0x%x]", devid, mem_addr, mem_addr+mem_size, mem_size, mem_size); + }); + bm_free_device_u64(m_handles[devid], mem); + m_profile->record_free_device_mem(bm_mem_get_device_addr_u64(mem)); } } // namespace bmruntime diff --git a/tpu-runtime/src/bmruntime_bmodel.cpp b/tpu-runtime/src/bmruntime_bmodel.cpp old mode 100644 new mode 100755 index e379f72..d114cdd --- a/tpu-runtime/src/bmruntime_bmodel.cpp +++ b/tpu-runtime/src/bmruntime_bmodel.cpp @@ -8,10 +8,12 @@ #endif #include #include +#include #include "bmodel.hpp" #include "bmruntime.h" #include "bmlib_runtime.h" #include "kernel_module.h" +#include #define SP(D, T) (std::shared_ptr((D), std::default_delete())) @@ -35,15 +37,26 @@ static void fill_tensor_attr( tensor_attr_t* attr, u64 ctx_start, const std::vector &ctx_borders, - const std::vector &ctx_offset) + const std::vector &ctx_offset, + uint32_t flags) { attr->shape.num_dims = tensor->shape()->Get(0)->dim()->size(); for (int i = 0; i < attr->shape.num_dims; i++) { attr->shape.dims[i] = tensor->shape()->Get(0)->dim()->Get(i); } u64 addr = tensor->device_addr(); - addr += ctx_offset[get_mem_index(ctx_borders, ctx_start, addr)]; - attr->dev_mem = bm_mem_from_device(addr, tensor->size()); + if (flags & BM_RUNTIME_SHARE_MEM) { + addr += ctx_offset[get_mem_index(ctx_borders, ctx_start, addr)]; + addr &= bmrt_arch_info::addr_mask(); + attr->dev_mem = bm_mem_from_device(addr, tensor->size()); + } else { + /* + multi-core in load bmodel stage, use fake device_addr to record tensor_addr in bmodel + and assign real device_addr in launch tensor stage + */ + attr->dev_mem.u.device.device_addr = addr; + attr->dev_mem.size = tensor->size(); + } attr->st_mode = (bm_store_mode_t)tensor->gmem_stmode(); attr->pad_h = tensor->pad_h(); } @@ -53,7 +66,8 @@ static void fill_tensor_attr( vector& tensor_v, u64 ctx_start, const std::vector 
&ctx_borders, - const std::vector &ctx_offset) + const std::vector &ctx_offset, + uint32_t flags) { for (u32 i = 0; i < tensors->size(); i++) { auto tensor = tensors->Get(i); @@ -61,31 +75,98 @@ static void fill_tensor_attr( continue; } tensor_attr_t attr; - fill_tensor_attr(tensor, &attr, ctx_start, ctx_borders, ctx_offset); + fill_tensor_attr(tensor, &attr, ctx_start, ctx_borders, ctx_offset, flags); + tensor_v.push_back(attr); + } +} + +static void fill_io_attr(const bmodel::Tensor *tensor, tensor_attr_t *attr, + u64 io_offset) { + attr->shape.num_dims = tensor->shape()->Get(0)->dim()->size(); + for (int i = 0; i < attr->shape.num_dims; i++) { + attr->shape.dims[i] = tensor->shape()->Get(0)->dim()->Get(i); + } + u64 addr = tensor->device_addr() + io_offset; + addr &= bmrt_arch_info::addr_mask(); + attr->dev_mem = bm_mem_from_device(addr, tensor->size()); + attr->st_mode = (bm_store_mode_t)tensor->gmem_stmode(); + attr->pad_h = tensor->pad_h(); +} + +static void fill_io_attr(const Vector> *tensors, + vector &tensor_v, u64 io_offset) { + for (u32 i = 0; i < tensors->size(); i++) { + auto tensor = tensors->Get(i); + if (tensor->shape() == NULL || tensor->shape()->size() == 0) { + continue; + } + tensor_attr_t attr; + fill_io_attr(tensor, &attr, io_offset); + tensor_v.push_back(attr); + } +} + +static void fill_io_tag_attr(const bmodel::Tensor *tensor, tensor_attr_t *attr) { + attr->shape.num_dims = tensor->shape()->Get(0)->dim()->size(); + for (int i = 0; i < attr->shape.num_dims; i++) { + attr->shape.dims[i] = tensor->shape()->Get(0)->dim()->Get(i); + } + u64 addr = tensor->device_addr(); + u64 tag = ((addr >> 36) & 0x7); + addr &= bmrt_arch_info::addr_mask(); + addr += tag << 36; + attr->dev_mem = bm_mem_from_device(addr, tensor->size()); + attr->st_mode = (bm_store_mode_t)tensor->gmem_stmode(); + attr->pad_h = tensor->pad_h(); +} + + +static void fill_io_tag_attr( + const Vector> *tensors, + vector &tensor_v, + u64 ctx_start, + const std::vector &ctx_borders, + 
const std::vector &ctx_offset, + uint32_t flags) +{ + for (u32 i = 0; i < tensors->size(); i++) { + auto tensor = tensors->Get(i); + if (tensor->shape() == NULL || tensor->shape()->size() == 0) { + continue; + } + u64 addr = tensor->device_addr(); + u64 tag = ((addr >> 36) & 0x7); + tensor_attr_t attr; + if (tag >= 3){ + fill_io_tag_attr(tensor, &attr); + } + else{ + fill_tensor_attr(tensor, &attr, ctx_start, ctx_borders, ctx_offset, flags); + } tensor_v.push_back(attr); } } static void upload_coeff_data(ModelCtx* model_ctx, const bmodel::CoeffMem* coeff_mem, - bm_handle_t handle, bm_device_mem_t& dev_mem) + bm_handle_t handle, bm_device_mem_u64_t& dev_mem) { bm_status_t status = BM_SUCCESS; u64 size = coeff_mem->binary_coeff()->size(); #ifdef SOC_MODE void* vmem = NULL; - status = bm_mem_mmap_device_mem(handle, &dev_mem, (u64*)&vmem); + status = bm_mem_mmap_device_mem_u64(handle, &dev_mem, (u64*)&vmem); CHECK_status(status); model_ctx->read_binary(coeff_mem->binary_coeff(), (u8*)vmem); - status = bm_mem_flush_device_mem(handle, &dev_mem); + status = bm_mem_flush_device_mem_u64(handle, &dev_mem); CHECK_status(status); - bm_mem_unmap_device_mem(handle, vmem, size); + bm_mem_unmap_device_mem_u64(handle, vmem, size); #else #define COEFF_BLK_SIZE 0x1000000 u8* data = new u8[COEFF_BLK_SIZE]; auto data_sp = SP(data, u8); u64 left_size = size; u64 offset = 0; - u64 address = bm_mem_get_device_addr(dev_mem); + u64 address = bm_mem_get_device_addr_u64(dev_mem); while (left_size > 0) { u64 data_size = (left_size >= COEFF_BLK_SIZE ? 
COEFF_BLK_SIZE : left_size); model_ctx->read_binary(coeff_mem->binary_coeff(), offset, data, data_size); @@ -98,85 +179,80 @@ static void upload_coeff_data(ModelCtx* model_ctx, const bmodel::CoeffMem* coeff #endif } -static uint32_t get_bdc_cmd_len( - ModelCtx* model_ctx, - const bmodel::CmdGroup* cmd_group, - u32 start_offset, bool last_cmd) -{ - u32* cmd_buf = new u32[2]; - auto cmd_buf_sp = SP(cmd_buf, u32); - model_ctx->read_binary(cmd_group->binary_bdc(), start_offset, (u8*)cmd_buf, - sizeof(u32) * 2); +static uint32_t get_bdc_cmd_len(const u8 *bdc_buffer, u64 start_offset, + bool last_cmd) { uint32_t len = 0; switch (bmrt_arch_info::get_bmtpu_arch()) { - case BM1682: - case BM1684: - len = 1 << BDC_ENGINE_CMD_ALIGNED_BIT; - break; - case BM1880: - len = 112; - break; - case BM1686: - case BM1684X: { - u32 tsk_type = (cmd_buf[1] >> 9) & 0xf; - int eu_type = (cmd_buf[1] >> 13) & 0x1f; - bool is_short = cmd_buf[0] & 0x1; - if (tsk_type == 15 || tsk_type == 12) { - len = 16; - } else if (!is_short) { - len = 128; - } else if (tsk_type == 0 || tsk_type == 1) { - len = 64; - } else if (tsk_type == 6 || tsk_type == 13 || tsk_type == 14) { - len = 48; - } else if (tsk_type == 4 || tsk_type == 5 || tsk_type == 9 || tsk_type == 10) { - len = 32; - } else if (tsk_type == 2) { - len = eu_type > 3 ? 32 : 48; - } else if (tsk_type == 3) { - len = (eu_type == 24 || eu_type == 25) ? 
16 : 64; - } else { - BMRT_ASSERT(0); - } - if (last_cmd) { - return (ALIGN(start_offset + len, 128) - start_offset); - } - break; - } - default: + case BM1682: + case BM1684: + len = 1 << BDC_ENGINE_CMD_ALIGNED_BIT; + break; + case BM1880: + len = 112; + break; + case SG2380: + case BM1690: + case BM1688: + case MARS3: + case BM1684X: { + u32 cmd_buf[2]; + memcpy(cmd_buf, bdc_buffer + start_offset, sizeof(cmd_buf)); + u32 tsk_type = (cmd_buf[1] >> 9) & 0xf; + int eu_type = (cmd_buf[1] >> 13) & 0x1f; + bool is_short = cmd_buf[0] & 0x1; + if (tsk_type == 15 || tsk_type == 12) { + len = 16; + } else if (!is_short) { + len = 128; + } else if (tsk_type == 0 || tsk_type == 1) { + len = 64; + } else if (tsk_type == 6 || tsk_type == 13 || tsk_type == 14) { + len = 48; + } else if (tsk_type == 4 || tsk_type == 5 || tsk_type == 9 || + tsk_type == 10) { + len = 32; + } else if (tsk_type == 2) { + len = eu_type > 3 ? 32 : 48; + } else if (tsk_type == 3) { + len = (eu_type == 24 || eu_type == 25) ? 
16 : 64; + } else { BMRT_ASSERT(0); + } + if (last_cmd) { + return (ALIGN(start_offset + len, 128) - start_offset); + } + break; + } + default: + BMRT_ASSERT(0); } return len; } -static uint32_t get_gdma_cmd_len( - ModelCtx* model_ctx, - const bmodel::CmdGroup* cmd_group, - u32 start_offset, bool last_cmd) -{ - uint32_t len = 96; //default: common gdma instrution size +static uint32_t get_gdma_cmd_len(const u8 *gdma_buffer, u64 start_offset, + bool last_cmd) { + uint32_t len = 96; // default: common gdma instrution size bmtpu_arch_t arch = bmrt_arch_info::get_bmtpu_arch(); - if(BM1686 == arch) { - u32 cmd_head[2] ={0}; - model_ctx->read_binary(cmd_group->binary_gdma(), start_offset, (u8*)&cmd_head, - sizeof(cmd_head)); - u32 tsk_type = cmd_head[1] & 0xf; - if(tsk_type == 0x6){ // DMA_sys - len = 16; - } - // sys end - if (last_cmd) { - len = ALIGN(start_offset + 16, 128) - start_offset; - } + if (BM1688 == arch || BM1690 == arch || MARS3 == arch || SG2380 == arch) { + u32 cmd_head[2] = {0}; + memcpy(cmd_head, gdma_buffer + start_offset, sizeof(cmd_head)); + u32 tsk_type = cmd_head[1] & 0xf; + if (tsk_type == 0x6) { // DMA_sys + len = 16; + } + // sys end + if (last_cmd) { + len = ALIGN(start_offset + 16, 128) - start_offset; + } } else if (BM1684X == arch) { - // sys end - if (last_cmd) { - len = ALIGN(start_offset + 16, 128) - start_offset; - } + // sys end + if (last_cmd) { + len = ALIGN(start_offset + 16, 128) - start_offset; + } } else { - len = 1<>* cmd_group, - net_stage_t* stage) + const bmodel::NetParameter* param, + net_stage_t* stage, uint32_t devid) { u32 cmd_word_num; u32 *cmd_buf, *p_cmd_buf; bm_device_mem_t pmem; - if (cmd_group == NULL || cmd_group->size() == 0) - return true; + const auto core_num = stage->core_commands.size(); + for (uint32_t core_idx = 0; core_idx < core_num; core_idx++) { + u32 bdc_total_id = 0, gdma_total_id = 0; + u32 bdc_total_cmd_byte = 0, gdam_total_cmd_byte = 0; + // TODO: Here is a huge problem, if have one more subnets in 
1688 or sg2260 + auto core_commands = param->sub_net()->Get(0)->core_commands(); + auto cmd_group = core_commands + ? core_commands->Get(core_idx)->gdma_tiu_commands() + : param->cmd_group(); + if (!cmd_group || cmd_group->size() == 0) + continue; + for (u32 i = 0; i < cmd_group->size(); i++) { + stage->core_commands[core_idx].bdc_id.push_back(cmd_group->Get(i)->bdc_num()); + stage->core_commands[core_idx].gdma_id.push_back(cmd_group->Get(i)->gdma_num()); + stage->core_commands[core_idx].bdc_cmd_byte.push_back(cmd_group->Get(i)->bdc_cmd_byte()); + stage->core_commands[core_idx].gdma_cmd_byte.push_back(cmd_group->Get(i)->gdma_cmd_byte()); + bdc_total_id += cmd_group->Get(i)->bdc_num(); + gdma_total_id += cmd_group->Get(i)->gdma_num(); + bdc_total_cmd_byte += cmd_group->Get(i)->bdc_cmd_byte(); + gdam_total_cmd_byte += cmd_group->Get(i)->gdma_cmd_byte(); + } - u32 bdc_total_id = 0, gdma_total_id = 0; - u32 bdc_total_cmd_byte = 0, gdam_total_cmd_byte = 0; - for (u32 i = 0; i < cmd_group->size(); i++) { - stage->bdc_id.push_back(cmd_group->Get(i)->bdc_num()); - stage->gdma_id.push_back(cmd_group->Get(i)->gdma_num()); - stage->bdc_cmd_byte.push_back(cmd_group->Get(i)->bdc_cmd_byte()); - stage->gdma_cmd_byte.push_back(cmd_group->Get(i)->gdma_cmd_byte()); - bdc_total_id += cmd_group->Get(i)->bdc_num(); - gdma_total_id += cmd_group->Get(i)->gdma_num(); - bdc_total_cmd_byte += cmd_group->Get(i)->bdc_cmd_byte(); - gdam_total_cmd_byte += cmd_group->Get(i)->gdma_cmd_byte(); - } - - // ENGINE_BD - if (bdc_total_cmd_byte > 0) { - cmd_word_num = bdc_total_cmd_byte / sizeof(u32); - } else { - cmd_word_num = bdc_total_id * BD_ENGINE_COMMAND_NUM_aligned; - } - if (cmd_word_num != 0) { - u64 cmd_buf_addr = must_alloc_device_mem(&pmem, cmd_word_num, "bd_cmd_mem", 4); - m_device_mem_vec.push_back(pmem); - cmd_buf = new u32[cmd_word_num]; - auto cmd_buf_sp = SP(cmd_buf, u32); - p_cmd_buf = cmd_buf; - for (u32 group_idx = 0; group_idx < cmd_group->size(); group_idx++) { - u32 bdc_offset = 
0; - auto cur_cmd_group = cmd_group->Get(group_idx); - for (u32 cmd_idx = 0; cmd_idx < cur_cmd_group->bdc_num(); cmd_idx++) { - uint32_t read_size = get_bdc_cmd_len(model_ctx, cur_cmd_group, bdc_offset, + // ENGINE_BD + if (bdc_total_cmd_byte > 0) { + cmd_word_num = bdc_total_cmd_byte / sizeof(u32); + } else { + cmd_word_num = bdc_total_id * BD_ENGINE_COMMAND_NUM_aligned; + } + if (cmd_word_num != 0) { + u64 cmd_buf_addr = + must_alloc_device_mem(devid, &pmem, cmd_word_num, "bd_cmd_mem", 4); + m_device_mem_vec.push_back(pmem); + m_device_mem_ids.push_back(devid); + cmd_buf = new u32[cmd_word_num]; + auto cmd_buf_sp = SP(cmd_buf, u32); + p_cmd_buf = cmd_buf; + for (u32 group_idx = 0; group_idx < cmd_group->size(); group_idx++) { + u64 bdc_offset = 0; + auto cur_cmd_group = cmd_group->Get(group_idx); + if (0 == cur_cmd_group->bdc_num()) { + continue; + } + u8 *bdc_buffer = new u8[cur_cmd_group->binary_bdc()->size()]; + model_ctx->read_binary(cur_cmd_group->binary_bdc(), bdc_buffer); + for (u32 cmd_idx = 0; cmd_idx < cur_cmd_group->bdc_num(); cmd_idx++) { + uint32_t read_size = + get_bdc_cmd_len(bdc_buffer, bdc_offset, (cmd_idx == cur_cmd_group->bdc_num() - 1)); + memcpy(p_cmd_buf, bdc_buffer + bdc_offset, read_size); + convert_cmd(p_cmd_buf, ENGINE_BD, + cmd_idx == (cur_cmd_group->bdc_num() - 1), + cmd_buf_addr + GLOBAL_MEM_CMD_START_OFFSET, stage); + p_cmd_buf += read_size / sizeof(uint32_t); + bdc_offset += read_size; + } + delete[] bdc_buffer; + } + m_profile->record_cmd_data(core_idx, ENGINE_BD, cmd_buf, cmd_word_num * 4, + cmd_buf_addr); + stage->core_commands[core_idx].bdc_mem.Init("bdc", m_handles[devid], pmem, cmd_buf, m_flags&BM_RUNTIME_CHECK_MEM); + } - model_ctx->read_binary(cur_cmd_group->binary_bdc(), bdc_offset, (u8*)p_cmd_buf, - read_size); - convert_cmd(p_cmd_buf, ENGINE_BD, cmd_idx == (cur_cmd_group->bdc_num() - 1), - cmd_buf_addr + GLOBAL_MEM_CMD_START_OFFSET, stage); - p_cmd_buf += read_size / sizeof(uint32_t); - bdc_offset += read_size; + // 
ENGINE_GDMA + if (gdam_total_cmd_byte > 0) { + cmd_word_num = gdam_total_cmd_byte / sizeof(u32); + } else { + cmd_word_num = gdma_total_id * GDMA_ENGINE_COMMAND_NUM_aligned; + } + if (cmd_word_num != 0) { + u64 cmd_buf_addr = + must_alloc_device_mem(devid, &pmem, cmd_word_num, "gdma_cmd_mem", 4); + m_device_mem_vec.push_back(pmem); + m_device_mem_ids.push_back(devid); + cmd_buf = new u32[cmd_word_num]; + auto cmd_buf_sp = SP(cmd_buf, u32); + p_cmd_buf = cmd_buf; + for (u32 group_idx = 0; group_idx < cmd_group->size(); group_idx++) { + u64 gdma_offset = 0; + auto cur_cmd_group = cmd_group->Get(group_idx); + if (0 == cur_cmd_group->gdma_num()) { + continue; + } + u8 *gdma_buffer = new u8[cur_cmd_group->binary_gdma()->size()]; + model_ctx->read_binary(cur_cmd_group->binary_gdma(), gdma_buffer); + for (u32 cmd_idx = 0; cmd_idx < cur_cmd_group->gdma_num(); + cmd_idx++) { + u32 gdma_size = + get_gdma_cmd_len(gdma_buffer, gdma_offset, + (cmd_idx == cur_cmd_group->gdma_num() - 1)); + memcpy(p_cmd_buf, gdma_buffer + gdma_offset, gdma_size); + convert_cmd(p_cmd_buf, ENGINE_GDMA, + cmd_idx == cur_cmd_group->gdma_num() - 1, + cmd_buf_addr + GLOBAL_MEM_CMD_START_OFFSET, stage); + p_cmd_buf += gdma_size / sizeof(u32); + gdma_offset += gdma_size; + } + delete[] gdma_buffer; + } + m_profile->record_cmd_data(core_idx, ENGINE_GDMA, cmd_buf, cmd_word_num * 4, + cmd_buf_addr); + stage->core_commands[core_idx].gdma_mem.Init("gdma", m_handles[devid], pmem, cmd_buf, m_flags&BM_RUNTIME_CHECK_MEM); } - } - m_profile->record_cmd_data(ENGINE_BD, cmd_buf, cmd_word_num*4, cmd_buf_addr); - stage->bdc_mem.Init("bdc", m_handle, pmem, cmd_buf); } - // ENGINE_GDMA - if (gdam_total_cmd_byte > 0) { - cmd_word_num = gdam_total_cmd_byte / sizeof(u32); - } else { - cmd_word_num = gdma_total_id * GDMA_ENGINE_COMMAND_NUM_aligned; - } - if (cmd_word_num != 0) { - u64 cmd_buf_addr = must_alloc_device_mem(&pmem, cmd_word_num, "gdma_cmd_mem", 4); - m_device_mem_vec.push_back(pmem); - cmd_buf = new 
u32[cmd_word_num]; - auto cmd_buf_sp = SP(cmd_buf, u32); - p_cmd_buf = cmd_buf; - for (u32 group_idx = 0; group_idx < cmd_group->size(); group_idx++) { - u32 gdma_offset = 0; - auto cur_cmd_group = cmd_group->Get(group_idx); - for (u32 cmd_idx = 0; cmd_idx < cur_cmd_group->gdma_num(); cmd_idx++) { - u32 gdma_size = get_gdma_cmd_len(model_ctx, cur_cmd_group, - gdma_offset, (cmd_idx == cur_cmd_group->gdma_num() - 1)); - - model_ctx->read_binary(cur_cmd_group->binary_gdma(), gdma_offset, (u8*)p_cmd_buf, - gdma_size); - convert_cmd(p_cmd_buf, ENGINE_GDMA, cmd_idx == cur_cmd_group->gdma_num() - 1, - cmd_buf_addr + GLOBAL_MEM_CMD_START_OFFSET, stage); - p_cmd_buf += gdma_size / sizeof(u32); - gdma_offset += gdma_size; - } + for (uint32_t core_idx = 0; core_idx < core_num; core_idx++) { + auto core_commands = param->sub_net()->Get(0)->core_commands(); + if(!core_commands) continue; + auto hau_commands = core_commands->Get(core_idx)->hau_commands(); + auto sdma_commands = core_commands->Get(core_idx)->sdma_commands(); + + u32 hau_total_cmd_byte = (hau_commands && hau_commands->size()) ? + hau_commands[0][0]->size() : 0; + u32 sdma_total_cmd_byte = (sdma_commands && sdma_commands->size()) ? 
+ sdma_commands[0][0]->size() : 0; + + // ENGINE_HAU + if (hau_total_cmd_byte) { + cmd_word_num = hau_total_cmd_byte / sizeof(u32); + u64 cmd_buf_addr = + must_alloc_device_mem(devid, &pmem, cmd_word_num, "hau_cmd_mem", 4); + m_device_mem_vec.push_back(pmem); + m_device_mem_ids.push_back(devid); + cmd_buf = new u32[cmd_word_num]; + auto cmd_buf_sp = SP(cmd_buf, u32); + p_cmd_buf = cmd_buf; + + model_ctx->read_binary(hau_commands->Get(0), (u8 *)p_cmd_buf); + m_profile->record_cmd_data(core_idx, ENGINE_HAU, cmd_buf, cmd_word_num * 4, + cmd_buf_addr); + stage->core_commands[core_idx].hau_mem.Init("hau", m_handles[devid], pmem, cmd_buf, m_flags&BM_RUNTIME_CHECK_MEM); } - m_profile->record_cmd_data(ENGINE_GDMA, cmd_buf, cmd_word_num*4 , cmd_buf_addr); - stage->gdma_mem.Init("gdma", m_handle, pmem, cmd_buf); - } + // ENGINE_SDMA + if (sdma_total_cmd_byte) { + cmd_word_num = sdma_total_cmd_byte / sizeof(u32); + u64 cmd_buf_addr = + must_alloc_device_mem(devid, &pmem, cmd_word_num, "sdma_cmd_mem", 4); + m_device_mem_vec.push_back(pmem); + m_device_mem_ids.push_back(devid); + cmd_buf = new u32[cmd_word_num]; + auto cmd_buf_sp = SP(cmd_buf, u32); + p_cmd_buf = cmd_buf; + + model_ctx->read_binary(sdma_commands->Get(0), (u8 *)p_cmd_buf); + m_profile->record_cmd_data(core_idx, ENGINE_SDMA, cmd_buf, cmd_word_num * 4, + cmd_buf_addr); + stage->core_commands[core_idx].sdma_mem.Init("sdma", m_handles[devid], pmem, cmd_buf, m_flags&BM_RUNTIME_CHECK_MEM); + } + } return true; } @@ -279,16 +426,18 @@ void Bmruntime::trace() int err_count = 0; fprintf(stderr, "*** bmruntime trace: ***\n"); fprintf(stderr, "============ check coeff =============\n"); - err_count += m_local_coeff->Check(); + for (int i = 0; i < m_device_num; i++) { + err_count += m_local_coeffs[i]->Check(); + } int net_num = m_net_ctx_v.size(); for (int i = 0; i < net_num; i++) { auto net_ctx = m_net_ctx_v[i]; for (u32 j = 0; j < net_ctx->stage_v.size(); j++) { fprintf(stderr, "============ check net[%s] stage[%d] 
=======\n", net_ctx->net_name.c_str(), j); auto stage = net_ctx->stage_v[j]; - err_count += stage->ir_mem.Check(); - err_count += stage->gdma_mem.Check(); - err_count += stage->bdc_mem.Check(); + err_count += stage->core_commands[0].ir_mem.Check(); + err_count += stage->core_commands[0].gdma_mem.Check(); + err_count += stage->core_commands[0].bdc_mem.Check(); } } fprintf(stderr, "================\n"); @@ -297,19 +446,22 @@ void Bmruntime::trace() bool Bmruntime::setup_ir_context(ModelCtx* model_ctx, const bmodel::Binary* binary_ir, const Vector>* stage_ir, - net_stage_t* stage) + net_stage_t* stage, uint32_t devid) { if (binary_ir == NULL || 0 == binary_ir->size()) { return true; } - u32 ir_len = stage_ir->Get(0)->ir_info_len(); + u64 ir_len = stage_ir->Get(0)->ir_info_len(); u32* ir_buffer = new u32[ir_len]; auto ir_buffer_sp = SP(ir_buffer, u32); model_ctx->read_binary(binary_ir, 0, (u8*)ir_buffer, ir_len * sizeof(u32)); - auto pmem = must_alloc_device_mem(ir_len, "dynamic_ir", 4); + auto pmem = must_alloc_device_mem(devid, ir_len, "dynamic_ir", 4); m_device_mem_vec.push_back(pmem); - stage->ir_mem.Init("ir", m_handle, pmem, ir_buffer); + m_device_mem_ids.push_back(devid); + // TODO: support multi core ir + // only use commands 0 to run ir + stage->core_commands[0].ir_mem.Init("ir", m_handles[devid], pmem, ir_buffer); return true; } @@ -376,16 +528,26 @@ void Bmruntime::fill_subnet_tensor_map(net_ctx_t* net_ctx, net_stage_t* net_stag /* device memory offset from bmcompiler */ if (tensor->size() > 0) { bm_device_mem_t dev_mem; - if (tensor->device_addr() < net_stage->ctx_start) { - //subnet input may share mem with coeff mem - dev_mem = bm_mem_from_device( - tensor->device_addr() + net_stage->coeff_offset, tensor->size()); + if (m_flags & BM_RUNTIME_SHARE_MEM) { + if (tensor->device_addr() < net_stage->ctx_start) { + // subnet input may share mem with coeff mem + dev_mem = bm_mem_from_device( + (tensor->device_addr() & bmrt_arch_info::addr_mask()) + 
net_stage->coeff_offset, tensor->size()); + } else { + // rellocate + u64 addr = tensor->device_addr(); + u32 idx = get_mem_index(net_stage->ctx_borders, net_stage->ctx_start, addr); + addr += net_stage->ctx_offset[idx]; + addr &= bmrt_arch_info::addr_mask(); + dev_mem = bm_mem_from_device(addr, tensor->size()); + } } else { - // rellocate - u64 addr = tensor->device_addr(); - u32 idx = get_mem_index(net_stage->ctx_borders, net_stage->ctx_start, addr); - addr += net_stage->ctx_offset[idx]; - dev_mem = bm_mem_from_device(addr, tensor->size()); + /* + multi-core arch in load bmodel stage, use fake device_addr to record tensor_addr in bmodel + and assign real device_addr in launch tensor stage + */ + dev_mem.u.device.device_addr = tensor->device_addr(); + dev_mem.size = tensor->size(); } bm_tensor_ext.tensor_info.device_mem = dev_mem; bm_tensor_ext.mem_type |= MEM_TYPE_TPU; @@ -398,19 +560,33 @@ void Bmruntime::fill_subnet_tensor_map(net_ctx_t* net_ctx, net_stage_t* net_stag /* fix : pure cpu net input tensor_size = 0 */ float* host_mem = NULL; u64 mem_size = bmrt_shape_count(&max_shape_reg); - if (b_enable_mmap && tensor->size() > 0) { - // [NEED FIX] subnet i/o tensor might share a large device memory as compiler using compatc alloc, - // here, the tensor size is not accomdate with tensor shape. - //BMRT_ASSERT(mem_size * bmrt_data_type_size(bm_tensor_ext.tensor_info.dtype) == - // bm_mem_get_device_size(bm_tensor_ext.tensor_info.device_mem)); -#ifndef SOC_MODE - BMRT_LOG(FATAL, "Only soc mode run here"); -#else - bm_status_t ret = bm_mem_mmap_device_mem(m_handle, &bm_tensor_ext.tensor_info.device_mem, (u64 *)&host_mem); - if (ret == BM_SUCCESS) { - bm_tensor_ext.host_mem.type = HOST_MEM_MMAP; + if (m_flags & BM_RUNTIME_SHARE_MEM) { + if (b_enable_mmap && tensor->size() > 0) { + // [NEED FIX] subnet i/o tensor might share a large device memory as compiler using compatc alloc, + // here, the tensor size is not accomdate with tensor shape. 
+ //BMRT_ASSERT(mem_size * bmrt_data_type_size(bm_tensor_ext.tensor_info.dtype) == + // bm_mem_get_device_size(bm_tensor_ext.tensor_info.device_mem)); + #ifndef SOC_MODE + BMRT_LOG(FATAL, "Only soc mode run here"); + #else + bm_status_t ret = bm_mem_mmap_device_mem(m_handles[net_ctx->device_id], &bm_tensor_ext.tensor_info.device_mem, (u64 *)&host_mem); + if (ret == BM_SUCCESS) { + bm_tensor_ext.host_mem.type = HOST_MEM_MMAP; + } else { + BMRT_LOG(WRONG, "mmap failed, malloc host memory"); + if (!net_stage->cpu_addr && net_stage->cpu_mem_size > 0) { + net_stage->cpu_addr = new float[net_stage->cpu_mem_size]; + } + // To be compatible with the bmodel at low version + if (net_stage->cpu_addr) { + host_mem = tensor->cpu_addr() + net_stage->cpu_addr; + } else { + host_mem = new float[mem_size]; + } + bm_tensor_ext.host_mem.type = HOST_MEM_ALLOC; + } + #endif } else { - BMRT_LOG(WRONG, "mmap failed, malloc host memory"); if (!net_stage->cpu_addr && net_stage->cpu_mem_size > 0) { net_stage->cpu_addr = new float[net_stage->cpu_mem_size]; } @@ -422,18 +598,17 @@ void Bmruntime::fill_subnet_tensor_map(net_ctx_t* net_ctx, net_stage_t* net_stag } bm_tensor_ext.host_mem.type = HOST_MEM_ALLOC; } -#endif } else { - if (!net_stage->cpu_addr && net_stage->cpu_mem_size > 0) { - net_stage->cpu_addr = new float[net_stage->cpu_mem_size]; - } - // To be compatible with the bmodel at low version - if (net_stage->cpu_addr) { - host_mem = tensor->cpu_addr() + net_stage->cpu_addr; + /* + multi-core arch in load bmodel stage, use fake host_mem to record cpu_addr and if use_mmap + and malloc/assign real host_mem in launch tensor stage + */ + bm_tensor_ext.host_mem.tensor_cpu_addr = tensor->cpu_addr(); + if (b_enable_mmap && tensor->size() > 0) { + bm_tensor_ext.host_mem.type = HOST_MEM_MMAP; } else { - host_mem = new float[mem_size]; + bm_tensor_ext.host_mem.type = HOST_MEM_ALLOC; } - bm_tensor_ext.host_mem.type = HOST_MEM_ALLOC; } bm_tensor_ext.host_mem.addr = host_mem; 
bm_tensor_ext.host_mem.size = mem_size; @@ -450,8 +625,9 @@ void Bmruntime::fill_subnet_tensor_map(net_ctx_t* net_ctx, net_stage_t* net_stag void Bmruntime::fill_sub_net(ModelCtx* model_ctx, const Vector>* subnet_set_v, net_ctx_t* net_ctx, net_stage_t* net_stage) { - u64 subnet_bdc_offset = 0; - u64 subnet_gdma_offset = 0; + auto core_num = net_stage->core_commands.size(); + std::vector subnet_bdc_offset(core_num,0); + std::vector subnet_gdma_offset(core_num, 0); if (subnet_set_v == NULL) { net_stage->subnet_num = 0; return; @@ -532,39 +708,59 @@ void Bmruntime::fill_sub_net(ModelCtx* model_ctx, const Vectortpu_info.is_dynamic = sub_net->is_dynamic(); if (subnet->tpu_info.is_dynamic) { - subnet->tpu_info.ir_offset = sub_net->ir_offset(); - subnet->tpu_info.ir_len = sub_net->ir_len(); + // TODO: support multi core for dynamic + subnet->tpu_info.core_commands.resize(1); + subnet->tpu_info.core_commands[0].ir_offset = sub_net->ir_offset(); + subnet->tpu_info.core_commands[0].ir_len = sub_net->ir_len(); } else { - int group_num = sub_net->cmd_group()->size(); - subnet->tpu_info.cmdgroup_num = group_num; - subnet->tpu_info.bdc_group_id_v.resize(group_num); - subnet->tpu_info.gdma_group_id_v.resize(group_num); - subnet->tpu_info.bdc_cmd_byte_v.resize(group_num); - subnet->tpu_info.gdma_cmd_byte_v.resize(group_num); - - subnet->tpu_info.bdc_offset = subnet_bdc_offset; - subnet->tpu_info.gdma_offset = subnet_gdma_offset; - - for (int group_idx = 0; group_idx < group_num; group_idx++) { - auto cmd_group = sub_net->cmd_group()->Get(group_idx); - u32 group_bdc_num = cmd_group->bdc_num(); - u32 group_gdma_num = cmd_group->gdma_num(); - - subnet->tpu_info.bdc_group_id_v[group_idx] = group_bdc_num; - subnet->tpu_info.gdma_group_id_v[group_idx] = group_gdma_num; - u32 bdc_cmd_byte = cmd_group->bdc_cmd_byte(); - u32 gdma_cmd_byte = cmd_group->gdma_cmd_byte(); - subnet->tpu_info.bdc_cmd_byte_v[group_idx] = bdc_cmd_byte; - subnet->tpu_info.gdma_cmd_byte_v[group_idx] = gdma_cmd_byte; 
- if (bdc_cmd_byte > 0) { - subnet_bdc_offset += bdc_cmd_byte; - } else { - subnet_bdc_offset += group_bdc_num * (1 << BDC_ENGINE_CMD_ALIGNED_BIT); - } - if (gdma_cmd_byte > 0) { - subnet_gdma_offset += gdma_cmd_byte; - } else { - subnet_gdma_offset += group_gdma_num * (1 << GDMA_ENGINE_CMD_ALIGNED_BIT); + auto core_num = + sub_net->core_commands() ? sub_net->core_commands()->size() : 1; + subnet->tpu_info.core_commands.resize(core_num); + for (uint32_t core_idx = 0; core_idx < core_num; core_idx++) { + auto cmd_groups = + sub_net->core_commands() + ? sub_net->core_commands()->Get(core_idx)->gdma_tiu_commands() + : sub_net->cmd_group(); + int group_num = cmd_groups->size(); + subnet->tpu_info.core_commands[core_idx].bdc_id.resize(group_num); + subnet->tpu_info.core_commands[core_idx].gdma_id.resize(group_num); + subnet->tpu_info.core_commands[core_idx].bdc_cmd_byte.resize( + group_num); + subnet->tpu_info.core_commands[core_idx].gdma_cmd_byte.resize( + group_num); + + subnet->tpu_info.core_commands[core_idx].bdc_offset = + subnet_bdc_offset[core_idx]; + subnet->tpu_info.core_commands[core_idx].gdma_offset = + subnet_gdma_offset[core_idx]; + + for (int group_idx = 0; group_idx < group_num; group_idx++) { + auto cmd_group = cmd_groups->Get(group_idx); + u32 group_bdc_num = cmd_group->bdc_num(); + u32 group_gdma_num = cmd_group->gdma_num(); + + subnet->tpu_info.core_commands[core_idx].bdc_id[group_idx] = + group_bdc_num; + subnet->tpu_info.core_commands[core_idx].gdma_id[group_idx] = + group_gdma_num; + u32 bdc_cmd_byte = cmd_group->bdc_cmd_byte(); + u32 gdma_cmd_byte = cmd_group->gdma_cmd_byte(); + subnet->tpu_info.core_commands[core_idx].bdc_cmd_byte[group_idx] = + bdc_cmd_byte; + subnet->tpu_info.core_commands[core_idx].gdma_cmd_byte[group_idx] = + gdma_cmd_byte; + if (bdc_cmd_byte > 0) { + subnet_bdc_offset[core_idx] += bdc_cmd_byte; + } else { + subnet_bdc_offset[core_idx] += + group_bdc_num * (1 << BDC_ENGINE_CMD_ALIGNED_BIT); + } + if (gdma_cmd_byte > 0) { + 
subnet_gdma_offset[core_idx] += gdma_cmd_byte; + } else { + subnet_gdma_offset[core_idx] += + group_gdma_num * (1 << GDMA_ENGINE_CMD_ALIGNED_BIT); + } } } } @@ -639,12 +835,14 @@ bool Bmruntime::fill_net_ctx( std::vector> &stage_ctx_sizes, net_stage_t *stages) { + auto devid = net_ctx->device_id; if (params == NULL || params->size() == 0) { BMRT_LOG(WRONG, "Net[%s] has no parameter.", net_ctx->net_name.c_str()); return false; } // fill net_ctx info by first NetParameter auto param = params->Get(0); + net_ctx->core_num = param->core_num() != 0 ? param->core_num() : 1; net_ctx->is_dynamic = param->is_dynamic(); if (net_ctx->is_dynamic) { net_ctx->n_can_change = param->n_dynamic(); @@ -663,6 +861,9 @@ bool Bmruntime::fill_net_ctx( net_ctx->input_type_v.push_back((bm_data_type_t)tensor->data_type()); net_ctx->input_scale_v.push_back(tensor->scale()); net_ctx->input_zero_point_v.push_back(tensor->zero_point()); + net_ctx->input_hidden_v.push_back(tensor->hidden()); + net_ctx->input_index_v.push_back(tensor->index()); + } for (u32 i = 0; i < param->output_tensor()->size(); i++) { auto tensor = param->output_tensor()->Get(i); @@ -670,6 +871,8 @@ bool Bmruntime::fill_net_ctx( net_ctx->output_type_v.push_back((bm_data_type_t)tensor->data_type()); net_ctx->output_scale_v.push_back(tensor->scale()); net_ctx->output_zero_point_v.push_back(tensor->zero_point()); + net_ctx->output_hidden_v.push_back(tensor->hidden()); + net_ctx->output_index_v.push_back(tensor->index()); } // alloc ctx memory @@ -695,10 +898,17 @@ bool Bmruntime::fill_net_ctx( max_ctx_sizes.push_back(stage_sizes[i]); auto subnet = params->Get(stage_idx)->sub_net(); - if (subnet != NULL && subnet->size() > 1) multi_subnet = true; + if (subnet != NULL && subnet->size() > 1) { + multi_subnet = true; + } + stages[stage_idx].coeff_offset = m_local_coeffs[devid]->Register(model_ctx, stage->coeff_mem()); + if (!(m_flags & BM_RUNTIME_SHARE_MEM)) { + stages[stage_idx].neuron_size.resize(stage_sizes.size()); + 
stages[stage_idx].neuron_size = stage_sizes; + } stage_ctx_sizes.push_back(std::move(stage_sizes)); - - stages[stage_idx].coeff_offset = m_local_coeff->Register(model_ctx, stage->coeff_mem()); + stages[stage_idx].io_start = stage->io_addr(); + stages[stage_idx].io_size = stage->io_size(); } if (BM1682 == bmrt_arch_info::get_bmtpu_arch() && @@ -710,31 +920,395 @@ bool Bmruntime::fill_net_ctx( BMRT_ASSERT(0); } - if (!max_ctx_sizes.empty()) { + if (!max_ctx_sizes.empty() && (m_flags & BM_RUNTIME_SHARE_MEM)) { if (multi_subnet) { // Own an neuron memory if subnet number > 1 net_ctx->neuron_mem.resize(max_ctx_sizes.size()); for (u32 i = 0; i < max_ctx_sizes.size(); ++i) { auto &mem = net_ctx->neuron_mem[i]; - must_alloc_device_mem(&mem, max_ctx_sizes[i], "neuron_mem"); - m_device_mem_vec.push_back(mem); + must_alloc_device_mem_u64(devid, &mem, max_ctx_sizes[i], "neuron_mem"); + m_sg_device_mem_vec.push_back(mem); + m_sg_device_mem_ids.push_back(devid); } } else { // Update max_neuron_mem_size - update_max_neuron_mem(max_ctx_sizes); - net_ctx->neuron_mem = max_neuron_mem; + update_max_neuron_mem(devid, max_ctx_sizes); + net_ctx->neuron_mem = max_neuron_mem[devid]; + } + for (size_t stage_idx = 0; stage_idx < params->size(); stage_idx++) { + stages[stage_idx].neuron_mem = net_ctx->neuron_mem; + } + } + + if (net_ctx->addr_mode == 1) { + // addr alone allocate io mem + for (u32 stage_idx = 0; stage_idx < params->size(); stage_idx++) { + auto &s = stages[stage_idx]; + s.io_mem = must_alloc_device_mem(devid, s.io_size, "io_mem"); + m_device_mem_vec.push_back(s.io_mem); + m_device_mem_ids.push_back(devid); + s.io_offset = bm_mem_get_device_addr(s.io_mem) - s.io_start; } - } else { - BMRT_LOG(INFO, "net[%s] has no ctx mem", net_ctx->net_name.c_str()); } return true; } +bool Bmruntime::cascade_insert_net(int net_idx, net_ctx_t *net_ctx, + const std::string &main_name) { + if (net_ctx->device_id == 0 && net_ctx->step_id == 0) { + // fisrt step + net_cascade_t nc; + 
nc.is_dynamic = net_ctx->is_dynamic; + nc.addr_mode = net_ctx->addr_mode; + nc.main_name = main_name; + nc.num_device = net_ctx->device_id + 1; + nc.step_ids.push_back({net_idx}); + m_net_cascade_v.emplace_back(nc); + return true; + } + for (auto &v : m_net_cascade_v) { + if (v.main_name == main_name) { + if (v.num_device <= (int)net_ctx->device_id) { + v.num_device = net_ctx->device_id + 1; + } + if (net_ctx->step_id == v.step_ids.size() - 1) { + v.step_ids[net_ctx->step_id].push_back(net_idx); + return true; + } else if (net_ctx->step_id == v.step_ids.size()) { + v.step_ids.push_back({net_idx}); + return true; + } + break; + } + } + BMRT_LOG(WRONG, "Error: load net[%s] failed", main_name.c_str()); + return false; +} + +struct tensor_info_t { + string name; + bm_data_type_t type; + float scale; + int zp; + bm_shape_t shape; + bm_device_mem_t mem; + size_t bytes; + int index; + int device_id; + + bool operator==(const tensor_info_t &other) const { + return this->name == other.name && this->device_id == other.device_id; + } +}; + +struct CompareTensors { + bool operator()(const tensor_info_t &a, const tensor_info_t &b) const { + if (a.device_id < b.device_id) { + return true; + } else if (a.device_id > b.device_id) { + return false; + } else { + return a.index < b.index; + } + } +}; + +void Bmruntime::cascade_update_output(net_cascade_t &v) { + // all steps is tensor parallel, not master-slave structure + // bool all_steps_tp = true; + // for (size_t s = 0; s < v.step_ids.size(); s++) { + // if (v.step_ids[s].size() != m_device_num) { + // all_steps_tp = false; + // } + // } + + std::vector output_tensors; + for (size_t s = 0; s < v.step_ids.size(); s++) { + for (auto &idx : v.step_ids[s]) { + // make sure step is correct + BMRT_ASSERT_INFO((idx >= 0 && idx < (int)m_net_ctx_v.size()), + "Error: step [%d] is empty in net[%s]", s, + v.main_name.c_str()); + BMRT_ASSERT_INFO(m_net_ctx_v[idx]->step_id == (uint32_t)s, + "\n step: %d, net_idx: %d, net_step:%d\nError: step 
" + "error in net[%s]", + s, idx, m_net_ctx_v[idx]->step_id, v.main_name.c_str()); + auto ctx = m_net_ctx_v[idx]; + auto &outputs = ctx->output_name_v; + for (size_t i = 0; i < outputs.size(); i++) { + if (ctx->output_hidden_v[i] == 2 || ctx->output_hidden_v[i] == 4) { + tensor_info_t t; + t.name = outputs[i]; + t.type = ctx->output_type_v[i]; + t.scale = ctx->output_scale_v[i]; + t.zp = ctx->output_zero_point_v[i]; + t.shape = ctx->net_info.stages[0].output_shapes[i]; + t.mem = ctx->net_info.stages[0].output_mems[i]; + t.bytes = ctx->net_info.max_output_bytes[i]; + t.index = ctx->output_index_v[i]; + t.device_id = ctx->device_id; + output_tensors.emplace_back(t); + } else if (ctx->output_hidden_v[i] == 0) { + // create output hidden tensor + mem_cascade_t output_hidden; + output_hidden.name = outputs[i]; + output_hidden.device_id = ctx->device_id; + bmrt_tensor_with_device( + &output_hidden.tensor, + ctx->stage_v[0]->output_v[i].dev_mem, + ctx->output_type_v[i], + ctx->stage_v[0]->output_v[i].shape); + // output_hidden.tensor.dtype = ctx->output_type_v[i]; + // output_hidden.tensor.st_mode = BM_STORE_1N; + // output_hidden.tensor.shape = ctx->stage_v[0]->output_v[i].shape; + v.hidden_outputs.push_back(output_hidden); + v.hidden_outputs_step_ids.push_back(s); + } + } + } + } + + std::stable_sort(output_tensors.begin(), output_tensors.end(), CompareTensors()); + for (auto &out : output_tensors) { + v.output_names.push_back(out.name); + v.output_types.push_back(out.type); + v.output_scales.push_back(out.scale); + v.output_zps.push_back(out.zp); + v.output_shapes.push_back(out.shape); + v.output_mems.push_back(out.mem); + v.output_bytes.push_back(out.bytes); + v.output_loc_devices.push_back(out.device_id); + } +} + +void Bmruntime::cascade_update_input(net_cascade_t &v) { + std::vector input_tensors; + for (size_t s = 0; s < v.step_ids.size(); s++) { + for (auto &idx : v.step_ids[s]) { + auto ctx = m_net_ctx_v[idx]; + auto &inputs = ctx->input_name_v; + for (size_t i 
= 0; i < inputs.size(); i++) { + if (ctx->input_hidden_v[i] == 1 || ctx->input_hidden_v[i] == 3) { + tensor_info_t t; + t.name = inputs[i]; + t.type = ctx->input_type_v[i]; + t.scale = ctx->input_scale_v[i]; + t.zp = ctx->input_zero_point_v[i]; + t.shape = ctx->net_info.stages[0].input_shapes[i]; + t.mem = ctx->net_info.stages[0].input_mems[i]; + t.bytes = ctx->net_info.max_input_bytes[i]; + t.index = ctx->input_index_v[i]; + t.device_id = ctx->device_id; + if (std::find(input_tensors.begin(), input_tensors.end(), t) == input_tensors.end()) { + input_tensors.emplace_back(t); + } + } else if (ctx->input_hidden_v[i] == 0) { + bool find = false; + for (auto &h : v.hidden_outputs) { + if (h.device_id == ctx->device_id && h.name == inputs[i]) { + bmrt_tensor_with_device( + &h.tensor, + ctx->stage_v[0]->input_v[i].dev_mem, + ctx->input_type_v[i], + ctx->stage_v[0]->input_v[i].shape); + find = true; + break; + } + } + if (find) { + continue; + } + // create output hidden tensor + mem_cascade_t input_hidden; + input_hidden.name = inputs[i]; + input_hidden.device_id = ctx->device_id; + bmrt_tensor_with_device( + &input_hidden.tensor, + ctx->stage_v[0]->input_v[i].dev_mem, + ctx->input_type_v[i], + ctx->stage_v[0]->input_v[i].shape); + // input_hidden.tensor.dtype = ctx->input_type_v[i]; + // input_hidden.tensor.st_mode = BM_STORE_1N; + // input_hidden.tensor.shape = ctx->stage_v[0]->input_v[i].shape; + v.hidden_inputs.push_back(input_hidden); + v.hidden_inputs_step_ids.push_back(s); + } + } + } + } + + std::stable_sort(input_tensors.begin(), input_tensors.end(), CompareTensors()); + for (auto &in : input_tensors) { + v.input_names.push_back(in.name); + v.input_types.push_back(in.type); + v.input_scales.push_back(in.scale); + v.input_zps.push_back(in.zp); + v.input_shapes.push_back(in.shape); + v.input_mems.push_back(in.mem); + v.input_bytes.push_back(in.bytes); + v.input_loc_devices.push_back(in.device_id); + } +} + +// FIXME: It is not clear why aligning to 128/256 bytes 
leads to wrong results in case llama2-13B. +// TODO: Try to create a life time for hidden tensors to save more memory. +void Bmruntime::cascade_update_max_hidden_buffer_size(net_cascade_t &v) { + for (int d = 0; d < m_device_num; ++d) { + std::vector in_max_size(v.step_ids.size(), 0); + // for (size_t i = 0; i < v.hidden_inputs.size(); ++i) { + // auto &t = v.hidden_inputs[i]; + // if (t.device_id != d) { + // continue; + // } + // u64 size = bmrt_tensor_bytesize(&t.tensor); + // // We align to 4096 bytes, because it will affect GDMA speed. + // size = ALIGN(size, 4096); + // in_max_size[v.hidden_inputs_step_ids[i]] += size; + // } + std::vector out_max_size(v.step_ids.size(), 0); + for (size_t i = 0; i < v.hidden_outputs.size(); ++i) { + auto &t = v.hidden_outputs[i]; + if (t.device_id != d) { + continue; + } + u64 size = bmrt_tensor_bytesize(&t.tensor); + // We align to 4096 bytes, because it will affect GDMA speed. + size = ALIGN(size, 4096); + out_max_size[v.hidden_outputs_step_ids[i]] += size; + } + u64 output_max_size = 0; + u64 input_max_size = 0; + for (size_t i = 0; i < v.step_ids.size(); ++i) { + if (output_max_size < out_max_size[i]) { + output_max_size = out_max_size[i]; + } + if (input_max_size < in_max_size[i]) { + input_max_size = in_max_size[i]; + } + } + if (input_max_size + output_max_size > max_hidden_buffer_size[d]) { + max_hidden_buffer_size[d] = input_max_size + output_max_size; + } + } +} + +void Bmruntime::cascade_update_hidden_buffer(net_cascade_t &v) { + std::vector> offset_v(m_device_num, std::vector(v.step_ids.size(), 0)); + for (size_t i = 0; i < v.hidden_outputs.size(); ++i) { + auto &t = v.hidden_outputs[i]; + int d = t.device_id; + int s = v.hidden_outputs_step_ids[i]; + u64 size = bmrt_tensor_bytesize(&t.tensor); + u64 addr = bm_mem_get_device_addr(max_hidden_buffer[d]) + offset_v[d][s]; + bm_set_device_mem(&t.tensor.device_mem, size, addr); + + // We align to 4096 bytes, because it will affect GDMA speed. 
+ size = ALIGN(size, 4096); + offset_v[d][s] += size; + } + + for (size_t i = 0; i < offset_v.size(); ++i) { + u64 max_size = 0; + for (size_t j = 0; j < v.step_ids.size(); ++j) { + if (offset_v[i][j] > max_size) { + max_size = offset_v[i][j]; + } + } + for (size_t j = 0; j < v.step_ids.size(); ++j) { + offset_v[i][j] = max_size; + } + } + + // for (size_t i = 0; i < v.hidden_inputs.size(); ++i) { + // auto &t = v.hidden_inputs[i]; + // int d = t.device_id; + // int s = v.hidden_inputs_step_ids[i]; + // u64 size = bmrt_tensor_bytesize(&t.tensor); + // u64 addr = bm_mem_get_device_addr(max_hidden_buffer[d]) + offset_v[d][s]; + // bm_set_device_mem(&t.tensor.device_mem, size, addr); + + // // We align to 4096 bytes, because it will affect GDMA speed. + // size = ALIGN(size, 4096); + // offset_v[d][s] += size; + // } +} + +void Bmruntime::cascade_update_all_info() { + using_fast_allreduce = (getenv("BMRUNTIME_USING_FAST_ALLREDUCE") != NULL); + if(using_fast_allreduce) { + BMRT_LOG(INFO, "use fast AllreduceOp because BMRUNTIME_USING_FAST_ALLREDUCE env is set."); + } + // force use fast AllreduceOp + using_fast_allreduce = true; + for (auto &v : m_net_cascade_v) { + // update output hidden tensor + cascade_update_output(v); + // update input hidden tensor + cascade_update_input(v); + } + + // Alloc input/output hidden tensor + // 1. Hidden tensors in the same step should be allocated + // 2. 
Hidden tensors in different steps share the same memory block + std::vector size_v; + for (auto &v : m_net_cascade_v) { + cascade_update_max_hidden_buffer_size(v); + } + for (int d = 0; d < m_device_num; ++d) { + if ((u64)(bm_mem_get_device_size(max_hidden_buffer[d])) < + max_hidden_buffer_size[d]) { + must_alloc_device_mem(d, &max_hidden_buffer[d], max_hidden_buffer_size[d], + string("hidden_buffer") + std::to_string(hidden_buffer_num[d])); + m_device_mem_vec.push_back(max_hidden_buffer[d]); + m_device_mem_ids.push_back(d); + hidden_buffer_num[d]++; + } + } + for (auto &v : m_net_cascade_v) { + cascade_update_hidden_buffer(v); + } + + // info for c interface + for (auto &v : m_net_cascade_v) { + cascade_fill_net_info(&v); + } +} + bool Bmruntime::load_bmodel_net(ModelCtx* model_ctx, int net_idx, net_ctx_t* net_ctx) { - auto net_params = model_ctx->model()->net()->Get(net_idx)->parameter(); + auto net = model_ctx->model()->net()->Get(net_idx); + net_ctx->device_id = 0; + net_ctx->step_id = 0; + net_ctx->in_cascade = false; + net_ctx->addr_mode = net->addr_mode(); + auto param = net->parameter()->Get(0); + net_ctx->is_dynamic = param->is_dynamic(); + if (net_ctx->is_dynamic) { + net_ctx->n_can_change = param->n_dynamic(); + net_ctx->h_w_can_change = param->h_w_dynamic(); + if (!param->n_dynamic()) { + BMRT_LOG(WARNING, "Net[%s] may contains layers that not support dynamic N", net_ctx->net_name.c_str()); + } + if (!param->h_w_dynamic()) { + BMRT_LOG(WARNING, "Net[%s] may contains layers that not support dynamic H/W", net_ctx->net_name.c_str()); + } + } + if (net->cascade()) { + auto main_name = net->cascade()->main_name()->str(); + if (!main_name.empty()) { + net_ctx->device_id = net->cascade()->device_id() % m_device_num; + net_ctx->step_id = net->cascade()->step(); + net_ctx->in_cascade = true; + auto ret = cascade_insert_net(net_idx, net_ctx, main_name); + if (false == ret) { + return false; + } + } + } + auto devid = net_ctx->device_id; + auto net_params = 
net->parameter(); std::vector> stage_ctx_sizes; auto stages = new net_stage_t[net_params->size()]; if (false == fill_net_ctx(model_ctx, net_ctx, net_params, stage_ctx_sizes, stages)) { @@ -749,6 +1323,9 @@ bool Bmruntime::load_bmodel_net(ModelCtx* model_ctx, int net_idx, net_ctx_t* net // fill ctx and coeff net_stage->ctx_start = param->ctx_addr(); + + // Use relative address since 1688. + auto ctx_start = net_stage->ctx_start & bmrt_arch_info::addr_mask(); auto &ctx_sizes = stage_ctx_sizes[stage_idx]; if (!ctx_sizes.empty()) { @@ -759,13 +1336,22 @@ bool Bmruntime::load_bmodel_net(ModelCtx* model_ctx, int net_idx, net_ctx_t* net { net_stage->ctx_borders[i] = net_stage->ctx_borders[i - 1] + ctx_sizes[i]; } - for (size_t i = 0; i < ctx_sizes.size(); ++i) - { - u64 ctx_addr = bm_mem_get_device_addr(net_ctx->neuron_mem[i]); - net_stage->ctx_offset[i] = ctx_addr - net_stage->ctx_start; - if (i > 0) + /* + Multi-core runtime not set ctx_offset in load bmodel stage, + set ctx_offset in launch tensor stage + */ + if (m_flags & BM_RUNTIME_SHARE_MEM) { + for (size_t i = 0; i < ctx_sizes.size(); ++i) { - net_stage->ctx_offset[i] -= net_stage->ctx_borders[i - 1]; + if (net_stage->neuron_mem[i].size > 0) { + u64 ctx_addr = bm_mem_get_device_addr_u64(net_stage->neuron_mem[i]); + net_stage->ctx_offset[i] = ctx_addr - ctx_start; + } else { + net_stage->ctx_offset[i] = 0; + } + if (i > 0) { + net_stage->ctx_offset[i] -= net_stage->ctx_borders[i - 1]; + } } } } else { @@ -777,24 +1363,43 @@ bool Bmruntime::load_bmodel_net(ModelCtx* model_ctx, int net_idx, net_ctx_t* net net_stage->cpu_mem_size = param->cpu_mem_size(); net_stage->cpu_addr = nullptr; - m_profile->record_alloc_device_mem(m_local_coeff->GetCoeffDeviceMem(), "coeff"); + + mem_pair_t mem_pair = { + bm_mem_get_device_addr_u64(m_local_coeffs[devid]->GetCoeffDeviceMem()), + bm_mem_get_device_size_u64(m_local_coeffs[devid]->GetCoeffDeviceMem())}; + m_profile->record_alloc_device_mem(mem_pair, "coeff"); // setup input and 
output tensor info - fill_tensor_attr( - param->input_tensor(), net_stage->input_v, - net_stage->ctx_start, - net_stage->ctx_borders, net_stage->ctx_offset); - fill_tensor_attr( - param->output_tensor(), net_stage->output_v, - net_stage->ctx_start, - net_stage->ctx_borders, net_stage->ctx_offset); + if (net_ctx->addr_mode == 1) { + fill_io_attr(param->input_tensor(), net_stage->input_v, + net_stage->io_offset); + fill_io_attr(param->output_tensor(), net_stage->output_v, + net_stage->io_offset); + } else if(net_ctx->addr_mode == 2) { + fill_io_tag_attr(param->input_tensor(), net_stage->input_v, + net_stage->ctx_start, net_stage->ctx_borders, + net_stage->ctx_offset, m_flags); + fill_io_tag_attr(param->output_tensor(), net_stage->output_v, + net_stage->ctx_start, net_stage->ctx_borders, + net_stage->ctx_offset, m_flags); + } + else { + fill_tensor_attr(param->input_tensor(), net_stage->input_v, + net_stage->ctx_start, net_stage->ctx_borders, + net_stage->ctx_offset, m_flags); + fill_tensor_attr(param->output_tensor(), net_stage->output_v, + net_stage->ctx_start, net_stage->ctx_borders, + net_stage->ctx_offset, m_flags); + } // setup subnet + const auto core_num = param->core_num() != 0 ? 
param->core_num() : 1; + net_stage->core_commands.resize(core_num); fill_sub_net(model_ctx, param->sub_net(), net_ctx, net_stage); // setup gdma/bdc, or ir - setup_ir_context(model_ctx, param->binary_ir(), param->stage_ir(), net_stage); - setup_cmd_context(model_ctx, param->cmd_group(), net_stage); + setup_ir_context(model_ctx, param->binary_ir(), param->stage_ir(), net_stage, devid); + setup_cmd_context(model_ctx, param, net_stage, devid); // setup profile info if (m_profile->is_enabled()) { @@ -805,7 +1410,7 @@ bool Bmruntime::load_bmodel_net(ModelCtx* model_ctx, int net_idx, net_ctx_t* net return true; } -bool Bmruntime::load_bmodel_net(ModelCtx* model_ctx, int net_idx, std::shared_ptr kernel_module) +bool Bmruntime::load_bmodel_net(ModelCtx* model_ctx, int net_idx) { auto net = model_ctx->model()->net()->Get(net_idx); for (auto each_net : m_net_ctx_v) { @@ -816,14 +1421,13 @@ bool Bmruntime::load_bmodel_net(ModelCtx* model_ctx, int net_idx, std::shared_pt } net_ctx_t* net_ctx = new net_ctx_t(); net_ctx->net_name = net->name()->str(); - net_ctx->kernel_module_ = kernel_module; // fill each stage info if (false == load_bmodel_net(model_ctx, net_idx, net_ctx)) { BMRT_LOG(WRONG, "Error: load net[%s] failed", net_ctx->net_name.c_str()); return false; } - + net_ctx->kernel_module_ = kernel_modules[net_ctx->device_id]; update_max_middlebuf_size(net_ctx); fill_net_info(net_ctx); m_net_ctx_v.push_back(net_ctx); @@ -856,6 +1460,7 @@ static void fill_middlebuff_size(const vector& attr_v, } void Bmruntime::update_max_middlebuf_size(net_ctx_t* net_ctx) { + auto devid = net_ctx->device_id; u32 input_num = net_ctx->input_name_v.size(); u32 output_num = net_ctx->output_name_v.size(); vector input_size_v(input_num, 0); @@ -871,13 +1476,14 @@ void Bmruntime::update_max_middlebuf_size(net_ctx_t* net_ctx) { for (auto size : output_size_v) { total_middelbuf_size += size; } - if (total_middelbuf_size > max_middle_buffer_size) { - max_middle_buffer_size = total_middelbuf_size; + if 
(total_middelbuf_size > max_middle_buffer_size[devid]) { + max_middle_buffer_size[devid] = total_middelbuf_size; } } void Bmruntime::update_net_middlebuf(net_ctx_t* net_ctx) { + auto devid = net_ctx->device_id; bm_device_mem_t mem; u32 input_num = net_ctx->input_name_v.size(); u32 output_num = net_ctx->output_name_v.size(); @@ -887,7 +1493,7 @@ void Bmruntime::update_net_middlebuf(net_ctx_t* net_ctx) fill_middlebuff_size(stage->input_v, input_size_v, net_ctx->is_dynamic); fill_middlebuff_size(stage->output_v, output_size_v, net_ctx->is_dynamic); } - u64 addr = bm_mem_get_device_addr(max_middle_buffer); + u64 addr = bm_mem_get_device_addr(max_middle_buffer[devid]); for (u32 i = 0; i < input_num; i++) { if (input_size_v[i] == 0) { bm_set_device_mem(&mem, 0, 0); @@ -908,25 +1514,27 @@ void Bmruntime::update_net_middlebuf(net_ctx_t* net_ctx) } } -void Bmruntime::update_max_neuron_mem(const std::vector &sizes) +void Bmruntime::update_max_neuron_mem(uint32_t devid, const std::vector &sizes) { - size_t size_min = std::min(sizes.size(), max_neuron_mem.size()), i; + size_t size_min = std::min(sizes.size(), max_neuron_mem[devid].size()), i; for (i = 0; i < size_min; ++i) { - auto &mem = max_neuron_mem[i]; - if (sizes[i] > bm_mem_get_device_size(mem)) { + auto &mem = max_neuron_mem[devid][i]; + if (sizes[i] > bm_mem_get_device_size_u64(mem)) { // DON'T free old memory. // In case any previously loaded model have already been bound with them. 
- must_alloc_device_mem(&mem, sizes[i], "neuron_mem"+std::to_string(i)); - m_device_mem_vec.push_back(mem); + must_alloc_device_mem_u64(devid, &mem, sizes[i], "neuron_mem" + std::to_string(i)); + m_sg_device_mem_vec.push_back(mem); + m_sg_device_mem_ids.push_back(devid); } } for (; i < sizes.size(); ++i) { - bm_device_mem_t mem; - must_alloc_device_mem(&mem, sizes[i], "neuron_mem"+std::to_string(i)); - max_neuron_mem.push_back(mem); - m_device_mem_vec.push_back(mem); + bm_device_mem_u64_t mem; + must_alloc_device_mem_u64(devid, &mem, sizes[i], "neuron_mem" + std::to_string(i)); + max_neuron_mem[devid].push_back(mem); + m_sg_device_mem_vec.push_back(mem); + m_sg_device_mem_ids.push_back(devid); } } @@ -946,14 +1554,18 @@ void Bmruntime::fill_net_info(net_ctx_t* net_ctx) { auto& net_info = net_ctx->net_info; net_info.name = net_ctx->net_name.c_str(); + net_info.core_num = net_ctx->core_num; net_info.is_dynamic = net_ctx->is_dynamic; + net_info.addr_mode = net_ctx->addr_mode; net_info.input_num = net_ctx->input_name_v.size(); net_info.input_dtypes = net_ctx->input_type_v.data(); net_info.input_scales = net_ctx->input_scale_v.data(); net_info.input_zero_point = net_ctx->input_zero_point_v.data(); net_info.input_names = (const char**)malloc(net_info.input_num * sizeof(char*)); + net_info.input_loc_devices = (int*)malloc(net_info.input_num * sizeof(int)); for (int i = 0; i < net_info.input_num; i++) { net_info.input_names[i] = net_ctx->input_name_v[i].c_str(); + net_info.input_loc_devices[i] = 0; } net_info.output_num = net_ctx->output_name_v.size(); net_info.output_dtypes = net_ctx->output_type_v.data(); @@ -971,8 +1583,10 @@ void Bmruntime::fill_net_info(net_ctx_t* net_ctx) net_info.stages = (bm_stage_info_t*)malloc(net_info.stage_num * sizeof(bm_stage_info_t)); for (int i = 0; i < net_info.stage_num; i++) { net_info.stages[i].input_shapes = (bm_shape_t*)malloc(sizeof(bm_shape_t) * net_info.input_num); + net_info.stages[i].input_mems = 
(bm_device_mem_t*)malloc(sizeof(bm_device_mem_t) * net_info.input_num); for (int j = 0; j < net_info.input_num; j++) { net_info.stages[i].input_shapes[j] = net_ctx->stage_v[i]->input_v[j].shape; + net_info.stages[i].input_mems[j] = net_ctx->stage_v[i]->input_v[j].dev_mem; size_t temp_size = size_4N_align(net_info.stages[i].input_shapes[j], net_info.input_dtypes[j]); if (temp_size > net_info.max_input_bytes[j]) { @@ -981,8 +1595,12 @@ void Bmruntime::fill_net_info(net_ctx_t* net_ctx) } net_info.stages[i].output_shapes = (bm_shape_t*)malloc(sizeof(bm_shape_t) * net_info.output_num); + net_info.stages[i].output_mems = (bm_device_mem_t*)malloc(sizeof(bm_device_mem_t) * net_info.output_num); + net_info.output_loc_devices = (int*)malloc(net_info.output_num * sizeof(int)); for (int j = 0; j < net_info.output_num; j++) { + net_info.output_loc_devices[j] = 0; net_info.stages[i].output_shapes[j] = net_ctx->stage_v[i]->output_v[j].shape; + net_info.stages[i].output_mems[j] = net_ctx->stage_v[i]->output_v[j].dev_mem; size_t temp_size = size_4N_align(net_info.stages[i].output_shapes[j], net_info.output_dtypes[j]); if (temp_size > net_info.max_output_bytes[j]) { @@ -1000,6 +1618,86 @@ void Bmruntime::free_net_info(net_ctx_t* net_ctx) for (int i = 0; i < net_info.stage_num; i++) { free(net_info.stages[i].input_shapes); free(net_info.stages[i].output_shapes); + free(net_info.stages[i].input_mems); + free(net_info.stages[i].output_mems); + } + free(net_info.max_input_bytes); + free(net_info.max_output_bytes); + free(net_info.input_loc_devices); + free(net_info.output_loc_devices); + free(net_info.stages); +} + +void Bmruntime::free_dyn_neuron(net_ctx_t* net_ctx) { + BMRT_DEBUG("im free_dyn_neuron\n"); + auto dev_id = net_ctx->device_id; + for (auto &dyn_mem_pair : net_ctx->dyn_neuron_stage_dict) { + auto dyn_neuron_stage = dyn_mem_pair.second; + for (size_t i = 0; i < dyn_neuron_stage->neuron_mem.size(); ++i) { + BMRT_DEBUG("Free device memory, byte size %d\n", 
bm_mem_get_device_size_u64(dyn_neuron_stage->neuron_mem[i])); + must_free_device_mem_u64(dev_id, dyn_neuron_stage->neuron_mem[i]); + } + delete dyn_mem_pair.second; + } +} + +void Bmruntime::cascade_fill_net_info(net_cascade_t * net_cascade) { + auto &net_info = net_cascade->net_info; + net_info.name = net_cascade->main_name.c_str(); + net_info.is_dynamic = net_cascade->is_dynamic; + net_info.input_num = net_cascade->input_names.size(); + net_info.input_dtypes = net_cascade->input_types.data(); + net_info.input_scales = net_cascade->input_scales.data(); + net_info.input_zero_point = net_cascade->input_zps.data(); + net_info.input_names = (const char**)malloc(net_info.input_num * sizeof(char*)); + net_info.input_loc_devices = (int*)malloc(net_info.input_num * sizeof(int)); + for (int i = 0; i < net_info.input_num; i++) { + net_info.input_names[i] = net_cascade->input_names[i].c_str(); + net_info.input_loc_devices[i] = net_cascade->input_loc_devices[i]; + } + net_info.output_num = net_cascade->output_names.size(); + net_info.output_dtypes = net_cascade->output_types.data(); + net_info.output_scales = net_cascade->output_scales.data(); + net_info.output_zero_point = net_cascade->output_zps.data(); + net_info.output_names = (const char**)malloc(net_info.output_num * sizeof(char*)); + for (int i = 0; i < net_info.output_num; i++) { + net_info.output_names[i] = net_cascade->output_names[i].c_str(); + } + net_info.max_input_bytes = (size_t*)malloc(net_info.input_num * sizeof(size_t)); + net_info.max_output_bytes = (size_t*)malloc(net_info.output_num * sizeof(size_t)); + memset(net_info.max_input_bytes, 0, net_info.input_num * sizeof(size_t)); + memset(net_info.max_output_bytes, 0, net_info.output_num * sizeof(size_t)); + net_info.stage_num = 1; + net_info.stages = (bm_stage_info_t*)malloc(net_info.stage_num * sizeof(bm_stage_info_t)); + net_info.stages[0].input_shapes = (bm_shape_t*)malloc(sizeof(bm_shape_t) * net_info.input_num); + net_info.stages[0].input_mems = 
(bm_device_mem_t*)malloc(sizeof(bm_device_mem_t) * net_info.input_num); + for (int j = 0; j < net_info.input_num; j++) { + net_info.stages[0].input_shapes[j] = net_cascade->input_shapes[j]; + net_info.stages[0].input_mems[j] = net_cascade->input_mems[j]; + net_info.max_input_bytes[j] = net_cascade->input_bytes[j]; + } + net_info.stages[0].output_shapes = + (bm_shape_t*)malloc(sizeof(bm_shape_t) * net_info.output_num); + net_info.stages[0].output_mems = (bm_device_mem_t*)malloc(sizeof(bm_device_mem_t) * net_info.output_num); + net_info.output_loc_devices = (int*)malloc(net_info.output_num * sizeof(int)); + for (int j = 0; j < net_info.output_num; j++) { + net_info.stages[0].output_shapes[j] = net_cascade->output_shapes[j]; + net_info.stages[0].output_mems[j] = net_cascade->output_mems[j]; + net_info.max_output_bytes[j] = net_cascade->output_bytes[j]; + net_info.output_loc_devices[j] = net_cascade->output_loc_devices[j]; + } + net_info.addr_mode = net_cascade->addr_mode; +} + +void Bmruntime::cascade_free_net_info(net_cascade_t * net_cascade) { + auto& net_info = net_cascade->net_info; + free(net_info.input_names); + free(net_info.output_names); + for (int i = 0; i < net_info.stage_num; i++) { + free(net_info.stages[i].input_shapes); + free(net_info.stages[i].output_shapes); + free(net_info.stages[i].input_mems); + free(net_info.stages[i].output_mems); } free(net_info.max_input_bytes); free(net_info.max_output_bytes); @@ -1010,46 +1708,61 @@ bool Bmruntime::load_bmodel(ModelCtx* model_ctx) { bool ret = true; string model_chip = model_ctx->model()->chip()->str(); - if (model_chip != bmrt_arch_info::get_bmtpu_name() && - // BM1684X was firstly named BM1686, then Athena2 took it's name as BM1686. + if (model_chip != bmrt_arch_info::get_bmtpu_name()) { + // BM1684X was firstly named BM1686, then Athena2 took it's name as BM1688. // So there are bmodels claim to be BM1686 but are actually BM1684X. // And we happily allow this unspeakable abomination. 
- !(model_chip == "BM1686" && bmrt_arch_info::get_bmtpu_name() == "BM1684X")) { - BMRT_LOG(WRONG, "Error: runtime arch[%s] is not the same with bmodel arch[%s]", + if(model_chip == "BM1686" && bmrt_arch_info::get_bmtpu_name() == "BM1684X") { + } else if(model_chip == "CV186X" && bmrt_arch_info::get_bmtpu_name() == "BM1688") { + } else if(model_chip == "MARS3"){ + } else if(model_chip == "BM1686" && bmrt_arch_info::get_bmtpu_name() == "BM1688") { + } else if(model_chip == "SG2380" && bmrt_arch_info::get_bmtpu_name() == "SG2380") { + } else { + BMRT_LOG(WRONG, "Error: runtime arch[%s] is not the same with bmodel arch[%s]", bmrt_arch_info::get_bmtpu_name().c_str(), model_chip.c_str()); - return false; + return false; + } } + auto version = model_ctx->model()->version()->c_str(); + const char *bmrt_version = _bmrt_version(); + const char *sophon_driver_version = _libsophon_version(); + + BMRT_LOG(INFO, "Bmodel loaded, version %s", version); + BMRT_LOG(INFO, "BM Runtime: %s", bmrt_version); + BMRT_LOG(INFO, "BM Sophon driver version: %s", sophon_driver_version); u32 load_net_num = model_ctx->model()->net()->size(); BMRT_LOG(INFO, "pre net num: %lu, load net num: %u", static_cast(m_net_ctx_v.size()), load_net_num); - if (m_net_ctx_v.size() + load_net_num > MAX_NET_NUM) { - BMRT_LOG(WRONG, "Error: max net num [%d], new %d nets can't be loaded", MAX_NET_NUM, load_net_num); - return false; - } + // remove load net num restrict + // if (m_net_ctx_v.size() + load_net_num > MAX_NET_NUM) { + // BMRT_LOG(WRONG, "Error: max net num [%d], new %d nets can't be loaded", MAX_NET_NUM, load_net_num); + // return false; + // } - std::shared_ptr kernel_module = nullptr; - if (bmrt_arch_info::get_bmtpu_arch() == BM1684X) { - load_tpu_module(model_ctx, kernel_module); - } + load_tpu_module(model_ctx); + load_cpu_module(model_ctx); u32 cur_net_idx = m_net_ctx_v.size(); for (u32 net_idx = 0; net_idx < load_net_num; net_idx++) { - ret = load_bmodel_net(model_ctx, net_idx, kernel_module); + 
ret = load_bmodel_net(model_ctx, net_idx); if (!ret) { break; } } - + cascade_update_all_info(); /* Although ret may be false, but we need to set middle buffer and neuron_mem * for the net that had beed loaded successfully. */ // Process middle buffer and neuron memory share optimizaton. // Middle buffer is only used for BM1684 now, because of 1N/2N/4N transpose. - if ((u64)(bm_mem_get_device_size(max_middle_buffer)) < - max_middle_buffer_size) { - must_alloc_device_mem(&max_middle_buffer, max_middle_buffer_size, - string("middle_buffer") + std::to_string(middle_buffer_num)); - m_device_mem_vec.push_back(max_middle_buffer); - middle_buffer_num++; + for (int i = 0; i < m_device_num; i++) { + if ((u64)(bm_mem_get_device_size(max_middle_buffer[i])) < + max_middle_buffer_size[i]) { + must_alloc_device_mem(i, &max_middle_buffer[i], max_middle_buffer_size[i], + string("middle_buffer") + std::to_string(middle_buffer_num[i])); + m_device_mem_vec.push_back(max_middle_buffer[i]); + m_device_mem_ids.push_back(i); + middle_buffer_num[i]++; + } } for (u32 net_idx = cur_net_idx; net_idx < m_net_ctx_v.size(); net_idx++) { update_net_middlebuf(m_net_ctx_v[net_idx]); @@ -1057,25 +1770,195 @@ bool Bmruntime::load_bmodel(ModelCtx* model_ctx) return ret; } -void Bmruntime::load_tpu_module(ModelCtx* model_ctx, std::shared_ptr& kernel_module) { +void Bmruntime::load_tpu_module(ModelCtx* model_ctx) { // if kernel_module does not exist in bmodel, use the default kernel in kernel_module.h // kernel in kernel_module.h should be update manually: // 1: replace lib/libbm1684x_kernel_module_*.so by the latest. 
// 2: remake tpu_runtime + if (bmrt_arch_info::get_bmtpu_arch() != BM1684X && bmrt_arch_info::get_bmtpu_arch() != BM1688) { + for (int i = 0; i < MAX_DEVICE_NUM; i++) { + kernel_modules[i] = nullptr; + } + return; + } + + const unsigned char* firmware_data = nullptr; + bool using_inner_firmware = (getenv("BMRUNTIME_USING_INNER_FIRMWARE") != NULL); + if(using_inner_firmware) { + BMRT_LOG(INFO, "force loading firmare in runtime because BMRUNTIME_USING_INNER_FIRMWARE env is set"); + } + size_t firmware_size = 0; + #ifdef __linux__ + if (bmrt_arch_info::get_bmtpu_arch() == BM1684X){ + firmware_data = kernel_module_data_1684x; + firmware_size = sizeof(kernel_module_data_1684x); + } else if (bmrt_arch_info::get_bmtpu_arch() == BM1688){ + firmware_data = kernel_module_data_tpulv60; + firmware_size = sizeof(kernel_module_data_tpulv60); + } + #endif + + vector external_firmware; + const char* kernel_path = getenv("BMRUNTIME_USING_FIRMWARE"); + if(!using_inner_firmware && kernel_path){ + BMRT_LOG(INFO, "loading firmare from ENV BMRUNTIME_USING_FIRMWARE=%s", kernel_path); + string real_kernel_path = kernel_path; + FILE* kernel_file = fopen(real_kernel_path.c_str(), "rb"); + if(!kernel_file) { + BMRT_LOG(WARNING, "cannot open firmware file: %s", real_kernel_path.c_str()); + } else { + fseek(kernel_file, 0, SEEK_END); + size_t file_size = ftell(kernel_file); + external_firmware.resize(file_size); + fseek(kernel_file, 0, SEEK_SET); + size_t read_size = fread(external_firmware.data(), file_size, 1, kernel_file); + fclose(kernel_file); + if (read_size==0) { + BMRT_LOG(WARNING, "cannot reading firmware file error: %s, read_size=%d, file_size=%d", real_kernel_path.c_str(), read_size, file_size); + external_firmware.clear(); + } + } + } + + // load from bmodel, all cores should use the same firmware auto _kernel_module = model_ctx->model()->kernel_module(); - if (_kernel_module) { + if (!using_inner_firmware && _kernel_module && external_firmware.empty()) { auto module_binary = 
_kernel_module->binary(); - if (module_binary->size()) - { - std::shared_ptr binary(new uint8_t[module_binary->size()], [](void* p) { - delete [] (uint8_t*)p; - }); - model_ctx->read_binary(module_binary, binary.get()); - kernel_module = std::make_shared(m_handle, (const char*)binary.get(), module_binary->size()); - return; + if (module_binary->size()) { + external_firmware.resize(module_binary->size()); + model_ctx->read_binary(module_binary, (uint8_t*)external_firmware.data()); + BMRT_LOG(INFO, "loading firmare in bmodel"); + } + } + + if(!external_firmware.empty()) { + firmware_data = external_firmware.data(); + firmware_size = external_firmware.size(); + } else if (firmware_data) { + BMRT_LOG(INFO, "loading default firmare in runtime"); + } else { + BMRT_LOG(WARNING, "No firmare loaded in runtime"); + } + + for (int i = 0; i < m_device_num; i++) { + kernel_modules[i] = std::make_shared(m_handles[i]); + for (size_t core_id = 0; core_id < m_core_num; core_id++) { + kernel_modules[i]->add_core_module(core_id, firmware_data, firmware_size); } } - kernel_module = std::make_shared(m_handle, (const char*)kernel_module_data, sizeof(kernel_module_data)); +} + +#ifdef __linux__ +const std::array ELF_MAGIC = {0x7f, 'E', 'L', 'F'}; + +struct Elf64_Ehdr { + unsigned char e_ident[16]; // ELF magic number + uint16_t e_type; // object file type + uint16_t e_machine; // arch +}; + +bool check_so_architecture(const char* so_path) { + const std::map elf_arch_map = { + {40, "arm"}, + {62, "x86_64"}, + {183, "aarch64"}, + {243, "risc-v"} + }; + std::ifstream file(so_path, std::ios::in | std::ios::binary); + if (!file) { + BMRT_LOG(INFO, "Cannot open cpuop.so file in bmodel"); + return false; + } + Elf64_Ehdr header; + file.read(reinterpret_cast(&header), sizeof(header)); + if (!std::equal(std::begin(ELF_MAGIC), std::end(ELF_MAGIC), std::begin(header.e_ident))) { + std::cerr << "cpuop.so in bmodel is not an ELF file.\n"; + return false; + } + std::string host_arch; + std::string 
so_arch; + bool same_arch = false; +#ifdef __x86_64__ + host_arch = "x86_64"; + same_arch = header.e_machine == 62; +#elif defined(__aarch64__) + host_arch = "aarch64"; + same_arch = header.e_machine == 183; +#elif defined(__arm__) + host_arch = "arm"; + same_arch = header.e_machine == 40; +#elif defined(__riscv) + host_arch = "risc-v"; + same_arch = header.e_machine == 243; +#else + same_arch = false; +#endif + if (!same_arch) { + auto it = elf_arch_map.find(header.e_machine); + if (it != elf_arch_map.end()) + so_arch = it->second; + else + so_arch = "UNKOWN ARCH"; + BMRT_LOG(WARNING, "custom cpuop's arch expect %s but got %s", + host_arch.c_str(), so_arch.c_str()); + } + return same_arch; +} +#endif + +void Bmruntime::load_cpu_module(ModelCtx* model_ctx) { + vector external_cpuop; + auto _cpuop_module = model_ctx->model()->cpuop_module(); + if (_cpuop_module && external_cpuop.empty()) { + #ifdef __linux__ + auto module_binary = _cpuop_module->binary(); + if (module_binary->size()) { + external_cpuop.resize(module_binary->size()); + model_ctx->read_binary(module_binary, (uint8_t*)external_cpuop.data()); + BMRT_LOG(INFO, "loading cpuop in bmodel"); + int fd = mkstemp(&temp_filename_[0]); + fchmod(fd, S_IRUSR | S_IWUSR); + ssize_t module_size = module_binary->size(); + ssize_t write_size = write(fd, external_cpuop.data(), module_size); + if(write_size!=module_size) { + BMRT_LOG(FATAL, "loadding cpuop failed: write_size=%d, module_size=%d", (int)write_size, (int)module_size); + } + close(fd); + + int fd_ = open(&temp_filename_[0], O_RDONLY, 0); + if (fd_ < 0) { + BMRT_LOG(INFO, "creating tmp so false"); + return; + } + if (!check_so_architecture(&temp_filename_[0])) { + BMRT_LOG(WRONG, "Arch of custom cpuop.so is not the same with host! 
\n" + "Try to remake custom cpuop.so in the same arch with host cpu,\n" + "and embed into *.bmodel with 'tpu_model' tool"); + } + + tmpcpuso_handle_ = dlopen(&temp_filename_[0], RTLD_LAZY); + if (!tmpcpuso_handle_) { + BMRT_LOG(WRONG, "dlopen failed: %s\n", dlerror()); + exit(EXIT_FAILURE); + } else { + customcpu_init_ = (t_bmcpu_init)dlsym(tmpcpuso_handle_, "bmcpu_init"); + customcpu_uninit_ = (t_bmcpu_uninit)dlsym(tmpcpuso_handle_, "bmcpu_uninit"); + customcpu_process_ = (t_bmcpu_process)dlsym(tmpcpuso_handle_, "customcpu_process"); + } + if (!customcpu_init_ || !customcpu_uninit_ || !customcpu_process_) { + BMRT_LOG(WRONG, "read custom cpuop's symbol failed: %s\n", dlerror()); + dlclose(tmpcpuso_handle_); + exit(EXIT_FAILURE); + } + customcpu_handle_ = customcpu_init_(); + BMRT_LOG(INFO, "cpuop module is loaded, arch of custom cpuop is the same with host."); + } + #else + BMRT_LOG(INFO, "Only trying to load cpu_module on Linux for now."); + #endif + } else { + BMRT_LOG(INFO, "No cpu_module in bmodel."); + } } /* Load bmodel file, which is pre-compiled by bmcompiler */ @@ -1114,7 +1997,7 @@ BmCoeff::BmCoeff(bm_handle_t handle) BMRT_ASSERT_INFO(handle != NULL,"handle shouldn't be NULL\n"); m_handle = handle; m_inner_handle = false; - m_devid = 0; + m_devid = bm_get_devid(handle); } BmCoeff::BmCoeff(int devid) @@ -1131,7 +2014,7 @@ BmCoeff::~BmCoeff() { for(auto &mem: m_coeff_map) { - bm_free_device(m_handle, mem.second); + bm_free_device_u64(m_handle, mem.second); } if (m_inner_handle) { bm_dev_free(m_handle); @@ -1144,6 +2027,8 @@ u64 BmCoeff::Register(ModelCtx* model_ctx, const CoeffMem* coeff_mem) return 0; } u64 coeff_start = coeff_mem->address(); + // Use relative address since 1688. 
+ coeff_start &= bmrt_arch_info::addr_mask(); u64 coeff_size = coeff_mem->binary_coeff()->size(); u8* coeff_size_ptr = (u8*)&coeff_size; @@ -1153,19 +2038,19 @@ u64 BmCoeff::Register(ModelCtx* model_ctx, const CoeffMem* coeff_mem) std::lock_guard guard(m_coeff_mutex); auto iter = m_coeff_map.find(check_code); if (iter != m_coeff_map.end()) { - return bm_mem_get_device_addr(iter->second) - coeff_start; + return bm_mem_get_device_addr_u64(iter->second) - coeff_start; } - bm_device_mem_t pmem; + bm_device_mem_u64_t pmem; // allocate device memory for coeff - if (BM_SUCCESS != bm_malloc_device_byte_heap_mask(m_handle, &pmem, 7, coeff_size)) { + if (BM_SUCCESS != bm_malloc_device_byte_heap_mask_u64(m_handle, &pmem, 7, coeff_size)) { BMRT_LOG(FATAL, "coeff alloc failed, size[0x%llx]", coeff_size); } m_latest_device_mem = pmem; upload_coeff_data(model_ctx, coeff_mem, m_handle, pmem); - m_coeff_map.insert(std::pair, bm_device_mem_t>(check_code, pmem)); - return bm_mem_get_device_addr(pmem) - coeff_start; + m_coeff_map.insert(std::pair, bm_device_mem_u64_t>(check_code, pmem)); + return bm_mem_get_device_addr_u64(pmem) - coeff_start; } int BmCoeff::Check() @@ -1176,15 +2061,19 @@ int BmCoeff::Check() for (auto& coeff : m_coeff_map) { auto &sha = coeff.first; auto &mem = coeff.second; - u32 size = bm_mem_get_device_size(mem); + u64 size = bm_mem_get_device_size_u64(mem); + if(size > 0x40000000) { + fprintf(stderr, "Coeff size[0x%llx] is greater than 1GB, ignore the SHA check\n", size); + continue; + } uint8_t* buffer = new uint8_t[size]; auto buffer_sp = SP(buffer, uint8_t); - bm_status_t status = bm_memcpy_d2s(m_handle, buffer, coeff.second); + bm_status_t status = bm_memcpy_d2s_u64(m_handle, buffer, coeff.second); CHECK_status(status); bmodel::CalcSha256(buffer, size, crc32); - u64 addr = bm_mem_get_device_addr(mem); + u64 addr = bm_mem_get_device_addr_u64(mem); fprintf(stderr, "Coeff, chip[%d], SHA[%02X%02X%02X%02X], addr[0x%llx], size[0x%x]", - m_devid, sha[0], sha[1], 
sha[2], sha[3], addr, size); + m_devid, sha[0], sha[1], sha[2], sha[3], addr, (u32)size); if (0 != memcmp(crc32, coeff.first.data(), bmodel::SHA256_LEN)) { fprintf(stderr, ", Check:**FAILED**\n"); err_count++; @@ -1195,23 +2084,25 @@ int BmCoeff::Check() return err_count; } -void BmMemory::Init(const string& description, bm_handle_t handle, const bm_device_mem_t& mem, - void* buffer) -{ +void BmMemory::Init(const string &description, bm_handle_t handle, + const bm_device_mem_t &mem, void *buffer, bool do_check_) { desc = description; bm_handle = handle; device_mem = mem; addr = bm_mem_get_device_addr(mem); bytes = bm_mem_get_device_size(mem); dword_len = (bytes + 3) / 4; - bmodel::CalcSha256((uint8_t*)buffer, bytes, check_code); + do_check = do_check_; + if (do_check) { + bmodel::CalcSha256((uint8_t *)buffer, bytes, check_code); + } bm_status_t status = bm_memcpy_s2d(handle, mem, buffer); CHECK_status(status); } int BmMemory::Check() { - if (desc.empty()) { + if ((!do_check) || desc.empty()) { return 0; } int err_count = 0; @@ -1231,46 +2122,72 @@ int BmMemory::Check() return err_count; } -KernelModule::KernelModule(bm_handle_t &handle, const char* file_name) : m_handle(handle) { - _kernel_module = tpu_kernel_load_module_file(handle, file_name); - check_exist(); - preload_funcs(handle); +void KernelModule::add_core_module(int core_id, const char* filename) { + BMRT_ASSERT_INFO(_kernel_modules.count(core_id)==0, "the core module has been already added, core_id=%d", core_id); + _kernel_modules[core_id] = tpu_kernel_load_module_file(m_handle, filename); + preload_funcs(core_id); +} +void KernelModule::add_core_module(int core_id, const unsigned char* binary, size_t size) { + BMRT_ASSERT_INFO(_kernel_modules.count(core_id)==0, "the core module has been already added, core_id=%d", core_id); + _kernel_modules[core_id] = tpu_kernel_load_module_to_core(m_handle, (char*)binary, size, core_id); + preload_funcs(core_id); } KernelModule::~KernelModule() { - check_exist(); - 
auto status = tpu_kernel_unload_module(m_handle, _kernel_module); - BMRT_ASSERT_INFO(status == BM_SUCCESS, "kernel_module unload failed!!\n"); + for (auto& item: _kernel_modules) { + auto module = item.second; + auto core_id = item.first; + auto status = tpu_kernel_unload_module_from_core(m_handle, module, core_id); + BMRT_ASSERT_INFO(status == BM_SUCCESS, "kernel_module unload failed!! core_id=%d\n", core_id); + } } +void KernelModule::preload_funcs(int core_id) { + BMRT_ASSERT(_kernel_modules[core_id]); -KernelModule::KernelModule(bm_handle_t &handle, const char* binary, size_t size) : m_handle(handle) { - _kernel_module = tpu_kernel_load_module(handle, (char*)binary, size); - check_exist(); - preload_funcs(handle); -} + auto _kernel_module = _kernel_modules[core_id]; + _multi_fullnet_func_id[core_id] = tpu_kernel_get_function_from_core(m_handle, _kernel_module, "sg_api_multi_fullnet", core_id); + BMRT_LOG(INFO, " core_id=%d, multi_fullnet_func_id=%d", core_id, _multi_fullnet_func_id[core_id]); + + _dynamic_fullnet_func_id[core_id] = tpu_kernel_get_function_from_core(m_handle, _kernel_module, "sg_api_dynamic_fullnet", core_id); + BMRT_LOG(INFO, " core_id=%d, dynamic_fullnet_func_id=%d", core_id, _dynamic_fullnet_func_id[core_id]); -void KernelModule::preload_funcs(bm_handle_t &handle) { - _multi_fullnet_func_id = tpu_kernel_get_function(handle, _kernel_module, "sg_api_multi_fullnet"); - _dynamic_fullnet_func_id = tpu_kernel_get_function(handle, _kernel_module, "sg_api_dynamic_fullnet"); - _enable_profile_func_id = tpu_kernel_get_function(handle, _kernel_module, "sg_api_set_profile"); - _get_profile_func_id = tpu_kernel_get_function(handle, _kernel_module, "sg_api_get_profile_data"); + _enable_profile_func_id[core_id] = tpu_kernel_get_function_from_core(m_handle, _kernel_module, "sg_api_set_profile", core_id); + _get_profile_func_id[core_id] = tpu_kernel_get_function_from_core(m_handle, _kernel_module, "sg_api_get_profile_data", core_id); + + 
if(bmrt_arch_info::get_bmtpu_arch() == BM1684X){ + _global_move_1684x_func_id[core_id] = tpu_kernel_get_function_from_core(m_handle, _kernel_module, "global_move_1684x", core_id); + } + + if(bmrt_arch_info::get_bmtpu_arch() == BM1688 || bmrt_arch_info::get_bmtpu_arch() == SG2380){ + _set_engine_profile_param_func_id[core_id] = tpu_kernel_get_function_from_core(m_handle, _kernel_module, "sg_api_set_engine_profile_param", core_id); + } } -tpu_kernel_function_t KernelModule::get_multi_fullnet_func_id() { - check_exist(); - return _multi_fullnet_func_id; +static inline vector __map_to_vector_funcs(const vector& core_list, const map func_map) { + vector func_ids(core_list.size()); + for(size_t i=0; i KernelModule::get_multi_fullnet_func_id(const vector& core_list) { + return __map_to_vector_funcs(core_list, _multi_fullnet_func_id); } -tpu_kernel_function_t KernelModule::get_enable_profile_func_id() { - check_exist(); - return _enable_profile_func_id; +vector KernelModule::get_dynamic_fullnet_func_id(const vector& core_list) { + return __map_to_vector_funcs(core_list, _dynamic_fullnet_func_id); } -tpu_kernel_function_t KernelModule::get_get_profile_func_id() { - check_exist(); - return _get_profile_func_id; +vector KernelModule::get_enable_profile_func_id(const vector& core_list) { + return __map_to_vector_funcs(core_list, _enable_profile_func_id); +} +vector KernelModule::get_get_profile_func_id(const vector& core_list) { + return __map_to_vector_funcs(core_list, _get_profile_func_id); +} +vector KernelModule::get_set_engine_profile_param_func_id(const vector& core_list) { + return __map_to_vector_funcs(core_list, _set_engine_profile_param_func_id); +} + +vector KernelModule::get_global_move_1684x_func_id(const vector& core_list) { + return __map_to_vector_funcs(core_list, _global_move_1684x_func_id); } } // namespace bmruntime diff --git a/tpu-runtime/src/bmruntime_interface.cpp b/tpu-runtime/src/bmruntime_interface.cpp index 89bfe4e..1755c62 100644 --- 
a/tpu-runtime/src/bmruntime_interface.cpp +++ b/tpu-runtime/src/bmruntime_interface.cpp @@ -72,6 +72,11 @@ bool bmrt_shape_is_same(const bm_shape_t* left, const bm_shape_t* right) } bool bmrt_tensor(bm_tensor_t* tensor, void* p_bmrt, bm_data_type_t dtype, bm_shape_t shape) +{ + return bmrt_tensor_ex(tensor, p_bmrt, 0, dtype, shape); +} + +bool bmrt_tensor_ex(bm_tensor_t* tensor, void* p_bmrt, int devid, bm_data_type_t dtype, bm_shape_t shape) { BMRT_ASSERT_INFO(tensor != NULL && p_bmrt != NULL, "tensor:%p or p_bmrt:%p shouldn't be NULL", tensor, p_bmrt); uint64_t number_shape = bmrt_shape_count(&shape); @@ -80,8 +85,8 @@ bool bmrt_tensor(bm_tensor_t* tensor, void* p_bmrt, bm_data_type_t dtype, bm_sha tensor->shape = shape; tensor->st_mode = BM_STORE_1N; try { - ((Bmruntime*)p_bmrt)->must_alloc_device_mem(&tensor->device_mem, bmrt_tensor_bytesize(tensor), "tensor"); - return true; + ((Bmruntime*)p_bmrt)->must_alloc_device_mem(devid, &tensor->device_mem, bmrt_tensor_bytesize(tensor), "tensor"); + return true; } catch (const std::runtime_error &e) { return false; } @@ -151,26 +156,37 @@ void bmrt_print_network_info(const bm_net_info_t* net_info) } } -void* bmrt_create(bm_handle_t bm_handle) -{ - unsigned int chipid = 0; - if (0 != bm_get_chipid(bm_handle, &chipid)) { - BMRT_LOG(WRONG, "Error: cannot get chipid:%x",chipid); - return nullptr; - } +static std::string chip_name_by_id(unsigned int chipid) { std::string chip_name = ""; if (chipid == 0x1684) { chip_name = "BM1684"; } else if (chipid == 0x1686) { chip_name = "BM1684X"; } else if (chipid == 0x1686a200) { - chip_name = "BM1686"; + chip_name = "BM1688"; } else if (chipid == 0x1682) { chip_name = "BM1682"; } else if (chipid == 0x1880) { chip_name = "BM1880"; + } else if (chipid == 0x2260) { + chip_name = "BM1690"; + } else if (chipid == 0x2380) { + chip_name = "SG2380"; + } else if (chipid == 0x3000) { + chip_name = "MARS3"; + } + return chip_name; +} - } else { +void* bmrt_create(bm_handle_t bm_handle) +{ + 
unsigned int chipid = 0; + if (0 != bm_get_chipid(bm_handle, &chipid)) { + BMRT_LOG(WRONG, "Error: cannot get chipid:%x",chipid); + return nullptr; + } + std::string chip_name = chip_name_by_id(chipid); + if (chip_name.empty()) { BMRT_LOG(WRONG, "Error: unknown chipid %x", chipid); return nullptr; } @@ -182,12 +198,52 @@ void* bmrt_create(bm_handle_t bm_handle) } } +void *bmrt_create_ex(bm_handle_t *bm_handles, int num_handles) { + BMRT_ASSERT_INFO(num_handles > 0, "num_handles should > 0"); + unsigned int chipid = 0; + if (0 != bm_get_chipid(bm_handles[0], &chipid)) { + BMRT_LOG(WRONG, "Error: cannot get chipid:%x",chipid); + return nullptr; + } + // check all handles are the same + for (int i = 1; i < num_handles; i++) { + unsigned int chipid2 = 0; + if (0 != bm_get_chipid(bm_handles[i], &chipid2)) { + BMRT_LOG(WRONG, "Error: cannot get chipid:%x", chipid); + return nullptr; + } + if (chipid != chipid2) { + BMRT_LOG(WRONG, "Error: chipid not same:[0]:%x,[%d]:%x", chipid, i, chipid2); + return nullptr; + } + } + std::string chip_name = chip_name_by_id(chipid); + if (chip_name.empty()) { + BMRT_LOG(WRONG, "Error: unknown chipid %x", chipid); + return nullptr; + } + try { + Bmruntime* p_bmrt = new Bmruntime(bm_handles, num_handles, true, chip_name); + return (void*)p_bmrt; + } catch (const std::runtime_error &e) { + return nullptr; + } +} + u64 bmrt_must_alloc_device_mem(void* p_bmrt, bm_device_mem_t* pmem, u32 size){ - return ((Bmruntime*)p_bmrt)->must_alloc_device_mem(pmem, size, "interface"); + return ((Bmruntime*)p_bmrt)->must_alloc_device_mem(0, pmem, size, "interface"); } void bmrt_must_free_device_mem(void* p_bmrt, bm_device_mem_t mem){ - ((Bmruntime*)p_bmrt)->must_free_device_mem(mem); + ((Bmruntime*)p_bmrt)->must_free_device_mem(0, mem); +} + +uint32_t bmrt_get_flags(void* p_bmrt) { + return ((Bmruntime*)p_bmrt)->get_flags(); +} + +void bmrt_set_flags(void* p_bmrt, uint32_t flags) { + return ((Bmruntime*)p_bmrt)->set_flags(flags); } void bmrt_destroy(void* 
p_bmrt) @@ -239,6 +295,10 @@ bool bmrt_launch_tensor_ex(void* p_bmrt, const char* net_name, const bm_tensor_t BMRT_LOG(WRONG, "parameter invalid p_bmrt is NULL or net_name is NULL"); return false; } + if (auto net_c = ((Bmruntime*)p_bmrt)->get_net_cascade(net_name)) { + return ((Bmruntime*)p_bmrt) + ->launch(net_c, input_tensors, input_num, output_tensors, output_num); + } int net_idx = ((Bmruntime*)p_bmrt)->get_net_idx(net_name); if (net_idx < 0) { BMRT_LOG(WRONG, "net name:%s invalid", net_name); @@ -249,10 +309,66 @@ bool bmrt_launch_tensor_ex(void* p_bmrt, const char* net_name, const bm_tensor_t user_stmode); } -bool bmrt_launch_data(void* p_bmrt, const char* net_name, void* const input_datas[], +bool bmrt_launch_tensor_multi_cores(void *p_bmrt, const char *net_name, + const bm_tensor_t input_tensors[], + int input_num, bm_tensor_t output_tensors[], + int output_num, bool user_mem, + bool user_stmode, const int *core_list, + int core_num) { + if (p_bmrt == NULL || net_name == NULL) { + BMRT_LOG(WRONG, "parameter invalid p_bmrt is NULL or net_name is NULL"); + return false; + } + + if (auto net_c = ((Bmruntime*)p_bmrt)->get_net_cascade(net_name)) { + return ((Bmruntime*)p_bmrt) + ->launch(net_c, input_tensors, input_num, output_tensors, output_num); + } + + int net_idx = ((Bmruntime *)p_bmrt)->get_net_idx(net_name); + if (net_idx < 0) { + BMRT_LOG(WRONG, "net name:%s invalid", net_name); + return false; + } + std::vector core_vector{core_list, core_list + core_num}; + return ((Bmruntime *)p_bmrt) + ->launch_multi_cores(net_idx, input_tensors, input_num, output_tensors, + output_num, core_vector, user_mem, user_stmode); +} + +bool bmrt_pre_alloc_neuron_multi_cores( + void *p_bmrt, + const char *net_name, + int stage_idx, + const int *core_list, + int core_num) { + if (p_bmrt == NULL || net_name == NULL) { + BMRT_LOG(WRONG, "parameter invalid p_bmrt is NULL or net_name is NULL"); + return false; + } + if (auto net_c = 
((Bmruntime*)p_bmrt)->get_net_cascade(net_name)) { + return true; + } + int net_idx = ((Bmruntime *)p_bmrt)->get_net_idx(net_name); + if (net_idx < 0) { + BMRT_LOG(WRONG, "net name:%s invalid", net_name); + return false; + } + auto net_info = bmrt_get_network_info(p_bmrt, net_name); + if (stage_idx < 0 || stage_idx >= net_info->stage_num) { + BMRT_LOG(WRONG, "stage_idx:%d invalid", stage_idx); + return false; + } + std::vector core_vector{core_list, core_list + core_num}; + ((Bmruntime *)p_bmrt)->pre_alloc_neuron_multi_cores(net_idx, stage_idx, core_vector); + return true; +} + +bool bmrt_launch_data_multi_cores(void* p_bmrt, const char* net_name, void* const input_datas[], const bm_shape_t input_shapes[], int input_num, void* output_datas[], - bm_shape_t output_shapes[], int output_num, bool user_mem) -{ + bm_shape_t output_shapes[], int output_num, bool user_mem, + const int* core_list, int core_num) { + if (p_bmrt == NULL || net_name == NULL) { BMRT_LOG(WRONG, "parameter invalid p_bmrt is NULL or net_name is NULL"); return false; @@ -262,9 +378,21 @@ bool bmrt_launch_data(void* p_bmrt, const char* net_name, void* const input_data BMRT_LOG(WRONG, "net name:%s invalid", net_name); return false; } + std::vector core_vector(core_list, core_list + core_num); return ((Bmruntime*)p_bmrt) - ->launch(net_idx, input_datas, input_shapes, input_num, output_datas, output_shapes, - output_num, user_mem); + ->launch_multi_cores(net_idx, input_datas, input_shapes, input_num, output_datas, output_shapes, + output_num, user_mem, core_vector); + +} + +bool bmrt_launch_data(void* p_bmrt, const char* net_name, void* const input_datas[], + const bm_shape_t input_shapes[], int input_num, void* output_datas[], + bm_shape_t output_shapes[], int output_num, bool user_mem) +{ + return bmrt_launch_data_multi_cores(p_bmrt, net_name, + input_datas, input_shapes, input_num, + output_datas, output_shapes, output_num, + user_mem, NULL, 0); } void bmrt_show_neuron_network(void* p_bmrt) @@ 
-319,12 +447,12 @@ const bm_net_info_t* bmrt_get_network_info(void* p_bmrt, const char* net_name) BMRT_LOG(WRONG, "parameter invalid p_bmrt is NULL or net_name is NULL"); return NULL; } - int net_idx = ((Bmruntime*)p_bmrt)->get_net_idx(net_name); - if (net_idx < 0) { + auto ret = ((Bmruntime*)p_bmrt)->get_net_info(net_name); + if (ret == NULL) { BMRT_LOG(WRONG, "net name:%s invalid", net_name); return NULL; } - return ((Bmruntime*)p_bmrt)->get_net_info(net_idx); + return ret; } void bmrt_trace(void* p_bmrt) @@ -335,3 +463,52 @@ void bmrt_trace(void* p_bmrt) } ((Bmruntime*)p_bmrt)->trace(); } + +bool bmrt_memcpy_s2d_parallel(void *p_bmrt, + bm_tensor_t tensors[], + void* datas[], + int tensor_num[], + int device_num) { + return ((Bmruntime*)p_bmrt) + ->memcpy_s2d_parallel(tensors, datas, tensor_num, device_num); +} + +bool bmrt_memcpy_d2s_parallel(void *p_bmrt, + void* datas[], + bm_tensor_t tensors[], + int tensor_num[], + int device_num) { + return ((Bmruntime*)p_bmrt) + ->memcpy_d2s_parallel(datas, tensors, tensor_num, device_num); +} + +bool bmrt_memcpy_d2d_byte_parallel(void *p_bmrt, + bm_tensor_t dst_tensors[], + size_t dst_offsets[], + bm_tensor_t src_tensors[], + size_t src_offsets[], + size_t sizes[], + int tensor_num[], + int device_num) { + return ((Bmruntime*)p_bmrt) + ->memcpy_d2d_byte_parallel(dst_tensors, dst_offsets, src_tensors, src_offsets, + sizes, tensor_num, device_num); +} + +bool bmrt_memcpy_d2d_stride_ex_parallel( + void *p_bmrt, + bm_tensor_t dst_tensors[], + size_t dst_offsets[], + bm_shape_t dst_strides[], + bm_tensor_t src_tensors[], + size_t src_offsets[], + bm_shape_t src_strides[], + bm_shape_t shapes[], + int tensor_num[], + int device_num) { + return ((Bmruntime*)p_bmrt) + ->memcpy_d2d_stride_ex_parallel( + dst_tensors, dst_offsets, dst_strides, + src_tensors, src_offsets, src_strides, + shapes, tensor_num, device_num); +} \ No newline at end of file diff --git a/tpu-runtime/src/bmruntime_legacy.cpp 
b/tpu-runtime/src/bmruntime_legacy.cpp old mode 100644 new mode 100755 index 5df929e..b053448 --- a/tpu-runtime/src/bmruntime_legacy.cpp +++ b/tpu-runtime/src/bmruntime_legacy.cpp @@ -3,10 +3,12 @@ #include "bmruntime.h" #include "bmruntime_common.h" #include "bmruntime_interface.h" +#include "bmruntime_cpp.h" #include "string.h" using bmruntime::bmfunc; using bmruntime::Bmruntime; +using bmruntime::api_info_t; // Make sure net_idx is greater than or equal to zero #define CHECK_net_idx(net_idx) \ @@ -690,3 +692,75 @@ int bmrt_thread_sync(void* p_bmrt) bm_handle_t bm_handle = ((Bmruntime*)p_bmrt)->get_bm_handle(); return bm_thread_sync(bm_handle); } + +api_info_c *get_bmodel_api_info_c(void *p_bmrt, const char *net_name, + const bm_tensor_t *input_tensors, + int input_num, bm_tensor_t *output_tensors, + int output_num, bool user_mem, + bool user_stmode, uint32_t *core_ids) { + api_info_c *api_info = new api_info_c; + memset(api_info, 0x0, sizeof(api_info_c)); + if (p_bmrt == NULL || net_name == NULL) { + BMRT_LOG(WRONG, "parameter invalid p_bmrt is NULL or net_name is NULL"); + return api_info; + } + int net_idx = ((Bmruntime *)p_bmrt)->get_net_idx(net_name); + if (net_idx < 0) { + BMRT_LOG(WRONG, "net name:%s invalid", net_name); + return api_info; + } + const api_info_t &pinfo = + ((Bmruntime *)p_bmrt) + ->get_api_info(net_idx, input_tensors, input_num, output_tensors, + output_num, user_mem, user_stmode, core_ids); + + api_info->api_id_size = pinfo.api_id.size(); + api_info->api_data_size = pinfo.api_data.size(); + api_info->output_addr_offset_number = pinfo.output_addr_offset.size(); + api_info->input_addr_offset_number = pinfo.input_addr_offset.size(); + api_info->api_data_subsize = new size_t[api_info->api_data_size]; + for (size_t i = 0; i < api_info->api_data_size; i++) { + api_info->api_data_subsize[i] = pinfo.api_data[i].size(); + } + + api_info->api_id = new uint32_t[api_info->api_id_size]; + api_info->api_data = new uint8_t *[api_info->api_data_size]; + 
for (size_t i = 0; i < api_info->api_data_size; i++) { + api_info->api_data[i] = new uint8_t[api_info->api_data_subsize[i]]; + } + api_info->input_addr_offset = + new uint32_t[api_info->input_addr_offset_number]; + api_info->output_addr_offset = + new uint32_t[api_info->output_addr_offset_number]; + for (size_t i = 0; i < api_info->api_data_size; i++) { + memcpy(api_info->api_data[i], pinfo.api_data[i].data(), + pinfo.api_data[i].size() * sizeof(uint8_t)); + } + memcpy(api_info->api_id, pinfo.api_id.data(), + api_info->api_id_size * sizeof(uint32_t)); + memcpy(api_info->input_addr_offset, pinfo.input_addr_offset.data(), + api_info->input_addr_offset_number * sizeof(uint32_t)); + memcpy(api_info->output_addr_offset, pinfo.output_addr_offset.data(), + api_info->output_addr_offset_number * sizeof(uint32_t)); + + return api_info; +} + +void bmrt_free_api_info(api_info_c *api_info) { + for (size_t i = 0; i < api_info->api_data_size; i++) { + delete[] api_info->api_data[i]; + } + delete[] api_info->api_id; + delete[] api_info->api_data; + delete[] api_info->input_addr_offset; + delete[] api_info->output_addr_offset; + delete[] api_info->api_data_subsize; + + api_info->api_id = nullptr; + api_info->api_data = nullptr; + api_info->input_addr_offset = nullptr; + api_info->output_addr_offset = nullptr; + api_info->api_data_subsize = nullptr; + delete api_info; + +} diff --git a/tpu-runtime/src/bmruntime_profile.cpp b/tpu-runtime/src/bmruntime_profile.cpp index 90b485b..4b5d7ea 100644 --- a/tpu-runtime/src/bmruntime_profile.cpp +++ b/tpu-runtime/src/bmruntime_profile.cpp @@ -4,6 +4,7 @@ #include "bm1682_profile.h" #include "bm1684_profile.h" #include "bm1684x_profile.h" +#include "bm1688_profile.h" #ifndef __linux__ #include #endif @@ -42,15 +43,15 @@ int bm_mkdir(const char *dirname, bool must_new) #endif return 1; } + int status = 0; if(must_new){ string cmd = "rm "; cmd += dname + "*"; - system(cmd.c_str()); + status = system(cmd.c_str()); } - return 0; + return status; } 
- BMProfile::BMProfile(Bmruntime* p_bmrt): p_bmrt(p_bmrt), enabled(false) { set_save_dir("bmprofile_data"); handle = p_bmrt->get_bm_handle(); @@ -60,10 +61,15 @@ BMProfile::BMProfile(Bmruntime* p_bmrt): p_bmrt(p_bmrt), enabled(false) { device = decltype(device)(new bm1684_profile::BMProfileDevice(this)); } else if(arch == BM1684X){ device = decltype(device)(new bm1684x_profile::BMProfileDevice(this)); + } else if (arch == BM1688) { + device = decltype(device)(new bm1688_profile::BMProfileDevice(this)); } else { BMRT_LOG(WARNING, "Not support profile for arch=%d", arch); } enabled = device && device->enabled(); + if (enabled){ + BMRT_LOG(INFO, "Profile For arch=%d", arch); + } } BMProfile::~BMProfile(){ @@ -75,7 +81,7 @@ BMProfile::~BMProfile(){ return; } for(auto& minfo: mem_info){ - fprintf(fp, "[bmprofile] mtype=%d addr=%lld size=%d alloc=%lld free=%lld desc=%s\n", + fprintf(fp, "[bmprofile] mtype=%d addr=%lld size=%lld alloc=%lld free=%lld desc=%s\n", minfo.type, minfo.addr, minfo.size, minfo.alloc_usec, minfo.free_usec, minfo.desc.c_str()); } fclose(fp); @@ -86,7 +92,7 @@ void BMProfile::deinit() if (is_enabled()) print_note(); if (device) { device->deinit(); - } + } } void BMProfile::print_note() @@ -99,15 +105,16 @@ void BMProfile::print_note() BMRT_LOG(INFO, "*****************************************************************"); } -void BMProfile::record_alloc_device_mem(const bm_device_mem_t &mem, const std::string &desc) +void BMProfile::record_alloc_device_mem(const mem_pair_t &mem, const std::string &desc) { if(!enabled) return; - record_mem(MEM_GLOBAL, bm_mem_get_device_addr(mem), bm_mem_get_device_size(mem), desc); + record_mem(MEM_GLOBAL, mem.first, mem.second, desc); + // record_mem(MEM_GLOBAL, bm_mem_get_device_addr(mem), bm_mem_get_device_size(mem), desc); } -void BMProfile::record_free_device_mem(const bm_device_mem_t &mem){ +void BMProfile::record_free_device_mem(u64 mem_addr){ if(!enabled) return; - auto addr = bm_mem_get_device_addr(mem); + auto 
addr = mem_addr; bool find = false; for(auto& mi: mem_info){ if(mi.type==MEM_GLOBAL && mi.alloc_usec>0 && mi.free_usec==0 && mi.addr == addr){ @@ -121,9 +128,13 @@ void BMProfile::record_free_device_mem(const bm_device_mem_t &mem){ } } -profile_cmd_num_t *BMProfile::record_subnet_cmd_info(u64 gdma_addr, u64 gdma_offset, u64 bdc_addr, u64 bdc_offset, u32 group_num) +profile_cmd_num_t *BMProfile::record_subnet_cmd_info(int core_idx, u64 gdma_addr, u64 gdma_offset, u64 bdc_addr, u64 bdc_offset, u32 group_num) { if(!current_enabled) return nullptr; + BMRT_ASSERT_INFO(core_idx<(int)core_list.size(), "core_idx=%d, core_list.size()=%d", core_idx, (int)core_list.size()); + BMRT_ASSERT(cmd_infos[core_idx] == nullptr); + auto& cmd_info = cmd_infos[core_idx]; + u8* info_buffer = new u8[sizeof(profile_cmd_info_t)+group_num*sizeof(profile_cmd_num_t)]; cmd_info = (profile_cmd_info_t*)info_buffer; cmd_info->gdma_base_addr = gdma_addr; @@ -134,11 +145,24 @@ profile_cmd_num_t *BMProfile::record_subnet_cmd_info(u64 gdma_addr, u64 gdma_off return cmd_info->cmd_num; } -void BMProfile::record_cmd_data(ENGINE_ID engine, const void *cmd_ptr, u32 cmd_len, u64 store_addr) +void BMProfile::set_core_list(const vector& core_list) { + // BMRT_ASSERT(this->core_list.empty()); + this->core_list = core_list; + string filename = get_global_filename(); + auto fp = fopen(filename.c_str(), "ab"); + fprintf(fp, "[bmprofile] core_list="); + for(auto core: core_list){ + fprintf(fp, "%d,", core); + } + fprintf(fp, "\n"); + fclose(fp); +} + +void BMProfile::record_cmd_data(int core_idx, ENGINE_ID engine, const void *cmd_ptr, u32 cmd_len, u64 store_addr) { if(!enabled) return; char filename[256] = {0}; - sprintf(filename, "cmd_%llx_%d.dat", store_addr, engine); + sprintf(filename, "cmd_%llx_%d_%d.dat", store_addr, core_idx, engine); auto path = get_save_dir(); path += "/"; path += filename; @@ -154,7 +178,7 @@ void BMProfile::record_cpu_mem(const void *ptr, u32 len, const std::string &desc 
record_mem(MEM_CPU, (u64)ptr, len, desc); } -void BMProfile::record_mem(PROFILE_MEM_TYPE_T mtype, u64 addr, u32 size, const std::string &desc) +void BMProfile::record_mem(PROFILE_MEM_TYPE_T mtype, u64 addr, u64 size, const std::string &desc) { if(!enabled) return; profile_mem_info_t info; @@ -185,7 +209,7 @@ std::string BMProfile::get_global_filename() return get_save_dir() + "/" + "global.profile"; } -void BMProfile::init(const string& net_name, const vector& data, const vector& stat) +void BMProfile::init(const string& net_name, const vector& data, const vector& stat, const std::vector& core_list) { if (!is_enabled()) return; auto arch = bmrt_arch_info::get_bmtpu_arch(); @@ -196,10 +220,13 @@ void BMProfile::init(const string& net_name, const vector& data, const vecto fprintf(fp, "[bmprofile] arch=%d\n", arch); fprintf(fp, "[bmprofile] net_name=%s\n", net_name.c_str()); fprintf(fp, "[bmprofile] tpu_freq=%d\n", freq); + cmd_infos.assign(core_list.size(), nullptr); + if(data.size()>0){ fwrite(data.data(), data.size(), 1, fp); } fclose(fp); + set_core_list(core_list); if(stat.size()>0){ std::string stat_filename = get_save_dir() + "/" + "net_stat.sim"; auto fp = fopen(stat_filename.c_str(), "wb"); @@ -229,7 +256,7 @@ void BMProfile::alloc_buffer(buffer_pair *bp, size_t size, const string& desc) if (!bp->ptr) { BMRT_LOG(FATAL, "malloc system buffer failed for profile"); } - p_bmrt->must_alloc_device_mem(&bp->mem, size, desc); + p_bmrt->must_alloc_device_mem(0, &bp->mem, size, desc); bp->size = size; } } @@ -239,7 +266,7 @@ void BMProfile::free_buffer(buffer_pair *bp) if(bp->ptr){ delete [] bp->ptr; bp->ptr =nullptr; - p_bmrt->must_free_device_mem(bp->mem); + p_bmrt->must_free_device_mem(0, bp->mem); } } @@ -283,16 +310,18 @@ bool BMProfile::getenv_bool(const char *name, bool default_val) void BMProfile::save_cmd_profile() { if(!current_enabled) return; - if(!cmd_info) return; - write_block(BLOCK_CMD, 
sizeof(profile_cmd_info_t)+cmd_info->group_num*sizeof(profile_cmd_num_t), cmd_info); - delete []((u8*)cmd_info); - cmd_info = nullptr; + for(auto& cmd_info: cmd_infos){ + if(!cmd_info) continue; + write_block(BLOCK_CMD, sizeof(profile_cmd_info_t)+cmd_info->group_num*sizeof(profile_cmd_num_t), cmd_info); + delete []((u8*)cmd_info); + cmd_info = nullptr; + } } void BMProfile::write_block(u32 type, size_t len, const void *data) { if(len == 0) return; - BMRT_LOG(INFO, "%s: type=%d, len=%d", __func__, type, (int)len); + BMRT_LOG(INFO, " %s: type=%d, len=%d", __func__, type, (int)len); fwrite(&type, sizeof(type), 1, profile_fp); fwrite(&len, sizeof(u32), 1, profile_fp); fwrite(data, len, 1, profile_fp); @@ -317,7 +346,6 @@ void BMProfile::begin_subnet(net_ctx_t* net_ctx, int iteration, int subnet_id, i { current_enabled = need_profile(iteration, subnet_id, subnet_mode); if(!current_enabled) return; - summary.iteration = iteration; summary.subnet_id = subnet_id; summary.subnet_type = subnet_mode; diff --git a/tpu-runtime/src/bmruntime_subnet.cpp b/tpu-runtime/src/bmruntime_subnet.cpp old mode 100644 new mode 100755 index 73f775c..2b0046c --- a/tpu-runtime/src/bmruntime_subnet.cpp +++ b/tpu-runtime/src/bmruntime_subnet.cpp @@ -45,7 +45,7 @@ static long end_time(struct timespec& time) } #endif -static void print_subnet_time(SUBNET_INFO_T * subnet, int idx, long elapsed) +static void print_subnet_time(const SUBNET_INFO_T * subnet, int idx, long elapsed) { if (subnet->subnet_mode == SUBNET_MODE_CPU) { BMRT_LOG(INFO, "subnet[%d]: cpu layer[%d], time %ld us", idx, subnet->cpu_info.op_type, elapsed); @@ -65,9 +65,11 @@ void Bmruntime::subnet_clear(net_ctx_t* net_ctx) #ifndef SOC_MODE BMRT_LOG(FATAL, "Only soc mode run here"); #else - bm_mem_unmap_device_mem(m_handle, bm_host_mem.addr, bm_host_mem.size); - BMRT_DEBUG("HOSTMEM UNMAP %p SIZE %llx NAME %s", bm_host_mem.addr, bm_host_mem.size, - subnet_tensor.first.c_str()); + if (m_flags & BM_RUNTIME_SHARE_MEM) { + 
bm_mem_unmap_device_mem(m_handles[net_ctx->device_id], bm_host_mem.addr, bm_host_mem.size); + BMRT_DEBUG("HOSTMEM UNMAP %p SIZE %llx NAME %s", bm_host_mem.addr, bm_host_mem.size, + subnet_tensor.first.c_str()); + } #endif break; case HOST_MEM_ALLOC: /* free host memory */ @@ -75,7 +77,7 @@ void Bmruntime::subnet_clear(net_ctx_t* net_ctx) if (!stage->cpu_addr) { BMRT_DEBUG("HOSTMEM FREE %p SIZE %llx NAME %s", bm_host_mem.addr, bm_host_mem.size, subnet_tensor.first.c_str()); - delete[] (float*)(bm_host_mem.addr); + if (bm_host_mem.addr != NULL) delete[] (float*)(bm_host_mem.addr); } break; @@ -89,6 +91,33 @@ void Bmruntime::subnet_clear(net_ctx_t* net_ctx) } } + // multi-core arch + for (auto &dyn_mem_pair : net_ctx->dyn_neuron_stage_dict) { + auto dyn_neuron_stage = dyn_mem_pair.second; + for (auto &subnet_tensor : dyn_neuron_stage->subnet_tensor_v) { + auto bm_host_mem = subnet_tensor.second.host_mem; + switch (bm_host_mem.type) { + case HOST_MEM_MMAP: /* unmap */ +#ifndef SOC_MODE + BMRT_LOG(FATAL, "Only soc mode run here"); +#else + bm_mem_unmap_device_mem(m_handles[net_ctx->device_id], bm_host_mem.addr, bm_host_mem.size); +#endif + break; + case HOST_MEM_ALLOC: /* free host memory */ + if (!dyn_neuron_stage->cpu_addr) { + delete[] (float*)(bm_host_mem.addr); + } + break; + default: + break; + } + } + if (dyn_neuron_stage->cpu_addr) { + delete[] (float*)(dyn_neuron_stage->cpu_addr); + } + } + for (auto& stage : net_ctx->stage_v) { for (auto& subnet : stage->subnet_v) { /* per subnet */ if (subnet->subnet_mode == SUBNET_MODE_CPU) @@ -102,11 +131,11 @@ void Bmruntime::subnet_clear(net_ctx_t* net_ctx) } } -void Bmruntime::subnet_tensor_s2d(net_stage_t* stage, const string& tensor_name, +void Bmruntime::subnet_tensor_s2d(uint32_t devid, map *subnet_tensor_v, const string& tensor_name, bm_device_mem_t *out_dev_mem, u64 offset, u64 size) { - auto iter = stage->subnet_tensor_v.find(tensor_name); - BMRT_ASSERT_INFO(iter != stage->subnet_tensor_v.end(), \ + auto iter = 
subnet_tensor_v->find(tensor_name); + BMRT_ASSERT_INFO(iter != subnet_tensor_v->end(), \ "Wrong subnet_tensor_v named:%s", tensor_name.c_str()); auto &tensor_ext = iter->second; @@ -115,12 +144,12 @@ void Bmruntime::subnet_tensor_s2d(net_stage_t* stage, const string& tensor_name, #ifndef SOC_MODE BMRT_LOG(FATAL, "Only soc mode run here"); #else - bm_mem_flush_partial_device_mem(m_handle, &tensor_ext.tensor_info.device_mem, 0, + bm_mem_flush_partial_device_mem(m_handles[devid], &tensor_ext.tensor_info.device_mem, 0, (size > 0 ? size : bmrt_shape_count(&tensor_ext.tensor_info.shape)) * bmrt_data_type_size(tensor_ext.tensor_info.dtype)); if (out_dev_mem != NULL) { /* d2d from internal dev_mem to outside dev_mem */ // dword copy, divide by 4 - bm_memcpy_d2d(m_handle, *out_dev_mem, offset, tensor_ext.tensor_info.device_mem, 0, + bm_memcpy_d2d(m_handles[devid], *out_dev_mem, offset, tensor_ext.tensor_info.device_mem, 0, (size > 0 ? size : bmrt_shape_count(&tensor_ext.tensor_info.shape)) * bmrt_data_type_size(tensor_ext.tensor_info.dtype) / 4); } @@ -141,7 +170,7 @@ void Bmruntime::subnet_tensor_s2d(net_stage_t* stage, const string& tensor_name, BMRT_LOG(FATAL, "Only MEM_TYPE_TPU run here"); } - bm_memcpy_s2d_partial_offset(m_handle, dev_mem, + bm_memcpy_s2d_partial_offset(m_handles[devid], dev_mem, (u8 *)(tensor_ext.host_mem.addr), (size > 0 ? 
size : bmrt_shape_count(&tensor_ext.tensor_info.shape)) * bmrt_data_type_size(tensor_ext.tensor_info.dtype), @@ -157,22 +186,22 @@ void Bmruntime::subnet_tensor_s2d(net_stage_t* stage, const string& tensor_name, } } -static tensor_ext_t& must_get_tensor_in_stage(net_stage_t* stage, const string& name){ - auto iter = stage->subnet_tensor_v.find(name); - BMRT_ASSERT_INFO(iter != stage->subnet_tensor_v.end(), \ +static tensor_ext_t& must_get_tensor_in_subnet(map *subnet_tensor_v, const string& name){ + auto iter = subnet_tensor_v->find(name); + BMRT_ASSERT_INFO(iter != subnet_tensor_v->end(), \ "Wrong tensor named:%s", name.c_str()); return iter->second; } -void Bmruntime::subnet_tensor_forward(net_stage_t* stage, const string& src_name, const string& dst_name, bm_tensor_t* output_tensors){ - auto &src_tensor = must_get_tensor_in_stage(stage, src_name); - auto &dst_tensor = must_get_tensor_in_stage(stage, dst_name); +void Bmruntime::subnet_tensor_forward(uint32_t devid, map *subnet_tensor_v, const string& src_name, const string& dst_name, const bm_tensor_t* output_tensors){ + auto &src_tensor = must_get_tensor_in_subnet(subnet_tensor_v, src_name); + auto &dst_tensor = must_get_tensor_in_subnet(subnet_tensor_v, dst_name); auto src_mem = src_tensor.tensor_info.device_mem; auto dst_mem = dst_tensor.tensor_info.device_mem; if(dst_tensor.io_type == TENSOR_TYPE_NET_OUTPUT){ dst_mem = output_tensors[dst_tensor.io_index].device_mem; } if (src_tensor.src_subnet && src_tensor.src_subnet->subnet_mode == SUBNET_MODE_CPU) { - subnet_tensor_s2d(stage, src_name, &dst_mem); + subnet_tensor_s2d(devid, subnet_tensor_v, src_name, &dst_mem); } else { //copy or move src_tensor data to dst_tensor BMRT_DEBUG("%s D2D from=0x%llx, to=0x%llx, len=%d", @@ -181,17 +210,17 @@ void Bmruntime::subnet_tensor_forward(net_stage_t* stage, const string& src_name (u64)dst_mem.u.device.device_addr, src_mem.size ); - bm_memcpy_d2d_byte(m_handle, dst_mem, 0, src_mem, 0, src_mem.size); + 
bm_memcpy_d2d_byte(m_handles[devid], dst_mem, 0, src_mem, 0, src_mem.size); } dst_tensor.tensor_info.shape = src_tensor.tensor_info.shape; } -void* Bmruntime::subnet_tensor_d2s(net_stage_t* stage, const string& tensor_name, +void* Bmruntime::subnet_tensor_d2s(uint32_t devid, map *subnet_tensor_v, const string& tensor_name, bm_device_mem_t *out_dev_mem, u64 offset, u64 size) // offset is out_dev_mem offset { - auto iter = stage->subnet_tensor_v.find(tensor_name); - BMRT_ASSERT_INFO(iter != stage->subnet_tensor_v.end(), \ + auto iter = subnet_tensor_v->find(tensor_name); + BMRT_ASSERT_INFO(iter != subnet_tensor_v->end(), \ "Wrong subnet_tensor_v named:%s", tensor_name.c_str()); auto &tensor_ext = iter->second; @@ -201,7 +230,7 @@ void* Bmruntime::subnet_tensor_d2s(net_stage_t* stage, const string& tensor_name BMRT_LOG(FATAL, "Only soc mode run here"); #else if (out_dev_mem) { - bm_memcpy_d2d(m_handle, tensor_ext.tensor_info.device_mem, 0, *out_dev_mem, offset, + bm_memcpy_d2d(m_handles[devid], tensor_ext.tensor_info.device_mem, 0, *out_dev_mem, offset, (size > 0 ? size : bmrt_shape_count(&tensor_ext.tensor_info.shape)) * bmrt_data_type_size(tensor_ext.tensor_info.dtype) / 4); BMRT_DEBUG("SUBNET TENSOR D2D FROM %llx TO %llx SIZE %llx", @@ -210,7 +239,7 @@ void* Bmruntime::subnet_tensor_d2s(net_stage_t* stage, const string& tensor_name (u64)(size > 0 ? size : bmrt_shape_count(&tensor_ext.tensor_info.shape)) * bmrt_data_type_size(tensor_ext.tensor_info.dtype) / 4); } - bm_mem_invalidate_partial_device_mem(m_handle, &tensor_ext.tensor_info.device_mem, 0, + bm_mem_invalidate_partial_device_mem(m_handles[devid], &tensor_ext.tensor_info.device_mem, 0, (size > 0 ? 
size : bmrt_shape_count(&tensor_ext.tensor_info.shape)) * bmrt_data_type_size(tensor_ext.tensor_info.dtype)); break; @@ -227,7 +256,7 @@ void* Bmruntime::subnet_tensor_d2s(net_stage_t* stage, const string& tensor_name } /* host_mem.size using max shape, here using real type */ - bm_memcpy_d2s_partial_offset(m_handle, tensor_ext.host_mem.addr, dev_mem, + bm_memcpy_d2s_partial_offset(m_handles[devid], tensor_ext.host_mem.addr, dev_mem, (size > 0 ? size : bmrt_shape_count(&tensor_ext.tensor_info.shape)) * bmrt_data_type_size(tensor_ext.tensor_info.dtype), offset); @@ -244,11 +273,12 @@ void* Bmruntime::subnet_tensor_d2s(net_stage_t* stage, const string& tensor_name } /* TODO : refactor by launch_ir */ -bool Bmruntime::launch_tpu_ir_subnet(net_ctx_t* net_ctx, net_stage_t* stage, SUBNET_INFO_T* subnet, +bool Bmruntime::launch_tpu_ir_subnet(net_ctx_t* net_ctx, net_stage_t* stage, const SUBNET_INFO_T* subnet, const bm_tensor_t* input_tensors, const int* input_elem_num, int input_num, - bm_tensor_t* output_tensors, int* output_elem_num, int output_num) + bm_tensor_t* output_tensors, int* output_elem_num, int output_num, + const uint32_t dyn_core_mask) { - + auto devid = net_ctx->device_id; //BMRT_ASSERT(true == net_ctx->is_dynamic); auto arch = bmrt_arch_info::get_bmtpu_arch(); bm_status_t status = BM_SUCCESS; @@ -285,7 +315,7 @@ bool Bmruntime::launch_tpu_ir_subnet(net_ctx_t* net_ctx, net_stage_t* stage, SUB user_input_shapes[idx] = (int*)input_tensors[idx].shape.dims; input_dims[idx] = input_tensors[idx].shape.num_dims; auto input_dtype = 0; - if (arch == BM1684X || arch == BM1686) { + if (arch == BM1684X || arch == BM1688 || arch == BM1690 || arch == SG2380) { input_dtype = input_tensors[idx].dtype; } else { if(input_tensors[idx].dtype == BM_FLOAT32){ @@ -338,11 +368,24 @@ bool Bmruntime::launch_tpu_ir_subnet(net_ctx_t* net_ctx, net_stage_t* stage, SUB u32* output_need_middle_buff_flag = output_need_middle_buff_flag_.get(); #endif if (arch != BM1682) { + map 
*subnet_tensor_v; + vector *input_v; + vector *output_v; + if (m_flags & BM_RUNTIME_SHARE_MEM) { + subnet_tensor_v = &(stage->subnet_tensor_v); + input_v = &(stage->input_v); + output_v = &(stage->output_v); + } else { + subnet_tensor_v = &(net_ctx->dyn_neuron_stage_dict[dyn_core_mask]->subnet_tensor_v); + input_v = &(net_ctx->dyn_neuron_stage_dict[dyn_core_mask]->input_v); + output_v = &(net_ctx->dyn_neuron_stage_dict[dyn_core_mask]->output_v); + } + // input, only 1N will switch to 4N int stmode_flag = ST_NO_CHANGE; for (int idx = 0; idx < input_num; idx++) { auto& tensor_name = subnet->input_tensor_name_v[idx]; - auto& tensor_ext = stage->subnet_tensor_v.find(tensor_name)->second; + auto& tensor_ext = subnet_tensor_v->find(tensor_name)->second; if (tensor_ext.io_type != TENSOR_TYPE_NET_INPUT) { BMRT_DEBUG("subnet immediate tensor %s do not need input middle buffer", tensor_name.c_str()); user_input_global_addr_middle[idx] = 0; @@ -350,7 +393,7 @@ bool Bmruntime::launch_tpu_ir_subnet(net_ctx_t* net_ctx, net_stage_t* stage, SUB } bm_store_mode_t user_stmode = input_tensors[idx].st_mode; - bm_store_mode_t stmode = stage->input_v[tensor_ext.io_index].st_mode; + bm_store_mode_t stmode = input_v->at(tensor_ext.io_index).st_mode; u64 middle_addr = bm_mem_get_device_addr(net_ctx->middlebuff_input[tensor_ext.io_index]); if (middle_addr == 0 || stmode == user_stmode) { user_input_global_addr_middle[idx] = 0; @@ -368,7 +411,7 @@ bool Bmruntime::launch_tpu_ir_subnet(net_ctx_t* net_ctx, net_stage_t* stage, SUB // output for (int idx = 0; idx < output_num; idx++) { auto& tensor_name = subnet->output_tensor_name_v[idx]; - auto& tensor_ext = stage->subnet_tensor_v.find(tensor_name)->second; + auto& tensor_ext = subnet_tensor_v->find(tensor_name)->second; /* TODO: net output tensor could be also imm input tensor of subnet. * (1) as net output tensor, need middlebuffer for 1N/4N convert. * (2) as imm tensor, do not need stmode convert.. 
@@ -383,7 +426,7 @@ bool Bmruntime::launch_tpu_ir_subnet(net_ctx_t* net_ctx, net_stage_t* stage, SUB } bm_store_mode_t user_stmode = output_tensors[idx].st_mode; - bm_store_mode_t stmode = stage->output_v[tensor_ext.io_index].st_mode; + bm_store_mode_t stmode = output_v->at(tensor_ext.io_index).st_mode; u64 middle_addr = bm_mem_get_device_addr(net_ctx->middlebuff_output[tensor_ext.io_index]); if (stmode == user_stmode || middle_addr == 0) { user_output_global_addr_middle[idx] = 0; @@ -401,13 +444,18 @@ bool Bmruntime::launch_tpu_ir_subnet(net_ctx_t* net_ctx, net_stage_t* stage, SUB u64 output_shape_global_addr = 0; if (output_num != 0) { - output_shape_global_addr = must_alloc_device_mem(&output_shape_mem, output_num * sizeof(bm_shape_ex_t)); + output_shape_global_addr = must_alloc_device_mem(devid, &output_shape_mem, output_num * sizeof(bm_shape_ex_t)); } + auto core_list = get_core_list_from_core_mask(dyn_core_mask); + if (core_list.size() > 1) { + // ir only use one core to run + core_list.resize(1); + } if (arch == BM1682) { status = bmfunc::bmdnn_1682()->_bmdnn_dynamic_fullnet_v2_( - m_handle, stage->ir_mem.addr + subnet->tpu_info.ir_offset, - ((subnet->tpu_info.ir_len/*bytes*/ + 3) / 4), //length unit is dword + m_handles[devid], stage->core_commands[0].ir_mem.addr + subnet->tpu_info.core_commands[0].ir_offset, + ((subnet->tpu_info.core_commands[0].ir_len/*bytes*/ + 3) / 4), //length unit is dword input_num, user_input_global_addrs, user_input_shapes, input_elem_num, input_dims, output_num, user_output_global_addrs, stage->ctx_start, // There is an assertion in bmruntime_bmodel.cpp to ensure ctx_offset @@ -419,8 +467,8 @@ bool Bmruntime::launch_tpu_ir_subnet(net_ctx_t* net_ctx, net_stage_t* stage, SUB ); } else if (arch == BM1684) { status = bmfunc::bmdnn_1684()->_bmdnn_dynamic_fullnet_v2_( - m_handle, stage->ir_mem.addr + subnet->tpu_info.ir_offset, - ((subnet->tpu_info.ir_len/*bytes*/ + 3) / 4), //length unit is dword + m_handles[devid], 
stage->core_commands[0].ir_mem.addr + subnet->tpu_info.core_commands[0].ir_offset, + ((subnet->tpu_info.core_commands[0].ir_len/*bytes*/ + 3) / 4), //length unit is dword input_num, user_input_global_addrs, user_input_global_addr_middle, user_input_shapes, input_elem_num, input_dims, output_num, user_output_global_addrs, user_output_global_addr_middle, stage->ctx_start, @@ -431,35 +479,64 @@ bool Bmruntime::launch_tpu_ir_subnet(net_ctx_t* net_ctx, net_stage_t* stage, SUB 0 // no arm reserved buffer used ); } else if (arch == BM1684X) { - int func_id = net_ctx->kernel_module_->get_dynamic_fullnet_func_id(); + auto func_ids = net_ctx->kernel_module_->get_dynamic_fullnet_func_id(core_list); status = bmfunc::bmdnn_1684x()->_bmdnn_dynamic_fullnet_( - m_handle, func_id, stage->ir_mem.addr + subnet->tpu_info.ir_offset, - ((subnet->tpu_info.ir_len + 3) / 4), input_num, user_input_global_addrs, + m_handles[devid], func_ids[0], stage->core_commands[0].ir_mem.addr + subnet->tpu_info.core_commands[0].ir_offset, + ((subnet->tpu_info.core_commands[0].ir_len + 3) / 4), input_num, user_input_global_addrs, user_input_shapes, input_elem_num, input_dims, output_num, user_output_global_addrs, stage->ctx_start, stage->ctx_borders, stage->ctx_offset, - stage->coeff_offset, true, - output_shape_global_addr); - } else if (arch == BM1686) { - status = bmfunc::bmdnn_1686()->_bmdnn_dynamic_fullnet_( - m_handle, stage->ir_mem.addr + subnet->tpu_info.ir_offset, - ((subnet->tpu_info.ir_len + 3) / 4), input_num, user_input_global_addrs, + stage->coeff_offset, stage->io_start, stage->io_offset, true, + output_shape_global_addr, + net_ctx->do_allreduce == 1 ? 
&(net_ctx->allreduce_param) : NULL); + } else if (arch == BM1688) { + auto func_ids = net_ctx->kernel_module_->get_dynamic_fullnet_func_id(core_list); + status = bmfunc::bmdnn_1688()->_bmdnn_dynamic_fullnet_( + m_handles[devid], func_ids, stage->core_commands[0].ir_mem.addr + subnet->tpu_info.core_commands[0].ir_offset, + ((subnet->tpu_info.core_commands[0].ir_len + 3) / 4), input_num, user_input_global_addrs, + user_input_shapes, input_elem_num, input_dims, output_num, + user_output_global_addrs, stage->ctx_start, + stage->ctx_borders, (m_flags & BM_RUNTIME_SHARE_MEM) ? stage->ctx_offset : net_ctx->dyn_neuron_stage_dict[dyn_core_mask]->ctx_offset, + stage->coeff_offset, stage->io_start, stage->io_offset, true, + output_shape_global_addr, + core_list); + } else if (arch == BM1690) { + status = bmfunc::bmdnn_2260()->_bmdnn_dynamic_fullnet_( + m_handles[devid], stage->core_commands[0].ir_mem.addr + subnet->tpu_info.core_commands[0].ir_offset, + ((subnet->tpu_info.core_commands[0].ir_len + 3) / 4), input_num, user_input_global_addrs, + user_input_shapes, input_elem_num, input_dims, output_num, + user_output_global_addrs, stage->ctx_start, + stage->ctx_borders, (m_flags & BM_RUNTIME_SHARE_MEM) ? 
stage->ctx_offset : net_ctx->dyn_neuron_stage_dict[dyn_core_mask]->ctx_offset, + stage->coeff_offset, stage->io_start, stage->io_offset, true, + output_shape_global_addr, + core_list); + } else if (arch == MARS3) { + status = bmfunc::bmdnn_mars3()->_bmdnn_dynamic_fullnet_( + m_handles[devid], stage->core_commands[0].ir_mem.addr + subnet->tpu_info.core_commands[0].ir_offset, + ((subnet->tpu_info.core_commands[0].ir_len + 3) / 4), input_num, user_input_global_addrs, user_input_shapes, input_elem_num, input_dims, output_num, user_output_global_addrs, stage->ctx_start, stage->ctx_borders, stage->ctx_offset, - stage->coeff_offset, true, - output_shape_global_addr); + stage->coeff_offset, stage->io_start, stage->io_offset, true, + output_shape_global_addr, + core_list); + } else if (arch == SG2380) { + auto func_ids = net_ctx->kernel_module_->get_dynamic_fullnet_func_id(core_list); + status = bmfunc::bmdnn_2380()->_bmdnn_dynamic_fullnet_( + m_handles[devid], func_ids, stage->core_commands[0].ir_mem.addr + subnet->tpu_info.core_commands[0].ir_offset, + ((subnet->tpu_info.core_commands[0].ir_len + 3) / 4), input_num, user_input_global_addrs, + user_input_shapes, input_elem_num, input_dims, output_num, + user_output_global_addrs, stage->ctx_start, + stage->ctx_borders, (m_flags & BM_RUNTIME_SHARE_MEM) ? 
stage->ctx_offset : net_ctx->dyn_neuron_stage_dict[dyn_core_mask]->ctx_offset, + stage->coeff_offset, stage->io_offset, true, + output_shape_global_addr, + core_list); } else { BMRT_LOG(FATAL, "Error: unknown BM TPU"); } - if (BM_SUCCESS == status) { - status = bm_thread_sync(m_handle); - } - - if (BM_SUCCESS != status) { - BMRT_LOG(WRONG, "launch failed, status:%d", status); - trace(); + if (status == BM_SUCCESS) { + sync_cores(m_handles[devid], core_list); } if (output_num == 0) { @@ -475,7 +552,7 @@ bool Bmruntime::launch_tpu_ir_subnet(net_ctx_t* net_ctx, net_stage_t* stage, SUB bm_shape_ex_t* output_shape_ex_v = output_shape_ex_v_.get(); #endif if (output_num != 0) { - status = bm_memcpy_d2s(m_handle, output_shape_ex_v, output_shape_mem); + status = bm_memcpy_d2s(m_handles[devid], output_shape_ex_v, output_shape_mem); } else { status = BM_SUCCESS; @@ -487,196 +564,146 @@ bool Bmruntime::launch_tpu_ir_subnet(net_ctx_t* net_ctx, net_stage_t* stage, SUB } } - must_free_device_mem(output_shape_mem); + must_free_device_mem(devid, output_shape_mem); return BM_SUCCESS == status; } /* TODO : refactor by launch_static */ -bool Bmruntime::launch_tpu_subnet(net_ctx_t* net_ctx, net_stage_t* stage, SUBNET_INFO_T* subnet, +template +void Bmruntime::fill_tpu_tensor_info( + std::vector &tensor_info, const T_stage *stage, + const SUBNET_INFO_T *subnet, const bm_tensor_t *user_tensors, + bool is_input) { + tensor_info.clear(); + const auto &ref_tensors = + is_input ? 
subnet->input_tensor_name_v : subnet->output_tensor_name_v; + for (u32 idx = 0; idx < ref_tensors.size(); idx++) { + tpu_tensor_info_t info = {0}; + /// info that is given by users + auto &user_input = user_tensors[idx]; + info.dtype = user_input.dtype; + info.user_stmode = user_input.st_mode; + info.user_global_addr = bm_mem_get_device_addr(user_input.device_mem) + + GLOBAL_MEM_CMD_START_OFFSET; + BMRT_ASSERT_INFO( + info.user_stmode == BM_STORE_1N || info.user_stmode == BM_STORE_4N, + "user stmode[%d]:%d shouldn't be BM_STORE_2N\n", idx, info.user_stmode); + + /// info that fixed when compiling + const auto &tensor_name = ref_tensors.at(idx); + const auto &cmd_input = stage->subnet_tensor_v.find(tensor_name)->second; + info.compiled_stmode = cmd_input.tensor_info.st_mode; + info.padding_h = cmd_input.pad_h; + info.compiled_global_addr = + bm_mem_get_device_addr(cmd_input.tensor_info.device_mem) + + GLOBAL_MEM_CMD_START_OFFSET; + + BMRT_ASSERT_INFO(info.compiled_stmode == BM_STORE_1N || + info.compiled_stmode == BM_STORE_4N, + "user stmode[%d]:%d shouldn't be BM_STORE_2N\n", idx, + info.compiled_stmode); + const auto &ref_shape = + is_input ? user_input.shape : cmd_input.tensor_info.shape; + info.n = ref_shape.dims[0]; + info.c = ref_shape.num_dims > 1 ? ref_shape.dims[1] : 1; + info.h = ref_shape.num_dims > 2 ? 
ref_shape.dims[2] : 1; + info.w = 1; + for (int s = 3; s < ref_shape.num_dims; s++) { + info.w *= ref_shape.dims[s]; + } + info.tensor_byte_size = (uint32_t)info.n * info.c * info.h * info.w * + bmrt_data_type_size((bm_data_type_t)info.dtype); + tensor_info.push_back(std::move(info)); + } +} +void Bmruntime::fill_tpu_cmd_info(std::vector &cmd_info, + const SUBNET_INFO_T *subnet, + const int32_t core_idx) { + cmd_info.clear(); + const size_t group_num = + subnet->tpu_info.core_commands[core_idx].bdc_id.size(); + for (size_t group_idx = 0; group_idx < group_num; group_idx++) { + tpu_cmd_info_t info = {0}; + info.bdc_cmd_num = + subnet->tpu_info.core_commands[core_idx].bdc_id[group_idx]; + info.gdma_cmd_num = + subnet->tpu_info.core_commands[core_idx].gdma_id[group_idx]; + info.bdc_cmd_byte_size = + subnet->tpu_info.core_commands[core_idx].bdc_cmd_byte[group_idx]; + info.gdma_cmd_byte_size = + subnet->tpu_info.core_commands[core_idx].gdma_cmd_byte[group_idx]; + cmd_info.push_back(std::move(info)); + } +} +/* TODO : refactor by launch_static */ +bool Bmruntime::launch_tpu_subnet(net_ctx_t* net_ctx, net_stage_t* stage, const SUBNET_INFO_T* subnet, const bm_tensor_t* input_tensors, int input_num, - bm_tensor_t* output_tensors, int output_num) + bm_tensor_t* output_tensors, int output_num, + const uint32_t dyn_core_mask) { - // check parameters - //BMRT_ASSERT(false == net_ctx->is_dynamic); - - #ifdef __linux__ - u64 user_input_global_offset[input_num]; - u64 cmd_input_global_offset[input_num]; - int input_data_len[input_num]; - int input_n[input_num]; - int input_c[input_num]; - int input_h[input_num]; - int input_w[input_num]; - int input_length[input_num]; - - unsigned int input_dsize[input_num]; - unsigned short input_dtype[input_num]; - unsigned char input_stmode[input_num]; - unsigned char real_in_stmode[input_num]; - unsigned int input_pad_h[input_num]; - #else - std::shared_ptr user_input_global_offset_(new u64[input_num], std::default_delete()); - u64* 
user_input_global_offset = user_input_global_offset_.get(); - std::shared_ptr cmd_input_global_offset_(new u64[input_num], std::default_delete()); - u64* cmd_input_global_offset = cmd_input_global_offset_.get(); - std::shared_ptr input_data_len_(new int[input_num], std::default_delete()); - int* input_data_len = input_data_len_.get(); - std::shared_ptr input_n_(new int[input_num], std::default_delete()); - int* input_n = input_n_.get(); - std::shared_ptr input_c_(new int[input_num], std::default_delete()); - int* input_c = input_c_.get(); - std::shared_ptr input_h_(new int[input_num], std::default_delete()); - int* input_h = input_h_.get(); - std::shared_ptr input_w_(new int[input_num], std::default_delete()); - int* input_w = input_w_.get(); - std::shared_ptr input_length_(new int[input_num], std::default_delete()); - int* input_length = input_length_.get(); - std::shared_ptr input_dsize_(new unsigned int[input_num], std::default_delete()); - unsigned int* input_dsize = input_dsize_.get(); - std::shared_ptr input_dtype_(new unsigned short[input_num], std::default_delete()); - unsigned short* input_dtype = input_dtype_.get(); - std::shared_ptr input_stmode_(new unsigned char[input_num], std::default_delete()); - unsigned char* input_stmode = input_stmode_.get(); - std::shared_ptr real_in_stmode_(new unsigned char[input_num], std::default_delete()); - unsigned char* real_in_stmode = real_in_stmode_.get(); - std::shared_ptr input_pad_h_(new unsigned int[input_num], std::default_delete()); - unsigned int* input_pad_h = input_pad_h_.get(); - #endif - - for (u32 idx = 0; idx < subnet->input_tensor_name_v.size(); ++idx) { - auto& tensor_name = subnet->input_tensor_name_v[idx]; - auto& tensor_ext = stage->subnet_tensor_v.find(tensor_name)->second; - auto& user_input = input_tensors[idx]; - /* command input mem */ - bm_device_mem_t input_mem = tensor_ext.tensor_info.device_mem; - cmd_input_global_offset[idx] = bm_mem_get_device_addr(input_mem); - /* user input mem */ - 
user_input_global_offset[idx] = bm_mem_get_device_addr(user_input.device_mem); - - input_data_len[idx] = bmrt_shape_count(&user_input.shape); - input_dsize[idx] = (unsigned int)input_data_len[idx] * ByteSize(user_input.dtype); - input_n[idx] = user_input.shape.dims[0]; - input_c[idx] = user_input.shape.num_dims > 1 ? user_input.shape.dims[1] : 1; - input_h[idx] = user_input.shape.num_dims > 2 ? user_input.shape.dims[2] : 1; - input_w[idx] = 1; - input_length[idx] = 1; - for (int s = 3; s < user_input.shape.num_dims; s++) { - input_w[idx] *= user_input.shape.dims[s]; - } - for (int s = 1; s < user_input.shape.num_dims; s++) { - input_length[idx] *= user_input.shape.dims[s]; - } - input_dtype[idx] = (unsigned short)user_input.dtype; - input_stmode[idx] = (unsigned short)tensor_ext.tensor_info.st_mode; - real_in_stmode[idx] = user_input.st_mode; - - // pad_h for conv 3ic(for BM1684) - input_pad_h[idx] = tensor_ext.pad_h; - } - - #ifdef __linux__ - u64 user_output_global_offset[output_num]; - u64 cmd_output_global_offset[output_num]; - int output_n[output_num]; - int output_length[output_num]; - int output_data_len[output_num]; - unsigned int output_dsize[input_num]; - unsigned short output_dtype[output_num]; - unsigned char output_stmode[output_num]; - unsigned char force_out_stmode[output_num]; - #else - std::shared_ptr user_output_global_offset_(new u64[output_num], std::default_delete()); - u64* user_output_global_offset = user_output_global_offset_.get(); - std::shared_ptr cmd_output_global_offset_(new u64[output_num], std::default_delete()); - u64* cmd_output_global_offset = cmd_output_global_offset_.get(); - std::shared_ptr output_n_(new int[output_num], std::default_delete()); - int* output_n = output_n_.get(); - std::shared_ptr output_length_(new int[output_num], std::default_delete()); - int* output_length = output_length_.get(); - std::shared_ptr output_data_len_(new int[output_num], std::default_delete()); - int* output_data_len = output_data_len_.get(); - 
std::shared_ptr output_dsize_(new unsigned int[output_num], std::default_delete()); - unsigned int* output_dsize = output_dsize_.get(); - std::shared_ptr output_dtype_(new unsigned short[output_num], std::default_delete()); - unsigned short* output_dtype = output_dtype_.get(); - std::shared_ptr output_stmode_(new unsigned char[output_num], std::default_delete()); - unsigned char* output_stmode = output_stmode_.get(); - std::shared_ptr force_out_stmode_(new unsigned char[output_num], std::default_delete()); - unsigned char* force_out_stmode = force_out_stmode_.get(); - #endif - for (u32 idx = 0; idx < subnet->output_tensor_name_v.size(); ++idx) { - auto& tensor_name = subnet->output_tensor_name_v[idx]; - auto& tensor_ext = stage->subnet_tensor_v.find(tensor_name)->second; - auto& user_output = output_tensors[idx]; - - /* cmd output */ - const bm_device_mem_t output_mem = tensor_ext.tensor_info.device_mem; - cmd_output_global_offset[idx] = bm_mem_get_device_addr(output_mem); - /* user output */ - user_output_global_offset[idx] = bm_mem_get_device_addr(user_output.device_mem); - - output_n[idx] = tensor_ext.tensor_info.shape.dims[0]; - //output_length[idx] = max_c * max_h * max_w; - output_length[idx] = 1; - for(int s=1; sdevice_id; + std::vector input_info; + std::vector output_info; + if (m_flags & BM_RUNTIME_SHARE_MEM) { + fill_tpu_tensor_info(input_info, stage, subnet, input_tensors, true); + fill_tpu_tensor_info(output_info, stage, subnet, output_tensors, false); + } else { + fill_tpu_tensor_info(input_info, net_ctx->dyn_neuron_stage_dict[dyn_core_mask], subnet, input_tensors, true); + fill_tpu_tensor_info(output_info, net_ctx->dyn_neuron_stage_dict[dyn_core_mask], subnet, output_tensors, false); + } - #ifdef __linux__ - int bdc_id_num[subnet->tpu_info.cmdgroup_num]; - int gdma_id_num[subnet->tpu_info.cmdgroup_num]; - int cdma_id_num[subnet->tpu_info.cmdgroup_num]; - unsigned int bdc_cmd_byte_size[subnet->tpu_info.cmdgroup_num]; - unsigned int 
gdma_cmd_byte_size[subnet->tpu_info.cmdgroup_num]; - #else - std::shared_ptr bdc_id_num_(new int[subnet->tpu_info.cmdgroup_num], std::default_delete()); - int* bdc_id_num = bdc_id_num_.get(); - std::shared_ptr gdma_id_num_(new int[subnet->tpu_info.cmdgroup_num], std::default_delete()); - int* gdma_id_num = gdma_id_num_.get(); - std::shared_ptr cdma_id_num_(new int[subnet->tpu_info.cmdgroup_num], std::default_delete()); - int* cdma_id_num = cdma_id_num_.get(); - std::shared_ptr bdc_cmd_byte_size_(new unsigned int[subnet->tpu_info.cmdgroup_num], std::default_delete()); - unsigned int* bdc_cmd_byte_size = bdc_cmd_byte_size_.get(); - std::shared_ptr gdma_cmd_byte_size_(new unsigned int[subnet->tpu_info.cmdgroup_num], std::default_delete()); - unsigned int* gdma_cmd_byte_size = gdma_cmd_byte_size_.get(); - #endif + auto core_list = get_core_list_from_core_mask(dyn_core_mask); + std::vector core_command(core_list.size()); + for (size_t core_idx = 0; core_idx < core_list.size(); core_idx++) { + std::vector cmd_info; + fill_tpu_cmd_info(cmd_info, subnet, core_idx); + core_command[core_idx].cmd_info = std::move(cmd_info); + core_command[core_idx].bdc_cmd_addr = + stage->core_commands[core_idx].bdc_mem.addr + + subnet->tpu_info.core_commands[core_idx].bdc_offset; + core_command[core_idx].gdma_cmd_addr = + stage->core_commands[core_idx].gdma_mem.addr + + subnet->tpu_info.core_commands[core_idx].gdma_offset; + core_command[core_idx].cdma_cmd_addr = 0; + core_command[core_idx].hau_cmd_addr = + stage->core_commands[core_idx].hau_mem.addr; + core_command[core_idx].sdma_cmd_addr = + stage->core_commands[core_idx].sdma_mem.addr; + } - for(int group_idx = 0; group_idx < subnet->tpu_info.cmdgroup_num; group_idx ++) { - bdc_id_num[group_idx] = subnet->tpu_info.bdc_group_id_v[group_idx]; - gdma_id_num[group_idx] = subnet->tpu_info.gdma_group_id_v[group_idx]; - bdc_cmd_byte_size[group_idx] = subnet->tpu_info.bdc_cmd_byte_v[group_idx]; - gdma_cmd_byte_size[group_idx] = 
subnet->tpu_info.gdma_cmd_byte_v[group_idx]; - cdma_id_num[group_idx] = 0; - } + tpu_net_info_t net_info; + net_info.input_info = std::move(input_info); + net_info.output_info = std::move(output_info); + net_info.core_commands = std::move(core_command); + net_info.core_list = core_list; + net_info.coeff_start_addr = stage->coeff_offset; + net_info.do_allreduce = net_ctx->do_allreduce; + if (m_flags & BM_RUNTIME_SHARE_MEM) { + net_info.neuron_start_addr.assign(stage->ctx_offset.begin(), stage->ctx_offset.end()); + } else { + net_info.neuron_start_addr.assign(net_ctx->dyn_neuron_stage_dict[dyn_core_mask]->ctx_offset.begin(), net_ctx->dyn_neuron_stage_dict[dyn_core_mask]->ctx_offset.end()); + } + if (bmrt_arch_info::get_bmtpu_arch() == BM1684X || bmrt_arch_info::get_bmtpu_arch() == BM1688) { + net_info.kernel_func_ids = net_ctx->kernel_module_->get_multi_fullnet_func_id(core_list); + } #ifdef DEBUG BMRT_DEBUG("TPU SUBNET LAUNCHED START"); { for (int i = 0; i < input_num; i++) { - BMRT_DEBUG("TPU SUBNET input %d : Device Address %llx SIZE %x", i, - user_input_global_offset[i], input_data_len[i]); + const auto &info = net_info.input_info.at(i); + int32_t data_len = info.n * info.c * info.h * info.w; + BMRT_DEBUG("TPU SUBNET input %d : Device Address %lx SIZE %x", i, + info.user_global_addr, data_len); if (input_tensors[i].dtype != BM_FLOAT32 || input_tensors[i].device_mem.size == 0) { continue; } - float* input_value = new float[std::min(input_data_len[i], 10)]; - bm_memcpy_d2s_partial(this->get_bm_handle(), input_value, input_tensors[i].device_mem, - std::min(input_data_len[i], 10) * sizeof(float)); + float* input_value = new float[std::min(data_len, 10)]; + bm_memcpy_d2s_partial(m_handles[devid], input_value, input_tensors[i].device_mem, + std::min(data_len, 10) * sizeof(float)); std::stringstream debug_msg; - for (int idx = 0; idx < std::min(10, input_data_len[i]); idx++) + for (int idx = 0; idx < std::min(10, data_len); idx++) debug_msg << " " << input_value[idx]; 
delete[] input_value; BMRT_DEBUG("%s", debug_msg.str().c_str()); @@ -684,89 +711,23 @@ bool Bmruntime::launch_tpu_subnet(net_ctx_t* net_ctx, net_stage_t* stage, SUBNET } #endif - bm_status_t status = BM_SUCCESS; - switch(bmrt_arch_info::get_bmtpu_arch()) { - case BM1682: - status= bmfunc::bmdnn_1682()->_bmdnn_multi_fullnet_( - m_handle, - input_num, - user_input_global_offset, - cmd_input_global_offset, - input_data_len, - input_dtype, - output_num, - user_output_global_offset, - cmd_output_global_offset, - output_data_len, - output_dtype, - //when engine fetch command, the commands can only locate at the previous 4G - stage->bdc_mem.addr + subnet->tpu_info.bdc_offset, - stage->gdma_mem.addr + subnet->tpu_info.gdma_offset, - //m_bdc_cmd_start_address_v[net_idx], - //m_gdma_cmd_start_address_v[net_idx], - //m_cdma_cmd_start_address_v[net_idx], - 0, - bdc_id_num, - gdma_id_num, - cdma_id_num, - subnet->tpu_info.cmdgroup_num); - //m_cmdgroup_num[net_idx]) == BM_SUCCESS); - break; - case BM1684: - status= bmfunc::bmdnn_1684()->_bmdnn_multi_fullnet_( - m_handle, - input_num, - user_input_global_offset, - cmd_input_global_offset, - input_n, input_c, input_h, input_w, - input_dtype, input_stmode, real_in_stmode, - output_num, - user_output_global_offset, - cmd_output_global_offset, - output_n, output_length, - output_dtype, output_stmode, force_out_stmode, - //when engine fetch command, the commands can only locate at the previous 4G - stage->bdc_mem.addr + subnet->tpu_info.bdc_offset, - stage->gdma_mem.addr + subnet->tpu_info.gdma_offset, - bdc_id_num, - gdma_id_num, - subnet->tpu_info.cmdgroup_num, - input_pad_h); - break; - case BM1684X: { - int func_id = net_ctx->kernel_module_->get_multi_fullnet_func_id(); - status= bmfunc::bmdnn_1684x()->_bmdnn_multi_fullnet_( - m_handle, - func_id, - input_num, - user_input_global_offset, - cmd_input_global_offset, - input_dsize,// in bytes - output_num, - user_output_global_offset, - cmd_output_global_offset, - output_dsize,// in 
bytes - stage->bdc_mem.addr + subnet->tpu_info.bdc_offset, - stage->gdma_mem.addr + subnet->tpu_info.gdma_offset, - bdc_id_num, - gdma_id_num, - bdc_cmd_byte_size, - gdma_cmd_byte_size, - subnet->tpu_info.cmdgroup_num); - break;} - default: - BMRT_LOG(FATAL, "Error: unknown BM TPU"); - } + bm_status_t status = bmfunc::bmdnn_base()->_bmdnn_multi_fullnet_(m_handles[devid], net_info); if (BM_SUCCESS == status) { - status = bm_thread_sync(m_handle); + for (auto core_id : core_list) { + bm_status_t core_status = bm_thread_sync_from_core(m_handles[devid], core_id); + status = core_status == BM_SUCCESS ? status : core_status; + } } if (m_profile->is_enabled()) { - auto cmd_num = m_profile->record_subnet_cmd_info(stage->gdma_mem.addr, subnet->tpu_info.gdma_offset, - stage->bdc_mem.addr, subnet->tpu_info.bdc_offset, - subnet->tpu_info.cmdgroup_num); - for(int i=0; itpu_info.cmdgroup_num; i++){ - cmd_num[i].bdc = bdc_id_num[i]; - cmd_num[i].gdma = gdma_id_num[i]; + for(size_t core_idx = 0; core_idxrecord_subnet_cmd_info(core_idx, + stage->core_commands[core_idx].gdma_mem.addr, subnet->tpu_info.core_commands[core_idx].gdma_offset, + stage->core_commands[core_idx].bdc_mem.addr, subnet->tpu_info.core_commands[core_idx].bdc_offset, + subnet->tpu_info.core_commands[core_idx].gdma_id.size()); + for (size_t i = 0; i < subnet->tpu_info.core_commands[core_idx].gdma_id.size(); i++) { + cmd_num[i].bdc = net_info.core_commands[core_idx].cmd_info[i].bdc_cmd_num; + cmd_num[i].gdma = net_info.core_commands[core_idx].cmd_info[i].gdma_cmd_num; + } } } if (BM_SUCCESS != status) { @@ -776,18 +737,20 @@ bool Bmruntime::launch_tpu_subnet(net_ctx_t* net_ctx, net_stage_t* stage, SUBNET #ifdef DEBUG if (BM_SUCCESS == status) { for (int i = 0; i < output_num; i++) { + const auto &info = net_info.output_info.at(i); + int32_t data_len = info.n * info.c * info.h * info.w; int really_output_size = bm_mem_get_device_size(output_tensors[i].device_mem) / sizeof(float); - BMRT_DEBUG("TPU SUBNET output %d : 
Device Address %llx STATIC SIZE %x REALLY SIZE %x", - i, user_output_global_offset[i], output_data_len[i], really_output_size); + BMRT_DEBUG("TPU SUBNET output %d : Device Address %lx STATIC SIZE %x REALLY SIZE %x", + i, info.user_global_addr, data_len, really_output_size); if (output_tensors[i].dtype != BM_FLOAT32 || output_tensors[i].device_mem.size == 0) { continue; } float* output_value = new float[10]; - bm_memcpy_d2s_partial(this->get_bm_handle(), output_value, output_tensors[i].device_mem, - std::min(really_output_size, 10) * sizeof(float)); + bm_memcpy_d2s_partial(m_handles[devid], output_value, output_tensors[i].device_mem, + std::min((int)really_output_size, 10) * sizeof(float)); std::stringstream debug_msg; - for (int idx = 0; idx < std::min(output_data_len[i], 10); idx++) + for (int idx = 0; idx < std::min(data_len, 10); idx++) debug_msg << " " << output_value[idx]; delete[] output_value; BMRT_DEBUG("%s", debug_msg.str().c_str()); @@ -800,14 +763,14 @@ bool Bmruntime::launch_tpu_subnet(net_ctx_t* net_ctx, net_stage_t* stage, SUBNET } /* launch static net, no n/h/w specified */ -bool Bmruntime::launch_cpu_subnet(net_ctx_t* net_ctx, net_stage_t* stage, SUBNET_INFO_T* subnet, +bool Bmruntime::launch_cpu_subnet(net_ctx_t* net_ctx, map *subnet_tensor_v, const SUBNET_INFO_T* subnet, const bm_tensor_t* input_tensors, bm_shape_t real_out_shape[]) { if (bmcpu_process_ == NULL) { BMRT_LOG(WRONG, "cpu.so load failed, can't run cpu layer"); return false; } - + auto devid = net_ctx->device_id; int op_type = subnet->cpu_info.op_type; void *user_param = subnet->cpu_info.user_param; int param_size = subnet->cpu_info.param_size; @@ -828,10 +791,10 @@ bool Bmruntime::launch_cpu_subnet(net_ctx_t* net_ctx, net_stage_t* stage, SUBNET for (auto& tensor_name: subnet->input_tensor_name_v) { bool need_d2s = false; - auto iter = stage->subnet_tensor_v.find(tensor_name); - BMRT_ASSERT_INFO(iter != stage->subnet_tensor_v.end(), "Wrong subnet_tensor_v named:%s", 
tensor_name.c_str()); + auto iter = subnet_tensor_v->find(tensor_name); + BMRT_ASSERT_INFO(iter != subnet_tensor_v->end(), "Wrong subnet_tensor_v named:%s", tensor_name.c_str()); // BMRT_ASSERT(stage->subnet_tensor_v.find(tensor_name) != stage->subnet_tensor_v.end()); - auto& tensor_ext = stage->subnet_tensor_v.find(tensor_name)->second; + auto& tensor_ext = subnet_tensor_v->find(tensor_name)->second; switch(tensor_ext.io_type) { case TENSOR_TYPE_NET_INPUT: @@ -843,7 +806,7 @@ bool Bmruntime::launch_cpu_subnet(net_ctx_t* net_ctx, net_stage_t* stage, SUBNET /* now net inputs are always device memory, TOBE REFINE !! */ dev_mem = input_tensors[tensor_ext.io_index].device_mem; need_d2s = true; - subnet_tensor_d2s(stage, tensor_name, &dev_mem, 0, bmrt_shape_count(&shape)); + subnet_tensor_d2s(devid, subnet_tensor_v, tensor_name, &dev_mem, 0, bmrt_shape_count(&shape)); break; case TENSOR_TYPE_NET_OUTPUT: case TENSOR_TYPE_IMM_IO: @@ -857,7 +820,7 @@ bool Bmruntime::launch_cpu_subnet(net_ctx_t* net_ctx, net_stage_t* stage, SUBNET /* for cpu subnet, if input tensor is from tpu subnet : d2s */ dev_mem = tensor_ext.tensor_info.device_mem; need_d2s = true; - subnet_tensor_d2s(stage, tensor_name); + subnet_tensor_d2s(devid, subnet_tensor_v, tensor_name); } break; @@ -877,7 +840,7 @@ bool Bmruntime::launch_cpu_subnet(net_ctx_t* net_ctx, net_stage_t* stage, SUBNET tensor_idx = 0; for (auto& tensor_name: subnet->output_tensor_name_v) { - auto& tensor_ext = stage->subnet_tensor_v.find(tensor_name)->second; + auto& tensor_ext = subnet_tensor_v->find(tensor_name)->second; auto& shape = tensor_ext.tensor_info.shape; output_dtypes.push_back(tensor_ext.tensor_info.dtype); vector output_shape(shape.dims, shape.dims + shape.num_dims); @@ -910,10 +873,20 @@ bool Bmruntime::launch_cpu_subnet(net_ctx_t* net_ctx, net_stage_t* stage, SUBNET (void)(output_dtypes); /* NOTE: need keep input/output tensor order accordingly with bmcpu */ - bmcpu_process_(bmcpu_handle_, op_type, - user_param, 
param_size, - input_tensor_data_v, input_shapes_v, - output_tensor_data_v, output_shapes_v); + if (op_type < 10000) { + bmcpu_process_(bmcpu_handle_, op_type, + user_param, param_size, + input_tensor_data_v, input_shapes_v, + output_tensor_data_v, output_shapes_v); + } else if (customcpu_handle_) { + BMRT_LOG(INFO, "customcpu_process_"); + customcpu_process_(customcpu_handle_, op_type, + user_param, param_size, + input_tensor_data_v, input_shapes_v, + output_tensor_data_v, output_shapes_v); + } else { + BMRT_LOG(WRONG, "Cpu process function not found."); + } #ifdef DEBUG { @@ -951,7 +924,8 @@ bool Bmruntime::launch_multi_subnet( const bm_tensor_t* input_tensors, int input_num, bm_tensor_t* output_tensors, - int output_num) + int output_num, + const uint32_t dyn_core_mask) { int tensor_idx = 0; #ifdef __linux__ @@ -960,9 +934,15 @@ bool Bmruntime::launch_multi_subnet( struct timespec time; #endif bool ret = true; - - std::lock_guard guard(net_ctx->neuron_mutex); - auto subnet = stage->subnet_v.front(); + auto devid = net_ctx->device_id; + const SUBNET_INFO_T *subnet = stage->subnet_v.front(); + std::unique_lock lock(net_ctx->neuron_mutex); + map *subnet_tensor_v; + if (m_flags & BM_RUNTIME_SHARE_MEM) { + subnet_tensor_v = &(stage->subnet_tensor_v); + } else { + subnet_tensor_v = &(net_ctx->dyn_neuron_stage_dict[dyn_core_mask]->subnet_tensor_v); + } int iteration = 0; map tensor_iteration; while(subnet){ @@ -1000,7 +980,7 @@ bool Bmruntime::launch_multi_subnet( BMRT_ASSERT_INFO(in_idx>=0, "in_idx:%d shouldn't less than 0", in_idx); //forward tensor to output auto in_name = subnet->input_tensor_name_v[in_idx]; - subnet_tensor_forward(stage, in_name, subnet->output_tensor_name_v[idx], output_tensors); + subnet_tensor_forward(devid, subnet_tensor_v, in_name, subnet->output_tensor_name_v[idx], output_tensors); } } else if (subnet->subnet_mode == SUBNET_MODE_SWITCH) { int subnet_id_size = subnet->next_subnet_ids.size(); @@ -1012,11 +992,11 @@ bool 
Bmruntime::launch_multi_subnet( bool run_true_subnet = false; auto cond_name = subnet->input_tensor_name_v.back(); const bm_device_mem_t* cond_mem = nullptr; - auto& tensor_ext = stage->subnet_tensor_v.find(cond_name)->second; + auto& tensor_ext = subnet_tensor_v->find(cond_name)->second; if(tensor_ext.io_type == TENSOR_TYPE_NET_INPUT){ cond_mem = &(input_tensors[tensor_ext.io_index].device_mem); } - auto data_ptr = (int*)subnet_tensor_d2s(stage, cond_name, const_cast(cond_mem)); + auto data_ptr = (int*)subnet_tensor_d2s(devid, subnet_tensor_v, cond_name, const_cast(cond_mem)); run_true_subnet = data_ptr[0] != 0; if(!subnet->switch_info.valid){ @@ -1033,8 +1013,8 @@ bool Bmruntime::launch_multi_subnet( auto out_name = subnet->output_tensor_name_v[out_idx]; auto in_name = subnet->input_tensor_name_v[subnet->switch_info.output_from[out_idx]]; tensor_iteration[out_name] = iteration; - auto &src_tensor = must_get_tensor_in_stage(stage, in_name); - auto &dst_tensor = must_get_tensor_in_stage(stage, out_name); + auto &src_tensor = must_get_tensor_in_subnet(subnet_tensor_v, in_name); + auto &dst_tensor = must_get_tensor_in_subnet(subnet_tensor_v, out_name); dst_tensor.tensor_info.shape = src_tensor.tensor_info.shape; } } @@ -1049,12 +1029,12 @@ bool Bmruntime::launch_multi_subnet( std::shared_ptr real_out_shape_(new bm_shape_t[subnet->output_tensor_name_v.size()], std::default_delete()); bm_shape_t* real_out_shape = real_out_shape_.get(); #endif - ret = launch_cpu_subnet(net_ctx, stage, subnet, input_tensors, real_out_shape); + ret = launch_cpu_subnet(net_ctx, subnet_tensor_v, subnet, input_tensors, real_out_shape); BMRT_ASSERT_INFO(ret == true, "launch_cpu_subnet return false"); tensor_idx = 0; for (auto& tensor_name: subnet->output_tensor_name_v) { - auto& tensor_ext = stage->subnet_tensor_v.find(tensor_name)->second; + auto& tensor_ext = subnet_tensor_v->find(tensor_name)->second; #if 0 if (!bmrt_shape_is_same(&tensor_ext.max_shape, &real_out_shape[tensor_idx]) && 
(tensor_ext.mem_type & MEM_TYPE_TPU)) { @@ -1079,7 +1059,7 @@ bool Bmruntime::launch_multi_subnet( /* update net output shape/data */ if (tensor_ext.io_type == TENSOR_TYPE_NET_OUTPUT) { output_tensors[tensor_ext.io_index].shape = real_out_shape[tensor_idx]; - subnet_tensor_s2d(stage, tensor_name, &output_tensors[tensor_ext.io_index].device_mem); + subnet_tensor_s2d(devid, subnet_tensor_v, tensor_name, &output_tensors[tensor_ext.io_index].device_mem); } #ifdef DEBUG @@ -1097,6 +1077,9 @@ bool Bmruntime::launch_multi_subnet( } } else if(subnet->subnet_mode == SUBNET_MODE_TPU) { + if (!(m_flags & BM_RUNTIME_SHARE_MEM)) { + lock.unlock(); + } int subnet_input_num = subnet->input_tensor_name_v.size(); int subnet_output_num = subnet->output_tensor_name_v.size(); #ifdef __linux__ @@ -1118,7 +1101,7 @@ bool Bmruntime::launch_multi_subnet( /* set user input */ tensor_idx = 0; for (auto& tensor_name: subnet->input_tensor_name_v) { - auto& tensor_ext = stage->subnet_tensor_v.find(tensor_name)->second; + auto& tensor_ext = subnet_tensor_v->find(tensor_name)->second; switch(tensor_ext.io_type) { case TENSOR_TYPE_NET_INPUT: /* subnet input tensor is also net input tensor, using user input */ @@ -1136,7 +1119,7 @@ bool Bmruntime::launch_multi_subnet( subnet_input_elem_nums[tensor_idx] = tensor_ext.record_elem_num; /* for tpu subnet, if input tensor is from cpu subnet : s2d */ if (tensor_ext.src_subnet && tensor_ext.src_subnet->subnet_mode == SUBNET_MODE_CPU) { - subnet_tensor_s2d(stage, tensor_name); + subnet_tensor_s2d(devid, subnet_tensor_v, tensor_name); BMRT_DEBUG("TPU SUBNET TENSOR %s S2D FROM %p TO %llx", tensor_name.c_str(), tensor_ext.host_mem.addr, bm_mem_get_device_addr(subnet_input_tensors[tensor_idx].device_mem)); @@ -1152,7 +1135,7 @@ bool Bmruntime::launch_multi_subnet( /* set user output */ tensor_idx = 0; for (auto& tensor_name: subnet->output_tensor_name_v) { - auto& tensor_ext = stage->subnet_tensor_v.find(tensor_name)->second; + auto& tensor_ext = 
subnet_tensor_v->find(tensor_name)->second; switch(tensor_ext.io_type) { case TENSOR_TYPE_NET_OUTPUT: /* subnet input tensor is also net output tensor, using user input */ @@ -1174,13 +1157,13 @@ bool Bmruntime::launch_multi_subnet( if (subnet->tpu_info.is_dynamic) { ret = launch_tpu_ir_subnet(net_ctx, stage, subnet, subnet_input_tensors, subnet_input_elem_nums, subnet_input_num, - subnet_output_tensors, subnet_output_elem_nums, subnet_output_num); + subnet_output_tensors, subnet_output_elem_nums, subnet_output_num, dyn_core_mask); BMRT_ASSERT_INFO(ret == true, "launch_tpu_ir_subnet return false"); /* reshape output tensors */ tensor_idx = 0; for (auto& tensor_name: subnet->output_tensor_name_v) { - auto& tensor_ext = stage->subnet_tensor_v.find(tensor_name)->second; + auto& tensor_ext = subnet_tensor_v->find(tensor_name)->second; switch(tensor_ext.io_type) { case TENSOR_TYPE_NET_OUTPUT: /* subnet input tensor is also net output tensor, using user input */ @@ -1215,10 +1198,15 @@ bool Bmruntime::launch_multi_subnet( } else { ret = launch_tpu_subnet(net_ctx, stage, subnet, subnet_input_tensors, subnet_input_num, - subnet_output_tensors, subnet_output_num); + subnet_output_tensors, subnet_output_num, + dyn_core_mask); BMRT_ASSERT_INFO(ret == true, "launch_tpu_subnet return false"); } + if (!(m_flags & BM_RUNTIME_SHARE_MEM)) { + lock.lock(); + } + } else { BMRT_LOG(FATAL, "Not supported subnet_mode=%d, subnet_id=%d", subnet->subnet_mode, subnet->id); } diff --git a/tpu-runtime/src/cpp/bmruntime_cpp.cpp b/tpu-runtime/src/cpp/bmruntime_cpp.cpp old mode 100644 new mode 100755 index 5ff2f0f..6cfb9b3 --- a/tpu-runtime/src/cpp/bmruntime_cpp.cpp +++ b/tpu-runtime/src/cpp/bmruntime_cpp.cpp @@ -89,6 +89,30 @@ bool IsSameShape(const bm_shape_t &left, const bm_shape_t &right) return true; } +static std::string chip_name_by_id(unsigned int chipid) { + std::string chip_name = ""; + if (chipid == 0x1684) { + chip_name = "BM1684"; + } else if (chipid == 0x1686) { + chip_name = 
"BM1684X"; + } else if (chipid == 0x1686a200) { + chip_name = "BM1688"; + } else if (chipid == 0x1682) { + chip_name = "BM1682"; + } else if (chipid == 0x1880) { + chip_name = "BM1880"; + } else if (chipid == 0x2260) { + chip_name = "BM1690"; + } else if (chipid == 0x3000) { + chip_name = "MARS3"; + } else if (chipid == 0x2380) { + chip_name = "SG2380"; + } else { + BMRT_LOG(FATAL, "Unknown chipid %x", chipid); + } + return chip_name; +} + Context::Context(bm_handle_t bm_handle) { bm_handle_ = NULL; @@ -99,16 +123,8 @@ Context::Context(bm_handle_t bm_handle) if (0 != bm_get_chipid(bm_handle, &chipid)) { BMRT_LOG(FATAL, "Cannot get chipid"); } - std::string chip_name; - if (chipid == 0x1682) { - chip_name = "BM1682"; - } else if (chipid == 0x1684) { - chip_name = "BM1684"; - } else if (chipid == 0x1686) { - chip_name = "BM1684X"; - } else { - BMRT_LOG(FATAL, "Unknown chipid %x", chipid); - } + + std::string chip_name = chip_name_by_id(chipid); Bmruntime *p_bmrt = new Bmruntime(&bm_handle, true, chip_name); BMRT_ASSERT_INFO(p_bmrt != NULL,"p_bmrt shouldn't be NULL,chip_name: %s",chip_name.c_str()); @@ -127,16 +143,8 @@ Context::Context(int devid) if (0 != bm_get_chipid(bm_handle_, &chipid)) { BMRT_LOG(FATAL, "Cannot get chipid"); } - std::string chip_name; - if (chipid == 0x1682) { - chip_name = "BM1682"; - } else if (chipid == 0x1684) { - chip_name = "BM1684"; - } else if (chipid == 0x1686) { - chip_name = "BM1684X"; - } else { - BMRT_LOG(FATAL, "Unknown chipid %x", chipid); - } + + std::string chip_name = chip_name_by_id(chipid); Bmruntime *p_bmrt = new Bmruntime(&bm_handle_, true, chip_name); BMRT_ASSERT_INFO(p_bmrt != NULL,"p_bmrt shouldn't be NULL"); @@ -196,7 +204,7 @@ void Context::trace() const const bm_net_info_t *Context::get_network_info(const char *net_name) const { Bmruntime *p_bmrt = (Bmruntime *)body_; - return p_bmrt->get_net_info(p_bmrt->get_net_idx(net_name)); + return p_bmrt->get_net_info(net_name); } // Network 
-------------------------------------------------------------------------- @@ -434,4 +442,23 @@ bool Tensor::ready() return true; } +api_info_t get_bmodel_api_info(void *p_bmrt, const char *net_name, + const bm_tensor_t *input_tensors, int input_num, + bm_tensor_t *output_tensors, int output_num, + bool user_mem, bool user_stmode, uint32_t *core_ids) { + api_info_t api_info; + memset(&api_info, 0x0, sizeof(api_info_t)); + if (p_bmrt == NULL || net_name == NULL) { + BMRT_LOG(WRONG, "parameter invalid p_bmrt is NULL or net_name is NULL"); + return api_info; + } + int net_idx = ((Bmruntime *)p_bmrt)->get_net_idx(net_name); + if (net_idx < 0) { + BMRT_LOG(WRONG, "net name:%s invalid", net_name); + return api_info; + } + return ((Bmruntime *)p_bmrt) + ->get_api_info(net_idx, input_tensors, input_num, output_tensors, + output_num, user_mem, user_stmode, core_ids); +} } // namespace bmruntime diff --git a/tpu-runtime/test.cmake b/tpu-runtime/test.cmake new file mode 100644 index 0000000..8d068fc --- /dev/null +++ b/tpu-runtime/test.cmake @@ -0,0 +1,31 @@ +cmake_minimum_required(VERSION 2.8) + +project(bmrt LANGUAGES CXX) +set(CMAKE_CXX_STANDARD 11) + +message(STATUS "third party: " $ENV{THIRDPARTY_DIR}) +find_package(Threads REQUIRED) +link_directories($ENV{THIRDPARTY_DIR}/lib) + +include(gen_kernel_header.cmake) +add_custom_target(kernel_header DEPENDS ${KERNEL_HEADER_FILE}) + +file(GLOB_RECURSE srcs src/*.cpp src/*.c) +add_library(bmrt SHARED ${srcs}) +target_link_libraries(bmrt PUBLIC bmodel bmlib ${CMAKE_DL_LIBS} Threads::Threads) +target_include_directories(bmrt PUBLIC + $ENV{THIRDPARTY_DIR} + $ENV{THIRDPARTY_DIR}/include + ${CMAKE_CURRENT_SOURCE_DIR}/include/bmtap2 + ${CMAKE_CURRENT_SOURCE_DIR}/include + ${CMAKE_BINARY_DIR} + ) +add_dependencies(bmrt kernel_header) + +set(app_srcs + app/bmrt_test.cpp + app/bmrt_test_case.cpp) +add_executable(bmrt_test ${app_srcs}) +target_link_libraries(bmrt_test bmrt) +target_compile_definitions(bmrt_test PRIVATE VER="test") 
+target_compile_definitions(bmrt_test PUBLIC -DUSING_CMODEL=1) \ No newline at end of file diff --git a/tpu-runtime/test/env.sh b/tpu-runtime/test/env.sh new file mode 100644 index 0000000..64075a1 --- /dev/null +++ b/tpu-runtime/test/env.sh @@ -0,0 +1,31 @@ +#/bin/bash +export BMRT_TEST_TOP="$PWD" + +function bmrt_set_libcmodel() { + local runtime_arch=$1 + export TPUKERNEL_FIRMWARE_PATH=${BMRT_TEST_TOP}/lib/libcmodel_${runtime_arch}.so +} + +#注意: 回归环境较老,更新的动态库需要在FTP/docker/bmnnsdk2_ubuntu16.04_py35.docker环境中编译 +function bmrt_update_libcmodel() { + local backend_path=$1 + local arch_name=$2 + local libpath=${BMRT_TEST_TOP}/lib + pushd ${backend_path} + local backened_name=${arch_name} + if [ ${arch_name} == "bm1688" ]; then + backened_name="bm1686" + fi + if [ ${arch_name} == "bm1690" ]; then + backened_name="sg2260" + fi + source scripts/envsetup.sh ${backened_name} + rebuild_firmware_cmodel + cp ./build/firmware_core/libcmodel_firmware.so ${libpath}/libcmodel_${arch_name}.so + popd +} + +function bmrt_clean_test_env() { + unset ${BMRT_TEST_TOP} + unset ${TPUKERNEL_FIRMWARE_PATH} +} \ No newline at end of file diff --git a/tpu-runtime/test/lib/README.md b/tpu-runtime/test/lib/README.md new file mode 100644 index 0000000..7414a4d --- /dev/null +++ b/tpu-runtime/test/lib/README.md @@ -0,0 +1,18 @@ +### 简介 +请勿轻易更换libcmodel动态库 +编译动态库需要在ftp://172.28.141.89/docker/bmnnsdk2_ubuntu16.04_py35.docker环境下 + +## BM1684 2023-08-01 +commit id: 0b2507035a79fae10ed01abb9ae8e4fe84ed21fa +`cd TPU1684 && source scripts/envsetup.sh && rebuild_backend_lib_cmodel` +`cp ./build_runtime/c_model/libcmodel.so /workspace/libsophon/tpu-runtime/test/lib/libcmodel_bm1684.so` + +## BM1684X +commit id: 1c5867c30b21d6ad471c36d8418150a1ef518c2d +`cd TPU1686 && source scripts/envsetup.sh bm1684x && rebuild_firmware_cmodel` +`cp ./build/firmware_core/libcmodel_firmware.so /workspace/libsophon/tpu-runtime/test/lib/libcmodel_bm1684x.so` + +## BM1688 +commit id: 1c5867c30b21d6ad471c36d8418150a1ef518c2d 
+`cd TPU1686 && source scripts/envsetup.sh bm1686 && rebuild_firmware_cmodel` +`cp ./build/firmware_core/libcmodel_firmware.so /workspace/libsophon/tpu-runtime/test/lib/libcmodel_bm1688.so` diff --git a/tpu-runtime/test/lib/libcmodel_bm1684.so b/tpu-runtime/test/lib/libcmodel_bm1684.so new file mode 100755 index 0000000..08c667a Binary files /dev/null and b/tpu-runtime/test/lib/libcmodel_bm1684.so differ diff --git a/tpu-runtime/test/lib/libcmodel_bm1684x.so b/tpu-runtime/test/lib/libcmodel_bm1684x.so new file mode 100755 index 0000000..cab524f Binary files /dev/null and b/tpu-runtime/test/lib/libcmodel_bm1684x.so differ diff --git a/tpu-runtime/test/lib/libcmodel_bm1688.so b/tpu-runtime/test/lib/libcmodel_bm1688.so new file mode 100755 index 0000000..f13fe6a Binary files /dev/null and b/tpu-runtime/test/lib/libcmodel_bm1688.so differ diff --git a/tpu-runtime/test/python/global_def.py b/tpu-runtime/test/python/global_def.py new file mode 100644 index 0000000..0100511 --- /dev/null +++ b/tpu-runtime/test/python/global_def.py @@ -0,0 +1,30 @@ +from collections import defaultdict +class GlobalDefine: + def __init__(self) -> None: + self.CONST_CORE_NUM_DICT = {"bm1684" : 1, "bm1684x" : 1, "bm1688" : 2, "bm1690": 8} + self.CONST_CASE_NAME = [ "bmrt_test", + "bmrt_load_bmodel_data", + "bmrt_load_context", + "bmrt_launch_data", + "bmrt_simple_api", + "bmrt_multi_thread", + "bmcpp_load_bmodel", + "bmcpp_load_bmodel_data", + "bmcpp_reshape", + "bmcpp_multi_thread", + "bmtap2_register_bmodel", + "bmtap2_register_data", + "bmtap2_multi_thread", + "bmtap2cpp_load_bmodel", + "bmtap2cpp_multi_thread", + "bmrt_get_bmodel_api", + "bmrt_get_bmodel_api_c", + "bmmc_multi_mession"] + self.G_BMRT_APP = "bmrt_test" + self.DEF_ERROR = -1 + self.DEF_TIMEOUT = -2 + self.DEF_CMODEL = 0 + self.DEF_SOC = 1 + self.G_FAILED_CASES = defaultdict(str) + +gloabl_def = GlobalDefine() \ No newline at end of file diff --git a/tpu-runtime/test/python/main.py b/tpu-runtime/test/python/main.py new file 
mode 100644 index 0000000..eb75cd0 --- /dev/null +++ b/tpu-runtime/test/python/main.py @@ -0,0 +1,47 @@ +from utils import * +from test_case import * +import sys +import argparse + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Script description") + parser.add_argument("--use_soc", action="store_true", help="Choose CModel or SOC (default: CModel)") + parser.add_argument("--root_path", type=str, default="./", help="Root path (default: ./)") + parser.add_argument("--archs", type=str, help="Test architecture, exp.'bm1684,bm1688...'") + + args = parser.parse_args() + use_cmodel = not args.use_soc + root_path = args.root_path + test_archs = args.archs.split(",") + if use_cmodel: + root_path = bmrt_get_env("BMRT_TEST_TOP") + gloabl_def.G_BMRT_APP = root_path + "/../build/bmrt_test" + else: + raise Exception("not implemented") + ret = run_cmd(gloabl_def.G_BMRT_APP + " --help > /dev/null", 255) + if not ret: + raise Exception("check bmrt test failed\n") + + bmodels = defaultdict(defaultdict) + regression_model_path = root_path + "/../bmodel-zoo/regression_models/" + run_types = get_run_type_forlder(regression_model_path) + for run_type in run_types: + bmodels[run_type] = bmrt_get_bmodel_list(regression_model_path + run_type) + + for test_arch in test_archs: + #basic bmrt_test (static, dynamic, multi_subnet) + basic_test_func(bmodels, test_arch, 1) + + #bmrt api test "bmrt_load_bmodel_data","bmmc_multi_mession", ... 
+ test_api_func(bmodels, test_arch, 1) + + #multi_core_multi_mession test + if test_arch == "bm1688": + #multi_core_multi_mession test + test_multi_core_mession_func(bmodels, test_arch, 2) + + ret = 0 + if gloabl_def.G_FAILED_CASES.keys(): + bmrt_out_faild_log(gloabl_def.G_FAILED_CASES) + ret = -1 + sys.exit(ret) diff --git a/tpu-runtime/test/python/test_case.py b/tpu-runtime/test/python/test_case.py new file mode 100644 index 0000000..fcfa098 --- /dev/null +++ b/tpu-runtime/test/python/test_case.py @@ -0,0 +1,58 @@ +from utils import * + +def basic_test_func(bmodels : defaultdict, runtime_arch : str, loop_num : int): + case = TestBase(["bmrt_test"], loop_num) + case.set_arch_name(runtime_arch) + + for run_type in bmodels.keys(): + bmodel_list = [info.full_path for info in bmodels[run_type][runtime_arch]] + if not bmodel_list: continue + case.set_bmodel_list(bmodel_list) + case.run_single_mession() + +def test_api_func(bmodels : defaultdict, runtime_arch : str, loop_num : int): + common_cases = ["bmrt_load_bmodel_data", + "bmrt_load_context", + "bmrt_launch_data", + "bmrt_simple_api", + "bmrt_multi_thread", + "bmcpp_load_bmodel", + "bmcpp_load_bmodel_data", + "bmcpp_reshape", + "bmcpp_multi_thread", + "bmtap2_register_bmodel", + "bmtap2_register_data", + "bmtap2_multi_thread", + "bmtap2cpp_load_bmodel", + "bmtap2cpp_multi_thread"] + case = TestBase(common_cases, loop_num) + case.set_arch_name(runtime_arch) + bmodel_list= [] + for info in bmodels["static"][runtime_arch]: + if info.core_num == "1": bmodel_list.append(info.full_path) + case.set_bmodel_list(bmodel_list) + case.run_single_mession() + + if runtime_arch == "bm1688" or runtime_arch == "bm1684x": + test_cases = ["bmrt_get_bmodel_api", "bmrt_get_bmodel_api_c"] + case.set_case_names(test_cases) + case.run_single_mession() + + if runtime_arch == "bm1688": + case.set_case_names(["bmmc_multi_mession"]) + case.run_multi_mession() + +def test_multi_core_mession_func(bmodels : defaultdict, runtime_arch : str, 
loop_num : int): + assert runtime_arch == "bm1688" + test_case = TestBM1688(["bmmc_multi_mession"], loop_num) + bmodel_list = [] + for run_type in bmodels.keys(): + for info in bmodels[run_type][runtime_arch]: + if info.core_num == "1": + bmodel_list.append(info.full_path) + test_case.set_bmodel_list(bmodel_list) + test_case.run_multi_mession() + + test_case.set_case_names(["bmrt_test"]) + test_case.run_multi_mession() + diff --git a/tpu-runtime/test/python/utils.py b/tpu-runtime/test/python/utils.py new file mode 100755 index 0000000..ed7ff72 --- /dev/null +++ b/tpu-runtime/test/python/utils.py @@ -0,0 +1,211 @@ +import os +import random +import subprocess +import threading +from collections import defaultdict +from global_def import gloabl_def + +def run_cmd(cmd, right_ret = 0): + ret = os.system(cmd) + ret >>= 8 + if ret != right_ret: return False + else: return True + +class TestBase: + def __init__(self, case_names, loop_num): + self.case_names = case_names + for case_name in case_names: + if case_name not in gloabl_def.CONST_CASE_NAME: + raise ValueError("unknown case name: " + case_name) + self.arch_name = None + self.bmodel_list = None + self.bmodel_list_file = "bmodel_list_temp.txt" + self.loop_num = loop_num + self.bmrt_app = gloabl_def.G_BMRT_APP + self.cmd = None + self.seed = random.randint(0, 10000) + self.runtime_mode = gloabl_def.DEF_CMODEL #todo, extend for soc + + def set_arch_name(self, arch_name): + if arch_name not in gloabl_def.CONST_CORE_NUM_DICT.keys(): + raise ValueError("unknown arch name: " + arch_name) + self.arch_name = arch_name + + def set_case_names(self, case_names): + for case_name in case_names: + if case_name not in gloabl_def.CONST_CASE_NAME: + raise ValueError("unknown case name: " + case_name) + self.case_names = case_names + + def set_bmodel_list(self, bmodel_list): + self.bmodel_list = bmodel_list + + def set_bmodel_list_file(self, bmodel_list_file): + self.bmodel_list_file = bmodel_list_file + + def set_loop_num(self, 
loop_num): + self.loop_num = loop_num + + def run_cmd_and_record(self, cmd, case_name, right_ret = 0): + stdout_ = None + process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=True) + time_out = False + try: + stdout_, stderr = process.communicate(timeout=1200) + except subprocess.TimeoutExpired: + time_out = True + if stdout_: print(stdout_.decode()) + ret = process.returncode + if ret != right_ret: + issue_log = stdout_.decode() if stdout_ else None + log_info = LogInfo() + log_info.log = issue_log + log_info.bmodel_list = self.bmodel_list + log_info.run_cmd = cmd + log_info.type = gloabl_def.DEF_TIMEOUT if time_out else gloabl_def.DEF_ERROR + gloabl_def.G_FAILED_CASES[self.arch_name + "_" + case_name] = log_info + + def gen_run_cmd(self, case_name): + self.cmd = self.bmrt_app + " --loopnum " + str(self.loop_num) + if case_name != "bmrt_test": + self.cmd += " -t " + case_name + if len(self.bmodel_list) == 1: + self.cmd += " --context " + self.bmodel_list[0] + else: + random.seed(self.seed) + random.shuffle(self.bmodel_list) + with open(self.bmodel_list_file,'w') as file: + for bmodel in self.bmodel_list: + file.writelines(bmodel + "\n") + self.cmd += " --bmodel_list " + self.bmodel_list_file + return self.cmd + + def run_single_mession(self): + if self.runtime_mode == gloabl_def.DEF_CMODEL: + bmrt_set_libcmodel(self.arch_name) + for case_name in self.case_names: + self.run_cmd_and_record(self.gen_run_cmd(case_name), case_name) + + def run_multi_mession(self): + pass + + def __del__(self): + if os.path.exists(self.bmodel_list_file): + os.system("rm " + self.bmodel_list_file) + if os.getenv("TPUKERNEL_FIRMWARE_PATH") != None: + os.environ.pop("TPUKERNEL_FIRMWARE_PATH") + +class TestBM1684(TestBase): + def __init__(self, case_names, loop_num): + super(TestBM1684, self).__init__(case_names, loop_num) + self.set_arch_name("bm1684") + +class TestBM1684X(TestBase): + def __init__(self, case_names, loop_num): + super(TestBM1684X, 
self).__init__(case_names, loop_num) + self.set_arch_name("bm1684x") + +class TestBM1688(TestBase): + def __init__(self, case_names, loop_num): + super(TestBM1688, self).__init__(case_names, loop_num) + self.set_arch_name("bm1688") + + def run_multi_mession(self): + if self.runtime_mode == gloabl_def.DEF_CMODEL: + bmrt_set_libcmodel(self.arch_name) + for case_name in self.case_names: + cmd_ = self.gen_run_cmd(case_name) + cmd_ += " --core_list 0:1" + self.run_cmd_and_record(cmd_, case_name) + +class TestBM1690(TestBase): + def __init__(self, case_names, loop_num): + super(TestBM1690, self).__init__(case_names, loop_num) + self.set_arch_name("bm1690") + +class BmodelInfo: + def __init__(self): + self.model_name = None + self.dtype = None + self.full_path = None + self.core_num = None + self.arch_name = None + + def __repr__(self) -> str: + bmodel_info = ",".join([self.model_name, self.arch_name, self.full_path]) + return "[ " + bmodel_info + "]" + +class LogInfo: + def __init__(self): + self.log = None + self.bmodel_list = None + self.run_cmd = None + self.type = None + +def get_bmodel_list(path): + result_folders = [] + for root, dirs, files in os.walk(path): + if "compilation.bmodel" in files: + result_folders.append(root) + return result_folders + +def get_bmodel_list_info(bmodel_list): + res = defaultdict(list) + for i in range(len(bmodel_list)): + bmodel_path = bmodel_list[i] + bmodel_name = bmodel_path.split("/")[-1] + name_li = bmodel_name.split("_") + bmodel_info = BmodelInfo() + if 'core' in name_li[-1]: + bmodel_info.core_num = name_li[-1] + bmodel_info.dtype = name_li[-2] + bmodel_info.arch_name = name_li[-3] + bmodel_info.model_name = ('_').join(name_li[:-3]) + else: + bmodel_info.core_num = "1" + bmodel_info.dtype = name_li[-1] + bmodel_info.arch_name = name_li[-2] + bmodel_info.model_name = ('_').join(name_li[:-2]) + bmodel_info.full_path = bmodel_path + res[bmodel_info.arch_name].append(bmodel_info) + return res + +def get_run_type_forlder(path): + 
res = [] + entries = os.listdir(path) + for entry in entries: + full_path = os.path.join(path, entry) + if os.path.isdir(full_path): res.append(entry) + return res + +def bmrt_get_env(env_name): + env = os.getenv(env_name) + if not env: + raise Exception("get env faild, name: " + env_name) + return env + +def bmrt_get_bmodel_list(path): + bmodel_paths = get_bmodel_list(path) + return get_bmodel_list_info(bmodel_paths) + +def bmrt_set_libcmodel(arch_name): + root_path = bmrt_get_env("BMRT_TEST_TOP") + libcmodel_path = root_path + "/lib/libcmodel_" + arch_name + ".so" + os.environ["TPUKERNEL_FIRMWARE_PATH"]=libcmodel_path + +def bmrt_out_faild_log(failed_infos : defaultdict): + if not failed_infos.keys(): return + type_to_str = { gloabl_def.DEF_TIMEOUT : "Time Out", + gloabl_def.DEF_ERROR : "Run Error" } + case_names = list(failed_infos.keys()) + for i in range(len(case_names)): + log_ = failed_infos[case_names[i]].log + if log_: print(log_) + print("follow cases failed: \n") + for i in range(len(case_names)): + if "bmodel_list" in failed_infos[case_names[i]].run_cmd: + print("[ {}, case name: {}, run cmd: {}\n bmodel list:\n{} ]\n".format(type_to_str[failed_infos[case_names[i]].type], case_names[i], \ + failed_infos[case_names[i]].run_cmd, '\n'.join(failed_infos[case_names[i]].bmodel_list))) + else: + print("[ {}, case name: {}, run cmd: {} ]\n".format(type_to_str[failed_infos[case_names[i]].type], case_names[i], failed_infos[case_names[i]].run_cmd)) + diff --git a/tpu-runtime/test/regression.sh b/tpu-runtime/test/regression.sh new file mode 100755 index 0000000..a2e9179 --- /dev/null +++ b/tpu-runtime/test/regression.sh @@ -0,0 +1,22 @@ +#/bin/bash + +SCRIPT_DIR=$(realpath `dirname ${BASH_SOURCE}`/../scripts) +source $SCRIPT_DIR/envsetup.sh + +function test_bmrt_api() { + pushd $SCRIPT_DIR/../test/ + source env.sh + python3 python/main.py --archs "bm1684,bm1684x,bm1688" + ret=$? + popd + return $ret +} + +function main(){ + rebuild_tpu_runtime; ret=$? 
+ if [ $ret -ne 0 ]; then echo "rebuild_tpu_runtime failed"; return $ret; fi + test_bmrt_api; ret=$? + if [ $ret -ne 0 ]; then echo "test_bmrt_api regression failed"; return $ret; fi +} + +main \ No newline at end of file