[MNN:Sync] Sync Internal Gitlab: 2.5.1

alibaba · May 18, 2023 · c70ecef · c70ecef
1 parent d7d1efe
commit c70ecef
Show file tree

Hide file tree

Showing 98 changed files with 3,849 additions and 1,164 deletions.
diff --git a/MNN.podspec b/MNN.podspec
@@ -74,6 +74,6 @@ Pod::Spec.new do |s|
   end
 
   s.compiler_flags = '-arch arm64 -march=armv8.2-a+simd+fp16'
-  s.pod_target_xcconfig = {'METAL_LIBRARY_FILE_BASE' => 'mnn', 'HEADER_SEARCH_PATHS' => '"$(PODS_TARGET_SRCROOT)/include" "$(PODS_TARGET_SRCROOT)/3rd_party/flatbuffers/include" "$(PODS_TARGET_SRCROOT)/source" "$(PODS_TARGET_SRCROOT)/3rd_party/half" "$(PODS_TARGET_SRCROOT)/source/backend/coreml/mlmodel/include" "$(PODS_TARGET_SRCROOT)/tools/cv/include"', 'GCC_PREPROCESSOR_DEFINITIONS' => '$(inherited) MNN_CODEGEN_REGISTER=1 MNN_SUPPORT_TFLITE_QUAN=1 MNN_METAL_ENABLED=1 MNN_SUPPORT_BF16=1 MNN_COREML_ENABLED=1 USE_LZ4_FLAG=1 MNN_INTERNAL_ENABLED=1'}
+  s.pod_target_xcconfig = {'METAL_LIBRARY_FILE_BASE' => 'mnn', 'HEADER_SEARCH_PATHS' => '"$(PODS_TARGET_SRCROOT)/include" "$(PODS_TARGET_SRCROOT)/3rd_party/flatbuffers/include" "$(PODS_TARGET_SRCROOT)/source" "$(PODS_TARGET_SRCROOT)/3rd_party/half" "$(PODS_TARGET_SRCROOT)/source/backend/coreml/mlmodel/include" "$(PODS_TARGET_SRCROOT)/tools/cv/include"', 'GCC_PREPROCESSOR_DEFINITIONS' => '$(inherited) MNN_CODEGEN_REGISTER=1 MNN_SUPPORT_TFLITE_QUAN=1 MNN_METAL_ENABLED=1 MNN_SUPPORT_BF16=1 MNN_COREML_ENABLED=1 USE_LZ4_FLAG=1 MNN_INTERNAL_ENABLED=1 MNN_USE_SPARSE_COMPUTE=1'}
   s.user_target_xcconfig = { 'OTHER_LDFLAGS' => '-force_load $(BUILD_DIR)/$(CONFIGURATION)$(EFFECTIVE_PLATFORM_NAME)/MNN/libMNN.a', 'HEADER_SEARCH_PATHS' => '"$(PODS_TARGET_SRCROOT)/include"' }
 end
diff --git a/docs/tools/benchmark.md b/docs/tools/benchmark.md
@@ -2,13 +2,17 @@
 ## Linux / macOS / Ubuntu
 [从源码编译](../compile/tools.html#benchmark)，然后执行如下命令:
 ```bash
-./benchmark.out models_folder loop_count warm_up_count forwardtype
+./benchmark.out models_folder loop_count warm_up_count forwardtype numberThread precision weightSparsity weightSparseBlockNumber
 ```
 参数如下:
 - models_folder: benchmark models文件夹，[benchmark models](https://github.com/alibaba/MNN/tree/master/benchmark/models)。
 - loop_count: 可选，默认是10
 - warm_up_count: 预热次数
 - forwardtype: 可选，默认是0，即CPU，forwardtype有0->CPU，1->Metal，3->OpenCL，6->OpenGL，7->Vulkan
+- numberThread: 可选，默认是4，为 CPU 线程数或者 GPU 的运行模式
+- precision: 可选，默认是 2 （precision_low）
+- weightSparsity: 可选，默认是 0.0 ，当 weightSparsity > 0.5 且后端支持时，开启稀疏计算
+- weightSparseBlockNumber: 可选，默认是 1 ，仅当 weightSparsity > 0.5 时生效，为稀疏计算 block 大小，越大越有利于稀疏计算的加速，一般选择 1, 4, 8, 16
 ## Android
 在[benchmark目录](https://github.com/alibaba/MNN/tree/master/benchmark/android)下直接执行脚本`bench_android.sh`，默认编译armv7，加参数-64编译armv8，参数-p将[benchmarkModels](https://github.com/alibaba/MNN/tree/master/benchmark/models) push到机器上。
 脚本执行完成在[benchmark目录](https://github.com/alibaba/MNN/tree/master/benchmark/android)下得到测试结果`benchmark.txt`

diff --git a/express/Executor.cpp b/express/Executor.cpp
@@ -107,6 +107,7 @@ void Executor::setGlobalExecutorConfig(MNNForwardType type, const BackendConfig&
         std::shared_ptr<Runtime> bn(creator->onCreate(info));
         mRuntimes[mAttr->firstType] = bn;
     }
+    _refreshRuntime();
 }
 
 int Executor::getCurrentRuntimeStatus(RuntimeStatus statusEnum) {
@@ -139,6 +140,7 @@ Executor::Executor(std::shared_ptr<Runtime> backend, MNNForwardType type, int nu
     defaultConfig.flags = 4;
     std::shared_ptr<Backend> defaultBackend(mRuntimes[DEFAULT_BACKUP_RUNTIME_KEY]->onCreate(&defaultConfig));
     mAttr->constantBackend = defaultBackend;
+    _refreshRuntime();
 }
 Executor::~Executor(){
     // Do nothing
@@ -205,15 +207,38 @@ std::shared_ptr<Executor> Executor::newExecutor(MNNForwardType type,
     auto executor = new Executor(runtime, type, numberThread);
     return std::shared_ptr<Executor>(executor);
 }
+void Executor::_refreshRuntime() { // Rebuilds the cached mRuntimeInfo from the current contents of mRuntimes.
+    mRuntimeInfo.first.clear();
+    mRuntimeInfo.second = mRuntimes[DEFAULT_BACKUP_RUNTIME_KEY]; // NOTE(review): operator[] on the std::map would insert an empty entry if this key were missing - presumably it is always registered; confirm
+    auto firstIter = mRuntimes.find(getAttr()->firstType); // exact lookup on the full firstType key (a pair, see the .first/.second uses)
+    if (firstIter != mRuntimes.end()) {
+        mRuntimeInfo.first.insert(std::make_pair(firstIter->first.first, firstIter->second)); // the exact match is inserted before any other entry, keyed by forward type only
+    } else {
+        MNN_ASSERT(false); // reached only when firstType has no entry in mRuntimes
+    }
+    for (auto& iter : mRuntimes) {
+        if (iter.first.first != getAttr()->firstType.first) { // only entries whose forward type differs from firstType's are added here
+            mRuntimeInfo.first.insert(std::make_pair(iter.first.first, iter.second));
+        }
+    }
+}
 
 RuntimeInfo Executor::getRuntime() {
-    RuntimeInfo info;
     auto glo = ExecutorScope::Current();
-    info.second = glo->mRuntimes[DEFAULT_BACKUP_RUNTIME_KEY];
-    for (auto& iter : glo->mRuntimes) {
-        info.first.insert(std::make_pair(iter.first.first, iter.second));
+    return glo->mRuntimeInfo; // returns a copy of the current executor's cached RuntimeInfo instead of rebuilding it on every call
+}
+bool Executor::getComputeInfo(EXPRP expr, Interpreter::SessionInfoCode code, void* ptr) { // Forwards an info query (code, ptr) to the session behind expr's cache; returns false when no session is reachable.
+    if (nullptr == expr) {
+        return false;
+    }
+    if (nullptr == expr->inside()->mCache.get()) { // NOTE(review): expr->inside() is dereferenced without a null check - confirm it is never null
+        return false;
+    }
+    auto session = expr->inside()->mCache->getSession();
+    if (nullptr == session) {
+        return false;
     }
-    return info;
+    return session->getInfo(code, ptr); // the result of the query is whatever the session's getInfo reports
 }
 
 static bool loadCache(std::shared_ptr<Runtime> &rt, const void* buffer, size_t size) {
@@ -352,6 +377,7 @@ Executor::RuntimeManager* Executor::RuntimeManager::createRuntimeManager(const S
     } else {
         res->mInside->mUserConfig = false;
     }
+    glo->_refreshRuntime();
     return res;
 }
 ExecutorAttr* Executor::getAttr() const {
@@ -603,6 +629,7 @@ void Executor::_makeCache(const std::vector<EXPRP>& expr, bool forceCPU) {
         scheduleInfo.pipelineInfo[0].first.info.type = MNN_FORWARD_CPU;
     } else {
         scheduleInfo.pipelineInfo[0].first.info.type = current->getAttr()->firstType.first;
+        scheduleInfo.pipelineInfo[0].first.info.numThread = current->getAttr()->firstType.second;
     }
     scheduleInfo.pipelineInfo[0].first.needComputeShape = false;
     scheduleInfo.pipelineInfo[0].first.needComputeGeometry = mLazyMode != LAZY_CONTENT;

diff --git a/include/MNN/Interpreter.hpp b/include/MNN/Interpreter.hpp
@@ -343,6 +343,9 @@ class MNN_PUBLIC Interpreter {
 
         /** Resize Info, int*, 0: ready to execute, 1: need malloc, 2: need resize */
         RESIZE_STATUS = 3,
+
+        /** Mode / NumberThread, int* */
+        THREAD_NUMBER = 4,
 
         ALL
     };

diff --git a/include/MNN/MNNDefine.h b/include/MNN/MNNDefine.h
@@ -69,6 +69,6 @@ MNN_ERROR("Check failed: %s ==> %s\n", #success, #log); \
 #define STR(x) STR_IMP(x)
 #define MNN_VERSION_MAJOR 2
 #define MNN_VERSION_MINOR 5
-#define MNN_VERSION_PATCH 0
+#define MNN_VERSION_PATCH 1
 #define MNN_VERSION STR(MNN_VERSION_MAJOR) "." STR(MNN_VERSION_MINOR) "." STR(MNN_VERSION_PATCH)
 #endif /* MNNDefine_h */
diff --git a/include/MNN/expr/Executor.hpp b/include/MNN/expr/Executor.hpp
@@ -133,11 +133,15 @@ class MNN_PUBLIC Executor {
         friend class StaticModule;
         RuntimeManager();
     };
+    static bool getComputeInfo(EXPRP expr, Interpreter::SessionInfoCode code, void* ptr);
 private:
+    void _refreshRuntime();
     Executor(std::shared_ptr<Runtime> backend, MNNForwardType type, int numberThread);
     void _makeCache(const std::vector<EXPRP>& outputs, bool forceCPU);
 
+    // TODO: Remove mRuntimes, only use mRuntimeInfo
     std::map<std::pair<MNNForwardType, int>, std::shared_ptr<Runtime>> mRuntimes;
+    RuntimeInfo mRuntimeInfo;
     std::shared_ptr<DebugTools> mDebug;
     std::map<std::string, std::shared_ptr<SubGraph>> mSubGraph;
     LazyMode mLazyMode = LAZY_FULL;

diff --git a/project/ios/MNN.xcodeproj/project.pbxproj b/project/ios/MNN.xcodeproj/project.pbxproj
@@ -3953,7 +3953,7 @@
 				CODE_SIGN_STYLE = Automatic;
 				DEAD_CODE_STRIPPING = YES;
 				DEFINES_MODULE = YES;
-				DEVELOPMENT_TEAM = Q48UX93J22;
+				DEVELOPMENT_TEAM = 6G7464HHUS;
 				DYLIB_COMPATIBILITY_VERSION = 1;
 				DYLIB_CURRENT_VERSION = 1;
 				DYLIB_INSTALL_NAME_BASE = "@rpath";
@@ -3971,6 +3971,7 @@
 					"ENABLE_ARMV82=1",
 					"MNN_COREML_ENABLED=1",
 					"USE_LZ4_FLAG=1",
+					"MNN_USE_SPARSE_COMPUTE=1",
 				);
 				GCC_SYMBOLS_PRIVATE_EXTERN = YES;
 				GCC_WARN_SHADOW = NO;
@@ -3995,7 +3996,7 @@
 				METAL_LIBRARY_FILE_BASE = mnn;
 				ONLY_ACTIVE_ARCH = YES;
 				OTHER_CFLAGS = "";
-				PRODUCT_BUNDLE_IDENTIFIER = jiuqi.bbbbb.test;
+				PRODUCT_BUNDLE_IDENTIFIER = com.taobao.mnn.playground.abcd;
 				PRODUCT_NAME = "$(TARGET_NAME:c99extidentifier)";
 				PROVISIONING_PROFILE_SPECIFIER = "";
 				"PROVISIONING_PROFILE_SPECIFIER[sdk=macosx*]" = "";
@@ -4016,7 +4017,7 @@
 				CODE_SIGN_STYLE = Automatic;
 				DEAD_CODE_STRIPPING = YES;
 				DEFINES_MODULE = YES;
-				DEVELOPMENT_TEAM = Q48UX93J22;
+				DEVELOPMENT_TEAM = 6G7464HHUS;
 				DYLIB_COMPATIBILITY_VERSION = 1;
 				DYLIB_CURRENT_VERSION = 1;
 				DYLIB_INSTALL_NAME_BASE = "@rpath";
@@ -4033,6 +4034,7 @@
 					"ENABLE_ARMV82=1",
 					"MNN_COREML_ENABLED=1",
 					"USE_LZ4_FLAG=1",
+					"MNN_USE_SPARSE_COMPUTE=1",
 				);
 				GCC_SYMBOLS_PRIVATE_EXTERN = YES;
 				GCC_WARN_SHADOW = YES;
@@ -4056,7 +4058,7 @@
 				MACH_O_TYPE = staticlib;
 				METAL_LIBRARY_FILE_BASE = mnn;
 				OTHER_CFLAGS = "";
-				PRODUCT_BUNDLE_IDENTIFIER = jiuqi.bbbbb.test;
+				PRODUCT_BUNDLE_IDENTIFIER = com.taobao.mnn.playground.abcd;
 				PRODUCT_NAME = "$(TARGET_NAME:c99extidentifier)";
 				PROVISIONING_PROFILE_SPECIFIER = "";
 				"PROVISIONING_PROFILE_SPECIFIER[sdk=macosx*]" = "";
@@ -4075,7 +4077,7 @@
 				ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
 				ASSETCATALOG_COMPILER_LAUNCHIMAGE_NAME = LaunchImage;
 				CODE_SIGN_STYLE = Automatic;
-				DEVELOPMENT_TEAM = Q48UX93J22;
+				DEVELOPMENT_TEAM = 6G7464HHUS;
 				GCC_ENABLE_CPP_EXCEPTIONS = NO;
 				GCC_ENABLE_CPP_RTTI = NO;
 				HEADER_SEARCH_PATHS = (
@@ -4088,7 +4090,7 @@
 				IPHONEOS_DEPLOYMENT_TARGET = 9.0;
 				LD_RUNPATH_SEARCH_PATHS = "$(inherited) @executable_path/Frameworks";
 				OTHER_CPLUSPLUSFLAGS = "$(OTHER_CFLAGS)";
-				PRODUCT_BUNDLE_IDENTIFIER = jiuqi.bbbbb.test;
+				PRODUCT_BUNDLE_IDENTIFIER = com.taobao.mnn.playground.abcd;
 				PRODUCT_NAME = "$(TARGET_NAME)";
 				TARGETED_DEVICE_FAMILY = "1,2";
 			};
@@ -4100,7 +4102,7 @@
 				ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
 				ASSETCATALOG_COMPILER_LAUNCHIMAGE_NAME = LaunchImage;
 				CODE_SIGN_STYLE = Automatic;
-				DEVELOPMENT_TEAM = Q48UX93J22;
+				DEVELOPMENT_TEAM = 6G7464HHUS;
 				GCC_ENABLE_CPP_EXCEPTIONS = NO;
 				GCC_ENABLE_CPP_RTTI = NO;
 				HEADER_SEARCH_PATHS = (
@@ -4113,7 +4115,7 @@
 				IPHONEOS_DEPLOYMENT_TARGET = 9.0;
 				LD_RUNPATH_SEARCH_PATHS = "$(inherited) @executable_path/Frameworks";
 				OTHER_CPLUSPLUSFLAGS = "$(OTHER_CFLAGS)";
-				PRODUCT_BUNDLE_IDENTIFIER = jiuqi.bbbbb.test;
+				PRODUCT_BUNDLE_IDENTIFIER = com.taobao.mnn.playground.abcd;
 				PRODUCT_NAME = "$(TARGET_NAME)";
 				TARGETED_DEVICE_FAMILY = "1,2";
 			};

diff --git a/pymnn/pip_package/build_deps.py b/pymnn/pip_package/build_deps.py
@@ -23,6 +23,10 @@
 if len(sys.argv) > 1 and sys.argv[1] == '-trt':
     USE_TRT=True
 
+USE_CUDA=False
+if len(sys.argv) > 1 and sys.argv[1] == '-cuda':
+    USE_CUDA=True
+
 def build_deps():
     if os.path.isdir('../../schema/private'):
         IS_INTERNAL_BUILD = args.internal
@@ -49,6 +53,7 @@ def build_deps():
         -DCMAKE_LIBRARY_PATH=/usr/local/cuda/lib64/stubs/ ' if USE_TRT else ' '
         extra_opts += ' -DMNN_INTERNAL=ON ' if IS_INTERNAL_BUILD else ' '
         extra_opts += ' -DMNN_BUILD_TORCH=ON ' if IS_BUILD_TORCH else ' '
+        extra_opts += ' -DMNN_CUDA=ON ' if USE_CUDA else ' '
         os.system('cmake ' + extra_opts +
             '-DMNN_BUILD_CONVERTER=on -DMNN_BUILD_TRAIN=ON -DCMAKE_BUILD_TYPE=Release \
             -DMNN_BUILD_SHARED_LIBS=OFF -DMNN_AAPL_FMWK=OFF -DMNN_SEP_BUILD=OFF -DMNN_BUILD_OPENCV=ON -DMNN_IMGCODECS=ON \

diff --git a/pymnn/pip_package/build_manylinux2014.sh b/pymnn/pip_package/build_manylinux2014.sh
@@ -9,6 +9,10 @@
 
 set -e
 
+echo "clean build cache:"
+echo ">>> rm -rf build dist *.egg-info wheelhouse/*"
+rm -rf build dist *.egg-info wheelhouse/*
+
 PROJECT_ROOT=$(cd `dirname $0`;cd ../../;pwd)
 echo $PROJECT_ROOT
 export PROJECT_ROOT
@@ -17,6 +21,8 @@ for PYBIN in /opt/python/*/bin; do
     "${PYBIN}/pip" install -U numpy
     if [ "$1" == "-trt" ]; then
         USE_TRT=true "${PYBIN}/python" setup.py bdist_wheel
+    elif [ "$1" == "-cuda" ]; then
+        USE_CUDA=true "${PYBIN}/python" setup.py bdist_wheel
     else
         "${PYBIN}/python" setup.py bdist_wheel
     fi
@@ -26,6 +32,8 @@ done
 for whl in dist/*.whl; do
     if [ "$1" == "-trt" ]; then
         LD_LIBRARY_PATH=${PROJECT_ROOT}/pymnn_build/source/backend/tensorrt:$LD_LIBRARY_PATH auditwheel repair "$whl" --plat manylinux2014_x86_64 -w wheelhouse/
+    elif [ "$1" == "-cuda" ]; then
+        LD_LIBRARY_PATH=${PROJECT_ROOT}/pymnn_build/source/backend/cuda:$LD_LIBRARY_PATH auditwheel repair "$whl" --plat manylinux2014_x86_64 -w wheelhouse/
     else
         auditwheel repair "$whl" --plat manylinux2014_x86_64 -w wheelhouse/
     fi

diff --git a/pymnn/pip_package/setup.py b/pymnn/pip_package/setup.py
@@ -59,9 +59,11 @@ def report(*args):
 
 package_name = 'MNN'
 USE_TRT=check_env_flag('USE_TRT')
+USE_CUDA = check_env_flag("USE_CUDA")
 IS_INTERNAL_BUILD = False
 
 print ("USE_TRT ", USE_TRT)
+print("USE_CUDA:", USE_CUDA)
 
 if os.path.isdir('../../schema/private'):
     IS_INTERNAL_BUILD = args.serving
@@ -149,7 +151,8 @@ def configure_extension_build():
     engine_library_dirs += [os.path.join(root_dir, BUILD_DIR, "tools", "train")]
     engine_library_dirs += [os.path.join(root_dir, BUILD_DIR, "tools", "cv")]
     engine_library_dirs += [os.path.join(root_dir, BUILD_DIR, "source", "backend", "tensorrt")]
-    if USE_TRT:
+    engine_library_dirs += [os.path.join(root_dir, BUILD_DIR, "source", "backend", "cuda")]
+    if USE_TRT or USE_CUDA:
         # Note: TensorRT-5.1.5.0/lib should be set in $LIBRARY_PATH of the build system.
         engine_library_dirs += ['/usr/local/cuda/lib64/']
 
@@ -187,6 +190,7 @@ def configure_extension_build():
     engine_include_dirs += [np.get_include()]
 
     trt_depend = ['-lTRT_CUDA_PLUGIN', '-lnvinfer', '-lnvparsers', '-lnvinfer_plugin', '-lcudart']
+    cuda_depend = ['-lMNN_Cuda_Main']
     engine_depend = ['-lMNN']
 
     # enable logging & model authentication on linux.
@@ -196,12 +200,16 @@ def configure_extension_build():
     if USE_TRT:
         engine_depend += trt_depend
 
+    if USE_CUDA:
+        engine_depend += cuda_depend
+
     tools_compile_args = []
     tools_libraries = []
     tools_depend = ['-lMNN', '-lMNNConvertDeps', '-lprotobuf']
     tools_library_dirs = [os.path.join(root_dir, BUILD_DIR)]
     tools_library_dirs += [os.path.join(root_dir, BUILD_DIR, "tools", "converter")]
     tools_library_dirs += [os.path.join(root_dir, BUILD_DIR, "source", "backend", "tensorrt")]
+    tools_library_dirs += [os.path.join(root_dir, BUILD_DIR, "source", "backend", "cuda")]
     tools_library_dirs += [os.path.join(root_dir, BUILD_DIR, "3rd_party", "protobuf", "cmake")]
 
     # add libTorch dependency
@@ -227,7 +235,7 @@ def configure_extension_build():
                                   os.path.join(torch_lib, 'libc10.dylib')]),
                          ('.dylibs', [os.path.join(torch_path, '.dylibs', 'libiomp5.dylib')])]
             '''
-    if USE_TRT:
+    if USE_TRT or USE_CUDA:
         # Note: TensorRT-5.1.5.0/lib should be set in $LIBRARY_PATH of the build system.
         tools_library_dirs += ['/usr/local/cuda/lib64/']
 
@@ -269,6 +277,9 @@ def configure_extension_build():
     if USE_TRT:
         tools_depend += trt_depend
 
+    if USE_CUDA:
+        tools_depend += cuda_depend
+
     if IS_DARWIN:
         engine_link_args += ['-stdlib=libc++']
         engine_link_args += ['-Wl,-all_load']