From ba8f7bf756241be5b766d198a69184fb8095213e Mon Sep 17 00:00:00 2001 From: Stefano Sinigardi Date: Sun, 18 Apr 2021 15:11:36 +0200 Subject: [PATCH] improve build process even more (#7610) * test for shared libs * improve flag handling * build so lib in ci * clone vcpkg if not found * fix * improve clang handling * bump vcpkg.json version * gemm.c compatible with ARM * remove unnecessary vcpkg.json field * do not unnecessarily rebuild vcpkg in the CI build stage * use alexeyab nuget cache * enable self-removal of build folder * add interactivity in build.ps1 if not opt-out * spellcheck README * fix another leftover spelling error * also auto-update darknet if possible * do not self-update darknet in CI --- .github/workflows/ccpp.yml | 34 +++++++------- CMakeLists.txt | 38 +++++++++------- README.md | 55 ++++++++++------------ build.ps1 | 92 +++++++++++++++++++++++++++++++++---- src/gemm.c | 93 ++++++++++++++------------------------ src/http_stream.cpp | 5 +- src/httplib.h | 5 +- vcpkg.json | 22 +++++++-- 8 files changed, 204 insertions(+), 140 deletions(-) diff --git a/.github/workflows/ccpp.yml b/.github/workflows/ccpp.yml index 077b3cf99f8..d6c18fda3fd 100644 --- a/.github/workflows/ccpp.yml +++ b/.github/workflows/ccpp.yml @@ -111,10 +111,10 @@ jobs: ./vcpkg/bootstrap-vcpkg.sh; mono $(./vcpkg/vcpkg fetch nuget | tail -n 1) sources add - -source "https://nuget.pkg.github.com/cenit/index.json" + -source "https://nuget.pkg.github.com/AlexeyAB/index.json" -storepasswordincleartext -name "vcpkgbinarycache" - -username "cenit" + -username "AlexeyAB" -password "${{ secrets.GITHUB_TOKEN }}" - name: 'Build' @@ -124,7 +124,7 @@ jobs: CUDA_PATH: "/usr/local/cuda" CUDA_TOOLKIT_ROOT_DIR: "/usr/local/cuda" LD_LIBRARY_PATH: "/usr/local/cuda/lib64:/usr/local/cuda/lib64/stubs:$LD_LIBRARY_PATH" - run: ./build.ps1 -UseVCPKG -EnableOPENCV -EnableCUDA -ForceStaticLib + run: ./build.ps1 -UseVCPKG -DoNotUpdateVCPKG -EnableOPENCV -EnableCUDA -DisableInteractive -DoNotUpdateDARKNET - 
uses: actions/upload-artifact@v2 with: @@ -163,7 +163,7 @@ jobs: CUDA_PATH: "/usr/local/cuda" CUDA_TOOLKIT_ROOT_DIR: "/usr/local/cuda" LD_LIBRARY_PATH: "/usr/local/cuda/lib64:/usr/local/cuda/lib64/stubs:$LD_LIBRARY_PATH" - run: ./build.ps1 -EnableOPENCV + run: ./build.ps1 -EnableOPENCV -DisableInteractive -DoNotUpdateDARKNET - uses: actions/upload-artifact@v2 with: @@ -222,7 +222,7 @@ jobs: CUDA_PATH: "/usr/local/cuda" CUDA_TOOLKIT_ROOT_DIR: "/usr/local/cuda" LD_LIBRARY_PATH: "/usr/local/cuda/lib64:/usr/local/cuda/lib64/stubs:$LD_LIBRARY_PATH" - run: ./build.ps1 -EnableOPENCV -EnableCUDA + run: ./build.ps1 -EnableOPENCV -EnableCUDA -DisableInteractive -DoNotUpdateDARKNET - uses: actions/upload-artifact@v2 with: @@ -251,7 +251,7 @@ jobs: - name: 'Build' shell: pwsh - run: ./build.ps1 -ForceCPP + run: ./build.ps1 -ForceCPP -DisableInteractive -DoNotUpdateDARKNET osx-vcpkg: @@ -271,15 +271,15 @@ jobs: ./vcpkg/bootstrap-vcpkg.sh; mono $(./vcpkg/vcpkg fetch nuget | tail -n 1) sources add - -source "https://nuget.pkg.github.com/cenit/index.json" + -source "https://nuget.pkg.github.com/AlexeyAB/index.json" -storepasswordincleartext -name "vcpkgbinarycache" - -username "cenit" + -username "AlexeyAB" -password "${{ secrets.GITHUB_TOKEN }}" - name: 'Build' shell: pwsh - run: ./build.ps1 -UseVCPKG + run: ./build.ps1 -UseVCPKG -DoNotUpdateVCPKG -DisableInteractive -DoNotUpdateDARKNET - uses: actions/upload-artifact@v2 with: @@ -311,7 +311,7 @@ jobs: - name: 'Build' shell: pwsh - run: ./build.ps1 -EnableOPENCV + run: ./build.ps1 -EnableOPENCV -DisableInteractive -DoNotUpdateDARKNET - uses: actions/upload-artifact@v2 with: @@ -340,7 +340,7 @@ jobs: - name: 'Build' shell: pwsh - run: ./build.ps1 -ForceCPP + run: ./build.ps1 -ForceCPP -DisableInteractive -DoNotUpdateDARKNET win-vcpkg: @@ -357,15 +357,15 @@ jobs: ./vcpkg/bootstrap-vcpkg.sh; $(./vcpkg/vcpkg fetch nuget | tail -n 1) sources add - -source "https://nuget.pkg.github.com/cenit/index.json" + -source 
"https://nuget.pkg.github.com/AlexeyAB/index.json" -storepasswordincleartext -name "vcpkgbinarycache" - -username "cenit" + -username "AlexeyAB" -password "${{ secrets.GITHUB_TOKEN }}" - name: 'Build' shell: pwsh - run: ./build.ps1 -UseVCPKG -EnableOPENCV + run: ./build.ps1 -UseVCPKG -DoNotUpdateVCPKG -EnableOPENCV -DisableInteractive -DoNotUpdateDARKNET - uses: actions/upload-artifact@v2 with: @@ -398,7 +398,7 @@ jobs: - name: 'Build' shell: pwsh - run: ./build.ps1 + run: ./build.ps1 -DisableInteractive -DoNotUpdateDARKNET - uses: actions/upload-artifact@v2 with: @@ -431,7 +431,7 @@ jobs: - name: 'Build' shell: pwsh - run: ./build.ps1 -ForceCPP + run: ./build.ps1 -ForceCPP -DisableInteractive -DoNotUpdateDARKNET win-intlibs-cuda: @@ -454,7 +454,7 @@ jobs: CUDA_TOOLKIT_ROOT_DIR: "C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v10.2" CUDACXX: "C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v10.2\\bin\\nvcc.exe" shell: pwsh - run: ./build.ps1 -EnableCUDA + run: ./build.ps1 -EnableCUDA -DisableInteractive -DoNotUpdateDARKNET mingw: diff --git a/CMakeLists.txt b/CMakeLists.txt index 00f446fcccf..0029abe78ee 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -19,8 +19,9 @@ option(ENABLE_CUDNN "Enable CUDNN" ON) option(ENABLE_CUDNN_HALF "Enable CUDNN Half precision" ON) option(ENABLE_ZED_CAMERA "Enable ZED Camera support" ON) option(ENABLE_VCPKG_INTEGRATION "Enable VCPKG integration" ON) +option(VCPKG_BUILD_OPENCV_WITH_CUDA "Build OpenCV with CUDA extension integration" ON) -if(ENABLE_OPENCV_WITH_CUDA AND NOT APPLE) +if(VCPKG_BUILD_OPENCV_WITH_CUDA AND NOT APPLE) list(APPEND VCPKG_MANIFEST_FEATURES "opencv-cuda") endif() if(ENABLE_CUDA AND NOT APPLE) @@ -33,18 +34,6 @@ if(ENABLE_CUDNN AND ENABLE_CUDA AND NOT APPLE) list(APPEND VCPKG_MANIFEST_FEATURES "cudnn") endif() -if(CMAKE_COMPILER_IS_GNUCC OR "${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang") - set(CMAKE_COMPILER_IS_GNUCC_OR_CLANG TRUE) - if("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang") - 
set(CMAKE_COMPILER_IS_CLANG TRUE) - else() - set(CMAKE_COMPILER_IS_CLANG FALSE) - endif() -else() - set(CMAKE_COMPILER_IS_GNUCC_OR_CLANG FALSE) - set(CMAKE_COMPILER_IS_CLANG FALSE) -endif() - if(NOT CMAKE_HOST_SYSTEM_PROCESSOR AND NOT WIN32) execute_process(COMMAND "uname" "-m" OUTPUT_VARIABLE CMAKE_HOST_SYSTEM_PROCESSOR OUTPUT_STRIP_TRAILING_WHITESPACE) endif() @@ -87,6 +76,18 @@ enable_language(CXX) set(CMAKE_CXX_STANDARD 11) set(CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}/cmake/Modules/" ${CMAKE_MODULE_PATH}) +if(CMAKE_COMPILER_IS_GNUCC OR "${CMAKE_C_COMPILER_ID}" MATCHES "Clang" OR "${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang") + set(CMAKE_COMPILER_IS_GNUCC_OR_CLANG TRUE) + if("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang" OR "${CMAKE_CXX_COMPILER_ID}" MATCHES "clang") + set(CMAKE_COMPILER_IS_CLANG TRUE) + else() + set(CMAKE_COMPILER_IS_CLANG FALSE) + endif() +else() + set(CMAKE_COMPILER_IS_GNUCC_OR_CLANG FALSE) + set(CMAKE_COMPILER_IS_CLANG FALSE) +endif() + set(default_build_type "Release") if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES) message(STATUS "Setting build type to '${default_build_type}' as none was specified.") @@ -201,12 +202,14 @@ endif() set(ADDITIONAL_CXX_FLAGS "-Wall -Wno-unused-result -Wno-unknown-pragmas -Wfatal-errors -Wno-deprecated-declarations -Wno-write-strings") set(ADDITIONAL_C_FLAGS "-Wall -Wno-unused-result -Wno-unknown-pragmas -Wfatal-errors -Wno-deprecated-declarations -Wno-write-strings") +if(UNIX AND BUILD_SHARED_LIBS AND NOT CMAKE_COMPILER_IS_CLANG) + set(SHAREDLIB_CXX_FLAGS "-Wl,-Bsymbolic") + set(SHAREDLIB_C_FLAGS "-Wl,-Bsymbolic") +endif() if(MSVC) set(ADDITIONAL_CXX_FLAGS "/wd4013 /wd4018 /wd4028 /wd4047 /wd4068 /wd4090 /wd4101 /wd4113 /wd4133 /wd4190 /wd4244 /wd4267 /wd4305 /wd4477 /wd4996 /wd4819 /fp:fast") set(ADDITIONAL_C_FLAGS "/wd4013 /wd4018 /wd4028 /wd4047 /wd4068 /wd4090 /wd4101 /wd4113 /wd4133 /wd4190 /wd4244 /wd4267 /wd4305 /wd4477 /wd4996 /wd4819 /fp:fast") - set(CMAKE_CXX_FLAGS "${ADDITIONAL_CXX_FLAGS} 
${CMAKE_CXX_FLAGS}") - set(CMAKE_C_FLAGS "${ADDITIONAL_C_FLAGS} ${CMAKE_C_FLAGS}") string(REGEX REPLACE "/O2" "/Ox" CMAKE_CXX_FLAGS_RELEASE ${CMAKE_CXX_FLAGS_RELEASE}) string(REGEX REPLACE "/O2" "/Ox" CMAKE_C_FLAGS_RELEASE ${CMAKE_C_FLAGS_RELEASE}) endif() @@ -218,8 +221,6 @@ if(CMAKE_COMPILER_IS_GNUCC_OR_CLANG) set(CMAKE_C_FLAGS "-pthread ${CMAKE_C_FLAGS}") endif() endif() - set(CMAKE_CXX_FLAGS "${ADDITIONAL_CXX_FLAGS} ${CMAKE_CXX_FLAGS}") - set(CMAKE_C_FLAGS "${ADDITIONAL_C_FLAGS} ${CMAKE_C_FLAGS}") string(REGEX REPLACE "-O0" "-Og" CMAKE_CXX_FLAGS_DEBUG ${CMAKE_CXX_FLAGS_DEBUG}) string(REGEX REPLACE "-O3" "-Ofast" CMAKE_CXX_FLAGS_RELEASE ${CMAKE_CXX_FLAGS_RELEASE}) string(REGEX REPLACE "-O0" "-Og" CMAKE_C_FLAGS_DEBUG ${CMAKE_C_FLAGS_DEBUG}) @@ -230,6 +231,9 @@ if(CMAKE_COMPILER_IS_GNUCC_OR_CLANG) endif() endif() +set(CMAKE_CXX_FLAGS "${ADDITIONAL_CXX_FLAGS} ${SHAREDLIB_CXX_FLAGS} ${CMAKE_CXX_FLAGS}") +set(CMAKE_C_FLAGS "${ADDITIONAL_C_FLAGS} ${SHAREDLIB_C_FLAGS} ${CMAKE_C_FLAGS}") + if(OpenCV_FOUND) if(ENABLE_CUDA AND NOT OpenCV_CUDA_VERSION) set(BUILD_USELIB_TRACK "FALSE" CACHE BOOL "Build uselib_track" FORCE) diff --git a/README.md b/README.md index 2de8c7bf980..1e839f8a44a 100644 --- a/README.md +++ b/README.md @@ -31,7 +31,7 @@ About Darknet framework: http://pjreddie.com/darknet/ * [YOLOv4 model zoo](https://github.com/AlexeyAB/darknet/wiki/YOLOv4-model-zoo) -* [Requirements (and how to install dependecies)](#requirements) +* [Requirements (and how to install dependencies)](#requirements) * [Pre-trained models](#pre-trained-models) * [FAQ - frequently asked questions](https://github.com/AlexeyAB/darknet/wiki/FAQ---frequently-asked-questions) * [Explanations in issues](https://github.com/AlexeyAB/darknet/issues?q=is%3Aopen+is%3Aissue+label%3AExplanations) @@ -107,7 +107,7 @@ Others: https://www.youtube.com/user/pjreddie/videos #### How to evaluate AP of YOLOv4 on the MS COCO evaluation server 1. 
Download and unzip test-dev2017 dataset from MS COCO server: http://images.cocodataset.org/zips/test2017.zip -2. Download list of images for Detection taks and replace the paths with yours: https://raw.githubusercontent.com/AlexeyAB/darknet/master/scripts/testdev2017.txt +2. Download list of images for Detection tasks and replace the paths with yours: https://raw.githubusercontent.com/AlexeyAB/darknet/master/scripts/testdev2017.txt 3. Download `yolov4.weights` file 245 MB: [yolov4.weights](https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v3_optimal/yolov4.weights) (Google-drive mirror [yolov4.weights](https://drive.google.com/open?id=1cewMfusmPjYWbrnuJRuKhPMwRe_b9PaT) ) 4. Content of the file `cfg/coco.data` should be @@ -202,13 +202,13 @@ You can get cfg-files by path: `darknet/cfg/` * **Powershell** (already installed on windows): https://docs.microsoft.com/en-us/powershell/scripting/install/installing-powershell * **CUDA >= 10.2**: https://developer.nvidia.com/cuda-toolkit-archive (on Linux do [Post-installation Actions](https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#post-installation-actions)) * **OpenCV >= 2.4**: use your preferred package manager (brew, apt), build from source using [vcpkg](https://github.com/Microsoft/vcpkg) or download from [OpenCV official site](https://opencv.org/releases.html) (on Windows set system variable `OpenCV_DIR` = `C:\opencv\build` - where are the `include` and `x64` folders [image](https://user-images.githubusercontent.com/4096485/53249516-5130f480-36c9-11e9-8238-a6e82e48c6f2.png)) -* **cuDNN >= 8.0.2** https://developer.nvidia.com/rdp/cudnn-archive (on **Linux** copy `cudnn.h`,`libcudnn.so`... 
as desribed here https://docs.nvidia.com/deeplearning/sdk/cudnn-install/index.html#installlinux-tar , on **Windows** copy `cudnn.h`,`cudnn64_7.dll`, `cudnn64_7.lib` as desribed here https://docs.nvidia.com/deeplearning/sdk/cudnn-install/index.html#installwindows ) +* **cuDNN >= 8.0.2** https://developer.nvidia.com/rdp/cudnn-archive (on **Linux** copy `cudnn.h`,`libcudnn.so`... as described here https://docs.nvidia.com/deeplearning/sdk/cudnn-install/index.html#installlinux-tar , on **Windows** copy `cudnn.h`,`cudnn64_7.dll`, `cudnn64_7.lib` as described here https://docs.nvidia.com/deeplearning/sdk/cudnn-install/index.html#installwindows ) * **GPU with CC >= 3.0**: https://en.wikipedia.org/wiki/CUDA#GPUs_supported ### Yolo v4 in other frameworks * **Pytorch - Scaled-YOLOv4:** https://github.com/WongKinYiu/ScaledYOLOv4 -* **TensorFlow:** `pip install yolov4` YOLOv4 on TensorFlow 2.0 / TFlite / Andriod: https://github.com/hunglc007/tensorflow-yolov4-tflite +* **TensorFlow:** `pip install yolov4` YOLOv4 on TensorFlow 2.0 / TFlite / Android: https://github.com/hunglc007/tensorflow-yolov4-tflite Official TF models: https://github.com/tensorflow/models/tree/master/official/vision/beta/projects/yolo For YOLOv4 - convert `yolov4.weights`/`cfg` files to `yolov4.pb` by using [TNTWEN](https://github.com/TNTWEN/OpenVINO-YOLOV4) project, and to `yolov4.tflite` [TensorFlow-lite](https://www.tensorflow.org/lite/guide/get_started#2_convert_the_model_format) * **OpenCV-dnn** the fastest implementation of YOLOv4 for CPU (x86/ARM-Android), OpenCV can be compiled with [OpenVINO-backend](https://github.com/opencv/opencv/wiki/Intel's-Deep-Learning-Inference-Engine-backend) for running on (Myriad X / USB Neural Compute Stick / Arria FPGA), use `yolov4.weights`/`cfg` with: [C++ example](https://github.com/opencv/opencv/blob/8c25a8eb7b10fb50cda323ee6bec68aa1a9ce43c/samples/dnn/object_detection.cpp#L192-L221) or [Python 
example](https://github.com/opencv/opencv/blob/8c25a8eb7b10fb50cda323ee6bec68aa1a9ce43c/samples/dnn/object_detection.py#L129-L150) @@ -226,7 +226,7 @@ You can get cfg-files by path: `darknet/cfg/` * **Triton Inference Server / TensorRT** https://github.com/isarsoft/yolov4-triton-tensorrt * **Xilinx Zynq Ultrascale+ Deep Learning Processor (DPU) ZCU102/ZCU104:** https://github.com/Xilinx/Vitis-In-Depth-Tutorial/tree/master/Machine_Learning/Design_Tutorials/07-yolov4-tutorial * **Amazon Neurochip / Amazon EC2 Inf1 instances** 1.85 times higher throughput and 37% lower cost per image for TensorFlow based YOLOv4 model, using Keras [URL](https://aws.amazon.com/ru/blogs/machine-learning/improving-performance-for-deep-learning-based-object-detection-with-an-aws-neuron-compiled-yolov4-model-on-aws-inferentia/) -* **TVM** - compilation of deep learning models (Keras, MXNet, PyTorch, Tensorflow, CoreML, DarkNet) into minimum deployable modules on diverse hardware backends (CPUs, GPUs, FPGA, and specialized accelerators): https://tvm.ai/about +* **TVM** - compilation of deep learning models (Keras, MXNet, PyTorch, Tensorflow, CoreML, DarkNet) into minimum deployable modules on diverse hardware backends (CPUs, GPUs, FPGA, and specialized accelerators): https://tvm.ai/about * **OpenDataCam** - It detects, tracks and counts moving objects by using YOLOv4: https://github.com/opendatacam/opendatacam#-hardware-pre-requisite * **Netron** - Visualizer for neural networks: https://github.com/lutzroeder/netron @@ -287,12 +287,12 @@ On Linux find executable file `./darknet` in the root directory, while on Window * Train on **Amazon EC2**, to see mAP & Loss-chart using URL like: `http://ec2-35-160-228-91.us-west-2.compute.amazonaws.com:8090` in the Chrome/Firefox (**Darknet should be compiled with OpenCV**): `./darknet detector train cfg/coco.data yolov4.cfg yolov4.conv.137 -dont_show -mjpeg_port 8090 -map` * 186 MB Yolo9000 - image: `darknet.exe detector test cfg/combine9k.data
cfg/yolo9000.cfg yolo9000.weights` -* Remeber to put data/9k.tree and data/coco9k.map under the same folder of your app if you use the cpp api to build an app +* Remember to put data/9k.tree and data/coco9k.map under the same folder of your app if you use the cpp api to build an app * To process a list of images `data/train.txt` and save results of detection to `result.json` file use: `darknet.exe detector test cfg/coco.data cfg/yolov4.cfg yolov4.weights -ext_output -dont_show -out result.json < data/train.txt` * To process a list of images `data/train.txt` and save results of detection to `result.txt` use: `darknet.exe detector test cfg/coco.data cfg/yolov4.cfg yolov4.weights -dont_show -ext_output < data/train.txt > result.txt` -* Pseudo-lableing - to process a list of images `data/new_train.txt` and save results of detection in Yolo training format for each image as label `.txt` (in this way you can increase the amount of training data) use: +* Pseudo-labelling - to process a list of images `data/new_train.txt` and save results of detection in Yolo training format for each image as label `.txt` (in this way you can increase the amount of training data) use: `darknet.exe detector test cfg/coco.data cfg/yolov4.cfg yolov4.weights -thresh 0.25 -dont_show -save_labels < data/new_train.txt` * To calculate anchors: `darknet.exe detector calc_anchors data/obj.data -num_of_clusters 9 -width 416 -height 416` * To check accuracy mAP@IoU=50: `darknet.exe detector map data/obj.data yolo-obj.cfg backup\yolo-obj_7000.weights` @@ -324,11 +324,6 @@ To update CMake on Ubuntu, it's better to follow guide here: https://apt.kitware Open a shell and type these commands ```PowerShell -PS Code/> git clone https://github.com/microsoft/vcpkg -PS Code/> cd vcpkg -PS Code/vcpkg> ./bootstrap-vcpkg.sh -PS Code/vcpkg> $env:VCPKG_ROOT=$PWD -PS Code/vcpkg> cd .. 
PS Code/> git clone https://github.com/AlexeyAB/darknet PS Code/> cd darknet PS Code/darknet> ./build.ps1 -UseVCPKG -EnableOPENCV -EnableCUDA -EnableCUDNN @@ -359,9 +354,9 @@ Before make, you can set such options in the `Makefile`: [link](https://github.c * `CUDNN=1` to build with cuDNN v5-v7 to accelerate training by using GPU (cuDNN should be in `/usr/local/cudnn`) * `CUDNN_HALF=1` to build for Tensor Cores (on Titan V / Tesla V100 / DGX-2 and later) speedup Detection 3x, Training 2x * `OPENCV=1` to build with OpenCV 4.x/3.x/2.4.x - allows to detect on video files and video streams from network cameras or web-cams -* `DEBUG=1` to bould debug version of Yolo +* `DEBUG=1` to build debug version of Yolo * `OPENMP=1` to build with OpenMP support to accelerate Yolo by using multi-core CPU -* `LIBSO=1` to build a library `darknet.so` and binary runable file `uselib` that uses this library. Or you can try to run so `LD_LIBRARY_PATH=./:$LD_LIBRARY_PATH ./uselib test.mp4` How to use this SO-library from your own code - you can look at C++ example: https://github.com/AlexeyAB/darknet/blob/master/src/yolo_console_dll.cpp +* `LIBSO=1` to build a library `darknet.so` and binary runnable file `uselib` that uses this library. 
Or you can try to run so `LD_LIBRARY_PATH=./:$LD_LIBRARY_PATH ./uselib test.mp4` How to use this SO-library from your own code - you can look at C++ example: https://github.com/AlexeyAB/darknet/blob/master/src/yolo_console_dll.cpp or use in such a way: `LD_LIBRARY_PATH=./:$LD_LIBRARY_PATH ./uselib data/coco.names cfg/yolov4.cfg yolov4.weights test.mp4` * `ZED_CAMERA=1` to build a library with ZED-3D-camera support (should be ZED SDK installed), then run `LD_LIBRARY_PATH=./:$LD_LIBRARY_PATH ./uselib data/coco.names cfg/yolov4.cfg yolov4.weights zed_camera` @@ -371,16 +366,17 @@ To run Darknet on Linux use examples from this article, just use `./darknet` ins ### How to compile on Windows (using `CMake`) -Requires: -* MSVS: https://visualstudio.microsoft.com/thank-you-downloading-visual-studio/?sku=Community +Requires: + +* MSVC: https://visualstudio.microsoft.com/thank-you-downloading-visual-studio/?sku=Community * CMake GUI: `Windows win64-x64 Installer`https://cmake.org/download/ * Download Darknet zip-archive with the latest commit and uncompress it: [master.zip](https://github.com/AlexeyAB/darknet/archive/master.zip) -In Windows: +In Windows: -* Start (button) -> All programms -> CMake -> CMake (gui) -> +* Start (button) -> All programs -> CMake -> CMake (gui) -> -* [look at image](https://habrastorage.org/webt/pz/s1/uu/pzs1uu4heb7vflfcjqn-lxy-aqu.jpeg) In CMake: Enter input path to the darknet Source, and output path to the Binaries -> Configure (button) -> Optional platform for generator: `x64` -> Finish -> Generate -> Open Project -> +* [look at image](https://habrastorage.org/webt/pz/s1/uu/pzs1uu4heb7vflfcjqn-lxy-aqu.jpeg) In CMake: Enter input path to the darknet Source, and output path to the Binaries -> Configure (button) -> Optional platform for generator: `x64` -> Finish -> Generate -> Open Project -> * in MS Visual Studio: Select: x64 and Release -> Build -> Build solution @@ -400,11 +396,6 @@ This is the recommended approach to build Darknet on 
Windows. 3. Open Powershell (Start -> All programs -> Windows Powershell) and type these commands: ```PowerShell -PS Code/> git clone https://github.com/microsoft/vcpkg -PS Code/> cd vcpkg -PS Code/vcpkg> .\bootstrap-vcpkg.bat -PS Code/vcpkg> $env:VCPKG_ROOT=$PWD -PS Code/vcpkg> cd .. PS Code/> git clone https://github.com/AlexeyAB/darknet PS Code/> cd darknet PS Code/darknet> .\build.ps1 -UseVCPKG -EnableOPENCV -EnableCUDA -EnableCUDNN @@ -490,7 +481,7 @@ It will create `.txt`-file for each `.jpg`-image-file - in the same directory an * `<object-class>` - integer object number from `0` to `(classes-1)` * `<x_center> <y_center> <width> <height>` - float values **relative** to width and height of image, it can be equal from `(0.0 to 1.0]` * for example: `<x> = <absolute_x> / <image_width>` or `<height> = <absolute_height> / <image_height>` - * atention: `<x_center> <y_center>` - are center of rectangle (are not top-left corner) + * attention: `<x_center> <y_center>` - are center of rectangle (are not top-left corner) For example for `img1.jpg` you will be created `img1.txt` containing: @@ -570,15 +561,15 @@ Usually sufficient 2000 iterations for each class(object), but not less than num * **9002** - iteration number (number of batch) * **0.60730 avg** - average loss (error) - **the lower, the better** - When you see that average loss **0.xxxxxx avg** no longer decreases at many iterations then you should stop training. The final avgerage loss can be from `0.05` (for a small model and easy dataset) to `3.0` (for a big model and a difficult dataset). + When you see that average loss **0.xxxxxx avg** no longer decreases at many iterations then you should stop training. The final average loss can be from `0.05` (for a small model and easy dataset) to `3.0` (for a big model and a difficult dataset). Or if you train with flag `-map` then you will see mAP indicator `Last accuracy mAP@0.5 = 18.50%` in the console - this indicator is better than Loss, so train while mAP increases. 2.
Once training is stopped, you should take some of last `.weights`-files from `darknet\build\darknet\x64\backup` and choose the best of them: -For example, you stopped training after 9000 iterations, but the best result can give one of previous weights (7000, 8000, 9000). It can happen due to overfitting. **Overfitting** - is case when you can detect objects on images from training-dataset, but can't detect objects on any others images. You should get weights from **Early Stopping Point**: +For example, you stopped training after 9000 iterations, but the best result can give one of previous weights (7000, 8000, 9000). It can happen due to over-fitting. **Over-fitting** - is case when you can detect objects on images from training-dataset, but can't detect objects on any others images. You should get weights from **Early Stopping Point**: -![Overfitting](https://hsto.org/files/5dc/7ae/7fa/5dc7ae7fad9d4e3eb3a484c58bfc1ff5.png) +![Over-fitting](https://hsto.org/files/5dc/7ae/7fa/5dc7ae7fad9d4e3eb3a484c58bfc1ff5.png) To get weights from Early Stopping Point: @@ -592,7 +583,7 @@ To get weights from Early Stopping Point: * `darknet.exe detector map data/obj.data yolo-obj.cfg backup\yolo-obj_8000.weights` * `darknet.exe detector map data/obj.data yolo-obj.cfg backup\yolo-obj_9000.weights` -And comapre last output lines for each weights (7000, 8000, 9000): +And compare last output lines for each weights (7000, 8000, 9000): Choose weights-file **with the highest mAP (mean average precision)** or IoU (intersect over union) @@ -610,7 +601,7 @@ So you will see mAP-chart (red-line) in the Loss-chart Window. 
mAP will be calcu Example of custom object detection: `darknet.exe detector test data/obj.data yolo-obj.cfg yolo-obj_8000.weights` -* **IoU** (intersect over union) - average instersect over union of objects and detections for a certain threshold = 0.24 +* **IoU** (intersect over union) - average intersect over union of objects and detections for a certain threshold = 0.24 * **mAP** (mean average precision) - mean value of `average precisions` for each class, where `average precision` is average value of 11 points on PR-curve for each possible threshold (each probability of detection) for the same class (Precision-Recall in terms of PascalVOC, where Precision=TP/(TP+FP) and Recall=TP/(TP+FN) ), page-11: http://homepages.inf.ed.ac.uk/ckiw/postscript/ijcv_voc09.pdf @@ -639,7 +630,7 @@ Example of custom object detection: `darknet.exe detector test data/obj.data yol * my Loss is very high and mAP is very low, is training wrong? Run training with ` -show_imgs` flag at the end of training command, do you see correct bounded boxes of objects (in windows or in files `aug_...jpg`)? If no - your training dataset is wrong. -* for each object which you want to detect - there must be at least 1 similar object in the Training dataset with about the same: shape, side of object, relative size, angle of rotation, tilt, illumination. So desirable that your training dataset include images with objects at diffrent: scales, rotations, lightings, from different sides, on different backgrounds - you should preferably have 2000 different images for each class or more, and you should train `2000*classes` iterations or more +* for each object which you want to detect - there must be at least 1 similar object in the Training dataset with about the same: shape, side of object, relative size, angle of rotation, tilt, illumination. 
So desirable that your training dataset include images with objects at different: scales, rotations, lightings, from different sides, on different backgrounds - you should preferably have 2000 different images for each class or more, and you should train `2000*classes` iterations or more * desirable that your training dataset include images with non-labeled objects that you do not want to detect - negative samples without bounded box (empty `.txt` files) - use as many images of negative samples as there are images with objects @@ -670,7 +661,7 @@ Example of custom object detection: `darknet.exe detector test data/obj.data yol * to speedup training (with decreasing detection accuracy) set param `stopbackward=1` for layer-136 in cfg-file -* each: `model of object, side, illimination, scale, each 30 grad` of the turn and inclination angles - these are *different objects* from an internal perspective of the neural network. So the more *different objects* you want to detect, the more complex network model should be used. +* each: `model of object, side, illumination, scale, each 30 grad` of the turn and inclination angles - these are *different objects* from an internal perspective of the neural network. So the more *different objects* you want to detect, the more complex network model should be used. * to make the detected bounded boxes more accurate, you can add 3 parameters `ignore_thresh = .9 iou_normalizer=0.5 iou_loss=giou` to each `[yolo]` layer and train, it will increase mAP@0.9, but decrease mAP@0.5. 
diff --git a/build.ps1 b/build.ps1 index 7a38d0d250d..e147fb878ef 100755 --- a/build.ps1 +++ b/build.ps1 @@ -1,11 +1,15 @@ #!/usr/bin/env pwsh param ( + [switch]$DisableInteractive = $false, [switch]$EnableCUDA = $false, [switch]$EnableCUDNN = $false, [switch]$EnableOPENCV = $false, [switch]$EnableOPENCV_CUDA = $false, [switch]$UseVCPKG = $false, + [switch]$DoNotUpdateVCPKG = $false, + [switch]$DoNotUpdateDARKNET = $false, + [switch]$DoNotDeleteBuildFolder = $false, [switch]$DoNotSetupVS = $false, [switch]$DoNotUseNinja = $false, [switch]$ForceCPP = $false, @@ -13,9 +17,45 @@ param ( [switch]$ForceGCC8 = $false ) +if (-Not $DisableInteractive -and -Not $UseVCPKG) { + $Result = Read-Host "Enable vcpkg to install darknet dependencies (yes/no)" + if ($Result -eq 'Yes' -or $Result -eq 'Y' -or $Result -eq 'yes' -or $Result -eq 'y') { + $UseVCPKG = $true + } +} + +if (-Not $DisableInteractive -and -Not $EnableCUDA -and -Not $IsMacOS) { + $Result = Read-Host "Enable CUDA integration (yes/no)" + if ($Result -eq 'Yes' -or $Result -eq 'Y' -or $Result -eq 'yes' -or $Result -eq 'y') { + $EnableCUDA = $true + } +} + +if ($EnableCUDA -and -Not $DisableInteractive -and -Not $EnableCUDNN) { + $Result = Read-Host "Enable CUDNN optional dependency (yes/no)" + if ($Result -eq 'Yes' -or $Result -eq 'Y' -or $Result -eq 'yes' -or $Result -eq 'y') { + $EnableCUDNN = $true + } +} + +if (-Not $DisableInteractive -and -Not $EnableOPENCV) { + $Result = Read-Host "Enable OpenCV optional dependency (yes/no)" + if ($Result -eq 'Yes' -or $Result -eq 'Y' -or $Result -eq 'yes' -or $Result -eq 'y') { + $EnableOPENCV = $true + } +} + $number_of_build_workers = 8 #$additional_build_setup = " -DCMAKE_CUDA_ARCHITECTURES=30" +if ($IsLinux -or $IsMacOS) { + $bootstrap_ext = ".sh" +} +elseif ($IsWindows) { + $bootstrap_ext = ".bat" +} +Write-Host "Native shell script extension: ${bootstrap_ext}" + if (-Not $IsWindows) { $DoNotSetupVS = $true } @@ -36,7 +76,7 @@ if ($IsWindows -and -Not 
$env:VCPKG_DEFAULT_TRIPLET) { } if ($EnableCUDA) { - if($IsMacOS) { + if ($IsMacOS) { Write-Host "Cannot enable CUDA on macOS" -ForegroundColor Yellow $EnableCUDA = $false } @@ -82,6 +122,12 @@ elseif ($EnableOPENCV_CUDA -and -not $EnableCUDA -and -not $EnableOPENCV) { if ($UseVCPKG) { Write-Host "VCPKG is enabled" + if ($DoNotUpdateVCPKG) { + Write-Host "VCPKG will not be updated to latest version if found" -ForegroundColor Yellow + } + else { + Write-Host "VCPKG will be updated to latest version if found" + } } else { Write-Host "VCPKG is disabled, please pass -UseVCPKG to the script to enable" @@ -110,6 +156,18 @@ else { Push-Location $PSScriptRoot +$GIT_EXE = Get-Command git 2> $null | Select-Object -ExpandProperty Definition +if (-Not $GIT_EXE) { + throw "Could not find git, please install it" +} +else { + Write-Host "Using git from ${GIT_EXE}" +} + +if ((Test-Path "$PSScriptRoot/.git") -and -not $DoNotUpdateDARKNET) { + & $GIT_EXE pull +} + $CMAKE_EXE = Get-Command cmake 2> $null | Select-Object -ExpandProperty Definition if (-Not $CMAKE_EXE) { throw "Could not find CMake, please install it" @@ -216,7 +274,10 @@ elseif ((Test-Path "${RUNVCPKG_VCPKG_ROOT_OUT}") -and $UseVCPKG) { Write-Host "Found vcpkg in RUNVCPKG_VCPKG_ROOT_OUT: ${RUNVCPKG_VCPKG_ROOT_OUT}" $additional_build_setup = $additional_build_setup + " -DENABLE_VCPKG_INTEGRATION:BOOL=ON" } -elseif ((Test-Path "$PWD/vcpkg") -and $UseVCPKG) { +elseif ($UseVCPKG) { + if (-Not (Test-Path "$PWD/vcpkg")) { + & $GIT_EXE clone https://github.com/microsoft/vcpkg + } $vcpkg_path = "$PWD/vcpkg" $env:VCPKG_ROOT = "$PWD/vcpkg" Write-Host "Found vcpkg in $PWD/vcpkg: $PWD/vcpkg" @@ -227,6 +288,13 @@ else { $additional_build_setup = $additional_build_setup + " -DENABLE_VCPKG_INTEGRATION:BOOL=OFF" } +if ($UseVCPKG -and (Test-Path "$vcpkg_path/.git") -and -not $DoNotUpdateVCPKG) { + Push-Location $vcpkg_path + & $GIT_EXE pull + & $PWD/bootstrap-vcpkg${bootstrap_ext} -disableMetrics + Pop-Location +} + if (-Not 
$DoNotSetupVS) { if ($null -eq (Get-Command "cl.exe" -ErrorAction SilentlyContinue)) { $vsfound = getLatestVisualStudioWithDesktopWorkloadPath @@ -239,7 +307,7 @@ if (-Not $DoNotSetupVS) { } } Pop-Location - Write-Host "Visual Studio Command Prompt variables set" -ForegroundColor Yellow + Write-Host "Visual Studio Command Prompt variables set" } $tokens = getLatestVisualStudioWithDesktopWorkloadVersion @@ -270,13 +338,13 @@ if (-Not $DoNotSetupVS) { if ($DoNotSetupVS -and $DoNotUseNinja) { $generator = "Unix Makefiles" } -Write-Host "Setting up environment to use CMake generator: $generator" -ForegroundColor Yellow +Write-Host "Setting up environment to use CMake generator: $generator" if (-Not $IsMacOS -and $EnableCUDA) { if ($null -eq (Get-Command "nvcc" -ErrorAction SilentlyContinue)) { if (Test-Path env:CUDA_PATH) { $env:PATH += ";${env:CUDA_PATH}/bin" - Write-Host "Found cuda in ${env:CUDA_PATH}" -ForegroundColor Yellow + Write-Host "Found cuda in ${env:CUDA_PATH}" } else { Write-Host "Unable to find CUDA, if necessary please install it or define a CUDA_PATH env variable pointing to the install folder" -ForegroundColor Yellow @@ -311,12 +379,18 @@ if (-Not($EnableOPENCV)) { $additional_build_setup = $additional_build_setup + " -DENABLE_OPENCV:BOOL=OFF" } -if ($EnableOPENCV_CUDA) { - $additional_build_setup = $additional_build_setup + " -DENABLE_OPENCV_WITH_CUDA:BOOL=ON" +if (-Not($EnableOPENCV_CUDA)) { + $additional_build_setup = $additional_build_setup + " -DVCPKG_BUILD_OPENCV_WITH_CUDA:BOOL=OFF" +} + +$build_folder = "./build_release" +if (-Not $DoNotDeleteBuildFolder) { + Write-Host "Removing folder $build_folder" -ForegroundColor Yellow + Remove-Item -Force -Recurse -ErrorAction SilentlyContinue $build_folder } -New-Item -Path ./build_release -ItemType directory -Force -Set-Location build_release +New-Item -Path $build_folder -ItemType directory -Force +Set-Location $build_folder $cmake_args = "-G `"$generator`" ${additional_build_setup} -S .." 
Write-Host "CMake args: $cmake_args" Start-Process -NoNewWindow -Wait -FilePath $CMAKE_EXE -ArgumentList $cmake_args diff --git a/src/gemm.c b/src/gemm.c index 519751c0622..84a7e9a6815 100644 --- a/src/gemm.c +++ b/src/gemm.c @@ -8,13 +8,29 @@ #include #include #include -#ifdef _WIN32 -#include -#endif #if defined(_OPENMP) #include #endif +#if defined(_MSC_VER) +#if defined(_M_ARM) || defined(_M_ARM64) +static inline uint32_t popcnt(uint32_t v) { + v = v - ((v >> 1) & 0x55555555); + v = (v & 0x33333333) + ((v >> 2) & 0x33333333); + return ((v + (v >> 4) & 0xF0F0F0F) * 0x1010101) >> 24; +} +#define POPCNT(x) popcnt((x)) +#define POPCNT64(x) (popcnt((unsigned)(x)) + popcnt((unsigned)((uint64_t)(x) >> 32))) +#else +#include +#define POPCNT(x) __popcnt(x) +#define POPCNT64(x) __popcnt64(x) +#endif +#elif defined(__GNUC__) +#define POPCNT(x) __builtin_popcount(x) +#define POPCNT64(x) __builtin_popcountll(x) +#endif + #define TILE_M 4 // 4 ops #define TILE_N 16 // AVX2 = 2 ops * 8 floats #define TILE_K 16 // loop @@ -230,7 +246,7 @@ void gemm_nn_custom_bin_mean(int M, int N, int K, float ALPHA_UNUSED, uint64_t b_bit64 = *((uint64_t *)(B + (k_ldb + j) / 8)); uint64_t c_bit64 = xnor_int64(a_bit64, b_bit64); //printf("\n %d \n",__builtin_popcountll(c_bit64)); // gcc - printf("\n %d \n", __popcnt64(c_bit64)); // msvs + printf("\n %d \n", POPCNT64(c_bit64)); // msvs int h; for (h = 0; h < 64; ++h) @@ -298,11 +314,7 @@ void gemm_nn_custom_bin_mean_transposed(int M, int N, int K, float ALPHA_UNUSED, uint64_t b_bit64 = *((uint64_t *)(B + (j*ldb + k) / 8)); uint64_t c_bit64 = xnor_int64(a_bit64, b_bit64); -#ifdef WIN32 - int tmp_count = __popcnt64(c_bit64); -#else - int tmp_count = __builtin_popcountll(c_bit64); -#endif + int tmp_count = POPCNT64(c_bit64); if (K - k < 64) tmp_count = tmp_count - (64 - (K - k)); // remove extra bits count += tmp_count; @@ -503,16 +515,6 @@ void transpose_bin(uint32_t *A, uint32_t *B, const int n, const int m, } } -static inline int 
popcnt_32(uint32_t val32) { -#ifdef WIN32 // Windows MSVS - int tmp_count = __popcnt(val32); -#else // Linux GCC - int tmp_count = __builtin_popcount(val32); -#endif - return tmp_count; -} -//---------------------------- - #if (defined(__AVX__) && defined(__x86_64__)) || (defined(_WIN64) && !defined(__MINGW32__)) #if (defined(_WIN64) && !defined(__MINGW64__)) @@ -925,14 +927,14 @@ void gemm_nn_bin_32bit_packed(int M, int N, int K, float ALPHA, // waiting for - CPUID Flags: AVX512VPOPCNTDQ: __m512i _mm512_popcnt_epi32(__m512i a) __m256 count = _mm256_setr_ps( - popcnt_32(_mm256_extract_epi32(xnor256, 0)), - popcnt_32(_mm256_extract_epi32(xnor256, 1)), - popcnt_32(_mm256_extract_epi32(xnor256, 2)), - popcnt_32(_mm256_extract_epi32(xnor256, 3)), - popcnt_32(_mm256_extract_epi32(xnor256, 4)), - popcnt_32(_mm256_extract_epi32(xnor256, 5)), - popcnt_32(_mm256_extract_epi32(xnor256, 6)), - popcnt_32(_mm256_extract_epi32(xnor256, 7))); + POPCNT(_mm256_extract_epi32(xnor256, 0)), + POPCNT(_mm256_extract_epi32(xnor256, 1)), + POPCNT(_mm256_extract_epi32(xnor256, 2)), + POPCNT(_mm256_extract_epi32(xnor256, 3)), + POPCNT(_mm256_extract_epi32(xnor256, 4)), + POPCNT(_mm256_extract_epi32(xnor256, 5)), + POPCNT(_mm256_extract_epi32(xnor256, 6)), + POPCNT(_mm256_extract_epi32(xnor256, 7))); __m256 val2 = _mm256_set1_ps(2); count = _mm256_mul_ps(count, val2); // count * 2 @@ -952,7 +954,7 @@ void gemm_nn_bin_32bit_packed(int M, int N, int K, float ALPHA, { PUT_IN_REGISTER uint32_t B_PART = B[s*ldb + j]; uint32_t xnor_result = ~(A_PART ^ B_PART); - int32_t count = popcnt_32(xnor_result); // must be Signed int + int32_t count = POPCNT(xnor_result); // must be Signed int C[i*ldc + j] += (2 * count - 32) * mean_val; } @@ -1140,13 +1142,7 @@ void convolution_2d(int w, int h, int ksize, int n, int c, int pad, int stride, static inline int popcnt128(__m128i n) { const __m128i n_hi = _mm_unpackhi_epi64(n, n); -#if defined(_MSC_VER) - return __popcnt64(_mm_cvtsi128_si64(n)) + 
__popcnt64(_mm_cvtsi128_si64(n_hi)); -#elif defined(__APPLE__) && defined(__clang__) - return _mm_popcnt_u64(_mm_cvtsi128_si64(n)) + _mm_popcnt_u64(_mm_cvtsi128_si64(n_hi)); -#else - return __popcntq(_mm_cvtsi128_si64(n)) + __popcntq(_mm_cvtsi128_si64(n_hi)); -#endif + return POPCNT64(_mm_cvtsi128_si64(n)) + POPCNT64(_mm_cvtsi128_si64(n_hi)); } static inline int popcnt256(__m256i n) { @@ -2021,7 +2017,7 @@ void gemm_nn_bin_32bit_packed(int M, int N, int K, float ALPHA, PUT_IN_REGISTER uint32_t B_PART = B[s * ldb + j]; uint32_t xnor_result = ~(A_PART ^ B_PART); //printf(" xnor_result = %d, ", xnor_result); - int32_t count = popcnt_32(xnor_result); // must be Signed int + int32_t count = POPCNT(xnor_result); // must be Signed int C[i*ldc + j] += (2 * count - 32) * mean_val; //c[i*n + j] += count*mean; @@ -2079,25 +2075,6 @@ void convolution_2d(int w, int h, int ksize, int n, int c, int pad, int stride, } } -static inline int popcnt_64(uint64_t val64) { -#ifdef WIN32 // Windows -#ifdef _WIN64 // Windows 64-bit - int tmp_count = __popcnt64(val64); -#else // Windows 32-bit - int tmp_count = __popcnt(val64); - tmp_count += __popcnt(val64 >> 32); -#endif -#else // Linux -#if defined(__x86_64__) || defined(__aarch64__) // Linux 64-bit - int tmp_count = __builtin_popcountll(val64); -#else // Linux 32-bit - int tmp_count = __builtin_popcount(val64); - tmp_count += __builtin_popcount(val64 >> 32); -#endif -#endif - return tmp_count; -} - void gemm_nn_custom_bin_mean_transposed(int M, int N, int K, float ALPHA_UNUSED, unsigned char *A, int lda, unsigned char *B, int ldb, @@ -2118,7 +2095,7 @@ void gemm_nn_custom_bin_mean_transposed(int M, int N, int K, float ALPHA_UNUSED, uint64_t b_bit64 = *((uint64_t *)(B + (j*ldb + k) / 8)); uint64_t c_bit64 = xnor_int64(a_bit64, b_bit64); - int tmp_count = popcnt_64(c_bit64); + int tmp_count = POPCNT64(c_bit64); if (K - k < 64) tmp_count = tmp_count - (64 - (K - k)); // remove extra bits count += tmp_count; @@ -2518,7 +2495,7 @@ void 
gemm_nn_bin_transposed_32bit_packed(int M, int N, int K, float ALPHA, PUT_IN_REGISTER uint32_t A_PART = ((uint32_t*)A)[i*lda + s]; PUT_IN_REGISTER uint32_t B_PART = ((uint32_t*)B)[j * ldb + s]; uint32_t xnor_result = ~(A_PART ^ B_PART); - int32_t count = popcnt_32(xnor_result); // must be Signed int + int32_t count = POPCNT(xnor_result); // must be Signed int val += (2 * count - 32) * mean_val; } @@ -2581,7 +2558,7 @@ void convolution_repacked(uint32_t *packed_input, uint32_t *packed_weights, floa uint32_t weight = ((uint32_t *)packed_weights)[fil*new_lda / 32 + chan*size*size + f_y*size + f_x]; uint32_t xnor_result = ~(input ^ weight); - int32_t count = popcnt_32(xnor_result); // mandatory Signed int + int32_t count = POPCNT(xnor_result); // mandatory Signed int sum += (2 * count - 32) * mean_val; } } diff --git a/src/http_stream.cpp b/src/http_stream.cpp index 3ec7e851593..b17edfb5d36 100644 --- a/src/http_stream.cpp +++ b/src/http_stream.cpp @@ -70,8 +70,12 @@ static int close_socket(SOCKET s) { #define SOCKADDR struct sockaddr #define SOCKADDR_IN struct sockaddr_in #define ADDRPOINTER unsigned int* +#ifndef INVALID_SOCKET #define INVALID_SOCKET -1 +#endif +#ifndef SOCKET_ERROR #define SOCKET_ERROR -1 +#endif struct _IGNORE_PIPE_SIGNAL { struct sigaction new_actn, old_actn; @@ -934,4 +938,3 @@ void set_track_id(detection *new_dets, int new_dets_num, float thresh, float sim } } } - diff --git a/src/httplib.h b/src/httplib.h index 41fbfb19419..e4678faeeb2 100644 --- a/src/httplib.h +++ b/src/httplib.h @@ -126,7 +126,10 @@ using socket_t = SOCKET; #include using socket_t = int; -#define INVALID_SOCKET (-1) + +#ifndef INVALID_SOCKET +#define INVALID_SOCKET -1 +#endif #endif //_WIN32 #include diff --git a/vcpkg.json b/vcpkg.json index a7f66f260fc..c57fe9175bf 100644 --- a/vcpkg.json +++ b/vcpkg.json @@ -1,12 +1,11 @@ { "name": "darknet", - "version-string": "0.2.5.4", - "port-version": 2, - "homepage": "https://github.com/alexeyab/darknet", + "version-date": 
"2021-04-16", "description": "Darknet is an open source neural network framework written in C and CUDA. You only look once (YOLO) is a state-of-the-art, real-time object detection system, best example of darknet functionalities.", + "homepage": "https://github.com/alexeyab/darknet", "dependencies": [ - "stb", - "pthreads" + "pthreads", + "stb" ], "features": { "cuda": { @@ -22,6 +21,19 @@ "cudnn" ] }, + "full": { + "description": "Build darknet fully featured", + "dependencies": [ + { + "name": "darknet", + "features": [ + "cuda", + "cudnn", + "opencv-cuda" + ] + } + ] + }, "opencv-base": { "description": "Build darknet with support for latest version of OpenCV", "dependencies": [