From ba8f7bf756241be5b766d198a69184fb8095213e Mon Sep 17 00:00:00 2001
From: Stefano Sinigardi <stesinigardi@hotmail.com>
Date: Sun, 18 Apr 2021 15:11:36 +0200
Subject: [PATCH] improve build process even more (#7610)

* test for shared libs

* improve flag handling

* build so lib in ci

* clone vcpkg if not found

* fix

* improve clang handling

* bump vcpkg.json version

* gemm.c compatible with ARM

* remove unnecessary vcpkg.json field

* do not unnecessarily rebuild vcpkg in the CI build stage

* use alexeyab nuget cache

* enable self-removal of build folder

* add interactivity in build.ps1 if not opt-out

* spellcheck README

* fix another leftover spelling error

* also auto-update darknet if possible

* do not self-update darknet in CI
---
 .github/workflows/ccpp.yml | 34 +++++++-------
 CMakeLists.txt             | 38 +++++++++-------
 README.md                  | 55 ++++++++++------------
 build.ps1                  | 92 +++++++++++++++++++++++++++++++++----
 src/gemm.c                 | 93 ++++++++++++++------------------------
 src/http_stream.cpp        |  5 +-
 src/httplib.h              |  5 +-
 vcpkg.json                 | 22 +++++++--
 8 files changed, 204 insertions(+), 140 deletions(-)

diff --git a/.github/workflows/ccpp.yml b/.github/workflows/ccpp.yml
index 077b3cf99f8..d6c18fda3fd 100644
--- a/.github/workflows/ccpp.yml
+++ b/.github/workflows/ccpp.yml
@@ -111,10 +111,10 @@ jobs:
         ./vcpkg/bootstrap-vcpkg.sh;
         mono $(./vcpkg/vcpkg fetch nuget | tail -n 1)
         sources add
-        -source "https://nuget.pkg.github.com/cenit/index.json"
+        -source "https://nuget.pkg.github.com/AlexeyAB/index.json"
         -storepasswordincleartext
         -name "vcpkgbinarycache"
-        -username "cenit"
+        -username "AlexeyAB"
         -password "${{ secrets.GITHUB_TOKEN }}"
 
     - name: 'Build'
@@ -124,7 +124,7 @@ jobs:
         CUDA_PATH: "/usr/local/cuda"
         CUDA_TOOLKIT_ROOT_DIR: "/usr/local/cuda"
         LD_LIBRARY_PATH: "/usr/local/cuda/lib64:/usr/local/cuda/lib64/stubs:$LD_LIBRARY_PATH"
-      run: ./build.ps1 -UseVCPKG -EnableOPENCV -EnableCUDA -ForceStaticLib
+      run: ./build.ps1 -UseVCPKG -DoNotUpdateVCPKG -EnableOPENCV -EnableCUDA -DisableInteractive -DoNotUpdateDARKNET
 
     - uses: actions/upload-artifact@v2
       with:
@@ -163,7 +163,7 @@ jobs:
         CUDA_PATH: "/usr/local/cuda"
         CUDA_TOOLKIT_ROOT_DIR: "/usr/local/cuda"
         LD_LIBRARY_PATH: "/usr/local/cuda/lib64:/usr/local/cuda/lib64/stubs:$LD_LIBRARY_PATH"
-      run: ./build.ps1 -EnableOPENCV
+      run: ./build.ps1 -EnableOPENCV -DisableInteractive -DoNotUpdateDARKNET
 
     - uses: actions/upload-artifact@v2
       with:
@@ -222,7 +222,7 @@ jobs:
         CUDA_PATH: "/usr/local/cuda"
         CUDA_TOOLKIT_ROOT_DIR: "/usr/local/cuda"
         LD_LIBRARY_PATH: "/usr/local/cuda/lib64:/usr/local/cuda/lib64/stubs:$LD_LIBRARY_PATH"
-      run: ./build.ps1 -EnableOPENCV -EnableCUDA
+      run: ./build.ps1 -EnableOPENCV -EnableCUDA -DisableInteractive -DoNotUpdateDARKNET
 
     - uses: actions/upload-artifact@v2
       with:
@@ -251,7 +251,7 @@ jobs:
 
     - name: 'Build'
       shell: pwsh
-      run: ./build.ps1 -ForceCPP
+      run: ./build.ps1 -ForceCPP -DisableInteractive -DoNotUpdateDARKNET
 
 
   osx-vcpkg:
@@ -271,15 +271,15 @@ jobs:
         ./vcpkg/bootstrap-vcpkg.sh;
         mono $(./vcpkg/vcpkg fetch nuget | tail -n 1)
         sources add
-        -source "https://nuget.pkg.github.com/cenit/index.json"
+        -source "https://nuget.pkg.github.com/AlexeyAB/index.json"
         -storepasswordincleartext
         -name "vcpkgbinarycache"
-        -username "cenit"
+        -username "AlexeyAB"
         -password "${{ secrets.GITHUB_TOKEN }}"
 
     - name: 'Build'
       shell: pwsh
-      run: ./build.ps1 -UseVCPKG
+      run: ./build.ps1 -UseVCPKG -DoNotUpdateVCPKG -DisableInteractive -DoNotUpdateDARKNET
 
     - uses: actions/upload-artifact@v2
       with:
@@ -311,7 +311,7 @@ jobs:
 
     - name: 'Build'
       shell: pwsh
-      run: ./build.ps1 -EnableOPENCV
+      run: ./build.ps1 -EnableOPENCV -DisableInteractive -DoNotUpdateDARKNET
 
     - uses: actions/upload-artifact@v2
       with:
@@ -340,7 +340,7 @@ jobs:
 
     - name: 'Build'
       shell: pwsh
-      run: ./build.ps1 -ForceCPP
+      run: ./build.ps1 -ForceCPP -DisableInteractive -DoNotUpdateDARKNET
 
 
   win-vcpkg:
@@ -357,15 +357,15 @@ jobs:
         ./vcpkg/bootstrap-vcpkg.sh;
         $(./vcpkg/vcpkg fetch nuget | tail -n 1)
         sources add
-        -source "https://nuget.pkg.github.com/cenit/index.json"
+        -source "https://nuget.pkg.github.com/AlexeyAB/index.json"
         -storepasswordincleartext
         -name "vcpkgbinarycache"
-        -username "cenit"
+        -username "AlexeyAB"
         -password "${{ secrets.GITHUB_TOKEN }}"
 
     - name: 'Build'
       shell: pwsh
-      run: ./build.ps1 -UseVCPKG -EnableOPENCV
+      run: ./build.ps1 -UseVCPKG -DoNotUpdateVCPKG -EnableOPENCV -DisableInteractive -DoNotUpdateDARKNET
 
     - uses: actions/upload-artifact@v2
       with:
@@ -398,7 +398,7 @@ jobs:
 
     - name: 'Build'
       shell: pwsh
-      run: ./build.ps1
+      run: ./build.ps1 -DisableInteractive -DoNotUpdateDARKNET
 
     - uses: actions/upload-artifact@v2
       with:
@@ -431,7 +431,7 @@ jobs:
 
     - name: 'Build'
       shell: pwsh
-      run: ./build.ps1 -ForceCPP
+      run: ./build.ps1 -ForceCPP -DisableInteractive -DoNotUpdateDARKNET
 
 
   win-intlibs-cuda:
@@ -454,7 +454,7 @@ jobs:
         CUDA_TOOLKIT_ROOT_DIR: "C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v10.2"
         CUDACXX: "C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v10.2\\bin\\nvcc.exe"
       shell: pwsh
-      run: ./build.ps1 -EnableCUDA
+      run: ./build.ps1 -EnableCUDA -DisableInteractive -DoNotUpdateDARKNET
 
 
   mingw:
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 00f446fcccf..0029abe78ee 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -19,8 +19,9 @@ option(ENABLE_CUDNN "Enable CUDNN" ON)
 option(ENABLE_CUDNN_HALF "Enable CUDNN Half precision" ON)
 option(ENABLE_ZED_CAMERA "Enable ZED Camera support" ON)
 option(ENABLE_VCPKG_INTEGRATION "Enable VCPKG integration" ON)
+option(VCPKG_BUILD_OPENCV_WITH_CUDA "Build OpenCV with CUDA extension integration" ON)
 
-if(ENABLE_OPENCV_WITH_CUDA AND NOT APPLE)
+if(VCPKG_BUILD_OPENCV_WITH_CUDA AND NOT APPLE)
   list(APPEND VCPKG_MANIFEST_FEATURES "opencv-cuda")
 endif()
 if(ENABLE_CUDA AND NOT APPLE)
@@ -33,18 +34,6 @@ if(ENABLE_CUDNN AND ENABLE_CUDA AND NOT APPLE)
   list(APPEND VCPKG_MANIFEST_FEATURES "cudnn")
 endif()
 
-if(CMAKE_COMPILER_IS_GNUCC OR "${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang")
-  set(CMAKE_COMPILER_IS_GNUCC_OR_CLANG TRUE)
-  if("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang")
-    set(CMAKE_COMPILER_IS_CLANG TRUE)
-  else()
-    set(CMAKE_COMPILER_IS_CLANG FALSE)
-  endif()
-else()
-  set(CMAKE_COMPILER_IS_GNUCC_OR_CLANG FALSE)
-  set(CMAKE_COMPILER_IS_CLANG FALSE)
-endif()
-
 if(NOT CMAKE_HOST_SYSTEM_PROCESSOR AND NOT WIN32)
   execute_process(COMMAND "uname" "-m" OUTPUT_VARIABLE CMAKE_HOST_SYSTEM_PROCESSOR OUTPUT_STRIP_TRAILING_WHITESPACE)
 endif()
@@ -87,6 +76,18 @@ enable_language(CXX)
 set(CMAKE_CXX_STANDARD 11)
 set(CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}/cmake/Modules/" ${CMAKE_MODULE_PATH})
 
+if(CMAKE_COMPILER_IS_GNUCC OR "${CMAKE_C_COMPILER_ID}" MATCHES "Clang" OR "${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang")
+  set(CMAKE_COMPILER_IS_GNUCC_OR_CLANG TRUE)
+  if("${CMAKE_C_COMPILER_ID}" MATCHES "Clang" OR "${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang")  # Clang for either language counts, mirroring the outer test
+    set(CMAKE_COMPILER_IS_CLANG TRUE)
+  else()
+    set(CMAKE_COMPILER_IS_CLANG FALSE)
+  endif()
+else()
+  set(CMAKE_COMPILER_IS_GNUCC_OR_CLANG FALSE)
+  set(CMAKE_COMPILER_IS_CLANG FALSE)
+endif()
+
 set(default_build_type "Release")
 if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
   message(STATUS "Setting build type to '${default_build_type}' as none was specified.")
@@ -201,12 +202,14 @@ endif()
 
 set(ADDITIONAL_CXX_FLAGS "-Wall -Wno-unused-result -Wno-unknown-pragmas -Wfatal-errors -Wno-deprecated-declarations -Wno-write-strings")
 set(ADDITIONAL_C_FLAGS "-Wall -Wno-unused-result -Wno-unknown-pragmas -Wfatal-errors -Wno-deprecated-declarations -Wno-write-strings")
+if(UNIX AND BUILD_SHARED_LIBS AND NOT CMAKE_COMPILER_IS_CLANG)
+  set(SHAREDLIB_CXX_FLAGS "-Wl,-Bsymbolic")
+  set(SHAREDLIB_C_FLAGS "-Wl,-Bsymbolic")
+endif()
 
 if(MSVC)
   set(ADDITIONAL_CXX_FLAGS "/wd4013 /wd4018 /wd4028 /wd4047 /wd4068 /wd4090 /wd4101 /wd4113 /wd4133 /wd4190 /wd4244 /wd4267 /wd4305 /wd4477 /wd4996 /wd4819 /fp:fast")
   set(ADDITIONAL_C_FLAGS "/wd4013 /wd4018 /wd4028 /wd4047 /wd4068 /wd4090 /wd4101 /wd4113 /wd4133 /wd4190 /wd4244 /wd4267 /wd4305 /wd4477 /wd4996 /wd4819 /fp:fast")
-  set(CMAKE_CXX_FLAGS "${ADDITIONAL_CXX_FLAGS} ${CMAKE_CXX_FLAGS}")
-  set(CMAKE_C_FLAGS "${ADDITIONAL_C_FLAGS} ${CMAKE_C_FLAGS}")
   string(REGEX REPLACE "/O2" "/Ox" CMAKE_CXX_FLAGS_RELEASE ${CMAKE_CXX_FLAGS_RELEASE})
   string(REGEX REPLACE "/O2" "/Ox" CMAKE_C_FLAGS_RELEASE ${CMAKE_C_FLAGS_RELEASE})
 endif()
@@ -218,8 +221,6 @@ if(CMAKE_COMPILER_IS_GNUCC_OR_CLANG)
       set(CMAKE_C_FLAGS "-pthread ${CMAKE_C_FLAGS}")
     endif()
   endif()
-  set(CMAKE_CXX_FLAGS "${ADDITIONAL_CXX_FLAGS} ${CMAKE_CXX_FLAGS}")
-  set(CMAKE_C_FLAGS "${ADDITIONAL_C_FLAGS} ${CMAKE_C_FLAGS}")
   string(REGEX REPLACE "-O0" "-Og" CMAKE_CXX_FLAGS_DEBUG ${CMAKE_CXX_FLAGS_DEBUG})
   string(REGEX REPLACE "-O3" "-Ofast" CMAKE_CXX_FLAGS_RELEASE ${CMAKE_CXX_FLAGS_RELEASE})
   string(REGEX REPLACE "-O0" "-Og" CMAKE_C_FLAGS_DEBUG ${CMAKE_C_FLAGS_DEBUG})
@@ -230,6 +231,9 @@ if(CMAKE_COMPILER_IS_GNUCC_OR_CLANG)
   endif()
 endif()
 
+set(CMAKE_CXX_FLAGS "${ADDITIONAL_CXX_FLAGS} ${SHAREDLIB_CXX_FLAGS} ${CMAKE_CXX_FLAGS}")
+set(CMAKE_C_FLAGS "${ADDITIONAL_C_FLAGS} ${SHAREDLIB_C_FLAGS} ${CMAKE_C_FLAGS}")
+
 if(OpenCV_FOUND)
   if(ENABLE_CUDA AND NOT OpenCV_CUDA_VERSION)
     set(BUILD_USELIB_TRACK "FALSE" CACHE BOOL "Build uselib_track" FORCE)
diff --git a/README.md b/README.md
index 2de8c7bf980..1e839f8a44a 100644
--- a/README.md
+++ b/README.md
@@ -31,7 +31,7 @@ About Darknet framework: http://pjreddie.com/darknet/
 
 
 * [YOLOv4 model zoo](https://github.com/AlexeyAB/darknet/wiki/YOLOv4-model-zoo)
-* [Requirements (and how to install dependecies)](#requirements)
+* [Requirements (and how to install dependencies)](#requirements)
 * [Pre-trained models](#pre-trained-models)
 * [FAQ - frequently asked questions](https://github.com/AlexeyAB/darknet/wiki/FAQ---frequently-asked-questions)
 * [Explanations in issues](https://github.com/AlexeyAB/darknet/issues?q=is%3Aopen+is%3Aissue+label%3AExplanations)
@@ -107,7 +107,7 @@ Others: https://www.youtube.com/user/pjreddie/videos
 #### How to evaluate AP of YOLOv4 on the MS COCO evaluation server
 
 1. Download and unzip test-dev2017 dataset from MS COCO server: http://images.cocodataset.org/zips/test2017.zip
-2. Download list of images for Detection taks and replace the paths with yours: https://raw.githubusercontent.com/AlexeyAB/darknet/master/scripts/testdev2017.txt
+2. Download list of images for Detection tasks and replace the paths with yours: https://raw.githubusercontent.com/AlexeyAB/darknet/master/scripts/testdev2017.txt
 3. Download `yolov4.weights` file 245 MB: [yolov4.weights](https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v3_optimal/yolov4.weights) (Google-drive mirror [yolov4.weights](https://drive.google.com/open?id=1cewMfusmPjYWbrnuJRuKhPMwRe_b9PaT) )
 4. Content of the file `cfg/coco.data` should be
 
@@ -202,13 +202,13 @@ You can get cfg-files by path: `darknet/cfg/`
 * **Powershell** (already installed on windows): https://docs.microsoft.com/en-us/powershell/scripting/install/installing-powershell
 * **CUDA >= 10.2**: https://developer.nvidia.com/cuda-toolkit-archive (on Linux do [Post-installation Actions](https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#post-installation-actions))
 * **OpenCV >= 2.4**: use your preferred package manager (brew, apt), build from source using [vcpkg](https://github.com/Microsoft/vcpkg) or download from [OpenCV official site](https://opencv.org/releases.html) (on Windows set system variable `OpenCV_DIR` = `C:\opencv\build` - where are the `include` and `x64` folders [image](https://user-images.githubusercontent.com/4096485/53249516-5130f480-36c9-11e9-8238-a6e82e48c6f2.png))
-* **cuDNN >= 8.0.2** https://developer.nvidia.com/rdp/cudnn-archive (on **Linux** copy `cudnn.h`,`libcudnn.so`... as desribed here https://docs.nvidia.com/deeplearning/sdk/cudnn-install/index.html#installlinux-tar , on **Windows** copy `cudnn.h`,`cudnn64_7.dll`, `cudnn64_7.lib` as desribed here https://docs.nvidia.com/deeplearning/sdk/cudnn-install/index.html#installwindows )
+* **cuDNN >= 8.0.2** https://developer.nvidia.com/rdp/cudnn-archive (on **Linux** copy `cudnn.h`,`libcudnn.so`... as described here https://docs.nvidia.com/deeplearning/sdk/cudnn-install/index.html#installlinux-tar , on **Windows** copy `cudnn.h`,`cudnn64_7.dll`, `cudnn64_7.lib` as described here https://docs.nvidia.com/deeplearning/sdk/cudnn-install/index.html#installwindows )
 * **GPU with CC >= 3.0**: https://en.wikipedia.org/wiki/CUDA#GPUs_supported
 
 ### Yolo v4 in other frameworks
 
 * **Pytorch - Scaled-YOLOv4:** https://github.com/WongKinYiu/ScaledYOLOv4
-* **TensorFlow:** `pip install yolov4` YOLOv4 on TensorFlow 2.0 / TFlite / Andriod: https://github.com/hunglc007/tensorflow-yolov4-tflite
+* **TensorFlow:** `pip install yolov4` YOLOv4 on TensorFlow 2.0 / TFlite / Android: https://github.com/hunglc007/tensorflow-yolov4-tflite
     Official TF models: https://github.com/tensorflow/models/tree/master/official/vision/beta/projects/yolo
     For YOLOv4 - convert `yolov4.weights`/`cfg` files to `yolov4.pb` by using [TNTWEN](https://github.com/TNTWEN/OpenVINO-YOLOV4) project, and to `yolov4.tflite` [TensorFlow-lite](https://www.tensorflow.org/lite/guide/get_started#2_convert_the_model_format)
 * **OpenCV-dnn** the fastest implementation of YOLOv4 for CPU (x86/ARM-Android), OpenCV can be compiled with [OpenVINO-backend](https://github.com/opencv/opencv/wiki/Intel's-Deep-Learning-Inference-Engine-backend) for running on (Myriad X / USB Neural Compute Stick / Arria FPGA), use `yolov4.weights`/`cfg` with: [C++ example](https://github.com/opencv/opencv/blob/8c25a8eb7b10fb50cda323ee6bec68aa1a9ce43c/samples/dnn/object_detection.cpp#L192-L221) or [Python example](https://github.com/opencv/opencv/blob/8c25a8eb7b10fb50cda323ee6bec68aa1a9ce43c/samples/dnn/object_detection.py#L129-L150)
@@ -226,7 +226,7 @@ You can get cfg-files by path: `darknet/cfg/`
 * **Triton Inference Server / TensorRT** https://github.com/isarsoft/yolov4-triton-tensorrt
 * **Xilinx Zynq Ultrascale+ Deep Learning Processor (DPU) ZCU102/ZCU104:** https://github.com/Xilinx/Vitis-In-Depth-Tutorial/tree/master/Machine_Learning/Design_Tutorials/07-yolov4-tutorial
 * **Amazon Neurochip / Amazon EC2 Inf1 instances** 1.85 times higher throughput and 37% lower cost per image for TensorFlow based YOLOv4 model, using Keras [URL](https://aws.amazon.com/ru/blogs/machine-learning/improving-performance-for-deep-learning-based-object-detection-with-an-aws-neuron-compiled-yolov4-model-on-aws-inferentia/)
-* **TVM** - compilation of deep learning models (Keras, MXNet, PyTorch, Tensorflow, CoreML, DarkNet) into minimum deployable modules on diverse hardware backends (CPUs, GPUs, FPGA, and specialized accelerators): https://tvm.ai/about
+* **TVM** - compilation of deep learning models (Keras, MXNet, PyTorch, Tensorflow, CoreML, DarkNet) into minimum deployable modules on diverse hardware backends (CPUs, GPUs, FPGA, and specialized accelerators): https://tvm.ai/about
 * **OpenDataCam** - It detects, tracks and counts moving objects by using YOLOv4: https://github.com/opendatacam/opendatacam#-hardware-pre-requisite
 * **Netron** - Visualizer for neural networks: https://github.com/lutzroeder/netron
 
@@ -287,12 +287,12 @@ On Linux find executable file `./darknet` in the root directory, while on Window
 * Train on **Amazon EC2**, to see mAP & Loss-chart using URL like: `http://ec2-35-160-228-91.us-west-2.compute.amazonaws.com:8090` in the Chrome/Firefox (**Darknet should be compiled with OpenCV**): 
     `./darknet detector train cfg/coco.data yolov4.cfg yolov4.conv.137 -dont_show -mjpeg_port 8090 -map`
 * 186 MB Yolo9000 - image: `darknet.exe detector test cfg/combine9k.data cfg/yolo9000.cfg yolo9000.weights`
-* Remeber to put data/9k.tree and data/coco9k.map under the same folder of your app if you use the cpp api to build an app
+* Remember to put data/9k.tree and data/coco9k.map under the same folder of your app if you use the cpp api to build an app
 * To process a list of images `data/train.txt` and save results of detection to `result.json` file use: 
     `darknet.exe detector test cfg/coco.data cfg/yolov4.cfg yolov4.weights -ext_output -dont_show -out result.json < data/train.txt`
 * To process a list of images `data/train.txt` and save results of detection to `result.txt` use:                             
     `darknet.exe detector test cfg/coco.data cfg/yolov4.cfg yolov4.weights -dont_show -ext_output < data/train.txt > result.txt`
-* Pseudo-lableing - to process a list of images `data/new_train.txt` and save results of detection in Yolo training format for each image as label `<image_name>.txt` (in this way you can increase the amount of training data) use:
+* Pseudo-labeling - to process a list of images `data/new_train.txt` and save results of detection in Yolo training format for each image as label `<image_name>.txt` (in this way you can increase the amount of training data) use:
     `darknet.exe detector test cfg/coco.data cfg/yolov4.cfg yolov4.weights -thresh 0.25 -dont_show -save_labels < data/new_train.txt`
 * To calculate anchors: `darknet.exe detector calc_anchors data/obj.data -num_of_clusters 9 -width 416 -height 416`
 * To check accuracy mAP@IoU=50: `darknet.exe detector map data/obj.data yolo-obj.cfg backup\yolo-obj_7000.weights`
@@ -324,11 +324,6 @@ To update CMake on Ubuntu, it's better to follow guide here: https://apt.kitware
 Open a shell and type these commands
 
 ```PowerShell
-PS Code/>              git clone https://github.com/microsoft/vcpkg
-PS Code/>              cd vcpkg
-PS Code/vcpkg>         ./bootstrap-vcpkg.sh
-PS Code/vcpkg>         $env:VCPKG_ROOT=$PWD
-PS Code/vcpkg>         cd ..
 PS Code/>              git clone https://github.com/AlexeyAB/darknet
 PS Code/>              cd darknet
 PS Code/darknet>       ./build.ps1 -UseVCPKG -EnableOPENCV -EnableCUDA -EnableCUDNN
@@ -359,9 +354,9 @@ Before make, you can set such options in the `Makefile`: [link](https://github.c
 * `CUDNN=1` to build with cuDNN v5-v7 to accelerate training by using GPU (cuDNN should be in `/usr/local/cudnn`)
 * `CUDNN_HALF=1` to build for Tensor Cores (on Titan V / Tesla V100 / DGX-2 and later) speedup Detection 3x, Training 2x
 * `OPENCV=1` to build with OpenCV 4.x/3.x/2.4.x - allows to detect on video files and video streams from network cameras or web-cams
-* `DEBUG=1` to bould debug version of Yolo
+* `DEBUG=1` to build debug version of Yolo
 * `OPENMP=1` to build with OpenMP support to accelerate Yolo by using multi-core CPU
-* `LIBSO=1` to build a library `darknet.so` and binary runable file `uselib` that uses this library. Or you can try to run so `LD_LIBRARY_PATH=./:$LD_LIBRARY_PATH ./uselib test.mp4` How to use this SO-library from your own code - you can look at C++ example: https://github.com/AlexeyAB/darknet/blob/master/src/yolo_console_dll.cpp
+* `LIBSO=1` to build a library `darknet.so` and binary runnable file `uselib` that uses this library. Or you can try to run so `LD_LIBRARY_PATH=./:$LD_LIBRARY_PATH ./uselib test.mp4` How to use this SO-library from your own code - you can look at C++ example: https://github.com/AlexeyAB/darknet/blob/master/src/yolo_console_dll.cpp
     or use in such a way: `LD_LIBRARY_PATH=./:$LD_LIBRARY_PATH ./uselib data/coco.names cfg/yolov4.cfg yolov4.weights test.mp4`
 * `ZED_CAMERA=1` to build a library with ZED-3D-camera support (should be ZED SDK installed), then run
     `LD_LIBRARY_PATH=./:$LD_LIBRARY_PATH ./uselib data/coco.names cfg/yolov4.cfg yolov4.weights zed_camera`
@@ -371,16 +366,17 @@ To run Darknet on Linux use examples from this article, just use `./darknet` ins
 
 ### How to compile on Windows (using `CMake`)
 
-Requires: 
-* MSVS: https://visualstudio.microsoft.com/thank-you-downloading-visual-studio/?sku=Community
+Requires:
+
+* MSVC: https://visualstudio.microsoft.com/thank-you-downloading-visual-studio/?sku=Community
 * CMake GUI: `Windows win64-x64 Installer`https://cmake.org/download/
 * Download Darknet zip-archive with the latest commit and uncompress it: [master.zip](https://github.com/AlexeyAB/darknet/archive/master.zip)
 
-In Windows: 
+In Windows:
 
-* Start (button) -> All programms -> CMake -> CMake (gui) -> 
+* Start (button) -> All programs -> CMake -> CMake (gui) ->
 
-* [look at image](https://habrastorage.org/webt/pz/s1/uu/pzs1uu4heb7vflfcjqn-lxy-aqu.jpeg) In CMake: Enter input path to the darknet Source, and output path to the Binaries -> Configure (button) -> Optional platform for generator: `x64`  -> Finish -> Generate -> Open Project -> 
+* [look at image](https://habrastorage.org/webt/pz/s1/uu/pzs1uu4heb7vflfcjqn-lxy-aqu.jpeg) In CMake: Enter input path to the darknet Source, and output path to the Binaries -> Configure (button) -> Optional platform for generator: `x64`  -> Finish -> Generate -> Open Project ->
 
 * in MS Visual Studio: Select: x64 and Release -> Build -> Build solution
 
@@ -400,11 +396,6 @@ This is the recommended approach to build Darknet on Windows.
 3. Open Powershell (Start -> All programs -> Windows Powershell) and type these commands:
 
 ```PowerShell
-PS Code/>              git clone https://github.com/microsoft/vcpkg
-PS Code/>              cd vcpkg
-PS Code/vcpkg>         .\bootstrap-vcpkg.bat
-PS Code/vcpkg>         $env:VCPKG_ROOT=$PWD
-PS Code/vcpkg>         cd ..
 PS Code/>              git clone https://github.com/AlexeyAB/darknet
 PS Code/>              cd darknet
 PS Code/darknet>       .\build.ps1 -UseVCPKG -EnableOPENCV -EnableCUDA -EnableCUDNN
@@ -490,7 +481,7 @@ It will create `.txt`-file for each `.jpg`-image-file - in the same directory an
   * `<object-class>` - integer object number from `0` to `(classes-1)`
   * `<x_center> <y_center> <width> <height>` - float values **relative** to width and height of image, it can be equal from `(0.0 to 1.0]`
   * for example: `<x> = <absolute_x> / <image_width>` or `<height> = <absolute_height> / <image_height>`
-  * atention: `<x_center> <y_center>` - are center of rectangle (are not top-left corner)
+  * attention: `<x_center> <y_center>` - are center of rectangle (are not top-left corner)
 
   For example for `img1.jpg` you will be created `img1.txt` containing:
 
@@ -570,15 +561,15 @@ Usually sufficient 2000 iterations for each class(object), but not less than num
   * **9002** - iteration number (number of batch)
   * **0.60730 avg** - average loss (error) - **the lower, the better**
 
-  When you see that average loss **0.xxxxxx avg** no longer decreases at many iterations then you should stop training. The final avgerage loss can be from `0.05` (for a small model and easy dataset) to `3.0` (for a big model and a difficult dataset).
+  When you see that average loss **0.xxxxxx avg** no longer decreases at many iterations then you should stop training. The final average loss can be from `0.05` (for a small model and easy dataset) to `3.0` (for a big model and a difficult dataset).
   
   Or if you train with flag `-map` then you will see mAP indicator `Last accuracy mAP@0.5 = 18.50%` in the console - this indicator is better than Loss, so train while mAP increases. 
 
 2. Once training is stopped, you should take some of last `.weights`-files from `darknet\build\darknet\x64\backup` and choose the best of them:
 
-For example, you stopped training after 9000 iterations, but the best result can give one of previous weights (7000, 8000, 9000). It can happen due to overfitting. **Overfitting** - is case when you can detect objects on images from training-dataset, but can't detect objects on any others images. You should get weights from **Early Stopping Point**:
+For example, you stopped training after 9000 iterations, but the best result can give one of previous weights (7000, 8000, 9000). It can happen due to over-fitting. **Over-fitting** - is case when you can detect objects on images from training-dataset, but can't detect objects on any others images. You should get weights from **Early Stopping Point**:
 
-![Overfitting](https://hsto.org/files/5dc/7ae/7fa/5dc7ae7fad9d4e3eb3a484c58bfc1ff5.png) 
+![Over-fitting](https://hsto.org/files/5dc/7ae/7fa/5dc7ae7fad9d4e3eb3a484c58bfc1ff5.png) 
 
 To get weights from Early Stopping Point:
 
@@ -592,7 +583,7 @@ To get weights from Early Stopping Point:
 * `darknet.exe detector map data/obj.data yolo-obj.cfg backup\yolo-obj_8000.weights`
 * `darknet.exe detector map data/obj.data yolo-obj.cfg backup\yolo-obj_9000.weights`
 
-And comapre last output lines for each weights (7000, 8000, 9000):
+And compare last output lines for each weights (7000, 8000, 9000):
 
 Choose weights-file **with the highest mAP (mean average precision)** or IoU (intersect over union)
 
@@ -610,7 +601,7 @@ So you will see mAP-chart (red-line) in the Loss-chart Window. mAP will be calcu
 
 Example of custom object detection: `darknet.exe detector test data/obj.data yolo-obj.cfg yolo-obj_8000.weights`
 
-* **IoU** (intersect over union) - average instersect over union of objects and detections for a certain threshold = 0.24
+* **IoU** (intersect over union) - average intersect over union of objects and detections for a certain threshold = 0.24
 
 * **mAP** (mean average precision) - mean value of `average precisions` for each class, where `average precision` is average value of 11 points on PR-curve for each possible threshold (each probability of detection) for the same class (Precision-Recall in terms of PascalVOC, where Precision=TP/(TP+FP) and Recall=TP/(TP+FN) ), page-11: http://homepages.inf.ed.ac.uk/ckiw/postscript/ijcv_voc09.pdf
 
@@ -639,7 +630,7 @@ Example of custom object detection: `darknet.exe detector test data/obj.data yol
 
 * my Loss is very high and mAP is very low, is training wrong? Run training with ` -show_imgs` flag at the end of training command, do you see correct bounded boxes of objects (in windows or in files `aug_...jpg`)? If no - your training dataset is wrong.
 
-* for each object which you want to detect - there must be at least 1 similar object in the Training dataset with about the same: shape, side of object, relative size, angle of rotation, tilt, illumination. So desirable that your training dataset include images with objects at diffrent: scales, rotations, lightings, from different sides, on different backgrounds - you should preferably have 2000 different images for each class or more, and you should train `2000*classes` iterations or more
+* for each object which you want to detect - there must be at least 1 similar object in the Training dataset with about the same: shape, side of object, relative size, angle of rotation, tilt, illumination. So desirable that your training dataset include images with objects at different: scales, rotations, lightings, from different sides, on different backgrounds - you should preferably have 2000 different images for each class or more, and you should train `2000*classes` iterations or more
 
 * desirable that your training dataset include images with non-labeled objects that you do not want to detect - negative samples without bounded box (empty `.txt` files) - use as many images of negative samples as there are images with objects
 
@@ -670,7 +661,7 @@ Example of custom object detection: `darknet.exe detector test data/obj.data yol
 
 * to speedup training (with decreasing detection accuracy) set param `stopbackward=1` for layer-136 in cfg-file
 
-* each: `model of object, side, illimination, scale, each 30 grad` of the turn and inclination angles - these are *different objects* from an internal perspective of the neural network. So the more *different objects* you want to detect, the more complex network model should be used.
+* each: `model of object, side, illumination, scale, each 30 grad` of the turn and inclination angles - these are *different objects* from an internal perspective of the neural network. So the more *different objects* you want to detect, the more complex network model should be used.
 
 * to make the detected bounded boxes more accurate, you can add 3 parameters `ignore_thresh = .9 iou_normalizer=0.5 iou_loss=giou` to each `[yolo]` layer and train, it will increase mAP@0.9, but decrease mAP@0.5.
 
diff --git a/build.ps1 b/build.ps1
index 7a38d0d250d..e147fb878ef 100755
--- a/build.ps1
+++ b/build.ps1
@@ -1,11 +1,15 @@
 #!/usr/bin/env pwsh
 
 param (
+  [switch]$DisableInteractive = $false,
   [switch]$EnableCUDA = $false,
   [switch]$EnableCUDNN = $false,
   [switch]$EnableOPENCV = $false,
   [switch]$EnableOPENCV_CUDA = $false,
   [switch]$UseVCPKG = $false,
+  [switch]$DoNotUpdateVCPKG = $false,
+  [switch]$DoNotUpdateDARKNET = $false,
+  [switch]$DoNotDeleteBuildFolder = $false,
   [switch]$DoNotSetupVS = $false,
   [switch]$DoNotUseNinja = $false,
   [switch]$ForceCPP = $false,
@@ -13,9 +17,45 @@ param (
   [switch]$ForceGCC8 = $false
 )
 
+if (-Not $DisableInteractive -and -Not $UseVCPKG) {
+  $Result = Read-Host "Enable vcpkg to install darknet dependencies (yes/no)"
+  if ($Result -eq 'Yes' -or $Result -eq 'Y' -or $Result -eq 'yes' -or $Result -eq 'y') {
+    $UseVCPKG = $true
+  }
+}
+
+if (-Not $DisableInteractive -and -Not $EnableCUDA -and -Not $IsMacOS) {
+  $Result = Read-Host "Enable CUDA integration (yes/no)"
+  if ($Result -eq 'Yes' -or $Result -eq 'Y' -or $Result -eq 'yes' -or $Result -eq 'y') {
+    $EnableCUDA = $true
+  }
+}
+
+if ($EnableCUDA -and -Not $DisableInteractive -and -Not $EnableCUDNN) {
+  $Result = Read-Host "Enable CUDNN optional dependency (yes/no)"
+  if ($Result -eq 'Yes' -or $Result -eq 'Y' -or $Result -eq 'yes' -or $Result -eq 'y') {
+    $EnableCUDNN = $true
+  }
+}
+
+if (-Not $DisableInteractive -and -Not $EnableOPENCV) {
+  $Result = Read-Host "Enable OpenCV optional dependency (yes/no)"
+  if ($Result -eq 'Yes' -or $Result -eq 'Y' -or $Result -eq 'yes' -or $Result -eq 'y') {
+    $EnableOPENCV = $true
+  }
+}
+
 $number_of_build_workers = 8
 #$additional_build_setup = " -DCMAKE_CUDA_ARCHITECTURES=30"
 
+if ($IsLinux -or $IsMacOS) {
+  $bootstrap_ext = ".sh"
+}
+elseif ($IsWindows) {
+  $bootstrap_ext = ".bat"
+}
+Write-Host "Native shell script extension: ${bootstrap_ext}"
+
 if (-Not $IsWindows) {
   $DoNotSetupVS = $true
 }
@@ -36,7 +76,7 @@ if ($IsWindows -and -Not $env:VCPKG_DEFAULT_TRIPLET) {
 }
 
 if ($EnableCUDA) {
-  if($IsMacOS) {
+  if ($IsMacOS) {
     Write-Host "Cannot enable CUDA on macOS" -ForegroundColor Yellow
     $EnableCUDA = $false
   }
@@ -82,6 +122,12 @@ elseif ($EnableOPENCV_CUDA -and -not $EnableCUDA -and -not $EnableOPENCV) {
 
 if ($UseVCPKG) {
   Write-Host "VCPKG is enabled"
+  if ($DoNotUpdateVCPKG) {
+    Write-Host "VCPKG will not be updated to latest version if found" -ForegroundColor Yellow
+  }
+  else {
+    Write-Host "VCPKG will be updated to latest version if found"
+  }
 }
 else {
   Write-Host "VCPKG is disabled, please pass -UseVCPKG to the script to enable"
@@ -110,6 +156,18 @@ else {
 
 Push-Location $PSScriptRoot
 
+$GIT_EXE = Get-Command git 2> $null | Select-Object -ExpandProperty Definition
+if (-Not $GIT_EXE) {
+  throw "Could not find git, please install it"
+}
+else {
+  Write-Host "Using git from ${GIT_EXE}"
+}
+
+if ((Test-Path "$PSScriptRoot/.git") -and -not $DoNotUpdateDARKNET) {
+  & $GIT_EXE pull
+}
+
 $CMAKE_EXE = Get-Command cmake 2> $null | Select-Object -ExpandProperty Definition
 if (-Not $CMAKE_EXE) {
   throw "Could not find CMake, please install it"
@@ -216,7 +274,10 @@ elseif ((Test-Path "${RUNVCPKG_VCPKG_ROOT_OUT}") -and $UseVCPKG) {
   Write-Host "Found vcpkg in RUNVCPKG_VCPKG_ROOT_OUT: ${RUNVCPKG_VCPKG_ROOT_OUT}"
   $additional_build_setup = $additional_build_setup + " -DENABLE_VCPKG_INTEGRATION:BOOL=ON"
 }
-elseif ((Test-Path "$PWD/vcpkg") -and $UseVCPKG) {
+elseif ($UseVCPKG) {
+  if (-Not (Test-Path "$PWD/vcpkg")) {
+    & $GIT_EXE clone https://github.com/microsoft/vcpkg
+  }
   $vcpkg_path = "$PWD/vcpkg"
   $env:VCPKG_ROOT = "$PWD/vcpkg"
   Write-Host "Found vcpkg in $PWD/vcpkg: $PWD/vcpkg"
@@ -227,6 +288,13 @@ else {
   $additional_build_setup = $additional_build_setup + " -DENABLE_VCPKG_INTEGRATION:BOOL=OFF"
 }
 
+if ($UseVCPKG -and (Test-Path "$vcpkg_path/.git") -and -not $DoNotUpdateVCPKG) {
+  Push-Location $vcpkg_path
+  & $GIT_EXE pull
+  & $PWD/bootstrap-vcpkg${bootstrap_ext} -disableMetrics
+  Pop-Location
+}
+
 if (-Not $DoNotSetupVS) {
   if ($null -eq (Get-Command "cl.exe" -ErrorAction SilentlyContinue)) {
     $vsfound = getLatestVisualStudioWithDesktopWorkloadPath
@@ -239,7 +307,7 @@ if (-Not $DoNotSetupVS) {
       }
     }
     Pop-Location
-    Write-Host "Visual Studio Command Prompt variables set" -ForegroundColor Yellow
+    Write-Host "Visual Studio Command Prompt variables set"
   }
 
   $tokens = getLatestVisualStudioWithDesktopWorkloadVersion
@@ -270,13 +338,13 @@ if (-Not $DoNotSetupVS) {
 if ($DoNotSetupVS -and $DoNotUseNinja) {
   $generator = "Unix Makefiles"
 }
-Write-Host "Setting up environment to use CMake generator: $generator" -ForegroundColor Yellow
+Write-Host "Setting up environment to use CMake generator: $generator"
 
 if (-Not $IsMacOS -and $EnableCUDA) {
   if ($null -eq (Get-Command "nvcc" -ErrorAction SilentlyContinue)) {
     if (Test-Path env:CUDA_PATH) {
       $env:PATH += ";${env:CUDA_PATH}/bin"
-      Write-Host "Found cuda in ${env:CUDA_PATH}" -ForegroundColor Yellow
+      Write-Host "Found cuda in ${env:CUDA_PATH}"
     }
     else {
       Write-Host "Unable to find CUDA, if necessary please install it or define a CUDA_PATH env variable pointing to the install folder" -ForegroundColor Yellow
@@ -311,12 +379,18 @@ if (-Not($EnableOPENCV)) {
   $additional_build_setup = $additional_build_setup + " -DENABLE_OPENCV:BOOL=OFF"
 }
 
-if ($EnableOPENCV_CUDA) {
-  $additional_build_setup = $additional_build_setup + " -DENABLE_OPENCV_WITH_CUDA:BOOL=ON"
+if (-Not($EnableOPENCV_CUDA)) {
+  $additional_build_setup = $additional_build_setup + " -DVCPKG_BUILD_OPENCV_WITH_CUDA:BOOL=OFF"
+}
+
+$build_folder = "./build_release"
+if (-Not $DoNotDeleteBuildFolder) {
+  Write-Host "Removing folder $build_folder" -ForegroundColor Yellow
+  Remove-Item -Force -Recurse -ErrorAction SilentlyContinue $build_folder
 }
 
-New-Item -Path ./build_release -ItemType directory -Force
-Set-Location build_release
+New-Item -Path $build_folder -ItemType directory -Force
+Set-Location $build_folder
 $cmake_args = "-G `"$generator`" ${additional_build_setup} -S .."
 Write-Host "CMake args: $cmake_args"
 Start-Process -NoNewWindow -Wait -FilePath $CMAKE_EXE -ArgumentList $cmake_args
diff --git a/src/gemm.c b/src/gemm.c
index 519751c0622..84a7e9a6815 100644
--- a/src/gemm.c
+++ b/src/gemm.c
@@ -8,13 +8,37 @@
 #include <float.h>
 #include <string.h>
 #include <stdint.h>
-#ifdef _WIN32
-#include <intrin.h>
-#endif
 #if defined(_OPENMP)
 #include <omp.h>
 #endif
 
+// Population-count helpers: POPCNT(x) counts the set bits of a 32-bit value,
+// POPCNT64(x) those of a 64-bit value, via compiler intrinsics where available.
+#if defined(_MSC_VER)
+#if defined(_M_ARM) || defined(_M_ARM64)
+// MSVC ARM targets: portable bit-twiddling count instead of the x86 intrinsics.
+static inline uint32_t popcnt(uint32_t v) {
+  v = v - ((v >> 1) & 0x55555555);
+  v = (v & 0x33333333) + ((v >> 2) & 0x33333333);
+  return (((v + (v >> 4)) & 0xF0F0F0F) * 0x1010101) >> 24;
+}
+#define POPCNT(x) popcnt((x))
+#define POPCNT64(x) (popcnt((unsigned)(x)) + popcnt((unsigned)((uint64_t)(x) >> 32)))
+#else
+#include <intrin.h>
+#define POPCNT(x) __popcnt(x)
+#if defined(_M_X64)
+#define POPCNT64(x) __popcnt64(x)
+#else
+// __popcnt64 is only provided for x64 targets: add the counts of both 32-bit halves.
+#define POPCNT64(x) (__popcnt((unsigned)(x)) + __popcnt((unsigned)((uint64_t)(x) >> 32)))
+#endif
+#endif
+#elif defined(__GNUC__)
+#define POPCNT(x) __builtin_popcount(x)
+#define POPCNT64(x) __builtin_popcountll(x)
+#endif
+
 #define TILE_M 4 // 4 ops
 #define TILE_N 16 // AVX2 = 2 ops * 8 floats
 #define TILE_K 16 // loop
@@ -230,7 +246,7 @@ void gemm_nn_custom_bin_mean(int M, int N, int K, float ALPHA_UNUSED,
                     uint64_t b_bit64 = *((uint64_t *)(B + (k_ldb + j) / 8));
                     uint64_t c_bit64 = xnor_int64(a_bit64, b_bit64);
                     //printf("\n %d \n",__builtin_popcountll(c_bit64)); // gcc
-                    printf("\n %d \n", __popcnt64(c_bit64));    // msvs
+                    printf("\n %d \n", (int)POPCNT64(c_bit64));    // cast: POPCNT64 may yield a 64-bit type, %d expects int
 
                     int h;
                     for (h = 0; h < 64; ++h)
@@ -298,11 +314,7 @@ void gemm_nn_custom_bin_mean_transposed(int M, int N, int K, float ALPHA_UNUSED,
                 uint64_t b_bit64 = *((uint64_t *)(B + (j*ldb + k) / 8));
                 uint64_t c_bit64 = xnor_int64(a_bit64, b_bit64);
 
-#ifdef WIN32
-                int tmp_count = __popcnt64(c_bit64);
-#else
-                int tmp_count = __builtin_popcountll(c_bit64);
-#endif
+                int tmp_count = POPCNT64(c_bit64);
 
                 if (K - k < 64)  tmp_count = tmp_count - (64 - (K - k));    // remove extra bits
                 count += tmp_count;
@@ -503,16 +515,6 @@ void transpose_bin(uint32_t *A, uint32_t *B, const int n, const int m,
     }
 }
 
-static inline int popcnt_32(uint32_t val32) {
-#ifdef WIN32  // Windows MSVS
-    int tmp_count = __popcnt(val32);
-#else   // Linux GCC
-    int tmp_count = __builtin_popcount(val32);
-#endif
-    return tmp_count;
-}
-//----------------------------
-
 #if (defined(__AVX__) && defined(__x86_64__)) || (defined(_WIN64) && !defined(__MINGW32__))
 
 #if (defined(_WIN64) && !defined(__MINGW64__))
@@ -925,14 +927,14 @@ void gemm_nn_bin_32bit_packed(int M, int N, int K, float ALPHA,
 
                 // waiting for - CPUID Flags: AVX512VPOPCNTDQ: __m512i _mm512_popcnt_epi32(__m512i a)
                 __m256 count = _mm256_setr_ps(
-                    popcnt_32(_mm256_extract_epi32(xnor256, 0)),
-                    popcnt_32(_mm256_extract_epi32(xnor256, 1)),
-                    popcnt_32(_mm256_extract_epi32(xnor256, 2)),
-                    popcnt_32(_mm256_extract_epi32(xnor256, 3)),
-                    popcnt_32(_mm256_extract_epi32(xnor256, 4)),
-                    popcnt_32(_mm256_extract_epi32(xnor256, 5)),
-                    popcnt_32(_mm256_extract_epi32(xnor256, 6)),
-                    popcnt_32(_mm256_extract_epi32(xnor256, 7)));
+                    POPCNT(_mm256_extract_epi32(xnor256, 0)),
+                    POPCNT(_mm256_extract_epi32(xnor256, 1)),
+                    POPCNT(_mm256_extract_epi32(xnor256, 2)),
+                    POPCNT(_mm256_extract_epi32(xnor256, 3)),
+                    POPCNT(_mm256_extract_epi32(xnor256, 4)),
+                    POPCNT(_mm256_extract_epi32(xnor256, 5)),
+                    POPCNT(_mm256_extract_epi32(xnor256, 6)),
+                    POPCNT(_mm256_extract_epi32(xnor256, 7)));
 
                 __m256 val2 = _mm256_set1_ps(2);
                 count = _mm256_mul_ps(count, val2);     // count * 2
@@ -952,7 +954,7 @@ void gemm_nn_bin_32bit_packed(int M, int N, int K, float ALPHA,
             {
                 PUT_IN_REGISTER uint32_t B_PART = B[s*ldb + j];
                 uint32_t xnor_result = ~(A_PART ^ B_PART);
-                int32_t count = popcnt_32(xnor_result);  // must be Signed int
+                int32_t count = POPCNT(xnor_result);  // must be Signed int
 
                 C[i*ldc + j] += (2 * count - 32) * mean_val;
             }
@@ -1140,13 +1142,7 @@ void convolution_2d(int w, int h, int ksize, int n, int c, int pad, int stride,
 
 static inline int popcnt128(__m128i n) {
     const __m128i n_hi = _mm_unpackhi_epi64(n, n);
-#if defined(_MSC_VER)
-    return __popcnt64(_mm_cvtsi128_si64(n)) + __popcnt64(_mm_cvtsi128_si64(n_hi));
-#elif defined(__APPLE__) && defined(__clang__)
-    return _mm_popcnt_u64(_mm_cvtsi128_si64(n)) + _mm_popcnt_u64(_mm_cvtsi128_si64(n_hi));
-#else
-    return __popcntq(_mm_cvtsi128_si64(n)) + __popcntq(_mm_cvtsi128_si64(n_hi));
-#endif
+    return POPCNT64(_mm_cvtsi128_si64(n)) + POPCNT64(_mm_cvtsi128_si64(n_hi));
 }
 
 static inline int popcnt256(__m256i n) {
@@ -2021,7 +2017,7 @@ void gemm_nn_bin_32bit_packed(int M, int N, int K, float ALPHA,
                 PUT_IN_REGISTER uint32_t B_PART = B[s * ldb + j];
                 uint32_t xnor_result = ~(A_PART ^ B_PART);
                 //printf(" xnor_result = %d, ", xnor_result);
-                int32_t count = popcnt_32(xnor_result);  // must be Signed int
+                int32_t count = POPCNT(xnor_result);  // must be Signed int
 
                 C[i*ldc + j] += (2 * count - 32) * mean_val;
                 //c[i*n + j] += count*mean;
@@ -2079,25 +2075,6 @@ void convolution_2d(int w, int h, int ksize, int n, int c, int pad, int stride,
     }
 }
 
-static inline int popcnt_64(uint64_t val64) {
-#ifdef WIN32  // Windows
-#ifdef _WIN64 // Windows 64-bit
-    int tmp_count = __popcnt64(val64);
-#else         // Windows 32-bit
-    int tmp_count = __popcnt(val64);
-    tmp_count += __popcnt(val64 >> 32);
-#endif
-#else   // Linux
-#if defined(__x86_64__) || defined(__aarch64__)  // Linux 64-bit
-    int tmp_count = __builtin_popcountll(val64);
-#else  // Linux 32-bit
-    int tmp_count = __builtin_popcount(val64);
-    tmp_count += __builtin_popcount(val64 >> 32);
-#endif
-#endif
-    return tmp_count;
-}
-
 void gemm_nn_custom_bin_mean_transposed(int M, int N, int K, float ALPHA_UNUSED,
     unsigned char *A, int lda,
     unsigned char *B, int ldb,
@@ -2118,7 +2095,7 @@ void gemm_nn_custom_bin_mean_transposed(int M, int N, int K, float ALPHA_UNUSED,
                 uint64_t b_bit64 = *((uint64_t *)(B + (j*ldb + k) / 8));
                 uint64_t c_bit64 = xnor_int64(a_bit64, b_bit64);
 
-                int tmp_count = popcnt_64(c_bit64);
+                int tmp_count = POPCNT64(c_bit64);
 
                 if (K - k < 64)  tmp_count = tmp_count - (64 - (K - k));    // remove extra bits
                 count += tmp_count;
@@ -2518,7 +2495,7 @@ void gemm_nn_bin_transposed_32bit_packed(int M, int N, int K, float ALPHA,
                 PUT_IN_REGISTER uint32_t A_PART = ((uint32_t*)A)[i*lda + s];
                 PUT_IN_REGISTER uint32_t B_PART = ((uint32_t*)B)[j * ldb + s];
                 uint32_t xnor_result = ~(A_PART ^ B_PART);
-                int32_t count = popcnt_32(xnor_result);  // must be Signed int
+                int32_t count = POPCNT(xnor_result);  // must be Signed int
 
                 val += (2 * count - 32) * mean_val;
             }
@@ -2581,7 +2558,7 @@ void convolution_repacked(uint32_t *packed_input, uint32_t *packed_weights, floa
                             uint32_t weight = ((uint32_t *)packed_weights)[fil*new_lda / 32 + chan*size*size + f_y*size + f_x];
 
                             uint32_t xnor_result = ~(input ^ weight);
-                            int32_t count = popcnt_32(xnor_result); // mandatory Signed int
+                            int32_t count = POPCNT(xnor_result); // mandatory Signed int
                             sum += (2 * count - 32) * mean_val;
                         }
                     }
diff --git a/src/http_stream.cpp b/src/http_stream.cpp
index 3ec7e851593..b17edfb5d36 100644
--- a/src/http_stream.cpp
+++ b/src/http_stream.cpp
@@ -70,8 +70,12 @@ static int close_socket(SOCKET s) {
 #define SOCKADDR    struct sockaddr
 #define SOCKADDR_IN  struct sockaddr_in
 #define ADDRPOINTER  unsigned int*
+#ifndef INVALID_SOCKET
 #define INVALID_SOCKET -1
+#endif
+#ifndef SOCKET_ERROR
 #define SOCKET_ERROR   -1
+#endif
 struct _IGNORE_PIPE_SIGNAL
 {
     struct sigaction new_actn, old_actn;
@@ -934,4 +938,3 @@ void set_track_id(detection *new_dets, int new_dets_num, float thresh, float sim
         }
     }
 }
-
diff --git a/src/httplib.h b/src/httplib.h
index 41fbfb19419..e4678faeeb2 100644
--- a/src/httplib.h
+++ b/src/httplib.h
@@ -126,7 +126,10 @@ using socket_t = SOCKET;
 #include <unistd.h>
 
 using socket_t = int;
-#define INVALID_SOCKET (-1)
+
+#ifndef INVALID_SOCKET
+#define INVALID_SOCKET (-1)
+#endif
 #endif //_WIN32
 
 #include <array>
diff --git a/vcpkg.json b/vcpkg.json
index a7f66f260fc..c57fe9175bf 100644
--- a/vcpkg.json
+++ b/vcpkg.json
@@ -1,12 +1,11 @@
 {
   "name": "darknet",
-  "version-string": "0.2.5.4",
-  "port-version": 2,
-  "homepage": "https://github.com/alexeyab/darknet",
+  "version-date": "2021-04-16",
   "description": "Darknet is an open source neural network framework written in C and CUDA. You only look once (YOLO) is a state-of-the-art, real-time object detection system, best example of darknet functionalities.",
+  "homepage": "https://github.com/alexeyab/darknet",
   "dependencies": [
-    "stb",
-    "pthreads"
+    "pthreads",
+    "stb"
   ],
   "features": {
     "cuda": {
@@ -22,6 +21,19 @@
         "cudnn"
       ]
     },
+    "full": {
+      "description": "Build darknet fully featured",
+      "dependencies": [
+        {
+          "name": "darknet",
+          "features": [
+            "cuda",
+            "cudnn",
+            "opencv-cuda"
+          ]
+        }
+      ]
+    },
     "opencv-base": {
       "description": "Build darknet with support for latest version of OpenCV",
       "dependencies": [